molcraft 0.1.0a4__tar.gz → 0.1.0a6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of molcraft might be problematic. Click here for more details.

Files changed (32)
  1. {molcraft-0.1.0a4 → molcraft-0.1.0a6}/PKG-INFO +4 -17
  2. {molcraft-0.1.0a4 → molcraft-0.1.0a6}/README.md +3 -16
  3. {molcraft-0.1.0a4 → molcraft-0.1.0a6}/molcraft/__init__.py +3 -2
  4. {molcraft-0.1.0a4 → molcraft-0.1.0a6}/molcraft/chem.py +70 -4
  5. {molcraft-0.1.0a4 → molcraft-0.1.0a6}/molcraft/conformers.py +1 -1
  6. {molcraft-0.1.0a4 → molcraft-0.1.0a6}/molcraft/featurizers.py +20 -14
  7. {molcraft-0.1.0a4 → molcraft-0.1.0a6}/molcraft/layers.py +258 -185
  8. molcraft-0.1.0a6/molcraft/losses.py +36 -0
  9. {molcraft-0.1.0a4 → molcraft-0.1.0a6}/molcraft/models.py +119 -8
  10. {molcraft-0.1.0a4 → molcraft-0.1.0a6}/molcraft/ops.py +10 -0
  11. {molcraft-0.1.0a4 → molcraft-0.1.0a6}/molcraft/records.py +32 -31
  12. {molcraft-0.1.0a4 → molcraft-0.1.0a6}/molcraft/tensors.py +1 -1
  13. {molcraft-0.1.0a4 → molcraft-0.1.0a6}/molcraft.egg-info/PKG-INFO +4 -17
  14. {molcraft-0.1.0a4 → molcraft-0.1.0a6}/molcraft.egg-info/SOURCES.txt +2 -2
  15. {molcraft-0.1.0a4 → molcraft-0.1.0a6}/tests/test_layers.py +32 -32
  16. molcraft-0.1.0a6/tests/test_losses.py +24 -0
  17. {molcraft-0.1.0a4 → molcraft-0.1.0a6}/tests/test_models.py +1 -0
  18. molcraft-0.1.0a4/molcraft/experimental/__init__.py +0 -1
  19. molcraft-0.1.0a4/molcraft/experimental/peptides.py +0 -281
  20. {molcraft-0.1.0a4 → molcraft-0.1.0a6}/LICENSE +0 -0
  21. {molcraft-0.1.0a4 → molcraft-0.1.0a6}/molcraft/callbacks.py +0 -0
  22. {molcraft-0.1.0a4 → molcraft-0.1.0a6}/molcraft/datasets.py +0 -0
  23. {molcraft-0.1.0a4 → molcraft-0.1.0a6}/molcraft/descriptors.py +0 -0
  24. {molcraft-0.1.0a4 → molcraft-0.1.0a6}/molcraft/features.py +0 -0
  25. {molcraft-0.1.0a4 → molcraft-0.1.0a6}/molcraft.egg-info/dependency_links.txt +0 -0
  26. {molcraft-0.1.0a4 → molcraft-0.1.0a6}/molcraft.egg-info/requires.txt +0 -0
  27. {molcraft-0.1.0a4 → molcraft-0.1.0a6}/molcraft.egg-info/top_level.txt +0 -0
  28. {molcraft-0.1.0a4 → molcraft-0.1.0a6}/pyproject.toml +0 -0
  29. {molcraft-0.1.0a4 → molcraft-0.1.0a6}/setup.cfg +0 -0
  30. {molcraft-0.1.0a4 → molcraft-0.1.0a6}/tests/test_chem.py +0 -0
  31. {molcraft-0.1.0a4 → molcraft-0.1.0a6}/tests/test_featurizers.py +0 -0
  32. {molcraft-0.1.0a4 → molcraft-0.1.0a6}/tests/test_tensors.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: molcraft
3
- Version: 0.1.0a4
3
+ Version: 0.1.0a6
4
4
  Summary: Graph Neural Networks for Molecular Machine Learning
5
5
  Author-email: Alexander Kensert <alexander.kensert@gmail.com>
6
6
  License: MIT License
@@ -55,7 +55,7 @@ Dynamic: license-file
55
55
  - Customizable and serializable **layers** and **models**
56
56
  - Customizable **GraphTensor**
57
57
  - Fast and efficient featurization of molecular graphs
58
- - Efficient and easy-to-use input pipelines using TF **records**
58
+ - Fast and efficient input pipelines using TF **records**
59
59
 
60
60
  ## Examples
61
61
 
@@ -106,20 +106,7 @@ print(pred)
106
106
  # featurizers.save_featurizer(featurizer, '/tmp/featurizer.json')
107
107
  # models.save_model(model, '/tmp/model.keras')
108
108
 
109
- # featurizers.load_featurizer('/tmp/featurizer.json')
110
- # models.load_model('/tmp/model.keras')
109
+ # loaded_featurizer = featurizers.load_featurizer('/tmp/featurizer.json')
110
+ # loaded_model = models.load_model('/tmp/model.keras')
111
111
  ```
112
112
 
113
- ## Installation
114
-
115
- Install the pre-release of molcraft via pip:
116
-
117
- ```bash
118
- pip install molcraft --pre
119
- ```
120
-
121
- with GPU support:
122
-
123
- ```bash
124
- pip install molcraft[gpu] --pre
125
- ```
@@ -11,7 +11,7 @@
11
11
  - Customizable and serializable **layers** and **models**
12
12
  - Customizable **GraphTensor**
13
13
  - Fast and efficient featurization of molecular graphs
14
- - Efficient and easy-to-use input pipelines using TF **records**
14
+ - Fast and efficient input pipelines using TF **records**
15
15
 
16
16
  ## Examples
17
17
 
@@ -62,20 +62,7 @@ print(pred)
62
62
  # featurizers.save_featurizer(featurizer, '/tmp/featurizer.json')
63
63
  # models.save_model(model, '/tmp/model.keras')
64
64
 
65
- # featurizers.load_featurizer('/tmp/featurizer.json')
66
- # models.load_model('/tmp/model.keras')
65
+ # loaded_featurizer = featurizers.load_featurizer('/tmp/featurizer.json')
66
+ # loaded_model = models.load_model('/tmp/model.keras')
67
67
  ```
68
68
 
69
- ## Installation
70
-
71
- Install the pre-release of molcraft via pip:
72
-
73
- ```bash
74
- pip install molcraft --pre
75
- ```
76
-
77
- with GPU support:
78
-
79
- ```bash
80
- pip install molcraft[gpu] --pre
81
- ```
@@ -1,4 +1,4 @@
1
- __version__ = '0.1.0a4'
1
+ __version__ = '0.1.0a6'
2
2
 
3
3
  import os
4
4
  os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
@@ -14,4 +14,5 @@ from molcraft import ops
14
14
  from molcraft import records
15
15
  from molcraft import tensors
16
16
  from molcraft import callbacks
17
- from molcraft import datasets
17
+ from molcraft import datasets
18
+ from molcraft import losses
@@ -11,6 +11,7 @@ from rdkit.Chem import rdMolTransforms
11
11
  from rdkit.Chem import rdPartialCharges
12
12
  from rdkit.Chem import rdMolDescriptors
13
13
  from rdkit.Chem import rdForceFieldHelpers
14
+ from rdkit.Chem import rdFingerprintGenerator
14
15
 
15
16
 
16
17
  class Mol(Chem.Mol):
@@ -579,8 +580,7 @@ def _calc_mmff_energies(
579
580
  energies.append(float('nan'))
580
581
  return energies
581
582
 
582
-
583
- def _split_mol_by_confs(mol: Mol) -> list[Mol]:
583
+ def unpack_conformers(mol: Mol) -> list[Mol]:
584
584
  mols = []
585
585
  for conf in mol.get_conformers():
586
586
  new_mol = Chem.Mol(mol)
@@ -590,11 +590,77 @@ def _split_mol_by_confs(mol: Mol) -> list[Mol]:
590
590
  mols.append(new_mol)
591
591
  return mols
592
592
 
593
+ _fingerprint_types = {
594
+ 'rdkit': rdFingerprintGenerator.GetRDKitFPGenerator,
595
+ 'morgan': rdFingerprintGenerator.GetMorganGenerator,
596
+ 'topological_torsion': rdFingerprintGenerator.GetTopologicalTorsionGenerator,
597
+ 'atom_pair': rdFingerprintGenerator.GetAtomPairGenerator,
598
+ }
599
+
600
+ def _get_fingerprint(
601
+ mol: Mol,
602
+ fp_type: str = 'morgan',
603
+ binary: bool = True,
604
+ dtype: str = 'float32',
605
+ **kwargs,
606
+ ) -> np.ndarray:
607
+ fingerprint: rdFingerprintGenerator.FingerprintGenerator64 = (
608
+ _fingerprint_types[fp_type](**kwargs)
609
+ )
610
+ if not isinstance(mol, Mol):
611
+ mol = Mol.from_encoding(mol)
612
+ if binary:
613
+ fp: np.ndarray = fingerprint.GetFingerprintAsNumPy(mol)
614
+ else:
615
+ fp: np.ndarray = fingerprint.GetCountFingerprintAsNumPy(mol)
616
+ return fp.astype(dtype)
617
+
618
+ def _rdkit_fingerprint(
619
+ mol: Chem.Mol,
620
+ size: int = 2048,
621
+ *,
622
+ min_path: int = 1,
623
+ max_path: int = 7,
624
+ binary: bool = True,
625
+ dtype: str = 'float32',
626
+ ) -> np.ndarray:
627
+ fp_param = {'fpSize': size, 'minPath': min_path, 'maxPath': max_path}
628
+ return _get_fingerprint(mol, 'rdkit', binary, dtype, **fp_param)
629
+
630
+ def _morgan_fingerprint(
631
+ mol: Chem.Mol,
632
+ size: int = 2048,
633
+ *,
634
+ radius: int = 3,
635
+ binary: bool = True,
636
+ dtype: str = 'float32',
637
+ ) -> np.ndarray:
638
+ fp_param = {'radius': radius, 'fpSize': size}
639
+ return _get_fingerprint(mol, 'morgan', binary, dtype, **fp_param)
640
+
641
+ def _topological_torsion_fingerprint(
642
+ mol: Chem.Mol,
643
+ size: int = 2048,
644
+ *,
645
+ binary: bool = True,
646
+ dtype: str = 'float32',
647
+ ) -> np.ndarray:
648
+ fp_param = {'fpSize': size}
649
+ return _get_fingerprint(mol, 'topological_torsion', binary, dtype, **fp_param)
650
+
651
+ def _atom_pair_fingerprint(
652
+ mol: Chem.Mol,
653
+ size: int = 2048,
654
+ *,
655
+ binary: bool = True,
656
+ dtype: str = 'float32',
657
+ ) -> np.ndarray:
658
+ fp_param = {'fpSize': size}
659
+ return _get_fingerprint(mol, 'atom_pair', binary, dtype, **fp_param)
660
+
593
661
  def warn(message: str) -> None:
594
662
  warnings.warn(
595
663
  message=message,
596
664
  category=UserWarning,
597
665
  stacklevel=1,
598
666
  )
599
-
600
-
@@ -23,7 +23,7 @@ class ConformerEmbedder(ConformerProcessor):
23
23
  def __init__(
24
24
  self,
25
25
  method: str = 'ETKDGv3',
26
- num_conformers: int = 10,
26
+ num_conformers: int = 5,
27
27
  force: bool = True,
28
28
  **kwargs,
29
29
  ) -> None:
@@ -175,7 +175,7 @@ class MolGraphFeaturizer(Featurizer):
175
175
  default_bond_features = (
176
176
  bond_features == 'auto' or bond_features == 'default'
177
177
  )
178
- if default_bond_features or self.radius > 1 or self.self_loops:
178
+ if default_bond_features or self.radius > 1:
179
179
  vocab = ['zero', 'single', 'double', 'triple', 'aromatic']
180
180
  bond_features = [
181
181
  features.BondType(vocab)
@@ -215,7 +215,7 @@ class MolGraphFeaturizer(Featurizer):
215
215
  if mol is None:
216
216
  warn(
217
217
  f'Could not obtain `chem.Mol` from {x}. '
218
- 'Proceeding without it.'
218
+ 'Returning `None` (proceeding without it).'
219
219
  )
220
220
  return None
221
221
 
@@ -254,24 +254,17 @@ class MolGraphFeaturizer(Featurizer):
254
254
 
255
255
  node = {}
256
256
  node['feature'] = atom_feature
257
-
258
- if bond_feature is not None and (self.radius > 1 or self.self_loops):
259
- # Append 'zero order' bond feature encoding, which encodes non-bonds.
260
- zero_bond_feature = np.array(
261
- [[1., 0., 0., 0., 0.]], dtype=bond_feature.dtype
262
- )
263
- bond_feature = np.concatenate(
264
- [bond_feature, zero_bond_feature], axis=0
265
- )
266
257
 
267
258
  edge = {}
268
259
  if self.radius == 1:
269
260
  edge['source'], edge['target'] = mol.adjacency(
270
261
  fill='full', sparse=True, self_loops=self.self_loops, dtype=self.index_dtype
271
262
  )
263
+ if self.self_loops:
264
+ bond_feature = np.pad(bond_feature, [(0, 1), (0, 0)])
272
265
  if bond_feature is not None:
273
266
  bond_indices = []
274
- for (atom_i, atom_j) in zip(edge['source'], edge['target']):
267
+ for atom_i, atom_j in zip(edge['source'], edge['target']):
275
268
  if atom_i == atom_j:
276
269
  bond_indices.append(-1)
277
270
  else:
@@ -279,6 +272,8 @@ class MolGraphFeaturizer(Featurizer):
279
272
  mol.get_bond_between_atoms(atom_i, atom_j).index
280
273
  )
281
274
  edge['feature'] = bond_feature[bond_indices]
275
+ if self.self_loops:
276
+ edge['self_loop'] = (edge['source'] == edge['target'])
282
277
  else:
283
278
  paths = chem.get_shortest_paths(
284
279
  mol, radius=self.radius, self_loops=self.self_loops
@@ -293,6 +288,12 @@ class MolGraphFeaturizer(Featurizer):
293
288
  [len(path) - 1 for path in paths], dtype=self.index_dtype
294
289
  )
295
290
  if bond_feature is not None:
291
+ zero_bond_feature = np.array(
292
+ [[1., 0., 0., 0., 0.]], dtype=bond_feature.dtype
293
+ )
294
+ bond_feature = np.concatenate(
295
+ [bond_feature, zero_bond_feature], axis=0
296
+ )
296
297
  edge['feature'] = self._expand_bond_features(
297
298
  mol, paths, bond_feature,
298
299
  )
@@ -511,7 +512,7 @@ class MolGraphFeaturizer3D(MolGraphFeaturizer):
511
512
  steps=[
512
513
  conformers.ConformerEmbedder(
513
514
  method='ETKDGv3',
514
- num_conformers=10
515
+ num_conformers=5
515
516
  ),
516
517
  ]
517
518
  )
@@ -588,7 +589,7 @@ class MolGraphFeaturizer3D(MolGraphFeaturizer):
588
589
  edge_feature = self.bond_features(mol)
589
590
 
590
591
  edge = {}
591
- mols = chem._split_mol_by_confs(mol)
592
+ mols = chem.unpack_conformers(mol)
592
593
  tensor_list = []
593
594
  for i, mol in enumerate(mols):
594
595
  node_conformer = copy.deepcopy(node)
@@ -734,6 +735,11 @@ def _add_super_edges(
734
735
  ]
735
736
  )
736
737
 
738
+ if 'self_loop' in edge:
739
+ edge['self_loop'] = np.pad(
740
+ edge['self_loop'], [(0, num_nodes * num_super_nodes * 2)],
741
+ constant_values=False,
742
+ )
737
743
  if 'length' in edge:
738
744
  edge['length'] = np.pad(edge['length'], [(0, 0), (1, 0)])
739
745
  zero_array = np.zeros([num_nodes * num_super_nodes * 2], dtype='int32')