molcraft 0.1.0a5__tar.gz → 0.1.0a6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of molcraft might be problematic. Click here for more details.
- {molcraft-0.1.0a5 → molcraft-0.1.0a6}/PKG-INFO +4 -17
- {molcraft-0.1.0a5 → molcraft-0.1.0a6}/README.md +3 -16
- {molcraft-0.1.0a5 → molcraft-0.1.0a6}/molcraft/__init__.py +3 -2
- {molcraft-0.1.0a5 → molcraft-0.1.0a6}/molcraft/chem.py +70 -4
- {molcraft-0.1.0a5 → molcraft-0.1.0a6}/molcraft/conformers.py +1 -1
- {molcraft-0.1.0a5 → molcraft-0.1.0a6}/molcraft/featurizers.py +20 -14
- {molcraft-0.1.0a5 → molcraft-0.1.0a6}/molcraft/layers.py +258 -185
- molcraft-0.1.0a6/molcraft/losses.py +36 -0
- {molcraft-0.1.0a5 → molcraft-0.1.0a6}/molcraft/models.py +119 -8
- {molcraft-0.1.0a5 → molcraft-0.1.0a6}/molcraft/ops.py +10 -0
- {molcraft-0.1.0a5 → molcraft-0.1.0a6}/molcraft/records.py +32 -31
- {molcraft-0.1.0a5 → molcraft-0.1.0a6}/molcraft/tensors.py +1 -1
- {molcraft-0.1.0a5 → molcraft-0.1.0a6}/molcraft.egg-info/PKG-INFO +4 -17
- {molcraft-0.1.0a5 → molcraft-0.1.0a6}/molcraft.egg-info/SOURCES.txt +2 -0
- {molcraft-0.1.0a5 → molcraft-0.1.0a6}/tests/test_layers.py +32 -32
- molcraft-0.1.0a6/tests/test_losses.py +24 -0
- {molcraft-0.1.0a5 → molcraft-0.1.0a6}/tests/test_models.py +1 -0
- {molcraft-0.1.0a5 → molcraft-0.1.0a6}/LICENSE +0 -0
- {molcraft-0.1.0a5 → molcraft-0.1.0a6}/molcraft/callbacks.py +0 -0
- {molcraft-0.1.0a5 → molcraft-0.1.0a6}/molcraft/datasets.py +0 -0
- {molcraft-0.1.0a5 → molcraft-0.1.0a6}/molcraft/descriptors.py +0 -0
- {molcraft-0.1.0a5 → molcraft-0.1.0a6}/molcraft/features.py +0 -0
- {molcraft-0.1.0a5 → molcraft-0.1.0a6}/molcraft.egg-info/dependency_links.txt +0 -0
- {molcraft-0.1.0a5 → molcraft-0.1.0a6}/molcraft.egg-info/requires.txt +0 -0
- {molcraft-0.1.0a5 → molcraft-0.1.0a6}/molcraft.egg-info/top_level.txt +0 -0
- {molcraft-0.1.0a5 → molcraft-0.1.0a6}/pyproject.toml +0 -0
- {molcraft-0.1.0a5 → molcraft-0.1.0a6}/setup.cfg +0 -0
- {molcraft-0.1.0a5 → molcraft-0.1.0a6}/tests/test_chem.py +0 -0
- {molcraft-0.1.0a5 → molcraft-0.1.0a6}/tests/test_featurizers.py +0 -0
- {molcraft-0.1.0a5 → molcraft-0.1.0a6}/tests/test_tensors.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: molcraft
|
|
3
|
-
Version: 0.1.0a5
|
|
3
|
+
Version: 0.1.0a6
|
|
4
4
|
Summary: Graph Neural Networks for Molecular Machine Learning
|
|
5
5
|
Author-email: Alexander Kensert <alexander.kensert@gmail.com>
|
|
6
6
|
License: MIT License
|
|
@@ -55,7 +55,7 @@ Dynamic: license-file
|
|
|
55
55
|
- Customizable and serializable **layers** and **models**
|
|
56
56
|
- Customizable **GraphTensor**
|
|
57
57
|
- Fast and efficient featurization of molecular graphs
|
|
58
|
-
-
|
|
58
|
+
- Fast and efficient input pipelines using TF **records**
|
|
59
59
|
|
|
60
60
|
## Examples
|
|
61
61
|
|
|
@@ -106,20 +106,7 @@ print(pred)
|
|
|
106
106
|
# featurizers.save_featurizer(featurizer, '/tmp/featurizer.json')
|
|
107
107
|
# models.save_model(model, '/tmp/model.keras')
|
|
108
108
|
|
|
109
|
-
# featurizers.load_featurizer('/tmp/featurizer.json')
|
|
110
|
-
# models.load_model('/tmp/model.keras')
|
|
109
|
+
# loaded_featurizer = featurizers.load_featurizer('/tmp/featurizer.json')
|
|
110
|
+
# loaded_model = models.load_model('/tmp/model.keras')
|
|
111
111
|
```
|
|
112
112
|
|
|
113
|
-
## Installation
|
|
114
|
-
|
|
115
|
-
Install the pre-release of molcraft via pip:
|
|
116
|
-
|
|
117
|
-
```bash
|
|
118
|
-
pip install molcraft --pre
|
|
119
|
-
```
|
|
120
|
-
|
|
121
|
-
with GPU support:
|
|
122
|
-
|
|
123
|
-
```bash
|
|
124
|
-
pip install molcraft[gpu] --pre
|
|
125
|
-
```
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
- Customizable and serializable **layers** and **models**
|
|
12
12
|
- Customizable **GraphTensor**
|
|
13
13
|
- Fast and efficient featurization of molecular graphs
|
|
14
|
-
-
|
|
14
|
+
- Fast and efficient input pipelines using TF **records**
|
|
15
15
|
|
|
16
16
|
## Examples
|
|
17
17
|
|
|
@@ -62,20 +62,7 @@ print(pred)
|
|
|
62
62
|
# featurizers.save_featurizer(featurizer, '/tmp/featurizer.json')
|
|
63
63
|
# models.save_model(model, '/tmp/model.keras')
|
|
64
64
|
|
|
65
|
-
# featurizers.load_featurizer('/tmp/featurizer.json')
|
|
66
|
-
# models.load_model('/tmp/model.keras')
|
|
65
|
+
# loaded_featurizer = featurizers.load_featurizer('/tmp/featurizer.json')
|
|
66
|
+
# loaded_model = models.load_model('/tmp/model.keras')
|
|
67
67
|
```
|
|
68
68
|
|
|
69
|
-
## Installation
|
|
70
|
-
|
|
71
|
-
Install the pre-release of molcraft via pip:
|
|
72
|
-
|
|
73
|
-
```bash
|
|
74
|
-
pip install molcraft --pre
|
|
75
|
-
```
|
|
76
|
-
|
|
77
|
-
with GPU support:
|
|
78
|
-
|
|
79
|
-
```bash
|
|
80
|
-
pip install molcraft[gpu] --pre
|
|
81
|
-
```
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
__version__ = '0.1.0a5'
|
|
1
|
+
__version__ = '0.1.0a6'
|
|
2
2
|
|
|
3
3
|
import os
|
|
4
4
|
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
|
|
@@ -14,4 +14,5 @@ from molcraft import ops
|
|
|
14
14
|
from molcraft import records
|
|
15
15
|
from molcraft import tensors
|
|
16
16
|
from molcraft import callbacks
|
|
17
|
-
from molcraft import datasets
|
|
17
|
+
from molcraft import datasets
|
|
18
|
+
from molcraft import losses
|
|
@@ -11,6 +11,7 @@ from rdkit.Chem import rdMolTransforms
|
|
|
11
11
|
from rdkit.Chem import rdPartialCharges
|
|
12
12
|
from rdkit.Chem import rdMolDescriptors
|
|
13
13
|
from rdkit.Chem import rdForceFieldHelpers
|
|
14
|
+
from rdkit.Chem import rdFingerprintGenerator
|
|
14
15
|
|
|
15
16
|
|
|
16
17
|
class Mol(Chem.Mol):
|
|
@@ -579,8 +580,7 @@ def _calc_mmff_energies(
|
|
|
579
580
|
energies.append(float('nan'))
|
|
580
581
|
return energies
|
|
581
582
|
|
|
582
|
-
|
|
583
|
-
def _split_mol_by_confs(mol: Mol) -> list[Mol]:
|
|
583
|
+
def unpack_conformers(mol: Mol) -> list[Mol]:
|
|
584
584
|
mols = []
|
|
585
585
|
for conf in mol.get_conformers():
|
|
586
586
|
new_mol = Chem.Mol(mol)
|
|
@@ -590,11 +590,77 @@ def _split_mol_by_confs(mol: Mol) -> list[Mol]:
|
|
|
590
590
|
mols.append(new_mol)
|
|
591
591
|
return mols
|
|
592
592
|
|
|
593
|
+
_fingerprint_types = {
|
|
594
|
+
'rdkit': rdFingerprintGenerator.GetRDKitFPGenerator,
|
|
595
|
+
'morgan': rdFingerprintGenerator.GetMorganGenerator,
|
|
596
|
+
'topological_torsion': rdFingerprintGenerator.GetTopologicalTorsionGenerator,
|
|
597
|
+
'atom_pair': rdFingerprintGenerator.GetAtomPairGenerator,
|
|
598
|
+
}
|
|
599
|
+
|
|
600
|
+
def _get_fingerprint(
|
|
601
|
+
mol: Mol,
|
|
602
|
+
fp_type: str = 'morgan',
|
|
603
|
+
binary: bool = True,
|
|
604
|
+
dtype: str = 'float32',
|
|
605
|
+
**kwargs,
|
|
606
|
+
) -> np.ndarray:
|
|
607
|
+
fingerprint: rdFingerprintGenerator.FingerprintGenerator64 = (
|
|
608
|
+
_fingerprint_types[fp_type](**kwargs)
|
|
609
|
+
)
|
|
610
|
+
if not isinstance(mol, Mol):
|
|
611
|
+
mol = Mol.from_encoding(mol)
|
|
612
|
+
if binary:
|
|
613
|
+
fp: np.ndarray = fingerprint.GetFingerprintAsNumPy(mol)
|
|
614
|
+
else:
|
|
615
|
+
fp: np.ndarray = fingerprint.GetCountFingerprintAsNumPy(mol)
|
|
616
|
+
return fp.astype(dtype)
|
|
617
|
+
|
|
618
|
+
def _rdkit_fingerprint(
|
|
619
|
+
mol: Chem.Mol,
|
|
620
|
+
size: int = 2048,
|
|
621
|
+
*,
|
|
622
|
+
min_path: int = 1,
|
|
623
|
+
max_path: int = 7,
|
|
624
|
+
binary: bool = True,
|
|
625
|
+
dtype: str = 'float32',
|
|
626
|
+
) -> np.ndarray:
|
|
627
|
+
fp_param = {'fpSize': size, 'minPath': min_path, 'maxPath': max_path}
|
|
628
|
+
return _get_fingerprint(mol, 'rdkit', binary, dtype, **fp_param)
|
|
629
|
+
|
|
630
|
+
def _morgan_fingerprint(
|
|
631
|
+
mol: Chem.Mol,
|
|
632
|
+
size: int = 2048,
|
|
633
|
+
*,
|
|
634
|
+
radius: int = 3,
|
|
635
|
+
binary: bool = True,
|
|
636
|
+
dtype: str = 'float32',
|
|
637
|
+
) -> np.ndarray:
|
|
638
|
+
fp_param = {'radius': radius, 'fpSize': size}
|
|
639
|
+
return _get_fingerprint(mol, 'morgan', binary, dtype, **fp_param)
|
|
640
|
+
|
|
641
|
+
def _topological_torsion_fingerprint(
|
|
642
|
+
mol: Chem.Mol,
|
|
643
|
+
size: int = 2048,
|
|
644
|
+
*,
|
|
645
|
+
binary: bool = True,
|
|
646
|
+
dtype: str = 'float32',
|
|
647
|
+
) -> np.ndarray:
|
|
648
|
+
fp_param = {'fpSize': size}
|
|
649
|
+
return _get_fingerprint(mol, 'topological_torsion', binary, dtype, **fp_param)
|
|
650
|
+
|
|
651
|
+
def _atom_pair_fingerprint(
|
|
652
|
+
mol: Chem.Mol,
|
|
653
|
+
size: int = 2048,
|
|
654
|
+
*,
|
|
655
|
+
binary: bool = True,
|
|
656
|
+
dtype: str = 'float32',
|
|
657
|
+
) -> np.ndarray:
|
|
658
|
+
fp_param = {'fpSize': size}
|
|
659
|
+
return _get_fingerprint(mol, 'atom_pair', binary, dtype, **fp_param)
|
|
660
|
+
|
|
593
661
|
def warn(message: str) -> None:
|
|
594
662
|
warnings.warn(
|
|
595
663
|
message=message,
|
|
596
664
|
category=UserWarning,
|
|
597
665
|
stacklevel=1,
|
|
598
666
|
)
|
|
599
|
-
|
|
600
|
-
|
|
@@ -175,7 +175,7 @@ class MolGraphFeaturizer(Featurizer):
|
|
|
175
175
|
default_bond_features = (
|
|
176
176
|
bond_features == 'auto' or bond_features == 'default'
|
|
177
177
|
)
|
|
178
|
-
if default_bond_features or self.radius > 1
|
|
178
|
+
if default_bond_features or self.radius > 1:
|
|
179
179
|
vocab = ['zero', 'single', 'double', 'triple', 'aromatic']
|
|
180
180
|
bond_features = [
|
|
181
181
|
features.BondType(vocab)
|
|
@@ -215,7 +215,7 @@ class MolGraphFeaturizer(Featurizer):
|
|
|
215
215
|
if mol is None:
|
|
216
216
|
warn(
|
|
217
217
|
f'Could not obtain `chem.Mol` from {x}. '
|
|
218
|
-
'
|
|
218
|
+
'Returning `None` (proceeding without it).'
|
|
219
219
|
)
|
|
220
220
|
return None
|
|
221
221
|
|
|
@@ -254,24 +254,17 @@ class MolGraphFeaturizer(Featurizer):
|
|
|
254
254
|
|
|
255
255
|
node = {}
|
|
256
256
|
node['feature'] = atom_feature
|
|
257
|
-
|
|
258
|
-
if bond_feature is not None and (self.radius > 1 or self.self_loops):
|
|
259
|
-
# Append 'zero order' bond feature encoding, which encodes non-bonds.
|
|
260
|
-
zero_bond_feature = np.array(
|
|
261
|
-
[[1., 0., 0., 0., 0.]], dtype=bond_feature.dtype
|
|
262
|
-
)
|
|
263
|
-
bond_feature = np.concatenate(
|
|
264
|
-
[bond_feature, zero_bond_feature], axis=0
|
|
265
|
-
)
|
|
266
257
|
|
|
267
258
|
edge = {}
|
|
268
259
|
if self.radius == 1:
|
|
269
260
|
edge['source'], edge['target'] = mol.adjacency(
|
|
270
261
|
fill='full', sparse=True, self_loops=self.self_loops, dtype=self.index_dtype
|
|
271
262
|
)
|
|
263
|
+
if self.self_loops:
|
|
264
|
+
bond_feature = np.pad(bond_feature, [(0, 1), (0, 0)])
|
|
272
265
|
if bond_feature is not None:
|
|
273
266
|
bond_indices = []
|
|
274
|
-
for
|
|
267
|
+
for atom_i, atom_j in zip(edge['source'], edge['target']):
|
|
275
268
|
if atom_i == atom_j:
|
|
276
269
|
bond_indices.append(-1)
|
|
277
270
|
else:
|
|
@@ -279,6 +272,8 @@ class MolGraphFeaturizer(Featurizer):
|
|
|
279
272
|
mol.get_bond_between_atoms(atom_i, atom_j).index
|
|
280
273
|
)
|
|
281
274
|
edge['feature'] = bond_feature[bond_indices]
|
|
275
|
+
if self.self_loops:
|
|
276
|
+
edge['self_loop'] = (edge['source'] == edge['target'])
|
|
282
277
|
else:
|
|
283
278
|
paths = chem.get_shortest_paths(
|
|
284
279
|
mol, radius=self.radius, self_loops=self.self_loops
|
|
@@ -293,6 +288,12 @@ class MolGraphFeaturizer(Featurizer):
|
|
|
293
288
|
[len(path) - 1 for path in paths], dtype=self.index_dtype
|
|
294
289
|
)
|
|
295
290
|
if bond_feature is not None:
|
|
291
|
+
zero_bond_feature = np.array(
|
|
292
|
+
[[1., 0., 0., 0., 0.]], dtype=bond_feature.dtype
|
|
293
|
+
)
|
|
294
|
+
bond_feature = np.concatenate(
|
|
295
|
+
[bond_feature, zero_bond_feature], axis=0
|
|
296
|
+
)
|
|
296
297
|
edge['feature'] = self._expand_bond_features(
|
|
297
298
|
mol, paths, bond_feature,
|
|
298
299
|
)
|
|
@@ -511,7 +512,7 @@ class MolGraphFeaturizer3D(MolGraphFeaturizer):
|
|
|
511
512
|
steps=[
|
|
512
513
|
conformers.ConformerEmbedder(
|
|
513
514
|
method='ETKDGv3',
|
|
514
|
-
num_conformers=
|
|
515
|
+
num_conformers=5
|
|
515
516
|
),
|
|
516
517
|
]
|
|
517
518
|
)
|
|
@@ -588,7 +589,7 @@ class MolGraphFeaturizer3D(MolGraphFeaturizer):
|
|
|
588
589
|
edge_feature = self.bond_features(mol)
|
|
589
590
|
|
|
590
591
|
edge = {}
|
|
591
|
-
mols = chem._split_mol_by_confs(mol)
|
|
592
|
+
mols = chem.unpack_conformers(mol)
|
|
592
593
|
tensor_list = []
|
|
593
594
|
for i, mol in enumerate(mols):
|
|
594
595
|
node_conformer = copy.deepcopy(node)
|
|
@@ -734,6 +735,11 @@ def _add_super_edges(
|
|
|
734
735
|
]
|
|
735
736
|
)
|
|
736
737
|
|
|
738
|
+
if 'self_loop' in edge:
|
|
739
|
+
edge['self_loop'] = np.pad(
|
|
740
|
+
edge['self_loop'], [(0, num_nodes * num_super_nodes * 2)],
|
|
741
|
+
constant_values=False,
|
|
742
|
+
)
|
|
737
743
|
if 'length' in edge:
|
|
738
744
|
edge['length'] = np.pad(edge['length'], [(0, 0), (1, 0)])
|
|
739
745
|
zero_array = np.zeros([num_nodes * num_super_nodes * 2], dtype='int32')
|