PyPI - molcraft - Versions diffs - 0.1.0a4__tar.gz → 0.1.0a6__tar.gz - Mend

molcraft 0.1.0a4__tar.gz → 0.1.0a6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of molcraft might be problematic. Click here for more details.

Files changed (32) hide show

{molcraft-0.1.0a4 → molcraft-0.1.0a6}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: molcraft
-Version: 0.1.0a4
+Version: 0.1.0a6
 Summary: Graph Neural Networks for Molecular Machine Learning
 Author-email: Alexander Kensert <alexander.kensert@gmail.com>
 License: MIT License
@@ -55,7 +55,7 @@ Dynamic: license-file
 - Customizable and serializable **layers** and **models**
 - Customizable **GraphTensor**
 - Fast and efficient featurization of molecular graphs
-- Efficient and easy-to-use input pipelines using TF **records**
+- Fast and efficient input pipelines using TF **records**
 ## Examples
@@ -106,20 +106,7 @@ print(pred)
 # featurizers.save_featurizer(featurizer, '/tmp/featurizer.json')
 # models.save_model(model, '/tmp/model.keras')
-# featurizers.load_featurizer('/tmp/featurizer.json')
-# models.load_model('/tmp/model.keras')
+# loaded_featurizer = featurizers.load_featurizer('/tmp/featurizer.json')
+# loaded_model = models.load_model('/tmp/model.keras')
 ```
-## Installation
-Install the pre-release of molcraft via pip:
-```bash
-pip install molcraft --pre
-```
-with GPU support:
-```bash
-pip install molcraft[gpu] --pre
-```

{molcraft-0.1.0a4 → molcraft-0.1.0a6}/README.md RENAMED Viewed

@@ -11,7 +11,7 @@
 - Customizable and serializable **layers** and **models**
 - Customizable **GraphTensor**
 - Fast and efficient featurization of molecular graphs
-- Efficient and easy-to-use input pipelines using TF **records**
+- Fast and efficient input pipelines using TF **records**
 ## Examples
@@ -62,20 +62,7 @@ print(pred)
 # featurizers.save_featurizer(featurizer, '/tmp/featurizer.json')
 # models.save_model(model, '/tmp/model.keras')
-# featurizers.load_featurizer('/tmp/featurizer.json')
-# models.load_model('/tmp/model.keras')
+# loaded_featurizer = featurizers.load_featurizer('/tmp/featurizer.json')
+# loaded_model = models.load_model('/tmp/model.keras')
 ```
-## Installation
-Install the pre-release of molcraft via pip:
-```bash
-pip install molcraft --pre
-```
-with GPU support:
-```bash
-pip install molcraft[gpu] --pre
-```

{molcraft-0.1.0a4 → molcraft-0.1.0a6}/molcraft/__init__.py RENAMED Viewed

@@ -1,4 +1,4 @@
-__version__ = '0.1.0a4'
+__version__ = '0.1.0a6'
 import os
 os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
@@ -14,4 +14,5 @@ from molcraft import ops
 from molcraft import records
 from molcraft import tensors
 from molcraft import callbacks
-from molcraft import datasets
+from molcraft import datasets
+from molcraft import losses

{molcraft-0.1.0a4 → molcraft-0.1.0a6}/molcraft/chem.py RENAMED Viewed

@@ -11,6 +11,7 @@ from rdkit.Chem import rdMolTransforms
 from rdkit.Chem import rdPartialCharges
 from rdkit.Chem import rdMolDescriptors
 from rdkit.Chem import rdForceFieldHelpers
+from rdkit.Chem import rdFingerprintGenerator
 class Mol(Chem.Mol):
@@ -579,8 +580,7 @@ def _calc_mmff_energies(
             energies.append(float('nan'))
     return energies
-def _split_mol_by_confs(mol: Mol) -> list[Mol]:
+def unpack_conformers(mol: Mol) -> list[Mol]:
     mols = []
     for conf in mol.get_conformers():
         new_mol = Chem.Mol(mol)
@@ -590,11 +590,77 @@ def _split_mol_by_confs(mol: Mol) -> list[Mol]:
         mols.append(new_mol)
     return mols
+_fingerprint_types = {
+    'rdkit': rdFingerprintGenerator.GetRDKitFPGenerator,
+    'morgan': rdFingerprintGenerator.GetMorganGenerator,
+    'topological_torsion': rdFingerprintGenerator.GetTopologicalTorsionGenerator,
+    'atom_pair': rdFingerprintGenerator.GetAtomPairGenerator,
+}
+def _get_fingerprint(
+    mol: Mol,
+    fp_type: str = 'morgan',
+    binary: bool = True,
+    dtype: str = 'float32',
+    **kwargs,
+) -> np.ndarray:
+    fingerprint: rdFingerprintGenerator.FingerprintGenerator64 = (
+        _fingerprint_types[fp_type](**kwargs)
+    )
+    if not isinstance(mol, Mol):
+        mol = Mol.from_encoding(mol)
+    if binary:
+        fp: np.ndarray = fingerprint.GetFingerprintAsNumPy(mol)
+    else:
+        fp: np.ndarray = fingerprint.GetCountFingerprintAsNumPy(mol)
+    return fp.astype(dtype)
+def _rdkit_fingerprint(
+    mol: Chem.Mol,
+    size: int = 2048,
+    *,
+    min_path: int = 1,
+    max_path: int = 7,
+    binary: bool = True,
+    dtype: str = 'float32',
+) -> np.ndarray:
+    fp_param = {'fpSize': size, 'minPath': min_path, 'maxPath': max_path}
+    return _get_fingerprint(mol, 'rdkit', binary, dtype, **fp_param)
+def _morgan_fingerprint(
+    mol: Chem.Mol,
+    size: int = 2048,
+    *,
+    radius: int = 3,
+    binary: bool = True,
+    dtype: str = 'float32',
+) -> np.ndarray:
+    fp_param = {'radius': radius, 'fpSize': size}
+    return _get_fingerprint(mol, 'morgan', binary, dtype, **fp_param)
+def _topological_torsion_fingerprint(
+    mol: Chem.Mol,
+    size: int = 2048,
+    *,
+    binary: bool = True,
+    dtype: str = 'float32',
+) -> np.ndarray:
+    fp_param = {'fpSize': size}
+    return _get_fingerprint(mol, 'topological_torsion', binary, dtype, **fp_param)
+def _atom_pair_fingerprint(
+    mol: Chem.Mol,
+    size: int = 2048,
+    *,
+    binary: bool = True,
+    dtype: str = 'float32',
+) -> np.ndarray:
+    fp_param = {'fpSize': size}
+    return _get_fingerprint(mol, 'atom_pair', binary, dtype, **fp_param)
 def warn(message: str) -> None:
     warnings.warn(
         message=message,
         category=UserWarning,
         stacklevel=1,
     )

{molcraft-0.1.0a4 → molcraft-0.1.0a6}/molcraft/conformers.py RENAMED Viewed

@@ -23,7 +23,7 @@ class ConformerEmbedder(ConformerProcessor):
     def __init__(
         self,
         method: str = 'ETKDGv3',
-        num_conformers: int = 10,
+        num_conformers: int = 5,
         force: bool = True,
         **kwargs,
     ) -> None:

{molcraft-0.1.0a4 → molcraft-0.1.0a6}/molcraft/featurizers.py RENAMED Viewed

@@ -175,7 +175,7 @@ class MolGraphFeaturizer(Featurizer):
             default_bond_features = (
                 bond_features == 'auto' or bond_features == 'default'
             )
-            if default_bond_features or self.radius > 1 or self.self_loops:
+            if default_bond_features or self.radius > 1:
                 vocab = ['zero', 'single', 'double', 'triple', 'aromatic']
                 bond_features = [
                     features.BondType(vocab)
@@ -215,7 +215,7 @@ class MolGraphFeaturizer(Featurizer):
         if mol is None:
             warn(
                 f'Could not obtain `chem.Mol` from {x}. '
-                'Proceeding without it.'
+                'Returning `None` (proceeding without it).'
             )
             return None
@@ -254,24 +254,17 @@ class MolGraphFeaturizer(Featurizer):
         node = {}
         node['feature'] = atom_feature
-        if bond_feature is not None and (self.radius > 1 or self.self_loops):
-            # Append 'zero order' bond feature encoding, which encodes non-bonds.
-            zero_bond_feature = np.array(
-                [[1., 0., 0., 0., 0.]], dtype=bond_feature.dtype
-            )
-            bond_feature = np.concatenate(
-                [bond_feature, zero_bond_feature], axis=0
-            )
         edge = {}
         if self.radius == 1:
             edge['source'], edge['target'] = mol.adjacency(
                 fill='full', sparse=True, self_loops=self.self_loops, dtype=self.index_dtype
             )
+            if self.self_loops:
+                bond_feature = np.pad(bond_feature, [(0, 1), (0, 0)])
             if bond_feature is not None:
                 bond_indices = []
-                for (atom_i, atom_j) in zip(edge['source'], edge['target']):
+                for atom_i, atom_j in zip(edge['source'], edge['target']):
                     if atom_i == atom_j:
                         bond_indices.append(-1)
                     else:
@@ -279,6 +272,8 @@ class MolGraphFeaturizer(Featurizer):
                             mol.get_bond_between_atoms(atom_i, atom_j).index
                         )
                 edge['feature'] = bond_feature[bond_indices]
+                if self.self_loops:
+                    edge['self_loop'] = (edge['source'] == edge['target'])
         else:
             paths = chem.get_shortest_paths(
                 mol, radius=self.radius, self_loops=self.self_loops
@@ -293,6 +288,12 @@ class MolGraphFeaturizer(Featurizer):
                 [len(path) - 1 for path in paths], dtype=self.index_dtype
             )
             if bond_feature is not None:
+                zero_bond_feature = np.array(
+                    [[1., 0., 0., 0., 0.]], dtype=bond_feature.dtype
+                )
+                bond_feature = np.concatenate(
+                    [bond_feature, zero_bond_feature], axis=0
+                )
                 edge['feature'] = self._expand_bond_features(
                     mol, paths, bond_feature,
                 )
@@ -511,7 +512,7 @@ class MolGraphFeaturizer3D(MolGraphFeaturizer):
                 steps=[
                     conformers.ConformerEmbedder(
                         method='ETKDGv3',
-                        num_conformers=10
+                        num_conformers=5
                     ),
                 ]
             )
@@ -588,7 +589,7 @@ class MolGraphFeaturizer3D(MolGraphFeaturizer):
             edge_feature = self.bond_features(mol)
         edge = {}
-        mols = chem._split_mol_by_confs(mol)
+        mols = chem.unpack_conformers(mol)
         tensor_list = []
         for i, mol in enumerate(mols):
             node_conformer = copy.deepcopy(node)
@@ -734,6 +735,11 @@ def _add_super_edges(
             ]
         )
+    if 'self_loop' in edge:
+        edge['self_loop'] = np.pad(
+            edge['self_loop'], [(0, num_nodes * num_super_nodes * 2)],
+            constant_values=False,
+        )
     if 'length' in edge:
         edge['length'] = np.pad(edge['length'], [(0, 0), (1, 0)])
         zero_array = np.zeros([num_nodes * num_super_nodes * 2], dtype='int32')

molcraft 0.1.0a4__tar.gz → 0.1.0a6__tar.gz

Potentially problematic release.

molcraft 0.1.0a4__tar.gz → 0.1.0a6__tar.gz