PyPI - molcraft - Versions diffs - 0.1.0a5__tar.gz → 0.1.0a7__tar.gz - Mend

molcraft 0.1.0a5__tar.gz → 0.1.0a7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of molcraft might be problematic. Click here for more details.

Files changed (31) hide show

{molcraft-0.1.0a5 → molcraft-0.1.0a7}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: molcraft
-Version: 0.1.0a5
+Version: 0.1.0a7
 Summary: Graph Neural Networks for Molecular Machine Learning
 Author-email: Alexander Kensert <alexander.kensert@gmail.com>
 License: MIT License
@@ -55,7 +55,7 @@ Dynamic: license-file
 - Customizable and serializable **layers** and **models**
 - Customizable **GraphTensor**
 - Fast and efficient featurization of molecular graphs
-- Efficient and easy-to-use input pipelines using TF **records**
+- Fast and efficient input pipelines using TF **records**
 ## Examples
@@ -106,20 +106,7 @@ print(pred)
 # featurizers.save_featurizer(featurizer, '/tmp/featurizer.json')
 # models.save_model(model, '/tmp/model.keras')
-# featurizers.load_featurizer('/tmp/featurizer.json')
-# models.load_model('/tmp/model.keras')
+# loaded_featurizer = featurizers.load_featurizer('/tmp/featurizer.json')
+# loaded_model = models.load_model('/tmp/model.keras')
 ```
-## Installation
-Install the pre-release of molcraft via pip:
-```bash
-pip install molcraft --pre
-```
-with GPU support:
-```bash
-pip install molcraft[gpu] --pre
-```

{molcraft-0.1.0a5 → molcraft-0.1.0a7}/README.md RENAMED Viewed

@@ -11,7 +11,7 @@
 - Customizable and serializable **layers** and **models**
 - Customizable **GraphTensor**
 - Fast and efficient featurization of molecular graphs
-- Efficient and easy-to-use input pipelines using TF **records**
+- Fast and efficient input pipelines using TF **records**
 ## Examples
@@ -62,20 +62,7 @@ print(pred)
 # featurizers.save_featurizer(featurizer, '/tmp/featurizer.json')
 # models.save_model(model, '/tmp/model.keras')
-# featurizers.load_featurizer('/tmp/featurizer.json')
-# models.load_model('/tmp/model.keras')
+# loaded_featurizer = featurizers.load_featurizer('/tmp/featurizer.json')
+# loaded_model = models.load_model('/tmp/model.keras')
 ```
-## Installation
-Install the pre-release of molcraft via pip:
-```bash
-pip install molcraft --pre
-```
-with GPU support:
-```bash
-pip install molcraft[gpu] --pre
-```

{molcraft-0.1.0a5 → molcraft-0.1.0a7}/molcraft/__init__.py RENAMED Viewed

@@ -1,4 +1,4 @@
-__version__ = '0.1.0a5'
+__version__ = '0.1.0a7'
 import os
 os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
@@ -14,4 +14,5 @@ from molcraft import ops
 from molcraft import records
 from molcraft import tensors
 from molcraft import callbacks
-from molcraft import datasets
+from molcraft import datasets
+from molcraft import losses

molcraft-0.1.0a7/molcraft/callbacks.py ADDED Viewed

@@ -0,0 +1,93 @@
+import keras
+import warnings
+import numpy as np
+class TensorBoard(keras.callbacks.TensorBoard):
+    def _log_weights(self, epoch):
+        with self._train_writer.as_default():
+            for layer in self.model.layers:
+                for weight in layer.weights:
+                    # Use weight.path istead of weight.name to distinguish
+                    # weights of different layers.
+                    histogram_weight_name = weight.path + "/histogram"
+                    self.summary.histogram(
+                        histogram_weight_name, weight, step=epoch
+                    )
+                    if self.write_images:
+                        image_weight_name = weight.path + "/image"
+                        self._log_weight_as_image(
+                            weight, image_weight_name, epoch
+                        )
+            self._train_writer.flush()
+class LearningRateDecay(keras.callbacks.LearningRateScheduler):
+    def __init__(self, rate: float, delay: int = 0, **kwargs):
+        def lr_schedule(epoch: int, lr: float):
+            if epoch < delay:
+                return float(lr)
+            return float(lr * keras.ops.exp(-rate))
+        super().__init__(schedule=lr_schedule, **kwargs)
+class Rollback(keras.callbacks.Callback):
+    def __init__(
+        self,
+        frequency: int = None,
+        tolerance: float = 0.5,
+        rollback_optimizer: bool = True,
+    ):
+        super().__init__()
+        self.frequency = frequency or 1_000_000_000
+        self.tolerance = tolerance
+        self.rollback_optimizer = rollback_optimizer
+    def on_train_begin(self, logs=None):
+        self.rollback_weights = self._get_model_vars()
+        self.rollback_optimizer_vars = self._get_optimizer_vars()
+        self.rollback_loss = float('inf')
+    def on_epoch_end(self, epoch: int, logs: dict = None):
+        current_loss = logs.get('val_loss', logs.get('loss'))
+        deviation = (current_loss - self.rollback_loss) / self.rollback_loss
+        if np.isnan(current_loss) or np.isinf(current_loss):
+            self._rollback()
+            print("\nRolling back model, found nan or inf loss.\n")
+            return
+        if deviation > self.tolerance:
+            self._rollback()
+            print(f"\nRolling back model, found too large deviation: {deviation:.3f}\n")
+        if epoch and epoch % self.frequency == 0:
+            self._rollback()
+            print(f"\nRolling back model, {epoch} % {self.frequency} == 0\n")
+            return
+        if current_loss < self.rollback_loss:
+            self._save_state(current_loss)
+    def _save_state(self, current_loss: float) -> None:
+        self.rollback_loss = current_loss
+        self.rollback_weights = self._get_model_vars()
+        if self.rollback_optimizer:
+            self.rollback_optimizer_vars = self._get_optimizer_vars()
+    def _rollback(self) -> None:
+        self.model.set_weights(self.rollback_weights)
+        if self.rollback_optimizer:
+            self.model.optimizer.set_weights(self.rollback_optimizer_vars)
+    def _get_optimizer_vars(self):
+        return [v.numpy() for v in self.model.optimizer.variables]
+    def _get_model_vars(self):
+        return self.model.get_weights()

{molcraft-0.1.0a5 → molcraft-0.1.0a7}/molcraft/chem.py RENAMED Viewed

@@ -11,6 +11,7 @@ from rdkit.Chem import rdMolTransforms
 from rdkit.Chem import rdPartialCharges
 from rdkit.Chem import rdMolDescriptors
 from rdkit.Chem import rdForceFieldHelpers
+from rdkit.Chem import rdFingerprintGenerator
 class Mol(Chem.Mol):
@@ -399,7 +400,6 @@ def embed_conformers(
     mol: Mol,
     num_conformers: int,
     method: str = 'ETKDGv3',
-    force: bool = True,
     **kwargs
 ) -> None:
     available_embedding_methods = {
@@ -410,27 +410,39 @@ def embed_conformers(
         'srETKDGv3': rdDistGeom.srETKDGv3(),
         'KDG': rdDistGeom.KDG()
     }
-    default_embedding_method = 'ETKDGv3'
     mol = Mol(mol)
-    params = available_embedding_methods.get(method)
-    if params is None:
-        warn(
-            f"Could not find `method` {method}. "
-            f"Automatically setting method to {default_embedding_method}."
+    embedding_method = available_embedding_methods.get(method)
+    if embedding_method is None:
+        raise ValueError(
+            f'Could not find `method` {method!r}. Specify either of: '
+            '`ETDG`, `ETKDG`, `ETKDGv2`, `ETKDGv3`, `srETKDGv3` or `KDG`.'
         )
-        params = available_embedding_methods[default_embedding_method]
     for key, value in kwargs.items():
-        setattr(params, key, value)
+        setattr(embedding_method, key, value)
-    success = rdDistGeom.EmbedMultipleConfs(mol, numConfs=num_conformers, params=params)
+    success = rdDistGeom.EmbedMultipleConfs(
+        mol, numConfs=num_conformers, params=embedding_method
+    )
     if not len(success):
-        warning = 'Could not embed conformer(s).'
-        if not force:
-            warn(warning)
+        warn(
+            f'Could not embed conformer(s) for {mol.canonical_smiles!r} using the '
+            'speified method. Giving it another try with more permissive methods.'
+        )
+        max_attempts = (20 * mol.num_atoms) # increasing it from 10xN to 20xN
+        for fallback_method in [method, 'ETDG', 'KDG']:
+            fallback_embedding_method = available_embedding_methods[fallback_method]
+            fallback_embedding_method.useRandomCoords = True
+            fallback_embedding_method.maxAttempts = max_attempts
+            success = rdDistGeom.EmbedMultipleConfs(
+                mol, numConfs=num_conformers, params=fallback_embedding_method
+            )
+            if len(success):
+                break
         else:
-            solution = ' Embedding a conformer (in 3D space) using (x, y) coordinates.'
-            warn(warning + solution)
-            rdDepictor.Compute2DCoords(mol)
+            raise RuntimeError(
+                f'Could not embed conformer(s) for {mol.canonical_smiles!r}. '
+            )
     return mol
 def optimize_conformers(
@@ -444,6 +456,11 @@ def optimize_conformers(
     available_force_field_methods = [
         'MMFF', 'MMFF94', 'MMFF94s', 'UFF'
     ]
+    if method not in available_force_field_methods:
+        raise ValueError(
+            f'Could not find `method` {method!r}. Specify either of: '
+            '`UFF`, `MMFF`, `MMFF94` or `MMFF94s`.'
+        )
     mol = Mol(mol)
     try:
         if method.startswith('MMFF'):
@@ -468,7 +485,7 @@ def optimize_conformers(
     except RuntimeError as e:
         warn(
             f'{method} force field minimization raised {e}. '
-            '\nProceeding without force field minimization...'
+            '\nProceeding without force field minimization.'
         )
     return mol
@@ -579,8 +596,7 @@ def _calc_mmff_energies(
             energies.append(float('nan'))
     return energies
-def _split_mol_by_confs(mol: Mol) -> list[Mol]:
+def unpack_conformers(mol: Mol) -> list[Mol]:
     mols = []
     for conf in mol.get_conformers():
         new_mol = Chem.Mol(mol)
@@ -590,11 +606,77 @@ def _split_mol_by_confs(mol: Mol) -> list[Mol]:
         mols.append(new_mol)
     return mols
+_fingerprint_types = {
+    'rdkit': rdFingerprintGenerator.GetRDKitFPGenerator,
+    'morgan': rdFingerprintGenerator.GetMorganGenerator,
+    'topological_torsion': rdFingerprintGenerator.GetTopologicalTorsionGenerator,
+    'atom_pair': rdFingerprintGenerator.GetAtomPairGenerator,
+}
+def _get_fingerprint(
+    mol: Mol,
+    fp_type: str = 'morgan',
+    binary: bool = True,
+    dtype: str = 'float32',
+    **kwargs,
+) -> np.ndarray:
+    fingerprint: rdFingerprintGenerator.FingerprintGenerator64 = (
+        _fingerprint_types[fp_type](**kwargs)
+    )
+    if not isinstance(mol, Mol):
+        mol = Mol.from_encoding(mol)
+    if binary:
+        fp: np.ndarray = fingerprint.GetFingerprintAsNumPy(mol)
+    else:
+        fp: np.ndarray = fingerprint.GetCountFingerprintAsNumPy(mol)
+    return fp.astype(dtype)
+def _rdkit_fingerprint(
+    mol: Chem.Mol,
+    size: int = 2048,
+    *,
+    min_path: int = 1,
+    max_path: int = 7,
+    binary: bool = True,
+    dtype: str = 'float32',
+) -> np.ndarray:
+    fp_param = {'fpSize': size, 'minPath': min_path, 'maxPath': max_path}
+    return _get_fingerprint(mol, 'rdkit', binary, dtype, **fp_param)
+def _morgan_fingerprint(
+    mol: Chem.Mol,
+    size: int = 2048,
+    *,
+    radius: int = 3,
+    binary: bool = True,
+    dtype: str = 'float32',
+) -> np.ndarray:
+    fp_param = {'radius': radius, 'fpSize': size}
+    return _get_fingerprint(mol, 'morgan', binary, dtype, **fp_param)
+def _topological_torsion_fingerprint(
+    mol: Chem.Mol,
+    size: int = 2048,
+    *,
+    binary: bool = True,
+    dtype: str = 'float32',
+) -> np.ndarray:
+    fp_param = {'fpSize': size}
+    return _get_fingerprint(mol, 'topological_torsion', binary, dtype, **fp_param)
+def _atom_pair_fingerprint(
+    mol: Chem.Mol,
+    size: int = 2048,
+    *,
+    binary: bool = True,
+    dtype: str = 'float32',
+) -> np.ndarray:
+    fp_param = {'fpSize': size}
+    return _get_fingerprint(mol, 'atom_pair', binary, dtype, **fp_param)
 def warn(message: str) -> None:
     warnings.warn(
         message=message,
         category=UserWarning,
         stacklevel=1,
     )

{molcraft-0.1.0a5 → molcraft-0.1.0a7}/molcraft/conformers.py RENAMED Viewed

@@ -23,20 +23,17 @@ class ConformerEmbedder(ConformerProcessor):
     def __init__(
         self,
         method: str = 'ETKDGv3',
-        num_conformers: int = 10,
-        force: bool = True,
+        num_conformers: int = 5,
         **kwargs,
     ) -> None:
         self.method = method
         self.num_conformers = num_conformers
-        self.force = force
         self.kwargs = kwargs
     def get_config(self) -> dict:
         config = {
             'method': self.method,
             'num_conformers': self.num_conformers,
-            'force': self.force,
         }
         config.update({
             k: v for (k, v) in self.kwargs.items()
@@ -48,7 +45,6 @@ class ConformerEmbedder(ConformerProcessor):
             mol,
             method=self.method,
             num_conformers=self.num_conformers,
-            force=self.force,
             **self.kwargs,
         )

{molcraft-0.1.0a5 → molcraft-0.1.0a7}/molcraft/featurizers.py RENAMED Viewed

@@ -175,7 +175,7 @@ class MolGraphFeaturizer(Featurizer):
             default_bond_features = (
                 bond_features == 'auto' or bond_features == 'default'
             )
-            if default_bond_features or self.radius > 1 or self.self_loops:
+            if default_bond_features or self.radius > 1:
                 vocab = ['zero', 'single', 'double', 'triple', 'aromatic']
                 bond_features = [
                     features.BondType(vocab)
@@ -215,7 +215,7 @@ class MolGraphFeaturizer(Featurizer):
         if mol is None:
             warn(
                 f'Could not obtain `chem.Mol` from {x}. '
-                'Proceeding without it.'
+                'Returning `None` (proceeding without it).'
             )
             return None
@@ -254,24 +254,17 @@ class MolGraphFeaturizer(Featurizer):
         node = {}
         node['feature'] = atom_feature
-        if bond_feature is not None and (self.radius > 1 or self.self_loops):
-            # Append 'zero order' bond feature encoding, which encodes non-bonds.
-            zero_bond_feature = np.array(
-                [[1., 0., 0., 0., 0.]], dtype=bond_feature.dtype
-            )
-            bond_feature = np.concatenate(
-                [bond_feature, zero_bond_feature], axis=0
-            )
         edge = {}
         if self.radius == 1:
             edge['source'], edge['target'] = mol.adjacency(
                 fill='full', sparse=True, self_loops=self.self_loops, dtype=self.index_dtype
             )
+            if self.self_loops:
+                bond_feature = np.pad(bond_feature, [(0, 1), (0, 0)])
             if bond_feature is not None:
                 bond_indices = []
-                for (atom_i, atom_j) in zip(edge['source'], edge['target']):
+                for atom_i, atom_j in zip(edge['source'], edge['target']):
                     if atom_i == atom_j:
                         bond_indices.append(-1)
                     else:
@@ -279,6 +272,8 @@ class MolGraphFeaturizer(Featurizer):
                             mol.get_bond_between_atoms(atom_i, atom_j).index
                         )
                 edge['feature'] = bond_feature[bond_indices]
+                if self.self_loops:
+                    edge['self_loop'] = (edge['source'] == edge['target'])
         else:
             paths = chem.get_shortest_paths(
                 mol, radius=self.radius, self_loops=self.self_loops
@@ -293,6 +288,12 @@ class MolGraphFeaturizer(Featurizer):
                 [len(path) - 1 for path in paths], dtype=self.index_dtype
             )
             if bond_feature is not None:
+                zero_bond_feature = np.array(
+                    [[1., 0., 0., 0., 0.]], dtype=bond_feature.dtype
+                )
+                bond_feature = np.concatenate(
+                    [bond_feature, zero_bond_feature], axis=0
+                )
                 edge['feature'] = self._expand_bond_features(
                     mol, paths, bond_feature,
                 )
@@ -511,7 +512,7 @@ class MolGraphFeaturizer3D(MolGraphFeaturizer):
                 steps=[
                     conformers.ConformerEmbedder(
                         method='ETKDGv3',
-                        num_conformers=10
+                        num_conformers=5
                     ),
                 ]
             )
@@ -588,7 +589,7 @@ class MolGraphFeaturizer3D(MolGraphFeaturizer):
             edge_feature = self.bond_features(mol)
         edge = {}
-        mols = chem._split_mol_by_confs(mol)
+        mols = chem.unpack_conformers(mol)
         tensor_list = []
         for i, mol in enumerate(mols):
             node_conformer = copy.deepcopy(node)
@@ -734,6 +735,11 @@ def _add_super_edges(
             ]
         )
+    if 'self_loop' in edge:
+        edge['self_loop'] = np.pad(
+            edge['self_loop'], [(0, num_nodes * num_super_nodes * 2)],
+            constant_values=False,
+        )
     if 'length' in edge:
         edge['length'] = np.pad(edge['length'], [(0, 0), (1, 0)])
         zero_array = np.zeros([num_nodes * num_super_nodes * 2], dtype='int32')

molcraft 0.1.0a5__tar.gz → 0.1.0a7__tar.gz

Potentially problematic release.

molcraft 0.1.0a5__tar.gz → 0.1.0a7__tar.gz