PyPI - molcraft - Versions diffs - 0.1.0a21__tar.gz → 0.1.0a23__tar.gz - Mend

molcraft 0.1.0a21.tar.gz → 0.1.0a23.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of molcraft might be problematic. Click here for more details.

Files changed (33)

{molcraft-0.1.0a21 → molcraft-0.1.0a23}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: molcraft
-Version: 0.1.0a21
+Version: 0.1.0a23
 Summary: Graph Neural Networks for Molecular Machine Learning
 Author-email: Alexander Kensert <alexander.kensert@gmail.com>
 License: MIT License
@@ -35,7 +35,6 @@ Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: tensorflow>=2.16
-Requires-Dist: tensorflow-text>=2.16
 Requires-Dist: rdkit>=2023.9.5
 Requires-Dist: pandas>=1.0.3
 Requires-Dist: ipython>=8.12.0
@@ -43,7 +42,7 @@ Provides-Extra: gpu
 Requires-Dist: tensorflow[and-cuda]>=2.16; extra == "gpu"
 Dynamic: license-file
-<img src="https://github.com/akensert/molcraft/blob/main/docs/_static/molcraft-logo.png" alt="molcraft-logo", width="90%">
+<img src="https://github.com/akensert/molcraft/blob/main/docs/_static/molcraft-logo.png" alt="molcraft-logo" width="90%">
 **Deep Learning on Molecules**: A Minimalistic GNN package for Molecular ML.

{molcraft-0.1.0a21 → molcraft-0.1.0a23}/README.md RENAMED Viewed

@@ -1,4 +1,4 @@
-<img src="https://github.com/akensert/molcraft/blob/main/docs/_static/molcraft-logo.png" alt="molcraft-logo", width="90%">
+<img src="https://github.com/akensert/molcraft/blob/main/docs/_static/molcraft-logo.png" alt="molcraft-logo" width="90%">
 **Deep Learning on Molecules**: A Minimalistic GNN package for Molecular ML.

{molcraft-0.1.0a21 → molcraft-0.1.0a23}/molcraft/__init__.py RENAMED Viewed

@@ -1,4 +1,4 @@
-__version__ = '0.1.0a21'
+__version__ = '0.1.0a23'
 import os
 os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
@@ -15,5 +15,3 @@ from molcraft import tensors
 from molcraft import callbacks
 from molcraft import datasets
 from molcraft import losses
-from molcraft.applications import proteomics

{molcraft-0.1.0a21 → molcraft-0.1.0a23}/molcraft/chem.py RENAMED Viewed

@@ -3,6 +3,7 @@ import collections
 import numpy as np
 from rdkit import Chem
+from rdkit.Chem import AllChem
 from rdkit.Chem import Lipinski
 from rdkit.Chem import rdDistGeom
 from rdkit.Chem import rdDepictor
@@ -22,12 +23,17 @@ class Mol(Chem.Mol):
         if explicit_hs:
             rdkit_mol = Chem.AddHs(rdkit_mol)
         rdkit_mol.__class__ = cls
+        setattr(rdkit_mol, '_encoding', encoding)
         return rdkit_mol
     @property
     def canonical_smiles(self) -> str:
         return Chem.MolToSmiles(self, canonical=True)
+    @property
+    def encoding(self):
+        return getattr(self, '_encoding', None)
     @property
     def bonds(self) -> list['Bond']:
         if not hasattr(self, '_bonds'):
@@ -60,7 +66,7 @@ class Mol(Chem.Mol):
             atom = atom.GetIdx()
         return Atom.cast(self.GetAtomWithIdx(int(atom)))
-    def get_path_between_atoms(
+    def get_shortest_path_between_atoms(
         self,
         atom_i: int | Chem.Atom,
         atom_j: int | Chem.Atom
@@ -100,13 +106,13 @@ class Mol(Chem.Mol):
     def get_conformer(self, index: int = 0) -> 'Conformer':
         if self.num_conformers == 0:
-            warnings.warn('Molecule has no conformer.')
+            warnings.warn(f'{self} has no conformer. Returning None.')
             return None
         return Conformer.cast(self.GetConformer(index))
     def get_conformers(self) -> list['Conformer']:
         if self.num_conformers == 0:
-            warnings.warn('Molecule has no conformer.')
+            warnings.warn(f'{self} has no conformers. Returning an empty list.')
             return []
         return [Conformer.cast(x) for x in self.GetConformers()]
@@ -117,7 +123,8 @@ class Mol(Chem.Mol):
         return None
     def __repr__(self) -> str:
-        return f'<{self.__class__.__name__} {self.canonical_smiles} at {hex(id(self))}>'
+        encoding = self.encoding or self.canonical_smiles
+        return f'<{self.__class__.__name__} {encoding} at {hex(id(self))}>'
 class Conformer(Chem.Conformer):
@@ -244,7 +251,10 @@ def sanitize_mol(
     flag = Chem.SanitizeMol(mol, catchErrors=True)
     if flag != Chem.SanitizeFlags.SANITIZE_NONE:
         if strict:
-            return None
+            raise ValueError(f'Could not sanitize {mol}.')
+        warnings.warn(
+            f'Could not sanitize {mol}. Proceeding with partial sanitization.'
+        )
         # Sanitize mol, excluding the steps causing the error previously
         Chem.SanitizeMol(mol, sanitizeOps=Chem.SanitizeFlags.SANITIZE_ALL^flag)
     if assign_stereo_chemistry:
@@ -391,6 +401,7 @@ def embed_conformers(
     mol: Mol,
     num_conformers: int,
     method: str = 'ETKDGv3',
+    timeout: int | None = None,
     random_seed: int | None = None,
     **kwargs
 ) -> Mol:
@@ -405,16 +416,22 @@ def embed_conformers(
     mol = Mol(mol)
     embedding_method = available_embedding_methods.get(method)
     if embedding_method is None:
-        raise ValueError(
-            f'Could not find `method` {method!r}. Specify either of: '
-            '`ETDG`, `ETKDG`, `ETKDGv2`, `ETKDGv3`, `srETKDGv3` or `KDG`.'
+        warnings.warn(
+            f'{method} is not available. Proceeding with ETKDGv3.'
         )
+        embedding_method = available_embedding_methods['ETKDGv3']
     for key, value in kwargs.items():
         setattr(embedding_method, key, value)
-    if random_seed is not None:
-        embedding_method.randomSeed = random_seed
+    if not timeout:
+        timeout = 0 # No timeout
+    if not random_seed:
+        random_seed = -1 # No random seed
+    embedding_method.randomSeed = random_seed
+    embedding_method.timeout = timeout
     success = rdDistGeom.EmbedMultipleConfs(
         mol, numConfs=num_conformers, params=embedding_method
@@ -422,17 +439,18 @@ def embed_conformers(
     num_successes = len(success)
     if num_successes < num_conformers:
         warnings.warn(
-            f'Could only embed {num_successes} out of {num_conformers} conformer(s) '
-            f'for {mol.canonical_smiles!r} using {method}. Embedding the remaining '
-            f'{num_conformers - num_successes} conformer(s) using different embedding methods.',
-            stacklevel=2
+            f'Could only embed {num_successes} out of {num_conformers} conformer(s) for '
+            f'{mol} using the specified method ({method}) and parameters. Attempting to '
+            f'embed the remaining {num_conformers-num_successes} using fallback methods.',
         )
+        max_iters = 20 * mol.num_atoms # Doubling the number of iterations
         for fallback_method in [method, 'ETDG', 'KDG']:
             fallback_embedding_method = available_embedding_methods[fallback_method]
             fallback_embedding_method.useRandomCoords = True
+            fallback_embedding_method.maxIterations = int(max_iters)
             fallback_embedding_method.clearConfs = False
-            if random_seed is not None:
-                fallback_embedding_method.randomSeed = random_seed
+            fallback_embedding_method.timeout = int(timeout)
+            fallback_embedding_method.randomSeed = int(random_seed)
             success = rdDistGeom.EmbedMultipleConfs(
                 mol, numConfs=(num_conformers - num_successes), params=fallback_embedding_method
             )
@@ -441,10 +459,13 @@ def embed_conformers(
                 break
         else:
             raise RuntimeError(
-                f'Could not embed {num_conformers} conformer(s) for {mol.canonical_smiles!r}. '
+                f'Could not embed {num_conformers} conformer(s) for {mol}. '
             )
     return mol
+import warnings
 def optimize_conformers(
     mol: Mol,
     method: str = 'UFF',
@@ -453,14 +474,17 @@ def optimize_conformers(
     ignore_interfragment_interactions: bool = True,
     vdw_threshold: float = 10.0,
 ) -> Mol:
-    available_force_field_methods = [
-        'MMFF', 'MMFF94', 'MMFF94s', 'UFF'
-    ]
+    if mol.num_conformers == 0:
+        warnings.warn(
+            f'{mol} has no conformers to optimize. Proceeding without it.'
+        )
+        return Mol(mol)
+    available_force_field_methods = ['MMFF', 'MMFF94', 'MMFF94s', 'UFF']
     if method not in available_force_field_methods:
-        raise ValueError(
-            f'Could not find `method` {method!r}. Specify either of: '
-            '`UFF`, `MMFF`, `MMFF94` or `MMFF94s`.'
+        warnings.warn(
+            f'{method} is not available. Proceeding with universal force field (UFF).'
         )
+        method = 'UFF'
     mol_optimized = Mol(mol)
     try:
         if method.startswith('MMFF'):
@@ -484,10 +508,9 @@ def optimize_conformers(
             )
     except RuntimeError as e:
         warnings.warn(
-            f'{method} force field minimization did not succeed. Proceeding without it.',
-            stacklevel=2
+            f'Unsuccessful {method} force field minimization for {mol}. Proceeding without it.',
         )
-        return mol
+        return Mol(mol)
     return mol_optimized
 def prune_conformers(
@@ -498,11 +521,9 @@ def prune_conformers(
 ) -> Mol:
     if mol.num_conformers == 0:
         warnings.warn(
-            'Molecule has no conformers. To embed conformers, invoke the `embed` method, '
-            'and optionally followed by `minimize()` to perform force field minimization.',
-            stacklevel=2
+            f'{mol} has no conformers to prune. Proceeding without it.'
         )
-        return mol
+        return Chem.Mol(mol)
     threshold = threshold or 0.0
     deviations = conformer_deviations(mol)

{molcraft-0.1.0a21 → molcraft-0.1.0a23}/molcraft/datasets.py RENAMED Viewed

@@ -1,3 +1,4 @@
+import warnings
 import numpy as np
 import pandas as pd
 import typing

{molcraft-0.1.0a21 → molcraft-0.1.0a23}/molcraft/descriptors.py RENAMED Viewed

@@ -1,5 +1,7 @@
+import warnings
 import keras
 import numpy as np
 from rdkit.Chem import rdMolDescriptors
 from molcraft import chem
@@ -12,9 +14,7 @@ class Descriptor(features.Feature):
     def __call__(self, mol: chem.Mol) -> np.ndarray:
         if not isinstance(mol, chem.Mol):
             raise ValueError(
-                f'Input to {self.name} needs to be a `chem.Mol`, which '
-                'implements two properties that should be iterated over '
-                'to compute features: `atoms` and `bonds`.'
+                f'Input to {self.name} must be a `chem.Mol` object.'
             )
         descriptor = self.call(mol)
         func = (
@@ -30,6 +30,23 @@ class Descriptor(features.Feature):
         return np.concatenate(descriptors)
+@keras.saving.register_keras_serializable(package='molcraft')
+class Descriptor3D(Descriptor):
+    def __call__(self, mol: chem.Mol) -> np.ndarray:
+        if not isinstance(mol, chem.Mol):
+            raise ValueError(
+                f'Input to {self.name} must be a `chem.Mol` object.'
+            )
+        if mol.num_conformers == 0:
+            raise ValueError(
+                f'The inputted `chem.Mol` to {self.name} must embed a conformer. '
+                f'It is recommended that {self.name} is used as a molecule feature '
+                'for `MolGraphFeaturizer3D`, which by default embeds a conformer.'
+            )
+        return super().__call__(mol)
 @keras.saving.register_keras_serializable(package='molcraft')
 class MolWeight(Descriptor):
     def call(self, mol: chem.Mol) -> np.ndarray:
@@ -77,7 +94,7 @@ class NumHydrogenDonors(Descriptor):
 @keras.saving.register_keras_serializable(package='molcraft')
 class NumHydrogenAcceptors(Descriptor):
     def call(self, mol: chem.Mol) -> np.ndarray:
-        return rdMolDescriptors.CalcNumHBA(mol)
+        return rdMolDescriptors.CalcNumHBA(mol)
 @keras.saving.register_keras_serializable(package='molcraft')
@@ -89,7 +106,7 @@ class NumRotatableBonds(Descriptor):
 @keras.saving.register_keras_serializable(package='molcraft')
 class NumRings(Descriptor):
     def call(self, mol: chem.Mol) -> np.ndarray:
-        return rdMolDescriptors.CalcNumRings(mol)
+        return rdMolDescriptors.CalcNumRings(mol)
 @keras.saving.register_keras_serializable(package='molcraft')
@@ -105,3 +122,18 @@ class AtomCount(Descriptor):
             if atom.GetSymbol() == self.atom_type:
                 count += 1
         return count
+    def get_config(self) -> dict:
+        config = super().get_config()
+        config['atom_type'] = self.atom_type
+        return config
+@keras.saving.register_keras_serializable(package='molcraft')
+class ForceFieldEnergy(Descriptor3D):
+    """Universal Force Field (UFF) Energy."""
+    def call(self, mol: chem.Mol) -> np.ndarray:
+        mol_copy = chem.Mol(mol)
+        mol_copy = chem.add_hs(mol_copy)
+        return chem.conformer_energies(mol_copy, method="UFF")

molcraft-0.1.0a23/molcraft/diffusion.py ADDED Viewed

@@ -0,0 +1,241 @@
+import warnings
+import keras
+import tensorflow as tf
+import numpy as np
+from molcraft import ops
+from molcraft import tensors
+from molcraft import layers
+from molcraft import models
+# smiles = pd.read_csv('../../data/rt/RIKEN.csv')['smiles'].values
+# graph = featurizers.MolGraphFeaturizer3D(super_node=False)(smiles)
+# graph.node['coordinate']
+# encoder = molcraft.models.GraphModel.from_layers(
+#     [
+#         diffusion.CoordinateNoise(),
+#         molcraft.layers.NodeEmbedding(128),
+#         molcraft.layers.EdgeEmbedding(128),
+#         molcraft.layers.AddContext('position'),
+#         molcraft.layers.MPConv(128),
+#         molcraft.layers.AddContext('position'),
+#         molcraft.layers.MPConv(128),
+#         molcraft.layers.AddContext('position'),
+#     ]
+# )
+# decoder = keras.Sequential([
+#     keras.layers.Dense(128, activation='relu'),
+#     keras.layers.Dense(3),
+# ])
+# model = diffusion.CoordinateNoisePredictor(encoder, decoder)
+# model(graph)
+# model.save('/tmp/model.keras')
+# model = molcraft.models.load_model('/tmp/model.keras')
+# model.compile(keras.optimizers.Adam(1e-3), 'mse')
+# model.fit(graph, epochs=100)
+# from rdkit.Geometry import Point3D
+# def energy(smiles, coordinate):
+#     m = chem.Mol.from_encoding(smiles)
+#     m = chem.embed_conformers(m, 1)
+#     conf = m.GetConformer()
+#     for i in range(m.GetNumAtoms()):
+#         x, y, z = coordinate[i]
+#         conf.SetAtomPosition(i, Point3D(float(x), float(y), float(z)))
+#     return m, chem.conformer_energies(m)[0]
+# def denoise(
+#     graph: tensors.GraphTensor,
+#     model,
+# ):
+#     print("----")
+#     print(energy(smiles[0], graph[0].node['coordinate'])[-1])
+#     print("----")
+#     beta = keras.ops.linspace(1e-4, 1e-2, 100)
+#     alpha = 1 - beta
+#     alpha_bar = keras.ops.cumprod(alpha)
+#     sigma = keras.ops.sqrt(beta[1:] * (1.0 - alpha_bar[:-1]) / (1.0 - alpha_bar[1:]))
+#     graph = graph.update(
+#         {
+#             'context': {
+#                 'position': keras.ops.ones_like(graph.context['size']) * 99
+#             },
+#             'node': {
+#                 'coordinate': keras.random.normal(graph.node['coordinate'].shape)
+#             }
+#         }
+#     )
+#     for t in reversed(range(100)):
+#         alpha_t = alpha[t]
+#         alpha_bar_t = alpha_bar[t]
+#         a = 1 / keras.ops.sqrt(alpha_t)
+#         b = (1 - alpha_t) / keras.ops.sqrt(1 - alpha_bar_t)
+#         if t > 0:
+#             z = keras.random.normal(()) * sigma[t-1]
+#         else:
+#             z = 0.0
+#         graph = graph.update({
+#             'node': {
+#                 'coordinate': (
+#                     a * (graph.node['coordinate'] - b * model(graph)) + z
+#                 )
+#             }
+#         })
+#         print(energy(smiles[0], graph[0].node['coordinate'])[-1])
+#     return graph
+# graph_updated = denoise(graph[:1], model)x
+# mol, e = energy(smiles[0], graph_updated[0].node['coordinate'])
+# print(e)
+# Chem.Mol(mol)
+@keras.saving.register_keras_serializable(package='molcraft')
+class CoordinateNoisePredictor(models.GraphModel):
+    def __init__(self, encoder, decoder, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.encoder = encoder
+        self.decoder = decoder
+    def propagate(self, tensor):
+        return self.decoder(self.encoder(tensor).node['feature'])
+    def train_step(self, tensor: tensors.GraphTensor) -> dict[str, float]:
+        with tf.GradientTape() as tape:
+            tensor = self.encoder(tensor)
+            feature = tensor.node['feature']
+            noise_true = tensor.node['label']
+            noise_pred = self.decoder(feature)
+            loss = self.compute_loss(tensor, noise_true, noise_pred)
+            loss = self.optimizer.scale_loss(loss)
+        trainable_weights = self.trainable_weights
+        gradients = tape.gradient(loss, trainable_weights)
+        self.optimizer.apply_gradients(zip(gradients, trainable_weights))
+        return self.compute_metrics(tensor, noise_true, noise_pred)
+    def test_step(self, tensor: tensors.GraphTensor) -> dict[str, float]:
+        tensor = self.encoder(tensor)
+        feature = tensor.node['feature']
+        noise_true = tensor.node['label']
+        noise_pred = self.decoder(feature)
+        return self.compute_metrics(tensor, noise_true, noise_pred)
+    def get_config(self) -> dict:
+        config = super().get_config()
+        config['encoder'] = keras.saving.serialize_keras_object(self.encoder)
+        config['decoder'] = keras.saving.serialize_keras_object(self.decoder)
+        return config
+    @classmethod
+    def from_config(cls, config: dict):
+        config['encoder'] = keras.saving.deserialize_keras_object(config['encoder'])
+        config['decoder'] = keras.saving.deserialize_keras_object(config['decoder'])
+        return super().from_config(config)
+@keras.saving.register_keras_serializable(package='molcraft')
+class CoordinateNoise(layers.GraphLayer):
+    def __init__(
+        self,
+        beta: tuple[float, float] = (1e-4, 1e-2),
+        position_dim: int = 128,
+        max_timesteps: int = 100,
+        **kwargs
+    ) -> None:
+        super().__init__(**kwargs)
+        self._beta = beta
+        self._max_timesteps = max_timesteps
+        beta = keras.ops.linspace(*self._beta, self._max_timesteps)
+        alpha = 1 - beta
+        alpha_cumprod = keras.ops.cumprod(alpha)
+        alpha_cumprod = keras.ops.expand_dims(alpha_cumprod, -1)
+        self._alpha_cumprod = alpha_cumprod
+        self._timestep_embedding = TimestepEmbedding(dim=position_dim)
+    def propagate(self, graph: tensors.GraphTensor) -> tensors.GraphTensor:
+        if 'position' in graph.context:
+            return graph.update({'context': {'position': self._timestep_embedding(graph.context['position'])}})
+        timestep = keras.random.randint(
+            shape=(graph.num_subgraphs,), minval=0, maxval=self._max_timesteps
+        )
+        alpha_cumprod = ops.gather(
+            ops.gather(self._alpha_cumprod, timestep), graph.graph_indicator
+        )
+        epsilon = keras.random.normal(
+            shape=keras.ops.shape(graph.node['coordinate']), mean=0, stddev=1
+        )
+        noisy_coordinate = (
+            keras.ops.sqrt(alpha_cumprod) * graph.node['coordinate'] +
+            keras.ops.sqrt(1 - alpha_cumprod) * epsilon
+        )
+        timestep = self._timestep_embedding(timestep)
+        return graph.update(
+            {
+                'context': {
+                    'position': timestep,
+                },
+                'node': {
+                    'coordinate': noisy_coordinate,
+                    'label': epsilon
+                },
+            }
+        )
+    def get_config(self) -> dict:
+        config = super().get_config()
+        config['beta'] = self._beta
+        config['max_timesteps'] = self._max_timesteps
+        return config
+class TimestepEmbedding(keras.layers.Layer):
+    def __init__(self, dim: int, max_wavelength: int = 10000, **kwargs) -> None:
+        super().__init__(**kwargs)
+        self._dim = dim
+        self._max_wavelength = max_wavelength
+    def call(self, inputs: tf.Tensor) -> tf.Tensor:
+        timestep = keras.ops.cast(inputs, 'float32')
+        embedding = keras.ops.log(self._max_wavelength) / (self._dim // 2 - 1)
+        embedding = keras.ops.exp(
+            -embedding * keras.ops.arange(self._dim // 2, dtype='float32')
+        )
+        embedding = timestep[:, None] * embedding[None, :]
+        embedding = keras.ops.concatenate(
+            [keras.ops.sin(embedding), keras.ops.cos(embedding)], axis=-1
+        )
+        return embedding
+    def get_config(self) -> dict:
+        config = super().get_config()
+        config['dim'] = self._dim
+        config['max_wavelength'] = self._max_wavelength
+        return config

{molcraft-0.1.0a21 → molcraft-0.1.0a23}/molcraft/features.py RENAMED Viewed

@@ -1,7 +1,7 @@
+import warnings
 import abc
 import math
 import keras
-import warnings
 import numpy as np
 from molcraft import chem
@@ -41,14 +41,14 @@ class Feature(abc.ABC):
     def __call__(self, mol: chem.Mol) -> np.ndarray:
         if not isinstance(mol, chem.Mol):
-            raise TypeError(f'Input to {self.name} must be a `chem.Mol` instance.')
+            raise TypeError(f'Input to {self.name} must be a `chem.Mol` object.')
         features = self.call(mol)
         if len(features) != mol.num_atoms and len(features) != mol.num_bonds:
             raise ValueError(
                 f'The number of features computed by {self.name} does not '
                 'match the number of atoms or bonds of the `chem.Mol` object. '
-                'Make sure to iterate over `atoms` or `bonds` of `chem.Mol` '
-                'when computing features.'
+                'Make sure to iterate over `atoms` or `bonds` of the `chem.Mol` '
+                'object when computing features.'
             )
         if len(features) == 0:
             # Edge case: no atoms or bonds in the molecule.
@@ -109,7 +109,6 @@ class Feature(abc.ABC):
             warnings.warn(
                 f'Found value of {self.name} to be non-finite. '
                 f'Value received: {value}. Converting it to a value of 0.',
-                stacklevel=2
             )
             value = 0.0
         return np.asarray([value], dtype=self.dtype)

{molcraft-0.1.0a21 → molcraft-0.1.0a23}/molcraft/featurizers.py RENAMED Viewed

@@ -1,9 +1,8 @@
+import warnings
 import keras
 import json
 import abc
 import typing
-import copy
-import warnings
 import numpy as np
 import pandas as pd
 import tensorflow as tf
@@ -13,6 +12,7 @@ from pathlib import Path
 from molcraft import tensors
 from molcraft import features
+from molcraft import records
 from molcraft import chem
 from molcraft import descriptors
@@ -41,6 +41,17 @@ class GraphFeaturizer(abc.ABC):
     def load(filepath: str | Path, *args, **kwargs) -> 'GraphFeaturizer':
         return load_featurizer(filepath, *args, **kwargs)
+    def write_records(self, inputs: str | chem.Mol | tuple, path: str | Path, **kwargs) -> None:
+         records.write(
+            inputs, featurizer=self, path=path, **kwargs
+         )
+    @staticmethod
+    def read_records(path: str | Path, **kwargs) -> tf.data.Dataset:
+        return records.read(
+            path=path, **kwargs
+        )
     def __call__(
         self,
         inputs: str | chem.Mol | tuple | typing.Iterable,

{molcraft-0.1.0a21 → molcraft-0.1.0a23}/molcraft/layers.py RENAMED Viewed

@@ -1,6 +1,6 @@
+import warnings
 import keras
 import tensorflow as tf
-import warnings
 import functools
 from keras.src.models import functional
@@ -350,11 +350,8 @@ class GraphConv(GraphLayer):
         )
         if self._project_residual:
             warnings.warn(
-                '`skip_connect` is set to `True`, but found incompatible dim '
-                'between input (node feature dim) and output (`self.units`). '
-                'Automatically applying a projection layer to residual to '
-                'match input and output. ',
-                stacklevel=2,
+                'Found incompatible dim between input and output. Applying '
+                'a projection layer to residual to match input and output dim.',
             )
             self._residual_dense = self.get_dense(
                 self.units, name='residual_dense'
@@ -613,10 +610,8 @@ class GIConv(GraphConv):
             if not self._update_edge_feature:
                 if (edge_feature_dim != node_feature_dim):
                     warnings.warn(
-                        'Found edge feature dim to be incompatible with node feature dim. '
-                        'Automatically adding a edge feature projection layer to match '
-                        'the dim of node features.',
-                        stacklevel=2,
+                        'Found edge and node feature dim to be incompatible. Applying a '
+                        'projection layer to edge features to match the dim of the node features.',
                     )
                     self._update_edge_feature = True
@@ -870,10 +865,10 @@ class MPConv(GraphConv):
         self._project_previous_node_feature = node_feature_dim != self.units
         if self._project_previous_node_feature:
             warnings.warn(
-                'Input node feature dim does not match updated node feature dim. '
-                'To make sure input node feature can be passed as `states` to the '
-                'GRU cell, it will automatically be projected prior to it.',
-                stacklevel=2
+                'Inputted node feature dim does not match updated node feature dim, '
+                'which is required for the GRU update. Applying a projection layer to '
+                'the inputted node features prior to the GRU update, to match dim '
+                'of the updated node feature dim.'
             )
             self._previous_node_dense = self.get_dense(self.units)
@@ -1497,6 +1492,7 @@ class AddContext(GraphLayer):
     def build(self, spec: tensors.GraphTensor.Spec) -> None:
         feature_dim = spec.node['feature'].shape[-1]
+        self._has_super_node = 'super' in spec.node
         if self._intermediate_dim is None:
             self._intermediate_dim = feature_dim * 2
         self._intermediate_dense = self.get_dense(
@@ -1515,9 +1511,14 @@ class AddContext(GraphLayer):
         context = self._intermediate_dense(context)
         context = self._intermediate_norm(context)
         context = self._final_dense(context)
-        node_feature = ops.scatter_add(
-            tensor.node['feature'], tensor.node['super'], context
-        )
+        if self._has_super_node:
+            node_feature = ops.scatter_add(
+                tensor.node['feature'], tensor.node['super'], context
+            )
+        else:
+            node_feature = (
+                tensor.node['feature'] + ops.gather(context, tensor.graph_indicator)
+            )
         data = {'node': {'feature': node_feature}}
         if self._drop:
             data['context'] = {self._field: None}
@@ -1561,8 +1562,7 @@ class GraphNetwork(GraphLayer):
         if self._update_node_feature:
             warnings.warn(
                 'Node feature dim does not match `units` of the first layer. '
-                'Automatically adding a node projection layer to match `units`.',
-                stacklevel=2
+                'Applying a projection layer to node features to match `units`.',
             )
             self._node_dense = self.get_dense(units)
         self._has_edge_feature = 'feature' in spec.edge
@@ -1572,8 +1572,7 @@ class GraphNetwork(GraphLayer):
             if self._update_edge_feature:
                 warnings.warn(
                     'Edge feature dim does not match `units` of the first layer. '
-                    'Automatically adding a edge projection layer to match `units`.',
-                    stacklevel=2
+                    'Applying projection layer to edge features to match `units`.'
                 )
                 self._edge_dense = self.get_dense(units)

{molcraft-0.1.0a21 → molcraft-0.1.0a23}/molcraft/losses.py RENAMED Viewed

@@ -1,3 +1,4 @@
+import warnings
 import keras
 import numpy as np

{molcraft-0.1.0a21 → molcraft-0.1.0a23}/molcraft/models.py RENAMED Viewed

@@ -1,3 +1,4 @@
+import warnings
 import typing
 import keras
 import numpy as np
@@ -111,7 +112,7 @@ class GraphModel(layers.GraphLayer, keras.models.Model):
     def __new__(cls, *args, **kwargs):
         if _functional_init_arguments(args, kwargs) and cls == GraphModel:
             return FunctionalGraphModel(*args, **kwargs)
-        return typing.cast(GraphModel, super().__new__(cls))
+        return super().__new__(cls)
     def __init__(self, *args, **kwargs):
         self._model_layers = kwargs.pop('model_layers', None)
@@ -137,6 +138,8 @@ class GraphModel(layers.GraphLayer, keras.models.Model):
         """
         if not tensors.is_graph(graph_layers[0]):
             return cls(model_layers=graph_layers)
+        elif cls != GraphModel:
+            return cls(model_layers=graph_layers[1:])
         inputs: dict = graph_layers.pop(0)
         x = inputs
         for layer in graph_layers:

{molcraft-0.1.0a21 → molcraft-0.1.0a23}/molcraft/ops.py RENAMED Viewed

@@ -1,3 +1,4 @@
+import warnings
 import keras
 import numpy as np
 import tensorflow as tf

{molcraft-0.1.0a21 → molcraft-0.1.0a23}/molcraft/records.py RENAMED Viewed

@@ -1,3 +1,4 @@
+import warnings
 import os
 import math
 import glob
@@ -9,14 +10,17 @@ import pandas as pd
 import multiprocessing as mp
 from molcraft import tensors
-from molcraft import featurizers
+if typing.TYPE_CHECKING:
+    from molcraft import featurizers
 def write(
     inputs: list[str | tuple],
-    featurizer: featurizers.GraphFeaturizer,
+    featurizer: 'featurizers.GraphFeaturizer',
     path: str,
-    overwrite: bool = True,
+    exist_ok: bool = False,
+    overwrite: bool = False,
     num_files: typing.Optional[int] = None,
     num_processes: typing.Optional[int] = None,
     multiprocessing: bool = False,
@@ -24,6 +28,8 @@ def write(
 ) -> None:
     if os.path.isdir(path):
+        if not exist_ok:
+            raise FileExistsError(f'Records already exist: {path}')
         if not overwrite:
             return
         else:
@@ -60,9 +66,11 @@ def write(
             chunk_sizes[i % num_files] += 1
         input_chunks = []
+        start_indices = []
         current_index = 0
         for size in chunk_sizes:
             input_chunks.append(inputs[current_index: current_index + size])
+            start_indices.append(current_index)
             current_index += size
         assert current_index == num_examples
@@ -73,13 +81,13 @@ def write(
         ]
         if not multiprocessing:
-            for path, input_chunk in zip(paths, input_chunks):
-                _write_tfrecord(input_chunk, path, featurizer)
+            for path, input_chunk, start_index in zip(paths, input_chunks, start_indices):
+                _write_tfrecord(input_chunk, path, featurizer, start_index)
             return
         processes = []
-        for path, input_chunk in zip(paths, input_chunks):
+        for path, input_chunk, start_index in zip(paths, input_chunks, start_indices):
             while len(processes) >= num_processes:
                 for process in processes:
@@ -91,7 +99,7 @@ def write(
             process = mp.Process(
                 target=_write_tfrecord,
-                args=(input_chunk, path, featurizer)
+                args=(input_chunk, path, featurizer, start_index)
             )
             processes.append(process)
             process.start()
@@ -134,9 +142,10 @@ def load_spec(path: str) -> tensors.GraphTensor.Spec:
     return spec
 def _write_tfrecord(
-    inputs,
+    inputs: list[str, tuple],
     path: str,
-    featurizer: featurizers.GraphFeaturizer,
+    featurizer: 'featurizers.GraphFeaturizer',
+    start_index: int,
 ) -> None:
     def _write_example(tensor):
@@ -147,12 +156,17 @@ def _write_tfrecord(
         writer.write(serialized_feature)
     with tf.io.TFRecordWriter(path) as writer:
-        for x in inputs:
+        for i, x in enumerate(inputs):
             if isinstance(x, (list, np.ndarray)):
                 x = tuple(x)
-            tensor = featurizer(x)
-            if tensor is not None:
+            try:
+                tensor = featurizer(x)
                 _write_example(tensor)
+            except Exception as e:
+                warnings.warn(
+                    f'Could not write record for index {i + start_index}, proceeding without it.'
+                    f'Exception raised:\n{e}'
+                )
 def _serialize_example(
     feature: dict[str, tf.train.Feature]

{molcraft-0.1.0a21 → molcraft-0.1.0a23}/molcraft/tensors.py RENAMED Viewed

@@ -1,3 +1,4 @@
+import warnings
 import tensorflow as tf
 import keras
 import typing

{molcraft-0.1.0a21 → molcraft-0.1.0a23}/molcraft.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: molcraft
-Version: 0.1.0a21
+Version: 0.1.0a23
 Summary: Graph Neural Networks for Molecular Machine Learning
 Author-email: Alexander Kensert <alexander.kensert@gmail.com>
 License: MIT License
@@ -35,7 +35,6 @@ Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: tensorflow>=2.16
-Requires-Dist: tensorflow-text>=2.16
 Requires-Dist: rdkit>=2023.9.5
 Requires-Dist: pandas>=1.0.3
 Requires-Dist: ipython>=8.12.0
@@ -43,7 +42,7 @@ Provides-Extra: gpu
 Requires-Dist: tensorflow[and-cuda]>=2.16; extra == "gpu"
 Dynamic: license-file
-<img src="https://github.com/akensert/molcraft/blob/main/docs/_static/molcraft-logo.png" alt="molcraft-logo", width="90%">
+<img src="https://github.com/akensert/molcraft/blob/main/docs/_static/molcraft-logo.png" alt="molcraft-logo" width="90%">
 **Deep Learning on Molecules**: A Minimalistic GNN package for Molecular ML.

{molcraft-0.1.0a21 → molcraft-0.1.0a23}/molcraft.egg-info/SOURCES.txt RENAMED Viewed

@@ -6,6 +6,7 @@ molcraft/callbacks.py
 molcraft/chem.py
 molcraft/datasets.py
 molcraft/descriptors.py
+molcraft/diffusion.py
 molcraft/features.py
 molcraft/featurizers.py
 molcraft/layers.py

{molcraft-0.1.0a21 → molcraft-0.1.0a23}/molcraft.egg-info/requires.txt RENAMED Viewed

@@ -1,5 +1,4 @@
 tensorflow>=2.16
-tensorflow-text>=2.16
 rdkit>=2023.9.5
 pandas>=1.0.3
 ipython>=8.12.0

{molcraft-0.1.0a21 → molcraft-0.1.0a23}/pyproject.toml RENAMED Viewed

@@ -26,7 +26,6 @@ classifiers = [
 requires-python = ">=3.10"
 dependencies = [
     "tensorflow>=2.16",
-    "tensorflow-text>=2.16",
     "rdkit>=2023.9.5",
     "pandas>=1.0.3",
     "ipython>=8.12.0"

{molcraft-0.1.0a21 → molcraft-0.1.0a23}/tests/test_chem.py RENAMED Viewed

@@ -12,3 +12,6 @@ class TestChem(unittest.TestCase):
             "N1[C@@H](CCC1)C(=O)O",
         ]
+if __name__ == '__main__':
+    unittest.main()

{molcraft-0.1.0a21 → molcraft-0.1.0a23}/tests/test_featurizers.py RENAMED Viewed

@@ -3,6 +3,7 @@ import tempfile
 import shutil
 from molcraft import features
+from molcraft import descriptors
 from molcraft import featurizers
@@ -129,6 +130,9 @@ class TestFeaturizer(unittest.TestCase):
             pair_features=[
                 features.PairDistance(max_distance=20)
             ],
+            molecule_features=[
+                descriptors.ForceFieldEnergy(),
+            ],
             super_node=True,
             self_loops=False,
             include_hydrogens=False,
@@ -199,6 +203,5 @@ class TestFeaturizer(unittest.TestCase):
             self.assertEqual(graph.edge['target'].dtype.name, 'int32')
 if __name__ == '__main__':
     unittest.main()

{molcraft-0.1.0a21 → molcraft-0.1.0a23}/tests/test_losses.py RENAMED Viewed

@@ -21,4 +21,8 @@ class TestLoss(unittest.TestCase):
             keras.ops.array([[2., 0.1], [4., 0.2], [5., 0.3]])
         )
         self.assertGreater(value, 0)
-        self.assertEqual(len(keras.ops.shape(value)), 0)
+        self.assertEqual(len(keras.ops.shape(value)), 0)
+if __name__ == '__main__':
+    unittest.main()

{molcraft-0.1.0a21 → molcraft-0.1.0a23}/tests/test_models.py RENAMED Viewed

@@ -266,4 +266,8 @@ class TestModel(unittest.TestCase):
                 model = get_model(tensor)
                 out = model.embedding()(tensor)
                 self.assertTrue(out.shape[0] == tensor.context['size'].shape[0])
-                self.assertTrue(out.shape[1] == units)
+                self.assertTrue(out.shape[1] == units)
+if __name__ == '__main__':
+    unittest.main()