PyPI - molcraft - Versions diffs - 0.1.0a20__tar.gz → 0.1.0a22__tar.gz - Mend

molcraft 0.1.0a20.tar.gz → 0.1.0a22.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of molcraft might be problematic. Click here for more details.

Files changed (32)

{molcraft-0.1.0a20 → molcraft-0.1.0a22}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: molcraft
-Version: 0.1.0a20
+Version: 0.1.0a22
 Summary: Graph Neural Networks for Molecular Machine Learning
 Author-email: Alexander Kensert <alexander.kensert@gmail.com>
 License: MIT License

{molcraft-0.1.0a20 → molcraft-0.1.0a22}/molcraft/__init__.py RENAMED Viewed

@@ -1,4 +1,4 @@
-__version__ = '0.1.0a20'
+__version__ = '0.1.0a22'
 import os
 os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
@@ -14,6 +14,4 @@ from molcraft import records
 from molcraft import tensors
 from molcraft import callbacks
 from molcraft import datasets
-from molcraft import losses
-from molcraft.applications import proteomics
+from molcraft import losses

{molcraft-0.1.0a20 → molcraft-0.1.0a22}/molcraft/chem.py RENAMED Viewed

@@ -22,12 +22,19 @@ class Mol(Chem.Mol):
         if explicit_hs:
             rdkit_mol = Chem.AddHs(rdkit_mol)
         rdkit_mol.__class__ = cls
+        setattr(rdkit_mol, '_encoding', encoding)
         return rdkit_mol
     @property
     def canonical_smiles(self) -> str:
         return Chem.MolToSmiles(self, canonical=True)
+    @property
+    def encoding(self):
+        if hasattr(self, '_encoding'):
+            return self._encoding
+        return None
     @property
     def bonds(self) -> list['Bond']:
         if not hasattr(self, '_bonds'):
@@ -391,6 +398,7 @@ def embed_conformers(
     mol: Mol,
     num_conformers: int,
     method: str = 'ETKDGv3',
+    timeout: int | None = None,
     random_seed: int | None = None,
     **kwargs
 ) -> Mol:
@@ -403,6 +411,7 @@ def embed_conformers(
         'KDG': rdDistGeom.KDG()
     }
     mol = Mol(mol)
+    encoding = mol.encoding or mol.canonical_smiles
     embedding_method = available_embedding_methods.get(method)
     if embedding_method is None:
         raise ValueError(
@@ -413,8 +422,14 @@ def embed_conformers(
     for key, value in kwargs.items():
         setattr(embedding_method, key, value)
-    if random_seed is not None:
-        embedding_method.randomSeed = random_seed
+    if not timeout:
+        timeout = 0 # No timeout
+    if not random_seed:
+        random_seed = -1 # No random seed
+    embedding_method.randomSeed = random_seed
+    embedding_method.timeout = timeout
     success = rdDistGeom.EmbedMultipleConfs(
         mol, numConfs=num_conformers, params=embedding_method
@@ -422,19 +437,18 @@ def embed_conformers(
     num_successes = len(success)
     if num_successes < num_conformers:
         warnings.warn(
-            f'Could only embed {num_successes} out of {num_conformers} conformer(s) '
-            f'for {mol.canonical_smiles!r} using {method}. Embedding the remaining '
-            f'{num_conformers - num_successes} conformer(s) using different embedding methods.',
-            stacklevel=2
+            f'Could only embed {num_successes} out of {num_conformers} conformer(s) for '
+            f'{encoding!r} using the specified method ({method!r}) and parameters. Attempting '
+            f'to embed the remaining {num_conformers-num_successes} using fallback methods.',
         )
-        max_attempts = (20 * mol.num_atoms) # increasing it from 10xN to 20xN
+        max_iters = 20 * mol.num_atoms # Doubling the number of iterations
         for fallback_method in [method, 'ETDG', 'KDG']:
             fallback_embedding_method = available_embedding_methods[fallback_method]
             fallback_embedding_method.useRandomCoords = True
-            fallback_embedding_method.maxAttempts = max_attempts
+            fallback_embedding_method.maxIterations = int(max_iters)
             fallback_embedding_method.clearConfs = False
-            if random_seed is not None:
-                fallback_embedding_method.randomSeed = random_seed
+            fallback_embedding_method.timeout = int(timeout)
+            fallback_embedding_method.randomSeed = int(random_seed)
             success = rdDistGeom.EmbedMultipleConfs(
                 mol, numConfs=(num_conformers - num_successes), params=fallback_embedding_method
             )
@@ -443,7 +457,7 @@ def embed_conformers(
                 break
         else:
             raise RuntimeError(
-                f'Could not embed {num_conformers} conformer(s) for {mol.canonical_smiles!r}. '
+                f'Could not embed {num_conformers} conformer(s) for {encoding!r}. '
             )
     return mol
@@ -463,14 +477,14 @@ def optimize_conformers(
             f'Could not find `method` {method!r}. Specify either of: '
             '`UFF`, `MMFF`, `MMFF94` or `MMFF94s`.'
         )
-    mol = Mol(mol)
+    mol_optimized = Mol(mol)
     try:
         if method.startswith('MMFF'):
             variant = method
             if variant == 'MMFF':
                 variant += '94'
             _, _ = _mmff_optimize_conformers(
-                mol,
+                mol_optimized,
                 num_threads=num_threads,
                 max_iter=max_iter,
                 variant=variant,
@@ -478,7 +492,7 @@ def optimize_conformers(
             )
         else:
             _, _ = _uff_optimize_conformers(
-                mol,
+                mol_optimized,
                 num_threads=num_threads,
                 max_iter=max_iter,
                 vdw_threshold=vdw_threshold,
@@ -486,11 +500,10 @@ def optimize_conformers(
             )
     except RuntimeError as e:
         warnings.warn(
-            f'{method} force field minimization raised {e}. '
-            '\nProceeding without force field minimization.',
-            stacklevel=2
+            f'{method} force field minimization did not succeed. Proceeding without it.',
         )
-    return mol
+        return Mol(mol)
+    return mol_optimized
 def prune_conformers(
     mol: Mol,
@@ -502,7 +515,6 @@ def prune_conformers(
         warnings.warn(
             'Molecule has no conformers. To embed conformers, invoke the `embed` method, '
             'and optionally followed by `minimize()` to perform force field minimization.',
-            stacklevel=2
         )
         return mol

{molcraft-0.1.0a20 → molcraft-0.1.0a22}/molcraft/featurizers.py RENAMED Viewed

@@ -2,8 +2,7 @@ import keras
 import json
 import abc
 import typing
-import copy
-import warnings
+import os
 import numpy as np
 import pandas as pd
 import tensorflow as tf
@@ -13,6 +12,7 @@ from pathlib import Path
 from molcraft import tensors
 from molcraft import features
+from molcraft import records
 from molcraft import chem
 from molcraft import descriptors
@@ -41,6 +41,17 @@ class GraphFeaturizer(abc.ABC):
     def load(filepath: str | Path, *args, **kwargs) -> 'GraphFeaturizer':
         return load_featurizer(filepath, *args, **kwargs)
+    def write_records(self, inputs: str | chem.Mol | tuple, path: str | Path, **kwargs) -> None:
+         records.write(
+            inputs, featurizer=self, path=path, **kwargs
+         )
+    @staticmethod
+    def read_records(path: str | Path, **kwargs) -> tf.data.Dataset:
+        return records.read(
+            path=path, **kwargs
+        )
     def __call__(
         self,
         inputs: str | chem.Mol | tuple | typing.Iterable,

{molcraft-0.1.0a20 → molcraft-0.1.0a22}/molcraft/records.py RENAMED Viewed

@@ -3,20 +3,24 @@ import math
 import glob
 import time
 import typing
+import warnings
 import tensorflow as tf
 import numpy as np
 import pandas as pd
 import multiprocessing as mp
 from molcraft import tensors
-from molcraft import featurizers
+if typing.TYPE_CHECKING:
+    from molcraft import featurizers
 def write(
     inputs: list[str | tuple],
-    featurizer: featurizers.GraphFeaturizer,
+    featurizer: 'featurizers.GraphFeaturizer',
     path: str,
-    overwrite: bool = True,
+    exist_ok: bool = False,
+    overwrite: bool = False,
     num_files: typing.Optional[int] = None,
     num_processes: typing.Optional[int] = None,
     multiprocessing: bool = False,
@@ -24,6 +28,8 @@ def write(
 ) -> None:
     if os.path.isdir(path):
+        if not exist_ok:
+            raise FileExistsError(f'Records already exist: {path}')
         if not overwrite:
             return
         else:
@@ -60,9 +66,11 @@ def write(
             chunk_sizes[i % num_files] += 1
         input_chunks = []
+        start_indices = []
         current_index = 0
         for size in chunk_sizes:
             input_chunks.append(inputs[current_index: current_index + size])
+            start_indices.append(current_index)
             current_index += size
         assert current_index == num_examples
@@ -73,13 +81,13 @@ def write(
         ]
         if not multiprocessing:
-            for path, input_chunk in zip(paths, input_chunks):
-                _write_tfrecord(input_chunk, path, featurizer)
+            for path, input_chunk, start_index in zip(paths, input_chunks, start_indices):
+                _write_tfrecord(input_chunk, path, featurizer, start_index)
             return
         processes = []
-        for path, input_chunk in zip(paths, input_chunks):
+        for path, input_chunk, start_index in zip(paths, input_chunks, start_indices):
             while len(processes) >= num_processes:
                 for process in processes:
@@ -91,7 +99,7 @@ def write(
             process = mp.Process(
                 target=_write_tfrecord,
-                args=(input_chunk, path, featurizer)
+                args=(input_chunk, path, featurizer, start_index)
             )
             processes.append(process)
             process.start()
@@ -134,9 +142,10 @@ def load_spec(path: str) -> tensors.GraphTensor.Spec:
     return spec
 def _write_tfrecord(
-    inputs,
+    inputs: list[str, tuple],
     path: str,
-    featurizer: featurizers.GraphFeaturizer,
+    featurizer: 'featurizers.GraphFeaturizer',
+    start_index: int,
 ) -> None:
     def _write_example(tensor):
@@ -147,12 +156,17 @@ def _write_tfrecord(
         writer.write(serialized_feature)
     with tf.io.TFRecordWriter(path) as writer:
-        for x in inputs:
+        for i, x in enumerate(inputs):
             if isinstance(x, (list, np.ndarray)):
                 x = tuple(x)
-            tensor = featurizer(x)
-            if tensor is not None:
+            try:
+                tensor = featurizer(x)
                 _write_example(tensor)
+            except Exception as e:
+                warnings.warn(
+                    f"Could not write record for index {i + start_index}, proceeding without it."
+                    f"Exception raised:\n{e}"
+                )
 def _serialize_example(
     feature: dict[str, tf.train.Feature]

{molcraft-0.1.0a20 → molcraft-0.1.0a22}/molcraft.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: molcraft
-Version: 0.1.0a20
+Version: 0.1.0a22
 Summary: Graph Neural Networks for Molecular Machine Learning
 Author-email: Alexander Kensert <alexander.kensert@gmail.com>
 License: MIT License