molcraft 0.1.0a20__py3-none-any.whl → 0.1.0a22__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of molcraft might be problematic. Click here for more details.

molcraft/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- __version__ = '0.1.0a20'
1
+ __version__ = '0.1.0a22'
2
2
 
3
3
  import os
4
4
  os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
@@ -14,6 +14,4 @@ from molcraft import records
14
14
  from molcraft import tensors
15
15
  from molcraft import callbacks
16
16
  from molcraft import datasets
17
- from molcraft import losses
18
-
19
- from molcraft.applications import proteomics
17
+ from molcraft import losses
molcraft/chem.py CHANGED
@@ -22,12 +22,19 @@ class Mol(Chem.Mol):
22
22
  if explicit_hs:
23
23
  rdkit_mol = Chem.AddHs(rdkit_mol)
24
24
  rdkit_mol.__class__ = cls
25
+ setattr(rdkit_mol, '_encoding', encoding)
25
26
  return rdkit_mol
26
27
 
27
28
  @property
28
29
  def canonical_smiles(self) -> str:
29
30
  return Chem.MolToSmiles(self, canonical=True)
30
31
 
32
+ @property
33
+ def encoding(self):
34
+ if hasattr(self, '_encoding'):
35
+ return self._encoding
36
+ return None
37
+
31
38
  @property
32
39
  def bonds(self) -> list['Bond']:
33
40
  if not hasattr(self, '_bonds'):
@@ -391,6 +398,7 @@ def embed_conformers(
391
398
  mol: Mol,
392
399
  num_conformers: int,
393
400
  method: str = 'ETKDGv3',
401
+ timeout: int | None = None,
394
402
  random_seed: int | None = None,
395
403
  **kwargs
396
404
  ) -> Mol:
@@ -403,6 +411,7 @@ def embed_conformers(
403
411
  'KDG': rdDistGeom.KDG()
404
412
  }
405
413
  mol = Mol(mol)
414
+ encoding = mol.encoding or mol.canonical_smiles
406
415
  embedding_method = available_embedding_methods.get(method)
407
416
  if embedding_method is None:
408
417
  raise ValueError(
@@ -413,8 +422,14 @@ def embed_conformers(
413
422
  for key, value in kwargs.items():
414
423
  setattr(embedding_method, key, value)
415
424
 
416
- if random_seed is not None:
417
- embedding_method.randomSeed = random_seed
425
+ if not timeout:
426
+ timeout = 0 # No timeout
427
+
428
+ if not random_seed:
429
+ random_seed = -1 # No random seed
430
+
431
+ embedding_method.randomSeed = random_seed
432
+ embedding_method.timeout = timeout
418
433
 
419
434
  success = rdDistGeom.EmbedMultipleConfs(
420
435
  mol, numConfs=num_conformers, params=embedding_method
@@ -422,19 +437,18 @@ def embed_conformers(
422
437
  num_successes = len(success)
423
438
  if num_successes < num_conformers:
424
439
  warnings.warn(
425
- f'Could only embed {num_successes} out of {num_conformers} conformer(s) '
426
- f'for {mol.canonical_smiles!r} using {method}. Embedding the remaining '
427
- f'{num_conformers - num_successes} conformer(s) using different embedding methods.',
428
- stacklevel=2
440
+ f'Could only embed {num_successes} out of {num_conformers} conformer(s) for '
441
+ f'{encoding!r} using the specified method ({method!r}) and parameters. Attempting '
442
+ f'to embed the remaining {num_conformers-num_successes} using fallback methods.',
429
443
  )
430
- max_attempts = (20 * mol.num_atoms) # increasing it from 10xN to 20xN
444
+ max_iters = 20 * mol.num_atoms # Doubling the number of iterations
431
445
  for fallback_method in [method, 'ETDG', 'KDG']:
432
446
  fallback_embedding_method = available_embedding_methods[fallback_method]
433
447
  fallback_embedding_method.useRandomCoords = True
434
- fallback_embedding_method.maxAttempts = max_attempts
448
+ fallback_embedding_method.maxIterations = int(max_iters)
435
449
  fallback_embedding_method.clearConfs = False
436
- if random_seed is not None:
437
- fallback_embedding_method.randomSeed = random_seed
450
+ fallback_embedding_method.timeout = int(timeout)
451
+ fallback_embedding_method.randomSeed = int(random_seed)
438
452
  success = rdDistGeom.EmbedMultipleConfs(
439
453
  mol, numConfs=(num_conformers - num_successes), params=fallback_embedding_method
440
454
  )
@@ -443,7 +457,7 @@ def embed_conformers(
443
457
  break
444
458
  else:
445
459
  raise RuntimeError(
446
- f'Could not embed {num_conformers} conformer(s) for {mol.canonical_smiles!r}. '
460
+ f'Could not embed {num_conformers} conformer(s) for {encoding!r}. '
447
461
  )
448
462
  return mol
449
463
 
@@ -463,14 +477,14 @@ def optimize_conformers(
463
477
  f'Could not find `method` {method!r}. Specify either of: '
464
478
  '`UFF`, `MMFF`, `MMFF94` or `MMFF94s`.'
465
479
  )
466
- mol = Mol(mol)
480
+ mol_optimized = Mol(mol)
467
481
  try:
468
482
  if method.startswith('MMFF'):
469
483
  variant = method
470
484
  if variant == 'MMFF':
471
485
  variant += '94'
472
486
  _, _ = _mmff_optimize_conformers(
473
- mol,
487
+ mol_optimized,
474
488
  num_threads=num_threads,
475
489
  max_iter=max_iter,
476
490
  variant=variant,
@@ -478,7 +492,7 @@ def optimize_conformers(
478
492
  )
479
493
  else:
480
494
  _, _ = _uff_optimize_conformers(
481
- mol,
495
+ mol_optimized,
482
496
  num_threads=num_threads,
483
497
  max_iter=max_iter,
484
498
  vdw_threshold=vdw_threshold,
@@ -486,11 +500,10 @@ def optimize_conformers(
486
500
  )
487
501
  except RuntimeError as e:
488
502
  warnings.warn(
489
- f'{method} force field minimization raised {e}. '
490
- '\nProceeding without force field minimization.',
491
- stacklevel=2
503
+ f'{method} force field minimization did not succeed. Proceeding without it.',
492
504
  )
493
- return mol
505
+ return Mol(mol)
506
+ return mol_optimized
494
507
 
495
508
  def prune_conformers(
496
509
  mol: Mol,
@@ -502,7 +515,6 @@ def prune_conformers(
502
515
  warnings.warn(
503
516
  'Molecule has no conformers. To embed conformers, invoke the `embed` method, '
504
517
  'and optionally followed by `minimize()` to perform force field minimization.',
505
- stacklevel=2
506
518
  )
507
519
  return mol
508
520
 
molcraft/featurizers.py CHANGED
@@ -2,8 +2,7 @@ import keras
2
2
  import json
3
3
  import abc
4
4
  import typing
5
- import copy
6
- import warnings
5
+ import os
7
6
  import numpy as np
8
7
  import pandas as pd
9
8
  import tensorflow as tf
@@ -13,6 +12,7 @@ from pathlib import Path
13
12
 
14
13
  from molcraft import tensors
15
14
  from molcraft import features
15
+ from molcraft import records
16
16
  from molcraft import chem
17
17
  from molcraft import descriptors
18
18
 
@@ -41,6 +41,17 @@ class GraphFeaturizer(abc.ABC):
41
41
  def load(filepath: str | Path, *args, **kwargs) -> 'GraphFeaturizer':
42
42
  return load_featurizer(filepath, *args, **kwargs)
43
43
 
44
+ def write_records(self, inputs: str | chem.Mol | tuple, path: str | Path, **kwargs) -> None:
45
+ records.write(
46
+ inputs, featurizer=self, path=path, **kwargs
47
+ )
48
+
49
+ @staticmethod
50
+ def read_records(path: str | Path, **kwargs) -> tf.data.Dataset:
51
+ return records.read(
52
+ path=path, **kwargs
53
+ )
54
+
44
55
  def __call__(
45
56
  self,
46
57
  inputs: str | chem.Mol | tuple | typing.Iterable,
molcraft/records.py CHANGED
@@ -3,20 +3,24 @@ import math
3
3
  import glob
4
4
  import time
5
5
  import typing
6
+ import warnings
6
7
  import tensorflow as tf
7
8
  import numpy as np
8
9
  import pandas as pd
9
10
  import multiprocessing as mp
10
11
 
11
12
  from molcraft import tensors
12
- from molcraft import featurizers
13
+
14
+ if typing.TYPE_CHECKING:
15
+ from molcraft import featurizers
13
16
 
14
17
 
15
18
  def write(
16
19
  inputs: list[str | tuple],
17
- featurizer: featurizers.GraphFeaturizer,
20
+ featurizer: 'featurizers.GraphFeaturizer',
18
21
  path: str,
19
- overwrite: bool = True,
22
+ exist_ok: bool = False,
23
+ overwrite: bool = False,
20
24
  num_files: typing.Optional[int] = None,
21
25
  num_processes: typing.Optional[int] = None,
22
26
  multiprocessing: bool = False,
@@ -24,6 +28,8 @@ def write(
24
28
  ) -> None:
25
29
 
26
30
  if os.path.isdir(path):
31
+ if not exist_ok:
32
+ raise FileExistsError(f'Records already exist: {path}')
27
33
  if not overwrite:
28
34
  return
29
35
  else:
@@ -60,9 +66,11 @@ def write(
60
66
  chunk_sizes[i % num_files] += 1
61
67
 
62
68
  input_chunks = []
69
+ start_indices = []
63
70
  current_index = 0
64
71
  for size in chunk_sizes:
65
72
  input_chunks.append(inputs[current_index: current_index + size])
73
+ start_indices.append(current_index)
66
74
  current_index += size
67
75
 
68
76
  assert current_index == num_examples
@@ -73,13 +81,13 @@ def write(
73
81
  ]
74
82
 
75
83
  if not multiprocessing:
76
- for path, input_chunk in zip(paths, input_chunks):
77
- _write_tfrecord(input_chunk, path, featurizer)
84
+ for path, input_chunk, start_index in zip(paths, input_chunks, start_indices):
85
+ _write_tfrecord(input_chunk, path, featurizer, start_index)
78
86
  return
79
87
 
80
88
  processes = []
81
89
 
82
- for path, input_chunk in zip(paths, input_chunks):
90
+ for path, input_chunk, start_index in zip(paths, input_chunks, start_indices):
83
91
 
84
92
  while len(processes) >= num_processes:
85
93
  for process in processes:
@@ -91,7 +99,7 @@ def write(
91
99
 
92
100
  process = mp.Process(
93
101
  target=_write_tfrecord,
94
- args=(input_chunk, path, featurizer)
102
+ args=(input_chunk, path, featurizer, start_index)
95
103
  )
96
104
  processes.append(process)
97
105
  process.start()
@@ -134,9 +142,10 @@ def load_spec(path: str) -> tensors.GraphTensor.Spec:
134
142
  return spec
135
143
 
136
144
  def _write_tfrecord(
137
- inputs,
145
+ inputs: list[str, tuple],
138
146
  path: str,
139
- featurizer: featurizers.GraphFeaturizer,
147
+ featurizer: 'featurizers.GraphFeaturizer',
148
+ start_index: int,
140
149
  ) -> None:
141
150
 
142
151
  def _write_example(tensor):
@@ -147,12 +156,17 @@ def _write_tfrecord(
147
156
  writer.write(serialized_feature)
148
157
 
149
158
  with tf.io.TFRecordWriter(path) as writer:
150
- for x in inputs:
159
+ for i, x in enumerate(inputs):
151
160
  if isinstance(x, (list, np.ndarray)):
152
161
  x = tuple(x)
153
- tensor = featurizer(x)
154
- if tensor is not None:
162
+ try:
163
+ tensor = featurizer(x)
155
164
  _write_example(tensor)
165
+ except Exception as e:
166
+ warnings.warn(
167
+ f"Could not write record for index {i + start_index}, proceeding without it."
168
+ f"Exception raised:\n{e}"
169
+ )
156
170
 
157
171
  def _serialize_example(
158
172
  feature: dict[str, tf.train.Feature]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: molcraft
3
- Version: 0.1.0a20
3
+ Version: 0.1.0a22
4
4
  Summary: Graph Neural Networks for Molecular Machine Learning
5
5
  Author-email: Alexander Kensert <alexander.kensert@gmail.com>
6
6
  License: MIT License
@@ -1,21 +1,21 @@
1
- molcraft/__init__.py,sha256=Wyi1uJsnW3jzksFRcgtcyHJXNHczxn5ClKBLUUC2-rA,477
1
+ molcraft/__init__.py,sha256=O88EmicQAD8oz9oFMXk_IzFChQEbbU-BCs3IE-c9Dkk,431
2
2
  molcraft/callbacks.py,sha256=x5HnkZhqcFRrW6xdApt_jZ4X08A-0fxcnFKfdmRKa0c,3571
3
- molcraft/chem.py,sha256=e56qBDuqh8rq_4-UMyp6LCQNxxSx8hZ7gzuz-87DHgw,21652
3
+ molcraft/chem.py,sha256=ynrEpWZL2D370p7CqH2kE1KhBByq7IiuQbUNoKQt96I,22028
4
4
  molcraft/datasets.py,sha256=Nd2lw5USUZE52vvAiNr-q-n03Y3--NlZlK0NzqHgp-E,4145
5
5
  molcraft/descriptors.py,sha256=Cl3KnBPsTST7XLgRLktkX5LwY9MV0P_lUlrt8iPV5no,3508
6
6
  molcraft/features.py,sha256=s0WeV8eZcDEypPgC1m37f4s9QkvWIlVgn-L43Cdsa14,13525
7
- molcraft/featurizers.py,sha256=b9b8XUGqMku3EwwQcjeE2rsaMQ0T_XovupUXjX0Awug,17724
7
+ molcraft/featurizers.py,sha256=1yBz5-JA7IhNm0dGivvVm1nJ5QGck8VQXtwHPWFbTuQ,18091
8
8
  molcraft/layers.py,sha256=H7XZru4XGJA6gbRO9V1BsGqh1mIrMdhzNCKS5o6oNok,64544
9
9
  molcraft/losses.py,sha256=qnS2yC5g-O3n_zVea9MR6TNiFraW2yqRgePOisoUP4A,1065
10
10
  molcraft/models.py,sha256=2Pc1htT9fCukGd8ZxrvE0rzEHsPBm0pluHw4FZXaUE4,21963
11
11
  molcraft/ops.py,sha256=bQbdFDt9waxVCzF5-dkTB6vlpj9eoSt8I4Qg7ZGXbsU,6178
12
- molcraft/records.py,sha256=0j4EWP55sfnkoQIH5trdaAIevPfVbAtPLrygTRmLyFw,5686
12
+ molcraft/records.py,sha256=sopYElKWC3A9QE5I8_957v3faLb2Wt5WILHZv_FLLds,6283
13
13
  molcraft/tensors.py,sha256=vk-W8zZu-re1g18YevDEEoVQRxT4AdIiMdI-4EvtJI4,22477
14
14
  molcraft/applications/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
15
  molcraft/applications/chromatography.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
16
  molcraft/applications/proteomics.py,sha256=BL3EtW-q-0j79pLYO7npC67mA2ApRhH-XI4rOaP8_wc,8407
17
- molcraft-0.1.0a20.dist-info/licenses/LICENSE,sha256=sbVeqlrtZ0V63uYhZGL5dCxUm8rBAOqe2avyA1zIQNk,1074
18
- molcraft-0.1.0a20.dist-info/METADATA,sha256=YntROeUGzofMiRKkfjtpIWFXHzJ6Jo5FG1t1dNraUTE,3930
19
- molcraft-0.1.0a20.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
20
- molcraft-0.1.0a20.dist-info/top_level.txt,sha256=dENV6MfOceshM6MQCgJlcN1ojZkiCL9B4F7XyUge3QM,9
21
- molcraft-0.1.0a20.dist-info/RECORD,,
17
+ molcraft-0.1.0a22.dist-info/licenses/LICENSE,sha256=sbVeqlrtZ0V63uYhZGL5dCxUm8rBAOqe2avyA1zIQNk,1074
18
+ molcraft-0.1.0a22.dist-info/METADATA,sha256=1OHx3-Q94fFEi21l0p3bnMjU-Q0EHaZLm4PU1A6QbkU,3930
19
+ molcraft-0.1.0a22.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
20
+ molcraft-0.1.0a22.dist-info/top_level.txt,sha256=dENV6MfOceshM6MQCgJlcN1ojZkiCL9B4F7XyUge3QM,9
21
+ molcraft-0.1.0a22.dist-info/RECORD,,