molcraft 0.1.0a21__tar.gz → 0.1.0a22__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of molcraft might be problematic.

Files changed (32)
  1. {molcraft-0.1.0a21 → molcraft-0.1.0a22}/PKG-INFO +1 -1
  2. {molcraft-0.1.0a21 → molcraft-0.1.0a22}/molcraft/__init__.py +2 -4
  3. {molcraft-0.1.0a21 → molcraft-0.1.0a22}/molcraft/chem.py +26 -12
  4. {molcraft-0.1.0a21 → molcraft-0.1.0a22}/molcraft/featurizers.py +13 -2
  5. {molcraft-0.1.0a21 → molcraft-0.1.0a22}/molcraft/records.py +26 -12
  6. {molcraft-0.1.0a21 → molcraft-0.1.0a22}/molcraft.egg-info/PKG-INFO +1 -1
  7. {molcraft-0.1.0a21 → molcraft-0.1.0a22}/LICENSE +0 -0
  8. {molcraft-0.1.0a21 → molcraft-0.1.0a22}/README.md +0 -0
  9. {molcraft-0.1.0a21 → molcraft-0.1.0a22}/molcraft/applications/__init__.py +0 -0
  10. {molcraft-0.1.0a21 → molcraft-0.1.0a22}/molcraft/applications/chromatography.py +0 -0
  11. {molcraft-0.1.0a21 → molcraft-0.1.0a22}/molcraft/applications/proteomics.py +0 -0
  12. {molcraft-0.1.0a21 → molcraft-0.1.0a22}/molcraft/callbacks.py +0 -0
  13. {molcraft-0.1.0a21 → molcraft-0.1.0a22}/molcraft/datasets.py +0 -0
  14. {molcraft-0.1.0a21 → molcraft-0.1.0a22}/molcraft/descriptors.py +0 -0
  15. {molcraft-0.1.0a21 → molcraft-0.1.0a22}/molcraft/features.py +0 -0
  16. {molcraft-0.1.0a21 → molcraft-0.1.0a22}/molcraft/layers.py +0 -0
  17. {molcraft-0.1.0a21 → molcraft-0.1.0a22}/molcraft/losses.py +0 -0
  18. {molcraft-0.1.0a21 → molcraft-0.1.0a22}/molcraft/models.py +0 -0
  19. {molcraft-0.1.0a21 → molcraft-0.1.0a22}/molcraft/ops.py +0 -0
  20. {molcraft-0.1.0a21 → molcraft-0.1.0a22}/molcraft/tensors.py +0 -0
  21. {molcraft-0.1.0a21 → molcraft-0.1.0a22}/molcraft.egg-info/SOURCES.txt +0 -0
  22. {molcraft-0.1.0a21 → molcraft-0.1.0a22}/molcraft.egg-info/dependency_links.txt +0 -0
  23. {molcraft-0.1.0a21 → molcraft-0.1.0a22}/molcraft.egg-info/requires.txt +0 -0
  24. {molcraft-0.1.0a21 → molcraft-0.1.0a22}/molcraft.egg-info/top_level.txt +0 -0
  25. {molcraft-0.1.0a21 → molcraft-0.1.0a22}/pyproject.toml +0 -0
  26. {molcraft-0.1.0a21 → molcraft-0.1.0a22}/setup.cfg +0 -0
  27. {molcraft-0.1.0a21 → molcraft-0.1.0a22}/tests/test_chem.py +0 -0
  28. {molcraft-0.1.0a21 → molcraft-0.1.0a22}/tests/test_featurizers.py +0 -0
  29. {molcraft-0.1.0a21 → molcraft-0.1.0a22}/tests/test_layers.py +0 -0
  30. {molcraft-0.1.0a21 → molcraft-0.1.0a22}/tests/test_losses.py +0 -0
  31. {molcraft-0.1.0a21 → molcraft-0.1.0a22}/tests/test_models.py +0 -0
  32. {molcraft-0.1.0a21 → molcraft-0.1.0a22}/tests/test_tensors.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: molcraft
- Version: 0.1.0a21
+ Version: 0.1.0a22
  Summary: Graph Neural Networks for Molecular Machine Learning
  Author-email: Alexander Kensert <alexander.kensert@gmail.com>
  License: MIT License
molcraft/__init__.py
@@ -1,4 +1,4 @@
- __version__ = '0.1.0a21'
+ __version__ = '0.1.0a22'
  
  import os
  os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
@@ -14,6 +14,4 @@ from molcraft import records
  from molcraft import tensors
  from molcraft import callbacks
  from molcraft import datasets
- from molcraft import losses
-
- from molcraft.applications import proteomics
+ from molcraft import losses
molcraft/chem.py
@@ -22,12 +22,19 @@ class Mol(Chem.Mol):
          if explicit_hs:
              rdkit_mol = Chem.AddHs(rdkit_mol)
          rdkit_mol.__class__ = cls
+         setattr(rdkit_mol, '_encoding', encoding)
          return rdkit_mol
  
      @property
      def canonical_smiles(self) -> str:
          return Chem.MolToSmiles(self, canonical=True)
  
+     @property
+     def encoding(self):
+         if hasattr(self, '_encoding'):
+             return self._encoding
+         return None
+
      @property
      def bonds(self) -> list['Bond']:
          if not hasattr(self, '_bonds'):
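
The hunk above stores the original input encoding on the Mol at construction time and exposes it through a new encoding property, which returns None when nothing was recorded. A minimal consumption sketch; the display_name helper is hypothetical, and the fallback expression mirrors the one introduced in embed_conformers below:

from molcraft import chem

def display_name(mol: 'chem.Mol') -> str:
    # Prefer the encoding the molecule was originally built from (e.g. the
    # input SMILES); fall back to the canonical SMILES if none was stored.
    return mol.encoding or mol.canonical_smiles
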
@@ -391,6 +398,7 @@ def embed_conformers(
      mol: Mol,
      num_conformers: int,
      method: str = 'ETKDGv3',
+     timeout: int | None = None,
      random_seed: int | None = None,
      **kwargs
  ) -> Mol:
@@ -403,6 +411,7 @@ def embed_conformers(
          'KDG': rdDistGeom.KDG()
      }
      mol = Mol(mol)
+     encoding = mol.encoding or mol.canonical_smiles
      embedding_method = available_embedding_methods.get(method)
      if embedding_method is None:
          raise ValueError(
@@ -413,8 +422,14 @@ def embed_conformers(
      for key, value in kwargs.items():
          setattr(embedding_method, key, value)
  
-     if random_seed is not None:
-         embedding_method.randomSeed = random_seed
+     if not timeout:
+         timeout = 0  # No timeout
+
+     if not random_seed:
+         random_seed = -1  # No random seed
+
+     embedding_method.randomSeed = random_seed
+     embedding_method.timeout = timeout
  
      success = rdDistGeom.EmbedMultipleConfs(
          mol, numConfs=num_conformers, params=embedding_method
@@ -422,17 +437,18 @@ def embed_conformers(
      num_successes = len(success)
      if num_successes < num_conformers:
          warnings.warn(
-             f'Could only embed {num_successes} out of {num_conformers} conformer(s) '
-             f'for {mol.canonical_smiles!r} using {method}. Embedding the remaining '
-             f'{num_conformers - num_successes} conformer(s) using different embedding methods.',
-             stacklevel=2
+             f'Could only embed {num_successes} out of {num_conformers} conformer(s) for '
+             f'{encoding!r} using the specified method ({method!r}) and parameters. Attempting '
+             f'to embed the remaining {num_conformers-num_successes} using fallback methods.',
          )
+         max_iters = 20 * mol.num_atoms  # Doubling the number of iterations
          for fallback_method in [method, 'ETDG', 'KDG']:
              fallback_embedding_method = available_embedding_methods[fallback_method]
              fallback_embedding_method.useRandomCoords = True
+             fallback_embedding_method.maxIterations = int(max_iters)
              fallback_embedding_method.clearConfs = False
-             if random_seed is not None:
-                 fallback_embedding_method.randomSeed = random_seed
+             fallback_embedding_method.timeout = int(timeout)
+             fallback_embedding_method.randomSeed = int(random_seed)
              success = rdDistGeom.EmbedMultipleConfs(
                  mol, numConfs=(num_conformers - num_successes), params=fallback_embedding_method
              )
@@ -441,7 +457,7 @@ def embed_conformers(
                  break
          else:
              raise RuntimeError(
-                 f'Could not embed {num_conformers} conformer(s) for {mol.canonical_smiles!r}. '
+                 f'Could not embed {num_conformers} conformer(s) for {encoding!r}. '
              )
      return mol
  
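
embed_conformers now accepts a timeout, always sets the seed and timeout on the RDKit embedding parameters (-1 meaning unseeded, 0 meaning no timeout), and gives the fallback pass random starting coordinates plus a doubled iteration budget (20 * num_atoms). A hedged usage sketch; `mol` is assumed to be an existing chem.Mol whose construction is not part of this diff:

from molcraft import chem

# `mol` is assumed to be a molcraft chem.Mol obtained elsewhere.
mol_with_confs = chem.embed_conformers(
    mol,
    num_conformers=5,
    method='ETKDGv3',   # ETDG and KDG remain available as fallbacks
    timeout=30,         # forwarded to the RDKit embedding parameters; None/0 disables it
    random_seed=42,     # None maps to -1, i.e. a non-deterministic embedding
)
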
@@ -485,9 +501,8 @@ def optimize_conformers(
      except RuntimeError as e:
          warnings.warn(
              f'{method} force field minimization did not succeed. Proceeding without it.',
-             stacklevel=2
          )
-         return mol
+         return Mol(mol)
      return mol_optimized
  
  def prune_conformers(
@@ -500,7 +515,6 @@ def prune_conformers(
          warnings.warn(
              'Molecule has no conformers. To embed conformers, invoke the `embed` method, '
              'and optionally followed by `minimize()` to perform force field minimization.',
-             stacklevel=2
          )
          return mol
  
molcraft/featurizers.py
@@ -2,8 +2,7 @@ import keras
  import json
  import abc
  import typing
- import copy
- import warnings
+ import os
  import numpy as np
  import pandas as pd
  import tensorflow as tf
@@ -13,6 +12,7 @@ from pathlib import Path
  
  from molcraft import tensors
  from molcraft import features
+ from molcraft import records
  from molcraft import chem
  from molcraft import descriptors
  
@@ -41,6 +41,17 @@ class GraphFeaturizer(abc.ABC):
      def load(filepath: str | Path, *args, **kwargs) -> 'GraphFeaturizer':
          return load_featurizer(filepath, *args, **kwargs)
  
+     def write_records(self, inputs: str | chem.Mol | tuple, path: str | Path, **kwargs) -> None:
+         records.write(
+             inputs, featurizer=self, path=path, **kwargs
+         )
+
+     @staticmethod
+     def read_records(path: str | Path, **kwargs) -> tf.data.Dataset:
+         return records.read(
+             path=path, **kwargs
+         )
+
      def __call__(
          self,
          inputs: str | chem.Mol | tuple | typing.Iterable,
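
The new write_records and read_records methods are thin wrappers around records.write and records.read. A usage sketch, assuming `featurizer` is an instance of some concrete GraphFeaturizer subclass (none is named in this diff):

import tensorflow as tf

smiles = ['CCO', 'c1ccccc1O', 'CC(=O)N']   # example inputs
featurizer = ...                           # any concrete GraphFeaturizer instance (assumed)

featurizer.write_records(smiles, path='data/records')               # delegates to records.write
dataset: tf.data.Dataset = featurizer.read_records('data/records')  # delegates to records.read
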
molcraft/records.py
@@ -3,20 +3,24 @@ import math
  import glob
  import time
  import typing
+ import warnings
  import tensorflow as tf
  import numpy as np
  import pandas as pd
  import multiprocessing as mp
  
  from molcraft import tensors
- from molcraft import featurizers
+
+ if typing.TYPE_CHECKING:
+     from molcraft import featurizers
  
  
  def write(
      inputs: list[str | tuple],
-     featurizer: featurizers.GraphFeaturizer,
+     featurizer: 'featurizers.GraphFeaturizer',
      path: str,
-     overwrite: bool = True,
+     exist_ok: bool = False,
+     overwrite: bool = False,
      num_files: typing.Optional[int] = None,
      num_processes: typing.Optional[int] = None,
      multiprocessing: bool = False,
@@ -24,6 +28,8 @@ def write(
  ) -> None:
  
      if os.path.isdir(path):
+         if not exist_ok:
+             raise FileExistsError(f'Records already exist: {path}')
          if not overwrite:
              return
      else:
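
records.write now refuses to touch an existing record directory unless told otherwise: exist_ok=False (the new default) raises, exist_ok=True without overwrite returns without writing, and both flags together rewrite the records. A sketch of the three cases, assuming `inputs` and `featurizer` are already defined:

from molcraft import records

records.write(inputs, featurizer=featurizer, path='data/records')
# FileExistsError if 'data/records' already exists (new default behaviour)

records.write(inputs, featurizer=featurizer, path='data/records', exist_ok=True)
# directory exists and overwrite=False -> returns without writing anything

records.write(inputs, featurizer=featurizer, path='data/records',
              exist_ok=True, overwrite=True)
# existing records are replaced
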
@@ -60,9 +66,11 @@ def write(
          chunk_sizes[i % num_files] += 1
  
      input_chunks = []
+     start_indices = []
      current_index = 0
      for size in chunk_sizes:
          input_chunks.append(inputs[current_index: current_index + size])
+         start_indices.append(current_index)
          current_index += size
  
      assert current_index == num_examples
@@ -73,13 +81,13 @@ def write(
      ]
  
      if not multiprocessing:
-         for path, input_chunk in zip(paths, input_chunks):
-             _write_tfrecord(input_chunk, path, featurizer)
+         for path, input_chunk, start_index in zip(paths, input_chunks, start_indices):
+             _write_tfrecord(input_chunk, path, featurizer, start_index)
          return
  
      processes = []
  
-     for path, input_chunk in zip(paths, input_chunks):
+     for path, input_chunk, start_index in zip(paths, input_chunks, start_indices):
  
          while len(processes) >= num_processes:
              for process in processes:
@@ -91,7 +99,7 @@ def write(
  
          process = mp.Process(
              target=_write_tfrecord,
-             args=(input_chunk, path, featurizer)
+             args=(input_chunk, path, featurizer, start_index)
          )
          processes.append(process)
          process.start()
@@ -134,9 +142,10 @@ def load_spec(path: str) -> tensors.GraphTensor.Spec:
      return spec
  
  def _write_tfrecord(
-     inputs,
+     inputs: list[str, tuple],
      path: str,
-     featurizer: featurizers.GraphFeaturizer,
+     featurizer: 'featurizers.GraphFeaturizer',
+     start_index: int,
  ) -> None:
  
      def _write_example(tensor):
@@ -147,12 +156,17 @@ def _write_tfrecord(
          writer.write(serialized_feature)
  
      with tf.io.TFRecordWriter(path) as writer:
-         for x in inputs:
+         for i, x in enumerate(inputs):
              if isinstance(x, (list, np.ndarray)):
                  x = tuple(x)
-             tensor = featurizer(x)
-             if tensor is not None:
+             try:
+                 tensor = featurizer(x)
                  _write_example(tensor)
+             except Exception as e:
+                 warnings.warn(
+                     f"Could not write record for index {i + start_index}, proceeding without it."
+                     f"Exception raised:\n{e}"
+                 )
  
  def _serialize_example(
      feature: dict[str, tf.train.Feature]
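
_write_tfrecord now skips inputs whose featurization raises, emitting a warning that carries the global input index (start_index + i) rather than aborting the whole shard. For single-process writes those warnings can be captured to audit which inputs were dropped; this is only a sketch (with `inputs` and `featurizer` assumed defined as above) and does not apply to multiprocessing=True, where the warnings are raised in worker processes:

import warnings
from molcraft import records

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter('always')
    records.write(inputs, featurizer=featurizer, path='data/records',
                  exist_ok=True, overwrite=True)

skipped = [str(w.message) for w in caught]
print(skipped)   # e.g. ["Could not write record for index 17, ..."]
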
molcraft.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: molcraft
- Version: 0.1.0a21
+ Version: 0.1.0a22
  Summary: Graph Neural Networks for Molecular Machine Learning
  Author-email: Alexander Kensert <alexander.kensert@gmail.com>
  License: MIT License