molcraft 0.1.0a21__py3-none-any.whl → 0.1.0a22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of molcraft might be problematic. Click here for more details.
- molcraft/__init__.py +2 -4
- molcraft/chem.py +26 -12
- molcraft/featurizers.py +13 -2
- molcraft/records.py +26 -12
- {molcraft-0.1.0a21.dist-info → molcraft-0.1.0a22.dist-info}/METADATA +1 -1
- {molcraft-0.1.0a21.dist-info → molcraft-0.1.0a22.dist-info}/RECORD +9 -9
- {molcraft-0.1.0a21.dist-info → molcraft-0.1.0a22.dist-info}/WHEEL +0 -0
- {molcraft-0.1.0a21.dist-info → molcraft-0.1.0a22.dist-info}/licenses/LICENSE +0 -0
- {molcraft-0.1.0a21.dist-info → molcraft-0.1.0a22.dist-info}/top_level.txt +0 -0
molcraft/__init__.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
__version__ = '0.1.0a21'
|
|
1
|
+
__version__ = '0.1.0a22'
|
|
2
2
|
|
|
3
3
|
import os
|
|
4
4
|
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
|
|
@@ -14,6 +14,4 @@ from molcraft import records
|
|
|
14
14
|
from molcraft import tensors
|
|
15
15
|
from molcraft import callbacks
|
|
16
16
|
from molcraft import datasets
|
|
17
|
-
from molcraft import losses
|
|
18
|
-
|
|
19
|
-
from molcraft.applications import proteomics
|
|
17
|
+
from molcraft import losses
|
molcraft/chem.py
CHANGED
|
@@ -22,12 +22,19 @@ class Mol(Chem.Mol):
|
|
|
22
22
|
if explicit_hs:
|
|
23
23
|
rdkit_mol = Chem.AddHs(rdkit_mol)
|
|
24
24
|
rdkit_mol.__class__ = cls
|
|
25
|
+
setattr(rdkit_mol, '_encoding', encoding)
|
|
25
26
|
return rdkit_mol
|
|
26
27
|
|
|
27
28
|
@property
|
|
28
29
|
def canonical_smiles(self) -> str:
|
|
29
30
|
return Chem.MolToSmiles(self, canonical=True)
|
|
30
31
|
|
|
32
|
+
@property
|
|
33
|
+
def encoding(self):
|
|
34
|
+
if hasattr(self, '_encoding'):
|
|
35
|
+
return self._encoding
|
|
36
|
+
return None
|
|
37
|
+
|
|
31
38
|
@property
|
|
32
39
|
def bonds(self) -> list['Bond']:
|
|
33
40
|
if not hasattr(self, '_bonds'):
|
|
@@ -391,6 +398,7 @@ def embed_conformers(
|
|
|
391
398
|
mol: Mol,
|
|
392
399
|
num_conformers: int,
|
|
393
400
|
method: str = 'ETKDGv3',
|
|
401
|
+
timeout: int | None = None,
|
|
394
402
|
random_seed: int | None = None,
|
|
395
403
|
**kwargs
|
|
396
404
|
) -> Mol:
|
|
@@ -403,6 +411,7 @@ def embed_conformers(
|
|
|
403
411
|
'KDG': rdDistGeom.KDG()
|
|
404
412
|
}
|
|
405
413
|
mol = Mol(mol)
|
|
414
|
+
encoding = mol.encoding or mol.canonical_smiles
|
|
406
415
|
embedding_method = available_embedding_methods.get(method)
|
|
407
416
|
if embedding_method is None:
|
|
408
417
|
raise ValueError(
|
|
@@ -413,8 +422,14 @@ def embed_conformers(
|
|
|
413
422
|
for key, value in kwargs.items():
|
|
414
423
|
setattr(embedding_method, key, value)
|
|
415
424
|
|
|
416
|
-
if
|
|
417
|
-
|
|
425
|
+
if not timeout:
|
|
426
|
+
timeout = 0 # No timeout
|
|
427
|
+
|
|
428
|
+
if not random_seed:
|
|
429
|
+
random_seed = -1 # No random seed
|
|
430
|
+
|
|
431
|
+
embedding_method.randomSeed = random_seed
|
|
432
|
+
embedding_method.timeout = timeout
|
|
418
433
|
|
|
419
434
|
success = rdDistGeom.EmbedMultipleConfs(
|
|
420
435
|
mol, numConfs=num_conformers, params=embedding_method
|
|
@@ -422,17 +437,18 @@ def embed_conformers(
|
|
|
422
437
|
num_successes = len(success)
|
|
423
438
|
if num_successes < num_conformers:
|
|
424
439
|
warnings.warn(
|
|
425
|
-
f'Could only embed {num_successes} out of {num_conformers} conformer(s) '
|
|
426
|
-
f'
|
|
427
|
-
f'{num_conformers
|
|
428
|
-
stacklevel=2
|
|
440
|
+
f'Could only embed {num_successes} out of {num_conformers} conformer(s) for '
|
|
441
|
+
f'{encoding!r} using the specified method ({method!r}) and parameters. Attempting '
|
|
442
|
+
f'to embed the remaining {num_conformers-num_successes} using fallback methods.',
|
|
429
443
|
)
|
|
444
|
+
max_iters = 20 * mol.num_atoms # Doubling the number of iterations
|
|
430
445
|
for fallback_method in [method, 'ETDG', 'KDG']:
|
|
431
446
|
fallback_embedding_method = available_embedding_methods[fallback_method]
|
|
432
447
|
fallback_embedding_method.useRandomCoords = True
|
|
448
|
+
fallback_embedding_method.maxIterations = int(max_iters)
|
|
433
449
|
fallback_embedding_method.clearConfs = False
|
|
434
|
-
|
|
435
|
-
|
|
450
|
+
fallback_embedding_method.timeout = int(timeout)
|
|
451
|
+
fallback_embedding_method.randomSeed = int(random_seed)
|
|
436
452
|
success = rdDistGeom.EmbedMultipleConfs(
|
|
437
453
|
mol, numConfs=(num_conformers - num_successes), params=fallback_embedding_method
|
|
438
454
|
)
|
|
@@ -441,7 +457,7 @@ def embed_conformers(
|
|
|
441
457
|
break
|
|
442
458
|
else:
|
|
443
459
|
raise RuntimeError(
|
|
444
|
-
f'Could not embed {num_conformers} conformer(s) for {
|
|
460
|
+
f'Could not embed {num_conformers} conformer(s) for {encoding!r}. '
|
|
445
461
|
)
|
|
446
462
|
return mol
|
|
447
463
|
|
|
@@ -485,9 +501,8 @@ def optimize_conformers(
|
|
|
485
501
|
except RuntimeError as e:
|
|
486
502
|
warnings.warn(
|
|
487
503
|
f'{method} force field minimization did not succeed. Proceeding without it.',
|
|
488
|
-
stacklevel=2
|
|
489
504
|
)
|
|
490
|
-
return mol
|
|
505
|
+
return Mol(mol)
|
|
491
506
|
return mol_optimized
|
|
492
507
|
|
|
493
508
|
def prune_conformers(
|
|
@@ -500,7 +515,6 @@ def prune_conformers(
|
|
|
500
515
|
warnings.warn(
|
|
501
516
|
'Molecule has no conformers. To embed conformers, invoke the `embed` method, '
|
|
502
517
|
'and optionally followed by `minimize()` to perform force field minimization.',
|
|
503
|
-
stacklevel=2
|
|
504
518
|
)
|
|
505
519
|
return mol
|
|
506
520
|
|
molcraft/featurizers.py
CHANGED
|
@@ -2,8 +2,7 @@ import keras
|
|
|
2
2
|
import json
|
|
3
3
|
import abc
|
|
4
4
|
import typing
|
|
5
|
-
import
|
|
6
|
-
import warnings
|
|
5
|
+
import os
|
|
7
6
|
import numpy as np
|
|
8
7
|
import pandas as pd
|
|
9
8
|
import tensorflow as tf
|
|
@@ -13,6 +12,7 @@ from pathlib import Path
|
|
|
13
12
|
|
|
14
13
|
from molcraft import tensors
|
|
15
14
|
from molcraft import features
|
|
15
|
+
from molcraft import records
|
|
16
16
|
from molcraft import chem
|
|
17
17
|
from molcraft import descriptors
|
|
18
18
|
|
|
@@ -41,6 +41,17 @@ class GraphFeaturizer(abc.ABC):
|
|
|
41
41
|
def load(filepath: str | Path, *args, **kwargs) -> 'GraphFeaturizer':
|
|
42
42
|
return load_featurizer(filepath, *args, **kwargs)
|
|
43
43
|
|
|
44
|
+
def write_records(self, inputs: str | chem.Mol | tuple, path: str | Path, **kwargs) -> None:
|
|
45
|
+
records.write(
|
|
46
|
+
inputs, featurizer=self, path=path, **kwargs
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
@staticmethod
|
|
50
|
+
def read_records(path: str | Path, **kwargs) -> tf.data.Dataset:
|
|
51
|
+
return records.read(
|
|
52
|
+
path=path, **kwargs
|
|
53
|
+
)
|
|
54
|
+
|
|
44
55
|
def __call__(
|
|
45
56
|
self,
|
|
46
57
|
inputs: str | chem.Mol | tuple | typing.Iterable,
|
molcraft/records.py
CHANGED
|
@@ -3,20 +3,24 @@ import math
|
|
|
3
3
|
import glob
|
|
4
4
|
import time
|
|
5
5
|
import typing
|
|
6
|
+
import warnings
|
|
6
7
|
import tensorflow as tf
|
|
7
8
|
import numpy as np
|
|
8
9
|
import pandas as pd
|
|
9
10
|
import multiprocessing as mp
|
|
10
11
|
|
|
11
12
|
from molcraft import tensors
|
|
12
|
-
|
|
13
|
+
|
|
14
|
+
if typing.TYPE_CHECKING:
|
|
15
|
+
from molcraft import featurizers
|
|
13
16
|
|
|
14
17
|
|
|
15
18
|
def write(
|
|
16
19
|
inputs: list[str | tuple],
|
|
17
|
-
featurizer: featurizers.GraphFeaturizer,
|
|
20
|
+
featurizer: 'featurizers.GraphFeaturizer',
|
|
18
21
|
path: str,
|
|
19
|
-
|
|
22
|
+
exist_ok: bool = False,
|
|
23
|
+
overwrite: bool = False,
|
|
20
24
|
num_files: typing.Optional[int] = None,
|
|
21
25
|
num_processes: typing.Optional[int] = None,
|
|
22
26
|
multiprocessing: bool = False,
|
|
@@ -24,6 +28,8 @@ def write(
|
|
|
24
28
|
) -> None:
|
|
25
29
|
|
|
26
30
|
if os.path.isdir(path):
|
|
31
|
+
if not exist_ok:
|
|
32
|
+
raise FileExistsError(f'Records already exist: {path}')
|
|
27
33
|
if not overwrite:
|
|
28
34
|
return
|
|
29
35
|
else:
|
|
@@ -60,9 +66,11 @@ def write(
|
|
|
60
66
|
chunk_sizes[i % num_files] += 1
|
|
61
67
|
|
|
62
68
|
input_chunks = []
|
|
69
|
+
start_indices = []
|
|
63
70
|
current_index = 0
|
|
64
71
|
for size in chunk_sizes:
|
|
65
72
|
input_chunks.append(inputs[current_index: current_index + size])
|
|
73
|
+
start_indices.append(current_index)
|
|
66
74
|
current_index += size
|
|
67
75
|
|
|
68
76
|
assert current_index == num_examples
|
|
@@ -73,13 +81,13 @@ def write(
|
|
|
73
81
|
]
|
|
74
82
|
|
|
75
83
|
if not multiprocessing:
|
|
76
|
-
for path, input_chunk in zip(paths, input_chunks):
|
|
77
|
-
_write_tfrecord(input_chunk, path, featurizer)
|
|
84
|
+
for path, input_chunk, start_index in zip(paths, input_chunks, start_indices):
|
|
85
|
+
_write_tfrecord(input_chunk, path, featurizer, start_index)
|
|
78
86
|
return
|
|
79
87
|
|
|
80
88
|
processes = []
|
|
81
89
|
|
|
82
|
-
for path, input_chunk in zip(paths, input_chunks):
|
|
90
|
+
for path, input_chunk, start_index in zip(paths, input_chunks, start_indices):
|
|
83
91
|
|
|
84
92
|
while len(processes) >= num_processes:
|
|
85
93
|
for process in processes:
|
|
@@ -91,7 +99,7 @@ def write(
|
|
|
91
99
|
|
|
92
100
|
process = mp.Process(
|
|
93
101
|
target=_write_tfrecord,
|
|
94
|
-
args=(input_chunk, path, featurizer)
|
|
102
|
+
args=(input_chunk, path, featurizer, start_index)
|
|
95
103
|
)
|
|
96
104
|
processes.append(process)
|
|
97
105
|
process.start()
|
|
@@ -134,9 +142,10 @@ def load_spec(path: str) -> tensors.GraphTensor.Spec:
|
|
|
134
142
|
return spec
|
|
135
143
|
|
|
136
144
|
def _write_tfrecord(
|
|
137
|
-
inputs,
|
|
145
|
+
inputs: list[str, tuple],
|
|
138
146
|
path: str,
|
|
139
|
-
featurizer: featurizers.GraphFeaturizer,
|
|
147
|
+
featurizer: 'featurizers.GraphFeaturizer',
|
|
148
|
+
start_index: int,
|
|
140
149
|
) -> None:
|
|
141
150
|
|
|
142
151
|
def _write_example(tensor):
|
|
@@ -147,12 +156,17 @@ def _write_tfrecord(
|
|
|
147
156
|
writer.write(serialized_feature)
|
|
148
157
|
|
|
149
158
|
with tf.io.TFRecordWriter(path) as writer:
|
|
150
|
-
for x in inputs:
|
|
159
|
+
for i, x in enumerate(inputs):
|
|
151
160
|
if isinstance(x, (list, np.ndarray)):
|
|
152
161
|
x = tuple(x)
|
|
153
|
-
|
|
154
|
-
|
|
162
|
+
try:
|
|
163
|
+
tensor = featurizer(x)
|
|
155
164
|
_write_example(tensor)
|
|
165
|
+
except Exception as e:
|
|
166
|
+
warnings.warn(
|
|
167
|
+
f"Could not write record for index {i + start_index}, proceeding without it."
|
|
168
|
+
f"Exception raised:\n{e}"
|
|
169
|
+
)
|
|
156
170
|
|
|
157
171
|
def _serialize_example(
|
|
158
172
|
feature: dict[str, tf.train.Feature]
|
|
@@ -1,21 +1,21 @@
|
|
|
1
|
-
molcraft/__init__.py,sha256=
|
|
1
|
+
molcraft/__init__.py,sha256=O88EmicQAD8oz9oFMXk_IzFChQEbbU-BCs3IE-c9Dkk,431
|
|
2
2
|
molcraft/callbacks.py,sha256=x5HnkZhqcFRrW6xdApt_jZ4X08A-0fxcnFKfdmRKa0c,3571
|
|
3
|
-
molcraft/chem.py,sha256=
|
|
3
|
+
molcraft/chem.py,sha256=ynrEpWZL2D370p7CqH2kE1KhBByq7IiuQbUNoKQt96I,22028
|
|
4
4
|
molcraft/datasets.py,sha256=Nd2lw5USUZE52vvAiNr-q-n03Y3--NlZlK0NzqHgp-E,4145
|
|
5
5
|
molcraft/descriptors.py,sha256=Cl3KnBPsTST7XLgRLktkX5LwY9MV0P_lUlrt8iPV5no,3508
|
|
6
6
|
molcraft/features.py,sha256=s0WeV8eZcDEypPgC1m37f4s9QkvWIlVgn-L43Cdsa14,13525
|
|
7
|
-
molcraft/featurizers.py,sha256=
|
|
7
|
+
molcraft/featurizers.py,sha256=1yBz5-JA7IhNm0dGivvVm1nJ5QGck8VQXtwHPWFbTuQ,18091
|
|
8
8
|
molcraft/layers.py,sha256=H7XZru4XGJA6gbRO9V1BsGqh1mIrMdhzNCKS5o6oNok,64544
|
|
9
9
|
molcraft/losses.py,sha256=qnS2yC5g-O3n_zVea9MR6TNiFraW2yqRgePOisoUP4A,1065
|
|
10
10
|
molcraft/models.py,sha256=2Pc1htT9fCukGd8ZxrvE0rzEHsPBm0pluHw4FZXaUE4,21963
|
|
11
11
|
molcraft/ops.py,sha256=bQbdFDt9waxVCzF5-dkTB6vlpj9eoSt8I4Qg7ZGXbsU,6178
|
|
12
|
-
molcraft/records.py,sha256=
|
|
12
|
+
molcraft/records.py,sha256=sopYElKWC3A9QE5I8_957v3faLb2Wt5WILHZv_FLLds,6283
|
|
13
13
|
molcraft/tensors.py,sha256=vk-W8zZu-re1g18YevDEEoVQRxT4AdIiMdI-4EvtJI4,22477
|
|
14
14
|
molcraft/applications/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
15
|
molcraft/applications/chromatography.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
16
16
|
molcraft/applications/proteomics.py,sha256=BL3EtW-q-0j79pLYO7npC67mA2ApRhH-XI4rOaP8_wc,8407
|
|
17
|
-
molcraft-0.1.0a21.dist-info/licenses/LICENSE,sha256=sbVeqlrtZ0V63uYhZGL5dCxUm8rBAOqe2avyA1zIQNk,1074
|
|
18
|
-
molcraft-0.1.
|
|
19
|
-
molcraft-0.1.0a21.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
20
|
-
molcraft-0.1.0a21.dist-info/top_level.txt,sha256=dENV6MfOceshM6MQCgJlcN1ojZkiCL9B4F7XyUge3QM,9
|
|
21
|
-
molcraft-0.1.0a21.dist-info/RECORD,,
|
|
17
|
+
molcraft-0.1.0a22.dist-info/licenses/LICENSE,sha256=sbVeqlrtZ0V63uYhZGL5dCxUm8rBAOqe2avyA1zIQNk,1074
|
|
18
|
+
molcraft-0.1.0a22.dist-info/METADATA,sha256=1OHx3-Q94fFEi21l0p3bnMjU-Q0EHaZLm4PU1A6QbkU,3930
|
|
19
|
+
molcraft-0.1.0a22.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
20
|
+
molcraft-0.1.0a22.dist-info/top_level.txt,sha256=dENV6MfOceshM6MQCgJlcN1ojZkiCL9B4F7XyUge3QM,9
|
|
21
|
+
molcraft-0.1.0a22.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|