molcraft 0.1.0a20__tar.gz → 0.1.0a22__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of molcraft might be problematic. Click here for more details.
- {molcraft-0.1.0a20 → molcraft-0.1.0a22}/PKG-INFO +1 -1
- {molcraft-0.1.0a20 → molcraft-0.1.0a22}/molcraft/__init__.py +2 -4
- {molcraft-0.1.0a20 → molcraft-0.1.0a22}/molcraft/chem.py +31 -19
- {molcraft-0.1.0a20 → molcraft-0.1.0a22}/molcraft/featurizers.py +13 -2
- {molcraft-0.1.0a20 → molcraft-0.1.0a22}/molcraft/records.py +26 -12
- {molcraft-0.1.0a20 → molcraft-0.1.0a22}/molcraft.egg-info/PKG-INFO +1 -1
- {molcraft-0.1.0a20 → molcraft-0.1.0a22}/LICENSE +0 -0
- {molcraft-0.1.0a20 → molcraft-0.1.0a22}/README.md +0 -0
- {molcraft-0.1.0a20 → molcraft-0.1.0a22}/molcraft/applications/__init__.py +0 -0
- {molcraft-0.1.0a20 → molcraft-0.1.0a22}/molcraft/applications/chromatography.py +0 -0
- {molcraft-0.1.0a20 → molcraft-0.1.0a22}/molcraft/applications/proteomics.py +0 -0
- {molcraft-0.1.0a20 → molcraft-0.1.0a22}/molcraft/callbacks.py +0 -0
- {molcraft-0.1.0a20 → molcraft-0.1.0a22}/molcraft/datasets.py +0 -0
- {molcraft-0.1.0a20 → molcraft-0.1.0a22}/molcraft/descriptors.py +0 -0
- {molcraft-0.1.0a20 → molcraft-0.1.0a22}/molcraft/features.py +0 -0
- {molcraft-0.1.0a20 → molcraft-0.1.0a22}/molcraft/layers.py +0 -0
- {molcraft-0.1.0a20 → molcraft-0.1.0a22}/molcraft/losses.py +0 -0
- {molcraft-0.1.0a20 → molcraft-0.1.0a22}/molcraft/models.py +0 -0
- {molcraft-0.1.0a20 → molcraft-0.1.0a22}/molcraft/ops.py +0 -0
- {molcraft-0.1.0a20 → molcraft-0.1.0a22}/molcraft/tensors.py +0 -0
- {molcraft-0.1.0a20 → molcraft-0.1.0a22}/molcraft.egg-info/SOURCES.txt +0 -0
- {molcraft-0.1.0a20 → molcraft-0.1.0a22}/molcraft.egg-info/dependency_links.txt +0 -0
- {molcraft-0.1.0a20 → molcraft-0.1.0a22}/molcraft.egg-info/requires.txt +0 -0
- {molcraft-0.1.0a20 → molcraft-0.1.0a22}/molcraft.egg-info/top_level.txt +0 -0
- {molcraft-0.1.0a20 → molcraft-0.1.0a22}/pyproject.toml +0 -0
- {molcraft-0.1.0a20 → molcraft-0.1.0a22}/setup.cfg +0 -0
- {molcraft-0.1.0a20 → molcraft-0.1.0a22}/tests/test_chem.py +0 -0
- {molcraft-0.1.0a20 → molcraft-0.1.0a22}/tests/test_featurizers.py +0 -0
- {molcraft-0.1.0a20 → molcraft-0.1.0a22}/tests/test_layers.py +0 -0
- {molcraft-0.1.0a20 → molcraft-0.1.0a22}/tests/test_losses.py +0 -0
- {molcraft-0.1.0a20 → molcraft-0.1.0a22}/tests/test_models.py +0 -0
- {molcraft-0.1.0a20 → molcraft-0.1.0a22}/tests/test_tensors.py +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
__version__ = '0.1.0a20'
|
|
1
|
+
__version__ = '0.1.0a22'
|
|
2
2
|
|
|
3
3
|
import os
|
|
4
4
|
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
|
|
@@ -14,6 +14,4 @@ from molcraft import records
|
|
|
14
14
|
from molcraft import tensors
|
|
15
15
|
from molcraft import callbacks
|
|
16
16
|
from molcraft import datasets
|
|
17
|
-
from molcraft import losses
|
|
18
|
-
|
|
19
|
-
from molcraft.applications import proteomics
|
|
17
|
+
from molcraft import losses
|
|
@@ -22,12 +22,19 @@ class Mol(Chem.Mol):
|
|
|
22
22
|
if explicit_hs:
|
|
23
23
|
rdkit_mol = Chem.AddHs(rdkit_mol)
|
|
24
24
|
rdkit_mol.__class__ = cls
|
|
25
|
+
setattr(rdkit_mol, '_encoding', encoding)
|
|
25
26
|
return rdkit_mol
|
|
26
27
|
|
|
27
28
|
@property
|
|
28
29
|
def canonical_smiles(self) -> str:
|
|
29
30
|
return Chem.MolToSmiles(self, canonical=True)
|
|
30
31
|
|
|
32
|
+
@property
|
|
33
|
+
def encoding(self):
|
|
34
|
+
if hasattr(self, '_encoding'):
|
|
35
|
+
return self._encoding
|
|
36
|
+
return None
|
|
37
|
+
|
|
31
38
|
@property
|
|
32
39
|
def bonds(self) -> list['Bond']:
|
|
33
40
|
if not hasattr(self, '_bonds'):
|
|
@@ -391,6 +398,7 @@ def embed_conformers(
|
|
|
391
398
|
mol: Mol,
|
|
392
399
|
num_conformers: int,
|
|
393
400
|
method: str = 'ETKDGv3',
|
|
401
|
+
timeout: int | None = None,
|
|
394
402
|
random_seed: int | None = None,
|
|
395
403
|
**kwargs
|
|
396
404
|
) -> Mol:
|
|
@@ -403,6 +411,7 @@ def embed_conformers(
|
|
|
403
411
|
'KDG': rdDistGeom.KDG()
|
|
404
412
|
}
|
|
405
413
|
mol = Mol(mol)
|
|
414
|
+
encoding = mol.encoding or mol.canonical_smiles
|
|
406
415
|
embedding_method = available_embedding_methods.get(method)
|
|
407
416
|
if embedding_method is None:
|
|
408
417
|
raise ValueError(
|
|
@@ -413,8 +422,14 @@ def embed_conformers(
|
|
|
413
422
|
for key, value in kwargs.items():
|
|
414
423
|
setattr(embedding_method, key, value)
|
|
415
424
|
|
|
416
|
-
if
|
|
417
|
-
|
|
425
|
+
if not timeout:
|
|
426
|
+
timeout = 0 # No timeout
|
|
427
|
+
|
|
428
|
+
if not random_seed:
|
|
429
|
+
random_seed = -1 # No random seed
|
|
430
|
+
|
|
431
|
+
embedding_method.randomSeed = random_seed
|
|
432
|
+
embedding_method.timeout = timeout
|
|
418
433
|
|
|
419
434
|
success = rdDistGeom.EmbedMultipleConfs(
|
|
420
435
|
mol, numConfs=num_conformers, params=embedding_method
|
|
@@ -422,19 +437,18 @@ def embed_conformers(
|
|
|
422
437
|
num_successes = len(success)
|
|
423
438
|
if num_successes < num_conformers:
|
|
424
439
|
warnings.warn(
|
|
425
|
-
f'Could only embed {num_successes} out of {num_conformers} conformer(s) '
|
|
426
|
-
f'
|
|
427
|
-
f'{num_conformers
|
|
428
|
-
stacklevel=2
|
|
440
|
+
f'Could only embed {num_successes} out of {num_conformers} conformer(s) for '
|
|
441
|
+
f'{encoding!r} using the specified method ({method!r}) and parameters. Attempting '
|
|
442
|
+
f'to embed the remaining {num_conformers-num_successes} using fallback methods.',
|
|
429
443
|
)
|
|
430
|
-
|
|
444
|
+
max_iters = 20 * mol.num_atoms # Doubling the number of iterations
|
|
431
445
|
for fallback_method in [method, 'ETDG', 'KDG']:
|
|
432
446
|
fallback_embedding_method = available_embedding_methods[fallback_method]
|
|
433
447
|
fallback_embedding_method.useRandomCoords = True
|
|
434
|
-
fallback_embedding_method.
|
|
448
|
+
fallback_embedding_method.maxIterations = int(max_iters)
|
|
435
449
|
fallback_embedding_method.clearConfs = False
|
|
436
|
-
|
|
437
|
-
|
|
450
|
+
fallback_embedding_method.timeout = int(timeout)
|
|
451
|
+
fallback_embedding_method.randomSeed = int(random_seed)
|
|
438
452
|
success = rdDistGeom.EmbedMultipleConfs(
|
|
439
453
|
mol, numConfs=(num_conformers - num_successes), params=fallback_embedding_method
|
|
440
454
|
)
|
|
@@ -443,7 +457,7 @@ def embed_conformers(
|
|
|
443
457
|
break
|
|
444
458
|
else:
|
|
445
459
|
raise RuntimeError(
|
|
446
|
-
f'Could not embed {num_conformers} conformer(s) for {
|
|
460
|
+
f'Could not embed {num_conformers} conformer(s) for {encoding!r}. '
|
|
447
461
|
)
|
|
448
462
|
return mol
|
|
449
463
|
|
|
@@ -463,14 +477,14 @@ def optimize_conformers(
|
|
|
463
477
|
f'Could not find `method` {method!r}. Specify either of: '
|
|
464
478
|
'`UFF`, `MMFF`, `MMFF94` or `MMFF94s`.'
|
|
465
479
|
)
|
|
466
|
-
|
|
480
|
+
mol_optimized = Mol(mol)
|
|
467
481
|
try:
|
|
468
482
|
if method.startswith('MMFF'):
|
|
469
483
|
variant = method
|
|
470
484
|
if variant == 'MMFF':
|
|
471
485
|
variant += '94'
|
|
472
486
|
_, _ = _mmff_optimize_conformers(
|
|
473
|
-
|
|
487
|
+
mol_optimized,
|
|
474
488
|
num_threads=num_threads,
|
|
475
489
|
max_iter=max_iter,
|
|
476
490
|
variant=variant,
|
|
@@ -478,7 +492,7 @@ def optimize_conformers(
|
|
|
478
492
|
)
|
|
479
493
|
else:
|
|
480
494
|
_, _ = _uff_optimize_conformers(
|
|
481
|
-
|
|
495
|
+
mol_optimized,
|
|
482
496
|
num_threads=num_threads,
|
|
483
497
|
max_iter=max_iter,
|
|
484
498
|
vdw_threshold=vdw_threshold,
|
|
@@ -486,11 +500,10 @@ def optimize_conformers(
|
|
|
486
500
|
)
|
|
487
501
|
except RuntimeError as e:
|
|
488
502
|
warnings.warn(
|
|
489
|
-
f'{method} force field minimization
|
|
490
|
-
'\nProceeding without force field minimization.',
|
|
491
|
-
stacklevel=2
|
|
503
|
+
f'{method} force field minimization did not succeed. Proceeding without it.',
|
|
492
504
|
)
|
|
493
|
-
|
|
505
|
+
return Mol(mol)
|
|
506
|
+
return mol_optimized
|
|
494
507
|
|
|
495
508
|
def prune_conformers(
|
|
496
509
|
mol: Mol,
|
|
@@ -502,7 +515,6 @@ def prune_conformers(
|
|
|
502
515
|
warnings.warn(
|
|
503
516
|
'Molecule has no conformers. To embed conformers, invoke the `embed` method, '
|
|
504
517
|
'and optionally followed by `minimize()` to perform force field minimization.',
|
|
505
|
-
stacklevel=2
|
|
506
518
|
)
|
|
507
519
|
return mol
|
|
508
520
|
|
|
@@ -2,8 +2,7 @@ import keras
|
|
|
2
2
|
import json
|
|
3
3
|
import abc
|
|
4
4
|
import typing
|
|
5
|
-
import
|
|
6
|
-
import warnings
|
|
5
|
+
import os
|
|
7
6
|
import numpy as np
|
|
8
7
|
import pandas as pd
|
|
9
8
|
import tensorflow as tf
|
|
@@ -13,6 +12,7 @@ from pathlib import Path
|
|
|
13
12
|
|
|
14
13
|
from molcraft import tensors
|
|
15
14
|
from molcraft import features
|
|
15
|
+
from molcraft import records
|
|
16
16
|
from molcraft import chem
|
|
17
17
|
from molcraft import descriptors
|
|
18
18
|
|
|
@@ -41,6 +41,17 @@ class GraphFeaturizer(abc.ABC):
|
|
|
41
41
|
def load(filepath: str | Path, *args, **kwargs) -> 'GraphFeaturizer':
|
|
42
42
|
return load_featurizer(filepath, *args, **kwargs)
|
|
43
43
|
|
|
44
|
+
def write_records(self, inputs: str | chem.Mol | tuple, path: str | Path, **kwargs) -> None:
|
|
45
|
+
records.write(
|
|
46
|
+
inputs, featurizer=self, path=path, **kwargs
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
@staticmethod
|
|
50
|
+
def read_records(path: str | Path, **kwargs) -> tf.data.Dataset:
|
|
51
|
+
return records.read(
|
|
52
|
+
path=path, **kwargs
|
|
53
|
+
)
|
|
54
|
+
|
|
44
55
|
def __call__(
|
|
45
56
|
self,
|
|
46
57
|
inputs: str | chem.Mol | tuple | typing.Iterable,
|
|
@@ -3,20 +3,24 @@ import math
|
|
|
3
3
|
import glob
|
|
4
4
|
import time
|
|
5
5
|
import typing
|
|
6
|
+
import warnings
|
|
6
7
|
import tensorflow as tf
|
|
7
8
|
import numpy as np
|
|
8
9
|
import pandas as pd
|
|
9
10
|
import multiprocessing as mp
|
|
10
11
|
|
|
11
12
|
from molcraft import tensors
|
|
12
|
-
|
|
13
|
+
|
|
14
|
+
if typing.TYPE_CHECKING:
|
|
15
|
+
from molcraft import featurizers
|
|
13
16
|
|
|
14
17
|
|
|
15
18
|
def write(
|
|
16
19
|
inputs: list[str | tuple],
|
|
17
|
-
featurizer: featurizers.GraphFeaturizer,
|
|
20
|
+
featurizer: 'featurizers.GraphFeaturizer',
|
|
18
21
|
path: str,
|
|
19
|
-
|
|
22
|
+
exist_ok: bool = False,
|
|
23
|
+
overwrite: bool = False,
|
|
20
24
|
num_files: typing.Optional[int] = None,
|
|
21
25
|
num_processes: typing.Optional[int] = None,
|
|
22
26
|
multiprocessing: bool = False,
|
|
@@ -24,6 +28,8 @@ def write(
|
|
|
24
28
|
) -> None:
|
|
25
29
|
|
|
26
30
|
if os.path.isdir(path):
|
|
31
|
+
if not exist_ok:
|
|
32
|
+
raise FileExistsError(f'Records already exist: {path}')
|
|
27
33
|
if not overwrite:
|
|
28
34
|
return
|
|
29
35
|
else:
|
|
@@ -60,9 +66,11 @@ def write(
|
|
|
60
66
|
chunk_sizes[i % num_files] += 1
|
|
61
67
|
|
|
62
68
|
input_chunks = []
|
|
69
|
+
start_indices = []
|
|
63
70
|
current_index = 0
|
|
64
71
|
for size in chunk_sizes:
|
|
65
72
|
input_chunks.append(inputs[current_index: current_index + size])
|
|
73
|
+
start_indices.append(current_index)
|
|
66
74
|
current_index += size
|
|
67
75
|
|
|
68
76
|
assert current_index == num_examples
|
|
@@ -73,13 +81,13 @@ def write(
|
|
|
73
81
|
]
|
|
74
82
|
|
|
75
83
|
if not multiprocessing:
|
|
76
|
-
for path, input_chunk in zip(paths, input_chunks):
|
|
77
|
-
_write_tfrecord(input_chunk, path, featurizer)
|
|
84
|
+
for path, input_chunk, start_index in zip(paths, input_chunks, start_indices):
|
|
85
|
+
_write_tfrecord(input_chunk, path, featurizer, start_index)
|
|
78
86
|
return
|
|
79
87
|
|
|
80
88
|
processes = []
|
|
81
89
|
|
|
82
|
-
for path, input_chunk in zip(paths, input_chunks):
|
|
90
|
+
for path, input_chunk, start_index in zip(paths, input_chunks, start_indices):
|
|
83
91
|
|
|
84
92
|
while len(processes) >= num_processes:
|
|
85
93
|
for process in processes:
|
|
@@ -91,7 +99,7 @@ def write(
|
|
|
91
99
|
|
|
92
100
|
process = mp.Process(
|
|
93
101
|
target=_write_tfrecord,
|
|
94
|
-
args=(input_chunk, path, featurizer)
|
|
102
|
+
args=(input_chunk, path, featurizer, start_index)
|
|
95
103
|
)
|
|
96
104
|
processes.append(process)
|
|
97
105
|
process.start()
|
|
@@ -134,9 +142,10 @@ def load_spec(path: str) -> tensors.GraphTensor.Spec:
|
|
|
134
142
|
return spec
|
|
135
143
|
|
|
136
144
|
def _write_tfrecord(
|
|
137
|
-
inputs,
|
|
145
|
+
inputs: list[str, tuple],
|
|
138
146
|
path: str,
|
|
139
|
-
featurizer: featurizers.GraphFeaturizer,
|
|
147
|
+
featurizer: 'featurizers.GraphFeaturizer',
|
|
148
|
+
start_index: int,
|
|
140
149
|
) -> None:
|
|
141
150
|
|
|
142
151
|
def _write_example(tensor):
|
|
@@ -147,12 +156,17 @@ def _write_tfrecord(
|
|
|
147
156
|
writer.write(serialized_feature)
|
|
148
157
|
|
|
149
158
|
with tf.io.TFRecordWriter(path) as writer:
|
|
150
|
-
for x in inputs:
|
|
159
|
+
for i, x in enumerate(inputs):
|
|
151
160
|
if isinstance(x, (list, np.ndarray)):
|
|
152
161
|
x = tuple(x)
|
|
153
|
-
|
|
154
|
-
|
|
162
|
+
try:
|
|
163
|
+
tensor = featurizer(x)
|
|
155
164
|
_write_example(tensor)
|
|
165
|
+
except Exception as e:
|
|
166
|
+
warnings.warn(
|
|
167
|
+
f"Could not write record for index {i + start_index}, proceeding without it."
|
|
168
|
+
f"Exception raised:\n{e}"
|
|
169
|
+
)
|
|
156
170
|
|
|
157
171
|
def _serialize_example(
|
|
158
172
|
feature: dict[str, tf.train.Feature]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|