molcraft 0.1.0a4__py3-none-any.whl → 0.1.0a6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of molcraft might be problematic. Click here for more details.
- molcraft/__init__.py +3 -2
- molcraft/chem.py +70 -4
- molcraft/conformers.py +1 -1
- molcraft/featurizers.py +20 -14
- molcraft/layers.py +258 -185
- molcraft/losses.py +36 -0
- molcraft/models.py +119 -8
- molcraft/ops.py +10 -0
- molcraft/records.py +32 -31
- molcraft/tensors.py +1 -1
- {molcraft-0.1.0a4.dist-info → molcraft-0.1.0a6.dist-info}/METADATA +4 -17
- molcraft-0.1.0a6.dist-info/RECORD +19 -0
- {molcraft-0.1.0a4.dist-info → molcraft-0.1.0a6.dist-info}/WHEEL +1 -1
- molcraft/experimental/__init__.py +0 -1
- molcraft/experimental/peptides.py +0 -281
- molcraft-0.1.0a4.dist-info/RECORD +0 -20
- {molcraft-0.1.0a4.dist-info → molcraft-0.1.0a6.dist-info}/licenses/LICENSE +0 -0
- {molcraft-0.1.0a4.dist-info → molcraft-0.1.0a6.dist-info}/top_level.txt +0 -0
|
@@ -1,281 +0,0 @@
|
|
|
1
|
-
import re
|
|
2
|
-
import keras
|
|
3
|
-
import numpy as np
|
|
4
|
-
import tensorflow as tf
|
|
5
|
-
import tensorflow_text as tf_text
|
|
6
|
-
from rdkit import Chem
|
|
7
|
-
|
|
8
|
-
from molcraft import ops
|
|
9
|
-
from molcraft import chem
|
|
10
|
-
from molcraft import features
|
|
11
|
-
from molcraft import featurizers
|
|
12
|
-
from molcraft import tensors
|
|
13
|
-
from molcraft import descriptors
|
|
14
|
-
from molcraft import layers
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
def Graph(
    inputs,
    atom_features: list[features.Feature] | str | None = 'auto',
    bond_features: list[features.Feature] | str | None = 'auto',
    molecule_features: list[descriptors.Descriptor] | str | None = 'auto',
    super_atom: bool = True,
    radius: int | float | None = None,
    self_loops: bool = False,
    include_hs: bool = False,
    **kwargs,
):
    """Featurize residue tags and stack the results into one graph tensor.

    Each element of `inputs` is used as a key into the module-level
    `residues` mapping; the value found there is passed to a
    `featurizers.MolGraphFeaturizer`, and the tag itself is stored in the
    resulting graph under ``context['tag']``.

    Args:
        inputs: Iterable of residue tags. Every tag must be a key of
            `residues`; a missing tag raises `KeyError`.
        atom_features, bond_features, molecule_features, super_atom,
        radius, self_loops, include_hs, **kwargs: Forwarded unchanged to
            `featurizers.MolGraphFeaturizer`.

    Returns:
        The result of `tf.stack` over the per-tag graph tensors (axis 0).
    """
    residue_featurizer = featurizers.MolGraphFeaturizer(
        atom_features=atom_features,
        bond_features=bond_features,
        molecule_features=molecule_features,
        super_atom=super_atom,
        radius=radius,
        self_loops=self_loops,
        include_hs=include_hs,
        **kwargs,
    )

    graphs: list[tensors.GraphTensor] = []
    for tag in inputs:
        residue_graph = residue_featurizer(residues[tag])
        # Attach the tag so downstream layers can map tags back to graphs.
        graphs.append(residue_graph.update({'context': {'tag': tag}}))
    return tf.stack(graphs, axis=0)
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
def Lookup(graph: tensors.GraphTensor) -> 'LookupLayer':
    """Return a `LookupLayer` that has been built from `graph`."""
    layer = LookupLayer()
    layer._build(graph)
    return layer
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
@keras.saving.register_keras_serializable(package='molcraft')
class SequenceSplit(keras.layers.Layer):
    """Split peptide sequence strings into per-residue tags.

    Tokens are recognised by `_pattern`, an alternation ordered from the
    most specific form (terminal modification plus residue modification)
    down to a bare single-letter residue.
    """

    # Alternatives are tried left to right, so longer forms come first.
    _pattern = "|".join((
        r'(\[[A-Za-z0-9]+\]-[A-Z]\[[A-Za-z0-9]+\])',  # N-term mod + mod
        r'([A-Z]\[[A-Za-z0-9]+\]-\[[A-Za-z0-9]+\])',  # C-term mod + mod
        r'([A-Z]-\[[A-Za-z0-9]+\])',  # C-term mod
        r'(\[[A-Za-z0-9]+\]-[A-Z])',  # N-term mod
        r'([A-Z]\[[A-Za-z0-9]+\])',  # Mod
        r'([A-Z])',  # No mod
    ))

    def call(self, inputs):
        """Tokenize `inputs` and tag every token except the last with '-[X]'.

        The pattern is passed to `tf_text.regex_split` both as the delimiter
        and as the delimiter to keep, so the matched residues themselves
        become the tokens. The result is converted with `.to_tensor()`
        (presumably a ragged result padded to a dense tensor -- confirm).
        """
        tokens = tf_text.regex_split(inputs, self._pattern, self._pattern)
        # Every residue but the final one gets the '-[X]' suffix.
        leading = tf.strings.join([tokens[:, :-1], '-[X]'])
        trailing = tokens[:, -1:]
        tokens = keras.ops.concatenate([leading, trailing], axis=1)
        return tokens.to_tensor()
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
@keras.saving.register_keras_serializable(package='molcraft')
class LookupLayer(keras.layers.Layer):
    """Map peptide sequences to the sub-graphs of the residues they contain.

    The layer stores a graph (one entry per residue tag, see
    ``context['tag']``) as non-trainable weights and selects, for a batch
    of sequences, the entries whose tags occur in that batch.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self._sequence_splitter = SequenceSplit()

    def call(self, sequence: tf.Tensor) -> tensors.GraphTensor:
        """Return the stored graph restricted to residues found in `sequence`.

        Tokens that are not in the tag table (including any padding produced
        by the splitter) look up to ``-1`` and are discarded.

        NOTE(review): the annotation says `GraphTensor`, but the value
        returned is whatever `tensors.to_dict` produces.
        """
        tokens = self._sequence_splitter(sequence)
        indices = self._tag_to_index.lookup(tokens)
        indices = tf.unique(tf.reshape(indices, [-1]))[0]
        # Remove the -1 "not found" sentinel by value. Slicing off the first
        # sorted element is only correct when a -1 is actually present; with
        # no padding/unknown token it would drop a real residue index.
        indices = tf.boolean_mask(indices, indices >= 0)
        indices = tf.sort(indices)
        graph = self.graph[indices]
        return tensors.to_dict(graph)

    def _build(self, x):
        """Create the weights, graph snapshot and tag lookup table.

        Args:
            x: Either a `tensors.GraphTensor` (its values are copied into
                the weights) or a previously serialized spec structure
                (weights are created zero-initialized).
        """
        from_graph = isinstance(x, tensors.GraphTensor)

        if from_graph:
            tensor = tensors.to_dict(x)
            self._spec = tf.nest.map_structure(
                tf.type_spec_from_value, tensor
            )
        else:
            self._spec = x

        # One non-trainable weight per leaf of the graph structure.
        self._graph = tf.nest.map_structure(
            lambda s: self.add_weight(
                shape=s.shape,
                dtype=s.dtype,
                trainable=False,
                initializer='zeros'
            ),
            self._spec
        )

        if from_graph:
            tf.nest.map_structure(
                lambda v, x: v.assign(x),
                self._graph, tensor
            )

        # NOTE(review): the snapshot and the hash table below are created
        # from the weight values at this moment. When built from a spec
        # (`from_config`), it looks like they are not refreshed after the
        # weights are later restored -- confirm.
        graph = tf.nest.map_structure(
            keras.ops.convert_to_tensor, self._graph
        )
        self._graph_tensor = tensors.from_dict(graph)

        tags = self._graph_tensor.context['tag']

        self._tag_to_index = tf.lookup.StaticHashTable(
            tf.lookup.KeyValueTensorInitializer(
                keys=tags,
                values=range(len(tags)),
            ),
            default_value=-1,
        )

    def get_config(self):
        """Return the layer config, including the serialized weight spec."""
        config = super().get_config()
        spec = keras.saving.serialize_keras_object(self._spec)
        config['spec'] = spec
        return config

    @classmethod
    def from_config(cls, config: dict) -> 'LookupLayer':
        """Recreate the layer and build it from the spec stored in `config`."""
        spec = config.pop('spec')
        spec = keras.saving.deserialize_keras_object(spec)
        layer = cls(**config)
        layer._build(spec)
        return layer

    @property
    def graph(self) -> tensors.GraphTensor:
        """The graph tensor assembled in `_build`."""
        return self._graph_tensor
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
@keras.saving.register_keras_serializable(package='molcraft')
class Gather(keras.layers.Layer):
    """Arrange per-residue readouts in the order given by peptide sequences.

    `padding` and `mask_value` are stored and serialized, but nothing in
    this class reads them.
    """

    def __init__(
        self,
        padding: list[tuple[int]] | tuple[int] | int = 1,
        mask_value: int = 0,
        **kwargs
    ) -> None:
        super().__init__(**kwargs)
        self.supports_masking = True
        self.padding = padding
        self.mask_value = mask_value
        self._sequence_splitter = SequenceSplit()
        self._readout_layer = layers.Readout(mode='mean')

    def get_config(self):
        """Return the layer config extended with `mask_value` and `padding`."""
        config = super().get_config()
        config['mask_value'] = self.mask_value
        config['padding'] = self.padding
        return config

    def call(self, inputs) -> tf.Tensor:
        """Gather one readout per sequence position.

        Args:
            inputs: A ``(graph, sequence)`` pair; ``graph['context']['tag']``
                holds the tag of each graph entry.

        Returns:
            The readout selected for every token of the split sequence,
            with zeros wherever the token is not among the graph's tags.
        """
        graph, sequence = inputs

        tags = graph['context']['tag']
        embeddings = self._readout_layer(graph)

        # Per-call table from tag to its row in `embeddings`.
        tag_to_row = tf.lookup.experimental.MutableHashTable(
            key_dtype=tf.string,
            value_dtype=tf.int32,
            default_value=-1
        )
        tag_to_row.insert(tags, tf.range(tf.shape(tags)[0]))

        tokens = self._sequence_splitter(sequence)
        rows = tag_to_row.lookup(tokens)

        # Unknown tokens gather row 0 as a placeholder, then get zeroed out.
        is_missing = rows == -1
        gathered = ops.gather(embeddings, keras.ops.where(is_missing, 0, rows))
        return keras.ops.where(is_missing[..., None], 0.0, gathered)

    def compute_mask(
        self,
        inputs: tensors.GraphTensor,
        mask: bool | None = None
    ) -> tf.Tensor | None:
        """Mark as valid every split-sequence position that is not ''."""
        _, sequence = inputs
        tokens = self._sequence_splitter(sequence)
        return keras.ops.not_equal(tokens, '')
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
# Residue tag -> SMILES of the free amino acid.
# Every SMILES is written so that it ENDS with the backbone carboxyl
# "C(=O)O": the "-[X]" variants are derived further down by stripping the
# trailing "O", which must therefore be the backbone hydroxyl.
residues = {
    "A": "N[C@@H](C)C(=O)O",
    "C": "N[C@@H](CS)C(=O)O",
    "C[Carbamidomethyl]": "N[C@@H](CSCC(=O)N)C(=O)O",
    "D": "N[C@@H](CC(=O)O)C(=O)O",
    "E": "N[C@@H](CCC(=O)O)C(=O)O",
    "F": "N[C@@H](Cc1ccccc1)C(=O)O",
    "G": "NCC(=O)O",
    "H": "N[C@@H](CC1=CN=C-N1)C(=O)O",
    "I": "N[C@@H](C(CC)C)C(=O)O",
    "K": "N[C@@H](CCCCN)C(=O)O",
    "K[Acetyl]": "N[C@@H](CCCCNC(=O)C)C(=O)O",
    "K[Crotonyl]": "N[C@@H](CCCCNC(C=CC)=O)C(=O)O",
    "K[Dimethyl]": "N[C@@H](CCCCN(C)C)C(=O)O",
    "K[Formyl]": "N[C@@H](CCCCNC=O)C(=O)O",
    "K[Malonyl]": "N[C@@H](CCCCNC(=O)CC(O)=O)C(=O)O",
    "K[Methyl]": "N[C@@H](CCCCNC)C(=O)O",
    "K[Propionyl]": "N[C@@H](CCCCNC(=O)CC)C(=O)O",
    "K[Succinyl]": "N[C@@H](CCCCNC(CCC(O)=O)=O)C(=O)O",
    "K[Trimethyl]": "N[C@@H](CCCC[N+](C)(C)C)C(=O)O",
    "L": "N[C@@H](CC(C)C)C(=O)O",
    "M": "N[C@@H](CCSC)C(=O)O",
    "M[Oxidation]": "N[C@@H](CCS(=O)C)C(=O)O",
    "N": "N[C@@H](CC(=O)N)C(=O)O",
    "P": "N1[C@@H](CCC1)C(=O)O",
    "P[Oxidation]": "N1CC(O)C[C@H]1C(=O)O",
    "Q": "N[C@@H](CCC(=O)N)C(=O)O",
    "R": "N[C@@H](CCCNC(=N)N)C(=O)O",
    "R[Deamidated]": "N[C@@H](CCCNC(N)=O)C(=O)O",
    "R[Dimethyl]": "N[C@@H](CCCNC(N(C)C)=N)C(=O)O",
    "R[Methyl]": "N[C@@H](CCCNC(=N)NC)C(=O)O",
    "S": "N[C@@H](CO)C(=O)O",
    "T": "N[C@@H](C(O)C)C(=O)O",
    "V": "N[C@@H](C(C)C)C(=O)O",
    "W": "N[C@@H](CC(=CN2)C1=C2C=CC=C1)C(=O)O",
    "Y": "N[C@@H](Cc1ccc(O)cc1)C(=O)O",
    "Y[Nitro]": "N[C@@H](Cc1ccc(O)c(N(=O)=O)c1)C(=O)O",
    "Y[Phospho]": "N[C@@H](Cc1ccc(OP(O)(=O)O)cc1)C(=O)O",
    "[Acetyl]-A": "N(C(C)=O)[C@@H](C)C(=O)O",
    "[Acetyl]-C": "N(C(C)=O)[C@@H](CS)C(=O)O",
    # Same stereoisomer as the former "N(C(=O)C)[C@H](C(=O)O)CC(=O)O"
    # (branches swapped, @ -> @@), but now the backbone carboxyl is last,
    # so stripping the trailing "O" no longer hits the side chain.
    "[Acetyl]-D": "N(C(=O)C)[C@@H](CC(=O)O)C(=O)O",
    "[Acetyl]-E": "N(C(=O)C)[C@@H](CCC(O)=O)C(=O)O",
    "[Acetyl]-F": "N(C(C)=O)[C@@H](Cc1ccccc1)C(=O)O",
    "[Acetyl]-G": "N(C(=O)C)CC(=O)O",
    "[Acetyl]-H": "N(C(=O)C)[C@@H](Cc1[nH]cnc1)C(=O)O",
    "[Acetyl]-I": "N(C(=O)C)[C@@H]([C@H](CC)C)C(=O)O",
    "[Acetyl]-K": "N(C(C)=O)[C@@H](CCCCN)C(=O)O",
    "[Acetyl]-L": "N(C(=O)C)[C@@H](CC(C)C)C(=O)O",
    "[Acetyl]-M": "N(C(=O)C)[C@@H](CCSC)C(=O)O",
    "[Acetyl]-N": "N(C(C)=O)[C@@H](CC(=O)N)C(=O)O",
    "[Acetyl]-P": "N1(C(=O)C)CCC[C@H]1C(=O)O",
    "[Acetyl]-Q": "N(C(=O)C)[C@@H](CCC(=O)N)C(=O)O",
    "[Acetyl]-R": "N(C(C)=O)[C@@H](CCCN=C(N)N)C(=O)O",
    "[Acetyl]-S": "N(C(C)=O)[C@@H](CO)C(=O)O",
    "[Acetyl]-T": "N(C(=O)C)[C@@H]([C@H](O)C)C(=O)O",
    "[Acetyl]-V": "N(C(=O)C)[C@@H](C(C)C)C(=O)O",
    "[Acetyl]-W": "N(C(C)=O)[C@@H](Cc1c2ccccc2[nH]c1)C(=O)O",
    "[Acetyl]-Y": "N(C(C)=O)[C@@H](Cc1ccc(O)cc1)C(=O)O",
}
|
|
262
|
-
|
|
263
|
-
# SMILES -> residue tag; filled in by `register_peptide_residues`.
residues_reverse = {}


def register_peptide_residues(residues_: dict[str, str], canonicalize=True):
    """Add residue tags and their SMILES to the module-level registries.

    Each entry is written to `residues` (tag -> SMILES) and to
    `residues_reverse` (SMILES -> tag); existing entries with the same key
    are overwritten.

    Args:
        residues_: Mapping from residue tag to SMILES string.
        canonicalize: If true, each SMILES is round-tripped through RDKit
            (`MolFromSmiles` then `MolToSmiles`) before being stored.

    Raises:
        ValueError: If `canonicalize` is true and RDKit cannot parse a
            SMILES string.
    """
    for residue, smiles in residues_.items():
        if canonicalize:
            mol = Chem.MolFromSmiles(smiles)
            if mol is None:
                # MolFromSmiles signals a parse failure by returning None;
                # fail here with the offending tag instead of inside RDKit.
                raise ValueError(
                    f'Could not parse SMILES {smiles!r} '
                    f'for residue {residue!r}.'
                )
            smiles = Chem.MolToSmiles(mol)
        residues[residue] = smiles
        residues_reverse[smiles] = residue


# Populate `residues_reverse` from the built-in table, SMILES kept as written.
register_peptide_residues(residues, canonicalize=False)
|
|
272
|
-
|
|
273
|
-
def _extract_residue_type(residue_tag: str) -> str:
|
|
274
|
-
pattern = r"(?<!\[)[A-Z](?![^\[]*\])"
|
|
275
|
-
return [match.group(0) for match in re.finditer(pattern, residue_tag)][0]
|
|
276
|
-
|
|
277
|
-
# "-[X]" variants: the same residues with the trailing "O" characters of
# their SMILES removed. These are the tags `SequenceSplit` produces for
# every residue except the last one of a sequence.
special_residues = {
    tag + '-[X]': smiles.rstrip('O') for tag, smiles in residues.items()
}

register_peptide_residues(special_residues, canonicalize=False)
|
|
@@ -1,20 +0,0 @@
|
|
|
1
|
-
molcraft/__init__.py,sha256=FQyasgy1kEz2v9sKdr3am6ap7Cm1oHEuCKhHwH-CQpM,435
|
|
2
|
-
molcraft/callbacks.py,sha256=mkz4ALjJFPy8nHd2nCAuMbKceKnq4tIpZhUuUOvie2Y,1209
|
|
3
|
-
molcraft/chem.py,sha256=_UO5O-I7KUtGf3vRrFEYoAUGlW5xi2x8ylu5f-Ybumo,18696
|
|
4
|
-
molcraft/conformers.py,sha256=p09gOQOdxLSj3yohZOMkxxLriHsZ1ZqOoiWLi73OpIg,4325
|
|
5
|
-
molcraft/datasets.py,sha256=rFgXTC1ZheLhfgQgcCspP_wEE54a33PIneH7OplbS-8,4047
|
|
6
|
-
molcraft/descriptors.py,sha256=gKqlJ3BqJLTeR2ft8isftSEaJDC8cv64eTq5IYhy4XM,3032
|
|
7
|
-
molcraft/features.py,sha256=69oV_GHNdBKPA4sp6Tpo6brvNmaauk_IVIzNjX7VDmg,13648
|
|
8
|
-
molcraft/featurizers.py,sha256=kV5RN_Z2pELjDcwE65KYy_JagbDUueXoClpsIOFsI9I,27073
|
|
9
|
-
molcraft/layers.py,sha256=y-sBLXWttr-fkGZ-acL1srMB8QqeXnHotYK9KCcyJNU,70581
|
|
10
|
-
molcraft/models.py,sha256=0MN4PAlsacni7RfIcYm_imxuzBVL2K8w3MnaUM24DeI,18021
|
|
11
|
-
molcraft/ops.py,sha256=uSnBYQwxYJ1ATdDpr290bxiyQZkrSCVxlB7btlh_n2I,4112
|
|
12
|
-
molcraft/records.py,sha256=w4-bcWZEC0oVInrE1e0kQBroIaSCA0PN1JBPOtO6VUY,5251
|
|
13
|
-
molcraft/tensors.py,sha256=8hwlad000wQ5pNLSdzd3rCXVbaUHBxUq2MbBx27dKzU,22391
|
|
14
|
-
molcraft/experimental/__init__.py,sha256=x5h6LOO8bo3NPjkKKM9M1H-Kz6R3yxYhRSePoxHCdRE,42
|
|
15
|
-
molcraft/experimental/peptides.py,sha256=82Bzw9FEnlymOUgTIIKha-ELNbqEFkv9T4hspDGRetw,9266
|
|
16
|
-
molcraft-0.1.0a4.dist-info/licenses/LICENSE,sha256=sbVeqlrtZ0V63uYhZGL5dCxUm8rBAOqe2avyA1zIQNk,1074
|
|
17
|
-
molcraft-0.1.0a4.dist-info/METADATA,sha256=bhsytRfa6BIbfmph0Cm2NfubmZJPumsMQt4lbch33kQ,4201
|
|
18
|
-
molcraft-0.1.0a4.dist-info/WHEEL,sha256=ck4Vq1_RXyvS4Jt6SI0Vz6fyVs4GWg7AINwpsaGEgPE,91
|
|
19
|
-
molcraft-0.1.0a4.dist-info/top_level.txt,sha256=dENV6MfOceshM6MQCgJlcN1ojZkiCL9B4F7XyUge3QM,9
|
|
20
|
-
molcraft-0.1.0a4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|