molcraft 0.1.0a4__py3-none-any.whl → 0.1.0a5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of molcraft might be problematic. Click here for more details.
- molcraft/__init__.py +1 -1
- {molcraft-0.1.0a4.dist-info → molcraft-0.1.0a5.dist-info}/METADATA +1 -1
- {molcraft-0.1.0a4.dist-info → molcraft-0.1.0a5.dist-info}/RECORD +6 -8
- molcraft/experimental/__init__.py +0 -1
- molcraft/experimental/peptides.py +0 -281
- {molcraft-0.1.0a4.dist-info → molcraft-0.1.0a5.dist-info}/WHEEL +0 -0
- {molcraft-0.1.0a4.dist-info → molcraft-0.1.0a5.dist-info}/licenses/LICENSE +0 -0
- {molcraft-0.1.0a4.dist-info → molcraft-0.1.0a5.dist-info}/top_level.txt +0 -0
molcraft/__init__.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
molcraft/__init__.py,sha256=
|
|
1
|
+
molcraft/__init__.py,sha256=eTGjgMlXf3I8ThkUwgdiONb5Yc-5fWOFvY8U8WXOMwc,435
|
|
2
2
|
molcraft/callbacks.py,sha256=mkz4ALjJFPy8nHd2nCAuMbKceKnq4tIpZhUuUOvie2Y,1209
|
|
3
3
|
molcraft/chem.py,sha256=_UO5O-I7KUtGf3vRrFEYoAUGlW5xi2x8ylu5f-Ybumo,18696
|
|
4
4
|
molcraft/conformers.py,sha256=p09gOQOdxLSj3yohZOMkxxLriHsZ1ZqOoiWLi73OpIg,4325
|
|
@@ -11,10 +11,8 @@ molcraft/models.py,sha256=0MN4PAlsacni7RfIcYm_imxuzBVL2K8w3MnaUM24DeI,18021
|
|
|
11
11
|
molcraft/ops.py,sha256=uSnBYQwxYJ1ATdDpr290bxiyQZkrSCVxlB7btlh_n2I,4112
|
|
12
12
|
molcraft/records.py,sha256=w4-bcWZEC0oVInrE1e0kQBroIaSCA0PN1JBPOtO6VUY,5251
|
|
13
13
|
molcraft/tensors.py,sha256=8hwlad000wQ5pNLSdzd3rCXVbaUHBxUq2MbBx27dKzU,22391
|
|
14
|
-
molcraft/
|
|
15
|
-
molcraft/
|
|
16
|
-
molcraft-0.1.
|
|
17
|
-
molcraft-0.1.
|
|
18
|
-
molcraft-0.1.
|
|
19
|
-
molcraft-0.1.0a4.dist-info/top_level.txt,sha256=dENV6MfOceshM6MQCgJlcN1ojZkiCL9B4F7XyUge3QM,9
|
|
20
|
-
molcraft-0.1.0a4.dist-info/RECORD,,
|
|
14
|
+
molcraft-0.1.0a5.dist-info/licenses/LICENSE,sha256=sbVeqlrtZ0V63uYhZGL5dCxUm8rBAOqe2avyA1zIQNk,1074
|
|
15
|
+
molcraft-0.1.0a5.dist-info/METADATA,sha256=mb5KnvJUzofmx-MNraJxyiBBug2QNIQTQDGyC1L3SDw,4201
|
|
16
|
+
molcraft-0.1.0a5.dist-info/WHEEL,sha256=ck4Vq1_RXyvS4Jt6SI0Vz6fyVs4GWg7AINwpsaGEgPE,91
|
|
17
|
+
molcraft-0.1.0a5.dist-info/top_level.txt,sha256=dENV6MfOceshM6MQCgJlcN1ojZkiCL9B4F7XyUge3QM,9
|
|
18
|
+
molcraft-0.1.0a5.dist-info/RECORD,,
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
from molcraft.experimental import peptides
|
|
@@ -1,281 +0,0 @@
|
|
|
1
|
-
import re
|
|
2
|
-
import keras
|
|
3
|
-
import numpy as np
|
|
4
|
-
import tensorflow as tf
|
|
5
|
-
import tensorflow_text as tf_text
|
|
6
|
-
from rdkit import Chem
|
|
7
|
-
|
|
8
|
-
from molcraft import ops
|
|
9
|
-
from molcraft import chem
|
|
10
|
-
from molcraft import features
|
|
11
|
-
from molcraft import featurizers
|
|
12
|
-
from molcraft import tensors
|
|
13
|
-
from molcraft import descriptors
|
|
14
|
-
from molcraft import layers
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
def Graph(
    inputs,
    atom_features: list[features.Feature] | str | None = 'auto',
    bond_features: list[features.Feature] | str | None = 'auto',
    molecule_features: list[descriptors.Descriptor] | str | None = 'auto',
    super_atom: bool = True,
    radius: int | float | None = None,
    self_loops: bool = False,
    include_hs: bool = False,
    **kwargs,
):
    """Featurize registered residues and stack them into one graph tensor.

    Args:
        inputs: Iterable of residue tags; every tag must be a key of the
            module-level ``residues`` table (a missing tag raises
            ``KeyError``).
        atom_features, bond_features, molecule_features, super_atom, radius,
        self_loops, include_hs, **kwargs: Forwarded unchanged to
            ``featurizers.MolGraphFeaturizer``.

    Returns:
        The result of ``tf.stack`` over the per-residue graphs, each of which
        carries its tag under ``context['tag']``.
    """
    featurizer = featurizers.MolGraphFeaturizer(
        atom_features=atom_features,
        bond_features=bond_features,
        molecule_features=molecule_features,
        super_atom=super_atom,
        radius=radius,
        self_loops=self_loops,
        include_hs=include_hs,
        **kwargs,
    )

    per_residue_graphs: list[tensors.GraphTensor] = []
    for tag in inputs:
        residue_graph = featurizer(residues[tag])
        # Attach the tag so the graph can later be looked up by residue tag.
        per_residue_graphs.append(
            residue_graph.update({'context': {'tag': tag}})
        )
    return tf.stack(per_residue_graphs, axis=0)
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
def Lookup(graph: tensors.GraphTensor) -> 'LookupLayer':
    """Return a ``LookupLayer`` that has been built from ``graph``."""
    layer = LookupLayer()
    layer._build(graph)
    return layer
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
@keras.saving.register_keras_serializable(package='molcraft')
class SequenceSplit(keras.layers.Layer):
    """Split peptide sequence strings into per-residue tags.

    A residue is a single upper-case letter, optionally carrying a bracketed
    modification (``K[Acetyl]``) and/or a bracketed terminal modification
    joined with ``-`` (``[Acetyl]-A``).
    """

    # The more specific forms are listed before the simpler ones.
    _pattern = "|".join([
        r'(\[[A-Za-z0-9]+\]-[A-Z]\[[A-Za-z0-9]+\])',  # N-term mod + mod
        r'([A-Z]\[[A-Za-z0-9]+\]-\[[A-Za-z0-9]+\])',  # C-term mod + mod
        r'([A-Z]-\[[A-Za-z0-9]+\])',  # C-term mod
        r'(\[[A-Za-z0-9]+\]-[A-Z])',  # N-term mod
        r'([A-Z]\[[A-Za-z0-9]+\])',  # Mod
        r'([A-Z])',  # No mod
    ])

    def call(self, inputs):
        """Tokenize ``inputs`` and mark all but the last residue with '-[X]'.

        The same pattern is passed as both the delimiter and the
        keep-delimiter pattern of ``tf_text.regex_split``.
        """
        tokens = tf_text.regex_split(inputs, self._pattern, self._pattern)
        # presumably `tokens` is ragged (one row per sequence) -- TODO confirm
        leading = tf.strings.join([tokens[:, :-1], '-[X]'])
        trailing = tokens[:, -1:]
        tagged = keras.ops.concatenate([leading, trailing], axis=1)
        return tagged.to_tensor()
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
@keras.saving.register_keras_serializable(package='molcraft')
class LookupLayer(keras.layers.Layer):
    """Select the residue graphs that occur in a batch of peptide sequences.

    The layer stores a graph tensor (one entry per residue tag) as
    non-trainable weights and maps residue tags to positions in that tensor
    through a static hash table built from ``context['tag']``.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self._sequence_splitter = SequenceSplit()

    def call(self, sequence: tf.Tensor) -> tensors.GraphTensor:
        """Return ``tensors.to_dict`` of the sub-graph for the tags in ``sequence``."""
        sequence = self._sequence_splitter(sequence)
        indices = self._tag_to_index.lookup(sequence)
        indices = tf.sort(tf.unique(tf.reshape(indices, [-1]))[0])
        # The table answers -1 for tags it does not know. Filter those out
        # explicitly: blindly dropping the first sorted element would discard
        # a real residue whenever no -1 is present in the batch.
        indices = tf.boolean_mask(indices, indices >= 0)
        graph = self.graph[indices]
        return tensors.to_dict(graph)

    def _build(self, x):
        """Create the stored graph and the tag lookup table.

        ``x`` is either a ``tensors.GraphTensor`` (values are copied into the
        weights) or a previously recorded spec structure, as passed by
        ``from_config`` (weights are only allocated, zero-initialised).
        """

        if isinstance(x, tensors.GraphTensor):
            tensor = tensors.to_dict(x)
            self._spec = tf.nest.map_structure(
                tf.type_spec_from_value, tensor
            )
        else:
            self._spec = x

        # One non-trainable weight per leaf of the spec structure.
        self._graph = tf.nest.map_structure(
            lambda s: self.add_weight(
                shape=s.shape,
                dtype=s.dtype,
                trainable=False,
                initializer='zeros'
            ),
            self._spec
        )

        if isinstance(x, tensors.GraphTensor):
            tf.nest.map_structure(
                lambda v, x: v.assign(x),
                self._graph, tensor
            )

        graph = tf.nest.map_structure(
            keras.ops.convert_to_tensor, self._graph
        )
        self._graph_tensor = tensors.from_dict(graph)

        tags = self._graph_tensor.context['tag']

        # NOTE(review): when built from a spec the tags come from the
        # freshly initialised weights, so the table is built before any
        # saved weights are restored -- confirm this is intended.
        self._tag_to_index = tf.lookup.StaticHashTable(
            tf.lookup.KeyValueTensorInitializer(
                keys=tags,
                values=range(len(tags)),
            ),
            default_value=-1,
        )

    def get_config(self):
        """Return the layer config including the serialized graph spec."""
        config = super().get_config()
        spec = keras.saving.serialize_keras_object(self._spec)
        config['spec'] = spec
        return config

    @classmethod
    def from_config(cls, config: dict) -> 'LookupLayer':
        """Recreate the layer and rebuild its weights from the stored spec."""
        spec = config.pop('spec')
        spec = keras.saving.deserialize_keras_object(spec)
        layer = cls(**config)
        layer._build(spec)
        return layer

    @property
    def graph(self) -> tensors.GraphTensor:
        """The graph tensor assembled in ``_build``."""
        return self._graph_tensor
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
@keras.saving.register_keras_serializable(package='molcraft')
class Gather(keras.layers.Layer):
    """Arrange per-residue graph readouts in the order given by sequences.

    The layer takes ``(graph, sequence)``; the graph is reduced with a mean
    readout and the rows are gathered according to the residue tags found in
    each sequence. Positions whose tag is not present in the graph are zero.
    """

    def __init__(
        self,
        padding: list[tuple[int]] | tuple[int] | int = 1,
        mask_value: int = 0,
        **kwargs
    ) -> None:
        super().__init__(**kwargs)
        # Both options are stored and serialized by get_config; the methods
        # of this class do not read them otherwise.
        self.padding = padding
        self.mask_value = mask_value
        self._readout_layer = layers.Readout(mode='mean')
        self.supports_masking = True
        self._sequence_splitter = SequenceSplit()

    def get_config(self):
        """Return the layer config extended with ``mask_value`` and ``padding``."""
        config = super().get_config()
        config.update({
            'mask_value': self.mask_value,
            'padding': self.padding,
        })
        return config

    def call(self, inputs) -> tf.Tensor:
        """Gather the readout rows matching every residue tag of ``sequence``."""
        graph, sequence = inputs

        tags = graph['context']['tag']
        pooled = self._readout_layer(graph)

        # Maps a residue tag to its row in `pooled`; unknown tags give -1.
        tag_to_row = tf.lookup.experimental.MutableHashTable(
            key_dtype=tf.string,
            value_dtype=tf.int32,
            default_value=-1
        )
        tag_to_row.insert(tags, tf.range(tf.shape(tags)[0]))

        rows = tag_to_row.lookup(self._sequence_splitter(sequence))
        missing = rows == -1

        # Gather with a valid placeholder row (0) first, then blank it out.
        gathered = ops.gather(pooled, keras.ops.where(missing, 0, rows))
        return keras.ops.where(missing[..., None], 0.0, gathered)

    def compute_mask(
        self,
        inputs: tensors.GraphTensor,
        mask: bool | None = None
    ) -> tf.Tensor | None:
        """Return a mask that is true wherever the split sequence is non-empty."""
        # NOTE: `mask_value` is not consulted here; a mask is always returned.
        _graph, sequence = inputs
        tokens = self._sequence_splitter(sequence)
        return keras.ops.not_equal(tokens, '')
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
# Residue tag -> SMILES of the free amino acid.
#
# Invariant: every SMILES must END with the backbone carboxyl group
# ("...C(=O)O"). The '-[X]' variants further down in this module are derived
# by stripping the trailing 'O', which is only meaningful when that 'O' is the
# backbone hydroxyl. "[Acetyl]-D" is therefore written with the side chain in
# the branch and the backbone carboxyl last (same molecule as before, with
# neighbour order and chirality mark swapped together).
residues = {
    "A": "N[C@@H](C)C(=O)O",
    "C": "N[C@@H](CS)C(=O)O",
    "C[Carbamidomethyl]": "N[C@@H](CSCC(=O)N)C(=O)O",
    "D": "N[C@@H](CC(=O)O)C(=O)O",
    "E": "N[C@@H](CCC(=O)O)C(=O)O",
    "F": "N[C@@H](Cc1ccccc1)C(=O)O",
    "G": "NCC(=O)O",
    "H": "N[C@@H](CC1=CN=C-N1)C(=O)O",
    "I": "N[C@@H](C(CC)C)C(=O)O",
    "K": "N[C@@H](CCCCN)C(=O)O",
    "K[Acetyl]": "N[C@@H](CCCCNC(=O)C)C(=O)O",
    "K[Crotonyl]": "N[C@@H](CCCCNC(C=CC)=O)C(=O)O",
    "K[Dimethyl]": "N[C@@H](CCCCN(C)C)C(=O)O",
    "K[Formyl]": "N[C@@H](CCCCNC=O)C(=O)O",
    "K[Malonyl]": "N[C@@H](CCCCNC(=O)CC(O)=O)C(=O)O",
    "K[Methyl]": "N[C@@H](CCCCNC)C(=O)O",
    "K[Propionyl]": "N[C@@H](CCCCNC(=O)CC)C(=O)O",
    "K[Succinyl]": "N[C@@H](CCCCNC(CCC(O)=O)=O)C(=O)O",
    "K[Trimethyl]": "N[C@@H](CCCC[N+](C)(C)C)C(=O)O",
    "L": "N[C@@H](CC(C)C)C(=O)O",
    "M": "N[C@@H](CCSC)C(=O)O",
    "M[Oxidation]": "N[C@@H](CCS(=O)C)C(=O)O",
    "N": "N[C@@H](CC(=O)N)C(=O)O",
    "P": "N1[C@@H](CCC1)C(=O)O",
    "P[Oxidation]": "N1CC(O)C[C@H]1C(=O)O",
    "Q": "N[C@@H](CCC(=O)N)C(=O)O",
    "R": "N[C@@H](CCCNC(=N)N)C(=O)O",
    "R[Deamidated]": "N[C@@H](CCCNC(N)=O)C(=O)O",
    "R[Dimethyl]": "N[C@@H](CCCNC(N(C)C)=N)C(=O)O",
    "R[Methyl]": "N[C@@H](CCCNC(=N)NC)C(=O)O",
    "S": "N[C@@H](CO)C(=O)O",
    "T": "N[C@@H](C(O)C)C(=O)O",
    "V": "N[C@@H](C(C)C)C(=O)O",
    "W": "N[C@@H](CC(=CN2)C1=C2C=CC=C1)C(=O)O",
    "Y": "N[C@@H](Cc1ccc(O)cc1)C(=O)O",
    "Y[Nitro]": "N[C@@H](Cc1ccc(O)c(N(=O)=O)c1)C(=O)O",
    "Y[Phospho]": "N[C@@H](Cc1ccc(OP(O)(=O)O)cc1)C(=O)O",
    "[Acetyl]-A": "N(C(C)=O)[C@@H](C)C(=O)O",
    "[Acetyl]-C": "N(C(C)=O)[C@@H](CS)C(=O)O",
    "[Acetyl]-D": "N(C(=O)C)[C@@H](CC(=O)O)C(=O)O",
    "[Acetyl]-E": "N(C(=O)C)[C@@H](CCC(O)=O)C(=O)O",
    "[Acetyl]-F": "N(C(C)=O)[C@@H](Cc1ccccc1)C(=O)O",
    "[Acetyl]-G": "N(C(=O)C)CC(=O)O",
    "[Acetyl]-H": "N(C(=O)C)[C@@H](Cc1[nH]cnc1)C(=O)O",
    "[Acetyl]-I": "N(C(=O)C)[C@@H]([C@H](CC)C)C(=O)O",
    "[Acetyl]-K": "N(C(C)=O)[C@@H](CCCCN)C(=O)O",
    "[Acetyl]-L": "N(C(=O)C)[C@@H](CC(C)C)C(=O)O",
    "[Acetyl]-M": "N(C(=O)C)[C@@H](CCSC)C(=O)O",
    "[Acetyl]-N": "N(C(C)=O)[C@@H](CC(=O)N)C(=O)O",
    "[Acetyl]-P": "N1(C(=O)C)CCC[C@H]1C(=O)O",
    "[Acetyl]-Q": "N(C(=O)C)[C@@H](CCC(=O)N)C(=O)O",
    "[Acetyl]-R": "N(C(C)=O)[C@@H](CCCN=C(N)N)C(=O)O",
    "[Acetyl]-S": "N(C(C)=O)[C@@H](CO)C(=O)O",
    "[Acetyl]-T": "N(C(=O)C)[C@@H]([C@H](O)C)C(=O)O",
    "[Acetyl]-V": "N(C(=O)C)[C@@H](C(C)C)C(=O)O",
    "[Acetyl]-W": "N(C(C)=O)[C@@H](Cc1c2ccccc2[nH]c1)C(=O)O",
    "[Acetyl]-Y": "N(C(C)=O)[C@@H](Cc1ccc(O)cc1)C(=O)O",
}
|
|
262
|
-
|
|
263
|
-
# SMILES -> residue tag; filled in by register_peptide_residues.
residues_reverse = {}


def register_peptide_residues(residues_: dict[str, str], canonicalize=True):
    """Add (or overwrite) entries of the module-level residue tables.

    Args:
        residues_: Mapping from residue tag to SMILES.
        canonicalize: When true, each SMILES is round-tripped through RDKit
            (``MolFromSmiles`` -> ``MolToSmiles``) before being stored.

    Raises:
        ValueError: If ``canonicalize`` is true and RDKit cannot parse a
            SMILES string.
    """
    # Iterate over a snapshot: ``residues_`` may be the global ``residues``
    # dict that is written to inside the loop.
    for residue, smiles in list(residues_.items()):
        if canonicalize:
            mol = Chem.MolFromSmiles(smiles)
            if mol is None:
                # MolFromSmiles signals a parse failure by returning None.
                raise ValueError(
                    f'Could not parse SMILES {smiles!r} for residue {residue!r}.'
                )
            smiles = Chem.MolToSmiles(mol)
        residues[residue] = smiles
        residues_reverse[smiles] = residue
|
|
270
|
-
|
|
271
|
-
# Populate residues_reverse from the built-in table; the SMILES are stored
# exactly as written above (no RDKit canonicalization).
register_peptide_residues(residues, canonicalize=False)
|
|
272
|
-
|
|
273
|
-
def _extract_residue_type(residue_tag: str) -> str:
|
|
274
|
-
pattern = r"(?<!\[)[A-Z](?![^\[]*\])"
|
|
275
|
-
return [match.group(0) for match in re.finditer(pattern, residue_tag)][0]
|
|
276
|
-
|
|
277
|
-
# '-[X]' variants: the same residues with exactly one trailing 'O' removed
# from the SMILES. SequenceSplit appends '-[X]' to every residue of a
# sequence except the last one, so these are the tags it produces for
# non-final residues.
special_residues = {
    f'{tag}-[X]': smiles.removesuffix('O')
    for tag, smiles in residues.items()
}

register_peptide_residues(special_residues, canonicalize=False)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|