molcraft 0.1.0a4__tar.gz → 0.1.0a5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of molcraft might be problematic. Click here for more details.

Files changed (30)
  1. {molcraft-0.1.0a4 → molcraft-0.1.0a5}/PKG-INFO +1 -1
  2. {molcraft-0.1.0a4 → molcraft-0.1.0a5}/molcraft/__init__.py +1 -1
  3. {molcraft-0.1.0a4 → molcraft-0.1.0a5}/molcraft.egg-info/PKG-INFO +1 -1
  4. {molcraft-0.1.0a4 → molcraft-0.1.0a5}/molcraft.egg-info/SOURCES.txt +0 -2
  5. molcraft-0.1.0a4/molcraft/experimental/__init__.py +0 -1
  6. molcraft-0.1.0a4/molcraft/experimental/peptides.py +0 -281
  7. {molcraft-0.1.0a4 → molcraft-0.1.0a5}/LICENSE +0 -0
  8. {molcraft-0.1.0a4 → molcraft-0.1.0a5}/README.md +0 -0
  9. {molcraft-0.1.0a4 → molcraft-0.1.0a5}/molcraft/callbacks.py +0 -0
  10. {molcraft-0.1.0a4 → molcraft-0.1.0a5}/molcraft/chem.py +0 -0
  11. {molcraft-0.1.0a4 → molcraft-0.1.0a5}/molcraft/conformers.py +0 -0
  12. {molcraft-0.1.0a4 → molcraft-0.1.0a5}/molcraft/datasets.py +0 -0
  13. {molcraft-0.1.0a4 → molcraft-0.1.0a5}/molcraft/descriptors.py +0 -0
  14. {molcraft-0.1.0a4 → molcraft-0.1.0a5}/molcraft/features.py +0 -0
  15. {molcraft-0.1.0a4 → molcraft-0.1.0a5}/molcraft/featurizers.py +0 -0
  16. {molcraft-0.1.0a4 → molcraft-0.1.0a5}/molcraft/layers.py +0 -0
  17. {molcraft-0.1.0a4 → molcraft-0.1.0a5}/molcraft/models.py +0 -0
  18. {molcraft-0.1.0a4 → molcraft-0.1.0a5}/molcraft/ops.py +0 -0
  19. {molcraft-0.1.0a4 → molcraft-0.1.0a5}/molcraft/records.py +0 -0
  20. {molcraft-0.1.0a4 → molcraft-0.1.0a5}/molcraft/tensors.py +0 -0
  21. {molcraft-0.1.0a4 → molcraft-0.1.0a5}/molcraft.egg-info/dependency_links.txt +0 -0
  22. {molcraft-0.1.0a4 → molcraft-0.1.0a5}/molcraft.egg-info/requires.txt +0 -0
  23. {molcraft-0.1.0a4 → molcraft-0.1.0a5}/molcraft.egg-info/top_level.txt +0 -0
  24. {molcraft-0.1.0a4 → molcraft-0.1.0a5}/pyproject.toml +0 -0
  25. {molcraft-0.1.0a4 → molcraft-0.1.0a5}/setup.cfg +0 -0
  26. {molcraft-0.1.0a4 → molcraft-0.1.0a5}/tests/test_chem.py +0 -0
  27. {molcraft-0.1.0a4 → molcraft-0.1.0a5}/tests/test_featurizers.py +0 -0
  28. {molcraft-0.1.0a4 → molcraft-0.1.0a5}/tests/test_layers.py +0 -0
  29. {molcraft-0.1.0a4 → molcraft-0.1.0a5}/tests/test_models.py +0 -0
  30. {molcraft-0.1.0a4 → molcraft-0.1.0a5}/tests/test_tensors.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: molcraft
3
- Version: 0.1.0a4
3
+ Version: 0.1.0a5
4
4
  Summary: Graph Neural Networks for Molecular Machine Learning
5
5
  Author-email: Alexander Kensert <alexander.kensert@gmail.com>
6
6
  License: MIT License
@@ -1,4 +1,4 @@
1
- __version__ = '0.1.0a4'
1
+ __version__ = '0.1.0a5'
2
2
 
3
3
  import os
4
4
  os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: molcraft
3
- Version: 0.1.0a4
3
+ Version: 0.1.0a5
4
4
  Summary: Graph Neural Networks for Molecular Machine Learning
5
5
  Author-email: Alexander Kensert <alexander.kensert@gmail.com>
6
6
  License: MIT License
@@ -19,8 +19,6 @@ molcraft.egg-info/SOURCES.txt
19
19
  molcraft.egg-info/dependency_links.txt
20
20
  molcraft.egg-info/requires.txt
21
21
  molcraft.egg-info/top_level.txt
22
- molcraft/experimental/__init__.py
23
- molcraft/experimental/peptides.py
24
22
  tests/test_chem.py
25
23
  tests/test_featurizers.py
26
24
  tests/test_layers.py
@@ -1 +0,0 @@
1
- from molcraft.experimental import peptides
@@ -1,281 +0,0 @@
1
- import re
2
- import keras
3
- import numpy as np
4
- import tensorflow as tf
5
- import tensorflow_text as tf_text
6
- from rdkit import Chem
7
-
8
- from molcraft import ops
9
- from molcraft import chem
10
- from molcraft import features
11
- from molcraft import featurizers
12
- from molcraft import tensors
13
- from molcraft import descriptors
14
- from molcraft import layers
15
-
16
-
17
- def Graph(
18
- inputs,
19
- atom_features: list[features.Feature] | str | None = 'auto',
20
- bond_features: list[features.Feature] | str | None = 'auto',
21
- molecule_features: list[descriptors.Descriptor] | str | None = 'auto',
22
- super_atom: bool = True,
23
- radius: int | float | None = None,
24
- self_loops: bool = False,
25
- include_hs: bool = False,
26
- **kwargs,
27
- ):
28
- featurizer = featurizers.MolGraphFeaturizer(
29
- atom_features=atom_features,
30
- bond_features=bond_features,
31
- molecule_features=molecule_features,
32
- super_atom=super_atom,
33
- radius=radius,
34
- self_loops=self_loops,
35
- include_hs=include_hs,
36
- **kwargs,
37
- )
38
-
39
- tensor_list: list[tensors.GraphTensor] = [
40
- featurizer(residues[tag]).update({'context': {'tag': tag}}) for tag in inputs
41
- ]
42
- return tf.stack(tensor_list, axis=0)
43
-
44
-
45
- def Lookup(graph: tensors.GraphTensor) -> 'LookupLayer':
46
- lookup = LookupLayer()
47
- lookup._build(graph)
48
- return lookup
49
-
50
-
51
- @keras.saving.register_keras_serializable(package='molcraft')
52
- class SequenceSplit(keras.layers.Layer):
53
-
54
- _pattern = "|".join([
55
- r'(\[[A-Za-z0-9]+\]-[A-Z]\[[A-Za-z0-9]+\])', # N-term mod + mod
56
- r'([A-Z]\[[A-Za-z0-9]+\]-\[[A-Za-z0-9]+\])', # C-term mod + mod
57
- r'([A-Z]-\[[A-Za-z0-9]+\])', # C-term mod
58
- r'(\[[A-Za-z0-9]+\]-[A-Z])', # N-term mod
59
- r'([A-Z]\[[A-Za-z0-9]+\])', # Mod
60
- r'([A-Z])', # No mod
61
- ])
62
-
63
- def call(self, inputs):
64
- inputs = tf_text.regex_split(inputs, self._pattern, self._pattern)
65
- inputs = keras.ops.concatenate([
66
- tf.strings.join([inputs[:, :-1], '-[X]']),
67
- inputs[:, -1:]
68
- ], axis=1)
69
- return inputs.to_tensor()
70
-
71
-
72
-
73
-
74
- @keras.saving.register_keras_serializable(package='molcraft')
75
- class LookupLayer(keras.layers.Layer):
76
-
77
- def __init__(self, **kwargs):
78
- super().__init__(**kwargs)
79
- self._sequence_splitter = SequenceSplit()
80
-
81
- def call(self, sequence: tf.Tensor) -> tensors.GraphTensor:
82
- sequence = self._sequence_splitter(sequence)
83
- indices = self._tag_to_index.lookup(sequence)
84
- indices = tf.sort(tf.unique(tf.reshape(indices, [-1]))[0])[1:]
85
- graph = self.graph[indices]
86
- return tensors.to_dict(graph)
87
-
88
- def _build(self, x):
89
-
90
- if isinstance(x, tensors.GraphTensor):
91
- tensor = tensors.to_dict(x)
92
- self._spec = tf.nest.map_structure(
93
- tf.type_spec_from_value, tensor
94
- )
95
- else:
96
- self._spec = x
97
-
98
- self._graph = tf.nest.map_structure(
99
- lambda s: self.add_weight(
100
- shape=s.shape,
101
- dtype=s.dtype,
102
- trainable=False,
103
- initializer='zeros'
104
- ),
105
- self._spec
106
- )
107
-
108
- if isinstance(x, tensors.GraphTensor):
109
- tf.nest.map_structure(
110
- lambda v, x: v.assign(x),
111
- self._graph, tensor
112
- )
113
-
114
- graph = tf.nest.map_structure(
115
- keras.ops.convert_to_tensor, self._graph
116
- )
117
- self._graph_tensor = tensors.from_dict(graph)
118
-
119
- tags = self._graph_tensor.context['tag']
120
-
121
- self._tag_to_index = tf.lookup.StaticHashTable(
122
- tf.lookup.KeyValueTensorInitializer(
123
- keys=tags,
124
- values=range(len(tags)),
125
- ),
126
- default_value=-1,
127
- )
128
-
129
- def get_config(self):
130
- config = super().get_config()
131
- spec = keras.saving.serialize_keras_object(self._spec)
132
- config['spec'] = spec
133
- return config
134
-
135
- @classmethod
136
- def from_config(cls, config: dict) -> 'LookupLayer':
137
- spec = config.pop('spec')
138
- spec = keras.saving.deserialize_keras_object(spec)
139
- layer = cls(**config)
140
- layer._build(spec)
141
- return layer
142
-
143
- @property
144
- def graph(self) -> tensors.GraphTensor:
145
- return self._graph_tensor
146
-
147
-
148
- @keras.saving.register_keras_serializable(package='molcraft')
149
- class Gather(keras.layers.Layer):
150
-
151
- def __init__(
152
- self,
153
- padding: list[tuple[int]] | tuple[int] | int = 1,
154
- mask_value: int = 0,
155
- **kwargs
156
- ) -> None:
157
- super().__init__(**kwargs)
158
- self.padding = padding
159
- self.mask_value = mask_value
160
- self._readout_layer = layers.Readout(mode='mean')
161
- self.supports_masking = True
162
- self._sequence_splitter = SequenceSplit()
163
-
164
- def get_config(self):
165
- config = super().get_config()
166
- config['mask_value'] = self.mask_value
167
- config['padding'] = self.padding
168
- return config
169
-
170
- def call(self, inputs) -> tf.Tensor:
171
-
172
- graph, sequence = inputs
173
-
174
- tag = graph['context']['tag']
175
- data = self._readout_layer(graph)
176
-
177
- table = tf.lookup.experimental.MutableHashTable(
178
- key_dtype=tf.string,
179
- value_dtype=tf.int32,
180
- default_value=-1
181
- )
182
-
183
- table.insert(tag, tf.range(tf.shape(tag)[0]))
184
- sequence = self._sequence_splitter(sequence)
185
- sequence = table.lookup(sequence)
186
-
187
- readout = ops.gather(data, keras.ops.where(sequence == -1, 0, sequence))
188
- readout = keras.ops.where(sequence[..., None] == -1, 0.0, readout)
189
- return readout
190
-
191
- def compute_mask(
192
- self,
193
- inputs: tensors.GraphTensor,
194
- mask: bool | None = None
195
- ) -> tf.Tensor | None:
196
- # if self.mask_value is None:
197
- # return None
198
- _, sequence = inputs
199
- sequence = self._sequence_splitter(sequence)
200
- return keras.ops.not_equal(sequence, '')
201
-
202
-
203
- residues = {
204
- "A": "N[C@@H](C)C(=O)O",
205
- "C": "N[C@@H](CS)C(=O)O",
206
- "C[Carbamidomethyl]": "N[C@@H](CSCC(=O)N)C(=O)O",
207
- "D": "N[C@@H](CC(=O)O)C(=O)O",
208
- "E": "N[C@@H](CCC(=O)O)C(=O)O",
209
- "F": "N[C@@H](Cc1ccccc1)C(=O)O",
210
- "G": "NCC(=O)O",
211
- "H": "N[C@@H](CC1=CN=C-N1)C(=O)O",
212
- "I": "N[C@@H](C(CC)C)C(=O)O",
213
- "K": "N[C@@H](CCCCN)C(=O)O",
214
- "K[Acetyl]": "N[C@@H](CCCCNC(=O)C)C(=O)O",
215
- "K[Crotonyl]": "N[C@@H](CCCCNC(C=CC)=O)C(=O)O",
216
- "K[Dimethyl]": "N[C@@H](CCCCN(C)C)C(=O)O",
217
- "K[Formyl]": "N[C@@H](CCCCNC=O)C(=O)O",
218
- "K[Malonyl]": "N[C@@H](CCCCNC(=O)CC(O)=O)C(=O)O",
219
- "K[Methyl]": "N[C@@H](CCCCNC)C(=O)O",
220
- "K[Propionyl]": "N[C@@H](CCCCNC(=O)CC)C(=O)O",
221
- "K[Succinyl]": "N[C@@H](CCCCNC(CCC(O)=O)=O)C(=O)O",
222
- "K[Trimethyl]": "N[C@@H](CCCC[N+](C)(C)C)C(=O)O",
223
- "L": "N[C@@H](CC(C)C)C(=O)O",
224
- "M": "N[C@@H](CCSC)C(=O)O",
225
- "M[Oxidation]": "N[C@@H](CCS(=O)C)C(=O)O",
226
- "N": "N[C@@H](CC(=O)N)C(=O)O",
227
- "P": "N1[C@@H](CCC1)C(=O)O",
228
- "P[Oxidation]": "N1CC(O)C[C@H]1C(=O)O",
229
- "Q": "N[C@@H](CCC(=O)N)C(=O)O",
230
- "R": "N[C@@H](CCCNC(=N)N)C(=O)O",
231
- "R[Deamidated]": "N[C@@H](CCCNC(N)=O)C(=O)O",
232
- "R[Dimethyl]": "N[C@@H](CCCNC(N(C)C)=N)C(=O)O",
233
- "R[Methyl]": "N[C@@H](CCCNC(=N)NC)C(=O)O",
234
- "S": "N[C@@H](CO)C(=O)O",
235
- "T": "N[C@@H](C(O)C)C(=O)O",
236
- "V": "N[C@@H](C(C)C)C(=O)O",
237
- "W": "N[C@@H](CC(=CN2)C1=C2C=CC=C1)C(=O)O",
238
- "Y": "N[C@@H](Cc1ccc(O)cc1)C(=O)O",
239
- "Y[Nitro]": "N[C@@H](Cc1ccc(O)c(N(=O)=O)c1)C(=O)O",
240
- "Y[Phospho]": "N[C@@H](Cc1ccc(OP(O)(=O)O)cc1)C(=O)O",
241
- "[Acetyl]-A": "N(C(C)=O)[C@@H](C)C(=O)O",
242
- "[Acetyl]-C": "N(C(C)=O)[C@@H](CS)C(=O)O",
243
- "[Acetyl]-D": "N(C(=O)C)[C@H](C(=O)O)CC(=O)O",
244
- "[Acetyl]-E": "N(C(=O)C)[C@@H](CCC(O)=O)C(=O)O",
245
- "[Acetyl]-F": "N(C(C)=O)[C@@H](Cc1ccccc1)C(=O)O",
246
- "[Acetyl]-G": "N(C(=O)C)CC(=O)O",
247
- "[Acetyl]-H": "N(C(=O)C)[C@@H](Cc1[nH]cnc1)C(=O)O",
248
- "[Acetyl]-I": "N(C(=O)C)[C@@H]([C@H](CC)C)C(=O)O",
249
- "[Acetyl]-K": "N(C(C)=O)[C@@H](CCCCN)C(=O)O",
250
- "[Acetyl]-L": "N(C(=O)C)[C@@H](CC(C)C)C(=O)O",
251
- "[Acetyl]-M": "N(C(=O)C)[C@@H](CCSC)C(=O)O",
252
- "[Acetyl]-N": "N(C(C)=O)[C@@H](CC(=O)N)C(=O)O",
253
- "[Acetyl]-P": "N1(C(=O)C)CCC[C@H]1C(=O)O",
254
- "[Acetyl]-Q": "N(C(=O)C)[C@@H](CCC(=O)N)C(=O)O",
255
- "[Acetyl]-R": "N(C(C)=O)[C@@H](CCCN=C(N)N)C(=O)O",
256
- "[Acetyl]-S": "N(C(C)=O)[C@@H](CO)C(=O)O",
257
- "[Acetyl]-T": "N(C(=O)C)[C@@H]([C@H](O)C)C(=O)O",
258
- "[Acetyl]-V": "N(C(=O)C)[C@@H](C(C)C)C(=O)O",
259
- "[Acetyl]-W": "N(C(C)=O)[C@@H](Cc1c2ccccc2[nH]c1)C(=O)O",
260
- "[Acetyl]-Y": "N(C(C)=O)[C@@H](Cc1ccc(O)cc1)C(=O)O"
261
- }
262
-
263
- residues_reverse = {}
264
- def register_peptide_residues(residues_: dict[str, str], canonicalize=True):
265
- for residue, smiles in residues_.items():
266
- if canonicalize:
267
- smiles = Chem.MolToSmiles(Chem.MolFromSmiles(smiles))
268
- residues[residue] = smiles
269
- residues_reverse[residues[residue]] = residue
270
-
271
- register_peptide_residues(residues, canonicalize=False)
272
-
273
- def _extract_residue_type(residue_tag: str) -> str:
274
- pattern = r"(?<!\[)[A-Z](?![^\[]*\])"
275
- return [match.group(0) for match in re.finditer(pattern, residue_tag)][0]
276
-
277
- special_residues = {}
278
- for key, value in residues.items():
279
- special_residues[key + '-[X]'] = value.rstrip('O')
280
-
281
- register_peptide_residues(special_residues, canonicalize=False)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes