molcraft 0.1.0a4__py3-none-any.whl → 0.1.0a6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of molcraft might be problematic. Click here for more details.

molcraft/losses.py ADDED
@@ -0,0 +1,36 @@
1
+ import keras
2
+ import numpy as np
3
+
4
+
5
@keras.saving.register_keras_serializable(package='molgraph')
class GaussianNegativeLogLikelihood(keras.losses.Loss):
    """Negative log-likelihood of the labels under a predicted Gaussian.

    The last axis of `y_pred` is split at index `events`: the first `events`
    entries are used as the mean and the remaining entries as the scale,
    which is squared to obtain the variance.

    Note:
        NOTE(review): the class is registered under the package name
        'molgraph'. It is left unchanged because the registered name is part
        of the serialized format -- confirm whether 'molcraft' was intended.

    Args:
        events: Number of leading entries along the last axis of `y_pred`
            that hold the mean.
        name: Name of the loss instance.
        **kwargs: Forwarded to `keras.losses.Loss`.
    """

    def __init__(
        self,
        events: int = 1,
        name="gaussian_nll",
        **kwargs
    ):
        super().__init__(name=name, **kwargs)
        self.events = events

    def call(self, y_true, y_pred):
        """Compute the Gaussian negative log-likelihood.

        Args:
            y_true: Labels. Trailing singleton axes are appended until the
                rank matches that of the predicted mean.
            y_pred: Predictions holding mean and scale along the last axis.

        Returns:
            The negative log-likelihood averaged over the last axis only,
            so that the base class can apply sample weights and the
            configured reduction to the remaining axes.
        """
        mean = y_pred[..., :self.events]
        scale = y_pred[..., self.events:]
        # Clamp the variance so that a zero scale cannot produce log(0)
        # or a division by zero.
        variance = keras.ops.maximum(
            keras.ops.square(scale), keras.backend.epsilon()
        )
        rank_gap = len(keras.ops.shape(mean)) - len(keras.ops.shape(y_true))
        for _ in range(rank_gap):
            y_true = keras.ops.expand_dims(y_true, axis=-1)
        nll = (
            0.5 * keras.ops.log(2.0 * np.pi * variance) +
            0.5 * keras.ops.square(y_true - mean) / variance
        )
        # Reduce over the event axis only; reducing over every axis here
        # would collapse the per-sample values the base class weights.
        return keras.ops.mean(nll, axis=-1)

    def get_config(self):
        """Return the serializable configuration, including `events`."""
        config = super().get_config()
        config['events'] = self.events
        return config


# Short alias for the loss class.
GaussianNLL = GaussianNegativeLogLikelihood
molcraft/models.py CHANGED
@@ -17,7 +17,70 @@ class GraphModel(layers.GraphLayer, keras.models.Model):
17
17
 
18
18
  Currently, the `GraphModel` only supports `GraphTensor` input.
19
19
 
20
- Example (using `from_layers`):
20
+ Build a subclassed GraphModel:
21
+
22
+ >>> import molcraft
23
+ >>> import keras
24
+ >>>
25
+ >>> featurizer = molcraft.featurizers.MolGraphFeaturizer()
26
+ >>> graph = featurizer([('N[C@@H](C)C(=O)O', 1.0), ('N[C@@H](CS)C(=O)O', 2.0)])
27
+ >>>
28
+ >>> @keras.saving.register_keras_serializable()
29
+ ... class GraphNeuralNetwork(molcraft.models.GraphModel):
30
+ ... def __init__(self, units, **kwargs):
31
+ ... super().__init__(**kwargs)
32
+ ... self.units = units
33
+ ... self.node_embedding = molcraft.layers.NodeEmbedding(self.units)
34
+ ... self.edge_embedding = molcraft.layers.EdgeEmbedding(self.units)
35
+ ... self.conv_1 = molcraft.layers.GraphTransformer(self.units)
36
+ ... self.conv_2 = molcraft.layers.GraphTransformer(self.units)
37
+ ... self.readout = molcraft.layers.Readout('mean')
38
+ ... self.dense = keras.layers.Dense(1)
39
+ ... def propagate(self, graph):
40
+ ... x = self.edge_embedding(self.node_embedding(graph))
41
+ ... x = self.conv_2(self.conv_1(x))
42
+ ... return self.dense(self.readout(x))
43
+ ... def get_config(self):
44
+ ... config = super().get_config()
45
+ ... config['units'] = self.units
46
+ ... return config
47
+ >>>
48
+ >>> model = GraphNeuralNetwork(128)
49
+ >>> model.compile(
50
+ ... optimizer=keras.optimizers.Adam(1e-3),
51
+ ... loss=keras.losses.MeanSquaredError(),
52
+ ... metrics=[keras.metrics.MeanAbsolutePercentageError(name='mape')]
53
+ ... )
54
+ >>> model.fit(graph, epochs=10)
55
+ >>> mse, mape = model.evaluate(graph)
56
+ >>> preds = model.predict(graph)
57
+
58
+ Build a functional GraphModel:
59
+
60
+ >>> import molcraft
61
+ >>> import keras
62
+ >>>
63
+ >>> featurizer = molcraft.featurizers.MolGraphFeaturizer()
64
+ >>> graph = featurizer([('N[C@@H](C)C(=O)O', 1.0), ('N[C@@H](CS)C(=O)O', 2.0)])
65
+ >>>
66
+ >>> inputs = molcraft.layers.Input(graph.spec)
67
+ >>> x = molcraft.layers.NodeEmbedding(128)(inputs)
68
+ >>> x = molcraft.layers.EdgeEmbedding(128)(x)
69
+ >>> x = molcraft.layers.GraphTransformer(128)(x)
70
+ >>> x = molcraft.layers.GraphTransformer(128)(x)
71
+ >>> x = molcraft.layers.Readout('mean')(x)
72
+ >>> outputs = keras.layers.Dense(1)(x)
73
+ >>> model = molcraft.models.GraphModel(inputs, outputs)
74
+ >>> model.compile(
75
+ ... optimizer=keras.optimizers.Adam(1e-3),
76
+ ... loss=keras.losses.MeanSquaredError(),
77
+ ... metrics=[keras.metrics.MeanAbsolutePercentageError(name='mape')]
78
+ ... )
79
+ >>> model.fit(graph, epochs=10)
80
+ >>> mse, mape = model.evaluate(graph)
81
+ >>> preds = model.predict(graph)
82
+
83
+ Build a GraphModel using `from_layers`:
21
84
 
22
85
  >>> import molcraft
23
86
  >>> import keras
@@ -25,14 +88,14 @@ class GraphModel(layers.GraphLayer, keras.models.Model):
25
88
  >>> featurizer = molcraft.featurizers.MolGraphFeaturizer()
26
89
  >>> graph = featurizer([('N[C@@H](C)C(=O)O', 1.0), ('N[C@@H](CS)C(=O)O', 2.0)])
27
90
  >>>
28
- >>> model = molcraft.models.GraphModel.from_layers(
91
+ >>> model = molcraft.models.GraphModel.from_layers([
29
92
  ... molcraft.layers.Input(graph.spec),
30
93
  ... molcraft.layers.NodeEmbedding(128),
31
94
  ... molcraft.layers.EdgeEmbedding(128),
32
95
  ... molcraft.layers.GraphTransformer(128),
33
96
  ... molcraft.layers.GraphTransformer(128),
34
97
  ... molcraft.layers.Readout('mean'),
35
- ... molcraft.layers.Dense(1)
98
+ ... keras.layers.Dense(1)
36
99
  ... ])
37
100
  >>> model.compile(
38
101
  ... optimizer=keras.optimizers.Adam(1e-3),
@@ -42,6 +105,7 @@ class GraphModel(layers.GraphLayer, keras.models.Model):
42
105
  >>> model.fit(graph, epochs=10)
43
106
  >>> mse, mape = model.evaluate(graph)
44
107
  >>> preds = model.predict(graph)
108
+
45
109
  """
46
110
 
47
111
  def __new__(cls, *args, **kwargs):
@@ -270,7 +334,7 @@ class GraphModel(layers.GraphLayer, keras.models.Model):
270
334
  """
271
335
  super().load_weights(filepath, *args, **kwargs)
272
336
 
273
- def embedding(self) -> 'FunctionalGraphModel':
337
+ def embedding(self, layer_name: str = None) -> 'FunctionalGraphModel':
274
338
  model = self
275
339
  if not isinstance(model, FunctionalGraphModel):
276
340
  raise ValueError(
@@ -278,11 +342,58 @@ class GraphModel(layers.GraphLayer, keras.models.Model):
278
342
  'it needs to be a `FunctionalGraphModel`. '
279
343
  )
280
344
  inputs = model.input
281
- for layer in model.layers:
282
- if isinstance(layer, layers.Readout):
283
- outputs = layer.output
345
+ if not layer_name:
346
+ for layer in model.layers:
347
+ if isinstance(layer, layers.Readout):
348
+ outputs = layer.output
349
+ else:
350
+ layer = model.get_layer(layer_name)
351
+ outputs = (
352
+ layer.output if isinstance(layer, keras.layers.Layer) else None
353
+ )
354
+ if outputs is None:
355
+ raise ValueError(
356
+ f'Could not find `{layer_name}` or '
357
+ f'`{layer_name} is not a `keras.layers.Layer`.'
358
+ )
284
359
  return self.__class__(inputs, outputs, name=f'{self.name}_embedding')
285
-
360
+
361
def backbone(self) -> 'FunctionalGraphModel':
    """Return the part of the model from its input up to the `Readout` layer.

    If several `Readout` layers are present, the output of the last one
    found in `self.layers` is used.

    Returns:
        A new instance of this model's class mapping `self.input` to the
        `Readout` output, named `'<model name>_backbone'`.

    Raises:
        ValueError: If the model is not a `FunctionalGraphModel`, or if no
            `Readout` layer is found.
    """
    if not isinstance(self, FunctionalGraphModel):
        raise ValueError(
            'Currently, to extract the backbone part of the model, '
            'it needs to be a `FunctionalGraphModel`, with a `Readout` '
            'layer dividing the backbone and the head part of the model.'
        )
    readout_output = None
    for layer in self.layers:
        if isinstance(layer, layers.Readout):
            readout_output = layer.output
    if readout_output is None:
        raise ValueError(
            'Could not extract output. `Readout` layer not found.'
        )
    # Name the sub-model after the part it represents (the backbone).
    return self.__class__(
        self.input, readout_output, name=f'{self.name}_backbone'
    )
378
+
379
def head(self) -> functional.Functional:
    """Return the part of the model that follows the `Readout` layer.

    The head starts at the output of the last `Readout` layer found in
    `self.layers` and ends at the output of the last layer in `self.layers`.

    Returns:
        A `keras.models.Model` named `'<model name>_head'`.

    Raises:
        ValueError: If the model is not a `FunctionalGraphModel`, or if no
            `Readout` layer is found.
    """
    if not isinstance(self, FunctionalGraphModel):
        raise ValueError(
            'Currently, to extract the head part of the model, '
            'it needs to be a `FunctionalGraphModel`, with a `Readout` '
            'layer dividing the backbone and the head part of the model.'
        )
    readouts = [
        candidate for candidate in self.layers
        if isinstance(candidate, layers.Readout)
    ]
    head_inputs = readouts[-1].output if readouts else None
    if head_inputs is None:
        raise ValueError(
            'Could not extract input. `Readout` layer not found.'
        )
    # NOTE(review): assumes the last entry of `self.layers` produces the
    # model output -- confirm for models with several outputs.
    head_outputs = self.layers[-1].output
    return keras.models.Model(
        head_inputs, head_outputs, name=f'{self.name}_head'
    )
396
+
286
397
  def train_step(self, tensor: tensors.GraphTensor) -> dict[str, float]:
287
398
  y = tensor.context.get('label')
288
399
  sample_weight = tensor.context.get('weight')
molcraft/ops.py CHANGED
@@ -82,6 +82,16 @@ def edge_softmax(
82
82
  denominator = gather(denominator, edge_target)
83
83
  return numerator / denominator
84
84
 
85
def edge_weight(
    edge: tf.Tensor,
    edge_weight: tf.Tensor,
) -> tf.Tensor:
    """Multiply `edge` by `edge_weight`.

    Trailing singleton axes are appended to `edge_weight` until its rank
    matches the rank of `edge`, so that the product broadcasts.

    Args:
        edge: Tensor to be weighted.
        edge_weight: Weights; may have a lower rank than `edge`.

    Returns:
        The element-wise product of `edge` and the expanded `edge_weight`.
    """
    rank_gap = (
        len(keras.ops.shape(edge)) - len(keras.ops.shape(edge_weight))
    )
    while rank_gap > 0:
        edge_weight = keras.ops.expand_dims(edge_weight, axis=-1)
        rank_gap -= 1
    return edge * edge_weight
94
+
85
95
  def segment_mean(
86
96
  data: tf.Tensor,
87
97
  segment_ids: tf.Tensor,
molcraft/records.py CHANGED
@@ -36,7 +36,10 @@ def write(
36
36
  if not isinstance(inputs, list):
37
37
  inputs = list(inputs)
38
38
 
39
- example = _featurize_input(inputs[0], featurizer)
39
+ example = inputs[0]
40
+ if isinstance(example, (list, np.ndarray)):
41
+ example = tuple(example)
42
+ example = featurizer(example)
40
43
  if not isinstance(example, tensors.GraphTensor):
41
44
  example = example[0]
42
45
 
@@ -46,7 +49,7 @@ def write(
46
49
  num_processes = mp.cpu_count()
47
50
 
48
51
  if num_files is None:
49
- num_files = min(len(inputs), num_processes)
52
+ num_files = min(len(inputs), max(1, math.ceil(len(inputs) / 1_000)))
50
53
 
51
54
  chunk_size = math.ceil(len(inputs) / num_files)
52
55
  num_files = math.ceil(len(inputs) / chunk_size)
@@ -88,7 +91,7 @@ def write(
88
91
  for process in processes:
89
92
  process.join()
90
93
 
91
- def load(
94
+ def read(
92
95
  path: str,
93
96
  shuffle_files: bool = False
94
97
  ) -> tf.data.Dataset:
@@ -107,13 +110,28 @@ def load(
107
110
  ds = ds.unbatch()
108
111
  return ds
109
112
 
113
def save_spec(path: str, spec: tensors.GraphTensor.Spec) -> None:
    """Write the serialized proto of `spec` to the file at `path`.

    Args:
        path: Destination file, opened in binary write mode.
        spec: The spec to convert to a proto and serialize.
    """
    spec_proto = spec.experimental_as_proto()
    with open(path, 'wb') as stream:
        payload = spec_proto.SerializeToString()
        stream.write(payload)
117
+
118
def load_spec(path: str) -> tensors.GraphTensor.Spec:
    """Read a serialized spec proto from `path` and rebuild the spec.

    Args:
        path: File holding the serialized proto, opened in binary read mode.

    Returns:
        The spec reconstructed from the parsed proto.
    """
    with open(path, 'rb') as stream:
        payload = stream.read()
    spec_cls = tensors.GraphTensor.Spec
    # Parse the bytes with the proto type that belongs to the spec class.
    parsed_proto = spec_cls.experimental_type_proto().FromString(payload)
    return spec_cls.experimental_from_proto(parsed_proto)
127
+
110
128
  def _write_tfrecord(
111
129
  inputs,
112
130
  path: str,
113
131
  featurizer: featurizers.Featurizer,
114
132
  ) -> None:
115
133
 
116
- def write_example(tensor):
134
+ def _write_example(tensor):
117
135
  flat_values = tf.nest.flatten(tensor, expand_composites=True)
118
136
  flat_values = [tf.io.serialize_tensor(value).numpy() for value in flat_values]
119
137
  feature = tf.train.Feature(bytes_list=tf.train.BytesList(value=flat_values))
@@ -122,17 +140,15 @@ def _write_tfrecord(
122
140
 
123
141
  with tf.io.TFRecordWriter(path) as writer:
124
142
  for x in inputs:
125
- tensor = _featurize_input(x, featurizer)
126
- if isinstance(tensor, tensors.GraphTensor):
127
- write_example(tensor)
128
- else:
129
- for t in tensor:
130
- write_example(t)
131
-
132
- def _featurize_input(x, featurizer):
133
- if isinstance(x, (list, np.ndarray)):
134
- x = tuple(x)
135
- return featurizer(x)
143
+ if isinstance(x, (list, np.ndarray)):
144
+ x = tuple(x)
145
+ tensor = featurizer(x)
146
+ if tensor is not None:
147
+ if isinstance(tensor, tensors.GraphTensor):
148
+ _write_example(tensor)
149
+ else:
150
+ for t in tensor:
151
+ _write_example(t)
136
152
 
137
153
  def _serialize_example(
138
154
  feature: dict[str, tf.train.Feature]
@@ -151,19 +167,4 @@ def _parse_example(
151
167
  tf.split(out, len(tf.nest.flatten(spec, expand_composites=True))),
152
168
  tf.nest.flatten(spec, expand_composites=True))]
153
169
  out = tf.nest.pack_sequence_as(spec, tf.nest.flatten(out), expand_composites=True)
154
- return out
155
-
156
- def save_spec(path: str, spec: tensors.GraphTensor.Spec) -> None:
157
- proto = spec.experimental_as_proto()
158
- with open(path, 'wb') as fh:
159
- fh.write(proto.SerializeToString())
160
-
161
- def load_spec(path: str) -> tensors.GraphTensor.Spec:
162
- with open(path, 'rb') as fh:
163
- serialized_proto = fh.read()
164
- spec = tensors.GraphTensor.Spec.experimental_from_proto(
165
- tensors.GraphTensor.Spec
166
- .experimental_type_proto()
167
- .FromString(serialized_proto))
168
- return spec
169
-
170
+ return out
molcraft/tensors.py CHANGED
@@ -224,7 +224,7 @@ class GraphTensor(tf.experimental.BatchableExtensionType):
224
224
  raise ValueError('`edge_attr` needs to be `source` or `target`.')
225
225
  edge_attr = self.edge[edge_attr]
226
226
  if 'weight' in self.edge:
227
- edge_attr = edge_attr * self.edge['weight']
227
+ edge_attr = ops.edge_weight(edge_attr, self.edge['weight'])
228
228
  return ops.aggregate(edge_attr, self.edge[edge_type], self.num_nodes, mode=mode)
229
229
 
230
230
  def propagate(self, add_edge_feature: bool = False):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: molcraft
3
- Version: 0.1.0a4
3
+ Version: 0.1.0a6
4
4
  Summary: Graph Neural Networks for Molecular Machine Learning
5
5
  Author-email: Alexander Kensert <alexander.kensert@gmail.com>
6
6
  License: MIT License
@@ -55,7 +55,7 @@ Dynamic: license-file
55
55
  - Customizable and serializable **layers** and **models**
56
56
  - Customizable **GraphTensor**
57
57
  - Fast and efficient featurization of molecular graphs
58
- - Efficient and easy-to-use input pipelines using TF **records**
58
+ - Fast and efficient input pipelines using TF **records**
59
59
 
60
60
  ## Examples
61
61
 
@@ -106,20 +106,7 @@ print(pred)
106
106
  # featurizers.save_featurizer(featurizer, '/tmp/featurizer.json')
107
107
  # models.save_model(model, '/tmp/model.keras')
108
108
 
109
- # featurizers.load_featurizer('/tmp/featurizer.json')
110
- # models.load_model('/tmp/model.keras')
109
+ # loaded_featurizer = featurizers.load_featurizer('/tmp/featurizer.json')
110
+ # loaded_model = models.load_model('/tmp/model.keras')
111
111
  ```
112
112
 
113
- ## Installation
114
-
115
- Install the pre-release of molcraft via pip:
116
-
117
- ```bash
118
- pip install molcraft --pre
119
- ```
120
-
121
- with GPU support:
122
-
123
- ```bash
124
- pip install molcraft[gpu] --pre
125
- ```
@@ -0,0 +1,19 @@
1
+ molcraft/__init__.py,sha256=eKMk4e5Wki4Ay7_BUuY7B-j3Po2l5FDDefPjkFQw3OM,463
2
+ molcraft/callbacks.py,sha256=mkz4ALjJFPy8nHd2nCAuMbKceKnq4tIpZhUuUOvie2Y,1209
3
+ molcraft/chem.py,sha256=apaECcQSuAMs3Tm12yc6ne4x0BGx5JzfoRhTC1WMhlI,20695
4
+ molcraft/conformers.py,sha256=rojo8OaZrKAesx0JA5kf-JVNEpmsQyLSpcxbWhV9cd4,4324
5
+ molcraft/datasets.py,sha256=rFgXTC1ZheLhfgQgcCspP_wEE54a33PIneH7OplbS-8,4047
6
+ molcraft/descriptors.py,sha256=gKqlJ3BqJLTeR2ft8isftSEaJDC8cv64eTq5IYhy4XM,3032
7
+ molcraft/features.py,sha256=69oV_GHNdBKPA4sp6Tpo6brvNmaauk_IVIzNjX7VDmg,13648
8
+ molcraft/featurizers.py,sha256=aJJibnHCxvSu3bNbE2xQk34QvFb47Mnm__0MxlRLA0w,27323
9
+ molcraft/layers.py,sha256=RyKmdHmHlYJJL15LvHH32daTKsChJ_pHmHUnpUcwS1U,73437
10
+ molcraft/losses.py,sha256=JEKZEX2f8vDgky_fUocsF8vZjy9VMzRjZUBa20Uf9Qw,1065
11
+ molcraft/models.py,sha256=Rl9CkQlOVkj20TLjGlwI8vaQwX07EqqWz22bFYtJlpk,22636
12
+ molcraft/ops.py,sha256=eAi79aawJwxuIVVamjA1kPRHGlUm0PsvN-7d2CYu15I,4441
13
+ molcraft/records.py,sha256=0sjOdcr266ZER4F-aTBQ3AVPNAwflKWNiNJVsSc1-PQ,5370
14
+ molcraft/tensors.py,sha256=EOUKx496KUZsjA1zA2ABc7tU_TW3Jv7AXDsug_QsLbA,22407
15
+ molcraft-0.1.0a6.dist-info/licenses/LICENSE,sha256=sbVeqlrtZ0V63uYhZGL5dCxUm8rBAOqe2avyA1zIQNk,1074
16
+ molcraft-0.1.0a6.dist-info/METADATA,sha256=Zzl1K3WleDp056zbLChy5B1AQ3U26t22oMkIKRUpbMY,4063
17
+ molcraft-0.1.0a6.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
18
+ molcraft-0.1.0a6.dist-info/top_level.txt,sha256=dENV6MfOceshM6MQCgJlcN1ojZkiCL9B4F7XyUge3QM,9
19
+ molcraft-0.1.0a6.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.0.0)
2
+ Generator: setuptools (80.3.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1 +0,0 @@
1
- from molcraft.experimental import peptides