molcraft 0.1.0a8__py3-none-any.whl → 0.1.0a10__py3-none-any.whl

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.

Potentially problematic release: this version of molcraft might be problematic.

molcraft/__init__.py CHANGED
@@ -1,4 +1,4 @@
-__version__ = '0.1.0a8'
+__version__ = '0.1.0a10'
 
 import os
 os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
molcraft/featurizers.py CHANGED
@@ -280,8 +280,6 @@ class MolGraphFeaturizer(Featurizer):
                     mol.get_bond_between_atoms(atom_i, atom_j).index
                 )
                 edge['feature'] = bond_feature[bond_indices]
-                if self.self_loops:
-                    edge['self_loop'] = (edge['source'] == edge['target'])
             else:
                 paths = chem.get_shortest_paths(
                     mol, radius=self.radius, self_loops=self.self_loops
@@ -376,7 +374,7 @@ class MolGraphFeaturizer(Featurizer):
         num_nodes = node['feature'].shape[0]
         node = _add_super_nodes(node, num_super_nodes)
         edge = _add_super_edges(
-            edge, num_nodes, num_super_nodes, self.feature_dtype, self.index_dtype
+            edge, num_nodes, num_super_nodes, self.feature_dtype, self.index_dtype, self.self_loops
         )
         return node, edge
 
@@ -708,11 +706,15 @@ def _add_super_edges(
     num_super_nodes: int,
     feature_dtype: str,
     index_dtype: str,
+    self_loops: bool,
 ) -> dict[str, np.ndarray]:
     edge = copy.deepcopy(edge)
-    super_node_indices = (
-        np.repeat(np.arange(num_super_nodes), [num_nodes]) + num_nodes
-    )
+
+    super_node_indices = np.arange(num_super_nodes) + num_nodes
+    if self_loops:
+        edge['source'] = np.concatenate([edge['source'], super_node_indices])
+        edge['target'] = np.concatenate([edge['target'], super_node_indices])
+    super_node_indices = np.repeat(super_node_indices, [num_nodes])
     node_indices = (
         np.tile(np.arange(num_nodes), [num_super_nodes])
     )
@@ -727,6 +729,8 @@ def _add_super_edges(
     if 'feature' in edge:
         num_edges = int(edge['feature'].shape[0])
         num_super_edges = int(num_super_nodes * num_nodes * 2)
+        if self_loops:
+            num_super_edges += num_super_nodes
         edge['super'] = np.asarray(
             ([False] * num_edges + [True] * num_super_edges),
             dtype=bool
@@ -741,12 +745,6 @@ def _add_super_edges(
         ]
     )
 
-    if 'self_loop' in edge:
-        edge['self_loop'] = np.pad(
-            edge['self_loop'], [(0, num_nodes * num_super_nodes * 2)],
-            constant_values=False,
-        )
-
     return edge
 
 
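Note on the featurizers.py change: self-loop bookkeeping no longer flows through a boolean `self_loop` edge field. Instead, when `self_loops` is enabled, `_add_super_edges` appends one explicit self-loop edge (source == target) per super node. A minimal sketch of the new index construction, assuming 1-D integer index arrays; the bidirectional super-edge concatenation is inferred from `num_super_edges = num_super_nodes * num_nodes * 2` and simplified here:

```python
import numpy as np

def add_super_edges_sketch(edge, num_nodes, num_super_nodes, self_loops):
    # Super nodes are appended after the original nodes, so their
    # indices start at `num_nodes`.
    super_node_indices = np.arange(num_super_nodes) + num_nodes
    if self_loops:
        # One explicit self-loop edge per super node (source == target).
        edge['source'] = np.concatenate([edge['source'], super_node_indices])
        edge['target'] = np.concatenate([edge['target'], super_node_indices])
    # Each super node connects to every original node, in both directions.
    repeated_super = np.repeat(super_node_indices, num_nodes)
    node_indices = np.tile(np.arange(num_nodes), num_super_nodes)
    edge['source'] = np.concatenate([edge['source'], repeated_super, node_indices])
    edge['target'] = np.concatenate([edge['target'], node_indices, repeated_super])
    return edge

# Tiny example: a 3-node path graph plus one super node (index 3).
edge = {'source': np.array([0, 1]), 'target': np.array([1, 2])}
edge = add_super_edges_sketch(edge, num_nodes=3, num_super_nodes=1, self_loops=True)
print(edge['source'])  # [0 1 3 3 3 3 0 1 2]
print(edge['target'])  # [1 2 3 0 1 2 3 3 3]
```

This also explains the `num_super_edges += num_super_nodes` adjustment: with self-loops on, each super node contributes one extra edge on top of its `2 * num_nodes` connections.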
molcraft/layers.py CHANGED
@@ -350,7 +350,7 @@ class GraphConv(GraphLayer):
         )
         if self._project_residual:
             warnings.warn(
-                '`skip_connect` is set to `True`, but found incompatible dim '
+                '`skip_connect` is set to `True`, but found incompatible dim '
                 'between input (node feature dim) and output (`self.units`). '
                 'Automatically applying a projection layer to residual to '
                 'match input and output. ',
@@ -369,7 +369,7 @@ class GraphConv(GraphLayer):
         self._message_intermediate_activation = self.activation
         self._message_final_dense = self.get_dense(self.units)
 
-        has_overridden_aggregate = self.__class__.message != GraphConv.aggregate
+        has_overridden_aggregate = self.__class__.message != GraphConv.aggregate
         if not has_overridden_aggregate:
             pass
 
@@ -401,13 +401,15 @@ class GraphConv(GraphLayer):
             residual = self._residual_dense(residual)
 
         message = self.message(tensor)
-        if not isinstance(message, tensors.GraphTensor):
+        add_message = not isinstance(message, tensors.GraphTensor)
+        if add_message:
             message = tensor.update({'edge': {'message': message}})
         elif not 'message' in message.edge:
             raise ValueError('Could not find `message` in `edge` output.')
-
+
         aggregate = self.aggregate(message)
-        if not isinstance(aggregate, tensors.GraphTensor):
+        add_aggregate = not isinstance(aggregate, tensors.GraphTensor)
+        if add_aggregate:
             aggregate = tensor.update({'node': {'aggregate': aggregate}})
         elif not 'aggregate' in aggregate.node:
             raise ValueError('Could not find `aggregate` in `node` output.')
@@ -421,6 +423,16 @@ class GraphConv(GraphLayer):
         if update.node['feature'].shape[-1] != self.units:
             raise ValueError('Updated node `feature` is not equal to `self.units`.')
 
+        if add_message and add_aggregate:
+            update = update.update({'node': {'aggregate': None}, 'edge': {'message': None}})
+        elif add_message:
+            update = update.update({'edge': {'message': None}})
+        elif add_aggregate:
+            update = update.update({'node': {'aggregate': None}})
+
+        if not self._skip_connect and not self._normalize:
+            return update
+
         feature = update.node['feature']
 
         if self._skip_connect:
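Note on the GraphConv change: `propagate` now records whether it injected the intermediate `message`/`aggregate` entries itself, strips them from the output if so, and returns early when neither `skip_connect` nor `normalize` is configured. A dict-based sketch of the same pattern (hypothetical names; the real code operates on `tensors.GraphTensor`, where `update(...)` with `None` removes a key):

```python
def finalize_update(update, add_message, add_aggregate,
                    skip_connect=False, normalize=False):
    """Drop temporary entries this layer added itself, then exit early
    when no skip connection or normalization remains to be applied."""
    if add_message:
        update['edge'].pop('message', None)    # remove only what we injected
    if add_aggregate:
        update['node'].pop('aggregate', None)
    if not skip_connect and not normalize:
        return update                          # early return: nothing left to do
    # ...skip-connect and/or normalization of update['node']['feature']...
    return update
```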
@@ -1303,17 +1315,12 @@ class NodeEmbedding(GraphLayer):
         dim: int = None,
         normalize: bool = False,
         embed_context: bool = False,
-        allow_reconstruction: bool = False,
-        allow_masking: bool = False,
         **kwargs
     ) -> None:
         super().__init__(**kwargs)
         self.dim = dim
         self._normalize = normalize
         self._embed_context = embed_context
-        self._masking_rate = None
-        self._allow_masking = allow_masking
-        self._allow_reconstruction = allow_reconstruction
 
     def build(self, spec: tensors.GraphTensor.Spec) -> None:
         feature_dim = spec.node['feature'].shape[-1]
@@ -1327,8 +1334,6 @@ class NodeEmbedding(GraphLayer):
             self._embed_context = False
         if self._has_super and not self._embed_context:
             self._super_feature = self.get_weight(shape=[self.dim], name='super_node_feature')
-        if self._allow_masking:
-            self._mask_feature = self.get_weight(shape=[self.dim], name='mask_node_feature')
         if self._embed_context:
             self._context_dense = self.get_dense(self.dim)
 
@@ -1342,28 +1347,18 @@ class NodeEmbedding(GraphLayer):
     def propagate(self, tensor: tensors.GraphTensor) -> tensors.GraphTensor:
         feature = self._node_dense(tensor.node['feature'])
 
-        if self._has_super:
-            super_feature = (0 if self._embed_context else self._super_feature)
+        if self._has_super and not self._embed_context:
             super_mask = keras.ops.expand_dims(tensor.node['super'], 1)
-            feature = keras.ops.where(super_mask, super_feature, feature)
+            feature = keras.ops.where(super_mask, self._super_feature, feature)
 
         if self._embed_context:
             context_feature = self._context_dense(tensor.context['feature'])
             feature = ops.scatter_update(feature, tensor.node['super'], context_feature)
             tensor = tensor.update({'context': {'feature': None}})
 
-        apply_mask = (self._allow_masking and 'mask' in tensor.node)
-        if apply_mask:
-            mask = keras.ops.expand_dims(tensor.node['mask'], -1)
-            feature = keras.ops.where(mask, self._mask_feature, feature)
-        elif self._allow_masking:
-            feature = feature + (self._mask_feature * 0.0)
-
         feature = self._norm(feature)
 
-        if not self._allow_reconstruction:
-            return tensor.update({'node': {'feature': feature}})
-        return tensor.update({'node': {'feature': feature, 'target_feature': feature}})
+        return tensor.update({'node': {'feature': feature}})
 
     def get_config(self) -> dict:
         config = super().get_config()
@@ -1371,8 +1366,6 @@ class NodeEmbedding(GraphLayer):
             'dim': self.dim,
             'normalize': self._normalize,
             'embed_context': self._embed_context,
-            'allow_masking': self._allow_masking,
-            'allow_reconstruction': self._allow_reconstruction,
         })
         return config
 
@@ -1389,39 +1382,30 @@ class EdgeEmbedding(GraphLayer):
         self,
         dim: int = None,
         normalize: bool = False,
-        allow_masking: bool = True,
         **kwargs
     ) -> None:
         super().__init__(**kwargs)
         self.dim = dim
         self._normalize = normalize
-        self._masking_rate = None
-        self._allow_masking = allow_masking
 
     def build(self, spec: tensors.GraphTensor.Spec) -> None:
         feature_dim = spec.edge['feature'].shape[-1]
         if not self.dim:
             self.dim = feature_dim
-        self._edge_dense = self.get_dense(self.dim)
+        self._edge_dense = self.get_dense(self.dim)
+
+        self._self_loop_feature = self.get_weight(shape=[self.dim], name='self_loop_edge_feature')
 
         self._has_super = 'super' in spec.edge
-        self._has_self_loop = 'self_loop' in spec.edge
         if self._has_super:
             self._super_feature = self.get_weight(shape=[self.dim], name='super_edge_feature')
-        if self._has_self_loop:
-            self._self_loop_feature = self.get_weight(shape=[self.dim], name='self_loop_edge_feature')
-        if self._allow_masking:
-            self._mask_feature = self.get_weight(shape=[self.dim], name='mask_edge_feature')
-
-        if self._normalize:
-            if str(self._normalize).lower().startswith('batch'):
-                self._norm = keras.layers.BatchNormalization(
-                    name='output_batch_norm'
-                )
-            else:
-                self._norm = keras.layers.LayerNormalization(
-                    name='output_layer_norm'
-                )
+
+        if not self._normalize:
+            self._norm = keras.layers.Identity()
+        elif str(self._normalize).lower().startswith('layer'):
+            self._norm = keras.layers.LayerNormalization()
+        else:
+            self._norm = keras.layers.BatchNormalization()
 
     def propagate(self, tensor: tensors.GraphTensor) -> tensors.GraphTensor:
         feature = self._edge_dense(tensor.edge['feature'])
@@ -1430,51 +1414,18 @@ class EdgeEmbedding(GraphLayer):
             super_mask = keras.ops.expand_dims(tensor.edge['super'], 1)
             feature = keras.ops.where(super_mask, self._super_feature, feature)
 
-        if self._has_self_loop:
-            self_loop_mask = keras.ops.expand_dims(tensor.edge['self_loop'], 1)
-            feature = keras.ops.where(self_loop_mask, self._self_loop_feature, feature)
-
-        if (
-            self._allow_masking and
-            self._masking_rate is not None and
-            self._masking_rate > 0
-        ):
-            random = keras.random.uniform(shape=[tensor.num_edges])
-            mask = random <= self._masking_rate
-            if self._has_super:
-                mask = keras.ops.logical_and(
-                    mask, keras.ops.logical_not(tensor.edge['super'])
-                )
-            mask = keras.ops.expand_dims(mask, -1)
-            feature = keras.ops.where(mask, self._mask_feature, feature)
-        elif self._allow_masking:
-            # Simply added to silence warning ('no gradients for variables ...')
-            feature += (0.0 * self._mask_feature)
+        self_loop_mask = keras.ops.expand_dims(tensor.edge['source'] == tensor.edge['target'], 1)
+        feature = keras.ops.where(self_loop_mask, self._self_loop_feature, feature)
 
-        if self._normalize:
-            feature = self._norm(feature)
-
-        return tensor.update({'edge': {'feature': feature, 'embedding': feature}})
+        feature = self._norm(feature)
 
-    @property
-    def masking_rate(self):
-        return self._masking_rate
-
-    @masking_rate.setter
-    def masking_rate(self, rate: float):
-        if not self._allow_masking and rate is not None:
-            raise ValueError(
-                f'Cannot set `masking_rate` for layer {self} '
-                'as `allow_masking` was set to `False`.'
-            )
-        self._masking_rate = float(rate)
+        return tensor.update({'edge': {'feature': feature}})
 
     def get_config(self) -> dict:
         config = super().get_config()
         config.update({
             'dim': self.dim,
             'normalize': self._normalize,
-            'allow_masking': self._allow_masking
         })
         return config
 
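Note on the embedding changes: masking and reconstruction support is removed from `NodeEmbedding`/`EdgeEmbedding`, and `EdgeEmbedding` now derives the self-loop mask directly from the edge indices (an edge is a self-loop exactly when its source equals its target) instead of reading a precomputed `self_loop` field. A standalone sketch of that masking step with `keras.ops` (array values are illustrative):

```python
import numpy as np
import keras

source = np.array([0, 1, 3])               # edge sources
target = np.array([1, 2, 3])               # edge targets; edge 2 is a self-loop
feature = keras.ops.ones((3, 4))           # [num_edges, dim] edge features
self_loop_feature = keras.ops.zeros((4,))  # a learned weight in the real layer

# Shape [num_edges, 1] so the mask broadcasts across the feature dim.
self_loop_mask = keras.ops.expand_dims(source == target, 1)
feature = keras.ops.where(self_loop_mask, self_loop_feature, feature)
# Row 2 now holds the dedicated self-loop embedding; rows 0-1 are untouched.
```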
molcraft/ops.py CHANGED
@@ -105,7 +105,11 @@ def segment_mean(
         lambda: 0
     )
     if backend.backend() == 'tensorflow':
-        return tf.math.unsorted_segment_mean(
+        segment_mean_fn = (
+            tf.math.unsorted_segment_mean if not sorted else
+            tf.math.segment_mean
+        )
+        return segment_mean_fn(
             data=data,
             segment_ids=segment_ids,
             num_segments=num_segments
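Note on the ops.py change: `segment_mean` now dispatches to `tf.math.segment_mean` when the caller promises sorted segment IDs, which lets TensorFlow pick the cheaper sorted kernel. A standalone sketch of the two kernels (not molcraft code):

```python
import tensorflow as tf

data = tf.constant([[1.0], [3.0], [5.0]])
segment_ids = tf.constant([0, 0, 1])  # already sorted (non-decreasing)

# Sorted kernel: relies on segment_ids being non-decreasing.
sorted_mean = tf.math.segment_mean(data=data, segment_ids=segment_ids)

# Unsorted kernel: order-agnostic, but needs num_segments up front.
unsorted_mean = tf.math.unsorted_segment_mean(
    data=data, segment_ids=segment_ids, num_segments=2
)
print(sorted_mean.numpy())    # [[2.], [5.]]
print(unsorted_mean.numpy())  # [[2.], [5.]]
```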
molcraft/records.py CHANGED
@@ -51,19 +51,24 @@ def write(
     if num_files is None:
         num_files = min(len(inputs), max(1, math.ceil(len(inputs) / 1_000)))
 
-    chunk_size = math.ceil(len(inputs) / num_files)
-    num_files = math.ceil(len(inputs) / chunk_size)
+    num_examples = len(inputs)
+    chunk_sizes = [0] * num_files
+    for i in range(num_examples):
+        chunk_sizes[i % num_files] += 1
+
+    input_chunks = []
+    current_index = 0
+    for size in chunk_sizes:
+        input_chunks.append(inputs[current_index: current_index + size])
+        current_index += size
+
+    assert current_index == num_examples
 
     paths = [
         os.path.join(path, f'tfrecord-{i:04d}.tfrecord')
         for i in range(num_files)
     ]
 
-    input_chunks = [
-        inputs[i * chunk_size: (i + 1) * chunk_size]
-        for i in range(num_files)
-    ]
-
     if not multiprocessing:
         for path, input_chunk in zip(paths, input_chunks):
             _write_tfrecord(input_chunk, path, featurizer)
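Note on the records.py change: the old ceil-based chunking could silently shrink `num_files` (e.g. 9 inputs over 4 files gave `chunk_size = math.ceil(9 / 4) = 3` and then `num_files = math.ceil(9 / 3) = 3`), while the new round-robin sizing always yields exactly `num_files` chunks whose sizes differ by at most one. A standalone sketch of the new sizing logic:

```python
def balanced_chunk_sizes(num_examples: int, num_files: int) -> list[int]:
    # Round-robin assignment: sizes sum to num_examples and differ by
    # at most one, so exactly num_files files get written.
    sizes = [0] * num_files
    for i in range(num_examples):
        sizes[i % num_files] += 1
    return sizes

print(balanced_chunk_sizes(10, 4))  # [3, 3, 2, 2]
print(balanced_chunk_sizes(9, 4))   # [3, 2, 2, 2]  (old scheme collapsed to 3 files)
```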
{molcraft-0.1.0a8.dist-info → molcraft-0.1.0a10.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: molcraft
-Version: 0.1.0a8
+Version: 0.1.0a10
 Summary: Graph Neural Networks for Molecular Machine Learning
 Author-email: Alexander Kensert <alexander.kensert@gmail.com>
 License: MIT License
@@ -25,7 +25,7 @@ License: MIT License
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 
-Project-URL: Homepage, https://github.com/akensert/molcraft
+Project-URL: Homepage, https://github.com/compomics/molcraft
 Keywords: python,machine-learning,deep-learning,graph-neural-networks,molecular-machine-learning,molecular-graphs,computational-chemistry,computational-biology
 Classifier: Programming Language :: Python :: 3
 Classifier: Intended Audience :: Science/Research
molcraft-0.1.0a10.dist-info/RECORD ADDED
@@ -0,0 +1,19 @@
+molcraft/__init__.py,sha256=1te1sOK-k4LT9l-mlxRmOhY3_Za-7jPezx_B3gdToiQ,464
+molcraft/callbacks.py,sha256=x5HnkZhqcFRrW6xdApt_jZ4X08A-0fxcnFKfdmRKa0c,3571
+molcraft/chem.py,sha256=zHH7iX0ZJ7QmP-YqR_IXCpylTwCXHXptWf1DsblnZR4,21496
+molcraft/conformers.py,sha256=K6ZtiSUNDN_fwqGP9JrPcwALLFFvlMlF_XejEJH3Sr4,4205
+molcraft/datasets.py,sha256=rFgXTC1ZheLhfgQgcCspP_wEE54a33PIneH7OplbS-8,4047
+molcraft/descriptors.py,sha256=gKqlJ3BqJLTeR2ft8isftSEaJDC8cv64eTq5IYhy4XM,3032
+molcraft/features.py,sha256=aBYxDfQqQsVuyjKaPUlwEgvCjbNZ-FJhuKo2Cg5ajrA,13554
+molcraft/featurizers.py,sha256=ybJ1djH747cgsftztWHxAX2iTq6k03MYr17btQ2Gtcs,27063
+molcraft/layers.py,sha256=G-ZFhnyiSny0YHGXg5tBYrvmhZsurBEJj_0mHD1zmlw,60135
+molcraft/losses.py,sha256=JEKZEX2f8vDgky_fUocsF8vZjy9VMzRjZUBa20Uf9Qw,1065
+molcraft/models.py,sha256=FLXpO3OUmRxLmxG3MjBK4ZwcVFlea1gqEgs1ibKly2w,23263
+molcraft/ops.py,sha256=PVxKfY_XbWCyntiSnmpyeBb-coFGT_VNNP9QzmeUwC0,4870
+molcraft/records.py,sha256=MbvYkcCunbAmpy_MWXmQ9WBGi2WvwxFUlwQSPKPvSSk,5534
+molcraft/tensors.py,sha256=EOUKx496KUZsjA1zA2ABc7tU_TW3Jv7AXDsug_QsLbA,22407
+molcraft-0.1.0a10.dist-info/licenses/LICENSE,sha256=sbVeqlrtZ0V63uYhZGL5dCxUm8rBAOqe2avyA1zIQNk,1074
+molcraft-0.1.0a10.dist-info/METADATA,sha256=Tmh4KckmdKr20q8RVPOKlogt343qTdOMzci6zgT6CfQ,4064
+molcraft-0.1.0a10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+molcraft-0.1.0a10.dist-info/top_level.txt,sha256=dENV6MfOceshM6MQCgJlcN1ojZkiCL9B4F7XyUge3QM,9
+molcraft-0.1.0a10.dist-info/RECORD,,
{molcraft-0.1.0a8.dist-info → molcraft-0.1.0a10.dist-info}/WHEEL RENAMED
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.7.1)
+Generator: setuptools (80.9.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
 
molcraft-0.1.0a8.dist-info/RECORD REMOVED
@@ -1,19 +0,0 @@
-molcraft/__init__.py,sha256=s8dUh6Fjq34j2aNgF13Y2NUkDwBWmsOAuIJVgY3gwCE,463
-molcraft/callbacks.py,sha256=x5HnkZhqcFRrW6xdApt_jZ4X08A-0fxcnFKfdmRKa0c,3571
-molcraft/chem.py,sha256=zHH7iX0ZJ7QmP-YqR_IXCpylTwCXHXptWf1DsblnZR4,21496
-molcraft/conformers.py,sha256=K6ZtiSUNDN_fwqGP9JrPcwALLFFvlMlF_XejEJH3Sr4,4205
-molcraft/datasets.py,sha256=rFgXTC1ZheLhfgQgcCspP_wEE54a33PIneH7OplbS-8,4047
-molcraft/descriptors.py,sha256=gKqlJ3BqJLTeR2ft8isftSEaJDC8cv64eTq5IYhy4XM,3032
-molcraft/features.py,sha256=aBYxDfQqQsVuyjKaPUlwEgvCjbNZ-FJhuKo2Cg5ajrA,13554
-molcraft/featurizers.py,sha256=qNmXSOAeplICN3j-nzvWACVuKoJ_ZBzhYP9LterKVH8,27042
-molcraft/layers.py,sha256=KKaH58zuov5aARj72BS_xK3ZQEwSFJrIPkoXQAAcqz8,62285
-molcraft/losses.py,sha256=JEKZEX2f8vDgky_fUocsF8vZjy9VMzRjZUBa20Uf9Qw,1065
-molcraft/models.py,sha256=FLXpO3OUmRxLmxG3MjBK4ZwcVFlea1gqEgs1ibKly2w,23263
-molcraft/ops.py,sha256=dLIUq-KG8nOzEcphJqNbF_f82VZRDNrB1UKrcPt5JNM,4752
-molcraft/records.py,sha256=0sjOdcr266ZER4F-aTBQ3AVPNAwflKWNiNJVsSc1-PQ,5370
-molcraft/tensors.py,sha256=EOUKx496KUZsjA1zA2ABc7tU_TW3Jv7AXDsug_QsLbA,22407
-molcraft-0.1.0a8.dist-info/licenses/LICENSE,sha256=sbVeqlrtZ0V63uYhZGL5dCxUm8rBAOqe2avyA1zIQNk,1074
-molcraft-0.1.0a8.dist-info/METADATA,sha256=CtHK0DVlQECWUdlhg0KzvvpPyUD150BSyfzkdNF3fT8,4062
-molcraft-0.1.0a8.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
-molcraft-0.1.0a8.dist-info/top_level.txt,sha256=dENV6MfOceshM6MQCgJlcN1ojZkiCL9B4F7XyUge3QM,9
-molcraft-0.1.0a8.dist-info/RECORD,,