bayesianflow-for-chem 1.4.1__py3-none-any.whl → 1.4.3__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.


bayesianflow_for_chem/__init__.py
@@ -7,5 +7,5 @@ from . import data, tool, train, scorer
 from .model import ChemBFN, MLP, EnsembleChemBFN

 __all__ = ["data", "tool", "train", "scorer", "ChemBFN", "MLP", "EnsembleChemBFN"]
-__version__ = "1.4.1"
+__version__ = "1.4.3"
 __author__ = "Nianze A. Tao (Omozawa Sueno)"

bayesianflow_for_chem/model.py
@@ -55,8 +55,16 @@ class Linear(nn.Linear):
         :rtype: None
         """
         assert r > 0, "Rank should be larger than 0."
-        self.lora_A = nn.Parameter(self.weight.new_zeros((r, self.in_features)))
-        self.lora_B = nn.Parameter(self.weight.new_zeros((self.out_features, r)))
+        try:
+            self.lora_A = nn.Parameter(self.weight.new_zeros((r, self.in_features)))
+            self.lora_B = nn.Parameter(self.weight.new_zeros((self.out_features, r)))
+        except NotImplementedError:
+            self.lora_A = nn.Parameter(
+                torch.zeros((r, self.in_features), device=self.weight.device)
+            )
+            self.lora_B = nn.Parameter(
+                torch.zeros((self.out_features, r), device=self.weight.device)
+            )
         self.scaling = lora_alpha / r
         self.lora_dropout = lora_dropout
         self.lora_enabled = True
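
For orientation: the `lora_A`/`lora_B` parameters initialised above enter the layer's forward pass as a low-rank update added to the frozen base projection (the removed `QuantisedLinear.forward` further down in this diff shows the same pattern). A minimal, self-contained sketch with toy sizes, not the package's own code path:

    import torch
    import torch.nn.functional as F

    in_features, out_features, r, lora_alpha, lora_dropout = 16, 32, 8, 1, 0.0
    base = torch.nn.Linear(in_features, out_features)      # stands in for the frozen weight
    lora_A = torch.nn.Parameter(torch.zeros(r, in_features))
    lora_B = torch.nn.Parameter(torch.zeros(out_features, r))
    torch.nn.init.kaiming_uniform_(lora_A, a=5**0.5)        # mirrors enable_lora elsewhere in this diff
    x = torch.randn(4, in_features)

    delta = (
        F.dropout(x, lora_dropout, training=False)
        @ lora_A.transpose(0, 1)                            # (..., in_features) -> (..., r)
        @ lora_B.transpose(0, 1)                            # (..., r) -> (..., out_features)
    ) * (lora_alpha / r)                                    # scaling = lora_alpha / r
    y = base(x) + delta                                     # delta is zero until lora_B is trained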

@@ -1207,23 +1215,23 @@ class EnsembleChemBFN(ChemBFN):
         )

     def quantise(
-        self, quantise_method: Optional[Callable[[ChemBFN], nn.Module]] = None
+        self, quantise_method: Optional[Callable[[ChemBFN], None]] = None
     ) -> None:
         """
         Quantise the submodels. \n
         This method should be called, if necessary, before `torch.compile()`.

-        :param quantise_method: quantisation method; default is `bayesianflow_for_chem.tool.quantise_model`
+        :param quantise_method: quantisation method; default is `bayesianflow_for_chem.tool.quantise_model_`
         :type quantise_method: callable | None
         :return:
         :rtype: None
         """
         if quantise_method is None:
-            from bayesianflow_for_chem.tool import quantise_model
+            from bayesianflow_for_chem.tool import quantise_model_

-            quantise_method = quantise_model
-        for k, v in self.models.items():
-            self.models[k] = quantise_method(v)
+            quantise_method = quantise_model_
+        for _, v in self.models.items():
+            quantise_method(v)

     def jit(self, freeze: bool = False) -> None:
         """

bayesianflow_for_chem/scorer.py
@@ -5,10 +5,12 @@ Define essential scorers.
 """
 from typing import List, Callable, Union, Optional
 import torch
+import numpy as np
 from torch import Tensor
 from rdkit import RDLogger
 from rdkit.Contrib.SA_Score import sascorer  # type: ignore
 from rdkit.Chem import MolFromSmiles, QED
+from scipy.stats import wasserstein_distance

 RDLogger.DisableLog("rdApp.*")  # type: ignore


@@ -49,6 +51,27 @@ def sa_score(smiles: str) -> float:
     return sascorer.calculateScore(MolFromSmiles(smiles))


+def spectra_wasserstein_score(
+    spectrum_u: np.ndarray, spectrum_v: np.ndarray, x_axis: np.ndarray
+) -> float:
+    """
+    Return the Wasserstein distance (earth mover's distance) between two
+    continuous spectra, scaled by the area under the first spectrum curve `spectrum_u`.
+
+    :param spectrum_u: the reference spectrum
+    :param spectrum_v: the spectrum to be compared against the reference
+    :param x_axis: the shared x-axis of the spectra
+    :type spectrum_u: numpy.ndarray
+    :type spectrum_v: numpy.ndarray
+    :type x_axis: numpy.ndarray
+    :return: spectra Wasserstein score
+    :rtype: float
+    """
+    assert spectrum_u.size == spectrum_v.size, "Spectra sizes should be matched."
+    a = np.sqrt(np.trapezoid(spectrum_u, x_axis))
+    return (wasserstein_distance(spectrum_u, spectrum_v) / a).item()
+
+
 class Scorer:
     def __init__(
         self,
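
A small, self-contained usage sketch of the new scorer (synthetic Gaussian spectra on an arbitrary grid; all numbers are illustrative assumptions):

    import numpy as np
    from bayesianflow_for_chem.scorer import spectra_wasserstein_score

    x_axis = np.linspace(200.0, 800.0, 601)                   # shared x-axis
    spectrum_u = np.exp(-(((x_axis - 350.0) / 20.0) ** 2))    # reference spectrum
    spectrum_v = np.exp(-(((x_axis - 365.0) / 25.0) ** 2))    # spectrum being compared
    score = spectra_wasserstein_score(spectrum_u, spectrum_v, x_axis)
    # Lower scores mean the two spectra are closer; the distance is scaled by the
    # square root of the area under spectrum_u, per the function body above.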

@@ -126,7 +149,7 @@ class Scorer:
             ]
             for i, scorer in enumerate(self.scorers)
         ]
-        loss = (e_k * p).sum(2).mean(1) * torch.tensor(scores, device=p.device).mean(0)
+        loss = (e_k * p).sum(2).mean(1) * p.new_tensor(scores).mean(0)
         return loss.mean()



bayesianflow_for_chem/tool.py
@@ -6,16 +6,12 @@ Essential tools.
 import csv
 import random
 import warnings
-from copy import deepcopy
 from pathlib import Path
 from typing import List, Dict, Tuple, Union, Optional
 import torch
 import numpy as np
-import torch.nn as nn
 from torch import cuda, Tensor, softmax
-from torch.ao import quantization
 from torch.utils.data import DataLoader
-from typing_extensions import Self
 from rdkit.Chem import (
     rdDetermineBonds,
     GetFormalCharge,

@@ -37,7 +33,7 @@ from sklearn.metrics import (
     root_mean_squared_error,
 )
 from .data import VOCAB_KEYS
-from .model import ChemBFN, MLP, Linear, EnsembleChemBFN
+from .model import ChemBFN, MLP, EnsembleChemBFN


 def _find_device() -> torch.device:

@@ -386,145 +382,46 @@ def inpaint(
     ]


-def quantise_model(model: ChemBFN) -> nn.Module:
+def quantise_model_(model: ChemBFN) -> None:
     """
-    Dynamic quantisation of the trained model to `torch.qint8` data type.
+    In-place dynamic quantisation of the trained model to `int8` data type. \n
+    Due to some limitations of `torchao` module, not all layers will be quantised.

     :param model: trained ChemBFN model
     :type model: bayesianflow_for_chem.model.ChemBFN
-    :return: quantised model
-    :rtype: torch.nn.Module
+    :return:
+    :rtype: None
     """
-    from torch.ao.nn.quantized import dynamic
-    from torch.ao.nn.quantized.modules.utils import _quantize_weight
-    from torch.ao.quantization.qconfig import default_dynamic_qconfig
-
-    class QuantisedLinear(dynamic.Linear):
-        # Modified from https://github.com/pytorch/pytorch/blob/main/torch/ao/nn/quantized/dynamic/modules/linear.py
-        # We made it compatible with our LoRA linear layer.
-        # LoRA parameters will not be quantised.
-        def __init__(
-            self,
-            in_features: int,
-            out_features: int,
-            bias_: bool = True,
-            dtype: torch.dtype = torch.qint8,
-        ) -> None:
-            super().__init__(in_features, out_features, bias_, dtype=dtype)
-            self.version = self._version
-            self.lora_enabled: bool = False
-            self.lora_A: Optional[nn.Parameter] = None
-            self.lora_B: Optional[nn.Parameter] = None
-            self.scaling: Optional[float] = None
-            self.lora_dropout: Optional[float] = None
-
-        def _get_name(self) -> str:
-            return "DynamicQuantizedLoRALinear"
-
-        def enable_lora(
-            self, r: int = 8, lora_alpha: int = 1, lora_dropout: float = 0.0
-        ) -> None:
-            assert r > 0, "Rank should be larger than 0."
-            device = self._weight_bias()[0].device
-            self.lora_A = nn.Parameter(
-                torch.zeros((r, self.in_features), device=device)
-            )
-            self.lora_B = nn.Parameter(
-                torch.zeros((self.out_features, r), device=device)
-            )
-            self.scaling = lora_alpha / r
-            self.lora_dropout = lora_dropout
-            self.lora_enabled = True
-            nn.init.kaiming_uniform_(self.lora_A, a=5**0.5)
-            nn.init.zeros_(self.lora_B)
-            self._packed_params.requires_grad_(False)
-
-        def forward(self, x: Tensor) -> Tensor:
-            if self._packed_params.dtype == torch.qint8:
-                if self.version is None or self.version < 4:
-                    Y = torch.ops.quantized.linear_dynamic(
-                        x, self._packed_params._packed_params
-                    )
-                else:
-                    Y = torch.ops.quantized.linear_dynamic(
-                        x, self._packed_params._packed_params, reduce_range=True
-                    )
-            elif self._packed_params.dtype == torch.float16:
-                Y = torch.ops.quantized.linear_dynamic_fp16(
-                    x, self._packed_params._packed_params
-                )
-            else:
-                raise RuntimeError("Unsupported dtype on dynamic quantized linear!")
-            result = Y.to(x.dtype)
-            if self.lora_enabled and isinstance(self.lora_dropout, float):
-                result += (
-                    nn.functional.dropout(x, self.lora_dropout, self.training)
-                    @ self.lora_A.transpose(0, 1)
-                    @ self.lora_B.transpose(0, 1)
-                ) * self.scaling
-            return result
+    from torchao.quantization.quant_api import (
+        quantize_,
+        Int8DynamicActivationInt8WeightConfig,
+    )

-        @classmethod
-        def from_float(
-            cls, mod: Linear, use_precomputed_fake_quant: bool = False
-        ) -> Self:
-            assert hasattr(
-                mod, "qconfig"
-            ), "Input float module must have qconfig defined"
-            if use_precomputed_fake_quant:
-                warnings.warn("Fake quantize operator is not implemented.")
-            if mod.qconfig is not None and mod.qconfig.weight is not None:
-                weight_observer = mod.qconfig.weight()
-            else:
-                weight_observer = default_dynamic_qconfig.weight()
-            dtype = weight_observer.dtype
-            assert dtype in [torch.qint8, torch.float16], (
-                "The only supported dtypes for "
-                f"dynamic quantized linear are qint8 and float16 got: {dtype}"
-            )
-            weight_observer(mod.weight)
-            if dtype == torch.qint8:
-                qweight = _quantize_weight(mod.weight.float(), weight_observer)
-            elif dtype == torch.float16:
-                qweight = mod.weight.float()
-            else:
-                raise RuntimeError(
-                    "Unsupported dtype specified for dynamic quantized Linear!"
-                )
-            qlinear = cls(mod.in_features, mod.out_features, dtype=dtype)
-            qlinear.set_weight_bias(qweight, mod.bias)
-            if mod.lora_enabled:
-                qlinear.lora_enabled = True
-                qlinear.lora_A = nn.Parameter(mod.lora_A.clone().detach_())
-                qlinear.lora_B = nn.Parameter(mod.lora_B.clone().detach_())
-                qlinear.scaling = deepcopy(mod.scaling)
-                qlinear.lora_dropout = deepcopy(mod.lora_dropout)
-            return qlinear
+    quantize_(model, Int8DynamicActivationInt8WeightConfig())

-        @classmethod
-        def from_reference(cls, ref_qlinear: Self) -> Self:
-            qlinear = cls(
-                ref_qlinear.in_features,
-                ref_qlinear.out_features,
-                dtype=ref_qlinear.weight_dtype,
-            )
-            qweight = ref_qlinear.get_quantized_weight()
-            bias = ref_qlinear.bias
-            qlinear.set_weight_bias(qweight, bias)
-            if ref_qlinear.lora_enabled:
-                qlinear.lora_enabled = True
-                qlinear.lora_A = nn.Parameter(ref_qlinear.lora_A.clone().detach_())
-                qlinear.lora_B = nn.Parameter(ref_qlinear.lora_B.clone().detach_())
-                qlinear.scaling = deepcopy(ref_qlinear.scaling)
-                qlinear.lora_dropout = deepcopy(ref_qlinear.lora_dropout)
-            return qlinear

-    mapping = deepcopy(quantization.DEFAULT_DYNAMIC_QUANT_MODULE_MAPPINGS)
-    mapping[Linear] = QuantisedLinear
-    quantised_model = quantization.quantize_dynamic(
-        model, {nn.Linear, Linear}, torch.qint8, mapping
-    )
-    return quantised_model
+def build_uv_vis_sepctrum(
+    etoscs: np.ndarray, etenergies: np.ndarray, lambdas: np.ndarray
+) -> np.ndarray:
+    """
+    Build UV/Vis spectrum from calculated electron transition energies and oscillator strengths. \n
+    This function follows the GaussView style: https://gaussian.com/uvvisplot/.
+
+    :param etoscs: oscillator strengths
+    :param etenergies: transition energies
+    :param lambdas: wavelengths
+    :type etoscs: numpy.ndarray
+    :type etenergies: numpy.ndarray
+    :type lambdas: numpy.ndarray
+    :return: absorption coefficient corresponding to the wavelengths
+    :rtype: numpy.ndarray
+    """
+    return (
+        etoscs[:, None]
+        * np.exp(
+            -np.pow((1 / lambdas[None, :] - etenergies[:, None] / 45.5634) * 3099.6, 2)
+        )
+    ).sum(0) * 40489.99421


 class GeometryConverter:
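
A usage sketch of the new GaussView-style spectrum builder; reading `etenergies` as excitation energies in Hartree and `lambdas` as wavelengths in nm is an assumption inferred from the 45.5634 constant, and the numbers are illustrative:

    import numpy as np
    from bayesianflow_for_chem.tool import build_uv_vis_sepctrum

    lambdas = np.linspace(200.0, 800.0, 601)       # wavelength grid (assumed nm)
    etenergies = np.array([0.155, 0.128])          # excitation energies (assumed Hartree)
    etoscs = np.array([0.42, 0.10])                # oscillator strengths
    epsilon = build_uv_vis_sepctrum(etoscs, etenergies, lambdas)
    # `epsilon` has the same length as `lambdas`: each transition is broadened with a
    # Gaussian in 1/lambda and the contributions are summed, as in the function above.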

@@ -543,7 +440,7 @@ class GeometryConverter:
     @staticmethod
     def smiles2cartesian(
         smiles: str,
-        num_conformers: int = 50,
+        num_conformers: int = 250,
         rdkit_ff_type: str = "MMFF",
         refine_with_crest: bool = False,
         spin: float = 0.0,

bayesianflow_for_chem-1.4.1.dist-info/METADATA → bayesianflow_for_chem-1.4.3.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: bayesianflow_for_chem
-Version: 1.4.1
+Version: 1.4.3
 Summary: Bayesian flow network framework for Chemistry
 Home-page: https://augus1999.github.io/bayesian-flow-network-for-chemistry/
 Author: Nianze A. Tao

@@ -21,13 +21,15 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: rdkit>=2023.9.6
-Requires-Dist: torch>=2.3.1
-Requires-Dist: numpy>=1.26.4
+Requires-Dist: rdkit>=2025.3.5
+Requires-Dist: torch>=2.8.0
+Requires-Dist: torchao>=0.12
+Requires-Dist: numpy>=2.3.2
+Requires-Dist: scipy>=1.16.1
 Requires-Dist: loralib>=0.1.2
-Requires-Dist: lightning>=2.2.0
-Requires-Dist: scikit-learn>=1.5.0
-Requires-Dist: typing_extensions>=4.8.0
+Requires-Dist: lightning>=2.5.3
+Requires-Dist: scikit-learn>=1.7.1
+Requires-Dist: typing_extensions>=4.14.1
 Dynamic: author
 Dynamic: author-email
 Dynamic: classifier

bayesianflow_for_chem-1.4.3.dist-info/RECORD
@@ -0,0 +1,12 @@
+bayesianflow_for_chem/__init__.py,sha256=lcVwcAWdsmdf8cFIromTsT2Hhn4hIBRlA47kCWWeHm0,329
+bayesianflow_for_chem/data.py,sha256=WoOCOVmJX4WeHa2WeO4i66J2FS8rvRaYRCdlBN7ZeOM,6576
+bayesianflow_for_chem/model.py,sha256=CIcFruMFg5IbaVRkc5geygcEV7LiBEN7X_2dqUqJi8w,50489
+bayesianflow_for_chem/scorer.py,sha256=i-1y6pd22n7ExzdXozCjOTXbEIJTkz_6Bb4g0RA5kQI,4928
+bayesianflow_for_chem/tool.py,sha256=AhW_dXuWEe7vl2epw_EZdEpmAYRd_3NMOQTuC25oHio,20558
+bayesianflow_for_chem/train.py,sha256=hGKyhGhLch-exSYPZdLXrLn3gf39Q1VLSJs2qtuikQE,9709
+bayesianflow_for_chem/vocab.txt,sha256=HgtAZmpWYk4y8PqEVC4vqut1vE75DfRKE_10s2UW0rU,790
+bayesianflow_for_chem-1.4.3.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
+bayesianflow_for_chem-1.4.3.dist-info/METADATA,sha256=Xvq_QOy7kk17IonxrE5onkJrgvWQ-Atcs4zCODpxTXI,5703
+bayesianflow_for_chem-1.4.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+bayesianflow_for_chem-1.4.3.dist-info/top_level.txt,sha256=KHsanI3BMCt8D9Qpze2ycrF6nMa3PyojgO6eS1c8kco,22
+bayesianflow_for_chem-1.4.3.dist-info/RECORD,,

bayesianflow_for_chem-1.4.1.dist-info/RECORD
@@ -1,12 +0,0 @@
-bayesianflow_for_chem/__init__.py,sha256=N_7P9Ea0eUmdC0wQKXIHiuMzPK4p9_cBF_YOexjo5yo,329
-bayesianflow_for_chem/data.py,sha256=WoOCOVmJX4WeHa2WeO4i66J2FS8rvRaYRCdlBN7ZeOM,6576
-bayesianflow_for_chem/model.py,sha256=zJkcUnZcxFa4iTo9_-BHzAM1MkJm1pbEGiczVgyUcxo,50186
-bayesianflow_for_chem/scorer.py,sha256=7G1TVSwC0qONtNm6kiDZUWwvuFPzasNSjp4eJAk5TL0,4101
-bayesianflow_for_chem/tool.py,sha256=Ma4dEBWP5GFKk-Tj5vBJgxGs_WAp4F87-b1UcsqUAn4,25486
-bayesianflow_for_chem/train.py,sha256=hGKyhGhLch-exSYPZdLXrLn3gf39Q1VLSJs2qtuikQE,9709
-bayesianflow_for_chem/vocab.txt,sha256=HgtAZmpWYk4y8PqEVC4vqut1vE75DfRKE_10s2UW0rU,790
-bayesianflow_for_chem-1.4.1.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
-bayesianflow_for_chem-1.4.1.dist-info/METADATA,sha256=460yUOjHG9PTavIddJJ2Ufdq0bkLBZqbmMugyq6LVPQ,5643
-bayesianflow_for_chem-1.4.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-bayesianflow_for_chem-1.4.1.dist-info/top_level.txt,sha256=KHsanI3BMCt8D9Qpze2ycrF6nMa3PyojgO6eS1c8kco,22
-bayesianflow_for_chem-1.4.1.dist-info/RECORD,,