bayesianflow-for-chem 1.4.2__py3-none-any.whl → 1.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of bayesianflow-for-chem was flagged by the registry.

@@ -7,5 +7,5 @@ from . import data, tool, train, scorer
 from .model import ChemBFN, MLP, EnsembleChemBFN
 
 __all__ = ["data", "tool", "train", "scorer", "ChemBFN", "MLP", "EnsembleChemBFN"]
-__version__ = "1.4.2"
+__version__ = "1.4.3"
 __author__ = "Nianze A. Tao (Omozawa Sueno)"
@@ -54,19 +54,17 @@ class Linear(nn.Linear):
         :return:
         :rtype: None
         """
-        from torchao.dtypes.affine_quantized_tensor import AffineQuantizedTensor
-
         assert r > 0, "Rank should be larger than 0."
-        if isinstance(self.weight, AffineQuantizedTensor):
+        try:
+            self.lora_A = nn.Parameter(self.weight.new_zeros((r, self.in_features)))
+            self.lora_B = nn.Parameter(self.weight.new_zeros((self.out_features, r)))
+        except NotImplementedError:
             self.lora_A = nn.Parameter(
                 torch.zeros((r, self.in_features), device=self.weight.device)
             )
             self.lora_B = nn.Parameter(
                 torch.zeros((self.out_features, r), device=self.weight.device)
             )
-        else:
-            self.lora_A = nn.Parameter(self.weight.new_zeros((r, self.in_features)))
-            self.lora_B = nn.Parameter(self.weight.new_zeros((self.out_features, r)))
         self.scaling = lora_alpha / r
         self.lora_dropout = lora_dropout
         self.lora_enabled = True
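For context on this hunk: `Tensor.new_zeros` creates zeros with the same device and dtype as the source tensor, but torchao's quantised tensor subclasses may not implement it and raise `NotImplementedError`, so the new code tries the cheap path first and falls back to plain float zeros on the weight's device. A minimal standalone sketch of the same pattern (the helper name is ours, not the package's API):

import torch
import torch.nn as nn

def init_lora_params(weight: torch.Tensor, r: int):
    """Illustrative only: mirrors the try/except fallback in the hunk above."""
    out_features, in_features = weight.shape
    try:
        # Preferred path: inherit device/dtype from the (possibly quantised) weight.
        lora_A = nn.Parameter(weight.new_zeros((r, in_features)))
        lora_B = nn.Parameter(weight.new_zeros((out_features, r)))
    except NotImplementedError:
        # Tensor subclasses without new_zeros land here: plain float zeros, same device.
        lora_A = nn.Parameter(torch.zeros((r, in_features), device=weight.device))
        lora_B = nn.Parameter(torch.zeros((out_features, r), device=weight.device))
    return lora_A, lora_B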
@@ -5,10 +5,12 @@ Define essential scorers.
 """
 from typing import List, Callable, Union, Optional
 import torch
+import numpy as np
 from torch import Tensor
 from rdkit import RDLogger
 from rdkit.Contrib.SA_Score import sascorer  # type: ignore
 from rdkit.Chem import MolFromSmiles, QED
+from scipy.stats import wasserstein_distance
 
 RDLogger.DisableLog("rdApp.*")  # type: ignore
 
@@ -49,6 +51,27 @@ def sa_score(smiles: str) -> float:
     return sascorer.calculateScore(MolFromSmiles(smiles))
 
 
+def spectra_wasserstein_score(
+    spectrum_u: np.ndarray, spectrum_v: np.ndarray, x_axis: np.ndarray
+) -> float:
+    """
+    Return the Wasserstein distance (earth mover's distance) between two
+    continuous spectra, scaled by the square root of the area under the reference spectrum `spectrum_u`.
+
+    :param spectrum_u: the reference spectrum
+    :param spectrum_v: the spectrum compared against the reference
+    :param x_axis: the shared x-axis of the spectra
+    :type spectrum_u: numpy.ndarray
+    :type spectrum_v: numpy.ndarray
+    :type x_axis: numpy.ndarray
+    :return: spectra Wasserstein score
+    :rtype: float
+    """
+    assert spectrum_u.size == spectrum_v.size, "Spectra sizes should match."
+    a = np.sqrt(np.trapezoid(spectrum_u, x_axis))
+    return (wasserstein_distance(spectrum_u, spectrum_v) / a).item()
+
+
 class Scorer:
     def __init__(
         self,
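A quick usage sketch of the new scorer (the spectra below are invented for illustration; only the function and module names come from this release):

import numpy as np
from bayesianflow_for_chem.scorer import spectra_wasserstein_score

x = np.linspace(200.0, 800.0, 601)            # shared wavelength grid
ref = np.exp(-(((x - 450.0) / 30.0) ** 2))    # reference spectrum
pred = np.exp(-(((x - 470.0) / 30.0) ** 2))   # spectrum to evaluate
print(spectra_wasserstein_score(ref, pred, x))  # lower = closer to the reference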
@@ -126,7 +149,7 @@ class Scorer:
             ]
             for i, scorer in enumerate(self.scorers)
         ]
-        loss = (e_k * p).sum(2).mean(1) * torch.tensor(scores, device=p.device).mean(0)
+        loss = (e_k * p).sum(2).mean(1) * p.new_tensor(scores).mean(0)
        return loss.mean()
 
 
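The `p.new_tensor(scores)` rewrite keeps the device placement of the old code but also inherits `p`'s dtype, which `torch.tensor(scores, device=p.device)` (defaulting to float32) did not. A minimal illustration:

import torch

p = torch.rand(4, 3, dtype=torch.float16)
t = p.new_tensor([1.0, 2.0, 3.0])
assert t.dtype == p.dtype and t.device == p.device  # float16, same device as p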
@@ -6,15 +6,12 @@ Essential tools.
 import csv
 import random
 import warnings
-from copy import deepcopy
 from pathlib import Path
 from typing import List, Dict, Tuple, Union, Optional
 import torch
 import numpy as np
-import torch.nn as nn
 from torch import cuda, Tensor, softmax
 from torch.utils.data import DataLoader
-from typing_extensions import Self, deprecated
 from rdkit.Chem import (
     rdDetermineBonds,
     GetFormalCharge,
@@ -36,7 +33,7 @@ from sklearn.metrics import (
     root_mean_squared_error,
 )
 from .data import VOCAB_KEYS
-from .model import ChemBFN, MLP, Linear, EnsembleChemBFN
+from .model import ChemBFN, MLP, EnsembleChemBFN
 
 
 def _find_device() -> torch.device:
@@ -385,157 +382,10 @@ def inpaint(
     ]
 
 
-@deprecated(
-    "Eager mode quantization from `torch.ao` is deprecated and will be removed in version 2.10, "
-    "so this function will stop working from that point on. "
-    "Please use `quantise_model_` instead."
-)
-def quantise_model(model: ChemBFN) -> nn.Module:
-    """
-    Dynamic quantisation of the trained model to `torch.qint8` data type.
-
-    :param model: trained ChemBFN model
-    :type model: bayesianflow_for_chem.model.ChemBFN
-    :return: quantised model
-    :rtype: torch.nn.Module
-    """
-    from torch.ao import quantization
-    from torch.ao.nn.quantized import dynamic
-    from torch.ao.nn.quantized.modules.utils import _quantize_weight
-    from torch.ao.quantization.qconfig import default_dynamic_qconfig
-
-    class QuantisedLinear(dynamic.Linear):
-        # Modified from https://github.com/pytorch/pytorch/blob/main/torch/ao/nn/quantized/dynamic/modules/linear.py
-        # We made it compatible with our LoRA linear layer.
-        # LoRA parameters will not be quantised.
-        def __init__(
-            self,
-            in_features: int,
-            out_features: int,
-            bias_: bool = True,
-            dtype: torch.dtype = torch.qint8,
-        ) -> None:
-            super().__init__(in_features, out_features, bias_, dtype=dtype)
-            self.version = self._version
-            self.lora_enabled: bool = False
-            self.lora_A: Optional[nn.Parameter] = None
-            self.lora_B: Optional[nn.Parameter] = None
-            self.scaling: Optional[float] = None
-            self.lora_dropout: Optional[float] = None
-
-        def _get_name(self) -> str:
-            return "DynamicQuantizedLoRALinear"
-
-        def enable_lora(
-            self, r: int = 8, lora_alpha: int = 1, lora_dropout: float = 0.0
-        ) -> None:
-            assert r > 0, "Rank should be larger than 0."
-            device = self._weight_bias()[0].device
-            self.lora_A = nn.Parameter(
-                torch.zeros((r, self.in_features), device=device)
-            )
-            self.lora_B = nn.Parameter(
-                torch.zeros((self.out_features, r), device=device)
-            )
-            self.scaling = lora_alpha / r
-            self.lora_dropout = lora_dropout
-            self.lora_enabled = True
-            nn.init.kaiming_uniform_(self.lora_A, a=5**0.5)
-            nn.init.zeros_(self.lora_B)
-            self._packed_params.requires_grad_(False)
-
-        def forward(self, x: Tensor) -> Tensor:
-            if self._packed_params.dtype == torch.qint8:
-                if self.version is None or self.version < 4:
-                    Y = torch.ops.quantized.linear_dynamic(
-                        x, self._packed_params._packed_params
-                    )
-                else:
-                    Y = torch.ops.quantized.linear_dynamic(
-                        x, self._packed_params._packed_params, reduce_range=True
-                    )
-            elif self._packed_params.dtype == torch.float16:
-                Y = torch.ops.quantized.linear_dynamic_fp16(
-                    x, self._packed_params._packed_params
-                )
-            else:
-                raise RuntimeError("Unsupported dtype on dynamic quantized linear!")
-            result = Y.to(x.dtype)
-            if self.lora_enabled and isinstance(self.lora_dropout, float):
-                result += (
-                    nn.functional.dropout(x, self.lora_dropout, self.training)
-                    @ self.lora_A.transpose(0, 1)
-                    @ self.lora_B.transpose(0, 1)
-                ) * self.scaling
-            return result
-
-        @classmethod
-        def from_float(
-            cls, mod: Linear, use_precomputed_fake_quant: bool = False
-        ) -> Self:
-            assert hasattr(
-                mod, "qconfig"
-            ), "Input float module must have qconfig defined"
-            if use_precomputed_fake_quant:
-                warnings.warn("Fake quantize operator is not implemented.")
-            if mod.qconfig is not None and mod.qconfig.weight is not None:
-                weight_observer = mod.qconfig.weight()
-            else:
-                weight_observer = default_dynamic_qconfig.weight()
-            dtype = weight_observer.dtype
-            assert dtype in [torch.qint8, torch.float16], (
-                "The only supported dtypes for "
-                f"dynamic quantized linear are qint8 and float16, got: {dtype}"
-            )
-            weight_observer(mod.weight)
-            if dtype == torch.qint8:
-                qweight = _quantize_weight(mod.weight.float(), weight_observer)
-            elif dtype == torch.float16:
-                qweight = mod.weight.float()
-            else:
-                raise RuntimeError(
-                    "Unsupported dtype specified for dynamic quantized Linear!"
-                )
-            qlinear = cls(mod.in_features, mod.out_features, dtype=dtype)
-            qlinear.set_weight_bias(qweight, mod.bias)
-            if mod.lora_enabled:
-                qlinear.lora_enabled = True
-                qlinear.lora_A = nn.Parameter(mod.lora_A.clone().detach_())
-                qlinear.lora_B = nn.Parameter(mod.lora_B.clone().detach_())
-                qlinear.scaling = deepcopy(mod.scaling)
-                qlinear.lora_dropout = deepcopy(mod.lora_dropout)
-            return qlinear
-
-        @classmethod
-        def from_reference(cls, ref_qlinear: Self) -> Self:
-            qlinear = cls(
-                ref_qlinear.in_features,
-                ref_qlinear.out_features,
-                dtype=ref_qlinear.weight_dtype,
-            )
-            qweight = ref_qlinear.get_quantized_weight()
-            bias = ref_qlinear.bias
-            qlinear.set_weight_bias(qweight, bias)
-            if ref_qlinear.lora_enabled:
-                qlinear.lora_enabled = True
-                qlinear.lora_A = nn.Parameter(ref_qlinear.lora_A.clone().detach_())
-                qlinear.lora_B = nn.Parameter(ref_qlinear.lora_B.clone().detach_())
-                qlinear.scaling = deepcopy(ref_qlinear.scaling)
-                qlinear.lora_dropout = deepcopy(ref_qlinear.lora_dropout)
-            return qlinear
-
-    mapping = deepcopy(quantization.DEFAULT_DYNAMIC_QUANT_MODULE_MAPPINGS)
-    mapping[Linear] = QuantisedLinear
-    quantised_model = quantization.quantize_dynamic(
-        model, {nn.Linear, Linear}, torch.qint8, mapping
-    )
-    return quantised_model
-
-
 def quantise_model_(model: ChemBFN) -> None:
     """
     In-place dynamic quantisation of the trained model to `int8` data type. \n
-    Due to some limitations of the `torchao` module, it is slower than the method provided by `torch.ao`.
+    Due to some limitations of the `torchao` module, not all layers will be quantised.
 
     :param model: trained ChemBFN model
     :type model: bayesianflow_for_chem.model.ChemBFN
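With the eager-mode `torch.ao` path removed, the in-place `quantise_model_` is the only remaining quantisation entry point. A self-contained sketch of the underlying torchao call it wraps (a toy module stands in for a trained ChemBFN; the config name is the one visible in the context of the next hunk):

import torch.nn as nn
from torchao.quantization import quantize_, Int8DynamicActivationInt8WeightConfig

toy = nn.Sequential(nn.Linear(8, 16), nn.GELU(), nn.Linear(16, 8))
quantize_(toy, Int8DynamicActivationInt8WeightConfig())  # mutates the module in place, returns None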
@@ -550,6 +400,30 @@ def quantise_model_(model: ChemBFN) -> None:
     quantize_(model, Int8DynamicActivationInt8WeightConfig())
 
 
+def build_uv_vis_sepctrum(
+    etoscs: np.ndarray, etenergies: np.ndarray, lambdas: np.ndarray
+) -> np.ndarray:
+    """
+    Build a UV/Vis spectrum from calculated electron transition energies and oscillator strengths. \n
+    This function follows the GaussView style: https://gaussian.com/uvvisplot/.
+
+    :param etoscs: oscillator strengths
+    :param etenergies: transition energies
+    :param lambdas: wavelengths
+    :type etoscs: numpy.ndarray
+    :type etenergies: numpy.ndarray
+    :type lambdas: numpy.ndarray
+    :return: absorption coefficients corresponding to the wavelengths
+    :rtype: numpy.ndarray
+    """
+    return (
+        etoscs[:, None]
+        * np.exp(
+            -np.pow((1 / lambdas[None, :] - etenergies[:, None] / 45.5634) * 3099.6, 2)
+        )
+    ).sum(0) * 40489.99421
+
+
 class GeometryConverter:
     """
     Converting between different 2D/3D molecular representations.
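A usage sketch with made-up values (judging from the constants 45.5634 and 3099.6, `etenergies` appears to be expected in hartree and `lambdas` in nm, per the linked GaussView convention; that unit inference is ours, not stated in the code):

import numpy as np
from bayesianflow_for_chem.tool import build_uv_vis_sepctrum

etoscs = np.array([0.12, 0.05])           # oscillator strengths of two transitions
etenergies = np.array([0.10, 0.15])       # transition energies in hartree (~455 nm, ~304 nm)
lambdas = np.linspace(200.0, 800.0, 601)  # wavelength grid in nm
epsilon = build_uv_vis_sepctrum(etoscs, etenergies, lambdas)
print(epsilon.argmax(), epsilon.max())    # peak position (grid index) and height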
@@ -566,7 +440,7 @@ class GeometryConverter:
     @staticmethod
     def smiles2cartesian(
         smiles: str,
-        num_conformers: int = 50,
+        num_conformers: int = 250,
         rdkit_ff_type: str = "MMFF",
         refine_with_crest: bool = False,
         spin: float = 0.0,
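The default conformer count rises from 50 to 250, improving the odds that the lowest-energy embedded conformer lies near the global minimum. For context only, a generic RDKit conformer-search sketch of the kind this parameter controls (standard RDKit calls; not the package's internal code):

from rdkit import Chem
from rdkit.Chem import AllChem

mol = Chem.AddHs(Chem.MolFromSmiles("OCCCCO"))
cids = AllChem.EmbedMultipleConfs(mol, numConfs=250, randomSeed=1)
results = AllChem.MMFFOptimizeMoleculeConfs(mol)  # [(not_converged, energy), ...] per conformer
best = min(range(len(results)), key=lambda i: results[i][1])  # index of lowest-energy conformer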
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: bayesianflow_for_chem
-Version: 1.4.2
+Version: 1.4.3
 Summary: Bayesian flow network framework for Chemistry
 Home-page: https://augus1999.github.io/bayesian-flow-network-for-chemistry/
 Author: Nianze A. Tao
@@ -21,14 +21,15 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: rdkit>=2023.9.6
-Requires-Dist: torch>=2.3.1
+Requires-Dist: rdkit>=2025.3.5
+Requires-Dist: torch>=2.8.0
 Requires-Dist: torchao>=0.12
-Requires-Dist: numpy>=1.26.4
+Requires-Dist: numpy>=2.3.2
+Requires-Dist: scipy>=1.16.1
 Requires-Dist: loralib>=0.1.2
-Requires-Dist: lightning>=2.2.0
-Requires-Dist: scikit-learn>=1.5.0
-Requires-Dist: typing_extensions>=4.8.0
+Requires-Dist: lightning>=2.5.3
+Requires-Dist: scikit-learn>=1.7.1
+Requires-Dist: typing_extensions>=4.14.1
 Dynamic: author
 Dynamic: author-email
 Dynamic: classifier
@@ -0,0 +1,12 @@
+bayesianflow_for_chem/__init__.py,sha256=lcVwcAWdsmdf8cFIromTsT2Hhn4hIBRlA47kCWWeHm0,329
+bayesianflow_for_chem/data.py,sha256=WoOCOVmJX4WeHa2WeO4i66J2FS8rvRaYRCdlBN7ZeOM,6576
+bayesianflow_for_chem/model.py,sha256=CIcFruMFg5IbaVRkc5geygcEV7LiBEN7X_2dqUqJi8w,50489
+bayesianflow_for_chem/scorer.py,sha256=i-1y6pd22n7ExzdXozCjOTXbEIJTkz_6Bb4g0RA5kQI,4928
+bayesianflow_for_chem/tool.py,sha256=AhW_dXuWEe7vl2epw_EZdEpmAYRd_3NMOQTuC25oHio,20558
+bayesianflow_for_chem/train.py,sha256=hGKyhGhLch-exSYPZdLXrLn3gf39Q1VLSJs2qtuikQE,9709
+bayesianflow_for_chem/vocab.txt,sha256=HgtAZmpWYk4y8PqEVC4vqut1vE75DfRKE_10s2UW0rU,790
+bayesianflow_for_chem-1.4.3.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
+bayesianflow_for_chem-1.4.3.dist-info/METADATA,sha256=Xvq_QOy7kk17IonxrE5onkJrgvWQ-Atcs4zCODpxTXI,5703
+bayesianflow_for_chem-1.4.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+bayesianflow_for_chem-1.4.3.dist-info/top_level.txt,sha256=KHsanI3BMCt8D9Qpze2ycrF6nMa3PyojgO6eS1c8kco,22
+bayesianflow_for_chem-1.4.3.dist-info/RECORD,,
@@ -1,12 +0,0 @@
-bayesianflow_for_chem/__init__.py,sha256=IeIasLe6wLuGbH7DIlB38ehDPqvlMBT388hf58I3J30,329
-bayesianflow_for_chem/data.py,sha256=WoOCOVmJX4WeHa2WeO4i66J2FS8rvRaYRCdlBN7ZeOM,6576
-bayesianflow_for_chem/model.py,sha256=6pxGuIM7rKyawcz2hI8dT88rv3qFsnCvlLhDj1CB9YU,50595
-bayesianflow_for_chem/scorer.py,sha256=7G1TVSwC0qONtNm6kiDZUWwvuFPzasNSjp4eJAk5TL0,4101
-bayesianflow_for_chem/tool.py,sha256=Ne_ew1P8r6KWOqUZpb-BL_q7Dm6fnSTtxhJvgV1JHHs,26264
-bayesianflow_for_chem/train.py,sha256=hGKyhGhLch-exSYPZdLXrLn3gf39Q1VLSJs2qtuikQE,9709
-bayesianflow_for_chem/vocab.txt,sha256=HgtAZmpWYk4y8PqEVC4vqut1vE75DfRKE_10s2UW0rU,790
-bayesianflow_for_chem-1.4.2.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
-bayesianflow_for_chem-1.4.2.dist-info/METADATA,sha256=s6k85HFXvasxvZBJD3Rj8cFNJXehS-utcMeKC6tP8F8,5673
-bayesianflow_for_chem-1.4.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-bayesianflow_for_chem-1.4.2.dist-info/top_level.txt,sha256=KHsanI3BMCt8D9Qpze2ycrF6nMa3PyojgO6eS1c8kco,22
-bayesianflow_for_chem-1.4.2.dist-info/RECORD,,