bayesianflow-for-chem 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of bayesianflow-for-chem might be problematic. Click here for more details.
- bayesianflow_for_chem/__init__.py +1 -1
- bayesianflow_for_chem/data.py +1 -38
- bayesianflow_for_chem/model.py +4 -4
- bayesianflow_for_chem/tool.py +47 -165
- {bayesianflow_for_chem-1.3.0.dist-info → bayesianflow_for_chem-1.4.0.dist-info}/METADATA +2 -5
- bayesianflow_for_chem-1.4.0.dist-info/RECORD +12 -0
- bayesianflow_for_chem-1.3.0.dist-info/RECORD +0 -12
- {bayesianflow_for_chem-1.3.0.dist-info → bayesianflow_for_chem-1.4.0.dist-info}/WHEEL +0 -0
- {bayesianflow_for_chem-1.3.0.dist-info → bayesianflow_for_chem-1.4.0.dist-info}/licenses/LICENSE +0 -0
- {bayesianflow_for_chem-1.3.0.dist-info → bayesianflow_for_chem-1.4.0.dist-info}/top_level.txt +0 -0
|
@@ -7,5 +7,5 @@ from . import data, tool, train, scorer
|
|
|
7
7
|
from .model import ChemBFN, MLP, EnsembleChemBFN
|
|
8
8
|
|
|
9
9
|
__all__ = ["data", "tool", "train", "scorer", "ChemBFN", "MLP", "EnsembleChemBFN"]
|
|
10
|
-
__version__ = "1.3.0"
|
|
10
|
+
__version__ = "1.4.0"
|
|
11
11
|
__author__ = "Nianze A. Tao (Omozawa Sueno)"
|
bayesianflow_for_chem/data.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
2
|
# Author: Nianze A. TAO (Omozawa SUENO)
|
|
3
3
|
"""
|
|
4
|
-
Tokenise SMILES/SAFE/SELFIES/GEO2SEQ/protein-sequence strings.
|
|
4
|
+
Tokenise SMILES/SAFE/SELFIES/protein-sequence strings.
|
|
5
5
|
"""
|
|
6
6
|
import os
|
|
7
7
|
import re
|
|
@@ -32,25 +32,9 @@ SMI_REGEX_PATTERN = (
|
|
|
32
32
|
r"~|@|\?|>>?|\*|\$|\%[0-9]{2}|[0-9])"
|
|
33
33
|
)
|
|
34
34
|
SEL_REGEX_PATTERN = r"(\[[^\]]+]|\.)"
|
|
35
|
-
GEO_REGEX_PATTERN = (
|
|
36
|
-
r"(H[e,f,g,s,o]?|"
|
|
37
|
-
r"L[i,v,a,r,u]|"
|
|
38
|
-
r"B[e,r,a,i,h,k]?|"
|
|
39
|
-
r"C[l,a,r,o,u,d,s,n,e,m,f]?|"
|
|
40
|
-
r"N[e,a,i,b,h,d,o,p]?|"
|
|
41
|
-
r"O[s,g]?|S[i,c,e,r,n,m,b,g]?|"
|
|
42
|
-
r"K[r]?|T[i,c,e,a,l,b,h,m,s]|"
|
|
43
|
-
r"G[a,e,d]|R[b,u,h,e,n,a,f,g]|"
|
|
44
|
-
r"Yb?|Z[n,r]|P[t,o,d,r,a,u,b,m]?|"
|
|
45
|
-
r"F[e,r,l,m]?|M[g,n,o,t,c,d]|"
|
|
46
|
-
r"A[l,r,s,g,u,t,c,m]|I[n,r]?|"
|
|
47
|
-
r"W|X[e]|E[u,r,s]|U|D[b,s,y]|"
|
|
48
|
-
r"-|.| |[0-9])"
|
|
49
|
-
)
|
|
50
35
|
AA_REGEX_PATTERN = r"(A|B|C|D|E|F|G|H|I|K|L|M|N|P|Q|R|S|T|V|W|Y|Z|-|.)"
|
|
51
36
|
smi_regex = re.compile(SMI_REGEX_PATTERN)
|
|
52
37
|
sel_regex = re.compile(SEL_REGEX_PATTERN)
|
|
53
|
-
geo_regex = re.compile(GEO_REGEX_PATTERN)
|
|
54
38
|
aa_regex = re.compile(AA_REGEX_PATTERN)
|
|
55
39
|
|
|
56
40
|
|
|
@@ -86,9 +70,6 @@ AA_VOCAB_KEYS = (
|
|
|
86
70
|
)
|
|
87
71
|
AA_VOCAB_COUNT = len(AA_VOCAB_KEYS)
|
|
88
72
|
AA_VOCAB_DICT = dict(zip(AA_VOCAB_KEYS, range(AA_VOCAB_COUNT)))
|
|
89
|
-
GEO_VOCAB_KEYS = VOCAB_KEYS[0:3] + [" "] + VOCAB_KEYS[22:150] + [".", "-"]
|
|
90
|
-
GEO_VOCAB_COUNT = len(GEO_VOCAB_KEYS)
|
|
91
|
-
GEO_VOCAB_DICT = dict(zip(GEO_VOCAB_KEYS, range(GEO_VOCAB_COUNT)))
|
|
92
73
|
|
|
93
74
|
|
|
94
75
|
def smiles2vec(smiles: str) -> List[int]:
|
|
@@ -104,19 +85,6 @@ def smiles2vec(smiles: str) -> List[int]:
|
|
|
104
85
|
return [VOCAB_DICT[token] for token in tokens]
|
|
105
86
|
|
|
106
87
|
|
|
107
|
-
def geo2vec(geo2seq: str) -> List[int]:
|
|
108
|
-
"""
|
|
109
|
-
Geo2Seq tokenisation using a dataset-independent regex pattern.
|
|
110
|
-
|
|
111
|
-
:param geo2seq: `GEO2SEQ` string
|
|
112
|
-
:type geo2seq: str
|
|
113
|
-
:return: tokens w/o `<start>` and `<end>`
|
|
114
|
-
:rtype: list
|
|
115
|
-
"""
|
|
116
|
-
tokens = [token for token in geo_regex.findall(geo2seq)]
|
|
117
|
-
return [GEO_VOCAB_DICT[token] for token in tokens]
|
|
118
|
-
|
|
119
|
-
|
|
120
88
|
def aa2vec(aa_seq: str) -> List[int]:
|
|
121
89
|
"""
|
|
122
90
|
Protein sequence tokenisation using a dataset-independent regex pattern.
|
|
@@ -147,11 +115,6 @@ def smiles2token(smiles: str) -> Tensor:
|
|
|
147
115
|
return torch.tensor([1] + smiles2vec(smiles) + [2], dtype=torch.long)
|
|
148
116
|
|
|
149
117
|
|
|
150
|
-
def geo2token(geo2seq: str) -> Tensor:
|
|
151
|
-
# start token: <start> = 1; end token: <esc> = 2
|
|
152
|
-
return torch.tensor([1] + geo2vec(geo2seq) + [2], dtype=torch.long)
|
|
153
|
-
|
|
154
|
-
|
|
155
118
|
def aa2token(aa_seq: str) -> Tensor:
|
|
156
119
|
# start token: <start> = 1; end token: <end> = 2
|
|
157
120
|
return torch.tensor([1] + aa2vec(aa_seq) + [2], dtype=torch.long)
|
bayesianflow_for_chem/model.py
CHANGED
|
@@ -162,8 +162,8 @@ class Attention(nn.Module):
|
|
|
162
162
|
:return: attentioned output; shape: (n_b, n_t, n_f)
|
|
163
163
|
:rtype: torch.Tensor
|
|
164
164
|
"""
|
|
165
|
-
n_b,
|
|
166
|
-
split = (n_b,
|
|
165
|
+
n_b, n_t, _ = shape = x.shape
|
|
166
|
+
split = (n_b, n_t, self.nh, self.d)
|
|
167
167
|
q, k, v = self.qkv(x).chunk(3, -1)
|
|
168
168
|
q = q.view(split).permute(2, 0, 1, 3).contiguous()
|
|
169
169
|
k = k.view(split).permute(2, 0, 1, 3).contiguous()
|
|
@@ -428,12 +428,12 @@ class ChemBFN(nn.Module):
|
|
|
428
428
|
c = self.time_embed(t)
|
|
429
429
|
if y is not None:
|
|
430
430
|
c += y
|
|
431
|
-
pe = self.position(
|
|
431
|
+
pe = self.position(n_t)
|
|
432
432
|
x = self.embedding(x)
|
|
433
433
|
attn_mask: Optional[Tensor] = None
|
|
434
434
|
if self.semi_autoregressive:
|
|
435
435
|
attn_mask = torch.tril(
|
|
436
|
-
torch.ones((1, n_b, n_t, n_t), device=
|
|
436
|
+
torch.ones((1, n_b, n_t, n_t), device=x.device), diagonal=0
|
|
437
437
|
)
|
|
438
438
|
else:
|
|
439
439
|
if mask is not None:
|
bayesianflow_for_chem/tool.py
CHANGED
|
@@ -3,7 +3,6 @@
|
|
|
3
3
|
"""
|
|
4
4
|
Essential tools.
|
|
5
5
|
"""
|
|
6
|
-
import re
|
|
7
6
|
import csv
|
|
8
7
|
import random
|
|
9
8
|
import warnings
|
|
@@ -18,7 +17,15 @@ from torch.ao import quantization
|
|
|
18
17
|
from torch.utils.data import DataLoader
|
|
19
18
|
from typing_extensions import Self
|
|
20
19
|
from rdkit.Chem.rdchem import Mol, Bond
|
|
21
|
-
from rdkit.Chem import
|
|
20
|
+
from rdkit.Chem import (
|
|
21
|
+
rdDetermineBonds,
|
|
22
|
+
MolFromXYZBlock,
|
|
23
|
+
MolFromSmiles,
|
|
24
|
+
MolToSmiles,
|
|
25
|
+
CanonSmiles,
|
|
26
|
+
AllChem,
|
|
27
|
+
AddHs,
|
|
28
|
+
)
|
|
22
29
|
from rdkit.Chem.Scaffolds.MurckoScaffold import MurckoScaffoldSmiles # type: ignore
|
|
23
30
|
from sklearn.metrics import (
|
|
24
31
|
roc_auc_score,
|
|
@@ -28,36 +35,10 @@ from sklearn.metrics import (
|
|
|
28
35
|
mean_absolute_error,
|
|
29
36
|
root_mean_squared_error,
|
|
30
37
|
)
|
|
31
|
-
|
|
32
|
-
try:
|
|
33
|
-
from pynauty import Graph, canon_label # type: ignore
|
|
34
|
-
|
|
35
|
-
_use_pynauty = True
|
|
36
|
-
except ImportError:
|
|
37
|
-
import platform
|
|
38
|
-
|
|
39
|
-
_use_pynauty = False
|
|
40
38
|
from .data import VOCAB_KEYS
|
|
41
39
|
from .model import ChemBFN, MLP, Linear, EnsembleChemBFN
|
|
42
40
|
|
|
43
41
|
|
|
44
|
-
_atom_regex_pattern = (
|
|
45
|
-
r"(H[e,f,g,s,o]?|"
|
|
46
|
-
r"L[i,v,a,r,u]|"
|
|
47
|
-
r"B[e,r,a,i,h,k]?|"
|
|
48
|
-
r"C[l,a,r,o,u,d,s,n,e,m,f]?|"
|
|
49
|
-
r"N[e,a,i,b,h,d,o,p]?|"
|
|
50
|
-
r"O[s,g]?|S[i,c,e,r,n,m,b,g]?|"
|
|
51
|
-
r"K[r]?|T[i,c,e,a,l,b,h,m,s]|"
|
|
52
|
-
r"G[a,e,d]|R[b,u,h,e,n,a,f,g]|"
|
|
53
|
-
r"Yb?|Z[n,r]|P[t,o,d,r,a,u,b,m]?|"
|
|
54
|
-
r"F[e,r,l,m]?|M[g,n,o,t,c,d]|"
|
|
55
|
-
r"A[l,r,s,g,u,t,c,m]|I[n,r]?|"
|
|
56
|
-
r"W|X[e]|E[u,r,s]|U|D[b,s,y])"
|
|
57
|
-
)
|
|
58
|
-
_atom_regex = re.compile(_atom_regex_pattern)
|
|
59
|
-
|
|
60
|
-
|
|
61
42
|
def _find_device() -> torch.device:
|
|
62
43
|
if cuda.is_available():
|
|
63
44
|
return torch.device("cuda")
|
|
@@ -66,10 +47,6 @@ def _find_device() -> torch.device:
|
|
|
66
47
|
return torch.device("cpu")
|
|
67
48
|
|
|
68
49
|
|
|
69
|
-
def _bond_pair_idx(bonds: Bond) -> List[List[int]]:
|
|
70
|
-
return [[i.GetBeginAtomIdx(), i.GetEndAtomIdx()] for i in bonds]
|
|
71
|
-
|
|
72
|
-
|
|
73
50
|
@torch.no_grad()
|
|
74
51
|
def test(
|
|
75
52
|
model: ChemBFN,
|
|
@@ -493,6 +470,8 @@ def quantise_model(model: ChemBFN) -> nn.Module:
|
|
|
493
470
|
assert hasattr(
|
|
494
471
|
mod, "qconfig"
|
|
495
472
|
), "Input float module must have qconfig defined"
|
|
473
|
+
if use_precomputed_fake_quant:
|
|
474
|
+
warnings.warn("Fake quantize operator is not implemented.")
|
|
496
475
|
if mod.qconfig is not None and mod.qconfig.weight is not None:
|
|
497
476
|
weight_observer = mod.qconfig.weight()
|
|
498
477
|
else:
|
|
@@ -560,6 +539,42 @@ class GeometryConverter:
|
|
|
560
539
|
xyz_block.append(f"{atom} {r[i][0]:.10f} {r[i][1]:.10f} {r[i][2]:.10f}")
|
|
561
540
|
return MolFromXYZBlock("\n".join(xyz_block))
|
|
562
541
|
|
|
542
|
+
@staticmethod
|
|
543
|
+
def _bond_pair_idx(bonds: Bond) -> List[List[int]]:
|
|
544
|
+
return [[i.GetBeginAtomIdx(), i.GetEndAtomIdx()] for i in bonds]
|
|
545
|
+
|
|
546
|
+
@staticmethod
|
|
547
|
+
def smiles2cartesian(
|
|
548
|
+
smiles: str, num_conformers: int = 50, random_seed: int = 42
|
|
549
|
+
) -> Tuple[List[str], np.ndarray]:
|
|
550
|
+
"""
|
|
551
|
+
Guess the 3D geometry from SMILES string via MMFF conformer search.
|
|
552
|
+
|
|
553
|
+
:param smiles: a valid SMILES string
|
|
554
|
+
:param num_conformers: number of initial conformers
|
|
555
|
+
:param random_seed: random seed used to generate conformers
|
|
556
|
+
:type smiles: str
|
|
557
|
+
:type num_conformers: int
|
|
558
|
+
:type random_seed: int
|
|
559
|
+
:return: atomic symbols \n
|
|
560
|
+
cartesian coordinates; shape: (n_a, 3)
|
|
561
|
+
:rtype: tuple
|
|
562
|
+
"""
|
|
563
|
+
mol = MolFromSmiles(smiles)
|
|
564
|
+
mol = AddHs(mol)
|
|
565
|
+
AllChem.EmbedMultipleConfs(mol, numConfs=num_conformers, randomSeed=random_seed)
|
|
566
|
+
symbols = [atom.GetSymbol() for atom in mol.GetAtoms()]
|
|
567
|
+
energies = []
|
|
568
|
+
for conf_id in range(num_conformers):
|
|
569
|
+
ff = AllChem.MMFFGetMoleculeForceField(
|
|
570
|
+
mol, AllChem.MMFFGetMoleculeProperties(mol), confId=conf_id
|
|
571
|
+
)
|
|
572
|
+
energy = ff.CalcEnergy()
|
|
573
|
+
energies.append((conf_id, energy))
|
|
574
|
+
lowest_energy_conf = min(energies, key=lambda x: x[1])
|
|
575
|
+
coordinates = mol.GetConformer(id=lowest_energy_conf[0]).GetPositions()
|
|
576
|
+
return symbols, coordinates
|
|
577
|
+
|
|
563
578
|
def cartesian2smiles(
|
|
564
579
|
self,
|
|
565
580
|
symbols: List[str],
|
|
@@ -587,136 +602,3 @@ class GeometryConverter:
|
|
|
587
602
|
if canonical:
|
|
588
603
|
smiles = CanonSmiles(smiles)
|
|
589
604
|
return smiles
|
|
590
|
-
|
|
591
|
-
def canonicalise(
|
|
592
|
-
self, symbols: List[str], coordinates: np.ndarray
|
|
593
|
-
) -> Tuple[List[str], np.ndarray]:
|
|
594
|
-
"""
|
|
595
|
-
Canonicalising the 3D molecular graph.
|
|
596
|
-
|
|
597
|
-
:param symbols: a list of atomic symbols
|
|
598
|
-
:param coordinates: Cartesian coordinates; shape: (n_a, 3)
|
|
599
|
-
:type symbols: list
|
|
600
|
-
:type coordinates: numpy.ndarray
|
|
601
|
-
:return: canonicalised symbols \n
|
|
602
|
-
canonicalised coordinates; shape: (n_a, 3)
|
|
603
|
-
:rtype: tuple
|
|
604
|
-
"""
|
|
605
|
-
if not _use_pynauty:
|
|
606
|
-
if platform.system() == "Windows":
|
|
607
|
-
raise NotImplementedError(
|
|
608
|
-
"This method is not implemented on Windows platform."
|
|
609
|
-
)
|
|
610
|
-
else:
|
|
611
|
-
raise ImportError("`pynauty` is not installed.")
|
|
612
|
-
n = len(symbols)
|
|
613
|
-
if n == 1:
|
|
614
|
-
return symbols, coordinates
|
|
615
|
-
mol = self._xyz2mol(symbols, coordinates)
|
|
616
|
-
rdDetermineBonds.DetermineConnectivity(mol)
|
|
617
|
-
# ------- Canonicalization -------
|
|
618
|
-
pair_idx = np.array(_bond_pair_idx(mol.GetBonds())).T.tolist()
|
|
619
|
-
pair_dict: Dict[int, List[int]] = {}
|
|
620
|
-
for key, i in enumerate(pair_idx[0]):
|
|
621
|
-
if i not in pair_dict:
|
|
622
|
-
pair_dict[i] = [pair_idx[1][key]]
|
|
623
|
-
else:
|
|
624
|
-
pair_dict[i].append(pair_idx[1][key])
|
|
625
|
-
g = Graph(n, adjacency_dict=pair_dict)
|
|
626
|
-
cl = canon_label(g) # type: list
|
|
627
|
-
symbols = np.array([[s] for s in symbols])[cl].flatten().tolist()
|
|
628
|
-
coordinates = coordinates[cl]
|
|
629
|
-
return symbols, coordinates
|
|
630
|
-
|
|
631
|
-
@staticmethod
|
|
632
|
-
def cartesian2spherical(coordinates: np.ndarray) -> np.ndarray:
|
|
633
|
-
"""
|
|
634
|
-
Transforming Cartesian coordinate to spherical form.\n
|
|
635
|
-
The method is adapted from the paper: https://arxiv.org/abs/2408.10120.
|
|
636
|
-
|
|
637
|
-
:param coordinates: Cartesian coordinates; shape: (n_a, 3)
|
|
638
|
-
:type coordinates: numpy.ndarray
|
|
639
|
-
:return: spherical coordinates; shape: (n_a, 3)
|
|
640
|
-
:rtype: numpy.ndarray
|
|
641
|
-
"""
|
|
642
|
-
n = coordinates.shape[0]
|
|
643
|
-
if n == 1:
|
|
644
|
-
return np.array([[0.0, 0.0, 0.0]])
|
|
645
|
-
# ------- Find global coordinate frame -------
|
|
646
|
-
if n == 2:
|
|
647
|
-
d = np.linalg.norm(coordinates[0] - coordinates[1], 2)
|
|
648
|
-
return np.array([[0.0, 0.0, 0.0], [d, 0.0, 0.0]])
|
|
649
|
-
for idx_0 in range(n - 2):
|
|
650
|
-
_vec0 = coordinates[idx_0] - coordinates[idx_0 + 1]
|
|
651
|
-
_vec1 = coordinates[idx_0] - coordinates[idx_0 + 2]
|
|
652
|
-
_d1 = np.linalg.norm(_vec0, 2)
|
|
653
|
-
_d2 = np.linalg.norm(_vec1, 2)
|
|
654
|
-
if 1 - np.abs(np.dot(_vec0, _vec1) / (_d1 * _d2)) > 1e-6:
|
|
655
|
-
break
|
|
656
|
-
x = (coordinates[idx_0 + 1] - coordinates[idx_0]) / _d1
|
|
657
|
-
y = np.cross((coordinates[idx_0 + 2] - coordinates[idx_0]), x)
|
|
658
|
-
y_d = np.linalg.norm(y, 2)
|
|
659
|
-
y = y / np.ma.filled(np.ma.array(y_d, mask=y_d == 0), np.inf)
|
|
660
|
-
z = np.cross(x, y)
|
|
661
|
-
# ------- Build spherical coordinates -------
|
|
662
|
-
vec = coordinates - coordinates[idx_0]
|
|
663
|
-
d = np.linalg.norm(vec, 2, axis=-1)
|
|
664
|
-
_d = np.ma.filled(np.ma.array(d, mask=d == 0), np.inf)
|
|
665
|
-
theta = np.arccos(np.dot(vec, z) / _d) # in [0, \pi]
|
|
666
|
-
phi = np.arctan2(np.dot(vec, y), np.dot(vec, x)) # in [-\pi, \pi]
|
|
667
|
-
info = np.vstack([d, theta, phi]).T
|
|
668
|
-
info[idx_0] = np.zeros_like(info[idx_0])
|
|
669
|
-
return info
|
|
670
|
-
|
|
671
|
-
def geo2seq(
|
|
672
|
-
self, symbols: List[str], coordinates: np.ndarray, decimals: int = 2
|
|
673
|
-
) -> str:
|
|
674
|
-
"""
|
|
675
|
-
Geometry-to-sequence function.\n
|
|
676
|
-
The algorithm follows the descriptions in paper: https://arxiv.org/abs/2408.10120.
|
|
677
|
-
|
|
678
|
-
:param symbols: a list of atomic symbols
|
|
679
|
-
:param coordinates: Cartesian coordinates; shape: (n_a, 3)
|
|
680
|
-
:param decimals: the maxmium number of decimals to keep; default is 2
|
|
681
|
-
:type symbols: list
|
|
682
|
-
:type coordinates: numpy.ndarray
|
|
683
|
-
:type decimals: int
|
|
684
|
-
:return: `Geo2Seq` string
|
|
685
|
-
:rtype: str
|
|
686
|
-
"""
|
|
687
|
-
symbols, coordinates = self.canonicalise(symbols, coordinates)
|
|
688
|
-
info = self.cartesian2spherical(coordinates)
|
|
689
|
-
info = [
|
|
690
|
-
f"{symbols[i]} {r[0]} {r[1]} {r[2]}"
|
|
691
|
-
for i, r in enumerate(np.round(info, decimals))
|
|
692
|
-
]
|
|
693
|
-
return " ".join(info)
|
|
694
|
-
|
|
695
|
-
@staticmethod
|
|
696
|
-
def seq2geo(seq: str) -> Tuple[Optional[List[str]], Optional[np.ndarray]]:
|
|
697
|
-
"""
|
|
698
|
-
Sequence-to-geometry function.\n
|
|
699
|
-
The method follows the descriptions in paper: https://arxiv.org/abs/2408.10120.
|
|
700
|
-
|
|
701
|
-
:param seq: `Geo2Seq` string
|
|
702
|
-
:type seq: str
|
|
703
|
-
:return: (symbols, coordinates) if `seq` is valid
|
|
704
|
-
:rtype: tuple
|
|
705
|
-
"""
|
|
706
|
-
tokens = seq.split()
|
|
707
|
-
if len(tokens) % 4 != 0:
|
|
708
|
-
return None, None
|
|
709
|
-
tokens = np.array(tokens).reshape(-1, 4)
|
|
710
|
-
symbols, coordinates = tokens[::, 0], tokens[::, 1:]
|
|
711
|
-
if sum([len(_atom_regex.findall(sym)) for sym in symbols]) != len(symbols):
|
|
712
|
-
return None, None
|
|
713
|
-
try:
|
|
714
|
-
coord = [[float(i) for i in j] for j in coordinates]
|
|
715
|
-
coord = np.array(coord)
|
|
716
|
-
except ValueError:
|
|
717
|
-
return None, None
|
|
718
|
-
d, theta, phi = coord[::, 0, None], coord[::, 1, None], coord[::, 2, None]
|
|
719
|
-
x = d * np.sin(theta) * np.cos(phi)
|
|
720
|
-
y = d * np.sin(theta) * np.sin(phi)
|
|
721
|
-
z = d * np.cos(theta)
|
|
722
|
-
return symbols, np.concatenate([x, y, z], -1)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: bayesianflow_for_chem
|
|
3
|
-
Version: 1.3.0
|
|
3
|
+
Version: 1.4.0
|
|
4
4
|
Summary: Bayesian flow network framework for Chemistry
|
|
5
5
|
Home-page: https://augus1999.github.io/bayesian-flow-network-for-chemistry/
|
|
6
6
|
Author: Nianze A. Tao
|
|
@@ -28,8 +28,6 @@ Requires-Dist: loralib>=0.1.2
|
|
|
28
28
|
Requires-Dist: lightning>=2.2.0
|
|
29
29
|
Requires-Dist: scikit-learn>=1.5.0
|
|
30
30
|
Requires-Dist: typing_extensions>=4.8.0
|
|
31
|
-
Provides-Extra: geo2seq
|
|
32
|
-
Requires-Dist: pynauty>=2.8.8.1; extra == "geo2seq"
|
|
33
31
|
Dynamic: author
|
|
34
32
|
Dynamic: author-email
|
|
35
33
|
Dynamic: classifier
|
|
@@ -40,7 +38,6 @@ Dynamic: keywords
|
|
|
40
38
|
Dynamic: license
|
|
41
39
|
Dynamic: license-file
|
|
42
40
|
Dynamic: project-url
|
|
43
|
-
Dynamic: provides-extra
|
|
44
41
|
Dynamic: requires-dist
|
|
45
42
|
Dynamic: requires-python
|
|
46
43
|
Dynamic: summary
|
|
@@ -92,7 +89,7 @@ You can find pretrained models on our [🤗Hugging Face model page](https://hugg
|
|
|
92
89
|
|
|
93
90
|
We provide a Python class [`CSVData`](./bayesianflow_for_chem/data.py) to handle data stored in CSV or similar format containing headers to identify the entities. The following is a quickstart.
|
|
94
91
|
|
|
95
|
-
1. Download your dataset file (e.g., ESOL
|
|
92
|
+
1. Download your dataset file (e.g., ESOL from [MoleculeNet](https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/delaney-processed.csv)) and split the file:
|
|
96
93
|
```python
|
|
97
94
|
>>> from bayesianflow_for_chem.tool import split_data
|
|
98
95
|
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
bayesianflow_for_chem/__init__.py,sha256=3sP8nM4_idOX-ksvpBJEApxPAVAPijKvQHxidTO5790,329
|
|
2
|
+
bayesianflow_for_chem/data.py,sha256=WoOCOVmJX4WeHa2WeO4i66J2FS8rvRaYRCdlBN7ZeOM,6576
|
|
3
|
+
bayesianflow_for_chem/model.py,sha256=fUrXKhn2U9FrVPJyb4lqACqPTePkIgI0v6_1jPs5c0Q,50784
|
|
4
|
+
bayesianflow_for_chem/scorer.py,sha256=7G1TVSwC0qONtNm6kiDZUWwvuFPzasNSjp4eJAk5TL0,4101
|
|
5
|
+
bayesianflow_for_chem/tool.py,sha256=NMMRHk2FJY0fyA76zCrz6tkcylCuExMUMj5hohWTnkE,23155
|
|
6
|
+
bayesianflow_for_chem/train.py,sha256=hGKyhGhLch-exSYPZdLXrLn3gf39Q1VLSJs2qtuikQE,9709
|
|
7
|
+
bayesianflow_for_chem/vocab.txt,sha256=HgtAZmpWYk4y8PqEVC4vqut1vE75DfRKE_10s2UW0rU,790
|
|
8
|
+
bayesianflow_for_chem-1.4.0.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
|
|
9
|
+
bayesianflow_for_chem-1.4.0.dist-info/METADATA,sha256=1Y5mLIOaPsHcyCCm2SkWz7OCniQYVJ67-cVq3cUU0Mw,5643
|
|
10
|
+
bayesianflow_for_chem-1.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
11
|
+
bayesianflow_for_chem-1.4.0.dist-info/top_level.txt,sha256=KHsanI3BMCt8D9Qpze2ycrF6nMa3PyojgO6eS1c8kco,22
|
|
12
|
+
bayesianflow_for_chem-1.4.0.dist-info/RECORD,,
|
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
bayesianflow_for_chem/__init__.py,sha256=3BW4-ri8OcMZAIPJBT2q-48L3LAY776xluMDC6WXaZU,329
|
|
2
|
-
bayesianflow_for_chem/data.py,sha256=EbCfhA1bCieVHVOYVk7nvgsaOzhKyFdnHd261qNR4BY,7763
|
|
3
|
-
bayesianflow_for_chem/model.py,sha256=fFcfg1RZuoJeptAtglo2U8j1EGNSGjItMHqlKdLGGhU,50799
|
|
4
|
-
bayesianflow_for_chem/scorer.py,sha256=7G1TVSwC0qONtNm6kiDZUWwvuFPzasNSjp4eJAk5TL0,4101
|
|
5
|
-
bayesianflow_for_chem/tool.py,sha256=Z9qF80qzK-CJk9MJaWuSNOLnA-LPiD6CiC7S3sZbBP8,27704
|
|
6
|
-
bayesianflow_for_chem/train.py,sha256=hGKyhGhLch-exSYPZdLXrLn3gf39Q1VLSJs2qtuikQE,9709
|
|
7
|
-
bayesianflow_for_chem/vocab.txt,sha256=HgtAZmpWYk4y8PqEVC4vqut1vE75DfRKE_10s2UW0rU,790
|
|
8
|
-
bayesianflow_for_chem-1.3.0.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
|
|
9
|
-
bayesianflow_for_chem-1.3.0.dist-info/METADATA,sha256=2BDjaVhIkd0TLolVETa2kb7xUGYhn8kdlq2CMfF-i7Y,5746
|
|
10
|
-
bayesianflow_for_chem-1.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
11
|
-
bayesianflow_for_chem-1.3.0.dist-info/top_level.txt,sha256=KHsanI3BMCt8D9Qpze2ycrF6nMa3PyojgO6eS1c8kco,22
|
|
12
|
-
bayesianflow_for_chem-1.3.0.dist-info/RECORD,,
|
|
File without changes
|
{bayesianflow_for_chem-1.3.0.dist-info → bayesianflow_for_chem-1.4.0.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
{bayesianflow_for_chem-1.3.0.dist-info → bayesianflow_for_chem-1.4.0.dist-info}/top_level.txt
RENAMED
|
File without changes
|