boltz-vsynthes 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- boltz/data/module/inferencev2.py +17 -7
- boltz/data/parse/pdb.py +29 -18
- boltz/data/parse/pdb_download.py +29 -19
- boltz/main.py +18 -9
- {boltz_vsynthes-0.1.2.dist-info → boltz_vsynthes-0.1.4.dist-info}/METADATA +1 -1
- {boltz_vsynthes-0.1.2.dist-info → boltz_vsynthes-0.1.4.dist-info}/RECORD +10 -10
- {boltz_vsynthes-0.1.2.dist-info → boltz_vsynthes-0.1.4.dist-info}/WHEEL +0 -0
- {boltz_vsynthes-0.1.2.dist-info → boltz_vsynthes-0.1.4.dist-info}/entry_points.txt +0 -0
- {boltz_vsynthes-0.1.2.dist-info → boltz_vsynthes-0.1.4.dist-info}/licenses/LICENSE +0 -0
- {boltz_vsynthes-0.1.2.dist-info → boltz_vsynthes-0.1.4.dist-info}/top_level.txt +0 -0
boltz/data/module/inferencev2.py
CHANGED
@@ -30,6 +30,7 @@ def load_input(
|
|
30
30
|
msa_dir: Path,
|
31
31
|
constraints_dir: Optional[Path] = None,
|
32
32
|
template_dir: Optional[Path] = None,
|
33
|
+
only_prediction: bool = False,
|
33
34
|
extra_mols_dir: Optional[Path] = None,
|
34
35
|
affinity: bool = False,
|
35
36
|
) -> Input:
|
@@ -60,15 +61,18 @@ def load_input(
|
|
60
61
|
"""
|
61
62
|
# Load the structure
|
62
63
|
if affinity:
|
63
|
-
|
64
|
+
if only_prediction:
|
65
|
+
#TODO: Formalize this
|
66
|
+
if target_dir.name == "predictions":
|
67
|
+
target_dir = target_dir.parent / "processed"
|
68
|
+
structure = StructureV2.load(
|
69
|
+
target_dir / f"structures/{record.id}.npz"
|
70
|
+
)
|
71
|
+
else:
|
72
|
+
structure = StructureV2.load(
|
64
73
|
target_dir / record.id / f"pre_affinity_{record.id}.npz"
|
65
74
|
)
|
66
|
-
|
67
|
-
# if target_dir.name == "predictions":
|
68
|
-
# target_dir = target_dir.parent / "processed"
|
69
|
-
# structure = StructureV2.load(
|
70
|
-
# target_dir / f"structures/{record.id}.npz"
|
71
|
-
# )
|
75
|
+
|
72
76
|
else:
|
73
77
|
structure = StructureV2.load(target_dir / f"{record.id}.npz")
|
74
78
|
|
@@ -171,6 +175,7 @@ class PredictionDataset(torch.utils.data.Dataset):
|
|
171
175
|
mol_dir: Path,
|
172
176
|
constraints_dir: Optional[Path] = None,
|
173
177
|
template_dir: Optional[Path] = None,
|
178
|
+
only_prediction: bool = False,
|
174
179
|
extra_mols_dir: Optional[Path] = None,
|
175
180
|
override_method: Optional[str] = None,
|
176
181
|
affinity: bool = False,
|
@@ -203,6 +208,7 @@ class PredictionDataset(torch.utils.data.Dataset):
|
|
203
208
|
self.tokenizer = Boltz2Tokenizer()
|
204
209
|
self.featurizer = Boltz2Featurizer()
|
205
210
|
self.canonicals = load_canonicals(self.mol_dir)
|
211
|
+
self.only_prediction = only_prediction
|
206
212
|
self.extra_mols_dir = extra_mols_dir
|
207
213
|
self.override_method = override_method
|
208
214
|
self.affinity = affinity
|
@@ -228,6 +234,7 @@ class PredictionDataset(torch.utils.data.Dataset):
|
|
228
234
|
msa_dir=self.msa_dir,
|
229
235
|
constraints_dir=self.constraints_dir,
|
230
236
|
template_dir=self.template_dir,
|
237
|
+
only_prediction=self.only_prediction,
|
231
238
|
extra_mols_dir=self.extra_mols_dir,
|
232
239
|
affinity=self.affinity,
|
233
240
|
)
|
@@ -329,6 +336,7 @@ class Boltz2InferenceDataModule(pl.LightningDataModule):
|
|
329
336
|
constraints_dir: Optional[Path] = None,
|
330
337
|
template_dir: Optional[Path] = None,
|
331
338
|
extra_mols_dir: Optional[Path] = None,
|
339
|
+
only_prediction: bool = False,
|
332
340
|
override_method: Optional[str] = None,
|
333
341
|
affinity: bool = False,
|
334
342
|
) -> None:
|
@@ -365,6 +373,7 @@ class Boltz2InferenceDataModule(pl.LightningDataModule):
|
|
365
373
|
self.constraints_dir = constraints_dir
|
366
374
|
self.template_dir = template_dir
|
367
375
|
self.extra_mols_dir = extra_mols_dir
|
376
|
+
self.only_prediction = only_prediction
|
368
377
|
self.override_method = override_method
|
369
378
|
self.affinity = affinity
|
370
379
|
|
@@ -384,6 +393,7 @@ class Boltz2InferenceDataModule(pl.LightningDataModule):
|
|
384
393
|
mol_dir=self.mol_dir,
|
385
394
|
constraints_dir=self.constraints_dir,
|
386
395
|
template_dir=self.template_dir,
|
396
|
+
only_prediction=self.only_prediction,
|
387
397
|
extra_mols_dir=self.extra_mols_dir,
|
388
398
|
override_method=self.override_method,
|
389
399
|
affinity=self.affinity,
|
boltz/data/parse/pdb.py
CHANGED
@@ -8,6 +8,9 @@ from Bio.PDB.Polypeptide import PPBuilder
|
|
8
8
|
from Bio.Data.IUPACData import protein_letters_3to1
|
9
9
|
from rdkit import Chem
|
10
10
|
from rdkit.Chem.rdchem import Mol
|
11
|
+
from Bio.SeqUtils import seq1
|
12
|
+
from collections import defaultdict
|
13
|
+
|
11
14
|
|
12
15
|
from boltz.data.types import Target
|
13
16
|
from boltz.data.parse.schema import parse_boltz_schema
|
@@ -38,27 +41,35 @@ def parse_pdb(
|
|
38
41
|
Dictionary containing sequences and bonds.
|
39
42
|
"""
|
40
43
|
# Read PDB file
|
41
|
-
parser = PDBParser(QUIET=True)
|
42
|
-
structure = parser.get_structure("protein", str(pdb_path))
|
43
|
-
ppb = PPBuilder()
|
44
|
-
|
45
|
-
# Convert to yaml format
|
46
44
|
sequences = []
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
45
|
+
sequence_by_chain = defaultdict(list)
|
46
|
+
|
47
|
+
# Parse SEQRES records directly
|
48
|
+
with open(pdb_path) as f:
|
49
|
+
for line in f:
|
50
|
+
if line.startswith("SEQRES"):
|
51
|
+
parts = line.split()
|
52
|
+
chain_id = parts[2]
|
53
|
+
residues = parts[4:]
|
54
|
+
for res in residues:
|
55
|
+
try:
|
56
|
+
aa = seq1(res)
|
57
|
+
except KeyError:
|
58
|
+
aa = 'X'
|
59
|
+
sequence_by_chain[chain_id].append(aa)
|
59
60
|
|
61
|
+
# Convert to yaml-style list
|
62
|
+
for chain_id, aa_list in sequence_by_chain.items():
|
63
|
+
sequences.append({
|
64
|
+
"protein": {
|
65
|
+
"id": chain_id,
|
66
|
+
"sequence": ''.join(aa_list),
|
67
|
+
"modifications": [],
|
68
|
+
}
|
69
|
+
})
|
70
|
+
print(sequences)
|
60
71
|
return {
|
61
72
|
"sequences": sequences,
|
62
73
|
"bonds": [],
|
63
74
|
"version": 1,
|
64
|
-
}
|
75
|
+
}
|
boltz/data/parse/pdb_download.py
CHANGED
@@ -8,6 +8,8 @@ from Bio.PDB.PDBParser import PDBParser
|
|
8
8
|
from Bio.PDB.Polypeptide import PPBuilder
|
9
9
|
from rdkit import Chem
|
10
10
|
from rdkit.Chem.rdchem import Mol
|
11
|
+
from Bio.SeqUtils import seq1
|
12
|
+
from collections import defaultdict
|
11
13
|
|
12
14
|
from boltz.data.types import Target
|
13
15
|
from boltz.data.parse.schema import parse_boltz_schema
|
@@ -79,28 +81,36 @@ def parse_pdb_id(
|
|
79
81
|
# Download PDB file
|
80
82
|
pdb_path = download_pdb(pdb_id, cache_dir)
|
81
83
|
|
82
|
-
# Read PDB file
|
83
|
-
parser = PDBParser(QUIET=True)
|
84
|
-
structure = parser.get_structure("protein", str(pdb_path))
|
85
|
-
ppb = PPBuilder()
|
86
|
-
|
87
|
-
# Convert to yaml format
|
88
84
|
sequences = []
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
85
|
+
sequence_by_chain = defaultdict(list)
|
86
|
+
|
87
|
+
# Parse SEQRES records directly
|
88
|
+
with open(pdb_path) as f:
|
89
|
+
for line in f:
|
90
|
+
if line.startswith("SEQRES"):
|
91
|
+
parts = line.split()
|
92
|
+
chain_id = parts[2]
|
93
|
+
residues = parts[4:]
|
94
|
+
for res in residues:
|
95
|
+
try:
|
96
|
+
aa = seq1(res)
|
97
|
+
except KeyError:
|
98
|
+
aa = 'X'
|
99
|
+
sequence_by_chain[chain_id].append(aa)
|
100
|
+
|
101
|
+
# Convert to yaml-style list
|
102
|
+
for chain_id, aa_list in sequence_by_chain.items():
|
103
|
+
sequences.append({
|
104
|
+
"protein": {
|
105
|
+
"id": chain_id,
|
106
|
+
"sequence": ''.join(aa_list),
|
107
|
+
"modifications": [],
|
108
|
+
}
|
109
|
+
})
|
110
|
+
print(sequences)
|
101
111
|
|
102
112
|
return {
|
103
113
|
"sequences": sequences,
|
104
114
|
"bonds": [],
|
105
115
|
"version": 1,
|
106
|
-
}
|
116
|
+
}
|
boltz/main.py
CHANGED
@@ -935,6 +935,11 @@ def cli() -> None:
|
|
935
935
|
is_flag=True,
|
936
936
|
help="Whether to disable the kernels. Default False",
|
937
937
|
)
|
938
|
+
@click.option(
|
939
|
+
"--only_prediction",
|
940
|
+
is_flag=True,
|
941
|
+
help="Run only prediction. Default False",
|
942
|
+
)
|
938
943
|
def predict( # noqa: C901, PLR0915, PLR0912
|
939
944
|
data: str,
|
940
945
|
out_dir: str,
|
@@ -968,6 +973,7 @@ def predict( # noqa: C901, PLR0915, PLR0912
|
|
968
973
|
subsample_msa: bool = True,
|
969
974
|
num_subsampled_msa: int = 1024,
|
970
975
|
no_kernels: bool = False,
|
976
|
+
only_prediction: bool = False,
|
971
977
|
) -> None:
|
972
978
|
"""Run predictions with Boltz."""
|
973
979
|
# If cpu, write a friendly warning
|
@@ -1176,6 +1182,7 @@ def predict( # noqa: C901, PLR0915, PLR0912
|
|
1176
1182
|
constraints_dir=processed.constraints_dir,
|
1177
1183
|
template_dir=processed.template_dir,
|
1178
1184
|
extra_mols_dir=processed.extra_mols_dir,
|
1185
|
+
only_prediction=only_prediction,
|
1179
1186
|
override_method=method,
|
1180
1187
|
)
|
1181
1188
|
else:
|
@@ -1224,15 +1231,16 @@ def predict( # noqa: C901, PLR0915, PLR0912
|
|
1224
1231
|
model_module.eval()
|
1225
1232
|
print(f"[{datetime.now().strftime('%H:%M:%S')}] Model loaded in {time.time() - t_model:.2f} seconds")
|
1226
1233
|
|
1227
|
-
|
1228
|
-
|
1229
|
-
|
1230
|
-
|
1231
|
-
|
1232
|
-
|
1233
|
-
|
1234
|
-
|
1235
|
-
|
1234
|
+
if not only_prediction:
|
1235
|
+
# 9. Before and after compute structure predictions (predict)
|
1236
|
+
t_predict = time.time()
|
1237
|
+
print(f"[{datetime.now().strftime('%H:%M:%S')}] Computing structure predictions...")
|
1238
|
+
trainer.predict(
|
1239
|
+
model_module,
|
1240
|
+
datamodule=data_module,
|
1241
|
+
return_predictions=False,
|
1242
|
+
)
|
1243
|
+
print(f"[{datetime.now().strftime('%H:%M:%S')}] Structure predictions computed in {time.time() - t_predict:.2f} seconds")
|
1236
1244
|
|
1237
1245
|
# Check if affinity predictions are needed
|
1238
1246
|
if any(r.affinity for r in manifest.records):
|
@@ -1266,6 +1274,7 @@ def predict( # noqa: C901, PLR0915, PLR0912
|
|
1266
1274
|
num_workers=num_workers,
|
1267
1275
|
constraints_dir=processed.constraints_dir,
|
1268
1276
|
template_dir=processed.template_dir,
|
1277
|
+
only_prediction=only_prediction,
|
1269
1278
|
extra_mols_dir=processed.extra_mols_dir,
|
1270
1279
|
override_method="other",
|
1271
1280
|
affinity=True,
|
@@ -1,5 +1,5 @@
|
|
1
1
|
boltz/__init__.py,sha256=F_-so3S40iZrSZ89Ge4TS6aZqwWyZXq_H4AXGDlbA_g,187
|
2
|
-
boltz/main.py,sha256=
|
2
|
+
boltz/main.py,sha256=CQ4lGxzXPw9kvnWYpuh0KRLtfc2Qmk1KcSTo4GquVv4,42848
|
3
3
|
boltz/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
4
|
boltz/data/const.py,sha256=1M-88Z6HkfKY6MkNtqcj3b9P-oX9xEXluh3qM_u8dNU,26779
|
5
5
|
boltz/data/mol.py,sha256=maOpPHEGX1VVXCIFY6pQNGF7gUBZPAfgSvuPf2QO1yc,34268
|
@@ -27,7 +27,7 @@ boltz/data/filter/static/ligand.py,sha256=LamC-Z9IjYj3DmfxwMFmPbKBBhRMby3uWQj74w
|
|
27
27
|
boltz/data/filter/static/polymer.py,sha256=LNsQMsOOnhYpeKuM9AStktoTQPMZE3H0yu4mRg-jwPc,9386
|
28
28
|
boltz/data/module/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
29
29
|
boltz/data/module/inference.py,sha256=xk8ZJ8UhjPiPTdOluH_v4gnV8GtTX3sr1WZ1s5Ox8I8,8100
|
30
|
-
boltz/data/module/inferencev2.py,sha256=
|
30
|
+
boltz/data/module/inferencev2.py,sha256=83GMvlQihS0QzTd51TB6sFeErQ4xFyCn2M1BD2z0bZE,13125
|
31
31
|
boltz/data/module/training.py,sha256=iNzmq9ufs20S4M947CCzdYzGTFjmCTf2tFExJ2PtXnA,22428
|
32
32
|
boltz/data/module/trainingv2.py,sha256=ZsYUHYXxfuPgIpbTwCj5QLO0XK__xjsqIw6GARSNGW0,21276
|
33
33
|
boltz/data/msa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -38,8 +38,8 @@ boltz/data/parse/csv.py,sha256=Hcq8rJW2njczahEr8jfd_o-zxLaNSgJ3YIoC9srIqpw,2518
|
|
38
38
|
boltz/data/parse/fasta.py,sha256=taI4s_CqPtyF0XaLJAsVAJHCL0GXm2g1g8Qeccdxikk,3906
|
39
39
|
boltz/data/parse/mmcif.py,sha256=25kEXCkx-OuaawAs7cdz0fxdRu5_CCO0AV00u84PrjQ,36822
|
40
40
|
boltz/data/parse/mmcif_with_constraints.py,sha256=WHYZckSqUwu-Nb9vmVmxHmC7uxwVrF7AVUeVKsc5wGQ,51473
|
41
|
-
boltz/data/parse/pdb.py,sha256=
|
42
|
-
boltz/data/parse/pdb_download.py,sha256=
|
41
|
+
boltz/data/parse/pdb.py,sha256=873jPx4D-OTBTd4lIn3GquYt0OLX4gbfzyAldxtzIIA,1913
|
42
|
+
boltz/data/parse/pdb_download.py,sha256=Ys2fepXD6RLYyq6xjHFNsoAStxISuvQ_EUEt0JwmrK0,2958
|
43
43
|
boltz/data/parse/schema.py,sha256=kNu28U2_MGiecwWNlcxgaDH3WOcO0P-q2LdoSPSb66w,63826
|
44
44
|
boltz/data/parse/sdf.py,sha256=fs3MQVClDcCzxJaeVYiDuoh-fUrYc8Tcd5Bz8ws3FKI,2052
|
45
45
|
boltz/data/parse/yaml.py,sha256=GRFRMtDD4PQ4PIpA_S1jj0vRaEu2LlZd_g4rN1zUrNo,1505
|
@@ -110,9 +110,9 @@ boltz/model/potentials/schedules.py,sha256=m7XJjfuF9uTX3bR9VisXv1rvzJjxiD8PobXRp
|
|
110
110
|
boltz/utils/sdf_splitter.py,sha256=ZHn_syOcmm-fDnJ3YEGyGv_vYz2IRzUW7vbbMSU2JBY,2108
|
111
111
|
boltz/utils/sdf_to_pre_affinity_npz.py,sha256=ro0KGe24JexbJm47J8S8w8Lmr_KaQbzOAb_dKZO2G9I,40384
|
112
112
|
boltz/utils/yaml_generator.py,sha256=ermWIG-BE6nNWHFvpEwpk92N9J-YATpGXZGLvD1I2oQ,4012
|
113
|
-
boltz_vsynthes-0.1.
|
114
|
-
boltz_vsynthes-0.1.
|
115
|
-
boltz_vsynthes-0.1.
|
116
|
-
boltz_vsynthes-0.1.
|
117
|
-
boltz_vsynthes-0.1.
|
118
|
-
boltz_vsynthes-0.1.
|
113
|
+
boltz_vsynthes-0.1.4.dist-info/licenses/LICENSE,sha256=8GZ_1eZsUeG6jdqgJJxtciWzADfgLEV4LY8sKUOsJhc,1102
|
114
|
+
boltz_vsynthes-0.1.4.dist-info/METADATA,sha256=_MxyMdy8K71CIe85VHRqIvHArfBjy7pd-fZjoDpInJo,7234
|
115
|
+
boltz_vsynthes-0.1.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
116
|
+
boltz_vsynthes-0.1.4.dist-info/entry_points.txt,sha256=nZNYPKKrmAr-MVA0K-ClNRT2p90FV1_14d7HpsESZFQ,211
|
117
|
+
boltz_vsynthes-0.1.4.dist-info/top_level.txt,sha256=MgU3Jfb-ctWm07YGMts68PMjSh9v26D0gfG3dFRmVFA,6
|
118
|
+
boltz_vsynthes-0.1.4.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|