boltz-vsynthes: boltz_vsynthes-0.1.2-py3-none-any.whl → boltz_vsynthes-0.1.4-py3-none-any.whl

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -30,6 +30,7 @@ def load_input(
30
30
  msa_dir: Path,
31
31
  constraints_dir: Optional[Path] = None,
32
32
  template_dir: Optional[Path] = None,
33
+ only_prediction: bool = False,
33
34
  extra_mols_dir: Optional[Path] = None,
34
35
  affinity: bool = False,
35
36
  ) -> Input:
@@ -60,15 +61,18 @@ def load_input(
60
61
  """
61
62
  # Load the structure
62
63
  if affinity:
63
- structure = StructureV2.load(
64
+ if only_prediction:
65
+ #TODO: Formalize this
66
+ if target_dir.name == "predictions":
67
+ target_dir = target_dir.parent / "processed"
68
+ structure = StructureV2.load(
69
+ target_dir / f"structures/{record.id}.npz"
70
+ )
71
+ else:
72
+ structure = StructureV2.load(
64
73
  target_dir / record.id / f"pre_affinity_{record.id}.npz"
65
74
  )
66
- # if affinity:
67
- # if target_dir.name == "predictions":
68
- # target_dir = target_dir.parent / "processed"
69
- # structure = StructureV2.load(
70
- # target_dir / f"structures/{record.id}.npz"
71
- # )
75
+
72
76
  else:
73
77
  structure = StructureV2.load(target_dir / f"{record.id}.npz")
74
78
 
@@ -171,6 +175,7 @@ class PredictionDataset(torch.utils.data.Dataset):
171
175
  mol_dir: Path,
172
176
  constraints_dir: Optional[Path] = None,
173
177
  template_dir: Optional[Path] = None,
178
+ only_prediction: bool = False,
174
179
  extra_mols_dir: Optional[Path] = None,
175
180
  override_method: Optional[str] = None,
176
181
  affinity: bool = False,
@@ -203,6 +208,7 @@ class PredictionDataset(torch.utils.data.Dataset):
203
208
  self.tokenizer = Boltz2Tokenizer()
204
209
  self.featurizer = Boltz2Featurizer()
205
210
  self.canonicals = load_canonicals(self.mol_dir)
211
+ self.only_prediction = only_prediction
206
212
  self.extra_mols_dir = extra_mols_dir
207
213
  self.override_method = override_method
208
214
  self.affinity = affinity
@@ -228,6 +234,7 @@ class PredictionDataset(torch.utils.data.Dataset):
228
234
  msa_dir=self.msa_dir,
229
235
  constraints_dir=self.constraints_dir,
230
236
  template_dir=self.template_dir,
237
+ only_prediction=self.only_prediction,
231
238
  extra_mols_dir=self.extra_mols_dir,
232
239
  affinity=self.affinity,
233
240
  )
@@ -329,6 +336,7 @@ class Boltz2InferenceDataModule(pl.LightningDataModule):
329
336
  constraints_dir: Optional[Path] = None,
330
337
  template_dir: Optional[Path] = None,
331
338
  extra_mols_dir: Optional[Path] = None,
339
+ only_prediction: bool = False,
332
340
  override_method: Optional[str] = None,
333
341
  affinity: bool = False,
334
342
  ) -> None:
@@ -365,6 +373,7 @@ class Boltz2InferenceDataModule(pl.LightningDataModule):
365
373
  self.constraints_dir = constraints_dir
366
374
  self.template_dir = template_dir
367
375
  self.extra_mols_dir = extra_mols_dir
376
+ self.only_prediction = only_prediction
368
377
  self.override_method = override_method
369
378
  self.affinity = affinity
370
379
 
@@ -384,6 +393,7 @@ class Boltz2InferenceDataModule(pl.LightningDataModule):
384
393
  mol_dir=self.mol_dir,
385
394
  constraints_dir=self.constraints_dir,
386
395
  template_dir=self.template_dir,
396
+ only_prediction=self.only_prediction,
387
397
  extra_mols_dir=self.extra_mols_dir,
388
398
  override_method=self.override_method,
389
399
  affinity=self.affinity,
boltz/data/parse/pdb.py CHANGED
@@ -8,6 +8,9 @@ from Bio.PDB.Polypeptide import PPBuilder
8
8
  from Bio.Data.IUPACData import protein_letters_3to1
9
9
  from rdkit import Chem
10
10
  from rdkit.Chem.rdchem import Mol
11
+ from Bio.SeqUtils import seq1
12
+ from collections import defaultdict
13
+
11
14
 
12
15
  from boltz.data.types import Target
13
16
  from boltz.data.parse.schema import parse_boltz_schema
@@ -38,27 +41,35 @@ def parse_pdb(
38
41
  Dictionary containing sequences and bonds.
39
42
  """
40
43
  # Read PDB file
41
- parser = PDBParser(QUIET=True)
42
- structure = parser.get_structure("protein", str(pdb_path))
43
- ppb = PPBuilder()
44
-
45
- # Convert to yaml format
46
44
  sequences = []
47
- for model in structure:
48
- for chain in model:
49
- for pp in ppb.build_peptides(chain):
50
- seq = str(pp.get_sequence())
51
- if seq: # Only add if sequence is not empty
52
- sequences.append({
53
- "protein": {
54
- "id": chain.id,
55
- "sequence": seq,
56
- "modifications": [],
57
- }
58
- })
45
+ sequence_by_chain = defaultdict(list)
46
+
47
+ # Parse SEQRES records directly
48
+ with open(pdb_path) as f:
49
+ for line in f:
50
+ if line.startswith("SEQRES"):
51
+ parts = line.split()
52
+ chain_id = parts[2]
53
+ residues = parts[4:]
54
+ for res in residues:
55
+ try:
56
+ aa = seq1(res)
57
+ except KeyError:
58
+ aa = 'X'
59
+ sequence_by_chain[chain_id].append(aa)
59
60
 
61
+ # Convert to yaml-style list
62
+ for chain_id, aa_list in sequence_by_chain.items():
63
+ sequences.append({
64
+ "protein": {
65
+ "id": chain_id,
66
+ "sequence": ''.join(aa_list),
67
+ "modifications": [],
68
+ }
69
+ })
70
+ print(sequences)
60
71
  return {
61
72
  "sequences": sequences,
62
73
  "bonds": [],
63
74
  "version": 1,
64
- }
75
+ }
@@ -8,6 +8,8 @@ from Bio.PDB.PDBParser import PDBParser
8
8
  from Bio.PDB.Polypeptide import PPBuilder
9
9
  from rdkit import Chem
10
10
  from rdkit.Chem.rdchem import Mol
11
+ from Bio.SeqUtils import seq1
12
+ from collections import defaultdict
11
13
 
12
14
  from boltz.data.types import Target
13
15
  from boltz.data.parse.schema import parse_boltz_schema
@@ -79,28 +81,36 @@ def parse_pdb_id(
79
81
  # Download PDB file
80
82
  pdb_path = download_pdb(pdb_id, cache_dir)
81
83
 
82
- # Read PDB file
83
- parser = PDBParser(QUIET=True)
84
- structure = parser.get_structure("protein", str(pdb_path))
85
- ppb = PPBuilder()
86
-
87
- # Convert to yaml format
88
84
  sequences = []
89
- for model in structure:
90
- for chain in model:
91
- for pp in ppb.build_peptides(chain):
92
- seq = str(pp.get_sequence())
93
- if seq: # Only add if sequence is not empty
94
- sequences.append({
95
- "protein": {
96
- "id": chain.id,
97
- "sequence": seq,
98
- "modifications": [],
99
- }
100
- })
85
+ sequence_by_chain = defaultdict(list)
86
+
87
+ # Parse SEQRES records directly
88
+ with open(pdb_path) as f:
89
+ for line in f:
90
+ if line.startswith("SEQRES"):
91
+ parts = line.split()
92
+ chain_id = parts[2]
93
+ residues = parts[4:]
94
+ for res in residues:
95
+ try:
96
+ aa = seq1(res)
97
+ except KeyError:
98
+ aa = 'X'
99
+ sequence_by_chain[chain_id].append(aa)
100
+
101
+ # Convert to yaml-style list
102
+ for chain_id, aa_list in sequence_by_chain.items():
103
+ sequences.append({
104
+ "protein": {
105
+ "id": chain_id,
106
+ "sequence": ''.join(aa_list),
107
+ "modifications": [],
108
+ }
109
+ })
110
+ print(sequences)
101
111
 
102
112
  return {
103
113
  "sequences": sequences,
104
114
  "bonds": [],
105
115
  "version": 1,
106
- }
116
+ }
boltz/main.py CHANGED
@@ -935,6 +935,11 @@ def cli() -> None:
935
935
  is_flag=True,
936
936
  help="Whether to disable the kernels. Default False",
937
937
  )
938
+ @click.option(
939
+ "--only_prediction",
940
+ is_flag=True,
941
+ help="Run only prediction. Default False",
942
+ )
938
943
  def predict( # noqa: C901, PLR0915, PLR0912
939
944
  data: str,
940
945
  out_dir: str,
@@ -968,6 +973,7 @@ def predict( # noqa: C901, PLR0915, PLR0912
968
973
  subsample_msa: bool = True,
969
974
  num_subsampled_msa: int = 1024,
970
975
  no_kernels: bool = False,
976
+ only_prediction: bool = False,
971
977
  ) -> None:
972
978
  """Run predictions with Boltz."""
973
979
  # If cpu, write a friendly warning
@@ -1176,6 +1182,7 @@ def predict( # noqa: C901, PLR0915, PLR0912
1176
1182
  constraints_dir=processed.constraints_dir,
1177
1183
  template_dir=processed.template_dir,
1178
1184
  extra_mols_dir=processed.extra_mols_dir,
1185
+ only_prediction=only_prediction,
1179
1186
  override_method=method,
1180
1187
  )
1181
1188
  else:
@@ -1224,15 +1231,16 @@ def predict( # noqa: C901, PLR0915, PLR0912
1224
1231
  model_module.eval()
1225
1232
  print(f"[{datetime.now().strftime('%H:%M:%S')}] Model loaded in {time.time() - t_model:.2f} seconds")
1226
1233
 
1227
- # 9. Before and after compute structure predictions (predict)
1228
- t_predict = time.time()
1229
- print(f"[{datetime.now().strftime('%H:%M:%S')}] Computing structure predictions...")
1230
- trainer.predict(
1231
- model_module,
1232
- datamodule=data_module,
1233
- return_predictions=False,
1234
- )
1235
- print(f"[{datetime.now().strftime('%H:%M:%S')}] Structure predictions computed in {time.time() - t_predict:.2f} seconds")
1234
+ if not only_prediction:
1235
+ # 9. Before and after compute structure predictions (predict)
1236
+ t_predict = time.time()
1237
+ print(f"[{datetime.now().strftime('%H:%M:%S')}] Computing structure predictions...")
1238
+ trainer.predict(
1239
+ model_module,
1240
+ datamodule=data_module,
1241
+ return_predictions=False,
1242
+ )
1243
+ print(f"[{datetime.now().strftime('%H:%M:%S')}] Structure predictions computed in {time.time() - t_predict:.2f} seconds")
1236
1244
 
1237
1245
  # Check if affinity predictions are needed
1238
1246
  if any(r.affinity for r in manifest.records):
@@ -1266,6 +1274,7 @@ def predict( # noqa: C901, PLR0915, PLR0912
1266
1274
  num_workers=num_workers,
1267
1275
  constraints_dir=processed.constraints_dir,
1268
1276
  template_dir=processed.template_dir,
1277
+ only_prediction=only_prediction,
1269
1278
  extra_mols_dir=processed.extra_mols_dir,
1270
1279
  override_method="other",
1271
1280
  affinity=True,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: boltz-vsynthes
3
- Version: 0.1.2
3
+ Version: 0.1.4
4
4
  Summary: Boltz for VSYNTHES
5
5
  Requires-Python: <3.13,>=3.10
6
6
  Description-Content-Type: text/markdown
@@ -1,5 +1,5 @@
1
1
  boltz/__init__.py,sha256=F_-so3S40iZrSZ89Ge4TS6aZqwWyZXq_H4AXGDlbA_g,187
2
- boltz/main.py,sha256=nJVS1bG79_pqbMUCiRs0ak7WeOrPtIsJ_LeuHbsa_ms,42544
2
+ boltz/main.py,sha256=CQ4lGxzXPw9kvnWYpuh0KRLtfc2Qmk1KcSTo4GquVv4,42848
3
3
  boltz/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  boltz/data/const.py,sha256=1M-88Z6HkfKY6MkNtqcj3b9P-oX9xEXluh3qM_u8dNU,26779
5
5
  boltz/data/mol.py,sha256=maOpPHEGX1VVXCIFY6pQNGF7gUBZPAfgSvuPf2QO1yc,34268
@@ -27,7 +27,7 @@ boltz/data/filter/static/ligand.py,sha256=LamC-Z9IjYj3DmfxwMFmPbKBBhRMby3uWQj74w
27
27
  boltz/data/filter/static/polymer.py,sha256=LNsQMsOOnhYpeKuM9AStktoTQPMZE3H0yu4mRg-jwPc,9386
28
28
  boltz/data/module/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
29
29
  boltz/data/module/inference.py,sha256=xk8ZJ8UhjPiPTdOluH_v4gnV8GtTX3sr1WZ1s5Ox8I8,8100
30
- boltz/data/module/inferencev2.py,sha256=aLUm1WR6E1814JUrF6sJfoe5y8y7d_s4zlQ3pdFBVy8,12742
30
+ boltz/data/module/inferencev2.py,sha256=83GMvlQihS0QzTd51TB6sFeErQ4xFyCn2M1BD2z0bZE,13125
31
31
  boltz/data/module/training.py,sha256=iNzmq9ufs20S4M947CCzdYzGTFjmCTf2tFExJ2PtXnA,22428
32
32
  boltz/data/module/trainingv2.py,sha256=ZsYUHYXxfuPgIpbTwCj5QLO0XK__xjsqIw6GARSNGW0,21276
33
33
  boltz/data/msa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -38,8 +38,8 @@ boltz/data/parse/csv.py,sha256=Hcq8rJW2njczahEr8jfd_o-zxLaNSgJ3YIoC9srIqpw,2518
38
38
  boltz/data/parse/fasta.py,sha256=taI4s_CqPtyF0XaLJAsVAJHCL0GXm2g1g8Qeccdxikk,3906
39
39
  boltz/data/parse/mmcif.py,sha256=25kEXCkx-OuaawAs7cdz0fxdRu5_CCO0AV00u84PrjQ,36822
40
40
  boltz/data/parse/mmcif_with_constraints.py,sha256=WHYZckSqUwu-Nb9vmVmxHmC7uxwVrF7AVUeVKsc5wGQ,51473
41
- boltz/data/parse/pdb.py,sha256=iybk4p2UgUy_ABGprDq_xxyPSdm1HAZsGTM0lhxVEwM,1654
42
- boltz/data/parse/pdb_download.py,sha256=wge-scX-lOatX0q83W1wOsaql99rYp-6uGWSHEc995M,2718
41
+ boltz/data/parse/pdb.py,sha256=873jPx4D-OTBTd4lIn3GquYt0OLX4gbfzyAldxtzIIA,1913
42
+ boltz/data/parse/pdb_download.py,sha256=Ys2fepXD6RLYyq6xjHFNsoAStxISuvQ_EUEt0JwmrK0,2958
43
43
  boltz/data/parse/schema.py,sha256=kNu28U2_MGiecwWNlcxgaDH3WOcO0P-q2LdoSPSb66w,63826
44
44
  boltz/data/parse/sdf.py,sha256=fs3MQVClDcCzxJaeVYiDuoh-fUrYc8Tcd5Bz8ws3FKI,2052
45
45
  boltz/data/parse/yaml.py,sha256=GRFRMtDD4PQ4PIpA_S1jj0vRaEu2LlZd_g4rN1zUrNo,1505
@@ -110,9 +110,9 @@ boltz/model/potentials/schedules.py,sha256=m7XJjfuF9uTX3bR9VisXv1rvzJjxiD8PobXRp
110
110
  boltz/utils/sdf_splitter.py,sha256=ZHn_syOcmm-fDnJ3YEGyGv_vYz2IRzUW7vbbMSU2JBY,2108
111
111
  boltz/utils/sdf_to_pre_affinity_npz.py,sha256=ro0KGe24JexbJm47J8S8w8Lmr_KaQbzOAb_dKZO2G9I,40384
112
112
  boltz/utils/yaml_generator.py,sha256=ermWIG-BE6nNWHFvpEwpk92N9J-YATpGXZGLvD1I2oQ,4012
113
- boltz_vsynthes-0.1.2.dist-info/licenses/LICENSE,sha256=8GZ_1eZsUeG6jdqgJJxtciWzADfgLEV4LY8sKUOsJhc,1102
114
- boltz_vsynthes-0.1.2.dist-info/METADATA,sha256=RUv2LCKZ3YPmu2-YqB2EI-Pbe45u2SlXnAoTtgmlEOM,7234
115
- boltz_vsynthes-0.1.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
116
- boltz_vsynthes-0.1.2.dist-info/entry_points.txt,sha256=nZNYPKKrmAr-MVA0K-ClNRT2p90FV1_14d7HpsESZFQ,211
117
- boltz_vsynthes-0.1.2.dist-info/top_level.txt,sha256=MgU3Jfb-ctWm07YGMts68PMjSh9v26D0gfG3dFRmVFA,6
118
- boltz_vsynthes-0.1.2.dist-info/RECORD,,
113
+ boltz_vsynthes-0.1.4.dist-info/licenses/LICENSE,sha256=8GZ_1eZsUeG6jdqgJJxtciWzADfgLEV4LY8sKUOsJhc,1102
114
+ boltz_vsynthes-0.1.4.dist-info/METADATA,sha256=_MxyMdy8K71CIe85VHRqIvHArfBjy7pd-fZjoDpInJo,7234
115
+ boltz_vsynthes-0.1.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
116
+ boltz_vsynthes-0.1.4.dist-info/entry_points.txt,sha256=nZNYPKKrmAr-MVA0K-ClNRT2p90FV1_14d7HpsESZFQ,211
117
+ boltz_vsynthes-0.1.4.dist-info/top_level.txt,sha256=MgU3Jfb-ctWm07YGMts68PMjSh9v26D0gfG3dFRmVFA,6
118
+ boltz_vsynthes-0.1.4.dist-info/RECORD,,