boltz-vsynthes 1.0.38__py3-none-any.whl → 1.0.40__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1326,6 +1326,60 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
1326
1326
  "cyclic", False
1327
1327
  ), "Cyclic flag is not supported for ligands"
1328
1328
 
1329
+ elif (entity_type == "ligand") and ("sdf" in items[0][entity_type]):
1330
+ # Handle SDF file
1331
+ sdf_path = Path(items[0][entity_type]["sdf"])
1332
+ from boltz.data.parse.sdf import parse_sdf
1333
+ target = parse_sdf(sdf_path, ccd, mol_dir)
1334
+ mol = target["sequences"][0]["ligand"]["smiles"]
1335
+
1336
+ if affinity:
1337
+ mol = standardize(mol)
1338
+
1339
+ mol = AllChem.MolFromSmiles(mol)
1340
+ mol = AllChem.AddHs(mol)
1341
+
1342
+ # Set atom names
1343
+ canonical_order = AllChem.CanonicalRankAtoms(mol)
1344
+ for atom, can_idx in zip(mol.GetAtoms(), canonical_order):
1345
+ atom_name = atom.GetSymbol().upper() + str(can_idx + 1)
1346
+ if len(atom_name) > 4:
1347
+ msg = (
1348
+ f"{mol} has an atom with a name longer than "
1349
+ f"4 characters: {atom_name}."
1350
+ )
1351
+ raise ValueError(msg)
1352
+ atom.SetProp("name", atom_name)
1353
+
1354
+ success = compute_3d_conformer(mol)
1355
+ if not success:
1356
+ msg = f"Failed to compute 3D conformer for {mol}"
1357
+ raise ValueError(msg)
1358
+
1359
+ mol_no_h = AllChem.RemoveHs(mol, sanitize=False)
1360
+ affinity_mw = AllChem.Descriptors.MolWt(mol_no_h) if affinity else None
1361
+ extra_mols[f"LIG{ligand_id}"] = mol_no_h
1362
+ residue = parse_ccd_residue(
1363
+ name=f"LIG{ligand_id}",
1364
+ ref_mol=mol,
1365
+ res_idx=0,
1366
+ )
1367
+
1368
+ ligand_id += 1
1369
+ parsed_chain = ParsedChain(
1370
+ entity=entity_id,
1371
+ residues=[residue],
1372
+ type=const.chain_type_ids["NONPOLYMER"],
1373
+ cyclic_period=0,
1374
+ sequence=None,
1375
+ affinity=affinity,
1376
+ affinity_mw=affinity_mw,
1377
+ )
1378
+
1379
+ assert not items[0][entity_type].get(
1380
+ "cyclic", False
1381
+ ), "Cyclic flag is not supported for ligands"
1382
+
1329
1383
  else:
1330
1384
  msg = f"Invalid entity type: {entity_type}"
1331
1385
  raise ValueError(msg)
@@ -1393,15 +1447,6 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
1393
1447
  chain_id=asym_id,
1394
1448
  mw=chain.affinity_mw,
1395
1449
  )
1396
- # Save affinity info if output directory is specified
1397
- if output_dir is not None:
1398
- affinity_path = subfolder / "affinity_info.json"
1399
- with open(affinity_path, "w") as f:
1400
- json.dump({
1401
- "chain_id": asym_id,
1402
- "mw": chain.affinity_mw,
1403
- "chain_name": chain_name
1404
- }, f)
1405
1450
 
1406
1451
  # Find all copies of this chain in the assembly
1407
1452
  entity_id = int(chain.entity)
boltz/main.py CHANGED
@@ -498,13 +498,25 @@ def process_input( # noqa: C901, PLR0912, PLR0915, D103
498
498
  ) -> None:
499
499
  try:
500
500
  # Parse data
501
- if path.suffix in (".fa", ".fas", ".fasta"):
501
+ if path.is_dir():
502
+ # Process all YAML and FASTA files in the directory
503
+ targets = []
504
+ for file_path in path.glob("*"):
505
+ if file_path.suffix in (".fa", ".fas", ".fasta"):
506
+ target = parse_fasta(file_path, ccd, mol_dir, boltz2)
507
+ targets.append(target)
508
+ elif file_path.suffix in (".yml", ".yaml"):
509
+ target = parse_yaml(file_path, ccd, mol_dir, boltz2)
510
+ if not isinstance(target, list):
511
+ target = [target]
512
+ targets.extend(target)
513
+ elif path.suffix in (".fa", ".fas", ".fasta"):
502
514
  target = parse_fasta(path, ccd, mol_dir, boltz2)
515
+ targets = [target]
503
516
  elif path.suffix in (".yml", ".yaml"):
504
- target = parse_yaml(path, ccd, mol_dir, boltz2)
505
- elif path.is_dir():
506
- msg = f"Found directory {path} instead of .fasta or .yaml, skipping."
507
- raise RuntimeError(msg) # noqa: TRY301
517
+ targets = parse_yaml(path, ccd, mol_dir, boltz2)
518
+ if not isinstance(targets, list):
519
+ targets = [targets]
508
520
  else:
509
521
  msg = (
510
522
  f"Unable to parse filetype {path.suffix}, "
@@ -512,96 +524,98 @@ def process_input( # noqa: C901, PLR0912, PLR0915, D103
512
524
  )
513
525
  raise RuntimeError(msg) # noqa: TRY301
514
526
 
515
- # Get target id
516
- target_id = target.record.id
517
-
518
- # Get all MSA ids and decide whether to generate MSA
519
- to_generate = {}
520
- prot_id = const.chain_type_ids["PROTEIN"]
521
- for chain in target.record.chains:
522
- # Add to generate list, assigning entity id
523
- if (chain.mol_type == prot_id) and (chain.msa_id == 0):
524
- entity_id = chain.entity_id
525
- msa_id = f"{target_id}_{entity_id}"
526
- to_generate[msa_id] = target.sequences[entity_id]
527
- chain.msa_id = msa_dir / f"{msa_id}.csv"
528
-
529
- # We do not support msa generation for non-protein chains
530
- elif chain.msa_id == 0:
531
- chain.msa_id = -1
532
-
533
- # Generate MSA
534
- if to_generate and not use_msa_server:
535
- msg = "Missing MSA's in input and --use_msa_server flag not set."
536
- raise RuntimeError(msg) # noqa: TRY301
537
-
538
- if to_generate:
539
- msg = f"Generating MSA for {path} with {len(to_generate)} protein entities."
540
- click.echo(msg)
541
- compute_msa(
542
- data=to_generate,
543
- target_id=target_id,
544
- msa_dir=msa_dir,
545
- msa_server_url=msa_server_url,
546
- msa_pairing_strategy=msa_pairing_strategy,
547
- )
527
+ # Process each target
528
+ for target in targets:
529
+ # Get target id
530
+ target_id = target.record.id
531
+
532
+ # Get all MSA ids and decide whether to generate MSA
533
+ to_generate = {}
534
+ prot_id = const.chain_type_ids["PROTEIN"]
535
+ for chain in target.record.chains:
536
+ # Add to generate list, assigning entity id
537
+ if (chain.mol_type == prot_id) and (chain.msa_id == 0):
538
+ entity_id = chain.entity_id
539
+ msa_id = f"{target_id}_{entity_id}"
540
+ to_generate[msa_id] = target.sequences[entity_id]
541
+ chain.msa_id = msa_dir / f"{msa_id}.csv"
542
+
543
+ # We do not support msa generation for non-protein chains
544
+ elif chain.msa_id == 0:
545
+ chain.msa_id = -1
546
+
547
+ # Generate MSA
548
+ if to_generate and not use_msa_server:
549
+ msg = "Missing MSA's in input and --use_msa_server flag not set."
550
+ raise RuntimeError(msg) # noqa: TRY301
551
+
552
+ if to_generate:
553
+ msg = f"Generating MSA for {path} with {len(to_generate)} protein entities."
554
+ click.echo(msg)
555
+ compute_msa(
556
+ data=to_generate,
557
+ target_id=target_id,
558
+ msa_dir=msa_dir,
559
+ msa_server_url=msa_server_url,
560
+ msa_pairing_strategy=msa_pairing_strategy,
561
+ )
548
562
 
549
- # Parse MSA data
550
- msas = sorted({c.msa_id for c in target.record.chains if c.msa_id != -1})
551
- msa_id_map = {}
552
- for msa_idx, msa_id in enumerate(msas):
553
- # Check that raw MSA exists
554
- msa_path = Path(msa_id)
555
- if not msa_path.exists():
556
- msg = f"MSA file {msa_path} not found."
557
- raise FileNotFoundError(msg) # noqa: TRY301
558
-
559
- # Dump processed MSA
560
- processed = processed_msa_dir / f"{target_id}_{msa_idx}.npz"
561
- msa_id_map[msa_id] = f"{target_id}_{msa_idx}"
562
- if not processed.exists():
563
- # Parse A3M
564
- if msa_path.suffix == ".a3m":
565
- msa: MSA = parse_a3m(
566
- msa_path,
567
- taxonomy=None,
568
- max_seqs=max_msa_seqs,
569
- )
570
- elif msa_path.suffix == ".csv":
571
- msa: MSA = parse_csv(msa_path, max_seqs=max_msa_seqs)
572
- else:
573
- msg = f"MSA file {msa_path} not supported, only a3m or csv."
574
- raise RuntimeError(msg) # noqa: TRY301
575
-
576
- msa.dump(processed)
577
-
578
- # Modify records to point to processed MSA
579
- for c in target.record.chains:
580
- if (c.msa_id != -1) and (c.msa_id in msa_id_map):
581
- c.msa_id = msa_id_map[c.msa_id]
582
-
583
- # Dump templates
584
- for template_id, template in target.templates.items():
585
- name = f"{target.record.id}_{template_id}.npz"
586
- template_path = processed_templates_dir / name
587
- template.dump(template_path)
588
-
589
- # Dump constraints
590
- constraints_path = processed_constraints_dir / f"{target.record.id}.npz"
591
- target.residue_constraints.dump(constraints_path)
592
-
593
- # Dump extra molecules
594
- Chem.SetDefaultPickleProperties(Chem.PropertyPickleOptions.AllProps)
595
- with (processed_mols_dir / f"{target.record.id}.pkl").open("wb") as f:
596
- pickle.dump(target.extra_mols, f)
597
-
598
- # Dump structure
599
- struct_path = structure_dir / f"{target.record.id}.npz"
600
- target.structure.dump(struct_path)
601
-
602
- # Dump record
603
- record_path = records_dir / f"{target.record.id}.json"
604
- target.record.dump(record_path)
563
+ # Parse MSA data
564
+ msas = sorted({c.msa_id for c in target.record.chains if c.msa_id != -1})
565
+ msa_id_map = {}
566
+ for msa_idx, msa_id in enumerate(msas):
567
+ # Check that raw MSA exists
568
+ msa_path = Path(msa_id)
569
+ if not msa_path.exists():
570
+ msg = f"MSA file {msa_path} not found."
571
+ raise FileNotFoundError(msg) # noqa: TRY301
572
+
573
+ # Dump processed MSA
574
+ processed = processed_msa_dir / f"{target_id}_{msa_idx}.npz"
575
+ msa_id_map[msa_id] = f"{target_id}_{msa_idx}"
576
+ if not processed.exists():
577
+ # Parse A3M
578
+ if msa_path.suffix == ".a3m":
579
+ msa: MSA = parse_a3m(
580
+ msa_path,
581
+ taxonomy=None,
582
+ max_seqs=max_msa_seqs,
583
+ )
584
+ elif msa_path.suffix == ".csv":
585
+ msa: MSA = parse_csv(msa_path, max_seqs=max_msa_seqs)
586
+ else:
587
+ msg = f"MSA file {msa_path} not supported, only a3m or csv."
588
+ raise RuntimeError(msg) # noqa: TRY301
589
+
590
+ msa.dump(processed)
591
+
592
+ # Modify records to point to processed MSA
593
+ for c in target.record.chains:
594
+ if (c.msa_id != -1) and (c.msa_id in msa_id_map):
595
+ c.msa_id = msa_id_map[c.msa_id]
596
+
597
+ # Dump templates
598
+ for template_id, template in target.templates.items():
599
+ name = f"{target.record.id}_{template_id}.npz"
600
+ template_path = processed_templates_dir / name
601
+ template.dump(template_path)
602
+
603
+ # Dump constraints
604
+ constraints_path = processed_constraints_dir / f"{target.record.id}.npz"
605
+ target.residue_constraints.dump(constraints_path)
606
+
607
+ # Dump extra molecules
608
+ Chem.SetDefaultPickleProperties(Chem.PropertyPickleOptions.AllProps)
609
+ with (processed_mols_dir / f"{target.record.id}.pkl").open("wb") as f:
610
+ pickle.dump(target.extra_mols, f)
611
+
612
+ # Dump structure
613
+ struct_path = structure_dir / f"{target.record.id}.npz"
614
+ target.structure.dump(struct_path)
615
+
616
+ # Dump record
617
+ record_path = records_dir / f"{target.record.id}.json"
618
+ target.record.dump(record_path)
605
619
 
606
620
  except Exception as e: # noqa: BLE001
607
621
  import traceback
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: boltz-vsynthes
3
- Version: 1.0.38
3
+ Version: 1.0.40
4
4
  Summary: Boltz for VSYNTHES
5
5
  Requires-Python: <3.13,>=3.10
6
6
  Description-Content-Type: text/markdown
@@ -1,5 +1,5 @@
1
1
  boltz/__init__.py,sha256=F_-so3S40iZrSZ89Ge4TS6aZqwWyZXq_H4AXGDlbA_g,187
2
- boltz/main.py,sha256=i5_15JZ9vjZ9RSLZb2F0a7scuQ0QfFkgUQVftTiD3h0,39945
2
+ boltz/main.py,sha256=SHM-t-9wjwjTJmWR4N5SrAHxk2vgz7fTruz5shiixVc,40882
3
3
  boltz/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  boltz/data/const.py,sha256=1M-88Z6HkfKY6MkNtqcj3b9P-oX9xEXluh3qM_u8dNU,26779
5
5
  boltz/data/mol.py,sha256=maOpPHEGX1VVXCIFY6pQNGF7gUBZPAfgSvuPf2QO1yc,34268
@@ -40,7 +40,7 @@ boltz/data/parse/mmcif.py,sha256=25kEXCkx-OuaawAs7cdz0fxdRu5_CCO0AV00u84PrjQ,368
40
40
  boltz/data/parse/mmcif_with_constraints.py,sha256=WHYZckSqUwu-Nb9vmVmxHmC7uxwVrF7AVUeVKsc5wGQ,51473
41
41
  boltz/data/parse/pdb.py,sha256=iybk4p2UgUy_ABGprDq_xxyPSdm1HAZsGTM0lhxVEwM,1654
42
42
  boltz/data/parse/pdb_download.py,sha256=wge-scX-lOatX0q83W1wOsaql99rYp-6uGWSHEc995M,2718
43
- boltz/data/parse/schema.py,sha256=b0Mh1eCg6gTyOQt7GkEFAQdYCZJ1jqAJbUy9Tv53K4E,64781
43
+ boltz/data/parse/schema.py,sha256=p4KIAVzQAuApcxRLHc6-KKG7ICgLmEWVzE8Qqm6v04w,66402
44
44
  boltz/data/parse/sdf.py,sha256=fs3MQVClDcCzxJaeVYiDuoh-fUrYc8Tcd5Bz8ws3FKI,2052
45
45
  boltz/data/parse/yaml.py,sha256=M3dRQK2mMDue3bPSO_T2ThaVojSMrOV7rMY-KXQvaGQ,2047
46
46
  boltz/data/sample/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -107,9 +107,9 @@ boltz/model/optim/scheduler.py,sha256=nB4jz0CZ4pR4n08LQngExL_pNycIdYI8AXVoHPnZWQ
107
107
  boltz/model/potentials/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
108
108
  boltz/model/potentials/potentials.py,sha256=vev8Vjfs-ML1hyrdv_R8DynG4wSFahJ6nzPWp7CYQqw,17507
109
109
  boltz/model/potentials/schedules.py,sha256=m7XJjfuF9uTX3bR9VisXv1rvzJjxiD8PobXRpcBBu1c,968
110
- boltz_vsynthes-1.0.38.dist-info/licenses/LICENSE,sha256=8GZ_1eZsUeG6jdqgJJxtciWzADfgLEV4LY8sKUOsJhc,1102
111
- boltz_vsynthes-1.0.38.dist-info/METADATA,sha256=HtZ8GekM9xBSPv24CUhVnarUX_GnVP-_tvIM8HfLCZc,7171
112
- boltz_vsynthes-1.0.38.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
113
- boltz_vsynthes-1.0.38.dist-info/entry_points.txt,sha256=n5a5I35ntu9lmyr16oZgHPFY0b0YxjiixY7m7nbMTLc,41
114
- boltz_vsynthes-1.0.38.dist-info/top_level.txt,sha256=MgU3Jfb-ctWm07YGMts68PMjSh9v26D0gfG3dFRmVFA,6
115
- boltz_vsynthes-1.0.38.dist-info/RECORD,,
110
+ boltz_vsynthes-1.0.40.dist-info/licenses/LICENSE,sha256=8GZ_1eZsUeG6jdqgJJxtciWzADfgLEV4LY8sKUOsJhc,1102
111
+ boltz_vsynthes-1.0.40.dist-info/METADATA,sha256=z2kizv_5w3PrpKHsDV_GXjhzQDRxRCWWT2pOESvbcFU,7171
112
+ boltz_vsynthes-1.0.40.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
113
+ boltz_vsynthes-1.0.40.dist-info/entry_points.txt,sha256=n5a5I35ntu9lmyr16oZgHPFY0b0YxjiixY7m7nbMTLc,41
114
+ boltz_vsynthes-1.0.40.dist-info/top_level.txt,sha256=MgU3Jfb-ctWm07YGMts68PMjSh9v26D0gfG3dFRmVFA,6
115
+ boltz_vsynthes-1.0.40.dist-info/RECORD,,