boltz-vsynthes 1.0.34__py3-none-any.whl → 1.0.36__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,6 +3,7 @@ from dataclasses import dataclass
3
3
  from pathlib import Path
4
4
  from typing import Optional
5
5
  import json
6
+ import yaml
6
7
 
7
8
  import click
8
9
  import numpy as np
@@ -936,6 +937,7 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
936
937
  ccd: Mapping[str, Mol],
937
938
  mol_dir: Optional[Path] = None,
938
939
  boltz_2: bool = False,
940
+ output_dir: Optional[Path] = None,
939
941
  ) -> Target:
940
942
  """Parse a Boltz input yaml / json.
941
943
 
@@ -987,6 +989,8 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
987
989
  Path to the directory containing the molecules.
988
990
  boltz2: bool
989
991
  Whether to parse the input for Boltz2.
992
+ output_dir: Path, optional
993
+ Path to the output directory. If provided, results will be saved in a subfolder named after the input file.
990
994
 
991
995
  Returns
992
996
  -------
@@ -994,6 +998,14 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
994
998
  The parsed target.
995
999
 
996
1000
  """
1001
+ # Create output directory if specified
1002
+ if output_dir is not None:
1003
+ output_dir = Path(output_dir)
1004
+ output_dir.mkdir(parents=True, exist_ok=True)
1005
+ # Create subfolder based on input name
1006
+ subfolder = output_dir / name
1007
+ subfolder.mkdir(parents=True, exist_ok=True)
1008
+
997
1009
  # Assert version 1
998
1010
  version = schema.get("version", 1)
999
1011
  if version != 1:
@@ -1222,7 +1234,7 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
1222
1234
  )
1223
1235
 
1224
1236
  # Parse a non-polymer
1225
- elif (entity_type == "ligand") and ("ccd" in items[0][entity_type]):
1237
+ elif (entity_type == "ligand") and "ccd" in (items[0][entity_type]):
1226
1238
  seq = items[0][entity_type]["ccd"]
1227
1239
 
1228
1240
  if isinstance(seq, str):
@@ -1314,60 +1326,6 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
1314
1326
  "cyclic", False
1315
1327
  ), "Cyclic flag is not supported for ligands"
1316
1328
 
1317
- elif (entity_type == "ligand") and ("sdf" in items[0][entity_type]):
1318
- # Handle SDF file
1319
- sdf_path = Path(items[0][entity_type]["sdf"])
1320
- from boltz.data.parse.sdf import parse_sdf
1321
- target = parse_sdf(sdf_path, ccd, mol_dir)
1322
- mol = target["sequences"][0]["ligand"]["smiles"]
1323
-
1324
- if affinity:
1325
- mol = standardize(mol)
1326
-
1327
- mol = AllChem.MolFromSmiles(mol)
1328
- mol = AllChem.AddHs(mol)
1329
-
1330
- # Set atom names
1331
- canonical_order = AllChem.CanonicalRankAtoms(mol)
1332
- for atom, can_idx in zip(mol.GetAtoms(), canonical_order):
1333
- atom_name = atom.GetSymbol().upper() + str(can_idx + 1)
1334
- if len(atom_name) > 4:
1335
- msg = (
1336
- f"{mol} has an atom with a name longer than "
1337
- f"4 characters: {atom_name}."
1338
- )
1339
- raise ValueError(msg)
1340
- atom.SetProp("name", atom_name)
1341
-
1342
- success = compute_3d_conformer(mol)
1343
- if not success:
1344
- msg = f"Failed to compute 3D conformer for {mol}"
1345
- raise ValueError(msg)
1346
-
1347
- mol_no_h = AllChem.RemoveHs(mol, sanitize=False)
1348
- affinity_mw = AllChem.Descriptors.MolWt(mol_no_h) if affinity else None
1349
- extra_mols[f"LIG{ligand_id}"] = mol_no_h
1350
- residue = parse_ccd_residue(
1351
- name=f"LIG{ligand_id}",
1352
- ref_mol=mol,
1353
- res_idx=0,
1354
- )
1355
-
1356
- ligand_id += 1
1357
- parsed_chain = ParsedChain(
1358
- entity=entity_id,
1359
- residues=[residue],
1360
- type=const.chain_type_ids["NONPOLYMER"],
1361
- cyclic_period=0,
1362
- sequence=None,
1363
- affinity=affinity,
1364
- affinity_mw=affinity_mw,
1365
- )
1366
-
1367
- assert not items[0][entity_type].get(
1368
- "cyclic", False
1369
- ), "Cyclic flag is not supported for ligands"
1370
-
1371
1329
  else:
1372
1330
  msg = f"Invalid entity type: {entity_type}"
1373
1331
  raise ValueError(msg)
@@ -1426,43 +1384,24 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
1426
1384
  protein_chains.add(chain_name)
1427
1385
 
1428
1386
  # Add affinity info
1387
+ if chain.affinity and affinity_info is not None:
1388
+ msg = "Cannot compute affinity for multiple ligands!"
1389
+ raise ValueError(msg)
1390
+
1429
1391
  if chain.affinity:
1430
- # If this is a protein binder, we need to create affinity info for each ligand
1431
- if chain_name in affinity_proteins:
1432
- # Find all ligand chains
1433
- ligand_chains = [
1434
- (name, c) for name, c in chains.items()
1435
- if c.type == const.chain_type_ids["NONPOLYMER"]
1436
- ]
1437
- if not ligand_chains:
1438
- msg = "No ligand chains found for protein binder!"
1439
- raise ValueError(msg)
1440
-
1441
- # Create affinity info for each ligand
1442
- for ligand_name, ligand_chain in ligand_chains:
1443
- affinity_info = AffinityInfo(
1444
- chain_id=asym_id,
1445
- mw=chain.affinity_mw,
1446
- )
1447
- # Save the affinity info in a subfolder named after the ligand
1448
- output_dir = Path(f"output/{ligand_name}")
1449
- output_dir.mkdir(parents=True, exist_ok=True)
1450
- # Save the affinity info
1451
- with open(output_dir / "affinity_info.json", "w") as f:
1452
- json.dump({
1453
- "chain_id": asym_id,
1454
- "mw": chain.affinity_mw,
1455
- "ligand_name": ligand_name
1456
- }, f)
1457
- else:
1458
- # This is a ligand binder
1459
- if affinity_info is not None:
1460
- msg = "Cannot compute affinity for multiple ligands!"
1461
- raise ValueError(msg)
1462
- affinity_info = AffinityInfo(
1463
- chain_id=asym_id,
1464
- mw=chain.affinity_mw,
1465
- )
1392
+ affinity_info = AffinityInfo(
1393
+ chain_id=asym_id,
1394
+ mw=chain.affinity_mw,
1395
+ )
1396
+ # Save affinity info if output directory is specified
1397
+ if output_dir is not None:
1398
+ affinity_path = subfolder / "affinity_info.json"
1399
+ with open(affinity_path, "w") as f:
1400
+ json.dump({
1401
+ "chain_id": asym_id,
1402
+ "mw": chain.affinity_mw,
1403
+ "chain_name": chain_name
1404
+ }, f)
1466
1405
 
1467
1406
  # Find all copies of this chain in the assembly
1468
1407
  entity_id = int(chain.entity)
@@ -1925,3 +1864,60 @@ def standardize(smiles: str) -> Optional[str]:
1925
1864
  raise ValueError("Molecule is broken")
1926
1865
 
1927
1866
  return smiles
1867
+
1868
+
1869
def parse_boltz_directory(
    input_dir: Path,
    output_dir: Path,
    ccd: Mapping[str, Mol],
    mol_dir: Optional[Path] = None,
    boltz_2: bool = False,
) -> list[Target]:
    """Parse every ``*.yaml`` file directly inside a directory.

    Each file is loaded with ``yaml.safe_load`` and handed to
    ``parse_boltz_schema`` using the file stem as the target name. Parsing
    is best-effort: a file that fails to load or parse is reported on
    stdout and skipped, so one bad input does not abort the whole batch.

    Parameters
    ----------
    input_dir : Path
        Path to the directory containing YAML files. Only files matching
        ``*.yaml`` at the top level of this directory are considered.
    output_dir : Path
        Path to the output directory where results will be saved. It is
        created (including parents) if it does not exist and is forwarded
        to ``parse_boltz_schema`` for every file.
    ccd : Mapping[str, Mol]
        Dictionary of CCD components.
    mol_dir : Path, optional
        Path to the directory containing the molecules.
    boltz_2 : bool, optional
        Whether to parse the input for Boltz2.

    Returns
    -------
    list[Target]
        Successfully parsed targets, ordered by sorted input file path.
        Files that failed to parse are omitted.

    """
    input_dir = Path(input_dir)
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    targets = []
    # Path.glob yields entries in arbitrary, filesystem-dependent order;
    # sorting makes the order of the returned targets reproducible.
    for yaml_file in sorted(input_dir.glob("*.yaml")):
        try:
            # Read as UTF-8 explicitly rather than relying on the locale's
            # default encoding, which differs between platforms.
            with open(yaml_file, "r", encoding="utf-8") as f:
                schema = yaml.safe_load(f)

            # An empty document loads as None and a scalar/list document is
            # not a mapping either; the schema parser calls ``schema.get``,
            # so fail here with a message that names the actual problem.
            if not isinstance(schema, dict):
                msg = (
                    "Expected a YAML mapping at the top level, got "
                    f"{type(schema).__name__}"
                )
                raise ValueError(msg)

            target = parse_boltz_schema(
                name=yaml_file.stem,
                schema=schema,
                ccd=ccd,
                mol_dir=mol_dir,
                boltz_2=boltz_2,
                output_dir=output_dir,
            )
        except Exception as e:  # noqa: BLE001 - deliberate best-effort batch
            # Report and move on so the remaining files are still processed.
            print(f"Error processing {yaml_file}: {e}")
        else:
            targets.append(target)

    return targets
boltz/data/parse/yaml.py CHANGED
@@ -1,9 +1,10 @@
1
1
  from pathlib import Path
2
+ from typing import Union, List, Optional
2
3
 
3
4
  import yaml
4
5
  from rdkit.Chem.rdchem import Mol
5
6
 
6
- from boltz.data.parse.schema import parse_boltz_schema
7
+ from boltz.data.parse.schema import parse_boltz_schema, parse_boltz_directory
7
8
  from boltz.data.types import Target
8
9
 
9
10
 
@@ -12,8 +13,9 @@ def parse_yaml(
12
13
  ccd: dict[str, Mol],
13
14
  mol_dir: Path,
14
15
  boltz2: bool = False,
15
- ) -> Target:
16
- """Parse a Boltz input yaml / json.
16
+ output_dir: Optional[Path] = None,
17
+ ) -> Union[Target, List[Target]]:
18
+ """Parse a Boltz input yaml / json file or directory.
17
19
 
18
20
  The input file should be a yaml file with the following format:
19
21
 
@@ -49,20 +51,28 @@ def parse_yaml(
49
51
  Parameters
50
52
  ----------
51
53
  path : Path
52
- Path to the YAML input format.
53
- components : Dict
54
+ Path to the YAML input file or directory containing YAML files.
55
+ ccd : Dict
54
56
  Dictionary of CCD components.
55
- boltz2 : bool
57
+ mol_dir : Path
58
+ Path to the directory containing molecules.
59
+ boltz2 : bool, optional
56
60
  Whether to parse the input for Boltz2.
61
+ output_dir : Path, optional
62
+ Path to the output directory where results will be saved.
57
63
 
58
64
  Returns
59
65
  -------
60
- Target
61
- The parsed target.
66
+ Union[Target, List[Target]]
67
+ The parsed target(s).
62
68
 
63
69
  """
64
- with path.open("r") as file:
65
- data = yaml.safe_load(file)
66
-
67
- name = path.stem
68
- return parse_boltz_schema(name, data, ccd, mol_dir, boltz2)
70
+ path = Path(path)
71
+
72
+ if path.is_dir():
73
+ return parse_boltz_directory(path, output_dir or path, ccd, mol_dir, boltz2)
74
+ else:
75
+ with path.open("r") as file:
76
+ data = yaml.safe_load(file)
77
+ name = path.stem
78
+ return parse_boltz_schema(name, data, ccd, mol_dir, boltz2, output_dir)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: boltz-vsynthes
3
- Version: 1.0.34
3
+ Version: 1.0.36
4
4
  Summary: Boltz for VSYNTHES
5
5
  Requires-Python: <3.13,>=3.10
6
6
  Description-Content-Type: text/markdown
@@ -40,9 +40,9 @@ boltz/data/parse/mmcif.py,sha256=25kEXCkx-OuaawAs7cdz0fxdRu5_CCO0AV00u84PrjQ,368
40
40
  boltz/data/parse/mmcif_with_constraints.py,sha256=WHYZckSqUwu-Nb9vmVmxHmC7uxwVrF7AVUeVKsc5wGQ,51473
41
41
  boltz/data/parse/pdb.py,sha256=iybk4p2UgUy_ABGprDq_xxyPSdm1HAZsGTM0lhxVEwM,1654
42
42
  boltz/data/parse/pdb_download.py,sha256=wge-scX-lOatX0q83W1wOsaql99rYp-6uGWSHEc995M,2718
43
- boltz/data/parse/schema.py,sha256=1saQOFI15H6TqoEupjsjnbX77UQfuST174kIErThdwY,65676
43
+ boltz/data/parse/schema.py,sha256=6MmG1adbbwi_y1J9jgIjx_Tsh0sHpQ-9zwwlKxdBwNk,64603
44
44
  boltz/data/parse/sdf.py,sha256=fs3MQVClDcCzxJaeVYiDuoh-fUrYc8Tcd5Bz8ws3FKI,2052
45
- boltz/data/parse/yaml.py,sha256=GRFRMtDD4PQ4PIpA_S1jj0vRaEu2LlZd_g4rN1zUrNo,1505
45
+ boltz/data/parse/yaml.py,sha256=M3dRQK2mMDue3bPSO_T2ThaVojSMrOV7rMY-KXQvaGQ,2047
46
46
  boltz/data/sample/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
47
47
  boltz/data/sample/cluster.py,sha256=9Sx8qP7zGZOAyEspwYFtCTbGTBZnuN-zfCKFbbA_6oI,8175
48
48
  boltz/data/sample/distillation.py,sha256=ABzst2FBr_E54KqZWIHc1bYtKYr79lxRJM7PnS4ifK0,1789
@@ -107,9 +107,9 @@ boltz/model/optim/scheduler.py,sha256=nB4jz0CZ4pR4n08LQngExL_pNycIdYI8AXVoHPnZWQ
107
107
  boltz/model/potentials/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
108
108
  boltz/model/potentials/potentials.py,sha256=vev8Vjfs-ML1hyrdv_R8DynG4wSFahJ6nzPWp7CYQqw,17507
109
109
  boltz/model/potentials/schedules.py,sha256=m7XJjfuF9uTX3bR9VisXv1rvzJjxiD8PobXRpcBBu1c,968
110
- boltz_vsynthes-1.0.34.dist-info/licenses/LICENSE,sha256=8GZ_1eZsUeG6jdqgJJxtciWzADfgLEV4LY8sKUOsJhc,1102
111
- boltz_vsynthes-1.0.34.dist-info/METADATA,sha256=h-T8W9FPQG5hynUMxYWhnJN4Ztt1XWoX4LgorJCEEYY,7171
112
- boltz_vsynthes-1.0.34.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
113
- boltz_vsynthes-1.0.34.dist-info/entry_points.txt,sha256=n5a5I35ntu9lmyr16oZgHPFY0b0YxjiixY7m7nbMTLc,41
114
- boltz_vsynthes-1.0.34.dist-info/top_level.txt,sha256=MgU3Jfb-ctWm07YGMts68PMjSh9v26D0gfG3dFRmVFA,6
115
- boltz_vsynthes-1.0.34.dist-info/RECORD,,
110
+ boltz_vsynthes-1.0.36.dist-info/licenses/LICENSE,sha256=8GZ_1eZsUeG6jdqgJJxtciWzADfgLEV4LY8sKUOsJhc,1102
111
+ boltz_vsynthes-1.0.36.dist-info/METADATA,sha256=GuLxeSu8jH6mmmism0QOM1o6JYEA6LjIK4lcFuLaIms,7171
112
+ boltz_vsynthes-1.0.36.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
113
+ boltz_vsynthes-1.0.36.dist-info/entry_points.txt,sha256=n5a5I35ntu9lmyr16oZgHPFY0b0YxjiixY7m7nbMTLc,41
114
+ boltz_vsynthes-1.0.36.dist-info/top_level.txt,sha256=MgU3Jfb-ctWm07YGMts68PMjSh9v26D0gfG3dFRmVFA,6
115
+ boltz_vsynthes-1.0.36.dist-info/RECORD,,