boltz-vsynthes 1.0.14__tar.gz → 1.0.16__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (125)
  1. {boltz_vsynthes-1.0.14/src/boltz_vsynthes.egg-info → boltz_vsynthes-1.0.16}/PKG-INFO +1 -1
  2. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/pyproject.toml +1 -1
  3. boltz_vsynthes-1.0.16/src/boltz/data/parse/pdb.py +66 -0
  4. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/parse/pdb_download.py +18 -23
  5. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/parse/schema.py +8 -6
  6. boltz_vsynthes-1.0.16/src/boltz/data/parse/sdf.py +90 -0
  7. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16/src/boltz_vsynthes.egg-info}/PKG-INFO +1 -1
  8. boltz_vsynthes-1.0.14/src/boltz/data/parse/pdb.py +0 -71
  9. boltz_vsynthes-1.0.14/src/boltz/data/parse/sdf.py +0 -60
  10. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/LICENSE +0 -0
  11. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/README.md +0 -0
  12. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/setup.cfg +0 -0
  13. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/__init__.py +0 -0
  14. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/__init__.py +0 -0
  15. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/const.py +0 -0
  16. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/crop/__init__.py +0 -0
  17. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/crop/affinity.py +0 -0
  18. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/crop/boltz.py +0 -0
  19. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/crop/cropper.py +0 -0
  20. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/feature/__init__.py +0 -0
  21. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/feature/featurizer.py +0 -0
  22. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/feature/featurizerv2.py +0 -0
  23. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/feature/symmetry.py +0 -0
  24. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/filter/__init__.py +0 -0
  25. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/filter/dynamic/__init__.py +0 -0
  26. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/filter/dynamic/date.py +0 -0
  27. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/filter/dynamic/filter.py +0 -0
  28. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/filter/dynamic/max_residues.py +0 -0
  29. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/filter/dynamic/resolution.py +0 -0
  30. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/filter/dynamic/size.py +0 -0
  31. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/filter/dynamic/subset.py +0 -0
  32. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/filter/static/__init__.py +0 -0
  33. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/filter/static/filter.py +0 -0
  34. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/filter/static/ligand.py +0 -0
  35. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/filter/static/polymer.py +0 -0
  36. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/module/__init__.py +0 -0
  37. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/module/inference.py +0 -0
  38. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/module/inferencev2.py +0 -0
  39. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/module/training.py +0 -0
  40. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/module/trainingv2.py +0 -0
  41. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/mol.py +0 -0
  42. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/msa/__init__.py +0 -0
  43. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/msa/mmseqs2.py +0 -0
  44. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/pad.py +0 -0
  45. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/parse/__init__.py +0 -0
  46. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/parse/a3m.py +0 -0
  47. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/parse/csv.py +0 -0
  48. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/parse/fasta.py +0 -0
  49. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/parse/mmcif.py +0 -0
  50. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/parse/mmcif_with_constraints.py +0 -0
  51. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/parse/yaml.py +0 -0
  52. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/sample/__init__.py +0 -0
  53. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/sample/cluster.py +0 -0
  54. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/sample/distillation.py +0 -0
  55. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/sample/random.py +0 -0
  56. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/sample/sampler.py +0 -0
  57. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/tokenize/__init__.py +0 -0
  58. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/tokenize/boltz.py +0 -0
  59. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/tokenize/boltz2.py +0 -0
  60. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/tokenize/tokenizer.py +0 -0
  61. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/types.py +0 -0
  62. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/write/__init__.py +0 -0
  63. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/write/mmcif.py +0 -0
  64. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/write/pdb.py +0 -0
  65. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/write/utils.py +0 -0
  66. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/data/write/writer.py +0 -0
  67. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/main.py +0 -0
  68. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/__init__.py +0 -0
  69. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/layers/__init__.py +0 -0
  70. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/layers/attention.py +0 -0
  71. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/layers/attentionv2.py +0 -0
  72. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/layers/confidence_utils.py +0 -0
  73. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/layers/dropout.py +0 -0
  74. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/layers/initialize.py +0 -0
  75. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/layers/outer_product_mean.py +0 -0
  76. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/layers/pair_averaging.py +0 -0
  77. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/layers/pairformer.py +0 -0
  78. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/layers/relative.py +0 -0
  79. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/layers/transition.py +0 -0
  80. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/layers/triangular_attention/__init__.py +0 -0
  81. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/layers/triangular_attention/attention.py +0 -0
  82. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/layers/triangular_attention/primitives.py +0 -0
  83. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/layers/triangular_attention/utils.py +0 -0
  84. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/layers/triangular_mult.py +0 -0
  85. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/loss/__init__.py +0 -0
  86. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/loss/bfactor.py +0 -0
  87. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/loss/confidence.py +0 -0
  88. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/loss/confidencev2.py +0 -0
  89. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/loss/diffusion.py +0 -0
  90. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/loss/diffusionv2.py +0 -0
  91. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/loss/distogram.py +0 -0
  92. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/loss/distogramv2.py +0 -0
  93. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/loss/validation.py +0 -0
  94. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/models/__init__.py +0 -0
  95. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/models/boltz1.py +0 -0
  96. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/models/boltz2.py +0 -0
  97. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/modules/__init__.py +0 -0
  98. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/modules/affinity.py +0 -0
  99. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/modules/confidence.py +0 -0
  100. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/modules/confidence_utils.py +0 -0
  101. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/modules/confidencev2.py +0 -0
  102. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/modules/diffusion.py +0 -0
  103. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/modules/diffusion_conditioning.py +0 -0
  104. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/modules/diffusionv2.py +0 -0
  105. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/modules/encoders.py +0 -0
  106. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/modules/encodersv2.py +0 -0
  107. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/modules/transformers.py +0 -0
  108. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/modules/transformersv2.py +0 -0
  109. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/modules/trunk.py +0 -0
  110. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/modules/trunkv2.py +0 -0
  111. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/modules/utils.py +0 -0
  112. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/optim/__init__.py +0 -0
  113. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/optim/ema.py +0 -0
  114. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/optim/scheduler.py +0 -0
  115. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/potentials/__init__.py +0 -0
  116. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/potentials/potentials.py +0 -0
  117. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz/model/potentials/schedules.py +0 -0
  118. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz_vsynthes.egg-info/SOURCES.txt +0 -0
  119. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz_vsynthes.egg-info/dependency_links.txt +0 -0
  120. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz_vsynthes.egg-info/entry_points.txt +0 -0
  121. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz_vsynthes.egg-info/requires.txt +0 -0
  122. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/src/boltz_vsynthes.egg-info/top_level.txt +0 -0
  123. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/tests/test_kernels.py +0 -0
  124. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/tests/test_regression.py +0 -0
  125. {boltz_vsynthes-1.0.14 → boltz_vsynthes-1.0.16}/tests/test_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: boltz-vsynthes
3
- Version: 1.0.14
3
+ Version: 1.0.16
4
4
  Summary: Boltz for VSYNTHES
5
5
  Requires-Python: <3.13,>=3.10
6
6
  Description-Content-Type: text/markdown
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "boltz-vsynthes"
7
- version = "1.0.14"
7
+ version = "1.0.16"
8
8
  requires-python = ">=3.10,<3.13"
9
9
  description = "Boltz for VSYNTHES"
10
10
  readme = "README.md"
@@ -0,0 +1,66 @@
1
+ import os
2
+ from pathlib import Path
3
+ from typing import Optional
4
+
5
+ from Bio import PDB
6
+ from Bio.PDB.PDBParser import PDBParser
7
+ from Bio.PDB.PPBuilder import PPBuilder
8
+ from rdkit import Chem
9
+ from rdkit.Chem.rdchem import Mol
10
+
11
+ from boltz.data.types import Target
12
+ from boltz.data.parse.schema import parse_boltz_schema
13
+
14
+
15
+ def parse_pdb(
16
+ pdb_path: Path,
17
+ ccd: dict[str, Mol],
18
+ mol_dir: Path,
19
+ boltz2: bool = False,
20
+ ) -> Target:
21
+ """Parse a PDB file.
22
+
23
+ Parameters
24
+ ----------
25
+ pdb_path : Path
26
+ Path to the PDB file.
27
+ ccd : Dict
28
+ Dictionary of CCD components.
29
+ mol_dir : Path
30
+ Path to the directory containing the molecules.
31
+ boltz2 : bool, optional
32
+ Whether to use Boltz2 format, by default False.
33
+
34
+ Returns
35
+ -------
36
+ Target
37
+ The parsed target.
38
+ """
39
+ # Read PDB file
40
+ parser = PDBParser(QUIET=True)
41
+ structure = parser.get_structure("protein", str(pdb_path))
42
+ ppb = PPBuilder()
43
+
44
+ # Convert to yaml format
45
+ sequences = []
46
+ for model in structure:
47
+ for chain in model:
48
+ for pp in ppb.build_peptides(chain):
49
+ seq = str(pp.get_sequence())
50
+ if seq: # Only add if sequence is not empty
51
+ molecule = {
52
+ "protein": {
53
+ "id": chain.id,
54
+ "sequence": seq,
55
+ "modifications": [],
56
+ },
57
+ }
58
+ sequences.append(molecule)
59
+
60
+ data = {
61
+ "sequences": sequences,
62
+ "bonds": [],
63
+ "version": 1,
64
+ }
65
+
66
+ return parse_boltz_schema(pdb_path.stem, data, ccd, mol_dir, boltz2)
@@ -4,12 +4,13 @@ from typing import Optional
4
4
 
5
5
  import requests
6
6
  from Bio import PDB
7
- from Bio.Data.IUPACData import protein_letters_3to1
7
+ from Bio.PDB.PDBParser import PDBParser
8
+ from Bio.PDB.PPBuilder import PPBuilder
8
9
  from rdkit import Chem
9
10
  from rdkit.Chem.rdchem import Mol
10
11
 
11
12
  from boltz.data.types import Target
12
- from boltz.data.parse.yaml import parse_boltz_schema
13
+ from boltz.data.parse.schema import parse_boltz_schema
13
14
 
14
15
 
15
16
  def download_pdb(pdb_id: str, cache_dir: Path) -> Path:
@@ -67,8 +68,8 @@ def parse_pdb_id(
67
68
  Path to the directory containing the molecules.
68
69
  cache_dir : Path
69
70
  The directory to cache downloaded PDB files.
70
- boltz2 : bool
71
- Whether to parse the input for Boltz2.
71
+ boltz2 : bool, optional
72
+ Whether to use Boltz2 format, by default False.
72
73
 
73
74
  Returns
74
75
  -------
@@ -79,31 +80,25 @@ def parse_pdb_id(
79
80
  pdb_path = download_pdb(pdb_id, cache_dir)
80
81
 
81
82
  # Read PDB file
82
- parser = PDB.PDBParser(QUIET=True)
83
+ parser = PDBParser(QUIET=True)
83
84
  structure = parser.get_structure("protein", str(pdb_path))
85
+ ppb = PPBuilder()
84
86
 
85
87
  # Convert to yaml format
86
88
  sequences = []
87
89
  for model in structure:
88
90
  for chain in model:
89
- # Get chain sequence
90
- seq = ""
91
- for residue in chain:
92
- if residue.id[0] == " ": # Only standard residues
93
- try:
94
- seq += protein_letters_3to1[residue.resname]
95
- except KeyError:
96
- continue
97
-
98
- if seq: # Only add if sequence is not empty
99
- molecule = {
100
- "protein": {
101
- "id": chain.id,
102
- "sequence": seq,
103
- "modifications": [],
104
- },
105
- }
106
- sequences.append(molecule)
91
+ for pp in ppb.build_peptides(chain):
92
+ seq = str(pp.get_sequence())
93
+ if seq: # Only add if sequence is not empty
94
+ molecule = {
95
+ "protein": {
96
+ "id": chain.id,
97
+ "sequence": seq,
98
+ "modifications": [],
99
+ },
100
+ }
101
+ sequences.append(molecule)
107
102
 
108
103
  data = {
109
104
  "sequences": sequences,
@@ -1023,13 +1023,15 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
1023
1023
  if len(pdb_path.stem) == 4 and pdb_path.stem.isalnum():
1024
1024
  # This is a PDB ID
1025
1025
  from boltz.data.parse.pdb_download import parse_pdb_id
1026
- target = parse_pdb_id(pdb_path.stem, ccd, mol_dir, pdb_path.parent, boltz2)
1026
+ target = parse_pdb_id(pdb_path.stem, ccd, mol_dir, pdb_path.parent)
1027
+ # Get sequence from the first chain
1028
+ seq = target.sequences[0]["protein"]["sequence"]
1027
1029
  else:
1028
1030
  # This is a PDB file
1029
1031
  from boltz.data.parse.pdb import parse_pdb
1030
- target = parse_pdb(pdb_path, ccd, mol_dir, boltz2)
1031
- # Get sequence from the first chain
1032
- seq = target.sequences[0]
1032
+ target = parse_pdb(pdb_path, ccd, mol_dir)
1033
+ # Get sequence from the first chain
1034
+ seq = target.sequences[0]["protein"]["sequence"]
1033
1035
  else:
1034
1036
  msg = f"Protein must have either 'sequence' or 'pdb' field: {item}"
1035
1037
  raise ValueError(msg)
@@ -1040,9 +1042,9 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
1040
1042
  # Handle SDF file
1041
1043
  sdf_path = Path(item[entity_type]["sdf"])
1042
1044
  from boltz.data.parse.sdf import parse_sdf
1043
- target = parse_sdf(sdf_path, ccd, mol_dir, boltz2)
1045
+ target = parse_sdf(sdf_path, ccd, mol_dir)
1044
1046
  # Get sequence from the first ligand
1045
- seq = target.sequences[0]
1047
+ seq = target.sequences[0]["ligand"]["sequence"]
1046
1048
  elif "ccd" in item[entity_type]:
1047
1049
  seq = str(item[entity_type]["ccd"])
1048
1050
  else:
@@ -0,0 +1,90 @@
1
+ import os
2
+ from pathlib import Path
3
+ from typing import Optional
4
+
5
+ from rdkit import Chem
6
+ from rdkit.Chem import AllChem
7
+ from rdkit.Chem.rdchem import Mol
8
+ import rdkit.Chem.rdmolfiles as rdmolfiles
9
+
10
+ from boltz.data.types import Target
11
+ from boltz.data.parse.schema import parse_boltz_schema
12
+
13
+
14
+ def _process_sdf(sdf_path: str) -> dict[str, str]:
15
+ """Process an SDF file and extract SMILES strings.
16
+
17
+ Parameters
18
+ ----------
19
+ sdf_path : str
20
+ Path to the SDF file.
21
+
22
+ Returns
23
+ -------
24
+ dict[str, str]
25
+ Dictionary mapping molecule names to SMILES strings.
26
+ """
27
+ output_dict = {}
28
+ suppl = rdmolfiles.ForwardSDMolSupplier(sdf_path)
29
+
30
+ for mol in suppl:
31
+ if mol is not None:
32
+ mol_smiles = rdmolfiles.MolToSmiles(mol)
33
+ if mol.HasProp("_Name"):
34
+ mol_name = mol.GetProp("_Name")
35
+ if mol_name == "":
36
+ mol_name = mol_smiles
37
+ else:
38
+ mol_name = mol_smiles
39
+
40
+ output_dict[mol_name] = mol_smiles
41
+
42
+ return output_dict
43
+
44
+
45
+ def parse_sdf(
46
+ sdf_path: Path,
47
+ ccd: dict[str, Mol],
48
+ mol_dir: Path,
49
+ boltz2: bool = False,
50
+ ) -> Target:
51
+ """Parse an SDF file.
52
+
53
+ Parameters
54
+ ----------
55
+ sdf_path : Path
56
+ Path to the SDF file.
57
+ ccd : Dict
58
+ Dictionary of CCD components.
59
+ mol_dir : Path
60
+ Path to the directory containing the molecules.
61
+ boltz2 : bool, optional
62
+ Whether to use Boltz2 format, by default False.
63
+
64
+ Returns
65
+ -------
66
+ Target
67
+ The parsed target.
68
+ """
69
+ # Process SDF file
70
+ mol_dict = _process_sdf(str(sdf_path))
71
+
72
+ # Convert to yaml format
73
+ sequences = []
74
+ for mol_name, smiles in mol_dict.items():
75
+ molecule = {
76
+ "ligand": {
77
+ "id": mol_name,
78
+ "sequence": smiles,
79
+ "modifications": [],
80
+ },
81
+ }
82
+ sequences.append(molecule)
83
+
84
+ data = {
85
+ "sequences": sequences,
86
+ "bonds": [],
87
+ "version": 1,
88
+ }
89
+
90
+ return parse_boltz_schema(sdf_path.stem, data, ccd, mol_dir, boltz2)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: boltz-vsynthes
3
- Version: 1.0.14
3
+ Version: 1.0.16
4
4
  Summary: Boltz for VSYNTHES
5
5
  Requires-Python: <3.13,>=3.10
6
6
  Description-Content-Type: text/markdown
@@ -1,71 +0,0 @@
1
- from pathlib import Path
2
- from typing import Optional
3
-
4
- from Bio import PDB
5
- from Bio.Data.IUPACData import protein_letters_3to1
6
- from rdkit import Chem
7
- from rdkit.Chem.rdchem import Mol
8
-
9
- from boltz.data.types import Target
10
- from boltz.data.parse.yaml import parse_boltz_schema
11
-
12
-
13
- def parse_pdb(
14
- path: Path,
15
- ccd: dict[str, Mol],
16
- mol_dir: Path,
17
- boltz2: bool = False,
18
- ) -> Target:
19
- """Parse a PDB file.
20
-
21
- Parameters
22
- ----------
23
- path : Path
24
- Path to the PDB file.
25
- ccd : Dict
26
- Dictionary of CCD components.
27
- mol_dir : Path
28
- Path to the directory containing the molecules.
29
- boltz2 : bool
30
- Whether to parse the input for Boltz2.
31
-
32
- Returns
33
- -------
34
- Target
35
- The parsed target.
36
- """
37
- # Read PDB file
38
- parser = PDB.PDBParser(QUIET=True)
39
- structure = parser.get_structure("protein", str(path))
40
-
41
- # Convert to yaml format
42
- sequences = []
43
- for model in structure:
44
- for chain in model:
45
- # Get chain sequence
46
- seq = ""
47
- for residue in chain:
48
- if residue.id[0] == " ": # Only standard residues
49
- try:
50
- seq += protein_letters_3to1[residue.resname]
51
- except KeyError:
52
- continue
53
-
54
- if seq: # Only add if sequence is not empty
55
- molecule = {
56
- "protein": {
57
- "id": chain.id,
58
- "sequence": seq,
59
- "modifications": [],
60
- },
61
- }
62
- sequences.append(molecule)
63
-
64
- data = {
65
- "sequences": sequences,
66
- "bonds": [],
67
- "version": 1,
68
- }
69
-
70
- name = path.stem
71
- return parse_boltz_schema(name, data, ccd, mol_dir, boltz2)
@@ -1,60 +0,0 @@
1
- from pathlib import Path
2
- from typing import Optional
3
-
4
- from rdkit import Chem
5
- from rdkit.Chem.rdchem import Mol
6
-
7
- from boltz.data.types import Target
8
- from boltz.data.parse.yaml import parse_boltz_schema
9
-
10
-
11
- def parse_sdf(
12
- path: Path,
13
- ccd: dict[str, Mol],
14
- mol_dir: Path,
15
- boltz2: bool = False,
16
- ) -> Target:
17
- """Parse an SDF file.
18
-
19
- Parameters
20
- ----------
21
- path : Path
22
- Path to the SDF file.
23
- ccd : Dict
24
- Dictionary of CCD components.
25
- mol_dir : Path
26
- Path to the directory containing the molecules.
27
- boltz2 : bool
28
- Whether to parse the input for Boltz2.
29
-
30
- Returns
31
- -------
32
- Target
33
- The parsed target.
34
- """
35
- # Read SDF file
36
- supplier = Chem.SDMolSupplier(str(path))
37
-
38
- # Convert to yaml format
39
- sequences = []
40
- for i, mol in enumerate(supplier):
41
- if mol is not None:
42
- # Get SMILES
43
- smiles = Chem.MolToSmiles(mol)
44
-
45
- molecule = {
46
- "ligand": {
47
- "id": f"L{i+1}", # Use L1, L2, etc. as chain IDs
48
- "smiles": smiles,
49
- },
50
- }
51
- sequences.append(molecule)
52
-
53
- data = {
54
- "sequences": sequences,
55
- "bonds": [],
56
- "version": 1,
57
- }
58
-
59
- name = path.stem
60
- return parse_boltz_schema(name, data, ccd, mol_dir, boltz2)
File without changes