boltz-vsynthes 1.0.15__tar.gz → 1.0.16__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {boltz_vsynthes-1.0.15/src/boltz_vsynthes.egg-info → boltz_vsynthes-1.0.16}/PKG-INFO +1 -1
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/pyproject.toml +1 -1
- boltz_vsynthes-1.0.16/src/boltz/data/parse/pdb.py +66 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/parse/pdb_download.py +18 -23
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/parse/schema.py +5 -3
- boltz_vsynthes-1.0.16/src/boltz/data/parse/sdf.py +90 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16/src/boltz_vsynthes.egg-info}/PKG-INFO +1 -1
- boltz_vsynthes-1.0.15/src/boltz/data/parse/pdb.py +0 -71
- boltz_vsynthes-1.0.15/src/boltz/data/parse/sdf.py +0 -60
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/LICENSE +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/README.md +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/setup.cfg +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/__init__.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/__init__.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/const.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/crop/__init__.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/crop/affinity.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/crop/boltz.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/crop/cropper.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/feature/__init__.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/feature/featurizer.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/feature/featurizerv2.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/feature/symmetry.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/filter/__init__.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/filter/dynamic/__init__.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/filter/dynamic/date.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/filter/dynamic/filter.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/filter/dynamic/max_residues.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/filter/dynamic/resolution.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/filter/dynamic/size.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/filter/dynamic/subset.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/filter/static/__init__.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/filter/static/filter.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/filter/static/ligand.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/filter/static/polymer.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/module/__init__.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/module/inference.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/module/inferencev2.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/module/training.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/module/trainingv2.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/mol.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/msa/__init__.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/msa/mmseqs2.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/pad.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/parse/__init__.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/parse/a3m.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/parse/csv.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/parse/fasta.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/parse/mmcif.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/parse/mmcif_with_constraints.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/parse/yaml.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/sample/__init__.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/sample/cluster.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/sample/distillation.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/sample/random.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/sample/sampler.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/tokenize/__init__.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/tokenize/boltz.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/tokenize/boltz2.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/tokenize/tokenizer.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/types.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/write/__init__.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/write/mmcif.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/write/pdb.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/write/utils.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/write/writer.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/main.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/__init__.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/layers/__init__.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/layers/attention.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/layers/attentionv2.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/layers/confidence_utils.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/layers/dropout.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/layers/initialize.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/layers/outer_product_mean.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/layers/pair_averaging.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/layers/pairformer.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/layers/relative.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/layers/transition.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/layers/triangular_attention/__init__.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/layers/triangular_attention/attention.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/layers/triangular_attention/primitives.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/layers/triangular_attention/utils.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/layers/triangular_mult.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/loss/__init__.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/loss/bfactor.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/loss/confidence.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/loss/confidencev2.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/loss/diffusion.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/loss/diffusionv2.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/loss/distogram.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/loss/distogramv2.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/loss/validation.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/models/__init__.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/models/boltz1.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/models/boltz2.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/modules/__init__.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/modules/affinity.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/modules/confidence.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/modules/confidence_utils.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/modules/confidencev2.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/modules/diffusion.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/modules/diffusion_conditioning.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/modules/diffusionv2.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/modules/encoders.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/modules/encodersv2.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/modules/transformers.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/modules/transformersv2.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/modules/trunk.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/modules/trunkv2.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/modules/utils.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/optim/__init__.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/optim/ema.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/optim/scheduler.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/potentials/__init__.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/potentials/potentials.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/potentials/schedules.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz_vsynthes.egg-info/SOURCES.txt +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz_vsynthes.egg-info/dependency_links.txt +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz_vsynthes.egg-info/entry_points.txt +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz_vsynthes.egg-info/requires.txt +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz_vsynthes.egg-info/top_level.txt +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/tests/test_kernels.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/tests/test_regression.py +0 -0
- {boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/tests/test_utils.py +0 -0
@@ -0,0 +1,66 @@
|
|
1
|
+
import os
|
2
|
+
from pathlib import Path
|
3
|
+
from typing import Optional
|
4
|
+
|
5
|
+
from Bio import PDB
|
6
|
+
from Bio.PDB.PDBParser import PDBParser
|
7
|
+
from Bio.PDB.PPBuilder import PPBuilder
|
8
|
+
from rdkit import Chem
|
9
|
+
from rdkit.Chem.rdchem import Mol
|
10
|
+
|
11
|
+
from boltz.data.types import Target
|
12
|
+
from boltz.data.parse.schema import parse_boltz_schema
|
13
|
+
|
14
|
+
|
15
|
+
def parse_pdb(
|
16
|
+
pdb_path: Path,
|
17
|
+
ccd: dict[str, Mol],
|
18
|
+
mol_dir: Path,
|
19
|
+
boltz2: bool = False,
|
20
|
+
) -> Target:
|
21
|
+
"""Parse a PDB file.
|
22
|
+
|
23
|
+
Parameters
|
24
|
+
----------
|
25
|
+
pdb_path : Path
|
26
|
+
Path to the PDB file.
|
27
|
+
ccd : Dict
|
28
|
+
Dictionary of CCD components.
|
29
|
+
mol_dir : Path
|
30
|
+
Path to the directory containing the molecules.
|
31
|
+
boltz2 : bool, optional
|
32
|
+
Whether to use Boltz2 format, by default False.
|
33
|
+
|
34
|
+
Returns
|
35
|
+
-------
|
36
|
+
Target
|
37
|
+
The parsed target.
|
38
|
+
"""
|
39
|
+
# Read PDB file
|
40
|
+
parser = PDBParser(QUIET=True)
|
41
|
+
structure = parser.get_structure("protein", str(pdb_path))
|
42
|
+
ppb = PPBuilder()
|
43
|
+
|
44
|
+
# Convert to yaml format
|
45
|
+
sequences = []
|
46
|
+
for model in structure:
|
47
|
+
for chain in model:
|
48
|
+
for pp in ppb.build_peptides(chain):
|
49
|
+
seq = str(pp.get_sequence())
|
50
|
+
if seq: # Only add if sequence is not empty
|
51
|
+
molecule = {
|
52
|
+
"protein": {
|
53
|
+
"id": chain.id,
|
54
|
+
"sequence": seq,
|
55
|
+
"modifications": [],
|
56
|
+
},
|
57
|
+
}
|
58
|
+
sequences.append(molecule)
|
59
|
+
|
60
|
+
data = {
|
61
|
+
"sequences": sequences,
|
62
|
+
"bonds": [],
|
63
|
+
"version": 1,
|
64
|
+
}
|
65
|
+
|
66
|
+
return parse_boltz_schema(pdb_path.stem, data, ccd, mol_dir, boltz2)
|
@@ -4,12 +4,13 @@ from typing import Optional
|
|
4
4
|
|
5
5
|
import requests
|
6
6
|
from Bio import PDB
|
7
|
-
from Bio.
|
7
|
+
from Bio.PDB.PDBParser import PDBParser
|
8
|
+
from Bio.PDB.PPBuilder import PPBuilder
|
8
9
|
from rdkit import Chem
|
9
10
|
from rdkit.Chem.rdchem import Mol
|
10
11
|
|
11
12
|
from boltz.data.types import Target
|
12
|
-
from boltz.data.parse.
|
13
|
+
from boltz.data.parse.schema import parse_boltz_schema
|
13
14
|
|
14
15
|
|
15
16
|
def download_pdb(pdb_id: str, cache_dir: Path) -> Path:
|
@@ -67,8 +68,8 @@ def parse_pdb_id(
|
|
67
68
|
Path to the directory containing the molecules.
|
68
69
|
cache_dir : Path
|
69
70
|
The directory to cache downloaded PDB files.
|
70
|
-
boltz2 : bool
|
71
|
-
Whether to
|
71
|
+
boltz2 : bool, optional
|
72
|
+
Whether to use Boltz2 format, by default False.
|
72
73
|
|
73
74
|
Returns
|
74
75
|
-------
|
@@ -79,31 +80,25 @@ def parse_pdb_id(
|
|
79
80
|
pdb_path = download_pdb(pdb_id, cache_dir)
|
80
81
|
|
81
82
|
# Read PDB file
|
82
|
-
parser =
|
83
|
+
parser = PDBParser(QUIET=True)
|
83
84
|
structure = parser.get_structure("protein", str(pdb_path))
|
85
|
+
ppb = PPBuilder()
|
84
86
|
|
85
87
|
# Convert to yaml format
|
86
88
|
sequences = []
|
87
89
|
for model in structure:
|
88
90
|
for chain in model:
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
"protein": {
|
101
|
-
"id": chain.id,
|
102
|
-
"sequence": seq,
|
103
|
-
"modifications": [],
|
104
|
-
},
|
105
|
-
}
|
106
|
-
sequences.append(molecule)
|
91
|
+
for pp in ppb.build_peptides(chain):
|
92
|
+
seq = str(pp.get_sequence())
|
93
|
+
if seq: # Only add if sequence is not empty
|
94
|
+
molecule = {
|
95
|
+
"protein": {
|
96
|
+
"id": chain.id,
|
97
|
+
"sequence": seq,
|
98
|
+
"modifications": [],
|
99
|
+
},
|
100
|
+
}
|
101
|
+
sequences.append(molecule)
|
107
102
|
|
108
103
|
data = {
|
109
104
|
"sequences": sequences,
|
@@ -1024,12 +1024,14 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
|
|
1024
1024
|
# This is a PDB ID
|
1025
1025
|
from boltz.data.parse.pdb_download import parse_pdb_id
|
1026
1026
|
target = parse_pdb_id(pdb_path.stem, ccd, mol_dir, pdb_path.parent)
|
1027
|
+
# Get sequence from the first chain
|
1028
|
+
seq = target.sequences[0]["protein"]["sequence"]
|
1027
1029
|
else:
|
1028
1030
|
# This is a PDB file
|
1029
1031
|
from boltz.data.parse.pdb import parse_pdb
|
1030
1032
|
target = parse_pdb(pdb_path, ccd, mol_dir)
|
1031
|
-
|
1032
|
-
|
1033
|
+
# Get sequence from the first chain
|
1034
|
+
seq = target.sequences[0]["protein"]["sequence"]
|
1033
1035
|
else:
|
1034
1036
|
msg = f"Protein must have either 'sequence' or 'pdb' field: {item}"
|
1035
1037
|
raise ValueError(msg)
|
@@ -1042,7 +1044,7 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
|
|
1042
1044
|
from boltz.data.parse.sdf import parse_sdf
|
1043
1045
|
target = parse_sdf(sdf_path, ccd, mol_dir)
|
1044
1046
|
# Get sequence from the first ligand
|
1045
|
-
seq = target.sequences[0]
|
1047
|
+
seq = target.sequences[0]["ligand"]["sequence"]
|
1046
1048
|
elif "ccd" in item[entity_type]:
|
1047
1049
|
seq = str(item[entity_type]["ccd"])
|
1048
1050
|
else:
|
@@ -0,0 +1,90 @@
|
|
1
|
+
import os
|
2
|
+
from pathlib import Path
|
3
|
+
from typing import Optional
|
4
|
+
|
5
|
+
from rdkit import Chem
|
6
|
+
from rdkit.Chem import AllChem
|
7
|
+
from rdkit.Chem.rdchem import Mol
|
8
|
+
import rdkit.Chem.rdmolfiles as rdmolfiles
|
9
|
+
|
10
|
+
from boltz.data.types import Target
|
11
|
+
from boltz.data.parse.schema import parse_boltz_schema
|
12
|
+
|
13
|
+
|
14
|
+
def _process_sdf(sdf_path: str) -> dict[str, str]:
|
15
|
+
"""Process an SDF file and extract SMILES strings.
|
16
|
+
|
17
|
+
Parameters
|
18
|
+
----------
|
19
|
+
sdf_path : str
|
20
|
+
Path to the SDF file.
|
21
|
+
|
22
|
+
Returns
|
23
|
+
-------
|
24
|
+
dict[str, str]
|
25
|
+
Dictionary mapping molecule names to SMILES strings.
|
26
|
+
"""
|
27
|
+
output_dict = {}
|
28
|
+
suppl = rdmolfiles.ForwardSDMolSupplier(sdf_path)
|
29
|
+
|
30
|
+
for mol in suppl:
|
31
|
+
if mol is not None:
|
32
|
+
mol_smiles = rdmolfiles.MolToSmiles(mol)
|
33
|
+
if mol.HasProp("_Name"):
|
34
|
+
mol_name = mol.GetProp("_Name")
|
35
|
+
if mol_name == "":
|
36
|
+
mol_name = mol_smiles
|
37
|
+
else:
|
38
|
+
mol_name = mol_smiles
|
39
|
+
|
40
|
+
output_dict[mol_name] = mol_smiles
|
41
|
+
|
42
|
+
return output_dict
|
43
|
+
|
44
|
+
|
45
|
+
def parse_sdf(
|
46
|
+
sdf_path: Path,
|
47
|
+
ccd: dict[str, Mol],
|
48
|
+
mol_dir: Path,
|
49
|
+
boltz2: bool = False,
|
50
|
+
) -> Target:
|
51
|
+
"""Parse an SDF file.
|
52
|
+
|
53
|
+
Parameters
|
54
|
+
----------
|
55
|
+
sdf_path : Path
|
56
|
+
Path to the SDF file.
|
57
|
+
ccd : Dict
|
58
|
+
Dictionary of CCD components.
|
59
|
+
mol_dir : Path
|
60
|
+
Path to the directory containing the molecules.
|
61
|
+
boltz2 : bool, optional
|
62
|
+
Whether to use Boltz2 format, by default False.
|
63
|
+
|
64
|
+
Returns
|
65
|
+
-------
|
66
|
+
Target
|
67
|
+
The parsed target.
|
68
|
+
"""
|
69
|
+
# Process SDF file
|
70
|
+
mol_dict = _process_sdf(str(sdf_path))
|
71
|
+
|
72
|
+
# Convert to yaml format
|
73
|
+
sequences = []
|
74
|
+
for mol_name, smiles in mol_dict.items():
|
75
|
+
molecule = {
|
76
|
+
"ligand": {
|
77
|
+
"id": mol_name,
|
78
|
+
"sequence": smiles,
|
79
|
+
"modifications": [],
|
80
|
+
},
|
81
|
+
}
|
82
|
+
sequences.append(molecule)
|
83
|
+
|
84
|
+
data = {
|
85
|
+
"sequences": sequences,
|
86
|
+
"bonds": [],
|
87
|
+
"version": 1,
|
88
|
+
}
|
89
|
+
|
90
|
+
return parse_boltz_schema(sdf_path.stem, data, ccd, mol_dir, boltz2)
|
@@ -1,71 +0,0 @@
|
|
1
|
-
from pathlib import Path
|
2
|
-
from typing import Optional
|
3
|
-
|
4
|
-
from Bio import PDB
|
5
|
-
from Bio.Data.IUPACData import protein_letters_3to1
|
6
|
-
from rdkit import Chem
|
7
|
-
from rdkit.Chem.rdchem import Mol
|
8
|
-
|
9
|
-
from boltz.data.types import Target
|
10
|
-
from boltz.data.parse.yaml import parse_boltz_schema
|
11
|
-
|
12
|
-
|
13
|
-
def parse_pdb(
|
14
|
-
path: Path,
|
15
|
-
ccd: dict[str, Mol],
|
16
|
-
mol_dir: Path,
|
17
|
-
boltz2: bool = False,
|
18
|
-
) -> Target:
|
19
|
-
"""Parse a PDB file.
|
20
|
-
|
21
|
-
Parameters
|
22
|
-
----------
|
23
|
-
path : Path
|
24
|
-
Path to the PDB file.
|
25
|
-
ccd : Dict
|
26
|
-
Dictionary of CCD components.
|
27
|
-
mol_dir : Path
|
28
|
-
Path to the directory containing the molecules.
|
29
|
-
boltz2 : bool
|
30
|
-
Whether to parse the input for Boltz2.
|
31
|
-
|
32
|
-
Returns
|
33
|
-
-------
|
34
|
-
Target
|
35
|
-
The parsed target.
|
36
|
-
"""
|
37
|
-
# Read PDB file
|
38
|
-
parser = PDB.PDBParser(QUIET=True)
|
39
|
-
structure = parser.get_structure("protein", str(path))
|
40
|
-
|
41
|
-
# Convert to yaml format
|
42
|
-
sequences = []
|
43
|
-
for model in structure:
|
44
|
-
for chain in model:
|
45
|
-
# Get chain sequence
|
46
|
-
seq = ""
|
47
|
-
for residue in chain:
|
48
|
-
if residue.id[0] == " ": # Only standard residues
|
49
|
-
try:
|
50
|
-
seq += protein_letters_3to1[residue.resname]
|
51
|
-
except KeyError:
|
52
|
-
continue
|
53
|
-
|
54
|
-
if seq: # Only add if sequence is not empty
|
55
|
-
molecule = {
|
56
|
-
"protein": {
|
57
|
-
"id": chain.id,
|
58
|
-
"sequence": seq,
|
59
|
-
"modifications": [],
|
60
|
-
},
|
61
|
-
}
|
62
|
-
sequences.append(molecule)
|
63
|
-
|
64
|
-
data = {
|
65
|
-
"sequences": sequences,
|
66
|
-
"bonds": [],
|
67
|
-
"version": 1,
|
68
|
-
}
|
69
|
-
|
70
|
-
name = path.stem
|
71
|
-
return parse_boltz_schema(name, data, ccd, mol_dir, boltz2)
|
@@ -1,60 +0,0 @@
|
|
1
|
-
from pathlib import Path
|
2
|
-
from typing import Optional
|
3
|
-
|
4
|
-
from rdkit import Chem
|
5
|
-
from rdkit.Chem.rdchem import Mol
|
6
|
-
|
7
|
-
from boltz.data.types import Target
|
8
|
-
from boltz.data.parse.yaml import parse_boltz_schema
|
9
|
-
|
10
|
-
|
11
|
-
def parse_sdf(
|
12
|
-
path: Path,
|
13
|
-
ccd: dict[str, Mol],
|
14
|
-
mol_dir: Path,
|
15
|
-
boltz2: bool = False,
|
16
|
-
) -> Target:
|
17
|
-
"""Parse an SDF file.
|
18
|
-
|
19
|
-
Parameters
|
20
|
-
----------
|
21
|
-
path : Path
|
22
|
-
Path to the SDF file.
|
23
|
-
ccd : Dict
|
24
|
-
Dictionary of CCD components.
|
25
|
-
mol_dir : Path
|
26
|
-
Path to the directory containing the molecules.
|
27
|
-
boltz2 : bool
|
28
|
-
Whether to parse the input for Boltz2.
|
29
|
-
|
30
|
-
Returns
|
31
|
-
-------
|
32
|
-
Target
|
33
|
-
The parsed target.
|
34
|
-
"""
|
35
|
-
# Read SDF file
|
36
|
-
supplier = Chem.SDMolSupplier(str(path))
|
37
|
-
|
38
|
-
# Convert to yaml format
|
39
|
-
sequences = []
|
40
|
-
for i, mol in enumerate(supplier):
|
41
|
-
if mol is not None:
|
42
|
-
# Get SMILES
|
43
|
-
smiles = Chem.MolToSmiles(mol)
|
44
|
-
|
45
|
-
molecule = {
|
46
|
-
"ligand": {
|
47
|
-
"id": f"L{i+1}", # Use L1, L2, etc. as chain IDs
|
48
|
-
"smiles": smiles,
|
49
|
-
},
|
50
|
-
}
|
51
|
-
sequences.append(molecule)
|
52
|
-
|
53
|
-
data = {
|
54
|
-
"sequences": sequences,
|
55
|
-
"bonds": [],
|
56
|
-
"version": 1,
|
57
|
-
}
|
58
|
-
|
59
|
-
name = path.stem
|
60
|
-
return parse_boltz_schema(name, data, ccd, mol_dir, boltz2)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/filter/dynamic/max_residues.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/data/parse/mmcif_with_constraints.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/layers/outer_product_mean.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/layers/triangular_attention/utils.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz/model/modules/diffusion_conditioning.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz_vsynthes.egg-info/dependency_links.txt
RENAMED
File without changes
|
{boltz_vsynthes-1.0.15 → boltz_vsynthes-1.0.16}/src/boltz_vsynthes.egg-info/entry_points.txt
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|