boltz-vsynthes 1.0.39__py3-none-any.whl → 1.0.41__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- boltz/data/parse/schema.py +66 -101
- boltz/data/parse/yaml.py +13 -23
- boltz/main.py +98 -117
- {boltz_vsynthes-1.0.39.dist-info → boltz_vsynthes-1.0.41.dist-info}/METADATA +1 -1
- {boltz_vsynthes-1.0.39.dist-info → boltz_vsynthes-1.0.41.dist-info}/RECORD +9 -9
- {boltz_vsynthes-1.0.39.dist-info → boltz_vsynthes-1.0.41.dist-info}/WHEEL +0 -0
- {boltz_vsynthes-1.0.39.dist-info → boltz_vsynthes-1.0.41.dist-info}/entry_points.txt +0 -0
- {boltz_vsynthes-1.0.39.dist-info → boltz_vsynthes-1.0.41.dist-info}/licenses/LICENSE +0 -0
- {boltz_vsynthes-1.0.39.dist-info → boltz_vsynthes-1.0.41.dist-info}/top_level.txt +0 -0
boltz/data/parse/schema.py
CHANGED
@@ -2,8 +2,6 @@ from collections.abc import Mapping
|
|
2
2
|
from dataclasses import dataclass
|
3
3
|
from pathlib import Path
|
4
4
|
from typing import Optional
|
5
|
-
import json
|
6
|
-
import yaml
|
7
5
|
|
8
6
|
import click
|
9
7
|
import numpy as np
|
@@ -937,7 +935,6 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
|
|
937
935
|
ccd: Mapping[str, Mol],
|
938
936
|
mol_dir: Optional[Path] = None,
|
939
937
|
boltz_2: bool = False,
|
940
|
-
output_dir: Optional[Path] = None,
|
941
938
|
) -> Target:
|
942
939
|
"""Parse a Boltz input yaml / json.
|
943
940
|
|
@@ -989,8 +986,6 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
|
|
989
986
|
Path to the directory containing the molecules.
|
990
987
|
boltz2: bool
|
991
988
|
Whether to parse the input for Boltz2.
|
992
|
-
output_dir: Path, optional
|
993
|
-
Path to the output directory. If provided, results will be saved in a subfolder named after the input file.
|
994
989
|
|
995
990
|
Returns
|
996
991
|
-------
|
@@ -998,14 +993,6 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
|
|
998
993
|
The parsed target.
|
999
994
|
|
1000
995
|
"""
|
1001
|
-
# Create output directory if specified
|
1002
|
-
if output_dir is not None:
|
1003
|
-
output_dir = Path(output_dir)
|
1004
|
-
output_dir.mkdir(parents=True, exist_ok=True)
|
1005
|
-
# Create subfolder based on input name
|
1006
|
-
subfolder = output_dir / name
|
1007
|
-
subfolder.mkdir(parents=True, exist_ok=True)
|
1008
|
-
|
1009
996
|
# Assert version 1
|
1010
997
|
version = schema.get("version", 1)
|
1011
998
|
if version != 1:
|
@@ -1072,7 +1059,6 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
|
|
1072
1059
|
|
1073
1060
|
# Check if any affinity ligand is present
|
1074
1061
|
affinity_ligands = set()
|
1075
|
-
affinity_proteins = set()
|
1076
1062
|
properties = schema.get("properties", [])
|
1077
1063
|
if properties and not boltz_2:
|
1078
1064
|
msg = "Affinity prediction is only supported for Boltz2!"
|
@@ -1083,6 +1069,7 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
|
|
1083
1069
|
if prop_type == "affinity":
|
1084
1070
|
binder = prop["affinity"]["binder"]
|
1085
1071
|
if not isinstance(binder, str):
|
1072
|
+
# TODO: support multi residue ligands and ccd's
|
1086
1073
|
msg = "Binder must be a single chain."
|
1087
1074
|
raise ValueError(msg)
|
1088
1075
|
|
@@ -1090,21 +1077,18 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
|
|
1090
1077
|
msg = f"Could not find binder with name {binder} in the input!"
|
1091
1078
|
raise ValueError(msg)
|
1092
1079
|
|
1093
|
-
|
1094
|
-
if chain_name_to_entity_type[binder] == "protein":
|
1095
|
-
affinity_proteins.add(binder)
|
1096
|
-
elif chain_name_to_entity_type[binder] == "ligand":
|
1097
|
-
affinity_ligands.add(binder)
|
1098
|
-
else:
|
1080
|
+
if chain_name_to_entity_type[binder] != "ligand":
|
1099
1081
|
msg = (
|
1100
|
-
f"Chain {binder} is not a
|
1101
|
-
"Affinity is currently only supported for
|
1082
|
+
f"Chain {binder} is not a ligand! "
|
1083
|
+
"Affinity is currently only supported for ligands."
|
1102
1084
|
)
|
1103
1085
|
raise ValueError(msg)
|
1104
1086
|
|
1105
|
-
|
1106
|
-
|
1107
|
-
|
1087
|
+
affinity_ligands.add(binder)
|
1088
|
+
|
1089
|
+
# Check only one affinity ligand is present
|
1090
|
+
if len(affinity_ligands) > 1:
|
1091
|
+
msg = "Only one affinity ligand is currently supported!"
|
1108
1092
|
raise ValueError(msg)
|
1109
1093
|
|
1110
1094
|
# Go through entities and parse them
|
@@ -1127,15 +1111,12 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
|
|
1127
1111
|
elif isinstance(item[entity_type]["id"], list):
|
1128
1112
|
ids.extend(item[entity_type]["id"])
|
1129
1113
|
|
1130
|
-
# Check if any affinity
|
1114
|
+
# Check if any affinity ligand is present
|
1131
1115
|
if len(ids) == 1:
|
1132
|
-
affinity = ids[0] in affinity_ligands
|
1116
|
+
affinity = ids[0] in affinity_ligands
|
1133
1117
|
elif (len(ids) > 1) and any(x in affinity_ligands for x in ids):
|
1134
1118
|
msg = "Cannot compute affinity for a ligand that has multiple copies!"
|
1135
1119
|
raise ValueError(msg)
|
1136
|
-
elif (len(ids) > 1) and any(x in affinity_proteins for x in ids):
|
1137
|
-
# If binder is a protein, allow multiple ligands
|
1138
|
-
affinity = True
|
1139
1120
|
else:
|
1140
1121
|
affinity = False
|
1141
1122
|
|
@@ -1234,7 +1215,7 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
|
|
1234
1215
|
)
|
1235
1216
|
|
1236
1217
|
# Parse a non-polymer
|
1237
|
-
elif (entity_type == "ligand") and "ccd" in
|
1218
|
+
elif (entity_type == "ligand") and ("ccd" in items[0][entity_type]):
|
1238
1219
|
seq = items[0][entity_type]["ccd"]
|
1239
1220
|
|
1240
1221
|
if isinstance(seq, str):
|
@@ -1326,6 +1307,60 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
|
|
1326
1307
|
"cyclic", False
|
1327
1308
|
), "Cyclic flag is not supported for ligands"
|
1328
1309
|
|
1310
|
+
elif (entity_type == "ligand") and ("sdf" in items[0][entity_type]):
|
1311
|
+
# Handle SDF file
|
1312
|
+
sdf_path = Path(items[0][entity_type]["sdf"])
|
1313
|
+
from boltz.data.parse.sdf import parse_sdf
|
1314
|
+
target = parse_sdf(sdf_path, ccd, mol_dir)
|
1315
|
+
mol = target["sequences"][0]["ligand"]["smiles"]
|
1316
|
+
|
1317
|
+
if affinity:
|
1318
|
+
mol = standardize(mol)
|
1319
|
+
|
1320
|
+
mol = AllChem.MolFromSmiles(mol)
|
1321
|
+
mol = AllChem.AddHs(mol)
|
1322
|
+
|
1323
|
+
# Set atom names
|
1324
|
+
canonical_order = AllChem.CanonicalRankAtoms(mol)
|
1325
|
+
for atom, can_idx in zip(mol.GetAtoms(), canonical_order):
|
1326
|
+
atom_name = atom.GetSymbol().upper() + str(can_idx + 1)
|
1327
|
+
if len(atom_name) > 4:
|
1328
|
+
msg = (
|
1329
|
+
f"{mol} has an atom with a name longer than "
|
1330
|
+
f"4 characters: {atom_name}."
|
1331
|
+
)
|
1332
|
+
raise ValueError(msg)
|
1333
|
+
atom.SetProp("name", atom_name)
|
1334
|
+
|
1335
|
+
success = compute_3d_conformer(mol)
|
1336
|
+
if not success:
|
1337
|
+
msg = f"Failed to compute 3D conformer for {mol}"
|
1338
|
+
raise ValueError(msg)
|
1339
|
+
|
1340
|
+
mol_no_h = AllChem.RemoveHs(mol, sanitize=False)
|
1341
|
+
affinity_mw = AllChem.Descriptors.MolWt(mol_no_h) if affinity else None
|
1342
|
+
extra_mols[f"LIG{ligand_id}"] = mol_no_h
|
1343
|
+
residue = parse_ccd_residue(
|
1344
|
+
name=f"LIG{ligand_id}",
|
1345
|
+
ref_mol=mol,
|
1346
|
+
res_idx=0,
|
1347
|
+
)
|
1348
|
+
|
1349
|
+
ligand_id += 1
|
1350
|
+
parsed_chain = ParsedChain(
|
1351
|
+
entity=entity_id,
|
1352
|
+
residues=[residue],
|
1353
|
+
type=const.chain_type_ids["NONPOLYMER"],
|
1354
|
+
cyclic_period=0,
|
1355
|
+
sequence=None,
|
1356
|
+
affinity=affinity,
|
1357
|
+
affinity_mw=affinity_mw,
|
1358
|
+
)
|
1359
|
+
|
1360
|
+
assert not items[0][entity_type].get(
|
1361
|
+
"cyclic", False
|
1362
|
+
), "Cyclic flag is not supported for ligands"
|
1363
|
+
|
1329
1364
|
else:
|
1330
1365
|
msg = f"Invalid entity type: {entity_type}"
|
1331
1366
|
raise ValueError(msg)
|
@@ -1393,15 +1428,6 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
|
|
1393
1428
|
chain_id=asym_id,
|
1394
1429
|
mw=chain.affinity_mw,
|
1395
1430
|
)
|
1396
|
-
# Save affinity info if output directory is specified
|
1397
|
-
if output_dir is not None:
|
1398
|
-
affinity_path = subfolder / "affinity_info.json"
|
1399
|
-
with open(affinity_path, "w") as f:
|
1400
|
-
json.dump({
|
1401
|
-
"chain_id": asym_id,
|
1402
|
-
"mw": chain.affinity_mw,
|
1403
|
-
"chain_name": chain_name
|
1404
|
-
}, f)
|
1405
1431
|
|
1406
1432
|
# Find all copies of this chain in the assembly
|
1407
1433
|
entity_id = int(chain.entity)
|
@@ -1864,64 +1890,3 @@ def standardize(smiles: str) -> Optional[str]:
|
|
1864
1890
|
raise ValueError("Molecule is broken")
|
1865
1891
|
|
1866
1892
|
return smiles
|
1867
|
-
|
1868
|
-
|
1869
|
-
def parse_boltz_directory(
|
1870
|
-
input_dir: Path,
|
1871
|
-
output_dir: Path,
|
1872
|
-
ccd: Mapping[str, Mol],
|
1873
|
-
mol_dir: Optional[Path] = None,
|
1874
|
-
boltz_2: bool = False,
|
1875
|
-
) -> list[Target]:
|
1876
|
-
"""Parse all YAML files in a directory.
|
1877
|
-
|
1878
|
-
Parameters
|
1879
|
-
----------
|
1880
|
-
input_dir : Path
|
1881
|
-
Path to the directory containing YAML files.
|
1882
|
-
output_dir : Path
|
1883
|
-
Path to the output directory where results will be saved.
|
1884
|
-
ccd : Mapping[str, Mol]
|
1885
|
-
Dictionary of CCD components.
|
1886
|
-
mol_dir : Path, optional
|
1887
|
-
Path to the directory containing the molecules.
|
1888
|
-
boltz_2 : bool, optional
|
1889
|
-
Whether to parse the input for Boltz2.
|
1890
|
-
|
1891
|
-
Returns
|
1892
|
-
-------
|
1893
|
-
list[Target]
|
1894
|
-
List of parsed targets.
|
1895
|
-
|
1896
|
-
"""
|
1897
|
-
input_dir = Path(input_dir)
|
1898
|
-
output_dir = Path(output_dir)
|
1899
|
-
output_dir.mkdir(parents=True, exist_ok=True)
|
1900
|
-
|
1901
|
-
targets = []
|
1902
|
-
for yaml_file in input_dir.glob("*.yaml"):
|
1903
|
-
# Skip hidden files and directories
|
1904
|
-
if yaml_file.name.startswith('.') or any(part.startswith('.') for part in yaml_file.parts):
|
1905
|
-
continue
|
1906
|
-
|
1907
|
-
try:
|
1908
|
-
# Load YAML file
|
1909
|
-
with open(yaml_file, "r") as f:
|
1910
|
-
schema = yaml.safe_load(f)
|
1911
|
-
|
1912
|
-
# Parse schema
|
1913
|
-
target = parse_boltz_schema(
|
1914
|
-
name=yaml_file.stem,
|
1915
|
-
schema=schema,
|
1916
|
-
ccd=ccd,
|
1917
|
-
mol_dir=mol_dir,
|
1918
|
-
boltz_2=boltz_2,
|
1919
|
-
output_dir=output_dir,
|
1920
|
-
)
|
1921
|
-
targets.append(target)
|
1922
|
-
|
1923
|
-
except Exception as e:
|
1924
|
-
print(f"Error processing {yaml_file}: {str(e)}")
|
1925
|
-
continue
|
1926
|
-
|
1927
|
-
return targets
|
boltz/data/parse/yaml.py
CHANGED
@@ -1,10 +1,9 @@
|
|
1
1
|
from pathlib import Path
|
2
|
-
from typing import Union, List, Optional
|
3
2
|
|
4
3
|
import yaml
|
5
4
|
from rdkit.Chem.rdchem import Mol
|
6
5
|
|
7
|
-
from boltz.data.parse.schema import parse_boltz_schema
|
6
|
+
from boltz.data.parse.schema import parse_boltz_schema
|
8
7
|
from boltz.data.types import Target
|
9
8
|
|
10
9
|
|
@@ -13,9 +12,8 @@ def parse_yaml(
|
|
13
12
|
ccd: dict[str, Mol],
|
14
13
|
mol_dir: Path,
|
15
14
|
boltz2: bool = False,
|
16
|
-
|
17
|
-
|
18
|
-
"""Parse a Boltz input yaml / json file or directory.
|
15
|
+
) -> Target:
|
16
|
+
"""Parse a Boltz input yaml / json.
|
19
17
|
|
20
18
|
The input file should be a yaml file with the following format:
|
21
19
|
|
@@ -51,28 +49,20 @@ def parse_yaml(
|
|
51
49
|
Parameters
|
52
50
|
----------
|
53
51
|
path : Path
|
54
|
-
Path to the YAML input
|
55
|
-
|
52
|
+
Path to the YAML input format.
|
53
|
+
components : Dict
|
56
54
|
Dictionary of CCD components.
|
57
|
-
|
58
|
-
Path to the directory containing molecules.
|
59
|
-
boltz2 : bool, optional
|
55
|
+
boltz2 : bool
|
60
56
|
Whether to parse the input for Boltz2.
|
61
|
-
output_dir : Path, optional
|
62
|
-
Path to the output directory where results will be saved.
|
63
57
|
|
64
58
|
Returns
|
65
59
|
-------
|
66
|
-
|
67
|
-
The parsed target
|
60
|
+
Target
|
61
|
+
The parsed target.
|
68
62
|
|
69
63
|
"""
|
70
|
-
path
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
with path.open("r") as file:
|
76
|
-
data = yaml.safe_load(file)
|
77
|
-
name = path.stem
|
78
|
-
return parse_boltz_schema(name, data, ccd, mol_dir, boltz2, output_dir)
|
64
|
+
with path.open("r") as file:
|
65
|
+
data = yaml.safe_load(file)
|
66
|
+
|
67
|
+
name = path.stem
|
68
|
+
return parse_boltz_schema(name, data, ccd, mol_dir, boltz2)
|
boltz/main.py
CHANGED
@@ -272,7 +272,7 @@ def get_cache_path() -> str:
|
|
272
272
|
|
273
273
|
|
274
274
|
def check_inputs(data: Path) -> list[Path]:
|
275
|
-
"""Check the input data.
|
275
|
+
"""Check the input data and output directory.
|
276
276
|
|
277
277
|
Parameters
|
278
278
|
----------
|
@@ -282,21 +282,18 @@ def check_inputs(data: Path) -> list[Path]:
|
|
282
282
|
Returns
|
283
283
|
-------
|
284
284
|
list[Path]
|
285
|
-
The list of input
|
285
|
+
The list of input data.
|
286
286
|
|
287
287
|
"""
|
288
|
+
click.echo("Checking input data.")
|
289
|
+
|
288
290
|
# Check if data is a directory
|
289
291
|
if data.is_dir():
|
290
292
|
data: list[Path] = list(data.glob("*"))
|
291
293
|
|
292
294
|
# Filter out non .fasta or .yaml files, raise
|
293
295
|
# an error on directory and other file types
|
294
|
-
filtered_data = []
|
295
296
|
for d in data:
|
296
|
-
# Skip hidden files and directories
|
297
|
-
if d.name.startswith('.') or any(part.startswith('.') for part in d.parts):
|
298
|
-
continue
|
299
|
-
|
300
297
|
if d.is_dir():
|
301
298
|
msg = f"Found directory {d} instead of .fasta or .yaml."
|
302
299
|
raise RuntimeError(msg)
|
@@ -306,8 +303,6 @@ def check_inputs(data: Path) -> list[Path]:
|
|
306
303
|
"please provide a .fasta or .yaml file."
|
307
304
|
)
|
308
305
|
raise RuntimeError(msg)
|
309
|
-
filtered_data.append(d)
|
310
|
-
data = filtered_data
|
311
306
|
else:
|
312
307
|
data = [data]
|
313
308
|
|
@@ -498,25 +493,13 @@ def process_input( # noqa: C901, PLR0912, PLR0915, D103
|
|
498
493
|
) -> None:
|
499
494
|
try:
|
500
495
|
# Parse data
|
501
|
-
if path.
|
502
|
-
# Process all YAML and FASTA files in the directory
|
503
|
-
targets = []
|
504
|
-
for file_path in path.glob("*"):
|
505
|
-
if file_path.suffix in (".fa", ".fas", ".fasta"):
|
506
|
-
target = parse_fasta(file_path, ccd, mol_dir, boltz2)
|
507
|
-
targets.append(target)
|
508
|
-
elif file_path.suffix in (".yml", ".yaml"):
|
509
|
-
target = parse_yaml(file_path, ccd, mol_dir, boltz2)
|
510
|
-
if not isinstance(target, list):
|
511
|
-
target = [target]
|
512
|
-
targets.extend(target)
|
513
|
-
elif path.suffix in (".fa", ".fas", ".fasta"):
|
496
|
+
if path.suffix in (".fa", ".fas", ".fasta"):
|
514
497
|
target = parse_fasta(path, ccd, mol_dir, boltz2)
|
515
|
-
targets = [target]
|
516
498
|
elif path.suffix in (".yml", ".yaml"):
|
517
|
-
|
518
|
-
|
519
|
-
|
499
|
+
target = parse_yaml(path, ccd, mol_dir, boltz2)
|
500
|
+
elif path.is_dir():
|
501
|
+
msg = f"Found directory {path} instead of .fasta or .yaml, skipping."
|
502
|
+
raise RuntimeError(msg) # noqa: TRY301
|
520
503
|
else:
|
521
504
|
msg = (
|
522
505
|
f"Unable to parse filetype {path.suffix}, "
|
@@ -524,98 +507,96 @@ def process_input( # noqa: C901, PLR0912, PLR0915, D103
|
|
524
507
|
)
|
525
508
|
raise RuntimeError(msg) # noqa: TRY301
|
526
509
|
|
527
|
-
#
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
|
549
|
-
|
550
|
-
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
|
557
|
-
|
558
|
-
|
559
|
-
|
560
|
-
msa_pairing_strategy=msa_pairing_strategy,
|
561
|
-
)
|
510
|
+
# Get target id
|
511
|
+
target_id = target.record.id
|
512
|
+
|
513
|
+
# Get all MSA ids and decide whether to generate MSA
|
514
|
+
to_generate = {}
|
515
|
+
prot_id = const.chain_type_ids["PROTEIN"]
|
516
|
+
for chain in target.record.chains:
|
517
|
+
# Add to generate list, assigning entity id
|
518
|
+
if (chain.mol_type == prot_id) and (chain.msa_id == 0):
|
519
|
+
entity_id = chain.entity_id
|
520
|
+
msa_id = f"{target_id}_{entity_id}"
|
521
|
+
to_generate[msa_id] = target.sequences[entity_id]
|
522
|
+
chain.msa_id = msa_dir / f"{msa_id}.csv"
|
523
|
+
|
524
|
+
# We do not support msa generation for non-protein chains
|
525
|
+
elif chain.msa_id == 0:
|
526
|
+
chain.msa_id = -1
|
527
|
+
|
528
|
+
# Generate MSA
|
529
|
+
if to_generate and not use_msa_server:
|
530
|
+
msg = "Missing MSA's in input and --use_msa_server flag not set."
|
531
|
+
raise RuntimeError(msg) # noqa: TRY301
|
532
|
+
|
533
|
+
if to_generate:
|
534
|
+
msg = f"Generating MSA for {path} with {len(to_generate)} protein entities."
|
535
|
+
click.echo(msg)
|
536
|
+
compute_msa(
|
537
|
+
data=to_generate,
|
538
|
+
target_id=target_id,
|
539
|
+
msa_dir=msa_dir,
|
540
|
+
msa_server_url=msa_server_url,
|
541
|
+
msa_pairing_strategy=msa_pairing_strategy,
|
542
|
+
)
|
562
543
|
|
563
|
-
|
564
|
-
|
565
|
-
|
566
|
-
|
567
|
-
|
568
|
-
|
569
|
-
|
570
|
-
|
571
|
-
|
572
|
-
|
573
|
-
|
574
|
-
|
575
|
-
|
576
|
-
|
577
|
-
|
578
|
-
|
579
|
-
|
580
|
-
|
581
|
-
|
582
|
-
|
583
|
-
|
584
|
-
|
585
|
-
|
586
|
-
|
587
|
-
|
588
|
-
|
589
|
-
|
590
|
-
|
591
|
-
|
592
|
-
|
593
|
-
|
594
|
-
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
|
605
|
-
|
606
|
-
|
607
|
-
|
608
|
-
|
609
|
-
|
610
|
-
|
611
|
-
|
612
|
-
|
613
|
-
|
614
|
-
|
615
|
-
|
616
|
-
|
617
|
-
|
618
|
-
|
544
|
+
# Parse MSA data
|
545
|
+
msas = sorted({c.msa_id for c in target.record.chains if c.msa_id != -1})
|
546
|
+
msa_id_map = {}
|
547
|
+
for msa_idx, msa_id in enumerate(msas):
|
548
|
+
# Check that raw MSA exists
|
549
|
+
msa_path = Path(msa_id)
|
550
|
+
if not msa_path.exists():
|
551
|
+
msg = f"MSA file {msa_path} not found."
|
552
|
+
raise FileNotFoundError(msg) # noqa: TRY301
|
553
|
+
|
554
|
+
# Dump processed MSA
|
555
|
+
processed = processed_msa_dir / f"{target_id}_{msa_idx}.npz"
|
556
|
+
msa_id_map[msa_id] = f"{target_id}_{msa_idx}"
|
557
|
+
if not processed.exists():
|
558
|
+
# Parse A3M
|
559
|
+
if msa_path.suffix == ".a3m":
|
560
|
+
msa: MSA = parse_a3m(
|
561
|
+
msa_path,
|
562
|
+
taxonomy=None,
|
563
|
+
max_seqs=max_msa_seqs,
|
564
|
+
)
|
565
|
+
elif msa_path.suffix == ".csv":
|
566
|
+
msa: MSA = parse_csv(msa_path, max_seqs=max_msa_seqs)
|
567
|
+
else:
|
568
|
+
msg = f"MSA file {msa_path} not supported, only a3m or csv."
|
569
|
+
raise RuntimeError(msg) # noqa: TRY301
|
570
|
+
|
571
|
+
msa.dump(processed)
|
572
|
+
|
573
|
+
# Modify records to point to processed MSA
|
574
|
+
for c in target.record.chains:
|
575
|
+
if (c.msa_id != -1) and (c.msa_id in msa_id_map):
|
576
|
+
c.msa_id = msa_id_map[c.msa_id]
|
577
|
+
|
578
|
+
# Dump templates
|
579
|
+
for template_id, template in target.templates.items():
|
580
|
+
name = f"{target.record.id}_{template_id}.npz"
|
581
|
+
template_path = processed_templates_dir / name
|
582
|
+
template.dump(template_path)
|
583
|
+
|
584
|
+
# Dump constraints
|
585
|
+
constraints_path = processed_constraints_dir / f"{target.record.id}.npz"
|
586
|
+
target.residue_constraints.dump(constraints_path)
|
587
|
+
|
588
|
+
# Dump extra molecules
|
589
|
+
Chem.SetDefaultPickleProperties(Chem.PropertyPickleOptions.AllProps)
|
590
|
+
with (processed_mols_dir / f"{target.record.id}.pkl").open("wb") as f:
|
591
|
+
pickle.dump(target.extra_mols, f)
|
592
|
+
|
593
|
+
# Dump structure
|
594
|
+
struct_path = structure_dir / f"{target.record.id}.npz"
|
595
|
+
target.structure.dump(struct_path)
|
596
|
+
|
597
|
+
# Dump record
|
598
|
+
record_path = records_dir / f"{target.record.id}.json"
|
599
|
+
target.record.dump(record_path)
|
619
600
|
|
620
601
|
except Exception as e: # noqa: BLE001
|
621
602
|
import traceback
|
@@ -1,5 +1,5 @@
|
|
1
1
|
boltz/__init__.py,sha256=F_-so3S40iZrSZ89Ge4TS6aZqwWyZXq_H4AXGDlbA_g,187
|
2
|
-
boltz/main.py,sha256=
|
2
|
+
boltz/main.py,sha256=AMYdcqTLOL5Mbo8P2ix1KeNwTijH5fWNzKUnLHBNtn0,39735
|
3
3
|
boltz/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
4
|
boltz/data/const.py,sha256=1M-88Z6HkfKY6MkNtqcj3b9P-oX9xEXluh3qM_u8dNU,26779
|
5
5
|
boltz/data/mol.py,sha256=maOpPHEGX1VVXCIFY6pQNGF7gUBZPAfgSvuPf2QO1yc,34268
|
@@ -40,9 +40,9 @@ boltz/data/parse/mmcif.py,sha256=25kEXCkx-OuaawAs7cdz0fxdRu5_CCO0AV00u84PrjQ,368
|
|
40
40
|
boltz/data/parse/mmcif_with_constraints.py,sha256=WHYZckSqUwu-Nb9vmVmxHmC7uxwVrF7AVUeVKsc5wGQ,51473
|
41
41
|
boltz/data/parse/pdb.py,sha256=iybk4p2UgUy_ABGprDq_xxyPSdm1HAZsGTM0lhxVEwM,1654
|
42
42
|
boltz/data/parse/pdb_download.py,sha256=wge-scX-lOatX0q83W1wOsaql99rYp-6uGWSHEc995M,2718
|
43
|
-
boltz/data/parse/schema.py,sha256=
|
43
|
+
boltz/data/parse/schema.py,sha256=kNu28U2_MGiecwWNlcxgaDH3WOcO0P-q2LdoSPSb66w,63826
|
44
44
|
boltz/data/parse/sdf.py,sha256=fs3MQVClDcCzxJaeVYiDuoh-fUrYc8Tcd5Bz8ws3FKI,2052
|
45
|
-
boltz/data/parse/yaml.py,sha256=
|
45
|
+
boltz/data/parse/yaml.py,sha256=GRFRMtDD4PQ4PIpA_S1jj0vRaEu2LlZd_g4rN1zUrNo,1505
|
46
46
|
boltz/data/sample/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
47
47
|
boltz/data/sample/cluster.py,sha256=9Sx8qP7zGZOAyEspwYFtCTbGTBZnuN-zfCKFbbA_6oI,8175
|
48
48
|
boltz/data/sample/distillation.py,sha256=ABzst2FBr_E54KqZWIHc1bYtKYr79lxRJM7PnS4ifK0,1789
|
@@ -107,9 +107,9 @@ boltz/model/optim/scheduler.py,sha256=nB4jz0CZ4pR4n08LQngExL_pNycIdYI8AXVoHPnZWQ
|
|
107
107
|
boltz/model/potentials/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
108
108
|
boltz/model/potentials/potentials.py,sha256=vev8Vjfs-ML1hyrdv_R8DynG4wSFahJ6nzPWp7CYQqw,17507
|
109
109
|
boltz/model/potentials/schedules.py,sha256=m7XJjfuF9uTX3bR9VisXv1rvzJjxiD8PobXRpcBBu1c,968
|
110
|
-
boltz_vsynthes-1.0.
|
111
|
-
boltz_vsynthes-1.0.
|
112
|
-
boltz_vsynthes-1.0.
|
113
|
-
boltz_vsynthes-1.0.
|
114
|
-
boltz_vsynthes-1.0.
|
115
|
-
boltz_vsynthes-1.0.
|
110
|
+
boltz_vsynthes-1.0.41.dist-info/licenses/LICENSE,sha256=8GZ_1eZsUeG6jdqgJJxtciWzADfgLEV4LY8sKUOsJhc,1102
|
111
|
+
boltz_vsynthes-1.0.41.dist-info/METADATA,sha256=CtEq_ih3lz92gdhQYgAf5frBqConi3jhOoTV4DlxPuo,7171
|
112
|
+
boltz_vsynthes-1.0.41.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
113
|
+
boltz_vsynthes-1.0.41.dist-info/entry_points.txt,sha256=n5a5I35ntu9lmyr16oZgHPFY0b0YxjiixY7m7nbMTLc,41
|
114
|
+
boltz_vsynthes-1.0.41.dist-info/top_level.txt,sha256=MgU3Jfb-ctWm07YGMts68PMjSh9v26D0gfG3dFRmVFA,6
|
115
|
+
boltz_vsynthes-1.0.41.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|