boltz-vsynthes 1.0.28__py3-none-any.whl → 1.0.30__py3-none-any.whl
This diff compares the contents of two publicly available versions of the package, each of which has been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- boltz/data/parse/schema.py +77 -7
- {boltz_vsynthes-1.0.28.dist-info → boltz_vsynthes-1.0.30.dist-info}/METADATA +1 -1
- {boltz_vsynthes-1.0.28.dist-info → boltz_vsynthes-1.0.30.dist-info}/RECORD +7 -7
- {boltz_vsynthes-1.0.28.dist-info → boltz_vsynthes-1.0.30.dist-info}/WHEEL +0 -0
- {boltz_vsynthes-1.0.28.dist-info → boltz_vsynthes-1.0.30.dist-info}/entry_points.txt +0 -0
- {boltz_vsynthes-1.0.28.dist-info → boltz_vsynthes-1.0.30.dist-info}/licenses/LICENSE +0 -0
- {boltz_vsynthes-1.0.28.dist-info → boltz_vsynthes-1.0.30.dist-info}/top_level.txt +0 -0
boltz/data/parse/schema.py
CHANGED
@@ -1024,12 +1024,12 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
|
|
1024
1024
|
# This is a PDB ID
|
1025
1025
|
from boltz.data.parse.pdb_download import parse_pdb_id
|
1026
1026
|
target = parse_pdb_id(pdb_path.stem, ccd, mol_dir, pdb_path.parent)
|
1027
|
-
seq = target
|
1027
|
+
seq = target["sequences"][0]["protein"]["sequence"]
|
1028
1028
|
else:
|
1029
1029
|
# This is a PDB file
|
1030
1030
|
from boltz.data.parse.pdb import parse_pdb
|
1031
1031
|
target = parse_pdb(pdb_path, ccd, mol_dir)
|
1032
|
-
seq = target
|
1032
|
+
seq = target["sequences"][0]["protein"]["sequence"]
|
1033
1033
|
else:
|
1034
1034
|
msg = f"Protein must have either 'sequence' or 'pdb' field: {item}"
|
1035
1035
|
raise ValueError(msg)
|
@@ -1041,9 +1041,7 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
|
|
1041
1041
|
sdf_path = Path(item[entity_type]["sdf"])
|
1042
1042
|
from boltz.data.parse.sdf import parse_sdf
|
1043
1043
|
target = parse_sdf(sdf_path, ccd, mol_dir)
|
1044
|
-
|
1045
|
-
seq = target.sequences[0]
|
1046
|
-
print(seq)
|
1044
|
+
seq = target["sequences"][0]["ligand"]["smiles"]
|
1047
1045
|
elif "ccd" in item[entity_type]:
|
1048
1046
|
seq = str(item[entity_type]["ccd"])
|
1049
1047
|
else:
|
@@ -1173,7 +1171,25 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
|
|
1173
1171
|
unk_token = const.unk_token[entity_type.upper()]
|
1174
1172
|
|
1175
1173
|
# Extract sequence
|
1176
|
-
|
1174
|
+
if "sequence" in items[0][entity_type]:
|
1175
|
+
raw_seq = items[0][entity_type]["sequence"]
|
1176
|
+
elif "pdb" in items[0][entity_type]:
|
1177
|
+
# Handle PDB file
|
1178
|
+
pdb_path = Path(items[0][entity_type]["pdb"])
|
1179
|
+
if len(pdb_path.stem) == 4 and pdb_path.stem.isalnum():
|
1180
|
+
# This is a PDB ID
|
1181
|
+
from boltz.data.parse.pdb_download import parse_pdb_id
|
1182
|
+
target = parse_pdb_id(pdb_path.stem, ccd, mol_dir, pdb_path.parent)
|
1183
|
+
raw_seq = target["sequences"][0]["protein"]["sequence"]
|
1184
|
+
else:
|
1185
|
+
# This is a PDB file
|
1186
|
+
from boltz.data.parse.pdb import parse_pdb
|
1187
|
+
target = parse_pdb(pdb_path, ccd, mol_dir)
|
1188
|
+
raw_seq = target["sequences"][0]["protein"]["sequence"]
|
1189
|
+
else:
|
1190
|
+
msg = f"Protein must have either 'sequence' or 'pdb' field: {items[0]}"
|
1191
|
+
raise ValueError(msg)
|
1192
|
+
|
1177
1193
|
entity_to_seq[entity_id] = raw_seq
|
1178
1194
|
|
1179
1195
|
# Convert sequence to tokens
|
@@ -1199,7 +1215,7 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
|
|
1199
1215
|
)
|
1200
1216
|
|
1201
1217
|
# Parse a non-polymer
|
1202
|
-
elif (entity_type == "ligand") and "ccd" in
|
1218
|
+
elif (entity_type == "ligand") and ("ccd" in items[0][entity_type]):
|
1203
1219
|
seq = items[0][entity_type]["ccd"]
|
1204
1220
|
|
1205
1221
|
if isinstance(seq, str):
|
@@ -1291,6 +1307,60 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
|
|
1291
1307
|
"cyclic", False
|
1292
1308
|
), "Cyclic flag is not supported for ligands"
|
1293
1309
|
|
1310
|
+
elif (entity_type == "ligand") and ("sdf" in items[0][entity_type]):
|
1311
|
+
# Handle SDF file
|
1312
|
+
sdf_path = Path(items[0][entity_type]["sdf"])
|
1313
|
+
from boltz.data.parse.sdf import parse_sdf
|
1314
|
+
target = parse_sdf(sdf_path, ccd, mol_dir)
|
1315
|
+
mol = target["sequences"][0]["ligand"]["smiles"]
|
1316
|
+
|
1317
|
+
if affinity:
|
1318
|
+
mol = standardize(mol)
|
1319
|
+
|
1320
|
+
mol = AllChem.MolFromSmiles(mol)
|
1321
|
+
mol = AllChem.AddHs(mol)
|
1322
|
+
|
1323
|
+
# Set atom names
|
1324
|
+
canonical_order = AllChem.CanonicalRankAtoms(mol)
|
1325
|
+
for atom, can_idx in zip(mol.GetAtoms(), canonical_order):
|
1326
|
+
atom_name = atom.GetSymbol().upper() + str(can_idx + 1)
|
1327
|
+
if len(atom_name) > 4:
|
1328
|
+
msg = (
|
1329
|
+
f"{mol} has an atom with a name longer than "
|
1330
|
+
f"4 characters: {atom_name}."
|
1331
|
+
)
|
1332
|
+
raise ValueError(msg)
|
1333
|
+
atom.SetProp("name", atom_name)
|
1334
|
+
|
1335
|
+
success = compute_3d_conformer(mol)
|
1336
|
+
if not success:
|
1337
|
+
msg = f"Failed to compute 3D conformer for {mol}"
|
1338
|
+
raise ValueError(msg)
|
1339
|
+
|
1340
|
+
mol_no_h = AllChem.RemoveHs(mol, sanitize=False)
|
1341
|
+
affinity_mw = AllChem.Descriptors.MolWt(mol_no_h) if affinity else None
|
1342
|
+
extra_mols[f"LIG{ligand_id}"] = mol_no_h
|
1343
|
+
residue = parse_ccd_residue(
|
1344
|
+
name=f"LIG{ligand_id}",
|
1345
|
+
ref_mol=mol,
|
1346
|
+
res_idx=0,
|
1347
|
+
)
|
1348
|
+
|
1349
|
+
ligand_id += 1
|
1350
|
+
parsed_chain = ParsedChain(
|
1351
|
+
entity=entity_id,
|
1352
|
+
residues=[residue],
|
1353
|
+
type=const.chain_type_ids["NONPOLYMER"],
|
1354
|
+
cyclic_period=0,
|
1355
|
+
sequence=None,
|
1356
|
+
affinity=affinity,
|
1357
|
+
affinity_mw=affinity_mw,
|
1358
|
+
)
|
1359
|
+
|
1360
|
+
assert not items[0][entity_type].get(
|
1361
|
+
"cyclic", False
|
1362
|
+
), "Cyclic flag is not supported for ligands"
|
1363
|
+
|
1294
1364
|
else:
|
1295
1365
|
msg = f"Invalid entity type: {entity_type}"
|
1296
1366
|
raise ValueError(msg)
|
@@ -40,7 +40,7 @@ boltz/data/parse/mmcif.py,sha256=25kEXCkx-OuaawAs7cdz0fxdRu5_CCO0AV00u84PrjQ,368
|
|
40
40
|
boltz/data/parse/mmcif_with_constraints.py,sha256=WHYZckSqUwu-Nb9vmVmxHmC7uxwVrF7AVUeVKsc5wGQ,51473
|
41
41
|
boltz/data/parse/pdb.py,sha256=iybk4p2UgUy_ABGprDq_xxyPSdm1HAZsGTM0lhxVEwM,1654
|
42
42
|
boltz/data/parse/pdb_download.py,sha256=wge-scX-lOatX0q83W1wOsaql99rYp-6uGWSHEc995M,2718
|
43
|
-
boltz/data/parse/schema.py,sha256=
|
43
|
+
boltz/data/parse/schema.py,sha256=kNu28U2_MGiecwWNlcxgaDH3WOcO0P-q2LdoSPSb66w,63826
|
44
44
|
boltz/data/parse/sdf.py,sha256=fs3MQVClDcCzxJaeVYiDuoh-fUrYc8Tcd5Bz8ws3FKI,2052
|
45
45
|
boltz/data/parse/yaml.py,sha256=GRFRMtDD4PQ4PIpA_S1jj0vRaEu2LlZd_g4rN1zUrNo,1505
|
46
46
|
boltz/data/sample/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -107,9 +107,9 @@ boltz/model/optim/scheduler.py,sha256=nB4jz0CZ4pR4n08LQngExL_pNycIdYI8AXVoHPnZWQ
|
|
107
107
|
boltz/model/potentials/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
108
108
|
boltz/model/potentials/potentials.py,sha256=vev8Vjfs-ML1hyrdv_R8DynG4wSFahJ6nzPWp7CYQqw,17507
|
109
109
|
boltz/model/potentials/schedules.py,sha256=m7XJjfuF9uTX3bR9VisXv1rvzJjxiD8PobXRpcBBu1c,968
|
110
|
-
boltz_vsynthes-1.0.
|
111
|
-
boltz_vsynthes-1.0.
|
112
|
-
boltz_vsynthes-1.0.
|
113
|
-
boltz_vsynthes-1.0.
|
114
|
-
boltz_vsynthes-1.0.
|
115
|
-
boltz_vsynthes-1.0.
|
110
|
+
boltz_vsynthes-1.0.30.dist-info/licenses/LICENSE,sha256=8GZ_1eZsUeG6jdqgJJxtciWzADfgLEV4LY8sKUOsJhc,1102
|
111
|
+
boltz_vsynthes-1.0.30.dist-info/METADATA,sha256=LCqHhIkKBAtMf8t6kbT4xoPr54lLZb86Tuy9KOgWYdc,7171
|
112
|
+
boltz_vsynthes-1.0.30.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
113
|
+
boltz_vsynthes-1.0.30.dist-info/entry_points.txt,sha256=n5a5I35ntu9lmyr16oZgHPFY0b0YxjiixY7m7nbMTLc,41
|
114
|
+
boltz_vsynthes-1.0.30.dist-info/top_level.txt,sha256=MgU3Jfb-ctWm07YGMts68PMjSh9v26D0gfG3dFRmVFA,6
|
115
|
+
boltz_vsynthes-1.0.30.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|