boltz-vsynthes 1.0.29__py3-none-any.whl → 1.0.31__py3-none-any.whl
This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- boltz/data/parse/schema.py +39 -15
- {boltz_vsynthes-1.0.29.dist-info → boltz_vsynthes-1.0.31.dist-info}/METADATA +1 -1
- {boltz_vsynthes-1.0.29.dist-info → boltz_vsynthes-1.0.31.dist-info}/RECORD +7 -7
- {boltz_vsynthes-1.0.29.dist-info → boltz_vsynthes-1.0.31.dist-info}/WHEEL +0 -0
- {boltz_vsynthes-1.0.29.dist-info → boltz_vsynthes-1.0.31.dist-info}/entry_points.txt +0 -0
- {boltz_vsynthes-1.0.29.dist-info → boltz_vsynthes-1.0.31.dist-info}/licenses/LICENSE +0 -0
- {boltz_vsynthes-1.0.29.dist-info → boltz_vsynthes-1.0.31.dist-info}/top_level.txt +0 -0
boltz/data/parse/schema.py
CHANGED
@@ -1024,12 +1024,12 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
|
|
1024
1024
|
# This is a PDB ID
|
1025
1025
|
from boltz.data.parse.pdb_download import parse_pdb_id
|
1026
1026
|
target = parse_pdb_id(pdb_path.stem, ccd, mol_dir, pdb_path.parent)
|
1027
|
-
seq = target["sequences"][0]["protein"]["sequence"]
|
1027
|
+
seq = target["sequences"][0]["protein"]["sequence"]
|
1028
1028
|
else:
|
1029
1029
|
# This is a PDB file
|
1030
1030
|
from boltz.data.parse.pdb import parse_pdb
|
1031
1031
|
target = parse_pdb(pdb_path, ccd, mol_dir)
|
1032
|
-
seq = target["sequences"][0]["protein"]["sequence"]
|
1032
|
+
seq = target["sequences"][0]["protein"]["sequence"]
|
1033
1033
|
else:
|
1034
1034
|
msg = f"Protein must have either 'sequence' or 'pdb' field: {item}"
|
1035
1035
|
raise ValueError(msg)
|
@@ -1041,7 +1041,7 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
|
|
1041
1041
|
sdf_path = Path(item[entity_type]["sdf"])
|
1042
1042
|
from boltz.data.parse.sdf import parse_sdf
|
1043
1043
|
target = parse_sdf(sdf_path, ccd, mol_dir)
|
1044
|
-
seq = target["sequences"][0]["ligand"]["smiles"]
|
1044
|
+
seq = target["sequences"][0]["ligand"]["smiles"]
|
1045
1045
|
elif "ccd" in item[entity_type]:
|
1046
1046
|
seq = str(item[entity_type]["ccd"])
|
1047
1047
|
else:
|
@@ -1059,6 +1059,7 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
|
|
1059
1059
|
|
1060
1060
|
# Check if any affinity ligand is present
|
1061
1061
|
affinity_ligands = set()
|
1062
|
+
affinity_proteins = set()
|
1062
1063
|
properties = schema.get("properties", [])
|
1063
1064
|
if properties and not boltz_2:
|
1064
1065
|
msg = "Affinity prediction is only supported for Boltz2!"
|
@@ -1069,7 +1070,6 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
|
|
1069
1070
|
if prop_type == "affinity":
|
1070
1071
|
binder = prop["affinity"]["binder"]
|
1071
1072
|
if not isinstance(binder, str):
|
1072
|
-
# TODO: support multi residue ligands and ccd's
|
1073
1073
|
msg = "Binder must be a single chain."
|
1074
1074
|
raise ValueError(msg)
|
1075
1075
|
|
@@ -1077,18 +1077,21 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
|
|
1077
1077
|
msg = f"Could not find binder with name {binder} in the input!"
|
1078
1078
|
raise ValueError(msg)
|
1079
1079
|
|
1080
|
-
|
1080
|
+
# Allow both protein and ligand as binders
|
1081
|
+
if chain_name_to_entity_type[binder] == "protein":
|
1082
|
+
affinity_proteins.add(binder)
|
1083
|
+
elif chain_name_to_entity_type[binder] == "ligand":
|
1084
|
+
affinity_ligands.add(binder)
|
1085
|
+
else:
|
1081
1086
|
msg = (
|
1082
|
-
f"Chain {binder} is not a ligand! "
|
1083
|
-
"Affinity is currently only supported for ligands."
|
1087
|
+
f"Chain {binder} is not a protein or ligand! "
|
1088
|
+
"Affinity is currently only supported for proteins and ligands."
|
1084
1089
|
)
|
1085
1090
|
raise ValueError(msg)
|
1086
1091
|
|
1087
|
-
|
1088
|
-
|
1089
|
-
|
1090
|
-
if len(affinity_ligands) > 1:
|
1091
|
-
msg = "Only one affinity ligand is currently supported!"
|
1092
|
+
# Check if any affinity binder is present
|
1093
|
+
if len(affinity_proteins) + len(affinity_ligands) > 1:
|
1094
|
+
msg = "Only one affinity binder is currently supported!"
|
1092
1095
|
raise ValueError(msg)
|
1093
1096
|
|
1094
1097
|
# Go through entities and parse them
|
@@ -1111,12 +1114,15 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
|
|
1111
1114
|
elif isinstance(item[entity_type]["id"], list):
|
1112
1115
|
ids.extend(item[entity_type]["id"])
|
1113
1116
|
|
1114
|
-
# Check if any affinity
|
1117
|
+
# Check if any affinity binder is present
|
1115
1118
|
if len(ids) == 1:
|
1116
|
-
affinity = ids[0] in affinity_ligands
|
1119
|
+
affinity = ids[0] in affinity_ligands or ids[0] in affinity_proteins
|
1117
1120
|
elif (len(ids) > 1) and any(x in affinity_ligands for x in ids):
|
1118
1121
|
msg = "Cannot compute affinity for a ligand that has multiple copies!"
|
1119
1122
|
raise ValueError(msg)
|
1123
|
+
elif (len(ids) > 1) and any(x in affinity_proteins for x in ids):
|
1124
|
+
# If binder is a protein, allow multiple ligands
|
1125
|
+
affinity = True
|
1120
1126
|
else:
|
1121
1127
|
affinity = False
|
1122
1128
|
|
@@ -1171,7 +1177,25 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
|
|
1171
1177
|
unk_token = const.unk_token[entity_type.upper()]
|
1172
1178
|
|
1173
1179
|
# Extract sequence
|
1174
|
-
|
1180
|
+
if "sequence" in items[0][entity_type]:
|
1181
|
+
raw_seq = items[0][entity_type]["sequence"]
|
1182
|
+
elif "pdb" in items[0][entity_type]:
|
1183
|
+
# Handle PDB file
|
1184
|
+
pdb_path = Path(items[0][entity_type]["pdb"])
|
1185
|
+
if len(pdb_path.stem) == 4 and pdb_path.stem.isalnum():
|
1186
|
+
# This is a PDB ID
|
1187
|
+
from boltz.data.parse.pdb_download import parse_pdb_id
|
1188
|
+
target = parse_pdb_id(pdb_path.stem, ccd, mol_dir, pdb_path.parent)
|
1189
|
+
raw_seq = target["sequences"][0]["protein"]["sequence"]
|
1190
|
+
else:
|
1191
|
+
# This is a PDB file
|
1192
|
+
from boltz.data.parse.pdb import parse_pdb
|
1193
|
+
target = parse_pdb(pdb_path, ccd, mol_dir)
|
1194
|
+
raw_seq = target["sequences"][0]["protein"]["sequence"]
|
1195
|
+
else:
|
1196
|
+
msg = f"Protein must have either 'sequence' or 'pdb' field: {items[0]}"
|
1197
|
+
raise ValueError(msg)
|
1198
|
+
|
1175
1199
|
entity_to_seq[entity_id] = raw_seq
|
1176
1200
|
|
1177
1201
|
# Convert sequence to tokens
|
@@ -40,7 +40,7 @@ boltz/data/parse/mmcif.py,sha256=25kEXCkx-OuaawAs7cdz0fxdRu5_CCO0AV00u84PrjQ,368
|
|
40
40
|
boltz/data/parse/mmcif_with_constraints.py,sha256=WHYZckSqUwu-Nb9vmVmxHmC7uxwVrF7AVUeVKsc5wGQ,51473
|
41
41
|
boltz/data/parse/pdb.py,sha256=iybk4p2UgUy_ABGprDq_xxyPSdm1HAZsGTM0lhxVEwM,1654
|
42
42
|
boltz/data/parse/pdb_download.py,sha256=wge-scX-lOatX0q83W1wOsaql99rYp-6uGWSHEc995M,2718
|
43
|
-
boltz/data/parse/schema.py,sha256=
|
43
|
+
boltz/data/parse/schema.py,sha256=9H7ZJpKb22KHXmfJAjG_0jz7VbZ1LAYt5eVp34_nwVw,62176
|
44
44
|
boltz/data/parse/sdf.py,sha256=fs3MQVClDcCzxJaeVYiDuoh-fUrYc8Tcd5Bz8ws3FKI,2052
|
45
45
|
boltz/data/parse/yaml.py,sha256=GRFRMtDD4PQ4PIpA_S1jj0vRaEu2LlZd_g4rN1zUrNo,1505
|
46
46
|
boltz/data/sample/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -107,9 +107,9 @@ boltz/model/optim/scheduler.py,sha256=nB4jz0CZ4pR4n08LQngExL_pNycIdYI8AXVoHPnZWQ
|
|
107
107
|
boltz/model/potentials/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
108
108
|
boltz/model/potentials/potentials.py,sha256=vev8Vjfs-ML1hyrdv_R8DynG4wSFahJ6nzPWp7CYQqw,17507
|
109
109
|
boltz/model/potentials/schedules.py,sha256=m7XJjfuF9uTX3bR9VisXv1rvzJjxiD8PobXRpcBBu1c,968
|
110
|
-
boltz_vsynthes-1.0.
|
111
|
-
boltz_vsynthes-1.0.
|
112
|
-
boltz_vsynthes-1.0.
|
113
|
-
boltz_vsynthes-1.0.
|
114
|
-
boltz_vsynthes-1.0.
|
115
|
-
boltz_vsynthes-1.0.
|
110
|
+
boltz_vsynthes-1.0.31.dist-info/licenses/LICENSE,sha256=8GZ_1eZsUeG6jdqgJJxtciWzADfgLEV4LY8sKUOsJhc,1102
|
111
|
+
boltz_vsynthes-1.0.31.dist-info/METADATA,sha256=4hMLLLOG2rXQo4bR4F_kU_jxyKnOVVQhrXiwwsHxDtk,7171
|
112
|
+
boltz_vsynthes-1.0.31.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
113
|
+
boltz_vsynthes-1.0.31.dist-info/entry_points.txt,sha256=n5a5I35ntu9lmyr16oZgHPFY0b0YxjiixY7m7nbMTLc,41
|
114
|
+
boltz_vsynthes-1.0.31.dist-info/top_level.txt,sha256=MgU3Jfb-ctWm07YGMts68PMjSh9v26D0gfG3dFRmVFA,6
|
115
|
+
boltz_vsynthes-1.0.31.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|