boltz-vsynthes 1.0.7__py3-none-any.whl → 1.0.9__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- boltz/data/mol.py +4 -0
- boltz/data/parse/schema.py +87 -292
- boltz/main.py +78 -61
- {boltz_vsynthes-1.0.7.dist-info → boltz_vsynthes-1.0.9.dist-info}/METADATA +1 -1
- {boltz_vsynthes-1.0.7.dist-info → boltz_vsynthes-1.0.9.dist-info}/RECORD +9 -9
- {boltz_vsynthes-1.0.7.dist-info → boltz_vsynthes-1.0.9.dist-info}/WHEEL +0 -0
- {boltz_vsynthes-1.0.7.dist-info → boltz_vsynthes-1.0.9.dist-info}/entry_points.txt +0 -0
- {boltz_vsynthes-1.0.7.dist-info → boltz_vsynthes-1.0.9.dist-info}/licenses/LICENSE +0 -0
- {boltz_vsynthes-1.0.7.dist-info → boltz_vsynthes-1.0.9.dist-info}/top_level.txt +0 -0
boltz/data/mol.py
CHANGED
@@ -30,6 +30,10 @@ def load_molecules(moldir: str, molecules: list[str]) -> dict[str, Mol]:
     """
     loaded_mols = {}
     for molecule in molecules:
+        # Skip if it's a SMILES string (starts with LIG)
+        if molecule.startswith("LIG"):
+            continue
+
         path = Path(moldir) / f"{molecule}.pkl"
         if not path.exists():
             msg = f"CCD component {molecule} not found!"
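The practical effect of this change: `load_molecules` now silently skips any identifier that starts with `LIG`, the prefix the parser assigns to SMILES- and SDF-defined ligands, instead of failing to find a CCD pickle for it. Below is an illustrative re-implementation of that filtering, not the packaged function; the on-disk pickle layout is assumed from the diff.

```python
import pickle
from pathlib import Path


def load_molecules_sketch(moldir: str, molecules: list[str]) -> dict:
    """Illustrative re-implementation of the new skip logic, not the packaged function."""
    loaded_mols = {}
    for molecule in molecules:
        # Internal SMILES/SDF ligands are named LIG1, LIG2, ... and have no CCD pickle on disk.
        if molecule.startswith("LIG"):
            continue
        path = Path(moldir) / f"{molecule}.pkl"  # assumed on-disk layout, taken from the diff
        if not path.exists():
            raise ValueError(f"CCD component {molecule} not found!")
        with path.open("rb") as f:
            loaded_mols[molecule] = pickle.load(f)
    return loaded_mols
```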
boltz/data/parse/schema.py
CHANGED
@@ -621,6 +621,9 @@ def get_mol(ccd: str, mols: dict, moldir: str) -> Mol:
     Return mol with ccd from mols if it is in mols. Otherwise load it from moldir,
     add it to mols, and return the mol.
     """
+    # Skip if it's a SMILES string (starts with LIG)
+    if ccd.startswith("LIG"):
+        return None
     mol = mols.get(ccd)
     if mol is None:
         mol = load_molecules(moldir, [ccd])[ccd]
@@ -655,6 +658,10 @@ def parse_ccd_residue(
         The output ParsedResidue, if successful.

     """
+    # Skip if it's a SMILES string (starts with LIG)
+    if name.startswith("LIG"):
+        return None
+
     unk_chirality = const.chirality_type_ids[const.unk_chirality_type]

     # Check if this is a single heavy atom CCD residue
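Both `get_mol` and `parse_ccd_residue` now return `None` for `LIG*` names, so downstream callers have to treat the result as optional. Assuming the two helpers are importable directly from `boltz.data.parse.schema` (inferred from the file path, not verified against the installed package), the new contract can be exercised like this:

```python
from boltz.data.parse.schema import get_mol, parse_ccd_residue

# Internal SMILES/SDF ligands are named LIG1, LIG2, ... and short-circuit to None
# before any CCD lookup or RDKit work happens.
assert get_mol("LIG1", mols={}, moldir="unused") is None
assert parse_ccd_residue(name="LIG1", ref_mol=None, res_idx=0) is None

# Real CCD codes still go through the normal lookup path (and may hit the moldir cache), e.g.:
# mol = get_mol("HEM", mols={}, moldir="/path/to/mols")
```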
@@ -929,83 +936,37 @@ def token_spec_to_ids(
     contacts.append((chain_to_idx[chain_name], residue_index_or_atom_name - 1))


-def parse_boltz_schema(
-
-    schema: dict,
-    ccd: Mapping[str, Mol],
-    mol_dir: Optional[Path] = None,
-    boltz_2: bool = False,
-) -> Target:
-    """Parse a Boltz input yaml / json.
-
-    The input file should be a dictionary with the following format:
-
-    version: 1
-    sequences:
-        - protein:
-            id: A
-            sequence: "MADQLTEEQIAEFKEAFSLF" # or pdb: "1a2k" or pdb: "path/to/file.pdb"
-            msa: path/to/msa1.a3m
-        - protein:
-            id: [B, C]
-            sequence: "AKLSILPWGHC"
-            msa: path/to/msa2.a3m
-        - rna:
-            id: D
-            sequence: "GCAUAGC"
-        - ligand:
-            id: E
-            smiles: "CC1=CC=CC=C1"
-    constraints:
-        - bond:
-            atom1: [A, 1, CA]
-            atom2: [A, 2, N]
-        - pocket:
-            binder: E
-            contacts: [[B, 1], [B, 2]]
-            max_distance: 6
-        - contact:
-            token1: [A, 1]
-            token2: [B, 1]
-            max_distance: 6
-    templates:
-        - cif: path/to/template.cif
-    properties:
-        - affinity:
-            binder: E
+def parse_boltz_schema(name: str, schema: dict, ccd: dict, mol_dir: Path, boltz2: bool = False) -> dict:
+    """Parse the Boltz input schema.

     Parameters
     ----------
     name : str
-
+        The name of the input file.
     schema : dict
         The input schema.
-
+    ccd : dict
         Dictionary of CCD components.
-    mol_dir: Path
+    mol_dir : Path
         Path to the directory containing the molecules.
-    boltz2: bool
-        Whether to parse the input for Boltz2.
+    boltz2 : bool, optional
+        Whether to parse the input for Boltz2, by default False.

     Returns
     -------
-
-    The parsed
+    dict
+        The parsed schema.

     """
-    #
-
-
-        msg = f"Invalid version {version} in input!"
+    # Check version
+    if "version" not in schema:
+        msg = "Schema must have a version field"
         raise ValueError(msg)

-    #
-    blocker = rdBase.BlockLogs()  # noqa: F841
-
-    # First group items that have the same type, sequence and modifications
+    # Group items by entity type and sequence
     items_to_group = {}
     chain_name_to_entity_type = {}
-
+
     # Keep track of ligand IDs
     ligand_id = 1
     ligand_id_map = {}
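Taken together, this hunk changes the function's contract: it now takes the input `name`, a plain `ccd` dict and a required `mol_dir`, insists on a top-level `version` key, and returns the (rewritten) schema dict instead of building a `Target` here. A minimal input that passes the new top-level check, with field names taken from the old docstring; the per-entity validation in 1.0.9 may require more than this:

```python
from pathlib import Path

# Hypothetical minimal schema; "version" is now mandatory, otherwise a ValueError is raised.
schema = {
    "version": 1,
    "sequences": [
        {"protein": {"id": "A", "sequence": "MADQLTEEQIAEFKEAFSLF"}},
        {"ligand": {"id": "E", "smiles": "CC1=CC=CC=C1"}},
    ],
}

# Sketch of a call with the new signature (argument names from the diff); running it
# requires the rest of the package to be installed and a populated mol_dir.
# parsed = parse_boltz_schema(name="example", schema=schema, ccd={}, mol_dir=Path("./mols"))
```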
@@ -1057,19 +1018,37 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
                 msg = "Protein must have either 'sequence' or 'pdb' field"
                 raise ValueError(msg)
         elif entity_type == "ligand":
-
-            assert "smiles" not in item[entity_type] or "ccd" not in item[entity_type]
+            # Support for SMILES, CCD, and SDF
             if "smiles" in item[entity_type]:
                 seq = str(item[entity_type]["smiles"])
                 # Map user-provided ID to internal LIG1, LIG2, etc.
                 for id in entity_id:
                     ligand_id_map[id] = f"LIG{ligand_id}"
                     ligand_id += 1
-
+            elif "ccd" in item[entity_type]:
                 seq = str(item[entity_type]["ccd"])
                 # For CCD ligands, use the CCD code as the internal ID
                 for id in entity_id:
                     ligand_id_map[id] = seq
+            elif "sdf" in item[entity_type]:
+                sdf_path = Path(item[entity_type]["sdf"])
+                if not sdf_path.exists():
+                    msg = f"SDF file not found: {sdf_path}"
+                    raise FileNotFoundError(msg)
+                # Read SDF and convert to SMILES
+                from rdkit import Chem
+                mol = Chem.SDMolSupplier(str(sdf_path))[0]
+                if mol is None:
+                    msg = f"Failed to read SDF file: {sdf_path}"
+                    raise ValueError(msg)
+                seq = Chem.MolToSmiles(mol)
+                # Map user-provided ID to internal LIG1, LIG2, etc.
+                for id in entity_id:
+                    ligand_id_map[id] = f"LIG{ligand_id}"
+                    ligand_id += 1
+            else:
+                msg = "Ligand must have either 'smiles', 'ccd', or 'sdf' field"
+                raise ValueError(msg)

         # Group items by entity
         items_to_group.setdefault((entity_type, seq), []).append(item)
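Ligands can now also be supplied as an SDF file: at parse time the first record is read with RDKit and converted to SMILES, then handled like a `smiles` ligand with an internal `LIG{n}` id. A standalone sketch of that conversion, using only the RDKit calls visible in the diff:

```python
from pathlib import Path

from rdkit import Chem


def sdf_to_smiles(sdf_path: Path) -> str:
    """Standalone version of the conversion the parser now performs for 'sdf' ligands."""
    if not sdf_path.exists():
        raise FileNotFoundError(f"SDF file not found: {sdf_path}")
    mol = Chem.SDMolSupplier(str(sdf_path))[0]  # only the first record is used, as in the diff
    if mol is None:
        raise ValueError(f"Failed to read SDF file: {sdf_path}")
    return Chem.MolToSmiles(mol)
```

In the input YAML this presumably corresponds to a ligand entry such as `- ligand: {id: E, sdf: path/to/ligand.sdf}`, mirroring the existing `smiles` and `ccd` fields.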
@@ -1080,244 +1059,60 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
         for chain_name in chain_names:
             chain_name_to_entity_type[chain_name] = entity_type

-    #
-
-    properties = schema.get("properties", [])
-
-    # Get all ligands
+    # Get all proteins and ligands
+    proteins = []
     ligands = []
     for item in schema["sequences"]:
         entity_type = list(item.keys())[0]
-
-        entity_id = item[entity_type]["id"]
-        entity_id = [entity_id] if isinstance(entity_id, str) else entity_id
-        ligands.extend(entity_id)
-
-    # Get user-specified binders
-    specified_binders = set()
-    for prop in properties:
-        if "affinity" in prop:
-            binder = prop["affinity"]["binder"]
-            specified_binders.add(binder)
-
-    # If no binders specified, use all proteins
-    if not specified_binders:
-        for item in schema["sequences"]:
-            entity_type = list(item.keys())[0]
-            if entity_type == "protein":
-                entity_id = item[entity_type]["id"]
-                entity_id = [entity_id] if isinstance(entity_id, str) else entity_id
-                specified_binders.update(entity_id)
-
-    # Generate protein-ligand pairs for specified binders
-    new_properties = []
-    for binder in specified_binders:
-        for ligand in ligands:
-            if ligand in ligand_id_map:
-                ligand = ligand_id_map[ligand]  # Convert to internal LIG1, LIG2, etc.
-            affinity_ligands.add(ligand)
-            new_properties.append({
-                "affinity": {
-                    "binder": binder,
-                    "ligand": ligand
-                }
-            })
-
-    # Update schema with generated properties
-    schema["properties"] = new_properties
-
-    # Parse each group
-    chains = []
-    extra_mols = {}
-    for (entity_type, seq), items in items_to_group.items():
-        # Get entity id
-        entity_id = items[0][entity_type]["id"]
+        entity_id = item[entity_type]["id"]
         entity_id = [entity_id] if isinstance(entity_id, str) else entity_id
-
-        # Check if this entity has affinity
-        affinity = any(entity in affinity_ligands for entity in entity_id)
-
-        # Parse a protein
         if entity_type == "protein":
-
-
-
-            msa = Path(msa)
-            if not msa.exists():
-                msg = f"MSA file not found: {msa}"
-                raise FileNotFoundError(msg)
-            with msa.open("r") as f:
-                msa_data = f.read()
-            else:
-                msa_data = None
-
-            # Parse sequence
-            residues = []
-            for res_idx, code in enumerate(seq):
-                # Get mol
-                ref_mol = get_mol(code, ccd, mol_dir)
-
-                # Parse residue
-                residue = parse_ccd_residue(
-                    name=code,
-                    ref_mol=ref_mol,
-                    res_idx=res_idx,
-                )
-                residues.append(residue)
-
-            # Create protein chain
-            parsed_chain = ParsedChain(
-                entity=entity_id,
-                residues=residues,
-                type=const.chain_type_ids["PROTEIN"],
-                cyclic_period=0,
-                sequence=seq,
-                affinity=affinity,
-                affinity_mw=None,
-            )
-
-        # Parse a non-polymer
-        elif (entity_type == "ligand") and "ccd" in (items[0][entity_type]):
-            seq = items[0][entity_type]["ccd"]
-
-            if isinstance(seq, str):
-                seq = [seq]
-
-            if affinity and len(seq) > 1:
-                msg = "Cannot compute affinity for multi residue ligands!"
-                raise ValueError(msg)
-
-            residues = []
-            affinity_mw = None
-            for res_idx, code in enumerate(seq):
-                # Get mol
-                ref_mol = get_mol(code, ccd, mol_dir)
-
-                if affinity:
-                    affinity_mw = AllChem.Descriptors.MolWt(ref_mol)
-
-                # Parse residue
-                residue = parse_ccd_residue(
-                    name=code,
-                    ref_mol=ref_mol,
-                    res_idx=res_idx,
-                )
-                residues.append(residue)
-
-            # Create multi ligand chain
-            parsed_chain = ParsedChain(
-                entity=entity_id,
-                residues=residues,
-                type=const.chain_type_ids["NONPOLYMER"],
-                cyclic_period=0,
-                sequence=None,
-                affinity=affinity,
-                affinity_mw=affinity_mw,
-            )
-
-            assert not items[0][entity_type].get(
-                "cyclic", False
-            ), "Cyclic flag is not supported for ligands"
-
-        elif (entity_type == "ligand") and ("smiles" in items[0][entity_type]):
-            seq = items[0][entity_type]["smiles"]
-
-            if affinity:
-                seq = standardize(seq)
-
-            mol = AllChem.MolFromSmiles(seq)
-            mol = AllChem.AddHs(mol)
+            proteins.extend(entity_id)
+        elif entity_type == "ligand":
+            ligands.extend(entity_id)

-
-
-
-
-
-
-
-
-
+    # Generate properties for each protein-ligand pair
+    new_properties = []
+    for prop in schema.get("properties", []):
+        if "affinity" in prop:
+            affinity = prop["affinity"]
+            # Handle protein as binder
+            if "protein" in affinity:
+                binder = affinity["protein"]
+                if binder not in proteins:
+                    msg = f"Protein {binder} not found in sequences"
                     raise ValueError(msg)
-
-
-
-
-
-
-
-
-
-
-    #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                affinity_mw=affinity_mw,
-            )
-
-            assert not items[0][entity_type].get(
-                "cyclic", False
-            ), "Cyclic flag is not supported for ligands"
-
-        else:
-            msg = f"Invalid entity type: {entity_type}"
-            raise ValueError(msg)
-
-        chains.append(parsed_chain)
+                # Generate pairs with all ligands
+                for ligand in ligands:
+                    if ligand in ligand_id_map:
+                        ligand = ligand_id_map[ligand]  # Convert to internal LIG1, LIG2, etc.
+                    new_properties.append({
+                        "affinity": {
+                            "binder": binder,
+                            "ligand": ligand
+                        }
+                    })
+            # Handle ligand as binder (backward compatibility)
+            elif "binder" in affinity:
+                binder = affinity["binder"]
+                if binder not in proteins:
+                    msg = f"Protein {binder} not found in sequences"
+                    raise ValueError(msg)
+                # Generate pairs with all ligands
+                for ligand in ligands:
+                    if ligand in ligand_id_map:
+                        ligand = ligand_id_map[ligand]  # Convert to internal LIG1, LIG2, etc.
+                    new_properties.append({
+                        "affinity": {
+                            "binder": binder,
+                            "ligand": ligand
+                        }
+                    })

-    #
-
-    for constraint in schema.get("constraints", []):
-        if "bond" in constraint:
-            atom1 = constraint["bond"]["atom1"]
-            atom2 = constraint["bond"]["atom2"]
-            constraints.append(ParsedBond(atom1, atom2))
-        elif "pocket" in constraint:
-            binder = constraint["pocket"]["binder"]
-            if binder in ligand_id_map:
-                binder = ligand_id_map[binder]  # Convert to internal LIG1, LIG2, etc.
-            contacts = constraint["pocket"]["contacts"]
-            max_distance = constraint["pocket"].get("max_distance", 6.0)
-            constraints.append(ParsedPocket(binder, contacts, max_distance))
-        elif "contact" in constraint:
-            token1 = constraint["contact"]["token1"]
-            token2 = constraint["contact"]["token2"]
-            max_distance = constraint["contact"].get("max_distance", 6.0)
-            constraints.append(ParsedContact(token1, token2, max_distance))
-        else:
-            msg = f"Invalid constraint type: {list(constraint.keys())[0]}"
-            raise ValueError(msg)
-
-    # Parse templates
-    templates = []
-    for template in schema.get("templates", []):
-        cif = template["cif"]
-        chain_id = template.get("chain_id")
-        template_id = template.get("template_id")
-        templates.append(ParsedTemplate(cif, chain_id, template_id))
-
-    # Create target
-    target = Target(
-        name=name,
-        chains=chains,
-        constraints=constraints,
-        templates=templates,
-        extra_mols=extra_mols,
-    )
+    # Update schema with generated properties
+    schema["properties"] = new_properties

-    return
+    return schema


 def standardize(smiles: str) -> Optional[str]:
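The net effect of this refactor is that `parse_boltz_schema` no longer builds chains, constraints, templates or a `Target` here: it collects protein and ligand ids, expands each affinity property into one `{binder, ligand}` pair per ligand (remapping SMILES/SDF ligands to their internal `LIG{n}` ids), writes the result back into `schema["properties"]`, and returns the schema. A simplified, self-contained sketch of that expansion; the packaged code also distinguishes the new `protein` key from the legacy `binder` key and validates each binder against the parsed proteins:

```python
def expand_affinity_pairs(binders: list, ligands: list, ligand_id_map: dict) -> list:
    """Simplified sketch of the new property expansion, not the packaged code."""
    props = []
    for binder in binders:
        for ligand in ligands:
            internal = ligand_id_map.get(ligand, ligand)  # LIG1, LIG2, ... for SMILES/SDF ligands
            props.append({"affinity": {"binder": binder, "ligand": internal}})
    return props


# One protein binder paired with a SMILES ligand (remapped) and a CCD ligand (kept as its CCD code).
print(expand_affinity_pairs(["A"], ["E", "F"], {"E": "LIG1", "F": "NAD"}))
# [{'affinity': {'binder': 'A', 'ligand': 'LIG1'}},
#  {'affinity': {'binder': 'A', 'ligand': 'NAD'}}]
```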
boltz/main.py
CHANGED
@@ -1235,72 +1235,89 @@ def predict( # noqa: C901, PLR0915, PLR0912
     # Print header
     click.echo("\nPredicting property: affinity\n")

-    #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    # Group records by protein-ligand pairs
+    affinity_groups = {}
+    for record in manifest.records:
+        if record.affinity:
+            key = (record.affinity["binder"], record.affinity["ligand"])
+            if key not in affinity_groups:
+                affinity_groups[key] = []
+            affinity_groups[key].append(record)
+
+    # Process each protein-ligand pair
+    for (binder, ligand), records in affinity_groups.items():
+        # Create subfolder for this protein-ligand pair
+        pair_dir = out_dir / "predictions" / f"{binder}_{ligand}"
+        pair_dir.mkdir(parents=True, exist_ok=True)
+
+        # Create manifest for this pair
+        pair_manifest = Manifest(records)
+
+        # Validate inputs
+        pair_manifest_filtered = filter_inputs_affinity(
+            manifest=pair_manifest,
+            outdir=pair_dir,
+            override=override,
+        )
+        if not pair_manifest_filtered.records:
+            click.echo(f"Found existing affinity predictions for {binder}_{ligand}, skipping.")
+            continue

-
-
-        target_dir=out_dir / "predictions",
-        msa_dir=processed.msa_dir,
-        mol_dir=mol_dir,
-        num_workers=num_workers,
-        constraints_dir=processed.constraints_dir,
-        template_dir=processed.template_dir,
-        extra_mols_dir=processed.extra_mols_dir,
-        override_method="other",
-        affinity=True,
-    )
+        msg = f"Running affinity prediction for {binder} with {ligand}"
+        click.echo(msg)

-
-
-
-
-        "max_parallel_samples": 1,
-        "write_confidence_summary": False,
-        "write_full_pae": False,
-        "write_full_pde": False,
-    }
+        pred_writer = BoltzAffinityWriter(
+            data_dir=processed.targets_dir,
+            output_dir=pair_dir,
+        )

-
-
-
+        data_module = Boltz2InferenceDataModule(
+            manifest=pair_manifest_filtered,
+            target_dir=out_dir / "predictions",
+            msa_dir=processed.msa_dir,
+            mol_dir=mol_dir,
+            num_workers=num_workers,
+            constraints_dir=processed.constraints_dir,
+            template_dir=processed.template_dir,
+            extra_mols_dir=processed.extra_mols_dir,
+            override_method="other",
+            affinity=True,
+        )

-
-
-
-
-
-
-
-
-
-
-
-
-
+        predict_affinity_args = {
+            "recycling_steps": 5,
+            "sampling_steps": sampling_steps_affinity,
+            "diffusion_samples": diffusion_samples_affinity,
+            "max_parallel_samples": 1,
+            "write_confidence_summary": False,
+            "write_full_pae": False,
+            "write_full_pde": False,
+        }
+
+        # Load affinity model
+        if affinity_checkpoint is None:
+            affinity_checkpoint = cache / "boltz2_aff.ckpt"
+
+        model_module = Boltz2.load_from_checkpoint(
+            affinity_checkpoint,
+            strict=True,
+            predict_args=predict_affinity_args,
+            map_location="cpu",
+            diffusion_process_args=asdict(diffusion_params),
+            ema=False,
+            pairformer_args=asdict(pairformer_args),
+            msa_args=asdict(msa_args),
+            steering_args={"fk_steering": False, "guidance_update": False},
+            affinity_mw_correction=affinity_mw_correction,
+        )
+        model_module.eval()

-
-
-
-
-
-
+        trainer.callbacks[0] = pred_writer
+        trainer.predict(
+            model_module,
+            datamodule=data_module,
+            return_predictions=False,
+        )


 if __name__ == "__main__":
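On the `main.py` side, affinity prediction now runs once per protein-ligand pair: records are grouped by `(binder, ligand)`, each group gets its own `predictions/{binder}_{ligand}/` subfolder, and the writer, data module, and affinity checkpoint are set up inside that loop. A small illustrative sketch of just the grouping and folder layout, assuming records expose an `.affinity` dict with `binder` and `ligand` keys as in the diff:

```python
from collections import defaultdict
from pathlib import Path


def group_affinity_records(records, out_dir: Path) -> dict:
    """Illustrative grouping only; `records` is assumed to be an iterable of objects
    whose `.affinity` attribute is a dict with 'binder' and 'ligand' keys."""
    groups = defaultdict(list)
    for record in records:
        if record.affinity:
            groups[(record.affinity["binder"], record.affinity["ligand"])].append(record)
    # One output subfolder per protein-ligand pair, e.g. predictions/A_LIG1/
    for binder, ligand in groups:
        (out_dir / "predictions" / f"{binder}_{ligand}").mkdir(parents=True, exist_ok=True)
    return dict(groups)
```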
{boltz_vsynthes-1.0.7.dist-info → boltz_vsynthes-1.0.9.dist-info}/RECORD
CHANGED
@@ -1,8 +1,8 @@
 boltz/__init__.py,sha256=F_-so3S40iZrSZ89Ge4TS6aZqwWyZXq_H4AXGDlbA_g,187
-boltz/main.py,sha256=
+boltz/main.py,sha256=VpCVMACmYA4nsJ9XFuh6JUFR0pdaZuqPWefjF5-Uh7U,42439
 boltz/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 boltz/data/const.py,sha256=1M-88Z6HkfKY6MkNtqcj3b9P-oX9xEXluh3qM_u8dNU,26779
-boltz/data/mol.py,sha256=
+boltz/data/mol.py,sha256=kPytx81filtBASGp7BOf9INvMqIijQaSh8HgU7JQsJ0,34398
 boltz/data/pad.py,sha256=O4CGOOc5TwFuuWeP7hKjMIIsljdfLj-VJtXQeVXFx8s,2066
 boltz/data/types.py,sha256=4w9brpOCQe16AyByNrxz7pjIzrgzFNihtik3aaHvKaE,21965
 boltz/data/crop/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -38,7 +38,7 @@ boltz/data/parse/csv.py,sha256=Hcq8rJW2njczahEr8jfd_o-zxLaNSgJ3YIoC9srIqpw,2518
 boltz/data/parse/fasta.py,sha256=taI4s_CqPtyF0XaLJAsVAJHCL0GXm2g1g8Qeccdxikk,3906
 boltz/data/parse/mmcif.py,sha256=25kEXCkx-OuaawAs7cdz0fxdRu5_CCO0AV00u84PrjQ,36822
 boltz/data/parse/mmcif_with_constraints.py,sha256=WHYZckSqUwu-Nb9vmVmxHmC7uxwVrF7AVUeVKsc5wGQ,51473
-boltz/data/parse/schema.py,sha256=
+boltz/data/parse/schema.py,sha256=5VANtvxFZ0FTelESWHA58QJ810XVfSdXHSB8YtJVCuw,37097
 boltz/data/parse/yaml.py,sha256=GRFRMtDD4PQ4PIpA_S1jj0vRaEu2LlZd_g4rN1zUrNo,1505
 boltz/data/sample/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 boltz/data/sample/cluster.py,sha256=9Sx8qP7zGZOAyEspwYFtCTbGTBZnuN-zfCKFbbA_6oI,8175
@@ -104,9 +104,9 @@ boltz/model/optim/scheduler.py,sha256=nB4jz0CZ4pR4n08LQngExL_pNycIdYI8AXVoHPnZWQ
 boltz/model/potentials/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 boltz/model/potentials/potentials.py,sha256=vev8Vjfs-ML1hyrdv_R8DynG4wSFahJ6nzPWp7CYQqw,17507
 boltz/model/potentials/schedules.py,sha256=m7XJjfuF9uTX3bR9VisXv1rvzJjxiD8PobXRpcBBu1c,968
-boltz_vsynthes-1.0.
-boltz_vsynthes-1.0.
-boltz_vsynthes-1.0.
-boltz_vsynthes-1.0.
-boltz_vsynthes-1.0.
-boltz_vsynthes-1.0.
+boltz_vsynthes-1.0.9.dist-info/licenses/LICENSE,sha256=8GZ_1eZsUeG6jdqgJJxtciWzADfgLEV4LY8sKUOsJhc,1102
+boltz_vsynthes-1.0.9.dist-info/METADATA,sha256=_HnBtfMTbZT71l94bMVcMEEKq0InYcmMnuVFO0NQSHc,7171
+boltz_vsynthes-1.0.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+boltz_vsynthes-1.0.9.dist-info/entry_points.txt,sha256=n5a5I35ntu9lmyr16oZgHPFY0b0YxjiixY7m7nbMTLc,41
+boltz_vsynthes-1.0.9.dist-info/top_level.txt,sha256=MgU3Jfb-ctWm07YGMts68PMjSh9v26D0gfG3dFRmVFA,6
+boltz_vsynthes-1.0.9.dist-info/RECORD,,
{boltz_vsynthes-1.0.7.dist-info → boltz_vsynthes-1.0.9.dist-info}/WHEEL
File without changes
{boltz_vsynthes-1.0.7.dist-info → boltz_vsynthes-1.0.9.dist-info}/entry_points.txt
File without changes
{boltz_vsynthes-1.0.7.dist-info → boltz_vsynthes-1.0.9.dist-info}/licenses/LICENSE
File without changes
{boltz_vsynthes-1.0.7.dist-info → boltz_vsynthes-1.0.9.dist-info}/top_level.txt
File without changes