boltz-vsynthes 1.0.7__py3-none-any.whl → 1.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- boltz/data/mol.py +4 -0
- boltz/data/parse/schema.py +82 -295
- boltz/main.py +78 -61
- {boltz_vsynthes-1.0.7.dist-info → boltz_vsynthes-1.0.8.dist-info}/METADATA +1 -1
- {boltz_vsynthes-1.0.7.dist-info → boltz_vsynthes-1.0.8.dist-info}/RECORD +9 -9
- {boltz_vsynthes-1.0.7.dist-info → boltz_vsynthes-1.0.8.dist-info}/WHEEL +0 -0
- {boltz_vsynthes-1.0.7.dist-info → boltz_vsynthes-1.0.8.dist-info}/entry_points.txt +0 -0
- {boltz_vsynthes-1.0.7.dist-info → boltz_vsynthes-1.0.8.dist-info}/licenses/LICENSE +0 -0
- {boltz_vsynthes-1.0.7.dist-info → boltz_vsynthes-1.0.8.dist-info}/top_level.txt +0 -0
boltz/data/mol.py
CHANGED
@@ -30,6 +30,10 @@ def load_molecules(moldir: str, molecules: list[str]) -> dict[str, Mol]:
|
|
30
30
|
"""
|
31
31
|
loaded_mols = {}
|
32
32
|
for molecule in molecules:
|
33
|
+
# Skip if it's a SMILES string (starts with LIG)
|
34
|
+
if molecule.startswith("LIG"):
|
35
|
+
continue
|
36
|
+
|
33
37
|
path = Path(moldir) / f"{molecule}.pkl"
|
34
38
|
if not path.exists():
|
35
39
|
msg = f"CCD component {molecule} not found!"
|
boltz/data/parse/schema.py
CHANGED
@@ -621,6 +621,9 @@ def get_mol(ccd: str, mols: dict, moldir: str) -> Mol:
|
|
621
621
|
Return mol with ccd from mols if it is in mols. Otherwise load it from moldir,
|
622
622
|
add it to mols, and return the mol.
|
623
623
|
"""
|
624
|
+
# Skip if it's a SMILES string (starts with LIG)
|
625
|
+
if ccd.startswith("LIG"):
|
626
|
+
return None
|
624
627
|
mol = mols.get(ccd)
|
625
628
|
if mol is None:
|
626
629
|
mol = load_molecules(moldir, [ccd])[ccd]
|
@@ -655,6 +658,10 @@ def parse_ccd_residue(
|
|
655
658
|
The output ParsedResidue, if successful.
|
656
659
|
|
657
660
|
"""
|
661
|
+
# Skip if it's a SMILES string (starts with LIG)
|
662
|
+
if name.startswith("LIG"):
|
663
|
+
return None
|
664
|
+
|
658
665
|
unk_chirality = const.chirality_type_ids[const.unk_chirality_type]
|
659
666
|
|
660
667
|
# Check if this is a single heavy atom CCD residue
|
@@ -929,83 +936,29 @@ def token_spec_to_ids(
|
|
929
936
|
contacts.append((chain_to_idx[chain_name], residue_index_or_atom_name - 1))
|
930
937
|
|
931
938
|
|
932
|
-
def parse_boltz_schema(
|
933
|
-
|
934
|
-
schema: dict,
|
935
|
-
ccd: Mapping[str, Mol],
|
936
|
-
mol_dir: Optional[Path] = None,
|
937
|
-
boltz_2: bool = False,
|
938
|
-
) -> Target:
|
939
|
-
"""Parse a Boltz input yaml / json.
|
940
|
-
|
941
|
-
The input file should be a dictionary with the following format:
|
942
|
-
|
943
|
-
version: 1
|
944
|
-
sequences:
|
945
|
-
- protein:
|
946
|
-
id: A
|
947
|
-
sequence: "MADQLTEEQIAEFKEAFSLF" # or pdb: "1a2k" or pdb: "path/to/file.pdb"
|
948
|
-
msa: path/to/msa1.a3m
|
949
|
-
- protein:
|
950
|
-
id: [B, C]
|
951
|
-
sequence: "AKLSILPWGHC"
|
952
|
-
msa: path/to/msa2.a3m
|
953
|
-
- rna:
|
954
|
-
id: D
|
955
|
-
sequence: "GCAUAGC"
|
956
|
-
- ligand:
|
957
|
-
id: E
|
958
|
-
smiles: "CC1=CC=CC=C1"
|
959
|
-
constraints:
|
960
|
-
- bond:
|
961
|
-
atom1: [A, 1, CA]
|
962
|
-
atom2: [A, 2, N]
|
963
|
-
- pocket:
|
964
|
-
binder: E
|
965
|
-
contacts: [[B, 1], [B, 2]]
|
966
|
-
max_distance: 6
|
967
|
-
- contact:
|
968
|
-
token1: [A, 1]
|
969
|
-
token2: [B, 1]
|
970
|
-
max_distance: 6
|
971
|
-
templates:
|
972
|
-
- cif: path/to/template.cif
|
973
|
-
properties:
|
974
|
-
- affinity:
|
975
|
-
binder: E
|
939
|
+
def parse_boltz_schema(schema: dict) -> dict:
|
940
|
+
"""Parse the Boltz input schema.
|
976
941
|
|
977
942
|
Parameters
|
978
943
|
----------
|
979
|
-
name : str
|
980
|
-
A name for the input.
|
981
944
|
schema : dict
|
982
945
|
The input schema.
|
983
|
-
components : dict
|
984
|
-
Dictionary of CCD components.
|
985
|
-
mol_dir: Path
|
986
|
-
Path to the directory containing the molecules.
|
987
|
-
boltz2: bool
|
988
|
-
Whether to parse the input for Boltz2.
|
989
946
|
|
990
947
|
Returns
|
991
948
|
-------
|
992
|
-
|
993
|
-
The parsed
|
949
|
+
dict
|
950
|
+
The parsed schema.
|
994
951
|
|
995
952
|
"""
|
996
|
-
#
|
997
|
-
|
998
|
-
|
999
|
-
msg = f"Invalid version {version} in input!"
|
953
|
+
# Check version
|
954
|
+
if "version" not in schema:
|
955
|
+
msg = "Schema must have a version field"
|
1000
956
|
raise ValueError(msg)
|
1001
957
|
|
1002
|
-
#
|
1003
|
-
blocker = rdBase.BlockLogs() # noqa: F841
|
1004
|
-
|
1005
|
-
# First group items that have the same type, sequence and modifications
|
958
|
+
# Group items by entity type and sequence
|
1006
959
|
items_to_group = {}
|
1007
960
|
chain_name_to_entity_type = {}
|
1008
|
-
|
961
|
+
|
1009
962
|
# Keep track of ligand IDs
|
1010
963
|
ligand_id = 1
|
1011
964
|
ligand_id_map = {}
|
@@ -1057,19 +1010,37 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
|
|
1057
1010
|
msg = "Protein must have either 'sequence' or 'pdb' field"
|
1058
1011
|
raise ValueError(msg)
|
1059
1012
|
elif entity_type == "ligand":
|
1060
|
-
|
1061
|
-
assert "smiles" not in item[entity_type] or "ccd" not in item[entity_type]
|
1013
|
+
# Support for SMILES, CCD, and SDF
|
1062
1014
|
if "smiles" in item[entity_type]:
|
1063
1015
|
seq = str(item[entity_type]["smiles"])
|
1064
1016
|
# Map user-provided ID to internal LIG1, LIG2, etc.
|
1065
1017
|
for id in entity_id:
|
1066
1018
|
ligand_id_map[id] = f"LIG{ligand_id}"
|
1067
1019
|
ligand_id += 1
|
1068
|
-
|
1020
|
+
elif "ccd" in item[entity_type]:
|
1069
1021
|
seq = str(item[entity_type]["ccd"])
|
1070
1022
|
# For CCD ligands, use the CCD code as the internal ID
|
1071
1023
|
for id in entity_id:
|
1072
1024
|
ligand_id_map[id] = seq
|
1025
|
+
elif "sdf" in item[entity_type]:
|
1026
|
+
sdf_path = Path(item[entity_type]["sdf"])
|
1027
|
+
if not sdf_path.exists():
|
1028
|
+
msg = f"SDF file not found: {sdf_path}"
|
1029
|
+
raise FileNotFoundError(msg)
|
1030
|
+
# Read SDF and convert to SMILES
|
1031
|
+
from rdkit import Chem
|
1032
|
+
mol = Chem.SDMolSupplier(str(sdf_path))[0]
|
1033
|
+
if mol is None:
|
1034
|
+
msg = f"Failed to read SDF file: {sdf_path}"
|
1035
|
+
raise ValueError(msg)
|
1036
|
+
seq = Chem.MolToSmiles(mol)
|
1037
|
+
# Map user-provided ID to internal LIG1, LIG2, etc.
|
1038
|
+
for id in entity_id:
|
1039
|
+
ligand_id_map[id] = f"LIG{ligand_id}"
|
1040
|
+
ligand_id += 1
|
1041
|
+
else:
|
1042
|
+
msg = "Ligand must have either 'smiles', 'ccd', or 'sdf' field"
|
1043
|
+
raise ValueError(msg)
|
1073
1044
|
|
1074
1045
|
# Group items by entity
|
1075
1046
|
items_to_group.setdefault((entity_type, seq), []).append(item)
|
@@ -1080,244 +1051,60 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
|
|
1080
1051
|
for chain_name in chain_names:
|
1081
1052
|
chain_name_to_entity_type[chain_name] = entity_type
|
1082
1053
|
|
1083
|
-
#
|
1084
|
-
|
1085
|
-
properties = schema.get("properties", [])
|
1086
|
-
|
1087
|
-
# Get all ligands
|
1054
|
+
# Get all proteins and ligands
|
1055
|
+
proteins = []
|
1088
1056
|
ligands = []
|
1089
1057
|
for item in schema["sequences"]:
|
1090
1058
|
entity_type = list(item.keys())[0]
|
1091
|
-
|
1092
|
-
entity_id = item[entity_type]["id"]
|
1093
|
-
entity_id = [entity_id] if isinstance(entity_id, str) else entity_id
|
1094
|
-
ligands.extend(entity_id)
|
1095
|
-
|
1096
|
-
# Get user-specified binders
|
1097
|
-
specified_binders = set()
|
1098
|
-
for prop in properties:
|
1099
|
-
if "affinity" in prop:
|
1100
|
-
binder = prop["affinity"]["binder"]
|
1101
|
-
specified_binders.add(binder)
|
1102
|
-
|
1103
|
-
# If no binders specified, use all proteins
|
1104
|
-
if not specified_binders:
|
1105
|
-
for item in schema["sequences"]:
|
1106
|
-
entity_type = list(item.keys())[0]
|
1107
|
-
if entity_type == "protein":
|
1108
|
-
entity_id = item[entity_type]["id"]
|
1109
|
-
entity_id = [entity_id] if isinstance(entity_id, str) else entity_id
|
1110
|
-
specified_binders.update(entity_id)
|
1111
|
-
|
1112
|
-
# Generate protein-ligand pairs for specified binders
|
1113
|
-
new_properties = []
|
1114
|
-
for binder in specified_binders:
|
1115
|
-
for ligand in ligands:
|
1116
|
-
if ligand in ligand_id_map:
|
1117
|
-
ligand = ligand_id_map[ligand] # Convert to internal LIG1, LIG2, etc.
|
1118
|
-
affinity_ligands.add(ligand)
|
1119
|
-
new_properties.append({
|
1120
|
-
"affinity": {
|
1121
|
-
"binder": binder,
|
1122
|
-
"ligand": ligand
|
1123
|
-
}
|
1124
|
-
})
|
1125
|
-
|
1126
|
-
# Update schema with generated properties
|
1127
|
-
schema["properties"] = new_properties
|
1128
|
-
|
1129
|
-
# Parse each group
|
1130
|
-
chains = []
|
1131
|
-
extra_mols = {}
|
1132
|
-
for (entity_type, seq), items in items_to_group.items():
|
1133
|
-
# Get entity id
|
1134
|
-
entity_id = items[0][entity_type]["id"]
|
1059
|
+
entity_id = item[entity_type]["id"]
|
1135
1060
|
entity_id = [entity_id] if isinstance(entity_id, str) else entity_id
|
1136
|
-
|
1137
|
-
# Check if this entity has affinity
|
1138
|
-
affinity = any(entity in affinity_ligands for entity in entity_id)
|
1139
|
-
|
1140
|
-
# Parse a protein
|
1141
1061
|
if entity_type == "protein":
|
1142
|
-
|
1143
|
-
|
1144
|
-
|
1145
|
-
msa = Path(msa)
|
1146
|
-
if not msa.exists():
|
1147
|
-
msg = f"MSA file not found: {msa}"
|
1148
|
-
raise FileNotFoundError(msg)
|
1149
|
-
with msa.open("r") as f:
|
1150
|
-
msa_data = f.read()
|
1151
|
-
else:
|
1152
|
-
msa_data = None
|
1153
|
-
|
1154
|
-
# Parse sequence
|
1155
|
-
residues = []
|
1156
|
-
for res_idx, code in enumerate(seq):
|
1157
|
-
# Get mol
|
1158
|
-
ref_mol = get_mol(code, ccd, mol_dir)
|
1159
|
-
|
1160
|
-
# Parse residue
|
1161
|
-
residue = parse_ccd_residue(
|
1162
|
-
name=code,
|
1163
|
-
ref_mol=ref_mol,
|
1164
|
-
res_idx=res_idx,
|
1165
|
-
)
|
1166
|
-
residues.append(residue)
|
1167
|
-
|
1168
|
-
# Create protein chain
|
1169
|
-
parsed_chain = ParsedChain(
|
1170
|
-
entity=entity_id,
|
1171
|
-
residues=residues,
|
1172
|
-
type=const.chain_type_ids["PROTEIN"],
|
1173
|
-
cyclic_period=0,
|
1174
|
-
sequence=seq,
|
1175
|
-
affinity=affinity,
|
1176
|
-
affinity_mw=None,
|
1177
|
-
)
|
1178
|
-
|
1179
|
-
# Parse a non-polymer
|
1180
|
-
elif (entity_type == "ligand") and "ccd" in (items[0][entity_type]):
|
1181
|
-
seq = items[0][entity_type]["ccd"]
|
1182
|
-
|
1183
|
-
if isinstance(seq, str):
|
1184
|
-
seq = [seq]
|
1185
|
-
|
1186
|
-
if affinity and len(seq) > 1:
|
1187
|
-
msg = "Cannot compute affinity for multi residue ligands!"
|
1188
|
-
raise ValueError(msg)
|
1189
|
-
|
1190
|
-
residues = []
|
1191
|
-
affinity_mw = None
|
1192
|
-
for res_idx, code in enumerate(seq):
|
1193
|
-
# Get mol
|
1194
|
-
ref_mol = get_mol(code, ccd, mol_dir)
|
1195
|
-
|
1196
|
-
if affinity:
|
1197
|
-
affinity_mw = AllChem.Descriptors.MolWt(ref_mol)
|
1198
|
-
|
1199
|
-
# Parse residue
|
1200
|
-
residue = parse_ccd_residue(
|
1201
|
-
name=code,
|
1202
|
-
ref_mol=ref_mol,
|
1203
|
-
res_idx=res_idx,
|
1204
|
-
)
|
1205
|
-
residues.append(residue)
|
1206
|
-
|
1207
|
-
# Create multi ligand chain
|
1208
|
-
parsed_chain = ParsedChain(
|
1209
|
-
entity=entity_id,
|
1210
|
-
residues=residues,
|
1211
|
-
type=const.chain_type_ids["NONPOLYMER"],
|
1212
|
-
cyclic_period=0,
|
1213
|
-
sequence=None,
|
1214
|
-
affinity=affinity,
|
1215
|
-
affinity_mw=affinity_mw,
|
1216
|
-
)
|
1217
|
-
|
1218
|
-
assert not items[0][entity_type].get(
|
1219
|
-
"cyclic", False
|
1220
|
-
), "Cyclic flag is not supported for ligands"
|
1221
|
-
|
1222
|
-
elif (entity_type == "ligand") and ("smiles" in items[0][entity_type]):
|
1223
|
-
seq = items[0][entity_type]["smiles"]
|
1224
|
-
|
1225
|
-
if affinity:
|
1226
|
-
seq = standardize(seq)
|
1227
|
-
|
1228
|
-
mol = AllChem.MolFromSmiles(seq)
|
1229
|
-
mol = AllChem.AddHs(mol)
|
1062
|
+
proteins.extend(entity_id)
|
1063
|
+
elif entity_type == "ligand":
|
1064
|
+
ligands.extend(entity_id)
|
1230
1065
|
|
1231
|
-
|
1232
|
-
|
1233
|
-
|
1234
|
-
|
1235
|
-
|
1236
|
-
|
1237
|
-
|
1238
|
-
|
1239
|
-
|
1066
|
+
# Generate properties for each protein-ligand pair
|
1067
|
+
new_properties = []
|
1068
|
+
for prop in schema.get("properties", []):
|
1069
|
+
if "affinity" in prop:
|
1070
|
+
affinity = prop["affinity"]
|
1071
|
+
# Handle protein as binder
|
1072
|
+
if "protein" in affinity:
|
1073
|
+
binder = affinity["protein"]
|
1074
|
+
if binder not in proteins:
|
1075
|
+
msg = f"Protein {binder} not found in sequences"
|
1240
1076
|
raise ValueError(msg)
|
1241
|
-
|
1242
|
-
|
1243
|
-
|
1244
|
-
|
1245
|
-
|
1246
|
-
|
1247
|
-
|
1248
|
-
|
1249
|
-
|
1250
|
-
|
1251
|
-
#
|
1252
|
-
|
1253
|
-
|
1254
|
-
|
1255
|
-
|
1256
|
-
|
1257
|
-
|
1258
|
-
|
1259
|
-
|
1260
|
-
|
1261
|
-
|
1262
|
-
|
1263
|
-
|
1264
|
-
|
1265
|
-
|
1266
|
-
|
1267
|
-
affinity_mw=affinity_mw,
|
1268
|
-
)
|
1269
|
-
|
1270
|
-
assert not items[0][entity_type].get(
|
1271
|
-
"cyclic", False
|
1272
|
-
), "Cyclic flag is not supported for ligands"
|
1273
|
-
|
1274
|
-
else:
|
1275
|
-
msg = f"Invalid entity type: {entity_type}"
|
1276
|
-
raise ValueError(msg)
|
1277
|
-
|
1278
|
-
chains.append(parsed_chain)
|
1077
|
+
# Generate pairs with all ligands
|
1078
|
+
for ligand in ligands:
|
1079
|
+
if ligand in ligand_id_map:
|
1080
|
+
ligand = ligand_id_map[ligand] # Convert to internal LIG1, LIG2, etc.
|
1081
|
+
new_properties.append({
|
1082
|
+
"affinity": {
|
1083
|
+
"binder": binder,
|
1084
|
+
"ligand": ligand
|
1085
|
+
}
|
1086
|
+
})
|
1087
|
+
# Handle ligand as binder (backward compatibility)
|
1088
|
+
elif "binder" in affinity:
|
1089
|
+
binder = affinity["binder"]
|
1090
|
+
if binder not in proteins:
|
1091
|
+
msg = f"Protein {binder} not found in sequences"
|
1092
|
+
raise ValueError(msg)
|
1093
|
+
# Generate pairs with all ligands
|
1094
|
+
for ligand in ligands:
|
1095
|
+
if ligand in ligand_id_map:
|
1096
|
+
ligand = ligand_id_map[ligand] # Convert to internal LIG1, LIG2, etc.
|
1097
|
+
new_properties.append({
|
1098
|
+
"affinity": {
|
1099
|
+
"binder": binder,
|
1100
|
+
"ligand": ligand
|
1101
|
+
}
|
1102
|
+
})
|
1279
1103
|
|
1280
|
-
#
|
1281
|
-
|
1282
|
-
for constraint in schema.get("constraints", []):
|
1283
|
-
if "bond" in constraint:
|
1284
|
-
atom1 = constraint["bond"]["atom1"]
|
1285
|
-
atom2 = constraint["bond"]["atom2"]
|
1286
|
-
constraints.append(ParsedBond(atom1, atom2))
|
1287
|
-
elif "pocket" in constraint:
|
1288
|
-
binder = constraint["pocket"]["binder"]
|
1289
|
-
if binder in ligand_id_map:
|
1290
|
-
binder = ligand_id_map[binder] # Convert to internal LIG1, LIG2, etc.
|
1291
|
-
contacts = constraint["pocket"]["contacts"]
|
1292
|
-
max_distance = constraint["pocket"].get("max_distance", 6.0)
|
1293
|
-
constraints.append(ParsedPocket(binder, contacts, max_distance))
|
1294
|
-
elif "contact" in constraint:
|
1295
|
-
token1 = constraint["contact"]["token1"]
|
1296
|
-
token2 = constraint["contact"]["token2"]
|
1297
|
-
max_distance = constraint["contact"].get("max_distance", 6.0)
|
1298
|
-
constraints.append(ParsedContact(token1, token2, max_distance))
|
1299
|
-
else:
|
1300
|
-
msg = f"Invalid constraint type: {list(constraint.keys())[0]}"
|
1301
|
-
raise ValueError(msg)
|
1302
|
-
|
1303
|
-
# Parse templates
|
1304
|
-
templates = []
|
1305
|
-
for template in schema.get("templates", []):
|
1306
|
-
cif = template["cif"]
|
1307
|
-
chain_id = template.get("chain_id")
|
1308
|
-
template_id = template.get("template_id")
|
1309
|
-
templates.append(ParsedTemplate(cif, chain_id, template_id))
|
1310
|
-
|
1311
|
-
# Create target
|
1312
|
-
target = Target(
|
1313
|
-
name=name,
|
1314
|
-
chains=chains,
|
1315
|
-
constraints=constraints,
|
1316
|
-
templates=templates,
|
1317
|
-
extra_mols=extra_mols,
|
1318
|
-
)
|
1104
|
+
# Update schema with generated properties
|
1105
|
+
schema["properties"] = new_properties
|
1319
1106
|
|
1320
|
-
return
|
1107
|
+
return schema
|
1321
1108
|
|
1322
1109
|
|
1323
1110
|
def standardize(smiles: str) -> Optional[str]:
|
boltz/main.py
CHANGED
@@ -1235,72 +1235,89 @@ def predict( # noqa: C901, PLR0915, PLR0912
|
|
1235
1235
|
# Print header
|
1236
1236
|
click.echo("\nPredicting property: affinity\n")
|
1237
1237
|
|
1238
|
-
#
|
1239
|
-
|
1240
|
-
|
1241
|
-
|
1242
|
-
|
1243
|
-
|
1244
|
-
|
1245
|
-
|
1246
|
-
|
1247
|
-
|
1248
|
-
|
1249
|
-
|
1250
|
-
|
1251
|
-
|
1252
|
-
|
1253
|
-
|
1254
|
-
|
1255
|
-
|
1238
|
+
# Group records by protein-ligand pairs
|
1239
|
+
affinity_groups = {}
|
1240
|
+
for record in manifest.records:
|
1241
|
+
if record.affinity:
|
1242
|
+
key = (record.affinity["binder"], record.affinity["ligand"])
|
1243
|
+
if key not in affinity_groups:
|
1244
|
+
affinity_groups[key] = []
|
1245
|
+
affinity_groups[key].append(record)
|
1246
|
+
|
1247
|
+
# Process each protein-ligand pair
|
1248
|
+
for (binder, ligand), records in affinity_groups.items():
|
1249
|
+
# Create subfolder for this protein-ligand pair
|
1250
|
+
pair_dir = out_dir / "predictions" / f"{binder}_{ligand}"
|
1251
|
+
pair_dir.mkdir(parents=True, exist_ok=True)
|
1252
|
+
|
1253
|
+
# Create manifest for this pair
|
1254
|
+
pair_manifest = Manifest(records)
|
1255
|
+
|
1256
|
+
# Validate inputs
|
1257
|
+
pair_manifest_filtered = filter_inputs_affinity(
|
1258
|
+
manifest=pair_manifest,
|
1259
|
+
outdir=pair_dir,
|
1260
|
+
override=override,
|
1261
|
+
)
|
1262
|
+
if not pair_manifest_filtered.records:
|
1263
|
+
click.echo(f"Found existing affinity predictions for {binder}_{ligand}, skipping.")
|
1264
|
+
continue
|
1256
1265
|
|
1257
|
-
|
1258
|
-
|
1259
|
-
target_dir=out_dir / "predictions",
|
1260
|
-
msa_dir=processed.msa_dir,
|
1261
|
-
mol_dir=mol_dir,
|
1262
|
-
num_workers=num_workers,
|
1263
|
-
constraints_dir=processed.constraints_dir,
|
1264
|
-
template_dir=processed.template_dir,
|
1265
|
-
extra_mols_dir=processed.extra_mols_dir,
|
1266
|
-
override_method="other",
|
1267
|
-
affinity=True,
|
1268
|
-
)
|
1266
|
+
msg = f"Running affinity prediction for {binder} with {ligand}"
|
1267
|
+
click.echo(msg)
|
1269
1268
|
|
1270
|
-
|
1271
|
-
|
1272
|
-
|
1273
|
-
|
1274
|
-
"max_parallel_samples": 1,
|
1275
|
-
"write_confidence_summary": False,
|
1276
|
-
"write_full_pae": False,
|
1277
|
-
"write_full_pde": False,
|
1278
|
-
}
|
1269
|
+
pred_writer = BoltzAffinityWriter(
|
1270
|
+
data_dir=processed.targets_dir,
|
1271
|
+
output_dir=pair_dir,
|
1272
|
+
)
|
1279
1273
|
|
1280
|
-
|
1281
|
-
|
1282
|
-
|
1274
|
+
data_module = Boltz2InferenceDataModule(
|
1275
|
+
manifest=pair_manifest_filtered,
|
1276
|
+
target_dir=out_dir / "predictions",
|
1277
|
+
msa_dir=processed.msa_dir,
|
1278
|
+
mol_dir=mol_dir,
|
1279
|
+
num_workers=num_workers,
|
1280
|
+
constraints_dir=processed.constraints_dir,
|
1281
|
+
template_dir=processed.template_dir,
|
1282
|
+
extra_mols_dir=processed.extra_mols_dir,
|
1283
|
+
override_method="other",
|
1284
|
+
affinity=True,
|
1285
|
+
)
|
1283
1286
|
|
1284
|
-
|
1285
|
-
|
1286
|
-
|
1287
|
-
|
1288
|
-
|
1289
|
-
|
1290
|
-
|
1291
|
-
|
1292
|
-
|
1293
|
-
|
1294
|
-
|
1295
|
-
|
1296
|
-
|
1287
|
+
predict_affinity_args = {
|
1288
|
+
"recycling_steps": 5,
|
1289
|
+
"sampling_steps": sampling_steps_affinity,
|
1290
|
+
"diffusion_samples": diffusion_samples_affinity,
|
1291
|
+
"max_parallel_samples": 1,
|
1292
|
+
"write_confidence_summary": False,
|
1293
|
+
"write_full_pae": False,
|
1294
|
+
"write_full_pde": False,
|
1295
|
+
}
|
1296
|
+
|
1297
|
+
# Load affinity model
|
1298
|
+
if affinity_checkpoint is None:
|
1299
|
+
affinity_checkpoint = cache / "boltz2_aff.ckpt"
|
1300
|
+
|
1301
|
+
model_module = Boltz2.load_from_checkpoint(
|
1302
|
+
affinity_checkpoint,
|
1303
|
+
strict=True,
|
1304
|
+
predict_args=predict_affinity_args,
|
1305
|
+
map_location="cpu",
|
1306
|
+
diffusion_process_args=asdict(diffusion_params),
|
1307
|
+
ema=False,
|
1308
|
+
pairformer_args=asdict(pairformer_args),
|
1309
|
+
msa_args=asdict(msa_args),
|
1310
|
+
steering_args={"fk_steering": False, "guidance_update": False},
|
1311
|
+
affinity_mw_correction=affinity_mw_correction,
|
1312
|
+
)
|
1313
|
+
model_module.eval()
|
1297
1314
|
|
1298
|
-
|
1299
|
-
|
1300
|
-
|
1301
|
-
|
1302
|
-
|
1303
|
-
|
1315
|
+
trainer.callbacks[0] = pred_writer
|
1316
|
+
trainer.predict(
|
1317
|
+
model_module,
|
1318
|
+
datamodule=data_module,
|
1319
|
+
return_predictions=False,
|
1320
|
+
)
|
1304
1321
|
|
1305
1322
|
|
1306
1323
|
if __name__ == "__main__":
|
@@ -1,8 +1,8 @@
|
|
1
1
|
boltz/__init__.py,sha256=F_-so3S40iZrSZ89Ge4TS6aZqwWyZXq_H4AXGDlbA_g,187
|
2
|
-
boltz/main.py,sha256=
|
2
|
+
boltz/main.py,sha256=VpCVMACmYA4nsJ9XFuh6JUFR0pdaZuqPWefjF5-Uh7U,42439
|
3
3
|
boltz/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
4
|
boltz/data/const.py,sha256=1M-88Z6HkfKY6MkNtqcj3b9P-oX9xEXluh3qM_u8dNU,26779
|
5
|
-
boltz/data/mol.py,sha256=
|
5
|
+
boltz/data/mol.py,sha256=kPytx81filtBASGp7BOf9INvMqIijQaSh8HgU7JQsJ0,34398
|
6
6
|
boltz/data/pad.py,sha256=O4CGOOc5TwFuuWeP7hKjMIIsljdfLj-VJtXQeVXFx8s,2066
|
7
7
|
boltz/data/types.py,sha256=4w9brpOCQe16AyByNrxz7pjIzrgzFNihtik3aaHvKaE,21965
|
8
8
|
boltz/data/crop/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -38,7 +38,7 @@ boltz/data/parse/csv.py,sha256=Hcq8rJW2njczahEr8jfd_o-zxLaNSgJ3YIoC9srIqpw,2518
|
|
38
38
|
boltz/data/parse/fasta.py,sha256=taI4s_CqPtyF0XaLJAsVAJHCL0GXm2g1g8Qeccdxikk,3906
|
39
39
|
boltz/data/parse/mmcif.py,sha256=25kEXCkx-OuaawAs7cdz0fxdRu5_CCO0AV00u84PrjQ,36822
|
40
40
|
boltz/data/parse/mmcif_with_constraints.py,sha256=WHYZckSqUwu-Nb9vmVmxHmC7uxwVrF7AVUeVKsc5wGQ,51473
|
41
|
-
boltz/data/parse/schema.py,sha256=
|
41
|
+
boltz/data/parse/schema.py,sha256=3SKeHB5ao-I9Sznytwqxb7-n-dIXtTdKY3VrRp8lIZs,36766
|
42
42
|
boltz/data/parse/yaml.py,sha256=GRFRMtDD4PQ4PIpA_S1jj0vRaEu2LlZd_g4rN1zUrNo,1505
|
43
43
|
boltz/data/sample/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
44
44
|
boltz/data/sample/cluster.py,sha256=9Sx8qP7zGZOAyEspwYFtCTbGTBZnuN-zfCKFbbA_6oI,8175
|
@@ -104,9 +104,9 @@ boltz/model/optim/scheduler.py,sha256=nB4jz0CZ4pR4n08LQngExL_pNycIdYI8AXVoHPnZWQ
|
|
104
104
|
boltz/model/potentials/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
105
105
|
boltz/model/potentials/potentials.py,sha256=vev8Vjfs-ML1hyrdv_R8DynG4wSFahJ6nzPWp7CYQqw,17507
|
106
106
|
boltz/model/potentials/schedules.py,sha256=m7XJjfuF9uTX3bR9VisXv1rvzJjxiD8PobXRpcBBu1c,968
|
107
|
-
boltz_vsynthes-1.0.
|
108
|
-
boltz_vsynthes-1.0.
|
109
|
-
boltz_vsynthes-1.0.
|
110
|
-
boltz_vsynthes-1.0.
|
111
|
-
boltz_vsynthes-1.0.
|
112
|
-
boltz_vsynthes-1.0.
|
107
|
+
boltz_vsynthes-1.0.8.dist-info/licenses/LICENSE,sha256=8GZ_1eZsUeG6jdqgJJxtciWzADfgLEV4LY8sKUOsJhc,1102
|
108
|
+
boltz_vsynthes-1.0.8.dist-info/METADATA,sha256=GBCIrSt4OpP4gbp7hYXESfvaeU7JzADqpefMfza3ScI,7171
|
109
|
+
boltz_vsynthes-1.0.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
110
|
+
boltz_vsynthes-1.0.8.dist-info/entry_points.txt,sha256=n5a5I35ntu9lmyr16oZgHPFY0b0YxjiixY7m7nbMTLc,41
|
111
|
+
boltz_vsynthes-1.0.8.dist-info/top_level.txt,sha256=MgU3Jfb-ctWm07YGMts68PMjSh9v26D0gfG3dFRmVFA,6
|
112
|
+
boltz_vsynthes-1.0.8.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|