boltz-vsynthes 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112)
  1. boltz/__init__.py +7 -0
  2. boltz/data/__init__.py +0 -0
  3. boltz/data/const.py +1184 -0
  4. boltz/data/crop/__init__.py +0 -0
  5. boltz/data/crop/affinity.py +164 -0
  6. boltz/data/crop/boltz.py +296 -0
  7. boltz/data/crop/cropper.py +45 -0
  8. boltz/data/feature/__init__.py +0 -0
  9. boltz/data/feature/featurizer.py +1230 -0
  10. boltz/data/feature/featurizerv2.py +2208 -0
  11. boltz/data/feature/symmetry.py +602 -0
  12. boltz/data/filter/__init__.py +0 -0
  13. boltz/data/filter/dynamic/__init__.py +0 -0
  14. boltz/data/filter/dynamic/date.py +76 -0
  15. boltz/data/filter/dynamic/filter.py +24 -0
  16. boltz/data/filter/dynamic/max_residues.py +37 -0
  17. boltz/data/filter/dynamic/resolution.py +34 -0
  18. boltz/data/filter/dynamic/size.py +38 -0
  19. boltz/data/filter/dynamic/subset.py +42 -0
  20. boltz/data/filter/static/__init__.py +0 -0
  21. boltz/data/filter/static/filter.py +26 -0
  22. boltz/data/filter/static/ligand.py +37 -0
  23. boltz/data/filter/static/polymer.py +299 -0
  24. boltz/data/module/__init__.py +0 -0
  25. boltz/data/module/inference.py +307 -0
  26. boltz/data/module/inferencev2.py +429 -0
  27. boltz/data/module/training.py +684 -0
  28. boltz/data/module/trainingv2.py +660 -0
  29. boltz/data/mol.py +900 -0
  30. boltz/data/msa/__init__.py +0 -0
  31. boltz/data/msa/mmseqs2.py +235 -0
  32. boltz/data/pad.py +84 -0
  33. boltz/data/parse/__init__.py +0 -0
  34. boltz/data/parse/a3m.py +134 -0
  35. boltz/data/parse/csv.py +100 -0
  36. boltz/data/parse/fasta.py +138 -0
  37. boltz/data/parse/mmcif.py +1239 -0
  38. boltz/data/parse/mmcif_with_constraints.py +1607 -0
  39. boltz/data/parse/schema.py +1851 -0
  40. boltz/data/parse/yaml.py +68 -0
  41. boltz/data/sample/__init__.py +0 -0
  42. boltz/data/sample/cluster.py +283 -0
  43. boltz/data/sample/distillation.py +57 -0
  44. boltz/data/sample/random.py +39 -0
  45. boltz/data/sample/sampler.py +49 -0
  46. boltz/data/tokenize/__init__.py +0 -0
  47. boltz/data/tokenize/boltz.py +195 -0
  48. boltz/data/tokenize/boltz2.py +396 -0
  49. boltz/data/tokenize/tokenizer.py +24 -0
  50. boltz/data/types.py +777 -0
  51. boltz/data/write/__init__.py +0 -0
  52. boltz/data/write/mmcif.py +305 -0
  53. boltz/data/write/pdb.py +171 -0
  54. boltz/data/write/utils.py +23 -0
  55. boltz/data/write/writer.py +330 -0
  56. boltz/main.py +1292 -0
  57. boltz/model/__init__.py +0 -0
  58. boltz/model/layers/__init__.py +0 -0
  59. boltz/model/layers/attention.py +132 -0
  60. boltz/model/layers/attentionv2.py +111 -0
  61. boltz/model/layers/confidence_utils.py +231 -0
  62. boltz/model/layers/dropout.py +34 -0
  63. boltz/model/layers/initialize.py +100 -0
  64. boltz/model/layers/outer_product_mean.py +98 -0
  65. boltz/model/layers/pair_averaging.py +135 -0
  66. boltz/model/layers/pairformer.py +337 -0
  67. boltz/model/layers/relative.py +58 -0
  68. boltz/model/layers/transition.py +78 -0
  69. boltz/model/layers/triangular_attention/__init__.py +0 -0
  70. boltz/model/layers/triangular_attention/attention.py +189 -0
  71. boltz/model/layers/triangular_attention/primitives.py +409 -0
  72. boltz/model/layers/triangular_attention/utils.py +380 -0
  73. boltz/model/layers/triangular_mult.py +212 -0
  74. boltz/model/loss/__init__.py +0 -0
  75. boltz/model/loss/bfactor.py +49 -0
  76. boltz/model/loss/confidence.py +590 -0
  77. boltz/model/loss/confidencev2.py +621 -0
  78. boltz/model/loss/diffusion.py +171 -0
  79. boltz/model/loss/diffusionv2.py +134 -0
  80. boltz/model/loss/distogram.py +48 -0
  81. boltz/model/loss/distogramv2.py +105 -0
  82. boltz/model/loss/validation.py +1025 -0
  83. boltz/model/models/__init__.py +0 -0
  84. boltz/model/models/boltz1.py +1286 -0
  85. boltz/model/models/boltz2.py +1249 -0
  86. boltz/model/modules/__init__.py +0 -0
  87. boltz/model/modules/affinity.py +223 -0
  88. boltz/model/modules/confidence.py +481 -0
  89. boltz/model/modules/confidence_utils.py +181 -0
  90. boltz/model/modules/confidencev2.py +495 -0
  91. boltz/model/modules/diffusion.py +844 -0
  92. boltz/model/modules/diffusion_conditioning.py +116 -0
  93. boltz/model/modules/diffusionv2.py +677 -0
  94. boltz/model/modules/encoders.py +639 -0
  95. boltz/model/modules/encodersv2.py +565 -0
  96. boltz/model/modules/transformers.py +322 -0
  97. boltz/model/modules/transformersv2.py +261 -0
  98. boltz/model/modules/trunk.py +688 -0
  99. boltz/model/modules/trunkv2.py +828 -0
  100. boltz/model/modules/utils.py +303 -0
  101. boltz/model/optim/__init__.py +0 -0
  102. boltz/model/optim/ema.py +389 -0
  103. boltz/model/optim/scheduler.py +99 -0
  104. boltz/model/potentials/__init__.py +0 -0
  105. boltz/model/potentials/potentials.py +497 -0
  106. boltz/model/potentials/schedules.py +32 -0
  107. boltz_vsynthes-1.0.0.dist-info/METADATA +151 -0
  108. boltz_vsynthes-1.0.0.dist-info/RECORD +112 -0
  109. boltz_vsynthes-1.0.0.dist-info/WHEEL +5 -0
  110. boltz_vsynthes-1.0.0.dist-info/entry_points.txt +2 -0
  111. boltz_vsynthes-1.0.0.dist-info/licenses/LICENSE +21 -0
  112. boltz_vsynthes-1.0.0.dist-info/top_level.txt +1 -0
boltz/data/const.py ADDED
@@ -0,0 +1,1184 @@
1
+ ####################################################################################################
2
+ # CHAINS
3
+ ####################################################################################################
4
+
# Molecule classes a chain can belong to. The position of a name in this list
# is its integer id (see chain_type_ids below).
chain_types = [
    "PROTEIN",
    "DNA",
    "RNA",
    "NONPOLYMER",
]
# Reverse lookup: chain type name -> index in chain_types.
chain_type_ids = {name: idx for idx, name in enumerate(chain_types)}
12
+
# Names of the output categories: pairwise interface kinds
# ("<a>_<b>"), intra-molecule kinds ("intra_<x>"), and "modified".
# The same names are used as keys of the out_types_weights* tables.
out_types = [
    "dna_protein",
    "rna_protein",
    "ligand_protein",
    "dna_ligand",
    "rna_ligand",
    "intra_ligand",
    "intra_dna",
    "intra_rna",
    "intra_protein",
    "protein_protein",
    "modified",
]
26
+
# Per-category weights, keyed by the names listed in out_types.
# NOTE(review): the "_af3" suffix presumably marks the weighting taken from
# AlphaFold 3 -- confirm with the code that consumes this table.
out_types_weights_af3 = {
    "dna_protein": 10.0,
    "rna_protein": 10.0,
    "ligand_protein": 10.0,
    "dna_ligand": 5.0,
    "rna_ligand": 5.0,
    "intra_ligand": 20.0,
    "intra_dna": 4.0,
    "intra_rna": 16.0,
    "intra_protein": 20.0,
    "protein_protein": 20.0,
    "modified": 0.0,
}

# Alternative weighting over the same categories; differs from the "_af3"
# table for the nucleic-acid and ligand-protein entries.
out_types_weights = {
    "dna_protein": 5.0,
    "rna_protein": 5.0,
    "ligand_protein": 20.0,
    "dna_ligand": 2.0,
    "rna_ligand": 2.0,
    "intra_ligand": 20.0,
    "intra_dna": 2.0,
    "intra_rna": 8.0,
    "intra_protein": 20.0,
    "protein_protein": 20.0,
    "modified": 0.0,
}
54
+
55
+
# Single-molecule output categories (lower-case names).
out_single_types = ["protein", "ligand", "dna", "rna"]
57
+
# Names of the pairwise clash categories.
clash_types = [
    "dna_protein",
    "rna_protein",
    "ligand_protein",
    "protein_protein",
    "dna_ligand",
    "rna_ligand",
    "ligand_ligand",
    "rna_dna",
    "dna_dna",
    "rna_rna",
]

# Maps an unordered pair of chain types to its clash category. Keys are
# frozensets so the lookup does not depend on argument order; a pair of two
# identical chain types collapses to a one-element frozenset.
chain_types_to_clash_type = {
    frozenset(("PROTEIN", "DNA")): "dna_protein",
    frozenset(("PROTEIN", "RNA")): "rna_protein",
    frozenset(("PROTEIN", "NONPOLYMER")): "ligand_protein",
    frozenset(("PROTEIN",)): "protein_protein",
    frozenset(("NONPOLYMER", "DNA")): "dna_ligand",
    frozenset(("NONPOLYMER", "RNA")): "rna_ligand",
    frozenset(("NONPOLYMER",)): "ligand_ligand",
    frozenset(("DNA", "RNA")): "rna_dna",
    frozenset(("DNA",)): "dna_dna",
    frozenset(("RNA",)): "rna_rna",
}
83
+
# Chain type name -> single-molecule output category name
# (NONPOLYMER chains are reported as "ligand").
chain_type_to_out_single_type = {
    "PROTEIN": "protein",
    "DNA": "dna",
    "RNA": "rna",
    "NONPOLYMER": "ligand",
}
90
+ ####################################################################################################
91
+ # RESIDUES & TOKENS
92
+ ####################################################################################################
93
+
94
+
# Three-letter codes of the 20 standard amino acids plus the unknown-protein
# token, in the order used for token ids.
canonical_tokens = [
    "ALA",
    "ARG",
    "ASN",
    "ASP",
    "CYS",
    "GLN",
    "GLU",
    "GLY",
    "HIS",
    "ILE",
    "LEU",
    "LYS",
    "MET",
    "PHE",
    "PRO",
    "SER",
    "THR",
    "TRP",
    "TYR",
    "VAL",
    "UNK",  # unknown protein token
]

# Full token vocabulary: padding, gap, protein tokens, RNA tokens, DNA tokens.
# The list position of a token is its integer id.
tokens = [
    "<pad>",
    "-",
    *canonical_tokens,
    "A",
    "G",
    "C",
    "U",
    "N",  # unknown rna token
    "DA",
    "DG",
    "DC",
    "DT",
    "DN",  # unknown dna token
]

# Token name -> index in tokens.
token_ids = {name: idx for idx, name in enumerate(tokens)}
num_tokens = len(tokens)
# Unknown-residue token for each polymer chain type, by name and by id.
unk_token = {"PROTEIN": "UNK", "DNA": "DN", "RNA": "N"}
unk_token_ids = {mol_type: token_ids[name] for mol_type, name in unk_token.items()}
139
+
# One-letter protein code -> three-letter token. The letters X, J, B, Z, O
# and U all map to the unknown token; the gap character maps to itself.
prot_letter_to_token = {
    "A": "ALA",
    "R": "ARG",
    "N": "ASN",
    "D": "ASP",
    "C": "CYS",
    "E": "GLU",
    "Q": "GLN",
    "G": "GLY",
    "H": "HIS",
    "I": "ILE",
    "L": "LEU",
    "K": "LYS",
    "M": "MET",
    "F": "PHE",
    "P": "PRO",
    "S": "SER",
    "T": "THR",
    "W": "TRP",
    "Y": "TYR",
    "V": "VAL",
    "X": "UNK",
    "J": "UNK",
    "B": "UNK",
    "Z": "UNK",
    "O": "UNK",
    "U": "UNK",
    "-": "-",
}

# Inverse mapping. Several letters share "UNK", so a plain inversion would
# keep whichever letter came last; pin it to "X" explicitly.
prot_token_to_letter = {token: letter for letter, token in prot_letter_to_token.items()}
prot_token_to_letter["UNK"] = "X"
172
+
# One-letter RNA code -> RNA token (identity mapping) and its inverse.
rna_letter_to_token = {
    "A": "A",
    "G": "G",
    "C": "C",
    "U": "U",
    "N": "N",
}
rna_token_to_letter = {token: letter for letter, token in rna_letter_to_token.items()}

# One-letter DNA code -> "D"-prefixed DNA token and its inverse.
dna_letter_to_token = {
    "A": "DA",
    "G": "DG",
    "C": "DC",
    "T": "DT",
    "N": "DN",
}
dna_token_to_letter = {token: letter for letter, token in dna_letter_to_token.items()}
190
+
191
+ ####################################################################################################
192
+ # ATOMS
193
+ ####################################################################################################
194
+
# NOTE(review): presumably the size of the element vocabulary (atomic-number
# one-hot width) -- confirm with the featurizer.
num_elements = 128

# Chirality tag names; the list position of a name is its integer id.
# NOTE(review): the names look like RDKit ChiralType members -- confirm.
chirality_types = [
    "CHI_UNSPECIFIED",
    "CHI_TETRAHEDRAL_CW",
    "CHI_TETRAHEDRAL_CCW",
    "CHI_SQUAREPLANAR",
    "CHI_OCTAHEDRAL",
    "CHI_TRIGONALBIPYRAMIDAL",
    "CHI_OTHER",
]
chirality_type_ids = {name: idx for idx, name in enumerate(chirality_types)}
# Fallback name for a chirality tag that is not listed above.
unk_chirality_type = "CHI_OTHER"
208
+
# Hybridization state names; the list position of a name is its integer id.
# NOTE(review): the names look like RDKit HybridizationType members -- confirm.
hybridization_map = [
    "S",
    "SP",
    "SP2",
    "SP2D",
    "SP3",
    "SP3D",
    "SP3D2",
    "OTHER",
    "UNSPECIFIED",
]
hybridization_type_ids = {name: idx for idx, name in enumerate(hybridization_map)}
# Fallback name for a hybridization state that is not listed above.
unk_hybridization_type = "UNSPECIFIED"
222
+
# fmt: off
# Reference atom names for every residue token, in a fixed order. The index of
# an atom name within its list is what the *_atom_id and ref_symmetries tables
# below refer to. "PAD" and "-" have no atoms.
ref_atoms = {
    "PAD": [],
    "UNK": ["N", "CA", "C", "O", "CB"],
    "-": [],
    "ALA": ["N", "CA", "C", "O", "CB"],
    "ARG": ["N", "CA", "C", "O", "CB", "CG", "CD", "NE", "CZ", "NH1", "NH2"],
    "ASN": ["N", "CA", "C", "O", "CB", "CG", "OD1", "ND2"],
    "ASP": ["N", "CA", "C", "O", "CB", "CG", "OD1", "OD2"],
    "CYS": ["N", "CA", "C", "O", "CB", "SG"],
    "GLN": ["N", "CA", "C", "O", "CB", "CG", "CD", "OE1", "NE2"],
    "GLU": ["N", "CA", "C", "O", "CB", "CG", "CD", "OE1", "OE2"],
    "GLY": ["N", "CA", "C", "O"],
    "HIS": ["N", "CA", "C", "O", "CB", "CG", "ND1", "CD2", "CE1", "NE2"],
    "ILE": ["N", "CA", "C", "O", "CB", "CG1", "CG2", "CD1"],
    "LEU": ["N", "CA", "C", "O", "CB", "CG", "CD1", "CD2"],
    "LYS": ["N", "CA", "C", "O", "CB", "CG", "CD", "CE", "NZ"],
    "MET": ["N", "CA", "C", "O", "CB", "CG", "SD", "CE"],
    "PHE": ["N", "CA", "C", "O", "CB", "CG", "CD1", "CD2", "CE1", "CE2", "CZ"],
    "PRO": ["N", "CA", "C", "O", "CB", "CG", "CD"],
    "SER": ["N", "CA", "C", "O", "CB", "OG"],
    "THR": ["N", "CA", "C", "O", "CB", "OG1", "CG2"],
    "TRP": ["N", "CA", "C", "O", "CB", "CG", "CD1", "CD2", "NE1", "CE2", "CE3", "CZ2", "CZ3", "CH2"],  # noqa: E501
    "TYR": ["N", "CA", "C", "O", "CB", "CG", "CD1", "CD2", "CE1", "CE2", "CZ", "OH"],
    "VAL": ["N", "CA", "C", "O", "CB", "CG1", "CG2"],
    "A": ["P", "OP1", "OP2", "O5'", "C5'", "C4'", "O4'", "C3'", "O3'", "C2'", "O2'", "C1'", "N9", "C8", "N7", "C5", "C6", "N6", "N1", "C2", "N3", "C4"],  # noqa: E501
    "G": ["P", "OP1", "OP2", "O5'", "C5'", "C4'", "O4'", "C3'", "O3'", "C2'", "O2'", "C1'", "N9", "C8", "N7", "C5", "C6", "O6", "N1", "C2", "N2", "N3", "C4"],  # noqa: E501
    "C": ["P", "OP1", "OP2", "O5'", "C5'", "C4'", "O4'", "C3'", "O3'", "C2'", "O2'", "C1'", "N1", "C2", "O2", "N3", "C4", "N4", "C5", "C6"],  # noqa: E501
    "U": ["P", "OP1", "OP2", "O5'", "C5'", "C4'", "O4'", "C3'", "O3'", "C2'", "O2'", "C1'", "N1", "C2", "O2", "N3", "C4", "O4", "C5", "C6"],  # noqa: E501
    "N": ["P", "OP1", "OP2", "O5'", "C5'", "C4'", "O4'", "C3'", "O3'", "C2'", "O2'", "C1'"],  # noqa: E501
    "DA": ["P", "OP1", "OP2", "O5'", "C5'", "C4'", "O4'", "C3'", "O3'", "C2'", "C1'", "N9", "C8", "N7", "C5", "C6", "N6", "N1", "C2", "N3", "C4"],  # noqa: E501
    "DG": ["P", "OP1", "OP2", "O5'", "C5'", "C4'", "O4'", "C3'", "O3'", "C2'", "C1'", "N9", "C8", "N7", "C5", "C6", "O6", "N1", "C2", "N2", "N3", "C4"],  # noqa: E501
    "DC": ["P", "OP1", "OP2", "O5'", "C5'", "C4'", "O4'", "C3'", "O3'", "C2'", "C1'", "N1", "C2", "O2", "N3", "C4", "N4", "C5", "C6"],  # noqa: E501
    "DT": ["P", "OP1", "OP2", "O5'", "C5'", "C4'", "O4'", "C3'", "O3'", "C2'", "C1'", "N1", "C2", "O2", "N3", "C4", "O4", "C5", "C7", "C6"],  # noqa: E501
    "DN": ["P", "OP1", "OP2", "O5'", "C5'", "C4'", "O4'", "C3'", "O3'", "C2'", "C1'"]
}

# Backbone atom names for protein and nucleic-acid residues, plus
# name -> position lookups into these two lists.
protein_backbone_atom_names = ["N", "CA", "C", "O"]
nucleic_backbone_atom_names = ["P", "OP1", "OP2", "O5'", "C5'", "C4'", "O4'", "C3'", "O3'", "C2'", "O2'", "C1'"]

protein_backbone_atom_index = {name: i for i, name in enumerate(protein_backbone_atom_names)}
nucleic_backbone_atom_index = {name: i for i, name in enumerate(nucleic_backbone_atom_names)}

# Per-residue atom-swap symmetries. Each entry is a list of alternatives, and
# each alternative is a list of (i, j) index pairs. The indices line up with
# ref_atoms[res]: e.g. ASP swaps positions 6/7 (OD1/OD2) and the nucleotides
# swap positions 1/2 (OP1/OP2). "UNK", "-", "N" and "DN" have no entry.
ref_symmetries = {
    "PAD": [],
    "ALA": [],
    "ARG": [],
    "ASN": [],
    "ASP": [[(6, 7), (7, 6)]],
    "CYS": [],
    "GLN": [],
    "GLU": [[(7, 8), (8, 7)]],
    "GLY": [],
    "HIS": [],
    "ILE": [],
    "LEU": [],
    "LYS": [],
    "MET": [],
    "PHE": [[(6, 7), (7, 6), (8, 9), (9, 8)]],
    "PRO": [],
    "SER": [],
    "THR": [],
    "TRP": [],
    "TYR": [[(6, 7), (7, 6), (8, 9), (9, 8)]],
    "VAL": [],
    "A": [[(1, 2), (2, 1)]],
    "G": [[(1, 2), (2, 1)]],
    "C": [[(1, 2), (2, 1)]],
    "U": [[(1, 2), (2, 1)]],
    #"N": [[(1, 2), (2, 1)]],
    "DA": [[(1, 2), (2, 1)]],
    "DG": [[(1, 2), (2, 1)]],
    "DC": [[(1, 2), (2, 1)]],
    "DT": [[(1, 2), (2, 1)]],
    #"DN": [[(1, 2), (2, 1)]]
}


# Name of the atom treated as the centre of each residue: CA for protein
# residues, C1' for nucleotides.
res_to_center_atom = {
    "UNK": "CA",
    "ALA": "CA", "ARG": "CA", "ASN": "CA", "ASP": "CA", "CYS": "CA",
    "GLN": "CA", "GLU": "CA", "GLY": "CA", "HIS": "CA", "ILE": "CA",
    "LEU": "CA", "LYS": "CA", "MET": "CA", "PHE": "CA", "PRO": "CA",
    "SER": "CA", "THR": "CA", "TRP": "CA", "TYR": "CA", "VAL": "CA",
    "A": "C1'", "G": "C1'", "C": "C1'", "U": "C1'", "N": "C1'",
    "DA": "C1'", "DG": "C1'", "DC": "C1'", "DT": "C1'", "DN": "C1'"
}

# Name of the per-residue "disto" atom: CB for protein residues (CA for GLY,
# which has no CB), C4 for purines, C2 for pyrimidines, C1' for unknown
# nucleotides. NOTE(review): presumably the atom used for distogram
# distances -- confirm with the consumer of this table.
res_to_disto_atom = {
    "UNK": "CB",
    "ALA": "CB", "ARG": "CB", "ASN": "CB", "ASP": "CB", "CYS": "CB",
    "GLN": "CB", "GLU": "CB", "GLY": "CA", "HIS": "CB", "ILE": "CB",
    "LEU": "CB", "LYS": "CB", "MET": "CB", "PHE": "CB", "PRO": "CB",
    "SER": "CB", "THR": "CB", "TRP": "CB", "TYR": "CB", "VAL": "CB",
    "A": "C4", "G": "C4", "C": "C2", "U": "C2", "N": "C1'",
    "DA": "C4", "DG": "C4", "DC": "C2", "DT": "C2", "DN": "C1'"
}

# Same two tables, resolved to positions within ref_atoms[res].
res_to_center_atom_id = {
    res_name: ref_atoms[res_name].index(atom_name)
    for res_name, atom_name in res_to_center_atom.items()
}

res_to_disto_atom_id = {
    res_name: ref_atoms[res_name].index(atom_name)
    for res_name, atom_name in res_to_disto_atom.items()
}

# fmt: on
380
+
381
+ ####################################################################################################
382
+ # BONDS
383
+ ####################################################################################################
384
+
# Distance cutoffs used to define interfaces.
# NOTE(review): units are presumably Angstrom, with the first applied at atom
# level and the second at token/residue level -- confirm with the callers.
atom_interface_cutoff = 5.0
interface_cutoff = 15.0

# Bond type names; the list position of a name is its integer id.
bond_types = [
    "OTHER",
    "SINGLE",
    "DOUBLE",
    "TRIPLE",
    "AROMATIC",
    "COVALENT",
]
bond_type_ids = {name: idx for idx, name in enumerate(bond_types)}
# Fallback name for a bond type that is not listed above.
unk_bond_type = "OTHER"
398
+
399
+
400
+ ####################################################################################################
401
+ # Contacts
402
+ ####################################################################################################
403
+
404
+
# Integer codes for the pocket-conditioning role of a token.
pocket_contact_info = {
    "UNSPECIFIED": 0,
    "UNSELECTED": 1,
    "POCKET": 2,
    "BINDER": 3,
}

# Integer codes for pairwise contact conditioning. The two directed
# pocket/binder entries are distinct codes; "CONTACT" is a separate code.
contact_conditioning_info = {
    "UNSPECIFIED": 0,
    "UNSELECTED": 1,
    "POCKET>BINDER": 2,
    "BINDER>POCKET": 3,
    "CONTACT": 4,
}
419
+
420
+
421
+ ####################################################################################################
422
+ # MSA
423
+ ####################################################################################################
424
+
# Upper bounds on MSA size.
# NOTE(review): presumably the total number of MSA rows kept and the number of
# those that may be paired across chains -- confirm with the MSA featurizer.
max_msa_seqs = 16384
max_paired_seqs = 8192
427
+
428
+
429
+ ####################################################################################################
430
+ # CHUNKING
431
+ ####################################################################################################
432
+
# NOTE(review): presumably the size above which computation is split into
# chunks -- confirm with the model code that reads this value.
chunk_size_threshold = 384
434
+
435
+ ####################################################################################################
436
+ # Method conditioning
437
+ ####################################################################################################
438
+
# Methods
# Structure-determination / data-source method -> integer id. Names are
# written in upper case here for readability, but the published mapping is
# keyed by the lower-cased name. Several rarer methods share id 4.
method_types_ids = {
    name.lower(): method_id
    for name, method_id in {
        "MD": 0,
        "X-RAY DIFFRACTION": 1,
        "ELECTRON MICROSCOPY": 2,
        "SOLUTION NMR": 3,
        "SOLID-STATE NMR": 4,
        "NEUTRON DIFFRACTION": 4,
        "ELECTRON CRYSTALLOGRAPHY": 4,
        "FIBER DIFFRACTION": 4,
        "POWDER DIFFRACTION": 4,
        "INFRARED SPECTROSCOPY": 4,
        "FLUORESCENCE TRANSFER": 4,
        "EPR": 4,
        "THEORETICAL MODEL": 4,
        "SOLUTION SCATTERING": 4,
        "OTHER": 4,
        "AFDB": 5,
        "BOLTZ-1": 6,
        "FUTURE1": 7,  # Placeholder for future supervision sources
        "FUTURE2": 8,
        "FUTURE3": 9,
        "FUTURE4": 10,
        "FUTURE5": 11,
    }.items()
}
# Number of distinct ids (not number of names, since ids are shared).
num_method_types = len(set(method_types_ids.values()))
466
+
# Temperature
# Bins are (low, high) tuples; each bin maps to its list position and the
# extra "other" key takes the next free id.
# NOTE(review): the values look like Kelvin -- confirm.
temperature_bins = [(265, 280), (280, 295), (295, 310)]
temperature_bins_ids = {bounds: idx for idx, bounds in enumerate(temperature_bins)}
temperature_bins_ids["other"] = len(temperature_bins)
num_temp_bins = len(temperature_bins_ids)


# pH
# Same layout as the temperature bins: (low, high) tuples plus "other".
ph_bins = [(0, 6), (6, 8), (8, 14)]
ph_bins_ids = {bounds: idx for idx, bounds in enumerate(ph_bins)}
ph_bins_ids["other"] = len(ph_bins)
num_ph_bins = len(ph_bins_ids)
479
+
480
+ ####################################################################################################
481
+ # VDW_RADII
482
+ ####################################################################################################
483
+
# fmt: off
# Van der Waals radii, 118 entries laid out ten per row.
# NOTE(review): presumably indexed by atomic number - 1 (entry 0 = hydrogen)
# and expressed in Angstrom -- confirm with the potentials code.
vdw_radii = [
    1.2, 1.4, 2.2, 1.9, 1.8, 1.7, 1.6, 1.55, 1.5, 1.54,
    2.4, 2.2, 2.1, 2.1, 1.95, 1.8, 1.8, 1.88, 2.8, 2.4,
    2.3, 2.15, 2.05, 2.05, 2.05, 2.05, 2.0, 2.0, 2.0, 2.1,
    2.1, 2.1, 2.05, 1.9, 1.9, 2.02, 2.9, 2.55, 2.4, 2.3,
    2.15, 2.1, 2.05, 2.05, 2.0, 2.05, 2.1, 2.2, 2.2, 2.25,
    2.2, 2.1, 2.1, 2.16, 3.0, 2.7, 2.5, 2.48, 2.47, 2.45,
    2.43, 2.42, 2.4, 2.38, 2.37, 2.35, 2.33, 2.32, 2.3, 2.28,
    2.27, 2.25, 2.2, 2.1, 2.05, 2.0, 2.0, 2.05, 2.1, 2.05,
    2.2, 2.3, 2.3, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.4,
    2.0, 2.3, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0,
    2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0,
    2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0
]
# fmt: on
500
+
501
+ ####################################################################################################
502
+ # Excluded ligands
503
+ ####################################################################################################
504
+
# fmt: off
# Residue/ligand component codes to exclude, stored as a set for O(1)
# membership tests.
# NOTE(review): presumably common crystallisation additives, ions and modified
# residues that should not be treated as ligands of interest -- confirm with
# the filter that reads this set.
ligand_exclusion = {
    "144", "15P", "1PE", "2F2", "2JC", "3HR", "3SY", "7N5", "7PE", "9JE",
    "AAE", "ABA", "ACE", "ACN", "ACT", "ACY", "AZI", "BAM", "BCN", "BCT",
    "BDN", "BEN", "BME", "BO3", "BTB", "BTC", "BU1", "C8E", "CAD", "CAQ",
    "CBM", "CCN", "CIT", "CL", "CLR", "CM", "CMO", "CO3", "CPT", "CXS",
    "D10", "DEP", "DIO", "DMS", "DN", "DOD", "DOX", "EDO", "EEE", "EGL",
    "EOH", "EOX", "EPE", "ETF", "FCY", "FJO", "FLC", "FMT", "FW5", "GOL",
    "GSH", "GTT", "GYF", "HED", "IHP", "IHS", "IMD", "IOD", "IPA", "IPH",
    "LDA", "MB3", "MEG", "MES", "MLA", "MLI", "MOH", "MPD", "MRD", "MSE",
    "MYR", "N", "NA", "NH2", "NH4", "NHE", "NO3", "O4B", "OHE", "OLA",
    "OLC", "OMB", "OME", "OXA", "P6G", "PE3", "PE4", "PEG", "PEO", "PEP",
    "PG0", "PG4", "PGE", "PGR", "PLM", "PO4", "POL", "POP", "PVO", "SAR",
    "SCN", "SEO", "SEP", "SIN", "SO4", "SPD", "SPM", "SR", "STE", "STO",
    "STU", "TAR", "TBU", "TME", "TPO", "TRS", "UNK", "UNL", "UNX", "UPL",
    "URE",
}
# fmt: on
638
+
639
+
640
+ ####################################################################################################
641
+ # TEMPLATES
642
+ ####################################################################################################
643
+
# Minimum template coverage thresholds: an absolute residue count and a
# fraction. NOTE(review): presumably a template must meet these to be kept --
# confirm how the two are combined in the template-selection code.
min_coverage_residues = 10
min_coverage_fraction = 0.1
646
+
647
+
648
+ ####################################################################################################
649
+ # Ambiguous atoms
650
+ ####################################################################################################
651
+
# fmt: off
# Resolution table for atom names that could be read as more than one element.
#
# Each key is an atom name. The value is either
#   * a string: the element symbol to use, or
#   * a dict: exceptions by code, with "*" holding the default element.
# The value "*" (for "X", "XD", "UNK" and the "XE" default) is a wildcard
# rather than an element symbol.
#
# NOTE(review): the nested keys look like residue / chemical-component ids and
# the atom names look upper-cased -- confirm against the parser that reads
# this table. Key order matches the original listing.
ambiguous_atoms = {
    "CA": {
        "*": "C",
        **dict.fromkeys(
            (
                "OEX", "OEC", "543", "OC6", "OC1", "OC7", "OEY", "OC4", "OC3",
                "ICA", "CA", "OC2", "OC5",
            ),
            "CA",
        ),
    },
    "CD": {"*": "C", "CD": "CD", "CD3": "CD", "CD5": "CD", "CD1": "CD"},
    "BR": "BR",
    "CL": {
        "*": "CL",
        **dict.fromkeys(
            (
                "C8P", "L3T", "TLC", "TZ0", "471", "NLK", "PGM", "PNE", "RCY", "11F",
                "PII", "C1Q", "4MD", "R5A", "KW2", "I7M", "R48", "FC3", "55V", "KPF",
                "SPZ", "0TT", "R9A", "5NA", "C55", "NIX", "5PM", "PP8", "544", "812",
                "NPM", "KU8", "A1AMM", "4S0", "AQC", "2JK", "WJR", "A1AAW", "85E", "MB0",
                "ZAB", "85K", "GBP", "A1H80", "A1AFR", "L9M", "MYK", "MB9", "38R", "EKB",
                "NKF", "UMQ", "T4K", "3PT", "A1A7S", "1Q9", "11R", "D2V", "SM8", "IFC",
                "DB5", "L2T", "GNB", "PP7", "072", "P88", "DRL", "C9W", "NTP", "4HJ",
                "7NA", "LPC", "T8W", "63R", "570", "R4A", "3BG", "4RB", "GSO", "BQ6",
                "R4P", "5CP", "TTR", "6UZ", "SPJ", "0SA", "ZL1", "BYG", "F0E", "PC0",
                "B2Q", "KV6", "NTO", "CLG", "R7U", "SMQ", "GM2", "Z7P", "NXF", "C6Q",
                "A1G", "433", "L9N", "7OX", "A1H84", "97L", "HDV", "LUO", "R6A", "1PC",
                "4PT", "SBZ", "EAB", "FL4", "OPS", "C2X", "SLL", "BFC", "GIP", "7CP",
                "CLH", "34E", "5NE", "PBF", "ABD", "ABC", "LPF", "TIZ", "4HH", "AFC",
                "WQH", "9JL", "CS3", "NL0", "KPY", "DNA", "B3C", "TKL", "KVS", "HO6",
                "NLH", "1PB", "CYF", "G4M", "R5B", "N4S", "N11", "C8F", "PIJ", "WIN",
                "NT1", "WJW", "HF7", "TY1", "VM1",
            ),
            "C",
        ),
    },
    "OS": {"*": "O", "DWC": "OS", "OHX": "OS", "OS": "OS", "8WV": "OS", "OS4": "OS"},
    "PB": {"*": "P", "ZN9": "PB", "ZN7": "PB", "PBM": "PB", "PB": "PB", "CSB": "PB"},
    "CE": {"*": "C", "CE": "CE"},
    "FE": {"*": "FE", "TFR": "F", "PF5": "F", "IFC": "F", "F5C": "F"},
    "NA": {"*": "N", "CGO": "NA", "R2K": "NA", "LVQ": "NA", "NA": "NA"},
    "ND": {"*": "N", "ND": "ND"},
    "CF": {"*": "C", "CF": "CF"},
    "RU": "RU",
    "BRAF": "BR",
    "EU": "EU",
    "CLAA": "CL",
    "CLBQ": "CL",
    "CM": {"*": "C", "ZCM": "CM"},
    "SN": {"*": "SN", "TAP": "S", "SND": "S", "TAD": "S", "XPT": "S"},
    "AG": "AG",
    "CLN": "CL",
    "CLM": "CL",
    "CLA": {"*": "CL", "PII": "C", "TDL": "C", "D0J": "C", "GM2": "C", "PIJ": "C"},
    "CLB": {
        "*": "CL",
        **dict.fromkeys(("TD5", "PII", "TDL", "GM2", "TD7", "TD6", "PIJ"), "C"),
    },
    "CR": {
        "*": "C",
        **dict.fromkeys(("BW9", "CQ4", "AC9", "TIL", "J7U", "CR"), "CR"),
    },
    "CLAY": "CL",
    "CLBC": "CL",
    "PD": {
        "*": "P",
        **dict.fromkeys(("F6Q", "SVP", "SXC", "U5U", "PD", "PLL"), "PD"),
    },
    "CO": {
        "*": "C",
        **dict.fromkeys(
            (
                "J1S", "OCN", "OL3", "OL4", "B12", "XCO", "UFU", "CON", "OL5", "B13",
                "7KI", "PL1", "OCO", "J1R", "COH", "SIR", "6KI", "NCO", "9CO", "PC3",
                "BWU", "B1Z", "J83", "CO", "COY", "CNC", "3CO", "OCL", "R5Q", "X5Z",
                "CBY", "OLS", "F0X", "I2A", "OCM",
            ),
            "CO",
        ),
    },
    "CU": {
        "*": "C",
        **dict.fromkeys(
            (
                "8ZR", "K7E", "CU3", "SI9", "35N", "C2O", "SI7", "B15", "SI0", "CUP",
                "SQ1", "CUK", "CUL", "SI8", "IC4", "CUM", "MM2", "B30", "S32", "V79",
                "IMF", "CUN", "MM1", "MP1", "IME", "B17", "C2C", "1CU", "CU6", "C1O",
                "CU1", "B22", "CUS", "RUQ", "CUF", "CUA", "CU", "CUO", "0TE", "SI4",
            ),
            "CU",
        ),
    },
    "CS": {"*": "C", "CS": "CS"},
    "CLQ": "CL",
    "CLR": "CL",
    "CLU": "CL",
    "TE": "TE",
    "NI": {
        "*": "N",
        **dict.fromkeys(
            (
                "USN", "NFO", "NI2", "NFS", "NFR", "82N", "R5N", "NFU", "A1ICD", "NI3",
                "M43", "MM5", "BF8", "TCN", "NIK", "CUV", "MM6", "J52", "NI", "SNF",
                "XCC", "F0L", "UWE", "NFC", "3NI", "HNI", "F43", "RQM", "NFE", "NFB",
                "B51", "NI1", "WCC", "NUF",
            ),
            "NI",
        ),
    },
    "SB": {"*": "S", "UJI": "SB", "SB": "SB", "118": "SB", "SBO": "SB", "3CG": "SB"},
    "MO": "MO",
    "SEG": "SE",
    "CLL": "CL",
    "CLAH": "CL",
    "CLC": {
        "*": "CL",
        **dict.fromkeys(("TD5", "PII", "TDL", "GM2", "TD7", "TD6", "PIJ"), "C"),
    },
    "CLD": {"*": "CL", "PII": "C", "GM2": "C", "PIJ": "C"},
    "CLAD": "CL",
    "CLAE": "CL",
    "LA": "LA",
    "RH": "RH",
    "BRAC": "BR",
    "BRAD": "BR",
    "CLBN": "CL",
    "CLAC": "CL",
    "BRAB": "BR",
    "BRAE": "BR",
    "MG": "MG",
    "IR": "IR",
    "SE": {
        "*": "SE",
        **dict.fromkeys(
            (
                "HII", "NT2", "R2P", "S2P", "0IU", "QMB", "81S", "0QB", "UB4", "OHS",
                "Q78", "0Y2", "B3M", "NT1", "81R",
            ),
            "S",
        ),
    },
    "BRAG": "BR",
    "CLF": {"*": "CL", "PII": "C", "GM2": "C", "PIJ": "C"},
    "CLE": {"*": "CL", "PII": "C", "GM2": "C", "PIJ": "C"},
    "BRAX": "BR",
    "CLK": "CL",
    "ZN": "ZN",
    "AS": "AS",
    "AU": "AU",
    "PT": "PT",
    "CLAS": "CL",
    "MN": "MN",
    "CLBE": "CL",
    "CLBF": "CL",
    "CLAF": "CL",
    "NA'": {"*": "N", "CGO": "NA"},
    "BRAH": "BR",
    "BRAI": "BR",
    "BRA": "BR",
    "BRB": "BR",
    "BRAV": "BR",
    "HG": {
        "*": "HG",
        **dict.fromkeys(("BBA", "MID", "APM", "4QQ", "0ZG", "APH"), "H"),
    },
    "AR": "AR",
    "D": "H",
    "CLAN": "CL",
    "SI": "SI",
    "CLS": "CL",
    "ZR": "ZR",
    "CLAR": {"*": "CL", "ZM4": "C"},
    "HO": "HO",
    "CLI": {"*": "CL", "GM2": "C"},
    "CLH": {"*": "CL", "GM2": "C"},
    "CLAP": "CL",
    "CLBL": "CL",
    "CLBM": "CL",
    "PR": {"*": "PR", "UF0": "P", "252": "P"},
    "IN": "IN",
    "CLJ": "CL",
    "BRU": "BR",
    "SC": {"*": "S", "SFL": "SC"},
    "CLG": {"*": "CL", "GM2": "C"},
    "BRAT": "BR",
    "BRAR": "BR",
    "CLAG": "CL",
    "CLAB": "CL",
    "CLV": "CL",
    "TI": "TI",
    "CLAX": "CL",
    "CLAJ": "CL",
    "CL'": {"*": "CL", "BNR": "C", "25A": "C", "BDA": "C"},
    "CLAW": "CL",
    "BRF": "BR",
    "BRE": "BR",
    "RE": "RE",
    "GD": "GD",
    "SM": {"*": "S", "SM": "SM"},
    "CLBH": "CL",
    "CLBI": "CL",
    "CLAI": "CL",
    "CLY": "CL",
    "CLZ": "CL",
    "AC": "AC",
    "BR'": "BR",
    "CLT": "CL",
    "CLO": "CL",
    "CLP": "CL",
    "LU": "LU",
    "BA": {"*": "B", "BA": "BA"},
    "CLAU": "CL",
    "RB": "RB",
    "LI": "LI",
    "MOM": "MO",
    "BRAQ": "BR",
    "SR": {"*": "S", "SR": "SR", "OER": "SR"},
    "CLAT": "CL",
    "BRAL": "BR",
    "SEB": "SE",
    "CLW": "CL",
    "CLX": "CL",
    "BE": "BE",
    "BRG": "BR",
    "SEA": "SE",
    "BRAW": "BR",
    "BRBB": "BR",
    "ER": "ER",
    "TH": "TH",
    "BRR": "BR",
    "CLBV": "CL",
    "AL": "AL",
    "CLAV": "CL",
    "BRH": "BR",
    "CLAQ": "CL",
    "GA": "GA",
    "X": "*",
    "TL": "TL",
    "CLBB": "CL",
    "TB": "TB",
    "CLAK": "CL",
    "XE": {"*": "*", "XE": "XE"},
    "SEL": "SE",
    "PU": {"*": "P", "4PU": "PU"},
    "CLAZ": "CL",
    "SE'": "SE",
    "CLBA": "CL",
    "SEN": "SE",
    "SNN": "SN",
    "MOB": "MO",
    "YB": "YB",
    "BRC": "BR",
    "BRD": "BR",
    "CLAM": "CL",
    "DA": "H",
    "DB": "H",
    "DC": "H",
    "DXT": "H",
    "DXU": "H",
    "DXX": "H",
    "DXY": "H",
    "DXZ": "H",
    "DY": "DY",
    "TA": "TA",
    "XD": "*",
    "SED": "SE",
    "CLAL": "CL",
    "BRAJ": "BR",
    "AM": "AM",
    "CLAO": "CL",
    "BI": "BI",
    "KR": "KR",
    "BRBJ": "BR",
    "UNK": "*",
}
# fmt: on