boltz-vsynthes 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- boltz/__init__.py +7 -0
- boltz/data/__init__.py +0 -0
- boltz/data/const.py +1184 -0
- boltz/data/crop/__init__.py +0 -0
- boltz/data/crop/affinity.py +164 -0
- boltz/data/crop/boltz.py +296 -0
- boltz/data/crop/cropper.py +45 -0
- boltz/data/feature/__init__.py +0 -0
- boltz/data/feature/featurizer.py +1230 -0
- boltz/data/feature/featurizerv2.py +2208 -0
- boltz/data/feature/symmetry.py +602 -0
- boltz/data/filter/__init__.py +0 -0
- boltz/data/filter/dynamic/__init__.py +0 -0
- boltz/data/filter/dynamic/date.py +76 -0
- boltz/data/filter/dynamic/filter.py +24 -0
- boltz/data/filter/dynamic/max_residues.py +37 -0
- boltz/data/filter/dynamic/resolution.py +34 -0
- boltz/data/filter/dynamic/size.py +38 -0
- boltz/data/filter/dynamic/subset.py +42 -0
- boltz/data/filter/static/__init__.py +0 -0
- boltz/data/filter/static/filter.py +26 -0
- boltz/data/filter/static/ligand.py +37 -0
- boltz/data/filter/static/polymer.py +299 -0
- boltz/data/module/__init__.py +0 -0
- boltz/data/module/inference.py +307 -0
- boltz/data/module/inferencev2.py +429 -0
- boltz/data/module/training.py +684 -0
- boltz/data/module/trainingv2.py +660 -0
- boltz/data/mol.py +900 -0
- boltz/data/msa/__init__.py +0 -0
- boltz/data/msa/mmseqs2.py +235 -0
- boltz/data/pad.py +84 -0
- boltz/data/parse/__init__.py +0 -0
- boltz/data/parse/a3m.py +134 -0
- boltz/data/parse/csv.py +100 -0
- boltz/data/parse/fasta.py +138 -0
- boltz/data/parse/mmcif.py +1239 -0
- boltz/data/parse/mmcif_with_constraints.py +1607 -0
- boltz/data/parse/schema.py +1851 -0
- boltz/data/parse/yaml.py +68 -0
- boltz/data/sample/__init__.py +0 -0
- boltz/data/sample/cluster.py +283 -0
- boltz/data/sample/distillation.py +57 -0
- boltz/data/sample/random.py +39 -0
- boltz/data/sample/sampler.py +49 -0
- boltz/data/tokenize/__init__.py +0 -0
- boltz/data/tokenize/boltz.py +195 -0
- boltz/data/tokenize/boltz2.py +396 -0
- boltz/data/tokenize/tokenizer.py +24 -0
- boltz/data/types.py +777 -0
- boltz/data/write/__init__.py +0 -0
- boltz/data/write/mmcif.py +305 -0
- boltz/data/write/pdb.py +171 -0
- boltz/data/write/utils.py +23 -0
- boltz/data/write/writer.py +330 -0
- boltz/main.py +1292 -0
- boltz/model/__init__.py +0 -0
- boltz/model/layers/__init__.py +0 -0
- boltz/model/layers/attention.py +132 -0
- boltz/model/layers/attentionv2.py +111 -0
- boltz/model/layers/confidence_utils.py +231 -0
- boltz/model/layers/dropout.py +34 -0
- boltz/model/layers/initialize.py +100 -0
- boltz/model/layers/outer_product_mean.py +98 -0
- boltz/model/layers/pair_averaging.py +135 -0
- boltz/model/layers/pairformer.py +337 -0
- boltz/model/layers/relative.py +58 -0
- boltz/model/layers/transition.py +78 -0
- boltz/model/layers/triangular_attention/__init__.py +0 -0
- boltz/model/layers/triangular_attention/attention.py +189 -0
- boltz/model/layers/triangular_attention/primitives.py +409 -0
- boltz/model/layers/triangular_attention/utils.py +380 -0
- boltz/model/layers/triangular_mult.py +212 -0
- boltz/model/loss/__init__.py +0 -0
- boltz/model/loss/bfactor.py +49 -0
- boltz/model/loss/confidence.py +590 -0
- boltz/model/loss/confidencev2.py +621 -0
- boltz/model/loss/diffusion.py +171 -0
- boltz/model/loss/diffusionv2.py +134 -0
- boltz/model/loss/distogram.py +48 -0
- boltz/model/loss/distogramv2.py +105 -0
- boltz/model/loss/validation.py +1025 -0
- boltz/model/models/__init__.py +0 -0
- boltz/model/models/boltz1.py +1286 -0
- boltz/model/models/boltz2.py +1249 -0
- boltz/model/modules/__init__.py +0 -0
- boltz/model/modules/affinity.py +223 -0
- boltz/model/modules/confidence.py +481 -0
- boltz/model/modules/confidence_utils.py +181 -0
- boltz/model/modules/confidencev2.py +495 -0
- boltz/model/modules/diffusion.py +844 -0
- boltz/model/modules/diffusion_conditioning.py +116 -0
- boltz/model/modules/diffusionv2.py +677 -0
- boltz/model/modules/encoders.py +639 -0
- boltz/model/modules/encodersv2.py +565 -0
- boltz/model/modules/transformers.py +322 -0
- boltz/model/modules/transformersv2.py +261 -0
- boltz/model/modules/trunk.py +688 -0
- boltz/model/modules/trunkv2.py +828 -0
- boltz/model/modules/utils.py +303 -0
- boltz/model/optim/__init__.py +0 -0
- boltz/model/optim/ema.py +389 -0
- boltz/model/optim/scheduler.py +99 -0
- boltz/model/potentials/__init__.py +0 -0
- boltz/model/potentials/potentials.py +497 -0
- boltz/model/potentials/schedules.py +32 -0
- boltz_vsynthes-1.0.0.dist-info/METADATA +151 -0
- boltz_vsynthes-1.0.0.dist-info/RECORD +112 -0
- boltz_vsynthes-1.0.0.dist-info/WHEEL +5 -0
- boltz_vsynthes-1.0.0.dist-info/entry_points.txt +2 -0
- boltz_vsynthes-1.0.0.dist-info/licenses/LICENSE +21 -0
- boltz_vsynthes-1.0.0.dist-info/top_level.txt +1 -0
boltz/data/const.py
ADDED
@@ -0,0 +1,1184 @@
|
|
1
|
+
####################################################################################################
|
2
|
+
# CHAINS
|
3
|
+
####################################################################################################
|
4
|
+
|
5
|
+
# Chain kinds known to this module; a kind's integer id is its position here.
chain_types = [
    "PROTEIN",
    "DNA",
    "RNA",
    "NONPOLYMER",
]
# Reverse lookup: chain kind name -> index in `chain_types`.
chain_type_ids = {chain: i for i, chain in enumerate(chain_types)}
|
12
|
+
|
13
|
+
# Output categories. Both weight tables below are keyed by exactly these
# names, in the same order.
out_types = [
    "dna_protein",
    "rna_protein",
    "ligand_protein",
    "dna_ligand",
    "rna_ligand",
    "intra_ligand",
    "intra_dna",
    "intra_rna",
    "intra_protein",
    "protein_protein",
    "modified",
]

# Alternative per-category weights.
# NOTE(review): the "_af3" suffix presumably refers to AlphaFold 3 settings --
# confirm against the code that consumes this table.
out_types_weights_af3 = {
    "dna_protein": 10.0,
    "rna_protein": 10.0,
    "ligand_protein": 10.0,
    "dna_ligand": 5.0,
    "rna_ligand": 5.0,
    "intra_ligand": 20.0,
    "intra_dna": 4.0,
    "intra_rna": 16.0,
    "intra_protein": 20.0,
    "protein_protein": 20.0,
    "modified": 0.0,
}

# Per-category weights; "modified" carries zero weight.
out_types_weights = {
    "dna_protein": 5.0,
    "rna_protein": 5.0,
    "ligand_protein": 20.0,
    "dna_ligand": 2.0,
    "rna_ligand": 2.0,
    "intra_ligand": 20.0,
    "intra_dna": 2.0,
    "intra_rna": 8.0,
    "intra_protein": 20.0,
    "protein_protein": 20.0,
    "modified": 0.0,
}
|
54
|
+
|
55
|
+
|
56
|
+
# Single-chain output categories (lower-case names).
out_single_types = ["protein", "ligand", "dna", "rna"]

# Names of the pairwise clash categories.
clash_types = [
    "dna_protein",
    "rna_protein",
    "ligand_protein",
    "protein_protein",
    "dna_ligand",
    "rna_ligand",
    "ligand_ligand",
    "rna_dna",
    "dna_dna",
    "rna_rna",
]

# Unordered pair of chain kinds -> clash category. Keys are frozensets, so a
# same-kind pair collapses to a one-element set and lookup order is irrelevant.
chain_types_to_clash_type = {
    frozenset(("PROTEIN", "DNA")): "dna_protein",
    frozenset(("PROTEIN", "RNA")): "rna_protein",
    frozenset(("PROTEIN", "NONPOLYMER")): "ligand_protein",
    frozenset(("PROTEIN",)): "protein_protein",
    frozenset(("NONPOLYMER", "DNA")): "dna_ligand",
    frozenset(("NONPOLYMER", "RNA")): "rna_ligand",
    frozenset(("NONPOLYMER",)): "ligand_ligand",
    frozenset(("DNA", "RNA")): "rna_dna",
    frozenset(("DNA",)): "dna_dna",
    frozenset(("RNA",)): "rna_rna",
}

# Chain kind (upper-case) -> entry of `out_single_types`.
chain_type_to_out_single_type = {
    "PROTEIN": "protein",
    "DNA": "dna",
    "RNA": "rna",
    "NONPOLYMER": "ligand",
}
|
90
|
+
####################################################################################################
|
91
|
+
# RESIDUES & TOKENS
|
92
|
+
####################################################################################################
|
93
|
+
|
94
|
+
|
95
|
+
# Three-letter protein residue tokens, ending with the unknown-protein token.
canonical_tokens = [
    "ALA",
    "ARG",
    "ASN",
    "ASP",
    "CYS",
    "GLN",
    "GLU",
    "GLY",
    "HIS",
    "ILE",
    "LEU",
    "LYS",
    "MET",
    "PHE",
    "PRO",
    "SER",
    "THR",
    "TRP",
    "TYR",
    "VAL",
    "UNK",  # unknown protein token
]

# Full vocabulary: padding, gap, protein tokens, then RNA and DNA tokens.
# A token's integer id is its position in this list.
tokens = [
    "<pad>",
    "-",
    *canonical_tokens,
    "A",
    "G",
    "C",
    "U",
    "N",  # unknown rna token
    "DA",
    "DG",
    "DC",
    "DT",
    "DN",  # unknown dna token
]

# Reverse lookup: token -> index in `tokens`.
token_ids = {token: i for i, token in enumerate(tokens)}
num_tokens = len(tokens)
# Fallback token per polymer chain kind, and the same fallbacks as ids.
unk_token = {"PROTEIN": "UNK", "DNA": "DN", "RNA": "N"}
unk_token_ids = {m: token_ids[t] for m, t in unk_token.items()}
|
139
|
+
|
140
|
+
# One-letter protein code -> three-letter token. The letters X, J, B, Z, O
# and U all map to "UNK"; the gap character maps to itself.
prot_letter_to_token = {
    "A": "ALA",
    "R": "ARG",
    "N": "ASN",
    "D": "ASP",
    "C": "CYS",
    "E": "GLU",
    "Q": "GLN",
    "G": "GLY",
    "H": "HIS",
    "I": "ILE",
    "L": "LEU",
    "K": "LYS",
    "M": "MET",
    "F": "PHE",
    "P": "PRO",
    "S": "SER",
    "T": "THR",
    "W": "TRP",
    "Y": "TYR",
    "V": "VAL",
    "X": "UNK",
    "J": "UNK",
    "B": "UNK",
    "Z": "UNK",
    "O": "UNK",
    "U": "UNK",
    "-": "-",
}

# Inverse map. Several letters share "UNK", so the plain inversion would keep
# whichever was listed last; pin the canonical letter "X" explicitly.
prot_token_to_letter = {v: k for k, v in prot_letter_to_token.items()}
prot_token_to_letter["UNK"] = "X"

# RNA one-letter code -> token (identical strings) and its inverse.
rna_letter_to_token = {
    "A": "A",
    "G": "G",
    "C": "C",
    "U": "U",
    "N": "N",
}
rna_token_to_letter = {v: k for k, v in rna_letter_to_token.items()}

# DNA one-letter code -> "D"-prefixed token and its inverse.
dna_letter_to_token = {
    "A": "DA",
    "G": "DG",
    "C": "DC",
    "T": "DT",
    "N": "DN",
}
dna_token_to_letter = {v: k for k, v in dna_letter_to_token.items()}
|
190
|
+
|
191
|
+
####################################################################################################
|
192
|
+
# ATOMS
|
193
|
+
####################################################################################################
|
194
|
+
|
195
|
+
# NOTE(review): presumably the size of the element vocabulary -- confirm
# against the featurizer that consumes it.
num_elements = 128

# Chirality tag names; a tag's integer id is its position in this list.
chirality_types = [
    "CHI_UNSPECIFIED",
    "CHI_TETRAHEDRAL_CW",
    "CHI_TETRAHEDRAL_CCW",
    "CHI_SQUAREPLANAR",
    "CHI_OCTAHEDRAL",
    "CHI_TRIGONALBIPYRAMIDAL",
    "CHI_OTHER",
]
chirality_type_ids = {chirality: i for i, chirality in enumerate(chirality_types)}
# Fallback chirality tag name.
unk_chirality_type = "CHI_OTHER"

# Hybridization names; a name's integer id is its position in this list.
hybridization_map = [
    "S",
    "SP",
    "SP2",
    "SP2D",
    "SP3",
    "SP3D",
    "SP3D2",
    "OTHER",
    "UNSPECIFIED",
]
hybridization_type_ids = {hybrid: i for i, hybrid in enumerate(hybridization_map)}
# Fallback hybridization name.
unk_hybridization_type = "UNSPECIFIED"
|
222
|
+
|
223
|
+
# fmt: off
|
224
|
+
# Ordered atom names per residue token. Order matters: the symmetry tables
# and the *_atom_id maps below store positions within these lists.
ref_atoms = {
    "PAD": [],
    "UNK": ["N", "CA", "C", "O", "CB"],
    "-": [],
    "ALA": ["N", "CA", "C", "O", "CB"],
    "ARG": ["N", "CA", "C", "O", "CB", "CG", "CD", "NE", "CZ", "NH1", "NH2"],
    "ASN": ["N", "CA", "C", "O", "CB", "CG", "OD1", "ND2"],
    "ASP": ["N", "CA", "C", "O", "CB", "CG", "OD1", "OD2"],
    "CYS": ["N", "CA", "C", "O", "CB", "SG"],
    "GLN": ["N", "CA", "C", "O", "CB", "CG", "CD", "OE1", "NE2"],
    "GLU": ["N", "CA", "C", "O", "CB", "CG", "CD", "OE1", "OE2"],
    "GLY": ["N", "CA", "C", "O"],
    "HIS": ["N", "CA", "C", "O", "CB", "CG", "ND1", "CD2", "CE1", "NE2"],
    "ILE": ["N", "CA", "C", "O", "CB", "CG1", "CG2", "CD1"],
    "LEU": ["N", "CA", "C", "O", "CB", "CG", "CD1", "CD2"],
    "LYS": ["N", "CA", "C", "O", "CB", "CG", "CD", "CE", "NZ"],
    "MET": ["N", "CA", "C", "O", "CB", "CG", "SD", "CE"],
    "PHE": ["N", "CA", "C", "O", "CB", "CG", "CD1", "CD2", "CE1", "CE2", "CZ"],
    "PRO": ["N", "CA", "C", "O", "CB", "CG", "CD"],
    "SER": ["N", "CA", "C", "O", "CB", "OG"],
    "THR": ["N", "CA", "C", "O", "CB", "OG1", "CG2"],
    "TRP": ["N", "CA", "C", "O", "CB", "CG", "CD1", "CD2", "NE1", "CE2", "CE3", "CZ2", "CZ3", "CH2"],  # noqa: E501
    "TYR": ["N", "CA", "C", "O", "CB", "CG", "CD1", "CD2", "CE1", "CE2", "CZ", "OH"],
    "VAL": ["N", "CA", "C", "O", "CB", "CG1", "CG2"],
    "A": ["P", "OP1", "OP2", "O5'", "C5'", "C4'", "O4'", "C3'", "O3'", "C2'", "O2'", "C1'", "N9", "C8", "N7", "C5", "C6", "N6", "N1", "C2", "N3", "C4"],  # noqa: E501
    "G": ["P", "OP1", "OP2", "O5'", "C5'", "C4'", "O4'", "C3'", "O3'", "C2'", "O2'", "C1'", "N9", "C8", "N7", "C5", "C6", "O6", "N1", "C2", "N2", "N3", "C4"],  # noqa: E501
    "C": ["P", "OP1", "OP2", "O5'", "C5'", "C4'", "O4'", "C3'", "O3'", "C2'", "O2'", "C1'", "N1", "C2", "O2", "N3", "C4", "N4", "C5", "C6"],  # noqa: E501
    "U": ["P", "OP1", "OP2", "O5'", "C5'", "C4'", "O4'", "C3'", "O3'", "C2'", "O2'", "C1'", "N1", "C2", "O2", "N3", "C4", "O4", "C5", "C6"],  # noqa: E501
    "N": ["P", "OP1", "OP2", "O5'", "C5'", "C4'", "O4'", "C3'", "O3'", "C2'", "O2'", "C1'"],  # noqa: E501
    "DA": ["P", "OP1", "OP2", "O5'", "C5'", "C4'", "O4'", "C3'", "O3'", "C2'", "C1'", "N9", "C8", "N7", "C5", "C6", "N6", "N1", "C2", "N3", "C4"],  # noqa: E501
    "DG": ["P", "OP1", "OP2", "O5'", "C5'", "C4'", "O4'", "C3'", "O3'", "C2'", "C1'", "N9", "C8", "N7", "C5", "C6", "O6", "N1", "C2", "N2", "N3", "C4"],  # noqa: E501
    "DC": ["P", "OP1", "OP2", "O5'", "C5'", "C4'", "O4'", "C3'", "O3'", "C2'", "C1'", "N1", "C2", "O2", "N3", "C4", "N4", "C5", "C6"],  # noqa: E501
    "DT": ["P", "OP1", "OP2", "O5'", "C5'", "C4'", "O4'", "C3'", "O3'", "C2'", "C1'", "N1", "C2", "O2", "N3", "C4", "O4", "C5", "C7", "C6"],  # noqa: E501
    "DN": ["P", "OP1", "OP2", "O5'", "C5'", "C4'", "O4'", "C3'", "O3'", "C2'", "C1'"],
}

# Backbone atom names. The nucleic list matches the RNA "N" entry above and
# includes O2', which the "D*" entries of `ref_atoms` do not list.
protein_backbone_atom_names = ["N", "CA", "C", "O"]
nucleic_backbone_atom_names = ["P", "OP1", "OP2", "O5'", "C5'", "C4'", "O4'", "C3'", "O3'", "C2'", "O2'", "C1'"]  # noqa: E501

# Backbone atom name -> position within the corresponding backbone list.
protein_backbone_atom_index = {name: i for i, name in enumerate(protein_backbone_atom_names)}
nucleic_backbone_atom_index = {name: i for i, name in enumerate(nucleic_backbone_atom_names)}

# Per-residue lists of index-pair groups; the integers are positions in the
# residue's `ref_atoms` list (e.g. ASP pairs 6 and 7 are OD1 and OD2).
# "UNK", "-", "N" and "DN" have no entry ("N"/"DN" are deliberately left
# commented out), so callers must tolerate missing keys.
ref_symmetries = {
    "PAD": [],
    "ALA": [],
    "ARG": [],
    "ASN": [],
    "ASP": [[(6, 7), (7, 6)]],
    "CYS": [],
    "GLN": [],
    "GLU": [[(7, 8), (8, 7)]],
    "GLY": [],
    "HIS": [],
    "ILE": [],
    "LEU": [],
    "LYS": [],
    "MET": [],
    "PHE": [[(6, 7), (7, 6), (8, 9), (9, 8)]],
    "PRO": [],
    "SER": [],
    "THR": [],
    "TRP": [],
    "TYR": [[(6, 7), (7, 6), (8, 9), (9, 8)]],
    "VAL": [],
    "A": [[(1, 2), (2, 1)]],
    "G": [[(1, 2), (2, 1)]],
    "C": [[(1, 2), (2, 1)]],
    "U": [[(1, 2), (2, 1)]],
    #"N": [[(1, 2), (2, 1)]],
    "DA": [[(1, 2), (2, 1)]],
    "DG": [[(1, 2), (2, 1)]],
    "DC": [[(1, 2), (2, 1)]],
    "DT": [[(1, 2), (2, 1)]],
    #"DN": [[(1, 2), (2, 1)]]
}


# Residue token -> name of its center atom: CA for every protein token,
# C1' for every nucleic token.
res_to_center_atom = {
    "UNK": "CA",
    "ALA": "CA",
    "ARG": "CA",
    "ASN": "CA",
    "ASP": "CA",
    "CYS": "CA",
    "GLN": "CA",
    "GLU": "CA",
    "GLY": "CA",
    "HIS": "CA",
    "ILE": "CA",
    "LEU": "CA",
    "LYS": "CA",
    "MET": "CA",
    "PHE": "CA",
    "PRO": "CA",
    "SER": "CA",
    "THR": "CA",
    "TRP": "CA",
    "TYR": "CA",
    "VAL": "CA",
    "A": "C1'",
    "G": "C1'",
    "C": "C1'",
    "U": "C1'",
    "N": "C1'",
    "DA": "C1'",
    "DG": "C1'",
    "DC": "C1'",
    "DT": "C1'",
    "DN": "C1'",
}

# Residue token -> name of its "disto" atom: CB for protein tokens (CA for
# GLY, which lists no CB), a base atom for known nucleotides, and C1' for the
# unknown nucleotides, whose atom lists stop at the sugar.
res_to_disto_atom = {
    "UNK": "CB",
    "ALA": "CB",
    "ARG": "CB",
    "ASN": "CB",
    "ASP": "CB",
    "CYS": "CB",
    "GLN": "CB",
    "GLU": "CB",
    "GLY": "CA",
    "HIS": "CB",
    "ILE": "CB",
    "LEU": "CB",
    "LYS": "CB",
    "MET": "CB",
    "PHE": "CB",
    "PRO": "CB",
    "SER": "CB",
    "THR": "CB",
    "TRP": "CB",
    "TYR": "CB",
    "VAL": "CB",
    "A": "C4",
    "G": "C4",
    "C": "C2",
    "U": "C2",
    "N": "C1'",
    "DA": "C4",
    "DG": "C4",
    "DC": "C2",
    "DT": "C2",
    "DN": "C1'",
}

# Same two maps, resolved to positions within `ref_atoms[res]`. `.index`
# raises ValueError at import time if a named atom is missing from the list.
res_to_center_atom_id = {
    res: ref_atoms[res].index(atom)
    for res, atom in res_to_center_atom.items()
}

res_to_disto_atom_id = {
    res: ref_atoms[res].index(atom)
    for res, atom in res_to_disto_atom.items()
}
|
378
|
+
|
379
|
+
# fmt: on
|
380
|
+
|
381
|
+
####################################################################################################
|
382
|
+
# BONDS
|
383
|
+
####################################################################################################
|
384
|
+
|
385
|
+
# Interface cutoffs.
# NOTE(review): presumably distances in angstroms (atom-level and
# coarser-level respectively) -- confirm against the code that applies them.
atom_interface_cutoff = 5.0
interface_cutoff = 15.0

# Bond type names; a type's integer id is its position in this list.
bond_types = [
    "OTHER",
    "SINGLE",
    "DOUBLE",
    "TRIPLE",
    "AROMATIC",
    "COVALENT",
]
bond_type_ids = {bond: i for i, bond in enumerate(bond_types)}
# Fallback bond type name (id 0).
unk_bond_type = "OTHER"
|
398
|
+
|
399
|
+
|
400
|
+
####################################################################################################
|
401
|
+
# Contacts
|
402
|
+
####################################################################################################
|
403
|
+
|
404
|
+
|
405
|
+
# Pocket-conditioning label name -> integer code.
pocket_contact_info = {
    "UNSPECIFIED": 0,
    "UNSELECTED": 1,
    "POCKET": 2,
    "BINDER": 3,
}

# Contact-conditioning label name -> integer code. The two directional pocket
# labels are distinct codes, followed by a separate "CONTACT" code.
contact_conditioning_info = {
    "UNSPECIFIED": 0,
    "UNSELECTED": 1,
    "POCKET>BINDER": 2,
    "BINDER>POCKET": 3,
    "CONTACT": 4,
}
|
419
|
+
|
420
|
+
|
421
|
+
####################################################################################################
|
422
|
+
# MSA
|
423
|
+
####################################################################################################
|
424
|
+
|
425
|
+
# MSA size limits.
# NOTE(review): presumably upper bounds on total and on paired MSA sequences
# -- confirm against the MSA loader.
max_msa_seqs = 16384
max_paired_seqs = 8192
|
427
|
+
|
428
|
+
|
429
|
+
####################################################################################################
|
430
|
+
# CHUNKING
|
431
|
+
####################################################################################################
|
432
|
+
|
433
|
+
chunk_size_threshold = 384
|
434
|
+
|
435
|
+
####################################################################################################
|
436
|
+
# Method conditioning
|
437
|
+
####################################################################################################
|
438
|
+
|
439
|
+
# Methods
|
440
|
+
# Method name -> integer id. Ids 0-3 and 5-11 are unique; every method from
# "SOLID-STATE NMR" through "OTHER" shares id 4.
method_types_ids = {
    "MD": 0,
    "X-RAY DIFFRACTION": 1,
    "ELECTRON MICROSCOPY": 2,
    "SOLUTION NMR": 3,
    "SOLID-STATE NMR": 4,
    "NEUTRON DIFFRACTION": 4,
    "ELECTRON CRYSTALLOGRAPHY": 4,
    "FIBER DIFFRACTION": 4,
    "POWDER DIFFRACTION": 4,
    "INFRARED SPECTROSCOPY": 4,
    "FLUORESCENCE TRANSFER": 4,
    "EPR": 4,
    "THEORETICAL MODEL": 4,
    "SOLUTION SCATTERING": 4,
    "OTHER": 4,
    "AFDB": 5,
    "BOLTZ-1": 6,
    "FUTURE1": 7,  # Placeholder for future supervision sources
    "FUTURE2": 8,
    "FUTURE3": 9,
    "FUTURE4": 10,
    "FUTURE5": 11,
}
# The table is written upper-case for readability, but the published mapping
# has lower-case keys only; look names up with `.lower()`.
method_types_ids = {k.lower(): v for k, v in method_types_ids.items()}
# Number of distinct ids, not number of names (several names share id 4).
num_method_types = len(set(method_types_ids.values()))
|
466
|
+
|
467
|
+
# Temperature
|
468
|
+
# Temperature bins as (low, high) pairs.
# NOTE(review): units are presumably kelvin, and which edge is inclusive is
# decided by the consumer -- confirm there.
temperature_bins = [(265, 280), (280, 295), (295, 310)]
# Bin tuple -> id, plus a trailing "other" id for values outside every bin.
temperature_bins_ids = {temp: i for i, temp in enumerate(temperature_bins)}
temperature_bins_ids["other"] = len(temperature_bins)
num_temp_bins = len(temperature_bins_ids)


# pH
# Same layout as the temperature bins: (low, high) pairs plus an "other" id.
ph_bins = [(0, 6), (6, 8), (8, 14)]
ph_bins_ids = {ph: i for i, ph in enumerate(ph_bins)}
ph_bins_ids["other"] = len(ph_bins)
num_ph_bins = len(ph_bins_ids)
|
479
|
+
|
480
|
+
####################################################################################################
|
481
|
+
# VDW_RADII
|
482
|
+
####################################################################################################
|
483
|
+
|
484
|
+
# fmt: off
|
485
|
+
# 118 radii, laid out ten per row.
# NOTE(review): presumably van der Waals radii in angstroms ordered by atomic
# number (entry 0 = hydrogen) -- confirm against the code that indexes it.
# The list is shorter than `num_elements` (128) defined earlier in this file.
vdw_radii = [
    1.2, 1.4, 2.2, 1.9, 1.8, 1.7, 1.6, 1.55, 1.5, 1.54,
    2.4, 2.2, 2.1, 2.1, 1.95, 1.8, 1.8, 1.88, 2.8, 2.4,
    2.3, 2.15, 2.05, 2.05, 2.05, 2.05, 2.0, 2.0, 2.0, 2.1,
    2.1, 2.1, 2.05, 1.9, 1.9, 2.02, 2.9, 2.55, 2.4, 2.3,
    2.15, 2.1, 2.05, 2.05, 2.0, 2.05, 2.1, 2.2, 2.2, 2.25,
    2.2, 2.1, 2.1, 2.16, 3.0, 2.7, 2.5, 2.48, 2.47, 2.45,
    2.43, 2.42, 2.4, 2.38, 2.37, 2.35, 2.33, 2.32, 2.3, 2.28,
    2.27, 2.25, 2.2, 2.1, 2.05, 2.0, 2.0, 2.05, 2.1, 2.05,
    2.2, 2.3, 2.3, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.4,
    2.0, 2.3, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0,
    2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0,
    2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0,
]
|
499
|
+
# fmt: on
|
500
|
+
|
501
|
+
####################################################################################################
|
502
|
+
# Excluded ligands
|
503
|
+
####################################################################################################
|
504
|
+
|
505
|
+
# Set of 131 residue/ligand codes under the "Excluded ligands" section.
# NOTE(review): presumably crystallization additives, ions and similar
# artifacts to be ignored -- confirm how consumers use this set.
ligand_exclusion = {
    "144", "15P", "1PE", "2F2", "2JC", "3HR", "3SY", "7N5", "7PE", "9JE",
    "AAE", "ABA", "ACE", "ACN", "ACT", "ACY", "AZI", "BAM", "BCN", "BCT",
    "BDN", "BEN", "BME", "BO3", "BTB", "BTC", "BU1", "C8E", "CAD", "CAQ",
    "CBM", "CCN", "CIT", "CL", "CLR", "CM", "CMO", "CO3", "CPT", "CXS",
    "D10", "DEP", "DIO", "DMS", "DN", "DOD", "DOX", "EDO", "EEE", "EGL",
    "EOH", "EOX", "EPE", "ETF", "FCY", "FJO", "FLC", "FMT", "FW5", "GOL",
    "GSH", "GTT", "GYF", "HED", "IHP", "IHS", "IMD", "IOD", "IPA", "IPH",
    "LDA", "MB3", "MEG", "MES", "MLA", "MLI", "MOH", "MPD", "MRD", "MSE",
    "MYR", "N", "NA", "NH2", "NH4", "NHE", "NO3", "O4B", "OHE", "OLA",
    "OLC", "OMB", "OME", "OXA", "P6G", "PE3", "PE4", "PEG", "PEO", "PEP",
    "PG0", "PG4", "PGE", "PGR", "PLM", "PO4", "POL", "POP", "PVO", "SAR",
    "SCN", "SEO", "SEP", "SIN", "SO4", "SPD", "SPM", "SR", "STE", "STO",
    "STU", "TAR", "TBU", "TME", "TPO", "TRS", "UNK", "UNL", "UNX", "UPL",
    "URE",
}
|
638
|
+
|
639
|
+
|
640
|
+
####################################################################################################
|
641
|
+
# TEMPLATES
|
642
|
+
####################################################################################################
|
643
|
+
|
644
|
+
# Template coverage thresholds.
# NOTE(review): presumably the minimum residue count and minimum fraction a
# template must cover to be kept -- confirm against the template filter.
min_coverage_residues = 10
min_coverage_fraction = 0.1
|
646
|
+
|
647
|
+
|
648
|
+
####################################################################################################
|
649
|
+
# Ambiguous atoms
|
650
|
+
####################################################################################################
|
651
|
+
|
652
|
+
ambiguous_atoms = {
|
653
|
+
"CA": {
|
654
|
+
"*": "C",
|
655
|
+
"OEX": "CA",
|
656
|
+
"OEC": "CA",
|
657
|
+
"543": "CA",
|
658
|
+
"OC6": "CA",
|
659
|
+
"OC1": "CA",
|
660
|
+
"OC7": "CA",
|
661
|
+
"OEY": "CA",
|
662
|
+
"OC4": "CA",
|
663
|
+
"OC3": "CA",
|
664
|
+
"ICA": "CA",
|
665
|
+
"CA": "CA",
|
666
|
+
"OC2": "CA",
|
667
|
+
"OC5": "CA",
|
668
|
+
},
|
669
|
+
"CD": {"*": "C", "CD": "CD", "CD3": "CD", "CD5": "CD", "CD1": "CD"},
|
670
|
+
"BR": "BR",
|
671
|
+
"CL": {
|
672
|
+
"*": "CL",
|
673
|
+
"C8P": "C",
|
674
|
+
"L3T": "C",
|
675
|
+
"TLC": "C",
|
676
|
+
"TZ0": "C",
|
677
|
+
"471": "C",
|
678
|
+
"NLK": "C",
|
679
|
+
"PGM": "C",
|
680
|
+
"PNE": "C",
|
681
|
+
"RCY": "C",
|
682
|
+
"11F": "C",
|
683
|
+
"PII": "C",
|
684
|
+
"C1Q": "C",
|
685
|
+
"4MD": "C",
|
686
|
+
"R5A": "C",
|
687
|
+
"KW2": "C",
|
688
|
+
"I7M": "C",
|
689
|
+
"R48": "C",
|
690
|
+
"FC3": "C",
|
691
|
+
"55V": "C",
|
692
|
+
"KPF": "C",
|
693
|
+
"SPZ": "C",
|
694
|
+
"0TT": "C",
|
695
|
+
"R9A": "C",
|
696
|
+
"5NA": "C",
|
697
|
+
"C55": "C",
|
698
|
+
"NIX": "C",
|
699
|
+
"5PM": "C",
|
700
|
+
"PP8": "C",
|
701
|
+
"544": "C",
|
702
|
+
"812": "C",
|
703
|
+
"NPM": "C",
|
704
|
+
"KU8": "C",
|
705
|
+
"A1AMM": "C",
|
706
|
+
"4S0": "C",
|
707
|
+
"AQC": "C",
|
708
|
+
"2JK": "C",
|
709
|
+
"WJR": "C",
|
710
|
+
"A1AAW": "C",
|
711
|
+
"85E": "C",
|
712
|
+
"MB0": "C",
|
713
|
+
"ZAB": "C",
|
714
|
+
"85K": "C",
|
715
|
+
"GBP": "C",
|
716
|
+
"A1H80": "C",
|
717
|
+
"A1AFR": "C",
|
718
|
+
"L9M": "C",
|
719
|
+
"MYK": "C",
|
720
|
+
"MB9": "C",
|
721
|
+
"38R": "C",
|
722
|
+
"EKB": "C",
|
723
|
+
"NKF": "C",
|
724
|
+
"UMQ": "C",
|
725
|
+
"T4K": "C",
|
726
|
+
"3PT": "C",
|
727
|
+
"A1A7S": "C",
|
728
|
+
"1Q9": "C",
|
729
|
+
"11R": "C",
|
730
|
+
"D2V": "C",
|
731
|
+
"SM8": "C",
|
732
|
+
"IFC": "C",
|
733
|
+
"DB5": "C",
|
734
|
+
"L2T": "C",
|
735
|
+
"GNB": "C",
|
736
|
+
"PP7": "C",
|
737
|
+
"072": "C",
|
738
|
+
"P88": "C",
|
739
|
+
"DRL": "C",
|
740
|
+
"C9W": "C",
|
741
|
+
"NTP": "C",
|
742
|
+
"4HJ": "C",
|
743
|
+
"7NA": "C",
|
744
|
+
"LPC": "C",
|
745
|
+
"T8W": "C",
|
746
|
+
"63R": "C",
|
747
|
+
"570": "C",
|
748
|
+
"R4A": "C",
|
749
|
+
"3BG": "C",
|
750
|
+
"4RB": "C",
|
751
|
+
"GSO": "C",
|
752
|
+
"BQ6": "C",
|
753
|
+
"R4P": "C",
|
754
|
+
"5CP": "C",
|
755
|
+
"TTR": "C",
|
756
|
+
"6UZ": "C",
|
757
|
+
"SPJ": "C",
|
758
|
+
"0SA": "C",
|
759
|
+
"ZL1": "C",
|
760
|
+
"BYG": "C",
|
761
|
+
"F0E": "C",
|
762
|
+
"PC0": "C",
|
763
|
+
"B2Q": "C",
|
764
|
+
"KV6": "C",
|
765
|
+
"NTO": "C",
|
766
|
+
"CLG": "C",
|
767
|
+
"R7U": "C",
|
768
|
+
"SMQ": "C",
|
769
|
+
"GM2": "C",
|
770
|
+
"Z7P": "C",
|
771
|
+
"NXF": "C",
|
772
|
+
"C6Q": "C",
|
773
|
+
"A1G": "C",
|
774
|
+
"433": "C",
|
775
|
+
"L9N": "C",
|
776
|
+
"7OX": "C",
|
777
|
+
"A1H84": "C",
|
778
|
+
"97L": "C",
|
779
|
+
"HDV": "C",
|
780
|
+
"LUO": "C",
|
781
|
+
"R6A": "C",
|
782
|
+
"1PC": "C",
|
783
|
+
"4PT": "C",
|
784
|
+
"SBZ": "C",
|
785
|
+
"EAB": "C",
|
786
|
+
"FL4": "C",
|
787
|
+
"OPS": "C",
|
788
|
+
"C2X": "C",
|
789
|
+
"SLL": "C",
|
790
|
+
"BFC": "C",
|
791
|
+
"GIP": "C",
|
792
|
+
"7CP": "C",
|
793
|
+
"CLH": "C",
|
794
|
+
"34E": "C",
|
795
|
+
"5NE": "C",
|
796
|
+
"PBF": "C",
|
797
|
+
"ABD": "C",
|
798
|
+
"ABC": "C",
|
799
|
+
"LPF": "C",
|
800
|
+
"TIZ": "C",
|
801
|
+
"4HH": "C",
|
802
|
+
"AFC": "C",
|
803
|
+
"WQH": "C",
|
804
|
+
"9JL": "C",
|
805
|
+
"CS3": "C",
|
806
|
+
"NL0": "C",
|
807
|
+
"KPY": "C",
|
808
|
+
"DNA": "C",
|
809
|
+
"B3C": "C",
|
810
|
+
"TKL": "C",
|
811
|
+
"KVS": "C",
|
812
|
+
"HO6": "C",
|
813
|
+
"NLH": "C",
|
814
|
+
"1PB": "C",
|
815
|
+
"CYF": "C",
|
816
|
+
"G4M": "C",
|
817
|
+
"R5B": "C",
|
818
|
+
"N4S": "C",
|
819
|
+
"N11": "C",
|
820
|
+
"C8F": "C",
|
821
|
+
"PIJ": "C",
|
822
|
+
"WIN": "C",
|
823
|
+
"NT1": "C",
|
824
|
+
"WJW": "C",
|
825
|
+
"HF7": "C",
|
826
|
+
"TY1": "C",
|
827
|
+
"VM1": "C",
|
828
|
+
},
|
829
|
+
"OS": {"*": "O", "DWC": "OS", "OHX": "OS", "OS": "OS", "8WV": "OS", "OS4": "OS"},
|
830
|
+
"PB": {"*": "P", "ZN9": "PB", "ZN7": "PB", "PBM": "PB", "PB": "PB", "CSB": "PB"},
|
831
|
+
"CE": {"*": "C", "CE": "CE"},
|
832
|
+
"FE": {"*": "FE", "TFR": "F", "PF5": "F", "IFC": "F", "F5C": "F"},
|
833
|
+
"NA": {"*": "N", "CGO": "NA", "R2K": "NA", "LVQ": "NA", "NA": "NA"},
|
834
|
+
"ND": {"*": "N", "ND": "ND"},
|
835
|
+
"CF": {"*": "C", "CF": "CF"},
|
836
|
+
"RU": "RU",
|
837
|
+
"BRAF": "BR",
|
838
|
+
"EU": "EU",
|
839
|
+
"CLAA": "CL",
|
840
|
+
"CLBQ": "CL",
|
841
|
+
"CM": {"*": "C", "ZCM": "CM"},
|
842
|
+
"SN": {"*": "SN", "TAP": "S", "SND": "S", "TAD": "S", "XPT": "S"},
|
843
|
+
"AG": "AG",
|
844
|
+
"CLN": "CL",
|
845
|
+
"CLM": "CL",
|
846
|
+
"CLA": {"*": "CL", "PII": "C", "TDL": "C", "D0J": "C", "GM2": "C", "PIJ": "C"},
|
847
|
+
"CLB": {
|
848
|
+
"*": "CL",
|
849
|
+
"TD5": "C",
|
850
|
+
"PII": "C",
|
851
|
+
"TDL": "C",
|
852
|
+
"GM2": "C",
|
853
|
+
"TD7": "C",
|
854
|
+
"TD6": "C",
|
855
|
+
"PIJ": "C",
|
856
|
+
},
|
857
|
+
"CR": {
|
858
|
+
"*": "C",
|
859
|
+
"BW9": "CR",
|
860
|
+
"CQ4": "CR",
|
861
|
+
"AC9": "CR",
|
862
|
+
"TIL": "CR",
|
863
|
+
"J7U": "CR",
|
864
|
+
"CR": "CR",
|
865
|
+
},
|
866
|
+
"CLAY": "CL",
|
867
|
+
"CLBC": "CL",
|
868
|
+
"PD": {
|
869
|
+
"*": "P",
|
870
|
+
"F6Q": "PD",
|
871
|
+
"SVP": "PD",
|
872
|
+
"SXC": "PD",
|
873
|
+
"U5U": "PD",
|
874
|
+
"PD": "PD",
|
875
|
+
"PLL": "PD",
|
876
|
+
},
|
877
|
+
"CO": {
|
878
|
+
"*": "C",
|
879
|
+
"J1S": "CO",
|
880
|
+
"OCN": "CO",
|
881
|
+
"OL3": "CO",
|
882
|
+
"OL4": "CO",
|
883
|
+
"B12": "CO",
|
884
|
+
"XCO": "CO",
|
885
|
+
"UFU": "CO",
|
886
|
+
"CON": "CO",
|
887
|
+
"OL5": "CO",
|
888
|
+
"B13": "CO",
|
889
|
+
"7KI": "CO",
|
890
|
+
"PL1": "CO",
|
891
|
+
"OCO": "CO",
|
892
|
+
"J1R": "CO",
|
893
|
+
"COH": "CO",
|
894
|
+
"SIR": "CO",
|
895
|
+
"6KI": "CO",
|
896
|
+
"NCO": "CO",
|
897
|
+
"9CO": "CO",
|
898
|
+
"PC3": "CO",
|
899
|
+
"BWU": "CO",
|
900
|
+
"B1Z": "CO",
|
901
|
+
"J83": "CO",
|
902
|
+
"CO": "CO",
|
903
|
+
"COY": "CO",
|
904
|
+
"CNC": "CO",
|
905
|
+
"3CO": "CO",
|
906
|
+
"OCL": "CO",
|
907
|
+
"R5Q": "CO",
|
908
|
+
"X5Z": "CO",
|
909
|
+
"CBY": "CO",
|
910
|
+
"OLS": "CO",
|
911
|
+
"F0X": "CO",
|
912
|
+
"I2A": "CO",
|
913
|
+
"OCM": "CO",
|
914
|
+
},
|
915
|
+
"CU": {
|
916
|
+
"*": "C",
|
917
|
+
"8ZR": "CU",
|
918
|
+
"K7E": "CU",
|
919
|
+
"CU3": "CU",
|
920
|
+
"SI9": "CU",
|
921
|
+
"35N": "CU",
|
922
|
+
"C2O": "CU",
|
923
|
+
"SI7": "CU",
|
924
|
+
"B15": "CU",
|
925
|
+
"SI0": "CU",
|
926
|
+
"CUP": "CU",
|
927
|
+
"SQ1": "CU",
|
928
|
+
"CUK": "CU",
|
929
|
+
"CUL": "CU",
|
930
|
+
"SI8": "CU",
|
931
|
+
"IC4": "CU",
|
932
|
+
"CUM": "CU",
|
933
|
+
"MM2": "CU",
|
934
|
+
"B30": "CU",
|
935
|
+
"S32": "CU",
|
936
|
+
"V79": "CU",
|
937
|
+
"IMF": "CU",
|
938
|
+
"CUN": "CU",
|
939
|
+
"MM1": "CU",
|
940
|
+
"MP1": "CU",
|
941
|
+
"IME": "CU",
|
942
|
+
"B17": "CU",
|
943
|
+
"C2C": "CU",
|
944
|
+
"1CU": "CU",
|
945
|
+
"CU6": "CU",
|
946
|
+
"C1O": "CU",
|
947
|
+
"CU1": "CU",
|
948
|
+
"B22": "CU",
|
949
|
+
"CUS": "CU",
|
950
|
+
"RUQ": "CU",
|
951
|
+
"CUF": "CU",
|
952
|
+
"CUA": "CU",
|
953
|
+
"CU": "CU",
|
954
|
+
"CUO": "CU",
|
955
|
+
"0TE": "CU",
|
956
|
+
"SI4": "CU",
|
957
|
+
},
|
958
|
+
"CS": {"*": "C", "CS": "CS"},
|
959
|
+
"CLQ": "CL",
|
960
|
+
"CLR": "CL",
|
961
|
+
"CLU": "CL",
|
962
|
+
"TE": "TE",
|
963
|
+
"NI": {
|
964
|
+
"*": "N",
|
965
|
+
"USN": "NI",
|
966
|
+
"NFO": "NI",
|
967
|
+
"NI2": "NI",
|
968
|
+
"NFS": "NI",
|
969
|
+
"NFR": "NI",
|
970
|
+
"82N": "NI",
|
971
|
+
"R5N": "NI",
|
972
|
+
"NFU": "NI",
|
973
|
+
"A1ICD": "NI",
|
974
|
+
"NI3": "NI",
|
975
|
+
"M43": "NI",
|
976
|
+
"MM5": "NI",
|
977
|
+
"BF8": "NI",
|
978
|
+
"TCN": "NI",
|
979
|
+
"NIK": "NI",
|
980
|
+
"CUV": "NI",
|
981
|
+
"MM6": "NI",
|
982
|
+
"J52": "NI",
|
983
|
+
"NI": "NI",
|
984
|
+
"SNF": "NI",
|
985
|
+
"XCC": "NI",
|
986
|
+
"F0L": "NI",
|
987
|
+
"UWE": "NI",
|
988
|
+
"NFC": "NI",
|
989
|
+
"3NI": "NI",
|
990
|
+
"HNI": "NI",
|
991
|
+
"F43": "NI",
|
992
|
+
"RQM": "NI",
|
993
|
+
"NFE": "NI",
|
994
|
+
"NFB": "NI",
|
995
|
+
"B51": "NI",
|
996
|
+
"NI1": "NI",
|
997
|
+
"WCC": "NI",
|
998
|
+
"NUF": "NI",
|
999
|
+
},
|
1000
|
+
"SB": {"*": "S", "UJI": "SB", "SB": "SB", "118": "SB", "SBO": "SB", "3CG": "SB"},
|
1001
|
+
"MO": "MO",
|
1002
|
+
"SEG": "SE",
|
1003
|
+
"CLL": "CL",
|
1004
|
+
"CLAH": "CL",
|
1005
|
+
"CLC": {
|
1006
|
+
"*": "CL",
|
1007
|
+
"TD5": "C",
|
1008
|
+
"PII": "C",
|
1009
|
+
"TDL": "C",
|
1010
|
+
"GM2": "C",
|
1011
|
+
"TD7": "C",
|
1012
|
+
"TD6": "C",
|
1013
|
+
"PIJ": "C",
|
1014
|
+
},
|
1015
|
+
"CLD": {"*": "CL", "PII": "C", "GM2": "C", "PIJ": "C"},
|
1016
|
+
"CLAD": "CL",
|
1017
|
+
"CLAE": "CL",
|
1018
|
+
"LA": "LA",
|
1019
|
+
"RH": "RH",
|
1020
|
+
"BRAC": "BR",
|
1021
|
+
"BRAD": "BR",
|
1022
|
+
"CLBN": "CL",
|
1023
|
+
"CLAC": "CL",
|
1024
|
+
"BRAB": "BR",
|
1025
|
+
"BRAE": "BR",
|
1026
|
+
"MG": "MG",
|
1027
|
+
"IR": "IR",
|
1028
|
+
"SE": {
|
1029
|
+
"*": "SE",
|
1030
|
+
"HII": "S",
|
1031
|
+
"NT2": "S",
|
1032
|
+
"R2P": "S",
|
1033
|
+
"S2P": "S",
|
1034
|
+
"0IU": "S",
|
1035
|
+
"QMB": "S",
|
1036
|
+
"81S": "S",
|
1037
|
+
"0QB": "S",
|
1038
|
+
"UB4": "S",
|
1039
|
+
"OHS": "S",
|
1040
|
+
"Q78": "S",
|
1041
|
+
"0Y2": "S",
|
1042
|
+
"B3M": "S",
|
1043
|
+
"NT1": "S",
|
1044
|
+
"81R": "S",
|
1045
|
+
},
|
1046
|
+
"BRAG": "BR",
|
1047
|
+
"CLF": {"*": "CL", "PII": "C", "GM2": "C", "PIJ": "C"},
|
1048
|
+
"CLE": {"*": "CL", "PII": "C", "GM2": "C", "PIJ": "C"},
|
1049
|
+
"BRAX": "BR",
|
1050
|
+
"CLK": "CL",
|
1051
|
+
"ZN": "ZN",
|
1052
|
+
"AS": "AS",
|
1053
|
+
"AU": "AU",
|
1054
|
+
"PT": "PT",
|
1055
|
+
"CLAS": "CL",
|
1056
|
+
"MN": "MN",
|
1057
|
+
"CLBE": "CL",
|
1058
|
+
"CLBF": "CL",
|
1059
|
+
"CLAF": "CL",
|
1060
|
+
"NA'": {"*": "N", "CGO": "NA"},
|
1061
|
+
"BRAH": "BR",
|
1062
|
+
"BRAI": "BR",
|
1063
|
+
"BRA": "BR",
|
1064
|
+
"BRB": "BR",
|
1065
|
+
"BRAV": "BR",
|
1066
|
+
"HG": {
|
1067
|
+
"*": "HG",
|
1068
|
+
"BBA": "H",
|
1069
|
+
"MID": "H",
|
1070
|
+
"APM": "H",
|
1071
|
+
"4QQ": "H",
|
1072
|
+
"0ZG": "H",
|
1073
|
+
"APH": "H",
|
1074
|
+
},
|
1075
|
+
"AR": "AR",
|
1076
|
+
"D": "H",
|
1077
|
+
"CLAN": "CL",
|
1078
|
+
"SI": "SI",
|
1079
|
+
"CLS": "CL",
|
1080
|
+
"ZR": "ZR",
|
1081
|
+
"CLAR": {"*": "CL", "ZM4": "C"},
|
1082
|
+
"HO": "HO",
|
1083
|
+
"CLI": {"*": "CL", "GM2": "C"},
|
1084
|
+
"CLH": {"*": "CL", "GM2": "C"},
|
1085
|
+
"CLAP": "CL",
|
1086
|
+
"CLBL": "CL",
|
1087
|
+
"CLBM": "CL",
|
1088
|
+
"PR": {"*": "PR", "UF0": "P", "252": "P"},
|
1089
|
+
"IN": "IN",
|
1090
|
+
"CLJ": "CL",
|
1091
|
+
"BRU": "BR",
|
1092
|
+
"SC": {"*": "S", "SFL": "SC"},
|
1093
|
+
"CLG": {"*": "CL", "GM2": "C"},
|
1094
|
+
"BRAT": "BR",
|
1095
|
+
"BRAR": "BR",
|
1096
|
+
"CLAG": "CL",
|
1097
|
+
"CLAB": "CL",
|
1098
|
+
"CLV": "CL",
|
1099
|
+
"TI": "TI",
|
1100
|
+
"CLAX": "CL",
|
1101
|
+
"CLAJ": "CL",
|
1102
|
+
"CL'": {"*": "CL", "BNR": "C", "25A": "C", "BDA": "C"},
|
1103
|
+
"CLAW": "CL",
|
1104
|
+
"BRF": "BR",
|
1105
|
+
"BRE": "BR",
|
1106
|
+
"RE": "RE",
|
1107
|
+
"GD": "GD",
|
1108
|
+
"SM": {"*": "S", "SM": "SM"},
|
1109
|
+
"CLBH": "CL",
|
1110
|
+
"CLBI": "CL",
|
1111
|
+
"CLAI": "CL",
|
1112
|
+
"CLY": "CL",
|
1113
|
+
"CLZ": "CL",
|
1114
|
+
"AC": "AC",
|
1115
|
+
"BR'": "BR",
|
1116
|
+
"CLT": "CL",
|
1117
|
+
"CLO": "CL",
|
1118
|
+
"CLP": "CL",
|
1119
|
+
"LU": "LU",
|
1120
|
+
"BA": {"*": "B", "BA": "BA"},
|
1121
|
+
"CLAU": "CL",
|
1122
|
+
"RB": "RB",
|
1123
|
+
"LI": "LI",
|
1124
|
+
"MOM": "MO",
|
1125
|
+
"BRAQ": "BR",
|
1126
|
+
"SR": {"*": "S", "SR": "SR", "OER": "SR"},
|
1127
|
+
"CLAT": "CL",
|
1128
|
+
"BRAL": "BR",
|
1129
|
+
"SEB": "SE",
|
1130
|
+
"CLW": "CL",
|
1131
|
+
"CLX": "CL",
|
1132
|
+
"BE": "BE",
|
1133
|
+
"BRG": "BR",
|
1134
|
+
"SEA": "SE",
|
1135
|
+
"BRAW": "BR",
|
1136
|
+
"BRBB": "BR",
|
1137
|
+
"ER": "ER",
|
1138
|
+
"TH": "TH",
|
1139
|
+
"BRR": "BR",
|
1140
|
+
"CLBV": "CL",
|
1141
|
+
"AL": "AL",
|
1142
|
+
"CLAV": "CL",
|
1143
|
+
"BRH": "BR",
|
1144
|
+
"CLAQ": "CL",
|
1145
|
+
"GA": "GA",
|
1146
|
+
"X": "*",
|
1147
|
+
"TL": "TL",
|
1148
|
+
"CLBB": "CL",
|
1149
|
+
"TB": "TB",
|
1150
|
+
"CLAK": "CL",
|
1151
|
+
"XE": {"*": "*", "XE": "XE"},
|
1152
|
+
"SEL": "SE",
|
1153
|
+
"PU": {"*": "P", "4PU": "PU"},
|
1154
|
+
"CLAZ": "CL",
|
1155
|
+
"SE'": "SE",
|
1156
|
+
"CLBA": "CL",
|
1157
|
+
"SEN": "SE",
|
1158
|
+
"SNN": "SN",
|
1159
|
+
"MOB": "MO",
|
1160
|
+
"YB": "YB",
|
1161
|
+
"BRC": "BR",
|
1162
|
+
"BRD": "BR",
|
1163
|
+
"CLAM": "CL",
|
1164
|
+
"DA": "H",
|
1165
|
+
"DB": "H",
|
1166
|
+
"DC": "H",
|
1167
|
+
"DXT": "H",
|
1168
|
+
"DXU": "H",
|
1169
|
+
"DXX": "H",
|
1170
|
+
"DXY": "H",
|
1171
|
+
"DXZ": "H",
|
1172
|
+
"DY": "DY",
|
1173
|
+
"TA": "TA",
|
1174
|
+
"XD": "*",
|
1175
|
+
"SED": "SE",
|
1176
|
+
"CLAL": "CL",
|
1177
|
+
"BRAJ": "BR",
|
1178
|
+
"AM": "AM",
|
1179
|
+
"CLAO": "CL",
|
1180
|
+
"BI": "BI",
|
1181
|
+
"KR": "KR",
|
1182
|
+
"BRBJ": "BR",
|
1183
|
+
"UNK": "*",
|
1184
|
+
}
|