pepdata 1.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pepdata/__init__.py ADDED
@@ -0,0 +1,26 @@
1
+ from .amino_acid_alphabet import (
2
+ AminoAcid,
3
+ canonical_amino_acids,
4
+ canonical_amino_acid_letters,
5
+ extended_amino_acids,
6
+ extended_amino_acid_letters,
7
+ amino_acid_letter_indices,
8
+ amino_acid_name_indices,
9
+ )
10
+ from .peptide_vectorizer import PeptideVectorizer
11
+ from .version import __version__
12
+ from . import iedb
13
+
14
+
15
+
16
# Public names exposed by ``from pepdata import *``.
__all__ = [
    # sub-modules
    "iedb",
    # amino acid alphabet
    "AminoAcid",
    "canonical_amino_acids", "canonical_amino_acid_letters",
    "extended_amino_acids", "extended_amino_acid_letters",
    "amino_acid_letter_indices", "amino_acid_name_indices",
    # peptide encoding
    "PeptideVectorizer",
]
pepdata/amino_acid.py ADDED
@@ -0,0 +1,34 @@
1
+ # Licensed under the Apache License, Version 2.0 (the "License");
2
+ # you may not use this file except in compliance with the License.
3
+ # You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+
14
class AminoAcid(object):
    """
    Record describing one amino acid (or amino-acid-like token).

    Parameters
    ----------
    full_name : str
        Long name, e.g. "Alanine".

    short_name : str
        Abbreviated name, e.g. "Ala".

    letter : str
        Single-character code, e.g. "A". Equality and hashing are based
        on this field only.

    contains : collection of str, optional
        Letters this token stands for. When omitted (or empty) it defaults
        to a list holding just `letter`.
    """

    def __init__(
            self, full_name, short_name, letter, contains=None):
        self.letter = letter
        self.full_name = full_name
        self.short_name = short_name
        if not contains:
            contains = [letter]
        self.contains = contains

    def __str__(self):
        # The argument order must follow the placeholders in the template:
        # full_name, short_name, letter, contains.
        return (
            ("AminoAcid(full_name='%s', short_name='%s', letter='%s', "
             "contains=%s)") % (
                self.full_name, self.short_name, self.letter, self.contains))

    def __repr__(self):
        return str(self)

    def __eq__(self, other):
        # Only exact AminoAcid instances with the same letter compare equal.
        return other.__class__ is AminoAcid and self.letter == other.letter

    def __hash__(self):
        # Defining __eq__ alone makes instances unhashable on Python 3;
        # hash on the same field that equality compares.
        return hash(self.letter)
@@ -0,0 +1,158 @@
1
+ # Licensed under the Apache License, Version 2.0 (the "License");
2
+ # you may not use this file except in compliance with the License.
3
+ # You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+
14
+ """
15
+ Quantify amino acids by their physical/chemical properties
16
+ """
17
+
18
+ import numpy as np
19
+
20
+ from .amino_acid import AminoAcid
21
+
22
# The 20 canonical amino acids as (full name, short name, letter) records.
# The order of this list defines the index of each amino acid.
canonical_amino_acids = [
    AminoAcid(full_name, short_name, letter)
    for (full_name, short_name, letter) in (
        ("Alanine", "Ala", "A"),
        ("Arginine", "Arg", "R"),
        ("Asparagine", "Asn", "N"),
        ("Aspartic Acid", "Asp", "D"),
        ("Cysteine", "Cys", "C"),
        ("Glutamic Acid", "Glu", "E"),
        ("Glutamine", "Gln", "Q"),
        ("Glycine", "Gly", "G"),
        ("Histidine", "His", "H"),
        ("Isoleucine", "Ile", "I"),
        ("Leucine", "Leu", "L"),
        ("Lysine", "Lys", "K"),
        ("Methionine", "Met", "M"),
        ("Phenylalanine", "Phe", "F"),
        ("Proline", "Pro", "P"),
        ("Serine", "Ser", "S"),
        ("Threonine", "Thr", "T"),
        ("Tryptophan", "Trp", "W"),
        ("Tyrosine", "Tyr", "Y"),
        ("Valine", "Val", "V"),
    )
]

# One-letter codes in the same order as `canonical_amino_acids`.
canonical_amino_acid_letters = [
    residue.letter for residue in canonical_amino_acids
]
46
+
47
+ ###
48
+ # Post-translation modifications commonly detected by mass-spec
49
+ ###
50
+
51
+ # TODO: figure out three letter codes for modified AAs
52
+
53
# Lower-case letters denote modified residues; "???" marks short names
# that have not been filled in yet.
modified_amino_acids = [
    AminoAcid(full_name="Phospho-Serine", short_name="Sep", letter="s"),
    AminoAcid(full_name="Phospho-Threonine", short_name="???", letter="t"),
    AminoAcid(full_name="Phospho-Tyrosine", short_name="???", letter="y"),
    AminoAcid(full_name="Cystine", short_name="???", letter="c"),
    AminoAcid(full_name="Methionine sulfoxide", short_name="???", letter="m"),
    AminoAcid(full_name="Pyroglutamate", short_name="???", letter="q"),
    AminoAcid(full_name="Pyroglutamic acid", short_name="???", letter="n"),
]

###
# Amino acid tokens which represent multiple canonical amino acids
###
wildcard_amino_acids = [
    # "X" may be any of the canonical amino acids
    AminoAcid(
        full_name="Unknown",
        short_name="Xaa",
        letter="X",
        contains=set(canonical_amino_acid_letters)),
    AminoAcid(
        full_name="Asparagine-or-Aspartic-Acid",
        short_name="Asx",
        letter="B",
        contains={"D", "N"}),
    AminoAcid(
        full_name="Glutamine-or-Glutamic-Acid",
        short_name="Glx",
        letter="Z",
        contains={"E", "Q"}),
    AminoAcid(
        full_name="Leucine-or-Isoleucine",
        short_name="Xle",
        letter="J",
        contains={"I", "L"}),
]
72
+
73
+ ###
74
+ # Canonical amino acids + wildcard tokens
75
+ ###
76
+
77
# Canonical amino acids followed by the wildcard tokens.
canonical_amino_acids_with_unknown = list(canonical_amino_acids)
canonical_amino_acids_with_unknown.extend(wildcard_amino_acids)


###
# Rare amino acids which aren't considered part of the core 20 "canonical"
###

rare_amino_acids = [
    AminoAcid(full_name="Selenocysteine", short_name="Sec", letter="U"),
    AminoAcid(full_name="Pyrrolysine", short_name="Pyl", letter="O"),
]

###
# Extended alphabet: canonical, then rare, then wildcard tokens.
# Positions in this list are the indices used by the lookup tables below.
###

extended_amino_acids = [
    *canonical_amino_acids,
    *rare_amino_acids,
    *wildcard_amino_acids,
]
extended_amino_acid_letters = [
    residue.letter for residue in extended_amino_acids
]
extended_amino_acids_with_unknown_names = [
    residue.full_name for residue in extended_amino_acids
]


# Maps each one-letter code to its position in the extended alphabet.
amino_acid_letter_indices = dict(
    zip(extended_amino_acid_letters, range(len(extended_amino_acid_letters)))
)
106
+
107
+
108
# Every ordered two-letter combination of the extended alphabet
# ("AA", "RA", "NA", ...). The pairs are built from the one-letter codes:
# formatting the AminoAcid objects themselves would embed their full
# string representation instead of a letter. The second letter varies
# slowest, which keeps the original loop nesting (and thus the ordering).
amino_acid_letter_pairs = [
    "%s%s" % (x, y)
    for y in extended_amino_acid_letters
    for x in extended_amino_acid_letters
]
113
+
114
+
115
# Maps each full amino acid name to its position in the extended alphabet.
amino_acid_name_indices = dict(
    (name, position)
    for position, name in enumerate(extended_amino_acids_with_unknown_names)
)

# Maps each entry of `amino_acid_letter_pairs` to its position in that list.
amino_acid_pair_positions = dict(
    zip(amino_acid_letter_pairs, range(len(amino_acid_letter_pairs)))
)
123
+
124
def index_to_full_name(idx):
    """
    Return the full name of the entry at position `idx` of
    `extended_amino_acids`.
    """
    amino_acid = extended_amino_acids[idx]
    return amino_acid.full_name
126
+
127
def index_to_short_name(idx):
    """
    Return the short name of the entry at position `idx` of
    `extended_amino_acids`.
    """
    amino_acid = extended_amino_acids[idx]
    return amino_acid.short_name
129
+
130
def index_to_letter(idx):
    """
    Return the one-letter code of the entry at position `idx` of
    `extended_amino_acids`.

    This is the inverse of `letter_to_index`, so it returns the `letter`
    attribute rather than the AminoAcid object itself.
    """
    return extended_amino_acids[idx].letter
132
+
133
def letter_to_index(x):
    """
    Look up the position index of an amino acid given its letter code.

    Fails an assertion with "Unknown amino acid: ..." when `x` is not a
    key of `amino_acid_letter_indices`.
    """
    is_known = x in amino_acid_letter_indices
    assert is_known, "Unknown amino acid: %s" % x
    return amino_acid_letter_indices[x]
139
+
140
def peptide_to_indices(xs):
    """
    Convert each letter of `xs` to its index in `amino_acid_letter_indices`.

    Returns a list with one index per element of `xs`; a letter missing
    from the table raises KeyError.
    """
    indices = []
    for letter in xs:
        indices.append(amino_acid_letter_indices[letter])
    return indices
142
+
143
def letter_to_short_name(x):
    """
    Return the short name of the amino acid whose letter code is `x`.
    """
    position = letter_to_index(x)
    return index_to_short_name(position)
145
+
146
def peptide_to_short_amino_acid_names(xs):
    """
    Convert each letter of `xs` to the short name of its amino acid.

    Returns a list with one short name per element of `xs`. A letter
    missing from `amino_acid_letter_indices` raises KeyError.
    """
    # Resolve letter -> index -> AminoAcid; returning the bare index here
    # would make this function a duplicate of `peptide_to_indices`.
    return [
        extended_amino_acids[amino_acid_letter_indices[x]].short_name
        for x in xs
    ]
148
+
149
def dict_to_amino_acid_matrix(d, alphabet=canonical_amino_acids):
    """
    Turn a nested dictionary of pairwise values into a square matrix.

    Parameters
    ----------
    d : dict
        Maps a row letter to a dictionary from column letter to value.

    alphabet : sequence of objects with a `letter` attribute
        Determines which letters are copied and at which row/column.

    Returns a float32 array whose side length is `len(d)`; only the
    leading `len(alphabet)` rows and columns are filled, the rest stay 0.

    NOTE(review): the size comes from `len(d)`, not `len(alphabet)` --
    confirm callers rely on the zero padding before changing this.
    """
    side = len(d)
    matrix = np.zeros((side, side), dtype="float32")
    for row_idx, row_aa in enumerate(alphabet):
        row_values = d[row_aa.letter]
        for col_idx, col_aa in enumerate(alphabet):
            matrix[row_idx, col_idx] = row_values[col_aa.letter]
    return matrix
158
+
@@ -0,0 +1,357 @@
1
+ # Licensed under the Apache License, Version 2.0 (the "License");
2
+ # you may not use this file except in compliance with the License.
3
+ # You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ from .amino_acid_alphabet import letter_to_index
14
+
15
+ """
16
+ Quantify amino acids by their physical/chemical properties
17
+ """
18
+
19
+
20
def aa_dict_to_positional_list(aa_property_dict):
    """
    Arrange per-letter property values into a 20-element list.

    Each value is stored at the position `letter_to_index` gives for its
    letter. Assertions fail if an index falls outside 0..19 or if any of
    the 20 slots is left unfilled.
    """
    slots = [None] * 20
    for letter, value in aa_property_dict.items():
        position = letter_to_index(letter)
        assert position >= 0
        assert position < 20
        slots[position] = value
    has_gap = any(elt is None for elt in slots)
    assert not has_gap, \
        "Missing amino acids in:\n%s" % aa_property_dict.keys()
    return slots
30
+
31
def parse_property_table(table_string):
    """
    Parse a space-separated property table into a dictionary.

    Each non-blank line must start with a numeric value followed by an
    amino acid letter; any further fields are ignored. Returns a dict
    from letter to float value. Assertions fail on lines with fewer than
    two fields or on a repeated letter.
    """
    parsed = {}
    for raw_line in table_string.splitlines():
        stripped = raw_line.strip()
        if stripped:
            # consecutive spaces yield empty fields, which are discarded
            tokens = [tok for tok in stripped.split(" ") if tok.strip()]
            assert len(tokens) >= 2
            value, letter = tokens[0], tokens[1]
            assert letter not in parsed, "Repeated amino acid " + stripped
            parsed[letter] = float(value)
    return parsed
44
+
45
+
46
+ """
47
+ Amino acids property tables copied from CRASP website
48
+ """
49
+
50
+ hydropathy = parse_property_table("""
51
+ 1.80000 A ALA
52
+ -4.5000 R ARG
53
+ -3.5000 N ASN
54
+ -3.5000 D ASP
55
+ 2.50000 C CYS
56
+ -3.5000 Q GLN
57
+ -3.5000 E GLU
58
+ -0.4000 G GLY
59
+ -3.2000 H HIS
60
+ 4.50000 I ILE
61
+ 3.80000 L LEU
62
+ -3.9000 K LYS
63
+ 1.90000 M MET
64
+ 2.80000 F PHE
65
+ -1.6000 P PRO
66
+ -0.8000 S SER
67
+ -0.7000 T THR
68
+ -0.9000 W TRP
69
+ -1.3000 Y TYR
70
+ 4.20000 V VAL
71
+ """)
72
+
73
+ volume = parse_property_table("""
74
+ 91.5000 A ALA
75
+ 202.0000 R ARG
76
+ 135.2000 N ASN
77
+ 124.5000 D ASP
78
+ 118.0000 C CYS
79
+ 161.1000 Q GLN
80
+ 155.1000 E GLU
81
+ 66.40000 G GLY
82
+ 167.3000 H HIS
83
+ 168.8000 I ILE
84
+ 167.9000 L LEU
85
+ 171.3000 K LYS
86
+ 170.8000 M MET
87
+ 203.4000 F PHE
88
+ 129.3000 P PRO
89
+ 99.10000 S SER
90
+ 122.1000 T THR
91
+ 237.6000 W TRP
92
+ 203.6000 Y TYR
93
+ 141.7000 V VAL
94
+ """)
95
+
96
+ polarity = parse_property_table("""
97
+ 0.0000 A ALA
98
+ 52.000 R ARG
99
+ 3.3800 N ASN
100
+ 40.700 D ASP
101
+ 1.4800 C CYS
102
+ 3.5300 Q GLN
103
+ 49.910 E GLU
104
+ 0.0000 G GLY
105
+ 51.600 H HIS
106
+ 0.1500 I ILE
107
+ 0.4500 L LEU
108
+ 49.500 K LYS
109
+ 1.4300 M MET
110
+ 0.3500 F PHE
111
+ 1.5800 P PRO
112
+ 1.6700 S SER
113
+ 1.6600 T THR
114
+ 2.1000 W TRP
115
+ 1.6100 Y TYR
116
+ 0.1300 V VAL
117
+ """)
118
+
119
+ pK_side_chain = parse_property_table("""
120
+ 0.0000 A ALA
121
+ 12.480 R ARG
122
+ 0.0000 N ASN
123
+ 3.6500 D ASP
124
+ 8.1800 C CYS
125
+ 0.0000 Q GLN
126
+ 4.2500 E GLU
127
+ 0.0000 G GLY
128
+ 6.0000 H HIS
129
+ 0.0000 I ILE
130
+ 0.0000 L LEU
131
+ 10.530 K LYS
132
+ 0.0000 M MET
133
+ 0.0000 F PHE
134
+ 0.0000 P PRO
135
+ 0.0000 S SER
136
+ 0.0000 T THR
137
+ 0.0000 W TRP
138
+ 10.700 Y TYR
139
+ 0.0000 V VAL
140
+ """)
141
+
142
+ prct_exposed_residues = parse_property_table("""
143
+ 15.0000 A ALA
144
+ 67.0000 R ARG
145
+ 49.0000 N ASN
146
+ 50.0000 D ASP
147
+ 5.00000 C CYS
148
+ 56.0000 Q GLN
149
+ 55.0000 E GLU
150
+ 10.0000 G GLY
151
+ 34.0000 H HIS
152
+ 13.0000 I ILE
153
+ 16.0000 L LEU
154
+ 85.0000 K LYS
155
+ 20.0000 M MET
156
+ 10.0000 F PHE
157
+ 45.0000 P PRO
158
+ 32.0000 S SER
159
+ 32.0000 T THR
160
+ 17.0000 W TRP
161
+ 41.0000 Y TYR
162
+ 14.0000 V VAL
163
+ """)
164
+
165
+ hydrophilicity = parse_property_table("""
166
+ -0.5000 A ALA
167
+ 3.00000 R ARG
168
+ 0.20000 N ASN
169
+ 3.00000 D ASP
170
+ -1.0000 C CYS
171
+ 0.20000 Q GLN
172
+ 3.00000 E GLU
173
+ 0.00000 G GLY
174
+ -0.5000 H HIS
175
+ -1.8000 I ILE
176
+ -1.8000 L LEU
177
+ 3.00000 K LYS
178
+ -1.3000 M MET
179
+ -2.5000 F PHE
180
+ 0.00000 P PRO
181
+ 0.30000 S SER
182
+ -0.4000 T THR
183
+ -3.4000 W TRP
184
+ -2.3000 Y TYR
185
+ -1.5000 V VAL
186
+ """)
187
+
188
+ accessible_surface_area = parse_property_table("""
189
+ 27.8000 A ALA
190
+ 94.7000 R ARG
191
+ 60.1000 N ASN
192
+ 60.6000 D ASP
193
+ 15.5000 C CYS
194
+ 68.7000 Q GLN
195
+ 68.2000 E GLU
196
+ 24.5000 G GLY
197
+ 50.7000 H HIS
198
+ 22.8000 I ILE
199
+ 27.6000 L LEU
200
+ 103.000 K LYS
201
+ 33.5000 M MET
202
+ 25.5000 F PHE
203
+ 51.5000 P PRO
204
+ 42.0000 S SER
205
+ 45.0000 T THR
206
+ 34.7000 W TRP
207
+ 55.2000 Y TYR
208
+ 23.7000 V VAL
209
+ """)
210
+
211
+ local_flexibility = parse_property_table("""
212
+ 705.42000 A ALA
213
+ 1484.2800 R ARG
214
+ 513.46010 N ASN
215
+ 34.960000 D ASP
216
+ 2412.5601 C CYS
217
+ 1087.8300 Q GLN
218
+ 1158.6600 E GLU
219
+ 33.180000 G GLY
220
+ 1637.1300 H HIS
221
+ 5979.3701 I ILE
222
+ 4985.7300 L LEU
223
+ 699.69000 K LYS
224
+ 4491.6602 M MET
225
+ 5203.8599 F PHE
226
+ 431.96000 P PRO
227
+ 174.76000 S SER
228
+ 601.88000 T THR
229
+ 6374.0698 W TRP
230
+ 4291.1001 Y TYR
231
+ 4474.4199 V VAL
232
+ """)
233
+
234
+ accessible_surface_area_folded = parse_property_table("""
235
+ 31.5000 A ALA
236
+ 93.8000 R ARG
237
+ 62.2000 N ASN
238
+ 60.9000 D ASP
239
+ 13.9000 C CYS
240
+ 74.0000 Q GLN
241
+ 72.3000 E GLU
242
+ 25.2000 G GLY
243
+ 46.7000 H HIS
244
+ 23.0000 I ILE
245
+ 29.0000 L LEU
246
+ 110.300 K LYS
247
+ 30.5000 M MET
248
+ 28.7000 F PHE
249
+ 53.7000 P PRO
250
+ 44.2000 S SER
251
+ 46.0000 T THR
252
+ 41.7000 W TRP
253
+ 59.1000 Y TYR
254
+ 23.5000 V VAL
255
+ """)
256
+
257
+ refractivity = parse_property_table("""
258
+ 4.34000 A ALA
259
+ 26.6600 R ARG
260
+ 13.2800 N ASN
261
+ 12.0000 D ASP
262
+ 35.7700 C CYS
263
+ 17.5600 Q GLN
264
+ 17.2600 E GLU
265
+ 0.00000 G GLY
266
+ 21.8100 H HIS
267
+ 19.0600 I ILE
268
+ 18.7800 L LEU
269
+ 21.2900 K LYS
270
+ 21.6400 M MET
271
+ 29.4000 F PHE
272
+ 10.9300 P PRO
273
+ 6.35000 S SER
274
+ 11.0100 T THR
275
+ 42.5300 W TRP
276
+ 31.5300 Y TYR
277
+ 13.9200 V VAL
278
+ """)
279
+
280
+
281
# Mass of each amino acid residue, keyed by one-letter code.
# NOTE(review): the entries for A and P were corrected from 70.079 and
# 97.177, which look like digit typos: the other 18 values agree with the
# standard average residue masses, for which alanine is ~71.08 and
# proline is ~97.12. Confirm against the original data source if exact
# provenance matters.
mass = parse_property_table("""
71.079 A ALA
156.188 R ARG
114.104 N ASN
115.089 D ASP
103.144 C CYS
128.131 Q GLN
129.116 E GLU
57.052 G GLY
137.142 H HIS
113.160 I ILE
113.160 L LEU
128.174 K LYS
131.198 M MET
147.177 F PHE
97.117 P PRO
87.078 S SER
101.105 T THR
186.213 W TRP
163.170 Y TYR
99.133 V VAL
""")
303
+
304
+ ###
305
+ # Values copied from:
306
+ # "Solvent accessibility of AA in known protein structures"
307
+ # http://prowl.rockefeller.edu/aainfo/access.htm
308
+ ###
309
+ """
310
+ Solvent accessibility of AA in known protein structures
311
+
312
+ Figure 1.
313
+
314
+ S 0.70 0.20 0.10
315
+ T 0.71 0.16 0.13
316
+ A 0.48 0.35 0.17
317
+ G 0.51 0.36 0.13
318
+ P 0.78 0.13 0.09
319
+ C 0.32 0.54 0.14
320
+ D 0.81 0.09 0.10
321
+ E 0.93 0.04 0.03
322
+ Q 0.81 0.10 0.09
323
+ N 0.82 0.10 0.08
324
+ L 0.41 0.49 0.10
325
+ I 0.39 0.47 0.14
326
+ V 0.40 0.50 0.10
327
+ M 0.44 0.20 0.36
328
+ F 0.42 0.42 0.16
329
+ Y 0.67 0.20 0.13
330
+ W 0.49 0.44 0.07
331
+ K 0.93 0.02 0.05
332
+ R 0.84 0.05 0.11
333
+ H 0.66 0.19 0.15
334
+ """
335
+
336
# First numeric column of the "Figure 1" table above, keyed by the
# one-letter amino acid code.
solvent_exposed_area = {
    "S": 0.70,
    "T": 0.71,
    "A": 0.48,
    "G": 0.51,
    "P": 0.78,
    "C": 0.32,
    "D": 0.81,
    "E": 0.93,
    "Q": 0.81,
    "N": 0.82,
    "L": 0.41,
    "I": 0.39,
    "V": 0.40,
    "M": 0.44,
    "F": 0.42,
    "Y": 0.67,
    "W": 0.49,
    "K": 0.93,
    "R": 0.84,
    "H": 0.66,
}
pepdata/blosum.py ADDED
@@ -0,0 +1,73 @@
1
+ # Licensed under the Apache License, Version 2.0 (the "License");
2
+ # you may not use this file except in compliance with the License.
3
+ # You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ from os.path import join
14
+
15
+ from .static_data import MATRIX_DIR
16
+
17
+ from .amino_acid_alphabet import dict_to_amino_acid_matrix
18
+
19
def parse_blosum_table(table, coeff_type=int, key_type='row'):
    """
    Parse a table of pairwise amino acid coefficient (e.g. BLOSUM50)

    Parameters
    ----------
    table : str
        Text of the table. Lines starting with "#" and blank lines are
        ignored; the first remaining line holds the column labels and
        every following line holds a row label followed by coefficients.

    coeff_type : callable
        Converts each coefficient string to a value (default: int).

    key_type : str
        'row' returns {row_label: {column_label: coeff}},
        'pair' returns {(row_label, column_label): coeff},
        'pair_string' returns {row_label + column_label: coeff}.

    Raises ValueError if the table has no header line or the header has
    fewer than 20 labels. Rows with fewer than 21 fields and unknown
    `key_type` values fail an assertion.
    """
    lines = []
    for raw_line in table.split("\n"):
        # drop CR endline characters
        line = raw_line.replace("\r", "")
        # skip comments as well as empty or whitespace-only lines, which
        # would otherwise be mistaken for a (malformed) header or row
        if line.startswith("#") or not line.strip():
            continue
        lines.append(line)

    if not lines:
        raise ValueError(
            "Expected a header line of amino acid labels but table is empty")

    labels = lines[0].split()

    if len(labels) < 20:
        raise ValueError(
            "Expected 20+ amino acids but first line '%s' has %d fields" % (
                lines[0],
                len(labels)))
    coeffs = {}
    for line in lines[1:]:
        fields = line.split()
        assert len(fields) >= 21, \
            "Expected AA and 20+ coefficients but '%s' has %d fields" % (
                line, len(fields))
        # first field is the row label, the rest line up with `labels`
        x = fields[0]
        for i, coeff_str in enumerate(fields[1:]):
            y = labels[i]
            coeff = coeff_type(coeff_str)
            if key_type == 'pair':
                coeffs[(x, y)] = coeff
            elif key_type == 'pair_string':
                coeffs[x + y] = coeff
            else:
                assert key_type == 'row', "Unknown key type: %s" % key_type
                if x not in coeffs:
                    coeffs[x] = {}
                coeffs[x][y] = coeff
    return coeffs
60
+
61
+
62
def _read_matrix_file(name):
    """Return the text content of the file `name` inside MATRIX_DIR."""
    with open(join(MATRIX_DIR, name), 'r') as f:
        return f.read()


# Each table is read and parsed once at import time; the *_dict form is
# the nested row dictionary, the *_matrix form its array counterpart.
blosum30_dict = parse_blosum_table(_read_matrix_file('BLOSUM30'))
blosum30_matrix = dict_to_amino_acid_matrix(blosum30_dict)

blosum50_dict = parse_blosum_table(_read_matrix_file('BLOSUM50'))
blosum50_matrix = dict_to_amino_acid_matrix(blosum50_dict)

blosum62_dict = parse_blosum_table(_read_matrix_file('BLOSUM62'))
blosum62_matrix = dict_to_amino_acid_matrix(blosum62_dict)
73
+