pyseqalignment 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyseqalign/__init__.py +14 -0
- pyseqalign/core/__init__.py +12 -0
- pyseqalign/core/alignment.py +67 -0
- pyseqalign/core/needleman_wunsch.py +122 -0
- pyseqalign/core/smith_waterman.py +173 -0
- pyseqalign/learning/__init__.py +20 -0
- pyseqalign/learning/aleph.py +212 -0
- pyseqalign/learning/aleph_files/__init__.py +0 -0
- pyseqalign/learning/aleph_files/aleph_swi_ak.pl +10420 -0
- pyseqalign/learning/base.py +68 -0
- pyseqalign/learning/popper.py +215 -0
- pyseqalign/learning/task_builder.py +213 -0
- pyseqalign/prolog/__init__.py +5 -0
- pyseqalign/prolog/engine.py +102 -0
- pyseqalign/prolog/knowledge/__init__.py +0 -0
- pyseqalign/prolog/knowledge/amino_acids.pl +53 -0
- pyseqalign/prolog/knowledge/blosum50.pl +800 -0
- pyseqalign/prolog/knowledge/defaults.pl +15 -0
- pyseqalign/prolog/knowledge/distances.pl +119 -0
- pyseqalign/scoring/__init__.py +11 -0
- pyseqalign/scoring/distance.py +100 -0
- pyseqalign/scoring/matrices.py +362 -0
- pyseqalign/scoring/matrix_data/BLOSUM100 +31 -0
- pyseqalign/scoring/matrix_data/BLOSUM50 +31 -0
- pyseqalign/scoring/matrix_data/BLOSUM60 +31 -0
- pyseqalign/scoring/matrix_data/BLOSUM62 +31 -0
- pyseqalign/scoring/matrix_data/BLOSUM70 +31 -0
- pyseqalign/scoring/matrix_data/BLOSUM80 +31 -0
- pyseqalign/scoring/matrix_data/BLOSUM90 +31 -0
- pyseqalign/scoring/matrix_data/PAM150 +34 -0
- pyseqalign/scoring/matrix_data/PAM200 +34 -0
- pyseqalign/scoring/matrix_data/PAM250 +34 -0
- pyseqalign/scoring/matrix_data/PAM50 +34 -0
- pyseqalign/scoring/matrix_data/__init__.py +0 -0
- pyseqalign/utils/__init__.py +9 -0
- pyseqalign/utils/helpers.py +47 -0
- pyseqalignment-0.1.0.dist-info/METADATA +317 -0
- pyseqalignment-0.1.0.dist-info/RECORD +41 -0
- pyseqalignment-0.1.0.dist-info/WHEEL +5 -0
- pyseqalignment-0.1.0.dist-info/licenses/LICENSE +21 -0
- pyseqalignment-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# Matrix made by matblas from blosum60.iij
|
|
2
|
+
# * column uses minimum score
|
|
3
|
+
# BLOSUM Clustered Scoring Matrix in 1/2 Bit Units
|
|
4
|
+
# Blocks Database = /data/blocks_5.0/blocks.dat
|
|
5
|
+
# Cluster Percentage: >= 60
|
|
6
|
+
# Entropy = 0.6603, Expected = -0.4917
|
|
7
|
+
A R N D C Q E G H I L K M F P S T W Y V B Z X *
|
|
8
|
+
A 4 -1 -1 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 0 -4
|
|
9
|
+
R -1 5 0 -1 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -2 -1 0 -1 -4
|
|
10
|
+
N -1 0 6 1 -2 0 0 0 1 -3 -3 0 -2 -3 -2 1 0 -4 -2 -3 3 0 -1 -4
|
|
11
|
+
D -2 -1 1 6 -3 0 2 -1 -1 -3 -3 -1 -3 -3 -1 0 -1 -4 -3 -3 4 1 -1 -4
|
|
12
|
+
C 0 -3 -2 -3 9 -3 -3 -2 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2 -4
|
|
13
|
+
Q -1 1 0 0 -3 5 2 -2 1 -3 -2 1 0 -3 -1 0 -1 -2 -1 -2 0 3 -1 -4
|
|
14
|
+
E -1 0 0 2 -3 2 5 -2 0 -3 -3 1 -2 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4
|
|
15
|
+
G 0 -2 0 -1 -2 -2 -2 6 -2 -3 -4 -1 -2 -3 -2 0 -2 -2 -3 -3 -1 -2 -1 -4
|
|
16
|
+
H -2 0 1 -1 -3 1 0 -2 7 -3 -3 -1 -1 -1 -2 -1 -2 -2 2 -3 0 0 -1 -4
|
|
17
|
+
I -1 -3 -3 -3 -1 -3 -3 -3 -3 4 2 -3 1 0 -3 -2 -1 -2 -1 3 -3 -3 -1 -4
|
|
18
|
+
L -1 -2 -3 -3 -1 -2 -3 -4 -3 2 4 -2 2 0 -3 -2 -1 -2 -1 1 -3 -2 -1 -4
|
|
19
|
+
K -1 2 0 -1 -3 1 1 -1 -1 -3 -2 4 -1 -3 -1 0 -1 -3 -2 -2 0 1 -1 -4
|
|
20
|
+
M -1 -1 -2 -3 -1 0 -2 -2 -1 1 2 -1 5 0 -2 -1 -1 -1 -1 1 -3 -1 -1 -4
|
|
21
|
+
F -2 -3 -3 -3 -2 -3 -3 -3 -1 0 0 -3 0 6 -4 -2 -2 1 3 -1 -3 -3 -1 -4
|
|
22
|
+
P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4 7 -1 -1 -4 -3 -2 -2 -1 -2 -4
|
|
23
|
+
S 1 -1 1 0 -1 0 0 0 -1 -2 -2 0 -1 -2 -1 4 1 -3 -2 -2 0 0 0 -4
|
|
24
|
+
T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 1 4 -2 -2 0 0 -1 0 -4
|
|
25
|
+
W -3 -3 -4 -4 -2 -2 -3 -2 -2 -2 -2 -3 -1 1 -4 -3 -2 10 2 -3 -4 -2 -2 -4
|
|
26
|
+
Y -2 -2 -2 -3 -2 -1 -2 -3 2 -1 -1 -2 -1 3 -3 -2 -2 2 6 -1 -2 -2 -1 -4
|
|
27
|
+
V 0 -2 -3 -3 -1 -2 -2 -3 -3 3 1 -2 1 -1 -2 -2 0 -3 -1 4 -3 -2 -1 -4
|
|
28
|
+
B -2 -1 3 4 -3 0 1 -1 0 -3 -3 0 -3 -3 -2 0 0 -4 -2 -3 4 1 -1 -4
|
|
29
|
+
Z -1 0 0 1 -3 3 4 -2 0 -3 -2 1 -1 -3 -1 0 -1 -2 -2 -2 1 3 -1 -4
|
|
30
|
+
X 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 0 0 -2 -1 -1 -1 -1 -1 -4
|
|
31
|
+
* -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 1
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# Matrix made by matblas from blosum62.iij
|
|
2
|
+
# * column uses minimum score
|
|
3
|
+
# BLOSUM Clustered Scoring Matrix in 1/2 Bit Units
|
|
4
|
+
# Blocks Database = /data/blocks_5.0/blocks.dat
|
|
5
|
+
# Cluster Percentage: >= 62
|
|
6
|
+
# Entropy = 0.6979, Expected = -0.5209
|
|
7
|
+
A R N D C Q E G H I L K M F P S T W Y V B Z X *
|
|
8
|
+
A 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 0 -4
|
|
9
|
+
R -1 5 0 -2 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -3 -1 0 -1 -4
|
|
10
|
+
N -2 0 6 1 -3 0 0 0 1 -3 -3 0 -2 -3 -2 1 0 -4 -2 -3 3 0 -1 -4
|
|
11
|
+
D -2 -2 1 6 -3 0 2 -1 -1 -3 -4 -1 -3 -3 -1 0 -1 -4 -3 -3 4 1 -1 -4
|
|
12
|
+
C 0 -3 -3 -3 9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2 -4
|
|
13
|
+
Q -1 1 0 0 -3 5 2 -2 0 -3 -2 1 0 -3 -1 0 -1 -2 -1 -2 0 3 -1 -4
|
|
14
|
+
E -1 0 0 2 -4 2 5 -2 0 -3 -3 1 -2 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4
|
|
15
|
+
G 0 -2 0 -1 -3 -2 -2 6 -2 -4 -4 -2 -3 -3 -2 0 -2 -2 -3 -3 -1 -2 -1 -4
|
|
16
|
+
H -2 0 1 -1 -3 0 0 -2 8 -3 -3 -1 -2 -1 -2 -1 -2 -2 2 -3 0 0 -1 -4
|
|
17
|
+
I -1 -3 -3 -3 -1 -3 -3 -4 -3 4 2 -3 1 0 -3 -2 -1 -3 -1 3 -3 -3 -1 -4
|
|
18
|
+
L -1 -2 -3 -4 -1 -2 -3 -4 -3 2 4 -2 2 0 -3 -2 -1 -2 -1 1 -4 -3 -1 -4
|
|
19
|
+
K -1 2 0 -1 -3 1 1 -2 -1 -3 -2 5 -1 -3 -1 0 -1 -3 -2 -2 0 1 -1 -4
|
|
20
|
+
M -1 -1 -2 -3 -1 0 -2 -3 -2 1 2 -1 5 0 -2 -1 -1 -1 -1 1 -3 -1 -1 -4
|
|
21
|
+
F -2 -3 -3 -3 -2 -3 -3 -3 -1 0 0 -3 0 6 -4 -2 -2 1 3 -1 -3 -3 -1 -4
|
|
22
|
+
P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4 7 -1 -1 -4 -3 -2 -2 -1 -2 -4
|
|
23
|
+
S 1 -1 1 0 -1 0 0 0 -1 -2 -2 0 -1 -2 -1 4 1 -3 -2 -2 0 0 0 -4
|
|
24
|
+
T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 1 5 -2 -2 0 -1 -1 0 -4
|
|
25
|
+
W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1 1 -4 -3 -2 11 2 -3 -4 -3 -2 -4
|
|
26
|
+
Y -2 -2 -2 -3 -2 -1 -2 -3 2 -1 -1 -2 -1 3 -3 -2 -2 2 7 -1 -3 -2 -1 -4
|
|
27
|
+
V 0 -3 -3 -3 -1 -2 -2 -3 -3 3 1 -2 1 -1 -2 -2 0 -3 -1 4 -3 -2 -1 -4
|
|
28
|
+
B -2 -1 3 4 -3 0 1 -1 0 -3 -4 0 -3 -3 -2 0 -1 -4 -3 -3 4 1 -1 -4
|
|
29
|
+
Z -1 0 0 1 -3 3 4 -2 0 -3 -3 1 -1 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4
|
|
30
|
+
X 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 0 0 -2 -1 -1 -1 -1 -1 -4
|
|
31
|
+
* -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 1
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# Matrix made by matblas from blosum70.iij
|
|
2
|
+
# * column uses minimum score
|
|
3
|
+
# BLOSUM Clustered Scoring Matrix in 1/2 Bit Units
|
|
4
|
+
# Blocks Database = /data/blocks_5.0/blocks.dat
|
|
5
|
+
# Cluster Percentage: >= 70
|
|
6
|
+
# Entropy = 0.8391, Expected = -0.6313
|
|
7
|
+
A R N D C Q E G H I L K M F P S T W Y V B Z X *
|
|
8
|
+
A 4 -2 -2 -2 -1 -1 -1 0 -2 -2 -2 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 -1 -5
|
|
9
|
+
R -2 6 -1 -2 -4 1 0 -3 0 -3 -3 2 -2 -3 -2 -1 -1 -3 -2 -3 -1 0 -1 -5
|
|
10
|
+
N -2 -1 6 1 -3 0 0 -1 0 -4 -4 0 -2 -3 -2 0 0 -4 -2 -3 3 0 -1 -5
|
|
11
|
+
D -2 -2 1 6 -4 -1 1 -2 -1 -4 -4 -1 -3 -4 -2 0 -1 -5 -4 -4 4 1 -2 -5
|
|
12
|
+
C -1 -4 -3 -4 9 -3 -4 -3 -4 -1 -2 -4 -2 -2 -3 -1 -1 -3 -3 -1 -4 -4 -2 -5
|
|
13
|
+
Q -1 1 0 -1 -3 6 2 -2 1 -3 -2 1 0 -3 -2 0 -1 -2 -2 -2 0 3 -1 -5
|
|
14
|
+
E -1 0 0 1 -4 2 5 -2 0 -4 -3 1 -2 -4 -1 0 -1 -4 -3 -3 1 4 -1 -5
|
|
15
|
+
G 0 -3 -1 -2 -3 -2 -2 6 -2 -4 -4 -2 -3 -4 -3 -1 -2 -3 -4 -4 -1 -2 -2 -5
|
|
16
|
+
H -2 0 0 -1 -4 1 0 -2 8 -4 -3 -1 -2 -1 -2 -1 -2 -2 2 -3 -1 0 -1 -5
|
|
17
|
+
I -2 -3 -4 -4 -1 -3 -4 -4 -4 4 2 -3 1 0 -3 -3 -1 -3 -1 3 -4 -3 -1 -5
|
|
18
|
+
L -2 -3 -4 -4 -2 -2 -3 -4 -3 2 4 -3 2 0 -3 -3 -2 -2 -1 1 -4 -3 -1 -5
|
|
19
|
+
K -1 2 0 -1 -4 1 1 -2 -1 -3 -3 5 -2 -3 -1 0 -1 -3 -2 -3 -1 1 -1 -5
|
|
20
|
+
M -1 -2 -2 -3 -2 0 -2 -3 -2 1 2 -2 6 0 -3 -2 -1 -2 -1 1 -3 -2 -1 -5
|
|
21
|
+
F -2 -3 -3 -4 -2 -3 -4 -4 -1 0 0 -3 0 6 -4 -3 -2 1 3 -1 -4 -4 -2 -5
|
|
22
|
+
P -1 -2 -2 -2 -3 -2 -1 -3 -2 -3 -3 -1 -3 -4 8 -1 -1 -4 -3 -3 -2 -1 -2 -5
|
|
23
|
+
S 1 -1 0 0 -1 0 0 -1 -1 -3 -3 0 -2 -3 -1 4 1 -3 -2 -2 0 0 -1 -5
|
|
24
|
+
T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -2 -1 -1 -2 -1 1 5 -3 -2 0 -1 -1 -1 -5
|
|
25
|
+
W -3 -3 -4 -5 -3 -2 -4 -3 -2 -3 -2 -3 -2 1 -4 -3 -3 11 2 -3 -4 -3 -3 -5
|
|
26
|
+
Y -2 -2 -2 -4 -3 -2 -3 -4 2 -1 -1 -2 -1 3 -3 -2 -2 2 7 -2 -3 -2 -2 -5
|
|
27
|
+
V 0 -3 -3 -4 -1 -2 -3 -4 -3 3 1 -3 1 -1 -3 -2 0 -3 -2 4 -3 -3 -1 -5
|
|
28
|
+
B -2 -1 3 4 -4 0 1 -1 -1 -4 -4 -1 -3 -4 -2 0 -1 -4 -3 -3 4 0 -1 -5
|
|
29
|
+
Z -1 0 0 1 -4 3 4 -2 0 -3 -3 1 -2 -4 -1 0 -1 -3 -2 -3 0 4 -1 -5
|
|
30
|
+
X -1 -1 -1 -2 -2 -1 -1 -2 -1 -1 -1 -1 -1 -2 -2 -1 -1 -3 -2 -1 -1 -1 -1 -5
|
|
31
|
+
* -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 1
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# Matrix made by matblas from blosum80_3.iij
|
|
2
|
+
# * column uses minimum score
|
|
3
|
+
# BLOSUM Clustered Scoring Matrix in 1/3 Bit Units
|
|
4
|
+
# Blocks Database = /data/blocks_5.0/blocks.dat
|
|
5
|
+
# Cluster Percentage: >= 80
|
|
6
|
+
# Entropy = 0.9868, Expected = -0.7442
|
|
7
|
+
A R N D C Q E G H I L K M F P S T W Y V B Z X *
|
|
8
|
+
A 7 -3 -3 -3 -1 -2 -2 0 -3 -3 -3 -1 -2 -4 -1 2 0 -5 -4 -1 -3 -2 -1 -8
|
|
9
|
+
R -3 9 -1 -3 -6 1 -1 -4 0 -5 -4 3 -3 -5 -3 -2 -2 -5 -4 -4 -2 0 -2 -8
|
|
10
|
+
N -3 -1 9 2 -5 0 -1 -1 1 -6 -6 0 -4 -6 -4 1 0 -7 -4 -5 5 -1 -2 -8
|
|
11
|
+
D -3 -3 2 10 -7 -1 2 -3 -2 -7 -7 -2 -6 -6 -3 -1 -2 -8 -6 -6 6 1 -3 -8
|
|
12
|
+
C -1 -6 -5 -7 13 -5 -7 -6 -7 -2 -3 -6 -3 -4 -6 -2 -2 -5 -5 -2 -6 -7 -4 -8
|
|
13
|
+
Q -2 1 0 -1 -5 9 3 -4 1 -5 -4 2 -1 -5 -3 -1 -1 -4 -3 -4 -1 5 -2 -8
|
|
14
|
+
E -2 -1 -1 2 -7 3 8 -4 0 -6 -6 1 -4 -6 -2 -1 -2 -6 -5 -4 1 6 -2 -8
|
|
15
|
+
G 0 -4 -1 -3 -6 -4 -4 9 -4 -7 -7 -3 -5 -6 -5 -1 -3 -6 -6 -6 -2 -4 -3 -8
|
|
16
|
+
H -3 0 1 -2 -7 1 0 -4 12 -6 -5 -1 -4 -2 -4 -2 -3 -4 3 -5 -1 0 -2 -8
|
|
17
|
+
I -3 -5 -6 -7 -2 -5 -6 -7 -6 7 2 -5 2 -1 -5 -4 -2 -5 -3 4 -6 -6 -2 -8
|
|
18
|
+
L -3 -4 -6 -7 -3 -4 -6 -7 -5 2 6 -4 3 0 -5 -4 -3 -4 -2 1 -7 -5 -2 -8
|
|
19
|
+
K -1 3 0 -2 -6 2 1 -3 -1 -5 -4 8 -3 -5 -2 -1 -1 -6 -4 -4 -1 1 -2 -8
|
|
20
|
+
M -2 -3 -4 -6 -3 -1 -4 -5 -4 2 3 -3 9 0 -4 -3 -1 -3 -3 1 -5 -3 -2 -8
|
|
21
|
+
F -4 -5 -6 -6 -4 -5 -6 -6 -2 -1 0 -5 0 10 -6 -4 -4 0 4 -2 -6 -6 -3 -8
|
|
22
|
+
P -1 -3 -4 -3 -6 -3 -2 -5 -4 -5 -5 -2 -4 -6 12 -2 -3 -7 -6 -4 -4 -2 -3 -8
|
|
23
|
+
S 2 -2 1 -1 -2 -1 -1 -1 -2 -4 -4 -1 -3 -4 -2 7 2 -6 -3 -3 0 -1 -1 -8
|
|
24
|
+
T 0 -2 0 -2 -2 -1 -2 -3 -3 -2 -3 -1 -1 -4 -3 2 8 -5 -3 0 -1 -2 -1 -8
|
|
25
|
+
W -5 -5 -7 -8 -5 -4 -6 -6 -4 -5 -4 -6 -3 0 -7 -6 -5 16 3 -5 -8 -5 -5 -8
|
|
26
|
+
Y -4 -4 -4 -6 -5 -3 -5 -6 3 -3 -2 -4 -3 4 -6 -3 -3 3 11 -3 -5 -4 -3 -8
|
|
27
|
+
V -1 -4 -5 -6 -2 -4 -4 -6 -5 4 1 -4 1 -2 -4 -3 0 -5 -3 7 -6 -4 -2 -8
|
|
28
|
+
B -3 -2 5 6 -6 -1 1 -2 -1 -6 -7 -1 -5 -6 -4 0 -1 -8 -5 -6 6 0 -3 -8
|
|
29
|
+
Z -2 0 -1 1 -7 5 6 -4 0 -6 -5 1 -3 -6 -2 -1 -2 -5 -4 -4 0 6 -1 -8
|
|
30
|
+
X -1 -2 -2 -3 -4 -2 -2 -3 -2 -2 -2 -2 -2 -3 -3 -1 -1 -5 -3 -2 -3 -1 -2 -8
|
|
31
|
+
* -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 1
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# Matrix made by matblas from blosum90.iij
|
|
2
|
+
# * column uses minimum score
|
|
3
|
+
# BLOSUM Clustered Scoring Matrix in 1/2 Bit Units
|
|
4
|
+
# Blocks Database = /data/blocks_5.0/blocks.dat
|
|
5
|
+
# Cluster Percentage: >= 90
|
|
6
|
+
# Entropy = 1.1806, Expected = -0.8887
|
|
7
|
+
A R N D C Q E G H I L K M F P S T W Y V B Z X *
|
|
8
|
+
A 5 -2 -2 -3 -1 -1 -1 0 -2 -2 -2 -1 -2 -3 -1 1 0 -4 -3 -1 -2 -1 -1 -6
|
|
9
|
+
R -2 6 -1 -3 -5 1 -1 -3 0 -4 -3 2 -2 -4 -3 -1 -2 -4 -3 -3 -2 0 -2 -6
|
|
10
|
+
N -2 -1 7 1 -4 0 -1 -1 0 -4 -4 0 -3 -4 -3 0 0 -5 -3 -4 4 -1 -2 -6
|
|
11
|
+
D -3 -3 1 7 -5 -1 1 -2 -2 -5 -5 -1 -4 -5 -3 -1 -2 -6 -4 -5 4 0 -2 -6
|
|
12
|
+
C -1 -5 -4 -5 9 -4 -6 -4 -5 -2 -2 -4 -2 -3 -4 -2 -2 -4 -4 -2 -4 -5 -3 -6
|
|
13
|
+
Q -1 1 0 -1 -4 7 2 -3 1 -4 -3 1 0 -4 -2 -1 -1 -3 -3 -3 -1 4 -1 -6
|
|
14
|
+
E -1 -1 -1 1 -6 2 6 -3 -1 -4 -4 0 -3 -5 -2 -1 -1 -5 -4 -3 0 4 -2 -6
|
|
15
|
+
G 0 -3 -1 -2 -4 -3 -3 6 -3 -5 -5 -2 -4 -5 -3 -1 -3 -4 -5 -5 -2 -3 -2 -6
|
|
16
|
+
H -2 0 0 -2 -5 1 -1 -3 8 -4 -4 -1 -3 -2 -3 -2 -2 -3 1 -4 -1 0 -2 -6
|
|
17
|
+
I -2 -4 -4 -5 -2 -4 -4 -5 -4 5 1 -4 1 -1 -4 -3 -1 -4 -2 3 -5 -4 -2 -6
|
|
18
|
+
L -2 -3 -4 -5 -2 -3 -4 -5 -4 1 5 -3 2 0 -4 -3 -2 -3 -2 0 -5 -4 -2 -6
|
|
19
|
+
K -1 2 0 -1 -4 1 0 -2 -1 -4 -3 6 -2 -4 -2 -1 -1 -5 -3 -3 -1 1 -1 -6
|
|
20
|
+
M -2 -2 -3 -4 -2 0 -3 -4 -3 1 2 -2 7 -1 -3 -2 -1 -2 -2 0 -4 -2 -1 -6
|
|
21
|
+
F -3 -4 -4 -5 -3 -4 -5 -5 -2 -1 0 -4 -1 7 -4 -3 -3 0 3 -2 -4 -4 -2 -6
|
|
22
|
+
P -1 -3 -3 -3 -4 -2 -2 -3 -3 -4 -4 -2 -3 -4 8 -2 -2 -5 -4 -3 -3 -2 -2 -6
|
|
23
|
+
S 1 -1 0 -1 -2 -1 -1 -1 -2 -3 -3 -1 -2 -3 -2 5 1 -4 -3 -2 0 -1 -1 -6
|
|
24
|
+
T 0 -2 0 -2 -2 -1 -1 -3 -2 -1 -2 -1 -1 -3 -2 1 6 -4 -2 -1 -1 -1 -1 -6
|
|
25
|
+
W -4 -4 -5 -6 -4 -3 -5 -4 -3 -4 -3 -5 -2 0 -5 -4 -4 11 2 -3 -6 -4 -3 -6
|
|
26
|
+
Y -3 -3 -3 -4 -4 -3 -4 -5 1 -2 -2 -3 -2 3 -4 -3 -2 2 8 -3 -4 -3 -2 -6
|
|
27
|
+
V -1 -3 -4 -5 -2 -3 -3 -5 -4 3 0 -3 0 -2 -3 -2 -1 -3 -3 5 -4 -3 -2 -6
|
|
28
|
+
B -2 -2 4 4 -4 -1 0 -2 -1 -5 -5 -1 -4 -4 -3 0 -1 -6 -4 -4 4 0 -2 -6
|
|
29
|
+
Z -1 0 -1 0 -5 4 4 -3 0 -4 -4 1 -2 -4 -2 -1 -1 -4 -3 -3 0 4 -1 -6
|
|
30
|
+
X -1 -2 -2 -2 -3 -1 -2 -2 -2 -2 -2 -1 -1 -2 -2 -1 -1 -3 -2 -2 -2 -1 -2 -6
|
|
31
|
+
* -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 1
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
#
|
|
2
|
+
# This matrix was produced by "pam" Version 1.0.6 [28-Jul-93]
|
|
3
|
+
#
|
|
4
|
+
# PAM 150 substitution matrix, scale = ln(2)/2 = 0.346574
|
|
5
|
+
#
|
|
6
|
+
# Expected score = -1.25, Entropy = 0.754 bits
|
|
7
|
+
#
|
|
8
|
+
# Lowest score = -7, Highest score = 12
|
|
9
|
+
#
|
|
10
|
+
A R N D C Q E G H I L K M F P S T W Y V B Z X *
|
|
11
|
+
A 3 -2 0 0 -2 -1 0 1 -2 -1 -2 -2 -1 -4 1 1 1 -6 -3 0 0 0 -1 -7
|
|
12
|
+
R -2 6 -1 -2 -4 1 -2 -3 1 -2 -3 3 -1 -4 -1 -1 -2 1 -4 -3 -2 0 -1 -7
|
|
13
|
+
N 0 -1 3 2 -4 0 1 0 2 -2 -3 1 -2 -4 -1 1 0 -4 -2 -2 3 1 -1 -7
|
|
14
|
+
D 0 -2 2 4 -6 1 3 0 0 -3 -5 -1 -3 -6 -2 0 -1 -7 -4 -3 3 2 -1 -7
|
|
15
|
+
C -2 -4 -4 -6 9 -6 -6 -4 -3 -2 -6 -6 -5 -5 -3 0 -3 -7 0 -2 -5 -6 -3 -7
|
|
16
|
+
Q -1 1 0 1 -6 5 2 -2 3 -3 -2 0 -1 -5 0 -1 -1 -5 -4 -2 1 4 -1 -7
|
|
17
|
+
E 0 -2 1 3 -6 2 4 -1 0 -2 -4 -1 -2 -6 -1 -1 -1 -7 -4 -2 2 4 -1 -7
|
|
18
|
+
G 1 -3 0 0 -4 -2 -1 4 -3 -3 -4 -2 -3 -5 -1 1 -1 -7 -5 -2 0 -1 -1 -7
|
|
19
|
+
H -2 1 2 0 -3 3 0 -3 6 -3 -2 -1 -3 -2 -1 -1 -2 -3 0 -3 1 1 -1 -7
|
|
20
|
+
I -1 -2 -2 -3 -2 -3 -2 -3 -3 5 1 -2 2 0 -3 -2 0 -5 -2 3 -2 -2 -1 -7
|
|
21
|
+
L -2 -3 -3 -5 -6 -2 -4 -4 -2 1 5 -3 3 1 -3 -3 -2 -2 -2 1 -4 -3 -2 -7
|
|
22
|
+
K -2 3 1 -1 -6 0 -1 -2 -1 -2 -3 4 0 -6 -2 -1 0 -4 -4 -3 0 0 -1 -7
|
|
23
|
+
M -1 -1 -2 -3 -5 -1 -2 -3 -3 2 3 0 7 -1 -3 -2 -1 -5 -3 1 -3 -2 -1 -7
|
|
24
|
+
F -4 -4 -4 -6 -5 -5 -6 -5 -2 0 1 -6 -1 7 -5 -3 -3 -1 5 -2 -5 -5 -3 -7
|
|
25
|
+
P 1 -1 -1 -2 -3 0 -1 -1 -1 -3 -3 -2 -3 -5 6 1 0 -6 -5 -2 -2 -1 -1 -7
|
|
26
|
+
S 1 -1 1 0 0 -1 -1 1 -1 -2 -3 -1 -2 -3 1 2 1 -2 -3 -1 0 -1 0 -7
|
|
27
|
+
T 1 -2 0 -1 -3 -1 -1 -1 -2 0 -2 0 -1 -3 0 1 4 -5 -3 0 0 -1 -1 -7
|
|
28
|
+
W -6 1 -4 -7 -7 -5 -7 -7 -3 -5 -2 -4 -5 -1 -6 -2 -5 12 -1 -6 -5 -6 -4 -7
|
|
29
|
+
Y -3 -4 -2 -4 0 -4 -4 -5 0 -2 -2 -4 -3 5 -5 -3 -3 -1 8 -3 -3 -4 -3 -7
|
|
30
|
+
V 0 -3 -2 -3 -2 -2 -2 -2 -3 3 1 -3 1 -2 -2 -1 0 -6 -3 4 -2 -2 -1 -7
|
|
31
|
+
B 0 -2 3 3 -5 1 2 0 1 -2 -4 0 -3 -5 -2 0 0 -5 -3 -2 3 2 -1 -7
|
|
32
|
+
Z 0 0 1 2 -6 4 4 -1 1 -2 -3 0 -2 -5 -1 -1 -1 -6 -4 -2 2 4 -1 -7
|
|
33
|
+
X -1 -1 -1 -1 -3 -1 -1 -1 -1 -1 -2 -1 -1 -3 -1 0 -1 -4 -3 -1 -1 -1 -1 -7
|
|
34
|
+
* -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 1
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
#
|
|
2
|
+
# This matrix was produced by "pam" Version 1.0.6 [28-Jul-93]
|
|
3
|
+
#
|
|
4
|
+
# PAM 200 substitution matrix, scale = ln(2)/3 = 0.231049
|
|
5
|
+
#
|
|
6
|
+
# Expected score = -1.23, Entropy = 0.507 bits
|
|
7
|
+
#
|
|
8
|
+
# Lowest score = -9, Highest score = 18
|
|
9
|
+
#
|
|
10
|
+
A R N D C Q E G H I L K M F P S T W Y V B Z X *
|
|
11
|
+
A 3 -2 0 0 -3 -1 0 1 -2 -1 -2 -2 -2 -4 1 1 1 -7 -4 0 0 0 0 -9
|
|
12
|
+
R -2 7 0 -2 -4 1 -2 -4 2 -2 -4 4 -1 -5 0 -1 -1 2 -5 -3 -1 0 -1 -9
|
|
13
|
+
N 0 0 3 3 -5 1 2 0 2 -2 -4 1 -2 -4 -1 1 0 -5 -2 -2 3 1 0 -9
|
|
14
|
+
D 0 -2 3 5 -6 2 4 0 0 -3 -5 0 -4 -7 -2 0 0 -8 -5 -3 4 3 -1 -9
|
|
15
|
+
C -3 -4 -5 -6 12 -7 -7 -4 -4 -3 -7 -7 -6 -6 -4 0 -3 -9 0 -2 -5 -7 -4 -9
|
|
16
|
+
Q -1 1 1 2 -7 5 3 -2 3 -3 -2 1 -1 -6 0 -1 -1 -6 -5 -3 1 4 -1 -9
|
|
17
|
+
E 0 -2 2 4 -7 3 5 0 0 -3 -4 0 -3 -7 -1 0 -1 -9 -5 -2 3 4 -1 -9
|
|
18
|
+
G 1 -4 0 0 -4 -2 0 6 -3 -3 -5 -2 -4 -6 -1 1 0 -8 -6 -2 0 -1 -1 -9
|
|
19
|
+
H -2 2 2 0 -4 3 0 -3 8 -3 -3 -1 -3 -2 -1 -1 -2 -3 0 -3 1 2 -1 -9
|
|
20
|
+
I -1 -2 -2 -3 -3 -3 -3 -3 -3 6 2 -2 2 1 -3 -2 0 -6 -2 4 -3 -3 -1 -9
|
|
21
|
+
L -2 -4 -4 -5 -7 -2 -4 -5 -3 2 7 -4 4 2 -3 -4 -2 -2 -2 2 -4 -3 -2 -9
|
|
22
|
+
K -2 4 1 0 -7 1 0 -2 -1 -2 -4 6 1 -7 -2 0 0 -4 -5 -3 0 0 -1 -9
|
|
23
|
+
M -2 -1 -2 -4 -6 -1 -3 -4 -3 2 4 1 8 0 -3 -2 -1 -5 -3 2 -3 -2 -1 -9
|
|
24
|
+
F -4 -5 -4 -7 -6 -6 -7 -6 -2 1 2 -7 0 10 -6 -4 -4 0 7 -2 -6 -6 -3 -9
|
|
25
|
+
P 1 0 -1 -2 -4 0 -1 -1 -1 -3 -3 -2 -3 -6 7 1 0 -7 -6 -2 -1 -1 -1 -9
|
|
26
|
+
S 1 -1 1 0 0 -1 0 1 -1 -2 -4 0 -2 -4 1 2 2 -3 -3 -1 1 -1 0 -9
|
|
27
|
+
T 1 -1 0 0 -3 -1 -1 0 -2 0 -2 0 -1 -4 0 2 4 -6 -3 0 0 -1 0 -9
|
|
28
|
+
W -7 2 -5 -8 -9 -6 -9 -8 -3 -6 -2 -4 -5 0 -7 -3 -6 18 -1 -8 -6 -7 -5 -9
|
|
29
|
+
Y -4 -5 -2 -5 0 -5 -5 -6 0 -2 -2 -5 -3 7 -6 -3 -3 -1 11 -3 -4 -5 -3 -9
|
|
30
|
+
V 0 -3 -2 -3 -2 -3 -2 -2 -3 4 2 -3 2 -2 -2 -1 0 -8 -3 5 -3 -2 -1 -9
|
|
31
|
+
B 0 -1 3 4 -5 1 3 0 1 -3 -4 0 -3 -6 -1 1 0 -6 -4 -3 3 2 -1 -9
|
|
32
|
+
Z 0 0 1 3 -7 4 4 -1 2 -3 -3 0 -2 -6 -1 -1 -1 -7 -5 -2 2 4 -1 -9
|
|
33
|
+
X 0 -1 0 -1 -4 -1 -1 -1 -1 -1 -2 -1 -1 -3 -1 0 0 -5 -3 -1 -1 -1 -1 -9
|
|
34
|
+
* -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 1
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
#
|
|
2
|
+
# This matrix was produced by "pam" Version 1.0.6 [28-Jul-93]
|
|
3
|
+
#
|
|
4
|
+
# PAM 250 substitution matrix, scale = ln(2)/3 = 0.231049
|
|
5
|
+
#
|
|
6
|
+
# Expected score = -0.844, Entropy = 0.354 bits
|
|
7
|
+
#
|
|
8
|
+
# Lowest score = -8, Highest score = 17
|
|
9
|
+
#
|
|
10
|
+
A R N D C Q E G H I L K M F P S T W Y V B Z X *
|
|
11
|
+
A 2 -2 0 0 -2 0 0 1 -1 -1 -2 -1 -1 -3 1 1 1 -6 -3 0 0 0 0 -8
|
|
12
|
+
R -2 6 0 -1 -4 1 -1 -3 2 -2 -3 3 0 -4 0 0 -1 2 -4 -2 -1 0 -1 -8
|
|
13
|
+
N 0 0 2 2 -4 1 1 0 2 -2 -3 1 -2 -3 0 1 0 -4 -2 -2 2 1 0 -8
|
|
14
|
+
D 0 -1 2 4 -5 2 3 1 1 -2 -4 0 -3 -6 -1 0 0 -7 -4 -2 3 3 -1 -8
|
|
15
|
+
C -2 -4 -4 -5 12 -5 -5 -3 -3 -2 -6 -5 -5 -4 -3 0 -2 -8 0 -2 -4 -5 -3 -8
|
|
16
|
+
Q 0 1 1 2 -5 4 2 -1 3 -2 -2 1 -1 -5 0 -1 -1 -5 -4 -2 1 3 -1 -8
|
|
17
|
+
E 0 -1 1 3 -5 2 4 0 1 -2 -3 0 -2 -5 -1 0 0 -7 -4 -2 3 3 -1 -8
|
|
18
|
+
G 1 -3 0 1 -3 -1 0 5 -2 -3 -4 -2 -3 -5 0 1 0 -7 -5 -1 0 0 -1 -8
|
|
19
|
+
H -1 2 2 1 -3 3 1 -2 6 -2 -2 0 -2 -2 0 -1 -1 -3 0 -2 1 2 -1 -8
|
|
20
|
+
I -1 -2 -2 -2 -2 -2 -2 -3 -2 5 2 -2 2 1 -2 -1 0 -5 -1 4 -2 -2 -1 -8
|
|
21
|
+
L -2 -3 -3 -4 -6 -2 -3 -4 -2 2 6 -3 4 2 -3 -3 -2 -2 -1 2 -3 -3 -1 -8
|
|
22
|
+
K -1 3 1 0 -5 1 0 -2 0 -2 -3 5 0 -5 -1 0 0 -3 -4 -2 1 0 -1 -8
|
|
23
|
+
M -1 0 -2 -3 -5 -1 -2 -3 -2 2 4 0 6 0 -2 -2 -1 -4 -2 2 -2 -2 -1 -8
|
|
24
|
+
F -3 -4 -3 -6 -4 -5 -5 -5 -2 1 2 -5 0 9 -5 -3 -3 0 7 -1 -4 -5 -2 -8
|
|
25
|
+
P 1 0 0 -1 -3 0 -1 0 0 -2 -3 -1 -2 -5 6 1 0 -6 -5 -1 -1 0 -1 -8
|
|
26
|
+
S 1 0 1 0 0 -1 0 1 -1 -1 -3 0 -2 -3 1 2 1 -2 -3 -1 0 0 0 -8
|
|
27
|
+
T 1 -1 0 0 -2 -1 0 0 -1 0 -2 0 -1 -3 0 1 3 -5 -3 0 0 -1 0 -8
|
|
28
|
+
W -6 2 -4 -7 -8 -5 -7 -7 -3 -5 -2 -3 -4 0 -6 -2 -5 17 0 -6 -5 -6 -4 -8
|
|
29
|
+
Y -3 -4 -2 -4 0 -4 -4 -5 0 -1 -1 -4 -2 7 -5 -3 -3 0 10 -2 -3 -4 -2 -8
|
|
30
|
+
V 0 -2 -2 -2 -2 -2 -2 -1 -2 4 2 -2 2 -1 -1 -1 0 -6 -2 4 -2 -2 -1 -8
|
|
31
|
+
B 0 -1 2 3 -4 1 3 0 1 -2 -3 1 -2 -4 -1 0 0 -5 -3 -2 3 2 -1 -8
|
|
32
|
+
Z 0 0 1 3 -5 3 3 0 2 -2 -3 0 -2 -5 0 0 -1 -6 -4 -2 2 3 -1 -8
|
|
33
|
+
X 0 -1 0 -1 -3 -1 -1 -1 -1 -1 -1 -1 -1 -2 -1 0 0 -4 -2 -1 -1 -1 -1 -8
|
|
34
|
+
* -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 1
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
#
|
|
2
|
+
# This matrix was produced by "pam" Version 1.0.6 [28-Jul-93]
|
|
3
|
+
#
|
|
4
|
+
# PAM 50 substitution matrix, scale = ln(2)/2 = 0.346574
|
|
5
|
+
#
|
|
6
|
+
# Expected score = -3.70, Entropy = 2.00 bits
|
|
7
|
+
#
|
|
8
|
+
# Lowest score = -13, Highest score = 13
|
|
9
|
+
#
|
|
10
|
+
A R N D C Q E G H I L K M F P S T W Y V B Z X *
|
|
11
|
+
A 5 -5 -2 -2 -5 -3 -1 -1 -5 -3 -5 -5 -4 -7 0 0 0 -11 -6 -1 -2 -2 -2 -13
|
|
12
|
+
R -5 8 -4 -7 -6 0 -7 -7 0 -4 -7 1 -3 -8 -3 -2 -5 -1 -8 -6 -5 -2 -4 -13
|
|
13
|
+
N -2 -4 7 2 -8 -2 -1 -2 1 -4 -6 0 -6 -7 -4 1 -1 -7 -3 -6 5 -1 -2 -13
|
|
14
|
+
D -2 -7 2 7 -11 -1 3 -2 -2 -6 -10 -3 -8 -12 -6 -2 -3 -12 -9 -6 6 2 -4 -13
|
|
15
|
+
C -5 -6 -8 -11 9 -11 -11 -7 -6 -5 -12 -11 -11 -10 -6 -2 -6 -13 -3 -5 -9 -11 -7 -13
|
|
16
|
+
Q -3 0 -2 -1 -11 8 2 -5 2 -6 -4 -2 -3 -10 -2 -4 -4 -10 -9 -5 -2 6 -3 -13
|
|
17
|
+
E -1 -7 -1 3 -11 2 7 -3 -3 -4 -7 -3 -5 -11 -4 -3 -4 -13 -7 -5 2 6 -3 -13
|
|
18
|
+
G -1 -7 -2 -2 -7 -5 -3 6 -7 -8 -9 -6 -7 -8 -4 -1 -4 -12 -11 -4 -2 -4 -4 -13
|
|
19
|
+
H -5 0 1 -2 -6 2 -3 -7 9 -7 -5 -4 -8 -5 -3 -4 -5 -6 -2 -5 0 0 -4 -13
|
|
20
|
+
I -3 -4 -4 -6 -5 -6 -4 -8 -7 8 0 -5 0 -1 -7 -5 -1 -11 -5 3 -5 -5 -3 -13
|
|
21
|
+
L -5 -7 -6 -10 -12 -4 -7 -9 -5 0 6 -6 2 -1 -6 -7 -5 -5 -5 -1 -7 -5 -5 -13
|
|
22
|
+
K -5 1 0 -3 -11 -2 -3 -6 -4 -5 -6 6 -1 -11 -5 -3 -2 -9 -8 -7 -1 -2 -4 -13
|
|
23
|
+
M -4 -3 -6 -8 -11 -3 -5 -7 -8 0 2 -1 10 -3 -6 -4 -3 -10 -8 0 -7 -4 -4 -13
|
|
24
|
+
F -7 -8 -7 -12 -10 -10 -11 -8 -5 -1 -1 -11 -3 9 -8 -5 -7 -3 3 -6 -9 -11 -6 -13
|
|
25
|
+
P 0 -3 -4 -6 -6 -2 -4 -4 -3 -7 -6 -5 -6 -8 8 -1 -3 -11 -11 -4 -5 -3 -4 -13
|
|
26
|
+
S 0 -2 1 -2 -2 -4 -3 -1 -4 -5 -7 -3 -4 -5 -1 6 1 -4 -5 -4 -1 -3 -2 -13
|
|
27
|
+
T 0 -5 -1 -3 -6 -4 -4 -4 -5 -1 -5 -2 -3 -7 -3 1 6 -10 -5 -2 -2 -4 -2 -13
|
|
28
|
+
W -11 -1 -7 -12 -13 -10 -13 -12 -6 -11 -5 -9 -10 -3 -11 -4 -10 13 -4 -12 -8 -11 -9 -13
|
|
29
|
+
Y -6 -8 -3 -9 -3 -9 -7 -11 -2 -5 -5 -8 -8 3 -11 -5 -5 -4 9 -6 -5 -8 -6 -13
|
|
30
|
+
V -1 -6 -6 -6 -5 -5 -5 -4 -5 3 -1 -7 0 -6 -4 -4 -2 -12 -6 7 -6 -5 -3 -13
|
|
31
|
+
B -2 -5 5 6 -9 -2 2 -2 0 -5 -7 -1 -7 -9 -5 -1 -2 -8 -5 -6 5 1 -3 -13
|
|
32
|
+
Z -2 -2 -1 2 -11 6 6 -4 0 -5 -5 -2 -4 -11 -3 -3 -4 -11 -8 -5 1 6 -3 -13
|
|
33
|
+
X -2 -4 -2 -4 -7 -3 -3 -4 -4 -3 -5 -4 -4 -6 -4 -2 -2 -9 -6 -3 -3 -3 -4 -13
|
|
34
|
+
* -13 -13 -13 -13 -13 -13 -13 -13 -13 -13 -13 -13 -13 -13 -13 -13 -13 -13 -13 -13 -13 -13 -13 1
|
|
File without changes
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""Helper utilities for sequence encoding and decoding.
|
|
2
|
+
|
|
3
|
+
Ported from the legacy aminoAcids.pl mappings and C helper functions.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from pyseqalign.scoring.matrices import AMINO_ACIDS
|
|
9
|
+
|
|
10
|
+
# Forward (ID -> letter) and reverse (letter -> ID) lookup tables, both
# derived from the position of each entry in AMINO_ACIDS.
_ID_TO_AA: dict[int, str] = dict(enumerate(AMINO_ACIDS))
_AA_TO_ID: dict[str, int] = {aa: idx for idx, aa in _ID_TO_AA.items()}
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def amino_acid_to_id(aa: str) -> int:
    """Look up the integer ID of a single-letter amino acid code.

    The code is lower-cased before the lookup, so upper- and lower-case
    input resolve to the same ID. Characters that are not in the lookup
    table yield 0 (the gap ID).
    """
    key = aa.lower()
    try:
        return _AA_TO_ID[key]
    except KeyError:
        # Unknown character: fall back to the gap ID.
        return 0
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def id_to_amino_acid(element_id: int) -> str:
    """Map an integer element ID back to its amino acid character.

    IDs that are not present in the lookup table are rendered as "-".
    """
    if element_id in _ID_TO_AA:
        return _ID_TO_AA[element_id]
    return "-"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def encode_sequence(sequence: str) -> list[int]:
    """Translate an amino acid string into its list of integer IDs.

    Each character is converted independently with ``amino_acid_to_id``,
    so the result has one ID per input character, in order.

    Example::

        >>> encode_sequence("HEAGAWGHEE")
        [9, 7, 1, 8, 1, 18, 8, 9, 7, 7]
    """
    return list(map(amino_acid_to_id, sequence))
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def decode_sequence(ids: list[int]) -> str:
    """Translate a list of integer IDs back into an amino acid string.

    Each ID is converted independently with ``id_to_amino_acid`` and the
    resulting characters are concatenated in order.

    Example::

        >>> decode_sequence([9, 7, 1, 8, 1, 18, 8, 9, 7, 7])
        'heagawghee'
    """
    letters = [id_to_amino_acid(i) for i in ids]
    return "".join(letters)
|