pyseqalignment 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. pyseqalign/__init__.py +14 -0
  2. pyseqalign/core/__init__.py +12 -0
  3. pyseqalign/core/alignment.py +67 -0
  4. pyseqalign/core/needleman_wunsch.py +122 -0
  5. pyseqalign/core/smith_waterman.py +173 -0
  6. pyseqalign/learning/__init__.py +20 -0
  7. pyseqalign/learning/aleph.py +212 -0
  8. pyseqalign/learning/aleph_files/__init__.py +0 -0
  9. pyseqalign/learning/aleph_files/aleph_swi_ak.pl +10420 -0
  10. pyseqalign/learning/base.py +68 -0
  11. pyseqalign/learning/popper.py +215 -0
  12. pyseqalign/learning/task_builder.py +213 -0
  13. pyseqalign/prolog/__init__.py +5 -0
  14. pyseqalign/prolog/engine.py +102 -0
  15. pyseqalign/prolog/knowledge/__init__.py +0 -0
  16. pyseqalign/prolog/knowledge/amino_acids.pl +53 -0
  17. pyseqalign/prolog/knowledge/blosum50.pl +800 -0
  18. pyseqalign/prolog/knowledge/defaults.pl +15 -0
  19. pyseqalign/prolog/knowledge/distances.pl +119 -0
  20. pyseqalign/scoring/__init__.py +11 -0
  21. pyseqalign/scoring/distance.py +100 -0
  22. pyseqalign/scoring/matrices.py +362 -0
  23. pyseqalign/scoring/matrix_data/BLOSUM100 +31 -0
  24. pyseqalign/scoring/matrix_data/BLOSUM50 +31 -0
  25. pyseqalign/scoring/matrix_data/BLOSUM60 +31 -0
  26. pyseqalign/scoring/matrix_data/BLOSUM62 +31 -0
  27. pyseqalign/scoring/matrix_data/BLOSUM70 +31 -0
  28. pyseqalign/scoring/matrix_data/BLOSUM80 +31 -0
  29. pyseqalign/scoring/matrix_data/BLOSUM90 +31 -0
  30. pyseqalign/scoring/matrix_data/PAM150 +34 -0
  31. pyseqalign/scoring/matrix_data/PAM200 +34 -0
  32. pyseqalign/scoring/matrix_data/PAM250 +34 -0
  33. pyseqalign/scoring/matrix_data/PAM50 +34 -0
  34. pyseqalign/scoring/matrix_data/__init__.py +0 -0
  35. pyseqalign/utils/__init__.py +9 -0
  36. pyseqalign/utils/helpers.py +47 -0
  37. pyseqalignment-0.1.0.dist-info/METADATA +317 -0
  38. pyseqalignment-0.1.0.dist-info/RECORD +41 -0
  39. pyseqalignment-0.1.0.dist-info/WHEEL +5 -0
  40. pyseqalignment-0.1.0.dist-info/licenses/LICENSE +21 -0
  41. pyseqalignment-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,31 @@
1
+ # Matrix made by matblas from blosum60.iij
2
+ # * column uses minimum score
3
+ # BLOSUM Clustered Scoring Matrix in 1/2 Bit Units
4
+ # Blocks Database = /data/blocks_5.0/blocks.dat
5
+ # Cluster Percentage: >= 60
6
+ # Entropy = 0.6603, Expected = -0.4917
7
+ A R N D C Q E G H I L K M F P S T W Y V B Z X *
8
+ A 4 -1 -1 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 0 -4
9
+ R -1 5 0 -1 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -2 -1 0 -1 -4
10
+ N -1 0 6 1 -2 0 0 0 1 -3 -3 0 -2 -3 -2 1 0 -4 -2 -3 3 0 -1 -4
11
+ D -2 -1 1 6 -3 0 2 -1 -1 -3 -3 -1 -3 -3 -1 0 -1 -4 -3 -3 4 1 -1 -4
12
+ C 0 -3 -2 -3 9 -3 -3 -2 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2 -4
13
+ Q -1 1 0 0 -3 5 2 -2 1 -3 -2 1 0 -3 -1 0 -1 -2 -1 -2 0 3 -1 -4
14
+ E -1 0 0 2 -3 2 5 -2 0 -3 -3 1 -2 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4
15
+ G 0 -2 0 -1 -2 -2 -2 6 -2 -3 -4 -1 -2 -3 -2 0 -2 -2 -3 -3 -1 -2 -1 -4
16
+ H -2 0 1 -1 -3 1 0 -2 7 -3 -3 -1 -1 -1 -2 -1 -2 -2 2 -3 0 0 -1 -4
17
+ I -1 -3 -3 -3 -1 -3 -3 -3 -3 4 2 -3 1 0 -3 -2 -1 -2 -1 3 -3 -3 -1 -4
18
+ L -1 -2 -3 -3 -1 -2 -3 -4 -3 2 4 -2 2 0 -3 -2 -1 -2 -1 1 -3 -2 -1 -4
19
+ K -1 2 0 -1 -3 1 1 -1 -1 -3 -2 4 -1 -3 -1 0 -1 -3 -2 -2 0 1 -1 -4
20
+ M -1 -1 -2 -3 -1 0 -2 -2 -1 1 2 -1 5 0 -2 -1 -1 -1 -1 1 -3 -1 -1 -4
21
+ F -2 -3 -3 -3 -2 -3 -3 -3 -1 0 0 -3 0 6 -4 -2 -2 1 3 -1 -3 -3 -1 -4
22
+ P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4 7 -1 -1 -4 -3 -2 -2 -1 -2 -4
23
+ S 1 -1 1 0 -1 0 0 0 -1 -2 -2 0 -1 -2 -1 4 1 -3 -2 -2 0 0 0 -4
24
+ T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 1 4 -2 -2 0 0 -1 0 -4
25
+ W -3 -3 -4 -4 -2 -2 -3 -2 -2 -2 -2 -3 -1 1 -4 -3 -2 10 2 -3 -4 -2 -2 -4
26
+ Y -2 -2 -2 -3 -2 -1 -2 -3 2 -1 -1 -2 -1 3 -3 -2 -2 2 6 -1 -2 -2 -1 -4
27
+ V 0 -2 -3 -3 -1 -2 -2 -3 -3 3 1 -2 1 -1 -2 -2 0 -3 -1 4 -3 -2 -1 -4
28
+ B -2 -1 3 4 -3 0 1 -1 0 -3 -3 0 -3 -3 -2 0 0 -4 -2 -3 4 1 -1 -4
29
+ Z -1 0 0 1 -3 3 4 -2 0 -3 -2 1 -1 -3 -1 0 -1 -2 -2 -2 1 3 -1 -4
30
+ X 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 0 0 -2 -1 -1 -1 -1 -1 -4
31
+ * -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 1
@@ -0,0 +1,31 @@
1
+ # Matrix made by matblas from blosum62.iij
2
+ # * column uses minimum score
3
+ # BLOSUM Clustered Scoring Matrix in 1/2 Bit Units
4
+ # Blocks Database = /data/blocks_5.0/blocks.dat
5
+ # Cluster Percentage: >= 62
6
+ # Entropy = 0.6979, Expected = -0.5209
7
+ A R N D C Q E G H I L K M F P S T W Y V B Z X *
8
+ A 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 0 -4
9
+ R -1 5 0 -2 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -3 -1 0 -1 -4
10
+ N -2 0 6 1 -3 0 0 0 1 -3 -3 0 -2 -3 -2 1 0 -4 -2 -3 3 0 -1 -4
11
+ D -2 -2 1 6 -3 0 2 -1 -1 -3 -4 -1 -3 -3 -1 0 -1 -4 -3 -3 4 1 -1 -4
12
+ C 0 -3 -3 -3 9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2 -4
13
+ Q -1 1 0 0 -3 5 2 -2 0 -3 -2 1 0 -3 -1 0 -1 -2 -1 -2 0 3 -1 -4
14
+ E -1 0 0 2 -4 2 5 -2 0 -3 -3 1 -2 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4
15
+ G 0 -2 0 -1 -3 -2 -2 6 -2 -4 -4 -2 -3 -3 -2 0 -2 -2 -3 -3 -1 -2 -1 -4
16
+ H -2 0 1 -1 -3 0 0 -2 8 -3 -3 -1 -2 -1 -2 -1 -2 -2 2 -3 0 0 -1 -4
17
+ I -1 -3 -3 -3 -1 -3 -3 -4 -3 4 2 -3 1 0 -3 -2 -1 -3 -1 3 -3 -3 -1 -4
18
+ L -1 -2 -3 -4 -1 -2 -3 -4 -3 2 4 -2 2 0 -3 -2 -1 -2 -1 1 -4 -3 -1 -4
19
+ K -1 2 0 -1 -3 1 1 -2 -1 -3 -2 5 -1 -3 -1 0 -1 -3 -2 -2 0 1 -1 -4
20
+ M -1 -1 -2 -3 -1 0 -2 -3 -2 1 2 -1 5 0 -2 -1 -1 -1 -1 1 -3 -1 -1 -4
21
+ F -2 -3 -3 -3 -2 -3 -3 -3 -1 0 0 -3 0 6 -4 -2 -2 1 3 -1 -3 -3 -1 -4
22
+ P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4 7 -1 -1 -4 -3 -2 -2 -1 -2 -4
23
+ S 1 -1 1 0 -1 0 0 0 -1 -2 -2 0 -1 -2 -1 4 1 -3 -2 -2 0 0 0 -4
24
+ T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 1 5 -2 -2 0 -1 -1 0 -4
25
+ W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1 1 -4 -3 -2 11 2 -3 -4 -3 -2 -4
26
+ Y -2 -2 -2 -3 -2 -1 -2 -3 2 -1 -1 -2 -1 3 -3 -2 -2 2 7 -1 -3 -2 -1 -4
27
+ V 0 -3 -3 -3 -1 -2 -2 -3 -3 3 1 -2 1 -1 -2 -2 0 -3 -1 4 -3 -2 -1 -4
28
+ B -2 -1 3 4 -3 0 1 -1 0 -3 -4 0 -3 -3 -2 0 -1 -4 -3 -3 4 1 -1 -4
29
+ Z -1 0 0 1 -3 3 4 -2 0 -3 -3 1 -1 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4
30
+ X 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 0 0 -2 -1 -1 -1 -1 -1 -4
31
+ * -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 1
@@ -0,0 +1,31 @@
1
+ # Matrix made by matblas from blosum70.iij
2
+ # * column uses minimum score
3
+ # BLOSUM Clustered Scoring Matrix in 1/2 Bit Units
4
+ # Blocks Database = /data/blocks_5.0/blocks.dat
5
+ # Cluster Percentage: >= 70
6
+ # Entropy = 0.8391, Expected = -0.6313
7
+ A R N D C Q E G H I L K M F P S T W Y V B Z X *
8
+ A 4 -2 -2 -2 -1 -1 -1 0 -2 -2 -2 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 -1 -5
9
+ R -2 6 -1 -2 -4 1 0 -3 0 -3 -3 2 -2 -3 -2 -1 -1 -3 -2 -3 -1 0 -1 -5
10
+ N -2 -1 6 1 -3 0 0 -1 0 -4 -4 0 -2 -3 -2 0 0 -4 -2 -3 3 0 -1 -5
11
+ D -2 -2 1 6 -4 -1 1 -2 -1 -4 -4 -1 -3 -4 -2 0 -1 -5 -4 -4 4 1 -2 -5
12
+ C -1 -4 -3 -4 9 -3 -4 -3 -4 -1 -2 -4 -2 -2 -3 -1 -1 -3 -3 -1 -4 -4 -2 -5
13
+ Q -1 1 0 -1 -3 6 2 -2 1 -3 -2 1 0 -3 -2 0 -1 -2 -2 -2 0 3 -1 -5
14
+ E -1 0 0 1 -4 2 5 -2 0 -4 -3 1 -2 -4 -1 0 -1 -4 -3 -3 1 4 -1 -5
15
+ G 0 -3 -1 -2 -3 -2 -2 6 -2 -4 -4 -2 -3 -4 -3 -1 -2 -3 -4 -4 -1 -2 -2 -5
16
+ H -2 0 0 -1 -4 1 0 -2 8 -4 -3 -1 -2 -1 -2 -1 -2 -2 2 -3 -1 0 -1 -5
17
+ I -2 -3 -4 -4 -1 -3 -4 -4 -4 4 2 -3 1 0 -3 -3 -1 -3 -1 3 -4 -3 -1 -5
18
+ L -2 -3 -4 -4 -2 -2 -3 -4 -3 2 4 -3 2 0 -3 -3 -2 -2 -1 1 -4 -3 -1 -5
19
+ K -1 2 0 -1 -4 1 1 -2 -1 -3 -3 5 -2 -3 -1 0 -1 -3 -2 -3 -1 1 -1 -5
20
+ M -1 -2 -2 -3 -2 0 -2 -3 -2 1 2 -2 6 0 -3 -2 -1 -2 -1 1 -3 -2 -1 -5
21
+ F -2 -3 -3 -4 -2 -3 -4 -4 -1 0 0 -3 0 6 -4 -3 -2 1 3 -1 -4 -4 -2 -5
22
+ P -1 -2 -2 -2 -3 -2 -1 -3 -2 -3 -3 -1 -3 -4 8 -1 -1 -4 -3 -3 -2 -1 -2 -5
23
+ S 1 -1 0 0 -1 0 0 -1 -1 -3 -3 0 -2 -3 -1 4 1 -3 -2 -2 0 0 -1 -5
24
+ T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -2 -1 -1 -2 -1 1 5 -3 -2 0 -1 -1 -1 -5
25
+ W -3 -3 -4 -5 -3 -2 -4 -3 -2 -3 -2 -3 -2 1 -4 -3 -3 11 2 -3 -4 -3 -3 -5
26
+ Y -2 -2 -2 -4 -3 -2 -3 -4 2 -1 -1 -2 -1 3 -3 -2 -2 2 7 -2 -3 -2 -2 -5
27
+ V 0 -3 -3 -4 -1 -2 -3 -4 -3 3 1 -3 1 -1 -3 -2 0 -3 -2 4 -3 -3 -1 -5
28
+ B -2 -1 3 4 -4 0 1 -1 -1 -4 -4 -1 -3 -4 -2 0 -1 -4 -3 -3 4 0 -1 -5
29
+ Z -1 0 0 1 -4 3 4 -2 0 -3 -3 1 -2 -4 -1 0 -1 -3 -2 -3 0 4 -1 -5
30
+ X -1 -1 -1 -2 -2 -1 -1 -2 -1 -1 -1 -1 -1 -2 -2 -1 -1 -3 -2 -1 -1 -1 -1 -5
31
+ * -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 1
@@ -0,0 +1,31 @@
1
+ # Matrix made by matblas from blosum80_3.iij
2
+ # * column uses minimum score
3
+ # BLOSUM Clustered Scoring Matrix in 1/3 Bit Units
4
+ # Blocks Database = /data/blocks_5.0/blocks.dat
5
+ # Cluster Percentage: >= 80
6
+ # Entropy = 0.9868, Expected = -0.7442
7
+ A R N D C Q E G H I L K M F P S T W Y V B Z X *
8
+ A 7 -3 -3 -3 -1 -2 -2 0 -3 -3 -3 -1 -2 -4 -1 2 0 -5 -4 -1 -3 -2 -1 -8
9
+ R -3 9 -1 -3 -6 1 -1 -4 0 -5 -4 3 -3 -5 -3 -2 -2 -5 -4 -4 -2 0 -2 -8
10
+ N -3 -1 9 2 -5 0 -1 -1 1 -6 -6 0 -4 -6 -4 1 0 -7 -4 -5 5 -1 -2 -8
11
+ D -3 -3 2 10 -7 -1 2 -3 -2 -7 -7 -2 -6 -6 -3 -1 -2 -8 -6 -6 6 1 -3 -8
12
+ C -1 -6 -5 -7 13 -5 -7 -6 -7 -2 -3 -6 -3 -4 -6 -2 -2 -5 -5 -2 -6 -7 -4 -8
13
+ Q -2 1 0 -1 -5 9 3 -4 1 -5 -4 2 -1 -5 -3 -1 -1 -4 -3 -4 -1 5 -2 -8
14
+ E -2 -1 -1 2 -7 3 8 -4 0 -6 -6 1 -4 -6 -2 -1 -2 -6 -5 -4 1 6 -2 -8
15
+ G 0 -4 -1 -3 -6 -4 -4 9 -4 -7 -7 -3 -5 -6 -5 -1 -3 -6 -6 -6 -2 -4 -3 -8
16
+ H -3 0 1 -2 -7 1 0 -4 12 -6 -5 -1 -4 -2 -4 -2 -3 -4 3 -5 -1 0 -2 -8
17
+ I -3 -5 -6 -7 -2 -5 -6 -7 -6 7 2 -5 2 -1 -5 -4 -2 -5 -3 4 -6 -6 -2 -8
18
+ L -3 -4 -6 -7 -3 -4 -6 -7 -5 2 6 -4 3 0 -5 -4 -3 -4 -2 1 -7 -5 -2 -8
19
+ K -1 3 0 -2 -6 2 1 -3 -1 -5 -4 8 -3 -5 -2 -1 -1 -6 -4 -4 -1 1 -2 -8
20
+ M -2 -3 -4 -6 -3 -1 -4 -5 -4 2 3 -3 9 0 -4 -3 -1 -3 -3 1 -5 -3 -2 -8
21
+ F -4 -5 -6 -6 -4 -5 -6 -6 -2 -1 0 -5 0 10 -6 -4 -4 0 4 -2 -6 -6 -3 -8
22
+ P -1 -3 -4 -3 -6 -3 -2 -5 -4 -5 -5 -2 -4 -6 12 -2 -3 -7 -6 -4 -4 -2 -3 -8
23
+ S 2 -2 1 -1 -2 -1 -1 -1 -2 -4 -4 -1 -3 -4 -2 7 2 -6 -3 -3 0 -1 -1 -8
24
+ T 0 -2 0 -2 -2 -1 -2 -3 -3 -2 -3 -1 -1 -4 -3 2 8 -5 -3 0 -1 -2 -1 -8
25
+ W -5 -5 -7 -8 -5 -4 -6 -6 -4 -5 -4 -6 -3 0 -7 -6 -5 16 3 -5 -8 -5 -5 -8
26
+ Y -4 -4 -4 -6 -5 -3 -5 -6 3 -3 -2 -4 -3 4 -6 -3 -3 3 11 -3 -5 -4 -3 -8
27
+ V -1 -4 -5 -6 -2 -4 -4 -6 -5 4 1 -4 1 -2 -4 -3 0 -5 -3 7 -6 -4 -2 -8
28
+ B -3 -2 5 6 -6 -1 1 -2 -1 -6 -7 -1 -5 -6 -4 0 -1 -8 -5 -6 6 0 -3 -8
29
+ Z -2 0 -1 1 -7 5 6 -4 0 -6 -5 1 -3 -6 -2 -1 -2 -5 -4 -4 0 6 -1 -8
30
+ X -1 -2 -2 -3 -4 -2 -2 -3 -2 -2 -2 -2 -2 -3 -3 -1 -1 -5 -3 -2 -3 -1 -2 -8
31
+ * -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 1
@@ -0,0 +1,31 @@
1
+ # Matrix made by matblas from blosum90.iij
2
+ # * column uses minimum score
3
+ # BLOSUM Clustered Scoring Matrix in 1/2 Bit Units
4
+ # Blocks Database = /data/blocks_5.0/blocks.dat
5
+ # Cluster Percentage: >= 90
6
+ # Entropy = 1.1806, Expected = -0.8887
7
+ A R N D C Q E G H I L K M F P S T W Y V B Z X *
8
+ A 5 -2 -2 -3 -1 -1 -1 0 -2 -2 -2 -1 -2 -3 -1 1 0 -4 -3 -1 -2 -1 -1 -6
9
+ R -2 6 -1 -3 -5 1 -1 -3 0 -4 -3 2 -2 -4 -3 -1 -2 -4 -3 -3 -2 0 -2 -6
10
+ N -2 -1 7 1 -4 0 -1 -1 0 -4 -4 0 -3 -4 -3 0 0 -5 -3 -4 4 -1 -2 -6
11
+ D -3 -3 1 7 -5 -1 1 -2 -2 -5 -5 -1 -4 -5 -3 -1 -2 -6 -4 -5 4 0 -2 -6
12
+ C -1 -5 -4 -5 9 -4 -6 -4 -5 -2 -2 -4 -2 -3 -4 -2 -2 -4 -4 -2 -4 -5 -3 -6
13
+ Q -1 1 0 -1 -4 7 2 -3 1 -4 -3 1 0 -4 -2 -1 -1 -3 -3 -3 -1 4 -1 -6
14
+ E -1 -1 -1 1 -6 2 6 -3 -1 -4 -4 0 -3 -5 -2 -1 -1 -5 -4 -3 0 4 -2 -6
15
+ G 0 -3 -1 -2 -4 -3 -3 6 -3 -5 -5 -2 -4 -5 -3 -1 -3 -4 -5 -5 -2 -3 -2 -6
16
+ H -2 0 0 -2 -5 1 -1 -3 8 -4 -4 -1 -3 -2 -3 -2 -2 -3 1 -4 -1 0 -2 -6
17
+ I -2 -4 -4 -5 -2 -4 -4 -5 -4 5 1 -4 1 -1 -4 -3 -1 -4 -2 3 -5 -4 -2 -6
18
+ L -2 -3 -4 -5 -2 -3 -4 -5 -4 1 5 -3 2 0 -4 -3 -2 -3 -2 0 -5 -4 -2 -6
19
+ K -1 2 0 -1 -4 1 0 -2 -1 -4 -3 6 -2 -4 -2 -1 -1 -5 -3 -3 -1 1 -1 -6
20
+ M -2 -2 -3 -4 -2 0 -3 -4 -3 1 2 -2 7 -1 -3 -2 -1 -2 -2 0 -4 -2 -1 -6
21
+ F -3 -4 -4 -5 -3 -4 -5 -5 -2 -1 0 -4 -1 7 -4 -3 -3 0 3 -2 -4 -4 -2 -6
22
+ P -1 -3 -3 -3 -4 -2 -2 -3 -3 -4 -4 -2 -3 -4 8 -2 -2 -5 -4 -3 -3 -2 -2 -6
23
+ S 1 -1 0 -1 -2 -1 -1 -1 -2 -3 -3 -1 -2 -3 -2 5 1 -4 -3 -2 0 -1 -1 -6
24
+ T 0 -2 0 -2 -2 -1 -1 -3 -2 -1 -2 -1 -1 -3 -2 1 6 -4 -2 -1 -1 -1 -1 -6
25
+ W -4 -4 -5 -6 -4 -3 -5 -4 -3 -4 -3 -5 -2 0 -5 -4 -4 11 2 -3 -6 -4 -3 -6
26
+ Y -3 -3 -3 -4 -4 -3 -4 -5 1 -2 -2 -3 -2 3 -4 -3 -2 2 8 -3 -4 -3 -2 -6
27
+ V -1 -3 -4 -5 -2 -3 -3 -5 -4 3 0 -3 0 -2 -3 -2 -1 -3 -3 5 -4 -3 -2 -6
28
+ B -2 -2 4 4 -4 -1 0 -2 -1 -5 -5 -1 -4 -4 -3 0 -1 -6 -4 -4 4 0 -2 -6
29
+ Z -1 0 -1 0 -5 4 4 -3 0 -4 -4 1 -2 -4 -2 -1 -1 -4 -3 -3 0 4 -1 -6
30
+ X -1 -2 -2 -2 -3 -1 -2 -2 -2 -2 -2 -1 -1 -2 -2 -1 -1 -3 -2 -2 -2 -1 -2 -6
31
+ * -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 1
@@ -0,0 +1,34 @@
1
+ #
2
+ # This matrix was produced by "pam" Version 1.0.6 [28-Jul-93]
3
+ #
4
+ # PAM 150 substitution matrix, scale = ln(2)/2 = 0.346574
5
+ #
6
+ # Expected score = -1.25, Entropy = 0.754 bits
7
+ #
8
+ # Lowest score = -7, Highest score = 12
9
+ #
10
+ A R N D C Q E G H I L K M F P S T W Y V B Z X *
11
+ A 3 -2 0 0 -2 -1 0 1 -2 -1 -2 -2 -1 -4 1 1 1 -6 -3 0 0 0 -1 -7
12
+ R -2 6 -1 -2 -4 1 -2 -3 1 -2 -3 3 -1 -4 -1 -1 -2 1 -4 -3 -2 0 -1 -7
13
+ N 0 -1 3 2 -4 0 1 0 2 -2 -3 1 -2 -4 -1 1 0 -4 -2 -2 3 1 -1 -7
14
+ D 0 -2 2 4 -6 1 3 0 0 -3 -5 -1 -3 -6 -2 0 -1 -7 -4 -3 3 2 -1 -7
15
+ C -2 -4 -4 -6 9 -6 -6 -4 -3 -2 -6 -6 -5 -5 -3 0 -3 -7 0 -2 -5 -6 -3 -7
16
+ Q -1 1 0 1 -6 5 2 -2 3 -3 -2 0 -1 -5 0 -1 -1 -5 -4 -2 1 4 -1 -7
17
+ E 0 -2 1 3 -6 2 4 -1 0 -2 -4 -1 -2 -6 -1 -1 -1 -7 -4 -2 2 4 -1 -7
18
+ G 1 -3 0 0 -4 -2 -1 4 -3 -3 -4 -2 -3 -5 -1 1 -1 -7 -5 -2 0 -1 -1 -7
19
+ H -2 1 2 0 -3 3 0 -3 6 -3 -2 -1 -3 -2 -1 -1 -2 -3 0 -3 1 1 -1 -7
20
+ I -1 -2 -2 -3 -2 -3 -2 -3 -3 5 1 -2 2 0 -3 -2 0 -5 -2 3 -2 -2 -1 -7
21
+ L -2 -3 -3 -5 -6 -2 -4 -4 -2 1 5 -3 3 1 -3 -3 -2 -2 -2 1 -4 -3 -2 -7
22
+ K -2 3 1 -1 -6 0 -1 -2 -1 -2 -3 4 0 -6 -2 -1 0 -4 -4 -3 0 0 -1 -7
23
+ M -1 -1 -2 -3 -5 -1 -2 -3 -3 2 3 0 7 -1 -3 -2 -1 -5 -3 1 -3 -2 -1 -7
24
+ F -4 -4 -4 -6 -5 -5 -6 -5 -2 0 1 -6 -1 7 -5 -3 -3 -1 5 -2 -5 -5 -3 -7
25
+ P 1 -1 -1 -2 -3 0 -1 -1 -1 -3 -3 -2 -3 -5 6 1 0 -6 -5 -2 -2 -1 -1 -7
26
+ S 1 -1 1 0 0 -1 -1 1 -1 -2 -3 -1 -2 -3 1 2 1 -2 -3 -1 0 -1 0 -7
27
+ T 1 -2 0 -1 -3 -1 -1 -1 -2 0 -2 0 -1 -3 0 1 4 -5 -3 0 0 -1 -1 -7
28
+ W -6 1 -4 -7 -7 -5 -7 -7 -3 -5 -2 -4 -5 -1 -6 -2 -5 12 -1 -6 -5 -6 -4 -7
29
+ Y -3 -4 -2 -4 0 -4 -4 -5 0 -2 -2 -4 -3 5 -5 -3 -3 -1 8 -3 -3 -4 -3 -7
30
+ V 0 -3 -2 -3 -2 -2 -2 -2 -3 3 1 -3 1 -2 -2 -1 0 -6 -3 4 -2 -2 -1 -7
31
+ B 0 -2 3 3 -5 1 2 0 1 -2 -4 0 -3 -5 -2 0 0 -5 -3 -2 3 2 -1 -7
32
+ Z 0 0 1 2 -6 4 4 -1 1 -2 -3 0 -2 -5 -1 -1 -1 -6 -4 -2 2 4 -1 -7
33
+ X -1 -1 -1 -1 -3 -1 -1 -1 -1 -1 -2 -1 -1 -3 -1 0 -1 -4 -3 -1 -1 -1 -1 -7
34
+ * -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 -7 1
@@ -0,0 +1,34 @@
1
+ #
2
+ # This matrix was produced by "pam" Version 1.0.6 [28-Jul-93]
3
+ #
4
+ # PAM 200 substitution matrix, scale = ln(2)/3 = 0.231049
5
+ #
6
+ # Expected score = -1.23, Entropy = 0.507 bits
7
+ #
8
+ # Lowest score = -9, Highest score = 18
9
+ #
10
+ A R N D C Q E G H I L K M F P S T W Y V B Z X *
11
+ A 3 -2 0 0 -3 -1 0 1 -2 -1 -2 -2 -2 -4 1 1 1 -7 -4 0 0 0 0 -9
12
+ R -2 7 0 -2 -4 1 -2 -4 2 -2 -4 4 -1 -5 0 -1 -1 2 -5 -3 -1 0 -1 -9
13
+ N 0 0 3 3 -5 1 2 0 2 -2 -4 1 -2 -4 -1 1 0 -5 -2 -2 3 1 0 -9
14
+ D 0 -2 3 5 -6 2 4 0 0 -3 -5 0 -4 -7 -2 0 0 -8 -5 -3 4 3 -1 -9
15
+ C -3 -4 -5 -6 12 -7 -7 -4 -4 -3 -7 -7 -6 -6 -4 0 -3 -9 0 -2 -5 -7 -4 -9
16
+ Q -1 1 1 2 -7 5 3 -2 3 -3 -2 1 -1 -6 0 -1 -1 -6 -5 -3 1 4 -1 -9
17
+ E 0 -2 2 4 -7 3 5 0 0 -3 -4 0 -3 -7 -1 0 -1 -9 -5 -2 3 4 -1 -9
18
+ G 1 -4 0 0 -4 -2 0 6 -3 -3 -5 -2 -4 -6 -1 1 0 -8 -6 -2 0 -1 -1 -9
19
+ H -2 2 2 0 -4 3 0 -3 8 -3 -3 -1 -3 -2 -1 -1 -2 -3 0 -3 1 2 -1 -9
20
+ I -1 -2 -2 -3 -3 -3 -3 -3 -3 6 2 -2 2 1 -3 -2 0 -6 -2 4 -3 -3 -1 -9
21
+ L -2 -4 -4 -5 -7 -2 -4 -5 -3 2 7 -4 4 2 -3 -4 -2 -2 -2 2 -4 -3 -2 -9
22
+ K -2 4 1 0 -7 1 0 -2 -1 -2 -4 6 1 -7 -2 0 0 -4 -5 -3 0 0 -1 -9
23
+ M -2 -1 -2 -4 -6 -1 -3 -4 -3 2 4 1 8 0 -3 -2 -1 -5 -3 2 -3 -2 -1 -9
24
+ F -4 -5 -4 -7 -6 -6 -7 -6 -2 1 2 -7 0 10 -6 -4 -4 0 7 -2 -6 -6 -3 -9
25
+ P 1 0 -1 -2 -4 0 -1 -1 -1 -3 -3 -2 -3 -6 7 1 0 -7 -6 -2 -1 -1 -1 -9
26
+ S 1 -1 1 0 0 -1 0 1 -1 -2 -4 0 -2 -4 1 2 2 -3 -3 -1 1 -1 0 -9
27
+ T 1 -1 0 0 -3 -1 -1 0 -2 0 -2 0 -1 -4 0 2 4 -6 -3 0 0 -1 0 -9
28
+ W -7 2 -5 -8 -9 -6 -9 -8 -3 -6 -2 -4 -5 0 -7 -3 -6 18 -1 -8 -6 -7 -5 -9
29
+ Y -4 -5 -2 -5 0 -5 -5 -6 0 -2 -2 -5 -3 7 -6 -3 -3 -1 11 -3 -4 -5 -3 -9
30
+ V 0 -3 -2 -3 -2 -3 -2 -2 -3 4 2 -3 2 -2 -2 -1 0 -8 -3 5 -3 -2 -1 -9
31
+ B 0 -1 3 4 -5 1 3 0 1 -3 -4 0 -3 -6 -1 1 0 -6 -4 -3 3 2 -1 -9
32
+ Z 0 0 1 3 -7 4 4 -1 2 -3 -3 0 -2 -6 -1 -1 -1 -7 -5 -2 2 4 -1 -9
33
+ X 0 -1 0 -1 -4 -1 -1 -1 -1 -1 -2 -1 -1 -3 -1 0 0 -5 -3 -1 -1 -1 -1 -9
34
+ * -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 1
@@ -0,0 +1,34 @@
1
+ #
2
+ # This matrix was produced by "pam" Version 1.0.6 [28-Jul-93]
3
+ #
4
+ # PAM 250 substitution matrix, scale = ln(2)/3 = 0.231049
5
+ #
6
+ # Expected score = -0.844, Entropy = 0.354 bits
7
+ #
8
+ # Lowest score = -8, Highest score = 17
9
+ #
10
+ A R N D C Q E G H I L K M F P S T W Y V B Z X *
11
+ A 2 -2 0 0 -2 0 0 1 -1 -1 -2 -1 -1 -3 1 1 1 -6 -3 0 0 0 0 -8
12
+ R -2 6 0 -1 -4 1 -1 -3 2 -2 -3 3 0 -4 0 0 -1 2 -4 -2 -1 0 -1 -8
13
+ N 0 0 2 2 -4 1 1 0 2 -2 -3 1 -2 -3 0 1 0 -4 -2 -2 2 1 0 -8
14
+ D 0 -1 2 4 -5 2 3 1 1 -2 -4 0 -3 -6 -1 0 0 -7 -4 -2 3 3 -1 -8
15
+ C -2 -4 -4 -5 12 -5 -5 -3 -3 -2 -6 -5 -5 -4 -3 0 -2 -8 0 -2 -4 -5 -3 -8
16
+ Q 0 1 1 2 -5 4 2 -1 3 -2 -2 1 -1 -5 0 -1 -1 -5 -4 -2 1 3 -1 -8
17
+ E 0 -1 1 3 -5 2 4 0 1 -2 -3 0 -2 -5 -1 0 0 -7 -4 -2 3 3 -1 -8
18
+ G 1 -3 0 1 -3 -1 0 5 -2 -3 -4 -2 -3 -5 0 1 0 -7 -5 -1 0 0 -1 -8
19
+ H -1 2 2 1 -3 3 1 -2 6 -2 -2 0 -2 -2 0 -1 -1 -3 0 -2 1 2 -1 -8
20
+ I -1 -2 -2 -2 -2 -2 -2 -3 -2 5 2 -2 2 1 -2 -1 0 -5 -1 4 -2 -2 -1 -8
21
+ L -2 -3 -3 -4 -6 -2 -3 -4 -2 2 6 -3 4 2 -3 -3 -2 -2 -1 2 -3 -3 -1 -8
22
+ K -1 3 1 0 -5 1 0 -2 0 -2 -3 5 0 -5 -1 0 0 -3 -4 -2 1 0 -1 -8
23
+ M -1 0 -2 -3 -5 -1 -2 -3 -2 2 4 0 6 0 -2 -2 -1 -4 -2 2 -2 -2 -1 -8
24
+ F -3 -4 -3 -6 -4 -5 -5 -5 -2 1 2 -5 0 9 -5 -3 -3 0 7 -1 -4 -5 -2 -8
25
+ P 1 0 0 -1 -3 0 -1 0 0 -2 -3 -1 -2 -5 6 1 0 -6 -5 -1 -1 0 -1 -8
26
+ S 1 0 1 0 0 -1 0 1 -1 -1 -3 0 -2 -3 1 2 1 -2 -3 -1 0 0 0 -8
27
+ T 1 -1 0 0 -2 -1 0 0 -1 0 -2 0 -1 -3 0 1 3 -5 -3 0 0 -1 0 -8
28
+ W -6 2 -4 -7 -8 -5 -7 -7 -3 -5 -2 -3 -4 0 -6 -2 -5 17 0 -6 -5 -6 -4 -8
29
+ Y -3 -4 -2 -4 0 -4 -4 -5 0 -1 -1 -4 -2 7 -5 -3 -3 0 10 -2 -3 -4 -2 -8
30
+ V 0 -2 -2 -2 -2 -2 -2 -1 -2 4 2 -2 2 -1 -1 -1 0 -6 -2 4 -2 -2 -1 -8
31
+ B 0 -1 2 3 -4 1 3 0 1 -2 -3 1 -2 -4 -1 0 0 -5 -3 -2 3 2 -1 -8
32
+ Z 0 0 1 3 -5 3 3 0 2 -2 -3 0 -2 -5 0 0 -1 -6 -4 -2 2 3 -1 -8
33
+ X 0 -1 0 -1 -3 -1 -1 -1 -1 -1 -1 -1 -1 -2 -1 0 0 -4 -2 -1 -1 -1 -1 -8
34
+ * -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 1
@@ -0,0 +1,34 @@
1
+ #
2
+ # This matrix was produced by "pam" Version 1.0.6 [28-Jul-93]
3
+ #
4
+ # PAM 50 substitution matrix, scale = ln(2)/2 = 0.346574
5
+ #
6
+ # Expected score = -3.70, Entropy = 2.00 bits
7
+ #
8
+ # Lowest score = -13, Highest score = 13
9
+ #
10
+ A R N D C Q E G H I L K M F P S T W Y V B Z X *
11
+ A 5 -5 -2 -2 -5 -3 -1 -1 -5 -3 -5 -5 -4 -7 0 0 0 -11 -6 -1 -2 -2 -2 -13
12
+ R -5 8 -4 -7 -6 0 -7 -7 0 -4 -7 1 -3 -8 -3 -2 -5 -1 -8 -6 -5 -2 -4 -13
13
+ N -2 -4 7 2 -8 -2 -1 -2 1 -4 -6 0 -6 -7 -4 1 -1 -7 -3 -6 5 -1 -2 -13
14
+ D -2 -7 2 7 -11 -1 3 -2 -2 -6 -10 -3 -8 -12 -6 -2 -3 -12 -9 -6 6 2 -4 -13
15
+ C -5 -6 -8 -11 9 -11 -11 -7 -6 -5 -12 -11 -11 -10 -6 -2 -6 -13 -3 -5 -9 -11 -7 -13
16
+ Q -3 0 -2 -1 -11 8 2 -5 2 -6 -4 -2 -3 -10 -2 -4 -4 -10 -9 -5 -2 6 -3 -13
17
+ E -1 -7 -1 3 -11 2 7 -3 -3 -4 -7 -3 -5 -11 -4 -3 -4 -13 -7 -5 2 6 -3 -13
18
+ G -1 -7 -2 -2 -7 -5 -3 6 -7 -8 -9 -6 -7 -8 -4 -1 -4 -12 -11 -4 -2 -4 -4 -13
19
+ H -5 0 1 -2 -6 2 -3 -7 9 -7 -5 -4 -8 -5 -3 -4 -5 -6 -2 -5 0 0 -4 -13
20
+ I -3 -4 -4 -6 -5 -6 -4 -8 -7 8 0 -5 0 -1 -7 -5 -1 -11 -5 3 -5 -5 -3 -13
21
+ L -5 -7 -6 -10 -12 -4 -7 -9 -5 0 6 -6 2 -1 -6 -7 -5 -5 -5 -1 -7 -5 -5 -13
22
+ K -5 1 0 -3 -11 -2 -3 -6 -4 -5 -6 6 -1 -11 -5 -3 -2 -9 -8 -7 -1 -2 -4 -13
23
+ M -4 -3 -6 -8 -11 -3 -5 -7 -8 0 2 -1 10 -3 -6 -4 -3 -10 -8 0 -7 -4 -4 -13
24
+ F -7 -8 -7 -12 -10 -10 -11 -8 -5 -1 -1 -11 -3 9 -8 -5 -7 -3 3 -6 -9 -11 -6 -13
25
+ P 0 -3 -4 -6 -6 -2 -4 -4 -3 -7 -6 -5 -6 -8 8 -1 -3 -11 -11 -4 -5 -3 -4 -13
26
+ S 0 -2 1 -2 -2 -4 -3 -1 -4 -5 -7 -3 -4 -5 -1 6 1 -4 -5 -4 -1 -3 -2 -13
27
+ T 0 -5 -1 -3 -6 -4 -4 -4 -5 -1 -5 -2 -3 -7 -3 1 6 -10 -5 -2 -2 -4 -2 -13
28
+ W -11 -1 -7 -12 -13 -10 -13 -12 -6 -11 -5 -9 -10 -3 -11 -4 -10 13 -4 -12 -8 -11 -9 -13
29
+ Y -6 -8 -3 -9 -3 -9 -7 -11 -2 -5 -5 -8 -8 3 -11 -5 -5 -4 9 -6 -5 -8 -6 -13
30
+ V -1 -6 -6 -6 -5 -5 -5 -4 -5 3 -1 -7 0 -6 -4 -4 -2 -12 -6 7 -6 -5 -3 -13
31
+ B -2 -5 5 6 -9 -2 2 -2 0 -5 -7 -1 -7 -9 -5 -1 -2 -8 -5 -6 5 1 -3 -13
32
+ Z -2 -2 -1 2 -11 6 6 -4 0 -5 -5 -2 -4 -11 -3 -3 -4 -11 -8 -5 1 6 -3 -13
33
+ X -2 -4 -2 -4 -7 -3 -3 -4 -4 -3 -5 -4 -4 -6 -4 -2 -2 -9 -6 -3 -3 -3 -4 -13
34
+ * -13 -13 -13 -13 -13 -13 -13 -13 -13 -13 -13 -13 -13 -13 -13 -13 -13 -13 -13 -13 -13 -13 -13 1
File without changes
@@ -0,0 +1,9 @@
1
+ """Utility functions and helpers."""
2
+
3
# Re-export the public helper API. ``decode_sequence`` is the inverse of
# ``encode_sequence`` and is defined alongside it in ``helpers``, so it is
# exposed here as well to keep the package-level API symmetric.
from pyseqalign.utils.helpers import (
    amino_acid_to_id,
    decode_sequence,
    encode_sequence,
    id_to_amino_acid,
)

__all__ = [
    "amino_acid_to_id",
    "id_to_amino_acid",
    "encode_sequence",
    "decode_sequence",
]
@@ -0,0 +1,47 @@
1
+ """Helper utilities for sequence encoding and decoding.
2
+
3
+ Ported from the legacy aminoAcids.pl mappings and C helper functions.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from pyseqalign.scoring.matrices import AMINO_ACIDS
9
+
10
# Two-way lookup tables derived from the canonical amino acid list:
# a symbol's position in AMINO_ACIDS is its integer ID.
_AA_TO_ID: dict[str, int] = {
    symbol: position for position, symbol in enumerate(AMINO_ACIDS)
}
_ID_TO_AA: dict[int, str] = dict(enumerate(AMINO_ACIDS))
13
+
14
+
15
def amino_acid_to_id(aa: str) -> int:
    """Map a single-letter amino acid code to its integer ID.

    The code is lower-cased before the lookup, so matching is
    case-insensitive. Codes that are not in the table map to 0 (gap).
    """
    key = aa.lower()
    if key in _AA_TO_ID:
        return _AA_TO_ID[key]
    return 0
21
+
22
+
23
def id_to_amino_acid(element_id: int) -> str:
    """Map an integer element ID back to its amino acid character.

    IDs that are not in the table map to ``"-"``.
    """
    try:
        return _ID_TO_AA[element_id]
    except KeyError:
        return "-"
26
+
27
+
28
def encode_sequence(sequence: str) -> list[int]:
    """Translate an amino acid string into its list of integer IDs.

    Each character is converted independently with ``amino_acid_to_id``.

    Example::

        >>> encode_sequence("HEAGAWGHEE")
        [9, 7, 1, 8, 1, 18, 8, 9, 7, 7]
    """
    return list(map(amino_acid_to_id, sequence))
37
+
38
+
39
def decode_sequence(ids: list[int]) -> str:
    """Translate a list of integer IDs back into an amino acid string.

    Each ID is converted independently with ``id_to_amino_acid`` and the
    resulting characters are concatenated in order.

    Example::

        >>> decode_sequence([9, 7, 1, 8, 1, 18, 8, 9, 7, 7])
        'heagawghee'
    """
    symbols = map(id_to_amino_acid, ids)
    return "".join(symbols)