ataserinyelMSA 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Ata Serinyel
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,70 @@
1
+ Metadata-Version: 2.2
2
+ Name: ataserinyelMSA
3
+ Version: 0.1.0
4
+ Summary: A simple MAFFT-based Multiple Sequence Alignment (MSA) library
5
+ Author-email: ataserinyel <clasher.mp2@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/ataserinyel/ataserinyelMSA
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Operating System :: OS Independent
10
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
11
+ Requires-Python: >=3.10
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE
14
+ Requires-Dist: numpy>=1.21.0
15
+
16
+ # ataserinyelMSA
17
+
18
+ A simple MAFFT-inspired Multiple Sequence Alignment (MSA) tool written in Python.
19
+
20
+ ## Installation
21
+
22
+ ```bash
23
+ pip install ataserinyelMSA
24
+ ```
25
+
26
+ ## Usage
27
+
28
+ ```bash
29
+ python main.py input.fasta output.fasta
30
+ ```
31
+
32
+ ## Example
33
+
34
+ Input (`input.fasta`):
35
+ ```
36
+ >seq1
37
+ GATTACA
38
+ >seq2
39
+ GCATGCU
40
+ >seq3
41
+ AGCTAGC
42
+ ```
43
+ Output (`output.fasta`):
44
+ ```
45
+ >seq1
46
+ -GAT-TACA
47
+ >seq2
48
+ -GC-ATGCU
49
+ >seq3
50
+ AGCTA-GC-
51
+ ```
52
+ ## Algorithm
53
+
54
+ This tool implements a simplified version of the MAFFT FFT-NS-1 algorithm:
55
+
56
+ 1. **FASTA Parsing** - Read and write FASTA format files
57
+ 2. **Pairwise Alignment** - Needleman-Wunsch global alignment algorithm
58
+ 3. **Distance Matrix** - Compute pairwise distances between sequences
59
+ 4. **Guide Tree** - UPGMA clustering algorithm
60
+ 5. **Progressive Alignment** - Align sequences following the guide tree order
61
+
62
+ ## Differences from original MAFFT
63
+
64
+ - Uses Needleman-Wunsch instead of FFT for similarity calculation
65
+ - Simple +1/-1 scoring matrix instead of advanced substitution matrices
66
+ - Suitable for small datasets
67
+
68
+ ## Author
69
+
70
+ Ata Serinyel
@@ -0,0 +1,55 @@
1
+ # ataserinyelMSA
2
+
3
+ A simple MAFFT-inspired Multiple Sequence Alignment (MSA) tool written in Python.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ pip install ataserinyelMSA
9
+ ```
10
+
11
+ ## Usage
12
+
13
+ ```bash
14
+ python main.py input.fasta output.fasta
15
+ ```
16
+
17
+ ## Example
18
+
19
+ Input (`input.fasta`):
20
+ ```
21
+ >seq1
22
+ GATTACA
23
+ >seq2
24
+ GCATGCU
25
+ >seq3
26
+ AGCTAGC
27
+ ```
28
+ Output (`output.fasta`):
29
+ ```
30
+ >seq1
31
+ -GAT-TACA
32
+ >seq2
33
+ -GC-ATGCU
34
+ >seq3
35
+ AGCTA-GC-
36
+ ```
37
+ ## Algorithm
38
+
39
+ This tool implements a simplified version of the MAFFT FFT-NS-1 algorithm:
40
+
41
+ 1. **FASTA Parsing** - Read and write FASTA format files
42
+ 2. **Pairwise Alignment** - Needleman-Wunsch global alignment algorithm
43
+ 3. **Distance Matrix** - Compute pairwise distances between sequences
44
+ 4. **Guide Tree** - UPGMA clustering algorithm
45
+ 5. **Progressive Alignment** - Align sequences following the guide tree order
46
+
47
+ ## Differences from original MAFFT
48
+
49
+ - Uses Needleman-Wunsch instead of FFT for similarity calculation
50
+ - Simple +1/-1 scoring matrix instead of advanced substitution matrices
51
+ - Suitable for small datasets
52
+
53
+ ## Author
54
+
55
+ Ata Serinyel
@@ -0,0 +1,28 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0,<77.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "ataserinyelMSA"
7
+ version = "0.1.0"
8
+ authors = [
9
+ { name="ataserinyel", email="clasher.mp2@gmail.com" }
10
+ ]
11
+ description = "A simple MAFFT-based Multiple Sequence Alignment (MSA) library"
12
+ readme = "README.md"
13
+ requires-python = ">=3.10"
14
+ license = {text = "MIT"}
15
+ classifiers = [
16
+ "Programming Language :: Python :: 3",
17
+ "Operating System :: OS Independent",
18
+ "Topic :: Scientific/Engineering :: Bio-Informatics",
19
+ ]
20
+ dependencies = [
21
+ "numpy>=1.21.0",
22
+ ]
23
+
24
+ [project.urls]
25
+ Homepage = "https://github.com/ataserinyel/ataserinyelMSA"
26
+
27
+ [tool.setuptools.packages.find]
28
+ where = ["src"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,155 @@
1
+ # Needleman-Wunsch pairwise alignment, distance matrix and progressive alignment.
2
+
3
+ import numpy as np
4
+ from src.ataserinyelMSA.scoring import get_score, GAP_PENALTY
5
+
6
def needleman_wunsch(seq1, seq2):
    """
    Globally align two sequences with the Needleman-Wunsch algorithm.

    Aligned character pairs are scored with ``get_score`` and every gap costs
    ``GAP_PENALTY``.

    Returns:
        A tuple ``(aligned_seq1, aligned_seq2)`` of equal-length strings in
        which '-' marks a gap.
    """
    rows = len(seq1) + 1
    cols = len(seq2) + 1

    # Score table, zero-initialised; the first column and first row hold the
    # cumulative cost of aligning a prefix against nothing but gaps.
    S = np.zeros([rows, cols])
    for r in range(1, rows):
        S[r, 0] = S[r-1, 0] + GAP_PENALTY
    for c in range(1, cols):
        S[0, c] = S[0, c-1] + GAP_PENALTY

    # Fill the table: each cell keeps the best of its three predecessors.
    for r in range(1, rows):
        for c in range(1, cols):
            diagonal = S[r-1, c-1] + get_score(seq1[r-1], seq2[c-1])
            from_above = S[r-1, c] + GAP_PENALTY
            from_left = S[r, c-1] + GAP_PENALTY
            S[r, c] = max(diagonal, from_above, from_left)

    # Traceback from the bottom-right corner. Characters are collected in
    # reverse order and flipped once at the end.
    top_chars = []
    bottom_chars = []
    r, c = len(seq1), len(seq2)

    while r > 0 or c > 0:
        if r > 0 and c > 0 and S[r, c] == S[r-1, c-1] + get_score(seq1[r-1], seq2[c-1]):
            # Came from the diagonal: consume one character of each sequence.
            top_chars.append(seq1[r-1])
            bottom_chars.append(seq2[c-1])
            r -= 1
            c -= 1
        elif r > 0 and S[r, c] == S[r-1, c] + GAP_PENALTY:
            # Came from above: seq1 character against a gap.
            top_chars.append(seq1[r-1])
            bottom_chars.append('-')
            r -= 1
        else:
            # Came from the left: gap against a seq2 character.
            top_chars.append('-')
            bottom_chars.append(seq2[c-1])
            c -= 1

    return ''.join(reversed(top_chars)), ''.join(reversed(bottom_chars))
52
+
53
def compute_distance(seq1, seq2):
    """
    Compute the distance between two sequences from their global alignment.

    The sequences are aligned with ``needleman_wunsch``; the fraction of
    alignment columns whose two characters are equal is the similarity, and
    the distance is ``1 - similarity`` (high similarity = low distance).

    Returns:
        A float between 0 and 1. Two empty sequences produce an empty
        alignment and are reported as distance 0.0 instead of dividing by
        zero.
    """
    aligned_seq1, aligned_seq2 = needleman_wunsch(seq1, seq2)

    total = len(aligned_seq1)
    if total == 0:
        # Nothing to compare: both inputs were empty, i.e. identical.
        return 0.0

    matches = sum(1 for a, b in zip(aligned_seq1, aligned_seq2) if a == b)
    similarity = matches / total

    return 1 - similarity
73
+
74
def distance_matrix(sequences):
    """
    Compute the pairwise distance between every pair of sequences.

    Args:
        sequences: mapping of name to sequence, e.g.
            ``{'seq1': 'GATTACA', 'seq2': 'GCATGCU', ...}``.

    Returns:
        ``(matrix, names)`` where ``matrix`` is a square NumPy array of
        distances and ``names`` lists the sequence names in row/column order.
    """
    names = list(sequences.keys())
    seqs = list(sequences.values())
    count = len(seqs)

    matrix = np.zeros((count, count))

    # Only the upper triangle is computed; the diagonal stays 0 (a sequence
    # compared with itself) and the lower triangle is mirrored.
    for row in range(count):
        for col in range(row + 1, count):
            value = compute_distance(seqs[row], seqs[col])
            matrix[row][col] = value
            matrix[col][row] = value

    return matrix, names
94
+
95
def get_consensus(aligned_seqs):
    """
    Build a consensus sequence from a group of aligned sequences.

    For every column the most frequent non-gap character is chosen; a column
    consisting only of gaps yields '-'. Ties are broken deterministically in
    favour of the character that appears first in the column (top to bottom),
    so the result does not depend on set iteration order.

    Args:
        aligned_seqs: list of strings; columns are taken over the length of
            the first string.

    Returns:
        The consensus string, or '' when ``aligned_seqs`` is empty.
    """
    from collections import Counter

    if not aligned_seqs:
        return ''

    consensus = []
    for i in range(len(aligned_seqs[0])):
        non_gaps = [seq[i] for seq in aligned_seqs if seq[i] != '-']
        if non_gaps:
            # most_common keeps first-encountered order among equal counts.
            consensus.append(Counter(non_gaps).most_common(1)[0][0])
        else:
            consensus.append('-')  # every sequence has a gap in this column
    return ''.join(consensus)
109
+
110
def _apply_gaps(members, aligned_consensus):
    """Insert the gaps of ``aligned_consensus`` into every (name, seq) member."""
    # NOTE(review): every '-' in the aligned consensus is treated as a newly
    # inserted gap; this assumes the consensus itself contained no '-' before
    # alignment (i.e. the group has no all-gap column) -- confirm for inputs
    # that already contain gap characters.
    result = []
    for name, seq in members:
        chars = []
        seq_idx = 0
        for char in aligned_consensus:
            if char == '-':
                chars.append('-')  # new gap column for the whole group
            else:
                chars.append(seq[seq_idx])
                seq_idx += 1
        result.append((name, ''.join(chars)))
    return result


def progressive_alignment(sequences, merge_order):
    """
    Align the sequences progressively, following the given merge order.

    Each merge step aligns the consensus of the two groups with
    ``needleman_wunsch`` and propagates the resulting gaps to every member of
    each group.

    Args:
        sequences: mapping of name to sequence, e.g. ``{'seq1': 'GATTACA', ...}``.
        merge_order: list of name pairs, e.g.
            ``[('seq2', 'seq3'), ('seq1', '(seq2, seq3)')]``; a merged group is
            referred to as ``'(name1, name2)'``.

    Returns:
        Dict mapping each sequence name to its aligned sequence, in the key
        order of ``sequences``. Every aligned sequence is stored under its own
        name even when the merge order differs from the input order. An empty
        ``sequences`` mapping yields an empty dict.
    """
    if not sequences:
        return {}

    # Each group is a list of (name, sequence) pairs so that a sequence keeps
    # its label no matter where it ends up inside a merged group.
    groups = {name: [(name, seq)] for name, seq in sequences.items()}
    final_name = next(reversed(groups))

    for (name1, name2) in merge_order:
        # Consensus of each group
        consensus1 = get_consensus([seq for _, seq in groups[name1]])
        consensus2 = get_consensus([seq for _, seq in groups[name2]])

        # Align the two consensus sequences
        aligned1, aligned2 = needleman_wunsch(consensus1, consensus2)

        # Propagate the gap positions to every member and form the new group
        final_name = f'({name1}, {name2})'
        groups[final_name] = (
            _apply_gaps(groups[name1], aligned1)
            + _apply_gaps(groups[name2], aligned2)
        )

    aligned = dict(groups[final_name])

    return {name: aligned[name] for name in sequences if name in aligned}
@@ -0,0 +1,35 @@
1
+ # Helper functions for reading and writing FASTA files.
2
+
3
def read_fasta(filename):
    """
    Read a FASTA file into a dict.

    Header lines start with '>'; the rest of the header line (without '>')
    is the sequence name. Following lines are concatenated, so sequences
    split over several lines are supported. Blank lines are ignored.

    Returns:
        Dict such as ``{'seq1': 'ATAATAGC', 'seq2': 'GATACACG'}`` in file order.

    Raises:
        ValueError: if sequence data appears before the first '>' header.
    """
    chunks = {}
    current_name = None

    with open(filename, 'r') as f:
        for line_number, raw_line in enumerate(f, start=1):
            line = raw_line.strip()  # drop the trailing newline and padding

            if not line:
                continue  # blank lines carry no data

            if line.startswith('>'):
                current_name = line[1:]  # skip the '>' character
                chunks[current_name] = []
            elif current_name is None:
                raise ValueError(
                    f'{filename}: line {line_number}: '
                    'sequence data before the first ">" header'
                )
            else:
                chunks[current_name].append(line)

    return {name: ''.join(parts) for name, parts in chunks.items()}
25
+
26
def write_fasta(sequences, filename):
    """
    Write the sequences of a dict to a file in FASTA format.

    Each entry becomes a '>name' header line followed by one sequence line.
    Sequences are written as-is, so aligned sequences keep their gap
    characters.
    """
    records = (f'>{name}\n{seq}\n' for name, seq in sequences.items())

    with open(filename, 'w') as f:
        f.writelines(records)
@@ -0,0 +1,24 @@
1
# Scoring scheme used for sequence alignment.
# Nucleotide scoring matrix covering DNA and RNA bases.

MATCH = 1
MISMATCH = -1
GAP_PENALTY = -1

_NUCLEOTIDES = 'ATGCU'

# Identical bases score MATCH, every other pair of known bases MISMATCH.
NUCLEOTIDE_MATRIX = {
    (first, second): MATCH if first == second else MISMATCH
    for first in _NUCLEOTIDES
    for second in _NUCLEOTIDES
}


def get_score(a, b):
    """
    Return the score for aligning character ``a`` with character ``b``.

    Pairs that are not in ``NUCLEOTIDE_MATRIX`` score 0.
    """
    pair = (a, b)
    if pair in NUCLEOTIDE_MATRIX:
        return NUCLEOTIDE_MATRIX[pair]
    return 0
@@ -0,0 +1,60 @@
1
+ # Builds a guide tree with the UPGMA algorithm.
2
+ # Input: distance matrix and list of names
3
+ # Output: list describing the order in which the sequences are merged,
4
+ # e.g. [(i, j), (k, l), ...]
5
+
6
+ import numpy as np
7
+
8
def upgma(matrix, names):
    """
    Return the merge order produced by UPGMA clustering.

    At every step the two closest clusters are merged. The distance from the
    merged cluster to any other cluster is the average of the two old
    distances weighted by cluster size, which is what makes the result the
    mean over all member pairs (UPGMA) and not a plain average of the two
    clusters (WPGMA).

    Args:
        matrix: square, symmetric distance matrix (nested lists or a NumPy
            array); it is copied and never modified.
        names: names of the rows/columns of ``matrix``.

    Returns:
        List of merged name pairs in merge order, e.g.
        ``[('seq2', 'seq3'), ('seq1', '(seq2, seq3)')]``. A merged cluster is
        named ``'(name1, name2)'``. Fewer than two names yield ``[]``.
    """
    dist = np.asarray(matrix, dtype=float).tolist()  # private copy
    names = list(names)
    sizes = [1] * len(names)  # number of sequences in each cluster
    merge_order = []

    while len(names) > 1:
        n = len(names)

        # Find the closest pair of clusters
        min_dist = float('inf')
        min_i, min_j = 0, 1

        for i in range(n):
            for j in range(i + 1, n):
                if dist[i][j] < min_dist:
                    min_dist = dist[i][j]
                    min_i, min_j = i, j

        merge_order.append((names[min_i], names[min_j]))
        new_name = f'({names[min_i]}, {names[min_j]})'

        size_i, size_j = sizes[min_i], sizes[min_j]
        new_size = size_i + size_j

        # Clusters that survive this step, in their current order
        keep = [k for k in range(n) if k != min_i and k != min_j]

        # Size-weighted distance from the new cluster to every survivor
        new_distances = [
            (size_i * dist[min_i][k] + size_j * dist[min_j][k]) / new_size
            for k in keep
        ]

        # Rebuild the matrix: survivors first, the new cluster last
        dist = [
            [dist[r][c] for c in keep] + [new_distances[pos]]
            for pos, r in enumerate(keep)
        ]
        dist.append(new_distances + [0.0])

        names = [names[k] for k in keep] + [new_name]
        sizes = [sizes[k] for k in keep] + [new_size]

    return merge_order
@@ -0,0 +1,70 @@
1
+ Metadata-Version: 2.2
2
+ Name: ataserinyelMSA
3
+ Version: 0.1.0
4
+ Summary: A simple MAFFT-based Multiple Sequence Alignment (MSA) library
5
+ Author-email: ataserinyel <clasher.mp2@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/ataserinyel/ataserinyelMSA
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Operating System :: OS Independent
10
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
11
+ Requires-Python: >=3.10
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE
14
+ Requires-Dist: numpy>=1.21.0
15
+
16
+ # ataserinyelMSA
17
+
18
+ A simple MAFFT-inspired Multiple Sequence Alignment (MSA) tool written in Python.
19
+
20
+ ## Installation
21
+
22
+ ```bash
23
+ pip install ataserinyelMSA
24
+ ```
25
+
26
+ ## Usage
27
+
28
+ ```bash
29
+ python main.py input.fasta output.fasta
30
+ ```
31
+
32
+ ## Example
33
+
34
+ Input (`input.fasta`):
35
+ ```
36
+ >seq1
37
+ GATTACA
38
+ >seq2
39
+ GCATGCU
40
+ >seq3
41
+ AGCTAGC
42
+ ```
43
+ Output (`output.fasta`):
44
+ ```
45
+ >seq1
46
+ -GAT-TACA
47
+ >seq2
48
+ -GC-ATGCU
49
+ >seq3
50
+ AGCTA-GC-
51
+ ```
52
+ ## Algorithm
53
+
54
+ This tool implements a simplified version of the MAFFT FFT-NS-1 algorithm:
55
+
56
+ 1. **FASTA Parsing** - Read and write FASTA format files
57
+ 2. **Pairwise Alignment** - Needleman-Wunsch global alignment algorithm
58
+ 3. **Distance Matrix** - Compute pairwise distances between sequences
59
+ 4. **Guide Tree** - UPGMA clustering algorithm
60
+ 5. **Progressive Alignment** - Align sequences following the guide tree order
61
+
62
+ ## Differences from original MAFFT
63
+
64
+ - Uses Needleman-Wunsch instead of FFT for similarity calculation
65
+ - Simple +1/-1 scoring matrix instead of advanced substitution matrices
66
+ - Suitable for small datasets
67
+
68
+ ## Author
69
+
70
+ Ata Serinyel
@@ -0,0 +1,19 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ src/ataserinyelMSA/alignment.py
5
+ src/ataserinyelMSA/fasta.py
6
+ src/ataserinyelMSA/scoring.py
7
+ src/ataserinyelMSA/tree.py
8
+ src/ataserinyelMSA.egg-info/PKG-INFO
9
+ src/ataserinyelMSA.egg-info/SOURCES.txt
10
+ src/ataserinyelMSA.egg-info/dependency_links.txt
11
+ src/ataserinyelMSA.egg-info/requires.txt
12
+ src/ataserinyelMSA.egg-info/top_level.txt
13
+ test/test_alignment.py
14
+ test/test_distance.py
15
+ test/test_fasta.py
16
+ test/test_pipline.py
17
+ test/test_progressive_alignment.py
18
+ test/test_scoring.py
19
+ test/test_tree.py
@@ -0,0 +1 @@
1
+ numpy>=1.21.0
@@ -0,0 +1 @@
1
+ ataserinyelMSA
@@ -0,0 +1,3 @@
1
+ from src.ataserinyelMSA.alignment import needleman_wunsch
2
+
3
# Smoke check: align two short sequences and show the aligned pair.
aligned_pair = needleman_wunsch('GATTACA', 'GCATGCU')
print(aligned_pair)
@@ -0,0 +1,8 @@
1
+
2
+ from src.ataserinyelMSA.fasta import read_fasta
3
+ from src.ataserinyelMSA.alignment import distance_matrix
4
# Smoke check: load the sample FASTA file and print its distance matrix,
# one row per line, rounded to two decimals.
seqs = read_fasta('test/test.fasta')
matrix, names = distance_matrix(seqs)
print(names)
for row in matrix:
    rounded_row = [round(float(value), 2) for value in row]
    print(rounded_row)
@@ -0,0 +1,3 @@
1
+ from ataserinyelMSA.fasta import read_fasta
2
+
3
# Smoke check: parse the sample FASTA file and show the resulting dict.
parsed = read_fasta('test/test.fasta')
print(parsed)
@@ -0,0 +1,33 @@
1
+
2
+ from src.ataserinyelMSA.fasta import read_fasta, write_fasta
3
+ from src.ataserinyelMSA.alignment import distance_matrix, progressive_alignment
4
+ from src.ataserinyelMSA.tree import upgma
5
+
6
# End-to-end run of the pipeline on the sample file, printing every stage.

# 1. Read the FASTA file
seqs = read_fasta('test/test.fasta')
print('1. Diziler okundu:')
for name in seqs:
    print(f' {name}: {seqs[name]}')

# 2. Distance matrix
mat, names = distance_matrix(seqs)
print('\n2. Distance Matrix:')
print(' ', names)
for label, row in zip(names, mat):
    print(f' {label}: {[round(float(value), 2) for value in row]}')

# 3. UPGMA guide tree (merge order)
order = upgma(mat, names)
print('\n3. Birleştirme Sirasi:')
for left, right in order:
    print(f' {left} + {right}')

# 4. Progressive alignment
result = progressive_alignment(seqs, order)
print('\n4. Hizalanmis Diziler:')
for name in result:
    print(f' {name}: {result[name]}')

# 5. Write the result to a file
write_fasta(result, 'test/output.fasta')
print('\n5. Sonuc test/output.fasta dosyasina yazildi.')
@@ -0,0 +1,11 @@
1
+ from src.ataserinyelMSA.fasta import read_fasta
2
+ from src.ataserinyelMSA.alignment import distance_matrix, progressive_alignment
3
+ from src.ataserinyelMSA.tree import upgma
4
+
5
# Smoke check: run distance matrix -> UPGMA -> progressive alignment on the
# sample file and print each aligned sequence.
seqs = read_fasta('test/test.fasta')
mat, names = distance_matrix(seqs)
order = upgma(mat, names)
result = progressive_alignment(seqs, order)

for name in result:
    print(f'{name}: {result[name]}')
@@ -0,0 +1,6 @@
1
+ from src.ataserinyelMSA.scoring import get_score
2
+
3
# Smoke check: print the score of a match, a mismatch and an unknown pair.
for first, second in (('A', 'A'), ('A', 'T'), ('A', 'X')):
    print(get_score(first, second))
6
+
@@ -0,0 +1,8 @@
1
+ from src.ataserinyelMSA.fasta import read_fasta
2
+ from src.ataserinyelMSA.alignment import distance_matrix
3
+ from src.ataserinyelMSA.tree import upgma
4
+
5
# Smoke check: build the guide tree for the sample file and print the
# resulting merge order.
seqs = read_fasta('test/test.fasta')
mat, names = distance_matrix(seqs)
order = upgma(mat, names)
print(order)