levseq 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,386 @@
1
+ >NB01
2
+ CACAAAGACACCGACAACTTTCTT
3
+ >NB02
4
+ ACAGACGACTACAAACGGAATCGA
5
+ >NB03
6
+ CCTGGTAACTGGGACACAAGACTC
7
+ >NB04
8
+ TAGGGAAACACGATAGAATCCGAA
9
+ >NB05
10
+ AAGGTTACACAAACCCTGGACAAG
11
+ >NB06
12
+ GACTACTTTCTGCCTTTGCGAGAA
13
+ >NB07
14
+ AAGGATTCATTCCCACGGTAACAC
15
+ >NB08
16
+ ACGTAACTTGGTTTGTTCCCTGAA
17
+ >NB09
18
+ AACCAAGACTCGCTGTGCCTAGTT
19
+ >NB10
20
+ GAGAGGACAAAGGTTTCAACGCTT
21
+ >NB11
22
+ TCCATTCCCTCCGATAGATGAAAC
23
+ >NB12
24
+ TCCGATTCTGCTTCTTTCTACCTG
25
+ >NB13
26
+ AGAACGACTTCCATACTCGTGTGA
27
+ >NB14
28
+ AACGAGTCTCTTGGGACCCATAGA
29
+ >NB15
30
+ AGGTCTACCTCGCTAACACCACTG
31
+ >NB16
32
+ CGTCAACTGACAGTGGTTCGTACT
33
+ >NB17
34
+ ACCCTCCAGGAAAGTACCTCTGAT
35
+ >NB18
36
+ CCAAACCCAACAACCTAGATAGGC
37
+ >NB19
38
+ GTTCCTCGTGCAGTGTCAAGAGAT
39
+ >NB20
40
+ TTGCGTCCTGTTACGAGAACTCAT
41
+ >NB21
42
+ GAGCCTCTCATTGTCCGTTCTCTA
43
+ >NB22
44
+ ACCACTGCCATGTATCAAAGTACG
45
+ >NB23
46
+ CTTACTACCCAGTGAACCTCCTCG
47
+ >NB24
48
+ GCATAGTTCTGCATGATGGGTTAG
49
+ >NB25
50
+ GTAAGTTGGGTATGCAACGCAATG
51
+ >NB26
52
+ CATACAGCGACTACGCATTCTCAT
53
+ >NB27
54
+ CGACGGTTAGATTCACCTCTTACA
55
+ >NB28
56
+ TGAAACCTAAGAAGGCACCGTATC
57
+ >NB29
58
+ CTAGACACCTTGGGTTGACAGACC
59
+ >NB30
60
+ TCAGTGAGGATCTACTTCGACCCA
61
+ >NB31
62
+ TGCGTACAGCAATCAGTTACATTG
63
+ >NB32
64
+ CCAGTAGAAGTCCGACAACGTCAT
65
+ >NB33
66
+ CAGACTTGGTACGGTTGGGTAACT
67
+ >NB34
68
+ GGACGAAGAACTCAAGTCAAAGGC
69
+ >NB35
70
+ CTACTTACGAAGCTGAGGGACTGC
71
+ >NB36
72
+ ATGTCCCAGTTAGAGGAGGAAACA
73
+ >NB37
74
+ GCTTGCGATTGATGCTTAGTATCA
75
+ >NB38
76
+ ACCACAGGAGGACGATACAGAGAA
77
+ >NB39
78
+ CCACAGTGTCAACTAGAGCCTCTC
79
+ >NB40
80
+ TAGTTTGGATGACCAAGGATAGCC
81
+ >NB41
82
+ GGAGTTCGTCCAGAGAAGTACACG
83
+ >NB42
84
+ CTACGTGTAAGGCATACCTGCCAG
85
+ >NB43
86
+ CTTTCGTTGTTGACTCGACGGTAG
87
+ >NB44
88
+ AGTAGAAAGGGTTCCTTCCCACTC
89
+ >NB45
90
+ GATCCAACAGAGATGCCTTCAGTG
91
+ >NB46
92
+ GCTGTGTTCCACTTCATTCTCCTG
93
+ >NB47
94
+ GTGCAACTTTCCCACAGGTAGTTC
95
+ >NB48
96
+ CATCTGGAACGTGGTACACCTGTA
97
+ >NB49
98
+ ACTGGTGCAGCTTTGAACATCTAG
99
+ >NB50
100
+ ATGGACTTTGGTAACTTCCTGCGT
101
+ >NB51
102
+ GTTGAATGAGCCTACTGGGTCCTC
103
+ >NB52
104
+ TGAGAGACAAGATTGTTCGTGGAC
105
+ >NB53
106
+ AGATTCAGACCGTCTCATGCAAAG
107
+ >NB54
108
+ CAAGAGCTTTGACTAAGGAGCATG
109
+ >NB55
110
+ TGGAAGATGAGACCCTGATCTACG
111
+ >NB56
112
+ TCACTACTCAACAGGTGGCATGAA
113
+ >NB57
114
+ GCTAGGTCAATCTCCTTCGGAAGT
115
+ >NB58
116
+ CAGGTTACTCCTCCGTGAGTCTGA
117
+ >NB59
118
+ TCAATCAAGAAGGGAAAGCAAGGT
119
+ >NB60
120
+ CATGTTCAACCAAGGCTTCTATGG
121
+ >NB61
122
+ AGAGGGTACTATGTGCCTCAGCAC
123
+ >NB62
124
+ CACCCACACTTACTTCAGGACGTA
125
+ >NB63
126
+ TTCTGAAGTTCCTGGGTCTTGAAC
127
+ >NB64
128
+ GACAGACACCGTTCATCGACTTTC
129
+ >NB65
130
+ TTCTCAGTCTTCCTCCAGACAAGG
131
+ >NB66
132
+ CCGATCCTTGTGGCTTCTAACTTC
133
+ >NB67
134
+ GTTTGTCATACTCGTGTGCTCACC
135
+ >NB68
136
+ GAATCTAAGCAAACACGAAGGTGG
137
+ >NB69
138
+ TACAGTCCGAGCCTCATGTGATCT
139
+ >NB70
140
+ ACCGAGATCCTACGAATGGAGTGT
141
+ >NB71
142
+ CCTGGGAGCATCAGGTAGTAACAG
143
+ >NB72
144
+ TAGCTGACTGTCTTCCATACCGAC
145
+ >NB73
146
+ AAGAAACAGGATGACAGAACCCTC
147
+ >NB74
148
+ TACAAGCATCCCAACACTTCCACT
149
+ >NB75
150
+ GACCATTGTGATGAACCCTGTTGT
151
+ >NB76
152
+ ATGCTTGTTACATCAACCCTGGAC
153
+ >NB77
154
+ CGACCTGTTTCTCAGGGATACAAC
155
+ >NB78
156
+ AACAACCGAACCTTTGAATCAGAA
157
+ >NB79
158
+ TCTCGGAGATAGTTCTCACTGCTG
159
+ >NB80
160
+ CGGATGAACATAGGATAGCGATTC
161
+ >NB81
162
+ CCTCATCTTGTGAAGTTGTTTCGG
163
+ >NB82
164
+ ACGGTATGTCGAGTTCCAGGACTA
165
+ >NB83
166
+ TGGCTTGATCTAGGTAAGGTCGAA
167
+ >NB84
168
+ GTAGTGGACCTAGAACCTGTGCCA
169
+ >NB85
170
+ AACGGAGGAGTTAGTTGGATGATC
171
+ >NB86
172
+ AGGTGATCCCAACAAGCGTAAGTA
173
+ >NB87
174
+ TACATGCTCCTGTTGTTAGGGAGG
175
+ >NB88
176
+ TCTTCTACTACCGATCCGAAGCAG
177
+ >NB89
178
+ ACAGCATCAATGTTTGGCTAGTTG
179
+ >NB90
180
+ GATGTAGAGGGTACGGTTTGAGGC
181
+ >NB91
182
+ GGCTCCATAGGAACTCACGCTACT
183
+ >NB92
184
+ TTGTGAGTGGAAAGATACAGGACC
185
+ >NB93
186
+ AGTTTCCATCACTTCAGACTTGGG
187
+ >NB94
188
+ GATTGTCCTCAAACTGCCACCTAC
189
+ >NB95
190
+ CCTGTCTGGAAGAAGAATGGACTT
191
+ >NB96
192
+ CTGAACGGTCATAGAGTCCACCAT
193
+
194
+ >RB01
195
+ AAGAAAGTTGTCGGTGTCTTTGTG
196
+ >RB02
197
+ TCGATTCCGTTTGTAGTCGTCTGT
198
+ >RB03
199
+ GAGTCTTGTGTCCCAGTTACCAGG
200
+ >RB04
201
+ TTCGGATTCTATCGTGTTTCCCTA
202
+ >RB05
203
+ CTTGTCCAGGGTTTGTGTAACCTT
204
+ >RB06
205
+ TTCTCGCAAAGGCAGAAAGTAGTC
206
+ >RB07
207
+ GTGTTACCGTGGGAATGAATCCTT
208
+ >RB08
209
+ TTCAGGGAACAAACCAAGTTACGT
210
+ >RB09
211
+ AACTAGGCACAGCGAGTCTTGGTT
212
+ >RB10
213
+ AAGCGTTGAAACCTTTGTCCTCTC
214
+ >RB11
215
+ GTTTCATCTATCGGAGGGAATGGA
216
+ >RB12
217
+ CAGGTAGAAAGAAGCAGAATCGGA
218
+ >RB13
219
+ AGAACGACTTCCATACTCGTGTGA
220
+ >RB14
221
+ AACGAGTCTCTTGGGACCCATAGA
222
+ >RB15
223
+ AGGTCTACCTCGCTAACACCACTG
224
+ >RB16
225
+ CGTCAACTGACAGTGGTTCGTACT
226
+ >RB17
227
+ ACCCTCCAGGAAAGTACCTCTGAT
228
+ >RB18
229
+ CCAAACCCAACAACCTAGATAGGC
230
+ >RB19
231
+ GTTCCTCGTGCAGTGTCAAGAGAT
232
+ >RB20
233
+ TTGCGTCCTGTTACGAGAACTCAT
234
+ >RB21
235
+ GAGCCTCTCATTGTCCGTTCTCTA
236
+ >RB22
237
+ ACCACTGCCATGTATCAAAGTACG
238
+ >RB23
239
+ CTTACTACCCAGTGAACCTCCTCG
240
+ >RB24
241
+ GCATAGTTCTGCATGATGGGTTAG
242
+ >RB25
243
+ GTAAGTTGGGTATGCAACGCAATG
244
+ >RB26
245
+ ACTATGCCTTTCCGTGAAACAGTT
246
+ >RB27
247
+ CGACGGTTAGATTCACCTCTTACA
248
+ >RB28
249
+ TGAAACCTAAGAAGGCACCGTATC
250
+ >RB29
251
+ CTAGACACCTTGGGTTGACAGACC
252
+ >RB30
253
+ TCAGTGAGGATCTACTTCGACCCA
254
+ >RB31
255
+ TGCGTACAGCAATCAGTTACATTG
256
+ >RB32
257
+ CCAGTAGAAGTCCGACAACGTCAT
258
+ >RB33
259
+ CAGACTTGGTACGGTTGGGTAACT
260
+ >RB34
261
+ GGACGAAGAACTCAAGTCAAAGGC
262
+ >RB35
263
+ CTACTTACGAAGCTGAGGGACTGC
264
+ >RB36
265
+ ATGTCCCAGTTAGAGGAGGAAACA
266
+ >RB37
267
+ GCTTGCGATTGATGCTTAGTATCA
268
+ >RB38
269
+ ACCACAGGAGGACGATACAGAGAA
270
+ >RB39
271
+ TCTGCCACACACTCGTAAGTCCTT
272
+ >RB40
273
+ GTCGATACTGGACCTATCCCTTGG
274
+ >RB41
275
+ GGAGTTCGTCCAGAGAAGTACACG
276
+ >RB42
277
+ CTACGTGTAAGGCATACCTGCCAG
278
+ >RB43
279
+ CTTTCGTTGTTGACTCGACGGTAG
280
+ >RB44
281
+ AGTAGAAAGGGTTCCTTCCCACTC
282
+ >RB45
283
+ GATCCAACAGAGATGCCTTCAGTG
284
+ >RB46
285
+ GCTGTGTTCCACTTCATTCTCCTG
286
+ >RB47
287
+ GTGCAACTTTCCCACAGGTAGTTC
288
+ >RB48
289
+ GAGTCCGTGACAACTTCTGAAAGC
290
+ >RB49
291
+ ACTGGTGCAGCTTTGAACATCTAG
292
+ >RB50
293
+ ATGGACTTTGGTAACTTCCTGCGT
294
+ >RB51
295
+ GTTGAATGAGCCTACTGGGTCCTC
296
+ >RB52
297
+ TGAGAGACAAGATTGTTCGTGGAC
298
+ >RB53
299
+ AGATTCAGACCGTCTCATGCAAAG
300
+ >RB54
301
+ GGGTGCCAACTACATACCAAACCT
302
+ >RB55
303
+ TGGAAGATGAGACCCTGATCTACG
304
+ >RB56
305
+ TCACTACTCAACAGGTGGCATGAA
306
+ >RB57
307
+ GCTAGGTCAATCTCCTTCGGAAGT
308
+ >RB58
309
+ CAGGTTACTCCTCCGTGAGTCTGA
310
+ >RB59
311
+ TCAATCAAGAAGGGAAAGCAAGGT
312
+ >RB60
313
+ GAACCCTACTTTGGACAGACACCT
314
+ >RB61
315
+ AGAGGGTACTATGTGCCTCAGCAC
316
+ >RB62
317
+ CACCCACACTTACTTCAGGACGTA
318
+ >RB63
319
+ TTCTGAAGTTCCTGGGTCTTGAAC
320
+ >RB64
321
+ GACAGACACCGTTCATCGACTTTC
322
+ >RB65
323
+ TTCTCAGTCTTCCTCCAGACAAGG
324
+ >RB66
325
+ CCGATCCTTGTGGCTTCTAACTTC
326
+ >RB67
327
+ GTTTGTCATACTCGTGTGCTCACC
328
+ >RB68
329
+ GAATCTAAGCAAACACGAAGGTGG
330
+ >RB69
331
+ TACAGTCCGAGCCTCATGTGATCT
332
+ >RB70
333
+ ACCGAGATCCTACGAATGGAGTGT
334
+ >RB71
335
+ CCTGGGAGCATCAGGTAGTAACAG
336
+ >RB72
337
+ TAGCTGACTGTCTTCCATACCGAC
338
+ >RB73
339
+ AAGAAACAGGATGACAGAACCCTC
340
+ >RB74
341
+ TACAAGCATCCCAACACTTCCACT
342
+ >RB75
343
+ GACCATTGTGATGAACCCTGTTGT
344
+ >RB76
345
+ ATGCTTGTTACATCAACCCTGGAC
346
+ >RB77
347
+ CGACCTGTTTCTCAGGGATACAAC
348
+ >RB78
349
+ AACAACCGAACCTTTGAATCAGAA
350
+ >RB79
351
+ TCTCGGAGATAGTTCTCACTGCTG
352
+ >RB80
353
+ CGGATGAACATAGGATAGCGATTC
354
+ >RB81
355
+ CCTCATCTTGTGAAGTTGTTTCGG
356
+ >RB82
357
+ ACGGTATGTCGAGTTCCAGGACTA
358
+ >RB83
359
+ TGGCTTGATCTAGGTAAGGTCGAA
360
+ >RB84
361
+ GTAGTGGACCTAGAACCTGTGCCA
362
+ >RB85
363
+ AACGGAGGAGTTAGTTGGATGATC
364
+ >RB86
365
+ AGGTGATCCCAACAAGCGTAAGTA
366
+ >RB87
367
+ TACATGCTCCTGTTGTTAGGGAGG
368
+ >RB88
369
+ TCTTCTACTACCGATCCGAAGCAG
370
+ >RB89
371
+ ACAGCATCAATGTTTGGCTAGTTG
372
+ >RB90
373
+ GATGTAGAGGGTACGGTTTGAGGC
374
+ >RB91
375
+ GGCTCCATAGGAACTCACGCTACT
376
+ >RB92
377
+ TTGTGAGTGGAAAGATACAGGACC
378
+ >RB93
379
+ AGTTTCCATCACTTCAGACTTGGG
380
+ >RB94
381
+ GATTGTCCTCAAACTGCCACCTAC
382
+ >RB95
383
+ CCTGTCTGGAAGAAGAATGGACTT
384
+ >RB96
385
+ CTGAACGGTCATAGAGTCCACCAT
386
+
levseq/basecaller.py ADDED
@@ -0,0 +1,80 @@
1
+ ###############################################################################
2
+ # #
3
+ # This program is free software: you can redistribute it and/or modify #
4
+ # it under the terms of the GNU General Public License as published by #
5
+ # the Free Software Foundation, either version 3 of the License, or #
6
+ # (at your option) any later version. #
7
+ # #
8
+ # This program is distributed in the hope that it will be useful, #
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
11
+ # GNU General Public License for more details. #
12
+ # #
13
+ # You should have received a copy of the GNU General Public License #
14
+ # along with this program. If not, see <http://www.gnu.org/licenses/>. #
15
+ # #
16
+ ###############################################################################
17
+
18
+ from levseq.globals import DORADO_MODELS
19
+ import os
20
+ import glob
21
+ import subprocess
22
+
23
+
24
class Basecaller:
    """Run the ``dorado`` command-line basecaller on a folder of reads.

    Args:
        model: Key into ``DORADO_MODELS`` selecting the dorado model name.
        file_folder: Folder handed to ``dorado basecaller`` as its input.
        save_folder: Existing folder that receives ``basecalled.fastq``.
        fastq: When true, ``--emit-fastq`` is passed to dorado.

    Attributes:
        model_path: Location of the selected model inside the package's
            ``dorado_models`` folder. It is resolved when the object is
            constructed, downloading the model first if it is missing.
    """

    # NOTE(review): DORADO_MODELS is imported from levseq.globals at the top
    # of this file; confirm that module actually defines it.

    def __init__(self, model, file_folder, save_folder, fastq=True):
        self.model = model
        self.file_folder = file_folder
        self.save_folder = save_folder
        self.fastq = fastq
        self.model_path = self.check_model()

    @staticmethod
    def _models_dir():
        """Return the folder, next to this module, that stores dorado models."""
        return os.path.join(os.path.dirname(__file__), "dorado_models")

    def check_model(self):
        """Return the path of the selected model, downloading it if absent.

        Returns:
            The path ``<package>/dorado_models/<model name>``.

        Raises:
            KeyError: If ``self.model`` is not a key of ``DORADO_MODELS``.
        """
        model_path = os.path.join(self._models_dir(), DORADO_MODELS[self.model])

        # Test the joined path itself: comparing the bare model name with the
        # full paths returned by a directory listing can never match.
        if not os.path.exists(model_path):
            self.download_model()

        return model_path

    def download_model(self):
        """Download the selected dorado model into the models folder.

        Returns:
            The string ``"Model downloaded."``.

        Raises:
            FileNotFoundError: If the ``dorado`` executable is not on PATH.
            subprocess.CalledProcessError: If ``dorado download`` fails.
        """
        model = DORADO_MODELS[self.model]

        # Target the models folder rather than self.model_path: this method is
        # reached from __init__ before that attribute has been assigned.
        models_dir = self._models_dir()
        os.makedirs(models_dir, exist_ok=True)

        command = ["dorado", "download", "--model", model, "--directory", models_dir]
        # An argument list (no shell) keeps paths with spaces intact, and
        # check=True surfaces a failed download instead of ignoring it.
        subprocess.run(command, check=True)
        return "Model downloaded."

    def run_dorado(self):
        """Basecall ``file_folder`` and write the result into ``save_folder``.

        The output of ``dorado basecaller`` is captured from its standard
        output and stored as ``<save_folder>/basecalled.fastq``.

        Returns:
            The string ``"Basecalling submitted"``.

        Raises:
            FileNotFoundError: If ``dorado`` or ``save_folder`` does not exist.
            subprocess.CalledProcessError: If ``dorado basecaller`` fails.
        """
        model_path = self.check_model()

        command = ["dorado", "basecaller", str(model_path), str(self.file_folder)]
        if self.fastq:
            command.append("--emit-fastq")

        # NOTE(review): the file is named basecalled.fastq even when
        # --emit-fastq is not requested; the name is kept so existing callers
        # keep finding the output, but confirm the intended extension.
        output_file = os.path.join(str(self.save_folder), "basecalled.fastq")

        # Redirect stdout to the file directly instead of using a shell ">".
        with open(output_file, "wb") as handle:
            subprocess.run(command, stdout=handle, check=True)

        return "Basecalling submitted"
78
+
79
+
80
+
levseq/cmd.py ADDED
@@ -0,0 +1,23 @@
1
+ ###############################################################################
2
+ # #
3
+ # This program is free software: you can redistribute it and/or modify #
4
+ # it under the terms of the GNU General Public License as published by #
5
+ # the Free Software Foundation, either version 3 of the License, or #
6
+ # (at your option) any later version. #
7
+ # #
8
+ # This program is distributed in the hope that it will be useful, #
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
11
+ # GNU General Public License for more details. #
12
+ # #
13
+ # You should have received a copy of the GNU General Public License #
14
+ # along with this program. If not, see <http://www.gnu.org/licenses/>. #
15
+ # #
16
+ ###############################################################################
17
+
18
+ from levseq.interface import execute_LevSeq
19
def main():
    """Console entry point: hand control to the LevSeq command-line runner."""
    execute_LevSeq()


if __name__ == "__main__":
    main()
levseq/globals.py ADDED
@@ -0,0 +1,66 @@
1
+ ###############################################################################
2
+ # #
3
+ # This program is free software: you can redistribute it and/or modify #
4
+ # it under the terms of the GNU General Public License as published by #
5
+ # the Free Software Foundation, either version 3 of the License, or #
6
+ # (at your option) any later version. #
7
+ # #
8
+ # This program is distributed in the hope that it will be useful, #
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
11
+ # GNU General Public License for more details. #
12
+ # #
13
+ # You should have received a copy of the GNU General Public License #
14
+ # along with this program. If not, see <http://www.gnu.org/licenses/>. #
15
+ # #
16
+ ###############################################################################
17
+
18
+ # Global parameters for minIon
19
+
20
+ # Parameter for Demultiplexing
21
# Substitution scores keyed by (base, base) pairs over A, C, G, T and N.
# Every pair that involves N scores 0. Adapted from Guppy Barcoder.
SCORE_MATRIX = {
    ('A', 'A'): 96, ('A', 'C'): -316, ('A', 'G'): -192, ('A', 'T'): -369, ('A', 'N'): 0,
    ('C', 'A'): -316, ('C', 'C'): 100, ('C', 'G'): -352, ('C', 'T'): -295, ('C', 'N'): 0,
    ('G', 'A'): -192, ('G', 'C'): -352, ('G', 'G'): 98, ('G', 'T'): -329, ('G', 'N'): 0,
    ('T', 'A'): -369, ('T', 'C'): -295, ('T', 'G'): -329, ('T', 'T'): 100, ('T', 'N'): 0,
    ('N', 'A'): 0, ('N', 'C'): 0, ('N', 'G'): 0, ('N', 'T'): 0, ('N', 'N'): 0,
}
28
+
29
# Gap settings, minimum front-barcode score and window sizes used for
# demultiplexing. Adapted from Guppy Barcoder.
SW_ALIGN_PARAMS = {
    "start_gap1": 40,
    "end_gap1": 40,
    "open_gap1": 0,
    "extend_gap1": -40,
    "start_gap2": 40,
    "end_gap2": 40,
    "open_gap2": -160,
    "extend_gap2": -160,
    "min_score_barcode_front": 60.0,
    "front_window_size": 150,
    "rear_window_size": 150,
}
42
+
43
+
44
# Default target folder names from ONT, grouped by whether the reads in them
# have already been basecalled.
DEFAULT_TARGETS = {"Not_basecalled": ["pod5"], "Basecalled": ["fastq_pass"]}
46
+
47
+
48
# Codon table: DNA triplet -> one-letter amino acid code.
# Stop codons (TAA, TAG, TGA) map to '_'.
CODONS = {
    'ATA': 'I', 'ATC': 'I', 'ATT': 'I', 'ATG': 'M',
    'ACA': 'T', 'ACC': 'T', 'ACG': 'T', 'ACT': 'T',
    'AAC': 'N', 'AAT': 'N', 'AAA': 'K', 'AAG': 'K',
    'AGC': 'S', 'AGT': 'S', 'AGA': 'R', 'AGG': 'R',
    'CTA': 'L', 'CTC': 'L', 'CTG': 'L', 'CTT': 'L',
    'CCA': 'P', 'CCC': 'P', 'CCG': 'P', 'CCT': 'P',
    'CAC': 'H', 'CAT': 'H', 'CAA': 'Q', 'CAG': 'Q',
    'CGA': 'R', 'CGC': 'R', 'CGG': 'R', 'CGT': 'R',
    'GTA': 'V', 'GTC': 'V', 'GTG': 'V', 'GTT': 'V',
    'GCA': 'A', 'GCC': 'A', 'GCG': 'A', 'GCT': 'A',
    'GAC': 'D', 'GAT': 'D', 'GAA': 'E', 'GAG': 'E',
    'GGA': 'G', 'GGC': 'G', 'GGG': 'G', 'GGT': 'G',
    'TCA': 'S', 'TCC': 'S', 'TCG': 'S', 'TCT': 'S',
    'TTC': 'F', 'TTT': 'F', 'TTA': 'L', 'TTG': 'L',
    'TAC': 'Y', 'TAT': 'Y', 'TAA': '_', 'TAG': '_',
    'TGC': 'C', 'TGT': 'C', 'TGA': '_', 'TGG': 'W',
}
levseq/interface.py ADDED
@@ -0,0 +1,85 @@
1
+ ###############################################################################
2
+ # #
3
+ # This program is free software: you can redistribute it and/or modify #
4
+ # it under the terms of the GNU General Public License as published by #
5
+ # the Free Software Foundation, either version 3 of the License, or #
6
+ # (at your option) any later version. #
7
+ # #
8
+ # This program is distributed in the hope that it will be useful, #
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of #
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
11
+ # GNU General Public License for more details. #
12
+ # #
13
+ # You should have received a copy of the GNU General Public License #
14
+ # along with this program. If not, see <http://www.gnu.org/licenses/>. #
15
+ # #
16
+ ###############################################################################
17
+ """
18
+ Contains the argument parsers used for the command-line interface and web interface.
19
+ """
20
+ # Import packages
21
+ import os
22
+ import tqdm
23
+ import argparse
24
+ # Import local packages
25
+ from levseq.run_levseq import run_LevSeq
26
+
27
# Working directory captured at import time; used as the default save
# location of the ``--output`` option.
CWD = os.getcwd()

# Default argument values.
padding_start = 0
padding_end = 0
min_depth = 5
threshold = 0.2
basecall_model = 'sup'
36
+
37
+
38
+ # Build the CLI argparser
39
def _str_to_bool(value):
    """Convert a command-line token such as ``"true"`` or ``"0"`` to a bool."""
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in ("true", "t", "yes", "y", "1"):
        return True
    if token in ("false", "f", "no", "n", "0"):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


def build_cli_parser():
    """Build the argument parser of the LevSeq command-line interface.

    Positional arguments: ``name``, ``path`` and ``summary``.
    Optional arguments: ``--output`` (defaults to ``CWD``), the flags
    ``--perform_basecalling``, ``--skip_demultiplexing`` and
    ``--skip_variantcalling`` (all false unless given), and ``--show_msa``.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    # Initialize
    parser = argparse.ArgumentParser()

    # Add required arguments
    required_args_group = parser.add_argument_group("Required Arguments", "Arguments required for each run")
    required_args_group.add_argument("name",
                                     help="User defined name for the output folder")
    required_args_group.add_argument("path",
                                     help="Path to folder containing fastq.pass or pod5_pass files.")
    required_args_group.add_argument("summary",
                                     help="CSV file containing barcodes used, name of each plate and reference sequence in string")

    # Add optional arguments
    optional_args_group = parser.add_argument_group("Optional Arguments", "Additional arguments")
    optional_args_group.add_argument("--output",
                                     help="Save location for run. Defaults to current working directory.",
                                     required=False,
                                     default=CWD)
    # store_true: giving the flag switches basecalling ON, so the help text
    # must say "Perform", not "Skip".
    optional_args_group.add_argument("--perform_basecalling",
                                     action="store_true",
                                     help="Perform the basecalling step, default is false")
    optional_args_group.add_argument("--skip_demultiplexing",
                                     action="store_true",
                                     help="Skip the demultiplexing step, default is false")
    optional_args_group.add_argument("--skip_variantcalling",
                                     action="store_true",
                                     help="Skip the variant calling step, default is false")
    # Parse the value into a real bool: without a type, "--show_msa False"
    # produced the non-empty (truthy) string "False". nargs="?" with
    # const=True also lets the option be used as a bare flag.
    optional_args_group.add_argument("--show_msa",
                                     nargs="?",
                                     const=True,
                                     default=False,
                                     type=_str_to_bool,
                                     help="Show the msa (true/false), default is false")
    return parser
70
+
71
+
72
+ # Execute LevSeq
73
def execute_LevSeq():
    """Parse the command line and run the LevSeq pipeline.

    The parsed arguments are passed to ``run_LevSeq`` as a dictionary,
    together with ``tqdm.tqdm`` as the progress-bar factory.

    Raises:
        SystemExit: With status 1 when ``run_LevSeq`` raises, so that shells
            and workflow managers can detect the failed run. Argument
            parsing errors exit through ``argparse`` as usual.
    """
    # Function-scope imports keep this block self-contained.
    import sys
    import traceback

    # Build parser
    parser = build_cli_parser()
    # Parse the arguments
    cl_args = vars(parser.parse_args())
    # Set up progress bar
    tqdm_fn = tqdm.tqdm
    # Run LevSeq
    try:
        run_LevSeq(cl_args, tqdm_fn)
    except Exception as e:
        # Top-level boundary: report the failure with its traceback and exit
        # non-zero instead of announcing a completed run.
        print(f"LevSeq run failed: {e}", file=sys.stderr)
        traceback.print_exc()
        sys.exit(1)
    else:
        print("Run Complete, add log info")