levseq 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- levseq/IO_processor.py +565 -0
- levseq/__init__.py +34 -0
- levseq/barcoding/__init__.py +1 -0
- levseq/barcoding/demultiplex +0 -0
- levseq/barcoding/demultiplex-arm64 +0 -0
- levseq/barcoding/demultiplex-x86 +0 -0
- levseq/barcoding/minion_barcodes.fasta +386 -0
- levseq/basecaller.py +80 -0
- levseq/cmd.py +23 -0
- levseq/globals.py +66 -0
- levseq/interface.py +85 -0
- levseq/parser.py +82 -0
- levseq/run_levseq.py +558 -0
- levseq/screen.py +38 -0
- levseq/simulation.py +311 -0
- levseq/user.py +157 -0
- levseq/utils.py +474 -0
- levseq/variantcaller.py +252 -0
- levseq/visualization.py +1130 -0
- levseq-1.0.0.data/data/LICENSE +674 -0
- levseq-1.0.0.dist-info/LICENSE +674 -0
- levseq-1.0.0.dist-info/METADATA +180 -0
- levseq-1.0.0.dist-info/RECORD +26 -0
- levseq-1.0.0.dist-info/WHEEL +5 -0
- levseq-1.0.0.dist-info/entry_points.txt +2 -0
- levseq-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,386 @@
|
|
|
1
|
+
>NB01
|
|
2
|
+
CACAAAGACACCGACAACTTTCTT
|
|
3
|
+
>NB02
|
|
4
|
+
ACAGACGACTACAAACGGAATCGA
|
|
5
|
+
>NB03
|
|
6
|
+
CCTGGTAACTGGGACACAAGACTC
|
|
7
|
+
>NB04
|
|
8
|
+
TAGGGAAACACGATAGAATCCGAA
|
|
9
|
+
>NB05
|
|
10
|
+
AAGGTTACACAAACCCTGGACAAG
|
|
11
|
+
>NB06
|
|
12
|
+
GACTACTTTCTGCCTTTGCGAGAA
|
|
13
|
+
>NB07
|
|
14
|
+
AAGGATTCATTCCCACGGTAACAC
|
|
15
|
+
>NB08
|
|
16
|
+
ACGTAACTTGGTTTGTTCCCTGAA
|
|
17
|
+
>NB09
|
|
18
|
+
AACCAAGACTCGCTGTGCCTAGTT
|
|
19
|
+
>NB10
|
|
20
|
+
GAGAGGACAAAGGTTTCAACGCTT
|
|
21
|
+
>NB11
|
|
22
|
+
TCCATTCCCTCCGATAGATGAAAC
|
|
23
|
+
>NB12
|
|
24
|
+
TCCGATTCTGCTTCTTTCTACCTG
|
|
25
|
+
>NB13
|
|
26
|
+
AGAACGACTTCCATACTCGTGTGA
|
|
27
|
+
>NB14
|
|
28
|
+
AACGAGTCTCTTGGGACCCATAGA
|
|
29
|
+
>NB15
|
|
30
|
+
AGGTCTACCTCGCTAACACCACTG
|
|
31
|
+
>NB16
|
|
32
|
+
CGTCAACTGACAGTGGTTCGTACT
|
|
33
|
+
>NB17
|
|
34
|
+
ACCCTCCAGGAAAGTACCTCTGAT
|
|
35
|
+
>NB18
|
|
36
|
+
CCAAACCCAACAACCTAGATAGGC
|
|
37
|
+
>NB19
|
|
38
|
+
GTTCCTCGTGCAGTGTCAAGAGAT
|
|
39
|
+
>NB20
|
|
40
|
+
TTGCGTCCTGTTACGAGAACTCAT
|
|
41
|
+
>NB21
|
|
42
|
+
GAGCCTCTCATTGTCCGTTCTCTA
|
|
43
|
+
>NB22
|
|
44
|
+
ACCACTGCCATGTATCAAAGTACG
|
|
45
|
+
>NB23
|
|
46
|
+
CTTACTACCCAGTGAACCTCCTCG
|
|
47
|
+
>NB24
|
|
48
|
+
GCATAGTTCTGCATGATGGGTTAG
|
|
49
|
+
>NB25
|
|
50
|
+
GTAAGTTGGGTATGCAACGCAATG
|
|
51
|
+
>NB26
|
|
52
|
+
CATACAGCGACTACGCATTCTCAT
|
|
53
|
+
>NB27
|
|
54
|
+
CGACGGTTAGATTCACCTCTTACA
|
|
55
|
+
>NB28
|
|
56
|
+
TGAAACCTAAGAAGGCACCGTATC
|
|
57
|
+
>NB29
|
|
58
|
+
CTAGACACCTTGGGTTGACAGACC
|
|
59
|
+
>NB30
|
|
60
|
+
TCAGTGAGGATCTACTTCGACCCA
|
|
61
|
+
>NB31
|
|
62
|
+
TGCGTACAGCAATCAGTTACATTG
|
|
63
|
+
>NB32
|
|
64
|
+
CCAGTAGAAGTCCGACAACGTCAT
|
|
65
|
+
>NB33
|
|
66
|
+
CAGACTTGGTACGGTTGGGTAACT
|
|
67
|
+
>NB34
|
|
68
|
+
GGACGAAGAACTCAAGTCAAAGGC
|
|
69
|
+
>NB35
|
|
70
|
+
CTACTTACGAAGCTGAGGGACTGC
|
|
71
|
+
>NB36
|
|
72
|
+
ATGTCCCAGTTAGAGGAGGAAACA
|
|
73
|
+
>NB37
|
|
74
|
+
GCTTGCGATTGATGCTTAGTATCA
|
|
75
|
+
>NB38
|
|
76
|
+
ACCACAGGAGGACGATACAGAGAA
|
|
77
|
+
>NB39
|
|
78
|
+
CCACAGTGTCAACTAGAGCCTCTC
|
|
79
|
+
>NB40
|
|
80
|
+
TAGTTTGGATGACCAAGGATAGCC
|
|
81
|
+
>NB41
|
|
82
|
+
GGAGTTCGTCCAGAGAAGTACACG
|
|
83
|
+
>NB42
|
|
84
|
+
CTACGTGTAAGGCATACCTGCCAG
|
|
85
|
+
>NB43
|
|
86
|
+
CTTTCGTTGTTGACTCGACGGTAG
|
|
87
|
+
>NB44
|
|
88
|
+
AGTAGAAAGGGTTCCTTCCCACTC
|
|
89
|
+
>NB45
|
|
90
|
+
GATCCAACAGAGATGCCTTCAGTG
|
|
91
|
+
>NB46
|
|
92
|
+
GCTGTGTTCCACTTCATTCTCCTG
|
|
93
|
+
>NB47
|
|
94
|
+
GTGCAACTTTCCCACAGGTAGTTC
|
|
95
|
+
>NB48
|
|
96
|
+
CATCTGGAACGTGGTACACCTGTA
|
|
97
|
+
>NB49
|
|
98
|
+
ACTGGTGCAGCTTTGAACATCTAG
|
|
99
|
+
>NB50
|
|
100
|
+
ATGGACTTTGGTAACTTCCTGCGT
|
|
101
|
+
>NB51
|
|
102
|
+
GTTGAATGAGCCTACTGGGTCCTC
|
|
103
|
+
>NB52
|
|
104
|
+
TGAGAGACAAGATTGTTCGTGGAC
|
|
105
|
+
>NB53
|
|
106
|
+
AGATTCAGACCGTCTCATGCAAAG
|
|
107
|
+
>NB54
|
|
108
|
+
CAAGAGCTTTGACTAAGGAGCATG
|
|
109
|
+
>NB55
|
|
110
|
+
TGGAAGATGAGACCCTGATCTACG
|
|
111
|
+
>NB56
|
|
112
|
+
TCACTACTCAACAGGTGGCATGAA
|
|
113
|
+
>NB57
|
|
114
|
+
GCTAGGTCAATCTCCTTCGGAAGT
|
|
115
|
+
>NB58
|
|
116
|
+
CAGGTTACTCCTCCGTGAGTCTGA
|
|
117
|
+
>NB59
|
|
118
|
+
TCAATCAAGAAGGGAAAGCAAGGT
|
|
119
|
+
>NB60
|
|
120
|
+
CATGTTCAACCAAGGCTTCTATGG
|
|
121
|
+
>NB61
|
|
122
|
+
AGAGGGTACTATGTGCCTCAGCAC
|
|
123
|
+
>NB62
|
|
124
|
+
CACCCACACTTACTTCAGGACGTA
|
|
125
|
+
>NB63
|
|
126
|
+
TTCTGAAGTTCCTGGGTCTTGAAC
|
|
127
|
+
>NB64
|
|
128
|
+
GACAGACACCGTTCATCGACTTTC
|
|
129
|
+
>NB65
|
|
130
|
+
TTCTCAGTCTTCCTCCAGACAAGG
|
|
131
|
+
>NB66
|
|
132
|
+
CCGATCCTTGTGGCTTCTAACTTC
|
|
133
|
+
>NB67
|
|
134
|
+
GTTTGTCATACTCGTGTGCTCACC
|
|
135
|
+
>NB68
|
|
136
|
+
GAATCTAAGCAAACACGAAGGTGG
|
|
137
|
+
>NB69
|
|
138
|
+
TACAGTCCGAGCCTCATGTGATCT
|
|
139
|
+
>NB70
|
|
140
|
+
ACCGAGATCCTACGAATGGAGTGT
|
|
141
|
+
>NB71
|
|
142
|
+
CCTGGGAGCATCAGGTAGTAACAG
|
|
143
|
+
>NB72
|
|
144
|
+
TAGCTGACTGTCTTCCATACCGAC
|
|
145
|
+
>NB73
|
|
146
|
+
AAGAAACAGGATGACAGAACCCTC
|
|
147
|
+
>NB74
|
|
148
|
+
TACAAGCATCCCAACACTTCCACT
|
|
149
|
+
>NB75
|
|
150
|
+
GACCATTGTGATGAACCCTGTTGT
|
|
151
|
+
>NB76
|
|
152
|
+
ATGCTTGTTACATCAACCCTGGAC
|
|
153
|
+
>NB77
|
|
154
|
+
CGACCTGTTTCTCAGGGATACAAC
|
|
155
|
+
>NB78
|
|
156
|
+
AACAACCGAACCTTTGAATCAGAA
|
|
157
|
+
>NB79
|
|
158
|
+
TCTCGGAGATAGTTCTCACTGCTG
|
|
159
|
+
>NB80
|
|
160
|
+
CGGATGAACATAGGATAGCGATTC
|
|
161
|
+
>NB81
|
|
162
|
+
CCTCATCTTGTGAAGTTGTTTCGG
|
|
163
|
+
>NB82
|
|
164
|
+
ACGGTATGTCGAGTTCCAGGACTA
|
|
165
|
+
>NB83
|
|
166
|
+
TGGCTTGATCTAGGTAAGGTCGAA
|
|
167
|
+
>NB84
|
|
168
|
+
GTAGTGGACCTAGAACCTGTGCCA
|
|
169
|
+
>NB85
|
|
170
|
+
AACGGAGGAGTTAGTTGGATGATC
|
|
171
|
+
>NB86
|
|
172
|
+
AGGTGATCCCAACAAGCGTAAGTA
|
|
173
|
+
>NB87
|
|
174
|
+
TACATGCTCCTGTTGTTAGGGAGG
|
|
175
|
+
>NB88
|
|
176
|
+
TCTTCTACTACCGATCCGAAGCAG
|
|
177
|
+
>NB89
|
|
178
|
+
ACAGCATCAATGTTTGGCTAGTTG
|
|
179
|
+
>NB90
|
|
180
|
+
GATGTAGAGGGTACGGTTTGAGGC
|
|
181
|
+
>NB91
|
|
182
|
+
GGCTCCATAGGAACTCACGCTACT
|
|
183
|
+
>NB92
|
|
184
|
+
TTGTGAGTGGAAAGATACAGGACC
|
|
185
|
+
>NB93
|
|
186
|
+
AGTTTCCATCACTTCAGACTTGGG
|
|
187
|
+
>NB94
|
|
188
|
+
GATTGTCCTCAAACTGCCACCTAC
|
|
189
|
+
>NB95
|
|
190
|
+
CCTGTCTGGAAGAAGAATGGACTT
|
|
191
|
+
>NB96
|
|
192
|
+
CTGAACGGTCATAGAGTCCACCAT
|
|
193
|
+
|
|
194
|
+
>RB01
|
|
195
|
+
AAGAAAGTTGTCGGTGTCTTTGTG
|
|
196
|
+
>RB02
|
|
197
|
+
TCGATTCCGTTTGTAGTCGTCTGT
|
|
198
|
+
>RB03
|
|
199
|
+
GAGTCTTGTGTCCCAGTTACCAGG
|
|
200
|
+
>RB04
|
|
201
|
+
TTCGGATTCTATCGTGTTTCCCTA
|
|
202
|
+
>RB05
|
|
203
|
+
CTTGTCCAGGGTTTGTGTAACCTT
|
|
204
|
+
>RB06
|
|
205
|
+
TTCTCGCAAAGGCAGAAAGTAGTC
|
|
206
|
+
>RB07
|
|
207
|
+
GTGTTACCGTGGGAATGAATCCTT
|
|
208
|
+
>RB08
|
|
209
|
+
TTCAGGGAACAAACCAAGTTACGT
|
|
210
|
+
>RB09
|
|
211
|
+
AACTAGGCACAGCGAGTCTTGGTT
|
|
212
|
+
>RB10
|
|
213
|
+
AAGCGTTGAAACCTTTGTCCTCTC
|
|
214
|
+
>RB11
|
|
215
|
+
GTTTCATCTATCGGAGGGAATGGA
|
|
216
|
+
>RB12
|
|
217
|
+
CAGGTAGAAAGAAGCAGAATCGGA
|
|
218
|
+
>RB13
|
|
219
|
+
AGAACGACTTCCATACTCGTGTGA
|
|
220
|
+
>RB14
|
|
221
|
+
AACGAGTCTCTTGGGACCCATAGA
|
|
222
|
+
>RB15
|
|
223
|
+
AGGTCTACCTCGCTAACACCACTG
|
|
224
|
+
>RB16
|
|
225
|
+
CGTCAACTGACAGTGGTTCGTACT
|
|
226
|
+
>RB17
|
|
227
|
+
ACCCTCCAGGAAAGTACCTCTGAT
|
|
228
|
+
>RB18
|
|
229
|
+
CCAAACCCAACAACCTAGATAGGC
|
|
230
|
+
>RB19
|
|
231
|
+
GTTCCTCGTGCAGTGTCAAGAGAT
|
|
232
|
+
>RB20
|
|
233
|
+
TTGCGTCCTGTTACGAGAACTCAT
|
|
234
|
+
>RB21
|
|
235
|
+
GAGCCTCTCATTGTCCGTTCTCTA
|
|
236
|
+
>RB22
|
|
237
|
+
ACCACTGCCATGTATCAAAGTACG
|
|
238
|
+
>RB23
|
|
239
|
+
CTTACTACCCAGTGAACCTCCTCG
|
|
240
|
+
>RB24
|
|
241
|
+
GCATAGTTCTGCATGATGGGTTAG
|
|
242
|
+
>RB25
|
|
243
|
+
GTAAGTTGGGTATGCAACGCAATG
|
|
244
|
+
>RB26
|
|
245
|
+
ACTATGCCTTTCCGTGAAACAGTT
|
|
246
|
+
>RB27
|
|
247
|
+
CGACGGTTAGATTCACCTCTTACA
|
|
248
|
+
>RB28
|
|
249
|
+
TGAAACCTAAGAAGGCACCGTATC
|
|
250
|
+
>RB29
|
|
251
|
+
CTAGACACCTTGGGTTGACAGACC
|
|
252
|
+
>RB30
|
|
253
|
+
TCAGTGAGGATCTACTTCGACCCA
|
|
254
|
+
>RB31
|
|
255
|
+
TGCGTACAGCAATCAGTTACATTG
|
|
256
|
+
>RB32
|
|
257
|
+
CCAGTAGAAGTCCGACAACGTCAT
|
|
258
|
+
>RB33
|
|
259
|
+
CAGACTTGGTACGGTTGGGTAACT
|
|
260
|
+
>RB34
|
|
261
|
+
GGACGAAGAACTCAAGTCAAAGGC
|
|
262
|
+
>RB35
|
|
263
|
+
CTACTTACGAAGCTGAGGGACTGC
|
|
264
|
+
>RB36
|
|
265
|
+
ATGTCCCAGTTAGAGGAGGAAACA
|
|
266
|
+
>RB37
|
|
267
|
+
GCTTGCGATTGATGCTTAGTATCA
|
|
268
|
+
>RB38
|
|
269
|
+
ACCACAGGAGGACGATACAGAGAA
|
|
270
|
+
>RB39
|
|
271
|
+
TCTGCCACACACTCGTAAGTCCTT
|
|
272
|
+
>RB40
|
|
273
|
+
GTCGATACTGGACCTATCCCTTGG
|
|
274
|
+
>RB41
|
|
275
|
+
GGAGTTCGTCCAGAGAAGTACACG
|
|
276
|
+
>RB42
|
|
277
|
+
CTACGTGTAAGGCATACCTGCCAG
|
|
278
|
+
>RB43
|
|
279
|
+
CTTTCGTTGTTGACTCGACGGTAG
|
|
280
|
+
>RB44
|
|
281
|
+
AGTAGAAAGGGTTCCTTCCCACTC
|
|
282
|
+
>RB45
|
|
283
|
+
GATCCAACAGAGATGCCTTCAGTG
|
|
284
|
+
>RB46
|
|
285
|
+
GCTGTGTTCCACTTCATTCTCCTG
|
|
286
|
+
>RB47
|
|
287
|
+
GTGCAACTTTCCCACAGGTAGTTC
|
|
288
|
+
>RB48
|
|
289
|
+
GAGTCCGTGACAACTTCTGAAAGC
|
|
290
|
+
>RB49
|
|
291
|
+
ACTGGTGCAGCTTTGAACATCTAG
|
|
292
|
+
>RB50
|
|
293
|
+
ATGGACTTTGGTAACTTCCTGCGT
|
|
294
|
+
>RB51
|
|
295
|
+
GTTGAATGAGCCTACTGGGTCCTC
|
|
296
|
+
>RB52
|
|
297
|
+
TGAGAGACAAGATTGTTCGTGGAC
|
|
298
|
+
>RB53
|
|
299
|
+
AGATTCAGACCGTCTCATGCAAAG
|
|
300
|
+
>RB54
|
|
301
|
+
GGGTGCCAACTACATACCAAACCT
|
|
302
|
+
>RB55
|
|
303
|
+
TGGAAGATGAGACCCTGATCTACG
|
|
304
|
+
>RB56
|
|
305
|
+
TCACTACTCAACAGGTGGCATGAA
|
|
306
|
+
>RB57
|
|
307
|
+
GCTAGGTCAATCTCCTTCGGAAGT
|
|
308
|
+
>RB58
|
|
309
|
+
CAGGTTACTCCTCCGTGAGTCTGA
|
|
310
|
+
>RB59
|
|
311
|
+
TCAATCAAGAAGGGAAAGCAAGGT
|
|
312
|
+
>RB60
|
|
313
|
+
GAACCCTACTTTGGACAGACACCT
|
|
314
|
+
>RB61
|
|
315
|
+
AGAGGGTACTATGTGCCTCAGCAC
|
|
316
|
+
>RB62
|
|
317
|
+
CACCCACACTTACTTCAGGACGTA
|
|
318
|
+
>RB63
|
|
319
|
+
TTCTGAAGTTCCTGGGTCTTGAAC
|
|
320
|
+
>RB64
|
|
321
|
+
GACAGACACCGTTCATCGACTTTC
|
|
322
|
+
>RB65
|
|
323
|
+
TTCTCAGTCTTCCTCCAGACAAGG
|
|
324
|
+
>RB66
|
|
325
|
+
CCGATCCTTGTGGCTTCTAACTTC
|
|
326
|
+
>RB67
|
|
327
|
+
GTTTGTCATACTCGTGTGCTCACC
|
|
328
|
+
>RB68
|
|
329
|
+
GAATCTAAGCAAACACGAAGGTGG
|
|
330
|
+
>RB69
|
|
331
|
+
TACAGTCCGAGCCTCATGTGATCT
|
|
332
|
+
>RB70
|
|
333
|
+
ACCGAGATCCTACGAATGGAGTGT
|
|
334
|
+
>RB71
|
|
335
|
+
CCTGGGAGCATCAGGTAGTAACAG
|
|
336
|
+
>RB72
|
|
337
|
+
TAGCTGACTGTCTTCCATACCGAC
|
|
338
|
+
>RB73
|
|
339
|
+
AAGAAACAGGATGACAGAACCCTC
|
|
340
|
+
>RB74
|
|
341
|
+
TACAAGCATCCCAACACTTCCACT
|
|
342
|
+
>RB75
|
|
343
|
+
GACCATTGTGATGAACCCTGTTGT
|
|
344
|
+
>RB76
|
|
345
|
+
ATGCTTGTTACATCAACCCTGGAC
|
|
346
|
+
>RB77
|
|
347
|
+
CGACCTGTTTCTCAGGGATACAAC
|
|
348
|
+
>RB78
|
|
349
|
+
AACAACCGAACCTTTGAATCAGAA
|
|
350
|
+
>RB79
|
|
351
|
+
TCTCGGAGATAGTTCTCACTGCTG
|
|
352
|
+
>RB80
|
|
353
|
+
CGGATGAACATAGGATAGCGATTC
|
|
354
|
+
>RB81
|
|
355
|
+
CCTCATCTTGTGAAGTTGTTTCGG
|
|
356
|
+
>RB82
|
|
357
|
+
ACGGTATGTCGAGTTCCAGGACTA
|
|
358
|
+
>RB83
|
|
359
|
+
TGGCTTGATCTAGGTAAGGTCGAA
|
|
360
|
+
>RB84
|
|
361
|
+
GTAGTGGACCTAGAACCTGTGCCA
|
|
362
|
+
>RB85
|
|
363
|
+
AACGGAGGAGTTAGTTGGATGATC
|
|
364
|
+
>RB86
|
|
365
|
+
AGGTGATCCCAACAAGCGTAAGTA
|
|
366
|
+
>RB87
|
|
367
|
+
TACATGCTCCTGTTGTTAGGGAGG
|
|
368
|
+
>RB88
|
|
369
|
+
TCTTCTACTACCGATCCGAAGCAG
|
|
370
|
+
>RB89
|
|
371
|
+
ACAGCATCAATGTTTGGCTAGTTG
|
|
372
|
+
>RB90
|
|
373
|
+
GATGTAGAGGGTACGGTTTGAGGC
|
|
374
|
+
>RB91
|
|
375
|
+
GGCTCCATAGGAACTCACGCTACT
|
|
376
|
+
>RB92
|
|
377
|
+
TTGTGAGTGGAAAGATACAGGACC
|
|
378
|
+
>RB93
|
|
379
|
+
AGTTTCCATCACTTCAGACTTGGG
|
|
380
|
+
>RB94
|
|
381
|
+
GATTGTCCTCAAACTGCCACCTAC
|
|
382
|
+
>RB95
|
|
383
|
+
CCTGTCTGGAAGAAGAATGGACTT
|
|
384
|
+
>RB96
|
|
385
|
+
CTGAACGGTCATAGAGTCCACCAT
|
|
386
|
+
|
levseq/basecaller.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
###############################################################################
|
|
2
|
+
# #
|
|
3
|
+
# This program is free software: you can redistribute it and/or modify #
|
|
4
|
+
# it under the terms of the GNU General Public License as published by #
|
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or #
|
|
6
|
+
# (at your option) any later version. #
|
|
7
|
+
# #
|
|
8
|
+
# This program is distributed in the hope that it will be useful, #
|
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
11
|
+
# GNU General Public License for more details. #
|
|
12
|
+
# #
|
|
13
|
+
# You should have received a copy of the GNU General Public License #
|
|
14
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>. #
|
|
15
|
+
# #
|
|
16
|
+
###############################################################################
|
|
17
|
+
|
|
18
|
+
from levseq.globals import DORADO_MODELS
|
|
19
|
+
import os
|
|
20
|
+
import glob
|
|
21
|
+
import subprocess
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class Basecaller:
    """Thin wrapper around the ``dorado`` command line tool.

    On construction the requested model is resolved through ``DORADO_MODELS``
    and downloaded into the ``dorado_models`` folder next to this module when
    it is not already there.

    Args:
        model: Key into ``DORADO_MODELS`` selecting the dorado model.
        file_folder: Input passed to ``dorado basecaller`` (the raw data).
        save_folder: Folder that receives ``basecalled.fastq``.
        fastq: When true, ``--emit-fastq`` is passed to dorado.
    """

    def __init__(self, model, file_folder, save_folder, fastq=True):
        self.model = model
        self.file_folder = file_folder
        self.save_folder = save_folder
        self.fastq = fastq
        # check_model() may trigger a download; neither method depends on
        # self.model_path already being set, so this is safe on first use.
        self.model_path = self.check_model()

    @staticmethod
    def _models_dir():
        """Return the folder (next to this module) that stores dorado models."""
        return os.path.join(os.path.dirname(__file__), "dorado_models")

    def check_model(self):
        """Return the path of the required dorado model, downloading it if missing.

        Raises:
            KeyError: if ``self.model`` is not a key of ``DORADO_MODELS``.
        """
        # NOTE(review): assumes DORADO_MODELS maps a model key to the model's
        # folder name -- confirm against levseq.globals.
        model_path = os.path.join(self._models_dir(), DORADO_MODELS[self.model])

        # Test the concrete path instead of looking for the bare model name in
        # a list of full glob results, which could never match.
        if not os.path.exists(model_path):
            self.download_model()

        return model_path

    def download_model(self):
        """Download the required dorado model into the models folder.

        Raises:
            subprocess.CalledProcessError: if ``dorado download`` fails.
            FileNotFoundError: if the ``dorado`` executable is not on PATH.
        """
        model_name = DORADO_MODELS[self.model]
        models_dir = self._models_dir()
        os.makedirs(models_dir, exist_ok=True)

        # Argument list instead of a shell string: paths containing spaces or
        # shell metacharacters are passed through untouched.
        command = ["dorado", "download", "--model", model_name, "--directory", models_dir]
        subprocess.run(command, check=True)
        return "Model downloaded."

    def run_dorado(self):
        """Run ``dorado basecaller`` and write its output to ``basecalled.fastq``.

        The basecaller's standard output is captured in
        ``<save_folder>/basecalled.fastq``.

        Returns:
            str: the status message ``"Basecalling submitted"``.

        Raises:
            subprocess.CalledProcessError: if dorado exits with an error.
            FileNotFoundError: if the ``dorado`` executable is not on PATH.
        """
        model_path = self.check_model()

        command = ["dorado", "basecaller", model_path, self.file_folder]
        if self.fastq:
            command.append("--emit-fastq")

        # NOTE(review): without --emit-fastq dorado presumably emits BAM, yet
        # the output file keeps the .fastq name used by the original code --
        # confirm what downstream steps expect before renaming it.
        os.makedirs(self.save_folder, exist_ok=True)
        output_path = os.path.join(self.save_folder, "basecalled.fastq")
        with open(output_path, "wb") as output_file:
            subprocess.run(command, stdout=output_file, check=True)

        return "Basecalling submitted"
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
|
levseq/cmd.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
###############################################################################
|
|
2
|
+
# #
|
|
3
|
+
# This program is free software: you can redistribute it and/or modify #
|
|
4
|
+
# it under the terms of the GNU General Public License as published by #
|
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or #
|
|
6
|
+
# (at your option) any later version. #
|
|
7
|
+
# #
|
|
8
|
+
# This program is distributed in the hope that it will be useful, #
|
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
11
|
+
# GNU General Public License for more details. #
|
|
12
|
+
# #
|
|
13
|
+
# You should have received a copy of the GNU General Public License #
|
|
14
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>. #
|
|
15
|
+
# #
|
|
16
|
+
###############################################################################
|
|
17
|
+
|
|
18
|
+
from levseq.interface import execute_LevSeq
|
|
19
|
+
def main():
    """Entry point: hand control to the LevSeq command line interface."""
    execute_LevSeq()


# Allow running this module directly as a script
if __name__ == "__main__":
    main()
|
levseq/globals.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
###############################################################################
|
|
2
|
+
# #
|
|
3
|
+
# This program is free software: you can redistribute it and/or modify #
|
|
4
|
+
# it under the terms of the GNU General Public License as published by #
|
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or #
|
|
6
|
+
# (at your option) any later version. #
|
|
7
|
+
# #
|
|
8
|
+
# This program is distributed in the hope that it will be useful, #
|
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
11
|
+
# GNU General Public License for more details. #
|
|
12
|
+
# #
|
|
13
|
+
# You should have received a copy of the GNU General Public License #
|
|
14
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>. #
|
|
15
|
+
# #
|
|
16
|
+
###############################################################################
|
|
17
|
+
|
|
18
|
+
# Global parameters for minIon
|
|
19
|
+
|
|
20
|
+
# Parameter for Demultiplexing
|
|
21
|
+
# Substitution scores keyed by (base, base) pairs over A, C, G, T and N.
# Each tuple below is one row of the matrix, with columns in the same
# A, C, G, T, N order; any pair involving N scores 0.
SCORE_MATRIX = {
    (row_base, col_base): score
    for row_base, row_scores in zip(
        "ACGTN",
        (
            (96, -316, -192, -369, 0),
            (-316, 100, -352, -295, 0),
            (-192, -352, 98, -329, 0),
            (-369, -295, -329, 100, 0),
            (0, 0, 0, 0, 0),
        ),
    )
    for col_base, score in zip("ACGTN", row_scores)
}  # Adapted from Guppy Barcoder
|
|
28
|
+
|
|
29
|
+
# Gap settings (two sets, suffixed 1 and 2), the minimum front barcode score
# and the front/rear window sizes.
SW_ALIGN_PARAMS = dict(
    start_gap1=40,
    end_gap1=40,
    open_gap1=0,
    extend_gap1=-40,
    start_gap2=40,
    end_gap2=40,
    open_gap2=-160,
    extend_gap2=-160,
    min_score_barcode_front=60.0,
    front_window_size=150,
    rear_window_size=150,
)  # Adapted from Guppy Barcoder
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# Default target folder names from ONT
|
|
45
|
+
# Folder names to look for, keyed by whether the data is already basecalled
DEFAULT_TARGETS = dict(Not_basecalled=["pod5"], Basecalled=["fastq_pass"])
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# Codons
|
|
49
|
+
# Codon -> one-letter amino acid table; stop codons map to '_'.
# Written as "CODON:residue" tokens, kept in the table's original order.
CODONS = dict(
    token.split(":")
    for token in (
        "ATA:I ATC:I ATT:I ATG:M "
        "ACA:T ACC:T ACG:T ACT:T "
        "AAC:N AAT:N AAA:K AAG:K "
        "AGC:S AGT:S AGA:R AGG:R "
        "CTA:L CTC:L CTG:L CTT:L "
        "CCA:P CCC:P CCG:P CCT:P "
        "CAC:H CAT:H CAA:Q CAG:Q "
        "CGA:R CGC:R CGG:R CGT:R "
        "GTA:V GTC:V GTG:V GTT:V "
        "GCA:A GCC:A GCG:A GCT:A "
        "GAC:D GAT:D GAA:E GAG:E "
        "GGA:G GGC:G GGG:G GGT:G "
        "TCA:S TCC:S TCG:S TCT:S "
        "TTC:F TTT:F TTA:L TTG:L "
        "TAC:Y TAT:Y TAA:_ TAG:_ "
        "TGC:C TGT:C TGA:_ TGG:W"
    ).split()
)
|
levseq/interface.py
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
###############################################################################
|
|
2
|
+
# #
|
|
3
|
+
# This program is free software: you can redistribute it and/or modify #
|
|
4
|
+
# it under the terms of the GNU General Public License as published by #
|
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or #
|
|
6
|
+
# (at your option) any later version. #
|
|
7
|
+
# #
|
|
8
|
+
# This program is distributed in the hope that it will be useful, #
|
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
11
|
+
# GNU General Public License for more details. #
|
|
12
|
+
# #
|
|
13
|
+
# You should have received a copy of the GNU General Public License #
|
|
14
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>. #
|
|
15
|
+
# #
|
|
16
|
+
###############################################################################
|
|
17
|
+
"""
|
|
18
|
+
Contain argument parsers used for command line interface and web interface
|
|
19
|
+
"""
|
|
20
|
+
# Import packages
|
|
21
|
+
import os
|
|
22
|
+
import tqdm
|
|
23
|
+
import argparse
|
|
24
|
+
# Import local packages
|
|
25
|
+
from levseq.run_levseq import run_LevSeq
|
|
26
|
+
|
|
27
|
+
# Get the working directory
|
|
28
|
+
CWD = os.getcwd()  # captured once, when this module is imported

# Module-level default settings
padding_start, padding_end = 0, 0
min_depth, threshold = 5, 0.2
basecall_model = 'sup'
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# Build the CLI argparser
|
|
39
|
+
def build_cli_parser():
    """Build the argument parser for the LevSeq command line interface.

    Positional arguments are ``name``, ``path`` and ``summary``. Optional
    arguments are ``--output`` (defaults to the module-level ``CWD``), the
    boolean flags ``--perform_basecalling``, ``--skip_demultiplexing`` and
    ``--skip_variantcalling``, and ``--show_msa``.

    ``--show_msa`` may be given bare (meaning true) or with an explicit
    boolean value such as ``--show_msa False``. A bare ``--show_msa`` must
    come after the positional arguments, otherwise the next positional is
    consumed as its value.

    Returns:
        argparse.ArgumentParser: the configured parser.
    """

    def parse_bool(text):
        """Convert a command line token such as 'true' or '0' into a bool."""
        lowered = text.strip().lower()
        if lowered in ("true", "t", "yes", "y", "1"):
            return True
        if lowered in ("false", "f", "no", "n", "0"):
            return False
        raise argparse.ArgumentTypeError(f"expected a boolean value, got {text!r}")

    parser = argparse.ArgumentParser()

    # Required (positional) arguments
    required_args_group = parser.add_argument_group("Required Arguments", "Arguments required for each run")
    required_args_group.add_argument("name",
                                     help="User defined name for the output folder")
    required_args_group.add_argument("path",
                                     help="Path to folder containing fastq.pass or pod5_pass files.")
    required_args_group.add_argument("summary",
                                     help="CSV file containing barcodes used, name of each plate and reference sequence in string")

    # Optional arguments
    optional_args_group = parser.add_argument_group("Optional Arguments", "Additional arguments")
    optional_args_group.add_argument("--output",
                                     help="Save location for run. Defaults to current working directory.",
                                     required=False,
                                     default=CWD)
    # store_true flag: passing it switches basecalling ON, so the help text
    # must not describe it as skipping the step.
    optional_args_group.add_argument("--perform_basecalling",
                                     action="store_true",
                                     help="Perform the basecalling step, default is false")
    optional_args_group.add_argument("--skip_demultiplexing",
                                     action="store_true",
                                     help="Skip the demultiplexing step, default is false")
    optional_args_group.add_argument("--skip_variantcalling",
                                     action="store_true",
                                     help="Skip the variant calling step, default is false")
    # Parse the value as a real boolean: storing the raw string made
    # "--show_msa False" truthy.
    # NOTE(review): assumes downstream code only tests this value for
    # truthiness -- confirm in run_LevSeq.
    optional_args_group.add_argument("--show_msa",
                                     nargs="?",
                                     const=True,
                                     default=False,
                                     type=parse_bool,
                                     help="Show the msa, default is false")
    return parser
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
# Execute LevSeq
|
|
73
|
+
def execute_LevSeq():
    """Parse the command line arguments and run LevSeq.

    Builds the CLI parser, converts the parsed arguments into a dict and
    passes it to ``run_LevSeq`` together with ``tqdm.tqdm`` as the progress
    bar factory. Exceptions raised by the run are reported rather than
    propagated; the completion message is printed only when the run finished
    without raising.
    """
    import traceback  # local import: only needed on the failure path

    # Build parser and parse the arguments
    parser = build_cli_parser()
    cl_args = vars(parser.parse_args())

    # Set up progress bar
    tqdm_fn = tqdm.tqdm

    # Run LevSeq
    try:
        run_LevSeq(cl_args, tqdm_fn)
    except Exception as e:
        # Top-level boundary: keep the original message on stdout and add the
        # traceback on stderr so the failure can be diagnosed.
        print(e)
        traceback.print_exc()
    else:
        # Previously printed unconditionally, even after a failed run.
        print("Run Complete, add log info")
|