REDItools3 3.1a0__py3-none-any.whl → 3.2a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {REDItools3-3.1a0.dist-info → REDItools3-3.2a0.dist-info}/METADATA +2 -2
- {REDItools3-3.1a0.dist-info → REDItools3-3.2a0.dist-info}/RECORD +11 -11
- {REDItools3-3.1a0.dist-info → REDItools3-3.2a0.dist-info}/WHEEL +1 -1
- reditools/analyze.py +42 -58
- reditools/file_utils.py +21 -42
- reditools/homopolymerics.py +6 -1
- reditools/index.py +7 -2
- reditools/reditools.py +14 -1
- reditools/rtchecks.py +23 -0
- {REDItools3-3.1a0.dist-info → REDItools3-3.2a0.dist-info}/LICENSE +0 -0
- {REDItools3-3.1a0.dist-info → REDItools3-3.2a0.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: REDItools3
|
|
3
|
-
Version: 3.
|
|
3
|
+
Version: 3.2a0
|
|
4
4
|
Author: Ernesto Picardi
|
|
5
5
|
Author-email: Adam Handen <adam.handen@gmail.com>
|
|
6
6
|
Project-URL: homepage, https://github.com/BioinfoUNIBA/REDItools3
|
|
@@ -13,7 +13,7 @@ Classifier: Intended Audience :: Science/Research
|
|
|
13
13
|
Classifier: License :: OSI Approved :: GNU General Public License (GPL)
|
|
14
14
|
Classifier: Operating System :: MacOS :: MacOS X
|
|
15
15
|
Classifier: Operating System :: Unix
|
|
16
|
-
Classifier: Programming Language :: Python :: 3.
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.7
|
|
17
17
|
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
18
18
|
Requires-Python: >=3.7
|
|
19
19
|
Description-Content-Type: text/markdown
|
|
@@ -2,20 +2,20 @@ reditools/__init__.py,sha256=7nSB0hrQznxrn6l95cv_pSonJTG6jZCQdbn7aT1TtvY,46
|
|
|
2
2
|
reditools/__main__.py,sha256=mWJ9O2LDiOpBWDBJJUN7OiM4SyltW-kVXXAGBe_JxgQ,842
|
|
3
3
|
reditools/alignment_file.py,sha256=YFyCEhMek2t93DpmpwEst5v3gDZkmRotbd6Fy_mP0aE,4258
|
|
4
4
|
reditools/alignment_manager.py,sha256=_FXwvqGWoXRdzVrwBxki2heaVZA2cQbGXqCopr-g1Hs,4138
|
|
5
|
-
reditools/analyze.py,sha256=
|
|
5
|
+
reditools/analyze.py,sha256=tW9Rz-R_8R-mJ2uQP5fpGFTH7TEv-2pOsigtbKqwYDY,14649
|
|
6
6
|
reditools/compiled_position.py,sha256=v540uUEie_HHUwsYQmBqeeOkUvtYlcnWj1v8gAhLUiE,3858
|
|
7
7
|
reditools/compiled_reads.py,sha256=7Hm5f7g1T8q1zDOOxZUD7aZax9b7SdQ0PlmT93hmcaE,4154
|
|
8
8
|
reditools/fasta_file.py,sha256=KBsJBs7OnBpew2PGWGp0mTxPLlpBmRrtXL4uvQw4t34,2212
|
|
9
|
-
reditools/file_utils.py,sha256=
|
|
10
|
-
reditools/homopolymerics.py,sha256=
|
|
11
|
-
reditools/index.py,sha256=
|
|
9
|
+
reditools/file_utils.py,sha256=MfQPzJ4ogbwNvIiEu1oooS64EJH1CFdRS8eoqT9Zo4w,2763
|
|
10
|
+
reditools/homopolymerics.py,sha256=UsHTr0e_OP_dkGq5te-oTSe5u6kzi5UJOF9t9QAunUk,2269
|
|
11
|
+
reditools/index.py,sha256=jLgWwKXIA_e-bqVu74SDZXmrdWch_syDSmMnFZPbqz4,7537
|
|
12
12
|
reditools/logger.py,sha256=u4L2SYxy4vJ4KDHEymd0b1sCa8BXXHchx8LR_wcFq1A,1210
|
|
13
|
-
reditools/reditools.py,sha256=
|
|
13
|
+
reditools/reditools.py,sha256=RNH7aKC2QnbafA7T9E6UpV5Llv3FjfDIabjPCuwDgW0,13111
|
|
14
14
|
reditools/region.py,sha256=_BiKDc5lCl1snjkokRiUWOgzA57ME3yLydEIwK9ku7U,3780
|
|
15
|
-
reditools/rtchecks.py,sha256=
|
|
15
|
+
reditools/rtchecks.py,sha256=TmCow38fCRwSICvx3nlOxy6Q216BcDXESGhM7bB_ixo,8878
|
|
16
16
|
reditools/utils.py,sha256=a2qfhMcrH2QlK-JoR-HHF6_bnlo5v3jihAqqknvVIjc,2733
|
|
17
|
-
REDItools3-3.
|
|
18
|
-
REDItools3-3.
|
|
19
|
-
REDItools3-3.
|
|
20
|
-
REDItools3-3.
|
|
21
|
-
REDItools3-3.
|
|
17
|
+
REDItools3-3.2a0.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
18
|
+
REDItools3-3.2a0.dist-info/METADATA,sha256=NRwZTGGmlHBkP6XiQ4Sdql-XXRR2Ii27beCDf_jCt90,1288
|
|
19
|
+
REDItools3-3.2a0.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
|
|
20
|
+
REDItools3-3.2a0.dist-info/top_level.txt,sha256=wrvvbFXhmNg7s6LQqjlV_fVQYUZOOpF93IcMu_hBCx4,10
|
|
21
|
+
REDItools3-3.2a0.dist-info/RECORD,,
|
reditools/analyze.py
CHANGED
|
@@ -79,19 +79,11 @@ def setup_rtools(options): # noqa:WPS213,WPS231
|
|
|
79
79
|
rtools.log_level = Logger.info_level
|
|
80
80
|
|
|
81
81
|
if options.load_omopolymeric_file:
|
|
82
|
-
regions = file_utils.
|
|
83
|
-
options.load_omopolymeric_file,
|
|
84
|
-
)
|
|
82
|
+
regions = file_utils.read_bed_file(options.load_omopolymeric_file)
|
|
85
83
|
rtools.exclude(regions)
|
|
86
84
|
|
|
87
|
-
if options.create_omopolymeric_file:
|
|
88
|
-
rtools.create_omopolymeric_positions(
|
|
89
|
-
options.create_omopolymeric_file,
|
|
90
|
-
options.omopolymeric_span,
|
|
91
|
-
)
|
|
92
|
-
|
|
93
85
|
if options.splicing_file:
|
|
94
|
-
rtools.load_splicing_file(
|
|
86
|
+
rtools.splice_positions = file_utils.load_splicing_file(
|
|
95
87
|
options.splicing_file,
|
|
96
88
|
options.splicing_span,
|
|
97
89
|
)
|
|
@@ -109,10 +101,11 @@ def setup_rtools(options): # noqa:WPS213,WPS231
|
|
|
109
101
|
rtools.max_base_position = options.max_base_position
|
|
110
102
|
rtools.min_base_quality = options.min_base_quality
|
|
111
103
|
|
|
112
|
-
rtools.min_column_length = options.
|
|
104
|
+
rtools.min_column_length = options.min_read_depth
|
|
113
105
|
rtools.min_edits = options.min_edits
|
|
114
106
|
rtools.min_edits_per_nucleotide = options.min_edits_per_nucleotide
|
|
115
107
|
rtools.strand = options.strand
|
|
108
|
+
rtools.max_alts = options.max_editing_nucleotides
|
|
116
109
|
|
|
117
110
|
rtools.strand_confidence_threshold = options.strand_confidence_threshold
|
|
118
111
|
|
|
@@ -225,21 +218,26 @@ def parse_options(): # noqa:WPS213
|
|
|
225
218
|
Returns:
|
|
226
219
|
namespace: commandline args
|
|
227
220
|
"""
|
|
228
|
-
parser = argparse.ArgumentParser(
|
|
221
|
+
parser = argparse.ArgumentParser(
|
|
222
|
+
prog="reditools analyze",
|
|
223
|
+
description='REDItools3',
|
|
224
|
+
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
|
225
|
+
)
|
|
229
226
|
parser.add_argument(
|
|
230
227
|
'file',
|
|
231
228
|
nargs='+',
|
|
232
|
-
help='The bam file to be analyzed',
|
|
229
|
+
help='The bam file(s) to be analyzed.',
|
|
233
230
|
)
|
|
234
231
|
parser.add_argument(
|
|
235
232
|
'-r',
|
|
236
233
|
'--reference',
|
|
237
|
-
help='
|
|
234
|
+
help='Reference FASTA file.',
|
|
238
235
|
)
|
|
239
236
|
parser.add_argument(
|
|
240
237
|
'-o',
|
|
241
238
|
'--output-file',
|
|
242
|
-
help='
|
|
239
|
+
help='Path to write output to.',
|
|
240
|
+
default='/dev/stdout',
|
|
243
241
|
)
|
|
244
242
|
parser.add_argument(
|
|
245
243
|
'-s',
|
|
@@ -248,96 +246,85 @@ def parse_options(): # noqa:WPS213
|
|
|
248
246
|
type=int,
|
|
249
247
|
default=0,
|
|
250
248
|
help='Strand: this can be 0 (unstranded),' +
|
|
251
|
-
'1 (
|
|
252
|
-
'2 (
|
|
249
|
+
'1 (second strand oriented) or ' +
|
|
250
|
+
'2 (first strand oriented).',
|
|
253
251
|
)
|
|
254
252
|
parser.add_argument(
|
|
255
253
|
'-a',
|
|
256
254
|
'--append-file',
|
|
257
255
|
action='store_true',
|
|
258
|
-
help='Appends results to file (and creates if not existing)',
|
|
256
|
+
help='Appends results to file (and creates if not existing).',
|
|
259
257
|
)
|
|
260
258
|
parser.add_argument(
|
|
261
259
|
'-g',
|
|
262
260
|
'--region',
|
|
263
|
-
help='
|
|
261
|
+
help='Only analyzes the specified region.',
|
|
264
262
|
)
|
|
265
263
|
parser.add_argument(
|
|
266
264
|
'-m',
|
|
267
265
|
'--load-omopolymeric-file',
|
|
268
|
-
help='
|
|
269
|
-
)
|
|
270
|
-
parser.add_argument(
|
|
271
|
-
'-c',
|
|
272
|
-
'--create-omopolymeric-file',
|
|
273
|
-
default=False,
|
|
274
|
-
help='Path to write omopolymeric positions to',
|
|
275
|
-
action='store_true',
|
|
266
|
+
help='BED file of omopolymeric positions.',
|
|
276
267
|
)
|
|
277
268
|
parser.add_argument(
|
|
278
269
|
'-os',
|
|
279
270
|
'--omopolymeric-span',
|
|
280
271
|
type=int,
|
|
281
272
|
default=5,
|
|
282
|
-
help='The omopolymeric span',
|
|
273
|
+
help='The omopolymeric span.',
|
|
283
274
|
)
|
|
284
275
|
parser.add_argument(
|
|
285
276
|
'-sf',
|
|
286
277
|
'--splicing-file',
|
|
287
|
-
help='The file containing
|
|
278
|
+
help='The file containing splicing site positions.',
|
|
288
279
|
)
|
|
289
280
|
parser.add_argument(
|
|
290
281
|
'-ss',
|
|
291
282
|
'--splicing-span',
|
|
292
283
|
type=int,
|
|
293
284
|
default=4,
|
|
294
|
-
help='The splicing span',
|
|
285
|
+
help='The splicing span.',
|
|
295
286
|
)
|
|
296
287
|
parser.add_argument(
|
|
297
288
|
'-mrl',
|
|
298
289
|
'--min-read-length',
|
|
299
290
|
type=int,
|
|
300
291
|
default=30, # noqa:WPS432
|
|
301
|
-
help='Reads
|
|
292
|
+
help='Reads with length below -mrl will be discarded.',
|
|
302
293
|
)
|
|
303
294
|
parser.add_argument(
|
|
304
295
|
'-q',
|
|
305
296
|
'--min-read-quality',
|
|
306
297
|
type=int,
|
|
307
298
|
default=20, # noqa:WPS432
|
|
308
|
-
help='Reads with mapping quality below
|
|
299
|
+
help='Reads with mapping quality below -q will be discarded.',
|
|
309
300
|
)
|
|
310
301
|
parser.add_argument(
|
|
311
302
|
'-bq',
|
|
312
303
|
'--min-base-quality',
|
|
313
304
|
type=int,
|
|
314
305
|
default=30, # noqa:WPS432
|
|
315
|
-
help='Base quality below
|
|
316
|
-
'the analysis.',
|
|
306
|
+
help='Base quality below -bq will be discarded.',
|
|
317
307
|
)
|
|
318
308
|
parser.add_argument(
|
|
319
309
|
'-mbp',
|
|
320
310
|
'--min-base-position',
|
|
321
311
|
type=int,
|
|
322
312
|
default=0,
|
|
323
|
-
help='
|
|
324
|
-
'will not be included in the analysis.',
|
|
313
|
+
help='Ignores the first -mbp bases in each read.',
|
|
325
314
|
)
|
|
326
315
|
parser.add_argument(
|
|
327
316
|
'-Mbp',
|
|
328
317
|
'--max-base-position',
|
|
329
318
|
type=int,
|
|
330
319
|
default=0,
|
|
331
|
-
help='
|
|
332
|
-
'will not be included in the analysis.',
|
|
320
|
+
help='Ignores the last -Mbp bases in each read.',
|
|
333
321
|
)
|
|
334
322
|
parser.add_argument(
|
|
335
323
|
'-l',
|
|
336
|
-
'--min-
|
|
324
|
+
'--min-read-depth',
|
|
337
325
|
type=int,
|
|
338
326
|
default=1,
|
|
339
|
-
help='
|
|
340
|
-
'not be included in the analysis.',
|
|
327
|
+
help='Only report on positions with at least -l read depth',
|
|
341
328
|
)
|
|
342
329
|
parser.add_argument(
|
|
343
330
|
'-e',
|
|
@@ -351,8 +338,7 @@ def parse_options(): # noqa:WPS213
|
|
|
351
338
|
'--min-edits-per-nucleotide',
|
|
352
339
|
type=int,
|
|
353
340
|
default=0,
|
|
354
|
-
help='Positions
|
|
355
|
-
'min-edits-per-base edits will not be included in the analysis.',
|
|
341
|
+
help='Positions with fewer than -men edits will be discarded.',
|
|
356
342
|
)
|
|
357
343
|
parser.add_argument(
|
|
358
344
|
'-me',
|
|
@@ -360,16 +346,14 @@ def parse_options(): # noqa:WPS213
|
|
|
360
346
|
type=int,
|
|
361
347
|
default=0, # noqa:WPS432
|
|
362
348
|
help='The minimum number of editing events (per position). ' +
|
|
363
|
-
'Positions
|
|
364
|
-
'"min-edits-per-base edits" will not be included in the ' +
|
|
365
|
-
'analysis.',
|
|
349
|
+
'Positions with fewer than -me edits will be discarded.',
|
|
366
350
|
)
|
|
367
351
|
parser.add_argument(
|
|
368
352
|
'-Men',
|
|
369
353
|
'--max-editing-nucleotides',
|
|
370
354
|
type=int,
|
|
371
|
-
default=
|
|
372
|
-
help='The maximum number of editing nucleotides, from 0 to
|
|
355
|
+
default=4, # noqa:WPS432
|
|
356
|
+
help='The maximum number of editing nucleotides, from 0 to 3 ' +
|
|
373
357
|
'(per position). Positions whose columns have more than ' +
|
|
374
358
|
'"max-editing-nucleotides" will not be included in the analysis.',
|
|
375
359
|
)
|
|
@@ -378,8 +362,8 @@ def parse_options(): # noqa:WPS213
|
|
|
378
362
|
'--strand-confidence-threshold',
|
|
379
363
|
type=float,
|
|
380
364
|
default=0.7, # noqa:WPS432
|
|
381
|
-
help='Only report the strandedness if at least
|
|
382
|
-
'reads are of a given strand',
|
|
365
|
+
help='Only report the strandedness if at least -T proportion of ' +
|
|
366
|
+
'reads are of a given strand.',
|
|
383
367
|
)
|
|
384
368
|
parser.add_argument(
|
|
385
369
|
'-C',
|
|
@@ -393,25 +377,25 @@ def parse_options(): # noqa:WPS213
|
|
|
393
377
|
'-V',
|
|
394
378
|
'--verbose',
|
|
395
379
|
default=False,
|
|
396
|
-
help='
|
|
380
|
+
help='Run in verbose mode.',
|
|
397
381
|
action='store_true',
|
|
398
382
|
)
|
|
399
383
|
parser.add_argument(
|
|
400
384
|
'-N',
|
|
401
385
|
'--dna',
|
|
402
386
|
default=False,
|
|
403
|
-
help='Run REDItools
|
|
387
|
+
help='Run REDItools on DNA-Seq data.',
|
|
404
388
|
action='store_true',
|
|
405
389
|
)
|
|
406
390
|
parser.add_argument(
|
|
407
391
|
'-B',
|
|
408
392
|
'--bed_file',
|
|
409
|
-
help='
|
|
393
|
+
help='Only analyze regions in the provided BED file.',
|
|
410
394
|
)
|
|
411
395
|
parser.add_argument(
|
|
412
396
|
'-t',
|
|
413
397
|
'--threads',
|
|
414
|
-
help='Number of threads
|
|
398
|
+
help='Number of threads for parallel processing.',
|
|
415
399
|
type=int,
|
|
416
400
|
default=1,
|
|
417
401
|
)
|
|
@@ -419,7 +403,7 @@ def parse_options(): # noqa:WPS213
|
|
|
419
403
|
'-w',
|
|
420
404
|
'--window',
|
|
421
405
|
help='How many bp should be processed by each thread at a time. ' +
|
|
422
|
-
'
|
|
406
|
+
'Zero uses the full contig.',
|
|
423
407
|
type=int,
|
|
424
408
|
default=0,
|
|
425
409
|
)
|
|
@@ -427,18 +411,18 @@ def parse_options(): # noqa:WPS213
|
|
|
427
411
|
'-k',
|
|
428
412
|
'--exclude_regions',
|
|
429
413
|
nargs='+',
|
|
430
|
-
help='
|
|
414
|
+
help='Skip regions in the provided BED file(s).',
|
|
431
415
|
)
|
|
432
416
|
parser.add_argument(
|
|
433
417
|
'-E',
|
|
434
418
|
'--exclude_reads',
|
|
435
|
-
help='
|
|
419
|
+
help='Text file listing read names to exclude from analysis.',
|
|
436
420
|
)
|
|
437
421
|
parser.add_argument(
|
|
438
422
|
'-d',
|
|
439
423
|
'--debug',
|
|
440
424
|
default=False,
|
|
441
|
-
help='
|
|
425
|
+
help='Run in debug mode.',
|
|
442
426
|
action='store_true',
|
|
443
427
|
)
|
|
444
428
|
|
reditools/file_utils.py
CHANGED
|
@@ -2,11 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
import csv
|
|
4
4
|
import os
|
|
5
|
-
from collections import defaultdict
|
|
6
5
|
from gzip import open as gzip_open
|
|
7
6
|
|
|
8
|
-
from sortedcontainers import SortedSet
|
|
9
|
-
|
|
10
7
|
from reditools.region import Region
|
|
11
8
|
|
|
12
9
|
|
|
@@ -68,54 +65,36 @@ def concat(output, *fnames, clean_up=True, encoding='utf-8'):
|
|
|
68
65
|
os.remove(fname)
|
|
69
66
|
|
|
70
67
|
|
|
71
|
-
def
|
|
72
|
-
"""
|
|
73
|
-
Read omopolymeric positions from a file.
|
|
74
|
-
|
|
75
|
-
Parameters:
|
|
76
|
-
fname (str): File path
|
|
77
|
-
|
|
78
|
-
Returns:
|
|
79
|
-
(dict): Contigs and regions
|
|
80
|
-
"""
|
|
81
|
-
poly_regions = defaultdict(set)
|
|
82
|
-
with read_bed_file(fname) as reader:
|
|
83
|
-
for row in reader:
|
|
84
|
-
poly_regions[row[0]] = Region(
|
|
85
|
-
contig=row[0],
|
|
86
|
-
start=row[1],
|
|
87
|
-
stop=row[2],
|
|
88
|
-
)
|
|
89
|
-
return poly_regions
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
def load_splicing_file(splicing_file, span):
|
|
68
|
+
def load_splicing_file(splicing_file, splicing_span):
|
|
93
69
|
"""
|
|
94
70
|
Read splicing positions from a file.
|
|
95
71
|
|
|
96
72
|
Parameters:
|
|
97
73
|
splicing_file (str): File path
|
|
98
|
-
|
|
74
|
+
splicing_span(int): Width of splice sites
|
|
99
75
|
|
|
100
|
-
|
|
101
|
-
|
|
76
|
+
Yields:
|
|
77
|
+
Splicing file contents as Regions.
|
|
102
78
|
"""
|
|
103
|
-
splice_positions = defaultdict(SortedSet)
|
|
104
79
|
strand_map = {'-': 'D', '+': 'A'}
|
|
105
80
|
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
81
|
+
stream = open_stream(splicing_file)
|
|
82
|
+
reader = csv.reader(
|
|
83
|
+
filter(lambda row: row[0] != '#', stream),
|
|
84
|
+
delimiter=' ',
|
|
85
|
+
)
|
|
86
|
+
for row in reader:
|
|
87
|
+
contig = row[0]
|
|
88
|
+
span = int(row[1])
|
|
89
|
+
splice = row[3]
|
|
90
|
+
strand = row[4]
|
|
91
|
+
|
|
92
|
+
coe = -1 if strand_map.get(strand, None) == splice else 1
|
|
93
|
+
start = 1 + span
|
|
94
|
+
stop = start + splicing_span * coe
|
|
95
|
+
if start > stop:
|
|
96
|
+
start, stop = stop, start
|
|
97
|
+
yield Region(contig=contig, start=start, stop=stop)
|
|
119
98
|
|
|
120
99
|
|
|
121
100
|
def load_text_file(file_name):
|
reditools/homopolymerics.py
CHANGED
|
@@ -42,7 +42,11 @@ def parse_options():
|
|
|
42
42
|
Returns:
|
|
43
43
|
namespace
|
|
44
44
|
"""
|
|
45
|
-
parser = argparse.ArgumentParser(
|
|
45
|
+
parser = argparse.ArgumentParser(
|
|
46
|
+
prog="reditools find-repeats",
|
|
47
|
+
description='REDItools3',
|
|
48
|
+
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
|
49
|
+
)
|
|
46
50
|
parser.add_argument(
|
|
47
51
|
'file',
|
|
48
52
|
help='The fasta file to be analyzed',
|
|
@@ -57,6 +61,7 @@ def parse_options():
|
|
|
57
61
|
parser.add_argument(
|
|
58
62
|
'-o',
|
|
59
63
|
'--output',
|
|
64
|
+
default='/dev/stdout',
|
|
60
65
|
help='Destination to write results. Default is to use STDOUT. ' +
|
|
61
66
|
'If the filename ends in .gz, the contents will be gzipped.',
|
|
62
67
|
)
|
reditools/index.py
CHANGED
|
@@ -180,7 +180,11 @@ def parse_options(): # noqa:WPS213
|
|
|
180
180
|
Returns:
|
|
181
181
|
namespace: commandline args
|
|
182
182
|
"""
|
|
183
|
-
parser = argparse.ArgumentParser(
|
|
183
|
+
parser = argparse.ArgumentParser(
|
|
184
|
+
prog="reditools index",
|
|
185
|
+
description='REDItools3',
|
|
186
|
+
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
|
187
|
+
)
|
|
184
188
|
parser.add_argument(
|
|
185
189
|
'file',
|
|
186
190
|
nargs='+',
|
|
@@ -189,6 +193,7 @@ def parse_options(): # noqa:WPS213
|
|
|
189
193
|
parser.add_argument(
|
|
190
194
|
'-o',
|
|
191
195
|
'--output-file',
|
|
196
|
+
default='/dev/stdout',
|
|
192
197
|
help='The output statistics file',
|
|
193
198
|
)
|
|
194
199
|
parser.add_argument(
|
|
@@ -239,7 +244,7 @@ def main():
|
|
|
239
244
|
indexer.add_target_from_bed(trg_fname)
|
|
240
245
|
|
|
241
246
|
if options.output_file:
|
|
242
|
-
stream = open_stream(options.
|
|
247
|
+
stream = open_stream(options.output_file, 'w')
|
|
243
248
|
else:
|
|
244
249
|
stream = sys.stdout
|
|
245
250
|
|
reditools/reditools.py
CHANGED
|
@@ -127,7 +127,6 @@ class REDItools(object):
|
|
|
127
127
|
self._target_positions = False
|
|
128
128
|
self._exclude_positions = {}
|
|
129
129
|
self._splice_positions = []
|
|
130
|
-
|
|
131
130
|
self._specific_edits = None
|
|
132
131
|
|
|
133
132
|
self.reference = None
|
|
@@ -294,6 +293,20 @@ class REDItools(object):
|
|
|
294
293
|
"""
|
|
295
294
|
return self._exclude_positions
|
|
296
295
|
|
|
296
|
+
@property
|
|
297
|
+
def max_alts(self):
|
|
298
|
+
"""Maximum number of alternative bases for a position."""
|
|
299
|
+
return self._max_alts
|
|
300
|
+
|
|
301
|
+
@max_alts.setter
|
|
302
|
+
def max_alts(self, max_alts):
|
|
303
|
+
self._max_alts = max_alts
|
|
304
|
+
function = self._rtqc.check_max_alts
|
|
305
|
+
if max_alts < 3:
|
|
306
|
+
self._rtqc.add(function)
|
|
307
|
+
else:
|
|
308
|
+
self._rtqc.discard(function)
|
|
309
|
+
|
|
297
310
|
def exclude(self, regions):
|
|
298
311
|
"""
|
|
299
312
|
Explicitly skip specified genomic regions.
|
reditools/rtchecks.py
CHANGED
|
@@ -272,3 +272,26 @@ class RTChecks(object):
|
|
|
272
272
|
)
|
|
273
273
|
return False
|
|
274
274
|
return True
|
|
275
|
+
|
|
276
|
+
def check_max_alts(self, bases, rtools):
|
|
277
|
+
"""
|
|
278
|
+
Check that there are no more than a max number of alts.
|
|
279
|
+
|
|
280
|
+
Parameters:
|
|
281
|
+
bases (CompiledPosition): Base position under analysis
|
|
282
|
+
rtools (REDItools): Object running the analysis
|
|
283
|
+
|
|
284
|
+
Returns:
|
|
285
|
+
(bool): True if there are n or fewer alts
|
|
286
|
+
"""
|
|
287
|
+
|
|
288
|
+
alts = bases.get_variants()
|
|
289
|
+
if len(alts) > rtools.max_alts:
|
|
290
|
+
rtools.log(
|
|
291
|
+
Logger.debug_level,
|
|
292
|
+
'DISCARD COLUMN alts={} > {}',
|
|
293
|
+
len(alts),
|
|
294
|
+
rtools.max_alts,
|
|
295
|
+
)
|
|
296
|
+
return False
|
|
297
|
+
return True
|
|
File without changes
|
|
File without changes
|