REDItools3 3.4__tar.gz → 3.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. {reditools3-3.4 → reditools3-3.5}/PKG-INFO +2 -2
  2. {reditools3-3.4 → reditools3-3.5}/REDItools3.egg-info/PKG-INFO +2 -2
  3. {reditools3-3.4 → reditools3-3.5}/REDItools3.egg-info/SOURCES.txt +1 -0
  4. {reditools3-3.4 → reditools3-3.5}/pyproject.toml +2 -2
  5. {reditools3-3.4 → reditools3-3.5}/reditools/analyze.py +22 -20
  6. {reditools3-3.4 → reditools3-3.5}/reditools/compiled_reads.py +11 -5
  7. {reditools3-3.4 → reditools3-3.5}/reditools/fasta_file.py +0 -2
  8. {reditools3-3.4 → reditools3-3.5}/reditools/reditools.py +28 -40
  9. {reditools3-3.4 → reditools3-3.5}/reditools/region.py +10 -8
  10. reditools3-3.5/reditools/region_collection.py +78 -0
  11. {reditools3-3.4 → reditools3-3.5}/reditools/rtchecks.py +12 -27
  12. {reditools3-3.4 → reditools3-3.5}/LICENSE +0 -0
  13. {reditools3-3.4 → reditools3-3.5}/README.md +0 -0
  14. {reditools3-3.4 → reditools3-3.5}/REDItools3.egg-info/dependency_links.txt +0 -0
  15. {reditools3-3.4 → reditools3-3.5}/REDItools3.egg-info/requires.txt +0 -0
  16. {reditools3-3.4 → reditools3-3.5}/REDItools3.egg-info/top_level.txt +0 -0
  17. {reditools3-3.4 → reditools3-3.5}/reditools/__init__.py +0 -0
  18. {reditools3-3.4 → reditools3-3.5}/reditools/__main__.py +0 -0
  19. {reditools3-3.4 → reditools3-3.5}/reditools/alignment_file.py +0 -0
  20. {reditools3-3.4 → reditools3-3.5}/reditools/alignment_manager.py +0 -0
  21. {reditools3-3.4 → reditools3-3.5}/reditools/annotate.py +0 -0
  22. {reditools3-3.4 → reditools3-3.5}/reditools/compiled_position.py +0 -0
  23. {reditools3-3.4 → reditools3-3.5}/reditools/file_utils.py +0 -0
  24. {reditools3-3.4 → reditools3-3.5}/reditools/homopolymerics.py +0 -0
  25. {reditools3-3.4 → reditools3-3.5}/reditools/index.py +0 -0
  26. {reditools3-3.4 → reditools3-3.5}/reditools/logger.py +0 -0
  27. {reditools3-3.4 → reditools3-3.5}/reditools/utils.py +0 -0
  28. {reditools3-3.4 → reditools3-3.5}/setup.cfg +0 -0
@@ -1,8 +1,9 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: REDItools3
3
- Version: 3.4
3
+ Version: 3.5
4
4
  Author: Ernesto Picardi
5
5
  Author-email: Adam Handen <adam.handen@gmail.com>
6
+ License-Expression: GPL-3.0-or-later
6
7
  Project-URL: homepage, https://github.com/BioinfoUNIBA/REDItools3
7
8
  Project-URL: repository, https://github.com/BioinfoUNIBA/REDItools3
8
9
  Project-URL: issues, https://github.com/BioinfoUNIBA/REDItools3/issues
@@ -10,7 +11,6 @@ Keywords: bioinformatics,RNA,RNA-editing
10
11
  Classifier: Development Status :: 5 - Production/Stable
11
12
  Classifier: Intended Audience :: Developers
12
13
  Classifier: Intended Audience :: Science/Research
13
- Classifier: License :: OSI Approved :: GNU General Public License (GPL)
14
14
  Classifier: Operating System :: MacOS :: MacOS X
15
15
  Classifier: Operating System :: Unix
16
16
  Classifier: Programming Language :: Python :: 3.7
@@ -1,8 +1,9 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: REDItools3
3
- Version: 3.4
3
+ Version: 3.5
4
4
  Author: Ernesto Picardi
5
5
  Author-email: Adam Handen <adam.handen@gmail.com>
6
+ License-Expression: GPL-3.0-or-later
6
7
  Project-URL: homepage, https://github.com/BioinfoUNIBA/REDItools3
7
8
  Project-URL: repository, https://github.com/BioinfoUNIBA/REDItools3
8
9
  Project-URL: issues, https://github.com/BioinfoUNIBA/REDItools3/issues
@@ -10,7 +11,6 @@ Keywords: bioinformatics,RNA,RNA-editing
10
11
  Classifier: Development Status :: 5 - Production/Stable
11
12
  Classifier: Intended Audience :: Developers
12
13
  Classifier: Intended Audience :: Science/Research
13
- Classifier: License :: OSI Approved :: GNU General Public License (GPL)
14
14
  Classifier: Operating System :: MacOS :: MacOS X
15
15
  Classifier: Operating System :: Unix
16
16
  Classifier: Programming Language :: Python :: 3.7
@@ -21,5 +21,6 @@ reditools/index.py
21
21
  reditools/logger.py
22
22
  reditools/reditools.py
23
23
  reditools/region.py
24
+ reditools/region_collection.py
24
25
  reditools/rtchecks.py
25
26
  reditools/utils.py
@@ -4,7 +4,8 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "REDItools3"
7
- version = "v3.4"
7
+ version = "v3.5"
8
+ license = "GPL-3.0-or-later"
8
9
  authors = [
9
10
  { name="Adam Handen", email="adam.handen@gmail.com" },
10
11
  { name="Ernesto Picardi" },
@@ -20,7 +21,6 @@ classifiers = [
20
21
  "Development Status :: 5 - Production/Stable",
21
22
  "Intended Audience :: Developers",
22
23
  "Intended Audience :: Science/Research",
23
- "License :: OSI Approved :: GNU General Public License (GPL)",
24
24
  "Operating System :: MacOS :: MacOS X",
25
25
  "Operating System :: Unix",
26
26
  "Programming Language :: Python :: 3.7",
@@ -80,27 +80,29 @@ def setup_rtools(options): # noqa:WPS213,WPS231
80
80
 
81
81
  if options.load_omopolymeric_file:
82
82
  regions = file_utils.read_bed_file(options.load_omopolymeric_file)
83
- rtools.exclude(regions)
84
-
85
- if options.splicing_file:
86
- rtools.splice_positions = file_utils.load_splicing_file(
87
- options.splicing_file,
88
- options.splicing_span,
89
- )
83
+ rtools.add_exclude_regions(regions)
90
84
 
91
85
  if options.variants:
92
86
  rtools.specific_edits = [_.upper() for _ in options.variants]
93
87
 
94
88
  if options.bed_file:
95
- regions = file_utils.read_bed_file(options.bed_file)
96
- rtools.target_positions = regions
89
+ for fname in options.bed_file:
90
+ regions = file_utils.read_bed_file(fname)
91
+ rtools.add_target_regions(regions)
97
92
  if options.exclude_regions:
98
93
  for fname in options.exclude_regions:
99
94
  regions = file_utils.read_bed_file(fname)
100
- rtools.exclude(regions)
95
+ rtools.add_exclude_regions(regions)
101
96
  if options.reference:
102
97
  rtools.add_reference(options.reference)
103
98
 
99
+ if options.splicing_file:
100
+ rtools.splice_positions = file_utils.load_splicing_file(
101
+ options.splicing_file,
102
+ options.splicing_span,
103
+ )
104
+ rtools.add_exclude_regions(regions)
105
+
104
106
  rtools.min_base_position = options.min_base_position
105
107
  rtools.max_base_position = options.max_base_position
106
108
  rtools.min_base_quality = options.min_base_quality
@@ -148,7 +150,8 @@ def region_args(bam_fname, region, window):
148
150
  return args
149
151
 
150
152
 
151
- def write_results(rtools, sam_manager, file_name, region, output_format):
153
+ def write_results(rtools, sam_manager, file_name, region, output_format,
154
+ temp_dir):
152
155
  """
153
156
  Write the results from a REDItools analysis to a temporary file.
154
157
 
@@ -162,7 +165,7 @@ def write_results(rtools, sam_manager, file_name, region, output_format):
162
165
  Returns:
163
166
  string: Name of the temporary file.
164
167
  """
165
- with NamedTemporaryFile(mode='w', delete=False) as stream:
168
+ with NamedTemporaryFile(mode='w', delete=False, dir=temp_dir) as stream:
166
169
  writer = csv.writer(stream, **output_format)
167
170
  for rt_result in rtools.analyze(sam_manager, region):
168
171
  variants = rt_result.variants
@@ -207,12 +210,14 @@ def run(options, in_queue, out_queue):
207
210
  options.file,
208
211
  region,
209
212
  options.output_format,
213
+ options.temp_dir,
210
214
  )
211
215
  out_queue.put((idx, file_name))
212
216
  except Exception as exc:
213
217
  if options.debug:
214
218
  traceback.print_exception(*sys.exc_info())
215
219
  sys.stderr.write(f'[ERROR] ({type(exc)}) {exc}\n')
220
+ sys.exit(1)
216
221
 
217
222
 
218
223
  def parse_options(): # noqa:WPS213
@@ -269,13 +274,6 @@ def parse_options(): # noqa:WPS213
269
274
  '--load-omopolymeric-file',
270
275
  help='BED file of omopolymeric positions.',
271
276
  )
272
- parser.add_argument(
273
- '-os',
274
- '--omopolymeric-span',
275
- type=int,
276
- default=5,
277
- help='The omopolymeric span.',
278
- )
279
277
  parser.add_argument(
280
278
  '-sf',
281
279
  '--splicing-file',
@@ -394,6 +392,7 @@ def parse_options(): # noqa:WPS213
394
392
  parser.add_argument(
395
393
  '-B',
396
394
  '--bed_file',
395
+ nargs='+',
397
396
  help='Only analyze regions in the provided BED file.',
398
397
  )
399
398
  parser.add_argument(
@@ -403,6 +402,9 @@ def parse_options(): # noqa:WPS213
403
402
  type=int,
404
403
  default=1,
405
404
  )
405
+ parser.add_argument(
406
+ '--temp-dir',
407
+ help='Location to save temporary files')
406
408
  parser.add_argument(
407
409
  '-w',
408
410
  '--window',
@@ -433,7 +435,7 @@ def parse_options(): # noqa:WPS213
433
435
  '-v',
434
436
  '--variants',
435
437
  nargs='*',
436
- default=['CT', 'AG'],
438
+ default=['all'],
437
439
  help='Which editing events to report. Edits should be two characters, '
438
440
  'separated by spaces. Use "all" to report all variants.',
439
441
  )
@@ -28,11 +28,12 @@ class CompiledReads(object):
28
28
  """
29
29
  self._nucleotides = {}
30
30
  if strand == 0:
31
- self.get_strand = lambda read: read.is_reverse
31
+ self.get_strand = lambda _: 2
32
+ elif strand == 1:
33
+ self.get_strand = self._get_strand_one
32
34
  else:
33
- self.get_strand = self._get_strand
35
+ self.get_strand = self._get_strand_two
34
36
 
35
- self._strand_one = strand == 1
36
37
  self._ref = None
37
38
  self._ref_seq = self._get_ref_from_read
38
39
 
@@ -127,5 +128,10 @@ class CompiledReads(object):
127
128
  if qualities[offset] >= self._qc['min_base_quality']:
128
129
  yield (ref_pos, seq[offset], qualities[offset], ref_base)
129
130
 
130
- def _get_strand(self, read):
131
- return read.is_read2 ^ self._strand_one ^ read.is_reverse
131
+ def _get_strand_one(self, read):
132
+ return read.is_read1 and not read.is_reverse or \
133
+ read.is_read2 and read.is_reverse
134
+
135
+ def _get_strand_two(self, read):
136
+ return read.is_read1 and read.is_reverse or \
137
+ read.is_read2 and not read.is_reverse
@@ -49,8 +49,6 @@ class RTFastaFile(PysamFastaFile):
49
49
  if contig != self._contig_name:
50
50
  self._update_contig_cache(contig)
51
51
  try:
52
- if len(position) == 1:
53
- return self._contig_cache[position[0]]
54
52
  return [self._contig_cache[idx] for idx in position]
55
53
  except IndexError as exc:
56
54
  raise IndexError(
@@ -11,6 +11,7 @@ from reditools.compiled_reads import CompiledReads
11
11
  from reditools.fasta_file import RTFastaFile
12
12
  from reditools.logger import Logger
13
13
  from reditools.rtchecks import RTChecks
14
+ from reditools.region_collection import RegionCollection
14
15
 
15
16
 
16
17
  class RTResult(object):
@@ -124,9 +125,8 @@ class REDItools(object):
124
125
 
125
126
  self._min_read_quality = 0
126
127
 
127
- self._target_positions = False
128
- self._exclude_positions = {}
129
- self._splice_positions = []
128
+ self._target_regions = RegionCollection()
129
+ self._exclude_regions = RegionCollection()
130
130
  self._specific_edits = None
131
131
 
132
132
  self.reference = None
@@ -174,44 +174,25 @@ class REDItools(object):
174
174
  return True
175
175
 
176
176
  @property
177
- def splice_positions(self):
177
+ def target_regions(self):
178
178
  """
179
- Known splice sites.
179
+ Only report results for these locations.
180
180
 
181
181
  Returns:
182
182
  list
183
183
  """
184
- return self._splice_positions
184
+ return self._target_regions
185
185
 
186
- @splice_positions.setter
187
- def splice_positions(self, regions):
188
- function = self._rtqc.check_splice_positions
189
- if regions:
190
- self._splice_positions = utils.enumerate_positions(regions)
191
- self._rtqc.add(function)
192
- else:
193
- self._splice_positions = []
194
- self._rtqc.discard(function)
195
-
196
- @property
197
- def target_positions(self):
186
+ def add_target_regions(self, regions):
198
187
  """
199
188
  Only report results for these locations.
200
189
 
201
- Returns:
202
- list
190
+ Parameters:
191
+ regions (iterable): List of Region objects.
203
192
  """
204
- return self._target_positions
205
-
206
- @target_positions.setter
207
- def target_positions(self, regions):
208
- function = self._rtqc.check_target_positions
209
193
  if regions:
210
- self._target_positions = utils.enumerate_positions(regions)
211
- self._rtqc.add(function)
212
- else:
213
- self._target_positions = False
214
- self._rtqc.discard(function)
194
+ self._target_regions.add_regions(regions)
195
+ self._rtqc.add(self._rtqc.check_target_positions)
215
196
 
216
197
  @property
217
198
  def log_level(self):
@@ -291,14 +272,20 @@ class REDItools(object):
291
272
  self._rtqc.discard(function)
292
273
 
293
274
  @property
294
- def exclude_positions(self):
275
+ def exclude_regions(self):
276
+ """Regions to exclude from analysis"""
277
+ return self._exclude_regions
278
+
279
+ def add_exclude_regions(self, regions):
295
280
  """
296
- Genomic positions NOT to include in output.
281
+ Regions to exclude from analysis
297
282
 
298
- Returns:
299
- Dictionary of contigs to positions
283
+ Parameters:
284
+ regions (iterable): List of Region objects.
300
285
  """
301
- return self._exclude_positions
286
+ if regions:
287
+ self._exclude_regions.add_regions(regions)
288
+ self._rtqc.add(self._rtqc.check_exclusions)
302
289
 
303
290
  @property
304
291
  def max_alts(self):
@@ -323,10 +310,10 @@ class REDItools(object):
323
310
  """
324
311
  for region in regions:
325
312
  contig = region.contig
326
- old_pos = self._exclude_positions.get(contig, set())
327
- self._exclude_positions[contig] = old_pos | region.enumerate()
328
- function = self._rtqc.check_exclusion
329
- if self._exclude_positions:
313
+ old_pos = self._exclude_regions.get(contig, set())
314
+ self._exclude_regions[contig] = old_pos | region.enumerate()
315
+ function = self._rtqc.check_exclusions
316
+ if self._exclude_regions:
330
317
  self._rtqc.add(function)
331
318
  else:
332
319
  self._rtqc.discard(function)
@@ -396,7 +383,8 @@ class REDItools(object):
396
383
  if column is None:
397
384
  self.log(Logger.debug_level, 'Bad column - skipping')
398
385
  continue
399
- if self._specific_edits and not self._specific_edits & set(column.variants):
386
+ if self._specific_edits and \
387
+ not self._specific_edits & set(column.variants):
400
388
  self.log(
401
389
  Logger.debug_level,
402
390
  'Requested edits not found - skipping',
@@ -30,8 +30,11 @@ class Region(object):
30
30
  if 'contig' not in kwargs:
31
31
  raise ValueError('Region constructor requires a contig.')
32
32
  self.contig = kwargs['contig']
33
- self.start = self._to_int(kwargs.get('start', 1))
34
- self.stop = self._to_int(kwargs.get('stop', None))
33
+ self.start = self._to_int(kwargs.get('start', 1)) - 1
34
+ if 'stop' in kwargs:
35
+ self.stop = self._to_int(kwargs['stop']) - 1
36
+ else:
37
+ self.stop = None
35
38
 
36
39
  def __str__(self):
37
40
  """
@@ -40,12 +43,11 @@ class Region(object):
40
43
  Returns:
41
44
  (str): contig:start-stop
42
45
  """
43
- region = self.contig
44
- if self.start:
45
- region = f'{region}:{self.start}'
46
+ if self.start > 0:
46
47
  if self.stop:
47
- region = f'{region}-{self.stop}'
48
- return region
48
+ return f'{self.contig}:{self.start}-{self.stop + 1}'
49
+ return f'{self.contig}:{self.start}'
50
+ return self.contig
49
51
 
50
52
  def split(self, window):
51
53
  """
@@ -111,7 +113,7 @@ class Region(object):
111
113
  if not region:
112
114
  return None
113
115
  contig = region[0]
114
- start = None
116
+ start = 0
115
117
  stop = None
116
118
 
117
119
  if len(region) > 3:
@@ -0,0 +1,78 @@
1
+ """Genomic Region Collection."""
2
+
3
+
4
class RegionCollection(object):
    """
    Collection of REDItools3 region objects, grouped by contig.

    Lookups keep a cursor into the start-sorted region list of the most
    recently queried contig, so a series of queries with non-decreasing
    positions on one contig scans each region at most once. Queries that
    move backwards, switch contig, or follow an insertion restart the scan
    from the first region, so results do not depend on query order.
    """

    def __init__(self):
        """Create a new, empty region collection."""
        # contig name -> list of region objects (sorted lazily by _sort).
        self._regions = {}
        # Cursor state of the last lookup; see contains().
        self._last_index = 0
        self._last_contig = None
        self._last_position = None
        self._sorted = False

    @staticmethod
    def _sort_key(region):
        """
        Build the ordering key for a region.

        Parameters:
            region (Region): Region to order.

        Returns:
            (tuple): (start, stop), with a missing stop treated as infinity.
        """
        # A stop of None marks an open-ended region. None cannot be ordered
        # against an int, so map it to +inf to keep the sort well defined
        # when two regions share the same start.
        stop = float('inf') if region.stop is None else region.stop
        return (region.start, stop)

    def _reset_cursor(self):
        """Forget the position of the previous lookup."""
        self._last_index = 0
        self._last_contig = None
        self._last_position = None

    def _sort(self):
        """Sort the regions of every contig by (start, stop)."""
        for regions in self._regions.values():
            regions.sort(key=self._sort_key)
        self._sorted = True
        # Indices saved before the sort no longer refer to the same regions.
        self._reset_cursor()

    def contains(self, contig, position):
        """
        Check whether the given position falls inside any region.

        A region matches when ``region.start <= position`` and either
        ``region.stop`` is None or ``position < region.stop``.

        Parameters:
            contig (str): Chromosome/contig name from reference.
            position (int): Position to check for.

        Returns:
            True if there is an overlap, False otherwise.
        """
        if not self._sorted:
            self._sort()

        # The cursor is only valid while queries on one contig move forward.
        moved_back = (
            self._last_position is not None and
            position < self._last_position
        )
        if contig != self._last_contig or moved_back:
            self._last_contig = contig
            self._last_index = 0
        self._last_position = position

        regions = self._regions.get(contig, ())
        index = self._last_index
        found = False
        while index < len(regions):
            region = regions[index]
            if position < region.start:
                # Regions are sorted by start, so no later region can match.
                break
            if region.stop is None or position < region.stop:
                found = True
                break
            # The position is at or beyond this region's stop, so a later,
            # larger position cannot match it either; skip it from now on.
            index += 1
        self._last_index = index
        return found

    def add_region(self, region):
        """
        Add a region to the collection.

        Parameters:
            region (Region): region to add.
        """
        # Force a re-sort (and cursor reset) on the next lookup.
        self._sorted = False
        self._regions.setdefault(region.contig, []).append(region)

    def add_regions(self, regions):
        """
        Add a list or iterable of regions to the collection.

        Parameters:
            regions (iterable): List of regions.
        """
        for region in regions:
            self.add_region(region)
@@ -47,28 +47,6 @@ class RTChecks(object):
47
47
  rtools=rtools,
48
48
  )
49
49
 
50
- def check_splice_positions(self, rtools, bases):
51
- """
52
- Check if the contig and position are in a splice site.
53
-
54
- Parameters:
55
- rtools (REDItools): Object performing analysis
56
- bases (CompiledPosition): Base position under analysis
57
-
58
- Returns:
59
- (bool): True if the position is not a splice site.
60
- """
61
- contig = bases.contig
62
- if bases.position in rtools.splice_positions.get(contig, []):
63
- rtools.log(
64
- Logger.debug_level,
65
- '[SPLICE_SITE] Discarding ({}, {}) because in splice site',
66
- contig,
67
- bases.position,
68
- )
69
- return False
70
- return True
71
-
72
50
  def check_column_min_length(self, rtools, bases):
73
51
  """
74
52
  Check read depth.
@@ -210,13 +188,16 @@ class RTChecks(object):
210
188
  Returns:
211
189
  (bool): True if the position is in a target region
212
190
  """
213
- if bases.position not in rtools.target_positions.get(bases.contig, []):
191
+ in_targets = rtools.target_regions.contains(
192
+ bases.contig,
193
+ bases.position,
194
+ )
195
+ if not in_targets:
214
196
  rtools.log(
215
197
  Logger.debug_level,
216
- 'DISCARD COLUMN not in target positions',
198
+ 'DISCARD COLUMN not in target regions',
217
199
  )
218
- return False
219
- return True
200
+ return in_targets
220
201
 
221
202
  def check_exclusions(self, bases, rtools):
222
203
  """
@@ -229,7 +210,11 @@ class RTChecks(object):
229
210
  Returns:
230
211
  (bool): True if the position is not excluded
231
212
  """
232
- if bases.position in rtools.exclude_positions.get(bases.contig, []):
213
+ in_exclusions = rtools.exclude_regions.contains(
214
+ bases.contig,
215
+ bases.position,
216
+ )
217
+ if in_exclusions:
233
218
  rtools.log(Logger.debug_level, 'DISCARD COLUMN in excluded region')
234
219
  return False
235
220
  return True
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes