HTSeq 2.0.8__tar.gz → 2.0.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. htseq-2.0.9/HTSeq/_version.py +1 -0
  2. {htseq-2.0.8 → htseq-2.0.9}/HTSeq/features.py +17 -6
  3. {htseq-2.0.8 → htseq-2.0.9}/HTSeq/scripts/count.py +49 -13
  4. {htseq-2.0.8 → htseq-2.0.9}/HTSeq/scripts/count_with_barcodes.py +7 -4
  5. {htseq-2.0.8 → htseq-2.0.9}/HTSeq.egg-info/PKG-INFO +1 -1
  6. {htseq-2.0.8 → htseq-2.0.9}/PKG-INFO +1 -1
  7. htseq-2.0.9/VERSION +1 -0
  8. {htseq-2.0.8 → htseq-2.0.9}/src/_HTSeq.c +2123 -2076
  9. {htseq-2.0.8 → htseq-2.0.9}/test/test_htseq-count.py +38 -3
  10. htseq-2.0.8/HTSeq/_version.py +0 -1
  11. htseq-2.0.8/VERSION +0 -1
  12. {htseq-2.0.8 → htseq-2.0.9}/HTSeq/StepVector.py +0 -0
  13. {htseq-2.0.8 → htseq-2.0.9}/HTSeq/StretchVector.py +0 -0
  14. {htseq-2.0.8 → htseq-2.0.9}/HTSeq/_HTSeq_internal.py +0 -0
  15. {htseq-2.0.8 → htseq-2.0.9}/HTSeq/__init__.py +0 -0
  16. {htseq-2.0.8 → htseq-2.0.9}/HTSeq/scripts/__init__.py +0 -0
  17. {htseq-2.0.8 → htseq-2.0.9}/HTSeq/scripts/count_features/__init__.py +0 -0
  18. {htseq-2.0.8 → htseq-2.0.9}/HTSeq/scripts/count_features/count_features_per_file.py +0 -0
  19. {htseq-2.0.8 → htseq-2.0.9}/HTSeq/scripts/count_features/reads_io_processor.py +0 -0
  20. {htseq-2.0.8 → htseq-2.0.9}/HTSeq/scripts/count_features/reads_stats.py +0 -0
  21. {htseq-2.0.8 → htseq-2.0.9}/HTSeq/scripts/count_old.py +0 -0
  22. {htseq-2.0.8 → htseq-2.0.9}/HTSeq/scripts/qa.py +0 -0
  23. {htseq-2.0.8 → htseq-2.0.9}/HTSeq/scripts/utils.py +0 -0
  24. {htseq-2.0.8 → htseq-2.0.9}/HTSeq/utils.py +0 -0
  25. {htseq-2.0.8 → htseq-2.0.9}/HTSeq.egg-info/SOURCES.txt +0 -0
  26. {htseq-2.0.8 → htseq-2.0.9}/HTSeq.egg-info/dependency_links.txt +0 -0
  27. {htseq-2.0.8 → htseq-2.0.9}/HTSeq.egg-info/requires.txt +0 -0
  28. {htseq-2.0.8 → htseq-2.0.9}/HTSeq.egg-info/top_level.txt +0 -0
  29. {htseq-2.0.8 → htseq-2.0.9}/LICENSE +0 -0
  30. {htseq-2.0.8 → htseq-2.0.9}/MANIFEST.in +0 -0
  31. {htseq-2.0.8 → htseq-2.0.9}/README.md +0 -0
  32. {htseq-2.0.8 → htseq-2.0.9}/pyproject.toml +0 -0
  33. {htseq-2.0.8 → htseq-2.0.9}/scripts/htseq-count +0 -0
  34. {htseq-2.0.8 → htseq-2.0.9}/scripts/htseq-count-barcodes +0 -0
  35. {htseq-2.0.8 → htseq-2.0.9}/scripts/htseq-qa +0 -0
  36. {htseq-2.0.8 → htseq-2.0.9}/setup.cfg +0 -0
  37. {htseq-2.0.8 → htseq-2.0.9}/setup.py +0 -0
  38. {htseq-2.0.8 → htseq-2.0.9}/src/AutoPyObjPtr.i +0 -0
  39. {htseq-2.0.8 → htseq-2.0.9}/src/HTSeq/_HTSeq.pxd +0 -0
  40. {htseq-2.0.8 → htseq-2.0.9}/src/HTSeq/_HTSeq.pyx +0 -0
  41. {htseq-2.0.8 → htseq-2.0.9}/src/HTSeq/__init__.py +0 -0
  42. {htseq-2.0.8 → htseq-2.0.9}/src/StepVector.i +0 -0
  43. {htseq-2.0.8 → htseq-2.0.9}/src/StepVector.py +0 -0
  44. {htseq-2.0.8 → htseq-2.0.9}/src/StepVector_wrap.cxx +0 -0
  45. {htseq-2.0.8 → htseq-2.0.9}/src/step_vector.h +0 -0
  46. {htseq-2.0.8 → htseq-2.0.9}/test/test_general.py +0 -0
  47. {htseq-2.0.8 → htseq-2.0.9}/test/test_genomic.py +0 -0
  48. {htseq-2.0.8 → htseq-2.0.9}/test/test_parsers.py +0 -0
  49. {htseq-2.0.8 → htseq-2.0.9}/test/test_stretch_vector.py +0 -0
@@ -0,0 +1 @@
1
+ __version__ = "2.0.9"
@@ -239,8 +239,17 @@ def make_feature_dict(
239
239
  Args:
240
240
  feature_sequence (iterable of Feature): A sequence of features, e.g. as
241
241
  obtained from GFF_reader('myfile.gtf')
242
- feature_type (string or None): If None, collect all features. If a
243
- string, restrict to only one type of features, e.g. 'exon'.
242
+ feature_type (string, sequence of strings, or None): If None, collect
243
+ all features. If a string, restrict to only one type of features,
244
+ e.g. 'exon' (this is the most common situation). If a sequence of
245
+ strings, restrict to the types found in the sequence, e.g.
246
+ ['gene', 'pseudogene']. Using a sequence of strings is an uncommon
247
+ need and can lead to a higher number of ambiguous alignments: only
248
+ use if you know what you are doing. Even then, beware that this
249
+ option is designed to work for feature types that are "peers" and
250
+ not obviously overlapping, such as genes and pseudogenes. If you
251
+ select nested feature types (e.g. "gene" and "exon"), you are
252
+ likely to end up with meaningless numbers.
244
253
  feature_query (string or None): If None, all features of the selected
245
254
  types will be collected. If a string, it has to be in the format:
246
255
 
@@ -280,7 +289,7 @@ def make_feature_dict(
280
289
 
281
290
  features = {}
282
291
  for f in feature_sequence:
283
- if feature_type in (None, f.type):
292
+ if any(ft in (None, f.type) for ft in feature_type):
284
293
  if f.type not in features:
285
294
  features[f.type] = {}
286
295
  res_ftype = features[f.type]
@@ -322,8 +331,10 @@ def make_feature_genomicarrayofsets(
322
331
  attributes, separated by colons (:), will be used as an identifier.
323
332
  For instance, ['gene_id', 'exon_number'] uniquely identifies
324
333
  specific exons.
325
- feature_type (string or None): If None, collect all features. If a
326
- string, restrict to only one type of features, e.g. 'exon'.
334
+ feature_type (string, sequence of strings, or None): If None, collect
335
+ all features. If a string, restrict to only one type of features,
336
+ e.g. 'exon'. If a sequence of strings, restrict to the types found
337
+ in the sequence, e.g. 'gene' and 'pseudogene'
327
338
  feature_query (string or None): If None, all features of the selected
328
339
  types will be collected. If a string, it has to be in the format:
329
340
 
@@ -401,7 +412,7 @@ def make_feature_genomicarrayofsets(
401
412
  i = 0
402
413
  try:
403
414
  for f in feature_sequence:
404
- if feature_type in (None, f.type):
415
+ if any(ft in (None, f.type) for ft in feature_type):
405
416
  feature_id = get_id_attr(f, id_attribute)
406
417
 
407
418
  if stranded and f.iv.strand == ".":
@@ -176,6 +176,43 @@ def _check_samouts(sam_filenames, samout_format, samouts):
176
176
  pass
177
177
 
178
178
 
179
+
180
+ # Adapted from: https://github.com/python/cpython/issues/60603
181
+ class OverwriteUniqueAppendAction(argparse.Action):
182
+ """Custom action to append unique values to a list, overwriting the default.
183
+
184
+ When using the `append` action, the default value is not removed
185
+ from the list. This problem is described in
186
+ https://github.com/python/cpython/issues/60603
187
+
188
+ This custom action aims to fix this problem by removing the default
189
+ value when the argument is specified for the first time.
190
+
191
+ Moreover, it only appends if the value is not already there, so the resulting
192
+ list has unique elements.
193
+ """
194
+
195
+ def __init__(self, option_strings, dest, nargs=None, **kwargs):
196
+ """Initialize the action."""
197
+ self.called_times = 0
198
+ self.default_value = kwargs.get("default")
199
+ super().__init__(option_strings, dest, **kwargs)
200
+
201
+ def __call__(self, parser, namespace, values, option_string=None):
202
+ """When the argument is specified on the commandline."""
203
+ current_values = getattr(namespace, self.dest)
204
+
205
+ if self.called_times == 0 and current_values == self.default_value:
206
+ current_values = []
207
+
208
+ # Only add if not already present (unique values)
209
+ if values not in current_values:
210
+ current_values.append(values)
211
+
212
+ setattr(namespace, self.dest, current_values)
213
+ self.called_times += 1
214
+
215
+
179
216
  def _parse_sanitize_cmdline_arguments():
180
217
  pa = argparse.ArgumentParser(
181
218
  add_help=False,
@@ -274,17 +311,23 @@ def _parse_sanitize_cmdline_arguments():
274
311
  "--type",
275
312
  type=str,
276
313
  dest="feature_type",
277
- default="exon",
314
+ action=OverwriteUniqueAppendAction,
315
+ default=["exon"],
278
316
  help="Feature type (3rd column in GTF file) to be used, "
279
- + "all features of other type are ignored (default, suitable for Ensembl "
280
- + "GTF files: exon)",
317
+ + "all features of other type are ignored (default, suitable for "
318
+ + "Ensembl GTF files: exon). If you call this option multiple times, "
319
+ + "features of all specified types will be included, e.g. to include "
320
+ + "both genes and pseudogenes you might use -t gene -t pseudogene. "
321
+ + "Calling this option multiple times is a rare need and might result "
322
+ + "in excessive numbers of ambiguous counts: only use if you know what "
323
+ + "you are doing.",
281
324
  )
282
325
  pa.add_argument(
283
326
  "-i",
284
327
  "--idattr",
285
328
  type=str,
286
329
  dest="idattr",
287
- action="append",
330
+ action=OverwriteUniqueAppendAction,
288
331
  default=["gene_id"],
289
332
  help="GTF attribute to be used as feature ID (default, "
290
333
  + "suitable for Ensembl GTF files: gene_id). All feature of the "
@@ -299,7 +342,7 @@ def _parse_sanitize_cmdline_arguments():
299
342
  pa.add_argument(
300
343
  "--additional-attr",
301
344
  type=str,
302
- action="append",
345
+ action='append',
303
346
  dest='additional_attributes',
304
347
  default=[],
305
348
  help="Additional feature attributes (default: none, "
@@ -354,7 +397,7 @@ def _parse_sanitize_cmdline_arguments():
354
397
  "--samout",
355
398
  type=str,
356
399
  dest="samouts",
357
- action="append",
400
+ action='append',
358
401
  default=[],
359
402
  help="Write out all SAM alignment records into "
360
403
  + "SAM/BAM files (one per input file needed), annotating each line "
@@ -445,13 +488,6 @@ def _parse_sanitize_cmdline_arguments():
445
488
 
446
489
  args = pa.parse_args()
447
490
 
448
- # Deal with custom id_attribute lists. This is never shorter than 1 because
449
- # gene_id is the default. However, if the option was called at least once,
450
- # that should _override_ the default, which means skipping the first
451
- # element (i.e., gene_id).
452
- if len(args.idattr) > 1:
453
- del args.idattr[0]
454
-
455
491
  # Never use more CPUs than files
456
492
  args.nprocesses = min(args.nprocesses, len(args.samfilenames))
457
493
 
@@ -591,10 +591,13 @@ def main():
591
591
 
592
592
  pa.add_argument(
593
593
  "-t", "--type", type=str, dest="featuretype",
594
- default="exon",
595
- help="Feature type (3rd column in GTF file) to be used, " +
596
- "all features of other type are ignored (default, suitable for Ensembl " +
597
- "GTF files: exon)")
594
+ action="append", default=["exon"],
595
+ help="Feature type (3rd column in GTF file) to be used, all "
596
+ + "features of other type are ignored (default, suitable for "
597
+ + "Ensembl GTF files: exon). You can call this option multiple "
598
+ + "times. Features of all specified types will be included. "
599
+ + "E.g. to include both genes and pseudogenes you might use "
600
+ + "-t gene -t pseudogene")
598
601
 
599
602
  pa.add_argument(
600
603
  "-i", "--idattr", type=str, dest="idattr",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: HTSeq
3
- Version: 2.0.8
3
+ Version: 2.0.9
4
4
  Summary: A framework to process and analyze data from high-throughput sequencing (HTS) assays
5
5
  Home-page: https://github.com/htseq
6
6
  Author: Simon Anders, Fabio Zanini
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: HTSeq
3
- Version: 2.0.8
3
+ Version: 2.0.9
4
4
  Summary: A framework to process and analyze data from high-throughput sequencing (HTS) assays
5
5
  Home-page: https://github.com/htseq
6
6
  Author: Simon Anders, Fabio Zanini
htseq-2.0.9/VERSION ADDED
@@ -0,0 +1 @@
1
+ 2.0.9