HTSeq 2.0.8__tar.gz → 2.0.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- htseq-2.0.9/HTSeq/_version.py +1 -0
- {htseq-2.0.8 → htseq-2.0.9}/HTSeq/features.py +17 -6
- {htseq-2.0.8 → htseq-2.0.9}/HTSeq/scripts/count.py +49 -13
- {htseq-2.0.8 → htseq-2.0.9}/HTSeq/scripts/count_with_barcodes.py +7 -4
- {htseq-2.0.8 → htseq-2.0.9}/HTSeq.egg-info/PKG-INFO +1 -1
- {htseq-2.0.8 → htseq-2.0.9}/PKG-INFO +1 -1
- htseq-2.0.9/VERSION +1 -0
- {htseq-2.0.8 → htseq-2.0.9}/src/_HTSeq.c +2123 -2076
- {htseq-2.0.8 → htseq-2.0.9}/test/test_htseq-count.py +38 -3
- htseq-2.0.8/HTSeq/_version.py +0 -1
- htseq-2.0.8/VERSION +0 -1
- {htseq-2.0.8 → htseq-2.0.9}/HTSeq/StepVector.py +0 -0
- {htseq-2.0.8 → htseq-2.0.9}/HTSeq/StretchVector.py +0 -0
- {htseq-2.0.8 → htseq-2.0.9}/HTSeq/_HTSeq_internal.py +0 -0
- {htseq-2.0.8 → htseq-2.0.9}/HTSeq/__init__.py +0 -0
- {htseq-2.0.8 → htseq-2.0.9}/HTSeq/scripts/__init__.py +0 -0
- {htseq-2.0.8 → htseq-2.0.9}/HTSeq/scripts/count_features/__init__.py +0 -0
- {htseq-2.0.8 → htseq-2.0.9}/HTSeq/scripts/count_features/count_features_per_file.py +0 -0
- {htseq-2.0.8 → htseq-2.0.9}/HTSeq/scripts/count_features/reads_io_processor.py +0 -0
- {htseq-2.0.8 → htseq-2.0.9}/HTSeq/scripts/count_features/reads_stats.py +0 -0
- {htseq-2.0.8 → htseq-2.0.9}/HTSeq/scripts/count_old.py +0 -0
- {htseq-2.0.8 → htseq-2.0.9}/HTSeq/scripts/qa.py +0 -0
- {htseq-2.0.8 → htseq-2.0.9}/HTSeq/scripts/utils.py +0 -0
- {htseq-2.0.8 → htseq-2.0.9}/HTSeq/utils.py +0 -0
- {htseq-2.0.8 → htseq-2.0.9}/HTSeq.egg-info/SOURCES.txt +0 -0
- {htseq-2.0.8 → htseq-2.0.9}/HTSeq.egg-info/dependency_links.txt +0 -0
- {htseq-2.0.8 → htseq-2.0.9}/HTSeq.egg-info/requires.txt +0 -0
- {htseq-2.0.8 → htseq-2.0.9}/HTSeq.egg-info/top_level.txt +0 -0
- {htseq-2.0.8 → htseq-2.0.9}/LICENSE +0 -0
- {htseq-2.0.8 → htseq-2.0.9}/MANIFEST.in +0 -0
- {htseq-2.0.8 → htseq-2.0.9}/README.md +0 -0
- {htseq-2.0.8 → htseq-2.0.9}/pyproject.toml +0 -0
- {htseq-2.0.8 → htseq-2.0.9}/scripts/htseq-count +0 -0
- {htseq-2.0.8 → htseq-2.0.9}/scripts/htseq-count-barcodes +0 -0
- {htseq-2.0.8 → htseq-2.0.9}/scripts/htseq-qa +0 -0
- {htseq-2.0.8 → htseq-2.0.9}/setup.cfg +0 -0
- {htseq-2.0.8 → htseq-2.0.9}/setup.py +0 -0
- {htseq-2.0.8 → htseq-2.0.9}/src/AutoPyObjPtr.i +0 -0
- {htseq-2.0.8 → htseq-2.0.9}/src/HTSeq/_HTSeq.pxd +0 -0
- {htseq-2.0.8 → htseq-2.0.9}/src/HTSeq/_HTSeq.pyx +0 -0
- {htseq-2.0.8 → htseq-2.0.9}/src/HTSeq/__init__.py +0 -0
- {htseq-2.0.8 → htseq-2.0.9}/src/StepVector.i +0 -0
- {htseq-2.0.8 → htseq-2.0.9}/src/StepVector.py +0 -0
- {htseq-2.0.8 → htseq-2.0.9}/src/StepVector_wrap.cxx +0 -0
- {htseq-2.0.8 → htseq-2.0.9}/src/step_vector.h +0 -0
- {htseq-2.0.8 → htseq-2.0.9}/test/test_general.py +0 -0
- {htseq-2.0.8 → htseq-2.0.9}/test/test_genomic.py +0 -0
- {htseq-2.0.8 → htseq-2.0.9}/test/test_parsers.py +0 -0
- {htseq-2.0.8 → htseq-2.0.9}/test/test_stretch_vector.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "2.0.9"
|
|
@@ -239,8 +239,17 @@ def make_feature_dict(
|
|
|
239
239
|
Args:
|
|
240
240
|
feature_sequence (iterable of Feature): A sequence of features, e.g. as
|
|
241
241
|
obtained from GFF_reader('myfile.gtf')
|
|
242
|
-
feature_type (string or None): If None, collect
|
|
243
|
-
string, restrict to only one type of features,
|
|
242
|
+
feature_type (string, sequence of strings, or None): If None, collect
|
|
243
|
+
all features. If a string, restrict to only one type of features,
|
|
244
|
+
e.g. 'exon' (this is the most common situation). If a sequence of
|
|
245
|
+
strings, restrict to the types found in the sequence, e.g.
|
|
246
|
+
['gene', 'pseudogene']. Using a sequence of strings is an uncommon
|
|
247
|
+
need and can lead to a higher number of ambiguous alignments: only
|
|
248
|
+
use if you know what you are doing. Even then, beware that this
|
|
249
|
+
option is designed to work for feature types that are "peers" and
|
|
250
|
+
not obviously overlapping, such as genes and pseudogenes. If you
|
|
251
|
+
select nested feature types (e.g. "gene" and "exon"), you are
|
|
252
|
+
likely to end up with meaningless numbers.
|
|
244
253
|
feature_query (string or None): If None, all features of the selected
|
|
245
254
|
types will be collected. If a string, it has to be in the format:
|
|
246
255
|
|
|
@@ -280,7 +289,7 @@ def make_feature_dict(
|
|
|
280
289
|
|
|
281
290
|
features = {}
|
|
282
291
|
for f in feature_sequence:
|
|
283
|
-
if
|
|
292
|
+
if any(ft in (None, f.type) for ft in feature_type):
|
|
284
293
|
if f.type not in features:
|
|
285
294
|
features[f.type] = {}
|
|
286
295
|
res_ftype = features[f.type]
|
|
@@ -322,8 +331,10 @@ def make_feature_genomicarrayofsets(
|
|
|
322
331
|
attributes, separated by colons (:), will be used as an identifier.
|
|
323
332
|
For instance, ['gene_id', 'exon_number'] uniquely identifies
|
|
324
333
|
specific exons.
|
|
325
|
-
feature_type (string or None): If None, collect
|
|
326
|
-
string, restrict to only one type of features,
|
|
334
|
+
feature_type (string, sequence of strings, or None): If None, collect
|
|
335
|
+
all features. If a string, restrict to only one type of features,
|
|
336
|
+
e.g. 'exon'. If a sequence of strings, restrict to the types found
|
|
337
|
+
in the sequence, e.g. ['gene', 'pseudogene'].
|
|
327
338
|
feature_query (string or None): If None, all features of the selected
|
|
328
339
|
types will be collected. If a string, it has to be in the format:
|
|
329
340
|
|
|
@@ -401,7 +412,7 @@ def make_feature_genomicarrayofsets(
|
|
|
401
412
|
i = 0
|
|
402
413
|
try:
|
|
403
414
|
for f in feature_sequence:
|
|
404
|
-
if
|
|
415
|
+
if any(ft in (None, f.type) for ft in feature_type):
|
|
405
416
|
feature_id = get_id_attr(f, id_attribute)
|
|
406
417
|
|
|
407
418
|
if stranded and f.iv.strand == ".":
|
|
@@ -176,6 +176,43 @@ def _check_samouts(sam_filenames, samout_format, samouts):
|
|
|
176
176
|
pass
|
|
177
177
|
|
|
178
178
|
|
|
179
|
+
|
|
180
|
+
# Adapted from: https://github.com/python/cpython/issues/60603
|
|
181
|
+
class OverwriteUniqueAppendAction(argparse.Action):
|
|
182
|
+
"""Custom action to append unique values to a list, overwriting the default.
|
|
183
|
+
|
|
184
|
+
When using the `append` action, the default value is not removed
|
|
185
|
+
from the list. This problem is described in
|
|
186
|
+
https://github.com/python/cpython/issues/60603
|
|
187
|
+
|
|
188
|
+
This custom action aims to fix this problem by removing the default
|
|
189
|
+
value when the argument is specified for the first time.
|
|
190
|
+
|
|
191
|
+
Moreover, it only appends if the value is not already there, so the resulting
|
|
192
|
+
list has unique elements.
|
|
193
|
+
"""
|
|
194
|
+
|
|
195
|
+
def __init__(self, option_strings, dest, nargs=None, **kwargs):
|
|
196
|
+
"""Initialize the action."""
|
|
197
|
+
self.called_times = 0
|
|
198
|
+
self.default_value = kwargs.get("default")
|
|
199
|
+
super().__init__(option_strings, dest, **kwargs)
|
|
200
|
+
|
|
201
|
+
def __call__(self, parser, namespace, values, option_string=None):
|
|
202
|
+
"""When the argument is specified on the commandline."""
|
|
203
|
+
current_values = getattr(namespace, self.dest)
|
|
204
|
+
|
|
205
|
+
if self.called_times == 0 and current_values == self.default_value:
|
|
206
|
+
current_values = []
|
|
207
|
+
|
|
208
|
+
# Only add if not already present (unique values)
|
|
209
|
+
if values not in current_values:
|
|
210
|
+
current_values.append(values)
|
|
211
|
+
|
|
212
|
+
setattr(namespace, self.dest, current_values)
|
|
213
|
+
self.called_times += 1
|
|
214
|
+
|
|
215
|
+
|
|
179
216
|
def _parse_sanitize_cmdline_arguments():
|
|
180
217
|
pa = argparse.ArgumentParser(
|
|
181
218
|
add_help=False,
|
|
@@ -274,17 +311,23 @@ def _parse_sanitize_cmdline_arguments():
|
|
|
274
311
|
"--type",
|
|
275
312
|
type=str,
|
|
276
313
|
dest="feature_type",
|
|
277
|
-
|
|
314
|
+
action=OverwriteUniqueAppendAction,
|
|
315
|
+
default=["exon"],
|
|
278
316
|
help="Feature type (3rd column in GTF file) to be used, "
|
|
279
|
-
+ "all features of other type are ignored (default, suitable for
|
|
280
|
-
+ "GTF files: exon)"
|
|
317
|
+
+ "all features of other type are ignored (default, suitable for "
|
|
318
|
+
+ "Ensembl GTF files: exon). If you call this option multiple times, "
|
|
319
|
+
+ "features of all specified types will be included, e.g. to include "
|
|
320
|
+
+ "both genes and pseudogenes you might use -t gene -t pseudogene. "
|
|
321
|
+
+ "Calling this option multiple times is a rare need and might result "
|
|
322
|
+
+ "in excessive numbers of ambiguous counts: only use if you know what "
|
|
323
|
+
+ "you are doing.",
|
|
281
324
|
)
|
|
282
325
|
pa.add_argument(
|
|
283
326
|
"-i",
|
|
284
327
|
"--idattr",
|
|
285
328
|
type=str,
|
|
286
329
|
dest="idattr",
|
|
287
|
-
action=
|
|
330
|
+
action=OverwriteUniqueAppendAction,
|
|
288
331
|
default=["gene_id"],
|
|
289
332
|
help="GTF attribute to be used as feature ID (default, "
|
|
290
333
|
+ "suitable for Ensembl GTF files: gene_id). All feature of the "
|
|
@@ -299,7 +342,7 @@ def _parse_sanitize_cmdline_arguments():
|
|
|
299
342
|
pa.add_argument(
|
|
300
343
|
"--additional-attr",
|
|
301
344
|
type=str,
|
|
302
|
-
action=
|
|
345
|
+
action='append',
|
|
303
346
|
dest='additional_attributes',
|
|
304
347
|
default=[],
|
|
305
348
|
help="Additional feature attributes (default: none, "
|
|
@@ -354,7 +397,7 @@ def _parse_sanitize_cmdline_arguments():
|
|
|
354
397
|
"--samout",
|
|
355
398
|
type=str,
|
|
356
399
|
dest="samouts",
|
|
357
|
-
action=
|
|
400
|
+
action='append',
|
|
358
401
|
default=[],
|
|
359
402
|
help="Write out all SAM alignment records into "
|
|
360
403
|
+ "SAM/BAM files (one per input file needed), annotating each line "
|
|
@@ -445,13 +488,6 @@ def _parse_sanitize_cmdline_arguments():
|
|
|
445
488
|
|
|
446
489
|
args = pa.parse_args()
|
|
447
490
|
|
|
448
|
-
# Deal with custom id_attribute lists. This is never shorter than 1 because
|
|
449
|
-
# gene_id is the default. However, if the option was called at least once,
|
|
450
|
-
# that should _override_ the default, which means skipping the first
|
|
451
|
-
# element (i.e., gene_id).
|
|
452
|
-
if len(args.idattr) > 1:
|
|
453
|
-
del args.idattr[0]
|
|
454
|
-
|
|
455
491
|
# Never use more CPUs than files
|
|
456
492
|
args.nprocesses = min(args.nprocesses, len(args.samfilenames))
|
|
457
493
|
|
|
@@ -591,10 +591,13 @@ def main():
|
|
|
591
591
|
|
|
592
592
|
pa.add_argument(
|
|
593
593
|
"-t", "--type", type=str, dest="featuretype",
|
|
594
|
-
default="exon",
|
|
595
|
-
help="Feature type (3rd column in GTF file) to be used, "
|
|
596
|
-
"
|
|
597
|
-
"GTF files: exon)"
|
|
594
|
+
action="append", default=["exon"],
|
|
595
|
+
help="Feature type (3rd column in GTF file) to be used, all "
|
|
596
|
+
+ "features of other type are ignored (default, suitable for "
|
|
597
|
+
+ "Ensembl GTF files: exon). You can call this option multiple "
|
|
598
|
+
+ "times. Features of all specified types will be included. "
|
|
599
|
+
+ "E.g. to include both genes and pseudogenes you might use "
|
|
600
|
+
+ "-t gene -t pseudogene")
|
|
598
601
|
|
|
599
602
|
pa.add_argument(
|
|
600
603
|
"-i", "--idattr", type=str, dest="idattr",
|
htseq-2.0.9/VERSION
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
2.0.9
|