PyPI - HTSeq - Versions diffs - 2.0.8__tar.gz → 2.0.9__tar.gz - Mend

HTSeq 2.0.8tar.gz → 2.0.9tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49) hide show

htseq-2.0.9/HTSeq/_version.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ __version__ = "2.0.9"

{htseq-2.0.8 → htseq-2.0.9}/HTSeq/features.py RENAMED Viewed

@@ -239,8 +239,17 @@ def make_feature_dict(
     Args:
         feature_sequence (iterable of Feature): A sequence of features, e.g. as
             obtained from GFF_reader('myfile.gtf')
-        feature_type (string or None): If None, collect all features. If a
-            string, restrict to only one type of features, e.g. 'exon'.
+        feature_type (string, sequence of strings, or None): If None, collect
+            all features. If a string, restrict to only one type of features,
+            e.g. 'exon' (this is the most common situation). If a sequence of
+            strings, restrict to the types found in the sequence, e.g.
+            ['gene', 'pseudogene']. Using a sequence of strings is an uncommon
+            need and can lead to a higher number of ambiguous alignments: only
+            use if you know what you are doing. Even then, beware that this
+            option is designed to work for feature types that are "peers" and
+            not obviously overlapping, such as genes and pseudogenes. If you
+            select nested feature types (e.g. "gene" and "exon"), you are
+            likely to end up with meaningless numbers.
         feature_query (string or None): If None, all features of the selected
             types will be collected. If a string, it has to be in the format:
@@ -280,7 +289,7 @@ def make_feature_dict(
     features = {}
     for f in feature_sequence:
-        if feature_type in (None, f.type):
+        if any(ft in (None, f.type) for ft in feature_type):
             if f.type not in features:
                 features[f.type] = {}
             res_ftype = features[f.type]
@@ -322,8 +331,10 @@ def make_feature_genomicarrayofsets(
             attributes, separated by colons (:), will be used as an identifier.
             For instance, ['gene_id', 'exon_number'] uniquely identifies
             specific exons.
-        feature_type (string or None): If None, collect all features. If a
-            string, restrict to only one type of features, e.g. 'exon'.
+        feature_type (string, sequence of strings, or None): If None, collect
+            all features. If a string, restrict to only one type of features,
+            e.g. 'exon'. If a sequence of strings, restrict to the types found
+            in the sequence, e.g. 'gene' and 'pseudogene'.
         feature_query (string or None): If None, all features of the selected
             types will be collected. If a string, it has to be in the format:
@@ -401,7 +412,7 @@ def make_feature_genomicarrayofsets(
     i = 0
     try:
         for f in feature_sequence:
-            if feature_type in (None, f.type):
+            if any(ft in (None, f.type) for ft in feature_type):
                 feature_id = get_id_attr(f, id_attribute)
                 if stranded and f.iv.strand == ".":

{htseq-2.0.8 → htseq-2.0.9}/HTSeq/scripts/count.py RENAMED Viewed

@@ -176,6 +176,43 @@ def _check_samouts(sam_filenames, samout_format, samouts):
                         pass
+# Adapted from: https://github.com/python/cpython/issues/60603
+class OverwriteUniqueAppendAction(argparse.Action):
+    """Custom action to append unique values to a list, overwriting the default.
+    When using the `append` action, the default value is not removed
+    from the list. This problem is described in
+    https://github.com/python/cpython/issues/60603
+    This custom action aims to fix this problem by removing the default
+    value when the argument is specified for the first time.
+    Moreover, it only appends if the value is not already there, so the resulting
+    list has unique elements.
+    """
+    def __init__(self, option_strings, dest, nargs=None, **kwargs):
+        """Initialize the action."""
+        self.called_times = 0
+        self.default_value = kwargs.get("default")
+        super().__init__(option_strings, dest, **kwargs)
+    def __call__(self, parser, namespace, values, option_string=None):
+        """When the argument is specified on the commandline."""
+        current_values = getattr(namespace, self.dest)
+        if self.called_times == 0 and current_values == self.default_value:
+            current_values = []
+        # Only add if not already present (unique values)
+        if values not in current_values:
+            current_values.append(values)
+        setattr(namespace, self.dest, current_values)
+        self.called_times += 1
 def _parse_sanitize_cmdline_arguments():
     pa = argparse.ArgumentParser(
         add_help=False,
@@ -274,17 +311,23 @@ def _parse_sanitize_cmdline_arguments():
         "--type",
         type=str,
         dest="feature_type",
-        default="exon",
+        action=OverwriteUniqueAppendAction,
+        default=["exon"],
         help="Feature type (3rd column in GTF file) to be used, "
-        + "all features of other type are ignored (default, suitable for Ensembl "
-        + "GTF files: exon)",
+        + "all features of other type are ignored (default, suitable for"
+        + "Ensembl GTF files: exon). If you can call this option multiple times, "
+        + "features of all specified types will be included, e.g. to include "
+        + "both genes and pseudogenes you might use -t gene -t pseudogene. "
+        + "Calling this option multiple times is a rare need and might result "
+        + "in excessive numbers of ambiguous counts: only use if you know what "
+        + "you are doing.",
     )
     pa.add_argument(
         "-i",
         "--idattr",
         type=str,
         dest="idattr",
-        action="append",
+        action=OverwriteUniqueAppendAction,
         default=["gene_id"],
         help="GTF attribute to be used as feature ID (default, "
         + "suitable for Ensembl GTF files: gene_id). All feature of the "
@@ -299,7 +342,7 @@ def _parse_sanitize_cmdline_arguments():
     pa.add_argument(
         "--additional-attr",
         type=str,
-        action="append",
+        action='append',
         dest='additional_attributes',
         default=[],
         help="Additional feature attributes (default: none, "
@@ -354,7 +397,7 @@ def _parse_sanitize_cmdline_arguments():
         "--samout",
         type=str,
         dest="samouts",
-        action="append",
+        action='append',
         default=[],
         help="Write out all SAM alignment records into "
         + "SAM/BAM files (one per input file needed), annotating each line "
@@ -445,13 +488,6 @@ def _parse_sanitize_cmdline_arguments():
     args = pa.parse_args()
-    # Deal with custom id_attribute lists. This is never shorter than 1 because
-    # gene_id is the default. However, if the option was called at least once,
-    # that should _override_ the default, which means skipping the first
-    # element (i.e., gene_id).
-    if len(args.idattr) > 1:
-        del args.idattr[0]
     # Never use more CPUs than files
     args.nprocesses = min(args.nprocesses, len(args.samfilenames))

{htseq-2.0.8 → htseq-2.0.9}/HTSeq/scripts/count_with_barcodes.py RENAMED Viewed

@@ -591,10 +591,13 @@ def main():
     pa.add_argument(
             "-t", "--type", type=str, dest="featuretype",
-            default="exon",
-            help="Feature type (3rd column in GTF file) to be used, " +
-            "all features of other type are ignored (default, suitable for Ensembl " +
-            "GTF files: exon)")
+            action="append", default=["exon"],
+            help="Feature type (3rd column in GTF file) to be used, all "
+            + "features of other type are ignored (default, suitable for"
+            + "Ensembl GTF files: exon). You can call this option multiple "
+            + "times. Features of all specified types will be included. "
+            + "E.g. to include both genes and pseudogenes you might use "
+            + "-t gene -t pseudogene")
     pa.add_argument(
             "-i", "--idattr", type=str, dest="idattr",

{htseq-2.0.8 → htseq-2.0.9}/HTSeq.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: HTSeq
-Version: 2.0.8
+Version: 2.0.9
 Summary: A framework to process and analyze data from high-throughput sequencing (HTS) assays
 Home-page: https://github.com/htseq
 Author: Simon Anders, Fabio Zanini

{htseq-2.0.8 → htseq-2.0.9}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: HTSeq
-Version: 2.0.8
+Version: 2.0.9
 Summary: A framework to process and analyze data from high-throughput sequencing (HTS) assays
 Home-page: https://github.com/htseq
 Author: Simon Anders, Fabio Zanini

htseq-2.0.9/VERSION ADDED Viewed

	@@ -0,0 +1 @@
1	+ 2.0.9

HTSeq 2.0.8__tar.gz → 2.0.9__tar.gz

HTSeq 2.0.8tar.gz → 2.0.9tar.gz