PyPI - mgnify-pipelines-toolkit - Versions diffs - 1.0.5__tar.gz → 1.0.7__tar.gz - Mend

@@ -22,7 +22,15 @@ from pathlib import Path
 logging.basicConfig(level=logging.INFO)
-def main(standard_file, substrate_file, outfile, dbcan_version):
+def main():
+    args = parse_args()
+    standard_file, substrate_file, outfile, dbcan_ver = (
+        args.standard_file,
+        args.substrate_file,
+        args.outfile,
+        args.dbcan_ver,
+    )
     standard_path = Path(standard_file)
     substrate_path = Path(substrate_file)
@@ -36,12 +44,12 @@ def main(standard_file, substrate_file, outfile, dbcan_version):
     substrates = load_substrates(substrate_path)
     cgc_locations = load_cgcs(standard_path)
-    print_gff(standard_path, outfile, dbcan_version, substrates, cgc_locations)
+    print_gff(standard_path, outfile, dbcan_ver, substrates, cgc_locations)
 def load_cgcs(standard_path):
     cgc_locations = dict()
-    with fileinput.hook_compressed(standard_path, "rt") as file_in:
+    with fileinput.hook_compressed(standard_path, "r", encoding="utf-8") as file_in:
         for line in file_in:
             if not line.startswith("CGC#"):
                 cgc, _, contig, _, start, end, _, _ = line.strip().split("\t")
@@ -64,7 +72,7 @@ def print_gff(standard_path, outfile, dbcan_version, substrates, cgc_locations):
     with open(outfile, "w") as file_out:
         file_out.write("##gff-version 3\n")
         cgcs_printed = list()
-        with fileinput.hook_compressed(standard_path, "rt") as file_in:
+        with fileinput.hook_compressed(standard_path, "r", encoding="utf-8") as file_in:
             for line in file_in:
                 if not line.startswith("CGC#"):
                     cgc, gene_type, contig, prot_id, start, end, strand, protein_fam = (
@@ -99,7 +107,7 @@ def print_gff(standard_path, outfile, dbcan_version, substrates, cgc_locations):
 def load_substrates(substrate_path):
     substrates = dict()
-    with fileinput.hook_compressed(substrate_path, "rt") as file_in:
+    with fileinput.hook_compressed(substrate_path, "r", encoding="utf-8") as file_in:
         for line in file_in:
             if not line.startswith("#"):
                 parts = line.strip().split("\t")
@@ -158,5 +166,4 @@ def parse_args():
 if __name__ == "__main__":
-    args = parse_args()
-    main(args.standard_file, args.substrate_file, args.outfile, args.dbcan_ver)
+    main()

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mgnify_pipelines_toolkit
-Version: 1.0.5
+Version: 1.0.7
 Summary: Collection of scripts and tools for MGnify pipelines
 Author-email: MGnify team <metagenomics-help@ebi.ac.uk>
 License: Apache Software License 2.0

@@ -29,24 +29,27 @@ from mgnify_pipelines_toolkit.analysis.assembly.gff_file_utils import (
 )
-def main(
-    gff,
-    ipr_file,
-    eggnog_file,
-    sanntis_file,
-    crispr_file,
-    amr_file,
-    antismash_file,
-    gecco_file,
-    dbcan_file,
-    dbcan_cazys_file,
-    defense_finder_file,
-    pseudofinder_file,
-    rfam_file,
-    trnascan_file,
-    outfile,
-    pseudogene_report_file,
-):
+def main():
+    (
+        gff,
+        ipr_file,
+        eggnog_file,
+        sanntis_file,
+        crispr_file,
+        amr_file,
+        antismash_file,
+        gecco_file,
+        dbcan_file,
+        dbcan_cazys_file,
+        defense_finder_file,
+        pseudofinder_file,
+        rfam_file,
+        trnascan_file,
+        outfile,
+        pseudogene_report_file,
+    ) = parse_args()
     # load annotations and add them to existing CDS
     # here header contains leading GFF lines starting with "#",
     # main_gff_extended is a dictionary that contains GFF lines with added in additional annotations
@@ -163,12 +166,8 @@ def parse_args():
         "--pseudogene-report", help="Pseudogene report filename", required=False
     )
-    return parser.parse_args()
-if __name__ == "__main__":
-    args = parse_args()
-    main(
+    args = parser.parse_args()
+    return (
         args.gff_input,
         args.ips,
         args.eggnog,
@@ -186,3 +185,7 @@ if __name__ == "__main__":
         args.outfile,
         args.pseudogene_report,
     )
+if __name__ == "__main__":
+    main()

@@ -24,7 +24,16 @@ import re
 logging.basicConfig(level=logging.INFO)
-def main(hmm_file, overview_file, genome_gff, outfile, dbcan_version):
+def main():
+    args = parse_args()
+    hmm_file, overview_file, genome_gff, outfile, dbcan_ver = (
+        args.hmm_file,
+        args.overview_file,
+        args.genome_gff,
+        args.outfile,
+        args.dbcan_ver,
+    )
     hmm_path = Path(hmm_file)
     overview_path = Path(overview_file)
@@ -38,12 +47,12 @@ def main(hmm_file, overview_file, genome_gff, outfile, dbcan_version):
     substrates = load_substrates(hmm_path)
     genome_gff_lines = load_gff(genome_gff)
-    print_gff(overview_file, outfile, dbcan_version, substrates, genome_gff_lines)
+    print_gff(overview_file, outfile, dbcan_ver, substrates, genome_gff_lines)
 def load_gff(gff):
     genome_gff_lines = dict()
-    with fileinput.hook_compressed(gff, "rt") as gff:
+    with fileinput.hook_compressed(gff, "r", encoding="utf-8") as gff:
         for line in gff:
             if line.startswith("##FASTA"):
                 return genome_gff_lines
@@ -72,7 +81,7 @@ def load_gff(gff):
 def print_gff(overview_file, outfile, dbcan_version, substrates, genome_gff_lines):
     with open(outfile, "w") as file_out:
         file_out.write("##gff-version 3\n")
-        with fileinput.hook_compressed(overview_file, "rt") as file_in:
+        with fileinput.hook_compressed(overview_file, "r", encoding="utf-8") as file_in:
             for line in file_in:
                 if line.startswith("MGYG") or line.startswith("ERZ"):
                     (
@@ -142,7 +151,7 @@ def print_gff(overview_file, outfile, dbcan_version, substrates, genome_gff_line
 def load_substrates(hmm_path):
     substrates = dict()
-    with fileinput.hook_compressed(hmm_path, "rt") as file_in:
+    with fileinput.hook_compressed(hmm_path, "r", encoding="utf-8") as file_in:
         header = next(file_in)
         header_fields = header.strip().split("\t")
         substrate_idx = header_fields.index("Substrate")
@@ -205,7 +214,4 @@ def parse_args():
 if __name__ == "__main__":
-    args = parse_args()
-    main(
-        args.hmm_file, args.overview_file, args.genome_gff, args.outfile, args.dbcan_ver
-    )
+    main()

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mgnify_pipelines_toolkit
-Version: 1.0.5
+Version: 1.0.7
 Summary: Collection of scripts and tools for MGnify pipelines
 Author-email: MGnify team <metagenomics-help@ebi.ac.uk>
 License: Apache Software License 2.0

@@ -1,6 +1,6 @@
 [project]
 name = "mgnify_pipelines_toolkit"
-version = "1.0.5"
+version = "1.0.7"
 readme = "README.md"
 license = {text = "Apache Software License 2.0"}
 authors = [

mgnify-pipelines-toolkit 1.0.5__tar.gz → 1.0.7__tar.gz

Potentially problematic release.

mgnify-pipelines-toolkit 1.0.5__tar.gz → 1.0.7__tar.gz

Potentially problematic release.

mgnify-pipelines-toolkit 1.0.5__tar.gz → 1.0.7__tar.gz