mgnify-pipelines-toolkit 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl
This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
Potentially problematic release.
This version of mgnify-pipelines-toolkit might be problematic.
- mgnify_pipelines_toolkit/analysis/amplicon/amplicon_utils.py +74 -54
- mgnify_pipelines_toolkit/analysis/amplicon/are_there_primers.py +69 -42
- mgnify_pipelines_toolkit/analysis/amplicon/assess_inflection_point_mcp.py +120 -66
- mgnify_pipelines_toolkit/analysis/amplicon/assess_mcp_proportions.py +74 -45
- mgnify_pipelines_toolkit/analysis/amplicon/classify_var_regions.py +277 -148
- mgnify_pipelines_toolkit/analysis/amplicon/find_mcp_inflection_points.py +45 -28
- mgnify_pipelines_toolkit/analysis/amplicon/make_asv_count_table.py +53 -32
- mgnify_pipelines_toolkit/analysis/amplicon/mapseq_to_asv_table.py +54 -16
- mgnify_pipelines_toolkit/analysis/amplicon/primer_val_classification.py +29 -12
- mgnify_pipelines_toolkit/analysis/amplicon/remove_ambiguous_reads.py +29 -19
- mgnify_pipelines_toolkit/analysis/amplicon/rev_comp_se_primers.py +23 -13
- mgnify_pipelines_toolkit/analysis/amplicon/standard_primer_matching.py +127 -89
- mgnify_pipelines_toolkit/analysis/shared/fastq_suffix_header_check.py +138 -0
- mgnify_pipelines_toolkit/analysis/shared/get_subunits.py +55 -26
- mgnify_pipelines_toolkit/analysis/shared/get_subunits_coords.py +19 -13
- mgnify_pipelines_toolkit/analysis/shared/library_strategy_check.py +66 -0
- mgnify_pipelines_toolkit/analysis/shared/mapseq2biom.py +2 -2
- mgnify_pipelines_toolkit/constants/regex_ambiguous_bases.py +3 -5
- mgnify_pipelines_toolkit/constants/regex_fasta_header.py +20 -0
- mgnify_pipelines_toolkit/constants/tax_ranks.py +21 -2
- mgnify_pipelines_toolkit/constants/thresholds.py +4 -1
- mgnify_pipelines_toolkit/constants/var_region_coordinates.py +4 -4
- mgnify_pipelines_toolkit/utils/__init__.py +0 -0
- mgnify_pipelines_toolkit/utils/fasta_to_delimited.py +144 -0
- mgnify_pipelines_toolkit/utils/get_mpt_version.py +26 -0
- {mgnify_pipelines_toolkit-0.1.4.dist-info → mgnify_pipelines_toolkit-0.1.6.dist-info}/METADATA +18 -1
- mgnify_pipelines_toolkit-0.1.6.dist-info/RECORD +34 -0
- {mgnify_pipelines_toolkit-0.1.4.dist-info → mgnify_pipelines_toolkit-0.1.6.dist-info}/WHEEL +1 -1
- {mgnify_pipelines_toolkit-0.1.4.dist-info → mgnify_pipelines_toolkit-0.1.6.dist-info}/entry_points.txt +4 -0
- mgnify_pipelines_toolkit-0.1.4.dist-info/RECORD +0 -28
- {mgnify_pipelines_toolkit-0.1.4.dist-info → mgnify_pipelines_toolkit-0.1.6.dist-info}/LICENSE +0 -0
- {mgnify_pipelines_toolkit-0.1.4.dist-info → mgnify_pipelines_toolkit-0.1.6.dist-info}/top_level.txt +0 -0
mgnify_pipelines_toolkit/analysis/amplicon/standard_primer_matching.py

@@ -17,31 +17,48 @@
 import argparse
 from collections import defaultdict
 import os
-import subprocess
 
 from Bio.Seq import Seq
 import regex
 
-from mgnify_pipelines_toolkit.analysis.amplicon.amplicon_utils import primer_regex_query_builder, get_read_count, fetch_mcp
+from mgnify_pipelines_toolkit.analysis.amplicon.amplicon_utils import (
+    primer_regex_query_builder,
+    get_read_count,
+    fetch_mcp,
+)
+
 
 def parse_args():
 
     parser = argparse.ArgumentParser()
 
-    parser.add_argument("-i", "--input", required=True, type=str, help="Path to merged FASTQ to look for primers")
-    parser.add_argument("-p", "--primers", required=True, type=str, help="Path to directory containing standard primers fasta files")
+    parser.add_argument(
+        "-i",
+        "--input",
+        required=True,
+        type=str,
+        help="Path to merged FASTQ to look for primers",
+    )
+    parser.add_argument(
+        "-p",
+        "--primers",
+        required=True,
+        type=str,
+        help="Path to directory containing standard primers fasta files",
+    )
     parser.add_argument("-s", "--sample", required=True, type=str, help="Sample ID")
     parser.add_argument("-o", "--output", required=True, type=str, help="Output path")
     args = parser.parse_args()
-
-    _INPUT = args.input
-    _PRIMERS = args.primers
-    _SAMPLE = args.sample
-    _OUTPUT = args.output
 
-
+    input = args.input
+    primers = args.primers
+    sample = args.sample
+    output = args.output
+
+    return input, primers, sample, output
+
 
-def parse_std_primers(_PRIMERS):
+def parse_std_primers(primers):
     """
     Parse the library of standard primers.
 
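The parse_args() change above is more than reformatting: the old function assigned _INPUT, _PRIMERS, _SAMPLE and _OUTPUT as function locals and never returned them, so callers had nothing to unpack. The new version hands the four values back explicitly, and the rest of the script consumes them as:

    input, primers, sample, output = parse_args()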
@@ -60,19 +77,19 @@ def parse_std_primers(_PRIMERS):
     std_primer_dict_regex = defaultdict(defaultdict)
     std_primer_dict = defaultdict(defaultdict)
 
-    dir = os.listdir(_PRIMERS)
-    dir = [f'{_PRIMERS}/{path}' for path in dir]
-
+    dir = os.listdir(primers)
+    dir = [f"{primers}/{path}" for path in dir]
+
     rev_flag = False
 
     for path in dir:
-        region = path.split('/')[-1].split('.')[0]
-        with open(path, 'r') as fr:
-            key = ''
+        region = path.split("/")[-1].split(".")[0]
+        with open(path, "r") as fr:
+            key = ""
             for line in fr:
                 line = line.strip()
-                if line[0] == '>':
-                    if 'R' in line:  # If a primer is a reverse primer
+                if line[0] == ">":
+                    if "R" in line:  # If a primer is a reverse primer
                         rev_flag = True
                     key = line[1:]
                 else:
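parse_std_primers() takes the region name from each file name and flags any header containing "R" as a reverse primer, so the primer library directory is expected to hold FASTA files shaped roughly like the sketch below (the file name and sequences are illustrative, not taken from the package):

    # standard_primers/16S_V3-V4.fasta  ->  region "16S_V3-V4"
    >341F
    CCTACGGGNGGCWGCAG
    >805R
    GACTACHVGGGTATCTAATCC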
@@ -87,6 +104,7 @@ def parse_std_primers(_PRIMERS):
 
     return std_primer_dict_regex, std_primer_dict
 
+
 def run_primer_matching_once(input_path, input_primer, rev=False):
     """
     Run primer matching using the regex package.
@@ -103,11 +121,12 @@ def run_primer_matching_once(input_path, input_primer, rev=False):
     for mcp in mcp_count_dict.keys():
         mcp = mcp.strip()
         res = regex.match(input_primer, mcp)
-        if res != None:
+        if res is not None:
             match_count += mcp_count_dict[mcp]
 
     return match_count
 
+
 def get_primer_props(std_primer_dict_regex, input_path):
     """
     Look for the standard primers in the input fastq file.
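The `regex.match(input_primer, mcp)` call uses the third-party regex package, whose fuzzy-matching syntax lets a primer pattern tolerate sequencing errors; the actual query string is built by primer_regex_query_builder(), which this diff does not show. A minimal sketch with a hypothetical pattern:

    import regex

    # {e<=1} permits at most one error (substitution, insertion or deletion)
    res = regex.match("(?:CCTACGGG){e<=1}", "CCTACGGT")
    print(res is not None)  # True: one substitution is within tolerance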
@@ -122,93 +141,107 @@ def get_primer_props(std_primer_dict_regex, input_path):
     max_primers: dictionary containing the F and/or R primers that were chosen
     """
 
-    threshold = 0.60
-    read_count = get_read_count(input_path, 'fastq') # Get read count of fastq file to calculate proportion with
+    threshold = 0.60  # Arbitrary threshold for collecting a matched primer
+    read_count = get_read_count(
+        input_path, "fastq"
+    )  # Get read count of fastq file to calculate proportion with
     res_dict = defaultdict(defaultdict)
 
     # Loop through every primer region
     for region, primer in std_primer_dict_regex.items():
-        res_dict[region]['F'] = {}
-        res_dict[region]['R'] = {}
+        res_dict[region]["F"] = {}
+        res_dict[region]["R"] = {}
 
         # Loop through every primer of a certain region
         for primer_name, primer_seq in primer.items():
-
-            region_name_str = f'{region};{primer_name}'
+
+            region_name_str = f"{region};{primer_name}"
             primer_count = 0.0
 
-            if 'F' in primer_name:
-                primer_count = run_primer_matching_once(input_path, primer_seq, rev=False) # Get proportion of a F primer with fuzzy regex matching
-            elif 'R' in primer_name:
-                primer_count = run_primer_matching_once(input_path, primer_seq, rev=True) # Get proportion of a R primer with fuzzy regex matching
+            if "F" in primer_name:
+                primer_count = run_primer_matching_once(
+                    input_path, primer_seq, rev=False
+                )  # Get proportion of a F primer with fuzzy regex matching
+            elif "R" in primer_name:
+                primer_count = run_primer_matching_once(
+                    input_path, primer_seq, rev=True
+                )  # Get proportion of a R primer with fuzzy regex matching
 
             try:
                 primer_prop = primer_count / read_count
             except ZeroDivisionError:
                 primer_prop = 0
 
-            if 'F' in primer_name:
-                if primer_prop > threshold: # Only collect primer if it's above threshold
-                    res_dict[region]['F'][primer_name] = primer_prop
-            elif 'R' in primer_name:
-                if primer_prop > threshold: # Only collect primer if it's above threshold
-                    res_dict[region]['R'][primer_name] = primer_prop
+            if "F" in primer_name:
+                if (
+                    primer_prop > threshold
+                ):  # Only collect primer if it's above threshold
+                    res_dict[region]["F"][primer_name] = primer_prop
+            elif "R" in primer_name:
+                if (
+                    primer_prop > threshold
+                ):  # Only collect primer if it's above threshold
+                    res_dict[region]["R"][primer_name] = primer_prop
+
+            print(f"{region_name_str}: {primer_prop}")
 
-            print(f'{region_name_str}: {primer_prop}')
-
         # If an F or/and R primer wasn't found then just remove it from the dictionary
-        if res_dict[region]['F'] == {}:
-            res_dict[region].pop('F')
-        if res_dict[region]['R'] == {}:
-            res_dict[region].pop('R')
-
+        if res_dict[region]["F"] == {}:
+            res_dict[region].pop("F")
+        if res_dict[region]["R"] == {}:
+            res_dict[region].pop("R")
 
     singles = defaultdict(str)
     doubles = defaultdict(list)
 
-    double_status = False
+    double_status = False  # Flag for whether primers were found on both strands
 
     # Loop through every collected primer and put primers in singles or doubles
     for region in res_dict.keys():
         strands = res_dict[region]
-
+
         for strand in strands.keys():
            primers = strands[strand]
            max_prop = 0
-            max_name = ''
+            max_name = ""
            for primer_name, prop in primers.items():
                if prop > max_prop:
                    max_prop = prop
                    max_name = primer_name
-
+
            if len(strands.keys()) == 2:
                double_status = True
                doubles[region].append({max_name: max_prop})
            elif len(strands.keys()) == 1:
                singles[region] = {max_name: max_prop}
 
-    max_region = ''
+    max_region = ""
     max_primers = {}
     max_mean_prop = 0
-
+
     # if at least one pair of primers was collected
     if double_status:
-        for region in doubles: # Loop through all pairs of primers and choose the best one
+        for (
+            region
+        ) in doubles:  # Loop through all pairs of primers and choose the best one
             primers = doubles[region]
 
             f_primer_name = list(primers[0].keys())[0]
             r_primer_name = list(primers[1].keys())[0]
             f_primer_prop = primers[0][f_primer_name]
             r_primer_prop = primers[1][r_primer_name]
-
+
             mean_prop = (f_primer_prop + r_primer_prop) / 2.0
             if mean_prop > max_mean_prop:
                 max_mean_prop = mean_prop
                 max_region = region
-                max_primers = [{f_primer_name: f_primer_prop}, {r_primer_name: r_primer_prop}]
+                max_primers = [
+                    {f_primer_name: f_primer_prop},
+                    {r_primer_name: r_primer_prop},
+                ]
 
     else:
-        for region in singles:
+        for region in singles:  # Choose the best single primer
             primer = singles[region]
             primer_name = list(primer.keys())[0]
             prop = primer[primer_name]
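To make the threshold concrete: if get_read_count() reports 10,000 reads and 7,200 of the counted MCPs match a forward primer, primer_prop = 7200 / 10000 = 0.72, which clears the 0.60 cutoff and the primer is stored under res_dict[region]["F"]; a primer with 5,000 matches (0.50) would be printed but discarded. (These counts are illustrative, not from a real run.)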
@@ -217,23 +250,22 @@ def get_primer_props(std_primer_dict_regex, input_path):
                max_region = region
                max_primers = {primer_name: prop}
 
-    if max_region == '':
-        print('No standard library primers!')
-        return([])
+    if max_region == "":
+        print("No standard library primers!")
+        return []
     elif double_status:
-        print('Standard library primers found!')
-        print(f'Region: {max_region}')
-        print(f'Forward Primer: {max_primers[0]}')
-        print(f'Reverse Primer: {max_primers[1]}')
+        print("Standard library primers found!")
+        print(f"Region: {max_region}")
+        print(f"Forward Primer: {max_primers[0]}")
+        print(f"Reverse Primer: {max_primers[1]}")
 
-        return([max_region, max_primers[0], max_primers[1]])
+        return [max_region, max_primers[0], max_primers[1]]
     else:
-        print('Standard library primer found on one strand!')
-        print(f'Region: {max_region}')
-        print(f'Primer: {max_primers}')
-
-        return([max_region, max_primers])
+        print("Standard library primer found on one strand!")
+        print(f"Region: {max_region}")
+        print(f"Primer: {max_primers}")
 
+        return [max_region, max_primers]
 
 
 def save_out(results, sample_id, output, std_primer_dict):
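get_primer_props() therefore returns a list in one of three shapes: [] when nothing clears the threshold, [max_region, max_primers] when only one strand produced a winner, and [max_region, f_primer, r_primer] when both did. save_out() below distinguishes the cases purely by len(results) (0, 2 or 3).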
@@ -241,24 +273,26 @@ def save_out(results, sample_id, output, std_primer_dict):
     Save found std primers into a fasta file.
     """
 
-    with open(f'{output}/{sample_id}_std_primer_out.txt', 'w') as fw_out, open(f'{output}/{sample_id}_std_primers.fasta', 'w') as fw_seq:
+    with (
+        open(f"{output}/{sample_id}_std_primer_out.txt", "w") as fw_out,
+        open(f"{output}/{sample_id}_std_primers.fasta", "w") as fw_seq,
+    ):
         if results == []:
-            fw_out.write('')
-            fw_seq.write('')
-
+            fw_out.write("")
+            fw_seq.write("")
+
         elif len(results) == 2:
             region = results[0]
             primer_name = list(results[1].keys())[0]
             primer_prop = results[1][list(results[1].keys())[0]]
             seq = std_primer_dict[region][primer_name]
-            if 'R' in primer_name:
+            if "R" in primer_name:
                 seq = str(Seq(seq).complement())
-            fw_out.write(f'{region}\n')
-            fw_out.write(f'{primer_name}: {primer_prop}')
+            fw_out.write(f"{region}\n")
+            fw_out.write(f"{primer_name}: {primer_prop}")
+
+            fw_seq.write(f">{primer_name}\n{seq}")
 
-            fw_seq.write(f'>{primer_name}\n{seq}')
-
-
         elif len(results) == 3:
             region = results[0]
             f_primer_name = list(results[1].keys())[0]
@@ -268,22 +302,26 @@ def save_out(results, sample_id, output, std_primer_dict):
             r_primer_prop = results[2][list(results[2].keys())[0]]
             r_seq = std_primer_dict[region][r_primer_name]
             r_seq = str(Seq(r_seq).complement())
-
 
-            fw_out.write(f'{region}\n')
-            fw_out.write(f'{f_primer_name}: {f_primer_prop}\n')
-            fw_out.write(f'{r_primer_name}: {r_primer_prop}')
+            fw_out.write(f"{region}\n")
+            fw_out.write(f"{f_primer_name}: {f_primer_prop}\n")
+            fw_out.write(f"{r_primer_name}: {r_primer_prop}")
+
+            fw_seq.write(f">{f_primer_name}\n{f_seq}\n")
+            fw_seq.write(f">{r_primer_name}\n{r_seq}\n")
 
-            fw_seq.write(f'>{f_primer_name}\n{f_seq}\n')
-            fw_seq.write(f'>{r_primer_name}\n{r_seq}\n')
 
-
 def main():
-
-    parse_args()
-    std_primer_dict_regex, std_primer_dict = parse_std_primers(_PRIMERS) # Parse std primer library into dictionaries
-    results = get_primer_props(std_primer_dict_regex, _INPUT) # Find all the std primers in the input and select most common
-    save_out(results, _SAMPLE, _OUTPUT, std_primer_dict)
+
+    input, primers, sample, output = parse_args()
+    std_primer_dict_regex, std_primer_dict = parse_std_primers(
+        primers
+    )  # Parse std primer library into dictionaries
+    results = get_primer_props(
+        std_primer_dict_regex, input
+    )  # Find all the std primers in the input and select most common
+    save_out(results, sample, output, std_primer_dict)
+
 
 if __name__ == "__main__":
-    main()
+    main()
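Since the module keeps its `if __name__ == "__main__"` guard, the rewritten script can be exercised end to end with something like the following (paths and sample ID are hypothetical):

    python -m mgnify_pipelines_toolkit.analysis.amplicon.standard_primer_matching \
        -i merged_reads.fastq -p standard_primers/ -s SRR0000001 -o out

On success it writes out/SRR0000001_std_primer_out.txt and out/SRR0000001_std_primers.fasta, the latter holding the chosen primer sequences (reverse primers complemented via Bio.Seq).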
mgnify_pipelines_toolkit/analysis/shared/fastq_suffix_header_check.py

@@ -0,0 +1,138 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright 2024 EMBL - European Bioinformatics Institute
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+from collections import defaultdict
+import gzip
+import json
+import logging
+
+logging.basicConfig(level=logging.DEBUG)
+
+
+def parse_args():
+
+    parser = argparse.ArgumentParser(
+        description="Script that sanity checks whether the strand suffix of a FASTQ file matches the headers inside the FASTQ file."
+    )
+    parser.add_argument(
+        "-f",
+        "--fwd",
+        required=True,
+        type=str,
+        help="Input forward read headers file (PE) OR SE read file",
+    )
+    parser.add_argument(
+        "-r",
+        "--rev",
+        required=False,
+        type=str,
+        help="Input reverse read headers file (PE)",
+    )
+    parser.add_argument("-s", "--sample", required=True, type=str, help="Sample ID")
+    parser.add_argument("-o", "--output", required=True, type=str, help="Output")
+
+    args = parser.parse_args()
+
+    fwd = args.fwd
+    rev = args.rev
+    sample = args.sample
+    output = args.output
+
+    return fwd, rev, sample, output
+
+
+def choose_open_func(file_path):
+
+    open_func = open
+
+    if file_path[-2:] == "gz":
+        open_func = gzip.open
+
+    return open_func
+
+
+def main():
+
+    fwd, rev, sample, output = parse_args()
+
+    files_to_parse = []
+
+    if "_1" in fwd:
+        if not rev:
+            logging.error(
+                'No reverse file given, yet given forward file has the "_1" suffix implying it\'s paired-end. '
+                + "Either supply the reverse file, or supply a single-end file."
+            )
+        elif "_2" not in rev:
+            logging.error(
+                'The expected suffix "_2" for a supplied reverse file is missing. Please verify your inputs.'
+            )
+        else:
+            files_to_parse = [fwd, rev]
+
+    else:
+        files_to_parse = [fwd]
+
+    open_func = choose_open_func(
+        fwd
+    )  # Choose between gzip.open() and open() by checking the file extension
+    reads_with_err = defaultdict(list)
+
+    for file in files_to_parse:
+
+        header_str = ""
+
+        if "_1" in file:
+            header_str = "/1"
+        elif "_2" in file:
+            header_str = "/2"
+        else:
+            header_str = "/1"  # SE files still have "/1" in the headers
+
+        for counter, line in enumerate(open_func(file)):
+
+            if counter % 4 == 0:  # Only do stuff every four lines to hit the header
+                line = line.decode("ascii").strip()
+                curr_read_strand = line[-2:]
+
+                if curr_read_strand != header_str:
+                    reads_with_err[file].append(line)
+                    reads_with_err["total"].append(1)
+
+    if len(reads_with_err) != 0:
+
+        num_of_reads_with_err = len(reads_with_err["total"])
+        reads_with_err["total"] = num_of_reads_with_err
+
+        logging.error(
+            f"Found {num_of_reads_with_err} reads with header strands that don't match file suffix. See log file at {output}/{sample}_suffix_header_err.json"  # noqa: E501
+        )
+
+        with open(
+            f"{output}/{sample}_suffix_header_err.json", "w"
+        ) as fw:  # Writes JSON file containing the headers of reads with errors
+            json.dump(reads_with_err, fw)
+
+    else:
+        with open(
+            f"{output}/{sample}_suffix_header_err.json", "w"
+        ) as fw:  # Creates an empty file if there are no errors
+            print("No errors.")
+
+
+if __name__ == "__main__":
+    main()
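The new check assumes ENA-style FASTQ headers whose last two characters encode the strand, matching the file's _1/_2 suffix; e.g. a record in SRR0000001_1.fastq.gz (hypothetical read) would be:

    @SRR0000001.1 1/1
    GATTACA
    +
    IIIIIII

Every fourth line (counter % 4 == 0) is treated as a header, and any mismatching header is collected into {output}/{sample}_suffix_header_err.json. Note that line.decode("ascii") presumes byte input, i.e. a gzipped file opened by gzip.open; an uncompressed file opened in text mode by plain open() would raise AttributeError on str.decode.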
mgnify_pipelines_toolkit/analysis/shared/get_subunits.py

@@ -36,17 +36,31 @@ LSU_rRNA_eukarya = "LSU_rRNA_eukarya"
 
 def set_model_names(prefix, name, directory):
     pattern_dict = {}
-    pattern_dict[SSU] = os.path.join(directory, f'{name}_SSU.fasta')
-    pattern_dict[SSU_rRNA_archaea] = os.path.join(directory, f'{prefix}{name}_{SSU_rRNA_archaea}.RF01959.fa')
-    pattern_dict[SSU_rRNA_bacteria] = os.path.join(directory, f'{prefix}{name}_{SSU_rRNA_bacteria}.RF00177.fa')
-    pattern_dict[SSU_rRNA_eukarya] = os.path.join(directory, f'{prefix}{name}_{SSU_rRNA_eukarya}.RF01960.fa')
-    pattern_dict[SSU_rRNA_microsporidia] = os.path.join(directory, f'{prefix}{name}_{SSU_rRNA_microsporidia}.RF02542.fa')
-    pattern_dict[LSU] = os.path.join(directory, f'{name}_LSU.fasta')
-    pattern_dict[LSU_rRNA_archaea] = os.path.join(directory, f'{prefix}{name}_{LSU_rRNA_archaea}.RF02540.fa')
-    pattern_dict[LSU_rRNA_bacteria] = os.path.join(directory, f'{prefix}{name}_{LSU_rRNA_bacteria}.RF02541.fa')
-    pattern_dict[LSU_rRNA_eukarya] = os.path.join(directory, f'{prefix}{name}_{LSU_rRNA_eukarya}.RF02543.fa')
-    pattern_dict[Seq5S] = os.path.join(directory, f'{name}_5S.fa')
-    pattern_dict[Seq5_8S] = os.path.join(directory, f'{name}_5_8S.fa')
+    pattern_dict[SSU] = os.path.join(directory, f"{name}_SSU.fasta")
+    pattern_dict[SSU_rRNA_archaea] = os.path.join(
+        directory, f"{prefix}{name}_{SSU_rRNA_archaea}.RF01959.fa"
+    )
+    pattern_dict[SSU_rRNA_bacteria] = os.path.join(
+        directory, f"{prefix}{name}_{SSU_rRNA_bacteria}.RF00177.fa"
+    )
+    pattern_dict[SSU_rRNA_eukarya] = os.path.join(
+        directory, f"{prefix}{name}_{SSU_rRNA_eukarya}.RF01960.fa"
+    )
+    pattern_dict[SSU_rRNA_microsporidia] = os.path.join(
+        directory, f"{prefix}{name}_{SSU_rRNA_microsporidia}.RF02542.fa"
+    )
+    pattern_dict[LSU] = os.path.join(directory, f"{name}_LSU.fasta")
+    pattern_dict[LSU_rRNA_archaea] = os.path.join(
+        directory, f"{prefix}{name}_{LSU_rRNA_archaea}.RF02540.fa"
+    )
+    pattern_dict[LSU_rRNA_bacteria] = os.path.join(
+        directory, f"{prefix}{name}_{LSU_rRNA_bacteria}.RF02541.fa"
+    )
+    pattern_dict[LSU_rRNA_eukarya] = os.path.join(
+        directory, f"{prefix}{name}_{LSU_rRNA_eukarya}.RF02543.fa"
+    )
+    pattern_dict[Seq5S] = os.path.join(directory, f"{name}_5S.fa")
+    pattern_dict[Seq5_8S] = os.path.join(directory, f"{name}_5_8S.fa")
     return pattern_dict
 
 
@@ -69,42 +83,57 @@ def main():
     directory = "sequence-categorisation"
     if not os.path.exists(directory):
         os.makedirs(directory)
-
-    if not os.path.exists("sequence-categorisation/ncRNA"):
-        os.makedirs("sequence-categorisation/ncRNA")
+    directory_ncrna = os.path.join("sequence-categorisation", "ncRNA")
+    if not os.path.exists(directory_ncrna):
+        os.makedirs(directory_ncrna)
 
-    print('Start fasta mode')
+    print("Start fasta mode")
     pattern_dict = set_model_names(prefix, name, directory)
-    coding_rna = [SSU_rRNA_archaea, SSU_rRNA_bacteria, SSU_rRNA_eukarya, SSU_rRNA_microsporidia,
-                  LSU_rRNA_archaea, LSU_rRNA_bacteria, LSU_rRNA_eukarya, Seq5S, Seq5_8S]
+    coding_rna = [
+        SSU_rRNA_archaea,
+        SSU_rRNA_bacteria,
+        SSU_rRNA_eukarya,
+        SSU_rRNA_microsporidia,
+        LSU_rRNA_archaea,
+        LSU_rRNA_bacteria,
+        LSU_rRNA_eukarya,
+        Seq5S,
+        Seq5_8S,
+    ]
     open_files = {}
     for record in SeqIO.parse(args.input, "fasta"):
-        model = '-'.join(record.id.split('/')[0].split('-')[1:])
+        model = "-".join(record.id.split("/")[0].split("-")[1:])
         if model in coding_rna:
             filename = pattern_dict[model]
         else:
-            filename = os.path.join("sequence-categorisation/ncRNA", f'{prefix}{name}_{model}.fasta')
+            filename = os.path.join(directory_ncrna, f"{prefix}{name}_{model}.fasta")
         if model not in open_files:
-            file_out = open(filename, 'w')
+            file_out = open(filename, "w")
             open_files[model] = file_out
         SeqIO.write(record, open_files[model], "fasta")
 
-        if model in (SSU_rRNA_archaea, SSU_rRNA_bacteria, SSU_rRNA_eukarya, SSU_rRNA_microsporidia):
+        if model in (
+            SSU_rRNA_archaea,
+            SSU_rRNA_bacteria,
+            SSU_rRNA_eukarya,
+            SSU_rRNA_microsporidia,
+        ):
             if SSU not in open_files:
-                file_out = open(pattern_dict[SSU], 'w')
+                file_out = open(pattern_dict[SSU], "w")
                 open_files[SSU] = file_out
             SeqIO.write(record, open_files[SSU], "fasta")
         if model in (LSU_rRNA_archaea, LSU_rRNA_bacteria, LSU_rRNA_eukarya):
             if LSU not in open_files:
-                file_out = open(pattern_dict[LSU], 'w')
+                file_out = open(pattern_dict[LSU], "w")
                 open_files[LSU] = file_out
             SeqIO.write(record, open_files[LSU], "fasta")
 
     for item in open_files:
         open_files[item].close()
 
-    if len(os.listdir("sequence-categorisation/ncRNA")) == 0:
-        os.rmdir("sequence-categorisation/ncRNA")
+    if len(os.listdir(directory_ncrna)) == 0:
+        os.rmdir(directory_ncrna)
+
 
 if __name__ == "__main__":
-    main()
+    main()
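The model is parsed out of each record id by dropping the coordinate part after "/" and the read-name prefix before the first "-". A quick illustration with a made-up id in the layout this expression expects:

    record_id = "read42-SSU_rRNA_bacteria/120-1560"  # hypothetical id layout
    model = "-".join(record_id.split("/")[0].split("-")[1:])
    print(model)  # SSU_rRNA_bacteria

The "-".join() keeps model names that themselves contain hyphens intact.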
mgnify_pipelines_toolkit/analysis/shared/get_subunits_coords.py

@@ -16,41 +16,47 @@
 
 import argparse
 import sys
-import os
-import gzip
-from Bio import SeqIO
 
 
 def main():
     parser = argparse.ArgumentParser(description="Extract lsu, ssu and 5s")
-    parser.add_argument("-i", "--input", dest="input", help="Input fasta file", required=True)
+    parser.add_argument(
+        "-i", "--input", dest="input", help="Input fasta file", required=True
+    )
     parser.add_argument("-l", "--lsu", dest="lsu", help="LSU pattern", required=True)
     parser.add_argument("-s", "--ssu", dest="ssu", help="SSU pattern", required=True)
 
-
-
-
-
+    ssu_coords = "SSU_coords"
+    lsu_coords = "LSU_coords"
+    ssu_count = 0
+    lsu_count = 0
 
     if len(sys.argv) == 1:
         parser.print_help()
     else:
         args = parser.parse_args()
 
-        with
+        with (
+            open(ssu_coords, "w") as out_ssu,
+            open(lsu_coords, "w") as out_lsu,
+            open(args.input, "r") as input,
+        ):
             for line in input:
                 if args.lsu in line:
                     out_lsu.write(line)
-
+                    lsu_count += 1
                 elif args.ssu in line:
                     out_ssu.write(line)
-
-        with open("RNA-counts", 'w') as count:
-            count.write(
+                    ssu_count += 1
+        with open("RNA-counts", "w") as count:
+            count.write(
+                "LSU count\t" + str(lsu_count) + "\nSSU count\t" + str(ssu_count)
+            )
 
         out_ssu.close()
         out_lsu.close()
         count.close()
 
+
 if __name__ == "__main__":
     main()