mgnify-pipelines-toolkit 1.2.7__tar.gz → 1.2.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mgnify-pipelines-toolkit has been flagged as possibly problematic in the registry.
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/PKG-INFO +1 -1
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/amplicon/make_asv_count_table.py +1 -1
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/assembly/antismash_gff_builder.py +4 -2
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/assembly/summarise_sanntis_bgcs.py +3 -4
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/rawreads/study_summary_generator.py +30 -19
- mgnify_pipelines_toolkit-1.2.9/mgnify_pipelines_toolkit/analysis/shared/dwc_summary_generator.py +749 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/constants/db_labels.py +2 -2
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/constants/tax_ranks.py +1 -9
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/schemas/schemas.py +2 -2
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit.egg-info/PKG-INFO +1 -1
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit.egg-info/entry_points.txt +1 -1
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/pyproject.toml +2 -2
- mgnify_pipelines_toolkit-1.2.7/mgnify_pipelines_toolkit/analysis/shared/dwc_summary_generator.py +0 -240
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/LICENSE +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/README.md +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/__init__.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/__init__.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/amplicon/classify_var_regions.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/amplicon/mapseq_to_asv_table.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/amplicon/permute_primers.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/amplicon/primer_val_classification.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/amplicon/remove_ambiguous_reads.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/amplicon/rev_comp_se_primers.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/amplicon/study_summary_generator.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/assembly/add_rhea_chebi_annotation.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/assembly/combined_gene_caller_merge.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/assembly/generate_gaf.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/assembly/gff_annotation_utils.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/assembly/gff_file_utils.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/assembly/gff_toolkit.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/assembly/go_utils.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/assembly/krona_txt_from_cat_classification.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/assembly/process_dbcan_result_cazys.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/assembly/process_dbcan_result_clusters.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/assembly/study_summary_generator.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/assembly/summarise_antismash_bgcs.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/assembly/summarise_goslims.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/genomes/__init__.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/shared/__init__.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/shared/convert_cmscan_to_cmsearch_tblout.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/shared/fastq_suffix_header_check.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/shared/get_subunits.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/shared/get_subunits_coords.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/shared/library_strategy_check.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/shared/mapseq2biom.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/shared/markergene_study_summary.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/constants/ncrna.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/constants/regex_fasta_header.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/constants/thresholds.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/constants/var_region_coordinates.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/utils/__init__.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/utils/fasta_to_delimited.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/utils/get_mpt_version.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit.egg-info/SOURCES.txt +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit.egg-info/dependency_links.txt +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit.egg-info/requires.txt +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit.egg-info/top_level.txt +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/setup.cfg +0 -0
mgnify_pipelines_toolkit/analysis/amplicon/make_asv_count_table.py

```diff
@@ -300,7 +300,7 @@ def main():
     if paired_end:
         rev_fr.close()
 
-    if asv_dict:
+    if asv_dict:  # if there are matches between taxonomic and ASV annotations
         ref_db = ""
 
         if len(taxa_df.columns) == 9:
```
mgnify_pipelines_toolkit/analysis/assembly/antismash_gff_builder.py

```diff
@@ -89,8 +89,10 @@ def main():
         if iter_cds and feature["type"] == "CDS":
             # Annotate CDS features
 
-            …
-            …
+            # The > and < are removed to work with pseudogene outputs in Bakta
+            # A feature["location"] example that can be seen in Bakta outputs: "[81883:>82231](+)"
+            start = int(feature["location"].split(":")[0][1:].lstrip("<>"))
+            end = int(feature["location"].split(":")[1].split("]")[0].lstrip("<>"))
             strand = feature["location"].split("(")[1][0]  # + or -
 
             if not region_name or not (region_start <= end and start <= region_end):
```
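For context on the coordinate parsing above: Bakta marks partial (pseudogene) boundaries with `<` and `>` inside the BioPython-style location string, so the coordinates must be stripped of those markers before the integer cast; without the `lstrip("<>")` calls, `int()` raises `ValueError` on such features. A minimal, self-contained sketch of the same parsing, using the example string quoted in the new comments (the helper name is illustrative, not part of the toolkit):

```python
def parse_location(location: str):
    """Parse a location string such as "[81883:>82231](+)" into (start, end, strand)."""
    # "[81883" -> drop the leading "[" and any "<"/">" partial-boundary markers
    start = int(location.split(":")[0][1:].lstrip("<>"))
    # ">82231](+)" -> keep the part before "]" and strip "<"/">"
    end = int(location.split(":")[1].split("]")[0].lstrip("<>"))
    # "(+)" or "(-)" -> single strand character
    strand = location.split("(")[1][0]
    return start, end, strand


print(parse_location("[81883:>82231](+)"))  # (81883, 82231, '+')
```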
mgnify_pipelines_toolkit/analysis/assembly/summarise_sanntis_bgcs.py

```diff
@@ -110,10 +110,9 @@ def main():
     df_merged = df_merged[
         ["nearest_mibig", "nearest_mibig_class", "description", "count"]
     ]
-    df_merged = df_merged.rename(
-        "Description": "description",
-        …
-    })
+    df_merged = df_merged.rename(
+        columns={"Description": "description", "Count": "count"}
+    )
     df_merged.to_csv(output_filename, sep="\t", index=False)
 
 
```
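The fix routes the mapping through the `columns=` keyword. The removed call is only partially visible in this diff, but assuming it passed the dict positionally, the rename would have targeted the row index, since that is `DataFrame.rename`'s default axis, and the headers would have stayed `Description`/`Count`. A small pandas sketch with made-up data to show the difference:

```python
import pandas as pd

df = pd.DataFrame({"Description": ["terpene"], "Count": [3]})

# A positional mapping targets the row index by default: columns are unchanged.
print(df.rename({"Description": "description", "Count": "count"}).columns.tolist())
# ['Description', 'Count']

# The mapping passed via columns= renames the headers as intended.
print(df.rename(columns={"Description": "description", "Count": "count"}).columns.tolist())
# ['description', 'count']
```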
mgnify_pipelines_toolkit/analysis/rawreads/study_summary_generator.py

```diff
@@ -53,7 +53,7 @@ def cli():
 
 def get_file(
     run_acc: str, analyses_dir: Path, db_label: str
-) -> Union[Path, List[Path]]:
+) -> Union[Path, List[Path], None]:
     """Takes path information for a particular analysis and db_label combo, and returns any existing files.
 
     :param run_acc: Run accession for the tax file that should be retrieved.
```
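The widened annotation makes the bare `return` path (visible in the next hunk) explicit: when no matching file is found, the function yields `None`, and `Union[Path, List[Path], None]` is the same type as `Optional[Union[Path, List[Path]]]`. A sketch of how a caller might normalise the three shapes (the helper name is illustrative):

```python
from pathlib import Path
from typing import List, Union


def as_path_list(result: Union[Path, List[Path], None]) -> List[Path]:
    # None -> nothing found; Path -> single match; list -> multiple matches.
    if result is None:
        return []
    if isinstance(result, Path):
        return [result]
    return result
```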
mgnify_pipelines_toolkit/analysis/rawreads/study_summary_generator.py

```diff
@@ -84,7 +84,7 @@ def get_file(
         return
 
     analysis_file = Path(
-        f"{analyses_dir}/{run_acc}/{db_dir}/{db_label}/{run_acc}_{db_label}.txt"
+        f"{analyses_dir}/{run_acc}/{db_dir}/{db_label}/{run_acc}_{db_label}.txt.gz"
     )
     if not analysis_file.exists():
         logging.error(
```
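Switching the expected per-run file from `.txt` to `.txt.gz` should not require changes to the readers that follow: `pandas.read_csv` defaults to `compression="infer"` and picks gzip from the `.gz` suffix. A short, self-contained sketch with an invented file name and content:

```python
import gzip

import pandas as pd

# Write a tiny gzipped TSV (hypothetical content) and read it straight back;
# compression="infer" (the read_csv default) selects gzip from the ".gz" suffix.
with gzip.open("demo_taxonomy.txt.gz", "wt") as handle:
    handle.write("# comment line\n12\tBacteria\tBacillota\n")

df = pd.read_csv(
    "demo_taxonomy.txt.gz", sep="\t", skiprows=1, names=["Count", "Kingdom", "Phylum"]
)
print(df)  # one row: Count=12, Kingdom=Bacteria, Phylum=Bacillota
```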
mgnify_pipelines_toolkit/analysis/rawreads/study_summary_generator.py

```diff
@@ -119,20 +119,25 @@ def parse_one_tax_file(run_acc: str, tax_file: Path, db_label: str) -> pd.DataFrame:
     :rtype: pd.DataFrame
     """
 
-    tax_ranks = _MOTUS_TAX_RANKS if db_label == "
+    tax_ranks = _MOTUS_TAX_RANKS if db_label == "motus" else _SILVA_TAX_RANKS
     res_df = pd.read_csv(tax_file, sep="\t", skiprows=1, names=["Count"] + tax_ranks)
     res_df = res_df.fillna("")
 
-    …
-    …
-    …
+    if res_df.shape[0] > 0:
+        validate_dataframe(
+            res_df,
+            MotusTaxonSchema if db_label == "motus" else TaxonSchema,
+            str(tax_file),
+        )
 
-    res_df["full_taxon"] =
-    …
+    res_df["full_taxon"] = [
+        ";".join(r[tax_ranks]).strip(";") for _, r in res_df.iterrows()
+    ]
+    final_df = (
+        res_df[["Count", "full_taxon"]]
+        .set_index("full_taxon")
+        .rename(columns={"Count": run_acc})
     )
-    final_df = res_df.iloc[:, [0, -1]]
-    final_df = final_df.set_index("full_taxon")
-    final_df.columns = [run_acc]
 
     return final_df
 
```
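To illustrate the rewritten tail of `parse_one_tax_file`: the per-rank columns are joined into a single `full_taxon` lineage string, which becomes the index, and the count column is renamed to the run accession so per-run frames can later be joined column-wise. A self-contained sketch with invented ranks and counts (not real toolkit data):

```python
import pandas as pd

tax_ranks = ["Kingdom", "Phylum"]  # stand-ins for the toolkit's rank constants
run_acc = "ERR0000001"             # hypothetical run accession

res_df = pd.DataFrame(
    {
        "Count": [12, 5],
        "Kingdom": ["Bacteria", "Archaea"],
        "Phylum": ["Bacillota", ""],
    }
)

# Join the rank columns into one lineage string, dropping the trailing separators
# left by empty ranks, exactly as the new list comprehension does.
res_df["full_taxon"] = [
    ";".join(r[tax_ranks]).strip(";") for _, r in res_df.iterrows()
]

final_df = (
    res_df[["Count", "full_taxon"]]
    .set_index("full_taxon")
    .rename(columns={"Count": run_acc})
)
print(final_df)
# index "full_taxon": "Bacteria;Bacillota" and "Archaea";
# a single column named after the run accession holding the counts 12 and 5.
```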
mgnify_pipelines_toolkit/analysis/rawreads/study_summary_generator.py

```diff
@@ -162,16 +167,20 @@ def parse_one_func_file(
     ).set_index("function")
     res_df = res_df.fillna(0)
 
-    …
+    if res_df.shape[0] > 0:
+        validate_dataframe(res_df, FunctionProfileSchema, str(func_file))
 
-    count_df = res_df[["read_count"]]
-    …
+    count_df = pd.DataFrame(res_df[["read_count"]]).rename(
+        columns={"read_count": run_acc}
+    )
 
-    depth_df = res_df[["coverage_depth"]]
-    …
+    depth_df = pd.DataFrame(res_df[["coverage_depth"]]).rename(
+        columns={"coverage_depth": run_acc}
+    )
 
-    breadth_df = res_df[["coverage_breadth"]]
-    …
+    breadth_df = pd.DataFrame(res_df[["coverage_breadth"]]).rename(
+        columns={"coverage_breadth": run_acc}
+    )
 
     return count_df, depth_df, breadth_df
 
```
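To illustrate the per-metric reshaping: each metric column is pulled out as a one-column DataFrame and renamed to the run accession, so the frames from different runs can later be joined side by side (as in `merge_summaries` below). A minimal sketch with invented function identifiers and counts:

```python
import pandas as pd

run_acc = "ERR0000002"  # hypothetical run accession

res_df = pd.DataFrame(
    {
        "read_count": [10, 4],
        "coverage_depth": [1.5, 0.7],
        "coverage_breadth": [0.9, 0.3],
    },
    index=pd.Index(["K00001", "K00002"], name="function"),
)

# res_df[["read_count"]] already returns a DataFrame, so the pd.DataFrame(...) wrapper
# is defensive; the rename gives every run the same column name (its accession).
count_df = pd.DataFrame(res_df[["read_count"]]).rename(columns={"read_count": run_acc})
depth_df = pd.DataFrame(res_df[["coverage_depth"]]).rename(
    columns={"coverage_depth": run_acc}
)

print(count_df.columns.tolist(), depth_df.columns.tolist())  # ['ERR0000002'] ['ERR0000002']
```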
mgnify_pipelines_toolkit/analysis/rawreads/study_summary_generator.py

```diff
@@ -423,7 +432,9 @@ def merge_summaries(analyses_dir: str, output_prefix: str) -> None:
         curr_df = pd.read_csv(summary, sep="\t", index_col=0)
         res_df = res_df.join(curr_df, how="outer")
         res_df = res_df.fillna(0)
-        res_df = res_df.astype(
+        res_df = res_df.astype(
+            int if table_type == "read-count" else float
+        )
 
     res_df = res_df.reindex(sorted(res_df.columns), axis=1)
     res_df.to_csv(
```