PyPI - mgnify-pipelines-toolkit - Versions diffs - 0.1.3__tar.gz → 0.1.4__tar.gz - Mend

@@ -223,6 +223,25 @@ def make_tax_assignment_dict_pr2(taxa_df, asv_dict):
     return tax_assignment_dict
+def generate_asv_count_dict(asv_dict):
+    """Build a table of the ASVs that have a non-zero read count.
+
+    Entries whose count is 0 are left out. Rows are ordered by count,
+    highest first; ASVs with equal counts are ordered by ASV id, ascending.
+
+    Args:
+        asv_dict: Mapping of ASV id to its read count.
+
+    Returns:
+        A DataFrame with the columns 'asv' and 'count'. Both columns are
+        present even when no ASV has a non-zero count.
+    """
+    rows = [
+        (asv_id, count)
+        for asv_id, count in asv_dict.items()
+        if count != 0
+    ]
+    # Name the columns explicitly: a frame built from an empty mapping has
+    # no columns at all, and sorting it by 'asv' would raise KeyError.
+    res_df = pd.DataFrame(rows, columns=['asv', 'count'])
+    # One multi-key sort instead of two chained sorts: the chained form only
+    # keeps the 'asv' order among ties if the second sort is stable, which
+    # the default sort algorithm does not guarantee.
+    res_df = res_df.sort_values(by=['count', 'asv'], ascending=[False, True])
+    return res_df
 def main():
     _TAXA, _FWD, _REV, _AMP, _HEADERS, _SAMPLE = parse_args()
@@ -250,23 +269,12 @@ def main():
     for line_fwd in fwd_fr:
         counter += 1
         line_fwd = line_fwd.strip()
-        fwd_asvs = line_fwd.split(",")
-        if paired_end:
-            line_rev = next(rev_fr).strip()
-            rev_asvs = line_rev.split(",")
-            asv_intersection = list(set(fwd_asvs).intersection(rev_asvs))
-            if len(asv_intersection) == 0:
-                continue
-            if len(asv_intersection) == 1 and asv_intersection[0] == "0":
-                continue
-        else:
-            asv_intersection = fwd_asvs
+        if line_fwd == '0':
+            continue
         if headers[counter] in amp_reads:
-            asv_dict[f"seq_{int(asv_intersection[0]) - 1}"] += 1
+            asv_dict[f"seq_{line_fwd}"] += 1
     fwd_fr.close()
     if paired_end:
@@ -285,6 +293,8 @@ def main():
         for tax_assignment, count in tax_assignment_dict.items():
             fw.write(f"{count}\t{tax_assignment}\n")
+    asv_count_df = generate_asv_count_dict(asv_dict)
+    asv_count_df.to_csv(f'./{_SAMPLE}_{amp_region}_asv_read_counts.tsv', sep='\t', index=False)
 if __name__ == "__main__":
     main()

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: mgnify_pipelines_toolkit
-Version: 0.1.3
+Version: 0.1.4
 Summary: Collection of scripts and tools for MGnify pipelines
 Author-email: MGnify team <metagenomics-help@ebi.ac.uk>
 License: Apache Software License 2.0

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: mgnify_pipelines_toolkit
-Version: 0.1.3
+Version: 0.1.4
 Summary: Collection of scripts and tools for MGnify pipelines
 Author-email: MGnify team <metagenomics-help@ebi.ac.uk>
 License: Apache Software License 2.0

@@ -1,6 +1,6 @@
 [project]
 name = "mgnify_pipelines_toolkit"
-version = "0.1.3"
+version = "0.1.4"
 readme = "README.md"
 license = {text = "Apache Software License 2.0"}
 authors = [

mgnify-pipelines-toolkit 0.1.3__tar.gz → 0.1.4__tar.gz

Potentially problematic release.

mgnify-pipelines-toolkit 0.1.3__tar.gz → 0.1.4__tar.gz

Potentially problematic release.

mgnify-pipelines-toolkit 0.1.3__tar.gz → 0.1.4__tar.gz