mgnify-pipelines-toolkit 1.2.7__tar.gz → 1.2.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mgnify-pipelines-toolkit has been flagged as possibly problematic in the registry.
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/PKG-INFO +1 -1
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/amplicon/make_asv_count_table.py +1 -1
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/assembly/antismash_gff_builder.py +4 -2
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/assembly/summarise_sanntis_bgcs.py +3 -4
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/rawreads/study_summary_generator.py +30 -19
- mgnify_pipelines_toolkit-1.2.9/mgnify_pipelines_toolkit/analysis/shared/dwc_summary_generator.py +749 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/constants/db_labels.py +2 -2
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/constants/tax_ranks.py +1 -9
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/schemas/schemas.py +2 -2
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit.egg-info/PKG-INFO +1 -1
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit.egg-info/entry_points.txt +1 -1
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/pyproject.toml +2 -2
- mgnify_pipelines_toolkit-1.2.7/mgnify_pipelines_toolkit/analysis/shared/dwc_summary_generator.py +0 -240
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/LICENSE +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/README.md +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/__init__.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/__init__.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/amplicon/classify_var_regions.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/amplicon/mapseq_to_asv_table.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/amplicon/permute_primers.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/amplicon/primer_val_classification.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/amplicon/remove_ambiguous_reads.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/amplicon/rev_comp_se_primers.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/amplicon/study_summary_generator.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/assembly/add_rhea_chebi_annotation.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/assembly/combined_gene_caller_merge.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/assembly/generate_gaf.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/assembly/gff_annotation_utils.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/assembly/gff_file_utils.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/assembly/gff_toolkit.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/assembly/go_utils.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/assembly/krona_txt_from_cat_classification.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/assembly/process_dbcan_result_cazys.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/assembly/process_dbcan_result_clusters.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/assembly/study_summary_generator.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/assembly/summarise_antismash_bgcs.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/assembly/summarise_goslims.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/genomes/__init__.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/shared/__init__.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/shared/convert_cmscan_to_cmsearch_tblout.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/shared/fastq_suffix_header_check.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/shared/get_subunits.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/shared/get_subunits_coords.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/shared/library_strategy_check.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/shared/mapseq2biom.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/analysis/shared/markergene_study_summary.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/constants/ncrna.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/constants/regex_fasta_header.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/constants/thresholds.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/constants/var_region_coordinates.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/utils/__init__.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/utils/fasta_to_delimited.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit/utils/get_mpt_version.py +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit.egg-info/SOURCES.txt +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit.egg-info/dependency_links.txt +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit.egg-info/requires.txt +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/mgnify_pipelines_toolkit.egg-info/top_level.txt +0 -0
- {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.9}/setup.cfg +0 -0
mgnify_pipelines_toolkit/analysis/amplicon/make_asv_count_table.py

```diff
@@ -300,7 +300,7 @@ def main():
     if paired_end:
         rev_fr.close()
 
-    if asv_dict:
+    if asv_dict:  # if there are matches between taxonomic and ASV annotations
         ref_db = ""
 
         if len(taxa_df.columns) == 9:
```
mgnify_pipelines_toolkit/analysis/assembly/antismash_gff_builder.py

```diff
@@ -89,8 +89,10 @@ def main():
         if iter_cds and feature["type"] == "CDS":
             # Annotate CDS features
 
-            …
-            …
+            # The > and < are removed to work with pseudogene outputs in Bakta
+            # A feature["location"] example that can be seen in Bakta outputs: "[81883:>82231](+)"
+            start = int(feature["location"].split(":")[0][1:].lstrip("<>"))
+            end = int(feature["location"].split(":")[1].split("]")[0].lstrip("<>"))
             strand = feature["location"].split("(")[1][0]  # + or -
 
             if not region_name or not (region_start <= end and start <= region_end):
```
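For context on the coordinate parsing above: Bakta marks partial (pseudogene) boundaries with `<` and `>` inside the BioPython-style location string, so the coordinates must be stripped of those markers before the integer cast; without the `lstrip("<>")` calls, `int()` raises `ValueError` on such features. A minimal, self-contained sketch of the same parsing, using the example string quoted in the new comments (the helper name is illustrative, not part of the toolkit):

```python
def parse_location(location: str):
    """Parse a location string such as "[81883:>82231](+)" into (start, end, strand)."""
    # "[81883" -> drop the leading "[" and any "<"/">" partial-boundary markers
    start = int(location.split(":")[0][1:].lstrip("<>"))
    # ">82231](+)" -> keep the part before "]" and strip "<"/">"
    end = int(location.split(":")[1].split("]")[0].lstrip("<>"))
    # "(+)" or "(-)" -> single strand character
    strand = location.split("(")[1][0]
    return start, end, strand


print(parse_location("[81883:>82231](+)"))  # (81883, 82231, '+')
```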
mgnify_pipelines_toolkit/analysis/assembly/summarise_sanntis_bgcs.py

```diff
@@ -110,10 +110,9 @@ def main():
     df_merged = df_merged[
         ["nearest_mibig", "nearest_mibig_class", "description", "count"]
     ]
-    df_merged = df_merged.rename(
-        "Description": "description",
-        …
-    })
+    df_merged = df_merged.rename(
+        columns={"Description": "description", "Count": "count"}
+    )
     df_merged.to_csv(output_filename, sep="\t", index=False)
 
 
```
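The fix routes the mapping through the `columns=` keyword. The removed call is only partially visible in this diff, but assuming it passed the dict positionally, the rename would have targeted the row index, since that is `DataFrame.rename`'s default axis, and the headers would have stayed `Description`/`Count`. A small pandas sketch with made-up data to show the difference:

```python
import pandas as pd

df = pd.DataFrame({"Description": ["terpene"], "Count": [3]})

# A positional mapping targets the row index by default: columns are unchanged.
print(df.rename({"Description": "description", "Count": "count"}).columns.tolist())
# ['Description', 'Count']

# The mapping passed via columns= renames the headers as intended.
print(df.rename(columns={"Description": "description", "Count": "count"}).columns.tolist())
# ['description', 'count']
```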
mgnify_pipelines_toolkit/analysis/rawreads/study_summary_generator.py

```diff
@@ -53,7 +53,7 @@ def cli():
 
 def get_file(
     run_acc: str, analyses_dir: Path, db_label: str
-) -> Union[Path, List[Path]]:
+) -> Union[Path, List[Path], None]:
     """Takes path information for a particular analysis and db_label combo, and returns any existing files.
 
     :param run_acc: Run accession for the tax file that should be retrieved.
```
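The widened annotation makes the bare `return` path (visible in the next hunk) explicit: when no matching file is found, the function yields `None`, and `Union[Path, List[Path], None]` is the same type as `Optional[Union[Path, List[Path]]]`. A sketch of how a caller might normalise the three shapes (the helper name is illustrative):

```python
from pathlib import Path
from typing import List, Union


def as_path_list(result: Union[Path, List[Path], None]) -> List[Path]:
    # None -> nothing found; Path -> single match; list -> multiple matches.
    if result is None:
        return []
    if isinstance(result, Path):
        return [result]
    return result
```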
mgnify_pipelines_toolkit/analysis/rawreads/study_summary_generator.py

```diff
@@ -84,7 +84,7 @@ def get_file(
         return
 
     analysis_file = Path(
-        f"{analyses_dir}/{run_acc}/{db_dir}/{db_label}/{run_acc}_{db_label}.txt"
+        f"{analyses_dir}/{run_acc}/{db_dir}/{db_label}/{run_acc}_{db_label}.txt.gz"
     )
     if not analysis_file.exists():
         logging.error(
```
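Switching the expected per-run file from `.txt` to `.txt.gz` should not require changes to the readers that follow: `pandas.read_csv` defaults to `compression="infer"` and picks gzip from the `.gz` suffix. A short, self-contained sketch with an invented file name and content:

```python
import gzip

import pandas as pd

# Write a tiny gzipped TSV (hypothetical content) and read it straight back;
# compression="infer" (the read_csv default) selects gzip from the ".gz" suffix.
with gzip.open("demo_taxonomy.txt.gz", "wt") as handle:
    handle.write("# comment line\n12\tBacteria\tBacillota\n")

df = pd.read_csv(
    "demo_taxonomy.txt.gz", sep="\t", skiprows=1, names=["Count", "Kingdom", "Phylum"]
)
print(df)  # one row: Count=12, Kingdom=Bacteria, Phylum=Bacillota
```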
mgnify_pipelines_toolkit/analysis/rawreads/study_summary_generator.py

```diff
@@ -119,20 +119,25 @@ def parse_one_tax_file(run_acc: str, tax_file: Path, db_label: str) -> pd.DataFrame:
     :rtype: pd.DataFrame
     """
 
-    tax_ranks = _MOTUS_TAX_RANKS if db_label == "
+    tax_ranks = _MOTUS_TAX_RANKS if db_label == "motus" else _SILVA_TAX_RANKS
     res_df = pd.read_csv(tax_file, sep="\t", skiprows=1, names=["Count"] + tax_ranks)
     res_df = res_df.fillna("")
 
-    …
-    …
-    …
+    if res_df.shape[0] > 0:
+        validate_dataframe(
+            res_df,
+            MotusTaxonSchema if db_label == "motus" else TaxonSchema,
+            str(tax_file),
+        )
 
-    res_df["full_taxon"] =
-    …
+    res_df["full_taxon"] = [
+        ";".join(r[tax_ranks]).strip(";") for _, r in res_df.iterrows()
+    ]
+    final_df = (
+        res_df[["Count", "full_taxon"]]
+        .set_index("full_taxon")
+        .rename(columns={"Count": run_acc})
     )
-    final_df = res_df.iloc[:, [0, -1]]
-    final_df = final_df.set_index("full_taxon")
-    final_df.columns = [run_acc]
 
     return final_df
 
```
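To illustrate the rewritten tail of `parse_one_tax_file`: the per-rank columns are joined into a single `full_taxon` lineage string, which becomes the index, and the count column is renamed to the run accession so per-run frames can later be joined column-wise. A self-contained sketch with invented ranks and counts (not real toolkit data):

```python
import pandas as pd

tax_ranks = ["Kingdom", "Phylum"]  # stand-ins for the toolkit's rank constants
run_acc = "ERR0000001"             # hypothetical run accession

res_df = pd.DataFrame(
    {
        "Count": [12, 5],
        "Kingdom": ["Bacteria", "Archaea"],
        "Phylum": ["Bacillota", ""],
    }
)

# Join the rank columns into one lineage string, dropping the trailing separators
# left by empty ranks, exactly as the new list comprehension does.
res_df["full_taxon"] = [
    ";".join(r[tax_ranks]).strip(";") for _, r in res_df.iterrows()
]

final_df = (
    res_df[["Count", "full_taxon"]]
    .set_index("full_taxon")
    .rename(columns={"Count": run_acc})
)
print(final_df)
# index "full_taxon": "Bacteria;Bacillota" and "Archaea";
# a single column named after the run accession holding the counts 12 and 5.
```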
mgnify_pipelines_toolkit/analysis/rawreads/study_summary_generator.py

```diff
@@ -162,16 +167,20 @@ def parse_one_func_file(
     ).set_index("function")
     res_df = res_df.fillna(0)
 
-    …
+    if res_df.shape[0] > 0:
+        validate_dataframe(res_df, FunctionProfileSchema, str(func_file))
 
-    count_df = res_df[["read_count"]]
-    …
+    count_df = pd.DataFrame(res_df[["read_count"]]).rename(
+        columns={"read_count": run_acc}
+    )
 
-    depth_df = res_df[["coverage_depth"]]
-    …
+    depth_df = pd.DataFrame(res_df[["coverage_depth"]]).rename(
+        columns={"coverage_depth": run_acc}
+    )
 
-    breadth_df = res_df[["coverage_breadth"]]
-    …
+    breadth_df = pd.DataFrame(res_df[["coverage_breadth"]]).rename(
+        columns={"coverage_breadth": run_acc}
+    )
 
     return count_df, depth_df, breadth_df
 
```
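To illustrate the per-metric reshaping: each metric column is pulled out as a one-column DataFrame and renamed to the run accession, so the frames from different runs can later be joined side by side (as in `merge_summaries` below). A minimal sketch with invented function identifiers and counts:

```python
import pandas as pd

run_acc = "ERR0000002"  # hypothetical run accession

res_df = pd.DataFrame(
    {
        "read_count": [10, 4],
        "coverage_depth": [1.5, 0.7],
        "coverage_breadth": [0.9, 0.3],
    },
    index=pd.Index(["K00001", "K00002"], name="function"),
)

# res_df[["read_count"]] already returns a DataFrame, so the pd.DataFrame(...) wrapper
# is defensive; the rename gives every run the same column name (its accession).
count_df = pd.DataFrame(res_df[["read_count"]]).rename(columns={"read_count": run_acc})
depth_df = pd.DataFrame(res_df[["coverage_depth"]]).rename(
    columns={"coverage_depth": run_acc}
)

print(count_df.columns.tolist(), depth_df.columns.tolist())  # ['ERR0000002'] ['ERR0000002']
```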
mgnify_pipelines_toolkit/analysis/rawreads/study_summary_generator.py

```diff
@@ -423,7 +432,9 @@ def merge_summaries(analyses_dir: str, output_prefix: str) -> None:
         curr_df = pd.read_csv(summary, sep="\t", index_col=0)
         res_df = res_df.join(curr_df, how="outer")
         res_df = res_df.fillna(0)
-        res_df = res_df.astype(
+        res_df = res_df.astype(
+            int if table_type == "read-count" else float
+        )
 
     res_df = res_df.reindex(sorted(res_df.columns), axis=1)
     res_df.to_csv(
```