mgnify-pipelines-toolkit 1.2.7__tar.gz → 1.2.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mgnify-pipelines-toolkit might be problematic. Click here for more details.

Files changed (58)
  1. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/PKG-INFO +1 -1
  2. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/analysis/amplicon/make_asv_count_table.py +1 -1
  3. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/analysis/assembly/summarise_sanntis_bgcs.py +3 -4
  4. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/analysis/rawreads/study_summary_generator.py +30 -19
  5. mgnify_pipelines_toolkit-1.2.8/mgnify_pipelines_toolkit/analysis/shared/dwc_summary_generator.py +749 -0
  6. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/constants/db_labels.py +2 -2
  7. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/constants/tax_ranks.py +1 -9
  8. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/schemas/schemas.py +2 -2
  9. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit.egg-info/PKG-INFO +1 -1
  10. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit.egg-info/entry_points.txt +1 -1
  11. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/pyproject.toml +2 -2
  12. mgnify_pipelines_toolkit-1.2.7/mgnify_pipelines_toolkit/analysis/shared/dwc_summary_generator.py +0 -240
  13. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/LICENSE +0 -0
  14. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/README.md +0 -0
  15. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/__init__.py +0 -0
  16. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/analysis/__init__.py +0 -0
  17. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/analysis/amplicon/classify_var_regions.py +0 -0
  18. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/analysis/amplicon/mapseq_to_asv_table.py +0 -0
  19. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/analysis/amplicon/permute_primers.py +0 -0
  20. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/analysis/amplicon/primer_val_classification.py +0 -0
  21. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/analysis/amplicon/remove_ambiguous_reads.py +0 -0
  22. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/analysis/amplicon/rev_comp_se_primers.py +0 -0
  23. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/analysis/amplicon/study_summary_generator.py +0 -0
  24. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/analysis/assembly/add_rhea_chebi_annotation.py +0 -0
  25. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/analysis/assembly/antismash_gff_builder.py +0 -0
  26. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/analysis/assembly/combined_gene_caller_merge.py +0 -0
  27. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/analysis/assembly/generate_gaf.py +0 -0
  28. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/analysis/assembly/gff_annotation_utils.py +0 -0
  29. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/analysis/assembly/gff_file_utils.py +0 -0
  30. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/analysis/assembly/gff_toolkit.py +0 -0
  31. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/analysis/assembly/go_utils.py +0 -0
  32. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/analysis/assembly/krona_txt_from_cat_classification.py +0 -0
  33. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/analysis/assembly/process_dbcan_result_cazys.py +0 -0
  34. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/analysis/assembly/process_dbcan_result_clusters.py +0 -0
  35. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/analysis/assembly/study_summary_generator.py +0 -0
  36. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/analysis/assembly/summarise_antismash_bgcs.py +0 -0
  37. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/analysis/assembly/summarise_goslims.py +0 -0
  38. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/analysis/genomes/__init__.py +0 -0
  39. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/analysis/shared/__init__.py +0 -0
  40. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/analysis/shared/convert_cmscan_to_cmsearch_tblout.py +0 -0
  41. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/analysis/shared/fastq_suffix_header_check.py +0 -0
  42. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/analysis/shared/get_subunits.py +0 -0
  43. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/analysis/shared/get_subunits_coords.py +0 -0
  44. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/analysis/shared/library_strategy_check.py +0 -0
  45. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/analysis/shared/mapseq2biom.py +0 -0
  46. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/analysis/shared/markergene_study_summary.py +0 -0
  47. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/constants/ncrna.py +0 -0
  48. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/constants/regex_fasta_header.py +0 -0
  49. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/constants/thresholds.py +0 -0
  50. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/constants/var_region_coordinates.py +0 -0
  51. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/utils/__init__.py +0 -0
  52. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/utils/fasta_to_delimited.py +0 -0
  53. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit/utils/get_mpt_version.py +0 -0
  54. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit.egg-info/SOURCES.txt +0 -0
  55. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit.egg-info/dependency_links.txt +0 -0
  56. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit.egg-info/requires.txt +0 -0
  57. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/mgnify_pipelines_toolkit.egg-info/top_level.txt +0 -0
  58. {mgnify_pipelines_toolkit-1.2.7 → mgnify_pipelines_toolkit-1.2.8}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mgnify_pipelines_toolkit
3
- Version: 1.2.7
3
+ Version: 1.2.8
4
4
  Summary: Collection of scripts and tools for MGnify pipelines
5
5
  Author-email: MGnify team <metagenomics-help@ebi.ac.uk>
6
6
  License: Apache Software License 2.0
@@ -300,7 +300,7 @@ def main():
300
300
  if paired_end:
301
301
  rev_fr.close()
302
302
 
303
- if asv_dict: # if there are matches between taxonomic and ASV annotations
303
+ if asv_dict: # if there are matches between taxonomic and ASV annotations
304
304
  ref_db = ""
305
305
 
306
306
  if len(taxa_df.columns) == 9:
@@ -110,10 +110,9 @@ def main():
110
110
  df_merged = df_merged[
111
111
  ["nearest_mibig", "nearest_mibig_class", "description", "count"]
112
112
  ]
113
- df_merged = df_merged.rename(columns={
114
- "Description": "description",
115
- "Count": "count"
116
- })
113
+ df_merged = df_merged.rename(
114
+ columns={"Description": "description", "Count": "count"}
115
+ )
117
116
  df_merged.to_csv(output_filename, sep="\t", index=False)
118
117
 
119
118
 
@@ -53,7 +53,7 @@ def cli():
53
53
 
54
54
  def get_file(
55
55
  run_acc: str, analyses_dir: Path, db_label: str
56
- ) -> Union[Path, List[Path]]:
56
+ ) -> Union[Path, List[Path], None]:
57
57
  """Takes path information for a particular analysis and db_label combo, and returns any existing files.
58
58
 
59
59
  :param run_acc: Run accession for the tax file that should be retrieved.
@@ -84,7 +84,7 @@ def get_file(
84
84
  return
85
85
 
86
86
  analysis_file = Path(
87
- f"{analyses_dir}/{run_acc}/{db_dir}/{db_label}/{run_acc}_{db_label}.txt"
87
+ f"{analyses_dir}/{run_acc}/{db_dir}/{db_label}/{run_acc}_{db_label}.txt.gz"
88
88
  )
89
89
  if not analysis_file.exists():
90
90
  logging.error(
@@ -119,20 +119,25 @@ def parse_one_tax_file(run_acc: str, tax_file: Path, db_label: str) -> pd.DataFr
119
119
  :rtype: pd.DataFrame
120
120
  """
121
121
 
122
- tax_ranks = _MOTUS_TAX_RANKS if db_label == "mOTUs" else _SILVA_TAX_RANKS
122
+ tax_ranks = _MOTUS_TAX_RANKS if db_label == "motus" else _SILVA_TAX_RANKS
123
123
  res_df = pd.read_csv(tax_file, sep="\t", skiprows=1, names=["Count"] + tax_ranks)
124
124
  res_df = res_df.fillna("")
125
125
 
126
- validate_dataframe(
127
- res_df, MotusTaxonSchema if db_label == "mOTUs" else TaxonSchema, str(tax_file)
128
- )
126
+ if res_df.shape[0] > 0:
127
+ validate_dataframe(
128
+ res_df,
129
+ MotusTaxonSchema if db_label == "motus" else TaxonSchema,
130
+ str(tax_file),
131
+ )
129
132
 
130
- res_df["full_taxon"] = res_df.iloc[:, 1:].apply(
131
- lambda x: ";".join(x).strip(";"), axis=1
133
+ res_df["full_taxon"] = [
134
+ ";".join(r[tax_ranks]).strip(";") for _, r in res_df.iterrows()
135
+ ]
136
+ final_df = (
137
+ res_df[["Count", "full_taxon"]]
138
+ .set_index("full_taxon")
139
+ .rename(columns={"Count": run_acc})
132
140
  )
133
- final_df = res_df.iloc[:, [0, -1]]
134
- final_df = final_df.set_index("full_taxon")
135
- final_df.columns = [run_acc]
136
141
 
137
142
  return final_df
138
143
 
@@ -162,16 +167,20 @@ def parse_one_func_file(
162
167
  ).set_index("function")
163
168
  res_df = res_df.fillna(0)
164
169
 
165
- validate_dataframe(res_df, FunctionProfileSchema, str(func_file))
170
+ if res_df.shape[0] > 0:
171
+ validate_dataframe(res_df, FunctionProfileSchema, str(func_file))
166
172
 
167
- count_df = res_df[["read_count"]]
168
- count_df.columns = [run_acc]
173
+ count_df = pd.DataFrame(res_df[["read_count"]]).rename(
174
+ columns={"read_count": run_acc}
175
+ )
169
176
 
170
- depth_df = res_df[["coverage_depth"]]
171
- depth_df.columns = [run_acc]
177
+ depth_df = pd.DataFrame(res_df[["coverage_depth"]]).rename(
178
+ columns={"coverage_depth": run_acc}
179
+ )
172
180
 
173
- breadth_df = res_df[["coverage_breadth"]]
174
- breadth_df.columns = [run_acc]
181
+ breadth_df = pd.DataFrame(res_df[["coverage_breadth"]]).rename(
182
+ columns={"coverage_breadth": run_acc}
183
+ )
175
184
 
176
185
  return count_df, depth_df, breadth_df
177
186
 
@@ -423,7 +432,9 @@ def merge_summaries(analyses_dir: str, output_prefix: str) -> None:
423
432
  curr_df = pd.read_csv(summary, sep="\t", index_col=0)
424
433
  res_df = res_df.join(curr_df, how="outer")
425
434
  res_df = res_df.fillna(0)
426
- res_df = res_df.astype(int if table_type == "count" else float)
435
+ res_df = res_df.astype(
436
+ int if table_type == "read-count" else float
437
+ )
427
438
 
428
439
  res_df = res_df.reindex(sorted(res_df.columns), axis=1)
429
440
  res_df.to_csv(