mgnify-pipelines-toolkit 1.0.6__tar.gz → 1.0.8__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of mgnify-pipelines-toolkit might be problematic. Click here for more details.

Files changed (61)
  1. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/PKG-INFO +1 -1
  2. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/analysis/amplicon/classify_var_regions.py +5 -2
  3. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/analysis/assembly/process_dbcan_result_cazys.py +3 -3
  4. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/analysis/assembly/process_dbcan_result_clusters.py +3 -3
  5. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit.egg-info/PKG-INFO +1 -1
  6. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/pyproject.toml +1 -1
  7. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/LICENSE +0 -0
  8. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/README.md +0 -0
  9. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/__init__.py +0 -0
  10. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/analysis/__init__.py +0 -0
  11. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/analysis/amplicon/amplicon_utils.py +0 -0
  12. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/analysis/amplicon/are_there_primers.py +0 -0
  13. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/analysis/amplicon/assess_inflection_point_mcp.py +0 -0
  14. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/analysis/amplicon/assess_mcp_proportions.py +0 -0
  15. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/analysis/amplicon/find_mcp_inflection_points.py +0 -0
  16. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/analysis/amplicon/make_asv_count_table.py +0 -0
  17. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/analysis/amplicon/mapseq_to_asv_table.py +0 -0
  18. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/analysis/amplicon/primer_val_classification.py +0 -0
  19. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/analysis/amplicon/remove_ambiguous_reads.py +0 -0
  20. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/analysis/amplicon/rev_comp_se_primers.py +0 -0
  21. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/analysis/amplicon/standard_primer_matching.py +0 -0
  22. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/analysis/assembly/add_rhea_chebi_annotation.py +0 -0
  23. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/analysis/assembly/antismash_gff_builder.py +0 -0
  24. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/analysis/assembly/combined_gene_caller_merge.py +0 -0
  25. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/analysis/assembly/generate_gaf.py +0 -0
  26. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/analysis/assembly/gff_annotation_utils.py +0 -0
  27. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/analysis/assembly/gff_file_utils.py +0 -0
  28. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/analysis/assembly/gff_toolkit.py +0 -0
  29. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/analysis/assembly/go_utils.py +0 -0
  30. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/analysis/assembly/krona_txt_from_cat_classification.py +0 -0
  31. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/analysis/assembly/summarise_antismash_bgcs.py +0 -0
  32. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/analysis/assembly/summarise_goslims.py +0 -0
  33. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/analysis/assembly/summarise_sanntis_bgcs.py +0 -0
  34. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/analysis/genomes/__init__.py +0 -0
  35. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/analysis/shared/__init__.py +0 -0
  36. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/analysis/shared/convert_cmscan_to_cmsearch_tblout.py +0 -0
  37. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/analysis/shared/dwc_summary_generator.py +0 -0
  38. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/analysis/shared/fastq_suffix_header_check.py +0 -0
  39. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/analysis/shared/get_subunits.py +0 -0
  40. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/analysis/shared/get_subunits_coords.py +0 -0
  41. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/analysis/shared/library_strategy_check.py +0 -0
  42. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/analysis/shared/mapseq2biom.py +0 -0
  43. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/analysis/shared/markergene_study_summary.py +0 -0
  44. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/analysis/shared/study_summary_generator.py +0 -0
  45. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/constants/db_labels.py +0 -0
  46. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/constants/ncrna.py +0 -0
  47. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/constants/regex_ambiguous_bases.py +0 -0
  48. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/constants/regex_fasta_header.py +0 -0
  49. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/constants/tax_ranks.py +0 -0
  50. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/constants/thresholds.py +0 -0
  51. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/constants/var_region_coordinates.py +0 -0
  52. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/schemas/schemas.py +0 -0
  53. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/utils/__init__.py +0 -0
  54. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/utils/fasta_to_delimited.py +0 -0
  55. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit/utils/get_mpt_version.py +0 -0
  56. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit.egg-info/SOURCES.txt +0 -0
  57. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit.egg-info/dependency_links.txt +0 -0
  58. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit.egg-info/entry_points.txt +0 -0
  59. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit.egg-info/requires.txt +0 -0
  60. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/mgnify_pipelines_toolkit.egg-info/top_level.txt +0 -0
  61. {mgnify_pipelines_toolkit-1.0.6 → mgnify_pipelines_toolkit-1.0.8}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mgnify_pipelines_toolkit
3
- Version: 1.0.6
3
+ Version: 1.0.8
4
4
  Summary: Collection of scripts and tools for MGnify pipelines
5
5
  Author-email: MGnify team <metagenomics-help@ebi.ac.uk>
6
6
  License: Apache Software License 2.0
@@ -401,12 +401,10 @@ def retrieve_regions(
401
401
  region_counter = defaultdict(int)
402
402
 
403
403
  regions_to_remove = []
404
-
405
404
  for model, value in multiregion_matches.items():
406
405
  marker_gene = determine_marker_gene(determine_domain(model))
407
406
  for region in value:
408
407
  region_counter[f"{marker_gene}.{region}"] += 1
409
-
410
408
  for region, count in region_counter.items():
411
409
  if count < MIN_SEQ_COUNT:
412
410
  regions_to_remove.append(region)
@@ -421,6 +419,8 @@ def retrieve_regions(
421
419
  for model, value in multiregion_matches.items():
422
420
  new_value = []
423
421
  for region in value:
422
+ if region == "":
423
+ continue
424
424
  marker_gene = determine_marker_gene(determine_domain(model))
425
425
  full_region = f"{marker_gene}.{region}"
426
426
  if full_region not in regions_to_remove:
@@ -463,6 +463,9 @@ def retrieve_regions(
463
463
  for key, value in temp_seq_counter.items():
464
464
  seq_per_variable_region_count.setdefault(key, 0)
465
465
  seq_per_variable_region_count[key] += value
466
+ else:
467
+ logging.info("No output will be produced - the run is ambiguous.")
468
+ continue
466
469
 
467
470
  json_outfile = "{}.json".format(outfile_prefix)
468
471
  tsv_outfile = "{}.tsv".format(outfile_prefix)
@@ -52,7 +52,7 @@ def main():
52
52
 
53
53
  def load_gff(gff):
54
54
  genome_gff_lines = dict()
55
- with fileinput.hook_compressed(gff, "rt") as gff:
55
+ with fileinput.hook_compressed(gff, "r", encoding="utf-8") as gff:
56
56
  for line in gff:
57
57
  if line.startswith("##FASTA"):
58
58
  return genome_gff_lines
@@ -81,7 +81,7 @@ def load_gff(gff):
81
81
  def print_gff(overview_file, outfile, dbcan_version, substrates, genome_gff_lines):
82
82
  with open(outfile, "w") as file_out:
83
83
  file_out.write("##gff-version 3\n")
84
- with fileinput.hook_compressed(overview_file, "rt") as file_in:
84
+ with fileinput.hook_compressed(overview_file, "r", encoding="utf-8") as file_in:
85
85
  for line in file_in:
86
86
  if line.startswith("MGYG") or line.startswith("ERZ"):
87
87
  (
@@ -151,7 +151,7 @@ def print_gff(overview_file, outfile, dbcan_version, substrates, genome_gff_line
151
151
 
152
152
  def load_substrates(hmm_path):
153
153
  substrates = dict()
154
- with fileinput.hook_compressed(hmm_path, "rt") as file_in:
154
+ with fileinput.hook_compressed(hmm_path, "r", encoding="utf-8") as file_in:
155
155
  header = next(file_in)
156
156
  header_fields = header.strip().split("\t")
157
157
  substrate_idx = header_fields.index("Substrate")
@@ -49,7 +49,7 @@ def main():
49
49
 
50
50
  def load_cgcs(standard_path):
51
51
  cgc_locations = dict()
52
- with fileinput.hook_compressed(standard_path, "rt") as file_in:
52
+ with fileinput.hook_compressed(standard_path, "r", encoding="utf-8") as file_in:
53
53
  for line in file_in:
54
54
  if not line.startswith("CGC#"):
55
55
  cgc, _, contig, _, start, end, _, _ = line.strip().split("\t")
@@ -72,7 +72,7 @@ def print_gff(standard_path, outfile, dbcan_version, substrates, cgc_locations):
72
72
  with open(outfile, "w") as file_out:
73
73
  file_out.write("##gff-version 3\n")
74
74
  cgcs_printed = list()
75
- with fileinput.hook_compressed(standard_path, "rt") as file_in:
75
+ with fileinput.hook_compressed(standard_path, "r", encoding="utf-8") as file_in:
76
76
  for line in file_in:
77
77
  if not line.startswith("CGC#"):
78
78
  cgc, gene_type, contig, prot_id, start, end, strand, protein_fam = (
@@ -107,7 +107,7 @@ def print_gff(standard_path, outfile, dbcan_version, substrates, cgc_locations):
107
107
 
108
108
  def load_substrates(substrate_path):
109
109
  substrates = dict()
110
- with fileinput.hook_compressed(substrate_path, "rt") as file_in:
110
+ with fileinput.hook_compressed(substrate_path, "r", encoding="utf-8") as file_in:
111
111
  for line in file_in:
112
112
  if not line.startswith("#"):
113
113
  parts = line.strip().split("\t")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mgnify_pipelines_toolkit
3
- Version: 1.0.6
3
+ Version: 1.0.8
4
4
  Summary: Collection of scripts and tools for MGnify pipelines
5
5
  Author-email: MGnify team <metagenomics-help@ebi.ac.uk>
6
6
  License: Apache Software License 2.0
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "mgnify_pipelines_toolkit"
3
- version = "1.0.6"
3
+ version = "1.0.8"
4
4
  readme = "README.md"
5
5
  license = {text = "Apache Software License 2.0"}
6
6
  authors = [