mgnify-pipelines-toolkit 1.0.6__py3-none-any.whl → 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mgnify-pipelines-toolkit might be problematic. Click here for more details.

@@ -401,12 +401,10 @@ def retrieve_regions(
401
401
  region_counter = defaultdict(int)
402
402
 
403
403
  regions_to_remove = []
404
-
405
404
  for model, value in multiregion_matches.items():
406
405
  marker_gene = determine_marker_gene(determine_domain(model))
407
406
  for region in value:
408
407
  region_counter[f"{marker_gene}.{region}"] += 1
409
-
410
408
  for region, count in region_counter.items():
411
409
  if count < MIN_SEQ_COUNT:
412
410
  regions_to_remove.append(region)
@@ -421,6 +419,8 @@ def retrieve_regions(
421
419
  for model, value in multiregion_matches.items():
422
420
  new_value = []
423
421
  for region in value:
422
+ if region == "":
423
+ continue
424
424
  marker_gene = determine_marker_gene(determine_domain(model))
425
425
  full_region = f"{marker_gene}.{region}"
426
426
  if full_region not in regions_to_remove:
@@ -463,6 +463,9 @@ def retrieve_regions(
463
463
  for key, value in temp_seq_counter.items():
464
464
  seq_per_variable_region_count.setdefault(key, 0)
465
465
  seq_per_variable_region_count[key] += value
466
+ else:
467
+ logging.info("No output will be produced - the run is ambiguous.")
468
+ continue
466
469
 
467
470
  json_outfile = "{}.json".format(outfile_prefix)
468
471
  tsv_outfile = "{}.tsv".format(outfile_prefix)
@@ -52,7 +52,7 @@ def main():
52
52
 
53
53
  def load_gff(gff):
54
54
  genome_gff_lines = dict()
55
- with fileinput.hook_compressed(gff, "rt") as gff:
55
+ with fileinput.hook_compressed(gff, "r", encoding="utf-8") as gff:
56
56
  for line in gff:
57
57
  if line.startswith("##FASTA"):
58
58
  return genome_gff_lines
@@ -81,7 +81,7 @@ def load_gff(gff):
81
81
  def print_gff(overview_file, outfile, dbcan_version, substrates, genome_gff_lines):
82
82
  with open(outfile, "w") as file_out:
83
83
  file_out.write("##gff-version 3\n")
84
- with fileinput.hook_compressed(overview_file, "rt") as file_in:
84
+ with fileinput.hook_compressed(overview_file, "r", encoding="utf-8") as file_in:
85
85
  for line in file_in:
86
86
  if line.startswith("MGYG") or line.startswith("ERZ"):
87
87
  (
@@ -151,7 +151,7 @@ def print_gff(overview_file, outfile, dbcan_version, substrates, genome_gff_line
151
151
 
152
152
  def load_substrates(hmm_path):
153
153
  substrates = dict()
154
- with fileinput.hook_compressed(hmm_path, "rt") as file_in:
154
+ with fileinput.hook_compressed(hmm_path, "r", encoding="utf-8") as file_in:
155
155
  header = next(file_in)
156
156
  header_fields = header.strip().split("\t")
157
157
  substrate_idx = header_fields.index("Substrate")
@@ -49,7 +49,7 @@ def main():
49
49
 
50
50
  def load_cgcs(standard_path):
51
51
  cgc_locations = dict()
52
- with fileinput.hook_compressed(standard_path, "rt") as file_in:
52
+ with fileinput.hook_compressed(standard_path, "r", encoding="utf-8") as file_in:
53
53
  for line in file_in:
54
54
  if not line.startswith("CGC#"):
55
55
  cgc, _, contig, _, start, end, _, _ = line.strip().split("\t")
@@ -72,7 +72,7 @@ def print_gff(standard_path, outfile, dbcan_version, substrates, cgc_locations):
72
72
  with open(outfile, "w") as file_out:
73
73
  file_out.write("##gff-version 3\n")
74
74
  cgcs_printed = list()
75
- with fileinput.hook_compressed(standard_path, "rt") as file_in:
75
+ with fileinput.hook_compressed(standard_path, "r", encoding="utf-8") as file_in:
76
76
  for line in file_in:
77
77
  if not line.startswith("CGC#"):
78
78
  cgc, gene_type, contig, prot_id, start, end, strand, protein_fam = (
@@ -107,7 +107,7 @@ def print_gff(standard_path, outfile, dbcan_version, substrates, cgc_locations):
107
107
 
108
108
  def load_substrates(substrate_path):
109
109
  substrates = dict()
110
- with fileinput.hook_compressed(substrate_path, "rt") as file_in:
110
+ with fileinput.hook_compressed(substrate_path, "r", encoding="utf-8") as file_in:
111
111
  for line in file_in:
112
112
  if not line.startswith("#"):
113
113
  parts = line.strip().split("\t")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mgnify_pipelines_toolkit
3
- Version: 1.0.6
3
+ Version: 1.0.8
4
4
  Summary: Collection of scripts and tools for MGnify pipelines
5
5
  Author-email: MGnify team <metagenomics-help@ebi.ac.uk>
6
6
  License: Apache Software License 2.0
@@ -4,7 +4,7 @@ mgnify_pipelines_toolkit/analysis/amplicon/amplicon_utils.py,sha256=8qmb57E2XBrw
4
4
  mgnify_pipelines_toolkit/analysis/amplicon/are_there_primers.py,sha256=2-URxvcl13_8O9bUmoa3-KMPSvdTaLbxfFDY-ycs_4M,5316
5
5
  mgnify_pipelines_toolkit/analysis/amplicon/assess_inflection_point_mcp.py,sha256=cRoHPM-VB_L3NWYgkNWuyzqIqhzwHJuU3-6BiiS2lnw,7553
6
6
  mgnify_pipelines_toolkit/analysis/amplicon/assess_mcp_proportions.py,sha256=RAdqakH05Qt_LG9jlV7P2M90o5KmlAXmDFQ4X51NIBE,5387
7
- mgnify_pipelines_toolkit/analysis/amplicon/classify_var_regions.py,sha256=EqfaATb5agvtQOhJqrb2YS6OxtCXvxC-q_05UzvDYug,19926
7
+ mgnify_pipelines_toolkit/analysis/amplicon/classify_var_regions.py,sha256=x8GmhFPT2ElAcBMeyLEBw3zYxI75fYL0xVHUPSyhw7c,20100
8
8
  mgnify_pipelines_toolkit/analysis/amplicon/find_mcp_inflection_points.py,sha256=vC3nKxggnSljfw4HNkugXbXfGvLx7XnryEE7eEGqfqs,3552
9
9
  mgnify_pipelines_toolkit/analysis/amplicon/make_asv_count_table.py,sha256=soTewFddtebW-EcejGh9whs3cBLWJrGCYdPc0KukoAw,8756
10
10
  mgnify_pipelines_toolkit/analysis/amplicon/mapseq_to_asv_table.py,sha256=BLqhflblUegCvuQic16PrFXfIXlFWmGkmWJyl4wJoLQ,5040
@@ -21,8 +21,8 @@ mgnify_pipelines_toolkit/analysis/assembly/gff_file_utils.py,sha256=_4J31wAjK5B1
21
21
  mgnify_pipelines_toolkit/analysis/assembly/gff_toolkit.py,sha256=_iaTBvMKbQDi_02_QuSPqLJ_rC37ruxiPHv5lLQmI-w,5480
22
22
  mgnify_pipelines_toolkit/analysis/assembly/go_utils.py,sha256=eay9e3Xdc8XxnlC_4SHHjN89k-M9i_cFMc2lI_ZFxqY,5596
23
23
  mgnify_pipelines_toolkit/analysis/assembly/krona_txt_from_cat_classification.py,sha256=uex2T6GagtYFBIc39-Xm4SFHL06KAQ5v0_loOmY_eaw,4289
24
- mgnify_pipelines_toolkit/analysis/assembly/process_dbcan_result_cazys.py,sha256=5lbVSWRZoi61cKvuolzUJlhUBzpx8DgWMH0Vzw1HcHA,7748
25
- mgnify_pipelines_toolkit/analysis/assembly/process_dbcan_result_clusters.py,sha256=i4uYdqY6y2ee72vl0sLkHeJvigHGKJMzdyR3HEIK1Mk,5930
24
+ mgnify_pipelines_toolkit/analysis/assembly/process_dbcan_result_cazys.py,sha256=KaJHOKfbIurbD1iiMssjdAaSAT8Nv-_ZUFwxkLqukAE,7799
25
+ mgnify_pipelines_toolkit/analysis/assembly/process_dbcan_result_clusters.py,sha256=DYZhChGD49M-zAtGkCmNHXDoVTnd5Qy6amG-oePO8Ek,5981
26
26
  mgnify_pipelines_toolkit/analysis/assembly/summarise_antismash_bgcs.py,sha256=eRAQ0vFbqnWreiBdtFuwLKve9WwYwv9dYQtD1pumaZs,10776
27
27
  mgnify_pipelines_toolkit/analysis/assembly/summarise_goslims.py,sha256=TPaKlYkoy37_XgYNOskWCCoXtPNku_k5ygSeK4fT1VQ,6689
28
28
  mgnify_pipelines_toolkit/analysis/assembly/summarise_sanntis_bgcs.py,sha256=65szj-H8Hxy_eXy3TyTs48EhPJbJ2w1skHlVbH2YeVM,4538
@@ -48,9 +48,9 @@ mgnify_pipelines_toolkit/schemas/schemas.py,sha256=pnH8LUH8i2ACNvFNWyG-n-eIHZcI5
48
48
  mgnify_pipelines_toolkit/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
49
49
  mgnify_pipelines_toolkit/utils/fasta_to_delimited.py,sha256=lgYIR1S4crURY7C7nFtgE6QMV4u4zCNsUrVkcRnsEEo,3996
50
50
  mgnify_pipelines_toolkit/utils/get_mpt_version.py,sha256=aS9bWrC9CP7tpxoEVg6eEYt18-pmjG7fJl5Mchz4YOU,798
51
- mgnify_pipelines_toolkit-1.0.6.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
52
- mgnify_pipelines_toolkit-1.0.6.dist-info/METADATA,sha256=b9Hoo0e0xVvL4erImJgt_7gtbb-5Yx8TZNlf9KZcQIY,5810
53
- mgnify_pipelines_toolkit-1.0.6.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
54
- mgnify_pipelines_toolkit-1.0.6.dist-info/entry_points.txt,sha256=T8soGT2to8c_qafw-0itqCn4sjOnxlfaNWHIaHz4H54,3416
55
- mgnify_pipelines_toolkit-1.0.6.dist-info/top_level.txt,sha256=xA_wC7C01V3VwuDnqwRM2QYeJJ45WtvF6LVav4tYxuE,25
56
- mgnify_pipelines_toolkit-1.0.6.dist-info/RECORD,,
51
+ mgnify_pipelines_toolkit-1.0.8.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
52
+ mgnify_pipelines_toolkit-1.0.8.dist-info/METADATA,sha256=1hUZYUS7S2wdpK1zQdj4gVYhpUvn_-LjHi8iGb9LQ30,5810
53
+ mgnify_pipelines_toolkit-1.0.8.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
54
+ mgnify_pipelines_toolkit-1.0.8.dist-info/entry_points.txt,sha256=T8soGT2to8c_qafw-0itqCn4sjOnxlfaNWHIaHz4H54,3416
55
+ mgnify_pipelines_toolkit-1.0.8.dist-info/top_level.txt,sha256=xA_wC7C01V3VwuDnqwRM2QYeJJ45WtvF6LVav4tYxuE,25
56
+ mgnify_pipelines_toolkit-1.0.8.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (79.0.0)
2
+ Generator: setuptools (79.0.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5