mgnify-pipelines-toolkit 1.0.6__py3-none-any.whl → 1.0.8__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mgnify-pipelines-toolkit might be problematic. Click here for more details.
- mgnify_pipelines_toolkit/analysis/amplicon/classify_var_regions.py +5 -2
- mgnify_pipelines_toolkit/analysis/assembly/process_dbcan_result_cazys.py +3 -3
- mgnify_pipelines_toolkit/analysis/assembly/process_dbcan_result_clusters.py +3 -3
- {mgnify_pipelines_toolkit-1.0.6.dist-info → mgnify_pipelines_toolkit-1.0.8.dist-info}/METADATA +1 -1
- {mgnify_pipelines_toolkit-1.0.6.dist-info → mgnify_pipelines_toolkit-1.0.8.dist-info}/RECORD +9 -9
- {mgnify_pipelines_toolkit-1.0.6.dist-info → mgnify_pipelines_toolkit-1.0.8.dist-info}/WHEEL +1 -1
- {mgnify_pipelines_toolkit-1.0.6.dist-info → mgnify_pipelines_toolkit-1.0.8.dist-info}/entry_points.txt +0 -0
- {mgnify_pipelines_toolkit-1.0.6.dist-info → mgnify_pipelines_toolkit-1.0.8.dist-info}/licenses/LICENSE +0 -0
- {mgnify_pipelines_toolkit-1.0.6.dist-info → mgnify_pipelines_toolkit-1.0.8.dist-info}/top_level.txt +0 -0
|
@@ -401,12 +401,10 @@ def retrieve_regions(
|
|
|
401
401
|
region_counter = defaultdict(int)
|
|
402
402
|
|
|
403
403
|
regions_to_remove = []
|
|
404
|
-
|
|
405
404
|
for model, value in multiregion_matches.items():
|
|
406
405
|
marker_gene = determine_marker_gene(determine_domain(model))
|
|
407
406
|
for region in value:
|
|
408
407
|
region_counter[f"{marker_gene}.{region}"] += 1
|
|
409
|
-
|
|
410
408
|
for region, count in region_counter.items():
|
|
411
409
|
if count < MIN_SEQ_COUNT:
|
|
412
410
|
regions_to_remove.append(region)
|
|
@@ -421,6 +419,8 @@ def retrieve_regions(
|
|
|
421
419
|
for model, value in multiregion_matches.items():
|
|
422
420
|
new_value = []
|
|
423
421
|
for region in value:
|
|
422
|
+
if region == "":
|
|
423
|
+
continue
|
|
424
424
|
marker_gene = determine_marker_gene(determine_domain(model))
|
|
425
425
|
full_region = f"{marker_gene}.{region}"
|
|
426
426
|
if full_region not in regions_to_remove:
|
|
@@ -463,6 +463,9 @@ def retrieve_regions(
|
|
|
463
463
|
for key, value in temp_seq_counter.items():
|
|
464
464
|
seq_per_variable_region_count.setdefault(key, 0)
|
|
465
465
|
seq_per_variable_region_count[key] += value
|
|
466
|
+
else:
|
|
467
|
+
logging.info("No output will be produced - the run is ambiguous.")
|
|
468
|
+
continue
|
|
466
469
|
|
|
467
470
|
json_outfile = "{}.json".format(outfile_prefix)
|
|
468
471
|
tsv_outfile = "{}.tsv".format(outfile_prefix)
|
|
@@ -52,7 +52,7 @@ def main():
|
|
|
52
52
|
|
|
53
53
|
def load_gff(gff):
|
|
54
54
|
genome_gff_lines = dict()
|
|
55
|
-
with fileinput.hook_compressed(gff, "
|
|
55
|
+
with fileinput.hook_compressed(gff, "r", encoding="utf-8") as gff:
|
|
56
56
|
for line in gff:
|
|
57
57
|
if line.startswith("##FASTA"):
|
|
58
58
|
return genome_gff_lines
|
|
@@ -81,7 +81,7 @@ def load_gff(gff):
|
|
|
81
81
|
def print_gff(overview_file, outfile, dbcan_version, substrates, genome_gff_lines):
|
|
82
82
|
with open(outfile, "w") as file_out:
|
|
83
83
|
file_out.write("##gff-version 3\n")
|
|
84
|
-
with fileinput.hook_compressed(overview_file, "
|
|
84
|
+
with fileinput.hook_compressed(overview_file, "r", encoding="utf-8") as file_in:
|
|
85
85
|
for line in file_in:
|
|
86
86
|
if line.startswith("MGYG") or line.startswith("ERZ"):
|
|
87
87
|
(
|
|
@@ -151,7 +151,7 @@ def print_gff(overview_file, outfile, dbcan_version, substrates, genome_gff_line
|
|
|
151
151
|
|
|
152
152
|
def load_substrates(hmm_path):
|
|
153
153
|
substrates = dict()
|
|
154
|
-
with fileinput.hook_compressed(hmm_path, "
|
|
154
|
+
with fileinput.hook_compressed(hmm_path, "r", encoding="utf-8") as file_in:
|
|
155
155
|
header = next(file_in)
|
|
156
156
|
header_fields = header.strip().split("\t")
|
|
157
157
|
substrate_idx = header_fields.index("Substrate")
|
|
@@ -49,7 +49,7 @@ def main():
|
|
|
49
49
|
|
|
50
50
|
def load_cgcs(standard_path):
|
|
51
51
|
cgc_locations = dict()
|
|
52
|
-
with fileinput.hook_compressed(standard_path, "
|
|
52
|
+
with fileinput.hook_compressed(standard_path, "r", encoding="utf-8") as file_in:
|
|
53
53
|
for line in file_in:
|
|
54
54
|
if not line.startswith("CGC#"):
|
|
55
55
|
cgc, _, contig, _, start, end, _, _ = line.strip().split("\t")
|
|
@@ -72,7 +72,7 @@ def print_gff(standard_path, outfile, dbcan_version, substrates, cgc_locations):
|
|
|
72
72
|
with open(outfile, "w") as file_out:
|
|
73
73
|
file_out.write("##gff-version 3\n")
|
|
74
74
|
cgcs_printed = list()
|
|
75
|
-
with fileinput.hook_compressed(standard_path, "
|
|
75
|
+
with fileinput.hook_compressed(standard_path, "r", encoding="utf-8") as file_in:
|
|
76
76
|
for line in file_in:
|
|
77
77
|
if not line.startswith("CGC#"):
|
|
78
78
|
cgc, gene_type, contig, prot_id, start, end, strand, protein_fam = (
|
|
@@ -107,7 +107,7 @@ def print_gff(standard_path, outfile, dbcan_version, substrates, cgc_locations):
|
|
|
107
107
|
|
|
108
108
|
def load_substrates(substrate_path):
|
|
109
109
|
substrates = dict()
|
|
110
|
-
with fileinput.hook_compressed(substrate_path, "
|
|
110
|
+
with fileinput.hook_compressed(substrate_path, "r", encoding="utf-8") as file_in:
|
|
111
111
|
for line in file_in:
|
|
112
112
|
if not line.startswith("#"):
|
|
113
113
|
parts = line.strip().split("\t")
|
{mgnify_pipelines_toolkit-1.0.6.dist-info → mgnify_pipelines_toolkit-1.0.8.dist-info}/RECORD
RENAMED
|
@@ -4,7 +4,7 @@ mgnify_pipelines_toolkit/analysis/amplicon/amplicon_utils.py,sha256=8qmb57E2XBrw
|
|
|
4
4
|
mgnify_pipelines_toolkit/analysis/amplicon/are_there_primers.py,sha256=2-URxvcl13_8O9bUmoa3-KMPSvdTaLbxfFDY-ycs_4M,5316
|
|
5
5
|
mgnify_pipelines_toolkit/analysis/amplicon/assess_inflection_point_mcp.py,sha256=cRoHPM-VB_L3NWYgkNWuyzqIqhzwHJuU3-6BiiS2lnw,7553
|
|
6
6
|
mgnify_pipelines_toolkit/analysis/amplicon/assess_mcp_proportions.py,sha256=RAdqakH05Qt_LG9jlV7P2M90o5KmlAXmDFQ4X51NIBE,5387
|
|
7
|
-
mgnify_pipelines_toolkit/analysis/amplicon/classify_var_regions.py,sha256=
|
|
7
|
+
mgnify_pipelines_toolkit/analysis/amplicon/classify_var_regions.py,sha256=x8GmhFPT2ElAcBMeyLEBw3zYxI75fYL0xVHUPSyhw7c,20100
|
|
8
8
|
mgnify_pipelines_toolkit/analysis/amplicon/find_mcp_inflection_points.py,sha256=vC3nKxggnSljfw4HNkugXbXfGvLx7XnryEE7eEGqfqs,3552
|
|
9
9
|
mgnify_pipelines_toolkit/analysis/amplicon/make_asv_count_table.py,sha256=soTewFddtebW-EcejGh9whs3cBLWJrGCYdPc0KukoAw,8756
|
|
10
10
|
mgnify_pipelines_toolkit/analysis/amplicon/mapseq_to_asv_table.py,sha256=BLqhflblUegCvuQic16PrFXfIXlFWmGkmWJyl4wJoLQ,5040
|
|
@@ -21,8 +21,8 @@ mgnify_pipelines_toolkit/analysis/assembly/gff_file_utils.py,sha256=_4J31wAjK5B1
|
|
|
21
21
|
mgnify_pipelines_toolkit/analysis/assembly/gff_toolkit.py,sha256=_iaTBvMKbQDi_02_QuSPqLJ_rC37ruxiPHv5lLQmI-w,5480
|
|
22
22
|
mgnify_pipelines_toolkit/analysis/assembly/go_utils.py,sha256=eay9e3Xdc8XxnlC_4SHHjN89k-M9i_cFMc2lI_ZFxqY,5596
|
|
23
23
|
mgnify_pipelines_toolkit/analysis/assembly/krona_txt_from_cat_classification.py,sha256=uex2T6GagtYFBIc39-Xm4SFHL06KAQ5v0_loOmY_eaw,4289
|
|
24
|
-
mgnify_pipelines_toolkit/analysis/assembly/process_dbcan_result_cazys.py,sha256=
|
|
25
|
-
mgnify_pipelines_toolkit/analysis/assembly/process_dbcan_result_clusters.py,sha256=
|
|
24
|
+
mgnify_pipelines_toolkit/analysis/assembly/process_dbcan_result_cazys.py,sha256=KaJHOKfbIurbD1iiMssjdAaSAT8Nv-_ZUFwxkLqukAE,7799
|
|
25
|
+
mgnify_pipelines_toolkit/analysis/assembly/process_dbcan_result_clusters.py,sha256=DYZhChGD49M-zAtGkCmNHXDoVTnd5Qy6amG-oePO8Ek,5981
|
|
26
26
|
mgnify_pipelines_toolkit/analysis/assembly/summarise_antismash_bgcs.py,sha256=eRAQ0vFbqnWreiBdtFuwLKve9WwYwv9dYQtD1pumaZs,10776
|
|
27
27
|
mgnify_pipelines_toolkit/analysis/assembly/summarise_goslims.py,sha256=TPaKlYkoy37_XgYNOskWCCoXtPNku_k5ygSeK4fT1VQ,6689
|
|
28
28
|
mgnify_pipelines_toolkit/analysis/assembly/summarise_sanntis_bgcs.py,sha256=65szj-H8Hxy_eXy3TyTs48EhPJbJ2w1skHlVbH2YeVM,4538
|
|
@@ -48,9 +48,9 @@ mgnify_pipelines_toolkit/schemas/schemas.py,sha256=pnH8LUH8i2ACNvFNWyG-n-eIHZcI5
|
|
|
48
48
|
mgnify_pipelines_toolkit/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
49
49
|
mgnify_pipelines_toolkit/utils/fasta_to_delimited.py,sha256=lgYIR1S4crURY7C7nFtgE6QMV4u4zCNsUrVkcRnsEEo,3996
|
|
50
50
|
mgnify_pipelines_toolkit/utils/get_mpt_version.py,sha256=aS9bWrC9CP7tpxoEVg6eEYt18-pmjG7fJl5Mchz4YOU,798
|
|
51
|
-
mgnify_pipelines_toolkit-1.0.
|
|
52
|
-
mgnify_pipelines_toolkit-1.0.
|
|
53
|
-
mgnify_pipelines_toolkit-1.0.
|
|
54
|
-
mgnify_pipelines_toolkit-1.0.
|
|
55
|
-
mgnify_pipelines_toolkit-1.0.
|
|
56
|
-
mgnify_pipelines_toolkit-1.0.
|
|
51
|
+
mgnify_pipelines_toolkit-1.0.8.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
52
|
+
mgnify_pipelines_toolkit-1.0.8.dist-info/METADATA,sha256=1hUZYUS7S2wdpK1zQdj4gVYhpUvn_-LjHi8iGb9LQ30,5810
|
|
53
|
+
mgnify_pipelines_toolkit-1.0.8.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
|
|
54
|
+
mgnify_pipelines_toolkit-1.0.8.dist-info/entry_points.txt,sha256=T8soGT2to8c_qafw-0itqCn4sjOnxlfaNWHIaHz4H54,3416
|
|
55
|
+
mgnify_pipelines_toolkit-1.0.8.dist-info/top_level.txt,sha256=xA_wC7C01V3VwuDnqwRM2QYeJJ45WtvF6LVav4tYxuE,25
|
|
56
|
+
mgnify_pipelines_toolkit-1.0.8.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
{mgnify_pipelines_toolkit-1.0.6.dist-info → mgnify_pipelines_toolkit-1.0.8.dist-info}/top_level.txt
RENAMED
|
File without changes
|