mgnify-pipelines-toolkit 1.0.5__tar.gz → 1.0.7__tar.gz
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mgnify-pipelines-toolkit might be problematic. Click here for more details.
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/PKG-INFO +1 -1
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/assembly/gff_toolkit.py +27 -24
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/assembly/process_dbcan_result_cazys.py +15 -9
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/assembly/process_dbcan_result_clusters.py +14 -7
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit.egg-info/PKG-INFO +1 -1
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/pyproject.toml +1 -1
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/LICENSE +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/README.md +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/__init__.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/__init__.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/amplicon/amplicon_utils.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/amplicon/are_there_primers.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/amplicon/assess_inflection_point_mcp.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/amplicon/assess_mcp_proportions.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/amplicon/classify_var_regions.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/amplicon/find_mcp_inflection_points.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/amplicon/make_asv_count_table.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/amplicon/mapseq_to_asv_table.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/amplicon/primer_val_classification.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/amplicon/remove_ambiguous_reads.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/amplicon/rev_comp_se_primers.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/amplicon/standard_primer_matching.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/assembly/add_rhea_chebi_annotation.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/assembly/antismash_gff_builder.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/assembly/combined_gene_caller_merge.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/assembly/generate_gaf.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/assembly/gff_annotation_utils.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/assembly/gff_file_utils.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/assembly/go_utils.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/assembly/krona_txt_from_cat_classification.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/assembly/summarise_antismash_bgcs.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/assembly/summarise_goslims.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/assembly/summarise_sanntis_bgcs.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/genomes/__init__.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/shared/__init__.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/shared/convert_cmscan_to_cmsearch_tblout.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/shared/dwc_summary_generator.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/shared/fastq_suffix_header_check.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/shared/get_subunits.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/shared/get_subunits_coords.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/shared/library_strategy_check.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/shared/mapseq2biom.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/shared/markergene_study_summary.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/shared/study_summary_generator.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/constants/db_labels.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/constants/ncrna.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/constants/regex_ambiguous_bases.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/constants/regex_fasta_header.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/constants/tax_ranks.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/constants/thresholds.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/constants/var_region_coordinates.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/schemas/schemas.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/utils/__init__.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/utils/fasta_to_delimited.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/utils/get_mpt_version.py +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit.egg-info/SOURCES.txt +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit.egg-info/dependency_links.txt +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit.egg-info/entry_points.txt +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit.egg-info/requires.txt +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit.egg-info/top_level.txt +0 -0
- {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/setup.cfg +0 -0
|
@@ -29,24 +29,27 @@ from mgnify_pipelines_toolkit.analysis.assembly.gff_file_utils import (
|
|
|
29
29
|
)
|
|
30
30
|
|
|
31
31
|
|
|
32
|
-
def main(
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
32
|
+
def main():
|
|
33
|
+
|
|
34
|
+
(
|
|
35
|
+
gff,
|
|
36
|
+
ipr_file,
|
|
37
|
+
eggnog_file,
|
|
38
|
+
sanntis_file,
|
|
39
|
+
crispr_file,
|
|
40
|
+
amr_file,
|
|
41
|
+
antismash_file,
|
|
42
|
+
gecco_file,
|
|
43
|
+
dbcan_file,
|
|
44
|
+
dbcan_cazys_file,
|
|
45
|
+
defense_finder_file,
|
|
46
|
+
pseudofinder_file,
|
|
47
|
+
rfam_file,
|
|
48
|
+
trnascan_file,
|
|
49
|
+
outfile,
|
|
50
|
+
pseudogene_report_file,
|
|
51
|
+
) = parse_args()
|
|
52
|
+
|
|
50
53
|
# load annotations and add them to existing CDS
|
|
51
54
|
# here header contains leading GFF lines starting with "#",
|
|
52
55
|
# main_gff_extended is a dictionary that contains GFF lines with added in additional annotations
|
|
@@ -163,12 +166,8 @@ def parse_args():
|
|
|
163
166
|
"--pseudogene-report", help="Pseudogene report filename", required=False
|
|
164
167
|
)
|
|
165
168
|
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
if __name__ == "__main__":
|
|
170
|
-
args = parse_args()
|
|
171
|
-
main(
|
|
169
|
+
args = parser.parse_args()
|
|
170
|
+
return (
|
|
172
171
|
args.gff_input,
|
|
173
172
|
args.ips,
|
|
174
173
|
args.eggnog,
|
|
@@ -186,3 +185,7 @@ if __name__ == "__main__":
|
|
|
186
185
|
args.outfile,
|
|
187
186
|
args.pseudogene_report,
|
|
188
187
|
)
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
if __name__ == "__main__":
|
|
191
|
+
main()
|
|
@@ -24,7 +24,16 @@ import re
|
|
|
24
24
|
logging.basicConfig(level=logging.INFO)
|
|
25
25
|
|
|
26
26
|
|
|
27
|
-
def main(
|
|
27
|
+
def main():
|
|
28
|
+
|
|
29
|
+
args = parse_args()
|
|
30
|
+
hmm_file, overview_file, genome_gff, outfile, dbcan_ver = (
|
|
31
|
+
args.hmm_file,
|
|
32
|
+
args.overview_file,
|
|
33
|
+
args.genome_gff,
|
|
34
|
+
args.outfile,
|
|
35
|
+
args.dbcan_ver,
|
|
36
|
+
)
|
|
28
37
|
|
|
29
38
|
hmm_path = Path(hmm_file)
|
|
30
39
|
overview_path = Path(overview_file)
|
|
@@ -38,12 +47,12 @@ def main(hmm_file, overview_file, genome_gff, outfile, dbcan_version):
|
|
|
38
47
|
substrates = load_substrates(hmm_path)
|
|
39
48
|
genome_gff_lines = load_gff(genome_gff)
|
|
40
49
|
|
|
41
|
-
print_gff(overview_file, outfile,
|
|
50
|
+
print_gff(overview_file, outfile, dbcan_ver, substrates, genome_gff_lines)
|
|
42
51
|
|
|
43
52
|
|
|
44
53
|
def load_gff(gff):
|
|
45
54
|
genome_gff_lines = dict()
|
|
46
|
-
with fileinput.hook_compressed(gff, "
|
|
55
|
+
with fileinput.hook_compressed(gff, "r", encoding="utf-8") as gff:
|
|
47
56
|
for line in gff:
|
|
48
57
|
if line.startswith("##FASTA"):
|
|
49
58
|
return genome_gff_lines
|
|
@@ -72,7 +81,7 @@ def load_gff(gff):
|
|
|
72
81
|
def print_gff(overview_file, outfile, dbcan_version, substrates, genome_gff_lines):
|
|
73
82
|
with open(outfile, "w") as file_out:
|
|
74
83
|
file_out.write("##gff-version 3\n")
|
|
75
|
-
with fileinput.hook_compressed(overview_file, "
|
|
84
|
+
with fileinput.hook_compressed(overview_file, "r", encoding="utf-8") as file_in:
|
|
76
85
|
for line in file_in:
|
|
77
86
|
if line.startswith("MGYG") or line.startswith("ERZ"):
|
|
78
87
|
(
|
|
@@ -142,7 +151,7 @@ def print_gff(overview_file, outfile, dbcan_version, substrates, genome_gff_line
|
|
|
142
151
|
|
|
143
152
|
def load_substrates(hmm_path):
|
|
144
153
|
substrates = dict()
|
|
145
|
-
with fileinput.hook_compressed(hmm_path, "
|
|
154
|
+
with fileinput.hook_compressed(hmm_path, "r", encoding="utf-8") as file_in:
|
|
146
155
|
header = next(file_in)
|
|
147
156
|
header_fields = header.strip().split("\t")
|
|
148
157
|
substrate_idx = header_fields.index("Substrate")
|
|
@@ -205,7 +214,4 @@ def parse_args():
|
|
|
205
214
|
|
|
206
215
|
|
|
207
216
|
if __name__ == "__main__":
|
|
208
|
-
|
|
209
|
-
main(
|
|
210
|
-
args.hmm_file, args.overview_file, args.genome_gff, args.outfile, args.dbcan_ver
|
|
211
|
-
)
|
|
217
|
+
main()
|
|
@@ -22,7 +22,15 @@ from pathlib import Path
|
|
|
22
22
|
logging.basicConfig(level=logging.INFO)
|
|
23
23
|
|
|
24
24
|
|
|
25
|
-
def main(
|
|
25
|
+
def main():
|
|
26
|
+
|
|
27
|
+
args = parse_args()
|
|
28
|
+
standard_file, substrate_file, outfile, dbcan_ver = (
|
|
29
|
+
args.standard_file,
|
|
30
|
+
args.substrate_file,
|
|
31
|
+
args.outfile,
|
|
32
|
+
args.dbcan_ver,
|
|
33
|
+
)
|
|
26
34
|
standard_path = Path(standard_file)
|
|
27
35
|
substrate_path = Path(substrate_file)
|
|
28
36
|
|
|
@@ -36,12 +44,12 @@ def main(standard_file, substrate_file, outfile, dbcan_version):
|
|
|
36
44
|
|
|
37
45
|
substrates = load_substrates(substrate_path)
|
|
38
46
|
cgc_locations = load_cgcs(standard_path)
|
|
39
|
-
print_gff(standard_path, outfile,
|
|
47
|
+
print_gff(standard_path, outfile, dbcan_ver, substrates, cgc_locations)
|
|
40
48
|
|
|
41
49
|
|
|
42
50
|
def load_cgcs(standard_path):
|
|
43
51
|
cgc_locations = dict()
|
|
44
|
-
with fileinput.hook_compressed(standard_path, "
|
|
52
|
+
with fileinput.hook_compressed(standard_path, "r", encoding="utf-8") as file_in:
|
|
45
53
|
for line in file_in:
|
|
46
54
|
if not line.startswith("CGC#"):
|
|
47
55
|
cgc, _, contig, _, start, end, _, _ = line.strip().split("\t")
|
|
@@ -64,7 +72,7 @@ def print_gff(standard_path, outfile, dbcan_version, substrates, cgc_locations):
|
|
|
64
72
|
with open(outfile, "w") as file_out:
|
|
65
73
|
file_out.write("##gff-version 3\n")
|
|
66
74
|
cgcs_printed = list()
|
|
67
|
-
with fileinput.hook_compressed(standard_path, "
|
|
75
|
+
with fileinput.hook_compressed(standard_path, "r", encoding="utf-8") as file_in:
|
|
68
76
|
for line in file_in:
|
|
69
77
|
if not line.startswith("CGC#"):
|
|
70
78
|
cgc, gene_type, contig, prot_id, start, end, strand, protein_fam = (
|
|
@@ -99,7 +107,7 @@ def print_gff(standard_path, outfile, dbcan_version, substrates, cgc_locations):
|
|
|
99
107
|
|
|
100
108
|
def load_substrates(substrate_path):
|
|
101
109
|
substrates = dict()
|
|
102
|
-
with fileinput.hook_compressed(substrate_path, "
|
|
110
|
+
with fileinput.hook_compressed(substrate_path, "r", encoding="utf-8") as file_in:
|
|
103
111
|
for line in file_in:
|
|
104
112
|
if not line.startswith("#"):
|
|
105
113
|
parts = line.strip().split("\t")
|
|
@@ -158,5 +166,4 @@ def parse_args():
|
|
|
158
166
|
|
|
159
167
|
|
|
160
168
|
if __name__ == "__main__":
|
|
161
|
-
|
|
162
|
-
main(args.standard_file, args.substrate_file, args.outfile, args.dbcan_ver)
|
|
169
|
+
main()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|