mgnify-pipelines-toolkit 1.0.5__tar.gz → 1.0.7__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mgnify-pipelines-toolkit might be problematic. Click here for more details.

Files changed (61)
  1. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/PKG-INFO +1 -1
  2. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/assembly/gff_toolkit.py +27 -24
  3. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/assembly/process_dbcan_result_cazys.py +15 -9
  4. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/assembly/process_dbcan_result_clusters.py +14 -7
  5. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit.egg-info/PKG-INFO +1 -1
  6. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/pyproject.toml +1 -1
  7. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/LICENSE +0 -0
  8. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/README.md +0 -0
  9. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/__init__.py +0 -0
  10. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/__init__.py +0 -0
  11. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/amplicon/amplicon_utils.py +0 -0
  12. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/amplicon/are_there_primers.py +0 -0
  13. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/amplicon/assess_inflection_point_mcp.py +0 -0
  14. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/amplicon/assess_mcp_proportions.py +0 -0
  15. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/amplicon/classify_var_regions.py +0 -0
  16. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/amplicon/find_mcp_inflection_points.py +0 -0
  17. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/amplicon/make_asv_count_table.py +0 -0
  18. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/amplicon/mapseq_to_asv_table.py +0 -0
  19. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/amplicon/primer_val_classification.py +0 -0
  20. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/amplicon/remove_ambiguous_reads.py +0 -0
  21. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/amplicon/rev_comp_se_primers.py +0 -0
  22. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/amplicon/standard_primer_matching.py +0 -0
  23. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/assembly/add_rhea_chebi_annotation.py +0 -0
  24. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/assembly/antismash_gff_builder.py +0 -0
  25. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/assembly/combined_gene_caller_merge.py +0 -0
  26. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/assembly/generate_gaf.py +0 -0
  27. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/assembly/gff_annotation_utils.py +0 -0
  28. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/assembly/gff_file_utils.py +0 -0
  29. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/assembly/go_utils.py +0 -0
  30. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/assembly/krona_txt_from_cat_classification.py +0 -0
  31. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/assembly/summarise_antismash_bgcs.py +0 -0
  32. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/assembly/summarise_goslims.py +0 -0
  33. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/assembly/summarise_sanntis_bgcs.py +0 -0
  34. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/genomes/__init__.py +0 -0
  35. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/shared/__init__.py +0 -0
  36. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/shared/convert_cmscan_to_cmsearch_tblout.py +0 -0
  37. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/shared/dwc_summary_generator.py +0 -0
  38. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/shared/fastq_suffix_header_check.py +0 -0
  39. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/shared/get_subunits.py +0 -0
  40. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/shared/get_subunits_coords.py +0 -0
  41. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/shared/library_strategy_check.py +0 -0
  42. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/shared/mapseq2biom.py +0 -0
  43. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/shared/markergene_study_summary.py +0 -0
  44. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/analysis/shared/study_summary_generator.py +0 -0
  45. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/constants/db_labels.py +0 -0
  46. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/constants/ncrna.py +0 -0
  47. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/constants/regex_ambiguous_bases.py +0 -0
  48. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/constants/regex_fasta_header.py +0 -0
  49. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/constants/tax_ranks.py +0 -0
  50. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/constants/thresholds.py +0 -0
  51. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/constants/var_region_coordinates.py +0 -0
  52. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/schemas/schemas.py +0 -0
  53. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/utils/__init__.py +0 -0
  54. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/utils/fasta_to_delimited.py +0 -0
  55. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit/utils/get_mpt_version.py +0 -0
  56. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit.egg-info/SOURCES.txt +0 -0
  57. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit.egg-info/dependency_links.txt +0 -0
  58. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit.egg-info/entry_points.txt +0 -0
  59. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit.egg-info/requires.txt +0 -0
  60. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/mgnify_pipelines_toolkit.egg-info/top_level.txt +0 -0
  61. {mgnify_pipelines_toolkit-1.0.5 → mgnify_pipelines_toolkit-1.0.7}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mgnify_pipelines_toolkit
3
- Version: 1.0.5
3
+ Version: 1.0.7
4
4
  Summary: Collection of scripts and tools for MGnify pipelines
5
5
  Author-email: MGnify team <metagenomics-help@ebi.ac.uk>
6
6
  License: Apache Software License 2.0
@@ -29,24 +29,27 @@ from mgnify_pipelines_toolkit.analysis.assembly.gff_file_utils import (
29
29
  )
30
30
 
31
31
 
32
- def main(
33
- gff,
34
- ipr_file,
35
- eggnog_file,
36
- sanntis_file,
37
- crispr_file,
38
- amr_file,
39
- antismash_file,
40
- gecco_file,
41
- dbcan_file,
42
- dbcan_cazys_file,
43
- defense_finder_file,
44
- pseudofinder_file,
45
- rfam_file,
46
- trnascan_file,
47
- outfile,
48
- pseudogene_report_file,
49
- ):
32
+ def main():
33
+
34
+ (
35
+ gff,
36
+ ipr_file,
37
+ eggnog_file,
38
+ sanntis_file,
39
+ crispr_file,
40
+ amr_file,
41
+ antismash_file,
42
+ gecco_file,
43
+ dbcan_file,
44
+ dbcan_cazys_file,
45
+ defense_finder_file,
46
+ pseudofinder_file,
47
+ rfam_file,
48
+ trnascan_file,
49
+ outfile,
50
+ pseudogene_report_file,
51
+ ) = parse_args()
52
+
50
53
  # load annotations and add them to existing CDS
51
54
  # here header contains leading GFF lines starting with "#",
52
55
  # main_gff_extended is a dictionary that contains GFF lines with added in additional annotations
@@ -163,12 +166,8 @@ def parse_args():
163
166
  "--pseudogene-report", help="Pseudogene report filename", required=False
164
167
  )
165
168
 
166
- return parser.parse_args()
167
-
168
-
169
- if __name__ == "__main__":
170
- args = parse_args()
171
- main(
169
+ args = parser.parse_args()
170
+ return (
172
171
  args.gff_input,
173
172
  args.ips,
174
173
  args.eggnog,
@@ -186,3 +185,7 @@ if __name__ == "__main__":
186
185
  args.outfile,
187
186
  args.pseudogene_report,
188
187
  )
188
+
189
+
190
+ if __name__ == "__main__":
191
+ main()
@@ -24,7 +24,16 @@ import re
24
24
  logging.basicConfig(level=logging.INFO)
25
25
 
26
26
 
27
- def main(hmm_file, overview_file, genome_gff, outfile, dbcan_version):
27
+ def main():
28
+
29
+ args = parse_args()
30
+ hmm_file, overview_file, genome_gff, outfile, dbcan_ver = (
31
+ args.hmm_file,
32
+ args.overview_file,
33
+ args.genome_gff,
34
+ args.outfile,
35
+ args.dbcan_ver,
36
+ )
28
37
 
29
38
  hmm_path = Path(hmm_file)
30
39
  overview_path = Path(overview_file)
@@ -38,12 +47,12 @@ def main(hmm_file, overview_file, genome_gff, outfile, dbcan_version):
38
47
  substrates = load_substrates(hmm_path)
39
48
  genome_gff_lines = load_gff(genome_gff)
40
49
 
41
- print_gff(overview_file, outfile, dbcan_version, substrates, genome_gff_lines)
50
+ print_gff(overview_file, outfile, dbcan_ver, substrates, genome_gff_lines)
42
51
 
43
52
 
44
53
  def load_gff(gff):
45
54
  genome_gff_lines = dict()
46
- with fileinput.hook_compressed(gff, "rt") as gff:
55
+ with fileinput.hook_compressed(gff, "r", encoding="utf-8") as gff:
47
56
  for line in gff:
48
57
  if line.startswith("##FASTA"):
49
58
  return genome_gff_lines
@@ -72,7 +81,7 @@ def load_gff(gff):
72
81
  def print_gff(overview_file, outfile, dbcan_version, substrates, genome_gff_lines):
73
82
  with open(outfile, "w") as file_out:
74
83
  file_out.write("##gff-version 3\n")
75
- with fileinput.hook_compressed(overview_file, "rt") as file_in:
84
+ with fileinput.hook_compressed(overview_file, "r", encoding="utf-8") as file_in:
76
85
  for line in file_in:
77
86
  if line.startswith("MGYG") or line.startswith("ERZ"):
78
87
  (
@@ -142,7 +151,7 @@ def print_gff(overview_file, outfile, dbcan_version, substrates, genome_gff_line
142
151
 
143
152
  def load_substrates(hmm_path):
144
153
  substrates = dict()
145
- with fileinput.hook_compressed(hmm_path, "rt") as file_in:
154
+ with fileinput.hook_compressed(hmm_path, "r", encoding="utf-8") as file_in:
146
155
  header = next(file_in)
147
156
  header_fields = header.strip().split("\t")
148
157
  substrate_idx = header_fields.index("Substrate")
@@ -205,7 +214,4 @@ def parse_args():
205
214
 
206
215
 
207
216
  if __name__ == "__main__":
208
- args = parse_args()
209
- main(
210
- args.hmm_file, args.overview_file, args.genome_gff, args.outfile, args.dbcan_ver
211
- )
217
+ main()
@@ -22,7 +22,15 @@ from pathlib import Path
22
22
  logging.basicConfig(level=logging.INFO)
23
23
 
24
24
 
25
- def main(standard_file, substrate_file, outfile, dbcan_version):
25
+ def main():
26
+
27
+ args = parse_args()
28
+ standard_file, substrate_file, outfile, dbcan_ver = (
29
+ args.standard_file,
30
+ args.substrate_file,
31
+ args.outfile,
32
+ args.dbcan_ver,
33
+ )
26
34
  standard_path = Path(standard_file)
27
35
  substrate_path = Path(substrate_file)
28
36
 
@@ -36,12 +44,12 @@ def main(standard_file, substrate_file, outfile, dbcan_version):
36
44
 
37
45
  substrates = load_substrates(substrate_path)
38
46
  cgc_locations = load_cgcs(standard_path)
39
- print_gff(standard_path, outfile, dbcan_version, substrates, cgc_locations)
47
+ print_gff(standard_path, outfile, dbcan_ver, substrates, cgc_locations)
40
48
 
41
49
 
42
50
  def load_cgcs(standard_path):
43
51
  cgc_locations = dict()
44
- with fileinput.hook_compressed(standard_path, "rt") as file_in:
52
+ with fileinput.hook_compressed(standard_path, "r", encoding="utf-8") as file_in:
45
53
  for line in file_in:
46
54
  if not line.startswith("CGC#"):
47
55
  cgc, _, contig, _, start, end, _, _ = line.strip().split("\t")
@@ -64,7 +72,7 @@ def print_gff(standard_path, outfile, dbcan_version, substrates, cgc_locations):
64
72
  with open(outfile, "w") as file_out:
65
73
  file_out.write("##gff-version 3\n")
66
74
  cgcs_printed = list()
67
- with fileinput.hook_compressed(standard_path, "rt") as file_in:
75
+ with fileinput.hook_compressed(standard_path, "r", encoding="utf-8") as file_in:
68
76
  for line in file_in:
69
77
  if not line.startswith("CGC#"):
70
78
  cgc, gene_type, contig, prot_id, start, end, strand, protein_fam = (
@@ -99,7 +107,7 @@ def print_gff(standard_path, outfile, dbcan_version, substrates, cgc_locations):
99
107
 
100
108
  def load_substrates(substrate_path):
101
109
  substrates = dict()
102
- with fileinput.hook_compressed(substrate_path, "rt") as file_in:
110
+ with fileinput.hook_compressed(substrate_path, "r", encoding="utf-8") as file_in:
103
111
  for line in file_in:
104
112
  if not line.startswith("#"):
105
113
  parts = line.strip().split("\t")
@@ -158,5 +166,4 @@ def parse_args():
158
166
 
159
167
 
160
168
  if __name__ == "__main__":
161
- args = parse_args()
162
- main(args.standard_file, args.substrate_file, args.outfile, args.dbcan_ver)
169
+ main()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mgnify_pipelines_toolkit
3
- Version: 1.0.5
3
+ Version: 1.0.7
4
4
  Summary: Collection of scripts and tools for MGnify pipelines
5
5
  Author-email: MGnify team <metagenomics-help@ebi.ac.uk>
6
6
  License: Apache Software License 2.0
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "mgnify_pipelines_toolkit"
3
- version = "1.0.5"
3
+ version = "1.0.7"
4
4
  readme = "README.md"
5
5
  license = {text = "Apache Software License 2.0"}
6
6
  authors = [