mgnify-pipelines-toolkit 0.2.1__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mgnify-pipelines-toolkit might be problematic. Click here for more details.

Files changed (46) hide show
  1. mgnify_pipelines_toolkit/analysis/amplicon/amplicon_utils.py +1 -1
  2. mgnify_pipelines_toolkit/analysis/amplicon/are_there_primers.py +1 -1
  3. mgnify_pipelines_toolkit/analysis/amplicon/assess_inflection_point_mcp.py +1 -1
  4. mgnify_pipelines_toolkit/analysis/amplicon/assess_mcp_proportions.py +1 -1
  5. mgnify_pipelines_toolkit/analysis/amplicon/classify_var_regions.py +1 -1
  6. mgnify_pipelines_toolkit/analysis/amplicon/find_mcp_inflection_points.py +1 -1
  7. mgnify_pipelines_toolkit/analysis/amplicon/make_asv_count_table.py +1 -1
  8. mgnify_pipelines_toolkit/analysis/amplicon/mapseq_to_asv_table.py +1 -1
  9. mgnify_pipelines_toolkit/analysis/amplicon/primer_val_classification.py +1 -1
  10. mgnify_pipelines_toolkit/analysis/amplicon/remove_ambiguous_reads.py +1 -1
  11. mgnify_pipelines_toolkit/analysis/amplicon/rev_comp_se_primers.py +1 -1
  12. mgnify_pipelines_toolkit/analysis/amplicon/standard_primer_matching.py +1 -1
  13. mgnify_pipelines_toolkit/analysis/assembly/add_rhea_chebi_annotation.py +1 -1
  14. mgnify_pipelines_toolkit/analysis/assembly/antismash_gff_builder.py +1 -1
  15. mgnify_pipelines_toolkit/analysis/assembly/combined_gene_caller_merge.py +511 -0
  16. mgnify_pipelines_toolkit/analysis/assembly/generate_gaf.py +1 -1
  17. mgnify_pipelines_toolkit/analysis/assembly/gff_annotation_utils.py +829 -0
  18. mgnify_pipelines_toolkit/analysis/assembly/gff_file_utils.py +82 -0
  19. mgnify_pipelines_toolkit/analysis/assembly/gff_toolkit.py +170 -0
  20. mgnify_pipelines_toolkit/analysis/assembly/go_utils.py +1 -1
  21. mgnify_pipelines_toolkit/analysis/assembly/summarise_goslims.py +1 -1
  22. mgnify_pipelines_toolkit/analysis/shared/dwc_summary_generator.py +240 -0
  23. mgnify_pipelines_toolkit/analysis/shared/fastq_suffix_header_check.py +1 -1
  24. mgnify_pipelines_toolkit/analysis/shared/get_subunits.py +1 -1
  25. mgnify_pipelines_toolkit/analysis/shared/get_subunits_coords.py +1 -1
  26. mgnify_pipelines_toolkit/analysis/shared/library_strategy_check.py +1 -1
  27. mgnify_pipelines_toolkit/analysis/shared/mapseq2biom.py +1 -1
  28. mgnify_pipelines_toolkit/analysis/shared/markergene_study_summary.py +243 -0
  29. mgnify_pipelines_toolkit/analysis/shared/study_summary_generator.py +1 -1
  30. mgnify_pipelines_toolkit/constants/db_labels.py +1 -1
  31. mgnify_pipelines_toolkit/constants/regex_ambiguous_bases.py +1 -1
  32. mgnify_pipelines_toolkit/constants/regex_fasta_header.py +1 -1
  33. mgnify_pipelines_toolkit/constants/tax_ranks.py +1 -1
  34. mgnify_pipelines_toolkit/constants/thresholds.py +8 -1
  35. mgnify_pipelines_toolkit/constants/var_region_coordinates.py +1 -1
  36. mgnify_pipelines_toolkit/schemas/schemas.py +1 -1
  37. mgnify_pipelines_toolkit/utils/fasta_to_delimited.py +1 -1
  38. mgnify_pipelines_toolkit/utils/get_mpt_version.py +1 -1
  39. {mgnify_pipelines_toolkit-0.2.1.dist-info → mgnify_pipelines_toolkit-1.0.0.dist-info}/METADATA +3 -1
  40. mgnify_pipelines_toolkit-1.0.0.dist-info/RECORD +48 -0
  41. {mgnify_pipelines_toolkit-0.2.1.dist-info → mgnify_pipelines_toolkit-1.0.0.dist-info}/WHEEL +1 -1
  42. {mgnify_pipelines_toolkit-0.2.1.dist-info → mgnify_pipelines_toolkit-1.0.0.dist-info}/entry_points.txt +4 -2
  43. mgnify_pipelines_toolkit/analysis/assembly/cgc_merge.py +0 -424
  44. mgnify_pipelines_toolkit-0.2.1.dist-info/RECORD +0 -43
  45. {mgnify_pipelines_toolkit-0.2.1.dist-info → mgnify_pipelines_toolkit-1.0.0.dist-info}/LICENSE +0 -0
  46. {mgnify_pipelines_toolkit-0.2.1.dist-info → mgnify_pipelines_toolkit-1.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,243 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ # Copyright 2024-2025 EMBL - European Bioinformatics Institute
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License.
8
+ # You may obtain a copy of the License at
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+
18
+ import argparse
19
+ from collections import defaultdict
20
+ import json
21
+ import pathlib
22
+ import logging
23
+
24
+ import pandas as pd
25
+ import pyfastx
26
+
27
+ from mgnify_pipelines_toolkit.constants.thresholds import MAJORITY_MARKER_PROPORTION
28
+
29
+ logging.basicConfig(level=logging.DEBUG)
30
+
31
+
32
def parse_args():
    """Parse the command-line options of the marker gene study summary script.

    All three options are required strings.

    Returns:
        tuple: ``(input_path, runs, prefix)`` -- the pipeline results
        directory, the CSV file listing successful analyses, and the prefix
        used for the output files.
    """
    cli = argparse.ArgumentParser()

    # (flags, help text) for every option; each one is a required string.
    option_specs = (
        (
            ("-i", "--input_path"),
            "Input directory containing amplicon analysis pipeline results",
        ),
        (
            ("-r", "--runs"),
            "CSV file containing successful analyses generated by the pipeline",
        ),
        (("-p", "--prefix"), "Prefix for the output file"),
    )
    for flags, help_text in option_specs:
        cli.add_argument(*flags, required=True, type=str, help=help_text)

    parsed = cli.parse_args()

    return parsed.input_path, parsed.runs, parsed.prefix
60
+
61
+
62
def get_read_count(read_path):
    """Return the number of records in the FASTA file at ``read_path``.

    The file is opened through pyfastx with ``build_index=False`` and the
    records are counted by iterating over it once.
    """
    n_records = 0
    for _ in pyfastx.Fasta(read_path, build_index=False):
        n_records += 1

    return n_records
68
+
69
+
70
def add_markergene(root_path, run_acc, markergene_dict, markergene):
    """Record the per-domain read counts of one marker gene for one run.

    Files are searched in ``<root_path>/<run_acc>/sequence-categorisation``.
    For any marker gene other than ``"ITS"``, the Bacteria, Archaea and
    Eukarya domains are each matched with a ``*<markergene>*<domain>*`` glob.
    For ``"ITS"`` a single ``*ITS*`` glob is used and stored under Eukarya.

    Args:
        root_path: Directory holding one sub-directory per run.
        run_acc: Run accession, used as the sub-directory name.
        markergene_dict: Mapping updated in place; its ``markergene`` entry
            is replaced.
        markergene: Marker gene label, e.g. ``"SSU"``, ``"LSU"`` or ``"ITS"``.

    Returns:
        The same ``markergene_dict``, where ``markergene_dict[markergene]``
        maps each domain to ``{"read_count": <int>}``.
    """
    seq_cat_dir = pathlib.Path(f"{root_path}/{run_acc}/sequence-categorisation")

    if markergene == "ITS":
        domain_patterns = (("Eukarya", "*ITS*"),)
    else:
        domain_patterns = (
            ("Bacteria", f"*{markergene}*bacteria*"),
            ("Archaea", f"*{markergene}*archaea*"),
            ("Eukarya", f"*{markergene}*eukarya*"),
        )

    # Resolve every glob up front, before the dictionary is touched.
    matches = [
        (domain, list(seq_cat_dir.glob(pattern)))
        for domain, pattern in domain_patterns
    ]

    markergene_dict[markergene] = defaultdict()
    for domain, _ in matches:
        markergene_dict[markergene][domain] = defaultdict()

    for domain, hits in matches:
        markergene_dict[markergene] = add_read_count_to_markergene(
            markergene_dict[markergene], hits, domain
        )

    return markergene_dict
115
+
116
+
117
def add_read_count_to_markergene(marker_gene_dict, marker, label):
    """Store the read count of the first matched file under ``label``.

    Args:
        marker_gene_dict: Mapping that already contains a ``label`` entry;
            it is updated in place.
        marker: Sequence of matched file paths. Only the first element is
            counted; an empty sequence yields a count of 0.
        label: Key under which ``"read_count"`` is written.

    Returns:
        The same ``marker_gene_dict`` that was passed in.
    """
    marker_gene_dict[label]["read_count"] = (
        get_read_count(str(marker[0])) if marker else 0
    )

    return marker_gene_dict
126
+
127
+
128
def main():
    """Write the marker gene and amplified region study summaries as JSON.

    Reads the command-line options, then for every run listed in the runs
    CSV (columns read as ``run`` and ``status``):

    * collects the SSU, LSU and ITS read counts per domain and flags entries
      whose share of the run's total marker gene reads reaches
      ``MAJORITY_MARKER_PROPORTION``; the result is written to
      ``<prefix>_markergene_study_summary.json``;
    * unless the run status is ``"no_asvs"``, collects the ASV and read
      counts of every ``asv/*S-V*/*.tsv`` table; the result is written to
      ``<prefix>_ampregion_study_summary.json``.

    Raises:
        SystemExit: With status 1 when the input directory does not exist.
    """
    input_path, runs, prefix = parse_args()

    root_path = pathlib.Path(input_path)

    if not root_path.exists():
        logging.error(f"Results path does not exist: {root_path}")
        # The bare ``exit`` helper is injected by the ``site`` module and is
        # not guaranteed to exist; raising SystemExit always works.
        raise SystemExit(1)

    runs_df = pd.read_csv(runs, names=["run", "status"])

    # Marker gene study summary
    markergene_dict = defaultdict(dict)
    for run_acc in runs_df["run"]:
        marker_genes = defaultdict(dict)
        for marker in ("SSU", "LSU", "ITS"):
            marker_genes = add_markergene(root_path, run_acc, marker_genes, marker)
        markergene_dict[run_acc]["marker_genes"] = marker_genes

        total_read_counts = sum(
            domain_info["read_count"]
            for domains in marker_genes.values()
            for domain_info in domains.values()
        )

        for domains in marker_genes.values():
            read_count = 0
            for domain_info in domains.values():
                # NOTE(review): the count accumulates across the domains of a
                # marker gene, so later domains are flagged using the running
                # total rather than their own count. Kept as found -- confirm
                # whether a per-domain or per-marker proportion is intended.
                read_count += domain_info["read_count"]
                # A run without any marker gene reads has no majority marker;
                # guard the division instead of raising ZeroDivisionError.
                if total_read_counts:
                    proportion = read_count / total_read_counts
                else:
                    proportion = 0.0
                domain_info["majority_marker"] = (
                    proportion >= MAJORITY_MARKER_PROPORTION
                )

    if markergene_dict:
        with open(f"{prefix}_markergene_study_summary.json", "w") as fw:
            fw.write(json.dumps(markergene_dict, indent=4))
    else:
        logging.warning(
            "Marker gene data empty for some reason. No summary file created."
        )

    # Amplified region study summary (only available if ASV results present)
    ampregion_dict = defaultdict(dict)
    for run_acc, run_status in zip(runs_df["run"], runs_df["status"]):
        if run_status == "no_asvs":
            continue

        amplified_regions = []
        asv_dir = pathlib.Path(f"{root_path}/{run_acc}/asv")

        for amp_region_path in sorted(asv_dir.glob("*S-V*/*.tsv")):
            # The parent directory is named "<marker gene>-<amplified region>";
            # split on the first dash only, the region itself contains dashes.
            marker_gene, _, amp_region = amp_region_path.parent.name.partition("-")

            amp_region_df = pd.read_csv(amp_region_path, sep="\t")

            amplified_regions.append(
                {
                    "marker_gene": marker_gene,
                    "amplified_region": amp_region,
                    # int() casts are needed for JSON serialising
                    "asv_count": int(len(amp_region_df)),
                    "read_count": int(amp_region_df["count"].sum()),
                }
            )

        ampregion_dict[run_acc]["amplified_regions"] = amplified_regions

    if ampregion_dict:
        with open(f"{prefix}_ampregion_study_summary.json", "w") as fw:
            fw.write(json.dumps(ampregion_dict, indent=4))
    else:
        logging.warning("No amplified region data found. No summary file created.")
240
+
241
+
242
+ if __name__ == "__main__":
243
+ main()
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env python
2
2
  # -*- coding: utf-8 -*-
3
3
 
4
- # Copyright 2024 EMBL - European Bioinformatics Institute
4
+ # Copyright 2024-2025 EMBL - European Bioinformatics Institute
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License");
7
7
  # you may not use this file except in compliance with the License.
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env python
2
2
  # -*- coding: utf-8 -*-
3
3
 
4
- # Copyright 2024 EMBL - European Bioinformatics Institute
4
+ # Copyright 2024-2025 EMBL - European Bioinformatics Institute
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License");
7
7
  # you may not use this file except in compliance with the License.
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env python
2
2
  # -*- coding: utf-8 -*-
3
3
 
4
- # Copyright 2024 EMBL - European Bioinformatics Institute
4
+ # Copyright 2024-2025 EMBL - European Bioinformatics Institute
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License");
7
7
  # you may not use this file except in compliance with the License.
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env python
2
2
  # -*- coding: utf-8 -*-
3
3
 
4
- # Copyright 2024 EMBL - European Bioinformatics Institute
4
+ # Copyright 2024-2025 EMBL - European Bioinformatics Institute
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License");
7
7
  # you may not use this file except in compliance with the License.
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env python
2
2
  # -*- coding: utf-8 -*-
3
3
 
4
- # Copyright 2024 EMBL - European Bioinformatics Institute
4
+ # Copyright 2024-2025 EMBL - European Bioinformatics Institute
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License");
7
7
  # you may not use this file except in compliance with the License.
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env python
2
2
  # -*- coding: utf-8 -*-
3
3
 
4
- # Copyright 2024 EMBL - European Bioinformatics Institute
4
+ # Copyright 2024-2025 EMBL - European Bioinformatics Institute
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License");
7
7
  # you may not use this file except in compliance with the License.
@@ -25,3 +25,10 @@ MAX_INTERNAL_PRIMER_PROPORTION = 0.2
25
25
 
26
26
  # used by library_strategy_checker in analysis.shared
27
27
  MIN_AMPLICON_STRATEGY_CHECK = 0.30
28
+
29
+ # used by markergene_study_summary in analysis.shared
30
+ MAJORITY_MARKER_PROPORTION = 0.45
31
+
32
+ # used by gff_toolkit in analysis.assembly
33
+ EVALUE_CUTOFF_IPS = 1e-10
34
+ EVALUE_CUTOFF_EGGNOG = 1e-10
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env python
2
2
  # -*- coding: utf-8 -*-
3
3
 
4
- # Copyright 2024 EMBL - European Bioinformatics Institute
4
+ # Copyright 2024-2025 EMBL - European Bioinformatics Institute
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License");
7
7
  # you may not use this file except in compliance with the License.
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env python
2
2
  # -*- coding: utf-8 -*-
3
3
 
4
- # Copyright 2024 EMBL - European Bioinformatics Institute
4
+ # Copyright 2024-2025 EMBL - European Bioinformatics Institute
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License");
7
7
  # you may not use this file except in compliance with the License.
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env python
2
2
  # -*- coding: utf-8 -*-
3
3
 
4
- # Copyright 2024 EMBL - European Bioinformatics Institute
4
+ # Copyright 2024-2025 EMBL - European Bioinformatics Institute
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License");
7
7
  # you may not use this file except in compliance with the License.
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env python
2
2
  # -*- coding: utf-8 -*-
3
3
 
4
- # Copyright 2024 EMBL - European Bioinformatics Institute
4
+ # Copyright 2024-2025 EMBL - European Bioinformatics Institute
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License");
7
7
  # you may not use this file except in compliance with the License.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: mgnify_pipelines_toolkit
3
- Version: 0.2.1
3
+ Version: 1.0.0
4
4
  Summary: Collection of scripts and tools for MGnify pipelines
5
5
  Author-email: MGnify team <metagenomics-help@ebi.ac.uk>
6
6
  License: Apache Software License 2.0
@@ -19,6 +19,7 @@ Requires-Dist: requests==2.32.3
19
19
  Requires-Dist: click==8.1.7
20
20
  Requires-Dist: pandera==0.22.1
21
21
  Requires-Dist: pyfastx>=2.2.0
22
+ Requires-Dist: intervaltree==3.1.0
22
23
  Provides-Extra: tests
23
24
  Requires-Dist: pytest==7.4.0; extra == "tests"
24
25
  Requires-Dist: pytest-md==0.2.0; extra == "tests"
@@ -30,6 +31,7 @@ Requires-Dist: regex==2023.12.25; extra == "tests"
30
31
  Requires-Dist: requests==2.32.3; extra == "tests"
31
32
  Requires-Dist: click==8.1.7; extra == "tests"
32
33
  Requires-Dist: pandera==0.22.1; extra == "tests"
34
+ Requires-Dist: pyfastx>=2.2.0; extra == "tests"
33
35
  Provides-Extra: dev
34
36
  Requires-Dist: mgnify_pipelines_toolkit[tests]; extra == "dev"
35
37
  Requires-Dist: pre-commit==3.8.0; extra == "dev"
@@ -0,0 +1,48 @@
1
+ mgnify_pipelines_toolkit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ mgnify_pipelines_toolkit/analysis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ mgnify_pipelines_toolkit/analysis/amplicon/amplicon_utils.py,sha256=8qmb57E2XBrwqo6YcJYyvPyuaIMu82Ifho7yyyUdnSM,6572
4
+ mgnify_pipelines_toolkit/analysis/amplicon/are_there_primers.py,sha256=2-URxvcl13_8O9bUmoa3-KMPSvdTaLbxfFDY-ycs_4M,5316
5
+ mgnify_pipelines_toolkit/analysis/amplicon/assess_inflection_point_mcp.py,sha256=cRoHPM-VB_L3NWYgkNWuyzqIqhzwHJuU3-6BiiS2lnw,7553
6
+ mgnify_pipelines_toolkit/analysis/amplicon/assess_mcp_proportions.py,sha256=RAdqakH05Qt_LG9jlV7P2M90o5KmlAXmDFQ4X51NIBE,5387
7
+ mgnify_pipelines_toolkit/analysis/amplicon/classify_var_regions.py,sha256=EqfaATb5agvtQOhJqrb2YS6OxtCXvxC-q_05UzvDYug,19926
8
+ mgnify_pipelines_toolkit/analysis/amplicon/find_mcp_inflection_points.py,sha256=vC3nKxggnSljfw4HNkugXbXfGvLx7XnryEE7eEGqfqs,3552
9
+ mgnify_pipelines_toolkit/analysis/amplicon/make_asv_count_table.py,sha256=soTewFddtebW-EcejGh9whs3cBLWJrGCYdPc0KukoAw,8756
10
+ mgnify_pipelines_toolkit/analysis/amplicon/mapseq_to_asv_table.py,sha256=BLqhflblUegCvuQic16PrFXfIXlFWmGkmWJyl4wJoLQ,5040
11
+ mgnify_pipelines_toolkit/analysis/amplicon/primer_val_classification.py,sha256=Bmc4Yu8inpT6AVTG1zwxp9F9mknIDLY33-UuFdaZuq0,3756
12
+ mgnify_pipelines_toolkit/analysis/amplicon/remove_ambiguous_reads.py,sha256=Wu4tRtuRkgd3hoeuwPl_E5ghxIW7e_1vrcvFGWv_U4A,3173
13
+ mgnify_pipelines_toolkit/analysis/amplicon/rev_comp_se_primers.py,sha256=yLpzkRJXAeXRUNgz60zopEwHcdprM2UDjquE-GkrFys,1722
14
+ mgnify_pipelines_toolkit/analysis/amplicon/standard_primer_matching.py,sha256=K6gniytuItq5WzHLi1BsaUCOdP4Zm0_ZzW2_ns7-BTI,11114
15
+ mgnify_pipelines_toolkit/analysis/assembly/add_rhea_chebi_annotation.py,sha256=HarDM6ay0MbyDfGGjmxP8epjsXciAJHOmqe8G64gLuM,4258
16
+ mgnify_pipelines_toolkit/analysis/assembly/antismash_gff_builder.py,sha256=wXrw1B-z4hOu5oA27Vp1WYxGP2Mk6ZY4i_T5jDZgek0,6954
17
+ mgnify_pipelines_toolkit/analysis/assembly/combined_gene_caller_merge.py,sha256=Pq-9RSt3RCxzDMQVW1VHlHF4NtpVwCWFbg2CMkvpZZc,19089
18
+ mgnify_pipelines_toolkit/analysis/assembly/generate_gaf.py,sha256=2T4T7aXMGPac-LZUXJF3lOUzZZF50dAKkKTSaO-4idQ,3587
19
+ mgnify_pipelines_toolkit/analysis/assembly/gff_annotation_utils.py,sha256=IlkeP4DuN7rXJIHa7o2sONHAXLhV9nGP-5Y1_0u8YQo,31393
20
+ mgnify_pipelines_toolkit/analysis/assembly/gff_file_utils.py,sha256=8kv_6KWznOVRkeAtghLf4pxKPhAqdn36LOK4MsTz9hU,3282
21
+ mgnify_pipelines_toolkit/analysis/assembly/gff_toolkit.py,sha256=uUIo97gmzO2zzN-pYF5paIzeHWBsmmjFp7zGAhf4PKY,5021
22
+ mgnify_pipelines_toolkit/analysis/assembly/go_utils.py,sha256=5D-9rB3omTxKwZuS_WjgyjsaaSPNnvZoXeThofWrK7k,5452
23
+ mgnify_pipelines_toolkit/analysis/assembly/summarise_goslims.py,sha256=07CbJdpo-Gy2aglCFiu2mHbkY18pYMlLFLPnYoD7tyk,5839
24
+ mgnify_pipelines_toolkit/analysis/shared/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
+ mgnify_pipelines_toolkit/analysis/shared/dwc_summary_generator.py,sha256=hggPqv9QawWAccm5tmru4VF9VnQAHF5LCXnqyLw_BWI,6727
26
+ mgnify_pipelines_toolkit/analysis/shared/fastq_suffix_header_check.py,sha256=ye0Jka6_lNn4dQGb2QG3YT46y7QK0QvyaIitIaS8JVQ,4026
27
+ mgnify_pipelines_toolkit/analysis/shared/get_subunits.py,sha256=NhX6cSLu9QB9I5JKNUJwJVMmcRcbG-0MJCEgDJ5DxtE,4777
28
+ mgnify_pipelines_toolkit/analysis/shared/get_subunits_coords.py,sha256=EH5RyzesLqsonnTQbSDs7kAOV6IskS4oyqZYlex1tAY,1934
29
+ mgnify_pipelines_toolkit/analysis/shared/library_strategy_check.py,sha256=6Ck2NhwRWw66GctUtKDdPT5fwJhWFR_YOZq-Vxwoa8A,1996
30
+ mgnify_pipelines_toolkit/analysis/shared/mapseq2biom.py,sha256=7-U0DN1joVu0ifLOoDUK2Pfqy8rb1RDKT6khVg3jky0,5559
31
+ mgnify_pipelines_toolkit/analysis/shared/markergene_study_summary.py,sha256=sKAo_rKEyVAZXSaIFMkpSoYZxiWwXMA3XDA6Z-hbHgg,7904
32
+ mgnify_pipelines_toolkit/analysis/shared/study_summary_generator.py,sha256=SosRFtW2PWr3dzvLEvYHQFZgGFX0LkQe30sGl3ozThA,13685
33
+ mgnify_pipelines_toolkit/constants/db_labels.py,sha256=omPINMylAjO2PxeFhSk2MbYNcGZH3P82optSlMey3dw,858
34
+ mgnify_pipelines_toolkit/constants/regex_ambiguous_bases.py,sha256=7nEOODQq35y9wx9YnvJuo29oBpwTpXg_kIbf_t7N4TQ,1093
35
+ mgnify_pipelines_toolkit/constants/regex_fasta_header.py,sha256=G-xrc9b8zdmPTaOICD2b3RCVeFAEOVkfRkIfotQ7gek,1193
36
+ mgnify_pipelines_toolkit/constants/tax_ranks.py,sha256=kMq__kOJcbiwsgolkdvb-XLo3WMnJdEXgedjUyMOYjI,1081
37
+ mgnify_pipelines_toolkit/constants/thresholds.py,sha256=guDE7c4KrVJEfg_AcO_cQoJM6LGGaRlmo_U2i8d4N7g,1157
38
+ mgnify_pipelines_toolkit/constants/var_region_coordinates.py,sha256=0bM4MwarFiM5yTcp5AbAmQ0o-q-gWy7kknir9zJ9R0A,1312
39
+ mgnify_pipelines_toolkit/schemas/schemas.py,sha256=Iwps_YtOrIzCuADBgjjJU5VSKb4G0OQZLJfvwRNGN3A,7103
40
+ mgnify_pipelines_toolkit/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
41
+ mgnify_pipelines_toolkit/utils/fasta_to_delimited.py,sha256=lgYIR1S4crURY7C7nFtgE6QMV4u4zCNsUrVkcRnsEEo,3996
42
+ mgnify_pipelines_toolkit/utils/get_mpt_version.py,sha256=aS9bWrC9CP7tpxoEVg6eEYt18-pmjG7fJl5Mchz4YOU,798
43
+ mgnify_pipelines_toolkit-1.0.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
44
+ mgnify_pipelines_toolkit-1.0.0.dist-info/METADATA,sha256=46IhEb_9fA1DuCMiQDWyc3yv4EcoZ9KhZ77hWBmjHjA,6181
45
+ mgnify_pipelines_toolkit-1.0.0.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
46
+ mgnify_pipelines_toolkit-1.0.0.dist-info/entry_points.txt,sha256=cTTjlAPQafv9uLrsV4PUGWZgU61qaY8j6uvu0FEpO4A,2309
47
+ mgnify_pipelines_toolkit-1.0.0.dist-info/top_level.txt,sha256=xA_wC7C01V3VwuDnqwRM2QYeJJ45WtvF6LVav4tYxuE,25
48
+ mgnify_pipelines_toolkit-1.0.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.8.0)
2
+ Generator: setuptools (76.0.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -3,8 +3,9 @@ add_rhea_chebi_annotation = mgnify_pipelines_toolkit.analysis.assembly.add_rhea_
3
3
  are_there_primers = mgnify_pipelines_toolkit.analysis.amplicon.are_there_primers:main
4
4
  assess_inflection_point_mcp = mgnify_pipelines_toolkit.analysis.amplicon.assess_inflection_point_mcp:main
5
5
  assess_mcp_proportions = mgnify_pipelines_toolkit.analysis.amplicon.assess_mcp_proportions:main
6
- cgc_merge = mgnify_pipelines_toolkit.analysis.assembly.cgc_merge:combine_main
7
6
  classify_var_regions = mgnify_pipelines_toolkit.analysis.amplicon.classify_var_regions:main
7
+ combined_gene_caller_merge = mgnify_pipelines_toolkit.analysis.assembly.combined_gene_caller_merge:combine_main
8
+ dwc_summary_generator = mgnify_pipelines_toolkit.analysis.shared.dwc_summary_generator:main
8
9
  fasta_to_delimited = mgnify_pipelines_toolkit.utils.fasta_to_delimited:main
9
10
  fastq_suffix_header_check = mgnify_pipelines_toolkit.analysis.shared.fastq_suffix_header_check:main
10
11
  find_mcp_inflection_points = mgnify_pipelines_toolkit.analysis.amplicon.find_mcp_inflection_points:main
@@ -16,9 +17,10 @@ library_strategy_check = mgnify_pipelines_toolkit.analysis.shared.library_strate
16
17
  make_asv_count_table = mgnify_pipelines_toolkit.analysis.amplicon.make_asv_count_table:main
17
18
  mapseq2biom = mgnify_pipelines_toolkit.analysis.shared.mapseq2biom:main
18
19
  mapseq_to_asv_table = mgnify_pipelines_toolkit.analysis.amplicon.mapseq_to_asv_table:main
20
+ markergene_study_summary = mgnify_pipelines_toolkit.analysis.shared.markergene_study_summary:main
19
21
  primer_val_classification = mgnify_pipelines_toolkit.analysis.amplicon.primer_val_classification:main
20
22
  remove_ambiguous_reads = mgnify_pipelines_toolkit.analysis.amplicon.remove_ambiguous_reads:main
21
23
  rev_comp_se_primers = mgnify_pipelines_toolkit.analysis.amplicon.rev_comp_se_primers:main
22
24
  standard_primer_matching = mgnify_pipelines_toolkit.analysis.amplicon.standard_primer_matching:main
23
- study_summary_generator = mgnify_pipelines_toolkit.analysis.shared.study_summary_generator:main
25
+ study_summary_generator = mgnify_pipelines_toolkit.analysis.shared.study_summary_generator:cli
24
26
  summarise_goslims = mgnify_pipelines_toolkit.analysis.assembly.summarise_goslims:main