mgnify-pipelines-toolkit 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

@@ -0,0 +1,116 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+ # Copyright 2025 EMBL - European Bioinformatics Institute
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ """
+ Script to convert cmscan-table to cmsearch-table (swap columns 1 and 2 with 3 and 4)
+
+ input example:
+ #target name         accession query name           accession mdl mdl from   mdl to seq from   seq to strand ..
+ #------------------- --------- -------------------- --------- --- -------- -------- -------- -------- ------ ..
+ SSU_rRNA_eukarya     RF01960   SRR17062740.1        -         cm       582     1025        1      452      + ..
+
+ expected output:
+ #------------------- --------- -------------------- --------- --- -------- -------- -------- -------- ------ ..
+ #target name         accession query name           accession mdl mdl from   mdl to seq from   seq to strand ..
+ SRR17062740.1        -         SSU_rRNA_eukarya     RF01960   cm       582     1025        1      452      + ..
+
+ """
+
+ import sys
+ import argparse
+ import fileinput
+ from itertools import accumulate
+
+
+ def parse_args(argv):
+     parser = argparse.ArgumentParser(
+         description="Convert cmscan table to cmsearch table"
+     )
+     parser.add_argument(
+         "-i", "--input", dest="input", help="Input cmscan file", required=True
+     )
+     parser.add_argument(
+         "-o", "--output", dest="output", help="Output filename", required=True
+     )
+     return parser.parse_args(argv)
+
+
+ class TableModifier:
+     def __init__(
+         self,
+         input_file: str,
+         output_file: str,
+     ):
+         """
+         Output of cmsearch-table has columns separated with different numbers of spaces (to keep a human-readable format)
+         :param input_file: output of cmscan-table
+         :param output_file: name of cmsearch table
+         """
+         self.input_file = input_file
+         self.output_file = output_file
+
+     def modify_table(self):
+         with fileinput.hook_compressed(self.input_file, "rt") as file_in, open(
+             self.output_file, "w"
+         ) as file_out:
+             header_written = False
+             separator_line, header = "", ""
+             for line in file_in:
+                 if line.startswith("#"):
+                     if "--" in line:
+                         separator_line = line.split(" ")
+                         separator_line[0] = separator_line[0].replace("#", "-")
+                         lengths = [0] + list(
+                             accumulate(len(s) + 1 for s in separator_line)
+                         )
+                     else:
+                         header = line
+                 else:
+                     coord_to_keep = len(" ".join(separator_line[0:4]))
+                     if not header_written:
+                         file_out.write(header)
+                         file_out.write(
+                             " ".join(
+                                 [
+                                     "#" + separator_line[2][1:],
+                                     separator_line[3],
+                                     separator_line[0].replace("#", ""),
+                                     separator_line[1],
+                                 ]
+                                 + separator_line[4:]
+                             )
+                         )
+                         header_written = True
+                     new_line = (
+                         line[lengths[2] : lengths[3]]
+                         + line[lengths[3] : lengths[4]]
+                         + line[lengths[0] : lengths[1]]
+                         + line[lengths[1] : lengths[2]]
+                         + line[coord_to_keep + 1 :]
+                     )
+                     file_out.write(new_line)
+
+
+ def main():
+     args = parse_args(sys.argv[1:])
+     table_modifier = TableModifier(
+         input_file=args.input,
+         output_file=args.output,
+     )
+     table_modifier.modify_table()
+
+
+ if __name__ == "__main__":
+     main()
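
The core of the new script is the offset arithmetic around accumulate: the separator line fixes the column widths, and each data row is re-sliced by those offsets so columns 1-2 and 3-4 trade places. A minimal standalone sketch of that logic; the column widths and the row below are illustrative, not real cmscan output:

from itertools import accumulate

# The separator line defines the fixed column widths; offsets are the
# cumulative widths plus one space per column, as in modify_table().
separator = "------- --------- ----------- --------- --".split(" ")
lengths = [0] + list(accumulate(len(s) + 1 for s in separator))

row = "target1 RF00001   SRR000001.1 -         cm"
swapped = (
    row[lengths[2] : lengths[3]]    # column 3 (query name)
    + row[lengths[3] : lengths[4]]  # column 4 (query accession)
    + row[lengths[0] : lengths[1]]  # column 1 (target name)
    + row[lengths[1] : lengths[2]]  # column 2 (target accession)
    + row[lengths[4] :]             # remaining columns, untouched
)
print(swapped)  # SRR000001.1 -         target1 RF00001   cm

For the tail the script uses line[coord_to_keep + 1:], where coord_to_keep is the joined width of the first four separator columns; that is the same offset as lengths[4] used here.
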
@@ -14,53 +14,65 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
 
+
  import argparse
  import os
  from Bio import SeqIO
-
-
- SSU = "SSU_rRNA"
- LSU = "LSU_rRNA"
- Seq5S = "mtPerm-5S"
- Seq5_8S = "5_8S_rRNA"
-
- SSU_rRNA_archaea = "SSU_rRNA_archaea"
- SSU_rRNA_bacteria = "SSU_rRNA_bacteria"
- SSU_rRNA_eukarya = "SSU_rRNA_eukarya"
- SSU_rRNA_microsporidia = "SSU_rRNA_microsporidia"
-
- LSU_rRNA_archaea = "LSU_rRNA_archaea"
- LSU_rRNA_bacteria = "LSU_rRNA_bacteria"
- LSU_rRNA_eukarya = "LSU_rRNA_eukarya"
-
-
- def set_model_names(prefix, name, directory):
+ from mgnify_pipelines_toolkit.constants.ncrna import (
+     DIRECTORY_SEQ_CAT,
+     SSU,
+     LSU,
+     Seq5S,
+     Seq5_8S,
+     SSU_rRNA_archaea,
+     SSU_rRNA_bacteria,
+     SSU_rRNA_eukarya,
+     SSU_rRNA_microsporidia,
+     LSU_rRNA_archaea,
+     LSU_rRNA_bacteria,
+     LSU_rRNA_eukarya,
+     NON_CODING_RNA,
+     SSU_MODELS,
+     LSU_MODELS,
+     RFAM_MODELS,
+ )
+
+
+ def set_model_names(prefix, name, directory, separate_subunits):
      pattern_dict = {}
      pattern_dict[SSU] = os.path.join(directory, f"{name}_SSU.fasta")
-     pattern_dict[SSU_rRNA_archaea] = os.path.join(
-         directory, f"{prefix}{name}_{SSU_rRNA_archaea}.RF01959.fa"
-     )
-     pattern_dict[SSU_rRNA_bacteria] = os.path.join(
-         directory, f"{prefix}{name}_{SSU_rRNA_bacteria}.RF00177.fa"
-     )
-     pattern_dict[SSU_rRNA_eukarya] = os.path.join(
-         directory, f"{prefix}{name}_{SSU_rRNA_eukarya}.RF01960.fa"
-     )
-     pattern_dict[SSU_rRNA_microsporidia] = os.path.join(
-         directory, f"{prefix}{name}_{SSU_rRNA_microsporidia}.RF02542.fa"
-     )
      pattern_dict[LSU] = os.path.join(directory, f"{name}_LSU.fasta")
-     pattern_dict[LSU_rRNA_archaea] = os.path.join(
-         directory, f"{prefix}{name}_{LSU_rRNA_archaea}.RF02540.fa"
-     )
-     pattern_dict[LSU_rRNA_bacteria] = os.path.join(
-         directory, f"{prefix}{name}_{LSU_rRNA_bacteria}.RF02541.fa"
-     )
-     pattern_dict[LSU_rRNA_eukarya] = os.path.join(
-         directory, f"{prefix}{name}_{LSU_rRNA_eukarya}.RF02543.fa"
-     )
-     pattern_dict[Seq5S] = os.path.join(directory, f"{name}_5S.fa")
-     pattern_dict[Seq5_8S] = os.path.join(directory, f"{name}_5_8S.fa")
+     pattern_dict[Seq5S] = os.path.join(directory, f"{name}_5S.fasta")
+     pattern_dict[Seq5_8S] = os.path.join(directory, f"{name}_5_8S.fasta")
+     if separate_subunits:
+         pattern_dict[SSU_rRNA_archaea] = os.path.join(
+             directory,
+             f"{prefix}{name}_{SSU_rRNA_archaea}.{RFAM_MODELS[SSU_rRNA_archaea]}.fasta",
+         )
+         pattern_dict[SSU_rRNA_bacteria] = os.path.join(
+             directory,
+             f"{prefix}{name}_{SSU_rRNA_bacteria}.{RFAM_MODELS[SSU_rRNA_bacteria]}.fasta",
+         )
+         pattern_dict[SSU_rRNA_eukarya] = os.path.join(
+             directory,
+             f"{prefix}{name}_{SSU_rRNA_eukarya}.{RFAM_MODELS[SSU_rRNA_eukarya]}.fasta",
+         )
+         pattern_dict[SSU_rRNA_microsporidia] = os.path.join(
+             directory,
+             f"{prefix}{name}_{SSU_rRNA_microsporidia}.{RFAM_MODELS[SSU_rRNA_microsporidia]}.fasta",
+         )
+         pattern_dict[LSU_rRNA_archaea] = os.path.join(
+             directory,
+             f"{prefix}{name}_{LSU_rRNA_archaea}.{RFAM_MODELS[LSU_rRNA_archaea]}.fasta",
+         )
+         pattern_dict[LSU_rRNA_bacteria] = os.path.join(
+             directory,
+             f"{prefix}{name}_{LSU_rRNA_bacteria}.{RFAM_MODELS[LSU_rRNA_bacteria]}.fasta",
+         )
+         pattern_dict[LSU_rRNA_eukarya] = os.path.join(
+             directory,
+             f"{prefix}{name}_{LSU_rRNA_eukarya}.{RFAM_MODELS[LSU_rRNA_eukarya]}.fasta",
+         )
      return pattern_dict
 
 
@@ -75,65 +87,55 @@ def main():
          "-p", "--prefix", dest="prefix", help="prefix for models", required=False
      )
      parser.add_argument("-n", "--name", dest="name", help="Accession", required=True)
+     parser.add_argument(
+         "--separate-subunits-by-models",
+         action="store_true",
+         help="Create separate files for each kingdom, example: sample_SSU_rRNA_eukarya.RF01960.fasta",
+     )
 
      args = parser.parse_args()
      prefix = args.prefix if args.prefix else ""
      name = args.name if args.name else "accession"
 
-     directory = "sequence-categorisation"
+     directory = DIRECTORY_SEQ_CAT
      if not os.path.exists(directory):
          os.makedirs(directory)
-     directory_ncrna = os.path.join("sequence-categorisation", "ncRNA")
-     if not os.path.exists(directory_ncrna):
-         os.makedirs(directory_ncrna)
 
      print("Start fasta mode")
-     pattern_dict = set_model_names(prefix, name, directory)
-     coding_rna = [
-         SSU_rRNA_archaea,
-         SSU_rRNA_bacteria,
-         SSU_rRNA_eukarya,
-         SSU_rRNA_microsporidia,
-         LSU_rRNA_archaea,
-         LSU_rRNA_bacteria,
-         LSU_rRNA_eukarya,
-         Seq5S,
-         Seq5_8S,
-     ]
+     pattern_dict = set_model_names(
+         prefix, name, directory, args.separate_subunits_by_models
+     )
+
      open_files = {}
      for record in SeqIO.parse(args.input, "fasta"):
          model = "-".join(record.id.split("/")[0].split("-")[-1:])
-         if model in coding_rna:
-             filename = pattern_dict[model]
-         else:
-             filename = os.path.join(directory_ncrna, f"{prefix}{name}_{model}.fasta")
-         if model not in open_files:
-             file_out = open(filename, "w")
-             open_files[model] = file_out
-         SeqIO.write(record, open_files[model], "fasta")
-
-         if model in (
-             SSU_rRNA_archaea,
-             SSU_rRNA_bacteria,
-             SSU_rRNA_eukarya,
-             SSU_rRNA_microsporidia,
-         ):
+         if model in SSU_MODELS:
              if SSU not in open_files:
                  file_out = open(pattern_dict[SSU], "w")
                  open_files[SSU] = file_out
              SeqIO.write(record, open_files[SSU], "fasta")
-         if model in (LSU_rRNA_archaea, LSU_rRNA_bacteria, LSU_rRNA_eukarya):
+         elif model in LSU_MODELS:
              if LSU not in open_files:
                  file_out = open(pattern_dict[LSU], "w")
                  open_files[LSU] = file_out
              SeqIO.write(record, open_files[LSU], "fasta")
 
+         if model in NON_CODING_RNA:
+             if model in pattern_dict:
+                 filename = pattern_dict[model]
+             else:
+                 filename = None
+         else:
+             filename = os.path.join(directory, f"{name}_other_ncRNA.fasta")
+         if filename:
+             if model not in open_files:
+                 file_out = open(filename, "w")
+                 open_files[model] = file_out
+             SeqIO.write(record, open_files[model], "fasta")
+
      for item in open_files:
          open_files[item].close()
 
-     if len(os.listdir(directory_ncrna)) == 0:
-         os.rmdir(directory_ncrna)
-
 
  if __name__ == "__main__":
      main()
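
The routing above hinges on the model token parsed from each FASTA id: hits in SSU_MODELS also feed the combined SSU file, hits in LSU_MODELS the combined LSU file, and anything outside NON_CODING_RNA now goes to a single {name}_other_ncRNA.fasta instead of the removed ncRNA subdirectory. A small sketch of the id parsing; the header layout is inferred from the parsing expression and is an assumption, not documented behaviour:

# Hypothetical record id: run accession, then model name, then coordinates.
record_id = "SRR17062740.1-SSU_rRNA_eukarya/582-1025"
model = "-".join(record_id.split("/")[0].split("-")[-1:])
print(model)  # SSU_rRNA_eukarya

# Mirrors the new constants module for a standalone check.
SSU_MODELS = [
    "SSU_rRNA_archaea",
    "SSU_rRNA_bacteria",
    "SSU_rRNA_eukarya",
    "SSU_rRNA_microsporidia",
]
print(model in SSU_MODELS)  # True -> also written to the combined {name}_SSU.fasta
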
@@ -1,5 +1,7 @@
  #!/usr/bin/env python
  # -*- coding: utf-8 -*-
+ import shutil
+ from shutil import SameFileError
 
  # Copyright 2024-2025 EMBL - European Bioinformatics Institute
  #
@@ -33,6 +35,7 @@ from mgnify_pipelines_toolkit.schemas.schemas import (
      AmpliconNonINSDCPassedRunsSchema,
      TaxonSchema,
      PR2TaxonSchema,
+     validate_dataframe,
  )
 
  logging.basicConfig(level=logging.DEBUG)
  logging.basicConfig(level=logging.DEBUG)
@@ -127,9 +130,9 @@ def parse_one_tax_file(
      # Two different schemas used for validation depending on the database
      # because PR2 schema has different taxonomic ranks than the standard
      if len(long_tax_ranks) == 8:
-         TaxonSchema(res_df)
+         validate_dataframe(res_df, TaxonSchema, str(tax_file))
      elif len(long_tax_ranks) == 9:
-         PR2TaxonSchema(res_df)
+         validate_dataframe(res_df, PR2TaxonSchema, str(tax_file))
 
      res_df["full_taxon"] = res_df.iloc[:, 1:].apply(
          lambda x: ";".join(x).strip(";"), axis=1
@@ -205,9 +208,7 @@ def generate_db_summary(
          amp_region_dict[amp_region].append(amp_region_df)
 
      for amp_region, amp_region_dfs in amp_region_dict.items():
-         if (
-             len(amp_region_dfs) > 1
-         ):  # Need at least two analyses with this amp_region to bother with the summary
+         if amp_region_dfs:
              amp_res_df = amp_region_dfs[0]
              for amp_df in amp_region_dfs[1:]:
                  amp_res_df = amp_res_df.join(amp_df, how="outer")
@@ -319,9 +320,7 @@ def summarise_analyses(
          if tax_file:
              tax_files[run_acc] = tax_file
 
-     if (
-         len(tax_files) > 1
-     ):  # If at least two analyses have results from the current DB, generate a study-level summary for it
+     if tax_files:
          generate_db_summary(db_label, tax_files, output_prefix)
 
 
@@ -356,12 +355,12 @@ def merge_summaries(analyses_dir: str, output_prefix: str) -> None:
      :type output_prefix: str
      """
 
-     # TODO: The way we grab all the summaries might change depending on how the prefect side does things
      all_study_summaries = glob.glob(f"{analyses_dir}/*_study_summary.tsv")
 
      summaries_dict = organise_study_summaries(all_study_summaries)
 
      for db_label, summaries in summaries_dict.items():
+         merged_summary_name = f"{output_prefix}_{db_label}_study_summary.tsv"
          if len(summaries) > 1:
              res_df = pd.read_csv(summaries[0], sep="\t", index_col=0)
              for summary in summaries[1:]:
@@ -372,10 +371,18 @@ def merge_summaries(analyses_dir: str, output_prefix: str) -> None:
 
              res_df = res_df.reindex(sorted(res_df.columns), axis=1)
              res_df.to_csv(
-                 f"{output_prefix}_{db_label}_study_summary.tsv",
+                 merged_summary_name,
                  sep="\t",
                  index_label="taxonomy",
              )
+         elif len(summaries) == 1:
+             logging.info(
+                 f"Only one summary ({summaries[0]}) so will use that as {merged_summary_name}"
+             )
+             try:
+                 shutil.copyfile(summaries[0], merged_summary_name)
+             except SameFileError:
+                 pass
 
 
  if __name__ == "__main__":
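
The merge path outer-joins the per-study summary tables on their taxonomy index and then sorts the columns; with only one summary, the file is now simply copied to the merged name. A toy illustration of the join behaviour with made-up run accessions and counts:

import pandas as pd

# Two hypothetical study summaries indexed by taxonomy string.
a = pd.DataFrame({"SRR0000001": [5, 2]}, index=["Bacteria", "Archaea"])
b = pd.DataFrame({"SRR0000002": [7]}, index=["Bacteria"])

res_df = a.join(b, how="outer")  # union of taxa; missing counts become NaN
res_df = res_df.reindex(sorted(res_df.columns), axis=1)
print(res_df)
# Prints roughly:
#           SRR0000001  SRR0000002
# Archaea            2         NaN
# Bacteria           5         7.0
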
@@ -0,0 +1,62 @@
+ #!/usr/bin/env python
+ # -*- coding: utf-8 -*-
+
+ # Copyright 2024-2025 EMBL - European Bioinformatics Institute
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ DIRECTORY_SEQ_CAT = "sequence-categorisation"
+
+ SSU = "SSU_rRNA"
+ LSU = "LSU_rRNA"
+ Seq5S = "mtPerm-5S"
+ Seq5_8S = "5_8S_rRNA"
+
+ SSU_rRNA_archaea = "SSU_rRNA_archaea"
+ SSU_rRNA_bacteria = "SSU_rRNA_bacteria"
+ SSU_rRNA_eukarya = "SSU_rRNA_eukarya"
+ SSU_rRNA_microsporidia = "SSU_rRNA_microsporidia"
+
+ LSU_rRNA_archaea = "LSU_rRNA_archaea"
+ LSU_rRNA_bacteria = "LSU_rRNA_bacteria"
+ LSU_rRNA_eukarya = "LSU_rRNA_eukarya"
+
+ NON_CODING_RNA = [
+     SSU_rRNA_archaea,
+     SSU_rRNA_bacteria,
+     SSU_rRNA_eukarya,
+     SSU_rRNA_microsporidia,
+     LSU_rRNA_archaea,
+     LSU_rRNA_bacteria,
+     LSU_rRNA_eukarya,
+     Seq5S,
+     Seq5_8S,
+ ]
+
+ SSU_MODELS = [
+     SSU_rRNA_archaea,
+     SSU_rRNA_bacteria,
+     SSU_rRNA_eukarya,
+     SSU_rRNA_microsporidia,
+ ]
+
+ LSU_MODELS = [LSU_rRNA_archaea, LSU_rRNA_bacteria, LSU_rRNA_eukarya]
+
+ RFAM_MODELS = {
+     SSU_rRNA_archaea: "RF01959",
+     SSU_rRNA_bacteria: "RF00177",
+     SSU_rRNA_eukarya: "RF01960",
+     SSU_rRNA_microsporidia: "RF02542",
+     LSU_rRNA_archaea: "RF02540",
+     LSU_rRNA_bacteria: "RF02541",
+     LSU_rRNA_eukarya: "RF02543",
+ }
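
These constants back the filename patterns in get_subunits.py; for instance, a per-model path is assembled as follows (prefix and name are placeholder values):

import os
from mgnify_pipelines_toolkit.constants.ncrna import (
    DIRECTORY_SEQ_CAT,
    RFAM_MODELS,
    SSU_rRNA_eukarya,
)

prefix, name = "", "sample"
# Same f-string pattern as set_model_names() in get_subunits.py.
path = os.path.join(
    DIRECTORY_SEQ_CAT,
    f"{prefix}{name}_{SSU_rRNA_eukarya}.{RFAM_MODELS[SSU_rRNA_eukarya]}.fasta",
)
print(path)  # sequence-categorisation/sample_SSU_rRNA_eukarya.RF01960.fasta
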
@@ -1,6 +1,5 @@
  #!/usr/bin/env python
  # -*- coding: utf-8 -*-
-
  # Copyright 2024-2025 EMBL - European Bioinformatics Institute
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -14,11 +13,15 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
 
+ import logging
  import re
 
  from enum import Enum
- from typing import ClassVar, Optional
+ from typing import ClassVar, Optional, Type
+
+ import pandas as pd
  import pandera as pa
+ from pandera.typing.common import DataFrameBase
 
  from pydantic import (
      Field,
@@ -215,3 +218,18 @@ class PR2TaxonSchema(pa.DataFrameModel):
 
          dtype = PydanticModel(PR2TaxonRecord)
          coerce = True
+
+
+ def validate_dataframe(
+     df: pd.DataFrame, schema: Type[pa.DataFrameModel], df_metadata: str
+ ) -> DataFrameBase:
+     """
+     Validate a pandas dataframe using a pandera schema.
+     df_metadata will be shown in logs on failure; for example, the TSV filename from which the df was read.
+     """
+     try:
+         dfs = schema.validate(df, lazy=True)
+     except pa.errors.SchemaErrors as e:
+         logging.error(f"{schema.__name__} validation failure for {df_metadata}")
+         raise e
+     return dfs
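
Usage sketch for the new helper: with lazy=True, pandera collects every failing check into a single SchemaErrors, which the helper logs against the supplied metadata string before re-raising. The demo schema below is illustrative and not part of the package:

import pandas as pd
import pandera as pa
from pandera.typing import Series

from mgnify_pipelines_toolkit.schemas.schemas import validate_dataframe

class DemoSchema(pa.DataFrameModel):
    count: Series[int] = pa.Field(ge=0)  # every count must be non-negative

df = pd.DataFrame({"count": [3, -1]})  # -1 violates the check
try:
    validate_dataframe(df, DemoSchema, "demo.tsv")
except pa.errors.SchemaErrors:
    pass  # "DemoSchema validation failure for demo.tsv" was logged first
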
@@ -1,6 +1,6 @@
- Metadata-Version: 2.2
+ Metadata-Version: 2.4
  Name: mgnify_pipelines_toolkit
- Version: 1.0.0
+ Version: 1.0.2
  Summary: Collection of scripts and tools for MGnify pipelines
  Author-email: MGnify team <metagenomics-help@ebi.ac.uk>
  License: Apache Software License 2.0
@@ -38,6 +38,7 @@ Requires-Dist: pre-commit==3.8.0; extra == "dev"
  Requires-Dist: black==24.8.0; extra == "dev"
  Requires-Dist: flake8==7.1.1; extra == "dev"
  Requires-Dist: pep8-naming==0.14.1; extra == "dev"
+ Dynamic: license-file
 
  # mgnify-pipelines-toolkit
 
@@ -22,27 +22,29 @@ mgnify_pipelines_toolkit/analysis/assembly/gff_toolkit.py,sha256=uUIo97gmzO2zzN-
  mgnify_pipelines_toolkit/analysis/assembly/go_utils.py,sha256=5D-9rB3omTxKwZuS_WjgyjsaaSPNnvZoXeThofWrK7k,5452
  mgnify_pipelines_toolkit/analysis/assembly/summarise_goslims.py,sha256=07CbJdpo-Gy2aglCFiu2mHbkY18pYMlLFLPnYoD7tyk,5839
  mgnify_pipelines_toolkit/analysis/shared/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ mgnify_pipelines_toolkit/analysis/shared/convert_cmscan_to_cmsearch_tblout.py,sha256=0Ot1j4LPsEPyPbySSAh6n9s5Dilm_8_M9YQvTnQ-1PQ,4415
  mgnify_pipelines_toolkit/analysis/shared/dwc_summary_generator.py,sha256=hggPqv9QawWAccm5tmru4VF9VnQAHF5LCXnqyLw_BWI,6727
  mgnify_pipelines_toolkit/analysis/shared/fastq_suffix_header_check.py,sha256=ye0Jka6_lNn4dQGb2QG3YT46y7QK0QvyaIitIaS8JVQ,4026
- mgnify_pipelines_toolkit/analysis/shared/get_subunits.py,sha256=NhX6cSLu9QB9I5JKNUJwJVMmcRcbG-0MJCEgDJ5DxtE,4777
+ mgnify_pipelines_toolkit/analysis/shared/get_subunits.py,sha256=j_UN3hItF7KhJrhGrSqjvZMg-ZwKAMc2sc0vHdJzjQw,4908
  mgnify_pipelines_toolkit/analysis/shared/get_subunits_coords.py,sha256=EH5RyzesLqsonnTQbSDs7kAOV6IskS4oyqZYlex1tAY,1934
  mgnify_pipelines_toolkit/analysis/shared/library_strategy_check.py,sha256=6Ck2NhwRWw66GctUtKDdPT5fwJhWFR_YOZq-Vxwoa8A,1996
  mgnify_pipelines_toolkit/analysis/shared/mapseq2biom.py,sha256=7-U0DN1joVu0ifLOoDUK2Pfqy8rb1RDKT6khVg3jky0,5559
  mgnify_pipelines_toolkit/analysis/shared/markergene_study_summary.py,sha256=sKAo_rKEyVAZXSaIFMkpSoYZxiWwXMA3XDA6Z-hbHgg,7904
- mgnify_pipelines_toolkit/analysis/shared/study_summary_generator.py,sha256=SosRFtW2PWr3dzvLEvYHQFZgGFX0LkQe30sGl3ozThA,13685
+ mgnify_pipelines_toolkit/analysis/shared/study_summary_generator.py,sha256=OOqKaQmKGAya6_BZgfcWBZSVlmZ918PQTVMv6KwGIns,13827
  mgnify_pipelines_toolkit/constants/db_labels.py,sha256=omPINMylAjO2PxeFhSk2MbYNcGZH3P82optSlMey3dw,858
+ mgnify_pipelines_toolkit/constants/ncrna.py,sha256=RCpV2EG1gwm_5v3yJH0SFB-T3_z0aAz0X-02b5LTHGk,1707
  mgnify_pipelines_toolkit/constants/regex_ambiguous_bases.py,sha256=7nEOODQq35y9wx9YnvJuo29oBpwTpXg_kIbf_t7N4TQ,1093
  mgnify_pipelines_toolkit/constants/regex_fasta_header.py,sha256=G-xrc9b8zdmPTaOICD2b3RCVeFAEOVkfRkIfotQ7gek,1193
  mgnify_pipelines_toolkit/constants/tax_ranks.py,sha256=kMq__kOJcbiwsgolkdvb-XLo3WMnJdEXgedjUyMOYjI,1081
  mgnify_pipelines_toolkit/constants/thresholds.py,sha256=guDE7c4KrVJEfg_AcO_cQoJM6LGGaRlmo_U2i8d4N7g,1157
  mgnify_pipelines_toolkit/constants/var_region_coordinates.py,sha256=0bM4MwarFiM5yTcp5AbAmQ0o-q-gWy7kknir9zJ9R0A,1312
- mgnify_pipelines_toolkit/schemas/schemas.py,sha256=Iwps_YtOrIzCuADBgjjJU5VSKb4G0OQZLJfvwRNGN3A,7103
+ mgnify_pipelines_toolkit/schemas/schemas.py,sha256=pnH8LUH8i2ACNvFNWyG-n-eIHZcI5O9UDYulkh43mec,7692
  mgnify_pipelines_toolkit/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  mgnify_pipelines_toolkit/utils/fasta_to_delimited.py,sha256=lgYIR1S4crURY7C7nFtgE6QMV4u4zCNsUrVkcRnsEEo,3996
  mgnify_pipelines_toolkit/utils/get_mpt_version.py,sha256=aS9bWrC9CP7tpxoEVg6eEYt18-pmjG7fJl5Mchz4YOU,798
- mgnify_pipelines_toolkit-1.0.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- mgnify_pipelines_toolkit-1.0.0.dist-info/METADATA,sha256=46IhEb_9fA1DuCMiQDWyc3yv4EcoZ9KhZ77hWBmjHjA,6181
- mgnify_pipelines_toolkit-1.0.0.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
- mgnify_pipelines_toolkit-1.0.0.dist-info/entry_points.txt,sha256=cTTjlAPQafv9uLrsV4PUGWZgU61qaY8j6uvu0FEpO4A,2309
- mgnify_pipelines_toolkit-1.0.0.dist-info/top_level.txt,sha256=xA_wC7C01V3VwuDnqwRM2QYeJJ45WtvF6LVav4tYxuE,25
- mgnify_pipelines_toolkit-1.0.0.dist-info/RECORD,,
+ mgnify_pipelines_toolkit-1.0.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ mgnify_pipelines_toolkit-1.0.2.dist-info/METADATA,sha256=YWs_6ycCLrZFbrsScKxtMb-_xhj2lZuxnaAmOBOGPyk,6203
+ mgnify_pipelines_toolkit-1.0.2.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
+ mgnify_pipelines_toolkit-1.0.2.dist-info/entry_points.txt,sha256=v0fD1Qi0NUGgkBinQp34VLxqwMvXOkof7BvuX9B5Q-o,2417
+ mgnify_pipelines_toolkit-1.0.2.dist-info/top_level.txt,sha256=xA_wC7C01V3VwuDnqwRM2QYeJJ45WtvF6LVav4tYxuE,25
+ mgnify_pipelines_toolkit-1.0.2.dist-info/RECORD,,
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (76.0.0)
+ Generator: setuptools (77.0.3)
  Root-Is-Purelib: true
  Tag: py3-none-any
 
@@ -4,7 +4,8 @@ are_there_primers = mgnify_pipelines_toolkit.analysis.amplicon.are_there_primers
  assess_inflection_point_mcp = mgnify_pipelines_toolkit.analysis.amplicon.assess_inflection_point_mcp:main
  assess_mcp_proportions = mgnify_pipelines_toolkit.analysis.amplicon.assess_mcp_proportions:main
  classify_var_regions = mgnify_pipelines_toolkit.analysis.amplicon.classify_var_regions:main
- combined_gene_caller_merge = mgnify_pipelines_toolkit.analysis.assembly.combined_gene_caller_merge:combine_main
+ combined_gene_caller_merge = mgnify_pipelines_toolkit.analysis.assembly.combined_gene_caller_merge:main
+ convert_cmscan_to_cmsearch_tblout = mgnify_pipelines_toolkit.analysis.shared.convert_cmscan_to_cmsearch_tblout:main
  dwc_summary_generator = mgnify_pipelines_toolkit.analysis.assembly.dwc_summary_generator:main
  fasta_to_delimited = mgnify_pipelines_toolkit.utils.fasta_to_delimited:main
  fastq_suffix_header_check = mgnify_pipelines_toolkit.analysis.shared.fastq_suffix_header_check:main