mgnify-pipelines-toolkit 1.2.10__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mgnify-pipelines-toolkit might be problematic. Click here for more details.
- mgnify_pipelines_toolkit/analysis/amplicon/primer_val_classification.py +2 -1
- mgnify_pipelines_toolkit/analysis/amplicon/study_summary_generator.py +30 -69
- mgnify_pipelines_toolkit/analysis/assembly/study_summary_generator.py +29 -60
- mgnify_pipelines_toolkit/analysis/rawreads/study_summary_generator.py +33 -81
- mgnify_pipelines_toolkit/schemas/dataframes.py +325 -0
- {mgnify_pipelines_toolkit-1.2.10.dist-info → mgnify_pipelines_toolkit-1.3.0.dist-info}/METADATA +25 -12
- {mgnify_pipelines_toolkit-1.2.10.dist-info → mgnify_pipelines_toolkit-1.3.0.dist-info}/RECORD +11 -11
- mgnify_pipelines_toolkit/schemas/schemas.py +0 -738
- {mgnify_pipelines_toolkit-1.2.10.dist-info → mgnify_pipelines_toolkit-1.3.0.dist-info}/WHEEL +0 -0
- {mgnify_pipelines_toolkit-1.2.10.dist-info → mgnify_pipelines_toolkit-1.3.0.dist-info}/entry_points.txt +0 -0
- {mgnify_pipelines_toolkit-1.2.10.dist-info → mgnify_pipelines_toolkit-1.3.0.dist-info}/licenses/LICENSE +0 -0
- {mgnify_pipelines_toolkit-1.2.10.dist-info → mgnify_pipelines_toolkit-1.3.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,325 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# Copyright 2024-2025 EMBL - European Bioinformatics Institute
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
import logging
|
|
17
|
+
from typing import Type
|
|
18
|
+
|
|
19
|
+
import pandas as pd
|
|
20
|
+
import pandera as pa
|
|
21
|
+
from pandera.typing import Series
|
|
22
|
+
from pandera.typing.common import DataFrameBase
|
|
23
|
+
|
|
24
|
+
from mgnify_pipelines_toolkit.constants.tax_ranks import (
|
|
25
|
+
SHORT_MOTUS_TAX_RANKS,
|
|
26
|
+
SHORT_PR2_TAX_RANKS,
|
|
27
|
+
SHORT_TAX_RANKS,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class CoerceBaseDataFrameSchema(pa.DataFrameModel):
    """Base schema for all dataframe models.

    Provides common configuration for automatic type coercion.
    """

    class Config:
        """Pandera configuration.

        coerce: Automatically convert column dtypes to match schema
        """

        coerce = True
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# This is the schema for the whole DF
class AmpliconPassedRunsSchema(CoerceBaseDataFrameSchema):
    """Class modelling a Pandera dataframe schema for amplicon passed runs.

    Validates the generated dataframe when read by pandas.read_csv.
    """

    # INSDC-style run accession: ERR/DRR/SRR followed by at least 6 digits
    run: Series[str] = pa.Field(str_matches=r"(E|D|S)RR[0-9]{6,}", unique=True)
    # Outcome of the amplicon analysis for the run
    status: Series[str] = pa.Field(isin=["all_results", "no_asvs", "dada2_stats_fail"])
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class CompletedAnalysisSchema(CoerceBaseDataFrameSchema):
    """Class modelling a Pandera dataframe schema for completed assemblies.

    Validates the generated dataframe when read by pandas.read_csv.
    """

    # ENA assembly accession: ERZ followed by at least 6 digits
    assembly: Series[str] = pa.Field(str_matches=r"ERZ\d{6,}", unique=True)
    # Only successfully completed analyses are expected in this file
    status: Series[str] = pa.Field(isin=["success"])
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class BaseSummarySchema(CoerceBaseDataFrameSchema):
    """Base schema for summary files.

    All summary schemas inherit from this base and use coerce=True by default.
    """
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class InterProSummarySchema(BaseSummarySchema):
    """Schema for InterPro summary file validation."""

    # Number of matches to the InterPro entry; must be non-negative
    count: Series[int] = pa.Field(ge=0)
    # InterPro accession: IPR followed by exactly 6 digits
    interpro_accession: Series[str] = pa.Field(str_matches=r"IPR\d{6}", unique=True)
    description: Series[str]
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class GOSummarySchema(BaseSummarySchema):
    """Schema for GO or GOslim summary file validation."""

    # GO term accession: "GO:" followed by exactly 7 digits
    go: Series[str] = pa.Field(str_matches=r"GO:\d{7}", unique=True)
    term: Series[str]
    category: Series[str]
    # Number of annotations for the term; must be non-negative
    count: Series[int] = pa.Field(ge=0)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class SanntisSummarySchema(BaseSummarySchema):
    """Schema for Sanntis summary file validation."""

    # MIBiG accession of the closest reference BGC: BGC + 7 digits
    nearest_mibig: Series[str] = pa.Field(str_matches=r"BGC\d{7}", unique=True)
    nearest_mibig_class: Series[str]
    description: Series[str]
    # Number of matches; must be non-negative
    count: Series[int] = pa.Field(ge=0)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
class AntismashSummarySchema(BaseSummarySchema):
    """Schema for Antismash summary file validation."""

    # Free-text BGC label; uniqueness is the only structural constraint
    label: Series[str] = pa.Field(unique=True)
    description: Series[str]
    # Number of matches; must be non-negative
    count: Series[int] = pa.Field(ge=0)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
class KOSummarySchema(BaseSummarySchema):
    """Schema for KEGG Orthology summary file validation."""

    # KEGG Orthology accession: K followed by at least 5 digits
    ko: Series[str] = pa.Field(str_matches=r"K\d{5,}", unique=True)
    description: Series[str]
    # Number of annotations; must be non-negative
    count: Series[int] = pa.Field(ge=0)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
class PFAMSummarySchema(BaseSummarySchema):
    """Schema for PFAM summary file validation."""

    # Pfam accession: PF followed by exactly 5 digits
    pfam: Series[str] = pa.Field(str_matches=r"PF\d{5}", unique=True)
    description: Series[str]
    # Number of annotations; must be non-negative
    count: Series[int] = pa.Field(ge=0)
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
class KEGGModulesSummarySchema(BaseSummarySchema):
    """Schema for KEGG Modules summary file validation."""

    # KEGG module accession: M followed by exactly 5 digits
    module_accession: Series[str] = pa.Field(str_matches=r"M\d{5}", unique=True)
    # Module completeness; non-negative (presumably a 0-100 percentage — TODO confirm)
    completeness: Series[float] = pa.Field(ge=0)
    pathway_name: Series[str]
    pathway_class: Series[str]
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
class GOStudySummarySchema(BaseSummarySchema):
    """Schema for study-level GO summary file validation."""

    # GO term accession, anchored: "GO:" followed by exactly 7 digits
    GO: Series[str] = pa.Field(str_matches=r"^GO:\d{7}$", unique=True)
    description: Series[str]
    category: Series[str]
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
class InterProStudySummarySchema(BaseSummarySchema):
    """Schema for study-level InterPro summary file validation."""

    # InterPro accession, anchored: IPR followed by exactly 6 digits
    IPR: Series[str] = pa.Field(str_matches=r"^IPR\d{6}$", unique=True)
    description: Series[str]
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
class AntismashStudySummarySchema(BaseSummarySchema):
    """Schema for study-level Antismash summary file validation."""

    # Free-text BGC label; only uniqueness is enforced
    label: Series[str] = pa.Field(unique=True)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
class SanntisStudySummarySchema(BaseSummarySchema):
    """Schema for study-level Sanntis summary file validation."""

    # TODO: limit nearest_mibig to the available MIBiG categories
    nearest_mibig: Series[str] = pa.Field(unique=True)
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
class KOStudySummarySchema(BaseSummarySchema):
    """Schema for study-level KEGG Orthology summary file validation."""

    # KO accession; format is not constrained here, only uniqueness
    KO: Series[str] = pa.Field(unique=True)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
class PFAMStudySummarySchema(BaseSummarySchema):
    """Schema for study-level PFAM summary file validation."""

    # Pfam accession; format is not constrained here, only uniqueness
    PFAM: Series[str] = pa.Field(unique=True)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
class KEGGModulesStudySummarySchema(BaseSummarySchema):
    """Schema for study-level KEGG Modules summary file validation."""

    # KEGG module accession; only uniqueness is enforced at study level
    module_accession: Series[str] = pa.Field(unique=True)
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
class TaxonomyStudySummarySchema(BaseSummarySchema):
    """Schema for study-level taxonomy summary files.

    Intentionally empty: inherits only the coercion behaviour of the base,
    with no column constraints of its own.
    """

    pass
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
class AmpliconNonINSDCPassedRunsSchema(CoerceBaseDataFrameSchema):
    """Class modelling the same dataframe schema as the preceding one, except with no INSDC validation."""

    # Run identifier; no accession-format or uniqueness constraint here
    run: Series[str]
    # NOTE: unlike AmpliconPassedRunsSchema, "dada2_stats_fail" is not accepted here
    status: Series[str] = pa.Field(isin=["all_results", "no_asvs"])
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
# This is the schema for the whole DF
class TaxonSchema(CoerceBaseDataFrameSchema):
    """Class modelling a Pandera dataframe schema for taxonomy records.

    Validates the generated dataframe when read by pandas.read_csv.
    """

    Superkingdom: Series[str] = pa.Field(nullable=True)
    Kingdom: Series[str] = pa.Field(nullable=True)
    Phylum: Series[str] = pa.Field(nullable=True)
    Class: Series[str] = pa.Field(nullable=True)
    Order: Series[str] = pa.Field(nullable=True)
    Family: Series[str] = pa.Field(nullable=True)
    Genus: Series[str] = pa.Field(nullable=True)
    Species: Series[str] = pa.Field(nullable=True)
    Count: Series[int]

    @pa.check(r"Superkingdom|Kingdom|Phylum|Class|Order|Family|Genus|Species", regex=True)
    def validate_tax_rank_format(self, series: Series[str]) -> Series[bool]:
        """Validate that taxonomy rank values follow the format: ${rank}__${taxon}
        or are 'Unclassified' or empty/null.

        :param series: Column series to validate
        :return: Boolean series indicating valid rows
        """
        allowed_prefixes = ["sk", "k", "p", "c", "o", "f", "g", "s"]

        def is_valid(value):
            # Missing values, empty strings and "Unclassified" (any case) pass.
            if pd.isna(value) or value == "" or value.capitalize() == "Unclassified":
                return True
            prefix, separator, _taxon = value.partition("__")
            if not separator:
                # No "rank__taxon" structure present at all.
                return False
            # An empty prefix (e.g. "__Bacteria") is also tolerated.
            return prefix == "" or prefix in allowed_prefixes

        return series.apply(is_valid)
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
class PR2TaxonSchema(CoerceBaseDataFrameSchema):
    """Class modelling a Pandera dataframe schema for PR2 taxonomy records."""

    Domain: Series[str] = pa.Field(nullable=True)
    Supergroup: Series[str] = pa.Field(nullable=True)
    Division: Series[str] = pa.Field(nullable=True)
    Subdivision: Series[str] = pa.Field(nullable=True)
    Class: Series[str] = pa.Field(nullable=True)
    Order: Series[str] = pa.Field(nullable=True)
    Family: Series[str] = pa.Field(nullable=True)
    Genus: Series[str] = pa.Field(nullable=True)
    Species: Series[str] = pa.Field(nullable=True)
    Count: Series[int]

    @pa.check(r"Domain|Supergroup|Division|Subdivision|Class|Order|Family|Genus|Species", regex=True)
    def validate_pr2_tax_rank_format(self, series: Series[str]) -> Series[bool]:
        """Validate that PR2 taxonomy rank values follow the format: ${rank}__${taxon}
        or are 'Unclassified' or empty/null.

        :param series: Column series to validate
        :return: Boolean series indicating valid rows
        """
        # PR2 columns may carry either standard or PR2-specific short ranks.
        allowed_prefixes = SHORT_TAX_RANKS + SHORT_PR2_TAX_RANKS

        def is_valid(value):
            # Missing values, empty strings and "Unclassified" (any case) pass.
            if pd.isna(value) or value == "" or value.capitalize() == "Unclassified":
                return True
            prefix, separator, _taxon = value.partition("__")
            if not separator:
                # No "rank__taxon" structure present at all.
                return False
            # An empty prefix is also tolerated.
            return prefix == "" or prefix in allowed_prefixes

        return series.apply(is_valid)
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
# This is the schema for the whole DF
class RawReadsPassedRunsSchema(CoerceBaseDataFrameSchema):
    """Class modelling a Pandera dataframe schema for raw reads passed runs.

    Validates the generated dataframe when read by pandas.read_csv.
    """

    # INSDC-style run accession: ERR/DRR/SRR followed by at least 6 digits
    run: Series[str] = pa.Field(str_matches=r"(E|D|S)RR[0-9]{6,}", unique=True)
    # Outcome of the raw-reads analysis for the run
    status: Series[str] = pa.Field(isin=["all_results", "no_reads", "all_empty_results", "some_empty_results"])
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
class RawReadsNonINSDCPassedRunsSchema(CoerceBaseDataFrameSchema):
    """Class modelling the same dataframe schema as the preceding one, except with no INSDC validation."""

    # Run identifier; no accession-format or uniqueness constraint here
    run: Series[str]
    status: Series[str] = pa.Field(isin=["all_results", "no_reads", "all_empty_results", "some_empty_results"])
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
class MotusTaxonSchema(CoerceBaseDataFrameSchema):
    """Class for modelling a single Taxonomic Rank in mOTUs output.

    Essentially is just a special string with validation of the structure:
    `${rank}__${taxon}`
    Where `${rank}` is one of the allowed short ranks defined by the imported
    `SHORT_MOTUS_TAX_RANKS` variables.
    And `${taxon}` is the actual taxon for that rank (this isn't validated).
    It will also validate if the whole string is the permitted "unassigned" or "unclassified".
    """

    Kingdom: Series[str] = pa.Field(nullable=True)
    Phylum: Series[str] = pa.Field(nullable=True)
    Class: Series[str] = pa.Field(nullable=True)
    Order: Series[str] = pa.Field(nullable=True)
    Family: Series[str] = pa.Field(nullable=True)
    Genus: Series[str] = pa.Field(nullable=True)
    Species: Series[str] = pa.Field(nullable=True)
    Count: Series[int]

    @pa.check(r"Kingdom|Phylum|Class|Order|Family|Genus|Species", regex=True)
    def validate_motus_tax_rank_format(self, series: Series[str]) -> Series[bool]:
        """Validate that mOTUs taxonomy rank values follow the format: ${rank}__${taxon}
        or are 'Unclassified', 'Unassigned', or empty/null.

        :param series: Column series to validate
        :return: Boolean series indicating valid rows
        """
        allowed_prefixes = SHORT_MOTUS_TAX_RANKS

        def is_valid(value):
            # Missing values and empty strings pass.
            if pd.isna(value) or value == "":
                return True
            # mOTUs additionally allows whole-string sentinels (any case).
            if value.capitalize() in {"Unclassified", "Unassigned"}:
                return True
            prefix, separator, _taxon = value.partition("__")
            if not separator:
                # No "rank__taxon" structure present at all.
                return False
            # An empty prefix is also tolerated.
            return prefix == "" or prefix in allowed_prefixes

        return series.apply(is_valid)
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
class FunctionProfileSchema(CoerceBaseDataFrameSchema):
    """Class modelling a Pandera dataframe schema for functional profile data.

    This is what actually validates the generated dataframe when read by pandas.read_csv.
    """

    # Number of reads assigned to the feature
    read_count: Series[int]
    coverage_depth: Series[float]
    coverage_breadth: Series[float]
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
def validate_dataframe(df: pd.DataFrame, schema: Type[pa.DataFrameModel], df_metadata: str) -> DataFrameBase:
    """
    Validate a pandas dataframe using a pandera schema.

    :param df: Dataframe to validate.
    :param schema: Pandera DataFrameModel subclass to validate against.
    :param df_metadata: Context shown in logs on failure: example, the TSV filename from which the df was read.
    :return: The validated (and coerced, per schema config) dataframe.
    :raises pandera.errors.SchemaErrors: When lazy validation collects failures.
    :raises pandera.errors.SchemaError: When a non-lazy failure is raised.
    """
    try:
        dfs = schema.validate(df, lazy=True)
    except (pa.errors.SchemaError, pa.errors.SchemaErrors):
        # Bug fix: with lazy=True pandera raises SchemaErrors (plural), which the
        # previous `except pa.errors.SchemaError` clause never caught — so the
        # log message below was silently skipped on every lazy failure.
        logging.error("%s validation failure for %s", schema.__name__, df_metadata)
        # Bare re-raise preserves the original traceback.
        raise
    return dfs
|
{mgnify_pipelines_toolkit-1.2.10.dist-info → mgnify_pipelines_toolkit-1.3.0.dist-info}/METADATA
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mgnify_pipelines_toolkit
|
|
3
|
-
Version: 1.
|
|
3
|
+
Version: 1.3.0
|
|
4
4
|
Summary: Collection of scripts and tools for MGnify pipelines
|
|
5
5
|
Author-email: MGnify team <metagenomics-help@ebi.ac.uk>
|
|
6
6
|
License: Apache Software License 2.0
|
|
@@ -19,6 +19,7 @@ Requires-Dist: click<9,>=8.1.8
|
|
|
19
19
|
Requires-Dist: pandera<0.24,>=0.23.1
|
|
20
20
|
Requires-Dist: pyfastx<3,>=2.2.0
|
|
21
21
|
Requires-Dist: intervaltree<4,>=3.1.0
|
|
22
|
+
Requires-Dist: isort>=6.1.0
|
|
22
23
|
Provides-Extra: test
|
|
23
24
|
Requires-Dist: pytest<9,>=8.3.5; extra == "test"
|
|
24
25
|
Requires-Dist: pytest-md>=0.2.0; extra == "test"
|
|
@@ -26,8 +27,7 @@ Requires-Dist: pytest-workflow==2.1.0; extra == "test"
|
|
|
26
27
|
Provides-Extra: dev
|
|
27
28
|
Requires-Dist: pre-commit>=4.2.0; extra == "dev"
|
|
28
29
|
Requires-Dist: black>=25.1.0; extra == "dev"
|
|
29
|
-
Requires-Dist:
|
|
30
|
-
Requires-Dist: pep8-naming>=0.14.1; extra == "dev"
|
|
30
|
+
Requires-Dist: ruff>=0.8.4; extra == "dev"
|
|
31
31
|
Dynamic: license-file
|
|
32
32
|
|
|
33
33
|
# mgnify-pipelines-toolkit
|
|
@@ -57,16 +57,29 @@ You should then be able to run the packages from the command-line. For example t
|
|
|
57
57
|
|
|
58
58
|
`get_subunits -i ${easel_coords} -n ${meta.id}`
|
|
59
59
|
|
|
60
|
-
##
|
|
60
|
+
## Development
|
|
61
61
|
|
|
62
|
-
###
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
- Install
|
|
68
|
-
- Install
|
|
69
|
-
|
|
62
|
+
### Quick Start with uv and Taskfile
|
|
63
|
+
|
|
64
|
+
This project uses [uv](https://docs.astral.sh/uv/) for fast Python environment management and [Task](https://taskfile.dev/) for task automation.
|
|
65
|
+
|
|
66
|
+
Prerequisites:
|
|
67
|
+
- Install [uv](https://docs.astral.sh/uv/getting-started/installation/)
|
|
68
|
+
- Install [Task](https://taskfile.dev/installation/)
|
|
69
|
+
|
|
70
|
+
Common tasks:
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
task: Available tasks for this project:
|
|
74
|
+
* clean: Clean up generated files and caches
|
|
75
|
+
* lint: Run linters (ruff check only)
|
|
76
|
+
* lint-fix: Run linters and fix issues automatically
|
|
77
|
+
* pre-commit: Install pre-commit hooks
|
|
78
|
+
* run: Run toolkit scripts with uv (usage: task run -- <script_name> [args])
|
|
79
|
+
* test: Run tests with uv
|
|
80
|
+
* testk: Run specific tests from a file (usage: task testk -- test_path)
|
|
81
|
+
* venv: Create a virtual environment with uv
|
|
82
|
+
```
|
|
70
83
|
|
|
71
84
|
When doing these steps above, you ensure that the code you add will be linted and formatted properly.
|
|
72
85
|
|
{mgnify_pipelines_toolkit-1.2.10.dist-info → mgnify_pipelines_toolkit-1.3.0.dist-info}/RECORD
RENAMED
|
@@ -4,10 +4,10 @@ mgnify_pipelines_toolkit/analysis/amplicon/classify_var_regions.py,sha256=8yFhmH
|
|
|
4
4
|
mgnify_pipelines_toolkit/analysis/amplicon/make_asv_count_table.py,sha256=-g1FDwdEndWH9VvYLmc_NEs2l204kKjMHk65wag8T_s,8891
|
|
5
5
|
mgnify_pipelines_toolkit/analysis/amplicon/mapseq_to_asv_table.py,sha256=BLqhflblUegCvuQic16PrFXfIXlFWmGkmWJyl4wJoLQ,5040
|
|
6
6
|
mgnify_pipelines_toolkit/analysis/amplicon/permute_primers.py,sha256=1aGOJX9tC7M1rnd0U2PeJ681sUo02wxk7_ycJqeVt6s,2216
|
|
7
|
-
mgnify_pipelines_toolkit/analysis/amplicon/primer_val_classification.py,sha256
|
|
7
|
+
mgnify_pipelines_toolkit/analysis/amplicon/primer_val_classification.py,sha256=uGlQ0595rammsW-rnhh1UAEkoRUjLqCQ-tp377Mj180,5345
|
|
8
8
|
mgnify_pipelines_toolkit/analysis/amplicon/remove_ambiguous_reads.py,sha256=Wu4tRtuRkgd3hoeuwPl_E5ghxIW7e_1vrcvFGWv_U4A,3173
|
|
9
9
|
mgnify_pipelines_toolkit/analysis/amplicon/rev_comp_se_primers.py,sha256=yLpzkRJXAeXRUNgz60zopEwHcdprM2UDjquE-GkrFys,1722
|
|
10
|
-
mgnify_pipelines_toolkit/analysis/amplicon/study_summary_generator.py,sha256=
|
|
10
|
+
mgnify_pipelines_toolkit/analysis/amplicon/study_summary_generator.py,sha256=ZiLSW1zFkYkbEeS6ZJb5GlxhsfDvbJ7JraqPgPUaI68,13473
|
|
11
11
|
mgnify_pipelines_toolkit/analysis/assembly/add_rhea_chebi_annotation.py,sha256=NZSNY2bqs_TQyz8riDqiEFPLKcwTgzh1C7DeVHT6V8Q,4366
|
|
12
12
|
mgnify_pipelines_toolkit/analysis/assembly/antismash_gff_builder.py,sha256=2Zkm3KJ1Borzch5XSZbsVNTPej3J5QYkqTQQACkRDVo,6944
|
|
13
13
|
mgnify_pipelines_toolkit/analysis/assembly/combined_gene_caller_merge.py,sha256=Pq-9RSt3RCxzDMQVW1VHlHF4NtpVwCWFbg2CMkvpZZc,19089
|
|
@@ -19,12 +19,12 @@ mgnify_pipelines_toolkit/analysis/assembly/go_utils.py,sha256=eay9e3Xdc8XxnlC_4S
|
|
|
19
19
|
mgnify_pipelines_toolkit/analysis/assembly/krona_txt_from_cat_classification.py,sha256=uex2T6GagtYFBIc39-Xm4SFHL06KAQ5v0_loOmY_eaw,4289
|
|
20
20
|
mgnify_pipelines_toolkit/analysis/assembly/process_dbcan_result_cazys.py,sha256=5m5AwWEKidJx1FI0y93AFka7z0zEE8dBf1ofgP8TV_Y,7108
|
|
21
21
|
mgnify_pipelines_toolkit/analysis/assembly/process_dbcan_result_clusters.py,sha256=DYZhChGD49M-zAtGkCmNHXDoVTnd5Qy6amG-oePO8Ek,5981
|
|
22
|
-
mgnify_pipelines_toolkit/analysis/assembly/study_summary_generator.py,sha256=
|
|
22
|
+
mgnify_pipelines_toolkit/analysis/assembly/study_summary_generator.py,sha256=YBZ5ARXKLMBnZZmbFaQP75kmUuADfUhdye3QU9qUVWY,21506
|
|
23
23
|
mgnify_pipelines_toolkit/analysis/assembly/summarise_antismash_bgcs.py,sha256=jUeA7I12YrtIqnm3hUxpdgsWfa2pP1ALGjb9OMKPcgY,10643
|
|
24
24
|
mgnify_pipelines_toolkit/analysis/assembly/summarise_goslims.py,sha256=TPaKlYkoy37_XgYNOskWCCoXtPNku_k5ygSeK4fT1VQ,6689
|
|
25
25
|
mgnify_pipelines_toolkit/analysis/assembly/summarise_sanntis_bgcs.py,sha256=lxe7R2RQFyNCzEm6YuNRrqKZLZOUPq5W1P23Pt2sKBU,4570
|
|
26
26
|
mgnify_pipelines_toolkit/analysis/genomes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
27
|
-
mgnify_pipelines_toolkit/analysis/rawreads/study_summary_generator.py,sha256=
|
|
27
|
+
mgnify_pipelines_toolkit/analysis/rawreads/study_summary_generator.py,sha256=obZnZ4uyVR0ckVCpqflZd_tdxA2-uCnFkubkM1Zj1ZY,15545
|
|
28
28
|
mgnify_pipelines_toolkit/analysis/shared/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
29
29
|
mgnify_pipelines_toolkit/analysis/shared/convert_cmscan_to_cmsearch_tblout.py,sha256=kAGU5kQyj-Hlcdx32i-xOJSuHYYUDj-kqnyYHMohHGc,4477
|
|
30
30
|
mgnify_pipelines_toolkit/analysis/shared/dwc_summary_generator.py,sha256=RaFopUjJI4UO1ttnSEHj7iUXpAL5-2FTbDXlhOmNy0s,25534
|
|
@@ -40,13 +40,13 @@ mgnify_pipelines_toolkit/constants/regex_fasta_header.py,sha256=G-xrc9b8zdmPTaOI
|
|
|
40
40
|
mgnify_pipelines_toolkit/constants/tax_ranks.py,sha256=ekZN5OcMBhDRcj7XB_27wQ8fEnmAqMJc4aQ3pv4BRmI,1229
|
|
41
41
|
mgnify_pipelines_toolkit/constants/thresholds.py,sha256=1AMBmoHBR0WjXZpkwJ7_Q-gfJtHXuCA4tZ-uvPhF0Xc,1085
|
|
42
42
|
mgnify_pipelines_toolkit/constants/var_region_coordinates.py,sha256=0bM4MwarFiM5yTcp5AbAmQ0o-q-gWy7kknir9zJ9R0A,1312
|
|
43
|
-
mgnify_pipelines_toolkit/schemas/
|
|
43
|
+
mgnify_pipelines_toolkit/schemas/dataframes.py,sha256=gpWA5RlQpsOkjixF_21UsEKPLrpt4_8Ik-ANb1b9DEU,11523
|
|
44
44
|
mgnify_pipelines_toolkit/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
45
45
|
mgnify_pipelines_toolkit/utils/fasta_to_delimited.py,sha256=lgYIR1S4crURY7C7nFtgE6QMV4u4zCNsUrVkcRnsEEo,3996
|
|
46
46
|
mgnify_pipelines_toolkit/utils/get_mpt_version.py,sha256=aS9bWrC9CP7tpxoEVg6eEYt18-pmjG7fJl5Mchz4YOU,798
|
|
47
|
-
mgnify_pipelines_toolkit-1.
|
|
48
|
-
mgnify_pipelines_toolkit-1.
|
|
49
|
-
mgnify_pipelines_toolkit-1.
|
|
50
|
-
mgnify_pipelines_toolkit-1.
|
|
51
|
-
mgnify_pipelines_toolkit-1.
|
|
52
|
-
mgnify_pipelines_toolkit-1.
|
|
47
|
+
mgnify_pipelines_toolkit-1.3.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
48
|
+
mgnify_pipelines_toolkit-1.3.0.dist-info/METADATA,sha256=CYk4aiET00z22Dhti78aroegnq-ZlDCvWkPLQDFqEVY,6098
|
|
49
|
+
mgnify_pipelines_toolkit-1.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
50
|
+
mgnify_pipelines_toolkit-1.3.0.dist-info/entry_points.txt,sha256=7TJ8GgbKoX1xnQsOdWwMvwhIv4uuHCx7pMxKmZabPOs,3228
|
|
51
|
+
mgnify_pipelines_toolkit-1.3.0.dist-info/top_level.txt,sha256=xA_wC7C01V3VwuDnqwRM2QYeJJ45WtvF6LVav4tYxuE,25
|
|
52
|
+
mgnify_pipelines_toolkit-1.3.0.dist-info/RECORD,,
|