mgnify-pipelines-toolkit 1.1.0__py3-none-any.whl → 1.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mgnify-pipelines-toolkit might be problematic. Click here for more details.
- mgnify_pipelines_toolkit/analysis/{shared → amplicon}/study_summary_generator.py +2 -2
- mgnify_pipelines_toolkit/analysis/assembly/process_dbcan_result_cazys.py +58 -65
- mgnify_pipelines_toolkit/analysis/assembly/study_summary_generator.py +618 -0
- mgnify_pipelines_toolkit/analysis/assembly/summarise_antismash_bgcs.py +5 -9
- mgnify_pipelines_toolkit/analysis/assembly/summarise_sanntis_bgcs.py +18 -16
- mgnify_pipelines_toolkit/schemas/schemas.py +355 -2
- {mgnify_pipelines_toolkit-1.1.0.dist-info → mgnify_pipelines_toolkit-1.1.2.dist-info}/METADATA +2 -2
- {mgnify_pipelines_toolkit-1.1.0.dist-info → mgnify_pipelines_toolkit-1.1.2.dist-info}/RECORD +12 -11
- {mgnify_pipelines_toolkit-1.1.0.dist-info → mgnify_pipelines_toolkit-1.1.2.dist-info}/WHEEL +1 -1
- {mgnify_pipelines_toolkit-1.1.0.dist-info → mgnify_pipelines_toolkit-1.1.2.dist-info}/entry_points.txt +2 -1
- {mgnify_pipelines_toolkit-1.1.0.dist-info → mgnify_pipelines_toolkit-1.1.2.dist-info}/licenses/LICENSE +0 -0
- {mgnify_pipelines_toolkit-1.1.0.dist-info → mgnify_pipelines_toolkit-1.1.2.dist-info}/top_level.txt +0 -0
|
@@ -75,38 +75,40 @@ def main():
|
|
|
75
75
|
entry_dict[key] = value
|
|
76
76
|
dict_list.append(entry_dict)
|
|
77
77
|
|
|
78
|
-
|
|
78
|
+
# Convert to DataFrame
|
|
79
79
|
df = pd.DataFrame(dict_list)
|
|
80
80
|
df = df.rename(
|
|
81
81
|
columns={
|
|
82
|
-
"nearest_MiBIG": "
|
|
83
|
-
"nearest_MiBIG_class": "
|
|
82
|
+
"nearest_MiBIG": "nearest_mibig",
|
|
83
|
+
"nearest_MiBIG_class": "nearest_mibig_class",
|
|
84
84
|
}
|
|
85
85
|
)
|
|
86
86
|
df_grouped = (
|
|
87
|
-
df.groupby(["
|
|
87
|
+
df.groupby(["nearest_mibig", "nearest_mibig_class"])
|
|
88
88
|
.size()
|
|
89
|
-
.reset_index(name="
|
|
89
|
+
.reset_index(name="count")
|
|
90
90
|
)
|
|
91
|
-
df_grouped = df_grouped.sort_values(by="
|
|
91
|
+
df_grouped = df_grouped.sort_values(by="count", ascending=False)
|
|
92
92
|
|
|
93
93
|
df_desc = pd.DataFrame(
|
|
94
|
-
list(DESCRIPTIONS.items()), columns=["
|
|
94
|
+
list(DESCRIPTIONS.items()), columns=["mibig_class", "description"]
|
|
95
95
|
)
|
|
96
|
-
df_desc = df_desc.set_index("
|
|
96
|
+
df_desc = df_desc.set_index("mibig_class")
|
|
97
97
|
df_merged = df_grouped.merge(
|
|
98
|
-
df_desc, left_on="
|
|
98
|
+
df_desc, left_on="nearest_mibig_class", right_index=True, how="left"
|
|
99
99
|
)
|
|
100
|
-
df_merged["
|
|
101
|
-
lambda row:
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
100
|
+
df_merged["description"] = df_merged.apply(
|
|
101
|
+
lambda row: (
|
|
102
|
+
row["nearest_mibig_class"].replace(
|
|
103
|
+
"NRP", df_desc.loc["NRP"]["description"]
|
|
104
|
+
)
|
|
105
|
+
if pd.isna(row["description"]) and "NRP" in row["nearest_mibig_class"]
|
|
106
|
+
else row["description"]
|
|
107
|
+
),
|
|
106
108
|
axis=1,
|
|
107
109
|
)
|
|
108
110
|
df_merged = df_merged[
|
|
109
|
-
["
|
|
111
|
+
["nearest_mibig", "nearest_mibig_class", "description", "count"]
|
|
110
112
|
]
|
|
111
113
|
df_merged = df_merged.rename(columns={
|
|
112
114
|
"Description": "description",
|
|
@@ -17,10 +17,11 @@ import logging
|
|
|
17
17
|
import re
|
|
18
18
|
|
|
19
19
|
from enum import Enum
|
|
20
|
-
from typing import ClassVar, Optional, Type
|
|
20
|
+
from typing import ClassVar, Optional, Type, Literal
|
|
21
21
|
|
|
22
22
|
import pandas as pd
|
|
23
23
|
import pandera as pa
|
|
24
|
+
from pandera.typing import Series
|
|
24
25
|
from pandera.typing.common import DataFrameBase
|
|
25
26
|
|
|
26
27
|
from pydantic import (
|
|
@@ -110,6 +111,354 @@ class AmpliconPassedRunsSchema(pa.DataFrameModel):
|
|
|
110
111
|
coerce = True
|
|
111
112
|
|
|
112
113
|
|
|
114
|
+
class CompletedAnalysisRecord(BaseModel):
|
|
115
|
+
"""Class defining a Pydantic model for a single "row" of a successfully analysed assemblies file."""
|
|
116
|
+
|
|
117
|
+
assembly: str = Field(
|
|
118
|
+
...,
|
|
119
|
+
description="Assembly accession",
|
|
120
|
+
examples=["ERZ789012"],
|
|
121
|
+
pattern=r"ERZ\d{6,}",
|
|
122
|
+
)
|
|
123
|
+
status: Literal["success"] = Field(
|
|
124
|
+
...,
|
|
125
|
+
description="Pipeline output for whether this assembly's analysis succeeded or not",
|
|
126
|
+
)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
class CompletedAnalysisSchema(pa.DataFrameModel):
|
|
130
|
+
"""Class modelling a Pandera dataframe schema that uses the CompletedAnalysisRecord class as dtype.
|
|
131
|
+
This is what actually validates the generated dataframe when read by pandas.read_csv.
|
|
132
|
+
"""
|
|
133
|
+
|
|
134
|
+
assembly: Series[str]
|
|
135
|
+
|
|
136
|
+
@pa.check("assembly")
|
|
137
|
+
def accessions_unique(self, series: Series[str]) -> Series[bool]:
|
|
138
|
+
return ~series.duplicated()
|
|
139
|
+
|
|
140
|
+
class Config:
|
|
141
|
+
"""Config with dataframe-level data type."""
|
|
142
|
+
|
|
143
|
+
dtype = PydanticModel(CompletedAnalysisRecord)
|
|
144
|
+
coerce = True
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
class InterProSummaryRecord(BaseModel):
|
|
148
|
+
"""Model of a row in the InterPro summary file."""
|
|
149
|
+
|
|
150
|
+
count: int = Field(
|
|
151
|
+
..., ge=0, description="Number of hits for the InterPro accession"
|
|
152
|
+
)
|
|
153
|
+
interpro_accession: str = Field(
|
|
154
|
+
...,
|
|
155
|
+
description="InterPro accession ID",
|
|
156
|
+
examples=["IPR123456"],
|
|
157
|
+
pattern=r"IPR\d{6}",
|
|
158
|
+
)
|
|
159
|
+
description: str = Field(..., description="Description of the InterPro domain")
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
class GOSummaryRecord(BaseModel):
|
|
163
|
+
"""Model of a row in the GO summary file."""
|
|
164
|
+
|
|
165
|
+
go: str = Field(
|
|
166
|
+
...,
|
|
167
|
+
description="GO term identifier",
|
|
168
|
+
examples=["GO:1234567"],
|
|
169
|
+
pattern=r"GO:\d{7}",
|
|
170
|
+
)
|
|
171
|
+
term: str = Field(..., description="GO term name")
|
|
172
|
+
category: str = Field(
|
|
173
|
+
...,
|
|
174
|
+
description="GO category",
|
|
175
|
+
examples=["biological_process", "molecular_function", "cellular_component"],
|
|
176
|
+
)
|
|
177
|
+
count: int = Field(..., ge=0, description="Number of times the GO term is observed")
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
class BaseSummarySchema(pa.DataFrameModel):
|
|
181
|
+
"""Base schema for summary files."""
|
|
182
|
+
|
|
183
|
+
@staticmethod
|
|
184
|
+
def is_unique(series: Series[str]) -> Series[bool]:
|
|
185
|
+
return ~series.duplicated()
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
class InterProSummarySchema(BaseSummarySchema):
|
|
189
|
+
"""Schema for InterPro summary file validation."""
|
|
190
|
+
|
|
191
|
+
interpro_accession: Series[str]
|
|
192
|
+
|
|
193
|
+
@pa.check("interpro_accession")
|
|
194
|
+
def interpro_ids_unique(self, series: Series[str]) -> Series[bool]:
|
|
195
|
+
return self.is_unique(series)
|
|
196
|
+
|
|
197
|
+
class Config:
|
|
198
|
+
dtype = PydanticModel(InterProSummaryRecord)
|
|
199
|
+
coerce = True
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
class GOSummarySchema(BaseSummarySchema):
|
|
203
|
+
"""Schema for GO or GOslim summary file validation."""
|
|
204
|
+
|
|
205
|
+
go: Series[str]
|
|
206
|
+
|
|
207
|
+
@pa.check("go")
|
|
208
|
+
def go_ids_unique(self, series: Series[str]) -> Series[bool]:
|
|
209
|
+
return self.is_unique(series)
|
|
210
|
+
|
|
211
|
+
class Config:
|
|
212
|
+
dtype = PydanticModel(GOSummaryRecord)
|
|
213
|
+
coerce = True
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
class SanntisSummaryRecord(BaseModel):
|
|
217
|
+
"""Model of a row in the Sanntis assembly-level summary file."""
|
|
218
|
+
|
|
219
|
+
nearest_mibig: str = Field(
|
|
220
|
+
...,
|
|
221
|
+
description="The accession ID of the closest matching biosynthetic gene cluster (BGC) in the MIBiG database",
|
|
222
|
+
examples=["BGC0000073"],
|
|
223
|
+
pattern=r"BGC\d{7}",
|
|
224
|
+
)
|
|
225
|
+
nearest_mibig_class: str = Field(
|
|
226
|
+
...,
|
|
227
|
+
description="The biosynthetic class of the nearest MIBiG BGC",
|
|
228
|
+
examples=["Polyketide"],
|
|
229
|
+
)
|
|
230
|
+
description: str = Field(
|
|
231
|
+
...,
|
|
232
|
+
description="A brief summary of the biosynthetic process or type of metabolite associated with the nearest MIBiG cluster",
|
|
233
|
+
)
|
|
234
|
+
|
|
235
|
+
count: int = Field(
|
|
236
|
+
..., ge=0, description="Number of times the MIBiG entry is observed"
|
|
237
|
+
)
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
class AntismashSummaryRecord(BaseModel):
|
|
241
|
+
"""Model of a row in the Antismash summary file."""
|
|
242
|
+
|
|
243
|
+
label: str = Field(
|
|
244
|
+
...,
|
|
245
|
+
description="Biosynthetic class or label assigned by Antismash based on sequence similarity to known biosynthetic gene clusters.",
|
|
246
|
+
examples=["RiPP-like", "T1PKS", "terpene"],
|
|
247
|
+
)
|
|
248
|
+
description: str = Field(
|
|
249
|
+
...,
|
|
250
|
+
description="Brief explanation of the biosynthetic class, often indicating compound type or functional characteristics.",
|
|
251
|
+
examples=["Type I PKS (Polyketide synthase)", "Redox-cofactors such as PQQ"],
|
|
252
|
+
)
|
|
253
|
+
count: int = Field(
|
|
254
|
+
...,
|
|
255
|
+
ge=0,
|
|
256
|
+
description="Number of BGCs (biosynthetic gene clusters) in the dataset assigned to this label.",
|
|
257
|
+
)
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
class KOSummaryRecord(BaseModel):
|
|
261
|
+
"""Model of a row in the KEGG summary file."""
|
|
262
|
+
|
|
263
|
+
ko: str = Field(
|
|
264
|
+
...,
|
|
265
|
+
description="KEGG Orthology (KO) identifier representing a functional gene or pathway component.",
|
|
266
|
+
examples=["K07547", "K04874", "K19946"],
|
|
267
|
+
pattern=r"K\d{5,}",
|
|
268
|
+
)
|
|
269
|
+
description: str = Field(
|
|
270
|
+
...,
|
|
271
|
+
description="Name or function of the KO, sometimes including EC numbers and protein families.",
|
|
272
|
+
examples=["optineurin", "MFS transporter, POT/PTR family"],
|
|
273
|
+
)
|
|
274
|
+
count: int = Field(
|
|
275
|
+
...,
|
|
276
|
+
ge=0,
|
|
277
|
+
description="Number of times this KO identifier is observed in the dataset.",
|
|
278
|
+
)
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
class PFAMSummaryRecord(BaseModel):
|
|
282
|
+
"""Model of a row in the PFAM summary file."""
|
|
283
|
+
|
|
284
|
+
pfam: str = Field(
|
|
285
|
+
...,
|
|
286
|
+
description="PFAM accession identifier representing a protein domain or family.",
|
|
287
|
+
examples=["PF00265", "PF01956", "PF00673"],
|
|
288
|
+
pattern=r"PF\d{5}",
|
|
289
|
+
)
|
|
290
|
+
description: str = Field(
|
|
291
|
+
...,
|
|
292
|
+
description="Description of the protein domain or family associated with the PFAM ID.",
|
|
293
|
+
examples=["Thymidine kinase", "Integral membrane protein EMC3/TMCO1-like"],
|
|
294
|
+
)
|
|
295
|
+
count: int = Field(
|
|
296
|
+
...,
|
|
297
|
+
ge=0,
|
|
298
|
+
description="Number of times the PFAM domain is observed in the dataset.",
|
|
299
|
+
)
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
class KEGGModulesSummaryRecord(BaseModel):
|
|
303
|
+
"""Model of a row in the KEGG Modules summary file."""
|
|
304
|
+
|
|
305
|
+
module_accession: str = Field(
|
|
306
|
+
...,
|
|
307
|
+
description="KEGG Module identifier representing a specific metabolic pathway or module.",
|
|
308
|
+
examples=["M00123", "M00234"],
|
|
309
|
+
pattern=r"M\d{5}",
|
|
310
|
+
)
|
|
311
|
+
completeness: float = Field(
|
|
312
|
+
...,
|
|
313
|
+
ge=0,
|
|
314
|
+
description="Completeness score of the KEGG Module, indicating the extent to which the module is present in the metagenome.",
|
|
315
|
+
)
|
|
316
|
+
pathway_name: str = Field(
|
|
317
|
+
...,
|
|
318
|
+
description="Name of the metabolic pathway associated with the KEGG Module.",
|
|
319
|
+
examples=["Sulfur reduction, sulfur => sulfide"],
|
|
320
|
+
)
|
|
321
|
+
pathway_class: str = Field(
|
|
322
|
+
...,
|
|
323
|
+
description="Biosynthetic class or category associated with the KEGG Module, semi colon separated.",
|
|
324
|
+
examples=["Pathway modules; Energy metabolism; Photosynthesis"],
|
|
325
|
+
)
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
class SanntisSummarySchema(BaseSummarySchema):
|
|
329
|
+
nearest_mibig: Series[str]
|
|
330
|
+
|
|
331
|
+
@pa.check("nearest_mibig")
|
|
332
|
+
def mibig_ids_unique(self, series: Series[str]) -> Series[bool]:
|
|
333
|
+
return self.is_unique(series)
|
|
334
|
+
|
|
335
|
+
class Config:
|
|
336
|
+
dtype = PydanticModel(SanntisSummaryRecord)
|
|
337
|
+
coerce = True
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
class AntismashSummarySchema(BaseSummarySchema):
|
|
341
|
+
label: Series[str]
|
|
342
|
+
|
|
343
|
+
@pa.check("label")
|
|
344
|
+
def class_names_unique(self, series: Series[str]) -> Series[bool]:
|
|
345
|
+
return self.is_unique(series)
|
|
346
|
+
|
|
347
|
+
class Config:
|
|
348
|
+
dtype = PydanticModel(AntismashSummaryRecord)
|
|
349
|
+
coerce = True
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
class KOSummarySchema(BaseSummarySchema):
|
|
353
|
+
ko: Series[str]
|
|
354
|
+
|
|
355
|
+
@pa.check("ko")
|
|
356
|
+
def ko_ids_unique(self, series: Series[str]) -> Series[bool]:
|
|
357
|
+
return self.is_unique(series)
|
|
358
|
+
|
|
359
|
+
class Config:
|
|
360
|
+
dtype = PydanticModel(KOSummaryRecord)
|
|
361
|
+
coerce = True
|
|
362
|
+
|
|
363
|
+
|
|
364
|
+
class PFAMSummarySchema(BaseSummarySchema):
|
|
365
|
+
pfam: Series[str]
|
|
366
|
+
|
|
367
|
+
@pa.check("pfam")
|
|
368
|
+
def pfam_ids_unique(self, series: Series[str]) -> Series[bool]:
|
|
369
|
+
return self.is_unique(series)
|
|
370
|
+
|
|
371
|
+
class Config:
|
|
372
|
+
dtype = PydanticModel(PFAMSummaryRecord)
|
|
373
|
+
coerce = True
|
|
374
|
+
|
|
375
|
+
|
|
376
|
+
class KEGGModulesSummarySchema(BaseSummarySchema):
|
|
377
|
+
module_accession: Series[str]
|
|
378
|
+
|
|
379
|
+
@pa.check("module_accession")
|
|
380
|
+
def module_ids_unique(self, series: Series[str]) -> Series[bool]:
|
|
381
|
+
return self.is_unique(series)
|
|
382
|
+
|
|
383
|
+
class Config:
|
|
384
|
+
dtype = PydanticModel(KEGGModulesSummaryRecord)
|
|
385
|
+
coerce = True
|
|
386
|
+
|
|
387
|
+
|
|
388
|
+
class BaseStudySummarySchema(BaseSummarySchema):
|
|
389
|
+
"""Base schema for study summary files with ERZ* columns and count checks."""
|
|
390
|
+
|
|
391
|
+
@pa.check(regex=r"^ERZ\d+")
|
|
392
|
+
def count_columns_are_non_negative(self, s: Series[int]) -> Series[bool]:
|
|
393
|
+
return s >= 0
|
|
394
|
+
|
|
395
|
+
class Config:
|
|
396
|
+
strict = False # allow extra ERZ* columns not declared above
|
|
397
|
+
|
|
398
|
+
|
|
399
|
+
class GOStudySummarySchema(BaseStudySummarySchema):
|
|
400
|
+
GO: Series[str] = pa.Field(str_matches=r"^GO:\d{7}$")
|
|
401
|
+
description: Series[str]
|
|
402
|
+
category: Series[str]
|
|
403
|
+
|
|
404
|
+
@pa.check("GO")
|
|
405
|
+
def go_ids_unique(self, series: Series[str]) -> Series[bool]:
|
|
406
|
+
return self.is_unique(series)
|
|
407
|
+
|
|
408
|
+
|
|
409
|
+
class InterProStudySummarySchema(BaseStudySummarySchema):
|
|
410
|
+
IPR: Series[str] = pa.Field(str_matches=r"^IPR\d{6}$")
|
|
411
|
+
description: Series[str]
|
|
412
|
+
|
|
413
|
+
@pa.check("IPR")
|
|
414
|
+
def interpro_ids_unique(self, series: Series[str]) -> Series[bool]:
|
|
415
|
+
return self.is_unique(series)
|
|
416
|
+
|
|
417
|
+
|
|
418
|
+
class AntismashStudySummarySchema(BaseStudySummarySchema):
|
|
419
|
+
label: Series[str]
|
|
420
|
+
|
|
421
|
+
@pa.check("label")
|
|
422
|
+
def class_names_unique(self, series: Series[str]) -> Series[bool]:
|
|
423
|
+
return self.is_unique(series)
|
|
424
|
+
|
|
425
|
+
|
|
426
|
+
class SanntisStudySummarySchema(BaseStudySummarySchema):
|
|
427
|
+
nearest_mibig: Series[str]
|
|
428
|
+
|
|
429
|
+
@pa.check("nearest_mibig")
|
|
430
|
+
def mibig_ids_unique(self, series: Series[str]) -> Series[bool]:
|
|
431
|
+
return self.is_unique(series)
|
|
432
|
+
|
|
433
|
+
|
|
434
|
+
class KOStudySummarySchema(BaseStudySummarySchema):
|
|
435
|
+
KO: Series[str]
|
|
436
|
+
|
|
437
|
+
@pa.check("KO")
|
|
438
|
+
def ko_ids_unique(self, series: Series[str]) -> Series[bool]:
|
|
439
|
+
return self.is_unique(series)
|
|
440
|
+
|
|
441
|
+
|
|
442
|
+
class PFAMStudySummarySchema(BaseStudySummarySchema):
|
|
443
|
+
PFAM: Series[str]
|
|
444
|
+
|
|
445
|
+
@pa.check("PFAM")
|
|
446
|
+
def pfam_ids_unique(self, series: Series[str]) -> Series[bool]:
|
|
447
|
+
return self.is_unique(series)
|
|
448
|
+
|
|
449
|
+
|
|
450
|
+
class KEGGModulesStudySummarySchema(BaseStudySummarySchema):
|
|
451
|
+
module_accession: Series[str]
|
|
452
|
+
|
|
453
|
+
@pa.check("module_accession")
|
|
454
|
+
def module_ids_unique(self, series: Series[str]) -> Series[bool]:
|
|
455
|
+
return self.is_unique(series)
|
|
456
|
+
|
|
457
|
+
|
|
458
|
+
class TaxonomyStudySummarySchema(BaseStudySummarySchema):
|
|
459
|
+
pass
|
|
460
|
+
|
|
461
|
+
|
|
113
462
|
class AmpliconNonINSDCPassedRunsSchema(pa.DataFrameModel):
|
|
114
463
|
"""Class modelling the same dataframe schema as AmpliconPassedRunsSchema, except with no INSDC validation.
|
|
115
464
|
Uses the AmpliconNonINSDCSPassedRunsRecord as a dtype to achieve this.
|
|
@@ -145,7 +494,11 @@ class TaxRank(RootModel):
|
|
|
145
494
|
def rank_structure_validity_check(cls, taxrank: str) -> bool:
|
|
146
495
|
taxrank_list = taxrank.split("__")
|
|
147
496
|
rank = taxrank_list[0]
|
|
148
|
-
if
|
|
497
|
+
if (
|
|
498
|
+
rank != ""
|
|
499
|
+
and rank.capitalize() != "Unclassified"
|
|
500
|
+
and rank not in cls.valid_tax_ranks
|
|
501
|
+
):
|
|
149
502
|
raise ValueError(f"Invalid taxonomy rank {rank}.")
|
|
150
503
|
|
|
151
504
|
return taxrank
|
{mgnify_pipelines_toolkit-1.1.0.dist-info → mgnify_pipelines_toolkit-1.1.2.dist-info}/METADATA
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mgnify_pipelines_toolkit
|
|
3
|
-
Version: 1.1.
|
|
3
|
+
Version: 1.1.2
|
|
4
4
|
Summary: Collection of scripts and tools for MGnify pipelines
|
|
5
5
|
Author-email: MGnify team <metagenomics-help@ebi.ac.uk>
|
|
6
6
|
License: Apache Software License 2.0
|
|
@@ -8,7 +8,7 @@ Keywords: bioinformatics,pipelines,metagenomics
|
|
|
8
8
|
Classifier: Programming Language :: Python :: 3
|
|
9
9
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
10
10
|
Classifier: Operating System :: OS Independent
|
|
11
|
-
Requires-Python: >=3.
|
|
11
|
+
Requires-Python: >=3.10
|
|
12
12
|
Description-Content-Type: text/markdown
|
|
13
13
|
License-File: LICENSE
|
|
14
14
|
Requires-Dist: biopython>=1.85
|
{mgnify_pipelines_toolkit-1.1.0.dist-info → mgnify_pipelines_toolkit-1.1.2.dist-info}/RECORD
RENAMED
|
@@ -12,6 +12,7 @@ mgnify_pipelines_toolkit/analysis/amplicon/primer_val_classification.py,sha256=B
|
|
|
12
12
|
mgnify_pipelines_toolkit/analysis/amplicon/remove_ambiguous_reads.py,sha256=Wu4tRtuRkgd3hoeuwPl_E5ghxIW7e_1vrcvFGWv_U4A,3173
|
|
13
13
|
mgnify_pipelines_toolkit/analysis/amplicon/rev_comp_se_primers.py,sha256=yLpzkRJXAeXRUNgz60zopEwHcdprM2UDjquE-GkrFys,1722
|
|
14
14
|
mgnify_pipelines_toolkit/analysis/amplicon/standard_primer_matching.py,sha256=K6gniytuItq5WzHLi1BsaUCOdP4Zm0_ZzW2_ns7-BTI,11114
|
|
15
|
+
mgnify_pipelines_toolkit/analysis/amplicon/study_summary_generator.py,sha256=epVClL10QcllL8yu7YGjx0rXNVHL2GxHi-Ek0MOjsjo,13859
|
|
15
16
|
mgnify_pipelines_toolkit/analysis/assembly/add_rhea_chebi_annotation.py,sha256=NZSNY2bqs_TQyz8riDqiEFPLKcwTgzh1C7DeVHT6V8Q,4366
|
|
16
17
|
mgnify_pipelines_toolkit/analysis/assembly/antismash_gff_builder.py,sha256=wXrw1B-z4hOu5oA27Vp1WYxGP2Mk6ZY4i_T5jDZgek0,6954
|
|
17
18
|
mgnify_pipelines_toolkit/analysis/assembly/combined_gene_caller_merge.py,sha256=Pq-9RSt3RCxzDMQVW1VHlHF4NtpVwCWFbg2CMkvpZZc,19089
|
|
@@ -21,11 +22,12 @@ mgnify_pipelines_toolkit/analysis/assembly/gff_file_utils.py,sha256=_4J31wAjK5B1
|
|
|
21
22
|
mgnify_pipelines_toolkit/analysis/assembly/gff_toolkit.py,sha256=_iaTBvMKbQDi_02_QuSPqLJ_rC37ruxiPHv5lLQmI-w,5480
|
|
22
23
|
mgnify_pipelines_toolkit/analysis/assembly/go_utils.py,sha256=eay9e3Xdc8XxnlC_4SHHjN89k-M9i_cFMc2lI_ZFxqY,5596
|
|
23
24
|
mgnify_pipelines_toolkit/analysis/assembly/krona_txt_from_cat_classification.py,sha256=uex2T6GagtYFBIc39-Xm4SFHL06KAQ5v0_loOmY_eaw,4289
|
|
24
|
-
mgnify_pipelines_toolkit/analysis/assembly/process_dbcan_result_cazys.py,sha256=
|
|
25
|
+
mgnify_pipelines_toolkit/analysis/assembly/process_dbcan_result_cazys.py,sha256=5m5AwWEKidJx1FI0y93AFka7z0zEE8dBf1ofgP8TV_Y,7108
|
|
25
26
|
mgnify_pipelines_toolkit/analysis/assembly/process_dbcan_result_clusters.py,sha256=DYZhChGD49M-zAtGkCmNHXDoVTnd5Qy6amG-oePO8Ek,5981
|
|
26
|
-
mgnify_pipelines_toolkit/analysis/assembly/
|
|
27
|
+
mgnify_pipelines_toolkit/analysis/assembly/study_summary_generator.py,sha256=eNichqFFmfPsa2J10IUm_PemVs9fBhbKa2vpDqEvJNU,21791
|
|
28
|
+
mgnify_pipelines_toolkit/analysis/assembly/summarise_antismash_bgcs.py,sha256=jUeA7I12YrtIqnm3hUxpdgsWfa2pP1ALGjb9OMKPcgY,10643
|
|
27
29
|
mgnify_pipelines_toolkit/analysis/assembly/summarise_goslims.py,sha256=TPaKlYkoy37_XgYNOskWCCoXtPNku_k5ygSeK4fT1VQ,6689
|
|
28
|
-
mgnify_pipelines_toolkit/analysis/assembly/summarise_sanntis_bgcs.py,sha256=
|
|
30
|
+
mgnify_pipelines_toolkit/analysis/assembly/summarise_sanntis_bgcs.py,sha256=1wblLbZl521digIUWoqneAu15gErzvN_oC--5T_xUdw,4582
|
|
29
31
|
mgnify_pipelines_toolkit/analysis/genomes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
30
32
|
mgnify_pipelines_toolkit/analysis/shared/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
31
33
|
mgnify_pipelines_toolkit/analysis/shared/convert_cmscan_to_cmsearch_tblout.py,sha256=kAGU5kQyj-Hlcdx32i-xOJSuHYYUDj-kqnyYHMohHGc,4477
|
|
@@ -36,7 +38,6 @@ mgnify_pipelines_toolkit/analysis/shared/get_subunits_coords.py,sha256=EH5RyzesL
|
|
|
36
38
|
mgnify_pipelines_toolkit/analysis/shared/library_strategy_check.py,sha256=6Ck2NhwRWw66GctUtKDdPT5fwJhWFR_YOZq-Vxwoa8A,1996
|
|
37
39
|
mgnify_pipelines_toolkit/analysis/shared/mapseq2biom.py,sha256=7-U0DN1joVu0ifLOoDUK2Pfqy8rb1RDKT6khVg3jky0,5559
|
|
38
40
|
mgnify_pipelines_toolkit/analysis/shared/markergene_study_summary.py,sha256=sKAo_rKEyVAZXSaIFMkpSoYZxiWwXMA3XDA6Z-hbHgg,7904
|
|
39
|
-
mgnify_pipelines_toolkit/analysis/shared/study_summary_generator.py,sha256=OOqKaQmKGAya6_BZgfcWBZSVlmZ918PQTVMv6KwGIns,13827
|
|
40
41
|
mgnify_pipelines_toolkit/constants/db_labels.py,sha256=omPINMylAjO2PxeFhSk2MbYNcGZH3P82optSlMey3dw,858
|
|
41
42
|
mgnify_pipelines_toolkit/constants/ncrna.py,sha256=a_5hWp446S7BhRbe_JcydFgZM7sgPLuMlaiBvKWN_XM,1928
|
|
42
43
|
mgnify_pipelines_toolkit/constants/regex_ambiguous_bases.py,sha256=7nEOODQq35y9wx9YnvJuo29oBpwTpXg_kIbf_t7N4TQ,1093
|
|
@@ -44,13 +45,13 @@ mgnify_pipelines_toolkit/constants/regex_fasta_header.py,sha256=G-xrc9b8zdmPTaOI
|
|
|
44
45
|
mgnify_pipelines_toolkit/constants/tax_ranks.py,sha256=kMq__kOJcbiwsgolkdvb-XLo3WMnJdEXgedjUyMOYjI,1081
|
|
45
46
|
mgnify_pipelines_toolkit/constants/thresholds.py,sha256=V_xDBk0RhS3hHeWqOacKzth2gM6zJABRPgwHy-Ciqfk,1157
|
|
46
47
|
mgnify_pipelines_toolkit/constants/var_region_coordinates.py,sha256=0bM4MwarFiM5yTcp5AbAmQ0o-q-gWy7kknir9zJ9R0A,1312
|
|
47
|
-
mgnify_pipelines_toolkit/schemas/schemas.py,sha256=
|
|
48
|
+
mgnify_pipelines_toolkit/schemas/schemas.py,sha256=pyDZvCuWbwccQF0D7c5BN1vv36wQdgcAUXU43_zAu74,18164
|
|
48
49
|
mgnify_pipelines_toolkit/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
49
50
|
mgnify_pipelines_toolkit/utils/fasta_to_delimited.py,sha256=lgYIR1S4crURY7C7nFtgE6QMV4u4zCNsUrVkcRnsEEo,3996
|
|
50
51
|
mgnify_pipelines_toolkit/utils/get_mpt_version.py,sha256=aS9bWrC9CP7tpxoEVg6eEYt18-pmjG7fJl5Mchz4YOU,798
|
|
51
|
-
mgnify_pipelines_toolkit-1.1.
|
|
52
|
-
mgnify_pipelines_toolkit-1.1.
|
|
53
|
-
mgnify_pipelines_toolkit-1.1.
|
|
54
|
-
mgnify_pipelines_toolkit-1.1.
|
|
55
|
-
mgnify_pipelines_toolkit-1.1.
|
|
56
|
-
mgnify_pipelines_toolkit-1.1.
|
|
52
|
+
mgnify_pipelines_toolkit-1.1.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
53
|
+
mgnify_pipelines_toolkit-1.1.2.dist-info/METADATA,sha256=5ByyTshLjj3V5NYnqFinX0ROLb9GmST0m_CltiUdBiY,5811
|
|
54
|
+
mgnify_pipelines_toolkit-1.1.2.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
|
|
55
|
+
mgnify_pipelines_toolkit-1.1.2.dist-info/entry_points.txt,sha256=JSjuxAr71MTeSUPPpno22wmZYgVO-gbsXfDkgWKkF7A,3533
|
|
56
|
+
mgnify_pipelines_toolkit-1.1.2.dist-info/top_level.txt,sha256=xA_wC7C01V3VwuDnqwRM2QYeJJ45WtvF6LVav4tYxuE,25
|
|
57
|
+
mgnify_pipelines_toolkit-1.1.2.dist-info/RECORD,,
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
[console_scripts]
|
|
2
2
|
add_rhea_chebi_annotation = mgnify_pipelines_toolkit.analysis.assembly.add_rhea_chebi_annotation:main
|
|
3
|
+
amplicon_study_summary_generator = mgnify_pipelines_toolkit.analysis.amplicon.study_summary_generator:cli
|
|
3
4
|
antismash_gff_builder = mgnify_pipelines_toolkit.analysis.assembly.antismash_gff_builder:main
|
|
4
5
|
are_there_primers = mgnify_pipelines_toolkit.analysis.amplicon.are_there_primers:main
|
|
6
|
+
assembly_study_summary_generator = mgnify_pipelines_toolkit.analysis.assembly.study_summary_generator:cli
|
|
5
7
|
assess_inflection_point_mcp = mgnify_pipelines_toolkit.analysis.amplicon.assess_inflection_point_mcp:main
|
|
6
8
|
assess_mcp_proportions = mgnify_pipelines_toolkit.analysis.amplicon.assess_mcp_proportions:main
|
|
7
9
|
classify_var_regions = mgnify_pipelines_toolkit.analysis.amplicon.classify_var_regions:main
|
|
@@ -31,7 +33,6 @@ process_dbcan_clusters = mgnify_pipelines_toolkit.analysis.assembly.process_dbca
|
|
|
31
33
|
remove_ambiguous_reads = mgnify_pipelines_toolkit.analysis.amplicon.remove_ambiguous_reads:main
|
|
32
34
|
rev_comp_se_primers = mgnify_pipelines_toolkit.analysis.amplicon.rev_comp_se_primers:main
|
|
33
35
|
standard_primer_matching = mgnify_pipelines_toolkit.analysis.amplicon.standard_primer_matching:main
|
|
34
|
-
study_summary_generator = mgnify_pipelines_toolkit.analysis.shared.study_summary_generator:cli
|
|
35
36
|
summarise_antismash_bgcs = mgnify_pipelines_toolkit.analysis.assembly.summarise_antismash_bgcs:main
|
|
36
37
|
summarise_goslims = mgnify_pipelines_toolkit.analysis.assembly.summarise_goslims:main
|
|
37
38
|
summarise_sanntis_bgcs = mgnify_pipelines_toolkit.analysis.assembly.summarise_sanntis_bgcs:main
|
|
File without changes
|
{mgnify_pipelines_toolkit-1.1.0.dist-info → mgnify_pipelines_toolkit-1.1.2.dist-info}/top_level.txt
RENAMED
|
File without changes
|