biopipen 0.33.1__py3-none-any.whl → 0.34.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/core/filters.py +10 -183
- biopipen/core/proc.py +5 -3
- biopipen/core/testing.py +8 -1
- biopipen/ns/bam.py +40 -4
- biopipen/ns/cnv.py +1 -1
- biopipen/ns/cnvkit.py +1 -1
- biopipen/ns/delim.py +1 -1
- biopipen/ns/gsea.py +63 -37
- biopipen/ns/misc.py +38 -0
- biopipen/ns/plot.py +8 -0
- biopipen/ns/scrna.py +290 -288
- biopipen/ns/scrna_metabolic_landscape.py +207 -366
- biopipen/ns/tcr.py +165 -97
- biopipen/reports/bam/CNVpytor.svelte +4 -9
- biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
- biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
- biopipen/reports/scrna/DimPlots.svelte +1 -1
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
- biopipen/reports/snp/PlinkCallRate.svelte +2 -2
- biopipen/reports/snp/PlinkFreq.svelte +1 -1
- biopipen/reports/snp/PlinkHWE.svelte +1 -1
- biopipen/reports/snp/PlinkHet.svelte +1 -1
- biopipen/reports/snp/PlinkIBD.svelte +1 -1
- biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
- biopipen/scripts/bam/CNAClinic.R +41 -6
- biopipen/scripts/bam/CNVpytor.py +2 -1
- biopipen/scripts/bam/ControlFREEC.py +2 -3
- biopipen/scripts/bam/SamtoolsView.py +33 -0
- biopipen/scripts/cnv/AneuploidyScore.R +25 -13
- biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
- biopipen/scripts/cnv/TMADScore.R +4 -4
- biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
- biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
- biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
- biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
- biopipen/scripts/delim/RowsBinder.R +1 -1
- biopipen/scripts/delim/SampleInfo.R +4 -1
- biopipen/scripts/gene/GeneNameConversion.R +14 -12
- biopipen/scripts/gsea/Enrichr.R +2 -2
- biopipen/scripts/gsea/FGSEA.R +184 -50
- biopipen/scripts/gsea/PreRank.R +3 -3
- biopipen/scripts/misc/Plot.R +80 -0
- biopipen/scripts/plot/VennDiagram.R +2 -2
- biopipen/scripts/protein/ProdigySummary.R +34 -27
- biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
- biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
- biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
- biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
- biopipen/scripts/regulatory/motifs-common.R +10 -9
- biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
- biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
- biopipen/scripts/rnaseq/Simulation.R +0 -2
- biopipen/scripts/rnaseq/UnitConversion.R +6 -5
- biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
- biopipen/scripts/scrna/CellCellCommunication.py +1 -1
- biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
- biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
- biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
- biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
- biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
- biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
- biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
- biopipen/scripts/scrna/CellsDistribution.R +1 -1
- biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
- biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
- biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
- biopipen/scripts/scrna/MarkersFinder.R +348 -217
- biopipen/scripts/scrna/MetaMarkers.R +3 -3
- biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
- biopipen/scripts/scrna/RadarPlots.R +1 -1
- biopipen/scripts/scrna/ScFGSEA.R +157 -75
- biopipen/scripts/scrna/ScSimulation.R +11 -10
- biopipen/scripts/scrna/ScVelo.py +605 -0
- biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
- biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
- biopipen/scripts/scrna/SeuratClusterStats-features.R +39 -30
- biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
- biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
- biopipen/scripts/scrna/SeuratClustering.R +31 -48
- biopipen/scripts/scrna/SeuratLoading.R +2 -2
- biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
- biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
- biopipen/scripts/scrna/SeuratPreparing.R +76 -24
- biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
- biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
- biopipen/scripts/scrna/Subset10X.R +2 -2
- biopipen/scripts/scrna/TopExpressingGenes.R +141 -184
- biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
- biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
- biopipen/scripts/snp/MatrixEQTL.R +39 -20
- biopipen/scripts/snp/PlinkCallRate.R +43 -34
- biopipen/scripts/snp/PlinkFreq.R +34 -41
- biopipen/scripts/snp/PlinkHWE.R +23 -18
- biopipen/scripts/snp/PlinkHet.R +26 -22
- biopipen/scripts/snp/PlinkIBD.R +30 -34
- biopipen/scripts/stats/ChowTest.R +9 -8
- biopipen/scripts/stats/DiffCoexpr.R +13 -11
- biopipen/scripts/stats/LiquidAssoc.R +7 -8
- biopipen/scripts/stats/Mediation.R +8 -8
- biopipen/scripts/stats/MetaPvalue.R +11 -13
- biopipen/scripts/stats/MetaPvalue1.R +6 -5
- biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
- biopipen/scripts/tcr/ClonalStats.R +5 -4
- biopipen/scripts/tcr/CloneResidency.R +3 -3
- biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
- biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
- biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
- biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
- biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
- biopipen/scripts/tcr/ScRepLoading.R +114 -92
- biopipen/scripts/tcr/TCRClusterStats.R +2 -2
- biopipen/scripts/tcr/TCRClustering.R +86 -97
- biopipen/scripts/tcr/TESSA.R +65 -115
- biopipen/scripts/tcr/VJUsage.R +5 -5
- biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
- biopipen/utils/common_docstrs.py +66 -63
- biopipen/utils/reporter.py +177 -0
- {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/METADATA +2 -1
- {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/RECORD +130 -144
- {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/WHEEL +1 -1
- biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
- biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
- biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
- biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
- biopipen/reports/utils/gsea.liq +0 -110
- biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
- biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
- biopipen/utils/caching.R +0 -44
- biopipen/utils/gene.R +0 -95
- biopipen/utils/gsea.R +0 -329
- biopipen/utils/io.R +0 -20
- biopipen/utils/misc.R +0 -602
- biopipen/utils/mutate_helpers.R +0 -581
- biopipen/utils/plot.R +0 -209
- biopipen/utils/repr.R +0 -146
- biopipen/utils/rnaseq.R +0 -48
- biopipen/utils/single_cell.R +0 -207
- {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/entry_points.txt +0 -0
biopipen/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.
|
|
1
|
+
__version__ = "0.34.0"
|
biopipen/core/filters.py
CHANGED
|
@@ -6,9 +6,10 @@ import shlex
|
|
|
6
6
|
from pathlib import Path
|
|
7
7
|
from typing import Any, List, Mapping
|
|
8
8
|
|
|
9
|
-
from argx import Namespace
|
|
9
|
+
from argx import Namespace # pyright: ignore[reportPrivateImportUsage]
|
|
10
10
|
from liquid.filters.manager import FilterManager
|
|
11
|
-
from
|
|
11
|
+
from yunpath import CloudPath
|
|
12
|
+
from pipen_report.filters import register_component, _tag
|
|
12
13
|
|
|
13
14
|
# from .defaults import BIOPIPEN_DIR
|
|
14
15
|
|
|
@@ -172,14 +173,14 @@ def r(
|
|
|
172
173
|
return "TRUE"
|
|
173
174
|
if obj.upper() == "FALSE":
|
|
174
175
|
return "FALSE"
|
|
175
|
-
if obj.upper() == "NA" or obj.upper() == "NULL":
|
|
176
|
+
if obj.upper() == "NA" or obj.upper() == "NULL" or obj == "None":
|
|
176
177
|
return obj.upper()
|
|
177
178
|
if re.match(r"^\d+:\d+$", obj):
|
|
178
179
|
return obj
|
|
179
180
|
if obj.startswith("r:") or obj.startswith("R:"):
|
|
180
181
|
return str(obj)[2:]
|
|
181
182
|
return repr(str(obj))
|
|
182
|
-
if isinstance(obj, Path):
|
|
183
|
+
if isinstance(obj, (Path, CloudPath)):
|
|
183
184
|
return repr(str(obj))
|
|
184
185
|
if isinstance(obj, (list, tuple, set)):
|
|
185
186
|
if any(isinstance(i, dict) for i in obj):
|
|
@@ -233,6 +234,11 @@ def source_r(path: str | Path, chdir: bool = False) -> str:
|
|
|
233
234
|
In addition to generating `source(path)`, we also include the mtime for the script
|
|
234
235
|
to trigger the job not cached when the script is updated.
|
|
235
236
|
|
|
237
|
+
If your process is used in a cloud environment, it is recommended to
|
|
238
|
+
use the `read` filter to load the script content instead of sourcing it using
|
|
239
|
+
the `source` function in R to avoid the path issue (path could be different
|
|
240
|
+
in different environments).
|
|
241
|
+
|
|
236
242
|
Args:
|
|
237
243
|
path: The path to the R script
|
|
238
244
|
|
|
@@ -248,98 +254,6 @@ def source_r(path: str | Path, chdir: bool = False) -> str:
|
|
|
248
254
|
)
|
|
249
255
|
|
|
250
256
|
|
|
251
|
-
@register_component("fgsea")
|
|
252
|
-
def _render_fgsea(
|
|
253
|
-
cont: Mapping[str, Any],
|
|
254
|
-
job: Mapping[str, Any],
|
|
255
|
-
level: int,
|
|
256
|
-
na_arg: str = "10",
|
|
257
|
-
) -> str:
|
|
258
|
-
"""Render fgsea report"""
|
|
259
|
-
# cont["dir"] is required
|
|
260
|
-
n_pathways = int(na_arg)
|
|
261
|
-
pathways = []
|
|
262
|
-
with Path(cont["dir"]).joinpath("fgsea.txt").open() as f:
|
|
263
|
-
next(f) # skip header
|
|
264
|
-
for line in f:
|
|
265
|
-
items = line.strip().split("\t")
|
|
266
|
-
pathways.append((items[0], items[-1]))
|
|
267
|
-
if len(pathways) >= n_pathways:
|
|
268
|
-
break
|
|
269
|
-
|
|
270
|
-
components = [
|
|
271
|
-
# Summary
|
|
272
|
-
{
|
|
273
|
-
"title": "Enrichment Analysis Summary",
|
|
274
|
-
"ui": "tabs",
|
|
275
|
-
"contents": [
|
|
276
|
-
{
|
|
277
|
-
"title": "Plot",
|
|
278
|
-
"ui": "flat",
|
|
279
|
-
"contents": [
|
|
280
|
-
{
|
|
281
|
-
"kind": "descr",
|
|
282
|
-
"content": (
|
|
283
|
-
"This table presents a comprehensive summary of the "
|
|
284
|
-
"top enriched pathways derived from the fgsea. "
|
|
285
|
-
"Each row corresponds to a pathway, and the gene ranks "
|
|
286
|
-
"are shown based on the ranking metric used in the "
|
|
287
|
-
"analysis. The enrichment score, p-value, and adjusted "
|
|
288
|
-
"p-value are also provided to assess the significance "
|
|
289
|
-
"of the enrichment."
|
|
290
|
-
)
|
|
291
|
-
},
|
|
292
|
-
{
|
|
293
|
-
"kind": "image",
|
|
294
|
-
"src": str(Path(cont["dir"]).joinpath("gsea_table.png")),
|
|
295
|
-
"download": str(Path(cont["dir"]).joinpath("gsea_table.pdf"))
|
|
296
|
-
}
|
|
297
|
-
],
|
|
298
|
-
},
|
|
299
|
-
{
|
|
300
|
-
"title": "Table",
|
|
301
|
-
"ui": "flat",
|
|
302
|
-
"contents": [
|
|
303
|
-
{
|
|
304
|
-
"kind": "descr",
|
|
305
|
-
"content": (
|
|
306
|
-
"This plot represents the GSEA results for a specified "
|
|
307
|
-
"gene set, illustrating the distribution and impact of "
|
|
308
|
-
"the gene set along the ranked list of genes. "
|
|
309
|
-
"The running enrichment score curve shows the "
|
|
310
|
-
"cumulative enrichment score as genes from the input "
|
|
311
|
-
"list are encountered. Positive peaks on the curve "
|
|
312
|
-
"indicate regions where members of the gene set are "
|
|
313
|
-
"predominantly found."
|
|
314
|
-
)
|
|
315
|
-
},
|
|
316
|
-
{
|
|
317
|
-
"kind": "table",
|
|
318
|
-
"src": str(Path(cont["dir"]).joinpath("fgsea.txt")),
|
|
319
|
-
"data": {"excluded": {"slug"}},
|
|
320
|
-
}
|
|
321
|
-
],
|
|
322
|
-
},
|
|
323
|
-
]
|
|
324
|
-
},
|
|
325
|
-
# Pathways
|
|
326
|
-
{
|
|
327
|
-
"title": f"Enriched Pathways (Top {n_pathways})",
|
|
328
|
-
"ui": "table_of_images",
|
|
329
|
-
"contents": [
|
|
330
|
-
{
|
|
331
|
-
"src": str(Path(cont["dir"]) / f"fgsea_{slug}.png"),
|
|
332
|
-
"download": str(Path(cont["dir"]) / f"fgsea_{slug}.pdf"),
|
|
333
|
-
"title": pw,
|
|
334
|
-
}
|
|
335
|
-
for pw, slug in pathways
|
|
336
|
-
]
|
|
337
|
-
},
|
|
338
|
-
]
|
|
339
|
-
|
|
340
|
-
return render_ui(components, "accordion", job, level) # type: ignore
|
|
341
|
-
|
|
342
|
-
|
|
343
257
|
@register_component("pdf")
|
|
344
258
|
def _render_pdf(
|
|
345
259
|
cont: Mapping[str, Any],
|
|
@@ -367,90 +281,3 @@ def _render_gsea(
|
|
|
367
281
|
"""Render gsea report"""
|
|
368
282
|
# cont["dir"] is required
|
|
369
283
|
raise NotImplementedError()
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
@register_component("enrichr")
|
|
373
|
-
def _render_enrichr(
|
|
374
|
-
cont: Mapping[str, Any],
|
|
375
|
-
job: Mapping[str, Any],
|
|
376
|
-
level: int,
|
|
377
|
-
) -> str:
|
|
378
|
-
"""Render enrichr report"""
|
|
379
|
-
# cont["dir"] is required
|
|
380
|
-
dbs = [sumfile.stem[8:] for sumfile in Path(cont["dir"]).glob("Enrichr-*.txt")]
|
|
381
|
-
components = []
|
|
382
|
-
|
|
383
|
-
for db in dbs:
|
|
384
|
-
enrichr_plots = list(Path(cont["dir"]).glob(f"Enrichr-{db}.*.png"))
|
|
385
|
-
if len(enrichr_plots) == 0:
|
|
386
|
-
components.append(
|
|
387
|
-
{
|
|
388
|
-
"title": db,
|
|
389
|
-
"ui": "tabs",
|
|
390
|
-
"contents": [
|
|
391
|
-
{
|
|
392
|
-
"title": "Error",
|
|
393
|
-
"ui": "flat",
|
|
394
|
-
"contents": [
|
|
395
|
-
{
|
|
396
|
-
"kind": "descr",
|
|
397
|
-
"content": (
|
|
398
|
-
"The enrichment analysis results of the top "
|
|
399
|
-
"biological pathways associated with the input "
|
|
400
|
-
"gene set. Each bar represents a pathway, "
|
|
401
|
-
"with the length of the bar indicating the "
|
|
402
|
-
"number of input genes overlapping with genes "
|
|
403
|
-
"in that pathway. The color intensity of the "
|
|
404
|
-
"bars reflects the statistical significance of "
|
|
405
|
-
"the enrichment (p-value). "
|
|
406
|
-
)
|
|
407
|
-
},
|
|
408
|
-
{
|
|
409
|
-
"kind": "error",
|
|
410
|
-
"content": "No enriched terms found.",
|
|
411
|
-
}
|
|
412
|
-
],
|
|
413
|
-
},
|
|
414
|
-
],
|
|
415
|
-
}
|
|
416
|
-
)
|
|
417
|
-
else:
|
|
418
|
-
contents = []
|
|
419
|
-
for enrichr_plot in enrichr_plots:
|
|
420
|
-
plot_type = enrichr_plot.stem.split(".")[-1]
|
|
421
|
-
pdf = enrichr_plot.with_suffix(".pdf")
|
|
422
|
-
contents.append(
|
|
423
|
-
{
|
|
424
|
-
"src": str(enrichr_plot),
|
|
425
|
-
"title": f"{plot_type.title()} Plot",
|
|
426
|
-
"download": str(pdf),
|
|
427
|
-
}
|
|
428
|
-
)
|
|
429
|
-
|
|
430
|
-
components.append(
|
|
431
|
-
{
|
|
432
|
-
"title": db,
|
|
433
|
-
"ui": "tabs",
|
|
434
|
-
"contents": [
|
|
435
|
-
{
|
|
436
|
-
"title": "Plots",
|
|
437
|
-
"ui": "table_of_images",
|
|
438
|
-
"contents": contents,
|
|
439
|
-
},
|
|
440
|
-
{
|
|
441
|
-
"title": "Table",
|
|
442
|
-
"ui": "flat",
|
|
443
|
-
"contents": [
|
|
444
|
-
{
|
|
445
|
-
"kind": "table",
|
|
446
|
-
"src": str(
|
|
447
|
-
Path(cont["dir"]).joinpath(f"Enrichr-{db}.txt")
|
|
448
|
-
),
|
|
449
|
-
}
|
|
450
|
-
],
|
|
451
|
-
},
|
|
452
|
-
],
|
|
453
|
-
}
|
|
454
|
-
)
|
|
455
|
-
|
|
456
|
-
return render_ui(components, "accordion", job, level)
|
biopipen/core/proc.py
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
"""Provides a base class for the processes to subclass"""
|
|
2
|
-
from
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from diot import Diot # type: ignore
|
|
3
5
|
from liquid.defaults import SEARCH_PATHS
|
|
4
|
-
from pipen import Proc as PipenProc
|
|
6
|
+
from pipen import Proc as PipenProc # type: ignore
|
|
5
7
|
from pipen_filters.filters import FILTERS
|
|
6
8
|
|
|
7
9
|
from .filters import filtermanager
|
|
@@ -23,7 +25,7 @@ class Proc(PipenProc):
|
|
|
23
25
|
template_opts = {
|
|
24
26
|
"globals": {**FILTERS, "biopipen_dir": str(BIOPIPEN_DIR)},
|
|
25
27
|
"filters": {**FILTERS, **filtermanager.filters},
|
|
26
|
-
"search_paths": SEARCH_PATHS + [str(REPORT_DIR)],
|
|
28
|
+
"search_paths": SEARCH_PATHS + [str(REPORT_DIR)], # type: ignore
|
|
27
29
|
}
|
|
28
30
|
|
|
29
31
|
plugin_opts = {
|
biopipen/core/testing.py
CHANGED
|
@@ -44,12 +44,19 @@ def get_pipeline(testfile, loglevel="debug", enable_report=False, **kwargs):
|
|
|
44
44
|
"""Get a pipeline for a test file"""
|
|
45
45
|
name, workdir, outdir = _get_test_dirs(testfile, False)
|
|
46
46
|
report_plugin_prefix = "+" if enable_report else "-"
|
|
47
|
+
plugins = kwargs.pop("plugins", [])
|
|
48
|
+
if any("report" in p for p in plugins if isinstance(p, str)):
|
|
49
|
+
raise ValueError(
|
|
50
|
+
"Do not pass `report` plugin to `get_pipeline(plugins=[...])`, "
|
|
51
|
+
"use `enable_report` instead."
|
|
52
|
+
)
|
|
53
|
+
plugins.append(f"{report_plugin_prefix}report")
|
|
47
54
|
kws = {
|
|
48
55
|
"name": name,
|
|
49
56
|
"workdir": workdir,
|
|
50
57
|
"outdir": outdir,
|
|
51
58
|
"loglevel": loglevel,
|
|
52
|
-
"plugins":
|
|
59
|
+
"plugins": plugins,
|
|
53
60
|
}
|
|
54
61
|
kws.update(kwargs)
|
|
55
62
|
return Pipen(**kws)
|
biopipen/ns/bam.py
CHANGED
|
@@ -4,6 +4,9 @@ from ..core.proc import Proc
|
|
|
4
4
|
from ..core.config import config
|
|
5
5
|
|
|
6
6
|
|
|
7
|
+
# +-------------------------------------------------------------------+
|
|
8
|
+
# | CNV callers |
|
|
9
|
+
# +-------------------------------------------------------------------+
|
|
7
10
|
class CNVpytor(Proc):
|
|
8
11
|
"""Detect CNV using CNVpytor
|
|
9
12
|
|
|
@@ -26,15 +29,14 @@ class CNVpytor(Proc):
|
|
|
26
29
|
binsizes: The binsizes
|
|
27
30
|
snp: How to read snp data
|
|
28
31
|
filters: The filters to filter the result
|
|
29
|
-
See - https://github.com/abyzovlab/CNVpytor/blob/master
|
|
30
|
-
/GettingStarted.md#predicting-cnv-regions
|
|
32
|
+
See - https://github.com/abyzovlab/CNVpytor/blob/master/GettingStarted.md#predicting-cnv-regions
|
|
31
33
|
mask_snps: Whether mask 1000 Genome snps
|
|
32
34
|
baf_nomask: Do not use P mask in BAF histograms
|
|
33
35
|
|
|
34
36
|
Requires:
|
|
35
37
|
cnvpytor:
|
|
36
38
|
- check: {{proc.envs.cnvpytor}} --version
|
|
37
|
-
"""
|
|
39
|
+
""" # noqa: E501
|
|
38
40
|
input = "bamfile:file, snpfile:file"
|
|
39
41
|
output = "outdir:dir:{{in.bamfile | stem}}.cnvpytor"
|
|
40
42
|
lang = config.lang.python
|
|
@@ -150,7 +152,7 @@ class CNAClinic(Proc):
|
|
|
150
152
|
A list of sample names
|
|
151
153
|
A float number (0 < x <= 1), the fraction of samples to use
|
|
152
154
|
An integer number (x > 1), the number of samples to use
|
|
153
|
-
binsize: Directly use this binsize for CNAClinic, in
|
|
155
|
+
binsize: Directly use this binsize for CNAClinic, in bp.
|
|
154
156
|
genome: The genome assembly
|
|
155
157
|
run_args: The arguments for CNAClinic::runSegmentation
|
|
156
158
|
plot_args: The arguments for CNAClinic::plotSampleData
|
|
@@ -181,6 +183,9 @@ class CNAClinic(Proc):
|
|
|
181
183
|
}
|
|
182
184
|
|
|
183
185
|
|
|
186
|
+
# +-------------------------------------------------------------------+
|
|
187
|
+
# | Bam processing tools |
|
|
188
|
+
# +-------------------------------------------------------------------+
|
|
184
189
|
class BamSplitChroms(Proc):
|
|
185
190
|
"""Split bam file by chromosomes
|
|
186
191
|
|
|
@@ -368,3 +373,34 @@ class BamSort(Proc):
|
|
|
368
373
|
"index": True,
|
|
369
374
|
}
|
|
370
375
|
script = "file://../scripts/bam/BamSort.py"
|
|
376
|
+
|
|
377
|
+
|
|
378
|
+
class SamtoolsView(Proc):
|
|
379
|
+
"""View bam file using samtools, mostly used for filtering
|
|
380
|
+
|
|
381
|
+
This is a wrapper for `samtools view` command.
|
|
382
|
+
It will create a new bam file with the same name as the input bam file.
|
|
383
|
+
|
|
384
|
+
Input:
|
|
385
|
+
bamfile: The bam file
|
|
386
|
+
|
|
387
|
+
Output:
|
|
388
|
+
outfile: The output bam file
|
|
389
|
+
|
|
390
|
+
Envs:
|
|
391
|
+
ncores: Number of cores to use
|
|
392
|
+
samtools: Path to samtools executable
|
|
393
|
+
index: Whether to index the output bam file
|
|
394
|
+
Requires the input bam file to be sorted.
|
|
395
|
+
<more>: Other arguments passed to the view tool
|
|
396
|
+
See `samtools view` or `sambamba view`.
|
|
397
|
+
"""
|
|
398
|
+
input = "bamfile:file"
|
|
399
|
+
output = "outfile:file:{{in.bamfile | stem}}.bam"
|
|
400
|
+
lang = config.lang.python
|
|
401
|
+
envs = {
|
|
402
|
+
"ncores": config.misc.ncores,
|
|
403
|
+
"samtools": config.exe.samtools,
|
|
404
|
+
"index": True,
|
|
405
|
+
}
|
|
406
|
+
script = "file://../scripts/bam/SamtoolsView.py"
|
biopipen/ns/cnv.py
CHANGED
|
@@ -150,7 +150,7 @@ class TMADScore(Proc):
|
|
|
150
150
|
excl_chroms (list): The chromosomes to be excluded
|
|
151
151
|
"""
|
|
152
152
|
input = "segfile:file"
|
|
153
|
-
output = "outfile:file:{{in.segfile |
|
|
153
|
+
output = "outfile:file:{{in.segfile | stem}}.tmad.txt"
|
|
154
154
|
lang = config.lang.rscript
|
|
155
155
|
envs = {
|
|
156
156
|
"chrom_col": "chrom",
|
biopipen/ns/cnvkit.py
CHANGED
|
@@ -482,7 +482,7 @@ class CNVkitDiagram(Proc):
|
|
|
482
482
|
}
|
|
483
483
|
script = "file://../scripts/cnvkit/CNVkitDiagram.py"
|
|
484
484
|
plugin_opts = {
|
|
485
|
-
"report": "file://../reports/cnvkit/
|
|
485
|
+
"report": "file://../reports/cnvkit/CNVkitDiagram.svelte",
|
|
486
486
|
"report_paging": 10,
|
|
487
487
|
}
|
|
488
488
|
|
biopipen/ns/delim.py
CHANGED
biopipen/ns/gsea.py
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
"""Gene set enrichment analysis"""
|
|
2
|
+
from pipen.utils import mark
|
|
2
3
|
from ..core.proc import Proc
|
|
3
4
|
from ..core.config import config
|
|
4
5
|
|
|
5
6
|
|
|
7
|
+
@mark(deprecated='[{proc.name}] is deprecated, use `FGSEA` instead.')
|
|
6
8
|
class GSEA(Proc):
|
|
7
9
|
"""Gene set enrichment analysis
|
|
8
10
|
|
|
@@ -51,6 +53,7 @@ class GSEA(Proc):
|
|
|
51
53
|
plugin_opts = {"report": "file://../reports/gsea/GSEA.svelte"}
|
|
52
54
|
|
|
53
55
|
|
|
56
|
+
@mark(deprecated='[{proc.name}] is deprecated, use `FGSEA` directly.')
|
|
54
57
|
class PreRank(Proc):
|
|
55
58
|
"""PreRank the genes for GSEA analysis
|
|
56
59
|
|
|
@@ -100,59 +103,82 @@ class PreRank(Proc):
|
|
|
100
103
|
class FGSEA(Proc):
|
|
101
104
|
"""Gene set enrichment analysis using `fgsea`
|
|
102
105
|
|
|
103
|
-
Need `devtools::install_github("ctlab/fgsea")`
|
|
104
|
-
|
|
105
106
|
Input:
|
|
106
|
-
infile: The expression file.
|
|
107
|
-
Either a tab-delimited
|
|
107
|
+
infile: The expression file (genes x samples).
|
|
108
|
+
Either a tab-delimited file.
|
|
108
109
|
metafile: The meta data file, determining the class of the samples
|
|
109
|
-
Two columns are required
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
configfile: The configuration file in TOML format to specify some envs.
|
|
114
|
-
`clscol`: If not provided, will use `envs.clscol`
|
|
115
|
-
`classes`: Defines pos and neg labels. If not provided, use will
|
|
116
|
-
`envs.classes`.
|
|
110
|
+
Two columns are required. If column `Sample` is found, it will be used
|
|
111
|
+
as the samples; otherwise the first column should be the samples.
|
|
112
|
+
The other column should be the group/class of the samples, whose
|
|
113
|
+
name is specified by `envs.clscol`.
|
|
117
114
|
|
|
118
115
|
Output:
|
|
119
|
-
outdir: The output directory
|
|
116
|
+
outdir: The output directory containing the results, including
|
|
117
|
+
the table and plots.
|
|
120
118
|
|
|
121
119
|
Envs:
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
120
|
+
ncores (type=int): Number of cores for parallelization
|
|
121
|
+
Passed to `nproc` of `fgseaMultilevel()`.
|
|
122
|
+
case: The case label for the positive class.
|
|
123
|
+
control: The control label for the negative class.
|
|
124
|
+
When there are only two classes in `in.metafile` at column `envs.clscol`,
|
|
125
|
+
either `case` or `control` can be specified and the other will be
|
|
126
|
+
automatically set to the other class.
|
|
127
|
+
gmtfile: The pathways in GMT format, with the gene names/ids in the same format as in the expression file (`in.infile`).
|
|
128
|
+
One could also use a URL to a GMT file. For example, from <https://download.baderlab.org/EM_Genesets/current_release/Human/symbol/Pathways/>.
|
|
129
|
+
method (choice): The method to do the preranking.
|
|
130
|
+
- signal_to_noise: Signal to noise.
|
|
131
|
+
The larger the differences of the means (scaled by the standard deviations);
|
|
132
|
+
that is, the more distinct the gene expression is in each phenotype and the more the gene
|
|
133
|
+
acts as a "class marker".
|
|
134
|
+
- s2n: Alias of signal_to_noise.
|
|
135
|
+
- abs_signal_to_noise: The absolute value of signal_to_noise.
|
|
136
|
+
- abs_s2n: Alias of abs_signal_to_noise.
|
|
137
|
+
- t_test: T test.
|
|
138
|
+
Uses the difference of means scaled by the standard deviation and number of samples.
|
|
139
|
+
- ratio_of_classes: Also referred to as fold change.
|
|
140
|
+
Uses the ratio of class means to calculate fold change for natural scale data.
|
|
141
|
+
- diff_of_classes: Difference of class means.
|
|
142
|
+
Uses the difference of class means to calculate fold change for natural scale data
|
|
143
|
+
- log2_ratio_of_classes: Log2 ratio of class means.
|
|
144
|
+
Uses the log2 ratio of class means to calculate fold change for natural scale data.
|
|
145
|
+
This is the recommended statistic for calculating fold change for log scale data.
|
|
129
146
|
clscol: The column of metafile specifying the classes of the samples
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
147
|
+
When `in.metafile` is not specified, it can also be specified as a list of
|
|
148
|
+
classes, in the same order as the samples in `in.infile`.
|
|
149
|
+
top (type=auto): Do gsea table and enrich plot for top N pathways.
|
|
150
|
+
If it is < 1, will apply it to `padj`, selecting pathways with `padj` < `top`.
|
|
151
|
+
eps (type=float): This parameter sets the boundary for calculating the p value.
|
|
152
|
+
See <https://rdrr.io/bioc/fgsea/man/fgseaMultilevel.html>
|
|
153
|
+
minsize (type=int): Minimal size of a gene set to test. All pathways below the threshold are excluded.
|
|
154
|
+
maxsize (type=int): Maximal size of a gene set to test. All pathways above the threshold are excluded.
|
|
155
|
+
rest (type=json;order=98): Rest arguments for [`fgsea()`](https://rdrr.io/bioc/fgsea/man/fgsea.html)
|
|
156
|
+
See also <https://rdrr.io/bioc/fgsea/man/fgseaMultilevel.html>
|
|
157
|
+
cases (type=json;order=99): If you have multiple cases, you can specify them here.
|
|
158
|
+
The keys are the names of the cases and the values are the above options except `mutaters`.
|
|
159
|
+
If some options are not specified, the default values specified above will be used.
|
|
160
|
+
If no cases are specified, the default case will be added with the name `GSEA`.
|
|
137
161
|
|
|
138
162
|
Requires:
|
|
139
163
|
bioconductor-fgsea:
|
|
140
164
|
- check: {{proc.lang}} -e "library(fgsea)"
|
|
141
|
-
"""
|
|
142
|
-
input = "infile:file, metafile:file
|
|
165
|
+
""" # noqa: E501
|
|
166
|
+
input = "infile:file, metafile:file"
|
|
143
167
|
output = "outdir:dir:{{in.infile | stem}}.fgsea"
|
|
144
168
|
lang = config.lang.rscript
|
|
145
169
|
envs = {
|
|
146
|
-
"inopts": {"header": True, "row.names": -1},
|
|
147
|
-
"metaopts": {"header": True, "row.names": -1},
|
|
148
|
-
"method": "s2n",
|
|
149
|
-
"clscol": None,
|
|
150
|
-
"classes": None,
|
|
151
|
-
"top": 20,
|
|
152
170
|
"ncores": config.misc.ncores,
|
|
153
|
-
"
|
|
154
|
-
"
|
|
171
|
+
"case": None,
|
|
172
|
+
"control": None,
|
|
173
|
+
"gmtfile": None,
|
|
174
|
+
"method": "signal_to_noise",
|
|
175
|
+
"clscol": None,
|
|
176
|
+
"top": 10,
|
|
155
177
|
"eps": 0,
|
|
178
|
+
"minsize": 10,
|
|
179
|
+
"maxsize": 100,
|
|
180
|
+
"rest": {},
|
|
181
|
+
"cases": {},
|
|
156
182
|
}
|
|
157
183
|
script = "file://../scripts/gsea/FGSEA.R"
|
|
158
184
|
plugin_opts = {"report": "file://../reports/gsea/FGSEA.svelte"}
|
biopipen/ns/misc.py
CHANGED
|
@@ -106,3 +106,41 @@ class Shell(Proc):
|
|
|
106
106
|
envs = {"cmd": "", "outdir": False}
|
|
107
107
|
lang = config.lang.bash
|
|
108
108
|
script = "file://../scripts/misc/Shell.sh"
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
class Plot(Proc):
|
|
112
|
+
"""Plot given data using plotthis package in R
|
|
113
|
+
|
|
114
|
+
Input:
|
|
115
|
+
datafile: The input data file in RDS or qs/qs2 format.
|
|
116
|
+
If it is in neither RDS nor qs/qs2 format, read.table will be used
|
|
117
|
+
to read the data file with the options provided by `envs.read_opts`.
|
|
118
|
+
|
|
119
|
+
Output:
|
|
120
|
+
plotfile: The output plot file in PNG format
|
|
121
|
+
|
|
122
|
+
Envs:
|
|
123
|
+
fn: The plot function to use. Required.
|
|
124
|
+
devpars (ns): The device parameters for the plot.
|
|
125
|
+
- width: The width of the plot in pixels.
|
|
126
|
+
- height: The height of the plot in pixels.
|
|
127
|
+
- res: The resolution of the plot in DPI.
|
|
128
|
+
more_formats: The additional formats to save the plot in other than PNG.
|
|
129
|
+
The file will be saved in the same directory as the plotfile.
|
|
130
|
+
save_code: Whether to save the R code used for plotting.
|
|
131
|
+
read_opts: Options to read the data file.
|
|
132
|
+
If the data file is in neither RDS nor qs/qs2 format, these options
|
|
133
|
+
will be passed to `read.table`.
|
|
134
|
+
<more>: Additional parameters to the plot function.
|
|
135
|
+
"""
|
|
136
|
+
input = "datafile:file"
|
|
137
|
+
output = "plotfile:file:{{in.datafile | stem}}.png"
|
|
138
|
+
envs = {
|
|
139
|
+
"fn": None,
|
|
140
|
+
"devpars": {"res": 100},
|
|
141
|
+
"more_formats": [],
|
|
142
|
+
"save_code": False,
|
|
143
|
+
"read_opts": {},
|
|
144
|
+
}
|
|
145
|
+
lang = config.lang.rscript
|
|
146
|
+
script = "file://../scripts/misc/Plot.R"
|
biopipen/ns/plot.py
CHANGED
|
@@ -1,8 +1,16 @@
|
|
|
1
1
|
"""Plotting data"""
|
|
2
2
|
|
|
3
|
+
import warnings
|
|
4
|
+
|
|
3
5
|
from ..core.proc import Proc
|
|
4
6
|
from ..core.config import config
|
|
5
7
|
|
|
8
|
+
warnings.warn(
|
|
9
|
+
"The `biopipen.ns.plot` module is deprecated and will be removed in the future. "
|
|
10
|
+
"Please use `biopipen.ns.misc.Plot` process instead.",
|
|
11
|
+
DeprecationWarning,
|
|
12
|
+
)
|
|
13
|
+
|
|
6
14
|
|
|
7
15
|
class VennDiagram(Proc):
|
|
8
16
|
"""Plot Venn diagram
|