biopipen 0.32.3__py3-none-any.whl → 0.33.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/core/config.toml +6 -0
- biopipen/core/filters.py +35 -23
- biopipen/core/testing.py +6 -1
- biopipen/ns/bam.py +39 -0
- biopipen/ns/cellranger.py +5 -0
- biopipen/ns/cellranger_pipeline.py +2 -2
- biopipen/ns/cnvkit_pipeline.py +4 -1
- biopipen/ns/delim.py +33 -27
- biopipen/ns/protein.py +99 -0
- biopipen/ns/scrna.py +411 -250
- biopipen/ns/snp.py +16 -3
- biopipen/ns/tcr.py +125 -1
- biopipen/ns/vcf.py +34 -0
- biopipen/ns/web.py +5 -1
- biopipen/reports/scrna/SeuratClusterStats.svelte +1 -1
- biopipen/reports/scrna/SeuratMap2Ref.svelte +15 -2
- biopipen/reports/tcr/ClonalStats.svelte +15 -0
- biopipen/reports/utils/misc.liq +20 -7
- biopipen/scripts/bam/BamMerge.py +2 -2
- biopipen/scripts/bam/BamSampling.py +4 -4
- biopipen/scripts/bam/BamSort.py +141 -0
- biopipen/scripts/bam/BamSplitChroms.py +10 -10
- biopipen/scripts/bam/BamSubsetByBed.py +3 -3
- biopipen/scripts/bam/CNVpytor.py +10 -10
- biopipen/scripts/bam/ControlFREEC.py +11 -11
- biopipen/scripts/bed/Bed2Vcf.py +5 -5
- biopipen/scripts/bed/BedConsensus.py +5 -5
- biopipen/scripts/bed/BedLiftOver.sh +6 -4
- biopipen/scripts/bed/BedtoolsIntersect.py +4 -4
- biopipen/scripts/bed/BedtoolsMakeWindows.py +3 -3
- biopipen/scripts/bed/BedtoolsMerge.py +4 -4
- biopipen/scripts/cellranger/CellRangerCount.py +20 -9
- biopipen/scripts/cellranger/CellRangerSummary.R +20 -29
- biopipen/scripts/cellranger/CellRangerVdj.py +8 -8
- biopipen/scripts/cnvkit/CNVkitAccess.py +6 -6
- biopipen/scripts/cnvkit/CNVkitAutobin.py +25 -18
- biopipen/scripts/cnvkit/CNVkitBatch.py +5 -5
- biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
- biopipen/scripts/cnvkit/CNVkitCoverage.py +2 -2
- biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
- biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
- biopipen/scripts/cnvkit/CNVkitGuessBaits.py +9 -5
- biopipen/scripts/cnvkit/CNVkitHeatmap.py +4 -4
- biopipen/scripts/cnvkit/CNVkitReference.py +2 -2
- biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
- biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
- biopipen/scripts/cnvkit/guess_baits.py +166 -93
- biopipen/scripts/delim/SampleInfo.R +85 -148
- biopipen/scripts/misc/Config2File.py +2 -2
- biopipen/scripts/misc/Str2File.py +2 -2
- biopipen/scripts/protein/MMCIF2PDB.py +33 -0
- biopipen/scripts/protein/PDB2Fasta.py +60 -0
- biopipen/scripts/protein/Prodigy.py +4 -4
- biopipen/scripts/protein/RMSD.py +178 -0
- biopipen/scripts/regulatory/MotifScan.py +8 -8
- biopipen/scripts/scrna/CellCellCommunication.py +59 -22
- biopipen/scripts/scrna/MarkersFinder.R +273 -654
- biopipen/scripts/scrna/RadarPlots.R +73 -53
- biopipen/scripts/scrna/SCP-plot.R +15202 -0
- biopipen/scripts/scrna/ScVelo.py +0 -0
- biopipen/scripts/scrna/SeuratClusterStats-clustree.R +23 -31
- biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +26 -54
- biopipen/scripts/scrna/SeuratClusterStats-features.R +85 -403
- biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +32 -17
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +45 -239
- biopipen/scripts/scrna/SeuratClusterStats.R +13 -19
- biopipen/scripts/scrna/SeuratMap2Ref.R +16 -12
- biopipen/scripts/scrna/SeuratPreparing.R +138 -81
- biopipen/scripts/scrna/SlingShot.R +71 -0
- biopipen/scripts/scrna/celltypist-wrapper.py +7 -6
- biopipen/scripts/snp/Plink2GTMat.py +26 -11
- biopipen/scripts/snp/PlinkFilter.py +7 -7
- biopipen/scripts/snp/PlinkFromVcf.py +8 -5
- biopipen/scripts/snp/PlinkSimulation.py +4 -4
- biopipen/scripts/snp/PlinkUpdateName.py +4 -4
- biopipen/scripts/stats/ChowTest.R +48 -22
- biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
- biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
- biopipen/scripts/tcr/ClonalStats.R +484 -0
- biopipen/scripts/tcr/ScRepLoading.R +127 -0
- biopipen/scripts/tcr/TCRDock.py +10 -6
- biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
- biopipen/scripts/vcf/BcftoolsAnnotate.py +8 -8
- biopipen/scripts/vcf/BcftoolsFilter.py +3 -3
- biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
- biopipen/scripts/vcf/BcftoolsSort.py +4 -4
- biopipen/scripts/vcf/BcftoolsView.py +5 -5
- biopipen/scripts/vcf/Vcf2Bed.py +2 -2
- biopipen/scripts/vcf/VcfAnno.py +11 -11
- biopipen/scripts/vcf/VcfDownSample.sh +22 -10
- biopipen/scripts/vcf/VcfFilter.py +5 -5
- biopipen/scripts/vcf/VcfFix.py +7 -7
- biopipen/scripts/vcf/VcfFix_utils.py +12 -3
- biopipen/scripts/vcf/VcfIndex.py +3 -3
- biopipen/scripts/vcf/VcfIntersect.py +3 -3
- biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
- biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
- biopipen/scripts/vcf/bcftools_utils.py +3 -3
- biopipen/scripts/web/Download.py +8 -4
- biopipen/scripts/web/DownloadList.py +5 -5
- biopipen/scripts/web/GCloudStorageDownloadBucket.py +5 -5
- biopipen/scripts/web/GCloudStorageDownloadFile.py +3 -3
- biopipen/scripts/web/gcloud_common.py +1 -1
- biopipen/utils/gsea.R +75 -35
- biopipen/utils/misc.R +205 -7
- biopipen/utils/misc.py +17 -8
- biopipen/utils/reference.py +11 -11
- biopipen/utils/repr.R +146 -0
- biopipen/utils/vcf.py +1 -1
- {biopipen-0.32.3.dist-info → biopipen-0.33.0.dist-info}/METADATA +8 -8
- {biopipen-0.32.3.dist-info → biopipen-0.33.0.dist-info}/RECORD +114 -105
- {biopipen-0.32.3.dist-info → biopipen-0.33.0.dist-info}/WHEEL +1 -1
- biopipen/scripts/scrna/SeuratClusterStats-hists.R +0 -144
- biopipen/scripts/scrna/SeuratPreparing-common.R +0 -467
- biopipen/scripts/scrna/SeuratPreparing-doublet_detection.R +0 -204
- {biopipen-0.32.3.dist-info → biopipen-0.33.0.dist-info}/entry_points.txt +0 -0
biopipen/utils/misc.R
CHANGED
|
@@ -114,6 +114,70 @@ do_call <- function (what, args, quote = FALSE, envir = parent.frame()) {
|
|
|
114
114
|
|
|
115
115
|
}
|
|
116
116
|
|
|
117
|
+
#' Save the plot into multiple formats
|
|
118
|
+
#'
|
|
119
|
+
#' @param plot The plot object
|
|
120
|
+
#' @param prefix The prefix of the file
|
|
121
|
+
#' @param formats The formats to save
|
|
122
|
+
#' @param bg The background color
|
|
123
|
+
#' @param devpars The device parameters
|
|
124
|
+
#' @export
|
|
125
|
+
save_plot <- function(plot, prefix, devpars = NULL, bg = "white", formats = c("png", "pdf")) {
|
|
126
|
+
devpars <- devpars %||% list()
|
|
127
|
+
devpars$res <- devpars$res %||% 100
|
|
128
|
+
if (!is.null(attr(plot, "width"))) {
|
|
129
|
+
devpars$width <- devpars$width %||% (attr(plot, "width") * devpars$res)
|
|
130
|
+
devpars$height <- devpars$height %||% (attr(plot, "height") * devpars$res)
|
|
131
|
+
} else {
|
|
132
|
+
devpars$width <- devpars$width %||% 800
|
|
133
|
+
devpars$height <- devpars$height %||% 600
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
old_dev <- grDevices::dev.cur()
|
|
137
|
+
for (fmt in formats) {
|
|
138
|
+
filename = paste0(prefix, ".", fmt)
|
|
139
|
+
dev <- ggplot2:::plot_dev(fmt, filename, dpi = devpars$res)
|
|
140
|
+
dim <- ggplot2:::plot_dim(c(devpars$width, devpars$height), units = "px", limitsize = FALSE, dpi = devpars$res)
|
|
141
|
+
dev(filename = filename, width = dim[1], height = dim[2], bg = bg)
|
|
142
|
+
print(plot)
|
|
143
|
+
grDevices::dev.off()
|
|
144
|
+
}
|
|
145
|
+
on.exit(utils::capture.output({
|
|
146
|
+
if (old_dev > 1) grDevices::dev.set(old_dev) # restore old device unless null device
|
|
147
|
+
}))
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
#' Save the code and data needed to regenerate the plot
|
|
151
|
+
#'
|
|
152
|
+
#' @param code The code
|
|
153
|
+
#' @param plot The plot object
|
|
154
|
+
#' @param setup The setup code to generate the plot
|
|
155
|
+
#' @param prefix The prefix of the file
|
|
156
|
+
#' @param ... Additional data frame to save
|
|
157
|
+
#'
|
|
158
|
+
#' @export
|
|
159
|
+
save_plotcode <- function(...) UseMethod("save_plotcode")
|
|
160
|
+
|
|
161
|
+
save_plotcode.character <- function(code, prefix, ..., envir = parent.frame()) {
|
|
162
|
+
codedir <- paste0(prefix, ".code")
|
|
163
|
+
dir.create(codedir, showWarnings = FALSE)
|
|
164
|
+
codefile <- file.path(codedir, "plot.R")
|
|
165
|
+
writeLines(code, codefile)
|
|
166
|
+
save(..., file = file.path(codedir, "data.RData"), envir = envir)
|
|
167
|
+
|
|
168
|
+
zip_file <- paste0(prefix, ".code.zip")
|
|
169
|
+
zip::zip(zip_file, c("plot.R", "data.RData"), root = codedir)
|
|
170
|
+
unlink(codedir, recursive = TRUE)
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
save_plotcode.ggplot <- function(plot, setup, prefix, ..., envir = parent.frame()) {
|
|
174
|
+
if (is.null(plot$logs)) {
|
|
175
|
+
stop("The plot object does not have logs, did you use gglogger?")
|
|
176
|
+
}
|
|
177
|
+
code <- plot$logs$gen_code(setup = setup)
|
|
178
|
+
save_plotcode(code, prefix, ..., envir = envir)
|
|
179
|
+
}
|
|
180
|
+
|
|
117
181
|
#' Set the default value of a key in a list
|
|
118
182
|
#'
|
|
119
183
|
#' @param x A list
|
|
@@ -302,10 +366,13 @@ expand_cases <- function(cases, defaults, expand_each = NULL) {
|
|
|
302
366
|
casename_info <- function(
|
|
303
367
|
casename, cases, outdir,
|
|
304
368
|
section_key = "section",
|
|
305
|
-
section =
|
|
369
|
+
section = NULL,
|
|
306
370
|
sep = "::",
|
|
371
|
+
case_type = c("dir", "prefix"),
|
|
307
372
|
create = FALSE
|
|
308
373
|
) {
|
|
374
|
+
section <- section %||% "DEFAULT"
|
|
375
|
+
case_type <- match.arg(case_type)
|
|
309
376
|
# CR_vs_PD_in_BL:seurat_clusters - IM IL1
|
|
310
377
|
sec_case_names <- strsplit(casename, sep)[[1]]
|
|
311
378
|
# seurat_clusters - IM IL1
|
|
@@ -336,13 +403,25 @@ casename_info <- function(
|
|
|
336
403
|
ifelse(single_section, "#", html_escape(cname))
|
|
337
404
|
)
|
|
338
405
|
)
|
|
339
|
-
|
|
340
|
-
|
|
406
|
+
|
|
407
|
+
if (case_type == "dir") {
|
|
408
|
+
if (single_section && section == "DEFAULT") {
|
|
409
|
+
out$casedir <- file.path(outdir, out$case_slug)
|
|
410
|
+
} else {
|
|
411
|
+
out$casedir <- file.path(outdir, out$section_slug, out$case_slug)
|
|
412
|
+
}
|
|
413
|
+
if (create) {
|
|
414
|
+
dir.create(out$casedir, showWarnings = FALSE, recursive = TRUE)
|
|
415
|
+
}
|
|
341
416
|
} else {
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
417
|
+
if (single_section && section == "DEFAULT") {
|
|
418
|
+
out$caseprefix <- file.path(outdir, out$case_slug)
|
|
419
|
+
} else {
|
|
420
|
+
out$caseprefix <- file.path(outdir, out$section_slug, out$case_slug)
|
|
421
|
+
if (create) {
|
|
422
|
+
dir.create(file.path(outdir, out$section_slug), showWarnings = FALSE, recursive = TRUE)
|
|
423
|
+
}
|
|
424
|
+
}
|
|
346
425
|
}
|
|
347
426
|
out
|
|
348
427
|
}
|
|
@@ -402,3 +481,122 @@ run_command <- function(
|
|
|
402
481
|
return(out)
|
|
403
482
|
}
|
|
404
483
|
}
|
|
484
|
+
|
|
485
|
+
#' Expand the dims usually used in single-cell analysis to specific dimensions
|
|
486
|
+
#'
|
|
487
|
+
#' @param dims The dimensions to expand
|
|
488
|
+
#' @return A vector of expanded dimensions
|
|
489
|
+
#' @export
|
|
490
|
+
#' @examples
|
|
491
|
+
#' expand_dims(NULL) # c(1, 2)
|
|
492
|
+
#' expand_dims(1:2) # c(1, 2)
|
|
493
|
+
#' expand_dims(1) # c(1)
|
|
494
|
+
#' expand_dims("1:2") # c(1, 2)
|
|
495
|
+
#' expand_dims("1") # c(1)
|
|
496
|
+
#' # a dash works the same as a colon
|
|
497
|
+
#' expand_dims("1-3") # c(1, 2, 3)
|
|
498
|
+
#' expand_dims("1,3") # c(1, 3)
|
|
499
|
+
#' expand_dims("1,3:5") # c(1, 3, 4, 5)
|
|
500
|
+
#' expand_dims(c(1, "3:5", 7)) # c(1, 3, 4, 5, 7)
|
|
501
|
+
expand_dims <- function(dims, default = 1:2) {
|
|
502
|
+
if (is.null(dims)) {
|
|
503
|
+
return(default)
|
|
504
|
+
}
|
|
505
|
+
if (is.numeric(dims)) {
|
|
506
|
+
return(dims)
|
|
507
|
+
}
|
|
508
|
+
dims <- unlist(strsplit(dims, ","))
|
|
509
|
+
out <- c()
|
|
510
|
+
for (d in dims) {
|
|
511
|
+
if (grepl(":", d)) {
|
|
512
|
+
d <- unlist(strsplit(d, ":"))
|
|
513
|
+
d <- as.integer(d[1]):as.integer(d[2])
|
|
514
|
+
} else if (grepl("-", d)) {
|
|
515
|
+
d <- unlist(strsplit(d, "-"))
|
|
516
|
+
d <- as.integer(d[1]):as.integer(d[2])
|
|
517
|
+
} else {
|
|
518
|
+
d <- as.integer(d)
|
|
519
|
+
}
|
|
520
|
+
out <- c(out, d)
|
|
521
|
+
}
|
|
522
|
+
out
|
|
523
|
+
}
|
|
524
|
+
|
|
525
|
+
|
|
526
|
+
#' Get plotthis function from plot_type
|
|
527
|
+
#'
|
|
528
|
+
#' @param plot_type The plot type
|
|
529
|
+
#' @param gglogger_register Register the plotthis function to gglogger
|
|
530
|
+
#' @param return_name Return the name of the function instead of the function
|
|
531
|
+
#' @return The plotthis function
|
|
532
|
+
#' @export
|
|
533
|
+
get_plotthis_fn <- function(plot_type, gglogger_register = TRUE, return_name = FALSE) {
|
|
534
|
+
fn_name <- switch(plot_type,
|
|
535
|
+
hist = "Histogram",
|
|
536
|
+
histo = "Histogram",
|
|
537
|
+
histogram = "Histogram",
|
|
538
|
+
featuredim = "FeatureDimPlot",
|
|
539
|
+
splitbar = "SplitBarPlot",
|
|
540
|
+
enrichmap = "EnrichMap",
|
|
541
|
+
enrichnet = "EnrichNetwork",
|
|
542
|
+
enrichnetwork = "EnrichNetwork",
|
|
543
|
+
gsea = "GSEAPlot",
|
|
544
|
+
gseasummary = "GSEASummaryPlot",
|
|
545
|
+
gseasum = "GSEASummaryPlot",
|
|
546
|
+
heatmap = "Heatmap",
|
|
547
|
+
network = "Network",
|
|
548
|
+
pie = "PieChart",
|
|
549
|
+
wordcloud = "WordCloudPlot",
|
|
550
|
+
venn = "VennDiagram",
|
|
551
|
+
paste0(tools::toTitleCase(plot_type), "Plot")
|
|
552
|
+
)
|
|
553
|
+
if (return_name) {
|
|
554
|
+
return(fn_name)
|
|
555
|
+
}
|
|
556
|
+
fn <- tryCatch({
|
|
557
|
+
utils::getFromNamespace(fn_name, "plotthis")
|
|
558
|
+
}, error = function(e) {
|
|
559
|
+
stop("Unknown plot type: ", plot_type)
|
|
560
|
+
})
|
|
561
|
+
|
|
562
|
+
if (gglogger_register) {
|
|
563
|
+
gglogger::register(fn, fn_name)
|
|
564
|
+
} else {
|
|
565
|
+
fn
|
|
566
|
+
}
|
|
567
|
+
}
|
|
568
|
+
|
|
569
|
+
|
|
570
|
+
#' Extract variables from a named list
|
|
571
|
+
#'
|
|
572
|
+
#' @param x A named list
|
|
573
|
+
#' @param ... The names of the variables
|
|
574
|
+
#' @param keep Keep the extracted variables in the list
|
|
575
|
+
#' @param env The environment to assign the extracted variables
|
|
576
|
+
#' @return The list with/without the extracted variables
|
|
577
|
+
#'
|
|
578
|
+
#' @export
|
|
579
|
+
extract_vars <- function(x, ..., keep = FALSE, env = parent.frame()) {
|
|
580
|
+
stopifnot("extract_vars: 'x' must be a named list" = is.list(x) && !is.null(names(x)))
|
|
581
|
+
vars <- list(...)
|
|
582
|
+
if (is.null(names(vars))) {
|
|
583
|
+
names(vars) <- unlist(vars)
|
|
584
|
+
}
|
|
585
|
+
for (i in seq_along(vars)) {
|
|
586
|
+
if (nchar(names(vars)[i]) == 0) {
|
|
587
|
+
names(vars)[i] <- vars[[i]]
|
|
588
|
+
}
|
|
589
|
+
}
|
|
590
|
+
# list2env?
|
|
591
|
+
for (n in names(vars)) {
|
|
592
|
+
if (!n %in% names(x)) {
|
|
593
|
+
stop(sprintf("Variable '%s' not found in the list", n))
|
|
594
|
+
}
|
|
595
|
+
assign(vars[[n]], x[[n]], envir = env)
|
|
596
|
+
if (!isTRUE(keep)) {
|
|
597
|
+
x[[n]] <- NULL
|
|
598
|
+
}
|
|
599
|
+
}
|
|
600
|
+
|
|
601
|
+
x
|
|
602
|
+
}
|
biopipen/utils/misc.py
CHANGED
|
@@ -4,7 +4,8 @@ from pathlib import Path
|
|
|
4
4
|
import os
|
|
5
5
|
import sys
|
|
6
6
|
import logging
|
|
7
|
-
from
|
|
7
|
+
from subprocess import Popen
|
|
8
|
+
from typing import List, Callable, Any
|
|
8
9
|
from biopipen.core.filters import dict_to_cli_args # noqa: F401
|
|
9
10
|
|
|
10
11
|
logger = logging.getLogger("biopipen_job")
|
|
@@ -34,13 +35,13 @@ def exec_code(code, global_vars=None, local_vars=None, return_var=None):
|
|
|
34
35
|
|
|
35
36
|
|
|
36
37
|
def run_command(
|
|
37
|
-
cmd: str | List[
|
|
38
|
+
cmd: str | List[Any],
|
|
38
39
|
fg: bool = False,
|
|
39
40
|
wait: bool = True,
|
|
40
41
|
print_command: bool = True,
|
|
41
|
-
print_command_handler:
|
|
42
|
+
print_command_handler: Callable = print,
|
|
42
43
|
**kwargs,
|
|
43
|
-
):
|
|
44
|
+
) -> Popen | str:
|
|
44
45
|
"""Run a command.
|
|
45
46
|
|
|
46
47
|
Args:
|
|
@@ -57,7 +58,7 @@ def run_command(
|
|
|
57
58
|
The `Popen` object, or str when `stdout` is `RETURN` or `return`.
|
|
58
59
|
"""
|
|
59
60
|
import shlex
|
|
60
|
-
from subprocess import
|
|
61
|
+
from subprocess import PIPE, STDOUT
|
|
61
62
|
|
|
62
63
|
if isinstance(cmd, list):
|
|
63
64
|
cmd = [str(c) for c in cmd]
|
|
@@ -110,15 +111,23 @@ def run_command(
|
|
|
110
111
|
try:
|
|
111
112
|
p = Popen(cmd, **kwargs)
|
|
112
113
|
except Exception as e:
|
|
113
|
-
raise RuntimeError(
|
|
114
|
+
raise RuntimeError(
|
|
115
|
+
f"Failed to run command: {e}\n"
|
|
116
|
+
f"Command (list): {cmd}\n"
|
|
117
|
+
f"Command (str): {shlex.join(cmd)}"
|
|
118
|
+
)
|
|
114
119
|
|
|
115
120
|
if fg or wait or return_stdout:
|
|
116
121
|
rc = p.wait()
|
|
117
122
|
if rc != 0:
|
|
118
|
-
raise RuntimeError(
|
|
123
|
+
raise RuntimeError(
|
|
124
|
+
f"Failed to run command: rc={rc}\n"
|
|
125
|
+
f"Command (list): {cmd}\n"
|
|
126
|
+
f"Command (str): {shlex.join(cmd)}"
|
|
127
|
+
)
|
|
119
128
|
|
|
120
129
|
if return_stdout:
|
|
121
|
-
return p.stdout.read().decode()
|
|
130
|
+
return p.stdout.read().decode() # type: ignore
|
|
122
131
|
|
|
123
132
|
return p
|
|
124
133
|
|
biopipen/utils/reference.py
CHANGED
|
@@ -26,7 +26,7 @@ def gztype(gzfile):
|
|
|
26
26
|
def tabix_index(
|
|
27
27
|
infile: str | PathLike,
|
|
28
28
|
preset: Literal["gff", "bed", "sam", "vcf", "gaf"],
|
|
29
|
-
tmpdir:
|
|
29
|
+
tmpdir: Literal[False] | str | PathLike | None = None,
|
|
30
30
|
tabix: str = config.exe.tabix,
|
|
31
31
|
) -> str | PathLike:
|
|
32
32
|
"""Index input file using tabix
|
|
@@ -126,15 +126,15 @@ def _run_bam_index(
|
|
|
126
126
|
|
|
127
127
|
|
|
128
128
|
def bam_index(
|
|
129
|
-
bam,
|
|
130
|
-
bamdir=tempfile.gettempdir(),
|
|
131
|
-
tool="samtools",
|
|
132
|
-
samtools=config.exe.samtools,
|
|
133
|
-
sambamba=config.exe.sambamba,
|
|
134
|
-
ncores=1,
|
|
135
|
-
ext=".bam.bai",
|
|
136
|
-
force=False,
|
|
137
|
-
):
|
|
129
|
+
bam: str | Path,
|
|
130
|
+
bamdir: Path | str = tempfile.gettempdir(),
|
|
131
|
+
tool: str = "samtools",
|
|
132
|
+
samtools: str = config.exe.samtools,
|
|
133
|
+
sambamba: str = config.exe.sambamba,
|
|
134
|
+
ncores: int = 1,
|
|
135
|
+
ext: str = ".bam.bai",
|
|
136
|
+
force: bool = False,
|
|
137
|
+
) -> Path:
|
|
138
138
|
"""Index a bam file
|
|
139
139
|
|
|
140
140
|
First look for the index file in the same directory as the bam file,
|
|
@@ -175,7 +175,7 @@ def bam_index(
|
|
|
175
175
|
return bam
|
|
176
176
|
|
|
177
177
|
if indexfile.is_file():
|
|
178
|
-
return
|
|
178
|
+
return bam
|
|
179
179
|
|
|
180
180
|
linkfile = Path(bamdir).joinpath(bam.name)
|
|
181
181
|
indexfile = linkfile.with_suffix(ext)
|
biopipen/utils/repr.R
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
library(rlang)
|
|
2
|
+
|
|
3
|
+
#' The string representation of an object
|
|
4
|
+
#' @param x An object
|
|
5
|
+
#' @param newline Whether to add newlines to the output for each element
|
|
6
|
+
#' @return The string representation
|
|
7
|
+
#' @export
|
|
8
|
+
repr <- function(x, newline = FALSE, ...) UseMethod("repr")
|
|
9
|
+
|
|
10
|
+
repr.default <- function(x, newline = FALSE, ...) {
|
|
11
|
+
klass <- paste0(class(x), collapse = "/")
|
|
12
|
+
fallback <- paste0("<", klass, ": ", deparse(substitute(x)), ">")
|
|
13
|
+
|
|
14
|
+
tryCatch(
|
|
15
|
+
x$.repr(newline, ...),
|
|
16
|
+
error = function(e) {
|
|
17
|
+
fallback
|
|
18
|
+
}
|
|
19
|
+
)
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
repr.numeric <- function(x, newline = FALSE, ...) {
|
|
23
|
+
if (length(x) == 1) {
|
|
24
|
+
as.character(x)
|
|
25
|
+
} else if (!newline) {
|
|
26
|
+
paste0("c(", paste(x, collapse = paste0(", ")), ")")
|
|
27
|
+
} else {
|
|
28
|
+
paste0(
|
|
29
|
+
"c(\n",
|
|
30
|
+
paste0(lapply(x, function(y) paste0(" ", y)), collapse = ",\n"),
|
|
31
|
+
"\n)"
|
|
32
|
+
)
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
repr.character <- function(x, newline = FALSE, ...) {
|
|
37
|
+
if (length(x) == 1) {
|
|
38
|
+
paste0("\"", x, "\"")
|
|
39
|
+
} else if (!newline) {
|
|
40
|
+
paste0("c(", paste0(lapply(x, function(y) sQuote(y, q = FALSE)), collapse = ", "), ")")
|
|
41
|
+
} else {
|
|
42
|
+
paste0(
|
|
43
|
+
"c(\n",
|
|
44
|
+
paste0(lapply(x, function(y) paste0(" ", sQuote(y, q = FALSE))), collapse = ",\n"),
|
|
45
|
+
"\n)"
|
|
46
|
+
)
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
repr.factor <- function(x, newline = FALSE, ...) {
|
|
51
|
+
if (!newline) {
|
|
52
|
+
paste0(
|
|
53
|
+
"factor(", repr(as.character(x), newline, ...), ", levels = ", repr(levels(x), newline, ...), ")"
|
|
54
|
+
)
|
|
55
|
+
} else if (!newline) {
|
|
56
|
+
paste0(
|
|
57
|
+
"factor(\n",
|
|
58
|
+
paste0(" ", repr(as.character(x), newline, ...), ",\n"),
|
|
59
|
+
" levels = ", repr(levels(x), newline, ...), "\n)"
|
|
60
|
+
)
|
|
61
|
+
} else {
|
|
62
|
+
paste0(
|
|
63
|
+
"factor(\n",
|
|
64
|
+
paste0(" ", repr(as.character(x), newline, ...), ",\n"),
|
|
65
|
+
" levels = ", repr(levels(x), newline, ...), "\n)"
|
|
66
|
+
)
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
repr.logical <- function(x, newline = FALSE, ...) {
|
|
71
|
+
if (length(x) == 1) {
|
|
72
|
+
if (x) "TRUE" else "FALSE"
|
|
73
|
+
} else if (!newline) {
|
|
74
|
+
paste0("c(", paste0(x, collapse = ","), ")")
|
|
75
|
+
} else {
|
|
76
|
+
paste0(
|
|
77
|
+
"c(\n",
|
|
78
|
+
paste0(lapply(x, function(y) paste0(" ", y)), collapse = ",\n"),
|
|
79
|
+
"\n)"
|
|
80
|
+
)
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
repr.list <- function(x, newline = FALSE, ...) {
|
|
85
|
+
start <- if (newline) "list(\n" else "list("
|
|
86
|
+
end <- if (newline) "\n)" else ")"
|
|
87
|
+
sep <- if (newline) ",\n" else ", "
|
|
88
|
+
prefix <- if (newline) " " else ""
|
|
89
|
+
if (length(names(x)) > 0) {
|
|
90
|
+
paste0(
|
|
91
|
+
start,
|
|
92
|
+
paste0(
|
|
93
|
+
lapply(seq_along(x), function(i) {
|
|
94
|
+
name <- names(x)[i]
|
|
95
|
+
if (identical(name, "")) {
|
|
96
|
+
paste0(prefix, repr(x[[i]]))
|
|
97
|
+
} else {
|
|
98
|
+
paste0(prefix, bQuote(name), " = ", repr(x[[name]]))
|
|
99
|
+
}
|
|
100
|
+
}), collapse = sep
|
|
101
|
+
),
|
|
102
|
+
end
|
|
103
|
+
)
|
|
104
|
+
} else {
|
|
105
|
+
paste0(
|
|
106
|
+
start, paste0(lapply(x, repr, newline = newline, ...), collapse = sep), end
|
|
107
|
+
)
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
repr.NULL <- function(x, newline = FALSE, ...) {
|
|
112
|
+
"NULL"
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
repr.formula <- function(x, newline = FALSE, ...) {
|
|
116
|
+
deparse(x)
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
repr.data.frame <- function(x, newline = FALSE, ...) {
|
|
120
|
+
paste0(
|
|
121
|
+
"data.frame(\n",
|
|
122
|
+
paste0(
|
|
123
|
+
lapply(names(x), function(name) {
|
|
124
|
+
paste0(" ", bQuote(name), " = ", repr(x[[name]], newline = newline, ...))
|
|
125
|
+
}), collapse = ",\n"
|
|
126
|
+
),
|
|
127
|
+
"\n)"
|
|
128
|
+
)
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
repr.environment <- function(x, newline = FALSE, ...) {
|
|
132
|
+
if (!is_environment(x)) {
|
|
133
|
+
# in case .GlobalEnv is dispatched here
|
|
134
|
+
NextMethod()
|
|
135
|
+
} else {
|
|
136
|
+
nl <- if (newline) "\n" else " "
|
|
137
|
+
prefix <- if (newline) " " else ""
|
|
138
|
+
paste0(
|
|
139
|
+
"rlang::env(", nl, paste0(
|
|
140
|
+
lapply(ls(x), function(name) {
|
|
141
|
+
paste0(prefix, bQuote(name), " = ", repr(get(name, x), newline = newline, ...))
|
|
142
|
+
}), collapse = paste0(",", nl)
|
|
143
|
+
), nl, ")"
|
|
144
|
+
)
|
|
145
|
+
}
|
|
146
|
+
}
|
biopipen/utils/vcf.py
CHANGED
|
@@ -326,7 +326,7 @@ class Variant:
|
|
|
326
326
|
if isinstance(samples, str):
|
|
327
327
|
samples = Samples.from_str(samples, format)
|
|
328
328
|
elif isinstance(samples[0], str):
|
|
329
|
-
samples = Samples.from_strs(samples, format)
|
|
329
|
+
samples = Samples.from_strs(samples, format) # type: ignore
|
|
330
330
|
else:
|
|
331
331
|
samples = Samples.from_strss(samples, format)
|
|
332
332
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: biopipen
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.33.0
|
|
4
4
|
Summary: Bioinformatics processes/pipelines that can be run from `pipen run`
|
|
5
5
|
License: MIT
|
|
6
6
|
Author: pwwang
|
|
@@ -14,10 +14,10 @@ Classifier: Programming Language :: Python :: 3.11
|
|
|
14
14
|
Classifier: Programming Language :: Python :: 3.12
|
|
15
15
|
Classifier: Programming Language :: Python :: 3.13
|
|
16
16
|
Provides-Extra: runinfo
|
|
17
|
-
Requires-Dist: datar[pandas] (>=0.15.
|
|
18
|
-
Requires-Dist: pipen-board[report] (>=0.
|
|
19
|
-
Requires-Dist: pipen-cli-run (>=0.
|
|
20
|
-
Requires-Dist: pipen-filters (>=0.
|
|
21
|
-
Requires-Dist: pipen-poplog (>=0.
|
|
22
|
-
Requires-Dist: pipen-runinfo (>=0.
|
|
23
|
-
Requires-Dist: pipen-verbose (>=0.
|
|
17
|
+
Requires-Dist: datar[pandas] (>=0.15.8,<0.16.0)
|
|
18
|
+
Requires-Dist: pipen-board[report] (>=0.17,<0.18)
|
|
19
|
+
Requires-Dist: pipen-cli-run (>=0.15,<0.16)
|
|
20
|
+
Requires-Dist: pipen-filters (>=0.15,<0.16)
|
|
21
|
+
Requires-Dist: pipen-poplog (>=0.3,<0.4)
|
|
22
|
+
Requires-Dist: pipen-runinfo (>=0.9,<0.10) ; extra == "runinfo"
|
|
23
|
+
Requires-Dist: pipen-verbose (>=0.14,<0.15)
|