biopipen-0.33.1-py3-none-any.whl → biopipen-0.34.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/core/filters.py +10 -183
- biopipen/core/proc.py +5 -3
- biopipen/core/testing.py +8 -1
- biopipen/ns/bam.py +40 -4
- biopipen/ns/cnv.py +1 -1
- biopipen/ns/cnvkit.py +1 -1
- biopipen/ns/delim.py +1 -1
- biopipen/ns/gsea.py +63 -37
- biopipen/ns/misc.py +38 -0
- biopipen/ns/plot.py +8 -0
- biopipen/ns/scrna.py +290 -288
- biopipen/ns/scrna_metabolic_landscape.py +207 -366
- biopipen/ns/tcr.py +165 -97
- biopipen/reports/bam/CNVpytor.svelte +4 -9
- biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
- biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
- biopipen/reports/scrna/DimPlots.svelte +1 -1
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
- biopipen/reports/snp/PlinkCallRate.svelte +2 -2
- biopipen/reports/snp/PlinkFreq.svelte +1 -1
- biopipen/reports/snp/PlinkHWE.svelte +1 -1
- biopipen/reports/snp/PlinkHet.svelte +1 -1
- biopipen/reports/snp/PlinkIBD.svelte +1 -1
- biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
- biopipen/scripts/bam/CNAClinic.R +41 -6
- biopipen/scripts/bam/CNVpytor.py +2 -1
- biopipen/scripts/bam/ControlFREEC.py +2 -3
- biopipen/scripts/bam/SamtoolsView.py +33 -0
- biopipen/scripts/cnv/AneuploidyScore.R +25 -13
- biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
- biopipen/scripts/cnv/TMADScore.R +4 -4
- biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
- biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
- biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
- biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
- biopipen/scripts/delim/RowsBinder.R +1 -1
- biopipen/scripts/delim/SampleInfo.R +4 -1
- biopipen/scripts/gene/GeneNameConversion.R +14 -12
- biopipen/scripts/gsea/Enrichr.R +2 -2
- biopipen/scripts/gsea/FGSEA.R +184 -50
- biopipen/scripts/gsea/PreRank.R +3 -3
- biopipen/scripts/misc/Plot.R +80 -0
- biopipen/scripts/plot/VennDiagram.R +2 -2
- biopipen/scripts/protein/ProdigySummary.R +34 -27
- biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
- biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
- biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
- biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
- biopipen/scripts/regulatory/motifs-common.R +10 -9
- biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
- biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
- biopipen/scripts/rnaseq/Simulation.R +0 -2
- biopipen/scripts/rnaseq/UnitConversion.R +6 -5
- biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
- biopipen/scripts/scrna/CellCellCommunication.py +1 -1
- biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
- biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
- biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
- biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
- biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
- biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
- biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
- biopipen/scripts/scrna/CellsDistribution.R +1 -1
- biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
- biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
- biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
- biopipen/scripts/scrna/MarkersFinder.R +348 -217
- biopipen/scripts/scrna/MetaMarkers.R +3 -3
- biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
- biopipen/scripts/scrna/RadarPlots.R +1 -1
- biopipen/scripts/scrna/ScFGSEA.R +157 -75
- biopipen/scripts/scrna/ScSimulation.R +11 -10
- biopipen/scripts/scrna/ScVelo.py +605 -0
- biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
- biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
- biopipen/scripts/scrna/SeuratClusterStats-features.R +39 -30
- biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
- biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
- biopipen/scripts/scrna/SeuratClustering.R +31 -48
- biopipen/scripts/scrna/SeuratLoading.R +2 -2
- biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
- biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
- biopipen/scripts/scrna/SeuratPreparing.R +76 -24
- biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
- biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
- biopipen/scripts/scrna/Subset10X.R +2 -2
- biopipen/scripts/scrna/TopExpressingGenes.R +141 -184
- biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
- biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
- biopipen/scripts/snp/MatrixEQTL.R +39 -20
- biopipen/scripts/snp/PlinkCallRate.R +43 -34
- biopipen/scripts/snp/PlinkFreq.R +34 -41
- biopipen/scripts/snp/PlinkHWE.R +23 -18
- biopipen/scripts/snp/PlinkHet.R +26 -22
- biopipen/scripts/snp/PlinkIBD.R +30 -34
- biopipen/scripts/stats/ChowTest.R +9 -8
- biopipen/scripts/stats/DiffCoexpr.R +13 -11
- biopipen/scripts/stats/LiquidAssoc.R +7 -8
- biopipen/scripts/stats/Mediation.R +8 -8
- biopipen/scripts/stats/MetaPvalue.R +11 -13
- biopipen/scripts/stats/MetaPvalue1.R +6 -5
- biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
- biopipen/scripts/tcr/ClonalStats.R +5 -4
- biopipen/scripts/tcr/CloneResidency.R +3 -3
- biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
- biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
- biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
- biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
- biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
- biopipen/scripts/tcr/ScRepLoading.R +114 -92
- biopipen/scripts/tcr/TCRClusterStats.R +2 -2
- biopipen/scripts/tcr/TCRClustering.R +86 -97
- biopipen/scripts/tcr/TESSA.R +65 -115
- biopipen/scripts/tcr/VJUsage.R +5 -5
- biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
- biopipen/utils/common_docstrs.py +66 -63
- biopipen/utils/reporter.py +177 -0
- {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/METADATA +2 -1
- {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/RECORD +130 -144
- {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/WHEEL +1 -1
- biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
- biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
- biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
- biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
- biopipen/reports/utils/gsea.liq +0 -110
- biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
- biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
- biopipen/utils/caching.R +0 -44
- biopipen/utils/gene.R +0 -95
- biopipen/utils/gsea.R +0 -329
- biopipen/utils/io.R +0 -20
- biopipen/utils/misc.R +0 -602
- biopipen/utils/mutate_helpers.R +0 -581
- biopipen/utils/plot.R +0 -209
- biopipen/utils/repr.R +0 -146
- biopipen/utils/rnaseq.R +0 -48
- biopipen/utils/single_cell.R +0 -207
- {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,8 +1,7 @@
|
|
|
1
|
-
{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
|
|
2
|
-
|
|
3
1
|
library(rlang)
|
|
4
2
|
library(parallel)
|
|
5
3
|
library(mediation)
|
|
4
|
+
library(biopipen.utils)
|
|
6
5
|
|
|
7
6
|
infile <- {{in.infile | r}}
|
|
8
7
|
fmlfile <- {{in.fmlfile | r}}
|
|
@@ -16,15 +15,16 @@ cases <- {{envs.cases | r}}
|
|
|
16
15
|
transpose_input <- {{envs.transpose_input | r}}
|
|
17
16
|
|
|
18
17
|
set.seed(123)
|
|
18
|
+
log <- get_logger()
|
|
19
19
|
|
|
20
|
-
|
|
20
|
+
log$info("Reading input file ...")
|
|
21
21
|
indata <- read.table(infile, header = TRUE, sep = "\t", row.names = NULL, check.names = FALSE)
|
|
22
22
|
if (transpose_input) { indata <- t(indata) }
|
|
23
23
|
|
|
24
|
-
|
|
24
|
+
log$info("Reading formula file/cases ...")
|
|
25
25
|
if (!is.null(fmlfile)) {
|
|
26
26
|
if (!is.null(cases) && length(cases) > 0) {
|
|
27
|
-
|
|
27
|
+
log$warn("envs.cases ignored as in.fmlfile is provided")
|
|
28
28
|
}
|
|
29
29
|
fmldata <- read.table(fmlfile, header = TRUE, sep = "\t", row.names = NULL)
|
|
30
30
|
# Case M Y X Cov Model_M Model_Y
|
|
@@ -39,14 +39,14 @@ medanalysis <- function(i, total) {
|
|
|
39
39
|
casename <- names(cases)[i]
|
|
40
40
|
case <- cases[[casename]]
|
|
41
41
|
if (total < 50) {
|
|
42
|
-
|
|
42
|
+
log$info("- Case: ", casename)
|
|
43
43
|
} else if (total < 500) {
|
|
44
44
|
if (i %% 10 == 0) {
|
|
45
|
-
|
|
45
|
+
log$info("- Processing case {i}/{total} ...")
|
|
46
46
|
}
|
|
47
47
|
} else {
|
|
48
48
|
if (i %% 100 == 0) {
|
|
49
|
-
|
|
49
|
+
log$info("- Processing case {i}/{total} ...")
|
|
50
50
|
}
|
|
51
51
|
}
|
|
52
52
|
M <- case$M
|
|
@@ -1,10 +1,9 @@
|
|
|
1
|
-
{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
|
|
2
|
-
|
|
3
1
|
library(metap)
|
|
4
2
|
library(rlang)
|
|
5
3
|
library(dplyr)
|
|
4
|
+
library(biopipen.utils)
|
|
6
5
|
|
|
7
|
-
infiles <- {{in.infiles | r}}
|
|
6
|
+
infiles <- {{in.infiles | each: str | r}}
|
|
8
7
|
outfile <- {{out.outfile | r}}
|
|
9
8
|
id_cols <- {{envs.id_cols | r}}
|
|
10
9
|
id_exprs <- {{envs.id_exprs | r}}
|
|
@@ -16,11 +15,13 @@ padj <- {{envs.padj | r}}
|
|
|
16
15
|
|
|
17
16
|
if (method == "fisher") { method = "sumlog" }
|
|
18
17
|
|
|
18
|
+
log <- get_logger()
|
|
19
|
+
|
|
19
20
|
if (length(infiles) == 1 && padj == "none") {
|
|
20
|
-
|
|
21
|
+
log$info("Only one input file, copying to output ...")
|
|
21
22
|
file.copy(infiles, outfile)
|
|
22
23
|
} else if (length(infiles) == 1) {
|
|
23
|
-
|
|
24
|
+
log$info("Only one input file, performing p-value adjustment ...")
|
|
24
25
|
if (is.null(pval_cols)) {
|
|
25
26
|
stop("Must provide envs.pval_cols")
|
|
26
27
|
}
|
|
@@ -30,7 +31,7 @@ if (length(infiles) == 1 && padj == "none") {
|
|
|
30
31
|
}
|
|
31
32
|
indata$Padj <- p.adjust(indata[, pval_cols], method = padj)
|
|
32
33
|
|
|
33
|
-
|
|
34
|
+
log$info("Writing output ...")
|
|
34
35
|
write.table(indata, outfile, quote = FALSE, sep = "\t", row.names = FALSE)
|
|
35
36
|
} else {
|
|
36
37
|
# Check pval_cols
|
|
@@ -68,7 +69,7 @@ if (length(infiles) == 1 && padj == "none") {
|
|
|
68
69
|
}
|
|
69
70
|
}
|
|
70
71
|
|
|
71
|
-
|
|
72
|
+
log$info("Reading and preparing data ...")
|
|
72
73
|
outdata <- NULL
|
|
73
74
|
for (i in seq_along(infiles)) {
|
|
74
75
|
infile <- infiles[i]
|
|
@@ -89,7 +90,7 @@ if (length(infiles) == 1 && padj == "none") {
|
|
|
89
90
|
}
|
|
90
91
|
}
|
|
91
92
|
|
|
92
|
-
|
|
93
|
+
log$info("Running metap on each row ...")
|
|
93
94
|
metaps <- c()
|
|
94
95
|
ns <- c()
|
|
95
96
|
pval_columns <- setdiff(colnames(outdata), id_cols)
|
|
@@ -119,14 +120,11 @@ if (length(infiles) == 1 && padj == "none") {
|
|
|
119
120
|
outdata <- outdata %>% arrange(MetaPval)
|
|
120
121
|
|
|
121
122
|
if (padj != "none") {
|
|
122
|
-
|
|
123
|
+
log$info("Calculating adjusted p-values ...")
|
|
123
124
|
outdata$MetaPadj <- p.adjust(outdata$MetaPval, method = padj)
|
|
124
125
|
|
|
125
126
|
}
|
|
126
127
|
|
|
127
|
-
|
|
128
|
+
log$info("Writing output ...")
|
|
128
129
|
write.table(outdata, outfile, quote = FALSE, sep = "\t", row.names = FALSE)
|
|
129
130
|
}
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
@@ -1,8 +1,7 @@
|
|
|
1
|
-
{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
|
|
2
|
-
|
|
3
1
|
library(metap)
|
|
4
2
|
library(rlang)
|
|
5
3
|
library(dplyr)
|
|
4
|
+
library(biopipen.utils)
|
|
6
5
|
|
|
7
6
|
infile <- {{in.infile | r}}
|
|
8
7
|
outfile <- {{out.outfile | r}}
|
|
@@ -13,6 +12,8 @@ na <- {{envs.na | r}}
|
|
|
13
12
|
keep_single <- {{envs.keep_single | r}}
|
|
14
13
|
padj <- {{envs.padj | r}}
|
|
15
14
|
|
|
15
|
+
log <- get_logger()
|
|
16
|
+
|
|
16
17
|
if (method == "fisher") { method = "sumlog" }
|
|
17
18
|
|
|
18
19
|
# Check pval_cols
|
|
@@ -24,7 +25,7 @@ if (length(id_cols) == 1) {
|
|
|
24
25
|
id_cols <- trimws(strsplit(id_cols, ",")[[1]])
|
|
25
26
|
}
|
|
26
27
|
|
|
27
|
-
|
|
28
|
+
log$info("Reading input and performing meta-analysis ...")
|
|
28
29
|
outdata <- read.table(
|
|
29
30
|
infile, header = TRUE, sep = "\t", row.names = NULL, check.names = FALSE
|
|
30
31
|
) %>%
|
|
@@ -64,10 +65,10 @@ outdata$.pvals <- NULL
|
|
|
64
65
|
outdata <- outdata %>% arrange(MetaPval)
|
|
65
66
|
|
|
66
67
|
if (padj != "none") {
|
|
67
|
-
|
|
68
|
+
log$info("Calculating adjusted p-values ...")
|
|
68
69
|
outdata$MetaPadj <- p.adjust(outdata$MetaPval, method = padj)
|
|
69
70
|
|
|
70
71
|
}
|
|
71
72
|
|
|
72
|
-
|
|
73
|
+
log$info("Writing output ...")
|
|
73
74
|
write.table(outdata, outfile, quote = FALSE, sep = "\t", row.names = FALSE)
|
|
@@ -1,35 +1,45 @@
|
|
|
1
|
-
|
|
1
|
+
library(rlang)
|
|
2
2
|
library(dplyr)
|
|
3
3
|
library(tidyr)
|
|
4
4
|
library(tibble)
|
|
5
|
-
library(ggplot2)
|
|
6
|
-
library(ggridges)
|
|
7
5
|
library(glue)
|
|
8
6
|
library(hash)
|
|
9
7
|
library(glmnet)
|
|
10
8
|
library(broom.mixed)
|
|
11
9
|
library(stringr)
|
|
10
|
+
library(plotthis)
|
|
11
|
+
library(biopipen.utils)
|
|
12
12
|
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
13
|
+
scrfile <- {{in.scrfile | r}}
|
|
14
|
+
outdir <- {{out.outdir | r}}
|
|
15
|
+
joboutdir <- {{job.outdir | r}}
|
|
16
|
+
group_name <- {{envs.group | r}}
|
|
17
|
+
comparison <- {{envs.comparison | r}}
|
|
18
|
+
target <- {{envs.target | r}}
|
|
19
|
+
each_cols <- {{envs.each | r}}
|
|
20
|
+
|
|
21
|
+
log <- get_logger()
|
|
22
|
+
reporter <- get_reporter()
|
|
22
23
|
|
|
23
24
|
if (is.null(group_name) || is.null(comparison)) {
|
|
24
25
|
stop("envs.group and envs.comparison must be specified")
|
|
25
26
|
}
|
|
26
27
|
|
|
27
|
-
if (
|
|
28
|
-
stop("envs.
|
|
28
|
+
if (length(comparison) != 2) {
|
|
29
|
+
stop("envs.comparison must have exactly two elements or keys, representing the two groups to compare")
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
if (!is.list(comparison)) {
|
|
33
|
+
comparison <- stats::setNames(as.list(comparison), comparison)
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
target <- target %||% names(comparison)[1]
|
|
37
|
+
if (!(target %in% names(comparison))) {
|
|
38
|
+
stop(paste0("Target group '", target, "' not found in the comparison groups."))
|
|
29
39
|
}
|
|
30
40
|
|
|
31
|
-
if (is.character(
|
|
32
|
-
|
|
41
|
+
if (is.character(each_cols) && length(each_cols) == 1) {
|
|
42
|
+
each_cols = trimws(strsplit(each_cols, ",")[[1]])
|
|
33
43
|
}
|
|
34
44
|
|
|
35
45
|
### Helpers
|
|
@@ -142,103 +152,43 @@ for (i in 1:3){
|
|
|
142
152
|
AA_MAPS[[i]] <- create_hashmap(as.character(RF$AA), as.vector(RF[,(i+1),drop=TRUE]))
|
|
143
153
|
}
|
|
144
154
|
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
if (is.null(srtobjfile)) {
|
|
148
|
-
metadata = NULL
|
|
149
|
-
} else {
|
|
150
|
-
# Get the extension (lowercase) of srtobjfile, see if it is .rds file
|
|
151
|
-
srtobjfile_ext = tolower(tools::file_ext(srtobjfile))
|
|
152
|
-
if (srtobjfile_ext != "rds") {
|
|
153
|
-
metadata = read.table(
|
|
154
|
-
srtobjfile,
|
|
155
|
-
sep = "\t",
|
|
156
|
-
header = TRUE,
|
|
157
|
-
row.names = 1,
|
|
158
|
-
stringsAsFactors = FALSE,
|
|
159
|
-
check.names = FALSE,
|
|
160
|
-
)
|
|
161
|
-
} else {
|
|
162
|
-
metadata = readRDS(srtobjfile)@meta.data
|
|
163
|
-
}
|
|
164
|
-
}
|
|
165
|
-
|
|
166
|
-
log_info("Loading immdata from immdatafile")
|
|
167
|
-
immdata = readRDS(immdatafile)
|
|
155
|
+
log$info("Loading data from input file")
|
|
156
|
+
mdata <- read_obj(scrfile)@meta.data
|
|
168
157
|
|
|
158
|
+
if (!group_name %in% colnames(mdata)) {
|
|
159
|
+
stop(paste0("Group name '", group_name, "' not found in the data."))
|
|
160
|
+
}
|
|
169
161
|
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
filter(chain == "TRB")
|
|
176
|
-
}
|
|
177
|
-
out = out %>%
|
|
178
|
-
mutate(
|
|
179
|
-
Sample = sam,
|
|
180
|
-
locus = "TCRB",
|
|
181
|
-
sequence = CDR3.aa,
|
|
182
|
-
length = nchar(sequence),
|
|
183
|
-
vgene = V.name,
|
|
184
|
-
jgene = J.name,
|
|
185
|
-
) %>%
|
|
186
|
-
select(Sample, Barcode, locus, sequence, length, vgene, jgene) %>%
|
|
187
|
-
separate_longer_delim(Barcode, delim = ";") %>%
|
|
188
|
-
left_join(immdata$meta, by = "Sample")
|
|
189
|
-
|
|
190
|
-
if (is.null(metadata)) {
|
|
191
|
-
# No metadata, just return
|
|
192
|
-
return (out)
|
|
193
|
-
}
|
|
162
|
+
# check if the values of comparison are in the group_name column
|
|
163
|
+
if (!all(unlist(comparison) %in% as.character(mdata[[group_name]]))) {
|
|
164
|
+
stop(paste0("Some values in comparison are not found in the group_name column: ",
|
|
165
|
+
paste(setdiff(unlist(comparison), mdata[[group_name]]), collapse = ", ")))
|
|
166
|
+
}
|
|
194
167
|
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
sdata = sdata %>% mutate(.prefix_len = nchar(glue(prefix)))
|
|
201
|
-
# Remove the prefix in the rownames of sdata
|
|
202
|
-
rownames(sdata) = substring(rownames(sdata), sdata$.prefix_len + 1)
|
|
203
|
-
sdata = sdata %>% select(-.prefix_len)
|
|
204
|
-
}
|
|
205
|
-
sdata = rownames_to_column(sdata, "Barcode")
|
|
206
|
-
out = out %>% left_join(sdata, by = "Barcode", suffix = c("", "_seurat"))
|
|
207
|
-
out$.Group = NA_character_
|
|
208
|
-
for (k in names(comparison)) {
|
|
209
|
-
group_mask = out[[group_name]] %in% comparison[[k]]
|
|
210
|
-
if (sum(group_mask) == 0) {
|
|
211
|
-
stop(
|
|
212
|
-
glue("No cells in comparison group {k}. Please check if the group items {comparison[[k]]} exist.")
|
|
213
|
-
)
|
|
168
|
+
# add a new column with the keys of comparison, when their values are in the group_name column
|
|
169
|
+
mdata$.Group <- sapply(as.character(mdata[[group_name]]), function(x) {
|
|
170
|
+
for (key in names(comparison)) {
|
|
171
|
+
if (x %in% comparison[[key]]) {
|
|
172
|
+
return(key)
|
|
214
173
|
}
|
|
215
|
-
out$.Group[out[[group_name]] %in% comparison[[k]]] = k
|
|
216
|
-
}
|
|
217
|
-
if (!is.null(subset_cols)) {
|
|
218
|
-
out = out %>% unite(".Subset", all_of(subset_cols), sep = "_", remove = FALSE)
|
|
219
174
|
}
|
|
220
|
-
return
|
|
221
|
-
}
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
for (sam in immdata$meta$Sample) {
|
|
228
|
-
log_info("- For sample {sam}")
|
|
229
|
-
md = merge_data(sam)
|
|
230
|
-
merged = if (is.null(merged)) md else rbind(merged, md)
|
|
231
|
-
}
|
|
175
|
+
return(NA)
|
|
176
|
+
})
|
|
177
|
+
mdata <- mdata %>%
|
|
178
|
+
separate(CTaa, into = c(NA, "sequence"), sep = "_", remove = FALSE) %>%
|
|
179
|
+
separate(CTgene, into = c(NA, "vjgene"), sep = "_", remove = FALSE) %>%
|
|
180
|
+
separate(vjgene, into = c("vgene", NA, "jgene", NA), sep = "\\.", remove = FALSE) %>%
|
|
181
|
+
mutate(length = nchar(sequence))
|
|
232
182
|
|
|
233
183
|
# Statistics about the cell numbers with groups available in metadata
|
|
234
184
|
# !!group_name, TotalCells, AvailCells, AvailCellsPct
|
|
235
|
-
|
|
236
|
-
if (is.null(
|
|
237
|
-
stats =
|
|
185
|
+
log$info("Calculating statistics")
|
|
186
|
+
if (is.null(each_cols)) {
|
|
187
|
+
stats = mdata %>%
|
|
238
188
|
# group by group_name
|
|
239
189
|
group_by(.Group) %>%
|
|
240
190
|
summarise(
|
|
241
|
-
TotalCells = nrow(
|
|
191
|
+
TotalCells = nrow(mdata),
|
|
242
192
|
CellsPerGroup = n(),
|
|
243
193
|
AvailCellsPerGroup = sum(length >= CDR3_MINLEN & length <= CDR3_MAXLEN),
|
|
244
194
|
# Percentage with % in character
|
|
@@ -246,14 +196,15 @@ if (is.null(subset_cols)) {
|
|
|
246
196
|
.groups = "drop"
|
|
247
197
|
)
|
|
248
198
|
} else {
|
|
249
|
-
stats =
|
|
199
|
+
stats = mdata %>%
|
|
200
|
+
unite(".Subset", all_of(each_cols), sep = "_", remove = FALSE) %>%
|
|
250
201
|
group_by(.Subset) %>%
|
|
251
202
|
group_map(function(df, .y) {
|
|
252
203
|
df %>%
|
|
253
204
|
group_by(.Group) %>%
|
|
254
205
|
summarise(
|
|
255
206
|
.Subset = .y$.Subset[1],
|
|
256
|
-
AllCells = nrow(
|
|
207
|
+
AllCells = nrow(mdata),
|
|
257
208
|
TotalCells = nrow(df),
|
|
258
209
|
CellsPerGroup = n(),
|
|
259
210
|
AvailCellsPerGroup = sum(length >= CDR3_MINLEN & length <= CDR3_MAXLEN),
|
|
@@ -274,7 +225,7 @@ write.table(
|
|
|
274
225
|
row.names = FALSE,
|
|
275
226
|
)
|
|
276
227
|
|
|
277
|
-
|
|
228
|
+
reporter$add(
|
|
278
229
|
list(
|
|
279
230
|
kind = "descr",
|
|
280
231
|
content = "Statistics about the cells mapped to the comparison groups. Columns:"
|
|
@@ -304,20 +255,22 @@ add_report(
|
|
|
304
255
|
|
|
305
256
|
|
|
306
257
|
|
|
307
|
-
|
|
308
|
-
|
|
258
|
+
log$info("Add amino acid features")
|
|
259
|
+
mdata = mdata %>%
|
|
309
260
|
filter(!is.na(.Group) & length >= CDR3_MINLEN & length <= CDR3_MAXLEN) %>%
|
|
310
261
|
add_percentAA() %>%
|
|
311
262
|
add_positionalAA()
|
|
312
263
|
|
|
313
264
|
|
|
314
265
|
do_one_subset = function(s) {
|
|
315
|
-
|
|
266
|
+
if (!is.null(s)) {
|
|
267
|
+
log$info(paste("Processing subset", s))
|
|
268
|
+
}
|
|
316
269
|
if (is.null(s)) {
|
|
317
|
-
data =
|
|
270
|
+
data = mdata
|
|
318
271
|
odir = file.path(outdir, "ALL")
|
|
319
272
|
} else {
|
|
320
|
-
data =
|
|
273
|
+
data = mdata %>% filter(.Subset == s)
|
|
321
274
|
odir = file.path(outdir, slugify(s))
|
|
322
275
|
}
|
|
323
276
|
dir.create(odir, recursive = TRUE, showWarnings = FALSE)
|
|
@@ -342,6 +295,13 @@ do_one_subset = function(s) {
|
|
|
342
295
|
}
|
|
343
296
|
}
|
|
344
297
|
y = ifelse(data_fit$.Group == target, 1, 0)
|
|
298
|
+
if (any(table(y) <= 3) || length(table(y)) < 2) {
|
|
299
|
+
if (is.null(s)) {
|
|
300
|
+
log$warn(paste0("Not enough observations for target group '", target, "' with CDR3 length ", len, ". At least 4 observations are required."))
|
|
301
|
+
} else {
|
|
302
|
+
log$warn(paste0("Not enough observations for target group '", target, "' in subset '", s, "' with CDR3 length ", len, ". At least 4 observations are required."))
|
|
303
|
+
}
|
|
304
|
+
}
|
|
345
305
|
# one multinomial or binomial class has 1 or 0 observations; not allowed
|
|
346
306
|
if (any(table(y) <= 1)) { next }
|
|
347
307
|
fit = glmnet(x, y, data=data_fit, alpha=0, lambda=0.01, family="binomial")
|
|
@@ -370,26 +330,22 @@ do_one_subset = function(s) {
|
|
|
370
330
|
write.table(alldf, file = file.path(odir, "estimates.txt"), sep = "\t", quote = FALSE, row.names = FALSE)
|
|
371
331
|
|
|
372
332
|
# save the plots
|
|
373
|
-
gr
|
|
374
|
-
group_by(imgt_pos, feature)
|
|
333
|
+
gr <- alldf %>%
|
|
334
|
+
group_by(imgt_pos, feature) %>%
|
|
375
335
|
summarise(coef = mean(estimate))
|
|
376
336
|
# Avoid too large values
|
|
377
|
-
gr$coef[gr$coef > 1.5]
|
|
337
|
+
gr$coef[gr$coef > 1.5] <- 1.5
|
|
338
|
+
gr$coef <- exp(gr$coef) # Exponentiate the coefficients
|
|
378
339
|
|
|
379
|
-
g
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
g = g + xlab("TCR position") + ylab(paste("Coefficient for", target, "prediction")) + ggtitle(s)
|
|
340
|
+
g <- LinePlot(gr, x = "imgt_pos", y = "coef", group_by = "feature",
|
|
341
|
+
add_line = 1, x_text_angle = 90, xlab = "TCR position",
|
|
342
|
+
ylab = paste("Coefficient for", target, "prediction"), title = s)
|
|
383
343
|
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
344
|
+
save_plot(g, file.path(odir, "estimated_coefficients"),
|
|
345
|
+
devpars = list(width = 1000, height = 1000, res = 100),
|
|
346
|
+
formats = c("png", "pdf"))
|
|
387
347
|
|
|
388
|
-
|
|
389
|
-
print(g)
|
|
390
|
-
dev.off()
|
|
391
|
-
|
|
392
|
-
add_report(
|
|
348
|
+
reporter$add(
|
|
393
349
|
list(
|
|
394
350
|
kind = "descr",
|
|
395
351
|
content = "Estimated coefficients for each feature and position in the CDR3"
|
|
@@ -397,7 +353,7 @@ do_one_subset = function(s) {
|
|
|
397
353
|
h1 = ifelse(
|
|
398
354
|
is.null(s),
|
|
399
355
|
"Estimated OR (per s.d.)",
|
|
400
|
-
paste0(paste(
|
|
356
|
+
paste0(paste(each_cols, collapse = ", "), " - ", s)
|
|
401
357
|
),
|
|
402
358
|
h2 = ifelse(
|
|
403
359
|
is.null(s),
|
|
@@ -406,7 +362,7 @@ do_one_subset = function(s) {
|
|
|
406
362
|
)
|
|
407
363
|
)
|
|
408
364
|
|
|
409
|
-
|
|
365
|
+
reporter$add(
|
|
410
366
|
list(
|
|
411
367
|
name = "Plot",
|
|
412
368
|
contents = list(
|
|
@@ -429,7 +385,7 @@ do_one_subset = function(s) {
|
|
|
429
385
|
h1 = ifelse(
|
|
430
386
|
is.null(s),
|
|
431
387
|
"Estimated OR (per s.d.)",
|
|
432
|
-
paste0(paste(
|
|
388
|
+
paste0(paste(each_cols, collapse = ", "), " - ", s)
|
|
433
389
|
),
|
|
434
390
|
h2 = ifelse(
|
|
435
391
|
is.null(s),
|
|
@@ -443,38 +399,23 @@ do_one_subset = function(s) {
|
|
|
443
399
|
data$mid_hydro = sapply(data$midseq, function(x) get_feat_score(x, AA_MAPS[[2]]))
|
|
444
400
|
data$smid_hydro = scale(data$mid_hydro)[,1]
|
|
445
401
|
|
|
446
|
-
g
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
alpha=0.4,
|
|
464
|
-
show.legend = FALSE
|
|
465
|
-
) + scale_color_manual(values=cols)
|
|
466
|
-
g = g + scale_fill_manual(values=cols) + theme_bw(base_size=12)
|
|
467
|
-
g = g + xlim(c(-4,4)) + xlab("CDR3bmr hydrophobicity") + ylab("") + coord_flip() + ggtitle(s)
|
|
468
|
-
|
|
469
|
-
png(file.path(odir, "distribution.png"), width=1000, height=1000, res=100)
|
|
470
|
-
print(g)
|
|
471
|
-
dev.off()
|
|
472
|
-
|
|
473
|
-
pdf(file.path(odir, "distribution.pdf"), width=10, height=10)
|
|
474
|
-
print(g)
|
|
475
|
-
dev.off()
|
|
476
|
-
|
|
477
|
-
add_report(
|
|
402
|
+
g <- RidgePlot(
|
|
403
|
+
data = data,
|
|
404
|
+
x = "smid_hydro",
|
|
405
|
+
group_by = ".Group",
|
|
406
|
+
xlab = "CDR3bmr hydrophobicity",
|
|
407
|
+
ylab = "",
|
|
408
|
+
add_vline = TRUE,
|
|
409
|
+
alpha = 0.5,
|
|
410
|
+
title = s,
|
|
411
|
+
flip = TRUE
|
|
412
|
+
)
|
|
413
|
+
|
|
414
|
+
save_plot(g, file.path(odir, "distribution"),
|
|
415
|
+
devpars = list(width = 1000, height = 1000, res = 100),
|
|
416
|
+
formats = c("png", "pdf"))
|
|
417
|
+
|
|
418
|
+
reporter$add(
|
|
478
419
|
list(
|
|
479
420
|
kind = "table_image",
|
|
480
421
|
descr = paste0(
|
|
@@ -488,7 +429,7 @@ do_one_subset = function(s) {
|
|
|
488
429
|
h1 = ifelse(
|
|
489
430
|
is.null(s),
|
|
490
431
|
"Hydrophobicity Distribution",
|
|
491
|
-
paste0(paste(
|
|
432
|
+
paste0(paste(each_cols, collapse = ", "), " - ", s)
|
|
492
433
|
),
|
|
493
434
|
h2 = ifelse(
|
|
494
435
|
is.null(s),
|
|
@@ -499,11 +440,11 @@ do_one_subset = function(s) {
|
|
|
499
440
|
|
|
500
441
|
}
|
|
501
442
|
|
|
502
|
-
if (is.null(
|
|
443
|
+
if (is.null(each_cols)) {
|
|
503
444
|
do_one_subset(NULL)
|
|
504
445
|
} else {
|
|
505
|
-
subsets = na.omit(unique(
|
|
446
|
+
subsets = na.omit(unique(obj$.Subset))
|
|
506
447
|
sapply(subsets, do_one_subset)
|
|
507
448
|
}
|
|
508
449
|
|
|
509
|
-
|
|
450
|
+
reporter$save(joboutdir)
|
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
library(rlang)
|
|
2
2
|
library(glue)
|
|
3
|
+
library(dplyr)
|
|
3
4
|
library(scplotter)
|
|
4
5
|
library(biopipen.utils)
|
|
5
6
|
|
|
6
|
-
screpfile <- {{in.screpfile |
|
|
7
|
-
outdir <- {{out.outdir |
|
|
8
|
-
joboutdir <- {{job.outdir |
|
|
7
|
+
screpfile <- {{in.screpfile | r}}
|
|
8
|
+
outdir <- {{out.outdir | r}}
|
|
9
|
+
joboutdir <- {{job.outdir | r}}
|
|
9
10
|
envs <- {{envs | r}}
|
|
10
11
|
mutaters <- envs$mutaters
|
|
11
12
|
cases <- envs$cases
|
|
@@ -397,7 +398,7 @@ get_plot_descr <- function(viz_type, case) {
|
|
|
397
398
|
}
|
|
398
399
|
|
|
399
400
|
log$info("Loading scRepertoire object ...")
|
|
400
|
-
screp <-
|
|
401
|
+
screp <- read_obj(screpfile)
|
|
401
402
|
|
|
402
403
|
log$info("Applying mutaters if any ...")
|
|
403
404
|
screp <- ScRepMutate(screp, mutaters)
|
|
@@ -14,10 +14,10 @@ library(ComplexUpset)
|
|
|
14
14
|
theme_set(theme_prism())
|
|
15
15
|
|
|
16
16
|
|
|
17
|
-
immfile <- {{ in.immdata |
|
|
17
|
+
immfile <- {{ in.immdata | r }}
|
|
18
18
|
metafile <- {{ in.metafile | r }}
|
|
19
|
-
outdir <- {{ out.outdir |
|
|
20
|
-
joboutdir <- {{ job.outdir |
|
|
19
|
+
outdir <- {{ out.outdir | r }}
|
|
20
|
+
joboutdir <- {{ job.outdir | r }}
|
|
21
21
|
|
|
22
22
|
subject_key <- {{ envs.subject | r }}
|
|
23
23
|
group_key <- {{ envs.group | r }}
|
|
@@ -6,8 +6,8 @@ library(tidyr)
|
|
|
6
6
|
library(ggprism)
|
|
7
7
|
|
|
8
8
|
|
|
9
|
-
immfile <- {{ in.immdata |
|
|
10
|
-
outdir <- {{ out.outdir |
|
|
9
|
+
immfile <- {{ in.immdata | r }}
|
|
10
|
+
outdir <- {{ out.outdir | r }}
|
|
11
11
|
|
|
12
12
|
subject_key <- {{ envs.subject | r }}
|
|
13
13
|
group_key <- {{ envs.group | r }}
|
|
@@ -6,15 +6,15 @@ library(tidyr)
|
|
|
6
6
|
library(tibble)
|
|
7
7
|
library(immunarch)
|
|
8
8
|
|
|
9
|
-
immfile = {{in.immdata |
|
|
9
|
+
immfile = {{in.immdata | r}}
|
|
10
10
|
{% if in.filterfile %}
|
|
11
11
|
filters = {{in.filterfile | toml_load | r}}
|
|
12
12
|
{% else %}
|
|
13
13
|
filters = {{envs.filters | r}}
|
|
14
14
|
{% endif %}
|
|
15
15
|
metacols = {{envs.metacols | r}}
|
|
16
|
-
outfile = {{out.outfile |
|
|
17
|
-
groupfile = {{out.groupfile |
|
|
16
|
+
outfile = {{out.outfile | r}}
|
|
17
|
+
groupfile = {{out.groupfile | r}}
|
|
18
18
|
|
|
19
19
|
immdata0 = readRDS(immfile)
|
|
20
20
|
groupname = filters$name
|
|
@@ -9,11 +9,11 @@ library(tibble)
|
|
|
9
9
|
library(glue)
|
|
10
10
|
library(bracer)
|
|
11
11
|
|
|
12
|
-
metafile = {{ in.metafile |
|
|
13
|
-
rdsfile = {{ out.rdsfile |
|
|
14
|
-
metatxt = {{ out.metatxt |
|
|
15
|
-
tmpdir = {{ envs.tmpdir |
|
|
16
|
-
mode = {{ envs.mode |
|
|
12
|
+
metafile = {{ in.metafile | r }}
|
|
13
|
+
rdsfile = {{ out.rdsfile | r }}
|
|
14
|
+
metatxt = {{ out.metatxt | r }}
|
|
15
|
+
tmpdir = {{ envs.tmpdir | r }}
|
|
16
|
+
mode = {{ envs.mode | r }}
|
|
17
17
|
extracols = {{ envs.extracols | r}}
|
|
18
18
|
prefix = {{ envs.prefix | r }}
|
|
19
19
|
|