biopipen 0.34.1__py3-none-any.whl → 0.34.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/ns/scrna.py +259 -34
- biopipen/ns/scrna_metabolic_landscape.py +1 -1
- biopipen/ns/tcr.py +9 -4
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +12 -3
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +12 -3
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +12 -3
- biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +3 -10
- biopipen/scripts/scrna/MarkersFinder.R +34 -28
- biopipen/scripts/scrna/PseudoBulkDEG.R +592 -0
- biopipen/scripts/scrna/ScFGSEA.R +35 -35
- biopipen/scripts/scrna/SeuratClusterStats-clustree.R +16 -0
- biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +1 -1
- biopipen/scripts/scrna/SeuratClusterStats-features.R +29 -6
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +29 -1
- biopipen/scripts/scrna/SeuratClusterStats.R +1 -0
- biopipen/scripts/scrna/TopExpressingGenes.R +6 -6
- biopipen/scripts/scrna/celltypist-wrapper.py +2 -0
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +9 -3
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +2 -2
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +1 -0
- biopipen/scripts/tcr/GIANA/GIANA4.py +2 -4
- biopipen/scripts/tcr/ScRepCombiningExpression.R +3 -2
- biopipen/scripts/tcr/ScRepLoading.R +7 -2
- biopipen/scripts/tcr/TCRClustering.R +9 -23
- biopipen/scripts/tcr/TESSA.R +4 -2
- {biopipen-0.34.1.dist-info → biopipen-0.34.3.dist-info}/METADATA +1 -1
- {biopipen-0.34.1.dist-info → biopipen-0.34.3.dist-info}/RECORD +30 -31
- biopipen/reports/scrna/TopExpressingGenes.svelte +0 -17
- biopipen/scripts/scrna/SCP-plot.R +0 -15202
- {biopipen-0.34.1.dist-info → biopipen-0.34.3.dist-info}/WHEEL +0 -0
- {biopipen-0.34.1.dist-info → biopipen-0.34.3.dist-info}/entry_points.txt +0 -0
biopipen/scripts/scrna/ScFGSEA.R
CHANGED
|
@@ -7,9 +7,9 @@ srtfile <- {{in.srtobj | r}} # nolint
|
|
|
7
7
|
outdir <- {{out.outdir | r}} # nolint
|
|
8
8
|
joboutdir <- {{job.outdir | r}} # nolint
|
|
9
9
|
mutaters <- {{envs.mutaters | r}} # nolint
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
10
|
+
group_by <- {{envs.group_by | default: envs["group-by"] | default: None | r}} # nolint
|
|
11
|
+
ident_1 <- {{envs.ident_1 | default: envs["ident-1"] | default: None | r}} # nolint
|
|
12
|
+
ident_2 <- {{envs.ident_2 | default: envs["ident-2"] | default: None | r}} # nolint
|
|
13
13
|
each <- {{envs.each | r}} # nolint
|
|
14
14
|
subset <- {{envs.subset | r}} # nolint
|
|
15
15
|
gmtfile <- {{envs.gmtfile | r}} # nolint
|
|
@@ -18,8 +18,8 @@ top <- {{envs.top | r}} # nolint
|
|
|
18
18
|
minsize <- {{envs.minSize | default: envs.minsize | r}} # nolint
|
|
19
19
|
maxsize <- {{envs.maxSize | default: envs.maxsize | r}} # nolint
|
|
20
20
|
eps <- {{envs.eps | r}} # nolint
|
|
21
|
-
|
|
22
|
-
|
|
21
|
+
alleach_plots_defaults <- {{envs.alleach_plots_defaults | r}} # nolint
|
|
22
|
+
alleach_plots <- {{envs.alleach_plots | r}} #
|
|
23
23
|
ncores <- {{envs.ncores | r}} # nolint
|
|
24
24
|
rest <- {{envs.rest | r: todot="-"}} # nolint
|
|
25
25
|
cases <- {{envs.cases | r: todot="-"}} # nolint
|
|
@@ -27,8 +27,8 @@ cases <- {{envs.cases | r: todot="-"}} # nolint
|
|
|
27
27
|
log <- get_logger()
|
|
28
28
|
reporter <- get_reporter()
|
|
29
29
|
|
|
30
|
-
|
|
31
|
-
list_update(
|
|
30
|
+
alleach_plots <- lapply(alleach_plots, function(x) {
|
|
31
|
+
list_update(alleach_plots_defaults, x)
|
|
32
32
|
})
|
|
33
33
|
|
|
34
34
|
log$info("Reading Seurat object ...")
|
|
@@ -43,9 +43,9 @@ if (!is.null(mutaters) && length(mutaters) > 0) {
|
|
|
43
43
|
}
|
|
44
44
|
|
|
45
45
|
defaults <- list(
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
46
|
+
group_by = group_by,
|
|
47
|
+
ident_1 = ident_1,
|
|
48
|
+
ident_2 = ident_2,
|
|
49
49
|
each = each,
|
|
50
50
|
subset = subset,
|
|
51
51
|
gmtfile = gmtfile,
|
|
@@ -54,8 +54,8 @@ defaults <- list(
|
|
|
54
54
|
minsize = minsize,
|
|
55
55
|
maxsize = maxsize,
|
|
56
56
|
eps = eps,
|
|
57
|
-
|
|
58
|
-
|
|
57
|
+
alleach_plots_defaults = alleach_plots_defaults,
|
|
58
|
+
alleach_plots = alleach_plots,
|
|
59
59
|
ncores = ncores,
|
|
60
60
|
rest = rest
|
|
61
61
|
)
|
|
@@ -63,11 +63,11 @@ defaults <- list(
|
|
|
63
63
|
expand_each <- function(name, case) {
|
|
64
64
|
outcases <- list()
|
|
65
65
|
|
|
66
|
-
case$
|
|
66
|
+
case$group_by <- case$group_by %||% "Identity"
|
|
67
67
|
|
|
68
68
|
if (is.null(case$each) || is.na(case$each) || nchar(case$each) == 0 || isFALSE(each)) {
|
|
69
|
-
if (length(case$
|
|
70
|
-
stop("Cannot perform `
|
|
69
|
+
if (length(case$alleach_plots) > 0) {
|
|
70
|
+
stop("Cannot perform `alleach_plots` without `each` defined.")
|
|
71
71
|
}
|
|
72
72
|
|
|
73
73
|
outcases[[name]] <- case
|
|
@@ -93,8 +93,8 @@ expand_each <- function(name, case) {
|
|
|
93
93
|
newcase$each_name <- case$each
|
|
94
94
|
newcase$each <- each
|
|
95
95
|
|
|
96
|
-
newcase$
|
|
97
|
-
newcase$
|
|
96
|
+
newcase$alleach_plots_defaults <- NULL
|
|
97
|
+
newcase$alleach_plots <- NULL
|
|
98
98
|
|
|
99
99
|
if (!is.null(case$subset)) {
|
|
100
100
|
newcase$subset <- paste0(case$subset, " & ", bQuote(case$each), " == '", each, "'")
|
|
@@ -105,13 +105,13 @@ expand_each <- function(name, case) {
|
|
|
105
105
|
outcases[[newname]] <- newcase
|
|
106
106
|
}
|
|
107
107
|
|
|
108
|
-
if (length(case$
|
|
108
|
+
if (length(case$alleach_plots) > 0) {
|
|
109
109
|
newcase <- case
|
|
110
110
|
|
|
111
111
|
newcase$gseas <- list()
|
|
112
|
-
newcase$
|
|
113
|
-
newcase$
|
|
114
|
-
function(x) { list_update(newcase$
|
|
112
|
+
newcase$alleach_plots <- lapply(
|
|
113
|
+
newcase$alleach_plots,
|
|
114
|
+
function(x) { list_update(newcase$alleach_plots_defaults, x) }
|
|
115
115
|
)
|
|
116
116
|
|
|
117
117
|
outcases[[paste0(name, " (all ", case$each,")")]] <- newcase
|
|
@@ -154,8 +154,8 @@ do_case <- function(name) {
|
|
|
154
154
|
}))
|
|
155
155
|
gseas[[case$each]] <- factor(gseas[[case$each]], levels = each_levels)
|
|
156
156
|
|
|
157
|
-
for (plotname in names(case$
|
|
158
|
-
plotargs <- case$
|
|
157
|
+
for (plotname in names(case$alleach_plots)) {
|
|
158
|
+
plotargs <- case$alleach_plots[[plotname]]
|
|
159
159
|
plotargs <- extract_vars(plotargs, "devpars")
|
|
160
160
|
plotargs$gsea_results <- gseas
|
|
161
161
|
plotargs$group_by <- case$each
|
|
@@ -182,12 +182,12 @@ do_case <- function(name) {
|
|
|
182
182
|
allow_empty = !is.null(case$each)
|
|
183
183
|
# prepare expression matrix
|
|
184
184
|
log$info(" Preparing expression matrix...")
|
|
185
|
-
sobj <- ensure_sobj({ srtobj %>% filter(!is.na(!!sym(case$
|
|
185
|
+
sobj <- ensure_sobj({ srtobj %>% filter(!is.na(!!sym(case$group_by))) }, allow_empty)
|
|
186
186
|
if (is.null(sobj)) {
|
|
187
187
|
reporter$add2(
|
|
188
188
|
list(
|
|
189
189
|
kind = "error",
|
|
190
|
-
content = paste0("No cells with non-NA `", case$
|
|
190
|
+
content = paste0("No cells with non-NA `", case$group_by, "` in the Seurat object.")
|
|
191
191
|
),
|
|
192
192
|
hs = c(info$section, info$name)
|
|
193
193
|
)
|
|
@@ -200,20 +200,20 @@ do_case <- function(name) {
|
|
|
200
200
|
reporter$add2(
|
|
201
201
|
list(
|
|
202
202
|
kind = "error",
|
|
203
|
-
content = paste0("No cells with non-NA `", case$
|
|
203
|
+
content = paste0("No cells with non-NA `", case$group_by, "` in the Seurat object.")
|
|
204
204
|
),
|
|
205
205
|
hs = c(info$section, info$name)
|
|
206
206
|
)
|
|
207
207
|
return(NULL)
|
|
208
208
|
}
|
|
209
209
|
}
|
|
210
|
-
if (!is.null(case$
|
|
211
|
-
sobj <- ensure_sobj({ sobj %>% filter(!!sym(case$
|
|
210
|
+
if (!is.null(case$ident_2)) {
|
|
211
|
+
sobj <- ensure_sobj({ sobj %>% filter(!!sym(case$group_by) %in% c(case$ident_1, case$ident_2)) }, allow_empty)
|
|
212
212
|
if (is.null(sobj)) {
|
|
213
213
|
reporter$add2(
|
|
214
214
|
list(
|
|
215
215
|
kind = "error",
|
|
216
|
-
content = paste0("No cells with non-NA `", case$
|
|
216
|
+
content = paste0("No cells with non-NA `", case$group_by, "` in the Seurat object.")
|
|
217
217
|
),
|
|
218
218
|
hs = c(info$section, info$name)
|
|
219
219
|
)
|
|
@@ -221,16 +221,16 @@ do_case <- function(name) {
|
|
|
221
221
|
}
|
|
222
222
|
}
|
|
223
223
|
|
|
224
|
-
allclasses <- sobj@meta.data[, case$
|
|
225
|
-
if (is.null(case$
|
|
226
|
-
case$
|
|
227
|
-
allclasses[allclasses != case$
|
|
224
|
+
allclasses <- sobj@meta.data[, case$group_by, drop = TRUE]
|
|
225
|
+
if (is.null(case$ident_2)) {
|
|
226
|
+
case$ident_2 <- "Other"
|
|
227
|
+
allclasses[allclasses != case$ident_1] <- "Other"
|
|
228
228
|
}
|
|
229
229
|
exprs <- GetAssayData(sobj, layer = "data")
|
|
230
230
|
|
|
231
231
|
# get preranks
|
|
232
232
|
log$info(" Getting preranks...")
|
|
233
|
-
ranks <- RunGSEAPreRank(exprs, allclasses, case$
|
|
233
|
+
ranks <- RunGSEAPreRank(exprs, allclasses, case$ident_1, case$ident_2, case$method)
|
|
234
234
|
write.table(
|
|
235
235
|
as.data.frame(ranks),
|
|
236
236
|
file.path(info$prefix, "fgsea.rank.txt"),
|
|
@@ -310,7 +310,7 @@ do_case <- function(name) {
|
|
|
310
310
|
|
|
311
311
|
reporter$add2(
|
|
312
312
|
list(
|
|
313
|
-
name = paste0("Table (", case$
|
|
313
|
+
name = paste0("Table (", case$ident_1, " vs ", case$ident_2, ")"),
|
|
314
314
|
contents = list(
|
|
315
315
|
list(kind = "descr", content = paste0(
|
|
316
316
|
"Showing top 50 pathways by padj in descending order. ",
|
|
@@ -26,6 +26,22 @@ if (
|
|
|
26
26
|
if (length(clustrees) == 0) {
|
|
27
27
|
log$warn("- no case found, skipping ...")
|
|
28
28
|
} else {
|
|
29
|
+
reporter$add(
|
|
30
|
+
list(
|
|
31
|
+
kind = "descr",
|
|
32
|
+
content = 'The clustree plots displays clustering results from the Seurat object across different
|
|
33
|
+
resolutions of the clustering algorithm
|
|
34
|
+
(<a target="_blank" href="https://satijalab.org/seurat/reference/findclusters">Seurat::FindClusters</a>).
|
|
35
|
+
Each node represents a cluster, with the resolution levels labeled along the vertical (y) axis.
|
|
36
|
+
The size of each node reflects the number of cells in that cluster. Edges connect clusters between
|
|
37
|
+
adjacent resolutions and indicate how cells transition between clusters as resolution increases.
|
|
38
|
+
The thickness of the edges corresponds to the proportion of shared cells (in_prop) between clusters,
|
|
39
|
+
where darker lines signify a higher overlap (up to 100%). The color of the edges indicates the actual
|
|
40
|
+
number of cells that transitioned between clusters.'
|
|
41
|
+
),
|
|
42
|
+
h1 = "Clustree plots"
|
|
43
|
+
)
|
|
44
|
+
|
|
29
45
|
reports <- list()
|
|
30
46
|
for (name in names(clustrees)) {
|
|
31
47
|
if (is.null(clustrees[[name]]$prefix)) {
|
|
@@ -40,7 +40,7 @@ do_one_dimplot = function(name) {
|
|
|
40
40
|
reporter$add(
|
|
41
41
|
list(
|
|
42
42
|
kind = "descr",
|
|
43
|
-
content = paste0("Dimensionality reduction plot for ", case$
|
|
43
|
+
content = paste0("Dimensionality reduction plot for ", case$group_by)
|
|
44
44
|
),
|
|
45
45
|
reporter$image(prefix, "pdf", FALSE),
|
|
46
46
|
h1 = name
|
|
@@ -64,11 +64,11 @@ do_one_features <- function(name) {
|
|
|
64
64
|
log$info("- Case: {name}")
|
|
65
65
|
|
|
66
66
|
case <- list_update(features_defaults, features[[name]])
|
|
67
|
-
case$descr <- case$descr %||% ""
|
|
68
67
|
case <- extract_vars(
|
|
69
68
|
case,
|
|
70
69
|
"devpars", "more_formats", "save_code", "save_data", "order_by",
|
|
71
|
-
"subset", "features", "descr"
|
|
70
|
+
"subset", "features", "descr",
|
|
71
|
+
allow_nonexisting = TRUE)
|
|
72
72
|
|
|
73
73
|
if (!is.null(subset)) {
|
|
74
74
|
case$object <- srtobj %>% filter(!!parse_expr(subset))
|
|
@@ -77,6 +77,7 @@ do_one_features <- function(name) {
|
|
|
77
77
|
}
|
|
78
78
|
|
|
79
79
|
if (exists("order_by") && !is.null(order_by)) {
|
|
80
|
+
case$ident <- case$ident %||% GetIdentityColumn(case$object)
|
|
80
81
|
if (length(order_by) < 2) {
|
|
81
82
|
clusters <- case$object@meta.data %>%
|
|
82
83
|
group_by(!!sym(case$ident)) %>%
|
|
@@ -126,12 +127,34 @@ do_one_features <- function(name) {
|
|
|
126
127
|
caching$save(info$prefix)
|
|
127
128
|
}
|
|
128
129
|
# add reports
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
130
|
+
default_descr <- glue(
|
|
131
|
+
"The plot shows the distribution or pattern of the specified features ({paste(case$features %||% features, collapse = ', ')}) ",
|
|
132
|
+
"across cells",
|
|
133
|
+
"{if (!is.null(case$ident)) glue(', identified by \"{case$ident}\"') else ''}",
|
|
134
|
+
"{if (!is.null(case$group_by)) glue(', grouped by \"{case$group_by}\"') else ''}",
|
|
135
|
+
"{if (!is.null(case$split_by)) glue(', and split by \"{case$split_by}\"') else ''}. ",
|
|
136
|
+
"The plot type is '{case$plot_type}', ",
|
|
137
|
+
"{if (case$plot_type == 'dim') 'displaying the features on a dimensional reduction embedding' ",
|
|
138
|
+
" else if (case$plot_type == 'heatmap') 'arranged as a heatmap by rows_name and other grouping variables' ",
|
|
139
|
+
" else if (case$plot_type %in% c('violin', 'box', 'ridge')) 'showing the distribution of feature values by the grouping variables' ",
|
|
140
|
+
" else if (case$plot_type == 'cor') 'showing the correlation between features' ",
|
|
141
|
+
" else 'showing aggregated feature values by the grouping variables'}. ",
|
|
142
|
+
"{if (!is.null(case$facet_by)) glue('Plots are further faceted by \"{case$facet_by}\". ') else ''}",
|
|
143
|
+
"{if (case$plot_type == 'dim') glue('The reduction used is \"{if (!is.null(case$reduction)) case$reduction else DefaultDimReduc(case$object)}\"') else ''}",
|
|
144
|
+
"{if (case$plot_type == 'dim' && !is.null(case$graph)) glue(', with graph \"{case$graph}\" drawn to show cell neighbor edges') else ''}",
|
|
145
|
+
"{if (case$plot_type == 'dim' && !is.null(case$bg_cutoff) && case$bg_cutoff > 0) glue(', and a background cutoff of {case$bg_cutoff}') else ''}",
|
|
146
|
+
"{if (case$plot_type == 'dim') glue(', using dimensions {paste(case$dims %||% 1:2, collapse = \",\")}') else ''}"
|
|
147
|
+
)
|
|
148
|
+
if (!is.null(case$comparisons)) {
|
|
149
|
+
default_descr <- paste0(
|
|
150
|
+
default_descr,
|
|
151
|
+
"Statistical comparisons were performed between groups using '{case$pairwise_method %||% 'wilcox.test'}' method."
|
|
133
152
|
)
|
|
134
153
|
}
|
|
154
|
+
reporter$add2(
|
|
155
|
+
list(kind = "descr", content = descr %||% default_descr),
|
|
156
|
+
hs = c(info$section, info$name)
|
|
157
|
+
)
|
|
135
158
|
|
|
136
159
|
if (save_data) {
|
|
137
160
|
reporter$add2(
|
|
@@ -5,17 +5,26 @@ log$info("stats:")
|
|
|
5
5
|
odir <- file.path(outdir, "stats")
|
|
6
6
|
dir.create(odir, recursive=TRUE, showWarnings=FALSE)
|
|
7
7
|
|
|
8
|
+
|
|
9
|
+
|
|
8
10
|
do_one_stats <- function(name) {
|
|
9
11
|
log$info("- Case: {name}")
|
|
10
12
|
|
|
11
13
|
case <- list_update(stats_defaults, stats[[name]])
|
|
12
|
-
extract_vars(case, "devpars", "more_formats", "save_code", "save_data", "subset")
|
|
14
|
+
case <- extract_vars(case, "devpars", "more_formats", "save_code", "save_data", "subset", "descr")
|
|
13
15
|
|
|
14
16
|
if (!is.null(subset)) {
|
|
15
17
|
case$object <- srtobj %>% filter(!!parse_expr(subset))
|
|
16
18
|
} else {
|
|
17
19
|
case$object <- srtobj
|
|
18
20
|
}
|
|
21
|
+
ident <- case$ident %||% GetIdentityColumn(case$object)
|
|
22
|
+
groupings <- unique(c(case$group_by, case$rows_by, case$columns_by, case$pie_group_by, ident))
|
|
23
|
+
if (length(groupings) > 0) {
|
|
24
|
+
for (g in groupings) {
|
|
25
|
+
case$object <- filter(case$object, !is.na(!!sym(g)))
|
|
26
|
+
}
|
|
27
|
+
}
|
|
19
28
|
|
|
20
29
|
info <- case_info(name, odir, is_dir = FALSE, create = TRUE)
|
|
21
30
|
p <- do_call(gglogger::register(CellStatPlot), case)
|
|
@@ -27,6 +36,20 @@ do_one_stats <- function(name) {
|
|
|
27
36
|
auto_data_setup = FALSE)
|
|
28
37
|
}
|
|
29
38
|
|
|
39
|
+
frac <- case$frac %||% "none"
|
|
40
|
+
default_descr <- glue(
|
|
41
|
+
"The {case$plot_type} plot shows the distribution of cells across categories defined by '{ident}'",
|
|
42
|
+
"{if (!is.null(case$group_by)) glue(', grouped by {case$group_by}') else ''}",
|
|
43
|
+
"{if (!is.null(case$split_by)) glue(', and split by {case$split_by}') else ''}. ",
|
|
44
|
+
"The values represent ",
|
|
45
|
+
"{if (frac == 'none') 'the number of cells' else glue('the fraction of cells calculated by \"{frac}\"')}. "
|
|
46
|
+
)
|
|
47
|
+
if (!is.null(case$comparisons)) {
|
|
48
|
+
default_descr <- paste0(
|
|
49
|
+
default_descr,
|
|
50
|
+
"Statistical comparisons were performed between groups using '{case$pairwise_method %||% 'wilcox.test'}' method."
|
|
51
|
+
)
|
|
52
|
+
}
|
|
30
53
|
if (save_data) {
|
|
31
54
|
pdata <- attr(p, "data") %||% p$data
|
|
32
55
|
if (!inherits(pdata, "data.frame") && !inherits(pdata, "matrix")) {
|
|
@@ -37,6 +60,10 @@ do_one_stats <- function(name) {
|
|
|
37
60
|
list(
|
|
38
61
|
name = "Plot",
|
|
39
62
|
contents = list(
|
|
63
|
+
list(
|
|
64
|
+
kind = "descr",
|
|
65
|
+
content = case$descr %||% default_descr
|
|
66
|
+
),
|
|
40
67
|
reporter$image(
|
|
41
68
|
info$prefix, more_formats, save_code, kind = "image")
|
|
42
69
|
)
|
|
@@ -60,6 +87,7 @@ do_one_stats <- function(name) {
|
|
|
60
87
|
)
|
|
61
88
|
} else {
|
|
62
89
|
reporter$add2(
|
|
90
|
+
list(kind = "descr", content = case$descr %||% default_descr),
|
|
63
91
|
reporter$image(info$prefix, more_formats, save_code, kind = "image"),
|
|
64
92
|
hs = c(info$section, info$name)
|
|
65
93
|
)
|
|
@@ -9,7 +9,7 @@ outdir <- {{out.outdir | r}}
|
|
|
9
9
|
joboutdir <- {{job.outdir | r}}
|
|
10
10
|
mutaters <- {{ envs.mutaters | r }}
|
|
11
11
|
ident <- {{ envs.ident | r }}
|
|
12
|
-
|
|
12
|
+
group_by <- {{ envs.group_by | default: envs["group-by"] | default: None | r }} # nolint
|
|
13
13
|
each <- {{ envs.each | r }}
|
|
14
14
|
dbs <- {{ envs.dbs | r }}
|
|
15
15
|
n <- {{ envs.n | r }}
|
|
@@ -41,7 +41,7 @@ enrich_plots <- lapply(enrich_plots, function(x) {
|
|
|
41
41
|
})
|
|
42
42
|
defaults <- list(
|
|
43
43
|
ident = ident,
|
|
44
|
-
|
|
44
|
+
group_by = group_by,
|
|
45
45
|
each = each,
|
|
46
46
|
dbs = dbs,
|
|
47
47
|
n = n,
|
|
@@ -171,17 +171,17 @@ run_case <- function(name) {
|
|
|
171
171
|
} else {
|
|
172
172
|
subobj <- srtobj
|
|
173
173
|
}
|
|
174
|
-
case$
|
|
174
|
+
case$group_by <- case$group_by %||% "Identity"
|
|
175
175
|
if (is.null(case$ident)) {
|
|
176
|
-
case$ident <- as.character(unique(subobj@meta.data[[case$
|
|
176
|
+
case$ident <- as.character(unique(subobj@meta.data[[case$group_by]]))
|
|
177
177
|
}
|
|
178
178
|
avgexpr <- AverageExpression(
|
|
179
179
|
subobj,
|
|
180
|
-
|
|
180
|
+
group_by = case$group_by,
|
|
181
181
|
assays = assay
|
|
182
182
|
)[[assay]]
|
|
183
183
|
# https://github.com/satijalab/seurat/issues/7893
|
|
184
|
-
colnames(avgexpr) <- as.character(unique(subobj@meta.data[[case$
|
|
184
|
+
colnames(avgexpr) <- as.character(unique(subobj@meta.data[[case$group_by]]))
|
|
185
185
|
avgexpr <- avgexpr[, case$ident, drop = FALSE]
|
|
186
186
|
|
|
187
187
|
for (idt in case$ident) {
|
|
@@ -29,6 +29,8 @@ if __name__ == "__main__":
|
|
|
29
29
|
raise ValueError(
|
|
30
30
|
f"Over clustering column '{over_clustering}' not found in AnnData object."
|
|
31
31
|
)
|
|
32
|
+
if 'neighbors' in adata.uns and 'params' in adata.uns['neighbors']:
|
|
33
|
+
adata.uns['neighbors']['params'].setdefault('n_neighbors', 15)
|
|
32
34
|
|
|
33
35
|
annotated = celltypist.annotate(
|
|
34
36
|
adata,
|
|
@@ -98,7 +98,13 @@ do_comparison <- function(object, caseinfo, subset_by, subset_val, group_by, gro
|
|
|
98
98
|
}
|
|
99
99
|
|
|
100
100
|
classes <- as.character(object@meta.data[[group_by]])
|
|
101
|
-
|
|
101
|
+
if (!group1 %in% classes) {
|
|
102
|
+
stop("Group '", group1, "' not found in '", group_by, "' column of the Seurat object.")
|
|
103
|
+
}
|
|
104
|
+
if (!is.null(group2) && !group2 %in% classes) {
|
|
105
|
+
stop("Group '", group2, "' not found in '", group_by, "' column of the Seurat object.")
|
|
106
|
+
}
|
|
107
|
+
classes[classes != group1] <- "Other"
|
|
102
108
|
if (any(table(classes) < 5)) {
|
|
103
109
|
msg <- paste0(
|
|
104
110
|
" ! skipped. Group has less than 5 cells: ",
|
|
@@ -266,8 +272,8 @@ do_subset <- function(object, caseinfo, subset_by, subset_val, group_by, compari
|
|
|
266
272
|
rbind, lapply(
|
|
267
273
|
as.character(comparisons),
|
|
268
274
|
function(comparison) {
|
|
269
|
-
if (grepl("
|
|
270
|
-
group1 <- trimws(unlist(strsplit(comparison, "
|
|
275
|
+
if (grepl(":", comparison)) {
|
|
276
|
+
group1 <- trimws(unlist(strsplit(comparison, ":")))
|
|
271
277
|
group2 <- group1[2]
|
|
272
278
|
group1 <- group1[1]
|
|
273
279
|
} else {
|
|
@@ -315,8 +315,8 @@ do_subset <- function(
|
|
|
315
315
|
plotargs$keep_empty <- TRUE
|
|
316
316
|
|
|
317
317
|
p <- do_call(plotfn, plotargs)
|
|
318
|
-
devpars$width <- devpars$width %||% (attr(p, "width") * devpars$res) %||% 1000
|
|
319
|
-
devpars$height <- devpars$height %||% (attr(p, "height") * devpars$res) %||% 1000
|
|
318
|
+
devpars$width <- devpars$width %||% (attr(p, "width") * 2 * devpars$res) %||% 1000
|
|
319
|
+
devpars$height <- devpars$height %||% (attr(p, "height") * 2 * devpars$res) %||% 1000
|
|
320
320
|
} else { # heatmap
|
|
321
321
|
minval <- min(dat)
|
|
322
322
|
maxval <- max(dat)
|
|
@@ -195,6 +195,7 @@ do_subset <- function(object, caseinfo, subset_by, subset_val, group_by, plots,
|
|
|
195
195
|
plotprefix <- file.path(odir, slugify(plot))
|
|
196
196
|
plotargs$devpars$width <- plotargs$devpars$width %||% (attr(p, "width") * plotargs$devpars$res) %||% 800
|
|
197
197
|
plotargs$devpars$height <- plotargs$devpars$height %||% (attr(p, "height") * plotargs$devpars$res) %||% 600
|
|
198
|
+
plotargs$devpars$height <- max(plotargs$devpars$height, plotargs$devpars$width / 1.5)
|
|
198
199
|
png(
|
|
199
200
|
filename = paste0(plotprefix, ".png"),
|
|
200
201
|
width = plotargs$devpars$width,
|
|
@@ -36,9 +36,6 @@ from sklearn.manifold import MDS
|
|
|
36
36
|
import faiss
|
|
37
37
|
from query import *
|
|
38
38
|
try:
|
|
39
|
-
from Bio.SubsMat.MatrixInfo import blosum62
|
|
40
|
-
print(blosum62)
|
|
41
|
-
except ModuleNotFoundError:
|
|
42
39
|
from Bio.Align import substitution_matrices
|
|
43
40
|
blosum62 = substitution_matrices.load("BLOSUM62")
|
|
44
41
|
_tmp = {}
|
|
@@ -46,7 +43,8 @@ except ModuleNotFoundError:
|
|
|
46
43
|
for ab2 in blosum62.alphabet:
|
|
47
44
|
_tmp[(ab1, ab2)] = int(blosum62[(ab1, ab2)])
|
|
48
45
|
blosum62 = _tmp
|
|
49
|
-
|
|
46
|
+
except ModuleNotFoundError:
|
|
47
|
+
from Bio.SubsMat.MatrixInfo import blosum62
|
|
50
48
|
|
|
51
49
|
AAstring = "ACDEFGHIKLMNPQRSTVWY"
|
|
52
50
|
AAstringList = list(AAstring)
|
|
@@ -7,7 +7,7 @@ srtobjfile <- {{in.srtobj | r}}
|
|
|
7
7
|
outfile <- {{out.outfile | r}}
|
|
8
8
|
cloneCall <- {{envs.cloneCall | r}}
|
|
9
9
|
chain <- {{envs.chain | r}}
|
|
10
|
-
|
|
10
|
+
group_by <- {{envs.group_by | default: envs["group-by"] | default: None | r}}
|
|
11
11
|
proportion <- {{envs.proportion | r}}
|
|
12
12
|
filterNA <- {{envs.filterNA | r}}
|
|
13
13
|
cloneSize <- {{envs.cloneSize | r}}
|
|
@@ -28,12 +28,13 @@ obj <- combineExpression(
|
|
|
28
28
|
sc.data = srtobj,
|
|
29
29
|
cloneCall = cloneCall,
|
|
30
30
|
chain = chain,
|
|
31
|
-
group.by =
|
|
31
|
+
group.by = group_by,
|
|
32
32
|
proportion = proportion,
|
|
33
33
|
filterNA = filterNA,
|
|
34
34
|
cloneSize = unlist(cloneSize),
|
|
35
35
|
addLabel = addLabel
|
|
36
36
|
)
|
|
37
|
+
obj$TCR_Presence <- !is.na(obj$CTaa)
|
|
37
38
|
|
|
38
39
|
log$info("Saving combined object ...")
|
|
39
40
|
save_obj(obj, outfile)
|
|
@@ -118,8 +118,13 @@ load_contig <- function(input, sample, fmt) {
|
|
|
118
118
|
fmt <- dirfmt[[2]]
|
|
119
119
|
if (is.null(dir)) { return(NULL) }
|
|
120
120
|
x <- loadContigs(dir, format = fmt %||% "10X")
|
|
121
|
-
x[[1]]
|
|
122
|
-
x
|
|
121
|
+
x <- x[[1]]
|
|
122
|
+
x$sample <- NULL
|
|
123
|
+
if (identical(fmt %||% "10X", "10X") && colnames(x)[1] == "X") {
|
|
124
|
+
x$X <- NULL
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
x
|
|
123
128
|
}
|
|
124
129
|
|
|
125
130
|
|
|
@@ -130,11 +130,10 @@ output.clusters_df.to_csv(clustcr_dir + "/clusters.txt", sep="\t", index=False)
|
|
|
130
130
|
clustcr_file
|
|
131
131
|
}
|
|
132
132
|
|
|
133
|
-
clean_clustcr_output = function(clustcr_outfile
|
|
133
|
+
clean_clustcr_output = function(clustcr_outfile) {
|
|
134
134
|
clustcr_out = read.delim2(clustcr_outfile, header=TRUE, row.names = NULL)
|
|
135
135
|
colnames(clustcr_out) = c("CDR3.aa", "TCR_Cluster")
|
|
136
|
-
|
|
137
|
-
out = left_join(in_cdr3, distinct(clustcr_out), by=c("CDR3.aa")) %>%
|
|
136
|
+
out = left_join(cdr3aa_df, distinct(clustcr_out), by=c(cdr3seq4clustering = "CDR3.aa")) %>%
|
|
138
137
|
mutate(
|
|
139
138
|
TCR_Cluster = if_else(
|
|
140
139
|
is.na(TCR_Cluster),
|
|
@@ -170,7 +169,7 @@ run_clustcr = function() {
|
|
|
170
169
|
quit(status=rc)
|
|
171
170
|
}
|
|
172
171
|
clustcr_outfile = file.path(clustcr_dir, "clusters.txt")
|
|
173
|
-
clean_clustcr_output(clustcr_outfile
|
|
172
|
+
clean_clustcr_output(clustcr_outfile)
|
|
174
173
|
}
|
|
175
174
|
|
|
176
175
|
prepare_giana = function() {
|
|
@@ -193,21 +192,8 @@ prepare_giana = function() {
|
|
|
193
192
|
}
|
|
194
193
|
|
|
195
194
|
prepare_input = function() {
|
|
196
|
-
#
|
|
197
|
-
cdr3
|
|
198
|
-
# cdr3col = if (!on_multi) "cdr3" else "CDR3.aa"
|
|
199
|
-
cdr3col = "CDR3.aa"
|
|
200
|
-
for (sample in names(seqdata)) {
|
|
201
|
-
sdata = seqdata[[sample]]
|
|
202
|
-
if (on_multi) {
|
|
203
|
-
sdata[[cdr3col]] = sub(";", "", sdata[[cdr3col]])
|
|
204
|
-
} else if ("chain" %in% colnames(sdata)) {
|
|
205
|
-
sdata = sdata %>% separate_rows(chain, cdr3col, sep = ";") %>%
|
|
206
|
-
filter(chain == "TRB")
|
|
207
|
-
}
|
|
208
|
-
cdr3 = union(cdr3, unique(sdata[[cdr3col]]))
|
|
209
|
-
}
|
|
210
|
-
cdr3 = unique(cdr3)
|
|
195
|
+
cdr3aa_df$cdr3seq4clustering <<- gsub("[^A-Z]", "", cdr3aa_df$CDR3.aa) # Remove non-amino acid characters
|
|
196
|
+
cdr3 <- unique(cdr3aa_df$cdr3seq4clustering)
|
|
211
197
|
|
|
212
198
|
# cdr3 = distinct(cdr3, aminoAcid, vMaxResolved)
|
|
213
199
|
|
|
@@ -220,15 +206,14 @@ prepare_input = function() {
|
|
|
220
206
|
cdr3file
|
|
221
207
|
}
|
|
222
208
|
|
|
223
|
-
clean_giana_output = function(giana_outfile
|
|
209
|
+
clean_giana_output = function(giana_outfile) {
|
|
224
210
|
# generate an output file with columns:
|
|
225
211
|
# CDR3.aa, TCR_Cluster, V.name, Sample
|
|
226
212
|
# If sequence doesn't exist in the input file,
|
|
227
213
|
# Then a unique cluster id is assigned to it.
|
|
228
214
|
giana_out = read.delim2(giana_outfile, header=FALSE, comment.char = "#", row.names = NULL)[, 1:2, drop=FALSE]
|
|
229
215
|
colnames(giana_out) = c("CDR3.aa", "TCR_Cluster")
|
|
230
|
-
|
|
231
|
-
out = left_join(in_cdr3, distinct(giana_out), by=c("CDR3.aa")) %>%
|
|
216
|
+
out = left_join(cdr3aa_df, distinct(giana_out), by=c(cdr3seq4clustering = "CDR3.aa")) %>%
|
|
232
217
|
mutate(
|
|
233
218
|
TCR_Cluster = if_else(
|
|
234
219
|
is.na(TCR_Cluster),
|
|
@@ -283,10 +268,11 @@ run_giana = function() {
|
|
|
283
268
|
quit(status=rc)
|
|
284
269
|
}
|
|
285
270
|
giana_outfile = file.path(giana_outdir, "cdr3--RotationEncodingBL62.txt")
|
|
286
|
-
clean_giana_output(giana_outfile
|
|
271
|
+
clean_giana_output(giana_outfile)
|
|
287
272
|
}
|
|
288
273
|
|
|
289
274
|
attach_to_obj = function(obj, out) {
|
|
275
|
+
out <- as.data.frame(out)
|
|
290
276
|
rownames(out) <- out$Barcode
|
|
291
277
|
if (is_seurat) {
|
|
292
278
|
# Attach results to Seurat object
|
biopipen/scripts/tcr/TESSA.R
CHANGED
|
@@ -39,9 +39,11 @@ log$info("Preparing TCR input file ...")
|
|
|
39
39
|
# If immfile endswith .rds, then it is an immunarch object
|
|
40
40
|
tcrdata <- sobj@meta.data %>%
|
|
41
41
|
rownames_to_column("contig_id") %>%
|
|
42
|
+
select(contig_id, CTaa, CTgene, sample = Sample) %>%
|
|
42
43
|
filter(!is.na(CTaa) & !is.na(CTgene)) %>%
|
|
43
|
-
separate(CTaa, into = c(NA, "cdr3"), sep = "_", remove =
|
|
44
|
-
|
|
44
|
+
separate(CTaa, into = c(NA, "cdr3"), sep = "_", remove = TRUE) %>%
|
|
45
|
+
filter(!is.na(cdr3) & cdr3 != "NA" & cdr3 != "nan") %>%
|
|
46
|
+
separate(CTgene, into = c(NA, "vjgene"), sep = "_", remove = TRUE) %>%
|
|
45
47
|
separate(vjgene, into = c("v_gene", NA, "j_gene", NA), sep = "\\.", remove = TRUE) %>%
|
|
46
48
|
mutate(v_gene = sub("-\\d+$", "", v_gene), j_gene = sub("-\\d+$", "", j_gene))
|
|
47
49
|
|