biopipen 0.33.1__py3-none-any.whl → 0.34.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/core/filters.py +10 -183
- biopipen/core/proc.py +5 -3
- biopipen/core/testing.py +8 -1
- biopipen/ns/bam.py +40 -4
- biopipen/ns/cnv.py +1 -1
- biopipen/ns/cnvkit.py +1 -1
- biopipen/ns/delim.py +1 -1
- biopipen/ns/gsea.py +63 -37
- biopipen/ns/misc.py +38 -0
- biopipen/ns/plot.py +8 -0
- biopipen/ns/scrna.py +328 -292
- biopipen/ns/scrna_metabolic_landscape.py +207 -366
- biopipen/ns/tcr.py +165 -97
- biopipen/reports/bam/CNVpytor.svelte +4 -9
- biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
- biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
- biopipen/reports/scrna/DimPlots.svelte +1 -1
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
- biopipen/reports/snp/PlinkCallRate.svelte +2 -2
- biopipen/reports/snp/PlinkFreq.svelte +1 -1
- biopipen/reports/snp/PlinkHWE.svelte +1 -1
- biopipen/reports/snp/PlinkHet.svelte +1 -1
- biopipen/reports/snp/PlinkIBD.svelte +1 -1
- biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
- biopipen/scripts/bam/CNAClinic.R +41 -6
- biopipen/scripts/bam/CNVpytor.py +2 -1
- biopipen/scripts/bam/ControlFREEC.py +2 -3
- biopipen/scripts/bam/SamtoolsView.py +33 -0
- biopipen/scripts/cnv/AneuploidyScore.R +25 -13
- biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
- biopipen/scripts/cnv/TMADScore.R +4 -4
- biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
- biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
- biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
- biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
- biopipen/scripts/delim/RowsBinder.R +1 -1
- biopipen/scripts/delim/SampleInfo.R +4 -1
- biopipen/scripts/gene/GeneNameConversion.R +14 -12
- biopipen/scripts/gsea/Enrichr.R +2 -2
- biopipen/scripts/gsea/FGSEA.R +184 -50
- biopipen/scripts/gsea/PreRank.R +3 -3
- biopipen/scripts/misc/Plot.R +80 -0
- biopipen/scripts/plot/VennDiagram.R +2 -2
- biopipen/scripts/protein/ProdigySummary.R +34 -27
- biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
- biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
- biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
- biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
- biopipen/scripts/regulatory/motifs-common.R +10 -9
- biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
- biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
- biopipen/scripts/rnaseq/Simulation.R +0 -2
- biopipen/scripts/rnaseq/UnitConversion.R +6 -5
- biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
- biopipen/scripts/scrna/CellCellCommunication.py +1 -1
- biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
- biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
- biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
- biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
- biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
- biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
- biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
- biopipen/scripts/scrna/CellsDistribution.R +1 -1
- biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
- biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
- biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
- biopipen/scripts/scrna/MarkersFinder.R +481 -215
- biopipen/scripts/scrna/MetaMarkers.R +3 -3
- biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
- biopipen/scripts/scrna/RadarPlots.R +1 -1
- biopipen/scripts/scrna/ScFGSEA.R +231 -76
- biopipen/scripts/scrna/ScSimulation.R +11 -10
- biopipen/scripts/scrna/ScVelo.py +605 -0
- biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
- biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
- biopipen/scripts/scrna/SeuratClusterStats-features.R +43 -30
- biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
- biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
- biopipen/scripts/scrna/SeuratClustering.R +31 -48
- biopipen/scripts/scrna/SeuratLoading.R +2 -2
- biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
- biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
- biopipen/scripts/scrna/SeuratPreparing.R +76 -24
- biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
- biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
- biopipen/scripts/scrna/Subset10X.R +2 -2
- biopipen/scripts/scrna/TopExpressingGenes.R +144 -185
- biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
- biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
- biopipen/scripts/snp/MatrixEQTL.R +39 -20
- biopipen/scripts/snp/PlinkCallRate.R +43 -34
- biopipen/scripts/snp/PlinkFreq.R +34 -41
- biopipen/scripts/snp/PlinkHWE.R +23 -18
- biopipen/scripts/snp/PlinkHet.R +26 -22
- biopipen/scripts/snp/PlinkIBD.R +30 -34
- biopipen/scripts/stats/ChowTest.R +9 -8
- biopipen/scripts/stats/DiffCoexpr.R +13 -11
- biopipen/scripts/stats/LiquidAssoc.R +7 -8
- biopipen/scripts/stats/Mediation.R +8 -8
- biopipen/scripts/stats/MetaPvalue.R +11 -13
- biopipen/scripts/stats/MetaPvalue1.R +6 -5
- biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
- biopipen/scripts/tcr/ClonalStats.R +6 -5
- biopipen/scripts/tcr/CloneResidency.R +3 -3
- biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
- biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
- biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
- biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
- biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
- biopipen/scripts/tcr/ScRepLoading.R +114 -92
- biopipen/scripts/tcr/TCRClusterStats.R +2 -2
- biopipen/scripts/tcr/TCRClustering.R +86 -97
- biopipen/scripts/tcr/TESSA.R +65 -115
- biopipen/scripts/tcr/VJUsage.R +5 -5
- biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
- biopipen/utils/common_docstrs.py +66 -63
- biopipen/utils/reporter.py +177 -0
- {biopipen-0.33.1.dist-info → biopipen-0.34.1.dist-info}/METADATA +2 -1
- {biopipen-0.33.1.dist-info → biopipen-0.34.1.dist-info}/RECORD +130 -145
- {biopipen-0.33.1.dist-info → biopipen-0.34.1.dist-info}/WHEEL +1 -1
- biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
- biopipen/reports/scrna/ScFGSEA.svelte +0 -16
- biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
- biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
- biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
- biopipen/reports/utils/gsea.liq +0 -110
- biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
- biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
- biopipen/utils/caching.R +0 -44
- biopipen/utils/gene.R +0 -95
- biopipen/utils/gsea.R +0 -329
- biopipen/utils/io.R +0 -20
- biopipen/utils/misc.R +0 -602
- biopipen/utils/mutate_helpers.R +0 -581
- biopipen/utils/plot.R +0 -209
- biopipen/utils/repr.R +0 -146
- biopipen/utils/rnaseq.R +0 -48
- biopipen/utils/single_cell.R +0 -207
- {biopipen-0.33.1.dist-info → biopipen-0.34.1.dist-info}/entry_points.txt +0 -0
biopipen/utils/mutate_helpers.R
DELETED
|
@@ -1,581 +0,0 @@
|
|
|
1
|
-
suppressPackageStartupMessages(library(rlang))
|
|
2
|
-
suppressPackageStartupMessages(library(tidyselect))
|
|
3
|
-
suppressPackageStartupMessages(library(dplyr))
|
|
4
|
-
suppressPackageStartupMessages(library(tidyr))
|
|
5
|
-
|
|
6
|
-
#' Get expanded, collapsed, emerged or vanished clones from a meta data frame
|
|
7
|
-
#'
|
|
8
|
-
#' @rdname Get expanded, collapsed, emerged or vanished clones
|
|
9
|
-
#'
|
|
10
|
-
#' @param df The meta data frame
|
|
11
|
-
#' @param group.by The column name (without quotes) in metadata to group the
|
|
12
|
-
#' cells.
|
|
13
|
-
#' @param idents The groups of cells to compare (values in `group.by` column).
|
|
14
|
-
#' Either length 1 (`ident_1`) or length 2 (`ident_1` and `ident_2`).
|
|
15
|
-
#' If length 1, the rest of the cells with non-NA values in `group.by` will
|
|
16
|
-
#' be used as `ident_2`.
|
|
17
|
-
#' @param subset An expression to subset the cells, will be passed to
|
|
18
|
-
#' `dplyr::filter()`. Default is `TRUE` (no filtering).
|
|
19
|
-
#' @param each A column name (without quotes) in metadata to split the cells.
|
|
20
|
-
#' Each comparison will be done for each value in this column.
|
|
21
|
-
#' @param id The column name (without quotes) in metadata for the
|
|
22
|
-
#' group ids (i.e. `CDR3.aa`)
|
|
23
|
-
#' @param compare Either a (numeric) column name (i.e. `Clones`, without quotes)
|
|
24
|
-
#' in metadata to compare between groups, or `.n` to compare the
|
|
25
|
-
#' number of cells in each group.
|
|
26
|
-
#' @param fun The way to compare between groups. Either `"expanded"`,
|
|
27
|
-
#' `"collapsed"`, `"emerged"` or `"vanished"`.
|
|
28
|
-
#' @param uniq Whether to return unique ids or not. Default is `TRUE`.
|
|
29
|
-
#' If `FALSE`, you can mutate the meta data frame with the returned ids.
|
|
30
|
-
#' For example, `df %>% mutate(expanded = expanded(...))`.
|
|
31
|
-
#' @param debug Return the transformed data frame with counts, predicates, sum, and diff.
|
|
32
|
-
#' @param order The order of the returned ids. It could be `sum` or `diff`,
|
|
33
|
-
#' which is the sum or diff of the `compare` between idents. Two kinds of
|
|
34
|
-
#' modifiers can be added, including `desc` and `abs`. For example,
|
|
35
|
-
#' `sum,desc` means the sum of `compare` between idents in descending order.
|
|
36
|
-
#' Default is `desc(.sum)` (the expression is passed to `dplyr::arrange()`).
|
|
37
|
-
#' It only works when `uniq` is `TRUE`. If `uniq` is `FALSE`, the returned
|
|
38
|
-
#' ids will be in the same order as in `df`.
|
|
39
|
-
#' @param include_emerged Whether to include emerged clones for the expanded clones.
|
|
40
|
-
#' Default is `FALSE`. It only works for `"expanded"`.
|
|
41
|
-
#' @param include_vanished Whether to include vanished clones for the collapsed clones.
|
|
42
|
-
#' Default is `FALSE`. It only works for `"collapsed"`.
|
|
43
|
-
#'
|
|
44
|
-
#' @return A vector of expanded or collapsed clones (in `id` column)
|
|
45
|
-
#' If uniq is `FALSE`, the vector will be the same length as `df`.
|
|
46
|
-
#'
|
|
47
|
-
#' @examples
|
|
48
|
-
#' # Get expanded clones
|
|
49
|
-
#' df <- tibble(
|
|
50
|
-
#' Clones = c(10, 8, 1, 5, 9, 2, 3, 7, 6, 4, 9, 9),
|
|
51
|
-
#' Source = c(
|
|
52
|
-
#' "Tumor", "Normal", "Normal", "Normal", "Tumor", "Tumor",
|
|
53
|
-
#' "Tumor", "Normal", "Normal", "Normal", NA, "X"
|
|
54
|
-
#' ),
|
|
55
|
-
#' CDR3.aa = c("A", "C", "B", "E", "D", "E", "E", "B", "B", "B", "A", "A")
|
|
56
|
-
#' )
|
|
57
|
-
#'
|
|
58
|
-
#' expanded(df, Source, c("Tumor", "Normal"))
|
|
59
|
-
#' # The transformed data frame looks like this:
|
|
60
|
-
# CDR3.aa ..predicate ..sum ..diff
|
|
61
|
-
# <chr> <lgl> <dbl> <dbl>
|
|
62
|
-
# 1 A TRUE 10 10
|
|
63
|
-
# 2 B FALSE 1 -1
|
|
64
|
-
# 3 C FALSE 8 -8
|
|
65
|
-
# 4 D TRUE 9 9
|
|
66
|
-
# 5 E FALSE 7 -3
|
|
67
|
-
#'
|
|
68
|
-
#' # [1] "A" "D"
|
|
69
|
-
#'
|
|
70
|
-
#' # Get collapsed clones
|
|
71
|
-
#' collapsed(df, Source, c("Tumor", "Normal"))
|
|
72
|
-
#' # [1] "B" "C" "E"
|
|
73
|
-
#'
|
|
74
|
-
#' # Get emerged clones
|
|
75
|
-
#' emerged(df, Source, c("Tumor", "Normal"))
|
|
76
|
-
#' # [1] "A" "D"
|
|
77
|
-
#'
|
|
78
|
-
#' # Get vanished clones
|
|
79
|
-
#' vanished(df, Source, c("Tumor", "Normal"))
|
|
80
|
-
#' # [1] "B" "C"
|
|
81
|
-
.size_compare <- function(
    df,
    group.by,  # nolint
    idents,
    subset,
    id,
    compare,
    fun,
    each,
    uniq,
    order,
    debug
) {
    # Shared worker behind expanded(), collapsed(), emerged() and vanished().
    #
    # Arguments (all captured and forwarded by the public wrappers):
    #   df       - the meta data frame.
    #   group.by - quosure of the column holding the groups to compare.
    #   idents   - one or two values of `group.by`. With a single value, all
    #              other rows with a non-NA `group.by` form the second group.
    #   subset   - expression passed to dplyr::filter().
    #   id       - quosure of the column holding the ids to return.
    #   compare  - quosure of a column to compare, or `.n` to compare the
    #              number of rows per id and group.
    #   fun      - one of "expanded", "expanded+", "collapsed", "collapsed+",
    #              "emerged" or "vanished".
    #   each     - quosure of a column to split the comparison by, or NULL.
    #   uniq     - TRUE to return unique ids, FALSE to return a vector aligned
    #              with the rows of `df` (NA where the id does not qualify).
    #   order    - expression passed to dplyr::arrange().
    #   debug    - TRUE to return the transformed data frame instead of ids.
    known_funs <- c(
        "expanded", "expanded+", "collapsed", "collapsed+", "emerged", "vanished"
    )
    if (!is.character(fun) || length(fun) != 1 || !fun %in% known_funs) {
        # Without this guard an unknown `fun` makes predicate() return NULL and
        # the failure only surfaces later as a missing `.predicate` column.
        stop(paste0(
            "`fun` must be one of ",
            paste0('"', known_funs, '"', collapse = ", ")
        ))
    }

    if (length(idents) == 1) {
        ident_1 <- idents[1]
        # Sentinel meaning "everything that is not ident_1"
        ident_2 <- "<NULL>"
    } else if (length(idents) == 2) {
        ident_1 <- idents[1]
        ident_2 <- idents[2]
    } else {
        stop("idents must be length 1 or 2")
    }

    if (is_empty(attr(group.by, ".Environment"))) {
        # Works if a (quoted) string passed
        group.by <- sym(as_name(group.by))
    }
    if (is_empty(attr(id, ".Environment"))) {
        id <- sym(as_name(id))
    }
    if (is_empty(attr(compare, ".Environment"))) {
        compare <- sym(as_name(compare))
    }
    compare_label <- as_name(compare)
    compare_is_count <- compare_label == '.n'

    if (!as_name(group.by) %in% colnames(df)) {
        stop(paste0(
            '`group.by` must be a column name in df. Got "',
            as_name(group.by),
            '"'
        ))
    }

    if (!compare_is_count && !compare_label %in% colnames(df)) {
        stop(paste0(
            "`compare` must be either a column name in df, or 'count'/'.n'. ",
            'Got "',
            compare_label,
            '"'
        ))
    }

    # Row-wise test of the two per-group sizes; `fun` was validated above.
    predicate <- function(ident_1, ident_2) {
        switch(fun,
            "expanded" = ident_1 > ident_2 && ident_2 > 0,
            "expanded+" = ident_1 > ident_2,
            "collapsed" = ident_1 < ident_2 && ident_1 > 0,
            "collapsed+" = ident_1 < ident_2,
            "emerged" = ident_1 > 0 && ident_2 == 0,
            "vanished" = ident_1 == 0 && ident_2 > 0
        )
    }

    # Mark the group.by column (as .group) as "ident_1", "ident_2" or NA
    mark_group <- function(d) {
        d %>% mutate(
            .group = if_else(
                !!group.by == ident_1,
                "ident_1",
                if_else(ident_2 != "<NULL>" & !!group.by != ident_2, NA, "ident_2")
            )
        )
    }

    # Subset the data frame, drop rows without an id and rows that belong to
    # neither ident group
    trans <- df %>%
        dplyr::filter(!!subset) %>%
        drop_na(!!id) %>%
        mark_group() %>%
        drop_na(.group)

    if (is_empty(attr(each, ".Environment"))) {
        if (as_label(each) == "NULL") {
            each <- NULL
        } else {
            each <- sym(as_name(each))
        }
    }
    if (is.null(each)) {
        trans <- trans %>% group_by(!!id, .group)
    } else {
        trans <- trans %>% group_by(!!each, !!id, .group)
    }

    if (compare_is_count) {
        trans <- trans %>% summarise(.n = n(), .groups = "drop")
    } else {
        trans <- trans %>% summarise(.n = first(!!compare), .groups = "drop")
    }

    trans <- trans %>% pivot_wider(names_from = .group, values_from = .n)

    # pivot_wider() only creates a column for a group that actually occurs.
    # If one group has no rows left, add its column as zeros; otherwise
    # `ident_1`/`ident_2` below would resolve to the ident strings defined
    # above instead of the per-id sizes.
    for (ident_col in c("ident_1", "ident_2")) {
        if (!ident_col %in% colnames(trans)) {
            trans[[ident_col]] <- rep(0, nrow(trans))
        }
    }

    trans <- trans %>%
        replace_na(list(ident_1 = 0, ident_2 = 0)) %>%
        rowwise() %>%
        # add the predicates, sums and diffs
        mutate(
            .predicate = predicate(ident_1, ident_2),
            .sum = ident_1 + ident_2,
            .diff = ident_1 - ident_2
        ) %>%
        ungroup() %>%
        arrange(!!order)

    if (debug) {
        return(trans)
    }

    uniq_ids <- trans %>%
        dplyr::filter(.predicate) %>%
        pull(!!id) %>%
        as.vector() %>%
        unique()
    if (uniq) {
        return(uniq_ids)
    }

    # Same length as `df`: keep the id only on rows that pass `subset`, belong
    # to one of the ident groups and carry a qualifying id
    df %>%
        mark_group() %>%
        mutate(
            .out = if_else(!!id %in% uniq_ids & !!subset & !is.na(.group), !!id, NA)
        ) %>%
        pull(.out)
}
|
|
219
|
-
|
|
220
|
-
#' @export
|
|
221
|
-
expanded <- function(
    df = .,
    group.by,  # nolint
    idents,
    subset = TRUE,
    each = NULL,
    id = CDR3.aa,
    compare = .n,
    uniq = TRUE,
    debug = FALSE,
    order = desc(.sum),
    include_emerged = FALSE
) {
    # Ids that are larger in the first ident than in the second one.
    # With `include_emerged = TRUE` the "expanded+" comparison is requested
    # from .size_compare() instead of "expanded".

    # `df` defaults to `.`; in that case take all columns of the current
    # dplyr context as the data frame
    if (identical(as_label(enquo(df)), ".")) {
        df <- across(everything())
    }

    comparison <- if (include_emerged) "expanded+" else "expanded"

    # Capture the column/expression arguments unevaluated
    group_quo <- enquo(group.by)
    subset_expr <- enexpr(subset)
    id_quo <- enquo(id)
    compare_quo <- enquo(compare)
    each_quo <- tryCatch(enquo(each), error = function(e) NULL)
    order_expr <- enexpr(order)

    .size_compare(
        df = df,
        group.by = group_quo,
        idents = idents,
        subset = subset_expr,
        id = id_quo,
        compare = compare_quo,
        fun = comparison,
        each = each_quo,
        uniq = uniq,
        order = order_expr,
        debug = debug
    )
}
|
|
253
|
-
|
|
254
|
-
#' @export
|
|
255
|
-
collapsed <- function(
    df = .,
    group.by,  # nolint
    idents,
    subset = TRUE,
    each = NULL,
    id = CDR3.aa,
    compare = .n,
    uniq = TRUE,
    debug = FALSE,
    order = desc(.sum),
    include_vanished = FALSE
) {
    # Ids that are smaller in the first ident than in the second one.
    # With `include_vanished = TRUE` the "collapsed+" comparison is requested
    # from .size_compare() instead of "collapsed".

    # `df` defaults to `.`; in that case take all columns of the current
    # dplyr context as the data frame
    if (identical(as_label(enquo(df)), ".")) {
        df <- across(everything())
    }

    comparison <- if (include_vanished) "collapsed+" else "collapsed"

    # Capture the column/expression arguments unevaluated
    group_quo <- enquo(group.by)
    subset_expr <- enexpr(subset)
    id_quo <- enquo(id)
    compare_quo <- enquo(compare)
    each_quo <- tryCatch(enquo(each), error = function(e) NULL)
    order_expr <- enexpr(order)

    .size_compare(
        df = df,
        group.by = group_quo,
        idents = idents,
        subset = subset_expr,
        id = id_quo,
        compare = compare_quo,
        fun = comparison,
        each = each_quo,
        uniq = uniq,
        order = order_expr,
        debug = debug
    )
}
|
|
287
|
-
|
|
288
|
-
#' @export
|
|
289
|
-
emerged <- function(
    df = .,
    group.by,  # nolint
    idents,
    subset = TRUE,
    each = NULL,
    id = CDR3.aa,
    compare = .n,
    uniq = TRUE,
    debug = FALSE,
    order = desc(.sum)
) {
    # Ids present in the first ident but absent from the second one
    # (the "emerged" comparison of .size_compare()).

    # `df` defaults to `.`; in that case take all columns of the current
    # dplyr context as the data frame
    if (identical(as_label(enquo(df)), ".")) {
        df <- across(everything())
    }

    # Capture the column/expression arguments unevaluated
    group_quo <- enquo(group.by)
    subset_expr <- enexpr(subset)
    id_quo <- enquo(id)
    compare_quo <- enquo(compare)
    each_quo <- tryCatch(enquo(each), error = function(e) NULL)
    order_expr <- enexpr(order)

    .size_compare(
        df = df,
        group.by = group_quo,
        idents = idents,
        subset = subset_expr,
        id = id_quo,
        compare = compare_quo,
        fun = "emerged",
        each = each_quo,
        uniq = uniq,
        order = order_expr,
        debug = debug
    )
}
|
|
319
|
-
|
|
320
|
-
#' @export
|
|
321
|
-
vanished <- function(
    df = .,
    group.by,  # nolint
    idents,
    subset = TRUE,
    each = NULL,
    id = CDR3.aa,
    compare = .n,
    uniq = TRUE,
    debug = FALSE,
    order = desc(.sum)
) {
    # Ids absent from the first ident but present in the second one
    # (the "vanished" comparison of .size_compare()).

    # `df` defaults to `.`; in that case take all columns of the current
    # dplyr context as the data frame
    if (identical(as_label(enquo(df)), ".")) {
        df <- across(everything())
    }

    # Capture the column/expression arguments unevaluated
    group_quo <- enquo(group.by)
    subset_expr <- enexpr(subset)
    id_quo <- enquo(id)
    compare_quo <- enquo(compare)
    each_quo <- tryCatch(enquo(each), error = function(e) NULL)
    order_expr <- enexpr(order)

    .size_compare(
        df = df,
        group.by = group_quo,
        idents = idents,
        subset = subset_expr,
        id = id_quo,
        compare = compare_quo,
        fun = "vanished",
        each = each_quo,
        uniq = uniq,
        order = order_expr,
        debug = debug
    )
}
|
|
351
|
-
|
|
352
|
-
#' Get paired entities from a data frame based on the other column
|
|
353
|
-
#'
|
|
354
|
-
#' @rdname Get paired entities
|
|
355
|
-
#' @param df The data frame. Use `.` if the function is called in a dplyr pipe.
|
|
356
|
-
#' @param id The column name in `df` for the ids to be returned in the
|
|
357
|
-
#' final output
|
|
358
|
-
#' @param compare The column name in `df` to compare the values for each
|
|
359
|
-
#' id in `id`.
|
|
360
|
-
#' @param idents The values in `compare` to compare. It could be either an
|
|
361
|
-
#' integer or a vector. If it is an integer, the number of values in
|
|
362
|
-
#' `compare` must be the same as the integer for the `id` to be regarded
|
|
363
|
-
#' as paired. If it is a vector, the values in `compare` must be the same
|
|
364
|
-
#' as the values in `idents` for the `id` to be regarded as paired.
|
|
365
|
-
#' @param uniq Whether to return unique ids or not. Default is `TRUE`.
|
|
366
|
-
#' If `FALSE`, you can mutate the meta data frame with the returned ids.
|
|
367
|
-
#' Non-paired ids will be `NA`.
|
|
368
|
-
#' @return A vector of paired ids (in `id` column)
|
|
369
|
-
#' @examples
|
|
370
|
-
#' df <- tibble(
|
|
371
|
-
#' id = c("A", "A", "B", "B", "C", "C", "D", "D"),
|
|
372
|
-
#' compare = c(1, 2, 1, 1, 1, 2, 1, 2)
|
|
373
|
-
#' )
|
|
374
|
-
#' paired(df, id, compare, 2)
|
|
375
|
-
#' # [1] "A" "B" "C" "D"
|
|
376
|
-
#' paired(df, id, compare, c(1, 2))
|
|
377
|
-
#' # [1] "A" "C" "D"
|
|
378
|
-
#' paired(df, id, compare, c(1, 2), uniq = FALSE)
|
|
379
|
-
#' # [1] "A" "A" NA NA "C" "C" "D" "D"
|
|
380
|
-
#'
|
|
381
|
-
paired <- function(
    df = .,
    id,
    compare,
    idents = 2,
    uniq = TRUE
) {
    # Return the ids regarded as paired.
    #
    # With a single numeric `idents`, an id is paired when it occurs exactly
    # that many times in `df`. Otherwise an id is paired when the set of its
    # `compare` values equals the set `idents`.
    # With `uniq = FALSE` the result is aligned with the rows of `df` and
    # holds NA for non-paired ids.

    # `df` defaults to `.`; in that case take all columns of the current
    # dplyr context as the data frame
    if (identical(as_label(enquo(df)), ".")) {
        df <- across(everything())
    }

    id_col <- enquo(id)
    compare_col <- enquo(compare)
    # Column names given as (quoted) strings are turned into symbols
    if (is_empty(attr(id_col, ".Environment"))) {
        id_col <- sym(as_name(id_col))
    }
    if (is_empty(attr(compare_col, ".Environment"))) {
        compare_col <- sym(as_name(compare_col))
    }

    available <- colnames(df)
    if (!as_name(id_col) %in% available) {
        stop(paste0(
            '`id` must be a column name in df. Got "',
            as_name(id_col),
            '"'
        ))
    }
    if (!as_name(compare_col) %in% available) {
        stop(paste0(
            '`compare` must be a column name in df. Got "',
            as_name(compare_col),
            '"'
        ))
    }

    if (is.numeric(idents) && length(idents) == 1) {
        # Count mode: compare the number of rows per id with `idents`
        if (idents <= 1) {
            stop(paste0(
                '`idents` must be greater than 1. Got ',
                idents
            ))
        }
        counted <- add_count(df, !!id_col, name = "..count")
        marked <- mutate(
            counted,
            ..paired = if_else(..count == idents, !!id_col, NA)
        )
    } else {
        # Set mode: the values of `compare` per id must equal `idents`
        if (length(idents) <= 1) {
            stop(paste0(
                '`idents` must be a vector with length greater than 1. Got ',
                length(idents)
            ))
        }
        by_id <- group_by(df, !!id_col)
        marked <- mutate(
            by_id,
            ..paired = if_else(
                rep(setequal(!!compare_col, idents), n()),
                !!id_col,
                NA
            )
        )
        marked <- ungroup(marked)
    }

    paired_ids <- pull(marked, ..paired)
    if (uniq) {
        as.vector(unique(na.omit(paired_ids)))
    } else {
        paired_ids
    }
}
|
|
452
|
-
|
|
453
|
-
#' @export
|
|
454
|
-
#' @rdname Get top entities by size of group
|
|
455
|
-
#' @param df The data frame. Use `.` if the function is called in a dplyr pipe.
|
|
456
|
-
#' @param id The column name in `df` for the groups.
|
|
457
|
-
#' @param compare The column name in `df` to compare the values for each group.
|
|
458
|
-
#' It could be either a numeric column or `.n` to compare the number of
|
|
459
|
-
#' entities in each group. If a column is passed, the values in the column
|
|
460
|
-
#' must be numeric and the same in each group. This won't be checked.
|
|
461
|
-
#' @param n The number of top entities to return. If `n` < 1, it will be
|
|
462
|
-
#' regarded as the percentage of the total number of entities in each group
|
|
463
|
-
#' (after subsetting or each applied).
|
|
464
|
-
#' Specify 0 to return all entities.
|
|
465
|
-
#' @param subset An expression to subset the entities, will be passed to
|
|
466
|
-
#' `dplyr::filter()`. Default is `TRUE` (no filtering).
|
|
467
|
-
#' @param with_ties Whether to return all entities with the same size as the
|
|
468
|
-
#' last entity in the top list. Default is `FALSE`.
|
|
469
|
-
#' @param each A column name (without quotes) in metadata to split the cells.
|
|
470
|
-
#' @param debug Return the transformed data frame with counts and predicates
|
|
471
|
-
#' @param uniq Whether to return unique ids or not. Default is `TRUE`.
|
|
472
|
-
#' If `FALSE`, you can mutate the meta data frame with the returned ids.
|
|
473
|
-
top <- function(
    df = .,
    id = CDR3.aa,
    n = 10,
    compare = .n,
    subset = TRUE,
    with_ties = FALSE,
    each = NULL,
    debug = FALSE,
    uniq = TRUE
) {
    # Return the ids of the top groups, ranked by group size.
    #
    # Rows are filtered by `subset`, rows without an id are dropped, and the
    # remaining rows are summarised per `id` (within each value of `each`,
    # when given). The size used for ranking is the number of rows when
    # `compare` is `.n`, otherwise the first `compare` value of the group.
    # `n >= 1` keeps that many groups, `0 < n < 1` keeps that proportion,
    # anything else keeps all groups.
    # `debug = TRUE` returns the summarised data frame with `.n` and
    # `.predicate`; `uniq = FALSE` returns a vector aligned with `df`.
    lbl <- as_label(enquo(df))
    if (length(lbl) == 1 && lbl == ".") {
        df <- across(everything())
    }

    id <- enquo(id)
    compare <- enquo(compare)

    # Capture the expression before looking at its value: a bare column
    # expression (e.g. `Source == "Tumor"`) cannot be evaluated outside a
    # data mask, and that must not abort the call. A single string (given
    # directly or through a variable) is still parsed into an expression.
    subset_expr <- enexpr(subset)
    subset_value <- tryCatch(subset, error = function(e) NULL)
    if (is.character(subset_value) && length(subset_value) == 1) {
        subset <- parse_expr(subset_value)
    } else {
        subset <- subset_expr
    }

    each <- tryCatch(enquo(each), error = function(e) NULL)
    if (is_empty(attr(id, ".Environment"))) {
        id <- sym(as_name(id))
    }
    if (is_empty(attr(compare, ".Environment"))) {
        compare <- sym(as_name(compare))
    }
    if (!as_name(id) %in% colnames(df)) {
        stop(paste0(
            '`id` must be a column name in df. Got "',
            as_name(id),
            '"'
        ))
    }
    compare_is_count <- as_name(compare) == '.n'
    if (!as_name(compare) %in% colnames(df) && !compare_is_count) {
        stop(paste0(
            '`compare` must be a column name in df. Got "',
            as_name(compare),
            '"'
        ))
    }
    if (is_empty(attr(each, ".Environment"))) {
        if (as_label(each) == "NULL") {
            each <- NULL
        } else {
            each <- sym(as_name(each))
        }
    }
    if (!is.null(each) && !as_name(each) %in% colnames(df)) {
        stop(paste0(
            '`each` must be a column name in df. Got "',
            as_name(each),
            '"'
        ))
    }

    subdf <- df %>% dplyr::filter(!!subset) %>% tidyr::drop_na(!!id)

    # Summarise one chunk of rows and flag the top groups in `.predicate`
    handle_one_each <- function(d) {
        if (!is.null(each)) {
            d <- d %>% group_by(!!each, !!id)
        } else {
            d <- d %>% group_by(!!id)
        }
        # summarise() drops every non-grouping column, so the size to rank by
        # is always stored in `.n`, whichever `compare` was requested
        if (compare_is_count) {
            d <- d %>% dplyr::summarise(.n = dplyr::n(), .groups = "drop")
        } else {
            d <- d %>%
                dplyr::summarise(.n = dplyr::first(!!compare), .groups = "drop")
        }
        d <- d %>% dplyr::arrange(dplyr::desc(.n))

        if (n > 0 && n < 1) {
            o <- d %>% dplyr::slice_max(prop = n, order_by = .n, with_ties = with_ties)
        } else if (n >= 1) {
            o <- d %>% dplyr::slice_max(n = n, order_by = .n, with_ties = with_ties)
        } else {
            o <- d
        }
        d %>% dplyr::mutate(.predicate = !!id %in% dplyr::pull(o, !!id))
    }

    if (is.null(each)) {
        out <- handle_one_each(subdf)
    } else {
        out <- subdf %>% dplyr::group_by(!!each) %>%
            dplyr::group_split() %>%
            purrr::map(handle_one_each) %>%
            dplyr::bind_rows()
    }

    if (isTRUE(debug)) {
        return(out)
    }

    uniq_ids <- out %>% dplyr::filter(.predicate) %>%
        dplyr::pull(!!id) %>% as.vector() %>% unique()
    if (isTRUE(uniq)) {
        return(uniq_ids)
    }

    # Same length as `df`: attach the flags to the original rows and keep the
    # id only where the group is flagged and the row passes `subset`
    df <- df %>% left_join(
        out,
        by = if(is.null(each)) as_name(id) else c(as_name(each), as_name(id)))

    df %>% dplyr::mutate(
        .out = if_else(.predicate & !!subset, !!id, NA)
    ) %>% dplyr::pull(.out)
}
|