biopipen 0.33.0__py3-none-any.whl → 0.34.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/core/filters.py +10 -183
- biopipen/core/proc.py +5 -3
- biopipen/core/testing.py +8 -1
- biopipen/ns/bam.py +40 -4
- biopipen/ns/cnv.py +1 -1
- biopipen/ns/cnvkit.py +1 -1
- biopipen/ns/delim.py +1 -1
- biopipen/ns/gsea.py +63 -37
- biopipen/ns/misc.py +38 -0
- biopipen/ns/plot.py +8 -0
- biopipen/ns/scrna.py +307 -288
- biopipen/ns/scrna_metabolic_landscape.py +207 -366
- biopipen/ns/tcr.py +165 -97
- biopipen/reports/bam/CNVpytor.svelte +4 -9
- biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
- biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
- biopipen/reports/scrna/DimPlots.svelte +1 -1
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
- biopipen/reports/snp/PlinkCallRate.svelte +2 -2
- biopipen/reports/snp/PlinkFreq.svelte +1 -1
- biopipen/reports/snp/PlinkHWE.svelte +1 -1
- biopipen/reports/snp/PlinkHet.svelte +1 -1
- biopipen/reports/snp/PlinkIBD.svelte +1 -1
- biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
- biopipen/scripts/bam/CNAClinic.R +41 -6
- biopipen/scripts/bam/CNVpytor.py +2 -1
- biopipen/scripts/bam/ControlFREEC.py +2 -3
- biopipen/scripts/bam/SamtoolsView.py +33 -0
- biopipen/scripts/cnv/AneuploidyScore.R +25 -13
- biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
- biopipen/scripts/cnv/TMADScore.R +4 -4
- biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
- biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
- biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
- biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
- biopipen/scripts/delim/RowsBinder.R +1 -1
- biopipen/scripts/delim/SampleInfo.R +14 -2
- biopipen/scripts/gene/GeneNameConversion.R +14 -12
- biopipen/scripts/gsea/Enrichr.R +2 -2
- biopipen/scripts/gsea/FGSEA.R +184 -50
- biopipen/scripts/gsea/PreRank.R +3 -3
- biopipen/scripts/misc/Plot.R +80 -0
- biopipen/scripts/plot/VennDiagram.R +2 -2
- biopipen/scripts/protein/ProdigySummary.R +34 -27
- biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
- biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
- biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
- biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
- biopipen/scripts/regulatory/motifs-common.R +10 -9
- biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
- biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
- biopipen/scripts/rnaseq/Simulation.R +0 -2
- biopipen/scripts/rnaseq/UnitConversion.R +6 -5
- biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
- biopipen/scripts/scrna/CellCellCommunication.py +1 -1
- biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
- biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
- biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
- biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
- biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
- biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
- biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
- biopipen/scripts/scrna/CellsDistribution.R +1 -1
- biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
- biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
- biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
- biopipen/scripts/scrna/LoomTo10X.R +51 -0
- biopipen/scripts/scrna/MarkersFinder.R +348 -217
- biopipen/scripts/scrna/MetaMarkers.R +3 -3
- biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
- biopipen/scripts/scrna/RadarPlots.R +1 -1
- biopipen/scripts/scrna/ScFGSEA.R +157 -75
- biopipen/scripts/scrna/ScSimulation.R +11 -10
- biopipen/scripts/scrna/ScVelo.py +605 -0
- biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
- biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
- biopipen/scripts/scrna/SeuratClusterStats-features.R +39 -30
- biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
- biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
- biopipen/scripts/scrna/SeuratClustering.R +31 -48
- biopipen/scripts/scrna/SeuratLoading.R +2 -2
- biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
- biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
- biopipen/scripts/scrna/SeuratPreparing.R +76 -24
- biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
- biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
- biopipen/scripts/scrna/Subset10X.R +2 -2
- biopipen/scripts/scrna/TopExpressingGenes.R +141 -184
- biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
- biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
- biopipen/scripts/snp/MatrixEQTL.R +39 -20
- biopipen/scripts/snp/PlinkCallRate.R +43 -34
- biopipen/scripts/snp/PlinkFreq.R +34 -41
- biopipen/scripts/snp/PlinkHWE.R +23 -18
- biopipen/scripts/snp/PlinkHet.R +26 -22
- biopipen/scripts/snp/PlinkIBD.R +30 -34
- biopipen/scripts/stats/ChowTest.R +9 -8
- biopipen/scripts/stats/DiffCoexpr.R +13 -11
- biopipen/scripts/stats/LiquidAssoc.R +7 -8
- biopipen/scripts/stats/Mediation.R +8 -8
- biopipen/scripts/stats/MetaPvalue.R +11 -13
- biopipen/scripts/stats/MetaPvalue1.R +6 -5
- biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
- biopipen/scripts/tcr/ClonalStats.R +5 -4
- biopipen/scripts/tcr/CloneResidency.R +3 -3
- biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
- biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
- biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
- biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
- biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
- biopipen/scripts/tcr/ScRepLoading.R +114 -92
- biopipen/scripts/tcr/TCRClusterStats.R +2 -2
- biopipen/scripts/tcr/TCRClustering.R +86 -97
- biopipen/scripts/tcr/TESSA.R +65 -115
- biopipen/scripts/tcr/VJUsage.R +5 -5
- biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
- biopipen/utils/common_docstrs.py +66 -63
- biopipen/utils/reporter.py +177 -0
- {biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/METADATA +2 -1
- {biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/RECORD +131 -144
- {biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/WHEEL +1 -1
- biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
- biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
- biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
- biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
- biopipen/reports/utils/gsea.liq +0 -110
- biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
- biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
- biopipen/utils/caching.R +0 -44
- biopipen/utils/gene.R +0 -95
- biopipen/utils/gsea.R +0 -329
- biopipen/utils/io.R +0 -20
- biopipen/utils/misc.R +0 -602
- biopipen/utils/mutate_helpers.R +0 -581
- biopipen/utils/plot.R +0 -209
- biopipen/utils/repr.R +0 -146
- biopipen/utils/rnaseq.R +0 -48
- biopipen/utils/single_cell.R +0 -207
- {biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/entry_points.txt +0 -0
biopipen/reports/utils/gsea.liq
DELETED
|
@@ -1,110 +0,0 @@
|
|
|
1
|
-
{% from "utils/misc.liq" import table_of_images -%}
|
|
2
|
-
|
|
3
|
-
{%- macro fgsea_report_script() -%}
|
|
4
|
-
import { Image, DataTable } from "$libs";
|
|
5
|
-
{%- endmacro -%}
|
|
6
|
-
|
|
7
|
-
{%- macro fgsea_report(fgsea_dir, h, envs, nrows=100) -%}
|
|
8
|
-
{%- addfilter splitgenes -%}
|
|
9
|
-
def splitgenes(data):
|
|
10
|
-
for dat in data:
|
|
11
|
-
dat["leadingEdge"] = dat["leadingEdge"].replace(",", " ")
|
|
12
|
-
return json_dumps(data)
|
|
13
|
-
{%- endaddfilter -%}
|
|
14
|
-
|
|
15
|
-
<h{{h}}>Enrichment table</h{{h}}>
|
|
16
|
-
<Image src={{ fgsea_dir | joinpaths: "gsea_table.png" | quote }} />
|
|
17
|
-
|
|
18
|
-
{% set data = fgsea_dir | joinpaths: "fgsea.txt" | datatable: sep="\t", nrows=nrows | json_loads %}
|
|
19
|
-
|
|
20
|
-
<h{{h}}>Enrichment pathways</h{{h}}>
|
|
21
|
-
<DataTable src={{ fgsea_dir | joinpaths: "fgsea.txt" | quote }}
|
|
22
|
-
data={ {{ data | splitgenes: }} }
|
|
23
|
-
pageSize={10} />
|
|
24
|
-
|
|
25
|
-
<h{{h}}>Enrichment plot of pathways</h{{h}}>
|
|
26
|
-
{%- python -%}
|
|
27
|
-
import os
|
|
28
|
-
def fgsea_plots(pathways, fgsea_dir):
|
|
29
|
-
out = []
|
|
30
|
-
for pathway in pathways:
|
|
31
|
-
pathway = pathway.replace("/", "-")
|
|
32
|
-
pwfig = joinpaths(fgsea_dir, f"fgsea_{pathway}.png")
|
|
33
|
-
if os.path.exists(pwfig):
|
|
34
|
-
out.append(pwfig)
|
|
35
|
-
return out
|
|
36
|
-
{%- endpython -%}
|
|
37
|
-
{{ table_of_images(
|
|
38
|
-
fgsea_plots(liquid_map(data, "pathway"), fgsea_dir),
|
|
39
|
-
liquid_map(data, "pathway"),
|
|
40
|
-
table_width=75
|
|
41
|
-
) }}
|
|
42
|
-
|
|
43
|
-
{%- endmacro -%}
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
{%- macro gsea_report(gsea_dir, h, envs, nrows=100) -%}
|
|
47
|
-
<h{{h}}>Global view</h{{h}}>
|
|
48
|
-
|
|
49
|
-
<embed src={{gsea_dir | joinpaths: "*.global.plots.pdf" | glob | first | quote}}
|
|
50
|
-
width="100%"
|
|
51
|
-
height="1000"
|
|
52
|
-
type="application/pdf" />
|
|
53
|
-
|
|
54
|
-
<h{{h}}>Summary</h{{h}}>
|
|
55
|
-
{% for sumfile in gsea_dir | joinpaths: "*.SUMMARY.RESULTS.REPORT.*.txt" | glob %}
|
|
56
|
-
{% set klass = stem(sumfile).split(".")[-1] %}
|
|
57
|
-
<h{{h+1}}>{{klass}}</h{{h+1}}>
|
|
58
|
-
<DataTable data={ {{sumfile | datatable: sep="\t", nrows=nrows}} } />
|
|
59
|
-
{% endfor %}
|
|
60
|
-
|
|
61
|
-
<h{{h}}>Enrichment details</h{{h}}>
|
|
62
|
-
{% set cutoff = envs.get("fdr.q.val.threshold", envs.get("fdr_q_val_threshold", 0.25)) %}
|
|
63
|
-
{% for sumfile in gsea_dir | joinpaths: "*.SUMMARY.RESULTS.REPORT.*.txt" | glob %}
|
|
64
|
-
{% set klass = stem(sumfile).split(".")[-1] %}
|
|
65
|
-
<h{{h+1}}>{{klass}}</h{{h+1}}>
|
|
66
|
-
{% set sumdata = sumfile | datatable: sep="\t" | json_loads %}
|
|
67
|
-
{% set has_signif = [] %}
|
|
68
|
-
{% for row in sumdata %}
|
|
69
|
-
{% if row["FDR_q_val"] < cutoff %}
|
|
70
|
-
{% set _ = has_signif.append(1) %}
|
|
71
|
-
<embed src={{gsea_dir | joinpaths: "*." + row["GS"] + ".plot." + klass + ".*.pdf" | glob | first | quote}}
|
|
72
|
-
width="100%"
|
|
73
|
-
height="700"
|
|
74
|
-
type="application/pdf" />
|
|
75
|
-
{% endif %}
|
|
76
|
-
{% endfor %}
|
|
77
|
-
{% if len(has_signif) == 0 %}
|
|
78
|
-
<Tile>No significantly (FDR_q_val < {{cutoff}}) enriched pathways found.</Tile>
|
|
79
|
-
{% endif %}
|
|
80
|
-
{% endfor %}
|
|
81
|
-
|
|
82
|
-
{%- endmacro -%}
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
{%- macro enrichr_report_script() -%}
|
|
86
|
-
import { Image, DataTable } from "$libs";
|
|
87
|
-
import { Tabs, Tab, TabContent, InlineNotification } from "$ccs";
|
|
88
|
-
{%- endmacro -%}
|
|
89
|
-
|
|
90
|
-
{%- macro enrichr_report(enrichr_dir) -%}
|
|
91
|
-
<Tabs>
|
|
92
|
-
{% for enrtxt in enrichr_dir | glob: "Enrichr-*.txt" %}
|
|
93
|
-
{% set db = enrtxt | stem | replace: "Enrichr-", "" %}
|
|
94
|
-
<Tab label="{{db}}" title="{{db}}" />
|
|
95
|
-
{% endfor %}
|
|
96
|
-
<div slot="content">
|
|
97
|
-
{% for enrtxt in enrichr_dir | glob: "Enrichr-*.txt" %}
|
|
98
|
-
{% set db = enrtxt | stem | replace: "Enrichr-", "" %}
|
|
99
|
-
<TabContent>
|
|
100
|
-
<Image src={{enrichr_dir | joinpaths: "Enrichr-" + db + ".png" | quote}} />
|
|
101
|
-
<DataTable
|
|
102
|
-
src={{ enrtxt | quote }}
|
|
103
|
-
data={ {{ enrtxt | datatable: sep="\t", nrows=100 }} }
|
|
104
|
-
/>
|
|
105
|
-
</TabContent>
|
|
106
|
-
{% endfor %}
|
|
107
|
-
</div>
|
|
108
|
-
</Tabs>
|
|
109
|
-
{%- endmacro -%}
|
|
110
|
-
|
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
#' Collapse labels that differ only by a trailing ".<digits>" suffix
#'
#' The suffix is stripped with `sub("\\.\\d+$", "", ...)`, so labels such as
#' "T cell.1" and "T cell.2" both become "T cell".
#'
#' @param sobj An object with a `meta.data` slot (a Seurat object in practice)
#' @param newcol Name of the metadata column to rewrite. When `NULL`, the
#'   `seurat_clusters` column is rewritten and set as the active identity.
#' @return The modified object
merge_clusters_with_same_labels <- function(sobj, newcol) {
    use_default <- is.null(newcol)
    target_col <- if (use_default) "seurat_clusters" else newcol

    sobj@meta.data[[target_col]] <- sub("\\.\\d+$", "", sobj@meta.data[[target_col]])

    # The identity is only refreshed when the default cluster column is edited
    if (use_default) {
        Idents(sobj) <- "seurat_clusters"
    }

    sobj
}
|
|
@@ -1,213 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
#' Expand a scalar dimension count into a full index vector
#'
#' Turns e.g. `dims = 30` into `dims = 1:30`. Anything that is not a single
#' number (an explicit vector of dimensions, `NULL`, a non-numeric value) is
#' left untouched.
#'
#' @param args A list of arguments
#' @param name Name of the element of `args` to expand
#' @return `args`, with `args[[name]]` expanded when it was a single number
expand_dims <- function(args, name = "dims") {
    value <- args[[name]]
    # The length test must apply to the value itself; testing
    # `length(value == 1)` is true for any non-empty vector and would
    # collapse an explicit vector of dims to 1:first_element.
    if (is.numeric(value) && length(value) == 1) {
        args[[name]] <- seq_len(value)
    }
    args
}
|
|
9
|
-
|
|
10
|
-
#' Expand a resolution specification into a numeric vector
#'
#' Each element of `resolution` is either a number (kept as is) or a string
#' holding comma-separated parts. A part is either a single number ("0.5") or
#' a range "from:to" / "from:to:by" (`by` defaults to 0.1), expanded with
#' `seq()`.
#'
#' @param resolution A numeric/character vector or a list mixing both
#' @return A numeric vector rounded to 2 digits with duplicates removed;
#'   when a value occurs more than once its last occurrence is kept, so the
#'   last requested resolution stays last.
expand_resolution <- function(resolution) {
    # Expand one comma-separated part: a number or a from:to[:by] range
    expand_part <- function(part) {
        if (!grepl(":", part)) {
            return(as.numeric(part))
        }
        ps <- trimws(unlist(strsplit(part, ":")))
        if (length(ps) == 2) { ps <- c(ps, 0.1) }
        if (length(ps) != 3) {
            # stop() does not interpolate "{part}", so build the message
            stop(paste0(
                "Invalid resolution format: ", part,
                ". Expected 2 or 3 parts separated by ':' for a range."
            ))
        }
        ps <- as.numeric(ps)
        seq(ps[1], ps[2], by = ps[3])
    }

    # Expand one element of `resolution`
    expand_one <- function(res) {
        if (is.numeric(res)) {
            return(res)
        }
        parts <- trimws(unlist(strsplit(res, ",")))
        unlist(lapply(parts, expand_part))
    }

    # as.numeric() keeps the result numeric even when nothing was supplied
    expanded_res <- as.numeric(unlist(lapply(resolution, expand_one)))

    # keep the last resolution at last
    rev(unique(rev(round(expanded_res, 2))))
}
|
|
36
|
-
|
|
37
|
-
# recode clusters from 0, 1, 2, ... to c1, c2, c3, ...
|
|
38
|
-
#' Relabel 0-based cluster ids as "c1", "c2", ...
#'
#' @param clusters A factor whose values and levels are integer-like
#'   (e.g. "0", "1", "2")
#' @return A factor with each id shifted by one and prefixed with "c"; the
#'   order of the levels follows the order of the input levels.
recode_clusters <- function(clusters) {
    to_label <- function(ids) {
        shifted <- as.integer(as.character(ids)) + 1
        paste0("c", shifted)
    }
    factor(to_label(clusters), levels = to_label(levels(clusters)))
}
|
|
43
|
-
|
|
44
|
-
#' Run ScaleData or SCTransform on a Seurat object, with caching
#'
#' Exactly one of `envs$ScaleData` / `envs$SCTransform` may be non-empty.
#' When neither is given the object is returned unchanged. The transformed
#' assay and the command log are stored via `save_to_cache()` and restored
#' from `get_cached()` on later runs with the same arguments.
#'
#' Reads the globals `envs` and `cache_dir`.
#'
#' @param sobj A Seurat object
#' @return The (possibly transformed) Seurat object
run_transformation <- function(sobj) {
    has_scale <- length(envs$ScaleData) > 0
    has_sct <- length(envs$SCTransform) > 0

    if (!has_scale && !has_sct) {
        log_warn("Skipping ScaleData/SCTransform (neither specified) ...")
        return(sobj)
    }
    if (has_scale && has_sct) {
        stop("Both envs.ScaleData and envs.SCTransform are specified. Please choose either.")
    }

    if (has_scale) {
        if (DefaultAssay(sobj) == "SCT") {
            stop("SCT assay detected, but envs.ScaleData is specified. Use envs.SCTransform instead.")
        }
        cached <- get_cached(envs$ScaleData, "ScaleData", cache_dir)
        if (is.null(cached$data)) {
            log_info("Running ScaleData ...")
            sobj <- do_call(ScaleData, c(list(object = sobj), envs$ScaleData))
            cached$data <- list(assay = sobj@assays$RNA, commands = sobj@commands)
            save_to_cache(cached, "ScaleData", cache_dir)
        } else {
            log_info("Loading cached ScaleData ...")
            sobj@assays$RNA <- cached$data$assay
            sobj@commands <- cached$data$commands
            DefaultAssay(sobj) <- "RNA"
        }
    } else {
        if (DefaultAssay(sobj) != "SCT") {
            stop("SCT assay not detected, but envs.SCTransform is specified. Use envs.ScaleData instead.")
        }
        cached <- get_cached(envs$SCTransform, "SCTransform", cache_dir)
        # Name of the assay written by SCTransform. This was previously
        # assigned to a misspelled variable, leaving `assay` undefined in
        # the cached branch below.
        assay <- envs$SCTransform$new.assay.name %||% "SCT"
        if (is.null(cached$data)) {
            log_info("Running SCTransform ...")
            sobj <- do_call(SCTransform, c(list(object = sobj), envs$SCTransform))
            # Cache the assay under the same name it is restored to
            cached$data <- list(assay = sobj@assays[[assay]], commands = sobj@commands)
            save_to_cache(cached, "SCTransform", cache_dir)
        } else {
            log_info("Loading cached SCTransform ...")
            sobj@assays[[assay]] <- cached$data$assay
            sobj@commands <- cached$data$commands
            DefaultAssay(sobj) <- assay
        }
    }
    sobj
}
|
|
88
|
-
|
|
89
|
-
#' Run RunUMAP on a Seurat object, with caching
#'
#' Arguments come from the global `envs$RunUMAP`; `dims` defaults to 1:30 and
#' `reduction` to `sobj@misc$integrated_new_reduction` (or "pca"). `dims` is
#' capped at the number of cells minus one, and for the "uwot" method
#' `n.neighbors` is capped likewise when not given explicitly.
#'
#' Reads the globals `envs` and `cache_dir`.
#'
#' @param sobj A Seurat object
#' @return The Seurat object carrying the UMAP reduction
run_umap <- function(sobj) {
    umap_envs <- envs$RunUMAP
    cached <- get_cached(list(sobj = sobj, RunUMAP = umap_envs), "RunUMAP", cache_dir)
    reduc_name <- umap_envs$reduction.name %||% "umap"

    # Cache hit: restore the reduction and the command log, nothing to compute
    if (!is.null(cached$data)) {
        log_info("Loading cached RunUMAP ...")
        sobj@reductions[[reduc_name]] <- cached$data$reduc
        sobj@commands <- cached$data$commands
        return(sobj)
    }

    log_info("Running RunUMAP ...")
    umap_args <- list_setdefault(
        umap_envs,
        object = sobj,
        dims = 1:30,
        reduction = sobj@misc$integrated_new_reduction %||% "pca"
    )
    ncells <- ncol(sobj)
    max_dim <- min(max(umap_args$dims), ncells - 1)
    umap_args$dims <- 1:max_dim

    umap_method <- umap_envs$umap.method %||% "uwot"
    needs_neighbor_cap <- umap_method == "uwot" && is.null(umap_envs$n.neighbors)
    if (needs_neighbor_cap) {
        # https://github.com/satijalab/seurat/issues/4312
        umap_args$n.neighbors <- min(ncells - 1, 30)
    }

    sobj <- do_call(RunUMAP, umap_args)
    cached$data <- list(reduc = sobj@reductions[[reduc_name]], commands = sobj@commands)
    save_to_cache(cached, "RunUMAP", cache_dir)

    sobj
}
|
|
122
|
-
|
|
123
|
-
#' Run FindNeighbors on a Seurat object, with caching
#'
#' Arguments come from the global `envs$FindNeighbors`; `reduction` is always
#' set to `sobj@misc$integrated_new_reduction` (or "pca"). The graphs and
#' the command log are cached and restored on later runs.
#'
#' Reads the globals `envs` and `cache_dir`.
#'
#' @param sobj A Seurat object
#' @return The Seurat object carrying the neighbor graphs
run_findneighbors <- function(sobj) {
    cache_key <- list(sobj = sobj, FindNeighbors = envs$FindNeighbors)
    cached <- get_cached(cache_key, "FindNeighbors", cache_dir)

    if (!is.null(cached$data)) {
        log_info("Loading cached FindNeighbors ...")
        sobj@graphs <- cached$data$graphs
        sobj@commands <- cached$data$commands
        return(sobj)
    }

    log_info("Running FindNeighbors ...")
    neighbor_args <- envs$FindNeighbors
    neighbor_args$object <- sobj
    neighbor_args$reduction <- sobj@misc$integrated_new_reduction %||% "pca"
    sobj <- do_call(FindNeighbors, neighbor_args)

    cached$data <- list(graphs = sobj@graphs, commands = sobj@commands)
    save_to_cache(cached, "FindNeighbors", cache_dir)

    sobj
}
|
|
144
|
-
|
|
145
|
-
#' Run FindClusters on a Seurat object, with caching
#'
#' Arguments come from the global `envs$FindClusters`. `random.seed` defaults
#' to 8525 and `resolution` (default 0.8) is expanded by
#' `expand_resolution()`. One metadata column "seurat_clusters.<resolution>"
#' is requested per resolution; these columns and `seurat_clusters` are
#' relabelled by `recode_clusters()`, and `seurat_clusters` becomes the
#' active identity.
#'
#' Reads the globals `envs` and `cache_dir`.
#'
#' @param sobj A Seurat object
#' @return The Seurat object carrying the cluster assignments
run_findclusters <- function(sobj) {
    cache_key <- list(sobj = sobj, FindClusters = envs$FindClusters)
    cached <- get_cached(cache_key, "FindClusters", cache_dir)

    if (!is.null(cached$data)) {
        log_info("Loading cached FindClusters ...")

        sobj <- AddMetaData(sobj, metadata = cached$data$clusters)
        Idents(sobj) <- "seurat_clusters"
        sobj@commands <- cached$data$commands
        return(sobj)
    }

    findclusters_args <- envs$FindClusters
    findclusters_args$random.seed <- findclusters_args$random.seed %||% 8525
    # `resolution` is referenced by name in the log templates below
    resolution <- expand_resolution(findclusters_args$resolution %||% 0.8)
    findclusters_args$resolution <- resolution
    log_info("Running FindClusters at resolution: {paste(resolution, collapse=',')} ...")

    cluster_cols <- paste0("seurat_clusters.", resolution)
    findclusters_args$object <- sobj
    findclusters_args$cluster.name <- cluster_cols
    sobj <- do_call(FindClusters, findclusters_args)

    # Relabel the per-resolution columns, then the default column
    for (cluster_col in c(cluster_cols, "seurat_clusters")) {
        sobj@meta.data[[cluster_col]] <- recode_clusters(sobj@meta.data[[cluster_col]])
    }
    Idents(sobj) <- "seurat_clusters"

    ident_table <- table(Idents(sobj))
    log_info("- Found {length(ident_table)} clusters at resolution {resolution[length(resolution)]}")
    print(ident_table)
    cat("\n")

    cached$data <- list(
        clusters = sobj@meta.data[, c(cluster_cols, "seurat_clusters"), drop = FALSE],
        commands = sobj@commands
    )
    save_to_cache(cached, "FindClusters", cache_dir)

    sobj
}
|
|
187
|
-
|
|
188
|
-
#' Run PrepSCTFindMarkers when the default assay is "SCT", with caching
#'
#' For any other default assay the object is returned unchanged. After the
#' call, a command entry named "PrepSCTFindMarkers" is added to
#' `sobj@commands`; it is derived from the existing `FindClusters` entry.
#'
#' Reads the global `cache_dir`.
#'
#' @param sobj A Seurat object
#' @return The Seurat object
run_prepsctfindmarkers <- function(sobj) {
    if (DefaultAssay(sobj) != "SCT") {
        return(sobj)
    }

    cached <- get_cached(list(sobj = sobj), "PrepSCTFindMarkers", cache_dir)
    if (!is.null(cached$data)) {
        log_info("Loading cached PrepSCTFindMarkers ...")
        return(cached$data)
    }

    # https://github.com/satijalab/seurat/issues/6968
    log_info("Running PrepSCTFindMarkers ...")
    sobj <- PrepSCTFindMarkers(sobj)

    # Record the step in sobj@commands, using the FindClusters entry as a
    # template for the command object
    prep_command <- sobj@commands$FindClusters
    prep_command@name <- "PrepSCTFindMarkers"
    prep_command@time.stamp <- Sys.time()
    prep_command@assay.used <- "SCT"
    prep_command@call.string <- "PrepSCTFindMarkers(object = sobj)"
    prep_command@params <- list()
    sobj@commands$PrepSCTFindMarkers <- prep_command

    cached$data <- sobj
    save_to_cache(cached, "PrepSCTFindMarkers", cache_dir)

    sobj
}
|
|
@@ -1,193 +0,0 @@
|
|
|
1
|
-
{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
|
|
2
|
-
{{ biopipen_dir | joinpaths: "utils", "gsea.R" | source_r }}
|
|
3
|
-
|
|
4
|
-
library(parallel)
|
|
5
|
-
library(scater)
|
|
6
|
-
library(Seurat)
|
|
7
|
-
|
|
8
|
-
sobjfile <- {{ in.sobjfile | r }}
|
|
9
|
-
outdir <- {{ out.outdir | r }}
|
|
10
|
-
joboutdir <- {{ job.outdir | r }}
|
|
11
|
-
gmtfile <- {{ envs.gmtfile | r }}
|
|
12
|
-
ncores <- {{ envs.ncores | r }}
|
|
13
|
-
fgsea <- {{ envs.fgsea | r }}
|
|
14
|
-
top <- {{ envs.top | r }}
|
|
15
|
-
prerank_method <- {{ envs.prerank_method | r }}
|
|
16
|
-
grouping <- {{ envs.grouping | r }}
|
|
17
|
-
grouping_prefix <- {{ envs.grouping_prefix | r }}
|
|
18
|
-
subsetting_cols <- {{ envs.subsetting | r }}
|
|
19
|
-
subsetting_prefix <- {{ envs.subsetting_prefix | r }}
|
|
20
|
-
subsetting_comparison <- {{ envs.subsetting_comparison | r }}
|
|
21
|
-
|
|
22
|
-
# Append "_" to every non-empty prefix so that it can be pasted directly in
# front of a group/comparison name.
# The prefixes are handled element-wise: `subsetting_prefix` is indexed per
# subsetting column further down, so it may hold several values, and a
# length > 1 condition inside `&&` is an error since R 4.3.
if (!is.null(grouping_prefix)) {
    nonempty <- which(nchar(grouping_prefix) > 0)
    grouping_prefix[nonempty] <- paste0(grouping_prefix[nonempty], "_")
}

if (!is.null(subsetting_prefix)) {
    nonempty <- which(nchar(subsetting_prefix) > 0)
    subsetting_prefix[nonempty] <- paste0(subsetting_prefix[nonempty], "_")
}
|
|
29
|
-
|
|
30
|
-
set.seed(8525)
|
|
31
|
-
|
|
32
|
-
## gmt_pathways is copied from fgsea package.
|
|
33
|
-
#' Read a GMT file into a named list of gene sets
#'
#' Each line is split on tabs: the first field names the set, the second
#' field is dropped and the remaining fields are the members.
#'
#' @param gmt_file Path to the GMT file
#' @return A list of character vectors named by the first field of each line
gmt_pathways <- function(gmt_file) {
    fields <- strsplit(readLines(gmt_file), "\t")
    gene_sets <- lapply(fields, function(f) tail(f, -2))
    names(gene_sets) <- sapply(fields, function(f) head(f, 1))
    gene_sets
}
|
|
39
|
-
|
|
40
|
-
gmtfile <- localizeGmtfile(gmtfile)
|
|
41
|
-
pathways <- gmt_pathways(gmtfile)
|
|
42
|
-
metabolics <- unique(as.vector(unname(unlist(pathways))))
|
|
43
|
-
sobj <- readRDS(sobjfile)
|
|
44
|
-
|
|
45
|
-
#' Run the enrichment analysis for one case/control comparison in a group
#'
#' The cells of `obj` are split by `subset_col == case` and
#' `subset_col == control`. With at least 5 cells on both sides, either
#' `prerank()` + `runFGSEA()` (global `fgsea` is TRUE) or `runGSEA()` is run
#' and the results are written to `<groupdir>/<subset_prefix><compname>`.
#'
#' Reads the globals `fgsea`, `prerank_method`, `gmtfile` and `top`.
#'
#' @param obj Seurat object of the current group
#' @param compname Name of the comparison
#' @param genes Genes (rows) to keep from the expression matrix
#' @param case Value of `subset_col` defining the case cells
#' @param control Value of `subset_col` defining the control cells
#' @param groupdir Output directory of the group
#' @param subset_col Metadata column used for subsetting
#' @param subset_prefix Prefix of the comparison's output directory name
#' @param groupname Name of the group, used as the `h1` of report entries
#' @return `NULL` when the case subset cannot be built; otherwise a list of
#'   arguments for `add_report()` (an empty list for the non-fgsea run).
do_one_comparison <- function(
    obj,
    compname,
    genes,
    case,
    control,
    groupdir,
    subset_col,
    subset_prefix,
    groupname
) {
    log_info(" Design: {compname} ({case}, {control})")

    # Subset `obj` to the cells where subset_col == value; NULL on failure.
    # NOTE(review): the filter is assembled as text and evaluated, so a value
    # containing a single quote breaks the expression, and any error is
    # reported as "not enough cells".
    subset_or_null <- function(value) {
        code <- paste0("subset(obj, subset = ", subset_col, " == '", value, "')")
        tryCatch({
            eval(parse(text = code))
        }, error = function(e) {
            NULL
        })
    }

    case_obj <- subset_or_null(case)
    if (is.null(case_obj)) {
        log_warn(" Skip (not enough cells in case)")
        return(NULL)
    }

    control_obj <- subset_or_null(control)
    if (is.null(control_obj)) {
        log_warn(" Skip (not enough cells in control)")
        # Return the entry like the other branches instead of calling
        # add_report() here: the caller adds every returned entry, and a
        # direct call would be lost when this runs in an mclapply worker.
        return(list(
            list(kind = "error", content = "Not enough cells in control"),
            h1 = groupname,
            h2 = compname
        ))
    }

    exprs_case <- GetAssayData(case_obj)[genes, , drop = FALSE]
    exprs_control <- GetAssayData(control_obj)[genes, , drop = FALSE]

    odir <- file.path(groupdir, paste0(subset_prefix, compname))
    dir.create(odir, showWarnings = FALSE)
    if (ncol(exprs_case) < 5 || ncol(exprs_control) < 5) {
        log_warn(" Skipped (not enough cells).")
        wfile <- file.path(odir, "warning.txt")
        write("Skipped (not enough cells)\n\n", file = wfile)
        write(paste0("n_cells (Case):", ncol(exprs_case)), file = wfile, append = TRUE)
        write(paste0("n_cells (Control):", ncol(exprs_control)), file = wfile, append = TRUE)

        return(list(
            list(kind = "error", content = "Not enough cells"),
            h1 = groupname,
            h2 = compname
        ))
    }

    # Case columns first, then control columns; `classes` labels them
    exprs <- cbind(exprs_case, exprs_control)
    classes <- c(rep(case, ncol(exprs_case)), rep(control, ncol(exprs_control)))

    if (fgsea) {
        ranks <- prerank(
            exprs,
            case,
            control,
            classes,
            method = prerank_method
        )

        runFGSEA(
            ranks,
            gmtfile,
            top = top,
            outdir = odir,
            envs = list(nproc = 1)
        )

        report <- list(
            list(kind = "fgsea", dir = odir),
            h1 = groupname,
            h2 = compname
        )
    } else {
        runGSEA(
            exprs,
            classes,
            gmtfile,
            odir
        )

        report <- list()
    }

    report
}
|
|
135
|
-
|
|
136
|
-
#' Run all configured comparisons for one group of cells
#'
#' Subsets the global `sobj` to `grouping == group`, creates the output
#' directory of the group and calls `do_one_comparison()` for every named
#' comparison in every entry of `subsetting_comparison`.
#'
#' Reads the globals `sobj`, `metabolics`, `grouping`, `grouping_prefix`,
#' `outdir`, `subsetting_cols`, `subsetting_prefix` and
#' `subsetting_comparison`.
#'
#' @param group A value of the grouping column
#' @return A list with one element per comparison, as returned by
#'   `do_one_comparison()`
do_one_group <- function(group) {
    log_info("- Group: {group} ...")

    genes <- intersect(metabolics, rownames(sobj))
    # The grouping filter is assembled as text and evaluated
    group_code <- paste0("subset(sobj, subset = ", grouping, " == '", group, "')")
    obj <- eval(parse(text = group_code))

    groupname <- paste0(grouping_prefix, group)
    groupdir <- file.path(outdir, slugify(groupname))
    dir.create(groupdir, showWarnings = FALSE)

    report <- list()
    for (i in seq_along(subsetting_comparison)) {
        comparisons <- subsetting_comparison[[i]]
        if (length(comparisons) == 0) {
            next
        }

        results <- lapply(names(comparisons), function(compname) {
            pair <- comparisons[[compname]]
            do_one_comparison(
                obj,
                compname,
                genes,
                pair[1],
                pair[2],
                groupdir,
                subsetting_cols[i],
                subsetting_prefix[i],
                groupname
            )
        })

        if (length(results) > 0) {
            report <- c(report, results)
        }
    }
    report
}
|
|
176
|
-
|
|
177
|
-
# Process every group (in parallel when ncores is not 1), then register the
# report entries returned by the groups.
groups <- sort(as.character(unique(sobj@meta.data[[grouping]])))
if (ncores == 1) {
    x <- lapply(groups, do_one_group)
} else {
    x <- mclapply(groups, do_one_group, mc.cores = ncores)
    # A failed worker is returned as an object of class "try-error"
    worker_failed <- vapply(x, function(res) inherits(res, "try-error"), logical(1))
    if (any(worker_failed)) {
        stop("mclapply error")
    }
}

# Flatten one level: a list of groups becomes a list of report entries
report <- unlist(x, recursive = FALSE)
for (r in Filter(Negate(is.null), report)) {
    do.call(add_report, r)
}
|
|
192
|
-
|
|
193
|
-
save_report(joboutdir)
|
biopipen/utils/caching.R
DELETED
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
library(digest)
|
|
2
|
-
|
|
3
|
-
#' Look up the cache entry of an object
#'
#' The signature is the captured output of `str(x)`; its md5 digest,
#' truncated to 8 characters, identifies the cache file
#' `<cache_dir>/<digest>.<kind>.RDS`.
#'
#' @param x An object to derive the signature from
#' @param kind A string naming the kind of the object, used as part of the
#'   file name of the cached file
#' @param cache_dir Directory holding the cached files; `NULL` or `FALSE`
#'   disables caching
#'
#' @return A list with elements `sig` (signature), `dig` (digest) and `data`
#'   (the cached object, or `NULL` when there is no cache file). All three
#'   are `NULL` when caching is disabled.
get_cached <- function(x, kind, cache_dir) {
    caching_disabled <- is.null(cache_dir) || isFALSE(cache_dir)
    if (caching_disabled) {
        return(list(sig = NULL, dig = NULL, data = NULL))
    }

    sig <- capture.output(str(x))
    dig <- substr(digest::digest(sig, algo = "md5"), 1, 8)

    cached_file <- file.path(cache_dir, paste0(dig, ".", kind, ".RDS"))
    data <- if (file.exists(cached_file)) readRDS(cached_file) else NULL

    list(sig = sig, dig = dig, data = data)
}
|
|
26
|
-
|
|
27
|
-
#' Write a cache entry to disk
#'
#' Two files are written to `cache_dir`: `<dig>.<kind>.signature.txt` (the
#' current time, a blank line and the signature) and `<dig>.<kind>.RDS`
#' (the data).
#'
#' @param to_cache A list with elements `sig` (signature), `dig` (digest)
#'   and `data` (the object to store)
#' @param kind A string naming the kind of the object, used as part of the
#'   file names
#' @param cache_dir Directory to write to; `NULL` or `FALSE` disables
#'   caching, in which case nothing is written
save_to_cache <- function(to_cache, kind, cache_dir) {
    if (is.null(cache_dir) || isFALSE(cache_dir)) { return() }

    file_stem <- paste0(to_cache$dig, ".", kind)

    signature_lines <- c(as.character(Sys.time()), "", to_cache$sig)
    writeLines(signature_lines, file.path(cache_dir, paste0(file_stem, ".signature.txt")))

    saveRDS(to_cache$data, file.path(cache_dir, paste0(file_stem, ".RDS")))
}
|
biopipen/utils/gene.R
DELETED
|
@@ -1,95 +0,0 @@
|
|
|
1
|
-
suppressPackageStartupMessages({
|
|
2
|
-
library(rlang)
|
|
3
|
-
library(dplyr)
|
|
4
|
-
library(mygene)
|
|
5
|
-
})
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
#' Convert gene names/ids from one format to another via mygene
#'
#' @param genes A vector of gene names/ids to convert
#' @param infmt Format (mygene scope) of `genes`, see
#'   https://docs.mygene.info/en/latest/doc/data.html#available-fields
#'   "ensg" and "ensmusg" are accepted as shortcuts for "ensembl.gene"
#' @param outfmt Format (mygene field) to convert to; the same shortcuts
#'   apply
#' @param dup How to handle several hits for one query
#'   "first": keep the hit with the highest score (default)
#'   "last": keep the hit with the lowest score
#'   "all": keep all hits, one row each
#'   "<X>": paste the unique hits into one string separated by X
#' @param species Species passed on to the query
#' @param notfound How to handle queries without a converted name
#'   "error": stop with an error listing the genes
#'   "use-query": use the query itself as the converted name
#'   "skip" / "ignore": drop those rows
#'   "na": keep them with NA as the converted name (default)
#' @param suppress_messages Whether to silence the output, messages and
#'   warnings of the query
#' @return A tibble with the original gene names in `query` and the
#'   converted names in a column named after `outfmt`. When the query
#'   returns no rows at all, the second column is named `converted` and
#'   holds NA.
gene_name_conversion <- function(
    genes,
    infmt,
    outfmt,
    dup = "first",
    species = "human",
    notfound = "na",
    suppress_messages = FALSE
) {
    notfound <- arg_match(notfound, c("error", "use-query", "skip", "ignore", "na"))

    ensembl_shortcuts <- c("ensg", "ensmusg")
    if (infmt %in% ensembl_shortcuts) { infmt <- "ensembl.gene" }
    if (outfmt %in% ensembl_shortcuts) { outfmt <- "ensembl.gene" }

    # Ensembl gene ids are queried without their version suffix
    stripped <- if (infmt == "ensembl.gene") gsub("\\..*", "", genes) else genes
    query_df <- tibble(query = stripped, orig = genes)

    if (suppress_messages) {
        capture.output(suppressWarnings(suppressMessages({
            out <- as_tibble(
                queryMany(stripped, scopes = infmt, fields = outfmt, species = species)
            )
        })))
    } else {
        out <- as_tibble(
            queryMany(stripped, scopes = infmt, fields = outfmt, species = species)
        )
    }

    if (nrow(out) == 0) {
        return(tibble(query = genes, converted = NA_character_))
    }

    if (dup == "first" || dup == "last") {
        grouped <- group_by(out, query)
        ordered <- if (dup == "first") {
            arrange(grouped, desc(X_score))
        } else {
            arrange(grouped, X_score)
        }
        out <- ordered %>%
            slice_head(n = 1) %>%
            ungroup() %>%
            dplyr::select(all_of(c("query", outfmt)))
    } else if (dup != "all") {
        # Any other value of `dup` is the separator used to join the hits
        out <- out %>%
            group_by(query) %>%
            arrange(desc(X_score)) %>%
            summarise(!!sym(outfmt) := paste(unique(!!sym(outfmt)), collapse = dup))
    }

    # Map the hits back to the input and expose the original names as `query`
    joined <- left_join(query_df, out, by = "query")
    out <- joined %>%
        dplyr::select(-"query") %>%
        dplyr::select(query = orig, everything())

    if (notfound == "error") {
        if (any(is.na(out[[outfmt]]))) {
            nagenes <- out %>% filter(is.na(.[[outfmt]])) %>% pull("query")
            stop(paste("Query genes not found:", paste(nagenes, collapse=",")))
        }
    } else if (notfound == "use-query") {
        out <- out %>% mutate(!!sym(outfmt) := coalesce(!!sym(outfmt), query))
    } else if (notfound %in% c("skip", "ignore")) {
        out <- out %>% filter(!is.na(!!sym(outfmt)))
    }

    return(out)
}
|