biopipen 0.33.0__py3-none-any.whl → 0.34.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (150) hide show
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/filters.py +10 -183
  3. biopipen/core/proc.py +5 -3
  4. biopipen/core/testing.py +8 -1
  5. biopipen/ns/bam.py +40 -4
  6. biopipen/ns/cnv.py +1 -1
  7. biopipen/ns/cnvkit.py +1 -1
  8. biopipen/ns/delim.py +1 -1
  9. biopipen/ns/gsea.py +63 -37
  10. biopipen/ns/misc.py +38 -0
  11. biopipen/ns/plot.py +8 -0
  12. biopipen/ns/scrna.py +307 -288
  13. biopipen/ns/scrna_metabolic_landscape.py +207 -366
  14. biopipen/ns/tcr.py +165 -97
  15. biopipen/reports/bam/CNVpytor.svelte +4 -9
  16. biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
  17. biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
  18. biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
  19. biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
  20. biopipen/reports/scrna/DimPlots.svelte +1 -1
  21. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
  22. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
  23. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
  24. biopipen/reports/snp/PlinkCallRate.svelte +2 -2
  25. biopipen/reports/snp/PlinkFreq.svelte +1 -1
  26. biopipen/reports/snp/PlinkHWE.svelte +1 -1
  27. biopipen/reports/snp/PlinkHet.svelte +1 -1
  28. biopipen/reports/snp/PlinkIBD.svelte +1 -1
  29. biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
  30. biopipen/scripts/bam/CNAClinic.R +41 -6
  31. biopipen/scripts/bam/CNVpytor.py +2 -1
  32. biopipen/scripts/bam/ControlFREEC.py +2 -3
  33. biopipen/scripts/bam/SamtoolsView.py +33 -0
  34. biopipen/scripts/cnv/AneuploidyScore.R +25 -13
  35. biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
  36. biopipen/scripts/cnv/TMADScore.R +4 -4
  37. biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
  38. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
  39. biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
  40. biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
  41. biopipen/scripts/delim/RowsBinder.R +1 -1
  42. biopipen/scripts/delim/SampleInfo.R +14 -2
  43. biopipen/scripts/gene/GeneNameConversion.R +14 -12
  44. biopipen/scripts/gsea/Enrichr.R +2 -2
  45. biopipen/scripts/gsea/FGSEA.R +184 -50
  46. biopipen/scripts/gsea/PreRank.R +3 -3
  47. biopipen/scripts/misc/Plot.R +80 -0
  48. biopipen/scripts/plot/VennDiagram.R +2 -2
  49. biopipen/scripts/protein/ProdigySummary.R +34 -27
  50. biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
  51. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
  52. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
  53. biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
  54. biopipen/scripts/regulatory/motifs-common.R +10 -9
  55. biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
  56. biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
  57. biopipen/scripts/rnaseq/Simulation.R +0 -2
  58. biopipen/scripts/rnaseq/UnitConversion.R +6 -5
  59. biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
  60. biopipen/scripts/scrna/CellCellCommunication.py +1 -1
  61. biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
  62. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
  63. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
  64. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
  65. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
  66. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
  67. biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
  68. biopipen/scripts/scrna/CellsDistribution.R +1 -1
  69. biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
  70. biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
  71. biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
  72. biopipen/scripts/scrna/LoomTo10X.R +51 -0
  73. biopipen/scripts/scrna/MarkersFinder.R +348 -217
  74. biopipen/scripts/scrna/MetaMarkers.R +3 -3
  75. biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
  76. biopipen/scripts/scrna/RadarPlots.R +1 -1
  77. biopipen/scripts/scrna/ScFGSEA.R +157 -75
  78. biopipen/scripts/scrna/ScSimulation.R +11 -10
  79. biopipen/scripts/scrna/ScVelo.py +605 -0
  80. biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
  81. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
  82. biopipen/scripts/scrna/SeuratClusterStats-features.R +39 -30
  83. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
  84. biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
  85. biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
  86. biopipen/scripts/scrna/SeuratClustering.R +31 -48
  87. biopipen/scripts/scrna/SeuratLoading.R +2 -2
  88. biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
  89. biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
  90. biopipen/scripts/scrna/SeuratPreparing.R +76 -24
  91. biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
  92. biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
  93. biopipen/scripts/scrna/Subset10X.R +2 -2
  94. biopipen/scripts/scrna/TopExpressingGenes.R +141 -184
  95. biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
  96. biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
  97. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
  98. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
  99. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
  100. biopipen/scripts/snp/MatrixEQTL.R +39 -20
  101. biopipen/scripts/snp/PlinkCallRate.R +43 -34
  102. biopipen/scripts/snp/PlinkFreq.R +34 -41
  103. biopipen/scripts/snp/PlinkHWE.R +23 -18
  104. biopipen/scripts/snp/PlinkHet.R +26 -22
  105. biopipen/scripts/snp/PlinkIBD.R +30 -34
  106. biopipen/scripts/stats/ChowTest.R +9 -8
  107. biopipen/scripts/stats/DiffCoexpr.R +13 -11
  108. biopipen/scripts/stats/LiquidAssoc.R +7 -8
  109. biopipen/scripts/stats/Mediation.R +8 -8
  110. biopipen/scripts/stats/MetaPvalue.R +11 -13
  111. biopipen/scripts/stats/MetaPvalue1.R +6 -5
  112. biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
  113. biopipen/scripts/tcr/ClonalStats.R +5 -4
  114. biopipen/scripts/tcr/CloneResidency.R +3 -3
  115. biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
  116. biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
  117. biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
  118. biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
  119. biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
  120. biopipen/scripts/tcr/ScRepLoading.R +114 -92
  121. biopipen/scripts/tcr/TCRClusterStats.R +2 -2
  122. biopipen/scripts/tcr/TCRClustering.R +86 -97
  123. biopipen/scripts/tcr/TESSA.R +65 -115
  124. biopipen/scripts/tcr/VJUsage.R +5 -5
  125. biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
  126. biopipen/utils/common_docstrs.py +66 -63
  127. biopipen/utils/reporter.py +177 -0
  128. {biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/METADATA +2 -1
  129. {biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/RECORD +131 -144
  130. {biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/WHEEL +1 -1
  131. biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
  132. biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
  133. biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
  134. biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
  135. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
  136. biopipen/reports/utils/gsea.liq +0 -110
  137. biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
  138. biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
  139. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
  140. biopipen/utils/caching.R +0 -44
  141. biopipen/utils/gene.R +0 -95
  142. biopipen/utils/gsea.R +0 -329
  143. biopipen/utils/io.R +0 -20
  144. biopipen/utils/misc.R +0 -602
  145. biopipen/utils/mutate_helpers.R +0 -581
  146. biopipen/utils/plot.R +0 -209
  147. biopipen/utils/repr.R +0 -146
  148. biopipen/utils/rnaseq.R +0 -48
  149. biopipen/utils/single_cell.R +0 -207
  150. {biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/entry_points.txt +0 -0
@@ -1,110 +0,0 @@
1
- {% from "utils/misc.liq" import table_of_images -%}
2
-
3
- {%- macro fgsea_report_script() -%}
4
- import { Image, DataTable } from "$libs";
5
- {%- endmacro -%}
6
-
7
- {%- macro fgsea_report(fgsea_dir, h, envs, nrows=100) -%}
8
- {%- addfilter splitgenes -%}
9
- def splitgenes(data):
10
- for dat in data:
11
- dat["leadingEdge"] = dat["leadingEdge"].replace(",", " ")
12
- return json_dumps(data)
13
- {%- endaddfilter -%}
14
-
15
- <h{{h}}>Enrichment table</h{{h}}>
16
- <Image src={{ fgsea_dir | joinpaths: "gsea_table.png" | quote }} />
17
-
18
- {% set data = fgsea_dir | joinpaths: "fgsea.txt" | datatable: sep="\t", nrows=nrows | json_loads %}
19
-
20
- <h{{h}}>Enrichment pathways</h{{h}}>
21
- <DataTable src={{ fgsea_dir | joinpaths: "fgsea.txt" | quote }}
22
- data={ {{ data | splitgenes: }} }
23
- pageSize={10} />
24
-
25
- <h{{h}}>Enrichment plot of pathways</h{{h}}>
26
- {%- python -%}
27
- import os
28
- def fgsea_plots(pathways, fgsea_dir):
29
- out = []
30
- for pathway in pathways:
31
- pathway = pathway.replace("/", "-")
32
- pwfig = joinpaths(fgsea_dir, f"fgsea_{pathway}.png")
33
- if os.path.exists(pwfig):
34
- out.append(pwfig)
35
- return out
36
- {%- endpython -%}
37
- {{ table_of_images(
38
- fgsea_plots(liquid_map(data, "pathway"), fgsea_dir),
39
- liquid_map(data, "pathway"),
40
- table_width=75
41
- ) }}
42
-
43
- {%- endmacro -%}
44
-
45
-
46
- {%- macro gsea_report(gsea_dir, h, envs, nrows=100) -%}
47
- <h{{h}}>Global view</h{{h}}>
48
-
49
- <embed src={{gsea_dir | joinpaths: "*.global.plots.pdf" | glob | first | quote}}
50
- width="100%"
51
- height="1000"
52
- type="application/pdf" />
53
-
54
- <h{{h}}>Summary</h{{h}}>
55
- {% for sumfile in gsea_dir | joinpaths: "*.SUMMARY.RESULTS.REPORT.*.txt" | glob %}
56
- {% set klass = stem(sumfile).split(".")[-1] %}
57
- <h{{h+1}}>{{klass}}</h{{h+1}}>
58
- <DataTable data={ {{sumfile | datatable: sep="\t", nrows=nrows}} } />
59
- {% endfor %}
60
-
61
- <h{{h}}>Enrichment details</h{{h}}>
62
- {% set cutoff = envs.get("fdr.q.val.threshold", envs.get("fdr_q_val_threshold", 0.25)) %}
63
- {% for sumfile in gsea_dir | joinpaths: "*.SUMMARY.RESULTS.REPORT.*.txt" | glob %}
64
- {% set klass = stem(sumfile).split(".")[-1] %}
65
- <h{{h+1}}>{{klass}}</h{{h+1}}>
66
- {% set sumdata = sumfile | datatable: sep="\t" | json_loads %}
67
- {% set has_signif = [] %}
68
- {% for row in sumdata %}
69
- {% if row["FDR_q_val"] < cutoff %}
70
- {% set _ = has_signif.append(1) %}
71
- <embed src={{gsea_dir | joinpaths: "*." + row["GS"] + ".plot." + klass + ".*.pdf" | glob | first | quote}}
72
- width="100%"
73
- height="700"
74
- type="application/pdf" />
75
- {% endif %}
76
- {% endfor %}
77
- {% if len(has_signif) == 0 %}
78
- <Tile>No significantly (FDR_q_val &lt; {{cutoff}}) enriched pathways found.</Tile>
79
- {% endif %}
80
- {% endfor %}
81
-
82
- {%- endmacro -%}
83
-
84
-
85
- {%- macro enrichr_report_script() -%}
86
- import { Image, DataTable } from "$libs";
87
- import { Tabs, Tab, TabContent, InlineNotification } from "$ccs";
88
- {%- endmacro -%}
89
-
90
- {%- macro enrichr_report(enrichr_dir) -%}
91
- <Tabs>
92
- {% for enrtxt in enrichr_dir | glob: "Enrichr-*.txt" %}
93
- {% set db = enrtxt | stem | replace: "Enrichr-", "" %}
94
- <Tab label="{{db}}" title="{{db}}" />
95
- {% endfor %}
96
- <div slot="content">
97
- {% for enrtxt in enrichr_dir | glob: "Enrichr-*.txt" %}
98
- {% set db = enrtxt | stem | replace: "Enrichr-", "" %}
99
- <TabContent>
100
- <Image src={{enrichr_dir | joinpaths: "Enrichr-" + db + ".png" | quote}} />
101
- <DataTable
102
- src={{ enrtxt | quote }}
103
- data={ {{ enrtxt | datatable: sep="\t", nrows=100 }} }
104
- />
105
- </TabContent>
106
- {% endfor %}
107
- </div>
108
- </Tabs>
109
- {%- endmacro -%}
110
-
@@ -1,10 +0,0 @@
1
- merge_clusters_with_same_labels <- function(sobj, newcol) {
2
- if (is.null(newcol)) {
3
- sobj@meta.data$seurat_clusters <- sub("\\.\\d+$", "", sobj@meta.data$seurat_clusters)
4
- Idents(sobj) <- "seurat_clusters"
5
- } else {
6
- sobj@meta.data[[newcol]] <- sub("\\.\\d+$", "", sobj@meta.data[[newcol]])
7
- }
8
-
9
- sobj
10
- }
@@ -1,213 +0,0 @@
1
-
2
- expand_dims <- function(args, name = "dims") {
3
- # Expand dims from 30 to 1:30
4
- if (is.numeric(args[[name]]) && length(args[[name]] == 1)) {
5
- args[[name]] <- 1:args[[name]]
6
- }
7
- args
8
- }
9
-
10
- expand_resolution <- function(resolution) {
11
- expanded_res <- c()
12
- for (res in resolution) {
13
- if (is.numeric(res)) {
14
- expanded_res <- c(expanded_res, res)
15
- } else {
16
- # is.character
17
- parts <- trimws(unlist(strsplit(res, ",")))
18
- for (part in parts) {
19
- if (grepl(":", part)) {
20
- ps <- trimws(unlist(strsplit(part, ":")))
21
- if (length(ps) == 2) { ps <- c(ps, 0.1) }
22
- if (length(ps) != 3) {
23
- stop("Invalid resolution format: {part}. Expected 2 or 3 parts separated by ':' for a range.")
24
- }
25
- ps <- as.numeric(ps)
26
- expanded_res <- c(expanded_res, seq(ps[1], ps[2], by = ps[3]))
27
- } else {
28
- expanded_res <- c(expanded_res, as.numeric(part))
29
- }
30
- }
31
- }
32
- }
33
- # keep the last resolution at last
34
- rev(unique(rev(round(expanded_res, 2))))
35
- }
36
-
37
- # recode clusters from 0, 1, 2, ... to c1, c2, c3, ...
38
- recode_clusters <- function(clusters) {
39
- recode <- function(x) paste0("c", as.integer(as.character(x)) + 1)
40
- clusters <- factor(recode(clusters), levels = recode(levels(clusters)))
41
- clusters
42
- }
43
-
44
- run_transformation <- function(sobj) {
45
- if (length(envs$ScaleData) == 0 && length(envs$SCTransform) == 0) {
46
- log_warn("Skipping ScaleData/SCTransform (neither specified) ...")
47
- return(sobj)
48
- }
49
- if (length(envs$ScaleData) > 0 && length(envs$SCTransform) > 0) {
50
- stop("Both envs.ScaleData and envs.SCTransform are specified. Please choose either.")
51
- }
52
- if (length(envs$ScaleData) > 0) {
53
- if (DefaultAssay(sobj) == "SCT") {
54
- stop("SCT assay detected, but envs.ScaleData is specified. Use envs.SCTransform instead.")
55
- }
56
- cached <- get_cached(envs$ScaleData, "ScaleData", cache_dir)
57
- if (is.null(cached$data)) {
58
- log_info("Running ScaleData ...")
59
- sobj <- do_call(ScaleData, c(list(object = sobj), envs$ScaleData))
60
- cached$data <- list(assay = sobj@assays$RNA, commands = sobj@commands)
61
- save_to_cache(cached, "ScaleData", cache_dir)
62
- } else {
63
- log_info("Loading cached ScaleData ...")
64
- sobj@assays$RNA <- cached$data$assay
65
- sobj@commands <- cached$data$commands
66
- DefaultAssay(sobj) <- "RNA"
67
- }
68
- } else if (length(envs$SCTransform) > 0) {
69
- if (DefaultAssay(sobj) != "SCT") {
70
- stop("SCT assay not detected, but envs.SCTransform is specified. Use envs.ScaleData instead.")
71
- }
72
- cached <- get_cached(envs$SCTransform, "SCTransform", cache_dir)
73
- asssay <- envs$SCTransform$new.assay.name %||% "SCT"
74
- if (is.null(cached$data)) {
75
- log_info("Running SCTransform ...")
76
- sobj <- do_call(SCTransform, c(list(object = sobj), envs$SCTransform))
77
- cached$data <- list(assay = sobj@assays$SCT, commands = sobj@commands)
78
- save_to_cache(cached, "SCTransform", cache_dir)
79
- } else {
80
- log_info("Loading cached SCTransform ...")
81
- sobj@assays[[assay]] <- cached$data$assay
82
- sobj@commands <- cached$data$commands
83
- DefaultAssay(sobj) <- assay
84
- }
85
- }
86
- sobj
87
- }
88
-
89
- run_umap <- function(sobj) {
90
- cached <- get_cached(
91
- list(sobj = sobj, RunUMAP = envs$RunUMAP),
92
- "RunUMAP",
93
- cache_dir
94
- )
95
- reduc_name <- envs$RunUMAP$reduction.name %||% "umap"
96
- if (is.null(cached$data)) {
97
- log_info("Running RunUMAP ...")
98
- umap_args <- list_setdefault(
99
- envs$RunUMAP,
100
- object = sobj,
101
- dims = 1:30,
102
- reduction = sobj@misc$integrated_new_reduction %||% "pca"
103
- )
104
- ncells <- ncol(sobj)
105
- umap_args$dims <- 1:min(max(umap_args$dims), ncells - 1)
106
- umap_method <- envs$RunUMAP$umap.method %||% "uwot"
107
- if (umap_method == "uwot" && is.null(envs$RunUMAP$n.neighbors)) {
108
- # https://github.com/satijalab/seurat/issues/4312
109
- umap_args$n.neighbors <- min(ncells - 1, 30)
110
- }
111
- sobj <- do_call(RunUMAP, umap_args)
112
- cached$data <- list(reduc = sobj@reductions[[reduc_name]], commands = sobj@commands)
113
- save_to_cache(cached, "RunUMAP", cache_dir)
114
- } else {
115
- log_info("Loading cached RunUMAP ...")
116
- sobj@reductions[[reduc_name]] <- cached$data$reduc
117
- sobj@commands <- cached$data$commands
118
- }
119
-
120
- sobj
121
- }
122
-
123
- run_findneighbors <- function(sobj) {
124
- cached <- get_cached(
125
- list(sobj = sobj, FindNeighbors = envs$FindNeighbors),
126
- "FindNeighbors",
127
- cache_dir
128
- )
129
- if (is.null(cached$data)) {
130
- log_info("Running FindNeighbors ...")
131
- envs$FindNeighbors$object <- sobj
132
- envs$FindNeighbors$reduction <- sobj@misc$integrated_new_reduction %||% "pca"
133
- sobj <- do_call(FindNeighbors, envs$FindNeighbors)
134
- cached$data <- list(graphs = sobj@graphs, commands = sobj@commands)
135
- save_to_cache(cached, "FindNeighbors", cache_dir)
136
- } else {
137
- log_info("Loading cached FindNeighbors ...")
138
- sobj@graphs <- cached$data$graphs
139
- sobj@commands <- cached$data$commands
140
- }
141
-
142
- sobj
143
- }
144
-
145
- run_findclusters <- function(sobj) {
146
- cached <- get_cached(
147
- list(sobj = sobj, FindClusters = envs$FindClusters),
148
- "FindClusters",
149
- cache_dir
150
- )
151
- if (is.null(cached$data)) {
152
- findclusters_args <- envs$FindClusters
153
- findclusters_args$random.seed <- findclusters_args$random.seed %||% 8525
154
- resolution <- findclusters_args$resolution <- expand_resolution(findclusters_args$resolution %||% 0.8)
155
- log_info("Running FindClusters at resolution: {paste(resolution, collapse=',')} ...")
156
-
157
- findclusters_args$object <- sobj
158
- findclusters_args$cluster.name <- paste0("seurat_clusters.", resolution)
159
- sobj <- do_call(FindClusters, findclusters_args)
160
-
161
- for (clname in findclusters_args$cluster.name) {
162
- sobj@meta.data[[clname]] <- recode_clusters(sobj@meta.data[[clname]])
163
- }
164
- sobj@meta.data$seurat_clusters <- recode_clusters(sobj@meta.data$seurat_clusters)
165
- Idents(sobj) <- "seurat_clusters"
166
-
167
- ident_table <- table(Idents(sobj))
168
- log_info("- Found {length(ident_table)} clusters at resolution {resolution[length(resolution)]}")
169
- print(ident_table)
170
- cat("\n")
171
-
172
- cached$data <- list(
173
- clusters = sobj@meta.data[, c(findclusters_args$cluster.name, "seurat_clusters"), drop = FALSE],
174
- commands = sobj@commands
175
- )
176
- save_to_cache(cached, "FindClusters", cache_dir)
177
- } else {
178
- log_info("Loading cached FindClusters ...")
179
-
180
- sobj <- AddMetaData(sobj, metadata = cached$data$clusters)
181
- Idents(sobj) <- "seurat_clusters"
182
- sobj@commands <- cached$data$commands
183
- }
184
-
185
- sobj
186
- }
187
-
188
- run_prepsctfindmarkers <- function(sobj) {
189
- if (DefaultAssay(sobj) == "SCT") {
190
- cached <- get_cached(list(sobj = sobj), "PrepSCTFindMarkers", cache_dir)
191
- if (is.null(cached$data)) {
192
- # https://github.com/satijalab/seurat/issues/6968
193
- log_info("Running PrepSCTFindMarkers ...")
194
- sobj <- PrepSCTFindMarkers(sobj)
195
- # compose a new SeuratCommand to record it to sobj@commands
196
- scommand <- sobj@commands$FindClusters
197
- scommand@name <- "PrepSCTFindMarkers"
198
- scommand@time.stamp <- Sys.time()
199
- scommand@assay.used <- "SCT"
200
- scommand@call.string <- "PrepSCTFindMarkers(object = sobj)"
201
- scommand@params <- list()
202
- sobj@commands$PrepSCTFindMarkers <- scommand
203
-
204
- cached$data <- sobj
205
- save_to_cache(cached, "PrepSCTFindMarkers", cache_dir)
206
- } else {
207
- log_info("Loading cached PrepSCTFindMarkers ...")
208
- sobj <- cached$data
209
- }
210
- }
211
-
212
- sobj
213
- }
@@ -1,193 +0,0 @@
1
- {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
- {{ biopipen_dir | joinpaths: "utils", "gsea.R" | source_r }}
3
-
4
- library(parallel)
5
- library(scater)
6
- library(Seurat)
7
-
8
- sobjfile <- {{ in.sobjfile | r }}
9
- outdir <- {{ out.outdir | r }}
10
- joboutdir <- {{ job.outdir | r }}
11
- gmtfile <- {{ envs.gmtfile | r }}
12
- ncores <- {{ envs.ncores | r }}
13
- fgsea <- {{ envs.fgsea | r }}
14
- top <- {{ envs.top | r }}
15
- prerank_method <- {{ envs.prerank_method | r }}
16
- grouping <- {{ envs.grouping | r }}
17
- grouping_prefix <- {{ envs.grouping_prefix | r }}
18
- subsetting_cols <- {{ envs.subsetting | r }}
19
- subsetting_prefix <- {{ envs.subsetting_prefix | r }}
20
- subsetting_comparison <- {{ envs.subsetting_comparison | r }}
21
-
22
- if (!is.null(grouping_prefix) && nchar(grouping_prefix) > 0) {
23
- grouping_prefix = paste0(grouping_prefix, "_")
24
- }
25
-
26
- if (!is.null(subsetting_prefix) && nchar(subsetting_prefix) > 0) {
27
- subsetting_prefix = paste0(subsetting_prefix, "_")
28
- }
29
-
30
- set.seed(8525)
31
-
32
- ## gmt_pathways is copied from fgsea package.
33
- gmt_pathways <- function(gmt_file) {
34
- pathway_lines <- strsplit(readLines(gmt_file), "\t")
35
- pathways <- lapply(pathway_lines, tail, -2)
36
- names(pathways) <- sapply(pathway_lines, head, 1)
37
- pathways
38
- }
39
-
40
- gmtfile <- localizeGmtfile(gmtfile)
41
- pathways <- gmt_pathways(gmtfile)
42
- metabolics <- unique(as.vector(unname(unlist(pathways))))
43
- sobj <- readRDS(sobjfile)
44
-
45
- do_one_comparison <- function(
46
- obj,
47
- compname,
48
- genes,
49
- case,
50
- control,
51
- groupdir,
52
- subset_col,
53
- subset_prefix,
54
- groupname
55
- ) {
56
- log_info(paste(" Design: {compname} ({case}, {control})"))
57
- case_code = paste0("subset(obj, subset = ", subset_col, " == '", case, "')")
58
- case_obj = tryCatch({
59
- eval(parse(text = case_code))
60
- }, error = function(e) {
61
- NULL
62
- })
63
- if (is.null(case_obj)) {
64
- log_warn(" Skip (not enough cells in case)")
65
- return (NULL)
66
- }
67
- control_code = paste0("subset(obj, subset = ", subset_col, " == '", control, "')")
68
- control_obj = tryCatch({
69
- eval(parse(text = control_code))
70
- }, error = function(e) {
71
- NULL
72
- })
73
- if (is.null(control_obj)) {
74
- log_warn(" Skip (not enough cells in control)")
75
- add_report(
76
- list(kind = "error", content = "Not enough cells in control"),
77
- h1 = groupname,
78
- h2 = compname
79
- )
80
- return (NULL)
81
- }
82
- exprs_case = GetAssayData(case_obj)[genes, , drop = FALSE]
83
- exprs_control = GetAssayData(control_obj)[genes, , drop = FALSE]
84
-
85
- odir = file.path(groupdir, paste0(subset_prefix, compname))
86
- dir.create(odir, showWarnings = FALSE)
87
- if (ncol(exprs_case) < 5 || ncol(exprs_control) < 5) {
88
- log_warn(" Skipped (not enough cells).")
89
- wfile <- file.path(odir, "warning.txt")
90
- write("Skipped (not enough cells)\n\n", file = wfile)
91
- write(paste0("n_cells (Case):", ncol(exprs_case)), file = wfile, append = TRUE)
92
- write(paste0("n_cells (Control):", ncol(exprs_control)), file = wfile, append = TRUE)
93
-
94
- return(list(
95
- list(kind = "error", content = "Not enough cells"),
96
- h1 = groupname,
97
- h2 = compname
98
- ))
99
- }
100
- if (fgsea) {
101
- ranks = prerank(
102
- cbind(exprs_case, exprs_control),
103
- case,
104
- control,
105
- c(rep(case, ncol(exprs_case)), rep(control, ncol(exprs_control))),
106
- method = prerank_method
107
- )
108
-
109
- runFGSEA(
110
- ranks,
111
- gmtfile,
112
- top = top,
113
- outdir = odir,
114
- envs = list(nproc = 1)
115
- )
116
-
117
- report = list(
118
- list(kind = "fgsea", dir = odir),
119
- h1 = groupname,
120
- h2 = compname
121
- )
122
- } else {
123
- runGSEA(
124
- cbind(exprs_case, exprs_control),
125
- c(rep(case, ncol(exprs_case)), rep(control, ncol(exprs_control))),
126
- gmtfile,
127
- odir
128
- )
129
-
130
- report = list()
131
- }
132
-
133
- report
134
- }
135
-
136
- do_one_group <- function(group) {
137
- log_info("- Group: {group} ...")
138
-
139
- genes = intersect(metabolics, rownames(sobj))
140
- group_code = paste0(
141
- "subset(sobj, subset = ", grouping, " == '", group, "')"
142
- )
143
- obj = eval(parse(text = group_code))
144
- groupname = paste0(grouping_prefix, group)
145
- groupdir = file.path(outdir, slugify(groupname))
146
- dir.create(groupdir, showWarnings = FALSE)
147
-
148
- report = list()
149
- for (i in seq_along(subsetting_comparison)) {
150
- sci = subsetting_comparison[[i]]
151
- if (is.null(sci) || length(sci) == 0) {
152
- next
153
- }
154
- rs = lapply(
155
- names(sci),
156
- function(compname) {
157
- do_one_comparison(
158
- obj,
159
- compname,
160
- genes,
161
- sci[[compname]][1],
162
- sci[[compname]][2],
163
- groupdir,
164
- subsetting_cols[i],
165
- subsetting_prefix[i],
166
- groupname
167
- )
168
- }
169
- )
170
- if (length(rs) > 0) {
171
- report = c(report, rs)
172
- }
173
- }
174
- report
175
- }
176
-
177
- groups = sort(as.character(unique(sobj@meta.data[[grouping]])))
178
- if (ncores == 1) {
179
- x = lapply(groups, do_one_group)
180
- } else {
181
- x = mclapply(groups, do_one_group, mc.cores = ncores)
182
- if (any(unlist(lapply(x, class)) == "try-error")) {
183
- stop("mclapply error")
184
- }
185
- }
186
- report = unlist(x, recursive = FALSE)
187
- for (r in report) {
188
- if (!is.null(r)) {
189
- do.call(add_report, r)
190
- }
191
- }
192
-
193
- save_report(joboutdir)
biopipen/utils/caching.R DELETED
@@ -1,44 +0,0 @@
1
- library(digest)
2
-
3
- #' Get signatures and cached data
4
- #'
5
- #' @param x An object to infer signature from
6
- #' @param kind A string indicating the kind of the object
7
- #' Used as part of the filename of the cached file
8
- #' @param cache_dir A string indicating the directory to store cached files
9
- #'
10
- #' @return A list containing the signature, digested signature and cached data
11
- get_cached <- function(x, kind, cache_dir) {
12
- if (is.null(cache_dir) || isFALSE(cache_dir)) {
13
- return(list(sig = NULL, dig = NULL, data = NULL))
14
- }
15
- # Get signature of an object
16
- sig <- capture.output(str(x))
17
- dig <- digest::digest(sig, algo = "md5")
18
- dig <- substr(dig, 1, 8)
19
- cached_file <- file.path(cache_dir, paste0(dig, ".", kind, ".RDS"))
20
- if (!file.exists(cached_file)) {
21
- return(list(sig = sig, dig = dig, data = NULL))
22
- }
23
-
24
- list(sig = sig, dig = dig, data = readRDS(cached_file))
25
- }
26
-
27
- #' Save an object to cache
28
- #'
29
- #' @param to_cache An list to cache,
30
- #' including the signature, digested signature and data
31
- #' @param kind A string indicating the kind of the object
32
- #' Used as part of the filename of the cached file
33
- #' @param cache_dir A string indicating the directory to store cached files
34
- save_to_cache <- function(to_cache, kind, cache_dir) {
35
- if (is.null(cache_dir) || isFALSE(cache_dir)) { return() }
36
- dig <- to_cache$dig
37
- sig <- to_cache$sig
38
- data <- to_cache$data
39
- # Save an object to cache
40
- sig_file <- file.path(cache_dir, paste0(dig, ".", kind , ".signature.txt"))
41
- writeLines(c(as.character(Sys.time()), "", sig), sig_file)
42
- cached_file <- file.path(cache_dir, paste0(dig, ".", kind, ".RDS"))
43
- saveRDS(data, cached_file)
44
- }
biopipen/utils/gene.R DELETED
@@ -1,95 +0,0 @@
1
- suppressPackageStartupMessages({
2
- library(rlang)
3
- library(dplyr)
4
- library(mygene)
5
- })
6
-
7
-
8
- #@' Convert gene names between different formats
9
- #@'
10
- #@' @param genes A character/integer vector of gene names/ids
11
- #@' @param species A character vector of species names
12
- #@' @param infmt A character vector of input gene name formats
13
- #@' See the available scopes at
14
- #@' https://docs.mygene.info/en/latest/doc/data.html#available-fields
15
- #@' You can use ensg as a shortcut for ensembl.gene
16
- #@' @param outfmt A character vector of output gene name formats
17
- #@' @param dup How to deal with duplicate gene names found.
18
- #@' "first": keep the first one (default), sorted by score descendingly
19
- #@' "last": keep the last one, sorted by score descendingly
20
- #@' "all": keep all of them, each will be a separate row
21
- #@' "<X>": combine them into a single string, separated by X
22
- #@' @param notfound How to deal with gene names that are not found
23
- #@' "error": stop with an error message
24
- #@' "use-query": use the query gene name as the converted gene name
25
- #@' "skip": skip the gene names that are not found
26
- #@' "ignore": Same as "skip"
27
- #@' "na": use NA as the converted gene name (default)
28
- #@' @param suppress_messages Whether to suppress the warning messages
29
- #@' @return A tibble with the query gene names and the converted gene names
30
- #@' When a gene name is not found, the converted name will be NA
31
- #@' When duplicate gene names are found, the one with the highest score will be kept
32
- #@' @export
33
- gene_name_conversion <- function(
34
- genes,
35
- infmt,
36
- outfmt,
37
- dup = "first",
38
- species = "human",
39
- notfound = "na",
40
- suppress_messages = FALSE
41
- ) {
42
- notfound <- arg_match(notfound, c("error", "use-query", "skip", "ignore", "na"))
43
-
44
- if (infmt %in% c("ensg", "ensmusg")) { infmt = "ensembl.gene" }
45
- if (outfmt %in% c("ensg", "ensmusg")) { outfmt = "ensembl.gene" }
46
-
47
- orig_genes <- genes
48
- if (infmt == "ensembl.gene") {
49
- # Remove version numbers from ensembl gene ids
50
- genes <- gsub("\\..*", "", genes)
51
- }
52
- query_df <- tibble(query = genes, orig = orig_genes)
53
-
54
- if (suppress_messages) {
55
- capture.output(suppressWarnings(suppressMessages({
56
- out <- queryMany(genes, scopes=infmt, fields=outfmt, species=species) %>%
57
- as_tibble()
58
- })))
59
- } else {
60
- out <- queryMany(genes, scopes=infmt, fields=outfmt, species=species) %>%
61
- as_tibble()
62
- }
63
-
64
- if (nrow(out) == 0) {
65
- return(tibble(query = orig_genes, converted = NA_character_))
66
- }
67
-
68
- if (dup == "first") {
69
- out = out %>% group_by(query) %>% arrange(desc(X_score)) %>%
70
- slice_head(n=1) %>% ungroup() %>% dplyr::select(all_of(c("query", outfmt)))
71
- } else if (dup == "last") {
72
- out = out %>% group_by(query) %>% arrange(X_score) %>%
73
- slice_head(n=1) %>% ungroup() %>% dplyr::select(all_of(c("query", outfmt)))
74
- } else if (dup != "all") {
75
- out = out %>% group_by(query) %>% arrange(desc(X_score)) %>%
76
- summarise(!!sym(outfmt) := paste(unique(!!sym(outfmt)), collapse=dup))
77
- }
78
- out <- query_df %>%
79
- left_join(out, by="query") %>%
80
- dplyr::select(-"query") %>%
81
- dplyr::select(query = orig, everything())
82
-
83
- if (notfound == "error") {
84
- if (any(is.na(out[[outfmt]]))) {
85
- nagenes = out %>% filter(is.na(.[[outfmt]])) %>% pull("query")
86
- stop(paste("Query genes not found:", paste(nagenes, collapse=",")))
87
- }
88
- } else if (notfound == "use-query") {
89
- out = out %>% mutate(!!sym(outfmt) := coalesce(!!sym(outfmt), query))
90
- } else if (notfound == "skip" || notfound == "ignore") {
91
- out = out %>% filter(!is.na(!!sym(outfmt)))
92
- }
93
-
94
- return(out)
95
- }