biopipen 0.33.1__py3-none-any.whl → 0.34.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (149) hide show
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/filters.py +10 -183
  3. biopipen/core/proc.py +5 -3
  4. biopipen/core/testing.py +8 -1
  5. biopipen/ns/bam.py +40 -4
  6. biopipen/ns/cnv.py +1 -1
  7. biopipen/ns/cnvkit.py +1 -1
  8. biopipen/ns/delim.py +1 -1
  9. biopipen/ns/gsea.py +63 -37
  10. biopipen/ns/misc.py +38 -0
  11. biopipen/ns/plot.py +8 -0
  12. biopipen/ns/scrna.py +290 -288
  13. biopipen/ns/scrna_metabolic_landscape.py +207 -366
  14. biopipen/ns/tcr.py +165 -97
  15. biopipen/reports/bam/CNVpytor.svelte +4 -9
  16. biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
  17. biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
  18. biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
  19. biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
  20. biopipen/reports/scrna/DimPlots.svelte +1 -1
  21. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
  22. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
  23. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
  24. biopipen/reports/snp/PlinkCallRate.svelte +2 -2
  25. biopipen/reports/snp/PlinkFreq.svelte +1 -1
  26. biopipen/reports/snp/PlinkHWE.svelte +1 -1
  27. biopipen/reports/snp/PlinkHet.svelte +1 -1
  28. biopipen/reports/snp/PlinkIBD.svelte +1 -1
  29. biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
  30. biopipen/scripts/bam/CNAClinic.R +41 -6
  31. biopipen/scripts/bam/CNVpytor.py +2 -1
  32. biopipen/scripts/bam/ControlFREEC.py +2 -3
  33. biopipen/scripts/bam/SamtoolsView.py +33 -0
  34. biopipen/scripts/cnv/AneuploidyScore.R +25 -13
  35. biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
  36. biopipen/scripts/cnv/TMADScore.R +4 -4
  37. biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
  38. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
  39. biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
  40. biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
  41. biopipen/scripts/delim/RowsBinder.R +1 -1
  42. biopipen/scripts/delim/SampleInfo.R +4 -1
  43. biopipen/scripts/gene/GeneNameConversion.R +14 -12
  44. biopipen/scripts/gsea/Enrichr.R +2 -2
  45. biopipen/scripts/gsea/FGSEA.R +184 -50
  46. biopipen/scripts/gsea/PreRank.R +3 -3
  47. biopipen/scripts/misc/Plot.R +80 -0
  48. biopipen/scripts/plot/VennDiagram.R +2 -2
  49. biopipen/scripts/protein/ProdigySummary.R +34 -27
  50. biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
  51. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
  52. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
  53. biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
  54. biopipen/scripts/regulatory/motifs-common.R +10 -9
  55. biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
  56. biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
  57. biopipen/scripts/rnaseq/Simulation.R +0 -2
  58. biopipen/scripts/rnaseq/UnitConversion.R +6 -5
  59. biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
  60. biopipen/scripts/scrna/CellCellCommunication.py +1 -1
  61. biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
  62. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
  63. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
  64. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
  65. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
  66. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
  67. biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
  68. biopipen/scripts/scrna/CellsDistribution.R +1 -1
  69. biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
  70. biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
  71. biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
  72. biopipen/scripts/scrna/MarkersFinder.R +348 -217
  73. biopipen/scripts/scrna/MetaMarkers.R +3 -3
  74. biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
  75. biopipen/scripts/scrna/RadarPlots.R +1 -1
  76. biopipen/scripts/scrna/ScFGSEA.R +157 -75
  77. biopipen/scripts/scrna/ScSimulation.R +11 -10
  78. biopipen/scripts/scrna/ScVelo.py +605 -0
  79. biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
  80. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
  81. biopipen/scripts/scrna/SeuratClusterStats-features.R +39 -30
  82. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
  83. biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
  84. biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
  85. biopipen/scripts/scrna/SeuratClustering.R +31 -48
  86. biopipen/scripts/scrna/SeuratLoading.R +2 -2
  87. biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
  88. biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
  89. biopipen/scripts/scrna/SeuratPreparing.R +76 -24
  90. biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
  91. biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
  92. biopipen/scripts/scrna/Subset10X.R +2 -2
  93. biopipen/scripts/scrna/TopExpressingGenes.R +141 -184
  94. biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
  95. biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
  96. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
  97. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
  98. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
  99. biopipen/scripts/snp/MatrixEQTL.R +39 -20
  100. biopipen/scripts/snp/PlinkCallRate.R +43 -34
  101. biopipen/scripts/snp/PlinkFreq.R +34 -41
  102. biopipen/scripts/snp/PlinkHWE.R +23 -18
  103. biopipen/scripts/snp/PlinkHet.R +26 -22
  104. biopipen/scripts/snp/PlinkIBD.R +30 -34
  105. biopipen/scripts/stats/ChowTest.R +9 -8
  106. biopipen/scripts/stats/DiffCoexpr.R +13 -11
  107. biopipen/scripts/stats/LiquidAssoc.R +7 -8
  108. biopipen/scripts/stats/Mediation.R +8 -8
  109. biopipen/scripts/stats/MetaPvalue.R +11 -13
  110. biopipen/scripts/stats/MetaPvalue1.R +6 -5
  111. biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
  112. biopipen/scripts/tcr/ClonalStats.R +5 -4
  113. biopipen/scripts/tcr/CloneResidency.R +3 -3
  114. biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
  115. biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
  116. biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
  117. biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
  118. biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
  119. biopipen/scripts/tcr/ScRepLoading.R +114 -92
  120. biopipen/scripts/tcr/TCRClusterStats.R +2 -2
  121. biopipen/scripts/tcr/TCRClustering.R +86 -97
  122. biopipen/scripts/tcr/TESSA.R +65 -115
  123. biopipen/scripts/tcr/VJUsage.R +5 -5
  124. biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
  125. biopipen/utils/common_docstrs.py +66 -63
  126. biopipen/utils/reporter.py +177 -0
  127. {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/METADATA +2 -1
  128. {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/RECORD +130 -144
  129. {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/WHEEL +1 -1
  130. biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
  131. biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
  132. biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
  133. biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
  134. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
  135. biopipen/reports/utils/gsea.liq +0 -110
  136. biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
  137. biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
  138. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
  139. biopipen/utils/caching.R +0 -44
  140. biopipen/utils/gene.R +0 -95
  141. biopipen/utils/gsea.R +0 -329
  142. biopipen/utils/io.R +0 -20
  143. biopipen/utils/misc.R +0 -602
  144. biopipen/utils/mutate_helpers.R +0 -581
  145. biopipen/utils/plot.R +0 -209
  146. biopipen/utils/repr.R +0 -146
  147. biopipen/utils/rnaseq.R +0 -48
  148. biopipen/utils/single_cell.R +0 -207
  149. {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/entry_points.txt +0 -0
@@ -1,13 +1,8 @@
1
- {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
-
3
1
  library(Seurat)
4
- library(tibble)
5
- library(enrichR)
6
2
  library(rlang)
7
3
  library(dplyr)
8
- library(ggprism)
9
-
10
- setEnrichrSite("Enrichr")
4
+ library(tidyselect)
5
+ library(biopipen.utils)
11
6
 
12
7
  srtfile <- {{in.srtobj | r}}
13
8
  outdir <- {{out.outdir | r}}
@@ -16,238 +11,200 @@ mutaters <- {{ envs.mutaters | r }}
16
11
  ident <- {{ envs.ident | r }}
17
12
  group.by <- {{ envs["group-by"] | r }} # nolint
18
13
  each <- {{ envs.each | r }}
19
- prefix_each <- {{ envs.prefix_each | r }}
20
- section <- {{ envs.section | r }}
21
14
  dbs <- {{ envs.dbs | r }}
22
15
  n <- {{ envs.n | r }}
16
+ enrich_style <- {{ envs.enrich_style | r }}
23
17
  sset <- {{ envs.subset | r }}
18
+ enrich_plots_defaults <- {{ envs.enrich_plots_defaults | r }}
19
+ enrich_plots <- {{ envs.enrich_plots | r }}
24
20
  cases <- {{ envs.cases | r: todot = "-" }} # nolint
25
21
 
26
22
  set.seed(8525)
23
+ log <- get_logger()
24
+ reporter <- get_reporter()
27
25
 
28
- log_info("- Loading Seurat object ...")
29
- srtobj <- readRDS(srtfile)
26
+ log$info("Reading Seurat object ...")
27
+ srtobj <- read_obj(srtfile)
28
+ if (!"Identity" %in% colnames(srtobj@meta.data)) {
29
+ srtobj@meta.data$Identity <- Idents(srtobj)
30
+ }
30
31
  assay <- DefaultAssay(srtobj)
31
32
 
32
- log_info("- Mutate meta data if needed ...")
33
- if (!is.null(mutaters) && length(mutaters)) {
33
+ if (!is.null(mutaters) && length(mutaters) > 0) {
34
+ log$info("Mutating meta data ...")
34
35
  srtobj@meta.data <- srtobj@meta.data %>%
35
36
  mutate(!!!lapply(mutaters, parse_expr))
36
37
  }
37
38
 
39
+ enrich_plots <- lapply(enrich_plots, function(x) {
40
+ list_update(enrich_plots_defaults, x)
41
+ })
38
42
  defaults <- list(
39
43
  ident = ident,
40
44
  group.by = group.by,
41
45
  each = each,
42
- prefix_each = prefix_each,
43
- section = section,
44
46
  dbs = dbs,
45
47
  n = n,
48
+ enrich_style = enrich_style,
49
+ enrich_plots = enrich_plots,
50
+ enrich_plots_defaults = enrich_plots_defaults,
46
51
  subset = sset
47
52
  )
48
53
 
49
- expand_each <- function(name, case) {
54
+ cases <- expand_cases(cases, defaults, default_case = "Top Expressing Genes", post = function(name, case) {
50
55
  outcases <- list()
51
- no_each <- is.null(case$each) || nchar(case$each) == 0
52
- no_ident <- is.null(case$ident)
53
- has_section <- !is.null(case$section) && case$section != "DEFAULT"
54
- if (no_each && !no_ident) {
55
- # single cases
56
- if (is.null(case$section) || case$section == "DEFAULT") {
57
- outcases[[name]] <- case
58
- } else {
59
- outcases[[paste0(case$section, "::", name)]] <- case
60
- }
61
- } else if (no_each) { # no_ident
62
- # expanding idents
63
- if (has_section) {
64
- log_warn(" Ignoring `section` in case `{name}` when no `ident` is set.")
65
- case$section <- NULL
66
- }
67
- if (!is.null(case$subset)) {
68
- idents <- srtobj@meta.data %>% filter(!!parse_expr(case$subset)) %>%
69
- pull(case$group.by) %>% unique() %>% na.omit() %>% as.vector()
70
- } else {
71
- idents <- srtobj@meta.data %>%
72
- pull(case$group.by) %>% unique() %>% na.omit() %>% as.vector()
73
- }
56
+ if (is.null(case$each) || is.na(case$each) || nchar(case$each) == 0 || isFALSE(each)) {
57
+ case$enrich_plots <- lapply(
58
+ case$enrich_plots,
59
+ function(x) { list_update(case$enrich_plots_defaults, x) }
60
+ )
61
+ case$enrich_plots_defaults <- NULL
74
62
 
75
- for (ident in idents) {
76
- key <- paste0(name, "::", ident)
77
- outcases[[key]] <- case
78
- outcases[[key]]$ident <- ident
79
- outcases[[key]]$section <- name
80
- }
81
- } else { # has_each
82
- if (no_ident) {
83
- stop(" `ident` must be set when `each` is set for case `{name}`.")
84
- }
85
- # expanding eachs
86
- if (has_section) {
87
- log_warn(" Ignoring `section` in case `{name}` when `each` is set.")
88
- case$section <- NULL
63
+ outcases[[name]] <- case
64
+ } else {
65
+ eachs <- if (!is.null(case$subset)) {
66
+ srtobj@meta.data %>%
67
+ filter(!!parse_expr(case$subset)) %>%
68
+ pull(case$each) %>% na.omit() %>% unique() %>% as.vector()
69
+ } else {
70
+ srtobj@meta.data %>%
71
+ pull(case$each) %>% na.omit() %>% unique() %>% as.vector()
89
72
  }
90
73
 
91
- if (!is.null(case$subset)) {
92
- eachs <- srtobj@meta.data %>% filter(!!parse_expr(case$subset)) %>%
93
- pull(case$each) %>% unique() %>% na.omit() %>% as.vector()
94
- } else {
95
- eachs <- srtobj@meta.data %>%
96
- pull(case$each) %>% unique() %>% na.omit() %>% as.vector()
74
+ if (length(cases) == 0 && name == "Top Expressing Genes") {
75
+ name <- case$each
97
76
  }
98
77
 
99
78
  for (each in eachs) {
100
- by <- make.names(paste0(".", name, "_", case$each,"_", each))
101
- srtobj@meta.data <<- srtobj@meta.data %>% mutate(
102
- !!sym(by) := if_else(
103
- !!sym(case$each) == each,
104
- !!sym(case$group.by),
105
- NA
106
- )
107
- )
79
+ newname <- paste0(name, " - ", each)
80
+ newcase <- case
81
+ newcase$each_name <- case$each
82
+ newcase$each <- each
108
83
 
109
- if (isTRUE(case$prefix_each)) {
110
- key <- paste0(name, "::", case$each, " - ", each)
84
+ if (!is.null(case$subset)) {
85
+ newcase$subset <- paste0(case$subset, " & ", bQuote(case$each), " == '", each, "'")
111
86
  } else {
112
- key <- paste0(name, "::", each)
87
+ newcase$subset <- paste0(bQuote(case$each), " == '", each, "'")
113
88
  }
114
- outcases[[key]] <- case
115
- outcases[[key]]$section <- name
116
- outcases[[key]]$group.by <- by
89
+
90
+ newcase$enrich_plots <- lapply(
91
+ case$enrich_plots,
92
+ function(x) { list_update(case$enrich_plots_defaults, x) }
93
+ )
94
+ newcase$enrich_plots_defaults <- NULL
95
+
96
+ outcases[[newname]] <- newcase
117
97
  }
118
98
  }
119
- outcases
120
- }
121
99
 
122
- log_info("- Expanding cases ...")
123
- cases <- expand_cases(cases, defaults, expand_each)
124
-
125
- do_enrich <- function(expr, odir) {
126
- log_debug(" Saving expressions ...")
127
- expr <- expr %>% as.data.frame()
128
- colnames(expr) <- c("Expression")
129
- expr <- expr %>% rownames_to_column("Gene") %>% select(Gene, Expression)
130
- write.table(
131
- expr,
132
- file.path(odir, "expr.txt"),
133
- sep = "\t",
134
- row.names = TRUE,
135
- col.names = TRUE,
136
- quote = FALSE
100
+ outcases
101
+ })
102
+
103
+ log$info("Running cases ...")
104
+
105
+ process_markers <- function(markers, info, case) {
106
+ # Save markers
107
+ write.table(markers, file.path(info$prefix, "top_genes.tsv"), sep = "\t", quote = FALSE, row.names = FALSE)
108
+ reporter$add2(
109
+ list(
110
+ name = "Table",
111
+ contents = list(
112
+ list(kind = "descr", content = "Showing top expressing genes ordered by their expression descendingly."),
113
+ list(kind = "table", src = file.path(info$prefix, "top_genes.tsv"), data = list(nrows = 100))
114
+ )
115
+ ),
116
+ hs = c(info$section, info$name),
117
+ hs2 = paste0("Top Genes"),
118
+ ui = "tabs"
137
119
  )
138
- write.table(
139
- expr %>% head(n),
140
- file.path(odir, "exprn.txt"),
141
- sep = "\t",
142
- row.names = TRUE,
143
- col.names = TRUE,
144
- quote = FALSE
120
+
121
+ enrich <- RunEnrichment(
122
+ markers$gene,
123
+ dbs = case$dbs, style = case$enrich_style)
124
+
125
+ write.table(enrich, file.path(info$prefix, "enrich.tsv"), sep = "\t", quote = FALSE, row.names = FALSE)
126
+ reporter$add2(
127
+ list(
128
+ name = "Table",
129
+ contents = list(list(kind = "table", src = file.path(info$prefix, "enrich.tsv"), data = list(nrows = 100)))
130
+ ),
131
+ hs = c(info$section, info$name),
132
+ hs2 = "Enrichment Analysis",
133
+ ui = "tabs"
145
134
  )
146
135
 
147
- log_debug(" Running enrichment ...")
148
- enriched <- enrichr(head(expr$Gene, n), dbs) # nolint
149
- for (db in dbs) {
150
- write.table(
151
- enriched[[db]],
152
- file.path(odir, paste0("Enrichr-", db, ".txt")),
153
- sep = "\t",
154
- row.names = FALSE,
155
- col.names = TRUE,
156
- quote = FALSE
157
- )
136
+ # Visualize enriched terms
137
+ if (length(case$enrich_plots) > 0) {
138
+ for (db in case$dbs) {
139
+ plots <- list()
140
+ for (plotname in names(case$enrich_plots)) {
141
+ plotargs <- case$enrich_plots[[plotname]]
142
+ plotargs$data <- enrich[enrich$Database == db, , drop = FALSE]
158
143
 
159
- if (nrow(enriched[[db]]) == 0) {
160
- log_warn(paste0(" No enriched terms for ", db))
161
- next
162
- }
144
+ p <- do_call(VizEnrichment, plotargs)
163
145
 
164
- enrich_p <- plotEnrich(enriched[[db]], showTerms = 20, title = db) +
165
- theme_prism()
166
- enrich_plot <- file.path(odir, paste0("Enrichr-", db, ".png"))
167
- png(enrich_plot, res = 100, height = 1000, width = 1000)
168
- print(enrich_p)
169
- dev.off()
170
-
171
- enrich_plot_pdf <- file.path(odir, paste0("Enrichr-", db, ".pdf"))
172
- pdf(enrich_plot_pdf, height = 10, width = 10)
173
- print(enrich_p)
174
- dev.off()
146
+ outprefix <- file.path(info$prefix, paste0("enrich.", slugify(db), ".", slugify(plotname)))
147
+ attr(p, "height") <- attr(p, "height") / 1.5
148
+ save_plot(p, outprefix, plotargs$devpars, formats = "png")
149
+ plots[[length(plots) + 1]] <- reporter$image(outprefix, c(), FALSE)
150
+ }
151
+ reporter$add2(
152
+ list(name = db, contents = plots),
153
+ hs = c(info$section, info$name),
154
+ hs2 = "Enrichment Analysis",
155
+ ui = "tabs"
156
+ )
157
+ }
175
158
  }
176
159
  }
177
160
 
178
- do_case <- function(casename) {
179
- log_info("- Running for case: {casename} ...")
180
- case <- cases[[casename]]
181
- info <- casename_info(casename, cases, outdir, create = TRUE)
182
161
 
183
- log_debug(" Calculating average expression ...")
162
+ run_case <- function(name) {
163
+ log$info("Case: {name} ...")
164
+ case <- cases[[name]]
165
+
166
+ log$info("- Subsetting cells and calculating average expression ...")
184
167
  if (!is.null(case$subset)) {
185
- tryCatch({
186
- sobj <- subset(srtobj, !!parse_expr(case$subset))
187
- }, error = function(e) {
188
- log_warn(" No cells found for the subset, skipping ...")
189
- })
168
+ subobj <- filter(srtobj, !!parse_expr(case$subset))
190
169
  } else {
191
- sobj <- srtobj
170
+ subobj <- srtobj
171
+ }
172
+ case$group.by <- case$group.by %||% "Identity"
173
+ if (is.null(case$ident)) {
174
+ case$ident <- as.character(unique(subobj@meta.data[[case$group.by]]))
192
175
  }
193
176
  avgexpr <- AverageExpression(
194
- sobj,
177
+ subobj,
195
178
  group.by = case$group.by,
196
179
  assays = assay
197
180
  )[[assay]]
198
181
  # https://github.com/satijalab/seurat/issues/7893
199
- colnames(avgexpr) <- as.character(unique(sobj@meta.data[[case$group.by]]))
182
+ colnames(avgexpr) <- as.character(unique(subobj@meta.data[[case$group.by]]))
200
183
  avgexpr <- avgexpr[, case$ident, drop = FALSE]
201
- avgexpr <- avgexpr[order(-avgexpr), , drop = FALSE]
202
184
 
203
- do_enrich(avgexpr, info$casedir)
185
+ for (idt in case$ident) {
186
+ log$info("- Processing {idt} ...")
187
+ info <- case_info(paste0(name, "::", idt), outdir, create = TRUE)
188
+ expr <- avgexpr[, idt, drop = FALSE]
189
+ expr <- expr[order(expr, decreasing = TRUE), , drop = FALSE]
190
+ expr <- expr[1:min(case$n, nrow(expr)), , drop = FALSE]
191
+ expr <- as.data.frame(expr)
192
+ expr$gene <- rownames(expr)
193
+ colnames(expr) <- c("avg_expr", "gene")
194
+ expr <- expr[, c("gene", "avg_expr"), drop = FALSE]
195
+
196
+ log$info(" Performing enrichment analysis ...")
197
+ process_markers(expr, info, case = list(
198
+ ident = idt,
199
+ dbs = case$dbs,
200
+ enrich_style = case$enrich_style,
201
+ enrich_plots = case$enrich_plots
202
+ ))
203
+ }
204
204
 
205
- add_case_report(info)
205
+ invisible()
206
206
  }
207
207
 
208
- add_case_report <- function(info) {
209
- log_debug(" Adding case report ...")
210
- h1 = info$h1
211
- h2 = info$h2
212
-
213
- if (!is.null(info$error)) {
214
- add_report(
215
- list(
216
- kind = "descr",
217
- content = paste0("Top ", n, " expressing genes")
218
- ),
219
- list(kind = "error", content = info$error),
220
- h1 = h1,
221
- h2 = ifelse(h2 == "#", "Top Expressing Genes", h2),
222
- h3 = ifelse(h2 == "#", "#", "Top Expressing Genes")
223
- )
224
- } else {
225
- add_report(
226
- list(
227
- kind = "descr",
228
- content = paste0("Top ", n, " expressing genes")
229
- ),
230
- list(
231
- kind = "table",
232
- src = file.path(info$casedir, "exprn.txt")
233
- ),
234
- h1 = h1,
235
- h2 = ifelse(h2 == "#", "Top Expressing Genes", h2),
236
- h3 = ifelse(h2 == "#", "#", "Top Expressing Genes")
237
- )
238
-
239
- add_report(
240
- list(
241
- kind = "descr",
242
- content = paste0("Enrichment analysis for the top ", n, " expressing genes")
243
- ),
244
- list(kind = "enrichr", dir = info$casedir),
245
- h1 = h1,
246
- h2 = ifelse(h2 == "#", "Enrichment Analysis", h2),
247
- h3 = ifelse(h2 == "#", "#", "Enrichment Analysis")
248
- )
249
- }
250
- }
208
+ sapply(names(cases), run_case)
251
209
 
252
- sapply(sort(names(cases)), do_case)
253
- save_report(joboutdir)
210
+ reporter$save(joboutdir)
@@ -12,8 +12,9 @@ parser.add_argument(
12
12
  parser.add_argument(
13
13
  "-c",
14
14
  "--over_clustering",
15
- default="seurat_clusters",
16
- help="Over clustering. Ignored if the column does not exist.",
15
+ required=False,
16
+ default=None,
17
+ help="Over clustering. Error if the column does not exist.",
17
18
  )
18
19
 
19
20
 
@@ -25,8 +26,9 @@ if __name__ == "__main__":
25
26
  adata = sc.read_h5ad(args.input)
26
27
  over_clustering = args.over_clustering
27
28
  if over_clustering and over_clustering not in adata.obs.columns:
28
- print("WARNING: Over clustering column not found. Ignoring over clustering.")
29
- over_clustering = None
29
+ raise ValueError(
30
+ f"Over clustering column '{over_clustering}' not found in AnnData object."
31
+ )
30
32
 
31
33
  annotated = celltypist.annotate(
32
34
  adata,
@@ -0,0 +1,81 @@
1
+ """Convert Seurat objects to AnnData format back and forth.
2
+
3
+ Need R and R packages Seurat, SeuratDisk and biopipen.utils.R installed.
4
+ """
5
+
6
+
7
def convert_seurat_to_anndata(
    input_file,
    output_file,
    assay=None,
    subset=None,
    rscript="Rscript",
):
    """Convert a Seurat object to AnnData (H5AD) format.

    Writes a small R script that calls ``ConvertSeuratToAnnData`` from the
    ``biopipen.utils`` R package, runs it with ``rscript`` and removes the
    script afterwards.

    Args:
        input_file (str): Path to the input Seurat RDS or qs/qs2 file.
        output_file (str): Path to the output AnnData H5AD file.
        assay (str): Name of the assay to use in the Seurat object.
            A falsy value is passed to R as ``NULL``.
        subset (str): An R expression to subset the Seurat object to convert.
            A falsy value is passed to R as ``NULL``.
        rscript (str): The Rscript executable (name or path) used as the
            first element of the command line.
    """
    import json
    import os
    from tempfile import NamedTemporaryFile

    from biopipen.utils.misc import run_command

    def r_string(value):
        # JSON string escapes (\" \\ \n \t \uXXXX ...) are also valid inside a
        # double-quoted R string literal, so this safely quotes paths that
        # contain quotes or backslashes (e.g. Windows paths).
        return json.dumps(str(value), ensure_ascii=False)

    def r_optional(value):
        # Mirror the original truthiness rule: empty/None becomes R's NULL.
        return r_string(value) if value else "NULL"

    script = (
        "library(biopipen.utils)\n"
        "\n"
        f"assay <- {r_optional(assay)}\n"
        f"subset <- {r_optional(subset)}\n"
        "\n"
        "ConvertSeuratToAnnData(\n"
        f"    {r_string(input_file)}, {r_string(output_file)},\n"
        "    assay = assay, subset = subset\n"
        ")\n"
    )

    # delete=False so the file survives the `with` block and Rscript can open
    # it by name; it is removed explicitly below.
    with NamedTemporaryFile(
        "w", suffix=".R", delete=False, encoding="utf-8"
    ) as temp_script:
        temp_script.write(script)
        temp_script_path = temp_script.name

    try:
        # NOTE(review): assumes run_command(..., fg=True) blocks until Rscript
        # exits, so the script is no longer needed afterwards -- confirm.
        run_command([rscript, temp_script_path], fg=True)
    finally:
        try:
            os.unlink(temp_script_path)
        except OSError:
            # Best-effort cleanup; a leftover temp file must not mask the
            # outcome of the conversion itself.
            pass
46
+
47
+ def convert_anndata_to_seurat(
48
+ input_file,
49
+ output_file,
50
+ assay=None,
51
+ rscript="Rscript",
52
+ ):
53
+ """Convert AnnData object to Seurat format.
54
+
55
+ Args:
56
+ input_file (str): Path to the input AnnData H5AD file.
57
+ output_file (str): Path to the output Seurat RDS or qs/qs2 file.
58
+ assay (str): Name of the assay to use in the Seurat object.
59
+ rscript (RScript): R script executor.
60
+ """
61
+ from biopipen.utils.misc import run_command
62
+
63
+ script = f"""
64
+ library(biopipen.utils)
65
+
66
+ assay <- {repr(assay) if assay else 'NULL'}
67
+
68
+ ConvertAnnDataToSeurat(
69
+ "{input_file}", "{output_file}", assay = assay
70
+ )
71
+ """
72
+
73
+ # Save the script to a temporary file
74
+ from tempfile import NamedTemporaryFile
75
+ with NamedTemporaryFile(suffix=".R", delete=False) as temp_script:
76
+ temp_script.write(script.encode('utf-8'))
77
+ temp_script_path = temp_script.name
78
+
79
+ # Run the R script using the provided Rscript command
80
+ cmd = [rscript, temp_script_path]
81
+ run_command(cmd, fg=True)