biopipen 0.33.1__py3-none-any.whl → 0.34.1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (150)
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/filters.py +10 -183
  3. biopipen/core/proc.py +5 -3
  4. biopipen/core/testing.py +8 -1
  5. biopipen/ns/bam.py +40 -4
  6. biopipen/ns/cnv.py +1 -1
  7. biopipen/ns/cnvkit.py +1 -1
  8. biopipen/ns/delim.py +1 -1
  9. biopipen/ns/gsea.py +63 -37
  10. biopipen/ns/misc.py +38 -0
  11. biopipen/ns/plot.py +8 -0
  12. biopipen/ns/scrna.py +328 -292
  13. biopipen/ns/scrna_metabolic_landscape.py +207 -366
  14. biopipen/ns/tcr.py +165 -97
  15. biopipen/reports/bam/CNVpytor.svelte +4 -9
  16. biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
  17. biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
  18. biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
  19. biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
  20. biopipen/reports/scrna/DimPlots.svelte +1 -1
  21. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
  22. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
  23. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
  24. biopipen/reports/snp/PlinkCallRate.svelte +2 -2
  25. biopipen/reports/snp/PlinkFreq.svelte +1 -1
  26. biopipen/reports/snp/PlinkHWE.svelte +1 -1
  27. biopipen/reports/snp/PlinkHet.svelte +1 -1
  28. biopipen/reports/snp/PlinkIBD.svelte +1 -1
  29. biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
  30. biopipen/scripts/bam/CNAClinic.R +41 -6
  31. biopipen/scripts/bam/CNVpytor.py +2 -1
  32. biopipen/scripts/bam/ControlFREEC.py +2 -3
  33. biopipen/scripts/bam/SamtoolsView.py +33 -0
  34. biopipen/scripts/cnv/AneuploidyScore.R +25 -13
  35. biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
  36. biopipen/scripts/cnv/TMADScore.R +4 -4
  37. biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
  38. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
  39. biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
  40. biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
  41. biopipen/scripts/delim/RowsBinder.R +1 -1
  42. biopipen/scripts/delim/SampleInfo.R +4 -1
  43. biopipen/scripts/gene/GeneNameConversion.R +14 -12
  44. biopipen/scripts/gsea/Enrichr.R +2 -2
  45. biopipen/scripts/gsea/FGSEA.R +184 -50
  46. biopipen/scripts/gsea/PreRank.R +3 -3
  47. biopipen/scripts/misc/Plot.R +80 -0
  48. biopipen/scripts/plot/VennDiagram.R +2 -2
  49. biopipen/scripts/protein/ProdigySummary.R +34 -27
  50. biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
  51. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
  52. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
  53. biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
  54. biopipen/scripts/regulatory/motifs-common.R +10 -9
  55. biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
  56. biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
  57. biopipen/scripts/rnaseq/Simulation.R +0 -2
  58. biopipen/scripts/rnaseq/UnitConversion.R +6 -5
  59. biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
  60. biopipen/scripts/scrna/CellCellCommunication.py +1 -1
  61. biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
  62. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
  63. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
  64. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
  65. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
  66. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
  67. biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
  68. biopipen/scripts/scrna/CellsDistribution.R +1 -1
  69. biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
  70. biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
  71. biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
  72. biopipen/scripts/scrna/MarkersFinder.R +481 -215
  73. biopipen/scripts/scrna/MetaMarkers.R +3 -3
  74. biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
  75. biopipen/scripts/scrna/RadarPlots.R +1 -1
  76. biopipen/scripts/scrna/ScFGSEA.R +231 -76
  77. biopipen/scripts/scrna/ScSimulation.R +11 -10
  78. biopipen/scripts/scrna/ScVelo.py +605 -0
  79. biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
  80. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
  81. biopipen/scripts/scrna/SeuratClusterStats-features.R +43 -30
  82. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
  83. biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
  84. biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
  85. biopipen/scripts/scrna/SeuratClustering.R +31 -48
  86. biopipen/scripts/scrna/SeuratLoading.R +2 -2
  87. biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
  88. biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
  89. biopipen/scripts/scrna/SeuratPreparing.R +76 -24
  90. biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
  91. biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
  92. biopipen/scripts/scrna/Subset10X.R +2 -2
  93. biopipen/scripts/scrna/TopExpressingGenes.R +144 -185
  94. biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
  95. biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
  96. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
  97. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
  98. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
  99. biopipen/scripts/snp/MatrixEQTL.R +39 -20
  100. biopipen/scripts/snp/PlinkCallRate.R +43 -34
  101. biopipen/scripts/snp/PlinkFreq.R +34 -41
  102. biopipen/scripts/snp/PlinkHWE.R +23 -18
  103. biopipen/scripts/snp/PlinkHet.R +26 -22
  104. biopipen/scripts/snp/PlinkIBD.R +30 -34
  105. biopipen/scripts/stats/ChowTest.R +9 -8
  106. biopipen/scripts/stats/DiffCoexpr.R +13 -11
  107. biopipen/scripts/stats/LiquidAssoc.R +7 -8
  108. biopipen/scripts/stats/Mediation.R +8 -8
  109. biopipen/scripts/stats/MetaPvalue.R +11 -13
  110. biopipen/scripts/stats/MetaPvalue1.R +6 -5
  111. biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
  112. biopipen/scripts/tcr/ClonalStats.R +6 -5
  113. biopipen/scripts/tcr/CloneResidency.R +3 -3
  114. biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
  115. biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
  116. biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
  117. biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
  118. biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
  119. biopipen/scripts/tcr/ScRepLoading.R +114 -92
  120. biopipen/scripts/tcr/TCRClusterStats.R +2 -2
  121. biopipen/scripts/tcr/TCRClustering.R +86 -97
  122. biopipen/scripts/tcr/TESSA.R +65 -115
  123. biopipen/scripts/tcr/VJUsage.R +5 -5
  124. biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
  125. biopipen/utils/common_docstrs.py +66 -63
  126. biopipen/utils/reporter.py +177 -0
  127. {biopipen-0.33.1.dist-info → biopipen-0.34.1.dist-info}/METADATA +2 -1
  128. {biopipen-0.33.1.dist-info → biopipen-0.34.1.dist-info}/RECORD +130 -145
  129. {biopipen-0.33.1.dist-info → biopipen-0.34.1.dist-info}/WHEEL +1 -1
  130. biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
  131. biopipen/reports/scrna/ScFGSEA.svelte +0 -16
  132. biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
  133. biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
  134. biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
  135. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
  136. biopipen/reports/utils/gsea.liq +0 -110
  137. biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
  138. biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
  139. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
  140. biopipen/utils/caching.R +0 -44
  141. biopipen/utils/gene.R +0 -95
  142. biopipen/utils/gsea.R +0 -329
  143. biopipen/utils/io.R +0 -20
  144. biopipen/utils/misc.R +0 -602
  145. biopipen/utils/mutate_helpers.R +0 -581
  146. biopipen/utils/plot.R +0 -209
  147. biopipen/utils/repr.R +0 -146
  148. biopipen/utils/rnaseq.R +0 -48
  149. biopipen/utils/single_cell.R +0 -207
  150. {biopipen-0.33.1.dist-info → biopipen-0.34.1.dist-info}/entry_points.txt +0 -0
@@ -1,13 +1,8 @@
1
- {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
-
3
1
  library(Seurat)
4
- library(tibble)
5
- library(enrichR)
6
2
  library(rlang)
7
3
  library(dplyr)
8
- library(ggprism)
9
-
10
- setEnrichrSite("Enrichr")
4
+ library(tidyselect)
5
+ library(biopipen.utils)
11
6
 
12
7
  srtfile <- {{in.srtobj | r}}
13
8
  outdir <- {{out.outdir | r}}
@@ -16,238 +11,202 @@ mutaters <- {{ envs.mutaters | r }}
16
11
  ident <- {{ envs.ident | r }}
17
12
  group.by <- {{ envs["group-by"] | r }} # nolint
18
13
  each <- {{ envs.each | r }}
19
- prefix_each <- {{ envs.prefix_each | r }}
20
- section <- {{ envs.section | r }}
21
14
  dbs <- {{ envs.dbs | r }}
22
15
  n <- {{ envs.n | r }}
16
+ enrich_style <- {{ envs.enrich_style | r }}
23
17
  sset <- {{ envs.subset | r }}
18
+ enrich_plots_defaults <- {{ envs.enrich_plots_defaults | r }}
19
+ enrich_plots <- {{ envs.enrich_plots | r }}
24
20
  cases <- {{ envs.cases | r: todot = "-" }} # nolint
25
21
 
26
22
  set.seed(8525)
23
+ log <- get_logger()
24
+ reporter <- get_reporter()
27
25
 
28
- log_info("- Loading Seurat object ...")
29
- srtobj <- readRDS(srtfile)
26
+ log$info("Reading Seurat object ...")
27
+ srtobj <- read_obj(srtfile)
28
+ if (!"Identity" %in% colnames(srtobj@meta.data)) {
29
+ srtobj@meta.data$Identity <- Idents(srtobj)
30
+ }
30
31
  assay <- DefaultAssay(srtobj)
31
32
 
32
- log_info("- Mutate meta data if needed ...")
33
- if (!is.null(mutaters) && length(mutaters)) {
33
+ if (!is.null(mutaters) && length(mutaters) > 0) {
34
+ log$info("Mutating meta data ...")
34
35
  srtobj@meta.data <- srtobj@meta.data %>%
35
36
  mutate(!!!lapply(mutaters, parse_expr))
36
37
  }
37
38
 
39
+ enrich_plots <- lapply(enrich_plots, function(x) {
40
+ list_update(enrich_plots_defaults, x)
41
+ })
38
42
  defaults <- list(
39
43
  ident = ident,
40
44
  group.by = group.by,
41
45
  each = each,
42
- prefix_each = prefix_each,
43
- section = section,
44
46
  dbs = dbs,
45
47
  n = n,
48
+ enrich_style = enrich_style,
49
+ enrich_plots = enrich_plots,
50
+ enrich_plots_defaults = enrich_plots_defaults,
46
51
  subset = sset
47
52
  )
48
53
 
49
- expand_each <- function(name, case) {
54
+ cases <- expand_cases(cases, defaults, default_case = "Top Expressing Genes", post = function(name, case) {
50
55
  outcases <- list()
51
- no_each <- is.null(case$each) || nchar(case$each) == 0
52
- no_ident <- is.null(case$ident)
53
- has_section <- !is.null(case$section) && case$section != "DEFAULT"
54
- if (no_each && !no_ident) {
55
- # single cases
56
- if (is.null(case$section) || case$section == "DEFAULT") {
57
- outcases[[name]] <- case
58
- } else {
59
- outcases[[paste0(case$section, "::", name)]] <- case
60
- }
61
- } else if (no_each) { # no_ident
62
- # expanding idents
63
- if (has_section) {
64
- log_warn(" Ignoring `section` in case `{name}` when no `ident` is set.")
65
- case$section <- NULL
66
- }
67
- if (!is.null(case$subset)) {
68
- idents <- srtobj@meta.data %>% filter(!!parse_expr(case$subset)) %>%
69
- pull(case$group.by) %>% unique() %>% na.omit() %>% as.vector()
70
- } else {
71
- idents <- srtobj@meta.data %>%
72
- pull(case$group.by) %>% unique() %>% na.omit() %>% as.vector()
73
- }
56
+ if (is.null(case$each) || is.na(case$each) || nchar(case$each) == 0 || isFALSE(each)) {
57
+ case$enrich_plots <- lapply(
58
+ case$enrich_plots,
59
+ function(x) { list_update(case$enrich_plots_defaults, x) }
60
+ )
61
+ case$enrich_plots_defaults <- NULL
74
62
 
75
- for (ident in idents) {
76
- key <- paste0(name, "::", ident)
77
- outcases[[key]] <- case
78
- outcases[[key]]$ident <- ident
79
- outcases[[key]]$section <- name
80
- }
81
- } else { # has_each
82
- if (no_ident) {
83
- stop(" `ident` must be set when `each` is set for case `{name}`.")
84
- }
85
- # expanding eachs
86
- if (has_section) {
87
- log_warn(" Ignoring `section` in case `{name}` when `each` is set.")
88
- case$section <- NULL
63
+ outcases[[name]] <- case
64
+ } else {
65
+ eachs <- if (!is.null(case$subset)) {
66
+ srtobj@meta.data %>%
67
+ filter(!!parse_expr(case$subset)) %>%
68
+ pull(case$each) %>% na.omit() %>% unique() %>% as.vector()
69
+ } else {
70
+ srtobj@meta.data %>%
71
+ pull(case$each) %>% na.omit() %>% unique() %>% as.vector()
89
72
  }
90
73
 
91
- if (!is.null(case$subset)) {
92
- eachs <- srtobj@meta.data %>% filter(!!parse_expr(case$subset)) %>%
93
- pull(case$each) %>% unique() %>% na.omit() %>% as.vector()
94
- } else {
95
- eachs <- srtobj@meta.data %>%
96
- pull(case$each) %>% unique() %>% na.omit() %>% as.vector()
74
+ if (length(cases) == 0 && name == "Top Expressing Genes") {
75
+ name <- case$each
97
76
  }
98
77
 
99
78
  for (each in eachs) {
100
- by <- make.names(paste0(".", name, "_", case$each,"_", each))
101
- srtobj@meta.data <<- srtobj@meta.data %>% mutate(
102
- !!sym(by) := if_else(
103
- !!sym(case$each) == each,
104
- !!sym(case$group.by),
105
- NA
106
- )
107
- )
79
+ newname <- paste0(name, " - ", each)
80
+ newcase <- case
81
+ newcase$each_name <- case$each
82
+ newcase$each <- each
108
83
 
109
- if (isTRUE(case$prefix_each)) {
110
- key <- paste0(name, "::", case$each, " - ", each)
84
+ if (!is.null(case$subset)) {
85
+ newcase$subset <- paste0(case$subset, " & ", bQuote(case$each), " == '", each, "'")
111
86
  } else {
112
- key <- paste0(name, "::", each)
87
+ newcase$subset <- paste0(bQuote(case$each), " == '", each, "'")
113
88
  }
114
- outcases[[key]] <- case
115
- outcases[[key]]$section <- name
116
- outcases[[key]]$group.by <- by
89
+
90
+ newcase$enrich_plots <- lapply(
91
+ case$enrich_plots,
92
+ function(x) { list_update(case$enrich_plots_defaults, x) }
93
+ )
94
+ newcase$enrich_plots_defaults <- NULL
95
+
96
+ outcases[[newname]] <- newcase
117
97
  }
118
98
  }
119
- outcases
120
- }
121
99
 
122
- log_info("- Expanding cases ...")
123
- cases <- expand_cases(cases, defaults, expand_each)
124
-
125
- do_enrich <- function(expr, odir) {
126
- log_debug(" Saving expressions ...")
127
- expr <- expr %>% as.data.frame()
128
- colnames(expr) <- c("Expression")
129
- expr <- expr %>% rownames_to_column("Gene") %>% select(Gene, Expression)
130
- write.table(
131
- expr,
132
- file.path(odir, "expr.txt"),
133
- sep = "\t",
134
- row.names = TRUE,
135
- col.names = TRUE,
136
- quote = FALSE
137
- )
138
- write.table(
139
- expr %>% head(n),
140
- file.path(odir, "exprn.txt"),
141
- sep = "\t",
142
- row.names = TRUE,
143
- col.names = TRUE,
144
- quote = FALSE
100
+ outcases
101
+ })
102
+
103
+ log$info("Running cases ...")
104
+
105
+ process_markers <- function(markers, info, case) {
106
+ # Save markers
107
+ write.table(markers, file.path(info$prefix, "top_genes.tsv"), sep = "\t", quote = FALSE, row.names = FALSE)
108
+ reporter$add2(
109
+ list(
110
+ name = "Table",
111
+ contents = list(
112
+ list(kind = "descr", content = "Showing top expressing genes ordered by their expression descendingly."),
113
+ list(kind = "table", src = file.path(info$prefix, "top_genes.tsv"), data = list(nrows = 100))
114
+ )
115
+ ),
116
+ hs = c(info$section, info$name),
117
+ hs2 = paste0("Top Genes"),
118
+ ui = "tabs"
145
119
  )
146
120
 
147
- log_debug(" Running enrichment ...")
148
- enriched <- enrichr(head(expr$Gene, n), dbs) # nolint
149
- for (db in dbs) {
150
- write.table(
151
- enriched[[db]],
152
- file.path(odir, paste0("Enrichr-", db, ".txt")),
153
- sep = "\t",
154
- row.names = FALSE,
155
- col.names = TRUE,
156
- quote = FALSE
157
- )
121
+ enrich <- RunEnrichment(
122
+ markers$gene,
123
+ dbs = case$dbs, style = case$enrich_style)
124
+
125
+ write.table(enrich, file.path(info$prefix, "enrich.tsv"), sep = "\t", quote = FALSE, row.names = FALSE)
126
+ reporter$add2(
127
+ list(
128
+ name = "Table",
129
+ contents = list(list(kind = "table", src = file.path(info$prefix, "enrich.tsv"), data = list(nrows = 100)))
130
+ ),
131
+ hs = c(info$section, info$name),
132
+ hs2 = "Enrichment Analysis",
133
+ ui = "tabs"
134
+ )
158
135
 
159
- if (nrow(enriched[[db]]) == 0) {
160
- log_warn(paste0(" No enriched terms for ", db))
161
- next
136
+ # Visualize enriched terms
137
+ if (length(case$enrich_plots) > 0) {
138
+ for (db in case$dbs) {
139
+ plots <- list()
140
+ for (plotname in names(case$enrich_plots)) {
141
+ plotargs <- case$enrich_plots[[plotname]]
142
+ plotargs$data <- enrich[enrich$Database == db, , drop = FALSE]
143
+
144
+ p <- do_call(VizEnrichment, plotargs)
145
+
146
+ outprefix <- file.path(info$prefix, paste0("enrich.", slugify(db), ".", slugify(plotname)))
147
+ if (plotargs$plot_type == "bar") {
148
+ attr(p, "height") <- attr(p, "height") / 1.5
149
+ }
150
+ save_plot(p, outprefix, plotargs$devpars, formats = "png")
151
+ plots[[length(plots) + 1]] <- reporter$image(outprefix, c(), FALSE)
152
+ }
153
+ reporter$add2(
154
+ list(name = db, contents = plots),
155
+ hs = c(info$section, info$name),
156
+ hs2 = "Enrichment Analysis",
157
+ ui = "tabs"
158
+ )
162
159
  }
163
-
164
- enrich_p <- plotEnrich(enriched[[db]], showTerms = 20, title = db) +
165
- theme_prism()
166
- enrich_plot <- file.path(odir, paste0("Enrichr-", db, ".png"))
167
- png(enrich_plot, res = 100, height = 1000, width = 1000)
168
- print(enrich_p)
169
- dev.off()
170
-
171
- enrich_plot_pdf <- file.path(odir, paste0("Enrichr-", db, ".pdf"))
172
- pdf(enrich_plot_pdf, height = 10, width = 10)
173
- print(enrich_p)
174
- dev.off()
175
160
  }
176
161
  }
177
162
 
178
- do_case <- function(casename) {
179
- log_info("- Running for case: {casename} ...")
180
- case <- cases[[casename]]
181
- info <- casename_info(casename, cases, outdir, create = TRUE)
182
163
 
183
- log_debug(" Calculating average expression ...")
164
+ run_case <- function(name) {
165
+ log$info("Case: {name} ...")
166
+ case <- cases[[name]]
167
+
168
+ log$info("- Subsetting cells and calculating average expression ...")
184
169
  if (!is.null(case$subset)) {
185
- tryCatch({
186
- sobj <- subset(srtobj, !!parse_expr(case$subset))
187
- }, error = function(e) {
188
- log_warn(" No cells found for the subset, skipping ...")
189
- })
170
+ subobj <- filter(srtobj, !!parse_expr(case$subset))
190
171
  } else {
191
- sobj <- srtobj
172
+ subobj <- srtobj
173
+ }
174
+ case$group.by <- case$group.by %||% "Identity"
175
+ if (is.null(case$ident)) {
176
+ case$ident <- as.character(unique(subobj@meta.data[[case$group.by]]))
192
177
  }
193
178
  avgexpr <- AverageExpression(
194
- sobj,
179
+ subobj,
195
180
  group.by = case$group.by,
196
181
  assays = assay
197
182
  )[[assay]]
198
183
  # https://github.com/satijalab/seurat/issues/7893
199
- colnames(avgexpr) <- as.character(unique(sobj@meta.data[[case$group.by]]))
184
+ colnames(avgexpr) <- as.character(unique(subobj@meta.data[[case$group.by]]))
200
185
  avgexpr <- avgexpr[, case$ident, drop = FALSE]
201
- avgexpr <- avgexpr[order(-avgexpr), , drop = FALSE]
202
186
 
203
- do_enrich(avgexpr, info$casedir)
187
+ for (idt in case$ident) {
188
+ log$info("- Processing {idt} ...")
189
+ info <- case_info(paste0(name, "::", idt), outdir, create = TRUE)
190
+ expr <- avgexpr[, idt, drop = FALSE]
191
+ expr <- expr[order(expr, decreasing = TRUE), , drop = FALSE]
192
+ expr <- expr[1:min(case$n, nrow(expr)), , drop = FALSE]
193
+ expr <- as.data.frame(expr)
194
+ expr$gene <- rownames(expr)
195
+ colnames(expr) <- c("avg_expr", "gene")
196
+ expr <- expr[, c("gene", "avg_expr"), drop = FALSE]
197
+
198
+ log$info(" Performing enrichment analysis ...")
199
+ process_markers(expr, info, case = list(
200
+ ident = idt,
201
+ dbs = case$dbs,
202
+ enrich_style = case$enrich_style,
203
+ enrich_plots = case$enrich_plots
204
+ ))
205
+ }
204
206
 
205
- add_case_report(info)
207
+ invisible()
206
208
  }
207
209
 
208
- add_case_report <- function(info) {
209
- log_debug(" Adding case report ...")
210
- h1 = info$h1
211
- h2 = info$h2
212
-
213
- if (!is.null(info$error)) {
214
- add_report(
215
- list(
216
- kind = "descr",
217
- content = paste0("Top ", n, " expressing genes")
218
- ),
219
- list(kind = "error", content = info$error),
220
- h1 = h1,
221
- h2 = ifelse(h2 == "#", "Top Expressing Genes", h2),
222
- h3 = ifelse(h2 == "#", "#", "Top Expressing Genes")
223
- )
224
- } else {
225
- add_report(
226
- list(
227
- kind = "descr",
228
- content = paste0("Top ", n, " expressing genes")
229
- ),
230
- list(
231
- kind = "table",
232
- src = file.path(info$casedir, "exprn.txt")
233
- ),
234
- h1 = h1,
235
- h2 = ifelse(h2 == "#", "Top Expressing Genes", h2),
236
- h3 = ifelse(h2 == "#", "#", "Top Expressing Genes")
237
- )
238
-
239
- add_report(
240
- list(
241
- kind = "descr",
242
- content = paste0("Enrichment analysis for the top ", n, " expressing genes")
243
- ),
244
- list(kind = "enrichr", dir = info$casedir),
245
- h1 = h1,
246
- h2 = ifelse(h2 == "#", "Enrichment Analysis", h2),
247
- h3 = ifelse(h2 == "#", "#", "Enrichment Analysis")
248
- )
249
- }
250
- }
210
+ sapply(names(cases), run_case)
251
211
 
252
- sapply(sort(names(cases)), do_case)
253
- save_report(joboutdir)
212
+ reporter$save(joboutdir)
@@ -12,8 +12,9 @@ parser.add_argument(
12
12
  parser.add_argument(
13
13
  "-c",
14
14
  "--over_clustering",
15
- default="seurat_clusters",
16
- help="Over clustering. Ignored if the column does not exist.",
15
+ required=False,
16
+ default=None,
17
+ help="Over clustering. Error if the column does not exist.",
17
18
  )
18
19
 
19
20
 
@@ -25,8 +26,9 @@ if __name__ == "__main__":
25
26
  adata = sc.read_h5ad(args.input)
26
27
  over_clustering = args.over_clustering
27
28
  if over_clustering and over_clustering not in adata.obs.columns:
28
- print("WARNING: Over clustering column not found. Ignoring over clustering.")
29
- over_clustering = None
29
+ raise ValueError(
30
+ f"Over clustering column '{over_clustering}' not found in AnnData object."
31
+ )
30
32
 
31
33
  annotated = celltypist.annotate(
32
34
  adata,
@@ -0,0 +1,81 @@
1
+ """Convert Seurat objects to AnnData format back and forth.
2
+
3
+ Need R and R packages Seurat, SeuratDisk and biopipen.utils.R installed.
4
+ """
5
+
6
+
7
+ def convert_seurat_to_anndata(
8
+ input_file,
9
+ output_file,
10
+ assay=None,
11
+ subset=None,
12
+ rscript="Rscript",
13
+ ):
14
+ """Convert Seurat object to AnnData format.
15
+
16
+ Args:
17
+ input_file (str): Path to the input Seurat RDS or qs/qs2 file.
18
+ output_file (str): Path to the output AnnData H5AD file.
19
+ assay (str): Name of the assay to use in the Seurat object.
20
+ subset (str): An R expression to subset the Seurat object to convert.
21
+ rscript (RScript): R script executor.
22
+ """
23
+ from biopipen.utils.misc import run_command
24
+
25
+ script = f"""
26
+ library(biopipen.utils)
27
+
28
+ assay <- {repr(assay) if assay else 'NULL'}
29
+ subset <- {repr(subset) if subset else 'NULL'}
30
+
31
+ ConvertSeuratToAnnData(
32
+ "{input_file}", "{output_file}", assay = assay, subset = subset
33
+ )
34
+ """
35
+
36
+ # Save the script to a temporary file
37
+ from tempfile import NamedTemporaryFile
38
+ with NamedTemporaryFile(suffix=".R", delete=False) as temp_script:
39
+ temp_script.write(script.encode('utf-8'))
40
+ temp_script_path = temp_script.name
41
+
42
+ # Run the R script using the provided Rscript command
43
+ cmd = [rscript, temp_script_path]
44
+ run_command(cmd, fg=True)
45
+
46
+
47
+ def convert_anndata_to_seurat(
48
+ input_file,
49
+ output_file,
50
+ assay=None,
51
+ rscript="Rscript",
52
+ ):
53
+ """Convert AnnData object to Seurat format.
54
+
55
+ Args:
56
+ input_file (str): Path to the input AnnData H5AD file.
57
+ output_file (str): Path to the output Seurat RDS or qs/qs2 file.
58
+ assay (str): Name of the assay to use in the Seurat object.
59
+ rscript (RScript): R script executor.
60
+ """
61
+ from biopipen.utils.misc import run_command
62
+
63
+ script = f"""
64
+ library(biopipen.utils)
65
+
66
+ assay <- {repr(assay) if assay else 'NULL'}
67
+
68
+ ConvertAnnDataToSeurat(
69
+ "{input_file}", "{output_file}", assay = assay
70
+ )
71
+ """
72
+
73
+ # Save the script to a temporary file
74
+ from tempfile import NamedTemporaryFile
75
+ with NamedTemporaryFile(suffix=".R", delete=False) as temp_script:
76
+ temp_script.write(script.encode('utf-8'))
77
+ temp_script_path = temp_script.name
78
+
79
+ # Run the R script using the provided Rscript command
80
+ cmd = [rscript, temp_script_path]
81
+ run_command(cmd, fg=True)