biopipen 0.21.2__py3-none-any.whl → 0.22.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (62)
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/filters.py +142 -0
  3. biopipen/ns/scrna.py +19 -1
  4. biopipen/ns/tcr.py +30 -10
  5. biopipen/reports/delim/SampleInfo.svelte +2 -22
  6. biopipen/reports/scrna/CellsDistribution.svelte +4 -39
  7. biopipen/reports/scrna/MarkersFinder.svelte +6 -126
  8. biopipen/reports/scrna/MetaMarkers.svelte +3 -75
  9. biopipen/reports/scrna/RadarPlots.svelte +4 -20
  10. biopipen/reports/scrna/ScFGSEA.svelte +4 -23
  11. biopipen/reports/scrna/SeuratClusterStats.svelte +3 -69
  12. biopipen/reports/scrna/SeuratPreparing.svelte +3 -26
  13. biopipen/reports/scrna/TopExpressingGenes.svelte +3 -41
  14. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +17 -16
  15. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +13 -16
  16. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +44 -52
  17. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +2 -8
  18. biopipen/reports/tcr/CDR3AAPhyschem.svelte +18 -65
  19. biopipen/reports/tcr/CloneResidency.svelte +3 -93
  20. biopipen/reports/tcr/Immunarch.svelte +4 -168
  21. biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
  22. biopipen/reports/tcr/TESSA.svelte +11 -28
  23. biopipen/scripts/delim/SampleInfo.R +41 -7
  24. biopipen/scripts/scrna/CellsDistribution.R +127 -16
  25. biopipen/scripts/scrna/MarkersFinder.R +245 -100
  26. biopipen/scripts/scrna/MetaMarkers.R +163 -82
  27. biopipen/scripts/scrna/RadarPlots.R +163 -110
  28. biopipen/scripts/scrna/ScFGSEA.R +51 -11
  29. biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +26 -13
  30. biopipen/scripts/scrna/SeuratClusterStats-features.R +58 -53
  31. biopipen/scripts/scrna/SeuratClusterStats-stats.R +39 -21
  32. biopipen/scripts/scrna/SeuratClusterStats.R +4 -2
  33. biopipen/scripts/scrna/SeuratClustering.R +73 -26
  34. biopipen/scripts/scrna/SeuratMetadataMutater.R +13 -1
  35. biopipen/scripts/scrna/SeuratPreparing.R +93 -19
  36. biopipen/scripts/scrna/TopExpressingGenes.R +100 -18
  37. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +21 -8
  38. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +25 -3
  39. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +1 -0
  40. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +14 -3
  41. biopipen/scripts/tcr/Attach2Seurat.R +2 -1
  42. biopipen/scripts/tcr/CDR3AAPhyschem.R +123 -10
  43. biopipen/scripts/tcr/CloneResidency.R +114 -34
  44. biopipen/scripts/tcr/Immunarch-basic.R +18 -4
  45. biopipen/scripts/tcr/Immunarch-clonality.R +14 -0
  46. biopipen/scripts/tcr/Immunarch-diversity.R +123 -18
  47. biopipen/scripts/tcr/Immunarch-geneusage.R +23 -1
  48. biopipen/scripts/tcr/Immunarch-kmer.R +45 -3
  49. biopipen/scripts/tcr/Immunarch-overlap.R +62 -0
  50. biopipen/scripts/tcr/Immunarch-spectratyping.R +18 -2
  51. biopipen/scripts/tcr/Immunarch-tracking.R +24 -1
  52. biopipen/scripts/tcr/Immunarch-vjjunc.R +17 -2
  53. biopipen/scripts/tcr/Immunarch.R +7 -0
  54. biopipen/scripts/tcr/ImmunarchLoading.R +22 -23
  55. biopipen/scripts/tcr/TCRClusterStats.R +124 -11
  56. biopipen/scripts/tcr/TCRClustering.R +8 -9
  57. biopipen/scripts/tcr/TESSA.R +66 -41
  58. biopipen/utils/misc.R +96 -1
  59. {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/METADATA +1 -1
  60. {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/RECORD +62 -62
  61. {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/WHEEL +0 -0
  62. {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/entry_points.txt +0 -0
biopipen/scripts/tcr/TCRClusterStats.R CHANGED
@@ -6,13 +6,16 @@ library(dplyr)
6
6
  library(rlang)
7
7
  library(immunarch)
8
8
  library(ggprism)
9
+ library(slugify)
9
10
 
10
11
  immfile = {{in.immfile | quote}}
11
12
  outdir = {{out.outdir | quote}}
12
13
  cluster_size_envs = {{envs.cluster_size | r}}
13
14
  shared_clusters_envs = {{envs.shared_clusters | r}}
14
15
  sample_diversity_envs = {{envs.sample_diversity | r}}
16
+ joboutdir = {{job.outdir | r}}
15
17
 
18
+ log_info("Expanding analysis cases ...")
16
19
  expand_cases = function(envs) {
17
20
  cases = envs$cases
18
21
  envs$cases = NULL
@@ -51,8 +54,9 @@ shared_clusters_cases = expand_cases(shared_clusters_envs)
51
54
  sample_diversity_cases = expand_cases(sample_diversity_envs)
52
55
 
53
56
  cluster_size_distribution = function(name) {
54
- print(paste0("- Working on cluster size distribution: ", name))
55
- odir = file.path(outdir, "ClusterSizeDistribution", name)
57
+ log_info("- Working on cluster size distribution: {name}")
58
+
59
+ odir = file.path(outdir, "ClusterSizeDistribution", slugify(name, tolower = FALSE))
56
60
  dir.create(odir, showWarnings = FALSE, recursive = TRUE)
57
61
  case = cluster_size_cases[[name]]
58
62
 
@@ -75,16 +79,28 @@ cluster_size_distribution = function(name) {
75
79
  ggs = c(
76
80
  "theme_prism()",
77
81
  "scale_y_continuous(trans='log10')",
78
- "labs(x='TCR cluster size', y='Count')"
82
+ "labs(x='TCR cluster size', y='Count')",
83
+ "scale_fill_biopipen()"
79
84
  ),
80
85
  devpars = case$devpars,
81
86
  outfile = outplot
82
87
  )
88
+
89
+ add_report(
90
+ list(
91
+ src = outplot,
92
+ name = ifelse(name == "DEFAULT", FALSE, name),
93
+ descr = paste0("Cluster size distribution for each ", case$by)
94
+ ),
95
+ ui = "table_of_images",
96
+ h1 = "Cluster Size Distribution"
97
+ )
83
98
  }
84
99
 
85
100
  shared_clusters = function(name) {
86
- print(paste0("- Working on shared clusters: ", name))
87
- odir = file.path(outdir, "SharedClusters", name)
101
+ log_info("- Working on shared clusters: {name}")
102
+
103
+ odir = file.path(outdir, "SharedClusters", slugify(name, tolower = FALSE))
88
104
  dir.create(odir, showWarnings = FALSE, recursive = TRUE)
89
105
  case = shared_clusters_cases[[name]]
90
106
  if (!is.null(case$grouping)) {
@@ -139,12 +155,21 @@ shared_clusters = function(name) {
139
155
  ),
140
156
  devpars = case$devpars,
141
157
  outfile = file.path(odir, "shared_clusters.png")
158
+ )
142
159
 
160
+ add_report(
161
+ list(
162
+ src = file.path(odir, "shared_clusters.png"),
163
+ name = ifelse(name == "DEFAULT", FALSE, name),
164
+ descr = paste0("Shared TCR clusters across samples")
165
+ ),
166
+ ui = "table_of_images",
167
+ h1 = "Shared TCR Clusters"
143
168
  )
144
169
  }
145
170
 
146
171
  shared_clusters_by_grouping = function(name) {
147
- odir = file.path(outdir, "SharedClusters", name)
172
+ odir = file.path(outdir, "SharedClusters", slugify(name, tolower = FALSE))
148
173
  case = shared_clusters_cases[[name]]
149
174
 
150
175
  data = list()
@@ -176,12 +201,47 @@ shared_clusters_by_grouping = function(name) {
176
201
  devpars = case$devpars,
177
202
  outfile = outfile
178
203
  )
204
+
205
+ add_report(
206
+ list(
207
+ src = outfile,
208
+ name = ifelse(name == "DEFAULT", FALSE, name),
209
+ descr = paste0("Shared TCR clusters across ", grouping)
210
+ ),
211
+ ui = "table_of_images",
212
+ h1 = "Shared TCR Clusters"
213
+ )
179
214
  }
180
215
 
181
216
 
217
+ div_methods = list(
218
+ gini = list(
219
+ name = "The Gini coefficient",
220
+ descr = "The Gini coefficient is a measure of statistical dispersion intended to represent the income or wealth distribution of a nation's residents, and is the most commonly used measurement of inequality."
221
+ ),
222
+ gini.simp = list(
223
+ name = "The Gini-Simpson index",
224
+ descr = "The Gini-Simpson index is a measure of diversity. It is one of the most commonly used in ecology. It is also known as the Simpson index, the Simpson concentration index, the Simpson dominance index, or the Simpson diversity index."
225
+ ),
226
+ inv.simp = list(
227
+ name = "The inverse Simpson index",
228
+ descr = "It is the effective number of types that is obtained when
229
+ the weighted arithmetic mean is used to quantify average
230
+ proportional abundance of types in the dataset of interest."
231
+ ),
232
+ div = list(
233
+ name = "The true diversity",
234
+ descr = "It refers to the number of equally abundant types needed
235
+ for the average proportional abundance of the types to
236
+ equal that observed in the dataset of interest where all
237
+ types may not be equally abundant."
238
+ )
239
+ )
240
+
182
241
  sample_diversity = function(name) {
183
- print(paste0("- Working on sample diversity: ", name))
184
- odir = file.path(outdir, "SampleDiversity", name)
242
+ log_info("- Working on sample diversity: {name}")
243
+
244
+ odir = file.path(outdir, "SampleDiversity", slugify(name, tolower = FALSE))
185
245
  dir.create(odir, showWarnings = FALSE, recursive = TRUE)
186
246
  case = sample_diversity_cases[[name]]
187
247
 
@@ -192,7 +252,19 @@ sample_diversity = function(name) {
192
252
  outfile = file.path(odir, "diversity.txt")
193
253
  outplot = file.path(odir, "diversity.png")
194
254
  div = repDiversity(data, .method = case$method)
195
- write.table(div, outfile, row.names=TRUE, col.names=TRUE, quote=FALSE, sep="\t")
255
+ write.table(
256
+ if (ncol(div) == 1) {
257
+ as.data.frame(div) %>% rownames_to_column("Sample")
258
+ } else {
259
+ div
260
+ },
261
+ outfile,
262
+ row.names=TRUE,
263
+ col.names=TRUE,
264
+ quote=FALSE,
265
+ sep="\t"
266
+ )
267
+
196
268
  if (case$method == "gini") {
197
269
  div = as.data.frame(div) %>% rownames_to_column("Sample")
198
270
  colnames(div)[2] = "gini"
@@ -201,7 +273,8 @@ sample_diversity = function(name) {
201
273
  mapping = aes(x = Sample, y = gini, fill = Sample)
202
274
  ggs = c(
203
275
  "theme_prism(axis_text_angle = 90)",
204
- "labs(title='Gini coefficient', subtitle='Sample diversity estimation using the Gini coefficient')"
276
+ "labs(title='Gini coefficient', subtitle='Sample diversity estimation using the Gini coefficient')",
277
+ "scale_fill_biopipen()"
205
278
  )
206
279
  if (is.null(case$by) || length(case$by) == 0) {
207
280
 
@@ -225,7 +298,6 @@ sample_diversity = function(name) {
225
298
  devpars = case$devpars,
226
299
  outfile = outplot
227
300
  )
228
-
229
301
  } else {
230
302
  if (is.null(case$by) || length(case$by) == 0) {
231
303
  p = vis(div)
@@ -243,6 +315,41 @@ sample_diversity = function(name) {
243
315
  print(p)
244
316
  dev.off()
245
317
  }
318
+
319
+ add_report(
320
+ list(
321
+ ui = "flat",
322
+ label = "Diversity Plot",
323
+ contents = list(
324
+ list(
325
+ kind = "descr",
326
+ content = paste(
327
+ div_methods[[case$method]]$name,
328
+ ifelse(
329
+ is.null(case$by) || length(case$by) == 0,
330
+ "",
331
+ paste0(" grouped by ", paste(case$by, collapse = ", "))
332
+ ),
333
+ div_methods[[case$method]]$descr
334
+ )
335
+ ),
336
+ list(
337
+ kind = "image",
338
+ src = outplot
339
+ )
340
+ )
341
+ ),
342
+ list(
343
+ ui = "flat",
344
+ label = "Diversity Table",
345
+ contents = list(
346
+ list(kind = "table", src = outfile, data = list(index_col = 0))
347
+ )
348
+ ),
349
+ ui = "tabs",
350
+ h2 = ifelse(name == "DEFAULT", "#", name),
351
+ h1 = "Sample Diversity using TCR clusters"
352
+ )
246
353
  }
247
354
 
248
355
 
@@ -250,14 +357,20 @@ sample_diversity = function(name) {
250
357
  # main
251
358
  # --------------------------------------------------
252
359
  # Load immunarch data
360
+ log_info("Loading immunarch data ...")
253
361
  immdata = readRDS(immfile)
254
362
 
255
363
  # Cluster size distribution
364
+ log_info("Performing cluster size distribution analysis ...")
256
365
  sapply(names(cluster_size_cases), cluster_size_distribution)
257
366
 
258
367
  # Shared clusters
368
+ log_info("Performing shared clusters analysis ...")
259
369
  sapply(names(shared_clusters_cases), shared_clusters)
260
370
 
261
371
  # Diversity
372
+ log_info("Performing sample diversity analysis ...")
262
373
  sapply(names(sample_diversity_cases), sample_diversity)
374
+
375
+ save_report(joboutdir)
263
376
  }
biopipen/scripts/tcr/TCRClustering.R CHANGED
@@ -3,11 +3,13 @@
3
3
  # python = Sys.which({{envs.python | r}})
4
4
  # Sys.setenv(RETICULATE_PYTHON = python)
5
5
  # library(reticulate)
6
+ source("{{biopipen_dir}}/utils/single_cell.R")
6
7
 
7
8
  library(immunarch)
8
9
  library(dplyr)
9
10
  library(tidyr)
10
11
  library(tibble)
12
+ library(glue)
11
13
 
12
14
  immfile = {{in.immfile | r}}
13
15
  outdir = normalizePath({{job.outdir | r}})
@@ -17,6 +19,7 @@ tool = {{envs.tool | r}}
17
19
  python = {{envs.python | r}}
18
20
  on_multi = {{envs.on_multi | r}}
19
21
  args = {{envs.args | r}}
22
+ prefix = {{envs.prefix | r}}
20
23
 
21
24
  setwd(outdir)
22
25
 
@@ -26,17 +29,13 @@ if (on_multi) {
26
29
  } else {
27
30
  seqdata = immdata$data
28
31
  }
32
+ if (is.null(prefix)) { prefix = immdata$prefix }
33
+ if (is.null(prefix)) { prefix = "" }
29
34
 
30
35
  get_cdr3aa_df = function() {
31
- out = NULL
32
- for (sample in names(immdata$data)) {
33
- tmpdf = immdata$data[[sample]] %>%
34
- select(Barcode, CDR3.aa) %>%
35
- separate_rows(Barcode, sep = ";") %>%
36
- mutate(Barcode = paste0(sample, "_", Barcode))
37
- out = bind_rows(out, tmpdf)
38
- }
39
- out
36
+ expand_immdata(immdata, cell_id = "Barcode") %>%
37
+ mutate(Barcode = glue(paste0(prefix, "{Barcode}"))) %>%
38
+ select(Barcode, CDR3.aa)
40
39
  }
41
40
  cdr3aa_df = get_cdr3aa_df()
42
41
 
biopipen/scripts/tcr/TESSA.R CHANGED
@@ -1,8 +1,10 @@
1
1
  source("{{biopipen_dir}}/utils/misc.R")
2
+ source("{{biopipen_dir}}/utils/single_cell.R")
2
3
 
3
4
  library(glue)
4
5
  library(dplyr)
5
6
  library(tidyr)
7
+ library(tibble)
6
8
  library(immunarch)
7
9
  library(Seurat)
8
10
  library(ggplot2)
@@ -11,7 +13,9 @@ library(ggprism)
11
13
  immfile <- {{in.immdata | r}}
12
14
  exprfile <- {{in.srtobj | r}}
13
15
  outfile <- {{out.outfile | r}}
16
+ joboutdir <- {{job.outdir | r}}
14
17
  python <- {{envs.python | r}}
18
+ prefix <- {{envs.prefix | r}}
15
19
  within_sample <- {{envs.within_sample | r}}
16
20
  assay <- {{envs.assay | r}}
17
21
  predefined_b <- {{envs.predefined_b | r}}
@@ -27,35 +31,22 @@ if (!dir.exists(tessa_dir)) dir.create(tessa_dir)
27
31
 
28
32
  ### Start preparing input files for TESSA
29
33
  # Prepare input files
30
- print("Preparing TCR input file ...")
31
- immdata <- readRDS(immfile)
32
-
33
- has_VJ <- "V.name" %in% colnames(immdata$data[[1]]) && "J.name" %in% colnames(immdata$data[[1]])
34
- # Merge all samples
35
- tcrdata <- do_call(rbind, lapply(seq_len(nrow(immdata$meta)), function(i) {
36
- # Clones Proportion CDR3.aa Barcode
37
- # 5 4 0.008583691 CAVRDTGNTPLVF;CASSEYSNQPQHF GTTCGGGCACTTACGA-1;TCTCTAAGTACCAGTT-1
38
- # 6 4 0.008583691 CALTQAAGNKLTF;CASRPEDLRGQPQHF GCTTGAAGTCGGCACT-1;TACTCGCTCCTAAGTG-1
39
- if (has_VJ) {
40
- cldata = immdata$data[[i]][, c("Barcode", "CDR3.aa", "V.name", "J.name")]
41
- } else {
42
- cldata = immdata$data[[i]][, c("Barcode", "CDR3.aa")]
43
- }
44
- # # A tibble: 4 × 5
45
- # Sample Patient Timepoint Tissue
46
- # <chr> <chr> <chr> <chr>
47
- # 1 MC1685Pt011-Baseline-PB MC1685Pt011 Baseline PB
48
- mdata = as.list(immdata$meta[i, , drop=FALSE])
49
- for (mname in names(mdata)) {
50
- assign(mname, mdata[[mname]])
51
- }
34
+ log_info("Preparing TCR input file ...")
35
+ # If immfile endswith .rds, then it is an immunarch object
36
+ if (endsWith(tolower(immfile), ".rds")) {
37
+ immdata <- readRDS(immfile)
38
+ if (is.null(prefix)) { prefix = immdata$prefix }
39
+ if (is.null(prefix)) { prefix = "" }
40
+ tcrdata <- expand_immdata(immdata) %>%
41
+ mutate(Barcode = glue(paste0(prefix, "{Barcode}")))
42
+ rm(immdata)
43
+ } else {
44
+ tcrdata <- read.table(immfile, sep="\t", header=TRUE, row.names=1) %>%
45
+ rownames_to_column("Barcode")
46
+ }
47
+
48
+ has_VJ <- "V.name" %in% colnames(tcrdata) && "J.name" %in% colnames(tcrdata)
52
49
 
53
- cldata %>%
54
- separate_rows(Barcode, sep=";") %>%
55
- # Just in case there are duplicated barcodes
56
- distinct(Barcode, .keep_all = TRUE) %>%
57
- mutate(Barcode = glue("{{envs.prefix}}{Barcode}"), sample = Sample)
58
- }))
59
50
  if (has_VJ) {
60
51
  tcrdata <- tcrdata %>% dplyr::mutate(
61
52
  v_gene = sub("-\\d+$", "", V.name),
@@ -65,18 +56,18 @@ if (has_VJ) {
65
56
  cdr3 = CDR3.aa,
66
57
  v_gene,
67
58
  j_gene,
68
- sample
59
+ sample = Sample
69
60
  )
70
61
  } else {
71
62
  tcrdata <- tcrdata %>% dplyr::select(
72
63
  contig_id = Barcode,
73
64
  cdr3 = CDR3.aa,
74
- sample
65
+ sample = Sample
75
66
  )
76
67
  }
77
68
 
78
69
 
79
- print("Preparing expression input file ...")
70
+ log_info("Preparing expression input file ...")
80
71
  is_seurat <- endsWith(tolower(exprfile), ".rds")
81
72
  is_gz <- endsWith(tolower(exprfile), ".gz")
82
73
 
@@ -94,31 +85,34 @@ cell_ids <- intersect(tcrdata$contig_id, colnames(expr))
94
85
  unused_tcr_cells <- setdiff(tcrdata$contig_id, cell_ids)
95
86
  unused_expr_cells <- setdiff(colnames(expr), cell_ids)
96
87
  if (length(unused_tcr_cells) > 0) {
97
- warning(glue("{length(unused_tcr_cells)}/{nrow(tcrdata)} TCR cells are not used."), immediate. = TRUE)
88
+ log_warn(glue("{length(unused_tcr_cells)}/{nrow(tcrdata)} TCR cells are not used."))
98
89
  }
99
90
  if (length(unused_expr_cells) > 0) {
100
- warning(glue("{length(unused_expr_cells)}/{ncol(expr)} expression cells are not used."), immediate. = TRUE)
91
+ log_warn(glue("{length(unused_expr_cells)}/{ncol(expr)} expression cells are not used."))
101
92
  }
102
93
  if (length(cell_ids) == 0) {
103
- stop("No common cells between TCR and expression data. Are you using the correct prefix?")
94
+ stop(paste0(
95
+ "No common cells between TCR and expression data. ",
96
+ "Are you using the correct `envs.prefix` here or in `ImmunarchLoading`?"
97
+ ))
104
98
  }
105
99
  tcrdata <- tcrdata[tcrdata$contig_id %in% cell_ids, , drop=FALSE]
106
100
  expr <- as.matrix(expr)[, tcrdata$contig_id, drop=FALSE]
107
101
 
108
102
  # Write input files
109
- print("Writing input files ...")
103
+ log_info("Writing input files ...")
110
104
  write.table(tcrdata, file.path(tessa_dir, "tcrdata.txt"), sep=",", quote=FALSE, row.names=FALSE)
111
105
  write.table(expr, file.path(tessa_dir, "exprdata.txt"), sep=",", quote=FALSE, row.names=TRUE, col.names=TRUE)
112
106
 
113
107
  ### End preparing input files for TESSA
114
108
 
115
109
  ### Start running TESSA
116
- print("Running TESSA ...")
110
+ log_info("Running TESSA ...")
117
111
 
118
112
  # The original TESSA uses a python wrapper to run the encoder and tessa model
119
113
  # here we run those two steps directly here
120
114
 
121
- print("- Running encoder ...")
115
+ log_info("- Running encoder ...")
122
116
  cmd_encoder <- paste(
123
117
  python,
124
118
  file.path(tessa_srcdir, "BriseisEncoder.py"),
@@ -140,14 +134,14 @@ if (has_VJ) {
140
134
  file.path(tessa_dir, "tcr_vj.txt")
141
135
  )
142
136
  }
143
- print(paste("- ", cmd_encoder))
137
+ log_info(paste("- ", cmd_encoder))
144
138
 
145
139
  rc <- system(cmd_encoder)
146
140
  if (rc != 0) {
147
141
  stop("Error: Failed to run encoder.")
148
142
  }
149
143
 
150
- print("- Running TESSA model ...")
144
+ log_info("- Running TESSA model ...")
151
145
  source(file.path(tessa_srcdir, "real_data.R"))
152
146
 
153
147
  tessa <- run_tessa(
@@ -162,7 +156,7 @@ tessa <- run_tessa(
162
156
  )
163
157
 
164
158
  # Save TESSA results
165
- print("Saving TESSA results ...")
159
+ log_info("Saving TESSA results ...")
166
160
  if (is_seurat) {
167
161
  cells <- rownames(sobj@meta.data)
168
162
  sobj@meta.data <- sobj@meta.data %>%
@@ -187,7 +181,7 @@ if (is_seurat) {
187
181
  }
188
182
 
189
183
  # Post analysis
190
- print("Post analysis ...")
184
+ log_info("Post analysis ...")
191
185
  plot_tessa(tessa, result_dir)
192
186
  plot_Tessa_clusters(tessa, result_dir)
193
187
 
@@ -201,3 +195,34 @@ p <- tessa$meta %>%
201
195
  png(file.path(result_dir, "Cluster_size_dist.png"), width=8, height=8, units="in", res=100)
202
196
  print(p)
203
197
  dev.off()
198
+
199
+ add_report(
200
+ list(
201
+ src = file.path(result_dir, "Cluster_size_dist.png"),
202
+ descr = "Histogram of cluster size distribution"
203
+ ),
204
+ list(
205
+ src = file.path(result_dir, "clone_size.png"),
206
+ descr = "Center cluster size vs. non-center cluster size"
207
+ ),
208
+ list(
209
+ src = file.path(result_dir, "exp_TCR_pair_plot.png"),
210
+ descr = "Expression-TCR distance plot"
211
+ ),
212
+ list(
213
+ src = file.path(result_dir, "TCR_dist_density.png"),
214
+ descr = "TCR distance density plot"
215
+ ),
216
+ list(
217
+ src = file.path(result_dir, "TCR_explore.png"),
218
+ descr = "Exploratory plot at the TCR level"
219
+ ),
220
+ list(
221
+ src = file.path(result_dir, "TCR_explore_clusters.png"),
222
+ descr = "TESSA clusters"
223
+ ),
224
+ h1 = "TESSA Results",
225
+ ui = "table_of_images"
226
+ )
227
+
228
+ save_report(joboutdir)
biopipen/utils/misc.R CHANGED
@@ -1,12 +1,13 @@
1
1
  # Misc utilities for R
2
2
  library(logger)
3
+ library(jsonlite)
3
4
 
4
5
  .logger_layout <- layout_glue_generator(
5
6
  format = '{sprintf("%-7s", level)} [{format(time, "%Y-%m-%d %H:%M:%S")}] {msg}'
6
7
  )
7
8
  log_layout(.logger_layout)
8
9
  log_appender(appender_stdout)
9
- log_errors()
10
+ tryCatch(log_errors(), error = function(e) {})
10
11
 
11
12
  .isBQuoted <- function(x) {
12
13
  # Check if x is backtick-quoted
@@ -112,3 +113,97 @@ list_update <- function(x, y) {
112
113
  }
113
114
  x
114
115
  }
116
+
117
+ #’ Biopipen palette
118
+ #’ @param alpha Alpha value
119
+ #’ @return A palette function
120
+ #' @export
121
+ pal_biopipen <- function(alpha = 1) {
122
+ if (alpha > 1L | alpha <= 0L) stop("alpha must be in (0, 1]")
123
+ colors <- c(
124
+ "#ec3f3f", "#009e73", "#008ad8", "#cc79a7",
125
+ "#e69f00", "#50cada", "#f0e442", "#a76ce7",
126
+ "#ff864d", "#45e645", "#3699b5", "#ffdcda",
127
+ "#d55e00", "#778ba6", "#c37b35", "#bc28ff"
128
+ )
129
+ colors <- scales::alpha(colors, alpha)
130
+ function(n) {
131
+ if (n <= length(colors)) {
132
+ colors[1:n]
133
+ } else {
134
+ out_colors <- colors
135
+ out_alpha <- 1.0
136
+ while(length(out_colors) < n) {
137
+ out_alpha <- out_alpha - 0.3
138
+ out_colors <- c(out_colors, scales::alpha(colors, out_alpha))
139
+ }
140
+ out_colors[1:n]
141
+ }
142
+ }
143
+ }
144
+
145
+ scale_color_biopipen <- function(alpha = 1, ...) {
146
+ ggplot2::discrete_scale("colour", "biopipen", pal_biopipen(alpha), ...)
147
+ }
148
+
149
+ scale_colour_biopipen <- scale_color_biopipen
150
+
151
+ scale_fill_biopipen <- function(alpha = 1, ...) {
152
+ ggplot2::discrete_scale("fill", "biopipen", pal_biopipen(alpha), ...)
153
+ }
154
+
155
+ .report <- list(
156
+ # h1 => list(
157
+ # h2 => list(
158
+ # h3#1 => list(ui1 => list(content11, content12)),
159
+ # h3#2 => list(ui2 => list(content21, content22))
160
+ # )
161
+ # )
162
+ )
163
+
164
+ add_report <- function(..., h1, h2 = "#", h3 = "#", ui = "flat") {
165
+ if (is.null(.report[[h1]])) {
166
+ .report[[h1]] <<- list()
167
+ }
168
+ if (is.null(.report[[h1]][[h2]])) {
169
+ .report[[h1]][[h2]] <<- list()
170
+ }
171
+ if (is.null(.report[[h1]][[h2]][[h3]])) {
172
+ .report[[h1]][[h2]][[h3]] <<- list()
173
+ }
174
+ if (is.null(.report[[h1]][[h2]][[h3]][[ui]])) {
175
+ .report[[h1]][[h2]][[h3]][[ui]] <<- list()
176
+ }
177
+ content = list(...)
178
+ for (i in seq_along(content)) {
179
+ .report[[h1]][[h2]][[h3]][[ui]] <<- c(
180
+ .report[[h1]][[h2]][[h3]][[ui]],
181
+ list(content[[i]])
182
+ )
183
+ }
184
+ }
185
+
186
+ save_report <- function(path, clear = TRUE) {
187
+ if (dir.exists(path)) {
188
+ path <- file.path(path, "report.json")
189
+ }
190
+
191
+ writeLines(toJSON(.report, pretty = TRUE, auto_unbox = TRUE), path)
192
+ if (clear) {
193
+ .report <<- list()
194
+ }
195
+ }
196
+
197
+
198
+ # Escape html
199
+ html_escape <- function(text) {
200
+ if (is.null(text)) {
201
+ return("")
202
+ }
203
+ text = gsub("&", "&amp;", text)
204
+ text = gsub("<", "&lt;", text)
205
+ text = gsub(">", "&gt;", text)
206
+ text = gsub("\"", "&quot;", text)
207
+ text = gsub("'", "&#039;", text)
208
+ text
209
+ }
{biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: biopipen
3
- Version: 0.21.2
3
+ Version: 0.22.1
4
4
  Summary: Bioinformatics processes/pipelines that can be run from `pipen run`
5
5
  License: MIT
6
6
  Author: pwwang