biopipen 0.33.1__py3-none-any.whl → 0.34.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (150) hide show
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/filters.py +10 -183
  3. biopipen/core/proc.py +5 -3
  4. biopipen/core/testing.py +8 -1
  5. biopipen/ns/bam.py +40 -4
  6. biopipen/ns/cnv.py +1 -1
  7. biopipen/ns/cnvkit.py +1 -1
  8. biopipen/ns/delim.py +1 -1
  9. biopipen/ns/gsea.py +63 -37
  10. biopipen/ns/misc.py +38 -0
  11. biopipen/ns/plot.py +8 -0
  12. biopipen/ns/scrna.py +328 -292
  13. biopipen/ns/scrna_metabolic_landscape.py +207 -366
  14. biopipen/ns/tcr.py +165 -97
  15. biopipen/reports/bam/CNVpytor.svelte +4 -9
  16. biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
  17. biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
  18. biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
  19. biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
  20. biopipen/reports/scrna/DimPlots.svelte +1 -1
  21. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
  22. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
  23. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
  24. biopipen/reports/snp/PlinkCallRate.svelte +2 -2
  25. biopipen/reports/snp/PlinkFreq.svelte +1 -1
  26. biopipen/reports/snp/PlinkHWE.svelte +1 -1
  27. biopipen/reports/snp/PlinkHet.svelte +1 -1
  28. biopipen/reports/snp/PlinkIBD.svelte +1 -1
  29. biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
  30. biopipen/scripts/bam/CNAClinic.R +41 -6
  31. biopipen/scripts/bam/CNVpytor.py +2 -1
  32. biopipen/scripts/bam/ControlFREEC.py +2 -3
  33. biopipen/scripts/bam/SamtoolsView.py +33 -0
  34. biopipen/scripts/cnv/AneuploidyScore.R +25 -13
  35. biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
  36. biopipen/scripts/cnv/TMADScore.R +4 -4
  37. biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
  38. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
  39. biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
  40. biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
  41. biopipen/scripts/delim/RowsBinder.R +1 -1
  42. biopipen/scripts/delim/SampleInfo.R +4 -1
  43. biopipen/scripts/gene/GeneNameConversion.R +14 -12
  44. biopipen/scripts/gsea/Enrichr.R +2 -2
  45. biopipen/scripts/gsea/FGSEA.R +184 -50
  46. biopipen/scripts/gsea/PreRank.R +3 -3
  47. biopipen/scripts/misc/Plot.R +80 -0
  48. biopipen/scripts/plot/VennDiagram.R +2 -2
  49. biopipen/scripts/protein/ProdigySummary.R +34 -27
  50. biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
  51. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
  52. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
  53. biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
  54. biopipen/scripts/regulatory/motifs-common.R +10 -9
  55. biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
  56. biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
  57. biopipen/scripts/rnaseq/Simulation.R +0 -2
  58. biopipen/scripts/rnaseq/UnitConversion.R +6 -5
  59. biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
  60. biopipen/scripts/scrna/CellCellCommunication.py +1 -1
  61. biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
  62. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
  63. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
  64. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
  65. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
  66. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
  67. biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
  68. biopipen/scripts/scrna/CellsDistribution.R +1 -1
  69. biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
  70. biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
  71. biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
  72. biopipen/scripts/scrna/MarkersFinder.R +481 -215
  73. biopipen/scripts/scrna/MetaMarkers.R +3 -3
  74. biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
  75. biopipen/scripts/scrna/RadarPlots.R +1 -1
  76. biopipen/scripts/scrna/ScFGSEA.R +231 -76
  77. biopipen/scripts/scrna/ScSimulation.R +11 -10
  78. biopipen/scripts/scrna/ScVelo.py +605 -0
  79. biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
  80. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
  81. biopipen/scripts/scrna/SeuratClusterStats-features.R +43 -30
  82. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
  83. biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
  84. biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
  85. biopipen/scripts/scrna/SeuratClustering.R +31 -48
  86. biopipen/scripts/scrna/SeuratLoading.R +2 -2
  87. biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
  88. biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
  89. biopipen/scripts/scrna/SeuratPreparing.R +76 -24
  90. biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
  91. biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
  92. biopipen/scripts/scrna/Subset10X.R +2 -2
  93. biopipen/scripts/scrna/TopExpressingGenes.R +144 -185
  94. biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
  95. biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
  96. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
  97. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
  98. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
  99. biopipen/scripts/snp/MatrixEQTL.R +39 -20
  100. biopipen/scripts/snp/PlinkCallRate.R +43 -34
  101. biopipen/scripts/snp/PlinkFreq.R +34 -41
  102. biopipen/scripts/snp/PlinkHWE.R +23 -18
  103. biopipen/scripts/snp/PlinkHet.R +26 -22
  104. biopipen/scripts/snp/PlinkIBD.R +30 -34
  105. biopipen/scripts/stats/ChowTest.R +9 -8
  106. biopipen/scripts/stats/DiffCoexpr.R +13 -11
  107. biopipen/scripts/stats/LiquidAssoc.R +7 -8
  108. biopipen/scripts/stats/Mediation.R +8 -8
  109. biopipen/scripts/stats/MetaPvalue.R +11 -13
  110. biopipen/scripts/stats/MetaPvalue1.R +6 -5
  111. biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
  112. biopipen/scripts/tcr/ClonalStats.R +6 -5
  113. biopipen/scripts/tcr/CloneResidency.R +3 -3
  114. biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
  115. biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
  116. biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
  117. biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
  118. biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
  119. biopipen/scripts/tcr/ScRepLoading.R +114 -92
  120. biopipen/scripts/tcr/TCRClusterStats.R +2 -2
  121. biopipen/scripts/tcr/TCRClustering.R +86 -97
  122. biopipen/scripts/tcr/TESSA.R +65 -115
  123. biopipen/scripts/tcr/VJUsage.R +5 -5
  124. biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
  125. biopipen/utils/common_docstrs.py +66 -63
  126. biopipen/utils/reporter.py +177 -0
  127. {biopipen-0.33.1.dist-info → biopipen-0.34.1.dist-info}/METADATA +2 -1
  128. {biopipen-0.33.1.dist-info → biopipen-0.34.1.dist-info}/RECORD +130 -145
  129. {biopipen-0.33.1.dist-info → biopipen-0.34.1.dist-info}/WHEEL +1 -1
  130. biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
  131. biopipen/reports/scrna/ScFGSEA.svelte +0 -16
  132. biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
  133. biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
  134. biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
  135. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
  136. biopipen/reports/utils/gsea.liq +0 -110
  137. biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
  138. biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
  139. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
  140. biopipen/utils/caching.R +0 -44
  141. biopipen/utils/gene.R +0 -95
  142. biopipen/utils/gsea.R +0 -329
  143. biopipen/utils/io.R +0 -20
  144. biopipen/utils/misc.R +0 -602
  145. biopipen/utils/mutate_helpers.R +0 -581
  146. biopipen/utils/plot.R +0 -209
  147. biopipen/utils/repr.R +0 -146
  148. biopipen/utils/rnaseq.R +0 -48
  149. biopipen/utils/single_cell.R +0 -207
  150. {biopipen-0.33.1.dist-info → biopipen-0.34.1.dist-info}/entry_points.txt +0 -0
@@ -1,8 +1,7 @@
1
- {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
-
3
1
  library(rlang)
4
2
  library(parallel)
5
3
  library(mediation)
4
+ library(biopipen.utils)
6
5
 
7
6
  infile <- {{in.infile | r}}
8
7
  fmlfile <- {{in.fmlfile | r}}
@@ -16,15 +15,16 @@ cases <- {{envs.cases | r}}
16
15
  transpose_input <- {{envs.transpose_input | r}}
17
16
 
18
17
  set.seed(123)
18
+ log <- get_logger()
19
19
 
20
- log_info("Reading input file ...")
20
+ log$info("Reading input file ...")
21
21
  indata <- read.table(infile, header = TRUE, sep = "\t", row.names = NULL, check.names = FALSE)
22
22
  if (transpose_input) { indata <- t(indata) }
23
23
 
24
- log_info("Reading formula file/cases ...")
24
+ log$info("Reading formula file/cases ...")
25
25
  if (!is.null(fmlfile)) {
26
26
  if (!is.null(cases) && length(cases) > 0) {
27
- log_warn("envs.cases ignored as in.fmlfile is provided")
27
+ log$warn("envs.cases ignored as in.fmlfile is provided")
28
28
  }
29
29
  fmldata <- read.table(fmlfile, header = TRUE, sep = "\t", row.names = NULL)
30
30
  # Case M Y X Cov Model_M Model_Y
@@ -39,14 +39,14 @@ medanalysis <- function(i, total) {
39
39
  casename <- names(cases)[i]
40
40
  case <- cases[[casename]]
41
41
  if (total < 50) {
42
- log_info("- Case: ", casename)
42
+ log$info("- Case: ", casename)
43
43
  } else if (total < 500) {
44
44
  if (i %% 10 == 0) {
45
- log_info("- Processing case {i}/{total} ...")
45
+ log$info("- Processing case {i}/{total} ...")
46
46
  }
47
47
  } else {
48
48
  if (i %% 100 == 0) {
49
- log_info("- Processing case {i}/{total} ...")
49
+ log$info("- Processing case {i}/{total} ...")
50
50
  }
51
51
  }
52
52
  M <- case$M
@@ -1,10 +1,9 @@
1
- {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
-
3
1
  library(metap)
4
2
  library(rlang)
5
3
  library(dplyr)
4
+ library(biopipen.utils)
6
5
 
7
- infiles <- {{in.infiles | r}}
6
+ infiles <- {{in.infiles | each: str | r}}
8
7
  outfile <- {{out.outfile | r}}
9
8
  id_cols <- {{envs.id_cols | r}}
10
9
  id_exprs <- {{envs.id_exprs | r}}
@@ -16,11 +15,13 @@ padj <- {{envs.padj | r}}
16
15
 
17
16
  if (method == "fisher") { method = "sumlog" }
18
17
 
18
+ log <- get_logger()
19
+
19
20
  if (length(infiles) == 1 && padj == "none") {
20
- log_info("Only one input file, copying to output ...")
21
+ log$info("Only one input file, copying to output ...")
21
22
  file.copy(infiles, outfile)
22
23
  } else if (length(infiles) == 1) {
23
- log_info("Only one input file, performing p-value adjustment ...")
24
+ log$info("Only one input file, performing p-value adjustment ...")
24
25
  if (is.null(pval_cols)) {
25
26
  stop("Must provide envs.pval_cols")
26
27
  }
@@ -30,7 +31,7 @@ if (length(infiles) == 1 && padj == "none") {
30
31
  }
31
32
  indata$Padj <- p.adjust(indata[, pval_cols], method = padj)
32
33
 
33
- log_info("Writing output ...")
34
+ log$info("Writing output ...")
34
35
  write.table(indata, outfile, quote = FALSE, sep = "\t", row.names = FALSE)
35
36
  } else {
36
37
  # Check pval_cols
@@ -68,7 +69,7 @@ if (length(infiles) == 1 && padj == "none") {
68
69
  }
69
70
  }
70
71
 
71
- log_info("Reading and preparing data ...")
72
+ log$info("Reading and preparing data ...")
72
73
  outdata <- NULL
73
74
  for (i in seq_along(infiles)) {
74
75
  infile <- infiles[i]
@@ -89,7 +90,7 @@ if (length(infiles) == 1 && padj == "none") {
89
90
  }
90
91
  }
91
92
 
92
- log_info("Running metap on each row ...")
93
+ log$info("Running metap on each row ...")
93
94
  metaps <- c()
94
95
  ns <- c()
95
96
  pval_columns <- setdiff(colnames(outdata), id_cols)
@@ -119,14 +120,11 @@ if (length(infiles) == 1 && padj == "none") {
119
120
  outdata <- outdata %>% arrange(MetaPval)
120
121
 
121
122
  if (padj != "none") {
122
- log_info("Calculating adjusted p-values ...")
123
+ log$info("Calculating adjusted p-values ...")
123
124
  outdata$MetaPadj <- p.adjust(outdata$MetaPval, method = padj)
124
125
 
125
126
  }
126
127
 
127
- log_info("Writing output ...")
128
+ log$info("Writing output ...")
128
129
  write.table(outdata, outfile, quote = FALSE, sep = "\t", row.names = FALSE)
129
130
  }
130
-
131
-
132
-
@@ -1,8 +1,7 @@
1
- {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
-
3
1
  library(metap)
4
2
  library(rlang)
5
3
  library(dplyr)
4
+ library(biopipen.utils)
6
5
 
7
6
  infile <- {{in.infile | r}}
8
7
  outfile <- {{out.outfile | r}}
@@ -13,6 +12,8 @@ na <- {{envs.na | r}}
13
12
  keep_single <- {{envs.keep_single | r}}
14
13
  padj <- {{envs.padj | r}}
15
14
 
15
+ log <- get_logger()
16
+
16
17
  if (method == "fisher") { method = "sumlog" }
17
18
 
18
19
  # Check pval_cols
@@ -24,7 +25,7 @@ if (length(id_cols) == 1) {
24
25
  id_cols <- trimws(strsplit(id_cols, ",")[[1]])
25
26
  }
26
27
 
27
- log_info("Reading input and performing meta-analysis ...")
28
+ log$info("Reading input and performing meta-analysis ...")
28
29
  outdata <- read.table(
29
30
  infile, header = TRUE, sep = "\t", row.names = NULL, check.names = FALSE
30
31
  ) %>%
@@ -64,10 +65,10 @@ outdata$.pvals <- NULL
64
65
  outdata <- outdata %>% arrange(MetaPval)
65
66
 
66
67
  if (padj != "none") {
67
- log_info("Calculating adjusted p-values ...")
68
+ log$info("Calculating adjusted p-values ...")
68
69
  outdata$MetaPadj <- p.adjust(outdata$MetaPval, method = padj)
69
70
 
70
71
  }
71
72
 
72
- log_info("Writing output ...")
73
+ log$info("Writing output ...")
73
74
  write.table(outdata, outfile, quote = FALSE, sep = "\t", row.names = FALSE)
@@ -1,35 +1,45 @@
1
- {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
1
+ library(rlang)
2
2
  library(dplyr)
3
3
  library(tidyr)
4
4
  library(tibble)
5
- library(ggplot2)
6
- library(ggridges)
7
5
  library(glue)
8
6
  library(hash)
9
7
  library(glmnet)
10
8
  library(broom.mixed)
11
9
  library(stringr)
10
+ library(plotthis)
11
+ library(biopipen.utils)
12
12
 
13
- immdatafile = {{in.immdata | quote}}
14
- srtobjfile = {{in.srtobj | r}}
15
- outdir = {{out.outdir | quote}}
16
- joboutdir = {{job.outdir | quote}}
17
- group_name = {{envs.group | r}}
18
- comparison = {{envs.comparison | r}}
19
- prefix = {{envs.prefix | r}}
20
- target = {{envs.target | r}}
21
- subset_cols = {{envs.subset | r}}
13
+ scrfile <- {{in.scrfile | r}}
14
+ outdir <- {{out.outdir | r}}
15
+ joboutdir <- {{job.outdir | r}}
16
+ group_name <- {{envs.group | r}}
17
+ comparison <- {{envs.comparison | r}}
18
+ target <- {{envs.target | r}}
19
+ each_cols <- {{envs.each | r}}
20
+
21
+ log <- get_logger()
22
+ reporter <- get_reporter()
22
23
 
23
24
  if (is.null(group_name) || is.null(comparison)) {
24
25
  stop("envs.group and envs.comparison must be specified")
25
26
  }
26
27
 
27
- if (is.null(target)) {
28
- stop("envs.target must be specified, which should be one of the keys in `envs.comparison`")
28
+ if (length(comparison) != 2) {
29
+ stop("envs.comparison must have exactly two elements or keys, representing the two groups to compare")
30
+ }
31
+
32
+ if (!is.list(comparison)) {
33
+ comparison <- stats::setNames(as.list(comparison), comparison)
34
+ }
35
+
36
+ target <- target %||% names(comparison)[1]
37
+ if (!(target %in% names(comparison))) {
38
+ stop(paste0("Target group '", target, "' not found in the comparison groups."))
29
39
  }
30
40
 
31
- if (is.character(subset_cols) && length(subset_cols) == 1) {
32
- subset_cols = trimws(strsplit(subset_cols, ",")[[1]])
41
+ if (is.character(each_cols) && length(each_cols) == 1) {
42
+ each_cols = trimws(strsplit(each_cols, ",")[[1]])
33
43
  }
34
44
 
35
45
  ### Helpers
@@ -142,103 +152,43 @@ for (i in 1:3){
142
152
  AA_MAPS[[i]] <- create_hashmap(as.character(RF$AA), as.vector(RF[,(i+1),drop=TRUE]))
143
153
  }
144
154
 
145
- # Loading metadata from srtobjfile
146
- log_info("Loading metadata from srtobjfile")
147
- if (is.null(srtobjfile)) {
148
- metadata = NULL
149
- } else {
150
- # Get the extension (lowercase) of srtobjfile, see if it is .rds file
151
- srtobjfile_ext = tolower(tools::file_ext(srtobjfile))
152
- if (srtobjfile_ext != "rds") {
153
- metadata = read.table(
154
- srtobjfile,
155
- sep = "\t",
156
- header = TRUE,
157
- row.names = 1,
158
- stringsAsFactors = FALSE,
159
- check.names = FALSE,
160
- )
161
- } else {
162
- metadata = readRDS(srtobjfile)@meta.data
163
- }
164
- }
165
-
166
- log_info("Loading immdata from immdatafile")
167
- immdata = readRDS(immdatafile)
155
+ log$info("Loading data from input file")
156
+ mdata <- read_obj(scrfile)@meta.data
168
157
 
158
+ if (!group_name %in% colnames(mdata)) {
159
+ stop(paste0("Group name '", group_name, "' not found in the data."))
160
+ }
169
161
 
170
- merge_data = function(sam) {
171
- # Merge the data for one sample from immdata and metadata
172
- out = immdata$data[[sam]]
173
- if ("chain" %in% colnames(out)) {
174
- out = out %>% separate_rows(chain, CDR3.aa, V.name, J.name, sep = ";") %>%
175
- filter(chain == "TRB")
176
- }
177
- out = out %>%
178
- mutate(
179
- Sample = sam,
180
- locus = "TCRB",
181
- sequence = CDR3.aa,
182
- length = nchar(sequence),
183
- vgene = V.name,
184
- jgene = J.name,
185
- ) %>%
186
- select(Sample, Barcode, locus, sequence, length, vgene, jgene) %>%
187
- separate_longer_delim(Barcode, delim = ";") %>%
188
- left_join(immdata$meta, by = "Sample")
189
-
190
- if (is.null(metadata)) {
191
- # No metadata, just return
192
- return (out)
193
- }
162
+ # check if values of comparison are in the group_name column
163
+ if (!all(unlist(comparison) %in% as.character(mdata[[group_name]]))) {
164
+ stop(paste0("Some values in comparison are not found in the group_name column: ",
165
+ paste(setdiff(unlist(comparison), mdata[[group_name]]), collapse = ", ")))
166
+ }
194
167
 
195
- # Merge with metadata
196
- sdata = metadata %>% filter(Sample == sam)
197
- if (!is.null(prefix) && nchar(prefix) > 0) {
198
- # Replace the placeholder like {Sample} with the data in other columns
199
- # in the same row
200
- sdata = sdata %>% mutate(.prefix_len = nchar(glue(prefix)))
201
- # Remove the prefix in the rownames of sdata
202
- rownames(sdata) = substring(rownames(sdata), sdata$.prefix_len + 1)
203
- sdata = sdata %>% select(-.prefix_len)
204
- }
205
- sdata = rownames_to_column(sdata, "Barcode")
206
- out = out %>% left_join(sdata, by = "Barcode", suffix = c("", "_seurat"))
207
- out$.Group = NA_character_
208
- for (k in names(comparison)) {
209
- group_mask = out[[group_name]] %in% comparison[[k]]
210
- if (sum(group_mask) == 0) {
211
- stop(
212
- glue("No cells in comparison group {k}. Please check if the group items {comparison[[k]]} exist.")
213
- )
168
+ # add a new column with the keys of comparison, when their values are in the group_name column
169
+ mdata$.Group <- sapply(as.character(mdata[[group_name]]), function(x) {
170
+ for (key in names(comparison)) {
171
+ if (x %in% comparison[[key]]) {
172
+ return(key)
214
173
  }
215
- out$.Group[out[[group_name]] %in% comparison[[k]]] = k
216
- }
217
- if (!is.null(subset_cols)) {
218
- out = out %>% unite(".Subset", all_of(subset_cols), sep = "_", remove = FALSE)
219
174
  }
220
- return (out)
221
- }
222
-
223
- # Expanded and merged with metadata
224
- # Now we are able to select the cells using group and comparison
225
- log_info("Merging data with metadata for each sample")
226
- merged = NULL
227
- for (sam in immdata$meta$Sample) {
228
- log_info("- For sample {sam}")
229
- md = merge_data(sam)
230
- merged = if (is.null(merged)) md else rbind(merged, md)
231
- }
175
+ return(NA)
176
+ })
177
+ mdata <- mdata %>%
178
+ separate(CTaa, into = c(NA, "sequence"), sep = "_", remove = FALSE) %>%
179
+ separate(CTgene, into = c(NA, "vjgene"), sep = "_", remove = FALSE) %>%
180
+ separate(vjgene, into = c("vgene", NA, "jgene", NA), sep = "\\.", remove = FALSE) %>%
181
+ mutate(length = nchar(sequence))
232
182
 
233
183
  # Statistics about the cell numbers with groups available in metadata
234
184
  # !!group_name, TotalCells, AvailCells, AvailCellsPct
235
- log_info("Calculating statistics")
236
- if (is.null(subset_cols)) {
237
- stats = merged %>%
185
+ log$info("Calculating statistics")
186
+ if (is.null(each_cols)) {
187
+ stats = mdata %>%
238
188
  # group by group_name
239
189
  group_by(.Group) %>%
240
190
  summarise(
241
- TotalCells = nrow(merged),
191
+ TotalCells = nrow(mdata),
242
192
  CellsPerGroup = n(),
243
193
  AvailCellsPerGroup = sum(length >= CDR3_MINLEN & length <= CDR3_MAXLEN),
244
194
  # Percentage with % in character
@@ -246,14 +196,15 @@ if (is.null(subset_cols)) {
246
196
  .groups = "drop"
247
197
  )
248
198
  } else {
249
- stats = merged %>%
199
+ stats = mdata %>%
200
+ unite(".Subset", all_of(each_cols), sep = "_", remove = FALSE) %>%
250
201
  group_by(.Subset) %>%
251
202
  group_map(function(df, .y) {
252
203
  df %>%
253
204
  group_by(.Group) %>%
254
205
  summarise(
255
206
  .Subset = .y$.Subset[1],
256
- AllCells = nrow(merged),
207
+ AllCells = nrow(mdata),
257
208
  TotalCells = nrow(df),
258
209
  CellsPerGroup = n(),
259
210
  AvailCellsPerGroup = sum(length >= CDR3_MINLEN & length <= CDR3_MAXLEN),
@@ -274,7 +225,7 @@ write.table(
274
225
  row.names = FALSE,
275
226
  )
276
227
 
277
- add_report(
228
+ reporter$add(
278
229
  list(
279
230
  kind = "descr",
280
231
  content = "Statistics about the cells mapped to the comparison groups. Columns:"
@@ -304,20 +255,22 @@ add_report(
304
255
 
305
256
 
306
257
 
307
- log_info("Add amino acid features")
308
- merged = merged %>%
258
+ log$info("Add amino acid features")
259
+ mdata = mdata %>%
309
260
  filter(!is.na(.Group) & length >= CDR3_MINLEN & length <= CDR3_MAXLEN) %>%
310
261
  add_percentAA() %>%
311
262
  add_positionalAA()
312
263
 
313
264
 
314
265
  do_one_subset = function(s) {
315
- log_info(paste("Processing subset", s))
266
+ if (!is.null(s)) {
267
+ log$info(paste("Processing subset", s))
268
+ }
316
269
  if (is.null(s)) {
317
- data = merged
270
+ data = mdata
318
271
  odir = file.path(outdir, "ALL")
319
272
  } else {
320
- data = merged %>% filter(.Subset == s)
273
+ data = mdata %>% filter(.Subset == s)
321
274
  odir = file.path(outdir, slugify(s))
322
275
  }
323
276
  dir.create(odir, recursive = TRUE, showWarnings = FALSE)
@@ -342,6 +295,13 @@ do_one_subset = function(s) {
342
295
  }
343
296
  }
344
297
  y = ifelse(data_fit$.Group == target, 1, 0)
298
+ if (any(table(y) <= 3) || length(table(y)) < 2) {
299
+ if (is.null(s)) {
300
+ log$warn(paste0("Not enough observations for target group '", target, "' with CDR3 length ", len, ". At least 4 observations are required."))
301
+ } else {
302
+ log$warn(paste0("Not enough observations for target group '", target, "' in subset '", s, "' with CDR3 length ", len, ". At least 4 observations are required."))
303
+ }
304
+ }
345
305
  # one multinomial or binomial class has 1 or 0 observations; not allowed
346
306
  if (any(table(y) <= 1)) { next }
347
307
  fit = glmnet(x, y, data=data_fit, alpha=0, lambda=0.01, family="binomial")
@@ -370,26 +330,22 @@ do_one_subset = function(s) {
370
330
  write.table(alldf, file = file.path(odir, "estimates.txt"), sep = "\t", quote = FALSE, row.names = FALSE)
371
331
 
372
332
  # save the plots
373
- gr = alldf %>%
374
- group_by(imgt_pos, feature) |>
333
+ gr <- alldf %>%
334
+ group_by(imgt_pos, feature) %>%
375
335
  summarise(coef = mean(estimate))
376
336
  # Avoid too large values
377
- gr$coef[gr$coef > 1.5] = 1.5
337
+ gr$coef[gr$coef > 1.5] <- 1.5
338
+ gr$coef <- exp(gr$coef) # Exponentiate the coefficients
378
339
 
379
- g = ggplot(gr, aes(imgt_pos, exp(coef), color=feature))
380
- g = g + geom_point() + geom_line(aes(group=feature)) + theme_classic() + geom_hline(yintercept=1)
381
- g = g + theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + scale_color_manual(values=c("#eead0c", "#ed6a51", "#02868a"))
382
- g = g + xlab("TCR position") + ylab(paste("Coefficient for", target, "prediction")) + ggtitle(s)
340
+ g <- LinePlot(gr, x = "imgt_pos", y = "coef", group_by = "feature",
341
+ add_line = 1, x_text_angle = 90, xlab = "TCR position",
342
+ ylab = paste("Coefficient for", target, "prediction"), title = s)
383
343
 
384
- png(file.path(odir, "estimated_coefficients.png"), width=1000, height=1000, res=100)
385
- print(g)
386
- dev.off()
344
+ save_plot(g, file.path(odir, "estimated_coefficients"),
345
+ devpars = list(width = 1000, height = 1000, res = 100),
346
+ formats = c("png", "pdf"))
387
347
 
388
- pdf(file.path(odir, "estimated_coefficients.pdf"), width=10, height=10)
389
- print(g)
390
- dev.off()
391
-
392
- add_report(
348
+ reporter$add(
393
349
  list(
394
350
  kind = "descr",
395
351
  content = "Estimated coefficients for each feature and position in the CDR3"
@@ -397,7 +353,7 @@ do_one_subset = function(s) {
397
353
  h1 = ifelse(
398
354
  is.null(s),
399
355
  "Estimated OR (per s.d.)",
400
- paste0(paste(subset_cols, collapse = ", "), " - ", s)
356
+ paste0(paste(each_cols, collapse = ", "), " - ", s)
401
357
  ),
402
358
  h2 = ifelse(
403
359
  is.null(s),
@@ -406,7 +362,7 @@ do_one_subset = function(s) {
406
362
  )
407
363
  )
408
364
 
409
- add_report(
365
+ reporter$add(
410
366
  list(
411
367
  name = "Plot",
412
368
  contents = list(
@@ -429,7 +385,7 @@ do_one_subset = function(s) {
429
385
  h1 = ifelse(
430
386
  is.null(s),
431
387
  "Estimated OR (per s.d.)",
432
- paste0(paste(subset_cols, collapse = ", "), " - ", s)
388
+ paste0(paste(each_cols, collapse = ", "), " - ", s)
433
389
  ),
434
390
  h2 = ifelse(
435
391
  is.null(s),
@@ -443,38 +399,23 @@ do_one_subset = function(s) {
443
399
  data$mid_hydro = sapply(data$midseq, function(x) get_feat_score(x, AA_MAPS[[2]]))
444
400
  data$smid_hydro = scale(data$mid_hydro)[,1]
445
401
 
446
- g = ggplot()
447
- # Give colors for different groups
448
- cols = c("turquoise3", "darkmagenta", "darkorange", "darkgreen", "darkblue", "darkred")
449
- groups = unique(data$.Group)
450
- if (length(groups) > length(cols)) {
451
- cols = c(cols, c("darkcyan", "darkviolet", "darkgoldenrod", "darkolivegreen", "darkslategray", "darkkhaki"))
452
- }
453
- cols = cols[1:length(groups)]
454
- for (i in seq_along(groups)) {
455
- g = g + geom_vline(
456
- xintercept = mean(data$smid_hydro[data$.Group==groups[i]]),
457
- color=cols[i]
458
- )
459
- }
460
- g = g + geom_density_ridges(
461
- aes(x=data$smid_hydro, y=data$.Group, color=data$.Group, fill=data$.Group),
462
- bandwidth=0.5,
463
- alpha=0.4,
464
- show.legend = FALSE
465
- ) + scale_color_manual(values=cols)
466
- g = g + scale_fill_manual(values=cols) + theme_bw(base_size=12)
467
- g = g + xlim(c(-4,4)) + xlab("CDR3bmr hydrophobicity") + ylab("") + coord_flip() + ggtitle(s)
468
-
469
- png(file.path(odir, "distribution.png"), width=1000, height=1000, res=100)
470
- print(g)
471
- dev.off()
472
-
473
- pdf(file.path(odir, "distribution.pdf"), width=10, height=10)
474
- print(g)
475
- dev.off()
476
-
477
- add_report(
402
+ g <- RidgePlot(
403
+ data = data,
404
+ x = "smid_hydro",
405
+ group_by = ".Group",
406
+ xlab = "CDR3bmr hydrophobicity",
407
+ ylab = "",
408
+ add_vline = TRUE,
409
+ alpha = 0.5,
410
+ title = s,
411
+ flip = TRUE
412
+ )
413
+
414
+ save_plot(g, file.path(odir, "distribution"),
415
+ devpars = list(width = 1000, height = 1000, res = 100),
416
+ formats = c("png", "pdf"))
417
+
418
+ reporter$add(
478
419
  list(
479
420
  kind = "table_image",
480
421
  descr = paste0(
@@ -488,7 +429,7 @@ do_one_subset = function(s) {
488
429
  h1 = ifelse(
489
430
  is.null(s),
490
431
  "Hydrophobicity Distribution",
491
- paste0(paste(subset_cols, collapse = ", "), " - ", s)
432
+ paste0(paste(each_cols, collapse = ", "), " - ", s)
492
433
  ),
493
434
  h2 = ifelse(
494
435
  is.null(s),
@@ -499,11 +440,11 @@ do_one_subset = function(s) {
499
440
 
500
441
  }
501
442
 
502
- if (is.null(subset_cols)) {
443
+ if (is.null(each_cols)) {
503
444
  do_one_subset(NULL)
504
445
  } else {
505
- subsets = na.omit(unique(merged$.Subset))
446
+ subsets = na.omit(unique(obj$.Subset))
506
447
  sapply(subsets, do_one_subset)
507
448
  }
508
449
 
509
- save_report(joboutdir)
450
+ reporter$save(joboutdir)
@@ -1,12 +1,13 @@
1
1
  library(rlang)
2
2
  library(glue)
3
+ library(dplyr)
3
4
  library(scplotter)
4
5
  library(biopipen.utils)
5
6
 
6
- screpfile <- {{in.screpfile | quote}}
7
- outdir <- {{out.outdir | quote}}
8
- joboutdir <- {{job.outdir | quote}}
9
- envs <- {{envs | r}}
7
+ screpfile <- {{in.screpfile | r}}
8
+ outdir <- {{out.outdir | r}}
9
+ joboutdir <- {{job.outdir | r}}
10
+ envs <- {{envs | r: todot="-"}}
10
11
  mutaters <- envs$mutaters
11
12
  cases <- envs$cases
12
13
  envs$mutaters <- NULL
@@ -397,7 +398,7 @@ get_plot_descr <- function(viz_type, case) {
397
398
  }
398
399
 
399
400
  log$info("Loading scRepertoire object ...")
400
- screp <- readRDS(screpfile)
401
+ screp <- read_obj(screpfile)
401
402
 
402
403
  log$info("Applying mutaters if any ...")
403
404
  screp <- ScRepMutate(screp, mutaters)
@@ -14,10 +14,10 @@ library(ComplexUpset)
14
14
  theme_set(theme_prism())
15
15
 
16
16
 
17
- immfile <- {{ in.immdata | quote }}
17
+ immfile <- {{ in.immdata | r }}
18
18
  metafile <- {{ in.metafile | r }}
19
- outdir <- {{ out.outdir | quote }}
20
- joboutdir <- {{ job.outdir | quote }}
19
+ outdir <- {{ out.outdir | r }}
20
+ joboutdir <- {{ job.outdir | r }}
21
21
 
22
22
  subject_key <- {{ envs.subject | r }}
23
23
  group_key <- {{ envs.group | r }}
@@ -6,8 +6,8 @@ library(tidyr)
6
6
  library(ggprism)
7
7
 
8
8
 
9
- immfile <- {{ in.immdata | quote }}
10
- outdir <- {{ out.outdir | quote }}
9
+ immfile <- {{ in.immdata | r }}
10
+ outdir <- {{ out.outdir | r }}
11
11
 
12
12
  subject_key <- {{ envs.subject | r }}
13
13
  group_key <- {{ envs.group | r }}
@@ -3,8 +3,8 @@ library(dplyr)
3
3
  library(tidyr)
4
4
  library(stringr)
5
5
 
6
- immfile = {{in.immdata | quote}}
7
- outdir = {{out.outdir | quote}}
6
+ immfile = {{in.immdata | r}}
7
+ outdir = {{out.outdir | r}}
8
8
 
9
9
  immdata = readRDS(immfile)
10
10
 
@@ -6,15 +6,15 @@ library(tidyr)
6
6
  library(tibble)
7
7
  library(immunarch)
8
8
 
9
- immfile = {{in.immdata | quote}}
9
+ immfile = {{in.immdata | r}}
10
10
  {% if in.filterfile %}
11
11
  filters = {{in.filterfile | toml_load | r}}
12
12
  {% else %}
13
13
  filters = {{envs.filters | r}}
14
14
  {% endif %}
15
15
  metacols = {{envs.metacols | r}}
16
- outfile = {{out.outfile | quote}}
17
- groupfile = {{out.groupfile | quote}}
16
+ outfile = {{out.outfile | r}}
17
+ groupfile = {{out.groupfile | r}}
18
18
 
19
19
  immdata0 = readRDS(immfile)
20
20
  groupname = filters$name
@@ -9,11 +9,11 @@ library(tibble)
9
9
  library(glue)
10
10
  library(bracer)
11
11
 
12
- metafile = {{ in.metafile | quote }}
13
- rdsfile = {{ out.rdsfile | quote }}
14
- metatxt = {{ out.metatxt | quote }}
15
- tmpdir = {{ envs.tmpdir | quote }}
16
- mode = {{ envs.mode | quote }}
12
+ metafile = {{ in.metafile | r }}
13
+ rdsfile = {{ out.rdsfile | r }}
14
+ metatxt = {{ out.metatxt | r }}
15
+ tmpdir = {{ envs.tmpdir | r }}
16
+ mode = {{ envs.mode | r }}
17
17
  extracols = {{ envs.extracols | r}}
18
18
  prefix = {{ envs.prefix | r }}
19
19