biopipen 0.33.1__py3-none-any.whl → 0.34.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (149) hide show
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/filters.py +10 -183
  3. biopipen/core/proc.py +5 -3
  4. biopipen/core/testing.py +8 -1
  5. biopipen/ns/bam.py +40 -4
  6. biopipen/ns/cnv.py +1 -1
  7. biopipen/ns/cnvkit.py +1 -1
  8. biopipen/ns/delim.py +1 -1
  9. biopipen/ns/gsea.py +63 -37
  10. biopipen/ns/misc.py +38 -0
  11. biopipen/ns/plot.py +8 -0
  12. biopipen/ns/scrna.py +290 -288
  13. biopipen/ns/scrna_metabolic_landscape.py +207 -366
  14. biopipen/ns/tcr.py +165 -97
  15. biopipen/reports/bam/CNVpytor.svelte +4 -9
  16. biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
  17. biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
  18. biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
  19. biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
  20. biopipen/reports/scrna/DimPlots.svelte +1 -1
  21. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
  22. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
  23. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
  24. biopipen/reports/snp/PlinkCallRate.svelte +2 -2
  25. biopipen/reports/snp/PlinkFreq.svelte +1 -1
  26. biopipen/reports/snp/PlinkHWE.svelte +1 -1
  27. biopipen/reports/snp/PlinkHet.svelte +1 -1
  28. biopipen/reports/snp/PlinkIBD.svelte +1 -1
  29. biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
  30. biopipen/scripts/bam/CNAClinic.R +41 -6
  31. biopipen/scripts/bam/CNVpytor.py +2 -1
  32. biopipen/scripts/bam/ControlFREEC.py +2 -3
  33. biopipen/scripts/bam/SamtoolsView.py +33 -0
  34. biopipen/scripts/cnv/AneuploidyScore.R +25 -13
  35. biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
  36. biopipen/scripts/cnv/TMADScore.R +4 -4
  37. biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
  38. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
  39. biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
  40. biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
  41. biopipen/scripts/delim/RowsBinder.R +1 -1
  42. biopipen/scripts/delim/SampleInfo.R +4 -1
  43. biopipen/scripts/gene/GeneNameConversion.R +14 -12
  44. biopipen/scripts/gsea/Enrichr.R +2 -2
  45. biopipen/scripts/gsea/FGSEA.R +184 -50
  46. biopipen/scripts/gsea/PreRank.R +3 -3
  47. biopipen/scripts/misc/Plot.R +80 -0
  48. biopipen/scripts/plot/VennDiagram.R +2 -2
  49. biopipen/scripts/protein/ProdigySummary.R +34 -27
  50. biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
  51. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
  52. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
  53. biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
  54. biopipen/scripts/regulatory/motifs-common.R +10 -9
  55. biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
  56. biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
  57. biopipen/scripts/rnaseq/Simulation.R +0 -2
  58. biopipen/scripts/rnaseq/UnitConversion.R +6 -5
  59. biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
  60. biopipen/scripts/scrna/CellCellCommunication.py +1 -1
  61. biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
  62. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
  63. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
  64. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
  65. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
  66. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
  67. biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
  68. biopipen/scripts/scrna/CellsDistribution.R +1 -1
  69. biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
  70. biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
  71. biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
  72. biopipen/scripts/scrna/MarkersFinder.R +348 -217
  73. biopipen/scripts/scrna/MetaMarkers.R +3 -3
  74. biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
  75. biopipen/scripts/scrna/RadarPlots.R +1 -1
  76. biopipen/scripts/scrna/ScFGSEA.R +157 -75
  77. biopipen/scripts/scrna/ScSimulation.R +11 -10
  78. biopipen/scripts/scrna/ScVelo.py +605 -0
  79. biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
  80. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
  81. biopipen/scripts/scrna/SeuratClusterStats-features.R +39 -30
  82. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
  83. biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
  84. biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
  85. biopipen/scripts/scrna/SeuratClustering.R +31 -48
  86. biopipen/scripts/scrna/SeuratLoading.R +2 -2
  87. biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
  88. biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
  89. biopipen/scripts/scrna/SeuratPreparing.R +76 -24
  90. biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
  91. biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
  92. biopipen/scripts/scrna/Subset10X.R +2 -2
  93. biopipen/scripts/scrna/TopExpressingGenes.R +141 -184
  94. biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
  95. biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
  96. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
  97. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
  98. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
  99. biopipen/scripts/snp/MatrixEQTL.R +39 -20
  100. biopipen/scripts/snp/PlinkCallRate.R +43 -34
  101. biopipen/scripts/snp/PlinkFreq.R +34 -41
  102. biopipen/scripts/snp/PlinkHWE.R +23 -18
  103. biopipen/scripts/snp/PlinkHet.R +26 -22
  104. biopipen/scripts/snp/PlinkIBD.R +30 -34
  105. biopipen/scripts/stats/ChowTest.R +9 -8
  106. biopipen/scripts/stats/DiffCoexpr.R +13 -11
  107. biopipen/scripts/stats/LiquidAssoc.R +7 -8
  108. biopipen/scripts/stats/Mediation.R +8 -8
  109. biopipen/scripts/stats/MetaPvalue.R +11 -13
  110. biopipen/scripts/stats/MetaPvalue1.R +6 -5
  111. biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
  112. biopipen/scripts/tcr/ClonalStats.R +5 -4
  113. biopipen/scripts/tcr/CloneResidency.R +3 -3
  114. biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
  115. biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
  116. biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
  117. biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
  118. biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
  119. biopipen/scripts/tcr/ScRepLoading.R +114 -92
  120. biopipen/scripts/tcr/TCRClusterStats.R +2 -2
  121. biopipen/scripts/tcr/TCRClustering.R +86 -97
  122. biopipen/scripts/tcr/TESSA.R +65 -115
  123. biopipen/scripts/tcr/VJUsage.R +5 -5
  124. biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
  125. biopipen/utils/common_docstrs.py +66 -63
  126. biopipen/utils/reporter.py +177 -0
  127. {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/METADATA +2 -1
  128. {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/RECORD +130 -144
  129. {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/WHEEL +1 -1
  130. biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
  131. biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
  132. biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
  133. biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
  134. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
  135. biopipen/reports/utils/gsea.liq +0 -110
  136. biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
  137. biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
  138. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
  139. biopipen/utils/caching.R +0 -44
  140. biopipen/utils/gene.R +0 -95
  141. biopipen/utils/gsea.R +0 -329
  142. biopipen/utils/io.R +0 -20
  143. biopipen/utils/misc.R +0 -602
  144. biopipen/utils/mutate_helpers.R +0 -581
  145. biopipen/utils/plot.R +0 -209
  146. biopipen/utils/repr.R +0 -146
  147. biopipen/utils/rnaseq.R +0 -48
  148. biopipen/utils/single_cell.R +0 -207
  149. {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/entry_points.txt +0 -0
@@ -1,27 +1,28 @@
1
- {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
- {{ biopipen_dir | joinpaths: "utils", "single_cell.R" | source_r }}
3
-
4
1
  library(glue)
5
2
  library(dplyr)
6
3
  library(tidyr)
7
4
  library(tibble)
8
- library(immunarch)
9
5
  library(Seurat)
10
- library(ggplot2)
11
- library(ggprism)
6
+ library(biopipen.utils)
12
7
 
13
- immfile <- {{in.immdata | r}}
14
- exprfile <- {{in.srtobj | r}}
8
+ screpdata <- {{in.screpdata | r}}
15
9
  outfile <- {{out.outfile | r}}
16
10
  joboutdir <- {{job.outdir | r}}
17
11
  python <- {{envs.python | r}}
18
- prefix <- {{envs.prefix | r}}
19
12
  within_sample <- {{envs.within_sample | r}}
20
13
  assay <- {{envs.assay | r}}
21
14
  predefined_b <- {{envs.predefined_b | r}}
22
15
  max_iter <- {{envs.max_iter | int}}
23
16
  save_tessa <- {{envs.save_tessa | r}}
24
- tessa_srcdir <- "{{biopipen_dir}}/scripts/tcr/TESSA_source"
17
+
18
+ log <- get_logger()
19
+ reporter <- get_reporter()
20
+
21
+ # In case this script is running in the cloud, where <biopipen_dir> cannot be found,
22
+ # we instead use the python command, which is associated with the cloud environment,
23
+ # to get the biopipen directory.
24
+ biopipen_dir <- get_biopipen_dir(python)
25
+ tessa_srcdir <- file.path(biopipen_dir, "scripts", "tcr", "TESSA_source")
25
26
 
26
27
  outdir <- dirname(outfile)
27
28
  result_dir <- file.path(outdir, "result")
@@ -31,88 +32,49 @@ if (!dir.exists(tessa_dir)) dir.create(tessa_dir)
31
32
 
32
33
  ### Start preparing input files for TESSA
33
34
  # Prepare input files
34
- log_info("Preparing TCR input file ...")
35
- # If immfile endswith .rds, then it is an immunarch object
36
- if (endsWith(tolower(immfile), ".rds")) {
37
- immdata <- readRDS(immfile)
38
- if (is.null(prefix)) { prefix = immdata$prefix }
39
- if (is.null(prefix)) { prefix = "" }
40
- tcrdata <- expand_immdata(immdata) %>%
41
- mutate(Barcode = glue(paste0(prefix, "{Barcode}")))
42
- rm(immdata)
43
- } else {
44
- tcrdata <- read.table(immfile, sep="\t", header=TRUE, row.names=1) %>%
45
- rownames_to_column("Barcode")
46
- }
47
-
48
- has_VJ <- "V.name" %in% colnames(tcrdata) && "J.name" %in% colnames(tcrdata)
49
-
50
- if (has_VJ) {
51
- tcrdata <- tcrdata %>% dplyr::mutate(
52
- v_gene = sub("-\\d+$", "", V.name),
53
- j_gene = sub("-\\d+$", "", J.name)
54
- ) %>% dplyr::select(
55
- contig_id = Barcode,
56
- cdr3 = CDR3.aa,
57
- v_gene,
58
- j_gene,
59
- sample = Sample
60
- )
61
- } else {
62
- tcrdata <- tcrdata %>% dplyr::select(
63
- contig_id = Barcode,
64
- cdr3 = CDR3.aa,
65
- sample = Sample
66
- )
67
- }
68
-
69
-
70
- log_info("Preparing expression input file ...")
71
- is_seurat <- endsWith(tolower(exprfile), ".rds")
72
- is_gz <- endsWith(tolower(exprfile), ".gz")
73
-
74
- if (is_seurat) {
75
- sobj <- readRDS(exprfile)
76
- expr <- GetAssayData(sobj, layer = "data")
77
- } else if (is_gz) {
78
- expr <- read.table(gzfile(exprfile), sep="\t", header=TRUE, row.names=1)
79
- } else {
80
- expr <- read.table(exprfile, sep="\t", header=TRUE, row.names=1)
81
- }
35
+ log$info("Reading input file ...")
36
+ sobj <- read_obj(screpdata)
82
37
 
38
+ log$info("Preparing TCR input file ...")
39
+ # Derive cdr3 and the V/J genes from the CTaa and CTgene columns of the Seurat metadata
40
+ tcrdata <- sobj@meta.data %>%
41
+ rownames_to_column("contig_id") %>%
42
+ filter(!is.na(CTaa) & !is.na(CTgene)) %>%
43
+ separate(CTaa, into = c(NA, "cdr3"), sep = "_", remove = FALSE) %>%
44
+ separate(CTgene, into = c(NA, "vjgene"), sep = "_", remove = FALSE) %>%
45
+ separate(vjgene, into = c("v_gene", NA, "j_gene", NA), sep = "\\.", remove = TRUE) %>%
46
+ mutate(v_gene = sub("-\\d+$", "", v_gene), j_gene = sub("-\\d+$", "", j_gene))
47
+
48
+ log$info("Preparing expression input file ...")
49
+ expr <- GetAssayData(sobj, layer = "data")
83
50
  cell_ids <- intersect(tcrdata$contig_id, colnames(expr))
84
51
  # Warning about unused cells
85
- unused_tcr_cells <- setdiff(tcrdata$contig_id, cell_ids)
86
52
  unused_expr_cells <- setdiff(colnames(expr), cell_ids)
87
- if (length(unused_tcr_cells) > 0) {
88
- log_warn(glue("{length(unused_tcr_cells)}/{nrow(tcrdata)} TCR cells are not used."))
89
- }
90
53
  if (length(unused_expr_cells) > 0) {
91
- log_warn(glue("{length(unused_expr_cells)}/{ncol(expr)} expression cells are not used."))
54
+ log$warn(glue("{length(unused_expr_cells)}/{ncol(expr)} cells without TCR data are not used."))
92
55
  }
93
56
  if (length(cell_ids) == 0) {
94
- stop(paste0(
95
- "No common cells between TCR and expression data. ",
96
- "Are you using the correct `envs.prefix` here or in `ImmunarchLoading`?"
97
- ))
57
+ stop(
58
+ "No TCR data found in the Seurat object. ",
59
+ "Please use scRepertiore::combineExpression() to generate the Seurat object with TCR data."
60
+ )
98
61
  }
99
- tcrdata <- tcrdata[tcrdata$contig_id %in% cell_ids, , drop=FALSE]
100
62
  expr <- as.matrix(expr)[, tcrdata$contig_id, drop=FALSE]
101
63
 
102
64
  # Write input files
103
- log_info("Writing input files ...")
65
+ log$info("Writing input files ...")
104
66
  write.table(tcrdata, file.path(tessa_dir, "tcrdata.txt"), sep=",", quote=FALSE, row.names=FALSE)
105
67
  write.table(expr, file.path(tessa_dir, "exprdata.txt"), sep=",", quote=FALSE, row.names=TRUE, col.names=TRUE)
106
68
 
107
69
  ### End preparing input files for TESSA
108
70
 
109
71
  ### Start running TESSA
110
- log_info("Running TESSA ...")
72
+ log$info("Running TESSA ...")
111
73
 
112
74
  # The original TESSA uses a python wrapper to run the encoder and tessa model
113
75
  # here we run those two steps directly here
114
76
 
115
- log_info("- Running encoder ...")
77
+ log$info("- Running encoder ...")
116
78
  cmd_encoder <- paste(
117
79
  python,
118
80
  file.path(tessa_srcdir, "BriseisEncoder.py"),
@@ -127,23 +89,22 @@ cmd_encoder <- paste(
127
89
  "-output_log",
128
90
  file.path(tessa_dir, "tcr_encoder.log")
129
91
  )
130
- if (has_VJ) {
131
- cmd_encoder <- paste(
132
- cmd_encoder,
133
- "-output_VJ",
134
- file.path(tessa_dir, "tcr_vj.txt")
135
- )
136
- }
92
+ cmd_encoder <- paste(
93
+ cmd_encoder,
94
+ "-output_VJ",
95
+ file.path(tessa_dir, "tcr_vj.txt")
96
+ )
97
+
137
98
  print("Running:")
138
99
  print(cmd_encoder)
139
- log_debug(paste("- ", cmd_encoder))
100
+ log$debug(paste("- ", cmd_encoder))
140
101
 
141
102
  rc <- system(cmd_encoder)
142
103
  if (rc != 0) {
143
104
  stop("Error: Failed to run encoder.")
144
105
  }
145
106
 
146
- log_info("- Running TESSA model ...")
107
+ log$info("- Running TESSA model ...")
147
108
  source(file.path(tessa_srcdir, "real_data.R"))
148
109
 
149
110
  tessa <- run_tessa(
@@ -158,51 +119,40 @@ tessa <- run_tessa(
158
119
  )
159
120
 
160
121
  # Save TESSA results
161
- log_info("Saving TESSA results ...")
162
- if (is_seurat) {
163
- cells <- rownames(sobj@meta.data)
164
- sobj@meta.data <- sobj@meta.data %>%
165
- mutate(
166
- TESSA_Cluster = tessa$meta[
167
- match(cells, tessa$meta$barcode),
168
- "cluster_number"
169
- ]
170
- ) %>%
171
- add_count(TESSA_Cluster, name = "TESSA_Cluster_Size")
172
- rownames(sobj@meta.data) <- cells
173
-
174
- if (save_tessa) {
175
- sobj@misc$tessa <- tessa
176
- }
177
- saveRDS(sobj, outfile)
178
- } else {
179
- out <- tessa$meta %>%
180
- dplyr::select(barcode, TESSA_Cluster = cluster_number) %>%
181
- add_count(TESSA_Cluster, name = "TESSA_Cluster_Size")
182
- write.table(out, outfile, sep="\t", quote=FALSE, row.names=FALSE, col.names=TRUE)
122
+ log$info("Saving TESSA results ...")
123
+ cells <- rownames(sobj@meta.data)
124
+ sobj@meta.data <- sobj@meta.data %>%
125
+ mutate(
126
+ TESSA_Cluster = tessa$meta[
127
+ match(cells, tessa$meta$barcode),
128
+ "cluster_number"
129
+ ]
130
+ ) %>%
131
+ add_count(TESSA_Cluster, name = "TESSA_Cluster_Size")
132
+ rownames(sobj@meta.data) <- cells
133
+
134
+ if (save_tessa) {
135
+ sobj@misc$tessa <- tessa
183
136
  }
137
+ save_obj(sobj, outfile)
184
138
 
185
139
  # Post analysis
186
- log_info("Post analysis ...")
140
+ log$info("Post analysis ...")
187
141
  plot_tessa(tessa, result_dir)
188
142
  plot_Tessa_clusters(tessa, result_dir)
189
143
 
190
144
  p <- tessa$meta %>%
191
145
  dplyr::select(barcode, TESSA_Cluster = cluster_number) %>%
192
146
  add_count(TESSA_Cluster, name = "TESSA_Cluster_Size") %>%
193
- ggplot(aes(x = TESSA_Cluster_Size)) +
194
- geom_histogram(binwidth = 1) +
195
- theme_prism()
196
-
197
- png(file.path(result_dir, "Cluster_size_dist.png"), width=8, height=8, units="in", res=100)
198
- print(p)
199
- dev.off()
147
+ plotthis::Histogram(x = "TESSA_Cluster_Size")
200
148
 
201
- pdf(file.path(result_dir, "Cluster_size_dist.pdf"), width=8, height=8)
202
- print(p)
203
- dev.off()
149
+ res <- 100
150
+ height <- attr(p, "height") * res
151
+ width <- attr(p, "width") * res
152
+ prefix <- file.path(result_dir, "Cluster_size_dist")
153
+ save_plot(p, prefix, devpars = list(width = width, height = height, res = res))
204
154
 
205
- add_report(
155
+ reporter$add(
206
156
  list(
207
157
  src = file.path(result_dir, "Cluster_size_dist.png"),
208
158
  descr = "Histogram of cluster size distribution",
@@ -232,4 +182,4 @@ add_report(
232
182
  ui = "table_of_images"
233
183
  )
234
184
 
235
- save_report(joboutdir)
185
+ reporter$save(joboutdir)
@@ -1,9 +1,9 @@
1
1
 
2
- infile = {{in.infile | quote}}
3
- outprefix = {{out.outfile | prefix | replace: ".fancyvj.wt", "" | quote}}
4
- vdjtools = {{ envs.vdjtools | quote }}
5
- vdjtools_patch = {{ envs.vdjtools_patch | quote }}
6
- joboutdir = {{job.outdir | quote}}
2
+ infile = {{in.infile | r}}
3
+ outprefix = {{out.outfile | prefix | replace: ".fancyvj.wt", "" | r}}
4
+ vdjtools = {{ envs.vdjtools | r }}
5
+ vdjtools_patch = {{ envs.vdjtools_patch | r }}
6
+ joboutdir = {{job.outdir | r}}
7
7
 
8
8
  command = sprintf(
9
9
  "cd %s && bash %s %s PlotFancyVJUsage --plot-type png %s %s",
@@ -1,11 +1,7 @@
1
- {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
- {{ biopipen_dir | joinpaths: "utils", "plot.R" | source_r }}
3
-
4
- library(ggprism)
5
1
  library(rjson)
2
+ library(rlang)
6
3
  library(dplyr)
7
-
8
- theme_set(theme_prism(axis_text_angle = 90))
4
+ library(plotthis)
9
5
 
10
6
  indirs = {{in.indirs | r}}
11
7
  outdir = {{out.outdir | r}}
@@ -39,13 +35,21 @@ get_devpars = function() {
39
35
 
40
36
  plot_summary = function(col) {
41
37
  outfile = file.path(outdir, paste0(col, ".png"))
42
- plotGG(
38
+ p <- plotthis::BarPlot(
43
39
  summaries,
44
- "col",
45
- list(mapping = aes_string(x = "Sample", y = bQuote(col), fill = "Sample")),
46
- devpars = get_devpars(),
47
- outfile = outfile
40
+ x = "Sample",
41
+ y = col,
42
+ x_text_angle = 90
43
+ )
44
+ devpars <- get_devpars()
45
+ png(
46
+ filename = outfile,
47
+ width = devpars$width,
48
+ height = devpars$height,
49
+ res = devpars$res
48
50
  )
51
+ print(p)
52
+ dev.off()
49
53
  }
50
54
 
51
55
  main = function() {
@@ -27,74 +27,77 @@ def format_placeholder(**kwargs) -> Callable[[type], type]:
27
27
  """
28
28
 
29
29
  def decorator(klass: type) -> type:
30
+ if not klass.__doc__:
31
+ return klass
32
+
30
33
  klass.__doc__ = klass.__doc__ % kwargs
31
34
  return klass
32
35
 
33
36
  return decorator
34
37
 
35
38
 
36
- MUTATE_HELPERS_CLONESIZE = """
37
- There are also also 4 helper functions, `expanded`, `collapsed`, `emerged` and `vanished`,
38
- which can be used to identify the expanded/collpased/emerged/vanished groups (i.e. TCR clones).
39
- See also <https://pwwang.github.io/immunopipe/configurations/#mutater-helpers>.
40
- For example, you can use
41
- `{"Patient1_Tumor_Collapsed_Clones": "expanded(., Source, 'Tumor', subset = Patent == 'Patient1', uniq = FALSE)"}`
42
- to create a new column in metadata named `Patient1_Tumor_Collapsed_Clones`
43
- with the collapsed clones in the tumor sample (compared to the normal sample) of patient 1.
44
- The values in this columns for other clones will be `NA`.
45
- Those functions take following arguments:
46
- * `df`: The metadata data frame. You can use the `.` to refer to it.
47
- * `group.by`: The column name in metadata to group the cells.
48
- * `idents`: The first group or both groups of cells to compare (value in `group.by` column). If only the first group is given, the rest of the cells (with non-NA in `group.by` column) will be used as the second group.
49
- * `subset`: An expression to subset the cells, will be passed to `dplyr::filter()`. Default is `TRUE` (no filtering).
50
- * `each`: A column name (without quotes) in metadata to split the cells.
51
- Each comparison will be done for each value in this column (typically each patient or subject).
52
- * `id`: The column name in metadata for the group ids (i.e. `CDR3.aa`).
53
- * `compare`: Either a (numeric) column name (i.e. `Clones`) in metadata to compare between groups, or `.n` to compare the number of cells in each group.
54
- If numeric column is given, the values should be the same for all cells in the same group.
55
- This will not be checked (only the first value is used).
56
- It is helpful to use `Clones` to use the raw clone size from TCR data, in case the cells are not completely mapped to RNA data.
57
- Also if you have `subset` set or `NA`s in `group.by` column, you should use `.n` to compare the number of cells in each group.
58
- * `uniq`: Whether to return unique ids or not. Default is `TRUE`. If `FALSE`, you can mutate the meta data frame with the returned ids. For example, `df |> mutate(expanded = expanded(...))`.
59
- * `debug`: Return the data frame with intermediate columns instead of the ids. Default is `FALSE`.
60
- * `order`: The expression passed to `dplyr::arrange()` to order intermediate dataframe and get the ids in order accordingly.
61
- The intermediate dataframe includes the following columns:
62
- * `<id>`: The ids of clones (i.e. `CDR3.aa`).
63
- * `<each>`: The values in `each` column.
64
- * `ident_1`: The size of clones in the first group.
65
- * `ident_2`: The size of clones in the second group.
66
- * `.diff`: The difference between the sizes of clones in the first and second groups.
67
- * `.sum`: The sum of the sizes of clones in the first and second groups.
68
- * `.predicate`: Showing whether the clone is expanded/collapsed/emerged/vanished.
69
- * `include_emerged`: Whether to include the emerged group for `expanded` (only works for `expanded`). Default is `FALSE`.
70
- * `include_vanished`: Whether to include the vanished group for `collapsed` (only works for `collapsed`). Default is `FALSE`.
39
+ # MUTATE_HELPERS_CLONESIZE = """
40
+ # There are also 4 helper functions, `expanded`, `collapsed`, `emerged` and `vanished`,
41
+ # which can be used to identify the expanded/collapsed/emerged/vanished groups (i.e. TCR clones).
42
+ # See also <https://pwwang.github.io/immunopipe/configurations/#mutater-helpers>.
43
+ # For example, you can use
44
+ # `{"Patient1_Tumor_Collapsed_Clones": "expanded(., Source, 'Tumor', subset = Patent == 'Patient1', uniq = FALSE)"}`
45
+ # to create a new column in metadata named `Patient1_Tumor_Collapsed_Clones`
46
+ # with the collapsed clones in the tumor sample (compared to the normal sample) of patient 1.
47
+ # The values in this columns for other clones will be `NA`.
48
+ # Those functions take following arguments:
49
+ # * `df`: The metadata data frame. You can use the `.` to refer to it.
50
+ # * `group.by`: The column name in metadata to group the cells.
51
+ # * `idents`: The first group or both groups of cells to compare (value in `group.by` column). If only the first group is given, the rest of the cells (with non-NA in `group.by` column) will be used as the second group.
52
+ # * `subset`: An expression to subset the cells, will be passed to `dplyr::filter()`. Default is `TRUE` (no filtering).
53
+ # * `each`: A column name (without quotes) in metadata to split the cells.
54
+ # Each comparison will be done for each value in this column (typically each patient or subject).
55
+ # * `id`: The column name in metadata for the group ids (i.e. `CDR3.aa`).
56
+ # * `compare`: Either a (numeric) column name (i.e. `Clones`) in metadata to compare between groups, or `.n` to compare the number of cells in each group.
57
+ # If numeric column is given, the values should be the same for all cells in the same group.
58
+ # This will not be checked (only the first value is used).
59
+ # It is helpful to use `Clones` to use the raw clone size from TCR data, in case the cells are not completely mapped to RNA data.
60
+ # Also if you have `subset` set or `NA`s in `group.by` column, you should use `.n` to compare the number of cells in each group.
61
+ # * `uniq`: Whether to return unique ids or not. Default is `TRUE`. If `FALSE`, you can mutate the meta data frame with the returned ids. For example, `df |> mutate(expanded = expanded(...))`.
62
+ # * `debug`: Return the data frame with intermediate columns instead of the ids. Default is `FALSE`.
63
+ # * `order`: The expression passed to `dplyr::arrange()` to order intermediate dataframe and get the ids in order accordingly.
64
+ # The intermediate dataframe includes the following columns:
65
+ # * `<id>`: The ids of clones (i.e. `CDR3.aa`).
66
+ # * `<each>`: The values in `each` column.
67
+ # * `ident_1`: The size of clones in the first group.
68
+ # * `ident_2`: The size of clones in the second group.
69
+ # * `.diff`: The difference between the sizes of clones in the first and second groups.
70
+ # * `.sum`: The sum of the sizes of clones in the first and second groups.
71
+ # * `.predicate`: Showing whether the clone is expanded/collapsed/emerged/vanished.
72
+ # * `include_emerged`: Whether to include the emerged group for `expanded` (only works for `expanded`). Default is `FALSE`.
73
+ # * `include_vanished`: Whether to include the vanished group for `collapsed` (only works for `collapsed`). Default is `FALSE`.
71
74
 
72
- You can also use `top()` to get the top clones (i.e. the clones with the largest size) in each group.
73
- For example, you can use
74
- `{"Patient1_Top10_Clones": "top(subset = Patent == 'Patient1', uniq = FALSE)"}`
75
- to create a new column in metadata named `Patient1_Top10_Clones`.
76
- The values in this columns for other clones will be `NA`.
77
- This function takes following arguments:
78
- * `df`: The metadata data frame. You can use the `.` to refer to it.
79
- * `id`: The column name in metadata for the group ids (i.e. `CDR3.aa`).
80
- * `n`: The number of top clones to return. Default is `10`.
81
- If n < 1, it will be treated as the percentage of the size of the group.
82
- Specify `0` to get all clones.
83
- * `compare`: Either a (numeric) column name (i.e. `Clones`) in metadata to compare between groups, or `.n` to compare the number of cells in each group.
84
- If numeric column is given, the values should be the same for all cells in the same group.
85
- This will not be checked (only the first value is used).
86
- It is helpful to use `Clones` to use the raw clone size from TCR data, in case the cells are not completely mapped to RNA data.
87
- Also if you have `subset` set or `NA`s in `group.by` column, you should use `.n` to compare the number of cells in each group.
88
- * `subset`: An expression to subset the cells, will be passed to `dplyr::filter()`. Default is `TRUE` (no filtering).
89
- * `each`: A column name (without quotes) in metadata to split the cells.
90
- Each comparison will be done for each value in this column (typically each patient or subject).
91
- * `uniq`: Whether to return unique ids or not. Default is `TRUE`. If `FALSE`, you can mutate the meta data frame with the returned ids. For example, `df |> mutate(expanded = expanded(...))`.
92
- * `debug`: Return the data frame with intermediate columns instead of the ids. Default is `FALSE`.
93
- * `with_ties`: Whether to include ties (i.e. clones with the same size as the last clone) or not. Default is `FALSE`.
94
- """
75
+ # You can also use `top()` to get the top clones (i.e. the clones with the largest size) in each group.
76
+ # For example, you can use
77
+ # `{"Patient1_Top10_Clones": "top(subset = Patent == 'Patient1', uniq = FALSE)"}`
78
+ # to create a new column in metadata named `Patient1_Top10_Clones`.
79
+ # The values in this columns for other clones will be `NA`.
80
+ # This function takes following arguments:
81
+ # * `df`: The metadata data frame. You can use the `.` to refer to it.
82
+ # * `id`: The column name in metadata for the group ids (i.e. `CDR3.aa`).
83
+ # * `n`: The number of top clones to return. Default is `10`.
84
+ # If n < 1, it will be treated as the percentage of the size of the group.
85
+ # Specify `0` to get all clones.
86
+ # * `compare`: Either a (numeric) column name (i.e. `Clones`) in metadata to compare between groups, or `.n` to compare the number of cells in each group.
87
+ # If numeric column is given, the values should be the same for all cells in the same group.
88
+ # This will not be checked (only the first value is used).
89
+ # It is helpful to use `Clones` to use the raw clone size from TCR data, in case the cells are not completely mapped to RNA data.
90
+ # Also if you have `subset` set or `NA`s in `group.by` column, you should use `.n` to compare the number of cells in each group.
91
+ # * `subset`: An expression to subset the cells, will be passed to `dplyr::filter()`. Default is `TRUE` (no filtering).
92
+ # * `each`: A column name (without quotes) in metadata to split the cells.
93
+ # Each comparison will be done for each value in this column (typically each patient or subject).
94
+ # * `uniq`: Whether to return unique ids or not. Default is `TRUE`. If `FALSE`, you can mutate the meta data frame with the returned ids. For example, `df |> mutate(expanded = expanded(...))`.
95
+ # * `debug`: Return the data frame with intermediate columns instead of the ids. Default is `FALSE`.
96
+ # * `with_ties`: Whether to include ties (i.e. clones with the same size as the last clone) or not. Default is `FALSE`.
97
+ # """
95
98
 
96
- ENVS_SECTION_EACH = """
97
- The `section` is used to collect cases and put the results under the same directory and the same section in report.
98
- When `each` for a case is specified, the `section` will be ignored and case name will be used as `section`.
99
- The cases will be the expanded values in `each` column. When `prefix_each` is True, the column name specified by `each` will be prefixed to each value as directory name and expanded case name.
100
- """
99
+ # ENVS_SECTION_EACH = """
100
+ # The `section` is used to collect cases and put the results under the same directory and the same section in report.
101
+ # When `each` for a case is specified, the `section` will be ignored and case name will be used as `section`.
102
+ # The cases will be the expanded values in `each` column. When `prefix_each` is True, the column name specified by `each` will be prefixed to each value as directory name and expanded case name.
103
+ # """
@@ -0,0 +1,177 @@
1
+ from __future__ import annotations
2
+ from typing import Sequence
3
+ from os import PathLike
4
+ from pathlib import Path
5
+
6
+ """An implementation of reporter in python
7
+ https://pwwang.github.io/biopipen.utils.R/reference/Reporter.html
8
+
9
+ to generate a json file for pipen-report to build a report for a process.
10
+ """
11
+
12
+ import json
13
+
14
+
15
class Reporter:
    """Collect report content and dump it as a nested JSON structure.

    The content is stored in ``self.report`` as
    ``{h1: {h2: {h3: {ui: [content, ...]}}}}`` and written out by
    :meth:`save`.
    """

    def __init__(self):
        self.report = {}

    def add(
        self,
        *args,
        h1: str,
        h2: str = "#",
        h3: str = "#",
        ui: str = "flat",
    ) -> None:
        """Add content to the report.

        Content added under the same ``h1``/``h2``/``h3``/``ui`` in several
        calls is accumulated in call order.

        Args:
            *args: The content items to add
            h1 (str): The first level header
            h2 (str): The second level header
            h3 (str): The third level header
            ui (str): The user interface of the report
        """
        section = (
            self.report.setdefault(h1, {})
            .setdefault(h2, {})
            .setdefault(h3, {})
        )
        # Append rather than reset, so that earlier content under the same
        # headings/ui is not silently dropped.
        section.setdefault(ui, []).extend(args)

    def add2(
        self,
        *args,
        hs: Sequence[str],
        hs2: Sequence[str] = (),
        ui: str = "flat",
        collapse: str = ": ",
    ) -> None:
        """Add content to the report, deriving h1/h2/h3 from heading lists.

        Args:
            *args: The content items to add
            hs: The headings of the case
            hs2: The headings that must be shown (at most 2 items).
                When there are more items in `hs` than heading levels left,
                they will be concatenated with `collapse`.
                For example, if `hs = ["Section1", "Case1"]`, and
                `hs2 = ["A", "B"]`, then headings will be
                `h1 = "Section1: Case1"`, `h2 = "A"` and `h3 = "B"`.
            ui: The user interface of the report
            collapse: The separator to concatenate the headings

        Raises:
            ValueError: If `hs2` has more than 2 items.
            IndexError: If `hs` is empty and `hs2` has fewer than 2 items.
        """
        if len(hs2) > 2:
            raise ValueError("hs2 must have 2 or less items")

        if len(hs2) == 2:
            # Both lower levels are fixed; everything in `hs` goes to h1.
            h1 = collapse.join(hs)
            h2, h3 = hs2[0], hs2[1]
        elif len(hs2) == 1:
            h1 = hs[0]
            rest = hs[1:]
            if rest:
                # The remaining case headings take h2, the fixed one h3.
                h2 = collapse.join(rest)
                h3 = hs2[0]
            else:
                h2 = hs2[0]
                h3 = "#"
        else:
            # No fixed headings: spread `hs` over h1, h2 and h3, collapsing
            # whatever is left after the first two into h3.
            h1 = hs[0]
            rest = hs[1:]
            if rest:
                h2 = rest[0]
                rest = rest[1:]
            else:
                h2 = "#"
            h3 = collapse.join(rest) if rest else "#"

        self.add(*args, h1=h1, h2=h2, h3=h3, ui=ui)

    def image(
        self,
        prefix: str,
        more_formats: str | Sequence[str],
        save_code: bool,
        kind: str = "image",
        **kwargs,
    ) -> dict:
        """Generate a report for an image to be added.

        Args:
            prefix: The prefix of the image; the image itself is expected
                at `<prefix>.png`.
            more_formats: More formats of the image available, either a
                single extension (e.g. `"pdf"`) or a sequence of extensions.
            save_code: Whether to save the code to reproduce the plot.
            kind: The kind of the report, default is "image".
            **kwargs: Other arguments to add to the report.

        Returns:
            dict: The structured report for the image

        Examples:
            >>> reporter = Reporter()
            >>> reporter.add(
            ...     {
            ...         "name": "Image 1",
            ...         "contents": [
            ...             reporter.image("/path/to/image1", "pdf", save_code=True)
            ...         ]
            ...     },
            ...     h1="Images",
            ...     h2="Image 1",
            ... )
        """
        out = {
            "kind": kind,
            "src": f"{prefix}.png",
            **kwargs,
        }

        if more_formats or save_code:
            out["download"] = []

        if more_formats:
            # A bare string is a single format; iterating it directly would
            # yield one bogus download entry per character.
            if isinstance(more_formats, str):
                more_formats = [more_formats]
            out["download"].extend(f"{prefix}.{mf}" for mf in more_formats)

        if save_code:
            out["download"].append(
                {
                    "src": f"{prefix}.code.zip",
                    "tip": "Download the code to reproduce the plot",
                    "icon": "Code",
                }
            )

        return out

    def clear(self):
        """Clear the report"""
        self.report = {}

    def save(self, path: str | PathLike, clear: bool = True) -> None:
        """Save the report to a file

        Args:
            path: The path to save the report
                If the path is a directory, the report will be saved as
                `report.json` in the directory. Otherwise, the report will
                be saved to the file.
            clear: Whether to clear the report after saving.
        """
        path = Path(path)
        if path.is_dir():
            path = path / "report.json"

        with path.open("w", encoding="utf-8") as f:
            json.dump(self.report, f, indent=2)

        if clear:
            self.clear()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: biopipen
3
- Version: 0.33.1
3
+ Version: 0.34.0
4
4
  Summary: Bioinformatics processes/pipelines that can be run from `pipen run`
5
5
  License: MIT
6
6
  Author: pwwang
@@ -17,6 +17,7 @@ Provides-Extra: runinfo
17
17
  Requires-Dist: datar[pandas] (>=0.15.8,<0.16.0)
18
18
  Requires-Dist: pipen-board[report] (>=0.17,<0.18)
19
19
  Requires-Dist: pipen-cli-run (>=0.15,<0.16)
20
+ Requires-Dist: pipen-deprecated (>=0.0,<0.1)
20
21
  Requires-Dist: pipen-filters (>=0.15,<0.16)
21
22
  Requires-Dist: pipen-poplog (>=0.3,<0.4)
22
23
  Requires-Dist: pipen-runinfo (>=0.9,<0.10) ; extra == "runinfo"