biopipen-0.21.1-py3-none-any.whl → biopipen-0.22.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic (the original page links to more details at this point).

Files changed (58)
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/filters.py +142 -0
  3. biopipen/ns/scrna.py +19 -1
  4. biopipen/ns/tcr.py +27 -0
  5. biopipen/reports/delim/SampleInfo.svelte +2 -22
  6. biopipen/reports/scrna/CellsDistribution.svelte +4 -39
  7. biopipen/reports/scrna/MarkersFinder.svelte +6 -126
  8. biopipen/reports/scrna/MetaMarkers.svelte +3 -75
  9. biopipen/reports/scrna/RadarPlots.svelte +4 -20
  10. biopipen/reports/scrna/ScFGSEA.svelte +4 -23
  11. biopipen/reports/scrna/SeuratClusterStats.svelte +3 -69
  12. biopipen/reports/scrna/SeuratPreparing.svelte +3 -26
  13. biopipen/reports/scrna/TopExpressingGenes.svelte +3 -41
  14. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +17 -16
  15. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +13 -16
  16. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +44 -52
  17. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +2 -8
  18. biopipen/reports/tcr/CDR3AAPhyschem.svelte +18 -65
  19. biopipen/reports/tcr/CloneResidency.svelte +3 -93
  20. biopipen/reports/tcr/Immunarch.svelte +4 -155
  21. biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
  22. biopipen/reports/tcr/TESSA.svelte +11 -28
  23. biopipen/scripts/delim/SampleInfo.R +41 -7
  24. biopipen/scripts/scrna/CellsDistribution.R +121 -16
  25. biopipen/scripts/scrna/MarkersFinder.R +245 -100
  26. biopipen/scripts/scrna/MetaMarkers.R +163 -82
  27. biopipen/scripts/scrna/RadarPlots.R +163 -110
  28. biopipen/scripts/scrna/ScFGSEA.R +51 -11
  29. biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +26 -13
  30. biopipen/scripts/scrna/SeuratClusterStats-features.R +58 -53
  31. biopipen/scripts/scrna/SeuratClusterStats-stats.R +39 -21
  32. biopipen/scripts/scrna/SeuratClusterStats.R +4 -2
  33. biopipen/scripts/scrna/SeuratClustering.R +73 -26
  34. biopipen/scripts/scrna/SeuratPreparing.R +93 -19
  35. biopipen/scripts/scrna/TopExpressingGenes.R +100 -18
  36. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +21 -8
  37. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +25 -3
  38. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +1 -0
  39. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +14 -3
  40. biopipen/scripts/tcr/CDR3AAPhyschem.R +122 -9
  41. biopipen/scripts/tcr/CloneResidency.R +114 -34
  42. biopipen/scripts/tcr/Immunarch-basic.R +26 -6
  43. biopipen/scripts/tcr/Immunarch-clonality.R +22 -2
  44. biopipen/scripts/tcr/Immunarch-diversity.R +132 -22
  45. biopipen/scripts/tcr/Immunarch-geneusage.R +33 -4
  46. biopipen/scripts/tcr/Immunarch-kmer.R +57 -7
  47. biopipen/scripts/tcr/Immunarch-overlap.R +72 -3
  48. biopipen/scripts/tcr/Immunarch-spectratyping.R +28 -5
  49. biopipen/scripts/tcr/Immunarch-tracking.R +33 -4
  50. biopipen/scripts/tcr/Immunarch-vjjunc.R +118 -0
  51. biopipen/scripts/tcr/Immunarch.R +17 -0
  52. biopipen/scripts/tcr/TCRClusterStats.R +124 -11
  53. biopipen/scripts/tcr/TESSA.R +43 -11
  54. biopipen/utils/misc.R +96 -1
  55. {biopipen-0.21.1.dist-info → biopipen-0.22.0.dist-info}/METADATA +1 -1
  56. {biopipen-0.21.1.dist-info → biopipen-0.22.0.dist-info}/RECORD +58 -57
  57. {biopipen-0.21.1.dist-info → biopipen-0.22.0.dist-info}/WHEEL +0 -0
  58. {biopipen-0.21.1.dist-info → biopipen-0.22.0.dist-info}/entry_points.txt +0 -0
biopipen/reports/tcr/Immunarch.svelte +4 -155
@@ -1,167 +1,16 @@
1
1
  {% from "utils/misc.liq" import report_jobs, table_of_images -%}
2
2
  <script>
3
- import { Image, DataTable } from "$libs";
4
- import { Tabs, Tab, TabContent, Accordion, AccordionItem } from "$ccs";
3
+ import { Image, DataTable, Descr } from "$libs";
4
+ import { Tabs, Tab, TabContent } from "$ccs";
5
5
  </script>
6
6
 
7
7
  {%- macro report_job(job, h=1) -%}
8
-
9
- <h{{h}}>Exploratory analysis</h{{h}}>
10
-
11
- <h{{h+1}}>CDR3 length distribution</h{{h+1}}>
12
-
13
- {% assign lenpngs = job.out.outdir | glob: "len", "*.png" %}
14
- {{ table_of_images(lenpngs) }}
15
-
16
- <h{{h+1}}>Clonotype volume (# clonotypes)</h{{h+1}}>
17
-
18
- {% assign volpngs = job.out.outdir | glob: "volume", "*.png" %}
19
- {{ table_of_images(volpngs) }}
20
-
21
- <h{{h+1}}>Clonotype abundances</h{{h+1}}>
22
-
23
- {% assign cntpngs = job.out.outdir | glob: "count", "*.png" %}
24
- {{ table_of_images(cntpngs) }}
25
-
26
- <h{{h}}>Clonality</h{{h}}>
27
-
28
- <h{{h+1}}>Top clones</h{{h+1}}>
29
-
30
- {% assign tcpngs = job.out.outdir | glob: "top_clones", "*.png" %}
31
- {{ table_of_images(tcpngs) }}
32
-
33
- <h{{h+1}}>Rare clones</h{{h+1}}>
34
-
35
- {% assign rcpngs = job.out.outdir | glob: "rare_clones", "*.png" %}
36
- {{ table_of_images(rcpngs) }}
37
-
38
- <h{{h+1}}>Clonal space homeostasis</h{{h+1}}>
39
-
40
- <p>The proportion of the repertoire occupied by the clones of a given size</p>
41
-
42
- {% assign hcpngs = job.out.outdir | glob: "homeo_clones", "*.png" %}
43
- {{ table_of_images(hcpngs) }}
44
-
45
- <h{{h}}>Repertoire overlaps</h{{h}}>
46
-
47
- {% if job.index == 0 %}
48
- <Accordion>
49
- <AccordionItem title="Overlapping methods">
50
- <p>
51
- Repertoire overlap is the most common approach to measure repertoire similarity.
52
- Immunarch provides several indices:
53
- </p>
54
- <p>
55
- - number of public clonotypes (.method = "public") - a classic measure of overlap similarity.
56
- </p>
57
- <p>
58
- - overlap coefficient (.method = "overlap") - a normalised measure of overlap
59
- similarity. It is defined as the size of the intersection divided by the smaller of the size of the two sets.
60
- </p>
61
- <p>
62
- - Jaccard index (.method = "jaccard") - it measures similarity between finite
63
- sample sets, and is defined as the size of the intersection divided by the size of the union of the sample sets.
64
- </p>
65
- <p>
66
- - Tversky index (.method = "tversky") - an asymmetric similarity measure on sets
67
- that compares a variant to a prototype. If using default arguments, it’s similar to Dice’s coefficient.
68
- </p>
69
- <p>
70
- - cosine similarity (.method = "cosine") - a measure of similarity between two non-zero vectors
71
- </p>
72
- <p>
73
- - Morisita’s overlap index (.method = "morisita") - a statistical measure of dispersion of individuals in a population.
74
- It is used to compare overlap among samples.
75
- </p>
76
- <p>
77
- - incremental overlap - overlaps of the N most abundant clonotypes with incrementally growing N
78
- (.method = "inc+METHOD", e.g., "inc+public" or "inc+morisita").
79
- </p>
80
- </AccordionItem>
81
- </Accordion>
82
- {% endif %}
83
-
84
- {% for ovdir in job.out.outdir | glob: "overlap", "*" | sort %}
85
- {% set ovname = ovdir | basename %}
86
- <h{{h+1}}>{{ovname}}</h{{h+1}}>
87
- {% assign ovpngs = ovdir | glob: "*.png" | sort %}
88
- {{ table_of_images(ovpngs) }}
89
- {% endfor %}
90
-
91
- <h{{h}}>Gene usage</h{{h}}>
92
- {% for gu_dir in job.out.outdir | glob: "gene_usage", "*" | sort %}
93
- {% set gu_name = gu_dir | basename %}
94
- {% if gu_name != "DEFAULT" %}
95
- <h{{h+1}}>{{gu_name}}</h{{h+1}}>
96
- {% endif %}
97
- {% assign gupngs = gu_dir | glob: "*.png" | sort %}
98
- {{ table_of_images(gupngs) }}
99
- {% endfor %}
100
-
101
- <h{{h}}>Spectratyping</h{{h}}>
102
- {% for spect_sam_dir in job.out.outdir | glob: "spectratyping", "*" | sort %}
103
- <h{{h+1}}>{{ spect_sam_dir | basename }}</h{{h+1}}>
104
- {% assign spectpngs = spect_sam_dir | glob: "*.png" | sort %}
105
- {{ table_of_images(spectpngs) }}
106
- {% endfor %}
107
-
108
- <h{{h}}>Diversity estimation</h{{h}}>
109
- {% assign div_met_dirs = job.out.outdir | glob: "diversity", "*" | sort %}
110
- {% for dm_dir in div_met_dirs %}
111
- <h{{h+1}}>{{dm_dir | basename}}</h{{h+1}}>
112
- {% if dm_dir | glob: "diversity.test.*.txt" %}
113
- {% assign dm_test_file = dm_dir | glob0: "diversity.test.*.txt" %}
114
- {% assign dm_test_method = dm_test_file | stem | replace: "diversity.test.", "" %}
115
- <Tabs>
116
- <Tab label="Plot" />
117
- <Tab label="Test: {{dm_test_method}}" />
118
- <div slot="content">
119
- <TabContent>
120
- <Image src={{ dm_dir | joinpaths: "diversity.png" | quote }} />
121
- </TabContent>
122
- <TabContent>
123
- <DataTable src={{ dm_test_file | quote }} data={ {{ dm_test_file | datatable: sep="\t" }} } />
124
- </TabContent>
125
- </div>
126
- </Tabs>
127
- {% else %}
128
- <Image src={{ dm_dir | joinpaths: "diversity.png" | quote }} />
129
- {% endif %}
130
- {% endfor %}
131
-
132
- {% if job.out.outdir | glob: "rarefraction", "*" %}
133
- <h{{h}}>Rarefaction analysis</h{{h}}>
134
- {% for rfdir in job.out.outdir | glob: "rarefraction", "*" | sort %}
135
- {% assign rfname = rfdir | basename %}
136
- {% if rfname != "DEFAULT" %}
137
- <h{{h+1}}>{{rfname}}</h{{h+1}}>
138
- {% endif %}
139
- {% assign rfpngs = rfdir | glob: "*.png" | sort %}
140
- {{ table_of_images(rfpngs) }}
141
- {% endfor %}
142
- {% endif %}
143
-
144
- {% if job.out.outdir | glob: "tracking", "*.png" %}
145
- <h{{h}}>Tracking of clonotypes</h{{h}}>
146
- {% assign trackpngs = job.out.outdir | glob: "tracking", "*.png" | sort %}
147
- {{ table_of_images(trackpngs) }}
148
- {% endif %}
149
-
150
- <h{{h}}>Kmer and sequence motif analysis</h{{h}}>
151
- {% for kmerdir in job.out.outdir | glob: "kmer", "*" | sort %}
152
- {% assign kmercase = kmerdir | basename %}
153
- {% if kmercase != "DEFAULT" %}
154
- <h{{h+1}}>{{kmercase}}</h{{h+1}}>
155
- {% endif %}
156
- {% assign kmerpngs = kmerdir | glob: "*.png" | sort %}
157
- {{ table_of_images(kmerpngs) }}
158
- {% endfor %}
159
-
8
+ {{ job | render_job: h=h }}
160
9
  {%- endmacro -%}
161
10
 
162
11
 
163
12
  {%- macro head_job(job) -%}
164
- <h1>{{job.out.outdir | stem | replace: ".immunarch", ""}}</h1>
13
+ <h1>{{job.out.outdir | stem | replace: ".immunarch", ""}}</h1>
165
14
  {%- endmacro -%}
166
15
 
167
16
  {{ report_jobs(jobs, head_job, report_job) }}
biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
@@ -1,57 +1,15 @@
1
1
  {% from "utils/misc.liq" import report_jobs -%}
2
2
  <script>
3
- import { Image, DataTable } from "$libs";
3
+ import { Image, DataTable, Descr } from "$libs";
4
4
  import { Tabs, Tab, TabContent } from "$ccs";
5
5
  </script>
6
6
 
7
7
  {%- macro report_job(job, h=1) -%}
8
- <h{{h}}>TCR Cluster size distribution</h{{h}}>
9
-
10
- {% for casedir in job.out.outdir | glob: "ClusterSizeDistribution", "*" %}
11
- {% set casename = casedir | basename %}
12
- {% if casename != "DEFAULT" %}
13
- <h{{h+1}}>{{casename}}</h{{h+1}}>
14
- {% endif %}
15
- <Image src={{casedir | joinpaths: "cluster_size_distribution.png" | quote}} />
16
- {% endfor %}
17
-
18
- <h{{h}}>Shared TCR clusters among samples</h{{h}}>
19
-
20
- {% for casedir in job.out.outdir | glob: "SharedClusters", "*" %}
21
- {% set casename = casedir | basename %}
22
- {% if casename != "DEFAULT" %}
23
- <h{{h+1}}>{{casename}}</h{{h+1}}>
24
- {% endif %}
25
- <Image src={{casedir | joinpaths: "shared_clusters.png" | quote}} />
26
- {% endfor %}
27
-
28
-
29
- <h{{h}}>Sample diversity using TCR clusters</h{{h}}>
30
-
31
- {% for casedir in job.out.outdir | glob: "SampleDiversity", "*" %}
32
- {% set casename = casedir | basename %}
33
- {% if casename != "DEFAULT" %}
34
- <h{{h+1}}>{{casename}}</h{{h+1}}>
35
- {% endif %}
36
-
37
- <Tabs>
38
- <Tab label="Plot" />
39
- <Tab label="Table" />
40
- <svelte:fragment slot="content">
41
- <TabContent>
42
- <Image src={{casedir | joinpaths: "diversity.png" | quote}} />
43
- </TabContent>
44
- <TabContent>
45
- <DataTable src={{casedir | joinpaths: "diversity.txt" | quote}}
46
- data={ {{ casedir | joinpaths: "diversity.txt" | datatable: sep="\t", index_col=0 }} } />
47
- </TabContent>
48
- </svelte:fragment>
49
- </Tabs>
50
- {% endfor %}
8
+ {{ job | render_job: h=h }}
51
9
  {%- endmacro -%}
52
10
 
53
11
  {%- macro head_job(job) -%}
54
- <h1>{{job.in.immfile | stem | replace: ".immunarch", ""}}</h1>
12
+ <h1>{{job.in.immfile | stem | replace: ".immunarch", ""}}</h1>
55
13
  {%- endmacro -%}
56
14
 
57
15
  {{ report_jobs(jobs, head_job, report_job) }}
biopipen/reports/tcr/TESSA.svelte +11 -28
@@ -1,39 +1,22 @@
1
1
  {% from "utils/misc.liq" import report_jobs, table_of_images -%}
2
2
  <script>
3
- import { Image, DataTable } from "$libs";
4
- import { Tile } from "$ccs";
3
+ import { Image, DataTable, Descr } from "$libs";
4
+ import { UnorderedList, ListItem } from "$ccs";
5
5
  </script>
6
6
 
7
- <Tile>
7
+ <Descr>
8
+ <h1>Introduction</h1>
8
9
  <p><a href="https://github.com/jcao89757/TESSA" target="_blank">Tessa</a> is a Bayesian model to integrate T cell receptor (TCR) sequence profiling with transcriptomes of T cells. Enabled by the recently developed single cell sequencing techniques, which provide both TCR sequences and RNA sequences of each T cell concurrently, Tessa maps the functional landscape of the TCR repertoire, and generates insights into understanding human immune response to diseases. As the first part of tessa, BriseisEncoder is employed prior to the Bayesian algorithm to capture the TCR sequence features and create numerical embeddings. We showed that the reconstructed Atchley Factor matrices and CDR3 sequences, generated through the numerical embeddings, are highly similar to their original counterparts. The CDR3 peptide sequences are constructed via a RandomForest model applied on the reconstructed Atchley Factor matrices.</p>
9
-
10
+ <p></p>
10
11
  <p>For more information, please refer to the following papers:</p>
11
- <ul>
12
- <li>- <a href="https://www.nature.com/articles/s41592-020-01020-3" target="_blank">Mapping the Functional Landscape of TCR Repertoire</a>, Zhang, Z., Xiong, D., Wang, X. et al. 2021.</li>
13
- <li>- <a href="https://www.nature.com/articles/s42256-021-00383-2" target="_blank">Deep learning-based prediction of the T cell receptor–antigen binding specificity</a>, Lu, T., Zhang, Z., Zhu, J. et al. 2021.</li>
14
- </ul>
15
- </Tile>
16
- <p>&nbsp;</p>
12
+ <UnorderedList>
13
+ <ListItem><a href="https://www.nature.com/articles/s41592-020-01020-3" target="_blank">Mapping the Functional Landscape of TCR Repertoire</a>, Zhang, Z., Xiong, D., Wang, X. et al. 2021.</ListItem>
14
+ <ListItem><a href="https://www.nature.com/articles/s42256-021-00383-2" target="_blank">Deep learning-based prediction of the T cell receptor–antigen binding specificity</a>, Lu, T., Zhang, Z., Zhu, J. et al. 2021.</ListItem>
15
+ </UnorderedList>
16
+ </Descr>
17
17
 
18
18
  {%- macro report_job(job, h=1) -%}
19
- {{ table_of_images(
20
- [
21
- joinpaths(job.outdir, "result", "Cluster_size_dist.png"),
22
- joinpaths(job.outdir, "result", "clone_size.png"),
23
- joinpaths(job.outdir, "result", "exp_TCR_pair_plot.png"),
24
- joinpaths(job.outdir, "result", "TCR_dist_density.png"),
25
- joinpaths(job.outdir, "result", "TCR_explore.png"),
26
- joinpaths(job.outdir, "result", "TCR_explore_clusters.png"),
27
- ],
28
- [
29
- "TESSA cluster size distribution",
30
- "Cluster center size vs. non-center cluster size",
31
- "Expression-TCR distance plot",
32
- "Density of TCR distances",
33
- "Exploratory plot at the TCR level",
34
- "TESSA clusters",
35
- ],
36
- ) }}
19
+ {{ job | render_job: h=h }}
37
20
  {%- endmacro -%}
38
21
 
39
22
  {%- macro head_job(job) -%}
biopipen/scripts/delim/SampleInfo.R +41 -7
@@ -4,7 +4,6 @@ library(rlang)
4
4
  library(dplyr)
5
5
  library(ggplot2)
6
6
  library(ggprism)
7
- library(ggsci)
8
7
  library(ggrepel)
9
8
 
10
9
  infile <- {{in.infile | r}}
@@ -14,6 +13,13 @@ mutaters <- {{envs.mutaters | r}}
14
13
  save_mutated <- {{envs.save_mutated | r}}
15
14
  defaults <- {{envs.defaults | r}}
16
15
  stats <- {{envs.stats | r}}
16
+ exclude_cols <- {{envs.exclude_cols | r}}
17
+
18
+ if (is.null(exclude_cols)) {
19
+ exclude_cols <- c()
20
+ } else {
21
+ exclude_cols <- trimws(unlist(strsplit(exclude_cols, ",")))
22
+ }
17
23
 
18
24
  outdir <- dirname(outfile)
19
25
  indata <- read.delim(infile, sep = sep, header = TRUE, row.names = NULL)
@@ -37,6 +43,20 @@ write.table(
37
43
  col.names = TRUE,
38
44
  quote = FALSE
39
45
  )
46
+ add_report(
47
+ list(
48
+ kind = "descr",
49
+ content = "The samples used in the analysis. Each row is a sample, and columns are the meta information about the sample. This is literally the input sample information file, but the paths to the scRNA-seq and scTCR-seq data are hidden.",
50
+ once = TRUE
51
+ ),
52
+ list(
53
+ kind = "table",
54
+ pageSize = 50,
55
+ data = list(file = outfile, sep = sep, excluded = exclude_cols),
56
+ src = FALSE
57
+ ),
58
+ h1 = "Sample Information"
59
+ )
40
60
 
41
61
  theme_set(theme_prism())
42
62
  for (name in names(stats)) {
@@ -93,14 +113,14 @@ for (name in names(stats)) {
93
113
  if (stat$plot == "boxplot" || stat$plot == "box") {
94
114
  p <- ggplot(data, aes(x=!!group, y=!!sym(stat$on), fill=!!group)) +
95
115
  geom_boxplot(position = "dodge") +
96
- scale_fill_ucscgb(alpha = .8) +
116
+ scale_fill_biopipen() +
97
117
  xlab("")
98
118
  } else if (stat$plot == "violin" ||
99
119
  stat$plot == "violinplot" ||
100
120
  stat$plot == "vlnplot") {
101
121
  p <- ggplot(data, aes(x = !!group, y = !!sym(stat$on), fill=!!group)) +
102
122
  geom_violin(position = "dodge") +
103
- scale_fill_ucscgb(alpha = .8) +
123
+ scale_fill_biopipen() +
104
124
  xlab("")
105
125
  } else if (
106
126
  (grepl("violin", stat$plot) || grepl("vln", stat$plot)) &&
@@ -109,12 +129,12 @@ for (name in names(stats)) {
109
129
  p <- ggplot(data, aes(x = !!group, y = !!sym(stat$on), fill = !!group)) +
110
130
  geom_violin(position = "dodge") +
111
131
  geom_boxplot(width = 0.1, position = position_dodge(0.9), fill="white") +
112
- scale_fill_ucscgb(alpha = .8) +
132
+ scale_fill_biopipen() +
113
133
  xlab("")
114
134
  } else if (stat$plot == "histogram" || stat$plot == "hist") {
115
135
  p <- ggplot(data, aes(x = !!sym(stat$on), fill = !!group)) +
116
136
  geom_histogram(bins = 10, position = "dodge", alpha = 0.8, color = "white") +
117
- scale_fill_ucscgb(alpha = .8)
137
+ scale_fill_biopipen()
118
138
  } else if (stat$plot == "pie" || stat$plot == "piechart") {
119
139
  if (is.null(stat$each)) {
120
140
  data <- data %>% distinct(!!group, .keep_all = TRUE)
@@ -137,7 +157,7 @@ for (name in names(stats)) {
137
157
  fill="#EEEEEE",
138
158
  size=4
139
159
  ) +
140
- scale_fill_ucscgb(alpha = .7, name = group) +
160
+ scale_fill_biopipen(name = group) +
141
161
  ggtitle(paste0("# ", stat$on))
142
162
  } else if (stat$plot == "bar" || stat$plot == "barplot") {
143
163
  if (is.null(stat$each)) {
@@ -149,7 +169,7 @@ for (name in names(stats)) {
149
169
  data,
150
170
  aes(x = !!group, y = !!sym(count_on), fill = !!group)) +
151
171
  geom_bar(stat = "identity") +
152
- scale_fill_ucscgb(alpha = .8) +
172
+ scale_fill_biopipen() +
153
173
  ylab(paste0("# ", stat$on))
154
174
  } else {
155
175
  stop("Unknown plot type: ", stat$plot)
@@ -159,4 +179,18 @@ for (name in names(stats)) {
159
179
  }
160
180
  print(p)
161
181
  dev.off()
182
+
183
+ by_desc <- ifelse(is.null(stat$by), "", paste0(" by ", stat$by))
184
+ descr <- ifelse(
185
+ is_continuous,
186
+ paste0("The distribution of ", stat$on, by_desc),
187
+ paste0("The number of ", stat$on, by_desc)
188
+ )
189
+ add_report(
190
+ list(kind = "table_image", src = plotfile, name = name, descr = descr),
191
+ h1 = "Statistics",
192
+ ui = "table_of_images:2"
193
+ )
162
194
  }
195
+
196
+ save_report(outdir)
biopipen/scripts/scrna/CellsDistribution.R +121 -16
@@ -5,12 +5,13 @@ library(rlang)
5
5
  library(tidyr)
6
6
  library(dplyr)
7
7
  library(ggplot2)
8
- library(ggsci)
9
8
  library(ggVennDiagram)
10
9
  library(UpSetR)
10
+ library(slugify)
11
11
 
12
12
  srtfile <- {{in.srtobj | r}} # nolint
13
13
  outdir <- {{out.outdir | r}} # nolint
14
+ joboutdir <- {{job.outdir | r}} # nolint
14
15
  mutaters <- {{envs.mutaters | r}} # nolint
15
16
  group_by <- {{envs.group_by | r}} # nolint
16
17
  group_order <- {{envs.group_order | r}} # nolint
@@ -19,6 +20,7 @@ cells_order <- {{envs.cells_order | r}} # nolint
19
20
  cells_orderby <- {{envs.cells_orderby | r}} # nolint
20
21
  cells_n <- {{envs.cells_n | r}} # nolint
21
22
  subset <- {{envs.subset | r}} # nolint
23
+ descr <- {{envs.descr | r}} # nolint
22
24
  devpars <- {{envs.devpars | r}} # nolint
23
25
  each <- {{envs.each | r}} # nolint
24
26
  section <- {{envs.section | r}} # nolint
@@ -27,11 +29,11 @@ cases <- {{envs.cases | r}} # nolint
27
29
 
28
30
  if (is.null(overlap)) { overlap = c() }
29
31
  overlaps <- list()
30
- print("- Loading seurat object ...")
32
+ log_info("- Loading seurat object ...")
31
33
  srtobj <- readRDS(srtfile)
32
34
 
33
35
  if (!is.null(mutaters) && length(mutaters) > 0) {
34
- print("- Mutating seurat object ...")
36
+ log_info("- Mutating seurat object ...")
35
37
  srtobj@meta.data <- srtobj@meta.data %>%
36
38
  mutate(!!!lapply(mutaters, parse_expr))
37
39
  }
@@ -41,6 +43,7 @@ if (!is.factor(all_clusters)) {
41
43
  all_clusters = factor(all_clusters, levels = sort(unique(all_clusters)))
42
44
  }
43
45
 
46
+ single_section <- TRUE
44
47
  expand_cases <- function() {
45
48
  # fill up cases with missing parameters
46
49
  if (is.null(cases) || length(cases) == 0) {
@@ -55,7 +58,8 @@ expand_cases <- function() {
55
58
  devpars = devpars,
56
59
  each = each,
57
60
  section = section,
58
- subset = subset
61
+ subset = subset,
62
+ descr = descr
59
63
  )
60
64
  )
61
65
  } else {
@@ -72,7 +76,8 @@ expand_cases <- function() {
72
76
  devpars = devpars,
73
77
  each = each,
74
78
  section = section,
75
- subset = subset
79
+ subset = subset,
80
+ descr = descr
76
81
  )
77
82
  case$devpars <- list_setdefault(case$devpars, devpars)
78
83
  filled_cases[[name]] <- case
@@ -80,12 +85,15 @@ expand_cases <- function() {
80
85
  }
81
86
 
82
87
  outcases <- list()
88
+ sections <- c()
83
89
  # expand each
84
90
  for (name in names(filled_cases)) {
85
91
  case <- filled_cases[[name]]
86
92
  if (is.null(case$each) || nchar(case$each) == 0) {
93
+ sections <- c(sections, case$section)
87
94
  outcases[[paste0(case$section, ":", name)]] <- case
88
95
  } else {
96
+ sections <- c(sections, case$each)
89
97
  eachs <- srtobj@meta.data %>% pull(case$each) %>% na.omit() %>% unique() %>% as.vector()
90
98
  for (ea in eachs) {
91
99
  by <- make.names(paste0(".", name, "_", case$each,"_", ea))
@@ -101,25 +109,46 @@ expand_cases <- function() {
101
109
  }
102
110
  }
103
111
  }
112
+ single_section <<- length(unique(sections)) == 1
104
113
  outcases
105
114
  }
106
115
 
116
+ casename_info <- function(casename, create = FALSE) {
117
+ sec_case_names <- strsplit(casename, ":")[[1]]
118
+ cname <- paste(sec_case_names[-1], collapse = ":")
119
+
120
+ out <- list(
121
+ casename = casename,
122
+ section = sec_case_names[1],
123
+ case = cname,
124
+ section_slug = slugify(sec_case_names[1], tolower = FALSE),
125
+ case_slug = slugify(cname, tolower = FALSE)
126
+ )
127
+ out$sec_dir <- file.path(outdir, out$section_slug)
128
+ if (create) {
129
+ dir.create(out$sec_dir, showWarnings = FALSE, recursive = TRUE)
130
+ }
131
+ out
132
+ }
133
+
107
134
  do_case <- function(name, case) {
108
- print(paste("- Running for case:", name))
135
+ log_info(paste("- Running for case:", name))
109
136
  if (is.null(case$group_by) || nchar(case$group_by) == 0) {
110
137
  stop(paste0("`group_by` must be specified for case", name))
111
138
  }
112
139
  if (is.null(case$cells_by) || nchar(case$cells_by) == 0) {
113
140
  stop(paste0("`cells_by` must be specified for case", name))
114
141
  }
142
+ info <- casename_info(name, create = TRUE)
115
143
  cells_by <- trimws(strsplit(case$cells_by, ",")[[1]])
116
144
 
117
145
  sec_case_names <- strsplit(name, ":")[[1]]
118
146
  sec_dir <- file.path(outdir, sec_case_names[1])
119
147
  casename <- paste(sec_case_names[-1], collapse = ":")
120
148
  dir.create(sec_dir, showWarnings = FALSE, recursive = TRUE)
121
- outfile <- file.path(sec_dir, paste0("case-", casename, ".png"))
122
- txtfile <- file.path(sec_dir, paste0("case-", casename, ".txt"))
149
+
150
+ outfile <- file.path(info$sec_dir, paste0("case-", info$case_slug, ".png"))
151
+ txtfile <- file.path(info$sec_dir, paste0("case-", info$case_slug, ".txt"))
123
152
 
124
153
  # subset the seurat object
125
154
  meta <- srtobj@meta.data
@@ -148,11 +177,11 @@ do_case <- function(name, case) {
148
177
  meta <- meta1
149
178
  }
150
179
 
151
- if (sec_case_names[1] %in% overlap) {
152
- if (is.null(overlaps[[sec_case_names[1]]])) {
153
- overlaps[[sec_case_names[1]]] <<- list()
180
+ if (info$section %in% overlap) {
181
+ if (is.null(overlaps[[info$section]])) {
182
+ overlaps[[info$section]] <<- list()
154
183
  }
155
- overlaps[[sec_case_names[1]]][[casename]] <<- meta %>% pull(case$cells_by) %>% unique()
184
+ overlaps[[info$section]][[info$case]] <<- meta %>% pull(case$cells_by) %>% unique()
156
185
  }
157
186
 
158
187
  # add sizes
@@ -197,7 +226,14 @@ do_case <- function(name, case) {
197
226
  }
198
227
 
199
228
  write.table(
200
- meta,
229
+ meta %>% select(
230
+ !!sym(cells_by),
231
+ !!sym(case$group_by),
232
+ CloneSize,
233
+ CloneGroupSize,
234
+ CloneClusterSize,
235
+ CloneGroupClusterSize,
236
+ ),
201
237
  txtfile,
202
238
  sep = "\t",
203
239
  row.names = TRUE,
@@ -226,7 +262,7 @@ do_case <- function(name, case) {
226
262
  geom_col(width=.01, position="fill", color = "#888888") +
227
263
  geom_bar(stat = "identity", position = position_fill(reverse = TRUE)) +
228
264
  coord_polar("y", start = 0) +
229
- scale_fill_ucscgb(name = "Cluster", alpha = 1, limits = levels(all_clusters)) +
265
+ scale_fill_biopipen(name = "Cluster", limits = levels(all_clusters)) +
230
266
  theme_void() +
231
267
  theme(
232
268
  plot.margin = unit(c(1,1,1,1), "cm"),
@@ -238,16 +274,63 @@ do_case <- function(name, case) {
238
274
  png(outfile, res = devpars$res, width = devpars$width, height = devpars$height)
239
275
  print(p)
240
276
  dev.off()
277
+
278
+ add_report(
279
+ list(
280
+ kind = "descr",
281
+ content = ifelse(
282
+ is.null(case$descr) || nchar(case$descr) == 0,
283
+ paste0(
284
+ "Distribution for cells in ",
285
+ "<code>", html_escape(cells_by), "</code>",
286
+ " for ",
287
+ "<code>", html_escape(case$group_by), "</code>"
288
+ ),
289
+ case$descr
290
+ )
291
+ ),
292
+ h1 = ifelse(
293
+ info$section == "DEFAULT",
294
+ info$case,
295
+ ifelse(single_section, paste0(info$section, " - ", info$case), info$section)
296
+ ),
297
+ h2 = ifelse(single_section, "#", info$case)
298
+ )
299
+
300
+ add_report(
301
+ list(
302
+ name = "Distribution Plot",
303
+ contents = list(list(
304
+ kind = "image",
305
+ src = outfile
306
+ ))
307
+ ),
308
+ list(
309
+ name = "Distribution Table",
310
+ contents = list(list(
311
+ kind = "table",
312
+ data = list(nrows = 100),
313
+ src = txtfile
314
+ ))
315
+ ),
316
+ h1 = ifelse(
317
+ info$section == "DEFAULT",
318
+ info$case,
319
+ ifelse(single_section, paste0(info$section, " - ", info$case), info$section)
320
+ ),
321
+ h2 = ifelse(single_section, "#", info$case),
322
+ ui = "tabs"
323
+ )
241
324
  }
242
325
 
243
326
  do_overlap <- function(section) {
244
- print(paste("- Running overlaps for section:", section))
327
+ log_info(paste("- Running overlaps for section:", section))
245
328
  overlap_cases <- overlaps[[section]]
246
329
  if (length(overlap_cases) < 2) {
247
330
  stop(paste0("Not enough cases for overlap for section: ", section))
248
331
  }
249
332
 
250
- sec_dir <- file.path(outdir, section)
333
+ sec_dir <- file.path(outdir, slugify(section, tolower = FALSE))
251
334
  venn_plot <- file.path(sec_dir, "venn.png")
252
335
  venn_p <- ggVennDiagram(overlap_cases, label_percent_digit = 1) +
253
336
  scale_fill_distiller(palette = "Reds", direction = 1) +
@@ -261,8 +344,30 @@ do_overlap <- function(section) {
261
344
  png(upset_plot, res = 100, width = 800, height = 600)
262
345
  print(upset_p)
263
346
  dev.off()
347
+
348
+ add_report(
349
+ list(
350
+ name = "Venn Plot",
351
+ contents = list(list(
352
+ kind = "image",
353
+ src = venn_plot
354
+ ))
355
+ ),
356
+ list(
357
+ name = "UpSet Plot",
358
+ contents = list(list(
359
+ kind = "image",
360
+ src = upset_plot
361
+ ))
362
+ ),
363
+ h1 = "Overlapping Groups",
364
+ h2 = section,
365
+ ui = "tabs"
366
+ )
264
367
  }
265
368
 
266
369
  cases <- expand_cases()
267
370
  sapply(sort(names(cases)), function(name) do_case(name, cases[[name]]))
268
371
  sapply(sort(names(overlaps)), do_overlap)
372
+
373
+ save_report(joboutdir)