biopipen 0.28.1__py3-none-any.whl → 0.29.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (85) hide show
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/config.toml +8 -0
  3. biopipen/ns/bam.py +0 -2
  4. biopipen/ns/bed.py +35 -0
  5. biopipen/ns/cellranger_pipeline.py +5 -5
  6. biopipen/ns/cnv.py +18 -2
  7. biopipen/ns/cnvkit_pipeline.py +16 -11
  8. biopipen/ns/gene.py +68 -23
  9. biopipen/ns/misc.py +2 -15
  10. biopipen/ns/plot.py +204 -0
  11. biopipen/ns/regulatory.py +214 -0
  12. biopipen/ns/scrna.py +31 -5
  13. biopipen/ns/snp.py +516 -8
  14. biopipen/ns/stats.py +167 -3
  15. biopipen/ns/vcf.py +196 -0
  16. biopipen/reports/snp/PlinkCallRate.svelte +24 -0
  17. biopipen/reports/snp/PlinkFreq.svelte +18 -0
  18. biopipen/reports/snp/PlinkHWE.svelte +18 -0
  19. biopipen/reports/snp/PlinkHet.svelte +18 -0
  20. biopipen/reports/snp/PlinkIBD.svelte +18 -0
  21. biopipen/scripts/bam/CNVpytor.py +144 -46
  22. biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
  23. biopipen/scripts/bed/BedtoolsMerge.py +1 -1
  24. biopipen/scripts/cnv/AneuploidyScore.R +30 -7
  25. biopipen/scripts/cnv/AneuploidyScoreSummary.R +5 -2
  26. biopipen/scripts/cnv/TMADScore.R +21 -5
  27. biopipen/scripts/cnv/TMADScoreSummary.R +6 -2
  28. biopipen/scripts/cnvkit/CNVkitAccess.py +2 -1
  29. biopipen/scripts/cnvkit/CNVkitAutobin.py +3 -2
  30. biopipen/scripts/cnvkit/CNVkitBatch.py +1 -1
  31. biopipen/scripts/cnvkit/CNVkitCoverage.py +2 -1
  32. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +1 -1
  33. biopipen/scripts/cnvkit/CNVkitHeatmap.py +1 -1
  34. biopipen/scripts/cnvkit/CNVkitReference.py +2 -1
  35. biopipen/scripts/delim/SampleInfo.R +10 -5
  36. biopipen/scripts/gene/GeneNameConversion.R +65 -0
  37. biopipen/scripts/gene/GenePromoters.R +61 -0
  38. biopipen/scripts/misc/Shell.sh +15 -0
  39. biopipen/scripts/plot/Manhattan.R +146 -0
  40. biopipen/scripts/plot/QQPlot.R +146 -0
  41. biopipen/scripts/regulatory/MotifAffinityTest.R +226 -0
  42. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +126 -0
  43. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +96 -0
  44. biopipen/scripts/regulatory/MotifScan.py +159 -0
  45. biopipen/scripts/regulatory/atSNP.R +33 -0
  46. biopipen/scripts/regulatory/motifBreakR.R +1594 -0
  47. biopipen/scripts/scrna/MarkersFinder.R +69 -67
  48. biopipen/scripts/scrna/SeuratClustering.R +71 -29
  49. biopipen/scripts/scrna/SeuratMap2Ref.R +20 -0
  50. biopipen/scripts/scrna/SeuratPreparing.R +252 -122
  51. biopipen/scripts/scrna/SeuratSubClustering.R +76 -27
  52. biopipen/scripts/snp/MatrixEQTL.R +85 -44
  53. biopipen/scripts/snp/Plink2GTMat.py +133 -0
  54. biopipen/scripts/snp/PlinkCallRate.R +190 -0
  55. biopipen/scripts/snp/PlinkFilter.py +100 -0
  56. biopipen/scripts/snp/PlinkFreq.R +298 -0
  57. biopipen/scripts/snp/PlinkFromVcf.py +78 -0
  58. biopipen/scripts/snp/PlinkHWE.R +80 -0
  59. biopipen/scripts/snp/PlinkHet.R +92 -0
  60. biopipen/scripts/snp/PlinkIBD.R +200 -0
  61. biopipen/scripts/snp/PlinkUpdateName.py +124 -0
  62. biopipen/scripts/stats/Mediation.R +94 -0
  63. biopipen/scripts/stats/MetaPvalue.R +2 -1
  64. biopipen/scripts/stats/MetaPvalue1.R +70 -0
  65. biopipen/scripts/tcr/TCRClusterStats.R +12 -7
  66. biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
  67. biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
  68. biopipen/scripts/vcf/BcftoolsSort.py +113 -0
  69. biopipen/scripts/vcf/BcftoolsView.py +73 -0
  70. biopipen/scripts/vcf/VcfFix_utils.py +1 -1
  71. biopipen/scripts/vcf/bcftools_utils.py +52 -0
  72. biopipen/utils/gene.R +83 -37
  73. biopipen/utils/gene.py +108 -60
  74. biopipen/utils/misc.R +56 -0
  75. biopipen/utils/misc.py +5 -2
  76. biopipen/utils/reference.py +54 -10
  77. {biopipen-0.28.1.dist-info → biopipen-0.29.1.dist-info}/METADATA +2 -2
  78. {biopipen-0.28.1.dist-info → biopipen-0.29.1.dist-info}/RECORD +80 -51
  79. {biopipen-0.28.1.dist-info → biopipen-0.29.1.dist-info}/entry_points.txt +1 -1
  80. biopipen/ns/bcftools.py +0 -111
  81. biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
  82. biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
  83. biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
  84. biopipen/scripts/gene/GeneNameConversion.py +0 -66
  85. {biopipen-0.28.1.dist-info → biopipen-0.29.1.dist-info}/WHEEL +0 -0
@@ -0,0 +1,214 @@
1
+ """Provides processes for regulatory-related analyses"""
2
+
3
+ from ..core.proc import Proc
4
+ from ..core.config import config
5
+
6
+
7
+ class MotifScan(Proc):
8
+ """Scan the input sequences for binding sites using motifs.
9
+
10
+ Currently only [fimo](https://meme-suite.org/meme/tools/fimo) from MEME suite
11
+ is supported, based on the research/comparisons done by the following reference.
12
+
13
+ Reference:
14
+ - [Evaluating tools for transcription factor binding site prediction](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6889335/)
15
+
16
+ Input:
17
+ motiffile: File containing motif names.
18
+ The file contains the motif and regulator names.
19
+ The motif names should match the names in the motif database.
20
+ This file must have a header.
21
+ If multiple columns are present, it should be delimited by tab.
22
+ seqfile: File containing sequences in FASTA format.
23
+
24
+ Output:
25
+ outdir: Directory containing the results.
26
+ Notably `fimo_output.txt`, which extends `fimo.tsv` and contains:
27
+ 1. the results with the regulator information if `envs.regulator_col`
28
+ is provided, otherwise, the `regulator` columns will be filled with
29
+ the motif names.
30
+ 2. the original sequence from the fasta file (in.seqfile)
31
+ 3. corrected genomic coordinates if the genomic coordinates are included
32
+ in the sequence names.
33
+
34
+ See also the `Output` section of
35
+ <https://meme-suite.org/meme/doc/fimo.html>.
36
+ Note that `--no-pgc` is passed to fimo so that it does not parse the genomic coordinates
37
+ from the sequence names. When fimo parses the genomic coordinates,
38
+ `DDX11L1` in `>DDX11L1::chr1:11869-14412` will be lost.
39
+ The purpose of this is to keep the sequence names as they are in the output.
40
+ If the sequence names are in the format of `>NAME::chr1:START-END`, we will
41
+ correct the coordinates in the output.
42
+ Also note that it requires meme/fimo v5.5.5+ to do this
43
+ (where the --no-pgc option is available).
44
+
45
+ Envs:
46
+ tool (choice): The tool to use for scanning.
47
+ Currently only fimo is supported.
48
+ - fimo: Use fimo from MEME suite.
49
+ fimo: The path to fimo binary.
50
+ motif_col: The column name in the motif file containing the motif names.
51
+ regulator_col: The column name in the motif file containing the regulator names.
52
+ Both `motif_col` and `regulator_col` should be the direct column names or
53
+ the index (1-based) of the columns.
54
+ If no `regulator_col` is provided, no regulator information is written in
55
+ the output.
56
+ notfound (choice): What to do if a motif is not found in the database.
57
+ - error: Report error and stop the process.
58
+ - ignore: Ignore the motif and continue.
59
+ motifdb: The path to the motif database. This is required.
60
+ It should be in the format of MEME motif database.
61
+ Databases can be downloaded here: <https://meme-suite.org/meme/doc/download.html>.
62
+ See also introduction to the databases: <https://meme-suite.org/meme/db/motifs>.
63
+ cutoff (type=float): The cutoff for p-value to write the results.
64
+ When `envs.q_cutoff` is set, this is applied to the q-value.
65
+ This is passed to `--thresh` in fimo.
66
+ q (flag): Calculate q-value.
67
+ When `False`, `--no-qvalue` is passed to fimo.
68
+ The q-value calculation is that of Benjamini and Hochberg (BH) (1995).
69
+ q_cutoff (flag): Apply `envs.cutoff` to q-value.
70
+ args (ns): Additional arguments to pass to the tool.
71
+ - <more>: Additional arguments for fimo.
72
+ See: <https://meme-suite.org/meme/doc/fimo.html>
73
+ """ # noqa: E501
74
+ input = "motiffile:file, seqfile:file"
75
+ output = "outdir:dir:{{in.motiffile | stem}}.fimo"
76
+ lang = config.lang.python
77
+ envs = {
78
+ "tool": "fimo",
79
+ "fimo": config.exe.fimo,
80
+ "motif_col": 1,
81
+ "regulator_col": None,
82
+ "notfound": "error",
83
+ "motifdb": config.tf_motifdb,
84
+ "cutoff": 1e-4,
85
+ "q": False,
86
+ "q_cutoff": False,
87
+ "args": {},
88
+ }
89
+ script = "file://../scripts/regulatory/MotifScan.py"
90
+
91
+
92
+ class MotifAffinityTest(Proc):
93
+ """Test the affinity of motifs to the sequences and the affinity change
94
+ due to the mutations.
95
+
96
+ See also <https://simon-coetzee.github.io/motifBreakR> and
97
+ <https://www.bioconductor.org/packages/release/bioc/vignettes/atSNP/inst/doc/atsnp-vignette.html>
98
+
99
+ When using atSNP, motifBreakR is also required to plot the variants and motifs.
100
+
101
+ Input:
102
+ motiffile: File containing motif names.
103
+ The file contains the motif and regulator names.
104
+ The motif names should match the names in the motif database.
105
+ This file must have a header.
106
+ If multiple columns are present, it should be delimited by tab.
107
+ varfile: File containing the variants.
108
+ It could be a VCF file or a BED-like file.
109
+ If it is a VCF file, it does not need to be indexed. Only records with `PASS` in the `FILTER` column are used.
110
+ If it is a BED-like file, it should contain the following columns, `chrom`, `start`, `end`, `name`, `score`, `strand`, `ref`, `alt`.
111
+
112
+ Output:
113
+ outdir: Directory containing the results.
114
+ For motifBreakR, `motifbreakr.txt` will be created. Records with effect `strong`/`weak` are written (`neutral` is not).
115
+ For atSNP, `atsnp.txt` will be created. Records with p-value (`envs.atsnp_args.p`) < `envs.cutoff` are written.
116
+
117
+ Envs:
118
+ ncores (type=int): The number of cores to use.
119
+ tool (choice): The tool to use for the test.
120
+ - motifbreakr: Use motifBreakR.
121
+ - motifBreakR: Use motifBreakR.
122
+ - atsnp: Use atSNP.
123
+ - atSNP: Use atSNP.
124
+ bcftools: The path to bcftools binary.
125
+ Used to convert the VCF file to the BED file when the input is a VCF file.
126
+ motif_col: The column name in the motif file containing the motif names.
127
+ If this is not provided, `envs.regulator_col` and `envs.regmotifs` are required,
128
+ which are used to infer the motif names from the regulator names.
129
+ regulator_col: The column name in the motif file containing the regulator names.
130
+ Both `motif_col` and `regulator_col` should be the direct column names or
131
+ the index (1-based) of the columns.
132
+ If no `regulator_col` is provided, no regulator information is written in
133
+ the output. Otherwise, the regulator information is written in the output in
134
+ the `Regulator` column.
135
+ notfound (choice): What to do if a motif is not found in the database,
136
+ or a regulator is not found in the regulator-motif mapping (envs.regmotifs)
137
+ file.
138
+ - error: Report error and stop the process.
139
+ - ignore: Ignore the motif and continue.
140
+ motifdb: The path to the motif database. This is required.
141
+ It should be in the format of MEME motif database.
142
+ Databases can be downloaded here: <https://meme-suite.org/meme/doc/download.html>.
143
+ See also introduction to the databases: <https://meme-suite.org/meme/db/motifs>.
144
+ [universalmotif](https://github.com/bjmt/universalmotif) is required to read the motif database.
145
+ genome: The genome assembly.
146
+ Used to fetch the sequences around the variants from the matching genome package; for example, `BSgenome.Hsapiens.UCSC.hg19` is required if
147
+ the genome is `hg19`. For an organism other than human, please specify the full name of the package, for example, `BSgenome.Mmusculus.UCSC.mm10`.
148
+ cutoff (type=float): The cutoff for p-value to write the results.
149
+ devpars (ns): The default device parameters for the plot.
150
+ - width (type=int): The width of the plot.
151
+ - height (type=int): The height of the plot.
152
+ - res (type=int): The resolution of the plot.
153
+ plot_nvars (type=int): Number of variants to plot.
154
+ Plot top `<plot_nvars>` variants with the largest `abs(alleleDiff)` (motifBreakR) or smallest p-values (atSNP).
155
+ plots (type=json): Specify the details for the plots.
156
+ When specified, `plot_nvars` is ignored.
157
+ The keys are the variant names and the values are the details for the plots, including:
158
+ devpars: The device parameters for the plot to override the default (envs.devpars).
159
+ which: An expression passed to `subset(results, subset = ...)` to get the motifs for the variant to plot.
160
+ Or an integer to get the top `which` motifs.
161
+ For example, `effect == "strong"` to get the motifs with strong effect in motifBreakR result.
162
+ regmotifs: The path to the regulator-motif mapping file.
163
+ It must have header and the columns `Motif` or `Model` for motif names and
164
+ `TF`, `Regulator` or `Transcription factor` for regulator names.
165
+ motifbreakr_args (ns): Additional arguments to pass to motifBreakR.
166
+ - method (choice): The method to use.
167
+ See details of <https://rdrr.io/bioc/motifbreakR/man/motifbreakR.html>
168
+ and <https://simon-coetzee.github.io/motifBreakR/#methods>.
169
+ - default: Use the default method.
170
+ - log: Use the standard summation of log probabilities
171
+ - ic: Use information content
172
+ - notrans: Use the default method without transformation
173
+ atsnp_args (ns): Additional arguments to pass to atSNP.
174
+ - padj_cutoff (flag): The `envs.cutoff` will be applied to the adjusted p-value.
175
+ Only works for `atSNP`.
176
+ - padj (choice): The method to adjust the p-values.
177
+ Only works for `atSNP`
178
+ - holm: Holm's method
179
+ - hochberg: Hochberg's method
180
+ - hommel: Hommel's method
181
+ - bonferroni: Bonferroni method
182
+ - BH: Benjamini & Hochberg's method
183
+ - BY: Benjamini & Yekutieli's method
184
+ - fdr: False discovery rate
185
+ - none: No adjustment
186
+ - p (choice): Which p-value to use for adjustment and cutoff.
187
+ - pval_ref: p-value for the reference allele affinity score.
188
+ - pval_snp: p-value for the SNP allele affinity score.
189
+ - pval_cond_ref: and
190
+ - pval_cond_snp: conditional p-values for the affinity scores of the reference and SNP alleles.
191
+ - pval_diff: p-value for the affinity score change between the two alleles.
192
+ - pval_rank: p-value for the rank test between the two alleles.
193
+ """ # noqa: E501
194
+ input = "motiffile:file, varfile:file"
195
+ output = "outdir:dir:{{in.motiffile | stem}}.{{envs.tool | lower}}"
196
+ lang = config.lang.rscript
197
+ envs = {
198
+ "ncores": config.misc.ncores,
199
+ "tool": "atsnp",
200
+ "bcftools": config.exe.bcftools,
201
+ "motif_col": None,
202
+ "regulator_col": None,
203
+ "notfound": "error",
204
+ "motifdb": config.ref.tf_motifdb,
205
+ "regmotifs": config.ref.tf_motifs,
206
+ "genome": config.ref.genome,
207
+ "cutoff": 0.05,
208
+ "devpars": {"width": None, "height": None, "res": 100},
209
+ "plot_nvars": 10,
210
+ "plots": {},
211
+ "motifbreakr_args": {"method": "default"},
212
+ "atsnp_args": {"padj_cutoff": True, "padj": "BH", "p": "pval_diff"},
213
+ }
214
+ script = "file://../scripts/regulatory/MotifAffinityTest.R"
biopipen/ns/scrna.py CHANGED
@@ -53,7 +53,7 @@ class SeuratPreparing(Proc):
53
53
 
54
54
  See also
55
55
  - <https://satijalab.org/seurat/articles/pbmc3k_tutorial.html#standard-pre-processing-workflow-1>
56
- - <https://nbisweden.github.io/workshop-scRNAseq/labs/compiled/seurat/seurat_01_qc.html#Create_one_merged_object>
56
+ - <https://satijalab.org/seurat/articles/integration_introduction>
57
57
 
58
58
  This process will read the scRNA-seq data, based on the information provided by
59
59
  `SampleInfo`, specifically, the paths specified by the `RNAData` column.
@@ -210,6 +210,19 @@ class SeuratPreparing(Proc):
210
210
  - PCs (type=int): Number of PCs to use for 'doubletFinder' function.
211
211
  - doublets (type=float): Number of expected doublets as a proportion of the pool size.
212
212
  - pN (type=float): Number of doublets to simulate as a proportion of the pool size.
213
+ - ncores (type=int): Number of cores to use for `DoubletFinder::paramSweep`.
214
+ Set to `None` to use `envs.ncores`.
215
+ Since parallelization of the function often exhausts memory, if a large `envs.ncores` does not work
216
+ for `DoubletFinder`, set this to a smaller number.
217
+
218
+ cache (type=auto): Whether to cache the information at different steps.
219
+ If `True`, the seurat object will be cached in the job output directory, which will not be cleaned up when the job is rerun.
220
+ The cached seurat object will be saved as `<signature>.<kind>.RDS` file, where `<signature>` is the signature determined by
221
+ the input and envs of the process.
222
+ See <https://github.com/satijalab/seurat/issues/7849>, <https://github.com/satijalab/seurat/issues/5358> and
223
+ <https://github.com/satijalab/seurat/issues/6748> for more details, including reproducibility issues.
224
+ To not use the cached seurat object, you can either set `cache` to `False` or delete the cached file at
225
+ `<signature>.RDS` in the cache directory.
213
226
 
214
227
  Requires:
215
228
  r-seurat:
@@ -238,7 +251,8 @@ class SeuratPreparing(Proc):
238
251
  "min_cells": 5,
239
252
  },
240
253
  "IntegrateLayers": {"method": "harmony"},
241
- "DoubletFinder": {"PCs": 0, "pN": 0.25, "doublets": 0.075},
254
+ "DoubletFinder": {"PCs": 0, "pN": 0.25, "doublets": 0.075, "ncores": 1},
255
+ "cache": config.path.tmpdir,
242
256
  }
243
257
  script = "file://../scripts/scrna/SeuratPreparing.R"
244
258
  plugin_opts = {
@@ -288,10 +302,15 @@ class SeuratClustering(Proc):
288
302
  `object` is specified internally, and `-` in the key will be replaced with `.`.
289
303
  The cluster labels will be saved in `seurat_clusters` and prefixed with "c".
290
304
  The first cluster will be "c1", instead of "c0".
291
- - resolution: The resolution of the clustering. You can have multiple resolutions separated by comma.
305
+ - resolution (type=auto): The resolution of the clustering. You can have multiple resolutions as a list or as a string separated by comma.
306
+ Ranges are also supported, for example: `0.1:0.5:0.1` will generate `0.1, 0.2, 0.3, 0.4, 0.5`. The step can be omitted, defaulting to 0.1.
292
307
  The results will be saved in `seurat_clusters_<resolution>`.
293
308
  The final resolution will be used to define the clusters at `seurat_clusters`.
294
309
  - <more>: See <https://satijalab.org/seurat/reference/findclusters>
310
+ clustree_devpars (ns): The device parameters for the clustree plots.
311
+ - res (type=int): The resolution of the plots.
312
+ - height (type=int): The height of the plots.
313
+ - width (type=int): The width of the plots.
295
314
  cache (type=auto): Whether to cache the information at different steps.
296
315
  If `True`, the seurat object will be cached in the job output directory, which will not be cleaned up when the job is rerun.
297
316
  The cached seurat object will be saved as `<signature>.<kind>.RDS` file, where `<signature>` is the signature determined by
@@ -319,6 +338,7 @@ class SeuratClustering(Proc):
319
338
  "RunUMAP": {"dims": 30},
320
339
  "FindNeighbors": {},
321
340
  "FindClusters": {"resolution": 0.8},
341
+ "clustree_devpars": {"res": 100, "height": 1000, "width": 800},
322
342
  "cache": config.path.tmpdir,
323
343
  }
324
344
  script = "file://../scripts/scrna/SeuratClustering.R"
@@ -367,10 +387,15 @@ class SeuratSubClustering(Proc):
367
387
  FindClusters (ns): Arguments for [`FindClusters()`](https://satijalab.org/seurat/reference/findclusters).
368
388
  `object` is specified internally, and `-` in the key will be replaced with `.`.
369
389
  The cluster labels will be prefixed with "s". The first cluster will be "s1", instead of "s0".
370
- - resolution: The resolution of the clustering. You can have multiple resolutions separated by comma.
390
+ - resolution (type=auto): The resolution of the clustering. You can have multiple resolutions as a list or as a string separated by comma.
391
+ Ranges are also supported, for example: `0.1:0.5:0.1` will generate `0.1, 0.2, 0.3, 0.4, 0.5`. The step can be omitted, defaulting to 0.1.
371
392
  The results will be saved in `<casename>_<resolution>`.
372
393
  The final resolution will be used to define the clusters at `<casename>`.
373
394
  - <more>: See <https://satijalab.org/seurat/reference/findclusters>
395
+ clustree_devpars (ns): The device parameters for the clustree plots.
396
+ - res (type=int): The resolution of the plots.
397
+ - height (type=int): The height of the plots.
398
+ - width (type=int): The width of the plots.
374
399
  cache (type=auto): Whether to cache the information at different steps.
375
400
  If `True`, the seurat object will be cached in the job output directory, which will not be cleaned up when the job is rerun.
376
401
  The cached seurat object will be saved as `<signature>.<kind>.RDS` file, where `<signature>` is the signature determined by
@@ -394,6 +419,7 @@ class SeuratSubClustering(Proc):
394
419
  "RunUMAP": {"dims": 30},
395
420
  "FindNeighbors": {},
396
421
  "FindClusters": {"resolution": 0.8},
422
+ "clustree_devpars": {"res": 100, "height": 1000, "width": 800},
397
423
  "cache": config.path.tmpdir,
398
424
  "cases": {"subcluster": {}},
399
425
  }
@@ -499,7 +525,7 @@ class SeuratClusterStats(Proc):
499
525
  - pie (flag): Also output a pie chart?
500
526
  - circos (flag): Also output a circos plot?
501
527
  - table (flag): Whether to output a table (in tab-delimited format) and in the report.
502
- - frac_ofall(flag): Whether to output the fraction against all cells,
528
+ - frac_ofall (flag): Whether to output the fraction against all cells,
503
529
  instead of the fraction in each group.
504
530
  Does not work for circos plot.
505
531
  Only works when `frac` is `True` and `group-by` is specified.