biopipen 0.31.6__py3-none-any.whl → 0.32.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/ns/bam.py +28 -0
- biopipen/ns/bed.py +40 -0
- biopipen/ns/scrna.py +153 -0
- biopipen/reports/scrna/CellCellCommunicationPlots.svelte +14 -0
- biopipen/reports/scrna/SeuratMap2Ref.svelte +10 -6
- biopipen/scripts/bam/BamSubsetByBed.py +38 -0
- biopipen/scripts/bed/BedtoolsMakeWindows.py +47 -0
- biopipen/scripts/scrna/AnnData2Seurat.R +22 -14
- biopipen/scripts/scrna/CCPlotR-patch.R +161 -0
- biopipen/scripts/scrna/CellCellCommunication.py +101 -0
- biopipen/scripts/scrna/CellCellCommunicationPlots.R +191 -0
- biopipen/scripts/scrna/Seurat2AnnData.R +2 -42
- biopipen/scripts/scrna/SeuratMap2Ref.R +20 -1
- biopipen/scripts/tcr/GIANA/GIANA.py +1356 -797
- biopipen/scripts/tcr/GIANA/GIANA4.py +1364 -789
- biopipen/scripts/tcr/GIANA/query.py +164 -162
- biopipen/scripts/tcr/TCRClustering.R +25 -4
- biopipen/utils/single_cell.R +92 -1
- {biopipen-0.31.6.dist-info → biopipen-0.32.0.dist-info}/METADATA +1 -1
- {biopipen-0.31.6.dist-info → biopipen-0.32.0.dist-info}/RECORD +23 -17
- {biopipen-0.31.6.dist-info → biopipen-0.32.0.dist-info}/WHEEL +0 -0
- {biopipen-0.31.6.dist-info → biopipen-0.32.0.dist-info}/entry_points.txt +0 -0
biopipen/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.31.6"
|
|
1
|
+
__version__ = "0.32.0"
|
biopipen/ns/bam.py
CHANGED
|
@@ -301,3 +301,31 @@ class BamSampling(Proc):
|
|
|
301
301
|
"sort_args": [],
|
|
302
302
|
}
|
|
303
303
|
script = "file://../scripts/bam/BamSampling.py"
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
class BamSubsetByBed(Proc):
    """Subset bam file by the regions in a bed file

    Input:
        bamfile: The bam file
        bedfile: The bed file

    Output:
        outfile: The output bam file

    Envs:
        ncores: Number of cores to use
        samtools: Path to samtools executable
        tool: The tool to use, currently only "samtools" is supported
        index: Whether to index the output bam file
    """
    # Two file inputs: the bam to subset and the bed file with the regions.
    input = "bamfile:file, bedfile:file"
    # The output file name is derived from the stem of the input bam file.
    output = "outfile:file:{{in.bamfile | stem}}-subset.bam"
    lang = config.lang.python
    # Default values for the options documented under `Envs` above.
    envs = {
        "ncores": config.misc.ncores,
        "samtools": config.exe.samtools,
        "tool": "samtools",
        "index": True,
    }
    # Job script template, referenced relative to this module.
    script = "file://../scripts/bam/BamSubsetByBed.py"
|
biopipen/ns/bed.py
CHANGED
|
@@ -198,3 +198,43 @@ class BedtoolsIntersect(Proc):
|
|
|
198
198
|
"postcmd": None,
|
|
199
199
|
}
|
|
200
200
|
script = "file://../scripts/bed/BedtoolsIntersect.py"
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
class BedtoolsMakeWindows(Proc):
    """Make windows from a BED file or genome size file, using `bedtools makewindows`.

    Input:
        infile: The input BED file or a genome size file
            Type will be detected by the number of columns in the file.
            If it has 3+ columns, it is treated as a BED file, otherwise
            a genome size file.

    Output:
        outfile: The output BED file

    Envs:
        bedtools: The path to bedtools
        window (type=int): The size of the windows
        step (type=int): The step size of the windows
        nwin (type=int): The number of windows to be generated
            Exclusive with `window` and `step`.
            Either `nwin` or `window` and `step` should be provided.
        reverse (flag): Reverse numbering of windows in the output
        name (choice): How to name the generated windows/regions
            - none: Do not add any name
            - src: Use the source interval's name
            - winnum: Use the window number
            - srcwinnum: Use the source interval's name and window number
    """  # noqa: E501
    # Single file input; the job script reads it as `in.infile`.
    input = "infile:file"
    # The output file name is derived from the stem of the input file.
    output = "outfile:file:{{in.infile | stem}}_windows.bed"
    lang = config.lang.python
    # Default values for the options documented under `Envs` above.
    # `window`, `step` and `nwin` default to None, so the user has to choose
    # either `nwin` or `window` (optionally with `step`).
    envs = {
        "bedtools": config.exe.bedtools,
        "window": None,
        "step": None,
        "nwin": None,
        "reverse": False,
        "name": "none",
    }
    # Job script template, referenced relative to this module.
    script = "file://../scripts/bed/BedtoolsMakeWindows.py"
|
biopipen/ns/scrna.py
CHANGED
|
@@ -2314,3 +2314,156 @@ class ScSimulation(Proc):
|
|
|
2314
2314
|
"params": {},
|
|
2315
2315
|
}
|
|
2316
2316
|
script = "file://../scripts/scrna/ScSimulation.R"
|
|
2317
|
+
|
|
2318
|
+
|
|
2319
|
+
class CellCellCommunication(Proc):
    """Cell-cell communication inference

    This is implemented based on [LIANA](https://liana-py.readthedocs.io/en/latest/index.html),
    which is a Python package for cell-cell communication inference and provides a list of existing
    methods including [CellPhoneDB](https://github.com/ventolab/CellphoneDB),
    [Connectome](https://github.com/msraredon/Connectome/), log2FC,
    [NATMI](https://github.com/forrest-lab/NATMI),
    [SingleCellSignalR](https://github.com/SCA-IRCM/SingleCellSignalR), Rank_Aggregate, Geometric Mean,
    [scSeqComm](https://gitlab.com/sysbiobig/scseqcomm), and [CellChat](https://github.com/jinworks/CellChat).

    You can also try `python -c 'import liana; liana.mt.show_methods()'` to see the methods available.

    Note that this process does not do any visualization. You can use `CellCellCommunicationPlots`
    to visualize the results.

    Reference:
        - [Review](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9184522/).
        - [LIANA](https://www.biorxiv.org/content/10.1101/2023.08.19.553863v1).

    Input:
        sobjfile: The seurat object file in RDS or h5seurat format or AnnData file.

    Output:
        outfile: The output file with the 'liana_res' data frame.
            Stats are provided for both ligand and receptor entities, more specifically: ligand and receptor are
            the two entities that potentially interact. As a reminder, CCC events are not limited to secreted signalling,
            but we refer to them as ligand and receptor for simplicity.
            Also, in the case of heteromeric complexes, the ligand and receptor columns represent the subunit with minimum
            expression, while *_complex corresponds to the actual complex, with subunits being separated by _.
            source and target columns represent the source/sender and target/receiver cell identity for each interaction, respectively
            * `*_props`: represents the proportion of cells that express the entity.
                By default, any interactions in which either entity is not expressed in above 10% of cells per cell type
                is considered as a false positive, under the assumption that since CCC occurs between cell types, a sufficient
                proportion of cells within should express the genes.
            * `*_means`: entity expression mean per cell type.
            * `lr_means`: mean ligand-receptor expression, as a measure of ligand-receptor interaction magnitude.
            * `cellphone_pvals`: permutation-based p-values, as a measure of interaction specificity.

    Envs:
        method (choice): The method to use for cell-cell communication inference.
            - CellPhoneDB: Use CellPhoneDB method.
                Magnitude Score: lr_means; Specificity Score: cellphone_pvals.
            - Connectome: Use Connectome method.
            - log2FC: Use log2FC method.
            - NATMI: Use NATMI method.
            - SingleCellSignalR: Use SingleCellSignalR method.
            - Rank_Aggregate: Use Rank_Aggregate method.
            - Geometric_Mean: Use Geometric Mean method.
            - scSeqComm: Use scSeqComm method.
            - CellChat: Use CellChat method.
            - cellphonedb: alias for `CellPhoneDB`
            - connectome: alias for `Connectome`
            - log2fc: alias for `log2FC`
            - natmi: alias for `NATMI`
            - singlesignaler: alias for `SingleCellSignalR`
            - rank_aggregate: alias for `Rank_Aggregate`
            - geometric_mean: alias for `Geometric_Mean`
            - scseqcomm: alias for `scSeqComm`
            - cellchat: alias for `CellChat`
        assay: The assay to use for the analysis.
            Only works for Seurat object.
        seed (type=int): The seed for the random number generator.
        ncores (type=int): The number of cores to use.
        groupby: The column name in metadata to group the cells.
            Typically, this column should be the cluster id.
        species (choice): The species of the cells.
            - human: Human cells, the 'consensus' resource will be used.
            - mouse: Mouse cells, the 'mouseconsensus' resource will be used.
        expr_prop (type=float): Minimum expression proportion for the ligands and
            receptors (+ their subunits) in the corresponding cell identities. Set to 0
            to return unfiltered results.
        min_cells (type=int): Minimum cells (per cell identity if grouped by `groupby`)
            to be considered for downstream analysis.
        n_perms (type=int): Number of permutations for the permutation test.
            Relevant only for permutation-based methods (e.g., `CellPhoneDB`).
            If `0` is passed, no permutation testing is performed.
        rscript: The path to the Rscript executable used to convert RDS file to AnnData.
            if `in.sobjfile` is an RDS file, it will be converted to AnnData file (h5ad).
            You need `Seurat`, `SeuratDisk` and `digest` installed.
        <more>: Other arguments for the method.
            The arguments are passed to the method directly.
            See the method documentation for more details and also
            `help(liana.mt.<method>.__call__)` in Python.
    """  # noqa: E501
    input = "sobjfile:file"
    # The result table is named after the stem of the input object file.
    output = "outfile:file:{{in.sobjfile | stem}}-ccc.txt"
    # The job script is Python; `envs.rscript` is kept separately for the
    # RDS -> AnnData conversion described in the docstring.
    lang = config.lang.python
    # Default values for the options documented under `Envs` above.
    # The default method uses the lowercase alias "cellchat".
    envs = {
        "method": "cellchat",
        "assay": None,
        "seed": 1337,
        "ncores": config.misc.ncores,
        "groupby": "seurat_clusters",
        "species": "human",
        "expr_prop": 0.1,
        "min_cells": 5,
        "n_perms": 1000,
        "rscript": config.lang.rscript,
    }
    # Job script template, referenced relative to this module.
    script = "file://../scripts/scrna/CellCellCommunication.py"
|
|
2420
|
+
|
|
2421
|
+
|
|
2422
|
+
class CellCellCommunicationPlots(Proc):
    """Visualization for cell-cell communication inference.

    R package [`CCPlotR`](https://github.com/Sarah145/CCPlotR) is used to visualize
    the results.

    Input:
        cccfile: The output file from `CellCellCommunication`
            or a tab-separated file with the following columns: `source`, `target`,
            `ligand`, `receptor`, and `score`.
            If so, `in.expfile` can be provided where `exp_df` is needed.
        expfile: The expression file with the expression of ligands and receptors.
            Columns include: `cell_type`, `gene` and `mean_exp`.

    Output:
        outdir: The output directory for the plots.

    Envs:
        score_col: The column name in the input file that contains the score, if
            the input file is from `CellCellCommunication`.
            Two alias columns are added in the result file of `CellCellCommunication`,
            `mag_score` and `spec_score`, which are the magnitude and specificity
            scores.
        subset: An expression to pass to `dplyr::filter()` to subset the ccc data.
        cases (type=json): The cases for the plots.
            The keys are the names of the cases and the values are the arguments for
            the plots. The arguments include:
            * kind: one of `arrow`, `circos`, `dotplot`, `heatmap`, `network`,
                and `sigmoid`.
            * devpars: The parameters for `png()` for the plot, including `res`,
                `width`, and `height`.
            * section: The section name for the report to group the plots.
            * <other>: Other arguments for `cc_<kind>` function in `CCPlotR`.
                See the documentation for more details.
                Or you can use `?CCPlotR::cc_<kind>` in R.
    """
    input = "cccfile:file, expfile:file"
    # The plots go to a directory named after the stem of the ccc file.
    output = "outdir:dir:{{in.cccfile | stem}}-ccc_plots"
    # Unlike `CellCellCommunication`, this process runs an R script.
    lang = config.lang.rscript
    # Default values for the options documented under `Envs` above.
    # No plot cases are defined by default.
    envs = {
        "score_col": "mag_score",
        "subset": None,
        "cases": {},
    }
    # Job script template, referenced relative to this module.
    script = "file://../scripts/scrna/CellCellCommunicationPlots.R"
    # Report template used to present the generated plots.
    plugin_opts = {
        "report": "file://../reports/scrna/CellCellCommunicationPlots.svelte",
    }
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
{% from "utils/misc.liq" import report_jobs, table_of_images -%}
|
|
2
|
+
<script>
|
|
3
|
+
import { Image } from "$libs";
|
|
4
|
+
</script>
|
|
5
|
+
|
|
6
|
+
{%- macro report_job(job, h=1) -%}
|
|
7
|
+
{{ job | render_job: h=h }}
|
|
8
|
+
{%- endmacro -%}
|
|
9
|
+
|
|
10
|
+
{%- macro head_job(job) -%}
|
|
11
|
+
<h1>{{job.in.cccfile | stem0 | escape}}</h1>
|
|
12
|
+
{%- endmacro -%}
|
|
13
|
+
|
|
14
|
+
{{ report_jobs(jobs, head_job, report_job) }}
|
|
@@ -1,16 +1,20 @@
|
|
|
1
1
|
{% from "utils/misc.liq" import report_jobs, table_of_images -%}
|
|
2
2
|
<script>
|
|
3
|
-
import { Image } from "$libs";
|
|
3
|
+
import { Image, DataTable } from "$libs";
|
|
4
4
|
</script>
|
|
5
5
|
|
|
6
6
|
{%- macro report_job(job, h=1) -%}
|
|
7
|
-
<h{{h}}>Reference UMAP</h{{h}}>
|
|
8
|
-
{% set imgs = job.outdir | glob: "Reference_UMAP_*.png" %}
|
|
9
|
-
{{ table_of_images(imgs) }}
|
|
10
7
|
|
|
11
|
-
<h{{h}}>
|
|
12
|
-
{% set imgs = job.outdir | glob: "
|
|
8
|
+
<h{{h}}>UMAPs</h{{h}}>
|
|
9
|
+
{% set imgs = job.outdir | glob: "UMAPs-*.png" %}
|
|
13
10
|
{{ table_of_images(imgs) }}
|
|
11
|
+
|
|
12
|
+
<h{{h}}>Stats</h{{h}}>
|
|
13
|
+
{% for stfile in job.outdir | glob: "stats-*.txt" %}
|
|
14
|
+
<h{{h+1}}>{{stfile | stem | replace: "stats-", ""}}</h{{h+1}}>
|
|
15
|
+
<DataTable src="{{stfile}}" data={ {{stfile | datatable: sep="\t"}} } />
|
|
16
|
+
{% endfor %}
|
|
17
|
+
|
|
14
18
|
{%- endmacro -%}
|
|
15
19
|
|
|
16
20
|
{%- macro head_job(job) -%}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from biopipen.utils.misc import run_command, logger
|
|
3
|
+
|
|
4
|
+
# using:
|
|
5
|
+
# samtools view --subsample 0.1 --subsample-seed 1234 --threads 4 -b -o out.bam in.bam
|
|
6
|
+
|
|
7
|
+
bamfile = {{ in.bamfile | repr }} # pyright: ignore # noqa
|
|
8
|
+
bedfile = {{ in.bedfile | repr }} # pyright: ignore # noqa
|
|
9
|
+
outfile = Path({{ out.outfile | repr }}) # pyright: ignore
|
|
10
|
+
ncores = {{ envs.ncores | int }} # pyright: ignore
|
|
11
|
+
samtools = {{ envs.samtools | repr }} # pyright: ignore
|
|
12
|
+
tool = {{ envs.tool | repr }} # pyright: ignore
|
|
13
|
+
should_index = {{ envs.index | repr }} # pyright: ignore
|
|
14
|
+
|
|
15
|
+
if tool != "samtools":
|
|
16
|
+
raise ValueError(
|
|
17
|
+
f"Tool {tool} is not supported. "
|
|
18
|
+
"Currently only samtools is supported."
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
cmd = [
|
|
22
|
+
samtools,
|
|
23
|
+
"view",
|
|
24
|
+
"--target-file",
|
|
25
|
+
bedfile,
|
|
26
|
+
"-b",
|
|
27
|
+
"--threads",
|
|
28
|
+
ncores,
|
|
29
|
+
"-o",
|
|
30
|
+
outfile,
|
|
31
|
+
bamfile
|
|
32
|
+
]
|
|
33
|
+
run_command(cmd, fg=True)
|
|
34
|
+
|
|
35
|
+
if should_index:
|
|
36
|
+
logger.info("Indexing the output bam file.")
|
|
37
|
+
cmd = [samtools, "index", "-@", ncores, outfile]
|
|
38
|
+
run_command(cmd, fg=True)
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from biopipen.utils.misc import run_command, logger
|
|
3
|
+
|
|
4
|
+
infile = Path({{in.afile | repr}}) # pyright: ignore # noqa: #999
|
|
5
|
+
outfile = Path({{in.bfile | repr}}) # pyright: ignore
|
|
6
|
+
bedtools = {{envs.bedtools | repr}} # pyright: ignore
|
|
7
|
+
window = {{envs.window | repr}} # pyright: ignore
|
|
8
|
+
step = {{envs.step | repr}} # pyright: ignore
|
|
9
|
+
nwin = {{envs.nwin | repr}} # pyright: ignore
|
|
10
|
+
reverse = {{envs.reverse | repr}} # pyright: ignore
|
|
11
|
+
name = {{envs.name | repr}} # pyright: ignore
|
|
12
|
+
|
|
13
|
+
if nwin is None and window is None:
|
|
14
|
+
raise ValueError("Either `nwin` or `window` should be provided.")
|
|
15
|
+
|
|
16
|
+
if nwin is not None and window is not None:
|
|
17
|
+
raise ValueError("Either `nwin` or `window` should be provided, not both.")
|
|
18
|
+
|
|
19
|
+
# detect if infile is a genome size file or a bed file
|
|
20
|
+
with infile.open() as f:
|
|
21
|
+
line = f.readline().strip()
|
|
22
|
+
if len(line.split("\t")) > 2:
|
|
23
|
+
is_bed = True
|
|
24
|
+
else:
|
|
25
|
+
is_bed = False
|
|
26
|
+
|
|
27
|
+
if is_bed:
|
|
28
|
+
logger.info("BED file is detected as input.")
|
|
29
|
+
cmd = [bedtools, "makewindows", "-b", infile]
|
|
30
|
+
else:
|
|
31
|
+
logger.info("Genome size file is detected as input.")
|
|
32
|
+
cmd = [bedtools, "makewindows", "-g", infile]
|
|
33
|
+
|
|
34
|
+
if nwin:
|
|
35
|
+
cmd.extend(["-n", nwin])
|
|
36
|
+
elif step is not None:
|
|
37
|
+
cmd.extend(["-w", window, "-s", step])
|
|
38
|
+
else:
|
|
39
|
+
cmd.extend(["-w", window])
|
|
40
|
+
|
|
41
|
+
if reverse:
|
|
42
|
+
cmd.append("-reverse")
|
|
43
|
+
|
|
44
|
+
if name != "none":
|
|
45
|
+
cmd.extend(["-name", name])
|
|
46
|
+
|
|
47
|
+
run_command(cmd, stdout=outfile)
|
|
@@ -41,25 +41,33 @@ if (outtype == "rds") {
|
|
|
41
41
|
f <- H5File$new(h5seurat_file, "r+")
|
|
42
42
|
groups <- f$ls(recursive = TRUE)
|
|
43
43
|
|
|
44
|
-
for (name in groups$name[grepl("categories", groups$name)]) {
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
44
|
+
for (name in groups$name[grepl("/categories$", groups$name)]) {
|
|
45
|
+
valuenames <- levelnames <- codenames <- strsplit(name, "/")[[1]]
|
|
46
|
+
valuenames[length(valuenames)] <- "values"
|
|
47
|
+
valuenames <- paste(valuenames, collapse = "/")
|
|
48
|
+
levelnames[length(levelnames)] <- "levels"
|
|
49
|
+
levelnames <- paste(levelnames, collapse = "/")
|
|
50
|
+
codenames[length(codenames)] <- "codes"
|
|
51
|
+
codenames <- paste(codenames, collapse = "/")
|
|
52
|
+
if (!f$exists(codenames)) {
|
|
53
|
+
# No codes, skip
|
|
54
|
+
next
|
|
55
|
+
}
|
|
50
56
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
f
|
|
56
|
-
|
|
57
|
-
|
|
57
|
+
if (!f$exists(levelnames)) {
|
|
58
|
+
f[[levelnames]] <- f[[name]]
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
if (!f$exists(valuenames)) {
|
|
62
|
+
f[[valuenames]] <- f[[codenames]]
|
|
63
|
+
grp <- f[[valuenames]]
|
|
64
|
+
grp$write(args = list(1:grp$dims), value = grp$read() + 1)
|
|
65
|
+
}
|
|
58
66
|
}
|
|
59
67
|
f$close_all()
|
|
60
68
|
# end
|
|
61
69
|
|
|
62
|
-
sobj <- LoadH5Seurat(h5seurat_file)
|
|
70
|
+
sobj <- LoadH5Seurat(h5seurat_file, assays = assay)
|
|
63
71
|
if (!isFALSE(dotplot_check)) {
|
|
64
72
|
log_info("Checking dotplot ...")
|
|
65
73
|
dotfig <- file.path(outdir, "dotplot.png")
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
# patched version of cc_circos
|
|
2
|
+
# See https://github.com/Sarah145/CCPlotR/issues/4
|
|
3
|
+
|
|
4
|
+
# Draw a circos (chord) plot of cell-cell interactions on the current
# graphics device.
#
# Arguments:
#   cc_df        Data frame with columns `source`, `target`, `ligand`,
#                `receptor` and `score`.
#   option       One of "A", "B" or "C":
#                  "A": count the interactions per source/target pair and
#                       draw chords between cell types.
#                  "B": take the `n_top_ints` interactions with the highest
#                       score; sectors are "<cell type>|<gene>", arrows are
#                       coloured by the source cell type.
#                  "C": like "B" with black arrows, plus an extra track
#                       coloured by `mean_exp` from `exp_df`.
#   n_top_ints   Number of top-scoring interactions kept (options B and C).
#   exp_df       Data frame with columns `cell_type`, `gene` and `mean_exp`;
#                required for option "C".
#   cell_cols    Named vector of colours per cell type; when NULL the colours
#                are generated with `paletteMartin()`.
#   palette      RColorBrewer palette name for the expression track (option C).
#   cex          Passed to `par(cex = )` and used to scale legend font sizes.
#   show_legend  Whether to draw the legend(s) (options B and C).
#   scale        Passed to `chordDiagram(scale = )` for options B and C;
#                option A always uses `scale = FALSE`.
#   ...          Further arguments passed to `chordDiagram()`.
#
# The function is called for its side effect (the plot).
cc_circos <- function(cc_df, option = "A", n_top_ints = 15, exp_df = NULL, cell_cols = NULL, palette = "BuPu", cex = 1, show_legend = TRUE, scale = FALSE, ...) {
    stopifnot("'cc_df' must be a dataframe" = is(cc_df, "data.frame"))
    stopifnot("cc_df should contain columns named source, target, ligand, receptor and score. See `toy_data` for an example." = all(c('source', 'target', 'ligand', 'receptor', 'score') %in% colnames(cc_df)))
    stopifnot("option must be either 'A', 'B', 'C'" = option %in% c('A', 'B', 'C'))
    library(stringr)
    library(ComplexHeatmap)
    library(circlize)
    # Reset any circos state left over from a previous plot
    circos.clear()

    # Bind the column names used in non-standard evaluation below
    target <- score <- ligand <- receptor <- source_lig <- target_rec <- cell_type <- gene <- cell_gene <- NULL

    # Map scores linearly onto arrow line widths in [1, 5].
    # When all selected scores are identical (or only one interaction is
    # selected) the range is zero and the plain formula evaluates 0 / 0 = NaN,
    # which is not a usable line width; use a constant mid-range width then.
    arrow_widths <- function(scores) {
        score_range <- max(scores) - min(scores)
        if (!is.finite(score_range) || score_range == 0) {
            return(rep(3, length(scores)))
        }
        (((scores - min(scores)) / score_range) * (4)) + 1
    }

    if (option == "A") {
        # Number of interactions for each source/target pair
        input_df <- cc_df %>%
            mutate(source = factor(source), target = factor(target)) %>%
            group_by(source, target) %>%
            tally()
        if (is.null(cell_cols)) {
            cell_cols <- setNames(paletteMartin(n = length(unique(c(input_df$source, input_df$target)))), unique(c(input_df$source, input_df$target)))
        }
        circlize_plot <- function() {
            par(cex = cex)
            chordDiagram(input_df,
                scale = FALSE, grid.col = cell_cols,
                annotationTrack = c("grid", "name"), directional = 1, direction.type = c("arrows", "diffHeight"), link.arr.type = "big.arrow", link.arr.length = 0.1, diffHeight = -mm_h(0.5), preAllocateTracks = list(
                    track.height = mm_h(10),
                    track.margin = c(mm_h(2), -mm_h(4))
                ), ...
            )
        }
    } else if (option == "B") {
        # Top interactions, with one sector per "<cell type>|<gene>"
        input_df <- cc_df %>%
            slice_max(order_by = score, n = n_top_ints) %>%
            mutate(
                source_lig = paste0(source, "|", ligand),
                target_rec = paste0(target, "|", receptor)
            )
        arr_wd <- arrow_widths(input_df$score)

        if (is.null(cell_cols)) {
            cell_cols <- setNames(paletteMartin(n = length(unique(c(input_df$source, input_df$target)))), unique(c(input_df$source, input_df$target)))
        }

        # Each arrow takes the colour of its source cell type
        # (the part of the sector name before the first "|")
        link_cols <- c()
        for (i in input_df$source_lig) {
            link_cols <- c(link_cols, cell_cols[str_extract(i, "[^|]+")])
        }

        # Group the sectors by cell type
        segments <- unique(c(paste0(input_df$source, "|", input_df$ligand), paste0(input_df$target, "|", input_df$receptor)))
        grp <- str_extract(segments, "[^|]+")
        names(grp) <- segments
        lgd <- Legend(
            labels = unique(c(input_df$source, input_df$target)),
            title = "Cell type",
            type = "points",
            title_gp = gpar(fontsize = 14 * cex),
            labels_gp = gpar(fontsize = 12 * cex),
            legend_gp = gpar(col = "transparent"),
            background = cell_cols[unique(c(input_df$source, input_df$target))]
        )
        circlize_plot <- function() {
            par(cex = cex)
            chordDiagram(
                input_df %>%
                    select(source_lig, target_rec, score),
                directional = 1, group = grp, link.sort = FALSE, scale = scale, diffHeight = 0.005,
                direction.type = c("arrows"), link.arr.type = "triangle", annotationTrack = c(),
                preAllocateTracks = list(list(track.height = 0.175), list(track.height = 0.05)),
                big.gap = 3, transparency = 1, link.arr.lwd = arr_wd, link.arr.col = link_cols,
                link.arr.length = 0.4, link.arr.width = 0.35, ...
            )
            # Track 1: gene labels (the part of the sector name after the last "|")
            circos.track(track.index = 1, panel.fun = function(x, y) {
                circos.text(CELL_META$xcenter, CELL_META$ylim[1], str_extract(CELL_META$sector.index, "[^|]+$"),
                    facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.55), cex = 1.3
                )
            }, bg.border = NA)
            # Track 2: one coloured band per cell type; the cell type name is
            # escaped because it is used inside a regular expression
            for (l in unique(str_extract(segments, "[^|]+"))) {
                highlight.sector(segments[str_detect(segments, paste0("^", str_escape(l)))], track.index = 2, col = cell_cols[l])
            }
            if (show_legend == TRUE) {
                draw(lgd, just = c("left", "bottom"), x = unit(5, "mm"), y = unit(5, "mm"))
            }
            circos.clear()
        }
    } else if (option == "C") {
        stopifnot("'exp_df' must be a dataframe" = is(exp_df, "data.frame"))
        stopifnot("exp_df should contain columns named cell_type, gene and mean_exp. See `toy_exp` for an example." = all(c('cell_type', 'gene', 'mean_exp') %in% colnames(exp_df)))

        # Top interactions, with one sector per "<cell type>|<gene>"
        input_df <- cc_df %>%
            slice_max(order_by = score, n = n_top_ints) %>%
            mutate(
                source_lig = paste0(source, "|", ligand),
                target_rec = paste0(target, "|", receptor)
            )

        arr_wd <- arrow_widths(input_df$score)

        if (is.null(cell_cols)) {
            cell_cols <- setNames(paletteMartin(n = length(unique(c(input_df$source, input_df$target)))), unique(c(input_df$source, input_df$target)))
        }

        # Group the sectors by cell type
        segments <- unique(c(paste0(input_df$source, "|", input_df$ligand), paste0(input_df$target, "|", input_df$receptor)))
        grp <- str_extract(segments, "[^|]+")
        names(grp) <- segments

        # Mean expression of every plotted "<cell type>|<gene>" sector
        gene_df <- as.data.frame(exp_df %>% mutate(cell_gene = paste0(cell_type, "|", gene)) %>% filter(cell_gene %in% segments))
        rownames(gene_df) <- gene_df$cell_gene

        # Colour scale for the expression track
        brks <- scales::pretty_breaks(n = 5)(c(floor(min(gene_df$mean_exp)), ceiling(max(gene_df$mean_exp))))
        gene_col_fun <- colorRamp2(brks, RColorBrewer::brewer.pal(length(brks), palette))

        inner.cols <- setNames(gene_col_fun(gene_df[segments, "mean_exp"]), segments)
        lgd1 <- Legend(
            labels = unique(c(input_df$source, input_df$target)),
            title = "Cell type",
            type = "points",
            title_gp = gpar(fontsize = 14 * cex),
            labels_gp = gpar(fontsize = 12 * cex),
            legend_gp = gpar(col = "transparent"),
            background = cell_cols[unique(c(input_df$source, input_df$target))],
            direction = "horizontal"
        )

        lgd2 <- Legend(
            title_gp = gpar(fontsize = 14 * cex),
            labels_gp = gpar(fontsize = 12 * cex),
            direction = "horizontal", at = brks,
            col_fun = gene_col_fun, title = "Mean exp."
        )
        circlize_plot <- function() {
            par(cex = cex)
            chordDiagram(
                input_df %>%
                    select(source_lig, target_rec, score),
                directional = 1, group = grp, link.sort = FALSE, diffHeight = 0.005, scale = scale,
                direction.type = c("arrows"), link.arr.type = "triangle", annotationTrack = c(),
                preAllocateTracks = list(list(track.height = 0.175), list(track.height = 0.05), list(track.height = 0.045)),
                big.gap = 3, transparency = 1, link.arr.lwd = arr_wd, link.arr.col = "black", link.arr.length = 0.4, link.arr.width = 0.35, ...
            )
            # Track 1: gene labels (the part of the sector name after the last "|")
            circos.track(track.index = 1, panel.fun = function(x, y) {
                circos.text(CELL_META$xcenter, CELL_META$ylim[1], str_extract(CELL_META$sector.index, "[^|]+$"),
                    facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.55), cex = 1.3
                )
            }, bg.border = NA)
            # Track 2: one coloured band per cell type; the cell type name is
            # escaped because it is used inside a regular expression
            for (l in unique(str_extract(segments, "[^|]+"))) {
                highlight.sector(segments[str_detect(segments, paste0("^", str_escape(l)))], track.index = 2, col = cell_cols[l])
            }
            # Track 3: mean expression of each ligand/receptor
            circos.track(track.index = 3, panel.fun = function(x, y) {
                circos.rect(CELL_META$xlim[1], CELL_META$ylim[1], CELL_META$xlim[2], CELL_META$ylim[2],
                    sector.index = CELL_META$sector.index, col = inner.cols[CELL_META$sector.index]
                )
            }, bg.border = NA)
            if (show_legend == TRUE) {
                draw(packLegend(lgd1, lgd2, direction = "vertical"), just = c("left", "bottom"), x = unit(4.75, "mm"), y = unit(4.75, "mm"))
            }
            circos.clear()
        }
    }
    circlize_plot()
}
|