biopipen 0.22.0__py3-none-any.whl → 0.22.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/core/config.toml +6 -0
- biopipen/core/filters.py +12 -0
- biopipen/ns/cellranger.py +101 -0
- biopipen/ns/scrna.py +2 -0
- biopipen/ns/tcr.py +30 -10
- biopipen/reports/cellranger/CellRangerCount.svelte +16 -0
- biopipen/reports/cellranger/CellRangerVdj.svelte +16 -0
- biopipen/scripts/cellranger/CellRangerCount.py +79 -0
- biopipen/scripts/cellranger/CellRangerVdj.py +79 -0
- biopipen/scripts/scrna/CellTypeAnnotation-direct.R +31 -24
- biopipen/scripts/scrna/CellsDistribution.R +9 -8
- biopipen/scripts/scrna/MarkersFinder.R +106 -28
- biopipen/scripts/scrna/SeuratClusterStats-features.R +2 -2
- biopipen/scripts/scrna/SeuratMetadataMutater.R +13 -1
- biopipen/scripts/tcr/Attach2Seurat.R +2 -1
- biopipen/scripts/tcr/CDR3AAPhyschem.R +1 -1
- biopipen/scripts/tcr/Immunarch.R +3 -0
- biopipen/scripts/tcr/ImmunarchLoading.R +22 -23
- biopipen/scripts/tcr/TCRClustering.R +8 -9
- biopipen/scripts/tcr/TESSA.R +23 -30
- biopipen/utils/common_docstrs.py +3 -0
- biopipen/utils/mutate_helpers.R +110 -106
- {biopipen-0.22.0.dist-info → biopipen-0.22.2.dist-info}/METADATA +1 -1
- {biopipen-0.22.0.dist-info → biopipen-0.22.2.dist-info}/RECORD +27 -22
- {biopipen-0.22.0.dist-info → biopipen-0.22.2.dist-info}/entry_points.txt +1 -0
- {biopipen-0.22.0.dist-info → biopipen-0.22.2.dist-info}/WHEEL +0 -0
biopipen/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.22.
|
|
1
|
+
__version__ = "0.22.2"
|
biopipen/core/config.toml
CHANGED
|
@@ -4,6 +4,8 @@
|
|
|
4
4
|
bedtools = "bedtools"
|
|
5
5
|
# bcftools to handle bcf/vcf files
|
|
6
6
|
bcftools = "bcftools"
|
|
7
|
+
# cellranger
|
|
8
|
+
cellranger = "cellranger"
|
|
7
9
|
# Control-FREEC to call cnvs
|
|
8
10
|
freec = "freec"
|
|
9
11
|
# liftover coordinates across genomes
|
|
@@ -59,6 +61,10 @@ liftover_chain = ""
|
|
|
59
61
|
# tmpdir = ""
|
|
60
62
|
|
|
61
63
|
[ref]
|
|
64
|
+
# The reference for cellranger gex
|
|
65
|
+
ref_cellranger_gex = ""
|
|
66
|
+
# The reference for cellranger vdj
|
|
67
|
+
ref_cellranger_vdj = ""
|
|
62
68
|
# The reference genome
|
|
63
69
|
reffa = ""
|
|
64
70
|
# The directory with reference for each chromosome
|
biopipen/core/filters.py
CHANGED
|
@@ -15,6 +15,7 @@ filtermanager = FilterManager()
|
|
|
15
15
|
@filtermanager.register
|
|
16
16
|
def dict_to_cli_args(
|
|
17
17
|
dic: Mapping[str, Any],
|
|
18
|
+
exclude: List[str] = None,
|
|
18
19
|
prefix: str | None = None,
|
|
19
20
|
sep: str | None = " ",
|
|
20
21
|
dup_key: bool = True,
|
|
@@ -27,6 +28,7 @@ def dict_to_cli_args(
|
|
|
27
28
|
|
|
28
29
|
Args:
|
|
29
30
|
dic: The dict to convert
|
|
31
|
+
exclude: The keys to exclude
|
|
30
32
|
prefix: The prefix of the keys after conversion
|
|
31
33
|
Defaults to `None`, meaning `-` for short keys and `--` for long keys
|
|
32
34
|
sep: The separator between key and value
|
|
@@ -37,6 +39,13 @@ def dict_to_cli_args(
|
|
|
37
39
|
If `sep` is `None` or `=`, this must be True, otherwise an error
|
|
38
40
|
will be raised
|
|
39
41
|
join: Whether to join the arguments into a single string
|
|
42
|
+
start_key: The key to start the arguments
|
|
43
|
+
This is useful when you want to put some arguments at the beginning
|
|
44
|
+
of the command line
|
|
45
|
+
end_key: The key to end the arguments
|
|
46
|
+
This is useful when you want to put some arguments at the end
|
|
47
|
+
of the command line
|
|
48
|
+
dashify: Whether to replace `_` with `-` in the keys
|
|
40
49
|
|
|
41
50
|
Returns:
|
|
42
51
|
The converted string or list of strings
|
|
@@ -44,6 +53,9 @@ def dict_to_cli_args(
|
|
|
44
53
|
if sep in [None, "="] and not dup_key:
|
|
45
54
|
raise ValueError("`dup_key` must be True when sep is `None` or `=`")
|
|
46
55
|
|
|
56
|
+
if exclude:
|
|
57
|
+
dic = {k: v for k, v in dic.items() if k not in exclude}
|
|
58
|
+
|
|
47
59
|
starts = []
|
|
48
60
|
ends = []
|
|
49
61
|
out = []
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
"""Cellranger pipeline module for BioPipen"""
|
|
2
|
+
from ..core.proc import Proc
|
|
3
|
+
from ..core.config import config
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class CellRangerCount(Proc):
|
|
7
|
+
"""Run cellranger count
|
|
8
|
+
|
|
9
|
+
to count gene expression and/or feature barcode reads
|
|
10
|
+
|
|
11
|
+
Input:
|
|
12
|
+
fastqs: The input fastq files
|
|
13
|
+
Either a list of fastq files or a directory containing fastq files
|
|
14
|
+
If a directory is provided, it should be passed as a list with one
|
|
15
|
+
element.
|
|
16
|
+
|
|
17
|
+
Output:
|
|
18
|
+
outdir: The output directory
|
|
19
|
+
|
|
20
|
+
Envs:
|
|
21
|
+
ncores: Number of cores to use
|
|
22
|
+
cellranger: Path to cellranger
|
|
23
|
+
ref: Path of folder containing 10x-compatible transcriptome reference
|
|
24
|
+
tmpdir: Path to temporary directory, used to save the soft-linked fastq files
|
|
25
|
+
to pass to cellranger
|
|
26
|
+
include_introns: Set to false to exclude intronic reads in count.
|
|
27
|
+
<more>: Other environment variables required by `cellranger count`
|
|
28
|
+
See `cellranger count --help` for more details or
|
|
29
|
+
https://www.10xgenomics.com/support/software/cell-ranger/advanced/cr-command-line-arguments#count
|
|
30
|
+
""" # noqa: E501
|
|
31
|
+
input = "fastqs:files"
|
|
32
|
+
output = """outdir:dir:
|
|
33
|
+
{%- set fastqs = in.fastqs -%}
|
|
34
|
+
{%- if len(fastqs) == 1 and isdir(fastqs[0]) -%}
|
|
35
|
+
{%- set fastqs = fastqs[0] | glob: "*.fastq.gz" -%}
|
|
36
|
+
{%- endif -%}
|
|
37
|
+
{%- set sample = commonprefix(*fastqs) |
|
|
38
|
+
regex_replace: "_L\\d+_$", "" |
|
|
39
|
+
regex_replace: "_S\\d+$", "" -%}
|
|
40
|
+
{{- sample -}}
|
|
41
|
+
"""
|
|
42
|
+
lang = config.lang.python
|
|
43
|
+
envs = {
|
|
44
|
+
"ncores": config.misc.ncores,
|
|
45
|
+
"cellranger": config.exe.cellranger,
|
|
46
|
+
"ref": config.ref.ref_cellranger_gex,
|
|
47
|
+
"tmpdir": config.path.tmpdir,
|
|
48
|
+
"include_introns": "true",
|
|
49
|
+
}
|
|
50
|
+
script = "file://../scripts/cellranger/CellRangerCount.py"
|
|
51
|
+
plugin_opts = {
|
|
52
|
+
"report": "file://../reports/cellranger/CellRangerCount.svelte",
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class CellRangerVdj(Proc):
|
|
57
|
+
"""Run cellranger vdj
|
|
58
|
+
|
|
59
|
+
to perform sequence assembly and paired clonotype calling
|
|
60
|
+
|
|
61
|
+
Input:
|
|
62
|
+
fastqs: The input fastq files
|
|
63
|
+
Either a list of fastq files or a directory containing fastq files
|
|
64
|
+
If a directory is provided, it should be passed as a list with one
|
|
65
|
+
element.
|
|
66
|
+
|
|
67
|
+
Output:
|
|
68
|
+
outdir: The output directory
|
|
69
|
+
|
|
70
|
+
Envs:
|
|
71
|
+
ncores: Number of cores to use
|
|
72
|
+
cellranger: Path to cellranger
|
|
73
|
+
ref: Path of folder containing 10x-compatible VDJ reference
|
|
74
|
+
tmpdir: Path to temporary directory, used to save the soft-linked fastq files
|
|
75
|
+
to pass to cellranger
|
|
76
|
+
<more>: Other environment variables required by `cellranger vdj`
|
|
77
|
+
See `cellranger vdj --help` for more details or
|
|
78
|
+
https://www.10xgenomics.com/support/software/cell-ranger/advanced/cr-command-line-arguments#vdj
|
|
79
|
+
""" # noqa: E501
|
|
80
|
+
input = "fastqs:files"
|
|
81
|
+
output = """outdir:dir:
|
|
82
|
+
{%- set fastqs = in.fastqs -%}
|
|
83
|
+
{%- if len(fastqs) == 1 and isdir(fastqs[0]) -%}
|
|
84
|
+
{%- set fastqs = fastqs[0] | glob: "*.fastq.gz" -%}
|
|
85
|
+
{%- endif -%}
|
|
86
|
+
{%- set sample = commonprefix(*fastqs) |
|
|
87
|
+
regex_replace: "_L\\d+_$", "" |
|
|
88
|
+
regex_replace: "_S\\d+$", "" -%}
|
|
89
|
+
{{- sample -}}
|
|
90
|
+
"""
|
|
91
|
+
lang = config.lang.python
|
|
92
|
+
envs = {
|
|
93
|
+
"ncores": config.misc.ncores,
|
|
94
|
+
"cellranger": config.exe.cellranger,
|
|
95
|
+
"ref": config.ref.ref_cellranger_vdj,
|
|
96
|
+
"tmpdir": config.path.tmpdir,
|
|
97
|
+
}
|
|
98
|
+
script = "file://../scripts/cellranger/CellRangerVdj.py"
|
|
99
|
+
plugin_opts = {
|
|
100
|
+
"report": "file://../reports/cellranger/CellRangerVdj.svelte",
|
|
101
|
+
}
|
biopipen/ns/scrna.py
CHANGED
|
@@ -1422,6 +1422,8 @@ class CellTypeAnnotation(Proc):
|
|
|
1422
1422
|
If the length of `cell_types` is shorter than the number of
|
|
1423
1423
|
clusters, the remaining clusters will be kept as the original cell
|
|
1424
1424
|
types.
|
|
1425
|
+
You can also use `NA` to remove the clusters from downstream analysis. This
|
|
1426
|
+
only works when `envs.newcol` is not specified.
|
|
1425
1427
|
|
|
1426
1428
|
/// Note
|
|
1427
1429
|
If `tool` is `direct` and `cell_types` is not specified or an empty list,
|
biopipen/ns/tcr.py
CHANGED
|
@@ -40,11 +40,13 @@ class ImmunarchLoading(Proc):
|
|
|
40
40
|
|
|
41
41
|
Output:
|
|
42
42
|
rdsfile: The RDS file with the data and metadata
|
|
43
|
-
metatxt: The meta data
|
|
43
|
+
metatxt: The meta data at cell level, which can be used to attach to the Seurat object
|
|
44
44
|
|
|
45
45
|
Envs:
|
|
46
46
|
prefix: The prefix to the barcodes. You can use placeholder like `{Sample}_`
|
|
47
|
-
to use the meta data from the `immunarch` object.
|
|
47
|
+
to use the meta data from the `immunarch` object. The prefixed barcodes will
|
|
48
|
+
be saved in `out.metatxt`. The `immunarch` object keeps the original barcodes, but
|
|
49
|
+
the prefix is saved at `immdata$prefix`.
|
|
48
50
|
|
|
49
51
|
/// Note
|
|
50
52
|
This option is useful because the barcodes for the cells from scRNA-seq
|
|
@@ -65,10 +67,16 @@ class ImmunarchLoading(Proc):
|
|
|
65
67
|
paired chain data. For `single`, only TRB chain will be kept
|
|
66
68
|
at `immdata$data`, information for other chains will be
|
|
67
69
|
saved at `immdata$tra` and `immdata$multi`.
|
|
68
|
-
|
|
70
|
+
extracols (list): The extra columns to be exported to the text file.
|
|
69
71
|
You can refer to the
|
|
70
72
|
[immunarch documentation](https://immunarch.com/articles/v2_data.html#immunarch-data-format)
|
|
71
|
-
for the full list of the columns.
|
|
73
|
+
to get a sense for the full list of the columns.
|
|
74
|
+
The columns may vary depending on the data source.
|
|
75
|
+
The columns from `immdata$meta` and some core columns, including
|
|
76
|
+
`Barcode`, `CDR3.aa`, `Clones`, `Proportion`, `V.name`, `J.name`, and
|
|
77
|
+
`D.name` will be exported by default. You can use this option to
|
|
78
|
+
specify the extra columns to be exported.
|
|
79
|
+
|
|
72
80
|
""" # noqa: E501
|
|
73
81
|
input = "metafile:file"
|
|
74
82
|
output = [
|
|
@@ -80,7 +88,7 @@ class ImmunarchLoading(Proc):
|
|
|
80
88
|
"tmpdir": config.path.tmpdir,
|
|
81
89
|
"prefix": "{Sample}_",
|
|
82
90
|
"mode": "single",
|
|
83
|
-
"
|
|
91
|
+
"extracols": [],
|
|
84
92
|
}
|
|
85
93
|
script = "file://../scripts/tcr/ImmunarchLoading.R"
|
|
86
94
|
|
|
@@ -322,6 +330,7 @@ class Immunarch(Proc):
|
|
|
322
330
|
prefix: The prefix to the barcodes. You can use placeholder like `{Sample}_`
|
|
323
331
|
The prefixed barcodes will be used to match the barcodes in `in.metafile`.
|
|
324
332
|
Not used if `in.metafile` is not specified.
|
|
333
|
+
If `None` (default), `immdata$prefix` will be used.
|
|
325
334
|
volumes (ns): Explore clonotype volume (sizes).
|
|
326
335
|
- by: Groupings when visualize clonotype volumes, passed to the `.by` argument of `vis(imm_vol, .by = <values>)`.
|
|
327
336
|
Multiple columns should be separated by `,`.
|
|
@@ -682,7 +691,7 @@ class Immunarch(Proc):
|
|
|
682
691
|
lang = config.lang.rscript
|
|
683
692
|
envs = {
|
|
684
693
|
"mutaters": {},
|
|
685
|
-
"prefix":
|
|
694
|
+
"prefix": None,
|
|
686
695
|
# basic statistics
|
|
687
696
|
"volumes": {
|
|
688
697
|
"by": None,
|
|
@@ -1179,6 +1188,10 @@ class TCRClustering(Proc):
|
|
|
1179
1188
|
For GIANA, using TRBV mutations is not supported
|
|
1180
1189
|
- GIANA: by Li lab at UT Southwestern Medical Center
|
|
1181
1190
|
- ClusTCR: by Sebastiaan Valkiers, etc
|
|
1191
|
+
prefix: The prefix to the barcodes. You can use placeholder like `{Sample}_`
|
|
1192
|
+
The prefixed barcodes will be used to match the barcodes in `in.metafile`.
|
|
1193
|
+
Not used if `in.metafile` is not specified.
|
|
1194
|
+
If `None` (default), `immdata$prefix` will be used.
|
|
1182
1195
|
python: The path of python with `GIANA`'s dependencies installed
|
|
1183
1196
|
or with `clusTCR` installed. Depending on the `tool` you choose.
|
|
1184
1197
|
args (type=json): The arguments for the clustering tool
|
|
@@ -1202,6 +1215,7 @@ class TCRClustering(Proc):
|
|
|
1202
1215
|
lang = config.lang.rscript
|
|
1203
1216
|
envs = {
|
|
1204
1217
|
"tool": "GIANA", # or ClusTCR
|
|
1218
|
+
"prefix": None,
|
|
1205
1219
|
"on_multi": False,
|
|
1206
1220
|
"python": config.lang.python,
|
|
1207
1221
|
"args": {},
|
|
@@ -1507,7 +1521,8 @@ class TESSA(Proc):
|
|
|
1507
1521
|
[link](https://www.nature.com/articles/s42256-021-00383-2)
|
|
1508
1522
|
|
|
1509
1523
|
Input:
|
|
1510
|
-
immdata: The
|
|
1524
|
+
immdata: The immunarch object in RDS file or text file of TCR data loaded by
|
|
1525
|
+
[`ImmunarchLoading`](!!#biopipennstcrimmunarchloading)
|
|
1511
1526
|
srtobj: The `Seurat` object, saved in RDS format, with dimension
|
|
1512
1527
|
reduction performed if you want to use them to represent the
|
|
1513
1528
|
transcriptome of T cells.
|
|
@@ -1522,8 +1537,13 @@ class TESSA(Proc):
|
|
|
1522
1537
|
|
|
1523
1538
|
Envs:
|
|
1524
1539
|
python: The path of python with `TESSA`'s dependencies installed
|
|
1525
|
-
prefix: The prefix
|
|
1526
|
-
|
|
1540
|
+
prefix: The prefix of the cell barcodes in the `Seurat` object.
|
|
1541
|
+
One could use a fixed prefix, or a placeholder with the column
|
|
1542
|
+
name in meta data. For example, `"{Sample}_"` will replace the
|
|
1543
|
+
placeholder with the value of the column `Sample` in meta data.
|
|
1544
|
+
If `in.immdata` is text file, the prefix will be ignored and the
|
|
1545
|
+
barcode should be already prefixed.
|
|
1546
|
+
If `None` and `in.immdata` is RDS file, `immdata$prefix` will be used.
|
|
1527
1547
|
within_sample (flag): Whether the TCR networks are constructed only
|
|
1528
1548
|
within TCRs from the same sample/patient (True) or with all the
|
|
1529
1549
|
TCRs in the meta data matrix (False).
|
|
@@ -1548,7 +1568,7 @@ class TESSA(Proc):
|
|
|
1548
1568
|
lang = config.lang.rscript
|
|
1549
1569
|
envs = {
|
|
1550
1570
|
"python": config.lang.python,
|
|
1551
|
-
"prefix":
|
|
1571
|
+
"prefix": None,
|
|
1552
1572
|
"assay": "RNA",
|
|
1553
1573
|
"within_sample": False,
|
|
1554
1574
|
"predefined_b": False,
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{% from "utils/misc.liq" import report_jobs, table_of_images -%}
|
|
2
|
+
|
|
3
|
+
{%- macro report_job(job, h=1) -%}
|
|
4
|
+
<h{{h}}>{{job.out.outdir | basename | escape}}</h{{h}}>
|
|
5
|
+
<iframe
|
|
6
|
+
src="{{job.out.outdir}}/outs/web_summary.html"
|
|
7
|
+
width="100%"
|
|
8
|
+
frameborder="0"
|
|
9
|
+
style="min-height: 80vh"></iframe>
|
|
10
|
+
{%- endmacro -%}
|
|
11
|
+
|
|
12
|
+
{%- macro head_job(job) -%}
|
|
13
|
+
<h1>{{job.out.outdir | basename | escape}}</h1>
|
|
14
|
+
{%- endmacro -%}
|
|
15
|
+
|
|
16
|
+
{{ report_jobs(jobs, head_job, report_job) }}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{% from "utils/misc.liq" import report_jobs, table_of_images -%}
|
|
2
|
+
|
|
3
|
+
{%- macro report_job(job, h=1) -%}
|
|
4
|
+
<h{{h}}>{{job.out.outdir | basename | escape}}</h{{h}}>
|
|
5
|
+
<iframe
|
|
6
|
+
src="{{job.out.outdir}}/outs/web_summary.html"
|
|
7
|
+
width="100%"
|
|
8
|
+
frameborder="0"
|
|
9
|
+
style="min-height: 80vh"></iframe>
|
|
10
|
+
{%- endmacro -%}
|
|
11
|
+
|
|
12
|
+
{%- macro head_job(job) -%}
|
|
13
|
+
<h1>{{job.out.outdir | basename | escape}}</h1>
|
|
14
|
+
{%- endmacro -%}
|
|
15
|
+
|
|
16
|
+
{{ report_jobs(jobs, head_job, report_job) }}
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
import uuid
|
|
2
|
+
import re
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from biopipen.utils.misc import run_command
|
|
5
|
+
|
|
6
|
+
fastqs = {{in.fastqs | repr}} # pyright: ignore # noqa
|
|
7
|
+
outdir = {{out.outdir | quote}} # pyright: ignore
|
|
8
|
+
|
|
9
|
+
cellranger = {{envs.cellranger | quote}} # pyright: ignore
|
|
10
|
+
tmpdir = Path({{envs.tmpdir | quote}}) # pyright: ignore
|
|
11
|
+
ref = {{envs.ref | quote}} # pyright: ignore
|
|
12
|
+
ncores = {{envs.ncores | int}} # pyright: ignore
|
|
13
|
+
|
|
14
|
+
{% if "id" in envs -%}
|
|
15
|
+
id = {{envs.id | quote}} # pyright: ignore
|
|
16
|
+
{%- else -%}
|
|
17
|
+
id = {{out.outdir | basename | quote}} # pyright: ignore
|
|
18
|
+
{%- endif %}
|
|
19
|
+
|
|
20
|
+
{% if "sample" in envs -%}
|
|
21
|
+
sample = {{envs.sample | quote}} # pyright: ignore
|
|
22
|
+
{%- else -%}
|
|
23
|
+
sample = {{out.outdir | basename | quote}} # pyright: ignore
|
|
24
|
+
{%- endif %}
|
|
25
|
+
|
|
26
|
+
# create a temporary unique directory to store the soft-linked fastq files
|
|
27
|
+
fastqdir = tmpdir / f"cellranger_count_{uuid.uuid4()}"
|
|
28
|
+
fastqdir.mkdir(parents=True, exist_ok=True)
|
|
29
|
+
if len(fastqs) == 1 and fastqs[0].is_dir():
|
|
30
|
+
fastqs = list(fastqs[0].glob("*.fastq.gz"))
|
|
31
|
+
|
|
32
|
+
# soft-link the fastq files to the temporary directory
|
|
33
|
+
for fastq in fastqs:
|
|
34
|
+
fastq = Path(fastq)
|
|
35
|
+
(fastqdir / fastq.name).symlink_to(fastq)
|
|
36
|
+
|
|
37
|
+
other_args = {{envs | dict_to_cli_args: dashify=True, exclude=['cellranger', 'transcriptome', 'ref', 'tmpdir', 'id', 'sample', 'ncores']}} # pyright: ignore
|
|
38
|
+
|
|
39
|
+
command = [
|
|
40
|
+
cellranger,
|
|
41
|
+
"count",
|
|
42
|
+
"--id",
|
|
43
|
+
id,
|
|
44
|
+
"--sample",
|
|
45
|
+
sample,
|
|
46
|
+
"--fastqs",
|
|
47
|
+
fastqdir,
|
|
48
|
+
"--transcriptome",
|
|
49
|
+
ref,
|
|
50
|
+
"--localcores",
|
|
51
|
+
ncores,
|
|
52
|
+
"--disable-ui",
|
|
53
|
+
*other_args,
|
|
54
|
+
]
|
|
55
|
+
|
|
56
|
+
run_command(command, fg=True, cwd=str(Path(outdir).parent))
|
|
57
|
+
|
|
58
|
+
web_summary_html = Path(outdir) / "outs" / "web_summary.html"
|
|
59
|
+
if not web_summary_html.exists():
|
|
60
|
+
raise RuntimeError(
|
|
61
|
+
f"web_summary.html does not exist in {outdir}/outs. "
|
|
62
|
+
"cellranger count failed."
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
# Modify web_summary.html to move javascript to a separate file
|
|
66
|
+
# to avoid vscode live server breaking the page by injecting some code
|
|
67
|
+
print("# Modify web_summary.html to move javascript to a separate file")
|
|
68
|
+
try:
|
|
69
|
+
web_summary_js = Path(outdir) / "outs" / "web_summary.js"
|
|
70
|
+
web_summary_content = web_summary_html.read_text()
|
|
71
|
+
regex = re.compile(r"<script>(?=/\*! For license)(.+)</script>", re.DOTALL)
|
|
72
|
+
web_summary_html.write_text(regex.sub(
|
|
73
|
+
'<script src="web_summary.js"></script>',
|
|
74
|
+
web_summary_content,
|
|
75
|
+
))
|
|
76
|
+
web_summary_js.write_text(regex.search(web_summary_content).group(1))
|
|
77
|
+
except Exception as e:
|
|
78
|
+
print(f"Error modifying web_summary.html: {e}")
|
|
79
|
+
raise e
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
import uuid
|
|
2
|
+
import re
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from biopipen.utils.misc import run_command
|
|
5
|
+
|
|
6
|
+
fastqs = {{in.fastqs | repr}} # pyright: ignore # noqa
|
|
7
|
+
outdir = {{out.outdir | quote}} # pyright: ignore
|
|
8
|
+
|
|
9
|
+
cellranger = {{envs.cellranger | quote}} # pyright: ignore
|
|
10
|
+
tmpdir = Path({{envs.tmpdir | quote}}) # pyright: ignore
|
|
11
|
+
ref = {{envs.ref | quote}} # pyright: ignore
|
|
12
|
+
ncores = {{envs.ncores | int}} # pyright: ignore
|
|
13
|
+
|
|
14
|
+
{% if "id" in envs -%}
|
|
15
|
+
id = {{envs.id | quote}} # pyright: ignore
|
|
16
|
+
{%- else -%}
|
|
17
|
+
id = {{out.outdir | basename | quote}} # pyright: ignore
|
|
18
|
+
{%- endif %}
|
|
19
|
+
|
|
20
|
+
{% if "sample" in envs -%}
|
|
21
|
+
sample = {{envs.sample | quote}} # pyright: ignore
|
|
22
|
+
{%- else -%}
|
|
23
|
+
sample = {{out.outdir | basename | quote}} # pyright: ignore
|
|
24
|
+
{%- endif %}
|
|
25
|
+
|
|
26
|
+
# create a temporary unique directory to store the soft-linked fastq files
|
|
27
|
+
fastqdir = tmpdir / f"cellranger_count_{uuid.uuid4()}"
|
|
28
|
+
fastqdir.mkdir(parents=True, exist_ok=True)
|
|
29
|
+
if len(fastqs) == 1 and fastqs[0].is_dir():
|
|
30
|
+
fastqs = list(fastqs[0].glob("*.fastq.gz"))
|
|
31
|
+
|
|
32
|
+
# soft-link the fastq files to the temporary directory
|
|
33
|
+
for fastq in fastqs:
|
|
34
|
+
fastq = Path(fastq)
|
|
35
|
+
(fastqdir / fastq.name).symlink_to(fastq)
|
|
36
|
+
|
|
37
|
+
other_args = {{envs | dict_to_cli_args: dashify=True, exclude=['cellranger', 'reference', 'ref', 'tmpdir', 'id', 'sample', 'ncores']}} # pyright: ignore
|
|
38
|
+
|
|
39
|
+
command = [
|
|
40
|
+
cellranger,
|
|
41
|
+
"vdj",
|
|
42
|
+
"--id",
|
|
43
|
+
id,
|
|
44
|
+
"--sample",
|
|
45
|
+
sample,
|
|
46
|
+
"--fastqs",
|
|
47
|
+
fastqdir,
|
|
48
|
+
"--reference",
|
|
49
|
+
ref,
|
|
50
|
+
"--localcores",
|
|
51
|
+
ncores,
|
|
52
|
+
"--disable-ui",
|
|
53
|
+
*other_args,
|
|
54
|
+
]
|
|
55
|
+
|
|
56
|
+
run_command(command, fg=True, cwd=str(Path(outdir).parent))
|
|
57
|
+
|
|
58
|
+
web_summary_html = Path(outdir) / "outs" / "web_summary.html"
|
|
59
|
+
if not web_summary_html.exists():
|
|
60
|
+
raise RuntimeError(
|
|
61
|
+
f"web_summary.html does not exist in {outdir}/outs. "
|
|
62
|
+
"cellranger vdj failed."
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
# Modify web_summary.html to move javascript to a separate file
|
|
66
|
+
# to avoid vscode live server breaking the page by injecting some code
|
|
67
|
+
print("# Modify web_summary.html to move javascript to a separate file")
|
|
68
|
+
try:
|
|
69
|
+
web_summary_js = Path(outdir) / "outs" / "web_summary.js"
|
|
70
|
+
web_summary_content = web_summary_html.read_text()
|
|
71
|
+
regex = re.compile(r"<script>(?=/\*! For license)(.+)</script>", re.DOTALL)
|
|
72
|
+
web_summary_html.write_text(regex.sub(
|
|
73
|
+
'<script src="web_summary.js"></script>',
|
|
74
|
+
web_summary_content,
|
|
75
|
+
))
|
|
76
|
+
web_summary_js.write_text(regex.search(web_summary_content).group(1))
|
|
77
|
+
except Exception as e:
|
|
78
|
+
print(f"Error modifying web_summary.html: {e}")
|
|
79
|
+
raise e
|
|
@@ -1,47 +1,54 @@
|
|
|
1
1
|
source("{{biopipen_dir}}/utils/misc.R")
|
|
2
2
|
library(Seurat)
|
|
3
3
|
|
|
4
|
-
sobjfile
|
|
5
|
-
outfile
|
|
6
|
-
celltypes
|
|
7
|
-
newcol
|
|
4
|
+
sobjfile <- {{in.sobjfile | r}}
|
|
5
|
+
outfile <- {{out.outfile | r}}
|
|
6
|
+
celltypes <- {{envs.cell_types | r}}
|
|
7
|
+
newcol <- {{envs.newcol | r}}
|
|
8
8
|
|
|
9
9
|
if (is.null(celltypes) || length(celltypes) == 0) {
|
|
10
|
-
|
|
10
|
+
log_warn("No cell types are given!")
|
|
11
11
|
|
|
12
12
|
# create a symbolic link to the input file
|
|
13
13
|
file.symlink(sobjfile, outfile)
|
|
14
14
|
} else {
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
idents
|
|
15
|
+
log_info("Loading Seurat object ...")
|
|
16
|
+
sobj <- readRDS(sobjfile)
|
|
17
|
+
idents <- as.character(unique(Idents(sobj)))
|
|
18
|
+
idents <- idents[order(as.numeric(idents))]
|
|
18
19
|
|
|
19
20
|
if (length(celltypes) < length(idents)) {
|
|
20
|
-
celltypes
|
|
21
|
+
celltypes <- c(celltypes, idents[(length(celltypes) + 1):length(idents)])
|
|
21
22
|
} else if (length(celltypes) > length(idents)) {
|
|
22
|
-
celltypes
|
|
23
|
-
|
|
24
|
-
"The length of cell types is longer than the number of clusters!",
|
|
25
|
-
immediate. = TRUE
|
|
26
|
-
)
|
|
23
|
+
celltypes <- celltypes[1:length(idents)]
|
|
24
|
+
log_warn("The length of cell types is longer than the number of clusters!")
|
|
27
25
|
}
|
|
28
26
|
for (i in seq_along(celltypes)) {
|
|
29
27
|
if (celltypes[i] == "-" || celltypes[i] == "") {
|
|
30
|
-
celltypes[i]
|
|
28
|
+
celltypes[i] <- idents[i]
|
|
31
29
|
}
|
|
32
30
|
}
|
|
33
|
-
names(celltypes)
|
|
31
|
+
names(celltypes) <- idents
|
|
34
32
|
|
|
33
|
+
log_info("Renaming cell types ...")
|
|
35
34
|
if (is.null(newcol)) {
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
sobj
|
|
35
|
+
has_na <- "NA" %in% unlist(celltypes) || anyNA(unlist(celltypes))
|
|
36
|
+
sobj$seurat_clusters_id <- Idents(sobj)
|
|
37
|
+
celltypes$object <- sobj
|
|
38
|
+
sobj <- do_call(RenameIdents, celltypes)
|
|
39
|
+
sobj$seurat_clusters <- Idents(sobj)
|
|
40
|
+
if (has_na) {
|
|
41
|
+
log_info("Filtering clusters if NA ...")
|
|
42
|
+
sobj <- subset(
|
|
43
|
+
sobj,
|
|
44
|
+
subset = seurat_clusters != "NA" & !is.na(seurat_clusters)
|
|
45
|
+
)
|
|
46
|
+
}
|
|
40
47
|
} else {
|
|
41
|
-
celltypes$object
|
|
42
|
-
sobj
|
|
43
|
-
sobj[[newcol]]
|
|
44
|
-
Idents(sobj)
|
|
48
|
+
celltypes$object <- sobj
|
|
49
|
+
sobj <- do_call(RenameIdents, celltypes)
|
|
50
|
+
sobj[[newcol]] <- Idents(sobj)
|
|
51
|
+
Idents(sobj) <- "seurat_clusters"
|
|
45
52
|
}
|
|
46
53
|
|
|
47
54
|
saveRDS(sobj, outfile)
|
|
@@ -142,13 +142,8 @@ do_case <- function(name, case) {
|
|
|
142
142
|
info <- casename_info(name, create = TRUE)
|
|
143
143
|
cells_by <- trimws(strsplit(case$cells_by, ",")[[1]])
|
|
144
144
|
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
casename <- paste(sec_case_names[-1], collapse = ":")
|
|
148
|
-
dir.create(sec_dir, showWarnings = FALSE, recursive = TRUE)
|
|
149
|
-
|
|
150
|
-
outfile <- file.path(info$sec_dir, paste0("case-", info$case_slug, ".png"))
|
|
151
|
-
txtfile <- file.path(info$sec_dir, paste0("case-", info$case_slug, ".txt"))
|
|
145
|
+
outfile <- file.path(info$sec_dir, paste0(info$case_slug, ".png"))
|
|
146
|
+
txtfile <- file.path(info$sec_dir, paste0(info$case_slug, ".txt"))
|
|
152
147
|
|
|
153
148
|
# subset the seurat object
|
|
154
149
|
meta <- srtobj@meta.data
|
|
@@ -229,14 +224,20 @@ do_case <- function(name, case) {
|
|
|
229
224
|
meta %>% select(
|
|
230
225
|
!!sym(cells_by),
|
|
231
226
|
!!sym(case$group_by),
|
|
227
|
+
seurat_clusters,
|
|
232
228
|
CloneSize,
|
|
233
229
|
CloneGroupSize,
|
|
234
230
|
CloneClusterSize,
|
|
235
231
|
CloneGroupClusterSize,
|
|
232
|
+
) %>% distinct(
|
|
233
|
+
!!sym(cells_by),
|
|
234
|
+
!!sym(case$group_by),
|
|
235
|
+
seurat_clusters,
|
|
236
|
+
.keep_all = TRUE
|
|
236
237
|
),
|
|
237
238
|
txtfile,
|
|
238
239
|
sep = "\t",
|
|
239
|
-
row.names =
|
|
240
|
+
row.names = FALSE,
|
|
240
241
|
col.names = TRUE,
|
|
241
242
|
quote = FALSE
|
|
242
243
|
)
|