biopipen 0.34.2__py3-none-any.whl → 0.34.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/ns/scrna.py +16 -3
- biopipen/ns/scrna_metabolic_landscape.py +1 -1
- biopipen/ns/tcr.py +5 -0
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +12 -3
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +12 -3
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +12 -3
- biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +3 -10
- biopipen/scripts/scrna/SeuratClusterStats-clustree.R +16 -0
- biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +1 -1
- biopipen/scripts/scrna/SeuratClusterStats-features.R +29 -6
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +29 -1
- biopipen/scripts/scrna/SeuratClusterStats.R +1 -0
- biopipen/scripts/scrna/celltypist-wrapper.py +2 -0
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +9 -3
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +2 -2
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +1 -0
- biopipen/scripts/tcr/GIANA/GIANA4.py +2 -4
- biopipen/scripts/tcr/ScRepCombiningExpression.R +1 -0
- biopipen/scripts/tcr/ScRepLoading.R +7 -2
- biopipen/scripts/tcr/TCRClustering.R +9 -23
- biopipen/scripts/tcr/TESSA.R +4 -2
- {biopipen-0.34.2.dist-info → biopipen-0.34.3.dist-info}/METADATA +1 -1
- {biopipen-0.34.2.dist-info → biopipen-0.34.3.dist-info}/RECORD +26 -27
- biopipen/scripts/scrna/SCP-plot.R +0 -15202
- {biopipen-0.34.2.dist-info → biopipen-0.34.3.dist-info}/WHEEL +0 -0
- {biopipen-0.34.2.dist-info → biopipen-0.34.3.dist-info}/entry_points.txt +0 -0
biopipen/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.34.2"
|
|
1
|
+
__version__ = "0.34.3"
|
biopipen/ns/scrna.py
CHANGED
|
@@ -531,6 +531,8 @@ class SeuratClusterStats(Proc):
|
|
|
531
531
|
Envs:
|
|
532
532
|
mutaters (type=json): The mutaters to mutate the metadata to subset the cells.
|
|
533
533
|
The mutaters will be applied in the order specified.
|
|
534
|
+
You can also use the clone selectors to select the TCR clones/clusters.
|
|
535
|
+
See <https://pwwang.github.io/scplotter/reference/clone_selectors.html>.
|
|
534
536
|
cache (type=auto): Whether to cache the plots.
|
|
535
537
|
Currently only plots for features are supported, since creating those
|
|
536
538
|
plots can be time consuming.
|
|
@@ -564,6 +566,7 @@ class SeuratClusterStats(Proc):
|
|
|
564
566
|
- res (type=int): The resolution of the plots.
|
|
565
567
|
- height (type=int): The height of the plots.
|
|
566
568
|
- width (type=int): The width of the plots.
|
|
569
|
+
- descr: The description of the plot, showing in the report.
|
|
567
570
|
- more_formats (type=list): The formats to save the plots other than `png`.
|
|
568
571
|
- save_code (flag): Whether to save the code to reproduce the plot.
|
|
569
572
|
- save_data (flag): Whether to save the data used to generate the plot.
|
|
@@ -655,6 +658,7 @@ class SeuratClusterStats(Proc):
|
|
|
655
658
|
"clustrees": {},
|
|
656
659
|
"stats_defaults": {
|
|
657
660
|
"subset": None,
|
|
661
|
+
"descr": None,
|
|
658
662
|
"devpars": {"res": 100},
|
|
659
663
|
"more_formats": [],
|
|
660
664
|
"save_code": False,
|
|
@@ -663,10 +667,12 @@ class SeuratClusterStats(Proc):
|
|
|
663
667
|
"stats": {
|
|
664
668
|
"Number of cells in each cluster (Bar Chart)": {
|
|
665
669
|
"plot_type": "bar",
|
|
670
|
+
"x_text_angle": 90,
|
|
666
671
|
},
|
|
667
672
|
"Number of cells in each cluster by Sample (Bar Chart)": {
|
|
668
673
|
"plot_type": "bar",
|
|
669
674
|
"group_by": "Sample",
|
|
675
|
+
"x_text_angle": 90,
|
|
670
676
|
},
|
|
671
677
|
},
|
|
672
678
|
"ngenes_defaults": {
|
|
@@ -698,7 +704,6 @@ class SeuratClusterStats(Proc):
|
|
|
698
704
|
"dimplots": {
|
|
699
705
|
"Dimensional reduction plot": {
|
|
700
706
|
"label": True,
|
|
701
|
-
"label_insitu": True,
|
|
702
707
|
},
|
|
703
708
|
},
|
|
704
709
|
}
|
|
@@ -1025,7 +1030,9 @@ class MarkersFinder(Proc):
|
|
|
1025
1030
|
ncores (type=int): Number of cores to use for parallel computing for some `Seurat` procedures.
|
|
1026
1031
|
* Used in `future::plan(strategy = "multicore", workers = <ncores>)` to parallelize some Seurat procedures.
|
|
1027
1032
|
* See also: <https://satijalab.org/seurat/articles/future_vignette.html>
|
|
1028
|
-
mutaters (type=json): The mutaters to mutate the metadata
|
|
1033
|
+
mutaters (type=json): The mutaters to mutate the metadata.
|
|
1034
|
+
You can also use the clone selectors to select the TCR clones/clusters.
|
|
1035
|
+
See <https://pwwang.github.io/scplotter/reference/clone_selectors.html>.
|
|
1029
1036
|
group_by: The column name in metadata to group the cells.
|
|
1030
1037
|
If only `group_by` is specified, and `ident-1` and `ident-2` are
|
|
1031
1038
|
not specified, markers will be found for all groups in this column
|
|
@@ -1237,7 +1244,9 @@ class TopExpressingGenes(Proc):
|
|
|
1237
1244
|
outdir: The output directory for the tables and plots
|
|
1238
1245
|
|
|
1239
1246
|
Envs:
|
|
1240
|
-
mutaters (type=json): The mutaters to mutate the metadata
|
|
1247
|
+
mutaters (type=json): The mutaters to mutate the metadata.
|
|
1248
|
+
You can also use the clone selectors to select the TCR clones/clusters.
|
|
1249
|
+
See <https://pwwang.github.io/scplotter/reference/clone_selectors.html>.
|
|
1241
1250
|
ident: The group of cells to find the top expressing genes.
|
|
1242
1251
|
The cells will be selected by the `group_by` column with this
|
|
1243
1252
|
`ident` value in metadata.
|
|
@@ -1606,6 +1615,8 @@ class ScFGSEA(Proc):
|
|
|
1606
1615
|
Passed to `nproc` of `fgseaMultilevel()`.
|
|
1607
1616
|
mutaters (type=json): The mutaters to mutate the metadata.
|
|
1608
1617
|
The key-value pairs will be passed the `dplyr::mutate()` to mutate the metadata.
|
|
1618
|
+
You can also use the clone selectors to select the TCR clones/clusters.
|
|
1619
|
+
See <https://pwwang.github.io/scplotter/reference/clone_selectors.html>.
|
|
1609
1620
|
|
|
1610
1621
|
group_by: The column name in metadata to group the cells.
|
|
1611
1622
|
ident_1: The first group of cells to compare
|
|
@@ -2699,6 +2710,8 @@ class PseudoBulkDEG(Proc):
|
|
|
2699
2710
|
seurat object. Keys are the new column names and values are the
|
|
2700
2711
|
expressions to mutate the columns. These new columns can be
|
|
2701
2712
|
used to define your cases.
|
|
2713
|
+
You can also use the clone selectors to select the TCR clones/clusters.
|
|
2714
|
+
See <https://pwwang.github.io/scplotter/reference/clone_selectors.html>.
|
|
2702
2715
|
each: The column name in metadata to separate the cells into different cases.
|
|
2703
2716
|
When specified, the case will be expanded to multiple cases for
|
|
2704
2717
|
each value in the column.
|
|
biopipen/ns/scrna_metabolic_landscape.py
CHANGED
|
@@ -165,7 +165,7 @@ class MetabolicFeatures(Proc):
|
|
|
165
165
|
`1`, `2` and `3` in the `group_by` column, we could have
|
|
166
166
|
`comparisons = ["1", "2"]`, which will compare the group `1` with groups
|
|
167
167
|
`2` and `3`, and the group `2` with groups `1` and `3`. We could also
|
|
168
|
-
have `comparisons = ["1
|
|
168
|
+
have `comparisons = ["1:2", "1:3"]`, which will compare the group `1` with
|
|
169
169
|
group `2` and group `1` with group `3`.
|
|
170
170
|
fgsea_args (type=json): Other arguments for the `fgsea::fgsea()` function.
|
|
171
171
|
For example, `{"minSize": 15, "maxSize": 500}`.
|
biopipen/ns/tcr.py
CHANGED
|
@@ -1749,6 +1749,11 @@ class ScRepCombiningExpression(Proc):
|
|
|
1749
1749
|
|
|
1750
1750
|
Output:
|
|
1751
1751
|
outfile: The `Seurat` object with the TCR/BCR data combined
|
|
1752
|
+
In addition to the meta columns added by
|
|
1753
|
+
`scRepertoire::combineExpression()`, a new column `TCR_Presence` will be
|
|
1754
|
+
added to the metadata. It indicates whether the cell has a TCR/BCR
|
|
1755
|
+
sequence or not. The value is `TRUE` if the cell has a TCR/BCR sequence,
|
|
1756
|
+
and `FALSE` otherwise.
|
|
1752
1757
|
|
|
1753
1758
|
Envs:
|
|
1754
1759
|
cloneCall: How to call the clone - VDJC gene (gene), CDR3 nucleotide (nt),
|
|
biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte
CHANGED
|
@@ -34,15 +34,15 @@ The cells are grouped at 2 dimensions: `subset_by`, usually the clinic groups th
|
|
|
34
34
|
|
|
35
35
|
<UnorderedList>
|
|
36
36
|
<ListItem>
|
|
37
|
-
<a href="
|
|
37
|
+
<a href="?proc=MetabolicPathwayActivity" class="listitem">MetabolicPathwayActivity</a>
|
|
38
38
|
<Tile><p>Investigating the metabolic pathways of the cells in different subsets and groups.</p></Tile>
|
|
39
39
|
</ListItem>
|
|
40
40
|
<ListItem>
|
|
41
|
-
<a href="
|
|
41
|
+
<a href="?proc=MetabolicPathwayHeterogeneity" class="listitem">MetabolicPathwayHeterogeneity</a>
|
|
42
42
|
<Tile><p>Showing metabolic pathways enriched in genes with highest contribution to the metabolic heterogeneities</p></Tile>
|
|
43
43
|
</ListItem>
|
|
44
44
|
<ListItem>
|
|
45
|
-
MetabolicFeatures (this page)
|
|
45
|
+
<span class="listitem">MetabolicFeatures (this page)</span>
|
|
46
46
|
<Tile>
|
|
47
47
|
<p>Gene set enrichment analysis against the metabolic pathways for comparisons by different groups in different subsets.</p>
|
|
48
48
|
<p>The metabolic features are actual gene set enrichment analysis (GSEA) results for the metabolic pathways with given comparisons.</p>
|
|
@@ -59,3 +59,12 @@ The cells are grouped at 2 dimensions: `subset_by`, usually the clinic groups th
|
|
|
59
59
|
{%- endmacro -%}
|
|
60
60
|
|
|
61
61
|
{{ report_jobs(jobs, head_job, report_job) }}
|
|
62
|
+
|
|
63
|
+
<style>
|
|
64
|
+
.listitem {
|
|
65
|
+
font-size: large;
|
|
66
|
+
font-weight: bold;
|
|
67
|
+
margin: 1rem 0 0.5rem 0;
|
|
68
|
+
display: inline-block;
|
|
69
|
+
}
|
|
70
|
+
</style>
|
|
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte
CHANGED
|
@@ -34,7 +34,7 @@ The cells are grouped at 2 dimensions: `subset_by`, usually the clinic groups th
|
|
|
34
34
|
|
|
35
35
|
<UnorderedList>
|
|
36
36
|
<ListItem>
|
|
37
|
-
MetabolicPathwayActivity (this page)
|
|
37
|
+
<span class="listitem">MetabolicPathwayActivity (this page)</span>
|
|
38
38
|
<Tile>
|
|
39
39
|
<p>Investigating the metabolic pathways of the cells in different subsets and groups.</p>
|
|
40
40
|
<p>The cells are first subset by subsets and then the metabolic activities are examined for each groups in different subsets.</p>
|
|
@@ -69,13 +69,13 @@ The cells are grouped at 2 dimensions: `subset_by`, usually the clinic groups th
|
|
|
69
69
|
</Tile>
|
|
70
70
|
</ListItem>
|
|
71
71
|
<ListItem>
|
|
72
|
-
<a href="
|
|
72
|
+
<a href="?proc=MetabolicPathwayHeterogeneity" class="listitem">MetabolicPathwayHeterogeneity</a>
|
|
73
73
|
<Tile>
|
|
74
74
|
<p>Showing metabolic pathways enriched in genes with highest contribution to the metabolic heterogeneities</p>
|
|
75
75
|
</Tile>
|
|
76
76
|
</ListItem>
|
|
77
77
|
<ListItem>
|
|
78
|
-
<a href="
|
|
78
|
+
<a href="?proc=MetabolicFeatures" class="listitem">MetabolicFeatures</a>
|
|
79
79
|
<Tile>
|
|
80
80
|
<p>Gene set enrichment analysis against the metabolic pathways for comparisons by different groups in different subsets.</p>
|
|
81
81
|
</Tile>
|
|
@@ -91,3 +91,12 @@ The cells are grouped at 2 dimensions: `subset_by`, usually the clinic groups th
|
|
|
91
91
|
{%- endmacro -%}
|
|
92
92
|
|
|
93
93
|
{{ report_jobs(jobs, head_job, report_job) }}
|
|
94
|
+
|
|
95
|
+
<style>
|
|
96
|
+
.listitem {
|
|
97
|
+
font-size: large;
|
|
98
|
+
font-weight: bold;
|
|
99
|
+
margin: 1rem 0 0.5rem 0;
|
|
100
|
+
display: inline-block;
|
|
101
|
+
}
|
|
102
|
+
</style>
|
|
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte
CHANGED
|
@@ -34,13 +34,13 @@ The cells are grouped at 2 dimensions: `subset_by`, usually the clinic groups th
|
|
|
34
34
|
|
|
35
35
|
<UnorderedList>
|
|
36
36
|
<ListItem>
|
|
37
|
-
<a href="
|
|
37
|
+
<a href="?proc=MetabolicPathwayActivity" class="listitem">MetabolicPathwayActivity</a>
|
|
38
38
|
<Tile>
|
|
39
39
|
<p>Investigating the metabolic pathways of the cells in different subsets and groups.</p>
|
|
40
40
|
</Tile>
|
|
41
41
|
</ListItem>
|
|
42
42
|
<ListItem>
|
|
43
|
-
MetabolicPathwayHeterogeneity (this page)
|
|
43
|
+
<span class="listitem">MetabolicPathwayHeterogeneity (this page)</span>
|
|
44
44
|
<Tile>
|
|
45
45
|
<p>Showing metabolic pathways enriched in genes with highest contribution to the metabolic heterogeneities</p>
|
|
46
46
|
<p>
|
|
@@ -54,7 +54,7 @@ The cells are grouped at 2 dimensions: `subset_by`, usually the clinic groups th
|
|
|
54
54
|
</Tile>
|
|
55
55
|
</ListItem>
|
|
56
56
|
<ListItem>
|
|
57
|
-
<a href="
|
|
57
|
+
<a href="?proc=MetabolicFeatures" class="listitem">MetabolicFeatures</a>
|
|
58
58
|
<Tile>
|
|
59
59
|
<p>Gene set enrichment analysis against the metabolic pathways for comparisons by different groups in different subsets.</p>
|
|
60
60
|
</Tile>
|
|
@@ -70,3 +70,12 @@ The cells are grouped at 2 dimensions: `subset_by`, usually the clinic groups th
|
|
|
70
70
|
{%- endmacro -%}
|
|
71
71
|
|
|
72
72
|
{{ report_jobs(jobs, head_job, report_job) }}
|
|
73
|
+
|
|
74
|
+
<style>
|
|
75
|
+
.listitem {
|
|
76
|
+
font-size: large;
|
|
77
|
+
font-weight: bold;
|
|
78
|
+
margin: 1rem 0 0.5rem 0;
|
|
79
|
+
display: inline-block;
|
|
80
|
+
}
|
|
81
|
+
</style>
|
|
biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R
CHANGED
|
@@ -26,15 +26,8 @@ if (is.null(celltypist_args$model)) {
|
|
|
26
26
|
}
|
|
27
27
|
dir.create(file.path(outdir, "data", "models"), recursive = TRUE, showWarnings = FALSE)
|
|
28
28
|
modelfile <- file.path(outdir, "data", "models", basename(celltypist_args$model))
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
} else {
|
|
32
|
-
real_modelfile <- normalizePath(Sys.readlink(modelfile))
|
|
33
|
-
if (real_modelfile != normalizePath(celltypist_args$model)) {
|
|
34
|
-
file.remove(modelfile)
|
|
35
|
-
file.symlink(celltypist_args$model, modelfile)
|
|
36
|
-
}
|
|
37
|
-
}
|
|
29
|
+
suppressWarnings(file.remove(modelfile))
|
|
30
|
+
file.symlink(normalizePath(celltypist_args$model), modelfile)
|
|
38
31
|
|
|
39
32
|
sobj <- NULL
|
|
40
33
|
if (!endsWith(sobjfile, ".h5ad")) {
|
|
@@ -43,7 +36,7 @@ if (!endsWith(sobjfile, ".h5ad")) {
|
|
|
43
36
|
# find the default ident name in meta.data
|
|
44
37
|
for (col in colnames(sobj@meta.data)) {
|
|
45
38
|
if (!is.factor(sobj@meta.data[[col]])) { next }
|
|
46
|
-
if (isTRUE(all.equal(Idents(sobj), sobj@meta.data[[col]]))) {
|
|
39
|
+
if (isTRUE(all.equal(unname(Idents(sobj)), sobj@meta.data[[col]]))) {
|
|
47
40
|
celltypist_args$over_clustering <- col
|
|
48
41
|
break
|
|
49
42
|
}
|
|
biopipen/scripts/scrna/SeuratClusterStats-clustree.R
CHANGED
|
@@ -26,6 +26,22 @@ if (
|
|
|
26
26
|
if (length(clustrees) == 0) {
|
|
27
27
|
log$warn("- no case found, skipping ...")
|
|
28
28
|
} else {
|
|
29
|
+
reporter$add(
|
|
30
|
+
list(
|
|
31
|
+
kind = "descr",
|
|
32
|
+
content = 'The clustree plots displays clustering results from the Seurat object across different
|
|
33
|
+
resolutions of the clustering algorithm
|
|
34
|
+
(<a target="_blank" href="https://satijalab.org/seurat/reference/findclusters">Seurat::FindClusters</a>).
|
|
35
|
+
Each node represents a cluster, with the resolution levels labeled along the vertical (y) axis.
|
|
36
|
+
The size of each node reflects the number of cells in that cluster. Edges connect clusters between
|
|
37
|
+
adjacent resolutions and indicate how cells transition between clusters as resolution increases.
|
|
38
|
+
The thickness of the edges corresponds to the proportion of shared cells (in_prop) between clusters,
|
|
39
|
+
where darker lines signify a higher overlap (up to 100%). The color of the edges indicates the actual
|
|
40
|
+
number of cells that transitioned between clusters.'
|
|
41
|
+
),
|
|
42
|
+
h1 = "Clustree plots"
|
|
43
|
+
)
|
|
44
|
+
|
|
29
45
|
reports <- list()
|
|
30
46
|
for (name in names(clustrees)) {
|
|
31
47
|
if (is.null(clustrees[[name]]$prefix)) {
|
|
biopipen/scripts/scrna/SeuratClusterStats-dimplots.R
CHANGED
|
@@ -40,7 +40,7 @@ do_one_dimplot = function(name) {
|
|
|
40
40
|
reporter$add(
|
|
41
41
|
list(
|
|
42
42
|
kind = "descr",
|
|
43
|
-
content = paste0("Dimensionality reduction plot for ", case$
|
|
43
|
+
content = paste0("Dimensionality reduction plot for ", case$group_by)
|
|
44
44
|
),
|
|
45
45
|
reporter$image(prefix, "pdf", FALSE),
|
|
46
46
|
h1 = name
|
|
biopipen/scripts/scrna/SeuratClusterStats-features.R
CHANGED
|
@@ -64,11 +64,11 @@ do_one_features <- function(name) {
|
|
|
64
64
|
log$info("- Case: {name}")
|
|
65
65
|
|
|
66
66
|
case <- list_update(features_defaults, features[[name]])
|
|
67
|
-
case$descr <- case$descr %||% ""
|
|
68
67
|
case <- extract_vars(
|
|
69
68
|
case,
|
|
70
69
|
"devpars", "more_formats", "save_code", "save_data", "order_by",
|
|
71
|
-
"subset", "features", "descr"
|
|
70
|
+
"subset", "features", "descr",
|
|
71
|
+
allow_nonexisting = TRUE)
|
|
72
72
|
|
|
73
73
|
if (!is.null(subset)) {
|
|
74
74
|
case$object <- srtobj %>% filter(!!parse_expr(subset))
|
|
@@ -77,6 +77,7 @@ do_one_features <- function(name) {
|
|
|
77
77
|
}
|
|
78
78
|
|
|
79
79
|
if (exists("order_by") && !is.null(order_by)) {
|
|
80
|
+
case$ident <- case$ident %||% GetIdentityColumn(case$object)
|
|
80
81
|
if (length(order_by) < 2) {
|
|
81
82
|
clusters <- case$object@meta.data %>%
|
|
82
83
|
group_by(!!sym(case$ident)) %>%
|
|
@@ -126,12 +127,34 @@ do_one_features <- function(name) {
|
|
|
126
127
|
caching$save(info$prefix)
|
|
127
128
|
}
|
|
128
129
|
# add reports
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
130
|
+
default_descr <- glue(
|
|
131
|
+
"The plot shows the distribution or pattern of the specified features ({paste(case$features %||% features, collapse = ', ')}) ",
|
|
132
|
+
"across cells",
|
|
133
|
+
"{if (!is.null(case$ident)) glue(', identified by \"{case$ident}\"') else ''}",
|
|
134
|
+
"{if (!is.null(case$group_by)) glue(', grouped by \"{case$group_by}\"') else ''}",
|
|
135
|
+
"{if (!is.null(case$split_by)) glue(', and split by \"{case$split_by}\"') else ''}. ",
|
|
136
|
+
"The plot type is '{case$plot_type}', ",
|
|
137
|
+
"{if (case$plot_type == 'dim') 'displaying the features on a dimensional reduction embedding' ",
|
|
138
|
+
" else if (case$plot_type == 'heatmap') 'arranged as a heatmap by rows_name and other grouping variables' ",
|
|
139
|
+
" else if (case$plot_type %in% c('violin', 'box', 'ridge')) 'showing the distribution of feature values by the grouping variables' ",
|
|
140
|
+
" else if (case$plot_type == 'cor') 'showing the correlation between features' ",
|
|
141
|
+
" else 'showing aggregated feature values by the grouping variables'}. ",
|
|
142
|
+
"{if (!is.null(case$facet_by)) glue('Plots are further faceted by \"{case$facet_by}\". ') else ''}",
|
|
143
|
+
"{if (case$plot_type == 'dim') glue('The reduction used is \"{if (!is.null(case$reduction)) case$reduction else DefaultDimReduc(case$object)}\"') else ''}",
|
|
144
|
+
"{if (case$plot_type == 'dim' && !is.null(case$graph)) glue(', with graph \"{case$graph}\" drawn to show cell neighbor edges') else ''}",
|
|
145
|
+
"{if (case$plot_type == 'dim' && !is.null(case$bg_cutoff) && case$bg_cutoff > 0) glue(', and a background cutoff of {case$bg_cutoff}') else ''}",
|
|
146
|
+
"{if (case$plot_type == 'dim') glue(', using dimensions {paste(case$dims %||% 1:2, collapse = \",\")}') else ''}"
|
|
147
|
+
)
|
|
148
|
+
if (!is.null(case$comparisons)) {
|
|
149
|
+
default_descr <- paste0(
|
|
150
|
+
default_descr,
|
|
151
|
+
"Statistical comparisons were performed between groups using '{case$pairwise_method %||% 'wilcox.test'}' method."
|
|
133
152
|
)
|
|
134
153
|
}
|
|
154
|
+
reporter$add2(
|
|
155
|
+
list(kind = "descr", content = descr %||% default_descr),
|
|
156
|
+
hs = c(info$section, info$name)
|
|
157
|
+
)
|
|
135
158
|
|
|
136
159
|
if (save_data) {
|
|
137
160
|
reporter$add2(
|
|
biopipen/scripts/scrna/SeuratClusterStats-stats.R
CHANGED
|
@@ -5,17 +5,26 @@ log$info("stats:")
|
|
|
5
5
|
odir <- file.path(outdir, "stats")
|
|
6
6
|
dir.create(odir, recursive=TRUE, showWarnings=FALSE)
|
|
7
7
|
|
|
8
|
+
|
|
9
|
+
|
|
8
10
|
do_one_stats <- function(name) {
|
|
9
11
|
log$info("- Case: {name}")
|
|
10
12
|
|
|
11
13
|
case <- list_update(stats_defaults, stats[[name]])
|
|
12
|
-
extract_vars(case, "devpars", "more_formats", "save_code", "save_data", "subset")
|
|
14
|
+
case <- extract_vars(case, "devpars", "more_formats", "save_code", "save_data", "subset", "descr")
|
|
13
15
|
|
|
14
16
|
if (!is.null(subset)) {
|
|
15
17
|
case$object <- srtobj %>% filter(!!parse_expr(subset))
|
|
16
18
|
} else {
|
|
17
19
|
case$object <- srtobj
|
|
18
20
|
}
|
|
21
|
+
ident <- case$ident %||% GetIdentityColumn(case$object)
|
|
22
|
+
groupings <- unique(c(case$group_by, case$rows_by, case$columns_by, case$pie_group_by, ident))
|
|
23
|
+
if (length(groupings) > 0) {
|
|
24
|
+
for (g in groupings) {
|
|
25
|
+
case$object <- filter(case$object, !is.na(!!sym(g)))
|
|
26
|
+
}
|
|
27
|
+
}
|
|
19
28
|
|
|
20
29
|
info <- case_info(name, odir, is_dir = FALSE, create = TRUE)
|
|
21
30
|
p <- do_call(gglogger::register(CellStatPlot), case)
|
|
@@ -27,6 +36,20 @@ do_one_stats <- function(name) {
|
|
|
27
36
|
auto_data_setup = FALSE)
|
|
28
37
|
}
|
|
29
38
|
|
|
39
|
+
frac <- case$frac %||% "none"
|
|
40
|
+
default_descr <- glue(
|
|
41
|
+
"The {case$plot_type} plot shows the distribution of cells across categories defined by '{ident}'",
|
|
42
|
+
"{if (!is.null(case$group_by)) glue(', grouped by {case$group_by}') else ''}",
|
|
43
|
+
"{if (!is.null(case$split_by)) glue(', and split by {case$split_by}') else ''}. ",
|
|
44
|
+
"The values represent ",
|
|
45
|
+
"{if (frac == 'none') 'the number of cells' else glue('the fraction of cells calculated by \"{frac}\"')}. "
|
|
46
|
+
)
|
|
47
|
+
if (!is.null(case$comparisons)) {
|
|
48
|
+
default_descr <- paste0(
|
|
49
|
+
default_descr,
|
|
50
|
+
"Statistical comparisons were performed between groups using '{case$pairwise_method %||% 'wilcox.test'}' method."
|
|
51
|
+
)
|
|
52
|
+
}
|
|
30
53
|
if (save_data) {
|
|
31
54
|
pdata <- attr(p, "data") %||% p$data
|
|
32
55
|
if (!inherits(pdata, "data.frame") && !inherits(pdata, "matrix")) {
|
|
@@ -37,6 +60,10 @@ do_one_stats <- function(name) {
|
|
|
37
60
|
list(
|
|
38
61
|
name = "Plot",
|
|
39
62
|
contents = list(
|
|
63
|
+
list(
|
|
64
|
+
kind = "descr",
|
|
65
|
+
content = case$descr %||% default_descr
|
|
66
|
+
),
|
|
40
67
|
reporter$image(
|
|
41
68
|
info$prefix, more_formats, save_code, kind = "image")
|
|
42
69
|
)
|
|
@@ -60,6 +87,7 @@ do_one_stats <- function(name) {
|
|
|
60
87
|
)
|
|
61
88
|
} else {
|
|
62
89
|
reporter$add2(
|
|
90
|
+
list(kind = "descr", content = case$descr %||% default_descr),
|
|
63
91
|
reporter$image(info$prefix, more_formats, save_code, kind = "image"),
|
|
64
92
|
hs = c(info$section, info$name)
|
|
65
93
|
)
|
|
biopipen/scripts/scrna/celltypist-wrapper.py
CHANGED
|
@@ -29,6 +29,8 @@ if __name__ == "__main__":
|
|
|
29
29
|
raise ValueError(
|
|
30
30
|
f"Over clustering column '{over_clustering}' not found in AnnData object."
|
|
31
31
|
)
|
|
32
|
+
if 'neighbors' in adata.uns and 'params' in adata.uns['neighbors']:
|
|
33
|
+
adata.uns['neighbors']['params'].setdefault('n_neighbors', 15)
|
|
32
34
|
|
|
33
35
|
annotated = celltypist.annotate(
|
|
34
36
|
adata,
|
|
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R
CHANGED
|
@@ -98,7 +98,13 @@ do_comparison <- function(object, caseinfo, subset_by, subset_val, group_by, gro
|
|
|
98
98
|
}
|
|
99
99
|
|
|
100
100
|
classes <- as.character(object@meta.data[[group_by]])
|
|
101
|
-
|
|
101
|
+
if (!group1 %in% classes) {
|
|
102
|
+
stop("Group '", group1, "' not found in '", group_by, "' column of the Seurat object.")
|
|
103
|
+
}
|
|
104
|
+
if (!is.null(group2) && !group2 %in% classes) {
|
|
105
|
+
stop("Group '", group2, "' not found in '", group_by, "' column of the Seurat object.")
|
|
106
|
+
}
|
|
107
|
+
classes[classes != group1] <- "Other"
|
|
102
108
|
if (any(table(classes) < 5)) {
|
|
103
109
|
msg <- paste0(
|
|
104
110
|
" ! skipped. Group has less than 5 cells: ",
|
|
@@ -266,8 +272,8 @@ do_subset <- function(object, caseinfo, subset_by, subset_val, group_by, compari
|
|
|
266
272
|
rbind, lapply(
|
|
267
273
|
as.character(comparisons),
|
|
268
274
|
function(comparison) {
|
|
269
|
-
if (grepl("
|
|
270
|
-
group1 <- trimws(unlist(strsplit(comparison, "
|
|
275
|
+
if (grepl(":", comparison)) {
|
|
276
|
+
group1 <- trimws(unlist(strsplit(comparison, ":")))
|
|
271
277
|
group2 <- group1[2]
|
|
272
278
|
group1 <- group1[1]
|
|
273
279
|
} else {
|
|
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R
CHANGED
|
@@ -315,8 +315,8 @@ do_subset <- function(
|
|
|
315
315
|
plotargs$keep_empty <- TRUE
|
|
316
316
|
|
|
317
317
|
p <- do_call(plotfn, plotargs)
|
|
318
|
-
devpars$width <- devpars$width %||% (attr(p, "width") * devpars$res) %||% 1000
|
|
319
|
-
devpars$height <- devpars$height %||% (attr(p, "height") * devpars$res) %||% 1000
|
|
318
|
+
devpars$width <- devpars$width %||% (attr(p, "width") * 2 * devpars$res) %||% 1000
|
|
319
|
+
devpars$height <- devpars$height %||% (attr(p, "height") * 2 * devpars$res) %||% 1000
|
|
320
320
|
} else { # heatmap
|
|
321
321
|
minval <- min(dat)
|
|
322
322
|
maxval <- max(dat)
|
|
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R
CHANGED
|
@@ -195,6 +195,7 @@ do_subset <- function(object, caseinfo, subset_by, subset_val, group_by, plots,
|
|
|
195
195
|
plotprefix <- file.path(odir, slugify(plot))
|
|
196
196
|
plotargs$devpars$width <- plotargs$devpars$width %||% (attr(p, "width") * plotargs$devpars$res) %||% 800
|
|
197
197
|
plotargs$devpars$height <- plotargs$devpars$height %||% (attr(p, "height") * plotargs$devpars$res) %||% 600
|
|
198
|
+
plotargs$devpars$height <- max(plotargs$devpars$height, plotargs$devpars$width / 1.5)
|
|
198
199
|
png(
|
|
199
200
|
filename = paste0(plotprefix, ".png"),
|
|
200
201
|
width = plotargs$devpars$width,
|
|
biopipen/scripts/tcr/GIANA/GIANA4.py
CHANGED
|
@@ -36,9 +36,6 @@ from sklearn.manifold import MDS
|
|
|
36
36
|
import faiss
|
|
37
37
|
from query import *
|
|
38
38
|
try:
|
|
39
|
-
from Bio.SubsMat.MatrixInfo import blosum62
|
|
40
|
-
print(blosum62)
|
|
41
|
-
except ModuleNotFoundError:
|
|
42
39
|
from Bio.Align import substitution_matrices
|
|
43
40
|
blosum62 = substitution_matrices.load("BLOSUM62")
|
|
44
41
|
_tmp = {}
|
|
@@ -46,7 +43,8 @@ except ModuleNotFoundError:
|
|
|
46
43
|
for ab2 in blosum62.alphabet:
|
|
47
44
|
_tmp[(ab1, ab2)] = int(blosum62[(ab1, ab2)])
|
|
48
45
|
blosum62 = _tmp
|
|
49
|
-
|
|
46
|
+
except ModuleNotFoundError:
|
|
47
|
+
from Bio.SubsMat.MatrixInfo import blosum62
|
|
50
48
|
|
|
51
49
|
AAstring = "ACDEFGHIKLMNPQRSTVWY"
|
|
52
50
|
AAstringList = list(AAstring)
|
|
biopipen/scripts/tcr/ScRepLoading.R
CHANGED
|
@@ -118,8 +118,13 @@ load_contig <- function(input, sample, fmt) {
|
|
|
118
118
|
fmt <- dirfmt[[2]]
|
|
119
119
|
if (is.null(dir)) { return(NULL) }
|
|
120
120
|
x <- loadContigs(dir, format = fmt %||% "10X")
|
|
121
|
-
x[[1]]
|
|
122
|
-
x
|
|
121
|
+
x <- x[[1]]
|
|
122
|
+
x$sample <- NULL
|
|
123
|
+
if (identical(fmt %||% "10X", "10X") && colnames(x)[1] == "X") {
|
|
124
|
+
x$X <- NULL
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
x
|
|
123
128
|
}
|
|
124
129
|
|
|
125
130
|
|
|
biopipen/scripts/tcr/TCRClustering.R
CHANGED
|
@@ -130,11 +130,10 @@ output.clusters_df.to_csv(clustcr_dir + "/clusters.txt", sep="\t", index=False)
|
|
|
130
130
|
clustcr_file
|
|
131
131
|
}
|
|
132
132
|
|
|
133
|
-
clean_clustcr_output = function(clustcr_outfile
|
|
133
|
+
clean_clustcr_output = function(clustcr_outfile) {
|
|
134
134
|
clustcr_out = read.delim2(clustcr_outfile, header=TRUE, row.names = NULL)
|
|
135
135
|
colnames(clustcr_out) = c("CDR3.aa", "TCR_Cluster")
|
|
136
|
-
|
|
137
|
-
out = left_join(in_cdr3, distinct(clustcr_out), by=c("CDR3.aa")) %>%
|
|
136
|
+
out = left_join(cdr3aa_df, distinct(clustcr_out), by=c(cdr3seq4clustering = "CDR3.aa")) %>%
|
|
138
137
|
mutate(
|
|
139
138
|
TCR_Cluster = if_else(
|
|
140
139
|
is.na(TCR_Cluster),
|
|
@@ -170,7 +169,7 @@ run_clustcr = function() {
|
|
|
170
169
|
quit(status=rc)
|
|
171
170
|
}
|
|
172
171
|
clustcr_outfile = file.path(clustcr_dir, "clusters.txt")
|
|
173
|
-
clean_clustcr_output(clustcr_outfile
|
|
172
|
+
clean_clustcr_output(clustcr_outfile)
|
|
174
173
|
}
|
|
175
174
|
|
|
176
175
|
prepare_giana = function() {
|
|
@@ -193,21 +192,8 @@ prepare_giana = function() {
|
|
|
193
192
|
}
|
|
194
193
|
|
|
195
194
|
prepare_input = function() {
|
|
196
|
-
#
|
|
197
|
-
cdr3
|
|
198
|
-
# cdr3col = if (!on_multi) "cdr3" else "CDR3.aa"
|
|
199
|
-
cdr3col = "CDR3.aa"
|
|
200
|
-
for (sample in names(seqdata)) {
|
|
201
|
-
sdata = seqdata[[sample]]
|
|
202
|
-
if (on_multi) {
|
|
203
|
-
sdata[[cdr3col]] = sub(";", "", sdata[[cdr3col]])
|
|
204
|
-
} else if ("chain" %in% colnames(sdata)) {
|
|
205
|
-
sdata = sdata %>% separate_rows(chain, cdr3col, sep = ";") %>%
|
|
206
|
-
filter(chain == "TRB")
|
|
207
|
-
}
|
|
208
|
-
cdr3 = union(cdr3, unique(sdata[[cdr3col]]))
|
|
209
|
-
}
|
|
210
|
-
cdr3 = unique(cdr3)
|
|
195
|
+
cdr3aa_df$cdr3seq4clustering <<- gsub("[^A-Z]", "", cdr3aa_df$CDR3.aa) # Remove non-amino acid characters
|
|
196
|
+
cdr3 <- unique(cdr3aa_df$cdr3seq4clustering)
|
|
211
197
|
|
|
212
198
|
# cdr3 = distinct(cdr3, aminoAcid, vMaxResolved)
|
|
213
199
|
|
|
@@ -220,15 +206,14 @@ prepare_input = function() {
|
|
|
220
206
|
cdr3file
|
|
221
207
|
}
|
|
222
208
|
|
|
223
|
-
clean_giana_output = function(giana_outfile
|
|
209
|
+
clean_giana_output = function(giana_outfile) {
|
|
224
210
|
# generate an output file with columns:
|
|
225
211
|
# CDR3.aa, TCR_Cluster, V.name, Sample
|
|
226
212
|
# If sequence doesn't exist in the input file,
|
|
227
213
|
# Then a unique cluster id is assigned to it.
|
|
228
214
|
giana_out = read.delim2(giana_outfile, header=FALSE, comment.char = "#", row.names = NULL)[, 1:2, drop=FALSE]
|
|
229
215
|
colnames(giana_out) = c("CDR3.aa", "TCR_Cluster")
|
|
230
|
-
|
|
231
|
-
out = left_join(in_cdr3, distinct(giana_out), by=c("CDR3.aa")) %>%
|
|
216
|
+
out = left_join(cdr3aa_df, distinct(giana_out), by=c(cdr3seq4clustering = "CDR3.aa")) %>%
|
|
232
217
|
mutate(
|
|
233
218
|
TCR_Cluster = if_else(
|
|
234
219
|
is.na(TCR_Cluster),
|
|
@@ -283,10 +268,11 @@ run_giana = function() {
|
|
|
283
268
|
quit(status=rc)
|
|
284
269
|
}
|
|
285
270
|
giana_outfile = file.path(giana_outdir, "cdr3--RotationEncodingBL62.txt")
|
|
286
|
-
clean_giana_output(giana_outfile
|
|
271
|
+
clean_giana_output(giana_outfile)
|
|
287
272
|
}
|
|
288
273
|
|
|
289
274
|
attach_to_obj = function(obj, out) {
|
|
275
|
+
out <- as.data.frame(out)
|
|
290
276
|
rownames(out) <- out$Barcode
|
|
291
277
|
if (is_seurat) {
|
|
292
278
|
# Attach results to Seurat object
|
biopipen/scripts/tcr/TESSA.R
CHANGED
|
@@ -39,9 +39,11 @@ log$info("Preparing TCR input file ...")
|
|
|
39
39
|
# If immfile endswith .rds, then it is an immunarch object
|
|
40
40
|
tcrdata <- sobj@meta.data %>%
|
|
41
41
|
rownames_to_column("contig_id") %>%
|
|
42
|
+
select(contig_id, CTaa, CTgene, sample = Sample) %>%
|
|
42
43
|
filter(!is.na(CTaa) & !is.na(CTgene)) %>%
|
|
43
|
-
separate(CTaa, into = c(NA, "cdr3"), sep = "_", remove =
|
|
44
|
-
|
|
44
|
+
separate(CTaa, into = c(NA, "cdr3"), sep = "_", remove = TRUE) %>%
|
|
45
|
+
filter(!is.na(cdr3) & cdr3 != "NA" & cdr3 != "nan") %>%
|
|
46
|
+
separate(CTgene, into = c(NA, "vjgene"), sep = "_", remove = TRUE) %>%
|
|
45
47
|
separate(vjgene, into = c("v_gene", NA, "j_gene", NA), sep = "\\.", remove = TRUE) %>%
|
|
46
48
|
mutate(v_gene = sub("-\\d+$", "", v_gene), j_gene = sub("-\\d+$", "", j_gene))
|
|
47
49
|
|