biopipen 0.33.0__py3-none-any.whl → 0.34.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/core/filters.py +10 -183
- biopipen/core/proc.py +5 -3
- biopipen/core/testing.py +8 -1
- biopipen/ns/bam.py +40 -4
- biopipen/ns/cnv.py +1 -1
- biopipen/ns/cnvkit.py +1 -1
- biopipen/ns/delim.py +1 -1
- biopipen/ns/gsea.py +63 -37
- biopipen/ns/misc.py +38 -0
- biopipen/ns/plot.py +8 -0
- biopipen/ns/scrna.py +307 -288
- biopipen/ns/scrna_metabolic_landscape.py +207 -366
- biopipen/ns/tcr.py +165 -97
- biopipen/reports/bam/CNVpytor.svelte +4 -9
- biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
- biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
- biopipen/reports/scrna/DimPlots.svelte +1 -1
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
- biopipen/reports/snp/PlinkCallRate.svelte +2 -2
- biopipen/reports/snp/PlinkFreq.svelte +1 -1
- biopipen/reports/snp/PlinkHWE.svelte +1 -1
- biopipen/reports/snp/PlinkHet.svelte +1 -1
- biopipen/reports/snp/PlinkIBD.svelte +1 -1
- biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
- biopipen/scripts/bam/CNAClinic.R +41 -6
- biopipen/scripts/bam/CNVpytor.py +2 -1
- biopipen/scripts/bam/ControlFREEC.py +2 -3
- biopipen/scripts/bam/SamtoolsView.py +33 -0
- biopipen/scripts/cnv/AneuploidyScore.R +25 -13
- biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
- biopipen/scripts/cnv/TMADScore.R +4 -4
- biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
- biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
- biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
- biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
- biopipen/scripts/delim/RowsBinder.R +1 -1
- biopipen/scripts/delim/SampleInfo.R +14 -2
- biopipen/scripts/gene/GeneNameConversion.R +14 -12
- biopipen/scripts/gsea/Enrichr.R +2 -2
- biopipen/scripts/gsea/FGSEA.R +184 -50
- biopipen/scripts/gsea/PreRank.R +3 -3
- biopipen/scripts/misc/Plot.R +80 -0
- biopipen/scripts/plot/VennDiagram.R +2 -2
- biopipen/scripts/protein/ProdigySummary.R +34 -27
- biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
- biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
- biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
- biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
- biopipen/scripts/regulatory/motifs-common.R +10 -9
- biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
- biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
- biopipen/scripts/rnaseq/Simulation.R +0 -2
- biopipen/scripts/rnaseq/UnitConversion.R +6 -5
- biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
- biopipen/scripts/scrna/CellCellCommunication.py +1 -1
- biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
- biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
- biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
- biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
- biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
- biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
- biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
- biopipen/scripts/scrna/CellsDistribution.R +1 -1
- biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
- biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
- biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
- biopipen/scripts/scrna/LoomTo10X.R +51 -0
- biopipen/scripts/scrna/MarkersFinder.R +348 -217
- biopipen/scripts/scrna/MetaMarkers.R +3 -3
- biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
- biopipen/scripts/scrna/RadarPlots.R +1 -1
- biopipen/scripts/scrna/ScFGSEA.R +157 -75
- biopipen/scripts/scrna/ScSimulation.R +11 -10
- biopipen/scripts/scrna/ScVelo.py +605 -0
- biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
- biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
- biopipen/scripts/scrna/SeuratClusterStats-features.R +39 -30
- biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
- biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
- biopipen/scripts/scrna/SeuratClustering.R +31 -48
- biopipen/scripts/scrna/SeuratLoading.R +2 -2
- biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
- biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
- biopipen/scripts/scrna/SeuratPreparing.R +76 -24
- biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
- biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
- biopipen/scripts/scrna/Subset10X.R +2 -2
- biopipen/scripts/scrna/TopExpressingGenes.R +141 -184
- biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
- biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
- biopipen/scripts/snp/MatrixEQTL.R +39 -20
- biopipen/scripts/snp/PlinkCallRate.R +43 -34
- biopipen/scripts/snp/PlinkFreq.R +34 -41
- biopipen/scripts/snp/PlinkHWE.R +23 -18
- biopipen/scripts/snp/PlinkHet.R +26 -22
- biopipen/scripts/snp/PlinkIBD.R +30 -34
- biopipen/scripts/stats/ChowTest.R +9 -8
- biopipen/scripts/stats/DiffCoexpr.R +13 -11
- biopipen/scripts/stats/LiquidAssoc.R +7 -8
- biopipen/scripts/stats/Mediation.R +8 -8
- biopipen/scripts/stats/MetaPvalue.R +11 -13
- biopipen/scripts/stats/MetaPvalue1.R +6 -5
- biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
- biopipen/scripts/tcr/ClonalStats.R +5 -4
- biopipen/scripts/tcr/CloneResidency.R +3 -3
- biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
- biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
- biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
- biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
- biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
- biopipen/scripts/tcr/ScRepLoading.R +114 -92
- biopipen/scripts/tcr/TCRClusterStats.R +2 -2
- biopipen/scripts/tcr/TCRClustering.R +86 -97
- biopipen/scripts/tcr/TESSA.R +65 -115
- biopipen/scripts/tcr/VJUsage.R +5 -5
- biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
- biopipen/utils/common_docstrs.py +66 -63
- biopipen/utils/reporter.py +177 -0
- {biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/METADATA +2 -1
- {biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/RECORD +131 -144
- {biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/WHEEL +1 -1
- biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
- biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
- biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
- biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
- biopipen/reports/utils/gsea.liq +0 -110
- biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
- biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
- biopipen/utils/caching.R +0 -44
- biopipen/utils/gene.R +0 -95
- biopipen/utils/gsea.R +0 -329
- biopipen/utils/io.R +0 -20
- biopipen/utils/misc.R +0 -602
- biopipen/utils/mutate_helpers.R +0 -581
- biopipen/utils/plot.R +0 -209
- biopipen/utils/repr.R +0 -146
- biopipen/utils/rnaseq.R +0 -48
- biopipen/utils/single_cell.R +0 -207
- {biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
{% from "utils/misc.liq" import report_jobs -%}
|
|
2
|
-
|
|
3
2
|
<script>
|
|
4
|
-
import { Image, Descr } from "$libs";
|
|
5
|
-
import {
|
|
3
|
+
import { Image, DataTable, Descr, Math } from "$libs";
|
|
4
|
+
import { Tabs, Tab, TabContent, UnorderedList, ListItem, InlineNotification, Tile } from "$ccs";
|
|
6
5
|
</script>
|
|
7
6
|
|
|
8
7
|
<h1>Introduction</h1>
|
|
@@ -12,7 +11,7 @@
|
|
|
12
11
|
</Descr>
|
|
13
12
|
|
|
14
13
|
<h2>Workflow of the original analysis</h2>
|
|
15
|
-
<Image src="https://
|
|
14
|
+
<Image src="https://raw.githubusercontent.com/LocasaleLab/Single-Cell-Metabolic-Landscape/master/pipeline.png" />
|
|
16
15
|
|
|
17
16
|
<h2>Reference</h2>
|
|
18
17
|
<UnorderedList>
|
|
@@ -29,61 +28,66 @@
|
|
|
29
28
|
<h2>Analyses with this pipeline</h2>
|
|
30
29
|
|
|
31
30
|
<Descr>
|
|
32
|
-
The cells are grouped at 2 dimensions: `
|
|
33
|
-
|
|
31
|
+
The cells are grouped at 2 dimensions: `subset_by`, usually the clinical groups that bring biological meaning
|
|
32
|
+
(e.g. different timepoints or sample types (tumor/normal)), and `group_by`, usually the cell types.
|
|
34
33
|
</Descr>
|
|
35
34
|
|
|
36
35
|
<UnorderedList>
|
|
37
36
|
<ListItem>
|
|
38
37
|
MetabolicPathwayActivity (this page)
|
|
39
|
-
<
|
|
40
|
-
|
|
38
|
+
<Tile>
|
|
39
|
+
<p>Investigating the metabolic pathways of the cells in different subsets and groups.</p>
|
|
40
|
+
<p>The cells are first subset by subsets and then the metabolic activities are examined for each group in different subsets.</p>
|
|
41
|
+
<p> </p>
|
|
42
|
+
<p>A pathway activity score is defined as the relative gene expression value averaged over all genes in the pathway and all cells of the group.</p>
|
|
43
|
+
<p> </p>
|
|
44
|
+
<p>For the i-th metabolic gene, we first calculated its mean expression level across cells of the j-th cell group:</p>
|
|
45
|
+
<Math displayMode>E_{i,j} = \frac{ {\mathop {\sum }\nolimits_{k = 1}^{n_j} g_{i,k}}}{ {n_j}},\,i \in 1 \ldots M,j \in 1 \ldots N</Math>
|
|
46
|
+
<p>
|
|
47
|
+
In which n<sub>j</sub> is the number of cells in the j-th cell group, g<sub>i,k</sub> is the expression level of the i-th gene in the k-th cell in this cell group,
|
|
48
|
+
M is the number of metabolic genes, and N is the number of cell groups. The relative expression level of the i-th gene in the j-th cell group was then
|
|
49
|
+
defined as the ratio of E<sub>i,j</sub> to its average over all cell groups:
|
|
50
|
+
</p>
|
|
51
|
+
<Math displayMode>r_{i,j} = \frac{ {E_{i,j}}}{ {\frac{1}{N}\mathop {\sum }\nolimits_j^N E_{i,j}}}</Math>
|
|
52
|
+
<p>
|
|
53
|
+
Here r<sub>i,j</sub> quantifies the relative expression level of gene i in cell group j compared to the average expression level of this gene in all cell groups.
|
|
54
|
+
An r<sub>i,j</sub> value >1 means that the expression level of gene i is higher in cell group j compared to its average expression level over all cell groups.
|
|
55
|
+
The pathway activity score for the t-th pathway and the j-th cell group was then defined as the weighted average of r<sub>i,j</sub> over all genes included
|
|
56
|
+
in this pathway:
|
|
57
|
+
</p>
|
|
58
|
+
<Math displayMode>p_{t,j} = \frac{ {\mathop {\sum }\nolimits_{i = 1}^{m_t} w_i \times r_{i,j}}}{ {\mathop {\sum }\nolimits_{i = 1}^{m_t} w_i}}</Math>
|
|
59
|
+
<p>Where p<sub>t,j</sub> represents the activity of the t-th pathway in the j-th cell group, m<sub>t</sub> is the number of genes in the pathway t, w<sub>i</sub>
|
|
60
|
+
is the weighting factor equal to the reciprocal of number of pathways that include the i-th gene.
|
|
61
|
+
To avoid the possibility that pathway activity scores were affected by genes with low expression level or high drop-out rates,
|
|
62
|
+
we excluded the outliers in each pathway defined by genes with relative expression levels greater than three times 75th percentile or below 1/3 times 25th percentile.
|
|
63
|
+
Statistical significance of higher or lower pathway activity in a specific cell group was then evaluated by a random permutation test,
|
|
64
|
+
in which the cell group labels were randomly shuffled 5000 times (for the scRNA datasets) to simulate a null distribution of the pathway activity scores
|
|
65
|
+
and compare to the pathway activity scores in the original, non-shuffled dataset.
|
|
66
|
+
For the pathway activity score p<sub>t,j</sub>, we then calculated a p-value defined as the fraction of random pathway activity scores larger than p<sub>t,j</sub>
|
|
67
|
+
(if p<sub>t,j</sub> is >1) or smaller than p<sub>t,j</sub> (if p<sub>t,j</sub> is <1) to assess if activity of this pathway is significantly
|
|
68
|
+
higher or lower in this cell group than average.</p>
|
|
69
|
+
</Tile>
|
|
41
70
|
</ListItem>
|
|
42
71
|
<ListItem>
|
|
43
72
|
<a href="../MetabolicPathwayHeterogeneity/index.html">MetabolicPathwayHeterogeneity</a>
|
|
44
|
-
<
|
|
73
|
+
<Tile>
|
|
74
|
+
<p>Showing metabolic pathways enriched in genes with highest contribution to the metabolic heterogeneities</p>
|
|
75
|
+
</Tile>
|
|
45
76
|
</ListItem>
|
|
46
77
|
<ListItem>
|
|
47
78
|
<a href="../MetabolicFeatures/index.html">MetabolicFeatures</a>
|
|
48
|
-
<
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
<a href="../MetabolicFeaturesIntraSubsets/index.html">MetabolicFeaturesIntraSubsets</a>
|
|
52
|
-
<p>Gene set enrichment analysis against the metabolic pathways for subsets in different groups.</p>
|
|
79
|
+
<Tile>
|
|
80
|
+
<p>Gene set enrichment analysis against the metabolic pathways for comparisons by different groups in different subsets.</p>
|
|
81
|
+
</Tile>
|
|
53
82
|
</ListItem>
|
|
54
83
|
</UnorderedList>
|
|
55
84
|
|
|
56
|
-
|
|
57
|
-
{
|
|
58
|
-
{%- for ssdir in job.out.outdir | glob: "*" -%}
|
|
59
|
-
{%- if not isdir(ssdir) -%}
|
|
60
|
-
{%- continue -%}
|
|
61
|
-
{%- endif -%}
|
|
62
|
-
<h{{h}}>{{ ssdir | stem }}</h{{h}}>
|
|
63
|
-
|
|
64
|
-
<h{{ h+1 }}>Metabolic pathway activities by <code>{{envs.grouping}}</code></h{{ h+1 }}>
|
|
65
|
-
<Image src="{{ssdir | joinpaths: 'KEGGpathway_activity_heatmap.png'}}" />
|
|
66
|
-
|
|
67
|
-
<h{{ h+1 }}>Distributions of pathway activities by <code>{{envs.grouping}}</code></h{{ h+1 }}>
|
|
68
|
-
<Image src="{{ssdir | joinpaths: 'pathway_activity_violinplot.png'}}" />
|
|
69
|
-
{%- endfor -%}
|
|
70
|
-
|
|
71
|
-
{% if job.out.outdir | glob: "*.group-*.png" -%}
|
|
72
|
-
<h{{h}}>Merged heatmaps</h{{h}}>
|
|
73
|
-
{% for group_hm in job.out.outdir | glob: "*.group-*.png" -%}
|
|
74
|
-
{%- if group_hm.endswith(".group-unclustered.png") -%}
|
|
75
|
-
<h{{h+1}}>{{group_hm | stem | replace: ".group-unclustered", " (Group Unclustered)"}}</h{{h+1}}>
|
|
76
|
-
<Image src="{{group_hm}}" />
|
|
77
|
-
{%- else -%}
|
|
78
|
-
<h{{h+1}}>{{group_hm | stem | replace: ".group-clustered", " (Group Clustered)"}}</h{{h+1}}>
|
|
79
|
-
<Image src="{{group_hm}}" />
|
|
80
|
-
{%- endif -%}
|
|
81
|
-
{%- endfor -%}
|
|
82
|
-
{%- endif -%}
|
|
85
|
+
{%- macro report_job(job, h=1) -%}
|
|
86
|
+
{{ job | render_job: h=h }}
|
|
83
87
|
{%- endmacro -%}
|
|
84
88
|
|
|
85
89
|
{%- macro head_job(job) -%}
|
|
86
|
-
|
|
90
|
+
<h1>{{job.in | attr: "values" | call | first | stem0 | escape}}</h1>
|
|
87
91
|
{%- endmacro -%}
|
|
88
92
|
|
|
89
93
|
{{ report_jobs(jobs, head_job, report_job) }}
|
|
@@ -1,15 +1,72 @@
|
|
|
1
|
-
{% from "utils/misc.liq" import report_jobs
|
|
2
|
-
|
|
1
|
+
{% from "utils/misc.liq" import report_jobs -%}
|
|
3
2
|
<script>
|
|
4
|
-
import { Image, Descr } from "$libs";
|
|
3
|
+
import { Image, DataTable, Descr } from "$libs";
|
|
4
|
+
import { Tabs, Tab, TabContent, UnorderedList, ListItem, InlineNotification, Tile } from "$ccs";
|
|
5
5
|
</script>
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
|
|
7
|
+
<h1>Introduction</h1>
|
|
8
|
+
|
|
9
|
+
<Descr>
|
|
10
|
+
Metabolic landscape of single cells in the tumor microenvironment.
|
|
11
|
+
</Descr>
|
|
12
|
+
|
|
13
|
+
<h2>Workflow of the original analysis</h2>
|
|
14
|
+
<Image src="https://raw.githubusercontent.com/LocasaleLab/Single-Cell-Metabolic-Landscape/master/pipeline.png" />
|
|
15
|
+
|
|
16
|
+
<h2>Reference</h2>
|
|
17
|
+
<UnorderedList>
|
|
18
|
+
<ListItem><a href="https://www.nature.com/articles/s41467-019-11738-0" target="_blank">
|
|
19
|
+
Xiao, Zhengtao, Ziwei Dai, and Jason W. Locasale.
|
|
20
|
+
"Metabolic landscape of the tumor microenvironment at single cell resolution."
|
|
21
|
+
Nature communications 10.1 (2019): 1-12.
|
|
22
|
+
</a></ListItem>
|
|
23
|
+
<ListItem><a href="https://github.com/LocasaleLab/Single-Cell-Metabolic-Landscape" target="_blank">
|
|
24
|
+
Original pipeline
|
|
25
|
+
</a></ListItem>
|
|
26
|
+
</UnorderedList>
|
|
27
|
+
|
|
28
|
+
<h2>Analyses with this pipeline</h2>
|
|
29
|
+
|
|
30
|
+
<Descr>
|
|
31
|
+
The cells are grouped at 2 dimensions: `subset_by`, usually the clinical groups that bring biological meaning
|
|
32
|
+
(e.g. different timepoints or sample types (tumor/normal)), and `group_by`, usually the cell types.
|
|
33
|
+
</Descr>
|
|
34
|
+
|
|
35
|
+
<UnorderedList>
|
|
36
|
+
<ListItem>
|
|
37
|
+
<a href="../MetabolicPathwayActivity/index.html">MetabolicPathwayActivity</a>
|
|
38
|
+
<Tile>
|
|
39
|
+
<p>Investigating the metabolic pathways of the cells in different subsets and groups.</p>
|
|
40
|
+
</Tile>
|
|
41
|
+
</ListItem>
|
|
42
|
+
<ListItem>
|
|
43
|
+
MetabolicPathwayHeterogeneity (this page)
|
|
44
|
+
<Tile>
|
|
45
|
+
<p>Showing metabolic pathways enriched in genes with highest contribution to the metabolic heterogeneities</p>
|
|
46
|
+
<p>
|
|
47
|
+
The PCA analysis was applied on normalized expression values.
|
|
48
|
+
The function prcomp in R was used to perform the PCA analysis.
|
|
49
|
+
For each metabolic gene, we computed its PCA score defined as the sum of absolute values of the loadings of this gene in the top PCs
|
|
50
|
+
that in total account for certain variance to measure variability of gene expression across cells.
|
|
51
|
+
We then sorted the PCA scores of the genes in descending order and applied GSEA analysis to the ranked list of genes to identify metabolic pathways
|
|
52
|
+
enriched in genes with highest variability.
|
|
53
|
+
</p>
|
|
54
|
+
</Tile>
|
|
55
|
+
</ListItem>
|
|
56
|
+
<ListItem>
|
|
57
|
+
<a href="../MetabolicFeatures/index.html">MetabolicFeatures</a>
|
|
58
|
+
<Tile>
|
|
59
|
+
<p>Gene set enrichment analysis against the metabolic pathways for comparisons by different groups in different subsets.</p>
|
|
60
|
+
</Tile>
|
|
61
|
+
</ListItem>
|
|
62
|
+
</UnorderedList>
|
|
63
|
+
|
|
64
|
+
{%- macro report_job(job, h=1) -%}
|
|
65
|
+
{{ job | render_job: h=h }}
|
|
9
66
|
{%- endmacro -%}
|
|
10
67
|
|
|
11
68
|
{%- macro head_job(job) -%}
|
|
12
|
-
|
|
69
|
+
<h1>{{job.in | attr: "values" | call | first | stem0 | escape}}</h1>
|
|
13
70
|
{%- endmacro -%}
|
|
14
71
|
|
|
15
72
|
{{ report_jobs(jobs, head_job, report_job) }}
|
|
@@ -5,13 +5,13 @@
|
|
|
5
5
|
|
|
6
6
|
{%- macro report_job(job, h=1) -%}
|
|
7
7
|
<h{{h+1}}>Sample Call Rate</h{{h+1}}>
|
|
8
|
-
{%- for pngfile in job.out.outdir |
|
|
8
|
+
{%- for pngfile in job.out.outdir | glob: '*.samplecr.png' -%}
|
|
9
9
|
<Descr>Cutoff: {{envs.samplecr}}</Descr>
|
|
10
10
|
<Image src="{{pngfile}}" />
|
|
11
11
|
{%- endfor -%}
|
|
12
12
|
|
|
13
13
|
<h{{h+1}}>Variant Call Rate</h{{h+1}}>
|
|
14
|
-
{%- for pngfile in job.out.outdir |
|
|
14
|
+
{%- for pngfile in job.out.outdir | glob: '*.varcr.png' -%}
|
|
15
15
|
<Descr>Cutoff: {{envs.varcr}}</Descr>
|
|
16
16
|
<Image src="{{pngfile}}" />
|
|
17
17
|
{%- endfor -%}
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
</script>
|
|
5
5
|
|
|
6
6
|
{%- macro report_job(job, h=1) -%}
|
|
7
|
-
{%- for pngfile in job.out.outdir |
|
|
7
|
+
{%- for pngfile in job.out.outdir | glob: '*.png' -%}
|
|
8
8
|
{% set metric_col = pngfile | stem | ext0 %}
|
|
9
9
|
<h{{h+1}}>{{metric_col}} distribution</h{{h+1}}>
|
|
10
10
|
<Image src="{{pngfile}}" />
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
</script>
|
|
5
5
|
|
|
6
6
|
{%- macro report_job(job, h=1) -%}
|
|
7
|
-
{%- for pngfile in job.out.outdir |
|
|
7
|
+
{%- for pngfile in job.out.outdir | glob: '*.png' -%}
|
|
8
8
|
<h{{h+1}}>Distribution</h{{h+1}}>
|
|
9
9
|
<Descr>Cutoff: {{envs.cutoff}}</Descr>
|
|
10
10
|
<Image src="{{pngfile}}" />
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
</script>
|
|
5
5
|
|
|
6
6
|
{%- macro report_job(job, h=1) -%}
|
|
7
|
-
{%- for pngfile in job.out.outdir |
|
|
7
|
+
{%- for pngfile in job.out.outdir | glob: '*.png' -%}
|
|
8
8
|
<h{{h+1}}>Distribution</h{{h+1}}>
|
|
9
9
|
<Descr>Cutoff: [mean - {{envs.cutoff}} x sd, mean + {{envs.cutoff}} x sd]</Descr>
|
|
10
10
|
<Image src="{{pngfile}}" />
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
</script>
|
|
5
5
|
|
|
6
6
|
{%- macro report_job(job, h=1) -%}
|
|
7
|
-
{%- for pngfile in job.out.outdir |
|
|
7
|
+
{%- for pngfile in job.out.outdir | glob: '*.png' -%}
|
|
8
8
|
<h{{h+1}}>Heatmap</h{{h+1}}>
|
|
9
9
|
<Descr>PI_HAT threshold = {{envs.pihat}}</Descr>
|
|
10
10
|
<Image src="{{pngfile}}" />
|
biopipen/scripts/bam/CNAClinic.R
CHANGED
|
@@ -1,14 +1,40 @@
|
|
|
1
|
-
{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
|
|
2
|
-
|
|
3
1
|
library(parallel)
|
|
4
2
|
library(dplyr)
|
|
3
|
+
library(biopipen.utils)
|
|
5
4
|
library(CNAclinic)
|
|
6
5
|
|
|
6
|
+
# https://github.com/sdchandra/CNAclinic/issues/4
|
|
7
|
+
.reorderByChrom.patched <- function(x){
|
|
8
|
+
chromosome <- as.character(x$chromosome)
|
|
9
|
+
chromosome[which(chromosome == "X")] <- "23"
|
|
10
|
+
chromosome[which(chromosome == "Y")] <- "24"
|
|
11
|
+
chromosome[which(chromosome == "MT")] <- "25"
|
|
12
|
+
|
|
13
|
+
x$chromosome <- as.numeric(chromosome)
|
|
14
|
+
# Error in xtfrm.data.frame(x) : cannot xtfrm data frames
|
|
15
|
+
# x <- x[order(x["chromosome"], x["start"]), ]
|
|
16
|
+
x <- x[order(x[, "chromosome"], x[, "start"]), ]
|
|
17
|
+
|
|
18
|
+
x$chromosome <- as.character(x$chromosome)
|
|
19
|
+
# Replace 23 by X:
|
|
20
|
+
x$chromosome[which(x$chromosome == "23")] <- "X"
|
|
21
|
+
|
|
22
|
+
# Replace 24 by Y
|
|
23
|
+
x$chromosome[which(x$chromosome == "24")] <- "Y"
|
|
24
|
+
|
|
25
|
+
# Replace 25 by MT
|
|
26
|
+
x$chromosome[which(x$chromosome == "25")] <- "MT"
|
|
27
|
+
|
|
28
|
+
return(x)
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
monkey_patch("CNAclinic", ".reorderByChrom", .reorderByChrom.patched)
|
|
32
|
+
|
|
7
33
|
metafile = {{in.metafile | r}}
|
|
8
34
|
outdir = {{out.outdir | r}}
|
|
9
35
|
ncores = {{envs.ncores | int}}
|
|
10
36
|
binsizer = {{envs.binsizer | r}}
|
|
11
|
-
binsize = {{envs.binsize |
|
|
37
|
+
binsize = {{envs.binsize | r}}
|
|
12
38
|
seed = {{envs.seed | int}}
|
|
13
39
|
genome = {{envs.genome | r}}
|
|
14
40
|
run_args = {{envs.run_args | r}}
|
|
@@ -30,7 +56,11 @@ if (("Group" %in% metacols) && !("Patient" %in% metacols)) {
|
|
|
30
56
|
}
|
|
31
57
|
|
|
32
58
|
if (!("Binsizer" %in% metacols) && is.null(binsizer) && is.null(binsize)) {
|
|
33
|
-
stop(
|
|
59
|
+
stop(
|
|
60
|
+
"The metadata file must have a column named 'Binsizer' or ",
|
|
61
|
+
"the `envs.binsizer` must be specified when no `envs.binsize` is provided. ",
|
|
62
|
+
"The Binsizer column should indicate which samples are to be used for binsize selection."
|
|
63
|
+
)
|
|
34
64
|
}
|
|
35
65
|
|
|
36
66
|
# add missing columns
|
|
@@ -109,7 +139,7 @@ do_one_sample = function(i) {
|
|
|
109
139
|
bamfile,
|
|
110
140
|
sample,
|
|
111
141
|
refSamples=refSamples,
|
|
112
|
-
binSize=binsize
|
|
142
|
+
binSize=binsize / 1000
|
|
113
143
|
)
|
|
114
144
|
|
|
115
145
|
run_args_i = run_args
|
|
@@ -119,7 +149,12 @@ do_one_sample = function(i) {
|
|
|
119
149
|
|
|
120
150
|
plot_args_i = plot_args
|
|
121
151
|
plot_args_i$object = CNAData
|
|
122
|
-
genomewide_plot
|
|
152
|
+
genomewide_plot <- tryCatch({
|
|
153
|
+
do_call(plotSampleData, plot_args_i)
|
|
154
|
+
}, error = function(e) {
|
|
155
|
+
message("Error in plotting genomewide data for sample ", sample, ": ", e$message)
|
|
156
|
+
return(ggplot2::ggplot() + ggplot2::labs(title = paste("Error in plotting genomewide data for sample", sample)))
|
|
157
|
+
})
|
|
123
158
|
|
|
124
159
|
odir = file.path(outdir, sample)
|
|
125
160
|
dir.create(odir, recursive = TRUE, showWarnings = FALSE)
|
biopipen/scripts/bam/CNVpytor.py
CHANGED
|
@@ -3,6 +3,7 @@ from pathlib import Path
|
|
|
3
3
|
import warnings
|
|
4
4
|
import pandas
|
|
5
5
|
from datetime import datetime
|
|
6
|
+
from diot import Diot # pyright: ignore
|
|
6
7
|
from biopipen.utils.reference import bam_index
|
|
7
8
|
from biopipen.utils.misc import run_command, dict_to_cli_args, logger
|
|
8
9
|
|
|
@@ -16,7 +17,7 @@ refdir = {{envs.refdir | quote}} # pyright: ignore
|
|
|
16
17
|
genome = {{envs.genome | quote}} # pyright: ignore
|
|
17
18
|
chrsize: str = {{envs.chrsize | quote}} # pyright: ignore
|
|
18
19
|
filters: dict = {{envs.filters | repr}} # pyright: ignore
|
|
19
|
-
args:
|
|
20
|
+
args: Diot = {{envs | repr}} # pyright: ignore
|
|
20
21
|
|
|
21
22
|
del args['cnvpytor']
|
|
22
23
|
del args['ncores']
|
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import glob
|
|
3
|
-
import rtoml
|
|
4
3
|
import shutil
|
|
5
|
-
from diot import Diot
|
|
4
|
+
from diot import Diot # type: ignore
|
|
6
5
|
from biopipen.utils.misc import dict_to_cli_args, run_command
|
|
7
6
|
|
|
8
7
|
bamfile = {{ in.bamfile | quote }} # pyright: ignore # noqa
|
|
@@ -79,7 +78,7 @@ config.BAF |= Diot(
|
|
|
79
78
|
|
|
80
79
|
os.makedirs(f"{outdir}/FREEC-output", exist_ok=True)
|
|
81
80
|
|
|
82
|
-
config_ini =
|
|
81
|
+
config_ini = config.to_toml().replace('"', "") # type: ignore
|
|
83
82
|
|
|
84
83
|
with open(configfile, "w") as fconf:
|
|
85
84
|
fconf.write(config_ini)
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
from pathlib import PosixPath # type: ignore # noqa
|
|
2
|
+
from biopipen.utils.misc import run_command, dict_to_cli_args
|
|
3
|
+
from biopipen.utils.reference import bam_index
|
|
4
|
+
|
|
5
|
+
bamfile: str = {{ in.bamfile | quote }} # pyright: ignore # noqa
|
|
6
|
+
outfile: str = {{ out.outfile | quote }} # pyright: ignore # noqa
|
|
7
|
+
envs: dict = {{envs | attr: "to_dict" | call}} # pyright: ignore # noqa
|
|
8
|
+
ncores = envs.pop("ncores")
|
|
9
|
+
samtools = envs.pop("samtools")
|
|
10
|
+
should_index = envs.pop("index")
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def run_samtools(infile):
|
|
14
|
+
cmd = [
|
|
15
|
+
samtools,
|
|
16
|
+
"view",
|
|
17
|
+
"-b",
|
|
18
|
+
"--threads",
|
|
19
|
+
str(ncores),
|
|
20
|
+
"-o",
|
|
21
|
+
outfile,
|
|
22
|
+
] + dict_to_cli_args(envs, dashify=True) + [infile]
|
|
23
|
+
|
|
24
|
+
run_command(cmd, fg=True)
|
|
25
|
+
if should_index:
|
|
26
|
+
bam_index(outfile, tool="samtools", samtools=samtools, ncores=ncores)
|
|
27
|
+
|
|
28
|
+
return outfile
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
if __name__ == "__main__":
|
|
32
|
+
infile = bam_index(bamfile, tool="samtools", samtools=samtools, ncores=ncores)
|
|
33
|
+
run_samtools(infile)
|
|
@@ -1,11 +1,9 @@
|
|
|
1
|
-
{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
|
|
2
|
-
|
|
3
1
|
library(AneuploidyScore)
|
|
4
2
|
library(dplyr)
|
|
5
3
|
library(tidyr)
|
|
6
4
|
library(tibble)
|
|
7
|
-
library(
|
|
8
|
-
library(
|
|
5
|
+
library(plotthis)
|
|
6
|
+
library(biopipen.utils)
|
|
9
7
|
|
|
10
8
|
segfile = {{in.segfile | r}}
|
|
11
9
|
outdir = {{out.outdir | r}}
|
|
@@ -59,7 +57,15 @@ getCAA <- function(segf, cytoarm, tcn_col,
|
|
|
59
57
|
}
|
|
60
58
|
|
|
61
59
|
## Create a GRanges object with all unique intervals between segc and cytoc
|
|
62
|
-
starts <-
|
|
60
|
+
starts <- tryCatch({
|
|
61
|
+
sort(c(GenomicRanges::start(segc), GenomicRanges::start(cytoc)))
|
|
62
|
+
}, error=function(e) {
|
|
63
|
+
warning("Error to detect start on chromosome: ", chr_id, immediate. = TRUE)
|
|
64
|
+
NULL
|
|
65
|
+
})
|
|
66
|
+
if (is.null(starts)) {
|
|
67
|
+
return(NULL)
|
|
68
|
+
}
|
|
63
69
|
ends <- sort(c(GenomicRanges::end(segc), GenomicRanges::end(cytoc)))
|
|
64
70
|
combc <- GRanges(seqnames=chr_id,
|
|
65
71
|
IRanges(start=unique(sort(c(starts, ends[-length(ends)]+1))),
|
|
@@ -123,7 +129,7 @@ getCAA <- function(segf, cytoarm, tcn_col,
|
|
|
123
129
|
return(combc_arms)
|
|
124
130
|
})
|
|
125
131
|
names(seg_cyto_chr) <- names(seg_chr)
|
|
126
|
-
|
|
132
|
+
seg_cyto_chr <- seg_cyto_chr[!sapply(seg_cyto_chr, is.null)]
|
|
127
133
|
return(as(seg_cyto_chr, "GRangesList"))
|
|
128
134
|
}
|
|
129
135
|
|
|
@@ -250,11 +256,17 @@ sig_min = min(-1, plotdata$Signal, na.rm=TRUE)
|
|
|
250
256
|
sig_max = max(1, plotdata$Signal, na.rm=TRUE)
|
|
251
257
|
|
|
252
258
|
png(file.path(outdir, "AneuploidyScore.png"), width=1000, height=600, res=100)
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
259
|
+
p <- BarPlot(
|
|
260
|
+
plotdata,
|
|
261
|
+
x = "Arms",
|
|
262
|
+
y = "Signal",
|
|
263
|
+
fill = "Type",
|
|
264
|
+
facet_by = "SignalType",
|
|
265
|
+
facet_nrow = 2,
|
|
266
|
+
y_min = sig_min,
|
|
267
|
+
y_max = sig_max,
|
|
268
|
+
x_text_angle = 90,
|
|
269
|
+
aspect.ratio = 0.2
|
|
270
|
+
)
|
|
271
|
+
print(p)
|
|
260
272
|
dev.off()
|