biopipen 0.33.0__py3-none-any.whl → 0.34.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (150) hide show
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/filters.py +10 -183
  3. biopipen/core/proc.py +5 -3
  4. biopipen/core/testing.py +8 -1
  5. biopipen/ns/bam.py +40 -4
  6. biopipen/ns/cnv.py +1 -1
  7. biopipen/ns/cnvkit.py +1 -1
  8. biopipen/ns/delim.py +1 -1
  9. biopipen/ns/gsea.py +63 -37
  10. biopipen/ns/misc.py +38 -0
  11. biopipen/ns/plot.py +8 -0
  12. biopipen/ns/scrna.py +307 -288
  13. biopipen/ns/scrna_metabolic_landscape.py +207 -366
  14. biopipen/ns/tcr.py +165 -97
  15. biopipen/reports/bam/CNVpytor.svelte +4 -9
  16. biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
  17. biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
  18. biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
  19. biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
  20. biopipen/reports/scrna/DimPlots.svelte +1 -1
  21. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
  22. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
  23. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
  24. biopipen/reports/snp/PlinkCallRate.svelte +2 -2
  25. biopipen/reports/snp/PlinkFreq.svelte +1 -1
  26. biopipen/reports/snp/PlinkHWE.svelte +1 -1
  27. biopipen/reports/snp/PlinkHet.svelte +1 -1
  28. biopipen/reports/snp/PlinkIBD.svelte +1 -1
  29. biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
  30. biopipen/scripts/bam/CNAClinic.R +41 -6
  31. biopipen/scripts/bam/CNVpytor.py +2 -1
  32. biopipen/scripts/bam/ControlFREEC.py +2 -3
  33. biopipen/scripts/bam/SamtoolsView.py +33 -0
  34. biopipen/scripts/cnv/AneuploidyScore.R +25 -13
  35. biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
  36. biopipen/scripts/cnv/TMADScore.R +4 -4
  37. biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
  38. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
  39. biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
  40. biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
  41. biopipen/scripts/delim/RowsBinder.R +1 -1
  42. biopipen/scripts/delim/SampleInfo.R +14 -2
  43. biopipen/scripts/gene/GeneNameConversion.R +14 -12
  44. biopipen/scripts/gsea/Enrichr.R +2 -2
  45. biopipen/scripts/gsea/FGSEA.R +184 -50
  46. biopipen/scripts/gsea/PreRank.R +3 -3
  47. biopipen/scripts/misc/Plot.R +80 -0
  48. biopipen/scripts/plot/VennDiagram.R +2 -2
  49. biopipen/scripts/protein/ProdigySummary.R +34 -27
  50. biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
  51. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
  52. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
  53. biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
  54. biopipen/scripts/regulatory/motifs-common.R +10 -9
  55. biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
  56. biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
  57. biopipen/scripts/rnaseq/Simulation.R +0 -2
  58. biopipen/scripts/rnaseq/UnitConversion.R +6 -5
  59. biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
  60. biopipen/scripts/scrna/CellCellCommunication.py +1 -1
  61. biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
  62. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
  63. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
  64. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
  65. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
  66. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
  67. biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
  68. biopipen/scripts/scrna/CellsDistribution.R +1 -1
  69. biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
  70. biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
  71. biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
  72. biopipen/scripts/scrna/LoomTo10X.R +51 -0
  73. biopipen/scripts/scrna/MarkersFinder.R +348 -217
  74. biopipen/scripts/scrna/MetaMarkers.R +3 -3
  75. biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
  76. biopipen/scripts/scrna/RadarPlots.R +1 -1
  77. biopipen/scripts/scrna/ScFGSEA.R +157 -75
  78. biopipen/scripts/scrna/ScSimulation.R +11 -10
  79. biopipen/scripts/scrna/ScVelo.py +605 -0
  80. biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
  81. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
  82. biopipen/scripts/scrna/SeuratClusterStats-features.R +39 -30
  83. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
  84. biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
  85. biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
  86. biopipen/scripts/scrna/SeuratClustering.R +31 -48
  87. biopipen/scripts/scrna/SeuratLoading.R +2 -2
  88. biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
  89. biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
  90. biopipen/scripts/scrna/SeuratPreparing.R +76 -24
  91. biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
  92. biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
  93. biopipen/scripts/scrna/Subset10X.R +2 -2
  94. biopipen/scripts/scrna/TopExpressingGenes.R +141 -184
  95. biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
  96. biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
  97. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
  98. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
  99. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
  100. biopipen/scripts/snp/MatrixEQTL.R +39 -20
  101. biopipen/scripts/snp/PlinkCallRate.R +43 -34
  102. biopipen/scripts/snp/PlinkFreq.R +34 -41
  103. biopipen/scripts/snp/PlinkHWE.R +23 -18
  104. biopipen/scripts/snp/PlinkHet.R +26 -22
  105. biopipen/scripts/snp/PlinkIBD.R +30 -34
  106. biopipen/scripts/stats/ChowTest.R +9 -8
  107. biopipen/scripts/stats/DiffCoexpr.R +13 -11
  108. biopipen/scripts/stats/LiquidAssoc.R +7 -8
  109. biopipen/scripts/stats/Mediation.R +8 -8
  110. biopipen/scripts/stats/MetaPvalue.R +11 -13
  111. biopipen/scripts/stats/MetaPvalue1.R +6 -5
  112. biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
  113. biopipen/scripts/tcr/ClonalStats.R +5 -4
  114. biopipen/scripts/tcr/CloneResidency.R +3 -3
  115. biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
  116. biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
  117. biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
  118. biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
  119. biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
  120. biopipen/scripts/tcr/ScRepLoading.R +114 -92
  121. biopipen/scripts/tcr/TCRClusterStats.R +2 -2
  122. biopipen/scripts/tcr/TCRClustering.R +86 -97
  123. biopipen/scripts/tcr/TESSA.R +65 -115
  124. biopipen/scripts/tcr/VJUsage.R +5 -5
  125. biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
  126. biopipen/utils/common_docstrs.py +66 -63
  127. biopipen/utils/reporter.py +177 -0
  128. {biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/METADATA +2 -1
  129. {biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/RECORD +131 -144
  130. {biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/WHEEL +1 -1
  131. biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
  132. biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
  133. biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
  134. biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
  135. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
  136. biopipen/reports/utils/gsea.liq +0 -110
  137. biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
  138. biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
  139. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
  140. biopipen/utils/caching.R +0 -44
  141. biopipen/utils/gene.R +0 -95
  142. biopipen/utils/gsea.R +0 -329
  143. biopipen/utils/io.R +0 -20
  144. biopipen/utils/misc.R +0 -602
  145. biopipen/utils/mutate_helpers.R +0 -581
  146. biopipen/utils/plot.R +0 -209
  147. biopipen/utils/repr.R +0 -146
  148. biopipen/utils/rnaseq.R +0 -48
  149. biopipen/utils/single_cell.R +0 -207
  150. {biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/entry_points.txt +0 -0
@@ -1,8 +1,7 @@
1
1
  {% from "utils/misc.liq" import report_jobs -%}
2
-
3
2
  <script>
4
- import { Image, Descr } from "$libs";
5
- import { ListItem, UnorderedList } from "$ccs";
3
+ import { Image, DataTable, Descr, Math } from "$libs";
4
+ import { Tabs, Tab, TabContent, UnorderedList, ListItem, InlineNotification, Tile } from "$ccs";
6
5
  </script>
7
6
 
8
7
  <h1>Introduction</h1>
@@ -12,7 +11,7 @@
12
11
  </Descr>
13
12
 
14
13
  <h2>Workflow of the original analysis</h2>
15
- <Image src="https://github.com/LocasaleLab/Single-Cell-Metabolic-Landscape/raw/master/pipeline.png" />
14
+ <Image src="https://raw.githubusercontent.com/LocasaleLab/Single-Cell-Metabolic-Landscape/master/pipeline.png" />
16
15
 
17
16
  <h2>Reference</h2>
18
17
  <UnorderedList>
@@ -29,61 +28,66 @@
29
28
  <h2>Analyses with this pipeline</h2>
30
29
 
31
30
  <Descr>
32
- The cells are grouped at 2 dimensions: `grouping`, usually the cell types, and `subsetting`, usually
33
- the groups that bring biological meaning (i.e. different timepoints or sample types (tumor/normal)).
31
+ The cells are grouped at 2 dimensions: `subset_by`, usually the clinic groups that bring biological meaning
32
+ (i.e. different timepoints or sample types (tumor/normal)), and `group_by`, usually the cell types.
34
33
  </Descr>
35
34
 
36
35
  <UnorderedList>
37
36
  <ListItem>
38
37
  MetabolicPathwayActivity (this page)
39
- <p>Investigating the metabolic pathways of the cells in different groups and subsets.</p>
40
- <p>The cells are first grouped by subsets and then the metabolic activities are examined for each groups in different subsets.</p>
38
+ <Tile>
39
+ <p>Investigating the metabolic pathways of the cells in different subsets and groups.</p>
40
+ <p>The cells are first subset by subsets and then the metabolic activities are examined for each groups in different subsets.</p>
41
+ <p> </p>
42
+ <p>A pathway activity score defined as the relative gene expression value averaged over all genes in the pathway and all cells of the group.</p>
43
+ <p> </p>
44
+ <p>For the i-th metabolic gene, we first calculated its mean expression level across cells of the j-th cell group:
45
+ <Math displayMode>E_{i,j} = \frac{ {\mathop {\sum }\nolimits_{k = 1}^{n_j} g_{i,k}}}{ {n_j}},\,i \in 1 \ldots M,j \in 1 \ldots N</Math>
46
+ <p>
47
+ In which n<sub>j</sub> is the number of cells in the j-th cell group, g<sub>i,k</sub> is the expression level of the i-th gene in the k-th cell in this cell group,
48
+ M is the number of metabolic genes, and N is the number of cell groups. The relative expression level of the i-th gene in the j-th cell group was then
49
+ defined as the ratio of E<sub>i,j</sub> to its average over all cell groups:
50
+ </p>
51
+ <Math displayMode>r_{i,j} = \frac{ {E_{i,j}}}{ {\frac{1}{N}\mathop {\sum }\nolimits_j^N E_{i,j}}}</Math>
52
+ <p>
53
+ Here r<sub>i,j</sub> quantifies the relative expression level of gene i in cell group j comparing to the average expression level of this gene in all cell groups.
54
+ A r<sub>i,j</sub> value &gt;1 means that expression level of gene i is higher in cell group j compared to its average expression level over all cell groups.
55
+ The pathway activity score for the t-th pathway and the j-th cell group was then defined as the weighted average of r<sub>i,j</sub> over all genes included
56
+ in this pathway:
57
+ </p>
58
+ <Math displayMode>p_{t,j} = \frac{ {\mathop {\sum }\nolimits_{i = 1}^{m_t} w_i \times r_{i,j}}}{ {\mathop {\sum }\nolimits_{i = 1}^{m_t} w_i}}</Math>
59
+ <p>Where p<sub>t,j</sub> represents the activity of the t-th pathway in the j-th cell group, m<sub>t</sub> is the number of genes in the pathway t, w<sub>i</sub>
60
+ is the weighting factor equal to the reciprocal of number of pathways that include the i-th gene.
61
+ To avoid the possibility that pathway activity scores were affected by genes with low expression level or high drop-out rates,
62
+ we excluded the outliers in each pathway defined by genes with relative expression levels greater than three times 75th percentile or below 1/3 times 25th percentile.
63
+ Statistical significance of higher or lower pathway activity in a specific cell group was then evaluated by a random permutation test,
64
+ in which the cell group labels were randomly shuffled for 5000 (for the scRNA datasets) to simulate a null distribution of the pathway activity scores
65
+ and compare to the pathway activity scores in the original, non-shuffled dataset.
66
+ For the pathway activity score p<sub>t,j</sub>, we then calculated a p-value defined as the fraction of random pathway activity scores larger than pt,j
67
+ (if p<sub>t,j</sub> is &gt;1) or smaller than p<sub>t,j</sub> (if p<sub>t,j</sub> is &lt;1) to assess if activity of this pathway is significantly
68
+ higher or lower in this cell group than average.</p>
69
+ </Tile>
41
70
  </ListItem>
42
71
  <ListItem>
43
72
  <a href="../MetabolicPathwayHeterogeneity/index.html">MetabolicPathwayHeterogeneity</a>
44
- <p>Showing metabolic pathways enriched in genes with highest contribution to the metabolic heterogeneities</p>
73
+ <Tile>
74
+ <p>Showing metabolic pathways enriched in genes with highest contribution to the metabolic heterogeneities</p>
75
+ </Tile>
45
76
  </ListItem>
46
77
  <ListItem>
47
78
  <a href="../MetabolicFeatures/index.html">MetabolicFeatures</a>
48
- <p>Gene set enrichment analysis against the metabolic pathways for groups in different subsets.</p>
49
- </ListItem>
50
- <ListItem>
51
- <a href="../MetabolicFeaturesIntraSubsets/index.html">MetabolicFeaturesIntraSubsets</a>
52
- <p>Gene set enrichment analysis against the metabolic pathways for subsets in different groups.</p>
79
+ <Tile>
80
+ <p>Gene set enrichment analysis against the metabolic pathways for comparisons by different groups in different subsets.</p>
81
+ </Tile>
53
82
  </ListItem>
54
83
  </UnorderedList>
55
84
 
56
-
57
- {%- macro report_job(job, h=2) -%}
58
- {%- for ssdir in job.out.outdir | glob: "*" -%}
59
- {%- if not isdir(ssdir) -%}
60
- {%- continue -%}
61
- {%- endif -%}
62
- <h{{h}}>{{ ssdir | stem }}</h{{h}}>
63
-
64
- <h{{ h+1 }}>Metabolic pathway activities by <code>{{envs.grouping}}</code></h{{ h+1 }}>
65
- <Image src="{{ssdir | joinpaths: 'KEGGpathway_activity_heatmap.png'}}" />
66
-
67
- <h{{ h+1 }}>Distributions of pathway activities by <code>{{envs.grouping}}</code></h{{ h+1 }}>
68
- <Image src="{{ssdir | joinpaths: 'pathway_activity_violinplot.png'}}" />
69
- {%- endfor -%}
70
-
71
- {% if job.out.outdir | glob: "*.group-*.png" -%}
72
- <h{{h}}>Merged heatmaps</h{{h}}>
73
- {% for group_hm in job.out.outdir | glob: "*.group-*.png" -%}
74
- {%- if group_hm.endswith(".group-unclustered.png") -%}
75
- <h{{h+1}}>{{group_hm | stem | replace: ".group-unclustered", " (Group Unclustered)"}}</h{{h+1}}>
76
- <Image src="{{group_hm}}" />
77
- {%- else -%}
78
- <h{{h+1}}>{{group_hm | stem | replace: ".group-clustered", " (Group Clustered)"}}</h{{h+1}}>
79
- <Image src="{{group_hm}}" />
80
- {%- endif -%}
81
- {%- endfor -%}
82
- {%- endif -%}
85
+ {%- macro report_job(job, h=1) -%}
86
+ {{ job | render_job: h=h }}
83
87
  {%- endmacro -%}
84
88
 
85
89
  {%- macro head_job(job) -%}
86
- <h1>{{job.in.sobjfile | stem | escape}}</h1>
90
+ <h1>{{job.in | attr: "values" | call | first | stem0 | escape}}</h1>
87
91
  {%- endmacro -%}
88
92
 
89
93
  {{ report_jobs(jobs, head_job, report_job) }}
@@ -1,15 +1,72 @@
1
- {% from "utils/misc.liq" import report_jobs, table_of_images -%}
2
-
1
+ {% from "utils/misc.liq" import report_jobs -%}
3
2
  <script>
4
- import { Image, Descr } from "$libs";
3
+ import { Image, DataTable, Descr } from "$libs";
4
+ import { Tabs, Tab, TabContent, UnorderedList, ListItem, InlineNotification, Tile } from "$ccs";
5
5
  </script>
6
6
 
7
- {%- macro report_job(job, h=2) -%}
8
- {{ job | render_job: h=h }}
7
+ <h1>Introduction</h1>
8
+
9
+ <Descr>
10
+ Metabolic landscape of single cells in the tumor microenvironment.
11
+ </Descr>
12
+
13
+ <h2>Workflow of the original analysis</h2>
14
+ <Image src="https://raw.githubusercontent.com/LocasaleLab/Single-Cell-Metabolic-Landscape/master/pipeline.png" />
15
+
16
+ <h2>Reference</h2>
17
+ <UnorderedList>
18
+ <ListItem><a href="https://www.nature.com/articles/s41467-019-11738-0" target="_blank">
19
+ Zhengtao, Ziwei Dai, and Jason W. Locasale.
20
+ "Metabolic landscape of the tumor microenvironment at single cell resolution."
21
+ Nature communications 10.1 (2019): 1-12.
22
+ </a></ListItem>
23
+ <ListItem><a href="https://github.com/LocasaleLab/Single-Cell-Metabolic-Landscape" target="_blank">
24
+ Orginal pipeline
25
+ </a></ListItem>
26
+ </UnorderedList>
27
+
28
+ <h2>Analyses with this pipeline</h2>
29
+
30
+ <Descr>
31
+ The cells are grouped at 2 dimensions: `subset_by`, usually the clinic groups that bring biological meaning
32
+ (i.e. different timepoints or sample types (tumor/normal)), and `group_by`, usually the cell types.
33
+ </Descr>
34
+
35
+ <UnorderedList>
36
+ <ListItem>
37
+ <a href="../MetabolicPathwayActivity/index.html">MetabolicPathwayActivity</a>
38
+ <Tile>
39
+ <p>Investigating the metabolic pathways of the cells in different subsets and groups.</p>
40
+ </Tile>
41
+ </ListItem>
42
+ <ListItem>
43
+ MetabolicPathwayHeterogeneity (this page)
44
+ <Tile>
45
+ <p>Showing metabolic pathways enriched in genes with highest contribution to the metabolic heterogeneities</p>
46
+ <p>
47
+ The PCA analysis was applied on normalized expression values.
48
+ The function prcomp in R was used to perform the PCA analysis.
49
+ For each metabolic gene, we computed its PCA score defined as the sum of absolute values of the loadings of this gene in the top PCs
50
+ that in total account for certain variance to measure variability of gene expression across cells.
51
+ We then sorted the PCA scores of the genes in descending order and applied GSEA analysis to the ranked list of genes to identify metabolic pathways
52
+ enriched in genes with highest variability.
53
+ </p>
54
+ </Tile>
55
+ </ListItem>
56
+ <ListItem>
57
+ <a href="../MetabolicFeatures/index.html">MetabolicFeatures</a>
58
+ <Tile>
59
+ <p>Gene set enrichment analysis against the metabolic pathways for comparisons by different groups in different subsets.</p>
60
+ </Tile>
61
+ </ListItem>
62
+ </UnorderedList>
63
+
64
+ {%- macro report_job(job, h=1) -%}
65
+ {{ job | render_job: h=h }}
9
66
  {%- endmacro -%}
10
67
 
11
68
  {%- macro head_job(job) -%}
12
- <h1>{{job.in.sobjfile | stem | escape}}</h1>
69
+ <h1>{{job.in | attr: "values" | call | first | stem0 | escape}}</h1>
13
70
  {%- endmacro -%}
14
71
 
15
72
  {{ report_jobs(jobs, head_job, report_job) }}
@@ -5,13 +5,13 @@
5
5
 
6
6
  {%- macro report_job(job, h=1) -%}
7
7
  <h{{h+1}}>Sample Call Rate</h{{h+1}}>
8
- {%- for pngfile in job.out.outdir | joinpaths: '*.samplecr.png' | glob -%}
8
+ {%- for pngfile in job.out.outdir | glob: '*.samplecr.png' -%}
9
9
  <Descr>Cutoff: {{envs.samplecr}}</Descr>
10
10
  <Image src="{{pngfile}}" />
11
11
  {%- endfor -%}
12
12
 
13
13
  <h{{h+1}}>Variant Call Rate</h{{h+1}}>
14
- {%- for pngfile in job.out.outdir | joinpaths: '*.varcr.png' | glob -%}
14
+ {%- for pngfile in job.out.outdir | glob: '*.varcr.png' -%}
15
15
  <Descr>Cutoff: {{envs.varcr}}</Descr>
16
16
  <Image src="{{pngfile}}" />
17
17
  {%- endfor -%}
@@ -4,7 +4,7 @@
4
4
  </script>
5
5
 
6
6
  {%- macro report_job(job, h=1) -%}
7
- {%- for pngfile in job.out.outdir | joinpaths: '*.png' | glob -%}
7
+ {%- for pngfile in job.out.outdir | glob: '*.png' -%}
8
8
  {% set metric_col = pngfile | stem | ext0 %}
9
9
  <h{{h+1}}>{{metric_col}} distribution</h{{h+1}}>
10
10
  <Image src="{{pngfile}}" />
@@ -4,7 +4,7 @@
4
4
  </script>
5
5
 
6
6
  {%- macro report_job(job, h=1) -%}
7
- {%- for pngfile in job.out.outdir | joinpaths: '*.png' | glob -%}
7
+ {%- for pngfile in job.out.outdir | glob: '*.png' -%}
8
8
  <h{{h+1}}>Distribution</h{{h+1}}>
9
9
  <Descr>Cutoff: {{envs.cutoff}}</Descr>
10
10
  <Image src="{{pngfile}}" />
@@ -4,7 +4,7 @@
4
4
  </script>
5
5
 
6
6
  {%- macro report_job(job, h=1) -%}
7
- {%- for pngfile in job.out.outdir | joinpaths: '*.png' | glob -%}
7
+ {%- for pngfile in job.out.outdir | glob: '*.png' -%}
8
8
  <h{{h+1}}>Distribution</h{{h+1}}>
9
9
  <Descr>Cutoff: [mean - {{envs.cutoff}} x sd, mean + {{envs.cutoff}} x sd]</Descr>
10
10
  <Image src="{{pngfile}}" />
@@ -4,7 +4,7 @@
4
4
  </script>
5
5
 
6
6
  {%- macro report_job(job, h=1) -%}
7
- {%- for pngfile in job.out.outdir | joinpaths: '*.png' | glob -%}
7
+ {%- for pngfile in job.out.outdir | glob: '*.png' -%}
8
8
  <h{{h+1}}>Heatmap</h{{h+1}}>
9
9
  <Descr>PI_HAT threshold = {{envs.pihat}}</Descr>
10
10
  <Image src="{{pngfile}}" />
@@ -26,7 +26,7 @@
26
26
  {%- endmacro -%}
27
27
 
28
28
  {%- macro head_job(job) -%}
29
- <h1>{{job.out.outdir | stem | replace: ".immunarch", ""}}</h1>
29
+ <h1>{{job.out.outdir | stem | replace: ".scRep", ""}}</h1>
30
30
  {%- endmacro -%}
31
31
 
32
32
  {{ report_jobs(jobs, head_job, report_job) }}
@@ -1,14 +1,40 @@
1
- {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
-
3
1
  library(parallel)
4
2
  library(dplyr)
3
+ library(biopipen.utils)
5
4
  library(CNAclinic)
6
5
 
6
+ # https://github.com/sdchandra/CNAclinic/issues/4
7
+ .reorderByChrom.patched <- function(x){
8
+ chromosome <- as.character(x$chromosome)
9
+ chromosome[which(chromosome == "X")] <- "23"
10
+ chromosome[which(chromosome == "Y")] <- "24"
11
+ chromosome[which(chromosome == "MT")] <- "25"
12
+
13
+ x$chromosome <- as.numeric(chromosome)
14
+ # Error in xtfrm.data.frame(x) : cannot xtfrm data frames
15
+ # x <- x[order(x["chromosome"], x["start"]), ]
16
+ x <- x[order(x[, "chromosome"], x[, "start"]), ]
17
+
18
+ x$chromosome <- as.character(x$chromosome)
19
+ # Replace 23 by X:
20
+ x$chromosome[which(x$chromosome == "23")] <- "X"
21
+
22
+ # Replace 24 by Y
23
+ x$chromosome[which(x$chromosome == "24")] <- "Y"
24
+
25
+ # Replace 25 by MT
26
+ x$chromosome[which(x$chromosome == "25")] <- "MT"
27
+
28
+ return(x)
29
+ }
30
+
31
+ monkey_patch("CNAclinic", ".reorderByChrom", .reorderByChrom.patched)
32
+
7
33
  metafile = {{in.metafile | r}}
8
34
  outdir = {{out.outdir | r}}
9
35
  ncores = {{envs.ncores | int}}
10
36
  binsizer = {{envs.binsizer | r}}
11
- binsize = {{envs.binsize | int}}
37
+ binsize = {{envs.binsize | r}}
12
38
  seed = {{envs.seed | int}}
13
39
  genome = {{envs.genome | r}}
14
40
  run_args = {{envs.run_args | r}}
@@ -30,7 +56,11 @@ if (("Group" %in% metacols) && !("Patient" %in% metacols)) {
30
56
  }
31
57
 
32
58
  if (!("Binsizer" %in% metacols) && is.null(binsizer) && is.null(binsize)) {
33
- stop("The metadata file must have a column named 'Binsizer' or the `envs.binsizer` must be specified")
59
+ stop(
60
+ "The metadata file must have a column named 'Binsizer' or ",
61
+ "the `envs.binsizer` must be specified when no `envs.binsize` is provided. ",
62
+ "The Binsizer column should indicate which samples are to be used for binsize selection."
63
+ )
34
64
  }
35
65
 
36
66
  # add missing columns
@@ -109,7 +139,7 @@ do_one_sample = function(i) {
109
139
  bamfile,
110
140
  sample,
111
141
  refSamples=refSamples,
112
- binSize=binsize
142
+ binSize=binsize / 1000
113
143
  )
114
144
 
115
145
  run_args_i = run_args
@@ -119,7 +149,12 @@ do_one_sample = function(i) {
119
149
 
120
150
  plot_args_i = plot_args
121
151
  plot_args_i$object = CNAData
122
- genomewide_plot = do_call(plotSampleData, plot_args_i)
152
+ genomewide_plot <- tryCatch({
153
+ do_call(plotSampleData, plot_args_i)
154
+ }, error = function(e) {
155
+ message("Error in plotting genomewide data for sample ", sample, ": ", e$message)
156
+ return(ggplot2::ggplot() + ggplot2::labs(title = paste("Error in plotting genomewide data for sample", sample)))
157
+ })
123
158
 
124
159
  odir = file.path(outdir, sample)
125
160
  dir.create(odir, recursive = TRUE, showWarnings = FALSE)
@@ -3,6 +3,7 @@ from pathlib import Path
3
3
  import warnings
4
4
  import pandas
5
5
  from datetime import datetime
6
+ from diot import Diot # pyright: ignore
6
7
  from biopipen.utils.reference import bam_index
7
8
  from biopipen.utils.misc import run_command, dict_to_cli_args, logger
8
9
 
@@ -16,7 +17,7 @@ refdir = {{envs.refdir | quote}} # pyright: ignore
16
17
  genome = {{envs.genome | quote}} # pyright: ignore
17
18
  chrsize: str = {{envs.chrsize | quote}} # pyright: ignore
18
19
  filters: dict = {{envs.filters | repr}} # pyright: ignore
19
- args: dict = {{envs | dict}} # pyright: ignore
20
+ args: Diot = {{envs | repr}} # pyright: ignore
20
21
 
21
22
  del args['cnvpytor']
22
23
  del args['ncores']
@@ -1,8 +1,7 @@
1
1
  import os
2
2
  import glob
3
- import rtoml
4
3
  import shutil
5
- from diot import Diot
4
+ from diot import Diot # type: ignore
6
5
  from biopipen.utils.misc import dict_to_cli_args, run_command
7
6
 
8
7
  bamfile = {{ in.bamfile | quote }} # pyright: ignore # noqa
@@ -79,7 +78,7 @@ config.BAF |= Diot(
79
78
 
80
79
  os.makedirs(f"{outdir}/FREEC-output", exist_ok=True)
81
80
 
82
- config_ini = rtoml.dumps(config).replace('"', "")
81
+ config_ini = config.to_toml().replace('"', "") # type: ignore
83
82
 
84
83
  with open(configfile, "w") as fconf:
85
84
  fconf.write(config_ini)
@@ -0,0 +1,33 @@
1
+ from pathlib import PosixPath # type: ignore # noqa
2
+ from biopipen.utils.misc import run_command, dict_to_cli_args
3
+ from biopipen.utils.reference import bam_index
4
+
5
+ bamfile: str = {{ in.bamfile | quote }} # pyright: ignore # noqa
6
+ outfile: str = {{ out.outfile | quote }} # pyright: ignore # noqa
7
+ envs: dict = {{envs | attr: "to_dict" | call}} # pyright: ignore # noqa
8
+ ncores = envs.pop("ncores")
9
+ samtools = envs.pop("samtools")
10
+ should_index = envs.pop("index")
11
+
12
+
13
+ def run_samtools(infile):
14
+ cmd = [
15
+ samtools,
16
+ "view",
17
+ "-b",
18
+ "--threads",
19
+ str(ncores),
20
+ "-o",
21
+ outfile,
22
+ ] + dict_to_cli_args(envs, dashify=True) + [infile]
23
+
24
+ run_command(cmd, fg=True)
25
+ if should_index:
26
+ bam_index(outfile, tool="samtools", samtools=samtools, ncores=ncores)
27
+
28
+ return outfile
29
+
30
+
31
+ if __name__ == "__main__":
32
+ infile = bam_index(bamfile, tool="samtools", samtools=samtools, ncores=ncores)
33
+ run_samtools(infile)
@@ -1,11 +1,9 @@
1
- {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
-
3
1
  library(AneuploidyScore)
4
2
  library(dplyr)
5
3
  library(tidyr)
6
4
  library(tibble)
7
- library(ggplot2)
8
- library(ggprism)
5
+ library(plotthis)
6
+ library(biopipen.utils)
9
7
 
10
8
  segfile = {{in.segfile | r}}
11
9
  outdir = {{out.outdir | r}}
@@ -59,7 +57,15 @@ getCAA <- function(segf, cytoarm, tcn_col,
59
57
  }
60
58
 
61
59
  ## Create a GRanges object with all unique intervals between segc and cytoc
62
- starts <- sort(c(GenomicRanges::start(segc), GenomicRanges::start(cytoc)))
60
+ starts <- tryCatch({
61
+ sort(c(GenomicRanges::start(segc), GenomicRanges::start(cytoc)))
62
+ }, error=function(e) {
63
+ warning("Error to detect start on chromosome: ", chr_id, immediate. = TRUE)
64
+ NULL
65
+ })
66
+ if (is.null(starts)) {
67
+ return(NULL)
68
+ }
63
69
  ends <- sort(c(GenomicRanges::end(segc), GenomicRanges::end(cytoc)))
64
70
  combc <- GRanges(seqnames=chr_id,
65
71
  IRanges(start=unique(sort(c(starts, ends[-length(ends)]+1))),
@@ -123,7 +129,7 @@ getCAA <- function(segf, cytoarm, tcn_col,
123
129
  return(combc_arms)
124
130
  })
125
131
  names(seg_cyto_chr) <- names(seg_chr)
126
-
132
+ seg_cyto_chr <- seg_cyto_chr[!sapply(seg_cyto_chr, is.null)]
127
133
  return(as(seg_cyto_chr, "GRangesList"))
128
134
  }
129
135
 
@@ -250,11 +256,17 @@ sig_min = min(-1, plotdata$Signal, na.rm=TRUE)
250
256
  sig_max = max(1, plotdata$Signal, na.rm=TRUE)
251
257
 
252
258
  png(file.path(outdir, "AneuploidyScore.png"), width=1000, height=600, res=100)
253
- ggplot(plotdata) +
254
- geom_bar(aes(x=Arms, y=Signal, fill=Type), stat="identity") +
255
- geom_hline(yintercept=0, color="black", size=0.1) +
256
- ylim(c(sig_min, sig_max)) +
257
- theme_prism() +
258
- theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
259
- facet_wrap(~SignalType, scales="free_y", nrow=2)
259
+ p <- BarPlot(
260
+ plotdata,
261
+ x = "Arms",
262
+ y = "Signal",
263
+ fill = "Type",
264
+ facet_by = "SignalType",
265
+ facet_nrow = 2,
266
+ y_min = sig_min,
267
+ y_max = sig_max,
268
+ x_text_angle = 90,
269
+ aspect.ratio = 0.2
270
+ )
271
+ print(p)
260
272
  dev.off()