biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (290) hide show
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/config.toml +28 -0
  3. biopipen/core/filters.py +79 -4
  4. biopipen/core/proc.py +12 -3
  5. biopipen/core/testing.py +75 -3
  6. biopipen/ns/bam.py +148 -6
  7. biopipen/ns/bed.py +75 -0
  8. biopipen/ns/cellranger.py +186 -0
  9. biopipen/ns/cellranger_pipeline.py +126 -0
  10. biopipen/ns/cnv.py +19 -3
  11. biopipen/ns/cnvkit.py +1 -1
  12. biopipen/ns/cnvkit_pipeline.py +20 -12
  13. biopipen/ns/delim.py +34 -35
  14. biopipen/ns/gene.py +68 -23
  15. biopipen/ns/gsea.py +63 -37
  16. biopipen/ns/misc.py +39 -14
  17. biopipen/ns/plot.py +304 -1
  18. biopipen/ns/protein.py +183 -0
  19. biopipen/ns/regulatory.py +290 -0
  20. biopipen/ns/rnaseq.py +142 -5
  21. biopipen/ns/scrna.py +2053 -473
  22. biopipen/ns/scrna_metabolic_landscape.py +228 -382
  23. biopipen/ns/snp.py +659 -0
  24. biopipen/ns/stats.py +484 -0
  25. biopipen/ns/tcr.py +683 -98
  26. biopipen/ns/vcf.py +236 -2
  27. biopipen/ns/web.py +97 -6
  28. biopipen/reports/bam/CNVpytor.svelte +4 -9
  29. biopipen/reports/cellranger/CellRangerCount.svelte +18 -0
  30. biopipen/reports/cellranger/CellRangerSummary.svelte +16 -0
  31. biopipen/reports/cellranger/CellRangerVdj.svelte +18 -0
  32. biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
  33. biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
  34. biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
  35. biopipen/reports/common.svelte +15 -0
  36. biopipen/reports/protein/ProdigySummary.svelte +16 -0
  37. biopipen/reports/scrna/CellsDistribution.svelte +4 -39
  38. biopipen/reports/scrna/DimPlots.svelte +1 -1
  39. biopipen/reports/scrna/MarkersFinder.svelte +6 -126
  40. biopipen/reports/scrna/MetaMarkers.svelte +3 -75
  41. biopipen/reports/scrna/RadarPlots.svelte +4 -20
  42. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +61 -22
  43. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +88 -82
  44. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +70 -10
  45. biopipen/reports/snp/PlinkCallRate.svelte +24 -0
  46. biopipen/reports/snp/PlinkFreq.svelte +18 -0
  47. biopipen/reports/snp/PlinkHWE.svelte +18 -0
  48. biopipen/reports/snp/PlinkHet.svelte +18 -0
  49. biopipen/reports/snp/PlinkIBD.svelte +18 -0
  50. biopipen/reports/tcr/CDR3AAPhyschem.svelte +19 -66
  51. biopipen/reports/tcr/ClonalStats.svelte +16 -0
  52. biopipen/reports/tcr/CloneResidency.svelte +3 -93
  53. biopipen/reports/tcr/Immunarch.svelte +4 -155
  54. biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
  55. biopipen/reports/tcr/TESSA.svelte +11 -28
  56. biopipen/reports/utils/misc.liq +22 -7
  57. biopipen/scripts/bam/BamMerge.py +11 -15
  58. biopipen/scripts/bam/BamSampling.py +90 -0
  59. biopipen/scripts/bam/BamSort.py +141 -0
  60. biopipen/scripts/bam/BamSplitChroms.py +10 -10
  61. biopipen/scripts/bam/BamSubsetByBed.py +38 -0
  62. biopipen/scripts/bam/CNAClinic.R +41 -5
  63. biopipen/scripts/bam/CNVpytor.py +153 -54
  64. biopipen/scripts/bam/ControlFREEC.py +13 -14
  65. biopipen/scripts/bam/SamtoolsView.py +33 -0
  66. biopipen/scripts/bed/Bed2Vcf.py +5 -5
  67. biopipen/scripts/bed/BedConsensus.py +5 -5
  68. biopipen/scripts/bed/BedLiftOver.sh +6 -4
  69. biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
  70. biopipen/scripts/bed/BedtoolsMakeWindows.py +47 -0
  71. biopipen/scripts/bed/BedtoolsMerge.py +4 -4
  72. biopipen/scripts/cellranger/CellRangerCount.py +138 -0
  73. biopipen/scripts/cellranger/CellRangerSummary.R +181 -0
  74. biopipen/scripts/cellranger/CellRangerVdj.py +112 -0
  75. biopipen/scripts/cnv/AneuploidyScore.R +55 -20
  76. biopipen/scripts/cnv/AneuploidyScoreSummary.R +221 -163
  77. biopipen/scripts/cnv/TMADScore.R +25 -9
  78. biopipen/scripts/cnv/TMADScoreSummary.R +57 -86
  79. biopipen/scripts/cnvkit/CNVkitAccess.py +7 -6
  80. biopipen/scripts/cnvkit/CNVkitAutobin.py +26 -18
  81. biopipen/scripts/cnvkit/CNVkitBatch.py +6 -6
  82. biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
  83. biopipen/scripts/cnvkit/CNVkitCoverage.py +4 -3
  84. biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
  85. biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
  86. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +12 -8
  87. biopipen/scripts/cnvkit/CNVkitHeatmap.py +5 -5
  88. biopipen/scripts/cnvkit/CNVkitReference.py +6 -5
  89. biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
  90. biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
  91. biopipen/scripts/cnvkit/guess_baits.py +166 -93
  92. biopipen/scripts/delim/RowsBinder.R +1 -1
  93. biopipen/scripts/delim/SampleInfo.R +116 -118
  94. biopipen/scripts/gene/GeneNameConversion.R +67 -0
  95. biopipen/scripts/gene/GenePromoters.R +61 -0
  96. biopipen/scripts/gsea/Enrichr.R +5 -5
  97. biopipen/scripts/gsea/FGSEA.R +184 -50
  98. biopipen/scripts/gsea/GSEA.R +2 -2
  99. biopipen/scripts/gsea/PreRank.R +5 -5
  100. biopipen/scripts/misc/Config2File.py +2 -2
  101. biopipen/scripts/misc/Plot.R +80 -0
  102. biopipen/scripts/misc/Shell.sh +15 -0
  103. biopipen/scripts/misc/Str2File.py +2 -2
  104. biopipen/scripts/plot/Heatmap.R +3 -3
  105. biopipen/scripts/plot/Manhattan.R +147 -0
  106. biopipen/scripts/plot/QQPlot.R +146 -0
  107. biopipen/scripts/plot/ROC.R +88 -0
  108. biopipen/scripts/plot/Scatter.R +112 -0
  109. biopipen/scripts/plot/VennDiagram.R +5 -9
  110. biopipen/scripts/protein/MMCIF2PDB.py +33 -0
  111. biopipen/scripts/protein/PDB2Fasta.py +60 -0
  112. biopipen/scripts/protein/Prodigy.py +119 -0
  113. biopipen/scripts/protein/ProdigySummary.R +140 -0
  114. biopipen/scripts/protein/RMSD.py +178 -0
  115. biopipen/scripts/regulatory/MotifAffinityTest.R +102 -0
  116. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +127 -0
  117. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +104 -0
  118. biopipen/scripts/regulatory/MotifScan.py +159 -0
  119. biopipen/scripts/regulatory/VariantMotifPlot.R +78 -0
  120. biopipen/scripts/regulatory/motifs-common.R +324 -0
  121. biopipen/scripts/rnaseq/Simulation-ESCO.R +180 -0
  122. biopipen/scripts/rnaseq/Simulation-RUVcorr.R +45 -0
  123. biopipen/scripts/rnaseq/Simulation.R +21 -0
  124. biopipen/scripts/rnaseq/UnitConversion.R +325 -54
  125. biopipen/scripts/scrna/AnnData2Seurat.R +40 -0
  126. biopipen/scripts/scrna/CCPlotR-patch.R +161 -0
  127. biopipen/scripts/scrna/CellCellCommunication.py +150 -0
  128. biopipen/scripts/scrna/CellCellCommunicationPlots.R +93 -0
  129. biopipen/scripts/scrna/CellSNPLite.py +30 -0
  130. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +185 -0
  131. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +68 -31
  132. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +27 -22
  133. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +28 -20
  134. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +48 -25
  135. biopipen/scripts/scrna/CellTypeAnnotation.R +37 -1
  136. biopipen/scripts/scrna/CellsDistribution.R +456 -167
  137. biopipen/scripts/scrna/DimPlots.R +1 -1
  138. biopipen/scripts/scrna/ExprImputation-alra.R +109 -0
  139. biopipen/scripts/scrna/ExprImputation-rmagic.R +256 -0
  140. biopipen/scripts/scrna/{ExprImpution-scimpute.R → ExprImputation-scimpute.R} +8 -5
  141. biopipen/scripts/scrna/ExprImputation.R +7 -0
  142. biopipen/scripts/scrna/LoomTo10X.R +51 -0
  143. biopipen/scripts/scrna/MQuad.py +25 -0
  144. biopipen/scripts/scrna/MarkersFinder.R +679 -400
  145. biopipen/scripts/scrna/MetaMarkers.R +265 -161
  146. biopipen/scripts/scrna/ModuleScoreCalculator.R +66 -11
  147. biopipen/scripts/scrna/PseudoBulkDEG.R +678 -0
  148. biopipen/scripts/scrna/RadarPlots.R +355 -134
  149. biopipen/scripts/scrna/ScFGSEA.R +298 -100
  150. biopipen/scripts/scrna/ScSimulation.R +65 -0
  151. biopipen/scripts/scrna/ScVelo.py +617 -0
  152. biopipen/scripts/scrna/Seurat2AnnData.R +7 -0
  153. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +87 -0
  154. biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +36 -30
  155. biopipen/scripts/scrna/SeuratClusterStats-features.R +138 -187
  156. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +81 -0
  157. biopipen/scripts/scrna/SeuratClusterStats-stats.R +78 -89
  158. biopipen/scripts/scrna/SeuratClusterStats.R +47 -10
  159. biopipen/scripts/scrna/SeuratClustering.R +36 -233
  160. biopipen/scripts/scrna/SeuratLoading.R +2 -2
  161. biopipen/scripts/scrna/SeuratMap2Ref.R +84 -113
  162. biopipen/scripts/scrna/SeuratMetadataMutater.R +16 -6
  163. biopipen/scripts/scrna/SeuratPreparing.R +223 -173
  164. biopipen/scripts/scrna/SeuratSubClustering.R +64 -0
  165. biopipen/scripts/scrna/SeuratTo10X.R +27 -0
  166. biopipen/scripts/scrna/Slingshot.R +65 -0
  167. biopipen/scripts/scrna/Subset10X.R +2 -2
  168. biopipen/scripts/scrna/TopExpressingGenes.R +169 -135
  169. biopipen/scripts/scrna/celltypist-wrapper.py +195 -0
  170. biopipen/scripts/scrna/scvelo_paga.py +313 -0
  171. biopipen/scripts/scrna/seurat_anndata_conversion.py +98 -0
  172. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +447 -82
  173. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +348 -241
  174. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +188 -166
  175. biopipen/scripts/snp/MatrixEQTL.R +217 -0
  176. biopipen/scripts/snp/Plink2GTMat.py +148 -0
  177. biopipen/scripts/snp/PlinkCallRate.R +199 -0
  178. biopipen/scripts/snp/PlinkFilter.py +100 -0
  179. biopipen/scripts/snp/PlinkFreq.R +291 -0
  180. biopipen/scripts/snp/PlinkFromVcf.py +81 -0
  181. biopipen/scripts/snp/PlinkHWE.R +85 -0
  182. biopipen/scripts/snp/PlinkHet.R +96 -0
  183. biopipen/scripts/snp/PlinkIBD.R +196 -0
  184. biopipen/scripts/snp/PlinkSimulation.py +124 -0
  185. biopipen/scripts/snp/PlinkUpdateName.py +124 -0
  186. biopipen/scripts/stats/ChowTest.R +146 -0
  187. biopipen/scripts/stats/DiffCoexpr.R +152 -0
  188. biopipen/scripts/stats/LiquidAssoc.R +135 -0
  189. biopipen/scripts/stats/Mediation.R +108 -0
  190. biopipen/scripts/stats/MetaPvalue.R +130 -0
  191. biopipen/scripts/stats/MetaPvalue1.R +74 -0
  192. biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
  193. biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
  194. biopipen/scripts/tcr/Attach2Seurat.R +3 -2
  195. biopipen/scripts/tcr/CDR3AAPhyschem.R +211 -143
  196. biopipen/scripts/tcr/CDR3Clustering.R +343 -0
  197. biopipen/scripts/tcr/ClonalStats.R +526 -0
  198. biopipen/scripts/tcr/CloneResidency.R +255 -131
  199. biopipen/scripts/tcr/CloneSizeQQPlot.R +4 -4
  200. biopipen/scripts/tcr/GIANA/GIANA.py +1356 -797
  201. biopipen/scripts/tcr/GIANA/GIANA4.py +1362 -789
  202. biopipen/scripts/tcr/GIANA/query.py +164 -162
  203. biopipen/scripts/tcr/Immunarch-basic.R +31 -9
  204. biopipen/scripts/tcr/Immunarch-clonality.R +25 -5
  205. biopipen/scripts/tcr/Immunarch-diversity.R +352 -134
  206. biopipen/scripts/tcr/Immunarch-geneusage.R +45 -5
  207. biopipen/scripts/tcr/Immunarch-kmer.R +68 -8
  208. biopipen/scripts/tcr/Immunarch-overlap.R +84 -4
  209. biopipen/scripts/tcr/Immunarch-spectratyping.R +35 -6
  210. biopipen/scripts/tcr/Immunarch-tracking.R +38 -6
  211. biopipen/scripts/tcr/Immunarch-vjjunc.R +165 -0
  212. biopipen/scripts/tcr/Immunarch.R +63 -11
  213. biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
  214. biopipen/scripts/tcr/ImmunarchFilter.R +4 -4
  215. biopipen/scripts/tcr/ImmunarchLoading.R +38 -29
  216. biopipen/scripts/tcr/SampleDiversity.R +1 -1
  217. biopipen/scripts/tcr/ScRepCombiningExpression.R +40 -0
  218. biopipen/scripts/tcr/ScRepLoading.R +166 -0
  219. biopipen/scripts/tcr/TCRClusterStats.R +176 -22
  220. biopipen/scripts/tcr/TCRDock.py +110 -0
  221. biopipen/scripts/tcr/TESSA.R +102 -118
  222. biopipen/scripts/tcr/VJUsage.R +5 -5
  223. biopipen/scripts/tcr/immunarch-patched.R +142 -0
  224. biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
  225. biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
  226. biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
  227. biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
  228. biopipen/scripts/vcf/BcftoolsSort.py +113 -0
  229. biopipen/scripts/vcf/BcftoolsView.py +73 -0
  230. biopipen/scripts/vcf/TruvariBench.sh +14 -7
  231. biopipen/scripts/vcf/TruvariBenchSummary.R +16 -13
  232. biopipen/scripts/vcf/TruvariConsistency.R +1 -1
  233. biopipen/scripts/vcf/Vcf2Bed.py +2 -2
  234. biopipen/scripts/vcf/VcfAnno.py +11 -11
  235. biopipen/scripts/vcf/VcfDownSample.sh +22 -10
  236. biopipen/scripts/vcf/VcfFilter.py +5 -5
  237. biopipen/scripts/vcf/VcfFix.py +7 -7
  238. biopipen/scripts/vcf/VcfFix_utils.py +13 -4
  239. biopipen/scripts/vcf/VcfIndex.py +3 -3
  240. biopipen/scripts/vcf/VcfIntersect.py +3 -3
  241. biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
  242. biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
  243. biopipen/scripts/vcf/bcftools_utils.py +52 -0
  244. biopipen/scripts/web/Download.py +8 -4
  245. biopipen/scripts/web/DownloadList.py +5 -5
  246. biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
  247. biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
  248. biopipen/scripts/web/gcloud_common.py +49 -0
  249. biopipen/utils/gene.py +108 -60
  250. biopipen/utils/misc.py +146 -20
  251. biopipen/utils/reference.py +64 -20
  252. biopipen/utils/reporter.py +177 -0
  253. biopipen/utils/vcf.py +1 -1
  254. biopipen-0.34.26.dist-info/METADATA +27 -0
  255. biopipen-0.34.26.dist-info/RECORD +292 -0
  256. {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
  257. {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +6 -2
  258. biopipen/ns/bcftools.py +0 -111
  259. biopipen/ns/scrna_basic.py +0 -255
  260. biopipen/reports/delim/SampleInfo.svelte +0 -36
  261. biopipen/reports/scrna/GeneExpressionInvistigation.svelte +0 -32
  262. biopipen/reports/scrna/ScFGSEA.svelte +0 -35
  263. biopipen/reports/scrna/SeuratClusterStats.svelte +0 -82
  264. biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -20
  265. biopipen/reports/scrna/SeuratPreparing.svelte +0 -38
  266. biopipen/reports/scrna/TopExpressingGenes.svelte +0 -55
  267. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -31
  268. biopipen/reports/utils/gsea.liq +0 -110
  269. biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
  270. biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
  271. biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
  272. biopipen/scripts/gene/GeneNameConversion.py +0 -66
  273. biopipen/scripts/scrna/ExprImpution-alra.R +0 -32
  274. biopipen/scripts/scrna/ExprImpution-rmagic.R +0 -29
  275. biopipen/scripts/scrna/ExprImpution.R +0 -7
  276. biopipen/scripts/scrna/GeneExpressionInvistigation.R +0 -132
  277. biopipen/scripts/scrna/Write10X.R +0 -11
  278. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -150
  279. biopipen/scripts/tcr/TCRClustering.R +0 -280
  280. biopipen/utils/common_docstrs.py +0 -61
  281. biopipen/utils/gene.R +0 -49
  282. biopipen/utils/gsea.R +0 -193
  283. biopipen/utils/io.R +0 -20
  284. biopipen/utils/misc.R +0 -114
  285. biopipen/utils/mutate_helpers.R +0 -433
  286. biopipen/utils/plot.R +0 -173
  287. biopipen/utils/rnaseq.R +0 -48
  288. biopipen/utils/single_cell.R +0 -115
  289. biopipen-0.21.0.dist-info/METADATA +0 -22
  290. biopipen-0.21.0.dist-info/RECORD +0 -218
@@ -0,0 +1,138 @@
1
+ from contextlib import suppress
2
+ import hashlib
3
+ import shutil
4
+ import re
5
+ from pathlib import Path, PosixPath # noqa: F401
6
+ from biopipen.utils.misc import run_command
7
+
8
+ fastqs: list[Path] = {{in.fastqs | each: as_path}} # pyright: ignore # noqa
9
+ outdir: Path = Path({{out.outdir | quote}}) # pyright: ignore
10
+ id: str = {{out.outdir | basename | quote}} # pyright: ignore
11
+
12
+ cellranger = {{envs.cellranger | quote}} # pyright: ignore
13
+ tmpdir = Path({{envs.tmpdir | quote}}) # pyright: ignore
14
+ ref: str = {{envs.ref | quote}} # pyright: ignore
15
+ ncores = {{envs.ncores | int}} # pyright: ignore
16
+ include_introns = {{envs.include_introns | repr}} # pyright: ignore
17
+ create_bam = {{envs.create_bam | repr}} # pyright: ignore
18
+ outdir_is_mounted: bool = {{envs.outdir_is_mounted | repr}} # pyright: ignore
19
+ copy_outs_only: bool = {{envs.copy_outs_only | repr}} # pyright: ignore
20
+
21
+ ref: Path = Path(ref).resolve() # pyright: ignore
22
+ if not ref.exists():
23
+ raise FileNotFoundError(f"Reference path does not exist: {ref}")
24
+ include_introns = str(include_introns).lower()
25
+ create_bam = str(create_bam).lower()
26
+
27
+ # create a temporary unique directory to store the soft-linked fastq files
28
+ uid = hashlib.md5(str(fastqs).encode()).hexdigest()[:8]
29
+ fastqdir = tmpdir / f"cellranger_count_{uid}"
30
+ fastqdir.mkdir(parents=True, exist_ok=True)
31
+ if len(fastqs) == 1 and fastqs[0].is_dir():
32
+ fastqs = list(fastqs[0].glob("*.fastq.gz"))
33
+
34
+ # soft-link the fastq files to the temporary directory
35
+ for fastq in fastqs:
36
+ fastq = Path(fastq)
37
+ fqnames = re.split(r"(_S\d+_)", fastq.name)
38
+ if len(fqnames) != 3:
39
+ raise ValueError(
40
+ fr"Expect one and only one '_S\d+_' in fastq file name: {fastq.name}"
41
+ )
42
+
43
+ linked = fastqdir / f"{id}{fqnames[1]}{fqnames[2]}"
44
+ if linked.exists():
45
+ linked.unlink()
46
+
47
+ linked.symlink_to(fastq)
48
+
49
+ other_args = {{envs | dict_to_cli_args: dashify=True, exclude=['no_bam', 'create_bam', 'include_introns', 'cellranger', 'transcriptome', 'ref', 'tmpdir', 'id', 'ncores', 'outdir_is_mounted', 'copy_outs_only']}} # pyright: ignore
50
+
51
+ command = [
52
+ cellranger,
53
+ "count",
54
+ "--id",
55
+ id,
56
+ "--fastqs",
57
+ fastqdir,
58
+ "--transcriptome",
59
+ str(ref),
60
+ "--localcores",
61
+ ncores,
62
+ "--disable-ui",
63
+ "--include-introns",
64
+ include_introns,
65
+ *other_args,
66
+ ]
67
+
68
+ # check cellranger version
69
+ # example `cellranger --version` output: cellranger cellranger-7.2.0
70
+ version: str = run_command([cellranger, "--version"], stdout = "RETURN") # type: ignore
71
+ version = version.replace("cellranger", "").replace("-", "").strip() # type: ignore
72
+ print(f"# Detected cellranger version: {version}")
73
+ version: list[int] = list(map(int, version.split("."))) # type: ignore
74
+ if version[0] >= 8:
75
+ command += ["--create-bam", create_bam]
76
+ elif create_bam != "true":
77
+ command += ["--no-bam"]
78
+
79
+ if outdir_is_mounted:
80
+ print("# Using mounted outdir, redirecting cellranger output to a local tmpdir")
81
+ local_outdir = tmpdir / f"{outdir.name}-{uid}" / id
82
+ if local_outdir.parent.exists():
83
+ shutil.rmtree(local_outdir.parent)
84
+ local_outdir.parent.mkdir(parents=True, exist_ok=True)
85
+ odir = local_outdir
86
+ else:
87
+ odir = outdir
88
+
89
+ run_command(command, fg=True, cwd=str(odir.parent))
90
+
91
+ web_summary_html = odir / "outs" / "web_summary.html"
92
+ if not web_summary_html.exists():
93
+ raise RuntimeError(
94
+ f"web_summary.html does not exist in {odir}/outs. "
95
+ "cellranger count failed."
96
+ )
97
+
98
+ # Modify web_summary.html to move javascript to a separate file
99
+ # to avoid vscode live server breaking the page by injecting some code
100
+ print("# Modify web_summary.html to move javascript to a separate file")
101
+ try:
102
+ web_summary_js = odir / "outs" / "web_summary.js"
103
+ web_summary_content = web_summary_html.read_text()
104
+ regex = re.compile(r"<script>(.+)</script>", re.DOTALL)
105
+ web_summary_html.write_text(regex.sub(
106
+ '<script src="web_summary.js"></script>',
107
+ web_summary_content,
108
+ ))
109
+ web_summary_js.write_text(regex.search(web_summary_content).group(1)) # type: ignore
110
+ except Exception as e:
111
+ print(f"Error modifying web_summary.html: {e}")
112
+ raise e
113
+
114
+ # If using local tmpdir for output, copy results back to the final outdir
115
+ if outdir_is_mounted:
116
+ print("# Copy results back to outdir")
117
+ if outdir.exists():
118
+ shutil.rmtree(outdir)
119
+
120
+ if copy_outs_only:
121
+ outdir.mkdir(parents=True, exist_ok=True)
122
+ with suppress(Exception):
123
+ # Some files may fail to copy due to permission issues
124
+ # But the contents are actually copied
125
+ shutil.copytree(odir / "outs", outdir / "outs")
126
+ else:
127
+ with suppress(Exception):
128
+ shutil.copytree(local_outdir, outdir) # type: ignore
129
+
130
+ # Make sure essential files exist
131
+ web_summary_html = outdir / "outs" / "web_summary.html"
132
+ web_summary_js = outdir / "outs" / "web_summary.js"
133
+ for f in [web_summary_html, web_summary_js]:
134
+ if not f.exists():
135
+ raise RuntimeError(
136
+ f"{f} does not exist in {outdir}/outs. "
137
+ "Copying results back from tmpdir failed."
138
+ )
@@ -0,0 +1,181 @@
1
+ library(rlang)
2
+ library(dplyr)
3
+ library(plotthis)
4
+ library(biopipen.utils)
5
+
6
+ indirs <- {{in.indirs | r}}
7
+ outdir <- {{out.outdir | r}}
8
+ joboutdir <- {{job.outdir | r}}
9
+ group <- {{envs.group | r}}
10
+
11
+ logger <- get_logger()
12
+ reporter <- get_reporter()
13
+
14
+ if (is.character(group)) {
15
+ group <- read.csv(group, header = FALSE, row.names = NULL)
16
+ colnames(group) <- c("Sample", "Group")
17
+ } else if (is.list(group)) {
18
+ group <- do_call(
19
+ rbind,
20
+ lapply(names(group), function(n) data.frame(Sample = group[[n]], Group = n))
21
+ )
22
+ } else if (!is.null(group)) {
23
+ stop(paste0("Invalid group: ", paste0(group, collapse = ", ")))
24
+ }
25
+
26
+ cellranger_type <- NULL
27
+ logger$info("Reading and merging metrics for each sample ...")
28
+ metrics <- NULL
29
+ for (indir in indirs) {
30
+ sample <- basename(indir)
31
+ logger$debug("- Reading metrics for sample: ", sample)
32
+ metric <- read.csv(
33
+ file.path(indir, "outs", "metrics_summary.csv"),
34
+ header = TRUE, row.names = NULL, check.names = FALSE)
35
+ metric$Sample <- sample
36
+ sample_cellranger_type <- ifelse(
37
+ file.exists(file.path(indir, "outs", "clonotypes.csv")),
38
+ "vdj",
39
+ "count" # support more types in the future
40
+ )
41
+ cellranger_type <- cellranger_type %||% sample_cellranger_type
42
+ if (cellranger_type != sample_cellranger_type) {
43
+ stop("Multiple types of CellRanger output detected. Should be either count or vdj.")
44
+ }
45
+ if (!is.null(metrics)) {
46
+ missing_cols <- setdiff(colnames(metrics), colnames(metric))
47
+ if (length(missing_cols) > 0) {
48
+ logger$warn('- Missing columns: {paste0(missing_cols, collapse = ", ")} in sample: {sample}')
49
+ metric[missing_cols] <- NA
50
+ }
51
+ missing_cols <- setdiff(colnames(metric), colnames(metrics))
52
+ if (length(missing_cols) > 0) {
53
+ logger$warn('- Missing columns: {paste0(missing_cols, collapse = ", ")} in samples before {sample}')
54
+ metrics[missing_cols] <- NA
55
+ }
56
+ }
57
+ metrics <- rbind(metrics, metric)
58
+ }
59
+
60
+ if (is.null(metrics)) {
61
+ stop("No samples found, check the input directories.")
62
+ }
63
+
64
+ percent_columns <- sapply(colnames(metrics), function(x) {
65
+ is.character(metrics[[x]]) && grepl("%", metrics[[x]][1]) && x != "Sample"
66
+ })
67
+ percent_columns <- colnames(metrics)[percent_columns]
68
+ # Remove %
69
+ metrics <- metrics %>%
70
+ mutate(across(all_of(percent_columns), ~as.numeric(gsub("%", "", .x)))) %>%
71
+ rename_with(.fn = function(x) { paste0(x, " (%)") }, .cols = percent_columns) %>%
72
+ mutate(across(-Sample, ~as.numeric(gsub(",", "", .x))))
73
+
74
+ # Save metrics
75
+ write.table(
76
+ metrics,
77
+ file.path(outdir, "metrics.txt"),
78
+ sep = "\t",
79
+ quote = FALSE,
80
+ row.names = FALSE
81
+ )
82
+
83
+ reporter$add(
84
+ list(kind = "descr", content = "Metrics for all samples"),
85
+ list(kind = "table", src = file.path(outdir, "metrics.txt")),
86
+ h1 = "Metrics of all samples"
87
+ )
88
+
89
+ if (cellranger_type == "vdj") {
90
+ METRIC_DESCR = list(
91
+ `Estimated Number of Cells` = "The number of barcodes estimated to correspond to GEMs containing cells. See VDJ Cell Calling Algorithm.",
92
+ `Mean Read Pairs per Cell` = "Number of input read pairs divided by the estimated number of cells.",
93
+ `Number of Cells With Productive V-J Spanning Pair` = "Number of cell barcodes for which at least one productive sequence was found for each of TRA and TRB (or heavy and light chains, for Ig).",
94
+ `Number of Read Pairs` = "Total number of read pairs that were assigned to this library in demultiplexing.",
95
+ `Valid Barcodes` = "Fraction of reads with barcodes that match the whitelist after barcode correction.",
96
+ `Q30 Bases in Barcode` = "Fraction of cell barcode bases with Q-score greater than or equal to 30.",
97
+ `Q30 Bases in RNA Read 1` = "Fraction of read 1 bases with Q-score greater than or equal to 30. (Likewise for read 2.)",
98
+ `Q30 Bases in Sample Index` = "Fraction of sample index bases with Q-score greater than or equal to 30.",
99
+ `Q30 Bases in UMI` = "Fraction of UMI bases with Q-score ≥ 30.",
100
+ `Reads Mapped to Any V(D)J Gene` = "Fraction of reads that partially or wholly map to a V(D)J gene segment.",
101
+ `Reads Mapped to TRA` = "Fraction of reads that map partially or wholly to a TRA gene segment.",
102
+ `Mean Used Read Pairs per Cell` = "Mean number of read pairs used in assembly per cell barcode. These reads must have a cell barcode, map to a V(D)J gene, and have a UMI with sufficient read support, counted after subsampling.",
103
+ `Fraction Reads in Cells` = "Number of reads with cell barcodes divided by the number of reads with valid barcodes.",
104
+ `Median TRA UMIs per Cell` = "Median number of UMIs assigned to a TRA contig per cell.",
105
+ `Paired Clonotype Diversity` = "Effective diversity of the paired clonotypes, computed as the Inverse Simpson Index of the clonotype frequencies. A value of 1 indicates a minimally diverse sample - only one distinct clonotype was detected. A value equal to the estimated number of cells indicates a maximally diverse sample.",
106
+ `Cells With TRA Contig` = "Fraction of cell barcodes with at least one TRA contig annotated as a full or partial V(D)J gene.",
107
+ `Cells With CDR3-annotated TRA Contig` = "Fraction of cell barcodes with at least one TRA contig where a CDR3 was detected.",
108
+ `Cells With V-J Spanning Contig` = "Fraction of cell barcodes with at least one full-length contig.",
109
+ `Cells With V-J Spanning TRA Contig` = "Fraction of cell barcodes with at least one full-length TRA contig.",
110
+ `Cells With Productive TRA Contig` = "Fraction of cell barcodes with at least one full-length TRA contig that is productive.",
111
+ `Cells With Productive V-J Spanning Pair` = "Fraction of cell barcodes with at least one contig for each chain of the receptor pair that is productive."
112
+ )
113
+ } else {
114
+ METRIC_DESCR = list(
115
+ `Estimated Number of Cells` = "The number of barcodes associated with cell-containing partitions, estimated from the barcode UMI count distribution.",
116
+ `Mean Reads per Cell` = "The total number of reads divided by the estimated number of cells.",
117
+ `Median Genes per Cell` = "Median number of read pairs sequenced from the cells assigned to this sample. In case of multiplexing, only cell-associated barcodes assigned exactly one CMO can be assigned to a sample.",
118
+ `Number of Reads` = "Total number of sequencing reads.",
119
+ `Valid Barcodes` = "Fraction of reads with cell-barcodes that match the whitelist.",
120
+ `Sequencing Saturation` = 'Fraction of reads originating from an already-observed UMI. This is a function of library complexity and sequencing depth. More specifically, this is a ratio where: the denominator is the number of confidently-mapped reads with a valid cell-barcode and valid UMI, and the numerator is the subset of those reads that had a non-unique combination of (cell-barcode, UMI, gene). This metric was called "cDNA PCR Duplication" in versions of Cell Ranger prior to 1.2.',
121
+ `Q30 Bases in Barcode` = "Fraction of bases with Q-score at least 30 in the cell barcode sequences. This is the i7 index (I1) read for the Single Cell 3' v1 chemistry and the R1 read for the Single Cell 3' v2 chemistry.",
122
+ `Q30 Bases in RNA` = "Fraction of bases with Q-score at least 30 in the RNA read sequences. This is Illumina R1 for the Single Cell 3' v1 chemistry and Illumina R2 for the Single Cell 3' v2 chemistry.",
123
+ `Q30 Bases in UMI` = "Fraction of bases with Q-score at least 30 in the UMI sequences. This is the R2 read for the Single Cell 3' v1 chemistry and the R1 read for the Single Cell 3' v2 chemistry.",
124
+ `Reads Mapped to Genome` = "Fraction of reads that mapped to the genome.",
125
+ `Reads Mapped Confidently to Genome` = "Fraction of reads that mapped uniquely to the genome. If a read mapped to exonic loci from a single gene and also to non-exonic loci, it is considered uniquely mapped to one of the exonic loci.",
126
+ `Reads Mapped Confidently to Intergenic Regions` = "Fraction of reads that mapped to the intergenic regions of the genome with a high mapping quality score as reported by the aligner.",
127
+ `Reads Mapped Confidently to Intronic Regions` = "Fraction of reads that mapped to the intronic regions of the genome with a high mapping quality score as reported by the aligner.",
128
+ `Reads Mapped Confidently to Exonic Regions` = "Fraction of reads that mapped to the exonic regions of the genome with a high mapping quality score as reported by the aligner.",
129
+ `Reads Mapped Confidently to Transcriptome` = "Fraction of reads that mapped to a unique gene in the transcriptome with a high mapping quality score as reported by the aligner. The read must be consistent with annotated splice junctions when include-introns=false. These reads are considered for UMI counting.",
130
+ `Reads Confidently Mapped Antisense` = "Fraction of reads confidently mapped to the transcriptome, but on the opposite strand of their annotated gene. A read is counted as antisense if it has any alignments that are consistent with an exon of a transcript but antisense to it, and has no sense alignments.",
131
+ `Total Genes Detected Median UMI Counts per Cell` = "The number of genes with at least one UMI count in any cell."
132
+ )
133
+ }
134
+ logger$info("Plotting metrics ...")
135
+ for (metric in colnames(metrics)) {
136
+ if (metric == "Sample") { next }
137
+ metric_name <- sub(" \\(%\\)$", "", metric)
138
+ logger$info("- {metric_name}")
139
+
140
+ reporter$add(
141
+ list(
142
+ kind = "descr",
143
+ content = METRIC_DESCR[[metric_name]] %||% paste0("Metric: ", metric)
144
+ ),
145
+ h1 = metric
146
+ )
147
+
148
+ # barplot
149
+ p <- BarPlot(metrics, x = "Sample", y = metric, x_text_angle = 90)
150
+ figfile <- file.path(outdir, paste0(slugify(metric), ".barplot.png"))
151
+ png(figfile, height = 600, res = 100, width = max(nrow(metrics) * 30 + 200, 400))
152
+ print(p)
153
+ dev.off()
154
+
155
+ reporter$add(
156
+ list(src = figfile, name = "By Sample"),
157
+ ui = "table_of_images",
158
+ h1 = metric
159
+ )
160
+
161
+ if (is.null(group)) { next }
162
+ # boxplot, if group is provided
163
+ # group: Sample, Group
164
+ pdata <- group %>%
165
+ left_join(metrics, by = "Sample") %>%
166
+ mutate(Group = factor(Group, levels = unique(Group)))
167
+
168
+ p <- BoxPlot(pdata, x = "Group", y = metric, x_text_angle = 90)
169
+ figfile <- file.path(outdir, paste0(slugify(metric), ".boxplot.png"))
170
+ png(figfile, height = 600, res = 100, width = max(length(unique(pdata$Group)) * 30 + 200, 400))
171
+ print(p)
172
+ dev.off()
173
+
174
+ reporter$add(
175
+ list(src = figfile, name = "By Group"),
176
+ ui = "table_of_images",
177
+ h1 = metric
178
+ )
179
+ }
180
+
181
+ reporter$save(joboutdir)
@@ -0,0 +1,112 @@
1
+ import hashlib
2
+ import shutil
3
+ import re
4
+ from contextlib import suppress
5
+ from pathlib import Path, PosixPath # noqa: F401
6
+ from biopipen.utils.misc import run_command
7
+
8
+ fastqs: list[Path] = {{in.fastqs | each: as_path}} # pyright: ignore # noqa
9
+ outdir: Path = Path({{out.outdir | quote}}) # pyright: ignore
10
+ id: str = {{out.outdir | basename | quote}} # pyright: ignore
11
+
12
+ cellranger: str = {{envs.cellranger | quote}} # pyright: ignore
13
+ tmpdir = Path({{envs.tmpdir | quote}}) # pyright: ignore
14
+ ref: str = {{envs.ref | quote}} # pyright: ignore
15
+ ncores: int = {{envs.ncores | int}} # pyright: ignore
16
+ outdir_is_mounted: bool = {{envs.outdir_is_mounted | repr}} # pyright: ignore
17
+ copy_outs_only: bool = {{envs.copy_outs_only | repr}} # pyright: ignore
18
+
19
# Stage the input fastq files in a temporary directory of soft links so that
# cellranger can be given a single --fastqs directory.
# The directory name is derived from a hash of the inputs, so re-running the
# same job reuses the same directory.
uid = hashlib.md5(str(fastqs).encode()).hexdigest()[:8]
fastqdir = tmpdir / f"cellranger_count_{uid}"
fastqdir.mkdir(parents=True, exist_ok=True)

# A single input directory stands for all the *.fastq.gz files inside it
if len(fastqs) == 1 and fastqs[0].is_dir():
    fastqs = list(fastqs[0].glob("*.fastq.gz"))

# Soft-link the fastq files to the temporary directory
for fastq in fastqs:
    fastq = Path(fastq)
    link = fastqdir / fastq.name
    # fastqdir may survive from a previous run (mkdir uses exist_ok=True);
    # remove a stale link first, otherwise symlink_to() raises
    # FileExistsError and the job cannot be re-run.
    link.unlink(missing_ok=True)
    link.symlink_to(fastq)
30
+
31
+ other_args = {{envs | dict_to_cli_args: dashify=True, exclude=['cellranger', 'reference', 'ref', 'tmpdir', 'id', 'ncores', 'outdir_is_mounted', 'copy_outs_only']}} # pyright: ignore
32
+
33
# Assemble the `cellranger vdj` command line; any extra options collected
# in other_args are appended after the fixed arguments.
command = [cellranger, "vdj"]
command += ["--id", id]
command += ["--fastqs", fastqdir]
command += ["--reference", Path(ref).resolve()]
command += ["--localcores", ncores]
command.append("--disable-ui")
command.extend(other_args)

# Report which cellranger version is being used
version: str = run_command([cellranger, "--version"], stdout="RETURN")  # type: ignore
version = version.replace("cellranger", "")  # type: ignore
version = version.replace("-", "").strip()
print(f"# Detected cellranger version: {version}")

# Decide where cellranger writes its output: directly into outdir, or into
# a fresh local directory under tmpdir when outdir is on a mounted filesystem
if not outdir_is_mounted:
    odir = outdir
else:
    print("# Using mounted outdir, redirecting cellranger output to a local tmpdir")
    local_outdir = tmpdir / f"{outdir.name}-{uid}" / id
    staging_dir = local_outdir.parent
    # Start from a clean staging directory
    if staging_dir.exists():
        shutil.rmtree(staging_dir)
    staging_dir.mkdir(parents=True, exist_ok=True)
    odir = local_outdir

# cellranger is run from the parent of the output directory, with --id set
# to the output directory's name
run_command(command, fg=True, cwd=str(odir.parent))
63
+
64
# The run is considered failed when cellranger did not produce the summary
web_summary_html = odir / "outs" / "web_summary.html"
if not web_summary_html.exists():
    raise RuntimeError(
        f"web_summary.html does not exist in {odir}/outs. "
        "cellranger vdj failed."
    )

# Modify web_summary.html to move javascript to a separate file
# to avoid vscode live server breaking the page by injecting some code
print("# Modify web_summary.html to move javascript to a separate file")
try:
    web_summary_js = odir / "outs" / "web_summary.js"
    web_summary_content = web_summary_html.read_text()
    regex = re.compile(r"<script>(.+)</script>", re.DOTALL)
    # Scan the (potentially large) page once and reuse the match for both
    # the extracted script and the rewritten page.
    matched = regex.search(web_summary_content)
    if matched is None:
        # Fail with an explicit message instead of the opaque
        # "'NoneType' object has no attribute 'group'"
        raise RuntimeError(
            f"No inline <script> block found in {web_summary_html}"
        )
    # Write the script before rewriting the page, so the page never
    # references a web_summary.js that does not exist yet.
    web_summary_js.write_text(matched.group(1))
    # The greedy pattern spans from the first <script> to the last
    # </script>, so there is exactly one match to replace.
    web_summary_html.write_text(
        web_summary_content[: matched.start()]
        + '<script src="web_summary.js"></script>'
        + web_summary_content[matched.end():]
    )
except Exception as e:
    print(f"Error modifying web_summary.html: {e}")
    # Bare raise keeps the original traceback intact
    raise
86
+
87
# If using local tmpdir for output, move results to the final outdir
if outdir_is_mounted:
    print("# Copy results back to outdir")
    if outdir.exists():
        shutil.rmtree(outdir)

    if copy_outs_only:
        # Only the "outs" sub-directory is wanted in the final outdir
        outdir.mkdir(parents=True, exist_ok=True)
        copy_src, copy_dst = odir / "outs", outdir / "outs"
    else:
        copy_src, copy_dst = local_outdir, outdir  # type: ignore

    try:
        shutil.copytree(copy_src, copy_dst)
    except Exception as exc:
        # Some files may fail to copy due to permission issues even though
        # their contents are actually copied. Keep this best-effort, but
        # report the error instead of silently discarding it; the check
        # below decides whether the copy was good enough.
        print(f"# Warning: errors while copying {copy_src} to {copy_dst}: {exc}")

    # Make sure essential files exist
    # NOTE(review): assumed to belong to the mounted-outdir branch, as the
    # error message refers to copying back from tmpdir - confirm.
    web_summary_html = outdir / "outs" / "web_summary.html"
    web_summary_js = outdir / "outs" / "web_summary.js"
    filtered_annotations_csv = outdir / "outs" / "filtered_contig_annotations.csv"
    for f in [web_summary_html, web_summary_js, filtered_annotations_csv]:
        if not f.exists():
            raise RuntimeError(
                f"{f} does not exist in {outdir}/outs. "
                "Copying results back from tmpdir failed."
            )
@@ -1,11 +1,9 @@
1
- source("{{biopipen_dir}}/utils/misc.R")
2
-
3
1
  library(AneuploidyScore)
4
2
  library(dplyr)
5
3
  library(tidyr)
6
4
  library(tibble)
7
- library(ggplot2)
8
- library(ggprism)
5
+ library(plotthis)
6
+ library(biopipen.utils)
9
7
 
10
8
  segfile = {{in.segfile | r}}
11
9
  outdir = {{out.outdir | r}}
@@ -59,7 +57,15 @@ getCAA <- function(segf, cytoarm, tcn_col,
59
57
  }
60
58
 
61
59
  ## Create a GRanges object with all unique intervals between segc and cytoc
62
- starts <- sort(c(GenomicRanges::start(segc), GenomicRanges::start(cytoc)))
60
+ starts <- tryCatch({
61
+ sort(c(GenomicRanges::start(segc), GenomicRanges::start(cytoc)))
62
+ }, error=function(e) {
63
+ warning("Error to detect start on chromosome: ", chr_id, immediate. = TRUE)
64
+ NULL
65
+ })
66
+ if (is.null(starts)) {
67
+ return(NULL)
68
+ }
63
69
  ends <- sort(c(GenomicRanges::end(segc), GenomicRanges::end(cytoc)))
64
70
  combc <- GRanges(seqnames=chr_id,
65
71
  IRanges(start=unique(sort(c(starts, ends[-length(ends)]+1))),
@@ -123,17 +129,36 @@ getCAA <- function(segf, cytoarm, tcn_col,
123
129
  return(combc_arms)
124
130
  })
125
131
  names(seg_cyto_chr) <- names(seg_chr)
126
-
132
+ seg_cyto_chr <- seg_cyto_chr[!sapply(seg_cyto_chr, is.null)]
127
133
  return(as(seg_cyto_chr, "GRangesList"))
128
134
  }
129
135
 
130
- segments = read.table(segfile, header=T, row.names=NULL, sep="\t", stringsAsFactors=F)
131
- seg = data.frame(
132
- seqnames = segments[, chrom_col],
133
- start = segments[, start_col],
134
- end = segments[, end_col],
135
- seg.mean = segments[, seg_col]
136
- )
136
+ if (endsWith(segfile, ".vcf") || endsWith(segfile, ".vcf.gz")) {
137
+ library(VariantAnnotation)
138
+ vcf = readVcf(segfile)
139
+ seg = data.frame(
140
+ seqnames = as.character(seqnames(vcf)),
141
+ start = start(vcf),
142
+ end = vcf@info[[end_col]],
143
+ seg.mean = vcf@info[[seg_col]]
144
+ )
145
+ } else if (endsWith(segfile, ".bed")) {
146
+ segments = read.table(segfile, header=F, row.names=NULL, sep="\t", stringsAsFactors=F)
147
+ seg = data.frame(
148
+ seqnames = segments[, 1],
149
+ start = segments[, 2],
150
+ end = segments[, 3],
151
+ seg.mean = segments[, 5]
152
+ )
153
+ } else {
154
+ segments = read.table(segfile, header=T, row.names=NULL, sep="\t", stringsAsFactors=F)
155
+ seg = data.frame(
156
+ seqnames = segments[, chrom_col],
157
+ start = segments[, start_col],
158
+ end = segments[, end_col],
159
+ seg.mean = segments[, seg_col]
160
+ )
161
+ }
137
162
 
138
163
  {% if envs.segmean_transform %}
139
164
  segmean_transform = {{envs.segmean_transform}}
@@ -168,6 +193,10 @@ if (is.character(cn_transform)) {
168
193
  }
169
194
  {% endif %}
170
195
 
196
+ seg <- seg[
197
+ !is.na(seg$seg.mean) & !is.na(seg$TCN) & !is.infinite(seg$seg.mean) & !is.infinite(seg$TCN),,
198
+ drop=FALSE]
199
+
171
200
  write.table(seg, file.path(outdir, "seg.txt"), sep="\t", quote=F, row.names=F, col.names=T)
172
201
 
173
202
  wgd_ploidy = checkIfWGD(
@@ -227,11 +256,17 @@ sig_min = min(-1, plotdata$Signal, na.rm=TRUE)
227
256
  sig_max = max(1, plotdata$Signal, na.rm=TRUE)
228
257
 
229
258
  png(file.path(outdir, "AneuploidyScore.png"), width=1000, height=600, res=100)
230
- ggplot(plotdata) +
231
- geom_bar(aes(x=Arms, y=Signal, fill=Type), stat="identity") +
232
- geom_hline(yintercept=0, color="black", size=0.1) +
233
- ylim(c(sig_min, sig_max)) +
234
- theme_prism() +
235
- theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
236
- facet_wrap(~SignalType, scales="free_y", nrow=2)
259
+ p <- BarPlot(
260
+ plotdata,
261
+ x = "Arms",
262
+ y = "Signal",
263
+ fill = "Type",
264
+ facet_by = "SignalType",
265
+ facet_nrow = 2,
266
+ y_min = sig_min,
267
+ y_max = sig_max,
268
+ x_text_angle = 90,
269
+ aspect.ratio = 0.2
270
+ )
271
+ print(p)
237
272
  dev.off()