biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (290) hide show
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/config.toml +28 -0
  3. biopipen/core/filters.py +79 -4
  4. biopipen/core/proc.py +12 -3
  5. biopipen/core/testing.py +75 -3
  6. biopipen/ns/bam.py +148 -6
  7. biopipen/ns/bed.py +75 -0
  8. biopipen/ns/cellranger.py +186 -0
  9. biopipen/ns/cellranger_pipeline.py +126 -0
  10. biopipen/ns/cnv.py +19 -3
  11. biopipen/ns/cnvkit.py +1 -1
  12. biopipen/ns/cnvkit_pipeline.py +20 -12
  13. biopipen/ns/delim.py +34 -35
  14. biopipen/ns/gene.py +68 -23
  15. biopipen/ns/gsea.py +63 -37
  16. biopipen/ns/misc.py +39 -14
  17. biopipen/ns/plot.py +304 -1
  18. biopipen/ns/protein.py +183 -0
  19. biopipen/ns/regulatory.py +290 -0
  20. biopipen/ns/rnaseq.py +142 -5
  21. biopipen/ns/scrna.py +2053 -473
  22. biopipen/ns/scrna_metabolic_landscape.py +228 -382
  23. biopipen/ns/snp.py +659 -0
  24. biopipen/ns/stats.py +484 -0
  25. biopipen/ns/tcr.py +683 -98
  26. biopipen/ns/vcf.py +236 -2
  27. biopipen/ns/web.py +97 -6
  28. biopipen/reports/bam/CNVpytor.svelte +4 -9
  29. biopipen/reports/cellranger/CellRangerCount.svelte +18 -0
  30. biopipen/reports/cellranger/CellRangerSummary.svelte +16 -0
  31. biopipen/reports/cellranger/CellRangerVdj.svelte +18 -0
  32. biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
  33. biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
  34. biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
  35. biopipen/reports/common.svelte +15 -0
  36. biopipen/reports/protein/ProdigySummary.svelte +16 -0
  37. biopipen/reports/scrna/CellsDistribution.svelte +4 -39
  38. biopipen/reports/scrna/DimPlots.svelte +1 -1
  39. biopipen/reports/scrna/MarkersFinder.svelte +6 -126
  40. biopipen/reports/scrna/MetaMarkers.svelte +3 -75
  41. biopipen/reports/scrna/RadarPlots.svelte +4 -20
  42. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +61 -22
  43. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +88 -82
  44. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +70 -10
  45. biopipen/reports/snp/PlinkCallRate.svelte +24 -0
  46. biopipen/reports/snp/PlinkFreq.svelte +18 -0
  47. biopipen/reports/snp/PlinkHWE.svelte +18 -0
  48. biopipen/reports/snp/PlinkHet.svelte +18 -0
  49. biopipen/reports/snp/PlinkIBD.svelte +18 -0
  50. biopipen/reports/tcr/CDR3AAPhyschem.svelte +19 -66
  51. biopipen/reports/tcr/ClonalStats.svelte +16 -0
  52. biopipen/reports/tcr/CloneResidency.svelte +3 -93
  53. biopipen/reports/tcr/Immunarch.svelte +4 -155
  54. biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
  55. biopipen/reports/tcr/TESSA.svelte +11 -28
  56. biopipen/reports/utils/misc.liq +22 -7
  57. biopipen/scripts/bam/BamMerge.py +11 -15
  58. biopipen/scripts/bam/BamSampling.py +90 -0
  59. biopipen/scripts/bam/BamSort.py +141 -0
  60. biopipen/scripts/bam/BamSplitChroms.py +10 -10
  61. biopipen/scripts/bam/BamSubsetByBed.py +38 -0
  62. biopipen/scripts/bam/CNAClinic.R +41 -5
  63. biopipen/scripts/bam/CNVpytor.py +153 -54
  64. biopipen/scripts/bam/ControlFREEC.py +13 -14
  65. biopipen/scripts/bam/SamtoolsView.py +33 -0
  66. biopipen/scripts/bed/Bed2Vcf.py +5 -5
  67. biopipen/scripts/bed/BedConsensus.py +5 -5
  68. biopipen/scripts/bed/BedLiftOver.sh +6 -4
  69. biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
  70. biopipen/scripts/bed/BedtoolsMakeWindows.py +47 -0
  71. biopipen/scripts/bed/BedtoolsMerge.py +4 -4
  72. biopipen/scripts/cellranger/CellRangerCount.py +138 -0
  73. biopipen/scripts/cellranger/CellRangerSummary.R +181 -0
  74. biopipen/scripts/cellranger/CellRangerVdj.py +112 -0
  75. biopipen/scripts/cnv/AneuploidyScore.R +55 -20
  76. biopipen/scripts/cnv/AneuploidyScoreSummary.R +221 -163
  77. biopipen/scripts/cnv/TMADScore.R +25 -9
  78. biopipen/scripts/cnv/TMADScoreSummary.R +57 -86
  79. biopipen/scripts/cnvkit/CNVkitAccess.py +7 -6
  80. biopipen/scripts/cnvkit/CNVkitAutobin.py +26 -18
  81. biopipen/scripts/cnvkit/CNVkitBatch.py +6 -6
  82. biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
  83. biopipen/scripts/cnvkit/CNVkitCoverage.py +4 -3
  84. biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
  85. biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
  86. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +12 -8
  87. biopipen/scripts/cnvkit/CNVkitHeatmap.py +5 -5
  88. biopipen/scripts/cnvkit/CNVkitReference.py +6 -5
  89. biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
  90. biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
  91. biopipen/scripts/cnvkit/guess_baits.py +166 -93
  92. biopipen/scripts/delim/RowsBinder.R +1 -1
  93. biopipen/scripts/delim/SampleInfo.R +116 -118
  94. biopipen/scripts/gene/GeneNameConversion.R +67 -0
  95. biopipen/scripts/gene/GenePromoters.R +61 -0
  96. biopipen/scripts/gsea/Enrichr.R +5 -5
  97. biopipen/scripts/gsea/FGSEA.R +184 -50
  98. biopipen/scripts/gsea/GSEA.R +2 -2
  99. biopipen/scripts/gsea/PreRank.R +5 -5
  100. biopipen/scripts/misc/Config2File.py +2 -2
  101. biopipen/scripts/misc/Plot.R +80 -0
  102. biopipen/scripts/misc/Shell.sh +15 -0
  103. biopipen/scripts/misc/Str2File.py +2 -2
  104. biopipen/scripts/plot/Heatmap.R +3 -3
  105. biopipen/scripts/plot/Manhattan.R +147 -0
  106. biopipen/scripts/plot/QQPlot.R +146 -0
  107. biopipen/scripts/plot/ROC.R +88 -0
  108. biopipen/scripts/plot/Scatter.R +112 -0
  109. biopipen/scripts/plot/VennDiagram.R +5 -9
  110. biopipen/scripts/protein/MMCIF2PDB.py +33 -0
  111. biopipen/scripts/protein/PDB2Fasta.py +60 -0
  112. biopipen/scripts/protein/Prodigy.py +119 -0
  113. biopipen/scripts/protein/ProdigySummary.R +140 -0
  114. biopipen/scripts/protein/RMSD.py +178 -0
  115. biopipen/scripts/regulatory/MotifAffinityTest.R +102 -0
  116. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +127 -0
  117. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +104 -0
  118. biopipen/scripts/regulatory/MotifScan.py +159 -0
  119. biopipen/scripts/regulatory/VariantMotifPlot.R +78 -0
  120. biopipen/scripts/regulatory/motifs-common.R +324 -0
  121. biopipen/scripts/rnaseq/Simulation-ESCO.R +180 -0
  122. biopipen/scripts/rnaseq/Simulation-RUVcorr.R +45 -0
  123. biopipen/scripts/rnaseq/Simulation.R +21 -0
  124. biopipen/scripts/rnaseq/UnitConversion.R +325 -54
  125. biopipen/scripts/scrna/AnnData2Seurat.R +40 -0
  126. biopipen/scripts/scrna/CCPlotR-patch.R +161 -0
  127. biopipen/scripts/scrna/CellCellCommunication.py +150 -0
  128. biopipen/scripts/scrna/CellCellCommunicationPlots.R +93 -0
  129. biopipen/scripts/scrna/CellSNPLite.py +30 -0
  130. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +185 -0
  131. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +68 -31
  132. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +27 -22
  133. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +28 -20
  134. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +48 -25
  135. biopipen/scripts/scrna/CellTypeAnnotation.R +37 -1
  136. biopipen/scripts/scrna/CellsDistribution.R +456 -167
  137. biopipen/scripts/scrna/DimPlots.R +1 -1
  138. biopipen/scripts/scrna/ExprImputation-alra.R +109 -0
  139. biopipen/scripts/scrna/ExprImputation-rmagic.R +256 -0
  140. biopipen/scripts/scrna/{ExprImpution-scimpute.R → ExprImputation-scimpute.R} +8 -5
  141. biopipen/scripts/scrna/ExprImputation.R +7 -0
  142. biopipen/scripts/scrna/LoomTo10X.R +51 -0
  143. biopipen/scripts/scrna/MQuad.py +25 -0
  144. biopipen/scripts/scrna/MarkersFinder.R +679 -400
  145. biopipen/scripts/scrna/MetaMarkers.R +265 -161
  146. biopipen/scripts/scrna/ModuleScoreCalculator.R +66 -11
  147. biopipen/scripts/scrna/PseudoBulkDEG.R +678 -0
  148. biopipen/scripts/scrna/RadarPlots.R +355 -134
  149. biopipen/scripts/scrna/ScFGSEA.R +298 -100
  150. biopipen/scripts/scrna/ScSimulation.R +65 -0
  151. biopipen/scripts/scrna/ScVelo.py +617 -0
  152. biopipen/scripts/scrna/Seurat2AnnData.R +7 -0
  153. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +87 -0
  154. biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +36 -30
  155. biopipen/scripts/scrna/SeuratClusterStats-features.R +138 -187
  156. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +81 -0
  157. biopipen/scripts/scrna/SeuratClusterStats-stats.R +78 -89
  158. biopipen/scripts/scrna/SeuratClusterStats.R +47 -10
  159. biopipen/scripts/scrna/SeuratClustering.R +36 -233
  160. biopipen/scripts/scrna/SeuratLoading.R +2 -2
  161. biopipen/scripts/scrna/SeuratMap2Ref.R +84 -113
  162. biopipen/scripts/scrna/SeuratMetadataMutater.R +16 -6
  163. biopipen/scripts/scrna/SeuratPreparing.R +223 -173
  164. biopipen/scripts/scrna/SeuratSubClustering.R +64 -0
  165. biopipen/scripts/scrna/SeuratTo10X.R +27 -0
  166. biopipen/scripts/scrna/Slingshot.R +65 -0
  167. biopipen/scripts/scrna/Subset10X.R +2 -2
  168. biopipen/scripts/scrna/TopExpressingGenes.R +169 -135
  169. biopipen/scripts/scrna/celltypist-wrapper.py +195 -0
  170. biopipen/scripts/scrna/scvelo_paga.py +313 -0
  171. biopipen/scripts/scrna/seurat_anndata_conversion.py +98 -0
  172. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +447 -82
  173. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +348 -241
  174. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +188 -166
  175. biopipen/scripts/snp/MatrixEQTL.R +217 -0
  176. biopipen/scripts/snp/Plink2GTMat.py +148 -0
  177. biopipen/scripts/snp/PlinkCallRate.R +199 -0
  178. biopipen/scripts/snp/PlinkFilter.py +100 -0
  179. biopipen/scripts/snp/PlinkFreq.R +291 -0
  180. biopipen/scripts/snp/PlinkFromVcf.py +81 -0
  181. biopipen/scripts/snp/PlinkHWE.R +85 -0
  182. biopipen/scripts/snp/PlinkHet.R +96 -0
  183. biopipen/scripts/snp/PlinkIBD.R +196 -0
  184. biopipen/scripts/snp/PlinkSimulation.py +124 -0
  185. biopipen/scripts/snp/PlinkUpdateName.py +124 -0
  186. biopipen/scripts/stats/ChowTest.R +146 -0
  187. biopipen/scripts/stats/DiffCoexpr.R +152 -0
  188. biopipen/scripts/stats/LiquidAssoc.R +135 -0
  189. biopipen/scripts/stats/Mediation.R +108 -0
  190. biopipen/scripts/stats/MetaPvalue.R +130 -0
  191. biopipen/scripts/stats/MetaPvalue1.R +74 -0
  192. biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
  193. biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
  194. biopipen/scripts/tcr/Attach2Seurat.R +3 -2
  195. biopipen/scripts/tcr/CDR3AAPhyschem.R +211 -143
  196. biopipen/scripts/tcr/CDR3Clustering.R +343 -0
  197. biopipen/scripts/tcr/ClonalStats.R +526 -0
  198. biopipen/scripts/tcr/CloneResidency.R +255 -131
  199. biopipen/scripts/tcr/CloneSizeQQPlot.R +4 -4
  200. biopipen/scripts/tcr/GIANA/GIANA.py +1356 -797
  201. biopipen/scripts/tcr/GIANA/GIANA4.py +1362 -789
  202. biopipen/scripts/tcr/GIANA/query.py +164 -162
  203. biopipen/scripts/tcr/Immunarch-basic.R +31 -9
  204. biopipen/scripts/tcr/Immunarch-clonality.R +25 -5
  205. biopipen/scripts/tcr/Immunarch-diversity.R +352 -134
  206. biopipen/scripts/tcr/Immunarch-geneusage.R +45 -5
  207. biopipen/scripts/tcr/Immunarch-kmer.R +68 -8
  208. biopipen/scripts/tcr/Immunarch-overlap.R +84 -4
  209. biopipen/scripts/tcr/Immunarch-spectratyping.R +35 -6
  210. biopipen/scripts/tcr/Immunarch-tracking.R +38 -6
  211. biopipen/scripts/tcr/Immunarch-vjjunc.R +165 -0
  212. biopipen/scripts/tcr/Immunarch.R +63 -11
  213. biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
  214. biopipen/scripts/tcr/ImmunarchFilter.R +4 -4
  215. biopipen/scripts/tcr/ImmunarchLoading.R +38 -29
  216. biopipen/scripts/tcr/SampleDiversity.R +1 -1
  217. biopipen/scripts/tcr/ScRepCombiningExpression.R +40 -0
  218. biopipen/scripts/tcr/ScRepLoading.R +166 -0
  219. biopipen/scripts/tcr/TCRClusterStats.R +176 -22
  220. biopipen/scripts/tcr/TCRDock.py +110 -0
  221. biopipen/scripts/tcr/TESSA.R +102 -118
  222. biopipen/scripts/tcr/VJUsage.R +5 -5
  223. biopipen/scripts/tcr/immunarch-patched.R +142 -0
  224. biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
  225. biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
  226. biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
  227. biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
  228. biopipen/scripts/vcf/BcftoolsSort.py +113 -0
  229. biopipen/scripts/vcf/BcftoolsView.py +73 -0
  230. biopipen/scripts/vcf/TruvariBench.sh +14 -7
  231. biopipen/scripts/vcf/TruvariBenchSummary.R +16 -13
  232. biopipen/scripts/vcf/TruvariConsistency.R +1 -1
  233. biopipen/scripts/vcf/Vcf2Bed.py +2 -2
  234. biopipen/scripts/vcf/VcfAnno.py +11 -11
  235. biopipen/scripts/vcf/VcfDownSample.sh +22 -10
  236. biopipen/scripts/vcf/VcfFilter.py +5 -5
  237. biopipen/scripts/vcf/VcfFix.py +7 -7
  238. biopipen/scripts/vcf/VcfFix_utils.py +13 -4
  239. biopipen/scripts/vcf/VcfIndex.py +3 -3
  240. biopipen/scripts/vcf/VcfIntersect.py +3 -3
  241. biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
  242. biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
  243. biopipen/scripts/vcf/bcftools_utils.py +52 -0
  244. biopipen/scripts/web/Download.py +8 -4
  245. biopipen/scripts/web/DownloadList.py +5 -5
  246. biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
  247. biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
  248. biopipen/scripts/web/gcloud_common.py +49 -0
  249. biopipen/utils/gene.py +108 -60
  250. biopipen/utils/misc.py +146 -20
  251. biopipen/utils/reference.py +64 -20
  252. biopipen/utils/reporter.py +177 -0
  253. biopipen/utils/vcf.py +1 -1
  254. biopipen-0.34.26.dist-info/METADATA +27 -0
  255. biopipen-0.34.26.dist-info/RECORD +292 -0
  256. {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
  257. {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +6 -2
  258. biopipen/ns/bcftools.py +0 -111
  259. biopipen/ns/scrna_basic.py +0 -255
  260. biopipen/reports/delim/SampleInfo.svelte +0 -36
  261. biopipen/reports/scrna/GeneExpressionInvistigation.svelte +0 -32
  262. biopipen/reports/scrna/ScFGSEA.svelte +0 -35
  263. biopipen/reports/scrna/SeuratClusterStats.svelte +0 -82
  264. biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -20
  265. biopipen/reports/scrna/SeuratPreparing.svelte +0 -38
  266. biopipen/reports/scrna/TopExpressingGenes.svelte +0 -55
  267. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -31
  268. biopipen/reports/utils/gsea.liq +0 -110
  269. biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
  270. biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
  271. biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
  272. biopipen/scripts/gene/GeneNameConversion.py +0 -66
  273. biopipen/scripts/scrna/ExprImpution-alra.R +0 -32
  274. biopipen/scripts/scrna/ExprImpution-rmagic.R +0 -29
  275. biopipen/scripts/scrna/ExprImpution.R +0 -7
  276. biopipen/scripts/scrna/GeneExpressionInvistigation.R +0 -132
  277. biopipen/scripts/scrna/Write10X.R +0 -11
  278. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -150
  279. biopipen/scripts/tcr/TCRClustering.R +0 -280
  280. biopipen/utils/common_docstrs.py +0 -61
  281. biopipen/utils/gene.R +0 -49
  282. biopipen/utils/gsea.R +0 -193
  283. biopipen/utils/io.R +0 -20
  284. biopipen/utils/misc.R +0 -114
  285. biopipen/utils/mutate_helpers.R +0 -433
  286. biopipen/utils/plot.R +0 -173
  287. biopipen/utils/rnaseq.R +0 -48
  288. biopipen/utils/single_cell.R +0 -115
  289. biopipen-0.21.0.dist-info/METADATA +0 -22
  290. biopipen-0.21.0.dist-info/RECORD +0 -218
@@ -1,433 +0,0 @@
1
- suppressPackageStartupMessages(library(rlang))
2
- suppressPackageStartupMessages(library(tidyselect))
3
- suppressPackageStartupMessages(library(dplyr))
4
-
5
#' Get expanded, collapsed, emerged or vanished clones from a meta data frame
#'
#' Shared implementation behind `expanded()`, `collapsed()`, `emerged()` and
#' `vanished()`. For every `id`, one value is taken per ident group (the
#' number of rows when `compare` is `.n`, otherwise the first `compare` value
#' found in that group; a group with no rows counts as 0). The resulting pair
#' (ident_1, ident_2) is then tested with the rule selected by `fun`:
#'
#' * `"expanded"`:   ident_1 > ident_2 and ident_2 > 0
#' * `"expanded+"`:  ident_1 > ident_2 (emerged clones included)
#' * `"collapsed"`:  ident_1 < ident_2 and ident_1 > 0
#' * `"collapsed+"`: ident_1 < ident_2 (vanished clones included)
#' * `"emerged"`:    ident_1 > 0 and ident_2 == 0
#' * `"vanished"`:   ident_1 == 0 and ident_2 > 0
#'
#' @rdname Get expanded, collapsed, emerged or vanished clones
#'
#' @param df The meta data frame
#' @param group.by The column in metadata to group the cells. Either a
#'  quosure (the exported wrappers pass `enquo(group.by)`) or a string.
#' @param idents The groups of cells to compare (values in `group.by` column).
#'  Either length 1 (`ident_1`) or length 2 (`ident_1` and `ident_2`).
#'  If length 1, the rest of the cells with non-NA values in `group.by` will
#'  be used as `ident_2`.
#' @param subset A quosure of an expression to subset the cells, passed to
#'  `dplyr::filter()`. The wrappers default to `TRUE` (no filtering).
#' @param id The column in metadata holding the group ids (i.e. `CDR3.aa`).
#'  Quosure or string.
#' @param compare Either a (numeric) column (i.e. `Clones`) in metadata to
#'  compare between groups, or `.n` to compare the number of cells in each
#'  group. Quosure or string.
#' @param fun The way to compare between groups, one of the rules listed
#'  above. The wrappers select `"expanded+"` / `"collapsed+"` through their
#'  `include_emerged` / `include_vanished` arguments.
#' @param uniq Whether to return unique ids or not.
#'  If `FALSE`, you can mutate the meta data frame with the returned ids.
#'  For example, `df %>% mutate(expanded = expanded(...))`.
#' @param order The order of the returned ids. It is scanned for the keywords
#'  `sum`, `diff`, `desc` and `abs`, so the separator does not matter
#'  (`"diff+desc"` and `"diff,desc"` are equivalent). `sum`/`diff` pick the
#'  sum or the difference (ident_1 - ident_2) of the compared values, `desc`
#'  sorts in descending order and `abs` (only honoured together with `diff`)
#'  sorts by the absolute difference. `sum` wins when both `sum` and `diff`
#'  are present. Without `sum` or `diff` the ids are not reordered.
#'  The wrappers default to `"diff+desc"`.
#'  It only works when `uniq` is `TRUE`. If `uniq` is `FALSE`, the returned
#'  ids will be in the same order as in `df`.
#'
#' @return A vector of the selected clones (in `id` column)
#'  If uniq is `FALSE`, the vector will be the same length as `df`, with `NA`
#'  for the rows whose id was not selected.
#'
#' @examples
#' df <- tibble(
#'  Clones = c(10, 8, 1, 5, 9, 2, 3, 7, 6, 4, 9, 9),
#'  Source = c(
#'    "Tumor", "Normal", "Normal", "Normal", "Tumor", "Tumor",
#'    "Tumor", "Normal", "Normal", "Normal", NA, "X"
#'  ),
#'  CDR3.aa = c("A", "C", "B", "E", "D", "E", "E", "B", "B", "B", "A", "A")
#' )
#'
#' # Per-clone summary for idents c("Tumor", "Normal"), before filtering:
#' #   CDR3.aa ..sum ..diff
#' #   <chr>   <dbl>  <dbl>
#' # 1 A          10     10
#' # 2 B           1     -1
#' # 3 C           8     -8
#' # 4 D           9      9
#' # 5 E           7     -3
#'
#' # Get expanded clones, counting clones absent from "Normal" as well
#' expanded(df, Source, c("Tumor", "Normal"), include_emerged = TRUE)
#' # expected: "A" "D"
#'
#' # Get collapsed clones, counting clones absent from "Tumor" as well
#' collapsed(df, Source, c("Tumor", "Normal"), include_vanished = TRUE)
#' # expected: "B" "E" "C"
#'
#' # Strictly collapsed clones (still present in "Tumor")
#' collapsed(df, Source, c("Tumor", "Normal"))
#' # expected: "E"
#'
#' # Get emerged clones
#' emerged(df, Source, c("Tumor", "Normal"))
#' # expected: "A" "D"
#'
#' # Get vanished clones
#' vanished(df, Source, c("Tumor", "Normal"))
#' # expected: "B" "C"
.size_compare <- function(
    df,
    group.by, # nolint
    idents,
    subset,
    id,
    compare,
    fun,
    uniq,
    order
) {
    # Reject unknown rules up front; otherwise the predicate below would
    # silently yield NULL and fail far away inside summarise().
    known_funs <- c(
        "expanded", "expanded+", "collapsed", "collapsed+",
        "emerged", "vanished"
    )
    if (length(fun) != 1 || !fun %in% known_funs) {
        stop(paste0(
            "`fun` must be one of ",
            paste0('"', known_funs, '"', collapse = ", "),
            '. Got "',
            paste(fun, collapse = ", "),
            '"'
        ))
    }

    if (length(idents) == 1) {
        ident_1 <- idents[1]
        ident_2 <- NULL
    } else if (length(idents) == 2) {
        ident_1 <- idents[1]
        ident_2 <- idents[2]
    } else {
        stop("idents must be length 1 or 2")
    }
    # Sentinel meaning "everything that is not ident_1"
    if (is.null(ident_2)) ident_2 <- "<NULL>"

    if (is_empty(attr(group.by, ".Environment"))) {
        # Works if a (quoted) string passed
        group.by <- sym(as_name(group.by))
    }
    if (is_empty(attr(id, ".Environment"))) {
        id <- sym(as_name(id))
    }
    if (is_empty(attr(compare, ".Environment"))) {
        compare <- sym(as_name(compare))
    }
    compare_label <- as_name(compare)
    compare_is_count <- compare_label == '.n'

    if (!as_name(group.by) %in% colnames(df)) {
        stop(paste0(
            '`group.by` must be a column name in df. Got "',
            as_name(group.by),
            '"'
        ))
    }

    if (!compare_is_count && !compare_label %in% colnames(df)) {
        stop(paste0(
            "`compare` must be either a column name in df, or '.n'. ",
            'Got "',
            compare_label,
            '"'
        ))
    }

    # comp[1] is the value for ident_1, comp[2] the value for ident_2
    predicate <- function(comp) {
        switch(fun,
            "expanded" = comp[1] > comp[2] && comp[2] > 0,
            "expanded+" = comp[1] > comp[2],
            "collapsed" = comp[1] < comp[2] && comp[1] > 0,
            "collapsed+" = comp[1] < comp[2],
            "emerged" = comp[1] > 0 && comp[2] == 0,
            "vanished" = comp[1] == 0 && comp[2] > 0
        )
    }

    # subset the data frame
    trans <- df %>% dplyr::filter(!!subset) %>%
        # remove NA values in group.by column
        dplyr::filter(!is.na(!!group.by)) %>%
        # mark the group.by column (as ..group) as ident_1 or ident_2 or NA
        mutate(
            ..group = if_else(
                !!group.by == ident_1,
                "ident_1",
                if_else(ident_2 != "<NULL>" & !!group.by != ident_2, NA, "ident_2")
            )
        ) %>%
        # remove NA values in ..group column
        dplyr::filter(!is.na(..group)) %>%
        # for each clone and group (ident_1 and ident_2)
        group_by(!!id, ..group) %>%
        # summarise the number of cells in each clone and group
        # so that we can compare between groups later.
        # ifelse() (not if_else()) is deliberate: with a scalar test it only
        # evaluates the branch that is needed, so `.n` is never looked up
        # as a column.
        summarise(
            ..compare = ifelse(compare_is_count, n(), first(!!compare)),
            .groups = "drop"
        ) %>%
        # for each clone, either compare Clones or ..count between groups
        # (ident_1 and ident_2)
        group_by(!!id) %>%
        # add missing group (either ident_1 or ident_2) with a value of 0
        group_modify(function(d, ...) {
            if (nrow(d) == 1) {
                d <- d %>% add_row(
                    ..group = ifelse(
                        d$..group == "ident_1", "ident_2", "ident_1"
                    ),
                    ..compare = 0
                )
            }
            d
        }) %>%
        # make sure ident_1 and ident_2 are in order
        arrange(..group, .by_group = TRUE) %>%
        # add the predicates, sums and diffs
        summarise(
            ..predicate = predicate(..compare),
            ..sum = sum(..compare),
            ..diff = ..compare[1] - ..compare[2]
        ) %>%
        # filter the clones
        dplyr::filter(..predicate)

    # `order` is parsed by keyword, e.g. "diff+desc" or "sum,desc"
    order_sum <- grepl("sum", order)
    order_diff <- grepl("diff", order)
    order_desc <- grepl("desc", order)
    order_abs <- grepl("abs", order)
    if (order_sum && !order_desc) {
        out <- trans %>% arrange(..sum) %>% pull(!!id)
    } else if (order_sum) {
        out <- trans %>% arrange(desc(..sum)) %>% pull(!!id)
    } else if (order_diff && !order_desc && !order_abs) {
        out <- trans %>% arrange(..diff) %>% pull(!!id)
    } else if (order_diff && !order_desc && order_abs) {
        out <- trans %>% arrange(abs(..diff)) %>% pull(!!id)
    } else if (order_diff && order_desc && !order_abs) {
        out <- trans %>% arrange(desc(..diff)) %>% pull(!!id)
    } else if (order_diff && order_desc && order_abs) {
        out <- trans %>% arrange(desc(abs(..diff))) %>% pull(!!id)
    } else {
        out <- trans %>% pull(!!id)
    }

    if (uniq) { return(out) }

    # Same length as df: keep the id where it was selected, NA elsewhere
    df %>% mutate(..out = if_else(!!id %in% out, !!id, NA)) %>% pull(..out)
}
217
-
218
#' Get expanded clones
#'
#' Thin wrapper around `.size_compare()`. A clone is selected when its value
#' in the first ident is larger than in the second one and the second one is
#' greater than 0; with `include_emerged = TRUE` the second condition is
#' dropped. The column arguments are captured unevaluated, so they can be
#' given without quotes.
#'
#' @param df The meta data frame. When the argument is literally `.`, the
#'  columns of the surrounding dplyr verb are used (`across(everything())`).
#' @param group.by,idents,subset,id,compare,uniq,order Forwarded to
#'  `.size_compare()`.
#' @param include_emerged Whether to also select clones that are absent from
#'  the second ident.
#' @return The ids returned by `.size_compare()`.
#' @export
expanded <- function(
    df,
    group.by, # nolint
    idents,
    subset = TRUE,
    id = CDR3.aa,
    compare = Clones,
    uniq = TRUE,
    order = "diff+desc",
    include_emerged = FALSE
) {
    df_label <- as_label(enquo(df))
    if (length(df_label) == 1 && df_label == ".") {
        # Called as f(., ...) inside a dplyr verb: use the current data
        df <- across(everything())
    }
    mode <- if (include_emerged) "expanded+" else "expanded"
    .size_compare(
        df = df,
        group.by = enquo(group.by),
        idents = idents,
        subset = enquo(subset),
        id = enquo(id),
        compare = enquo(compare),
        fun = mode,
        uniq = uniq,
        order = order
    )
}
247
-
248
#' Get collapsed clones
#'
#' Thin wrapper around `.size_compare()`. A clone is selected when its value
#' in the first ident is smaller than in the second one and the first one is
#' greater than 0; with `include_vanished = TRUE` the second condition is
#' dropped. The column arguments are captured unevaluated, so they can be
#' given without quotes.
#'
#' @param df The meta data frame. When the argument is literally `.`, the
#'  columns of the surrounding dplyr verb are used (`across(everything())`).
#' @param group.by,idents,subset,id,compare,uniq,order Forwarded to
#'  `.size_compare()`.
#' @param include_vanished Whether to also select clones that are absent from
#'  the first ident.
#' @return The ids returned by `.size_compare()`.
#' @export
collapsed <- function(
    df,
    group.by, # nolint
    idents,
    subset = TRUE,
    id = CDR3.aa,
    compare = Clones,
    uniq = TRUE,
    order = "diff+desc",
    include_vanished = FALSE
) {
    df_label <- as_label(enquo(df))
    if (length(df_label) == 1 && df_label == ".") {
        # Called as f(., ...) inside a dplyr verb: use the current data
        df <- across(everything())
    }
    mode <- if (include_vanished) "collapsed+" else "collapsed"
    .size_compare(
        df = df,
        group.by = enquo(group.by),
        idents = idents,
        subset = enquo(subset),
        id = enquo(id),
        compare = enquo(compare),
        fun = mode,
        uniq = uniq,
        order = order
    )
}
277
-
278
#' Get emerged clones
#'
#' Thin wrapper around `.size_compare()` with the `"emerged"` rule: a clone
#' is selected when its value is greater than 0 in the first ident and 0 in
#' the second one. The column arguments are captured unevaluated, so they
#' can be given without quotes.
#'
#' @param df The meta data frame. When the argument is literally `.`, the
#'  columns of the surrounding dplyr verb are used (`across(everything())`).
#' @param group.by,idents,subset,id,compare,uniq,order Forwarded to
#'  `.size_compare()`.
#' @return The ids returned by `.size_compare()`.
#' @export
emerged <- function(
    df,
    group.by, # nolint
    idents,
    subset = TRUE,
    id = CDR3.aa,
    compare = Clones,
    uniq = TRUE,
    order = "diff+desc"
) {
    df_label <- as_label(enquo(df))
    if (length(df_label) == 1 && df_label == ".") {
        # Called as f(., ...) inside a dplyr verb: use the current data
        df <- across(everything())
    }
    .size_compare(
        df = df,
        group.by = enquo(group.by),
        idents = idents,
        subset = enquo(subset),
        id = enquo(id),
        compare = enquo(compare),
        fun = "emerged",
        uniq = uniq,
        order = order
    )
}
305
-
306
#' Get vanished clones
#'
#' Thin wrapper around `.size_compare()` with the `"vanished"` rule: a clone
#' is selected when its value is 0 in the first ident and greater than 0 in
#' the second one. The column arguments are captured unevaluated, so they
#' can be given without quotes.
#'
#' @param df The meta data frame. When the argument is literally `.`, the
#'  columns of the surrounding dplyr verb are used (`across(everything())`).
#' @param group.by,idents,subset,id,compare,uniq,order Forwarded to
#'  `.size_compare()`.
#' @return The ids returned by `.size_compare()`.
#' @export
vanished <- function(
    df,
    group.by, # nolint
    idents,
    subset = TRUE,
    id = CDR3.aa,
    compare = Clones,
    uniq = TRUE,
    order = "diff+desc"
) {
    df_label <- as_label(enquo(df))
    if (length(df_label) == 1 && df_label == ".") {
        # Called as f(., ...) inside a dplyr verb: use the current data
        df <- across(everything())
    }
    .size_compare(
        df = df,
        group.by = enquo(group.by),
        idents = idents,
        subset = enquo(subset),
        id = enquo(id),
        compare = enquo(compare),
        fun = "vanished",
        uniq = uniq,
        order = order
    )
}
333
-
334
#' Get paired entities from a data frame based on the other column
#'
#' An id is "paired" either when it occurs exactly `idents` times (single
#' number), or when the set of its `compare_col` values equals `idents`
#' (vector).
#'
#' @rdname Get paired entities
#' @param df The data frame. Use `.` if the function is called in a dplyr pipe.
#' @param id_col The column name in `df` for the ids to be returned in the
#'  final output
#' @param compare_col The column name in `df` to compare the values for each
#'  id in `id_col`.
#' @param idents The values in `compare_col` to compare. It could be either
#'  an integer or a vector. If it is an integer, the number of rows of an id
#'  must be the same as the integer for the id to be regarded as paired.
#'  If it is a vector, the values in `compare_col` must be the same (as a
#'  set) as the values in `idents` for the id to be regarded as paired.
#' @param uniq Whether to return unique ids or not. Default is `TRUE`.
#'  If `FALSE`, you can mutate the meta data frame with the returned ids.
#'  Non-paired ids will be `NA`.
#' @return A vector of paired ids (in `id_col` column)
#' @examples
#' df <- tibble(
#'  id = c("A", "A", "B", "B", "C", "C", "D", "D"),
#'  compare = c(1, 2, 1, 1, 1, 2, 1, 2)
#' )
#' paired(df, id, compare, 2)
#' # [1] "A" "B" "C" "D"
#' paired(df, id, compare, c(1, 2))
#' # [1] "A" "C" "D"
#' paired(df, id, compare, c(1, 2), uniq = FALSE)
#' # [1] "A" "A" NA  NA  "C" "C" "D" "D"
#'
paired <- function(
    df,
    id_col,
    compare_col,
    idents = 2,
    uniq = TRUE
) {
    df_label <- as_label(enquo(df))
    if (length(df_label) == 1 && df_label == ".") {
        # Called as paired(., ...) inside a dplyr verb: use the current data
        df <- across(everything())
    }

    # Accept both bare column names and strings
    id_col <- enquo(id_col)
    compare_col <- enquo(compare_col)
    if (is_empty(attr(id_col, ".Environment"))) {
        id_col <- sym(as_name(id_col))
    }
    if (is_empty(attr(compare_col, ".Environment"))) {
        compare_col <- sym(as_name(compare_col))
    }

    available <- colnames(df)
    if (!as_name(id_col) %in% available) {
        stop(paste0(
            '`id_col` must be a column name in df. Got "',
            as_name(id_col),
            '"'
        ))
    }
    if (!as_name(compare_col) %in% available) {
        stop(paste0(
            '`compare_col` must be a column name in df. Got "',
            as_name(compare_col),
            '"'
        ))
    }

    by_count <- is.numeric(idents) && length(idents) == 1
    if (by_count) {
        # A single number: the id must occur exactly that many times
        if (idents <= 1) {
            stop(paste0(
                '`idents` must be greater than 1. Got ',
                idents
            ))
        }
        counted <- add_count(df, !!id_col, name = "..count")
        marked <- mutate(
            counted,
            ..paired = if_else(..count == idents, !!id_col, NA)
        )
    } else {
        # A vector: the id's compare values must match `idents` as a set
        if (length(idents) <= 1) {
            stop(paste0(
                '`idents` must be a vector with length greater than 1. Got ',
                length(idents)
            ))
        }
        grouped <- group_by(df, !!id_col)
        marked <- mutate(
            grouped,
            ..paired = if_else(
                rep(setequal(!!compare_col, idents), n()),
                !!id_col,
                NA
            )
        )
        marked <- ungroup(marked)
    }

    ids <- pull(marked, ..paired)
    if (!uniq) {
        return(ids)
    }
    as.vector(unique(na.omit(ids)))
}
biopipen/utils/plot.R DELETED
@@ -1,173 +0,0 @@
1
- library(ggplot2)
2
- pdf(NULL) # preventing Rplots.pdf
3
-
4
#' Draw a Venn diagram with `ggVennDiagram()`
#'
#' @param data A named list with elements,
#'  e.g. `list(A=paste0("R", 1:5), B=paste0("R", 3:7))`
#' @param args Arguments for `ggVennDiagram()`; `x` is overwritten by `data`
#' @param ggs Extra ggplot components, either objects or strings of R code
#'  that evaluate to a component
#' @param devpars Parameters for device (res, width, height) for `png()`
#' @param outfile The output file. If NULL, will return the plot object
#' @return The plot object if `outfile` is NULL, otherwise the value of
#'  `dev.off()` after the plot was written to `outfile`
plotVenn = function(
    data,
    args = list(),
    ggs = NULL,
    devpars = list(res=100, width=1000, height=1000),
    outfile = NULL
) {
    library(ggVennDiagram)

    args$x = data
    p = do.call(ggVennDiagram, args)
    if (!is.null(ggs)) {
        for (gg in ggs) {
            if (is.character(gg)) {
                # NOTE(review): strings are evaluated as R code; only pass
                # trusted configuration here.
                p = p + eval(parse(text=gg))
            } else {
                p = p + gg
            }
        }
    }

    if (is.null(outfile)) {
        return (p)
    }

    devpars$filename = outfile
    do.call(png, devpars)
    # Close the png device even if rendering fails, so a broken plot does
    # not leave the device open; the original error is re-signalled.
    tryCatch(print(p), error = function(e) {
        dev.off()
        stop(e)
    })
    dev.off()
}
40
-
41
-
42
#' Draw a ggplot with a single `geom_*()` layer
#'
#' @param data A data frame (long format)
#' @param geom The geom, i.e. the part after `geom_` (e.g. `"violin"`)
#' @param args Arguments for `geom_<geom>()`
#' @param ggs Extra ggplot components, either objects or strings of R code
#'  that evaluate to a component
#' @param devpars Parameters for device (res, width, height) for `png()`
#' @param outfile The output file. If NULL, will return the plot object
#' @return The plot object if `outfile` is NULL, otherwise the value of
#'  `dev.off()` after the plot was written to `outfile`
plotGG = function(
    data,
    geom,
    args = list(),
    ggs = NULL,
    devpars = list(res=100, width=1000, height=1000),
    outfile = NULL
) {

    p = ggplot(data)
    p = p + do.call(paste0("geom_", geom), args)
    if (!is.null(ggs)) {
        for (gg in ggs) {
            if (is.character(gg)) {
                # NOTE(review): strings are evaluated as R code; only pass
                # trusted configuration here.
                p = p + eval(parse(text=gg))
            } else {
                p = p + gg
            }
        }
    }

    if (is.null(outfile)) {
        return (p)
    }

    devpars$filename = outfile
    do.call(png, devpars)
    # Close the png device even if rendering fails, so a broken plot does
    # not leave the device open; the original error is re-signalled.
    tryCatch(print(p), error = function(e) {
        dev.off()
        stop(e)
    })
    dev.off()
}
78
-
79
-
80
#' Draw a violin plot; shortcut for `plotGG()` with `geom = "violin"`
#'
#' @param data A data frame (long format)
#' @param args Arguments for `geom_violin()`
#' @param ggs Extra ggplot components in string
#' @param devpars Parameters for device (res, width, height) for `png()`
#' @param outfile The output file. If NULL, will return the plot object
#' @return Whatever `plotGG()` returns
plotViolin = function(
    data,
    args = list(),
    ggs = NULL,
    devpars = list(res=100, width=1000, height=1000),
    outfile = NULL
) {
    plotGG(
        data = data,
        geom = "violin",
        args = args,
        ggs = ggs,
        devpars = devpars,
        outfile = outfile
    )
}
94
-
95
-
96
#' Draw an UpSet plot with ggupset
#'
#' @param data A named list with elements,
#'  e.g. `list(A=paste0("R", 1:5), B=paste0("R", 3:7))`
#'  Or a data frame
#'  https://cran.r-project.org/web/packages/ggupset/readme/README.html
#'  A list is converted into a data frame with a list-column `V1` holding,
#'  for each element, the names of the sets it belongs to.
#' @param args Arguments for `scale_x_upset()`
#' @param ggs Extra ggplot components, either objects or strings of R code
#'  that evaluate to a component
#' @param devpars Parameters for device (res, width, height) for `png()`
#' @param outfile The output file. If NULL, will return the plot object
#' @return The plot object if `outfile` is NULL, otherwise the value of
#'  `dev.off()` after the plot was written to `outfile`
plotUpset = function(
    data,
    args = list(),
    ggs = "geom_bar(aes(x=V1))",
    devpars = list(res=100, width=1000, height=1000),
    outfile = NULL
) {
    library(ggupset)
    library(tidyr)
    library(dplyr)

    if (!is.data.frame(data) && is.list(data)) {
        all_elems = unique(unlist(data))
        df = data.frame(ALL_ELEMS = all_elems)
        # One membership column per set (set name where the element is a
        # member, NA otherwise), then collapsed into the list-column V1.
        # NOTE(review): cbind() yields one ALL_ELEMS column per set; this
        # relies on select(-ALL_ELEMS) dropping all of them -- confirm.
        data = do.call(cbind, lapply(names(data), function(nd) {
            df[df$ALL_ELEMS %in% data[[nd]], nd] = nd
            df
        })) %>% select(-ALL_ELEMS) %>% unite("V1", sep="; ", na.rm = TRUE) %>%
            mutate(V1 = strsplit(V1, "; ", fixed=TRUE))
    }

    p = ggplot(data)
    for (gg in ggs) {
        if (is.character(gg)) {
            # NOTE(review): strings are evaluated as R code; only pass
            # trusted configuration here.
            p = p + eval(parse(text=gg))
        } else {
            p = p + gg
        }
    }
    p = p + do.call(scale_x_upset, args)

    if (is.null(outfile)) {
        return (p)
    }

    devpars$filename = outfile
    do.call(png, devpars)
    # Close the png device even if rendering fails, so a broken plot does
    # not leave the device open; the original error is re-signalled.
    tryCatch(print(p), error = function(e) {
        dev.off()
        stop(e)
    })
    dev.off()
}
144
-
145
#' Draw a heatmap with `ComplexHeatmap::Heatmap()`
#'
#' @param data Data matrix (coerced with `as.matrix()`)
#' @param args Arguments for `ComplexHeatmap::Heatmap()`; `matrix` is
#'  overwritten by `data`
#' @param draw Other arguments for `ComplexHeatmap::draw()`
#' @param devpars Parameters for device (res, width, height) for `png()`.
#'  With the default `NULL` only `filename` is passed to `png()`.
#' @param outfile The output file. If NULL, will return the plot object
#'  If "draw", will call `ComplexHeatmap::draw()` on the current device
#' @return The heatmap object if `outfile` is NULL, the value of
#'  `ComplexHeatmap::draw()` if `outfile` is "draw", otherwise the value of
#'  `dev.off()` after the heatmap was written to `outfile`
plotHeatmap = function(
    data,
    args = list(),
    draw = list(),
    devpars = NULL,
    outfile = NULL
) {
    library(ComplexHeatmap)

    args$matrix = as.matrix(data)
    hm = do.call(Heatmap, args)

    if (is.null(outfile)) {
        return(hm)
    } else if (outfile == "draw") {
        do.call(ComplexHeatmap::draw, c(list(hm), draw))
    } else {
        devpars$filename = outfile
        do.call(png, devpars)
        # Close the png device even if drawing fails, so a broken heatmap
        # does not leave the device open; the original error is re-signalled.
        tryCatch(
            do.call(ComplexHeatmap::draw, c(list(hm), draw)),
            error = function(e) {
                dev.off()
                stop(e)
            }
        )
        dev.off()
    }
}
biopipen/utils/rnaseq.R DELETED
@@ -1,48 +0,0 @@
1
-
2
# Normalize a unit specification: anything that contains "count" becomes
# plain "count", everything else is returned untouched.
.normUnit = function(unit) {
    if (!("count" %in% unit)) {
        return(unit)
    }
    "count"
}
8
-
9
# Compute gene lengths from a header-less exon table read with read.table():
# column 4 is the exon start, column 5 the exon end and column 10 the gene
# name. Returns the summed exon lengths (end - start + 1) as a vector named
# by gene.
glenFromGFFExons = function(exonfile) {
    exons = read.table(exonfile, header = FALSE, row.names = NULL)
    # V4: start, V5: end, V10: gene name
    per_gene = aggregate(V5-V4+1 ~ V10, exons, sum)
    setNames(per_gene[, -1, drop = TRUE], per_gene[, 1])
}
18
-
19
# Convert a count matrix (genes x samples, gene names as row names) to TPM.
#
# x:    the count matrix
# args: a list; args$genelen holds the gene lengths named by gene
#
# Genes without a length are dropped (with a warning listing them). Returns
# a matrix with the remaining genes as rows and the samples as columns;
# each column sums to 1e6.
# Stops if args$genelen is NULL.
count2tpm = function(x, args) {
    if (is.null(args$genelen)) {
        stop("Gene lengths are required to convert count to TPM.")
    }
    glengenes = names(args$genelen)
    mygenes = rownames(x)
    # Only warn when something is actually missing, and join the names into
    # one readable message (`collapse`, not `sep`).
    absent = setdiff(mygenes, glengenes)
    if (length(absent) > 0) {
        warning(paste(length(absent), "gene cannot be found in gene length data"))
        warning(paste(absent, collapse=", "))
    }

    genes = intersect(mygenes, glengenes)
    x = x[genes, , drop=FALSE]

    # see: https://gist.github.com/slowkow/c6ab0348747f86e2748b
    # and https://support.bioconductor.org/p/91218/
    # reads per base for each gene, then scaled per sample to sum to 1e6
    out = x / unlist(args$genelen[genes])
    out = t(t(out) * 1e6 / colSums(out))
    rownames(out) = genes
    colnames(out) = colnames(x)

    return(out)
}
41
-
42
-
43
# Convert expression values from one unit to another.
#
# Both units are normalized with .normUnit() and the converter is looked up
# by name as "<inunit>2<outunit>" (e.g. count2tpm), then called as
# converter(x, args).
unit_conversion = function(x, inunit, outunit, args=list()) {
    from = .normUnit(inunit)
    to = .normUnit(outunit)
    converter = get(paste0(from, "2", to))
    converter(x, args)
}