biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biopipen/__init__.py +1 -1
- biopipen/core/config.toml +28 -0
- biopipen/core/filters.py +79 -4
- biopipen/core/proc.py +12 -3
- biopipen/core/testing.py +75 -3
- biopipen/ns/bam.py +148 -6
- biopipen/ns/bed.py +75 -0
- biopipen/ns/cellranger.py +186 -0
- biopipen/ns/cellranger_pipeline.py +126 -0
- biopipen/ns/cnv.py +19 -3
- biopipen/ns/cnvkit.py +1 -1
- biopipen/ns/cnvkit_pipeline.py +20 -12
- biopipen/ns/delim.py +34 -35
- biopipen/ns/gene.py +68 -23
- biopipen/ns/gsea.py +63 -37
- biopipen/ns/misc.py +39 -14
- biopipen/ns/plot.py +304 -1
- biopipen/ns/protein.py +183 -0
- biopipen/ns/regulatory.py +290 -0
- biopipen/ns/rnaseq.py +142 -5
- biopipen/ns/scrna.py +2053 -473
- biopipen/ns/scrna_metabolic_landscape.py +228 -382
- biopipen/ns/snp.py +659 -0
- biopipen/ns/stats.py +484 -0
- biopipen/ns/tcr.py +683 -98
- biopipen/ns/vcf.py +236 -2
- biopipen/ns/web.py +97 -6
- biopipen/reports/bam/CNVpytor.svelte +4 -9
- biopipen/reports/cellranger/CellRangerCount.svelte +18 -0
- biopipen/reports/cellranger/CellRangerSummary.svelte +16 -0
- biopipen/reports/cellranger/CellRangerVdj.svelte +18 -0
- biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
- biopipen/reports/common.svelte +15 -0
- biopipen/reports/protein/ProdigySummary.svelte +16 -0
- biopipen/reports/scrna/CellsDistribution.svelte +4 -39
- biopipen/reports/scrna/DimPlots.svelte +1 -1
- biopipen/reports/scrna/MarkersFinder.svelte +6 -126
- biopipen/reports/scrna/MetaMarkers.svelte +3 -75
- biopipen/reports/scrna/RadarPlots.svelte +4 -20
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +61 -22
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +88 -82
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +70 -10
- biopipen/reports/snp/PlinkCallRate.svelte +24 -0
- biopipen/reports/snp/PlinkFreq.svelte +18 -0
- biopipen/reports/snp/PlinkHWE.svelte +18 -0
- biopipen/reports/snp/PlinkHet.svelte +18 -0
- biopipen/reports/snp/PlinkIBD.svelte +18 -0
- biopipen/reports/tcr/CDR3AAPhyschem.svelte +19 -66
- biopipen/reports/tcr/ClonalStats.svelte +16 -0
- biopipen/reports/tcr/CloneResidency.svelte +3 -93
- biopipen/reports/tcr/Immunarch.svelte +4 -155
- biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
- biopipen/reports/tcr/TESSA.svelte +11 -28
- biopipen/reports/utils/misc.liq +22 -7
- biopipen/scripts/bam/BamMerge.py +11 -15
- biopipen/scripts/bam/BamSampling.py +90 -0
- biopipen/scripts/bam/BamSort.py +141 -0
- biopipen/scripts/bam/BamSplitChroms.py +10 -10
- biopipen/scripts/bam/BamSubsetByBed.py +38 -0
- biopipen/scripts/bam/CNAClinic.R +41 -5
- biopipen/scripts/bam/CNVpytor.py +153 -54
- biopipen/scripts/bam/ControlFREEC.py +13 -14
- biopipen/scripts/bam/SamtoolsView.py +33 -0
- biopipen/scripts/bed/Bed2Vcf.py +5 -5
- biopipen/scripts/bed/BedConsensus.py +5 -5
- biopipen/scripts/bed/BedLiftOver.sh +6 -4
- biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
- biopipen/scripts/bed/BedtoolsMakeWindows.py +47 -0
- biopipen/scripts/bed/BedtoolsMerge.py +4 -4
- biopipen/scripts/cellranger/CellRangerCount.py +138 -0
- biopipen/scripts/cellranger/CellRangerSummary.R +181 -0
- biopipen/scripts/cellranger/CellRangerVdj.py +112 -0
- biopipen/scripts/cnv/AneuploidyScore.R +55 -20
- biopipen/scripts/cnv/AneuploidyScoreSummary.R +221 -163
- biopipen/scripts/cnv/TMADScore.R +25 -9
- biopipen/scripts/cnv/TMADScoreSummary.R +57 -86
- biopipen/scripts/cnvkit/CNVkitAccess.py +7 -6
- biopipen/scripts/cnvkit/CNVkitAutobin.py +26 -18
- biopipen/scripts/cnvkit/CNVkitBatch.py +6 -6
- biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
- biopipen/scripts/cnvkit/CNVkitCoverage.py +4 -3
- biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
- biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
- biopipen/scripts/cnvkit/CNVkitGuessBaits.py +12 -8
- biopipen/scripts/cnvkit/CNVkitHeatmap.py +5 -5
- biopipen/scripts/cnvkit/CNVkitReference.py +6 -5
- biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
- biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
- biopipen/scripts/cnvkit/guess_baits.py +166 -93
- biopipen/scripts/delim/RowsBinder.R +1 -1
- biopipen/scripts/delim/SampleInfo.R +116 -118
- biopipen/scripts/gene/GeneNameConversion.R +67 -0
- biopipen/scripts/gene/GenePromoters.R +61 -0
- biopipen/scripts/gsea/Enrichr.R +5 -5
- biopipen/scripts/gsea/FGSEA.R +184 -50
- biopipen/scripts/gsea/GSEA.R +2 -2
- biopipen/scripts/gsea/PreRank.R +5 -5
- biopipen/scripts/misc/Config2File.py +2 -2
- biopipen/scripts/misc/Plot.R +80 -0
- biopipen/scripts/misc/Shell.sh +15 -0
- biopipen/scripts/misc/Str2File.py +2 -2
- biopipen/scripts/plot/Heatmap.R +3 -3
- biopipen/scripts/plot/Manhattan.R +147 -0
- biopipen/scripts/plot/QQPlot.R +146 -0
- biopipen/scripts/plot/ROC.R +88 -0
- biopipen/scripts/plot/Scatter.R +112 -0
- biopipen/scripts/plot/VennDiagram.R +5 -9
- biopipen/scripts/protein/MMCIF2PDB.py +33 -0
- biopipen/scripts/protein/PDB2Fasta.py +60 -0
- biopipen/scripts/protein/Prodigy.py +119 -0
- biopipen/scripts/protein/ProdigySummary.R +140 -0
- biopipen/scripts/protein/RMSD.py +178 -0
- biopipen/scripts/regulatory/MotifAffinityTest.R +102 -0
- biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +127 -0
- biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +104 -0
- biopipen/scripts/regulatory/MotifScan.py +159 -0
- biopipen/scripts/regulatory/VariantMotifPlot.R +78 -0
- biopipen/scripts/regulatory/motifs-common.R +324 -0
- biopipen/scripts/rnaseq/Simulation-ESCO.R +180 -0
- biopipen/scripts/rnaseq/Simulation-RUVcorr.R +45 -0
- biopipen/scripts/rnaseq/Simulation.R +21 -0
- biopipen/scripts/rnaseq/UnitConversion.R +325 -54
- biopipen/scripts/scrna/AnnData2Seurat.R +40 -0
- biopipen/scripts/scrna/CCPlotR-patch.R +161 -0
- biopipen/scripts/scrna/CellCellCommunication.py +150 -0
- biopipen/scripts/scrna/CellCellCommunicationPlots.R +93 -0
- biopipen/scripts/scrna/CellSNPLite.py +30 -0
- biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +185 -0
- biopipen/scripts/scrna/CellTypeAnnotation-direct.R +68 -31
- biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +27 -22
- biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +28 -20
- biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +48 -25
- biopipen/scripts/scrna/CellTypeAnnotation.R +37 -1
- biopipen/scripts/scrna/CellsDistribution.R +456 -167
- biopipen/scripts/scrna/DimPlots.R +1 -1
- biopipen/scripts/scrna/ExprImputation-alra.R +109 -0
- biopipen/scripts/scrna/ExprImputation-rmagic.R +256 -0
- biopipen/scripts/scrna/{ExprImpution-scimpute.R → ExprImputation-scimpute.R} +8 -5
- biopipen/scripts/scrna/ExprImputation.R +7 -0
- biopipen/scripts/scrna/LoomTo10X.R +51 -0
- biopipen/scripts/scrna/MQuad.py +25 -0
- biopipen/scripts/scrna/MarkersFinder.R +679 -400
- biopipen/scripts/scrna/MetaMarkers.R +265 -161
- biopipen/scripts/scrna/ModuleScoreCalculator.R +66 -11
- biopipen/scripts/scrna/PseudoBulkDEG.R +678 -0
- biopipen/scripts/scrna/RadarPlots.R +355 -134
- biopipen/scripts/scrna/ScFGSEA.R +298 -100
- biopipen/scripts/scrna/ScSimulation.R +65 -0
- biopipen/scripts/scrna/ScVelo.py +617 -0
- biopipen/scripts/scrna/Seurat2AnnData.R +7 -0
- biopipen/scripts/scrna/SeuratClusterStats-clustree.R +87 -0
- biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +36 -30
- biopipen/scripts/scrna/SeuratClusterStats-features.R +138 -187
- biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +81 -0
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +78 -89
- biopipen/scripts/scrna/SeuratClusterStats.R +47 -10
- biopipen/scripts/scrna/SeuratClustering.R +36 -233
- biopipen/scripts/scrna/SeuratLoading.R +2 -2
- biopipen/scripts/scrna/SeuratMap2Ref.R +84 -113
- biopipen/scripts/scrna/SeuratMetadataMutater.R +16 -6
- biopipen/scripts/scrna/SeuratPreparing.R +223 -173
- biopipen/scripts/scrna/SeuratSubClustering.R +64 -0
- biopipen/scripts/scrna/SeuratTo10X.R +27 -0
- biopipen/scripts/scrna/Slingshot.R +65 -0
- biopipen/scripts/scrna/Subset10X.R +2 -2
- biopipen/scripts/scrna/TopExpressingGenes.R +169 -135
- biopipen/scripts/scrna/celltypist-wrapper.py +195 -0
- biopipen/scripts/scrna/scvelo_paga.py +313 -0
- biopipen/scripts/scrna/seurat_anndata_conversion.py +98 -0
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +447 -82
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +348 -241
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +188 -166
- biopipen/scripts/snp/MatrixEQTL.R +217 -0
- biopipen/scripts/snp/Plink2GTMat.py +148 -0
- biopipen/scripts/snp/PlinkCallRate.R +199 -0
- biopipen/scripts/snp/PlinkFilter.py +100 -0
- biopipen/scripts/snp/PlinkFreq.R +291 -0
- biopipen/scripts/snp/PlinkFromVcf.py +81 -0
- biopipen/scripts/snp/PlinkHWE.R +85 -0
- biopipen/scripts/snp/PlinkHet.R +96 -0
- biopipen/scripts/snp/PlinkIBD.R +196 -0
- biopipen/scripts/snp/PlinkSimulation.py +124 -0
- biopipen/scripts/snp/PlinkUpdateName.py +124 -0
- biopipen/scripts/stats/ChowTest.R +146 -0
- biopipen/scripts/stats/DiffCoexpr.R +152 -0
- biopipen/scripts/stats/LiquidAssoc.R +135 -0
- biopipen/scripts/stats/Mediation.R +108 -0
- biopipen/scripts/stats/MetaPvalue.R +130 -0
- biopipen/scripts/stats/MetaPvalue1.R +74 -0
- biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
- biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
- biopipen/scripts/tcr/Attach2Seurat.R +3 -2
- biopipen/scripts/tcr/CDR3AAPhyschem.R +211 -143
- biopipen/scripts/tcr/CDR3Clustering.R +343 -0
- biopipen/scripts/tcr/ClonalStats.R +526 -0
- biopipen/scripts/tcr/CloneResidency.R +255 -131
- biopipen/scripts/tcr/CloneSizeQQPlot.R +4 -4
- biopipen/scripts/tcr/GIANA/GIANA.py +1356 -797
- biopipen/scripts/tcr/GIANA/GIANA4.py +1362 -789
- biopipen/scripts/tcr/GIANA/query.py +164 -162
- biopipen/scripts/tcr/Immunarch-basic.R +31 -9
- biopipen/scripts/tcr/Immunarch-clonality.R +25 -5
- biopipen/scripts/tcr/Immunarch-diversity.R +352 -134
- biopipen/scripts/tcr/Immunarch-geneusage.R +45 -5
- biopipen/scripts/tcr/Immunarch-kmer.R +68 -8
- biopipen/scripts/tcr/Immunarch-overlap.R +84 -4
- biopipen/scripts/tcr/Immunarch-spectratyping.R +35 -6
- biopipen/scripts/tcr/Immunarch-tracking.R +38 -6
- biopipen/scripts/tcr/Immunarch-vjjunc.R +165 -0
- biopipen/scripts/tcr/Immunarch.R +63 -11
- biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
- biopipen/scripts/tcr/ImmunarchFilter.R +4 -4
- biopipen/scripts/tcr/ImmunarchLoading.R +38 -29
- biopipen/scripts/tcr/SampleDiversity.R +1 -1
- biopipen/scripts/tcr/ScRepCombiningExpression.R +40 -0
- biopipen/scripts/tcr/ScRepLoading.R +166 -0
- biopipen/scripts/tcr/TCRClusterStats.R +176 -22
- biopipen/scripts/tcr/TCRDock.py +110 -0
- biopipen/scripts/tcr/TESSA.R +102 -118
- biopipen/scripts/tcr/VJUsage.R +5 -5
- biopipen/scripts/tcr/immunarch-patched.R +142 -0
- biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
- biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
- biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
- biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
- biopipen/scripts/vcf/BcftoolsSort.py +113 -0
- biopipen/scripts/vcf/BcftoolsView.py +73 -0
- biopipen/scripts/vcf/TruvariBench.sh +14 -7
- biopipen/scripts/vcf/TruvariBenchSummary.R +16 -13
- biopipen/scripts/vcf/TruvariConsistency.R +1 -1
- biopipen/scripts/vcf/Vcf2Bed.py +2 -2
- biopipen/scripts/vcf/VcfAnno.py +11 -11
- biopipen/scripts/vcf/VcfDownSample.sh +22 -10
- biopipen/scripts/vcf/VcfFilter.py +5 -5
- biopipen/scripts/vcf/VcfFix.py +7 -7
- biopipen/scripts/vcf/VcfFix_utils.py +13 -4
- biopipen/scripts/vcf/VcfIndex.py +3 -3
- biopipen/scripts/vcf/VcfIntersect.py +3 -3
- biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
- biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
- biopipen/scripts/vcf/bcftools_utils.py +52 -0
- biopipen/scripts/web/Download.py +8 -4
- biopipen/scripts/web/DownloadList.py +5 -5
- biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
- biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
- biopipen/scripts/web/gcloud_common.py +49 -0
- biopipen/utils/gene.py +108 -60
- biopipen/utils/misc.py +146 -20
- biopipen/utils/reference.py +64 -20
- biopipen/utils/reporter.py +177 -0
- biopipen/utils/vcf.py +1 -1
- biopipen-0.34.26.dist-info/METADATA +27 -0
- biopipen-0.34.26.dist-info/RECORD +292 -0
- {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
- {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +6 -2
- biopipen/ns/bcftools.py +0 -111
- biopipen/ns/scrna_basic.py +0 -255
- biopipen/reports/delim/SampleInfo.svelte +0 -36
- biopipen/reports/scrna/GeneExpressionInvistigation.svelte +0 -32
- biopipen/reports/scrna/ScFGSEA.svelte +0 -35
- biopipen/reports/scrna/SeuratClusterStats.svelte +0 -82
- biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -20
- biopipen/reports/scrna/SeuratPreparing.svelte +0 -38
- biopipen/reports/scrna/TopExpressingGenes.svelte +0 -55
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -31
- biopipen/reports/utils/gsea.liq +0 -110
- biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
- biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
- biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
- biopipen/scripts/gene/GeneNameConversion.py +0 -66
- biopipen/scripts/scrna/ExprImpution-alra.R +0 -32
- biopipen/scripts/scrna/ExprImpution-rmagic.R +0 -29
- biopipen/scripts/scrna/ExprImpution.R +0 -7
- biopipen/scripts/scrna/GeneExpressionInvistigation.R +0 -132
- biopipen/scripts/scrna/Write10X.R +0 -11
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -150
- biopipen/scripts/tcr/TCRClustering.R +0 -280
- biopipen/utils/common_docstrs.py +0 -61
- biopipen/utils/gene.R +0 -49
- biopipen/utils/gsea.R +0 -193
- biopipen/utils/io.R +0 -20
- biopipen/utils/misc.R +0 -114
- biopipen/utils/mutate_helpers.R +0 -433
- biopipen/utils/plot.R +0 -173
- biopipen/utils/rnaseq.R +0 -48
- biopipen/utils/single_cell.R +0 -115
- biopipen-0.21.0.dist-info/METADATA +0 -22
- biopipen-0.21.0.dist-info/RECORD +0 -218
biopipen/utils/mutate_helpers.R
DELETED
|
@@ -1,433 +0,0 @@
|
|
|
1
|
-
suppressPackageStartupMessages(library(rlang))
|
|
2
|
-
suppressPackageStartupMessages(library(tidyselect))
|
|
3
|
-
suppressPackageStartupMessages(library(dplyr))
|
|
4
|
-
|
|
5
|
-
#' Get expanded, collapsed, emerged or vanished clones from a meta data frame
|
|
6
|
-
#'
|
|
7
|
-
#' @rdname Get expanded, collapsed, emerged or vanished clones
|
|
8
|
-
#'
|
|
9
|
-
#' @param df The meta data frame
|
|
10
|
-
#' @param group.by The column name (without quotes) in metadata to group the
|
|
11
|
-
#' cells.
|
|
12
|
-
#' @param idents The groups of cells to compare (values in `group.by` column).
|
|
13
|
-
#' Either length 1 (`ident_1`) or length 2 (`ident_1` and `ident_2`).
|
|
14
|
-
#' If length 1, the rest of the cells with non-NA values in `group.by` will
|
|
15
|
-
#' be used as `ident_2`.
|
|
16
|
-
#' @param subset An expression to subset the cells, will be passed to
|
|
17
|
-
#' `dplyr::filter()`. Default is `TRUE` (no filtering).
|
|
18
|
-
#' @param id The column name (without quotes) in metadata for the
|
|
19
|
-
#' group ids (i.e. `CDR3.aa`)
|
|
20
|
-
#' @param compare Either a (numeric) column name (i.e. `Clones`, without quotes)
|
|
21
|
-
#' in metadata to compare between groups, or `.n` to compare the
|
|
22
|
-
#' number of cells in each group.
|
|
23
|
-
#' @param fun The way to compare between groups. Either `"expanded"`,
|
|
24
|
-
#' `"collapsed"`, `"emerged"` or `"vanished"`.
|
|
25
|
-
#' @param uniq Whether to return unique ids or not. Default is `TRUE`.
|
|
26
|
-
#' If `FALSE`, you can mutate the meta data frame with the returned ids.
|
|
27
|
-
#' For example, `df %>% mutate(expanded = expanded(...))`.
|
|
28
|
-
#' @param order The order of the returned ids. It could be `sum` or `diff`,
|
|
29
|
-
#' which is the sum or diff of the `compare` between idents. Two kinds of
|
|
30
|
-
#' modifiers can be added, including `desc` and `abs`. For example,
|
|
31
|
-
#' `sum,desc` means the sum of `compare` between idents in descending order.
|
|
32
|
-
#' Default is `diff+desc`.
|
|
33
|
-
#' It only works when `uniq` is `TRUE`. If `uniq` is `FALSE`, the returned
|
|
34
|
-
#' ids will be in the same order as in `df`.
|
|
35
|
-
#' @param include_emerged Whether to include emerged clones for the expanded clones.
|
|
36
|
-
#' Default is `FALSE`. It only works for `"expanded"`.
|
|
37
|
-
#' @param include_vanished Whether to include vanished clones for the collapsed clones.
|
|
38
|
-
#' Default is `FALSE`. It only works for `"collapsed"`.
|
|
39
|
-
#'
|
|
40
|
-
#' @return A vector of expanded or collapsed clones (in `id` column)
|
|
41
|
-
#' If uniq is `FALSE`, the vector will be the same length as `df`.
|
|
42
|
-
#'
|
|
43
|
-
#' @examples
|
|
44
|
-
#' # Get expanded clones
|
|
45
|
-
#' df <- tibble(
|
|
46
|
-
#' Clones = c(10, 8, 1, 5, 9, 2, 3, 7, 6, 4, 9, 9),
|
|
47
|
-
#' Source = c(
|
|
48
|
-
#' "Tumor", "Normal", "Normal", "Normal", "Tumor", "Tumor",
|
|
49
|
-
#' "Tumor", "Normal", "Normal", "Normal", NA, "X"
|
|
50
|
-
#' ),
|
|
51
|
-
#' CDR3.aa = c("A", "C", "B", "E", "D", "E", "E", "B", "B", "B", "A", "A")
|
|
52
|
-
#' )
|
|
53
|
-
#'
|
|
54
|
-
#' expanded(df, Source, c("Tumor", "Normal"), include_emerged = TRUE)
|
|
55
|
-
#' # The transformed data frame looks like this:
|
|
56
|
-
# CDR3.aa ..predicate ..sum ..diff
|
|
57
|
-
# <chr> <lgl> <dbl> <dbl>
|
|
58
|
-
# 1 A TRUE 10 10
|
|
59
|
-
# 2 B FALSE 1 -1
|
|
60
|
-
# 3 C FALSE 8 -8
|
|
61
|
-
# 4 D TRUE 9 9
|
|
62
|
-
# 5 E FALSE 7 -3
|
|
63
|
-
#'
|
|
64
|
-
#' # [1] "A" "D"
|
|
65
|
-
#'
|
|
66
|
-
#' # Get collapsed clones
|
|
67
|
-
#' collapsed(df, Source, c("Tumor", "Normal"))
|
|
68
|
-
#' # [1] "E"
|
|
69
|
-
#'
|
|
70
|
-
#' # Get emerged clones
|
|
71
|
-
#' emerged(df, Source, c("Tumor", "Normal"))
|
|
72
|
-
#' # [1] "A" "D"
|
|
73
|
-
#'
|
|
74
|
-
#' # Get vanished clones
|
|
75
|
-
#' vanished(df, Source, c("Tumor", "Normal"))
|
|
76
|
-
#' # [1] "B" "C"
|
|
77
|
-
.size_compare <- function(
    df,
    group.by, # nolint
    idents,
    subset,
    id,
    compare,
    fun,
    uniq,
    order
) {
    # Shared worker behind expanded(), collapsed(), emerged() and vanished().
    #
    # For every value of `id` it derives one number per ident (the group size
    # when `compare` is `.n`, otherwise the first `compare` value found in the
    # group) and keeps the ids whose pair of numbers satisfies `fun`.
    #
    # Args:
    #   df: The (meta) data frame.
    #   group.by, subset, id, compare: Quosures captured by the caller with
    #     `enquo()`. A quosure whose environment is empty (e.g. one wrapping a
    #     quoted string) is converted to a symbol, so that both `Source` and
    #     "Source" work as column references.
    #   idents: Values of the `group.by` column. Length 1 compares `ident_1`
    #     with all other non-NA groups; length 2 compares `ident_1` with
    #     `ident_2` and ignores every other group.
    #   fun: One of "expanded", "expanded+", "collapsed", "collapsed+",
    #     "emerged" or "vanished" (the "+" variants drop the `> 0` requirement
    #     on the smaller side).
    #   uniq: If TRUE, return the selected ids. If FALSE, return a vector with
    #     one entry per row of `df`: the id when it was selected, NA otherwise.
    #   order: A string searched for the keywords "sum", "diff", "desc" and
    #     "abs"; it only affects the result when `uniq` is TRUE.
    #
    # Returns: A vector of ids (see `uniq`).
    known_funs <- c(
        "expanded", "expanded+", "collapsed", "collapsed+", "emerged", "vanished"
    )
    # Fail early: an unknown `fun` would otherwise make the predicate return
    # NULL and surface as an obscure error deep inside the dplyr pipeline.
    if (!is.character(fun) || length(fun) != 1 || !fun %in% known_funs) {
        stop(paste0(
            "`fun` must be one of ",
            paste0('"', known_funs, '"', collapse = ", ")
        ))
    }

    if (length(idents) == 1) {
        ident_1 <- idents[1]
        # Sentinel meaning "everything that is not ident_1"
        ident_2 <- "<NULL>"
    } else if (length(idents) == 2) {
        ident_1 <- idents[1]
        ident_2 <- idents[2]
    } else {
        stop("idents must be length 1 or 2")
    }

    if (is_empty(attr(group.by, ".Environment"))) {
        # Works if a (quoted) string passed
        group.by <- sym(as_name(group.by))
    }
    if (is_empty(attr(id, ".Environment"))) {
        id <- sym(as_name(id))
    }
    if (is_empty(attr(compare, ".Environment"))) {
        compare <- sym(as_name(compare))
    }
    compare_label <- as_name(compare)
    compare_is_count <- compare_label == '.n'

    if (!as_name(group.by) %in% colnames(df)) {
        stop(paste0(
            '`group.by` must be a column name in df. Got "',
            as_name(group.by),
            '"'
        ))
    }

    if (!compare_is_count && !compare_label %in% colnames(df)) {
        # The only accepted non-column value is `.n` (see compare_is_count)
        stop(paste0(
            "`compare` must be either a column name in df, or `.n`. ",
            'Got "',
            compare_label,
            '"'
        ))
    }

    # comp[1] is the value for ident_1, comp[2] the value for ident_2
    predicate <- function(comp) {
        switch(fun,
            "expanded" = comp[1] > comp[2] && comp[2] > 0,
            "expanded+" = comp[1] > comp[2],
            "collapsed" = comp[1] < comp[2] && comp[1] > 0,
            "collapsed+" = comp[1] < comp[2],
            "emerged" = comp[1] > 0 && comp[2] == 0,
            "vanished" = comp[1] == 0 && comp[2] > 0
        )
    }

    # subset the data frame
    trans <- df %>% dplyr::filter(!!subset) %>%
        # remove NA values in group.by column
        dplyr::filter(!is.na(!!group.by)) %>%
        # mark the group.by column (as ..group) as ident_1 or ident_2 or NA
        mutate(
            ..group = if_else(
                !!group.by == ident_1,
                "ident_1",
                # NA_character_ (not bare NA) keeps both branches character,
                # which strict if_else() implementations require
                if_else(
                    ident_2 != "<NULL>" & !!group.by != ident_2,
                    NA_character_,
                    "ident_2"
                )
            )
        ) %>%
        # remove NA values in ..group column
        dplyr::filter(!is.na(..group)) %>%
        # for each clone and group (ident_1 and ident_2)
        group_by(!!id, ..group) %>%
        # summarise the number of cells in each clone and group
        # so that we can compare between groups later
        summarise(
            ..compare = ifelse(compare_is_count, n(), first(!!compare)),
            .groups = "drop"
        ) %>%
        # for each clone, either compare Clones or ..count between groups
        # (ident_1 and ident_2)
        group_by(!!id) %>%
        # add missing group (either ident_1 or ident_2) with a value of 0
        group_modify(function(d, ...) {
            if (nrow(d) == 1) {
                d <- d %>% add_row(
                    ..group = ifelse(
                        d$..group == "ident_1", "ident_2", "ident_1"
                    ),
                    ..compare = 0
                )
            }
            d
        }) %>%
        # make sure ident_1 and ident_2 are in order
        arrange(..group, .by_group = TRUE) %>%
        # add the predicates, sums and diffs
        summarise(
            ..predicate = predicate(..compare),
            ..sum = sum(..compare),
            ..diff = ..compare[1] - ..compare[2]
        ) %>%
        # filter the clones
        dplyr::filter(..predicate)

    order_sum <- grepl("sum", order)
    order_diff <- grepl("diff", order)
    order_desc <- grepl("desc", order)
    order_abs <- grepl("abs", order)

    # Pick the sort key first ("sum" wins over "diff"; "abs" only applies to
    # "diff"), then the direction. No keyword means: keep the current order.
    if (order_sum) {
        trans <- trans %>% mutate(..key = ..sum)
    } else if (order_diff && order_abs) {
        trans <- trans %>% mutate(..key = abs(..diff))
    } else if (order_diff) {
        trans <- trans %>% mutate(..key = ..diff)
    }
    if (order_sum || order_diff) {
        trans <- if (order_desc) {
            trans %>% arrange(desc(..key))
        } else {
            trans %>% arrange(..key)
        }
    }
    out <- trans %>% pull(!!id)

    if (uniq) { return(out) }

    # One entry per row of the original (unfiltered) df
    df %>% mutate(..out = if_else(!!id %in% out, !!id, NA)) %>% pull(..out)
}
|
|
217
|
-
|
|
218
|
-
#' @export
|
|
219
|
-
expanded <- function(
    df,
    group.by, # nolint
    idents,
    subset = TRUE,
    id = CDR3.aa,
    compare = Clones,
    uniq = TRUE,
    order = "diff+desc",
    include_emerged = FALSE
) {
    # Ids whose `compare` value is larger in ident_1 than in ident_2.
    # With `include_emerged = TRUE` the "expanded+" rule is used, which does
    # not require the ident_2 value to be positive.

    # A literal `.` as `df` means "the data of the enclosing dplyr verb"
    if (identical(as_label(enquo(df)), ".")) {
        df <- across(everything())
    }

    mode <- if (include_emerged) "expanded+" else "expanded"

    # Column arguments are forwarded unevaluated (as quosures)
    .size_compare(
        df = df,
        group.by = enquo(group.by),
        idents = idents,
        subset = enquo(subset),
        id = enquo(id),
        compare = enquo(compare),
        fun = mode,
        uniq = uniq,
        order = order
    )
}
|
|
247
|
-
|
|
248
|
-
#' @export
|
|
249
|
-
collapsed <- function(
    df,
    group.by, # nolint
    idents,
    subset = TRUE,
    id = CDR3.aa,
    compare = Clones,
    uniq = TRUE,
    order = "diff+desc",
    include_vanished = FALSE
) {
    # Ids whose `compare` value is smaller in ident_1 than in ident_2.
    # With `include_vanished = TRUE` the "collapsed+" rule is used, which does
    # not require the ident_1 value to be positive.

    # A literal `.` as `df` means "the data of the enclosing dplyr verb"
    if (identical(as_label(enquo(df)), ".")) {
        df <- across(everything())
    }

    mode <- if (include_vanished) "collapsed+" else "collapsed"

    # Column arguments are forwarded unevaluated (as quosures)
    .size_compare(
        df = df,
        group.by = enquo(group.by),
        idents = idents,
        subset = enquo(subset),
        id = enquo(id),
        compare = enquo(compare),
        fun = mode,
        uniq = uniq,
        order = order
    )
}
|
|
277
|
-
|
|
278
|
-
#' @export
|
|
279
|
-
emerged <- function(
    df,
    group.by, # nolint
    idents,
    subset = TRUE,
    id = CDR3.aa,
    compare = Clones,
    uniq = TRUE,
    order = "diff+desc"
) {
    # Ids with a positive `compare` value in ident_1 and a value of 0 in
    # ident_2 (the "emerged" rule of .size_compare()).

    # A literal `.` as `df` means "the data of the enclosing dplyr verb"
    if (identical(as_label(enquo(df)), ".")) {
        df <- across(everything())
    }

    # Column arguments are forwarded unevaluated (as quosures)
    .size_compare(
        df = df,
        group.by = enquo(group.by),
        idents = idents,
        subset = enquo(subset),
        id = enquo(id),
        compare = enquo(compare),
        fun = "emerged",
        uniq = uniq,
        order = order
    )
}
|
|
305
|
-
|
|
306
|
-
#' @export
|
|
307
|
-
vanished <- function(
    df,
    group.by, # nolint
    idents,
    subset = TRUE,
    id = CDR3.aa,
    compare = Clones,
    uniq = TRUE,
    order = "diff+desc"
) {
    # Ids with a `compare` value of 0 in ident_1 and a positive value in
    # ident_2 (the "vanished" rule of .size_compare()).

    # A literal `.` as `df` means "the data of the enclosing dplyr verb"
    if (identical(as_label(enquo(df)), ".")) {
        df <- across(everything())
    }

    # Column arguments are forwarded unevaluated (as quosures)
    .size_compare(
        df = df,
        group.by = enquo(group.by),
        idents = idents,
        subset = enquo(subset),
        id = enquo(id),
        compare = enquo(compare),
        fun = "vanished",
        uniq = uniq,
        order = order
    )
}
|
|
333
|
-
|
|
334
|
-
#' Get paired entities from a data frame based on the other column
|
|
335
|
-
#'
|
|
336
|
-
#' @rdname Get paired entities
|
|
337
|
-
#' @param df The data frame. Use `.` if the function is called in a dplyr pipe.
|
|
338
|
-
#' @param id_col The column name in `df` for the ids to be returned in the
|
|
339
|
-
#' final output
|
|
340
|
-
#' @param compare_col The column name in `df` to compare the values for each
|
|
341
|
-
#' id in `id_col`.
|
|
342
|
-
#' @param idents The values in `compare_col` to compare. It could be either an
|
|
343
|
-
#' integer or a vector. If it is an integer, the number of values in
|
|
344
|
-
#' `compare_col` must be the same as the integer for the `id` to be regarded
|
|
345
|
-
#' as paired. If it is a vector, the values in `compare_col` must be the same
|
|
346
|
-
#' as the values in `idents` for the `id` to be regarded as paired.
|
|
347
|
-
#' @param uniq Whether to return unique ids or not. Default is `TRUE`.
|
|
348
|
-
#' If `FALSE`, you can mutate the meta data frame with the returned ids.
|
|
349
|
-
#' Non-paired ids will be `NA`.
|
|
350
|
-
#' @return A vector of paired ids (in `id_col` column)
|
|
351
|
-
#' @examples
|
|
352
|
-
#' df <- tibble(
|
|
353
|
-
#' id = c("A", "A", "B", "B", "C", "C", "D", "D"),
|
|
354
|
-
#' compare = c(1, 2, 1, 1, 1, 2, 1, 2)
|
|
355
|
-
#' )
|
|
356
|
-
#' paired(df, id, compare, 2)
|
|
357
|
-
#' # [1] "A" "B" "C" "D"
|
|
358
|
-
#' paired(df, id, compare, c(1, 2))
|
|
359
|
-
#' # [1] "A" "C" "D"
|
|
360
|
-
#' paired(df, id, compare, c(1, 2), uniq = FALSE)
|
|
361
|
-
#' # [1] "A" "A" NA NA "C" "C" "D" "D"
|
|
362
|
-
#'
|
|
363
|
-
paired <- function(
    df,
    id_col,
    compare_col,
    idents = 2,
    uniq = TRUE
) {
    # Return the ids in `id_col` that count as "paired".
    #
    # * `idents` is a single number: an id is paired when it occurs in exactly
    #   that many rows of `df`.
    # * otherwise: an id is paired when the set of its `compare_col` values
    #   equals the set `idents`.
    #
    # With `uniq = TRUE` the distinct paired ids are returned; with
    # `uniq = FALSE` the result has one entry per row (NA for unpaired ids).

    # A literal `.` as `df` means "the data of the enclosing dplyr verb"
    if (identical(as_label(enquo(df)), ".")) {
        df <- across(everything())
    }

    # Capture the column arguments; a quosure with an empty environment
    # (e.g. a quoted string) is turned into a symbol
    id_ref <- enquo(id_col)
    cmp_ref <- enquo(compare_col)
    if (is_empty(attr(id_ref, ".Environment"))) {
        id_ref <- sym(as_name(id_ref))
    }
    if (is_empty(attr(cmp_ref, ".Environment"))) {
        cmp_ref <- sym(as_name(cmp_ref))
    }

    columns <- colnames(df)
    if (!as_name(id_ref) %in% columns) {
        stop(paste0(
            '`id_col` must be a column name in df. Got "',
            as_name(id_ref),
            '"'
        ))
    }
    if (!as_name(cmp_ref) %in% columns) {
        stop(paste0(
            '`compare_col` must be a column name in df. Got "',
            as_name(cmp_ref),
            '"'
        ))
    }

    if (is.numeric(idents) && length(idents) == 1) {
        # Count mode: pair by number of rows per id
        if (idents <= 1) {
            stop(paste0(
                '`idents` must be greater than 1. Got ',
                idents
            ))
        }
        counted <- add_count(df, !!id_ref, name = "..count")
        marked <- mutate(
            counted,
            ..paired = if_else(..count == idents, !!id_ref, NA)
        )
    } else {
        # Set mode: pair by the exact set of compare values per id
        if (length(idents) <= 1) {
            stop(paste0(
                '`idents` must be a vector with length greater than 1. Got ',
                length(idents)
            ))
        }
        grouped <- group_by(df, !!id_ref)
        marked <- mutate(
            grouped,
            ..paired = if_else(
                # one logical per group, repeated to the group size
                rep(setequal(!!cmp_ref, idents), n()),
                !!id_ref,
                NA
            )
        )
        marked <- ungroup(marked)
    }

    flags <- pull(marked, ..paired)
    if (!uniq) {
        return(flags)
    }
    as.vector(unique(na.omit(flags)))
}
|
biopipen/utils/plot.R
DELETED
|
@@ -1,173 +0,0 @@
|
|
|
1
|
-
library(ggplot2)
|
|
2
|
-
pdf(NULL) # preventing Rplots.pdf
|
|
3
|
-
|
|
4
|
-
# Draw a Venn diagram with `ggVennDiagram()`.
#
# Returns the plot object when `outfile` is NULL; otherwise renders the plot
# to `outfile` through a `png()` device.
plotVenn <- function(
    # Named list of element vectors,
    # e.g. list(A = paste0("R", 1:5), B = paste0("R", 3:7))
    data,
    # Extra arguments for `ggVennDiagram()`; `x` is always set to `data`
    args = list(),
    # Additional ggplot components: code strings (parsed and evaluated in
    # this function) and/or ready-made ggplot objects
    ggs = NULL,
    # Device parameters (res, width, height) for `png()`
    devpars = list(res = 100, width = 1000, height = 1000),
    # Output file; NULL means "return the plot object"
    outfile = NULL
) {
    library(ggVennDiagram)

    args$x <- data
    p <- do.call(ggVennDiagram, args)

    # Iterating over NULL is a no-op, so no explicit NULL guard is needed.
    # `p` and `gg` keep these names because strings in `ggs` are evaluated
    # in this frame and can see them.
    for (gg in ggs) {
        p <- p + (if (is.character(gg)) eval(parse(text = gg)) else gg)
    }

    if (is.null(outfile)) {
        return(p)
    }

    devpars$filename <- outfile
    do.call(png, devpars)
    print(p)
    dev.off()
}
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
# Generic single-geom ggplot helper.
#
# Builds `ggplot(data) + geom_<geom>(...)`, appends the extra components in
# `ggs`, then either returns the plot (`outfile` is NULL) or renders it to
# `outfile` through a `png()` device.
plotGG <- function(
    # Data frame (long format)
    data,
    # Geom suffix; "violin" resolves to `geom_violin()`
    geom,
    # Arguments for the `geom_<geom>()` call
    args = list(),
    # Additional ggplot components: code strings (parsed and evaluated in
    # this function) and/or ready-made ggplot objects
    ggs = NULL,
    # Device parameters (res, width, height) for `png()`
    devpars = list(res = 100, width = 1000, height = 1000),
    # Output file; NULL means "return the plot object"
    outfile = NULL
) {
    # do.call() with a character name looks the geom function up by name.
    p <- ggplot(data) + do.call(paste0("geom_", geom), args)

    # Iterating over NULL is a no-op, so no explicit NULL guard is needed.
    # `p` and `gg` keep these names because strings in `ggs` are evaluated
    # in this frame and can see them.
    for (gg in ggs) {
        p <- p + (if (is.character(gg)) eval(parse(text = gg)) else gg)
    }

    if (is.null(outfile)) {
        return(p)
    }

    devpars$filename <- outfile
    do.call(png, devpars)
    print(p)
    dev.off()
}
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
# Violin plot: a thin wrapper that forwards everything to `plotGG()` with
# the geom fixed to "violin".
plotViolin <- function(
    # Data frame (long format)
    data,
    # Arguments for `geom_violin()`
    args = list(),
    # Additional ggplot components (code strings or ggplot objects)
    ggs = NULL,
    # Device parameters (res, width, height) for `png()`
    devpars = list(res = 100, width = 1000, height = 1000),
    # Output file; NULL means "return the plot object"
    outfile = NULL
) {
    plotGG(
        data = data,
        geom = "violin",
        args = args,
        ggs = ggs,
        devpars = devpars,
        outfile = outfile
    )
}
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
# UpSet plot built on ggupset's `scale_x_upset()`.
#
# A named list input is first converted to a one-column data frame whose
# list-column `V1` holds, per element, the names of the sets containing it.
# Returns the plot when `outfile` is NULL; otherwise renders it to `outfile`
# through a `png()` device.
plotUpset <- function(
    # Named list of element vectors,
    # e.g. list(A = paste0("R", 1:5), B = paste0("R", 3:7)),
    # or a data frame, see
    # https://cran.r-project.org/web/packages/ggupset/readme/README.html
    data,
    # Arguments for `scale_x_upset()`
    args = list(),
    # ggplot components: code strings (parsed and evaluated in this
    # function) and/or ready-made ggplot objects
    ggs = "geom_bar(aes(x=V1))",
    # Device parameters (res, width, height) for `png()`
    devpars = list(res = 100, width = 1000, height = 1000),
    # Output file; NULL means "return the plot object"
    outfile = NULL
) {
    library(ggupset)
    library(tidyr)
    library(dplyr)

    if (is.list(data) && !is.data.frame(data)) {
        all_elems <- unique(unlist(data))
        df <- data.frame(ALL_ELEMS = all_elems)
        # One copy of `df` per set, with an extra column named after the
        # set that holds the set name for member elements (NA otherwise).
        per_set <- lapply(names(data), function(nd) {
            df[df$ALL_ELEMS %in% data[[nd]], nd] <- nd
            df
        })
        # NOTE(review): cbind() leaves one ALL_ELEMS column per set; confirm
        # that select(-ALL_ELEMS) copes with the duplicated name in the
        # dplyr version in use.
        data <- do.call(cbind, per_set) %>%
            select(-ALL_ELEMS) %>%
            unite("V1", sep = "; ", na.rm = TRUE) %>%
            mutate(V1 = strsplit(V1, "; ", fixed = TRUE))
    }

    p <- ggplot(data)
    # `p` and `gg` keep these names because strings in `ggs` are evaluated
    # in this frame and can see them.
    for (gg in ggs) {
        p <- p + (if (is.character(gg)) eval(parse(text = gg)) else gg)
    }
    p <- p + do.call(scale_x_upset, args)

    if (is.null(outfile)) {
        return(p)
    }

    devpars$filename <- outfile
    do.call(png, devpars)
    print(p)
    dev.off()
}
|
|
144
|
-
|
|
145
|
-
# Heatmap via `ComplexHeatmap::Heatmap()`.
#
# `outfile` selects the behavior:
#   NULL    - return the Heatmap object without drawing it
#   "draw"  - draw on the current device with `ComplexHeatmap::draw()`
#   other   - open a `png()` device on that file, draw, close the device
plotHeatmap <- function(
    # Data matrix (coerced with `as.matrix()`)
    data,
    # Arguments for `ComplexHeatmap::Heatmap()`; `matrix` is always set
    # from `data`
    args = list(),
    # Other arguments for `ComplexHeatmap::draw()`
    draw = list(),
    # Device parameters (res, width, height) for `png()`
    devpars = NULL,
    # Output file, NULL, or the literal "draw" (see above)
    outfile = NULL
) {
    library(ComplexHeatmap)

    args$matrix <- as.matrix(data)
    hm <- do.call(Heatmap, args)

    if (is.null(outfile)) {
        return(hm)
    }

    # The `draw` parameter shadows the function name, hence the explicit
    # namespace on the calls below.
    draw_args <- c(list(hm), draw)
    if (outfile == "draw") {
        return(do.call(ComplexHeatmap::draw, draw_args))
    }

    devpars$filename <- outfile
    do.call(png, devpars)
    do.call(ComplexHeatmap::draw, draw_args)
    dev.off()
}
|
biopipen/utils/rnaseq.R
DELETED
|
@@ -1,48 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
# Normalize a unit specification: anything containing "count" becomes the
# single string "count"; every other value is passed through unchanged.
.normUnit <- function(unit) {
    if (is.element("count", unit)) "count" else unit
}
|
|
8
|
-
|
|
9
|
-
# Compute per-gene exonic length from a header-less exon table.
#
# The file is read with `read.table()` defaults (whitespace-delimited).
# Column 4 is used as exon start, column 5 as exon end and column 10 as the
# gene name; each exon contributes `end - start + 1` bases.
#
# Returns a numeric vector of summed exon lengths, named by gene.
glenFromGFFExons <- function(exonfile) {
    # FALSE rather than `F`: `F` is an ordinary variable that can be rebound.
    gff <- read.table(exonfile, header = FALSE, row.names = NULL)
    # V4: start, V5: end, V10: gene name
    glen <- aggregate(V5 - V4 + 1 ~ V10, gff, sum)
    # Column 1 holds the gene names, column 2 the summed lengths.
    setNames(glen[[2]], glen[[1]])
}
|
|
18
|
-
|
|
19
|
-
# Convert a count matrix to TPM (transcripts per million).
#
# Arguments:
#   x    - count matrix/data frame with genes as row names and samples as
#          columns
#   args - list; `args$genelen` must hold gene lengths named by gene
#
# Genes without a length are dropped, and a single warning listing them is
# raised (no warning when every gene has a length).
#
# Returns a matrix of TPM values with the retained genes as row names and
# the columns of `x` as column names.
#
# Raises an error when `args$genelen` is NULL.
count2tpm <- function(x, args) {
    if (is.null(args$genelen)) {
        stop("Gene lengths are required to convert count to TPM.")
    }
    glengenes <- names(args$genelen)
    mygenes <- rownames(x)

    # Not named `missing`, which would shadow base::missing().
    missing_genes <- setdiff(mygenes, glengenes)
    if (length(missing_genes) > 0) {
        # `collapse` (not `sep`) is what joins the elements of one vector.
        warning(
            length(missing_genes),
            " gene(s) cannot be found in gene length data: ",
            paste(missing_genes, collapse = ", ")
        )
    }

    genes <- intersect(mygenes, glengenes)
    x <- x[genes, , drop = FALSE]

    # see: https://gist.github.com/slowkow/c6ab0348747f86e2748b
    # and https://support.bioconductor.org/p/91218/
    # Length-normalize each gene (the vector recycles down the rows) ...
    out <- x / unlist(args$genelen[genes])
    # ... then scale every sample (column) to sum to one million.
    out <- t(t(out) * 1e6 / colSums(out))
    rownames(out) <- genes
    colnames(out) <- colnames(x)

    return(out)
}
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
# Convert expression values between units.
#
# Both unit names are normalized with `.normUnit()`, then the converter
# function named "<inunit>2<outunit>" (e.g. `count2tpm`) is looked up by
# name and called as `converter(x, args)`.
unit_conversion <- function(x, inunit, outunit, args = list()) {
    converter_name <- paste0(.normUnit(inunit), "2", .normUnit(outunit))
    converter <- get(converter_name)
    converter(x, args)
}
|