PyPI - biopipen - Versions diffs - 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl - Mend

biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (290) hide show

biopipen/__init__.py +1 -1
biopipen/core/config.toml +28 -0
biopipen/core/filters.py +79 -4
biopipen/core/proc.py +12 -3
biopipen/core/testing.py +75 -3
biopipen/ns/bam.py +148 -6
biopipen/ns/bed.py +75 -0
biopipen/ns/cellranger.py +186 -0
biopipen/ns/cellranger_pipeline.py +126 -0
biopipen/ns/cnv.py +19 -3
biopipen/ns/cnvkit.py +1 -1
biopipen/ns/cnvkit_pipeline.py +20 -12
biopipen/ns/delim.py +34 -35
biopipen/ns/gene.py +68 -23
biopipen/ns/gsea.py +63 -37
biopipen/ns/misc.py +39 -14
biopipen/ns/plot.py +304 -1
biopipen/ns/protein.py +183 -0
biopipen/ns/regulatory.py +290 -0
biopipen/ns/rnaseq.py +142 -5
biopipen/ns/scrna.py +2053 -473
biopipen/ns/scrna_metabolic_landscape.py +228 -382
biopipen/ns/snp.py +659 -0
biopipen/ns/stats.py +484 -0
biopipen/ns/tcr.py +683 -98
biopipen/ns/vcf.py +236 -2
biopipen/ns/web.py +97 -6
biopipen/reports/bam/CNVpytor.svelte +4 -9
biopipen/reports/cellranger/CellRangerCount.svelte +18 -0
biopipen/reports/cellranger/CellRangerSummary.svelte +16 -0
biopipen/reports/cellranger/CellRangerVdj.svelte +18 -0
biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
biopipen/reports/common.svelte +15 -0
biopipen/reports/protein/ProdigySummary.svelte +16 -0
biopipen/reports/scrna/CellsDistribution.svelte +4 -39
biopipen/reports/scrna/DimPlots.svelte +1 -1
biopipen/reports/scrna/MarkersFinder.svelte +6 -126
biopipen/reports/scrna/MetaMarkers.svelte +3 -75
biopipen/reports/scrna/RadarPlots.svelte +4 -20
biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +61 -22
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +88 -82
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +70 -10
biopipen/reports/snp/PlinkCallRate.svelte +24 -0
biopipen/reports/snp/PlinkFreq.svelte +18 -0
biopipen/reports/snp/PlinkHWE.svelte +18 -0
biopipen/reports/snp/PlinkHet.svelte +18 -0
biopipen/reports/snp/PlinkIBD.svelte +18 -0
biopipen/reports/tcr/CDR3AAPhyschem.svelte +19 -66
biopipen/reports/tcr/ClonalStats.svelte +16 -0
biopipen/reports/tcr/CloneResidency.svelte +3 -93
biopipen/reports/tcr/Immunarch.svelte +4 -155
biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
biopipen/reports/tcr/TESSA.svelte +11 -28
biopipen/reports/utils/misc.liq +22 -7
biopipen/scripts/bam/BamMerge.py +11 -15
biopipen/scripts/bam/BamSampling.py +90 -0
biopipen/scripts/bam/BamSort.py +141 -0
biopipen/scripts/bam/BamSplitChroms.py +10 -10
biopipen/scripts/bam/BamSubsetByBed.py +38 -0
biopipen/scripts/bam/CNAClinic.R +41 -5
biopipen/scripts/bam/CNVpytor.py +153 -54
biopipen/scripts/bam/ControlFREEC.py +13 -14
biopipen/scripts/bam/SamtoolsView.py +33 -0
biopipen/scripts/bed/Bed2Vcf.py +5 -5
biopipen/scripts/bed/BedConsensus.py +5 -5
biopipen/scripts/bed/BedLiftOver.sh +6 -4
biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
biopipen/scripts/bed/BedtoolsMakeWindows.py +47 -0
biopipen/scripts/bed/BedtoolsMerge.py +4 -4
biopipen/scripts/cellranger/CellRangerCount.py +138 -0
biopipen/scripts/cellranger/CellRangerSummary.R +181 -0
biopipen/scripts/cellranger/CellRangerVdj.py +112 -0
biopipen/scripts/cnv/AneuploidyScore.R +55 -20
biopipen/scripts/cnv/AneuploidyScoreSummary.R +221 -163
biopipen/scripts/cnv/TMADScore.R +25 -9
biopipen/scripts/cnv/TMADScoreSummary.R +57 -86
biopipen/scripts/cnvkit/CNVkitAccess.py +7 -6
biopipen/scripts/cnvkit/CNVkitAutobin.py +26 -18
biopipen/scripts/cnvkit/CNVkitBatch.py +6 -6
biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
biopipen/scripts/cnvkit/CNVkitCoverage.py +4 -3
biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
biopipen/scripts/cnvkit/CNVkitGuessBaits.py +12 -8
biopipen/scripts/cnvkit/CNVkitHeatmap.py +5 -5
biopipen/scripts/cnvkit/CNVkitReference.py +6 -5
biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
biopipen/scripts/cnvkit/guess_baits.py +166 -93
biopipen/scripts/delim/RowsBinder.R +1 -1
biopipen/scripts/delim/SampleInfo.R +116 -118
biopipen/scripts/gene/GeneNameConversion.R +67 -0
biopipen/scripts/gene/GenePromoters.R +61 -0
biopipen/scripts/gsea/Enrichr.R +5 -5
biopipen/scripts/gsea/FGSEA.R +184 -50
biopipen/scripts/gsea/GSEA.R +2 -2
biopipen/scripts/gsea/PreRank.R +5 -5
biopipen/scripts/misc/Config2File.py +2 -2
biopipen/scripts/misc/Plot.R +80 -0
biopipen/scripts/misc/Shell.sh +15 -0
biopipen/scripts/misc/Str2File.py +2 -2
biopipen/scripts/plot/Heatmap.R +3 -3
biopipen/scripts/plot/Manhattan.R +147 -0
biopipen/scripts/plot/QQPlot.R +146 -0
biopipen/scripts/plot/ROC.R +88 -0
biopipen/scripts/plot/Scatter.R +112 -0
biopipen/scripts/plot/VennDiagram.R +5 -9
biopipen/scripts/protein/MMCIF2PDB.py +33 -0
biopipen/scripts/protein/PDB2Fasta.py +60 -0
biopipen/scripts/protein/Prodigy.py +119 -0
biopipen/scripts/protein/ProdigySummary.R +140 -0
biopipen/scripts/protein/RMSD.py +178 -0
biopipen/scripts/regulatory/MotifAffinityTest.R +102 -0
biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +127 -0
biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +104 -0
biopipen/scripts/regulatory/MotifScan.py +159 -0
biopipen/scripts/regulatory/VariantMotifPlot.R +78 -0
biopipen/scripts/regulatory/motifs-common.R +324 -0
biopipen/scripts/rnaseq/Simulation-ESCO.R +180 -0
biopipen/scripts/rnaseq/Simulation-RUVcorr.R +45 -0
biopipen/scripts/rnaseq/Simulation.R +21 -0
biopipen/scripts/rnaseq/UnitConversion.R +325 -54
biopipen/scripts/scrna/AnnData2Seurat.R +40 -0
biopipen/scripts/scrna/CCPlotR-patch.R +161 -0
biopipen/scripts/scrna/CellCellCommunication.py +150 -0
biopipen/scripts/scrna/CellCellCommunicationPlots.R +93 -0
biopipen/scripts/scrna/CellSNPLite.py +30 -0
biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +185 -0
biopipen/scripts/scrna/CellTypeAnnotation-direct.R +68 -31
biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +27 -22
biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +28 -20
biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +48 -25
biopipen/scripts/scrna/CellTypeAnnotation.R +37 -1
biopipen/scripts/scrna/CellsDistribution.R +456 -167
biopipen/scripts/scrna/DimPlots.R +1 -1
biopipen/scripts/scrna/ExprImputation-alra.R +109 -0
biopipen/scripts/scrna/ExprImputation-rmagic.R +256 -0
biopipen/scripts/scrna/{ExprImpution-scimpute.R → ExprImputation-scimpute.R} +8 -5
biopipen/scripts/scrna/ExprImputation.R +7 -0
biopipen/scripts/scrna/LoomTo10X.R +51 -0
biopipen/scripts/scrna/MQuad.py +25 -0
biopipen/scripts/scrna/MarkersFinder.R +679 -400
biopipen/scripts/scrna/MetaMarkers.R +265 -161
biopipen/scripts/scrna/ModuleScoreCalculator.R +66 -11
biopipen/scripts/scrna/PseudoBulkDEG.R +678 -0
biopipen/scripts/scrna/RadarPlots.R +355 -134
biopipen/scripts/scrna/ScFGSEA.R +298 -100
biopipen/scripts/scrna/ScSimulation.R +65 -0
biopipen/scripts/scrna/ScVelo.py +617 -0
biopipen/scripts/scrna/Seurat2AnnData.R +7 -0
biopipen/scripts/scrna/SeuratClusterStats-clustree.R +87 -0
biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +36 -30
biopipen/scripts/scrna/SeuratClusterStats-features.R +138 -187
biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +81 -0
biopipen/scripts/scrna/SeuratClusterStats-stats.R +78 -89
biopipen/scripts/scrna/SeuratClusterStats.R +47 -10
biopipen/scripts/scrna/SeuratClustering.R +36 -233
biopipen/scripts/scrna/SeuratLoading.R +2 -2
biopipen/scripts/scrna/SeuratMap2Ref.R +84 -113
biopipen/scripts/scrna/SeuratMetadataMutater.R +16 -6
biopipen/scripts/scrna/SeuratPreparing.R +223 -173
biopipen/scripts/scrna/SeuratSubClustering.R +64 -0
biopipen/scripts/scrna/SeuratTo10X.R +27 -0
biopipen/scripts/scrna/Slingshot.R +65 -0
biopipen/scripts/scrna/Subset10X.R +2 -2
biopipen/scripts/scrna/TopExpressingGenes.R +169 -135
biopipen/scripts/scrna/celltypist-wrapper.py +195 -0
biopipen/scripts/scrna/scvelo_paga.py +313 -0
biopipen/scripts/scrna/seurat_anndata_conversion.py +98 -0
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +447 -82
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +348 -241
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +188 -166
biopipen/scripts/snp/MatrixEQTL.R +217 -0
biopipen/scripts/snp/Plink2GTMat.py +148 -0
biopipen/scripts/snp/PlinkCallRate.R +199 -0
biopipen/scripts/snp/PlinkFilter.py +100 -0
biopipen/scripts/snp/PlinkFreq.R +291 -0
biopipen/scripts/snp/PlinkFromVcf.py +81 -0
biopipen/scripts/snp/PlinkHWE.R +85 -0
biopipen/scripts/snp/PlinkHet.R +96 -0
biopipen/scripts/snp/PlinkIBD.R +196 -0
biopipen/scripts/snp/PlinkSimulation.py +124 -0
biopipen/scripts/snp/PlinkUpdateName.py +124 -0
biopipen/scripts/stats/ChowTest.R +146 -0
biopipen/scripts/stats/DiffCoexpr.R +152 -0
biopipen/scripts/stats/LiquidAssoc.R +135 -0
biopipen/scripts/stats/Mediation.R +108 -0
biopipen/scripts/stats/MetaPvalue.R +130 -0
biopipen/scripts/stats/MetaPvalue1.R +74 -0
biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
biopipen/scripts/tcr/Attach2Seurat.R +3 -2
biopipen/scripts/tcr/CDR3AAPhyschem.R +211 -143
biopipen/scripts/tcr/CDR3Clustering.R +343 -0
biopipen/scripts/tcr/ClonalStats.R +526 -0
biopipen/scripts/tcr/CloneResidency.R +255 -131
biopipen/scripts/tcr/CloneSizeQQPlot.R +4 -4
biopipen/scripts/tcr/GIANA/GIANA.py +1356 -797
biopipen/scripts/tcr/GIANA/GIANA4.py +1362 -789
biopipen/scripts/tcr/GIANA/query.py +164 -162
biopipen/scripts/tcr/Immunarch-basic.R +31 -9
biopipen/scripts/tcr/Immunarch-clonality.R +25 -5
biopipen/scripts/tcr/Immunarch-diversity.R +352 -134
biopipen/scripts/tcr/Immunarch-geneusage.R +45 -5
biopipen/scripts/tcr/Immunarch-kmer.R +68 -8
biopipen/scripts/tcr/Immunarch-overlap.R +84 -4
biopipen/scripts/tcr/Immunarch-spectratyping.R +35 -6
biopipen/scripts/tcr/Immunarch-tracking.R +38 -6
biopipen/scripts/tcr/Immunarch-vjjunc.R +165 -0
biopipen/scripts/tcr/Immunarch.R +63 -11
biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
biopipen/scripts/tcr/ImmunarchFilter.R +4 -4
biopipen/scripts/tcr/ImmunarchLoading.R +38 -29
biopipen/scripts/tcr/SampleDiversity.R +1 -1
biopipen/scripts/tcr/ScRepCombiningExpression.R +40 -0
biopipen/scripts/tcr/ScRepLoading.R +166 -0
biopipen/scripts/tcr/TCRClusterStats.R +176 -22
biopipen/scripts/tcr/TCRDock.py +110 -0
biopipen/scripts/tcr/TESSA.R +102 -118
biopipen/scripts/tcr/VJUsage.R +5 -5
biopipen/scripts/tcr/immunarch-patched.R +142 -0
biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
biopipen/scripts/vcf/BcftoolsSort.py +113 -0
biopipen/scripts/vcf/BcftoolsView.py +73 -0
biopipen/scripts/vcf/TruvariBench.sh +14 -7
biopipen/scripts/vcf/TruvariBenchSummary.R +16 -13
biopipen/scripts/vcf/TruvariConsistency.R +1 -1
biopipen/scripts/vcf/Vcf2Bed.py +2 -2
biopipen/scripts/vcf/VcfAnno.py +11 -11
biopipen/scripts/vcf/VcfDownSample.sh +22 -10
biopipen/scripts/vcf/VcfFilter.py +5 -5
biopipen/scripts/vcf/VcfFix.py +7 -7
biopipen/scripts/vcf/VcfFix_utils.py +13 -4
biopipen/scripts/vcf/VcfIndex.py +3 -3
biopipen/scripts/vcf/VcfIntersect.py +3 -3
biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
biopipen/scripts/vcf/bcftools_utils.py +52 -0
biopipen/scripts/web/Download.py +8 -4
biopipen/scripts/web/DownloadList.py +5 -5
biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
biopipen/scripts/web/gcloud_common.py +49 -0
biopipen/utils/gene.py +108 -60
biopipen/utils/misc.py +146 -20
biopipen/utils/reference.py +64 -20
biopipen/utils/reporter.py +177 -0
biopipen/utils/vcf.py +1 -1
biopipen-0.34.26.dist-info/METADATA +27 -0
biopipen-0.34.26.dist-info/RECORD +292 -0
{biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
{biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +6 -2
biopipen/ns/bcftools.py +0 -111
biopipen/ns/scrna_basic.py +0 -255
biopipen/reports/delim/SampleInfo.svelte +0 -36
biopipen/reports/scrna/GeneExpressionInvistigation.svelte +0 -32
biopipen/reports/scrna/ScFGSEA.svelte +0 -35
biopipen/reports/scrna/SeuratClusterStats.svelte +0 -82
biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -20
biopipen/reports/scrna/SeuratPreparing.svelte +0 -38
biopipen/reports/scrna/TopExpressingGenes.svelte +0 -55
biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -31
biopipen/reports/utils/gsea.liq +0 -110
biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
biopipen/scripts/gene/GeneNameConversion.py +0 -66
biopipen/scripts/scrna/ExprImpution-alra.R +0 -32
biopipen/scripts/scrna/ExprImpution-rmagic.R +0 -29
biopipen/scripts/scrna/ExprImpution.R +0 -7
biopipen/scripts/scrna/GeneExpressionInvistigation.R +0 -132
biopipen/scripts/scrna/Write10X.R +0 -11
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -150
biopipen/scripts/tcr/TCRClustering.R +0 -280
biopipen/utils/common_docstrs.py +0 -61
biopipen/utils/gene.R +0 -49
biopipen/utils/gsea.R +0 -193
biopipen/utils/io.R +0 -20
biopipen/utils/misc.R +0 -114
biopipen/utils/mutate_helpers.R +0 -433
biopipen/utils/plot.R +0 -173
biopipen/utils/rnaseq.R +0 -48
biopipen/utils/single_cell.R +0 -115
biopipen-0.21.0.dist-info/METADATA +0 -22
biopipen-0.21.0.dist-info/RECORD +0 -218

biopipen/utils/mutate_helpers.R DELETED Viewed

@@ -1,433 +0,0 @@
-suppressPackageStartupMessages(library(rlang))
-suppressPackageStartupMessages(library(tidyselect))
-suppressPackageStartupMessages(library(dplyr))
-#' Get expanded, collapsed, emerged or vanished clones from a meta data frame
-#'
-#' @rdname Get expanded, collapsed, emerged or vanished clones
-#'
-#' @param df The meta data frame
-#' @param group.by The column name (without quotes) in metadata to group the
-#'  cells.
-#' @param idents The groups of cells to compare (values in `group-by` column).
-#'  Either length 1 (`ident_1`) or length 2 (`ident_1` and `ident_2`).
-#'  If length 1, the rest of the cells with non-NA values in `group.by` will
-#'  be used as `ident_2`.
-#' @param subset An expression to subset the cells, will be passed to
-#'  `dplyr::filter()`. Default is `TRUE` (no filtering).
-#' @param id The column name (without quotes) in metadata for the
-#'  group ids (i.e. `CDR3.aa`)
-#' @param compare Either a (numeric) column name (i.e. `Clones`, without quotes)
-#'  in metadata to compare between groups, or `.n` to compare the
-#'  number of cells in each group.
-#' @param fun The way to compare between groups. Either `"expanded"`,
-#'  `"collapsed"`, `"emerged"` or `"vanished"`.
-#' @param uniq Whether to return unique ids or not. Default is `TRUE`.
-#'  If `FALSE`, you can mutate the meta data frame with the returned ids.
-#'  For example, `df %>% mutate(expanded = expanded(...))`.
-#' @param order The order of the returned ids. It could be `sum` or `diff`,
-#'  which is the sum or diff of the `compare` between idents. Two kinds of
-#'  modifiers can be added, including `desc` and `abs`. For example,
-#'  `sum,desc` means the sum of `compare` between idents in descending order.
-#'  Default is `diff,abs,desc`.
-#'  It only works when `uniq` is `TRUE`. If `uniq` is `FALSE`, the returned
-#'  ids will be in the same order as in `df`.
-#' @param include_emerged Whether to include emerged clones for the expanded clones.
-#'  Default is `FALSE`. It only works for `"expanded"`.
-#' @param include_vanished Whether to include vanished clones for the collapsed clones.
-#'  Default is `FALSE`. It only works for `"collapsed"`.
-#'
-#' @return A vector of expanded or collapsed clones (in `id` column)
-#'  If uniq is `FALSE`, the vector will be the same length as `df`.
-#'
-#' @examples
-#' # Get expanded clones
-#' df <- tibble(
-#'  Clones = c(10, 8, 1, 5, 9, 2, 3, 7, 6, 4, 9, 9),
-#'  Source = c(
-#'      "Tumor", "Normal", "Normal", "Normal", "Tumor", "Tumor",
-#'      "Tumor", "Normal", "Normal", "Normal", NA, "X"
-#'  ),
-#'  CDR3.aa = c("A", "C", "B", "E", "D", "E", "E", "B", "B", "B", "A", "A")
-#' )
-#'
-#' expanded(df, Source, c("Tumor", "Normal"))
-#' # The transformed data frame looks like this:
-#   CDR3.aa ..predicate ..sum ..diff
-#   <chr>   <lgl>       <dbl>  <dbl>
-# 1 A       TRUE           10     10
-# 2 B       FALSE           1     -1
-# 3 C       FALSE           8     -8
-# 4 D       TRUE            9      9
-# 5 E       FALSE           7     -3
-#'
-#' # [1] "A" "D"
-#'
-#' # Get collapsed clones
-#' collapsed(df, Source, c("Tumor", "Normal"))
-#' # [1] "B" "C" "E"
-#'
-#' # Get emerged clones
-#' emerged(df, Source, c("Tumor", "Normal"))
-#' # [1] "A" "D"
-#'
-#' # Get vanished clones
-#' vanished(df, Source, c("Tumor", "Normal"))
-#' # [1] "B" "C"
-.size_compare <- function(
-    df,
-    group.by, # nolint
-    idents,
-    subset,
-    id,
-    compare,
-    fun,
-    uniq,
-    order
-) {
-    if (length(idents) == 1) {
-        ident_1 <- idents[1]
-        ident_2 <- NULL
-    } else if (length(idents) == 2) {
-        ident_1 <- idents[1]
-        ident_2 <- idents[2]
-    } else {
-        stop("idents must be length 1 or 2")
-    }
-    if (is.null(ident_2)) ident_2 <- "<NULL>"
-    if (is_empty(attr(group.by, ".Environment"))) {
-        # Works if a (quoted) string passed
-        group.by <- sym(as_name(group.by))
-    }
-    if (is_empty(attr(id, ".Environment"))) {
-        id <- sym(as_name(id))
-    }
-    if (is_empty(attr(compare, ".Environment"))) {
-        compare <- sym(as_name(compare))
-    }
-    compare_label <- as_name(compare)
-    compare_is_count <- compare_label == '.n'
-    if (!as_name(group.by) %in% colnames(df)) {
-        stop(paste0(
-            '`group.by` must be a column name in df. Got "',
-            as_name(group.by),
-            '"'
-        ))
-    }
-    if (!compare_is_count && !compare_label %in% colnames(df)) {
-        stop(paste0(
-            "`compare` must be either a column name in df, or 'count'/'n'. ",
-            'Got "',
-            compare_label,
-            '"'
-        ))
-    }
-    predicate <- function(comp) {
-        if (fun == "expanded") {
-            comp[1] > comp[2] && comp[2] > 0
-        } else if (fun == "expanded+") {
-            comp[1] > comp[2]
-        } else if (fun == "collapsed") {
-            comp[1] < comp[2] && comp[1] > 0
-        } else if (fun == "collapsed+") {
-            comp[1] < comp[2]
-        } else if (fun == "emerged") {
-            comp[1] > 0 && comp[2] == 0
-        } else if (fun == "vanished") {
-            comp[1] == 0 && comp[2] > 0
-        }
-    }
-    # subset the data frame
-    trans <- df %>% dplyr::filter(!!subset) %>%
-        # remove NA values in group.by column
-        dplyr::filter(!is.na(!!group.by)) %>%
-        # mark the group.by column (as ..group) as ident_1 or ident_2 or NA
-        mutate(
-            ..group = if_else(
-                !!group.by == ident_1,
-                "ident_1",
-                if_else(ident_2 != "<NULL>" & !!group.by != ident_2, NA, "ident_2")
-            )
-        ) %>%
-        # remove NA values in ..group column
-        dplyr::filter(!is.na(..group)) %>%
-        # for each clone and group (ident_1 and ident_2)
-        group_by(!!id, ..group) %>%
-        # summarise the number of cells in each clone and group
-        # so that we can compare between groups later
-        summarise(
-            ..compare = ifelse(compare_is_count, n(), first(!!compare)),
-            .groups = "drop"
-        ) %>%
-        # for each clone, either compare Clones or ..count between groups
-        # (ident_1 and ident_2)
-        group_by(!!id) %>%
-        # add missing group (either ident_1 or ident_2)
-        group_modify(function(d, ...) {
-            if (nrow(d) == 1) {
-                d <- d %>% add_row(
-                    ..group = ifelse(
-                        d$..group == "ident_1", "ident_2", "ident_1"
-                    ),
-                    ..compare = 0
-                )
-            }
-            d
-        }) %>%
-        # make sure ident_1 and ident_2 are in order
-        arrange(..group, .by_group = TRUE) %>%
-        # add the predicates, sums and diffs
-        summarise(
-            ..predicate = predicate(..compare),
-            ..sum = sum(..compare),
-            ..diff = ..compare[1] - ..compare[2]
-        ) %>%
-        # filter the clones
-        dplyr::filter(..predicate)
-    order_sum <- grepl("sum", order)
-    order_diff <- grepl("diff", order)
-    order_desc <- grepl("desc", order)
-    order_abs <- grepl("abs", order)
-    if (order_sum && !order_desc) {
-        out <- trans %>% arrange(..sum) %>% pull(!!id)
-    } else if (order_sum) {
-        out <- trans %>% arrange(desc(..sum)) %>% pull(!!id)
-    } else if (order_diff && !order_desc && !order_abs) {
-        out <- trans %>% arrange(..diff) %>% pull(!!id)
-    } else if (order_diff && !order_desc && order_abs) {
-        out <- trans %>% arrange(abs(..diff)) %>% pull(!!id)
-    } else if (order_diff && order_desc && !order_abs) {
-        out <- trans %>% arrange(desc(..diff)) %>% pull(!!id)
-    } else if (order_diff && order_desc && order_abs) {
-        out <- trans %>% arrange(desc(abs(..diff))) %>% pull(!!id)
-    } else {
-        out <- trans %>% pull(!!id)
-    }
-    if (uniq) { return(out) }
-    df %>% mutate(..out = if_else(!!id %in% out, !!id, NA)) %>% pull(..out)
-}
-#' @export
-expanded <- function(
-    df,
-    group.by, # nolint
-    idents,
-    subset = TRUE,
-    id = CDR3.aa,
-    compare = Clones,
-    uniq = TRUE,
-    order = "diff+desc",
-    include_emerged = FALSE
-) {
-    lbl <- as_label(enquo(df))
-    if (length(lbl) == 1 && lbl == ".") {
-        df <- across(everything())
-    }
-    fun = if (include_emerged) "expanded+" else "expanded"
-    .size_compare(
-        df,
-        enquo(group.by),
-        idents,
-        enquo(subset),
-        enquo(id),
-        enquo(compare),
-        fun,
-        uniq = uniq,
-        order = order
-    )
-}
-#' @export
-collapsed <- function(
-    df,
-    group.by, # nolint
-    idents,
-    subset = TRUE,
-    id = CDR3.aa,
-    compare = Clones,
-    uniq = TRUE,
-    order = "diff+desc",
-    include_vanished = FALSE
-) {
-    lbl <- as_label(enquo(df))
-    if (length(lbl) == 1 && lbl == ".") {
-        df <- across(everything())
-    }
-    fun = if (include_vanished) "collapsed+" else "collapsed"
-    .size_compare(
-        df,
-        enquo(group.by),
-        idents,
-        enquo(subset),
-        enquo(id),
-        enquo(compare),
-        fun,
-        uniq = uniq,
-        order = order
-    )
-}
-#' @export
-emerged <- function(
-    df,
-    group.by, # nolint
-    idents,
-    subset = TRUE,
-    id = CDR3.aa,
-    compare = Clones,
-    uniq = TRUE,
-    order = "diff+desc"
-) {
-    lbl <- as_label(enquo(df))
-    if (length(lbl) == 1 && lbl == ".") {
-        df <- across(everything())
-    }
-    .size_compare(
-        df,
-        enquo(group.by),
-        idents,
-        enquo(subset),
-        enquo(id),
-        enquo(compare),
-        "emerged",
-        uniq = uniq,
-        order = order
-    )
-}
-#' @export
-vanished <- function(
-    df,
-    group.by, # nolint
-    idents,
-    subset = TRUE,
-    id = CDR3.aa,
-    compare = Clones,
-    uniq = TRUE,
-    order = "diff+desc"
-) {
-    lbl <- as_label(enquo(df))
-    if (length(lbl) == 1 && lbl == ".") {
-        df <- across(everything())
-    }
-    .size_compare(
-        df,
-        enquo(group.by),
-        idents,
-        enquo(subset),
-        enquo(id),
-        enquo(compare),
-        "vanished",
-        uniq = uniq,
-        order = order
-    )
-}
-#' Get paired entities from a data frame based on the other column
-#'
-#' @rdname Get paired entities
-#' @param df The data frame. Use `.` if the function is called in a dplyr pipe.
-#' @param id_col The column name in `df` for the ids to be returned in the
-#'   final output
-#' @param compare_col The column name in `df` to compare the values for each
-#'   id in `id_col`.
-#' @param idents The values in `compare_col` to compare. It could be either an
-#'   an integer or a vector. If it is an integer, the number of values in
-#'   `compare_col` must be the same as the integer for the `id` to be regarded
-#'   as paired. If it is a vector, the values in `compare_col` must be the same
-#'   as the values in `idents` for the `id` to be regarded as paired.
-#' @param uniq Whether to return unique ids or not. Default is `TRUE`.
-#'   If `FALSE`, you can mutate the meta data frame with the returned ids.
-#'   Non-paired ids will be `NA`.
-#' @return A vector of paired ids (in `id_col` column)
-#' @examples
-#' df <- tibble(
-#'   id = c("A", "A", "B", "B", "C", "C", "D", "D"),
-#'   compare = c(1, 2, 1, 1, 1, 2, 1, 2)
-#' )
-#' paired(df, id, compare, 2)
-#' # [1] "A" "B" "C" "D"
-#' paired(df, id, compare, c(1, 2))
-#' # [1] "A" "C" "D"
-#' paired(df, id, compare, c(1, 2), uniq = FALSE)
-#' # [1] "A" "A" NA NA "C" "C" "D" "D"
-#'
-paired <- function(
-    df,
-    id_col,
-    compare_col,
-    idents = 2,
-    uniq = TRUE
-) {
-    lbl <- as_label(enquo(df))
-    if (length(lbl) == 1 && lbl == ".") {
-        df <- across(everything())
-    }
-    id_col <- enquo(id_col)
-    compare_col <- enquo(compare_col)
-    if (is_empty(attr(id_col, ".Environment"))) {
-        id_col <- sym(as_name(id_col))
-    }
-    if (is_empty(attr(compare_col, ".Environment"))) {
-        compare_col <- sym(as_name(compare_col))
-    }
-    if (!as_name(id_col) %in% colnames(df)) {
-        stop(paste0(
-            '`id_col` must be a column name in df. Got "',
-            as_name(id_col),
-            '"'
-        ))
-    }
-    if (!as_name(compare_col) %in% colnames(df)) {
-        stop(paste0(
-            '`compare_col` must be a column name in df. Got "',
-            as_name(compare_col),
-            '"'
-        ))
-    }
-    if (is.numeric(idents) && length(idents) == 1) {
-        if (idents <= 1) {
-            stop(paste0(
-                '`idents` must be greater than 1. Got ',
-                idents
-            ))
-        }
-        out <- df %>%
-            add_count(!!id_col, name = "..count") %>%
-            mutate(..paired = if_else(..count == idents, !!id_col, NA))
-    } else {
-        if (length(idents) <= 1) {
-            stop(paste0(
-                '`idents` must be a vector with length greater than 1. Got ',
-                length(idents)
-            ))
-        }
-        out <- df %>%
-            group_by(!!id_col) %>%
-            mutate(
-                ..paired = if_else(
-                    rep(setequal(!!compare_col, idents), n()),
-                    !!id_col,
-                    NA
-                )
-            ) %>%
-            ungroup()
-    }
-    out <- out %>% pull(..paired)
-    if (uniq) {
-        return(out %>% na.omit() %>% unique() %>% as.vector())
-    } else {
-        return(out)
-    }
-}

biopipen/utils/plot.R DELETED Viewed

@@ -1,173 +0,0 @@
-library(ggplot2)
-pdf(NULL) # preventing Rplots.pdf
-plotVenn = function(
-    # A named list with elements,
-    # e.g. list(A=paste0("R", 1:5), B=paste0("R": 3:7))
-    data,
-    # Arguments for `ggVennDiagram()`
-    args = list(),
-    # Extra ggplot components in string
-    ggs = NULL,
-    # Parameters for device (res, width, height) for `png()`
-    devpars = list(res=100, width=1000, height=1000),
-    # The output file. If NULL, will return the plot object
-    outfile = NULL
-) {
-    library(ggVennDiagram)
-    args$x = data
-    p = do.call(ggVennDiagram, args)
-    if (!is.null(ggs)) {
-        for (gg in ggs) {
-            if (is.character(gg)) {
-                p = p + eval(parse(text=gg))
-            } else {
-                p = p + gg
-            }
-        }
-    }
-    if (is.null(outfile)) {
-        return (p)
-    } else {
-        devpars$filename = outfile
-        do.call(png, devpars)
-        print(p)
-        dev.off()
-    }
-}
-plotGG = function(
-    # A data frame (long format)
-    data,
-    # the geom
-    geom,
-    # Arguments for `geom_x()`
-    args = list(),
-    # Extra ggplot components in string
-    ggs = NULL,
-    # Parameters for device (res, width, height) for `png()`
-    devpars = list(res=100, width=1000, height=1000),
-    # The output file. If NULL, will return the plot object
-    outfile = NULL
-) {
-    p = ggplot(data)
-    p = p + do.call(paste0("geom_", geom), args)
-    if (!is.null(ggs)) {
-        for (gg in ggs) {
-            if (is.character(gg)) {
-                p = p + eval(parse(text=gg))
-            } else {
-                p = p + gg
-            }
-        }
-    }
-    if (is.null(outfile)) {
-        return (p)
-    } else {
-        devpars$filename = outfile
-        do.call(png, devpars)
-        print(p)
-        dev.off()
-    }
-}
-plotViolin = function(
-    # A data frame (long format)
-    data,
-    # Arguments for `geom_violin()`
-    args = list(),
-    # Extra ggplot components in string
-    ggs = NULL,
-    # Parameters for device (res, width, height) for `png()`
-    devpars = list(res=100, width=1000, height=1000),
-    # The output file. If NULL, will return the plot object
-    outfile = NULL
-) {
-    plotGG(data, "violin", args, ggs, devpars, outfile)
-}
-plotUpset = function(
-    # A named list with elements,
-    # e.g. list(A=paste0("R", 1:5), B=paste0("R": 3:7))
-    # Or a data frame
-    # https://cran.r-project.org/web/packages/ggupset/readme/README.html
-    data,
-    # Arguments for `scale_x_upset()`
-    args = list(),
-    # Extra ggplot components in string
-    ggs = "geom_bar(aes(x=V1))",
-    # Parameters for device (res, width, height) for `png()`
-    devpars = list(res=100, width=1000, height=1000),
-    # The output file. If NULL, will return the plot object
-    outfile = NULL
-) {
-    library(ggupset)
-    library(tidyr)
-    library(dplyr)
-    if (!is.data.frame(data) && is.list(data)) {
-        all_elems = unique(unlist(data))
-        df = data.frame(ALL_ELEMS = all_elems)
-        data = do.call(cbind, lapply(names(data), function(nd) {
-            df[df$ALL_ELEMS %in% data[[nd]], nd] = nd
-            df
-        })) %>% select(-ALL_ELEMS) %>% unite("V1", sep="; ", na.rm = TRUE) %>%
-            mutate(V1 = strsplit(V1, "; ", fixed=TRUE))
-    }
-    p = ggplot(data)
-    for (gg in ggs) {
-        if (is.character(gg)) {
-            p = p + eval(parse(text=gg))
-        } else {
-            p = p + gg
-        }
-    }
-    p = p + do.call(scale_x_upset, args)
-    if (is.null(outfile)) {
-        return (p)
-    } else {
-        devpars$filename = outfile
-        do.call(png, devpars)
-        print(p)
-        dev.off()
-    }
-}
-plotHeatmap = function(
-    # Data matrix
-	data,
-    # Arguments for `ComplexHeatmap::Heatmap()`
-    args = list(),
-    # Other arguments for `ComplexHeatmap::draw()`
-    draw = list(),
-    # Parameters for device (res, width, height) for `png()`
-    devpars = NULL,
-    # The output file. If NULL, will return the plot object
-    # If "draw", will call `ComplexHeatmap::draw()`
-    outfile = NULL
-) {
-	library(ComplexHeatmap)
-	args$matrix = as.matrix(data)
-	hm = do.call(Heatmap, args)
-    if (is.null(outfile)) {
-        return(hm)
-    } else if (outfile == "draw") {
-        do.call(ComplexHeatmap::draw, c(list(hm), draw))
-    } else {
-        devpars$filename = outfile
-        do.call(png, devpars)
-        do.call(ComplexHeatmap::draw, c(list(hm), draw))
-        dev.off()
-    }
-}

biopipen/utils/rnaseq.R DELETED Viewed

@@ -1,48 +0,0 @@
-.normUnit = function(unit) {
-    if ("count" %in% unit) {
-        return("count")
-    }
-    return(unit)
-}
-glenFromGFFExons = function(exonfile) {
-    gff  = read.table(exonfile, header = F, row.names = NULL)
-    # V4: start, V5: end, V10: gene name
-    glen = aggregate(V5-V4+1 ~ V10, gff, sum)
-    genes = glen[,1]
-    glen = glen[,-1,drop=TRUE]
-    names(glen) = genes
-    return(glen)
-}
-count2tpm = function(x, args) {
-    if (is.null(args$genelen)) {
-        stop("Gene lengths are required to convert count to TPM.")
-    }
-    glengenes = names(args$genelen)
-    mygenes = rownames(x)
-    missing = setdiff(mygenes, glengenes)
-    warning(paste(length(missing), "gene cannot be found in gene length data"))
-    warning(paste(missing, sep=", "))
-    genes = intersect(mygenes, glengenes)
-    x = x[genes, , drop=FALSE]
-    # see: https://gist.github.com/slowkow/c6ab0348747f86e2748b
-    # and https://support.bioconductor.org/p/91218/
-    out = x / unlist(args$genelen[genes])
-    out = t(t(out) * 1e6 / colSums(out))
-    rownames(out) = genes
-    colnames(out) = colnames(x)
-    return(out)
-}
-unit_conversion = function(x, inunit, outunit, args=list()) {
-    inunit = .normUnit(inunit)
-    outunit = .normUnit(outunit)
-    func = get(paste0(inunit, "2", outunit))
-    func(x, args)
-}

biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl

biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl