PyPI - biopipen - Versions diffs - 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl - Mend

biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (290) hide show

biopipen/__init__.py +1 -1
biopipen/core/config.toml +28 -0
biopipen/core/filters.py +79 -4
biopipen/core/proc.py +12 -3
biopipen/core/testing.py +75 -3
biopipen/ns/bam.py +148 -6
biopipen/ns/bed.py +75 -0
biopipen/ns/cellranger.py +186 -0
biopipen/ns/cellranger_pipeline.py +126 -0
biopipen/ns/cnv.py +19 -3
biopipen/ns/cnvkit.py +1 -1
biopipen/ns/cnvkit_pipeline.py +20 -12
biopipen/ns/delim.py +34 -35
biopipen/ns/gene.py +68 -23
biopipen/ns/gsea.py +63 -37
biopipen/ns/misc.py +39 -14
biopipen/ns/plot.py +304 -1
biopipen/ns/protein.py +183 -0
biopipen/ns/regulatory.py +290 -0
biopipen/ns/rnaseq.py +142 -5
biopipen/ns/scrna.py +2053 -473
biopipen/ns/scrna_metabolic_landscape.py +228 -382
biopipen/ns/snp.py +659 -0
biopipen/ns/stats.py +484 -0
biopipen/ns/tcr.py +683 -98
biopipen/ns/vcf.py +236 -2
biopipen/ns/web.py +97 -6
biopipen/reports/bam/CNVpytor.svelte +4 -9
biopipen/reports/cellranger/CellRangerCount.svelte +18 -0
biopipen/reports/cellranger/CellRangerSummary.svelte +16 -0
biopipen/reports/cellranger/CellRangerVdj.svelte +18 -0
biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
biopipen/reports/common.svelte +15 -0
biopipen/reports/protein/ProdigySummary.svelte +16 -0
biopipen/reports/scrna/CellsDistribution.svelte +4 -39
biopipen/reports/scrna/DimPlots.svelte +1 -1
biopipen/reports/scrna/MarkersFinder.svelte +6 -126
biopipen/reports/scrna/MetaMarkers.svelte +3 -75
biopipen/reports/scrna/RadarPlots.svelte +4 -20
biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +61 -22
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +88 -82
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +70 -10
biopipen/reports/snp/PlinkCallRate.svelte +24 -0
biopipen/reports/snp/PlinkFreq.svelte +18 -0
biopipen/reports/snp/PlinkHWE.svelte +18 -0
biopipen/reports/snp/PlinkHet.svelte +18 -0
biopipen/reports/snp/PlinkIBD.svelte +18 -0
biopipen/reports/tcr/CDR3AAPhyschem.svelte +19 -66
biopipen/reports/tcr/ClonalStats.svelte +16 -0
biopipen/reports/tcr/CloneResidency.svelte +3 -93
biopipen/reports/tcr/Immunarch.svelte +4 -155
biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
biopipen/reports/tcr/TESSA.svelte +11 -28
biopipen/reports/utils/misc.liq +22 -7
biopipen/scripts/bam/BamMerge.py +11 -15
biopipen/scripts/bam/BamSampling.py +90 -0
biopipen/scripts/bam/BamSort.py +141 -0
biopipen/scripts/bam/BamSplitChroms.py +10 -10
biopipen/scripts/bam/BamSubsetByBed.py +38 -0
biopipen/scripts/bam/CNAClinic.R +41 -5
biopipen/scripts/bam/CNVpytor.py +153 -54
biopipen/scripts/bam/ControlFREEC.py +13 -14
biopipen/scripts/bam/SamtoolsView.py +33 -0
biopipen/scripts/bed/Bed2Vcf.py +5 -5
biopipen/scripts/bed/BedConsensus.py +5 -5
biopipen/scripts/bed/BedLiftOver.sh +6 -4
biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
biopipen/scripts/bed/BedtoolsMakeWindows.py +47 -0
biopipen/scripts/bed/BedtoolsMerge.py +4 -4
biopipen/scripts/cellranger/CellRangerCount.py +138 -0
biopipen/scripts/cellranger/CellRangerSummary.R +181 -0
biopipen/scripts/cellranger/CellRangerVdj.py +112 -0
biopipen/scripts/cnv/AneuploidyScore.R +55 -20
biopipen/scripts/cnv/AneuploidyScoreSummary.R +221 -163
biopipen/scripts/cnv/TMADScore.R +25 -9
biopipen/scripts/cnv/TMADScoreSummary.R +57 -86
biopipen/scripts/cnvkit/CNVkitAccess.py +7 -6
biopipen/scripts/cnvkit/CNVkitAutobin.py +26 -18
biopipen/scripts/cnvkit/CNVkitBatch.py +6 -6
biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
biopipen/scripts/cnvkit/CNVkitCoverage.py +4 -3
biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
biopipen/scripts/cnvkit/CNVkitGuessBaits.py +12 -8
biopipen/scripts/cnvkit/CNVkitHeatmap.py +5 -5
biopipen/scripts/cnvkit/CNVkitReference.py +6 -5
biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
biopipen/scripts/cnvkit/guess_baits.py +166 -93
biopipen/scripts/delim/RowsBinder.R +1 -1
biopipen/scripts/delim/SampleInfo.R +116 -118
biopipen/scripts/gene/GeneNameConversion.R +67 -0
biopipen/scripts/gene/GenePromoters.R +61 -0
biopipen/scripts/gsea/Enrichr.R +5 -5
biopipen/scripts/gsea/FGSEA.R +184 -50
biopipen/scripts/gsea/GSEA.R +2 -2
biopipen/scripts/gsea/PreRank.R +5 -5
biopipen/scripts/misc/Config2File.py +2 -2
biopipen/scripts/misc/Plot.R +80 -0
biopipen/scripts/misc/Shell.sh +15 -0
biopipen/scripts/misc/Str2File.py +2 -2
biopipen/scripts/plot/Heatmap.R +3 -3
biopipen/scripts/plot/Manhattan.R +147 -0
biopipen/scripts/plot/QQPlot.R +146 -0
biopipen/scripts/plot/ROC.R +88 -0
biopipen/scripts/plot/Scatter.R +112 -0
biopipen/scripts/plot/VennDiagram.R +5 -9
biopipen/scripts/protein/MMCIF2PDB.py +33 -0
biopipen/scripts/protein/PDB2Fasta.py +60 -0
biopipen/scripts/protein/Prodigy.py +119 -0
biopipen/scripts/protein/ProdigySummary.R +140 -0
biopipen/scripts/protein/RMSD.py +178 -0
biopipen/scripts/regulatory/MotifAffinityTest.R +102 -0
biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +127 -0
biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +104 -0
biopipen/scripts/regulatory/MotifScan.py +159 -0
biopipen/scripts/regulatory/VariantMotifPlot.R +78 -0
biopipen/scripts/regulatory/motifs-common.R +324 -0
biopipen/scripts/rnaseq/Simulation-ESCO.R +180 -0
biopipen/scripts/rnaseq/Simulation-RUVcorr.R +45 -0
biopipen/scripts/rnaseq/Simulation.R +21 -0
biopipen/scripts/rnaseq/UnitConversion.R +325 -54
biopipen/scripts/scrna/AnnData2Seurat.R +40 -0
biopipen/scripts/scrna/CCPlotR-patch.R +161 -0
biopipen/scripts/scrna/CellCellCommunication.py +150 -0
biopipen/scripts/scrna/CellCellCommunicationPlots.R +93 -0
biopipen/scripts/scrna/CellSNPLite.py +30 -0
biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +185 -0
biopipen/scripts/scrna/CellTypeAnnotation-direct.R +68 -31
biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +27 -22
biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +28 -20
biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +48 -25
biopipen/scripts/scrna/CellTypeAnnotation.R +37 -1
biopipen/scripts/scrna/CellsDistribution.R +456 -167
biopipen/scripts/scrna/DimPlots.R +1 -1
biopipen/scripts/scrna/ExprImputation-alra.R +109 -0
biopipen/scripts/scrna/ExprImputation-rmagic.R +256 -0
biopipen/scripts/scrna/{ExprImpution-scimpute.R → ExprImputation-scimpute.R} +8 -5
biopipen/scripts/scrna/ExprImputation.R +7 -0
biopipen/scripts/scrna/LoomTo10X.R +51 -0
biopipen/scripts/scrna/MQuad.py +25 -0
biopipen/scripts/scrna/MarkersFinder.R +679 -400
biopipen/scripts/scrna/MetaMarkers.R +265 -161
biopipen/scripts/scrna/ModuleScoreCalculator.R +66 -11
biopipen/scripts/scrna/PseudoBulkDEG.R +678 -0
biopipen/scripts/scrna/RadarPlots.R +355 -134
biopipen/scripts/scrna/ScFGSEA.R +298 -100
biopipen/scripts/scrna/ScSimulation.R +65 -0
biopipen/scripts/scrna/ScVelo.py +617 -0
biopipen/scripts/scrna/Seurat2AnnData.R +7 -0
biopipen/scripts/scrna/SeuratClusterStats-clustree.R +87 -0
biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +36 -30
biopipen/scripts/scrna/SeuratClusterStats-features.R +138 -187
biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +81 -0
biopipen/scripts/scrna/SeuratClusterStats-stats.R +78 -89
biopipen/scripts/scrna/SeuratClusterStats.R +47 -10
biopipen/scripts/scrna/SeuratClustering.R +36 -233
biopipen/scripts/scrna/SeuratLoading.R +2 -2
biopipen/scripts/scrna/SeuratMap2Ref.R +84 -113
biopipen/scripts/scrna/SeuratMetadataMutater.R +16 -6
biopipen/scripts/scrna/SeuratPreparing.R +223 -173
biopipen/scripts/scrna/SeuratSubClustering.R +64 -0
biopipen/scripts/scrna/SeuratTo10X.R +27 -0
biopipen/scripts/scrna/Slingshot.R +65 -0
biopipen/scripts/scrna/Subset10X.R +2 -2
biopipen/scripts/scrna/TopExpressingGenes.R +169 -135
biopipen/scripts/scrna/celltypist-wrapper.py +195 -0
biopipen/scripts/scrna/scvelo_paga.py +313 -0
biopipen/scripts/scrna/seurat_anndata_conversion.py +98 -0
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +447 -82
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +348 -241
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +188 -166
biopipen/scripts/snp/MatrixEQTL.R +217 -0
biopipen/scripts/snp/Plink2GTMat.py +148 -0
biopipen/scripts/snp/PlinkCallRate.R +199 -0
biopipen/scripts/snp/PlinkFilter.py +100 -0
biopipen/scripts/snp/PlinkFreq.R +291 -0
biopipen/scripts/snp/PlinkFromVcf.py +81 -0
biopipen/scripts/snp/PlinkHWE.R +85 -0
biopipen/scripts/snp/PlinkHet.R +96 -0
biopipen/scripts/snp/PlinkIBD.R +196 -0
biopipen/scripts/snp/PlinkSimulation.py +124 -0
biopipen/scripts/snp/PlinkUpdateName.py +124 -0
biopipen/scripts/stats/ChowTest.R +146 -0
biopipen/scripts/stats/DiffCoexpr.R +152 -0
biopipen/scripts/stats/LiquidAssoc.R +135 -0
biopipen/scripts/stats/Mediation.R +108 -0
biopipen/scripts/stats/MetaPvalue.R +130 -0
biopipen/scripts/stats/MetaPvalue1.R +74 -0
biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
biopipen/scripts/tcr/Attach2Seurat.R +3 -2
biopipen/scripts/tcr/CDR3AAPhyschem.R +211 -143
biopipen/scripts/tcr/CDR3Clustering.R +343 -0
biopipen/scripts/tcr/ClonalStats.R +526 -0
biopipen/scripts/tcr/CloneResidency.R +255 -131
biopipen/scripts/tcr/CloneSizeQQPlot.R +4 -4
biopipen/scripts/tcr/GIANA/GIANA.py +1356 -797
biopipen/scripts/tcr/GIANA/GIANA4.py +1362 -789
biopipen/scripts/tcr/GIANA/query.py +164 -162
biopipen/scripts/tcr/Immunarch-basic.R +31 -9
biopipen/scripts/tcr/Immunarch-clonality.R +25 -5
biopipen/scripts/tcr/Immunarch-diversity.R +352 -134
biopipen/scripts/tcr/Immunarch-geneusage.R +45 -5
biopipen/scripts/tcr/Immunarch-kmer.R +68 -8
biopipen/scripts/tcr/Immunarch-overlap.R +84 -4
biopipen/scripts/tcr/Immunarch-spectratyping.R +35 -6
biopipen/scripts/tcr/Immunarch-tracking.R +38 -6
biopipen/scripts/tcr/Immunarch-vjjunc.R +165 -0
biopipen/scripts/tcr/Immunarch.R +63 -11
biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
biopipen/scripts/tcr/ImmunarchFilter.R +4 -4
biopipen/scripts/tcr/ImmunarchLoading.R +38 -29
biopipen/scripts/tcr/SampleDiversity.R +1 -1
biopipen/scripts/tcr/ScRepCombiningExpression.R +40 -0
biopipen/scripts/tcr/ScRepLoading.R +166 -0
biopipen/scripts/tcr/TCRClusterStats.R +176 -22
biopipen/scripts/tcr/TCRDock.py +110 -0
biopipen/scripts/tcr/TESSA.R +102 -118
biopipen/scripts/tcr/VJUsage.R +5 -5
biopipen/scripts/tcr/immunarch-patched.R +142 -0
biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
biopipen/scripts/vcf/BcftoolsSort.py +113 -0
biopipen/scripts/vcf/BcftoolsView.py +73 -0
biopipen/scripts/vcf/TruvariBench.sh +14 -7
biopipen/scripts/vcf/TruvariBenchSummary.R +16 -13
biopipen/scripts/vcf/TruvariConsistency.R +1 -1
biopipen/scripts/vcf/Vcf2Bed.py +2 -2
biopipen/scripts/vcf/VcfAnno.py +11 -11
biopipen/scripts/vcf/VcfDownSample.sh +22 -10
biopipen/scripts/vcf/VcfFilter.py +5 -5
biopipen/scripts/vcf/VcfFix.py +7 -7
biopipen/scripts/vcf/VcfFix_utils.py +13 -4
biopipen/scripts/vcf/VcfIndex.py +3 -3
biopipen/scripts/vcf/VcfIntersect.py +3 -3
biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
biopipen/scripts/vcf/bcftools_utils.py +52 -0
biopipen/scripts/web/Download.py +8 -4
biopipen/scripts/web/DownloadList.py +5 -5
biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
biopipen/scripts/web/gcloud_common.py +49 -0
biopipen/utils/gene.py +108 -60
biopipen/utils/misc.py +146 -20
biopipen/utils/reference.py +64 -20
biopipen/utils/reporter.py +177 -0
biopipen/utils/vcf.py +1 -1
biopipen-0.34.26.dist-info/METADATA +27 -0
biopipen-0.34.26.dist-info/RECORD +292 -0
{biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
{biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +6 -2
biopipen/ns/bcftools.py +0 -111
biopipen/ns/scrna_basic.py +0 -255
biopipen/reports/delim/SampleInfo.svelte +0 -36
biopipen/reports/scrna/GeneExpressionInvistigation.svelte +0 -32
biopipen/reports/scrna/ScFGSEA.svelte +0 -35
biopipen/reports/scrna/SeuratClusterStats.svelte +0 -82
biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -20
biopipen/reports/scrna/SeuratPreparing.svelte +0 -38
biopipen/reports/scrna/TopExpressingGenes.svelte +0 -55
biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -31
biopipen/reports/utils/gsea.liq +0 -110
biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
biopipen/scripts/gene/GeneNameConversion.py +0 -66
biopipen/scripts/scrna/ExprImpution-alra.R +0 -32
biopipen/scripts/scrna/ExprImpution-rmagic.R +0 -29
biopipen/scripts/scrna/ExprImpution.R +0 -7
biopipen/scripts/scrna/GeneExpressionInvistigation.R +0 -132
biopipen/scripts/scrna/Write10X.R +0 -11
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -150
biopipen/scripts/tcr/TCRClustering.R +0 -280
biopipen/utils/common_docstrs.py +0 -61
biopipen/utils/gene.R +0 -49
biopipen/utils/gsea.R +0 -193
biopipen/utils/io.R +0 -20
biopipen/utils/misc.R +0 -114
biopipen/utils/mutate_helpers.R +0 -433
biopipen/utils/plot.R +0 -173
biopipen/utils/rnaseq.R +0 -48
biopipen/utils/single_cell.R +0 -115
biopipen-0.21.0.dist-info/METADATA +0 -22
biopipen-0.21.0.dist-info/RECORD +0 -218

biopipen/scripts/stats/DiffCoexpr.R ADDED Viewed

@@ -0,0 +1,152 @@
+library(dcanr)
+library(scuttle)
+library(doRNG)
+library(doParallel)
+library(snpStats)
+library(rlang)
+library(dplyr)
+library(biopipen.utils)
+# Job inputs/outputs and options; each value is rendered into an R literal
+# by the pipeline's template engine before this script is run.
+infile <- {{in.infile | r}}
+groupfile <- {{in.groupfile | r}}
+outfile <- {{out.outfile | r}}
+method <- {{envs.method | r}}
+beta <- {{envs.beta | r}}
+padj <- {{envs.padj | r}}
+perm_batch <- {{envs.perm_batch | r}}
+seed <- {{envs.seed | r}}
+ncores <- {{envs.ncores | r}}
+transpose_input <- {{envs.transpose_input | r}}
+transpose_group <- {{envs.transpose_group | r}}
+log <- get_logger()
+log$info("Setting seed and parallel backend ...")
+# Seed both the main session and the doRNG streams used by %dorng% below
+set.seed(seed)
+registerDoParallel(cores = ncores)
+registerDoRNG(seed)
+log$info("Reading input files ...")
+# After the optional transpose, rows of indata are looked up by the row names
+# of gdata (see diffcoex_score), so both tables must be keyed by sample.
+indata <- read.table(infile, header = TRUE, row.names = 1, sep = "\t", check.names = FALSE)
+if (transpose_input) {
+    indata <- t(indata)
+}
+gdata <- read.table(groupfile, header = TRUE, row.names = 1, sep = "\t", check.names = FALSE)
+if (transpose_group) {
+    gdata <- t(gdata)
+}
+# Each column of gdata is one grouping that is analysed independently
+ngroups <- ncol(gdata)
+# Signed square: keeps the sign of x while squaring its magnitude
+sign2 <- function(x) sign(x) * x^2
+# Unexported dcanr helper that flattens a score matrix into a vector
+mat2vec <- dcanr:::mat2vec
+# Compute the differential co-expression score matrix for one grouping.
+#
+# Samples are split by the labels found in column `group` of the global
+# `gdata`; a pairwise correlation matrix is computed per label (labels with
+# fewer than 3 samples are dropped), and the matrices are combined into a
+# topological-overlap based dissimilarity.
+#
+# @param group Name of the column of `gdata` holding the sample labels.
+# @return A feature-by-feature score matrix carrying `method`, `beta` and
+#   `call` attributes (the call is re-evaluated by perm_test), or NULL when
+#   fewer than 2 usable label groups exist.
+diffcoex_score <- function(group) {
+    # Use `[` rather than `[[`: after transpose_group, gdata is a matrix and
+    # `[[` with a column name does not work on it.
+    labels <- gdata[, group, drop = TRUE]
+    gvals <- unique(labels)
+    if (length(gvals) < 2) {
+        log$debug("  Less than 2 groups in the input. Skipping ...")
+        return(NULL)
+    }
+    rs <- lapply(gvals, function(gval) {
+        # which() discards NA comparisons, so samples without a label are
+        # ignored instead of producing NA row selections
+        samples <- rownames(gdata)[which(labels == gval)]
+        if (length(samples) < 3) {
+            log$debug("  Less than 3 samples in one of the groups. Skipping ...")
+            return(NULL)
+        }
+        expr <- indata[samples, , drop = FALSE]
+        cor.pairs(as.matrix(expr), cor.method = method)
+    })
+    rs[sapply(rs, is.null)] <- NULL
+    if (length(rs) < 2) {
+        log$debug("  Less than 2 groups with at least 3 samples. Skipping ...")
+        return(NULL)
+    }
+    N <- length(rs)
+    # Mean signed-squared correlation across the label groups
+    C0 <- lapply(rs, sign2)
+    C0 <- Reduce(`+`, C0) / N
+    # Average absolute deviation of each group from that mean
+    D <- lapply(rs, function(r) abs(sign2(r) - C0))
+    D <- Reduce(`+`, D) / 2 / (N - 1)
+    D <- sqrt(D)
+    D <- D^beta
+    T_ovlap <- D %*% D + ncol(D) * D  #calc topological ovlap
+    mins = matrix(rep(rowSums(D), ncol(D)), nrow = ncol(D))
+    mins = pmin(mins, matrix(rep(colSums(D), each = ncol(D)), nrow = ncol(D)))
+    T_ovlap = 1 - (T_ovlap/(mins + 1 - D))
+    diag(T_ovlap) = 1
+    #add run parameters as attributes
+    attributes(T_ovlap) = c(
+        attributes(T_ovlap),
+        'method' = method,
+        'beta' = beta,
+        'call' = match.call()
+    )
+    return(1 - T_ovlap)
+}
+# Estimate p-values for the scores of one grouping by repeated re-scoring.
+#
+# @param dcscores Score matrix returned by diffcoex_score (must carry the
+#   `call` attribute).
+# @param group Name of the grouping column; also written to the output.
+# @param B Number of iterations (defaults to the global `perm_batch`).
+# @return A data.frame with columns Group, Feature1, Feature2, Pval, one row
+#   per feature pair.
+perm_test <- function(dcscores, group, B = perm_batch) {
+    obs = mat2vec(dcscores)
+    #package requirements
+    pckgs = c('dcanr')
+    #perform permutation
+    pvals = foreach(
+        b = seq_len(B),
+        .combine = function(...) {mapply(sum, ...)},
+        .multicombine = TRUE,
+        .inorder = FALSE,
+        .packages = pckgs
+    ) %dorng% {
+        # NOTE(review): nothing visible here permutes the sample labels; the
+        # stored call is re-evaluated with the same `group` against the global
+        # gdata, so each iteration appears to reproduce the observed scores.
+        # Confirm whether a label shuffle is missing.
+        env = new.env()
+        assign('group', group, envir = env)
+        permsc = eval(attr(dcscores, 'call'), envir = env)
+        permsc = mat2vec(permsc)
+        #count elements greater than obs
+        permsc = abs(permsc)
+        # Element-wise `|` is required: `||` on a vector is an error in
+        # R >= 4.3 and only inspected the first element before that.
+        permsc = permsc[!(is.na(permsc) | is.infinite(permsc))]
+        permcounts = vapply(abs(obs), function(x) sum(permsc > x), 0)
+        # Last element carries the number of usable scores in this iteration
+        return(c(permcounts, length(permsc)))
+    }
+    #p-values
+    N <- pvals[length(pvals)]
+    pvals <- pvals[-(length(pvals))] / N
+    # Format into Group,Feature1,Feature2,Pval
+    feature_pairs <- as.data.frame(t(combn(attr(obs, 'feature.names'), 2)))
+    colnames(feature_pairs) <- c('Feature1', 'Feature2')
+    feature_pairs$Group <- group
+    feature_pairs$Pval <- pvals
+    feature_pairs[, c('Group', 'Feature1', 'Feature2', 'Pval'), drop = FALSE]
+}
+# Run scoring and the p-value step for the i-th grouping column of gdata.
+# Yields the p-value table, or NULL when the grouping could not be scored.
+do_one_group <- function(i) {
+    group <- colnames(gdata)[i]
+    log$info("- Processing group {i}/{ngroups}: {group} ...")
+    log$info("  Calculating differential co-expression scores ...")
+    scores <- diffcoex_score(group)
+    # Guard clause: nothing to test for a skipped grouping
+    if (is.null(scores)) {
+        return(invisible(NULL))
+    }
+    log$info("  Calculating p-values ...")
+    perm_test(scores, group)
+}
+# Stack the per-grouping tables; skipped groupings contribute NULL.
+trios <- do_call(rbind, lapply(seq_len(ngroups), do_one_group))
+if (is.null(trios)) {
+    # Every grouping was skipped: still emit a well-formed, header-only table
+    # instead of letting write.table() serialise NULL.
+    log$warn("No groups could be analysed; writing an empty result ...")
+    trios <- data.frame(
+        Group = character(),
+        Feature1 = character(),
+        Feature2 = character(),
+        Pval = numeric()
+    )
+}
+if (padj != "none") {
+    log$info("Correcting p-values ...")
+    trios$Padj <- p.adjust(trios$Pval, method = padj)
+}
+log$info("Writing output ...")
+write.table(trios, file = outfile, sep = "\t", quote = FALSE, row.names = FALSE)

biopipen/scripts/stats/LiquidAssoc.R ADDED Viewed

@@ -0,0 +1,135 @@
+library(rlang)
+library(dplyr)
+library(tidyr)
+library(fastLiquidAssociation)
+library(biopipen.utils)
+# Job inputs/outputs and options; each value is rendered into an R literal
+# by the pipeline's template engine before this script is run.
+infile <- {{in.infile | r}}
+# NOTE(review): the r filter is applied twice on this value; confirm that the
+# second application is intended.
+covfile <- {{in.covfile | r: quote_none=False | r}}
+groupfile <- {{in.groupfile | r}}
+fmlfile <- {{in.fmlfile | r}}
+outfile <- {{out.outfile | r}}
+x <- {{envs.x | r}}
+nvec <- {{envs.nvec | r}}
+topn <- {{envs.topn | r}}
+rvalue <- {{envs.rvalue | r}}
+cut <- {{envs.cut | r}}
+ncores <- {{envs.ncores | r}}
+padj <- {{envs.padj | r}}
+transpose_input <- {{envs.transpose_input | r}}
+transpose_group <- {{envs.transpose_group | r}}
+transpose_cov <- {{envs.transpose_cov | r}}
+xyz_names <- {{envs.xyz_names | r}}
+# Without this, `log` resolves to base::log and every log$... call fails
+log <- get_logger()
+# A single comma-separated string is accepted in place of a vector
+if (!is.null(xyz_names) && length(xyz_names) == 1) {
+	xyz_names <- trimws(strsplit(xyz_names, ",")[[1]])
+}
+# Exactly one source for the third-variable columns must be given
+if (is.null(groupfile) && is.null(nvec)) {
+	stop("Must provide either in.groupfile or envs.nvec")
+}
+if (!is.null(groupfile) && !is.null(nvec)) {
+	stop("Must provide either in.groupfile or envs.nvec, not both")
+}
+log$info("Reading and preparing data ...")
+indata <- read.table(infile, header = TRUE, sep = "\t", row.names = 1, check.names = FALSE)
+if (transpose_input) {
+	indata <- t(indata)
+}
+if (!is.null(covfile)) {
+	covdata <- read.table(covfile, header = TRUE, sep = "\t", row.names = 1)
+	if (transpose_cov) {
+		covdata <- t(covdata)
+	}
+	if (!isTRUE(all.equal(rownames(indata), rownames(covdata)))) {
+		stop("Row names of indata and covdata must be identical")
+	}
+	# Replace every column by its residuals after regressing out the
+	# covariates. t() yields a matrix, which mutate() does not accept, so
+	# coerce back to a data.frame first (column names are kept as-is).
+	indata <- as.data.frame(indata) %>% mutate(across(everything(), function(xx) {
+		lm(xx ~ as.matrix(covdata))$residuals
+	}))
+}
+# Turn a comma-separated column specification into column indices of the
+# global `indata`.
+#
+# Items may be column positions or column names. An item of the form `a-b`
+# (both ends non-empty) that is not itself a column name is expanded to every
+# column from a to b; previously only the two end points were returned and
+# column names containing `-` were split apart.
+#
+# @param range Specification string, e.g. "1,3,5-7" or "geneA,geneC".
+# @return Vector of column indices; unknown names yield NA, as before.
+expand_range <- function(range) {
+	cols <- colnames(indata)
+	# Numeric strings are positions, anything else is looked up by name
+	to_index <- function(item) {
+		pos <- suppressWarnings(as.numeric(item))
+		if (is.na(pos)) match(item, cols) else pos
+	}
+	tokens <- trimws(unlist(strsplit(as.character(range), ",")))
+	unlist(lapply(tokens, function(token) {
+		ends <- trimws(strsplit(token, "-", fixed = TRUE)[[1]])
+		is_range <- !(token %in% cols) && length(ends) == 2 && all(nzchar(ends))
+		if (!is_range) {
+			return(to_index(token))
+		}
+		bounds <- vapply(ends, to_index, numeric(1), USE.NAMES = FALSE)
+		if (anyNA(bounds)) {
+			# Unresolvable end point: report it as an unknown item (NA)
+			return(to_index(token))
+		}
+		seq(bounds[1], bounds[2])
+	}))
+}
+# Default for `cut` when not configured: ceiling(nrow / 22), but at least 4
+cut <- cut %||% max(ceiling(nrow(indata)/22), 4)
+if (!is.null(x)) { x <- expand_range(x) }
+if (!is.null(groupfile)) {
+	groupdata <- read.table(groupfile, header = TRUE, sep = "\t", row.names = 1)
+	if (transpose_group) {
+		groupdata <- t(groupdata)
+	}
+	if (!isTRUE(all.equal(rownames(indata), rownames(groupdata)))) {
+		stop("Row names of indata and groupdata must be identical")
+	}
+	# Group columns are appended after the input columns; nvec points at them
+	nvec <- (ncol(indata) + 1) : (ncol(indata) + ncol(groupdata))
+	indata <- cbind(indata, groupdata)
+} else {
+	nvec <- expand_range(nvec)
+}
+log$info("Running fastLiquidAssociation ...")
+indata <- as.matrix(indata)
+mla <- fastMLA(
+	data = indata,
+	topn = topn,
+	rvalue = rvalue,
+	cut = cut,
+	threads = ncores,
+	nvec = nvec
+)
+if (nrow(mla) == 0) {
+	log$warn("No significant associations found")
+	# Empty table with the same columns the non-empty branch is expected to
+	# produce, so the downstream steps still work
+	out <- data.frame(
+		X12 = character(),
+		X21 = character(),
+		X3 = character(),
+		rhodiff = numeric(),
+		`MLA.value` = numeric(),
+		estimates = numeric(),
+		`san.se` = numeric(),
+		wald = numeric(),
+		Pval = numeric(),
+		model = character()
+	)
+} else {
+	cnm <- mass.CNM(data = indata, GLA.mat = mla, nback = topn)
+	# Rename the columns whose names contain spaces to syntactic names
+	out <- cnm$`top p-values` %>%
+		dplyr::select(X12 = "X1 or X2", X21 = "X2 or X1", everything(), Pval = "p value")
+}
+if (!is.null(fmlfile)) {
+	# Keep only the triplets listed in the formula file (header-less, three
+	# tab-separated columns read as Z, X, Y)
+	fmldata <- read.table(fmlfile, header = FALSE, sep = "\t", row.names = NULL)
+	colnames(fmldata) <- c("Z", "X", "Y")
+	all_combns <- fmldata %>% unite("XYZ", X, Y, Z, sep = " // ") %>% pull(XYZ)
+	out <- out %>%
+		unite("XYZ", X12, X21, X3, sep = " // ", remove = FALSE) %>%
+		dplyr::filter(XYZ %in% all_combns) %>%
+		dplyr::select(-XYZ)
+}
+if (!is.null(xyz_names)) {
+	# Rename the three variable columns to the user-supplied names
+	out <- out %>%
+		dplyr::select(
+			!!sym(xyz_names[1]) := "X12",
+			!!sym(xyz_names[2]) := "X21",
+			!!sym(xyz_names[3]) := "X3",
+			everything()
+		)
+}
+if (padj != "none") {
+	log$info("Calculating adjusted p-values ...")
+	out$Padj <- p.adjust(out$Pval, method = padj)
+}
+log$info("Writing output ...")
+write.table(out, file = outfile, sep = "\t", quote = FALSE, row.names = FALSE)

biopipen/scripts/stats/Mediation.R ADDED Viewed

@@ -0,0 +1,108 @@
+library(rlang)
+library(parallel)
+library(mediation)
+library(biopipen.utils)
+# Job inputs/outputs and options; each value is rendered into an R literal
+# by the pipeline's template engine before this script is run.
+infile <- {{in.infile | r}}
+fmlfile <- {{in.fmlfile | r}}
+outfile <- {{out.outfile | r}}
+ncores <- {{envs.ncores | r}}
+sims <- {{envs.sims | r}}
+args <- {{envs.args | r}}
+padj <- {{envs.padj | r}}
+cases <- {{envs.cases | r}}
+transpose_input <- {{envs.transpose_input | r}}
+# Fixed seed (not configurable here)
+set.seed(123)
+log <- get_logger()
+log$info("Reading input file ...")
+indata <- read.table(infile, header = TRUE, sep = "\t", row.names = NULL, check.names = FALSE)
+if (transpose_input) { indata <- t(indata) }
+log$info("Reading formula file/cases ...")
+# Cases come either from the formula file (which takes precedence) or from
+# envs.cases; one of the two is required.
+if (!is.null(fmlfile)) {
+    if (!is.null(cases) && length(cases) > 0) {
+        log$warn("envs.cases ignored as in.fmlfile is provided")
+    }
+    fmldata <- read.table(fmlfile, header = TRUE, sep = "\t", row.names = NULL)
+    # Case   M   Y   X   Cov     Model_M    Model_Y
+    # One list element per distinct value of the Case column
+    cases <- split(fmldata, fmldata$Case)
+} else if (is.null(cases) || length(cases) == 0) {
+    stop("Either envs.cases or in.fmlfile must be provided")
+}
+# Extra arguments for mediate(); default to none
+args <- args %||% list()
+# Run one mediation analysis.
+#
+# @param i Index of the case in the global `cases` list.
+# @param total Number of cases; only used to throttle progress logging.
+# @return A one-row data.frame with the mediation estimates, or NULL when the
+#   ACME p-value or the proportion mediated is NA.
+medanalysis <- function(i, total) {
+    casename <- names(cases)[i]
+    case <- cases[[casename]]
+    # Log every case for small runs, every 10th/100th for larger ones
+    if (total < 50) {
+        log$info("- Case: ", casename)
+    } else if (total < 500) {
+        if (i %% 10 == 0) {
+            log$info("- Processing case {i}/{total} ...")
+        }
+    } else {
+        if (i %% 100 == 0) {
+            log$info("- Processing case {i}/{total} ...")
+        }
+    }
+    M <- case$M
+    Y <- case$Y
+    X <- case$X
+    covs <- case$Cov
+    modelm <- match.fun(case$Model_M)
+    modely <- match.fun(case$Model_Y)
+    fmlm <- as.formula(sprintf("%s ~ %s", bQuote(M), bQuote(X)))
+    fmly <- as.formula(sprintf("%s ~ %s + %s", bQuote(Y), bQuote(M), bQuote(X)))
+    # Normalise the covariates. An empty Cov cell in the formula file is read
+    # as NA or "", which would otherwise crash strsplit() or yield the
+    # invalid formula "~ . + "; such values now mean "no covariates".
+    if (!is.null(covs)) {
+        covs <- as.character(covs)
+        if (length(covs) == 1 && !is.na(covs)) {
+            # A single comma-separated string is accepted in place of a vector
+            covs <- trimws(strsplit(covs, ",")[[1]])
+        }
+        covs <- covs[!is.na(covs) & nzchar(covs)]
+        if (length(covs) == 0) {
+            covs <- NULL
+        }
+    }
+    if (!is.null(covs)) {
+        # Add the covariates to both the mediator and the outcome model
+        cov_fml <- as.formula(sprintf("~ . + %s", paste(bQuote(covs), collapse = " + ")))
+        fmlm <- update.formula(fmlm, cov_fml)
+        fmly <- update.formula(fmly, cov_fml)
+    }
+    # Restrict to the variables in use and drop incomplete rows
+    data <- indata[, c(M, X, Y, covs), drop = FALSE]
+    data <- data[complete.cases(data), , drop = FALSE]
+    margs <- args
+    margs$sims <- sims
+    margs$model.m <- modelm(fmlm, data = data)
+    margs$model.y <- modely(fmly, data = data)
+    margs$treat <- X
+    margs$mediator <- M
+    margs$outcome <- Y
+    if (!is.null(covs)) {
+        margs$covariates <- data[, covs, drop = FALSE]
+    }
+    med <- do_call(mediate, margs)
+    if (is.na(med$d1.p) || is.na(med$n1)) {
+        NULL
+    } else {
+        data.frame(
+            Case         = casename,
+            M            = M,
+            X            = X,
+            Y            = Y,
+            ACME         = med$d1,
+            ACME95CI1    = med$d1.ci[1],
+            ACME95CI2    = med$d1.ci[2],
+            TotalEffect  = med$tau.coef,
+            ADE          = med$z1,
+            PropMediated = med$n1,
+            Pval         = med$d1.p
+        )
+    }
+}
+total <- length(cases)
+# seq_len() rather than 1:total, which would iterate over c(1, 0) when the
+# formula file contains no cases
+out <- do_call(rbind, mclapply(seq_len(total), medanalysis, total = total, mc.cores = ncores))
+if (is.null(out)) {
+    # No case produced a result: still write a well-formed, header-only table
+    log$warn("No mediation results were produced; writing an empty result ...")
+    out <- data.frame(
+        Case         = character(),
+        M            = character(),
+        X            = character(),
+        Y            = character(),
+        ACME         = numeric(),
+        ACME95CI1    = numeric(),
+        ACME95CI2    = numeric(),
+        TotalEffect  = numeric(),
+        ADE          = numeric(),
+        PropMediated = numeric(),
+        Pval         = numeric()
+    )
+}
+if (padj != "none") {
+    out$Padj <- p.adjust(out$Pval, method = padj)
+}
+write.table(out, file = outfile, sep = "\t", quote = FALSE, row.names = FALSE)

biopipen/scripts/stats/MetaPvalue.R ADDED Viewed

@@ -0,0 +1,130 @@
+library(metap)
+library(rlang)
+library(dplyr)
+library(biopipen.utils)
+# Combine p-values of the same records across several input tables.
+# Job inputs/outputs and options below are rendered into R literals by the
+# pipeline's template engine before this script is run.
+infiles <- {{in.infiles | each: str | r}}
+outfile <- {{out.outfile | r}}
+id_cols <- {{envs.id_cols | r}}
+id_exprs <- {{envs.id_exprs | r}}
+pval_cols <- {{envs.pval_cols | r}}
+method <- {{envs.method | r}}
+na <- {{envs.na | r}}
+keep_single <- {{envs.keep_single | r}}
+padj <- {{envs.padj | r}}
+# "fisher" is accepted as an alias; `method` is later called by name
+if (method == "fisher") { method = "sumlog" }
+log <- get_logger()
+if (length(infiles) == 1 && padj == "none") {
+    # Nothing to combine and nothing to adjust
+    log$info("Only one input file, copying to output ...")
+    file.copy(infiles, outfile)
+} else if (length(infiles) == 1) {
+    # Nothing to combine: only add an adjusted p-value column
+    log$info("Only one input file, performing p-value adjustment ...")
+    if (is.null(pval_cols)) {
+        stop("Must provide envs.pval_cols")
+    }
+    indata <- read.table(infiles, header = TRUE, sep = "\t", row.names = NULL, check.names = FALSE)
+    if (!pval_cols %in% colnames(indata)) {
+        stop("envs.pval_cols does not exist in input file")
+    }
+    indata$Padj <- p.adjust(indata[, pval_cols], method = padj)
+    log$info("Writing output ...")
+    write.table(indata, outfile, quote = FALSE, sep = "\t", row.names = FALSE)
+} else {
+    # Check pval_cols
+    if (is.null(pval_cols)) {
+        stop("Must provide envs.pval_cols")
+    }
+    # A single comma-separated string is accepted in place of a vector
+    if (length(pval_cols) == 1) {
+        pval_cols <- trimws(strsplit(pval_cols, ",")[[1]])
+    }
+    # One name applies to every input file
+    if (length(pval_cols) == 1) {
+        pval_cols <- rep(pval_cols, length(infiles))
+    }
+    if (length(pval_cols) != length(infiles)) {
+        stop("envs.pval_cols must be a single name or have the same length as in.infiles")
+    }
+    # Check id_cols
+    if (is.null(id_cols)) {
+        stop("Must provide envs.id_cols")
+    }
+    if (length(id_cols) == 1) {
+        id_cols <- trimws(strsplit(id_cols, ",")[[1]])
+    }
+    # Check id_exprs
+    if (!is.null(id_exprs)) {
+        if (length(id_exprs) == 1) {
+            id_exprs <- rep(id_exprs, length(infiles))
+        }
+        if (length(id_exprs) != length(infiles)) {
+            stop("envs.id_exprs must be a single expression or have the same length as in.infiles")
+        }
+        if (length(id_cols) != 1) {
+            stop("envs.id_cols must be a single name if envs.id_exprs is provided")
+        }
+    }
+    log$info("Reading and preparing data ...")
+    outdata <- NULL
+    for (i in seq_along(infiles)) {
+        infile <- infiles[i]
+        # The per-file p-value column is named after the file's base name
+        name <- tools::file_path_sans_ext(basename(infile))
+        pval_col <- paste0("Pval_", name)
+        dat <- read.table(
+            infile, header = TRUE, sep = "\t", row.names = NULL, check.names = FALSE
+        )
+        if (!is.null(id_exprs)) {
+            # Build the id column by evaluating the expression on the table
+            dat <- dat %>% mutate(!!sym(id_cols) := !!parse_expr(id_exprs[i]))
+        }
+        dat <- dat %>% dplyr::select(all_of(id_cols), !!sym(pval_col) := !!sym(pval_cols[i]))
+        if (is.null(outdata)) {
+            outdata <- dat
+        } else {
+            # Full join: records missing from a file get NA for that file
+            outdata <- full_join(outdata, dat, by = id_cols)
+        }
+    }
+    log$info("Running metap on each row ...")
+    metaps <- c()
+    ns <- c()
+    pval_columns <- setdiff(colnames(outdata), id_cols)
+    for (i in seq_len(nrow(outdata))) {
+        ps <- unlist(outdata[i, pval_columns, drop = TRUE])
+        # na == -1 drops missing p-values; any other value replaces them
+        if (na == -1) {
+            ps <- ps[!is.na(ps)]
+        } else {
+            ps[is.na(ps)] <- na
+        }
+        if (length(ps) == 0) {
+            metaps <- c(metaps, NA)
+            ns <- c(ns, NA)
+        } else if (length(ps) == 1 && keep_single) {
+            # A lone p-value is passed through unchanged
+            metaps <- c(metaps, ps)
+            ns <- c(ns, 1)
+        } else if (any(ps == 0)) {
+            # Any exact zero makes the combined p-value zero
+            metaps <- c(metaps, 0)
+            ns <- c(ns, length(ps))
+        } else {
+            metaps <- c(metaps, do.call(method, list(ps))$p)
+            ns <- c(ns, length(ps))
+        }
+    }
+    outdata$MetaPval <- metaps
+    outdata$N <- ns
+    outdata <- outdata %>% arrange(MetaPval)
+    if (padj != "none") {
+        log$info("Calculating adjusted p-values ...")
+        outdata$MetaPadj <- p.adjust(outdata$MetaPval, method = padj)
+    }
+    log$info("Writing output ...")
+    write.table(outdata, outfile, quote = FALSE, sep = "\t", row.names = FALSE)
+}

biopipen/scripts/stats/MetaPvalue1.R ADDED Viewed

@@ -0,0 +1,74 @@
+library(metap)
+library(rlang)
+library(dplyr)
+library(biopipen.utils)
+# Combine the p-values of records that share the same id within one table.
+# Job inputs/outputs and options below are rendered into R literals by the
+# pipeline's template engine before this script is run.
+infile <- {{in.infile | r}}
+outfile <- {{out.outfile | r}}
+id_cols <- {{envs.id_cols | r}}
+pval_col <- {{envs.pval_col | r}}
+method <- {{envs.method | r}}
+na <- {{envs.na | r}}
+keep_single <- {{envs.keep_single | r}}
+padj <- {{envs.padj | r}}
+log <- get_logger()
+# "fisher" is accepted as an alias; `method` is later called by name
+if (method == "fisher") { method = "sumlog" }
+# Check pval_cols
+if (is.null(pval_col)) { stop("Must provide envs.pval_col") }
+# Check id_cols
+if (is.null(id_cols)) { stop("Must provide envs.id_cols") }
+# A single comma-separated string is accepted in place of a vector
+if (length(id_cols) == 1) {
+    id_cols <- trimws(strsplit(id_cols, ",")[[1]])
+}
+log$info("Reading input and performing meta-analysis ...")
+# One row per id combination; .pvals holds that group's p-values as a list
+outdata <- read.table(
+        infile, header = TRUE, sep = "\t", row.names = NULL, check.names = FALSE
+    ) %>%
+    group_by(!!!syms(id_cols)) %>%
+    summarise(
+        N = n(),
+        .pvals = list(!!sym(pval_col)),
+        .groups = "drop"
+    )
+metaps <- c()
+ns <- c()
+for (ps in outdata$.pvals) {
+    # na == -1 drops missing p-values; any other value replaces them
+    if (na == -1) {
+        ps <- ps[!is.na(ps)]
+    } else {
+        ps[is.na(ps)] <- na
+    }
+    if (length(ps) == 0) {
+        metaps <- c(metaps, NA)
+        ns <- c(ns, NA)
+    } else if (length(ps) == 1 && keep_single) {
+        # A lone p-value is passed through unchanged
+        metaps <- c(metaps, ps)
+        ns <- c(ns, 1)
+    } else if (any(ps == 0)) {
+        # Any exact zero makes the combined p-value zero
+        metaps <- c(metaps, 0)
+        ns <- c(ns, length(ps))
+    } else {
+        metaps <- c(metaps, do.call(method, list(ps))$p)
+        ns <- c(ns, length(ps))
+    }
+}
+outdata$MetaPval <- metaps
+# Overwrites the n() count with the number of p-values actually combined
+outdata$N <- ns
+outdata$.pvals <- NULL
+outdata <- outdata %>% arrange(MetaPval)
+if (padj != "none") {
+    log$info("Calculating adjusted p-values ...")
+    outdata$MetaPadj <- p.adjust(outdata$MetaPval, method = padj)
+}
+log$info("Writing output ...")
+write.table(outdata, outfile, quote = FALSE, sep = "\t", row.names = FALSE)

biopipen/scripts/tcgamaf/Maf2Vcf.py CHANGED Viewed

@@ -1,12 +1,12 @@
 from biopipen.utils.misc import run_command, dict_to_cli_args
-infile = {{in.infile | quote}}  # pyright: ignore
+infile: str = {{in.infile | quote}}  # pyright: ignore  # noqa
 outfile = {{out.outfile | quote}}  # pyright: ignore
 outdir = {{out.outdir | quote}}  # pyright: ignore
 perl = {{envs.perl | quote}}  # pyright: ignore
 ref = {{envs.ref | repr}}  # pyright: ignore
 samtools = {{envs.samtools | quote}}  # pyright: ignore
-args = {{envs.args | repr}}  # pyright: ignore
+args: dict = {{envs.args | dict}}  # pyright: ignore
 maf2vcf = {{biopipen_dir | append: "/scripts/tcgamaf/maf2vcf.pl" | repr}}  # pyright: ignore
 args['input-maf']  = infile

biopipen/scripts/tcgamaf/MafAddChr.py CHANGED Viewed

@@ -1,6 +1,6 @@
-infile = {{in.infile | quote}}  # pyright: ignore
-outfile = {{out.outfile | quote}}  # pyright: ignore
+infile: str = {{in.infile | quote}}  # pyright: ignore  # noqa
+outfile: str = {{out.outfile | quote}}  # pyright: ignore
 with open(infile) as fin, open(outfile, "w") as fout:
     for line in fin:

biopipen/scripts/tcr/Attach2Seurat.R CHANGED Viewed

@@ -1,4 +1,4 @@
-source("{{biopipen_dir}}/utils/misc.R")
+{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
 library(Seurat)
 library(immunarch)
@@ -11,6 +11,7 @@ immfile = {{in.immfile | r}}
 sobjfile = {{in.sobjfile | r}}
 outfile = {{out.outfile | r}}
 metacols = {{envs.metacols | r}}
+prefix = {{envs.prefix | r}}
 immdata = readRDS(immfile)
 sobj = readRDS(sobjfile)
@@ -31,7 +32,7 @@ metadf = do_call(rbind, lapply(seq_len(nrow(immdata$meta)), function(i) {
     cldata %>%
         separate_rows(Barcode, sep=";") %>%
-        mutate(Barcode = glue("{{envs.prefix}}{Barcode}"))
+        mutate(Barcode = glue(paste0(prefix, "{Barcode}")))
 }))

biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl

biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl