biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biopipen/__init__.py +1 -1
- biopipen/core/config.toml +28 -0
- biopipen/core/filters.py +79 -4
- biopipen/core/proc.py +12 -3
- biopipen/core/testing.py +75 -3
- biopipen/ns/bam.py +148 -6
- biopipen/ns/bed.py +75 -0
- biopipen/ns/cellranger.py +186 -0
- biopipen/ns/cellranger_pipeline.py +126 -0
- biopipen/ns/cnv.py +19 -3
- biopipen/ns/cnvkit.py +1 -1
- biopipen/ns/cnvkit_pipeline.py +20 -12
- biopipen/ns/delim.py +34 -35
- biopipen/ns/gene.py +68 -23
- biopipen/ns/gsea.py +63 -37
- biopipen/ns/misc.py +39 -14
- biopipen/ns/plot.py +304 -1
- biopipen/ns/protein.py +183 -0
- biopipen/ns/regulatory.py +290 -0
- biopipen/ns/rnaseq.py +142 -5
- biopipen/ns/scrna.py +2053 -473
- biopipen/ns/scrna_metabolic_landscape.py +228 -382
- biopipen/ns/snp.py +659 -0
- biopipen/ns/stats.py +484 -0
- biopipen/ns/tcr.py +683 -98
- biopipen/ns/vcf.py +236 -2
- biopipen/ns/web.py +97 -6
- biopipen/reports/bam/CNVpytor.svelte +4 -9
- biopipen/reports/cellranger/CellRangerCount.svelte +18 -0
- biopipen/reports/cellranger/CellRangerSummary.svelte +16 -0
- biopipen/reports/cellranger/CellRangerVdj.svelte +18 -0
- biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
- biopipen/reports/common.svelte +15 -0
- biopipen/reports/protein/ProdigySummary.svelte +16 -0
- biopipen/reports/scrna/CellsDistribution.svelte +4 -39
- biopipen/reports/scrna/DimPlots.svelte +1 -1
- biopipen/reports/scrna/MarkersFinder.svelte +6 -126
- biopipen/reports/scrna/MetaMarkers.svelte +3 -75
- biopipen/reports/scrna/RadarPlots.svelte +4 -20
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +61 -22
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +88 -82
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +70 -10
- biopipen/reports/snp/PlinkCallRate.svelte +24 -0
- biopipen/reports/snp/PlinkFreq.svelte +18 -0
- biopipen/reports/snp/PlinkHWE.svelte +18 -0
- biopipen/reports/snp/PlinkHet.svelte +18 -0
- biopipen/reports/snp/PlinkIBD.svelte +18 -0
- biopipen/reports/tcr/CDR3AAPhyschem.svelte +19 -66
- biopipen/reports/tcr/ClonalStats.svelte +16 -0
- biopipen/reports/tcr/CloneResidency.svelte +3 -93
- biopipen/reports/tcr/Immunarch.svelte +4 -155
- biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
- biopipen/reports/tcr/TESSA.svelte +11 -28
- biopipen/reports/utils/misc.liq +22 -7
- biopipen/scripts/bam/BamMerge.py +11 -15
- biopipen/scripts/bam/BamSampling.py +90 -0
- biopipen/scripts/bam/BamSort.py +141 -0
- biopipen/scripts/bam/BamSplitChroms.py +10 -10
- biopipen/scripts/bam/BamSubsetByBed.py +38 -0
- biopipen/scripts/bam/CNAClinic.R +41 -5
- biopipen/scripts/bam/CNVpytor.py +153 -54
- biopipen/scripts/bam/ControlFREEC.py +13 -14
- biopipen/scripts/bam/SamtoolsView.py +33 -0
- biopipen/scripts/bed/Bed2Vcf.py +5 -5
- biopipen/scripts/bed/BedConsensus.py +5 -5
- biopipen/scripts/bed/BedLiftOver.sh +6 -4
- biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
- biopipen/scripts/bed/BedtoolsMakeWindows.py +47 -0
- biopipen/scripts/bed/BedtoolsMerge.py +4 -4
- biopipen/scripts/cellranger/CellRangerCount.py +138 -0
- biopipen/scripts/cellranger/CellRangerSummary.R +181 -0
- biopipen/scripts/cellranger/CellRangerVdj.py +112 -0
- biopipen/scripts/cnv/AneuploidyScore.R +55 -20
- biopipen/scripts/cnv/AneuploidyScoreSummary.R +221 -163
- biopipen/scripts/cnv/TMADScore.R +25 -9
- biopipen/scripts/cnv/TMADScoreSummary.R +57 -86
- biopipen/scripts/cnvkit/CNVkitAccess.py +7 -6
- biopipen/scripts/cnvkit/CNVkitAutobin.py +26 -18
- biopipen/scripts/cnvkit/CNVkitBatch.py +6 -6
- biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
- biopipen/scripts/cnvkit/CNVkitCoverage.py +4 -3
- biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
- biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
- biopipen/scripts/cnvkit/CNVkitGuessBaits.py +12 -8
- biopipen/scripts/cnvkit/CNVkitHeatmap.py +5 -5
- biopipen/scripts/cnvkit/CNVkitReference.py +6 -5
- biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
- biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
- biopipen/scripts/cnvkit/guess_baits.py +166 -93
- biopipen/scripts/delim/RowsBinder.R +1 -1
- biopipen/scripts/delim/SampleInfo.R +116 -118
- biopipen/scripts/gene/GeneNameConversion.R +67 -0
- biopipen/scripts/gene/GenePromoters.R +61 -0
- biopipen/scripts/gsea/Enrichr.R +5 -5
- biopipen/scripts/gsea/FGSEA.R +184 -50
- biopipen/scripts/gsea/GSEA.R +2 -2
- biopipen/scripts/gsea/PreRank.R +5 -5
- biopipen/scripts/misc/Config2File.py +2 -2
- biopipen/scripts/misc/Plot.R +80 -0
- biopipen/scripts/misc/Shell.sh +15 -0
- biopipen/scripts/misc/Str2File.py +2 -2
- biopipen/scripts/plot/Heatmap.R +3 -3
- biopipen/scripts/plot/Manhattan.R +147 -0
- biopipen/scripts/plot/QQPlot.R +146 -0
- biopipen/scripts/plot/ROC.R +88 -0
- biopipen/scripts/plot/Scatter.R +112 -0
- biopipen/scripts/plot/VennDiagram.R +5 -9
- biopipen/scripts/protein/MMCIF2PDB.py +33 -0
- biopipen/scripts/protein/PDB2Fasta.py +60 -0
- biopipen/scripts/protein/Prodigy.py +119 -0
- biopipen/scripts/protein/ProdigySummary.R +140 -0
- biopipen/scripts/protein/RMSD.py +178 -0
- biopipen/scripts/regulatory/MotifAffinityTest.R +102 -0
- biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +127 -0
- biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +104 -0
- biopipen/scripts/regulatory/MotifScan.py +159 -0
- biopipen/scripts/regulatory/VariantMotifPlot.R +78 -0
- biopipen/scripts/regulatory/motifs-common.R +324 -0
- biopipen/scripts/rnaseq/Simulation-ESCO.R +180 -0
- biopipen/scripts/rnaseq/Simulation-RUVcorr.R +45 -0
- biopipen/scripts/rnaseq/Simulation.R +21 -0
- biopipen/scripts/rnaseq/UnitConversion.R +325 -54
- biopipen/scripts/scrna/AnnData2Seurat.R +40 -0
- biopipen/scripts/scrna/CCPlotR-patch.R +161 -0
- biopipen/scripts/scrna/CellCellCommunication.py +150 -0
- biopipen/scripts/scrna/CellCellCommunicationPlots.R +93 -0
- biopipen/scripts/scrna/CellSNPLite.py +30 -0
- biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +185 -0
- biopipen/scripts/scrna/CellTypeAnnotation-direct.R +68 -31
- biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +27 -22
- biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +28 -20
- biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +48 -25
- biopipen/scripts/scrna/CellTypeAnnotation.R +37 -1
- biopipen/scripts/scrna/CellsDistribution.R +456 -167
- biopipen/scripts/scrna/DimPlots.R +1 -1
- biopipen/scripts/scrna/ExprImputation-alra.R +109 -0
- biopipen/scripts/scrna/ExprImputation-rmagic.R +256 -0
- biopipen/scripts/scrna/{ExprImpution-scimpute.R → ExprImputation-scimpute.R} +8 -5
- biopipen/scripts/scrna/ExprImputation.R +7 -0
- biopipen/scripts/scrna/LoomTo10X.R +51 -0
- biopipen/scripts/scrna/MQuad.py +25 -0
- biopipen/scripts/scrna/MarkersFinder.R +679 -400
- biopipen/scripts/scrna/MetaMarkers.R +265 -161
- biopipen/scripts/scrna/ModuleScoreCalculator.R +66 -11
- biopipen/scripts/scrna/PseudoBulkDEG.R +678 -0
- biopipen/scripts/scrna/RadarPlots.R +355 -134
- biopipen/scripts/scrna/ScFGSEA.R +298 -100
- biopipen/scripts/scrna/ScSimulation.R +65 -0
- biopipen/scripts/scrna/ScVelo.py +617 -0
- biopipen/scripts/scrna/Seurat2AnnData.R +7 -0
- biopipen/scripts/scrna/SeuratClusterStats-clustree.R +87 -0
- biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +36 -30
- biopipen/scripts/scrna/SeuratClusterStats-features.R +138 -187
- biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +81 -0
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +78 -89
- biopipen/scripts/scrna/SeuratClusterStats.R +47 -10
- biopipen/scripts/scrna/SeuratClustering.R +36 -233
- biopipen/scripts/scrna/SeuratLoading.R +2 -2
- biopipen/scripts/scrna/SeuratMap2Ref.R +84 -113
- biopipen/scripts/scrna/SeuratMetadataMutater.R +16 -6
- biopipen/scripts/scrna/SeuratPreparing.R +223 -173
- biopipen/scripts/scrna/SeuratSubClustering.R +64 -0
- biopipen/scripts/scrna/SeuratTo10X.R +27 -0
- biopipen/scripts/scrna/Slingshot.R +65 -0
- biopipen/scripts/scrna/Subset10X.R +2 -2
- biopipen/scripts/scrna/TopExpressingGenes.R +169 -135
- biopipen/scripts/scrna/celltypist-wrapper.py +195 -0
- biopipen/scripts/scrna/scvelo_paga.py +313 -0
- biopipen/scripts/scrna/seurat_anndata_conversion.py +98 -0
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +447 -82
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +348 -241
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +188 -166
- biopipen/scripts/snp/MatrixEQTL.R +217 -0
- biopipen/scripts/snp/Plink2GTMat.py +148 -0
- biopipen/scripts/snp/PlinkCallRate.R +199 -0
- biopipen/scripts/snp/PlinkFilter.py +100 -0
- biopipen/scripts/snp/PlinkFreq.R +291 -0
- biopipen/scripts/snp/PlinkFromVcf.py +81 -0
- biopipen/scripts/snp/PlinkHWE.R +85 -0
- biopipen/scripts/snp/PlinkHet.R +96 -0
- biopipen/scripts/snp/PlinkIBD.R +196 -0
- biopipen/scripts/snp/PlinkSimulation.py +124 -0
- biopipen/scripts/snp/PlinkUpdateName.py +124 -0
- biopipen/scripts/stats/ChowTest.R +146 -0
- biopipen/scripts/stats/DiffCoexpr.R +152 -0
- biopipen/scripts/stats/LiquidAssoc.R +135 -0
- biopipen/scripts/stats/Mediation.R +108 -0
- biopipen/scripts/stats/MetaPvalue.R +130 -0
- biopipen/scripts/stats/MetaPvalue1.R +74 -0
- biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
- biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
- biopipen/scripts/tcr/Attach2Seurat.R +3 -2
- biopipen/scripts/tcr/CDR3AAPhyschem.R +211 -143
- biopipen/scripts/tcr/CDR3Clustering.R +343 -0
- biopipen/scripts/tcr/ClonalStats.R +526 -0
- biopipen/scripts/tcr/CloneResidency.R +255 -131
- biopipen/scripts/tcr/CloneSizeQQPlot.R +4 -4
- biopipen/scripts/tcr/GIANA/GIANA.py +1356 -797
- biopipen/scripts/tcr/GIANA/GIANA4.py +1362 -789
- biopipen/scripts/tcr/GIANA/query.py +164 -162
- biopipen/scripts/tcr/Immunarch-basic.R +31 -9
- biopipen/scripts/tcr/Immunarch-clonality.R +25 -5
- biopipen/scripts/tcr/Immunarch-diversity.R +352 -134
- biopipen/scripts/tcr/Immunarch-geneusage.R +45 -5
- biopipen/scripts/tcr/Immunarch-kmer.R +68 -8
- biopipen/scripts/tcr/Immunarch-overlap.R +84 -4
- biopipen/scripts/tcr/Immunarch-spectratyping.R +35 -6
- biopipen/scripts/tcr/Immunarch-tracking.R +38 -6
- biopipen/scripts/tcr/Immunarch-vjjunc.R +165 -0
- biopipen/scripts/tcr/Immunarch.R +63 -11
- biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
- biopipen/scripts/tcr/ImmunarchFilter.R +4 -4
- biopipen/scripts/tcr/ImmunarchLoading.R +38 -29
- biopipen/scripts/tcr/SampleDiversity.R +1 -1
- biopipen/scripts/tcr/ScRepCombiningExpression.R +40 -0
- biopipen/scripts/tcr/ScRepLoading.R +166 -0
- biopipen/scripts/tcr/TCRClusterStats.R +176 -22
- biopipen/scripts/tcr/TCRDock.py +110 -0
- biopipen/scripts/tcr/TESSA.R +102 -118
- biopipen/scripts/tcr/VJUsage.R +5 -5
- biopipen/scripts/tcr/immunarch-patched.R +142 -0
- biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
- biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
- biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
- biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
- biopipen/scripts/vcf/BcftoolsSort.py +113 -0
- biopipen/scripts/vcf/BcftoolsView.py +73 -0
- biopipen/scripts/vcf/TruvariBench.sh +14 -7
- biopipen/scripts/vcf/TruvariBenchSummary.R +16 -13
- biopipen/scripts/vcf/TruvariConsistency.R +1 -1
- biopipen/scripts/vcf/Vcf2Bed.py +2 -2
- biopipen/scripts/vcf/VcfAnno.py +11 -11
- biopipen/scripts/vcf/VcfDownSample.sh +22 -10
- biopipen/scripts/vcf/VcfFilter.py +5 -5
- biopipen/scripts/vcf/VcfFix.py +7 -7
- biopipen/scripts/vcf/VcfFix_utils.py +13 -4
- biopipen/scripts/vcf/VcfIndex.py +3 -3
- biopipen/scripts/vcf/VcfIntersect.py +3 -3
- biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
- biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
- biopipen/scripts/vcf/bcftools_utils.py +52 -0
- biopipen/scripts/web/Download.py +8 -4
- biopipen/scripts/web/DownloadList.py +5 -5
- biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
- biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
- biopipen/scripts/web/gcloud_common.py +49 -0
- biopipen/utils/gene.py +108 -60
- biopipen/utils/misc.py +146 -20
- biopipen/utils/reference.py +64 -20
- biopipen/utils/reporter.py +177 -0
- biopipen/utils/vcf.py +1 -1
- biopipen-0.34.26.dist-info/METADATA +27 -0
- biopipen-0.34.26.dist-info/RECORD +292 -0
- {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
- {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +6 -2
- biopipen/ns/bcftools.py +0 -111
- biopipen/ns/scrna_basic.py +0 -255
- biopipen/reports/delim/SampleInfo.svelte +0 -36
- biopipen/reports/scrna/GeneExpressionInvistigation.svelte +0 -32
- biopipen/reports/scrna/ScFGSEA.svelte +0 -35
- biopipen/reports/scrna/SeuratClusterStats.svelte +0 -82
- biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -20
- biopipen/reports/scrna/SeuratPreparing.svelte +0 -38
- biopipen/reports/scrna/TopExpressingGenes.svelte +0 -55
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -31
- biopipen/reports/utils/gsea.liq +0 -110
- biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
- biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
- biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
- biopipen/scripts/gene/GeneNameConversion.py +0 -66
- biopipen/scripts/scrna/ExprImpution-alra.R +0 -32
- biopipen/scripts/scrna/ExprImpution-rmagic.R +0 -29
- biopipen/scripts/scrna/ExprImpution.R +0 -7
- biopipen/scripts/scrna/GeneExpressionInvistigation.R +0 -132
- biopipen/scripts/scrna/Write10X.R +0 -11
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -150
- biopipen/scripts/tcr/TCRClustering.R +0 -280
- biopipen/utils/common_docstrs.py +0 -61
- biopipen/utils/gene.R +0 -49
- biopipen/utils/gsea.R +0 -193
- biopipen/utils/io.R +0 -20
- biopipen/utils/misc.R +0 -114
- biopipen/utils/mutate_helpers.R +0 -433
- biopipen/utils/plot.R +0 -173
- biopipen/utils/rnaseq.R +0 -48
- biopipen/utils/single_cell.R +0 -115
- biopipen-0.21.0.dist-info/METADATA +0 -22
- biopipen-0.21.0.dist-info/RECORD +0 -218
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
library(dcanr)
|
|
2
|
+
library(scuttle)
|
|
3
|
+
library(doRNG)
|
|
4
|
+
library(doParallel)
|
|
5
|
+
library(snpStats)
|
|
6
|
+
library(rlang)
|
|
7
|
+
library(dplyr)
|
|
8
|
+
library(biopipen.utils)
|
|
9
|
+
|
|
10
|
+
# ---- Job parameters, rendered by the pipeline's template engine ----
infile <- {{in.infile | r}}
groupfile <- {{in.groupfile | r}}
outfile <- {{out.outfile | r}}
method <- {{envs.method | r}}
beta <- {{envs.beta | r}}
padj <- {{envs.padj | r}}
perm_batch <- {{envs.perm_batch | r}}
seed <- {{envs.seed | r}}
ncores <- {{envs.ncores | r}}
transpose_input <- {{envs.transpose_input | r}}
transpose_group <- {{envs.transpose_group | r}}

log <- get_logger()

# Seed both the main session and the doRNG streams used by %dorng%.
log$info("Setting seed and parallel backend ...")
set.seed(seed)
registerDoParallel(cores = ncores)
registerDoRNG(seed)

log$info("Reading input files ...")
# First column of each file is used as row names.
indata <- read.table(infile, header = TRUE, row.names = 1, sep = "\t", check.names = FALSE)
if (transpose_input) {
    indata <- t(indata)
}
gdata <- read.table(groupfile, header = TRUE, row.names = 1, sep = "\t", check.names = FALSE)
if (transpose_group) {
    # t() returns a matrix, but the group columns are later read with
    # gdata[[group]], which only works on a data frame. Convert back so the
    # transposed layout behaves like the non-transposed one.
    gdata <- as.data.frame(t(gdata), stringsAsFactors = FALSE)
}
# One analysis is run per column of the group table.
ngroups <- ncol(gdata)
|
|
39
|
+
|
|
40
|
+
# Signed square: square each element of x while keeping its sign.
sign2 <- function(x) {
    squared <- x^2
    sign(x) * squared
}
|
|
41
|
+
# Local alias for dcanr's internal (non-exported) mat2vec helper, reached via `:::`.
mat2vec <- dcanr:::mat2vec
|
|
42
|
+
|
|
43
|
+
# Compute the differential co-expression score matrix for one group column.
#
# Reads the globals `gdata` (sample x group table), `indata` (sample x feature
# table), `method` and `beta`.
#
# @param group Name of the column in `gdata` that defines the sample groups.
# @return A feature x feature score matrix carrying the attributes 'method',
#   'beta' and 'call' (the matched call of this function), or NULL when fewer
#   than two group levels have at least 3 samples.
diffcoex_score <- function(group) {
    this_call <- match.call()

    levels_found <- unique(gdata[, group, drop = TRUE])
    if (length(levels_found) < 2) {
        log$debug(" Less than 2 groups in the input. Skipping ...")
        return(NULL)
    }

    # Pairwise correlations of the features within one level of the group.
    cor_one_level <- function(gval) {
        in_level <- gdata[[group]] == gval
        samples <- rownames(gdata[in_level, , drop = FALSE])
        expr <- indata[samples, , drop = FALSE]
        if (length(samples) < 3) {
            log$debug(" Less than 3 samples in one of the groups. Skipping ...")
            return(NULL)
        }
        cor.pairs(as.matrix(expr), cor.method = method)
    }
    rs <- lapply(levels_found, cor_one_level)
    # Drop the levels that were skipped above.
    rs <- rs[!sapply(rs, is.null)]
    if (length(rs) < 2) {
        log$debug(" Less than 2 groups with at least 3 samples. Skipping ...")
        return(NULL)
    }

    n_levels <- length(rs)
    # Mean of the signed squared correlations across levels.
    mean_signed <- Reduce(`+`, lapply(rs, sign2)) / n_levels
    # Deviation of every level from that mean, soft-thresholded by beta.
    adj <- lapply(rs, function(r) abs(sign2(r) - mean_signed))
    adj <- Reduce(`+`, adj) / 2 / (n_levels - 1)
    adj <- sqrt(adj)
    adj <- adj^beta

    # Topological overlap of the adjacency matrix.
    n_feat <- ncol(adj)
    T_ovlap <- adj %*% adj + n_feat * adj
    row_strength <- matrix(rep(rowSums(adj), n_feat), nrow = n_feat)
    col_strength <- matrix(rep(colSums(adj), each = n_feat), nrow = n_feat)
    min_strength <- pmin(row_strength, col_strength)
    T_ovlap <- 1 - (T_ovlap / (min_strength + 1 - adj))

    diag(T_ovlap) <- 1

    # Record the run parameters; perm_test() re-evaluates the stored call.
    attributes(T_ovlap) <- c(
        attributes(T_ovlap),
        'method' = method,
        'beta' = beta,
        'call' = this_call
    )

    return(1 - T_ovlap)
}
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
# Derive p-values for the differential co-expression scores of one group column.
#
# The observed scores are compared against the scores obtained by re-evaluating
# the call stored in attr(dcscores, 'call') `B` times via foreach/%dorng%.
#
# @param dcscores Score matrix returned by diffcoex_score().
# @param group Name of the group column the scores were computed for.
# @param B Number of foreach iterations (defaults to the global `perm_batch`).
# @return A data frame with the columns Group, Feature1, Feature2 and Pval,
#   one row per feature pair.
perm_test <- function(dcscores, group, B = perm_batch) {
    obs = mat2vec(dcscores)

    #package requirements
    pckgs = c('dcanr')

    #perform permutation
    pvals = foreach(
        b = seq_len(B),
        # sum the per-iteration count vectors element-wise
        .combine = function(...) {mapply(sum, ...)},
        .multicombine = TRUE,
        .inorder = FALSE,
        .packages = pckgs
    ) %dorng% {
        #recalculate scores by re-evaluating the stored call
        # NOTE(review): `group` is assigned unchanged and diffcoex_score() reads
        # the global `gdata`, so no labels appear to be shuffled here and every
        # iteration looks like it reproduces the observed scores -- confirm
        # whether a shuffle of the group labels is missing.
        env = new.env()
        assign('group', group, envir = env)
        permsc = eval(attr(dcscores, 'call'), envir = env)
        permsc = mat2vec(permsc)

        #count elements greater than obs
        permsc = abs(permsc)
        # Element-wise `|` is required: `||` is the scalar operator, so on a
        # vector it only looked at the first element (and is an error in
        # R >= 4.3) instead of filtering out NA/infinite scores.
        permsc = permsc[!(is.na(permsc) | is.infinite(permsc))]
        permcounts = vapply(abs(obs), function(x) sum(permsc > x), 0)
        # last element carries the size of the null distribution
        return(c(permcounts, length(permsc)))
    }

    #p-values: accumulated counts divided by the accumulated null size
    N <- pvals[length(pvals)]
    pvals <- pvals[-(length(pvals))] / N

    # Format into Group,Feature1,Feature2,Pval
    # NOTE(review): combn() enumerates pairs as (1,2), (1,3), ..., (2,3), ...;
    # confirm mat2vec() flattens the matrix in the same order, otherwise the
    # p-values are attached to the wrong feature pairs.
    feature_pairs <- as.data.frame(t(combn(attr(obs, 'feature.names'), 2)))
    colnames(feature_pairs) <- c('Feature1', 'Feature2')
    feature_pairs$Group <- group
    feature_pairs$Pval <- pvals
    feature_pairs[, c('Group', 'Feature1', 'Feature2', 'Pval'), drop = FALSE]
}
|
|
132
|
+
|
|
133
|
+
# Run the full analysis (scores + p-values) for the i-th column of `gdata`.
#
# @param i Index of the group column in `gdata`.
# @return The data frame produced by perm_test(), or NULL when no scores
#   could be computed for this group.
do_one_group <- function(i) {
    # `i`, `ngroups` and `group` are referenced by name in the log template.
    group <- colnames(gdata)[i]
    log$info("- Processing group {i}/{ngroups}: {group} ...")

    log$info(" Calculating differential co-expression scores ...")
    dcscores <- diffcoex_score(group)
    if (is.null(dcscores)) {
        return(NULL)
    }

    log$info(" Calculating p-values ...")
    perm_test(dcscores, group)
}
|
|
144
|
+
|
|
145
|
+
# Run every group column and stack the per-group result tables.
trios <- do_call(rbind, lapply(seq_len(ngroups), do_one_group))
if (is.null(trios)) {
    # do_one_group() returns NULL for skipped groups; if all of them were
    # skipped there is nothing to bind. Fall back to an empty table so the
    # output file still has the expected header.
    log$warn("No group produced any results, writing an empty table ...")
    trios <- data.frame(
        Group = character(),
        Feature1 = character(),
        Feature2 = character(),
        Pval = numeric()
    )
}
if (padj != "none") {
    log$info("Correcting p-values ...")
    trios$Padj <- p.adjust(trios$Pval, method = padj)
}

log$info("Writing output ...")
write.table(trios, file = outfile, sep = "\t", quote = FALSE, row.names = FALSE)
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
library(rlang)
|
|
2
|
+
library(dplyr)
|
|
3
|
+
library(tidyr)
|
|
4
|
+
library(fastLiquidAssociation)
|
|
5
|
+
library(biopipen.utils)
|
|
6
|
+
|
|
7
|
+
# ---- Job parameters, rendered by the pipeline's template engine ----
infile <- {{in.infile | r}}
# NOTE(review): the `r` filter is applied twice here, unlike every other
# parameter -- confirm that a missing covfile still renders as NULL, since
# the code below tests is.null(covfile).
covfile <- {{in.covfile | r: quote_none=False | r}}
groupfile <- {{in.groupfile | r}}
fmlfile <- {{in.fmlfile | r}}
outfile <- {{out.outfile | r}}
x <- {{envs.x | r}}
nvec <- {{envs.nvec | r}}
topn <- {{envs.topn | r}}
rvalue <- {{envs.rvalue | r}}
cut <- {{envs.cut | r}}
ncores <- {{envs.ncores | r}}
padj <- {{envs.padj | r}}
transpose_input <- {{envs.transpose_input | r}}
transpose_group <- {{envs.transpose_group | r}}
transpose_cov <- {{envs.transpose_cov | r}}
xyz_names <- {{envs.xyz_names | r}}
# A single string is treated as a comma-separated list of names.
if (!is.null(xyz_names) && length(xyz_names) == 1) {
    xyz_names <- trimws(strsplit(xyz_names, ",")[[1]])
}

# Exactly one of in.groupfile / envs.nvec must be given.
if (is.null(groupfile) && is.null(nvec)) {
    stop("Must provide either in.groupfile or envs.nvec")
}
if (!is.null(groupfile) && !is.null(nvec)) {
    stop("Must provide either in.groupfile or envs.nvec, not both")
}

# `log` was never bound to a logger in this script, so log$info() resolved to
# base::log and failed. Create the logger before its first use.
log <- get_logger()

log$info("Reading and preparing data ...")
indata <- read.table(infile, header = TRUE, sep = "\t", row.names = 1, check.names = FALSE)
if (transpose_input) {
    indata <- t(indata)
}
if (!is.null(covfile)) {
    covdata <- read.table(covfile, header = TRUE, sep = "\t", row.names = 1)
    if (transpose_cov) {
        covdata <- t(covdata)
    }
    if (!isTRUE(all.equal(rownames(indata), rownames(covdata)))) {
        stop("Row names of indata and covdata must be identical")
    }
    # Replace every column by its residuals after regressing out the
    # covariates. mutate() needs a data frame; after t() above `indata` is a
    # matrix, so coerce first (a no-op when it already is a data frame).
    indata <- as.data.frame(indata) %>% mutate(across(everything(), function(xx) {
        lm(xx ~ as.matrix(covdata))$residuals
    }))
}
|
|
51
|
+
|
|
52
|
+
# Turn a selector string into column indices.
#
# The string is split on "," and "-". If every token is numeric the numbers
# are returned as-is; otherwise the tokens are looked up as column names of
# the global `indata` (unmatched names become NA).
# NOTE(review): "-" is treated as just another separator, so "1-3" yields
# c(1, 3) rather than 1:3 -- confirm this is intended given the function name.
#
# @param range A single selector string, e.g. "1,2,5" or "geneA,geneB".
# @return A vector of column indices.
expand_range <- function(range) {
    tokens <- trimws(strsplit(range, ",|-")[[1]])
    as_index <- as.numeric(tokens)
    if (!anyNA(as_index)) {
        return(as_index)
    }
    # at least one token is not a number: interpret all tokens as names
    match(tokens, colnames(indata))
}
|
|
61
|
+
|
|
62
|
+
# Default `cut` when it was not configured.
if (is.null(cut)) {
    cut <- max(ceiling(nrow(indata)/22), 4)
}
# NOTE(review): `x` is expanded here but not used again in the visible part
# of this script -- confirm whether it should be applied somewhere.
if (!is.null(x)) {
    x <- expand_range(x)
}

if (is.null(groupfile)) {
    # The Z columns are already part of indata; resolve them to indices.
    nvec <- expand_range(nvec)
} else {
    groupdata <- read.table(groupfile, header = TRUE, sep = "\t", row.names = 1)
    if (transpose_group) {
        groupdata <- t(groupdata)
    }
    same_rows <- isTRUE(all.equal(rownames(indata), rownames(groupdata)))
    if (!same_rows) {
        stop("Row names of indata and groupdata must be identical")
    }
    # Append the group columns to the data and point nvec at them.
    n_data_cols <- ncol(indata)
    nvec <- (n_data_cols + 1) : (n_data_cols + ncol(groupdata))
    indata <- cbind(indata, groupdata)
}
|
|
77
|
+
|
|
78
|
+
log$info("Running fastLiquidAssociation ...")
indata <- as.matrix(indata)
mla <- fastMLA(
    data = indata,
    topn = topn,
    rvalue = rvalue,
    cut = cut,
    threads = ncores,
    nvec = nvec
)

if (nrow(mla) > 0) {
    # Model the candidate triplets and keep the table of top p-values,
    # renaming the columns whose names contain spaces.
    cnm <- mass.CNM(data = indata, GLA.mat = mla, nback = topn)
    top_pvalues <- cnm$`top p-values`
    out <- dplyr::select(
        top_pvalues,
        X12 = "X1 or X2",
        X21 = "X2 or X1",
        everything(),
        Pval = "p value"
    )
} else {
    # Nothing passed the screening: build an empty table with named columns.
    log$warn("No significant associations found")
    out <- data.frame(
        X12 = character(),
        X21 = character(),
        X3 = character(),
        rhodiff = numeric(),
        `MLA.value` = numeric(),
        estimates = numeric(),
        `san.se` = numeric(),
        wald = numeric(),
        Pval = numeric(),
        model = character()
    )
}
|
|
108
|
+
|
|
109
|
+
# Optionally restrict the results to the triplets listed in fmlfile
# (headerless, tab-separated, columns in the order Z, X, Y).
if (!is.null(fmlfile)) {
    fmldata <- read.table(fmlfile, header = FALSE, sep = "\t", row.names = NULL)
    colnames(fmldata) <- c("Z", "X", "Y")
    wanted <- unite(fmldata, "XYZ", X, Y, Z, sep = " // ")
    all_combns <- pull(wanted, XYZ)
    # NOTE(review): only the X12/X21 order is matched against X/Y; confirm
    # whether swapped pairs should be kept as well.
    keyed <- unite(out, "XYZ", X12, X21, X3, sep = " // ", remove = FALSE)
    keyed <- dplyr::filter(keyed, XYZ %in% all_combns)
    out <- dplyr::select(keyed, -XYZ)
}

# Optionally rename the three variable columns to user-supplied names.
if (!is.null(xyz_names)) {
    out <- dplyr::select(
        out,
        !!sym(xyz_names[1]) := "X12",
        !!sym(xyz_names[2]) := "X21",
        !!sym(xyz_names[3]) := "X3",
        everything()
    )
}

if (padj != "none") {
    log$info("Calculating adjusted p-values ...")
    out$Padj <- p.adjust(out$Pval, method = padj)
}

log$info("Writing output ...")
write.table(out, file = outfile, sep = "\t", quote = FALSE, row.names = FALSE)
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
library(rlang)
|
|
2
|
+
library(parallel)
|
|
3
|
+
library(mediation)
|
|
4
|
+
library(biopipen.utils)
|
|
5
|
+
|
|
6
|
+
# ---- Job parameters, rendered by the pipeline's template engine ----
infile <- {{in.infile | r}}
fmlfile <- {{in.fmlfile | r}}
outfile <- {{out.outfile | r}}

ncores <- {{envs.ncores | r}}
sims <- {{envs.sims | r}}
args <- {{envs.args | r}}
padj <- {{envs.padj | r}}
cases <- {{envs.cases | r}}
transpose_input <- {{envs.transpose_input | r}}

set.seed(123)
log <- get_logger()

log$info("Reading input file ...")
indata <- read.table(infile, header = TRUE, sep = "\t", row.names = NULL, check.names = FALSE)
if (transpose_input) {
    indata <- t(indata)
}

log$info("Reading formula file/cases ...")
has_cases <- length(cases) > 0
if (!is.null(fmlfile)) {
    # The formula file takes precedence over envs.cases.
    if (has_cases) {
        log$warn("envs.cases ignored as in.fmlfile is provided")
    }
    fmldata <- read.table(fmlfile, header = TRUE, sep = "\t", row.names = NULL)
    # Case M Y X Cov Model_M Model_Y
    cases <- split(fmldata, fmldata$Case)
} else if (!has_cases) {
    stop("Either envs.cases or in.fmlfile must be provided")
}

# Extra arguments default to an empty list.
if (is.null(args)) {
    args <- list()
}
|
|
37
|
+
|
|
38
|
+
# Run the mediation analysis for the i-th entry of the global `cases`.
#
# Reads the globals `cases`, `indata`, `args` and `sims`.
#
# @param i Index of the case to analyse.
# @param total Total number of cases; only used to throttle progress logging.
# @return A one-row data frame (Case, M, X, Y, ACME, ACME95CI1, ACME95CI2,
#   TotalEffect, ADE, PropMediated, Pval), or NULL when the p-value or the
#   proportion mediated is NA.
medanalysis <- function(i, total) {
    casename <- names(cases)[i]
    case <- cases[[casename]]

    # Log every case for small runs, every 10th/100th for larger ones.
    if (total < 50) {
        log$info("- Case: ", casename)
    } else {
        step <- if (total < 500) 10 else 100
        if (i %% step == 0) {
            log$info("- Processing case {i}/{total} ...")
        }
    }

    M <- case$M
    Y <- case$Y
    X <- case$X
    covs <- case$Cov
    # Model_M / Model_Y name the fitting functions for the two models.
    modelm <- match.fun(case$Model_M)
    modely <- match.fun(case$Model_Y)

    # Mediator model: M ~ X; outcome model: Y ~ M + X.
    fmlm <- as.formula(sprintf("%s ~ %s", bQuote(M), bQuote(X)))
    fmly <- as.formula(sprintf("%s ~ %s + %s", bQuote(Y), bQuote(M), bQuote(X)))

    # A single string is treated as a comma-separated list of covariates.
    if (!is.null(covs) && length(covs) == 1) {
        covs <- trimws(strsplit(covs, ",")[[1]])
    }
    has_covs <- !is.null(covs)
    if (has_covs) {
        cov_terms <- paste(bQuote(covs), collapse = " + ")
        cov_fml <- as.formula(sprintf("~ . + %s", cov_terms))
        fmlm <- update.formula(fmlm, cov_fml)
        fmly <- update.formula(fmly, cov_fml)
    }

    # Keep only the columns involved and drop incomplete rows.
    # NOTE(review): the fitted-model calls below refer to `data`, `fmlm` and
    # `fmly` by name; presumably mediate() re-evaluates them, so these locals
    # should not be renamed without checking.
    data <- indata[, c(M, X, Y, covs), drop = FALSE]
    data <- data[complete.cases(data), , drop = FALSE]

    margs <- args
    margs$sims <- sims
    margs$model.m <- modelm(fmlm, data = data)
    margs$model.y <- modely(fmly, data = data)
    margs$treat <- X
    margs$mediator <- M
    margs$outcome <- Y
    if (has_covs) {
        margs$covariates <- data[, covs, drop = FALSE]
    }
    med <- do_call(mediate, margs)

    if (is.na(med$d1.p) || is.na(med$n1)) {
        return(NULL)
    }
    data.frame(
        Case = casename,
        M = M,
        X = X,
        Y = Y,
        ACME = med$d1,
        ACME95CI1 = med$d1.ci[1],
        ACME95CI2 = med$d1.ci[2],
        TotalEffect = med$tau.coef,
        ADE = med$z1,
        PropMediated = med$n1,
        Pval = med$d1.p
    )
}
|
|
100
|
+
|
|
101
|
+
total <- length(cases)
# seq_len() instead of 1:total: with zero cases 1:0 would iterate over c(1, 0)
# and call medanalysis() on cases that do not exist.
out <- do_call(rbind, mclapply(seq_len(total), medanalysis, total = total, mc.cores = ncores))

if (is.null(out)) {
    # medanalysis() returns NULL for cases without a usable result; if none
    # is left, write an empty table that still has the expected header.
    out <- data.frame(
        Case = character(),
        M = character(),
        X = character(),
        Y = character(),
        ACME = numeric(),
        ACME95CI1 = numeric(),
        ACME95CI2 = numeric(),
        TotalEffect = numeric(),
        ADE = numeric(),
        PropMediated = numeric(),
        Pval = numeric()
    )
}

if (padj != "none") {
    out$Padj <- p.adjust(out$Pval, method = padj)
}

write.table(out, file = outfile, sep = "\t", quote = FALSE, row.names = FALSE)
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
library(metap)
|
|
2
|
+
library(rlang)
|
|
3
|
+
library(dplyr)
|
|
4
|
+
library(biopipen.utils)
|
|
5
|
+
|
|
6
|
+
# ---- Job parameters, rendered by the pipeline's template engine ----
infiles <- {{in.infiles | each: str | r}}
outfile <- {{out.outfile | r}}
id_cols <- {{envs.id_cols | r}}
id_exprs <- {{envs.id_exprs | r}}
pval_cols <- {{envs.pval_cols | r}}
method <- {{envs.method | r}}
na <- {{envs.na | r}}
keep_single <- {{envs.keep_single | r}}
padj <- {{envs.padj | r}}

log <- get_logger()

# "fisher" is accepted as an alias of "sumlog"; `method` is later used as
# the name of the function to call.
if (method == "fisher") {
    method <- "sumlog"
}
|
|
19
|
+
|
|
20
|
+
# Three modes:
#   1. one input, no adjustment  -> copy the file as-is
#   2. one input, with adjustment -> add a Padj column
#   3. several inputs             -> join by id columns and combine p-values
if (length(infiles) == 1 && padj == "none") {
    log$info("Only one input file, copying to output ...")
    # file.copy() does not overwrite by default and reports failure only via
    # its return value; overwrite explicitly and fail loudly.
    if (!file.copy(infiles, outfile, overwrite = TRUE)) {
        stop("Failed to copy input file to output: ", outfile)
    }
} else if (length(infiles) == 1) {
    log$info("Only one input file, performing p-value adjustment ...")
    if (is.null(pval_cols)) {
        stop("Must provide envs.pval_cols")
    }
    indata <- read.table(infiles, header = TRUE, sep = "\t", row.names = NULL, check.names = FALSE)
    if (!pval_cols %in% colnames(indata)) {
        stop("envs.pval_cols does not exist in input file")
    }
    indata$Padj <- p.adjust(indata[, pval_cols], method = padj)

    log$info("Writing output ...")
    write.table(indata, outfile, quote = FALSE, sep = "\t", row.names = FALSE)
} else {
    # Check pval_cols: one name (recycled for all files) or one per file
    if (is.null(pval_cols)) {
        stop("Must provide envs.pval_cols")
    }
    if (length(pval_cols) == 1) {
        pval_cols <- trimws(strsplit(pval_cols, ",")[[1]])
    }
    if (length(pval_cols) == 1) {
        pval_cols <- rep(pval_cols, length(infiles))
    }
    if (length(pval_cols) != length(infiles)) {
        stop("envs.pval_cols must be a single name or have the same length as in.infiles")
    }

    # Check id_cols
    if (is.null(id_cols)) {
        stop("Must provide envs.id_cols")
    }
    if (length(id_cols) == 1) {
        id_cols <- trimws(strsplit(id_cols, ",")[[1]])
    }

    # Check id_exprs: one expression (recycled) or one per file; it creates
    # the single id column, so id_cols must then be a single name
    if (!is.null(id_exprs)) {
        if (length(id_exprs) == 1) {
            id_exprs <- rep(id_exprs, length(infiles))
        }
        if (length(id_exprs) != length(infiles)) {
            stop("envs.id_exprs must be a single expression or have the same length as in.infiles")
        }
        if (length(id_cols) != 1) {
            stop("envs.id_cols must be a single name if envs.id_exprs is provided")
        }
    }

    log$info("Reading and preparing data ...")
    outdata <- NULL
    for (i in seq_along(infiles)) {
        infile <- infiles[i]
        # The p-value column of each file is renamed to Pval_<file stem>
        name <- tools::file_path_sans_ext(basename(infile))
        pval_col <- paste0("Pval_", name)
        dat <- read.table(
            infile, header = TRUE, sep = "\t", row.names = NULL, check.names = FALSE
        )
        if (!is.null(id_exprs)) {
            dat <- dat %>% mutate(!!sym(id_cols) := !!parse_expr(id_exprs[i]))
        }
        dat <- dat %>% dplyr::select(all_of(id_cols), !!sym(pval_col) := !!sym(pval_cols[i]))

        if (is.null(outdata)) {
            outdata <- dat
        } else {
            outdata <- full_join(outdata, dat, by = id_cols)
        }
    }

    log$info("Running metap on each row ...")
    pval_columns <- setdiff(colnames(outdata), id_cols)
    n_rows <- nrow(outdata)
    # Preallocate instead of growing the vectors with c() on every row; rows
    # without any usable p-value keep the NA they start with.
    metaps <- rep(NA_real_, n_rows)
    ns <- rep(NA_real_, n_rows)
    for (i in seq_len(n_rows)) {
        ps <- unlist(outdata[i, pval_columns, drop = TRUE])
        # na == -1 means "drop missing p-values"; any other value replaces them
        if (na == -1) {
            ps <- ps[!is.na(ps)]
        } else {
            ps[is.na(ps)] <- na
        }
        if (length(ps) == 0) {
            next
        } else if (length(ps) == 1 && keep_single) {
            metaps[i] <- ps
            ns[i] <- 1
        } else if (any(ps == 0)) {
            # a zero p-value short-circuits the combination
            metaps[i] <- 0
            ns[i] <- length(ps)
        } else {
            # `method` is the name of the combining function to call
            metaps[i] <- do.call(method, list(ps))$p
            ns[i] <- length(ps)
        }
    }
    outdata$MetaPval <- metaps
    outdata$N <- ns
    outdata <- outdata %>% arrange(MetaPval)

    if (padj != "none") {
        log$info("Calculating adjusted p-values ...")
        outdata$MetaPadj <- p.adjust(outdata$MetaPval, method = padj)
    }

    log$info("Writing output ...")
    write.table(outdata, outfile, quote = FALSE, sep = "\t", row.names = FALSE)
}
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# Combine the p-values that share the same id (one input file, possibly many
# rows per id) into one meta p-value per id.
#
# Template values (rendered into this script before it runs):
#   in.infile         Tab-delimited input file with a header row.
#   out.outfile       Path of the tab-delimited output file.
#   envs.id_cols      Id column name(s); a single string is split on commas.
#   envs.pval_col     Name of the column holding the p-values.
#   envs.method       Name of the function called on each group's p-values;
#                     its result must expose `$p`. "fisher" is mapped to
#                     "sumlog".
#   envs.na           Value substituted for NA p-values; -1 means "drop NAs".
#   envs.keep_single  If TRUE, a group with exactly one p-value keeps that
#                     p-value as its meta p-value.
#   envs.padj         Method passed to p.adjust(); "none" skips adjustment.
#
# Output columns: the id columns, N (number of p-values used), MetaPval and,
# unless padj is "none", MetaPadj. Rows are sorted by MetaPval.
library(metap)
library(rlang)
library(dplyr)
library(biopipen.utils)

infile <- {{in.infile | r}}
outfile <- {{out.outfile | r}}
id_cols <- {{envs.id_cols | r}}
pval_col <- {{envs.pval_col | r}}
method <- {{envs.method | r}}
na <- {{envs.na | r}}
keep_single <- {{envs.keep_single | r}}
padj <- {{envs.padj | r}}

log <- get_logger()

# "fisher" is accepted as an alias of "sumlog"
if (method == "fisher") { method <- "sumlog" }

# Check pval_col
if (is.null(pval_col)) { stop("Must provide envs.pval_col") }

# Check id_cols
if (is.null(id_cols)) { stop("Must provide envs.id_cols") }
if (length(id_cols) == 1) {
    # A single string may carry several comma-separated column names
    id_cols <- trimws(strsplit(id_cols, ",")[[1]])
}

# Combine the p-values of one group.
#
# ps: vector of p-values belonging to a single id.
# Returns list(p = <meta p-value>, n = <number of p-values used>).
combine_pvals <- function(ps) {
    if (na == -1) {
        ps <- ps[!is.na(ps)]
    } else {
        ps[is.na(ps)] <- na
    }

    n <- length(ps)
    if (n == 0) {
        # Nothing left to combine
        list(p = NA_real_, n = NA_integer_)
    } else if (n == 1 && keep_single) {
        list(p = ps, n = 1L)
    } else if (any(ps == 0)) {
        # An exact zero short-circuits to 0 instead of calling `method`
        list(p = 0, n = n)
    } else {
        list(p = do.call(method, list(ps))$p, n = n)
    }
}

log$info("Reading input and performing meta-analysis ...")
outdata <- read.table(
    infile, header = TRUE, sep = "\t", row.names = NULL, check.names = FALSE
) %>%
    group_by(!!!syms(id_cols)) %>%
    summarise(
        # Overwritten below with the number of p-values actually used; kept
        # here so that N stays right after the id columns in the output.
        N = n(),
        .pvals = list(!!sym(pval_col)),
        .groups = "drop"
    )

# vapply() yields correctly typed zero-length vectors when there are no
# groups, so the columns are still created for an empty input, and it avoids
# growing result vectors one element at a time.
combined <- lapply(outdata$.pvals, combine_pvals)
outdata$MetaPval <- vapply(combined, function(x) x$p, numeric(1))
outdata$N <- vapply(combined, function(x) x$n, integer(1))
outdata$.pvals <- NULL
outdata <- outdata %>% arrange(MetaPval)

if (padj != "none") {
    log$info("Calculating adjusted p-values ...")
    outdata$MetaPadj <- p.adjust(outdata$MetaPval, method = padj)
}

log$info("Writing output ...")
write.table(outdata, outfile, quote = FALSE, sep = "\t", row.names = FALSE)
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
from biopipen.utils.misc import run_command, dict_to_cli_args
|
|
2
2
|
|
|
3
|
-
infile = {{in.infile | quote}} # pyright: ignore
|
|
3
|
+
infile: str = {{in.infile | quote}} # pyright: ignore # noqa
|
|
4
4
|
outfile = {{out.outfile | quote}} # pyright: ignore
|
|
5
5
|
outdir = {{out.outdir | quote}} # pyright: ignore
|
|
6
6
|
perl = {{envs.perl | quote}} # pyright: ignore
|
|
7
7
|
ref = {{envs.ref | repr}} # pyright: ignore
|
|
8
8
|
samtools = {{envs.samtools | quote}} # pyright: ignore
|
|
9
|
-
args = {{envs.args |
|
|
9
|
+
args: dict = {{envs.args | dict}} # pyright: ignore
|
|
10
10
|
maf2vcf = {{biopipen_dir | append: "/scripts/tcgamaf/maf2vcf.pl" | repr}} # pyright: ignore
|
|
11
11
|
|
|
12
12
|
args['input-maf'] = infile
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
|
|
2
|
-
infile = {{in.infile | quote}} # pyright: ignore
|
|
3
|
-
outfile = {{out.outfile | quote}} # pyright: ignore
|
|
2
|
+
infile: str = {{in.infile | quote}} # pyright: ignore # noqa
|
|
3
|
+
outfile: str = {{out.outfile | quote}} # pyright: ignore
|
|
4
4
|
|
|
5
5
|
with open(infile) as fin, open(outfile, "w") as fout:
|
|
6
6
|
for line in fin:
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
|
|
1
|
+
{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
|
|
2
2
|
|
|
3
3
|
library(Seurat)
|
|
4
4
|
library(immunarch)
|
|
@@ -11,6 +11,7 @@ immfile = {{in.immfile | r}}
|
|
|
11
11
|
sobjfile = {{in.sobjfile | r}}
|
|
12
12
|
outfile = {{out.outfile | r}}
|
|
13
13
|
metacols = {{envs.metacols | r}}
|
|
14
|
+
prefix = {{envs.prefix | r}}
|
|
14
15
|
|
|
15
16
|
immdata = readRDS(immfile)
|
|
16
17
|
sobj = readRDS(sobjfile)
|
|
@@ -31,7 +32,7 @@ metadf = do_call(rbind, lapply(seq_len(nrow(immdata$meta)), function(i) {
|
|
|
31
32
|
|
|
32
33
|
cldata %>%
|
|
33
34
|
separate_rows(Barcode, sep=";") %>%
|
|
34
|
-
mutate(Barcode = glue("{
|
|
35
|
+
mutate(Barcode = glue(paste0(prefix, "{Barcode}")))
|
|
35
36
|
|
|
36
37
|
}))
|
|
37
38
|
|