biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biopipen/__init__.py +1 -1
- biopipen/core/config.toml +28 -0
- biopipen/core/filters.py +79 -4
- biopipen/core/proc.py +12 -3
- biopipen/core/testing.py +75 -3
- biopipen/ns/bam.py +148 -6
- biopipen/ns/bed.py +75 -0
- biopipen/ns/cellranger.py +186 -0
- biopipen/ns/cellranger_pipeline.py +126 -0
- biopipen/ns/cnv.py +19 -3
- biopipen/ns/cnvkit.py +1 -1
- biopipen/ns/cnvkit_pipeline.py +20 -12
- biopipen/ns/delim.py +34 -35
- biopipen/ns/gene.py +68 -23
- biopipen/ns/gsea.py +63 -37
- biopipen/ns/misc.py +39 -14
- biopipen/ns/plot.py +304 -1
- biopipen/ns/protein.py +183 -0
- biopipen/ns/regulatory.py +290 -0
- biopipen/ns/rnaseq.py +142 -5
- biopipen/ns/scrna.py +2053 -473
- biopipen/ns/scrna_metabolic_landscape.py +228 -382
- biopipen/ns/snp.py +659 -0
- biopipen/ns/stats.py +484 -0
- biopipen/ns/tcr.py +683 -98
- biopipen/ns/vcf.py +236 -2
- biopipen/ns/web.py +97 -6
- biopipen/reports/bam/CNVpytor.svelte +4 -9
- biopipen/reports/cellranger/CellRangerCount.svelte +18 -0
- biopipen/reports/cellranger/CellRangerSummary.svelte +16 -0
- biopipen/reports/cellranger/CellRangerVdj.svelte +18 -0
- biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
- biopipen/reports/common.svelte +15 -0
- biopipen/reports/protein/ProdigySummary.svelte +16 -0
- biopipen/reports/scrna/CellsDistribution.svelte +4 -39
- biopipen/reports/scrna/DimPlots.svelte +1 -1
- biopipen/reports/scrna/MarkersFinder.svelte +6 -126
- biopipen/reports/scrna/MetaMarkers.svelte +3 -75
- biopipen/reports/scrna/RadarPlots.svelte +4 -20
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +61 -22
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +88 -82
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +70 -10
- biopipen/reports/snp/PlinkCallRate.svelte +24 -0
- biopipen/reports/snp/PlinkFreq.svelte +18 -0
- biopipen/reports/snp/PlinkHWE.svelte +18 -0
- biopipen/reports/snp/PlinkHet.svelte +18 -0
- biopipen/reports/snp/PlinkIBD.svelte +18 -0
- biopipen/reports/tcr/CDR3AAPhyschem.svelte +19 -66
- biopipen/reports/tcr/ClonalStats.svelte +16 -0
- biopipen/reports/tcr/CloneResidency.svelte +3 -93
- biopipen/reports/tcr/Immunarch.svelte +4 -155
- biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
- biopipen/reports/tcr/TESSA.svelte +11 -28
- biopipen/reports/utils/misc.liq +22 -7
- biopipen/scripts/bam/BamMerge.py +11 -15
- biopipen/scripts/bam/BamSampling.py +90 -0
- biopipen/scripts/bam/BamSort.py +141 -0
- biopipen/scripts/bam/BamSplitChroms.py +10 -10
- biopipen/scripts/bam/BamSubsetByBed.py +38 -0
- biopipen/scripts/bam/CNAClinic.R +41 -5
- biopipen/scripts/bam/CNVpytor.py +153 -54
- biopipen/scripts/bam/ControlFREEC.py +13 -14
- biopipen/scripts/bam/SamtoolsView.py +33 -0
- biopipen/scripts/bed/Bed2Vcf.py +5 -5
- biopipen/scripts/bed/BedConsensus.py +5 -5
- biopipen/scripts/bed/BedLiftOver.sh +6 -4
- biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
- biopipen/scripts/bed/BedtoolsMakeWindows.py +47 -0
- biopipen/scripts/bed/BedtoolsMerge.py +4 -4
- biopipen/scripts/cellranger/CellRangerCount.py +138 -0
- biopipen/scripts/cellranger/CellRangerSummary.R +181 -0
- biopipen/scripts/cellranger/CellRangerVdj.py +112 -0
- biopipen/scripts/cnv/AneuploidyScore.R +55 -20
- biopipen/scripts/cnv/AneuploidyScoreSummary.R +221 -163
- biopipen/scripts/cnv/TMADScore.R +25 -9
- biopipen/scripts/cnv/TMADScoreSummary.R +57 -86
- biopipen/scripts/cnvkit/CNVkitAccess.py +7 -6
- biopipen/scripts/cnvkit/CNVkitAutobin.py +26 -18
- biopipen/scripts/cnvkit/CNVkitBatch.py +6 -6
- biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
- biopipen/scripts/cnvkit/CNVkitCoverage.py +4 -3
- biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
- biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
- biopipen/scripts/cnvkit/CNVkitGuessBaits.py +12 -8
- biopipen/scripts/cnvkit/CNVkitHeatmap.py +5 -5
- biopipen/scripts/cnvkit/CNVkitReference.py +6 -5
- biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
- biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
- biopipen/scripts/cnvkit/guess_baits.py +166 -93
- biopipen/scripts/delim/RowsBinder.R +1 -1
- biopipen/scripts/delim/SampleInfo.R +116 -118
- biopipen/scripts/gene/GeneNameConversion.R +67 -0
- biopipen/scripts/gene/GenePromoters.R +61 -0
- biopipen/scripts/gsea/Enrichr.R +5 -5
- biopipen/scripts/gsea/FGSEA.R +184 -50
- biopipen/scripts/gsea/GSEA.R +2 -2
- biopipen/scripts/gsea/PreRank.R +5 -5
- biopipen/scripts/misc/Config2File.py +2 -2
- biopipen/scripts/misc/Plot.R +80 -0
- biopipen/scripts/misc/Shell.sh +15 -0
- biopipen/scripts/misc/Str2File.py +2 -2
- biopipen/scripts/plot/Heatmap.R +3 -3
- biopipen/scripts/plot/Manhattan.R +147 -0
- biopipen/scripts/plot/QQPlot.R +146 -0
- biopipen/scripts/plot/ROC.R +88 -0
- biopipen/scripts/plot/Scatter.R +112 -0
- biopipen/scripts/plot/VennDiagram.R +5 -9
- biopipen/scripts/protein/MMCIF2PDB.py +33 -0
- biopipen/scripts/protein/PDB2Fasta.py +60 -0
- biopipen/scripts/protein/Prodigy.py +119 -0
- biopipen/scripts/protein/ProdigySummary.R +140 -0
- biopipen/scripts/protein/RMSD.py +178 -0
- biopipen/scripts/regulatory/MotifAffinityTest.R +102 -0
- biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +127 -0
- biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +104 -0
- biopipen/scripts/regulatory/MotifScan.py +159 -0
- biopipen/scripts/regulatory/VariantMotifPlot.R +78 -0
- biopipen/scripts/regulatory/motifs-common.R +324 -0
- biopipen/scripts/rnaseq/Simulation-ESCO.R +180 -0
- biopipen/scripts/rnaseq/Simulation-RUVcorr.R +45 -0
- biopipen/scripts/rnaseq/Simulation.R +21 -0
- biopipen/scripts/rnaseq/UnitConversion.R +325 -54
- biopipen/scripts/scrna/AnnData2Seurat.R +40 -0
- biopipen/scripts/scrna/CCPlotR-patch.R +161 -0
- biopipen/scripts/scrna/CellCellCommunication.py +150 -0
- biopipen/scripts/scrna/CellCellCommunicationPlots.R +93 -0
- biopipen/scripts/scrna/CellSNPLite.py +30 -0
- biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +185 -0
- biopipen/scripts/scrna/CellTypeAnnotation-direct.R +68 -31
- biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +27 -22
- biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +28 -20
- biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +48 -25
- biopipen/scripts/scrna/CellTypeAnnotation.R +37 -1
- biopipen/scripts/scrna/CellsDistribution.R +456 -167
- biopipen/scripts/scrna/DimPlots.R +1 -1
- biopipen/scripts/scrna/ExprImputation-alra.R +109 -0
- biopipen/scripts/scrna/ExprImputation-rmagic.R +256 -0
- biopipen/scripts/scrna/{ExprImpution-scimpute.R → ExprImputation-scimpute.R} +8 -5
- biopipen/scripts/scrna/ExprImputation.R +7 -0
- biopipen/scripts/scrna/LoomTo10X.R +51 -0
- biopipen/scripts/scrna/MQuad.py +25 -0
- biopipen/scripts/scrna/MarkersFinder.R +679 -400
- biopipen/scripts/scrna/MetaMarkers.R +265 -161
- biopipen/scripts/scrna/ModuleScoreCalculator.R +66 -11
- biopipen/scripts/scrna/PseudoBulkDEG.R +678 -0
- biopipen/scripts/scrna/RadarPlots.R +355 -134
- biopipen/scripts/scrna/ScFGSEA.R +298 -100
- biopipen/scripts/scrna/ScSimulation.R +65 -0
- biopipen/scripts/scrna/ScVelo.py +617 -0
- biopipen/scripts/scrna/Seurat2AnnData.R +7 -0
- biopipen/scripts/scrna/SeuratClusterStats-clustree.R +87 -0
- biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +36 -30
- biopipen/scripts/scrna/SeuratClusterStats-features.R +138 -187
- biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +81 -0
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +78 -89
- biopipen/scripts/scrna/SeuratClusterStats.R +47 -10
- biopipen/scripts/scrna/SeuratClustering.R +36 -233
- biopipen/scripts/scrna/SeuratLoading.R +2 -2
- biopipen/scripts/scrna/SeuratMap2Ref.R +84 -113
- biopipen/scripts/scrna/SeuratMetadataMutater.R +16 -6
- biopipen/scripts/scrna/SeuratPreparing.R +223 -173
- biopipen/scripts/scrna/SeuratSubClustering.R +64 -0
- biopipen/scripts/scrna/SeuratTo10X.R +27 -0
- biopipen/scripts/scrna/Slingshot.R +65 -0
- biopipen/scripts/scrna/Subset10X.R +2 -2
- biopipen/scripts/scrna/TopExpressingGenes.R +169 -135
- biopipen/scripts/scrna/celltypist-wrapper.py +195 -0
- biopipen/scripts/scrna/scvelo_paga.py +313 -0
- biopipen/scripts/scrna/seurat_anndata_conversion.py +98 -0
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +447 -82
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +348 -241
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +188 -166
- biopipen/scripts/snp/MatrixEQTL.R +217 -0
- biopipen/scripts/snp/Plink2GTMat.py +148 -0
- biopipen/scripts/snp/PlinkCallRate.R +199 -0
- biopipen/scripts/snp/PlinkFilter.py +100 -0
- biopipen/scripts/snp/PlinkFreq.R +291 -0
- biopipen/scripts/snp/PlinkFromVcf.py +81 -0
- biopipen/scripts/snp/PlinkHWE.R +85 -0
- biopipen/scripts/snp/PlinkHet.R +96 -0
- biopipen/scripts/snp/PlinkIBD.R +196 -0
- biopipen/scripts/snp/PlinkSimulation.py +124 -0
- biopipen/scripts/snp/PlinkUpdateName.py +124 -0
- biopipen/scripts/stats/ChowTest.R +146 -0
- biopipen/scripts/stats/DiffCoexpr.R +152 -0
- biopipen/scripts/stats/LiquidAssoc.R +135 -0
- biopipen/scripts/stats/Mediation.R +108 -0
- biopipen/scripts/stats/MetaPvalue.R +130 -0
- biopipen/scripts/stats/MetaPvalue1.R +74 -0
- biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
- biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
- biopipen/scripts/tcr/Attach2Seurat.R +3 -2
- biopipen/scripts/tcr/CDR3AAPhyschem.R +211 -143
- biopipen/scripts/tcr/CDR3Clustering.R +343 -0
- biopipen/scripts/tcr/ClonalStats.R +526 -0
- biopipen/scripts/tcr/CloneResidency.R +255 -131
- biopipen/scripts/tcr/CloneSizeQQPlot.R +4 -4
- biopipen/scripts/tcr/GIANA/GIANA.py +1356 -797
- biopipen/scripts/tcr/GIANA/GIANA4.py +1362 -789
- biopipen/scripts/tcr/GIANA/query.py +164 -162
- biopipen/scripts/tcr/Immunarch-basic.R +31 -9
- biopipen/scripts/tcr/Immunarch-clonality.R +25 -5
- biopipen/scripts/tcr/Immunarch-diversity.R +352 -134
- biopipen/scripts/tcr/Immunarch-geneusage.R +45 -5
- biopipen/scripts/tcr/Immunarch-kmer.R +68 -8
- biopipen/scripts/tcr/Immunarch-overlap.R +84 -4
- biopipen/scripts/tcr/Immunarch-spectratyping.R +35 -6
- biopipen/scripts/tcr/Immunarch-tracking.R +38 -6
- biopipen/scripts/tcr/Immunarch-vjjunc.R +165 -0
- biopipen/scripts/tcr/Immunarch.R +63 -11
- biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
- biopipen/scripts/tcr/ImmunarchFilter.R +4 -4
- biopipen/scripts/tcr/ImmunarchLoading.R +38 -29
- biopipen/scripts/tcr/SampleDiversity.R +1 -1
- biopipen/scripts/tcr/ScRepCombiningExpression.R +40 -0
- biopipen/scripts/tcr/ScRepLoading.R +166 -0
- biopipen/scripts/tcr/TCRClusterStats.R +176 -22
- biopipen/scripts/tcr/TCRDock.py +110 -0
- biopipen/scripts/tcr/TESSA.R +102 -118
- biopipen/scripts/tcr/VJUsage.R +5 -5
- biopipen/scripts/tcr/immunarch-patched.R +142 -0
- biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
- biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
- biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
- biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
- biopipen/scripts/vcf/BcftoolsSort.py +113 -0
- biopipen/scripts/vcf/BcftoolsView.py +73 -0
- biopipen/scripts/vcf/TruvariBench.sh +14 -7
- biopipen/scripts/vcf/TruvariBenchSummary.R +16 -13
- biopipen/scripts/vcf/TruvariConsistency.R +1 -1
- biopipen/scripts/vcf/Vcf2Bed.py +2 -2
- biopipen/scripts/vcf/VcfAnno.py +11 -11
- biopipen/scripts/vcf/VcfDownSample.sh +22 -10
- biopipen/scripts/vcf/VcfFilter.py +5 -5
- biopipen/scripts/vcf/VcfFix.py +7 -7
- biopipen/scripts/vcf/VcfFix_utils.py +13 -4
- biopipen/scripts/vcf/VcfIndex.py +3 -3
- biopipen/scripts/vcf/VcfIntersect.py +3 -3
- biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
- biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
- biopipen/scripts/vcf/bcftools_utils.py +52 -0
- biopipen/scripts/web/Download.py +8 -4
- biopipen/scripts/web/DownloadList.py +5 -5
- biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
- biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
- biopipen/scripts/web/gcloud_common.py +49 -0
- biopipen/utils/gene.py +108 -60
- biopipen/utils/misc.py +146 -20
- biopipen/utils/reference.py +64 -20
- biopipen/utils/reporter.py +177 -0
- biopipen/utils/vcf.py +1 -1
- biopipen-0.34.26.dist-info/METADATA +27 -0
- biopipen-0.34.26.dist-info/RECORD +292 -0
- {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
- {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +6 -2
- biopipen/ns/bcftools.py +0 -111
- biopipen/ns/scrna_basic.py +0 -255
- biopipen/reports/delim/SampleInfo.svelte +0 -36
- biopipen/reports/scrna/GeneExpressionInvistigation.svelte +0 -32
- biopipen/reports/scrna/ScFGSEA.svelte +0 -35
- biopipen/reports/scrna/SeuratClusterStats.svelte +0 -82
- biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -20
- biopipen/reports/scrna/SeuratPreparing.svelte +0 -38
- biopipen/reports/scrna/TopExpressingGenes.svelte +0 -55
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -31
- biopipen/reports/utils/gsea.liq +0 -110
- biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
- biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
- biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
- biopipen/scripts/gene/GeneNameConversion.py +0 -66
- biopipen/scripts/scrna/ExprImpution-alra.R +0 -32
- biopipen/scripts/scrna/ExprImpution-rmagic.R +0 -29
- biopipen/scripts/scrna/ExprImpution.R +0 -7
- biopipen/scripts/scrna/GeneExpressionInvistigation.R +0 -132
- biopipen/scripts/scrna/Write10X.R +0 -11
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -150
- biopipen/scripts/tcr/TCRClustering.R +0 -280
- biopipen/utils/common_docstrs.py +0 -61
- biopipen/utils/gene.R +0 -49
- biopipen/utils/gsea.R +0 -193
- biopipen/utils/io.R +0 -20
- biopipen/utils/misc.R +0 -114
- biopipen/utils/mutate_helpers.R +0 -433
- biopipen/utils/plot.R +0 -173
- biopipen/utils/rnaseq.R +0 -48
- biopipen/utils/single_cell.R +0 -115
- biopipen-0.21.0.dist-info/METADATA +0 -22
- biopipen-0.21.0.dist-info/RECORD +0 -218
biopipen/utils/gsea.R
DELETED
|
@@ -1,193 +0,0 @@
|
|
|
1
|
-
library(ggplot2)
|
|
2
|
-
library(dplyr)
|
|
3
|
-
library(tibble)
|
|
4
|
-
|
|
5
|
-
#' Compute a per-gene ranking metric between two sample classes.
#'
#' The metric formulas follow gseapy's `ranking_metric`:
#' https://gseapy.readthedocs.io/en/latest/_modules/gseapy/algorithm.html#ranking_metric
#'
#' @param exprdata Expression matrix or data frame, genes in rows and samples
#'   in columns. Row names are used as the gene identifiers.
#' @param pos Label of the "positive" class in `classes`.
#' @param neg Label of the "negative" class in `classes`.
#' @param classes Class label of each sample; must be in the order of
#'   `colnames(exprdata)`.
#' @param method One of "signal_to_noise"/"s2n", "abs_signal_to_noise"/
#'   "abs_s2n", "t_test", "ratio_of_classes", "diff_of_classes" or
#'   "log2_ratio_of_classes".
#' @return A data frame with a `Gene` column and a metric column named
#'   `<pos>_vs_<neg>`, sorted by ascending metric.
prerank <- function(
    exprdata,
    pos,
    neg,
    classes, # must be in the order of colnames(exprdata)
    method = "signal_to_noise"
) {
    library(matrixStats)
    set.seed(8525)

    # A logical mask of the wrong length would be recycled silently and
    # select the wrong samples, so reject it up front.
    if (length(classes) != ncol(exprdata)) {
        stop(
            "prerank: length of `classes` (", length(classes),
            ") must equal the number of columns of `exprdata` (",
            ncol(exprdata), ")",
            call. = FALSE
        )
    }

    # which() drops NA labels; a raw `classes == pos` mask containing NA
    # either errors or selects NA columns.
    pos_cols <- which(classes == pos)
    neg_cols <- which(classes == neg)
    if (length(pos_cols) == 0 || length(neg_cols) == 0) {
        stop(
            "prerank: both `pos` and `neg` must be present in `classes`",
            call. = FALSE
        )
    }

    pos_mat <- as.matrix(exprdata[, pos_cols, drop = FALSE])
    neg_mat <- as.matrix(exprdata[, neg_cols, drop = FALSE])

    expr_pos_mean <- rowMeans(pos_mat, na.rm = TRUE)
    expr_neg_mean <- rowMeans(neg_mat, na.rm = TRUE)
    expr_pos_std <- rowSds(pos_mat, na.rm = TRUE)
    expr_neg_std <- rowSds(neg_mat, na.rm = TRUE)
    # Tiny seeded jitter added to the signal-to-noise denominator
    rands <- rnorm(length(expr_neg_std)) * 1e-6

    if (method %in% c("s2n", "signal_to_noise")) {
        out <- (expr_pos_mean - expr_neg_mean) /
            (expr_pos_std + expr_neg_std + rands)
    } else if (method %in% c("abs_s2n", "abs_signal_to_noise")) {
        out <- abs(
            (expr_pos_mean - expr_neg_mean) /
                (expr_pos_std + expr_neg_std + rands)
        )
    } else if (method == "t_test") {
        # ser = (df_mean[pos] - df_mean[neg])/ np.sqrt(df_std[pos]**2/len(df_std)+df_std[neg]**2/len(df_std) )
        # NOTE(review): the divisors below are the number of genes (length of
        # the per-gene sd vectors), not the per-class sample counts. This
        # mirrors the formula quoted above; confirm it is intended.
        out <- (expr_pos_mean - expr_neg_mean) / sqrt(
            expr_pos_std ^ 2 / length(expr_pos_std) +
            expr_neg_std ^ 2 / length(expr_neg_std)
        )
    } else if (method == "ratio_of_classes") {
        out <- expr_pos_mean / expr_neg_mean
    } else if (method == "diff_of_classes") {
        out <- expr_pos_mean - expr_neg_mean
    } else if (method == "log2_ratio_of_classes") {
        out <- log2(expr_pos_mean) - log2(expr_neg_mean)
    } else {
        stop(paste("Unknown method:", method))
    }
    # todo: log2fc * -log10(p)
    # see https://github.com/crazyhottommy/RNA-seq-analysis/blob/master/GSEA_explained.md#2-using-a-pre-ranked-gene-list
    out <- as.data.frame(out) %>%
        rownames_to_column("Gene") %>%
        arrange(.[[2]])
    colnames(out)[2] <- paste(pos, "vs", neg, sep = "_")
    return(out)
}
|
|
46
|
-
|
|
47
|
-
#' Run Enrichr for a gene list and write one table and one bar plot per
#' database.
#'
#' For each entry of `dbs`, writes `Enrichr_<db>.txt` and `Enrichr_<db>.png`
#' into `outdir`.
#'
#' @param genes Genes passed to `enrichR::enrichr()`.
#' @param dbs Names of the Enrichr databases to query.
#' @param outdir Existing directory that receives the output files.
#' @param showTerms,numChar,orderBy Forwarded to `enrichR::plotEnrich()`.
#' @return `NULL`, invisibly; called for its side effects.
runEnrichr <- function(
    genes,
    dbs,
    outdir,
    showTerms = 20,
    numChar = 40,
    orderBy = "P.value"
) {
    library(enrichR)
    setEnrichrSite("Enrichr") # Human genes

    enriched <- enrichr(genes, dbs)

    for (db in dbs) {
        enr <- enriched[[db]] %>%
            select(-c(Old.P.value, Old.Adjusted.P.value))
        outtable <- file.path(outdir, paste0("Enrichr_", db, ".txt"))
        outfig <- file.path(outdir, paste0("Enrichr_", db, ".png"))
        # NOTE(review): row names are written but the header is not; this
        # looks inverted but is kept because readers may rely on the format.
        write.table(
            enr,
            outtable,
            row.names = TRUE,
            col.names = FALSE,
            sep = "\t",
            quote = FALSE
        )

        png(outfig, res = 100, height = 1000, width = 1400)
        # Close the device even when plotting fails, so a failed database
        # does not leave a dangling graphics device behind.
        tryCatch(
            print(
                plotEnrich(
                    enriched[[db]],
                    showTerms = showTerms,
                    numChar = numChar,
                    orderBy = orderBy
                )
            ),
            finally = dev.off()
        )
    }
}
|
|
79
|
-
|
|
80
|
-
#' Run fgsea on a ranked gene list, write the result table and optionally
#' plot the top pathways.
#'
#' Writes `fgsea.txt` to `outdir`. When `plot` is TRUE it also writes
#' `gsea_table.png` and one `fgsea_<pathway>.png` per selected pathway
#' ("/" in pathway names is replaced by "-" in the file name).
#'
#' @param ranks Gene ranks: a data frame (first column names, second column
#'   values), a list, or a named vector.
#' @param gmtfile Path to the GMT file with the gene sets.
#' @param top If greater than 1, the number of pathways (ordered by p-value)
#'   to plot; otherwise pathways with `padj < top` are plotted.
#' @param outdir Existing directory that receives the output files.
#' @param envs Extra arguments for `fgsea::fgsea()`; `pathways` and `stats`
#'   are overwritten here.
#' @param plot Generate the figures? If FALSE only `fgsea.txt` is written.
#' @return `NULL`.
runFGSEA <- function(
    ranks,
    gmtfile,
    top,
    outdir,
    envs = list(),
    plot = TRUE # only generate fgsea.txt?
) {
    library(data.table)
    library(fgsea)
    set.seed(8525)

    if (is.data.frame(ranks)) {
        ranks <- setNames(ranks[[2]], ranks[[1]])
    } else if (is.list(ranks)) {
        ranks <- unlist(ranks)
    }

    envs$pathways <- gmtPathways(gmtfile)
    envs$stats <- ranks
    gsea_res <- do.call(fgsea::fgsea, envs)
    gsea_res <- gsea_res[order(pval), ]

    write.table(
        gsea_res %>%
            mutate(
                # leadingEdge is a list column; flatten it for the text file
                leadingEdge = sapply(
                    leadingEdge,
                    function(x) paste(x, collapse = ",")
                )
            ),
        file = file.path(outdir, "fgsea.txt"),
        row.names = FALSE,
        col.names = TRUE,
        sep = "\t",
        quote = FALSE
    )

    if (!plot) {
        return(NULL)
    }

    if (top > 1) {
        topPathways <- head(gsea_res, n = top)[, "pathway"]
    } else {
        topPathways <- gsea_res[padj < top][, "pathway"]
    }
    topPathways <- unlist(topPathways)

    # Same default for the table plot and the enrichment plots
    gsea_param <- if (!is.null(envs$gseaParam)) envs$gseaParam else 1

    tablefig <- file.path(outdir, "gsea_table.png")
    png(
        tablefig,
        res = 100,
        width = 1000,
        height = 200 + 40 * length(topPathways)
    )
    # Always close the device, even if plotting fails
    tryCatch(
        print(plotGseaTable(
            envs$pathways[topPathways],
            ranks,
            gsea_res,
            gseaParam = gsea_param
        )),
        finally = dev.off()
    )

    for (pathway in topPathways) {
        enrfig <- file.path(
            outdir,
            paste0("fgsea_", gsub("/", "-", pathway, fixed = TRUE), ".png")
        )
        png(enrfig, res = 100, width = 1000, height = 800)
        tryCatch(
            print(plotEnrichment(
                envs$pathways[[pathway]],
                ranks,
                gseaParam = gsea_param
            ) + labs(title = pathway)),
            finally = dev.off()
        )
    }
}
|
|
143
|
-
|
|
144
|
-
#' Run GSEA (the `GSEA` R package) on an expression matrix.
#'
#' Writes the inputs GSEA() reads from disk, `gsea.gct` (expression data)
#' and `gsea.cls` (class labels), into `outdir`, then calls `GSEA()` with
#' `outdir` as its output directory.
#'
#' @param indata Expression data; row names become the NAME column.
#' @param classes Sample classes.
#' @param gmtfile The GMT file.
#' @param outdir Existing directory for the generated inputs and GSEA output.
#' @param envs Other arguments for `GSEA()`. `random.seed` defaults to 8525
#'   for reproducibility.
#' @return Whatever `GSEA()` returns.
runGSEA <- function(
    indata, # expression data
    classes, # sample classes
    gmtfile, # the GMT file
    outdir,
    envs = list() # other arguments for GSEA()
) {
    library(GSEA)

    # Fix the seed unless the caller chose one
    if (is.null(envs$random.seed)) {
        envs[["random.seed"]] <- 8525
    }

    # --- gct file: two header lines, then the table itself ---
    gct_path <- file.path(outdir, "gsea.gct")
    writeLines(
        c("#1.2", paste(dim(indata), collapse = "\t")),
        gct_path
    )
    gct_table <- as.data.frame(indata)
    gct_table <- mutate(gct_table, Description = "na")
    gct_table <- rownames_to_column(gct_table, "NAME")
    gct_table <- select(gct_table, NAME, Description, everything())
    write.table(
        gct_table,
        gct_path,
        row.names = F,
        col.names = T,
        sep = "\t",
        quote = F,
        append = T
    )

    # --- cls file: counts line, class names line, per-sample labels ---
    cls_path <- file.path(outdir, "gsea.cls")
    class_levels <- unique(classes)
    writeLines(
        c(
            paste(length(classes), length(class_levels), "1"),
            paste("#", paste(class_levels, collapse = " ")),
            paste(classes, collapse = " ")
        ),
        cls_path
    )

    envs[["input.ds"]] <- gct_path
    envs[["input.cls"]] <- cls_path
    envs[["gs.db"]] <- gmtfile
    envs[["output.directory"]] <- outdir

    do.call(GSEA, envs)
}
|
biopipen/utils/io.R
DELETED
|
@@ -1,20 +0,0 @@
|
|
|
1
|
-
#' Read a delimited file with `read.table()`, taking the arguments from a list.
#'
#' Extends `read.table()` in two ways:
#' * a single negative number as `opts$row.names` (e.g. `-1`) means "use
#'   column 1 as row names, made unique with `make.unique()`", so duplicated
#'   identifiers do not make the read fail;
#' * files ending in ".gz" are read through `gzfile()`.
#'
#' @param file Path to the file to read.
#' @param opts Named list of arguments for `read.table()`.
#' @return The data frame that was read.
read.table.opts <- function(file, opts) {
    rncol <- NULL
    rn <- opts[["row.names"]]
    # Only a single negative number selects the "unique row names" mode;
    # character or vector values are passed to read.table() untouched.
    if (is.numeric(rn) && length(rn) == 1 && !is.na(rn) && rn < 0) {
        rncol <- -rn
        # Keep an explicit row.names = NULL entry so read.table() numbers
        # the rows instead of guessing a row-name column.
        opts["row.names"] <- list(NULL)
    }
    opts$file <- if (endsWith(file, ".gz")) gzfile(file) else file

    out <- do.call(read.table, opts)
    if (!is.null(rncol)) {
        # make.unique() only accepts character vectors; the column may have
        # been parsed as numeric or factor.
        rnames <- make.unique(as.character(out[[rncol]]))
        out <- out[, -rncol, drop = FALSE]
        rownames(out) <- rnames
    }
    out
}
|
biopipen/utils/misc.R
DELETED
|
@@ -1,114 +0,0 @@
|
|
|
1
|
-
# Misc utilities for R
|
|
2
|
-
library(logger)
|
|
3
|
-
|
|
4
|
-
# Log line layout: the level left-justified in 7 characters, the timestamp
# in square brackets, then the message.
.logger_layout <- logger::layout_glue_generator(
    format = '{sprintf("%-7s", level)} [{format(time, "%Y-%m-%d %H:%M:%S")}] {msg}'
)

# Install the layout and send log records to stdout.
logger::log_layout(.logger_layout)
logger::log_appender(logger::appender_stdout)
# NOTE(review): presumably makes logger record errors raised by stop();
# confirm against the logger documentation.
logger::log_errors()
|
|
10
|
-
|
|
11
|
-
#' Check whether strings are backtick-quoted.
#'
#' A string counts as quoted when it has at least two characters and both
#' its first and last characters are backticks. NA is never quoted.
#'
#' @param x Character vector (other types are coerced with as.character()).
#' @return Logical vector the same length as `x`.
.isBQuoted <- function(x) {
    x <- as.character(x)
    # Compare the first and last *characters* of each string; indexing with
    # x[1] / x[length(x)] would compare whole vector elements instead.
    !is.na(x) & nchar(x) >= 2 & startsWith(x, "`") & endsWith(x, "`")
}

#' Wrap strings in backticks unless they are already backtick-quoted.
#'
#' @param x Character vector (other types are coerced with as.character()).
#' @return Character vector the same length as `x`.
bQuote <- function(x) {
    quoted <- .isBQuoted(x)
    out <- as.character(x)
    out[!quoted] <- paste0("`", out[!quoted], "`")
    out
}
|
|
23
|
-
|
|
24
|
-
#' Turn strings into slugs.
#'
#' Steps, in order: replace every character that is neither alphanumeric
#' nor a space with `non_alphanum_replace`, trim surrounding whitespace,
#' replace each remaining whitespace character with `space_replace`, and
#' optionally lower-case the result.
#'
#' @param x Strings to slugify.
#' @param non_alphanum_replace Replacement for non-alphanumeric characters.
#' @param space_replace Replacement for each whitespace character.
#' @param tolower Lower-case the result?
#' @return The slugified strings.
slugify <- function(x, non_alphanum_replace="", space_replace="_", tolower=TRUE) {
    stripped <- trimws(gsub("[^[:alnum:] ]", non_alphanum_replace, x))
    slug <- gsub("[[:space:]]", space_replace, stripped)

    if (!tolower) {
        return(slug)
    }
    # The `tolower` argument shadows the function name, so call it explicitly
    base::tolower(slug)
}
|
|
33
|
-
|
|
34
|
-
#' A lighter-weight `do.call()`.
#'
#' Builds the call from argument *names* and evaluates it with the argument
#' values supplied as the evaluation environment, instead of embedding the
#' named values in the call itself.
#'
#' source: Gmisc
#' author: Max Gordon <max@gforge.se>
#'
#' @param what A function, a function name (optionally "pkg::fun" or
#'   "pkg:::fun"), or a symbol naming a function.
#' @param args List of arguments for the call; may mix named and unnamed.
#' @param quote Quote the arguments (via `enquote()`) before calling?
#' @param envir Environment used to look up `what` by name, and enclosure
#'   for evaluating the call.
#' @return The value of the call.
do_call <- function (what, args, quote = FALSE, envir = parent.frame()) {
    # Resolve the default now so it always refers to the caller's frame
    force(envir)

    if (quote) {
        args <- lapply(args, enquote)
    }

    if (is.null(names(args)) || is.data.frame(args)) {
        # Nothing to refer to by name: embed the values directly in the call
        argn <- args
        args <- list()
    } else {
        named <- names(args) != ""
        # Named arguments are passed as symbols resolved in `args`
        argn <- lapply(names(args)[named], as.name)
        names(argn) <- names(args)[named]
        # Unnamed arguments are embedded by value
        argn <- c(argn, args[!named])
        args <- args[named]
    }

    if (inherits(x = what, what = "character")) {
        if (is.character(what)) {
            fn <- strsplit(what, "[:]{2,3}")[[1]]
            what <- if (length(fn) == 1) {
                get(fn[[1]], envir = envir, mode = "function")
            } else {
                get(fn[[2]], envir = asNamespace(fn[[1]]), mode = "function")
            }
        }
        call <- as.call(c(list(what), argn))
    } else if (inherits(x = what, "function")) {
        # deparse() can return several lines (e.g. for an inline anonymous
        # function); collapse them so a single symbol name is produced.
        f_name <- paste(deparse(substitute(what)), collapse = " ")
        call <- as.call(c(list(as.name(f_name)), argn))
        args[[f_name]] <- what
    } else if (inherits(x = what, what = "name")) {
        # The symbol is the head of the call and the arguments follow it
        call <- as.call(c(list(what), argn))
    } else {
        # Without this, `call` below would resolve to base::call
        stop(
            "do_call: `what` must be a function, a function name or a symbol",
            call. = FALSE
        )
    }

    eval(call,
         envir = args,
         enclos = envir)
}
|
|
78
|
-
|
|
79
|
-
#' Fill in missing keys of a list with default values.
#'
#' Keys already present in `x` are left untouched; missing ones are appended
#' in the order given. A NULL default is kept as an explicit NULL entry.
#'
#' @param x A list, or NULL (treated as an empty list).
#' @param ... Named default values.
#' @return `x` with the missing keys appended.
list_setdefault <- function(x, ...) {
    if (is.null(x)) {
        x <- list()
    } else if (!is.list(x)) {
        stop("list_setdefault: list expected")
    }

    defaults <- list(...)
    for (key in names(defaults)) {
        if (key %in% names(x)) {
            next
        }
        # c() with a one-element sub-list keeps NULL defaults, which
        # `x[[key]] <- NULL` would drop.
        x <- c(x, defaults[key])
    }
    x
}
|
|
96
|
-
|
|
97
|
-
#' Update the entries of one list from another.
#'
#' Every key of `y` is written into `x`. A NULL value in `y` does not delete
#' the key: any existing entry is removed and the key is re-appended at the
#' end holding an explicit NULL.
#'
#' @param x List to update, or NULL (treated as an empty list).
#' @param y List of new values, or NULL (treated as an empty list).
#' @return The updated list.
list_update <- function(x, y) {
    target <- if (is.null(x)) list() else x
    patch <- if (is.null(y)) list() else y

    for (key in names(patch)) {
        value <- patch[[key]]
        if (!is.null(value)) {
            target[[key]] <- value
            next
        }
        # Drop any existing entry, then append the key with a NULL value
        target[[key]] <- NULL
        target <- c(target, patch[key])
    }
    target
}
|