biopipen 0.25.4__tar.gz → 0.26.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- {biopipen-0.25.4 → biopipen-0.26.1}/PKG-INFO +9 -10
- biopipen-0.26.1/biopipen/__init__.py +1 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/core/config.toml +2 -0
- biopipen-0.26.1/biopipen/ns/rnaseq.py +158 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/ns/scrna.py +17 -3
- biopipen-0.26.1/biopipen/ns/snp.py +70 -0
- biopipen-0.26.1/biopipen/ns/stats.py +320 -0
- biopipen-0.26.1/biopipen/scripts/rnaseq/Simulation-ESCO.R +177 -0
- biopipen-0.26.1/biopipen/scripts/rnaseq/Simulation-RUVcorr.R +42 -0
- biopipen-0.26.1/biopipen/scripts/rnaseq/Simulation.R +23 -0
- biopipen-0.26.1/biopipen/scripts/rnaseq/UnitConversion.R +342 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/scrna/CellsDistribution.R +225 -147
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/scrna/MarkersFinder.R +53 -47
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/scrna/RadarPlots.R +6 -3
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/scrna/SeuratClusterStats-stats.R +37 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/scrna/TopExpressingGenes.R +58 -33
- biopipen-0.26.1/biopipen/scripts/snp/PlinkSimulation.py +88 -0
- biopipen-0.26.1/biopipen/scripts/stats/ChowTest.R +119 -0
- biopipen-0.26.1/biopipen/scripts/stats/DiffCoexpr.R +150 -0
- biopipen-0.26.1/biopipen/scripts/stats/LiquidAssoc.R +136 -0
- biopipen-0.26.1/biopipen/scripts/stats/MetaPvalue.R +128 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/tcr/CloneResidency.R +37 -72
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/utils/misc.R +19 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/utils/misc.py +15 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/pyproject.toml +12 -10
- {biopipen-0.25.4 → biopipen-0.26.1}/setup.py +13 -9
- biopipen-0.25.4/biopipen/__init__.py +0 -1
- biopipen-0.25.4/biopipen/ns/rnaseq.py +0 -21
- biopipen-0.25.4/biopipen/scripts/rnaseq/UnitConversion.R +0 -73
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/core/__init__.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/core/config.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/core/defaults.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/core/filters.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/core/proc.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/core/testing.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/ns/__init__.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/ns/bam.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/ns/bcftools.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/ns/bed.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/ns/cellranger.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/ns/cellranger_pipeline.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/ns/cnv.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/ns/cnvkit.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/ns/cnvkit_pipeline.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/ns/delim.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/ns/gene.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/ns/gsea.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/ns/misc.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/ns/plot.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/ns/scrna_metabolic_landscape.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/ns/tcgamaf.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/ns/tcr.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/ns/vcf.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/ns/web.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/reports/bam/CNAClinic.svelte +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/reports/bam/CNVpytor.svelte +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/reports/bam/ControlFREEC.svelte +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/reports/cellranger/CellRangerCount.svelte +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/reports/cellranger/CellRangerSummary.svelte +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/reports/cellranger/CellRangerVdj.svelte +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/reports/cnv/AneuploidyScore.svelte +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/reports/cnv/AneuploidyScoreSummary.svelte +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/reports/cnv/TMADScoreSummary.svelte +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/reports/cnvkit/CNVkitDiagram.svelte +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/reports/cnvkit/CNVkitHeatmap.svelte +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/reports/cnvkit/CNVkitScatter.svelte +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/reports/delim/SampleInfo.svelte +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/reports/gsea/FGSEA.svelte +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/reports/gsea/GSEA.svelte +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/reports/scrna/CellsDistribution.svelte +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/reports/scrna/DimPlots.svelte +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/reports/scrna/MarkersFinder.svelte +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/reports/scrna/MetaMarkers.svelte +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/reports/scrna/RadarPlots.svelte +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/reports/scrna/ScFGSEA.svelte +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/reports/scrna/SeuratClusterStats.svelte +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/reports/scrna/SeuratPreparing.svelte +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/reports/scrna/TopExpressingGenes.svelte +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/reports/tcr/CDR3AAPhyschem.svelte +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/reports/tcr/CloneResidency.svelte +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/reports/tcr/Immunarch.svelte +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/reports/tcr/SampleDiversity.svelte +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/reports/tcr/TCRClusterStats.svelte +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/reports/tcr/TESSA.svelte +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/reports/tcr/VJUsage.svelte +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/reports/utils/gsea.liq +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/reports/utils/misc.liq +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/reports/vcf/TruvariBenchSummary.svelte +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/reports/vcf/TruvariConsistency.svelte +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/bam/BamMerge.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/bam/BamSplitChroms.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/bam/CNAClinic.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/bam/CNVpytor.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/bam/ControlFREEC.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/bcftools/BcftoolsFilter.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/bcftools/BcftoolsSort.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/bed/Bed2Vcf.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/bed/BedConsensus.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/bed/BedLiftOver.sh +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/bed/BedtoolsMerge.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/cellranger/CellRangerCount.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/cellranger/CellRangerSummary.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/cellranger/CellRangerVdj.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/cnv/AneuploidyScore.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/cnv/AneuploidyScoreSummary.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/cnv/TMADScore.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/cnv/TMADScoreSummary.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/cnvkit/CNVkitAccess.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/cnvkit/CNVkitAutobin.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/cnvkit/CNVkitBatch.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/cnvkit/CNVkitCall.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/cnvkit/CNVkitCoverage.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/cnvkit/CNVkitDiagram.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/cnvkit/CNVkitFix.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/cnvkit/CNVkitGuessBaits.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/cnvkit/CNVkitHeatmap.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/cnvkit/CNVkitReference.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/cnvkit/CNVkitScatter.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/cnvkit/CNVkitSegment.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/cnvkit/guess_baits.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/delim/RowsBinder.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/delim/SampleInfo.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/gene/GeneNameConversion.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/gsea/Enrichr.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/gsea/FGSEA.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/gsea/GSEA.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/gsea/PreRank.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/misc/Config2File.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/misc/Str2File.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/plot/Heatmap.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/plot/VennDiagram.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/scrna/AnnData2Seurat.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/scrna/CellTypeAnnotation-direct.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/scrna/CellTypeAnnotation.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/scrna/DimPlots.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/scrna/ExprImpution-alra.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/scrna/ExprImpution-rmagic.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/scrna/ExprImpution-scimpute.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/scrna/ExprImpution.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/scrna/MetaMarkers.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/scrna/ModuleScoreCalculator.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/scrna/SCImpute.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/scrna/ScFGSEA.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/scrna/Seurat2AnnData.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/scrna/SeuratClusterStats-features.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/scrna/SeuratClusterStats-hists.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/scrna/SeuratClusterStats.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/scrna/SeuratClustering.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/scrna/SeuratFilter.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/scrna/SeuratLoading.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/scrna/SeuratMap2Ref.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/scrna/SeuratMetadataMutater.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/scrna/SeuratPreparing.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/scrna/SeuratSplit.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/scrna/SeuratSubClustering.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/scrna/SeuratSubset.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/scrna/SeuratTo10X.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/scrna/Write10X.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/scrna/celltypist-wrapper.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/scrna/sctype.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/tcgamaf/Maf2Vcf.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/tcgamaf/MafAddChr.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/tcgamaf/maf2vcf.pl +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/tcr/Attach2Seurat.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/tcr/CDR3AAPhyschem.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/tcr/CloneSizeQQPlot.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/tcr/GIANA/GIANA.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/tcr/GIANA/GIANA4.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/tcr/GIANA/Imgt_Human_TRBV.fasta +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/tcr/GIANA/query.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/tcr/Immunarch-basic.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/tcr/Immunarch-clonality.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/tcr/Immunarch-diversity.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/tcr/Immunarch-geneusage.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/tcr/Immunarch-kmer.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/tcr/Immunarch-overlap.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/tcr/Immunarch-spectratyping.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/tcr/Immunarch-tracking.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/tcr/Immunarch-vjjunc.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/tcr/Immunarch.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/tcr/Immunarch2VDJtools.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/tcr/ImmunarchFilter.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/tcr/ImmunarchLoading.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/tcr/ImmunarchSplitIdents.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/tcr/SampleDiversity.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/tcr/TCRClusterStats.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/tcr/TCRClustering.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/tcr/TESSA.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/tcr/TESSA_source/Atchley_factors.csv +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/tcr/TESSA_source/BriseisEncoder.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/tcr/TESSA_source/MCMC_control.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/tcr/TESSA_source/TrainedEncoder.h5 +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/tcr/TESSA_source/fixed_b.csv +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/tcr/TESSA_source/initialization.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/tcr/TESSA_source/post_analysis.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/tcr/TESSA_source/real_data.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/tcr/TESSA_source/update.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/tcr/TESSA_source/utility.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/tcr/VJUsage.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/tcr/immunarch-patched.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/tcr/vdjtools-patch.sh +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/vcf/TruvariBench.sh +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/vcf/TruvariBenchSummary.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/vcf/TruvariConsistency.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/vcf/Vcf2Bed.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/vcf/VcfAnno.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/vcf/VcfDownSample.sh +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/vcf/VcfFilter.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/vcf/VcfFix.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/vcf/VcfFix_utils.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/vcf/VcfIndex.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/vcf/VcfIntersect.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/vcf/VcfLiftOver.sh +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/vcf/VcfSplitSamples.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/web/Download.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/scripts/web/DownloadList.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/utils/__init__.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/utils/caching.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/utils/common_docstrs.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/utils/gene.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/utils/gene.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/utils/gsea.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/utils/io.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/utils/mutate_helpers.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/utils/plot.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/utils/reference.py +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/utils/rnaseq.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/utils/single_cell.R +0 -0
- {biopipen-0.25.4 → biopipen-0.26.1}/biopipen/utils/vcf.py +0 -0
|
@@ -1,23 +1,22 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: biopipen
|
|
3
|
-
Version: 0.25.4
|
|
3
|
+
Version: 0.26.1
|
|
4
4
|
Summary: Bioinformatics processes/pipelines that can be run from `pipen run`
|
|
5
5
|
License: MIT
|
|
6
6
|
Author: pwwang
|
|
7
7
|
Author-email: pwwang@pwwang.com
|
|
8
|
-
Requires-Python: >=3.8,<4.0
|
|
8
|
+
Requires-Python: >=3.9,<4.0
|
|
9
9
|
Classifier: License :: OSI Approved :: MIT License
|
|
10
10
|
Classifier: Programming Language :: Python :: 3
|
|
11
|
-
Classifier: Programming Language :: Python :: 3.8
|
|
12
11
|
Classifier: Programming Language :: Python :: 3.9
|
|
13
12
|
Classifier: Programming Language :: Python :: 3.10
|
|
14
13
|
Classifier: Programming Language :: Python :: 3.11
|
|
15
14
|
Classifier: Programming Language :: Python :: 3.12
|
|
16
15
|
Provides-Extra: runinfo
|
|
17
|
-
Requires-Dist: datar[pandas] (>=0.15.
|
|
18
|
-
Requires-Dist: pipen-board[report] (>=0.
|
|
19
|
-
Requires-Dist: pipen-cli-run (>=0.
|
|
20
|
-
Requires-Dist: pipen-filters (>=0.
|
|
21
|
-
Requires-Dist: pipen-poplog (>=0.0.2
|
|
22
|
-
Requires-Dist: pipen-runinfo (>=0.
|
|
23
|
-
Requires-Dist: pipen-verbose (>=0.
|
|
16
|
+
Requires-Dist: datar[pandas] (>=0.15.5,<0.16.0)
|
|
17
|
+
Requires-Dist: pipen-board[report] (>=0.15,<0.16)
|
|
18
|
+
Requires-Dist: pipen-cli-run (>=0.13,<0.14)
|
|
19
|
+
Requires-Dist: pipen-filters (>=0.12,<0.13)
|
|
20
|
+
Requires-Dist: pipen-poplog (>=0.1,<0.2)
|
|
21
|
+
Requires-Dist: pipen-runinfo (>=0.6,<0.7) ; extra == "runinfo"
|
|
22
|
+
Requires-Dist: pipen-verbose (>=0.11,<0.12)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.26.1"
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
"""RNA-seq data analysis"""
|
|
2
|
+
|
|
3
|
+
from ..core.proc import Proc
|
|
4
|
+
from ..core.config import config
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class UnitConversion(Proc):
|
|
8
|
+
"""Convert expression value units back and forth
|
|
9
|
+
|
|
10
|
+
See <https://haroldpimentel.wordpress.com/2014/05/08/what-the-fpkm-a-review-rna-seq-expression-units/>
|
|
11
|
+
and <https://docs.gdc.cancer.gov/Data/Bioinformatics_Pipelines/Expression_mRNA_Pipeline/#fpkm>.
|
|
12
|
+
|
|
13
|
+
The following conversions are supported -
|
|
14
|
+
* `count -> cpm, fpkm/rpkm, fpkmuq/rpkmuq, tpm, tmm`
|
|
15
|
+
* `fpkm/rpkm -> count, tpm, cpm`
|
|
16
|
+
* `tpm -> count, fpkm/rpkm, cpm`
|
|
17
|
+
* `cpm -> count, fpkm/rpkm, tpm`
|
|
18
|
+
NOTE that during some conversions, `sum(counts/effLen)` is approximated to
|
|
19
|
+
`sum(counts)/sum(effLen) * length(effLen)`
|
|
20
|
+
|
|
21
|
+
You can also use this process to just transform the expression values, e.g., take
|
|
22
|
+
log2 of the expression values. In this case, you can set `inunit` and `outunit` to
|
|
23
|
+
`count` and `log2(count + 1)` respectively.
|
|
24
|
+
|
|
25
|
+
Input:
|
|
26
|
+
infile: Input file containing expression values
|
|
27
|
+
The file should be a matrix with rows representing genes and columns
|
|
28
|
+
representing samples.
|
|
29
|
+
It could be an RDS file containing a data frame or a matrix, or a
|
|
30
|
+
text file containing a matrix with tab as the delimiter. The text
|
|
31
|
+
file can be gzipped.
|
|
32
|
+
|
|
33
|
+
Output:
|
|
34
|
+
outfile: Output file containing the converted expression values
|
|
35
|
+
The file will be a matrix with rows representing genes and columns
|
|
36
|
+
representing samples.
|
|
37
|
+
|
|
38
|
+
Envs:
|
|
39
|
+
inunit: The input unit of the expression values.
|
|
40
|
+
You can also use an expression to indicate the input unit, e.g.,
|
|
41
|
+
`log2(counts + 1)`. The expression should be like `A * fn(B*X + C) + D`,
|
|
42
|
+
where `A`, `B`, `C` and `D` are constants, `fn` is a function, and X is
|
|
43
|
+
the input unit.
|
|
44
|
+
Currently only `expr`, `sqrt`, `log2`, `log10` and `log` are supported as
|
|
45
|
+
functions.
|
|
46
|
+
Supported input units are:
|
|
47
|
+
* counts/count/rawcounts/rawcount: raw counts.
|
|
48
|
+
* cpm: counts per million.
|
|
49
|
+
* fpkm/rpkm: fragments per kilobase of transcript per million.
|
|
50
|
+
* fpkmuq/rpkmuq: upper quartile normalized FPKM/RPKM.
|
|
51
|
+
* tpm: transcripts per million.
|
|
52
|
+
* tmm: trimmed mean of M-values.
|
|
53
|
+
outunit: The output unit of the expression values. An expression can also be
|
|
54
|
+
used for transformation (e.g. `log2(tpm + 1)`). If `inunit` is `count`,
|
|
55
|
+
then this means we are converting raw counts to tpm, and transforming it
|
|
56
|
+
to `log2(tpm + 1)` as the output. Any expression supported by `R` can be
|
|
57
|
+
used. Same units as `inunit` are supported.
|
|
58
|
+
refexon: Path to the reference exon gff file.
|
|
59
|
+
meanfl (type=auto): A file containing the mean fragment length for each sample
|
|
60
|
+
by rows (samples as rowname), without header.
|
|
61
|
+
Or a fixed universal estimated number (1 used by TCGA).
|
|
62
|
+
nreads (type=auto): The estimated total number of reads for each sample.
|
|
63
|
+
Or you can pass a file with the number for each sample by rows
|
|
64
|
+
(samples as rowname), without header.
|
|
65
|
+
When converting `fpkm/rpkm -> count`, it should be total reads of that sample.
|
|
66
|
+
When converting `cpm -> count`: it should be total reads of that sample.
|
|
67
|
+
When converting `tpm -> count`: it should be total reads of that sample.
|
|
68
|
+
When converting `tpm -> cpm`: it should be total reads of that sample.
|
|
69
|
+
When converting `tpm -> fpkm/rpkm`: it should be `sum(fpkm)` of that sample.
|
|
70
|
+
It is not used when converting `count -> cpm, fpkm/rpkm, tpm`.
|
|
71
|
+
""" # noqa: E501
|
|
72
|
+
input = "infile:file"
|
|
73
|
+
output = "outfile:file:{{in.infile | basename}}"
|
|
74
|
+
lang = config.lang.rscript
|
|
75
|
+
envs = {
|
|
76
|
+
"inunit": None,
|
|
77
|
+
"outunit": None,
|
|
78
|
+
"refexon": config.ref.refexon,
|
|
79
|
+
"meanfl": 1,
|
|
80
|
+
"nreads": 1_000_000,
|
|
81
|
+
}
|
|
82
|
+
script = "file://../scripts/rnaseq/UnitConversion.R"
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class Simulation(Proc):
|
|
86
|
+
"""Simulate RNA-seq data using ESCO/RUVcorr package
|
|
87
|
+
|
|
88
|
+
Input:
|
|
89
|
+
ngenes: Number of genes to simulate
|
|
90
|
+
nsamples: Number of samples to simulate
|
|
91
|
+
If you want to force the process to re-simulate for the same
|
|
92
|
+
`ngenes` and `nsamples`, you can set a different value for `envs.seed`.
|
|
93
|
+
Note that the samples will be shown as cells in the output (since
|
|
94
|
+
the simulation is designed for single-cell RNA-seq data).
|
|
95
|
+
|
|
96
|
+
Output:
|
|
97
|
+
outfile: Output file containing the simulated data with rows representing
|
|
98
|
+
genes and columns representing samples.
|
|
99
|
+
outdir: Output directory containing the simulated data
|
|
100
|
+
`sim.rds` and `True.rds` will be generated.
|
|
101
|
+
For `ESCO`, `sim.rds` contains the simulated data in a
|
|
102
|
+
`SingleCellExperiment` object, and `True.rds` contains the matrix of true
|
|
103
|
+
counts.
|
|
104
|
+
For `RUVcorr`, `sim.rds` contains the simulated data in a list with
|
|
105
|
+
`Truth`, A matrix containing the values of Xβ; `Y` A matrix containing the
|
|
106
|
+
values in `Y`; `Noise` A matrix containing the values in `Wα`; `Sigma`
|
|
107
|
+
A matrix containing the true gene-gene correlations, as defined by Xβ; and
|
|
108
|
+
`Info` A matrix containing some of the general information about the
|
|
109
|
+
simulation.
|
|
110
|
+
For all matrices, rows represent genes and columns represent samples.
|
|
111
|
+
|
|
112
|
+
Envs:
|
|
113
|
+
tool (choice): Which tool to use for simulation.
|
|
114
|
+
- ESCO: uses the [ESCO](https://github.com/JINJINT/ESCO) package.
|
|
115
|
+
- RUVcorr: uses the [RUVcorr](https://rdrr.io/bioc/RUVcorr/) package.
|
|
116
|
+
ncores (type=int): Number of cores to use.
|
|
117
|
+
seed (type=int): Random seed.
|
|
118
|
+
If not set, seed will not be set.
|
|
119
|
+
esco_args (ns): Additional arguments to pass to the simulation function.
|
|
120
|
+
- save (choice): Which type of data to save to `out.outfile`.
|
|
121
|
+
- `simulated-truth`: saves the simulated true counts.
|
|
122
|
+
- `zero-inflated`: saves the zero-inflated counts.
|
|
123
|
+
- `down-sampled`: saves the down-sampled counts.
|
|
124
|
+
- type (choice): Which type of heterogeneity to use.
|
|
125
|
+
- single: produces a single population.
|
|
126
|
+
- group: produces distinct groups.
|
|
127
|
+
- tree: produces distinct groups but admits a tree structure.
|
|
128
|
+
- traj: produces distinct groups but admits a smooth trajectory
|
|
129
|
+
structure.
|
|
130
|
+
- <more>: See <https://rdrr.io/github/JINJINT/ESCO/man/escoParams.html>.
|
|
131
|
+
ruvcorr_args (ns): Additional arguments to pass to the simulation
|
|
132
|
+
function.
|
|
133
|
+
- <more>: See <https://rdrr.io/bioc/RUVcorr/man/simulateGEdata.html>.
|
|
134
|
+
transpose_output (flag): If set, the output will be transposed.
|
|
135
|
+
index_start (type=int): The index to start from when naming the samples.
|
|
136
|
+
Affects the sample names in `out.outfile` only.
|
|
137
|
+
"""
|
|
138
|
+
input = "ngenes:var, nsamples:var"
|
|
139
|
+
output = [
|
|
140
|
+
"outfile:file:{{in.ngenes}}x{{in.nsamples}}.sim/simulated.txt",
|
|
141
|
+
"outdir:dir:{{in.ngenes}}x{{in.nsamples}}.sim",
|
|
142
|
+
]
|
|
143
|
+
lang = config.lang.rscript
|
|
144
|
+
envs = {
|
|
145
|
+
"tool": "RUVcorr",
|
|
146
|
+
"ncores": config.misc.ncores,
|
|
147
|
+
"type": "single",
|
|
148
|
+
"esco_args": {
|
|
149
|
+
"dropout-type": "none",
|
|
150
|
+
"save": "simulated-truth",
|
|
151
|
+
"type": "single",
|
|
152
|
+
},
|
|
153
|
+
"ruvcorr_args": {},
|
|
154
|
+
"seed": None,
|
|
155
|
+
"transpose_output": False,
|
|
156
|
+
"index_start": 1,
|
|
157
|
+
}
|
|
158
|
+
script = "file://../scripts/rnaseq/Simulation.R"
|
|
@@ -483,14 +483,18 @@ class SeuratClusterStats(Proc):
|
|
|
483
483
|
The parameters from the cases can overwrite the default parameters.
|
|
484
484
|
- frac (flag): Whether to output the fraction of cells instead of number.
|
|
485
485
|
- pie (flag): Also output a pie chart?
|
|
486
|
+
- circos (flag): Also output a circos plot?
|
|
486
487
|
- table (flag): Whether to output a table (in tab-delimited format) and in the report.
|
|
487
488
|
- frac_ofall (flag): Whether to output the fraction against all cells,
|
|
488
489
|
instead of the fraction in each group.
|
|
490
|
+
Does not work for circos plot.
|
|
489
491
|
Only works when `frac` is `True` and `group-by` is specified.
|
|
490
492
|
- transpose (flag): Whether to transpose the cluster and group, that is,
|
|
491
493
|
using group as the x-axis and cluster to fill the plot.
|
|
494
|
+
For circos plot, when transposed, the arrows will be drawn from the idents (by `ident`) to the
|
|
495
|
+
groups (by `group-by`).
|
|
492
496
|
Only works when `group-by` is specified.
|
|
493
|
-
- position (choice): The position of the bars.
|
|
497
|
+
- position (choice): The position of the bars. Does not work for pie and circos plots.
|
|
494
498
|
- stack: Use `position_stack()`.
|
|
495
499
|
- fill: Use `position_fill()`.
|
|
496
500
|
- dodge: Use `position_dodge()`.
|
|
@@ -499,8 +503,13 @@ class SeuratClusterStats(Proc):
|
|
|
499
503
|
- group-by: The column name in metadata to group the cells.
|
|
500
504
|
Not supported for pie charts.
|
|
501
505
|
- split-by: The column name in metadata to split the cells into different plots.
|
|
506
|
+
Not supported for circos plots.
|
|
502
507
|
- subset: An expression to subset the cells, will be passed to
|
|
503
508
|
`dplyr::filter()` on metadata.
|
|
509
|
+
- circos_devpars (ns): The device parameters for the circos plots.
|
|
510
|
+
- res (type=int): The resolution of the plots.
|
|
511
|
+
- height (type=int): The height of the plots.
|
|
512
|
+
- width (type=int): The width of the plots.
|
|
504
513
|
- pie_devpars (ns): The device parameters for the pie charts.
|
|
505
514
|
- res (type=int): The resolution of the plots.
|
|
506
515
|
- height (type=int): The height of the plots.
|
|
@@ -634,6 +643,7 @@ class SeuratClusterStats(Proc):
|
|
|
634
643
|
"stats_defaults": {
|
|
635
644
|
"frac": False,
|
|
636
645
|
"pie": False,
|
|
646
|
+
"circos": False,
|
|
637
647
|
"table": False,
|
|
638
648
|
"position": "auto",
|
|
639
649
|
"frac_ofall": False,
|
|
@@ -644,6 +654,7 @@ class SeuratClusterStats(Proc):
|
|
|
644
654
|
"subset": None,
|
|
645
655
|
"devpars": {"res": 100, "height": 600, "width": 800},
|
|
646
656
|
"pie_devpars": {"res": 100, "height": 600, "width": 800},
|
|
657
|
+
"circos_devpars": {"res": 100, "height": 600, "width": 600},
|
|
647
658
|
},
|
|
648
659
|
"stats": {
|
|
649
660
|
"Number of cells in each cluster": {
|
|
@@ -882,8 +893,9 @@ class CellsDistribution(Proc):
|
|
|
882
893
|
each: The column name in metadata to separate the cells into different plots.
|
|
883
894
|
section: The section to show in the report. This allows different cases to be put in the same section in report.
|
|
884
895
|
Only works when `each` is not specified.
|
|
885
|
-
overlap (list): Plot the overlap of
|
|
886
|
-
|
|
896
|
+
overlap (list): Plot the overlap of cell groups (values of `cells_by`) in different cases
|
|
897
|
+
under the same section.
|
|
898
|
+
The section must have at least 2 cases, each case should have a single `cells_by` column.
|
|
887
899
|
cases (type=json;order=99): If you have multiple cases, you can specify them here.
|
|
888
900
|
Keys are the names of the cases and values are the options above except `mutaters`.
|
|
889
901
|
If some options are not specified, the options in `envs` will be used.
|
|
@@ -1141,6 +1153,7 @@ class TopExpressingGenes(Proc):
|
|
|
1141
1153
|
markers See below for all libraries.
|
|
1142
1154
|
<https://maayanlab.cloud/Enrichr/#libraries>
|
|
1143
1155
|
n (type=int): The number of top expressing genes to find.
|
|
1156
|
+
subset: An expression to subset the cells for each case.
|
|
1144
1157
|
cases (type=json): If you have multiple cases, you can specify them
|
|
1145
1158
|
here. The keys are the names of the cases and the values are the
|
|
1146
1159
|
above options except `mutaters`. If some options are
|
|
@@ -1161,6 +1174,7 @@ class TopExpressingGenes(Proc):
|
|
|
1161
1174
|
"section": "DEFAULT",
|
|
1162
1175
|
"dbs": ["KEGG_2021_Human", "MSigDB_Hallmark_2020"],
|
|
1163
1176
|
"n": 250,
|
|
1177
|
+
"subset": None,
|
|
1164
1178
|
"cases": {},
|
|
1165
1179
|
}
|
|
1166
1180
|
plugin_opts = {
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""Plink processes"""
|
|
2
|
+
|
|
3
|
+
from ..core.proc import Proc
|
|
4
|
+
from ..core.config import config
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class PlinkSimulation(Proc):
|
|
8
|
+
"""Simulate SNPs using PLINK v1.9
|
|
9
|
+
|
|
10
|
+
See also <https://www.cog-genomics.org/plink/1.9/input#simulate>.
|
|
11
|
+
|
|
12
|
+
Input:
|
|
13
|
+
nsnps: Number of SNPs to simulate
|
|
14
|
+
ncases: Number of cases to simulate
|
|
15
|
+
nctrls: Number of controls to simulate
|
|
16
|
+
|
|
17
|
+
Output:
|
|
18
|
+
outdir: Output directory containing the simulated data
|
|
19
|
+
`plink_sim.bed`, `plink_sim.bim`, and `plink_sim.fam` will be generated.
|
|
20
|
+
gtmat: Genotype matrix file containing the simulated data with rows representing
|
|
21
|
+
SNPs and columns representing samples.
|
|
22
|
+
|
|
23
|
+
Envs:
|
|
24
|
+
plink: Path to PLINK v1.9
|
|
25
|
+
seed (type=int): Random seed.
|
|
26
|
+
If not set, seed will not be set.
|
|
27
|
+
label: Prefix label for the SNPs.
|
|
28
|
+
prevalence (type=float): Disease prevalence.
|
|
29
|
+
minfreq (type=float): Minimum allele frequency.
|
|
30
|
+
maxfreq (type=float): Maximum allele frequency.
|
|
31
|
+
hetodds (type=float): Odds ratio for heterozygous genotypes.
|
|
32
|
+
homodds (type=float): Odds ratio for homozygous genotypes.
|
|
33
|
+
missing (type=float): Proportion of missing genotypes.
|
|
34
|
+
args (ns): Additional arguments to pass to PLINK.
|
|
35
|
+
- <more>: see <https://www.cog-genomics.org/plink/1.9/input#simulate>.
|
|
36
|
+
transpose_gtmat (flag): If set, the genotype matrix (`out.gtmat`) will
|
|
37
|
+
be transposed.
|
|
38
|
+
sample_prefix: Use this prefix for the sample names. If not set, the sample
|
|
39
|
+
names will be `per0_per0`, `per1_per1`, `per2_per2`, etc. If set, the
|
|
40
|
+
sample names will be `prefix0`, `prefix1`, `prefix2`, etc.
|
|
41
|
+
This only affects the sample names in the genotype matrix file
|
|
42
|
+
(`out.gtmat`).
|
|
43
|
+
"""
|
|
44
|
+
input = "nsnps:var, ncases:var, nctrls:var"
|
|
45
|
+
output = [
|
|
46
|
+
(
|
|
47
|
+
"outdir:dir:{{in.nsnps | int}}_"
|
|
48
|
+
"{{in.ncases | int}}xcases_{{in.nctrls | int}}xctrls.plink_sim"
|
|
49
|
+
),
|
|
50
|
+
(
|
|
51
|
+
"gtmat:file:{{in.nsnps | int}}_"
|
|
52
|
+
"{{in.ncases | int}}xcases_{{in.nctrls | int}}xctrls.plink_sim/gtmat.txt"
|
|
53
|
+
),
|
|
54
|
+
]
|
|
55
|
+
lang = config.lang.python
|
|
56
|
+
envs = {
|
|
57
|
+
"plink": config.exe.plink,
|
|
58
|
+
"seed": None,
|
|
59
|
+
"label": "SNP",
|
|
60
|
+
"prevalence": 0.01,
|
|
61
|
+
"minfreq": 0.0,
|
|
62
|
+
"maxfreq": 1.0,
|
|
63
|
+
"hetodds": 1.0,
|
|
64
|
+
"homodds": 1.0,
|
|
65
|
+
"missing": 0.0,
|
|
66
|
+
"args": {},
|
|
67
|
+
"transpose_gtmat": False,
|
|
68
|
+
"sample_prefix": None,
|
|
69
|
+
}
|
|
70
|
+
script = "file://../scripts/snp/PlinkSimulation.py"
|
|
@@ -0,0 +1,320 @@
|
|
|
1
|
+
"""Provides processes for statistics."""
|
|
2
|
+
|
|
3
|
+
from ..core.proc import Proc
|
|
4
|
+
from ..core.config import config
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class ChowTest(Proc):
|
|
8
|
+
"""Massive Chow tests.
|
|
9
|
+
|
|
10
|
+
See Also https://en.wikipedia.org/wiki/Chow_test
|
|
11
|
+
|
|
12
|
+
Input:
|
|
13
|
+
infile: The input data file. The rows are samples and the columns are
|
|
14
|
+
features. It must be tab-delimited.
|
|
15
|
+
```
|
|
16
|
+
Sample F1 F2 F3 ... Fn
|
|
17
|
+
S1 1.2 3.4 5.6 7.8
|
|
18
|
+
S2 2.3 4.5 6.7 8.9
|
|
19
|
+
...
|
|
20
|
+
Sm 5.6 7.8 9.0 1.2
|
|
21
|
+
```
|
|
22
|
+
groupfile: The group file. The rows are the samples and the columns
|
|
23
|
+
are the groupings. It must be tab-delimited.
|
|
24
|
+
```
|
|
25
|
+
Sample G1 G2 G3 ... Gk
|
|
26
|
+
S1 0 1 0 0
|
|
27
|
+
S2 2 1 0 NA # exclude this sample
|
|
28
|
+
...
|
|
29
|
+
Sm 1 0 0 0
|
|
30
|
+
```
|
|
31
|
+
fmlfile: The formula file. The first column is grouping and the
|
|
32
|
+
second column is the formula. It must be tab-delimited.
|
|
33
|
+
```
|
|
34
|
+
Group Formula ... # Other columns to be added to outfile
|
|
35
|
+
G1 Fn ~ F1 + Fx + Fy # Fx, Fy could be covariates
|
|
36
|
+
G1 Fn ~ F2 + Fx + Fy
|
|
37
|
+
...
|
|
38
|
+
Gk Fn ~ F3 + Fx + Fy
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
Output:
|
|
42
|
+
outfile: The output file. It is a tab-delimited file with the first
|
|
43
|
+
column as the grouping and the second column as the p-value.
|
|
44
|
+
```
|
|
45
|
+
Group Formula ... Pooled Groups SSR SumSSR Fstat Pval Padj
|
|
46
|
+
G1 Fn ~ F1 0.123 2 1 0.123 0.123 0.123 0.123
|
|
47
|
+
G1 Fn ~ F2 0.123 2 1 0.123 0.123 0.123 0.123
|
|
48
|
+
...
|
|
49
|
+
Gk Fn ~ F3 0.123 2 1 0.123 0.123 0.123 0.123
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
Envs:
|
|
53
|
+
padj (choice): The method for p-value adjustment.
|
|
54
|
+
- none: No p-value adjustment (no Padj column in outfile).
|
|
55
|
+
- holm: Holm-Bonferroni method.
|
|
56
|
+
- hochberg: Hochberg method.
|
|
57
|
+
- hommel: Hommel method.
|
|
58
|
+
- bonferroni: Bonferroni method.
|
|
59
|
+
- BH: Benjamini-Hochberg method.
|
|
60
|
+
- BY: Benjamini-Yekutieli method.
|
|
61
|
+
- fdr: FDR correction method.
|
|
62
|
+
transpose_input (flag): Whether to transpose the input file.
|
|
63
|
+
transpose_group (flag): Whether to transpose the group file.
|
|
64
|
+
"""
|
|
65
|
+
input = "infile:file, groupfile:file, fmlfile:file"
|
|
66
|
+
output = "outfile:file:{{in.infile | stem}}.chowtest.txt"
|
|
67
|
+
lang = config.lang.rscript
|
|
68
|
+
envs = {
|
|
69
|
+
"padj": "none",
|
|
70
|
+
"transpose_input": False,
|
|
71
|
+
"transpose_group": False,
|
|
72
|
+
}
|
|
73
|
+
script = "file://../scripts/stats/ChowTest.R"
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class LiquidAssoc(Proc):
|
|
77
|
+
"""Liquid association tests.
|
|
78
|
+
|
|
79
|
+
See Also https://github.com/gundt/fastLiquidAssociation
|
|
80
|
+
Requires https://github.com/pwwang/fastLiquidAssociation
|
|
81
|
+
|
|
82
|
+
Input:
|
|
83
|
+
infile: The input data file. The rows are samples and the columns are
|
|
84
|
+
features. It must be tab-delimited.
|
|
85
|
+
```
|
|
86
|
+
Sample F1 F2 F3 ... Fn
|
|
87
|
+
S1 1.2 3.4 5.6 7.8
|
|
88
|
+
S2 2.3 4.5 6.7 8.9
|
|
89
|
+
...
|
|
90
|
+
Sm 5.6 7.8 9.0 1.2
|
|
91
|
+
```
|
|
92
|
+
The features (columns) will be tested pairwise, which will be the X and
|
|
93
|
+
Y columns in the result of `fastMLA`
|
|
94
|
+
covfile: The covariate file. The rows are the samples and the columns
|
|
95
|
+
are the covariates. It must be tab-delimited.
|
|
96
|
+
If provided, the data in `in.infile` will be adjusted by covariates by
|
|
97
|
+
regressing out the covariates and the residuals will be used for
|
|
98
|
+
liquid association tests.
|
|
99
|
+
groupfile: The group file. The rows are the samples and the columns
|
|
100
|
+
are the groupings. It must be tab-delimited.
|
|
101
|
+
```
|
|
102
|
+
Sample G1 G2 G3 ... Gk
|
|
103
|
+
S1 0 1 0 0
|
|
104
|
+
S2 2 1 0 NA # exclude this sample
|
|
105
|
+
...
|
|
106
|
+
Sm 1 0 0 0
|
|
107
|
+
```
|
|
108
|
+
This will serve as the Z column in the result of `fastMLA`.
|
|
109
|
+
This can be omitted. If so, `envs.nvec` should be specified, which is
|
|
110
|
+
to select column from `in.infile` as Z.
|
|
111
|
+
fmlfile: The formula file. The 3 columns are X3, X12 and X21. The results
|
|
112
|
+
will be filtered based on the formula. It must be tab-delimited without
|
|
113
|
+
header.
|
|
114
|
+
|
|
115
|
+
Output:
|
|
116
|
+
outfile: The output file.
|
|
117
|
+
```
|
|
118
|
+
X12 X21 X3 rhodiff MLA value estimates san.se wald Pval model
|
|
119
|
+
C38 C46 C5 0.87 0.32 0.67 0.20 10.87 0 F
|
|
120
|
+
C46 C38 C5 0.87 0.32 0.67 0.20 10.87 0 F
|
|
121
|
+
C27 C39 C4 0.94 0.34 1.22 0.38 10.03 0 F
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
Envs:
|
|
125
|
+
nvec: The column index (1-based) of Z in `in.infile`, if `in.groupfile` is
|
|
126
|
+
omitted. You can specify multiple columns by comma-separated values, or
|
|
127
|
+
a range of columns by `-`. For example, `1,3,5-7,9`. It also supports
|
|
128
|
+
column names. For example, `F1,F3`. `-` is not supported for column
|
|
129
|
+
names.
|
|
130
|
+
x: Similar to `nvec`, but limits the X group to the given features.
|
|
131
|
+
The rest of features (other than X and Z) in `in.infile` will
|
|
132
|
+
be used as Y.
|
|
133
|
+
The features in `in.infile` will still be tested pairwise, but only
|
|
134
|
+
features in X and Y will be kept.
|
|
135
|
+
topn (type=int): Number of results to return by `fastMLA`, ordered from
|
|
136
|
+
highest `|MLA|` value descending.
|
|
137
|
+
The default of the package is 2000, but here we set it to 1e6 to return as
|
|
138
|
+
many results as possible (also good to do pvalue adjustment).
|
|
139
|
+
rvalue (type=float): Tolerance value for LA approximation. Lower values of
|
|
140
|
+
rvalue will cause a more thorough search, but take longer.
|
|
141
|
+
cut (type=int): Value passed to the GLA function to create buckets
|
|
142
|
+
(equal to number of buckets+1). Values placing between 15-30 samples per
|
|
143
|
+
bucket are optimal. Must be a positive integer>1. By default,
|
|
144
|
+
`max(ceiling(nrow(data)/22), 4)` is used.
|
|
145
|
+
ncores (type=int): Number of cores to use for parallelization.
|
|
146
|
+
padj (choice): The method for p-value adjustment.
|
|
147
|
+
- none: No p-value adjustment (no Padj column in outfile).
|
|
148
|
+
- holm: Holm-Bonferroni method.
|
|
149
|
+
- hochberg: Hochberg method.
|
|
150
|
+
- hommel: Hommel method.
|
|
151
|
+
- bonferroni: Bonferroni method.
|
|
152
|
+
- BH: Benjamini-Hochberg method.
|
|
153
|
+
- BY: Benjamini-Yekutieli method.
|
|
154
|
+
- fdr: FDR correction method.
|
|
155
|
+
transpose_input (flag): Whether to transpose the input file.
|
|
156
|
+
transpose_group (flag): Whether to transpose the group file.
|
|
157
|
+
transpose_cov (flag): Whether to transpose the covariate file.
|
|
158
|
+
xyz_names: The names of X12, X21 and X3 in the final output file. Separated
|
|
159
|
+
by comma. For example, `X12,X21,X3`.
|
|
160
|
+
"""
|
|
161
|
+
input = "infile:file, covfile:file, groupfile:file, fmlfile:file"
|
|
162
|
+
output = "outfile:file:{{in.infile | stem}}.liquidassoc.txt"
|
|
163
|
+
lang = config.lang.rscript
|
|
164
|
+
envs = {
|
|
165
|
+
"nvec": None,
|
|
166
|
+
"x": None,
|
|
167
|
+
"topn": 1e6,
|
|
168
|
+
"rvalue": 0.5,
|
|
169
|
+
"cut": 20,
|
|
170
|
+
"ncores": config.misc.ncores,
|
|
171
|
+
"padj": "none",
|
|
172
|
+
"transpose_input": False,
|
|
173
|
+
"transpose_group": False,
|
|
174
|
+
"transpose_cov": False,
|
|
175
|
+
"xyz_names": None,
|
|
176
|
+
}
|
|
177
|
+
script = "file://../scripts/stats/LiquidAssoc.R"
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
class DiffCoexpr(Proc):
|
|
181
|
+
"""Differential co-expression analysis.
|
|
182
|
+
|
|
183
|
+
See also <https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-11-497>
|
|
184
|
+
and <https://github.com/DavisLaboratory/dcanr/blob/8958d61788937eef3b7e2b4118651cbd7af7469d/R/inference_methods.R#L199>.
|
|
185
|
+
|
|
186
|
+
Input:
|
|
187
|
+
infile: The input data file. The rows are samples and the columns are
|
|
188
|
+
features. It must be tab-delimited.
|
|
189
|
+
```
|
|
190
|
+
Sample F1 F2 F3 ... Fn
|
|
191
|
+
S1 1.2 3.4 5.6 7.8
|
|
192
|
+
S2 2.3 4.5 6.7 8.9
|
|
193
|
+
...
|
|
194
|
+
Sm 5.6 7.8 9.0 1.2
|
|
195
|
+
```
|
|
196
|
+
groupfile: The group file. The rows are the samples and the columns
|
|
197
|
+
are the groupings. It must be tab-delimited.
|
|
198
|
+
```
|
|
199
|
+
Sample G1 G2 G3 ... Gk
|
|
200
|
+
S1 0 1 0 0
|
|
201
|
+
S2 2 1 0 NA # exclude this sample
|
|
202
|
+
...
|
|
203
|
+
Sm 1 0 0 0
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
Output:
|
|
207
|
+
outfile: The output file. It is a tab-delimited file with the first
|
|
208
|
+
column as the feature pair and the second column as the p-value.
|
|
209
|
+
```
|
|
210
|
+
Group Feature1 Feature2 Pval Padj
|
|
211
|
+
G1 F1 F2 0.123 0.123
|
|
212
|
+
G1 F1 F3 0.123 0.123
|
|
213
|
+
...
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
Envs:
|
|
217
|
+
method (choice): The method used to calculate the differential
|
|
218
|
+
co-expression.
|
|
219
|
+
- pearson: Pearson correlation.
|
|
220
|
+
- spearman: Spearman correlation.
|
|
221
|
+
beta: The beta value for the differential co-expression analysis.
|
|
222
|
+
padj (choice): The method for p-value adjustment.
|
|
223
|
+
- none: No p-value adjustment (no Padj column in outfile).
|
|
224
|
+
- holm: Holm-Bonferroni method.
|
|
225
|
+
- hochberg: Hochberg method.
|
|
226
|
+
- hommel: Hommel method.
|
|
227
|
+
- bonferroni: Bonferroni method.
|
|
228
|
+
- BH: Benjamini-Hochberg method.
|
|
229
|
+
- BY: Benjamini-Yekutieli method.
|
|
230
|
+
- fdr: FDR correction method.
|
|
231
|
+
perm_batch (type=int): The number of permutations to run in each batch
|
|
232
|
+
seed (type=int): The seed for random number generation
|
|
233
|
+
ncores (type=int): The number of cores to use for parallelization
|
|
234
|
+
transpose_input (flag): Whether to transpose the input file.
|
|
235
|
+
transpose_group (flag): Whether to transpose the group file.
|
|
236
|
+
""" # noqa: E501
|
|
237
|
+
input = "infile:file, groupfile:file"
|
|
238
|
+
output = "outfile:file:{{in.infile | stem}}.diffcoexpr.txt"
|
|
239
|
+
lang = config.lang.rscript
|
|
240
|
+
envs = {
|
|
241
|
+
"method": "pearson",
|
|
242
|
+
"beta": 6,
|
|
243
|
+
"padj": "none",
|
|
244
|
+
"perm_batch": 20,
|
|
245
|
+
"seed": 8525,
|
|
246
|
+
"ncores": config.misc.ncores,
|
|
247
|
+
"transpose_input": False,
|
|
248
|
+
"transpose_group": False,
|
|
249
|
+
}
|
|
250
|
+
script = "file://../scripts/stats/DiffCoexpr.R"
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
class MetaPvalue(Proc):
|
|
254
|
+
"""Calculation of meta p-values.
|
|
255
|
+
|
|
256
|
+
If there is only one input file, only the p-value adjustment will be performed.
|
|
257
|
+
|
|
258
|
+
Input:
|
|
259
|
+
infiles: The input files. Each file is a tab-delimited file with multiple
|
|
260
|
+
columns. There should be ID column(s) to match the rows in other files and
|
|
261
|
+
p-value column(s) to be combined. The records will be full-joined by ID.
|
|
262
|
+
When only one file is provided, only the pvalue adjustment will be
|
|
263
|
+
performed when `envs.padj` is not `none`, otherwise the input file will
|
|
264
|
+
be copied to `out.outfile`.
|
|
265
|
+
|
|
266
|
+
Output:
|
|
267
|
+
outfile: The output file. It is a tab-delimited file with the first column as
|
|
268
|
+
the ID and the second column as the combined p-value.
|
|
269
|
+
```
|
|
270
|
+
ID ID1 ... Pval Padj
|
|
271
|
+
a x ... 0.123 0.123
|
|
272
|
+
b y ... 0.123 0.123
|
|
273
|
+
...
|
|
274
|
+
```
|
|
275
|
+
|
|
276
|
+
Envs:
|
|
277
|
+
id_cols: The column names used in all `in.infiles` as ID columns. Multiple
|
|
278
|
+
columns can be specified by comma-separated values. For example, `ID1,ID2`.
|
|
279
|
+
If `id_exprs` is specified, this should be a single column name for the new
|
|
280
|
+
ID column in each `in.infiles` and the final `out.outfile`.
|
|
281
|
+
id_exprs: The R expressions for each `in.infiles` to get ID column(s).
|
|
282
|
+
pval_cols: The column names used in all `in.infiles` as p-value columns.
|
|
283
|
+
Different columns can be specified by comma-separated values for each
|
|
284
|
+
`in.infiles`. For example, `Pval1,Pval2`.
|
|
285
|
+
method (choice): The method used to calculate the meta-pvalue.
|
|
286
|
+
- fisher: Fisher's method.
|
|
287
|
+
- sumlog: Sum of logarithms (same as Fisher's method)
|
|
288
|
+
- logitp: Logit method.
|
|
289
|
+
- sumz: Sum of z method (Stouffer's method).
|
|
290
|
+
- meanz: Mean of z method.
|
|
291
|
+
- meanp: Mean of p method.
|
|
292
|
+
- invt: Inverse t method.
|
|
293
|
+
- sump: Sum of p method (Edgington's method).
|
|
294
|
+
- votep: Vote counting method.
|
|
295
|
+
- wilkinsonp: Wilkinson's method.
|
|
296
|
+
- invchisq: Inverse chi-square method.
|
|
297
|
+
na: The method to handle NA values. -1 to skip the record. Otherwise NA
|
|
298
|
+
will be replaced by the given value.
|
|
299
|
+
padj (choice): The method for p-value adjustment.
|
|
300
|
+
- none: No p-value adjustment (no Padj column in outfile).
|
|
301
|
+
- holm: Holm-Bonferroni method.
|
|
302
|
+
- hochberg: Hochberg method.
|
|
303
|
+
- hommel: Hommel method.
|
|
304
|
+
- bonferroni: Bonferroni method.
|
|
305
|
+
- BH: Benjamini-Hochberg method.
|
|
306
|
+
- BY: Benjamini-Yekutieli method.
|
|
307
|
+
- fdr: FDR correction method.
|
|
308
|
+
"""
|
|
309
|
+
input = "infiles:files"
|
|
310
|
+
output = "outfile:file:{{in.infiles | first | stem}}.metapval.txt"
|
|
311
|
+
lang = config.lang.rscript
|
|
312
|
+
envs = {
|
|
313
|
+
"id_cols": None,
|
|
314
|
+
"id_exprs": None,
|
|
315
|
+
"pval_cols": None,
|
|
316
|
+
"method": "fisher",
|
|
317
|
+
"na": -1,
|
|
318
|
+
"padj": "none",
|
|
319
|
+
}
|
|
320
|
+
script = "file://../scripts/stats/MetaPvalue.R"
|