biopipen 0.23.7__tar.gz → 0.24.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- {biopipen-0.23.7 → biopipen-0.24.0}/PKG-INFO +8 -7
- biopipen-0.24.0/biopipen/__init__.py +1 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/core/proc.py +7 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/ns/cellranger.py +2 -2
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/ns/scrna.py +15 -20
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/ns/tcr.py +8 -6
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/scrna/ScFGSEA.R +6 -0
- biopipen-0.24.0/biopipen/scripts/scrna/SeuratClustering.R +174 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/scrna/SeuratPreparing.R +21 -10
- biopipen-0.24.0/biopipen/scripts/scrna/SeuratSubClustering.R +169 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +5 -5
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +3 -3
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/tcr/Immunarch-basic.R +2 -3
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/tcr/Immunarch-clonality.R +2 -3
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/tcr/Immunarch-diversity.R +70 -49
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/tcr/Immunarch-geneusage.R +2 -3
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/tcr/Immunarch-kmer.R +2 -3
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/tcr/Immunarch-overlap.R +2 -3
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/tcr/Immunarch-spectratyping.R +2 -3
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/tcr/Immunarch-tracking.R +2 -3
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/tcr/Immunarch-vjjunc.R +2 -3
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/tcr/Immunarch.R +1 -1
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/tcr/ImmunarchLoading.R +2 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/tcr/TCRClustering.R +6 -2
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/tcr/TESSA.R +3 -1
- biopipen-0.24.0/biopipen/scripts/tcr/immunarch-patched.R +142 -0
- biopipen-0.24.0/biopipen/utils/caching.R +44 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/pyproject.toml +9 -8
- {biopipen-0.23.7 → biopipen-0.24.0}/setup.py +8 -7
- biopipen-0.23.7/biopipen/__init__.py +0 -1
- biopipen-0.23.7/biopipen/scripts/scrna/SeuratClustering.R +0 -157
- biopipen-0.23.7/biopipen/scripts/scrna/SeuratSubClustering.R +0 -185
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/core/__init__.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/core/config.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/core/config.toml +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/core/defaults.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/core/filters.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/core/testing.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/ns/__init__.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/ns/bam.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/ns/bcftools.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/ns/bed.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/ns/cnv.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/ns/cnvkit.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/ns/cnvkit_pipeline.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/ns/delim.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/ns/gene.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/ns/gsea.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/ns/misc.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/ns/plot.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/ns/rnaseq.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/ns/scrna_metabolic_landscape.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/ns/tcgamaf.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/ns/vcf.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/ns/web.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/reports/bam/CNAClinic.svelte +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/reports/bam/CNVpytor.svelte +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/reports/bam/ControlFREEC.svelte +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/reports/cellranger/CellRangerCount.svelte +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/reports/cellranger/CellRangerVdj.svelte +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/reports/cnv/AneuploidyScore.svelte +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/reports/cnv/AneuploidyScoreSummary.svelte +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/reports/cnv/TMADScoreSummary.svelte +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/reports/cnvkit/CNVkitDiagram.svelte +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/reports/cnvkit/CNVkitHeatmap.svelte +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/reports/cnvkit/CNVkitScatter.svelte +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/reports/delim/SampleInfo.svelte +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/reports/gsea/FGSEA.svelte +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/reports/gsea/GSEA.svelte +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/reports/scrna/CellsDistribution.svelte +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/reports/scrna/DimPlots.svelte +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/reports/scrna/MarkersFinder.svelte +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/reports/scrna/MetaMarkers.svelte +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/reports/scrna/RadarPlots.svelte +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/reports/scrna/ScFGSEA.svelte +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/reports/scrna/SeuratClusterStats.svelte +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/reports/scrna/SeuratPreparing.svelte +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/reports/scrna/TopExpressingGenes.svelte +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/reports/tcr/CDR3AAPhyschem.svelte +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/reports/tcr/CloneResidency.svelte +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/reports/tcr/Immunarch.svelte +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/reports/tcr/SampleDiversity.svelte +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/reports/tcr/TCRClusterStats.svelte +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/reports/tcr/TESSA.svelte +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/reports/tcr/VJUsage.svelte +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/reports/utils/gsea.liq +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/reports/utils/misc.liq +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/reports/vcf/TruvariBenchSummary.svelte +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/reports/vcf/TruvariConsistency.svelte +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/bam/BamMerge.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/bam/BamSplitChroms.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/bam/CNAClinic.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/bam/CNVpytor.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/bam/ControlFREEC.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/bcftools/BcftoolsFilter.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/bcftools/BcftoolsSort.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/bed/Bed2Vcf.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/bed/BedConsensus.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/bed/BedLiftOver.sh +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/bed/BedtoolsMerge.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/cellranger/CellRangerCount.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/cellranger/CellRangerVdj.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/cnv/AneuploidyScore.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/cnv/AneuploidyScoreSummary.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/cnv/TMADScore.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/cnv/TMADScoreSummary.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/cnvkit/CNVkitAccess.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/cnvkit/CNVkitAutobin.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/cnvkit/CNVkitBatch.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/cnvkit/CNVkitCall.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/cnvkit/CNVkitCoverage.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/cnvkit/CNVkitDiagram.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/cnvkit/CNVkitFix.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/cnvkit/CNVkitGuessBaits.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/cnvkit/CNVkitHeatmap.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/cnvkit/CNVkitReference.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/cnvkit/CNVkitScatter.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/cnvkit/CNVkitSegment.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/cnvkit/guess_baits.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/delim/RowsBinder.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/delim/SampleInfo.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/gene/GeneNameConversion.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/gsea/Enrichr.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/gsea/FGSEA.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/gsea/GSEA.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/gsea/PreRank.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/misc/Config2File.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/misc/Str2File.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/plot/Heatmap.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/plot/VennDiagram.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/rnaseq/UnitConversion.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/scrna/CellTypeAnnotation-direct.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/scrna/CellTypeAnnotation.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/scrna/CellsDistribution.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/scrna/DimPlots.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/scrna/ExprImpution-alra.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/scrna/ExprImpution-rmagic.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/scrna/ExprImpution-scimpute.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/scrna/ExprImpution.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/scrna/MarkersFinder.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/scrna/MetaMarkers.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/scrna/ModuleScoreCalculator.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/scrna/RadarPlots.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/scrna/SCImpute.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/scrna/SeuratClusterStats-features.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/scrna/SeuratClusterStats-hists.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/scrna/SeuratClusterStats-stats.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/scrna/SeuratClusterStats.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/scrna/SeuratFilter.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/scrna/SeuratLoading.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/scrna/SeuratMap2Ref.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/scrna/SeuratMetadataMutater.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/scrna/SeuratSplit.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/scrna/SeuratSubset.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/scrna/SeuratTo10X.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/scrna/TopExpressingGenes.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/scrna/Write10X.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/scrna/sctype.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/tcgamaf/Maf2Vcf.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/tcgamaf/MafAddChr.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/tcgamaf/maf2vcf.pl +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/tcr/Attach2Seurat.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/tcr/CDR3AAPhyschem.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/tcr/CloneResidency.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/tcr/CloneSizeQQPlot.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/tcr/GIANA/GIANA.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/tcr/GIANA/GIANA4.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/tcr/GIANA/Imgt_Human_TRBV.fasta +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/tcr/GIANA/query.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/tcr/Immunarch2VDJtools.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/tcr/ImmunarchFilter.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/tcr/ImmunarchSplitIdents.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/tcr/SampleDiversity.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/tcr/TCRClusterStats.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/tcr/TESSA_source/Atchley_factors.csv +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/tcr/TESSA_source/BriseisEncoder.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/tcr/TESSA_source/MCMC_control.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/tcr/TESSA_source/TrainedEncoder.h5 +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/tcr/TESSA_source/fixed_b.csv +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/tcr/TESSA_source/initialization.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/tcr/TESSA_source/post_analysis.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/tcr/TESSA_source/real_data.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/tcr/TESSA_source/update.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/tcr/TESSA_source/utility.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/tcr/VJUsage.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/tcr/vdjtools-patch.sh +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/vcf/TruvariBench.sh +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/vcf/TruvariBenchSummary.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/vcf/TruvariConsistency.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/vcf/Vcf2Bed.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/vcf/VcfAnno.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/vcf/VcfDownSample.sh +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/vcf/VcfFilter.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/vcf/VcfFix.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/vcf/VcfFix_utils.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/vcf/VcfIndex.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/vcf/VcfIntersect.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/vcf/VcfLiftOver.sh +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/vcf/VcfSplitSamples.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/web/Download.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/scripts/web/DownloadList.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/utils/__init__.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/utils/common_docstrs.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/utils/gene.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/utils/gene.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/utils/gsea.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/utils/io.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/utils/misc.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/utils/misc.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/utils/mutate_helpers.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/utils/plot.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/utils/reference.py +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/utils/rnaseq.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/utils/single_cell.R +0 -0
- {biopipen-0.23.7 → biopipen-0.24.0}/biopipen/utils/vcf.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: biopipen
|
|
3
|
-
Version: 0.23.7
|
|
3
|
+
Version: 0.24.0
|
|
4
4
|
Summary: Bioinformatics processes/pipelines that can be run from `pipen run`
|
|
5
5
|
License: MIT
|
|
6
6
|
Author: pwwang
|
|
@@ -14,9 +14,10 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
14
14
|
Classifier: Programming Language :: Python :: 3.11
|
|
15
15
|
Classifier: Programming Language :: Python :: 3.12
|
|
16
16
|
Provides-Extra: runinfo
|
|
17
|
-
Requires-Dist: datar[pandas] (>=0.15.
|
|
18
|
-
Requires-Dist: pipen-board[report] (>=0.
|
|
19
|
-
Requires-Dist: pipen-cli-run (>=0.
|
|
20
|
-
Requires-Dist: pipen-filters (>=0.
|
|
21
|
-
Requires-Dist: pipen-
|
|
22
|
-
Requires-Dist: pipen-
|
|
17
|
+
Requires-Dist: datar[pandas] (>=0.15.3,<0.16.0)
|
|
18
|
+
Requires-Dist: pipen-board[report] (>=0.14,<0.15)
|
|
19
|
+
Requires-Dist: pipen-cli-run (>=0.12,<0.13)
|
|
20
|
+
Requires-Dist: pipen-filters (>=0.11,<0.12)
|
|
21
|
+
Requires-Dist: pipen-poplog (>=0.0.2,<0.0.3)
|
|
22
|
+
Requires-Dist: pipen-runinfo (>=0.5,<0.6) ; extra == "runinfo"
|
|
23
|
+
Requires-Dist: pipen-verbose (>=0.10,<0.11)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.24.0"
|
|
@@ -25,3 +25,10 @@ class Proc(PipenProc):
|
|
|
25
25
|
"filters": {**FILTERS, **filtermanager.filters},
|
|
26
26
|
"search_paths": SEARCH_PATHS + [str(REPORT_DIR)],
|
|
27
27
|
}
|
|
28
|
+
|
|
29
|
+
plugin_opts = {
|
|
30
|
+
"poplog_pattern": (
|
|
31
|
+
r"^(?P<level>INFO|WARN|WARNING|CRITICAL|ERROR|DEBUG?)\s*"
|
|
32
|
+
r"\[\d+-\d+-\d+ \d+:\d+:\d+\] (?P<message>.*)$"
|
|
33
|
+
)
|
|
34
|
+
}
|
|
@@ -35,7 +35,7 @@ class CellRangerCount(Proc):
|
|
|
35
35
|
{%- set fastqs = fastqs[0] | glob: "*.fastq.gz" -%}
|
|
36
36
|
{%- endif -%}
|
|
37
37
|
{%- set sample = commonprefix(*fastqs) |
|
|
38
|
-
regex_replace: "_L\\d+_
|
|
38
|
+
regex_replace: "_L\\d+_?$", "" |
|
|
39
39
|
regex_replace: "_S\\d+$", "" -%}
|
|
40
40
|
{{- sample -}}
|
|
41
41
|
"""
|
|
@@ -84,7 +84,7 @@ class CellRangerVdj(Proc):
|
|
|
84
84
|
{%- set fastqs = fastqs[0] | glob: "*.fastq.gz" -%}
|
|
85
85
|
{%- endif -%}
|
|
86
86
|
{%- set sample = commonprefix(*fastqs) |
|
|
87
|
-
regex_replace: "_L\\d+_
|
|
87
|
+
regex_replace: "_L\\d+_?$", "" |
|
|
88
88
|
regex_replace: "_S\\d+$", "" -%}
|
|
89
89
|
{{- sample -}}
|
|
90
90
|
"""
|
|
@@ -278,18 +278,14 @@ class SeuratClustering(Proc):
|
|
|
278
278
|
The results will be saved in `seurat_clusters_<resolution>`.
|
|
279
279
|
The final resolution will be used to define the clusters at `seurat_clusters`.
|
|
280
280
|
- <more>: See <https://satijalab.org/seurat/reference/findclusters>
|
|
281
|
-
cache (type=auto): Whether to cache the
|
|
281
|
+
cache (type=auto): Whether to cache the information at different steps.
|
|
282
282
|
If `True`, the seurat object will be cached in the job output directory, which will be not cleaned up when job is rerunning.
|
|
283
|
-
The cached seurat object will be saved as `<signature>.
|
|
283
|
+
The cached seurat object will be saved as `<signature>.<kind>.RDS` file, where `<signature>` is the signature determined by
|
|
284
284
|
the input and envs of the process.
|
|
285
|
-
See
|
|
286
|
-
|
|
287
|
-
* <https://github.com/satijalab/seurat/issues/5358> and
|
|
288
|
-
* <https://github.com/satijalab/seurat/issues/6748> for more details.
|
|
285
|
+
See <https://github.com/satijalab/seurat/issues/7849>, <https://github.com/satijalab/seurat/issues/5358> and
|
|
286
|
+
<https://github.com/satijalab/seurat/issues/6748> for more details also about reproducibility issues.
|
|
289
287
|
To not use the cached seurat object, you can either set `cache` to `False` or delete the cached file at
|
|
290
|
-
`<signature>.
|
|
291
|
-
If `True`, the cache directory is `.pipen/<Pipeline>/SeuratClustering/0/output/`
|
|
292
|
-
You can also specify customized directory to save the cached seurat object by setting `cache` to the directory path.
|
|
288
|
+
`<signature>.RDS` in the cache directory.
|
|
293
289
|
|
|
294
290
|
Requires:
|
|
295
291
|
r-seurat:
|
|
@@ -309,7 +305,7 @@ class SeuratClustering(Proc):
|
|
|
309
305
|
"RunUMAP": {"dims": 30},
|
|
310
306
|
"FindNeighbors": {},
|
|
311
307
|
"FindClusters": {"resolution": 0.8},
|
|
312
|
-
"cache":
|
|
308
|
+
"cache": config.path.tmpdir,
|
|
313
309
|
}
|
|
314
310
|
script = "file://../scripts/scrna/SeuratClustering.R"
|
|
315
311
|
|
|
@@ -361,18 +357,14 @@ class SeuratSubClustering(Proc):
|
|
|
361
357
|
The results will be saved in `<casename>_<resolution>`.
|
|
362
358
|
The final resolution will be used to define the clusters at `<casename>`.
|
|
363
359
|
- <more>: See <https://satijalab.org/seurat/reference/findclusters>
|
|
364
|
-
cache (type=auto): Whether to cache the
|
|
360
|
+
cache (type=auto): Whether to cache the information at different steps.
|
|
365
361
|
If `True`, the seurat object will be cached in the job output directory, which will be not cleaned up when job is rerunning.
|
|
366
|
-
The cached seurat object will be saved as `<signature>.
|
|
362
|
+
The cached seurat object will be saved as `<signature>.<kind>.RDS` file, where `<signature>` is the signature determined by
|
|
367
363
|
the input and envs of the process.
|
|
368
|
-
See
|
|
369
|
-
|
|
370
|
-
* <https://github.com/satijalab/seurat/issues/5358> and
|
|
371
|
-
* <https://github.com/satijalab/seurat/issues/6748> for more details.
|
|
364
|
+
See <https://github.com/satijalab/seurat/issues/7849>, <https://github.com/satijalab/seurat/issues/5358> and
|
|
365
|
+
<https://github.com/satijalab/seurat/issues/6748> for more details also about reproducibility issues.
|
|
372
366
|
To not use the cached seurat object, you can either set `cache` to `False` or delete the cached file at
|
|
373
|
-
`<signature>.
|
|
374
|
-
If `True`, the cache directory is `.pipen/<Pipeline>/SeuratClustering/0/output/`
|
|
375
|
-
You can also specify customized directory to save the cached seurat object by setting `cache` to the directory path.
|
|
367
|
+
`<signature>.RDS` in the cache directory.
|
|
376
368
|
cases (type=json): The cases to perform subclustering.
|
|
377
369
|
Keys are the names of the cases and values are the dicts inherited from `envs` except `mutaters` and `cache`.
|
|
378
370
|
If empty, a case with name `subcluster` will be created with default parameters.
|
|
@@ -387,7 +379,7 @@ class SeuratSubClustering(Proc):
|
|
|
387
379
|
"RunUMAP": {"dims": 30},
|
|
388
380
|
"FindNeighbors": {},
|
|
389
381
|
"FindClusters": {"resolution": 0.8},
|
|
390
|
-
"cache":
|
|
382
|
+
"cache": config.path.tmpdir,
|
|
391
383
|
"cases": {"subcluster": {}},
|
|
392
384
|
}
|
|
393
385
|
script = "file://../scripts/scrna/SeuratSubClustering.R"
|
|
@@ -1463,6 +1455,7 @@ class ScFGSEA(Proc):
|
|
|
1463
1455
|
ident-1: The first group of cells to compare
|
|
1464
1456
|
ident-2: The second group of cells to compare, if not provided, the rest of the cells that are not `NA`s in `group-by` column are used for `ident-2`.
|
|
1465
1457
|
each: The column name in metadata to separate the cells into different subsets to do the analysis.
|
|
1458
|
+
subset: An expression to subset the cells.
|
|
1466
1459
|
section: The section name for the report. Worked only when `each` is not specified. Otherwise, the section name will be constructed from `each` and its value.
|
|
1467
1460
|
This allows different cases to be put into the same section in the report.
|
|
1468
1461
|
gmtfile: The pathways in GMT format, with the gene names/ids in the same format as the seurat object.
|
|
@@ -1513,6 +1506,7 @@ class ScFGSEA(Proc):
|
|
|
1513
1506
|
"ident-1": None,
|
|
1514
1507
|
"ident-2": None,
|
|
1515
1508
|
"each": None,
|
|
1509
|
+
"subset": None,
|
|
1516
1510
|
"section": "DEFAULT",
|
|
1517
1511
|
"gmtfile": "",
|
|
1518
1512
|
"method": "s2n",
|
|
@@ -2000,4 +1994,5 @@ class MetaMarkers(Proc):
|
|
|
2000
1994
|
plugin_opts = {
|
|
2001
1995
|
"report": "file://../reports/scrna/MetaMarkers.svelte",
|
|
2002
1996
|
"report_paging": 8,
|
|
1997
|
+
"poplog_max": 15,
|
|
2003
1998
|
}
|
|
@@ -563,12 +563,13 @@ class Immunarch(Proc):
|
|
|
563
563
|
A Gini coefficient of one (or 100 percents) expresses maximal inequality among values (for example where only one person has all the income).
|
|
564
564
|
- d50: The D50 index.
|
|
565
565
|
It is the number of types that are needed to cover 50%% of the total abundance.
|
|
566
|
-
- dxx: The Dxx index.
|
|
567
|
-
It is the number of types that are needed to cover xx%% of the total abundance.
|
|
568
|
-
The percentage should be specified in the `args` argument using `perc` key.
|
|
569
566
|
- raref: Species richness from the results of sampling through extrapolation.
|
|
570
567
|
- by: The variables (column names) to group samples.
|
|
571
568
|
Multiple columns should be separated by `,`.
|
|
569
|
+
- plot_type (choice): The type of the plot, works when `by` is specified.
|
|
570
|
+
Not working for `raref`.
|
|
571
|
+
- box: Boxplot
|
|
572
|
+
- bar: Barplot with error bars
|
|
572
573
|
- subset: Subset the data before calculating the clonotype volumes.
|
|
573
574
|
The whole data will be expanded to cell level, and then subsetted.
|
|
574
575
|
Clone sizes will be re-calculated based on the subsetted data.
|
|
@@ -789,9 +790,9 @@ class Immunarch(Proc):
|
|
|
789
790
|
},
|
|
790
791
|
# Diversity
|
|
791
792
|
"divs": {
|
|
792
|
-
"filter": None,
|
|
793
793
|
"method": "gini",
|
|
794
794
|
"by": None,
|
|
795
|
+
"plot_type": "bar",
|
|
795
796
|
"args": {},
|
|
796
797
|
"order": [],
|
|
797
798
|
"test": {
|
|
@@ -805,8 +806,8 @@ class Immunarch(Proc):
|
|
|
805
806
|
"align_y": False,
|
|
806
807
|
"log": False,
|
|
807
808
|
"devpars": {
|
|
808
|
-
"width":
|
|
809
|
-
"height":
|
|
809
|
+
"width": 800,
|
|
810
|
+
"height": 800,
|
|
810
811
|
"res": 100,
|
|
811
812
|
},
|
|
812
813
|
"subset": None,
|
|
@@ -851,6 +852,7 @@ class Immunarch(Proc):
|
|
|
851
852
|
plugin_opts = {
|
|
852
853
|
"report": "file://../reports/tcr/Immunarch.svelte",
|
|
853
854
|
"report_paging": 3,
|
|
855
|
+
"poplog_max": 999,
|
|
854
856
|
}
|
|
855
857
|
|
|
856
858
|
|
|
@@ -14,6 +14,7 @@ group.by <- {{envs["group-by"] | r}} # nolint
|
|
|
14
14
|
ident.1 <- {{envs["ident-1"] | r}} # nolint
|
|
15
15
|
ident.2 <- {{envs["ident-2"] | r}} # nolint
|
|
16
16
|
each <- {{envs.each | r}} # nolint
|
|
17
|
+
subset <- {{envs.subset | r}} # nolint
|
|
17
18
|
section <- {{envs.section | r}} # nolint
|
|
18
19
|
gmtfile <- {{envs.gmtfile | r}} # nolint
|
|
19
20
|
method <- {{envs.method | r}} # nolint
|
|
@@ -43,6 +44,7 @@ expand_cases <- function() {
|
|
|
43
44
|
ident.1 = ident.1,
|
|
44
45
|
ident.2 = ident.2,
|
|
45
46
|
each = each,
|
|
47
|
+
subset = subset,
|
|
46
48
|
section = section,
|
|
47
49
|
gmtfile = gmtfile,
|
|
48
50
|
method = method,
|
|
@@ -63,6 +65,7 @@ expand_cases <- function() {
|
|
|
63
65
|
ident.1 = ident.1,
|
|
64
66
|
ident.2 = ident.2,
|
|
65
67
|
each = each,
|
|
68
|
+
subset = subset,
|
|
66
69
|
section = section,
|
|
67
70
|
gmtfile = gmtfile,
|
|
68
71
|
method = method,
|
|
@@ -136,6 +139,9 @@ do_case <- function(name, case) {
|
|
|
136
139
|
# prepare expression matrix
|
|
137
140
|
log_info(" Preparing expression matrix...")
|
|
138
141
|
sobj <- srtobj %>% filter(!is.na(!!sym(case$group.by)))
|
|
142
|
+
if (!is.null(case$subset)) {
|
|
143
|
+
sobj <- sobj %>% filter(!!!parse_exprs(case$subset))
|
|
144
|
+
}
|
|
139
145
|
if (!is.null(case$ident.2)) {
|
|
140
146
|
sobj <- sobj %>% filter(!!sym(case$group.by) %in% c(case$ident.1, case$ident.2))
|
|
141
147
|
}
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
source("{{biopipen_dir}}/utils/misc.R")
|
|
2
|
+
source("{{biopipen_dir}}/utils/caching.R")
|
|
3
|
+
|
|
4
|
+
library(Seurat)
|
|
5
|
+
library(future)
|
|
6
|
+
library(tidyr)
|
|
7
|
+
library(dplyr)
|
|
8
|
+
library(digest)
|
|
9
|
+
|
|
10
|
+
set.seed(8525)
|
|
11
|
+
|
|
12
|
+
srtfile <- {{in.srtobj | quote}}
|
|
13
|
+
rdsfile <- {{out.rdsfile | quote}}
|
|
14
|
+
joboutdir <- {{job.outdir | quote}}
|
|
15
|
+
envs <- {{envs | r: todot="-"}}
|
|
16
|
+
|
|
17
|
+
if (length(envs$ScaleData) > 0 && length(envs$SCTransform) > 0) {
|
|
18
|
+
stop("Cannot specify both ScaleData and SCTransform")
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
options(str = strOptions(vec.len = 5, digits.d = 5))
|
|
22
|
+
options(future.globals.maxSize = 80000 * 1024^2)
|
|
23
|
+
plan(strategy = "multicore", workers = envs$ncores)
|
|
24
|
+
|
|
25
|
+
# Expand a scalar dims specification into a sequence, e.g. 30 -> 1:30.
#
# @param args A list of function arguments (e.g. envs$RunUMAP); may be NULL.
# @param name Name of the element of `args` that holds the dims ("dims" by default).
# @return `args`, with `args[[name]]` replaced by `1:args[[name]]` when it is a
#   single number. A vector of several dims (or a missing/non-numeric element)
#   is returned untouched.
.expand_dims <- function(args, name = "dims") {
    dims <- args[[name]]
    # Only a length-1 numeric is shorthand for "the first N dims".
    # (The length must be compared to 1 outside of length(); comparing inside
    # made the test succeed for any non-empty vector.)
    if (is.numeric(dims) && length(dims) == 1) {
        args[[name]] <- 1:dims
    }
    args
}
|
|
32
|
+
|
|
33
|
+
envs$RunUMAP <- .expand_dims(envs$RunUMAP)
|
|
34
|
+
envs$FindNeighbors <- .expand_dims(envs$FindNeighbors)
|
|
35
|
+
|
|
36
|
+
log_info("Reading Seurat object ...")
|
|
37
|
+
sobj <- readRDS(srtfile)
|
|
38
|
+
|
|
39
|
+
if (isTRUE(envs$cache)) { envs$cache <- joboutdir }
|
|
40
|
+
if (length(envs$cache) > 1) {
|
|
41
|
+
log_warn("Multiple cache directories (envs.cache) detected, using the first one.")
|
|
42
|
+
envs$cache <- envs$cache[1]
|
|
43
|
+
}
|
|
44
|
+
sobj_sig <- capture.output(str(sobj))
|
|
45
|
+
dig_sig <- digest::digest(sobj_sig, algo = "md5")
|
|
46
|
+
dig_sig <- substr(dig_sig, 1, 8)
|
|
47
|
+
cache_dir <- NULL
|
|
48
|
+
if (is.character(envs$cache)) {
|
|
49
|
+
cache_dir <- file.path(envs$cache, paste0(dig_sig, ".seurat_cache"))
|
|
50
|
+
dir.create(cache_dir, recursive = TRUE, showWarnings = FALSE)
|
|
51
|
+
writeLines(sobj_sig, file.path(cache_dir, "signature.txt"))
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
if (length(envs$ScaleData) > 0) {
|
|
55
|
+
if (DefaultAssay(sobj) == "SCT") {
|
|
56
|
+
stop("SCT assay detected, but ScaleData is specified. Use SCTransform instead.")
|
|
57
|
+
}
|
|
58
|
+
cached <- get_cached(envs$ScaleData, "ScaleData", cache_dir)
|
|
59
|
+
if (is.null(cached$data)) {
|
|
60
|
+
log_info("Running ScaleData ...")
|
|
61
|
+
envs$ScaleData$object <- sobj
|
|
62
|
+
sobj <- do_call(ScaleData, envs$ScaleData)
|
|
63
|
+
cached$data <- list(assay = sobj@assays$RNA, commands = sobj@commands)
|
|
64
|
+
save_to_cache(cached, "ScaleData", cache_dir)
|
|
65
|
+
} else {
|
|
66
|
+
log_info("Loading cached ScaleData ...")
|
|
67
|
+
sobj@assays$RNA <- cached$data$assay
|
|
68
|
+
sobj@commands <- cached$data$commands
|
|
69
|
+
DefaultAssay(sobj) <- "RNA"
|
|
70
|
+
}
|
|
71
|
+
} else if (length(envs$SCTransform) > 0) {
|
|
72
|
+
if (DefaultAssay(sobj) != "SCT") {
|
|
73
|
+
stop("SCT assay not detected, but SCTransform is specified. Use ScaleData instead.")
|
|
74
|
+
}
|
|
75
|
+
cached <- get_cached(envs$SCTransform, "SCTransform", cache_dir)
|
|
76
|
+
# Name of the assay SCTransform writes to; the cached branch below reads `assay`
assay <- envs$SCTransform$new.assay.name %||% "SCT"
|
|
77
|
+
if (is.null(cached$data)) {
|
|
78
|
+
log_info("Running SCTransform ...")
|
|
79
|
+
envs$SCTransform$object <- sobj
|
|
80
|
+
sobj <- do_call(SCTransform, envs$SCTransform)
|
|
81
|
+
cached$data <- list(assay = sobj@assays$SCT, commands = sobj@commands)
|
|
82
|
+
save_to_cache(cached, "SCTransform", cache_dir)
|
|
83
|
+
} else {
|
|
84
|
+
log_info("Loading cached SCTransform ...")
|
|
85
|
+
sobj@assays[[assay]] <- cached$data$assay
|
|
86
|
+
sobj@commands <- cached$data$commands
|
|
87
|
+
DefaultAssay(sobj) <- assay
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
cached <- get_cached(envs$RunUMAP, "RunUMAP", cache_dir)
|
|
92
|
+
reduc_name <- envs$RunUMAP$reduction.name %||% "umap"
|
|
93
|
+
if (is.null(cached$data)) {
|
|
94
|
+
log_info("Running RunUMAP ...")
|
|
95
|
+
umap_args <- list_setdefault(
|
|
96
|
+
envs$RunUMAP,
|
|
97
|
+
object = sobj,
|
|
98
|
+
dims = 1:30,
|
|
99
|
+
reduction = sobj@misc$integrated_new_reduction %||% "pca"
|
|
100
|
+
)
|
|
101
|
+
ncells <- ncol(sobj)
|
|
102
|
+
umap_args$dims <- 1:min(max(umap_args$dims), ncells - 1)
|
|
103
|
+
umap_method <- envs$RunUMAP$umap.method %||% "uwot"
|
|
104
|
+
if (umap_method == "uwot" && is.null(envs$RunUMAP$n.neighbors)) {
|
|
105
|
+
# https://github.com/satijalab/seurat/issues/4312
|
|
106
|
+
umap_args$n.neighbors <- min(ncells - 1, 30)
|
|
107
|
+
}
|
|
108
|
+
sobj <- do_call(RunUMAP, umap_args)
|
|
109
|
+
cached$data <- list(reduc = sobj@reductions[[reduc_name]], commands = sobj@commands)
|
|
110
|
+
save_to_cache(cached, "RunUMAP", cache_dir)
|
|
111
|
+
} else {
|
|
112
|
+
log_info("Loading cached RunUMAP ...")
|
|
113
|
+
sobj@reductions[[reduc_name]] <- cached$data$reduc
|
|
114
|
+
sobj@commands <- cached$data$commands
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
cached <- get_cached(envs$FindNeighbors, "FindNeighbors", cache_dir)
|
|
118
|
+
if (is.null(cached$data)) {
|
|
119
|
+
log_info("Running FindNeighbors ...")
|
|
120
|
+
envs$FindNeighbors$object <- sobj
|
|
121
|
+
# Keep a user-supplied reduction; otherwise fall back to the integrated
# reduction recorded on the object, then to "pca"
envs$FindNeighbors$reduction <- envs$FindNeighbors$reduction %||%
    sobj@misc$integrated_new_reduction %||% "pca"
|
|
122
|
+
sobj <- do_call(FindNeighbors, envs$FindNeighbors)
|
|
123
|
+
cached$data <- list(graphs = sobj@graphs, commands = sobj@commands)
|
|
124
|
+
save_to_cache(cached, "FindNeighbors", cache_dir)
|
|
125
|
+
} else {
|
|
126
|
+
log_info("Loading cached FindNeighbors ...")
|
|
127
|
+
sobj@graphs <- cached$data$graphs
|
|
128
|
+
sobj@commands <- cached$data$commands
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
envs$FindClusters$random.seed <- envs$FindClusters$random.seed %||% 8525
|
|
132
|
+
resolution <- envs$FindClusters$resolution %||% 0.8
|
|
133
|
+
if (is.character(resolution)) {
|
|
134
|
+
if (grepl(",", resolution)) {
|
|
135
|
+
resolution <- as.numeric(trimws(unlist(strsplit(resolution, ","))))
|
|
136
|
+
} else {
|
|
137
|
+
resolution <- as.numeric(resolution)
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
for (res in resolution) {
|
|
142
|
+
envs$FindClusters$resolution <- res
|
|
143
|
+
cached <- get_cached(envs$FindClusters, paste0("FindClusters_", res), cache_dir)
|
|
144
|
+
res_key <- paste0("seurat_clusters_", res)
|
|
145
|
+
if (is.null(cached$data)) {
|
|
146
|
+
log_info("Running FindClusters at resolution: {res} ...")
|
|
147
|
+
envs$FindClusters$object <- sobj
|
|
148
|
+
sobj <- do_call(FindClusters, envs$FindClusters)
|
|
149
|
+
levels(sobj$seurat_clusters) <- paste0("c", as.numeric(levels(sobj$seurat_clusters)) + 1)
|
|
150
|
+
sobj[[res_key]] <- sobj$seurat_clusters
|
|
151
|
+
Idents(sobj) <- "seurat_clusters"
|
|
152
|
+
cached$data <- list(clusters = sobj$seurat_clusters, commands = sobj@commands)
|
|
153
|
+
save_to_cache(cached, paste0("FindClusters_", res), cache_dir)
|
|
154
|
+
} else {
|
|
155
|
+
log_info("Loading cached FindClusters at resolution: {res} ...")
|
|
156
|
+
sobj@commands <- cached$data$commands
|
|
157
|
+
sobj[[res_key]] <- cached$data$clusters
|
|
158
|
+
sobj$seurat_clusters <- cached$data$clusters
|
|
159
|
+
Idents(sobj) <- "seurat_clusters"
|
|
160
|
+
}
|
|
161
|
+
ident_table <- table(Idents(sobj))
|
|
162
|
+
log_info("- Found {length(ident_table)} clusters")
|
|
163
|
+
print(ident_table)
|
|
164
|
+
cat("\n")
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
if (DefaultAssay(sobj) == "SCT") {
|
|
168
|
+
# https://github.com/satijalab/seurat/issues/6968
|
|
169
|
+
log_info("Running PrepSCTFindMarkers ...")
|
|
170
|
+
sobj <- PrepSCTFindMarkers(sobj)
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
log_info("Saving results ...")
|
|
174
|
+
saveRDS(sobj, file = rdsfile)
|
|
@@ -99,8 +99,8 @@ load_sample = function(sample) {
|
|
|
99
99
|
}
|
|
100
100
|
obj <- CreateSeuratObject(exprs, project=sample)
|
|
101
101
|
# filter the cells that don't have any gene expressions
|
|
102
|
-
cell_exprs = colSums(obj@assays$RNA)
|
|
103
|
-
obj = subset(obj, cells = names(cell_exprs[cell_exprs > 0]))
|
|
102
|
+
# cell_exprs = colSums(obj@assays$RNA)
|
|
103
|
+
# obj = subset(obj, cells = names(cell_exprs[cell_exprs > 0]))
|
|
104
104
|
obj = RenameCells(obj, add.cell.id = sample)
|
|
105
105
|
# Attach meta data
|
|
106
106
|
for (mname in names(mdata)) {
|
|
@@ -128,13 +128,7 @@ log_info("Reading samples individually ...")
|
|
|
128
128
|
obj_list = lapply(samples, load_sample)
|
|
129
129
|
|
|
130
130
|
log_info("Merging samples ...")
|
|
131
|
-
|
|
132
|
-
y = c()
|
|
133
|
-
for (i in 2:length(obj_list)) y = c(y, obj_list[[i]])
|
|
134
|
-
sobj = merge(obj_list[[1]], y)
|
|
135
|
-
} else {
|
|
136
|
-
sobj = obj_list[[1]]
|
|
137
|
-
}
|
|
131
|
+
sobj = Reduce(merge, obj_list)
|
|
138
132
|
|
|
139
133
|
log_info("Adding metadata for QC ...")
|
|
140
134
|
sobj$percent.mt = PercentageFeatureSet(sobj, pattern = "^MT-")
|
|
@@ -297,28 +291,41 @@ add_report(
|
|
|
297
291
|
h1 = "Filters and QC"
|
|
298
292
|
)
|
|
299
293
|
|
|
294
|
+
# Render an argument list as a single line for logging.
#
# @param args Any R object (here: a list of arguments for a Seurat call).
# @return A length-1 character vector: the lines printed by `str(args)`,
#   joined with ", ".
.formatArgs <- function(args) {
    str_lines <- capture.output(str(args))
    paste(str_lines, collapse = ", ")
}
|
|
297
|
+
|
|
300
298
|
log_info("Performing transformation/scaling ...")
|
|
301
299
|
# Not joined yet
|
|
302
300
|
# sobj[["RNA"]] <- split(sobj[["RNA"]], f = sobj$Sample)
|
|
303
301
|
if (envs$use_sct) {
|
|
304
302
|
log_info("- Running SCTransform ...")
|
|
305
303
|
SCTransformArgs <- envs$SCTransform
|
|
304
|
+
# log to stdout but don't populate it to running log
|
|
305
|
+
# print() does not interpolate "{...}"; build the message explicitly
print(paste0(" SCTransform: ", .formatArgs(SCTransformArgs)))
|
|
306
|
+
log_debug(" SCTransform: {.formatArgs(SCTransformArgs)}")
|
|
306
307
|
SCTransformArgs$object <- sobj
|
|
307
308
|
sobj <- do_call(SCTransform, SCTransformArgs)
|
|
308
309
|
# Default is to use the SCT assay
|
|
309
310
|
} else {
|
|
310
311
|
log_info("- Running NormalizeData ...")
|
|
311
312
|
NormalizeDataArgs <- envs$NormalizeData
|
|
313
|
+
# print() does not interpolate "{...}"; build the message explicitly
print(paste0(" NormalizeData: ", .formatArgs(NormalizeDataArgs)))
|
|
314
|
+
log_debug(" NormalizeData: {.formatArgs(NormalizeDataArgs)}")
|
|
312
315
|
NormalizeDataArgs$object <- sobj
|
|
313
316
|
sobj <- do_call(NormalizeData, NormalizeDataArgs)
|
|
314
317
|
|
|
315
318
|
log_info("- Running FindVariableFeatures ...")
|
|
316
319
|
FindVariableFeaturesArgs <- envs$FindVariableFeatures
|
|
320
|
+
# print() does not interpolate "{...}"; build the message explicitly
print(paste0(" FindVariableFeatures: ", .formatArgs(FindVariableFeaturesArgs)))
|
|
321
|
+
log_debug(" FindVariableFeatures: {.formatArgs(FindVariableFeaturesArgs)}")
|
|
317
322
|
FindVariableFeaturesArgs$object <- sobj
|
|
318
323
|
sobj <- do_call(FindVariableFeatures, FindVariableFeaturesArgs)
|
|
319
324
|
|
|
320
325
|
log_info("- Running ScaleData ...")
|
|
321
326
|
ScaleDataArgs <- envs$ScaleData
|
|
327
|
+
# print() does not interpolate "{...}"; build the message explicitly
print(paste0(" ScaleData: ", .formatArgs(ScaleDataArgs)))
|
|
328
|
+
log_debug(" ScaleData: {.formatArgs(ScaleDataArgs)}")
|
|
322
329
|
ScaleDataArgs$object <- sobj
|
|
323
330
|
sobj <- do_call(ScaleData, ScaleDataArgs)
|
|
324
331
|
}
|
|
@@ -326,13 +333,14 @@ if (envs$use_sct) {
|
|
|
326
333
|
log_info("- Running RunPCA ...")
|
|
327
334
|
RunPCAArgs <- envs$RunPCA
|
|
328
335
|
RunPCAArgs$npcs <- if (is.null(RunPCAArgs$npcs)) { 50 } else { min(RunPCAArgs$npcs, ncol(sobj) - 1) }
|
|
336
|
+
# print() does not interpolate "{...}"; build the message explicitly
print(paste0(" RunPCA: ", .formatArgs(RunPCAArgs)))
|
|
337
|
+
log_debug(" RunPCA: {.formatArgs(RunPCAArgs)}")
|
|
329
338
|
RunPCAArgs$object <- sobj
|
|
330
339
|
sobj <- do_call(RunPCA, RunPCAArgs)
|
|
331
340
|
|
|
332
341
|
if (!envs$no_integration) {
|
|
333
342
|
log_info("- Running IntegrateLayers ...")
|
|
334
343
|
IntegrateLayersArgs <- envs$IntegrateLayers
|
|
335
|
-
IntegrateLayersArgs$object <- sobj
|
|
336
344
|
method <- IntegrateLayersArgs$method
|
|
337
345
|
if (!is.null(IntegrateLayersArgs$reference) && is.character(IntegrateLayersArgs$reference)) {
|
|
338
346
|
log_info(" Using reference samples: {paste(IntegrateLayersArgs$reference, collapse = ', ')}")
|
|
@@ -359,6 +367,9 @@ if (!envs$no_integration) {
|
|
|
359
367
|
if (is.null(IntegrateLayersArgs$new.reduction)) {
|
|
360
368
|
IntegrateLayersArgs$new.reduction <- new_reductions[[method]]
|
|
361
369
|
}
|
|
370
|
+
# print() does not interpolate "{...}"; build the message explicitly
print(paste0(" IntegrateLayers: ", .formatArgs(IntegrateLayersArgs)))
|
|
371
|
+
log_debug(" IntegrateLayers: {.formatArgs(IntegrateLayersArgs)}")
|
|
372
|
+
IntegrateLayersArgs$object <- sobj
|
|
362
373
|
sobj <- do_call(IntegrateLayers, IntegrateLayersArgs)
|
|
363
374
|
# Save it for dimension reduction plots
|
|
364
375
|
sobj@misc$integrated_new_reduction <- IntegrateLayersArgs$new.reduction
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
source("{{biopipen_dir}}/utils/misc.R")
|
|
2
|
+
source("{{biopipen_dir}}/utils/caching.R")
|
|
3
|
+
|
|
4
|
+
library(Seurat)
|
|
5
|
+
library(future)
|
|
6
|
+
library(rlang)
|
|
7
|
+
library(tidyr)
|
|
8
|
+
library(dplyr)
|
|
9
|
+
library(tidyseurat)
|
|
10
|
+
library(digest)
|
|
11
|
+
|
|
12
|
+
set.seed(8525)
|
|
13
|
+
|
|
14
|
+
srtfile <- {{in.srtobj | quote}}
|
|
15
|
+
rdsfile <- {{out.rdsfile | quote}}
|
|
16
|
+
joboutdir <- {{job.outdir | quote}}
|
|
17
|
+
envs <- {{envs | r: todot = "-"}}
|
|
18
|
+
|
|
19
|
+
options(str = strOptions(vec.len = 5, digits.d = 5))
|
|
20
|
+
options(future.globals.maxSize = 80000 * 1024^2)
|
|
21
|
+
plan(strategy = "multicore", workers = envs$ncores)
|
|
22
|
+
|
|
23
|
+
# Expand a scalar dims specification into a sequence, e.g. 30 -> 1:30.
#
# @param args A list of function arguments (e.g. case$RunUMAP); may be NULL.
# @param name Name of the element of `args` that holds the dims ("dims" by default).
# @return `args`, with `args[[name]]` replaced by `1:args[[name]]` when it is a
#   single number. NULL input, a vector of several dims, or a missing/non-numeric
#   element is returned untouched.
.expand_dims <- function(args, name = "dims") {
    if (is.null(args)) {
        return(args)
    }
    dims <- args[[name]]
    # Only a length-1 numeric is shorthand for "the first N dims".
    # (The length must be compared to 1 outside of length(); comparing inside
    # made the test succeed for any non-empty vector.)
    if (is.numeric(dims) && length(dims) == 1) {
        args[[name]] <- 1:dims
    }
    args
}
|
|
30
|
+
|
|
31
|
+
envs$RunUMAP <- .expand_dims(envs$RunUMAP)
|
|
32
|
+
envs$FindNeighbors <- .expand_dims(envs$FindNeighbors)
|
|
33
|
+
|
|
34
|
+
log_info("Reading Seurat object ...")
|
|
35
|
+
srtobj <- readRDS(srtfile)
|
|
36
|
+
|
|
37
|
+
if (isTRUE(envs$cache)) { envs$cache <- joboutdir }
|
|
38
|
+
if (length(envs$cache) > 1) {
|
|
39
|
+
log_warn("Multiple cache directories (envs.cache) detected, using the first one.")
|
|
40
|
+
envs$cache <- envs$cache[1]
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
# Add/overwrite meta.data columns from envs$mutaters before any subsetting.
# Each element is parsed with parse_expr() and spliced into mutate();
# presumably envs$mutaters is a named list of expression strings whose names
# become the column names -- confirm against the process definition.
if (!is.null(envs$mutaters) && length(envs$mutaters) > 0) {
    log_info("Mutating Seurat object ...")
    # The mutaters live on `envs`; there is no standalone `mutaters` object.
    srtobj@meta.data <- srtobj@meta.data %>%
        mutate(!!!lapply(envs$mutaters, parse_expr))
}
|
|
48
|
+
|
|
49
|
+
if (length(envs$cases) == 0) {
|
|
50
|
+
envs$cases <- list(subcluster = list())
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
for (key in names(envs$cases)) {
|
|
54
|
+
log_info("")
|
|
55
|
+
log_info("Running case '{key}' ...")
|
|
56
|
+
log_info("===========================================")
|
|
57
|
+
case <- envs$cases[[key]]
|
|
58
|
+
case$RunUMAP <- .expand_dims(case$RunUMAP)
|
|
59
|
+
case$FindNeighbors <- .expand_dims(case$FindNeighbors)
|
|
60
|
+
|
|
61
|
+
case <- list_update(
|
|
62
|
+
list(
|
|
63
|
+
subset = envs$subset,
|
|
64
|
+
RunUMAP = envs$RunUMAP,
|
|
65
|
+
FindNeighbors = envs$FindNeighbors,
|
|
66
|
+
FindClusters = envs$FindClusters
|
|
67
|
+
),
|
|
68
|
+
case
|
|
69
|
+
)
|
|
70
|
+
|
|
71
|
+
if (is.null(case$subset) || length(case$subset) == 0) {
|
|
72
|
+
stop(paste0("`subset` for case '", key, "' is empty."))
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
log_info("- Subsetting ...")
|
|
76
|
+
sobj <- tryCatch({
|
|
77
|
+
srtobj %>% filter(!!parse_expr(case$subset))
|
|
78
|
+
}, error = function(e) {
|
|
79
|
+
stop(paste0(" Error in subset: ", e$message))
|
|
80
|
+
})
|
|
81
|
+
sobj_sig <- capture.output(str(sobj))
|
|
82
|
+
dig_sig <- digest::digest(sobj_sig, algo = "md5")
|
|
83
|
+
dig_sig <- substr(dig_sig, 1, 8)
|
|
84
|
+
cache_dir <- NULL
|
|
85
|
+
if (is.character(envs$cache)) {
|
|
86
|
+
cache_dir <- file.path(envs$cache, paste0(dig_sig, ".seurat_cache"))
|
|
87
|
+
dir.create(cache_dir, recursive = TRUE, showWarnings = FALSE)
|
|
88
|
+
writeLines(sobj_sig, file.path(cache_dir, "signature.txt"))
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
cached <- get_cached(case$RunUMAP, "RunUMAP", cache_dir)
|
|
92
|
+
reduc_name <- case$RunUMAP$reduction.name %||% "umap"
|
|
93
|
+
if (is.null(cached$data)) {
|
|
94
|
+
log_info("- Running RunUMAP ...")
|
|
95
|
+
umap_args <- list_setdefault(
|
|
96
|
+
case$RunUMAP,
|
|
97
|
+
object = sobj,
|
|
98
|
+
dims = 1:30,
|
|
99
|
+
reduction = sobj@misc$integrated_new_reduction %||% "pca"
|
|
100
|
+
)
|
|
101
|
+
ncells <- ncol(sobj)
|
|
102
|
+
umap_args$dims <- 1:min(max(umap_args$dims), ncells - 1)
|
|
103
|
+
umap_method <- case$RunUMAP$umap.method %||% "uwot"
|
|
104
|
+
if (umap_method == "uwot" && is.null(case$RunUMAP$n.neighbors)) {
|
|
105
|
+
# https://github.com/satijalab/seurat/issues/4312
|
|
106
|
+
umap_args$n.neighbors <- min(ncells - 1, 30)
|
|
107
|
+
}
|
|
108
|
+
sobj <- do_call(RunUMAP, umap_args)
|
|
109
|
+
cached$data <- list(reduc = sobj@reductions[[reduc_name]], commands = sobj@commands)
|
|
110
|
+
save_to_cache(cached, "RunUMAP", cache_dir)
|
|
111
|
+
} else {
|
|
112
|
+
log_info("- Loading cached RunUMAP ...")
|
|
113
|
+
sobj@reductions[[reduc_name]] <- cached$data$reduc
|
|
114
|
+
sobj@commands <- cached$data$commands
|
|
115
|
+
}
|
|
116
|
+
reduc <- cached$data$reduc
|
|
117
|
+
|
|
118
|
+
cached <- get_cached(case$FindNeighbors, "FindNeighbors", cache_dir)
|
|
119
|
+
if (is.null(cached$data)) {
|
|
120
|
+
log_info("- Running FindNeighbors ...")
|
|
121
|
+
case$FindNeighbors$object <- sobj
|
|
122
|
+
if (is.null(case$FindNeighbors$reduction)) {
|
|
123
|
+
case$FindNeighbors$reduction <- sobj@misc$integrated_new_reduction %||% "pca"
|
|
124
|
+
}
|
|
125
|
+
sobj <- do_call(FindNeighbors, case$FindNeighbors)
|
|
126
|
+
cached$data <- list(graphs = sobj@graphs, commands = sobj@commands)
|
|
127
|
+
save_to_cache(cached, "FindNeighbors", cache_dir)
|
|
128
|
+
} else {
|
|
129
|
+
log_info("- Loading cached FindNeighbors ...")
|
|
130
|
+
sobj@graphs <- cached$data$graphs
|
|
131
|
+
sobj@commands <- cached$data$commands
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
case$FindClusters$random.seed <- case$FindClusters$random.seed %||% 8525
|
|
135
|
+
resolution <- case$FindClusters$resolution %||% 0.8
|
|
136
|
+
if (is.character(resolution)) {
|
|
137
|
+
if (grepl(",", resolution)) {
|
|
138
|
+
resolution <- as.numeric(trimws(unlist(strsplit(resolution, ","))))
|
|
139
|
+
} else {
|
|
140
|
+
resolution <- as.numeric(resolution)
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
for (res in resolution) {
|
|
144
|
+
case$FindClusters$resolution <- res
|
|
145
|
+
cached <- get_cached(case$FindClusters, paste0("FindClusters_", res), cache_dir)
|
|
146
|
+
res_key <- paste0("seurat_clusters_", res)
|
|
147
|
+
if (is.null(cached$data)) {
|
|
148
|
+
log_info("- Running FindClusters at resolution: {res} ...")
|
|
149
|
+
case$FindClusters$object <- sobj
|
|
150
|
+
sobj1 <- do_call(FindClusters, case$FindClusters)
|
|
151
|
+
levels(sobj1$seurat_clusters) <- paste0("s", as.numeric(levels(sobj1$seurat_clusters)) + 1)
|
|
152
|
+
sobj1[[res_key]] <- sobj1$seurat_clusters
|
|
153
|
+
cached$data <- sobj1@meta.data[, res_key, drop = FALSE]
|
|
154
|
+
save_to_cache(cached, paste0("FindClusters_", res), cache_dir)
|
|
155
|
+
} else {
|
|
156
|
+
log_info("- Using cached FindClusters at resolution: {res} ...")
|
|
157
|
+
}
|
|
158
|
+
ident_table <- table(cached$data[[res_key]])
|
|
159
|
+
log_info(" Found {length(ident_table)} clusters")
|
|
160
|
+
print(ident_table)
|
|
161
|
+
cat("\n")
|
|
162
|
+
}
|
|
163
|
+
log_info("- Updating meta.data with subclusters...")
|
|
164
|
+
srtobj <- AddMetaData(srtobj, metadata = cached$data, col.name = key)
|
|
165
|
+
srtobj[[paste0("sub_umap_", key)]] <- reduc
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
log_info("Saving results ...")
|
|
169
|
+
saveRDS(srtobj, file = rdsfile)
|