biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biopipen/__init__.py +1 -1
- biopipen/core/config.toml +28 -0
- biopipen/core/filters.py +79 -4
- biopipen/core/proc.py +12 -3
- biopipen/core/testing.py +75 -3
- biopipen/ns/bam.py +148 -6
- biopipen/ns/bed.py +75 -0
- biopipen/ns/cellranger.py +186 -0
- biopipen/ns/cellranger_pipeline.py +126 -0
- biopipen/ns/cnv.py +19 -3
- biopipen/ns/cnvkit.py +1 -1
- biopipen/ns/cnvkit_pipeline.py +20 -12
- biopipen/ns/delim.py +34 -35
- biopipen/ns/gene.py +68 -23
- biopipen/ns/gsea.py +63 -37
- biopipen/ns/misc.py +39 -14
- biopipen/ns/plot.py +304 -1
- biopipen/ns/protein.py +183 -0
- biopipen/ns/regulatory.py +290 -0
- biopipen/ns/rnaseq.py +142 -5
- biopipen/ns/scrna.py +2053 -473
- biopipen/ns/scrna_metabolic_landscape.py +228 -382
- biopipen/ns/snp.py +659 -0
- biopipen/ns/stats.py +484 -0
- biopipen/ns/tcr.py +683 -98
- biopipen/ns/vcf.py +236 -2
- biopipen/ns/web.py +97 -6
- biopipen/reports/bam/CNVpytor.svelte +4 -9
- biopipen/reports/cellranger/CellRangerCount.svelte +18 -0
- biopipen/reports/cellranger/CellRangerSummary.svelte +16 -0
- biopipen/reports/cellranger/CellRangerVdj.svelte +18 -0
- biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
- biopipen/reports/common.svelte +15 -0
- biopipen/reports/protein/ProdigySummary.svelte +16 -0
- biopipen/reports/scrna/CellsDistribution.svelte +4 -39
- biopipen/reports/scrna/DimPlots.svelte +1 -1
- biopipen/reports/scrna/MarkersFinder.svelte +6 -126
- biopipen/reports/scrna/MetaMarkers.svelte +3 -75
- biopipen/reports/scrna/RadarPlots.svelte +4 -20
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +61 -22
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +88 -82
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +70 -10
- biopipen/reports/snp/PlinkCallRate.svelte +24 -0
- biopipen/reports/snp/PlinkFreq.svelte +18 -0
- biopipen/reports/snp/PlinkHWE.svelte +18 -0
- biopipen/reports/snp/PlinkHet.svelte +18 -0
- biopipen/reports/snp/PlinkIBD.svelte +18 -0
- biopipen/reports/tcr/CDR3AAPhyschem.svelte +19 -66
- biopipen/reports/tcr/ClonalStats.svelte +16 -0
- biopipen/reports/tcr/CloneResidency.svelte +3 -93
- biopipen/reports/tcr/Immunarch.svelte +4 -155
- biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
- biopipen/reports/tcr/TESSA.svelte +11 -28
- biopipen/reports/utils/misc.liq +22 -7
- biopipen/scripts/bam/BamMerge.py +11 -15
- biopipen/scripts/bam/BamSampling.py +90 -0
- biopipen/scripts/bam/BamSort.py +141 -0
- biopipen/scripts/bam/BamSplitChroms.py +10 -10
- biopipen/scripts/bam/BamSubsetByBed.py +38 -0
- biopipen/scripts/bam/CNAClinic.R +41 -5
- biopipen/scripts/bam/CNVpytor.py +153 -54
- biopipen/scripts/bam/ControlFREEC.py +13 -14
- biopipen/scripts/bam/SamtoolsView.py +33 -0
- biopipen/scripts/bed/Bed2Vcf.py +5 -5
- biopipen/scripts/bed/BedConsensus.py +5 -5
- biopipen/scripts/bed/BedLiftOver.sh +6 -4
- biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
- biopipen/scripts/bed/BedtoolsMakeWindows.py +47 -0
- biopipen/scripts/bed/BedtoolsMerge.py +4 -4
- biopipen/scripts/cellranger/CellRangerCount.py +138 -0
- biopipen/scripts/cellranger/CellRangerSummary.R +181 -0
- biopipen/scripts/cellranger/CellRangerVdj.py +112 -0
- biopipen/scripts/cnv/AneuploidyScore.R +55 -20
- biopipen/scripts/cnv/AneuploidyScoreSummary.R +221 -163
- biopipen/scripts/cnv/TMADScore.R +25 -9
- biopipen/scripts/cnv/TMADScoreSummary.R +57 -86
- biopipen/scripts/cnvkit/CNVkitAccess.py +7 -6
- biopipen/scripts/cnvkit/CNVkitAutobin.py +26 -18
- biopipen/scripts/cnvkit/CNVkitBatch.py +6 -6
- biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
- biopipen/scripts/cnvkit/CNVkitCoverage.py +4 -3
- biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
- biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
- biopipen/scripts/cnvkit/CNVkitGuessBaits.py +12 -8
- biopipen/scripts/cnvkit/CNVkitHeatmap.py +5 -5
- biopipen/scripts/cnvkit/CNVkitReference.py +6 -5
- biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
- biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
- biopipen/scripts/cnvkit/guess_baits.py +166 -93
- biopipen/scripts/delim/RowsBinder.R +1 -1
- biopipen/scripts/delim/SampleInfo.R +116 -118
- biopipen/scripts/gene/GeneNameConversion.R +67 -0
- biopipen/scripts/gene/GenePromoters.R +61 -0
- biopipen/scripts/gsea/Enrichr.R +5 -5
- biopipen/scripts/gsea/FGSEA.R +184 -50
- biopipen/scripts/gsea/GSEA.R +2 -2
- biopipen/scripts/gsea/PreRank.R +5 -5
- biopipen/scripts/misc/Config2File.py +2 -2
- biopipen/scripts/misc/Plot.R +80 -0
- biopipen/scripts/misc/Shell.sh +15 -0
- biopipen/scripts/misc/Str2File.py +2 -2
- biopipen/scripts/plot/Heatmap.R +3 -3
- biopipen/scripts/plot/Manhattan.R +147 -0
- biopipen/scripts/plot/QQPlot.R +146 -0
- biopipen/scripts/plot/ROC.R +88 -0
- biopipen/scripts/plot/Scatter.R +112 -0
- biopipen/scripts/plot/VennDiagram.R +5 -9
- biopipen/scripts/protein/MMCIF2PDB.py +33 -0
- biopipen/scripts/protein/PDB2Fasta.py +60 -0
- biopipen/scripts/protein/Prodigy.py +119 -0
- biopipen/scripts/protein/ProdigySummary.R +140 -0
- biopipen/scripts/protein/RMSD.py +178 -0
- biopipen/scripts/regulatory/MotifAffinityTest.R +102 -0
- biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +127 -0
- biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +104 -0
- biopipen/scripts/regulatory/MotifScan.py +159 -0
- biopipen/scripts/regulatory/VariantMotifPlot.R +78 -0
- biopipen/scripts/regulatory/motifs-common.R +324 -0
- biopipen/scripts/rnaseq/Simulation-ESCO.R +180 -0
- biopipen/scripts/rnaseq/Simulation-RUVcorr.R +45 -0
- biopipen/scripts/rnaseq/Simulation.R +21 -0
- biopipen/scripts/rnaseq/UnitConversion.R +325 -54
- biopipen/scripts/scrna/AnnData2Seurat.R +40 -0
- biopipen/scripts/scrna/CCPlotR-patch.R +161 -0
- biopipen/scripts/scrna/CellCellCommunication.py +150 -0
- biopipen/scripts/scrna/CellCellCommunicationPlots.R +93 -0
- biopipen/scripts/scrna/CellSNPLite.py +30 -0
- biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +185 -0
- biopipen/scripts/scrna/CellTypeAnnotation-direct.R +68 -31
- biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +27 -22
- biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +28 -20
- biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +48 -25
- biopipen/scripts/scrna/CellTypeAnnotation.R +37 -1
- biopipen/scripts/scrna/CellsDistribution.R +456 -167
- biopipen/scripts/scrna/DimPlots.R +1 -1
- biopipen/scripts/scrna/ExprImputation-alra.R +109 -0
- biopipen/scripts/scrna/ExprImputation-rmagic.R +256 -0
- biopipen/scripts/scrna/{ExprImpution-scimpute.R → ExprImputation-scimpute.R} +8 -5
- biopipen/scripts/scrna/ExprImputation.R +7 -0
- biopipen/scripts/scrna/LoomTo10X.R +51 -0
- biopipen/scripts/scrna/MQuad.py +25 -0
- biopipen/scripts/scrna/MarkersFinder.R +679 -400
- biopipen/scripts/scrna/MetaMarkers.R +265 -161
- biopipen/scripts/scrna/ModuleScoreCalculator.R +66 -11
- biopipen/scripts/scrna/PseudoBulkDEG.R +678 -0
- biopipen/scripts/scrna/RadarPlots.R +355 -134
- biopipen/scripts/scrna/ScFGSEA.R +298 -100
- biopipen/scripts/scrna/ScSimulation.R +65 -0
- biopipen/scripts/scrna/ScVelo.py +617 -0
- biopipen/scripts/scrna/Seurat2AnnData.R +7 -0
- biopipen/scripts/scrna/SeuratClusterStats-clustree.R +87 -0
- biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +36 -30
- biopipen/scripts/scrna/SeuratClusterStats-features.R +138 -187
- biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +81 -0
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +78 -89
- biopipen/scripts/scrna/SeuratClusterStats.R +47 -10
- biopipen/scripts/scrna/SeuratClustering.R +36 -233
- biopipen/scripts/scrna/SeuratLoading.R +2 -2
- biopipen/scripts/scrna/SeuratMap2Ref.R +84 -113
- biopipen/scripts/scrna/SeuratMetadataMutater.R +16 -6
- biopipen/scripts/scrna/SeuratPreparing.R +223 -173
- biopipen/scripts/scrna/SeuratSubClustering.R +64 -0
- biopipen/scripts/scrna/SeuratTo10X.R +27 -0
- biopipen/scripts/scrna/Slingshot.R +65 -0
- biopipen/scripts/scrna/Subset10X.R +2 -2
- biopipen/scripts/scrna/TopExpressingGenes.R +169 -135
- biopipen/scripts/scrna/celltypist-wrapper.py +195 -0
- biopipen/scripts/scrna/scvelo_paga.py +313 -0
- biopipen/scripts/scrna/seurat_anndata_conversion.py +98 -0
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +447 -82
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +348 -241
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +188 -166
- biopipen/scripts/snp/MatrixEQTL.R +217 -0
- biopipen/scripts/snp/Plink2GTMat.py +148 -0
- biopipen/scripts/snp/PlinkCallRate.R +199 -0
- biopipen/scripts/snp/PlinkFilter.py +100 -0
- biopipen/scripts/snp/PlinkFreq.R +291 -0
- biopipen/scripts/snp/PlinkFromVcf.py +81 -0
- biopipen/scripts/snp/PlinkHWE.R +85 -0
- biopipen/scripts/snp/PlinkHet.R +96 -0
- biopipen/scripts/snp/PlinkIBD.R +196 -0
- biopipen/scripts/snp/PlinkSimulation.py +124 -0
- biopipen/scripts/snp/PlinkUpdateName.py +124 -0
- biopipen/scripts/stats/ChowTest.R +146 -0
- biopipen/scripts/stats/DiffCoexpr.R +152 -0
- biopipen/scripts/stats/LiquidAssoc.R +135 -0
- biopipen/scripts/stats/Mediation.R +108 -0
- biopipen/scripts/stats/MetaPvalue.R +130 -0
- biopipen/scripts/stats/MetaPvalue1.R +74 -0
- biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
- biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
- biopipen/scripts/tcr/Attach2Seurat.R +3 -2
- biopipen/scripts/tcr/CDR3AAPhyschem.R +211 -143
- biopipen/scripts/tcr/CDR3Clustering.R +343 -0
- biopipen/scripts/tcr/ClonalStats.R +526 -0
- biopipen/scripts/tcr/CloneResidency.R +255 -131
- biopipen/scripts/tcr/CloneSizeQQPlot.R +4 -4
- biopipen/scripts/tcr/GIANA/GIANA.py +1356 -797
- biopipen/scripts/tcr/GIANA/GIANA4.py +1362 -789
- biopipen/scripts/tcr/GIANA/query.py +164 -162
- biopipen/scripts/tcr/Immunarch-basic.R +31 -9
- biopipen/scripts/tcr/Immunarch-clonality.R +25 -5
- biopipen/scripts/tcr/Immunarch-diversity.R +352 -134
- biopipen/scripts/tcr/Immunarch-geneusage.R +45 -5
- biopipen/scripts/tcr/Immunarch-kmer.R +68 -8
- biopipen/scripts/tcr/Immunarch-overlap.R +84 -4
- biopipen/scripts/tcr/Immunarch-spectratyping.R +35 -6
- biopipen/scripts/tcr/Immunarch-tracking.R +38 -6
- biopipen/scripts/tcr/Immunarch-vjjunc.R +165 -0
- biopipen/scripts/tcr/Immunarch.R +63 -11
- biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
- biopipen/scripts/tcr/ImmunarchFilter.R +4 -4
- biopipen/scripts/tcr/ImmunarchLoading.R +38 -29
- biopipen/scripts/tcr/SampleDiversity.R +1 -1
- biopipen/scripts/tcr/ScRepCombiningExpression.R +40 -0
- biopipen/scripts/tcr/ScRepLoading.R +166 -0
- biopipen/scripts/tcr/TCRClusterStats.R +176 -22
- biopipen/scripts/tcr/TCRDock.py +110 -0
- biopipen/scripts/tcr/TESSA.R +102 -118
- biopipen/scripts/tcr/VJUsage.R +5 -5
- biopipen/scripts/tcr/immunarch-patched.R +142 -0
- biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
- biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
- biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
- biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
- biopipen/scripts/vcf/BcftoolsSort.py +113 -0
- biopipen/scripts/vcf/BcftoolsView.py +73 -0
- biopipen/scripts/vcf/TruvariBench.sh +14 -7
- biopipen/scripts/vcf/TruvariBenchSummary.R +16 -13
- biopipen/scripts/vcf/TruvariConsistency.R +1 -1
- biopipen/scripts/vcf/Vcf2Bed.py +2 -2
- biopipen/scripts/vcf/VcfAnno.py +11 -11
- biopipen/scripts/vcf/VcfDownSample.sh +22 -10
- biopipen/scripts/vcf/VcfFilter.py +5 -5
- biopipen/scripts/vcf/VcfFix.py +7 -7
- biopipen/scripts/vcf/VcfFix_utils.py +13 -4
- biopipen/scripts/vcf/VcfIndex.py +3 -3
- biopipen/scripts/vcf/VcfIntersect.py +3 -3
- biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
- biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
- biopipen/scripts/vcf/bcftools_utils.py +52 -0
- biopipen/scripts/web/Download.py +8 -4
- biopipen/scripts/web/DownloadList.py +5 -5
- biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
- biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
- biopipen/scripts/web/gcloud_common.py +49 -0
- biopipen/utils/gene.py +108 -60
- biopipen/utils/misc.py +146 -20
- biopipen/utils/reference.py +64 -20
- biopipen/utils/reporter.py +177 -0
- biopipen/utils/vcf.py +1 -1
- biopipen-0.34.26.dist-info/METADATA +27 -0
- biopipen-0.34.26.dist-info/RECORD +292 -0
- {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
- {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +6 -2
- biopipen/ns/bcftools.py +0 -111
- biopipen/ns/scrna_basic.py +0 -255
- biopipen/reports/delim/SampleInfo.svelte +0 -36
- biopipen/reports/scrna/GeneExpressionInvistigation.svelte +0 -32
- biopipen/reports/scrna/ScFGSEA.svelte +0 -35
- biopipen/reports/scrna/SeuratClusterStats.svelte +0 -82
- biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -20
- biopipen/reports/scrna/SeuratPreparing.svelte +0 -38
- biopipen/reports/scrna/TopExpressingGenes.svelte +0 -55
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -31
- biopipen/reports/utils/gsea.liq +0 -110
- biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
- biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
- biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
- biopipen/scripts/gene/GeneNameConversion.py +0 -66
- biopipen/scripts/scrna/ExprImpution-alra.R +0 -32
- biopipen/scripts/scrna/ExprImpution-rmagic.R +0 -29
- biopipen/scripts/scrna/ExprImpution.R +0 -7
- biopipen/scripts/scrna/GeneExpressionInvistigation.R +0 -132
- biopipen/scripts/scrna/Write10X.R +0 -11
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -150
- biopipen/scripts/tcr/TCRClustering.R +0 -280
- biopipen/utils/common_docstrs.py +0 -61
- biopipen/utils/gene.R +0 -49
- biopipen/utils/gsea.R +0 -193
- biopipen/utils/io.R +0 -20
- biopipen/utils/misc.R +0 -114
- biopipen/utils/mutate_helpers.R +0 -433
- biopipen/utils/plot.R +0 -173
- biopipen/utils/rnaseq.R +0 -48
- biopipen/utils/single_cell.R +0 -115
- biopipen-0.21.0.dist-info/METADATA +0 -22
- biopipen-0.21.0.dist-info/RECORD +0 -218
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
"""This file is used to patch scvelo's paga to fix
|
|
2
|
+
https://github.com/theislab/scvelo/issues/1241
|
|
3
|
+
|
|
4
|
+
This is from pull request
|
|
5
|
+
https://github.com/theislab/scvelo/pull/1308
|
|
6
|
+
which has not been merged yet as of 2025-11-07.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
import pandas as pd
|
|
11
|
+
from scipy.sparse import csr_matrix
|
|
12
|
+
|
|
13
|
+
from scanpy.tools._paga import PAGA
|
|
14
|
+
import scvelo
|
|
15
|
+
|
|
16
|
+
# This is adapted from https://github.com/theislab/paga
|
|
17
|
+
from scvelo import logging as logg
|
|
18
|
+
from scvelo import settings
|
|
19
|
+
from scvelo.tools.rank_velocity_genes import velocity_clusters
|
|
20
|
+
from scvelo.tools.utils import strings_to_categoricals
|
|
21
|
+
from scvelo.tools.velocity_graph import vals_to_csr
|
|
22
|
+
from scvelo.tools.velocity_pseudotime import velocity_pseudotime
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# TODO: Finish docstrings
|
|
26
|
+
def get_igraph_from_adjacency(adjacency, directed=None):
    """Build an igraph graph from an adjacency matrix.

    Every nonzero entry ``adjacency[i, j]`` becomes an edge ``i -> j`` whose
    ``"weight"`` edge attribute holds the value of that entry.

    Parameters
    ----------
    adjacency
        Adjacency matrix providing ``.shape``, ``.nonzero()`` and indexing by
        a pair of index arrays.
    directed
        Forwarded unchanged to ``igraph.Graph(directed=...)``.

    Returns
    -------
    The constructed ``igraph.Graph``.
    """
    import igraph as ig

    n_vertices = adjacency.shape[0]
    src, dst = adjacency.nonzero()
    edge_weights = adjacency[src, dst]
    # Some matrix types hand back an ``np.matrix`` here; flatten it to 1-D.
    if isinstance(edge_weights, np.matrix):
        edge_weights = edge_weights.A1

    graph = ig.Graph(directed=directed)
    graph.add_vertices(n_vertices)  # this adds adjacency.shape[0] vertices
    graph.add_edges(list(zip(src, dst)))
    graph.es["weight"] = edge_weights

    n_built = graph.vcount()
    if n_built != n_vertices:
        logg.warn(
            f"The constructed graph has only {n_built} nodes. "
            "Your adjacency matrix contained redundant nodes."
        )
    return graph
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# TODO: Add docstrings
|
|
47
|
+
def get_sparse_from_igraph(graph, weight_attr=None):
    """Convert an igraph-like graph into a scipy CSR adjacency matrix.

    Parameters
    ----------
    graph
        Object exposing ``get_edgelist()``, ``is_directed()``, ``vcount()``
        and, when ``weight_attr`` is given, ``es[weight_attr]``.
    weight_attr
        Name of the edge attribute holding the edge weights. If ``None``,
        every edge gets weight ``1``.

    Returns
    -------
    ``csr_matrix`` of shape ``(graph.vcount(), graph.vcount())``. For an
    undirected graph each edge is written in both directions so that the
    result is symmetric. A graph without edges yields an empty matrix.
    """
    # Work on private copies: the edge list and weights are extended below,
    # and that must neither mutate what the graph handed out nor require the
    # weights to be a ``list`` (any sequence is accepted).
    edges = list(graph.get_edgelist())
    if weight_attr is None:
        weights = [1] * len(edges)
    else:
        weights = list(graph.es[weight_attr])

    if not graph.is_directed():
        # Mirror every edge (u, v) as (v, u), reusing the same weight.
        edges = edges + [(v, u) for u, v in edges]
        weights = weights + weights

    n_vertices = graph.vcount()
    shape = (n_vertices, n_vertices)
    if not edges:
        return csr_matrix(shape)

    rows, cols = zip(*edges)
    return csr_matrix((weights, (rows, cols)), shape=shape)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
# TODO: Finish docstrings
|
|
67
|
+
def set_row_csr(csr, rows, value=0):
    """Overwrite, in place, every stored entry of the given CSR rows.

    Parameters
    ----------
    csr
        Matrix in CSR format; its ``data`` array is modified in place.
    rows
        Iterable of row indices whose stored entries are overwritten.
    value
        Value written to those entries. When it equals ``0`` the resulting
        explicit zeros are dropped from the sparsity structure afterwards.
    """
    row_bounds = csr.indptr
    for r in rows:
        # Stored entries of row ``r`` live in data[indptr[r]:indptr[r + 1]].
        stored = slice(row_bounds[r], row_bounds[r + 1])
        csr.data[stored] = value
    if value == 0:
        csr.eliminate_zeros()
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
# TODO: Add docstrings
|
|
78
|
+
class PAGA_tree(PAGA):
    """PAGA variant that also derives velocity-directed group transitions.

    Adds :meth:`compute_transitions` to the ``PAGA`` base class. That method
    condenses the cell-level graph stored in ``adata.uns[f"{vkey}_graph"]``
    into a group-level matrix of transition confidences.
    """

    def __init__(
        self,
        adata,
        groups=None,
        vkey=None,
        use_time_prior=None,
        root_key=None,
        end_key=None,
        threshold_root_end_prior=None,
        minimum_spanning_tree=None,
    ):
        """Store the settings read by :meth:`compute_transitions`.

        Parameters
        ----------
        adata
            Annotated data object, forwarded to the ``PAGA`` base class.
        groups
            Key in ``adata.obs`` of the categorical grouping.
        vkey
            Prefix of the graph key; ``f"{vkey}_graph"`` is read from
            ``adata.uns``.
        use_time_prior
            Key in ``adata.obs`` holding a time value per cell. It is only
            applied when it is a string that exists in ``adata.obs``.
        root_key, end_key
            Keys in ``adata.obs`` holding root / end state values per cell.
            Each is only applied when it is a string present in ``adata.obs``.
        threshold_root_end_prior
            Cells whose root / end value exceeds this threshold are treated
            as root / end cells. ``None`` means ``0.9``.
        minimum_spanning_tree
            If truthy, the transition matrix is additionally pruned using a
            spanning tree.
        """
        super().__init__(adata=adata, groups=groups, model="v1.2")
        self.groups = groups
        self.vkey = vkey
        self.use_time_prior = use_time_prior
        self.root_key = root_key
        self.end_key = end_key
        self.threshold_root_end_prior = threshold_root_end_prior
        if self.threshold_root_end_prior is None:
            self.threshold_root_end_prior = 0.9
        self.minimum_spanning_tree = minimum_spanning_tree

    def compute_transitions(self):
        """Compute group-level transition confidences from the velocity graph.

        Sets ``self.transitions_confidence`` (sparse matrix, transposed so
        that it is indexed ``[target, source]`` relative to the internal
        ``T[source, target]``) and ``self.threshold``.

        Raises
        ------
        ImportError
            If ``igraph`` is not installed.
        ValueError
            If ``adata.uns`` has no ``f"{vkey}_graph"`` entry, or that entry
            is not of shape ``(n_obs, n_obs)``.
        """
        try:
            import igraph
        except ImportError as err:
            raise ImportError(
                "To run paga, you need to install `pip install igraph`"
            ) from err
        vkey = f"{self.vkey}_graph"
        if vkey not in self._adata.uns:
            # Report the key that was actually looked up, not a fixed name.
            raise ValueError(
                "The passed AnnData needs to have an `uns` annotation "
                f"with key '{vkey}' - a sparse matrix from RNA velocity."
            )
        velocity_graph = self._adata.uns[vkey]
        expected_shape = (self._adata.n_obs, self._adata.n_obs)
        if velocity_graph.shape != expected_shape:
            raise ValueError(
                f"The passed '{vkey}' has shape {velocity_graph.shape} "
                f"but should have shape {expected_shape}"
            )

        clusters = self._adata.obs[self.groups]
        cats = clusters.cat.categories
        # Boolean graph: keep only entries larger than 0.1.
        vgraph = velocity_graph > 0.1
        time_prior = self.use_time_prior

        if isinstance(time_prior, str) and time_prior in self._adata.obs.keys():
            vpt = self._adata.obs[time_prior].values
            vpt_mean = self._adata.obs.groupby(self.groups)[time_prior].mean()
            # Per-cell value: the mean time of the group the cell belongs to.
            vpt_means = np.array([vpt_mean[cat] for cat in clusters])
            rows, cols, vals = [], [], []
            for i in range(vgraph.shape[0]):
                indices = vgraph[i].indices
                # Keep an edge i -> j only if j is later in time than i and
                # j's group mean is not more than 0.1 below i's group mean.
                idx_bool = vpt[i] < vpt[indices]
                idx_bool &= vpt_means[indices] > vpt_means[i] - 0.1
                cols.extend(indices[idx_bool])
                vals.extend(vgraph[i].data[idx_bool])
                rows.extend([i] * np.sum(idx_bool))
            vgraph = vals_to_csr(vals, rows, cols, shape=vgraph.shape)

        lb = self.threshold_root_end_prior  # cells to be considered as terminal states
        if isinstance(self.end_key, str) and self.end_key in self._adata.obs.keys():
            # End cells: drop all their outgoing edges (rows).
            set_row_csr(vgraph, rows=np.where(self._adata.obs[self.end_key] > lb)[0])
        if isinstance(self.root_key, str) and self.root_key in self._adata.obs.keys():
            # Root cells: drop all their incoming edges (columns).
            vgraph[:, np.where(self._adata.obs[self.root_key] > lb)[0]] = 0
            vgraph.eliminate_zeros()

        membership = self._adata.obs[self.groups].cat.codes.values
        g = get_igraph_from_adjacency(vgraph, directed=True)
        vc = igraph.VertexClustering(g, membership=membership)
        # Collapse cells into their groups, summing the edge weights.
        cg_full = vc.cluster_graph(combine_edges="sum")
        transitions = get_sparse_from_igraph(cg_full, weight_attr="weight")
        # Net flow between each pair of groups.
        transitions = transitions - transitions.T
        transitions_conf = transitions.copy()
        transitions = transitions.tocoo()
        total_n = self._neighbors.n_neighbors * np.array(vc.sizes())
        for i, j, v in zip(transitions.row, transitions.col, transitions.data):
            # Negative net flow is discarded; positive flow is normalised by
            # the geometric mean of the two groups' total_n values.
            reference = np.sqrt(total_n[i] * total_n[j])
            transitions_conf[i, j] = 0 if v < 0 else v / reference
        transitions_conf.eliminate_zeros()

        # remove non-confident direct paths if more confident indirect path is found.
        T = transitions_conf.toarray()
        # Smallest per-column maximum over the positive entries, floored at
        # 0.01 (non-positive entries become NaN/inf-free via the nan* calls).
        threshold = max(np.nanmin(np.nanmax(T / (T > 0), axis=0)) - 1e-6, 0.01)
        T *= T > threshold
        for i in range(len(T)):
            idx = T[i] > 0
            if np.any(idx):
                # indirect[j] = max over successors k of min(T[i, k], T[k, j]),
                # i.e. the bottleneck of the best two-step path i -> k -> j.
                indirect = np.clip(T[idx], None, T[i][idx][:, None]).max(0)
                T[i, T[i] < indirect] = 0

        if self.minimum_spanning_tree:
            T_tmp = T.copy()
            T_num = T > 0
            T_sum = np.sum(T_num, 0)
            T_max = np.max(T_tmp)
            for i in range(len(T_tmp)):
                if T_sum[i] == 1:
                    # A group with a single incoming edge: raise that edge to
                    # the maximum weight in the working copy.
                    T_tmp[np.where(T_num[:, i])[0][0], i] = T_max
            from scipy.sparse.csgraph import minimum_spanning_tree

            # Spanning tree of the negated weights; keep only its edges,
            # with their original weights from T.
            T_tmp = np.abs(minimum_spanning_tree(-T_tmp).toarray()) > 0
            T = T_tmp * T

        transitions_conf = csr_matrix(T)
        self.transitions_confidence = transitions_conf.T

        # set threshold for minimal spanning tree.
        df = pd.DataFrame(T, index=cats, columns=cats)
        self.threshold = np.nanmin(np.nanmax(df.values / (df.values > 0), axis=0))
        self.threshold = max(self.threshold - 1e-6, 0.01)
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def paga(
    adata,
    groups=None,
    vkey="velocity",
    use_time_prior=True,
    root_key=None,
    end_key=None,
    threshold_root_end_prior=None,
    minimum_spanning_tree=True,
    copy=False,
):
    """Compute a PAGA graph with velocity-directed edges.

    Partition-based graph abstraction (PAGA) summarises the single-cell graph
    as a much simpler graph of partitions (groups, clusters) whose edge
    weights represent confidence in the presence of connections
    :cite:p:`Wolf19`. On top of the connectivities, this function stores
    directed transition confidences derived from the velocity graph.

    Parameters
    ----------
    adata : :class:`~anndata.AnnData`
        An annotated data matrix. ``adata.uns`` must contain ``'neighbors'``.
    groups : key for categorical in `adata.obs`, optional (default: `None`)
        Categorical annotation of observations to use as partitions.
        If `None`, `'clusters'` is used when present in `adata.obs`,
        otherwise `'louvain'` when present. If `'velocity_clusters'` is
        requested but missing, it is computed first.
    vkey : `str` (default: `'velocity'`)
        Prefix of the velocity graph key; forwarded to the pseudotime
        computation and to the transition computation.
    use_time_prior : `str` or bool, optional (default: True)
        Obs key for pseudo-time values. Any other truthy value selects
        `'velocity_pseudotime'`, which is computed if not yet available.
    root_key : `str` or bool, optional (default: None)
        Obs key for root states.
    end_key : `str` or bool, optional (default: None)
        Obs key for end states.
    threshold_root_end_prior : `float` (default: 0.9)
        Threshold for root and final states priors. Values above the
        threshold are considered terminal and included as prior.
    minimum_spanning_tree : bool, optional (default: True)
        Whether to prune the tree such that a path from A-to-B
        is removed if another more confident path exists.
    copy : `bool`, optional (default: `False`)
        Work on (and return) a copy of `adata` instead of modifying it
        in place.

    Returns
    -------
    The modified copy if `copy` is true, otherwise `None`. In both cases the
    following entries are written to `.uns`:

    paga/connectivities
        The full adjacency matrix of the abstracted graph.
    paga/connectivities_tree
        The adjacency matrix of the tree-like subgraph.
    paga/transitions_confidence
        The adjacency matrix of the abstracted directed graph.
    paga/threshold, paga/groups, `{groups}_sizes`
        The edge threshold, the grouping key used and the group sizes.

    Raises
    ------
    ValueError
        If `adata.uns` has no `'neighbors'` entry.
    """
    if "neighbors" not in adata.uns:
        raise ValueError(
            "You need to run `pp.neighbors` first to compute a neighborhood graph."
        )

    if copy:
        adata = adata.copy()
    strings_to_categoricals(adata)

    def _in_obs(key):
        # Looked up afresh on every call: helpers below may add obs columns.
        return key in adata.obs.keys()

    if groups is None:
        if _in_obs("clusters"):
            groups = "clusters"
        elif _in_obs("louvain"):
            groups = "louvain"
        # otherwise ``groups`` deliberately stays ``None``
    elif groups == "velocity_clusters" and not _in_obs("velocity_clusters"):
        velocity_clusters(adata)

    if use_time_prior and not isinstance(use_time_prior, str):
        use_time_prior = "velocity_pseudotime"
        if not _in_obs(use_time_prior):
            velocity_pseudotime(adata, vkey=vkey, root_key=root_key, end_key=end_key)

    priors = [key for key in (use_time_prior, root_key, end_key) if _in_obs(key)]
    prior_note = f"using priors: {priors}" if priors else ""
    logg.info("running PAGA", prior_note, r=True)

    model = PAGA_tree(
        adata,
        groups,
        vkey=vkey,
        use_time_prior=use_time_prior,
        root_key=root_key,
        end_key=end_key,
        threshold_root_end_prior=threshold_root_end_prior,
        minimum_spanning_tree=minimum_spanning_tree,
    )

    if "paga" not in adata.uns:
        adata.uns["paga"] = {}

    # Undirected part: group connectivities and group sizes.
    model.compute_connectivities()
    adata.uns["paga"]["connectivities"] = model.connectivities
    adata.uns["paga"]["connectivities_tree"] = model.connectivities_tree
    adata.uns[f"{groups}_sizes"] = np.array(model.ns)

    # Directed part: velocity-based transition confidences.
    model.compute_transitions()
    adata.uns["paga"]["transitions_confidence"] = model.transitions_confidence
    adata.uns["paga"]["threshold"] = model.threshold
    adata.uns["paga"]["groups"] = groups

    line_end = " " if settings.verbosity > 2 else "\n"
    logg.info(" finished", time=True, end=line_end)
    logg.hint(
        "added\n"
        " 'paga/connectivities', connectivities adjacency (adata.uns)\n"
        " 'paga/connectivities_tree', connectivities subtree (adata.uns)\n"
        " 'paga/transitions_confidence', velocity transitions (adata.uns)"
    )

    if copy:
        return adata
    return None
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
# Monkey-patch: importing this module replaces ``scvelo.tl.paga`` with the
# ``paga`` function defined in this file.
scvelo.tl.paga = paga
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""Convert Seurat objects to AnnData format back and forth.
|
|
2
|
+
|
|
3
|
+
Need R and R packages Seurat, SeuratDisk and biopipen.utils.R installed.
|
|
4
|
+
"""
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def convert_seurat_to_anndata(
    input_file,
    output_file,
    assay=None,
    subset=None,
    rscript="Rscript",
    return_ident_col=False,
) -> None | str:
    """Convert Seurat object to AnnData format.

    The conversion is done by generating a small R script that calls
    ``ConvertSeuratToAnnData`` from the ``biopipen.utils`` R package and
    running it with ``rscript``.

    Args:
        input_file (str): Path to the input Seurat RDS or qs/qs2 file.
        output_file (str): Path to the output AnnData H5AD file.
        assay (str): Name of the assay to use in the Seurat object.
            A falsy value is passed to R as ``NULL``.
        subset (str): An R expression to subset the Seurat object to convert.
            A falsy value is passed to R as ``NULL``.
        rscript (str): R script executor.
        return_ident_col (bool): Whether to run a second R script that prints
            ``GetIdentityColumn(obj)`` for the input object and return it.

    Returns:
        The stripped output of the identity-column script when
        ``return_ident_col`` is true, otherwise ``None``.
    """
    import json
    import os
    from contextlib import suppress
    from tempfile import NamedTemporaryFile

    from biopipen.utils.misc import run_command

    def r_quote(value):
        # Render as a double-quoted R string literal. Backslashes, quotes and
        # control characters are escaped, so e.g. Windows paths or paths
        # containing quotes cannot break the generated script.
        return json.dumps(str(value), ensure_ascii=False)

    def run_r(code, **run_kwargs):
        # Write the script to a temporary file, run it, and always remove the
        # file afterwards (delete=False alone would leave it behind).
        # NOTE(review): assumes run_command blocks until Rscript has exited
        # for both fg=True and stdout="RETURN" -- confirm.
        with NamedTemporaryFile(suffix=".R", delete=False) as temp_script:
            temp_script.write(code.encode("utf-8"))
            temp_script_path = temp_script.name
        try:
            return run_command([rscript, temp_script_path], **run_kwargs)
        finally:
            # Best-effort cleanup; never mask an error from the R run.
            with suppress(OSError):
                os.unlink(temp_script_path)

    script = f"""
library(biopipen.utils)

assay <- {r_quote(assay) if assay else 'NULL'}
subset <- {r_quote(subset) if subset else 'NULL'}

ConvertSeuratToAnnData(
    {r_quote(input_file)}, {r_quote(output_file)}, assay = assay, subset = subset
)
"""
    run_r(script, fg=True)

    if not return_ident_col:
        return None

    ident_col_script = f"""
library(biopipen.utils)

obj <- read_obj({r_quote(input_file)})
cat(GetIdentityColumn(obj))
"""
    return run_r(ident_col_script, stdout="RETURN").strip()
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def convert_anndata_to_seurat(
    input_file,
    output_file,
    assay=None,
    rscript="Rscript",
) -> None:
    """Convert AnnData object to Seurat format.

    The conversion is done by generating a small R script that calls
    ``ConvertAnnDataToSeurat`` from the ``biopipen.utils`` R package and
    running it with ``rscript``.

    Args:
        input_file (str): Path to the input AnnData H5AD file.
        output_file (str): Path to the output Seurat RDS or qs/qs2 file.
        assay (str): Name of the assay to use in the Seurat object.
            A falsy value is passed to R as ``NULL``.
        rscript (str): R script executor.
    """
    import json
    import os
    from contextlib import suppress
    from tempfile import NamedTemporaryFile

    from biopipen.utils.misc import run_command

    def r_quote(value):
        # Render as a double-quoted R string literal. Backslashes, quotes and
        # control characters are escaped, so e.g. Windows paths or paths
        # containing quotes cannot break the generated script.
        return json.dumps(str(value), ensure_ascii=False)

    script = f"""
library(biopipen.utils)

assay <- {r_quote(assay) if assay else 'NULL'}

ConvertAnnDataToSeurat(
    {r_quote(input_file)}, {r_quote(output_file)}, assay = assay
)
"""

    # Save the script to a temporary file
    with NamedTemporaryFile(suffix=".R", delete=False) as temp_script:
        temp_script.write(script.encode("utf-8"))
        temp_script_path = temp_script.name

    # Run the R script using the provided Rscript command, then always remove
    # the temporary script (delete=False alone would leave it behind).
    # NOTE(review): assumes run_command(fg=True) blocks until Rscript has
    # exited -- confirm.
    try:
        run_command([rscript, temp_script_path], fg=True)
    finally:
        # Best-effort cleanup; never mask an error from the R run.
        with suppress(OSError):
            os.unlink(temp_script_path)
|