biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (290) hide show
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/config.toml +28 -0
  3. biopipen/core/filters.py +79 -4
  4. biopipen/core/proc.py +12 -3
  5. biopipen/core/testing.py +75 -3
  6. biopipen/ns/bam.py +148 -6
  7. biopipen/ns/bed.py +75 -0
  8. biopipen/ns/cellranger.py +186 -0
  9. biopipen/ns/cellranger_pipeline.py +126 -0
  10. biopipen/ns/cnv.py +19 -3
  11. biopipen/ns/cnvkit.py +1 -1
  12. biopipen/ns/cnvkit_pipeline.py +20 -12
  13. biopipen/ns/delim.py +34 -35
  14. biopipen/ns/gene.py +68 -23
  15. biopipen/ns/gsea.py +63 -37
  16. biopipen/ns/misc.py +39 -14
  17. biopipen/ns/plot.py +304 -1
  18. biopipen/ns/protein.py +183 -0
  19. biopipen/ns/regulatory.py +290 -0
  20. biopipen/ns/rnaseq.py +142 -5
  21. biopipen/ns/scrna.py +2053 -473
  22. biopipen/ns/scrna_metabolic_landscape.py +228 -382
  23. biopipen/ns/snp.py +659 -0
  24. biopipen/ns/stats.py +484 -0
  25. biopipen/ns/tcr.py +683 -98
  26. biopipen/ns/vcf.py +236 -2
  27. biopipen/ns/web.py +97 -6
  28. biopipen/reports/bam/CNVpytor.svelte +4 -9
  29. biopipen/reports/cellranger/CellRangerCount.svelte +18 -0
  30. biopipen/reports/cellranger/CellRangerSummary.svelte +16 -0
  31. biopipen/reports/cellranger/CellRangerVdj.svelte +18 -0
  32. biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
  33. biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
  34. biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
  35. biopipen/reports/common.svelte +15 -0
  36. biopipen/reports/protein/ProdigySummary.svelte +16 -0
  37. biopipen/reports/scrna/CellsDistribution.svelte +4 -39
  38. biopipen/reports/scrna/DimPlots.svelte +1 -1
  39. biopipen/reports/scrna/MarkersFinder.svelte +6 -126
  40. biopipen/reports/scrna/MetaMarkers.svelte +3 -75
  41. biopipen/reports/scrna/RadarPlots.svelte +4 -20
  42. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +61 -22
  43. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +88 -82
  44. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +70 -10
  45. biopipen/reports/snp/PlinkCallRate.svelte +24 -0
  46. biopipen/reports/snp/PlinkFreq.svelte +18 -0
  47. biopipen/reports/snp/PlinkHWE.svelte +18 -0
  48. biopipen/reports/snp/PlinkHet.svelte +18 -0
  49. biopipen/reports/snp/PlinkIBD.svelte +18 -0
  50. biopipen/reports/tcr/CDR3AAPhyschem.svelte +19 -66
  51. biopipen/reports/tcr/ClonalStats.svelte +16 -0
  52. biopipen/reports/tcr/CloneResidency.svelte +3 -93
  53. biopipen/reports/tcr/Immunarch.svelte +4 -155
  54. biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
  55. biopipen/reports/tcr/TESSA.svelte +11 -28
  56. biopipen/reports/utils/misc.liq +22 -7
  57. biopipen/scripts/bam/BamMerge.py +11 -15
  58. biopipen/scripts/bam/BamSampling.py +90 -0
  59. biopipen/scripts/bam/BamSort.py +141 -0
  60. biopipen/scripts/bam/BamSplitChroms.py +10 -10
  61. biopipen/scripts/bam/BamSubsetByBed.py +38 -0
  62. biopipen/scripts/bam/CNAClinic.R +41 -5
  63. biopipen/scripts/bam/CNVpytor.py +153 -54
  64. biopipen/scripts/bam/ControlFREEC.py +13 -14
  65. biopipen/scripts/bam/SamtoolsView.py +33 -0
  66. biopipen/scripts/bed/Bed2Vcf.py +5 -5
  67. biopipen/scripts/bed/BedConsensus.py +5 -5
  68. biopipen/scripts/bed/BedLiftOver.sh +6 -4
  69. biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
  70. biopipen/scripts/bed/BedtoolsMakeWindows.py +47 -0
  71. biopipen/scripts/bed/BedtoolsMerge.py +4 -4
  72. biopipen/scripts/cellranger/CellRangerCount.py +138 -0
  73. biopipen/scripts/cellranger/CellRangerSummary.R +181 -0
  74. biopipen/scripts/cellranger/CellRangerVdj.py +112 -0
  75. biopipen/scripts/cnv/AneuploidyScore.R +55 -20
  76. biopipen/scripts/cnv/AneuploidyScoreSummary.R +221 -163
  77. biopipen/scripts/cnv/TMADScore.R +25 -9
  78. biopipen/scripts/cnv/TMADScoreSummary.R +57 -86
  79. biopipen/scripts/cnvkit/CNVkitAccess.py +7 -6
  80. biopipen/scripts/cnvkit/CNVkitAutobin.py +26 -18
  81. biopipen/scripts/cnvkit/CNVkitBatch.py +6 -6
  82. biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
  83. biopipen/scripts/cnvkit/CNVkitCoverage.py +4 -3
  84. biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
  85. biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
  86. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +12 -8
  87. biopipen/scripts/cnvkit/CNVkitHeatmap.py +5 -5
  88. biopipen/scripts/cnvkit/CNVkitReference.py +6 -5
  89. biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
  90. biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
  91. biopipen/scripts/cnvkit/guess_baits.py +166 -93
  92. biopipen/scripts/delim/RowsBinder.R +1 -1
  93. biopipen/scripts/delim/SampleInfo.R +116 -118
  94. biopipen/scripts/gene/GeneNameConversion.R +67 -0
  95. biopipen/scripts/gene/GenePromoters.R +61 -0
  96. biopipen/scripts/gsea/Enrichr.R +5 -5
  97. biopipen/scripts/gsea/FGSEA.R +184 -50
  98. biopipen/scripts/gsea/GSEA.R +2 -2
  99. biopipen/scripts/gsea/PreRank.R +5 -5
  100. biopipen/scripts/misc/Config2File.py +2 -2
  101. biopipen/scripts/misc/Plot.R +80 -0
  102. biopipen/scripts/misc/Shell.sh +15 -0
  103. biopipen/scripts/misc/Str2File.py +2 -2
  104. biopipen/scripts/plot/Heatmap.R +3 -3
  105. biopipen/scripts/plot/Manhattan.R +147 -0
  106. biopipen/scripts/plot/QQPlot.R +146 -0
  107. biopipen/scripts/plot/ROC.R +88 -0
  108. biopipen/scripts/plot/Scatter.R +112 -0
  109. biopipen/scripts/plot/VennDiagram.R +5 -9
  110. biopipen/scripts/protein/MMCIF2PDB.py +33 -0
  111. biopipen/scripts/protein/PDB2Fasta.py +60 -0
  112. biopipen/scripts/protein/Prodigy.py +119 -0
  113. biopipen/scripts/protein/ProdigySummary.R +140 -0
  114. biopipen/scripts/protein/RMSD.py +178 -0
  115. biopipen/scripts/regulatory/MotifAffinityTest.R +102 -0
  116. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +127 -0
  117. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +104 -0
  118. biopipen/scripts/regulatory/MotifScan.py +159 -0
  119. biopipen/scripts/regulatory/VariantMotifPlot.R +78 -0
  120. biopipen/scripts/regulatory/motifs-common.R +324 -0
  121. biopipen/scripts/rnaseq/Simulation-ESCO.R +180 -0
  122. biopipen/scripts/rnaseq/Simulation-RUVcorr.R +45 -0
  123. biopipen/scripts/rnaseq/Simulation.R +21 -0
  124. biopipen/scripts/rnaseq/UnitConversion.R +325 -54
  125. biopipen/scripts/scrna/AnnData2Seurat.R +40 -0
  126. biopipen/scripts/scrna/CCPlotR-patch.R +161 -0
  127. biopipen/scripts/scrna/CellCellCommunication.py +150 -0
  128. biopipen/scripts/scrna/CellCellCommunicationPlots.R +93 -0
  129. biopipen/scripts/scrna/CellSNPLite.py +30 -0
  130. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +185 -0
  131. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +68 -31
  132. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +27 -22
  133. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +28 -20
  134. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +48 -25
  135. biopipen/scripts/scrna/CellTypeAnnotation.R +37 -1
  136. biopipen/scripts/scrna/CellsDistribution.R +456 -167
  137. biopipen/scripts/scrna/DimPlots.R +1 -1
  138. biopipen/scripts/scrna/ExprImputation-alra.R +109 -0
  139. biopipen/scripts/scrna/ExprImputation-rmagic.R +256 -0
  140. biopipen/scripts/scrna/{ExprImpution-scimpute.R → ExprImputation-scimpute.R} +8 -5
  141. biopipen/scripts/scrna/ExprImputation.R +7 -0
  142. biopipen/scripts/scrna/LoomTo10X.R +51 -0
  143. biopipen/scripts/scrna/MQuad.py +25 -0
  144. biopipen/scripts/scrna/MarkersFinder.R +679 -400
  145. biopipen/scripts/scrna/MetaMarkers.R +265 -161
  146. biopipen/scripts/scrna/ModuleScoreCalculator.R +66 -11
  147. biopipen/scripts/scrna/PseudoBulkDEG.R +678 -0
  148. biopipen/scripts/scrna/RadarPlots.R +355 -134
  149. biopipen/scripts/scrna/ScFGSEA.R +298 -100
  150. biopipen/scripts/scrna/ScSimulation.R +65 -0
  151. biopipen/scripts/scrna/ScVelo.py +617 -0
  152. biopipen/scripts/scrna/Seurat2AnnData.R +7 -0
  153. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +87 -0
  154. biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +36 -30
  155. biopipen/scripts/scrna/SeuratClusterStats-features.R +138 -187
  156. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +81 -0
  157. biopipen/scripts/scrna/SeuratClusterStats-stats.R +78 -89
  158. biopipen/scripts/scrna/SeuratClusterStats.R +47 -10
  159. biopipen/scripts/scrna/SeuratClustering.R +36 -233
  160. biopipen/scripts/scrna/SeuratLoading.R +2 -2
  161. biopipen/scripts/scrna/SeuratMap2Ref.R +84 -113
  162. biopipen/scripts/scrna/SeuratMetadataMutater.R +16 -6
  163. biopipen/scripts/scrna/SeuratPreparing.R +223 -173
  164. biopipen/scripts/scrna/SeuratSubClustering.R +64 -0
  165. biopipen/scripts/scrna/SeuratTo10X.R +27 -0
  166. biopipen/scripts/scrna/Slingshot.R +65 -0
  167. biopipen/scripts/scrna/Subset10X.R +2 -2
  168. biopipen/scripts/scrna/TopExpressingGenes.R +169 -135
  169. biopipen/scripts/scrna/celltypist-wrapper.py +195 -0
  170. biopipen/scripts/scrna/scvelo_paga.py +313 -0
  171. biopipen/scripts/scrna/seurat_anndata_conversion.py +98 -0
  172. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +447 -82
  173. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +348 -241
  174. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +188 -166
  175. biopipen/scripts/snp/MatrixEQTL.R +217 -0
  176. biopipen/scripts/snp/Plink2GTMat.py +148 -0
  177. biopipen/scripts/snp/PlinkCallRate.R +199 -0
  178. biopipen/scripts/snp/PlinkFilter.py +100 -0
  179. biopipen/scripts/snp/PlinkFreq.R +291 -0
  180. biopipen/scripts/snp/PlinkFromVcf.py +81 -0
  181. biopipen/scripts/snp/PlinkHWE.R +85 -0
  182. biopipen/scripts/snp/PlinkHet.R +96 -0
  183. biopipen/scripts/snp/PlinkIBD.R +196 -0
  184. biopipen/scripts/snp/PlinkSimulation.py +124 -0
  185. biopipen/scripts/snp/PlinkUpdateName.py +124 -0
  186. biopipen/scripts/stats/ChowTest.R +146 -0
  187. biopipen/scripts/stats/DiffCoexpr.R +152 -0
  188. biopipen/scripts/stats/LiquidAssoc.R +135 -0
  189. biopipen/scripts/stats/Mediation.R +108 -0
  190. biopipen/scripts/stats/MetaPvalue.R +130 -0
  191. biopipen/scripts/stats/MetaPvalue1.R +74 -0
  192. biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
  193. biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
  194. biopipen/scripts/tcr/Attach2Seurat.R +3 -2
  195. biopipen/scripts/tcr/CDR3AAPhyschem.R +211 -143
  196. biopipen/scripts/tcr/CDR3Clustering.R +343 -0
  197. biopipen/scripts/tcr/ClonalStats.R +526 -0
  198. biopipen/scripts/tcr/CloneResidency.R +255 -131
  199. biopipen/scripts/tcr/CloneSizeQQPlot.R +4 -4
  200. biopipen/scripts/tcr/GIANA/GIANA.py +1356 -797
  201. biopipen/scripts/tcr/GIANA/GIANA4.py +1362 -789
  202. biopipen/scripts/tcr/GIANA/query.py +164 -162
  203. biopipen/scripts/tcr/Immunarch-basic.R +31 -9
  204. biopipen/scripts/tcr/Immunarch-clonality.R +25 -5
  205. biopipen/scripts/tcr/Immunarch-diversity.R +352 -134
  206. biopipen/scripts/tcr/Immunarch-geneusage.R +45 -5
  207. biopipen/scripts/tcr/Immunarch-kmer.R +68 -8
  208. biopipen/scripts/tcr/Immunarch-overlap.R +84 -4
  209. biopipen/scripts/tcr/Immunarch-spectratyping.R +35 -6
  210. biopipen/scripts/tcr/Immunarch-tracking.R +38 -6
  211. biopipen/scripts/tcr/Immunarch-vjjunc.R +165 -0
  212. biopipen/scripts/tcr/Immunarch.R +63 -11
  213. biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
  214. biopipen/scripts/tcr/ImmunarchFilter.R +4 -4
  215. biopipen/scripts/tcr/ImmunarchLoading.R +38 -29
  216. biopipen/scripts/tcr/SampleDiversity.R +1 -1
  217. biopipen/scripts/tcr/ScRepCombiningExpression.R +40 -0
  218. biopipen/scripts/tcr/ScRepLoading.R +166 -0
  219. biopipen/scripts/tcr/TCRClusterStats.R +176 -22
  220. biopipen/scripts/tcr/TCRDock.py +110 -0
  221. biopipen/scripts/tcr/TESSA.R +102 -118
  222. biopipen/scripts/tcr/VJUsage.R +5 -5
  223. biopipen/scripts/tcr/immunarch-patched.R +142 -0
  224. biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
  225. biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
  226. biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
  227. biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
  228. biopipen/scripts/vcf/BcftoolsSort.py +113 -0
  229. biopipen/scripts/vcf/BcftoolsView.py +73 -0
  230. biopipen/scripts/vcf/TruvariBench.sh +14 -7
  231. biopipen/scripts/vcf/TruvariBenchSummary.R +16 -13
  232. biopipen/scripts/vcf/TruvariConsistency.R +1 -1
  233. biopipen/scripts/vcf/Vcf2Bed.py +2 -2
  234. biopipen/scripts/vcf/VcfAnno.py +11 -11
  235. biopipen/scripts/vcf/VcfDownSample.sh +22 -10
  236. biopipen/scripts/vcf/VcfFilter.py +5 -5
  237. biopipen/scripts/vcf/VcfFix.py +7 -7
  238. biopipen/scripts/vcf/VcfFix_utils.py +13 -4
  239. biopipen/scripts/vcf/VcfIndex.py +3 -3
  240. biopipen/scripts/vcf/VcfIntersect.py +3 -3
  241. biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
  242. biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
  243. biopipen/scripts/vcf/bcftools_utils.py +52 -0
  244. biopipen/scripts/web/Download.py +8 -4
  245. biopipen/scripts/web/DownloadList.py +5 -5
  246. biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
  247. biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
  248. biopipen/scripts/web/gcloud_common.py +49 -0
  249. biopipen/utils/gene.py +108 -60
  250. biopipen/utils/misc.py +146 -20
  251. biopipen/utils/reference.py +64 -20
  252. biopipen/utils/reporter.py +177 -0
  253. biopipen/utils/vcf.py +1 -1
  254. biopipen-0.34.26.dist-info/METADATA +27 -0
  255. biopipen-0.34.26.dist-info/RECORD +292 -0
  256. {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
  257. {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +6 -2
  258. biopipen/ns/bcftools.py +0 -111
  259. biopipen/ns/scrna_basic.py +0 -255
  260. biopipen/reports/delim/SampleInfo.svelte +0 -36
  261. biopipen/reports/scrna/GeneExpressionInvistigation.svelte +0 -32
  262. biopipen/reports/scrna/ScFGSEA.svelte +0 -35
  263. biopipen/reports/scrna/SeuratClusterStats.svelte +0 -82
  264. biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -20
  265. biopipen/reports/scrna/SeuratPreparing.svelte +0 -38
  266. biopipen/reports/scrna/TopExpressingGenes.svelte +0 -55
  267. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -31
  268. biopipen/reports/utils/gsea.liq +0 -110
  269. biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
  270. biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
  271. biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
  272. biopipen/scripts/gene/GeneNameConversion.py +0 -66
  273. biopipen/scripts/scrna/ExprImpution-alra.R +0 -32
  274. biopipen/scripts/scrna/ExprImpution-rmagic.R +0 -29
  275. biopipen/scripts/scrna/ExprImpution.R +0 -7
  276. biopipen/scripts/scrna/GeneExpressionInvistigation.R +0 -132
  277. biopipen/scripts/scrna/Write10X.R +0 -11
  278. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -150
  279. biopipen/scripts/tcr/TCRClustering.R +0 -280
  280. biopipen/utils/common_docstrs.py +0 -61
  281. biopipen/utils/gene.R +0 -49
  282. biopipen/utils/gsea.R +0 -193
  283. biopipen/utils/io.R +0 -20
  284. biopipen/utils/misc.R +0 -114
  285. biopipen/utils/mutate_helpers.R +0 -433
  286. biopipen/utils/plot.R +0 -173
  287. biopipen/utils/rnaseq.R +0 -48
  288. biopipen/utils/single_cell.R +0 -115
  289. biopipen-0.21.0.dist-info/METADATA +0 -22
  290. biopipen-0.21.0.dist-info/RECORD +0 -218
@@ -0,0 +1,313 @@
1
+ """This file is used to patch scvelo's paga to fix
2
+ https://github.com/theislab/scvelo/issues/1241
3
+
4
+ This is from pull request
5
+ https://github.com/theislab/scvelo/pull/1308
6
+ which has not been merged yet as of 2025-11-07.
7
+ """
8
+
9
+ import numpy as np
10
+ import pandas as pd
11
+ from scipy.sparse import csr_matrix
12
+
13
+ from scanpy.tools._paga import PAGA
14
+ import scvelo
15
+
16
+ # This is adapted from https://github.com/theislab/paga
17
+ from scvelo import logging as logg
18
+ from scvelo import settings
19
+ from scvelo.tools.rank_velocity_genes import velocity_clusters
20
+ from scvelo.tools.utils import strings_to_categoricals
21
+ from scvelo.tools.velocity_graph import vals_to_csr
22
+ from scvelo.tools.velocity_pseudotime import velocity_pseudotime
23
+
24
+
25
+ # TODO: Finish docstrings
26
def get_igraph_from_adjacency(adjacency, directed=None):
    """Build an igraph graph from the nonzero entries of an adjacency matrix.

    One vertex is created per row of ``adjacency`` and one edge per nonzero
    entry; the entry values are stored in the ``"weight"`` edge attribute.

    Parameters
    ----------
    adjacency
        Square matrix supporting ``.nonzero()``, ``.shape`` and indexing with
        a pair of index arrays.
    directed
        Forwarded to ``igraph.Graph(directed=...)``.

    Returns
    -------
    The constructed ``igraph.Graph``.
    """
    import igraph as ig

    n_nodes = adjacency.shape[0]
    row_idx, col_idx = adjacency.nonzero()
    edge_weights = adjacency[row_idx, col_idx]
    if isinstance(edge_weights, np.matrix):
        # Indexing returned an np.matrix: flatten it to a 1-D array.
        edge_weights = edge_weights.A1

    graph = ig.Graph(directed=directed)
    graph.add_vertices(n_nodes)  # one vertex per row of the adjacency matrix
    graph.add_edges(list(zip(row_idx, col_idx)))
    graph.es["weight"] = edge_weights

    n_built = graph.vcount()
    if n_built != n_nodes:
        logg.warn(
            f"The constructed graph has only {n_built} nodes. "
            "Your adjacency matrix contained redundant nodes."
        )
    return graph
44
+
45
+
46
+ # TODO: Add docstrings
47
def get_sparse_from_igraph(graph, weight_attr=None):
    """Convert an igraph-like graph into a square CSR adjacency matrix.

    Parameters
    ----------
    graph
        Object providing ``get_edgelist()``, ``is_directed()``, ``vcount()``
        and, when ``weight_attr`` is given, ``es[weight_attr]``.
    weight_attr
        Name of the edge attribute holding the weights. When ``None`` every
        edge gets weight 1.

    Returns
    -------
    ``scipy.sparse.csr_matrix`` of shape ``(vcount, vcount)``. For an
    undirected graph every edge is entered in both directions.
    """
    edge_list = graph.get_edgelist()
    edge_weights = (
        [1] * len(edge_list) if weight_attr is None else graph.es[weight_attr]
    )

    if not graph.is_directed():
        # Mirror every edge (u, v) as (v, u) and repeat the weights to match.
        edge_list.extend([(dst, src) for src, dst in edge_list])
        edge_weights.extend(edge_weights)

    n_vertices = graph.vcount()
    dims = (n_vertices, n_vertices)
    if len(edge_list) == 0:
        # No edges: return an empty matrix of the right shape.
        return csr_matrix(dims)

    row_idx, col_idx = zip(*edge_list)
    return csr_matrix((edge_weights, (row_idx, col_idx)), shape=dims)
64
+
65
+
66
+ # TODO: Finish docstrings
67
def set_row_csr(csr, rows, value=0):
    """Overwrite, in place, every stored entry of the given CSR rows.

    Parameters
    ----------
    csr
        CSR matrix exposing ``indptr``, ``data`` and ``eliminate_zeros()``.
    rows
        Iterable of row indices whose stored entries are overwritten.
    value
        Value written to the stored entries (default 0). When it equals 0 the
        resulting explicit zeros are dropped from the matrix.
    """
    row_ptr = csr.indptr
    for row_index in rows:
        # data[indptr[r]:indptr[r + 1]] holds the stored entries of row r.
        csr.data[row_ptr[row_index] : row_ptr[row_index + 1]] = value
    if value == 0:
        csr.eliminate_zeros()
75
+
76
+
77
+ # TODO: Add docstrings
78
class PAGA_tree(PAGA):
    """PAGA with velocity-based, directed transition confidences.

    Subclasses scanpy's ``PAGA`` (always initialised with ``model="v1.2"``)
    and adds :meth:`compute_transitions`, which derives directed transition
    confidences between groups from the ``f"{vkey}_graph"`` entry of
    ``adata.uns``.
    """

    def __init__(
        self,
        adata,
        groups=None,
        vkey=None,
        use_time_prior=None,
        root_key=None,
        end_key=None,
        threshold_root_end_prior=None,
        minimum_spanning_tree=None,
    ):
        """Store the settings used by :meth:`compute_transitions`.

        Parameters
        ----------
        adata
            Annotated data object, forwarded to ``PAGA.__init__``.
        groups
            Key in ``adata.obs`` holding the categorical grouping.
        vkey
            Prefix of the velocity graph key; ``f"{vkey}_graph"`` is read from
            ``adata.uns``.
        use_time_prior
            Key in ``adata.obs`` with per-cell time values. Only used when it
            is a string present in ``adata.obs``.
        root_key
            Key in ``adata.obs``; cells whose value exceeds the threshold get
            their graph column zeroed. Only used when it is a string present
            in ``adata.obs``.
        end_key
            Key in ``adata.obs``; cells whose value exceeds the threshold get
            their graph row zeroed. Only used when it is a string present in
            ``adata.obs``.
        threshold_root_end_prior
            Cut-off applied to the ``root_key`` / ``end_key`` values.
            ``None`` means 0.9.
        minimum_spanning_tree
            When truthy, the transitions are additionally pruned with a
            spanning tree.
        """
        super().__init__(adata=adata, groups=groups, model="v1.2")
        self.groups = groups
        self.vkey = vkey
        self.use_time_prior = use_time_prior
        self.root_key = root_key
        self.end_key = end_key
        self.threshold_root_end_prior = threshold_root_end_prior
        if self.threshold_root_end_prior is None:
            self.threshold_root_end_prior = 0.9
        self.minimum_spanning_tree = minimum_spanning_tree

    def compute_transitions(self):
        """Compute directed transition confidences between groups.

        Results are stored on the instance: ``self.transitions_confidence``
        (transposed CSR matrix of group-to-group confidences) and
        ``self.threshold`` (a float, at least 0.01).

        Raises
        ------
        ImportError
            If ``igraph`` cannot be imported.
        ValueError
            If ``adata.uns`` has no ``f"{vkey}_graph"`` entry, or that entry
            is not of shape ``(n_obs, n_obs)``.
        """
        try:
            import igraph
        except ImportError as err:
            # Chain the original error so the real import failure stays visible.
            raise ImportError(
                "To run paga, you need to install `pip install igraph`"
            ) from err
        vkey = f"{self.vkey}_graph"
        if vkey not in self._adata.uns:
            # Report the key that was actually looked up, not a fixed name.
            raise ValueError(
                "The passed AnnData needs to have an `uns` annotation "
                f"with key {vkey!r} - a sparse matrix from RNA velocity."
            )
        if self._adata.uns[vkey].shape != (self._adata.n_obs, self._adata.n_obs):
            raise ValueError(
                f"The passed {vkey!r} has shape {self._adata.uns[vkey].shape} "
                f"but should have shape {(self._adata.n_obs, self._adata.n_obs)}"
            )

        clusters = self._adata.obs[self.groups]
        cats = clusters.cat.categories
        # Keep only the graph entries larger than 0.1 (boolean matrix).
        vgraph = self._adata.uns[vkey] > 0.1
        time_prior = self.use_time_prior

        if isinstance(time_prior, str) and time_prior in self._adata.obs.keys():
            vpt = self._adata.obs[time_prior].values
            vpt_mean = self._adata.obs.groupby(self.groups)[time_prior].mean()
            # Per-cell array holding the mean time of the cell's own group.
            vpt_means = np.array([vpt_mean[cat] for cat in clusters])
            rows, cols, vals = [], [], []
            for i in range(vgraph.shape[0]):
                indices = vgraph[i].indices
                # Keep neighbours with a later time than cell i ...
                idx_bool = vpt[i] < vpt[indices]
                # ... whose group mean time exceeds cell i's group mean - 0.1.
                idx_bool &= vpt_means[indices] > vpt_means[i] - 0.1
                cols.extend(indices[idx_bool])
                vals.extend(vgraph[i].data[idx_bool])
                rows.extend([i] * np.sum(idx_bool))
            vgraph = vals_to_csr(vals, rows, cols, shape=vgraph.shape)

        lb = self.threshold_root_end_prior  # cells to be consider as terminal states
        if isinstance(self.end_key, str) and self.end_key in self._adata.obs.keys():
            # Zero the rows of cells whose end_key value exceeds the threshold.
            set_row_csr(vgraph, rows=np.where(self._adata.obs[self.end_key] > lb)[0])
        if isinstance(self.root_key, str) and self.root_key in self._adata.obs.keys():
            # Zero the columns of cells whose root_key value exceeds the threshold.
            vgraph[:, np.where(self._adata.obs[self.root_key] > lb)[0]] = 0
            vgraph.eliminate_zeros()

        # Collapse the cell graph into a group graph, summing edge weights.
        membership = self._adata.obs[self.groups].cat.codes.values
        g = get_igraph_from_adjacency(vgraph, directed=True)
        vc = igraph.VertexClustering(g, membership=membership)
        cg_full = vc.cluster_graph(combine_edges="sum")
        transitions = get_sparse_from_igraph(cg_full, weight_attr="weight")
        # Net flow between each pair of groups.
        transitions = transitions - transitions.T
        transitions_conf = transitions.copy()
        transitions = transitions.tocoo()
        total_n = self._neighbors.n_neighbors * np.array(vc.sizes())
        for i, j, v in zip(transitions.row, transitions.col, transitions.data):
            # Negative net flow is dropped; positive flow is scaled by the
            # geometric mean of the two groups' neighbour counts.
            reference = np.sqrt(total_n[i] * total_n[j])
            transitions_conf[i, j] = 0 if v < 0 else v / reference
        transitions_conf.eliminate_zeros()

        # remove non-confident direct paths if more confident indirect path is found.
        T = transitions_conf.toarray()
        # T / (T > 0) turns zero entries into NaN (0 / 0), so the nan-aware
        # reductions only look at the positive entries.
        threshold = max(np.nanmin(np.nanmax(T / (T > 0), axis=0)) - 1e-6, 0.01)
        T *= T > threshold
        for i in range(len(T)):
            idx = T[i] > 0
            if np.any(idx):
                indirect = np.clip(T[idx], None, T[i][idx][:, None]).max(0)
                T[i, T[i] < indirect] = 0

        if self.minimum_spanning_tree:
            T_tmp = T.copy()
            T_num = T > 0
            T_sum = np.sum(T_num, 0)
            T_max = np.max(T_tmp)
            for i in range(len(T_tmp)):
                if T_sum[i] == 1:
                    # Single nonzero entry in column i: raise it to the global
                    # maximum in the working copy (presumably so the spanning
                    # tree keeps it - TODO confirm).
                    T_tmp[np.where(T_num[:, i])[0][0], i] = T_max
            from scipy.sparse.csgraph import minimum_spanning_tree

            # Weights are negated before the spanning-tree call; only the
            # entries kept by the tree survive in T.
            T_tmp = np.abs(minimum_spanning_tree(-T_tmp).toarray()) > 0
            T = T_tmp * T

        transitions_conf = csr_matrix(T)
        self.transitions_confidence = transitions_conf.T

        # set threshold for minimal spanning tree.
        df = pd.DataFrame(T, index=cats, columns=cats)
        self.threshold = np.nanmin(np.nanmax(df.values / (df.values > 0), axis=0))
        self.threshold = max(self.threshold - 1e-6, 0.01)
192
+
193
+
194
def paga(
    adata,
    groups=None,
    vkey="velocity",
    use_time_prior=True,
    root_key=None,
    end_key=None,
    threshold_root_end_prior=None,
    minimum_spanning_tree=True,
    copy=False,
):
    """Compute a PAGA graph with velocity-directed edges.

    Partition-based graph abstraction (PAGA, Wolf et al. 2019) summarises the
    connectivity between groups of cells as a coarse graph whose edge weights
    express confidence in a connection. This variant additionally derives
    directed transition confidences from the velocity graph.

    Parameters
    ----------
    adata : :class:`~anndata.AnnData`
        Annotated data matrix. ``adata.uns`` must contain ``"neighbors"``.
    groups : `str`, optional (default: `None`)
        Key of a categorical annotation in `adata.obs`. When `None`,
        ``"clusters"`` is used if present in `adata.obs`, otherwise
        ``"louvain"`` if present. When ``"velocity_clusters"`` is requested
        but missing from `adata.obs`, it is computed first.
    vkey : `str` (default: `'velocity'`)
        Velocity key; forwarded to the pseudotime computation and to
        `PAGA_tree`.
    use_time_prior : `str` or `bool`, optional (default: `True`)
        Obs key for pseudo-time values. Any truthy non-string value selects
        ``"velocity_pseudotime"``, which is computed if it is not in
        `adata.obs`.
    root_key : `str`, optional (default: `None`)
        Obs key for root states.
    end_key : `str`, optional (default: `None`)
        Obs key for end states.
    threshold_root_end_prior : `float`, optional (default: `None`)
        Threshold for the root and end state priors, forwarded to
        `PAGA_tree`.
    minimum_spanning_tree : `bool`, optional (default: `True`)
        Forwarded to `PAGA_tree` to enable spanning-tree pruning.
    copy : `bool`, optional (default: `False`)
        Work on (and return) a copy of `adata` instead of modifying it
        in place.

    Returns
    -------
    The modified copy of `adata` if `copy` is true, otherwise `None`.
    The following entries are written to `adata.uns`:

    ``paga/connectivities``, ``paga/connectivities_tree``,
    ``paga/transitions_confidence``, ``paga/threshold``, ``paga/groups``
    and ``f"{groups}_sizes"``.

    Raises
    ------
    ValueError
        If ``"neighbors"`` is missing from `adata.uns`.
    """
    if "neighbors" not in adata.uns:
        raise ValueError(
            "You need to run `pp.neighbors` first to compute a neighborhood graph."
        )

    if copy:
        adata = adata.copy()
    strings_to_categoricals(adata)

    # Resolve the grouping key; it stays None when no fallback column exists.
    if groups is None:
        if "clusters" in adata.obs.keys():
            groups = "clusters"
        elif "louvain" in adata.obs.keys():
            groups = "louvain"
    elif groups == "velocity_clusters" and "velocity_clusters" not in adata.obs.keys():
        velocity_clusters(adata)

    # A truthy non-string time prior means "use velocity pseudotime".
    if use_time_prior and not isinstance(use_time_prior, str):
        use_time_prior = "velocity_pseudotime"
        if use_time_prior not in adata.obs.keys():
            velocity_pseudotime(adata, vkey=vkey, root_key=root_key, end_key=end_key)

    priors = []
    for candidate in (use_time_prior, root_key, end_key):
        if candidate in adata.obs.keys():
            priors.append(candidate)
    prior_note = f"using priors: {priors}" if len(priors) > 0 else ""
    logg.info("running PAGA", prior_note, r=True)

    tree = PAGA_tree(
        adata,
        groups,
        vkey=vkey,
        use_time_prior=use_time_prior,
        root_key=root_key,
        end_key=end_key,
        threshold_root_end_prior=threshold_root_end_prior,
        minimum_spanning_tree=minimum_spanning_tree,
    )

    if "paga" not in adata.uns:
        adata.uns["paga"] = {}

    # Undirected connectivities first, then the velocity-directed transitions.
    tree.compute_connectivities()
    adata.uns["paga"]["connectivities"] = tree.connectivities
    adata.uns["paga"]["connectivities_tree"] = tree.connectivities_tree
    adata.uns[f"{groups}_sizes"] = np.array(tree.ns)

    tree.compute_transitions()
    adata.uns["paga"]["transitions_confidence"] = tree.transitions_confidence
    adata.uns["paga"]["threshold"] = tree.threshold
    adata.uns["paga"]["groups"] = groups

    line_end = " " if settings.verbosity > 2 else "\n"
    logg.info(" finished", time=True, end=line_end)
    logg.hint(
        "added\n"
        " 'paga/connectivities', connectivities adjacency (adata.uns)\n"
        " 'paga/connectivities_tree', connectivities subtree (adata.uns)\n"
        " 'paga/transitions_confidence', velocity transitions (adata.uns)"
    )

    if copy:
        return adata
    return None
311
+
312
+
313
# Import-time side effect: rebind the `paga` attribute of `scvelo.tl` to the
# patched `paga` function defined in this module.
scvelo.tl.paga = paga
@@ -0,0 +1,98 @@
1
+ """Convert Seurat objects to AnnData format back and forth.
2
+
3
+ Need R and R packages Seurat, SeuratDisk and biopipen.utils.R installed.
4
+ """
5
+ from __future__ import annotations
6
+
7
+
8
def convert_seurat_to_anndata(
    input_file,
    output_file,
    assay=None,
    subset=None,
    rscript="Rscript",
    return_ident_col=False,
) -> None | str:
    """Convert a Seurat object to AnnData format by running a generated R script.

    Args:
        input_file (str): Path to the input Seurat RDS or qs/qs2 file.
        output_file (str): Path to the output AnnData H5AD file.
        assay (str): Name of the assay to use in the Seurat object.
            Falsy values are passed to R as ``NULL``.
        subset (str): An R expression to subset the Seurat object to convert.
            Falsy values are passed to R as ``NULL``.
        rscript (str): Executable used to run the generated R script.
        return_ident_col (bool): If true, run a second R script that prints
            ``GetIdentityColumn(obj)`` for the input object and return its
            stripped output.

    Returns:
        The captured identity column name when ``return_ident_col`` is true,
        otherwise ``None``.
    """
    import json
    import os
    from contextlib import suppress
    from tempfile import NamedTemporaryFile

    from biopipen.utils.misc import run_command

    def r_string(value):
        # Emit a double-quoted literal with quotes, backslashes and control
        # characters escaped, so unusual paths cannot break the R script.
        return json.dumps(str(value), ensure_ascii=False)

    def run_r(code, **run_kwargs):
        # Write the code to a temporary .R file, run it, then remove the file.
        # Assumes run_command returns only after the process has exited -
        # TODO confirm against biopipen.utils.misc.run_command.
        with NamedTemporaryFile(suffix=".R", delete=False) as temp_script:
            temp_script.write(code.encode("utf-8"))
            temp_script_path = temp_script.name
        try:
            return run_command([rscript, temp_script_path], **run_kwargs)
        finally:
            # delete=False means nothing else cleans this file up.
            with suppress(FileNotFoundError):
                os.remove(temp_script_path)

    script = f"""
    library(biopipen.utils)

    assay <- {repr(assay) if assay else 'NULL'}
    subset <- {repr(subset) if subset else 'NULL'}

    ConvertSeuratToAnnData(
        {r_string(input_file)}, {r_string(output_file)}, assay = assay, subset = subset
    )
    """
    run_r(script, fg=True)

    if return_ident_col:
        ident_col_script = f"""
        library(biopipen.utils)

        obj <- read_obj({r_string(input_file)})
        cat(GetIdentityColumn(obj))
        """
        return run_r(ident_col_script, stdout="RETURN").strip()

    return None
62
+
63
+
64
def convert_anndata_to_seurat(
    input_file,
    output_file,
    assay=None,
    rscript="Rscript",
):
    """Convert an AnnData object to Seurat format by running a generated R script.

    Args:
        input_file (str): Path to the input AnnData H5AD file.
        output_file (str): Path to the output Seurat RDS or qs/qs2 file.
        assay (str): Name of the assay to use in the Seurat object.
            Falsy values are passed to R as ``NULL``.
        rscript (str): Executable used to run the generated R script.
    """
    import json
    import os
    from contextlib import suppress
    from tempfile import NamedTemporaryFile

    from biopipen.utils.misc import run_command

    def r_string(value):
        # Emit a double-quoted literal with quotes, backslashes and control
        # characters escaped, so unusual paths cannot break the R script.
        return json.dumps(str(value), ensure_ascii=False)

    script = f"""
    library(biopipen.utils)

    assay <- {repr(assay) if assay else 'NULL'}

    ConvertAnnDataToSeurat(
        {r_string(input_file)}, {r_string(output_file)}, assay = assay
    )
    """

    # Save the script to a temporary file
    with NamedTemporaryFile(suffix=".R", delete=False) as temp_script:
        temp_script.write(script.encode("utf-8"))
        temp_script_path = temp_script.name

    try:
        # Run the R script using the provided Rscript command.
        # Assumes run_command(fg=True) returns only after the process has
        # exited - TODO confirm against biopipen.utils.misc.run_command.
        run_command([rscript, temp_script_path], fg=True)
    finally:
        # delete=False means nothing else cleans this file up.
        with suppress(FileNotFoundError):
            os.remove(temp_script_path)