scloop 0.1.2__tar.gz → 0.1.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {scloop-0.1.2/src/scloop.egg-info → scloop-0.1.4}/PKG-INFO +8 -2
- {scloop-0.1.2 → scloop-0.1.4}/README.md +6 -1
- {scloop-0.1.2 → scloop-0.1.4}/pyproject.toml +4 -1
- {scloop-0.1.2 → scloop-0.1.4}/setup.py +3 -2
- {scloop-0.1.2 → scloop-0.1.4}/src/scloop/__init__.py +2 -2
- scloop-0.1.4/src/scloop/computing/__init__.py +8 -0
- scloop-0.1.4/src/scloop/computing/homology.py +92 -0
- scloop-0.1.4/src/scloop/data/__init__.py +2 -0
- {scloop-0.1.2 → scloop-0.1.4}/src/scloop/data/analysis_containers.py +1 -1
- scloop-0.1.4/src/scloop/data/containers.py +221 -0
- scloop-0.1.4/src/scloop/data/loop_reconstruction.py +150 -0
- {scloop-0.1.2 → scloop-0.1.4}/src/scloop/data/metadata.py +9 -1
- scloop-0.1.4/src/scloop/data/types.py +33 -0
- scloop-0.1.4/src/scloop/data/utils.py +67 -0
- {scloop-0.1.2 → scloop-0.1.4}/src/scloop/preprocessing/delve/delve.py +12 -12
- {scloop-0.1.2 → scloop-0.1.4}/src/scloop/preprocessing/delve/kh.py +7 -6
- scloop-0.1.4/src/scloop/preprocessing/downsample.py +91 -0
- {scloop-0.1.2 → scloop-0.1.4}/src/scloop/preprocessing/prepare.py +37 -4
- scloop-0.1.4/src/scloop/tools/__init__.py +4 -0
- scloop-0.1.4/src/scloop/tools/_loops.py +63 -0
- {scloop-0.1.2 → scloop-0.1.4/src/scloop.egg-info}/PKG-INFO +8 -2
- {scloop-0.1.2 → scloop-0.1.4}/src/scloop.egg-info/SOURCES.txt +4 -0
- {scloop-0.1.2 → scloop-0.1.4}/src/scloop.egg-info/requires.txt +1 -0
- scloop-0.1.2/src/scloop/data/__init__.py +0 -3
- scloop-0.1.2/src/scloop/data/containers.py +0 -49
- scloop-0.1.2/src/scloop/data/types.py +0 -6
- scloop-0.1.2/src/scloop/data/utils.py +0 -410
- scloop-0.1.2/src/scloop/preprocessing/downsample.py +0 -1
- scloop-0.1.2/src/scloop/tools/__init__.py +0 -0
- {scloop-0.1.2 → scloop-0.1.4}/LICENSE +0 -0
- {scloop-0.1.2 → scloop-0.1.4}/MANIFEST.in +0 -0
- {scloop-0.1.2 → scloop-0.1.4}/setup.cfg +0 -0
- {scloop-0.1.2 → scloop-0.1.4}/src/scloop/analyzing/__init__.py +0 -0
- {scloop-0.1.2 → scloop-0.1.4}/src/scloop/benchmarking/__init__.py +0 -0
- {scloop-0.1.2 → scloop-0.1.4}/src/scloop/data/ripser.cpp +0 -0
- {scloop-0.1.2 → scloop-0.1.4}/src/scloop/data/ripser.hpp +0 -0
- {scloop-0.1.2 → scloop-0.1.4}/src/scloop/data/ripser_lib.cpp +0 -0
- {scloop-0.1.2 → scloop-0.1.4}/src/scloop/data/ripser_lib.pyx +0 -0
- {scloop-0.1.2 → scloop-0.1.4}/src/scloop/matching/__init__.py +0 -0
- {scloop-0.1.2 → scloop-0.1.4}/src/scloop/matching/data_modules.py +0 -0
- {scloop-0.1.2 → scloop-0.1.4}/src/scloop/matching/mlp.py +0 -0
- {scloop-0.1.2 → scloop-0.1.4}/src/scloop/matching/nf.py +0 -0
- {scloop-0.1.2 → scloop-0.1.4}/src/scloop/plotting/__init__.py +0 -0
- {scloop-0.1.2 → scloop-0.1.4}/src/scloop/plotting/plot.py +0 -0
- {scloop-0.1.2 → scloop-0.1.4}/src/scloop/preprocessing/__init__.py +1 -1
- {scloop-0.1.2 → scloop-0.1.4}/src/scloop/preprocessing/delve/__init__.py +0 -0
- {scloop-0.1.2 → scloop-0.1.4}/src/scloop/py.typed +0 -0
- {scloop-0.1.2 → scloop-0.1.4}/src/scloop/utils/__init__.py +0 -0
- {scloop-0.1.2 → scloop-0.1.4}/src/scloop/utils/distance_metrics/__init__.py +0 -0
- {scloop-0.1.2 → scloop-0.1.4}/src/scloop/utils/distance_metrics/discrete-frechet-distance/Frechet.cpp +0 -0
- {scloop-0.1.2 → scloop-0.1.4}/src/scloop/utils/distance_metrics/discrete-frechet-distance/Frechet.h +0 -0
- {scloop-0.1.2 → scloop-0.1.4}/src/scloop/utils/distance_metrics/frechet.cpp +0 -0
- {scloop-0.1.2 → scloop-0.1.4}/src/scloop/utils/distance_metrics/frechet.pyx +0 -0
- {scloop-0.1.2 → scloop-0.1.4}/src/scloop/utils/linear_algebra_gf2/__init__.py +0 -0
- {scloop-0.1.2 → scloop-0.1.4}/src/scloop/utils/linear_algebra_gf2/gf2_toolkit_lib.pyx +0 -0
- {scloop-0.1.2 → scloop-0.1.4}/src/scloop/utils/linear_algebra_gf2/m4ri_lib.c +0 -0
- {scloop-0.1.2 → scloop-0.1.4}/src/scloop/utils/linear_algebra_gf2/m4ri_lib.pyx +0 -0
- {scloop-0.1.2 → scloop-0.1.4}/src/scloop.egg-info/dependency_links.txt +0 -0
- {scloop-0.1.2 → scloop-0.1.4}/src/scloop.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: scloop
|
|
3
|
-
Version: 0.1.2
|
|
3
|
+
Version: 0.1.4
|
|
4
4
|
Summary: single-cell loop analysis
|
|
5
5
|
Author-email: "Zhiyuan(Stan) Yu" <zyyu@umich.edu>, Idse Heemskerk <idse.heemskerk@gmail.com>
|
|
6
6
|
Classifier: Programming Language :: Python :: 3 :: Only
|
|
@@ -9,6 +9,7 @@ Requires-Python: >=3.12
|
|
|
9
9
|
Description-Content-Type: text/markdown
|
|
10
10
|
License-File: LICENSE
|
|
11
11
|
Requires-Dist: cython>=3.0.12
|
|
12
|
+
Requires-Dist: igraph>=0.11
|
|
12
13
|
Requires-Dist: numba>=0.62.1
|
|
13
14
|
Requires-Dist: numpy<2
|
|
14
15
|
Requires-Dist: pandas
|
|
@@ -75,10 +76,14 @@ src/scloop/
|
|
|
75
76
|
│ └── __init__.py
|
|
76
77
|
├── benchmarking
|
|
77
78
|
│ └── __init__.py
|
|
79
|
+
├── computing
|
|
80
|
+
│ ├── homology.py
|
|
81
|
+
│ └── __init__.py
|
|
78
82
|
├── data
|
|
79
83
|
│ ├── analysis_containers.py
|
|
80
84
|
│ ├── containers.py
|
|
81
85
|
│ ├── __init__.py
|
|
86
|
+
│ ├── loop_reconstruction.py
|
|
82
87
|
│ ├── metadata.py
|
|
83
88
|
│ ├── ripser.cpp
|
|
84
89
|
│ ├── ripser.hpp
|
|
@@ -105,7 +110,8 @@ src/scloop/
|
|
|
105
110
|
│ └── prepare.py
|
|
106
111
|
├── py.typed
|
|
107
112
|
├── tools
|
|
108
|
-
│
|
|
113
|
+
│ ├── __init__.py
|
|
114
|
+
│ └── _loops.py
|
|
109
115
|
└── utils
|
|
110
116
|
├── distance_metrics
|
|
111
117
|
│ ├── discrete-frechet-distance
|
|
@@ -46,10 +46,14 @@ src/scloop/
|
|
|
46
46
|
│ └── __init__.py
|
|
47
47
|
├── benchmarking
|
|
48
48
|
│ └── __init__.py
|
|
49
|
+
├── computing
|
|
50
|
+
│ ├── homology.py
|
|
51
|
+
│ └── __init__.py
|
|
49
52
|
├── data
|
|
50
53
|
│ ├── analysis_containers.py
|
|
51
54
|
│ ├── containers.py
|
|
52
55
|
│ ├── __init__.py
|
|
56
|
+
│ ├── loop_reconstruction.py
|
|
53
57
|
│ ├── metadata.py
|
|
54
58
|
│ ├── ripser.cpp
|
|
55
59
|
│ ├── ripser.hpp
|
|
@@ -76,7 +80,8 @@ src/scloop/
|
|
|
76
80
|
│ └── prepare.py
|
|
77
81
|
├── py.typed
|
|
78
82
|
├── tools
|
|
79
|
-
│
|
|
83
|
+
│ ├── __init__.py
|
|
84
|
+
│ └── _loops.py
|
|
80
85
|
└── utils
|
|
81
86
|
├── distance_metrics
|
|
82
87
|
│ ├── discrete-frechet-distance
|
|
@@ -19,7 +19,7 @@ requires = [ "cython>=3.0.12", "setuptools>=74.1" ]
|
|
|
19
19
|
|
|
20
20
|
[project]
|
|
21
21
|
name = "scloop"
|
|
22
|
-
version = "0.1.2"
|
|
22
|
+
version = "0.1.4"
|
|
23
23
|
description = "single-cell loop analysis"
|
|
24
24
|
readme = "README.md"
|
|
25
25
|
authors = [
|
|
@@ -34,6 +34,7 @@ classifiers = [
|
|
|
34
34
|
]
|
|
35
35
|
dependencies = [
|
|
36
36
|
"cython>=3.0.12",
|
|
37
|
+
"igraph>=0.11",
|
|
37
38
|
"numba>=0.62.1",
|
|
38
39
|
"numpy<2",
|
|
39
40
|
"pandas",
|
|
@@ -75,6 +76,8 @@ explicit = true
|
|
|
75
76
|
[tool.ruff]
|
|
76
77
|
format.indent-style = "space"
|
|
77
78
|
format.quote-style = "double"
|
|
79
|
+
lint.extend-select = [ "I" ]
|
|
80
|
+
lint.fixable = [ "ALL" ]
|
|
78
81
|
|
|
79
82
|
[tool.pyproject-fmt]
|
|
80
83
|
column_width = 120
|
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
from setuptools import setup, Extension
|
|
2
|
-
from Cython.Build import cythonize
|
|
3
1
|
import os
|
|
4
2
|
|
|
3
|
+
from Cython.Build import cythonize
|
|
4
|
+
from setuptools import Extension, setup
|
|
5
|
+
|
|
5
6
|
project_root = os.path.dirname(os.path.abspath(__file__))
|
|
6
7
|
m4ri_dir = os.path.join(project_root, "src/scloop/utils/linear_algebra_gf2")
|
|
7
8
|
|
|
@@ -3,6 +3,6 @@ import warnings
|
|
|
3
3
|
warnings.filterwarnings("ignore", category=FutureWarning, module="scanpy")
|
|
4
4
|
warnings.filterwarnings("ignore", category=FutureWarning, module="anndata")
|
|
5
5
|
|
|
6
|
-
from . import data
|
|
7
|
-
from . import utils
|
|
6
|
+
from . import data, utils
|
|
8
7
|
from . import preprocessing as pp
|
|
8
|
+
from . import tools as tl
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
"""Lightweight compute helpers used by the public API layer."""
|
|
2
|
+
# Copyright 2025 Zhiyuan Yu (Heemskerk's lab, University of Michigan)
|
|
3
|
+
|
|
4
|
+
from .homology import (
|
|
5
|
+
compute_boundary_matrix_data,
|
|
6
|
+
compute_persistence_diagram_and_cocycles,
|
|
7
|
+
compute_sparse_pairwise_distance,
|
|
8
|
+
)
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# Copyright 2025 Zhiyuan Yu (Heemskerk's lab, University of Michigan)
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
from anndata import AnnData
|
|
6
|
+
from scipy.sparse import csr_matrix
|
|
7
|
+
from sklearn.neighbors import radius_neighbors_graph
|
|
8
|
+
|
|
9
|
+
from ..data.metadata import ScloopMeta
|
|
10
|
+
from ..data.ripser_lib import get_boundary_matrix, ripser
|
|
11
|
+
from ..data.types import Diameter_t, IndexListDistMatrix
|
|
12
|
+
from ..data.utils import encode_triangles_and_edges
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def compute_sparse_pairwise_distance(
|
|
16
|
+
adata: AnnData,
|
|
17
|
+
meta: ScloopMeta,
|
|
18
|
+
bootstrap: bool = False,
|
|
19
|
+
noise_scale: float = 1e-3,
|
|
20
|
+
thresh: Diameter_t | None = None,
|
|
21
|
+
**nei_kwargs,
|
|
22
|
+
) -> tuple[csr_matrix, IndexListDistMatrix | None]:
|
|
23
|
+
assert meta.preprocess is not None
|
|
24
|
+
assert meta.preprocess.embedding_method is not None
|
|
25
|
+
emb = adata.obsm[f"X_{meta.preprocess.embedding_method}"]
|
|
26
|
+
selected_indices = (
|
|
27
|
+
meta.preprocess.indices_downsample
|
|
28
|
+
if meta.preprocess.indices_downsample is not None
|
|
29
|
+
else list(range(emb.shape[0]))
|
|
30
|
+
)
|
|
31
|
+
X = emb[selected_indices]
|
|
32
|
+
boot_idx = None
|
|
33
|
+
if bootstrap:
|
|
34
|
+
sample_idx = np.random.choice(
|
|
35
|
+
len(selected_indices), size=len(selected_indices), replace=True
|
|
36
|
+
).tolist()
|
|
37
|
+
boot_idx = [selected_indices[i] for i in sample_idx]
|
|
38
|
+
X = X[sample_idx] + np.random.normal(scale=noise_scale, size=X.shape)
|
|
39
|
+
else:
|
|
40
|
+
boot_idx = selected_indices
|
|
41
|
+
return (
|
|
42
|
+
radius_neighbors_graph(
|
|
43
|
+
X=X,
|
|
44
|
+
radius=thresh,
|
|
45
|
+
**nei_kwargs,
|
|
46
|
+
),
|
|
47
|
+
boot_idx,
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def compute_persistence_diagram_and_cocycles(
|
|
52
|
+
adata: AnnData,
|
|
53
|
+
meta: ScloopMeta,
|
|
54
|
+
thresh: Diameter_t | None = None,
|
|
55
|
+
bootstrap: bool = False,
|
|
56
|
+
**nei_kwargs,
|
|
57
|
+
) -> tuple[list[np.ndarray], list, IndexListDistMatrix | None, csr_matrix]:
|
|
58
|
+
sparse_pairwise_distance_matrix, boot_idx = compute_sparse_pairwise_distance(
|
|
59
|
+
adata=adata, meta=meta, bootstrap=bootstrap, thresh=thresh, **nei_kwargs
|
|
60
|
+
)
|
|
61
|
+
result = ripser(
|
|
62
|
+
distance_matrix=sparse_pairwise_distance_matrix.tocoo(copy=False),
|
|
63
|
+
modulus=2,
|
|
64
|
+
dim_max=1,
|
|
65
|
+
threshold=thresh,
|
|
66
|
+
do_cocycles=True,
|
|
67
|
+
)
|
|
68
|
+
return (
|
|
69
|
+
result.births_and_deaths_by_dim,
|
|
70
|
+
result.cocycles_by_dim,
|
|
71
|
+
boot_idx,
|
|
72
|
+
sparse_pairwise_distance_matrix,
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def compute_boundary_matrix_data(
|
|
77
|
+
adata: AnnData, meta: ScloopMeta, thresh: Diameter_t | None = None, **nei_kwargs
|
|
78
|
+
) -> tuple:
|
|
79
|
+
assert meta.preprocess is not None
|
|
80
|
+
assert meta.preprocess.num_vertices is not None
|
|
81
|
+
sparse_pairwise_distance_matrix, vertex_indices = compute_sparse_pairwise_distance(
|
|
82
|
+
adata=adata, meta=meta, bootstrap=False, thresh=thresh, **nei_kwargs
|
|
83
|
+
)
|
|
84
|
+
result = get_boundary_matrix(sparse_pairwise_distance_matrix.tocoo(), thresh)
|
|
85
|
+
triangles = np.asarray(result.triangle_vertices, dtype=np.int64)
|
|
86
|
+
if len(triangles) == 0:
|
|
87
|
+
edge_ids, trig_ids = [], []
|
|
88
|
+
else:
|
|
89
|
+
edge_ids, trig_ids = encode_triangles_and_edges(
|
|
90
|
+
triangles, meta.preprocess.num_vertices
|
|
91
|
+
)
|
|
92
|
+
return result, edge_ids, trig_ids, sparse_pairwise_distance_matrix, vertex_indices
|
|
@@ -3,8 +3,8 @@ from __future__ import annotations
|
|
|
3
3
|
|
|
4
4
|
import numpy as np
|
|
5
5
|
import pandas as pd
|
|
6
|
+
from pydantic import ConfigDict, Field
|
|
6
7
|
from pydantic.dataclasses import dataclass
|
|
7
|
-
from pydantic import Field, ConfigDict
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
@dataclass(config=ConfigDict(arbitrary_types_allowed=True))
|
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
# Copyright 2025 Zhiyuan Yu (Heemskerk's lab, University of Michigan)
|
|
2
|
+
from abc import abstractmethod
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
from anndata import AnnData
|
|
6
|
+
from pydantic import BaseModel, ConfigDict, ValidationInfo, field_validator
|
|
7
|
+
from pydantic.dataclasses import dataclass
|
|
8
|
+
from scipy.sparse import csr_matrix
|
|
9
|
+
|
|
10
|
+
from ..computing.homology import (
|
|
11
|
+
compute_boundary_matrix_data,
|
|
12
|
+
compute_persistence_diagram_and_cocycles,
|
|
13
|
+
compute_sparse_pairwise_distance,
|
|
14
|
+
)
|
|
15
|
+
from .analysis_containers import BootstrapAnalysis, HodgeAnalysis
|
|
16
|
+
from .loop_reconstruction import reconstruct_n_loop_representatives
|
|
17
|
+
from .metadata import ScloopMeta
|
|
18
|
+
from .types import Diameter_t, Index_t, IndexListDistMatrix, Size_t
|
|
19
|
+
from .utils import decode_edges, decode_triangles
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class BoundaryMatrix(BaseModel):
|
|
23
|
+
num_vertices: Size_t
|
|
24
|
+
data: tuple[list, list] # in coo format (row indices, col indices) of ones
|
|
25
|
+
shape: tuple[Size_t, Size_t]
|
|
26
|
+
row_simplex_ids: list[Index_t]
|
|
27
|
+
col_simplex_ids: list[Index_t]
|
|
28
|
+
row_simplex_diams: list[Diameter_t]
|
|
29
|
+
col_simplex_diams: list[Diameter_t]
|
|
30
|
+
|
|
31
|
+
@field_validator(
|
|
32
|
+
"row_simplex_ids", "col_simplex_ids", "col_simplex_diams", mode="before"
|
|
33
|
+
)
|
|
34
|
+
@classmethod
|
|
35
|
+
def validate_fields(cls, v: list[Index_t], info: ValidationInfo):
|
|
36
|
+
shape = info.data.get("shape")
|
|
37
|
+
assert shape
|
|
38
|
+
if info.field_name == "row_simplex_ids":
|
|
39
|
+
if len(v) != shape[0]:
|
|
40
|
+
raise ValueError(
|
|
41
|
+
"Length of row ids does not match the number of rows of the matrix"
|
|
42
|
+
)
|
|
43
|
+
elif info.field_name in ["col_simplex_ids", "col_simplex_diams"]:
|
|
44
|
+
if len(v) != shape[1]:
|
|
45
|
+
raise ValueError(
|
|
46
|
+
f"Length of {info.field_name} does not match the number of columns of the matrix"
|
|
47
|
+
)
|
|
48
|
+
return v
|
|
49
|
+
|
|
50
|
+
@abstractmethod
|
|
51
|
+
def row_simplex_decode(self) -> list:
|
|
52
|
+
"""
|
|
53
|
+
From simplex id (row) to vertex ids
|
|
54
|
+
"""
|
|
55
|
+
pass
|
|
56
|
+
|
|
57
|
+
@abstractmethod
|
|
58
|
+
def col_simplex_decode(self) -> list:
|
|
59
|
+
"""
|
|
60
|
+
From simplex id (column) to vertex ids
|
|
61
|
+
"""
|
|
62
|
+
pass
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class BoundaryMatrixD1(BoundaryMatrix):
|
|
66
|
+
data: tuple[list[list[Index_t]], list[list[Index_t]]]
|
|
67
|
+
|
|
68
|
+
def row_simplex_decode(self) -> list[tuple[Index_t, Index_t]]:
|
|
69
|
+
return decode_edges(np.array(self.row_simplex_ids), self.num_vertices)
|
|
70
|
+
|
|
71
|
+
def col_simplex_decode(self) -> list[tuple[Index_t, Index_t, Index_t]]:
|
|
72
|
+
return decode_triangles(np.array(self.col_simplex_ids), self.num_vertices)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@dataclass(config=ConfigDict(arbitrary_types_allowed=True))
|
|
76
|
+
class HomologyData:
|
|
77
|
+
"""
|
|
78
|
+
store core homology data and associated analysis data
|
|
79
|
+
"""
|
|
80
|
+
|
|
81
|
+
meta: ScloopMeta
|
|
82
|
+
persistence_diagram: list[np.ndarray] | None = None
|
|
83
|
+
loop_representatives: list[list[list[int]]] | None = None
|
|
84
|
+
cocycles: list | None = None
|
|
85
|
+
pairwise_distance_matrix: csr_matrix | None = None
|
|
86
|
+
selected_vertex_indices: list[int] | None = None
|
|
87
|
+
boundary_matrix_d1: BoundaryMatrixD1 | None = None
|
|
88
|
+
bootstrap_data: BootstrapAnalysis | None = None
|
|
89
|
+
hodge_data: HodgeAnalysis | None = None
|
|
90
|
+
|
|
91
|
+
def _compute_sparse_pairwise_distance(
|
|
92
|
+
self,
|
|
93
|
+
adata: AnnData,
|
|
94
|
+
bootstrap: bool = False,
|
|
95
|
+
thresh: Diameter_t | None = None,
|
|
96
|
+
**nei_kwargs,
|
|
97
|
+
) -> tuple[csr_matrix, IndexListDistMatrix | None]:
|
|
98
|
+
return compute_sparse_pairwise_distance(
|
|
99
|
+
adata=adata,
|
|
100
|
+
meta=self.meta,
|
|
101
|
+
bootstrap=bootstrap,
|
|
102
|
+
thresh=thresh,
|
|
103
|
+
**nei_kwargs,
|
|
104
|
+
)
|
|
105
|
+
|
|
106
|
+
def _compute_homology(
|
|
107
|
+
self,
|
|
108
|
+
adata: AnnData,
|
|
109
|
+
thresh: Diameter_t | None = None,
|
|
110
|
+
bootstrap: bool = False,
|
|
111
|
+
**nei_kwargs,
|
|
112
|
+
) -> None:
|
|
113
|
+
(
|
|
114
|
+
persistence_diagram,
|
|
115
|
+
cocycles,
|
|
116
|
+
vertex_indices,
|
|
117
|
+
sparse_pairwise_distance_matrix,
|
|
118
|
+
) = compute_persistence_diagram_and_cocycles(
|
|
119
|
+
adata=adata,
|
|
120
|
+
meta=self.meta,
|
|
121
|
+
thresh=thresh,
|
|
122
|
+
bootstrap=bootstrap,
|
|
123
|
+
**nei_kwargs,
|
|
124
|
+
)
|
|
125
|
+
self.persistence_diagram = persistence_diagram
|
|
126
|
+
self.cocycles = cocycles
|
|
127
|
+
self.pairwise_distance_matrix = sparse_pairwise_distance_matrix
|
|
128
|
+
self.selected_vertex_indices = vertex_indices
|
|
129
|
+
|
|
130
|
+
def _compute_boundary_matrix(
|
|
131
|
+
self, adata: AnnData, thresh: Diameter_t | None = None, **nei_kwargs
|
|
132
|
+
) -> None:
|
|
133
|
+
assert self.meta.preprocess
|
|
134
|
+
assert self.meta.preprocess.num_vertices
|
|
135
|
+
(
|
|
136
|
+
result,
|
|
137
|
+
edge_ids,
|
|
138
|
+
trig_ids,
|
|
139
|
+
sparse_pairwise_distance_matrix,
|
|
140
|
+
vertex_indices,
|
|
141
|
+
) = compute_boundary_matrix_data(
|
|
142
|
+
adata=adata, meta=self.meta, thresh=thresh, **nei_kwargs
|
|
143
|
+
)
|
|
144
|
+
self.pairwise_distance_matrix = sparse_pairwise_distance_matrix
|
|
145
|
+
self.selected_vertex_indices = vertex_indices
|
|
146
|
+
edge_ids_1d = np.array(edge_ids).flatten()
|
|
147
|
+
# reindex edges (also keep as colllection of triplets, easier to subset later)
|
|
148
|
+
edge_ids_reindex = np.searchsorted(edge_ids_1d, edge_ids)
|
|
149
|
+
edge_diameters = decode_edges(edge_ids_1d, self.meta.preprocess.num_vertices)
|
|
150
|
+
edge_diameters = [
|
|
151
|
+
sparse_pairwise_distance_matrix[i, j] for i, j in edge_diameters
|
|
152
|
+
]
|
|
153
|
+
self.boundary_matrix_d1 = BoundaryMatrixD1(
|
|
154
|
+
num_vertices=self.meta.preprocess.num_vertices,
|
|
155
|
+
data=(
|
|
156
|
+
edge_ids_reindex.tolist(),
|
|
157
|
+
np.repeat(
|
|
158
|
+
np.expand_dims(np.arange(edge_ids_reindex.shape[0]), 1), 3, axis=1
|
|
159
|
+
).tolist(),
|
|
160
|
+
),
|
|
161
|
+
shape=(len(edge_ids_1d), len(trig_ids)),
|
|
162
|
+
row_simplex_ids=edge_ids_1d.tolist(),
|
|
163
|
+
col_simplex_ids=trig_ids,
|
|
164
|
+
row_simplex_diams=edge_diameters,
|
|
165
|
+
col_simplex_diams=result.triangle_diameters,
|
|
166
|
+
)
|
|
167
|
+
|
|
168
|
+
def _compute_loop_representatives(
|
|
169
|
+
self,
|
|
170
|
+
loop_idx: int,
|
|
171
|
+
n: int = 8,
|
|
172
|
+
life_pct: float = 0.1,
|
|
173
|
+
n_force_deviate: int = 4,
|
|
174
|
+
n_reps_per_loop: int = 8,
|
|
175
|
+
loop_lower_pct: float = 5,
|
|
176
|
+
loop_upper_pct: float = 95,
|
|
177
|
+
n_max_cocycles: int = 10,
|
|
178
|
+
):
|
|
179
|
+
assert self.persistence_diagram is not None
|
|
180
|
+
assert self.cocycles is not None
|
|
181
|
+
assert self.pairwise_distance_matrix is not None
|
|
182
|
+
|
|
183
|
+
births, deaths = self.persistence_diagram[1]
|
|
184
|
+
loop_birth = float(births[loop_idx])
|
|
185
|
+
loop_death = float(deaths[loop_idx])
|
|
186
|
+
|
|
187
|
+
dm = self.pairwise_distance_matrix.tocoo()
|
|
188
|
+
edge_weights: dict[tuple[int, int], float] = {}
|
|
189
|
+
for i, j, w in zip(dm.row.tolist(), dm.col.tolist(), dm.data.tolist()):
|
|
190
|
+
if i == j:
|
|
191
|
+
continue
|
|
192
|
+
key = (i, j) if i < j else (j, i)
|
|
193
|
+
if key not in edge_weights or w < edge_weights[key]:
|
|
194
|
+
edge_weights[key] = float(w)
|
|
195
|
+
if not edge_weights:
|
|
196
|
+
return [], []
|
|
197
|
+
|
|
198
|
+
edges = list(edge_weights.keys())
|
|
199
|
+
edge_births = np.array([edge_weights[e] for e in edges], dtype=float)
|
|
200
|
+
|
|
201
|
+
loops, dists = reconstruct_n_loop_representatives(
|
|
202
|
+
cocycles_dim1=self.cocycles[1][loop_idx],
|
|
203
|
+
edges=edges,
|
|
204
|
+
edge_births=edge_births,
|
|
205
|
+
loop_birth=loop_birth,
|
|
206
|
+
loop_death=loop_death,
|
|
207
|
+
n=n,
|
|
208
|
+
life_pct=life_pct,
|
|
209
|
+
n_force_deviate=n_force_deviate,
|
|
210
|
+
n_reps_per_loop=n_reps_per_loop,
|
|
211
|
+
loop_lower_pct=loop_lower_pct,
|
|
212
|
+
loop_upper_pct=loop_upper_pct,
|
|
213
|
+
n_max_cocycles=n_max_cocycles,
|
|
214
|
+
)
|
|
215
|
+
|
|
216
|
+
if self.loop_representatives is None:
|
|
217
|
+
self.loop_representatives = []
|
|
218
|
+
while len(self.loop_representatives) <= loop_idx:
|
|
219
|
+
self.loop_representatives.append([])
|
|
220
|
+
self.loop_representatives[loop_idx] = loops
|
|
221
|
+
return loops, dists
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
# Copyright 2025 Zhiyuan Yu (Heemskerk's lab, University of Michigan)
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import math
|
|
5
|
+
from typing import Iterable, List, Sequence, Tuple
|
|
6
|
+
|
|
7
|
+
import igraph as ig
|
|
8
|
+
import numpy as np
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def reconstruct_n_loop_representatives(
|
|
12
|
+
cocycles_dim1: List,
|
|
13
|
+
edges: List[Tuple[int, int]],
|
|
14
|
+
edge_births: np.ndarray,
|
|
15
|
+
loop_birth: float,
|
|
16
|
+
loop_death: float,
|
|
17
|
+
n: int,
|
|
18
|
+
life_pct: float = 0.1,
|
|
19
|
+
n_force_deviate: int = 4,
|
|
20
|
+
n_reps_per_loop: int = 8,
|
|
21
|
+
loop_lower_pct: float = 5,
|
|
22
|
+
loop_upper_pct: float = 95,
|
|
23
|
+
n_max_cocycles: int = 10,
|
|
24
|
+
) -> Tuple[List[List[int]], List[float]]:
|
|
25
|
+
"""
|
|
26
|
+
Reconstruct diverse loop representatives using Yen-style deviation rounds.
|
|
27
|
+
"""
|
|
28
|
+
if n <= 0 or len(edges) == 0:
|
|
29
|
+
return [], []
|
|
30
|
+
|
|
31
|
+
filt_t = loop_birth + (loop_death - loop_birth) * life_pct
|
|
32
|
+
|
|
33
|
+
# Parse cocycle edges (each entry is [[i, j], coeff])
|
|
34
|
+
cocycle_edges: list[tuple[int, int]] = []
|
|
35
|
+
for simplex in cocycles_dim1:
|
|
36
|
+
try:
|
|
37
|
+
verts, coeff = simplex
|
|
38
|
+
except ValueError:
|
|
39
|
+
continue
|
|
40
|
+
if coeff == 0 or len(verts) != 2:
|
|
41
|
+
continue
|
|
42
|
+
cocycle_edges.append((int(verts[0]), int(verts[1])))
|
|
43
|
+
if len(cocycle_edges) == n_max_cocycles:
|
|
44
|
+
break
|
|
45
|
+
|
|
46
|
+
edge_births = np.asarray(edge_births)
|
|
47
|
+
mask = edge_births <= filt_t
|
|
48
|
+
if not np.any(mask):
|
|
49
|
+
return [], []
|
|
50
|
+
edges_filt = [e for e, keep in zip(edges, mask) if keep]
|
|
51
|
+
weights_filt = edge_births[mask].tolist()
|
|
52
|
+
|
|
53
|
+
sources = [e[0] for e in edges_filt] + [e[0] for e in cocycle_edges]
|
|
54
|
+
destinations = [e[1] for e in edges_filt] + [e[1] for e in cocycle_edges]
|
|
55
|
+
weights = weights_filt + [math.inf] * len(cocycle_edges)
|
|
56
|
+
if len(sources) == 0:
|
|
57
|
+
return [], []
|
|
58
|
+
|
|
59
|
+
n_vertices = max(max(sources), max(destinations)) + 1
|
|
60
|
+
g = ig.Graph(n=n_vertices, edges=list(zip(sources, destinations)), directed=False)
|
|
61
|
+
g.es["weight"] = weights
|
|
62
|
+
|
|
63
|
+
cycles_pool: list[list[int]] = []
|
|
64
|
+
cycles_dist: list[float] = []
|
|
65
|
+
|
|
66
|
+
for _ in range(n_force_deviate):
|
|
67
|
+
paths_this_round: list[list[int]] = []
|
|
68
|
+
for i, j in cocycle_edges:
|
|
69
|
+
paths = _k_shortest_paths(g, i, j, n_reps_per_loop)
|
|
70
|
+
if not paths:
|
|
71
|
+
continue
|
|
72
|
+
for path in paths:
|
|
73
|
+
dist = _path_weight(g, path)
|
|
74
|
+
cycles_pool.append(path)
|
|
75
|
+
paths_this_round.append(path)
|
|
76
|
+
cycles_dist.append(dist)
|
|
77
|
+
|
|
78
|
+
# Force deviation
|
|
79
|
+
for path in paths_this_round:
|
|
80
|
+
for u, v in zip(path[:-1], path[1:]):
|
|
81
|
+
try:
|
|
82
|
+
eid = g.get_eid(u, v, directed=False)
|
|
83
|
+
except ig._igraph.InternalError:
|
|
84
|
+
continue
|
|
85
|
+
g.es[eid]["weight"] = math.inf
|
|
86
|
+
|
|
87
|
+
return _select_diverse_loops(
|
|
88
|
+
cycles=cycles_pool,
|
|
89
|
+
distances=cycles_dist,
|
|
90
|
+
n=n,
|
|
91
|
+
lower_pct=loop_lower_pct,
|
|
92
|
+
upper_pct=loop_upper_pct,
|
|
93
|
+
)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _k_shortest_paths(g: ig.Graph, source: int, target: int, k: int) -> list[list[int]]:
|
|
97
|
+
if source == target:
|
|
98
|
+
return []
|
|
99
|
+
try:
|
|
100
|
+
return g.get_k_shortest_paths(
|
|
101
|
+
source, target, k=k, weights=g.es["weight"], mode="ALL"
|
|
102
|
+
)
|
|
103
|
+
except ig._igraph.InternalError:
|
|
104
|
+
return []
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _path_weight(g: ig.Graph, path: Sequence[int]) -> float:
|
|
108
|
+
if len(path) < 2:
|
|
109
|
+
return math.inf
|
|
110
|
+
weight = 0.0
|
|
111
|
+
for u, v in zip(path[:-1], path[1:]):
|
|
112
|
+
try:
|
|
113
|
+
eid = g.get_eid(u, v, directed=False)
|
|
114
|
+
except ig._igraph.InternalError:
|
|
115
|
+
return math.inf
|
|
116
|
+
w = g.es[eid]["weight"]
|
|
117
|
+
weight += float(w)
|
|
118
|
+
return weight
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _select_diverse_loops(
|
|
122
|
+
cycles: Iterable[Sequence[int]],
|
|
123
|
+
distances: Iterable[float],
|
|
124
|
+
n: int,
|
|
125
|
+
lower_pct: float,
|
|
126
|
+
upper_pct: float,
|
|
127
|
+
) -> Tuple[List[List[int]], List[float]]:
|
|
128
|
+
pairs = sorted(
|
|
129
|
+
[(float(d), list(c)) for d, c in zip(distances, cycles) if math.isfinite(d)],
|
|
130
|
+
key=lambda x: x[0],
|
|
131
|
+
)
|
|
132
|
+
if not pairs:
|
|
133
|
+
return [], []
|
|
134
|
+
|
|
135
|
+
n_total = len(pairs)
|
|
136
|
+
n_return = min(n_total, n)
|
|
137
|
+
if n_return == 1:
|
|
138
|
+
idxs = [n_total // 2]
|
|
139
|
+
else:
|
|
140
|
+
step = (upper_pct - lower_pct) / (n_return - 1)
|
|
141
|
+
idxs = []
|
|
142
|
+
for i in range(n_return):
|
|
143
|
+
pct = (lower_pct + step * i) / 100
|
|
144
|
+
idx = min(int(math.floor(n_total * pct)), n_total - 1)
|
|
145
|
+
idxs.append(idx)
|
|
146
|
+
|
|
147
|
+
selected = [pairs[i] for i in idxs]
|
|
148
|
+
dists = [p[0] for p in selected]
|
|
149
|
+
loops = [p[1] for p in selected]
|
|
150
|
+
return loops, dists
|
|
@@ -1,7 +1,13 @@
|
|
|
1
1
|
# Copyright 2025 Zhiyuan Yu (Heemskerk's lab, University of Michigan)
|
|
2
2
|
from pydantic import BaseModel
|
|
3
3
|
|
|
4
|
-
from .types import
|
|
4
|
+
from .types import (
|
|
5
|
+
EmbeddingMethod,
|
|
6
|
+
EmbeddingNeighbors,
|
|
7
|
+
FeatureSelectionMethod,
|
|
8
|
+
IndexListDownSample,
|
|
9
|
+
Size_t,
|
|
10
|
+
)
|
|
5
11
|
|
|
6
12
|
|
|
7
13
|
class PreprocessMeta(BaseModel):
|
|
@@ -17,6 +23,8 @@ class PreprocessMeta(BaseModel):
|
|
|
17
23
|
n_neighbors: int
|
|
18
24
|
n_diffusion_comps: int | None = None
|
|
19
25
|
scvi_key: str | None = None
|
|
26
|
+
indices_downsample: IndexListDownSample | None = None
|
|
27
|
+
num_vertices: Size_t | None = None
|
|
20
28
|
|
|
21
29
|
|
|
22
30
|
class ScloopMeta(BaseModel):
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# Copyright 2025 Zhiyuan Yu (Heemskerk's lab, University of Michigan)
|
|
2
|
+
from typing import Annotated, Literal, TypeAlias
|
|
3
|
+
|
|
4
|
+
from pydantic import Field
|
|
5
|
+
|
|
6
|
+
FeatureSelectionMethod = Literal["hvg", "delve", "none"]
|
|
7
|
+
EmbeddingMethod = Literal["pca", "diffmap", "scvi"]
|
|
8
|
+
EmbeddingNeighbors = Literal["pca", "scvi"]
|
|
9
|
+
|
|
10
|
+
Index_t = Annotated[int, Field(ge=0)]
|
|
11
|
+
Size_t = Annotated[int, Field(ge=0)]
|
|
12
|
+
Diameter_t = Annotated[float, Field(ge=0)]
|
|
13
|
+
SizeDownSample = Annotated[
|
|
14
|
+
int, Field(ge=2, description="Sample to this number of cells")
|
|
15
|
+
]
|
|
16
|
+
# need at least 2 points to compute PH. Maybe also set an upper bound later as it is not feasible to compute PH on a lot of points
|
|
17
|
+
IndexListDownSample: TypeAlias = Annotated[
|
|
18
|
+
list[Index_t],
|
|
19
|
+
Field(min_length=2, description="Downsampled indices for PH computation"),
|
|
20
|
+
]
|
|
21
|
+
IndexListDistMatrix: TypeAlias = Annotated[
|
|
22
|
+
list[Index_t],
|
|
23
|
+
Field(
|
|
24
|
+
min_length=2,
|
|
25
|
+
description="Corresponding vertex index for each column of a distance matrix",
|
|
26
|
+
),
|
|
27
|
+
]
|
|
28
|
+
IndexListSimplex: TypeAlias = Annotated[
|
|
29
|
+
list[Index_t],
|
|
30
|
+
Field(min_length=0, description="Unique indicies for simplicies"),
|
|
31
|
+
]
|
|
32
|
+
|
|
33
|
+
# TODO: make a type for boundary matrix. Restrict matrix size for efficient computation
|