scloop 0.1.2__tar.gz → 0.1.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. {scloop-0.1.2/src/scloop.egg-info → scloop-0.1.4}/PKG-INFO +8 -2
  2. {scloop-0.1.2 → scloop-0.1.4}/README.md +6 -1
  3. {scloop-0.1.2 → scloop-0.1.4}/pyproject.toml +4 -1
  4. {scloop-0.1.2 → scloop-0.1.4}/setup.py +3 -2
  5. {scloop-0.1.2 → scloop-0.1.4}/src/scloop/__init__.py +2 -2
  6. scloop-0.1.4/src/scloop/computing/__init__.py +8 -0
  7. scloop-0.1.4/src/scloop/computing/homology.py +92 -0
  8. scloop-0.1.4/src/scloop/data/__init__.py +2 -0
  9. {scloop-0.1.2 → scloop-0.1.4}/src/scloop/data/analysis_containers.py +1 -1
  10. scloop-0.1.4/src/scloop/data/containers.py +221 -0
  11. scloop-0.1.4/src/scloop/data/loop_reconstruction.py +150 -0
  12. {scloop-0.1.2 → scloop-0.1.4}/src/scloop/data/metadata.py +9 -1
  13. scloop-0.1.4/src/scloop/data/types.py +33 -0
  14. scloop-0.1.4/src/scloop/data/utils.py +67 -0
  15. {scloop-0.1.2 → scloop-0.1.4}/src/scloop/preprocessing/delve/delve.py +12 -12
  16. {scloop-0.1.2 → scloop-0.1.4}/src/scloop/preprocessing/delve/kh.py +7 -6
  17. scloop-0.1.4/src/scloop/preprocessing/downsample.py +91 -0
  18. {scloop-0.1.2 → scloop-0.1.4}/src/scloop/preprocessing/prepare.py +37 -4
  19. scloop-0.1.4/src/scloop/tools/__init__.py +4 -0
  20. scloop-0.1.4/src/scloop/tools/_loops.py +63 -0
  21. {scloop-0.1.2 → scloop-0.1.4/src/scloop.egg-info}/PKG-INFO +8 -2
  22. {scloop-0.1.2 → scloop-0.1.4}/src/scloop.egg-info/SOURCES.txt +4 -0
  23. {scloop-0.1.2 → scloop-0.1.4}/src/scloop.egg-info/requires.txt +1 -0
  24. scloop-0.1.2/src/scloop/data/__init__.py +0 -3
  25. scloop-0.1.2/src/scloop/data/containers.py +0 -49
  26. scloop-0.1.2/src/scloop/data/types.py +0 -6
  27. scloop-0.1.2/src/scloop/data/utils.py +0 -410
  28. scloop-0.1.2/src/scloop/preprocessing/downsample.py +0 -1
  29. scloop-0.1.2/src/scloop/tools/__init__.py +0 -0
  30. {scloop-0.1.2 → scloop-0.1.4}/LICENSE +0 -0
  31. {scloop-0.1.2 → scloop-0.1.4}/MANIFEST.in +0 -0
  32. {scloop-0.1.2 → scloop-0.1.4}/setup.cfg +0 -0
  33. {scloop-0.1.2 → scloop-0.1.4}/src/scloop/analyzing/__init__.py +0 -0
  34. {scloop-0.1.2 → scloop-0.1.4}/src/scloop/benchmarking/__init__.py +0 -0
  35. {scloop-0.1.2 → scloop-0.1.4}/src/scloop/data/ripser.cpp +0 -0
  36. {scloop-0.1.2 → scloop-0.1.4}/src/scloop/data/ripser.hpp +0 -0
  37. {scloop-0.1.2 → scloop-0.1.4}/src/scloop/data/ripser_lib.cpp +0 -0
  38. {scloop-0.1.2 → scloop-0.1.4}/src/scloop/data/ripser_lib.pyx +0 -0
  39. {scloop-0.1.2 → scloop-0.1.4}/src/scloop/matching/__init__.py +0 -0
  40. {scloop-0.1.2 → scloop-0.1.4}/src/scloop/matching/data_modules.py +0 -0
  41. {scloop-0.1.2 → scloop-0.1.4}/src/scloop/matching/mlp.py +0 -0
  42. {scloop-0.1.2 → scloop-0.1.4}/src/scloop/matching/nf.py +0 -0
  43. {scloop-0.1.2 → scloop-0.1.4}/src/scloop/plotting/__init__.py +0 -0
  44. {scloop-0.1.2 → scloop-0.1.4}/src/scloop/plotting/plot.py +0 -0
  45. {scloop-0.1.2 → scloop-0.1.4}/src/scloop/preprocessing/__init__.py +1 -1
  46. {scloop-0.1.2 → scloop-0.1.4}/src/scloop/preprocessing/delve/__init__.py +0 -0
  47. {scloop-0.1.2 → scloop-0.1.4}/src/scloop/py.typed +0 -0
  48. {scloop-0.1.2 → scloop-0.1.4}/src/scloop/utils/__init__.py +0 -0
  49. {scloop-0.1.2 → scloop-0.1.4}/src/scloop/utils/distance_metrics/__init__.py +0 -0
  50. {scloop-0.1.2 → scloop-0.1.4}/src/scloop/utils/distance_metrics/discrete-frechet-distance/Frechet.cpp +0 -0
  51. {scloop-0.1.2 → scloop-0.1.4}/src/scloop/utils/distance_metrics/discrete-frechet-distance/Frechet.h +0 -0
  52. {scloop-0.1.2 → scloop-0.1.4}/src/scloop/utils/distance_metrics/frechet.cpp +0 -0
  53. {scloop-0.1.2 → scloop-0.1.4}/src/scloop/utils/distance_metrics/frechet.pyx +0 -0
  54. {scloop-0.1.2 → scloop-0.1.4}/src/scloop/utils/linear_algebra_gf2/__init__.py +0 -0
  55. {scloop-0.1.2 → scloop-0.1.4}/src/scloop/utils/linear_algebra_gf2/gf2_toolkit_lib.pyx +0 -0
  56. {scloop-0.1.2 → scloop-0.1.4}/src/scloop/utils/linear_algebra_gf2/m4ri_lib.c +0 -0
  57. {scloop-0.1.2 → scloop-0.1.4}/src/scloop/utils/linear_algebra_gf2/m4ri_lib.pyx +0 -0
  58. {scloop-0.1.2 → scloop-0.1.4}/src/scloop.egg-info/dependency_links.txt +0 -0
  59. {scloop-0.1.2 → scloop-0.1.4}/src/scloop.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: scloop
3
- Version: 0.1.2
3
+ Version: 0.1.4
4
4
  Summary: single-cell loop analysis
5
5
  Author-email: "Zhiyuan(Stan) Yu" <zyyu@umich.edu>, Idse Heemskerk <idse.heemskerk@gmail.com>
6
6
  Classifier: Programming Language :: Python :: 3 :: Only
@@ -9,6 +9,7 @@ Requires-Python: >=3.12
9
9
  Description-Content-Type: text/markdown
10
10
  License-File: LICENSE
11
11
  Requires-Dist: cython>=3.0.12
12
+ Requires-Dist: igraph>=0.11
12
13
  Requires-Dist: numba>=0.62.1
13
14
  Requires-Dist: numpy<2
14
15
  Requires-Dist: pandas
@@ -75,10 +76,14 @@ src/scloop/
75
76
  │   └── __init__.py
76
77
  ├── benchmarking
77
78
  │   └── __init__.py
79
+ ├── computing
80
+ │   ├── homology.py
81
+ │   └── __init__.py
78
82
  ├── data
79
83
  │   ├── analysis_containers.py
80
84
  │   ├── containers.py
81
85
  │   ├── __init__.py
86
+ │   ├── loop_reconstruction.py
82
87
  │   ├── metadata.py
83
88
  │   ├── ripser.cpp
84
89
  │   ├── ripser.hpp
@@ -105,7 +110,8 @@ src/scloop/
105
110
  │   └── prepare.py
106
111
  ├── py.typed
107
112
  ├── tools
108
- │   └── __init__.py
113
+ │   ├── __init__.py
114
+ │   └── _loops.py
109
115
  └── utils
110
116
  ├── distance_metrics
111
117
  │   ├── discrete-frechet-distance
@@ -46,10 +46,14 @@ src/scloop/
46
46
  │   └── __init__.py
47
47
  ├── benchmarking
48
48
  │   └── __init__.py
49
+ ├── computing
50
+ │   ├── homology.py
51
+ │   └── __init__.py
49
52
  ├── data
50
53
  │   ├── analysis_containers.py
51
54
  │   ├── containers.py
52
55
  │   ├── __init__.py
56
+ │   ├── loop_reconstruction.py
53
57
  │   ├── metadata.py
54
58
  │   ├── ripser.cpp
55
59
  │   ├── ripser.hpp
@@ -76,7 +80,8 @@ src/scloop/
76
80
  │   └── prepare.py
77
81
  ├── py.typed
78
82
  ├── tools
79
- │   └── __init__.py
83
+ │   ├── __init__.py
84
+ │   └── _loops.py
80
85
  └── utils
81
86
  ├── distance_metrics
82
87
  │   ├── discrete-frechet-distance
@@ -19,7 +19,7 @@ requires = [ "cython>=3.0.12", "setuptools>=74.1" ]
19
19
 
20
20
  [project]
21
21
  name = "scloop"
22
- version = "0.1.2"
22
+ version = "0.1.4"
23
23
  description = "single-cell loop analysis"
24
24
  readme = "README.md"
25
25
  authors = [
@@ -34,6 +34,7 @@ classifiers = [
34
34
  ]
35
35
  dependencies = [
36
36
  "cython>=3.0.12",
37
+ "igraph>=0.11",
37
38
  "numba>=0.62.1",
38
39
  "numpy<2",
39
40
  "pandas",
@@ -75,6 +76,8 @@ explicit = true
75
76
  [tool.ruff]
76
77
  format.indent-style = "space"
77
78
  format.quote-style = "double"
79
+ lint.extend-select = [ "I" ]
80
+ lint.fixable = [ "ALL" ]
78
81
 
79
82
  [tool.pyproject-fmt]
80
83
  column_width = 120
@@ -1,7 +1,8 @@
1
- from setuptools import setup, Extension
2
- from Cython.Build import cythonize
3
1
  import os
4
2
 
3
+ from Cython.Build import cythonize
4
+ from setuptools import Extension, setup
5
+
5
6
  project_root = os.path.dirname(os.path.abspath(__file__))
6
7
  m4ri_dir = os.path.join(project_root, "src/scloop/utils/linear_algebra_gf2")
7
8
 
@@ -3,6 +3,6 @@ import warnings
3
3
  warnings.filterwarnings("ignore", category=FutureWarning, module="scanpy")
4
4
  warnings.filterwarnings("ignore", category=FutureWarning, module="anndata")
5
5
 
6
- from . import data
7
- from . import utils
6
+ from . import data, utils
8
7
  from . import preprocessing as pp
8
+ from . import tools as tl
@@ -0,0 +1,8 @@
1
+ """Lightweight compute helpers used by the public API layer."""
2
+ # Copyright 2025 Zhiyuan Yu (Heemskerk's lab, University of Michigan)
3
+
4
+ from .homology import (
5
+ compute_boundary_matrix_data,
6
+ compute_persistence_diagram_and_cocycles,
7
+ compute_sparse_pairwise_distance,
8
+ )
@@ -0,0 +1,92 @@
1
+ # Copyright 2025 Zhiyuan Yu (Heemskerk's lab, University of Michigan)
2
+ from __future__ import annotations
3
+
4
+ import numpy as np
5
+ from anndata import AnnData
6
+ from scipy.sparse import csr_matrix
7
+ from sklearn.neighbors import radius_neighbors_graph
8
+
9
+ from ..data.metadata import ScloopMeta
10
+ from ..data.ripser_lib import get_boundary_matrix, ripser
11
+ from ..data.types import Diameter_t, IndexListDistMatrix
12
+ from ..data.utils import encode_triangles_and_edges
13
+
14
+
15
def compute_sparse_pairwise_distance(
    adata: AnnData,
    meta: ScloopMeta,
    bootstrap: bool = False,
    noise_scale: float = 1e-3,
    thresh: Diameter_t | None = None,
    **nei_kwargs,
) -> tuple[csr_matrix, IndexListDistMatrix | None]:
    """Build a sparse radius-neighbor graph over the selected embedding rows.

    The embedding is read from ``adata.obsm["X_<embedding_method>"]`` and
    restricted to ``meta.preprocess.indices_downsample`` (every row when that
    is ``None``). With ``bootstrap=True`` the selected rows are resampled with
    replacement and perturbed with Gaussian noise before the graph is built.

    Args:
        adata: Annotated data whose ``obsm`` holds the embedding.
        meta: Metadata; ``meta.preprocess`` and its ``embedding_method`` must
            be set.
        bootstrap: Resample the selected rows with replacement when true.
        noise_scale: Standard deviation of the noise added to bootstrap rows.
        thresh: Radius handed to ``radius_neighbors_graph``.
        **nei_kwargs: Extra keyword arguments for ``radius_neighbors_graph``.
            ``mode`` defaults to ``"distance"`` when not supplied.

    Returns:
        ``(graph, indices)`` where ``indices[k]`` is the embedding row used for
        row/column ``k`` of ``graph``.
    """
    assert meta.preprocess is not None
    assert meta.preprocess.embedding_method is not None
    emb = adata.obsm[f"X_{meta.preprocess.embedding_method}"]
    selected_indices = (
        meta.preprocess.indices_downsample
        if meta.preprocess.indices_downsample is not None
        else list(range(emb.shape[0]))
    )
    X = emb[selected_indices]
    if bootstrap:
        sample_idx = np.random.choice(
            len(selected_indices), size=len(selected_indices), replace=True
        ).tolist()
        # Map positions in the selection back to original embedding rows.
        boot_idx = [selected_indices[i] for i in sample_idx]
        X = X[sample_idx] + np.random.normal(scale=noise_scale, size=X.shape)
    else:
        boot_idx = selected_indices
    # scikit-learn's default mode is "connectivity", which stores 1.0 for every
    # neighbor pair. The result is consumed as a distance matrix elsewhere in
    # this package, so default to real distances while still honoring an
    # explicit caller choice.
    nei_kwargs.setdefault("mode", "distance")
    return (
        radius_neighbors_graph(
            X=X,
            radius=thresh,
            **nei_kwargs,
        ),
        boot_idx,
    )
49
+
50
+
51
def compute_persistence_diagram_and_cocycles(
    adata: AnnData,
    meta: ScloopMeta,
    thresh: Diameter_t | None = None,
    bootstrap: bool = False,
    **nei_kwargs,
) -> tuple[list[np.ndarray], list, IndexListDistMatrix | None, csr_matrix]:
    """Run ripser on the sparse neighbor graph of the selected embedding rows.

    The graph comes from ``compute_sparse_pairwise_distance``; ripser is called
    with ``modulus=2``, ``dim_max=1`` and cocycle output enabled.

    Returns:
        ``(births_and_deaths_by_dim, cocycles_by_dim, vertex_indices, graph)``.
    """
    dist_graph, sampled_indices = compute_sparse_pairwise_distance(
        adata=adata, meta=meta, bootstrap=bootstrap, thresh=thresh, **nei_kwargs
    )
    # ripser receives the COO form; the CSR matrix itself is returned as-is.
    coo_graph = dist_graph.tocoo(copy=False)
    ph = ripser(
        distance_matrix=coo_graph,
        modulus=2,
        dim_max=1,
        threshold=thresh,
        do_cocycles=True,
    )
    diagrams = ph.births_and_deaths_by_dim
    cocycles = ph.cocycles_by_dim
    return diagrams, cocycles, sampled_indices, dist_graph
74
+
75
+
76
def compute_boundary_matrix_data(
    adata: AnnData, meta: ScloopMeta, thresh: Diameter_t | None = None, **nei_kwargs
) -> tuple:
    """Collect the raw ingredients of the edge/triangle boundary matrix.

    Builds the (non-bootstrapped) sparse neighbor graph, asks
    ``get_boundary_matrix`` for the triangles under ``thresh`` and encodes them
    with ``encode_triangles_and_edges``.

    Returns:
        ``(result, edge_ids, trig_ids, graph, vertex_indices)``; ``edge_ids``
        and ``trig_ids`` are empty lists when no triangle was reported.
    """
    assert meta.preprocess is not None
    assert meta.preprocess.num_vertices is not None
    dist_graph, vertex_indices = compute_sparse_pairwise_distance(
        adata=adata, meta=meta, bootstrap=False, thresh=thresh, **nei_kwargs
    )
    result = get_boundary_matrix(dist_graph.tocoo(), thresh)
    triangles = np.asarray(result.triangle_vertices, dtype=np.int64)
    if len(triangles) != 0:
        edge_ids, trig_ids = encode_triangles_and_edges(
            triangles, meta.preprocess.num_vertices
        )
    else:
        # Nothing to encode; hand back empty id lists.
        edge_ids, trig_ids = [], []
    return result, edge_ids, trig_ids, dist_graph, vertex_indices
@@ -0,0 +1,2 @@
1
+ from .containers import HomologyData
2
+ from .ripser_lib import RipserResults, get_boundary_matrix, ripser
@@ -3,8 +3,8 @@ from __future__ import annotations
3
3
 
4
4
  import numpy as np
5
5
  import pandas as pd
6
+ from pydantic import ConfigDict, Field
6
7
  from pydantic.dataclasses import dataclass
7
- from pydantic import Field, ConfigDict
8
8
 
9
9
 
10
10
  @dataclass(config=ConfigDict(arbitrary_types_allowed=True))
@@ -0,0 +1,221 @@
1
+ # Copyright 2025 Zhiyuan Yu (Heemskerk's lab, University of Michigan)
2
+ from abc import abstractmethod
3
+
4
+ import numpy as np
5
+ from anndata import AnnData
6
+ from pydantic import BaseModel, ConfigDict, ValidationInfo, field_validator
7
+ from pydantic.dataclasses import dataclass
8
+ from scipy.sparse import csr_matrix
9
+
10
+ from ..computing.homology import (
11
+ compute_boundary_matrix_data,
12
+ compute_persistence_diagram_and_cocycles,
13
+ compute_sparse_pairwise_distance,
14
+ )
15
+ from .analysis_containers import BootstrapAnalysis, HodgeAnalysis
16
+ from .loop_reconstruction import reconstruct_n_loop_representatives
17
+ from .metadata import ScloopMeta
18
+ from .types import Diameter_t, Index_t, IndexListDistMatrix, Size_t
19
+ from .utils import decode_edges, decode_triangles
20
+
21
+
22
# Base model for a sparse boundary matrix whose rows and columns are simplices.
# Subclasses implement the two ``*_simplex_decode`` methods.
class BoundaryMatrix(BaseModel):
    num_vertices: Size_t
    data: tuple[list, list]  # in coo format (row indices, col indices) of ones
    shape: tuple[Size_t, Size_t]
    row_simplex_ids: list[Index_t]
    col_simplex_ids: list[Index_t]
    row_simplex_diams: list[Diameter_t]
    col_simplex_diams: list[Diameter_t]

    @field_validator(
        "row_simplex_ids",
        "row_simplex_diams",
        "col_simplex_ids",
        "col_simplex_diams",
        mode="before",
    )
    @classmethod
    def validate_fields(cls, v: list, info: ValidationInfo):
        """Check that each per-row / per-column list matches ``shape``.

        Row lists must have ``shape[0]`` entries and column lists ``shape[1]``.
        ``shape`` is declared before these fields, so it is already present in
        ``info.data`` when it validated successfully.

        Raises:
            ValueError: if the list length disagrees with ``shape``.
        """
        shape = info.data.get("shape")
        assert shape
        if info.field_name == "row_simplex_ids":
            if len(v) != shape[0]:
                raise ValueError(
                    "Length of row ids does not match the number of rows of the matrix"
                )
        elif info.field_name == "row_simplex_diams":
            # Previously unchecked: row diameters must also line up with rows.
            if len(v) != shape[0]:
                raise ValueError(
                    f"Length of {info.field_name} does not match the number of rows of the matrix"
                )
        elif info.field_name in ["col_simplex_ids", "col_simplex_diams"]:
            if len(v) != shape[1]:
                raise ValueError(
                    f"Length of {info.field_name} does not match the number of columns of the matrix"
                )
        return v

    @abstractmethod
    def row_simplex_decode(self) -> list:
        """
        From simplex id (row) to vertex ids
        """
        pass

    @abstractmethod
    def col_simplex_decode(self) -> list:
        """
        From simplex id (column) to vertex ids
        """
        pass
63
+
64
+
65
# Boundary matrix with edges as rows and triangles as columns.
class BoundaryMatrixD1(BoundaryMatrix):
    data: tuple[list[list[Index_t]], list[list[Index_t]]]

    def row_simplex_decode(self) -> list[tuple[Index_t, Index_t]]:
        """Decode the row simplex ids with ``decode_edges``."""
        row_ids = np.array(self.row_simplex_ids)
        return decode_edges(row_ids, self.num_vertices)

    def col_simplex_decode(self) -> list[tuple[Index_t, Index_t, Index_t]]:
        """Decode the column simplex ids with ``decode_triangles``."""
        col_ids = np.array(self.col_simplex_ids)
        return decode_triangles(col_ids, self.num_vertices)
73
+
74
+
75
@dataclass(config=ConfigDict(arbitrary_types_allowed=True))
class HomologyData:
    """
    store core homology data and associated analysis data
    """

    # Run metadata; ``meta.preprocess`` is read by the compute methods below.
    meta: ScloopMeta
    # Filled by ``_compute_homology``.
    persistence_diagram: list[np.ndarray] | None = None
    # Filled per loop index by ``_compute_loop_representatives``.
    loop_representatives: list[list[list[int]]] | None = None
    cocycles: list | None = None
    # Filled by ``_compute_homology`` and ``_compute_boundary_matrix``.
    pairwise_distance_matrix: csr_matrix | None = None
    selected_vertex_indices: list[int] | None = None
    # Filled by ``_compute_boundary_matrix``.
    boundary_matrix_d1: BoundaryMatrixD1 | None = None
    bootstrap_data: BootstrapAnalysis | None = None
    hodge_data: HodgeAnalysis | None = None

    def _compute_sparse_pairwise_distance(
        self,
        adata: AnnData,
        bootstrap: bool = False,
        thresh: Diameter_t | None = None,
        **nei_kwargs,
    ) -> tuple[csr_matrix, IndexListDistMatrix | None]:
        """Delegate to ``compute_sparse_pairwise_distance`` using ``self.meta``."""
        return compute_sparse_pairwise_distance(
            adata=adata,
            meta=self.meta,
            bootstrap=bootstrap,
            thresh=thresh,
            **nei_kwargs,
        )

    def _compute_homology(
        self,
        adata: AnnData,
        thresh: Diameter_t | None = None,
        bootstrap: bool = False,
        **nei_kwargs,
    ) -> None:
        """Compute persistence diagram and cocycles and store them on ``self``.

        Also stores the sparse matrix and the vertex indices it was built from.
        """
        (
            persistence_diagram,
            cocycles,
            vertex_indices,
            sparse_pairwise_distance_matrix,
        ) = compute_persistence_diagram_and_cocycles(
            adata=adata,
            meta=self.meta,
            thresh=thresh,
            bootstrap=bootstrap,
            **nei_kwargs,
        )
        self.persistence_diagram = persistence_diagram
        self.cocycles = cocycles
        self.pairwise_distance_matrix = sparse_pairwise_distance_matrix
        self.selected_vertex_indices = vertex_indices

    def _compute_boundary_matrix(
        self, adata: AnnData, thresh: Diameter_t | None = None, **nei_kwargs
    ) -> None:
        """Build ``self.boundary_matrix_d1`` from the triangles under ``thresh``.

        Rows are the distinct edge ids (ascending); columns are the triangles
        in the order returned by ``compute_boundary_matrix_data``.
        """
        assert self.meta.preprocess
        assert self.meta.preprocess.num_vertices
        (
            result,
            edge_ids,
            trig_ids,
            sparse_pairwise_distance_matrix,
            vertex_indices,
        ) = compute_boundary_matrix_data(
            adata=adata, meta=self.meta, thresh=thresh, **nei_kwargs
        )
        self.pairwise_distance_matrix = sparse_pairwise_distance_matrix
        self.selected_vertex_indices = vertex_indices
        # np.searchsorted requires a sorted haystack, and every edge must own
        # exactly one row, so use the sorted unique ids rather than a plain
        # flatten (which keeps duplicates and arbitrary order).
        # NOTE(review): assumes edge_ids holds the edge ids of each triangle
        # (one triplet per triangle) -- confirm against encode_triangles_and_edges.
        edge_ids_1d = np.unique(np.asarray(edge_ids))
        # reindex edges (also keep as collection of triplets, easier to subset later)
        edge_ids_reindex = np.searchsorted(edge_ids_1d, edge_ids)
        edge_vertices = decode_edges(edge_ids_1d, self.meta.preprocess.num_vertices)
        edge_diameters = [
            sparse_pairwise_distance_matrix[i, j] for i, j in edge_vertices
        ]
        self.boundary_matrix_d1 = BoundaryMatrixD1(
            num_vertices=self.meta.preprocess.num_vertices,
            data=(
                edge_ids_reindex.tolist(),
                # Column index of each triangle, repeated once per edge slot.
                np.repeat(
                    np.expand_dims(np.arange(edge_ids_reindex.shape[0]), 1), 3, axis=1
                ).tolist(),
            ),
            shape=(len(edge_ids_1d), len(trig_ids)),
            row_simplex_ids=edge_ids_1d.tolist(),
            col_simplex_ids=trig_ids,
            row_simplex_diams=edge_diameters,
            col_simplex_diams=result.triangle_diameters,
        )

    def _compute_loop_representatives(
        self,
        loop_idx: int,
        n: int = 8,
        life_pct: float = 0.1,
        n_force_deviate: int = 4,
        n_reps_per_loop: int = 8,
        loop_lower_pct: float = 5,
        loop_upper_pct: float = 95,
        n_max_cocycles: int = 10,
    ):
        """Reconstruct representatives for loop ``loop_idx`` and store them.

        Requires ``persistence_diagram``, ``cocycles`` and
        ``pairwise_distance_matrix`` to be populated. The remaining arguments
        are forwarded to ``reconstruct_n_loop_representatives``.

        Returns:
            ``(loops, dists)`` as returned by the reconstruction, or
            ``([], [])`` when the matrix has no off-diagonal entries.
        """
        assert self.persistence_diagram is not None
        assert self.cocycles is not None
        assert self.pairwise_distance_matrix is not None

        # NOTE(review): assumes the dimension-1 entry unpacks into
        # (births, deaths) -- confirm the layout produced by ripser.
        births, deaths = self.persistence_diagram[1]
        loop_birth = float(births[loop_idx])
        loop_death = float(deaths[loop_idx])

        dm = self.pairwise_distance_matrix.tocoo()
        # Undirected edge -> smallest stored weight, keyed with i < j.
        edge_weights: dict[tuple[int, int], float] = {}
        for i, j, w in zip(dm.row.tolist(), dm.col.tolist(), dm.data.tolist()):
            if i == j:
                continue
            key = (i, j) if i < j else (j, i)
            if key not in edge_weights or w < edge_weights[key]:
                edge_weights[key] = float(w)
        if not edge_weights:
            return [], []

        edges = list(edge_weights.keys())
        edge_births = np.array([edge_weights[e] for e in edges], dtype=float)

        loops, dists = reconstruct_n_loop_representatives(
            cocycles_dim1=self.cocycles[1][loop_idx],
            edges=edges,
            edge_births=edge_births,
            loop_birth=loop_birth,
            loop_death=loop_death,
            n=n,
            life_pct=life_pct,
            n_force_deviate=n_force_deviate,
            n_reps_per_loop=n_reps_per_loop,
            loop_lower_pct=loop_lower_pct,
            loop_upper_pct=loop_upper_pct,
            n_max_cocycles=n_max_cocycles,
        )

        if self.loop_representatives is None:
            self.loop_representatives = []
        # Pad with empty slots so that index ``loop_idx`` exists.
        while len(self.loop_representatives) <= loop_idx:
            self.loop_representatives.append([])
        self.loop_representatives[loop_idx] = loops
        return loops, dists
@@ -0,0 +1,150 @@
1
+ # Copyright 2025 Zhiyuan Yu (Heemskerk's lab, University of Michigan)
2
+ from __future__ import annotations
3
+
4
+ import math
5
+ from typing import Iterable, List, Sequence, Tuple
6
+
7
+ import igraph as ig
8
+ import numpy as np
9
+
10
+
11
def reconstruct_n_loop_representatives(
    cocycles_dim1: List,
    edges: List[Tuple[int, int]],
    edge_births: np.ndarray,
    loop_birth: float,
    loop_death: float,
    n: int,
    life_pct: float = 0.1,
    n_force_deviate: int = 4,
    n_reps_per_loop: int = 8,
    loop_lower_pct: float = 5,
    loop_upper_pct: float = 95,
    n_max_cocycles: int = 10,
) -> Tuple[List[List[int]], List[float]]:
    """
    Reconstruct diverse loop representatives using Yen-style deviation rounds.

    Edges born no later than ``loop_birth + (loop_death - loop_birth) * life_pct``
    form a weighted graph; up to ``n_max_cocycles`` cocycle edges are appended
    with infinite weight. In each of ``n_force_deviate`` rounds, k-shortest
    paths between the endpoints of every cocycle edge are collected, and the
    edges they used are then set to infinite weight. Up to ``n`` of the
    collected paths are returned together with their weights.
    """
    if n <= 0 or len(edges) == 0:
        return [], []

    birth_cutoff = loop_birth + (loop_death - loop_birth) * life_pct

    # Parse cocycle edges (each entry is [[i, j], coeff])
    cocycle_edges: list[tuple[int, int]] = []
    for entry in cocycles_dim1:
        try:
            verts, coeff = entry
        except ValueError:
            continue
        if coeff == 0 or len(verts) != 2:
            continue
        cocycle_edges.append((int(verts[0]), int(verts[1])))
        if len(cocycle_edges) == n_max_cocycles:
            break

    births = np.asarray(edge_births)
    keep = births <= birth_cutoff
    if not np.any(keep):
        return [], []
    kept_edges = [edge for edge, flag in zip(edges, keep) if flag]
    kept_weights = births[keep].tolist()

    # NOTE(review): a cocycle edge that is also in kept_edges becomes a
    # parallel edge (finite + infinite weight) -- confirm this is intended.
    graph_edges = kept_edges + cocycle_edges
    sources = [edge[0] for edge in graph_edges]
    destinations = [edge[1] for edge in graph_edges]
    weights = kept_weights + [math.inf] * len(cocycle_edges)
    if not sources:
        return [], []

    vertex_count = max(max(sources), max(destinations)) + 1
    g = ig.Graph(n=vertex_count, edges=list(zip(sources, destinations)), directed=False)
    g.es["weight"] = weights

    cycles_pool: list[list[int]] = []
    cycles_dist: list[float] = []

    for _ in range(n_force_deviate):
        round_paths: list[list[int]] = []
        for start, end in cocycle_edges:
            found = _k_shortest_paths(g, start, end, n_reps_per_loop)
            for path in found or ():
                length = _path_weight(g, path)
                cycles_pool.append(path)
                round_paths.append(path)
                cycles_dist.append(length)

        # Force deviation
        for path in round_paths:
            for u, v in zip(path[:-1], path[1:]):
                try:
                    eid = g.get_eid(u, v, directed=False)
                except ig._igraph.InternalError:
                    continue
                g.es[eid]["weight"] = math.inf

    return _select_diverse_loops(
        cycles=cycles_pool,
        distances=cycles_dist,
        n=n,
        lower_pct=loop_lower_pct,
        upper_pct=loop_upper_pct,
    )
94
+
95
+
96
+ def _k_shortest_paths(g: ig.Graph, source: int, target: int, k: int) -> list[list[int]]:
97
+ if source == target:
98
+ return []
99
+ try:
100
+ return g.get_k_shortest_paths(
101
+ source, target, k=k, weights=g.es["weight"], mode="ALL"
102
+ )
103
+ except ig._igraph.InternalError:
104
+ return []
105
+
106
+
107
+ def _path_weight(g: ig.Graph, path: Sequence[int]) -> float:
108
+ if len(path) < 2:
109
+ return math.inf
110
+ weight = 0.0
111
+ for u, v in zip(path[:-1], path[1:]):
112
+ try:
113
+ eid = g.get_eid(u, v, directed=False)
114
+ except ig._igraph.InternalError:
115
+ return math.inf
116
+ w = g.es[eid]["weight"]
117
+ weight += float(w)
118
+ return weight
119
+
120
+
121
+ def _select_diverse_loops(
122
+ cycles: Iterable[Sequence[int]],
123
+ distances: Iterable[float],
124
+ n: int,
125
+ lower_pct: float,
126
+ upper_pct: float,
127
+ ) -> Tuple[List[List[int]], List[float]]:
128
+ pairs = sorted(
129
+ [(float(d), list(c)) for d, c in zip(distances, cycles) if math.isfinite(d)],
130
+ key=lambda x: x[0],
131
+ )
132
+ if not pairs:
133
+ return [], []
134
+
135
+ n_total = len(pairs)
136
+ n_return = min(n_total, n)
137
+ if n_return == 1:
138
+ idxs = [n_total // 2]
139
+ else:
140
+ step = (upper_pct - lower_pct) / (n_return - 1)
141
+ idxs = []
142
+ for i in range(n_return):
143
+ pct = (lower_pct + step * i) / 100
144
+ idx = min(int(math.floor(n_total * pct)), n_total - 1)
145
+ idxs.append(idx)
146
+
147
+ selected = [pairs[i] for i in idxs]
148
+ dists = [p[0] for p in selected]
149
+ loops = [p[1] for p in selected]
150
+ return loops, dists
@@ -1,7 +1,13 @@
1
1
  # Copyright 2025 Zhiyuan Yu (Heemskerk's lab, University of Michigan)
2
2
  from pydantic import BaseModel
3
3
 
4
- from .types import FeatureSelectionMethod, EmbeddingMethod, EmbeddingNeighbors
4
+ from .types import (
5
+ EmbeddingMethod,
6
+ EmbeddingNeighbors,
7
+ FeatureSelectionMethod,
8
+ IndexListDownSample,
9
+ Size_t,
10
+ )
5
11
 
6
12
 
7
13
  class PreprocessMeta(BaseModel):
@@ -17,6 +23,8 @@ class PreprocessMeta(BaseModel):
17
23
  n_neighbors: int
18
24
  n_diffusion_comps: int | None = None
19
25
  scvi_key: str | None = None
26
+ indices_downsample: IndexListDownSample | None = None
27
+ num_vertices: Size_t | None = None
20
28
 
21
29
 
22
30
  class ScloopMeta(BaseModel):
@@ -0,0 +1,33 @@
1
+ # Copyright 2025 Zhiyuan Yu (Heemskerk's lab, University of Michigan)
2
+ from typing import Annotated, Literal, TypeAlias
3
+
4
+ from pydantic import Field
5
+
6
# Allowed string options for the preprocessing configuration.
FeatureSelectionMethod = Literal["hvg", "delve", "none"]
EmbeddingMethod = Literal["pca", "diffmap", "scvi"]
EmbeddingNeighbors = Literal["pca", "scvi"]

# Non-negative scalar types shared by the data models.
Index_t = Annotated[int, Field(ge=0)]
Size_t = Annotated[int, Field(ge=0)]
Diameter_t = Annotated[float, Field(ge=0)]

# At least 2 points are needed to compute PH. An upper bound may be added
# later, since PH is not feasible on a large number of points.
SizeDownSample = Annotated[int, Field(ge=2, description="Sample to this number of cells")]

# Index lists; the downsample and distance-matrix lists need >= 2 entries.
IndexListDownSample: TypeAlias = Annotated[
    list[Index_t],
    Field(min_length=2, description="Downsampled indices for PH computation"),
]
IndexListDistMatrix: TypeAlias = Annotated[
    list[Index_t],
    Field(
        min_length=2,
        description="Corresponding vertex index for each column of a distance matrix",
    ),
]
IndexListSimplex: TypeAlias = Annotated[
    list[Index_t],
    Field(min_length=0, description="Unique indicies for simplicies"),
]
32
+
33
+ # TODO: make a type for boundary matrix. Restrict matrix size for efficient computation