multipers 2.0.0__cp310-cp310-macosx_13_0_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of multipers might be problematic. Click here for more details.
- multipers/.dylibs/libc++.1.0.dylib +0 -0
- multipers/.dylibs/libtbb.12.12.dylib +0 -0
- multipers/.dylibs/libtbbmalloc.2.12.dylib +0 -0
- multipers/__init__.py +11 -0
- multipers/_signed_measure_meta.py +268 -0
- multipers/_slicer_meta.py +171 -0
- multipers/data/MOL2.py +350 -0
- multipers/data/UCR.py +18 -0
- multipers/data/__init__.py +1 -0
- multipers/data/graphs.py +466 -0
- multipers/data/immuno_regions.py +27 -0
- multipers/data/minimal_presentation_to_st_bf.py +0 -0
- multipers/data/pytorch2simplextree.py +91 -0
- multipers/data/shape3d.py +101 -0
- multipers/data/synthetic.py +68 -0
- multipers/distances.py +198 -0
- multipers/euler_characteristic.pyx +132 -0
- multipers/filtration_conversions.pxd +229 -0
- multipers/filtrations.pxd +225 -0
- multipers/function_rips.cpython-310-darwin.so +0 -0
- multipers/function_rips.pyx +105 -0
- multipers/grids.cpython-310-darwin.so +0 -0
- multipers/grids.pyx +281 -0
- multipers/hilbert_function.pyi +46 -0
- multipers/hilbert_function.pyx +153 -0
- multipers/io.cpython-310-darwin.so +0 -0
- multipers/io.pyx +571 -0
- multipers/ml/__init__.py +0 -0
- multipers/ml/accuracies.py +90 -0
- multipers/ml/convolutions.py +532 -0
- multipers/ml/invariants_with_persistable.py +79 -0
- multipers/ml/kernels.py +176 -0
- multipers/ml/mma.py +659 -0
- multipers/ml/one.py +472 -0
- multipers/ml/point_clouds.py +238 -0
- multipers/ml/signed_betti.py +50 -0
- multipers/ml/signed_measures.py +1542 -0
- multipers/ml/sliced_wasserstein.py +461 -0
- multipers/ml/tools.py +113 -0
- multipers/mma_structures.cpython-310-darwin.so +0 -0
- multipers/mma_structures.pxd +127 -0
- multipers/mma_structures.pyx +2433 -0
- multipers/multiparameter_edge_collapse.py +41 -0
- multipers/multiparameter_module_approximation.cpython-310-darwin.so +0 -0
- multipers/multiparameter_module_approximation.pyx +211 -0
- multipers/pickle.py +53 -0
- multipers/plots.py +326 -0
- multipers/point_measure_integration.cpython-310-darwin.so +0 -0
- multipers/point_measure_integration.pyx +139 -0
- multipers/rank_invariant.cpython-310-darwin.so +0 -0
- multipers/rank_invariant.pyx +229 -0
- multipers/simplex_tree_multi.cpython-310-darwin.so +0 -0
- multipers/simplex_tree_multi.pxd +129 -0
- multipers/simplex_tree_multi.pyi +715 -0
- multipers/simplex_tree_multi.pyx +4655 -0
- multipers/slicer.cpython-310-darwin.so +0 -0
- multipers/slicer.pxd +781 -0
- multipers/slicer.pyx +3393 -0
- multipers/tensor.pxd +13 -0
- multipers/test.pyx +44 -0
- multipers/tests/__init__.py +40 -0
- multipers/tests/old_test_rank_invariant.py +91 -0
- multipers/tests/test_diff_helper.py +74 -0
- multipers/tests/test_hilbert_function.py +82 -0
- multipers/tests/test_mma.py +51 -0
- multipers/tests/test_point_clouds.py +59 -0
- multipers/tests/test_python-cpp_conversion.py +82 -0
- multipers/tests/test_signed_betti.py +181 -0
- multipers/tests/test_simplextreemulti.py +98 -0
- multipers/tests/test_slicer.py +63 -0
- multipers/torch/__init__.py +1 -0
- multipers/torch/diff_grids.py +217 -0
- multipers/torch/rips_density.py +257 -0
- multipers-2.0.0.dist-info/LICENSE +21 -0
- multipers-2.0.0.dist-info/METADATA +29 -0
- multipers-2.0.0.dist-info/RECORD +78 -0
- multipers-2.0.0.dist-info/WHEEL +5 -0
- multipers-2.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
from typing import Literal, Optional
|
|
2
|
+
|
|
3
|
+
import gudhi as gd
|
|
4
|
+
import numpy as np
|
|
5
|
+
from joblib import Parallel, delayed
|
|
6
|
+
from sklearn.base import BaseEstimator, TransformerMixin
|
|
7
|
+
from sklearn.metrics import pairwise_distances
|
|
8
|
+
from tqdm import tqdm
|
|
9
|
+
|
|
10
|
+
import multipers as mp
|
|
11
|
+
import multipers.slicer as mps
|
|
12
|
+
from multipers.ml.convolutions import DTM, KDE
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _throw_nofit(any):
|
|
16
|
+
raise Exception("Fit first")
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class PointCloud2SimplexTree(BaseEstimator, TransformerMixin):
    """Turn point clouds into 2-parameter simplex trees (geometry x codensity)."""

    def __init__(
        self,
        # NOTE: the list defaults are stored as passed and never mutated by this class.
        bandwidths=[],
        masses=[],
        threshold: float = np.inf,
        complex: Literal["alpha", "rips", "delaunay"] = "rips",
        sparse: float | None = None,
        num_collapses: int | Literal["full"] = "full",
        kernel: str = "gaussian",
        log_density: bool = True,
        expand_dim: int = 1,
        progress: bool = False,
        n_jobs: Optional[int] = None,
        fit_fraction: float = 1,
        verbose: bool = False,
        safe_conversion: bool = False,
    ) -> None:
        """
        (Rips or Alpha or Delaunay) + (Density Estimation or DTM) 1-critical 2-filtration.

        Parameters
        ----------
         - bandwidths : list of reals : The kernel density estimation bandwidths. A negative
           value is replaced, at fit time, by abs(bandwidth) * (diameter of the dataset),
           clipped to `threshold` when `threshold` is positive.
         - masses : list of reals : The DTM masses (forwarded to `DTM`).
         - threshold : real, max edge length of the rips, or square root of the max alpha
           square of the alpha complex. A non-positive value is replaced, at fit time, by
           abs(threshold) * (diameter of the dataset). Not used by the delaunay complex.
         - complex : "rips", "alpha" or "delaunay", the geometric complex to build.
         - sparse : real, sparse rips (c.f. rips doc) WARNING : ONLY FOR RIPS
         - num_collapses : int or "full", Number of edge collapses applied to the
           simplextrees, WARNING : ONLY FOR RIPS
         - kernel : the kernel used for density estimation (forwarded to `KDE`),
           e.g., "gaussian", "exponential".
         - log_density : bool, forwarded to `KDE` as `return_log`.
         - expand_dim : int, expand the rips complex to this dimension (only done when
           greater than 1). WARNING : ONLY FOR RIPS
         - progress : bool, shows the calculus status
         - n_jobs : number of joblib (threading backend) jobs
         - fit_fraction : real, the fraction of point clouds used to estimate the diameter
         - verbose : bool, Shows more information if true.
         - safe_conversion : bool, forwarded to the `SimplexTreeMulti` constructor.

        Output
        ------
        `transform` returns, for each point cloud, a list of SimplexTreeMulti (one per
        bandwidth, then one per mass) whose first parameter is the geometric filtration
        and the second is the codensity.
        """
        super().__init__()
        self.bandwidths = bandwidths
        self.masses = masses
        self.num_collapses = num_collapses
        self.kernel = kernel
        self.log_density = log_density
        self.progress = progress
        self._bandwidths = np.empty((0,))
        self._threshold = np.inf
        self.n_jobs = n_jobs
        self._scale = np.empty((0,))
        self.fit_fraction = fit_fraction
        self.expand_dim = expand_dim
        self.verbose = verbose
        self.complex = complex
        self.threshold = threshold
        self.sparse = sparse
        # Replaced by the builder matching `complex` in `fit`.
        self._get_sts = _throw_nofit
        self.safe_conversion = safe_conversion

    def _get_distance_quantiles(self, X, qs):
        """Return ``diameter * qs`` and store it in ``self._scale``.

        The diameter is the largest pairwise distance found inside any of the
        point clouds of a random subsample of ``X`` (sized by ``fit_fraction``).
        When ``self.threshold`` is positive, the scales are clipped to it.
        An empty ``qs`` short-circuits to an empty list without touching ``X``.
        """
        if len(qs) == 0:
            self._scale = []
            return []
        if self.progress:
            print("Estimating scale...", flush=True, end="")
        num_sampled = min(len(X), int(self.fit_fraction * len(X)) + 1)
        indices = np.random.choice(len(X), num_sampled, replace=False)
        diameter = np.max([pairwise_distances(X=X[i]).max() for i in indices])
        self._scale = diameter * np.asarray(qs)
        if self.threshold > 0:
            self._scale[self._scale > self.threshold] = self.threshold
        if self.progress:
            print(f"Done. Chosen scales {qs} are {self._scale}", flush=True)
        return self._scale

    def _get_sts_rips(self, x):
        """Build one rips-based 2-parameter simplex tree per codensity of ``x``."""
        st_init = gd.RipsComplex(
            points=x, max_edge_length=self._threshold, sparse=self.sparse
        ).create_simplex_tree(max_dimension=1)
        st_init = mp.simplex_tree_multi.SimplexTreeMulti(
            st_init, num_parameters=2, safe_conversion=self.safe_conversion
        )
        codensities = self._get_codensities(x_fit=x, x_sample=x)
        num_axes = codensities.shape[0]
        sts = [st_init] + [st_init.copy() for _ in range(num_axes - 1)]
        # no need to multithread here, most operations are memory
        for codensity, st_copy in zip(codensities, sts):
            # RIPS has contiguous vertices, so vertices are ordered.
            st_copy.fill_lowerstar(codensity, parameter=1)

        def collapse_edges(st):
            """Collapse the edges of ``st``, then optionally expand it."""
            if self.verbose:
                print("Num simplices :", st.num_simplices)
            if isinstance(self.num_collapses, int):
                st.collapse_edges(num=self.num_collapses)
                if self.verbose:
                    print(", after collapse :", st.num_simplices, end="")
            elif self.num_collapses == "full":
                st.collapse_edges(full=True)
                if self.verbose:
                    print(", after collapse :", st.num_simplices, end="")
            if self.expand_dim > 1:
                st.expansion(self.expand_dim)
                if self.verbose:
                    print(", after expansion :", st.num_simplices, end="")
            if self.verbose:
                print("")
            return st

        return Parallel(backend="threading", n_jobs=self.n_jobs)(
            delayed(collapse_edges)(st) for st in sts
        )

    def _get_sts_alpha(self, x: np.ndarray, return_alpha=False):
        """Build one alpha-based 2-parameter simplex tree per codensity of ``x``.

        Returns the list of simplex trees, preceded by the gudhi alpha complex
        when ``return_alpha`` is true.
        """
        alpha_complex = gd.AlphaComplex(points=x)
        st = alpha_complex.create_simplex_tree(max_alpha_square=self._threshold**2)
        vertices = np.array([i for (i,), _ in st.get_skeleton(0)])
        # NOTE: the original author flagged reading the points back from the
        # alpha complex as "unsafe for some reason" (alternative: use `x`).
        new_points = np.asarray([alpha_complex.get_point(i) for i in vertices])
        st = mp.simplex_tree_multi.SimplexTreeMulti(
            st, num_parameters=2, safe_conversion=self.safe_conversion
        )
        codensities = self._get_codensities(x_fit=x, x_sample=new_points)
        num_axes = codensities.shape[0]
        sts = [st] + [st.copy() for _ in range(num_axes - 1)]
        # no need to multithread here, most operations are memory
        # One slot more than strictly needed (max index + 1), to be safe.
        max_vertices = vertices.max() + 2
        for codensity, st_copy in zip(codensities, sts):
            # Vertex ids need not be contiguous: unused slots stay NaN.
            aligned_codensity = np.full(max_vertices, np.nan)
            aligned_codensity[vertices] = codensity
            st_copy.fill_lowerstar(aligned_codensity, parameter=1)
        if return_alpha:
            return alpha_complex, sts
        return sts

    def _get_sts_delaunay(self, x: np.ndarray):
        """Build one function-delaunay simplex tree per codensity of ``x``."""
        codensities = self._get_codensities(x_fit=x, x_sample=x)

        def get_st(c):
            slicer = mps.from_function_delaunay(
                x, c, verbose=self.verbose, clear=not self.verbose
            )
            return mps.to_simplextree(slicer)

        return Parallel(backend="threading", n_jobs=self.n_jobs)(
            delayed(get_st)(c) for c in codensities
        )

    def _get_codensities(self, x_fit, x_sample):
        """Evaluate every codensity fitted on ``x_fit`` at the points ``x_sample``.

        Returns an array of shape
        ``(len(self._bandwidths) + len(self.masses), len(x_sample))``: first the
        negated KDE scores (one row per bandwidth), then the DTM scores (one row
        per mass).
        """
        x_fit = np.asarray(x_fit, dtype=np.float32)
        x_sample = np.asarray(x_sample, dtype=np.float32)
        codensities_kde = np.asarray(
            [
                -KDE(
                    bandwidth=bandwidth, kernel=self.kernel, return_log=self.log_density
                )
                .fit(x_fit)
                .score_samples(x_sample)
                for bandwidth in self._bandwidths
            ],
        ).reshape(len(self._bandwidths), len(x_sample))
        codensities_dtm = (
            DTM(masses=self.masses)
            .fit(x_fit)
            .score_samples(x_sample)
            .reshape(len(self.masses), len(x_sample))
        )
        return np.concatenate([codensities_kde, codensities_dtm])

    def _precompile(self, X):
        """Warm-up codensity call on at most four points of the first point cloud.

        Skipped when ``X`` is empty, so empty inputs do not raise ``IndexError``.
        """
        if len(X) > 0:
            self._get_codensities(X[0][:4], X[0][:4])

    def fit(self, X: np.ndarray | list, y=None):
        """Select the complex builder and resolve data-dependent scales.

        Negative bandwidths and a non-positive threshold are interpreted as
        fractions of the dataset diameter and replaced by absolute values.

        Parameters
        ----------
        X : sequence of point clouds.
        y : ignored.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``self.complex`` is not one of "rips", "alpha", "delaunay".
        """
        builders = {
            "rips": self._get_sts_rips,
            "alpha": self._get_sts_alpha,
            "delaunay": self._get_sts_delaunay,
        }
        if self.complex not in builders:
            raise ValueError(
                f"Invalid complex {self.complex}. "
                "Possible choices are rips, alpha or delaunay."
            )
        self._get_sts = builders[self.complex]

        # Float dtype on purpose: an all-integer `bandwidths` would otherwise
        # truncate the data-driven scales written into it below.
        bandwidths = np.array(self.bandwidths, dtype=float)
        relative = bandwidths < 0
        num_relative = int(relative.sum())
        # Same test as the final threshold selection, so that every requested
        # scale is consumed exactly once (no misalignment between the two).
        relative_threshold = not self.threshold > 0

        qs = (-bandwidths[relative]).tolist()
        if relative_threshold:
            qs.append(-self.threshold)
        scales = self._get_distance_quantiles(X, qs=qs)

        if num_relative > 0:
            bandwidths[relative] = scales[:num_relative]
        self._bandwidths = bandwidths
        self._threshold = scales[-1] if relative_threshold else self.threshold

        # PRECOMPILE FIRST
        self._precompile(X)
        return self

    def transform(self, X):
        """Build, for each point cloud of ``X``, its list of simplex trees.

        Returns a list with one entry per point cloud; each entry is the list
        returned by the builder selected in ``fit``.
        """
        # precompile first
        self._precompile(X)
        with tqdm(
            X, desc="Filling simplextrees", disable=not self.progress, total=len(X)
        ) as data:
            stss = Parallel(backend="threading", n_jobs=self.n_jobs)(
                delayed(self._get_sts)(x) for x in data
            )
        return stss
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
## This code was written by Luis Scoccola
|
|
2
|
+
import numpy as np
|
|
3
|
+
from scipy.sparse import coo_array
|
|
4
|
+
from scipy.ndimage import convolve1d
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def signed_betti(hilbert_function, threshold=False, sparse=False):
    """Apply a backward finite difference along every axis of a Hilbert function.

    Along each axis the output at index ``i`` is ``value[i] - value[i - 1]``,
    with the value before index 0 taken to be 0.

    Parameters
    ----------
    hilbert_function : array-like
        n-dimensional grid of values. The input is never modified.
    threshold : bool
        If true, the last slice along every axis is zeroed out before
        differencing (zero out the "end" of the Hilbert function).
    sparse : bool
        If true, the result is wrapped in a ``scipy.sparse.coo_array``.

    Returns
    -------
    numpy.ndarray or scipy.sparse.coo_array
        Same shape as the input. Boolean and unsigned-integer inputs are
        promoted to ``int64`` because the result can be negative; other dtypes
        are preserved.
    """
    res = np.copy(hilbert_function)
    # Differences can be negative: bool / unsigned dtypes cannot represent them.
    if res.dtype.kind in "bu":
        res = res.astype(np.int64)
    n = res.ndim
    # zero out the "end" of the Hilbert function
    if threshold:
        for axis in range(n):
            last_slice = [slice(None)] * n
            last_slice[axis] = -1
            res[tuple(last_slice)] = 0
    # Convolving with [0, 1, -1] yields res[i] - res[i - 1] along the axis.
    weights = np.array([0, 1, -1], dtype=int)
    for axis in range(n):
        res = convolve1d(res, weights, axis=axis, mode="constant", cval=0)
    if sparse:
        return coo_array(res)
    return res
|
|
22
|
+
|
|
23
|
+
def rank_decomposition_by_rectangles(rank_invariant, threshold=False):
    """Signed rectangle decomposition of a rank invariant.

    Takes as input the rank invariant of an n-parameter persistence module
      M : [0, ..., s_1 - 1] x ... x [0, ..., s_n - 1] ---> Vec
    on a grid with dimensions of sizes s_1, ..., s_n. The input is assumed to
    be given as a tensor of dimensions (s_1, ..., s_n, s_1, ..., s_n), so that,
    at index [i_1, ..., i_n, j_1, ..., j_n] we have the rank of the structure
    map M(i) -> M(j), where i = (i_1, ..., i_n) and j = (j_1, ..., j_n), and
    i <= j, meaning that i_1 <= j_1, ..., i_n <= j_n.

    NOTE :
      - About the input, we assume that, if not( i <= j ), then at index
        [i_1, ..., i_n, j_1, ..., j_n] we have a zero.
      - Similarly, the output at index [i_1, ..., i_n, j_1, ..., j_n] only
        makes sense when i <= j. For indices where not( i <= j ) the output
        may take arbitrary values and they should be ignored.

    When ``threshold`` is true, the last slice along each of the last n axes
    is zeroed out on a copy of the input before the decomposition.
    """
    num_parameters = len(rank_invariant.shape) // 2
    if threshold:
        # Work on a copy so the caller's array is left untouched.
        rank_invariant = rank_invariant.copy()
        # zero out the "end"
        for axis in range(num_parameters):
            index = [slice(None)] * (2 * num_parameters)
            index[num_parameters + axis] = -1
            rank_invariant[tuple(index)] = 0
    death_axes = tuple(range(num_parameters, 2 * num_parameters))
    flipped = np.flip(rank_invariant, death_axes)
    return np.flip(signed_betti(flipped), death_axes)
|