multipers 1.0__cp311-cp311-manylinux_2_34_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of multipers might be problematic. Click here for more details.

Files changed (56) hide show
  1. multipers/__init__.py +4 -0
  2. multipers/_old_rank_invariant.pyx +328 -0
  3. multipers/_signed_measure_meta.py +72 -0
  4. multipers/data/MOL2.py +350 -0
  5. multipers/data/UCR.py +18 -0
  6. multipers/data/__init__.py +1 -0
  7. multipers/data/graphs.py +272 -0
  8. multipers/data/immuno_regions.py +27 -0
  9. multipers/data/minimal_presentation_to_st_bf.py +0 -0
  10. multipers/data/pytorch2simplextree.py +91 -0
  11. multipers/data/shape3d.py +101 -0
  12. multipers/data/synthetic.py +68 -0
  13. multipers/distances.py +100 -0
  14. multipers/euler_characteristic.cpython-311-x86_64-linux-gnu.so +0 -0
  15. multipers/euler_characteristic.pyx +132 -0
  16. multipers/function_rips.cpython-311-x86_64-linux-gnu.so +0 -0
  17. multipers/function_rips.pyx +101 -0
  18. multipers/hilbert_function.cpython-311-x86_64-linux-gnu.so +0 -0
  19. multipers/hilbert_function.pyi +46 -0
  20. multipers/hilbert_function.pyx +145 -0
  21. multipers/ml/__init__.py +0 -0
  22. multipers/ml/accuracies.py +61 -0
  23. multipers/ml/convolutions.py +384 -0
  24. multipers/ml/invariants_with_persistable.py +79 -0
  25. multipers/ml/kernels.py +128 -0
  26. multipers/ml/mma.py +422 -0
  27. multipers/ml/one.py +472 -0
  28. multipers/ml/point_clouds.py +191 -0
  29. multipers/ml/signed_betti.py +50 -0
  30. multipers/ml/signed_measures.py +1046 -0
  31. multipers/ml/sliced_wasserstein.py +313 -0
  32. multipers/ml/tools.py +99 -0
  33. multipers/multiparameter_edge_collapse.py +29 -0
  34. multipers/multiparameter_module_approximation.cpython-311-x86_64-linux-gnu.so +0 -0
  35. multipers/multiparameter_module_approximation.pxd +147 -0
  36. multipers/multiparameter_module_approximation.pyi +439 -0
  37. multipers/multiparameter_module_approximation.pyx +931 -0
  38. multipers/pickle.py +53 -0
  39. multipers/plots.py +207 -0
  40. multipers/point_measure_integration.cpython-311-x86_64-linux-gnu.so +0 -0
  41. multipers/point_measure_integration.pyx +59 -0
  42. multipers/rank_invariant.cpython-311-x86_64-linux-gnu.so +0 -0
  43. multipers/rank_invariant.pyx +154 -0
  44. multipers/simplex_tree_multi.cpython-311-x86_64-linux-gnu.so +0 -0
  45. multipers/simplex_tree_multi.pxd +121 -0
  46. multipers/simplex_tree_multi.pyi +715 -0
  47. multipers/simplex_tree_multi.pyx +1284 -0
  48. multipers/tensor.pxd +13 -0
  49. multipers/test.pyx +44 -0
  50. multipers-1.0.dist-info/LICENSE +21 -0
  51. multipers-1.0.dist-info/METADATA +9 -0
  52. multipers-1.0.dist-info/RECORD +56 -0
  53. multipers-1.0.dist-info/WHEEL +5 -0
  54. multipers-1.0.dist-info/top_level.txt +1 -0
  55. multipers.libs/libtbb-5d1cde94.so.12.10 +0 -0
  56. multipers.libs/libtbbmalloc-5e0a3d4c.so.2.10 +0 -0
@@ -0,0 +1,313 @@
1
+ ## This code was written by Mathieu Carrière.
2
+
3
+ import numpy as np
4
+ from sklearn.base import BaseEstimator, TransformerMixin
5
+ from sklearn.metrics import pairwise_distances, pairwise_kernels
6
+ from joblib import Parallel, delayed
7
+
8
+ def _pairwise(fallback, skipdiag, X, Y, metric, n_jobs):
9
+ if Y is not None:
10
+ return fallback(X, Y, metric=metric, n_jobs=n_jobs)
11
+ triu = np.triu_indices(len(X), k=skipdiag)
12
+ tril = (triu[1], triu[0])
13
+ par = Parallel(n_jobs=n_jobs, prefer="threads")
14
+ d = par(delayed(metric)([triu[0][i]], [triu[1][i]]) for i in range(len(triu[0])))
15
+ m = np.empty((len(X), len(X)))
16
+ m[triu] = d
17
+ m[tril] = d
18
+ if skipdiag:
19
+ np.fill_diagonal(m, 0)
20
+ return m
21
+
22
+ def _sklearn_wrapper(metric, X, Y, **kwargs):
23
+ """
24
+ This function is a wrapper for any metric between two signed measures that takes two numpy arrays of shapes (nxD) and (mxD) as arguments.
25
+ """
26
+ if Y is None:
27
+ def flat_metric(a, b):
28
+ return metric(X[int(a[0])], X[int(b[0])], **kwargs)
29
+ else:
30
+ def flat_metric(a, b):
31
+ return metric(X[int(a[0])], Y[int(b[0])], **kwargs)
32
+ return flat_metric
33
+
34
+ def _compute_signed_measure_parts(X):
35
+ """
36
+ This is a function for separating the positive and negative points of a list of signed measures. This function can be used as a preprocessing step in order to speed up the running time for computing all pairwise (sliced) Wasserstein distances on a list of signed measures.
37
+
38
+ Parameters:
39
+ X (list of n tuples): list of signed measures.
40
+
41
+ Returns:
42
+ list of n pairs of numpy arrays of shape (num x dimension): list of positive and negative signed measures.
43
+ """
44
+ XX = []
45
+ for (C,M) in X:
46
+ pos_idxs = np.argwhere(M > 0).ravel()
47
+ neg_idxs = np.setdiff1d(np.arange(len(M)), pos_idxs)
48
+ XX.append([ np.repeat(C[pos_idxs], M[pos_idxs], axis=0), np.repeat(C[neg_idxs], -M[neg_idxs], axis=0) ])
49
+ return XX
50
+
51
+ def _compute_signed_measure_projections(X, num_directions, scales):
52
+ """
53
+ This is a function for projecting the points of a list of signed measures onto a fixed number of lines sampled uniformly. This function can be used as a preprocessing step in order to speed up the running time for computing all pairwise sliced Wasserstein distances on a list of signed measures.
54
+
55
+ Parameters:
56
+ X (list of n tuples): list of signed measures.
57
+ num_directions (int): number of lines evenly sampled from [-pi/2,pi/2] in order to approximate and speed up the distance computation.
58
+ scales (array of shape D): scales associated to the dimensions.
59
+
60
+ Returns:
61
+ list of n pairs of numpy arrays of shape (num x num_directions): list of positive and negative projected signed measures.
62
+ """
63
+ dimension = X[0][0].shape[1]
64
+ np.random.seed(42)
65
+ thetas = np.random.normal(0,1,[num_directions, dimension])
66
+ lines = (thetas/np.linalg.norm(thetas, axis=1)[:,None]).T
67
+ weights = np.linalg.norm(np.multiply(scales[:,None], lines), axis=0) if scales is not None else np.ones(num_directions)
68
+ XX = []
69
+ for (C,M) in X:
70
+ pos_idxs = np.argwhere(M > 0).ravel()
71
+ neg_idxs = np.setdiff1d(np.arange(len(M)), pos_idxs)
72
+ XX.append( [np.matmul( np.repeat(C[pos_idxs], M[pos_idxs], axis=0), lines), np.matmul(np.repeat(C[neg_idxs], -M[neg_idxs], axis=0), lines), weights] )
73
+ return XX
74
+
75
+
76
def pairwise_signed_measure_distances(X, Y=None, metric="sliced_wasserstein", n_jobs=None, **kwargs):
    """
    This function computes the distance matrix between two lists of signed measures, each measure being a (points, multiplicities) tuple.

    Parameters:
        X (list of n tuples): first list of signed measures.
        Y (list of m tuples): second list of signed measures (optional). If None (or the very same object as X), pairwise distances are computed from the first list only, the matrix is filled by symmetry and its diagonal is set to 0.
        metric: distance to use. It can be either a string ("sliced_wasserstein", "wasserstein") or a function taking two tuples as inputs. If it is a function, make sure that it is symmetric and that it outputs 0 if called on the same two tuples.
        n_jobs (int): number of jobs to use for the computation. This uses joblib.Parallel(prefer="threads"), so metrics that do not release the GIL may not scale unless run inside a `joblib.parallel_backend <https://joblib.readthedocs.io/en/latest/parallel.html#joblib.parallel_backend>`_ block.
        **kwargs: optional keyword parameters. Any further parameters are passed directly to the distance function ("sliced_wasserstein": num_directions, scales; "wasserstein": ground_norm, epsilon).

    Returns:
        numpy array of shape (nxm): distance matrix
    """
    # Treat "Y is X" exactly like "Y is None": this avoids preprocessing the same
    # list twice and makes the wrapper index a single collection.
    if Y is X:
        Y = None
    # The measures are ragged, so the pairwise machinery works on index columns.
    XX = np.reshape(np.arange(len(X)), [-1, 1])
    YY = None if Y is None else np.reshape(np.arange(len(Y)), [-1, 1])

    if metric == "sliced_wasserstein":
        Xproj = _compute_signed_measure_projections(X, **kwargs)
        Yproj = None if Y is None else _compute_signed_measure_projections(Y, **kwargs)
        distance = _sklearn_wrapper(_sliced_wasserstein_distance_on_projections, Xproj, Yproj)
    elif metric == "wasserstein":
        Xproj = _compute_signed_measure_parts(X)
        Yproj = None if Y is None else _compute_signed_measure_parts(Y)
        distance = _sklearn_wrapper(_wasserstein_distance_on_parts(**kwargs), Xproj, Yproj)
    else:
        distance = _sklearn_wrapper(metric, X, Y, **kwargs)
    return _pairwise(pairwise_distances, True, XX, YY, metric=distance, n_jobs=n_jobs)
102
+
103
+ def _wasserstein_distance_on_parts(ground_norm=1, epsilon=1.):
104
+ """
105
+ This is a function for computing the Wasserstein distance between two signed measures that have already been separated into their positive and negative parts.
106
+
107
+ Parameters:
108
+ meas1: pair of (n x dimension) numpy.arrays containing the points of the positive and negative parts of the first measure.
109
+ meas2: pair of (m x dimension) numpy.arrays containing the points of the positive and negative parts of the second measure.
110
+
111
+ Returns:
112
+ float: the sliced Wasserstein distance between the projected signed measures.
113
+ """
114
+ def metric(meas1, meas2):
115
+ meas1_plus, meas1_minus = meas1[0], meas1[1]
116
+ meas2_plus, meas2_minus = meas2[0], meas2[1]
117
+ num_pts = len(meas1_plus) + len(meas2_minus)
118
+ meas_t1 = np.vstack([meas1_plus, meas2_minus])
119
+ meas_t2 = np.vstack([meas2_plus, meas1_minus])
120
+ import ot
121
+ if epsilon > 0:
122
+ wass = ot.sinkhorn2(1/num_pts * np.ones(num_pts), 1/num_pts * np.ones(num_pts), pairwise_distances(meas_t1, meas_t2, metric='minkowski', p=ground_norm), epsilon)
123
+ return wass[0]
124
+ else:
125
+ wass = ot.lp.emd2([],[], np.ascontiguousarray(pairwise_distances(meas_t1, meas_t2, metric='minkowski', p=ground_norm), dtype=np.float64))
126
+ return wass
127
+ return metric
128
+
129
+ def _sliced_wasserstein_distance_on_projections(meas1, meas2, scales=None):
130
+ """
131
+ This is a function for computing the sliced Wasserstein distance between two signed measures that have already been projected onto some lines. It simply amounts to comparing the sorted projections with the 1-norm, and averaging over the lines. See http://proceedings.mlr.press/v70/carriere17a.html for more details.
132
+
133
+ Parameters:
134
+ meas1: pair of (n x number_of_lines) numpy.arrays containing the projected points of the positive and negative parts of the first measure.
135
+ meas2: pair of (m x number_of_lines) numpy.arrays containing the projected points of the positive and negative parts of the second measure.
136
+ scales (array of shape D): scales associated to the dimensions.
137
+
138
+ Returns:
139
+ float: the sliced Wasserstein distance between the projected signed measures.
140
+ """
141
+ #assert np.array_equal( meas1[2], meas2[2] )
142
+ weights = meas1[2]
143
+ meas1_plus, meas1_minus = meas1[0], meas1[1]
144
+ meas2_plus, meas2_minus = meas2[0], meas2[1]
145
+ A = np.sort(np.vstack([meas1_plus, meas2_minus]), axis=0)
146
+ B = np.sort(np.vstack([meas2_plus, meas1_minus]), axis=0)
147
+ L1 = np.sum(np.abs(A-B), axis=0)
148
+ return np.mean(np.multiply(L1, weights))
149
+
150
+
151
+ def _sliced_wasserstein_distance(meas1, meas2, num_directions, scales=None):
152
+ """
153
+ This is a function for computing the sliced Wasserstein distance from two signed measures. The Sliced Wasserstein distance is computed by projecting the signed measures onto lines, comparing the projections with the 1-norm, and finally averaging over the lines. See http://proceedings.mlr.press/v70/carriere17a.html for more details.
154
+
155
+ Parameters:
156
+ meas1: ((n x D), (n)) tuple with numpy.array encoding the (finite points of the) first measure and their multiplicities. Must not contain essential points (i.e. with infinite coordinate).
157
+ meas2: ((m x D), (m)) tuple encoding the second measure.
158
+ num_directions (int): number of lines evenly sampled from [-pi/2,pi/2] in order to approximate and speed up the distance computation.
159
+ scales (array of shape D): scales associated to the dimensions.
160
+
161
+ Returns:
162
+ float: the sliced Wasserstein distance between signed measures.
163
+ """
164
+ C1, M1 = meas1[0], meas1[1]
165
+ C2, M2 = meas2[0], meas2[1]
166
+ dimension = C1.shape[1]
167
+ C1_plus_idxs, C2_plus_idxs = np.argwhere(M1 > 0).ravel(), np.argwhere(M2 > 0).ravel()
168
+ C1_minus_idxs, C2_minus_idxs = np.setdiff1d(np.arange(len(M1)), C1_plus_idxs), np.setdiff1d(np.arange(len(M2)), C2_plus_idxs)
169
+ np.random.seed(42)
170
+ thetas = np.random.normal(0,1,[num_directions, dimension])
171
+ lines = (thetas/np.linalg.norm(thetas, axis=1)[:,None]).T
172
+ weights = np.linalg.norm(np.multiply(scales[:,None], lines), axis=0) if scales is not None else np.ones(num_directions)
173
+ approx1 = np.matmul(np.vstack([ np.repeat(C1[C1_plus_idxs], M1[C1_plus_idxs], axis=0), np.repeat(C2[C2_minus_idxs], -M2[C2_minus_idxs], axis=0) ]), lines)
174
+ approx2 = np.matmul(np.vstack([ np.repeat(C2[C2_plus_idxs], M2[C2_plus_idxs], axis=0), np.repeat(C1[C1_minus_idxs], -M1[C1_minus_idxs], axis=0) ]), lines)
175
+ A = np.sort(approx1, axis=0)
176
+ B = np.sort(approx2, axis=0)
177
+ L1 = np.sum(np.abs(A-B), axis=0)
178
+ return np.mean(np.multiply(L1, weights))
179
+
180
def _wasserstein_distance(meas1, meas2, epsilon, ground_norm):
    """
    This is a function for computing the Wasserstein distance from two signed measures.

    Parameters:
        meas1: ((n x D), (n)) tuple with numpy.array encoding the (finite points of the) first measure and their integer multiplicities. Must not contain essential points (i.e. with infinite coordinate).
        meas2: ((m x D), (m)) tuple encoding the second measure.
        epsilon (float): entropy regularization parameter. If positive, ot.sinkhorn2 is used; otherwise the exact ot.lp.emd2 solver is used.
        ground_norm (int): norm to use for ground metric cost.

    Returns:
        float: the Wasserstein distance between signed measures.
    """
    C1, M1 = meas1[0], meas1[1]
    C2, M2 = meas2[0], meas2[1]
    C1_plus_idxs, C2_plus_idxs = np.argwhere(M1 > 0).ravel(), np.argwhere(M2 > 0).ravel()
    C1_minus_idxs = np.setdiff1d(np.arange(len(M1)), C1_plus_idxs)
    C2_minus_idxs = np.setdiff1d(np.arange(len(M2)), C2_plus_idxs)
    # mu1+ + mu2- is compared with mu2+ + mu1-, each point repeated by its multiplicity.
    approx1 = np.vstack([
        np.repeat(C1[C1_plus_idxs], M1[C1_plus_idxs], axis=0),
        np.repeat(C2[C2_minus_idxs], -M2[C2_minus_idxs], axis=0),
    ])
    approx2 = np.vstack([
        np.repeat(C2[C2_plus_idxs], M2[C2_plus_idxs], axis=0),
        np.repeat(C1[C1_minus_idxs], -M1[C1_minus_idxs], axis=0),
    ])
    num_pts = len(approx1)
    # POT is imported lazily so the module can be used without it.
    import ot
    uniform = 1 / num_pts * np.ones(num_pts)
    cost = pairwise_distances(approx1, approx2, metric='minkowski', p=ground_norm)
    if epsilon > 0:
        wass = ot.sinkhorn2(uniform, uniform, cost, epsilon)
        # Depending on the POT version the loss is a scalar or a length-1
        # array; np.ravel handles both, whereas wass[0] fails on a scalar.
        return np.ravel(wass)[0]
    return ot.lp.emd2(uniform, uniform, cost)
207
+
208
class SlicedWassersteinDistance(BaseEstimator, TransformerMixin):
    """
    Scikit-learn style transformer producing sliced Wasserstein distance matrices between signed measures. The signed measures are projected onto lines, the projections are compared with the 1-norm, and the result is averaged over the lines. See http://proceedings.mlr.press/v70/carriere17a.html for more details.
    """
    def __init__(self, num_directions=10, scales=None, n_jobs=None):
        """
        Constructor for the SlicedWassersteinDistance class.

        Parameters:
            num_directions (int): number of lines used to approximate and speed up the distance computation (default 10).
            scales (array of shape D): scales associated to the dimensions.
            n_jobs (int): number of jobs to use for the computation. See :func:`pairwise_signed_measure_distances` for details.
        """
        self.n_jobs = n_jobs
        self.scales = scales
        self.num_directions = num_directions

    def fit(self, X, y=None):
        """
        Remember the given signed measures as the reference set, stored as-is in **measures_**.

        Parameters:
            X (list of tuples): input signed measures.
            y (n x 1 array): signed measure labels (unused).
        """
        self.measures_ = X
        return self

    def transform(self, X):
        """
        Compute all sliced Wasserstein distances between a given list of signed measures and the ones stored by fit().

        Parameters:
            X (list of tuples): input signed measures.

        Returns:
            numpy array of shape (number of measures in X) x (number of measures in **measures_**): matrix of pairwise sliced Wasserstein distances.
        """
        options = dict(num_directions=self.num_directions, scales=self.scales)
        return pairwise_signed_measure_distances(
            X, self.measures_, metric="sliced_wasserstein", n_jobs=self.n_jobs, **options
        )

    def __call__(self, meas1, meas2):
        """
        Sliced Wasserstein distance between a single pair of signed measures.

        Parameters:
            meas1: ((n x D), (n)) tuple with numpy.array encoding the (finite points of the) first measure and their multiplicities. Must not contain essential points (i.e. with infinite coordinate).
            meas2: ((m x D), (m)) tuple encoding the second measure.

        Returns:
            float: sliced Wasserstein distance.
        """
        directions, scales = self.num_directions, self.scales
        return _sliced_wasserstein_distance(meas1, meas2, num_directions=directions, scales=scales)
260
+
261
class WassersteinDistance(BaseEstimator, TransformerMixin):
    """
    Scikit-learn style transformer producing Wasserstein distance matrices between signed measures.
    """
    def __init__(self, epsilon=1., ground_norm=1, n_jobs=None):
        """
        Constructor for the WassersteinDistance class.

        Parameters:
            epsilon (float): entropy regularization parameter.
            ground_norm (int): norm to use for ground metric cost.
            n_jobs (int): number of jobs to use for the computation. See :func:`pairwise_signed_measure_distances` for details.
        """
        self.n_jobs = n_jobs
        self.ground_norm = ground_norm
        self.epsilon = epsilon

    def fit(self, X, y=None):
        """
        Remember the given signed measures as the reference set, stored as-is in **measures_**.

        Parameters:
            X (list of tuples): input signed measures.
            y (n x 1 array): signed measure labels (unused).
        """
        self.measures_ = X
        return self

    def transform(self, X):
        """
        Compute all Wasserstein distances between a given list of signed measures and the ones stored by fit().

        Parameters:
            X (list of tuples): input signed measures.

        Returns:
            numpy array of shape (number of measures in X) x (number of measures in **measures_**): matrix of pairwise Wasserstein distances.
        """
        options = dict(epsilon=self.epsilon, ground_norm=self.ground_norm)
        return pairwise_signed_measure_distances(
            X, self.measures_, metric="wasserstein", n_jobs=self.n_jobs, **options
        )

    def __call__(self, meas1, meas2):
        """
        Wasserstein distance between a single pair of signed measures.

        Parameters:
            meas1: ((n x D), (n)) tuple with numpy.array encoding the (finite points of the) first measure and their multiplicities. Must not contain essential points (i.e. with infinite coordinate).
            meas2: ((m x D), (m)) tuple encoding the second measure.

        Returns:
            float: Wasserstein distance.
        """
        regularization, norm = self.epsilon, self.ground_norm
        return _wasserstein_distance(meas1, meas2, epsilon=regularization, ground_norm=norm)
313
+
multipers/ml/tools.py ADDED
@@ -0,0 +1,99 @@
1
+ from types import FunctionType
2
+ from typing import Iterable
3
+
4
+ import numpy as np
5
+ from joblib import Parallel, delayed
6
+ from sklearn.base import BaseEstimator, TransformerMixin
7
+
8
+ import multipers as mp
9
+ from multipers.simplex_tree_multi import SimplexTreeMulti
10
+
11
+
12
+ reduce_grid = SimplexTreeMulti._reduce_grid
13
+
14
+
15
+
16
def get_simplex_tree_from_delayed(x)->mp.SimplexTreeMulti:
    """
    Evaluate a deferred call given as a ``(function, args, kwargs)`` triple and return its result.
    """
    builder, positional, named = x
    return builder(*positional, **named)
19
+
20
def get_simplextree(x)->mp.SimplexTreeMulti:
    """
    Return a SimplexTreeMulti from `x`.

    `x` is returned unchanged if it already is a SimplexTreeMulti. If it is a
    3-element ``(function, args, kwargs)`` sequence whose first entry is a plain
    Python function, the deferred call is evaluated.

    Raises:
        TypeError: if `x` is neither of the above.
    """
    if isinstance(x, mp.SimplexTreeMulti):
        return x
    looks_delayed = len(x) == 3 and isinstance(x[0], FunctionType)
    if not looks_delayed:
        raise TypeError("Not a valid SimplexTree !")
    return get_simplex_tree_from_delayed(x)
27
+
28
+
29
+
30
def filtration_grid_to_coordinates(F, return_resolution):
    """
    Turn per-axis grid values into the list of all grid points.

    Parameters:
        F: sequence of 1-d arrays, one per axis.
        return_resolution (bool): if true, also return the number of values of each axis.

    Returns:
        numpy array with one row per grid point and one column per axis (in
        np.meshgrid's default ordering), optionally followed by the tuple of axis lengths.
    """
    # Each meshgrid component becomes one column of the coordinate list.
    columns = [axis_values.flatten()[:, None] for axis_values in np.meshgrid(*F)]
    coordinates = np.concatenate(columns, axis=1)
    if not return_resolution:
        return coordinates
    resolution = tuple(len(f) for f in F)
    return coordinates, resolution
37
+
38
+ def get_filtration_weights_grid(num_parameters:int=2, resolution:int|Iterable[int]=3,*,
39
+ min:float=0, max:float=20, dtype=float,
40
+ remove_homothetie:bool=True, weights=None):
41
+ """
42
+ Provides a grid of weights, for filtration rescaling.
43
+ - num parameter : the dimension of the grid tensor
44
+ - resolution : the size of each coordinate
45
+ - min : minimum weight
46
+ - max : maximum weight
47
+ - weights : custom weights (instead of linspace between min and max)
48
+ - dtype : the type of the grid values (useful for int weights)
49
+ """
50
+ from itertools import product
51
+
52
+ # if isinstance(resolution, int):
53
+ try:
54
+ float(resolution)
55
+ resolution = [resolution]*num_parameters
56
+ except:
57
+ pass
58
+ if weights is None:
59
+ weights = [np.linspace(start=min,stop=max,num=r, dtype=dtype) for r in
60
+ resolution]
61
+ try:
62
+ float(weights[0]) # same weights for each filtrations
63
+ weights = [weights] * num_parameters
64
+ except:
65
+ None
66
+ out = np.asarray(list(product(*weights)))
67
+ if remove_homothetie:
68
+ _, indices = np.unique([x / x.max() for x in out if x.max() != 0],axis=0,
69
+ return_index=True)
70
+ out = out[indices]
71
+ return list(out)
72
+
73
+
74
+
75
+
76
class SimplexTreeEdgeCollapser(BaseEstimator, TransformerMixin):
    """
    Scikit-learn style transformer that applies edge collapses to a list of simplex trees.

    Parameters:
        num_collapses (int): number of collapse passes, forwarded as `num` to `mp._collapse_edge_list`.
        full (bool): forwarded as `full` to `mp._collapse_edge_list`.
        max_dimension (int|None): forwarded as `expand_dim` when the simplex trees are rebuilt.
        n_jobs (int): number of jobs used for the collapse step.
    """
    def __init__(self, num_collapses:int=0,
            full:bool=False, max_dimension:int|None=None,
            n_jobs:int=1) -> None:
        super().__init__()
        self.full = full
        self.num_collapses = num_collapses
        self.max_dimension = max_dimension
        self.n_jobs = n_jobs

    def fit(self, X:np.ndarray|list, y=None):
        """Nothing to learn; returns self."""
        return self

    def transform(self, X):
        """
        Extract the edge list of every element of X, collapse it, and rebuild one simplex tree per collapsed edge list.
        """
        # Fixed typo: the class is `SimplexTreeMulti` (as imported and used
        # elsewhere in this module), not `SimplextreeMulti`.
        edges_list = Parallel(n_jobs=-1, prefer="threads")(
            delayed(mp.SimplexTreeMulti.get_edge_list)(x) for x in X
        )
        collapsed_edge_lists = Parallel(n_jobs=self.n_jobs)(
            delayed(mp._collapse_edge_list)(edges, full=self.full, num=self.num_collapses)
            for edges in edges_list
        )
        # Parallel expects an iterable of delayed calls, one per edge list. A
        # single delayed call on the whole list would be unpacked as if its
        # (function, args, kwargs) components were three separate tasks.
        # NOTE(review): `_reconstruct_from_edge_list` is called unbound, exactly
        # as before; confirm its signature (and the `expand_dim` keyword) against
        # simplex_tree_multi.
        collapsed_simplextrees = Parallel(n_jobs=-1, prefer="threads")(
            delayed(mp.SimplexTreeMulti._reconstruct_from_edge_list)(
                collapsed_edges, swap=True, expand_dim=self.max_dimension
            )
            for collapsed_edges in collapsed_edge_lists
        )
        return collapsed_simplextrees
@@ -0,0 +1,29 @@
1
+ from tqdm import tqdm
2
+
3
def _collapse_edge_list(edges, num:int=0, full:bool=False, strong:bool=False, progress:bool=False):
    """
    Simplify the filtered simplicial complex described by an edge list (1-critical, 2-parameter, 1-dimensional) with filtration-domination's edge collapser.

    Parameters:
        edges: edge list, passed to and returned by the filtration_domination routines.
        num (int): maximum number of collapse passes (overridden to 100 when `full` is true).
        full (bool): run up to 100 passes; if strong passes stop removing edges, switch to regular passes instead of stopping.
        strong (bool): start with `remove_strongly_filtration_dominated` instead of `remove_filtration_dominated`.
        progress (bool): show a tqdm progress bar.

    Returns:
        the collapsed edge list.
    """
    from filtration_domination import remove_strongly_filtration_dominated, remove_filtration_dominated

    previous_count = len(edges)
    if full:
        num = 100
    with tqdm(range(num), total=num, desc="Removing edges", disable=not(progress)) as passes:
        for _ in passes:
            collapse = remove_strongly_filtration_dominated if strong else remove_filtration_dominated
            edges = collapse(edges)
            stalled = len(edges) >= previous_count
            if stalled:
                # Prevents doing useless collapses: stop, unless a full run can
                # still fall back from strong to regular collapses.
                if not (full and strong):
                    break
                strong = False
            previous_count = len(edges)
    return edges
29
+
@@ -0,0 +1,147 @@
1
+ from libcpp.utility cimport pair
2
+ from libcpp cimport bool
3
+ from libcpp.vector cimport vector
4
+ from libcpp cimport tuple
5
+
6
# Shared C-level type aliases for the declarations below.
# value_type is a C float; most other aliases are std::vector / std::pair
# compositions built on top of it.
+ ctypedef float value_type
7
+
8
+ ctypedef pair[vector[value_type],vector[value_type]] interval
9
+ ctypedef vector[value_type] corner_type
10
+ ctypedef vector[vector[value_type]] image_type
11
+ ctypedef int dimension_type
12
+ ctypedef vector[value_type] point_type
13
+ ctypedef pair[vector[point_type], vector[point_type]] corner_list
14
+ ctypedef value_type filtration_value_type
15
+ ctypedef pair[corner_type, corner_type] bar
16
+ ctypedef vector[bar] barcode
17
+ ctypedef pair[vector[pair[value_type, value_type]], vector[unsigned int]] plot_interface_type
18
+ ctypedef vector[value_type] multipers_bar
19
+ ctypedef vector[multipers_bar] multipers_barcode
20
+ ctypedef vector[barcode] barcodes
21
+ ctypedef vector[int] euler_curve_type
22
+ ctypedef vector[value_type] filtration_type
23
+ ctypedef vector[filtration_type] multifiltration
24
+ ctypedef vector[pair[int,pair[value_type,value_type]]] barcoded
25
+ ctypedef vector[unsigned int] boundary_type
26
+ ctypedef vector[boundary_type] boundary_matrix
27
+ ctypedef pair[pair[value_type,value_type],pair[value_type,value_type]] interval_2
28
+
29
# The next two aliases refer to Summand, which is declared further down in this file.
+ ctypedef vector[Summand] summand_list_type
30
+ ctypedef vector[summand_list_type] approx_summand_type
31
+ ctypedef vector[int] simplex_type
32
+
33
+
34
# Binding for the C++ multi-parameter filtration value class. The quoted C name
# pins the template instantiation that uses the simplex tree options' value_type.
# Members marked "except +" translate C++ exceptions into Python exceptions;
# members marked "nogil" may be called without holding the GIL.
+ cdef extern from "gudhi/Simplex_tree/multi_filtrations/Finitely_critical_filtrations.h" namespace "Gudhi::multiparameter::multi_filtrations":
35
+ cdef cppclass Finitely_critical_multi_filtration "Gudhi::multiparameter::multi_filtrations::Finitely_critical_multi_filtration<Gudhi::multiparameter::Simplex_tree_options_multidimensional_filtration::value_type>":
36
+ Finitely_critical_multi_filtration() except + nogil
37
+ Finitely_critical_multi_filtration(filtration_type) except +
38
+ Finitely_critical_multi_filtration& operator=(const Finitely_critical_multi_filtration&) except +
39
+ filtration_type get_vector() nogil const
40
+ int size() nogil
41
+ @staticmethod
42
+ multifiltration& to_python(vector[Finitely_critical_multi_filtration]&) nogil const
43
+ @staticmethod
44
+ vector[Finitely_critical_multi_filtration]& from_python(multifiltration&) nogil const
45
+ vector[value_type]& _convert_back() nogil
46
+ filtration_type __filtration_type__(self):
47
+ return self.get_vector()
48
+
49
# Short aliases for a single filtration value and for a vector of them.
+ ctypedef Finitely_critical_multi_filtration cfiltration_type
50
+ ctypedef vector[cfiltration_type] cmultifiltration_type
51
+
52
# Binding for the C++ template class Box (namespace Gudhi::multiparameter::mma).
# It is constructed from two corners (or a pair of corners) and exposes them
# through get_bottom_corner / get_upper_corner / get_pair.
+ cdef extern from "gudhi/Simplex_tree/multi_filtrations/Box.h" namespace "Gudhi::multiparameter::mma":
53
+ cdef cppclass Box[value_type]:
54
+ Box() except +
55
+ Box(const corner_type&, const corner_type&) nogil
56
+ Box(const pair[corner_type, corner_type]&) nogil
57
+ void inflate(value_type) nogil
58
+ const Finitely_critical_multi_filtration& get_bottom_corner() nogil
59
+ const Finitely_critical_multi_filtration& get_upper_corner() nogil
60
+ bool contains(corner_type&) nogil
61
+ pair[Finitely_critical_multi_filtration, Finitely_critical_multi_filtration] get_pair() nogil
62
+
63
# Binding for the C++ template class Line (namespace Gudhi::multiparameter::mma).
# Only its constructors are declared: default, from one point, and from two points.
+ cdef extern from "gudhi/Simplex_tree/multi_filtrations/Line.h" namespace "Gudhi::multiparameter::mma":
64
+ cdef cppclass Line[value_type]:
65
+ Line() except + nogil
66
+ Line(point_type&) except + nogil
67
+ Line(point_type&, point_type&) except + nogil
68
+
69
# Binding for the C++ class Summand (namespace Gudhi::multiparameter::mma).
# A summand exposes a birth list and a death list of filtration values plus a
# dimension; the three-argument constructor takes two such lists and an int.
+ cdef extern from "multiparameter_module_approximation/approximation.h" namespace "Gudhi::multiparameter::mma":
70
+ cdef cppclass Summand:
71
+ Summand() except +
72
+ Summand(vector[Finitely_critical_multi_filtration]&, vector[Finitely_critical_multi_filtration]&, int) except + nogil
73
+ value_type get_interleaving() nogil
74
+ value_type get_local_weight(const corner_type&, const value_type) nogil
75
+ void add_bar(value_type, value_type, const corner_type&, corner_type&, corner_type&, const bool, const interval&) nogil
76
+ bool is_empty() nogil
77
+ cmultifiltration_type& get_birth_list() nogil
78
+ cmultifiltration_type& get_death_list() nogil
79
+ void complete_birth(const value_type) nogil
80
+ void complete_death(const value_type) nogil
81
+ dimension_type get_dimension() nogil const
82
+ void set_dimension(int) nogil
83
+ bool contains(const corner_type&) nogil const
84
+ Box[value_type] get_bounds() nogil const
85
+ void rescale(const vector[value_type]&) nogil
86
+
87
+
88
+
89
+
90
+
91
# Binding for the C++ class MultiDiagram_point: a (dimension, birth, death)
# record, as shown by its constructor and its three getters.
+ cdef extern from "multiparameter_module_approximation/utilities.h" namespace "Gudhi::multiparameter::mma":
92
+ cdef cppclass MultiDiagram_point:
93
+ MultiDiagram_point() except + nogil
94
+ MultiDiagram_point(dimension_type , corner_type , corner_type ) except + nogil
95
+ filtration_type get_birth() nogil const
96
+ filtration_type get_death() nogil const
97
+ dimension_type get_dimension() nogil const
98
+
99
# Binding for the C++ class MultiDiagram: an indexable, iterable container of
# MultiDiagram_point (see at / begin / end / size).
# Members not marked "nogil" must be called with the GIL held.
+ cdef extern from "multiparameter_module_approximation/utilities.h" namespace "Gudhi::multiparameter::mma":
100
+ cdef cppclass MultiDiagram:
101
+ MultiDiagram() except + nogil
102
+ barcode get_points(const dimension_type) const
103
+ multipers_barcode to_multipers(const dimension_type) nogil const
104
+ vector[MultiDiagram_point].const_iterator begin()
105
+ vector[MultiDiagram_point].const_iterator end()
106
+ unsigned int size() const
107
+ MultiDiagram_point& at(unsigned int) nogil
108
+
109
# Binding for the C++ class MultiDiagrams: an indexable, iterable container of
# MultiDiagram (see at / begin / end / size), with conversion helpers.
+ cdef extern from "multiparameter_module_approximation/utilities.h" namespace "Gudhi::multiparameter::mma":
110
+ cdef cppclass MultiDiagrams:
111
+ MultiDiagrams() except + nogil
112
+ vector[vector[vector[value_type]]] to_multipers() nogil const
113
+ MultiDiagram& at(const unsigned int) nogil
114
+ unsigned int size() nogil const
115
+ vector[MultiDiagram].const_iterator begin()
116
+ vector[MultiDiagram].const_iterator end()
117
+ plot_interface_type _for_python_plot(dimension_type, value_type) nogil
118
+ barcodes get_points() nogil
119
+
120
# Binding for the C++ class Module: an indexable, iterable container of Summand
# (see at / begin / end / size / add_summand) with an associated Box, plus
# vectorization, barcode, landscape and Euler-curve queries.
# The two commented-out declarations are older signatures superseded by the
# get_vectorization* declarations further down.
+ cdef extern from "multiparameter_module_approximation/approximation.h" namespace "Gudhi::multiparameter::mma":
121
+ cdef cppclass Module:
122
+ Module() except + nogil
123
+ void resize(unsigned int) nogil
124
+ Summand& at(unsigned int) nogil
125
+ vector[Summand].iterator begin()
126
+ vector[Summand].iterator end()
127
+ void clean(const bool) nogil
128
+ void fill(const value_type) nogil
129
+ # vector[image_type] get_vectorization(const value_type,const value_type, unsigned int,unsigned int,const Box&)
130
+ # image_type get_vectorization_in_dimension(const int,const value_type,unsigned int,unsigned int,const Box&)
131
+ void add_summand(Summand) nogil
132
+ unsigned int size() const
133
+ Box[value_type] get_box() const
134
+ Box[value_type] get_bounds() nogil const
135
+ void set_box(Box[value_type] &box) nogil
136
+ int get_dimension() const
137
+ vector[corner_list] get_corners_of_dimension(unsigned int) nogil
138
+ image_type get_vectorization_in_dimension(const dimension_type, const value_type, const value_type, const bool, Box[value_type]&, unsigned int, unsigned int) nogil
139
+ vector[image_type] get_vectorization(const value_type, const value_type, const bool, Box[value_type], unsigned int, unsigned int) nogil
140
+ MultiDiagram get_barcode(Line[value_type]&, const dimension_type, const bool) nogil
141
+ MultiDiagrams get_barcodes(const vector[Finitely_critical_multi_filtration]& , const dimension_type, const bool ) nogil
142
+ image_type get_landscape(const dimension_type,const unsigned int,Box[value_type]&,const vector[unsigned int]&) nogil
143
+ vector[image_type] get_landscapes(const dimension_type,const vector[unsigned int],Box[value_type]&,const vector[unsigned int]&) nogil
144
+ euler_curve_type euler_curve(const vector[Finitely_critical_multi_filtration]&) nogil
145
+ void rescale(const vector[value_type]&, int) nogil
146
+ void translate(const vector[value_type]&, int) nogil
147
+ vector[vector[value_type]] compute_pixels(const vector[vector[value_type]], vector[int], Box[value_type], value_type, value_type, bool,int) nogil