multipers 2.0.0__cp310-cp310-macosx_13_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of multipers might be problematic. Click here for more details.

Files changed (78) hide show
  1. multipers/.dylibs/libc++.1.0.dylib +0 -0
  2. multipers/.dylibs/libtbb.12.12.dylib +0 -0
  3. multipers/.dylibs/libtbbmalloc.2.12.dylib +0 -0
  4. multipers/__init__.py +11 -0
  5. multipers/_signed_measure_meta.py +268 -0
  6. multipers/_slicer_meta.py +171 -0
  7. multipers/data/MOL2.py +350 -0
  8. multipers/data/UCR.py +18 -0
  9. multipers/data/__init__.py +1 -0
  10. multipers/data/graphs.py +466 -0
  11. multipers/data/immuno_regions.py +27 -0
  12. multipers/data/minimal_presentation_to_st_bf.py +0 -0
  13. multipers/data/pytorch2simplextree.py +91 -0
  14. multipers/data/shape3d.py +101 -0
  15. multipers/data/synthetic.py +68 -0
  16. multipers/distances.py +198 -0
  17. multipers/euler_characteristic.pyx +132 -0
  18. multipers/filtration_conversions.pxd +229 -0
  19. multipers/filtrations.pxd +225 -0
  20. multipers/function_rips.cpython-310-darwin.so +0 -0
  21. multipers/function_rips.pyx +105 -0
  22. multipers/grids.cpython-310-darwin.so +0 -0
  23. multipers/grids.pyx +281 -0
  24. multipers/hilbert_function.pyi +46 -0
  25. multipers/hilbert_function.pyx +153 -0
  26. multipers/io.cpython-310-darwin.so +0 -0
  27. multipers/io.pyx +571 -0
  28. multipers/ml/__init__.py +0 -0
  29. multipers/ml/accuracies.py +90 -0
  30. multipers/ml/convolutions.py +532 -0
  31. multipers/ml/invariants_with_persistable.py +79 -0
  32. multipers/ml/kernels.py +176 -0
  33. multipers/ml/mma.py +659 -0
  34. multipers/ml/one.py +472 -0
  35. multipers/ml/point_clouds.py +238 -0
  36. multipers/ml/signed_betti.py +50 -0
  37. multipers/ml/signed_measures.py +1542 -0
  38. multipers/ml/sliced_wasserstein.py +461 -0
  39. multipers/ml/tools.py +113 -0
  40. multipers/mma_structures.cpython-310-darwin.so +0 -0
  41. multipers/mma_structures.pxd +127 -0
  42. multipers/mma_structures.pyx +2433 -0
  43. multipers/multiparameter_edge_collapse.py +41 -0
  44. multipers/multiparameter_module_approximation.cpython-310-darwin.so +0 -0
  45. multipers/multiparameter_module_approximation.pyx +211 -0
  46. multipers/pickle.py +53 -0
  47. multipers/plots.py +326 -0
  48. multipers/point_measure_integration.cpython-310-darwin.so +0 -0
  49. multipers/point_measure_integration.pyx +139 -0
  50. multipers/rank_invariant.cpython-310-darwin.so +0 -0
  51. multipers/rank_invariant.pyx +229 -0
  52. multipers/simplex_tree_multi.cpython-310-darwin.so +0 -0
  53. multipers/simplex_tree_multi.pxd +129 -0
  54. multipers/simplex_tree_multi.pyi +715 -0
  55. multipers/simplex_tree_multi.pyx +4655 -0
  56. multipers/slicer.cpython-310-darwin.so +0 -0
  57. multipers/slicer.pxd +781 -0
  58. multipers/slicer.pyx +3393 -0
  59. multipers/tensor.pxd +13 -0
  60. multipers/test.pyx +44 -0
  61. multipers/tests/__init__.py +40 -0
  62. multipers/tests/old_test_rank_invariant.py +91 -0
  63. multipers/tests/test_diff_helper.py +74 -0
  64. multipers/tests/test_hilbert_function.py +82 -0
  65. multipers/tests/test_mma.py +51 -0
  66. multipers/tests/test_point_clouds.py +59 -0
  67. multipers/tests/test_python-cpp_conversion.py +82 -0
  68. multipers/tests/test_signed_betti.py +181 -0
  69. multipers/tests/test_simplextreemulti.py +98 -0
  70. multipers/tests/test_slicer.py +63 -0
  71. multipers/torch/__init__.py +1 -0
  72. multipers/torch/diff_grids.py +217 -0
  73. multipers/torch/rips_density.py +257 -0
  74. multipers-2.0.0.dist-info/LICENSE +21 -0
  75. multipers-2.0.0.dist-info/METADATA +29 -0
  76. multipers-2.0.0.dist-info/RECORD +78 -0
  77. multipers-2.0.0.dist-info/WHEEL +5 -0
  78. multipers-2.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,238 @@
1
+ from typing import Literal, Optional
2
+
3
+ import gudhi as gd
4
+ import numpy as np
5
+ from joblib import Parallel, delayed
6
+ from sklearn.base import BaseEstimator, TransformerMixin
7
+ from sklearn.metrics import pairwise_distances
8
+ from tqdm import tqdm
9
+
10
+ import multipers as mp
11
+ import multipers.slicer as mps
12
+ from multipers.ml.convolutions import DTM, KDE
13
+
14
+
15
def _throw_nofit(any):
    """
    Placeholder used as the simplex-tree builder before fitting.

    `PointCloud2SimplexTree.__init__` stores this function in `_get_sts`;
    `fit` replaces it with the builder matching the chosen complex. The
    argument is ignored and the call always raises.
    """
    message = "Fit first"
    raise Exception(message)
17
+
18
+
19
+ class PointCloud2SimplexTree(BaseEstimator, TransformerMixin):
20
+ def __init__(
21
+ self,
22
+ bandwidths=[],
23
+ masses=[],
24
+ threshold: float = np.inf,
25
+ complex: Literal["alpha", "rips", "delaunay"] = "rips",
26
+ sparse: float | None = None,
27
+ num_collapses: int | Literal["full"] = "full",
28
+ kernel: str = "gaussian",
29
+ log_density: bool = True,
30
+ expand_dim: int = 1,
31
+ progress: bool = False,
32
+ n_jobs: Optional[int] = None,
33
+ fit_fraction: float = 1,
34
+ verbose: bool = False,
35
+ safe_conversion: bool = False,
36
+ ) -> None:
37
+ """
38
+ (Rips or Alpha or Delaunay) + (Density Estimation or DTM) 1-critical 2-filtration.
39
+
40
+ Parameters
41
+ ----------
42
+ - bandwidth : real : The kernel density estimation bandwidth, or the DTM mass. If negative, it replaced by abs(bandwidth)*(radius of the dataset)
43
+ - threshold : real, max edge lenfth of the rips or max alpha square of the alpha
44
+ - sparse : real, sparse rips (c.f. rips doc) WARNING : ONLY FOR RIPS
45
+ - num_collapse : int, Number of edge collapses applied to the simplextrees, WARNING : ONLY FOR RIPS
46
+ - expand_dim : int, expand the rips complex to this dimension. WARNING : ONLY FOR RIPS
47
+ - kernel : the kernel used for density estimation. Available ones are, e.g., "dtm", "gaussian", "exponential".
48
+ - progress : bool, shows the calculus status
49
+ - n_jobs : number of processes
50
+ - fit_fraction : real, the fraction of data on which to fit
51
+ - verbose : bool, Shows more information if true.
52
+
53
+ Output
54
+ ------
55
+ A list of SimplexTreeMulti whose first parameter is a rips and the second is the codensity.
56
+ """
57
+ super().__init__()
58
+ self.bandwidths = bandwidths
59
+ self.masses = masses
60
+ self.num_collapses = num_collapses
61
+ self.kernel = kernel
62
+ self.log_density = log_density
63
+ self.progress = progress
64
+ self._bandwidths = np.empty((0,))
65
+ self._threshold = np.inf
66
+ self.n_jobs = n_jobs
67
+ self._scale = np.empty((0,))
68
+ self.fit_fraction = fit_fraction
69
+ self.expand_dim = expand_dim
70
+ self.verbose = verbose
71
+ self.complex = complex
72
+ self.threshold = threshold
73
+ self.sparse = sparse
74
+ self._get_sts = _throw_nofit
75
+ self.safe_conversion = safe_conversion
76
+ return
77
+
78
+ def _get_distance_quantiles(self, X, qs):
79
+ if len(qs) == 0:
80
+ self._scale = []
81
+ return []
82
+ if self.progress:
83
+ print("Estimating scale...", flush=True, end="")
84
+ indices = np.random.choice(
85
+ len(X), min(len(X), int(self.fit_fraction * len(X)) + 1), replace=False
86
+ )
87
+ # diameter = np.asarray([distance_matrix(x,x).max() for x in (X[i] for i in indices)]).max()
88
+ diameter = np.max(
89
+ [pairwise_distances(X=x).max() for x in (X[i] for i in indices)]
90
+ )
91
+ self._scale = diameter * np.asarray(qs)
92
+ if self.threshold > 0:
93
+ self._scale[self._scale > self.threshold] = self.threshold
94
+ if self.progress:
95
+ print(f"Done. Chosen scales {qs} are {self._scale}", flush=True)
96
+ return self._scale
97
+
98
+ def _get_sts_rips(self, x):
99
+ st_init = gd.RipsComplex(
100
+ points=x, max_edge_length=self._threshold, sparse=self.sparse
101
+ ).create_simplex_tree(max_dimension=1)
102
+ st_init = mp.simplex_tree_multi.SimplexTreeMulti(
103
+ st_init, num_parameters=2, safe_conversion=self.safe_conversion
104
+ )
105
+ codensities = self._get_codensities(x_fit=x, x_sample=x)
106
+ num_axes = codensities.shape[0]
107
+ sts = [st_init] + [st_init.copy() for _ in range(num_axes - 1)]
108
+ # no need to multithread here, most operations are memory
109
+ for codensity, st_copy in zip(codensities, sts):
110
+ # RIPS has contigus vertices, so vertices are ordered.
111
+ st_copy.fill_lowerstar(codensity, parameter=1)
112
+
113
+ def collapse_edges(st):
114
+ if self.verbose:
115
+ print("Num simplices :", st.num_simplices)
116
+ if isinstance(self.num_collapses, int):
117
+ st.collapse_edges(num=self.num_collapses)
118
+ if self.verbose:
119
+ print(", after collapse :", st.num_simplices, end="")
120
+ elif self.num_collapses == "full":
121
+ st.collapse_edges(full=True)
122
+ if self.verbose:
123
+ print(", after collapse :", st.num_simplices, end="")
124
+ if self.expand_dim > 1:
125
+ st.expansion(self.expand_dim)
126
+ if self.verbose:
127
+ print(", after expansion :", st.num_simplices, end="")
128
+ if self.verbose:
129
+ print("")
130
+ return st
131
+
132
+ return Parallel(backend="threading", n_jobs=self.n_jobs)(
133
+ delayed(collapse_edges)(st) for st in sts
134
+ )
135
+
136
+ def _get_sts_alpha(self, x: np.ndarray, return_alpha=False):
137
+ alpha_complex = gd.AlphaComplex(points=x)
138
+ st = alpha_complex.create_simplex_tree(max_alpha_square=self._threshold**2)
139
+ vertices = np.array([i for (i,), _ in st.get_skeleton(0)])
140
+ new_points = np.asarray(
141
+ [alpha_complex.get_point(i) for i in vertices]
142
+ ) # Seems to be unsafe for some reason
143
+ # new_points = x
144
+ st = mp.simplex_tree_multi.SimplexTreeMulti(
145
+ st, num_parameters=2, safe_conversion=self.safe_conversion
146
+ )
147
+ codensities = self._get_codensities(x_fit=x, x_sample=new_points)
148
+ num_axes = codensities.shape[0]
149
+ sts = [st] + [st.copy() for _ in range(num_axes - 1)]
150
+ # no need to multithread here, most operations are memory
151
+ max_vertices = vertices.max() + 2 # +1 to be safe
152
+ for codensity, st_copy in zip(codensities, sts):
153
+ alligned_codensity = np.array([np.nan] * max_vertices)
154
+ alligned_codensity[vertices] = codensity
155
+ # alligned_codensity = np.array([codensity[i] if i in vertices else np.nan for i in range(max_vertices)])
156
+ st_copy.fill_lowerstar(alligned_codensity, parameter=1)
157
+ if return_alpha:
158
+ return alpha_complex, sts
159
+ return sts
160
+
161
+ def _get_sts_delaunay(self, x: np.ndarray):
162
+ codensities = self._get_codensities(x_fit=x, x_sample=x)
163
+
164
+ def get_st(c):
165
+ slicer = mps.from_function_delaunay(
166
+ x, c, verbose=self.verbose, clear=not self.verbose
167
+ )
168
+ st = mps.to_simplextree(slicer)
169
+ return st
170
+
171
+ sts = Parallel(backend="threading", n_jobs=self.n_jobs)(
172
+ delayed(get_st)(c) for c in codensities
173
+ )
174
+ return sts
175
+
176
+ def _get_codensities(self, x_fit, x_sample):
177
+ x_fit = np.asarray(x_fit, dtype=np.float32)
178
+ x_sample = np.asarray(x_sample, dtype=np.float32)
179
+ codensities_kde = np.asarray(
180
+ [
181
+ -KDE(
182
+ bandwidth=bandwidth, kernel=self.kernel, return_log=self.log_density
183
+ )
184
+ .fit(x_fit)
185
+ .score_samples(x_sample)
186
+ for bandwidth in self._bandwidths
187
+ ],
188
+ ).reshape(len(self._bandwidths), len(x_sample))
189
+ codensities_dtm = (
190
+ DTM(masses=self.masses)
191
+ .fit(x_fit)
192
+ .score_samples(x_sample)
193
+ .reshape(len(self.masses), len(x_sample))
194
+ )
195
+ return np.concatenate([codensities_kde, codensities_dtm])
196
+
197
+ def fit(self, X: np.ndarray | list, y=None):
198
+ # self.bandwidth = "silverman" ## not good, as is can make bandwidth not constant
199
+ match self.complex:
200
+ case "rips":
201
+ self._get_sts = self._get_sts_rips
202
+ case "alpha":
203
+ self._get_sts = self._get_sts_alpha
204
+ case "delaunay":
205
+ self._get_sts = self._get_sts_delaunay
206
+ case _:
207
+ raise ValueError(
208
+ f"Invalid complex \
209
+ {self.complex}. Possible choises are rips or alpha."
210
+ )
211
+
212
+ qs = [
213
+ q for q in [*-np.asarray(self.bandwidths), -self.threshold] if 0 <= q <= 1
214
+ ]
215
+ self._get_distance_quantiles(X, qs=qs)
216
+ self._bandwidths = np.array(self.bandwidths)
217
+ count = 0
218
+ for i in range(len(self._bandwidths)):
219
+ if self.bandwidths[i] < 0:
220
+ self._bandwidths[i] = self._scale[count]
221
+ count += 1
222
+ self._threshold = self.threshold if self.threshold > 0 else self._scale[-1]
223
+
224
+ # PRECOMPILE FIRST
225
+ self._get_codensities(X[0][:4], X[0][:4])
226
+ return self
227
+
228
+ def transform(self, X):
229
+ # precompile first
230
+ # self._get_sts(X[0][:5])
231
+ self._get_codensities(X[0][:4], X[0][:4])
232
+ with tqdm(
233
+ X, desc="Filling simplextrees", disable=not self.progress, total=len(X)
234
+ ) as data:
235
+ stss = Parallel(backend="threading", n_jobs=self.n_jobs)(
236
+ delayed(self._get_sts)(x) for x in data
237
+ )
238
+ return stss
@@ -0,0 +1,50 @@
1
+ ## This code was written by Luis Scoccola
2
+ import numpy as np
3
+ from scipy.sparse import coo_array
4
+ from scipy.ndimage import convolve1d
5
+
6
+
7
def signed_betti(hilbert_function, threshold=False, sparse=False):
    """
    Take the backward finite difference of `hilbert_function` along every axis.

    Along each axis the value at index i becomes value[i] - value[i - 1],
    with zero used before the first index. The input is not modified.

    - threshold : bool, if true the last slice along each axis is zeroed
      before differencing.
    - sparse : bool, if true the result is returned as a scipy `coo_array`
      instead of a dense array.
    """
    num_axes = len(hilbert_function.shape)
    out = np.array(hilbert_function, copy=True)
    if threshold:
        # zero out the "end" of the Hilbert function
        for axis in range(num_axes):
            out[(slice(None),) * axis + (-1,)] = 0
    # Convolving with (0, 1, -1) yields out[i] - out[i - 1] along the axis.
    kernel = np.asarray((0, 1, -1), dtype=int)
    for axis in range(num_axes):
        out = convolve1d(out, kernel, axis=axis, mode="constant", cval=0)
    return coo_array(out) if sparse else out
22
+
23
def rank_decomposition_by_rectangles(rank_invariant, threshold=False):
    """
    Apply `signed_betti` to a rank invariant whose target axes are reversed.

    The input is the rank invariant of an n-parameter persistence module
        M : [0, ..., s_1 - 1] x ... x [0, ..., s_n - 1] ---> Vec
    on a grid with dimensions of sizes s_1, ..., s_n, given as a tensor of
    dimensions (s_1, ..., s_n, s_1, ..., s_n): at index
    [i_1, ..., i_n, j_1, ..., j_n] it holds the rank of the structure map
    M(i) -> M(j), where i = (i_1, ..., i_n), j = (j_1, ..., j_n) and i <= j
    (i_1 <= j_1, ..., i_n <= j_n).

    NOTE :
     - The input is assumed to be zero at every index where not( i <= j ).
     - The output at index [i_1, ..., i_n, j_1, ..., j_n] only makes sense
       when i <= j; elsewhere it may take arbitrary values and should be
       ignored.

    - threshold : bool, if true the last slice along each of the last n axes
      is zeroed (on a copy) before the computation.
    """
    num_parameters = len(rank_invariant.shape) // 2
    target_axes = tuple(range(num_parameters, 2 * num_parameters))
    if threshold:
        # Work on a copy so the caller's array is left untouched.
        rank_invariant = rank_invariant.copy()
        # zero out the "end"
        for axis in target_axes:
            rank_invariant[(slice(None),) * axis + (-1,)] = 0
    reversed_targets = np.flip(rank_invariant, target_axes)
    return np.flip(signed_betti(reversed_targets), target_axes)