multipers-1.0-cp311-cp311-manylinux_2_34_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (56)
  1. multipers/__init__.py +4 -0
  2. multipers/_old_rank_invariant.pyx +328 -0
  3. multipers/_signed_measure_meta.py +72 -0
  4. multipers/data/MOL2.py +350 -0
  5. multipers/data/UCR.py +18 -0
  6. multipers/data/__init__.py +1 -0
  7. multipers/data/graphs.py +272 -0
  8. multipers/data/immuno_regions.py +27 -0
  9. multipers/data/minimal_presentation_to_st_bf.py +0 -0
  10. multipers/data/pytorch2simplextree.py +91 -0
  11. multipers/data/shape3d.py +101 -0
  12. multipers/data/synthetic.py +68 -0
  13. multipers/distances.py +100 -0
  14. multipers/euler_characteristic.cpython-311-x86_64-linux-gnu.so +0 -0
  15. multipers/euler_characteristic.pyx +132 -0
  16. multipers/function_rips.cpython-311-x86_64-linux-gnu.so +0 -0
  17. multipers/function_rips.pyx +101 -0
  18. multipers/hilbert_function.cpython-311-x86_64-linux-gnu.so +0 -0
  19. multipers/hilbert_function.pyi +46 -0
  20. multipers/hilbert_function.pyx +145 -0
  21. multipers/ml/__init__.py +0 -0
  22. multipers/ml/accuracies.py +61 -0
  23. multipers/ml/convolutions.py +384 -0
  24. multipers/ml/invariants_with_persistable.py +79 -0
  25. multipers/ml/kernels.py +128 -0
  26. multipers/ml/mma.py +422 -0
  27. multipers/ml/one.py +472 -0
  28. multipers/ml/point_clouds.py +191 -0
  29. multipers/ml/signed_betti.py +50 -0
  30. multipers/ml/signed_measures.py +1046 -0
  31. multipers/ml/sliced_wasserstein.py +313 -0
  32. multipers/ml/tools.py +99 -0
  33. multipers/multiparameter_edge_collapse.py +29 -0
  34. multipers/multiparameter_module_approximation.cpython-311-x86_64-linux-gnu.so +0 -0
  35. multipers/multiparameter_module_approximation.pxd +147 -0
  36. multipers/multiparameter_module_approximation.pyi +439 -0
  37. multipers/multiparameter_module_approximation.pyx +931 -0
  38. multipers/pickle.py +53 -0
  39. multipers/plots.py +207 -0
  40. multipers/point_measure_integration.cpython-311-x86_64-linux-gnu.so +0 -0
  41. multipers/point_measure_integration.pyx +59 -0
  42. multipers/rank_invariant.cpython-311-x86_64-linux-gnu.so +0 -0
  43. multipers/rank_invariant.pyx +154 -0
  44. multipers/simplex_tree_multi.cpython-311-x86_64-linux-gnu.so +0 -0
  45. multipers/simplex_tree_multi.pxd +121 -0
  46. multipers/simplex_tree_multi.pyi +715 -0
  47. multipers/simplex_tree_multi.pyx +1284 -0
  48. multipers/tensor.pxd +13 -0
  49. multipers/test.pyx +44 -0
  50. multipers-1.0.dist-info/LICENSE +21 -0
  51. multipers-1.0.dist-info/METADATA +9 -0
  52. multipers-1.0.dist-info/RECORD +56 -0
  53. multipers-1.0.dist-info/WHEEL +5 -0
  54. multipers-1.0.dist-info/top_level.txt +1 -0
  55. multipers.libs/libtbb-5d1cde94.so.12.10 +0 -0
  56. multipers.libs/libtbbmalloc-5e0a3d4c.so.2.10 +0 -0
multipers/hilbert_function.pyx
@@ -0,0 +1,145 @@
+ # cimport multipers.tensor as mt
+ from libc.stdint cimport intptr_t, uint16_t, uint32_t, int32_t
+ from libcpp.vector cimport vector
+ from libcpp cimport bool, int, float
+ from libcpp.utility cimport pair
+ from typing import Optional, Iterable, Callable
+
+ import numpy as np
+ cimport numpy as cnp
+ cnp.import_array()
+
+ ctypedef float value_type
+ python_value_type = np.float32
+
+ ctypedef int32_t indices_type  # uint fails for some reason
+ python_indices_type = np.int32
+
+ ctypedef int32_t tensor_dtype
+ python_tensor_dtype = np.int32
+
+
+ ctypedef pair[vector[vector[indices_type]], vector[tensor_dtype]] signed_measure_type
+
+ cdef extern from "multi_parameter_rank_invariant/hilbert_function.h" namespace "Gudhi::multiparameter::hilbert_function":
+     void get_hilbert_surface_python(const intptr_t, tensor_dtype*, const vector[indices_type], const vector[indices_type], bool, bool, indices_type, bool) except + nogil
+     signed_measure_type get_hilbert_signed_measure(const intptr_t, tensor_dtype*, const vector[indices_type], const vector[indices_type], bool, indices_type, bool, bool) except + nogil
+
+
+ def hilbert_signed_measure(simplextree, vector[indices_type] degrees, mass_default=None, plot=False, indices_type n_jobs=0, bool verbose=False, bool expand_collapse=False):
+     """
+     Computes the signed measures given by the decomposition of the Hilbert function.
+
+     Input
+     -----
+     - simplextree:SimplexTreeMulti, the multifiltered simplicial complex
+     - degrees:array-like of ints, the degrees to compute
+     - mass_default: Either None, or an array-like of floats, one per parameter. Where to put the default mass to get a zero-mass measure. (The 'auto' and 'inf' shortcuts are disabled in this version; see the commented-out block below.)
+     - plot:bool, plots the computed measures if true.
+     - n_jobs:int, number of jobs. Defaults to #cpu; when doing parallel computations of signed measures, we recommend setting this to 1.
+     - verbose:bool, prints C++ logs.
+
+     Output
+     ------
+     `[signed_measure_of_degree for degree in degrees]`
+     with `signed_measure_of_degree` of the form `(dirac location, dirac weights)`.
+     """
+     assert simplextree._is_squeezed > 0, "Squeeze grid first."
+     cdef bool zero_pad = mass_default is not None
+     grid_conversion = [np.asarray(f) for f in simplextree.filtration_grid]
+     # assert simplextree.num_parameters == 2
+     grid_shape = np.array([len(f) for f in grid_conversion])
+
+     # match mass_default:  ## Cython bug
+     #     case None:
+     #         pass
+     #     case "inf":
+     #         mass_default = np.array([np.inf]*simplextree.num_parameters)
+     #     case "auto":
+     #         mass_default = np.array([1.1*np.max(f) - 0.1*np.min(f) for f in grid_conversion])
+     #     case _:
+     #         mass_default = np.asarray(mass_default)
+     #         assert mass_default.ndim == 1 and mass_default.shape[0] == simplextree.num_parameters
+     if mass_default is not None:
+         mass_default = np.asarray(mass_default)
+         assert mass_default.ndim == 1 and mass_default.shape[0] == simplextree.num_parameters
+     if zero_pad:
+         for i, _ in enumerate(grid_shape):
+             grid_shape[i] += 1  # one extra bin per parameter, holding the default mass
+         for i, f in enumerate(grid_conversion):
+             grid_conversion[i] = np.concatenate([f, [mass_default[i]]])
+     assert len(grid_shape) == simplextree.num_parameters, "Grid shape size has to be the number of parameters."
+     grid_shape_with_degree = np.asarray(np.concatenate([[len(degrees)], grid_shape]), dtype=python_indices_type)
+     container_array = np.ascontiguousarray(np.zeros(grid_shape_with_degree, dtype=python_tensor_dtype).flatten())
+     assert len(container_array) < np.iinfo(np.uint32).max, "Too large container. Raise an issue on github if you encounter this issue. (Due to tensor's operator[])"
+     cdef intptr_t simplextree_ptr = simplextree.thisptr
+     cdef vector[indices_type] c_grid_shape = grid_shape_with_degree
+     cdef tensor_dtype[::1] container = container_array
+     cdef tensor_dtype* container_ptr = &container[0]
+     cdef signed_measure_type out
+     with nogil:
+         out = get_hilbert_signed_measure(simplextree_ptr, container_ptr, c_grid_shape, degrees, zero_pad, n_jobs, verbose, expand_collapse)
+     pts, weights = np.asarray(out.first, dtype=int).reshape(-1, simplextree.num_parameters+1), np.asarray(out.second, dtype=int)
+     # return pts, weights
+     degree_indices = [np.argwhere(pts[:,0] == degree_index).flatten() for degree_index, degree in enumerate(degrees)]  ## TODO : maybe optimize
+     sms = [(pts[idx,1:], weights[idx]) for idx in degree_indices]
+     for degree_index, (pts, weights) in enumerate(sms):
+         coords = np.empty(shape=pts.shape, dtype=float)
+         for i in range(coords.shape[1]):
+             coords[:,i] = grid_conversion[i][pts[:,i]]
+         sms[degree_index] = (coords, weights)
+     if plot:
+         from multipers.plots import plot_signed_measures
+         plot_signed_measures(sms)
+     return sms
+
+
+ def hilbert_surface(simplextree, vector[indices_type] degrees, mass_default=None, bool mobius_inversion=False, bool plot=False, indices_type n_jobs=0, bool expand_collapse=False):
+     """
+     Computes the Hilbert function.
+
+     Input
+     -----
+     - simplextree:SimplexTreeMulti, the multifiltered simplicial complex
+     - degrees:array-like of ints, the degrees to compute
+     - mass_default: Either None, or an array-like of floats, one per parameter. Where to put the default mass to get a zero-mass measure.
+     - mobius_inversion:bool, if true, the backend returns the Möbius inversion of the Hilbert function instead of the raw surface.
+     - plot:bool, plots the computed surfaces if true.
+     - n_jobs:int, number of jobs. Defaults to #cpu; when doing parallel computations of signed measures, we recommend setting this to 1.
+
+     Output
+     ------
+     A pair `(grid_conversion, surface)`, where `surface` is an integer array of shape `(num_degrees, num_filtration_values_of_parameter 1, ..., num_filtration_values_of_parameter n)`.
+     """
+     assert simplextree._is_squeezed > 0, "Squeeze grid first."
+     cdef bool zero_pad = mass_default is not None
+     grid_conversion = [np.asarray(f) for f in simplextree.filtration_grid]
+     grid_shape = np.array([len(f) for f in grid_conversion])
+     if mass_default is not None:
+         mass_default = np.asarray(mass_default)
+         assert mass_default.ndim == 1 and mass_default.shape[0] == simplextree.num_parameters
+     if zero_pad:
+         for i, _ in enumerate(grid_shape):
+             grid_shape[i] += 1  # one extra bin per parameter, holding the default mass
+         for i, f in enumerate(grid_conversion):
+             grid_conversion[i] = np.concatenate([f, [mass_default[i]]])
+     assert len(grid_shape) == simplextree.num_parameters, "Grid shape size has to be the number of parameters."
+     grid_shape_with_degree = np.asarray(np.concatenate([[len(degrees)], grid_shape]), dtype=python_indices_type)
+     container_array = np.ascontiguousarray(np.zeros(grid_shape_with_degree, dtype=python_tensor_dtype).flatten())
+     assert len(container_array) < np.iinfo(np.uint32).max, "Too large container. Raise an issue on github if you encounter this issue. (Due to tensor's operator[])"
+     cdef intptr_t simplextree_ptr = simplextree.thisptr
+     cdef vector[indices_type] c_grid_shape = grid_shape_with_degree
+     cdef tensor_dtype[::1] container = container_array
+     cdef tensor_dtype* container_ptr = &container[0]
+     with nogil:
+         get_hilbert_surface_python(simplextree_ptr, container_ptr, c_grid_shape, degrees, mobius_inversion, zero_pad, n_jobs, expand_collapse)
+     out = (grid_conversion, container_array.reshape(grid_shape_with_degree))
+     if plot:
+         from multipers.plots import plot_surfaces
+         plot_surfaces(out)
+     return out
+
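For reference, a minimal usage sketch of `hilbert_signed_measure` (not part of the wheel contents). It assumes `st` is a SimplexTreeMulti that has already been squeezed onto its filtration grid (the function asserts `st._is_squeezed`); the construction of `st` is not shown here, and the degrees are arbitrary.

import numpy as np
from multipers.hilbert_function import hilbert_signed_measure

degrees = [0, 1]
# `st`: a squeezed, multifiltered SimplexTreeMulti built elsewhere (assumed input).
sms = hilbert_signed_measure(st, degrees=degrees, n_jobs=1)
for degree, (dirac_locations, dirac_weights) in zip(degrees, sms):
    # dirac_locations: (npts, num_parameters) array of filtration coordinates
    # dirac_weights:   (npts,) signed integer multiplicities
    print(degree, dirac_locations.shape, int(dirac_weights.sum()))

`hilbert_surface` follows the same calling convention and returns the pair `(grid_conversion, surface)` instead of a list of measures.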
multipers/ml/accuracies.py
@@ -0,0 +1,61 @@
+ import pandas as pd
+ from warnings import warn
+ import numpy as np
+ from tqdm import tqdm
+ from os.path import exists
+
+
+ def accuracy_to_csv(X, Y, cl, k:float=10, dataset:str="", shuffle=True, verbose:bool=True, **more_columns):
+     assert k > 0, "k is either the number of kfolds (k > 1) or the test size (0 < k < 1)."
+     if k > 1:
+         k = int(k)
+         from sklearn.model_selection import StratifiedKFold as KFold
+         kfold = KFold(k, shuffle=shuffle).split(X, Y)
+         accuracies = np.zeros(k)
+         for i, (train_idx, test_idx) in enumerate(tqdm(kfold, total=k, desc="Computing kfold")):
+             xtrain = [X[i] for i in train_idx]
+             ytrain = [Y[i] for i in train_idx]
+             cl.fit(xtrain, ytrain)
+             xtest = [X[i] for i in test_idx]
+             ytest = [Y[i] for i in test_idx]
+             accuracies[i] = cl.score(xtest, ytest)
+             if verbose:
+                 print(f"step {i+1}, {dataset} : {accuracies[i]}", flush=True)
+             try:
+                 print("Best classification parameters : ", cl.best_params_)
+             except AttributeError:
+                 pass  # cl is not a parameter search; it has no best_params_
+         print(f"Accuracy {dataset} : {np.mean(accuracies).round(decimals=3)}±{np.std(accuracies).round(decimals=3)} ")
+     elif k > 0:
+         from sklearn.model_selection import train_test_split
+         print("Computing accuracy, with train test split", flush=True)
+         xtrain, xtest, ytrain, ytest = train_test_split(X, Y, shuffle=shuffle, test_size=k)
+         print("Fitting...", end="", flush=True)
+         cl.fit(xtrain, ytrain)
+         print("Computing score...", end="", flush=True)
+         accuracies = cl.score(xtest, ytest)
+         try:
+             print("Best classification parameters : ", cl.best_params_)
+         except AttributeError:
+             pass  # cl is not a parameter search; it has no best_params_
+         print("Done.")
+         if verbose: print(f"Accuracy {dataset} : {accuracies} ")
+     file_path:str = f"result_{dataset}.csv".replace("/", "_").replace(".off", "")
+     columns:list[str] = ["dataset", "cv", "mean", "std"]
+     if exists(file_path):
+         df:pd.DataFrame = pd.read_csv(file_path)
+     else:
+         df:pd.DataFrame = pd.DataFrame(columns=columns)
+     more_names = []
+     more_values = []
+     for key, value in more_columns.items():
+         if key not in columns:
+             more_names.append(key)
+             more_values.append(value)
+         else:
+             warn(f"Duplicate key {key} ! with value {value}")
+     new_line:pd.DataFrame = pd.DataFrame([[dataset, k, np.mean(accuracies).round(decimals=3), np.std(accuracies).round(decimals=3)]+more_values], columns=columns+more_names)
+     print(new_line)
+     df = pd.concat([df, new_line])
+     df.to_csv(file_path, index=False)
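For reference, a minimal usage sketch of `accuracy_to_csv` on toy data (not part of the wheel contents). The classifier and the extra `bandwidth` keyword are arbitrary choices; the latter only illustrates the `**more_columns` mechanism, which appends extra columns to the output CSV.

import numpy as np
from sklearn.svm import SVC
from multipers.ml.accuracies import accuracy_to_csv

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 5))
Y = rng.integers(0, 2, size=100)
# 5-fold stratified cross-validation; results are appended to result_toy.csv.
accuracy_to_csv(X, Y, cl=SVC(), k=5, dataset="toy", bandwidth=0.1)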
multipers/ml/convolutions.py
@@ -0,0 +1,384 @@
+ from typing import Iterable
+ from joblib import Parallel, delayed
+ import numpy as np
+ from itertools import product
+
+ # from numba import njit, prange
+ # import numba.np.unsafe.ndarray  ## WORKAROUND FOR NUMBA
+
+ # @njit(nogil=True, fastmath=True, inline="always", cache=True)
+ # def _pts_convolution_gaussian_pt(pts, weights, pt, bandwidth):
+ #     """
+ #     Evaluates the convolution of the signed measure (pts, weights) with a gaussian measure of bandwidth `bandwidth`, at point pt
+ #
+ #     Parameters
+ #     ----------
+ #     - pts : (npts) x (num_parameters)
+ #     - weights : (npts)
+ #     - pt : (num_parameters)
+ #     - bandwidth : real
+ #
+ #     Outputs
+ #     -------
+ #     The float value
+ #     """
+ #     num_parameters = pts.shape[1]
+ #     distances = np.empty(len(pts), dtype=float)
+ #     for i in prange(len(pts)):
+ #         distances[i] = np.sum((pt - pts[i])**2)/(2*bandwidth**2)
+ #     distances = np.exp(-distances)*weights / (np.sqrt(2*np.pi)*(bandwidth**(num_parameters / 2)))  # This last renormalization is not necessary
+ #     return np.mean(distances)
+
+
+ # @njit(nogil=True, fastmath=True, inline="always", cache=True)
+ # def _pts_convolution_exponential_pt(pts, weights, pt, bandwidth):
+ #     """
+ #     Evaluates the convolution of the signed measure (pts, weights) with an exponential measure of bandwidth `bandwidth`, at point pt
+ #
+ #     Parameters
+ #     ----------
+ #     - pts : (npts) x (num_parameters)
+ #     - weights : (npts)
+ #     - pt : (num_parameters)
+ #     - bandwidth : real
+ #
+ #     Outputs
+ #     -------
+ #     The float value
+ #     """
+ #     num_parameters = pts.shape[1]
+ #     distances = np.empty(len(pts), dtype=float)
+ #     for i in prange(len(pts)):
+ #         distances[i] = np.linalg.norm(pt - pts[i])
+ #     # distances = np.linalg.norm(pts-pt, axis=1)
+ #     distances = np.exp(-distances/bandwidth)*weights / (bandwidth**num_parameters)  # This last renormalization is not necessary
+ #     return np.mean(distances)
+
+ # @njit(nogil=True, cache=True)  # not sure if parallel here is worth it...
+ # def _pts_convolution_sparse_pts(pts:np.ndarray, weights:np.ndarray, pt_list:np.ndarray, bandwidth, kernel:int=0):
+ #     """
+ #     Evaluates the convolution of the signed measure (pts, weights) with a gaussian measure of bandwidth `bandwidth`, at points pt_list
+ #
+ #     Parameters
+ #     ----------
+ #     - pts : (npts) x (num_parameters)
+ #     - weights : (npts)
+ #     - pt_list : (n) x (num_parameters)
+ #     - bandwidth : real
+ #
+ #     Outputs
+ #     -------
+ #     The values : (n)
+ #     """
+ #     if kernel == 0:
+ #         return np.array([_pts_convolution_gaussian_pt(pts, weights, pt_list[i], bandwidth) for i in prange(pt_list.shape[0])])
+ #     elif kernel == 1:
+ #         return np.array([_pts_convolution_exponential_pt(pts, weights, pt_list[i], bandwidth) for i in prange(pt_list.shape[0])])
+ #     else:
+ #         raise Exception("Unsupported kernel")
+
+ def convolution_signed_measures(iterable_of_signed_measures, filtrations, bandwidth, flatten:bool=True, n_jobs:int=1, backend="pykeops", kernel="gaussian", **kwargs):
+     """
+     Evaluates the convolution of the signed measures Iterable(pts, weights) with a Gaussian kernel of bandwidth `bandwidth`, on the grid given by the filtrations.
+
+     Parameters
+     ----------
+     - iterable_of_signed_measures : (num_signed_measure) x [ (npts) x (num_parameters), (npts) ]
+     - filtrations : (num_parameter) x (filtration values)
+     - flatten : bool
+     - n_jobs : int
+
+     Outputs
+     -------
+     The concatenated images, for each signed measure : (num_signed_measures) x (len(f) for f in filtration_values)
+     """
+     grid_iterator = np.array(list(product(*filtrations)), dtype=float)
+     match backend:
+         case "sklearn":
+             def convolution_signed_measures_on_grid(signed_measures:Iterable[tuple[np.ndarray, np.ndarray]]):
+                 return np.concatenate([
+                     _pts_convolution_sparse_old(pts=pts, pts_weights=weights, grid_iterator=grid_iterator, bandwidth=bandwidth, kernel=kernel, **kwargs) for pts, weights in signed_measures
+                 ], axis=0)
+         # case "numba":
+         #     kernel2int = {"gaussian":0, "exponential":1, "other":2}
+         #     def convolution_signed_measures_on_grid(signed_measures:Iterable[tuple[np.ndarray, np.ndarray]]):
+         #         return np.concatenate([
+         #             _pts_convolution_sparse_pts(pts, weights, grid_iterator, bandwidth, kernel=kernel2int[kernel]) for pts, weights in signed_measures
+         #         ], axis=0)
+         case "pykeops":
+             def convolution_signed_measures_on_grid(signed_measures:Iterable[tuple[np.ndarray, np.ndarray]]):
+                 return np.concatenate([
+                     _pts_convolution_pykeops(pts=pts, pts_weights=weights, grid_iterator=grid_iterator, bandwidth=bandwidth, kernel=kernel, **kwargs) for pts, weights in signed_measures
+                 ], axis=0)
+             ## compiles once, on a tiny input
+             pts, weights = iterable_of_signed_measures[0][0]
+             small_pts, small_weights = pts[:2], weights[:2]
+             _pts_convolution_pykeops(small_pts, small_weights, grid_iterator=grid_iterator, bandwidth=bandwidth, kernel=kernel, **kwargs)
+         case _:
+             raise ValueError(f"Unsupported backend {backend}.")  # guard added: the original silently fell through on unknown backends
+
+     if n_jobs > 1 or n_jobs == -1:
+         prefer = "processes" if backend == "sklearn" else "threads"
+         convolutions = Parallel(n_jobs=n_jobs, prefer=prefer)(delayed(convolution_signed_measures_on_grid)(sms) for sms in iterable_of_signed_measures)
+     else:
+         convolutions = [convolution_signed_measures_on_grid(sms) for sms in iterable_of_signed_measures]
+     if not flatten:
+         out_shape = [-1] + [len(f) for f in filtrations]  # first axis: degree
+         convolutions = [x.reshape(out_shape) for x in convolutions]
+     return np.asarray(convolutions, dtype=float)
+
+ # def _test(r=1000, b=0.5, plot=True, kernel=0):
+ #     import matplotlib.pyplot as plt
+ #     pts, weights = np.array([[1.,1.], [1.1,1.1]]), np.array([1,-1])
+ #     pt_list = np.array(list(product(*[np.linspace(0,2,r)]*2)))
+ #     img = _pts_convolution_sparse_pts(pts, weights, pt_list, b, kernel=kernel)
+ #     if plot:
+ #         plt.imshow(img.reshape(r,-1).T, origin="lower")
+ #         plt.show()
+
+
+ def _pts_convolution_sparse_old(pts:np.ndarray, pts_weights:np.ndarray, grid_iterator, kernel="gaussian", bandwidth=0.1, **more_kde_args):
+     """
+     Old version of `convolution_signed_measures`, using scikit-learn's KernelDensity; slower than the PyKeops version below.
+     """
+     from sklearn.neighbors import KernelDensity
+     if len(pts) == 0:
+         # warn("Found a trivial signed measure !")
+         return np.zeros(len(grid_iterator))
+     kde = KernelDensity(kernel=kernel, bandwidth=bandwidth, rtol=1e-4, **more_kde_args)  # TODO : check rtol
+     pos_indices = pts_weights > 0
+     neg_indices = pts_weights < 0
+     # an empty part contributes zero density, i.e. a log-density of -inf
+     img_pos = np.full(len(grid_iterator), -np.inf) if pos_indices.sum() == 0 else kde.fit(
+         pts[pos_indices], sample_weight=pts_weights[pos_indices]
+     ).score_samples(grid_iterator)
+     img_neg = np.full(len(grid_iterator), -np.inf) if neg_indices.sum() == 0 else kde.fit(pts[neg_indices], sample_weight=-pts_weights[neg_indices]).score_samples(grid_iterator)
+     return np.exp(img_pos) - np.exp(img_neg)
+
+ def _pts_convolution_pykeops(pts:np.ndarray, pts_weights:np.ndarray, grid_iterator, kernel="gaussian", bandwidth=0.1, **more_kde_args):
+     """
+     PyKeops convolution
+     """
+     kde = KDE(kernel=kernel, bandwidth=bandwidth, return_log=False, **more_kde_args)
+     return kde.fit(pts, sample_weights=np.asarray(pts_weights, dtype=pts.dtype)).score_samples(grid_iterator)
+
+
+ ## TODO : multiple bandwidths at once with lazy tensors
+ class KDE():
+     """
+     Fast, scikit-style, and differentiable kernel density estimation, using PyKeops.
+     """
+     def __init__(self, bandwidth=1, kernel="gaussian", return_log=True):
+         """
+         bandwidth : numeric
+             bandwidth of the kernel
+         kernel : "gaussian", "exponential", or a callable (x_i, y_j, bandwidth) -> kernel matrix
+         return_log : bool
+             if true, `score_samples` returns log-densities
+         """
+         self.X = None
+         self.bandwidth = bandwidth
+         self.kernel = kernel
+         self._kernel = None
+         self._backend = None
+         self._sample_weights = None
+         self.return_log = return_log
+
+     def fit(self, X, sample_weights=None, y=None):
+         self.X = X
+         self._sample_weights = sample_weights
+         if isinstance(X, np.ndarray):
+             self._backend = np
+         else:
+             import torch
+             if isinstance(X, torch.Tensor):
+                 self._backend = torch
+             else:
+                 raise Exception('Unsupported backend.')
+         match self.kernel:
+             case "gaussian":
+                 self._kernel = self.gaussian_kernel
+             case "exponential":
+                 self._kernel = self.exponential_kernel
+             case _:
+                 assert callable(self.kernel), f"Unknown kernel {self.kernel}. A custom kernel has to be callable: (x:LazyTensor(n,1,D), y:LazyTensor(1,m,D), bandwidth:float) -> kernel matrix"
+                 self._kernel = self.kernel
+         return self
+
+     @staticmethod
+     def gaussian_kernel(x_i, y_j, bandwidth):
+         exponent = -(((x_i - y_j) / bandwidth)**2).sum(dim=2) / 2
+         kernel = exponent.exp() / bandwidth
+         return kernel
+
+     @staticmethod
+     def exponential_kernel(x_i, y_j, bandwidth):
+         # exponent is -||x_i - y_j|| / bandwidth; the original `**1/2` parsed as
+         # (.**1)/2 rather than a square root, and reduced over the wrong axis.
+         exponent = -(((x_i - y_j)**2).sum(dim=2).sqrt()) / bandwidth
+         kernel = exponent.exp() / bandwidth
+         return kernel
+
+     @staticmethod
+     def to_lazy(X, Y, x_weights):
+         if isinstance(X, np.ndarray):
+             from pykeops.numpy import LazyTensor
+             lazy_x = LazyTensor(X.reshape((X.shape[0], 1, X.shape[1])))
+             lazy_y = LazyTensor(Y.reshape((1, Y.shape[0], Y.shape[1])))
+             if x_weights is not None:
+                 w = LazyTensor(x_weights[:,None], axis=0)
+                 return lazy_x, lazy_y, w
+             return lazy_x, lazy_y, None
+         import torch
+         if isinstance(X, torch.Tensor):
+             from pykeops.torch import LazyTensor
+             lazy_x = LazyTensor(X.view(X.shape[0], 1, X.shape[1]))
+             lazy_y = LazyTensor(Y.view(1, Y.shape[0], Y.shape[1]))
+             if x_weights is not None:
+                 w = LazyTensor(x_weights[:,None], axis=0)
+                 return lazy_x, lazy_y, w
+             return lazy_x, lazy_y, None
+         raise Exception('Bad tensor type.')
+
+     def score_samples(self, Y, X=None, return_kernel=False):
+         """Returns the kernel density estimate of each point in `Y`.
+
+         Parameters
+         ----------
+         Y : tensor (m, d)
+             `m` points with `d` dimensions for which the density will be calculated
+         X : tensor (n, d), optional
+             `n` points with `d` dimensions to which KDE will be fit. Provided to
+             allow batch calculations. By default, `X` is None and all points
+             given to `fit` are included.
+
+         Returns
+         -------
+         densities : tensor (m)
+             density estimates (log-densities if `return_log`) for each of the queried points in `Y`
+         """
+         X = self.X if X is None else X
+         assert Y.shape[1] == X.shape[1] and X.ndim == Y.ndim == 2
+         lazy_x, lazy_y, w = self.to_lazy(X, Y, x_weights=self._sample_weights)
+         kernel = self._kernel(lazy_x, lazy_y, self.bandwidth)
+         if w is not None: kernel *= w
+         if return_kernel: return kernel
+         density_estimation = kernel.sum(dim=0).flatten()
+         return self._backend.log(density_estimation) if self.return_log else density_estimation
+
+
+ class DTM():
+     """
+     Fast, scikit-style, and partially differentiable distance-to-measure (DTM)
+     estimation, based on a scikit-learn KDTree. Tuned version of a KNN estimator.
+     """
+     def __init__(self, masses=[.1], metric:str="euclidean", **_kdtree_kwargs):
+         """
+         masses : list of floats in (0,1]
+             The mass thresholds
+         metric :
+             The distance between points to consider
+         """
+         self.masses = masses
+         self.metric = metric
+         self._kdtree_kwargs = _kdtree_kwargs
+         self._ks = None
+         self._kdtree = None
+         self._X = None
+         self._backend = None
+
+     def fit(self, X, sample_weights=None, y=None):
+         if len(self.masses) == 0: return self
+         assert np.max(self.masses) <= 1, "All masses should be in (0,1]."
+         from sklearn.neighbors import KDTree
+         if not isinstance(X, np.ndarray):
+             import torch
+             assert isinstance(X, torch.Tensor), "Backend has to be numpy or torch"
+             _X = X.detach()
+             self._backend = 'torch'
+         else:
+             _X = X
+             self._backend = 'numpy'
+         self._ks = np.array([int(mass*X.shape[0])+1 for mass in self.masses])
+         self._kdtree = KDTree(_X, metric=self.metric, **self._kdtree_kwargs)
+         self._X = X
+         return self
+
+     def score_samples(self, Y, X=None):
+         """Returns the DTM of each point in `Y`.
+
+         Parameters
+         ----------
+         Y : tensor (m, d)
+             `m` points with `d` dimensions for which the DTM will be calculated
+
+         Returns
+         -------
+         The DTMs of Y, one row per mass in `masses`.
+         """
+         if len(self.masses) == 0: return np.empty((0, len(Y)))
+         assert Y.ndim == 2
+         if self._backend == 'torch':
+             _Y = Y.detach().numpy()
+         else:
+             _Y = Y
+         NN_Dist, NN = self._kdtree.query(_Y, self._ks.max(), return_distance=True)
+         DTMs = np.array([((NN_Dist**2)[:,:k].mean(1))**.5 for k in self._ks])
+         return DTMs
+
+     def score_samples_diff(self, Y):
+         """Differentiable version of `score_samples`, for the torch backend.
+
+         Parameters
+         ----------
+         Y : tensor (m, d)
+             `m` points with `d` dimensions for which the DTM will be calculated
+
+         Returns
+         -------
+         The DTMs of Y, one tensor per mass in `masses`.
+         """
+         import torch
+         assert Y.ndim == 2
+         assert self._backend == 'torch', 'Use the non-diff version with numpy.'
+         if len(self.masses) == 0: return torch.empty(0, len(Y))
+         NN = self._kdtree.query(Y.detach(), self._ks.max(), return_distance=False)
+         DTMs = tuple((((self._X[NN] - Y[:,None,:])**2)[:,:k].sum(dim=(1,2)) / k)**.5 for k in self._ks)  # TODO : the kdtree already computes distances; find a kdtree implementation that is pytorch-differentiable
+         return DTMs
+
+ # def _pts_convolution_sparse(pts:np.ndarray, pts_weights:np.ndarray, filtration_grid:Iterable[np.ndarray], kernel="gaussian", bandwidth=0.1, **more_kde_args):
+ #     """
+ #     Old version of `convolution_signed_measures`. scikit-learn's convolution is slower than the code above.
+ #     """
+ #     from sklearn.neighbors import KernelDensity
+ #     grid_iterator = np.asarray(list(product(*filtration_grid)))
+ #     grid_shape = [len(f) for f in filtration_grid]
+ #     if len(pts) == 0:
+ #         # warn("Found a trivial signed measure !")
+ #         return np.zeros(shape=grid_shape)
+ #     kde = KernelDensity(kernel=kernel, bandwidth=bandwidth, rtol=1e-4, **more_kde_args)  # TODO : check rtol
+ #
+ #     pos_indices = pts_weights > 0
+ #     neg_indices = pts_weights < 0
+ #     img_pos = kde.fit(pts[pos_indices], sample_weight=pts_weights[pos_indices]).score_samples(grid_iterator).reshape(grid_shape)
+ #     img_neg = kde.fit(pts[neg_indices], sample_weight=-pts_weights[neg_indices]).score_samples(grid_iterator).reshape(grid_shape)
+ #     return np.exp(img_pos) - np.exp(img_neg)
+
+
+ ### Precompiles the convolution
+ # _test(r=2, b=.5, plot=False)
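For reference, a minimal usage sketch of `convolution_signed_measures` on a made-up signed measure (not part of the wheel contents); it uses the default pykeops backend, so PyKeops must be installed.

import numpy as np
from multipers.ml.convolutions import convolution_signed_measures

# One toy signed measure on [0,1]^2: two Dirac masses of opposite sign.
sm = (np.array([[0.2, 0.3], [0.5, 0.5]]), np.array([1.0, -1.0]))
filtrations = [np.linspace(0, 1, 50), np.linspace(0, 1, 50)]
# Two "samples", each carrying that single measure; one flattened image per sample.
imgs = convolution_signed_measures([[sm], [sm]], filtrations, bandwidth=0.1)
print(imgs.shape)  # (2, 2500), i.e. one flattened 50x50 image each

The `KDE` class can also be used on its own, scikit-learn style, e.g. `KDE(bandwidth=0.2, return_log=False).fit(X).score_samples(Y)` for a weighted, differentiable kernel density estimate.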
multipers/ml/invariants_with_persistable.py
@@ -0,0 +1,79 @@
+ import persistable
+
+
+ # Requires installing ripser (pip install ripser) as well as persistable from the higher-homology branch,
+ # which can be done as follows:
+ # pip install git+https://github.com/LuisScoccola/persistable.git@higher-homology
+ # NOTE: only accepts a distance matrix as input.
+ def hf_degree_rips(
+     distance_matrix,
+     min_rips_value,
+     max_rips_value,
+     max_normalized_degree,
+     min_normalized_degree,
+     grid_granularity,
+     max_homological_dimension,
+     subsample_size=None,
+ ):
+     if subsample_size is None:
+         p = persistable.Persistable(distance_matrix, metric="precomputed")
+     else:
+         p = persistable.Persistable(distance_matrix, metric="precomputed", subsample=subsample_size)
+
+     rips_values, normalized_degree_values, hilbert_functions, minimal_hilbert_decompositions = p._hilbert_function(
+         min_rips_value,
+         max_rips_value,
+         max_normalized_degree,
+         min_normalized_degree,
+         grid_granularity,
+         homological_dimension=max_homological_dimension,
+     )
+
+     return rips_values, normalized_degree_values, hilbert_functions, minimal_hilbert_decompositions
+
+
+ def hf_h0_degree_rips(
+     point_cloud,
+     min_rips_value,
+     max_rips_value,
+     max_normalized_degree,
+     min_normalized_degree,
+     grid_granularity,
+ ):
+     p = persistable.Persistable(point_cloud, n_neighbors="all")
+
+     rips_values, normalized_degree_values, hilbert_functions, minimal_hilbert_decompositions = p._hilbert_function(
+         min_rips_value,
+         max_rips_value,
+         max_normalized_degree,
+         min_normalized_degree,
+         grid_granularity,
+     )
+
+     return rips_values, normalized_degree_values, hilbert_functions[0], minimal_hilbert_decompositions[0]
+
+
+ def ri_h0_degree_rips(
+     point_cloud,
+     min_rips_value,
+     max_rips_value,
+     max_normalized_degree,
+     min_normalized_degree,
+     grid_granularity,
+ ):
+     p = persistable.Persistable(point_cloud, n_neighbors="all")
+
+     rips_values, normalized_degree_values, rank_invariant, _, _ = p._rank_invariant(
+         min_rips_value,
+         max_rips_value,
+         max_normalized_degree,
+         min_normalized_degree,
+         grid_granularity,
+     )
+
+     return rips_values, normalized_degree_values, rank_invariant
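For reference, a minimal usage sketch of `hf_degree_rips` on a toy point cloud (not part of the wheel contents). It assumes the higher-homology branch of persistable mentioned above is installed; the grid bounds are arbitrary, and the input must be a distance matrix.

import numpy as np
from scipy.spatial.distance import cdist
from multipers.ml.invariants_with_persistable import hf_degree_rips

rng = np.random.default_rng(0)
points = rng.normal(size=(100, 2))
distance_matrix = cdist(points, points)  # the function only accepts distance matrices

rips_values, degree_values, hilbert_fns, hilbert_decomps = hf_degree_rips(
    distance_matrix,
    min_rips_value=0.0,
    max_rips_value=2.0,
    max_normalized_degree=0.5,
    min_normalized_degree=0.0,
    grid_granularity=30,
    max_homological_dimension=1,
)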