multipers-1.0-cp311-cp311-manylinux_2_34_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of multipers might be problematic.
- multipers/__init__.py +4 -0
- multipers/_old_rank_invariant.pyx +328 -0
- multipers/_signed_measure_meta.py +72 -0
- multipers/data/MOL2.py +350 -0
- multipers/data/UCR.py +18 -0
- multipers/data/__init__.py +1 -0
- multipers/data/graphs.py +272 -0
- multipers/data/immuno_regions.py +27 -0
- multipers/data/minimal_presentation_to_st_bf.py +0 -0
- multipers/data/pytorch2simplextree.py +91 -0
- multipers/data/shape3d.py +101 -0
- multipers/data/synthetic.py +68 -0
- multipers/distances.py +100 -0
- multipers/euler_characteristic.cpython-311-x86_64-linux-gnu.so +0 -0
- multipers/euler_characteristic.pyx +132 -0
- multipers/function_rips.cpython-311-x86_64-linux-gnu.so +0 -0
- multipers/function_rips.pyx +101 -0
- multipers/hilbert_function.cpython-311-x86_64-linux-gnu.so +0 -0
- multipers/hilbert_function.pyi +46 -0
- multipers/hilbert_function.pyx +145 -0
- multipers/ml/__init__.py +0 -0
- multipers/ml/accuracies.py +61 -0
- multipers/ml/convolutions.py +384 -0
- multipers/ml/invariants_with_persistable.py +79 -0
- multipers/ml/kernels.py +128 -0
- multipers/ml/mma.py +422 -0
- multipers/ml/one.py +472 -0
- multipers/ml/point_clouds.py +191 -0
- multipers/ml/signed_betti.py +50 -0
- multipers/ml/signed_measures.py +1046 -0
- multipers/ml/sliced_wasserstein.py +313 -0
- multipers/ml/tools.py +99 -0
- multipers/multiparameter_edge_collapse.py +29 -0
- multipers/multiparameter_module_approximation.cpython-311-x86_64-linux-gnu.so +0 -0
- multipers/multiparameter_module_approximation.pxd +147 -0
- multipers/multiparameter_module_approximation.pyi +439 -0
- multipers/multiparameter_module_approximation.pyx +931 -0
- multipers/pickle.py +53 -0
- multipers/plots.py +207 -0
- multipers/point_measure_integration.cpython-311-x86_64-linux-gnu.so +0 -0
- multipers/point_measure_integration.pyx +59 -0
- multipers/rank_invariant.cpython-311-x86_64-linux-gnu.so +0 -0
- multipers/rank_invariant.pyx +154 -0
- multipers/simplex_tree_multi.cpython-311-x86_64-linux-gnu.so +0 -0
- multipers/simplex_tree_multi.pxd +121 -0
- multipers/simplex_tree_multi.pyi +715 -0
- multipers/simplex_tree_multi.pyx +1284 -0
- multipers/tensor.pxd +13 -0
- multipers/test.pyx +44 -0
- multipers-1.0.dist-info/LICENSE +21 -0
- multipers-1.0.dist-info/METADATA +9 -0
- multipers-1.0.dist-info/RECORD +56 -0
- multipers-1.0.dist-info/WHEEL +5 -0
- multipers-1.0.dist-info/top_level.txt +1 -0
- multipers.libs/libtbb-5d1cde94.so.12.10 +0 -0
- multipers.libs/libtbbmalloc-5e0a3d4c.so.2.10 +0 -0
multipers/hilbert_function.pyx
ADDED

@@ -0,0 +1,145 @@
+# cimport multipers.tensor as mt
+from libc.stdint cimport intptr_t, uint16_t, uint32_t, int32_t
+from libcpp.vector cimport vector
+from libcpp cimport bool, int, float
+from libcpp.utility cimport pair
+from typing import Optional,Iterable,Callable
+
+import numpy as np
+cimport numpy as cnp
+cnp.import_array()
+
+ctypedef float value_type
+python_value_type=np.float32
+
+ctypedef int32_t indices_type # uint fails for some reason
+python_indices_type=np.int32
+
+ctypedef int32_t tensor_dtype
+python_tensor_dtype = np.int32
+
+
+ctypedef pair[vector[vector[indices_type]], vector[tensor_dtype]] signed_measure_type
+
+cdef extern from "multi_parameter_rank_invariant/hilbert_function.h" namespace "Gudhi::multiparameter::hilbert_function":
+    void get_hilbert_surface_python(const intptr_t, tensor_dtype*, const vector[indices_type], const vector[indices_type], bool, bool, indices_type, bool) except + nogil
+    signed_measure_type get_hilbert_signed_measure(const intptr_t, tensor_dtype*, const vector[indices_type], const vector[indices_type], bool, indices_type, bool, bool) except + nogil
+
+
+def hilbert_signed_measure(simplextree, vector[indices_type] degrees, mass_default=None, plot=False, indices_type n_jobs=0, bool verbose=False, bool expand_collapse=False):
+    """
+    Computes the signed measures given by the decomposition of the Hilbert function.
+
+    Input
+    -----
+    - simplextree: SimplexTreeMulti, the multifiltered simplicial complex
+    - degrees: array-like of ints, the degrees to compute
+    - mass_default: either None, 'auto', 'inf', or array-like of floats. Where to put the default mass to get a zero-mass measure.
+    - plot: bool, plots the computed measures if true.
+    - n_jobs: int, number of jobs. Defaults to #cpu; when doing parallel computations of signed measures, we recommend setting this to 1.
+    - verbose: bool, prints C++ logs.
+
+    Output
+    ------
+    `[signed_measure_of_degree for degree in degrees]`
+    with `signed_measure_of_degree` of the form `(dirac location, dirac weights)`.
+    """
+    assert simplextree._is_squeezed > 0, "Squeeze grid first."
+    cdef bool zero_pad = mass_default is not None
+    grid_conversion = [np.asarray(f) for f in simplextree.filtration_grid]
+    # assert simplextree.num_parameters == 2
+    grid_shape = np.array([len(f) for f in grid_conversion])
+
+    # match mass_default: ## Cython bug
+    #     case None:
+    #         pass
+    #     case "inf":
+    #         mass_default = np.array([np.inf]*simplextree.num_parameters)
+    #     case "auto":
+    #         mass_default = np.array([1.1*np.max(f) - 0.1*np.min(f) for f in grid_conversion])
+    #     case _:
+    #         mass_default = np.asarray(mass_default)
+    #         assert mass_default.ndim == 1 and mass_default.shape[0] == simplextree.num_parameters
+    if mass_default is None:
+        mass_default = mass_default
+    else:
+        mass_default = np.asarray(mass_default)
+        assert mass_default.ndim == 1 and mass_default.shape[0] == simplextree.num_parameters
+    if zero_pad:
+        for i, _ in enumerate(grid_shape):
+            grid_shape[i] += 1 # adds a 0
+        for i,f in enumerate(grid_conversion):
+            grid_conversion[i] = np.concatenate([f, [mass_default[i]]])
+    assert len(grid_shape) == simplextree.num_parameters, "Grid shape size has to be the number of parameters."
+    grid_shape_with_degree = np.asarray(np.concatenate([[len(degrees)], grid_shape]), dtype=python_indices_type)
+    container_array = np.ascontiguousarray(np.zeros(grid_shape_with_degree, dtype=python_tensor_dtype).flatten())
+    assert len(container_array) < np.iinfo(np.uint32).max, "Too large container. Raise an issue on github if you encounter this issue. (Due to tensor's operator[])"
+    cdef intptr_t simplextree_ptr = simplextree.thisptr
+    cdef vector[indices_type] c_grid_shape = grid_shape_with_degree
+    cdef tensor_dtype[::1] container = container_array
+    cdef tensor_dtype* container_ptr = &container[0]
+    cdef signed_measure_type out
+    with nogil:
+        out = get_hilbert_signed_measure(simplextree_ptr, container_ptr, c_grid_shape, degrees, zero_pad, n_jobs, verbose, expand_collapse)
+    pts, weights = np.asarray(out.first, dtype=int).reshape(-1, simplextree.num_parameters+1), np.asarray(out.second, dtype=int)
+    # return pts, weights
+    degree_indices = [np.argwhere(pts[:,0] == degree_index).flatten() for degree_index, degree in enumerate(degrees)] ## TODO : maybe optimize
+    sms = [(pts[id,1:],weights[id]) for id in degree_indices]
+    for degree_index,(pts,weights) in enumerate(sms):
+        coords = np.empty(shape=pts.shape, dtype=float)
+        for i in range(coords.shape[1]):
+            coords[:,i] = grid_conversion[i][pts[:,i]]
+        sms[degree_index]=(coords, weights)
+    if plot:
+        from multipers.plots import plot_signed_measures
+        plot_signed_measures(sms)
+    return sms
+
+
+def hilbert_surface(simplextree, vector[indices_type] degrees, mass_default=None, bool mobius_inversion=False, bool plot=False, indices_type n_jobs=0, bool expand_collapse=False):
+    """
+    Computes the Hilbert function.
+
+    Input
+    -----
+    - simplextree: SimplexTreeMulti, the multifiltered simplicial complex
+    - degrees: array-like of ints, the degrees to compute
+    - mass_default: either None, 'auto', 'inf', or array-like of floats. Where to put the default mass to get a zero-mass measure.
+    - plot: bool, plots the computed measures if true.
+    - n_jobs: int, number of jobs. Defaults to #cpu; when doing parallel computations of signed measures, we recommend setting this to 1.
+    - verbose: bool, prints C++ logs.
+
+    Output
+    ------
+    Integer array of the form `(num_degrees, num_filtration_values_of_parameter 1, ..., num_filtration_values_of_parameter n)`
+    """
+    assert simplextree._is_squeezed > 0, "Squeeze grid first."
+    cdef bool zero_pad = mass_default is not None
+    grid_conversion = [np.asarray(f) for f in simplextree.filtration_grid]
+    grid_shape = np.array([len(f) for f in grid_conversion])
+    if mass_default is None:
+        mass_default = mass_default
+    else:
+        mass_default = np.asarray(mass_default)
+        assert mass_default.ndim == 1 and mass_default.shape[0] == simplextree.num_parameters
+    if zero_pad:
+        for i, _ in enumerate(grid_shape):
+            grid_shape[i] += 1 # adds a 0
+        for i,f in enumerate(grid_conversion):
+            grid_conversion[i] = np.concatenate([f, [mass_default[i]]])
+    assert len(grid_shape) == simplextree.num_parameters, "Grid shape size has to be the number of parameters."
+    grid_shape_with_degree = np.asarray(np.concatenate([[len(degrees)], grid_shape]), dtype=python_indices_type)
+    container_array = np.ascontiguousarray(np.zeros(grid_shape_with_degree, dtype=python_tensor_dtype).flatten())
+    assert len(container_array) < np.iinfo(np.uint32).max, "Too large container. Raise an issue on github if you encounter this issue. (Due to tensor's operator[])"
+    cdef intptr_t simplextree_ptr = simplextree.thisptr
+    cdef vector[indices_type] c_grid_shape = grid_shape_with_degree
+    cdef tensor_dtype[::1] container = container_array
+    cdef tensor_dtype* container_ptr = &container[0]
+    with nogil:
+        get_hilbert_surface_python(simplextree_ptr, container_ptr, c_grid_shape, degrees, mobius_inversion, zero_pad, n_jobs, expand_collapse)
+    out = (grid_conversion, container_array.reshape(grid_shape_with_degree))
+    if plot:
+        from multipers.plots import plot_surfaces
+        plot_surfaces(out)
+    return out
+
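A minimal usage sketch of `hilbert_signed_measure`, assuming a simplex tree already squeezed onto its filtration grid; only the function signature and the `_is_squeezed`/`filtration_grid` requirements come from the code above, the construction of the tree itself is elided:

    # Hypothetical sketch: how the squeezed SimplexTreeMulti is built is assumed.
    from multipers.hilbert_function import hilbert_signed_measure

    st = ...  # a squeezed 2-parameter SimplexTreeMulti (st._is_squeezed > 0)
    sms = hilbert_signed_measure(
        st,
        degrees=[0, 1],             # one signed measure per homological degree
        mass_default=[10.0, 10.0],  # explicit default-mass corner -> zero-mass measures
        n_jobs=1,
    )
    (pts0, w0), (pts1, w1) = sms    # each: (dirac locations, dirac weights)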
multipers/ml/__init__.py
ADDED
File without changes
multipers/ml/accuracies.py
ADDED

@@ -0,0 +1,61 @@
+import pandas as pd
+from warnings import warn
+import numpy as np
+from tqdm import tqdm
+from os.path import exists
+
+
+def accuracy_to_csv(X, Y, cl, k:float=10, dataset:str="", shuffle=True, verbose:bool=True, **more_columns):
+    assert k > 0, "k is either the number of kfold > 1 or the test size > 0."
+    if k > 1:
+        k = int(k)
+        from sklearn.model_selection import StratifiedKFold as KFold
+        kfold = KFold(k, shuffle=shuffle).split(X, Y)
+        accuracies = np.zeros(k)
+        for i, (train_idx, test_idx) in enumerate(tqdm(kfold, total=k, desc="Computing kfold")):
+            xtrain = [X[i] for i in train_idx]
+            ytrain = [Y[i] for i in train_idx]
+            cl.fit(xtrain, ytrain)
+            xtest = [X[i] for i in test_idx]
+            ytest = [Y[i] for i in test_idx]
+            accuracies[i] = cl.score(xtest, ytest)
+            if verbose:
+                print(f"step {i+1}, {dataset} : {accuracies[i]}", flush=True)
+                try:
+                    print("Best classification parameters : ", cl.best_params_)
+                except:
+                    None
+
+        print(f"Accuracy {dataset} : {np.mean(accuracies).round(decimals=3)}±{np.std(accuracies).round(decimals=3)} ")
+    elif k > 0:
+        from sklearn.model_selection import train_test_split
+        print("Computing accuracy, with train test split", flush=True)
+        xtrain, xtest, ytrain, ytest = train_test_split(X, Y, shuffle=shuffle, test_size=k)
+        print("Fitting...", end="", flush=True)
+        cl.fit(xtrain, ytrain)
+        print("Computing score...", end="", flush=True)
+        accuracies = cl.score(xtest, ytest)
+        try:
+            print("Best classification parameters : ", cl.best_params_)
+        except:
+            None
+        print("Done.")
+        if verbose: print(f"Accuracy {dataset} : {accuracies} ")
+    file_path:str = f"result_{dataset}.csv".replace("/", "_").replace(".off", "")
+    columns:list[str] = ["dataset", "cv", "mean", "std"]
+    if exists(file_path):
+        df:pd.DataFrame = pd.read_csv(file_path)
+    else:
+        df:pd.DataFrame = pd.DataFrame(columns=columns)
+    more_names = []
+    more_values = []
+    for key, value in more_columns.items():
+        if key not in columns:
+            more_names.append(key)
+            more_values.append(value)
+        else:
+            warn(f"Duplicate key {key} ! with value {value}")
+    new_line:pd.DataFrame = pd.DataFrame([[dataset, k, np.mean(accuracies).round(decimals=3), np.std(accuracies).round(decimals=3)]+more_values], columns=columns+more_names)
+    print(new_line)
+    df = pd.concat([df, new_line])
+    df.to_csv(file_path, index=False)
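`accuracy_to_csv` runs either a stratified k-fold (k > 1) or a single train/test split (0 < k <= 1), then appends one summary row to `result_<dataset>.csv`; extra keyword arguments become extra CSV columns. A minimal sketch on placeholder data (the classifier choice is illustrative, any estimator with fit/score works):

    import numpy as np
    from sklearn.ensemble import RandomForestClassifier
    from multipers.ml.accuracies import accuracy_to_csv

    X = list(np.random.rand(100, 5))             # any indexable features
    Y = list(np.random.randint(0, 2, size=100))  # labels
    accuracy_to_csv(X, Y, RandomForestClassifier(), k=5,
                    dataset="toy", pipeline="rf")  # "pipeline" lands in its own column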
multipers/ml/convolutions.py
ADDED

@@ -0,0 +1,384 @@
+from typing import Iterable
+from joblib import Parallel, delayed
+import numpy as np
+from itertools import product
+
+# from numba import njit, prange
+# import numba.np.unsafe.ndarray ## WORKAROUND FOR NUMBA
+
+# @njit(nogil=True,fastmath=True,inline="always", cache=True)
+# def _pts_convolution_gaussian_pt(pts, weights, pt, bandwidth):
+#     """
+#     Evaluates the convolution of the signed measure (pts, weights) with a gaussian measure of bandwidth bandwidth, at point pt
+#
+#     Parameters
+#     ----------
+#     - pts : (npts) x (num_parameters)
+#     - weight : (npts)
+#     - pt : (num_parameters)
+#     - bandwidth : real
+#
+#     Outputs
+#     -------
+#     The float value
+#     """
+#     num_parameters = pts.shape[1]
+#     distances = np.empty(len(pts), dtype=float)
+#     for i in prange(len(pts)):
+#         distances[i] = np.sum((pt - pts[i])**2)/(2*bandwidth**2)
+#     distances = np.exp(-distances)*weights / (np.sqrt(2*np.pi)*(bandwidth**(num_parameters / 2))) # This last renormalization is not necessary
+#     return np.mean(distances)
+
+
+# @njit(nogil=True,fastmath=True,inline="always", cache=True)
+# def _pts_convolution_exponential_pt(pts, weights, pt, bandwidth):
+#     """
+#     Evaluates the convolution of the signed measure (pts, weights) with an exponential measure of bandwidth bandwidth, at point pt
+#
+#     Parameters
+#     ----------
+#     - pts : (npts) x (num_parameters)
+#     - weight : (npts)
+#     - pt : (num_parameters)
+#     - bandwidth : real
+#
+#     Outputs
+#     -------
+#     The float value
+#     """
+#     num_parameters = pts.shape[1]
+#     distances = np.empty(len(pts), dtype=float)
+#     for i in prange(len(pts)):
+#         distances[i] = np.linalg.norm(pt - pts[i])
+#     # distances = np.linalg.norm(pts-pt, axis=1)
+#     distances = np.exp(-distances/bandwidth)*weights / (bandwidth**num_parameters) # This last renormalization is not necessary
+#     return np.mean(distances)
+
+# @njit(nogil=True, cache=True) # not sure if parallel here is worth it...
+# def _pts_convolution_sparse_pts(pts:np.ndarray, weights:np.ndarray, pt_list:np.ndarray, bandwidth, kernel:int=0):
+#     """
+#     Evaluates the convolution of the signed measure (pts, weights) with a gaussian measure of bandwidth bandwidth, at points pt_list
+#
+#     Parameters
+#     ----------
+#     - pts : (npts) x (num_parameters)
+#     - weight : (npts)
+#     - pt : (n)x(num_parameters)
+#     - bandwidth : real
+#
+#     Outputs
+#     -------
+#     The values : (n)
+#     """
+#     if kernel == 0:
+#         return np.array([_pts_convolution_gaussian_pt(pts,weights,pt_list[i],bandwidth) for i in prange(pt_list.shape[0])])
+#     elif kernel == 1:
+#         return np.array([_pts_convolution_exponential_pt(pts,weights,pt_list[i],bandwidth) for i in prange(pt_list.shape[0])])
+#     else:
+#         raise Exception("Unsupported kernel")
+
+def convolution_signed_measures(iterable_of_signed_measures, filtrations, bandwidth, flatten:bool=True, n_jobs:int=1, backend="pykeops", kernel="gaussian", **kwargs):
+    """
+    Evaluates the convolution of the signed measures Iterable(pts, weights) with a gaussian measure of bandwidth bandwidth, on a grid given by the filtrations
+
+    Parameters
+    ----------
+    - iterable_of_signed_measures : (num_signed_measure) x [ (npts) x (num_parameters), (npts)]
+    - filtrations : (num_parameter) x (filtration values)
+    - flatten : bool
+    - n_jobs : int
+
+    Outputs
+    -------
+    The concatenated images, for each signed measure (num_signed_measures) x (len(f) for f in filtration_values)
+    """
+    grid_iterator = np.array(list(product(*filtrations)), dtype=float)
+    match backend:
+        case "sklearn":
+            def convolution_signed_measures_on_grid(signed_measures:Iterable[tuple[np.ndarray,np.ndarray]]):
+                return np.concatenate([
+                    _pts_convolution_sparse_old(pts=pts, pts_weights=weights, grid_iterator=grid_iterator, bandwidth=bandwidth, kernel=kernel, **kwargs) for pts,weights in signed_measures
+                ], axis=0)
+        # case "numba":
+        #     kernel2int = {"gaussian":0, "exponential":1, "other":2}
+        #     def convolution_signed_measures_on_grid(signed_measures:Iterable[tuple[np.ndarray,np.ndarray]]):
+        #         return np.concatenate([
+        #             _pts_convolution_sparse_pts(pts,weights, grid_iterator, bandwidth, kernel=kernel2int[kernel]) for pts,weights in signed_measures
+        #         ], axis=0)
+        case "pykeops":
+            def convolution_signed_measures_on_grid(signed_measures:Iterable[tuple[np.ndarray,np.ndarray]]):
+                return np.concatenate([
+                    _pts_convolution_pykeops(pts=pts, pts_weights=weights, grid_iterator=grid_iterator, bandwidth=bandwidth, kernel=kernel, **kwargs) for pts,weights in signed_measures
+                ], axis=0)
+            ## compiles first once
+            pts,weights = iterable_of_signed_measures[0][0]
+            small_pts,small_weights = pts[:2], weights[:2]
+
+            _pts_convolution_pykeops(small_pts, small_weights, grid_iterator=grid_iterator, bandwidth=bandwidth, kernel=kernel, **kwargs)
+
+    if n_jobs > 1 or n_jobs == -1:
+        prefer = "processes" if backend == "sklearn" else "threads"
+        convolutions = Parallel(n_jobs=n_jobs, prefer=prefer)(delayed(convolution_signed_measures_on_grid)(sms) for sms in iterable_of_signed_measures)
+    else: convolutions = [convolution_signed_measures_on_grid(sms) for sms in iterable_of_signed_measures]
+    if not flatten:
+        out_shape = [-1] + [len(f) for f in filtrations] # Degree
+        convolutions = [x.reshape(out_shape) for x in convolutions]
+    return np.asarray(convolutions, dtype=float)
+
+# def _test(r=1000, b=0.5, plot=True, kernel=0):
+#     import matplotlib.pyplot as plt
+#     pts, weigths = np.array([[1.,1.], [1.1,1.1]]), np.array([1,-1])
+#     pt_list = np.array(list(product(*[np.linspace(0,2,r)]*2)))
+#     img = _pts_convolution_sparse_pts(pts,weigths, pt_list,b,kernel=kernel)
+#     if plot:
+#         plt.imshow(img.reshape(r,-1).T, origin="lower")
+#         plt.show()
+
+def _pts_convolution_sparse_old(pts:np.ndarray, pts_weights:np.ndarray, grid_iterator, kernel="gaussian", bandwidth=0.1, **more_kde_args):
+    """
+    Old version of `convolution_signed_measures`. scikit-learn's convolution is slower than the code above.
+    """
+    from sklearn.neighbors import KernelDensity
+    if len(pts) == 0:
+        # warn("Found a trivial signed measure !")
+        return np.zeros(len(grid_iterator))
+    kde = KernelDensity(kernel=kernel, bandwidth=bandwidth, rtol=1e-4, **more_kde_args) # TODO : check rtol
+    pos_indices = pts_weights > 0
+    neg_indices = pts_weights < 0
+    img_pos = np.zeros(len(grid_iterator)) if pos_indices.sum() == 0 else kde.fit(
+        pts[pos_indices], sample_weight=pts_weights[pos_indices]
+    ).score_samples(grid_iterator)
+    img_neg = np.zeros(len(grid_iterator)) if neg_indices.sum() == 0 else kde.fit(pts[neg_indices], sample_weight=-pts_weights[neg_indices]).score_samples(grid_iterator)
+    return np.exp(img_pos) - np.exp(img_neg)
+
+def _pts_convolution_pykeops(pts:np.ndarray, pts_weights:np.ndarray, grid_iterator, kernel="gaussian", bandwidth=0.1, **more_kde_args):
+    """
+    PyKeops convolution
+    """
+    kde = KDE(kernel=kernel, bandwidth=bandwidth, return_log=False, **more_kde_args)
+    return kde.fit(pts, sample_weights=np.asarray(pts_weights, dtype=pts.dtype)).score_samples(grid_iterator)
+
+
+import numpy as np
+
+
+## TODO : multiple bandwidths at once with lazy tensors
+class KDE():
+    """
+    Fast, scikit-style, and differentiable kernel density estimation, using PyKeops.
+    """
+    def __init__(self, bandwidth=1, kernel="gaussian", return_log=True):
+        """
+        bandwidth : numeric
+            bandwidth for Gaussian kernel
+        """
+        self.X = None
+        self.bandwidth = bandwidth
+        self.kernel = kernel
+        self._kernel = None
+        self._backend = None
+        self._sample_weights = None
+        self.return_log = return_log
+
+    def fit(self, X, sample_weights=None, y=None):
+        self.X = X
+        self._sample_weights = sample_weights
+        if isinstance(X, np.ndarray):
+            self._backend = np
+        else:
+            import torch
+            if isinstance(X, torch.Tensor):
+                self._backend = torch
+            else:
+                raise Exception('Unsupported backend.')
+        match self.kernel:
+            case "gaussian":
+                self._kernel = self.gaussian_kernel
+            case "exponential":
+                self._kernel = self.exponential_kernel
+            case _:
+                assert callable(self.kernel), f"--------------------------\nUnknown kernel {self.kernel}.\n--------------------------\n Custom kernel has to be callable, (x:LazyTensor(n,1,D),y:LazyTensor(1,m,D),bandwidth:float) ---> kernel matrix"
+                self._kernel = self.kernel
+        return self
+
+    @staticmethod
+    def gaussian_kernel(x_i, y_j, bandwidth):
+        exponent = -( ( (x_i - y_j) / bandwidth)**2 ).sum(dim=2) / 2
+        kernel = (exponent).exp()/bandwidth
+        return kernel
+    @staticmethod
+    def exponential_kernel(x_i, y_j, bandwidth):
+        exponent = - (((((x_i - y_j)**2).sum())**1/2)/bandwidth).sum(dim=2)
+        kernel = (exponent).exp()/bandwidth
+        return kernel
+    @staticmethod
+    def to_lazy(X, Y, x_weights):
+        if isinstance(X, np.ndarray):
+            from pykeops.numpy import LazyTensor
+            lazy_x = LazyTensor(X.reshape((X.shape[0], 1, X.shape[1])))
+            lazy_y = LazyTensor(Y.reshape((1, Y.shape[0], Y.shape[1])))
+            if x_weights is not None:
+                w = LazyTensor(x_weights[:,None], axis=0)
+                return lazy_x, lazy_y, w
+            return lazy_x, lazy_y, None
+        import torch
+        if isinstance(X, torch.Tensor):
+            from pykeops.torch import LazyTensor
+            lazy_x = LazyTensor(X.view(X.shape[0], 1, X.shape[1]))
+            lazy_y = LazyTensor(Y.view(1, Y.shape[0], Y.shape[1]))
+            if x_weights is not None:
+                w = LazyTensor(x_weights[:,None], axis=0)
+                return lazy_x, lazy_y, w
+            return lazy_x, lazy_y, None
+        raise Exception('Bad tensor type.')
+
+    def score_samples(self, Y, X=None, return_kernel=False):
+        """Returns the kernel density estimates of each point in `Y`.
+
+        Parameters
+        ----------
+        Y : tensor (m, d)
+            `m` points with `d` dimensions for which the probability density will
+            be calculated
+        X : tensor (n, d), optional
+            `n` points with `d` dimensions to which KDE will be fit. Provided to
+            allow batch calculations in `log_prob`. By default, `X` is None and
+            all points used to initialize KernelDensityEstimator are included.
+
+        Returns
+        -------
+        log_probs : tensor (m)
+            log probability densities for each of the queried points in `Y`
+        """
+        X = self.X if X is None else X
+        assert Y.shape[1] == X.shape[1] and X.ndim == Y.ndim == 2
+        lazy_x, lazy_y, w = self.to_lazy(X, Y, x_weights=self._sample_weights)
+        kernel = self._kernel(lazy_x, lazy_y, self.bandwidth)
+        if w is not None: kernel *= w
+        if return_kernel: return kernel
+        density_estimation = kernel.sum(dim=0).flatten()
+        return self._backend.log(density_estimation) if self.return_log else density_estimation
+
+
+class DTM():
+    """
+    Fast, scikit-style, and differentiable DTM density estimation, using PyKeops.
+    Tuned version of KNN from
+    """
+    def __init__(self, masses=[.1], metric:str="euclidean", **_kdtree_kwargs):
+        """
+        mass : float in [0,1]
+            The mass threshold
+        metric :
+            The distance between points to consider
+        """
+        self.masses = masses
+        self.metric = metric
+        self._kdtree_kwargs = _kdtree_kwargs
+        self._ks = None
+        self._kdtree = None
+        self._X = None
+        self._backend = None
+
+    def fit(self, X, sample_weights=None, y=None):
+        if len(self.masses) == 0: return self
+        assert np.max(self.masses) <= 1, "All masses should be in (0,1]."
+        from sklearn.neighbors import KDTree
+        if not isinstance(X, np.ndarray):
+            import torch
+            assert isinstance(X, torch.Tensor), "Backend has to be numpy or torch"
+            _X = X.detach()
+            self._backend = 'torch'
+        else:
+            _X = X
+            self._backend = 'numpy'
+        self._ks = np.array([int(mass*X.shape[0])+1 for mass in self.masses])
+        self._kdtree = KDTree(_X, metric=self.metric, **self._kdtree_kwargs)
+        self._X = X
+        return self
+
+    def score_samples(self, Y, X=None):
+        """Returns the distance-to-measure (DTM) of each point in `Y`.
+
+        Parameters
+        ----------
+        Y : tensor (m, d)
+            `m` points with `d` dimensions for which the DTM will
+            be calculated
+
+        Returns
+        -------
+        the DTMs of Y, for each mass in masses.
+        """
+        if len(self.masses) == 0: return np.empty((0, len(Y)))
+        assert Y.ndim == 2
+        if self._backend == 'torch':
+            _Y = Y.detach().numpy()
+        else:
+            _Y = Y
+        NN_Dist, NN = self._kdtree.query(_Y, self._ks.max(), return_distance=True)
+        DTMs = np.array([((NN_Dist**2)[:,:k].mean(1))**.5 for k in self._ks])
+        return DTMs
+    def score_samples_diff(self, Y):
+        """Returns the differentiable distance-to-measure (DTM) of each point in `Y`.
+
+        Parameters
+        ----------
+        Y : tensor (m, d)
+            `m` points with `d` dimensions for which the DTM will
+            be calculated
+
+        Returns
+        -------
+        the DTMs of Y, for each mass in masses.
+        """
+        import torch
+        assert Y.ndim == 2
+        assert self._backend == 'torch', 'Use the non-diff version with numpy.'
+        if len(self.masses) == 0: return torch.empty(0, len(Y))
+        NN = self._kdtree.query(Y.detach(), self._ks.max(), return_distance=False)
+        DTMs = tuple((((self._X[NN] - Y[:,None,:])**2)[:,:k].sum(dim=(1,2)) / k)**.5 for k in self._ks) # TODO : kdtree already computes distance, find implementation of kdtree that is pytorch differentiable
+        return DTMs
+
+# def _pts_convolution_sparse(pts:np.ndarray, pts_weights:np.ndarray, filtration_grid:Iterable[np.ndarray], kernel="gaussian", bandwidth=0.1, **more_kde_args):
+#     """
+#     Old version of `convolution_signed_measures`. scikit-learn's convolution is slower than the code above.
+#     """
+#     from sklearn.neighbors import KernelDensity
+#     grid_iterator = np.asarray(list(product(*filtration_grid)))
+#     grid_shape = [len(f) for f in filtration_grid]
+#     if len(pts) == 0:
+#         # warn("Found a trivial signed measure !")
+#         return np.zeros(shape=grid_shape)
+#     kde = KernelDensity(kernel=kernel, bandwidth=bandwidth, rtol=1e-4, **more_kde_args) # TODO : check rtol
+#
+#     pos_indices = pts_weights>0
+#     neg_indices = pts_weights<0
+#     img_pos = kde.fit(pts[pos_indices], sample_weight=pts_weights[pos_indices]).score_samples(grid_iterator).reshape(grid_shape)
+#     img_neg = kde.fit(pts[neg_indices], sample_weight=-pts_weights[neg_indices]).score_samples(grid_iterator).reshape(grid_shape)
+#     return np.exp(img_pos) - np.exp(img_neg)
+
+### Precompiles the convolution
+# _test(r=2,b=.5, plot=False)
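The `DTM` class follows the same fit/score_samples pattern; a short sketch, with the output shape inferred from the `_ks` construction in `fit`:

    import numpy as np
    from multipers.ml.convolutions import DTM

    cloud = np.random.rand(200, 2)
    dtm = DTM(masses=[0.05, 0.1]).fit(cloud)   # k = int(mass * n) + 1 neighbors per mass
    queries = np.random.rand(10, 2)
    print(dtm.score_samples(queries).shape)    # (2, 10): one row per mass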
multipers/ml/invariants_with_persistable.py
ADDED

@@ -0,0 +1,79 @@
+import persistable
+
+
+# requires installing ripser (pip install ripser) as well as persistable from the higher-homology branch,
+# which can be done as follows:
+# pip install git+https://github.com/LuisScoccola/persistable.git@higher-homology
+# NOTE: only accepts as input a distance matrix
+def hf_degree_rips(
+    distance_matrix,
+    min_rips_value,
+    max_rips_value,
+    max_normalized_degree,
+    min_normalized_degree,
+    grid_granularity,
+    max_homological_dimension,
+    subsample_size=None,
+):
+    if subsample_size == None:
+        p = persistable.Persistable(distance_matrix, metric="precomputed")
+    else:
+        p = persistable.Persistable(distance_matrix, metric="precomputed", subsample=subsample_size)
+
+    rips_values, normalized_degree_values, hilbert_functions, minimal_hilbert_decompositions = p._hilbert_function(
+        min_rips_value,
+        max_rips_value,
+        max_normalized_degree,
+        min_normalized_degree,
+        grid_granularity,
+        homological_dimension=max_homological_dimension,
+    )
+
+    return rips_values, normalized_degree_values, hilbert_functions, minimal_hilbert_decompositions
+
+
+def hf_h0_degree_rips(
+    point_cloud,
+    min_rips_value,
+    max_rips_value,
+    max_normalized_degree,
+    min_normalized_degree,
+    grid_granularity,
+):
+    p = persistable.Persistable(point_cloud, n_neighbors="all")
+
+    rips_values, normalized_degree_values, hilbert_functions, minimal_hilbert_decompositions = p._hilbert_function(
+        min_rips_value,
+        max_rips_value,
+        max_normalized_degree,
+        min_normalized_degree,
+        grid_granularity,
+    )
+
+    return rips_values, normalized_degree_values, hilbert_functions[0], minimal_hilbert_decompositions[0]
+
+
+def ri_h0_degree_rips(
+    point_cloud,
+    min_rips_value,
+    max_rips_value,
+    max_normalized_degree,
+    min_normalized_degree,
+    grid_granularity,
+):
+    p = persistable.Persistable(point_cloud, n_neighbors="all")
+
+    rips_values, normalized_degree_values, rank_invariant, _, _ = p._rank_invariant(
+        min_rips_value,
+        max_rips_value,
+        max_normalized_degree,
+        min_normalized_degree,
+        grid_granularity,
+    )
+
+    return rips_values, normalized_degree_values, rank_invariant
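These wrappers call private persistable APIs (`p._hilbert_function`, `p._rank_invariant`) from the higher-homology branch noted in the header comment, so they may break on other persistable versions. A sketch of the calling convention of `hf_degree_rips`, on random data for illustration:

    import numpy as np
    from scipy.spatial.distance import pdist, squareform
    from multipers.ml.invariants_with_persistable import hf_degree_rips

    points = np.random.rand(50, 2)
    D = squareform(pdist(points))   # hf_degree_rips only accepts a distance matrix
    rips_values, degree_values, hfs, decomps = hf_degree_rips(
        D,
        min_rips_value=0.0, max_rips_value=1.0,
        max_normalized_degree=0.5, min_normalized_degree=0.01,
        grid_granularity=20, max_homological_dimension=1,
    )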