multipers 1.1.3__cp310-cp310-macosx_11_0_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of multipers might be problematic. Click here for more details.
- multipers/.dylibs/libtbb.12.12.dylib +0 -0
- multipers/.dylibs/libtbbmalloc.2.12.dylib +0 -0
- multipers/__init__.py +5 -0
- multipers/_old_rank_invariant.pyx +328 -0
- multipers/_signed_measure_meta.py +193 -0
- multipers/data/MOL2.py +350 -0
- multipers/data/UCR.py +18 -0
- multipers/data/__init__.py +1 -0
- multipers/data/graphs.py +466 -0
- multipers/data/immuno_regions.py +27 -0
- multipers/data/minimal_presentation_to_st_bf.py +0 -0
- multipers/data/pytorch2simplextree.py +91 -0
- multipers/data/shape3d.py +101 -0
- multipers/data/synthetic.py +68 -0
- multipers/distances.py +172 -0
- multipers/euler_characteristic.cpython-310-darwin.so +0 -0
- multipers/euler_characteristic.pyx +137 -0
- multipers/function_rips.cpython-310-darwin.so +0 -0
- multipers/function_rips.pyx +102 -0
- multipers/hilbert_function.cpython-310-darwin.so +0 -0
- multipers/hilbert_function.pyi +46 -0
- multipers/hilbert_function.pyx +151 -0
- multipers/io.cpython-310-darwin.so +0 -0
- multipers/io.pyx +176 -0
- multipers/ml/__init__.py +0 -0
- multipers/ml/accuracies.py +61 -0
- multipers/ml/convolutions.py +510 -0
- multipers/ml/invariants_with_persistable.py +79 -0
- multipers/ml/kernels.py +128 -0
- multipers/ml/mma.py +657 -0
- multipers/ml/one.py +472 -0
- multipers/ml/point_clouds.py +191 -0
- multipers/ml/signed_betti.py +50 -0
- multipers/ml/signed_measures.py +1479 -0
- multipers/ml/sliced_wasserstein.py +313 -0
- multipers/ml/tools.py +116 -0
- multipers/mma_structures.cpython-310-darwin.so +0 -0
- multipers/mma_structures.pxd +155 -0
- multipers/mma_structures.pyx +651 -0
- multipers/multiparameter_edge_collapse.py +29 -0
- multipers/multiparameter_module_approximation.cpython-310-darwin.so +0 -0
- multipers/multiparameter_module_approximation.pyi +439 -0
- multipers/multiparameter_module_approximation.pyx +311 -0
- multipers/pickle.py +53 -0
- multipers/plots.py +292 -0
- multipers/point_measure_integration.cpython-310-darwin.so +0 -0
- multipers/point_measure_integration.pyx +59 -0
- multipers/rank_invariant.cpython-310-darwin.so +0 -0
- multipers/rank_invariant.pyx +154 -0
- multipers/simplex_tree_multi.cpython-310-darwin.so +0 -0
- multipers/simplex_tree_multi.pxd +121 -0
- multipers/simplex_tree_multi.pyi +715 -0
- multipers/simplex_tree_multi.pyx +1417 -0
- multipers/slicer.cpython-310-darwin.so +0 -0
- multipers/slicer.pxd +94 -0
- multipers/slicer.pyx +276 -0
- multipers/tensor.pxd +13 -0
- multipers/test.pyx +44 -0
- multipers-1.1.3.dist-info/LICENSE +21 -0
- multipers-1.1.3.dist-info/METADATA +22 -0
- multipers-1.1.3.dist-info/RECORD +63 -0
- multipers-1.1.3.dist-info/WHEEL +5 -0
- multipers-1.1.3.dist-info/top_level.txt +1 -0
multipers/io.pyx
ADDED
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
from multipers.simplex_tree_multi import SimplexTreeMulti
|
|
2
|
+
from gudhi import SimplexTree
|
|
3
|
+
import gudhi as gd
|
|
4
|
+
import numpy as np
|
|
5
|
+
import os
|
|
6
|
+
|
|
7
|
+
mpfree_path = None
|
|
8
|
+
mpfree_in_path = "multipers_mpfree_input.scc"
|
|
9
|
+
mpfree_out_path = "multipers_mpfree_output.scc"
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def scc_parser(path: str):
    """Parse an scc2020 file into its blocks of filtrations and boundaries.

    Everything before the ``scc2020`` marker line is ignored, as are blank
    lines and lines starting with ``#``. After the marker the parser expects,
    in this order: one line holding the number of parameters, one line holding
    the whitespace-separated block sizes, and then, for every block, that many
    lines of the form ``<filtration values> ; <boundary indices>``.

    Parameters
    ----------
    path: str
        Path of the scc2020 file to read.

    Returns
    -------
    blocks: list of tuples
        One ``(block_filtrations, block_boundaries)`` tuple per declared block
        size. ``block_filtrations`` is a list of lists of floats and
        ``block_boundaries`` a list of lists of ints, one entry per parsed
        line. An empty list is returned when the file holds no block-size line.
        If the file ends before a block is complete, the block is returned
        with the lines that were available.

    Raises
    ------
    ValueError
        If the ``scc2020`` marker is missing, or if a line cannot be parsed.
    """
    with open(path, "r") as f:
        stripped_lines = [line.strip() for line in f]

    try:
        marker_index = stripped_lines.index("scc2020")
    except ValueError:
        raise ValueError(f"No `scc2020` header found in {path}.") from None

    # One forward pass over the meaningful lines; blank lines and comments
    # are dropped once here instead of being re-tested in every section.
    content = (
        line
        for line in stripped_lines[marker_index + 1 :]
        if line and not line.startswith("#")
    )

    num_parameters_line = next(content, None)
    if num_parameters_line is None:
        return []
    int(num_parameters_line)  # number of parameters: validated, otherwise unused

    block_sizes_line = next(content, None)
    if block_sizes_line is None:
        return []
    # split() without argument tolerates repeated spaces and tabs.
    block_sizes = [int(size) for size in block_sizes_line.split()]

    blocks = []
    for block_size in block_sizes:
        block_filtrations = []
        block_boundaries = []
        for _ in range(block_size):
            line = next(content, None)
            if line is None:
                break  # truncated file: keep what was read so far
            filtration, boundary = line.split(";")
            block_filtrations.append([float(x) for x in filtration.split()])
            block_boundaries.append([int(x) for x in boundary.split()])
        blocks.append((block_filtrations, block_boundaries))

    return blocks
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _init_mpfree():
    """Locate the ``mpfree`` executable and choose where its temp files live.

    When the module-level ``mpfree_path`` is ``None``, the executable is
    looked up first as ``./mpfree`` and then as ``mpfree`` on the ``PATH``.
    When ``/dev/shm/`` is writable, the module-level ``mpfree_in_path`` and
    ``mpfree_out_path`` are moved under it. Calling this function again does
    not add the prefix a second time.

    Raises
    ------
    Exception
        If no ``mpfree`` executable is configured or can be found.
    """
    from shutil import which

    global mpfree_path, mpfree_in_path, mpfree_out_path
    if mpfree_path is None:
        # A local ./mpfree takes precedence over one found on the PATH.
        mpfree_path = which("./mpfree") or which("mpfree")
    if not mpfree_path:
        raise Exception(
            "mpfree not found. Install it from https://bitbucket.org/mkerber/mpfree/, or use `mpfree_path`"
        )

    shm_memory = "/dev/shm/"  # on unix, we can write in RAM instead of disk.
    if os.access(shm_memory, os.W_OK):
        # Guard against re-prefixing when the function is called more than once.
        if not mpfree_in_path.startswith(shm_memory):
            mpfree_in_path = shm_memory + mpfree_in_path
        if not mpfree_out_path.startswith(shm_memory):
            mpfree_out_path = shm_memory + mpfree_out_path
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def minimal_presentation_from_mpfree(
    simplextree: SimplexTreeMulti,
    full_resolution: bool = True,
    dimension: int | np.int64 = 1,
    clear: bool = True,
    id: str = "",  # For parallel stuff
):
    """Run the external ``mpfree`` program on a simplextree and parse its output.

    The simplextree is written to ``mpfree_in_path + id`` with ``to_scc``,
    ``mpfree`` is executed on that file, and the file it writes to
    ``mpfree_out_path + id`` is parsed with ``scc_parser``.

    Parameters
    ----------
    simplextree: SimplexTreeMulti
        The object whose ``to_scc`` method writes the input file.
    full_resolution: bool
        When true, ``--resolution`` is passed to ``mpfree``.
    dimension: int
        Value passed to ``mpfree`` as ``--dim=<dimension>``.
    clear: bool
        When true, the temporary files are removed with ``clear_io`` before
        returning, including when an error is raised.
    id: str
        Suffix appended to both temporary file paths, so that concurrent
        calls do not share files.

    Returns
    -------
    The blocks returned by ``scc_parser`` for the ``mpfree`` output file.

    Raises
    ------
    Exception
        If no ``mpfree`` executable is configured or can be found.
    FileNotFoundError
        If ``mpfree`` did not produce its output file.
    """
    import subprocess

    global mpfree_path, mpfree_in_path, mpfree_out_path
    if not mpfree_path:
        _init_mpfree()
    if not mpfree_path:
        raise Exception(
            "mpfree not found. Install it from https://bitbucket.org/mkerber/mpfree/, or use `mpfree_path`"
        )

    # Resolved after initialisation, which may relocate the base paths.
    in_path = mpfree_in_path + id
    out_path = mpfree_out_path + id

    simplextree.to_scc(
        path=in_path,
        rivet_compatible=False,
        strip_comments=False,
        ignore_last_generators=False,
        overwrite=True,
        reverse_block=True,
    )
    # Remove any stale output so a failed run cannot be mistaken for a fresh one.
    if os.path.exists(out_path):
        os.remove(out_path)

    # Argument list instead of a shell string: paths containing spaces or
    # shell metacharacters are passed through verbatim. `~` is still expanded,
    # as the shell used to do for the executable path.
    command = [os.path.expanduser(mpfree_path)]
    if full_resolution:
        command.append("--resolution")
    command += [f"--dim={dimension}", in_path, out_path]

    try:
        completed = subprocess.run(
            command, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE
        )
        if not os.path.exists(out_path):
            details = completed.stderr.decode(errors="replace").strip()
            raise FileNotFoundError(
                f"mpfree did not produce {out_path} "
                f"(exit code {completed.returncode}). {details}"
            )
        return scc_parser(out_path)
    finally:
        if clear:
            clear_io(in_path, out_path)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def clear_io(*args):
    """Delete the mpfree temporary files, plus any extra paths given.

    Parameters
    ----------
    *args: str
        Additional file paths to delete after the module-level
        ``mpfree_in_path`` and ``mpfree_out_path``.

    Paths that do not exist are skipped silently.
    """
    global mpfree_in_path, mpfree_out_path
    for x in [mpfree_in_path, mpfree_out_path, *args]:
        try:
            os.remove(x)
        except (FileNotFoundError, NotADirectoryError):
            # Nothing to delete. Asking forgiveness avoids the window between
            # an existence check and the removal, during which another worker
            # may delete the same file.
            pass
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
from multipers.mma_structures cimport Finitely_critical_multi_filtration,uintptr_t,boundary_matrix,float,pair,vector,intptr_t
|
|
131
|
+
cdef extern from "multiparameter_module_approximation/format_python-cpp.h" namespace "Gudhi::multiparameter::mma":
|
|
132
|
+
pair[boundary_matrix, vector[Finitely_critical_multi_filtration]] simplextree_to_boundary_filtration(uintptr_t)
|
|
133
|
+
vector[pair[boundary_matrix, vector[vector[float]]]] simplextree_to_scc(uintptr_t)
|
|
134
|
+
|
|
135
|
+
def simplex_tree2boundary_filtrations(simplextree:SimplexTreeMulti | SimplexTree):
    """Convert a simplextree into a (sparse) boundary matrix and its filtration values.

    The result can be used as an input of approx afterwards.

    Parameters
    ----------
    simplextree: Gudhi or mma simplextree
        The simplextree defining the filtration to convert. A Gudhi
        ``SimplexTree`` is first wrapped into a multi-parameter simplextree.

    Returns
    -------
    B: List of lists of ints
        The boundary matrix.
    F: numpy array
        The filtrations aligned with B; the i-th simplex of this simplextree
        has boundary B[i] and filtration(s) F[i].

    Raises
    ------
    TypeError
        If ``simplextree`` is neither a ``SimplexTreeMulti`` nor a ``SimplexTree``.
    """
    cdef intptr_t cptr
    cdef pair[boundary_matrix, vector[Finitely_critical_multi_filtration]] cboundary_filtration
    if isinstance(simplextree, SimplexTreeMulti):
        multi_tree = simplextree
    elif isinstance(simplextree, SimplexTree):
        # Kept in a local so the wrapped tree stays alive while its pointer is used.
        multi_tree = gd.SimplexTreeMulti(simplextree, parameters=1)
    else:
        raise TypeError("Has to be a simplextree")
    cptr = multi_tree.thisptr
    cboundary_filtration = simplextree_to_boundary_filtration(cptr)
    sparse_boundary = cboundary_filtration.first
    filtration_values = np.array(Finitely_critical_multi_filtration.to_python(cboundary_filtration.second))
    return sparse_boundary, filtration_values
|
|
163
|
+
|
|
164
|
+
def simplextree2scc(simplextree:SimplexTreeMulti | SimplexTree):
    """Return the result of the C++ ``simplextree_to_scc`` routine for a simplextree.

    Parameters
    ----------
    simplextree: Gudhi or mma simplextree
        A Gudhi ``SimplexTree`` is first wrapped into a multi-parameter
        simplextree.

    Raises
    ------
    TypeError
        If ``simplextree`` is neither a ``SimplexTreeMulti`` nor a ``SimplexTree``.
    """
    cdef intptr_t cptr
    if isinstance(simplextree, SimplexTreeMulti):
        multi_tree = simplextree
    elif isinstance(simplextree, SimplexTree):
        # Kept in a local so the wrapped tree stays alive while its pointer is used.
        multi_tree = gd.SimplexTreeMulti(simplextree, parameters=1)
    else:
        raise TypeError("Has to be a simplextree")
    cptr = multi_tree.thisptr

    return simplextree_to_scc(cptr)
|
|
175
|
+
|
|
176
|
+
|
multipers/ml/__init__.py
ADDED
|
File without changes
|
|
multipers/ml/accuracies.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
from warnings import warn
|
|
3
|
+
import numpy as np
|
|
4
|
+
from tqdm import tqdm
|
|
5
|
+
from os.path import exists
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _print_best_params(cl):
    """Print ``cl.best_params_`` when the estimator exposes that attribute."""
    try:
        best_params = cl.best_params_
    except AttributeError:
        # Plain estimators have no hyper-parameter search result to report.
        return
    print("Best classification parameters : ", best_params)


def accuracy_to_csv(X,Y,cl, k:float=10, dataset:str = "", shuffle=True, verbose:bool=True, **more_columns):
    """Score a classifier and append the result to ``result_<dataset>.csv``.

    Parameters
    ----------
    X, Y:
        Samples and labels; both are indexed by integer position.
    cl:
        Estimator exposing ``fit(X, Y)`` and ``score(X, Y)``.
    k: float
        If ``k > 1``, the number of stratified folds (truncated to an int).
        Otherwise, with ``0 < k <= 1``, the ``test_size`` of a single
        train/test split.
    dataset: str
        Dataset name, used in the printed messages, in the ``dataset`` column
        and in the CSV file name.
    shuffle:
        Forwarded to the fold generator or to the train/test split.
    verbose: bool
        Whether to print the per-step accuracy (k-fold) or the final
        accuracy (train/test split).
    **more_columns:
        Extra ``column=value`` pairs stored in the CSV row. Keys clashing
        with the base columns are skipped with a warning.

    Raises
    ------
    ValueError
        If ``k`` is not strictly positive.
    """
    # Explicit check rather than `assert`, which is stripped under `python -O`
    # and would leave `accuracies` undefined further down.
    if not k > 0:
        raise ValueError("k is either the number of kfold > 1 or the test size > 0.")

    if k > 1:
        k = int(k)
        from sklearn.model_selection import StratifiedKFold as KFold
        kfold = KFold(k, shuffle=shuffle).split(X, Y)
        accuracies = np.zeros(k)
        for fold, (train_idx, test_idx) in enumerate(tqdm(kfold, total=k, desc="Computing kfold")):
            xtrain = [X[j] for j in train_idx]
            ytrain = [Y[j] for j in train_idx]
            cl.fit(xtrain, ytrain)
            xtest = [X[j] for j in test_idx]
            ytest = [Y[j] for j in test_idx]
            accuracies[fold] = cl.score(xtest, ytest)
            if verbose:
                print(f"step {fold+1}, {dataset} : {accuracies[fold]}", flush=True)
                _print_best_params(cl)

        print(f"Accuracy {dataset} : {np.mean(accuracies).round(decimals=3)}±{np.std(accuracies).round(decimals=3)} ")
    else:
        from sklearn.model_selection import train_test_split
        print("Computing accuracy, with train test split", flush=True)
        xtrain, xtest, ytrain, ytest = train_test_split(X, Y, shuffle=shuffle, test_size=k)
        print("Fitting...", end="", flush=True)
        cl.fit(xtrain, ytrain)
        print("Computing score...", end="", flush=True)
        accuracies = cl.score(xtest, ytest)
        _print_best_params(cl)
        print("Done.")
        if verbose:
            print(f"Accuracy {dataset} : {accuracies} ")

    file_path:str = f"result_{dataset}.csv".replace("/", "_").replace(".off", "")
    columns:list[str] = ["dataset", "cv", "mean", "std"]
    if exists(file_path):
        df:pd.DataFrame = pd.read_csv(file_path)
    else:
        df:pd.DataFrame = pd.DataFrame(columns= columns)

    more_names = []
    more_values = []
    for key, value in more_columns.items():
        if key not in columns:
            more_names.append(key)
            more_values.append(value)
        else:
            warn(f"Duplicate key {key} ! with value {value}")

    new_line:pd.DataFrame = pd.DataFrame([[dataset, k, np.mean(accuracies).round(decimals=3), np.std(accuracies).round(decimals=3)]+more_values], columns = columns+more_names)
    print(new_line)
    df = pd.concat([df, new_line])
    df.to_csv(file_path, index=False)
|