multipers 1.2.2__cp310-cp310-macosx_10_13_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of multipers might be problematic. Click here for more details.

Files changed (78)
  1. multipers/.dylibs/libc++.1.0.dylib +0 -0
  2. multipers/.dylibs/libtbb.12.12.dylib +0 -0
  3. multipers/.dylibs/libtbbmalloc.2.12.dylib +0 -0
  4. multipers/__init__.py +11 -0
  5. multipers/_signed_measure_meta.py +268 -0
  6. multipers/_slicer_meta.py +171 -0
  7. multipers/data/MOL2.py +350 -0
  8. multipers/data/UCR.py +18 -0
  9. multipers/data/__init__.py +1 -0
  10. multipers/data/graphs.py +466 -0
  11. multipers/data/immuno_regions.py +27 -0
  12. multipers/data/minimal_presentation_to_st_bf.py +0 -0
  13. multipers/data/pytorch2simplextree.py +91 -0
  14. multipers/data/shape3d.py +101 -0
  15. multipers/data/synthetic.py +68 -0
  16. multipers/distances.py +198 -0
  17. multipers/euler_characteristic.pyx +132 -0
  18. multipers/filtration_conversions.pxd +229 -0
  19. multipers/filtrations.pxd +225 -0
  20. multipers/function_rips.cpython-310-darwin.so +0 -0
  21. multipers/function_rips.pyx +105 -0
  22. multipers/grids.cpython-310-darwin.so +0 -0
  23. multipers/grids.pyx +281 -0
  24. multipers/hilbert_function.pyi +46 -0
  25. multipers/hilbert_function.pyx +153 -0
  26. multipers/io.cpython-310-darwin.so +0 -0
  27. multipers/io.pyx +571 -0
  28. multipers/ml/__init__.py +0 -0
  29. multipers/ml/accuracies.py +90 -0
  30. multipers/ml/convolutions.py +532 -0
  31. multipers/ml/invariants_with_persistable.py +79 -0
  32. multipers/ml/kernels.py +176 -0
  33. multipers/ml/mma.py +659 -0
  34. multipers/ml/one.py +472 -0
  35. multipers/ml/point_clouds.py +238 -0
  36. multipers/ml/signed_betti.py +50 -0
  37. multipers/ml/signed_measures.py +1542 -0
  38. multipers/ml/sliced_wasserstein.py +461 -0
  39. multipers/ml/tools.py +113 -0
  40. multipers/mma_structures.cpython-310-darwin.so +0 -0
  41. multipers/mma_structures.pxd +127 -0
  42. multipers/mma_structures.pyx +2433 -0
  43. multipers/multiparameter_edge_collapse.py +41 -0
  44. multipers/multiparameter_module_approximation.cpython-310-darwin.so +0 -0
  45. multipers/multiparameter_module_approximation.pyx +211 -0
  46. multipers/pickle.py +53 -0
  47. multipers/plots.py +326 -0
  48. multipers/point_measure_integration.cpython-310-darwin.so +0 -0
  49. multipers/point_measure_integration.pyx +139 -0
  50. multipers/rank_invariant.cpython-310-darwin.so +0 -0
  51. multipers/rank_invariant.pyx +229 -0
  52. multipers/simplex_tree_multi.cpython-310-darwin.so +0 -0
  53. multipers/simplex_tree_multi.pxd +129 -0
  54. multipers/simplex_tree_multi.pyi +715 -0
  55. multipers/simplex_tree_multi.pyx +4655 -0
  56. multipers/slicer.cpython-310-darwin.so +0 -0
  57. multipers/slicer.pxd +781 -0
  58. multipers/slicer.pyx +3393 -0
  59. multipers/tensor.pxd +13 -0
  60. multipers/test.pyx +44 -0
  61. multipers/tests/__init__.py +40 -0
  62. multipers/tests/old_test_rank_invariant.py +91 -0
  63. multipers/tests/test_diff_helper.py +74 -0
  64. multipers/tests/test_hilbert_function.py +82 -0
  65. multipers/tests/test_mma.py +51 -0
  66. multipers/tests/test_point_clouds.py +59 -0
  67. multipers/tests/test_python-cpp_conversion.py +82 -0
  68. multipers/tests/test_signed_betti.py +181 -0
  69. multipers/tests/test_simplextreemulti.py +98 -0
  70. multipers/tests/test_slicer.py +63 -0
  71. multipers/torch/__init__.py +1 -0
  72. multipers/torch/diff_grids.py +217 -0
  73. multipers/torch/rips_density.py +257 -0
  74. multipers-1.2.2.dist-info/LICENSE +21 -0
  75. multipers-1.2.2.dist-info/METADATA +28 -0
  76. multipers-1.2.2.dist-info/RECORD +78 -0
  77. multipers-1.2.2.dist-info/WHEEL +5 -0
  78. multipers-1.2.2.dist-info/top_level.txt +1 -0
multipers/io.pyx ADDED
@@ -0,0 +1,571 @@
1
+ import re
2
+ from gudhi import SimplexTree
3
+ import multipers.slicer as mps
4
+ import gudhi as gd
5
+ import numpy as np
6
+ import os
7
+ from shutil import which
8
+ from libcpp cimport bool
9
+ from typing import Optional, Literal
10
+ from collections import defaultdict
11
+ import itertools
12
+ import threading
13
+
14
+ # from multipers.filtration_conversions cimport *
15
+ # from multipers.mma_structures cimport boundary_matrix,float,pair,vector,intptr_t
16
+ cimport numpy as cnp
17
+
18
# Homepages of the optional external executables this module can drive.
doc_soft_urls = {
    "mpfree":"https://bitbucket.org/mkerber/mpfree/",
    "multi_chunk":"",
    "function_delaunay":"https://bitbucket.org/mkerber/function_delaunay/",
    "2pac":"https://gitlab.com/flenzen/2pac",
}
# Copy-pasteable installation snippets, embedded in the error message raised
# when a required executable cannot be located.
doc_soft_easy_install = {
    "mpfree":f"""
```sh
git clone {doc_soft_urls["mpfree"]}
cd mpfree
sudo cp mpfree /usr/bin/
cd ..
rm -rf mpfree
```
""",
    "multi_chunk":f"""
```sh
git clone {doc_soft_urls["multi_chunk"]}
cd multi_chunk
sudo cp multi_chunk /usr/bin/
cd ..
rm -rf multi_chunk
```
""",
    "function_delaunay":f"""
```sh
git clone {doc_soft_urls["function_delaunay"]}
cd function_delaunay
sudo cp main /usr/bin/function_delaunay
cd ..
rm -rf function_delaunay
```
""",
    "2pac":f"""
```sh
git clone {doc_soft_urls["2pac"]} 2pac
cd 2pac && mkdir build && cd build
cmake ..
make
sudo cp 2pac /usr/bin
```
""",
}
# Fall back to placeholder text for unknown software names.
doc_soft_urls = defaultdict(lambda:"<Unknown url>", doc_soft_urls)
doc_soft_easy_install = defaultdict(lambda:"<Unknown>", doc_soft_easy_install)

# Backend names accepted by `reduce_complex`.
available_reduce_softs = Literal["mpfree","multi_chunk","2pac"]
66
+
67
+
68
+ def _path_init(soft:str|os.PathLike):
69
+ a = which(f"./{soft}")
70
+ b = which(f"{soft}")
71
+ if a:
72
+ pathes[soft] = a
73
+ elif b:
74
+ pathes[soft] = b
75
+
76
+
77
# Resolved paths of the external executables, filled lazily by `_path_init`
# via `_init_external_softwares`; None means "not located yet".
pathes = {
    "mpfree":None,
    "2pac":None,
    "function_delaunay":None,
    "multi_chunk":None,
}

# mpfree_in_path:str|os.PathLike = "multipers_mpfree_input.scc"
# mpfree_out_path:str|os.PathLike = "multipers_mpfree_output.scc"
# twopac_in_path:str|os.PathLike = "multipers_twopac_input.scc"
# twopac_out_path:str|os.PathLike = "multipers_twopac_output.scc"
# multi_chunk_in_path:str|os.PathLike = "multipers_multi_chunk_input.scc"
# multi_chunk_out_path:str|os.PathLike = "multipers_multi_chunk_output.scc"
# function_delaunay_out_path:str|os.PathLike = "function_delaunay_output.scc"
# function_delaunay_in_path:str|os.PathLike = "function_delaunay_input.txt" # point cloud
# Base names of the temporary files exchanged with the external tools; callers
# append a per-thread/per-call id, and `_put_temp_files_to_ram` may prefix
# "/tmp/" to both.
input_path:str|os.PathLike = "multipers_input.scc"
output_path:str|os.PathLike = "multipers_output.scc"
94
+
95
+
96
+
97
+ ## TODO : optimize with Python.h ?
98
def scc_parser(path: str| os.PathLike):
    """
    Parse an scc file into the scc python format, aka blocks.

    Returns a list of (filtration_array, boundary_tuple) pairs, one per block,
    in file order.
    """
    # Lines to skip: blank lines, '#' comments, and the "scc2020" magic header.
    pass_line_regex = re.compile(r"^\s*$|^#|^scc2020$")
    def valid_line(line):
        return pass_line_regex.match(line) is None
    # Each generator line is "<filtration values> ; <boundary indices>".
    parse_line_regex = re.compile(r"^(?P<filtration>[^;]+);(?P<boundary>[^;]*)$")
    cdef tuple[tuple[str,str]] clines
    with open(path, "r") as f:
        lines =(x.strip() for x in f if valid_line(x))
        # First meaningful line: the number of filtration parameters.
        num_parameters = int(next(lines))
        # Second meaningful line: block sizes; cumulative sums give the slice
        # offsets of each block inside the flat list of generator lines.
        sizes = np.cumsum([0] + next(lines).split(), dtype=np.int32)
        lines = (parse_line_regex.match(a) for a in lines)
        clines = tuple((a.group("filtration"),a.group("boundary")) for a in lines)
    # NOTE(review): the dtype hard-codes 2 filtration values per generator even
    # though num_parameters was just parsed and is otherwise unused — this
    # looks like it only supports 2-parameter files; confirm before relying on
    # other parameter counts.
    F = np.fromiter((a[0].split() for a in clines), dtype=np.dtype((np.float32,2)), count = sizes[-1])

    B = tuple(np.asarray(a[1].split(), dtype=np.int32) if len(a[1])>0 else np.empty(0, dtype=np.int32) for a in clines) ## TODO : this is very slow : optimize
    # block_lines = (tuple(get_bf(x, num_parameters) for x in lines[sizes[i]:sizes[i+1]]) for i in range(len(sizes)-1))

    # blocks = [(np.asarray([x[0] for x in b if len(x)>0], dtype=float),tuple(x[1] for x in b)) for b in block_lines]
    # Slice the flat filtration / boundary containers back into per-block views.
    blocks = [(F[sizes[i]:sizes[i+1]], B[sizes[i]:sizes[i+1]]) for i in range(len(sizes)-1)]

    return blocks
122
+
123
+
124
def scc_parser__old(path: str):
    """
    Parse an scc file into the scc python format, aka blocks.

    Legacy line-by-line implementation, kept as a reference for `scc_parser`.
    Returns a list of (filtration_array, boundary_tuple) pairs, one per block.
    """
    with open(path, "r") as f:
        lines = f.readlines()
    # Find scc2020
    while lines[0].strip() != "scc2020":
        lines = lines[1:]
    lines = lines[1:]
    # stripped scc2020 we can start

    def pass_line(line):
        # Skip blank lines and '#' comments.
        return re.match(r"^\s*$|^#", line) is not None

    # First meaningful line: the number of filtration parameters.
    for i, line in enumerate(lines):
        line = line.strip()
        if pass_line(line):
            continue
        num_parameters = int(line)
        lines = lines[i + 1 :]
        break

    block_sizes = []

    # Second meaningful line: the size of each generating set.
    for i, line in enumerate(lines):
        line = line.strip()
        if pass_line(line):
            continue
        block_sizes = tuple(int(i) for i in line.split(" "))
        lines = lines[i + 1 :]
        break
    blocks = []
    cdef int counter
    for block_size in block_sizes:
        # Consume exactly `block_size` generator lines for this block.
        counter = block_size
        block_filtrations = []
        block_boundaries = []
        for i, line in enumerate(lines):
            if counter == 0:
                # Keep the remaining lines for the next block.
                lines = lines[i:]
                break
            line = line.strip()
            if pass_line(line):
                continue
            # "<filtration values> ; <boundary indices>"
            splitted_line = re.match(r"^(?P<floats>[^;]+);(?P<ints>[^;]*)$", line)
            filtrations = np.asarray(splitted_line.group("floats").split(), dtype=float)
            boundary = np.asarray(splitted_line.group("ints").split(), dtype=int)
            block_filtrations.append(filtrations)
            block_boundaries.append(boundary)
            # filtration_boundary = line.split(";")
            # if len(filtration_boundary) == 1:
            #     # happens when last generators do not have a ";" in the end
            #     filtration_boundary.append(" ")
            # filtration, boundary = filtration_boundary
            # block_filtrations.append(
            #     tuple(float(x) for x in filtration.split(" ") if len(x) > 0)
            # )
            # block_boundaries.append(tuple(int(x) for x in boundary.split(" ") if len(x) > 0))
            counter -= 1
        blocks.append((np.asarray(block_filtrations, dtype=float), tuple(block_boundaries)))

    return blocks
187
+
188
+
189
+
190
def _put_temp_files_to_ram():
    """Redirect the temporary scc i/o files to /tmp when it is writable.

    On unix this lets the files exchanged with the external software live in
    RAM-backed storage instead of on disk. No-op if /tmp is unavailable or the
    paths were already redirected.
    """
    global input_path, output_path
    ram_dir = "/tmp/"  # on unix, we can write in RAM instead of disk.
    if not os.access(ram_dir, os.W_OK):
        return
    if input_path.startswith(ram_dir):
        return  # already redirected
    input_path = ram_dir + input_path
    output_path = ram_dir + output_path
196
+
197
def _init_external_softwares(requires=[]):
    """
    Locate the external executables registered in `pathes`, and fail loudly
    if a required one is missing.

    requires: iterable of software names (keys of `pathes`) that must be
        available; a ValueError with installation instructions is raised for
        the first one that cannot be found.
    """
    global pathes
    # BUGFIX: the flag was named `any`, shadowing the builtin; it is also a
    # plain Python bool, so the libcpp `cdef bool` declaration was unneeded.
    any_required = False
    for soft, soft_path in pathes.items():
        if soft_path is None:
            _path_init(soft)
        any_required = any_required or (soft in requires)

    if any_required:
        # The external tools communicate through files: keep them in RAM-backed
        # /tmp when possible.
        _put_temp_files_to_ram()
    for soft in requires:
        if pathes[soft] is None:
            raise ValueError(f"""
            Did not found {soft}.
            Install it from {doc_soft_urls[soft]}, and put it in your current directory,
            or in you $PATH.
            For instance:
            {doc_soft_easy_install[soft]}
            """)
217
+
218
def scc_reduce_from_str(
    path: str | os.PathLike,
    full_resolution: bool = True,
    dimension: int = 1,
    clear: bool = True,
    id: Optional[str] = None,  # For parallel stuff
    verbose: bool = False,
    backend: Literal["mpfree", "multi_chunk", "twopac", "2pac"] = "mpfree",
):
    """
    Computes a minimal presentation of the scc file in path,
    using an external reducer.

    path: PathLike, the scc input file
    full_resolution: bool, asks mpfree for a full resolution
    dimension: int, presentation dimension to consider
    clear: bool, removes temporary files if True
    id: str, temporary files are of this id, allowing for multiprocessing;
        defaults to the calling thread's native id
    verbose: bool
    backend: "mpfree", "multi_chunk" or "2pac" (alias "twopac")

    Returns the reduced presentation as scc python blocks.
    """
    global pathes, input_path, output_path
    # BUGFIX: "twopac" was advertised (Literal + command dispatch) but `pathes`
    # is keyed by "2pac" only, so `pathes[backend]` raised KeyError; normalize
    # the alias before any lookup.
    if backend == "twopac":
        backend = "2pac"
    if pathes[backend] is None:
        _init_external_softwares(requires=[backend])

    resolution_str = "--resolution" if full_resolution else ""
    if id is None:
        id = str(threading.get_native_id())
    if not os.path.exists(path):
        raise ValueError(f"No file found at {path}.")
    # Remove any stale output so we never parse a previous run's result.
    if os.path.exists(output_path + id):
        os.remove(output_path + id)
    verbose_arg = "> /dev/null 2>&1" if not verbose else ""
    if backend == "mpfree":
        more_verbose = "-v" if verbose else ""
        command = (
            f"{pathes[backend]} {more_verbose} {resolution_str} --dim={dimension} {path} {output_path+id} {verbose_arg}"
        )
    elif backend == "multi_chunk":
        command = (
            f"{pathes[backend]} {path} {output_path+id} {verbose_arg}"
        )
    elif backend == "2pac":
        command = (
            f"{pathes[backend]} -f {path} --scc-input -n{dimension} --save-resolution-scc {output_path+id} {verbose_arg}"
        )
    else:
        raise ValueError(f"Unsupported backend {backend}.")
    if verbose:
        print(f"Calling :\n\n {command}")
    os.system(command)

    blocks = scc_parser(output_path + id)
    if clear:
        clear_io(output_path + id)
    return blocks
276
+
277
def reduce_complex(
    complex, # Simplextree, Slicer, or str
    bool full_resolution: bool = True,
    int dimension: int | np.int64 = 1,
    bool clear: bool = True,
    id: Optional[str]=None, # For parallel stuff
    bool verbose:bool=False,
    backend:available_reduce_softs="mpfree"
):
    """
    Computes a minimal presentation of the given complex,
    using `backend`.

    complex: SimplexTreeMulti, scc blocks (list/tuple), a path to an scc file
        (str), or a slicer-like object
    full_resolution: bool
    dimension: int, presentation dimension to consider
    clear: bool, removes temporary files if True
    id: str, temporary files are of this id, allowing for multiprocessing
    verbose: bool
    """

    from multipers.simplex_tree_multi import is_simplextree_multi
    if id is None:
        # Per-thread default id so concurrent calls do not clash on temp files.
        id = str(threading.get_native_id())
    path = input_path+id
    if is_simplextree_multi(complex):
        # Serialize the simplextree to an scc file the external tool can read.
        complex.to_scc(
            path=path,
            rivet_compatible=False,
            strip_comments=False,
            ignore_last_generators=False,
            overwrite=True,
            reverse_block=False,
        )
        # scc blocks are written top-dimension first: convert the homological
        # degree into the block index the reducer expects.
        dimension = complex.dimension - dimension
    elif isinstance(complex,str):
        # Already a path to an scc file on disk.
        path = complex
    elif isinstance(complex, list) or isinstance(complex, tuple):
        # Already scc python blocks: write them out as-is.
        scc2disk(complex,path=path)
    else:
        # Assumes its a slicer
        blocks = mps.slicer2blocks(complex)
        scc2disk(blocks,path=path)
        # Same degree -> block-index conversion as above, from the block count.
        dimension = len(blocks) -2 -dimension

    return scc_reduce_from_str(
        path=path,
        full_resolution=full_resolution,
        dimension=dimension,
        clear=clear,
        id=id,
        verbose=verbose,
        backend=backend
    )
331
+
332
+
333
+
334
+
335
def function_delaunay_presentation(
    point_cloud:np.ndarray,
    function_values:np.ndarray,
    id:Optional[str] = None,
    bool clear:bool = True,
    bool verbose:bool=False,
    int degree = -1,
    bool multi_chunk = False,
):
    """
    Computes a function delaunay presentation, and returns it as blocks.

    point_cloud : (num_pts, n) float array
    function_values : (num_pts,) float array
    id : str, temporary files are of this id (per-thread default), allowing
        for multiprocessing
    clear : bool, removes temporary files if true
    verbose : bool
    degree : computes a minimal presentation of this homological degree first,
        if given
    multi_chunk : bool, forwards --multi-chunk to the external tool
    """
    if id is None:
        id = str(threading.get_native_id())
    global input_path, output_path, pathes
    backend = "function_delaunay"
    if pathes[backend] is None :
        _init_external_softwares(requires=[backend])

    # Input format expected by the tool: one point per line, coordinates
    # followed by the function value.
    to_write = np.concatenate([point_cloud, function_values.reshape(-1,1)], axis=1)
    np.savetxt(input_path+id,to_write,delimiter=' ')
    verbose_arg = "> /dev/null 2>&1" if not verbose else ""
    # NOTE(review): degree 0 is treated as "not given" here (condition is
    # `degree > 0` while the default is -1) — confirm whether H0 minimal
    # presentations should be requestable.
    degree_arg = f"--minpres {degree}" if degree > 0 else ""
    multi_chunk_arg = "--multi-chunk" if multi_chunk else ""
    # Remove any stale output so we never parse a previous run's result.
    if os.path.exists(output_path + id):
        os.remove(output_path+ id)
    command = f"{pathes[backend]} {degree_arg} {multi_chunk_arg} {input_path+id} {output_path+id} {verbose_arg} --no-delaunay-compare"
    if verbose:
        print(command)
    os.system(command)

    blocks = scc_parser(output_path + id)
    if clear:
        # Remove both the generated output and the point-cloud input.
        clear_io(output_path + id, input_path + id)
    return blocks
377
+
378
+
379
+
380
def clear_io(*args):
    """Removes temporary files: the module-level i/o files plus any extra paths given."""
    global input_path, output_path
    for candidate in (input_path, output_path, *args):
        if os.path.exists(candidate):
            os.remove(candidate)
386
+
387
+
388
+
389
+
390
+
391
+
392
+ # cdef extern from "multiparameter_module_approximation/format_python-cpp.h" namespace "Gudhi::multiparameter::mma":
393
+ # pair[boundary_matrix, vector[Finitely_critical_multi_filtration[double]]] simplextree_to_boundary_filtration(intptr_t)
394
+ # vector[pair[ vector[vector[float]],boundary_matrix]] simplextree_to_scc(intptr_t)
395
+ # vector[pair[ vector[vector[vector[float]]],boundary_matrix]] function_simplextree_to_scc(intptr_t)
396
+ # pair[vector[vector[float]],boundary_matrix ] simplextree_to_ordered_bf(intptr_t)
397
+
398
+ # def simplex_tree2boundary_filtrations(simplextree:SimplexTreeMulti | SimplexTree):
399
+ # """Computes a (sparse) boundary matrix, with associated filtration. Can be used as an input of approx afterwards.
400
+ #
401
+ # Parameters
402
+ # ----------
403
+ # simplextree: Gudhi or mma simplextree
404
+ # The simplextree defining the filtration to convert to boundary-filtration.
405
+ #
406
+ # Returns
407
+ # -------
408
+ # B:List of lists of ints
409
+ # The boundary matrix.
410
+ # F: List of 1D filtration
411
+ # The filtrations aligned with B; the i-th simplex of this simplextree has boundary B[i] and filtration(s) F[i].
412
+ #
413
+ # """
414
+ # cdef intptr_t cptr
415
+ # if isinstance(simplextree, SimplexTreeMulti):
416
+ # cptr = simplextree.thisptr
417
+ # elif isinstance(simplextree, SimplexTree):
418
+ # temp_st = gd.SimplexTreeMulti(simplextree, parameters=1)
419
+ # cptr = temp_st.thisptr
420
+ # else:
421
+ # raise TypeError("Has to be a simplextree")
422
+ # cdef pair[boundary_matrix, vector[Finitely_critical_multi_filtration[double]]] cboundary_filtration = simplextree_to_boundary_filtration(cptr)
423
+ # boundary = cboundary_filtration.first
424
+ # # multi_filtrations = np.array(<vector[vector[float]]>Finitely_critical_multi_filtration.to_python(cboundary_filtration.second))
425
+ # cdef cnp.ndarray[double, ndim=2] multi_filtrations = _fmf2numpy_f64(cboundary_filtration.second)
426
+ # return boundary, multi_filtrations
427
+
428
+ # def simplextree2scc(simplextree:SimplexTreeMulti | SimplexTree, filtration_dtype=np.float32, bool flattened=False):
429
+ # """
430
+ # Turns a simplextree into a (simplicial) module presentation.
431
+ # """
432
+ # cdef intptr_t cptr
433
+ # cdef bool is_function_st = False
434
+ # if isinstance(simplextree, SimplexTreeMulti):
435
+ # cptr = simplextree.thisptr
436
+ # is_function_st = simplextree._is_function_simplextree
437
+ # elif isinstance(simplextree, SimplexTree):
438
+ # temp_st = gd.SimplexTreeMulti(simplextree, parameters=1)
439
+ # cptr = temp_st.thisptr
440
+ # else:
441
+ # raise TypeError("Has to be a simplextree")
442
+ #
443
+ # cdef pair[vector[vector[float]], boundary_matrix] out
444
+ # if flattened:
445
+ # out = simplextree_to_ordered_bf(cptr)
446
+ # return np.asarray(out.first,dtype=filtration_dtype), tuple(out.second)
447
+ #
448
+ # if is_function_st:
449
+ # blocks = function_simplextree_to_scc(cptr)
450
+ # else:
451
+ # blocks = simplextree_to_scc(cptr)
452
+ # # reduces the space in memory
453
+ # if is_function_st:
454
+ # blocks = [(tuple(f), tuple(b)) for f,b in blocks[::-1]]
455
+ # else:
456
+ # blocks = [(np.asarray(f,dtype=filtration_dtype), tuple(b)) for f,b in blocks[::-1]] ## presentation is on the other order
457
+ # return blocks+[(np.empty(0,dtype=filtration_dtype),[])]
458
+
459
+ def scc2disk(
460
+ stuff,
461
+ path:str|os.PathLike,
462
+ int num_parameters = -1,
463
+ bool reverse_block = False,
464
+ bool rivet_compatible = False,
465
+ bool ignore_last_generators = False,
466
+ bool strip_comments = False,
467
+ ):
468
+ """
469
+ Writes a scc python format / blocks into a file.
470
+ """
471
+ if num_parameters == -1:
472
+ for block in stuff:
473
+ if len(block[0]) == 0:
474
+ continue
475
+ num_gens, num_parameters_= np.asarray(block[0]).shape
476
+ num_parameters = num_parameters_
477
+ break
478
+ assert num_parameters > 0, f"Invalid number of parameters {num_parameters}"
479
+
480
+ if reverse_block: stuff.reverse()
481
+ with open(path, "w") as f:
482
+ f.write("scc2020\n") if not rivet_compatible else f.write("firep\n")
483
+ if not strip_comments and not rivet_compatible: f.write("# Number of parameters\n")
484
+ if rivet_compatible:
485
+ assert num_parameters == 2
486
+ f.write("Filtration 1\n")
487
+ f.write("Filtration 2\n")
488
+ else:
489
+ f.write(f"{num_parameters}\n")
490
+
491
+ if not strip_comments: f.write("# Sizes of generating sets\n")
492
+ for block in stuff: f.write(f"{len(block[0])} ")
493
+ f.write("\n")
494
+ for i,block in enumerate(stuff):
495
+ if (rivet_compatible or ignore_last_generators) and i == len(stuff)-1: continue
496
+ if not strip_comments: f.write(f"# Block of dimension {len(stuff)-1-i}\n")
497
+ filtration, boundary = block
498
+ filtration = np.asarray(filtration).astype(str)
499
+ # boundary = tuple(x.astype(str) for x in boundary)
500
+ f.write(" ".join(itertools.chain.from_iterable(
501
+ ((*(f.tolist()),";",*(np.asarray(b).astype(str).tolist()),"\n") for f,b in zip(filtration, boundary))
502
+ )
503
+ ))
504
+ # for j in range(<int>len(filtration)):
505
+ # line = " ".join((
506
+ # *filtration[j],
507
+ # ";",
508
+ # *boundary[j],
509
+ # "\n",
510
+ # ))
511
+ # f.write(line)
512
+
513
+ def scc2disk_old(
514
+ stuff,
515
+ path:str|os.PathLike,
516
+ num_parameters = -1,
517
+ reverse_block = False,
518
+ rivet_compatible = False,
519
+ ignore_last_generators = False,
520
+ strip_comments = False,
521
+ ):
522
+ """
523
+ Writes a scc python format / blocks into a file.
524
+ """
525
+ if num_parameters == -1:
526
+ for block in stuff:
527
+ if len(block[0]) == 0:
528
+ continue
529
+ num_gens, num_parameters_= np.asarray(block[0]).shape
530
+ num_parameters = num_parameters_
531
+ break
532
+ assert num_parameters > 0, f"Invalid number of parameters {num_parameters}"
533
+
534
+ if reverse_block: stuff.reverse()
535
+ out = []
536
+ if rivet_compatible:
537
+ out.append(r"firep")
538
+ else:
539
+ out.append(r"scc2020")
540
+ if not strip_comments and not rivet_compatible:
541
+ out.append(r"# Number of parameters")
542
+ if rivet_compatible:
543
+ out.append("Filtration 1")
544
+ out.append("Filtration 2\n")
545
+ else:
546
+ out.append(f"{num_parameters}")
547
+
548
+ if not strip_comments:
549
+ out.append("# Sizes of generating sets")
550
+
551
+ # for block in stuff:
552
+ # f.write(f"{len(block[0])} ")
553
+ out.append(" ".join(str(len(block[0])) for block in stuff))
554
+ str_blocks = [out]
555
+ for i,block in enumerate(stuff):
556
+ if (rivet_compatible or ignore_last_generators) and i == len(stuff)-1: continue
557
+ if not strip_comments:
558
+ str_blocks.append([f"# Block of dimension {len(stuff)-1-i}"])
559
+ filtration, boundary = block
560
+ if len(filtration) == 0:
561
+ continue
562
+ filtration = filtration.astype(str)
563
+ C = filtration[:,0]
564
+ for i in range(1,filtration.shape[1]):
565
+ C = np.char.add(C," ")
566
+ C = np.char.add(C,filtration[:,i])
567
+ C = np.char.add(C, ";")
568
+ D = np.fromiter((" ".join(b.astype(str).tolist()) for b in boundary), dtype="<U11") #int32-> str is "<U11" #check np.array(1, dtype=np.int32).astype(str)
569
+ str_blocks.append(np.char.add(C,D))
570
+
571
+ np.savetxt("test.scc", np.concatenate(str_blocks), delimiter="", fmt="%s")
File without changes
@@ -0,0 +1,90 @@
1
+ import pandas as pd
2
+ from warnings import warn
3
+ import numpy as np
4
+ from tqdm import tqdm
5
+ from os.path import exists
6
+
7
+
8
def accuracy_to_csv(
    X,
    Y,
    cl,
    k: float = 10,
    dataset: str = "",
    shuffle=True,
    verbose: bool = True,
    **more_columns,
):
    """
    Evaluate classifier `cl` on (X, Y) and append the scores to a csv file.

    X, Y: dataset and labels (indexable sequences).
    cl: sklearn-style classifier (fit/score); if it exposes `best_params_`
        after fitting (e.g. a grid search), they are printed.
    k: if k > 1, number of stratified k-folds; if 0 < k <= 1, the test size
        of a single train/test split.
    dataset: name used in log messages and in the output file name.
    shuffle: forwarded to the splitter.
    verbose: print per-step scores.
    more_columns: extra constant columns appended to the csv line; keys that
        collide with the standard columns are dropped with a warning.

    Results are appended to `result_<dataset>.csv`.
    """
    assert k > 0, "k is either the number of kfold > 1 or the test size > 0."
    if k > 1:
        k = int(k)
        from sklearn.model_selection import StratifiedKFold as KFold

        kfold = KFold(k, shuffle=shuffle).split(X, Y)
        accuracies = np.zeros(k)
        for i, (train_idx, test_idx) in enumerate(
            tqdm(kfold, total=k, desc="Computing kfold")
        ):
            # `j` instead of `i`: the original comprehensions shadowed the
            # fold index.
            xtrain = [X[j] for j in train_idx]
            ytrain = [Y[j] for j in train_idx]
            cl.fit(xtrain, ytrain)
            xtest = [X[j] for j in test_idx]
            ytest = [Y[j] for j in test_idx]
            accuracies[i] = cl.score(xtest, ytest)
            if verbose:
                print(f"step {i+1}, {dataset} : {accuracies[i]}", flush=True)
                # BUGFIX: was a bare `except:` — only a classifier without
                # `best_params_` should be silently skipped.
                try:
                    print("Best classification parameters : ", cl.best_params_)
                except AttributeError:
                    pass

        print(
            f"""Accuracy {dataset} : {np.mean(accuracies).round(decimals=3)}±{np.std(accuracies).round(decimals=3)}"""
        )
    elif k > 0:
        from sklearn.model_selection import train_test_split

        print("Computing accuracy, with train test split", flush=True)
        xtrain, xtest, ytrain, ytest = train_test_split(
            X, Y, shuffle=shuffle, test_size=k
        )
        print("Fitting...", end="", flush=True)
        cl.fit(xtrain, ytrain)
        print("Computing score...", end="", flush=True)
        accuracies = cl.score(xtest, ytest)
        # BUGFIX: same bare `except:` narrowed to AttributeError.
        try:
            print("Best classification parameters : ", cl.best_params_)
        except AttributeError:
            pass
        print("Done.")
        if verbose:
            print(f"Accuracy {dataset} : {accuracies} ")
    file_path: str = f"result_{dataset}.csv".replace("/", "_").replace(".off", "")
    columns: list[str] = ["dataset", "cv", "mean", "std"]
    if exists(file_path):
        df: pd.DataFrame = pd.read_csv(file_path)
    else:
        df: pd.DataFrame = pd.DataFrame(columns=columns)
    more_names = []
    more_values = []
    for key, value in more_columns.items():
        if key not in columns:
            more_names.append(key)
            more_values.append(value)
        else:
            warn(f"Duplicate key {key} ! with value {value}")
    new_line: pd.DataFrame = pd.DataFrame(
        [
            [
                dataset,
                k,
                np.mean(accuracies).round(decimals=3),
                np.std(accuracies).round(decimals=3),
            ]
            + more_values
        ],
        columns=columns + more_names,
    )
    print(new_line)
    df = pd.concat([df, new_line])
    df.to_csv(file_path, index=False)