multipers-2.2.3-cp310-cp310-win_amd64.whl → multipers-2.3.1-cp310-cp310-win_amd64.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.

Potentially problematic release: this version of multipers might be problematic.

Files changed (182)
  1. multipers/__init__.py +33 -31
  2. multipers/_signed_measure_meta.py +430 -430
  3. multipers/_slicer_meta.py +211 -212
  4. multipers/data/MOL2.py +458 -458
  5. multipers/data/UCR.py +18 -18
  6. multipers/data/graphs.py +466 -466
  7. multipers/data/immuno_regions.py +27 -27
  8. multipers/data/pytorch2simplextree.py +90 -90
  9. multipers/data/shape3d.py +101 -101
  10. multipers/data/synthetic.py +113 -111
  11. multipers/distances.py +198 -198
  12. multipers/filtration_conversions.pxd.tp +84 -84
  13. multipers/filtrations/__init__.py +18 -0
  14. multipers/{ml/convolutions.py → filtrations/density.py} +563 -520
  15. multipers/filtrations/filtrations.py +289 -0
  16. multipers/filtrations.pxd +224 -224
  17. multipers/function_rips.cp310-win_amd64.pyd +0 -0
  18. multipers/function_rips.pyx +105 -105
  19. multipers/grids.cp310-win_amd64.pyd +0 -0
  20. multipers/grids.pyx +350 -350
  21. multipers/gudhi/Persistence_slices_interface.h +132 -132
  22. multipers/gudhi/Simplex_tree_interface.h +239 -245
  23. multipers/gudhi/Simplex_tree_multi_interface.h +516 -561
  24. multipers/gudhi/cubical_to_boundary.h +59 -59
  25. multipers/gudhi/gudhi/Bitmap_cubical_complex.h +450 -450
  26. multipers/gudhi/gudhi/Bitmap_cubical_complex_base.h +1070 -1070
  27. multipers/gudhi/gudhi/Bitmap_cubical_complex_periodic_boundary_conditions_base.h +579 -579
  28. multipers/gudhi/gudhi/Debug_utils.h +45 -45
  29. multipers/gudhi/gudhi/Fields/Multi_field.h +484 -484
  30. multipers/gudhi/gudhi/Fields/Multi_field_operators.h +455 -455
  31. multipers/gudhi/gudhi/Fields/Multi_field_shared.h +450 -450
  32. multipers/gudhi/gudhi/Fields/Multi_field_small.h +531 -531
  33. multipers/gudhi/gudhi/Fields/Multi_field_small_operators.h +507 -507
  34. multipers/gudhi/gudhi/Fields/Multi_field_small_shared.h +531 -531
  35. multipers/gudhi/gudhi/Fields/Z2_field.h +355 -355
  36. multipers/gudhi/gudhi/Fields/Z2_field_operators.h +376 -376
  37. multipers/gudhi/gudhi/Fields/Zp_field.h +420 -420
  38. multipers/gudhi/gudhi/Fields/Zp_field_operators.h +400 -400
  39. multipers/gudhi/gudhi/Fields/Zp_field_shared.h +418 -418
  40. multipers/gudhi/gudhi/Flag_complex_edge_collapser.h +337 -337
  41. multipers/gudhi/gudhi/Matrix.h +2107 -2107
  42. multipers/gudhi/gudhi/Multi_critical_filtration.h +1038 -1038
  43. multipers/gudhi/gudhi/Multi_persistence/Box.h +171 -171
  44. multipers/gudhi/gudhi/Multi_persistence/Line.h +282 -282
  45. multipers/gudhi/gudhi/Off_reader.h +173 -173
  46. multipers/gudhi/gudhi/One_critical_filtration.h +1433 -1431
  47. multipers/gudhi/gudhi/Persistence_matrix/Base_matrix.h +769 -769
  48. multipers/gudhi/gudhi/Persistence_matrix/Base_matrix_with_column_compression.h +686 -686
  49. multipers/gudhi/gudhi/Persistence_matrix/Boundary_matrix.h +842 -842
  50. multipers/gudhi/gudhi/Persistence_matrix/Chain_matrix.h +1350 -1350
  51. multipers/gudhi/gudhi/Persistence_matrix/Id_to_index_overlay.h +1105 -1105
  52. multipers/gudhi/gudhi/Persistence_matrix/Position_to_index_overlay.h +859 -859
  53. multipers/gudhi/gudhi/Persistence_matrix/RU_matrix.h +910 -910
  54. multipers/gudhi/gudhi/Persistence_matrix/allocators/entry_constructors.h +139 -139
  55. multipers/gudhi/gudhi/Persistence_matrix/base_pairing.h +230 -230
  56. multipers/gudhi/gudhi/Persistence_matrix/base_swap.h +211 -211
  57. multipers/gudhi/gudhi/Persistence_matrix/boundary_cell_position_to_id_mapper.h +60 -60
  58. multipers/gudhi/gudhi/Persistence_matrix/boundary_face_position_to_id_mapper.h +60 -60
  59. multipers/gudhi/gudhi/Persistence_matrix/chain_pairing.h +136 -136
  60. multipers/gudhi/gudhi/Persistence_matrix/chain_rep_cycles.h +190 -190
  61. multipers/gudhi/gudhi/Persistence_matrix/chain_vine_swap.h +616 -616
  62. multipers/gudhi/gudhi/Persistence_matrix/columns/chain_column_extra_properties.h +150 -150
  63. multipers/gudhi/gudhi/Persistence_matrix/columns/column_dimension_holder.h +106 -106
  64. multipers/gudhi/gudhi/Persistence_matrix/columns/column_utilities.h +219 -219
  65. multipers/gudhi/gudhi/Persistence_matrix/columns/entry_types.h +327 -327
  66. multipers/gudhi/gudhi/Persistence_matrix/columns/heap_column.h +1140 -1140
  67. multipers/gudhi/gudhi/Persistence_matrix/columns/intrusive_list_column.h +934 -934
  68. multipers/gudhi/gudhi/Persistence_matrix/columns/intrusive_set_column.h +934 -934
  69. multipers/gudhi/gudhi/Persistence_matrix/columns/list_column.h +980 -980
  70. multipers/gudhi/gudhi/Persistence_matrix/columns/naive_vector_column.h +1092 -1092
  71. multipers/gudhi/gudhi/Persistence_matrix/columns/row_access.h +192 -192
  72. multipers/gudhi/gudhi/Persistence_matrix/columns/set_column.h +921 -921
  73. multipers/gudhi/gudhi/Persistence_matrix/columns/small_vector_column.h +1093 -1093
  74. multipers/gudhi/gudhi/Persistence_matrix/columns/unordered_set_column.h +1012 -1012
  75. multipers/gudhi/gudhi/Persistence_matrix/columns/vector_column.h +1244 -1244
  76. multipers/gudhi/gudhi/Persistence_matrix/matrix_dimension_holders.h +186 -186
  77. multipers/gudhi/gudhi/Persistence_matrix/matrix_row_access.h +164 -164
  78. multipers/gudhi/gudhi/Persistence_matrix/ru_pairing.h +156 -156
  79. multipers/gudhi/gudhi/Persistence_matrix/ru_rep_cycles.h +376 -376
  80. multipers/gudhi/gudhi/Persistence_matrix/ru_vine_swap.h +540 -540
  81. multipers/gudhi/gudhi/Persistent_cohomology/Field_Zp.h +118 -118
  82. multipers/gudhi/gudhi/Persistent_cohomology/Multi_field.h +173 -173
  83. multipers/gudhi/gudhi/Persistent_cohomology/Persistent_cohomology_column.h +128 -128
  84. multipers/gudhi/gudhi/Persistent_cohomology.h +745 -745
  85. multipers/gudhi/gudhi/Points_off_io.h +171 -171
  86. multipers/gudhi/gudhi/Simple_object_pool.h +69 -69
  87. multipers/gudhi/gudhi/Simplex_tree/Simplex_tree_iterators.h +463 -463
  88. multipers/gudhi/gudhi/Simplex_tree/Simplex_tree_node_explicit_storage.h +83 -83
  89. multipers/gudhi/gudhi/Simplex_tree/Simplex_tree_siblings.h +106 -106
  90. multipers/gudhi/gudhi/Simplex_tree/Simplex_tree_star_simplex_iterators.h +277 -277
  91. multipers/gudhi/gudhi/Simplex_tree/hooks_simplex_base.h +62 -62
  92. multipers/gudhi/gudhi/Simplex_tree/indexing_tag.h +27 -27
  93. multipers/gudhi/gudhi/Simplex_tree/serialization_utils.h +62 -62
  94. multipers/gudhi/gudhi/Simplex_tree/simplex_tree_options.h +157 -157
  95. multipers/gudhi/gudhi/Simplex_tree.h +2794 -2794
  96. multipers/gudhi/gudhi/Simplex_tree_multi.h +152 -163
  97. multipers/gudhi/gudhi/distance_functions.h +62 -62
  98. multipers/gudhi/gudhi/graph_simplicial_complex.h +104 -104
  99. multipers/gudhi/gudhi/persistence_interval.h +253 -253
  100. multipers/gudhi/gudhi/persistence_matrix_options.h +170 -170
  101. multipers/gudhi/gudhi/reader_utils.h +367 -367
  102. multipers/gudhi/mma_interface_coh.h +256 -255
  103. multipers/gudhi/mma_interface_h0.h +223 -231
  104. multipers/gudhi/mma_interface_matrix.h +291 -282
  105. multipers/gudhi/naive_merge_tree.h +536 -575
  106. multipers/gudhi/scc_io.h +310 -289
  107. multipers/gudhi/truc.h +957 -888
  108. multipers/io.cp310-win_amd64.pyd +0 -0
  109. multipers/io.pyx +714 -711
  110. multipers/ml/accuracies.py +90 -90
  111. multipers/ml/invariants_with_persistable.py +79 -79
  112. multipers/ml/kernels.py +176 -176
  113. multipers/ml/mma.py +713 -714
  114. multipers/ml/one.py +472 -472
  115. multipers/ml/point_clouds.py +352 -346
  116. multipers/ml/signed_measures.py +1589 -1589
  117. multipers/ml/sliced_wasserstein.py +461 -461
  118. multipers/ml/tools.py +113 -113
  119. multipers/mma_structures.cp310-win_amd64.pyd +0 -0
  120. multipers/mma_structures.pxd +127 -127
  121. multipers/mma_structures.pyx +4 -8
  122. multipers/mma_structures.pyx.tp +1083 -1085
  123. multipers/multi_parameter_rank_invariant/diff_helpers.h +84 -93
  124. multipers/multi_parameter_rank_invariant/euler_characteristic.h +97 -97
  125. multipers/multi_parameter_rank_invariant/function_rips.h +322 -322
  126. multipers/multi_parameter_rank_invariant/hilbert_function.h +769 -769
  127. multipers/multi_parameter_rank_invariant/persistence_slices.h +148 -148
  128. multipers/multi_parameter_rank_invariant/rank_invariant.h +369 -369
  129. multipers/multiparameter_edge_collapse.py +41 -41
  130. multipers/multiparameter_module_approximation/approximation.h +2298 -2295
  131. multipers/multiparameter_module_approximation/combinatory.h +129 -129
  132. multipers/multiparameter_module_approximation/debug.h +107 -107
  133. multipers/multiparameter_module_approximation/format_python-cpp.h +286 -286
  134. multipers/multiparameter_module_approximation/heap_column.h +238 -238
  135. multipers/multiparameter_module_approximation/images.h +79 -79
  136. multipers/multiparameter_module_approximation/list_column.h +174 -174
  137. multipers/multiparameter_module_approximation/list_column_2.h +232 -232
  138. multipers/multiparameter_module_approximation/ru_matrix.h +347 -347
  139. multipers/multiparameter_module_approximation/set_column.h +135 -135
  140. multipers/multiparameter_module_approximation/structure_higher_dim_barcode.h +36 -36
  141. multipers/multiparameter_module_approximation/unordered_set_column.h +166 -166
  142. multipers/multiparameter_module_approximation/utilities.h +403 -419
  143. multipers/multiparameter_module_approximation/vector_column.h +223 -223
  144. multipers/multiparameter_module_approximation/vector_matrix.h +331 -331
  145. multipers/multiparameter_module_approximation/vineyards.h +464 -464
  146. multipers/multiparameter_module_approximation/vineyards_trajectories.h +649 -649
  147. multipers/multiparameter_module_approximation.cp310-win_amd64.pyd +0 -0
  148. multipers/multiparameter_module_approximation.pyx +218 -217
  149. multipers/pickle.py +90 -53
  150. multipers/plots.py +342 -334
  151. multipers/point_measure.cp310-win_amd64.pyd +0 -0
  152. multipers/point_measure.pyx +322 -320
  153. multipers/simplex_tree_multi.cp310-win_amd64.pyd +0 -0
  154. multipers/simplex_tree_multi.pxd +133 -133
  155. multipers/simplex_tree_multi.pyx +115 -48
  156. multipers/simplex_tree_multi.pyx.tp +1947 -1935
  157. multipers/slicer.cp310-win_amd64.pyd +0 -0
  158. multipers/slicer.pxd +301 -120
  159. multipers/slicer.pxd.tp +218 -214
  160. multipers/slicer.pyx +1570 -507
  161. multipers/slicer.pyx.tp +931 -914
  162. multipers/tensor/tensor.h +672 -672
  163. multipers/tensor.pxd +13 -13
  164. multipers/test.pyx +44 -44
  165. multipers/tests/__init__.py +57 -57
  166. multipers/torch/diff_grids.py +217 -217
  167. multipers/torch/rips_density.py +310 -304
  168. {multipers-2.2.3.dist-info → multipers-2.3.1.dist-info}/LICENSE +21 -21
  169. {multipers-2.2.3.dist-info → multipers-2.3.1.dist-info}/METADATA +21 -11
  170. multipers-2.3.1.dist-info/RECORD +182 -0
  171. {multipers-2.2.3.dist-info → multipers-2.3.1.dist-info}/WHEEL +1 -1
  172. multipers/tests/test_diff_helper.py +0 -73
  173. multipers/tests/test_hilbert_function.py +0 -82
  174. multipers/tests/test_mma.py +0 -83
  175. multipers/tests/test_point_clouds.py +0 -49
  176. multipers/tests/test_python-cpp_conversion.py +0 -82
  177. multipers/tests/test_signed_betti.py +0 -181
  178. multipers/tests/test_signed_measure.py +0 -89
  179. multipers/tests/test_simplextreemulti.py +0 -221
  180. multipers/tests/test_slicer.py +0 -221
  181. multipers-2.2.3.dist-info/RECORD +0 -189
  182. {multipers-2.2.3.dist-info → multipers-2.3.1.dist-info}/top_level.txt +0 -0
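Entry 14 above shows that multipers/ml/convolutions.py was moved to multipers/filtrations/density.py (with a new multipers/filtrations/__init__.py added at entry 13). Whether 2.3.1 still re-exports the old path is not visible from this listing, so code that imported the old module may want a guarded import. A minimal sketch, assuming only the file move shown above:

try:
    # 2.3.1 layout, per the rename listed above (assumed import path)
    from multipers.filtrations import density as convolutions
except ImportError:
    # 2.2.3 layout
    from multipers.ml import convolutions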
multipers/data/graphs.py CHANGED
@@ -1,466 +1,466 @@
(The removed and added sides of this hunk render identically; the file content is shown once below.)

import numpy as np
from os.path import expanduser, exists
import networkx as nx
from warnings import warn
import pickle
from joblib import Parallel, delayed
from tqdm import tqdm
from sklearn.preprocessing import LabelEncoder
from scipy.spatial import distance_matrix
from sklearn.base import BaseEstimator, TransformerMixin, clone
import multipers as mp
from typing import Iterable, List, Optional
from numpy.typing import ArrayLike

DATASET_PATH = expanduser("~/Datasets/")


def _check_installed(dataset: str):
    from warnings import warn
    from os.path import exists

    assert dataset.startswith(
        "graphs/"
    ), "Graph datasets have to be of the form graphs/<name>"
    if exists(DATASET_PATH + dataset):
        return
    else:
        warn(
            f"""
            Dataset {dataset} not installed.
            You can find it in https://networkrepository.com
            Then (optinally) configure multipers.data.graphs.DATASET_PATH, which is currently
            > {DATASET_PATH=}
            and puts this dataset in $DATASET_PATH/{dataset}
            """
        )
        raise ValueError("Unknown dataset.")


def get(dataset: str, filtration: Optional[str] = None):
    if filtration is None:
        return get_graphs(dataset)
    graphs, labels = get_graphs(dataset)
    try:
        for g in graphs:
            for node in g.nodes:
                g.nodes[node][filtration]
    except:
        print(
            f"Filtration {filtration} not computed, trying to compute it ...",
            flush=True,
        )
        compute_filtration(dataset, filtration)
    return get_graphs(dataset)


def get_from_file_old(dataset: str, label="lb"):
    from os import walk
    from scipy.io import loadmat
    from warnings import warn

    path = DATASET_PATH + dataset + "/mat/"
    labels: list[int] = []
    gs: list[nx.Graph] = []
    for root, dir, files in walk(path):
        for file in files:
            file_ppties = file.split("_")
            gid = file_ppties[5]
            i = 0
            while i + 1 < len(file_ppties) and file_ppties[i] != label:
                i += 1
            if i + 1 >= len(file_ppties):
                warn(f"Cannot find label {label} on file {file}.")
            else:
                labels += [file_ppties[i + 1]]
            adj_mat = np.array(loadmat(path + file)["A"], dtype=np.float32)
            gs.append(nx.Graph(adj_mat))
    return gs, labels


def get_from_file(dataset: str):
    from os.path import expanduser, exists

    path = DATASET_PATH + f"{dataset}/{dataset[7:]}."
    try:
        graphs_ids = np.loadtxt(path + "graph_idx")
    except:
        return get_from_file_old(dataset=dataset)
    labels: list[int] = LabelEncoder().fit_transform(np.loadtxt(path + "graph_labels"))
    edges = np.loadtxt(path + "edges", delimiter=",", dtype=int) - 1
    has_intrinsic_filtration = exists(path + "node_attrs")
    graphs: list[nx.Graph] = []
    if has_intrinsic_filtration:
        F = np.loadtxt(path + "node_attrs", delimiter=",")
    for graph_id in tqdm(np.unique(graphs_ids), desc="Reading graphs from file"):
        (nodes,) = np.where(graphs_ids == graph_id)

        def graph_has_edge(u: int, v: int) -> bool:
            if u in nodes or v in nodes:
                assert u in nodes and v in nodes, f"Nodes\
                {u} and {v} are not in the same graph"
                return True
            return False

        graph_edges = [(u, v) for u, v in edges if graph_has_edge(u, v)]
        g = nx.Graph(graph_edges)
        if has_intrinsic_filtration:
            node_attrs = {node: F[node] for node in nodes}
            nx.set_node_attributes(g, node_attrs, "intrinsic")
        graphs.append(g)
    return graphs, labels


def get_graphs(dataset: str, N: int | str = "") -> tuple[list[nx.Graph], list[int]]:
    _check_installed(dataset)
    graphs_path = f"{DATASET_PATH}{dataset}/graphs{N}.pkl"
    labels_path = f"{DATASET_PATH}{dataset}/labels{N}.pkl"
    if not exists(graphs_path) or not exists(labels_path):
        if dataset.startswith("3dshapes/"):
            return get_from_file_old(
                dataset,
            )

        graphs, labels = get_from_file(
            dataset,
        )
        print("Saving graphs at :", graphs_path)
        set_graphs(graphs=graphs, labels=labels, dataset=dataset)
    else:
        graphs = pickle.load(open(graphs_path, "rb"))
        labels = pickle.load(open(labels_path, "rb"))
    from sklearn.preprocessing import LabelEncoder

    return graphs, LabelEncoder().fit_transform(labels)


# saves graphs (and filtration values) into a file
def set_graphs(graphs: list[nx.Graph], labels: list, dataset: str, N: int | str = ""):
    graphs_path = f"{DATASET_PATH}{dataset}/graphs{N}.pkl"
    labels_path = f"{DATASET_PATH}{dataset}/labels{N}.pkl"
    pickle.dump(graphs, open(graphs_path, "wb"))
    pickle.dump(labels, open(labels_path, "wb"))
    return


def reset_graphs(dataset: str, N=None):  # Resets filtrations values on graphs
    graphs, labels = get_from_file(dataset)
    set_graphs(graphs, labels, dataset)
    return


def compute_ricci(graphs: list[nx.Graph], alpha=0.5, progress=1):
    from GraphRicciCurvature.OllivierRicci import OllivierRicci

    def ricci(graph, alpha=alpha):
        return OllivierRicci(graph, alpha=alpha).compute_ricci_curvature()

    graphs = [
        ricci(g) for g in tqdm(graphs, disable=not progress, desc="Computing ricci")
    ]

    def push_back_node(graph):
        # for node in graph.nodes:
        #     graph.nodes[node]['ricciCurvature'] = np.min([graph[node][node2]['ricciCurvature'] for node2 in graph[node]] + [graph.nodes[node]['ricciCurvature']])
        node_filtrations = {
            node: -1
            if len(graph[node]) == 0
            else np.min([graph[node][node2]["ricciCurvature"] for node2 in graph[node]])
            for node in graph.nodes
        }
        nx.set_node_attributes(graph, node_filtrations, "ricciCurvature")
        return graph

    graphs = [push_back_node(g) for g in graphs]
    return graphs


def compute_cc(graphs: list[nx.Graph], progress=1):
    def _cc(g):
        cc = nx.closeness_centrality(g)
        nx.set_node_attributes(g, cc, "cc")
        edges_cc = {(u, v): max(cc[u], cc[v]) for u, v in g.edges}
        nx.set_edge_attributes(g, edges_cc, "cc")
        return g

    graphs = Parallel(n_jobs=1, prefer="threads")(
        delayed(_cc)(g) for g in tqdm(graphs, disable=not progress, desc="Computing cc")
    )
    return graphs
    # for g in tqdm(graphs, desc="Computing cc"):
    #     _cc(g)
    # return graphs


def compute_degree(graphs: list[nx.Graph], progress=1):
    def _degree(g):
        degrees = {i: 1.1 if degree == 0 else 1 / degree for i, degree in g.degree}
        nx.set_node_attributes(g, degrees, "degree")
        edges_dg = {(u, v): max(degrees[u], degrees[v]) for u, v in g.edges}
        nx.set_edge_attributes(g, edges_dg, "degree")
        return g

    graphs = Parallel(n_jobs=1, prefer="threads")(
        delayed(_degree)(g)
        for g in tqdm(graphs, disable=not progress, desc="Computing degree")
    )
    return graphs
    # for g in tqdm(graphs, desc="Computing degree"):
    #     _degree(g)
    # return graphs


# TODO : make it compatible with non-connexe graphs
def compute_fiedler(graphs: list[nx.Graph], progress=1):
    def _fiedler(g):
        connected_graphs = [
            nx.subgraph(g, nodes) for nodes in nx.connected_components(g)
        ]
        fiedler_vectors = [
            nx.fiedler_vector(g) ** 2
            if g.number_of_nodes() > 2
            else np.zeros(
                g.number_of_nodes(
                    # order of nx.fiedler_vector correspond to nx.laplacian -> g.nodes
                )
            )
            for g in connected_graphs
        ]
        fiedler_dict = {
            node: fiedler_vector[node_index]
            for g, fiedler_vector in zip(connected_graphs, fiedler_vectors)
            for node_index, node in enumerate(list(g.nodes))
        }
        nx.set_node_attributes(g, fiedler_dict, "fiedler")
        edges_fiedler = {
            (u, v): max(fiedler_dict[u], fiedler_dict[v]) for u, v in g.edges
        }
        nx.set_edge_attributes(g, edges_fiedler, "fiedler")
        return g

    graphs = Parallel(n_jobs=1, prefer="threads")(
        delayed(_fiedler)(g)
        for g in tqdm(graphs, disable=not progress, desc="Computing fiedler")
    )
    return graphs
    # for g in tqdm(graphs, desc="Computing fiedler"):
    #     _fiedler(g)
    # return graphs


def compute_hks(graphs: list[nx.Graph], t: float, progress=1):
    def _hks(g: nx.Graph):
        w, vps = np.linalg.eig(
            nx.laplacianmatrix.normalized_laplacian_matrix(
                g, nodelist=g.nodes()
            ).toarray()
        )  # order is given by g.nodes order
        w = w.view(dtype=float)
        vps = vps.view(dtype=float)
        node_hks = {
            node: np.sum(np.exp(-t * w) * np.square(vps[node_index, :]))
            for node_index, node in enumerate(g.nodes)
        }
        nx.set_node_attributes(g, node_hks, f"hks_{t}")
        edges_hks = {(u, v): max(node_hks[u], node_hks[v]) for u, v in g.edges}
        nx.set_edge_attributes(g, edges_hks, f"hks_{t}")
        return g

    graphs = Parallel(n_jobs=1, prefer="threads")(
        delayed(_hks)(g)
        for g in tqdm(graphs, disable=not progress, desc=f"Computing hks_{t}")
    )
    return graphs


def compute_geodesic(graphs: list[nx.Graph], progress=1):
    def _f(g: nx.Graph):
        try:
            nodes_intrinsic = {i: n["intrinsic"] for i, n in g.nodes.data()}
        except:
            warn(
                "This graph doesn't have an intrinsic filtration, will use 0 instead ..."
            )
            nodes_intrinsic = {i: 0 for i, n in g.nodes.data()}
            # return g
        node_geodesic = {i: 0 for i in g.nodes}
        nx.set_node_attributes(g, node_geodesic, f"geodesic")
        edges_geodesic = {
            (u, v): np.linalg.norm(nodes_intrinsic[u] - nodes_intrinsic[v])
            for u, v in g.edges
        }
        nx.set_edge_attributes(g, edges_geodesic, f"geodesic")
        return g

    graphs = Parallel(n_jobs=1, prefer="threads")(
        delayed(_f)(g)
        for g in tqdm(
            graphs, disable=not progress, desc=f"Computing geodesic distances on graphs"
        )
    )
    return graphs


def compute_intrinsic(graphs: list[nx.Graph], progress=1, nowarning=False):
    def _f(g: nx.Graph):
        try:
            nodes_intrinsic = {i: n["intrinsic"] for i, n in g.nodes.data()}
        except:
            if not nowarning:
                raise ValueError("This graph doesn't have an intrinsic filtration.")
            else:
                return g
        edges_intrinsic = {
            (u, v): np.max([nodes_intrinsic[u], nodes_intrinsic[v]], axis=0)
            for u, v in g.edges
        }
        nx.set_edge_attributes(g, edges_intrinsic, "intrinsic")
        return g

    graphs = Parallel(n_jobs=1, prefer="threads")(
        delayed(_f)(g)
        for g in tqdm(
            graphs, disable=not progress, desc="Computing geodesic distances on graphs"
        )
    )
    return graphs


def compute_filtration(dataset: str, filtration: str = "ALL", **kwargs):
    if filtration == "ALL":
        reset_graphs(dataset)  # not necessary
        graphs, labels = get_graphs(dataset, **kwargs)
        graphs = compute_intrinsic(graphs, nowarning=True)
        graphs = compute_geodesic(graphs)
        graphs = compute_cc(graphs)
        graphs = compute_degree(graphs)
        graphs = compute_ricci(graphs)
        graphs = compute_fiedler(graphs)
        graphs = compute_hks(graphs, 10)
        set_graphs(graphs=graphs, labels=labels, dataset=dataset)
        return
    graphs, labels = get_graphs(dataset, **kwargs)
    if filtration == "dijkstra":
        return
    elif filtration == "cc":
        graphs = compute_cc(graphs)
    elif filtration == "degree":
        graphs = compute_degree(graphs)
    elif filtration == "ricciCurvature":
        graphs = compute_ricci(graphs)
    elif filtration == "fiedler":
        graphs = compute_fiedler(graphs)
    elif filtration == "geodesic":
        graphs = compute_geodesic(graphs)
    elif filtration.startswith("hks_"):
        # don't want do deal with floats, makes dots in title...
        t = int(filtration[4:])
        graphs = compute_hks(graphs=graphs, t=t)
    else:
        warn(f"Filtration {filtration} not implemented !")
        return
    set_graphs(graphs=graphs, labels=labels, dataset=dataset)
    return


class Graph2SimplexTrees(BaseEstimator, TransformerMixin):
    """
    Transforms a list of networkx graphs into a list of simplextree multi

    Usual Filtrations
    -----------------
    - "cc" closeness centrality
    - "geodesic" if the graph provides data to compute it, e.g., BZR, COX2, PROTEINS
    - "degree"
    - "ricciCurvature" the ricci curvature
    - "fiedler" the square of the fiedler vector
    """

    def __init__(
        self,
        filtrations=[],
        delayed=False,
        num_collapses=100,
        progress: bool = False,
    ):
        super().__init__()
        self.filtrations = filtrations  # filtration to search in graph
        self.delayed = delayed  # reverses the filtration #TODO
        self.num_collapses = num_collapses
        self.progress = progress
        self.num_parameters: int = len(filtrations)

    def fit(self, X, y=None):
        if len(X) == 0:
            return self
        self.num_parameters = len(self.filtrations)
        if "intrinsic" in self.filtrations:
            intrinsic_size = len(X[0].nodes[0]["intrinsic"])
            self.num_parameters += intrinsic_size - 1
        return self

    def transform(self, X: list[nx.Graph]):
        def todo(graph, filtrations=self.filtrations) -> list[mp.SimplexTreeMulti]:
            st = mp.SimplexTreeMulti(num_parameters=self.num_parameters)
            nodes = np.asarray(graph.nodes, dtype=int).reshape(1, -1)
            nodes_filtrations = np.asarray(
                [
                    [
                        filtration
                        for filtration_ in filtrations
                        for filtration in np.asarray(
                            graph.nodes[node][filtration_]
                        ).reshape(-1)
                        # this reshape is for attributes that are vectors
                    ]
                    for node in graph.nodes
                ],
                dtype=np.float32,
            )
            st.insert_batch(nodes, nodes_filtrations)
            edges = np.asarray(graph.edges, dtype=int).T
            edges_filtrations = np.asarray(
                [
                    [
                        filtration
                        for filtration_ in filtrations
                        for filtration in np.asarray(graph[u][v][filtration_]).reshape(
                            -1
                        )
                        # this reshape is for attributes that are vectors
                    ]
                    for u, v in graph.edges
                ],
                dtype=np.float32,
            )
            st.insert_batch(edges, edges_filtrations)
            if st.num_parameters == 2:
                # TODO : wait for a filtration domination update
                st.collapse_edges(num=self.num_collapses)
            # st.make_filtration_non_decreasing() ## Ricci is not safe ...
            # same output for each pipelines, some have a supplementary axis.
            return [st]

        return (
            [delayed(todo)(graph) for graph in X]
            if self.delayed
            # memory bound imo
            else [
                todo(graph=graph)
                for graph in tqdm(
                    X,
                    desc="Computing simplextrees from graphs",
                    disable=not self.progress,
                )
            ]
            # # ,
            # )
            # else Parallel(n_jobs=-1, prefer="threads")(
            #     delayed(todo)(graph)
            #     for graph in tqdm(
            #         X,
            #         desc="Computing simplextrees from graphs",
            #         disable=not self.progress,
            #     )
            # )
        )
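For context on the module diffed above, here is a minimal usage sketch based only on the signatures visible in the listing. It assumes a graph dataset (the hypothetical example name graphs/BZR is used) has been placed under DATASET_PATH as described in _check_installed, and that the requested filtrations can be computed on it:

import multipers.data.graphs as mdg

# Load a dataset installed under ~/Datasets/graphs/<name>; if the requested
# filtration is missing, get() tries to compute and cache it first.
graphs, labels = mdg.get("graphs/BZR", filtration="cc")

# Turn each graph into a 2-parameter simplex tree from the "cc" and "degree"
# node/edge filtrations; transform() returns one single-element list [st]
# per input graph.
transformer = mdg.Graph2SimplexTrees(filtrations=["cc", "degree"])
simplex_trees = transformer.fit_transform(graphs)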