multipers-2.2.3-cp311-cp311-win_amd64.whl → multipers-2.3.0-cp311-cp311-win_amd64.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.

Potentially problematic release.

This version of multipers might be problematic.

Files changed (182)
  1. multipers/__init__.py +33 -31
  2. multipers/_signed_measure_meta.py +430 -430
  3. multipers/_slicer_meta.py +211 -212
  4. multipers/data/MOL2.py +458 -458
  5. multipers/data/UCR.py +18 -18
  6. multipers/data/graphs.py +466 -466
  7. multipers/data/immuno_regions.py +27 -27
  8. multipers/data/pytorch2simplextree.py +90 -90
  9. multipers/data/shape3d.py +101 -101
  10. multipers/data/synthetic.py +113 -111
  11. multipers/distances.py +198 -198
  12. multipers/filtration_conversions.pxd.tp +84 -84
  13. multipers/filtrations/__init__.py +18 -0
  14. multipers/filtrations/filtrations.py +289 -0
  15. multipers/filtrations.pxd +224 -224
  16. multipers/function_rips.cp311-win_amd64.pyd +0 -0
  17. multipers/function_rips.pyx +105 -105
  18. multipers/grids.cp311-win_amd64.pyd +0 -0
  19. multipers/grids.pyx +350 -350
  20. multipers/gudhi/Persistence_slices_interface.h +132 -132
  21. multipers/gudhi/Simplex_tree_interface.h +239 -245
  22. multipers/gudhi/Simplex_tree_multi_interface.h +516 -561
  23. multipers/gudhi/cubical_to_boundary.h +59 -59
  24. multipers/gudhi/gudhi/Bitmap_cubical_complex.h +450 -450
  25. multipers/gudhi/gudhi/Bitmap_cubical_complex_base.h +1070 -1070
  26. multipers/gudhi/gudhi/Bitmap_cubical_complex_periodic_boundary_conditions_base.h +579 -579
  27. multipers/gudhi/gudhi/Debug_utils.h +45 -45
  28. multipers/gudhi/gudhi/Fields/Multi_field.h +484 -484
  29. multipers/gudhi/gudhi/Fields/Multi_field_operators.h +455 -455
  30. multipers/gudhi/gudhi/Fields/Multi_field_shared.h +450 -450
  31. multipers/gudhi/gudhi/Fields/Multi_field_small.h +531 -531
  32. multipers/gudhi/gudhi/Fields/Multi_field_small_operators.h +507 -507
  33. multipers/gudhi/gudhi/Fields/Multi_field_small_shared.h +531 -531
  34. multipers/gudhi/gudhi/Fields/Z2_field.h +355 -355
  35. multipers/gudhi/gudhi/Fields/Z2_field_operators.h +376 -376
  36. multipers/gudhi/gudhi/Fields/Zp_field.h +420 -420
  37. multipers/gudhi/gudhi/Fields/Zp_field_operators.h +400 -400
  38. multipers/gudhi/gudhi/Fields/Zp_field_shared.h +418 -418
  39. multipers/gudhi/gudhi/Flag_complex_edge_collapser.h +337 -337
  40. multipers/gudhi/gudhi/Matrix.h +2107 -2107
  41. multipers/gudhi/gudhi/Multi_critical_filtration.h +1038 -1038
  42. multipers/gudhi/gudhi/Multi_persistence/Box.h +171 -171
  43. multipers/gudhi/gudhi/Multi_persistence/Line.h +282 -282
  44. multipers/gudhi/gudhi/Off_reader.h +173 -173
  45. multipers/gudhi/gudhi/One_critical_filtration.h +1432 -1431
  46. multipers/gudhi/gudhi/Persistence_matrix/Base_matrix.h +769 -769
  47. multipers/gudhi/gudhi/Persistence_matrix/Base_matrix_with_column_compression.h +686 -686
  48. multipers/gudhi/gudhi/Persistence_matrix/Boundary_matrix.h +842 -842
  49. multipers/gudhi/gudhi/Persistence_matrix/Chain_matrix.h +1350 -1350
  50. multipers/gudhi/gudhi/Persistence_matrix/Id_to_index_overlay.h +1105 -1105
  51. multipers/gudhi/gudhi/Persistence_matrix/Position_to_index_overlay.h +859 -859
  52. multipers/gudhi/gudhi/Persistence_matrix/RU_matrix.h +910 -910
  53. multipers/gudhi/gudhi/Persistence_matrix/allocators/entry_constructors.h +139 -139
  54. multipers/gudhi/gudhi/Persistence_matrix/base_pairing.h +230 -230
  55. multipers/gudhi/gudhi/Persistence_matrix/base_swap.h +211 -211
  56. multipers/gudhi/gudhi/Persistence_matrix/boundary_cell_position_to_id_mapper.h +60 -60
  57. multipers/gudhi/gudhi/Persistence_matrix/boundary_face_position_to_id_mapper.h +60 -60
  58. multipers/gudhi/gudhi/Persistence_matrix/chain_pairing.h +136 -136
  59. multipers/gudhi/gudhi/Persistence_matrix/chain_rep_cycles.h +190 -190
  60. multipers/gudhi/gudhi/Persistence_matrix/chain_vine_swap.h +616 -616
  61. multipers/gudhi/gudhi/Persistence_matrix/columns/chain_column_extra_properties.h +150 -150
  62. multipers/gudhi/gudhi/Persistence_matrix/columns/column_dimension_holder.h +106 -106
  63. multipers/gudhi/gudhi/Persistence_matrix/columns/column_utilities.h +219 -219
  64. multipers/gudhi/gudhi/Persistence_matrix/columns/entry_types.h +327 -327
  65. multipers/gudhi/gudhi/Persistence_matrix/columns/heap_column.h +1140 -1140
  66. multipers/gudhi/gudhi/Persistence_matrix/columns/intrusive_list_column.h +934 -934
  67. multipers/gudhi/gudhi/Persistence_matrix/columns/intrusive_set_column.h +934 -934
  68. multipers/gudhi/gudhi/Persistence_matrix/columns/list_column.h +980 -980
  69. multipers/gudhi/gudhi/Persistence_matrix/columns/naive_vector_column.h +1092 -1092
  70. multipers/gudhi/gudhi/Persistence_matrix/columns/row_access.h +192 -192
  71. multipers/gudhi/gudhi/Persistence_matrix/columns/set_column.h +921 -921
  72. multipers/gudhi/gudhi/Persistence_matrix/columns/small_vector_column.h +1093 -1093
  73. multipers/gudhi/gudhi/Persistence_matrix/columns/unordered_set_column.h +1012 -1012
  74. multipers/gudhi/gudhi/Persistence_matrix/columns/vector_column.h +1244 -1244
  75. multipers/gudhi/gudhi/Persistence_matrix/matrix_dimension_holders.h +186 -186
  76. multipers/gudhi/gudhi/Persistence_matrix/matrix_row_access.h +164 -164
  77. multipers/gudhi/gudhi/Persistence_matrix/ru_pairing.h +156 -156
  78. multipers/gudhi/gudhi/Persistence_matrix/ru_rep_cycles.h +376 -376
  79. multipers/gudhi/gudhi/Persistence_matrix/ru_vine_swap.h +540 -540
  80. multipers/gudhi/gudhi/Persistent_cohomology/Field_Zp.h +118 -118
  81. multipers/gudhi/gudhi/Persistent_cohomology/Multi_field.h +173 -173
  82. multipers/gudhi/gudhi/Persistent_cohomology/Persistent_cohomology_column.h +128 -128
  83. multipers/gudhi/gudhi/Persistent_cohomology.h +745 -745
  84. multipers/gudhi/gudhi/Points_off_io.h +171 -171
  85. multipers/gudhi/gudhi/Simple_object_pool.h +69 -69
  86. multipers/gudhi/gudhi/Simplex_tree/Simplex_tree_iterators.h +463 -463
  87. multipers/gudhi/gudhi/Simplex_tree/Simplex_tree_node_explicit_storage.h +83 -83
  88. multipers/gudhi/gudhi/Simplex_tree/Simplex_tree_siblings.h +106 -106
  89. multipers/gudhi/gudhi/Simplex_tree/Simplex_tree_star_simplex_iterators.h +277 -277
  90. multipers/gudhi/gudhi/Simplex_tree/hooks_simplex_base.h +62 -62
  91. multipers/gudhi/gudhi/Simplex_tree/indexing_tag.h +27 -27
  92. multipers/gudhi/gudhi/Simplex_tree/serialization_utils.h +62 -62
  93. multipers/gudhi/gudhi/Simplex_tree/simplex_tree_options.h +157 -157
  94. multipers/gudhi/gudhi/Simplex_tree.h +2794 -2794
  95. multipers/gudhi/gudhi/Simplex_tree_multi.h +152 -163
  96. multipers/gudhi/gudhi/distance_functions.h +62 -62
  97. multipers/gudhi/gudhi/graph_simplicial_complex.h +104 -104
  98. multipers/gudhi/gudhi/persistence_interval.h +253 -253
  99. multipers/gudhi/gudhi/persistence_matrix_options.h +170 -170
  100. multipers/gudhi/gudhi/reader_utils.h +367 -367
  101. multipers/gudhi/mma_interface_coh.h +256 -255
  102. multipers/gudhi/mma_interface_h0.h +223 -231
  103. multipers/gudhi/mma_interface_matrix.h +284 -282
  104. multipers/gudhi/naive_merge_tree.h +536 -575
  105. multipers/gudhi/scc_io.h +310 -289
  106. multipers/gudhi/truc.h +890 -888
  107. multipers/io.cp311-win_amd64.pyd +0 -0
  108. multipers/io.pyx +711 -711
  109. multipers/ml/accuracies.py +90 -90
  110. multipers/ml/convolutions.py +520 -520
  111. multipers/ml/invariants_with_persistable.py +79 -79
  112. multipers/ml/kernels.py +176 -176
  113. multipers/ml/mma.py +713 -714
  114. multipers/ml/one.py +472 -472
  115. multipers/ml/point_clouds.py +352 -346
  116. multipers/ml/signed_measures.py +1589 -1589
  117. multipers/ml/sliced_wasserstein.py +461 -461
  118. multipers/ml/tools.py +113 -113
  119. multipers/mma_structures.cp311-win_amd64.pyd +0 -0
  120. multipers/mma_structures.pxd +127 -127
  121. multipers/mma_structures.pyx +4 -4
  122. multipers/mma_structures.pyx.tp +1085 -1085
  123. multipers/multi_parameter_rank_invariant/diff_helpers.h +84 -93
  124. multipers/multi_parameter_rank_invariant/euler_characteristic.h +97 -97
  125. multipers/multi_parameter_rank_invariant/function_rips.h +322 -322
  126. multipers/multi_parameter_rank_invariant/hilbert_function.h +769 -769
  127. multipers/multi_parameter_rank_invariant/persistence_slices.h +148 -148
  128. multipers/multi_parameter_rank_invariant/rank_invariant.h +369 -369
  129. multipers/multiparameter_edge_collapse.py +41 -41
  130. multipers/multiparameter_module_approximation/approximation.h +2296 -2295
  131. multipers/multiparameter_module_approximation/combinatory.h +129 -129
  132. multipers/multiparameter_module_approximation/debug.h +107 -107
  133. multipers/multiparameter_module_approximation/format_python-cpp.h +286 -286
  134. multipers/multiparameter_module_approximation/heap_column.h +238 -238
  135. multipers/multiparameter_module_approximation/images.h +79 -79
  136. multipers/multiparameter_module_approximation/list_column.h +174 -174
  137. multipers/multiparameter_module_approximation/list_column_2.h +232 -232
  138. multipers/multiparameter_module_approximation/ru_matrix.h +347 -347
  139. multipers/multiparameter_module_approximation/set_column.h +135 -135
  140. multipers/multiparameter_module_approximation/structure_higher_dim_barcode.h +36 -36
  141. multipers/multiparameter_module_approximation/unordered_set_column.h +166 -166
  142. multipers/multiparameter_module_approximation/utilities.h +403 -419
  143. multipers/multiparameter_module_approximation/vector_column.h +223 -223
  144. multipers/multiparameter_module_approximation/vector_matrix.h +331 -331
  145. multipers/multiparameter_module_approximation/vineyards.h +464 -464
  146. multipers/multiparameter_module_approximation/vineyards_trajectories.h +649 -649
  147. multipers/multiparameter_module_approximation.cp311-win_amd64.pyd +0 -0
  148. multipers/multiparameter_module_approximation.pyx +216 -217
  149. multipers/pickle.py +90 -53
  150. multipers/plots.py +342 -334
  151. multipers/point_measure.cp311-win_amd64.pyd +0 -0
  152. multipers/point_measure.pyx +322 -320
  153. multipers/simplex_tree_multi.cp311-win_amd64.pyd +0 -0
  154. multipers/simplex_tree_multi.pxd +133 -133
  155. multipers/simplex_tree_multi.pyx +18 -15
  156. multipers/simplex_tree_multi.pyx.tp +1939 -1935
  157. multipers/slicer.cp311-win_amd64.pyd +0 -0
  158. multipers/slicer.pxd +81 -20
  159. multipers/slicer.pxd.tp +215 -214
  160. multipers/slicer.pyx +1091 -308
  161. multipers/slicer.pyx.tp +924 -914
  162. multipers/tensor/tensor.h +672 -672
  163. multipers/tensor.pxd +13 -13
  164. multipers/test.pyx +44 -44
  165. multipers/tests/__init__.py +57 -57
  166. multipers/torch/diff_grids.py +217 -217
  167. multipers/torch/rips_density.py +310 -304
  168. {multipers-2.2.3.dist-info → multipers-2.3.0.dist-info}/LICENSE +21 -21
  169. {multipers-2.2.3.dist-info → multipers-2.3.0.dist-info}/METADATA +21 -11
  170. multipers-2.3.0.dist-info/RECORD +182 -0
  171. multipers/tests/test_diff_helper.py +0 -73
  172. multipers/tests/test_hilbert_function.py +0 -82
  173. multipers/tests/test_mma.py +0 -83
  174. multipers/tests/test_point_clouds.py +0 -49
  175. multipers/tests/test_python-cpp_conversion.py +0 -82
  176. multipers/tests/test_signed_betti.py +0 -181
  177. multipers/tests/test_signed_measure.py +0 -89
  178. multipers/tests/test_simplextreemulti.py +0 -221
  179. multipers/tests/test_slicer.py +0 -221
  180. multipers-2.2.3.dist-info/RECORD +0 -189
  181. {multipers-2.2.3.dist-info → multipers-2.3.0.dist-info}/WHEEL +0 -0
  182. {multipers-2.2.3.dist-info → multipers-2.3.0.dist-info}/top_level.txt +0 -0
multipers/data/graphs.py CHANGED
@@ -1,466 +1,466 @@
- import numpy as np
- from os.path import expanduser, exists
- import networkx as nx
- from warnings import warn
- import pickle
- from joblib import Parallel, delayed
- from tqdm import tqdm
- from sklearn.preprocessing import LabelEncoder
- from scipy.spatial import distance_matrix
- from sklearn.base import BaseEstimator, TransformerMixin, clone
- import multipers as mp
- from typing import Iterable, List, Optional
- from numpy.typing import ArrayLike
-
- DATASET_PATH = expanduser("~/Datasets/")
-
-
- def _check_installed(dataset: str):
-     from warnings import warn
-     from os.path import exists
-
-     assert dataset.startswith(
-         "graphs/"
-     ), "Graph datasets have to be of the form graphs/<name>"
-     if exists(DATASET_PATH + dataset):
-         return
-     else:
-         warn(
-             f"""
-             Dataset {dataset} not installed.
-             You can find it in https://networkrepository.com
-             Then (optinally) configure multipers.data.graphs.DATASET_PATH, which is currently
-             > {DATASET_PATH=}
-             and puts this dataset in $DATASET_PATH/{dataset}
-             """
-         )
-         raise ValueError("Unknown dataset.")
-
-
- def get(dataset: str, filtration: Optional[str] = None):
-     if filtration is None:
-         return get_graphs(dataset)
-     graphs, labels = get_graphs(dataset)
-     try:
-         for g in graphs:
-             for node in g.nodes:
-                 g.nodes[node][filtration]
-     except:
-         print(
-             f"Filtration {filtration} not computed, trying to compute it ...",
-             flush=True,
-         )
-         compute_filtration(dataset, filtration)
-     return get_graphs(dataset)
-
-
- def get_from_file_old(dataset: str, label="lb"):
-     from os import walk
-     from scipy.io import loadmat
-     from warnings import warn
-
-     path = DATASET_PATH + dataset + "/mat/"
-     labels: list[int] = []
-     gs: list[nx.Graph] = []
-     for root, dir, files in walk(path):
-         for file in files:
-             file_ppties = file.split("_")
-             gid = file_ppties[5]
-             i = 0
-             while i + 1 < len(file_ppties) and file_ppties[i] != label:
-                 i += 1
-             if i + 1 >= len(file_ppties):
-                 warn(f"Cannot find label {label} on file {file}.")
-             else:
-                 labels += [file_ppties[i + 1]]
-                 adj_mat = np.array(loadmat(path + file)["A"], dtype=np.float32)
-                 gs.append(nx.Graph(adj_mat))
-     return gs, labels
-
-
- def get_from_file(dataset: str):
-     from os.path import expanduser, exists
-
-     path = DATASET_PATH + f"{dataset}/{dataset[7:]}."
-     try:
-         graphs_ids = np.loadtxt(path + "graph_idx")
-     except:
-         return get_from_file_old(dataset=dataset)
-     labels: list[int] = LabelEncoder().fit_transform(np.loadtxt(path + "graph_labels"))
-     edges = np.loadtxt(path + "edges", delimiter=",", dtype=int) - 1
-     has_intrinsic_filtration = exists(path + "node_attrs")
-     graphs: list[nx.Graph] = []
-     if has_intrinsic_filtration:
-         F = np.loadtxt(path + "node_attrs", delimiter=",")
-     for graph_id in tqdm(np.unique(graphs_ids), desc="Reading graphs from file"):
-         (nodes,) = np.where(graphs_ids == graph_id)
-
-         def graph_has_edge(u: int, v: int) -> bool:
-             if u in nodes or v in nodes:
-                 assert u in nodes and v in nodes, f"Nodes\
-                 {u} and {v} are not in the same graph"
-                 return True
-             return False
-
-         graph_edges = [(u, v) for u, v in edges if graph_has_edge(u, v)]
-         g = nx.Graph(graph_edges)
-         if has_intrinsic_filtration:
-             node_attrs = {node: F[node] for node in nodes}
-             nx.set_node_attributes(g, node_attrs, "intrinsic")
-         graphs.append(g)
-     return graphs, labels
-
-
- def get_graphs(dataset: str, N: int | str = "") -> tuple[list[nx.Graph], list[int]]:
-     _check_installed(dataset)
-     graphs_path = f"{DATASET_PATH}{dataset}/graphs{N}.pkl"
-     labels_path = f"{DATASET_PATH}{dataset}/labels{N}.pkl"
-     if not exists(graphs_path) or not exists(labels_path):
-         if dataset.startswith("3dshapes/"):
-             return get_from_file_old(
-                 dataset,
-             )
-
-         graphs, labels = get_from_file(
-             dataset,
-         )
-         print("Saving graphs at :", graphs_path)
-         set_graphs(graphs=graphs, labels=labels, dataset=dataset)
-     else:
-         graphs = pickle.load(open(graphs_path, "rb"))
-         labels = pickle.load(open(labels_path, "rb"))
-     from sklearn.preprocessing import LabelEncoder
-
-     return graphs, LabelEncoder().fit_transform(labels)
-
-
- # saves graphs (and filtration values) into a file
- def set_graphs(graphs: list[nx.Graph], labels: list, dataset: str, N: int | str = ""):
-     graphs_path = f"{DATASET_PATH}{dataset}/graphs{N}.pkl"
-     labels_path = f"{DATASET_PATH}{dataset}/labels{N}.pkl"
-     pickle.dump(graphs, open(graphs_path, "wb"))
-     pickle.dump(labels, open(labels_path, "wb"))
-     return
-
-
- def reset_graphs(dataset: str, N=None): # Resets filtrations values on graphs
-     graphs, labels = get_from_file(dataset)
-     set_graphs(graphs, labels, dataset)
-     return
-
-
- def compute_ricci(graphs: list[nx.Graph], alpha=0.5, progress=1):
-     from GraphRicciCurvature.OllivierRicci import OllivierRicci
-
-     def ricci(graph, alpha=alpha):
-         return OllivierRicci(graph, alpha=alpha).compute_ricci_curvature()
-
-     graphs = [
-         ricci(g) for g in tqdm(graphs, disable=not progress, desc="Computing ricci")
-     ]
-
-     def push_back_node(graph):
-         # for node in graph.nodes:
-         # graph.nodes[node]['ricciCurvature'] = np.min([graph[node][node2]['ricciCurvature'] for node2 in graph[node]] + [graph.nodes[node]['ricciCurvature']])
-         node_filtrations = {
-             node: -1
-             if len(graph[node]) == 0
-             else np.min([graph[node][node2]["ricciCurvature"] for node2 in graph[node]])
-             for node in graph.nodes
-         }
-         nx.set_node_attributes(graph, node_filtrations, "ricciCurvature")
-         return graph
-
-     graphs = [push_back_node(g) for g in graphs]
-     return graphs
-
-
- def compute_cc(graphs: list[nx.Graph], progress=1):
-     def _cc(g):
-         cc = nx.closeness_centrality(g)
-         nx.set_node_attributes(g, cc, "cc")
-         edges_cc = {(u, v): max(cc[u], cc[v]) for u, v in g.edges}
-         nx.set_edge_attributes(g, edges_cc, "cc")
-         return g
-
-     graphs = Parallel(n_jobs=1, prefer="threads")(
-         delayed(_cc)(g) for g in tqdm(graphs, disable=not progress, desc="Computing cc")
-     )
-     return graphs
-     # for g in tqdm(graphs, desc="Computing cc"):
-     # _cc(g)
-     # return graphs
-
-
- def compute_degree(graphs: list[nx.Graph], progress=1):
-     def _degree(g):
-         degrees = {i: 1.1 if degree == 0 else 1 / degree for i, degree in g.degree}
-         nx.set_node_attributes(g, degrees, "degree")
-         edges_dg = {(u, v): max(degrees[u], degrees[v]) for u, v in g.edges}
-         nx.set_edge_attributes(g, edges_dg, "degree")
-         return g
-
-     graphs = Parallel(n_jobs=1, prefer="threads")(
-         delayed(_degree)(g)
-         for g in tqdm(graphs, disable=not progress, desc="Computing degree")
-     )
-     return graphs
-     # for g in tqdm(graphs, desc="Computing degree"):
-     # _degree(g)
-     # return graphs
-
-
- # TODO : make it compatible with non-connexe graphs
- def compute_fiedler(graphs: list[nx.Graph], progress=1):
-     def _fiedler(g):
-         connected_graphs = [
-             nx.subgraph(g, nodes) for nodes in nx.connected_components(g)
-         ]
-         fiedler_vectors = [
-             nx.fiedler_vector(g) ** 2
-             if g.number_of_nodes() > 2
-             else np.zeros(
-                 g.number_of_nodes(
-                     # order of nx.fiedler_vector correspond to nx.laplacian -> g.nodes
-                 )
-             )
-             for g in connected_graphs
-         ]
-         fiedler_dict = {
-             node: fiedler_vector[node_index]
-             for g, fiedler_vector in zip(connected_graphs, fiedler_vectors)
-             for node_index, node in enumerate(list(g.nodes))
-         }
-         nx.set_node_attributes(g, fiedler_dict, "fiedler")
-         edges_fiedler = {
-             (u, v): max(fiedler_dict[u], fiedler_dict[v]) for u, v in g.edges
-         }
-         nx.set_edge_attributes(g, edges_fiedler, "fiedler")
-         return g
-
-     graphs = Parallel(n_jobs=1, prefer="threads")(
-         delayed(_fiedler)(g)
-         for g in tqdm(graphs, disable=not progress, desc="Computing fiedler")
-     )
-     return graphs
-     # for g in tqdm(graphs, desc="Computing fiedler"):
-     # _fiedler(g)
-     # return graphs
-
-
- def compute_hks(graphs: list[nx.Graph], t: float, progress=1):
-     def _hks(g: nx.Graph):
-         w, vps = np.linalg.eig(
-             nx.laplacianmatrix.normalized_laplacian_matrix(
-                 g, nodelist=g.nodes()
-             ).toarray()
-         ) # order is given by g.nodes order
-         w = w.view(dtype=float)
-         vps = vps.view(dtype=float)
-         node_hks = {
-             node: np.sum(np.exp(-t * w) * np.square(vps[node_index, :]))
-             for node_index, node in enumerate(g.nodes)
-         }
-         nx.set_node_attributes(g, node_hks, f"hks_{t}")
-         edges_hks = {(u, v): max(node_hks[u], node_hks[v]) for u, v in g.edges}
-         nx.set_edge_attributes(g, edges_hks, f"hks_{t}")
-         return g
-
-     graphs = Parallel(n_jobs=1, prefer="threads")(
-         delayed(_hks)(g)
-         for g in tqdm(graphs, disable=not progress, desc=f"Computing hks_{t}")
-     )
-     return graphs
-
-
- def compute_geodesic(graphs: list[nx.Graph], progress=1):
-     def _f(g: nx.Graph):
-         try:
-             nodes_intrinsic = {i: n["intrinsic"] for i, n in g.nodes.data()}
-         except:
-             warn(
-                 "This graph doesn't have an intrinsic filtration, will use 0 instead ..."
-             )
-             nodes_intrinsic = {i: 0 for i, n in g.nodes.data()}
-             # return g
-         node_geodesic = {i: 0 for i in g.nodes}
-         nx.set_node_attributes(g, node_geodesic, f"geodesic")
-         edges_geodesic = {
-             (u, v): np.linalg.norm(nodes_intrinsic[u] - nodes_intrinsic[v])
-             for u, v in g.edges
-         }
-         nx.set_edge_attributes(g, edges_geodesic, f"geodesic")
-         return g
-
-     graphs = Parallel(n_jobs=1, prefer="threads")(
-         delayed(_f)(g)
-         for g in tqdm(
-             graphs, disable=not progress, desc=f"Computing geodesic distances on graphs"
-         )
-     )
-     return graphs
-
-
- def compute_intrinsic(graphs: list[nx.Graph], progress=1, nowarning=False):
-     def _f(g: nx.Graph):
-         try:
-             nodes_intrinsic = {i: n["intrinsic"] for i, n in g.nodes.data()}
-         except:
-             if not nowarning:
-                 raise ValueError("This graph doesn't have an intrinsic filtration.")
-             else:
-                 return g
-         edges_intrinsic = {
-             (u, v): np.max([nodes_intrinsic[u], nodes_intrinsic[v]], axis=0)
-             for u, v in g.edges
-         }
-         nx.set_edge_attributes(g, edges_intrinsic, "intrinsic")
-         return g
-
-     graphs = Parallel(n_jobs=1, prefer="threads")(
-         delayed(_f)(g)
-         for g in tqdm(
-             graphs, disable=not progress, desc="Computing geodesic distances on graphs"
-         )
-     )
-     return graphs
-
-
- def compute_filtration(dataset: str, filtration: str = "ALL", **kwargs):
-     if filtration == "ALL":
-         reset_graphs(dataset) # not necessary
-         graphs, labels = get_graphs(dataset, **kwargs)
-         graphs = compute_intrinsic(graphs, nowarning=True)
-         graphs = compute_geodesic(graphs)
-         graphs = compute_cc(graphs)
-         graphs = compute_degree(graphs)
-         graphs = compute_ricci(graphs)
-         graphs = compute_fiedler(graphs)
-         graphs = compute_hks(graphs, 10)
-         set_graphs(graphs=graphs, labels=labels, dataset=dataset)
-         return
-     graphs, labels = get_graphs(dataset, **kwargs)
-     if filtration == "dijkstra":
-         return
-     elif filtration == "cc":
-         graphs = compute_cc(graphs)
-     elif filtration == "degree":
-         graphs = compute_degree(graphs)
-     elif filtration == "ricciCurvature":
-         graphs = compute_ricci(graphs)
-     elif filtration == "fiedler":
-         graphs = compute_fiedler(graphs)
-     elif filtration == "geodesic":
-         graphs = compute_geodesic(graphs)
-     elif filtration.startswith("hks_"):
-         # don't want do deal with floats, makes dots in title...
-         t = int(filtration[4:])
-         graphs = compute_hks(graphs=graphs, t=t)
-     else:
-         warn(f"Filtration {filtration} not implemented !")
-         return
-     set_graphs(graphs=graphs, labels=labels, dataset=dataset)
-     return
-
-
- class Graph2SimplexTrees(BaseEstimator, TransformerMixin):
-     """
-     Transforms a list of networkx graphs into a list of simplextree multi
-
-     Usual Filtrations
-     -----------------
-     - "cc" closeness centrality
-     - "geodesic" if the graph provides data to compute it, e.g., BZR, COX2, PROTEINS
-     - "degree"
-     - "ricciCurvature" the ricci curvature
-     - "fiedler" the square of the fiedler vector
-     """
-
-     def __init__(
-         self,
-         filtrations=[],
-         delayed=False,
-         num_collapses=100,
-         progress: bool = False,
-     ):
-         super().__init__()
-         self.filtrations = filtrations # filtration to search in graph
-         self.delayed = delayed # reverses the filtration #TODO
-         self.num_collapses = num_collapses
-         self.progress = progress
-         self.num_parameters: int = len(filtrations)
-
-     def fit(self, X, y=None):
-         if len(X) == 0:
-             return self
-         self.num_parameters = len(self.filtrations)
-         if "intrinsic" in self.filtrations:
-             intrinsic_size = len(X[0].nodes[0]["intrinsic"])
-             self.num_parameters += intrinsic_size - 1
-         return self
-
-     def transform(self, X: list[nx.Graph]):
-         def todo(graph, filtrations=self.filtrations) -> list[mp.SimplexTreeMulti]:
-             st = mp.SimplexTreeMulti(num_parameters=self.num_parameters)
-             nodes = np.asarray(graph.nodes, dtype=int).reshape(1, -1)
-             nodes_filtrations = np.asarray(
-                 [
-                     [
-                         filtration
-                         for filtration_ in filtrations
-                         for filtration in np.asarray(
-                             graph.nodes[node][filtration_]
-                         ).reshape(-1)
-                         # this reshape is for attributes that are vectors
-                     ]
-                     for node in graph.nodes
-                 ],
-                 dtype=np.float32,
-             )
-             st.insert_batch(nodes, nodes_filtrations)
-             edges = np.asarray(graph.edges, dtype=int).T
-             edges_filtrations = np.asarray(
-                 [
-                     [
-                         filtration
-                         for filtration_ in filtrations
-                         for filtration in np.asarray(graph[u][v][filtration_]).reshape(
-                             -1
-                         )
-                         # this reshape is for attributes that are vectors
-                     ]
-                     for u, v in graph.edges
-                 ],
-                 dtype=np.float32,
-             )
-             st.insert_batch(edges, edges_filtrations)
-             if st.num_parameters == 2:
-                 # TODO : wait for a filtration domination update
-                 st.collapse_edges(num=self.num_collapses)
-             # st.make_filtration_non_decreasing() ## Ricci is not safe ...
-             # same output for each pipelines, some have a supplementary axis.
-             return [st]
-
-         return (
-             [delayed(todo)(graph) for graph in X]
-             if self.delayed
-             # memory bound imo
-             else [
-                 todo(graph=graph)
-                 for graph in tqdm(
-                     X,
-                     desc="Computing simplextrees from graphs",
-                     disable=not self.progress,
-                 )
-             ]
-             # # ,
-             # )
-             # else Parallel(n_jobs=-1, prefer="threads")(
-             # delayed(todo)(graph)
-             # for graph in tqdm(
-             # X,
-             # desc="Computing simplextrees from graphs",
-             # disable=not self.progress,
-             # )
-             # )
-         )
+ [466 added lines: identical, line for line, to the 466 removed lines above]
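
For orientation, a minimal usage sketch of how the helpers defined in graphs.py above are typically chained; it is not part of the diff. The dataset name "graphs/BZR" is only an illustration (BZR is one of the datasets named in the Graph2SimplexTrees docstring), the raw data must already sit under DATASET_PATH, and the module is assumed to be importable as multipers.data.graphs.

import multipers.data.graphs as mdg

# Load a graph dataset from $DATASET_PATH/graphs/<name>; per get() above, a
# missing node/edge filtration is computed and cached on demand.
graphs, labels = mdg.get("graphs/BZR", filtration="cc")
graphs, labels = mdg.get("graphs/BZR", filtration="degree")  # make sure both filtrations exist

# Build one 2-parameter SimplexTreeMulti per graph, filtered by closeness
# centrality and inverse degree; transform() returns a singleton list
# [SimplexTreeMulti] for each input graph.
pipeline = mdg.Graph2SimplexTrees(filtrations=["cc", "degree"], progress=True)
simplex_trees = pipeline.fit_transform(graphs)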