multipers 2.3.3b6__cp311-cp311-manylinux_2_39_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of multipers might be problematic. Click here for more details.

Files changed (182) hide show
  1. multipers/__init__.py +33 -0
  2. multipers/_signed_measure_meta.py +453 -0
  3. multipers/_slicer_meta.py +211 -0
  4. multipers/array_api/__init__.py +45 -0
  5. multipers/array_api/numpy.py +41 -0
  6. multipers/array_api/torch.py +58 -0
  7. multipers/data/MOL2.py +458 -0
  8. multipers/data/UCR.py +18 -0
  9. multipers/data/__init__.py +1 -0
  10. multipers/data/graphs.py +466 -0
  11. multipers/data/immuno_regions.py +27 -0
  12. multipers/data/minimal_presentation_to_st_bf.py +0 -0
  13. multipers/data/pytorch2simplextree.py +91 -0
  14. multipers/data/shape3d.py +101 -0
  15. multipers/data/synthetic.py +113 -0
  16. multipers/distances.py +202 -0
  17. multipers/filtration_conversions.pxd +229 -0
  18. multipers/filtration_conversions.pxd.tp +84 -0
  19. multipers/filtrations/__init__.py +18 -0
  20. multipers/filtrations/density.py +574 -0
  21. multipers/filtrations/filtrations.py +361 -0
  22. multipers/filtrations.pxd +224 -0
  23. multipers/function_rips.cpython-311-x86_64-linux-gnu.so +0 -0
  24. multipers/function_rips.pyx +105 -0
  25. multipers/grids.cpython-311-x86_64-linux-gnu.so +0 -0
  26. multipers/grids.pyx +433 -0
  27. multipers/gudhi/Persistence_slices_interface.h +132 -0
  28. multipers/gudhi/Simplex_tree_interface.h +239 -0
  29. multipers/gudhi/Simplex_tree_multi_interface.h +551 -0
  30. multipers/gudhi/cubical_to_boundary.h +59 -0
  31. multipers/gudhi/gudhi/Bitmap_cubical_complex.h +450 -0
  32. multipers/gudhi/gudhi/Bitmap_cubical_complex_base.h +1070 -0
  33. multipers/gudhi/gudhi/Bitmap_cubical_complex_periodic_boundary_conditions_base.h +579 -0
  34. multipers/gudhi/gudhi/Debug_utils.h +45 -0
  35. multipers/gudhi/gudhi/Fields/Multi_field.h +484 -0
  36. multipers/gudhi/gudhi/Fields/Multi_field_operators.h +455 -0
  37. multipers/gudhi/gudhi/Fields/Multi_field_shared.h +450 -0
  38. multipers/gudhi/gudhi/Fields/Multi_field_small.h +531 -0
  39. multipers/gudhi/gudhi/Fields/Multi_field_small_operators.h +507 -0
  40. multipers/gudhi/gudhi/Fields/Multi_field_small_shared.h +531 -0
  41. multipers/gudhi/gudhi/Fields/Z2_field.h +355 -0
  42. multipers/gudhi/gudhi/Fields/Z2_field_operators.h +376 -0
  43. multipers/gudhi/gudhi/Fields/Zp_field.h +420 -0
  44. multipers/gudhi/gudhi/Fields/Zp_field_operators.h +400 -0
  45. multipers/gudhi/gudhi/Fields/Zp_field_shared.h +418 -0
  46. multipers/gudhi/gudhi/Flag_complex_edge_collapser.h +337 -0
  47. multipers/gudhi/gudhi/Matrix.h +2107 -0
  48. multipers/gudhi/gudhi/Multi_critical_filtration.h +1038 -0
  49. multipers/gudhi/gudhi/Multi_persistence/Box.h +174 -0
  50. multipers/gudhi/gudhi/Multi_persistence/Line.h +282 -0
  51. multipers/gudhi/gudhi/Off_reader.h +173 -0
  52. multipers/gudhi/gudhi/One_critical_filtration.h +1441 -0
  53. multipers/gudhi/gudhi/Persistence_matrix/Base_matrix.h +769 -0
  54. multipers/gudhi/gudhi/Persistence_matrix/Base_matrix_with_column_compression.h +686 -0
  55. multipers/gudhi/gudhi/Persistence_matrix/Boundary_matrix.h +842 -0
  56. multipers/gudhi/gudhi/Persistence_matrix/Chain_matrix.h +1350 -0
  57. multipers/gudhi/gudhi/Persistence_matrix/Id_to_index_overlay.h +1105 -0
  58. multipers/gudhi/gudhi/Persistence_matrix/Position_to_index_overlay.h +859 -0
  59. multipers/gudhi/gudhi/Persistence_matrix/RU_matrix.h +910 -0
  60. multipers/gudhi/gudhi/Persistence_matrix/allocators/entry_constructors.h +139 -0
  61. multipers/gudhi/gudhi/Persistence_matrix/base_pairing.h +230 -0
  62. multipers/gudhi/gudhi/Persistence_matrix/base_swap.h +211 -0
  63. multipers/gudhi/gudhi/Persistence_matrix/boundary_cell_position_to_id_mapper.h +60 -0
  64. multipers/gudhi/gudhi/Persistence_matrix/boundary_face_position_to_id_mapper.h +60 -0
  65. multipers/gudhi/gudhi/Persistence_matrix/chain_pairing.h +136 -0
  66. multipers/gudhi/gudhi/Persistence_matrix/chain_rep_cycles.h +190 -0
  67. multipers/gudhi/gudhi/Persistence_matrix/chain_vine_swap.h +616 -0
  68. multipers/gudhi/gudhi/Persistence_matrix/columns/chain_column_extra_properties.h +150 -0
  69. multipers/gudhi/gudhi/Persistence_matrix/columns/column_dimension_holder.h +106 -0
  70. multipers/gudhi/gudhi/Persistence_matrix/columns/column_utilities.h +219 -0
  71. multipers/gudhi/gudhi/Persistence_matrix/columns/entry_types.h +327 -0
  72. multipers/gudhi/gudhi/Persistence_matrix/columns/heap_column.h +1140 -0
  73. multipers/gudhi/gudhi/Persistence_matrix/columns/intrusive_list_column.h +934 -0
  74. multipers/gudhi/gudhi/Persistence_matrix/columns/intrusive_set_column.h +934 -0
  75. multipers/gudhi/gudhi/Persistence_matrix/columns/list_column.h +980 -0
  76. multipers/gudhi/gudhi/Persistence_matrix/columns/naive_vector_column.h +1092 -0
  77. multipers/gudhi/gudhi/Persistence_matrix/columns/row_access.h +192 -0
  78. multipers/gudhi/gudhi/Persistence_matrix/columns/set_column.h +921 -0
  79. multipers/gudhi/gudhi/Persistence_matrix/columns/small_vector_column.h +1093 -0
  80. multipers/gudhi/gudhi/Persistence_matrix/columns/unordered_set_column.h +1012 -0
  81. multipers/gudhi/gudhi/Persistence_matrix/columns/vector_column.h +1244 -0
  82. multipers/gudhi/gudhi/Persistence_matrix/matrix_dimension_holders.h +186 -0
  83. multipers/gudhi/gudhi/Persistence_matrix/matrix_row_access.h +164 -0
  84. multipers/gudhi/gudhi/Persistence_matrix/ru_pairing.h +156 -0
  85. multipers/gudhi/gudhi/Persistence_matrix/ru_rep_cycles.h +376 -0
  86. multipers/gudhi/gudhi/Persistence_matrix/ru_vine_swap.h +540 -0
  87. multipers/gudhi/gudhi/Persistent_cohomology/Field_Zp.h +118 -0
  88. multipers/gudhi/gudhi/Persistent_cohomology/Multi_field.h +173 -0
  89. multipers/gudhi/gudhi/Persistent_cohomology/Persistent_cohomology_column.h +128 -0
  90. multipers/gudhi/gudhi/Persistent_cohomology.h +745 -0
  91. multipers/gudhi/gudhi/Points_off_io.h +171 -0
  92. multipers/gudhi/gudhi/Simple_object_pool.h +69 -0
  93. multipers/gudhi/gudhi/Simplex_tree/Simplex_tree_iterators.h +463 -0
  94. multipers/gudhi/gudhi/Simplex_tree/Simplex_tree_node_explicit_storage.h +83 -0
  95. multipers/gudhi/gudhi/Simplex_tree/Simplex_tree_siblings.h +106 -0
  96. multipers/gudhi/gudhi/Simplex_tree/Simplex_tree_star_simplex_iterators.h +277 -0
  97. multipers/gudhi/gudhi/Simplex_tree/hooks_simplex_base.h +62 -0
  98. multipers/gudhi/gudhi/Simplex_tree/indexing_tag.h +27 -0
  99. multipers/gudhi/gudhi/Simplex_tree/serialization_utils.h +62 -0
  100. multipers/gudhi/gudhi/Simplex_tree/simplex_tree_options.h +157 -0
  101. multipers/gudhi/gudhi/Simplex_tree.h +2794 -0
  102. multipers/gudhi/gudhi/Simplex_tree_multi.h +152 -0
  103. multipers/gudhi/gudhi/distance_functions.h +62 -0
  104. multipers/gudhi/gudhi/graph_simplicial_complex.h +104 -0
  105. multipers/gudhi/gudhi/persistence_interval.h +253 -0
  106. multipers/gudhi/gudhi/persistence_matrix_options.h +170 -0
  107. multipers/gudhi/gudhi/reader_utils.h +367 -0
  108. multipers/gudhi/mma_interface_coh.h +256 -0
  109. multipers/gudhi/mma_interface_h0.h +223 -0
  110. multipers/gudhi/mma_interface_matrix.h +293 -0
  111. multipers/gudhi/naive_merge_tree.h +536 -0
  112. multipers/gudhi/scc_io.h +310 -0
  113. multipers/gudhi/truc.h +1403 -0
  114. multipers/io.cpython-311-x86_64-linux-gnu.so +0 -0
  115. multipers/io.pyx +644 -0
  116. multipers/ml/__init__.py +0 -0
  117. multipers/ml/accuracies.py +90 -0
  118. multipers/ml/invariants_with_persistable.py +79 -0
  119. multipers/ml/kernels.py +176 -0
  120. multipers/ml/mma.py +713 -0
  121. multipers/ml/one.py +472 -0
  122. multipers/ml/point_clouds.py +352 -0
  123. multipers/ml/signed_measures.py +1589 -0
  124. multipers/ml/sliced_wasserstein.py +461 -0
  125. multipers/ml/tools.py +113 -0
  126. multipers/mma_structures.cpython-311-x86_64-linux-gnu.so +0 -0
  127. multipers/mma_structures.pxd +128 -0
  128. multipers/mma_structures.pyx +2786 -0
  129. multipers/mma_structures.pyx.tp +1094 -0
  130. multipers/multi_parameter_rank_invariant/diff_helpers.h +84 -0
  131. multipers/multi_parameter_rank_invariant/euler_characteristic.h +97 -0
  132. multipers/multi_parameter_rank_invariant/function_rips.h +322 -0
  133. multipers/multi_parameter_rank_invariant/hilbert_function.h +769 -0
  134. multipers/multi_parameter_rank_invariant/persistence_slices.h +148 -0
  135. multipers/multi_parameter_rank_invariant/rank_invariant.h +369 -0
  136. multipers/multiparameter_edge_collapse.py +41 -0
  137. multipers/multiparameter_module_approximation/approximation.h +2330 -0
  138. multipers/multiparameter_module_approximation/combinatory.h +129 -0
  139. multipers/multiparameter_module_approximation/debug.h +107 -0
  140. multipers/multiparameter_module_approximation/euler_curves.h +0 -0
  141. multipers/multiparameter_module_approximation/format_python-cpp.h +286 -0
  142. multipers/multiparameter_module_approximation/heap_column.h +238 -0
  143. multipers/multiparameter_module_approximation/images.h +79 -0
  144. multipers/multiparameter_module_approximation/list_column.h +174 -0
  145. multipers/multiparameter_module_approximation/list_column_2.h +232 -0
  146. multipers/multiparameter_module_approximation/ru_matrix.h +347 -0
  147. multipers/multiparameter_module_approximation/set_column.h +135 -0
  148. multipers/multiparameter_module_approximation/structure_higher_dim_barcode.h +36 -0
  149. multipers/multiparameter_module_approximation/unordered_set_column.h +166 -0
  150. multipers/multiparameter_module_approximation/utilities.h +403 -0
  151. multipers/multiparameter_module_approximation/vector_column.h +223 -0
  152. multipers/multiparameter_module_approximation/vector_matrix.h +331 -0
  153. multipers/multiparameter_module_approximation/vineyards.h +464 -0
  154. multipers/multiparameter_module_approximation/vineyards_trajectories.h +649 -0
  155. multipers/multiparameter_module_approximation.cpython-311-x86_64-linux-gnu.so +0 -0
  156. multipers/multiparameter_module_approximation.pyx +235 -0
  157. multipers/pickle.py +90 -0
  158. multipers/plots.py +456 -0
  159. multipers/point_measure.cpython-311-x86_64-linux-gnu.so +0 -0
  160. multipers/point_measure.pyx +395 -0
  161. multipers/simplex_tree_multi.cpython-311-x86_64-linux-gnu.so +0 -0
  162. multipers/simplex_tree_multi.pxd +134 -0
  163. multipers/simplex_tree_multi.pyx +10840 -0
  164. multipers/simplex_tree_multi.pyx.tp +2009 -0
  165. multipers/slicer.cpython-311-x86_64-linux-gnu.so +0 -0
  166. multipers/slicer.pxd +3034 -0
  167. multipers/slicer.pxd.tp +234 -0
  168. multipers/slicer.pyx +20481 -0
  169. multipers/slicer.pyx.tp +1088 -0
  170. multipers/tensor/tensor.h +672 -0
  171. multipers/tensor.pxd +13 -0
  172. multipers/test.pyx +44 -0
  173. multipers/tests/__init__.py +62 -0
  174. multipers/torch/__init__.py +1 -0
  175. multipers/torch/diff_grids.py +240 -0
  176. multipers/torch/rips_density.py +310 -0
  177. multipers-2.3.3b6.dist-info/METADATA +128 -0
  178. multipers-2.3.3b6.dist-info/RECORD +182 -0
  179. multipers-2.3.3b6.dist-info/WHEEL +5 -0
  180. multipers-2.3.3b6.dist-info/licenses/LICENSE +21 -0
  181. multipers-2.3.3b6.dist-info/top_level.txt +1 -0
  182. multipers.libs/libtbb-ca48af5c.so.12.16 +0 -0
@@ -0,0 +1,466 @@
1
+ import numpy as np
2
+ from os.path import expanduser, exists
3
+ import networkx as nx
4
+ from warnings import warn
5
+ import pickle
6
+ from joblib import Parallel, delayed
7
+ from tqdm import tqdm
8
+ from sklearn.preprocessing import LabelEncoder
9
+ from scipy.spatial import distance_matrix
10
+ from sklearn.base import BaseEstimator, TransformerMixin, clone
11
+ import multipers as mp
12
+ from typing import Iterable, List, Optional
13
+ from numpy.typing import ArrayLike
14
+
15
+ DATASET_PATH = expanduser("~/Datasets/")
16
+
17
+
18
+ def _check_installed(dataset: str):
19
+ from warnings import warn
20
+ from os.path import exists
21
+
22
+ assert dataset.startswith(
23
+ "graphs/"
24
+ ), "Graph datasets have to be of the form graphs/<name>"
25
+ if exists(DATASET_PATH + dataset):
26
+ return
27
+ else:
28
+ warn(
29
+ f"""
30
+ Dataset {dataset} not installed.
31
+ You can find it in https://networkrepository.com
32
+ Then (optinally) configure multipers.data.graphs.DATASET_PATH, which is currently
33
+ > {DATASET_PATH=}
34
+ and puts this dataset in $DATASET_PATH/{dataset}
35
+ """
36
+ )
37
+ raise ValueError("Unknown dataset.")
38
+
39
+
40
def get(dataset: str, filtration: Optional[str] = None):
    """Load a graph dataset, computing ``filtration`` first when it is missing.

    Parameters
    ----------
    dataset:
        Dataset identifier, forwarded to ``get_graphs``.
    filtration:
        Name of the node attribute that every node of every graph must
        carry. ``None`` skips the check.

    Returns
    -------
    Whatever ``get_graphs(dataset)`` returns (graphs and encoded labels).
    """
    if filtration is None:
        return get_graphs(dataset)
    graphs, labels = get_graphs(dataset)
    try:
        # Probe every node; a missing attribute shows up as a KeyError.
        for g in graphs:
            for node in g.nodes:
                g.nodes[node][filtration]
    except KeyError:
        print(
            f"Filtration {filtration} not computed, trying to compute it ...",
            flush=True,
        )
        compute_filtration(dataset, filtration)
        # The cache was rewritten by compute_filtration: reload it.
        return get_graphs(dataset)
    return graphs, labels
55
+
56
+
57
def get_from_file_old(dataset: str, label="lb"):
    """Read graphs stored as ``.mat`` adjacency matrices (legacy layout).

    Every file found under ``DATASET_PATH + dataset + "/mat/"`` is loaded with
    ``scipy.io.loadmat`` and its ``"A"`` entry is turned into a graph. The
    file name is split on ``"_"`` and the token following ``label`` is used
    as the graph label.

    Parameters
    ----------
    dataset:
        Dataset identifier, appended to ``DATASET_PATH``.
    label:
        Token that precedes the label value in the file names.

    Returns
    -------
    gs, labels:
        The list of graphs and the list of label tokens (strings).
    """
    from os import walk
    from os.path import join
    from scipy.io import loadmat
    from warnings import warn

    path = DATASET_PATH + dataset + "/mat/"
    labels: list[str] = []
    gs: list[nx.Graph] = []
    for root, _dirs, files in walk(path):
        for file in files:
            file_ppties = file.split("_")
            # The label token must be followed by a value, hence the last
            # field is excluded from the search.
            label_index = next(
                (k for k, token in enumerate(file_ppties[:-1]) if token == label),
                None,
            )
            if label_index is None:
                # NOTE(review): the graph below is still appended, so `gs`
                # and `labels` get out of step when this happens -- confirm
                # whether such files should be skipped instead.
                warn(f"Cannot find label {label} on file {file}.")
            else:
                labels.append(file_ppties[label_index + 1])
            # Use the directory reported by walk so nested files resolve.
            adj_mat = np.array(loadmat(join(root, file))["A"], dtype=np.float32)
            gs.append(nx.Graph(adj_mat))
    return gs, labels
79
+
80
+
81
def get_from_file(dataset: str):
    """Read a graph dataset from its text files.

    The files are looked up as ``DATASET_PATH/<dataset>/<name>.<kind>`` where
    ``<name>`` is ``dataset`` without its ``"graphs/"`` prefix and ``<kind>``
    is one of ``graph_idx``, ``graph_labels``, ``edges`` and (optionally)
    ``node_attrs``. When ``graph_idx`` cannot be opened, the legacy ``.mat``
    reader ``get_from_file_old`` is used instead.

    Returns
    -------
    graphs, labels:
        One graph per distinct value of ``graph_idx`` and the label-encoded
        content of ``graph_labels``. When ``node_attrs`` exists, its rows are
        stored on the nodes as the ``"intrinsic"`` attribute.
    """
    from os.path import exists

    path = DATASET_PATH + f"{dataset}/{dataset[len('graphs/'):]}."
    try:
        graphs_ids = np.loadtxt(path + "graph_idx")
    except OSError:
        # No text index available: fall back to the legacy layout.
        return get_from_file_old(dataset=dataset)
    labels: list[int] = LabelEncoder().fit_transform(np.loadtxt(path + "graph_labels"))
    # Node ids in the edge file start at 1; shift them to 0-based.
    edges = np.loadtxt(path + "edges", delimiter=",", dtype=int) - 1
    has_intrinsic_filtration = exists(path + "node_attrs")
    graphs: list[nx.Graph] = []
    if has_intrinsic_filtration:
        F = np.loadtxt(path + "node_attrs", delimiter=",")
    for graph_id in tqdm(np.unique(graphs_ids), desc="Reading graphs from file"):
        (nodes,) = np.where(graphs_ids == graph_id)
        # A set makes the per-edge membership test O(1).
        node_set = set(nodes.tolist())

        graph_edges = []
        for u, v in edges:
            u_inside = u in node_set
            v_inside = v in node_set
            if u_inside or v_inside:
                assert (
                    u_inside and v_inside
                ), f"Nodes {u} and {v} are not in the same graph"
                graph_edges.append((u, v))

        # NOTE(review): the graph is built from edges only, so nodes without
        # any edge do not appear in it -- confirm this is intended.
        g = nx.Graph(graph_edges)
        if has_intrinsic_filtration:
            node_attrs = {node: F[node] for node in nodes}
            nx.set_node_attributes(g, node_attrs, "intrinsic")
        graphs.append(g)
    return graphs, labels
112
+
113
+
114
def get_graphs(dataset: str, N: int | str = "") -> tuple[list[nx.Graph], list[int]]:
    """Return the graphs and encoded labels of ``dataset``, using a pickle cache.

    The cache lives in ``DATASET_PATH/<dataset>/graphs<N>.pkl`` and
    ``labels<N>.pkl``. When either file is missing, the dataset is read from
    its raw files and the cache is written.

    Parameters
    ----------
    dataset:
        Dataset identifier, validated by ``_check_installed``.
    N:
        Suffix inserted in the cache file names.

    Returns
    -------
    graphs, labels:
        The graphs and their labels passed through ``LabelEncoder``.
    """
    _check_installed(dataset)
    graphs_path = f"{DATASET_PATH}{dataset}/graphs{N}.pkl"
    labels_path = f"{DATASET_PATH}{dataset}/labels{N}.pkl"
    if not exists(graphs_path) or not exists(labels_path):
        # NOTE(review): _check_installed asserts a "graphs/" prefix, so this
        # branch is only reachable when assertions are disabled.
        if dataset.startswith("3dshapes/"):
            return get_from_file_old(
                dataset,
            )

        graphs, labels = get_from_file(
            dataset,
        )
        print("Saving graphs at :", graphs_path)
        # Forward N so the cache is written where it is looked up above.
        set_graphs(graphs=graphs, labels=labels, dataset=dataset, N=N)
    else:
        # NOTE: unpickling runs arbitrary code; only load caches you trust.
        with open(graphs_path, "rb") as graphs_file:
            graphs = pickle.load(graphs_file)
        with open(labels_path, "rb") as labels_file:
            labels = pickle.load(labels_file)
    from sklearn.preprocessing import LabelEncoder

    return graphs, LabelEncoder().fit_transform(labels)
135
+
136
+
137
def set_graphs(graphs: list[nx.Graph], labels: list, dataset: str, N: int | str = ""):
    """Save graphs (and their filtration values) and labels into pickle files.

    Parameters
    ----------
    graphs, labels:
        Objects to pickle.
    dataset:
        Dataset identifier, appended to ``DATASET_PATH``.
    N:
        Suffix inserted in the file names ``graphs<N>.pkl`` / ``labels<N>.pkl``.
    """
    graphs_path = f"{DATASET_PATH}{dataset}/graphs{N}.pkl"
    labels_path = f"{DATASET_PATH}{dataset}/labels{N}.pkl"
    # Context managers make sure the files are flushed and closed.
    with open(graphs_path, "wb") as graphs_file:
        pickle.dump(graphs, graphs_file)
    with open(labels_path, "wb") as labels_file:
        pickle.dump(labels, labels_file)
    return
144
+
145
+
146
def reset_graphs(dataset: str, N=None):
    """Reset the filtration values stored on the graphs of ``dataset``.

    The dataset is read again from its raw files and the result overwrites
    the default cache. ``N`` is accepted but not used.
    """
    fresh_graphs, fresh_labels = get_from_file(dataset)
    set_graphs(fresh_graphs, fresh_labels, dataset)
150
+
151
+
152
def compute_ricci(graphs: list[nx.Graph], alpha=0.5, progress=1):
    """Attach an Ollivier-Ricci based ``"ricciCurvature"`` value to every node.

    Each graph is replaced by the result of
    ``OllivierRicci(graph, alpha=alpha).compute_ricci_curvature()``. Every
    node then receives the minimum ``"ricciCurvature"`` found on its incident
    edges, or ``-1`` when it has no neighbour.

    Parameters
    ----------
    graphs:
        Graphs to process.
    alpha:
        Forwarded to ``OllivierRicci``.
    progress:
        Falsy values disable the progress bar.

    Returns
    -------
    The list of processed graphs.
    """
    from GraphRicciCurvature.OllivierRicci import OllivierRicci

    curved_graphs = []
    for original in tqdm(graphs, disable=not progress, desc="Computing ricci"):
        curved_graphs.append(
            OllivierRicci(original, alpha=alpha).compute_ricci_curvature()
        )

    for curved in curved_graphs:
        node_values = {}
        for node in curved.nodes:
            neighbours = curved[node]
            if len(neighbours) == 0:
                node_values[node] = -1
            else:
                node_values[node] = np.min(
                    [neighbours[other]["ricciCurvature"] for other in neighbours]
                )
        nx.set_node_attributes(curved, node_values, "ricciCurvature")
    return curved_graphs
176
+
177
+
178
def compute_cc(graphs: list[nx.Graph], progress=1):
    """Store closeness centrality on nodes and edges under the ``"cc"`` key.

    A node gets its ``nx.closeness_centrality`` value; an edge gets the
    larger of the values of its two endpoints.

    Parameters
    ----------
    graphs:
        Graphs to annotate (modified in place).
    progress:
        Falsy values disable the progress bar.

    Returns
    -------
    The list of annotated graphs.
    """

    def _annotate(graph):
        closeness = nx.closeness_centrality(graph)
        nx.set_node_attributes(graph, closeness, "cc")
        edge_values = {}
        for u, v in graph.edges:
            edge_values[(u, v)] = max(closeness[u], closeness[v])
        nx.set_edge_attributes(graph, edge_values, "cc")
        return graph

    runner = Parallel(n_jobs=1, prefer="threads")
    bar = tqdm(graphs, disable=not progress, desc="Computing cc")
    return runner(delayed(_annotate)(graph) for graph in bar)
193
+
194
+
195
def compute_degree(graphs: list[nx.Graph], progress=1):
    """Store an inverse-degree value on nodes and edges under ``"degree"``.

    A node of degree ``d`` gets ``1 / d``, or ``1.1`` when ``d`` is 0; an
    edge gets the larger of the values of its two endpoints.

    Parameters
    ----------
    graphs:
        Graphs to annotate (modified in place).
    progress:
        Falsy values disable the progress bar.

    Returns
    -------
    The list of annotated graphs.
    """

    def _annotate(graph):
        node_values = {}
        for node, degree in graph.degree:
            node_values[node] = 1.1 if degree == 0 else 1 / degree
        nx.set_node_attributes(graph, node_values, "degree")
        edge_values = {}
        for u, v in graph.edges:
            edge_values[(u, v)] = max(node_values[u], node_values[v])
        nx.set_edge_attributes(graph, edge_values, "degree")
        return graph

    runner = Parallel(n_jobs=1, prefer="threads")
    bar = tqdm(graphs, disable=not progress, desc="Computing degree")
    return runner(delayed(_annotate)(graph) for graph in bar)
211
+
212
+
213
# TODO : make it compatible with disconnected graphs
def compute_fiedler(graphs: list[nx.Graph], progress=1):
    """Store the squared Fiedler vector on nodes and edges under ``"fiedler"``.

    Each connected component is handled separately: components with more
    than two nodes use ``nx.fiedler_vector(component) ** 2``, smaller ones
    get zeros. An edge gets the larger of the values of its two endpoints.

    Parameters
    ----------
    graphs:
        Graphs to annotate (modified in place).
    progress:
        Falsy values disable the progress bar.

    Returns
    -------
    The list of annotated graphs.
    """

    def _annotate(graph):
        node_values = {}
        for component_nodes in nx.connected_components(graph):
            component = nx.subgraph(graph, component_nodes)
            if component.number_of_nodes() > 2:
                vector = nx.fiedler_vector(component) ** 2
            else:
                vector = np.zeros(component.number_of_nodes())
            # Entries of the vector are matched to nodes through the
            # iteration order of component.nodes.
            for position, node in enumerate(list(component.nodes)):
                node_values[node] = vector[position]
        nx.set_node_attributes(graph, node_values, "fiedler")
        edge_values = {}
        for u, v in graph.edges:
            edge_values[(u, v)] = max(node_values[u], node_values[v])
        nx.set_edge_attributes(graph, edge_values, "fiedler")
        return graph

    runner = Parallel(n_jobs=1, prefer="threads")
    bar = tqdm(graphs, disable=not progress, desc="Computing fiedler")
    return runner(delayed(_annotate)(graph) for graph in bar)
249
+
250
+
251
def compute_hks(graphs: list[nx.Graph], t: float, progress=1):
    """Store a heat-kernel-signature value on nodes and edges under ``f"hks_{t}"``.

    For a node at position ``i`` of ``g.nodes`` the value is
    ``sum_k exp(-t * w_k) * v_k[i] ** 2`` where ``(w_k, v_k)`` are the
    eigenpairs of the normalized Laplacian of ``g``. An edge gets the larger
    of the values of its two endpoints.

    Parameters
    ----------
    graphs:
        Graphs to annotate (modified in place); assumed undirected, as the
        type hint states, so that the Laplacian is symmetric.
    t:
        Time parameter of the signature; also part of the attribute name.
    progress:
        Falsy values disable the progress bar.

    Returns
    -------
    The list of annotated graphs.
    """

    def _hks(g: nx.Graph):
        laplacian = nx.laplacianmatrix.normalized_laplacian_matrix(
            g, nodelist=g.nodes()
        ).toarray()  # order is given by g.nodes order
        # The matrix is symmetric: eigh guarantees real eigenvalues and
        # orthonormal eigenvectors, so no complex output has to be
        # reinterpreted as floats.
        w, vps = np.linalg.eigh(laplacian)
        node_hks = {
            node: np.sum(np.exp(-t * w) * np.square(vps[node_index, :]))
            for node_index, node in enumerate(g.nodes)
        }
        nx.set_node_attributes(g, node_hks, f"hks_{t}")
        edges_hks = {(u, v): max(node_hks[u], node_hks[v]) for u, v in g.edges}
        nx.set_edge_attributes(g, edges_hks, f"hks_{t}")
        return g

    graphs = Parallel(n_jobs=1, prefer="threads")(
        delayed(_hks)(g)
        for g in tqdm(graphs, disable=not progress, desc=f"Computing hks_{t}")
    )
    return graphs
274
+
275
+
276
def compute_geodesic(graphs: list[nx.Graph], progress=1):
    """Store edge lengths derived from ``"intrinsic"`` under the ``"geodesic"`` key.

    Every node gets the value 0. Every edge gets the norm of the difference
    between the ``"intrinsic"`` attributes of its two endpoints. When a node
    lacks that attribute, a warning is emitted and 0 is used for all nodes of
    the graph.

    Parameters
    ----------
    graphs:
        Graphs to annotate (modified in place).
    progress:
        Falsy values disable the progress bar.

    Returns
    -------
    The list of annotated graphs.
    """

    def _f(g: nx.Graph):
        try:
            nodes_intrinsic = {i: n["intrinsic"] for i, n in g.nodes.data()}
        except KeyError:
            # Only a missing attribute triggers the fallback; other errors
            # are left to propagate.
            warn(
                "This graph doesn't have an intrinsic filtration, will use 0 instead ..."
            )
            nodes_intrinsic = {i: 0 for i in g.nodes}
        node_geodesic = {i: 0 for i in g.nodes}
        nx.set_node_attributes(g, node_geodesic, "geodesic")
        edges_geodesic = {
            (u, v): np.linalg.norm(nodes_intrinsic[u] - nodes_intrinsic[v])
            for u, v in g.edges
        }
        nx.set_edge_attributes(g, edges_geodesic, "geodesic")
        return g

    graphs = Parallel(n_jobs=1, prefer="threads")(
        delayed(_f)(g)
        for g in tqdm(
            graphs, disable=not progress, desc="Computing geodesic distances on graphs"
        )
    )
    return graphs
302
+
303
+
304
def compute_intrinsic(graphs: list[nx.Graph], progress=1, nowarning=False):
    """Propagate the ``"intrinsic"`` node attribute to the edges.

    An edge gets the element-wise maximum of the ``"intrinsic"`` values of
    its two endpoints.

    Parameters
    ----------
    graphs:
        Graphs to annotate (modified in place).
    progress:
        Falsy values disable the progress bar.
    nowarning:
        When true, graphs with a node lacking ``"intrinsic"`` are returned
        untouched instead of raising.

    Returns
    -------
    The list of processed graphs.

    Raises
    ------
    ValueError
        If a node has no ``"intrinsic"`` attribute and ``nowarning`` is false.
    """

    def _f(g: nx.Graph):
        try:
            nodes_intrinsic = {i: n["intrinsic"] for i, n in g.nodes.data()}
        except KeyError as err:
            if not nowarning:
                raise ValueError(
                    "This graph doesn't have an intrinsic filtration."
                ) from err
            return g
        edges_intrinsic = {
            (u, v): np.max([nodes_intrinsic[u], nodes_intrinsic[v]], axis=0)
            for u, v in g.edges
        }
        nx.set_edge_attributes(g, edges_intrinsic, "intrinsic")
        return g

    graphs = Parallel(n_jobs=1, prefer="threads")(
        delayed(_f)(g)
        for g in tqdm(
            graphs,
            disable=not progress,
            desc="Computing intrinsic filtrations on graphs",
        )
    )
    return graphs
327
+
328
+
329
def compute_filtration(dataset: str, filtration: str = "ALL", **kwargs):
    """Compute a filtration on the graphs of ``dataset`` and save the result.

    Parameters
    ----------
    dataset:
        Dataset identifier.
    filtration:
        ``"ALL"`` resets the cache and computes intrinsic, geodesic, cc,
        degree, ricci, fiedler and ``hks_10``. Otherwise one of ``"cc"``,
        ``"degree"``, ``"ricciCurvature"``, ``"fiedler"``, ``"geodesic"`` or
        ``"hks_<int>"``. ``"dijkstra"`` returns without computing anything;
        any other value emits a warning and nothing is saved.
    **kwargs:
        Forwarded to ``get_graphs``.
    """
    if filtration == "ALL":
        reset_graphs(dataset)  # not necessary
        graphs, labels = get_graphs(dataset, **kwargs)
        graphs = compute_intrinsic(graphs, nowarning=True)
        for step in (
            compute_geodesic,
            compute_cc,
            compute_degree,
            compute_ricci,
            compute_fiedler,
        ):
            graphs = step(graphs)
        graphs = compute_hks(graphs, 10)
        set_graphs(graphs=graphs, labels=labels, dataset=dataset)
        return

    graphs, labels = get_graphs(dataset, **kwargs)
    if filtration == "dijkstra":
        return

    single_step = {
        "cc": compute_cc,
        "degree": compute_degree,
        "ricciCurvature": compute_ricci,
        "fiedler": compute_fiedler,
        "geodesic": compute_geodesic,
    }
    if filtration in single_step:
        graphs = single_step[filtration](graphs)
    elif filtration.startswith("hks_"):
        # Integer times only: floats would put dots in the attribute name.
        t = int(filtration[4:])
        graphs = compute_hks(graphs=graphs, t=t)
    else:
        warn(f"Filtration {filtration} not implemented !")
        return
    set_graphs(graphs=graphs, labels=labels, dataset=dataset)
364
+
365
+
366
class Graph2SimplexTrees(BaseEstimator, TransformerMixin):
    """
    Turns a list of networkx graphs into a list of SimplexTreeMulti.

    The filtration values are read from the node and edge attributes named
    in `filtrations`; vector-valued attributes are flattened.

    Usual Filtrations
    -----------------
    - "cc" closeness centrality
    - "geodesic" if the graph provides data to compute it, e.g., BZR, COX2, PROTEINS
    - "degree"
    - "ricciCurvature" the ricci curvature
    - "fiedler" the square of the fiedler vector
    """

    def __init__(
        self,
        filtrations=[],
        delayed=False,
        num_collapses=100,
        progress: bool = False,
    ):
        super().__init__()
        # Names of the attributes to read on nodes and edges.
        self.filtrations = filtrations
        # When truthy, transform returns joblib `delayed` calls instead of
        # running them.
        self.delayed = delayed
        # Passed to `collapse_edges` for 2-parameter simplex trees.
        self.num_collapses = num_collapses
        self.progress = progress
        self.num_parameters: int = len(filtrations)

    def fit(self, X, y=None):
        """Deduce the number of filtration parameters from the first graph."""
        if len(X) == 0:
            return self
        self.num_parameters = len(self.filtrations)
        if "intrinsic" in self.filtrations:
            # "intrinsic" is a vector: it counts for its length, not for 1.
            first_node_intrinsic = X[0].nodes[0]["intrinsic"]
            self.num_parameters += len(first_node_intrinsic) - 1
        return self

    def transform(self, X: list[nx.Graph]):
        """Build one single-element list ``[SimplexTreeMulti]`` per graph."""

        def todo(graph, filtrations=self.filtrations) -> list[mp.SimplexTreeMulti]:
            def flat_values(attributes):
                # Concatenates the requested attributes; the reshape handles
                # attributes that are vectors.
                values = []
                for name in filtrations:
                    values.extend(np.asarray(attributes[name]).reshape(-1))
                return values

            st = mp.SimplexTreeMulti(num_parameters=self.num_parameters)

            nodes = np.asarray(graph.nodes, dtype=int).reshape(1, -1)
            nodes_filtrations = np.asarray(
                [flat_values(graph.nodes[node]) for node in graph.nodes],
                dtype=np.float32,
            )
            st.insert_batch(nodes, nodes_filtrations)

            edges = np.asarray(graph.edges, dtype=int).T
            edges_filtrations = np.asarray(
                [flat_values(graph[u][v]) for u, v in graph.edges],
                dtype=np.float32,
            )
            st.insert_batch(edges, edges_filtrations)

            if st.num_parameters == 2:
                # TODO : wait for a filtration domination update
                st.collapse_edges(num=self.num_collapses)
            # make_filtration_non_decreasing is not called: Ricci is not safe.
            # Same output for each pipeline, some have a supplementary axis.
            return [st]

        if self.delayed:
            return [delayed(todo)(graph) for graph in X]

        # Sequential on purpose: memory bound.
        simplextrees = []
        for graph in tqdm(
            X,
            desc="Computing simplextrees from graphs",
            disable=not self.progress,
        ):
            simplextrees.append(todo(graph=graph))
        return simplextrees
@@ -0,0 +1,27 @@
1
+ import numpy as np
2
+ from pandas import read_csv
3
+ from os.path import expanduser
4
+ from os import walk
5
+ from sklearn.preprocessing import LabelEncoder
6
+
7
+
8
+
9
def get(DATASET_PATH = expanduser("~/Datasets/")):
    """Load the point clouds stored under ``<DATASET_PATH>/1.5mmRegions/``.

    Every file found below the ``FoxP3``, ``CD8`` and ``CD68`` folders is
    read with ``pandas.read_csv``, converted to an array and divided by 1500.

    Parameters
    ----------
    DATASET_PATH:
        Root folder of the datasets.

    Returns
    -------
    X, labels:
        The list of rescaled arrays and the label-encoded folder name of
        each of them.
    """
    from os.path import join

    DATASET_PATH += "1.5mmRegions/"
    X, labels = [], []
    for label in ["FoxP3", "CD8", "CD68"]:
        for root, _dirs, files in walk(DATASET_PATH + label + "/"):
            for name in files:
                # Build the path from `root` so files in nested folders resolve.
                X.append(np.array(read_csv(join(root, name))) / 1500)  # Rescaled
                labels.append(label)
    return X, LabelEncoder().fit_transform(np.array(labels))
19
+
20
def get_immuno(i=1, DATASET_PATH = expanduser("~/Datasets/")):
    """Load ``LargeHypoxicRegion<i>.csv`` as normalized 2D points with labels.

    Parameters
    ----------
    i:
        Index inserted in the file name.
    DATASET_PATH:
        Folder containing the csv file.

    Returns
    -------
    points, labels:
        An array with one ``(x, y)`` row per entry, each column divided by
        its maximum, and the label-encoded ``Celltype`` column.
    """
    immu_dataset = read_csv(DATASET_PATH+f"LargeHypoxicRegion{i}.csv")
    X = np.array(immu_dataset['x'])
    # Out-of-place division: an in-place `/=` fails on integer columns.
    X = X / np.max(X)
    Y = np.array(immu_dataset['y'])
    Y = Y / np.max(Y)
    labels = LabelEncoder().fit_transform(immu_dataset['Celltype'])
    return np.asarray([X,Y]).T, labels
File without changes
@@ -0,0 +1,91 @@
1
+ from tqdm import tqdm
2
+ import numpy as np
3
+ from torch_geometric.data.data import Data
4
+ import networkx as nx
5
+ from sklearn.base import BaseEstimator, TransformerMixin
6
+ from typing import Iterable
7
+
8
+
9
def modelnet2pts2gs(train_dataset, test_dataset , nbr_size = 8, exp_flag = True, labels_only = False,n=100, n_jobs=1, random=False):
    """Sample shapes and create a neighborhood graph from the points of each.

    Parameters
    ----------
    train_dataset, test_dataset:
        Datasets that are concatenated before sampling.
    nbr_size:
        Number of neighbours passed to ``kneighbors_graph``.
    exp_flag:
        When true, every edge weight ``w`` is replaced by ``exp(-w)``.
    labels_only:
        When true, only the labels are returned.
    n:
        Number of shapes to keep.
    n_jobs:
        Forwarded to ``kneighbors_graph``.
    random:
        When true the ``n`` shapes are drawn without replacement, otherwise
        the first ``n`` are used.

    Returns
    -------
    ``labels`` when ``labels_only`` is true, else ``(graphs, labels)``.
    """
    from sklearn.neighbors import kneighbors_graph

    dataset = train_dataset + test_dataset
    indices = np.random.choice(range(len(dataset)),replace=False, size=n) if random else range(n)

    dataset:list[Data] = [dataset[i] for i in indices]
    _,labels = torch_geometric_2nx(dataset, labels_only=True)
    if labels_only:
        return labels

    def data2graph(data:Data):
        pos = data.pos.numpy()
        adj = kneighbors_graph(pos, nbr_size, mode='distance', n_jobs=n_jobs)
        g = nx.from_scipy_sparse_array(adj, edge_attribute= 'weight')
        if exp_flag:
            for u, v in g.edges(): # TODO optimize
                g[u][v]['weight'] = np.exp(-g[u][v]['weight'])
        return g
        #TODO : nx.set_edge_attributes()

    return [data2graph(data) for data in dataset], labels
32
def torch_geometric_2nx(dataset, labels_only = False, print_flag = False, weight_flag = False):
    """Convert torch_geometric-like data objects into networkx graphs.

    Every graph is built from the unique rows of ``data.edge_index``. Nodes
    get a ``"geodesic"`` value of 0 and edges get the norm of the difference
    between the positions (``data.pos``) of their endpoints.

    :param dataset: iterable of data objects exposing ``y``, ``edge_index`` and ``pos``
    :param labels_only: return labels only (the graphs slot is then ``None``)
    :param print_flag: not used by this function
    :param weight_flag: not used by this function
    :return: ``(graphs, labels)`` with ``labels`` being ``int(data.y)`` for each item
    """
    if labels_only:
        return None, [int(data.y) for data in dataset]

    def data2graph(data:Data):
        unique_edges = np.unique(data.edge_index.numpy().T, axis=0)
        graph = nx.from_edgelist(unique_edges)
        edge_lengths = {}
        for u, v in graph.edges:
            edge_lengths[(u, v)] = np.linalg.norm(data.pos[u] - data.pos[v])
        nx.set_node_attributes(graph, {node: 0 for node in graph.nodes}, "geodesic")
        nx.set_edge_attributes(graph, edge_lengths, "geodesic")
        return graph

    graphs = [data2graph(data) for data in tqdm(dataset, desc="Turning Data to graphs")]
    labels = [int(data.y) for data in dataset]
    return graphs, labels
50
+
51
+
52
def modelnet2graphs(version = '10', print_flag = False, labels_only = False, a = 0, b = 10, weight_flag = False):
    """Load ModelNet 10 or 40 and convert it to graphs.

    Parameters
    ----------
    version:
        Forwarded to ``load_modelnet``.
    print_flag, weight_flag:
        Forwarded to ``torch_geometric_2nx``.
    labels_only:
        When true, the conversion is skipped and only labels are computed.
    a, b:
        When ``b > 0`` only the items of index ``a`` to ``b - 1`` of the
        concatenated train + test dataset are kept.

    Returns
    -------
    The ``(graphs, labels)`` pair returned by ``torch_geometric_2nx``.
    """
    from torch_geometric.transforms import FaceToEdge
    from .shape3d import load_modelnet

    # load_modelnet has no `point_flag` parameter; `sample_points=False`
    # is the keyword that requests meshes rather than sampled points.
    train_dataset, test_dataset = load_modelnet(version, sample_points = False)
    dataset = train_dataset + test_dataset
    if b>0:
        dataset = [dataset[i] for i in range(a,b)]
    if labels_only:
        return torch_geometric_2nx(dataset, labels_only=True)
    dataset = [FaceToEdge(remove_faces=False)(data) for data in dataset]
    graphs, labels = torch_geometric_2nx(dataset, print_flag=print_flag, weight_flag= weight_flag)
    return graphs, labels
64
+
65
+
66
+
67
+
68
class Torch2SimplexTree(BaseEstimator,TransformerMixin):
    """
    WARNING : build in progress
    PyTorch Data-like to simplextree.

    Input
    -----
    Class having `pos`, `edges`, `faces` methods

    Filtrations
    -----------
     - Geodesic (geodesic rips)
     - eccentricity
    """
    import multipers as mp

    def __init__(self, filtrations:Iterable[str]=[]):
        super().__init__()
        # Kept as an attribute of the same name so that the estimator
        # introspection (get_params, repr, clone) can read it back.
        self.filtrations = filtrations

    def fit(self, X, y=None):
        """No-op; returns the estimator itself."""
        return self

    def transform(self,X:list[nx.Graph]):
        """Not implemented yet: returns ``None``."""
        return
@@ -0,0 +1,101 @@
1
+ import numpy as np
2
+ from os.path import expanduser
3
+ from torch_geometric.datasets import ModelNet
4
+
5
+ DATASET_PATH = expanduser("~/Datasets/")
6
+ import os
7
+
8
+
9
+ ####################### MODELNET
10
def load_modelnet(version='10', sample_points = False, reset:bool=False, remove_faces=False):
    """Load the train and test splits of ModelNet.

    :param version: ``'10'`` or ``'40'``
    :param sample_points: when truthy it is passed as ``num`` to ``SamplePoints``,
        which is then used as transform; otherwise no transform is applied
    :param reset: remove the ``processed`` folder of the dataset before loading
    :param remove_faces: forwarded to the ``FaceToEdge`` pre-transform
    :return: train_dataset, test_dataset
    """
    import shutil
    from torch_geometric.transforms import FaceToEdge, SamplePoints

    assert version in ['10', '40']
    pre_transform = FaceToEdge(remove_faces=remove_faces)
    transform = SamplePoints(num=sample_points) if sample_points else None
    path = f"{DATASET_PATH}/ModelNet{version}"
    if reset:
        # No shell involved: safe with spaces or special characters in the
        # path. A missing folder is ignored.
        shutil.rmtree(path + '/processed/', ignore_errors=True)
    train_dataset = ModelNet(path, name=version, train=True, transform=transform, pre_transform=pre_transform)
    test_dataset = ModelNet(path, name=version, train=False, transform=transform, pre_transform=pre_transform)
    return train_dataset, test_dataset
28
+
29
+
30
def get_ModelNet(dataset, num_graph, seed):
    """Load a ModelNet dataset, optionally sub-sampled.

    :param dataset: name whose characters after the first 8 give the version
    :param num_graph: when positive, number of training items drawn without
        replacement; the test split is reduced with the same test/train ratio
    :param seed: seed of the global numpy generator used for the draw
    :return: train, test
    """
    train, test = load_modelnet(version=dataset[8:])
    test_ratio = len(test) / len(train)
    if not num_graph > 0:
        return train, test

    np.random.seed(seed)
    train_choice = np.random.choice(len(train), num_graph, replace=False)
    train = train[train_choice]
    test_choice = np.random.choice(len(test), int(num_graph * test_ratio), replace=False)
    test = test[test_choice]
    np.random.seed()  # resets seed
    return train, test
41
+
42
+
43
def get(dataset:str, num_graph=0, seed=0, node_per_graph=0):
    """Load a 3D shape dataset.

    Names starting with ``"ModelNet"`` are delegated to ``get_ModelNet``.
    Otherwise the shapes come from ``get_`` and, for each of them,
    ``node_per_graph`` nodes are drawn without replacement.

    :param dataset: dataset name
    :param num_graph: number of shapes to load
    :param seed: seed of the global numpy generator
    :param node_per_graph: number of nodes drawn per shape
    :return: for ModelNet the result of ``get_ModelNet``; otherwise
        ``(graphs, labels)`` where ``graphs`` holds ``[data, node]`` pairs and
        ``labels`` the label of each drawn node
    """
    if dataset.startswith("ModelNet"):
        return get_ModelNet(dataset=dataset, num_graph=num_graph, seed=seed)

    shapes = get_(dataset=dataset, num_sample=num_graph)
    graphs, labels = [], []
    np.random.seed(seed)
    for data, node_labels in shapes:
        drawn = np.random.choice(range(len(data.pos)), replace=False, size=node_per_graph)
        for node in drawn:
            # The same data object is shared by all the pairs: no copy.
            graphs.append([data, node])
            labels.append(node_labels[node])
    return graphs, labels
57
+
58
+
59
def get_(dataset:str, dataset_num:int|None=None, num_sample:int=0, DATASET_PATH = expanduser("~/Datasets/")):
    """Read one or several labelled ``.off`` shapes of a 3dshapes dataset.

    :param dataset: dataset name, with or without the ``"3dshapes/"`` prefix;
        it may end with ``/<num>.off`` to select a single shape
    :param dataset_num: number of the shape to read; when it is ``None`` and
        not given through ``dataset``, the shapes found in the folder are read
    :param num_sample: when positive, number of shapes drawn without
        replacement among the available ones; otherwise all of them are read
    :param DATASET_PATH: root folder of the datasets
    :return: ``(data, labels)`` for a single shape, with one label per node,
        or a list of such pairs
    """
    from torch_geometric.io import read_off

    if dataset.startswith("3dshapes/"):
        dataset_ = dataset[len("3dshapes/"):]
    else:
        dataset_ = dataset
    if dataset_num is None and "/" in dataset_:
        position = dataset_.rfind("/")
        dataset_num = int(dataset_[position+1:-4]) # cuts the "<dataset>/" and the ".off"
        dataset_ = dataset_[:position]

    if dataset_num is None: # reads every (or num_sample random) available shape
        from os import listdir

        files = [file for file in listdir(DATASET_PATH+f"3dshapes/{dataset_}") if "label" not in file]
        if num_sample > 0:
            files = np.random.choice(files, replace=False, size=num_sample)
        dataset_nums = np.sort([int("".join([char for char in file if char.isnumeric()])) for file in files])

        print("Dataset nums : ", *dataset_nums)
        # Forward DATASET_PATH so a custom root is honoured for each shape.
        return [get_(dataset_, dataset_num=num, DATASET_PATH=DATASET_PATH) for num in dataset_nums]

    path = DATASET_PATH+f"3dshapes/{dataset_}/{dataset_num}.off"
    data = read_off(path)
    faces = data.face.numpy().T
    # Strip only the extension: the folders may contain dots themselves.
    label_path = path[:-len(".off")] + "_labels.txt"
    labels = np.zeros(len(data.pos), dtype="<U10") # Assumes labels are of size at most 10 chars
    current_label = ""
    with open(label_path, "r") as f:
        # Lines alternate: a label name, then the faces carrying that label.
        for i, line in enumerate(f):
            if i % 2 == 0:
                current_label = line.strip()
                continue
            faces_of_label = np.array(line.strip().split(" "), dtype=int) - 1 # this starts at 1, python starts at 0
            nodes_of_label = np.unique(faces[faces_of_label].flatten())
            labels[nodes_of_label] = current_label # labels are given on the faces
    return data, labels