multipers 2.2.3__cp310-cp310-win_amd64.whl → 2.3.1__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of multipers might be problematic. Click here for more details.
- multipers/__init__.py +33 -31
- multipers/_signed_measure_meta.py +430 -430
- multipers/_slicer_meta.py +211 -212
- multipers/data/MOL2.py +458 -458
- multipers/data/UCR.py +18 -18
- multipers/data/graphs.py +466 -466
- multipers/data/immuno_regions.py +27 -27
- multipers/data/pytorch2simplextree.py +90 -90
- multipers/data/shape3d.py +101 -101
- multipers/data/synthetic.py +113 -111
- multipers/distances.py +198 -198
- multipers/filtration_conversions.pxd.tp +84 -84
- multipers/filtrations/__init__.py +18 -0
- multipers/{ml/convolutions.py → filtrations/density.py} +563 -520
- multipers/filtrations/filtrations.py +289 -0
- multipers/filtrations.pxd +224 -224
- multipers/function_rips.cp310-win_amd64.pyd +0 -0
- multipers/function_rips.pyx +105 -105
- multipers/grids.cp310-win_amd64.pyd +0 -0
- multipers/grids.pyx +350 -350
- multipers/gudhi/Persistence_slices_interface.h +132 -132
- multipers/gudhi/Simplex_tree_interface.h +239 -245
- multipers/gudhi/Simplex_tree_multi_interface.h +516 -561
- multipers/gudhi/cubical_to_boundary.h +59 -59
- multipers/gudhi/gudhi/Bitmap_cubical_complex.h +450 -450
- multipers/gudhi/gudhi/Bitmap_cubical_complex_base.h +1070 -1070
- multipers/gudhi/gudhi/Bitmap_cubical_complex_periodic_boundary_conditions_base.h +579 -579
- multipers/gudhi/gudhi/Debug_utils.h +45 -45
- multipers/gudhi/gudhi/Fields/Multi_field.h +484 -484
- multipers/gudhi/gudhi/Fields/Multi_field_operators.h +455 -455
- multipers/gudhi/gudhi/Fields/Multi_field_shared.h +450 -450
- multipers/gudhi/gudhi/Fields/Multi_field_small.h +531 -531
- multipers/gudhi/gudhi/Fields/Multi_field_small_operators.h +507 -507
- multipers/gudhi/gudhi/Fields/Multi_field_small_shared.h +531 -531
- multipers/gudhi/gudhi/Fields/Z2_field.h +355 -355
- multipers/gudhi/gudhi/Fields/Z2_field_operators.h +376 -376
- multipers/gudhi/gudhi/Fields/Zp_field.h +420 -420
- multipers/gudhi/gudhi/Fields/Zp_field_operators.h +400 -400
- multipers/gudhi/gudhi/Fields/Zp_field_shared.h +418 -418
- multipers/gudhi/gudhi/Flag_complex_edge_collapser.h +337 -337
- multipers/gudhi/gudhi/Matrix.h +2107 -2107
- multipers/gudhi/gudhi/Multi_critical_filtration.h +1038 -1038
- multipers/gudhi/gudhi/Multi_persistence/Box.h +171 -171
- multipers/gudhi/gudhi/Multi_persistence/Line.h +282 -282
- multipers/gudhi/gudhi/Off_reader.h +173 -173
- multipers/gudhi/gudhi/One_critical_filtration.h +1433 -1431
- multipers/gudhi/gudhi/Persistence_matrix/Base_matrix.h +769 -769
- multipers/gudhi/gudhi/Persistence_matrix/Base_matrix_with_column_compression.h +686 -686
- multipers/gudhi/gudhi/Persistence_matrix/Boundary_matrix.h +842 -842
- multipers/gudhi/gudhi/Persistence_matrix/Chain_matrix.h +1350 -1350
- multipers/gudhi/gudhi/Persistence_matrix/Id_to_index_overlay.h +1105 -1105
- multipers/gudhi/gudhi/Persistence_matrix/Position_to_index_overlay.h +859 -859
- multipers/gudhi/gudhi/Persistence_matrix/RU_matrix.h +910 -910
- multipers/gudhi/gudhi/Persistence_matrix/allocators/entry_constructors.h +139 -139
- multipers/gudhi/gudhi/Persistence_matrix/base_pairing.h +230 -230
- multipers/gudhi/gudhi/Persistence_matrix/base_swap.h +211 -211
- multipers/gudhi/gudhi/Persistence_matrix/boundary_cell_position_to_id_mapper.h +60 -60
- multipers/gudhi/gudhi/Persistence_matrix/boundary_face_position_to_id_mapper.h +60 -60
- multipers/gudhi/gudhi/Persistence_matrix/chain_pairing.h +136 -136
- multipers/gudhi/gudhi/Persistence_matrix/chain_rep_cycles.h +190 -190
- multipers/gudhi/gudhi/Persistence_matrix/chain_vine_swap.h +616 -616
- multipers/gudhi/gudhi/Persistence_matrix/columns/chain_column_extra_properties.h +150 -150
- multipers/gudhi/gudhi/Persistence_matrix/columns/column_dimension_holder.h +106 -106
- multipers/gudhi/gudhi/Persistence_matrix/columns/column_utilities.h +219 -219
- multipers/gudhi/gudhi/Persistence_matrix/columns/entry_types.h +327 -327
- multipers/gudhi/gudhi/Persistence_matrix/columns/heap_column.h +1140 -1140
- multipers/gudhi/gudhi/Persistence_matrix/columns/intrusive_list_column.h +934 -934
- multipers/gudhi/gudhi/Persistence_matrix/columns/intrusive_set_column.h +934 -934
- multipers/gudhi/gudhi/Persistence_matrix/columns/list_column.h +980 -980
- multipers/gudhi/gudhi/Persistence_matrix/columns/naive_vector_column.h +1092 -1092
- multipers/gudhi/gudhi/Persistence_matrix/columns/row_access.h +192 -192
- multipers/gudhi/gudhi/Persistence_matrix/columns/set_column.h +921 -921
- multipers/gudhi/gudhi/Persistence_matrix/columns/small_vector_column.h +1093 -1093
- multipers/gudhi/gudhi/Persistence_matrix/columns/unordered_set_column.h +1012 -1012
- multipers/gudhi/gudhi/Persistence_matrix/columns/vector_column.h +1244 -1244
- multipers/gudhi/gudhi/Persistence_matrix/matrix_dimension_holders.h +186 -186
- multipers/gudhi/gudhi/Persistence_matrix/matrix_row_access.h +164 -164
- multipers/gudhi/gudhi/Persistence_matrix/ru_pairing.h +156 -156
- multipers/gudhi/gudhi/Persistence_matrix/ru_rep_cycles.h +376 -376
- multipers/gudhi/gudhi/Persistence_matrix/ru_vine_swap.h +540 -540
- multipers/gudhi/gudhi/Persistent_cohomology/Field_Zp.h +118 -118
- multipers/gudhi/gudhi/Persistent_cohomology/Multi_field.h +173 -173
- multipers/gudhi/gudhi/Persistent_cohomology/Persistent_cohomology_column.h +128 -128
- multipers/gudhi/gudhi/Persistent_cohomology.h +745 -745
- multipers/gudhi/gudhi/Points_off_io.h +171 -171
- multipers/gudhi/gudhi/Simple_object_pool.h +69 -69
- multipers/gudhi/gudhi/Simplex_tree/Simplex_tree_iterators.h +463 -463
- multipers/gudhi/gudhi/Simplex_tree/Simplex_tree_node_explicit_storage.h +83 -83
- multipers/gudhi/gudhi/Simplex_tree/Simplex_tree_siblings.h +106 -106
- multipers/gudhi/gudhi/Simplex_tree/Simplex_tree_star_simplex_iterators.h +277 -277
- multipers/gudhi/gudhi/Simplex_tree/hooks_simplex_base.h +62 -62
- multipers/gudhi/gudhi/Simplex_tree/indexing_tag.h +27 -27
- multipers/gudhi/gudhi/Simplex_tree/serialization_utils.h +62 -62
- multipers/gudhi/gudhi/Simplex_tree/simplex_tree_options.h +157 -157
- multipers/gudhi/gudhi/Simplex_tree.h +2794 -2794
- multipers/gudhi/gudhi/Simplex_tree_multi.h +152 -163
- multipers/gudhi/gudhi/distance_functions.h +62 -62
- multipers/gudhi/gudhi/graph_simplicial_complex.h +104 -104
- multipers/gudhi/gudhi/persistence_interval.h +253 -253
- multipers/gudhi/gudhi/persistence_matrix_options.h +170 -170
- multipers/gudhi/gudhi/reader_utils.h +367 -367
- multipers/gudhi/mma_interface_coh.h +256 -255
- multipers/gudhi/mma_interface_h0.h +223 -231
- multipers/gudhi/mma_interface_matrix.h +291 -282
- multipers/gudhi/naive_merge_tree.h +536 -575
- multipers/gudhi/scc_io.h +310 -289
- multipers/gudhi/truc.h +957 -888
- multipers/io.cp310-win_amd64.pyd +0 -0
- multipers/io.pyx +714 -711
- multipers/ml/accuracies.py +90 -90
- multipers/ml/invariants_with_persistable.py +79 -79
- multipers/ml/kernels.py +176 -176
- multipers/ml/mma.py +713 -714
- multipers/ml/one.py +472 -472
- multipers/ml/point_clouds.py +352 -346
- multipers/ml/signed_measures.py +1589 -1589
- multipers/ml/sliced_wasserstein.py +461 -461
- multipers/ml/tools.py +113 -113
- multipers/mma_structures.cp310-win_amd64.pyd +0 -0
- multipers/mma_structures.pxd +127 -127
- multipers/mma_structures.pyx +4 -8
- multipers/mma_structures.pyx.tp +1083 -1085
- multipers/multi_parameter_rank_invariant/diff_helpers.h +84 -93
- multipers/multi_parameter_rank_invariant/euler_characteristic.h +97 -97
- multipers/multi_parameter_rank_invariant/function_rips.h +322 -322
- multipers/multi_parameter_rank_invariant/hilbert_function.h +769 -769
- multipers/multi_parameter_rank_invariant/persistence_slices.h +148 -148
- multipers/multi_parameter_rank_invariant/rank_invariant.h +369 -369
- multipers/multiparameter_edge_collapse.py +41 -41
- multipers/multiparameter_module_approximation/approximation.h +2298 -2295
- multipers/multiparameter_module_approximation/combinatory.h +129 -129
- multipers/multiparameter_module_approximation/debug.h +107 -107
- multipers/multiparameter_module_approximation/format_python-cpp.h +286 -286
- multipers/multiparameter_module_approximation/heap_column.h +238 -238
- multipers/multiparameter_module_approximation/images.h +79 -79
- multipers/multiparameter_module_approximation/list_column.h +174 -174
- multipers/multiparameter_module_approximation/list_column_2.h +232 -232
- multipers/multiparameter_module_approximation/ru_matrix.h +347 -347
- multipers/multiparameter_module_approximation/set_column.h +135 -135
- multipers/multiparameter_module_approximation/structure_higher_dim_barcode.h +36 -36
- multipers/multiparameter_module_approximation/unordered_set_column.h +166 -166
- multipers/multiparameter_module_approximation/utilities.h +403 -419
- multipers/multiparameter_module_approximation/vector_column.h +223 -223
- multipers/multiparameter_module_approximation/vector_matrix.h +331 -331
- multipers/multiparameter_module_approximation/vineyards.h +464 -464
- multipers/multiparameter_module_approximation/vineyards_trajectories.h +649 -649
- multipers/multiparameter_module_approximation.cp310-win_amd64.pyd +0 -0
- multipers/multiparameter_module_approximation.pyx +218 -217
- multipers/pickle.py +90 -53
- multipers/plots.py +342 -334
- multipers/point_measure.cp310-win_amd64.pyd +0 -0
- multipers/point_measure.pyx +322 -320
- multipers/simplex_tree_multi.cp310-win_amd64.pyd +0 -0
- multipers/simplex_tree_multi.pxd +133 -133
- multipers/simplex_tree_multi.pyx +115 -48
- multipers/simplex_tree_multi.pyx.tp +1947 -1935
- multipers/slicer.cp310-win_amd64.pyd +0 -0
- multipers/slicer.pxd +301 -120
- multipers/slicer.pxd.tp +218 -214
- multipers/slicer.pyx +1570 -507
- multipers/slicer.pyx.tp +931 -914
- multipers/tensor/tensor.h +672 -672
- multipers/tensor.pxd +13 -13
- multipers/test.pyx +44 -44
- multipers/tests/__init__.py +57 -57
- multipers/torch/diff_grids.py +217 -217
- multipers/torch/rips_density.py +310 -304
- {multipers-2.2.3.dist-info → multipers-2.3.1.dist-info}/LICENSE +21 -21
- {multipers-2.2.3.dist-info → multipers-2.3.1.dist-info}/METADATA +21 -11
- multipers-2.3.1.dist-info/RECORD +182 -0
- {multipers-2.2.3.dist-info → multipers-2.3.1.dist-info}/WHEEL +1 -1
- multipers/tests/test_diff_helper.py +0 -73
- multipers/tests/test_hilbert_function.py +0 -82
- multipers/tests/test_mma.py +0 -83
- multipers/tests/test_point_clouds.py +0 -49
- multipers/tests/test_python-cpp_conversion.py +0 -82
- multipers/tests/test_signed_betti.py +0 -181
- multipers/tests/test_signed_measure.py +0 -89
- multipers/tests/test_simplextreemulti.py +0 -221
- multipers/tests/test_slicer.py +0 -221
- multipers-2.2.3.dist-info/RECORD +0 -189
- {multipers-2.2.3.dist-info → multipers-2.3.1.dist-info}/top_level.txt +0 -0
multipers/data/graphs.py
CHANGED
|
@@ -1,466 +1,466 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
from os.path import expanduser, exists
|
|
3
|
-
import networkx as nx
|
|
4
|
-
from warnings import warn
|
|
5
|
-
import pickle
|
|
6
|
-
from joblib import Parallel, delayed
|
|
7
|
-
from tqdm import tqdm
|
|
8
|
-
from sklearn.preprocessing import LabelEncoder
|
|
9
|
-
from scipy.spatial import distance_matrix
|
|
10
|
-
from sklearn.base import BaseEstimator, TransformerMixin, clone
|
|
11
|
-
import multipers as mp
|
|
12
|
-
from typing import Iterable, List, Optional
|
|
13
|
-
from numpy.typing import ArrayLike
|
|
14
|
-
|
|
15
|
-
DATASET_PATH = expanduser("~/Datasets/")
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
def _check_installed(dataset: str):
|
|
19
|
-
from warnings import warn
|
|
20
|
-
from os.path import exists
|
|
21
|
-
|
|
22
|
-
assert dataset.startswith(
|
|
23
|
-
"graphs/"
|
|
24
|
-
), "Graph datasets have to be of the form graphs/<name>"
|
|
25
|
-
if exists(DATASET_PATH + dataset):
|
|
26
|
-
return
|
|
27
|
-
else:
|
|
28
|
-
warn(
|
|
29
|
-
f"""
|
|
30
|
-
Dataset {dataset} not installed.
|
|
31
|
-
You can find it in https://networkrepository.com
|
|
32
|
-
Then (optinally) configure multipers.data.graphs.DATASET_PATH, which is currently
|
|
33
|
-
> {DATASET_PATH=}
|
|
34
|
-
and puts this dataset in $DATASET_PATH/{dataset}
|
|
35
|
-
"""
|
|
36
|
-
)
|
|
37
|
-
raise ValueError("Unknown dataset.")
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
def get(dataset: str, filtration: Optional[str] = None):
|
|
41
|
-
if filtration is None:
|
|
42
|
-
return get_graphs(dataset)
|
|
43
|
-
graphs, labels = get_graphs(dataset)
|
|
44
|
-
try:
|
|
45
|
-
for g in graphs:
|
|
46
|
-
for node in g.nodes:
|
|
47
|
-
g.nodes[node][filtration]
|
|
48
|
-
except:
|
|
49
|
-
print(
|
|
50
|
-
f"Filtration {filtration} not computed, trying to compute it ...",
|
|
51
|
-
flush=True,
|
|
52
|
-
)
|
|
53
|
-
compute_filtration(dataset, filtration)
|
|
54
|
-
return get_graphs(dataset)
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
def get_from_file_old(dataset: str, label="lb"):
|
|
58
|
-
from os import walk
|
|
59
|
-
from scipy.io import loadmat
|
|
60
|
-
from warnings import warn
|
|
61
|
-
|
|
62
|
-
path = DATASET_PATH + dataset + "/mat/"
|
|
63
|
-
labels: list[int] = []
|
|
64
|
-
gs: list[nx.Graph] = []
|
|
65
|
-
for root, dir, files in walk(path):
|
|
66
|
-
for file in files:
|
|
67
|
-
file_ppties = file.split("_")
|
|
68
|
-
gid = file_ppties[5]
|
|
69
|
-
i = 0
|
|
70
|
-
while i + 1 < len(file_ppties) and file_ppties[i] != label:
|
|
71
|
-
i += 1
|
|
72
|
-
if i + 1 >= len(file_ppties):
|
|
73
|
-
warn(f"Cannot find label {label} on file {file}.")
|
|
74
|
-
else:
|
|
75
|
-
labels += [file_ppties[i + 1]]
|
|
76
|
-
adj_mat = np.array(loadmat(path + file)["A"], dtype=np.float32)
|
|
77
|
-
gs.append(nx.Graph(adj_mat))
|
|
78
|
-
return gs, labels
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
def get_from_file(dataset: str):
|
|
82
|
-
from os.path import expanduser, exists
|
|
83
|
-
|
|
84
|
-
path = DATASET_PATH + f"{dataset}/{dataset[7:]}."
|
|
85
|
-
try:
|
|
86
|
-
graphs_ids = np.loadtxt(path + "graph_idx")
|
|
87
|
-
except:
|
|
88
|
-
return get_from_file_old(dataset=dataset)
|
|
89
|
-
labels: list[int] = LabelEncoder().fit_transform(np.loadtxt(path + "graph_labels"))
|
|
90
|
-
edges = np.loadtxt(path + "edges", delimiter=",", dtype=int) - 1
|
|
91
|
-
has_intrinsic_filtration = exists(path + "node_attrs")
|
|
92
|
-
graphs: list[nx.Graph] = []
|
|
93
|
-
if has_intrinsic_filtration:
|
|
94
|
-
F = np.loadtxt(path + "node_attrs", delimiter=",")
|
|
95
|
-
for graph_id in tqdm(np.unique(graphs_ids), desc="Reading graphs from file"):
|
|
96
|
-
(nodes,) = np.where(graphs_ids == graph_id)
|
|
97
|
-
|
|
98
|
-
def graph_has_edge(u: int, v: int) -> bool:
|
|
99
|
-
if u in nodes or v in nodes:
|
|
100
|
-
assert u in nodes and v in nodes, f"Nodes\
|
|
101
|
-
{u} and {v} are not in the same graph"
|
|
102
|
-
return True
|
|
103
|
-
return False
|
|
104
|
-
|
|
105
|
-
graph_edges = [(u, v) for u, v in edges if graph_has_edge(u, v)]
|
|
106
|
-
g = nx.Graph(graph_edges)
|
|
107
|
-
if has_intrinsic_filtration:
|
|
108
|
-
node_attrs = {node: F[node] for node in nodes}
|
|
109
|
-
nx.set_node_attributes(g, node_attrs, "intrinsic")
|
|
110
|
-
graphs.append(g)
|
|
111
|
-
return graphs, labels
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
def get_graphs(dataset: str, N: int | str = "") -> tuple[list[nx.Graph], list[int]]:
|
|
115
|
-
_check_installed(dataset)
|
|
116
|
-
graphs_path = f"{DATASET_PATH}{dataset}/graphs{N}.pkl"
|
|
117
|
-
labels_path = f"{DATASET_PATH}{dataset}/labels{N}.pkl"
|
|
118
|
-
if not exists(graphs_path) or not exists(labels_path):
|
|
119
|
-
if dataset.startswith("3dshapes/"):
|
|
120
|
-
return get_from_file_old(
|
|
121
|
-
dataset,
|
|
122
|
-
)
|
|
123
|
-
|
|
124
|
-
graphs, labels = get_from_file(
|
|
125
|
-
dataset,
|
|
126
|
-
)
|
|
127
|
-
print("Saving graphs at :", graphs_path)
|
|
128
|
-
set_graphs(graphs=graphs, labels=labels, dataset=dataset)
|
|
129
|
-
else:
|
|
130
|
-
graphs = pickle.load(open(graphs_path, "rb"))
|
|
131
|
-
labels = pickle.load(open(labels_path, "rb"))
|
|
132
|
-
from sklearn.preprocessing import LabelEncoder
|
|
133
|
-
|
|
134
|
-
return graphs, LabelEncoder().fit_transform(labels)
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
# saves graphs (and filtration values) into a file
|
|
138
|
-
def set_graphs(graphs: list[nx.Graph], labels: list, dataset: str, N: int | str = ""):
|
|
139
|
-
graphs_path = f"{DATASET_PATH}{dataset}/graphs{N}.pkl"
|
|
140
|
-
labels_path = f"{DATASET_PATH}{dataset}/labels{N}.pkl"
|
|
141
|
-
pickle.dump(graphs, open(graphs_path, "wb"))
|
|
142
|
-
pickle.dump(labels, open(labels_path, "wb"))
|
|
143
|
-
return
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
def reset_graphs(dataset: str, N=None): # Resets filtrations values on graphs
|
|
147
|
-
graphs, labels = get_from_file(dataset)
|
|
148
|
-
set_graphs(graphs, labels, dataset)
|
|
149
|
-
return
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
def compute_ricci(graphs: list[nx.Graph], alpha=0.5, progress=1):
|
|
153
|
-
from GraphRicciCurvature.OllivierRicci import OllivierRicci
|
|
154
|
-
|
|
155
|
-
def ricci(graph, alpha=alpha):
|
|
156
|
-
return OllivierRicci(graph, alpha=alpha).compute_ricci_curvature()
|
|
157
|
-
|
|
158
|
-
graphs = [
|
|
159
|
-
ricci(g) for g in tqdm(graphs, disable=not progress, desc="Computing ricci")
|
|
160
|
-
]
|
|
161
|
-
|
|
162
|
-
def push_back_node(graph):
|
|
163
|
-
# for node in graph.nodes:
|
|
164
|
-
# graph.nodes[node]['ricciCurvature'] = np.min([graph[node][node2]['ricciCurvature'] for node2 in graph[node]] + [graph.nodes[node]['ricciCurvature']])
|
|
165
|
-
node_filtrations = {
|
|
166
|
-
node: -1
|
|
167
|
-
if len(graph[node]) == 0
|
|
168
|
-
else np.min([graph[node][node2]["ricciCurvature"] for node2 in graph[node]])
|
|
169
|
-
for node in graph.nodes
|
|
170
|
-
}
|
|
171
|
-
nx.set_node_attributes(graph, node_filtrations, "ricciCurvature")
|
|
172
|
-
return graph
|
|
173
|
-
|
|
174
|
-
graphs = [push_back_node(g) for g in graphs]
|
|
175
|
-
return graphs
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
def compute_cc(graphs: list[nx.Graph], progress=1):
|
|
179
|
-
def _cc(g):
|
|
180
|
-
cc = nx.closeness_centrality(g)
|
|
181
|
-
nx.set_node_attributes(g, cc, "cc")
|
|
182
|
-
edges_cc = {(u, v): max(cc[u], cc[v]) for u, v in g.edges}
|
|
183
|
-
nx.set_edge_attributes(g, edges_cc, "cc")
|
|
184
|
-
return g
|
|
185
|
-
|
|
186
|
-
graphs = Parallel(n_jobs=1, prefer="threads")(
|
|
187
|
-
delayed(_cc)(g) for g in tqdm(graphs, disable=not progress, desc="Computing cc")
|
|
188
|
-
)
|
|
189
|
-
return graphs
|
|
190
|
-
# for g in tqdm(graphs, desc="Computing cc"):
|
|
191
|
-
# _cc(g)
|
|
192
|
-
# return graphs
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
def compute_degree(graphs: list[nx.Graph], progress=1):
|
|
196
|
-
def _degree(g):
|
|
197
|
-
degrees = {i: 1.1 if degree == 0 else 1 / degree for i, degree in g.degree}
|
|
198
|
-
nx.set_node_attributes(g, degrees, "degree")
|
|
199
|
-
edges_dg = {(u, v): max(degrees[u], degrees[v]) for u, v in g.edges}
|
|
200
|
-
nx.set_edge_attributes(g, edges_dg, "degree")
|
|
201
|
-
return g
|
|
202
|
-
|
|
203
|
-
graphs = Parallel(n_jobs=1, prefer="threads")(
|
|
204
|
-
delayed(_degree)(g)
|
|
205
|
-
for g in tqdm(graphs, disable=not progress, desc="Computing degree")
|
|
206
|
-
)
|
|
207
|
-
return graphs
|
|
208
|
-
# for g in tqdm(graphs, desc="Computing degree"):
|
|
209
|
-
# _degree(g)
|
|
210
|
-
# return graphs
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
# TODO : make it compatible with non-connexe graphs
|
|
214
|
-
def compute_fiedler(graphs: list[nx.Graph], progress=1):
|
|
215
|
-
def _fiedler(g):
|
|
216
|
-
connected_graphs = [
|
|
217
|
-
nx.subgraph(g, nodes) for nodes in nx.connected_components(g)
|
|
218
|
-
]
|
|
219
|
-
fiedler_vectors = [
|
|
220
|
-
nx.fiedler_vector(g) ** 2
|
|
221
|
-
if g.number_of_nodes() > 2
|
|
222
|
-
else np.zeros(
|
|
223
|
-
g.number_of_nodes(
|
|
224
|
-
# order of nx.fiedler_vector correspond to nx.laplacian -> g.nodes
|
|
225
|
-
)
|
|
226
|
-
)
|
|
227
|
-
for g in connected_graphs
|
|
228
|
-
]
|
|
229
|
-
fiedler_dict = {
|
|
230
|
-
node: fiedler_vector[node_index]
|
|
231
|
-
for g, fiedler_vector in zip(connected_graphs, fiedler_vectors)
|
|
232
|
-
for node_index, node in enumerate(list(g.nodes))
|
|
233
|
-
}
|
|
234
|
-
nx.set_node_attributes(g, fiedler_dict, "fiedler")
|
|
235
|
-
edges_fiedler = {
|
|
236
|
-
(u, v): max(fiedler_dict[u], fiedler_dict[v]) for u, v in g.edges
|
|
237
|
-
}
|
|
238
|
-
nx.set_edge_attributes(g, edges_fiedler, "fiedler")
|
|
239
|
-
return g
|
|
240
|
-
|
|
241
|
-
graphs = Parallel(n_jobs=1, prefer="threads")(
|
|
242
|
-
delayed(_fiedler)(g)
|
|
243
|
-
for g in tqdm(graphs, disable=not progress, desc="Computing fiedler")
|
|
244
|
-
)
|
|
245
|
-
return graphs
|
|
246
|
-
# for g in tqdm(graphs, desc="Computing fiedler"):
|
|
247
|
-
# _fiedler(g)
|
|
248
|
-
# return graphs
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
def compute_hks(graphs: list[nx.Graph], t: float, progress=1):
|
|
252
|
-
def _hks(g: nx.Graph):
|
|
253
|
-
w, vps = np.linalg.eig(
|
|
254
|
-
nx.laplacianmatrix.normalized_laplacian_matrix(
|
|
255
|
-
g, nodelist=g.nodes()
|
|
256
|
-
).toarray()
|
|
257
|
-
) # order is given by g.nodes order
|
|
258
|
-
w = w.view(dtype=float)
|
|
259
|
-
vps = vps.view(dtype=float)
|
|
260
|
-
node_hks = {
|
|
261
|
-
node: np.sum(np.exp(-t * w) * np.square(vps[node_index, :]))
|
|
262
|
-
for node_index, node in enumerate(g.nodes)
|
|
263
|
-
}
|
|
264
|
-
nx.set_node_attributes(g, node_hks, f"hks_{t}")
|
|
265
|
-
edges_hks = {(u, v): max(node_hks[u], node_hks[v]) for u, v in g.edges}
|
|
266
|
-
nx.set_edge_attributes(g, edges_hks, f"hks_{t}")
|
|
267
|
-
return g
|
|
268
|
-
|
|
269
|
-
graphs = Parallel(n_jobs=1, prefer="threads")(
|
|
270
|
-
delayed(_hks)(g)
|
|
271
|
-
for g in tqdm(graphs, disable=not progress, desc=f"Computing hks_{t}")
|
|
272
|
-
)
|
|
273
|
-
return graphs
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
def compute_geodesic(graphs: list[nx.Graph], progress=1):
|
|
277
|
-
def _f(g: nx.Graph):
|
|
278
|
-
try:
|
|
279
|
-
nodes_intrinsic = {i: n["intrinsic"] for i, n in g.nodes.data()}
|
|
280
|
-
except:
|
|
281
|
-
warn(
|
|
282
|
-
"This graph doesn't have an intrinsic filtration, will use 0 instead ..."
|
|
283
|
-
)
|
|
284
|
-
nodes_intrinsic = {i: 0 for i, n in g.nodes.data()}
|
|
285
|
-
# return g
|
|
286
|
-
node_geodesic = {i: 0 for i in g.nodes}
|
|
287
|
-
nx.set_node_attributes(g, node_geodesic, f"geodesic")
|
|
288
|
-
edges_geodesic = {
|
|
289
|
-
(u, v): np.linalg.norm(nodes_intrinsic[u] - nodes_intrinsic[v])
|
|
290
|
-
for u, v in g.edges
|
|
291
|
-
}
|
|
292
|
-
nx.set_edge_attributes(g, edges_geodesic, f"geodesic")
|
|
293
|
-
return g
|
|
294
|
-
|
|
295
|
-
graphs = Parallel(n_jobs=1, prefer="threads")(
|
|
296
|
-
delayed(_f)(g)
|
|
297
|
-
for g in tqdm(
|
|
298
|
-
graphs, disable=not progress, desc=f"Computing geodesic distances on graphs"
|
|
299
|
-
)
|
|
300
|
-
)
|
|
301
|
-
return graphs
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
def compute_intrinsic(graphs: list[nx.Graph], progress=1, nowarning=False):
|
|
305
|
-
def _f(g: nx.Graph):
|
|
306
|
-
try:
|
|
307
|
-
nodes_intrinsic = {i: n["intrinsic"] for i, n in g.nodes.data()}
|
|
308
|
-
except:
|
|
309
|
-
if not nowarning:
|
|
310
|
-
raise ValueError("This graph doesn't have an intrinsic filtration.")
|
|
311
|
-
else:
|
|
312
|
-
return g
|
|
313
|
-
edges_intrinsic = {
|
|
314
|
-
(u, v): np.max([nodes_intrinsic[u], nodes_intrinsic[v]], axis=0)
|
|
315
|
-
for u, v in g.edges
|
|
316
|
-
}
|
|
317
|
-
nx.set_edge_attributes(g, edges_intrinsic, "intrinsic")
|
|
318
|
-
return g
|
|
319
|
-
|
|
320
|
-
graphs = Parallel(n_jobs=1, prefer="threads")(
|
|
321
|
-
delayed(_f)(g)
|
|
322
|
-
for g in tqdm(
|
|
323
|
-
graphs, disable=not progress, desc="Computing geodesic distances on graphs"
|
|
324
|
-
)
|
|
325
|
-
)
|
|
326
|
-
return graphs
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
def compute_filtration(dataset: str, filtration: str = "ALL", **kwargs):
|
|
330
|
-
if filtration == "ALL":
|
|
331
|
-
reset_graphs(dataset) # not necessary
|
|
332
|
-
graphs, labels = get_graphs(dataset, **kwargs)
|
|
333
|
-
graphs = compute_intrinsic(graphs, nowarning=True)
|
|
334
|
-
graphs = compute_geodesic(graphs)
|
|
335
|
-
graphs = compute_cc(graphs)
|
|
336
|
-
graphs = compute_degree(graphs)
|
|
337
|
-
graphs = compute_ricci(graphs)
|
|
338
|
-
graphs = compute_fiedler(graphs)
|
|
339
|
-
graphs = compute_hks(graphs, 10)
|
|
340
|
-
set_graphs(graphs=graphs, labels=labels, dataset=dataset)
|
|
341
|
-
return
|
|
342
|
-
graphs, labels = get_graphs(dataset, **kwargs)
|
|
343
|
-
if filtration == "dijkstra":
|
|
344
|
-
return
|
|
345
|
-
elif filtration == "cc":
|
|
346
|
-
graphs = compute_cc(graphs)
|
|
347
|
-
elif filtration == "degree":
|
|
348
|
-
graphs = compute_degree(graphs)
|
|
349
|
-
elif filtration == "ricciCurvature":
|
|
350
|
-
graphs = compute_ricci(graphs)
|
|
351
|
-
elif filtration == "fiedler":
|
|
352
|
-
graphs = compute_fiedler(graphs)
|
|
353
|
-
elif filtration == "geodesic":
|
|
354
|
-
graphs = compute_geodesic(graphs)
|
|
355
|
-
elif filtration.startswith("hks_"):
|
|
356
|
-
# don't want do deal with floats, makes dots in title...
|
|
357
|
-
t = int(filtration[4:])
|
|
358
|
-
graphs = compute_hks(graphs=graphs, t=t)
|
|
359
|
-
else:
|
|
360
|
-
warn(f"Filtration {filtration} not implemented !")
|
|
361
|
-
return
|
|
362
|
-
set_graphs(graphs=graphs, labels=labels, dataset=dataset)
|
|
363
|
-
return
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
class Graph2SimplexTrees(BaseEstimator, TransformerMixin):
|
|
367
|
-
"""
|
|
368
|
-
Transforms a list of networkx graphs into a list of simplextree multi
|
|
369
|
-
|
|
370
|
-
Usual Filtrations
|
|
371
|
-
-----------------
|
|
372
|
-
- "cc" closeness centrality
|
|
373
|
-
- "geodesic" if the graph provides data to compute it, e.g., BZR, COX2, PROTEINS
|
|
374
|
-
- "degree"
|
|
375
|
-
- "ricciCurvature" the ricci curvature
|
|
376
|
-
- "fiedler" the square of the fiedler vector
|
|
377
|
-
"""
|
|
378
|
-
|
|
379
|
-
def __init__(
|
|
380
|
-
self,
|
|
381
|
-
filtrations=[],
|
|
382
|
-
delayed=False,
|
|
383
|
-
num_collapses=100,
|
|
384
|
-
progress: bool = False,
|
|
385
|
-
):
|
|
386
|
-
super().__init__()
|
|
387
|
-
self.filtrations = filtrations # filtration to search in graph
|
|
388
|
-
self.delayed = delayed # reverses the filtration #TODO
|
|
389
|
-
self.num_collapses = num_collapses
|
|
390
|
-
self.progress = progress
|
|
391
|
-
self.num_parameters: int = len(filtrations)
|
|
392
|
-
|
|
393
|
-
def fit(self, X, y=None):
|
|
394
|
-
if len(X) == 0:
|
|
395
|
-
return self
|
|
396
|
-
self.num_parameters = len(self.filtrations)
|
|
397
|
-
if "intrinsic" in self.filtrations:
|
|
398
|
-
intrinsic_size = len(X[0].nodes[0]["intrinsic"])
|
|
399
|
-
self.num_parameters += intrinsic_size - 1
|
|
400
|
-
return self
|
|
401
|
-
|
|
402
|
-
def transform(self, X: list[nx.Graph]):
|
|
403
|
-
def todo(graph, filtrations=self.filtrations) -> list[mp.SimplexTreeMulti]:
|
|
404
|
-
st = mp.SimplexTreeMulti(num_parameters=self.num_parameters)
|
|
405
|
-
nodes = np.asarray(graph.nodes, dtype=int).reshape(1, -1)
|
|
406
|
-
nodes_filtrations = np.asarray(
|
|
407
|
-
[
|
|
408
|
-
[
|
|
409
|
-
filtration
|
|
410
|
-
for filtration_ in filtrations
|
|
411
|
-
for filtration in np.asarray(
|
|
412
|
-
graph.nodes[node][filtration_]
|
|
413
|
-
).reshape(-1)
|
|
414
|
-
# this reshape is for attributes that are vectors
|
|
415
|
-
]
|
|
416
|
-
for node in graph.nodes
|
|
417
|
-
],
|
|
418
|
-
dtype=np.float32,
|
|
419
|
-
)
|
|
420
|
-
st.insert_batch(nodes, nodes_filtrations)
|
|
421
|
-
edges = np.asarray(graph.edges, dtype=int).T
|
|
422
|
-
edges_filtrations = np.asarray(
|
|
423
|
-
[
|
|
424
|
-
[
|
|
425
|
-
filtration
|
|
426
|
-
for filtration_ in filtrations
|
|
427
|
-
for filtration in np.asarray(graph[u][v][filtration_]).reshape(
|
|
428
|
-
-1
|
|
429
|
-
)
|
|
430
|
-
# this reshape is for attributes that are vectors
|
|
431
|
-
]
|
|
432
|
-
for u, v in graph.edges
|
|
433
|
-
],
|
|
434
|
-
dtype=np.float32,
|
|
435
|
-
)
|
|
436
|
-
st.insert_batch(edges, edges_filtrations)
|
|
437
|
-
if st.num_parameters == 2:
|
|
438
|
-
# TODO : wait for a filtration domination update
|
|
439
|
-
st.collapse_edges(num=self.num_collapses)
|
|
440
|
-
# st.make_filtration_non_decreasing() ## Ricci is not safe ...
|
|
441
|
-
# same output for each pipelines, some have a supplementary axis.
|
|
442
|
-
return [st]
|
|
443
|
-
|
|
444
|
-
return (
|
|
445
|
-
[delayed(todo)(graph) for graph in X]
|
|
446
|
-
if self.delayed
|
|
447
|
-
# memory bound imo
|
|
448
|
-
else [
|
|
449
|
-
todo(graph=graph)
|
|
450
|
-
for graph in tqdm(
|
|
451
|
-
X,
|
|
452
|
-
desc="Computing simplextrees from graphs",
|
|
453
|
-
disable=not self.progress,
|
|
454
|
-
)
|
|
455
|
-
]
|
|
456
|
-
# # ,
|
|
457
|
-
# )
|
|
458
|
-
# else Parallel(n_jobs=-1, prefer="threads")(
|
|
459
|
-
# delayed(todo)(graph)
|
|
460
|
-
# for graph in tqdm(
|
|
461
|
-
# X,
|
|
462
|
-
# desc="Computing simplextrees from graphs",
|
|
463
|
-
# disable=not self.progress,
|
|
464
|
-
# )
|
|
465
|
-
# )
|
|
466
|
-
)
|
|
1
|
+
import numpy as np
|
|
2
|
+
from os.path import expanduser, exists
|
|
3
|
+
import networkx as nx
|
|
4
|
+
from warnings import warn
|
|
5
|
+
import pickle
|
|
6
|
+
from joblib import Parallel, delayed
|
|
7
|
+
from tqdm import tqdm
|
|
8
|
+
from sklearn.preprocessing import LabelEncoder
|
|
9
|
+
from scipy.spatial import distance_matrix
|
|
10
|
+
from sklearn.base import BaseEstimator, TransformerMixin, clone
|
|
11
|
+
import multipers as mp
|
|
12
|
+
from typing import Iterable, List, Optional
|
|
13
|
+
from numpy.typing import ArrayLike
|
|
14
|
+
|
|
15
|
+
DATASET_PATH = expanduser("~/Datasets/")
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _check_installed(dataset: str):
|
|
19
|
+
from warnings import warn
|
|
20
|
+
from os.path import exists
|
|
21
|
+
|
|
22
|
+
assert dataset.startswith(
|
|
23
|
+
"graphs/"
|
|
24
|
+
), "Graph datasets have to be of the form graphs/<name>"
|
|
25
|
+
if exists(DATASET_PATH + dataset):
|
|
26
|
+
return
|
|
27
|
+
else:
|
|
28
|
+
warn(
|
|
29
|
+
f"""
|
|
30
|
+
Dataset {dataset} not installed.
|
|
31
|
+
You can find it in https://networkrepository.com
|
|
32
|
+
Then (optinally) configure multipers.data.graphs.DATASET_PATH, which is currently
|
|
33
|
+
> {DATASET_PATH=}
|
|
34
|
+
and puts this dataset in $DATASET_PATH/{dataset}
|
|
35
|
+
"""
|
|
36
|
+
)
|
|
37
|
+
raise ValueError("Unknown dataset.")
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def get(dataset: str, filtration: Optional[str] = None):
|
|
41
|
+
if filtration is None:
|
|
42
|
+
return get_graphs(dataset)
|
|
43
|
+
graphs, labels = get_graphs(dataset)
|
|
44
|
+
try:
|
|
45
|
+
for g in graphs:
|
|
46
|
+
for node in g.nodes:
|
|
47
|
+
g.nodes[node][filtration]
|
|
48
|
+
except:
|
|
49
|
+
print(
|
|
50
|
+
f"Filtration {filtration} not computed, trying to compute it ...",
|
|
51
|
+
flush=True,
|
|
52
|
+
)
|
|
53
|
+
compute_filtration(dataset, filtration)
|
|
54
|
+
return get_graphs(dataset)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def get_from_file_old(dataset: str, label="lb"):
    """Load graphs stored as one ``.mat`` adjacency matrix per file.

    Every file found under ``DATASET_PATH + dataset + "/mat/"`` is read with
    ``scipy.io.loadmat`` and its ``"A"`` entry is used as adjacency matrix.
    The file name is split on ``"_"``; the token that follows the first
    occurrence of ``label`` is the graph's label.

    Parameters
    ----------
    dataset:
        Dataset identifier, appended to ``DATASET_PATH``.
    label:
        Token that precedes the label value in the file name.

    Returns
    -------
    ``(graphs, labels)``: two lists of equal length. Labels are the raw
    file-name tokens (strings). Files in which ``label`` cannot be found are
    reported with a warning and skipped, so that both lists stay aligned.
    """
    from os import walk
    from os.path import join
    from scipy.io import loadmat

    path = DATASET_PATH + dataset + "/mat/"
    labels: list[str] = []
    gs: list[nx.Graph] = []
    for root, _subdirs, files in walk(path):
        for file in files:
            tokens = file.split("_")
            try:
                # The label token needs a successor, hence the search stops
                # before the last token.
                label_position = tokens[:-1].index(label)
            except ValueError:
                warn(f"Cannot find label {label} on file {file}.")
                continue
            labels.append(tokens[label_position + 1])
            # ``root`` (not ``path``) so that files in sub-directories load too.
            adj_mat = np.array(loadmat(join(root, file))["A"], dtype=np.float32)
            gs.append(nx.Graph(adj_mat))
    return gs, labels
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def get_from_file(dataset: str):
    """Read a dataset stored as ``<name>.graph_idx`` / ``<name>.edges`` text files.

    Files are looked up at ``DATASET_PATH + "<dataset>/<name>."`` followed by
    ``graph_idx``, ``graph_labels``, ``edges`` and, optionally, ``node_attrs``.
    When ``graph_idx`` cannot be read, ``get_from_file_old`` is used instead.

    Parameters
    ----------
    dataset:
        Dataset identifier of the form ``graphs/<name>``.

    Returns
    -------
    ``(graphs, labels)``: the list of graphs (one per distinct value in
    ``graph_idx``) and the label-encoded content of ``graph_labels``. When
    ``node_attrs`` exists, its rows are stored as the ``"intrinsic"`` node
    attribute.

    Raises
    ------
    AssertionError
        If an edge links nodes belonging to two different graphs.
    """
    from os.path import exists

    # dataset[7:] strips the leading "graphs/" to get the bare dataset name.
    path = DATASET_PATH + f"{dataset}/{dataset[7:]}."
    try:
        graphs_ids = np.loadtxt(path + "graph_idx", ndmin=1)
    except (OSError, ValueError):
        # No readable index file: fall back to the .mat based layout.
        return get_from_file_old(dataset=dataset)
    labels = LabelEncoder().fit_transform(
        np.loadtxt(path + "graph_labels", ndmin=1)
    )
    # Node ids are 1-based in the file. The reshape keeps a file with a single
    # edge (or none) two-dimensional.
    edges = np.loadtxt(path + "edges", delimiter=",", dtype=int).reshape(-1, 2) - 1
    has_intrinsic_filtration = exists(path + "node_attrs")
    if has_intrinsic_filtration:
        F = np.loadtxt(path + "node_attrs", delimiter=",")
    graphs: list[nx.Graph] = []
    for graph_id in tqdm(np.unique(graphs_ids), desc="Reading graphs from file"):
        (nodes,) = np.where(graphs_ids == graph_id)

        # Vectorised membership test of both endpoints of every edge.
        endpoint_in_graph = np.isin(edges, nodes)
        source_in, target_in = endpoint_in_graph[:, 0], endpoint_in_graph[:, 1]
        straddling = source_in != target_in
        if straddling.any():
            u, v = edges[np.argmax(straddling)]
            raise AssertionError(f"Nodes {u} and {v} are not in the same graph")

        # Built from a list of pairs: a 2-D array would be read by networkx as
        # an adjacency matrix.
        g = nx.Graph([(u, v) for u, v in edges[source_in]])
        if has_intrinsic_filtration:
            node_attrs = {node: F[node] for node in nodes}
            nx.set_node_attributes(g, node_attrs, "intrinsic")
        graphs.append(g)
    return graphs, labels
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def get_graphs(dataset: str, N: int | str = "") -> tuple[list[nx.Graph], list[int]]:
    """Return the graphs and encoded labels of ``dataset``, using a pickle cache.

    The cache consists of ``graphs{N}.pkl`` and ``labels{N}.pkl`` inside
    ``DATASET_PATH + dataset``. When either file is missing, the dataset is
    read with ``get_from_file`` and the cache is written with ``set_graphs``.

    Parameters
    ----------
    dataset:
        Dataset identifier, validated by ``_check_installed``.
    N:
        Suffix inserted in the cache file names.

    Returns
    -------
    ``(graphs, labels)`` where labels went through
    ``LabelEncoder().fit_transform``.
    """
    _check_installed(dataset)
    graphs_path = f"{DATASET_PATH}{dataset}/graphs{N}.pkl"
    labels_path = f"{DATASET_PATH}{dataset}/labels{N}.pkl"
    if exists(graphs_path) and exists(labels_path):
        # NOTE(security): unpickling runs arbitrary code; DATASET_PATH must
        # only contain trusted files.
        with open(graphs_path, "rb") as graphs_file:
            graphs = pickle.load(graphs_file)
        with open(labels_path, "rb") as labels_file:
            labels = pickle.load(labels_file)
    else:
        # NOTE(review): _check_installed asserts a "graphs/" prefix, so this
        # branch looks unreachable unless assertions are disabled.
        if dataset.startswith("3dshapes/"):
            return get_from_file_old(
                dataset,
            )

        graphs, labels = get_from_file(
            dataset,
        )
        print("Saving graphs at :", graphs_path)
        # Forward N so the cache is written where it was just looked up.
        set_graphs(graphs=graphs, labels=labels, dataset=dataset, N=N)

    return graphs, LabelEncoder().fit_transform(labels)
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
# saves graphs (and filtration values) into a file
|
|
138
|
+
def set_graphs(graphs: list[nx.Graph], labels: list, dataset: str, N: int | str = ""):
    """Pickle ``graphs`` and ``labels`` into the cache files of ``dataset``.

    Parameters
    ----------
    graphs, labels:
        Objects to store; each one is dumped to its own file.
    dataset:
        Dataset identifier, appended to ``DATASET_PATH``.
    N:
        Suffix inserted in the file names (``graphs{N}.pkl``, ``labels{N}.pkl``).
    """
    graphs_path = f"{DATASET_PATH}{dataset}/graphs{N}.pkl"
    labels_path = f"{DATASET_PATH}{dataset}/labels{N}.pkl"
    # Context managers guarantee the files are flushed and closed.
    with open(graphs_path, "wb") as graphs_file:
        pickle.dump(graphs, graphs_file)
    with open(labels_path, "wb") as labels_file:
        pickle.dump(labels, labels_file)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def reset_graphs(dataset: str, N=None):
    """Rebuild the pickle cache of ``dataset`` from its raw files.

    The graphs are re-read with ``get_from_file`` and stored with
    ``set_graphs``, which discards attributes previously saved in the cache.

    Parameters
    ----------
    dataset:
        Dataset identifier.
    N:
        Cache file suffix forwarded to ``set_graphs``; ``None`` means no
        suffix (the default cache files).
    """
    graphs, labels = get_from_file(dataset)
    set_graphs(graphs, labels, dataset, N="" if N is None else N)
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def compute_ricci(graphs: list[nx.Graph], alpha=0.5, progress=1):
    """Attach a ``"ricciCurvature"`` value to the nodes of every graph.

    Each graph is replaced by the object returned by
    ``OllivierRicci(graph, alpha=alpha).compute_ricci_curvature()``. Every node
    then receives the minimum ``"ricciCurvature"`` of its incident edges, or
    ``-1`` when it has no neighbor.

    Parameters
    ----------
    graphs:
        Graphs to process.
    alpha:
        Forwarded to ``OllivierRicci``.
    progress:
        Falsy to disable the progress bar.

    Returns
    -------
    The list of processed graphs.
    """
    from GraphRicciCurvature.OllivierRicci import OllivierRicci

    curved_graphs = []
    for graph in tqdm(graphs, disable=not progress, desc="Computing ricci"):
        curved_graphs.append(
            OllivierRicci(graph, alpha=alpha).compute_ricci_curvature()
        )

    for graph in curved_graphs:
        node_curvature = {}
        for node in graph.nodes:
            neighbors = graph[node]
            if len(neighbors) == 0:
                node_curvature[node] = -1
            else:
                node_curvature[node] = np.min(
                    [neighbors[other]["ricciCurvature"] for other in neighbors]
                )
        nx.set_node_attributes(graph, node_curvature, "ricciCurvature")
    return curved_graphs
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def compute_cc(graphs: list[nx.Graph], progress=1):
    """Store closeness centrality as the ``"cc"`` attribute of nodes and edges.

    Nodes get the value of ``nx.closeness_centrality``; an edge gets the
    larger value of its two endpoints.

    Parameters
    ----------
    graphs:
        Graphs to annotate in place.
    progress:
        Falsy to disable the progress bar.

    Returns
    -------
    The list of annotated graphs.
    """

    def _annotate(graph):
        centrality = nx.closeness_centrality(graph)
        nx.set_node_attributes(graph, centrality, "cc")
        edge_values = {}
        for u, v in graph.edges:
            edge_values[(u, v)] = max(centrality[u], centrality[v])
        nx.set_edge_attributes(graph, edge_values, "cc")
        return graph

    progress_bar = tqdm(graphs, disable=not progress, desc="Computing cc")
    run = Parallel(n_jobs=1, prefer="threads")
    return run(delayed(_annotate)(graph) for graph in progress_bar)
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def compute_degree(graphs: list[nx.Graph], progress=1):
    """Store an inverse-degree value as the ``"degree"`` attribute.

    A node of degree ``d`` gets ``1 / d``, or ``1.1`` when ``d == 0``; an edge
    gets the larger value of its two endpoints.

    Parameters
    ----------
    graphs:
        Graphs to annotate in place.
    progress:
        Falsy to disable the progress bar.

    Returns
    -------
    The list of annotated graphs.
    """

    def _annotate(graph):
        node_values = {}
        for node, node_degree in graph.degree:
            node_values[node] = 1.1 if node_degree == 0 else 1 / node_degree
        nx.set_node_attributes(graph, node_values, "degree")
        edge_values = {}
        for u, v in graph.edges:
            edge_values[(u, v)] = max(node_values[u], node_values[v])
        nx.set_edge_attributes(graph, edge_values, "degree")
        return graph

    progress_bar = tqdm(graphs, disable=not progress, desc="Computing degree")
    run = Parallel(n_jobs=1, prefer="threads")
    return run(delayed(_annotate)(graph) for graph in progress_bar)
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
# TODO: make it compatible with disconnected (non-connected) graphs
|
|
214
|
+
def compute_fiedler(graphs: list[nx.Graph], progress=1):
    """Store the squared Fiedler vector as the ``"fiedler"`` attribute.

    The Fiedler vector is computed separately on every connected component
    with ``nx.fiedler_vector``; components with at most two nodes get zeros.
    An edge gets the larger value of its two endpoints.

    Parameters
    ----------
    graphs:
        Graphs to annotate in place.
    progress:
        Falsy to disable the progress bar.

    Returns
    -------
    The list of annotated graphs.
    """

    def _annotate(graph):
        components = [
            nx.subgraph(graph, component_nodes)
            for component_nodes in nx.connected_components(graph)
        ]
        node_values = {}
        for component in components:
            size = component.number_of_nodes()
            if size > 2:
                squared = nx.fiedler_vector(component) ** 2
            else:
                squared = np.zeros(size)
            # Entries of the vector are matched with nodes in the iteration
            # order of ``component.nodes``.
            for position, node in enumerate(component.nodes):
                node_values[node] = squared[position]
        nx.set_node_attributes(graph, node_values, "fiedler")
        edge_values = {}
        for u, v in graph.edges:
            edge_values[(u, v)] = max(node_values[u], node_values[v])
        nx.set_edge_attributes(graph, edge_values, "fiedler")
        return graph

    progress_bar = tqdm(graphs, disable=not progress, desc="Computing fiedler")
    run = Parallel(n_jobs=1, prefer="threads")
    return run(delayed(_annotate)(graph) for graph in progress_bar)
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def compute_hks(graphs: list[nx.Graph], t: float, progress=1):
    """Store the heat kernel signature at time ``t`` as the ``f"hks_{t}"`` attribute.

    For a node with row index ``i`` in the normalized Laplacian, the value is
    ``sum_k exp(-t * w_k) * v_k[i] ** 2`` over all eigenpairs ``(w_k, v_k)``.
    An edge gets the larger value of its two endpoints.

    Parameters
    ----------
    graphs:
        Undirected graphs to annotate in place. The eigen-decomposition
        assumes a symmetric Laplacian.
    t:
        Diffusion time; also part of the attribute name.
    progress:
        Falsy to disable the progress bar.

    Returns
    -------
    The list of annotated graphs.
    """

    def _hks(g: nx.Graph):
        # Rows and columns follow the order of g.nodes.
        laplacian = nx.normalized_laplacian_matrix(g, nodelist=g.nodes()).toarray()
        # eigh is the solver for symmetric matrices: its output is always
        # real, so no byte reinterpretation of a complex result can occur,
        # and its eigenvectors are orthonormal.
        eigenvalues, eigenvectors = np.linalg.eigh(laplacian)
        signature = np.square(eigenvectors) @ np.exp(-t * eigenvalues)
        node_hks = dict(zip(g.nodes, signature))
        nx.set_node_attributes(g, node_hks, f"hks_{t}")
        edges_hks = {(u, v): max(node_hks[u], node_hks[v]) for u, v in g.edges}
        nx.set_edge_attributes(g, edges_hks, f"hks_{t}")
        return g

    graphs = Parallel(n_jobs=1, prefer="threads")(
        delayed(_hks)(g)
        for g in tqdm(graphs, disable=not progress, desc=f"Computing hks_{t}")
    )
    return graphs
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def compute_geodesic(graphs: list[nx.Graph], progress=1):
    """Store edge lengths as the ``"geodesic"`` attribute.

    Every node gets ``0``. An edge gets the norm of the difference between
    the ``"intrinsic"`` attributes of its endpoints. When a node of a graph
    lacks that attribute, a warning is emitted and ``0`` is used for every
    node of that graph.

    Parameters
    ----------
    graphs:
        Graphs to annotate in place.
    progress:
        Falsy to disable the progress bar.

    Returns
    -------
    The list of annotated graphs.
    """

    def _f(g: nx.Graph):
        try:
            nodes_intrinsic = {i: n["intrinsic"] for i, n in g.nodes.data()}
        except KeyError:
            # Only a missing attribute triggers the fallback.
            warn(
                "This graph doesn't have an intrinsic filtration, will use 0 instead ..."
            )
            nodes_intrinsic = {i: 0 for i in g.nodes}
        node_geodesic = {i: 0 for i in g.nodes}
        nx.set_node_attributes(g, node_geodesic, "geodesic")
        edges_geodesic = {
            (u, v): np.linalg.norm(nodes_intrinsic[u] - nodes_intrinsic[v])
            for u, v in g.edges
        }
        nx.set_edge_attributes(g, edges_geodesic, "geodesic")
        return g

    graphs = Parallel(n_jobs=1, prefer="threads")(
        delayed(_f)(g)
        for g in tqdm(
            graphs, disable=not progress, desc="Computing geodesic distances on graphs"
        )
    )
    return graphs
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
def compute_intrinsic(graphs: list[nx.Graph], progress=1, nowarning=False):
    """Extend the ``"intrinsic"`` node attribute to edges.

    An edge gets the element-wise maximum of the ``"intrinsic"`` attributes
    of its endpoints.

    Parameters
    ----------
    graphs:
        Graphs to annotate in place.
    progress:
        Falsy to disable the progress bar.
    nowarning:
        When true, a graph with a node lacking ``"intrinsic"`` is returned
        untouched instead of raising.

    Returns
    -------
    The list of graphs.

    Raises
    ------
    ValueError
        If a node lacks the ``"intrinsic"`` attribute and ``nowarning`` is
        false.
    """

    def _f(g: nx.Graph):
        try:
            nodes_intrinsic = {i: n["intrinsic"] for i, n in g.nodes.data()}
        except KeyError as err:
            if nowarning:
                return g
            raise ValueError(
                "This graph doesn't have an intrinsic filtration."
            ) from err
        edges_intrinsic = {
            (u, v): np.max([nodes_intrinsic[u], nodes_intrinsic[v]], axis=0)
            for u, v in g.edges
        }
        nx.set_edge_attributes(g, edges_intrinsic, "intrinsic")
        return g

    graphs = Parallel(n_jobs=1, prefer="threads")(
        delayed(_f)(g)
        for g in tqdm(
            graphs,
            disable=not progress,
            desc="Computing intrinsic filtrations on graphs",
        )
    )
    return graphs
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
def compute_filtration(dataset: str, filtration: str = "ALL", **kwargs):
    """Compute a filtration on every graph of ``dataset`` and save the result.

    Parameters
    ----------
    dataset:
        Dataset identifier.
    filtration:
        One of ``"ALL"``, ``"dijkstra"`` (nothing is computed), ``"cc"``,
        ``"degree"``, ``"ricciCurvature"``, ``"fiedler"``, ``"geodesic"`` or
        ``"hks_<t>"`` with ``<t>`` an integer. Any other value only emits a
        warning. ``"ALL"`` computes intrinsic, geodesic, cc, degree, ricci,
        fiedler and ``hks_10``.
    **kwargs:
        Forwarded to ``get_graphs``; a cache suffix ``N`` given here is also
        used when saving.
    """
    # Save under the same cache suffix the graphs are loaded from.
    cache_suffix = kwargs.get("N", "")

    if filtration == "ALL":
        reset_graphs(dataset, N=cache_suffix)  # not necessary
        graphs, labels = get_graphs(dataset, **kwargs)
        graphs = compute_intrinsic(graphs, nowarning=True)
        graphs = compute_geodesic(graphs)
        graphs = compute_cc(graphs)
        graphs = compute_degree(graphs)
        graphs = compute_ricci(graphs)
        graphs = compute_fiedler(graphs)
        graphs = compute_hks(graphs, 10)
        set_graphs(graphs=graphs, labels=labels, dataset=dataset, N=cache_suffix)
        return

    graphs, labels = get_graphs(dataset, **kwargs)
    if filtration == "dijkstra":
        return

    computations = {
        "cc": compute_cc,
        "degree": compute_degree,
        "ricciCurvature": compute_ricci,
        "fiedler": compute_fiedler,
        "geodesic": compute_geodesic,
    }
    if filtration in computations:
        graphs = computations[filtration](graphs)
    elif filtration.startswith("hks_"):
        # Integer times only: floats would put dots in the attribute name.
        t = int(filtration[4:])
        graphs = compute_hks(graphs=graphs, t=t)
    else:
        warn(f"Filtration {filtration} not implemented !")
        return
    set_graphs(graphs=graphs, labels=labels, dataset=dataset, N=cache_suffix)
|
|
364
|
+
|
|
365
|
+
|
|
366
|
+
class Graph2SimplexTrees(BaseEstimator, TransformerMixin):
    """
    Transforms a list of networkx graphs into a list of simplextree multi

    Usual Filtrations
    -----------------
    - "cc" closeness centrality
    - "geodesic" if the graph provides data to compute it, e.g., BZR, COX2, PROTEINS
    - "degree"
    - "ricciCurvature" the ricci curvature
    - "fiedler" the square of the fiedler vector

    Parameters
    ----------
    filtrations:
        Names of the node/edge attributes to use as filtration values. A
        vector-valued attribute contributes one parameter per component.
    delayed:
        When true, ``transform`` returns ``joblib.delayed`` tasks instead of
        computing the simplex trees.
    num_collapses:
        Forwarded to ``collapse_edges``, which is only called for
        2-parameter simplex trees.
    progress:
        Whether to display a progress bar in ``transform``.
    """

    def __init__(
        self,
        filtrations=[],
        delayed=False,
        num_collapses=100,
        progress: bool = False,
    ):
        # NOTE: the list default is kept for interface compatibility; it is
        # never mutated by this class.
        super().__init__()
        self.filtrations = filtrations  # filtration to search in graph
        self.delayed = delayed
        self.num_collapses = num_collapses
        self.progress = progress
        self.num_parameters: int = len(filtrations)

    def fit(self, X, y=None):
        """Compute ``num_parameters`` from ``filtrations`` and the data.

        Every filtration counts for one parameter, except ``"intrinsic"``
        which counts for the size of the attribute found on the first node
        of ``X``.
        """
        if len(X) == 0:
            return self
        self.num_parameters = len(self.filtrations)
        if "intrinsic" in self.filtrations:
            # Any node will do; node labels need not include 0.
            first_node_attrs = next(
                (graph.nodes[node] for graph in X for node in graph.nodes), None
            )
            if first_node_attrs is not None:
                # np.size also accepts a scalar attribute.
                intrinsic_size = np.size(first_node_attrs["intrinsic"])
                self.num_parameters += intrinsic_size - 1
        return self

    @staticmethod
    def _stack_filtrations(attribute_dicts, filtrations):
        """Return one float32 row per attribute dict, concatenating the
        flattened values of ``filtrations`` in order."""
        return np.asarray(
            [
                [
                    value
                    for name in filtrations
                    # flatten so that vector attributes span several columns
                    for value in np.asarray(attributes[name]).reshape(-1)
                ]
                for attributes in attribute_dicts
            ],
            dtype=np.float32,
        )

    def transform(self, X: list[nx.Graph]):
        """Build one ``mp.SimplexTreeMulti`` per graph of ``X``.

        Returns a list with one single-element list ``[simplextree]`` per
        graph, or the corresponding ``joblib.delayed`` tasks when
        ``self.delayed`` is true.
        """

        def todo(graph, filtrations=self.filtrations) -> list[mp.SimplexTreeMulti]:
            st = mp.SimplexTreeMulti(num_parameters=self.num_parameters)
            nodes = np.asarray(graph.nodes, dtype=int).reshape(1, -1)
            nodes_filtrations = self._stack_filtrations(
                (graph.nodes[node] for node in graph.nodes), filtrations
            )
            st.insert_batch(nodes, nodes_filtrations)
            # An edgeless graph would produce an empty 1-D array here, which
            # presumably is not a valid batch (TODO confirm against
            # insert_batch); there is nothing to insert anyway.
            if graph.number_of_edges() > 0:
                edges = np.asarray(graph.edges, dtype=int).T
                edges_filtrations = self._stack_filtrations(
                    (graph[u][v] for u, v in graph.edges), filtrations
                )
                st.insert_batch(edges, edges_filtrations)
            if st.num_parameters == 2:
                # TODO : wait for a filtration domination update
                st.collapse_edges(num=self.num_collapses)
                # st.make_filtration_non_decreasing() ## Ricci is not safe ...
            # same output for each pipelines, some have a supplementary axis.
            return [st]

        if self.delayed:
            # ``delayed`` is joblib's function here, not the attribute.
            return [delayed(todo)(graph) for graph in X]
        return [
            todo(graph=graph)
            for graph in tqdm(
                X,
                desc="Computing simplextrees from graphs",
                disable=not self.progress,
            )
        ]
|