scikit-network 0.28.3__cp39-cp39-macosx_12_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-network might be problematic. Click here for more details.

Files changed (240) hide show
  1. scikit_network-0.28.3.dist-info/AUTHORS.rst +41 -0
  2. scikit_network-0.28.3.dist-info/LICENSE +34 -0
  3. scikit_network-0.28.3.dist-info/METADATA +457 -0
  4. scikit_network-0.28.3.dist-info/RECORD +240 -0
  5. scikit_network-0.28.3.dist-info/WHEEL +5 -0
  6. scikit_network-0.28.3.dist-info/top_level.txt +1 -0
  7. sknetwork/__init__.py +21 -0
  8. sknetwork/classification/__init__.py +8 -0
  9. sknetwork/classification/base.py +84 -0
  10. sknetwork/classification/base_rank.py +143 -0
  11. sknetwork/classification/diffusion.py +134 -0
  12. sknetwork/classification/knn.py +162 -0
  13. sknetwork/classification/metrics.py +205 -0
  14. sknetwork/classification/pagerank.py +66 -0
  15. sknetwork/classification/propagation.py +152 -0
  16. sknetwork/classification/tests/__init__.py +1 -0
  17. sknetwork/classification/tests/test_API.py +35 -0
  18. sknetwork/classification/tests/test_diffusion.py +37 -0
  19. sknetwork/classification/tests/test_knn.py +24 -0
  20. sknetwork/classification/tests/test_metrics.py +53 -0
  21. sknetwork/classification/tests/test_pagerank.py +20 -0
  22. sknetwork/classification/tests/test_propagation.py +24 -0
  23. sknetwork/classification/vote.cpython-39-darwin.so +0 -0
  24. sknetwork/classification/vote.pyx +58 -0
  25. sknetwork/clustering/__init__.py +7 -0
  26. sknetwork/clustering/base.py +102 -0
  27. sknetwork/clustering/kmeans.py +142 -0
  28. sknetwork/clustering/louvain.py +255 -0
  29. sknetwork/clustering/louvain_core.cpython-39-darwin.so +0 -0
  30. sknetwork/clustering/louvain_core.pyx +134 -0
  31. sknetwork/clustering/metrics.py +91 -0
  32. sknetwork/clustering/postprocess.py +66 -0
  33. sknetwork/clustering/propagation_clustering.py +108 -0
  34. sknetwork/clustering/tests/__init__.py +1 -0
  35. sknetwork/clustering/tests/test_API.py +37 -0
  36. sknetwork/clustering/tests/test_kmeans.py +47 -0
  37. sknetwork/clustering/tests/test_louvain.py +104 -0
  38. sknetwork/clustering/tests/test_metrics.py +50 -0
  39. sknetwork/clustering/tests/test_post_processing.py +23 -0
  40. sknetwork/clustering/tests/test_postprocess.py +39 -0
  41. sknetwork/data/__init__.py +5 -0
  42. sknetwork/data/load.py +408 -0
  43. sknetwork/data/models.py +459 -0
  44. sknetwork/data/parse.py +621 -0
  45. sknetwork/data/test_graphs.py +84 -0
  46. sknetwork/data/tests/__init__.py +1 -0
  47. sknetwork/data/tests/test_API.py +30 -0
  48. sknetwork/data/tests/test_load.py +95 -0
  49. sknetwork/data/tests/test_models.py +52 -0
  50. sknetwork/data/tests/test_parse.py +253 -0
  51. sknetwork/data/tests/test_test_graphs.py +30 -0
  52. sknetwork/data/tests/test_toy_graphs.py +68 -0
  53. sknetwork/data/toy_graphs.py +619 -0
  54. sknetwork/embedding/__init__.py +10 -0
  55. sknetwork/embedding/base.py +90 -0
  56. sknetwork/embedding/force_atlas.py +197 -0
  57. sknetwork/embedding/louvain_embedding.py +174 -0
  58. sknetwork/embedding/louvain_hierarchy.py +142 -0
  59. sknetwork/embedding/metrics.py +66 -0
  60. sknetwork/embedding/random_projection.py +133 -0
  61. sknetwork/embedding/spectral.py +214 -0
  62. sknetwork/embedding/spring.py +198 -0
  63. sknetwork/embedding/svd.py +363 -0
  64. sknetwork/embedding/tests/__init__.py +1 -0
  65. sknetwork/embedding/tests/test_API.py +73 -0
  66. sknetwork/embedding/tests/test_force_atlas.py +35 -0
  67. sknetwork/embedding/tests/test_louvain_embedding.py +33 -0
  68. sknetwork/embedding/tests/test_louvain_hierarchy.py +19 -0
  69. sknetwork/embedding/tests/test_metrics.py +29 -0
  70. sknetwork/embedding/tests/test_random_projection.py +28 -0
  71. sknetwork/embedding/tests/test_spectral.py +84 -0
  72. sknetwork/embedding/tests/test_spring.py +50 -0
  73. sknetwork/embedding/tests/test_svd.py +37 -0
  74. sknetwork/flow/__init__.py +3 -0
  75. sknetwork/flow/flow.py +73 -0
  76. sknetwork/flow/tests/__init__.py +1 -0
  77. sknetwork/flow/tests/test_flow.py +17 -0
  78. sknetwork/flow/tests/test_utils.py +69 -0
  79. sknetwork/flow/utils.py +91 -0
  80. sknetwork/gnn/__init__.py +10 -0
  81. sknetwork/gnn/activation.py +117 -0
  82. sknetwork/gnn/base.py +155 -0
  83. sknetwork/gnn/base_activation.py +89 -0
  84. sknetwork/gnn/base_layer.py +109 -0
  85. sknetwork/gnn/gnn_classifier.py +381 -0
  86. sknetwork/gnn/layer.py +153 -0
  87. sknetwork/gnn/layers.py +127 -0
  88. sknetwork/gnn/loss.py +180 -0
  89. sknetwork/gnn/neighbor_sampler.py +65 -0
  90. sknetwork/gnn/optimizer.py +163 -0
  91. sknetwork/gnn/tests/__init__.py +1 -0
  92. sknetwork/gnn/tests/test_activation.py +56 -0
  93. sknetwork/gnn/tests/test_base.py +79 -0
  94. sknetwork/gnn/tests/test_base_layer.py +37 -0
  95. sknetwork/gnn/tests/test_gnn_classifier.py +192 -0
  96. sknetwork/gnn/tests/test_layers.py +80 -0
  97. sknetwork/gnn/tests/test_loss.py +33 -0
  98. sknetwork/gnn/tests/test_neigh_sampler.py +23 -0
  99. sknetwork/gnn/tests/test_optimizer.py +43 -0
  100. sknetwork/gnn/tests/test_utils.py +93 -0
  101. sknetwork/gnn/utils.py +219 -0
  102. sknetwork/hierarchy/__init__.py +7 -0
  103. sknetwork/hierarchy/base.py +69 -0
  104. sknetwork/hierarchy/louvain_hierarchy.py +264 -0
  105. sknetwork/hierarchy/metrics.py +234 -0
  106. sknetwork/hierarchy/paris.cpython-39-darwin.so +0 -0
  107. sknetwork/hierarchy/paris.pyx +317 -0
  108. sknetwork/hierarchy/postprocess.py +350 -0
  109. sknetwork/hierarchy/tests/__init__.py +1 -0
  110. sknetwork/hierarchy/tests/test_API.py +25 -0
  111. sknetwork/hierarchy/tests/test_algos.py +29 -0
  112. sknetwork/hierarchy/tests/test_metrics.py +62 -0
  113. sknetwork/hierarchy/tests/test_postprocess.py +57 -0
  114. sknetwork/hierarchy/tests/test_ward.py +25 -0
  115. sknetwork/hierarchy/ward.py +94 -0
  116. sknetwork/linalg/__init__.py +9 -0
  117. sknetwork/linalg/basics.py +37 -0
  118. sknetwork/linalg/diteration.cpython-39-darwin.so +0 -0
  119. sknetwork/linalg/diteration.pyx +49 -0
  120. sknetwork/linalg/eig_solver.py +93 -0
  121. sknetwork/linalg/laplacian.py +15 -0
  122. sknetwork/linalg/normalization.py +66 -0
  123. sknetwork/linalg/operators.py +225 -0
  124. sknetwork/linalg/polynome.py +76 -0
  125. sknetwork/linalg/ppr_solver.py +170 -0
  126. sknetwork/linalg/push.cpython-39-darwin.so +0 -0
  127. sknetwork/linalg/push.pyx +73 -0
  128. sknetwork/linalg/sparse_lowrank.py +142 -0
  129. sknetwork/linalg/svd_solver.py +91 -0
  130. sknetwork/linalg/tests/__init__.py +1 -0
  131. sknetwork/linalg/tests/test_eig.py +44 -0
  132. sknetwork/linalg/tests/test_laplacian.py +18 -0
  133. sknetwork/linalg/tests/test_normalization.py +38 -0
  134. sknetwork/linalg/tests/test_operators.py +70 -0
  135. sknetwork/linalg/tests/test_polynome.py +38 -0
  136. sknetwork/linalg/tests/test_ppr.py +50 -0
  137. sknetwork/linalg/tests/test_sparse_lowrank.py +61 -0
  138. sknetwork/linalg/tests/test_svd.py +38 -0
  139. sknetwork/linkpred/__init__.py +4 -0
  140. sknetwork/linkpred/base.py +80 -0
  141. sknetwork/linkpred/first_order.py +508 -0
  142. sknetwork/linkpred/first_order_core.cpython-39-darwin.so +0 -0
  143. sknetwork/linkpred/first_order_core.pyx +315 -0
  144. sknetwork/linkpred/postprocessing.py +98 -0
  145. sknetwork/linkpred/tests/__init__.py +1 -0
  146. sknetwork/linkpred/tests/test_API.py +49 -0
  147. sknetwork/linkpred/tests/test_postprocessing.py +21 -0
  148. sknetwork/path/__init__.py +4 -0
  149. sknetwork/path/metrics.py +148 -0
  150. sknetwork/path/search.py +65 -0
  151. sknetwork/path/shortest_path.py +186 -0
  152. sknetwork/path/tests/__init__.py +1 -0
  153. sknetwork/path/tests/test_metrics.py +29 -0
  154. sknetwork/path/tests/test_search.py +25 -0
  155. sknetwork/path/tests/test_shortest_path.py +45 -0
  156. sknetwork/ranking/__init__.py +9 -0
  157. sknetwork/ranking/base.py +56 -0
  158. sknetwork/ranking/betweenness.cpython-39-darwin.so +0 -0
  159. sknetwork/ranking/betweenness.pyx +99 -0
  160. sknetwork/ranking/closeness.py +95 -0
  161. sknetwork/ranking/harmonic.py +82 -0
  162. sknetwork/ranking/hits.py +94 -0
  163. sknetwork/ranking/katz.py +81 -0
  164. sknetwork/ranking/pagerank.py +107 -0
  165. sknetwork/ranking/postprocess.py +25 -0
  166. sknetwork/ranking/tests/__init__.py +1 -0
  167. sknetwork/ranking/tests/test_API.py +34 -0
  168. sknetwork/ranking/tests/test_betweenness.py +38 -0
  169. sknetwork/ranking/tests/test_closeness.py +34 -0
  170. sknetwork/ranking/tests/test_hits.py +20 -0
  171. sknetwork/ranking/tests/test_pagerank.py +69 -0
  172. sknetwork/regression/__init__.py +4 -0
  173. sknetwork/regression/base.py +56 -0
  174. sknetwork/regression/diffusion.py +190 -0
  175. sknetwork/regression/tests/__init__.py +1 -0
  176. sknetwork/regression/tests/test_API.py +34 -0
  177. sknetwork/regression/tests/test_diffusion.py +48 -0
  178. sknetwork/sknetwork.py +3 -0
  179. sknetwork/topology/__init__.py +9 -0
  180. sknetwork/topology/dag.py +74 -0
  181. sknetwork/topology/dag_core.cpython-39-darwin.so +0 -0
  182. sknetwork/topology/dag_core.pyx +38 -0
  183. sknetwork/topology/kcliques.cpython-39-darwin.so +0 -0
  184. sknetwork/topology/kcliques.pyx +193 -0
  185. sknetwork/topology/kcore.cpython-39-darwin.so +0 -0
  186. sknetwork/topology/kcore.pyx +120 -0
  187. sknetwork/topology/structure.py +234 -0
  188. sknetwork/topology/tests/__init__.py +1 -0
  189. sknetwork/topology/tests/test_cliques.py +28 -0
  190. sknetwork/topology/tests/test_cores.py +21 -0
  191. sknetwork/topology/tests/test_dag.py +26 -0
  192. sknetwork/topology/tests/test_structure.py +99 -0
  193. sknetwork/topology/tests/test_triangles.py +42 -0
  194. sknetwork/topology/tests/test_wl_coloring.py +49 -0
  195. sknetwork/topology/tests/test_wl_kernel.py +31 -0
  196. sknetwork/topology/triangles.cpython-39-darwin.so +0 -0
  197. sknetwork/topology/triangles.pyx +166 -0
  198. sknetwork/topology/weisfeiler_lehman.py +163 -0
  199. sknetwork/topology/weisfeiler_lehman_core.cpython-39-darwin.so +0 -0
  200. sknetwork/topology/weisfeiler_lehman_core.pyx +116 -0
  201. sknetwork/utils/__init__.py +40 -0
  202. sknetwork/utils/base.py +35 -0
  203. sknetwork/utils/check.py +354 -0
  204. sknetwork/utils/co_neighbor.py +71 -0
  205. sknetwork/utils/format.py +219 -0
  206. sknetwork/utils/kmeans.py +89 -0
  207. sknetwork/utils/knn.py +166 -0
  208. sknetwork/utils/knn1d.cpython-39-darwin.so +0 -0
  209. sknetwork/utils/knn1d.pyx +80 -0
  210. sknetwork/utils/membership.py +82 -0
  211. sknetwork/utils/minheap.cpython-39-darwin.so +0 -0
  212. sknetwork/utils/minheap.pxd +22 -0
  213. sknetwork/utils/minheap.pyx +111 -0
  214. sknetwork/utils/neighbors.py +115 -0
  215. sknetwork/utils/seeds.py +75 -0
  216. sknetwork/utils/simplex.py +140 -0
  217. sknetwork/utils/tests/__init__.py +1 -0
  218. sknetwork/utils/tests/test_base.py +28 -0
  219. sknetwork/utils/tests/test_bunch.py +16 -0
  220. sknetwork/utils/tests/test_check.py +190 -0
  221. sknetwork/utils/tests/test_co_neighbor.py +43 -0
  222. sknetwork/utils/tests/test_format.py +61 -0
  223. sknetwork/utils/tests/test_kmeans.py +21 -0
  224. sknetwork/utils/tests/test_knn.py +32 -0
  225. sknetwork/utils/tests/test_membership.py +24 -0
  226. sknetwork/utils/tests/test_neighbors.py +41 -0
  227. sknetwork/utils/tests/test_projection_simplex.py +33 -0
  228. sknetwork/utils/tests/test_seeds.py +67 -0
  229. sknetwork/utils/tests/test_verbose.py +15 -0
  230. sknetwork/utils/tests/test_ward.py +20 -0
  231. sknetwork/utils/timeout.py +38 -0
  232. sknetwork/utils/verbose.py +37 -0
  233. sknetwork/utils/ward.py +60 -0
  234. sknetwork/visualization/__init__.py +4 -0
  235. sknetwork/visualization/colors.py +34 -0
  236. sknetwork/visualization/dendrograms.py +229 -0
  237. sknetwork/visualization/graphs.py +819 -0
  238. sknetwork/visualization/tests/__init__.py +1 -0
  239. sknetwork/visualization/tests/test_dendrograms.py +53 -0
  240. sknetwork/visualization/tests/test_graphs.py +167 -0
@@ -0,0 +1,621 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created on Dec 5, 2018
5
+ @author: Quentin Lutz <qlutz@enst.fr>
6
+ Nathan de Lara <nathan.delara@polytechnique.org>
7
+ Thomas Bonald <bonald@enst.fr>
8
+ """
9
+
10
+ from csv import reader
11
+ from typing import Dict, List, Tuple, Union
12
+ from xml.etree import ElementTree
13
+
14
+ import numpy as np
15
+ from scipy import sparse
16
+
17
+ from sknetwork.utils import Bunch
18
+ from sknetwork.utils.format import directed2undirected
19
+
20
+
21
def from_edge_list(edge_list: Union[np.ndarray, List[Tuple]], directed: bool = False,
                   bipartite: bool = False, weighted: bool = True, reindex: bool = True,
                   sum_duplicates: bool = True, matrix_only: bool = None) -> Union[Bunch, sparse.csr_matrix]:
    """Load a graph from an edge list.

    Parameters
    ----------
    edge_list : Union[np.ndarray, List[Tuple]]
        The edge list to convert, given as a NumPy array of size (n, 2) or (n, 3) or a list of tuples of
        length 2 or 3.
    directed : bool
        If ``True``, considers the graph as directed.
    bipartite : bool
        If ``True``, returns a biadjacency matrix.
    weighted : bool
        If ``True``, returns a weighted graph.
    reindex : bool
        If ``True``, reindex nodes and returns the original node indices as names.
        Reindexing is enforced if nodes are not integers.
    sum_duplicates : bool
        If ``True`` (default), sums weights of duplicate edges.
        Otherwise, the weight of each edge is that of the first occurrence of this edge.
    matrix_only : bool
        If ``True``, returns only the adjacency or biadjacency matrix.
        Otherwise, returns a ``Bunch`` object with graph attributes (e.g., node names).
        If not specified (default), selects the most appropriate format.

    Returns
    -------
    graph : :class:`Bunch` (including node names) or sparse matrix

    Raises
    ------
    ValueError
        If the edges of a list cannot be stacked into an array of pairs (and weights),
        or if an array does not have shape (n_edges, 2) or (n_edges, 3).
    TypeError
        If the edge list is neither a list nor a NumPy array.

    Examples
    --------
    >>> edges = [(0, 1), (1, 2), (2, 0)]
    >>> adjacency = from_edge_list(edges)
    >>> adjacency.shape
    (3, 3)
    >>> edges = [('Alice', 'Bob'), ('Bob', 'Carol'), ('Carol', 'Alice')]
    >>> graph = from_edge_list(edges)
    >>> adjacency = graph.adjacency
    >>> adjacency.shape
    (3, 3)
    >>> print(graph.names)
    ['Alice' 'Bob' 'Carol']
    """
    weights = None
    if isinstance(edge_list, list):
        try:
            edge_array = np.array([[edge[0], edge[1]] for edge in edge_list])
            # The size of the first edge decides whether weights are expected.
            if len(edge_list) and len(edge_list[0]) == 3:
                weights = np.array([edge[2] for edge in edge_list])
        except (ValueError, IndexError) as error:
            # Too short or ragged edges: report the problem instead of silently going on.
            raise ValueError('Edges must be given as tuples of fixed size (2 or 3).') from error
    elif isinstance(edge_list, np.ndarray):
        if edge_list.ndim != 2 or edge_list.shape[1] not in [2, 3]:
            raise ValueError('The edge list must be given as an array of shape (n_edges, 2) or '
                             '(n_edges, 3).')
        edge_array = edge_list[:, :2]
        if edge_list.shape[1] == 3:
            weights = edge_list[:, 2]
    else:
        raise TypeError('The edge list must be given as a NumPy array or a list of tuples.')
    return from_edge_array(edge_array=edge_array, weights=weights, directed=directed, bipartite=bipartite,
                           weighted=weighted, reindex=reindex, sum_duplicates=sum_duplicates,
                           matrix_only=matrix_only)
87
+
88
+
89
def from_adjacency_list(adjacency_list: Union[List[List], Dict[str, List]], directed: bool = False,
                        bipartite: bool = False, weighted: bool = True, reindex: bool = True,
                        sum_duplicates: bool = True, matrix_only: bool = None) -> Union[Bunch, sparse.csr_matrix]:
    """Build a graph from the neighbors of each node.

    The adjacency list is flattened into a list of edges (node, neighbor), which is then
    passed to :func:`from_edge_list` together with all other parameters.

    Parameters
    ----------
    adjacency_list : Union[List[List], Dict[str, List]]
        Adjacency list (neighbors of each node, the node being the position in the list)
        or dictionary (node: neighbors).
    directed : bool
        If ``True``, considers the graph as directed.
    bipartite : bool
        If ``True``, returns a biadjacency matrix.
    weighted : bool
        If ``True``, returns a weighted graph.
    reindex : bool
        If ``True``, reindex nodes and returns the original node indices as names.
        Reindexing is enforced if nodes are not integers.
    sum_duplicates : bool
        If ``True`` (default), sums weights of duplicate edges.
        Otherwise, the weight of each edge is that of the first occurrence of this edge.
    matrix_only : bool
        If ``True``, returns only the adjacency or biadjacency matrix.
        Otherwise, returns a ``Bunch`` object with graph attributes (e.g., node names).
        If not specified (default), selects the most appropriate format.

    Returns
    -------
    graph : :class:`Bunch` or sparse matrix

    Raises
    ------
    TypeError
        If the adjacency list is neither a list nor a dictionary.

    Example
    -------
    >>> edges = [[1, 2], [0, 2, 3], [0, 1]]
    >>> adjacency = from_adjacency_list(edges)
    >>> adjacency.shape
    (4, 4)
    """
    if isinstance(adjacency_list, list):
        # nodes are the positions in the list
        node_neighbors = enumerate(adjacency_list)
    elif isinstance(adjacency_list, dict):
        # nodes are the keys of the dictionary
        node_neighbors = adjacency_list.items()
    else:
        raise TypeError('The adjacency list must be given as a list of lists or a dict of lists.')
    edge_list = [(node, neighbor) for node, neighbors in node_neighbors for neighbor in neighbors]
    return from_edge_list(edge_list=edge_list, directed=directed, bipartite=bipartite, weighted=weighted,
                          reindex=reindex, sum_duplicates=sum_duplicates, matrix_only=matrix_only)
138
+
139
+
140
def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed: bool = False, bipartite: bool = False,
                    weighted: bool = True, reindex: bool = True, sum_duplicates: bool = True,
                    matrix_only: bool = None) -> Union[Bunch, sparse.csr_matrix]:
    """Load a graph from an edge array of shape (n_edges, 2) and weights (optional).

    Parameters
    ----------
    edge_array : np.ndarray
        Array of edges.
    weights : np.ndarray
        Array of weights. If not specified, all edges have weight 1.
    directed : bool
        If ``True``, considers the graph as directed.
    bipartite : bool
        If ``True``, returns a biadjacency matrix.
    weighted : bool
        If ``True``, returns a weighted graph.
    reindex : bool
        If ``True``, reindex nodes and returns the original node indices as names.
        Reindexing is enforced if nodes are not integers.
    sum_duplicates : bool
        If ``True`` (default), sums weights of duplicate edges.
        Otherwise, the weight of each edge is that of the first occurrence of this edge.
    matrix_only : bool
        If ``True``, returns only the adjacency or biadjacency matrix.
        Otherwise, returns a ``Bunch`` object with graph attributes (e.g., node names).
        If not specified (default), selects the most appropriate format.

    Returns
    -------
    graph : :class:`Bunch` or sparse matrix

    Raises
    ------
    ValueError
        If the weights cannot be converted to numbers.
    """
    if np.issubdtype(edge_array.dtype, np.integer):
        # Integer identifiers are used as is: a round trip through float would
        # silently alter identifiers above 2**53.
        edge_array = edge_array.astype(int)
    else:
        # Numbers stored as strings or floats (e.g., '3' or 3.0) are turned into integers when possible.
        try:
            edge_array = edge_array.astype(float)
        except ValueError:
            pass
        if edge_array.dtype == float and (edge_array == edge_array.astype(int)).all():
            edge_array = edge_array.astype(int)
    if weights is None:
        weights = np.ones(len(edge_array))
    if weights.dtype not in [bool, int, float]:
        try:
            weights = weights.astype(float)
        except ValueError as error:
            raise ValueError('Weights must be numeric.') from error
    # Use integer weights whenever this is lossless.
    if (weights == weights.astype(int)).all():
        weights = weights.astype(int)
    if not weighted:
        weights = weights.astype(bool)

    if not sum_duplicates:
        # keep the first occurrence of each edge only
        _, index = np.unique(edge_array, axis=0, return_index=True)
        edge_array = edge_array[index]
        weights = weights[index]
    graph = Bunch()
    if bipartite:
        # rows and columns are indexed independently
        row = edge_array[:, 0]
        col = edge_array[:, 1]
        if _must_reindex(row, reindex):
            names_row, row = np.unique(row, return_inverse=True)
            graph.names_row = names_row
            graph.names = names_row
            n_row = len(names_row)
        else:
            n_row = row.max() + 1
        if _must_reindex(col, reindex):
            names_col, col = np.unique(col, return_inverse=True)
            graph.names_col = names_col
            n_col = len(names_col)
        else:
            n_col = col.max() + 1
        matrix = sparse.csr_matrix((weights, (row, col)), shape=(n_row, n_col))
        graph.biadjacency = matrix
    else:
        # sources and targets share the same index
        nodes = edge_array.ravel()
        if _must_reindex(nodes, reindex):
            names, nodes = np.unique(nodes, return_inverse=True)
            graph.names = names
            n = len(names)
            edge_array = nodes.reshape(-1, 2)
        else:
            n = nodes.max() + 1
        row = edge_array[:, 0]
        col = edge_array[:, 1]
        matrix = sparse.csr_matrix((weights, (row, col)), shape=(n, n))
        if not directed:
            matrix = directed2undirected(matrix)
        graph.adjacency = matrix
    # Without any attribute other than the matrix, the matrix alone is returned by default.
    if matrix_only or (matrix_only is None and len(graph) == 1):
        return matrix
    else:
        return graph


def _must_reindex(nodes: np.ndarray, reindex: bool) -> bool:
    """Check whether node identifiers must be mapped to consecutive indices 0, 1, 2, ..."""
    if nodes.dtype != int:
        # identifiers that are not integers cannot be used as indices
        return True
    if not reindex:
        return False
    # Negative identifiers are not valid indices; gaps would create isolated nodes.
    return bool(nodes.min() < 0 or len(np.unique(nodes)) < nodes.max() + 1)
232
+
233
+
234
def from_csv(file_path: str, delimiter: str = None, sep: str = None, comments: str = '#%',
             data_structure: str = None, directed: bool = False, bipartite: bool = False, weighted: bool = True,
             reindex: bool = True, sum_duplicates: bool = True, matrix_only: bool = None) \
        -> Union[Bunch, sparse.csr_matrix]:
    """Load a graph from a CSV or TSV file.
    The delimiter can be specified (e.g., ' ' for space-separated values).

    Parameters
    ----------
    file_path : str
        Path to the CSV file.
    delimiter : str
        Delimiter used in the file. Guessed if not specified.
    sep : str
        Alias for delimiter (used only if delimiter is not specified).
    comments : str
        Characters for comment lines.
    data_structure : str
        If 'edge_list', considers each row of the file as an edge (tuple of size 2 or 3).
        If 'adjacency_list', considers each row of the file as an adjacency list (list of neighbors).
        If 'adjacency_dict', considers each row of the file as an adjacency dictionary with key
        given by the first column (node: list of neighbors).
        If ``None`` (default), data_structure is guessed from the first rows of the file.
    directed : bool
        If ``True``, considers the graph as directed.
    bipartite : bool
        If ``True``, returns a biadjacency matrix of shape (n1, n2).
    weighted : bool
        If ``True``, returns a weighted graph (e.g., counts the number of occurrences of each edge).
    reindex : bool
        If ``True``, reindex nodes and returns the original node indices as names.
        Reindexing is enforced if nodes are not integers.
    sum_duplicates : bool
        If ``True`` (default), sums weights of duplicate edges.
        Otherwise, the weight of each edge is that of the first occurrence of this edge.
    matrix_only : bool
        If ``True``, returns only the adjacency or biadjacency matrix.
        Otherwise, returns a ``Bunch`` object with graph attributes (e.g., node names).
        If not specified (default), selects the most appropriate format.

    Returns
    -------
    graph: :class:`Bunch` or sparse matrix

    Raises
    ------
    ValueError
        If the data structure is not 'edge_list', 'adjacency_list' or 'adjacency_dict'.
    """
    # Resolve the alias first so that the header scan relies on the delimiter actually used.
    if delimiter is None and sep is not None:
        delimiter = sep
    header_length, delimiter_guess, comment_guess, data_structure_guess = scan_header(file_path, delimiters=delimiter,
                                                                                      comments=comments)
    if delimiter is None:
        delimiter = delimiter_guess
    if data_structure is None:
        data_structure = data_structure_guess
    if data_structure not in ('edge_list', 'adjacency_list', 'adjacency_dict'):
        raise ValueError("The data structure must be 'edge_list', 'adjacency_list' or 'adjacency_dict'.")
    if data_structure == 'edge_list':
        # Fast path for numeric edge lists; any TypeError triggers the generic CSV parser below.
        try:
            array = np.genfromtxt(file_path, delimiter=delimiter, comments=comment_guess)
            # A file with a single row (or column) is not parsed as a 2-D array: use the generic parser.
            # Missing or non-numeric entries are parsed as NaN: use the generic parser as well.
            if array.ndim != 2 or np.isnan(array).any():
                raise TypeError()
            edge_array = array[:, :2].astype(int)
            if array.shape[1] == 3:
                weights = array[:, 2]
            else:
                weights = None
            return from_edge_array(edge_array=edge_array, weights=weights, directed=directed, bipartite=bipartite,
                                   weighted=weighted, reindex=reindex, sum_duplicates=sum_duplicates,
                                   matrix_only=matrix_only)
        except TypeError:
            pass
    with open(file_path, 'r', encoding='utf-8') as f:
        # skip the header
        for _ in range(header_length):
            f.readline()
        csv_reader = reader(f, delimiter=delimiter)
        if data_structure == 'edge_list':
            edge_list = [tuple(row) for row in csv_reader]
            return from_edge_list(edge_list=edge_list, directed=directed, bipartite=bipartite,
                                  weighted=weighted, reindex=reindex, sum_duplicates=sum_duplicates,
                                  matrix_only=matrix_only)
        elif data_structure == 'adjacency_list':
            adjacency_list = [row for row in csv_reader]
            return from_adjacency_list(adjacency_list=adjacency_list, directed=directed, bipartite=bipartite,
                                       weighted=weighted, reindex=reindex, sum_duplicates=sum_duplicates,
                                       matrix_only=matrix_only)
        else:
            # adjacency_dict: the first column is the node, the other columns are its neighbors
            adjacency_list = {row[0]: row[1:] for row in csv_reader}
            return from_adjacency_list(adjacency_list=adjacency_list, directed=directed, bipartite=bipartite,
                                       weighted=weighted, reindex=reindex, sum_duplicates=sum_duplicates,
                                       matrix_only=matrix_only)
321
+
322
+
323
def scan_header(file_path: str, delimiters: str = None, comments: str = '#%', n_scan: int = 100):
    """Infer some properties of the graph from the first lines of a CSV file.

    Comment lines and blank lines are ignored for the inference.
    Only those located before the first data row are counted in the header.

    Parameters
    ----------
    file_path : str
        Path to the CSV file.
    delimiters : str
        Possible delimiters (one character each). If not specified, tab, comma, semicolon and space.
    comments : str
        Possible comment characters.
    n_scan : int
        Number of rows scanned for inference.

    Returns
    -------
    header_length : int
        Length of the header (comments and blank lines)
    delimiter_guess : str
        Guessed delimiter.
    comment_guess : str
        Guessed comment character.
    data_structure_guess : str
        Either 'edge_list' or 'adjacency_list'.
    """
    header_length = 0
    if delimiters is None:
        delimiters = '\t,; '
    comment_guess = comments[0]
    comment_chars = tuple(comments)
    count = {delimiter: [] for delimiter in delimiters}
    rows = []
    with open(file_path, 'r', encoding='utf-8') as f:
        # Lazy iteration: at most n_scan data rows are read, whatever the size of the file.
        for row in f:
            is_comment = row.startswith(comment_chars)
            if is_comment or not row.strip():
                if is_comment:
                    comment_guess = row[0]
                # Lines after the first data row are not part of the header.
                if not rows:
                    header_length += 1
                continue
            rows.append(row.rstrip())
            for delimiter in delimiters:
                count[delimiter].append(row.count(delimiter))
            if len(rows) == n_scan:
                break
    if rows:
        means = [np.mean(count[delimiter]) for delimiter in delimiters]
        stds = [np.std(count[delimiter]) for delimiter in delimiters]
    else:
        # no data row: no statistics (the first delimiter is selected below)
        means = [0.] * len(delimiters)
        stds = [0.] * len(delimiters)
    # A delimiter occurring the same number of times in each row is the best candidate.
    index = np.argwhere((np.array(means) > 0) * (np.array(stds) == 0)).ravel()
    if len(index) == 1:
        delimiter_guess = delimiters[int(index[0])]
    else:
        # otherwise, select the most frequent delimiter
        delimiter_guess = delimiters[int(np.argmax(means))]
    # Rows of constant length 2 or 3 are considered as edges (with optional weights).
    length = {len(row.split(delimiter_guess)) for row in rows}
    if length == {2} or length == {3}:
        data_structure_guess = 'edge_list'
    else:
        data_structure_guess = 'adjacency_list'
    return header_length, delimiter_guess, comment_guess, data_structure_guess
378
+
379
+
380
def load_labels(file: str) -> np.ndarray:
    """Read a file with one label per line.

    Parameters
    ----------
    file : str
        The path to the dataset

    Returns
    -------
    labels: np.ndarray
        Labels, in the order of the file, without leading and trailing whitespaces.
    """
    with open(file, 'r', encoding='utf-8') as stream:
        return np.array([line.strip() for line in stream])
398
+
399
+
400
def load_header(file: str):
    """Read the type of graph from the first line of the file.

    Parameters
    ----------
    file : str
        Path to the file.

    Returns
    -------
    directed : bool
        ``True`` if the first line contains 'asym'.
    bipartite : bool
        ``True`` if the first line contains 'bip'.
    weighted : bool
        ``False`` if the first line contains 'unweighted'.
    """
    with open(file, 'r', encoding='utf-8') as stream:
        first_line = stream.readline()
    is_directed = 'asym' in first_line
    is_bipartite = 'bip' in first_line
    is_weighted = 'unweighted' not in first_line
    return is_directed, is_bipartite, is_weighted
412
+
413
+
414
def load_metadata(file: str, delimiter: str = ': ') -> Bunch:
    """Extract metadata from the file.

    Each row is split into a key (text before the first delimiter) and a value (text after it).

    Parameters
    ----------
    file : str
        Path to the file.
    delimiter : str
        Separator between key and value.

    Returns
    -------
    metadata : :class:`Bunch`
        Metadata (key: value).
    """
    metadata = Bunch()
    with open(file, 'r', encoding='utf-8') as f:
        for row in f:
            parts = row.split(delimiter)
            # Restore the delimiters of the value with the actual delimiter, not the default one.
            key, value = parts[0], delimiter.join(parts[1:]).strip('\n')
            metadata[key] = value
    return metadata
423
+
424
+
425
+ def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: int = 512) -> Bunch:
426
+ """Load graph from GraphML file.
427
+
428
+ Hyperedges and nested graphs are not supported.
429
+
430
+ Parameters
431
+ ----------
432
+ file_path: str
433
+ Path to the GraphML file.
434
+ weight_key: str
435
+ The key to be used as a value for edge weights
436
+ max_string_size: int
437
+ The maximum size for string features of the data
438
+
439
+ Returns
440
+ -------
441
+ data: :class:`Bunch`
442
+ The dataset in a bunch with the adjacency as a CSR matrix.
443
+ """
444
+ # see http://graphml.graphdrawing.org/primer/graphml-primer.html
445
+ # and http://graphml.graphdrawing.org/specification/dtd.html#top
446
+ tree = ElementTree.parse(file_path)
447
+ n_nodes = 0
448
+ n_edges = 0
449
+ symmetrize = None
450
+ naming_nodes = True
451
+ default_weight = 1
452
+ weight_type = bool
453
+ weight_id = None
454
+ # indices in the graph tree
455
+ node_indices = []
456
+ edge_indices = []
457
+ data = Bunch()
458
+ graph = None
459
+ file_description = None
460
+ attribute_descriptions = Bunch()
461
+ attribute_descriptions.node = Bunch()
462
+ attribute_descriptions.edge = Bunch()
463
+ keys = {}
464
+ for file_element in tree.getroot():
465
+ if file_element.tag.endswith('graph'):
466
+ graph = file_element
467
+ symmetrize = (graph.attrib['edgedefault'] == 'undirected')
468
+ for index, element in enumerate(graph):
469
+ if element.tag.endswith('node'):
470
+ node_indices.append(index)
471
+ n_nodes += 1
472
+ elif element.tag.endswith('edge'):
473
+ edge_indices.append(index)
474
+ if 'directed' in element.attrib:
475
+ if element.attrib['directed'] == 'true':
476
+ n_edges += 1
477
+ else:
478
+ n_edges += 2
479
+ elif symmetrize:
480
+ n_edges += 2
481
+ else:
482
+ n_edges += 1
483
+ if 'parse.nodeids' in graph.attrib:
484
+ naming_nodes = not (graph.attrib['parse.nodeids'] == 'canonical')
485
+ for file_element in tree.getroot():
486
+ if file_element.tag.endswith('key'):
487
+ attribute_name = file_element.attrib['attr.name']
488
+ attribute_type = java_type_to_python_type(file_element.attrib['attr.type'])
489
+ if attribute_name == weight_key:
490
+ weight_type = java_type_to_python_type(file_element.attrib['attr.type'])
491
+ weight_id = file_element.attrib['id']
492
+ for key_element in file_element:
493
+ if key_element.tag == 'default':
494
+ default_weight = attribute_type(key_element.text)
495
+ else:
496
+ default_value = None
497
+ if file_element.attrib['for'] == 'node':
498
+ size = n_nodes
499
+ if 'node_attribute' not in data:
500
+ data.node_attribute = Bunch()
501
+ for key_element in file_element:
502
+ if key_element.tag.endswith('desc'):
503
+ attribute_descriptions.node[attribute_name] = key_element.text
504
+ elif key_element.tag.endswith('default'):
505
+ default_value = attribute_type(key_element.text)
506
+ if attribute_type == str:
507
+ local_type = '<U' + str(max_string_size)
508
+ else:
509
+ local_type = attribute_type
510
+ if default_value:
511
+ data.node_attribute[attribute_name] = np.full(size, default_value, dtype=local_type)
512
+ else:
513
+ data.node_attribute[attribute_name] = np.zeros(size, dtype=local_type)
514
+ elif file_element.attrib['for'] == 'edge':
515
+ size = n_edges
516
+ if 'edge_attribute' not in data:
517
+ data.edge_attribute = Bunch()
518
+ for key_element in file_element:
519
+ if key_element.tag.endswith('desc'):
520
+ attribute_descriptions.edge[attribute_name] = key_element.text
521
+ elif key_element.tag.endswith('default'):
522
+ default_value = attribute_type(key_element.text)
523
+ if attribute_type == str:
524
+ local_type = '<U' + str(max_string_size)
525
+ else:
526
+ local_type = attribute_type
527
+ if default_value:
528
+ data.edge_attribute[attribute_name] = np.full(size, default_value, dtype=local_type)
529
+ else:
530
+ data.edge_attribute[attribute_name] = np.zeros(size, dtype=local_type)
531
+ keys[file_element.attrib['id']] = [attribute_name, attribute_type]
532
+ elif file_element.tag.endswith('desc'):
533
+ file_description = file_element.text
534
+ if file_description or attribute_descriptions.node or attribute_descriptions.edge:
535
+ data.meta = Bunch()
536
+ if file_description:
537
+ data.meta['description'] = file_description
538
+ if attribute_descriptions.node or attribute_descriptions.edge:
539
+ data.meta['attributes'] = attribute_descriptions
540
+ if graph is not None:
541
+ row = np.zeros(n_edges, dtype=int)
542
+ col = np.zeros(n_edges, dtype=int)
543
+ dat = np.full(n_edges, default_weight, dtype=weight_type)
544
+ data.names = None
545
+ if naming_nodes:
546
+ data.names = np.zeros(n_nodes, dtype='<U512')
547
+
548
+ node_map = {}
549
+ # deal with nodes first
550
+ for number, index in enumerate(node_indices):
551
+ node = graph[index]
552
+ if naming_nodes:
553
+ name = node.attrib['id']
554
+ data.names[number] = name
555
+ node_map[name] = number
556
+ for node_attribute in node:
557
+ if node_attribute.tag.endswith('data'):
558
+ data.node_attribute[keys[node_attribute.attrib['key']][0]][number] = \
559
+ keys[node_attribute.attrib['key']][1](node_attribute.text)
560
+ # deal with edges
561
+ edge_index = -1
562
+ for index in edge_indices:
563
+ edge_index += 1
564
+ duplicate = False
565
+ edge = graph[index]
566
+ if naming_nodes:
567
+ node1 = node_map[edge.attrib['source']]
568
+ node2 = node_map[edge.attrib['target']]
569
+ else:
570
+ node1 = int(edge.attrib['source'][1:])
571
+ node2 = int(edge.attrib['target'][1:])
572
+ row[edge_index] = node1
573
+ col[edge_index] = node2
574
+ for edge_attribute in edge:
575
+ if edge_attribute.tag.endswith('data'):
576
+ if edge_attribute.attrib['key'] == weight_id:
577
+ dat[edge_index] = weight_type(edge_attribute.text)
578
+ else:
579
+ data.edge_attribute[keys[edge_attribute.attrib['key']][0]][edge_index] = \
580
+ keys[edge_attribute.attrib['key']][1](edge_attribute.text)
581
+ if 'directed' in edge.attrib:
582
+ if edge.attrib['directed'] != 'true':
583
+ duplicate = True
584
+ elif symmetrize:
585
+ duplicate = True
586
+ if duplicate:
587
+ edge_index += 1
588
+ row[edge_index] = node2
589
+ col[edge_index] = node1
590
+ for edge_attribute in edge:
591
+ if edge_attribute.tag.endswith('data'):
592
+ if edge_attribute.attrib['key'] == weight_id:
593
+ dat[edge_index] = weight_type(edge_attribute.text)
594
+ else:
595
+ data.edge_attribute[keys[edge_attribute.attrib['key']][0]][edge_index] = \
596
+ keys[edge_attribute.attrib['key']][1](edge_attribute.text)
597
+ data.adjacency = sparse.csr_matrix((dat, (row, col)), shape=(n_nodes, n_nodes))
598
+ if data.names is None:
599
+ data.pop('names')
600
+ return data
601
+ else:
602
+ raise ValueError(f'No graph defined in {file_path}.')
603
+
604
+
605
def java_type_to_python_type(value: str) -> type:
    """Map a type name given as a string to the matching Python builtin type.

    Parameters
    ----------
    value : str
        Type name: ``'boolean'``, ``'int'``, ``'string'``, ``'long'``,
        ``'float'`` or ``'double'``.

    Returns
    -------
    type
        ``bool``, ``int`` or ``str`` for the first three names, and ``float``
        for ``'long'``, ``'float'`` and ``'double'``.
        ``None`` is returned when the name is not one of those listed above.
    """
    # Each entry pairs the accepted type names with the Python type to return.
    conversions = (
        (('boolean',), bool),
        (('int',), int),
        (('string',), str),
        (('long', 'float', 'double'), float),
    )
    for type_names, python_type in conversions:
        if value in type_names:
            return python_type
    # Unrecognised names fall through without raising.
    return None
614
+
615
+
616
def is_number(s):
    """Tell whether ``s`` is accepted by the builtin ``float`` conversion.

    Parameters
    ----------
    s :
        Value to test.

    Returns
    -------
    bool
        ``True`` if ``float(s)`` succeeds, ``False`` if it raises ``ValueError``.
        Any other exception raised by ``float`` is not caught here.
    """
    try:
        float(s)
    except ValueError:
        return False
    else:
        return True