scikit-network 0.33.3__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-network might be problematic. Click here for more details.

Files changed (228) hide show
  1. scikit_network-0.33.3.dist-info/METADATA +122 -0
  2. scikit_network-0.33.3.dist-info/RECORD +228 -0
  3. scikit_network-0.33.3.dist-info/WHEEL +5 -0
  4. scikit_network-0.33.3.dist-info/licenses/AUTHORS.rst +43 -0
  5. scikit_network-0.33.3.dist-info/licenses/LICENSE +34 -0
  6. scikit_network-0.33.3.dist-info/top_level.txt +1 -0
  7. sknetwork/__init__.py +21 -0
  8. sknetwork/base.py +67 -0
  9. sknetwork/classification/__init__.py +8 -0
  10. sknetwork/classification/base.py +142 -0
  11. sknetwork/classification/base_rank.py +133 -0
  12. sknetwork/classification/diffusion.py +134 -0
  13. sknetwork/classification/knn.py +139 -0
  14. sknetwork/classification/metrics.py +205 -0
  15. sknetwork/classification/pagerank.py +66 -0
  16. sknetwork/classification/propagation.py +152 -0
  17. sknetwork/classification/tests/__init__.py +1 -0
  18. sknetwork/classification/tests/test_API.py +30 -0
  19. sknetwork/classification/tests/test_diffusion.py +77 -0
  20. sknetwork/classification/tests/test_knn.py +23 -0
  21. sknetwork/classification/tests/test_metrics.py +53 -0
  22. sknetwork/classification/tests/test_pagerank.py +20 -0
  23. sknetwork/classification/tests/test_propagation.py +24 -0
  24. sknetwork/classification/vote.cp313-win_amd64.pyd +0 -0
  25. sknetwork/classification/vote.cpp +27584 -0
  26. sknetwork/classification/vote.pyx +56 -0
  27. sknetwork/clustering/__init__.py +8 -0
  28. sknetwork/clustering/base.py +172 -0
  29. sknetwork/clustering/kcenters.py +253 -0
  30. sknetwork/clustering/leiden.py +242 -0
  31. sknetwork/clustering/leiden_core.cp313-win_amd64.pyd +0 -0
  32. sknetwork/clustering/leiden_core.cpp +31575 -0
  33. sknetwork/clustering/leiden_core.pyx +124 -0
  34. sknetwork/clustering/louvain.py +286 -0
  35. sknetwork/clustering/louvain_core.cp313-win_amd64.pyd +0 -0
  36. sknetwork/clustering/louvain_core.cpp +31220 -0
  37. sknetwork/clustering/louvain_core.pyx +124 -0
  38. sknetwork/clustering/metrics.py +91 -0
  39. sknetwork/clustering/postprocess.py +66 -0
  40. sknetwork/clustering/propagation_clustering.py +104 -0
  41. sknetwork/clustering/tests/__init__.py +1 -0
  42. sknetwork/clustering/tests/test_API.py +38 -0
  43. sknetwork/clustering/tests/test_kcenters.py +60 -0
  44. sknetwork/clustering/tests/test_leiden.py +34 -0
  45. sknetwork/clustering/tests/test_louvain.py +135 -0
  46. sknetwork/clustering/tests/test_metrics.py +50 -0
  47. sknetwork/clustering/tests/test_postprocess.py +39 -0
  48. sknetwork/data/__init__.py +6 -0
  49. sknetwork/data/base.py +33 -0
  50. sknetwork/data/load.py +406 -0
  51. sknetwork/data/models.py +459 -0
  52. sknetwork/data/parse.py +644 -0
  53. sknetwork/data/test_graphs.py +84 -0
  54. sknetwork/data/tests/__init__.py +1 -0
  55. sknetwork/data/tests/test_API.py +30 -0
  56. sknetwork/data/tests/test_base.py +14 -0
  57. sknetwork/data/tests/test_load.py +95 -0
  58. sknetwork/data/tests/test_models.py +52 -0
  59. sknetwork/data/tests/test_parse.py +250 -0
  60. sknetwork/data/tests/test_test_graphs.py +29 -0
  61. sknetwork/data/tests/test_toy_graphs.py +68 -0
  62. sknetwork/data/timeout.py +38 -0
  63. sknetwork/data/toy_graphs.py +611 -0
  64. sknetwork/embedding/__init__.py +8 -0
  65. sknetwork/embedding/base.py +94 -0
  66. sknetwork/embedding/force_atlas.py +198 -0
  67. sknetwork/embedding/louvain_embedding.py +148 -0
  68. sknetwork/embedding/random_projection.py +135 -0
  69. sknetwork/embedding/spectral.py +141 -0
  70. sknetwork/embedding/spring.py +198 -0
  71. sknetwork/embedding/svd.py +359 -0
  72. sknetwork/embedding/tests/__init__.py +1 -0
  73. sknetwork/embedding/tests/test_API.py +49 -0
  74. sknetwork/embedding/tests/test_force_atlas.py +35 -0
  75. sknetwork/embedding/tests/test_louvain_embedding.py +33 -0
  76. sknetwork/embedding/tests/test_random_projection.py +28 -0
  77. sknetwork/embedding/tests/test_spectral.py +81 -0
  78. sknetwork/embedding/tests/test_spring.py +50 -0
  79. sknetwork/embedding/tests/test_svd.py +43 -0
  80. sknetwork/gnn/__init__.py +10 -0
  81. sknetwork/gnn/activation.py +117 -0
  82. sknetwork/gnn/base.py +181 -0
  83. sknetwork/gnn/base_activation.py +90 -0
  84. sknetwork/gnn/base_layer.py +109 -0
  85. sknetwork/gnn/gnn_classifier.py +305 -0
  86. sknetwork/gnn/layer.py +153 -0
  87. sknetwork/gnn/loss.py +180 -0
  88. sknetwork/gnn/neighbor_sampler.py +65 -0
  89. sknetwork/gnn/optimizer.py +164 -0
  90. sknetwork/gnn/tests/__init__.py +1 -0
  91. sknetwork/gnn/tests/test_activation.py +56 -0
  92. sknetwork/gnn/tests/test_base.py +75 -0
  93. sknetwork/gnn/tests/test_base_layer.py +37 -0
  94. sknetwork/gnn/tests/test_gnn_classifier.py +130 -0
  95. sknetwork/gnn/tests/test_layers.py +80 -0
  96. sknetwork/gnn/tests/test_loss.py +33 -0
  97. sknetwork/gnn/tests/test_neigh_sampler.py +23 -0
  98. sknetwork/gnn/tests/test_optimizer.py +43 -0
  99. sknetwork/gnn/tests/test_utils.py +41 -0
  100. sknetwork/gnn/utils.py +127 -0
  101. sknetwork/hierarchy/__init__.py +6 -0
  102. sknetwork/hierarchy/base.py +96 -0
  103. sknetwork/hierarchy/louvain_hierarchy.py +272 -0
  104. sknetwork/hierarchy/metrics.py +234 -0
  105. sknetwork/hierarchy/paris.cp313-win_amd64.pyd +0 -0
  106. sknetwork/hierarchy/paris.cpp +37868 -0
  107. sknetwork/hierarchy/paris.pyx +316 -0
  108. sknetwork/hierarchy/postprocess.py +350 -0
  109. sknetwork/hierarchy/tests/__init__.py +1 -0
  110. sknetwork/hierarchy/tests/test_API.py +24 -0
  111. sknetwork/hierarchy/tests/test_algos.py +34 -0
  112. sknetwork/hierarchy/tests/test_metrics.py +62 -0
  113. sknetwork/hierarchy/tests/test_postprocess.py +57 -0
  114. sknetwork/linalg/__init__.py +9 -0
  115. sknetwork/linalg/basics.py +37 -0
  116. sknetwork/linalg/diteration.cp313-win_amd64.pyd +0 -0
  117. sknetwork/linalg/diteration.cpp +27400 -0
  118. sknetwork/linalg/diteration.pyx +47 -0
  119. sknetwork/linalg/eig_solver.py +93 -0
  120. sknetwork/linalg/laplacian.py +15 -0
  121. sknetwork/linalg/normalizer.py +86 -0
  122. sknetwork/linalg/operators.py +225 -0
  123. sknetwork/linalg/polynome.py +76 -0
  124. sknetwork/linalg/ppr_solver.py +170 -0
  125. sknetwork/linalg/push.cp313-win_amd64.pyd +0 -0
  126. sknetwork/linalg/push.cpp +31072 -0
  127. sknetwork/linalg/push.pyx +71 -0
  128. sknetwork/linalg/sparse_lowrank.py +142 -0
  129. sknetwork/linalg/svd_solver.py +91 -0
  130. sknetwork/linalg/tests/__init__.py +1 -0
  131. sknetwork/linalg/tests/test_eig.py +44 -0
  132. sknetwork/linalg/tests/test_laplacian.py +18 -0
  133. sknetwork/linalg/tests/test_normalization.py +34 -0
  134. sknetwork/linalg/tests/test_operators.py +66 -0
  135. sknetwork/linalg/tests/test_polynome.py +38 -0
  136. sknetwork/linalg/tests/test_ppr.py +50 -0
  137. sknetwork/linalg/tests/test_sparse_lowrank.py +61 -0
  138. sknetwork/linalg/tests/test_svd.py +38 -0
  139. sknetwork/linkpred/__init__.py +2 -0
  140. sknetwork/linkpred/base.py +46 -0
  141. sknetwork/linkpred/nn.py +126 -0
  142. sknetwork/linkpred/tests/__init__.py +1 -0
  143. sknetwork/linkpred/tests/test_nn.py +27 -0
  144. sknetwork/log.py +19 -0
  145. sknetwork/path/__init__.py +5 -0
  146. sknetwork/path/dag.py +54 -0
  147. sknetwork/path/distances.py +98 -0
  148. sknetwork/path/search.py +31 -0
  149. sknetwork/path/shortest_path.py +61 -0
  150. sknetwork/path/tests/__init__.py +1 -0
  151. sknetwork/path/tests/test_dag.py +37 -0
  152. sknetwork/path/tests/test_distances.py +62 -0
  153. sknetwork/path/tests/test_search.py +40 -0
  154. sknetwork/path/tests/test_shortest_path.py +40 -0
  155. sknetwork/ranking/__init__.py +8 -0
  156. sknetwork/ranking/base.py +61 -0
  157. sknetwork/ranking/betweenness.cp313-win_amd64.pyd +0 -0
  158. sknetwork/ranking/betweenness.cpp +9707 -0
  159. sknetwork/ranking/betweenness.pyx +97 -0
  160. sknetwork/ranking/closeness.py +92 -0
  161. sknetwork/ranking/hits.py +94 -0
  162. sknetwork/ranking/katz.py +83 -0
  163. sknetwork/ranking/pagerank.py +110 -0
  164. sknetwork/ranking/postprocess.py +37 -0
  165. sknetwork/ranking/tests/__init__.py +1 -0
  166. sknetwork/ranking/tests/test_API.py +32 -0
  167. sknetwork/ranking/tests/test_betweenness.py +38 -0
  168. sknetwork/ranking/tests/test_closeness.py +30 -0
  169. sknetwork/ranking/tests/test_hits.py +20 -0
  170. sknetwork/ranking/tests/test_pagerank.py +62 -0
  171. sknetwork/ranking/tests/test_postprocess.py +26 -0
  172. sknetwork/regression/__init__.py +4 -0
  173. sknetwork/regression/base.py +61 -0
  174. sknetwork/regression/diffusion.py +210 -0
  175. sknetwork/regression/tests/__init__.py +1 -0
  176. sknetwork/regression/tests/test_API.py +32 -0
  177. sknetwork/regression/tests/test_diffusion.py +56 -0
  178. sknetwork/sknetwork.py +3 -0
  179. sknetwork/test_base.py +35 -0
  180. sknetwork/test_log.py +15 -0
  181. sknetwork/topology/__init__.py +8 -0
  182. sknetwork/topology/cliques.cp313-win_amd64.pyd +0 -0
  183. sknetwork/topology/cliques.cpp +32565 -0
  184. sknetwork/topology/cliques.pyx +149 -0
  185. sknetwork/topology/core.cp313-win_amd64.pyd +0 -0
  186. sknetwork/topology/core.cpp +30651 -0
  187. sknetwork/topology/core.pyx +90 -0
  188. sknetwork/topology/cycles.py +243 -0
  189. sknetwork/topology/minheap.cp313-win_amd64.pyd +0 -0
  190. sknetwork/topology/minheap.cpp +27332 -0
  191. sknetwork/topology/minheap.pxd +20 -0
  192. sknetwork/topology/minheap.pyx +109 -0
  193. sknetwork/topology/structure.py +194 -0
  194. sknetwork/topology/tests/__init__.py +1 -0
  195. sknetwork/topology/tests/test_cliques.py +28 -0
  196. sknetwork/topology/tests/test_core.py +19 -0
  197. sknetwork/topology/tests/test_cycles.py +65 -0
  198. sknetwork/topology/tests/test_structure.py +85 -0
  199. sknetwork/topology/tests/test_triangles.py +38 -0
  200. sknetwork/topology/tests/test_wl.py +72 -0
  201. sknetwork/topology/triangles.cp313-win_amd64.pyd +0 -0
  202. sknetwork/topology/triangles.cpp +8894 -0
  203. sknetwork/topology/triangles.pyx +151 -0
  204. sknetwork/topology/weisfeiler_lehman.py +133 -0
  205. sknetwork/topology/weisfeiler_lehman_core.cp313-win_amd64.pyd +0 -0
  206. sknetwork/topology/weisfeiler_lehman_core.cpp +27635 -0
  207. sknetwork/topology/weisfeiler_lehman_core.pyx +114 -0
  208. sknetwork/utils/__init__.py +7 -0
  209. sknetwork/utils/check.py +355 -0
  210. sknetwork/utils/format.py +221 -0
  211. sknetwork/utils/membership.py +82 -0
  212. sknetwork/utils/neighbors.py +115 -0
  213. sknetwork/utils/tests/__init__.py +1 -0
  214. sknetwork/utils/tests/test_check.py +190 -0
  215. sknetwork/utils/tests/test_format.py +63 -0
  216. sknetwork/utils/tests/test_membership.py +24 -0
  217. sknetwork/utils/tests/test_neighbors.py +41 -0
  218. sknetwork/utils/tests/test_tfidf.py +18 -0
  219. sknetwork/utils/tests/test_values.py +66 -0
  220. sknetwork/utils/tfidf.py +37 -0
  221. sknetwork/utils/values.py +76 -0
  222. sknetwork/visualization/__init__.py +4 -0
  223. sknetwork/visualization/colors.py +34 -0
  224. sknetwork/visualization/dendrograms.py +277 -0
  225. sknetwork/visualization/graphs.py +1039 -0
  226. sknetwork/visualization/tests/__init__.py +1 -0
  227. sknetwork/visualization/tests/test_dendrograms.py +53 -0
  228. sknetwork/visualization/tests/test_graphs.py +176 -0
@@ -0,0 +1,644 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created in December 2018
5
+ @author: Quentin Lutz <qlutz@enst.fr>
6
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
7
+ @author: Thomas Bonald <bonald@enst.fr>
8
+ """
9
+
10
+ from csv import reader
11
+ from typing import Dict, List, Tuple, Union, Optional
12
+ from xml.etree import ElementTree
13
+
14
+ import numpy as np
15
+ from scipy import sparse
16
+
17
+ from sknetwork.data.base import Dataset
18
+ from sknetwork.utils.format import directed2undirected
19
+
20
+
21
def from_edge_list(edge_list: Union[np.ndarray, List[Tuple]], directed: bool = False,
                   bipartite: bool = False, weighted: bool = True, reindex: bool = False, shape: Optional[tuple] = None,
                   sum_duplicates: bool = True, matrix_only: bool = None) -> Union[Dataset, sparse.csr_matrix]:
    """Load a graph from an edge list.

    Parameters
    ----------
    edge_list : Union[np.ndarray, List[Tuple]]
        The edge list to convert, given as a NumPy array of size (n, 2) or (n, 3) or a list of tuples of
        length 2 or 3. For a list, weights are read only if the first edge has length 3.
    directed : bool
        If ``True``, considers the graph as directed.
    bipartite : bool
        If ``True``, returns a biadjacency matrix.
    weighted : bool
        If ``True``, returns a weighted graph.
    reindex : bool
        If ``True``, reindex nodes and returns the original node indices as names.
        Reindexing is enforced if nodes are not integers.
    shape : tuple
        Shape of the adjacency or biadjacency matrix.
        If not specified or if nodes are reindexed, the shape is the smallest compatible with node indices.
    sum_duplicates : bool
        If ``True`` (default), sums weights of duplicate edges.
        Otherwise, the weight of each edge is that of the first occurrence of this edge.
    matrix_only : bool
        If ``True``, returns only the adjacency or biadjacency matrix.
        Otherwise, returns a ``Dataset`` object with graph attributes (e.g., node names).
        If not specified (default), selects the most appropriate format.

    Returns
    -------
    graph : :class:`Dataset` (including node names) or sparse matrix

    Raises
    ------
    ValueError
        If an edge of the list is too short (missing node or missing weight), if the edges cannot be
        stacked into an array, or if the NumPy array does not have shape (n_edges, 2) or (n_edges, 3).
    TypeError
        If the edge list is neither a list nor a NumPy array.

    Examples
    --------
    >>> edges = [(0, 1), (1, 2), (2, 0)]
    >>> adjacency = from_edge_list(edges)
    >>> adjacency.shape
    (3, 3)
    >>> edges = [('Alice', 'Bob'), ('Bob', 'Carol'), ('Carol', 'Alice')]
    >>> graph = from_edge_list(edges)
    >>> adjacency = graph.adjacency
    >>> adjacency.shape
    (3, 3)
    >>> print(graph.names)
    ['Alice' 'Bob' 'Carol']
    """
    weights = None
    if isinstance(edge_list, list):
        # The first edge decides whether a third entry (the weight) is read.
        has_weights = len(edge_list) > 0 and len(edge_list[0]) == 3
        try:
            edge_array = np.array([[edge[0], edge[1]] for edge in edge_list])
            if has_weights:
                weights = np.array([edge[2] for edge in edge_list])
        except (IndexError, ValueError) as error:
            # IndexError: an edge is shorter than expected; ValueError: entries cannot form a regular array.
            raise ValueError('Edges must be given as tuples of fixed size (2 or 3).') from error
    elif isinstance(edge_list, np.ndarray):
        if edge_list.ndim != 2 or edge_list.shape[1] not in [2, 3]:
            raise ValueError('The edge list must be given as an array of shape (n_edges, 2) or '
                             '(n_edges, 3).')
        edge_array = edge_list[:, :2]
        if edge_list.shape[1] == 3:
            weights = edge_list[:, 2]
    else:
        raise TypeError('The edge list must be given as a NumPy array or a list of tuples.')
    return from_edge_array(edge_array=edge_array, weights=weights, directed=directed, bipartite=bipartite,
                           weighted=weighted, reindex=reindex, shape=shape, sum_duplicates=sum_duplicates,
                           matrix_only=matrix_only)
91
+
92
+
93
def from_adjacency_list(adjacency_list: Union[List[List], Dict[str, List]], directed: bool = False,
                        bipartite: bool = False, weighted: bool = True, reindex: bool = False,
                        shape: Optional[tuple] = None, sum_duplicates: bool = True, matrix_only: bool = None) \
        -> Union[Dataset, sparse.csr_matrix]:
    """Load a graph from an adjacency list.

    Each (node, neighbor) pair is turned into an edge and the resulting edge list is passed to
    :func:`from_edge_list`.

    Parameters
    ----------
    adjacency_list : Union[List[List], Dict[str, List]]
        Adjacency list (neighbors of each node) or dictionary (node: neighbors).
        For a list, the node is the position of its neighbor list.
    directed : bool
        If ``True``, considers the graph as directed.
    bipartite : bool
        If ``True``, returns a biadjacency matrix.
    weighted : bool
        If ``True``, returns a weighted graph.
    reindex : bool
        If ``True``, reindex nodes and returns the original node indices as names.
        Reindexing is enforced if nodes are not integers.
    shape : tuple
        Shape of the adjacency or biadjacency matrix.
        If not specified or if nodes are reindexed, the shape is the smallest compatible with node indices.
    sum_duplicates : bool
        If ``True`` (default), sums weights of duplicate edges.
        Otherwise, the weight of each edge is that of the first occurrence of this edge.
    matrix_only : bool
        If ``True``, returns only the adjacency or biadjacency matrix.
        Otherwise, returns a ``Dataset`` object with graph attributes (e.g., node names).
        If not specified (default), selects the most appropriate format.

    Returns
    -------
    graph : :class:`Dataset` or sparse matrix

    Raises
    ------
    TypeError
        If the adjacency list is neither a list nor a dictionary.

    Example
    -------
    >>> edges = [[1, 2], [0, 2, 3], [0, 1]]
    >>> adjacency = from_adjacency_list(edges)
    >>> adjacency.shape
    (4, 4)
    """
    if isinstance(adjacency_list, list):
        node_neighbors = enumerate(adjacency_list)
    elif isinstance(adjacency_list, dict):
        node_neighbors = adjacency_list.items()
    else:
        raise TypeError('The adjacency list must be given as a list of lists or a dict of lists.')
    pairs = [(node, neighbor) for node, neighbors in node_neighbors for neighbor in neighbors]
    return from_edge_list(edge_list=pairs, directed=directed, bipartite=bipartite, weighted=weighted,
                          reindex=reindex, shape=shape, sum_duplicates=sum_duplicates, matrix_only=matrix_only)
146
+
147
+
148
def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed: bool = False, bipartite: bool = False,
                    weighted: bool = True, reindex: bool = False, shape: Optional[tuple] = None,
                    sum_duplicates: bool = True, matrix_only: bool = None) -> Union[Dataset, sparse.csr_matrix]:
    """Load a graph from an edge array of shape (n_edges, 2) and weights (optional).

    Parameters
    ----------
    edge_array : np.ndarray
        Array of edges.
    weights : np.ndarray
        Array of weights. If not specified, all weights are equal to 1.
    directed : bool
        If ``True``, considers the graph as directed.
    bipartite : bool
        If ``True``, returns a biadjacency matrix.
    weighted : bool
        If ``True``, returns a weighted graph. Otherwise, weights are cast to booleans.
    reindex : bool
        If ``True``, reindex nodes and returns the original node indices as names.
        Reindexing is enforced if nodes are not integers.
    shape : tuple
        Shape of the adjacency or biadjacency matrix.
        If not specified or if nodes are reindexed, the shape is the smallest compatible with node indices.
    sum_duplicates : bool
        If ``True`` (default), sums weights of duplicate edges.
        Otherwise, the weight of each edge is that of the first occurrence of this edge.
    matrix_only : bool
        If ``True``, returns only the adjacency or biadjacency matrix.
        Otherwise, returns a ``Dataset`` object with graph attributes (e.g., node names).
        If not specified (default), selects the most appropriate format.

    Returns
    -------
    graph : :class:`Dataset` or sparse matrix

    Raises
    ------
    ValueError
        If weights cannot be cast to numbers.
    """
    # Node identifiers are cast to numbers when possible; other identifiers (e.g., strings) are kept as is.
    try:
        edge_array = edge_array.astype(float)
    except ValueError:
        pass
    if edge_array.dtype == float and (edge_array == edge_array.astype(int)).all():
        edge_array = edge_array.astype(int)

    if weights is None:
        weights = np.ones(len(edge_array))
    if weights.dtype not in [bool, int, float]:
        try:
            weights = weights.astype(float)
        except ValueError as error:
            raise ValueError('Weights must be numeric.') from error
    # Vectorized test (instead of the builtin all) to avoid a Python-level loop over all edges.
    if (weights == weights.astype(int)).all():
        weights = weights.astype(int)
    if not weighted:
        weights = weights.astype(bool)

    if not sum_duplicates:
        # Keep the first occurrence of each edge only.
        _, index = np.unique(edge_array, axis=0, return_index=True)
        edge_array = edge_array[index]
        weights = weights[index]

    graph = Dataset()
    if bipartite:
        row = edge_array[:, 0]
        col = edge_array[:, 1]
        # Rows and columns are indexed independently.
        if row.dtype != int or reindex:
            names_row, row = np.unique(row, return_inverse=True)
            graph.names_row = names_row
            graph.names = names_row
            n_row = len(names_row)
        elif shape is not None:
            n_row = max(shape[0], row.max() + 1)
        else:
            n_row = row.max() + 1
        if col.dtype != int or reindex:
            names_col, col = np.unique(col, return_inverse=True)
            graph.names_col = names_col
            n_col = len(names_col)
        elif shape is not None:
            n_col = max(shape[1], col.max() + 1)
        else:
            n_col = col.max() + 1
        matrix = sparse.csr_matrix((weights, (row, col)), shape=(n_row, n_col))
        matrix.sum_duplicates()
        graph.biadjacency = matrix
    else:
        # Sources and targets share the same index.
        nodes = edge_array.ravel()
        if nodes.dtype != int or reindex:
            names, nodes = np.unique(nodes, return_inverse=True)
            graph.names = names
            n = len(names)
            edge_array = nodes.reshape(-1, 2)
        elif shape is not None:
            n = max(shape[0], nodes.max() + 1)
        else:
            n = nodes.max() + 1
        row = edge_array[:, 0]
        col = edge_array[:, 1]
        matrix = sparse.csr_matrix((weights, (row, col)), shape=(n, n))
        if not directed:
            matrix = directed2undirected(matrix)
        matrix.sum_duplicates()
        graph.adjacency = matrix

    # By default, the matrix alone is returned when it is the only entry of the dataset (no names).
    if matrix_only or (matrix_only is None and len(graph) == 1):
        return matrix
    return graph
251
+
252
+
253
def from_csv(file_path: str, delimiter: str = None, sep: str = None, comments: str = '#%',
             data_structure: str = None, directed: bool = False, bipartite: bool = False, weighted: bool = True,
             reindex: bool = False, shape: Optional[tuple] = None, sum_duplicates: bool = True,
             matrix_only: bool = None) -> Union[Dataset, sparse.csr_matrix]:
    """Load a graph from a CSV or TSV file.
    The delimiter can be specified (e.g., ' ' for space-separated values).

    Parameters
    ----------
    file_path : str
        Path to the CSV file.
    delimiter : str
        Delimiter used in the file. Guessed if not specified.
    sep : str
        Alias for delimiter (used only if ``delimiter`` is not specified).
    comments : str
        Characters for comment lines.
    data_structure : str
        If 'edge_list', consider each row of the file as an edge (tuple of size 2 or 3).
        If 'adjacency_list', consider each row of the file as an adjacency list (list of neighbors,
        in the order of node indices; an empty line means no neighbor).
        If 'adjacency_dict', consider each row of the file as an adjacency dictionary with key
        given by the first column (node: list of neighbors).
        If ``None`` (default), data_structure is guessed from the first rows of the file.
    directed : bool
        If ``True``, considers the graph as directed.
    bipartite : bool
        If ``True``, returns a biadjacency matrix of shape (n1, n2).
    weighted : bool
        If ``True``, returns a weighted graph (e.g., counts the number of occurrences of each edge).
    reindex : bool
        If ``True``, reindex nodes and returns the original node indices as names.
        Reindexing is enforced if nodes are not integers.
    shape : tuple
        Shape of the adjacency or biadjacency matrix.
        If not specified or if nodes are reindexed, the shape is the smallest compatible with node indices.
    sum_duplicates : bool
        If ``True`` (default), sums weights of duplicate edges.
        Otherwise, the weight of each edge is that of the first occurrence of this edge.
    matrix_only : bool
        If ``True``, returns only the adjacency or biadjacency matrix.
        Otherwise, returns a ``Dataset`` object with graph attributes (e.g., node names).
        If not specified (default), selects the most appropriate format.

    Returns
    -------
    graph: :class:`Dataset` or sparse matrix

    Raises
    ------
    ValueError
        If ``data_structure`` is not one of the supported values.
    """
    if data_structure not in (None, 'edge_list', 'adjacency_list', 'adjacency_dict'):
        raise ValueError("Unknown data structure. Must be 'edge_list', 'adjacency_list' or 'adjacency_dict'.")
    # Resolve the alias before scanning, so that the data structure is guessed with the actual delimiter.
    if delimiter is None:
        delimiter = sep
    header_length, delimiter_guess, comment_guess, data_structure_guess = scan_header(file_path, delimiters=delimiter,
                                                                                      comments=comments)
    if delimiter is None:
        delimiter = delimiter_guess
    if data_structure is None:
        data_structure = data_structure_guess
    graph_params = dict(directed=directed, bipartite=bipartite, weighted=weighted, reindex=reindex, shape=shape,
                        sum_duplicates=sum_duplicates, matrix_only=matrix_only)

    if data_structure == 'edge_list':
        # Fast path for purely numeric files. Any non-numeric entry is read as NaN and a file with a single
        # row is read as a 1-D array; both cases are left to the generic CSV reader below.
        try:
            array = np.genfromtxt(file_path, delimiter=delimiter, comments=comment_guess)
            is_numeric = array.ndim == 2 and not np.isnan(array).any()
        except TypeError:
            is_numeric = False
        if is_numeric:
            edge_array = array[:, :2].astype(int)
            weights = array[:, 2] if array.shape[1] == 3 else None
            return from_edge_array(edge_array=edge_array, weights=weights, **graph_params)

    with open(file_path, 'r', encoding='utf-8') as f:
        # Skip as many lines as comment lines found by the scan.
        for _ in range(header_length):
            f.readline()
        csv_reader = reader(f, delimiter=delimiter)
        if data_structure == 'edge_list':
            edge_list = [tuple(row) for row in csv_reader]
            return from_edge_list(edge_list=edge_list, **graph_params)
        if data_structure == 'adjacency_list':
            adjacency_list = list(csv_reader)
        else:
            # 'adjacency_dict': the first column is the node, the other columns are its neighbors.
            adjacency_list = {row[0]: row[1:] for row in csv_reader}
        return from_adjacency_list(adjacency_list=adjacency_list, **graph_params)
344
+
345
+
346
def scan_header(file_path: str, delimiters: str = None, comments: str = '#%', n_scan: int = 100):
    """Infer some properties of the graph from the first lines of a CSV file.

    Parameters
    ----------
    file_path : str
        Path to the CSV file.
    delimiters : str
        Possible delimiters (one character each). Tab, comma, semicolon and space if not specified.
    comments : str
        Possible comment characters.
    n_scan : int
        Number of rows (excluding comment lines) scanned for inference.

    Returns
    -------
    header_length : int
        Number of comment lines found during the scan.
    delimiter_guess : str
        Guessed delimiter.
    comment_guess : str
        Guessed comment character (first character of the last comment line found,
        first character of ``comments`` if there is none).
    data_structure_guess : str
        Either 'edge_list' or 'adjacency_list'.
    """
    header_length = 0
    if delimiters is None:
        delimiters = '\t,; '
    comment_guess = comments[0]
    comment_chars = tuple(comments)
    count = {delimiter: [] for delimiter in delimiters}
    rows = []
    with open(file_path, 'r', encoding='utf-8') as f:
        # Read lazily: the scan stops after n_scan rows, so the whole file need not be loaded.
        for row in f:
            if row.startswith(comment_chars):
                comment_guess = row[0]
                header_length += 1
            else:
                rows.append(row.rstrip())
                for delimiter in delimiters:
                    count[delimiter].append(row.count(delimiter))
                if len(rows) == n_scan:
                    break
    means = np.array([np.mean(count[delimiter]) for delimiter in delimiters])
    stds = np.array([np.std(count[delimiter]) for delimiter in delimiters])
    # A delimiter is expected to appear the same (positive) number of times in each row.
    index = np.argwhere((means > 0) & (stds == 0)).ravel()
    if len(index) == 1:
        delimiter_guess = delimiters[int(index[0])]
    else:
        # No unique candidate: select the most frequent one.
        delimiter_guess = delimiters[int(np.argmax(means))]
    length = {len(row.split(delimiter_guess)) for row in rows}
    # Rows of constant size 2 or 3 are considered as edges (with optional weight).
    if length == {2} or length == {3}:
        data_structure_guess = 'edge_list'
    else:
        data_structure_guess = 'adjacency_list'
    return header_length, delimiter_guess, comment_guess, data_structure_guess
401
+
402
+
403
def load_labels(file: str) -> np.ndarray:
    """Parser for files with a single entry on each row.

    Parameters
    ----------
    file : str
        The path to the dataset

    Returns
    -------
    labels: np.ndarray
        Labels, one per row of the file, stripped of leading and trailing whitespace.
    """
    with open(file, 'r', encoding='utf-8') as f:
        return np.array([line.strip() for line in f])
421
+
422
+
423
def load_header(file: str):
    """Check if the graph is directed, bipartite, weighted.

    Only the first line of the file is read: the graph is directed if this line contains 'asym',
    bipartite if it contains 'bip' and weighted unless it contains 'unweighted'.

    Returns
    -------
    directed, bipartite, weighted : tuple of bool
    """
    with open(file, 'r', encoding='utf-8') as f:
        first_line = f.readline()
    return 'asym' in first_line, 'bip' in first_line, 'unweighted' not in first_line
435
+
436
+
437
def load_metadata(file: str, delimiter: str = ': ') -> Dataset:
    """Extract metadata from the file.

    Each row is split at the first occurrence of the delimiter into a key and a value.

    Parameters
    ----------
    file : str
        Path to the metadata file.
    delimiter : str
        Separator between the key and the value of each row.

    Returns
    -------
    metadata : :class:`Dataset`
        Metadata, with one entry per row of the file.
    """
    metadata = Dataset()
    with open(file, 'r', encoding='utf-8') as f:
        for row in f:
            # Split at the first delimiter only, so that a value containing the delimiter is kept unchanged
            # (whatever the delimiter).
            key, _, value = row.partition(delimiter)
            metadata[key] = value.strip('\n')
    return metadata
446
+
447
+
448
+ def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: int = 512) -> Dataset:
449
+ """Load graph from GraphML file.
450
+
451
+ Hyperedges and nested graphs are not supported.
452
+
453
+ Parameters
454
+ ----------
455
+ file_path: str
456
+ Path to the GraphML file.
457
+ weight_key: str
458
+ The key to be used as a value for edge weights
459
+ max_string_size: int
460
+ The maximum size for string features of the data
461
+
462
+ Returns
463
+ -------
464
+ data: :class:`Dataset`
465
+ The dataset in a Dataset with the adjacency as a CSR matrix.
466
+ """
467
+ # see http://graphml.graphdrawing.org/primer/graphml-primer.html
468
+ # and http://graphml.graphdrawing.org/specification/dtd.html#top
469
+ tree = ElementTree.parse(file_path)
470
+ n_nodes = 0
471
+ n_edges = 0
472
+ symmetrize = None
473
+ naming_nodes = True
474
+ default_weight = 1
475
+ weight_type = bool
476
+ weight_id = None
477
+ # indices in the graph tree
478
+ node_indices = []
479
+ edge_indices = []
480
+ data = Dataset()
481
+ graph = None
482
+ file_description = None
483
+ attribute_descriptions = Dataset()
484
+ attribute_descriptions.node = Dataset()
485
+ attribute_descriptions.edge = Dataset()
486
+ keys = {}
487
+ for file_element in tree.getroot():
488
+ if file_element.tag.endswith('graph'):
489
+ graph = file_element
490
+ symmetrize = (graph.attrib['edgedefault'] == 'undirected')
491
+ for index, element in enumerate(graph):
492
+ if element.tag.endswith('node'):
493
+ node_indices.append(index)
494
+ n_nodes += 1
495
+ elif element.tag.endswith('edge'):
496
+ edge_indices.append(index)
497
+ if 'directed' in element.attrib:
498
+ if element.attrib['directed'] == 'true':
499
+ n_edges += 1
500
+ else:
501
+ n_edges += 2
502
+ elif symmetrize:
503
+ n_edges += 2
504
+ else:
505
+ n_edges += 1
506
+ if 'parse.nodeids' in graph.attrib:
507
+ naming_nodes = not (graph.attrib['parse.nodeids'] == 'canonical')
508
+ for file_element in tree.getroot():
509
+ if file_element.tag.endswith('key'):
510
+ attribute_name = file_element.attrib['attr.name']
511
+ attribute_type = java_type_to_python_type(file_element.attrib['attr.type'])
512
+ if attribute_name == weight_key:
513
+ weight_type = java_type_to_python_type(file_element.attrib['attr.type'])
514
+ weight_id = file_element.attrib['id']
515
+ for key_element in file_element:
516
+ if key_element.tag == 'default':
517
+ default_weight = attribute_type(key_element.text)
518
+ else:
519
+ default_value = None
520
+ if file_element.attrib['for'] == 'node':
521
+ size = n_nodes
522
+ if 'node_attribute' not in data:
523
+ data.node_attribute = Dataset()
524
+ for key_element in file_element:
525
+ if key_element.tag.endswith('desc'):
526
+ attribute_descriptions.node[attribute_name] = key_element.text
527
+ elif key_element.tag.endswith('default'):
528
+ default_value = attribute_type(key_element.text)
529
+ if attribute_type == str:
530
+ local_type = '<U' + str(max_string_size)
531
+ else:
532
+ local_type = attribute_type
533
+ if default_value:
534
+ data.node_attribute[attribute_name] = np.full(size, default_value, dtype=local_type)
535
+ else:
536
+ data.node_attribute[attribute_name] = np.zeros(size, dtype=local_type)
537
+ elif file_element.attrib['for'] == 'edge':
538
+ size = n_edges
539
+ if 'edge_attribute' not in data:
540
+ data.edge_attribute = Dataset()
541
+ for key_element in file_element:
542
+ if key_element.tag.endswith('desc'):
543
+ attribute_descriptions.edge[attribute_name] = key_element.text
544
+ elif key_element.tag.endswith('default'):
545
+ default_value = attribute_type(key_element.text)
546
+ if attribute_type == str:
547
+ local_type = '<U' + str(max_string_size)
548
+ else:
549
+ local_type = attribute_type
550
+ if default_value:
551
+ data.edge_attribute[attribute_name] = np.full(size, default_value, dtype=local_type)
552
+ else:
553
+ data.edge_attribute[attribute_name] = np.zeros(size, dtype=local_type)
554
+ keys[file_element.attrib['id']] = [attribute_name, attribute_type]
555
+ elif file_element.tag.endswith('desc'):
556
+ file_description = file_element.text
557
+ if file_description or attribute_descriptions.node or attribute_descriptions.edge:
558
+ data.meta = Dataset()
559
+ if file_description:
560
+ data.meta['description'] = file_description
561
+ if attribute_descriptions.node or attribute_descriptions.edge:
562
+ data.meta['attributes'] = attribute_descriptions
563
+ if graph is not None:
564
+ row = np.zeros(n_edges, dtype=int)
565
+ col = np.zeros(n_edges, dtype=int)
566
+ dat = np.full(n_edges, default_weight, dtype=weight_type)
567
+ data.names = None
568
+ if naming_nodes:
569
+ data.names = np.zeros(n_nodes, dtype='<U512')
570
+
571
+ node_map = {}
572
+ # deal with nodes first
573
+ for number, index in enumerate(node_indices):
574
+ node = graph[index]
575
+ if naming_nodes:
576
+ name = node.attrib['id']
577
+ data.names[number] = name
578
+ node_map[name] = number
579
+ for node_attribute in node:
580
+ if node_attribute.tag.endswith('data'):
581
+ data.node_attribute[keys[node_attribute.attrib['key']][0]][number] = \
582
+ keys[node_attribute.attrib['key']][1](node_attribute.text)
583
+ # deal with edges
584
+ edge_index = -1
585
+ for index in edge_indices:
586
+ edge_index += 1
587
+ duplicate = False
588
+ edge = graph[index]
589
+ if naming_nodes:
590
+ node1 = node_map[edge.attrib['source']]
591
+ node2 = node_map[edge.attrib['target']]
592
+ else:
593
+ node1 = int(edge.attrib['source'][1:])
594
+ node2 = int(edge.attrib['target'][1:])
595
+ row[edge_index] = node1
596
+ col[edge_index] = node2
597
+ for edge_attribute in edge:
598
+ if edge_attribute.tag.endswith('data'):
599
+ if edge_attribute.attrib['key'] == weight_id:
600
+ dat[edge_index] = weight_type(edge_attribute.text)
601
+ else:
602
+ data.edge_attribute[keys[edge_attribute.attrib['key']][0]][edge_index] = \
603
+ keys[edge_attribute.attrib['key']][1](edge_attribute.text)
604
+ if 'directed' in edge.attrib:
605
+ if edge.attrib['directed'] != 'true':
606
+ duplicate = True
607
+ elif symmetrize:
608
+ duplicate = True
609
+ if duplicate:
610
+ edge_index += 1
611
+ row[edge_index] = node2
612
+ col[edge_index] = node1
613
+ for edge_attribute in edge:
614
+ if edge_attribute.tag.endswith('data'):
615
+ if edge_attribute.attrib['key'] == weight_id:
616
+ dat[edge_index] = weight_type(edge_attribute.text)
617
+ else:
618
+ data.edge_attribute[keys[edge_attribute.attrib['key']][0]][edge_index] = \
619
+ keys[edge_attribute.attrib['key']][1](edge_attribute.text)
620
+ data.adjacency = sparse.csr_matrix((dat, (row, col)), shape=(n_nodes, n_nodes))
621
+ if data.names is None:
622
+ data.pop('names')
623
+ return data
624
+ else:
625
+ raise ValueError(f'No graph defined in {file_path}.')
626
+
627
+
628
def java_type_to_python_type(value: str) -> type:
    """Map a GraphML (Java-style) attribute type name to a Python type.

    Parameters
    ----------
    value : str
        Type name as found in the ``attr.type`` attribute of a GraphML key.

    Returns
    -------
    type
        ``bool`` for ``'boolean'``, ``int`` for ``'int'``, ``str`` for ``'string'`` and
        ``float`` for ``'long'``, ``'float'`` and ``'double'``.
        Any other name yields ``None``.
    """
    python_types = {
        'boolean': bool,
        'int': int,
        'string': str,
        # All remaining numeric types are read as floating-point numbers.
        'long': float,
        'float': float,
        'double': float,
    }
    return python_types.get(value)
637
+
638
+
639
def is_number(s):
    """Check whether a value can be converted to a float.

    Parameters
    ----------
    s :
        Value to test, typically a string read from a file.

    Returns
    -------
    bool
        ``True`` if ``float(s)`` succeeds, ``False`` otherwise.
    """
    try:
        float(s)
    except (TypeError, ValueError):
        # ValueError: text that does not spell a number (e.g. 'abc', '').
        # TypeError: objects float() does not accept at all (e.g. None, a list).
        return False
    return True