scikit-network 0.31.0__cp39-cp39-win_amd64.whl → 0.33.0__cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-network might be problematic. Click here for more details.

Files changed (126)
  1. {scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/AUTHORS.rst +3 -1
  2. {scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/METADATA +27 -5
  3. scikit_network-0.33.0.dist-info/RECORD +228 -0
  4. {scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/WHEEL +1 -1
  5. sknetwork/__init__.py +1 -1
  6. sknetwork/classification/base.py +1 -1
  7. sknetwork/classification/base_rank.py +3 -3
  8. sknetwork/classification/diffusion.py +25 -16
  9. sknetwork/classification/knn.py +23 -16
  10. sknetwork/classification/metrics.py +4 -4
  11. sknetwork/classification/pagerank.py +12 -8
  12. sknetwork/classification/propagation.py +25 -17
  13. sknetwork/classification/tests/test_diffusion.py +10 -0
  14. sknetwork/classification/vote.cp39-win_amd64.pyd +0 -0
  15. sknetwork/classification/vote.cpp +14549 -8668
  16. sknetwork/clustering/__init__.py +3 -1
  17. sknetwork/clustering/base.py +1 -1
  18. sknetwork/clustering/kcenters.py +253 -0
  19. sknetwork/clustering/leiden.py +242 -0
  20. sknetwork/clustering/leiden_core.cp39-win_amd64.pyd +0 -0
  21. sknetwork/clustering/leiden_core.cpp +31564 -0
  22. sknetwork/clustering/leiden_core.pyx +124 -0
  23. sknetwork/clustering/louvain.py +118 -83
  24. sknetwork/clustering/louvain_core.cp39-win_amd64.pyd +0 -0
  25. sknetwork/clustering/louvain_core.cpp +21876 -16332
  26. sknetwork/clustering/louvain_core.pyx +86 -94
  27. sknetwork/clustering/postprocess.py +2 -2
  28. sknetwork/clustering/propagation_clustering.py +4 -4
  29. sknetwork/clustering/tests/test_API.py +7 -3
  30. sknetwork/clustering/tests/test_kcenters.py +60 -0
  31. sknetwork/clustering/tests/test_leiden.py +34 -0
  32. sknetwork/clustering/tests/test_louvain.py +2 -3
  33. sknetwork/data/__init__.py +1 -1
  34. sknetwork/data/base.py +7 -2
  35. sknetwork/data/load.py +20 -25
  36. sknetwork/data/models.py +15 -15
  37. sknetwork/data/parse.py +57 -34
  38. sknetwork/data/tests/test_API.py +3 -3
  39. sknetwork/data/tests/test_base.py +2 -2
  40. sknetwork/data/tests/test_parse.py +9 -12
  41. sknetwork/data/tests/test_toy_graphs.py +33 -33
  42. sknetwork/data/toy_graphs.py +35 -43
  43. sknetwork/embedding/__init__.py +0 -1
  44. sknetwork/embedding/base.py +23 -19
  45. sknetwork/embedding/force_atlas.py +3 -2
  46. sknetwork/embedding/louvain_embedding.py +1 -27
  47. sknetwork/embedding/random_projection.py +5 -3
  48. sknetwork/embedding/spectral.py +0 -73
  49. sknetwork/embedding/svd.py +0 -4
  50. sknetwork/embedding/tests/test_API.py +4 -28
  51. sknetwork/embedding/tests/test_louvain_embedding.py +13 -13
  52. sknetwork/embedding/tests/test_spectral.py +2 -5
  53. sknetwork/embedding/tests/test_svd.py +7 -1
  54. sknetwork/gnn/base_layer.py +3 -3
  55. sknetwork/gnn/gnn_classifier.py +41 -87
  56. sknetwork/gnn/layer.py +1 -1
  57. sknetwork/gnn/loss.py +1 -1
  58. sknetwork/gnn/optimizer.py +4 -3
  59. sknetwork/gnn/tests/test_base_layer.py +4 -4
  60. sknetwork/gnn/tests/test_gnn_classifier.py +12 -39
  61. sknetwork/gnn/utils.py +8 -8
  62. sknetwork/hierarchy/base.py +27 -0
  63. sknetwork/hierarchy/louvain_hierarchy.py +55 -47
  64. sknetwork/hierarchy/paris.cp39-win_amd64.pyd +0 -0
  65. sknetwork/hierarchy/paris.cpp +27667 -20915
  66. sknetwork/hierarchy/paris.pyx +11 -10
  67. sknetwork/hierarchy/postprocess.py +16 -16
  68. sknetwork/hierarchy/tests/test_algos.py +5 -0
  69. sknetwork/hierarchy/tests/test_metrics.py +4 -4
  70. sknetwork/linalg/__init__.py +1 -1
  71. sknetwork/linalg/diteration.cp39-win_amd64.pyd +0 -0
  72. sknetwork/linalg/diteration.cpp +13916 -8050
  73. sknetwork/linalg/{normalization.py → normalizer.py} +17 -14
  74. sknetwork/linalg/operators.py +1 -1
  75. sknetwork/linalg/ppr_solver.py +1 -1
  76. sknetwork/linalg/push.cp39-win_amd64.pyd +0 -0
  77. sknetwork/linalg/push.cpp +23187 -16973
  78. sknetwork/linalg/tests/test_normalization.py +3 -7
  79. sknetwork/linalg/tests/test_operators.py +2 -6
  80. sknetwork/linalg/tests/test_ppr.py +1 -1
  81. sknetwork/linkpred/base.py +12 -1
  82. sknetwork/linkpred/nn.py +6 -6
  83. sknetwork/path/distances.py +11 -4
  84. sknetwork/path/shortest_path.py +1 -1
  85. sknetwork/path/tests/test_distances.py +7 -0
  86. sknetwork/path/tests/test_search.py +2 -2
  87. sknetwork/ranking/base.py +11 -6
  88. sknetwork/ranking/betweenness.cp39-win_amd64.pyd +0 -0
  89. sknetwork/ranking/betweenness.cpp +5256 -2190
  90. sknetwork/ranking/pagerank.py +13 -12
  91. sknetwork/ranking/tests/test_API.py +0 -2
  92. sknetwork/ranking/tests/test_betweenness.py +1 -1
  93. sknetwork/ranking/tests/test_pagerank.py +11 -5
  94. sknetwork/regression/base.py +18 -1
  95. sknetwork/regression/diffusion.py +30 -14
  96. sknetwork/regression/tests/test_diffusion.py +8 -0
  97. sknetwork/topology/__init__.py +3 -1
  98. sknetwork/topology/cliques.cp39-win_amd64.pyd +0 -0
  99. sknetwork/topology/cliques.cpp +23528 -16848
  100. sknetwork/topology/core.cp39-win_amd64.pyd +0 -0
  101. sknetwork/topology/core.cpp +22849 -16581
  102. sknetwork/topology/cycles.py +243 -0
  103. sknetwork/topology/minheap.cp39-win_amd64.pyd +0 -0
  104. sknetwork/topology/minheap.cpp +19495 -13469
  105. sknetwork/topology/structure.py +2 -42
  106. sknetwork/topology/tests/test_cycles.py +65 -0
  107. sknetwork/topology/tests/test_structure.py +2 -16
  108. sknetwork/topology/triangles.cp39-win_amd64.pyd +0 -0
  109. sknetwork/topology/triangles.cpp +5283 -1397
  110. sknetwork/topology/triangles.pyx +7 -4
  111. sknetwork/topology/weisfeiler_lehman_core.cp39-win_amd64.pyd +0 -0
  112. sknetwork/topology/weisfeiler_lehman_core.cpp +14781 -8915
  113. sknetwork/utils/__init__.py +1 -1
  114. sknetwork/utils/format.py +1 -1
  115. sknetwork/utils/membership.py +2 -2
  116. sknetwork/utils/values.py +5 -3
  117. sknetwork/visualization/__init__.py +2 -2
  118. sknetwork/visualization/dendrograms.py +55 -7
  119. sknetwork/visualization/graphs.py +261 -44
  120. sknetwork/visualization/tests/test_dendrograms.py +9 -9
  121. sknetwork/visualization/tests/test_graphs.py +63 -57
  122. scikit_network-0.31.0.dist-info/RECORD +0 -221
  123. sknetwork/embedding/louvain_hierarchy.py +0 -142
  124. sknetwork/embedding/tests/test_louvain_hierarchy.py +0 -19
  125. {scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/LICENSE +0 -0
  126. {scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/top_level.txt +0 -0
sknetwork/data/models.py CHANGED
@@ -12,7 +12,7 @@ from typing import Union, Optional, Iterable
12
12
  import numpy as np
13
13
  from scipy import sparse
14
14
 
15
- from sknetwork.data.base import Bunch
15
+ from sknetwork.data.base import Dataset
16
16
  from sknetwork.data.parse import from_edge_list
17
17
  from sknetwork.utils.check import check_random_state
18
18
  from sknetwork.utils.format import directed2undirected
@@ -20,7 +20,7 @@ from sknetwork.utils.format import directed2undirected
20
20
 
21
21
  def block_model(sizes: Iterable, p_in: Union[float, list, np.ndarray] = .2, p_out: float = .05,
22
22
  directed: bool = False, self_loops: bool = False, metadata: bool = False, seed: Optional[int] = None) \
23
- -> Union[sparse.csr_matrix, Bunch]:
23
+ -> Union[sparse.csr_matrix, Dataset]:
24
24
  """Stochastic block model.
25
25
 
26
26
  Parameters
@@ -83,7 +83,7 @@ def block_model(sizes: Iterable, p_in: Union[float, list, np.ndarray] = .2, p_ou
83
83
  else:
84
84
  adjacency = directed2undirected(sparse.csr_matrix(sparse.triu(adjacency)), weighted=False)
85
85
  if metadata:
86
- graph = Bunch()
86
+ graph = Dataset()
87
87
  graph.adjacency = adjacency
88
88
  labels = np.repeat(np.arange(len(sizes)), sizes)
89
89
  graph.labels = labels
@@ -129,7 +129,7 @@ def erdos_renyi(n: int = 20, p: float = .3, directed: bool = False, self_loops:
129
129
  return block_model([n], p, 0., directed=directed, self_loops=self_loops, metadata=False, seed=seed)
130
130
 
131
131
 
132
- def linear_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
132
+ def linear_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
133
133
  """Linear graph (directed).
134
134
 
135
135
  Parameters
@@ -158,7 +158,7 @@ def linear_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matri
158
158
  if metadata:
159
159
  x = np.arange(n)
160
160
  y = np.zeros(n)
161
- graph = Bunch()
161
+ graph = Dataset()
162
162
  graph.adjacency = adjacency
163
163
  graph.position = np.array((x, y)).T
164
164
  return graph
@@ -166,7 +166,7 @@ def linear_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matri
166
166
  return adjacency
167
167
 
168
168
 
169
- def linear_graph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
169
+ def linear_graph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
170
170
  """Linear graph (undirected).
171
171
 
172
172
  Parameters
@@ -218,7 +218,7 @@ def cyclic_position(n: int) -> np.ndarray:
218
218
  return position
219
219
 
220
220
 
221
- def cyclic_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
221
+ def cyclic_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
222
222
  """Cyclic graph (directed).
223
223
 
224
224
  Parameters
@@ -245,7 +245,7 @@ def cyclic_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matri
245
245
  adjacency = sparse.csr_matrix((np.ones(len(row), dtype=int), (row, col)), shape=(n, n))
246
246
 
247
247
  if metadata:
248
- graph = Bunch()
248
+ graph = Dataset()
249
249
  graph.adjacency = adjacency
250
250
  graph.position = cyclic_position(n)
251
251
  return graph
@@ -253,7 +253,7 @@ def cyclic_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matri
253
253
  return adjacency
254
254
 
255
255
 
256
- def cyclic_graph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
256
+ def cyclic_graph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
257
257
  """Cyclic graph (undirected).
258
258
 
259
259
  Parameters
@@ -283,7 +283,7 @@ def cyclic_graph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix,
283
283
  return graph.adjacency
284
284
 
285
285
 
286
- def grid(n1: int = 10, n2: int = 10, metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
286
+ def grid(n1: int = 10, n2: int = 10, metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
287
287
  """Grid (undirected).
288
288
 
289
289
  Parameters
@@ -312,7 +312,7 @@ def grid(n1: int = 10, n2: int = 10, metadata: bool = False) -> Union[sparse.csr
312
312
  edges = list(map(lambda edge: (node_id[edge[0]], node_id[edge[1]]), edges))
313
313
  adjacency = from_edge_list(edges, reindex=False, matrix_only=True)
314
314
  if metadata:
315
- graph = Bunch()
315
+ graph = Dataset()
316
316
  graph.adjacency = adjacency
317
317
  graph.position = np.array(nodes)
318
318
  return graph
@@ -320,7 +320,7 @@ def grid(n1: int = 10, n2: int = 10, metadata: bool = False) -> Union[sparse.csr
320
320
  return adjacency
321
321
 
322
322
 
323
- def star(n_branches: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
323
+ def star(n_branches: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
324
324
  """Star (undirected).
325
325
 
326
326
  Parameters
@@ -345,7 +345,7 @@ def star(n_branches: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix
345
345
  edges = [(0, i+1) for i in range(n_branches)]
346
346
  adjacency = from_edge_list(edges, reindex=False, matrix_only=True)
347
347
  if metadata:
348
- graph = Bunch()
348
+ graph = Dataset()
349
349
  graph.adjacency = adjacency
350
350
  angles = 2 * np.pi * np.arange(n_branches) / n_branches
351
351
  x = [0] + list(np.cos(angles))
@@ -402,7 +402,7 @@ def albert_barabasi(n: int = 100, degree: int = 3, directed: bool = False, seed:
402
402
 
403
403
 
404
404
  def watts_strogatz(n: int = 100, degree: int = 6, prob: float = 0.05, seed: Optional[int] = None,
405
- metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
405
+ metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
406
406
  """Watts-Strogatz model.
407
407
 
408
408
  Parameters
@@ -451,7 +451,7 @@ def watts_strogatz(n: int = 100, degree: int = 6, prob: float = 0.05, seed: Opti
451
451
  adjacency[j, i] = 0
452
452
  adjacency = sparse.csr_matrix(adjacency, shape=adjacency.shape)
453
453
  if metadata:
454
- graph = Bunch()
454
+ graph = Dataset()
455
455
  graph.adjacency = adjacency
456
456
  graph.position = cyclic_position(n)
457
457
  return graph
sknetwork/data/parse.py CHANGED
@@ -8,19 +8,19 @@ Created in December 2018
8
8
  """
9
9
 
10
10
  from csv import reader
11
- from typing import Dict, List, Tuple, Union
11
+ from typing import Dict, List, Tuple, Union, Optional
12
12
  from xml.etree import ElementTree
13
13
 
14
14
  import numpy as np
15
15
  from scipy import sparse
16
16
 
17
- from sknetwork.data.base import Bunch
17
+ from sknetwork.data.base import Dataset
18
18
  from sknetwork.utils.format import directed2undirected
19
19
 
20
20
 
21
21
  def from_edge_list(edge_list: Union[np.ndarray, List[Tuple]], directed: bool = False,
22
- bipartite: bool = False, weighted: bool = True, reindex: bool = True,
23
- sum_duplicates: bool = True, matrix_only: bool = None) -> Union[Bunch, sparse.csr_matrix]:
22
+ bipartite: bool = False, weighted: bool = True, reindex: bool = False, shape: Optional[tuple] = None,
23
+ sum_duplicates: bool = True, matrix_only: bool = None) -> Union[Dataset, sparse.csr_matrix]:
24
24
  """Load a graph from an edge list.
25
25
 
26
26
  Parameters
@@ -37,6 +37,9 @@ def from_edge_list(edge_list: Union[np.ndarray, List[Tuple]], directed: bool = F
37
37
  reindex : bool
38
38
  If ``True``, reindex nodes and returns the original node indices as names.
39
39
  Reindexing is enforced if nodes are not integers.
40
+ shape : tuple
41
+ Shape of the adjacency or biadjacency matrix.
42
+ If not specified or if nodes are reindexed, the shape is the smallest compatible with node indices.
40
43
  sum_duplicates : bool
41
44
  If ``True`` (default), sums weights of duplicate edges.
42
45
  Otherwise, the weight of each edge is that of the first occurrence of this edge.
@@ -83,12 +86,14 @@ def from_edge_list(edge_list: Union[np.ndarray, List[Tuple]], directed: bool = F
83
86
  else:
84
87
  raise TypeError('The edge list must be given as a NumPy array or a list of tuples.')
85
88
  return from_edge_array(edge_array=edge_array, weights=weights, directed=directed, bipartite=bipartite,
86
- weighted=weighted, reindex=reindex, sum_duplicates=sum_duplicates, matrix_only=matrix_only)
89
+ weighted=weighted, reindex=reindex, shape=shape, sum_duplicates=sum_duplicates,
90
+ matrix_only=matrix_only)
87
91
 
88
92
 
89
93
  def from_adjacency_list(adjacency_list: Union[List[List], Dict[str, List]], directed: bool = False,
90
- bipartite: bool = False, weighted: bool = True, reindex: bool = True,
91
- sum_duplicates: bool = True, matrix_only: bool = None) -> Union[Bunch, sparse.csr_matrix]:
94
+ bipartite: bool = False, weighted: bool = True, reindex: bool = False,
95
+ shape: Optional[tuple] = None, sum_duplicates: bool = True, matrix_only: bool = None) \
96
+ -> Union[Dataset, sparse.csr_matrix]:
92
97
  """Load a graph from an adjacency list.
93
98
 
94
99
  Parameters
@@ -104,6 +109,9 @@ def from_adjacency_list(adjacency_list: Union[List[List], Dict[str, List]], dire
104
109
  reindex : bool
105
110
  If ``True``, reindex nodes and returns the original node indices as names.
106
111
  Reindexing is enforced if nodes are not integers.
112
+ shape : tuple
113
+ Shape of the adjacency or biadjacency matrix.
114
+ If not specified or if nodes are reindexed, the shape is the smallest compatible with node indices.
107
115
  sum_duplicates : bool
108
116
  If ``True`` (default), sums weights of duplicate edges.
109
117
  Otherwise, the weight of each edge is that of the first occurrence of this edge.
@@ -134,12 +142,12 @@ def from_adjacency_list(adjacency_list: Union[List[List], Dict[str, List]], dire
134
142
  else:
135
143
  raise TypeError('The adjacency list must be given as a list of lists or a dict of lists.')
136
144
  return from_edge_list(edge_list=edge_list, directed=directed, bipartite=bipartite, weighted=weighted,
137
- reindex=reindex, sum_duplicates=sum_duplicates, matrix_only=matrix_only)
145
+ reindex=reindex, shape=shape, sum_duplicates=sum_duplicates, matrix_only=matrix_only)
138
146
 
139
147
 
140
148
  def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed: bool = False, bipartite: bool = False,
141
- weighted: bool = True, reindex: bool = True, sum_duplicates: bool = True,
142
- matrix_only: bool = None) -> Union[Bunch, sparse.csr_matrix]:
149
+ weighted: bool = True, reindex: bool = False, shape: Optional[tuple] = None,
150
+ sum_duplicates: bool = True, matrix_only: bool = None) -> Union[Dataset, sparse.csr_matrix]:
143
151
  """Load a graph from an edge array of shape (n_edges, 2) and weights (optional).
144
152
 
145
153
  Parameters
@@ -157,6 +165,9 @@ def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed
157
165
  reindex : bool
158
166
  If ``True``, reindex nodes and returns the original node indices as names.
159
167
  Reindexing is enforced if nodes are not integers.
168
+ shape : tuple
169
+ Shape of the adjacency or biadjacency matrix.
170
+ If not specified or if nodes are reindexed, the shape is the smallest compatible with node indices.
160
171
  sum_duplicates : bool
161
172
  If ``True`` (default), sums weights of duplicate edges.
162
173
  Otherwise, the weight of each edge is that of the first occurrence of this edge.
@@ -191,32 +202,39 @@ def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed
191
202
  _, index = np.unique(edge_array, axis=0, return_index=True)
192
203
  edge_array = edge_array[index]
193
204
  weights = weights[index]
194
- graph = Bunch()
205
+ graph = Dataset()
195
206
  if bipartite:
196
207
  row = edge_array[:, 0]
197
208
  col = edge_array[:, 1]
198
- if row.dtype != int or (reindex and len(set(row)) < max(row) + 1):
209
+ if row.dtype != int or reindex:
199
210
  names_row, row = np.unique(row, return_inverse=True)
200
211
  graph.names_row = names_row
201
212
  graph.names = names_row
202
213
  n_row = len(names_row)
214
+ elif shape is not None:
215
+ n_row = max(shape[0], max(row) + 1)
203
216
  else:
204
217
  n_row = max(row) + 1
205
- if col.dtype != int or (reindex and len(set(col)) < max(col) + 1):
218
+ if col.dtype != int or reindex:
206
219
  names_col, col = np.unique(col, return_inverse=True)
207
220
  graph.names_col = names_col
208
221
  n_col = len(names_col)
222
+ elif shape is not None:
223
+ n_col = max(shape[1], max(col) + 1)
209
224
  else:
210
225
  n_col = max(col) + 1
211
226
  matrix = sparse.csr_matrix((weights, (row, col)), shape=(n_row, n_col))
227
+ matrix.sum_duplicates()
212
228
  graph.biadjacency = matrix
213
229
  else:
214
230
  nodes = edge_array.ravel()
215
- if nodes.dtype != int or (reindex and len(set(nodes)) < max(nodes) + 1):
231
+ if nodes.dtype != int or reindex:
216
232
  names, nodes = np.unique(nodes, return_inverse=True)
217
233
  graph.names = names
218
234
  n = len(names)
219
235
  edge_array = nodes.reshape(-1, 2)
236
+ elif shape is not None:
237
+ n = max(shape[0], max(nodes) + 1)
220
238
  else:
221
239
  n = max(nodes) + 1
222
240
  row = edge_array[:, 0]
@@ -224,6 +242,7 @@ def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed
224
242
  matrix = sparse.csr_matrix((weights, (row, col)), shape=(n, n))
225
243
  if not directed:
226
244
  matrix = directed2undirected(matrix)
245
+ matrix.sum_duplicates()
227
246
  graph.adjacency = matrix
228
247
  if matrix_only or (matrix_only is None and len(graph) == 1):
229
248
  return matrix
@@ -233,8 +252,8 @@ def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed
233
252
 
234
253
  def from_csv(file_path: str, delimiter: str = None, sep: str = None, comments: str = '#%',
235
254
  data_structure: str = None, directed: bool = False, bipartite: bool = False, weighted: bool = True,
236
- reindex: bool = True, sum_duplicates: bool = True, matrix_only: bool = None) \
237
- -> Union[Bunch, sparse.csr_matrix]:
255
+ reindex: bool = False, shape: Optional[tuple] = None, sum_duplicates: bool = True,
256
+ matrix_only: bool = None) -> Union[Dataset, sparse.csr_matrix]:
238
257
  """Load a graph from a CSV or TSV file.
239
258
  The delimiter can be specified (e.g., ' ' for space-separated values).
240
259
 
@@ -249,9 +268,10 @@ def from_csv(file_path: str, delimiter: str = None, sep: str = None, comments: s
249
268
  comments : str
250
269
  Characters for comment lines.
251
270
  data_structure : str
252
- If 'edge_list', considers each row of the file as an edge (tuple of size 2 or 3).
253
- If 'adjacency_list', considers each row of the file as an adjacency list (list of neighbors).
254
- If 'adjacency_dict', considers each row of the file as an adjacency dictionary with key
271
+ If 'edge_list', consider each row of the file as an edge (tuple of size 2 or 3).
272
+ If 'adjacency_list', consider each row of the file as an adjacency list (list of neighbors,
273
+ in the order of node indices; an empty line means no neighbor).
274
+ If 'adjacency_dict', consider each row of the file as an adjacency dictionary with key
255
275
  given by the first column (node: list of neighbors).
256
276
  If ``None`` (default), data_structure is guessed from the first rows of the file.
257
277
  directed : bool
@@ -263,6 +283,9 @@ def from_csv(file_path: str, delimiter: str = None, sep: str = None, comments: s
263
283
  reindex : bool
264
284
  If ``True``, reindex nodes and returns the original node indices as names.
265
285
  Reindexing is enforced if nodes are not integers.
286
+ shape : tuple
287
+ Shape of the adjacency or biadjacency matrix.
288
+ If not specified or if nodes are reindexed, the shape is the smallest compatible with node indices.
266
289
  sum_duplicates : bool
267
290
  If ``True`` (default), sums weights of duplicate edges.
268
291
  Otherwise, the weight of each edge is that of the first occurrence of this edge.
@@ -295,7 +318,7 @@ def from_csv(file_path: str, delimiter: str = None, sep: str = None, comments: s
295
318
  else:
296
319
  weights = None
297
320
  return from_edge_array(edge_array=edge_array, weights=weights, directed=directed, bipartite=bipartite,
298
- weighted=weighted, reindex=reindex, sum_duplicates=sum_duplicates,
321
+ weighted=weighted, reindex=reindex, shape=shape, sum_duplicates=sum_duplicates,
299
322
  matrix_only=matrix_only)
300
323
  except TypeError:
301
324
  pass
@@ -306,17 +329,17 @@ def from_csv(file_path: str, delimiter: str = None, sep: str = None, comments: s
306
329
  if data_structure == 'edge_list':
307
330
  edge_list = [tuple(row) for row in csv_reader]
308
331
  return from_edge_list(edge_list=edge_list, directed=directed, bipartite=bipartite,
309
- weighted=weighted, reindex=reindex, sum_duplicates=sum_duplicates,
332
+ weighted=weighted, reindex=reindex, shape=shape, sum_duplicates=sum_duplicates,
310
333
  matrix_only=matrix_only)
311
334
  elif data_structure == 'adjacency_list':
312
335
  adjacency_list = [row for row in csv_reader]
313
336
  return from_adjacency_list(adjacency_list=adjacency_list, directed=directed, bipartite=bipartite,
314
- weighted=weighted, reindex=reindex, sum_duplicates=sum_duplicates,
337
+ weighted=weighted, reindex=reindex, shape=shape, sum_duplicates=sum_duplicates,
315
338
  matrix_only=matrix_only)
316
339
  elif data_structure == 'adjacency_dict':
317
340
  adjacency_list = {row[0]: row[1:] for row in csv_reader}
318
341
  return from_adjacency_list(adjacency_list=adjacency_list, directed=directed, bipartite=bipartite,
319
- weighted=weighted, reindex=reindex, sum_duplicates=sum_duplicates,
342
+ weighted=weighted, reindex=reindex, shape=shape, sum_duplicates=sum_duplicates,
320
343
  matrix_only=matrix_only)
321
344
 
322
345
 
@@ -411,9 +434,9 @@ def load_header(file: str):
411
434
  return directed, bipartite, weighted
412
435
 
413
436
 
414
- def load_metadata(file: str, delimiter: str = ': ') -> Bunch:
437
+ def load_metadata(file: str, delimiter: str = ': ') -> Dataset:
415
438
  """Extract metadata from the file."""
416
- metadata = Bunch()
439
+ metadata = Dataset()
417
440
  with open(file, 'r', encoding='utf-8') as f:
418
441
  for row in f:
419
442
  parts = row.split(delimiter)
@@ -422,7 +445,7 @@ def load_metadata(file: str, delimiter: str = ': ') -> Bunch:
422
445
  return metadata
423
446
 
424
447
 
425
- def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: int = 512) -> Bunch:
448
+ def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: int = 512) -> Dataset:
426
449
  """Load graph from GraphML file.
427
450
 
428
451
  Hyperedges and nested graphs are not supported.
@@ -438,7 +461,7 @@ def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: in
438
461
 
439
462
  Returns
440
463
  -------
441
- data: :class:`Bunch`
464
+ data: :class:`Dataset`
442
465
  The dataset in a Dataset with the adjacency as a CSR matrix.
443
466
  """
444
467
  # see http://graphml.graphdrawing.org/primer/graphml-primer.html
@@ -454,12 +477,12 @@ def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: in
454
477
  # indices in the graph tree
455
478
  node_indices = []
456
479
  edge_indices = []
457
- data = Bunch()
480
+ data = Dataset()
458
481
  graph = None
459
482
  file_description = None
460
- attribute_descriptions = Bunch()
461
- attribute_descriptions.node = Bunch()
462
- attribute_descriptions.edge = Bunch()
483
+ attribute_descriptions = Dataset()
484
+ attribute_descriptions.node = Dataset()
485
+ attribute_descriptions.edge = Dataset()
463
486
  keys = {}
464
487
  for file_element in tree.getroot():
465
488
  if file_element.tag.endswith('graph'):
@@ -497,7 +520,7 @@ def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: in
497
520
  if file_element.attrib['for'] == 'node':
498
521
  size = n_nodes
499
522
  if 'node_attribute' not in data:
500
- data.node_attribute = Bunch()
523
+ data.node_attribute = Dataset()
501
524
  for key_element in file_element:
502
525
  if key_element.tag.endswith('desc'):
503
526
  attribute_descriptions.node[attribute_name] = key_element.text
@@ -514,7 +537,7 @@ def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: in
514
537
  elif file_element.attrib['for'] == 'edge':
515
538
  size = n_edges
516
539
  if 'edge_attribute' not in data:
517
- data.edge_attribute = Bunch()
540
+ data.edge_attribute = Dataset()
518
541
  for key_element in file_element:
519
542
  if key_element.tag.endswith('desc'):
520
543
  attribute_descriptions.edge[attribute_name] = key_element.text
@@ -532,7 +555,7 @@ def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: in
532
555
  elif file_element.tag.endswith('desc'):
533
556
  file_description = file_element.text
534
557
  if file_description or attribute_descriptions.node or attribute_descriptions.edge:
535
- data.meta = Bunch()
558
+ data.meta = Dataset()
536
559
  if file_description:
537
560
  data.meta['description'] = file_description
538
561
  if attribute_descriptions.node or attribute_descriptions.edge:
@@ -8,7 +8,7 @@ import warnings
8
8
 
9
9
  from sknetwork.data.load import *
10
10
  from sknetwork.data.toy_graphs import *
11
- from sknetwork.data import Bunch
11
+ from sknetwork.data import Dataset
12
12
 
13
13
 
14
14
  class TestDataAPI(unittest.TestCase):
@@ -17,14 +17,14 @@ class TestDataAPI(unittest.TestCase):
17
17
  toy_graphs = [karate_club, painters, bow_tie, house, miserables]
18
18
  for toy_graph in toy_graphs:
19
19
  self.assertEqual(type(toy_graph()), sparse.csr_matrix)
20
- self.assertEqual(type(toy_graph(metadata=True)), Bunch)
20
+ self.assertEqual(type(toy_graph(metadata=True)), Dataset)
21
21
 
22
22
  def test_load(self):
23
23
  tmp_data_dir = tempfile.gettempdir() + '/stub'
24
24
  clear_data_home(tmp_data_dir)
25
25
  try:
26
26
  graph = load_netset('stub', tmp_data_dir)
27
- self.assertEqual(type(graph), Bunch)
27
+ self.assertEqual(type(graph), Dataset)
28
28
  except URLError: # pragma: no cover
29
29
  warnings.warn('Could not reach NetSet. Corresponding test has not been performed.', RuntimeWarning)
30
30
  return
@@ -3,12 +3,12 @@
3
3
 
4
4
  import unittest
5
5
 
6
- from sknetwork.data.base import Bunch
6
+ from sknetwork.data.base import Dataset
7
7
 
8
8
 
9
9
  class TestDataset(unittest.TestCase):
10
10
 
11
11
  def test(self):
12
- dataset = Bunch(name='dataset')
12
+ dataset = Dataset(name='dataset')
13
13
  self.assertEqual(dataset.name, 'dataset')
14
14
  self.assertEqual(dataset['name'], 'dataset')
@@ -20,6 +20,10 @@ class TestParser(unittest.TestCase):
20
20
  self.assertTrue((adjacency.indices == [2, 3, 0, 1, 5, 4]).all())
21
21
  self.assertTrue((adjacency.indptr == [0, 1, 2, 3, 4, 5, 6]).all())
22
22
  self.assertTrue((adjacency.data == [1, 1, 1, 1, 1, 1]).all())
23
+ adjacency = parse.from_csv(self.stub_data_1, shape=(7, 7))
24
+ self.assertTrue((adjacency.shape == (7, 7)))
25
+ biadjacency = parse.from_csv(self.stub_data_1, bipartite=True, shape=(7, 9))
26
+ self.assertTrue((biadjacency.shape == (7, 9)))
23
27
  remove(self.stub_data_1)
24
28
 
25
29
  def test_labeled_weighted(self):
@@ -33,13 +37,14 @@ class TestParser(unittest.TestCase):
33
37
  self.assertTrue((adjacency.indptr == [0, 1, 2, 3, 4, 5, 6]).all())
34
38
  self.assertTrue((adjacency.data == [1, 6, 5, 6, 1, 5]).all())
35
39
  self.assertTrue((names == [' b', ' d', ' e', 'a', 'c', 'f']).all())
40
+
36
41
  remove(self.stub_data_2)
37
42
 
38
43
  def test_auto_reindex(self):
39
44
  self.stub_data_4 = 'stub_4.txt'
40
45
  with open(self.stub_data_4, "w") as text_file:
41
46
  text_file.write('%stub\n14 31\n42 50\n0 12')
42
- graph = parse.from_csv(self.stub_data_4)
47
+ graph = parse.from_csv(self.stub_data_4, reindex=True)
43
48
  adjacency = graph.adjacency
44
49
  names = graph.names
45
50
  self.assertTrue((adjacency.data == [1, 1, 1, 1, 1, 1]).all())
@@ -164,23 +169,15 @@ class TestParser(unittest.TestCase):
164
169
  self.stub_data_9 = 'stub_9.txt'
165
170
  with open(self.stub_data_9, "w") as text_file:
166
171
  text_file.write('#stub\n1 3\n4 5\n0 3')
167
- graph = parse.from_csv(self.stub_data_9, bipartite=True)
172
+ graph = parse.from_csv(self.stub_data_9, bipartite=True, reindex=True)
168
173
  biadjacency = graph.biadjacency
169
174
  self.assertTrue((biadjacency.indices == [0, 0, 1]).all())
170
175
  self.assertTrue((biadjacency.indptr == [0, 1, 2, 3]).all())
171
176
  self.assertTrue((biadjacency.data == [1, 1, 1]).all())
177
+ biadjacency = parse.from_csv(self.stub_data_9, bipartite=True)
178
+ self.assertTrue(biadjacency.shape == (5, 6))
172
179
  remove(self.stub_data_9)
173
180
 
174
- def test_csv_adjacency_bipartite(self):
175
- self.stub_data_10 = 'stub_10.txt'
176
- with open(self.stub_data_10, "w") as text_file:
177
- text_file.write('%stub\n3\n3\n0')
178
- graph = parse.from_csv(self.stub_data_10, bipartite=True)
179
- biadjacency = graph.biadjacency
180
- self.assertTupleEqual(biadjacency.shape, (3, 2))
181
- self.assertTrue((biadjacency.data == [1, 1, 1]).all())
182
- remove(self.stub_data_10)
183
-
184
181
  def test_edge_list(self):
185
182
  edge_list_1 = [('Alice', 'Bob'), ('Carol', 'Alice')]
186
183
  graph = parse.from_edge_list(edge_list_1)
@@ -16,22 +16,22 @@ class TestToys(unittest.TestCase):
16
16
  adjacency = house()
17
17
  self.assertEqual(adjacency.shape, (5, 5))
18
18
 
19
- graph = house(metadata=True)
20
- self.assertEqual(graph.position.shape, (5, 2))
19
+ dataset = house(metadata=True)
20
+ self.assertEqual(dataset.position.shape, (5, 2))
21
21
 
22
22
  adjacency = bow_tie()
23
23
  self.assertEqual(adjacency.shape, (5, 5))
24
24
 
25
- graph = bow_tie(metadata=True)
26
- self.assertEqual(graph.position.shape, (5, 2))
25
+ dataset = bow_tie(metadata=True)
26
+ self.assertEqual(dataset.position.shape, (5, 2))
27
27
 
28
- graph = karate_club(True)
29
- self.assertEqual(graph.adjacency.shape, (34, 34))
30
- self.assertEqual(len(graph.labels), 34)
28
+ dataset = karate_club(True)
29
+ self.assertEqual(dataset.adjacency.shape, (34, 34))
30
+ self.assertEqual(len(dataset.labels), 34)
31
31
 
32
- graph = miserables(True)
33
- self.assertEqual(graph.adjacency.shape, (77, 77))
34
- self.assertEqual(len(graph.names), 77)
32
+ dataset = miserables(True)
33
+ self.assertEqual(dataset.adjacency.shape, (77, 77))
34
+ self.assertEqual(len(dataset.names), 77)
35
35
 
36
36
  def test_directed(self):
37
37
  adjacency = painters()
@@ -40,29 +40,29 @@ class TestToys(unittest.TestCase):
40
40
  adjacency = art_philo_science()
41
41
  self.assertEqual(adjacency.shape, (30, 30))
42
42
 
43
- graph = painters(True)
44
- self.assertEqual(graph.adjacency.shape, (14, 14))
45
- self.assertEqual(len(graph.names), 14)
43
+ dataset = painters(True)
44
+ self.assertEqual(dataset.adjacency.shape, (14, 14))
45
+ self.assertEqual(len(dataset.names), 14)
46
46
 
47
- graph = art_philo_science(True)
48
- self.assertEqual(graph.adjacency.shape, (30, 30))
49
- self.assertEqual(len(graph.names), 30)
47
+ dataset = art_philo_science(True)
48
+ self.assertEqual(dataset.adjacency.shape, (30, 30))
49
+ self.assertEqual(len(dataset.names), 30)
50
50
 
51
51
  def test_bipartite(self):
52
- graph = star_wars(True)
53
- self.assertEqual(graph.biadjacency.shape, (4, 3))
54
- self.assertEqual(len(graph.names), 4)
55
- self.assertEqual(len(graph.names_col), 3)
56
-
57
- graph = movie_actor(True)
58
- self.assertEqual(graph.biadjacency.shape, (15, 16))
59
- self.assertEqual(len(graph.names), 15)
60
- self.assertEqual(len(graph.names_col), 16)
61
-
62
- graph = hourglass(True)
63
- self.assertEqual(graph.biadjacency.shape, (2, 2))
64
-
65
- graph = art_philo_science(True)
66
- self.assertEqual(graph.biadjacency.shape, (30, 11))
67
- self.assertEqual(len(graph.names), 30)
68
- self.assertEqual(len(graph.names_col), 11)
52
+ dataset = star_wars(True)
53
+ self.assertEqual(dataset.biadjacency.shape, (4, 3))
54
+ self.assertEqual(len(dataset.names), 4)
55
+ self.assertEqual(len(dataset.names_col), 3)
56
+
57
+ dataset = movie_actor(True)
58
+ self.assertEqual(dataset.biadjacency.shape, (15, 17))
59
+ self.assertEqual(len(dataset.names), 15)
60
+ self.assertEqual(len(dataset.names_col), 17)
61
+
62
+ dataset = hourglass(True)
63
+ self.assertEqual(dataset.biadjacency.shape, (2, 2))
64
+
65
+ dataset = art_philo_science(True)
66
+ self.assertEqual(dataset.biadjacency.shape, (30, 11))
67
+ self.assertEqual(len(dataset.names), 30)
68
+ self.assertEqual(len(dataset.names_col), 11)