egosplit-sknetwork 0.0.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Ryan DeWolfe
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,58 @@
1
+ Metadata-Version: 2.4
2
+ Name: egosplit_sknetwork
3
+ Version: 0.0.4
4
+ Summary: Fast python implementation of the egosplitting framework for overlapping clustering using sknetwork.
5
+ Author-email: Ryan DeWolfe <ryandewolfe33@gmail.com>
6
+ License-Expression: MIT
7
+ Classifier: Programming Language :: Python :: 3
8
+ Classifier: Operating System :: OS Independent
9
+ Requires-Python: >=3.10
10
+ Description-Content-Type: text/markdown
11
+ License-File: LICENSE
12
+ Requires-Dist: numpy>=2.0
13
+ Requires-Dist: scikit-network>=0.33
14
+ Requires-Dist: numba>=0.60.0
15
+ Dynamic: license-file
16
+
17
+ # EgoSplit-sknetwork
18
+
19
+ This package provides a fast and flexible implementation of the egosplitting community detection paradigm for detecting overlapping communities.
20
+ For details and motivation of the algorithm, please see the paper below.
21
+ The reference implementation is available [here](https://github.com/google-research/google-research/blob/master/graph_embedding/persona/persona.py).
22
+
23
+
24
+ > Alessandro Epasto, Silvio Lattanzi, and Renato Paes Leme. 2017. Ego-Splitting Framework: from Non-Overlapping to Overlapping Clusters. In Proceedings of the 23rd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (KDD '17). Association for Computing Machinery, New York, NY, USA, 145-154. https://doi.org/10.1145/3097983.3098054
25
+
26
+ # Installation
27
+
28
+ Currently you can install this package by cloning this repository and installing locally.
29
+ ```sh
30
+ git clone https://github.com/ryandewolfe33/egosplit-sknetwork.git
31
+ cd egosplit-sknetwork
32
+ pip install .
33
+ ```
34
+
35
+
36
+ # Example
37
+
38
+ ```python
39
+ import sknetwork as sn
40
+ from egosplit_sknetwork import EgoSplit
41
+
42
+ g = sn.data.toy_graphs.karate_club()
43
+ egosplit = EgoSplit()
44
+ labels = egosplit.fit_predict(g)
45
+ ```
46
+
47
+ By default the algorithm uses [Propagation Clustering](https://scikit-network.readthedocs.io/en/latest/reference/clustering.html#sknetwork.clustering.PropagationClustering) for local clustering and [Leiden](https://scikit-network.readthedocs.io/en/latest/reference/clustering.html#sknetwork.clustering.Leiden) for global clustering.
48
+ To pass other clustering algorithms to egosplit, they must be initialized in advance and passed as parameters.
49
+ The algorithm accepts any subclass of [sknetwork.clustering.BaseClustering](https://scikit-network.readthedocs.io/en/latest/reference/clustering.html) for either local_clustering (used to cluster the egonets) or global_clustering (used to cluster the persona graph).
50
+
51
+ ```python
52
+ high_res_clusterer = sn.clustering.Louvain(resolution=5, random_state=42)
53
+ egosplit = EgoSplit(local_clustering='PC', global_clustering=high_res_clusterer)
54
+ egosplit.fit(g)
55
+ labels = egosplit.labels_
56
+ ```
57
+
58
+ Labels is a sparse matrix with dimensions (n_labels, n_vertices), where `labels[i,j] = True` if vertex j is in cluster i.
@@ -0,0 +1,42 @@
1
+ # EgoSplit-sknetwork
2
+
3
+ This package provides a fast and flexible implementation of the egosplitting community detection paradigm for detecting overlapping communities.
4
+ For details and motivation of the algorithm, please see the paper below.
5
+ The reference implementation is available [here](https://github.com/google-research/google-research/blob/master/graph_embedding/persona/persona.py).
6
+
7
+
8
+ > Alessandro Epasto, Silvio Lattanzi, and Renato Paes Leme. 2017. Ego-Splitting Framework: from Non-Overlapping to Overlapping Clusters. In Proceedings of the 23rd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (KDD '17). Association for Computing Machinery, New York, NY, USA, 145-154. https://doi.org/10.1145/3097983.3098054
9
+
10
+ # Installation
11
+
12
+ Currently you can install this package by cloning this repository and installing locally.
13
+ ```sh
14
+ git clone https://github.com/ryandewolfe33/egosplit-sknetwork.git
15
+ cd egosplit-sknetwork
16
+ pip install .
17
+ ```
18
+
19
+
20
+ # Example
21
+
22
+ ```python
23
+ import sknetwork as sn
24
+ from egosplit_sknetwork import EgoSplit
25
+
26
+ g = sn.data.toy_graphs.karate_club()
27
+ egosplit = EgoSplit()
28
+ labels = egosplit.fit_predict(g)
29
+ ```
30
+
31
+ By default the algorithm uses [Propagation Clustering](https://scikit-network.readthedocs.io/en/latest/reference/clustering.html#sknetwork.clustering.PropagationClustering) for local clustering and [Leiden](https://scikit-network.readthedocs.io/en/latest/reference/clustering.html#sknetwork.clustering.Leiden) for global clustering.
32
+ To pass other clustering algorithms to egosplit, they must be initialized in advance and passed as parameters.
33
+ The algorithm accepts any subclass of [sknetwork.clustering.BaseClustering](https://scikit-network.readthedocs.io/en/latest/reference/clustering.html) for either local_clustering (used to cluster the egonets) or global_clustering (used to cluster the persona graph).
34
+
35
+ ```python
36
+ high_res_clusterer = sn.clustering.Louvain(resolution=5, random_state=42)
37
+ egosplit = EgoSplit(local_clustering='PC', global_clustering=high_res_clusterer)
38
+ egosplit.fit(g)
39
+ labels = egosplit.labels_
40
+ ```
41
+
42
+ Labels is a sparse matrix with dimensions (n_labels, n_vertices), where `labels[i,j] = True` if vertex j is in cluster i.
@@ -0,0 +1,4 @@
1
# Package entry point: records the release version and re-exports EgoSplit.
__version__ = "0.0.4"
from .egosplit_sknetwork_ import EgoSplit

# EgoSplit is the only name exported by ``from egosplit_sknetwork import *``.
__all__ = ["EgoSplit"]
@@ -0,0 +1,279 @@
1
+ import sknetwork as sn
2
+ import numpy as np
3
+ import scipy.sparse as sp
4
+ import numba
5
+ from numba.typed import List
6
+ from numba.types import int32
7
+ from tqdm import tqdm
8
+
9
+
10
class ConnectedComponents(sn.clustering.BaseClustering):
    """
    Adapter that exposes connected components through the sknetwork clustering API.

    Every connected component of the input graph is reported as one cluster.

    Parameters
    ----------
    sort_clusters, return_probs, return_aggregate:
        Forwarded unchanged to ``sknetwork.clustering.BaseClustering``.
    """

    def __init__(
        self,
        sort_clusters: bool = True,
        return_probs: bool = False,
        return_aggregate: bool = False,
    ):
        super().__init__(
            sort_clusters=sort_clusters,
            return_probs=return_probs,
            return_aggregate=return_aggregate,
        )

    def fit(self, g):
        """Store the connected-component index of every node of ``g`` in ``labels_``."""
        _, component_of_node = sp.csgraph.connected_components(g)
        self.labels_ = component_of_node
        return self
31
+
32
+
33
+ #################################
34
+ # Helper Functions for EgoSplit #
35
+ #################################
36
+
37
+
38
@numba.njit(nogil=True)
def _get_data(indptr, indices, data, i, j):
    """
    Look up the stored value of entry (i, j) in a CSR matrix.

    Scans row ``i`` linearly and returns the value of the first entry whose
    column index equals ``j``; returns -1 when the row has no such entry.
    """
    row_start = indptr[i]
    row_stop = indptr[i + 1]
    for position in range(row_start, row_stop):
        if indices[position] == j:
            return data[position]
    return -1
44
+
45
+
46
@numba.njit
def _make_neighbor_sets(indptr, indices):
    """
    Build one set of neighbour ids per node from CSR ``indptr``/``indices``.

    Returns a numba typed ``List`` whose entry ``v`` is the set of column
    indices stored in row ``v``.
    """
    n_nodes = len(indptr) - 1
    return List([set(indices[indptr[v] : indptr[v + 1]]) for v in range(n_nodes)])
52
+
53
+
54
@numba.njit
def _make_egonet(i, neighbor_sets, global_indptr, global_indices, global_data):
    """
    Build the CSR arrays of the egonet of node ``i``.

    The egonet is the subgraph induced on the neighbours of ``i``. Egonet
    node ``k`` is the ``k``-th neighbour stored in row ``i`` of the global CSR
    matrix, so labels computed on the egonet line up position by position with
    ``global_indices[global_indptr[i]:global_indptr[i + 1]]``.

    Parameters
    ----------
    i: Id of the ego node.
    neighbor_sets: Per-node neighbour sets, as built by ``_make_neighbor_sets``.
    global_indptr, global_indices, global_data: CSR arrays of the full graph.

    Returns
    -------
    (indptr, indices, data): CSR arrays of the egonet. Column indices are
    sorted within each row and weights are copied from the full graph.
    """
    egonet_nodes = neighbor_sets[i]

    # Number the egonet nodes in CSR row order rather than set iteration
    # order: a set gives no ordering guarantee, and callers pair the egonet
    # labels with the CSR row of the ego.
    ego_row = global_indices[global_indptr[i] : global_indptr[i + 1]]
    egonet_old_ids = np.empty(len(ego_row), dtype="int32")
    for position, neighbor in enumerate(ego_row):
        egonet_old_ids[position] = neighbor
    new_ids = {old: int32(new) for new, old in enumerate(egonet_old_ids)}

    # One sorted array of egonet-local neighbour ids per egonet node.
    adjacency = List.empty_list(int32[:])
    for member in egonet_old_ids:
        shared_old_ids = neighbor_sets[member].intersection(egonet_nodes)
        shared_new_ids = np.empty(len(shared_old_ids), dtype="int32")
        for slot, old_id in enumerate(shared_old_ids):
            shared_new_ids[slot] = new_ids[old_id]
        shared_new_ids.sort()
        adjacency.append(shared_new_ids)

    n_edges = 0
    for row_neighbors in adjacency:
        n_edges += len(row_neighbors)

    indptr = np.empty(len(adjacency) + 1, dtype="int32")
    indptr[0] = 0
    indices = np.empty(n_edges, dtype="int32")
    data = np.empty_like(indices, dtype=global_data.dtype)
    for row, row_neighbors in enumerate(adjacency):
        start = indptr[row]
        stop = start + len(row_neighbors)
        indptr[row + 1] = stop
        indices[start:stop] = row_neighbors

        # Copy each edge weight from the full graph using the original ids.
        row_old_id = egonet_old_ids[row]
        for offset, col in enumerate(row_neighbors):
            data[start + offset] = _get_data(
                global_indptr,
                global_indices,
                global_data,
                row_old_id,
                egonet_old_ids[col],
            )
    return indptr, indices, data
95
+
96
+
97
@numba.njit
def make_persona_graph(
    g_indptr, g_indices, g_data, egonet_indices, egonet_community, first_personae_index
):
    """
    Assemble the CSR arrays of the persona graph.

    Each stored edge (u, v) of the input graph is rewired to run from the
    persona of u whose egonet cluster contains v to the persona of v whose
    egonet cluster contains u, keeping the original edge weight.

    Parameters
    ----------
    g_indptr, g_indices, g_data: CSR arrays of the input graph.
    egonet_indices: Per node, the array of its neighbours. Entry ``i`` is
        treated as the neighbour stored at ``g_indptr[node] + i``.
    egonet_community: Per node, the egonet cluster label of each neighbour,
        aligned position by position with ``egonet_indices``.
    first_personae_index: Row index of the first persona of every node, with
        the total number of personae as the last entry.

    Returns
    -------
    (persona_data, persona_indices, persona_indptr)

    Notes
    -----
    The reverse edge (v, u) is looked up in ``egonet_indices[v]``; if it is
    missing nothing is written for (u, v), so the input is presumably expected
    to be symmetric -- confirm against callers.
    """
    persona_indptr = np.empty(first_personae_index[-1] + 1, dtype="int32")
    persona_indptr[-1] = len(g_indices)
    persona_indices = np.empty_like(g_indices)
    persona_data = np.empty_like(g_data)

    next_index = 0
    for og_n1 in range(len(g_indptr) - 1):
        og_neighbors = egonet_indices[og_n1]
        communities = egonet_community[og_n1]
        row_start = g_indptr[og_n1]

        for c in range(np.max(communities) + 1):
            new_n1 = first_personae_index[og_n1] + c
            persona_indptr[new_n1] = next_index
            for i in range(len(communities)):
                if communities[i] != c:
                    continue
                og_n2 = og_neighbors[i]
                # Get new id of the other end of the edge (og_n1, og_n2)
                og_n2_neighbors = egonet_indices[og_n2]
                # search for og_n1
                for j in range(len(og_n2_neighbors)):
                    if og_n2_neighbors[j] != og_n1:
                        continue
                    # Get the egonet community of og_n1
                    n2_persona_for_n1 = egonet_community[og_n2][j]
                    new_n2 = first_personae_index[og_n2] + n2_persona_for_n1
                    # write new n2_persona and data into persona graph
                    persona_indices[next_index] = new_n2
                    # Weight of edge (og_n1, og_n2): the i-th entry of row og_n1.
                    persona_data[next_index] = g_data[row_start + i]
                    next_index += 1
                    # One persona edge per input edge; stop at the first match.
                    break
    return persona_data, persona_indices, persona_indptr
133
+
134
+
135
class EgoSplit:
    """
    Implementation of the Egosplitting framework method for overlapping clustering using
    sknetwork. Since sknetwork does not allow overlapping clusterings, this is not a
    subclass of the sknetwork.clustering.BaseClustering, but it is built to behave similarly.

    Parameters
    ----------
    local_clustering: The clustering method used for the egonet. Should be either "CC"
        (ConnectedComponents), "PC" (PropagationClustering), or an instance of a subclass of
        sknetwork.clustering.BaseClustering.
    global_clustering: The clustering method used for the persona graph. Should
        be either "Louvain", "Leiden", "PC" (PropagationClustering), or an instance of a
        subclass of sknetwork.clustering.BaseClustering.
    min_cluster_size: Clusters with fewer nodes than this are dropped from the result.
        Must be a non-negative whole number; 0 keeps every cluster.
    random_state: The random state to pass to the default Louvain and Leiden
        clustering algorithms.
    verbose: If True, print the current stage and show progress bars.

    Attributes
    ----------
    labels_: Sparse boolean matrix of shape (n_clusters, n_nodes) set by ``fit``.
    persona_graph_: Sparse adjacency matrix of the persona graph set by ``fit``.
    persona_clusters_: Cluster label of every persona set by ``fit``.
    first_personae_index_: Row of the first persona of every node in the persona
        graph, with the total number of personae as the last entry.

    Returns
    -------
    scipy.sparse.csr_matrix: An overlapping clustering of the nodes. Rows correspond to clusters
    and columns to nodes.

    Example
    -------
    >>> g = sn.data.karate_club()
    >>> part1 = EgoSplit().fit_predict(g)

    Reference
    ---------
    Alessandro Epasto, Silvio Lattanzi, and Renato Paes Leme. 2017. Ego-Splitting Framework:
    from Non-Overlapping to Overlapping Clusters. In Proceedings of the 23rd ACM SIGKDD
    International Conference on Knowledge Discovery and Data Mining (KDD '17). Association
    for Computing Machinery, New York, NY, USA, 145-154. https://doi.org/10.1145/3097983.3098054
    """

    def __init__(
        self,
        local_clustering="PC",
        global_clustering="Leiden",
        min_cluster_size=5,
        random_state=None,
        verbose=False,
    ):
        if local_clustering == "CC":
            self.local_clustering_ = ConnectedComponents()
        elif local_clustering == "PC":
            self.local_clustering_ = sn.clustering.PropagationClustering()
        elif isinstance(local_clustering, sn.clustering.BaseClustering):
            self.local_clustering_ = local_clustering
        else:
            raise ValueError(
                f"local_clustering should be either 'CC' or 'PC', or a subclass of sknetwork.clustering.BaseClustering. Got {type(local_clustering)}"
            )

        if global_clustering == "Leiden":
            self.global_clustering_ = sn.clustering.Leiden(random_state=random_state)
        elif global_clustering == "Louvain":
            self.global_clustering_ = sn.clustering.Louvain(random_state=random_state)
        elif global_clustering == "PC":
            self.global_clustering_ = sn.clustering.PropagationClustering()
        elif isinstance(global_clustering, sn.clustering.BaseClustering):
            self.global_clustering_ = global_clustering
        else:
            raise ValueError(
                f"global_clustering should be in ['Louvain', 'Leiden', 'PC'] or a subclass of sknetwork.clustering.BaseClustering. Got {type(global_clustering)}"
            )

        if not isinstance(min_cluster_size, int):
            # Accept whole numbers of other numeric types (e.g. numpy integers,
            # 5.0) and normalise them to a Python int.
            try:
                is_whole = min_cluster_size % 1 == 0
            except TypeError:
                raise ValueError("min_cluster_size must be an int") from None
            if not is_whole:
                raise ValueError("min_cluster_size must be a whole number")
            try:
                min_cluster_size = int(min_cluster_size)
            except (TypeError, ValueError):
                raise ValueError("min_cluster_size must be an int") from None
        if min_cluster_size < 0:
            raise ValueError("min_cluster_size must be non-negative")
        self.min_cluster_size = min_cluster_size
        self.verbose = verbose

    def fit(self, g):
        """
        Compute the overlapping clustering of ``g`` and store it in ``labels_``.

        Parameters
        ----------
        g: Sparse adjacency matrix exposing CSR ``indptr``/``indices``/``data``.

        Returns
        -------
        self
        """
        # NOTE(review): a node without neighbours yields an empty label array,
        # on which np.max below appears to raise -- confirm whether isolated
        # nodes need to be supported.
        n_nodes = g.shape[0]
        egonet_indices = []  # Store the original indices of the egonet
        egonet_community = []  # Store the community labels of the ego nets
        # Store the first index for a nodes new personae.
        # The new personae of node i will be stored in rows
        # first_personae_index[i], first_personae_index[i]+1, ... , first_personae_index[i+1]-1.
        self.first_personae_index_ = np.empty(n_nodes + 1, dtype="int32")
        next_index = 0
        neighbor_sets = _make_neighbor_sets(g.indptr, g.indices)
        if self.verbose:
            print("Making Egonets")
        for node in tqdm(range(n_nodes), disable=not self.verbose):
            neighbors = g.indices[g.indptr[node] : g.indptr[node + 1]]
            egonet_indices.append(neighbors)
            indptr, indices, data = _make_egonet(
                node, neighbor_sets, g.indptr, g.indices, g.data
            )
            egonet = sp.csr_matrix(
                (data, indices, indptr), shape=(len(neighbors), len(neighbors))
            )
            if len(egonet.data) == 0:
                # egonet has no edges, each node is its own cluster
                persona_map = sp.csgraph.connected_components(egonet)[1]
            else:
                persona_map = self.local_clustering_.fit_predict(egonet).astype("int32")
            egonet_community.append(persona_map)
            self.first_personae_index_[node] = next_index
            next_index += np.max(persona_map) + 1

        self.first_personae_index_[-1] = next_index
        n_personae = self.first_personae_index_[-1]
        ei = List(egonet_indices)
        ec = List(egonet_community)
        if self.verbose:
            print("Making Persona Graph")
        persona_graph_data = make_persona_graph(
            g.indptr, g.indices, g.data, ei, ec, self.first_personae_index_
        )
        self.persona_graph_ = sp.csr_matrix(
            persona_graph_data,
            shape=(n_personae, n_personae),
        )
        if self.verbose:
            print("Clustering Persona Graph")
        self.persona_clusters_ = self.global_clustering_.fit_predict(
            self.persona_graph_
        )
        if self.verbose:
            print("Mapping Clusters")
        n_clusters = np.max(self.persona_clusters_) + 1
        clusters = sp.lil_matrix((n_nodes, n_clusters), dtype="bool")
        for node in tqdm(range(n_nodes), disable=not self.verbose):
            # A node belongs to every cluster that contains one of its personae.
            first = self.first_personae_index_[node]
            last = self.first_personae_index_[node + 1]
            node_clusters = np.unique(self.persona_clusters_[first:last])
            clusters[node, node_clusters] = True
        # Transposing the CSC matrix yields CSR with clusters as rows.
        clusters = clusters.tocsc().transpose()
        if self.min_cluster_size > 0:
            clusters = clusters[clusters.getnnz(1) >= self.min_cluster_size]

        self.labels_ = clusters
        return self

    def fit_predict(self, g):
        """Fit on ``g`` and return ``labels_`` (clusters as rows, nodes as columns)."""
        self.fit(g)
        return self.labels_
@@ -0,0 +1,58 @@
1
+ Metadata-Version: 2.4
2
+ Name: egosplit_sknetwork
3
+ Version: 0.0.4
4
+ Summary: Fast python implementation of the egosplitting framework for overlapping clustering using sknetwork.
5
+ Author-email: Ryan DeWolfe <ryandewolfe33@gmail.com>
6
+ License-Expression: MIT
7
+ Classifier: Programming Language :: Python :: 3
8
+ Classifier: Operating System :: OS Independent
9
+ Requires-Python: >=3.10
10
+ Description-Content-Type: text/markdown
11
+ License-File: LICENSE
12
+ Requires-Dist: numpy>=2.0
13
+ Requires-Dist: scikit-network>=0.33
14
+ Requires-Dist: numba>=0.60.0
15
+ Dynamic: license-file
16
+
17
+ # EgoSplit-sknetwork
18
+
19
+ This package provides a fast and flexible implementation of the egosplitting community detection paradigm for detecting overlapping communities.
20
+ For details and motivation of the algorithm, please see the paper below.
21
+ The reference implementation is available [here](https://github.com/google-research/google-research/blob/master/graph_embedding/persona/persona.py).
22
+
23
+
24
+ > Alessandro Epasto, Silvio Lattanzi, and Renato Paes Leme. 2017. Ego-Splitting Framework: from Non-Overlapping to Overlapping Clusters. In Proceedings of the 23rd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (KDD '17). Association for Computing Machinery, New York, NY, USA, 145-154. https://doi.org/10.1145/3097983.3098054
25
+
26
+ # Installation
27
+
28
+ Currently you can install this package by cloning this repository and installing locally.
29
+ ```sh
30
+ git clone https://github.com/ryandewolfe33/egosplit-sknetwork.git
31
+ cd egosplit-sknetwork
32
+ pip install .
33
+ ```
34
+
35
+
36
+ # Example
37
+
38
+ ```python
39
+ import sknetwork as sn
40
+ from egosplit_sknetwork import EgoSplit
41
+
42
+ g = sn.data.toy_graphs.karate_club()
43
+ egosplit = EgoSplit()
44
+ labels = egosplit.fit_predict(g)
45
+ ```
46
+
47
+ By default the algorithm uses [Propagation Clustering](https://scikit-network.readthedocs.io/en/latest/reference/clustering.html#sknetwork.clustering.PropagationClustering) for local clustering and [Leiden](https://scikit-network.readthedocs.io/en/latest/reference/clustering.html#sknetwork.clustering.Leiden) for global clustering.
48
+ To pass other clustering algorithms to egosplit, they must be initialized in advance and passed as parameters.
49
+ The algorithm accepts any subclass of [sknetwork.clustering.BaseClustering](https://scikit-network.readthedocs.io/en/latest/reference/clustering.html) for either local_clustering (used to cluster the egonets) or global_clustering (used to cluster the persona graph).
50
+
51
+ ```python
52
+ high_res_clusterer = sn.clustering.Louvain(resolution=5, random_state=42)
53
+ egosplit = EgoSplit(local_clustering='PC', global_clustering=high_res_clusterer)
54
+ egosplit.fit(g)
55
+ labels = egosplit.labels_
56
+ ```
57
+
58
+ Labels is a sparse matrix with dimensions (n_labels, n_vertices), where `labels[i,j] = True` if vertex j is in cluster i.
@@ -0,0 +1,11 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ egosplit_sknetwork/__init__.py
5
+ egosplit_sknetwork/egosplit_sknetwork_.py
6
+ egosplit_sknetwork.egg-info/PKG-INFO
7
+ egosplit_sknetwork.egg-info/SOURCES.txt
8
+ egosplit_sknetwork.egg-info/dependency_links.txt
9
+ egosplit_sknetwork.egg-info/requires.txt
10
+ egosplit_sknetwork.egg-info/top_level.txt
11
+ tests/test_egosplit_sknetwork.py
@@ -0,0 +1,3 @@
1
+ numpy>=2.0
2
+ scikit-network>=0.33
3
+ numba>=0.60.0
@@ -0,0 +1 @@
1
+ egosplit_sknetwork
@@ -0,0 +1,25 @@
1
+ [build-system]
2
+ requires = ["setuptools >= 77.0.3"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "egosplit_sknetwork"
7
+ version = "0.0.4"
8
+ authors = [
9
+ { name="Ryan DeWolfe", email="ryandewolfe33@gmail.com" },
10
+ ]
11
+ description = "Fast python implementation of the egosplitting framework for overlapping clustering using sknetwork."
12
+ classifiers = [
13
+ "Programming Language :: Python :: 3",
14
+ "Operating System :: OS Independent",
15
+ ]
16
+ readme = "README.md"
17
+ license = "MIT"
18
+ license-files = ["LICEN[CS]E*"]
19
+
20
+ requires-python = ">=3.10"
21
# Every package imported directly by egosplit_sknetwork_.py is listed here,
# including scipy and tqdm, so a clean install can import the module.
dependencies = [
    "numpy >= 2.0",
    "scipy",
    "scikit-network >= 0.33",
    "numba >= 0.60.0",
    "tqdm",
]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,101 @@
1
+ import numpy as np
2
+ import scipy.sparse as sp
3
+ import sknetwork as sn
4
+ import pytest
5
+
6
+ import egosplit_sknetwork as esn
7
+
8
+
9
@pytest.fixture
def karate():
    """Provide the karate club toy graph shipped with sknetwork."""
    graph = sn.data.toy_graphs.karate_club()
    return graph
12
+
13
+
14
def test_make_egonet():
    """The egonet of node 0 is the subgraph induced on its neighbours 1, 2 and 3."""
    adjacency = [
        [0, 1, 1, 1, 0],
        [1, 0, 1, 1, 1],
        [1, 1, 0, 0, 0],
        [0, 1, 0, 0, 1],
        [0, 1, 0, 1, 0],
    ]
    expected = [
        [0, 1, 1],
        [1, 0, 0],
        [1, 0, 0],
    ]
    g = sp.csr_matrix(adjacency)
    true_egonet = sp.csr_matrix(expected)

    module = esn.egosplit_sknetwork_
    neighbor_sets = module._make_neighbor_sets(g.indptr, g.indices)
    n_neighbors = len(neighbor_sets[0])
    indptr, indices, data = module._make_egonet(
        0, neighbor_sets, g.indptr, g.indices, g.data
    )
    egonet = sp.csr_matrix((data, indices, indptr), shape=(n_neighbors, n_neighbors))

    assert egonet.shape == true_egonet.shape
    assert (egonet - true_egonet).nnz == 0
44
+
45
+
46
def test_make_egonet_weighted():
    """Edge weights of the full graph are carried over into the egonet of node 0."""
    adjacency = [
        [0, 0.5, 0.5, 0.5, 0],
        [0.5, 0, 0.5, 0.5, 0.5],
        [0.5, 0.5, 0, 0, 0],
        [0, 0.5, 0, 0, 0.5],
        [0, 0.5, 0, 0.5, 0],
    ]
    expected = [
        [0, 0.5, 0.5],
        [0.5, 0, 0],
        [0.5, 0, 0],
    ]
    g = sp.csr_matrix(adjacency)
    true_egonet = sp.csr_matrix(expected)

    module = esn.egosplit_sknetwork_
    neighbor_sets = module._make_neighbor_sets(g.indptr, g.indices)
    n_neighbors = len(neighbor_sets[0])
    indptr, indices, data = module._make_egonet(
        0, neighbor_sets, g.indptr, g.indices, g.data
    )
    egonet = sp.csr_matrix((data, indices, indptr), shape=(n_neighbors, n_neighbors))

    assert egonet.shape == true_egonet.shape
    assert (egonet - true_egonet).nnz == 0
76
+
77
+
78
@pytest.mark.parametrize(
    "local_alg", ["CC", "PC", sn.clustering.Louvain(random_state=42)]
)
@pytest.mark.parametrize(
    "global_alg", ["Louvain", "Leiden", "PC", sn.clustering.KCenters(3)]
)
def test_egosplit(karate, local_alg, global_alg):
    """Every local/global algorithm pairing yields a non-empty CSR label matrix."""
    model = esn.EgoSplit(
        local_clustering=local_alg, global_clustering=global_alg, random_state=42
    )
    labels = model.fit_predict(karate)
    assert isinstance(labels, sp.csr_matrix)
    n_clusters, n_columns = labels.shape
    assert n_columns == karate.shape[0]
    assert n_clusters > 0
92
+
93
+
94
def test_egosplit_weighted(karate):
    """The default configuration also runs on a graph with random positive weights."""
    generator = np.random.default_rng(seed=42)
    n_stored = len(karate.data)
    karate.data = generator.random(n_stored) + 0.1  # so no 0
    model = esn.EgoSplit()
    labels = model.fit_predict(karate)
    assert isinstance(labels, sp.csr_matrix)
    n_clusters, n_columns = labels.shape
    assert n_columns == karate.shape[0]
    assert n_clusters > 0