partition-sknetwork 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- partition_sknetwork-0.0.1/LICENSE +21 -0
- partition_sknetwork-0.0.1/PKG-INFO +79 -0
- partition_sknetwork-0.0.1/README.md +61 -0
- partition_sknetwork-0.0.1/partition_sknetwork/__init__.py +5 -0
- partition_sknetwork-0.0.1/partition_sknetwork/partition_sknetwork.py +217 -0
- partition_sknetwork-0.0.1/partition_sknetwork.egg-info/PKG-INFO +79 -0
- partition_sknetwork-0.0.1/partition_sknetwork.egg-info/SOURCES.txt +10 -0
- partition_sknetwork-0.0.1/partition_sknetwork.egg-info/dependency_links.txt +1 -0
- partition_sknetwork-0.0.1/partition_sknetwork.egg-info/requires.txt +3 -0
- partition_sknetwork-0.0.1/partition_sknetwork.egg-info/top_level.txt +1 -0
- partition_sknetwork-0.0.1/pyproject.toml +29 -0
- partition_sknetwork-0.0.1/setup.cfg +4 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Ryan DeWolfe
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: partition_sknetwork
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Ensemble Clustering for Graphs (ECG) and Graph aware measures (GAM) for sknetwork.
|
|
5
|
+
Author-email: Ryan DeWolfe <ryandewolfe33@gmail.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/ftheberge/graph-partition-and-measures
|
|
8
|
+
Project-URL: Issues, https://github.com/ftheberge/graph-partition-and-measures/issues
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.10
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
License-File: LICENSE
|
|
14
|
+
Requires-Dist: numpy>=2.0
|
|
15
|
+
Requires-Dist: scikit-network>=0.33
|
|
16
|
+
Requires-Dist: numba>=0.60.0
|
|
17
|
+
Dynamic: license-file
|
|
18
|
+
|
|
19
|
+
# Graph Partition and Measures
|
|
20
|
+
|
|
21
|
+
Python code implementing 11 graph-aware measures (gam) for comparing graph partitions as well as a stable ensemble-based graph partition algorithm (ecg). This version works with the sknetwork package. Versions for networkx and igraph are also available: partition-networkx, partition-igraph.
|
|
22
|
+
|
|
23
|
+
## Graph aware measures (gam)
|
|
24
|
+
|
|
25
|
+
The measures are respectively:
|
|
26
|
+
* 'rand': the RAND index
|
|
27
|
+
* 'jaccard': the Jaccard index
|
|
28
|
+
* 'mn': pairwise similarity normalized with the mean function
|
|
29
|
+
* 'gmn': pairwise similarity normalized with the geometric mean function
|
|
30
|
+
* 'min': pairwise similarity normalized with the minimum function
|
|
31
|
+
* 'max': pairwise similarity normalized with the maximum function
|
|
32
|
+
|
|
33
|
+
Each measure can be adjusted (recommended) or not, except for 'jaccard'.
|
|
34
|
+
Details can be found in:
|
|
35
|
+
|
|
36
|
+
Valérie Poulin and François Théberge, "Comparing Graph Clusterings: Set Partition Measures vs. Graph-aware Measures",
|
|
37
|
+
IEEE Transactions on Pattern Analysis and Machine Intelligence 43, 6 (2021) https://doi.org/10.1109/TPAMI.2020.3009862
|
|
38
|
+
|
|
39
|
+
## Ensemble clustering for graphs (ecg)
|
|
40
|
+
|
|
41
|
+
This is a good, stable graph partitioning algorithm. Details for ecg can be found in:
|
|
42
|
+
|
|
43
|
+
Valérie Poulin and François Théberge, "Ensemble clustering for graphs: comparisons and applications", Appl Netw Sci 4, 51 (2019).
|
|
44
|
+
https://doi.org/10.1007/s41109-019-0162-z
|
|
45
|
+
|
|
46
|
+
# Example
|
|
47
|
+
|
|
48
|
+
We need to import the supplied Python package partition_sknetwork.
|
|
49
|
+
|
|
50
|
+
```python
|
|
51
|
+
import numpy as np
import sknetwork as sn
|
|
52
|
+
import partition_sknetwork as ps
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
Next, let's build a graph with communities.
|
|
56
|
+
|
|
57
|
+
```python
|
|
58
|
+
block_sizes = [100 for _ in range(10)]
|
|
59
|
+
g = sn.data.models.block_model(block_sizes, 0.1, 0.025, seed=42)
|
|
60
|
+
|
|
61
|
+
# Store the ground truth communities
|
|
62
|
+
labels = np.array([i for i,block_size in enumerate(block_sizes) for _ in range(block_size)])
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
Run Louvain and ecg:
|
|
66
|
+
|
|
67
|
+
```python
|
|
68
|
+
louvain = sn.clustering.Louvain(shuffle_nodes=True, random_state=42).fit_predict(g)
|
|
69
|
+
ecg = ps.ECG(random_state=42).fit_predict(g)
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
Finally, we show a few examples of measures we can compute with gam:
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
print('Adjusted Graph-Aware Rand Index for Louvain:',ps.gam(g, labels, louvain))
|
|
76
|
+
print('Adjusted Graph-Aware Rand Index for ECG:',ps.gam(g, labels, ecg))
|
|
77
|
+
print('\nJaccard Graph-Aware for Louvain:',ps.gam(g, labels, louvain, method="jaccard", adjusted=False))
|
|
78
|
+
print('Jaccard Graph-Aware for ECG:',ps.gam(g, labels, ecg, method="jaccard", adjusted=False))
|
|
79
|
+
```
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# Graph Partition and Measures
|
|
2
|
+
|
|
3
|
+
Python code implementing 11 graph-aware measures (gam) for comparing graph partitions as well as a stable ensemble-based graph partition algorithm (ecg). This version works with the sknetwork package. Versions for networkx and igraph are also available: partition-networkx, partition-igraph.
|
|
4
|
+
|
|
5
|
+
## Graph aware measures (gam)
|
|
6
|
+
|
|
7
|
+
The measures are respectively:
|
|
8
|
+
* 'rand': the RAND index
|
|
9
|
+
* 'jaccard': the Jaccard index
|
|
10
|
+
* 'mn': pairwise similarity normalized with the mean function
|
|
11
|
+
* 'gmn': pairwise similarity normalized with the geometric mean function
|
|
12
|
+
* 'min': pairwise similarity normalized with the minimum function
|
|
13
|
+
* 'max': pairwise similarity normalized with the maximum function
|
|
14
|
+
|
|
15
|
+
Each measure can be adjusted (recommended) or not, except for 'jaccard'.
|
|
16
|
+
Details can be found in:
|
|
17
|
+
|
|
18
|
+
Valérie Poulin and François Théberge, "Comparing Graph Clusterings: Set Partition Measures vs. Graph-aware Measures",
|
|
19
|
+
IEEE Transactions on Pattern Analysis and Machine Intelligence 43, 6 (2021) https://doi.org/10.1109/TPAMI.2020.3009862
|
|
20
|
+
|
|
21
|
+
## Ensemble clustering for graphs (ecg)
|
|
22
|
+
|
|
23
|
+
This is a good, stable graph partitioning algorithm. Details for ecg can be found in:
|
|
24
|
+
|
|
25
|
+
Valérie Poulin and François Théberge, "Ensemble clustering for graphs: comparisons and applications", Appl Netw Sci 4, 51 (2019).
|
|
26
|
+
https://doi.org/10.1007/s41109-019-0162-z
|
|
27
|
+
|
|
28
|
+
# Example
|
|
29
|
+
|
|
30
|
+
We need to import the supplied Python package partition_sknetwork.
|
|
31
|
+
|
|
32
|
+
```python
|
|
33
|
+
import numpy as np
import sknetwork as sn
|
|
34
|
+
import partition_sknetwork as ps
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
Next, let's build a graph with communities.
|
|
38
|
+
|
|
39
|
+
```python
|
|
40
|
+
block_sizes = [100 for _ in range(10)]
|
|
41
|
+
g = sn.data.models.block_model(block_sizes, 0.1, 0.025, seed=42)
|
|
42
|
+
|
|
43
|
+
# Store the ground truth communities
|
|
44
|
+
labels = np.array([i for i,block_size in enumerate(block_sizes) for _ in range(block_size)])
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
Run Louvain and ecg:
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
louvain = sn.clustering.Louvain(shuffle_nodes=True, random_state=42).fit_predict(g)
|
|
51
|
+
ecg = ps.ECG(random_state=42).fit_predict(g)
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Finally, we show a few examples of measures we can compute with gam:
|
|
55
|
+
|
|
56
|
+
```python
|
|
57
|
+
print('Adjusted Graph-Aware Rand Index for Louvain:',ps.gam(g, labels, louvain))
|
|
58
|
+
print('Adjusted Graph-Aware Rand Index for ECG:',ps.gam(g, labels, ecg))
|
|
59
|
+
print('\nJaccard Graph-Aware for Louvain:',ps.gam(g, labels, louvain, method="jaccard", adjusted=False))
|
|
60
|
+
print('Jaccard Graph-Aware for ECG:',ps.gam(g, labels, ecg, method="jaccard", adjusted=False))
|
|
61
|
+
```
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import numba
|
|
3
|
+
import scipy.sparse as sp
|
|
4
|
+
import sknetwork as sn
|
|
5
|
+
from sknetwork.clustering import BaseClustering, Louvain, Leiden
|
|
6
|
+
from sknetwork.utils.check import check_format, check_random_state, get_probs
|
|
7
|
+
from sknetwork.topology import get_core_decomposition
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@numba.njit
def _internal_edge(g_indptr, g_indices, partition):
    """
    Flag every stored edge whose two endpoints lie in the same part.

    Parameters
    ----------
    g_indptr: row-pointer array; row r owns positions g_indptr[r]..g_indptr[r+1]-1
        (called from gam with the ``indptr`` of a CSR matrix).

    g_indices: column index of each stored entry (the CSR ``indices``).

    partition: array where partition[i] is the part label of node i.

    Returns
    -------
    Boolean array aligned with g_indices; entry k is True when the edge stored at
    position k joins two nodes carrying the same label.
    """
    n_rows = len(g_indptr) - 1
    same_part = np.empty(len(g_indices), dtype="bool")
    for src in range(n_rows):
        # Walk the stored positions of row `src` directly instead of slicing.
        for pos in range(g_indptr[src], g_indptr[src + 1]):
            same_part[pos] = partition[src] == partition[g_indices[pos]]
    return same_part
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def gam(g, u, v, method="rand", adjusted=True):
    """
    Compute one of 11 graph-aware measures to compare graph partitions.

    Each edge of the graph is classified as internal (both endpoints in the same
    part) or not, once for each partition; the measure compares these two binary
    edge classifications. Only the upper triangle of ``g`` is used, so every edge
    of a symmetric adjacency matrix is counted once.

    Parameters
    ----------
    g: adjacency matrix of the graph on which the partitions are defined.

    u: Partition of nodes. An array of length n where u[i] = j means node i is in part j.

    v: Partition of nodes. An array of length n where v[i] = j means node i is in part j.

    method: 'str'
      one of 'rand', 'jaccard', 'mn', 'gmn', 'min' or 'max'

    adjusted: 'bool'
      if True, return adjusted measure (preferred). All measures can be adjusted except 'jaccard'.

    Returns
    -------
    float: A graph-aware similarity measure between vertex partitions u and v.
      The ratios are undefined when the graph has no edges or when a
      normalising denominator is zero.

    Raises
    ------
    ValueError
      if ``method`` is unknown, if ``method='jaccard'`` is requested with
      ``adjusted=True``, or if ``u`` or ``v`` has fewer entries than the graph has nodes.

    Examples
    --------
    >>> g = sn.data.karate_club()
    >>> part1 = sn.clustering.Louvain().fit_predict(g)
    >>> part2 = sn.clustering.PropagationClustering().fit_predict(g)
    >>> print(gam(g, part1, part2))

    Reference
    ---------
    Valérie Poulin and François Théberge, "Comparing Graph Clusterings: Set Partition Measures vs. Graph-aware Measures",
    IEEE Transactions on Pattern Analysis and Machine Intelligence 43, 6 (2021) https://doi.org/10.1109/TPAMI.2020.3009862
    """
    # Validate the request before doing any work on the graph.
    if method not in ("rand", "jaccard", "mn", "gmn", "min", "max"):
        raise ValueError(f"Method not found. Should be one of ['jaccard', 'rand', 'mn', 'gmn', 'min', 'max']. Got {method}")
    if adjusted and method == "jaccard":
        raise ValueError("no adjusted jaccard measure, set adjusted=False")

    # Accept any array-like labelling; the compiled helper works on arrays.
    u = np.asarray(u)
    v = np.asarray(v)
    g = sp.triu(g).tocsr()
    # The compiled helper indexes the partitions by node id without bounds
    # checking (numba's default), so a too-short partition must be rejected here.
    n_nodes = max(g.shape)
    if len(u) < n_nodes or len(v) < n_nodes:
        raise ValueError(f"u and v must assign a part to each of the {n_nodes} nodes. Got lengths {len(u)} and {len(v)}")

    bu = _internal_edge(g.indptr, g.indices, u)
    bv = _internal_edge(g.indptr, g.indices, v)
    su = np.sum(bu)        # edges internal w.r.t. u
    sv = np.sum(bv)        # edges internal w.r.t. v
    suv = np.sum(bu & bv)  # edges internal w.r.t. both partitions
    m = len(bu)            # number of edges considered

    ## all adjusted measures
    if adjusted:
        # Subtracted from numerator and denominator to correct for chance agreement.
        expected = su*sv/m
        if method == "rand" or method == "mn":
            # adjusted 'rand' and adjusted 'mn' share the same formula
            norm = np.average([su, sv])
        elif method == "gmn":
            norm = np.sqrt(su*sv)
        elif method == "min":
            norm = np.min([su, sv])
        else:  # "max"
            norm = np.max([su, sv])
        return (suv - expected)/(norm - expected)

    ## all non-adjusted measures
    if method == "jaccard":
        union_b = np.sum(bu | bv)
        return suv/union_b
    if method == "rand":
        return 1 - (su + sv)/m + 2*suv/m
    if method == "mn":
        return suv/np.average([su, sv])
    if method == "gmn":
        return suv/np.sqrt(su*sv)
    if method == "min":
        return suv/np.min([su, sv])
    return suv/np.max([su, sv])  # "max"
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
@numba.njit
def _ecg_weights(g_indptr, g_indices, g_data, partitions):
    """
    Overwrite each stored edge value with its number of ensemble votes.

    Parameters
    ----------
    g_indptr, g_indices: row-pointer and column-index arrays of the graph
        (called from ECG.fit with the CSR arrays of the weight matrix).

    g_data: value array aligned with g_indices; it is modified in place.

    partitions: 2-D array, one row per node and one column per ensemble member,
        holding the label given to that node by that member.

    Returns
    -------
    The same g_data array, where entry k now holds the number of ensemble members
    that put both endpoints of edge k in the same cluster.
    """
    n_rows = len(g_indptr) - 1
    for src in range(n_rows):
        for pos in range(g_indptr[src], g_indptr[src + 1]):
            dst = g_indices[pos]
            # One vote per ensemble member that co-clusters the two endpoints.
            g_data[pos] = np.sum(partitions[src, :] == partitions[dst, :])
    return g_data
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
class ECG(BaseClustering):
    """
    Stable ensemble-based graph clustering;
    the ensemble consists of single-level randomized Louvain;
    each member of the ensemble gets a "vote" to determine if the edges are intra-community or not;
    the votes are aggregated into ECG edge-weights in range [0,1];
    a final (full depth) Leiden or Louvain is run using those edge weights;

    Parameters
    ----------
    ens_size :
        Number of Louvain runs in the ensemble. Must be a positive integer
        (an integral float such as ``16.0`` is accepted and converted).
    min_weight :
        Minimum edge weight
    final :
        Algorithm used for the final clustering of the re-weighted graph,
        either ``'leiden'`` (default) or ``'louvain'``.
    resolution :
        Resolution parameter.
    sort_clusters :
        If ``True``, sort labels in decreasing order of cluster size.
    return_probs :
        If ``True``, return the probability distribution over clusters (soft clustering).
    random_state :
        Seed used to create the internal numpy Generator. If None, a freshly seeded Generator is created.
    rng :
        numpy Generator object to use. If passed random_state is not used
    return_aggregate :
        If ``True``, return the adjacency matrix of the graph between clusters.

    Attributes
    ----------
    labels_ : np.ndarray, shape (n_labels,)
        Label of each node.
    probs_ : sparse.csr_matrix, shape (n_row, n_labels)
        Probability distribution over labels.
    labels_row_, labels_col_ : np.ndarray
        Labels of rows and columns, for bipartite graphs.
    probs_row_, probs_col_ : sparse.csr_matrix, shape (n_row, n_labels)
        Probability distributions over labels for rows and columns (for bipartite graphs).
    aggregate_ : sparse.csr_matrix
        Aggregate adjacency matrix or biadjacency matrix between clusters.
    weights : sparse.csr_matrix
        ECG edge weights, with the same sparsity pattern as the input graph (set by ``fit``).
    CSI : float
        Computed by ``fit`` as ``1 - mean(min(w, 1 - w))`` over the ECG edge weights ``w``.

    Notes
    -----
    The ECG edge weight function is defined as:
    min_weight + ( 1 - min_weight ) x (#votes_in_ensemble) / ens_size
    Edges outside the 2-core are assigned 'min_weight'.

    Example
    -------
    >>> g = sn.data.karate_club()
    >>> part = ECG().fit_predict(g)

    Reference
    ---------
    Valérie Poulin and François Théberge, "Ensemble clustering for graphs: comparisons and applications",
    Appl Netw Sci 4, 51 (2019). https://doi.org/10.1007/s41109-019-0162-z
    """
    def __init__(
        self,
        ens_size:int=16,
        min_weight:float=0.05,
        final:str="leiden",
        resolution:float=1.0,
        sort_clusters:bool=True,
        return_probs:bool=False,
        random_state=None,
        rng=None,
        return_aggregate:bool=False
    ):
        super().__init__(sort_clusters=sort_clusters, return_probs=return_probs, return_aggregate=return_aggregate)
        if ens_size <= 0 or not float(ens_size).is_integer():
            raise ValueError(f"ens_size must be a positive integer. Got {ens_size}")
        # Integral floats (e.g. 16.0) pass the check above; store a real int so
        # that range() and the array shape in fit() accept the value.
        self.ens_size = int(ens_size)
        if min_weight < 0:
            raise ValueError(f"min_weight must be non-negative. Got {min_weight}")
        self.min_weight = min_weight
        if final not in ["louvain", "leiden"]:
            raise ValueError(f"final must be one of 'louvain' or 'leiden'. Got {final}")
        self.final = final
        if resolution < 0:
            raise ValueError(f"resolution must be non-negative. Got {resolution}")
        self.resolution = resolution
        # An explicit Generator takes precedence over a seed.
        if rng is not None:
            self.rng = rng
        elif random_state is not None:
            self.rng = np.random.default_rng(random_state)
        else:
            self.rng = np.random.default_rng()

    def fit(self, g):
        """
        Compute the ECG edge weights of ``g`` and cluster the re-weighted graph.

        Parameters
        ----------
        g :
            Adjacency matrix of the graph.

        Returns
        -------
        self, with ``labels_``, ``weights`` and ``CSI`` set.
        """
        g = check_format(g)
        # Stage one, compute weights
        self.weights = g.copy().astype("float64")
        n_nodes = g.shape[0]
        partitions = np.empty((n_nodes, self.ens_size), dtype="int32")
        for i in range(self.ens_size):
            # n_aggregations=0 restricts each ensemble member to a single Louvain level.
            louvain = Louvain(resolution=self.resolution, n_aggregations=0, shuffle_nodes=True, random_state=self.rng.choice(2**32))
            partitions[:, i] = louvain.fit_predict(g)
        # Replaces the stored values, in place, by the vote count of each edge.
        _ecg_weights(self.weights.indptr, self.weights.indices, self.weights.data, partitions)
        self.weights.data = self.weights.data/self.ens_size
        self.weights.data = self.min_weight + (1-self.min_weight)*self.weights.data
        # Force min_weight outside 2-core: an edge is outside the 2-core as soon
        # as either endpoint is, so both the row node and the column node are
        # tested. Testing rows only would leave the mirrored entry untouched and
        # make the weight matrix asymmetric.
        core = np.asarray(get_core_decomposition(g))
        row_of_entry = np.repeat(np.arange(n_nodes), np.diff(self.weights.indptr))
        outside_2core = (core[row_of_entry] < 2) | (core[self.weights.indices] < 2)
        self.weights.data[outside_2core] = self.min_weight

        # Stage two, cluster weighted graph
        final_algorithm = Louvain if self.final == "louvain" else Leiden
        clusterer = final_algorithm(resolution=self.resolution, shuffle_nodes=True, sort_clusters=self.sort_clusters, random_state=self.rng.choice(2**32))

        self.labels_ = clusterer.fit_predict(self.weights)
        self.CSI = 1 - np.mean(np.minimum(self.weights.data, 1-self.weights.data))
        self._secondary_outputs(g)
        return self
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: partition_sknetwork
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Ensemble Clustering for Graphs (ECG) and Graph aware measures (GAM) for sknetwork.
|
|
5
|
+
Author-email: Ryan DeWolfe <ryandewolfe33@gmail.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/ftheberge/graph-partition-and-measures
|
|
8
|
+
Project-URL: Issues, https://github.com/ftheberge/graph-partition-and-measures/issues
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.10
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
License-File: LICENSE
|
|
14
|
+
Requires-Dist: numpy>=2.0
|
|
15
|
+
Requires-Dist: scikit-network>=0.33
|
|
16
|
+
Requires-Dist: numba>=0.60.0
|
|
17
|
+
Dynamic: license-file
|
|
18
|
+
|
|
19
|
+
# Graph Partition and Measures
|
|
20
|
+
|
|
21
|
+
Python code implementing 11 graph-aware measures (gam) for comparing graph partitions as well as a stable ensemble-based graph partition algorithm (ecg). This version works with the sknetwork package. Versions for networkx and igraph are also available: partition-networkx, partition-igraph.
|
|
22
|
+
|
|
23
|
+
## Graph aware measures (gam)
|
|
24
|
+
|
|
25
|
+
The measures are respectively:
|
|
26
|
+
* 'rand': the RAND index
|
|
27
|
+
* 'jaccard': the Jaccard index
|
|
28
|
+
* 'mn': pairwise similarity normalized with the mean function
|
|
29
|
+
* 'gmn': pairwise similarity normalized with the geometric mean function
|
|
30
|
+
* 'min': pairwise similarity normalized with the minimum function
|
|
31
|
+
* 'max': pairwise similarity normalized with the maximum function
|
|
32
|
+
|
|
33
|
+
Each measure can be adjusted (recommended) or not, except for 'jaccard'.
|
|
34
|
+
Details can be found in:
|
|
35
|
+
|
|
36
|
+
Valérie Poulin and François Théberge, "Comparing Graph Clusterings: Set Partition Measures vs. Graph-aware Measures",
|
|
37
|
+
IEEE Transactions on Pattern Analysis and Machine Intelligence 43, 6 (2021) https://doi.org/10.1109/TPAMI.2020.3009862
|
|
38
|
+
|
|
39
|
+
## Ensemble clustering for graphs (ecg)
|
|
40
|
+
|
|
41
|
+
This is a good, stable graph partitioning algorithm. Details for ecg can be found in:
|
|
42
|
+
|
|
43
|
+
Valérie Poulin and François Théberge, "Ensemble clustering for graphs: comparisons and applications", Appl Netw Sci 4, 51 (2019).
|
|
44
|
+
https://doi.org/10.1007/s41109-019-0162-z
|
|
45
|
+
|
|
46
|
+
# Example
|
|
47
|
+
|
|
48
|
+
We need to import the supplied Python package partition_sknetwork.
|
|
49
|
+
|
|
50
|
+
```python
|
|
51
|
+
import numpy as np
import sknetwork as sn
|
|
52
|
+
import partition_sknetwork as ps
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
Next, let's build a graph with communities.
|
|
56
|
+
|
|
57
|
+
```python
|
|
58
|
+
block_sizes = [100 for _ in range(10)]
|
|
59
|
+
g = sn.data.models.block_model(block_sizes, 0.1, 0.025, seed=42)
|
|
60
|
+
|
|
61
|
+
# Store the ground truth communities
|
|
62
|
+
labels = np.array([i for i,block_size in enumerate(block_sizes) for _ in range(block_size)])
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
Run Louvain and ecg:
|
|
66
|
+
|
|
67
|
+
```python
|
|
68
|
+
louvain = sn.clustering.Louvain(shuffle_nodes=True, random_state=42).fit_predict(g)
|
|
69
|
+
ecg = ps.ECG(random_state=42).fit_predict(g)
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
Finally, we show a few examples of measures we can compute with gam:
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
print('Adjusted Graph-Aware Rand Index for Louvain:',ps.gam(g, labels, louvain))
|
|
76
|
+
print('Adjusted Graph-Aware Rand Index for ECG:',ps.gam(g, labels, ecg))
|
|
77
|
+
print('\nJaccard Graph-Aware for Louvain:',ps.gam(g, labels, louvain, method="jaccard", adjusted=False))
|
|
78
|
+
print('Jaccard Graph-Aware for ECG:',ps.gam(g, labels, ecg, method="jaccard", adjusted=False))
|
|
79
|
+
```
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
partition_sknetwork/__init__.py
|
|
5
|
+
partition_sknetwork/partition_sknetwork.py
|
|
6
|
+
partition_sknetwork.egg-info/PKG-INFO
|
|
7
|
+
partition_sknetwork.egg-info/SOURCES.txt
|
|
8
|
+
partition_sknetwork.egg-info/dependency_links.txt
|
|
9
|
+
partition_sknetwork.egg-info/requires.txt
|
|
10
|
+
partition_sknetwork.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
partition_sknetwork
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools >= 77.0.3"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "partition_sknetwork"
|
|
7
|
+
version = "0.0.1"
|
|
8
|
+
authors = [
|
|
9
|
+
{ name="Ryan DeWolfe", email="ryandewolfe33@gmail.com" },
|
|
10
|
+
]
|
|
11
|
+
description = "Ensemble Clustering for Graphs (ECG) and Graph aware measures (GAM) for sknetwork."
|
|
12
|
+
classifiers = [
|
|
13
|
+
"Programming Language :: Python :: 3",
|
|
14
|
+
"Operating System :: OS Independent",
|
|
15
|
+
]
|
|
16
|
+
readme = "README.md"
|
|
17
|
+
license = "MIT"
|
|
18
|
+
license-files = ["LICEN[CS]E*"]
|
|
19
|
+
|
|
20
|
+
requires-python = ">=3.10"
|
|
21
|
+
dependencies = [
|
|
22
|
+
"numpy >= 2.0",
|
|
23
|
+
"scikit-network >= 0.33",
|
|
24
|
+
"numba >= 0.60.0",
|
|
25
|
+
]
|
|
26
|
+
|
|
27
|
+
[project.urls]
|
|
28
|
+
Homepage = "https://github.com/ftheberge/graph-partition-and-measures"
|
|
29
|
+
Issues = "https://github.com/ftheberge/graph-partition-and-measures/issues"
|