modularitypruning 1.3.6__tar.gz → 1.4.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. {modularitypruning-1.3.6 → modularitypruning-1.4.1}/PKG-INFO +4 -1
  2. {modularitypruning-1.3.6 → modularitypruning-1.4.1}/README.md +2 -0
  3. {modularitypruning-1.3.6 → modularitypruning-1.4.1}/modularitypruning.egg-info/PKG-INFO +4 -1
  4. {modularitypruning-1.3.6 → modularitypruning-1.4.1}/modularitypruning.egg-info/SOURCES.txt +2 -0
  5. {modularitypruning-1.3.6 → modularitypruning-1.4.1}/modularitypruning.egg-info/requires.txt +1 -0
  6. {modularitypruning-1.3.6 → modularitypruning-1.4.1}/setup.py +4 -3
  7. {modularitypruning-1.3.6 → modularitypruning-1.4.1}/tests/test_champ_coefficients_2D.py +6 -4
  8. {modularitypruning-1.3.6 → modularitypruning-1.4.1}/tests/test_champ_coefficients_3D.py +5 -3
  9. {modularitypruning-1.3.6 → modularitypruning-1.4.1}/tests/test_champ_usage_2D.py +6 -4
  10. {modularitypruning-1.3.6 → modularitypruning-1.4.1}/tests/test_champ_usage_3D.py +6 -4
  11. {modularitypruning-1.3.6 → modularitypruning-1.4.1}/tests/test_deprecated_louvain_names.py +2 -1
  12. modularitypruning-1.4.1/tests/test_documentation_examples.py +266 -0
  13. {modularitypruning-1.3.6 → modularitypruning-1.4.1}/tests/test_monolayer_parameter_estimation.py +8 -6
  14. {modularitypruning-1.3.6 → modularitypruning-1.4.1}/tests/test_multiplex_parameter_estimation.py +12 -10
  15. modularitypruning-1.4.1/tests/test_parallel_leiden_performance.py +149 -0
  16. {modularitypruning-1.3.6 → modularitypruning-1.4.1}/tests/test_temporal_multilevel_parameter_estimation.py +8 -6
  17. {modularitypruning-1.3.6 → modularitypruning-1.4.1}/utilities/champ_utilities.py +8 -6
  18. {modularitypruning-1.3.6 → modularitypruning-1.4.1}/utilities/leiden_utilities.py +38 -74
  19. {modularitypruning-1.3.6 → modularitypruning-1.4.1}/utilities/louvain_utilities.py +6 -4
  20. {modularitypruning-1.3.6 → modularitypruning-1.4.1}/utilities/parameter_estimation.py +2 -1
  21. {modularitypruning-1.3.6 → modularitypruning-1.4.1}/utilities/parameter_estimation_utilities.py +29 -9
  22. {modularitypruning-1.3.6 → modularitypruning-1.4.1}/utilities/partition_utilities.py +1 -0
  23. {modularitypruning-1.3.6 → modularitypruning-1.4.1}/utilities/plotting.py +9 -7
  24. {modularitypruning-1.3.6 → modularitypruning-1.4.1}/LICENSE +0 -0
  25. {modularitypruning-1.3.6 → modularitypruning-1.4.1}/modularitypruning.egg-info/dependency_links.txt +0 -0
  26. {modularitypruning-1.3.6 → modularitypruning-1.4.1}/modularitypruning.egg-info/top_level.txt +0 -0
  27. {modularitypruning-1.3.6 → modularitypruning-1.4.1}/setup.cfg +0 -0
  28. {modularitypruning-1.3.6 → modularitypruning-1.4.1}/utilities/__init__.py +0 -0
  29. {modularitypruning-1.3.6 → modularitypruning-1.4.1}/utilities/progress.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: modularitypruning
3
- Version: 1.3.6
3
+ Version: 1.4.1
4
4
  Summary: Pruning tool to identify small subsets of network partitions that are significant from the perspective of stochastic block model inference.
5
5
  Home-page: https://github.com/ragibson/ModularityPruning
6
6
  Author: Ryan Gibson
@@ -26,6 +26,7 @@ Requires-Dist: igraph
26
26
  Requires-Dist: scikit-learn
27
27
  Requires-Dist: scipy>=1.7
28
28
  Requires-Dist: seaborn
29
+ Requires-Dist: tqdm
29
30
 
30
31
  # ModularityPruning
31
32
 
@@ -46,6 +47,7 @@ https://static-content.springer.com/esm/art%3A10.1038%2Fs41598-022-20142-6/Media
46
47
  ).
47
48
 
48
49
  ## Installation
50
+
49
51
  This project is on [PyPI](https://pypi.org/project/modularitypruning/) and can
50
52
  be installed with
51
53
 
@@ -60,6 +62,7 @@ Alternatively, you can install it from this repository directly:
60
62
  python3 setup.py install
61
63
 
62
64
  <a name = "Basic Usage"></a>
65
+
63
66
  ## Basic Usage
64
67
 
65
68
  This package interfaces directly with python-igraph. A simple example of its
@@ -17,6 +17,7 @@ https://static-content.springer.com/esm/art%3A10.1038%2Fs41598-022-20142-6/Media
17
17
  ).
18
18
 
19
19
  ## Installation
20
+
20
21
  This project is on [PyPI](https://pypi.org/project/modularitypruning/) and can
21
22
  be installed with
22
23
 
@@ -31,6 +32,7 @@ Alternatively, you can install it from this repository directly:
31
32
  python3 setup.py install
32
33
 
33
34
  <a name = "Basic Usage"></a>
35
+
34
36
  ## Basic Usage
35
37
 
36
38
  This package interfaces directly with python-igraph. A simple example of its
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: modularitypruning
3
- Version: 1.3.6
3
+ Version: 1.4.1
4
4
  Summary: Pruning tool to identify small subsets of network partitions that are significant from the perspective of stochastic block model inference.
5
5
  Home-page: https://github.com/ragibson/ModularityPruning
6
6
  Author: Ryan Gibson
@@ -26,6 +26,7 @@ Requires-Dist: igraph
26
26
  Requires-Dist: scikit-learn
27
27
  Requires-Dist: scipy>=1.7
28
28
  Requires-Dist: seaborn
29
+ Requires-Dist: tqdm
29
30
 
30
31
  # ModularityPruning
31
32
 
@@ -46,6 +47,7 @@ https://static-content.springer.com/esm/art%3A10.1038%2Fs41598-022-20142-6/Media
46
47
  ).
47
48
 
48
49
  ## Installation
50
+
49
51
  This project is on [PyPI](https://pypi.org/project/modularitypruning/) and can
50
52
  be installed with
51
53
 
@@ -60,6 +62,7 @@ Alternatively, you can install it from this repository directly:
60
62
  python3 setup.py install
61
63
 
62
64
  <a name = "Basic Usage"></a>
65
+
63
66
  ## Basic Usage
64
67
 
65
68
  This package interfaces directly with python-igraph. A simple example of its
@@ -11,8 +11,10 @@ tests/test_champ_coefficients_3D.py
11
11
  tests/test_champ_usage_2D.py
12
12
  tests/test_champ_usage_3D.py
13
13
  tests/test_deprecated_louvain_names.py
14
+ tests/test_documentation_examples.py
14
15
  tests/test_monolayer_parameter_estimation.py
15
16
  tests/test_multiplex_parameter_estimation.py
17
+ tests/test_parallel_leiden_performance.py
16
18
  tests/test_temporal_multilevel_parameter_estimation.py
17
19
  utilities/__init__.py
18
20
  utilities/champ_utilities.py
@@ -6,3 +6,4 @@ igraph
6
6
  scikit-learn
7
7
  scipy>=1.7
8
8
  seaborn
9
+ tqdm
@@ -1,6 +1,7 @@
1
- from setuptools import setup
2
1
  import os
3
2
 
3
+ from setuptools import setup
4
+
4
5
  here = os.path.abspath(os.path.dirname(__file__))
5
6
 
6
7
  # Get the long description from the README file
@@ -9,7 +10,7 @@ with open(os.path.join(here, 'README.md'), encoding='utf-8') as f:
9
10
 
10
11
  setup(
11
12
  name='modularitypruning',
12
- version='1.3.6',
13
+ version='1.4.1',
13
14
  package_dir={'modularitypruning': 'utilities'},
14
15
  packages=['modularitypruning'],
15
16
  url='https://github.com/ragibson/ModularityPruning',
@@ -34,5 +35,5 @@ setup(
34
35
  ],
35
36
  python_requires='>=3.8, <4',
36
37
  install_requires=['leidenalg', 'matplotlib', "numpy", 'psutil', 'igraph',
37
- "scikit-learn", "scipy>=1.7", 'seaborn']
38
+ "scikit-learn", "scipy>=1.7", 'seaborn', 'tqdm']
38
39
  )
@@ -1,9 +1,11 @@
1
- from .shared_testing_functions import generate_connected_ER, generate_random_values, generate_random_partitions, \
2
- generate_igraph_famous
1
+ import unittest
2
+ from random import seed
3
+
3
4
  from modularitypruning.champ_utilities import partition_coefficients_2D
4
5
  from modularitypruning.leiden_utilities import leiden_part_with_membership, repeated_leiden_from_gammas
5
- from random import seed
6
- import unittest
6
+
7
+ from .shared_testing_functions import generate_connected_ER, generate_random_values, generate_random_partitions, \
8
+ generate_igraph_famous
7
9
 
8
10
 
9
11
  class TestCHAMPCoefficients2D(unittest.TestCase):
@@ -1,8 +1,10 @@
1
- from .shared_testing_functions import generate_connected_multilayer_ER, generate_random_partitions
1
+ import unittest
2
+ from random import seed
3
+
2
4
  from modularitypruning.champ_utilities import partition_coefficients_3D
3
5
  from modularitypruning.leiden_utilities import multilayer_leiden_part_with_membership, leiden_part_with_membership
4
- from random import seed
5
- import unittest
6
+
7
+ from .shared_testing_functions import generate_connected_multilayer_ER, generate_random_partitions
6
8
 
7
9
 
8
10
  class TestCHAMPCoefficients3D(unittest.TestCase):
@@ -1,9 +1,11 @@
1
- from .shared_testing_functions import generate_connected_ER, generate_random_values, generate_random_partitions, \
2
- generate_igraph_famous
1
+ import unittest
2
+ from random import seed
3
+
3
4
  from modularitypruning.champ_utilities import CHAMP_2D
4
5
  from modularitypruning.leiden_utilities import leiden_part_with_membership, repeated_leiden_from_gammas
5
- from random import seed
6
- import unittest
6
+
7
+ from .shared_testing_functions import generate_connected_ER, generate_random_values, generate_random_partitions, \
8
+ generate_igraph_famous
7
9
 
8
10
 
9
11
  class TestCHAMP2D(unittest.TestCase):
@@ -1,10 +1,12 @@
1
- from .shared_testing_functions import generate_connected_multilayer_ER, generate_random_values, \
2
- generate_random_partitions
1
+ import unittest
2
+ from random import seed
3
+
3
4
  from modularitypruning.champ_utilities import CHAMP_3D
4
5
  from modularitypruning.leiden_utilities import multilayer_leiden_part_with_membership
5
6
  from numpy import mean
6
- from random import seed
7
- import unittest
7
+
8
+ from .shared_testing_functions import generate_connected_multilayer_ER, generate_random_values, \
9
+ generate_random_partitions
8
10
 
9
11
 
10
12
  def point_is_inside_champ_domain(gamma, omega, domain_vertices):
@@ -1,6 +1,7 @@
1
- import pytest
2
1
  import unittest
3
2
 
3
+ import pytest
4
+
4
5
 
5
6
  class TestDeprecatedLouvainNames(unittest.TestCase):
6
7
  def test_deprecated_louvain_module(self):
@@ -0,0 +1,266 @@
1
+ """
2
+ This set of tests checks that the examples from the documentation still work correctly.
3
+
4
+ Sometimes this is simply checking that the code produces the intended output or runs without errors.
5
+ """
6
+ import unittest
7
+ from random import seed, random
8
+
9
+ import igraph as ig
10
+ import matplotlib.pyplot as plt
11
+ import numpy as np
12
+ from modularitypruning import prune_to_stable_partitions, prune_to_multilayer_stable_partitions
13
+ from modularitypruning.champ_utilities import CHAMP_2D, CHAMP_3D
14
+ from modularitypruning.leiden_utilities import (repeated_parallel_leiden_from_gammas,
15
+ repeated_parallel_leiden_from_gammas_omegas)
16
+ from modularitypruning.parameter_estimation_utilities import domains_to_gamma_omega_estimates, ranges_to_gamma_estimates
17
+ from modularitypruning.partition_utilities import num_communities
18
+ from modularitypruning.plotting import (plot_2d_domains_with_estimates, plot_2d_domains, plot_2d_domains_with_ami,
19
+ plot_2d_domains_with_num_communities, plot_estimates, plot_multiplex_community)
20
+
21
+
22
+ class TestDocumentationExamples(unittest.TestCase):
23
+ def test_basic_singlelayer_example(self):
24
+ """
25
+ Taken verbatim from basic_example.rst.
26
+
27
+ Like a lot of our other tests, this is stochastic but appears incredibly stable.
28
+ """
29
+ # get Karate Club graph in igraph
30
+ G = ig.Graph.Famous("Zachary")
31
+
32
+ # run leiden 1000 times on this graph from gamma=0 to gamma=2
33
+ partitions = repeated_parallel_leiden_from_gammas(G, np.linspace(0, 2, 1000))
34
+
35
+ # prune to the stable partitions from gamma=0 to gamma=2
36
+ stable_partitions = prune_to_stable_partitions(G, partitions, 0, 2)
37
+
38
+ intended_stable_partition = [(0, 0, 0, 0, 1, 1, 1, 0, 2, 2, 1, 0, 0, 0, 2, 2, 1,
39
+ 0, 2, 0, 2, 0, 2, 3, 3, 3, 2, 3, 3, 2, 2, 3, 2, 2)]
40
+ self.assertEqual(stable_partitions, intended_stable_partition)
41
+
42
+ @staticmethod
43
+ def generate_basic_multilayer_network():
44
+ """This is taken verbatim from basic_multilayer_example.rst."""
45
+ num_layers = 3
46
+ n_per_layer = 30
47
+ p_in = 0.5
48
+ p_out = 0.05
49
+ K = 3
50
+
51
+ # layer_vec holds the layer membership of each node
52
+ # e.g. layer_vec[5] = 2 means that node 5 resides in layer 2 (the third layer)
53
+ layer_vec = [i // n_per_layer for i in range(n_per_layer * num_layers)]
54
+ interlayer_edges = [(n_per_layer * layer + v, n_per_layer * layer + v + n_per_layer)
55
+ for layer in range(num_layers - 1)
56
+ for v in range(n_per_layer)]
57
+
58
+ # set up a community vector with
59
+ # three communities in layer 0 (each of size 10)
60
+ # three communities in layer 1 (each of size 10)
61
+ # one community in layer 2 (of size 30)
62
+ comm_per_layer = [[i // (n_per_layer // K) if layer < num_layers - 1 else 0
63
+ for i in range(n_per_layer)] for layer in range(num_layers)]
64
+ comm_vec = [item for sublist in comm_per_layer for item in sublist]
65
+
66
+ # randomly connect nodes inside each layer with undirected edges according to
67
+ # within-community probability p_in and between-community probability p_out
68
+ intralayer_edges = [(u, v) for v in range(len(comm_vec)) for u in range(v + 1, len(comm_vec))
69
+ if layer_vec[v] == layer_vec[u] and (
70
+ (comm_vec[v] == comm_vec[u] and random() < p_in) or
71
+ (comm_vec[v] != comm_vec[u] and random() < p_out)
72
+ )]
73
+
74
+ # create the networks in igraph. By Pamfil et al.'s convention, the interlayer edges
75
+ # of a temporal network are directed (representing the "one-way" nature of time)
76
+ G_intralayer = ig.Graph(intralayer_edges)
77
+ G_interlayer = ig.Graph(interlayer_edges, directed=True)
78
+
79
+ return G_intralayer, G_interlayer, layer_vec
80
+
81
+ def test_basic_multilayer_example(self):
82
+ """
83
+ This is taken verbatim from basic_multilayer_example.rst.
84
+
85
+ For simplicity and re-use, the network generation is encapsulated in generate_basic_multilayer_network().
86
+ """
87
+ n_per_layer = 30 # from network generation code
88
+ G_intralayer, G_interlayer, layer_vec = self.generate_basic_multilayer_network()
89
+
90
+ # run leidenalg on a uniform 32x32 grid (1024 samples) of gamma and omega in [0, 2]
91
+ gamma_range = (0, 2)
92
+ omega_range = (0, 2)
93
+ leiden_gammas = np.linspace(*gamma_range, 32)
94
+ leiden_omegas = np.linspace(*omega_range, 32)
95
+
96
+ parts = repeated_parallel_leiden_from_gammas_omegas(G_intralayer, G_interlayer, layer_vec,
97
+ gammas=leiden_gammas, omegas=leiden_omegas)
98
+
99
+ # prune to the stable partitions from (gamma=0, omega=0) to (gamma=2, omega=2)
100
+ stable_parts = prune_to_multilayer_stable_partitions(G_intralayer, G_interlayer, layer_vec,
101
+ "temporal", parts,
102
+ *gamma_range, *omega_range)
103
+
104
+ # check all 3-partition stable partitions closely match ground truth communities
105
+ for membership in stable_parts:
106
+ if num_communities(membership) != 3:
107
+ continue
108
+
109
+ most_common_label = []
110
+ for chunk_idx in range(6): # check most common label of each community (10 nodes each)
111
+ counts = {i: 0 for i in range(max(membership) + 1)}
112
+ for chunk_label in membership[10 * chunk_idx:10 * (chunk_idx + 1)]:
113
+ counts[chunk_label] += 1
114
+ most_common_label.append(max(counts.items(), key=lambda x: x[1])[0])
115
+
116
+ # check these communities look like the intended ground truth communities for the first layer
117
+ # [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
118
+ self.assertNotEqual(most_common_label[0], most_common_label[1])
119
+ self.assertNotEqual(most_common_label[1], most_common_label[2])
120
+
121
+ # at least one partition has the last layer mostly in one community and another splits it into multiple
122
+ unified_final_layer_count = 0
123
+ split_final_layer_count = 0
124
+ for membership in stable_parts:
125
+ count_final_layer = {i: 0 for i in range(max(membership) + 1)}
126
+ for label in membership[-n_per_layer:]:
127
+ count_final_layer[label] += 1
128
+ most_common_label_final_layer, most_common_label_count = max(count_final_layer.items(),
129
+ key=lambda x: x[1])
130
+ proportion_final_layer_having_same_label = most_common_label_count / n_per_layer
131
+
132
+ if proportion_final_layer_having_same_label > 0.9:
133
+ unified_final_layer_count += 1
134
+ elif proportion_final_layer_having_same_label < 0.5:
135
+ split_final_layer_count += 1
136
+
137
+ self.assertGreater(unified_final_layer_count, 0)
138
+ self.assertGreater(split_final_layer_count, 0)
139
+
140
+ def test_plot_estimates_example(self):
141
+ """
142
+ This is taken (almost) verbatim from plotting_examples.rst.
143
+
144
+ The first call to plt.rc() has usetex=False (instead of True) to avoid requiring a full LaTeX installation.
145
+ """
146
+ # get Karate Club graph in igraph
147
+ G = ig.Graph.Famous("Zachary")
148
+
149
+ # run leiden 100K times on this graph from gamma=0 to gamma=2 (takes ~2-3 seconds)
150
+ partitions = repeated_parallel_leiden_from_gammas(G, np.linspace(0, 2, 10 ** 5))
151
+
152
+ # run CHAMP to obtain the dominant partitions along with their regions of optimality
153
+ ranges = CHAMP_2D(G, partitions, gamma_0=0.0, gamma_f=2.0)
154
+
155
+ # append gamma estimate for each dominant partition onto the CHAMP domains
156
+ gamma_estimates = ranges_to_gamma_estimates(G, ranges)
157
+
158
+ # plot gamma estimates and domains of optimality
159
+ plt.rc('text', usetex=False)
160
+ plt.rc('font', family='serif')
161
+ plot_estimates(gamma_estimates)
162
+ plt.title(r"Karate Club CHAMP Domains of Optimality and $\gamma$ Estimates", fontsize=14)
163
+ plt.xlabel(r"$\gamma$", fontsize=14)
164
+ plt.ylabel("Number of communities", fontsize=14)
165
+
166
+ def test_plot_2d_domains_examples(self):
167
+ """
168
+ This is taken (almost) verbatim from plotting_examples.rst.
169
+
170
+ The first call to plt.rc() has usetex=False (instead of True) to avoid requiring a full LaTeX installation.
171
+
172
+ The documentation explicitly shows plot_2d_domains_with_estimates() and describes other, similar functions
173
+ * plot_2d_domains()
174
+ * plot_2d_domains_with_ami()
175
+ * plot_2d_domains_with_num_communities()
176
+ As such, we test them all here.
177
+ """
178
+ G_intralayer, G_interlayer, layer_vec = self.generate_basic_multilayer_network()
179
+ # run leiden on a uniform grid (10K samples) of gamma and omega (takes ~3 seconds)
180
+ gamma_range = (0.5, 1.5)
181
+ omega_range = (0, 2)
182
+ parts = repeated_parallel_leiden_from_gammas_omegas(G_intralayer, G_interlayer, layer_vec,
183
+ gammas=np.linspace(*gamma_range, 100),
184
+ omegas=np.linspace(*omega_range, 100))
185
+
186
+ # run CHAMP to obtain the dominant partitions along with their regions of optimality
187
+ domains = CHAMP_3D(G_intralayer, G_interlayer, layer_vec, parts,
188
+ gamma_0=gamma_range[0], gamma_f=gamma_range[1],
189
+ omega_0=omega_range[0], omega_f=omega_range[1])
190
+
191
+ # append resolution parameter estimates for each dominant partition onto the CHAMP domains
192
+ domains_with_estimates = domains_to_gamma_omega_estimates(G_intralayer, G_interlayer, layer_vec,
193
+ domains, model='temporal')
194
+
195
+ # plot resolution parameter estimates and domains of optimality
196
+ plt.rc('text', usetex=False)
197
+ plt.rc('font', family='serif')
198
+ plot_2d_domains_with_estimates(domains_with_estimates, xlim=omega_range, ylim=gamma_range)
199
+ plt.title(r"CHAMP Domains and ($\omega$, $\gamma$) Estimates", fontsize=16)
200
+ plt.xlabel(r"$\omega$", fontsize=20)
201
+ plt.ylabel(r"$\gamma$", fontsize=20)
202
+ plt.gca().tick_params(axis='both', labelsize=12)
203
+ plt.tight_layout()
204
+
205
+ # same plotting code, but with plot_2d_domains()
206
+ plt.rc('text', usetex=False)
207
+ plt.rc('font', family='serif')
208
+ plot_2d_domains(domains, xlim=omega_range, ylim=gamma_range)
209
+ plt.title(r"CHAMP Domains", fontsize=16)
210
+ plt.xlabel(r"$\omega$", fontsize=20)
211
+ plt.ylabel(r"$\gamma$", fontsize=20)
212
+ plt.gca().tick_params(axis='both', labelsize=12)
213
+ plt.tight_layout()
214
+
215
+ # same plotting code, but with plot_2d_domains_with_ami()
216
+ plt.rc('text', usetex=False)
217
+ plt.rc('font', family='serif')
218
+ ground_truth_partition = ([0] * 10 + [1] * 10 + [2] * 10) * 2 + [0] * 30
219
+ plot_2d_domains_with_ami(domains_with_estimates, ground_truth=ground_truth_partition,
220
+ xlim=omega_range, ylim=gamma_range)
221
+ plt.title(r"CHAMP Domains, Colored by AMI with Ground Truth", fontsize=16)
222
+ plt.xlabel(r"$\omega$", fontsize=20)
223
+ plt.ylabel(r"$\gamma$", fontsize=20)
224
+ plt.gca().tick_params(axis='both', labelsize=12)
225
+ plt.tight_layout()
226
+
227
+ # same plotting code, but with plot_2d_domains_with_num_communities()
228
+ plt.rc('text', usetex=False)
229
+ plt.rc('font', family='serif')
230
+ plot_2d_domains_with_num_communities(domains_with_estimates, xlim=omega_range, ylim=gamma_range)
231
+ plt.title(r"CHAMP Domains, Colored by Number of Communities", fontsize=16)
232
+ plt.xlabel(r"$\omega$", fontsize=20)
233
+ plt.ylabel(r"$\gamma$", fontsize=20)
234
+ plt.gca().tick_params(axis='both', labelsize=12)
235
+ plt.tight_layout()
236
+ plt.close() # closing all these figures instead of showing
237
+
238
+ def test_plot_multiplex_community(self):
239
+ """
240
+ This is taken (almost) verbatim from plotting_examples.rst.
241
+
242
+ The first call to plt.rc() has usetex=False (instead of True) to avoid requiring a full LaTeX installation.
243
+ """
244
+ num_layers = 3
245
+ layer_vec = [i // 71 for i in range(num_layers * 71)]
246
+ membership = [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
247
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
248
+ 2, 2, 2, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
249
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2,
250
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
251
+ 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 2, 2, 2,
252
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
253
+
254
+ plt.rc('text', usetex=False)
255
+ plt.rc('font', family='serif')
256
+ ax = plot_multiplex_community(np.array(membership), np.array(layer_vec))
257
+ ax.set_xticks(np.linspace(0, num_layers, 2 * num_layers + 1))
258
+ ax.set_xticklabels(["", "Advice", "", "Coworker", "", "Friend", ""], fontsize=14)
259
+ plt.title(f"Multiplex Communities", fontsize=14)
260
+ plt.ylabel("Node ID", fontsize=14)
261
+ plt.close() # closing this figure instead of showing it
262
+
263
+
264
+ if __name__ == "__main__":
265
+ seed(0)
266
+ unittest.main()
@@ -1,13 +1,15 @@
1
- from .shared_testing_functions import assert_almost_equal_or_both_none_or_nan, generate_igraph_famous, \
2
- generate_random_partition
3
- import igraph as ig
1
+ import unittest
4
2
  from math import log
5
- from numpy import mean
3
+ from random import seed
4
+
5
+ import igraph as ig
6
6
  from modularitypruning.parameter_estimation import iterative_monolayer_resolution_parameter_estimation
7
7
  from modularitypruning.parameter_estimation_utilities import gamma_estimate
8
8
  from modularitypruning.partition_utilities import all_degrees
9
- from random import seed
10
- import unittest
9
+ from numpy import mean
10
+
11
+ from .shared_testing_functions import assert_almost_equal_or_both_none_or_nan, generate_igraph_famous, \
12
+ generate_random_partition
11
13
 
12
14
 
13
15
  class TestMonolayerParameterEstimation(unittest.TestCase):
@@ -1,14 +1,16 @@
1
- from .shared_testing_functions import assert_almost_equal_or_both_none_or_nan, generate_random_partition, \
2
- generate_multilayer_intralayer_SBM
3
- import igraph as ig
1
+ import unittest
4
2
  from math import log
5
- from numpy import mean
3
+ from random import seed
4
+
5
+ import igraph as ig
6
6
  from modularitypruning.leiden_utilities import repeated_leiden_from_gammas_omegas
7
7
  from modularitypruning.parameter_estimation import iterative_multilayer_resolution_parameter_estimation
8
8
  from modularitypruning.parameter_estimation_utilities import gamma_omega_estimate
9
9
  from modularitypruning.partition_utilities import num_communities, all_degrees
10
- from random import seed
11
- import unittest
10
+ from numpy import mean
11
+
12
+ from .shared_testing_functions import assert_almost_equal_or_both_none_or_nan, generate_random_partition, \
13
+ generate_multilayer_intralayer_SBM
12
14
 
13
15
 
14
16
  class TestMultiplexParameterEstimation(unittest.TestCase):
@@ -49,10 +51,10 @@ class TestMultiplexParameterEstimation(unittest.TestCase):
49
51
  model='multiplex')
50
52
 
51
53
  # check we converged close to the ground truth "correct" values
52
- # the multiplex omega estimation seems less accurate than in other models, perhaps due to
53
- # the copying probability approximation
54
- self.assertLess(abs(true_gamma - gamma), 0.05)
55
- self.assertLess(abs(true_omega - omega), 0.15)
54
+ # the multiplex parameter estimation is much less robust and less accurate than in other models,
55
+ # perhaps due to the copying probability approximation
56
+ self.assertLess(abs(true_gamma - gamma), 0.1)
57
+ self.assertLess(abs(true_omega - omega), 0.2)
56
58
 
57
59
  def test_multiplex_SBM_correct_convergence_varying_copying_probabilty(self):
58
60
  for eta in [0.25, 0.5, 0.75, 0.9]:
@@ -0,0 +1,149 @@
1
+ import functools
2
+ import unittest
3
+ import warnings
4
+ from multiprocessing import Pool, cpu_count
5
+ from random import seed
6
+ from time import time
7
+
8
+ import igraph as ig
9
+ import numpy as np
10
+ import psutil
11
+ import pytest
12
+ from modularitypruning.leiden_utilities import (repeated_leiden_from_gammas, repeated_parallel_leiden_from_gammas,
13
+ repeated_leiden_from_gammas_omegas,
14
+ repeated_parallel_leiden_from_gammas_omegas)
15
+
16
+ from .shared_testing_functions import generate_connected_ER, generate_multilayer_intralayer_SBM
17
+
18
+ # this set of tests ensures that we achieve >= 75% parallel performance compared to perfect scaling of
19
+ # single-threaded jobs to multiple cores (with no memory contention). This threshold will be decreased in
20
+ # determine_target_parallelization_speedup() if the background CPU utilization exceeds 20%.
21
+ PERFORMANCE_TARGET_RELATIVE_TO_PERFECT_SCALING = 0.75
22
+
23
+
24
+ def mock_calculation(_):
25
+ """A mock calculation that provides enough work to make serialization overhead negligible."""
26
+ return sum(range(10 ** 7))
27
+
28
+
29
+ @functools.lru_cache(maxsize=1)
30
+ def determine_target_parallelization_speedup(num_calculations=32):
31
+ """
32
+ Calculate the parallelization speedup on mock_calculation to benchmark our implementation against.
33
+
34
+ This performs
35
+ * ``num_calculations`` function calls in the single-threaded case, and
36
+ * ``num_calculations * cpu_count()`` calls in the multi-processed case
37
+
38
+ Due in part to frequency scaling and simple memory contention, leidenalg over multiple processes (completely
39
+ outside of Python or multiprocessing.Pool) seems to run at around (90% * core count) speedup on modern systems when
40
+ hyper-threading is disabled.
41
+ """
42
+ global PERFORMANCE_TARGET_RELATIVE_TO_PERFECT_SCALING
43
+
44
+ cpu_utilization = psutil.cpu_percent(interval=5)
45
+ if cpu_utilization > 20:
46
+ PERFORMANCE_TARGET_RELATIVE_TO_PERFECT_SCALING = 0.5
47
+ warnings.warn(f"System CPU utilization is non-negligible during parallel performance test! "
48
+ f"Dropping performance scaling target to 50%.")
49
+
50
+ start_time = time()
51
+ _ = [mock_calculation(i) for i in range(num_calculations)]
52
+ base_duration = time() - start_time
53
+
54
+ num_pool_calculations = num_calculations * cpu_count()
55
+ with Pool(processes=cpu_count()) as pool:
56
+ pool.map(mock_calculation, range(cpu_count())) # force pool initialization and basic burn-in
57
+
58
+ start_time = time()
59
+ pool.map(mock_calculation, range(num_pool_calculations))
60
+ pool_duration = time() - start_time
61
+
62
+ return num_pool_calculations / num_calculations * base_duration / pool_duration
63
+
64
+
65
+ @pytest.mark.serial # these tests have to run serially for the parallel performance comparisons to make sense
66
+ class TestParallelLeidenPerformance(unittest.TestCase):
67
+ @staticmethod
68
+ def run_singlelayer_graph_parallelization(G, gammas):
69
+ target_speedup = determine_target_parallelization_speedup()
70
+
71
+ start_time = time()
72
+ _ = repeated_leiden_from_gammas(G, gammas)
73
+ duration = time() - start_time
74
+
75
+ pool_gammas = np.linspace(min(gammas), max(gammas), len(gammas) * cpu_count())
76
+ start_time = time()
77
+ _ = repeated_parallel_leiden_from_gammas(G, pool_gammas)
78
+ pool_duration = time() - start_time
79
+
80
+ speedup = len(pool_gammas) / len(gammas) * duration / pool_duration
81
+ return speedup / target_speedup
82
+
83
+ @staticmethod
84
+ def run_multilayer_graph_parallelization(G_intralayer, G_interlayer, layer_membership, gammas, omegas):
85
+ target_speedup = determine_target_parallelization_speedup()
86
+
87
+ start_time = time()
88
+ _ = repeated_leiden_from_gammas_omegas(G_intralayer, G_interlayer, layer_membership, gammas, omegas)
89
+ duration = time() - start_time
90
+
91
+ pool_gammas = np.linspace(min(gammas), max(gammas), int(len(gammas) * np.sqrt(cpu_count())))
92
+ pool_omegas = np.linspace(min(omegas), max(omegas), int(len(omegas) * np.sqrt(cpu_count())))
93
+ start_time = time()
94
+ _ = repeated_parallel_leiden_from_gammas_omegas(
95
+ G_intralayer, G_interlayer, layer_membership, pool_gammas, pool_omegas
96
+ )
97
+ pool_duration = time() - start_time
98
+
99
+ speedup = len(pool_gammas) * len(pool_omegas) / len(gammas) / len(omegas) * duration / pool_duration
100
+ return speedup / target_speedup
101
+
102
+ def test_tiny_singlelayer_graph_many_runs(self):
103
+ """Single-threaded equivalent is 25k runs on G(n=34, m=78)."""
104
+ G = ig.Graph.Famous("Zachary")
105
+ gammas = np.linspace(0.0, 4.0, 25000)
106
+ parallelization = self.run_singlelayer_graph_parallelization(G, gammas)
107
+ self.assertGreater(parallelization, PERFORMANCE_TARGET_RELATIVE_TO_PERFECT_SCALING)
108
+
109
+ def test_larger_singlelayer_graph_few_runs(self):
110
+ """Single-threaded equivalent is 50 runs on G(n=10000, m=40000)."""
111
+ G = generate_connected_ER(n=10000, m=40000, directed=False)
112
+ gammas = np.linspace(0.0, 2.0, 50)
113
+ parallelization = self.run_singlelayer_graph_parallelization(G, gammas)
114
+ self.assertGreater(parallelization, PERFORMANCE_TARGET_RELATIVE_TO_PERFECT_SCALING)
115
+
116
+ def test_tiny_multilayer_graph_many_runs(self):
117
+ """Single-threaded equivalent is 10k runs on G(n=50, m=150)."""
118
+ G_intralayer, layer_membership = generate_multilayer_intralayer_SBM(
119
+ copying_probability=0.9, p_in=0.8, p_out=0.2, first_layer_membership=[0] * 5 + [1] * 5, num_layers=5
120
+ )
121
+ interlayer_edges = [(10 * layer + v, 10 * layer + v + 10)
122
+ for layer in range(5 - 1) for v in range(10)]
123
+ G_interlayer = ig.Graph(interlayer_edges, directed=True)
124
+
125
+ gammas = np.linspace(0.0, 2.0, 100)
126
+ omegas = np.linspace(0.0, 2.0, 100)
127
+ parallelization = self.run_multilayer_graph_parallelization(G_intralayer, G_interlayer,
128
+ layer_membership, gammas, omegas)
129
+ self.assertGreater(parallelization, PERFORMANCE_TARGET_RELATIVE_TO_PERFECT_SCALING)
130
+
131
+ def test_larger_multilayer_graph_few_runs(self):
132
+ """Single-threaded equivalent is 49 runs on approximately G(n=2500, m=15000)."""
133
+ G_intralayer, layer_membership = generate_multilayer_intralayer_SBM(
134
+ copying_probability=0.9, p_in=0.15, p_out=0.05, first_layer_membership=[0] * 50 + [1] * 50, num_layers=25
135
+ )
136
+ interlayer_edges = [(100 * layer + v, 100 * layer + v + 100)
137
+ for layer in range(25 - 1) for v in range(100)]
138
+ G_interlayer = ig.Graph(interlayer_edges, directed=True)
139
+
140
+ gammas = np.linspace(0.0, 2.0, 7)
141
+ omegas = np.linspace(0.0, 2.0, 7)
142
+ parallelization = self.run_multilayer_graph_parallelization(G_intralayer, G_interlayer,
143
+ layer_membership, gammas, omegas)
144
+ self.assertGreater(parallelization, PERFORMANCE_TARGET_RELATIVE_TO_PERFECT_SCALING)
145
+
146
+
147
+ if __name__ == "__main__":
148
+ seed(0)
149
+ unittest.main()
@@ -1,14 +1,16 @@
1
- from .shared_testing_functions import assert_almost_equal_or_both_none_or_nan, generate_random_partition, \
2
- generate_multilayer_intralayer_SBM
3
- import igraph as ig
1
+ import unittest
4
2
  from math import log
5
- from numpy import mean
3
+ from random import seed
4
+
5
+ import igraph as ig
6
6
  from modularitypruning.leiden_utilities import repeated_leiden_from_gammas_omegas
7
7
  from modularitypruning.parameter_estimation import iterative_multilayer_resolution_parameter_estimation
8
8
  from modularitypruning.parameter_estimation_utilities import gamma_omega_estimate
9
9
  from modularitypruning.partition_utilities import num_communities, all_degrees
10
- from random import seed
11
- import unittest
10
+ from numpy import mean
11
+
12
+ from .shared_testing_functions import assert_almost_equal_or_both_none_or_nan, generate_random_partition, \
13
+ generate_multilayer_intralayer_SBM
12
14
 
13
15
 
14
16
  class TestTemporalAndMultilevelParameterEstimation(unittest.TestCase):
@@ -1,14 +1,16 @@
1
- from .partition_utilities import all_degrees, in_degrees, out_degrees, membership_to_communities, \
2
- membership_to_layered_communities
1
+ import warnings
3
2
  from collections import defaultdict
4
- import numpy as np
5
- from numpy.random import choice
6
3
  from math import floor
7
4
  from multiprocessing import Pool, cpu_count
8
- from scipy.spatial import HalfspaceIntersection
5
+
6
+ import numpy as np
7
+ from numpy.random import choice
9
8
  from scipy.linalg import LinAlgWarning
10
9
  from scipy.optimize import linprog, OptimizeWarning
11
- import warnings
10
+ from scipy.spatial import HalfspaceIntersection
11
+
12
+ from .partition_utilities import all_degrees, in_degrees, out_degrees, membership_to_communities, \
13
+ membership_to_layered_communities
12
14
 
13
15
 
14
16
  def get_interior_point(halfspaces, initial_num_sampled=50, full_retry_limit=10):
@@ -1,12 +1,10 @@
1
- from .progress import Progress
2
1
  import functools
2
+ from multiprocessing import Pool, cpu_count
3
+
3
4
  import igraph as ig
4
5
  import leidenalg
5
- from math import ceil
6
- from multiprocessing import Pool, cpu_count
7
6
  import numpy as np
8
- import psutil
9
- import warnings
7
+ from tqdm import tqdm
10
8
 
11
9
  LOW_MEMORY_THRESHOLD = 1e9 # 1 GB
12
10
 
@@ -50,6 +48,11 @@ def singlelayer_leiden(G, gamma, return_partition=False):
50
48
  return tuple(partition.membership)
51
49
 
52
50
 
51
+ def _wrapped_singlelayer_leiden(args):
52
+ """Wrapped singlelayer_leiden() for use in multiprocessing.Pool.imap_unordered."""
53
+ return singlelayer_leiden(*args)
54
+
55
+
53
56
  def leiden_part(G):
54
57
  return leidenalg.RBConfigurationVertexPartition(G)
55
58
 
@@ -68,8 +71,6 @@ def split_intralayer_leiden_graph(G_intralayer, layer_membership):
68
71
 
69
72
  This is needed since leidenalg lacks support for faster multilayer optimization.
70
73
 
71
- WARNING: Optimization can be EXTREMELY slow! Leidenalg does not properly implement multilayer optimization.
72
-
73
74
  :param G_intralayer: intralayer graph of interest
74
75
  :type G_intralayer: igraph.Graph
75
76
  :param layer_vec: list of each vertex's layer membership
@@ -77,9 +78,6 @@ def split_intralayer_leiden_graph(G_intralayer, layer_membership):
77
78
  :return: list of intralayer networks
78
79
  :rtype: list[igraph.Graph]
79
80
  """
80
- warnings.warn("You are using Leiden multilayer modularity optimization. THIS CAN BE EXTREMELY SLOW! "
81
- "leidenalg's implementation is inefficient, especially when there are many layers.")
82
-
83
81
  # internally use hashable objects for memoization
84
82
  return _split_leiden_graph_layers_cached(n=G_intralayer.vcount(), G_es=tuple(G_intralayer.es),
85
83
  is_directed=G_intralayer.is_directed(),
@@ -108,7 +106,8 @@ def _split_leiden_graph_layers_cached(n, G_es, is_directed, layer_membership):
108
106
  def multilayer_leiden(G_intralayer, G_interlayer, layer_vec, gamma, omega, optimiser=None, return_partition=False):
109
107
  r"""Run the Leiden modularity maximization algorithm at a single (:math:`\gamma, \omega`) value.
110
108
 
111
- WARNING: Optimization can be EXTREMELY slow! Leidenalg does not properly implement multilayer optimization.
109
+ WARNING: Optimization can be EXTREMELY slow for large numbers of layers! Leidenalg does not properly implement
110
+ multilayer optimization.
112
111
 
113
112
  :param G_intralayer: intralayer graph of interest
114
113
  :type G_intralayer: igraph.Graph
@@ -150,6 +149,11 @@ def multilayer_leiden(G_intralayer, G_interlayer, layer_vec, gamma, omega, optim
150
149
  return tuple(intralayer_parts[0].membership)
151
150
 
152
151
 
152
+ def _wrapped_multilayer_leiden(args):
153
+ """Wrapped multilayer_leiden() for use in multiprocessing.Pool.imap_unordered."""
154
+ return multilayer_leiden(*args)
155
+
156
+
153
157
  def multilayer_leiden_part(G_intralayer, G_interlayer, layer_membership):
154
158
  if 'weight' not in G_intralayer.es:
155
159
  G_intralayer.es['weight'] = [1.0] * G_intralayer.ecount()
@@ -178,51 +182,29 @@ def repeated_leiden_from_gammas(G, gammas):
178
182
  return {sorted_tuple(singlelayer_leiden(G, gamma)) for gamma in gammas}
179
183
 
180
184
 
181
- def repeated_parallel_leiden_from_gammas(G, gammas, show_progress=True, chunk_dispatch=True):
185
+ def repeated_parallel_leiden_from_gammas(G, gammas, show_progress=True):
182
186
  r"""Runs the Leiden modularity maximization algorithm at each provided :math:`\gamma` value, using all CPU cores.
183
187
 
184
188
  :param G: graph of interest
185
189
  :type G: igraph.Graph
186
190
  :param gammas: list of gammas (resolution parameters) to run Leiden at
187
191
  :type gammas: list[float]
188
- :param show_progress: if True, render a progress bar. This will only work if ``chunk_dispatch`` is also True
192
+ :param show_progress: if True, render a progress bar
189
193
  :type show_progress: bool
190
- :param chunk_dispatch: if True, dispatch parallel work in chunks. Setting this to False may increase performance,
191
- but can lead to out-of-memory issues
192
- :type chunk_dispatch: bool
193
194
  :return: a set of all unique partitions returned by the Leiden algorithm
194
195
  :rtype: set of tuple[int]
195
196
  """
196
-
197
- pool = Pool(processes=cpu_count())
198
197
  total = set()
199
-
200
- chunk_size = len(gammas) // 99
201
- if chunk_size > 0 and chunk_dispatch:
202
- chunk_params = ([(G, g) for g in gammas[i:i + chunk_size]] for i in range(0, len(gammas), chunk_size))
203
- else:
204
- chunk_params = [[(G, g) for g in gammas]]
205
- chunk_size = len(gammas)
206
-
207
- if show_progress:
208
- progress = Progress(ceil(len(gammas) / chunk_size))
209
-
210
- for chunk in chunk_params:
211
- for partition in pool.starmap(singlelayer_leiden, chunk):
212
- total.add(sorted_tuple(partition))
213
-
198
+ pool_chunk_size = max(1, len(gammas) // (cpu_count() * 100))
199
+ with Pool(processes=cpu_count()) as pool:
200
+ pool_iterator = pool.imap_unordered(_wrapped_singlelayer_leiden, [(G, g) for g in gammas],
201
+ chunksize=pool_chunk_size)
214
202
  if show_progress:
215
- progress.increment()
216
-
217
- if psutil.virtual_memory().available < LOW_MEMORY_THRESHOLD:
218
- # Reinitialize pool to get around an apparent memory leak in multiprocessing
219
- pool.close()
220
- pool = Pool(processes=cpu_count())
203
+ pool_iterator = tqdm(pool_iterator, total=len(gammas))
221
204
 
222
- if show_progress:
223
- progress.done()
205
+ for partition in pool_iterator:
206
+ total.add(sorted_tuple(partition))
224
207
 
225
- pool.close()
226
208
  return total
227
209
 
228
210
 
@@ -232,10 +214,13 @@ def repeated_leiden_from_gammas_omegas(G_intralayer, G_interlayer, layer_vec, ga
232
214
 
233
215
 
234
216
  def repeated_parallel_leiden_from_gammas_omegas(G_intralayer, G_interlayer, layer_vec, gammas, omegas,
235
- show_progress=True, chunk_dispatch=True):
217
+ show_progress=True):
236
218
  """
237
219
  Runs leidenalg at each gamma and omega in ``gammas`` and ``omegas``, using all CPU cores available.
238
220
 
221
+ WARNING: Optimization can be EXTREMELY slow for large numbers of layers! Leidenalg does not properly implement
222
+ multilayer optimization.
223
+
239
224
  :param G_intralayer: intralayer graph of interest
240
225
  :type G_intralayer: igraph.Graph
241
226
  :param G_interlayer: interlayer graph of interest
@@ -248,44 +233,23 @@ def repeated_parallel_leiden_from_gammas_omegas(G_intralayer, G_interlayer, laye
248
233
  :type omegas: list[float]
249
234
  :param show_progress: if True, render a progress bar
250
235
  :type show_progress: bool
251
- :param chunk_dispatch: if True, dispatch parallel work in chunks. Setting this to False may increase performance,
252
- but can lead to out-of-memory issues
253
- :type chunk_dispatch: bool
254
236
  :return: a set of all unique partitions encountered
255
237
  :rtype: set of tuple[int]
256
238
  """
257
239
  resolution_parameter_points = [(gamma, omega) for gamma in gammas for omega in omegas]
258
240
 
259
- pool = Pool(processes=cpu_count())
260
241
  total = set()
261
-
262
- chunk_size = len(resolution_parameter_points) // 99
263
- if chunk_size > 0 and chunk_dispatch:
264
- chunk_params = ([(G_intralayer, G_interlayer, layer_vec, gamma, omega)
265
- for gamma, omega in resolution_parameter_points[i:i + chunk_size]]
266
- for i in range(0, len(resolution_parameter_points), chunk_size))
267
- else:
268
- chunk_params = [[(G_intralayer, G_interlayer, layer_vec, gamma, omega)
269
- for gamma, omega in resolution_parameter_points]]
270
- chunk_size = len(gammas)
271
-
272
- if show_progress:
273
- progress = Progress(ceil(len(resolution_parameter_points) / chunk_size))
274
-
275
- for chunk in chunk_params:
276
- for partition in pool.starmap(multilayer_leiden, chunk):
277
- total.add(sorted_tuple(partition))
278
-
242
+ pool_chunk_size = max(1, len(resolution_parameter_points) // (cpu_count() * 100))
243
+ with Pool(processes=cpu_count()) as pool:
244
+ pool_iterator = pool.imap_unordered(
245
+ _wrapped_multilayer_leiden,
246
+ [(G_intralayer, G_interlayer, layer_vec, gamma, omega) for gamma, omega in resolution_parameter_points],
247
+ chunksize=pool_chunk_size
248
+ )
279
249
  if show_progress:
280
- progress.increment()
281
-
282
- if psutil.virtual_memory().available < LOW_MEMORY_THRESHOLD:
283
- # Reinitialize pool to get around an apparent memory leak in multiprocessing
284
- pool.close()
285
- pool = Pool(processes=cpu_count())
250
+ pool_iterator = tqdm(pool_iterator, total=len(resolution_parameter_points))
286
251
 
287
- if show_progress:
288
- progress.done()
252
+ for partition in pool_iterator:
253
+ total.add(sorted_tuple(partition))
289
254
 
290
- pool.close()
291
255
  return total
@@ -6,14 +6,16 @@ module ``modularitypruning.louvain_utilities`` now shims single-layer functions
6
6
  in ``modularitypruning.leiden_utilities`` (though it still contains the legacy multi-layer functions since they can be
7
7
  faster in general -- leidenalg does not efficiently implement multilayer optimization).
8
8
  """
9
- from . import leiden_utilities
10
- from .leiden_utilities import sorted_tuple, LOW_MEMORY_THRESHOLD
11
- from .progress import Progress
9
+ import warnings
12
10
  from math import ceil
13
11
  from multiprocessing import Pool, cpu_count
12
+
14
13
  import numpy as np
15
14
  import psutil
16
- import warnings
15
+
16
+ from . import leiden_utilities
17
+ from .leiden_utilities import sorted_tuple, LOW_MEMORY_THRESHOLD
18
+ from .progress import Progress
17
19
 
18
20
  try:
19
21
  import louvain # import louvain if possible
@@ -1,8 +1,9 @@
1
+ import leidenalg
2
+
1
3
  from .leiden_utilities import singlelayer_leiden, multilayer_leiden
2
4
  from .parameter_estimation_utilities import leiden_part_with_membership, estimate_singlelayer_SBM_parameters, \
3
5
  gamma_estimate_from_parameters, omega_function_from_model, estimate_multilayer_SBM_parameters
4
6
  from .partition_utilities import in_degrees
5
- import leidenalg
6
7
 
7
8
 
8
9
  def iterative_monolayer_resolution_parameter_estimation(G, gamma=1.0, tol=1e-2, max_iter=25, verbose=False,
@@ -1,12 +1,14 @@
1
- from .leiden_utilities import leiden_part_with_membership, sorted_tuple
2
- from .champ_utilities import CHAMP_2D, CHAMP_3D
3
- from .partition_utilities import num_communities
1
+ import warnings
2
+ from math import log
3
+
4
4
  import igraph as ig
5
5
  import leidenalg
6
- from math import log
7
6
  import numpy as np
8
7
  from scipy.optimize import fsolve
9
- import warnings
8
+
9
+ from .champ_utilities import CHAMP_2D, CHAMP_3D
10
+ from .leiden_utilities import leiden_part_with_membership, sorted_tuple
11
+ from .partition_utilities import num_communities
10
12
 
11
13
 
12
14
  def estimate_singlelayer_SBM_parameters(G, partition, m=None):
@@ -534,12 +536,25 @@ def prune_to_multilayer_stable_partitions(G_intralayer, G_interlayer, layer_vec,
534
536
  parameter estimates are within the provided ``gamma_start``, ``gamma_end``, ``omega_start``, and ``omega_end``
535
537
  bounds.
536
538
 
539
+ There are three network layer topology models available, all from Pamfil et al.
540
+
541
+ * **"temporal"**: Interlayer edges always connect copies of a node from one layer to the next, often representing
542
+ interactions that change over time.
543
+ * **"multilevel"**: Interlayer edges connect a hierarchy of monolayer networks from one layer to the next. This is
544
+ more general than temporal networks, as nodes can connect arbitrarily to nodes in the next layer. These often
545
+ represent inclusion relationships, such as cities to counties, counties to states, and states to countries.
546
+ * **"multiplex"**: Each layer represents a type of interaction, making the entire multilayer network akin to an
547
+ edge-colored multigraph (each type of edge has its own layer). This model is unique in that there is no natural
548
+ ordering of layers, and the resulting theory requires some analytical simplifications, making the resulting
549
+ parameter estimation the least robust of the three models.
550
+
537
551
  See https://doi.org/10.1038/s41598-022-20142-6 for more details.
538
552
 
539
- NOTE: This method truncates omega estimates to ``omega_end - 1e-3`` in order to properly identify stable partitions
540
- with infinite interlayer coupling estimates (e.g. when all membership labels persist across layers). If
541
- ``omega_end`` is set too low, such partitions may be incorrectly identified as stable. As such, you should be
542
- somewhat wary of the returned partitions with zero community structure differences across layers.
553
+ NOTE: This method will truncate omega estimates to ``omega_end - 1e-3`` (and raise a warning) if needed to properly
554
+ identify stable partitions with very large or infinite interlayer coupling estimates (e.g., when all membership
555
+ labels persist across layers). If ``omega_end`` is set too low, these partitions may be incorrectly identified as
556
+ stable. Conversely, some partitions with large omega estimates might be misclassified as not stable. Therefore, be
557
+ cautious of returned partitions with little or no community structure differences across layers.
543
558
 
544
559
  :param G_intralayer: intralayer graph of interest
545
560
  :type G_intralayer: igraph.Graph
@@ -599,6 +614,11 @@ def prune_to_multilayer_stable_partitions(G_intralayer, G_interlayer, layer_vec,
599
614
  omega_start, omega_end)
600
615
  domains_with_estimates = domains_to_gamma_omega_estimates(G_intralayer, G_interlayer, layer_vec, domains, model)
601
616
 
617
+ if any(o_est >= omega_end for _, _, g_est, o_est in domains_with_estimates if g_est is not None):
618
+ warnings.warn(f"We are truncating some omega estimates to your choice of omega_end={omega_end}. You should "
619
+ f"check that this accurately captures the high-omega behavior of the partition domains. "
620
+ f"Be cautious of partitions with little or no community structure differences across layers!")
621
+
602
622
  # Truncate infinite omega solutions to our maximum omega
603
623
  domains_with_estimates = [(polyverts, membership, g_est, min(o_est, omega_end - 1e-3))
604
624
  for polyverts, membership, g_est, o_est in domains_with_estimates
@@ -1,4 +1,5 @@
1
1
  from collections import defaultdict
2
+
2
3
  from sklearn.metrics import adjusted_mutual_info_score, normalized_mutual_info_score
3
4
 
4
5
 
@@ -1,12 +1,14 @@
1
- from .partition_utilities import num_communities, ami
2
1
  from collections import defaultdict
3
2
  from random import sample, shuffle
4
- import numpy as np
3
+
5
4
  import matplotlib
6
- from matplotlib.patches import Polygon
7
- from matplotlib.collections import PatchCollection
8
5
  import matplotlib.pyplot as plt
6
+ import numpy as np
9
7
  import seaborn as sbn
8
+ from matplotlib.collections import PatchCollection
9
+ from matplotlib.patches import Polygon
10
+
11
+ from .partition_utilities import num_communities, ami
10
12
 
11
13
 
12
14
  def plot_adjacency(adj):
@@ -69,7 +71,7 @@ def plot_estimates(gamma_estimates):
69
71
  # length_includes_head=True, alpha=0.5, zorder=2, **{"overhang": 0.5})
70
72
 
71
73
 
72
- def plot_2d_domains(domains, xlim, ylim, flip_axes=False, use_current_axes=False):
74
+ def plot_2d_domains(domains, xlim, ylim, flip_axes=True, use_current_axes=False):
73
75
  """Plot partition dominance ranges in the (gamma, omega) plane, using the domains from CHAMP_3D.
74
76
 
75
77
  Limits output to xlim and ylim dimensions. Note that the plotting here has x=gamma and y=omega.
@@ -91,7 +93,7 @@ def plot_2d_domains(domains, xlim, ylim, flip_axes=False, use_current_axes=False
91
93
  patches.append(polygon)
92
94
 
93
95
  cnorm = matplotlib.colors.Normalize(vmin=0, vmax=len(domains))
94
- cmap = matplotlib.cm.get_cmap("Set1")
96
+ cmap = plt.get_cmap("Set1")
95
97
  available_colors = {cmap(cnorm(i)) for i in range(len(domains))}
96
98
 
97
99
  if len(available_colors) == len(domains):
@@ -207,7 +209,7 @@ def plot_2d_domains_with_num_communities(domains_with_estimates, xlim, ylim, fli
207
209
  plt.ylim(ylim)
208
210
 
209
211
 
210
- def plot_2d_domains_with_ami(domains_with_estimates, ground_truth, xlim, ylim, flip_axes=False):
212
+ def plot_2d_domains_with_ami(domains_with_estimates, ground_truth, xlim, ylim, flip_axes=True):
211
213
  """Plot partition dominance ranges in the (gamma, omega) plane, using the domains from CHAMP_3D and coloring by the
212
214
  AMI between the partitions and ground truth.
213
215