modularitypruning 1.3.6__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,11 @@
1
- from .progress import Progress
2
1
  import functools
3
2
  import igraph as ig
4
3
  import leidenalg
5
4
  from math import ceil
6
5
  from multiprocessing import Pool, cpu_count
6
+ from tqdm import tqdm
7
7
  import numpy as np
8
8
  import psutil
9
- import warnings
10
9
 
11
10
  LOW_MEMORY_THRESHOLD = 1e9 # 1 GB
12
11
 
@@ -50,6 +49,11 @@ def singlelayer_leiden(G, gamma, return_partition=False):
50
49
  return tuple(partition.membership)
51
50
 
52
51
 
52
+ def _wrapped_singlelayer_leiden(args):
53
+ """Wrapped singlelayer_leiden() for use in multiprocessing.Pool.imap_unordered."""
54
+ return singlelayer_leiden(*args)
55
+
56
+
53
57
  def leiden_part(G):
54
58
  return leidenalg.RBConfigurationVertexPartition(G)
55
59
 
@@ -68,8 +72,6 @@ def split_intralayer_leiden_graph(G_intralayer, layer_membership):
68
72
 
69
73
  This is needed since leidenalg lacks support for faster multilayer optimization.
70
74
 
71
- WARNING: Optimization can be EXTREMELY slow! Leidenalg does not properly implement multilayer optimization.
72
-
73
75
  :param G_intralayer: intralayer graph of interest
74
76
  :type G_intralayer: igraph.Graph
75
77
  :param layer_vec: list of each vertex's layer membership
@@ -77,9 +79,6 @@ def split_intralayer_leiden_graph(G_intralayer, layer_membership):
77
79
  :return: list of intralayer networks
78
80
  :rtype: list[igraph.Graph]
79
81
  """
80
- warnings.warn("You are using Leiden multilayer modularity optimization. THIS CAN BE EXTREMELY SLOW! "
81
- "leidenalg's implementation is inefficient, especially when there are many layers.")
82
-
83
82
  # internally use hashable objects for memoization
84
83
  return _split_leiden_graph_layers_cached(n=G_intralayer.vcount(), G_es=tuple(G_intralayer.es),
85
84
  is_directed=G_intralayer.is_directed(),
@@ -108,7 +107,8 @@ def _split_leiden_graph_layers_cached(n, G_es, is_directed, layer_membership):
108
107
  def multilayer_leiden(G_intralayer, G_interlayer, layer_vec, gamma, omega, optimiser=None, return_partition=False):
109
108
  r"""Run the Leiden modularity maximization algorithm at a single (:math:`\gamma, \omega`) value.
110
109
 
111
- WARNING: Optimization can be EXTREMELY slow! Leidenalg does not properly implement multilayer optimization.
110
+ WARNING: Optimization can be EXTREMELY slow for large numbers of layers! Leidenalg does not properly implement
111
+ multilayer optimization.
112
112
 
113
113
  :param G_intralayer: intralayer graph of interest
114
114
  :type G_intralayer: igraph.Graph
@@ -150,6 +150,11 @@ def multilayer_leiden(G_intralayer, G_interlayer, layer_vec, gamma, omega, optim
150
150
  return tuple(intralayer_parts[0].membership)
151
151
 
152
152
 
153
+ def _wrapped_multilayer_leiden(args):
154
+ """Wrapped multilayer_leiden() for use in multiprocessing.Pool.imap_unordered."""
155
+ return multilayer_leiden(*args)
156
+
157
+
153
158
  def multilayer_leiden_part(G_intralayer, G_interlayer, layer_membership):
154
159
  if 'weight' not in G_intralayer.es:
155
160
  G_intralayer.es['weight'] = [1.0] * G_intralayer.ecount()
@@ -178,51 +183,29 @@ def repeated_leiden_from_gammas(G, gammas):
178
183
  return {sorted_tuple(singlelayer_leiden(G, gamma)) for gamma in gammas}
179
184
 
180
185
 
181
- def repeated_parallel_leiden_from_gammas(G, gammas, show_progress=True, chunk_dispatch=True):
186
+ def repeated_parallel_leiden_from_gammas(G, gammas, show_progress=True):
182
187
  r"""Runs the Leiden modularity maximization algorithm at each provided :math:`\gamma` value, using all CPU cores.
183
188
 
184
189
  :param G: graph of interest
185
190
  :type G: igraph.Graph
186
191
  :param gammas: list of gammas (resolution parameters) to run Leiden at
187
192
  :type gammas: list[float]
188
- :param show_progress: if True, render a progress bar. This will only work if ``chunk_dispatch`` is also True
193
+ :param show_progress: if True, render a progress bar
189
194
  :type show_progress: bool
190
- :param chunk_dispatch: if True, dispatch parallel work in chunks. Setting this to False may increase performance,
191
- but can lead to out-of-memory issues
192
- :type chunk_dispatch: bool
193
195
  :return: a set of all unique partitions returned by the Leiden algorithm
194
196
  :rtype: set of tuple[int]
195
197
  """
196
-
197
- pool = Pool(processes=cpu_count())
198
198
  total = set()
199
-
200
- chunk_size = len(gammas) // 99
201
- if chunk_size > 0 and chunk_dispatch:
202
- chunk_params = ([(G, g) for g in gammas[i:i + chunk_size]] for i in range(0, len(gammas), chunk_size))
203
- else:
204
- chunk_params = [[(G, g) for g in gammas]]
205
- chunk_size = len(gammas)
206
-
207
- if show_progress:
208
- progress = Progress(ceil(len(gammas) / chunk_size))
209
-
210
- for chunk in chunk_params:
211
- for partition in pool.starmap(singlelayer_leiden, chunk):
212
- total.add(sorted_tuple(partition))
213
-
199
+ pool_chunk_size = max(1, len(gammas) // (cpu_count() * 100))
200
+ with Pool(processes=cpu_count()) as pool:
201
+ pool_iterator = pool.imap_unordered(_wrapped_singlelayer_leiden, [(G, g) for g in gammas],
202
+ chunksize=pool_chunk_size)
214
203
  if show_progress:
215
- progress.increment()
216
-
217
- if psutil.virtual_memory().available < LOW_MEMORY_THRESHOLD:
218
- # Reinitialize pool to get around an apparent memory leak in multiprocessing
219
- pool.close()
220
- pool = Pool(processes=cpu_count())
204
+ pool_iterator = tqdm(pool_iterator, total=len(gammas))
221
205
 
222
- if show_progress:
223
- progress.done()
206
+ for partition in pool_iterator:
207
+ total.add(sorted_tuple(partition))
224
208
 
225
- pool.close()
226
209
  return total
227
210
 
228
211
 
@@ -232,10 +215,13 @@ def repeated_leiden_from_gammas_omegas(G_intralayer, G_interlayer, layer_vec, ga
232
215
 
233
216
 
234
217
  def repeated_parallel_leiden_from_gammas_omegas(G_intralayer, G_interlayer, layer_vec, gammas, omegas,
235
- show_progress=True, chunk_dispatch=True):
218
+ show_progress=True):
236
219
  """
237
220
  Runs leidenalg at each gamma and omega in ``gammas`` and ``omegas``, using all CPU cores available.
238
221
 
222
+ WARNING: Optimization can be EXTREMELY slow for large numbers of layers! Leidenalg does not properly implement
223
+ multilayer optimization.
224
+
239
225
  :param G_intralayer: intralayer graph of interest
240
226
  :type G_intralayer: igraph.Graph
241
227
  :param G_interlayer: interlayer graph of interest
@@ -248,44 +234,23 @@ def repeated_parallel_leiden_from_gammas_omegas(G_intralayer, G_interlayer, laye
248
234
  :type omegas: list[float]
249
235
  :param show_progress: if True, render a progress bar
250
236
  :type show_progress: bool
251
- :param chunk_dispatch: if True, dispatch parallel work in chunks. Setting this to False may increase performance,
252
- but can lead to out-of-memory issues
253
- :type chunk_dispatch: bool
254
237
  :return: a set of all unique partitions encountered
255
238
  :rtype: set of tuple[int]
256
239
  """
257
240
  resolution_parameter_points = [(gamma, omega) for gamma in gammas for omega in omegas]
258
241
 
259
- pool = Pool(processes=cpu_count())
260
242
  total = set()
261
-
262
- chunk_size = len(resolution_parameter_points) // 99
263
- if chunk_size > 0 and chunk_dispatch:
264
- chunk_params = ([(G_intralayer, G_interlayer, layer_vec, gamma, omega)
265
- for gamma, omega in resolution_parameter_points[i:i + chunk_size]]
266
- for i in range(0, len(resolution_parameter_points), chunk_size))
267
- else:
268
- chunk_params = [[(G_intralayer, G_interlayer, layer_vec, gamma, omega)
269
- for gamma, omega in resolution_parameter_points]]
270
- chunk_size = len(gammas)
271
-
272
- if show_progress:
273
- progress = Progress(ceil(len(resolution_parameter_points) / chunk_size))
274
-
275
- for chunk in chunk_params:
276
- for partition in pool.starmap(multilayer_leiden, chunk):
277
- total.add(sorted_tuple(partition))
278
-
243
+ pool_chunk_size = max(1, len(resolution_parameter_points) // (cpu_count() * 100))
244
+ with Pool(processes=cpu_count()) as pool:
245
+ pool_iterator = pool.imap_unordered(
246
+ _wrapped_multilayer_leiden,
247
+ [(G_intralayer, G_interlayer, layer_vec, gamma, omega) for gamma, omega in resolution_parameter_points],
248
+ chunksize=pool_chunk_size
249
+ )
279
250
  if show_progress:
280
- progress.increment()
281
-
282
- if psutil.virtual_memory().available < LOW_MEMORY_THRESHOLD:
283
- # Reinitialize pool to get around an apparent memory leak in multiprocessing
284
- pool.close()
285
- pool = Pool(processes=cpu_count())
251
+ pool_iterator = tqdm(pool_iterator, total=len(resolution_parameter_points))
286
252
 
287
- if show_progress:
288
- progress.done()
253
+ for partition in pool_iterator:
254
+ total.add(sorted_tuple(partition))
289
255
 
290
- pool.close()
291
256
  return total
@@ -534,12 +534,25 @@ def prune_to_multilayer_stable_partitions(G_intralayer, G_interlayer, layer_vec,
534
534
  parameter estimates are within the provided ``gamma_start``, ``gamma_end``, ``omega_start``, and ``omega_end``
535
535
  bounds.
536
536
 
537
+ There are three network layer topology models available, all from Pamfil et al.
538
+
539
+ * **"temporal"**: Interlayer edges always connect copies of a node from one layer to the next, often representing
540
+ interactions that change over time.
541
+ * **"multilevel"**: Interlayer edges connect a hierarchy of monolayer networks from one layer to the next. This is
542
+ more general than temporal networks, as nodes can connect arbitrarily to nodes in the next layer. These often
543
+ represent inclusion relationships, such as cities to counties, counties to states, and states to countries.
544
+ * **"multiplex"**: Each layer represents a type of interaction, making the entire multilayer network akin to an
545
+ edge-colored multigraph (each type of edge has its own layer). This model is unique in that there is no natural
546
+ ordering of layers, and the resulting theory requires some analytical simplifications, making the resulting
547
+ parameter estimation the least robust of the three models.
548
+
537
549
  See https://doi.org/10.1038/s41598-022-20142-6 for more details.
538
550
 
539
- NOTE: This method truncates omega estimates to ``omega_end - 1e-3`` in order to properly identify stable partitions
540
- with infinite interlayer coupling estimates (e.g. when all membership labels persist across layers). If
541
- ``omega_end`` is set too low, such partitions may be incorrectly identified as stable. As such, you should be
542
- somewhat wary of the returned partitions with zero community structure differences across layers.
551
+ NOTE: This method will truncate omega estimates to ``omega_end - 1e-3`` (and raise a warning) if needed to properly
552
+ identify stable partitions with very large or infinite interlayer coupling estimates (e.g., when all membership
553
+ labels persist across layers). If ``omega_end`` is set too low, these partitions may be incorrectly identified as
554
+ stable. Conversely, some partitions with large omega estimates might be misclassified as not stable. Therefore, be
555
+ cautious of returned partitions with little or no community structure differences across layers.
543
556
 
544
557
  :param G_intralayer: intralayer graph of interest
545
558
  :type G_intralayer: igraph.Graph
@@ -599,6 +612,11 @@ def prune_to_multilayer_stable_partitions(G_intralayer, G_interlayer, layer_vec,
599
612
  omega_start, omega_end)
600
613
  domains_with_estimates = domains_to_gamma_omega_estimates(G_intralayer, G_interlayer, layer_vec, domains, model)
601
614
 
615
+ if any(o_est >= omega_end for _, _, g_est, o_est in domains_with_estimates if g_est is not None):
616
+ warnings.warn(f"We are truncating some omega estimates to your choice of omega_end={omega_end}. You should "
617
+ f"check that this accurately captures the high-omega behavior of the partition domains. "
618
+ f"Be cautious of partitions with little or no community structure differences across layers!")
619
+
602
620
  # Truncate infinite omega solutions to our maximum omega
603
621
  domains_with_estimates = [(polyverts, membership, g_est, min(o_est, omega_end - 1e-3))
604
622
  for polyverts, membership, g_est, o_est in domains_with_estimates
@@ -69,7 +69,7 @@ def plot_estimates(gamma_estimates):
69
69
  # length_includes_head=True, alpha=0.5, zorder=2, **{"overhang": 0.5})
70
70
 
71
71
 
72
- def plot_2d_domains(domains, xlim, ylim, flip_axes=False, use_current_axes=False):
72
+ def plot_2d_domains(domains, xlim, ylim, flip_axes=True, use_current_axes=False):
73
73
  """Plot partition dominance ranges in the (gamma, omega) plane, using the domains from CHAMP_3D.
74
74
 
75
75
  Limits output to xlim and ylim dimensions. Note that the plotting here has x=gamma and y=omega.
@@ -91,7 +91,7 @@ def plot_2d_domains(domains, xlim, ylim, flip_axes=False, use_current_axes=False
91
91
  patches.append(polygon)
92
92
 
93
93
  cnorm = matplotlib.colors.Normalize(vmin=0, vmax=len(domains))
94
- cmap = matplotlib.cm.get_cmap("Set1")
94
+ cmap = plt.get_cmap("Set1")
95
95
  available_colors = {cmap(cnorm(i)) for i in range(len(domains))}
96
96
 
97
97
  if len(available_colors) == len(domains):
@@ -207,7 +207,7 @@ def plot_2d_domains_with_num_communities(domains_with_estimates, xlim, ylim, fli
207
207
  plt.ylim(ylim)
208
208
 
209
209
 
210
- def plot_2d_domains_with_ami(domains_with_estimates, ground_truth, xlim, ylim, flip_axes=False):
210
+ def plot_2d_domains_with_ami(domains_with_estimates, ground_truth, xlim, ylim, flip_axes=True):
211
211
  """Plot partition dominance ranges in the (gamma, omega) plane, using the domains from CHAMP_3D and coloring by the
212
212
  AMI between the partitions and ground truth.
213
213
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: modularitypruning
3
- Version: 1.3.6
3
+ Version: 1.4.0
4
4
  Summary: Pruning tool to identify small subsets of network partitions that are significant from the perspective of stochastic block model inference.
5
5
  Home-page: https://github.com/ragibson/ModularityPruning
6
6
  Author: Ryan Gibson
@@ -26,6 +26,7 @@ Requires-Dist: igraph
26
26
  Requires-Dist: scikit-learn
27
27
  Requires-Dist: scipy >=1.7
28
28
  Requires-Dist: seaborn
29
+ Requires-Dist: tqdm
29
30
 
30
31
  # ModularityPruning
31
32
 
@@ -0,0 +1,14 @@
1
+ modularitypruning/__init__.py,sha256=U1iz51AVVzHw0aBZeJicxVg_L6TWq5pmv8Ep_bYyySU,238
2
+ modularitypruning/champ_utilities.py,sha256=VveP8N9CvMODk3dPtVMRfNLji1pktaolA6iNoW6Fy-A,16348
3
+ modularitypruning/leiden_utilities.py,sha256=sV3BkYONzVmKlpy-gUUUoL8XfKhwpytMRmCcijbsAiA,10395
4
+ modularitypruning/louvain_utilities.py,sha256=Zt58Wl4hgu6-zejdl-N_NW04UC4rbYmSHgpfoGDC2Ws,8231
5
+ modularitypruning/parameter_estimation.py,sha256=EPU5BDDauToPbAdG1lZc9p5Rl_oDqiC7bltfnjs5tg8,10479
6
+ modularitypruning/parameter_estimation_utilities.py,sha256=OzgJT4jOo2ovoHzUbS0m40l2cqqOk9tBU9cF35Tmm_M,28150
7
+ modularitypruning/partition_utilities.py,sha256=Fizqd0JuODL8W4BP2h8iV0WhZMK6HoKjH_QFNVDZkaI,956
8
+ modularitypruning/plotting.py,sha256=3JqJOpfoq_Vj_6y8nqrYHhkSqDdI56iAb4pSAMcgEmI,10568
9
+ modularitypruning/progress.py,sha256=XxkEVx8L9BoFnWtvUPg-kWtxUmE1RHqs5p5HPiTExUQ,971
10
+ modularitypruning-1.4.0.dist-info/LICENSE,sha256=eWz3HIQQxg7p1iSpUOUDKdDhGcuMPuVDDlcXf9F12D8,1068
11
+ modularitypruning-1.4.0.dist-info/METADATA,sha256=ZtLw4-g6NiP2r4G3Wcfk_ND_5wTjrNCn8etyjOkx5QM,3420
12
+ modularitypruning-1.4.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
13
+ modularitypruning-1.4.0.dist-info/top_level.txt,sha256=ZPOx3a-ek0Ge0ZMq-uvbySSaAL9MZ-t23-JkuHZXo9E,18
14
+ modularitypruning-1.4.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.41.3)
2
+ Generator: bdist_wheel (0.43.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,14 +0,0 @@
1
- modularitypruning/__init__.py,sha256=U1iz51AVVzHw0aBZeJicxVg_L6TWq5pmv8Ep_bYyySU,238
2
- modularitypruning/champ_utilities.py,sha256=VveP8N9CvMODk3dPtVMRfNLji1pktaolA6iNoW6Fy-A,16348
3
- modularitypruning/leiden_utilities.py,sha256=er_Sjp5b6fUacO-Bfygbk6CiX6J8uj-gKFw-HZWffNA,11820
4
- modularitypruning/louvain_utilities.py,sha256=Zt58Wl4hgu6-zejdl-N_NW04UC4rbYmSHgpfoGDC2Ws,8231
5
- modularitypruning/parameter_estimation.py,sha256=EPU5BDDauToPbAdG1lZc9p5Rl_oDqiC7bltfnjs5tg8,10479
6
- modularitypruning/parameter_estimation_utilities.py,sha256=dRml1ZIB3ctVC7lk4QQRQwwLBldhG92rNIT6kHs0yfQ,26563
7
- modularitypruning/partition_utilities.py,sha256=Fizqd0JuODL8W4BP2h8iV0WhZMK6HoKjH_QFNVDZkaI,956
8
- modularitypruning/plotting.py,sha256=Bxu62ueWZRKKoeNVVHy0aXvocKAUgKuj0dZZsnLmL78,10580
9
- modularitypruning/progress.py,sha256=XxkEVx8L9BoFnWtvUPg-kWtxUmE1RHqs5p5HPiTExUQ,971
10
- modularitypruning-1.3.6.dist-info/LICENSE,sha256=eWz3HIQQxg7p1iSpUOUDKdDhGcuMPuVDDlcXf9F12D8,1068
11
- modularitypruning-1.3.6.dist-info/METADATA,sha256=mcEUa7NhV6DfDHPHGHY6J7jv99McahL_R0Py_-jU0fQ,3400
12
- modularitypruning-1.3.6.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
13
- modularitypruning-1.3.6.dist-info/top_level.txt,sha256=ZPOx3a-ek0Ge0ZMq-uvbySSaAL9MZ-t23-JkuHZXo9E,18
14
- modularitypruning-1.3.6.dist-info/RECORD,,