PyPI - modularitypruning - Versions diffs - 1.3.6__py3-none-any.whl → 1.4.0__py3-none-any.whl - Mend

modularitypruning-1.3.6-py3-none-any.whl → modularitypruning-1.4.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

modularitypruning/leiden_utilities.py CHANGED Viewed

@@ -1,12 +1,11 @@
-from .progress import Progress
 import functools
 import igraph as ig
 import leidenalg
 from math import ceil
 from multiprocessing import Pool, cpu_count
+from tqdm import tqdm
 import numpy as np
 import psutil
-import warnings
 LOW_MEMORY_THRESHOLD = 1e9  # 1 GB
@@ -50,6 +49,11 @@ def singlelayer_leiden(G, gamma, return_partition=False):
         return tuple(partition.membership)
+def _wrapped_singlelayer_leiden(args):
+    """Wrapped singlelayer_leiden() for use in multiprocessing.Pool.imap_unordered."""
+    return singlelayer_leiden(*args)
 def leiden_part(G):
     return leidenalg.RBConfigurationVertexPartition(G)
@@ -68,8 +72,6 @@ def split_intralayer_leiden_graph(G_intralayer, layer_membership):
     This is needed since leidenalg lacks support for faster multilayer optimization.
-    WARNING: Optimization can be EXTREMELY slow! Leidenalg does not properly implement multilayer optimization.
     :param G_intralayer: intralayer graph of interest
     :type G_intralayer: igraph.Graph
     :param layer_vec: list of each vertex's layer membership
@@ -77,9 +79,6 @@ def split_intralayer_leiden_graph(G_intralayer, layer_membership):
     :return: list of intralayer networks
     :rtype: list[igraph.Graph]
     """
-    warnings.warn("You are using Leiden multilayer modularity optimization. THIS CAN BE EXTREMELY SLOW! "
-                  "leidenalg's implementation is inefficient, especially when there are many layers.")
     # internally use hashable objects for memoization
     return _split_leiden_graph_layers_cached(n=G_intralayer.vcount(), G_es=tuple(G_intralayer.es),
                                              is_directed=G_intralayer.is_directed(),
@@ -108,7 +107,8 @@ def _split_leiden_graph_layers_cached(n, G_es, is_directed, layer_membership):
 def multilayer_leiden(G_intralayer, G_interlayer, layer_vec, gamma, omega, optimiser=None, return_partition=False):
     r"""Run the Leiden modularity maximization algorithm at a single (:math:`\gamma, \omega`) value.
-    WARNING: Optimization can be EXTREMELY slow! Leidenalg does not properly implement multilayer optimization.
+    WARNING: Optimization can be EXTREMELY slow for large numbers of layers! Leidenalg does not properly implement
+    multilayer optimization.
     :param G_intralayer: intralayer graph of interest
     :type G_intralayer: igraph.Graph
@@ -150,6 +150,11 @@ def multilayer_leiden(G_intralayer, G_interlayer, layer_vec, gamma, omega, optim
         return tuple(intralayer_parts[0].membership)
+def _wrapped_multilayer_leiden(args):
+    """Wrapped multilayer_leiden() for use in multiprocessing.Pool.imap_unordered."""
+    return multilayer_leiden(*args)
 def multilayer_leiden_part(G_intralayer, G_interlayer, layer_membership):
     if 'weight' not in G_intralayer.es:
         G_intralayer.es['weight'] = [1.0] * G_intralayer.ecount()
@@ -178,51 +183,29 @@ def repeated_leiden_from_gammas(G, gammas):
     return {sorted_tuple(singlelayer_leiden(G, gamma)) for gamma in gammas}
-def repeated_parallel_leiden_from_gammas(G, gammas, show_progress=True, chunk_dispatch=True):
+def repeated_parallel_leiden_from_gammas(G, gammas, show_progress=True):
     r"""Runs the Leiden modularity maximization algorithm at each provided :math:`\gamma` value, using all CPU cores.
     :param G: graph of interest
     :type G: igraph.Graph
     :param gammas: list of gammas (resolution parameters) to run Leiden at
     :type gammas: list[float]
-    :param show_progress: if True, render a progress bar. This will only work if ``chunk_dispatch`` is also True
+    :param show_progress: if True, render a progress bar
     :type show_progress: bool
-    :param chunk_dispatch: if True, dispatch parallel work in chunks. Setting this to False may increase performance,
-                           but can lead to out-of-memory issues
-    :type chunk_dispatch: bool
     :return: a set of all unique partitions returned by the Leiden algorithm
     :rtype: set of tuple[int]
     """
-    pool = Pool(processes=cpu_count())
     total = set()
-    chunk_size = len(gammas) // 99
-    if chunk_size > 0 and chunk_dispatch:
-        chunk_params = ([(G, g) for g in gammas[i:i + chunk_size]] for i in range(0, len(gammas), chunk_size))
-    else:
-        chunk_params = [[(G, g) for g in gammas]]
-        chunk_size = len(gammas)
-    if show_progress:
-        progress = Progress(ceil(len(gammas) / chunk_size))
-    for chunk in chunk_params:
-        for partition in pool.starmap(singlelayer_leiden, chunk):
-            total.add(sorted_tuple(partition))
+    pool_chunk_size = max(1, len(gammas) // (cpu_count() * 100))
+    with Pool(processes=cpu_count()) as pool:
+        pool_iterator = pool.imap_unordered(_wrapped_singlelayer_leiden, [(G, g) for g in gammas],
+                                            chunksize=pool_chunk_size)
         if show_progress:
-            progress.increment()
-        if psutil.virtual_memory().available < LOW_MEMORY_THRESHOLD:
-            # Reinitialize pool to get around an apparent memory leak in multiprocessing
-            pool.close()
-            pool = Pool(processes=cpu_count())
+            pool_iterator = tqdm(pool_iterator, total=len(gammas))
-    if show_progress:
-        progress.done()
+        for partition in pool_iterator:
+            total.add(sorted_tuple(partition))
-    pool.close()
     return total
@@ -232,10 +215,13 @@ def repeated_leiden_from_gammas_omegas(G_intralayer, G_interlayer, layer_vec, ga
 def repeated_parallel_leiden_from_gammas_omegas(G_intralayer, G_interlayer, layer_vec, gammas, omegas,
-                                                show_progress=True, chunk_dispatch=True):
+                                                show_progress=True):
     """
     Runs leidenalg at each gamma and omega in ``gammas`` and ``omegas``, using all CPU cores available.
+    WARNING: Optimization can be EXTREMELY slow for large numbers of layers! Leidenalg does not properly implement
+    multilayer optimization.
     :param G_intralayer: intralayer graph of interest
     :type G_intralayer: igraph.Graph
     :param G_interlayer: interlayer graph of interest
@@ -248,44 +234,23 @@ def repeated_parallel_leiden_from_gammas_omegas(G_intralayer, G_interlayer, laye
     :type omegas: list[float]
     :param show_progress: if True, render a progress bar
     :type show_progress: bool
-    :param chunk_dispatch: if True, dispatch parallel work in chunks. Setting this to False may increase performance,
-                           but can lead to out-of-memory issues
-    :type chunk_dispatch: bool
     :return: a set of all unique partitions encountered
     :rtype: set of tuple[int]
     """
     resolution_parameter_points = [(gamma, omega) for gamma in gammas for omega in omegas]
-    pool = Pool(processes=cpu_count())
     total = set()
-    chunk_size = len(resolution_parameter_points) // 99
-    if chunk_size > 0 and chunk_dispatch:
-        chunk_params = ([(G_intralayer, G_interlayer, layer_vec, gamma, omega)
-                         for gamma, omega in resolution_parameter_points[i:i + chunk_size]]
-                        for i in range(0, len(resolution_parameter_points), chunk_size))
-    else:
-        chunk_params = [[(G_intralayer, G_interlayer, layer_vec, gamma, omega)
-                         for gamma, omega in resolution_parameter_points]]
-        chunk_size = len(gammas)
-    if show_progress:
-        progress = Progress(ceil(len(resolution_parameter_points) / chunk_size))
-    for chunk in chunk_params:
-        for partition in pool.starmap(multilayer_leiden, chunk):
-            total.add(sorted_tuple(partition))
+    pool_chunk_size = max(1, len(resolution_parameter_points) // (cpu_count() * 100))
+    with Pool(processes=cpu_count()) as pool:
+        pool_iterator = pool.imap_unordered(
+            _wrapped_multilayer_leiden,
+            [(G_intralayer, G_interlayer, layer_vec, gamma, omega) for gamma, omega in resolution_parameter_points],
+            chunksize=pool_chunk_size
+        )
         if show_progress:
-            progress.increment()
-        if psutil.virtual_memory().available < LOW_MEMORY_THRESHOLD:
-            # Reinitialize pool to get around an apparent memory leak in multiprocessing
-            pool.close()
-            pool = Pool(processes=cpu_count())
+            pool_iterator = tqdm(pool_iterator, total=len(resolution_parameter_points))
-    if show_progress:
-        progress.done()
+        for partition in pool_iterator:
+            total.add(sorted_tuple(partition))
-    pool.close()
     return total

modularitypruning/parameter_estimation_utilities.py CHANGED Viewed

@@ -534,12 +534,25 @@ def prune_to_multilayer_stable_partitions(G_intralayer, G_interlayer, layer_vec,
     parameter estimates are within the provided ``gamma_start``, ``gamma_end``, ``omega_start``, and ``omega_end``
     bounds.
+    There are three network layer topology models available, all from Pamfil et al.
+    * **"temporal"**: Interlayer edges always connect copies of a node from one layer to the next, often representing
+      interactions that change over time.
+    * **"multilevel"**: Interlayer edges connect a hierarchy of monolayer networks from one layer to the next. This is
+      more general than temporal networks, as nodes can connect arbitrarily to nodes in the next layer. These often
+      represent inclusion relationships, such as cities to counties, counties to states, and states to countries.
+    * **"multiplex"**: Each layer represents a type of interaction, making the entire multilayer network akin to an
+      edge-colored multigraph (each type of edge has its own layer). This model is unique in that there is no natural
+      ordering of layers, and the resulting theory requires some analytical simplifications, making the resulting
+      parameter estimation the least robust of the three models.
     See https://doi.org/10.1038/s41598-022-20142-6 for more details.
-    NOTE: This method truncates omega estimates to ``omega_end - 1e-3`` in order to properly identify stable partitions
-    with infinite interlayer coupling estimates (e.g. when all membership labels persist across layers). If
-    ``omega_end`` is set too low, such partitions may be incorrectly identified as stable. As such, you should be
-    somewhat wary of the returned partitions with zero community structure differences across layers.
+    NOTE: This method will truncate omega estimates to ``omega_end - 1e-3`` (and raise a warning) if needed to properly
+    identify stable partitions with very large or infinite interlayer coupling estimates (e.g., when all membership
+    labels persist across layers). If ``omega_end`` is set too low, these partitions may be incorrectly identified as
+    stable. Conversely, some partitions with large omega estimates might be misclassified as not stable. Therefore, be
+    cautious of returned partitions with little or no community structure differences across layers.
     :param G_intralayer: intralayer graph of interest
     :type G_intralayer: igraph.Graph
@@ -599,6 +612,11 @@ def prune_to_multilayer_stable_partitions(G_intralayer, G_interlayer, layer_vec,
                        omega_start, omega_end)
     domains_with_estimates = domains_to_gamma_omega_estimates(G_intralayer, G_interlayer, layer_vec, domains, model)
+    if any(o_est >= omega_end for _, _, g_est, o_est in domains_with_estimates if g_est is not None):
+        warnings.warn(f"We are truncating some omega estimates to your choice of omega_end={omega_end}. You should "
+                      f"check that this accurately captures the high-omega behavior of the partition domains. "
+                      f"Be cautious of partitions with little or no community structure differences across layers!")
     # Truncate infinite omega solutions to our maximum omega
     domains_with_estimates = [(polyverts, membership, g_est, min(o_est, omega_end - 1e-3))
                               for polyverts, membership, g_est, o_est in domains_with_estimates

modularitypruning/plotting.py CHANGED Viewed

@@ -69,7 +69,7 @@ def plot_estimates(gamma_estimates):
                 #           length_includes_head=True, alpha=0.5, zorder=2, **{"overhang": 0.5})
-def plot_2d_domains(domains, xlim, ylim, flip_axes=False, use_current_axes=False):
+def plot_2d_domains(domains, xlim, ylim, flip_axes=True, use_current_axes=False):
     """Plot partition dominance ranges in the (gamma, omega) plane, using the domains from CHAMP_3D.
     Limits output to xlim and ylim dimensions. Note that the plotting here has x=gamma and y=omega.
@@ -91,7 +91,7 @@ def plot_2d_domains(domains, xlim, ylim, flip_axes=False, use_current_axes=False
         patches.append(polygon)
     cnorm = matplotlib.colors.Normalize(vmin=0, vmax=len(domains))
-    cmap = matplotlib.cm.get_cmap("Set1")
+    cmap = plt.get_cmap("Set1")
     available_colors = {cmap(cnorm(i)) for i in range(len(domains))}
     if len(available_colors) == len(domains):
@@ -207,7 +207,7 @@ def plot_2d_domains_with_num_communities(domains_with_estimates, xlim, ylim, fli
     plt.ylim(ylim)
-def plot_2d_domains_with_ami(domains_with_estimates, ground_truth, xlim, ylim, flip_axes=False):
+def plot_2d_domains_with_ami(domains_with_estimates, ground_truth, xlim, ylim, flip_axes=True):
     """Plot partition dominance ranges in the (gamma, omega) plane, using the domains from CHAMP_3D and coloring by the
     AMI between the partitions and ground truth.

{modularitypruning-1.3.6.dist-info → modularitypruning-1.4.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: modularitypruning
-Version: 1.3.6
+Version: 1.4.0
 Summary: Pruning tool to identify small subsets of network partitions that are significant from the perspective of stochastic block model inference.
 Home-page: https://github.com/ragibson/ModularityPruning
 Author: Ryan Gibson
@@ -26,6 +26,7 @@ Requires-Dist: igraph
 Requires-Dist: scikit-learn
 Requires-Dist: scipy >=1.7
 Requires-Dist: seaborn
+Requires-Dist: tqdm
 # ModularityPruning

modularitypruning-1.4.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,14 @@
+modularitypruning/__init__.py,sha256=U1iz51AVVzHw0aBZeJicxVg_L6TWq5pmv8Ep_bYyySU,238
+modularitypruning/champ_utilities.py,sha256=VveP8N9CvMODk3dPtVMRfNLji1pktaolA6iNoW6Fy-A,16348
+modularitypruning/leiden_utilities.py,sha256=sV3BkYONzVmKlpy-gUUUoL8XfKhwpytMRmCcijbsAiA,10395
+modularitypruning/louvain_utilities.py,sha256=Zt58Wl4hgu6-zejdl-N_NW04UC4rbYmSHgpfoGDC2Ws,8231
+modularitypruning/parameter_estimation.py,sha256=EPU5BDDauToPbAdG1lZc9p5Rl_oDqiC7bltfnjs5tg8,10479
+modularitypruning/parameter_estimation_utilities.py,sha256=OzgJT4jOo2ovoHzUbS0m40l2cqqOk9tBU9cF35Tmm_M,28150
+modularitypruning/partition_utilities.py,sha256=Fizqd0JuODL8W4BP2h8iV0WhZMK6HoKjH_QFNVDZkaI,956
+modularitypruning/plotting.py,sha256=3JqJOpfoq_Vj_6y8nqrYHhkSqDdI56iAb4pSAMcgEmI,10568
+modularitypruning/progress.py,sha256=XxkEVx8L9BoFnWtvUPg-kWtxUmE1RHqs5p5HPiTExUQ,971
+modularitypruning-1.4.0.dist-info/LICENSE,sha256=eWz3HIQQxg7p1iSpUOUDKdDhGcuMPuVDDlcXf9F12D8,1068
+modularitypruning-1.4.0.dist-info/METADATA,sha256=ZtLw4-g6NiP2r4G3Wcfk_ND_5wTjrNCn8etyjOkx5QM,3420
+modularitypruning-1.4.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+modularitypruning-1.4.0.dist-info/top_level.txt,sha256=ZPOx3a-ek0Ge0ZMq-uvbySSaAL9MZ-t23-JkuHZXo9E,18
+modularitypruning-1.4.0.dist-info/RECORD,,

{modularitypruning-1.3.6.dist-info → modularitypruning-1.4.0.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: bdist_wheel (0.41.3)
+Generator: bdist_wheel (0.43.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

modularitypruning-1.3.6.dist-info/RECORD DELETED Viewed

@@ -1,14 +0,0 @@
-modularitypruning/__init__.py,sha256=U1iz51AVVzHw0aBZeJicxVg_L6TWq5pmv8Ep_bYyySU,238
-modularitypruning/champ_utilities.py,sha256=VveP8N9CvMODk3dPtVMRfNLji1pktaolA6iNoW6Fy-A,16348
-modularitypruning/leiden_utilities.py,sha256=er_Sjp5b6fUacO-Bfygbk6CiX6J8uj-gKFw-HZWffNA,11820
-modularitypruning/louvain_utilities.py,sha256=Zt58Wl4hgu6-zejdl-N_NW04UC4rbYmSHgpfoGDC2Ws,8231
-modularitypruning/parameter_estimation.py,sha256=EPU5BDDauToPbAdG1lZc9p5Rl_oDqiC7bltfnjs5tg8,10479
-modularitypruning/parameter_estimation_utilities.py,sha256=dRml1ZIB3ctVC7lk4QQRQwwLBldhG92rNIT6kHs0yfQ,26563
-modularitypruning/partition_utilities.py,sha256=Fizqd0JuODL8W4BP2h8iV0WhZMK6HoKjH_QFNVDZkaI,956
-modularitypruning/plotting.py,sha256=Bxu62ueWZRKKoeNVVHy0aXvocKAUgKuj0dZZsnLmL78,10580
-modularitypruning/progress.py,sha256=XxkEVx8L9BoFnWtvUPg-kWtxUmE1RHqs5p5HPiTExUQ,971
-modularitypruning-1.3.6.dist-info/LICENSE,sha256=eWz3HIQQxg7p1iSpUOUDKdDhGcuMPuVDDlcXf9F12D8,1068
-modularitypruning-1.3.6.dist-info/METADATA,sha256=mcEUa7NhV6DfDHPHGHY6J7jv99McahL_R0Py_-jU0fQ,3400
-modularitypruning-1.3.6.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
-modularitypruning-1.3.6.dist-info/top_level.txt,sha256=ZPOx3a-ek0Ge0ZMq-uvbySSaAL9MZ-t23-JkuHZXo9E,18
-modularitypruning-1.3.6.dist-info/RECORD,,

{modularitypruning-1.3.6.dist-info → modularitypruning-1.4.0.dist-info}/LICENSE RENAMED Viewed

File without changes

{modularitypruning-1.3.6.dist-info → modularitypruning-1.4.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

modularitypruning 1.3.6__py3-none-any.whl → 1.4.0__py3-none-any.whl

modularitypruning-1.3.6-py3-none-any.whl → modularitypruning-1.4.0-py3-none-any.whl