PyPI - muxpack - Versions diffs - 0.1.0__tar.gz → 0.2.0.dev1__tar.gz - Mend

muxpack 0.1.0 (tar.gz) → 0.2.0.dev1 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

{muxpack-0.1.0 → muxpack-0.2.0.dev1}/PKG-INFO RENAMED Viewed

@@ -1,17 +1,20 @@
 Metadata-Version: 2.3
 Name: muxpack
-Version: 0.1.0
+Version: 0.2.0.dev1
 Summary: Tools to handle multiplex network data more easily
 Author: Edwin de Jonge, Jan van der Laan
 Author-email: Edwin de Jonge <edwindjonge@gmail.com>, Jan van der Laan <djvanderlaan@gmail.com>
-Requires-Dist: duckdb>=1.4.4
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3.14
+Classifier: Programming Language :: Python :: Implementation :: CPython
+Classifier: Programming Language :: Python :: Implementation :: PyPy
 Requires-Dist: ibis-framework[duckdb]>=12.0.0
 Requires-Dist: networkx>=3.6.1
-Requires-Dist: pandas>=3.0.1
-Requires-Dist: pyarrow>=23.0.1
-Requires-Dist: pyarrow-hotfix>=0.7
 Requires-Dist: scipy>=1.17.1
-Requires-Python: >=3.13
+Requires-Python: >=3.11
 Project-URL: Homepage, https://codeberg.org/CBS-Networktools/muxpack.py
 Project-URL: Documentation, https://readthedocs.org
 Project-URL: Repository, https://codeberg.org/CBS-Networktools/muxpack.py
@@ -22,6 +25,8 @@ Description-Content-Type: text/markdown
 ## Muxpack
+[![Python package](https://github.com/edwindj/muxpack.py/actions/workflows/python.yml/badge.svg)](https://github.com/edwindj/muxpack.py/actions/workflows/python.yml)
 Muxpack is a Python implementation for working with multiplex network files.
 ## Documentation
@@ -33,4 +38,4 @@ uv sync --group docs
 uv run sphinx-build -b html docs docs/_build/html
 ```
-The generated HTML is available in `docs/_build/html/index.html`.
+The generated HTML is available in `docs/_build/html/index.html`.

{muxpack-0.1.0 → muxpack-0.2.0.dev1}/README.md RENAMED Viewed

@@ -2,6 +2,8 @@
 ## Muxpack
+[![Python package](https://github.com/edwindj/muxpack.py/actions/workflows/python.yml/badge.svg)](https://github.com/edwindj/muxpack.py/actions/workflows/python.yml)
 Muxpack is a Python implementation for working with multiplex network files.
 ## Documentation
@@ -13,4 +15,4 @@ uv sync --group docs
 uv run sphinx-build -b html docs docs/_build/html
 ```
-The generated HTML is available in `docs/_build/html/index.html`.
+The generated HTML is available in `docs/_build/html/index.html`.

{muxpack-0.1.0 → muxpack-0.2.0.dev1}/pyproject.toml RENAMED Viewed

@@ -1,20 +1,25 @@
 [project]
 name = "muxpack"
-version = "0.1.0"
+version = "0.2.0dev1"
 description = "Tools to handle multiplex network data more easily"
 readme = "README.md"
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
+    "Programming Language :: Python :: 3.14",
+    "Programming Language :: Python :: Implementation :: CPython",
+    "Programming Language :: Python :: Implementation :: PyPy",
+]
 authors = [
     { name = "Edwin de Jonge", email = "edwindjonge@gmail.com" },
     { name = "Jan van der Laan", email = "djvanderlaan@gmail.com" }
 ]
-requires-python = ">=3.13"
+requires-python = ">=3.11"
 dependencies = [
-    "duckdb>=1.4.4",
     "ibis-framework[duckdb]>=12.0.0",
     "networkx>=3.6.1",
-    "pandas>=3.0.1",
-    "pyarrow>=23.0.1",
-    "pyarrow-hotfix>=0.7",
     "scipy>=1.17.1",
 ]
@@ -28,7 +33,7 @@ Repository = "https://codeberg.org/CBS-Networktools/muxpack.py"
 muxpack = "muxpack:main"
 [build-system]
-requires = ["uv_build>=0.10.6,<0.11.0"]
+requires = ["uv_build>=0.10.6,<0.12.0"]
 build-backend = "uv_build"
 [dependency-groups]
@@ -37,7 +42,7 @@ dev = [
     "ruff>=0.15.4",
 ]
 docs = [
-    "sphinx>=9.1.0",
+    "sphinx>=7.4,<9.0",
     "sphinx-rtd-theme>=3.0.0",
 ]

muxpack-0.2.0.dev1/src/muxpack/__init__.py ADDED Viewed

@@ -0,0 +1,45 @@
+"""Public package API for working with multiplex network data.
+This module re-exports the main classes and helper functions so users can
+import common functionality directly from :mod:`muxpack`.
+"""
+from importlib.metadata import PackageNotFoundError, version
+import argparse
+from .check import check_edges, check_vertices
+from .io import read_multiplexseries, save_multiplexseries
+from .multiplexseries import MultiplexSeries
+from .multiplex import Multiplex
+from .to_csr_matrix import to_csr_matrix
+from .bipartite import Bipartite
+try:
+    __version__ = version("muxpack")
+except PackageNotFoundError:
+    __version__ = "0+unknown"
+def main(argv: list[str] | None = None) -> int:
+    """Minimal CLI entrypoint for package metadata and help output."""
+    parser = argparse.ArgumentParser(
+        prog="muxpack", description="Tools to handle multiplex network data."
+    )
+    parser.add_argument(
+        "--version", action="version", version=f"%(prog)s {__version__}"
+    )
+    parser.parse_args(argv)
+    parser.print_help()
+    return 0
+__all__ = [
+    "check_edges",
+    "check_vertices",
+    "read_multiplexseries",
+    "Multiplex",
+    "MultiplexSeries",
+    "save_multiplexseries",
+    "to_csr_matrix",
+    "Bipartite",
+    "main",
+]

{muxpack-0.1.0 → muxpack-0.2.0.dev1}/src/muxpack/check.py RENAMED Viewed

@@ -1,3 +1,9 @@
+"""Validation helpers for edge and vertex ibis tables.
+The functions in this module are used by :class:`muxpack.Multiplex` and
+:class:`muxpack.MultiplexSeries` to validate required columns and value types.
+"""
 from ibis.expr.types import Table
 from ibis import dtype
@@ -30,9 +36,11 @@ def check_edges(edges: Table, check_period=True) -> bool:
     if not check_period:
         expect_types.pop("period", None)
-    if check_column_type(edges, expect_types):
-        return True
+    opt_types = {"weight": "numeric"}
+    if check_column_type(edges, expect_types, optional=False):
+        if check_column_type(edges, opt_types, optional=True):
+            return True
     return False
@@ -58,13 +66,15 @@ def check_vertices(vertices: Table, check_period=True) -> bool:
     if check_period:
         expect_types["period"] = "integer"
-    if not check_column_type(vertices, expect_types):
+    if not check_column_type(vertices, expect_types, optional=False):
         return False
     return True
-def check_column_type(t: Table, expected_types: dict[str, str]) -> bool:
+def check_column_type(
+    t: Table, expected_types: dict[str, str], optional: bool = False
+) -> bool:
     """
     Check that the columns in a table have the expected types.
@@ -72,15 +82,21 @@ def check_column_type(t: Table, expected_types: dict[str, str]) -> bool:
         - t: the table to check.
         - expected_types: dictionary mapping column names to expected type strings
           (e.g., ``"integer"``, ``"string"``).
+        - optional: accept that the column does not exist.
     Returns:
         - ``True`` if all specified columns exist and have the expected types, ``False`` otherwise.
     """
     for column, expected_type in expected_types.items():
+        if column not in t.columns:
+            if optional is True:
+                logger.info(f"Optional column '{column}' is missing.")
+                continue
+            else:
+                logger.warning(f"Column '{column}' is missing.")
+                return False
         col = t[column]
-        if col is None:
-            logger.warning(f"Column '{column}' is missing.")
-            return False
         coltype = col.type()
         if expected_type == "integer" and coltype.is_integer():
             continue

{muxpack-0.1.0 → muxpack-0.2.0.dev1}/src/muxpack/io.py RENAMED Viewed

@@ -1,3 +1,9 @@
+"""Input and output helpers for the muxpack on-disk layout.
+This module provides low-level read/write functions used by high-level classes
+such as :class:`muxpack.Multiplex` and :class:`muxpack.MultiplexSeries`.
+"""
 import ibis
 from muxpack.bipartite import Bipartite
@@ -6,13 +12,14 @@ from pathlib import Path
 import os
 import logging
 from typing import Tuple
+from ibis import _
 logger = logging.getLogger(__name__)
-def load_network(dir: Path) -> MultiplexSeries:
+def read_multiplexseries(dir: Path) -> MultiplexSeries:
     """
-    Load a multiplex network from a directory containing Parquet files.
+    Load a multiplex series from a directory containing Parquet files.
     The expected directory structure is::
@@ -42,8 +49,18 @@ def load_network(dir: Path) -> MultiplexSeries:
         logger.info(f"No vertices found: {e}")
         vertices = None
+    relationtypes = None
+    relationtypes_file = Path(dir) / "relationtypes.parquet"
+    legacy_relationtypes_file = Path(dir) / "relationtypes.csv"
     try:
-        relationtypes = ibis.read_parquet(f"{dir}/*/relationtypes.csv")
+        if relationtypes_file.exists():
+            relationtypes = con.read_parquet(
+                str(relationtypes_file), table_name="relationtypes"
+            )
+        elif legacy_relationtypes_file.exists():
+            relationtypes = con.read_csv(
+                str(legacy_relationtypes_file), table_name="relationtypes"
+            )
     except Exception as e:
         logger.info(f"No relationtypes found: {e}")
         relationtypes = None
@@ -52,74 +69,74 @@ def load_network(dir: Path) -> MultiplexSeries:
     return m
-def save_network(
-    edges: ibis.Table,
-    vertices: ibis.Table,
-    dir: Path | str,
-    existing_data_behavior="delete_matching",
-    **kwargs,
-) -> Tuple[ibis.Table, ibis.Table]:
-    """
-    Save edges and vertices to disk following the muxpack directory structure.
-    The directory and all sub-directories are created if they do not exist.
-    Edges and vertices are not validated for consistency.
-    Args:
-        - edges: edge table to save.
-        - vertices: vertex table to save.
-        - dir: root path where the network will be saved.
-        - existing_data_behavior: passed through to ``pyarrow.dataset.write_dataset``.
-        - **kwargs: additional keyword arguments forwarded to ``pyarrow.dataset.write_dataset``.
-    Returns:
-        - Tuple of ``(edges, vertices)`` table objects pointing to the saved files.
-    """
-    E = edges
-    V = vertices
-    dir = Path(dir)
-    logger.info(f"Saving network to {dir}...")
-    # We do a manual partitioning to have maximum control.
-    # alternative and potentially more efficient would be partitioning using
-    # duckdb, however, that would pose some problems:
-    # - Hive naming convention does not follow the muxpack specification
-    # - Hive partitioning removes columns that are partitioned.
-    periods = E[["period"]].distinct().to_pandas().period
-    for period in periods:
-        period_dir = dir / f"{period}"
-        os.makedirs(period_dir, exist_ok=True)
-        # writing vertices
-        vertices_file = period_dir / "vertices.parquet"
-        V_period = V.filter(V.period == period)
-        V_period.to_parquet(vertices_file)
-        # writing edges
-        edges_dir = period_dir / "edges"
-        os.makedirs(edges_dir, exist_ok=True)
-        E_period = E.filter(E.period == period)
-        layers = E_period[["layer"]].distinct().to_pandas().layer
-        logger.info(f"layers: {layers}")
-        for layer in layers:
-            layer_dir = edges_dir / f"{layer}"
-            # TODO further partition?
-            os.makedirs(layer_dir, exist_ok=True)
-            E_period_layer = E_period.filter(E_period.layer == layer).order_by(
-                ["src", "relationtype", "dst"]
-            )
-            E_period_layer.to_parquet_dir(
-                layer_dir, existing_data_behavior=existing_data_behavior, **kwargs
-            )
-            logger.info(f"\t\tSaved layer {layer}")
-        logger.info(f"\tFinished saving period {period}")
-    logger.info(f"Finished saving network to {dir}.")
-    con = ibis.duckdb.connect()
-    edges = con.read_parquet(f"{dir}/*/edges/**/*.parquet", table_name="edges")
-    vertices = con.read_parquet(f"{dir}/*/vertices.parquet", table_name="vertices")
-    return edges, vertices
+# def save_multiplexseries(
+#     edges: ibis.Table,
+#     vertices: ibis.Table,
+#     dir: Path | str,
+#     existing_data_behavior="delete_matching",
+#     **kwargs,
+# ) -> Tuple[ibis.Table, ibis.Table]:
+#     """
+#     Save edges and vertices to disk following the muxpack directory structure.
+#     The directory and all sub-directories are created if they do not exist.
+#     Edges and vertices are not validated for consistency.
+#     Args:
+#         - edges: edge table to save.
+#         - vertices: vertex table to save.
+#         - dir: root path where the network will be saved.
+#         - existing_data_behavior: passed through to ``pyarrow.dataset.write_dataset``.
+#         - **kwargs: additional keyword arguments forwarded to ``pyarrow.dataset.write_dataset``.
+#     Returns:
+#         - Tuple of ``(edges, vertices)`` table objects pointing to the saved files.
+#     """
+#     E = edges
+#     V = vertices
+#     dir = Path(dir)
+#     logger.info(f"Saving network to {dir}...")
+#     # We do a manual partitioning to have maximum control.
+#     # alternative and potentially more efficient would be partitioning using
+#     # duckdb, however, that would pose some problems:
+#     # - Hive naming convention does not follow the muxpack specification
+#     # - Hive partitioning removes columns that are partitioned.
+#     periods = E[["period"]].distinct().period.to_list()
+#     for period in periods:
+#         period_dir = dir / f"{period}"
+#         os.makedirs(period_dir, exist_ok=True)
+#         # writing vertices
+#         vertices_file = period_dir / "vertices.parquet"
+#         V_period = V.filter(V.period == period)
+#         V_period.to_parquet(vertices_file)
+#         # writing edges
+#         edges_dir = period_dir / "edges"
+#         os.makedirs(edges_dir, exist_ok=True)
+#         E_period = E.filter(E.period == period)
+#         layers = E_period[["layer"]].distinct().layer.to_list()
+#         logger.info(f"layers: {layers}")
+#         for layer in layers:
+#             layer_dir = edges_dir / f"{layer}"
+#             # TODO further partition?
+#             os.makedirs(layer_dir, exist_ok=True)
+#             E_period_layer = E_period.filter(E_period.layer == layer).order_by(
+#                 ["src", "relationtype", "dst"]
+#             )
+#             E_period_layer.to_parquet_dir(
+#                 layer_dir, existing_data_behavior=existing_data_behavior, **kwargs
+#             )
+#             logger.info(f"\t\tSaved layer {layer}")
+#         logger.info(f"\tFinished saving period {period}")
+#     logger.info(f"Finished saving network to {dir}.")
+#     con = ibis.duckdb.connect()
+#     edges = con.read_parquet(f"{dir}/*/edges/**/*.parquet", table_name="edges")
+#     vertices = con.read_parquet(f"{dir}/*/vertices.parquet", table_name="vertices")
+#     return edges, vertices
 def save_multiplex(
@@ -135,13 +152,21 @@ def save_multiplex(
     The directory and all sub-directories are created if they do not exist.
     Edges and vertices are not validated for consistency.
-    Args:
-        - edges: edge table to save.
-        - vertices: vertex table to save.
-        - period: the period for this multiplex, or ``None`` to skip period filtering.
-        - dir: root path where the multiplex will be saved.
-        - existing_data_behavior: passed through to ``pyarrow.dataset.write_dataset``.
-        - **kwargs: additional keyword arguments forwarded to ``pyarrow.dataset.write_dataset``.
+    Parameters
+    ----------
+    edges
+        Edge table to save.
+    vertices
+        Vertex table to save.
+    dir
+        Root path where the multiplex will be saved.
+    period
+        Period for this multiplex. If ``None``, all rows in ``edges`` are written.
+    existing_data_behavior
+        Passed through to ``pyarrow.dataset.write_dataset``.
+    kwargs
+        Additional keyword arguments forwarded to
+        ``pyarrow.dataset.write_dataset``.
     Returns:
         - Tuple of ``(edges, vertices)`` table objects pointing to the saved files.
@@ -163,21 +188,23 @@ def save_multiplex(
     vertices_file = dir / "vertices.parquet"
     if period is not None:
         # test if period column is there, if not add it to
-        V = V.filter(V.period == period)
+        V = V.filter(_.period == period)
     V.to_parquet(vertices_file)
     # writing edges
     edges_dir = dir / "edges"
     os.makedirs(edges_dir, exist_ok=True)
-    E_period = E.filter(E.period == period)
-    layers = E_period[["layer"]].distinct().to_pandas().layer
+    E_period = E
+    if period is not None:
+        E_period = E.filter(_.period == period)
+    layers = E_period[["layer"]].distinct().layer.to_list()
     logger.info(f"layers: {layers}")
     for layer in layers:
         layer_dir = edges_dir / f"{layer}"
         # TODO further partition?
         os.makedirs(layer_dir, exist_ok=True)
-        E_period_layer = E_period.filter(E_period.layer == layer).order_by(
+        E_period_layer = E_period.filter(_.layer == layer).order_by(
             ["src", "relationtype", "dst"]
         )
         E_period_layer.to_parquet_dir(
@@ -193,24 +220,54 @@ def save_multiplex(
 def save_multiplexseries(
-    edges: ibis.Table, vertices: ibis.Table, dir: Path | str
-) -> None:
+    edges: ibis.Table,
+    vertices: ibis.Table,
+    dir: Path | str,
+    relationtypes: ibis.Table | None = None,
+    existing_data_behavior="delete_matching",
+    **kwargs,
+) -> Tuple[ibis.Table, ibis.Table]:
     """
-    Save a multiplex series to disk by writing each period as a separate sub-directory.
+    Save edges and vertices to disk following the muxpack directory structure.
+    The directory and all sub-directories are created if they do not exist.
+    Edges and vertices are not validated for consistency.
     Args:
-        - edges: edge table with a ``period`` column.
-        - vertices: vertex table with a ``period`` column.
-        - dir: root path where the multiplex series will be saved.
+        - edges: edge table to save.
+        - vertices: vertex table to save.
+        - relationtypes: optional relationtype metadata table to save at root level.
+        - dir: root path where the network will be saved.
+        - existing_data_behavior: passed through to ``pyarrow.dataset.write_dataset``.
+        - **kwargs: additional keyword arguments forwarded to ``pyarrow.dataset.write_dataset``.
+    Returns:
+        - Tuple of ``(edges, vertices)`` table objects pointing to the saved files.
     """
     dir = Path(dir)
-    periods = (
-        edges.select("period").distinct().to_pyarrow().column("period").to_pylist()
+    os.makedirs(dir, exist_ok=True)
+    periods: list[str] = (
+        edges.select("period").distinct().order_by("period").period.to_list()
     )
     for period in periods:
         E = edges.filter(edges.period == period)
         V = vertices.filter(vertices.period == period)
-        save_multiplex(edges=E, vertices=V, dir=dir / period)
+        speriod = f"{period}"
+        save_multiplex(
+            edges=E,
+            vertices=V,
+            dir=dir / speriod,
+            period=period,
+            existing_data_behavior=existing_data_behavior,
+            **kwargs,
+        )
+    if relationtypes is not None:
+        relationtypes.to_parquet(dir / "relationtypes.parquet")
+    mp = read_multiplexseries(dir)
+    return mp.edges, mp.vertices
 def save_bipartite(
@@ -259,13 +316,13 @@ def read_bipartite(dir: Path | str) -> Bipartite:
     role_src = metadata["role_src"]
     role_dst = metadata["role_dst"]
     relationtype = metadata["relationtype"]
-    return BiPartite(
+    return Bipartite(
         edges=edges, role_src=role_src, role_dst=role_dst, relationtype=relationtype
     )
 if __name__ == "__main__":
     logging.basicConfig(level=logging.INFO)
-    m = load_network("data")
+    m = read_multiplexseries("data")
-    save_network(edges=m.edges, vertices=m.vertices, dir="data2")
+    save_multiplexseries(edges=m.edges, vertices=m.vertices, dir="data2")

{muxpack-0.1.0 → muxpack-0.2.0.dev1}/src/muxpack/multiplex.py RENAMED Viewed

@@ -1,3 +1,9 @@
+"""Single-period multiplex graph representation.
+This module defines :class:`Multiplex`, a validated container around ibis edge
+and vertex tables with helpers for conversions and degree summaries.
+"""
 import ibis
 from .check import check_edges, check_vertices
@@ -19,7 +25,7 @@ class Multiplex:
     For multiple periods, use MultiplexSeries.
     """
-    #: The edges of the multiplex. This is a table with columns "src", "dst", "layer" and "relationtype".
+    #: The edges of the multiplex. This is a table with columns "src", "dst", "layer","relationtype" and optionally weight.
     edges: ibis.Table
     #: The vertices of the multiplex. This is a table with a column "id" and optional additional columns.
@@ -60,7 +66,7 @@ class Multiplex:
         Returns:
             - List of layer names.
         """
-        layers = self.edges[["layer"]].distinct().to_pandas().layer.tolist()
+        layers = self.edges[["layer"]].distinct().layer.to_list()
         return layers
     def update_vertices(self) -> None:
@@ -75,30 +81,62 @@ class Multiplex:
         V = src.union(dst, distinct=True).to_pyarrow()
         self.vertices = ibis.memtable(V)
-    def to_csr_matrix(self) -> csr_matrix[bool]:
+    def to_csr_matrix(
+        self, use_weight: bool | str | ibis.Value = False
+    ) -> csr_matrix:
         """
         Transform the multiplex into a sparse matrix, collapsing all layers into one.
         To keep layers separate, use ``to_csr_matrices`` instead.
+        Args:
+            - use_weight: optional column in the edges table to use as weights for the adjacency matrix. If False, the adjacency matrix will be unweighted (boolean).
+            if True, the method will look for a column named "weight" in the edges table. If a string is provided, it will be used as the name of the weight column.
+              If not provided, the adjacency matrix will be unweighted (boolean).
         Returns:
             - Sparse boolean matrix of shape ``(n_vertices, n_vertices)``.
         """
         from .to_csr_matrix import to_row_col_idx, idx_to_csr_matrix
-        idx = to_row_col_idx(self.edges, self.vertices)
-        M = idx_to_csr_matrix(idx, self.vertices)
+        E = self.edges
+        V = self.vertices
+        if use_weight is True:
+            weight = "weight"
+        elif isinstance(use_weight, str):
+            E[["weight"]] = E[[use_weight]]
+        elif isinstance(use_weight, ibis.Value):
+            weight = "weight"
+            E = E.mutate(weight=weight)
+        else:
+            if ("weight" in E.columns) and (use_weight is False):
+                logger.warning(
+                    "Weight column 'weight' found in edges table, but use_weight is False. Ignoring weight column."
+                )
+                E = E.drop(["weight"], errors="ignore")
+        if (use_weight is not False) and (weight not in E.columns):
+            raise ValueError(f"Weight column '{weight}' not found in edges table")
+        idx = to_row_col_idx(E, V)
+        M = idx_to_csr_matrix(idx, V)
         return M
-    def to_csr_matrices(self) -> dict[str, csr_matrix]:
+    def to_csr_matrices(self, layers: list[str] | None = None) -> dict[str, csr_matrix]:
         """
         Transform the multiplex into a dictionary of sparse matrices, one per layer.
+        Args:
+            - layers: optional list of layer names to include. If None, all layers are included.
         Returns:
             - Dictionary mapping layer name to a sparse boolean matrix of shape ``(n_vertices, n_vertices)``.
         """
         from .to_csr_matrix import to_row_col_idx, idx_to_csr_matrix
-        layers = self.layers()
+        # Maybe turn this into a generator instead of a dict, to avoid loading all matrices into memory at once.
+        layers = self.layers() if layers is None else layers
         matrices = {}
         for layer in layers:
             idx = to_row_col_idx(
@@ -108,6 +146,26 @@ class Multiplex:
             matrices[layer] = M
         return matrices
+    def outdegree(self, by_layer: bool = False) -> ibis.Table:
+        """
+        Compute the out-degree of each vertex in the multiplex.
+        Args:
+            - by_layer: if True, compute the out-degree separately for each layer.
+        Returns:
+            - by_layer=False: Table with columns "id" and "out_degree", where "id" is the vertex id and "out_degree" is the total number of outgoing edges from that vertex across all layers.
+            - by_layer=True: Table with columns "id", "layer", and "out_degree", where "id" is the vertex id, "layer" is the layer name, and "out_degree" is the number of outgoing edges from that vertex in that layer.
+        """
+        E = self.edges
+        gb = ["src"]
+        if by_layer:
+            gb.append("layer")
+        outdegree = E.group_by(gb).aggregate(outdegree=E.count()).rename(id="src")
+        return outdegree
     def to_networkx(self) -> nx.MultiDiGraph:
         """
         Convert the multiplex to a NetworkX MultiDiGraph.
@@ -136,6 +194,12 @@ class Multiplex:
             self.update_vertices()
             vertices = self.vertices
         period = self.period
-        edges, vertices = io.save_multiplex(edges, vertices, period, dir=dir, **kw_args)
+        edges, vertices = io.save_multiplex(
+            edges=edges,
+            vertices=vertices,
+            dir=dir,
+            period=period,
+            **kw_args,
+        )
         self.edges = edges
         self.vertices = vertices

{muxpack-0.1.0 → muxpack-0.2.0.dev1}/src/muxpack/multiplexseries.py RENAMED Viewed

@@ -1,11 +1,22 @@
+"""Multi-period multiplex graph representation.
+This module defines :class:`MultiplexSeries`, which stores edges across periods
+and exposes filtering, per-period access, collapsing, and persistence helpers.
+"""
 import ibis
+from ibis import _
+from muxpack.networkx import to_MultiDiGraph
 from .check import check_edges, check_vertices
 from pathlib import Path
 from . import io
 from .multiplex import Multiplex
 import logging
-from typing import Tuple
+from typing import Generator, Tuple
+from scipy.sparse import csr_matrix
+import networkx as nx
 logger = logging.getLogger(__name__)
@@ -57,7 +68,7 @@ class MultiplexSeries:
         self.vertices = vertices
         self.relationtypes = relationtypes
-        if not vertices is None:
+        if vertices is not None:
             logger.info("Vertices table provided, using it as is.")
             self.vertex_ids = vertices[["id"]].distinct()
@@ -69,14 +80,9 @@ class MultiplexSeries:
             - Sorted list of period values.
         """
         periods = (
-            self.edges.select(self.edges.period)
-            .distinct()
-            .order_by("period")
-            .to_pyarrow()
-            .column("period")
-            .to_pylist()
+            self.edges.select("period").distinct().order_by("period").period.to_list()
         )
-        # periods = self.edges[["period"]].distinct().to_pandas().period.tolist()
+        # periods = self.edges[["period"]].distinct().to_pandas().period.to_list()
         return periods
     def layers(self) -> list[str]:
@@ -86,16 +92,46 @@ class MultiplexSeries:
         Returns:
             - Sorted list of layer names.
         """
-        layers = (
-            self.edges.select(self.edges.layer)
-            .distinct()
-            .order_by("layer")
-            .to_pyarrow()
-            .column("layer")
-            .to_pylist()
-        )
+        layers = self.edges.select("layer").distinct().order_by("layer").layer.to_list()
         return layers
+    def to_csr_matrices(
+        self, periods: list[int] | None = None
+    ) -> Generator[Tuple[csr_matrix, int]]:
+        """
+        Generate a sparse matrix for each period. The indices of the matrix correspond to
+        the rownumber the ``vertex_ids`` table.
+        Args:
+            - periods: list of periods to generate matrices for. If empty, all periods
+              present in ``edges`` are used.
+        """
+        from .to_csr_matrix import to_csr_matrix
+        if periods is None:
+            periods = self.periods()
+        for period in periods:
+            E_y = self.edges.filter(_.period == period)
+            yield to_csr_matrix(E_y, self.vertex_ids), period
+    def to_networkx(
+        self, periods: list[int] | None = None
+    ) -> Generator[Tuple[nx.MultiDiGraph, int]]:
+        """
+        Generate a NetworkX MultiDiGraph for each period.
+        Args:
+            - periods: list of periods to generate graphs for. If empty, all periods
+              present in ``edges`` are used.
+        """
+        if periods is None:
+            periods = self.periods()
+        for period in periods:
+            E_y = self.edges.filter(_.period == period)
+            yield to_MultiDiGraph(E_y, self.vertex_ids), period
     def update_vertices(self) -> None:
         """
         Update the vertices table by deriving it from the edges table.
@@ -163,8 +199,7 @@ class MultiplexSeries:
     def add_filter(
         self,
         periods: list[int] = None,
-        layers: list[str] = None,
-        relationtypes: list[int] = None,
+        layers: dict[str, list[int] | None] = None,
         src: list[int] = None,
         dst: list[int] = None,
     ) -> None:
@@ -179,8 +214,7 @@ class MultiplexSeries:
         Args:
             - periods: list of periods to keep.
-            - layers: list of layer names to keep.
-            - relationtypes: list of relationtype values to keep.
+            - layers: dict of {layer:[relationtype]} to keep. Use ``None`` for the list of relationtypes to keep all relationtypes for that layer.
             - src: list of source vertex ids (ego) to keep.
             - dst: list of destination vertex ids (non-ego) to keep.
         """
@@ -189,23 +223,36 @@ class MultiplexSeries:
         flt: list[ibis.BooleanValue] = []
         if periods is not None and len(periods) > 0:
-            flt.append(E.period.isin(periods))
+            flt.append(_.period.isin(periods))
         if layers is not None and len(layers) > 0:
-            flt.append(E.layer.isin(layers))
-        if relationtypes is not None and len(relationtypes) > 0:
-            flt.append(E.relationtype.isin(relationtypes))
+            rt = []
+            if not isinstance(layers, dict):
+                raise ValueError("layers must be a dict of {layer:[relationtype]|None}")
+            sl = self.layers()
+            for layer, relationtypes in layers.items():
+                if layer not in sl:
+                    raise ValueError(f"Layer '{layer}' not found in multiplex series")
+                e = _.layer == layer
+                if relationtypes is not None:
+                    e = ibis.and_(e, _.relationtype.isin(relationtypes))
+                rt.append(e)
+            if len(rt) > 1:
+                flt.append(ibis.or_(rt))
+            elif len(rt) == 1:
+                flt.append(e)
         if src is not None and len(src) > 0:
             vid = ibis.memtable({"id": src})
             # we use semi join because we expect the vertex list to be large
-            E = E.semi_join(vid, E.src == vid.id)
+            E = E.semi_join(vid, _.src == vid.id)
         if dst is not None and len(dst) > 0:
             vid = ibis.memtable({"id": dst})
             # we use semi join because we expect the vertex list to be large
-            E = E.semi_join(vid, E.dst == vid.id)
+            E = E.semi_join(vid, _.dst == vid.id)
         logger.debug("Filter: f{flt}")
         if len(flt):
@@ -213,6 +260,19 @@ class MultiplexSeries:
         self.edges = E
+    def __str__(self) -> str:
+        """
+        Return a string representation of the multiplex series.
+        Returns:
+            - String with the number of edges and vertices, plus the periods and layers.
+        """
+        n_edges = self.edges.count().execute()
+        n_vertices = self.vertex_ids.count().execute()
+        periods = self.periods()
+        layers = self.layers()
+        return f"MultiplexSeries\n  Edges: {n_edges}\n  Vertices: {n_vertices}\n  Periods: {periods}\n  Layers: {layers}"
     def __copy__(self) -> "MultiplexSeries":
         """
         Return a shallow copy of this MultiplexSeries.
@@ -220,9 +280,9 @@ class MultiplexSeries:
         Returns:
             - A new MultiplexSeries sharing the same ``edges`` and ``vertices`` tables.
         """
-        return MultiplexSeries(self.edges, self.vertices)
+        return MultiplexSeries(self.edges, self.vertices, self.relationtypes)
-    def collapse(self) -> Multiplex:
+    def collapse(self, period: int | None = None) -> Multiplex:
         """
         Collapse the multiplex series into a single Multiplex by discarding period
         information. Duplicate edges across periods are removed. This is useful
@@ -236,7 +296,7 @@ class MultiplexSeries:
             V = self.vertices.select("id").distinct()
         else:
             V = None
-        return Multiplex(edges=E, vertices=V, period=None)
+        return Multiplex(edges=E, vertices=V, period=period)
     def collapse_to(self, dir: Path | str) -> None:
         """
@@ -260,14 +320,22 @@ class MultiplexSeries:
         Args:
             - dir: path to the directory where the MultiplexSeries will be saved.
-            - **kw_args: additional keyword arguments forwarded to ``io.save_network``.
+            - **kw_args: additional keyword arguments forwarded to
+              ``io.save_multiplexseries``.
         """
         edges = self.edges
         vertices = self.vertices
+        relationtypes = self.relationtypes
         if vertices is None:
             mp = MultiplexSeries(edges=self.edges)
             mp.update_vertices()
             vertices = mp.vertices
-        E, V = io.save_network(edges, vertices, dir=dir, **kw_args)
+        E, V = io.save_multiplexseries(
+            edges=edges,
+            vertices=vertices,
+            relationtypes=relationtypes,
+            dir=dir,
+            **kw_args,
+        )
         self.edges = E
         self.vertices = V

{muxpack-0.1.0 → muxpack-0.2.0.dev1}/src/muxpack/networkx.py RENAMED Viewed

@@ -1,3 +1,5 @@
+"""Conversion helpers from muxpack data structures to NetworkX graphs."""
 import networkx as nx
 import ibis
 from .to_csr_matrix import to_csr_matrix

{muxpack-0.1.0 → muxpack-0.2.0.dev1}/src/muxpack/to_csr_matrix.py RENAMED Viewed

@@ -1,7 +1,8 @@
-from ibis import row_number, Table
+"""Sparse matrix conversion utilities for multiplex edge tables."""
+from ibis import row_number, Table, _
 import ibis
 from scipy.sparse import csr_matrix
-from muxpack.multiplex import Multiplex
 from typing import Tuple, Generator
 import logging
@@ -10,7 +11,7 @@ logger = logging.getLogger(__name__)
 # from collections.abc import Generator
-def to_row_col_idx(edges: Table, vertices: Table) -> Table:
+def to_row_col_idx(edges: Table, vertices: Table, use_weight: bool = False) -> Table:
     """
     Turn an edge list into a row/column index table based on the given vertices table.
@@ -28,22 +29,34 @@ def to_row_col_idx(edges: Table, vertices: Table) -> Table:
     row = v.select(src="id", row="idx")
     col = v.select(dst="id", col="idx")
-    # may sum the number of columns
-    idx_edges = (
-        edges[["src", "dst"]]
-        .distinct()
-        .inner_join(row, "src")
-        .inner_join(col, "dst")
-        .mutate(data=True)
-        .select("data", "row", "col")
-    )
-    logger.debug(
-        f"Created row-col index table with {idx_edges.count().execute()} edges."
-    )
+    if use_weight:
+        idx_edges = (
+            edges.aggregate(weight=_.weight.sum(), by=["src", "dst"])
+            .inner_join(row, "src")
+            .inner_join(col, "dst")
+            .mutate(data=True)
+            .select("data", "row", "col", "weight")
+        )
+        logger.debug("Created weighted row-col index tables.")
+    else:
+        # may sum the number of columns
+        idx_edges = (
+            edges[["src", "dst"]]
+            .distinct()
+            .inner_join(row, "src")
+            .inner_join(col, "dst")
+            .mutate(data=True)
+            .select("data", "row", "col")
+        )
+        logger.debug("Created row-col index table with edges.")
     return idx_edges
-def idx_to_csr_matrix(idx: Table, vertices: Table) -> csr_matrix:
+def idx_to_csr_matrix(
+    idx: Table, vertices: Table, use_weight: bool = False
+) -> csr_matrix:
     """
     Convert a row-column index table to a CSR sparse matrix.
@@ -65,50 +78,48 @@ def idx_to_csr_matrix(idx: Table, vertices: Table) -> csr_matrix:
     return M
-def to_csr_matrix(edges: Table, vertices: Table | None) -> csr_matrix:
+def to_csr_matrix(edges: Table, vertices: Table) -> csr_matrix:
     """
     Transform an edge list into a sparse matrix (csr_matrix).
     Args:
         - edges: table with ``src`` and ``dst`` columns.
         - vertices: table with an ``id`` column; edges are filtered to vertices present
-          in this table. Pass ``None`` to derive vertices from the edges table.
+          in this table.
     Returns:
         - Square CSR sparse matrix of shape ``(n_vertices, n_vertices)``.
     """
     # vertices may contain multiple periods
-    if vertices is not None:
-        vertices = vertices[["id"]].distinct()
+    vertices = vertices[["id"]].distinct()
     edges_row_col = to_row_col_idx(edges, vertices=vertices)
     M = idx_to_csr_matrix(edges_row_col, vertices=vertices)
     return M
 def to_period_csr_matrix(
-    edges: Table, vertices: Table | None, periods: list[int] = []
+    edges: Table, vertices: Table, periods: list[int] | None = None
 ) -> Generator[Tuple[csr_matrix, int]]:
     """
-    Generate a sparse matrix for each period.
+    Generate a sparse matrix for each period. The indices of the matrix correspond to
+    the row number in the ``vertices`` table.
     Args:
         - edges: table with columns ``src``, ``dst``, and ``period``.
-        - vertices: table with columns ``id`` and ``period``, or ``None`` to derive
-          vertices from the edges table for each period.
+        - vertices: table with an ``id`` column, shared by all periods; edges are
+          filtered to the vertices present in this table.
         - periods: list of periods to generate matrices for. If empty, all periods
           present in ``edges`` are used.
     Returns:
         - Generator of ``(csr_matrix, period)`` tuples, one per period.
     """
-    if len(periods) == 0:
-        periods = edges[["period"]].distinct().to_pandas().period.tolist()
+    if periods is None or len(periods) == 0:
+        periods = edges[["period"]].distinct().period.to_list()
     for period in periods:
-        E_y = edges.filter(edges.period == period)
-        if vertices is not None:
-            V_y = vertices.filter(vertices.period == period)
-        else:
-            V_y = None
+        E_y = edges.filter(_.period == period)
+        V_y = vertices
         yield to_csr_matrix(E_y, V_y), period
@@ -129,4 +140,4 @@ if __name__ == "__main__":
     print(f"M1 = {M1}")
     M = to_csr_matrix(E, V)
-    print(M)
+    print(M)

muxpack-0.1.0/src/muxpack/__init__.py DELETED Viewed

@@ -1,17 +0,0 @@
-from .check import check_edges, check_vertices
-from .io import load_network, save_network
-from .multiplexseries import MultiplexSeries
-from .multiplex import Multiplex
-from .to_csr_matrix import to_csr_matrix
-from .bipartite import Bipartite
-__all__ = [
-    "check_edges",
-    "check_vertices",
-    "load_network",
-    "Multiplex",
-    "MultiplexSeries",
-    "save_network",
-    "to_csr_matrix",
-    "Bipartite",
-]

{muxpack-0.1.0 → muxpack-0.2.0.dev1}/src/muxpack/bipartite.py RENAMED Viewed

File without changes

{muxpack-0.1.0 → muxpack-0.2.0.dev1}/src/muxpack/py.typed RENAMED Viewed

File without changes

muxpack 0.1.0__tar.gz → 0.2.0.dev1__tar.gz

muxpack 0.1.0tar.gz → 0.2.0.dev1tar.gz