muxpack 0.1.0__tar.gz → 0.2.0.dev1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,17 +1,20 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: muxpack
3
- Version: 0.1.0
3
+ Version: 0.2.0.dev1
4
4
  Summary: Tools to handle multiplex network data more easily
5
5
  Author: Edwin de Jonge, Jan van der Laan
6
6
  Author-email: Edwin de Jonge <edwindjonge@gmail.com>, Jan van der Laan <djvanderlaan@gmail.com>
7
- Requires-Dist: duckdb>=1.4.4
7
+ Classifier: Programming Language :: Python :: 3
8
+ Classifier: Programming Language :: Python :: 3.11
9
+ Classifier: Programming Language :: Python :: 3.12
10
+ Classifier: Programming Language :: Python :: 3.13
11
+ Classifier: Programming Language :: Python :: 3.14
12
+ Classifier: Programming Language :: Python :: Implementation :: CPython
13
+ Classifier: Programming Language :: Python :: Implementation :: PyPy
8
14
  Requires-Dist: ibis-framework[duckdb]>=12.0.0
9
15
  Requires-Dist: networkx>=3.6.1
10
- Requires-Dist: pandas>=3.0.1
11
- Requires-Dist: pyarrow>=23.0.1
12
- Requires-Dist: pyarrow-hotfix>=0.7
13
16
  Requires-Dist: scipy>=1.17.1
14
- Requires-Python: >=3.13
17
+ Requires-Python: >=3.11
15
18
  Project-URL: Homepage, https://codeberg.org/CBS-Networktools/muxpack.py
16
19
  Project-URL: Documentation, https://readthedocs.org
17
20
  Project-URL: Repository, https://codeberg.org/CBS-Networktools/muxpack.py
@@ -22,6 +25,8 @@ Description-Content-Type: text/markdown
22
25
 
23
26
  ## Muxpack
24
27
 
28
+ [![Python package](https://github.com/edwindj/muxpack.py/actions/workflows/python.yml/badge.svg)](https://github.com/edwindj/muxpack.py/actions/workflows/python.yml)
29
+
25
30
  Muxpack is a Python implementation for working with multiplex network files.
26
31
 
27
32
  ## Documentation
@@ -33,4 +38,4 @@ uv sync --group docs
33
38
  uv run sphinx-build -b html docs docs/_build/html
34
39
  ```
35
40
 
36
- The generated HTML is available in `docs/_build/html/index.html`.
41
+ The generated HTML is available in `docs/_build/html/index.html`.
@@ -2,6 +2,8 @@
2
2
 
3
3
  ## Muxpack
4
4
 
5
+ [![Python package](https://github.com/edwindj/muxpack.py/actions/workflows/python.yml/badge.svg)](https://github.com/edwindj/muxpack.py/actions/workflows/python.yml)
6
+
5
7
  Muxpack is a Python implementation for working with multiplex network files.
6
8
 
7
9
  ## Documentation
@@ -13,4 +15,4 @@ uv sync --group docs
13
15
  uv run sphinx-build -b html docs docs/_build/html
14
16
  ```
15
17
 
16
- The generated HTML is available in `docs/_build/html/index.html`.
18
+ The generated HTML is available in `docs/_build/html/index.html`.
@@ -1,20 +1,25 @@
1
1
  [project]
2
2
  name = "muxpack"
3
- version = "0.1.0"
3
+ version = "0.2.0dev1"
4
4
  description = "Tools to handle multiplex network data more easily"
5
5
  readme = "README.md"
6
+ classifiers = [
7
+ "Programming Language :: Python :: 3",
8
+ "Programming Language :: Python :: 3.11",
9
+ "Programming Language :: Python :: 3.12",
10
+ "Programming Language :: Python :: 3.13",
11
+ "Programming Language :: Python :: 3.14",
12
+ "Programming Language :: Python :: Implementation :: CPython",
13
+ "Programming Language :: Python :: Implementation :: PyPy",
14
+ ]
6
15
  authors = [
7
16
  { name = "Edwin de Jonge", email = "edwindjonge@gmail.com" },
8
17
  { name = "Jan van der Laan", email = "djvanderlaan@gmail.com" }
9
18
  ]
10
- requires-python = ">=3.13"
19
+ requires-python = ">=3.11"
11
20
  dependencies = [
12
- "duckdb>=1.4.4",
13
21
  "ibis-framework[duckdb]>=12.0.0",
14
22
  "networkx>=3.6.1",
15
- "pandas>=3.0.1",
16
- "pyarrow>=23.0.1",
17
- "pyarrow-hotfix>=0.7",
18
23
  "scipy>=1.17.1",
19
24
  ]
20
25
 
@@ -28,7 +33,7 @@ Repository = "https://codeberg.org/CBS-Networktools/muxpack.py"
28
33
  muxpack = "muxpack:main"
29
34
 
30
35
  [build-system]
31
- requires = ["uv_build>=0.10.6,<0.11.0"]
36
+ requires = ["uv_build>=0.10.6,<0.12.0"]
32
37
  build-backend = "uv_build"
33
38
 
34
39
  [dependency-groups]
@@ -37,7 +42,7 @@ dev = [
37
42
  "ruff>=0.15.4",
38
43
  ]
39
44
  docs = [
40
- "sphinx>=9.1.0",
45
+ "sphinx>=7.4,<9.0",
41
46
  "sphinx-rtd-theme>=3.0.0",
42
47
  ]
43
48
 
@@ -0,0 +1,45 @@
1
+ """Public package API for working with multiplex network data.
2
+
3
+ This module re-exports the main classes and helper functions so users can
4
+ import common functionality directly from :mod:`muxpack`.
5
+ """
6
+
7
+ from importlib.metadata import PackageNotFoundError, version
8
+ import argparse
9
+
10
+ from .check import check_edges, check_vertices
11
+ from .io import read_multiplexseries, save_multiplexseries
12
+ from .multiplexseries import MultiplexSeries
13
+ from .multiplex import Multiplex
14
+ from .to_csr_matrix import to_csr_matrix
15
+ from .bipartite import Bipartite
16
+
17
+ try:
18
+ __version__ = version("muxpack")
19
+ except PackageNotFoundError:
20
+ __version__ = "0+unknown"
21
+
22
+
23
def main(argv: list[str] | None = None) -> int:
    """Run the minimal ``muxpack`` command-line interface.

    Args:
    - argv: argument strings to parse. ``None`` is handed to ``argparse``
      unchanged, which then falls back to the process arguments.

    Returns:
    - Always ``0`` after printing the help text. ``--version`` is handled by
      the ``argparse`` "version" action before this function returns.
    """
    cli = argparse.ArgumentParser(
        description="Tools to handle multiplex network data.",
        prog="muxpack",
    )
    version_banner = f"%(prog)s {__version__}"
    cli.add_argument("--version", action="version", version=version_banner)

    # The parsed namespace is not used: the parser defines no options besides
    # --version, so a successful parse simply falls through to the usage text.
    cli.parse_args(argv)
    cli.print_help()
    return 0
34
+
35
+ __all__ = [
36
+ "check_edges",
37
+ "check_vertices",
38
+ "read_multiplexseries",
39
+ "Multiplex",
40
+ "MultiplexSeries",
41
+ "save_multiplexseries",
42
+ "to_csr_matrix",
43
+ "Bipartite",
44
+ "main",
45
+ ]
@@ -1,3 +1,9 @@
1
+ """Validation helpers for edge and vertex ibis tables.
2
+
3
+ The functions in this module are used by :class:`muxpack.Multiplex` and
4
+ :class:`muxpack.MultiplexSeries` to validate required columns and value types.
5
+ """
6
+
1
7
  from ibis.expr.types import Table
2
8
  from ibis import dtype
3
9
 
@@ -30,9 +36,11 @@ def check_edges(edges: Table, check_period=True) -> bool:
30
36
  if not check_period:
31
37
  expect_types.pop("period", None)
32
38
 
33
- if check_column_type(edges, expect_types):
34
- return True
39
+ opt_types = {"weight": "numeric"}
35
40
 
41
+ if check_column_type(edges, expect_types, optional=False):
42
+ if check_column_type(edges, opt_types, optional=True):
43
+ return True
36
44
  return False
37
45
 
38
46
 
@@ -58,13 +66,15 @@ def check_vertices(vertices: Table, check_period=True) -> bool:
58
66
  if check_period:
59
67
  expect_types["period"] = "integer"
60
68
 
61
- if not check_column_type(vertices, expect_types):
69
+ if not check_column_type(vertices, expect_types, optional=False):
62
70
  return False
63
71
 
64
72
  return True
65
73
 
66
74
 
67
- def check_column_type(t: Table, expected_types: dict[str, str]) -> bool:
75
+ def check_column_type(
76
+ t: Table, expected_types: dict[str, str], optional: bool = False
77
+ ) -> bool:
68
78
  """
69
79
  Check that the columns in a table have the expected types.
70
80
 
@@ -72,15 +82,21 @@ def check_column_type(t: Table, expected_types: dict[str, str]) -> bool:
72
82
  - t: the table to check.
73
83
  - expected_types: dictionary mapping column names to expected type strings
74
84
  (e.g., ``"integer"``, ``"string"``).
85
+ - optional: accept that the column does not exist.
75
86
 
76
87
  Returns:
77
88
  - ``True`` if all specified columns exist and have the expected types, ``False`` otherwise.
78
89
  """
79
90
  for column, expected_type in expected_types.items():
91
+ if column not in t.columns:
92
+ if optional is True:
93
+ logger.info(f"Optional column '{column}' is missing.")
94
+ continue
95
+ else:
96
+ logger.warning(f"Column '{column}' is missing.")
97
+ return False
98
+
80
99
  col = t[column]
81
- if col is None:
82
- logger.warning(f"Column '{column}' is missing.")
83
- return False
84
100
  coltype = col.type()
85
101
  if expected_type == "integer" and coltype.is_integer():
86
102
  continue
@@ -1,3 +1,9 @@
1
+ """Input and output helpers for the muxpack on-disk layout.
2
+
3
+ This module provides low-level read/write functions used by high-level classes
4
+ such as :class:`muxpack.Multiplex` and :class:`muxpack.MultiplexSeries`.
5
+ """
6
+
1
7
  import ibis
2
8
 
3
9
  from muxpack.bipartite import Bipartite
@@ -6,13 +12,14 @@ from pathlib import Path
6
12
  import os
7
13
  import logging
8
14
  from typing import Tuple
15
+ from ibis import _
9
16
 
10
17
  logger = logging.getLogger(__name__)
11
18
 
12
19
 
13
- def load_network(dir: Path) -> MultiplexSeries:
20
+ def read_multiplexseries(dir: Path) -> MultiplexSeries:
14
21
  """
15
- Load a multiplex network from a directory containing Parquet files.
22
+ Load a multiplex series from a directory containing Parquet files.
16
23
 
17
24
  The expected directory structure is::
18
25
 
@@ -42,8 +49,18 @@ def load_network(dir: Path) -> MultiplexSeries:
42
49
  logger.info(f"No vertices found: {e}")
43
50
  vertices = None
44
51
 
52
+ relationtypes = None
53
+ relationtypes_file = Path(dir) / "relationtypes.parquet"
54
+ legacy_relationtypes_file = Path(dir) / "relationtypes.csv"
45
55
  try:
46
- relationtypes = ibis.read_parquet(f"{dir}/*/relationtypes.csv")
56
+ if relationtypes_file.exists():
57
+ relationtypes = con.read_parquet(
58
+ str(relationtypes_file), table_name="relationtypes"
59
+ )
60
+ elif legacy_relationtypes_file.exists():
61
+ relationtypes = con.read_csv(
62
+ str(legacy_relationtypes_file), table_name="relationtypes"
63
+ )
47
64
  except Exception as e:
48
65
  logger.info(f"No relationtypes found: {e}")
49
66
  relationtypes = None
@@ -52,74 +69,74 @@ def load_network(dir: Path) -> MultiplexSeries:
52
69
  return m
53
70
 
54
71
 
55
- def save_network(
56
- edges: ibis.Table,
57
- vertices: ibis.Table,
58
- dir: Path | str,
59
- existing_data_behavior="delete_matching",
60
- **kwargs,
61
- ) -> Tuple[ibis.Table, ibis.Table]:
62
- """
63
- Save edges and vertices to disk following the muxpack directory structure.
64
- The directory and all sub-directories are created if they do not exist.
65
- Edges and vertices are not validated for consistency.
66
-
67
- Args:
68
- - edges: edge table to save.
69
- - vertices: vertex table to save.
70
- - dir: root path where the network will be saved.
71
- - existing_data_behavior: passed through to ``pyarrow.dataset.write_dataset``.
72
- - **kwargs: additional keyword arguments forwarded to ``pyarrow.dataset.write_dataset``.
73
-
74
- Returns:
75
- - Tuple of ``(edges, vertices)`` table objects pointing to the saved files.
76
- """
77
- E = edges
78
- V = vertices
79
- dir = Path(dir)
80
-
81
- logger.info(f"Saving network to {dir}...")
82
-
83
- # We do a manual partitioning to have maximum control.
84
- # alternative and potentially more efficient would be partitioning using
85
- # duckdb, however, that would pose some problems:
86
- # - Hive naming convention does not follow the muxpack specification
87
- # - Hive partitioning removes columns that are partitioned.
88
- periods = E[["period"]].distinct().to_pandas().period
89
-
90
- for period in periods:
91
- period_dir = dir / f"{period}"
92
- os.makedirs(period_dir, exist_ok=True)
93
-
94
- # writing vertices
95
- vertices_file = period_dir / "vertices.parquet"
96
- V_period = V.filter(V.period == period)
97
- V_period.to_parquet(vertices_file)
98
-
99
- # writing edges
100
- edges_dir = period_dir / "edges"
101
- os.makedirs(edges_dir, exist_ok=True)
102
- E_period = E.filter(E.period == period)
103
- layers = E_period[["layer"]].distinct().to_pandas().layer
104
- logger.info(f"layers: {layers}")
105
- for layer in layers:
106
- layer_dir = edges_dir / f"{layer}"
107
- # TODO further partition?
108
- os.makedirs(layer_dir, exist_ok=True)
109
- E_period_layer = E_period.filter(E_period.layer == layer).order_by(
110
- ["src", "relationtype", "dst"]
111
- )
112
- E_period_layer.to_parquet_dir(
113
- layer_dir, existing_data_behavior=existing_data_behavior, **kwargs
114
- )
115
- logger.info(f"\t\tSaved layer {layer}")
116
- logger.info(f"\tFinished saving period {period}")
117
- logger.info(f"Finished saving network to {dir}.")
118
-
119
- con = ibis.duckdb.connect()
120
- edges = con.read_parquet(f"{dir}/*/edges/**/*.parquet", table_name="edges")
121
- vertices = con.read_parquet(f"{dir}/*/vertices.parquet", table_name="vertices")
122
- return edges, vertices
72
+ # def save_multiplexseries(
73
+ # edges: ibis.Table,
74
+ # vertices: ibis.Table,
75
+ # dir: Path | str,
76
+ # existing_data_behavior="delete_matching",
77
+ # **kwargs,
78
+ # ) -> Tuple[ibis.Table, ibis.Table]:
79
+ # """
80
+ # Save edges and vertices to disk following the muxpack directory structure.
81
+ # The directory and all sub-directories are created if they do not exist.
82
+ # Edges and vertices are not validated for consistency.
83
+
84
+ # Args:
85
+ # - edges: edge table to save.
86
+ # - vertices: vertex table to save.
87
+ # - dir: root path where the network will be saved.
88
+ # - existing_data_behavior: passed through to ``pyarrow.dataset.write_dataset``.
89
+ # - **kwargs: additional keyword arguments forwarded to ``pyarrow.dataset.write_dataset``.
90
+
91
+ # Returns:
92
+ # - Tuple of ``(edges, vertices)`` table objects pointing to the saved files.
93
+ # """
94
+ # E = edges
95
+ # V = vertices
96
+ # dir = Path(dir)
97
+
98
+ # logger.info(f"Saving network to {dir}...")
99
+
100
+ # # We do a manual partitioning to have maximum control.
101
+ # # alternative and potentially more efficient would be partitioning using
102
+ # # duckdb, however, that would pose some problems:
103
+ # # - Hive naming convention does not follow the muxpack specification
104
+ # # - Hive partitioning removes columns that are partitioned.
105
+ # periods = E[["period"]].distinct().period.to_list()
106
+
107
+ # for period in periods:
108
+ # period_dir = dir / f"{period}"
109
+ # os.makedirs(period_dir, exist_ok=True)
110
+
111
+ # # writing vertices
112
+ # vertices_file = period_dir / "vertices.parquet"
113
+ # V_period = V.filter(V.period == period)
114
+ # V_period.to_parquet(vertices_file)
115
+
116
+ # # writing edges
117
+ # edges_dir = period_dir / "edges"
118
+ # os.makedirs(edges_dir, exist_ok=True)
119
+ # E_period = E.filter(E.period == period)
120
+ # layers = E_period[["layer"]].distinct().layer.to_list()
121
+ # logger.info(f"layers: {layers}")
122
+ # for layer in layers:
123
+ # layer_dir = edges_dir / f"{layer}"
124
+ # # TODO further partition?
125
+ # os.makedirs(layer_dir, exist_ok=True)
126
+ # E_period_layer = E_period.filter(E_period.layer == layer).order_by(
127
+ # ["src", "relationtype", "dst"]
128
+ # )
129
+ # E_period_layer.to_parquet_dir(
130
+ # layer_dir, existing_data_behavior=existing_data_behavior, **kwargs
131
+ # )
132
+ # logger.info(f"\t\tSaved layer {layer}")
133
+ # logger.info(f"\tFinished saving period {period}")
134
+ # logger.info(f"Finished saving network to {dir}.")
135
+
136
+ # con = ibis.duckdb.connect()
137
+ # edges = con.read_parquet(f"{dir}/*/edges/**/*.parquet", table_name="edges")
138
+ # vertices = con.read_parquet(f"{dir}/*/vertices.parquet", table_name="vertices")
139
+ # return edges, vertices
123
140
 
124
141
 
125
142
  def save_multiplex(
@@ -135,13 +152,21 @@ def save_multiplex(
135
152
  The directory and all sub-directories are created if they do not exist.
136
153
  Edges and vertices are not validated for consistency.
137
154
 
138
- Args:
139
- - edges: edge table to save.
140
- - vertices: vertex table to save.
141
- - period: the period for this multiplex, or ``None`` to skip period filtering.
142
- - dir: root path where the multiplex will be saved.
143
- - existing_data_behavior: passed through to ``pyarrow.dataset.write_dataset``.
144
- - **kwargs: additional keyword arguments forwarded to ``pyarrow.dataset.write_dataset``.
155
+ Parameters
156
+ ----------
157
+ edges
158
+ Edge table to save.
159
+ vertices
160
+ Vertex table to save.
161
+ dir
162
+ Root path where the multiplex will be saved.
163
+ period
164
+ Period for this multiplex. If ``None``, all rows in ``edges`` are written.
165
+ existing_data_behavior
166
+ Passed through to ``pyarrow.dataset.write_dataset``.
167
+ kwargs
168
+ Additional keyword arguments forwarded to
169
+ ``pyarrow.dataset.write_dataset``.
145
170
 
146
171
  Returns:
147
172
  - Tuple of ``(edges, vertices)`` table objects pointing to the saved files.
@@ -163,21 +188,23 @@ def save_multiplex(
163
188
  vertices_file = dir / "vertices.parquet"
164
189
  if period is not None:
165
190
  # test if period column is there, if not add it to
166
- V = V.filter(V.period == period)
191
+ V = V.filter(_.period == period)
167
192
  V.to_parquet(vertices_file)
168
193
 
169
194
  # writing edges
170
195
  edges_dir = dir / "edges"
171
196
 
172
197
  os.makedirs(edges_dir, exist_ok=True)
173
- E_period = E.filter(E.period == period)
174
- layers = E_period[["layer"]].distinct().to_pandas().layer
198
+ E_period = E
199
+ if period is not None:
200
+ E_period = E.filter(_.period == period)
201
+ layers = E_period[["layer"]].distinct().layer.to_list()
175
202
  logger.info(f"layers: {layers}")
176
203
  for layer in layers:
177
204
  layer_dir = edges_dir / f"{layer}"
178
205
  # TODO further partition?
179
206
  os.makedirs(layer_dir, exist_ok=True)
180
- E_period_layer = E_period.filter(E_period.layer == layer).order_by(
207
+ E_period_layer = E_period.filter(_.layer == layer).order_by(
181
208
  ["src", "relationtype", "dst"]
182
209
  )
183
210
  E_period_layer.to_parquet_dir(
@@ -193,24 +220,54 @@ def save_multiplex(
193
220
 
194
221
 
195
222
def save_multiplexseries(
    edges: ibis.Table,
    vertices: ibis.Table,
    dir: Path | str,
    relationtypes: ibis.Table | None = None,
    existing_data_behavior="delete_matching",
    **kwargs,
) -> Tuple[ibis.Table, ibis.Table]:
    """
    Save a multiplex series to disk, one sub-directory per period.

    The root directory is created if it does not exist. Every distinct value of
    ``edges.period`` is written through ``save_multiplex`` into ``dir/<period>``.
    Edges and vertices are not validated for consistency.

    Args:

    - edges: edge table to save; must have a ``period`` column.
    - vertices: vertex table to save; must have a ``period`` column.
    - dir: root path where the series will be saved.
    - relationtypes: optional relationtype metadata table, written to
      ``relationtypes.parquet`` at root level.
    - existing_data_behavior: forwarded unchanged to ``save_multiplex``.
    - **kwargs: additional keyword arguments forwarded to ``save_multiplex``.

    Returns:
    - Tuple of ``(edges, vertices)`` taken from the series as re-read from
      ``dir`` with ``read_multiplexseries``.
    """
    dir = Path(dir)
    dir.mkdir(parents=True, exist_ok=True)

    # Periods are sorted so the sub-directories are written in a stable order.
    periods: list[int] = (
        edges.select("period").distinct().order_by("period").period.to_list()
    )
    for period in periods:
        E = edges.filter(edges.period == period)
        V = vertices.filter(vertices.period == period)
        save_multiplex(
            edges=E,
            vertices=V,
            dir=dir / f"{period}",
            period=period,
            existing_data_behavior=existing_data_behavior,
            **kwargs,
        )

    if relationtypes is not None:
        relationtypes.to_parquet(dir / "relationtypes.parquet")

    # Hand back tables that point at the files just written, not at the inputs.
    mp = read_multiplexseries(dir)
    return mp.edges, mp.vertices
214
271
 
215
272
 
216
273
  def save_bipartite(
@@ -259,13 +316,13 @@ def read_bipartite(dir: Path | str) -> Bipartite:
259
316
  role_src = metadata["role_src"]
260
317
  role_dst = metadata["role_dst"]
261
318
  relationtype = metadata["relationtype"]
262
- return BiPartite(
319
+ return Bipartite(
263
320
  edges=edges, role_src=role_src, role_dst=role_dst, relationtype=relationtype
264
321
  )
265
322
 
266
323
 
267
324
  if __name__ == "__main__":
268
325
  logging.basicConfig(level=logging.INFO)
269
- m = load_network("data")
326
+ m = read_multiplexseries("data")
270
327
 
271
- save_network(edges=m.edges, vertices=m.vertices, dir="data2")
328
+ save_multiplexseries(edges=m.edges, vertices=m.vertices, dir="data2")
@@ -1,3 +1,9 @@
1
+ """Single-period multiplex graph representation.
2
+
3
+ This module defines :class:`Multiplex`, a validated container around ibis edge
4
+ and vertex tables with helpers for conversions and degree summaries.
5
+ """
6
+
1
7
  import ibis
2
8
 
3
9
  from .check import check_edges, check_vertices
@@ -19,7 +25,7 @@ class Multiplex:
19
25
  For multiple periods, use MultiplexSeries.
20
26
  """
21
27
 
22
- #: The edges of the multiplex. This is a table with columns "src", "dst", "layer" and "relationtype".
28
+ #: The edges of the multiplex. This is a table with columns "src", "dst", "layer", "relationtype" and optionally "weight".
23
29
  edges: ibis.Table
24
30
 
25
31
  #: The vertices of the multiplex. This is a table with a column "id" and optional additional columns.
@@ -60,7 +66,7 @@ class Multiplex:
60
66
  Returns:
61
67
  - List of layer names.
62
68
  """
63
- layers = self.edges[["layer"]].distinct().to_pandas().layer.tolist()
69
+ layers = self.edges[["layer"]].distinct().layer.to_list()
64
70
  return layers
65
71
 
66
72
  def update_vertices(self) -> None:
@@ -75,30 +81,62 @@ class Multiplex:
75
81
  V = src.union(dst, distinct=True).to_pyarrow()
76
82
  self.vertices = ibis.memtable(V)
77
83
 
78
def to_csr_matrix(
    self, use_weight: bool | str | ibis.Value = False
) -> csr_matrix:
    """
    Transform the multiplex into a sparse matrix, collapsing all layers into one.
    To keep layers separate, use ``to_csr_matrices`` instead.

    Args:
    - use_weight: selects the edge weights for the adjacency matrix.

      - ``False`` (default): unweighted; an existing ``"weight"`` column is
        dropped (with a warning) before the matrix is built.
      - ``True``: use the column named ``"weight"``.
      - ``str``: name of the edge column to use as weight.
      - ``ibis.Value``: expression evaluated on the edges table and used as weight.

    Returns:
    - Sparse matrix of shape ``(n_vertices, n_vertices)``.

    Raises:
    - ValueError: if the requested weight column is not present in the edges table.
    - TypeError: if ``use_weight`` is not a ``bool``, ``str`` or ``ibis.Value``.
    """
    from .to_csr_matrix import to_row_col_idx, idx_to_csr_matrix

    E = self.edges
    V = self.vertices

    # The edges handed to ``to_row_col_idx`` carry a column literally named
    # "weight" when weighting is requested and no such column otherwise.
    # NOTE(review): assumes the helper keys on that column name - confirm.
    if use_weight is False:
        if "weight" in E.columns:
            logger.warning(
                "Weight column 'weight' found in edges table, but use_weight is False. Ignoring weight column."
            )
            E = E.drop("weight")
    elif use_weight is True:
        if "weight" not in E.columns:
            raise ValueError("Weight column 'weight' not found in edges table")
    elif isinstance(use_weight, str):
        if use_weight not in E.columns:
            raise ValueError(
                f"Weight column '{use_weight}' not found in edges table"
            )
        if use_weight != "weight":
            # ibis tables are immutable expressions; derive a new table
            # instead of assigning into the existing one.
            E = E.mutate(weight=E[use_weight])
    elif isinstance(use_weight, ibis.Value):
        E = E.mutate(weight=use_weight)
    else:
        raise TypeError(
            "use_weight must be a bool, a column name or an ibis expression, "
            f"got {type(use_weight).__name__}"
        )

    idx = to_row_col_idx(E, V)
    M = idx_to_csr_matrix(idx, V)
    return M
91
124
 
92
- def to_csr_matrices(self) -> dict[str, csr_matrix]:
125
+ def to_csr_matrices(self, layers: list[str] | None = None) -> dict[str, csr_matrix]:
93
126
  """
94
127
  Transform the multiplex into a dictionary of sparse matrices, one per layer.
95
128
 
129
+ Args:
130
+ - layers: optional list of layer names to include. If None, all layers are included.
131
+
96
132
  Returns:
97
133
  - Dictionary mapping layer name to a sparse boolean matrix of shape ``(n_vertices, n_vertices)``.
98
134
  """
99
135
  from .to_csr_matrix import to_row_col_idx, idx_to_csr_matrix
100
136
 
101
- layers = self.layers()
137
+ # Maybe turn this into a generator instead of a dict, to avoid loading all matrices into memory at once.
138
+
139
+ layers = self.layers() if layers is None else layers
102
140
  matrices = {}
103
141
  for layer in layers:
104
142
  idx = to_row_col_idx(
@@ -108,6 +146,26 @@ class Multiplex:
108
146
  matrices[layer] = M
109
147
  return matrices
110
148
 
149
def outdegree(self, by_layer: bool = False) -> ibis.Table:
    """
    Compute the out-degree of each vertex in the multiplex.

    The result is derived from the edges table only: edges are grouped by
    ``src`` and counted, so only vertices that occur as a source appear.

    Args:
    - by_layer: if True, compute the out-degree separately for each layer.

    Returns:
    - by_layer=False: Table with columns "id" and "outdegree", where "id" is the vertex id and "outdegree" is the total number of outgoing edges from that vertex across all layers.
    - by_layer=True: Table with columns "id", "layer", and "outdegree", where "id" is the vertex id, "layer" is the layer name, and "outdegree" is the number of outgoing edges from that vertex in that layer.
    """
    E = self.edges

    group_keys = ["src"]
    if by_layer:
        group_keys.append("layer")

    counted = E.group_by(group_keys).aggregate(outdegree=E.count())
    # Expose the source vertex under the same name the vertices table uses.
    return counted.rename(id="src")
168
+
111
169
  def to_networkx(self) -> nx.MultiDiGraph:
112
170
  """
113
171
  Convert the multiplex to a NetworkX MultiDiGraph.
@@ -136,6 +194,12 @@ class Multiplex:
136
194
  self.update_vertices()
137
195
  vertices = self.vertices
138
196
  period = self.period
139
- edges, vertices = io.save_multiplex(edges, vertices, period, dir=dir, **kw_args)
197
+ edges, vertices = io.save_multiplex(
198
+ edges=edges,
199
+ vertices=vertices,
200
+ dir=dir,
201
+ period=period,
202
+ **kw_args,
203
+ )
140
204
  self.edges = edges
141
205
  self.vertices = vertices
@@ -1,11 +1,22 @@
1
+ """Multi-period multiplex graph representation.
2
+
3
+ This module defines :class:`MultiplexSeries`, which stores edges across periods
4
+ and exposes filtering, per-period access, collapsing, and persistence helpers.
5
+ """
6
+
1
7
  import ibis
8
+ from ibis import _
9
+
10
+ from muxpack.networkx import to_MultiDiGraph
2
11
 
3
12
  from .check import check_edges, check_vertices
4
13
  from pathlib import Path
5
14
  from . import io
6
15
  from .multiplex import Multiplex
7
16
  import logging
8
- from typing import Tuple
17
+ from typing import Generator, Tuple
18
+ from scipy.sparse import csr_matrix
19
+ import networkx as nx
9
20
 
10
21
  logger = logging.getLogger(__name__)
11
22
 
@@ -57,7 +68,7 @@ class MultiplexSeries:
57
68
  self.vertices = vertices
58
69
  self.relationtypes = relationtypes
59
70
 
60
- if not vertices is None:
71
+ if vertices is not None:
61
72
  logger.info("Vertices table provided, using it as is.")
62
73
  self.vertex_ids = vertices[["id"]].distinct()
63
74
 
@@ -69,14 +80,9 @@ class MultiplexSeries:
69
80
  - Sorted list of period values.
70
81
  """
71
82
  periods = (
72
- self.edges.select(self.edges.period)
73
- .distinct()
74
- .order_by("period")
75
- .to_pyarrow()
76
- .column("period")
77
- .to_pylist()
83
+ self.edges.select("period").distinct().order_by("period").period.to_list()
78
84
  )
79
- # periods = self.edges[["period"]].distinct().to_pandas().period.tolist()
85
+ # periods = self.edges[["period"]].distinct().to_pandas().period.to_list()
80
86
  return periods
81
87
 
82
88
  def layers(self) -> list[str]:
@@ -86,16 +92,46 @@ class MultiplexSeries:
86
92
  Returns:
87
93
  - Sorted list of layer names.
88
94
  """
89
- layers = (
90
- self.edges.select(self.edges.layer)
91
- .distinct()
92
- .order_by("layer")
93
- .to_pyarrow()
94
- .column("layer")
95
- .to_pylist()
96
- )
95
+ layers = self.edges.select("layer").distinct().order_by("layer").layer.to_list()
97
96
  return layers
98
97
 
98
+ def to_csr_matrices(
99
+ self, periods: list[int] | None = None
100
+ ) -> Generator[Tuple[csr_matrix, int]]:
101
+ """
102
+ Generate a sparse matrix for each period. The indices of the matrix correspond to
103
+ the rownumber the ``vertex_ids`` table.
104
+
105
+ Args:
106
+ - periods: list of periods to generate matrices for. If empty, all periods
107
+ present in ``edges`` are used.
108
+ """
109
+ from .to_csr_matrix import to_csr_matrix
110
+
111
+ if periods is None:
112
+ periods = self.periods()
113
+
114
+ for period in periods:
115
+ E_y = self.edges.filter(_.period == period)
116
+ yield to_csr_matrix(E_y, self.vertex_ids), period
117
+
118
def to_networkx(
    self, periods: list[int] | None = None
) -> Generator[Tuple[nx.MultiDiGraph, int], None, None]:
    """
    Generate a NetworkX MultiDiGraph for each period.

    Args:
    - periods: list of periods to generate graphs for. If ``None``, all periods
      present in ``edges`` are used.

    Yields:
    - Tuples ``(graph, period)``, one per requested period, in the order given.
    """
    if periods is None:
        periods = self.periods()

    # Graphs are built lazily, one period at a time.
    for period in periods:
        E_y = self.edges.filter(_.period == period)
        yield to_MultiDiGraph(E_y, self.vertex_ids), period
134
+
99
135
  def update_vertices(self) -> None:
100
136
  """
101
137
  Update the vertices table by deriving it from the edges table.
@@ -163,8 +199,7 @@ class MultiplexSeries:
163
199
  def add_filter(
164
200
  self,
165
201
  periods: list[int] = None,
166
- layers: list[str] = None,
167
- relationtypes: list[int] = None,
202
+ layers: dict[str, list[int] | None] = None,
168
203
  src: list[int] = None,
169
204
  dst: list[int] = None,
170
205
  ) -> None:
@@ -179,8 +214,7 @@ class MultiplexSeries:
179
214
 
180
215
  Args:
181
216
  - periods: list of periods to keep.
182
- - layers: list of layer names to keep.
183
- - relationtypes: list of relationtype values to keep.
217
+ - layers: dict of {layer:[relationtype]} to keep. Use ``None`` for the list of relationtypes to keep all relationtypes for that layer.
184
218
  - src: list of source vertex ids (ego) to keep.
185
219
  - dst: list of destination vertex ids (non-ego) to keep.
186
220
  """
@@ -189,23 +223,36 @@ class MultiplexSeries:
189
223
  flt: list[ibis.BooleanValue] = []
190
224
 
191
225
  if periods is not None and len(periods) > 0:
192
- flt.append(E.period.isin(periods))
226
+ flt.append(_.period.isin(periods))
193
227
 
194
228
  if layers is not None and len(layers) > 0:
195
- flt.append(E.layer.isin(layers))
196
-
197
- if relationtypes is not None and len(relationtypes) > 0:
198
- flt.append(E.relationtype.isin(relationtypes))
229
+ rt = []
230
+ if not isinstance(layers, dict):
231
+ raise ValueError("layers must be a dict of {layer:[relationtype]|None}")
232
+
233
+ sl = self.layers()
234
+ for layer, relationtypes in layers.items():
235
+ if layer not in sl:
236
+ raise ValueError(f"Layer '{layer}' not found in multiplex series")
237
+ e = _.layer == layer
238
+ if relationtypes is not None:
239
+ e = ibis.and_(e, _.relationtype.isin(relationtypes))
240
+ rt.append(e)
241
+
242
+ if len(rt) > 1:
243
+ flt.append(ibis.or_(rt))
244
+ elif len(rt) == 1:
245
+ flt.append(e)
199
246
 
200
247
  if src is not None and len(src) > 0:
201
248
  vid = ibis.memtable({"id": src})
202
249
  # we use semi join because we expect the vertex list to be large
203
- E = E.semi_join(vid, E.src == vid.id)
250
+ E = E.semi_join(vid, _.src == vid.id)
204
251
 
205
252
  if dst is not None and len(dst) > 0:
206
253
  vid = ibis.memtable({"id": dst})
207
254
  # we use semi join because we expect the vertex list to be large
208
- E = E.semi_join(vid, E.dst == vid.id)
255
+ E = E.semi_join(vid, _.dst == vid.id)
209
256
 
210
257
  logger.debug("Filter: f{flt}")
211
258
  if len(flt):
@@ -213,6 +260,19 @@ class MultiplexSeries:
213
260
 
214
261
  self.edges = E
215
262
 
263
+ def __str__(self) -> str:
264
+ """
265
+ Return a string representation of the multiplex series.
266
+
267
+ Returns:
268
+ - String with number of edges, vertices, and periods.
269
+ """
270
+ n_edges = self.edges.count().execute()
271
+ n_vertices = self.vertex_ids.count().execute()
272
+ periods = self.periods()
273
+ layers = self.layers()
274
+ return f"MultiplexSeries\n Edges: {n_edges}\n Vertices: {n_vertices}\n Periods: {periods}\n Layers: {layers}"
275
+
216
276
  def __copy__(self) -> "MultiplexSeries":
217
277
  """
218
278
  Return a shallow copy of this MultiplexSeries.
@@ -220,9 +280,9 @@ class MultiplexSeries:
220
280
  Returns:
221
281
  - A new MultiplexSeries sharing the same ``edges`` and ``vertices`` tables.
222
282
  """
223
- return MultiplexSeries(self.edges, self.vertices)
283
+ return MultiplexSeries(self.edges, self.vertices, self.relationtypes)
224
284
 
225
- def collapse(self) -> Multiplex:
285
+ def collapse(self, period: int | None = None) -> Multiplex:
226
286
  """
227
287
  Collapse the multiplex series into a single Multiplex by discarding period
228
288
  information. Duplicate edges across periods are removed. This is useful
@@ -236,7 +296,7 @@ class MultiplexSeries:
236
296
  V = self.vertices.select("id").distinct()
237
297
  else:
238
298
  V = None
239
- return Multiplex(edges=E, vertices=V, period=None)
299
+ return Multiplex(edges=E, vertices=V, period=period)
240
300
 
241
301
  def collapse_to(self, dir: Path | str) -> None:
242
302
  """
@@ -260,14 +320,22 @@ class MultiplexSeries:
260
320
 
261
321
  Args:
262
322
  - dir: path to the directory where the MultiplexSeries will be saved.
263
- - **kw_args: additional keyword arguments forwarded to ``io.save_network``.
323
+ - **kw_args: additional keyword arguments forwarded to
324
+ ``io.save_multiplexseries``.
264
325
  """
265
326
  edges = self.edges
266
327
  vertices = self.vertices
328
+ relationtypes = self.relationtypes
267
329
  if vertices is None:
268
330
  mp = MultiplexSeries(edges=self.edges)
269
331
  mp.update_vertices()
270
332
  vertices = mp.vertices
271
- E, V = io.save_network(edges, vertices, dir=dir, **kw_args)
333
+ E, V = io.save_multiplexseries(
334
+ edges=edges,
335
+ vertices=vertices,
336
+ relationtypes=relationtypes,
337
+ dir=dir,
338
+ **kw_args,
339
+ )
272
340
  self.edges = E
273
341
  self.vertices = V
@@ -1,3 +1,5 @@
1
+ """Conversion helpers from muxpack data structures to NetworkX graphs."""
2
+
1
3
  import networkx as nx
2
4
  import ibis
3
5
  from .to_csr_matrix import to_csr_matrix
@@ -1,7 +1,8 @@
1
- from ibis import row_number, Table
1
+ """Sparse matrix conversion utilities for multiplex edge tables."""
2
+
3
+ from ibis import row_number, Table, _
2
4
  import ibis
3
5
  from scipy.sparse import csr_matrix
4
- from muxpack.multiplex import Multiplex
5
6
  from typing import Tuple, Generator
6
7
 
7
8
  import logging
@@ -10,7 +11,7 @@ logger = logging.getLogger(__name__)
10
11
  # from collections.abc import Generator
11
12
 
12
13
 
13
- def to_row_col_idx(edges: Table, vertices: Table) -> Table:
14
+ def to_row_col_idx(edges: Table, vertices: Table, use_weight: bool = False) -> Table:
14
15
  """
15
16
  Turn an edge list into a row/column index table based on the given vertices table.
16
17
 
@@ -28,22 +29,34 @@ def to_row_col_idx(edges: Table, vertices: Table) -> Table:
28
29
  row = v.select(src="id", row="idx")
29
30
  col = v.select(dst="id", col="idx")
30
31
 
31
- # may sum the number of columns
32
- idx_edges = (
33
- edges[["src", "dst"]]
34
- .distinct()
35
- .inner_join(row, "src")
36
- .inner_join(col, "dst")
37
- .mutate(data=True)
38
- .select("data", "row", "col")
39
- )
40
- logger.debug(
41
- f"Created row-col index table with {idx_edges.count().execute()} edges."
42
- )
32
+ if use_weight:
33
+ idx_edges = (
34
+ edges.aggregate(weight=_.weight.sum(), by=["src", "dst"])
35
+ .inner_join(row, "src")
36
+ .inner_join(col, "dst")
37
+ .mutate(data=True)
38
+ .select("data", "row", "col", "weight")
39
+ )
40
+
41
+ logger.debug("Created weighted row-col index tables.")
42
+ else:
43
+ # may sum the number of columns
44
+ idx_edges = (
45
+ edges[["src", "dst"]]
46
+ .distinct()
47
+ .inner_join(row, "src")
48
+ .inner_join(col, "dst")
49
+ .mutate(data=True)
50
+ .select("data", "row", "col")
51
+ )
52
+
53
+ logger.debug("Created row-col index table with edges.")
43
54
  return idx_edges
44
55
 
45
56
 
46
- def idx_to_csr_matrix(idx: Table, vertices: Table) -> csr_matrix:
57
+ def idx_to_csr_matrix(
58
+ idx: Table, vertices: Table, use_weight: bool = False
59
+ ) -> csr_matrix:
47
60
  """
48
61
  Convert a row-column index table to a CSR sparse matrix.
49
62
 
@@ -65,50 +78,48 @@ def idx_to_csr_matrix(idx: Table, vertices: Table) -> csr_matrix:
65
78
  return M
66
79
 
67
80
 
68
- def to_csr_matrix(edges: Table, vertices: Table | None) -> csr_matrix:
81
+ def to_csr_matrix(edges: Table, vertices: Table) -> csr_matrix:
69
82
  """
70
83
  Transform an edge list into a sparse matrix (csr_matrix).
71
84
 
72
85
  Args:
73
86
  - edges: table with ``src`` and ``dst`` columns.
74
87
  - vertices: table with an ``id`` column; edges are filtered to vertices present
75
- in this table. Pass ``None`` to derive vertices from the edges table.
88
+ in this table.
76
89
 
77
90
  Returns:
78
91
  - Square CSR sparse matrix of shape ``(n_vertices, n_vertices)``.
79
92
  """
80
93
  # vertices may contain multiple periods
81
- if vertices is not None:
82
- vertices = vertices[["id"]].distinct()
94
+ vertices = vertices[["id"]].distinct()
83
95
  edges_row_col = to_row_col_idx(edges, vertices=vertices)
84
96
  M = idx_to_csr_matrix(edges_row_col, vertices=vertices)
85
97
  return M
86
98
 
87
99
 
88
100
  def to_period_csr_matrix(
89
- edges: Table, vertices: Table | None, periods: list[int] = []
101
+ edges: Table, vertices: Table, periods: list[int] | None = None
90
102
  ) -> Generator[Tuple[csr_matrix, int]]:
91
103
  """
92
- Generate a sparse matrix for each period.
104
+ Generate a sparse matrix for each period. The indices of the matrix correspond to
105
+ the row number in the ``vertices`` table.
93
106
 
94
107
  Args:
95
108
  - edges: table with columns ``src``, ``dst``, and ``period``.
96
- - vertices: table with columns ``id`` and ``period``, or ``None`` to derive
97
- vertices from the edges table for each period.
109
+ - vertices: table with an ``id`` column; the same table is used
110
+ for every period.
98
111
  - periods: list of periods to generate matrices for. If empty, all periods
99
112
  present in ``edges`` are used.
100
113
 
101
114
  Returns:
102
115
  - Generator of ``(csr_matrix, period)`` tuples, one per period.
103
116
  """
104
- if len(periods) == 0:
105
- periods = edges[["period"]].distinct().to_pandas().period.tolist()
117
+ if periods is None or len(periods) == 0:
118
+ periods = edges[["period"]].distinct().period.to_list()
119
+
106
120
  for period in periods:
107
- E_y = edges.filter(edges.period == period)
108
- if vertices is not None:
109
- V_y = vertices.filter(vertices.period == period)
110
- else:
111
- V_y = None
121
+ E_y = edges.filter(_.period == period)
122
+ V_y = vertices
112
123
 
113
124
  yield to_csr_matrix(E_y, V_y), period
114
125
 
@@ -129,4 +140,4 @@ if __name__ == "__main__":
129
140
  print(f"M1 = {M1}")
130
141
 
131
142
  M = to_csr_matrix(E, V)
132
- print(M)
143
+ print(M)
@@ -1,17 +0,0 @@
1
- from .check import check_edges, check_vertices
2
- from .io import load_network, save_network
3
- from .multiplexseries import MultiplexSeries
4
- from .multiplex import Multiplex
5
- from .to_csr_matrix import to_csr_matrix
6
- from .bipartite import Bipartite
7
-
8
- __all__ = [
9
- "check_edges",
10
- "check_vertices",
11
- "load_network",
12
- "Multiplex",
13
- "MultiplexSeries",
14
- "save_network",
15
- "to_csr_matrix",
16
- "Bipartite",
17
- ]