muxpack 0.1.0__tar.gz → 0.2.0.dev1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {muxpack-0.1.0 → muxpack-0.2.0.dev1}/PKG-INFO +12 -7
- {muxpack-0.1.0 → muxpack-0.2.0.dev1}/README.md +3 -1
- {muxpack-0.1.0 → muxpack-0.2.0.dev1}/pyproject.toml +13 -8
- muxpack-0.2.0.dev1/src/muxpack/__init__.py +45 -0
- {muxpack-0.1.0 → muxpack-0.2.0.dev1}/src/muxpack/check.py +23 -7
- {muxpack-0.1.0 → muxpack-0.2.0.dev1}/src/muxpack/io.py +151 -94
- {muxpack-0.1.0 → muxpack-0.2.0.dev1}/src/muxpack/multiplex.py +72 -8
- {muxpack-0.1.0 → muxpack-0.2.0.dev1}/src/muxpack/multiplexseries.py +101 -33
- {muxpack-0.1.0 → muxpack-0.2.0.dev1}/src/muxpack/networkx.py +2 -0
- {muxpack-0.1.0 → muxpack-0.2.0.dev1}/src/muxpack/to_csr_matrix.py +43 -32
- muxpack-0.1.0/src/muxpack/__init__.py +0 -17
- {muxpack-0.1.0 → muxpack-0.2.0.dev1}/src/muxpack/bipartite.py +0 -0
- {muxpack-0.1.0 → muxpack-0.2.0.dev1}/src/muxpack/py.typed +0 -0
|
@@ -1,17 +1,20 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: muxpack
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.2.0.dev1
|
|
4
4
|
Summary: Tools to handle multiplex network data more easily
|
|
5
5
|
Author: Edwin de Jonge, Jan van der Laan
|
|
6
6
|
Author-email: Edwin de Jonge <edwindjonge@gmail.com>, Jan van der Laan <djvanderlaan@gmail.com>
|
|
7
|
-
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
12
|
+
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
13
|
+
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
|
8
14
|
Requires-Dist: ibis-framework[duckdb]>=12.0.0
|
|
9
15
|
Requires-Dist: networkx>=3.6.1
|
|
10
|
-
Requires-Dist: pandas>=3.0.1
|
|
11
|
-
Requires-Dist: pyarrow>=23.0.1
|
|
12
|
-
Requires-Dist: pyarrow-hotfix>=0.7
|
|
13
16
|
Requires-Dist: scipy>=1.17.1
|
|
14
|
-
Requires-Python: >=3.
|
|
17
|
+
Requires-Python: >=3.11
|
|
15
18
|
Project-URL: Homepage, https://codeberg.org/CBS-Networktools/muxpack.py
|
|
16
19
|
Project-URL: Documentation, https://readthedocs.org
|
|
17
20
|
Project-URL: Repository, https://codeberg.org/CBS-Networktools/muxpack.py
|
|
@@ -22,6 +25,8 @@ Description-Content-Type: text/markdown
|
|
|
22
25
|
|
|
23
26
|
## Muxpack
|
|
24
27
|
|
|
28
|
+
[](https://github.com/edwindj/muxpack.py/actions/workflows/python.yml)
|
|
29
|
+
|
|
25
30
|
Muxpack is a Python implementation for working with multiplex network files.
|
|
26
31
|
|
|
27
32
|
## Documentation
|
|
@@ -33,4 +38,4 @@ uv sync --group docs
|
|
|
33
38
|
uv run sphinx-build -b html docs docs/_build/html
|
|
34
39
|
```
|
|
35
40
|
|
|
36
|
-
The generated HTML is available in `docs/_build/html/index.html`.
|
|
41
|
+
The generated HTML is available in `docs/_build/html/index.html`.
|
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
## Muxpack
|
|
4
4
|
|
|
5
|
+
[](https://github.com/edwindj/muxpack.py/actions/workflows/python.yml)
|
|
6
|
+
|
|
5
7
|
Muxpack is a Python implementation for working with multiplex network files.
|
|
6
8
|
|
|
7
9
|
## Documentation
|
|
@@ -13,4 +15,4 @@ uv sync --group docs
|
|
|
13
15
|
uv run sphinx-build -b html docs docs/_build/html
|
|
14
16
|
```
|
|
15
17
|
|
|
16
|
-
The generated HTML is available in `docs/_build/html/index.html`.
|
|
18
|
+
The generated HTML is available in `docs/_build/html/index.html`.
|
|
@@ -1,20 +1,25 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "muxpack"
|
|
3
|
-
version = "0.1.0"
|
|
3
|
+
version = "0.2.0dev1"
|
|
4
4
|
description = "Tools to handle multiplex network data more easily"
|
|
5
5
|
readme = "README.md"
|
|
6
|
+
classifiers = [
|
|
7
|
+
"Programming Language :: Python :: 3",
|
|
8
|
+
"Programming Language :: Python :: 3.11",
|
|
9
|
+
"Programming Language :: Python :: 3.12",
|
|
10
|
+
"Programming Language :: Python :: 3.13",
|
|
11
|
+
"Programming Language :: Python :: 3.14",
|
|
12
|
+
"Programming Language :: Python :: Implementation :: CPython",
|
|
13
|
+
"Programming Language :: Python :: Implementation :: PyPy",
|
|
14
|
+
]
|
|
6
15
|
authors = [
|
|
7
16
|
{ name = "Edwin de Jonge", email = "edwindjonge@gmail.com" },
|
|
8
17
|
{ name = "Jan van der Laan", email = "djvanderlaan@gmail.com" }
|
|
9
18
|
]
|
|
10
|
-
requires-python = ">=3.
|
|
19
|
+
requires-python = ">=3.11"
|
|
11
20
|
dependencies = [
|
|
12
|
-
"duckdb>=1.4.4",
|
|
13
21
|
"ibis-framework[duckdb]>=12.0.0",
|
|
14
22
|
"networkx>=3.6.1",
|
|
15
|
-
"pandas>=3.0.1",
|
|
16
|
-
"pyarrow>=23.0.1",
|
|
17
|
-
"pyarrow-hotfix>=0.7",
|
|
18
23
|
"scipy>=1.17.1",
|
|
19
24
|
]
|
|
20
25
|
|
|
@@ -28,7 +33,7 @@ Repository = "https://codeberg.org/CBS-Networktools/muxpack.py"
|
|
|
28
33
|
muxpack = "muxpack:main"
|
|
29
34
|
|
|
30
35
|
[build-system]
|
|
31
|
-
requires = ["uv_build>=0.10.6,<0.
|
|
36
|
+
requires = ["uv_build>=0.10.6,<0.12.0"]
|
|
32
37
|
build-backend = "uv_build"
|
|
33
38
|
|
|
34
39
|
[dependency-groups]
|
|
@@ -37,7 +42,7 @@ dev = [
|
|
|
37
42
|
"ruff>=0.15.4",
|
|
38
43
|
]
|
|
39
44
|
docs = [
|
|
40
|
-
"sphinx>=9.
|
|
45
|
+
"sphinx>=7.4,<9.0",
|
|
41
46
|
"sphinx-rtd-theme>=3.0.0",
|
|
42
47
|
]
|
|
43
48
|
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""Public package API for working with multiplex network data.
|
|
2
|
+
|
|
3
|
+
This module re-exports the main classes and helper functions so users can
|
|
4
|
+
import common functionality directly from :mod:`muxpack`.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
8
|
+
import argparse
|
|
9
|
+
|
|
10
|
+
from .check import check_edges, check_vertices
|
|
11
|
+
from .io import read_multiplexseries, save_multiplexseries
|
|
12
|
+
from .multiplexseries import MultiplexSeries
|
|
13
|
+
from .multiplex import Multiplex
|
|
14
|
+
from .to_csr_matrix import to_csr_matrix
|
|
15
|
+
from .bipartite import Bipartite
|
|
16
|
+
|
|
17
|
+
try:
|
|
18
|
+
__version__ = version("muxpack")
|
|
19
|
+
except PackageNotFoundError:
|
|
20
|
+
__version__ = "0+unknown"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def main(argv: list[str] | None = None) -> int:
|
|
24
|
+
"""Minimal CLI entrypoint for package metadata and help output."""
|
|
25
|
+
parser = argparse.ArgumentParser(
|
|
26
|
+
prog="muxpack", description="Tools to handle multiplex network data."
|
|
27
|
+
)
|
|
28
|
+
parser.add_argument(
|
|
29
|
+
"--version", action="version", version=f"%(prog)s {__version__}"
|
|
30
|
+
)
|
|
31
|
+
parser.parse_args(argv)
|
|
32
|
+
parser.print_help()
|
|
33
|
+
return 0
|
|
34
|
+
|
|
35
|
+
__all__ = [
|
|
36
|
+
"check_edges",
|
|
37
|
+
"check_vertices",
|
|
38
|
+
"read_multiplexseries",
|
|
39
|
+
"Multiplex",
|
|
40
|
+
"MultiplexSeries",
|
|
41
|
+
"save_multiplexseries",
|
|
42
|
+
"to_csr_matrix",
|
|
43
|
+
"Bipartite",
|
|
44
|
+
"main",
|
|
45
|
+
]
|
|
@@ -1,3 +1,9 @@
|
|
|
1
|
+
"""Validation helpers for edge and vertex ibis tables.
|
|
2
|
+
|
|
3
|
+
The functions in this module are used by :class:`muxpack.Multiplex` and
|
|
4
|
+
:class:`muxpack.MultiplexSeries` to validate required columns and value types.
|
|
5
|
+
"""
|
|
6
|
+
|
|
1
7
|
from ibis.expr.types import Table
|
|
2
8
|
from ibis import dtype
|
|
3
9
|
|
|
@@ -30,9 +36,11 @@ def check_edges(edges: Table, check_period=True) -> bool:
|
|
|
30
36
|
if not check_period:
|
|
31
37
|
expect_types.pop("period", None)
|
|
32
38
|
|
|
33
|
-
|
|
34
|
-
return True
|
|
39
|
+
opt_types = {"weight": "numeric"}
|
|
35
40
|
|
|
41
|
+
if check_column_type(edges, expect_types, optional=False):
|
|
42
|
+
if check_column_type(edges, opt_types, optional=True):
|
|
43
|
+
return True
|
|
36
44
|
return False
|
|
37
45
|
|
|
38
46
|
|
|
@@ -58,13 +66,15 @@ def check_vertices(vertices: Table, check_period=True) -> bool:
|
|
|
58
66
|
if check_period:
|
|
59
67
|
expect_types["period"] = "integer"
|
|
60
68
|
|
|
61
|
-
if not check_column_type(vertices, expect_types):
|
|
69
|
+
if not check_column_type(vertices, expect_types, optional=False):
|
|
62
70
|
return False
|
|
63
71
|
|
|
64
72
|
return True
|
|
65
73
|
|
|
66
74
|
|
|
67
|
-
def check_column_type(t: Table, expected_types: dict[str, str]) -> bool:
|
|
75
|
+
def check_column_type(
|
|
76
|
+
t: Table, expected_types: dict[str, str], optional: bool = False
|
|
77
|
+
) -> bool:
|
|
68
78
|
"""
|
|
69
79
|
Check that the columns in a table have the expected types.
|
|
70
80
|
|
|
@@ -72,15 +82,21 @@ def check_column_type(t: Table, expected_types: dict[str, str]) -> bool:
|
|
|
72
82
|
- t: the table to check.
|
|
73
83
|
- expected_types: dictionary mapping column names to expected type strings
|
|
74
84
|
(e.g., ``"integer"``, ``"string"``).
|
|
85
|
+
- optional: accept that the column does not exist.
|
|
75
86
|
|
|
76
87
|
Returns:
|
|
77
88
|
- ``True`` if all specified columns exist and have the expected types, ``False`` otherwise.
|
|
78
89
|
"""
|
|
79
90
|
for column, expected_type in expected_types.items():
|
|
91
|
+
if column not in t.columns:
|
|
92
|
+
if optional is True:
|
|
93
|
+
logger.info(f"Optional column '{column}' is missing.")
|
|
94
|
+
continue
|
|
95
|
+
else:
|
|
96
|
+
logger.warning(f"Column '{column}' is missing.")
|
|
97
|
+
return False
|
|
98
|
+
|
|
80
99
|
col = t[column]
|
|
81
|
-
if col is None:
|
|
82
|
-
logger.warning(f"Column '{column}' is missing.")
|
|
83
|
-
return False
|
|
84
100
|
coltype = col.type()
|
|
85
101
|
if expected_type == "integer" and coltype.is_integer():
|
|
86
102
|
continue
|
|
@@ -1,3 +1,9 @@
|
|
|
1
|
+
"""Input and output helpers for the muxpack on-disk layout.
|
|
2
|
+
|
|
3
|
+
This module provides low-level read/write functions used by high-level classes
|
|
4
|
+
such as :class:`muxpack.Multiplex` and :class:`muxpack.MultiplexSeries`.
|
|
5
|
+
"""
|
|
6
|
+
|
|
1
7
|
import ibis
|
|
2
8
|
|
|
3
9
|
from muxpack.bipartite import Bipartite
|
|
@@ -6,13 +12,14 @@ from pathlib import Path
|
|
|
6
12
|
import os
|
|
7
13
|
import logging
|
|
8
14
|
from typing import Tuple
|
|
15
|
+
from ibis import _
|
|
9
16
|
|
|
10
17
|
logger = logging.getLogger(__name__)
|
|
11
18
|
|
|
12
19
|
|
|
13
|
-
def load_network(dir: Path) -> MultiplexSeries:
|
|
20
|
+
def read_multiplexseries(dir: Path) -> MultiplexSeries:
|
|
14
21
|
"""
|
|
15
|
-
Load a multiplex
|
|
22
|
+
Load a multiplex series from a directory containing Parquet files.
|
|
16
23
|
|
|
17
24
|
The expected directory structure is::
|
|
18
25
|
|
|
@@ -42,8 +49,18 @@ def load_network(dir: Path) -> MultiplexSeries:
|
|
|
42
49
|
logger.info(f"No vertices found: {e}")
|
|
43
50
|
vertices = None
|
|
44
51
|
|
|
52
|
+
relationtypes = None
|
|
53
|
+
relationtypes_file = Path(dir) / "relationtypes.parquet"
|
|
54
|
+
legacy_relationtypes_file = Path(dir) / "relationtypes.csv"
|
|
45
55
|
try:
|
|
46
|
-
|
|
56
|
+
if relationtypes_file.exists():
|
|
57
|
+
relationtypes = con.read_parquet(
|
|
58
|
+
str(relationtypes_file), table_name="relationtypes"
|
|
59
|
+
)
|
|
60
|
+
elif legacy_relationtypes_file.exists():
|
|
61
|
+
relationtypes = con.read_csv(
|
|
62
|
+
str(legacy_relationtypes_file), table_name="relationtypes"
|
|
63
|
+
)
|
|
47
64
|
except Exception as e:
|
|
48
65
|
logger.info(f"No relationtypes found: {e}")
|
|
49
66
|
relationtypes = None
|
|
@@ -52,74 +69,74 @@ def load_network(dir: Path) -> MultiplexSeries:
|
|
|
52
69
|
return m
|
|
53
70
|
|
|
54
71
|
|
|
55
|
-
def
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
) -> Tuple[ibis.Table, ibis.Table]:
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
72
|
+
# def save_multiplexseries(
|
|
73
|
+
# edges: ibis.Table,
|
|
74
|
+
# vertices: ibis.Table,
|
|
75
|
+
# dir: Path | str,
|
|
76
|
+
# existing_data_behavior="delete_matching",
|
|
77
|
+
# **kwargs,
|
|
78
|
+
# ) -> Tuple[ibis.Table, ibis.Table]:
|
|
79
|
+
# """
|
|
80
|
+
# Save edges and vertices to disk following the muxpack directory structure.
|
|
81
|
+
# The directory and all sub-directories are created if they do not exist.
|
|
82
|
+
# Edges and vertices are not validated for consistency.
|
|
83
|
+
|
|
84
|
+
# Args:
|
|
85
|
+
# - edges: edge table to save.
|
|
86
|
+
# - vertices: vertex table to save.
|
|
87
|
+
# - dir: root path where the network will be saved.
|
|
88
|
+
# - existing_data_behavior: passed through to ``pyarrow.dataset.write_dataset``.
|
|
89
|
+
# - **kwargs: additional keyword arguments forwarded to ``pyarrow.dataset.write_dataset``.
|
|
90
|
+
|
|
91
|
+
# Returns:
|
|
92
|
+
# - Tuple of ``(edges, vertices)`` table objects pointing to the saved files.
|
|
93
|
+
# """
|
|
94
|
+
# E = edges
|
|
95
|
+
# V = vertices
|
|
96
|
+
# dir = Path(dir)
|
|
97
|
+
|
|
98
|
+
# logger.info(f"Saving network to {dir}...")
|
|
99
|
+
|
|
100
|
+
# # We do a manual partitioning to have maximum control.
|
|
101
|
+
# # alternative and potentially more efficient would be partitioning using
|
|
102
|
+
# # duckdb, however, that would pose some problems:
|
|
103
|
+
# # - Hive naming convention does not follow the muxpack specification
|
|
104
|
+
# # - Hive partitioning removes columns that are partitioned.
|
|
105
|
+
# periods = E[["period"]].distinct().period.to_list()
|
|
106
|
+
|
|
107
|
+
# for period in periods:
|
|
108
|
+
# period_dir = dir / f"{period}"
|
|
109
|
+
# os.makedirs(period_dir, exist_ok=True)
|
|
110
|
+
|
|
111
|
+
# # writing vertices
|
|
112
|
+
# vertices_file = period_dir / "vertices.parquet"
|
|
113
|
+
# V_period = V.filter(V.period == period)
|
|
114
|
+
# V_period.to_parquet(vertices_file)
|
|
115
|
+
|
|
116
|
+
# # writing edges
|
|
117
|
+
# edges_dir = period_dir / "edges"
|
|
118
|
+
# os.makedirs(edges_dir, exist_ok=True)
|
|
119
|
+
# E_period = E.filter(E.period == period)
|
|
120
|
+
# layers = E_period[["layer"]].distinct().layer.to_list()
|
|
121
|
+
# logger.info(f"layers: {layers}")
|
|
122
|
+
# for layer in layers:
|
|
123
|
+
# layer_dir = edges_dir / f"{layer}"
|
|
124
|
+
# # TODO further partition?
|
|
125
|
+
# os.makedirs(layer_dir, exist_ok=True)
|
|
126
|
+
# E_period_layer = E_period.filter(E_period.layer == layer).order_by(
|
|
127
|
+
# ["src", "relationtype", "dst"]
|
|
128
|
+
# )
|
|
129
|
+
# E_period_layer.to_parquet_dir(
|
|
130
|
+
# layer_dir, existing_data_behavior=existing_data_behavior, **kwargs
|
|
131
|
+
# )
|
|
132
|
+
# logger.info(f"\t\tSaved layer {layer}")
|
|
133
|
+
# logger.info(f"\tFinished saving period {period}")
|
|
134
|
+
# logger.info(f"Finished saving network to {dir}.")
|
|
135
|
+
|
|
136
|
+
# con = ibis.duckdb.connect()
|
|
137
|
+
# edges = con.read_parquet(f"{dir}/*/edges/**/*.parquet", table_name="edges")
|
|
138
|
+
# vertices = con.read_parquet(f"{dir}/*/vertices.parquet", table_name="vertices")
|
|
139
|
+
# return edges, vertices
|
|
123
140
|
|
|
124
141
|
|
|
125
142
|
def save_multiplex(
|
|
@@ -135,13 +152,21 @@ def save_multiplex(
|
|
|
135
152
|
The directory and all sub-directories are created if they do not exist.
|
|
136
153
|
Edges and vertices are not validated for consistency.
|
|
137
154
|
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
155
|
+
Parameters
|
|
156
|
+
----------
|
|
157
|
+
edges
|
|
158
|
+
Edge table to save.
|
|
159
|
+
vertices
|
|
160
|
+
Vertex table to save.
|
|
161
|
+
dir
|
|
162
|
+
Root path where the multiplex will be saved.
|
|
163
|
+
period
|
|
164
|
+
Period for this multiplex. If ``None``, all rows in ``edges`` are written.
|
|
165
|
+
existing_data_behavior
|
|
166
|
+
Passed through to ``pyarrow.dataset.write_dataset``.
|
|
167
|
+
kwargs
|
|
168
|
+
Additional keyword arguments forwarded to
|
|
169
|
+
``pyarrow.dataset.write_dataset``.
|
|
145
170
|
|
|
146
171
|
Returns:
|
|
147
172
|
- Tuple of ``(edges, vertices)`` table objects pointing to the saved files.
|
|
@@ -163,21 +188,23 @@ def save_multiplex(
|
|
|
163
188
|
vertices_file = dir / "vertices.parquet"
|
|
164
189
|
if period is not None:
|
|
165
190
|
# test if period column is there, if not add it to
|
|
166
|
-
V = V.filter(
|
|
191
|
+
V = V.filter(_.period == period)
|
|
167
192
|
V.to_parquet(vertices_file)
|
|
168
193
|
|
|
169
194
|
# writing edges
|
|
170
195
|
edges_dir = dir / "edges"
|
|
171
196
|
|
|
172
197
|
os.makedirs(edges_dir, exist_ok=True)
|
|
173
|
-
E_period = E
|
|
174
|
-
|
|
198
|
+
E_period = E
|
|
199
|
+
if period is not None:
|
|
200
|
+
E_period = E.filter(_.period == period)
|
|
201
|
+
layers = E_period[["layer"]].distinct().layer.to_list()
|
|
175
202
|
logger.info(f"layers: {layers}")
|
|
176
203
|
for layer in layers:
|
|
177
204
|
layer_dir = edges_dir / f"{layer}"
|
|
178
205
|
# TODO further partition?
|
|
179
206
|
os.makedirs(layer_dir, exist_ok=True)
|
|
180
|
-
E_period_layer = E_period.filter(
|
|
207
|
+
E_period_layer = E_period.filter(_.layer == layer).order_by(
|
|
181
208
|
["src", "relationtype", "dst"]
|
|
182
209
|
)
|
|
183
210
|
E_period_layer.to_parquet_dir(
|
|
@@ -193,24 +220,54 @@ def save_multiplex(
|
|
|
193
220
|
|
|
194
221
|
|
|
195
222
|
def save_multiplexseries(
|
|
196
|
-
edges: ibis.Table,
|
|
197
|
-
|
|
223
|
+
edges: ibis.Table,
|
|
224
|
+
vertices: ibis.Table,
|
|
225
|
+
dir: Path | str,
|
|
226
|
+
relationtypes: ibis.Table | None = None,
|
|
227
|
+
existing_data_behavior="delete_matching",
|
|
228
|
+
**kwargs,
|
|
229
|
+
) -> Tuple[ibis.Table, ibis.Table]:
|
|
198
230
|
"""
|
|
199
|
-
Save
|
|
231
|
+
Save edges and vertices to disk following the muxpack directory structure.
|
|
232
|
+
The directory and all sub-directories are created if they do not exist.
|
|
233
|
+
Edges and vertices are not validated for consistency.
|
|
200
234
|
|
|
201
235
|
Args:
|
|
202
|
-
|
|
203
|
-
-
|
|
204
|
-
-
|
|
236
|
+
|
|
237
|
+
- edges: edge table to save.
|
|
238
|
+
- vertices: vertex table to save.
|
|
239
|
+
- relationtypes: optional relationtype metadata table to save at root level.
|
|
240
|
+
- dir: root path where the network will be saved.
|
|
241
|
+
- existing_data_behavior: passed through to ``pyarrow.dataset.write_dataset``.
|
|
242
|
+
- **kwargs: additional keyword arguments forwarded to ``pyarrow.dataset.write_dataset``.
|
|
243
|
+
|
|
244
|
+
Returns:
|
|
245
|
+
- Tuple of ``(edges, vertices)`` table objects pointing to the saved files.
|
|
205
246
|
"""
|
|
247
|
+
|
|
206
248
|
dir = Path(dir)
|
|
207
|
-
|
|
208
|
-
|
|
249
|
+
os.makedirs(dir, exist_ok=True)
|
|
250
|
+
periods: list[str] = (
|
|
251
|
+
edges.select("period").distinct().order_by("period").period.to_list()
|
|
209
252
|
)
|
|
210
253
|
for period in periods:
|
|
211
254
|
E = edges.filter(edges.period == period)
|
|
212
255
|
V = vertices.filter(vertices.period == period)
|
|
213
|
-
|
|
256
|
+
speriod = f"{period}"
|
|
257
|
+
save_multiplex(
|
|
258
|
+
edges=E,
|
|
259
|
+
vertices=V,
|
|
260
|
+
dir=dir / speriod,
|
|
261
|
+
period=period,
|
|
262
|
+
existing_data_behavior=existing_data_behavior,
|
|
263
|
+
**kwargs,
|
|
264
|
+
)
|
|
265
|
+
|
|
266
|
+
if relationtypes is not None:
|
|
267
|
+
relationtypes.to_parquet(dir / "relationtypes.parquet")
|
|
268
|
+
|
|
269
|
+
mp = read_multiplexseries(dir)
|
|
270
|
+
return mp.edges, mp.vertices
|
|
214
271
|
|
|
215
272
|
|
|
216
273
|
def save_bipartite(
|
|
@@ -259,13 +316,13 @@ def read_bipartite(dir: Path | str) -> Bipartite:
|
|
|
259
316
|
role_src = metadata["role_src"]
|
|
260
317
|
role_dst = metadata["role_dst"]
|
|
261
318
|
relationtype = metadata["relationtype"]
|
|
262
|
-
return
|
|
319
|
+
return Bipartite(
|
|
263
320
|
edges=edges, role_src=role_src, role_dst=role_dst, relationtype=relationtype
|
|
264
321
|
)
|
|
265
322
|
|
|
266
323
|
|
|
267
324
|
if __name__ == "__main__":
|
|
268
325
|
logging.basicConfig(level=logging.INFO)
|
|
269
|
-
m =
|
|
326
|
+
m = read_multiplexseries("data")
|
|
270
327
|
|
|
271
|
-
|
|
328
|
+
save_multiplexseries(edges=m.edges, vertices=m.vertices, dir="data2")
|
|
@@ -1,3 +1,9 @@
|
|
|
1
|
+
"""Single-period multiplex graph representation.
|
|
2
|
+
|
|
3
|
+
This module defines :class:`Multiplex`, a validated container around ibis edge
|
|
4
|
+
and vertex tables with helpers for conversions and degree summaries.
|
|
5
|
+
"""
|
|
6
|
+
|
|
1
7
|
import ibis
|
|
2
8
|
|
|
3
9
|
from .check import check_edges, check_vertices
|
|
@@ -19,7 +25,7 @@ class Multiplex:
|
|
|
19
25
|
For multiple periods, use MultiplexSeries.
|
|
20
26
|
"""
|
|
21
27
|
|
|
22
|
-
#: The edges of the multiplex. This is a table with columns "src", "dst", "layer" and
|
|
28
|
+
#: The edges of the multiplex. This is a table with columns "src", "dst", "layer","relationtype" and optionally weight.
|
|
23
29
|
edges: ibis.Table
|
|
24
30
|
|
|
25
31
|
#: The vertices of the multiplex. This is a table with a column "id" and optional additional columns.
|
|
@@ -60,7 +66,7 @@ class Multiplex:
|
|
|
60
66
|
Returns:
|
|
61
67
|
- List of layer names.
|
|
62
68
|
"""
|
|
63
|
-
layers = self.edges[["layer"]].distinct().
|
|
69
|
+
layers = self.edges[["layer"]].distinct().layer.to_list()
|
|
64
70
|
return layers
|
|
65
71
|
|
|
66
72
|
def update_vertices(self) -> None:
|
|
@@ -75,30 +81,62 @@ class Multiplex:
|
|
|
75
81
|
V = src.union(dst, distinct=True).to_pyarrow()
|
|
76
82
|
self.vertices = ibis.memtable(V)
|
|
77
83
|
|
|
78
|
-
def to_csr_matrix(
|
|
84
|
+
def to_csr_matrix(
|
|
85
|
+
self, use_weight: bool | str | ibis.Value = False
|
|
86
|
+
) -> csr_matrix:
|
|
79
87
|
"""
|
|
80
88
|
Transform the multiplex into a sparse matrix, collapsing all layers into one.
|
|
81
89
|
To keep layers separate, use ``to_csr_matrices`` instead.
|
|
82
90
|
|
|
91
|
+
Args:
|
|
92
|
+
- use_weight: optional column in the edges table to use as weights for the adjacency matrix. If False, the adjacency matrix will be unweighted (boolean).
|
|
93
|
+
if True, the method will look for a column named "weight" in the edges table. If a string is provided, it will be used as the name of the weight column.
|
|
94
|
+
If not provided, the adjacency matrix will be unweighted (boolean).
|
|
95
|
+
|
|
83
96
|
Returns:
|
|
84
97
|
- Sparse boolean matrix of shape ``(n_vertices, n_vertices)``.
|
|
85
98
|
"""
|
|
86
99
|
from .to_csr_matrix import to_row_col_idx, idx_to_csr_matrix
|
|
87
100
|
|
|
88
|
-
|
|
89
|
-
|
|
101
|
+
E = self.edges
|
|
102
|
+
V = self.vertices
|
|
103
|
+
|
|
104
|
+
if use_weight is True:
|
|
105
|
+
weight = "weight"
|
|
106
|
+
elif isinstance(use_weight, str):
|
|
107
|
+
E[["weight"]] = E[[use_weight]]
|
|
108
|
+
elif isinstance(use_weight, ibis.Value):
|
|
109
|
+
weight = "weight"
|
|
110
|
+
E = E.mutate(weight=weight)
|
|
111
|
+
else:
|
|
112
|
+
if ("weight" in E.columns) and (use_weight is False):
|
|
113
|
+
logger.warning(
|
|
114
|
+
"Weight column 'weight' found in edges table, but use_weight is False. Ignoring weight column."
|
|
115
|
+
)
|
|
116
|
+
E = E.drop(["weight"], errors="ignore")
|
|
117
|
+
|
|
118
|
+
if (use_weight is not False) and (weight not in E.columns):
|
|
119
|
+
raise ValueError(f"Weight column '{weight}' not found in edges table")
|
|
120
|
+
|
|
121
|
+
idx = to_row_col_idx(E, V)
|
|
122
|
+
M = idx_to_csr_matrix(idx, V)
|
|
90
123
|
return M
|
|
91
124
|
|
|
92
|
-
def to_csr_matrices(self) -> dict[str, csr_matrix]:
|
|
125
|
+
def to_csr_matrices(self, layers: list[str] | None = None) -> dict[str, csr_matrix]:
|
|
93
126
|
"""
|
|
94
127
|
Transform the multiplex into a dictionary of sparse matrices, one per layer.
|
|
95
128
|
|
|
129
|
+
Args:
|
|
130
|
+
- layers: optional list of layer names to include. If None, all layers are included.
|
|
131
|
+
|
|
96
132
|
Returns:
|
|
97
133
|
- Dictionary mapping layer name to a sparse boolean matrix of shape ``(n_vertices, n_vertices)``.
|
|
98
134
|
"""
|
|
99
135
|
from .to_csr_matrix import to_row_col_idx, idx_to_csr_matrix
|
|
100
136
|
|
|
101
|
-
|
|
137
|
+
# Maybe turn this into a generator instead of a dict, to avoid loading all matrices into memory at once.
|
|
138
|
+
|
|
139
|
+
layers = self.layers() if layers is None else layers
|
|
102
140
|
matrices = {}
|
|
103
141
|
for layer in layers:
|
|
104
142
|
idx = to_row_col_idx(
|
|
@@ -108,6 +146,26 @@ class Multiplex:
|
|
|
108
146
|
matrices[layer] = M
|
|
109
147
|
return matrices
|
|
110
148
|
|
|
149
|
+
def outdegree(self, by_layer: bool = False) -> ibis.Table:
|
|
150
|
+
"""
|
|
151
|
+
Compute the out-degree of each vertex in the multiplex.
|
|
152
|
+
|
|
153
|
+
Args:
|
|
154
|
+
- by_layer: if True, compute the out-degree separately for each layer.
|
|
155
|
+
|
|
156
|
+
Returns:
|
|
157
|
+
- by_layer=False: Table with columns "id" and "out_degree", where "id" is the vertex id and "out_degree" is the total number of outgoing edges from that vertex across all layers.
|
|
158
|
+
- by_layer=True: Table with columns "id", "layer", and "out_degree", where "id" is the vertex id, "layer" is the layer name, and "out_degree" is the number of outgoing edges from that vertex in that layer.
|
|
159
|
+
"""
|
|
160
|
+
E = self.edges
|
|
161
|
+
|
|
162
|
+
gb = ["src"]
|
|
163
|
+
if by_layer:
|
|
164
|
+
gb.append("layer")
|
|
165
|
+
|
|
166
|
+
outdegree = E.group_by(gb).aggregate(outdegree=E.count()).rename(id="src")
|
|
167
|
+
return outdegree
|
|
168
|
+
|
|
111
169
|
def to_networkx(self) -> nx.MultiDiGraph:
|
|
112
170
|
"""
|
|
113
171
|
Convert the multiplex to a NetworkX MultiDiGraph.
|
|
@@ -136,6 +194,12 @@ class Multiplex:
|
|
|
136
194
|
self.update_vertices()
|
|
137
195
|
vertices = self.vertices
|
|
138
196
|
period = self.period
|
|
139
|
-
edges, vertices = io.save_multiplex(
|
|
197
|
+
edges, vertices = io.save_multiplex(
|
|
198
|
+
edges=edges,
|
|
199
|
+
vertices=vertices,
|
|
200
|
+
dir=dir,
|
|
201
|
+
period=period,
|
|
202
|
+
**kw_args,
|
|
203
|
+
)
|
|
140
204
|
self.edges = edges
|
|
141
205
|
self.vertices = vertices
|
|
@@ -1,11 +1,22 @@
|
|
|
1
|
+
"""Multi-period multiplex graph representation.
|
|
2
|
+
|
|
3
|
+
This module defines :class:`MultiplexSeries`, which stores edges across periods
|
|
4
|
+
and exposes filtering, per-period access, collapsing, and persistence helpers.
|
|
5
|
+
"""
|
|
6
|
+
|
|
1
7
|
import ibis
|
|
8
|
+
from ibis import _
|
|
9
|
+
|
|
10
|
+
from muxpack.networkx import to_MultiDiGraph
|
|
2
11
|
|
|
3
12
|
from .check import check_edges, check_vertices
|
|
4
13
|
from pathlib import Path
|
|
5
14
|
from . import io
|
|
6
15
|
from .multiplex import Multiplex
|
|
7
16
|
import logging
|
|
8
|
-
from typing import Tuple
|
|
17
|
+
from typing import Generator, Tuple
|
|
18
|
+
from scipy.sparse import csr_matrix
|
|
19
|
+
import networkx as nx
|
|
9
20
|
|
|
10
21
|
logger = logging.getLogger(__name__)
|
|
11
22
|
|
|
@@ -57,7 +68,7 @@ class MultiplexSeries:
|
|
|
57
68
|
self.vertices = vertices
|
|
58
69
|
self.relationtypes = relationtypes
|
|
59
70
|
|
|
60
|
-
if
|
|
71
|
+
if vertices is not None:
|
|
61
72
|
logger.info("Vertices table provided, using it as is.")
|
|
62
73
|
self.vertex_ids = vertices[["id"]].distinct()
|
|
63
74
|
|
|
@@ -69,14 +80,9 @@ class MultiplexSeries:
|
|
|
69
80
|
- Sorted list of period values.
|
|
70
81
|
"""
|
|
71
82
|
periods = (
|
|
72
|
-
self.edges.select(
|
|
73
|
-
.distinct()
|
|
74
|
-
.order_by("period")
|
|
75
|
-
.to_pyarrow()
|
|
76
|
-
.column("period")
|
|
77
|
-
.to_pylist()
|
|
83
|
+
self.edges.select("period").distinct().order_by("period").period.to_list()
|
|
78
84
|
)
|
|
79
|
-
# periods = self.edges[["period"]].distinct().to_pandas().period.
|
|
85
|
+
# periods = self.edges[["period"]].distinct().to_pandas().period.to_list()
|
|
80
86
|
return periods
|
|
81
87
|
|
|
82
88
|
def layers(self) -> list[str]:
|
|
@@ -86,16 +92,46 @@ class MultiplexSeries:
|
|
|
86
92
|
Returns:
|
|
87
93
|
- Sorted list of layer names.
|
|
88
94
|
"""
|
|
89
|
-
layers = (
|
|
90
|
-
self.edges.select(self.edges.layer)
|
|
91
|
-
.distinct()
|
|
92
|
-
.order_by("layer")
|
|
93
|
-
.to_pyarrow()
|
|
94
|
-
.column("layer")
|
|
95
|
-
.to_pylist()
|
|
96
|
-
)
|
|
95
|
+
layers = self.edges.select("layer").distinct().order_by("layer").layer.to_list()
|
|
97
96
|
return layers
|
|
98
97
|
|
|
98
|
+
def to_csr_matrices(
|
|
99
|
+
self, periods: list[int] | None = None
|
|
100
|
+
) -> Generator[Tuple[csr_matrix, int]]:
|
|
101
|
+
"""
|
|
102
|
+
Generate a sparse matrix for each period. The indices of the matrix correspond to
|
|
103
|
+
the rownumber the ``vertex_ids`` table.
|
|
104
|
+
|
|
105
|
+
Args:
|
|
106
|
+
- periods: list of periods to generate matrices for. If empty, all periods
|
|
107
|
+
present in ``edges`` are used.
|
|
108
|
+
"""
|
|
109
|
+
from .to_csr_matrix import to_csr_matrix
|
|
110
|
+
|
|
111
|
+
if periods is None:
|
|
112
|
+
periods = self.periods()
|
|
113
|
+
|
|
114
|
+
for period in periods:
|
|
115
|
+
E_y = self.edges.filter(_.period == period)
|
|
116
|
+
yield to_csr_matrix(E_y, self.vertex_ids), period
|
|
117
|
+
|
|
118
|
+
def to_networkx(
|
|
119
|
+
self, periods: list[int] | None = None
|
|
120
|
+
) -> Generator[Tuple[nx.MultiDiGraph, int]]:
|
|
121
|
+
"""
|
|
122
|
+
Generate a NetworkX MultiDiGraph for each period.
|
|
123
|
+
|
|
124
|
+
Args:
|
|
125
|
+
- periods: list of periods to generate graphs for. If empty, all periods
|
|
126
|
+
present in ``edges`` are used.
|
|
127
|
+
"""
|
|
128
|
+
if periods is None:
|
|
129
|
+
periods = self.periods()
|
|
130
|
+
|
|
131
|
+
for period in periods:
|
|
132
|
+
E_y = self.edges.filter(_.period == period)
|
|
133
|
+
yield to_MultiDiGraph(E_y, self.vertex_ids), period
|
|
134
|
+
|
|
99
135
|
def update_vertices(self) -> None:
|
|
100
136
|
"""
|
|
101
137
|
Update the vertices table by deriving it from the edges table.
|
|
@@ -163,8 +199,7 @@ class MultiplexSeries:
|
|
|
163
199
|
def add_filter(
|
|
164
200
|
self,
|
|
165
201
|
periods: list[int] = None,
|
|
166
|
-
layers: list[
|
|
167
|
-
relationtypes: list[int] = None,
|
|
202
|
+
layers: dict[str, list[int] | None] = None,
|
|
168
203
|
src: list[int] = None,
|
|
169
204
|
dst: list[int] = None,
|
|
170
205
|
) -> None:
|
|
@@ -179,8 +214,7 @@ class MultiplexSeries:
|
|
|
179
214
|
|
|
180
215
|
Args:
|
|
181
216
|
- periods: list of periods to keep.
|
|
182
|
-
- layers:
|
|
183
|
-
- relationtypes: list of relationtype values to keep.
|
|
217
|
+
- layers: dict of {layer:[relationtype]} to keep. Use ``None`` for the list of relationtypes to keep all relationtypes for that layer.
|
|
184
218
|
- src: list of source vertex ids (ego) to keep.
|
|
185
219
|
- dst: list of destination vertex ids (non-ego) to keep.
|
|
186
220
|
"""
|
|
@@ -189,23 +223,36 @@ class MultiplexSeries:
|
|
|
189
223
|
flt: list[ibis.BooleanValue] = []
|
|
190
224
|
|
|
191
225
|
if periods is not None and len(periods) > 0:
|
|
192
|
-
flt.append(
|
|
226
|
+
flt.append(_.period.isin(periods))
|
|
193
227
|
|
|
194
228
|
if layers is not None and len(layers) > 0:
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
229
|
+
rt = []
|
|
230
|
+
if not isinstance(layers, dict):
|
|
231
|
+
raise ValueError("layers must be a dict of {layer:[relationtype]|None}")
|
|
232
|
+
|
|
233
|
+
sl = self.layers()
|
|
234
|
+
for layer, relationtypes in layers.items():
|
|
235
|
+
if layer not in sl:
|
|
236
|
+
raise ValueError(f"Layer '{layer}' not found in multiplex series")
|
|
237
|
+
e = _.layer == layer
|
|
238
|
+
if relationtypes is not None:
|
|
239
|
+
e = ibis.and_(e, _.relationtype.isin(relationtypes))
|
|
240
|
+
rt.append(e)
|
|
241
|
+
|
|
242
|
+
if len(rt) > 1:
|
|
243
|
+
flt.append(ibis.or_(rt))
|
|
244
|
+
elif len(rt) == 1:
|
|
245
|
+
flt.append(e)
|
|
199
246
|
|
|
200
247
|
if src is not None and len(src) > 0:
|
|
201
248
|
vid = ibis.memtable({"id": src})
|
|
202
249
|
# we use semi join because we expect the vertex list to be large
|
|
203
|
-
E = E.semi_join(vid,
|
|
250
|
+
E = E.semi_join(vid, _.src == vid.id)
|
|
204
251
|
|
|
205
252
|
if dst is not None and len(dst) > 0:
|
|
206
253
|
vid = ibis.memtable({"id": dst})
|
|
207
254
|
# we use semi join because we expect the vertex list to be large
|
|
208
|
-
E = E.semi_join(vid,
|
|
255
|
+
E = E.semi_join(vid, _.dst == vid.id)
|
|
209
256
|
|
|
210
257
|
logger.debug("Filter: f{flt}")
|
|
211
258
|
if len(flt):
|
|
@@ -213,6 +260,19 @@ class MultiplexSeries:
|
|
|
213
260
|
|
|
214
261
|
self.edges = E
|
|
215
262
|
|
|
263
|
+
def __str__(self) -> str:
|
|
264
|
+
"""
|
|
265
|
+
Return a string representation of the multiplex series.
|
|
266
|
+
|
|
267
|
+
Returns:
|
|
268
|
+
- String with number of edges, vertices, and periods.
|
|
269
|
+
"""
|
|
270
|
+
n_edges = self.edges.count().execute()
|
|
271
|
+
n_vertices = self.vertex_ids.count().execute()
|
|
272
|
+
periods = self.periods()
|
|
273
|
+
layers = self.layers()
|
|
274
|
+
return f"MultiplexSeries\n Edges: {n_edges}\n Vertices: {n_vertices}\n Periods: {periods}\n Layers: {layers}"
|
|
275
|
+
|
|
216
276
|
def __copy__(self) -> "MultiplexSeries":
|
|
217
277
|
"""
|
|
218
278
|
Return a shallow copy of this MultiplexSeries.
|
|
@@ -220,9 +280,9 @@ class MultiplexSeries:
|
|
|
220
280
|
Returns:
|
|
221
281
|
- A new MultiplexSeries sharing the same ``edges`` and ``vertices`` tables.
|
|
222
282
|
"""
|
|
223
|
-
return MultiplexSeries(self.edges, self.vertices)
|
|
283
|
+
return MultiplexSeries(self.edges, self.vertices, self.relationtypes)
|
|
224
284
|
|
|
225
|
-
def collapse(self) -> Multiplex:
|
|
285
|
+
def collapse(self, period: int | None = None) -> Multiplex:
|
|
226
286
|
"""
|
|
227
287
|
Collapse the multiplex series into a single Multiplex by discarding period
|
|
228
288
|
information. Duplicate edges across periods are removed. This is useful
|
|
@@ -236,7 +296,7 @@ class MultiplexSeries:
|
|
|
236
296
|
V = self.vertices.select("id").distinct()
|
|
237
297
|
else:
|
|
238
298
|
V = None
|
|
239
|
-
return Multiplex(edges=E, vertices=V, period=
|
|
299
|
+
return Multiplex(edges=E, vertices=V, period=period)
|
|
240
300
|
|
|
241
301
|
def collapse_to(self, dir: Path | str) -> None:
|
|
242
302
|
"""
|
|
@@ -260,14 +320,22 @@ class MultiplexSeries:
|
|
|
260
320
|
|
|
261
321
|
Args:
|
|
262
322
|
- dir: path to the directory where the MultiplexSeries will be saved.
|
|
263
|
-
- **kw_args: additional keyword arguments forwarded to
|
|
323
|
+
- **kw_args: additional keyword arguments forwarded to
|
|
324
|
+
``io.save_multiplexseries``.
|
|
264
325
|
"""
|
|
265
326
|
edges = self.edges
|
|
266
327
|
vertices = self.vertices
|
|
328
|
+
relationtypes = self.relationtypes
|
|
267
329
|
if vertices is None:
|
|
268
330
|
mp = MultiplexSeries(edges=self.edges)
|
|
269
331
|
mp.update_vertices()
|
|
270
332
|
vertices = mp.vertices
|
|
271
|
-
E, V = io.
|
|
333
|
+
E, V = io.save_multiplexseries(
|
|
334
|
+
edges=edges,
|
|
335
|
+
vertices=vertices,
|
|
336
|
+
relationtypes=relationtypes,
|
|
337
|
+
dir=dir,
|
|
338
|
+
**kw_args,
|
|
339
|
+
)
|
|
272
340
|
self.edges = E
|
|
273
341
|
self.vertices = V
|
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
|
|
1
|
+
"""Sparse matrix conversion utilities for multiplex edge tables."""
|
|
2
|
+
|
|
3
|
+
from ibis import row_number, Table, _
|
|
2
4
|
import ibis
|
|
3
5
|
from scipy.sparse import csr_matrix
|
|
4
|
-
from muxpack.multiplex import Multiplex
|
|
5
6
|
from typing import Tuple, Generator
|
|
6
7
|
|
|
7
8
|
import logging
|
|
@@ -10,7 +11,7 @@ logger = logging.getLogger(__name__)
|
|
|
10
11
|
# from collections.abc import Generator
|
|
11
12
|
|
|
12
13
|
|
|
13
|
-
def to_row_col_idx(edges: Table, vertices: Table) -> Table:
|
|
14
|
+
def to_row_col_idx(edges: Table, vertices: Table, use_weight: bool = False) -> Table:
|
|
14
15
|
"""
|
|
15
16
|
Turn an edge list into a row/column index table based on the given vertices table.
|
|
16
17
|
|
|
@@ -28,22 +29,34 @@ def to_row_col_idx(edges: Table, vertices: Table) -> Table:
|
|
|
28
29
|
row = v.select(src="id", row="idx")
|
|
29
30
|
col = v.select(dst="id", col="idx")
|
|
30
31
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
32
|
+
if use_weight:
|
|
33
|
+
idx_edges = (
|
|
34
|
+
edges.aggregate(weight=_.weight.sum(), by=["src", "dst"])
|
|
35
|
+
.inner_join(row, "src")
|
|
36
|
+
.inner_join(col, "dst")
|
|
37
|
+
.mutate(data=True)
|
|
38
|
+
.select("data", "row", "col", "weight")
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
logger.debug("Created weighted row-col index tables.")
|
|
42
|
+
else:
|
|
43
|
+
# may sum the number of columns
|
|
44
|
+
idx_edges = (
|
|
45
|
+
edges[["src", "dst"]]
|
|
46
|
+
.distinct()
|
|
47
|
+
.inner_join(row, "src")
|
|
48
|
+
.inner_join(col, "dst")
|
|
49
|
+
.mutate(data=True)
|
|
50
|
+
.select("data", "row", "col")
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
logger.debug("Created row-col index table with edges.")
|
|
43
54
|
return idx_edges
|
|
44
55
|
|
|
45
56
|
|
|
46
|
-
def idx_to_csr_matrix(
|
|
57
|
+
def idx_to_csr_matrix(
|
|
58
|
+
idx: Table, vertices: Table, use_weight: bool = False
|
|
59
|
+
) -> csr_matrix:
|
|
47
60
|
"""
|
|
48
61
|
Convert a row-column index table to a CSR sparse matrix.
|
|
49
62
|
|
|
@@ -65,50 +78,48 @@ def idx_to_csr_matrix(idx: Table, vertices: Table) -> csr_matrix:
|
|
|
65
78
|
return M
|
|
66
79
|
|
|
67
80
|
|
|
68
|
-
def to_csr_matrix(edges: Table, vertices: Table
|
|
81
|
+
def to_csr_matrix(edges: Table, vertices: Table) -> csr_matrix:
|
|
69
82
|
"""
|
|
70
83
|
Transform an edge list into a sparse matrix (csr_matrix).
|
|
71
84
|
|
|
72
85
|
Args:
|
|
73
86
|
- edges: table with ``src`` and ``dst`` columns.
|
|
74
87
|
- vertices: table with an ``id`` column; edges are filtered to vertices present
|
|
75
|
-
in this table.
|
|
88
|
+
in this table.
|
|
76
89
|
|
|
77
90
|
Returns:
|
|
78
91
|
- Square CSR sparse matrix of shape ``(n_vertices, n_vertices)``.
|
|
79
92
|
"""
|
|
80
93
|
# vertices may contain multiple periods
|
|
81
|
-
|
|
82
|
-
vertices = vertices[["id"]].distinct()
|
|
94
|
+
vertices = vertices[["id"]].distinct()
|
|
83
95
|
edges_row_col = to_row_col_idx(edges, vertices=vertices)
|
|
84
96
|
M = idx_to_csr_matrix(edges_row_col, vertices=vertices)
|
|
85
97
|
return M
|
|
86
98
|
|
|
87
99
|
|
|
88
100
|
def to_period_csr_matrix(
|
|
89
|
-
edges: Table, vertices: Table
|
|
101
|
+
edges: Table, vertices: Table, periods: list[int] | None = None
|
|
90
102
|
) -> Generator[Tuple[csr_matrix, int]]:
|
|
91
103
|
"""
|
|
92
|
-
Generate a sparse matrix for each period.
|
|
104
|
+
Generate a sparse matrix for each period. The indices of the matrix correspond to
|
|
105
|
+
the row number in the ``vertices`` table.
|
|
93
106
|
|
|
94
107
|
Args:
|
|
95
108
|
- edges: table with columns ``src``, ``dst``, and ``period``.
|
|
96
|
-
- vertices: table with columns ``id``
|
|
97
|
-
vertices from the edges table
|
|
109
|
+
- vertices: table with columns ``id`` to derive
|
|
110
|
+
vertices from the edges table
|
|
98
111
|
- periods: list of periods to generate matrices for. If ``None`` or empty, all periods
|
|
99
112
|
present in ``edges`` are used.
|
|
100
113
|
|
|
101
114
|
Returns:
|
|
102
115
|
- Generator of ``(csr_matrix, period)`` tuples, one per period.
|
|
103
116
|
"""
|
|
104
|
-
if len(periods) == 0:
|
|
105
|
-
periods = edges[["period"]].distinct().
|
|
117
|
+
if periods is None or len(periods) == 0:
|
|
118
|
+
periods = edges[["period"]].distinct().period.to_list()
|
|
119
|
+
|
|
106
120
|
for period in periods:
|
|
107
|
-
E_y = edges.filter(
|
|
108
|
-
|
|
109
|
-
V_y = vertices.filter(vertices.period == period)
|
|
110
|
-
else:
|
|
111
|
-
V_y = None
|
|
121
|
+
E_y = edges.filter(_.period == period)
|
|
122
|
+
V_y = vertices
|
|
112
123
|
|
|
113
124
|
yield to_csr_matrix(E_y, V_y), period
|
|
114
125
|
|
|
@@ -129,4 +140,4 @@ if __name__ == "__main__":
|
|
|
129
140
|
print(f"M1 = {M1}")
|
|
130
141
|
|
|
131
142
|
M = to_csr_matrix(E, V)
|
|
132
|
-
print(M)
|
|
143
|
+
print(M)
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
from .check import check_edges, check_vertices
|
|
2
|
-
from .io import load_network, save_network
|
|
3
|
-
from .multiplexseries import MultiplexSeries
|
|
4
|
-
from .multiplex import Multiplex
|
|
5
|
-
from .to_csr_matrix import to_csr_matrix
|
|
6
|
-
from .bipartite import Bipartite
|
|
7
|
-
|
|
8
|
-
__all__ = [
|
|
9
|
-
"check_edges",
|
|
10
|
-
"check_vertices",
|
|
11
|
-
"load_network",
|
|
12
|
-
"Multiplex",
|
|
13
|
-
"MultiplexSeries",
|
|
14
|
-
"save_network",
|
|
15
|
-
"to_csr_matrix",
|
|
16
|
-
"Bipartite",
|
|
17
|
-
]
|
|
File without changes
|
|
File without changes
|