cortexforge 0.1.0__tar.gz → 0.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cortexforge-0.1.0 → cortexforge-0.1.3}/PKG-INFO +1 -1
- {cortexforge-0.1.0 → cortexforge-0.1.3}/README.md +5 -10
- {cortexforge-0.1.0 → cortexforge-0.1.3}/pyproject.toml +5 -3
- cortexforge-0.1.3/src/cortexforge/cli/__init__.py +60 -0
- cortexforge-0.1.3/src/cortexforge/cli/datasets.py +56 -0
- {cortexforge-0.1.0 → cortexforge-0.1.3}/src/cortexforge/cli/forge.py +8 -8
- cortexforge-0.1.3/src/cortexforge/cli/planner.py +88 -0
- {cortexforge-0.1.0 → cortexforge-0.1.3}/src/cortexforge/datasets/__init__.py +4 -0
- cortexforge-0.1.3/src/cortexforge/datasets/api.py +366 -0
- cortexforge-0.1.3/src/cortexforge/datasets/main.py +77 -0
- cortexforge-0.1.3/src/cortexforge/datasets/registry.json +15 -0
- {cortexforge-0.1.0 → cortexforge-0.1.3}/src/cortexforge/forge/main.py +5 -2
- {cortexforge-0.1.0 → cortexforge-0.1.3}/src/cortexforge/planner/generators/experiment_scenario.py +77 -36
- {cortexforge-0.1.0 → cortexforge-0.1.3}/src/cortexforge/planner/main.py +15 -6
- cortexforge-0.1.3/src/cortexforge/planner/modulations.py +26 -0
- {cortexforge-0.1.0 → cortexforge-0.1.3}/src/cortexforge.egg-info/PKG-INFO +1 -1
- {cortexforge-0.1.0 → cortexforge-0.1.3}/src/cortexforge.egg-info/SOURCES.txt +4 -0
- cortexforge-0.1.3/src/cortexforge.egg-info/entry_points.txt +2 -0
- cortexforge-0.1.0/src/cortexforge/cli/planner.py +0 -45
- cortexforge-0.1.0/src/cortexforge/datasets/api.py +0 -88
- cortexforge-0.1.0/src/cortexforge/utils/__init__.py +0 -0
- cortexforge-0.1.0/src/cortexforge.egg-info/entry_points.txt +0 -3
- {cortexforge-0.1.0 → cortexforge-0.1.3}/setup.cfg +0 -0
- {cortexforge-0.1.0 → cortexforge-0.1.3}/src/cortexforge/datasets/hash.py +0 -0
- {cortexforge-0.1.0 → cortexforge-0.1.3}/src/cortexforge/datasets/local.py +0 -0
- {cortexforge-0.1.0 → cortexforge-0.1.3}/src/cortexforge/datasets/manifest.py +0 -0
- {cortexforge-0.1.0 → cortexforge-0.1.3}/src/cortexforge/datasets/types.py +0 -0
- {cortexforge-0.1.0/src/cortexforge/cli → cortexforge-0.1.3/src/cortexforge/forge}/__init__.py +0 -0
- {cortexforge-0.1.0/src/cortexforge/forge → cortexforge-0.1.3/src/cortexforge/forge/radio}/__init__.py +0 -0
- {cortexforge-0.1.0 → cortexforge-0.1.3}/src/cortexforge/forge/radio/rx.py +0 -0
- {cortexforge-0.1.0 → cortexforge-0.1.3}/src/cortexforge/forge/radio/rx_recorder.py +0 -0
- {cortexforge-0.1.0 → cortexforge-0.1.3}/src/cortexforge/forge/radio/tx.py +0 -0
- {cortexforge-0.1.0 → cortexforge-0.1.3}/src/cortexforge/forge/radio/tx_burst.py +0 -0
- {cortexforge-0.1.0 → cortexforge-0.1.3}/src/cortexforge/forge/radio/waveforms.py +0 -0
- {cortexforge-0.1.0 → cortexforge-0.1.3}/src/cortexforge/forge/radio/waveforms_analog.py +0 -0
- {cortexforge-0.1.0 → cortexforge-0.1.3}/src/cortexforge/forge/radio/waveforms_numerique.py +0 -0
- {cortexforge-0.1.0/src/cortexforge/forge/radio → cortexforge-0.1.3/src/cortexforge/forge/utils}/__init__.py +0 -0
- {cortexforge-0.1.0 → cortexforge-0.1.3}/src/cortexforge/forge/utils/compute_baseline.py +0 -0
- {cortexforge-0.1.0 → cortexforge-0.1.3}/src/cortexforge/forge/utils/load_timeline.py +0 -0
- {cortexforge-0.1.0 → cortexforge-0.1.3}/src/cortexforge/forge/utils/node_identity.py +0 -0
- {cortexforge-0.1.0 → cortexforge-0.1.3}/src/cortexforge/forge/utils/node_layout.py +0 -0
- {cortexforge-0.1.0 → cortexforge-0.1.3}/src/cortexforge/forge/utils/sigmf/hash.py +0 -0
- {cortexforge-0.1.0 → cortexforge-0.1.3}/src/cortexforge/forge/utils/sigmf/sigmf_annotations.py +0 -0
- {cortexforge-0.1.0 → cortexforge-0.1.3}/src/cortexforge/forge/utils/sigmf/sigmf_captures.py +0 -0
- {cortexforge-0.1.0 → cortexforge-0.1.3}/src/cortexforge/forge/utils/sigmf/sigmf_global.py +0 -0
- {cortexforge-0.1.0 → cortexforge-0.1.3}/src/cortexforge/forge/utils/sigmf_writer.py +0 -0
- {cortexforge-0.1.0 → cortexforge-0.1.3}/src/cortexforge/forge/utils/sync_barrier/rx_barrier_server.py +0 -0
- {cortexforge-0.1.0 → cortexforge-0.1.3}/src/cortexforge/forge/utils/sync_barrier/sync_config.py +0 -0
- {cortexforge-0.1.0 → cortexforge-0.1.3}/src/cortexforge/forge/utils/sync_barrier/tx_barrier_client.py +0 -0
- {cortexforge-0.1.0 → cortexforge-0.1.3}/src/cortexforge/forge/utils/uhd_time.py +0 -0
- {cortexforge-0.1.0/src/cortexforge/forge/utils → cortexforge-0.1.3/src/cortexforge/planner}/__init__.py +0 -0
- {cortexforge-0.1.0/src/cortexforge/planner → cortexforge-0.1.3/src/cortexforge/planner/generators}/__init__.py +0 -0
- {cortexforge-0.1.0 → cortexforge-0.1.3}/src/cortexforge/planner/generators/cortexlab_scenario.py +0 -0
- {cortexforge-0.1.0/src/cortexforge/planner/generators → cortexforge-0.1.3/src/cortexforge/utils}/__init__.py +0 -0
- {cortexforge-0.1.0 → cortexforge-0.1.3}/src/cortexforge/utils/loader.py +0 -0
- {cortexforge-0.1.0 → cortexforge-0.1.3}/src/cortexforge/utils/logger.py +0 -0
- {cortexforge-0.1.0 → cortexforge-0.1.3}/src/cortexforge.egg-info/dependency_links.txt +0 -0
- {cortexforge-0.1.0 → cortexforge-0.1.3}/src/cortexforge.egg-info/requires.txt +0 -0
- {cortexforge-0.1.0 → cortexforge-0.1.3}/src/cortexforge.egg-info/top_level.txt +0 -0
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
# CorteXForge
|
|
2
|
-
This project is a framework designed to automate the generation and execution of radio dataset experiments on the [
|
|
2
|
+
This project is a framework designed to automate the generation and execution of radio dataset experiments on the [SLICES-RI/CorteXlab](https://www.cortexlab.fr/doku.php?id=start) testbed.
|
|
3
3
|
It relies on the [GNU Radio](https://www.gnuradio.org) environment to record labeled transmissions of various signals.
|
|
4
4
|
|
|
5
5
|
## Overview
|
|
6
|
-
This project is organized into
|
|
6
|
+
This project is organized into three main components:
|
|
7
7
|
- Scenario generation: this part produces configuration files describing the experiment setup. It creates:
|
|
8
8
|
- a `scenario.yaml` file defining which nodes will be used on CorteXlab;
|
|
9
9
|
- a `timeline.csv` file orchestrating the role and sequence of these nodes.
|
|
10
|
-
- Experiment execution: this part deploys and executes the generates experiment definitions (`timeline.csv`) directly on the [
|
|
10
|
+
- Experiment execution: this part deploys and executes the generated experiment definitions (`timeline.csv`) directly on the [SLICES-RI/CorteXlab](https://www.cortexlab.fr/doku.php?id=start) nodes.
|
|
11
|
+
- Dataset API
|
|
11
12
|
|
|
12
13
|
## Quick start (User Guide) :rocket:
|
|
13
14
|
|
|
@@ -21,7 +22,7 @@ The scenario generator can be executed locally before deployment in Slices/Cort
|
|
|
21
22
|
### Example usage
|
|
22
23
|
- ```git clone https://github.com/Andreaj42/CorteXForge.git```
|
|
23
24
|
- ```python3.13 -m venv .venv```
|
|
24
|
-
-
|
|
25
|
+
- ```. .venv/bin/activate```
|
|
25
26
|
- ```pip install -e .[planner]```
|
|
26
27
|
- ```cortexforge-planner --nodes-path configs/nodes.yaml --duration 600 --output-path my/path/on/cortexlab```
|
|
27
28
|
|
|
@@ -49,12 +50,6 @@ To monitor your experiment, use:
|
|
|
49
50
|
- ```minus testbed status```
|
|
50
51
|
- ```minus log -d```
|
|
51
52
|
|
|
52
|
-
## Developer Guide :hammer_and_wrench:
|
|
53
|
-
|
|
54
|
-
Now clone the project:
|
|
55
|
-
- ```git clone https://github.com/Andreaj42/CorteXForge.git```
|
|
56
|
-
- ```cd forge```
|
|
57
|
-
|
|
58
53
|
|
|
59
54
|
### Docker Images :whale:
|
|
60
55
|
To simplify deployment and ensure reproducibility, we generated a Docker image.
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "cortexforge"
|
|
7
|
-
version = "0.1.
|
|
7
|
+
version = "0.1.3"
|
|
8
8
|
description = "RF generator dataset based on SLICES/CorteXlab"
|
|
9
9
|
authors = [
|
|
10
10
|
{ name = "Andrea Joly", email = "andrea.joly@inria.fr" }
|
|
@@ -31,11 +31,13 @@ forge = [
|
|
|
31
31
|
|
|
32
32
|
|
|
33
33
|
[project.scripts]
|
|
34
|
-
cortexforge
|
|
35
|
-
cortexforge-forge = "cortexforge.forge.main:main"
|
|
34
|
+
cortexforge = "cortexforge.cli:main"
|
|
36
35
|
|
|
37
36
|
[tool.setuptools]
|
|
38
37
|
package-dir = {"" = "src"}
|
|
39
38
|
|
|
40
39
|
[tool.setuptools.packages.find]
|
|
41
40
|
where = ["src"]
|
|
41
|
+
|
|
42
|
+
[tool.setuptools.package-data]
|
|
43
|
+
"cortexforge" = ["**/*.json"]
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""Unified CLI entrypoint for CorteXForge."""
|
|
2
|
+
|
|
3
|
+
from argparse import ArgumentParser, Namespace
|
|
4
|
+
|
|
5
|
+
from cortexforge.cli.datasets import configure_parser as configure_datasets_parser
|
|
6
|
+
from cortexforge.cli.forge import configure_parser as configure_forge_parser
|
|
7
|
+
from cortexforge.cli.planner import configure_parser as configure_planner_parser
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def build_parser() -> ArgumentParser:
    """Create the top-level ``cortexforge`` parser and its three subcommands.

    Each subcommand parser (``planner``, ``forge``, ``datasets``) is created
    here and then passed to the matching ``configure_*_parser`` helper, which
    attaches that subcommand's own arguments.

    Returns:
        The fully configured root parser. The chosen subcommand name is
        stored under ``command`` and a subcommand is mandatory.
    """
    root = ArgumentParser(prog="cortexforge", description="CorteXForge command line")
    subcommands = root.add_subparsers(
        dest="command", required=True, help="CorteXForge subcommand"
    )

    planner_parser = subcommands.add_parser(
        "planner", help="Generate scenarios and experiment files"
    )
    configure_planner_parser(planner_parser)

    forge_parser = subcommands.add_parser(
        "forge", help="Run transmitter or receiver commands on CorteXlab"
    )
    configure_forge_parser(forge_parser)

    datasets_parser = subcommands.add_parser(
        "datasets", help="Inspect available datasets"
    )
    configure_datasets_parser(datasets_parser)

    return root
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def parse_args(argv: list[str] | None = None) -> Namespace:
    """Parse ``argv`` with a freshly built root parser.

    Args:
        argv: Argument strings to parse; ``None`` is forwarded unchanged to
            ``ArgumentParser.parse_args``.

    Returns:
        The namespace produced by the root parser.
    """
    root_parser = build_parser()
    return root_parser.parse_args(argv)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def main(argv: list[str] | None = None) -> None:
    """Parse the command line and run the selected subcommand.

    The implementation module of a subcommand is imported only once that
    subcommand has been chosen.

    Args:
        argv: Argument strings to parse; ``None`` is forwarded to the parser.

    Raises:
        ValueError: If the parsed ``command`` is not one of ``planner``,
            ``forge`` or ``datasets``.
    """
    args = parse_args(argv)
    selected = args.command

    if selected == "planner":
        from cortexforge.planner.main import run as planner_run

        planner_run(args)
    elif selected == "forge":
        from cortexforge.forge.main import run as forge_run

        forge_run(args)
    elif selected == "datasets":
        from cortexforge.datasets.main import run as datasets_run

        datasets_run(args)
    else:
        raise ValueError(f"Unknown command: {args.command}")
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"""CLI argument parser for CorteXForge datasets."""
|
|
2
|
+
|
|
3
|
+
from argparse import ArgumentParser, Namespace
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def configure_parser(parser: ArgumentParser) -> ArgumentParser:
    """Register the dataset options and operations on ``parser``.

    Adds the ``--root`` option plus two mandatory-choice operations stored
    under ``datasets_command``: ``list`` (no arguments) and ``download``
    (positional ``name``, optional ``--version`` and ``--force``).

    Args:
        parser: Parser that receives the dataset arguments.

    Returns:
        The same ``parser`` instance, for chaining.
    """
    parser.add_argument(
        "--root",
        type=Path,
        default=Path("datasets"),
        help="Path to the datasets root directory",
    )

    operations = parser.add_subparsers(
        dest="datasets_command", required=True, help="Dataset operation to execute"
    )
    operations.add_parser("list", help="List available datasets and their versions")

    download_parser = operations.add_parser(
        "download",
        help="Download and extract a dataset version from the embedded registry",
    )
    download_parser.add_argument("name", type=str, help="Dataset name")
    download_parser.add_argument(
        "--version",
        type=str,
        default=None,
        help="Dataset version to download (defaults to latest)",
    )
    download_parser.add_argument(
        "--force",
        action="store_true",
        help="Re-download and overwrite an existing local dataset version",
    )

    return parser
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def build_parser() -> ArgumentParser:
    """Return a standalone ``cortexforge datasets`` parser.

    The parser is created here and populated by ``configure_parser``.
    """
    standalone = ArgumentParser(
        prog="cortexforge datasets", description="Inspect available datasets"
    )
    configure_parser(standalone)
    return standalone
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def parse_args(argv: list[str] | None = None) -> Namespace:
    """Parse ``argv`` with the standalone datasets parser.

    Args:
        argv: Argument strings to parse; ``None`` is forwarded unchanged to
            ``ArgumentParser.parse_args``.
    """
    return build_parser().parse_args(argv)
|
|
@@ -4,14 +4,8 @@ from argparse import ArgumentParser, Namespace
|
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
|
|
6
6
|
|
|
7
|
-
def
|
|
8
|
-
"""
|
|
9
|
-
Build and configure the cli argument parser for CorteXForge forge.
|
|
10
|
-
|
|
11
|
-
Returns:
|
|
12
|
-
ArgumentParser: Configured argument parser instance with
|
|
13
|
-
"""
|
|
14
|
-
parser = ArgumentParser(prog="CorteXForge forge", description="Dataset Generator")
|
|
7
|
+
def configure_parser(parser: ArgumentParser) -> ArgumentParser:
|
|
8
|
+
"""Attach forge-specific arguments to an existing parser."""
|
|
15
9
|
sub = parser.add_subparsers(
|
|
16
10
|
dest="role",
|
|
17
11
|
required=True,
|
|
@@ -52,6 +46,12 @@ def build_parser() -> ArgumentParser:
|
|
|
52
46
|
return parser
|
|
53
47
|
|
|
54
48
|
|
|
49
|
+
def build_parser() -> ArgumentParser:
    """Return a standalone ``cortexforge forge`` parser.

    The parser is created here and populated by ``configure_parser``.
    """
    standalone = ArgumentParser(
        prog="cortexforge forge",
        description="Dataset Generator",
    )
    return configure_parser(standalone)
|
|
53
|
+
|
|
54
|
+
|
|
55
55
|
def parse_args(argv: list[str] | None = None) -> Namespace:
|
|
56
56
|
"""Parse command line arguments."""
|
|
57
57
|
parser = build_parser()
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
"""CLI argument parser for CorteXForge planner."""
|
|
2
|
+
|
|
3
|
+
from argparse import Action, ArgumentParser, Namespace
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from cortexforge.planner.modulations import DEFAULT_MODULATIONS
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _split_modulations(value: str) -> list[str]:
|
|
10
|
+
return [item.strip().upper() for item in value.split(",") if item.strip()]
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class _ModulationAction(Action):
    """Accumulate modulation names given as separate or comma-joined tokens.

    Every use of the option appends to the list already stored on the
    namespace, so repeating the option extends the earlier values.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        # Start from what earlier occurrences stored (None or missing -> empty).
        collected = list(getattr(namespace, self.dest, None) or [])
        collected += [name for raw in values for name in _split_modulations(raw)]

        # Reject anything that is not listed in DEFAULT_MODULATIONS.
        unsupported = sorted(set(collected) - set(DEFAULT_MODULATIONS))
        if unsupported:
            parser.error(
                f"unsupported modulation(s): {', '.join(unsupported)}. "
                f"Supported values are: {', '.join(DEFAULT_MODULATIONS)}"
            )

        setattr(namespace, self.dest, collected)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def configure_parser(parser: ArgumentParser) -> ArgumentParser:
    """Register every planner option on ``parser``.

    Options are added in this order: ``--username`` (required),
    ``--duration``, ``--rx-frequency``, ``--rx-gain``, ``--rx-sample-rate``,
    ``--overlapping``, ``--n-signals``, ``--nodes-path`` and ``--modulations``.

    Args:
        parser: Parser that receives the planner arguments.

    Returns:
        The same ``parser`` instance, for chaining.
    """
    add = parser.add_argument

    add("--username", required=True, type=str, help="username on CorteXlab")

    # Plain integer options share the same shape: (flag, default, help text).
    integer_options = (
        ("--duration", 60, "Experiment duration in seconds"),
        ("--rx-frequency", 2450000000, "Receiver frequency"),
        ("--rx-gain", 10, "Receiver gain"),
        ("--rx-sample-rate", 250000, "Receiver sample-rate"),
    )
    for flag, default_value, help_text in integer_options:
        add(flag, type=int, default=default_value, help=help_text)

    add(
        "--overlapping",
        action="store_true",
        help="Allow overlapping signals in timeline",
    )
    add(
        "--n-signals",
        type=int,
        default=288,
        help=(
            "Number of signals to generate. Must be a multiple of the selected "
            "modulation count."
        ),
    )
    add(
        "--nodes-path",
        type=Path,
        default="configs/nodes.yaml",
        help="Path to nodes.yaml file",
    )
    add(
        "--modulations",
        action=_ModulationAction,
        nargs="+",
        metavar="MOD",
        default=None,
        help=(
            "Modulations to include in the generated dataset. "
            "Use a space-separated list or comma-separated values. "
            "Defaults to all planner modulations."
        ),
    )

    return parser
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def build_parser() -> ArgumentParser:
    """Return a standalone ``cortexforge planner`` parser.

    The parser is created here and populated by ``configure_parser``.
    """
    standalone = ArgumentParser(
        prog="cortexforge planner",
        description="Dataset Generator",
    )
    return configure_parser(standalone)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def parse_args(argv: list[str] | None = None) -> Namespace:
    """Parse ``argv`` with the standalone planner parser.

    Args:
        argv: Argument strings to parse; ``None`` is forwarded unchanged to
            ``ArgumentParser.parse_args``.
    """
    return build_parser().parse_args(argv)
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
from .api import (
|
|
2
2
|
describe_dataset,
|
|
3
|
+
download_dataset,
|
|
3
4
|
list_datasets,
|
|
5
|
+
list_registry_datasets,
|
|
4
6
|
list_datasets_with_versions,
|
|
5
7
|
list_versions,
|
|
6
8
|
load_dataset,
|
|
@@ -9,7 +11,9 @@ from .api import (
|
|
|
9
11
|
|
|
10
12
|
__all__ = [
|
|
11
13
|
"load_dataset",
|
|
14
|
+
"download_dataset",
|
|
12
15
|
"list_datasets",
|
|
16
|
+
"list_registry_datasets",
|
|
13
17
|
"list_datasets_with_versions",
|
|
14
18
|
"describe_dataset",
|
|
15
19
|
"load_manifest",
|
|
@@ -0,0 +1,366 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
import json
|
|
3
|
+
import logging
|
|
4
|
+
import shutil
|
|
5
|
+
import subprocess
|
|
6
|
+
import sys
|
|
7
|
+
import time
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from urllib.request import Request, urlopen
|
|
10
|
+
|
|
11
|
+
from .local import load_local_dataset
|
|
12
|
+
from .manifest import load_manifest
|
|
13
|
+
|
|
14
|
+
REGISTRY_PATH = Path(__file__).with_name("registry.json")
|
|
15
|
+
DOWNLOAD_HEADERS = {
|
|
16
|
+
"User-Agent": "CorteXForge/0.1 (+https://github.com/Andreaj42/CorteXForge)",
|
|
17
|
+
"Accept": "*/*",
|
|
18
|
+
}
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _load_registry() -> dict:
    """Read the UTF-8 JSON file at ``REGISTRY_PATH`` and return its content."""
    raw_text = REGISTRY_PATH.read_text(encoding="utf-8")
    return json.loads(raw_text)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _resolve_registry_entry(name: str, version: str | None) -> tuple[str, dict]:
    """Look up one dataset version in the registry.

    Args:
        name: Dataset key in the registry.
        version: Requested version; a falsy value selects the entry's
            ``"latest"`` version.

    Returns:
        A ``(resolved_version, version_entry)`` pair.

    Raises:
        ValueError: If the dataset or the resolved version is not registered.
    """
    registry = _load_registry()
    if name not in registry:
        raise ValueError(f"Unknown dataset: {name}")

    entry = registry[name]
    resolved_version = version if version else entry["latest"]
    known_versions = entry.get("versions", {})

    if resolved_version in known_versions:
        return resolved_version, known_versions[resolved_version]

    raise ValueError(f"Unknown version '{resolved_version}' for dataset '{name}'")
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _sha256_hex(path: Path, chunk_size: int = 1024 * 1024) -> str:
|
|
44
|
+
h = hashlib.sha256()
|
|
45
|
+
with path.open("rb") as f:
|
|
46
|
+
for chunk in iter(lambda: f.read(chunk_size), b""):
|
|
47
|
+
h.update(chunk)
|
|
48
|
+
return h.hexdigest()
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _format_size(size_bytes: int | float) -> str:
|
|
52
|
+
value = float(size_bytes)
|
|
53
|
+
units = ["B", "KB", "MB", "GB", "TB"]
|
|
54
|
+
|
|
55
|
+
for unit in units:
|
|
56
|
+
if value < 1024 or unit == units[-1]:
|
|
57
|
+
if unit == "B":
|
|
58
|
+
return f"{int(value)} {unit}"
|
|
59
|
+
return f"{value:.1f} {unit}"
|
|
60
|
+
value /= 1024
|
|
61
|
+
|
|
62
|
+
return f"{int(size_bytes)} B"
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _render_progress_bar(
    downloaded_bytes: int,
    total_bytes: int | None,
    width: int = 28,
) -> str:
    """Build a textual progress bar for a download.

    Args:
        downloaded_bytes: Bytes received so far.
        total_bytes: Expected size; when missing, zero or negative the bar is
            filled with ``?`` and only the downloaded amount is shown.
        width: Number of characters between the brackets.

    Returns:
        ``[####----]  NN% (done/total)`` when the total is known, otherwise
        ``[????] done``.
    """
    if total_bytes and total_bytes > 0:
        # Clamp so that an over-long download never overflows the bar.
        fraction = min(downloaded_bytes / total_bytes, 1.0)
        done_cells = min(int(fraction * width), width)
        cells = "#" * done_cells + "-" * (width - done_cells)
        percentage = int(fraction * 100)
        return (
            f"[{cells}] {percentage:3d}% "
            f"({_format_size(downloaded_bytes)}/{_format_size(total_bytes)})"
        )

    return f"[{'?' * width}] {_format_size(downloaded_bytes)}"
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _update_progress_line(message: str) -> None:
|
|
84
|
+
sys.stderr.write(f"\r{message}")
|
|
85
|
+
sys.stderr.flush()
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _finish_progress_line(message: str) -> None:
|
|
89
|
+
sys.stderr.write(f"\r{message}\n")
|
|
90
|
+
sys.stderr.flush()
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _download_file(
    url: str,
    destination: Path,
    chunk_size: int = 1024 * 1024,
    timeout: float | None = 60.0,
) -> None:
    """Stream ``url`` into ``destination`` while drawing progress on stderr.

    Args:
        url: Location of the file to fetch.
        destination: File receiving the bytes; it is opened in ``"wb"`` mode,
            so any existing content is replaced.
        chunk_size: Number of bytes requested from the response per read.
        timeout: Seconds a blocking network operation may take before
            ``urlopen`` gives up; ``None`` disables the limit. Without a
            limit a stalled connection would block the caller forever.

    Raises:
        Whatever ``urlopen`` or the file operations raise (for example
        ``urllib.error.URLError`` or ``OSError``); nothing is caught here.
    """
    request = Request(url, headers=DOWNLOAD_HEADERS)
    with urlopen(request, timeout=timeout) as response, destination.open("wb") as f:
        total_bytes_header = response.headers.get("Content-Length")
        try:
            total_bytes = int(total_bytes_header) if total_bytes_header else None
        except ValueError:
            # A malformed header must not abort the download; fall back to
            # the "size unknown" display instead.
            total_bytes = None
        if total_bytes is not None and total_bytes <= 0:
            total_bytes = None

        downloaded_bytes = 0
        last_logged_percent = -1
        # Unknown-size downloads are reported once per this many bytes. A byte
        # threshold (rather than an exact-multiple test) keeps reporting alive
        # even when a read returns fewer than ``chunk_size`` bytes.
        report_interval = 50 * chunk_size
        next_report_at = 0
        started_at = time.monotonic()

        if total_bytes:
            _update_progress_line(
                f"Download progress: {_render_progress_bar(0, total_bytes)}"
            )
        else:
            _update_progress_line("Download progress: starting, total size unknown")

        while True:
            chunk = response.read(chunk_size)
            if not chunk:
                break

            f.write(chunk)
            downloaded_bytes += len(chunk)

            if total_bytes:
                percent = int((downloaded_bytes / total_bytes) * 100)
                # Redraw every 5 percentage points and once more at the end.
                if percent >= last_logged_percent + 5 or downloaded_bytes == total_bytes:
                    elapsed = max(time.monotonic() - started_at, 1e-9)
                    speed = downloaded_bytes / elapsed
                    _update_progress_line(
                        "Download progress: "
                        f"{_render_progress_bar(downloaded_bytes, total_bytes)} "
                        f"at {_format_size(speed)}/s"
                    )
                    last_logged_percent = percent
            elif downloaded_bytes >= next_report_at:
                elapsed = max(time.monotonic() - started_at, 1e-9)
                speed = downloaded_bytes / elapsed
                _update_progress_line(
                    "Download progress: "
                    f"{_format_size(downloaded_bytes)} downloaded "
                    f"at {_format_size(speed)}/s"
                )
                next_report_at = downloaded_bytes + report_interval

        # The lower bound on elapsed avoids a division by zero for tiny files.
        elapsed = max(time.monotonic() - started_at, 1e-9)
        speed = downloaded_bytes / elapsed
        _finish_progress_line(
            "Download completed: "
            f"{_format_size(downloaded_bytes)} in {elapsed:.1f}s "
            f"at {_format_size(speed)}/s"
        )
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def _extract_archive_with_tar(archive_path: Path, root: Path) -> None:
    """Unpack ``archive_path`` into ``root`` by running the ``tar`` executable.

    Both paths are resolved before being passed to ``tar``. Output is captured
    as text, and a non-zero exit status raises
    ``subprocess.CalledProcessError``.
    """
    tar_command = [
        "tar",
        "-xf",
        str(archive_path.resolve()),
        "-C",
        str(root.resolve()),
    ]
    subprocess.run(tar_command, check=True, capture_output=True, text=True)
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def _extract_archive_with_external_zstd(archive_path: Path, root: Path) -> None:
    """Unpack a zstd-compressed tar archive through a ``zstd | tar`` pipeline.

    ``unzstd`` is preferred; ``zstd -d`` is used when only ``zstd`` is found
    on ``PATH``. The decompressor writes to stdout, which is connected to the
    stdin of ``tar -xf -`` extracting into ``root``.

    Raises:
        RuntimeError: If no decompressor is available, or if either the
            decompressor or ``tar`` exits with a non-zero status.
    """
    unzstd_path = shutil.which("unzstd")
    if unzstd_path:
        command = [unzstd_path, "-c", str(archive_path.resolve())]
    else:
        zstd_path = shutil.which("zstd")
        if zstd_path:
            command = [zstd_path, "-d", "-c", str(archive_path.resolve())]
        else:
            command = None

    if command is None:
        raise RuntimeError(
            "Could not extract .tar.zst archive because no compatible zstd "
            "decompressor was found in PATH. Install 'zstd' or 'unzstd'."
        )

    producer = subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=False,
    )
    consumer = subprocess.Popen(
        ["tar", "-xf", "-", "-C", str(root.resolve())],
        stdin=producer.stdout,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=False,
    )

    # Drop this process's handle on the pipe so that tar is the only reader
    # left on the decompressor's output.
    if producer.stdout is not None:
        producer.stdout.close()

    _, consumer_stderr = consumer.communicate()
    _, producer_stderr = producer.communicate()

    # Report the decompressor first: when it fails, tar usually fails too.
    if producer.returncode != 0:
        stderr_text = producer_stderr.decode("utf-8", errors="replace").strip()
        raise RuntimeError(
            "Failed to decompress dataset archive with zstd. "
            f"Command: {' '.join(command)}. Error: {stderr_text or 'unknown error'}"
        )

    if consumer.returncode != 0:
        stderr_text = consumer_stderr.decode("utf-8", errors="replace").strip()
        raise RuntimeError(
            "Failed to extract decompressed tar archive. "
            f"Error: {stderr_text or 'unknown error'}"
        )
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def _extract_archive(archive_path: Path, root: Path) -> None:
    """Extract ``archive_path`` into ``root``, with a zstd fallback.

    ``tar`` is tried directly first. If it exits with an error and the file
    name ends in ``.tar.zst``, extraction is retried through an external zstd
    decompressor.

    Raises:
        RuntimeError: If direct extraction failed and no fallback applies
            (chained to the original ``tar`` failure), or if the zstd
            fallback itself fails.
    """
    try:
        _extract_archive_with_tar(archive_path, root)
        return
    except subprocess.CalledProcessError as exc:
        # Keep a reference: ``exc`` is unbound once the except block ends.
        tar_failure = exc
        tar_error = (exc.stderr or exc.stdout or "").strip()
        logger.warning(
            "Direct tar extraction failed for %s: %s",
            archive_path.resolve(),
            tar_error or f"exit code {exc.returncode}",
        )

    if archive_path.name.endswith(".tar.zst"):
        logger.info("Retrying archive extraction with external zstd decompressor")
        _extract_archive_with_external_zstd(archive_path, root)
        return

    # Chain the tar failure so the traceback shows why extraction failed.
    raise RuntimeError(
        f"Could not extract archive: {archive_path.resolve()}"
    ) from tar_failure
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def load_dataset(
    name: str,
    version: str,
    split: str,
    root: str | Path,
    verify: bool = True,
):
    """Load one split of a locally stored dataset version.

    The dataset is expected under ``<root>/<name>/<version>``; that directory,
    ``split`` and ``verify`` are forwarded to ``load_local_dataset``, whose
    result is returned unchanged.
    """
    location = Path(root, name, version)
    return load_local_dataset(dataset_dir=location, split=split, verify=verify)
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def describe_dataset(
    name: str,
    version: str,
    root: str | Path,
):
    """Return the manifest of a locally stored dataset version.

    The manifest is read from ``<root>/<name>/<version>/manifest.json`` via
    ``load_manifest``, whose result is returned unchanged.
    """
    manifest_file = Path(root, name, version) / "manifest.json"
    return load_manifest(manifest_file)
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def download_dataset(
    name: str,
    root: str | Path,
    version: str | None = None,
    force: bool = False,
):
    """Download, verify and extract a registered dataset version into ``root``.

    Args:
        name: Dataset key in the embedded registry.
        root: Directory that receives the archive and the extracted dataset.
        version: Version to install; ``None`` selects the registry's latest.
        force: Re-download even when a manifest is already present, replacing
            the existing dataset directory.

    Returns:
        The result of ``load_manifest`` for the installed dataset.

    Raises:
        ValueError: If the dataset or version is unknown, or the downloaded
            archive does not match the registered SHA-256 checksum.
        FileNotFoundError: If the extracted archive contains no manifest at
            the expected location.
    """
    root = Path(root)
    resolved_version, archive_entry = _resolve_registry_entry(name, version)
    dataset_dir = root / archive_entry["root_dir"]
    manifest_path = dataset_dir / "manifest.json"
    resolved_root = root.resolve()
    resolved_dataset_dir = dataset_dir.resolve()

    if manifest_path.exists() and not force:
        logger.info(
            "Dataset %s:%s already available at %s",
            name,
            resolved_version,
            resolved_dataset_dir,
        )
        return load_manifest(manifest_path)

    archive_name = archive_entry["archive_name"]
    archive_path = root / archive_name
    url = archive_entry["url"]
    expected_sha256 = archive_entry["sha256"]

    root.mkdir(parents=True, exist_ok=True)

    logger.info("Download timeline for %s:%s", name, resolved_version)
    logger.info("1/4 Resolve target directory: %s", resolved_root)
    logger.info("2/4 Download archive from %s", url)

    try:
        _download_file(url, archive_path)

        logger.info("3/4 Verify archive checksum: %s", archive_path.resolve())
        actual_sha256 = _sha256_hex(archive_path)
        if actual_sha256 != expected_sha256:
            raise ValueError(
                "Downloaded archive checksum mismatch for "
                f"{name}:{resolved_version}. Expected {expected_sha256}, got {actual_sha256}"
            )

        # Remove the previous copy only once a verified replacement is on
        # disk, so a failed or corrupted download cannot destroy a working
        # dataset.
        if force and dataset_dir.exists():
            logger.info(
                "Removing existing dataset %s:%s from %s",
                name,
                resolved_version,
                resolved_dataset_dir,
            )
            shutil.rmtree(dataset_dir)

        logger.info("4/4 Extract archive into %s", resolved_root)
        _extract_archive(archive_path, root)
    finally:
        # The archive is only a transport artefact; never leave it behind.
        archive_path.unlink(missing_ok=True)

    if not manifest_path.exists():
        raise FileNotFoundError(
            f"Dataset archive for {name}:{resolved_version} did not contain {manifest_path}"
        )

    logger.info(
        "Dataset %s:%s installed in %s",
        name,
        resolved_version,
        resolved_dataset_dir,
    )

    return load_manifest(manifest_path)
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
def list_datasets(root: str | Path | None = None) -> list[str]:
    """Return the dataset names of the embedded registry in sorted order.

    ``root`` is accepted but ignored: the names come from the registry file
    only (presumably kept for call compatibility; confirm against callers).
    """
    del root
    dataset_names = _load_registry().keys()
    return sorted(dataset_names)
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
def list_versions(name: str, root: str | Path | None = None) -> list[str]:
    """Return the sorted version identifiers registered for dataset ``name``.

    ``root`` is accepted but ignored; only the embedded registry is consulted.

    Raises:
        ValueError: If ``name`` is not a key of the registry.
    """
    del root
    registry = _load_registry()

    if name in registry:
        return sorted(registry[name].get("versions", {}).keys())

    raise ValueError(f"Unknown dataset: {name}")
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
def list_datasets_with_versions(root: str | Path | None = None) -> dict[str, list[str]]:
    """Map every registered dataset name to its sorted version identifiers.

    Datasets are inserted in sorted name order. ``root`` is accepted but
    ignored; only the embedded registry is consulted.
    """
    del root
    catalogue: dict[str, list[str]] = {}
    for dataset_name, dataset_entry in sorted(_load_registry().items()):
        catalogue[dataset_name] = sorted(dataset_entry.get("versions", {}).keys())
    return catalogue
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
def list_registry_datasets(root: str | Path | None = None) -> list[dict]:
    """Summarise every registered dataset, ordered by name.

    Each summary holds the dataset ``name``, its ``latest`` version, all
    sorted ``versions``, and the ``description``, ``size_bytes`` and
    ``root_dir`` of the latest version (empty string / ``None`` when the
    registry omits them). ``root`` is accepted but ignored.
    """
    del root
    summaries = []

    for dataset_name, dataset_entry in sorted(_load_registry().items()):
        latest = dataset_entry["latest"]
        all_versions = dataset_entry.get("versions", {})
        latest_entry = all_versions.get(latest, {})
        summary = {
            "name": dataset_name,
            "latest": latest,
            "versions": sorted(all_versions.keys()),
            "description": latest_entry.get("description", ""),
            "size_bytes": latest_entry.get("size_bytes"),
            "root_dir": latest_entry.get("root_dir"),
        }
        summaries.append(summary)

    return summaries
|