agi-app-polars-execution 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. agi_app_polars_execution-0.1.0/LICENSE +22 -0
  2. agi_app_polars_execution-0.1.0/PKG-INFO +52 -0
  3. agi_app_polars_execution-0.1.0/README.md +22 -0
  4. agi_app_polars_execution-0.1.0/pyproject.toml +84 -0
  5. agi_app_polars_execution-0.1.0/setup.cfg +4 -0
  6. agi_app_polars_execution-0.1.0/setup.py +47 -0
  7. agi_app_polars_execution-0.1.0/src/agi_app_polars_execution/__init__.py +33 -0
  8. agi_app_polars_execution-0.1.0/src/agi_app_polars_execution/project/execution_polars_project/README.md +34 -0
  9. agi_app_polars_execution-0.1.0/src/agi_app_polars_execution/project/execution_polars_project/pyproject.toml +10 -0
  10. agi_app_polars_execution-0.1.0/src/agi_app_polars_execution/project/execution_polars_project/src/app_args_form.py +142 -0
  11. agi_app_polars_execution-0.1.0/src/agi_app_polars_execution/project/execution_polars_project/src/app_settings.toml +41 -0
  12. agi_app_polars_execution-0.1.0/src/agi_app_polars_execution/project/execution_polars_project/src/connectors/data_connectors.toml +27 -0
  13. agi_app_polars_execution-0.1.0/src/agi_app_polars_execution/project/execution_polars_project/src/execution_polars/__init__.py +10 -0
  14. agi_app_polars_execution-0.1.0/src/agi_app_polars_execution/project/execution_polars_project/src/execution_polars/app_args.py +83 -0
  15. agi_app_polars_execution-0.1.0/src/agi_app_polars_execution/project/execution_polars_project/src/execution_polars/execution_polars.py +187 -0
  16. agi_app_polars_execution-0.1.0/src/agi_app_polars_execution/project/execution_polars_project/src/execution_polars/execution_polars_args.py +3 -0
  17. agi_app_polars_execution-0.1.0/src/agi_app_polars_execution/project/execution_polars_project/src/execution_polars/reduction.py +191 -0
  18. agi_app_polars_execution-0.1.0/src/agi_app_polars_execution/project/execution_polars_project/src/execution_polars_worker/__init__.py +3 -0
  19. agi_app_polars_execution-0.1.0/src/agi_app_polars_execution/project/execution_polars_project/src/execution_polars_worker/execution_polars_worker.py +151 -0
  20. agi_app_polars_execution-0.1.0/src/agi_app_polars_execution/project/execution_polars_project/src/execution_polars_worker/pyproject.toml +10 -0
  21. agi_app_polars_execution-0.1.0/src/agi_app_polars_execution/project/execution_polars_project/src/pre_prompt.json +1 -0
  22. agi_app_polars_execution-0.1.0/src/agi_app_polars_execution/project/execution_polars_project/uv_config.toml +5 -0
  23. agi_app_polars_execution-0.1.0/src/agi_app_polars_execution.egg-info/PKG-INFO +52 -0
  24. agi_app_polars_execution-0.1.0/src/agi_app_polars_execution.egg-info/SOURCES.txt +11 -0
  25. agi_app_polars_execution-0.1.0/src/agi_app_polars_execution.egg-info/dependency_links.txt +1 -0
  26. agi_app_polars_execution-0.1.0/src/agi_app_polars_execution.egg-info/entry_points.txt +3 -0
  27. agi_app_polars_execution-0.1.0/src/agi_app_polars_execution.egg-info/requires.txt +1 -0
  28. agi_app_polars_execution-0.1.0/src/agi_app_polars_execution.egg-info/top_level.txt +1 -0
@@ -0,0 +1,22 @@
1
+ BSD 3-Clause License
2
+
3
+ Copyright (c) 2025, Jean-Pierre Morard, THALES SIX GTS France SAS
4
+ All rights reserved.
5
+
6
+ Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
7
+ following conditions are met:
8
+
9
+ 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following
10
+ disclaimer.
11
+ 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
12
+ disclaimer in the documentation and/or other materials provided with the distribution.
13
+ 3. Neither the name of Jean-Pierre MORARD nor the names of its contributors, or THALES SIX GTS France SAS, may be used
14
+ to endorse or promote products derived from this software without specific prior written permission.
15
+
16
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
17
+ INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
19
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
21
+ WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,52 @@
1
+ Metadata-Version: 2.4
2
+ Name: agi-app-polars-execution
3
+ Version: 0.1.0
4
+ Summary: AGILAB Polars execution app project
5
+ Author: Jean-Pierre Morard
6
+ Maintainer: Jean-Pierre Morard
7
+ License-Expression: BSD-3-Clause
8
+ Project-URL: Documentation, https://thalesgroup.github.io/agilab
9
+ Project-URL: Source, https://github.com/ThalesGroup/agilab/tree/main/src/agilab/lib/agi-app-polars-execution
10
+ Project-URL: Issues, https://github.com/ThalesGroup/agilab/issues
11
+ Project-URL: Homepage, https://github.com/ThalesGroup/agilab
12
+ Project-URL: Repository, https://github.com/ThalesGroup/agilab
13
+ Project-URL: Discussions, https://github.com/ThalesGroup/agilab/discussions
14
+ Project-URL: Changelog, https://github.com/ThalesGroup/agilab/releases
15
+ Keywords: agilab,apps,reproducibility,workflow-orchestration
16
+ Classifier: Intended Audience :: Developers
17
+ Classifier: Development Status :: 4 - Beta
18
+ Classifier: Programming Language :: Python :: 3
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Classifier: Operating System :: MacOS
23
+ Classifier: Operating System :: Microsoft :: Windows
24
+ Classifier: Operating System :: POSIX :: Linux
25
+ Requires-Python: >=3.11
26
+ Description-Content-Type: text/markdown
27
+ License-File: LICENSE
28
+ Requires-Dist: agi-core<2027.0,>=2026.05.13
29
+ Dynamic: license-file
30
+
31
+ # agi-app-polars-execution
32
+
33
+ [![PyPI version](https://img.shields.io/pypi/v/agi-app-polars-execution.svg?cacheSeconds=300)](https://pypi.org/project/agi-app-polars-execution/)
34
+ [![Python versions](https://img.shields.io/pypi/pyversions/agi-app-polars-execution.svg)](https://pypi.org/project/agi-app-polars-execution/)
35
+ [![License: BSD 3-Clause](https://img.shields.io/pypi/l/agi-app-polars-execution)](https://opensource.org/licenses/BSD-3-Clause)
36
+
37
+ `agi-app-polars-execution` publishes the `execution_polars_project` AGILAB app
38
+ project as a self-contained payload. The distribution name is PyPI-facing; the
39
+ installed AGILAB project name remains `execution_polars_project`.
40
+
41
+ The package advertises the project through the `agilab.apps` entry point group
42
+ so `AgiEnv(app="execution_polars_project")` can resolve it without a monorepo
43
+ checkout.
44
+
45
+ ## Install
46
+
47
+ ```bash
48
+ pip install agi-app-polars-execution
49
+ ```
50
+
51
+ Most users install these app packages through the umbrella `agi-apps` package or
52
+ through `agilab[ui]` / `agilab[examples]`.
@@ -0,0 +1,22 @@
1
+ # agi-app-polars-execution
2
+
3
+ [![PyPI version](https://img.shields.io/pypi/v/agi-app-polars-execution.svg?cacheSeconds=300)](https://pypi.org/project/agi-app-polars-execution/)
4
+ [![Python versions](https://img.shields.io/pypi/pyversions/agi-app-polars-execution.svg)](https://pypi.org/project/agi-app-polars-execution/)
5
+ [![License: BSD 3-Clause](https://img.shields.io/pypi/l/agi-app-polars-execution)](https://opensource.org/licenses/BSD-3-Clause)
6
+
7
+ `agi-app-polars-execution` publishes the `execution_polars_project` AGILAB app
8
+ project as a self-contained payload. The distribution name is PyPI-facing; the
9
+ installed AGILAB project name remains `execution_polars_project`.
10
+
11
+ The package advertises the project through the `agilab.apps` entry point group
12
+ so `AgiEnv(app="execution_polars_project")` can resolve it without a monorepo
13
+ checkout.
14
+
15
+ ## Install
16
+
17
+ ```bash
18
+ pip install agi-app-polars-execution
19
+ ```
20
+
21
+ Most users install these app packages through the umbrella `agi-apps` package or
22
+ through `agilab[ui]` / `agilab[examples]`.
@@ -0,0 +1,84 @@
1
+ [project]
2
+ version = "0.1.0"
3
+ name = "agi-app-polars-execution"
4
+ description = "AGILAB Polars execution app project"
5
+ requires-python = ">=3.11"
6
+ readme = "README.md"
7
+ authors = [
8
+ { name = "Jean-Pierre Morard" }
9
+ ]
10
+ maintainers = [{ name = "Jean-Pierre Morard" }]
11
+ license = "BSD-3-Clause"
12
+ license-files = ["LICENSE"]
13
+
14
+ classifiers = [
15
+ "Intended Audience :: Developers",
16
+ "Development Status :: 4 - Beta",
17
+ "Programming Language :: Python :: 3",
18
+ "Programming Language :: Python :: 3.11",
19
+ "Programming Language :: Python :: 3.12",
20
+ "Programming Language :: Python :: 3.13",
21
+ "Operating System :: MacOS",
22
+ "Operating System :: Microsoft :: Windows",
23
+ "Operating System :: POSIX :: Linux",
24
+ ]
25
+
26
+ keywords = [
27
+ "agilab",
28
+ "apps",
29
+ "reproducibility",
30
+ "workflow-orchestration",
31
+ ]
32
+
33
+ dependencies = ["agi-core>=2026.05.13,<2027.0"]
34
+
35
+ [project.urls]
36
+ Documentation = "https://thalesgroup.github.io/agilab"
37
+ Source = "https://github.com/ThalesGroup/agilab/tree/main/src/agilab/lib/agi-app-polars-execution"
38
+ Issues = "https://github.com/ThalesGroup/agilab/issues"
39
+ Homepage = "https://github.com/ThalesGroup/agilab"
40
+ Repository = "https://github.com/ThalesGroup/agilab"
41
+ Discussions = "https://github.com/ThalesGroup/agilab/discussions"
42
+ Changelog = "https://github.com/ThalesGroup/agilab/releases"
43
+
44
+ [project.entry-points."agilab.apps"]
45
+ execution_polars = "agi_app_polars_execution:project_root"
46
+ execution_polars_project = "agi_app_polars_execution:project_root"
47
+
48
+ [dependency-groups]
49
+ dev = [
50
+ "pytest",
51
+ ]
52
+
53
+ [tool.uv.sources.agi-core]
54
+ path = "../../core/agi-core"
55
+ editable = true
56
+
57
+ [build-system]
58
+ requires = ["setuptools>=68", "wheel"]
59
+ build-backend = "setuptools.build_meta"
60
+
61
+ [tool.setuptools]
62
+ include-package-data = false
63
+ package-dir = {"" = "src"}
64
+ packages = ["agi_app_polars_execution"]
65
+
66
+ [tool.setuptools.package-data]
67
+ "agi_app_polars_execution" = [
68
+ "project/**/*",
69
+ ]
70
+
71
+ [tool.setuptools.exclude-package-data]
72
+ "agi_app_polars_execution" = [
73
+ "project/**/.venv/**",
74
+ "project/**/__pycache__/**",
75
+ "project/**/*.pyc",
76
+ "project/**/*.pyo",
77
+ "project/**/*.pyx",
78
+ "project/**/*.c",
79
+ "project/**/*.so",
80
+ "project/**/uv.lock",
81
+ ]
82
+
83
+ [tool.pytest.ini_options]
84
+ testpaths = ["test"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,47 @@
1
+ from __future__ import annotations
2
+
3
+ import importlib.util
4
+ from pathlib import Path
5
+
6
+ from setuptools import setup
7
+ from setuptools.command.build_py import build_py as _build_py
8
+ from setuptools.command.sdist import sdist as _sdist
9
+
10
+ APP_PROJECT = 'execution_polars_project'
11
+ PACKAGE_IMPORT = 'agi_app_polars_execution'
12
+
13
+
14
+ def _load_build_support():
15
+ module_path = Path(__file__).resolve().parents[4] / "src" / "agilab" / "lib" / "app_project_build_support.py"
16
+ if not module_path.exists():
17
+ return None
18
+ spec = importlib.util.spec_from_file_location("agilab_app_project_build_support", module_path)
19
+ if spec is None or spec.loader is None:
20
+ raise RuntimeError(f"Unable to load app project build support from {module_path}")
21
+ module = importlib.util.module_from_spec(spec)
22
+ spec.loader.exec_module(module)
23
+ return module
24
+
25
+
26
def _copy_payload(target_root: Path) -> None:
    """Copy the app project payload into *target_root* when build support exists.

    Does nothing when ``_load_build_support()`` returns ``None``. Otherwise it
    calls the support module's ``copy_app_project_payload`` and prints one
    line per path that call returns.
    """
    build_support = _load_build_support()
    if build_support is not None:
        for manifest in build_support.copy_app_project_payload(APP_PROJECT, target_root):
            print(f"[{PACKAGE_IMPORT}] sanitized packaged app manifest: {manifest}")
33
+
34
+
35
class build_py(_build_py):
    """``build_py`` command that also places the app payload in the build tree."""

    def run(self):
        """Run the stock command, then copy the payload under ``<build_lib>/<package>/project``."""
        super().run()
        payload_dir = Path(self.build_lib) / PACKAGE_IMPORT / "project"
        _copy_payload(payload_dir)
39
+
40
+
41
class sdist(_sdist):
    """``sdist`` command that also places the app payload in the release tree."""

    def make_release_tree(self, base_dir, files):
        """Build the stock release tree, then copy the payload under ``src/<package>/project``."""
        super().make_release_tree(base_dir, files)
        payload_dir = Path(base_dir) / "src" / PACKAGE_IMPORT / "project"
        _copy_payload(payload_dir)
45
+
46
+
47
+ setup(cmdclass={"build_py": build_py, "sdist": sdist})
@@ -0,0 +1,33 @@
1
+ """Installed AGILAB app project provider for execution_polars_project."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+ APP_SLUG = 'execution_polars'
8
+ PROJECT_NAME = 'execution_polars_project'
9
+ PACKAGE_NAME = 'agi-app-polars-execution'
10
+
11
+
12
def package_root() -> Path:
    """Return the resolved directory that contains this module file."""
    module_file = Path(__file__)
    return module_file.resolve().parent
14
+
15
+
16
def project_root() -> Path:
    """Return the directory of the app project shipped by this package.

    The packaged copy at ``<package_root>/project/<PROJECT_NAME>`` is used
    when it exists. Otherwise the function looks for
    ``apps/builtin/<PROJECT_NAME>`` under the directory four levels above the
    package directory (NOTE(review): presumably the source-checkout layout;
    confirm). When neither exists, the packaged path is returned anyway.

    Returns:
        The selected project directory. It is not guaranteed to exist.
    """
    packaged_root = package_root() / "project" / PROJECT_NAME
    if packaged_root.exists():
        return packaged_root
    ancestors = Path(__file__).resolve().parents
    # Close to the filesystem root there may be fewer than five ancestors.
    # Treat that as "no source checkout" rather than raising IndexError.
    if len(ancestors) <= 4:
        return packaged_root
    source_root = ancestors[4] / "apps" / "builtin" / PROJECT_NAME
    return source_root if source_root.exists() else packaged_root
22
+
23
+
24
def metadata() -> dict[str, str]:
    """Return the slug, project name, package name and project root as strings."""
    info: dict[str, str] = {}
    info["slug"] = APP_SLUG
    info["project"] = PROJECT_NAME
    info["package"] = PACKAGE_NAME
    info["project_root"] = str(project_root())
    return info
31
+
32
+
33
+ __all__ = ["APP_SLUG", "PACKAGE_NAME", "PROJECT_NAME", "metadata", "package_root", "project_root"]
@@ -0,0 +1,34 @@
1
+ # Execution Polars Project
2
+
3
+ `execution_polars_project` is a built-in AGILAB execution playground for the
4
+ `PolarsWorker` path.
5
+
6
+ The app uses the same deterministic workload shape as
7
+ `execution_pandas_project`, then processes the partitions with Polars so the two
8
+ execution paths can be compared without changing the benchmark input.
9
+
10
+ ## What It Shows
11
+
12
+ - deterministic first-run dataset generation under shared storage
13
+ - worker distribution over multiple CSV partitions
14
+ - Polars-based compute and aggregation through the AGILAB worker contract
15
+ - parity outputs for comparing execution modes, engines, and reducer behavior
16
+
17
+ ## Typical Flow
18
+
19
+ 1. Select `execution_polars_project` in `PROJECT`.
20
+ 2. Run `INSTALL` from `ORCHESTRATE`.
21
+ 3. Run `EXECUTE` with the default settings or adjusted partition counts.
22
+ 4. Inspect generated CSV or Parquet outputs under `execution_polars/results`.
23
+
24
+ ## Outputs
25
+
26
+ Each worker writes an output file under the run results directory. The reducer
27
+ also emits a summary artifact that records row counts, score metrics, source
28
+ files, and the execution engine label.
29
+
30
+ ## Scope
31
+
32
+ This app is intentionally synthetic. It is useful for validating AGILAB
33
+ execution behavior and comparing worker paths, not for demonstrating a
34
+ domain-specific analytics workflow.
@@ -0,0 +1,10 @@
1
+ [project]
2
+ name = "execution_polars_project"
3
+ version = "0.1.0"
4
+ description = "Built-in AGILab execution playground using PolarsWorker"
5
+ requires-python = ">=3.11"
6
+ dependencies = ["agi-env>=2026.05.13.post3,<2027.0", "agi-node>=2026.05.13.post3,<2027.0", "agi-cluster>=2026.05.13.post3,<2027.0", "polars", "pydantic", "streamlit>=1.56.0"]
7
+
8
+ [build-system]
9
+ requires = ["setuptools"]
10
+ build-backend = "setuptools.build_meta"
@@ -0,0 +1,142 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ import sys
5
+ from typing import Any
6
+
7
+ import streamlit as st
8
+ from pydantic import ValidationError
9
+
10
+ _HERE = Path(__file__).resolve().parent
11
+ if str(_HERE) not in sys.path:
12
+ sys.path.insert(0, str(_HERE))
13
+
14
+ from execution_polars.app_args import ExecutionPolarsArgs, dump_args, load_args
15
+
16
+
17
+ PAGE_ID = "execution_polars_project:app_args_form"
18
+
19
+
20
def _k(name: str) -> str:
    """Return the session-state key for *name*, prefixed with this page's id."""
    return "{}:{}".format(PAGE_ID, name)
22
+
23
+
24
def _get_env():
    """Return the environment object stored in the Streamlit session.

    Reads ``env`` first and falls back to ``_env`` when the former is missing
    or falsy. When the result is ``None`` an error is shown and ``st.stop()``
    is called.
    """
    state = st.session_state
    env = state.get("env")
    if not env:
        env = state.get("_env")
    if env is not None:
        return env
    st.error("AGILab environment is not initialised yet. Return to the main page and try again.")
    st.stop()
    return env
30
+
31
+
32
def _load_current_args(settings_path: Path) -> ExecutionPolarsArgs:
    """Load the saved arguments, falling back to model defaults on any error.

    A failure is reported to the user as a Streamlit warning and does not
    abort the page.
    """
    try:
        loaded = load_args(settings_path)
    except Exception as exc:
        st.warning(f"Unable to load Execution Polars args from `{settings_path}`: {exc}")
        loaded = ExecutionPolarsArgs()
    return loaded
38
+
39
+
40
+ def _safe_rows_per_partition(rows_per_file: int, n_partitions: int) -> int:
41
+ if n_partitions <= 0:
42
+ return 0
43
+ return rows_per_file // n_partitions
44
+
45
+
46
# Page body: executed top to bottom on every Streamlit rerun.
env = _get_env()
settings_path = Path(env.app_settings_file)
current_args = _load_current_args(settings_path)
current_payload = current_args.model_dump(mode="json")

st.caption(
    "Execution Polars generates a synthetic CSV dataset and runs the distributed Polars worker path. "
    "Use this form to size the playground workload before EXECUTE."
)

# Seed the widget state from the saved settings. setdefault() keeps any value
# the user has already edited during this session.
for key, default in (
    ("data_in", str(current_payload.get("data_in", "execution_playground/dataset") or "execution_playground/dataset")),
    ("data_out", str(current_payload.get("data_out", "execution_polars/results") or "execution_polars/results")),
    ("files", str(current_payload.get("files", "*.csv") or "*.csv")),
    ("nfile", int(current_payload.get("nfile", 16) or 16)),
    ("n_partitions", int(current_payload.get("n_partitions", 16) or 16)),
    ("rows_per_file", int(current_payload.get("rows_per_file", 100_000) or 100_000)),
    ("n_groups", int(current_payload.get("n_groups", 32) or 32)),
    ("compute_passes", int(current_payload.get("compute_passes", 32) or 32)),
    ("output_format", str(current_payload.get("output_format", "csv") or "csv")),
    # Seed 0 is a legal value (the widget below allows min_value=0), so it
    # must not go through the ``or`` fallback: that would turn a saved 0 into
    # 42 and then write 42 back to the settings file.
    ("seed", int(42 if current_payload.get("seed") is None else current_payload["seed"])),
    ("reset_target", bool(current_payload.get("reset_target", False))),
):
    st.session_state.setdefault(_k(key), default)

c1, c2, c3 = st.columns([2, 2, 1.2])
with c1:
    st.text_input("Dataset directory", key=_k("data_in"))
with c2:
    st.text_input("Results directory", key=_k("data_out"))
with c3:
    st.selectbox("Output format", options=["csv", "parquet"], key=_k("output_format"))

c4, c5, c6, c7 = st.columns([1.2, 1.2, 1.2, 1.2])
with c4:
    st.text_input("Files glob", key=_k("files"))
with c5:
    st.number_input("Files", key=_k("nfile"), min_value=1, step=1)
with c6:
    st.number_input("Partitions", key=_k("n_partitions"), min_value=1, step=1)
with c7:
    st.number_input("Rows / file", key=_k("rows_per_file"), min_value=1, step=10_000)

c8, c9, c10, c11 = st.columns([1.2, 1.2, 1.2, 1.2])
with c8:
    st.number_input("Groups", key=_k("n_groups"), min_value=1, step=1)
with c9:
    st.number_input("Compute passes", key=_k("compute_passes"), min_value=1, step=1)
with c10:
    st.number_input("Seed", key=_k("seed"), min_value=0, step=1)
with c11:
    st.checkbox("Reset output", key=_k("reset_target"))

# Collect the widget values into the shape expected by the args model.
candidate: dict[str, Any] = {
    "data_in": (st.session_state.get(_k("data_in")) or "").strip(),
    "data_out": (st.session_state.get(_k("data_out")) or "").strip(),
    "files": (st.session_state.get(_k("files")) or "*.csv").strip() or "*.csv",
    "nfile": st.session_state.get(_k("nfile"), 16),
    "n_partitions": st.session_state.get(_k("n_partitions"), 16),
    "rows_per_file": st.session_state.get(_k("rows_per_file"), 100_000),
    "n_groups": st.session_state.get(_k("n_groups"), 32),
    "compute_passes": st.session_state.get(_k("compute_passes"), 32),
    "output_format": st.session_state.get(_k("output_format")) or "csv",
    "seed": st.session_state.get(_k("seed"), 42),
    "reset_target": bool(st.session_state.get(_k("reset_target"), False)),
}

try:
    validated = ExecutionPolarsArgs(**candidate)
except ValidationError as exc:
    st.error("Invalid Execution Polars parameters:")
    if hasattr(env, "humanize_validation_errors"):
        for msg in env.humanize_validation_errors(exc):
            st.markdown(msg)
    else:
        st.code(str(exc))
else:
    validated_payload = validated.model_dump(mode="json")
    # Persist only when something actually changed relative to the file.
    if validated_payload != current_payload:
        dump_args(validated, settings_path)
        app_settings = st.session_state.get("app_settings")
        if not isinstance(app_settings, dict):
            app_settings = {}
        app_settings.setdefault("cluster", {})
        app_settings["args"] = validated_payload
        st.session_state["app_settings"] = app_settings
        st.session_state["is_args_from_ui"] = True
        st.success(f"Saved to `{settings_path}`.")
    else:
        st.info("No changes to save.")

    total_rows = validated.nfile * validated.rows_per_file
    rows_per_partition = _safe_rows_per_partition(validated.rows_per_file, validated.n_partitions)
    st.caption(
        f"Planned workload: `{validated.nfile}` files, about `{total_rows:,}` rows total, "
        f"`{validated.n_partitions}` partitions per file, about `{rows_per_partition:,}` rows per partition."
    )
@@ -0,0 +1,41 @@
1
+ [args]
2
+ data_in = "execution_playground/dataset"
3
+ data_out = "execution_polars/results"
4
+ files = "*.csv"
5
+ nfile = 16
6
+ n_partitions = 16
7
+ rows_per_file = 100000
8
+ n_groups = 32
9
+ compute_passes = 32
10
+ output_format = "csv"
11
+ seed = 42
12
+ reset_target = false
13
+
14
+ [cluster]
15
+ verbose = 1
16
+ cython = false
17
+ pool = false
18
+ rapids = false
19
+ cluster_enabled = false
20
+ scheduler = "127.0.0.1:8786"
21
+ workers_data_path = ""
22
+
23
+ [cluster.workers]
24
+ "127.0.0.1" = 2
25
+
26
+ [cluster.service_health]
27
+ allow_idle = false
28
+ max_unhealthy = 0
29
+ max_restart_rate = 0.25
30
+
31
+ [connector_catalog]
32
+ path = "connectors/data_connectors.toml"
33
+
34
+ [connector_refs]
35
+ benchmark_sql = "execution_polars_sql"
36
+ operator_events = "execution_polars_ops_opensearch"
37
+ artifact_store = "execution_polars_artifact_store"
38
+
39
+ [legacy_paths]
40
+ data_in = "execution_playground/dataset"
41
+ data_out = "execution_polars/results"
@@ -0,0 +1,27 @@
1
+ [[connectors]]
2
+ id = "execution_polars_sql"
3
+ kind = "sql"
4
+ label = "Execution Polars SQL"
5
+ description = "Read-only benchmark input table for polars execution experiments."
6
+ uri = "postgresql://execution.example.invalid/polars"
7
+ driver = "postgresql"
8
+ query_mode = "read_only"
9
+
10
+ [[connectors]]
11
+ id = "execution_polars_ops_opensearch"
12
+ kind = "opensearch"
13
+ label = "Execution Polars Operations OpenSearch"
14
+ description = "Execution benchmark run evidence index."
15
+ url = "https://opensearch.example.invalid"
16
+ index = "agilab-execution-polars-*"
17
+ auth_ref = "env:OPENSEARCH_TOKEN"
18
+
19
+ [[connectors]]
20
+ id = "execution_polars_artifact_store"
21
+ kind = "object_storage"
22
+ label = "Execution Polars Artifact Store"
23
+ description = "Object-storage prefix for polars benchmark artifacts."
24
+ provider = "s3"
25
+ bucket = "agilab-artifacts"
26
+ prefix = "execution_polars/"
27
+ auth_ref = "env:AWS_PROFILE"
@@ -0,0 +1,10 @@
1
+ from .app_args import ExecutionPolarsArgs
2
+ from .execution_polars import ExecutionPolars, ExecutionPolarsApp
3
+ from .reduction import EXECUTION_POLARS_REDUCE_CONTRACT
4
+
5
+ __all__ = [
6
+ "EXECUTION_POLARS_REDUCE_CONTRACT",
7
+ "ExecutionPolars",
8
+ "ExecutionPolarsApp",
9
+ "ExecutionPolarsArgs",
10
+ ]
@@ -0,0 +1,83 @@
1
+ """Argument helpers for the execution playground polars app."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import Any, Literal, TypedDict
7
+
8
+ from pydantic import BaseModel, ConfigDict, Field
9
+
10
+ from agi_env.app_args import dump_model_to_toml, load_model_from_toml, merge_model_data
11
+
12
+
13
class ExecutionPolarsArgs(BaseModel):
    """Runtime parameters for the polars execution playground.

    Every field has a default, so the model can be built with no arguments.
    """

    # Unknown keys are rejected at validation time rather than ignored.
    model_config = ConfigDict(extra="forbid")

    # Input and output directories, both relative by default.
    data_in: Path = Field(default_factory=lambda: Path("execution_playground/dataset"))
    data_out: Path = Field(default_factory=lambda: Path("execution_polars/results"))
    # Glob pattern for input files.
    files: str = "*.csv"
    # Integer sizing parameters. No bounds are declared on the model itself.
    nfile: int = 16
    n_partitions: int = 16
    rows_per_file: int = 100_000
    n_groups: int = 32
    compute_passes: int = 32
    # Only these two output formats validate.
    output_format: Literal["csv", "parquet"] = "csv"
    seed: int = 42
    reset_target: bool = False
29
+
30
+
31
class ExecutionPolarsArgsTD(TypedDict, total=False):
    """Typed mapping with the same keys as ``ExecutionPolarsArgs``.

    ``total=False`` makes every key optional. The path fields and
    ``output_format`` are typed as plain strings here.
    """

    data_in: str
    data_out: str
    files: str
    nfile: int
    n_partitions: int
    rows_per_file: int
    n_groups: int
    compute_passes: int
    output_format: str
    seed: int
    reset_target: bool
43
+
44
+
45
+ ArgsModel = ExecutionPolarsArgs
46
+ ArgsOverrides = ExecutionPolarsArgsTD
47
+
48
+
49
def load_args(settings_path: str | Path, *, section: str = "args") -> ExecutionPolarsArgs:
    """Build an ``ExecutionPolarsArgs`` from *section* of the TOML file at *settings_path*.

    Delegates to ``load_model_from_toml``.
    """
    model_cls = ExecutionPolarsArgs
    return load_model_from_toml(model_cls, settings_path, section=section)
51
+
52
+
53
def merge_args(
    base: ExecutionPolarsArgs,
    overrides: ExecutionPolarsArgsTD | None = None,
) -> ExecutionPolarsArgs:
    """Return *base* combined with *overrides* by delegating to ``merge_model_data``."""
    merged = merge_model_data(base, overrides)
    return merged
58
+
59
+
60
def dump_args(
    args: ExecutionPolarsArgs,
    settings_path: str | Path,
    *,
    section: str = "args",
    create_missing: bool = True,
) -> None:
    """Write *args* to *section* of the TOML file at *settings_path*.

    Delegates to ``dump_model_to_toml``, forwarding *create_missing* unchanged.
    """
    options = {"section": section, "create_missing": create_missing}
    dump_model_to_toml(args, settings_path, **options)
68
+
69
+
70
def ensure_defaults(args: ExecutionPolarsArgs, **_ignored: Any) -> ExecutionPolarsArgs:
    """Return *args* unchanged; extra keyword arguments are accepted and ignored."""
    return args
72
+
73
+
74
+ __all__ = [
75
+ "ArgsModel",
76
+ "ArgsOverrides",
77
+ "ExecutionPolarsArgs",
78
+ "ExecutionPolarsArgsTD",
79
+ "dump_args",
80
+ "ensure_defaults",
81
+ "load_args",
82
+ "merge_args",
83
+ ]
@@ -0,0 +1,187 @@
1
+ """Execution playground manager for the polars worker path."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import csv
6
+ import json
7
+ import logging
8
+ import math
9
+ import random
10
+ import shutil
11
+ from pathlib import Path
12
+ from typing import Any
13
+
14
+ from pydantic import ValidationError
15
+
16
+ from agi_node.agi_dispatcher import BaseWorker, WorkDispatcher
17
+
18
+ from .app_args import (
19
+ ArgsOverrides,
20
+ ExecutionPolarsArgs,
21
+ dump_args,
22
+ ensure_defaults,
23
+ load_args,
24
+ merge_args,
25
+ )
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+
30
class ExecutionPolars(BaseWorker):
    """AGILab manager that generates the same workload as the pandas variant.

    On construction it resolves the input and output directories through the
    environment, makes sure a synthetic CSV dataset exists, prepares the
    results directory, and publishes the arguments on ``WorkDispatcher.args``.
    """

    # Class-level mapping shared by all instances.
    worker_vars: dict[str, Any] = {}

    def __init__(
        self,
        env,
        args: ExecutionPolarsArgs | None = None,
        **kwargs: Any,
    ) -> None:
        """Validate the arguments and prepare the dataset and output directory.

        Args:
            env: Environment object. The code reads ``env.verbose`` (optional)
                and calls ``env.resolve_share_path``.
            args: Pre-built arguments. When ``None`` they are built from
                ``kwargs``.
            **kwargs: Field overrides for ``ExecutionPolarsArgs``, plus an
                optional ``verbose`` level that is removed before validation.

        Raises:
            ValueError: If ``kwargs`` do not validate as ``ExecutionPolarsArgs``.
        """
        self.env = env
        # Not defined in this class; presumably inherited from BaseWorker.
        self._ensure_managed_pc_share_dir(env)
        # ``verbose`` is popped so it is not passed to the args model, which
        # forbids unknown fields.
        self.verbose = int(kwargs.pop("verbose", getattr(env, "verbose", 0) or 0))

        if args is None:
            try:
                args = ExecutionPolarsArgs(**kwargs)
            except ValidationError as exc:
                raise ValueError(f"Invalid ExecutionPolars arguments: {exc}") from exc

        self.args = ensure_defaults(args, env=env)
        # Not defined in this class; presumably inherited from BaseWorker.
        self.args = self._apply_managed_pc_paths(self.args)
        self.args.data_in = env.resolve_share_path(self.args.data_in)
        self.args.data_out = env.resolve_share_path(self.args.data_out)
        self.data_out = self.args.data_out

        self.args.data_in.mkdir(parents=True, exist_ok=True)
        self._ensure_dataset(self.args.data_in)

        if self.args.reset_target and self.data_out.exists():
            # NOTE(review): per the shutil documentation, ignore_errors=True
            # means the onerror handler is never called. Confirm whether the
            # handler or the silent best-effort removal is the intended one.
            shutil.rmtree(self.data_out, ignore_errors=True, onerror=WorkDispatcher._onerror)
        self.data_out.mkdir(parents=True, exist_ok=True)

        # Stored on the dispatcher class as a JSON-compatible dict.
        WorkDispatcher.args = self.args.model_dump(mode="json")

    @classmethod
    def from_toml(
        cls,
        env,
        settings_path: str | Path = "app_settings.toml",
        section: str = "args",
        **overrides: Any,
    ) -> "ExecutionPolars":
        """Build an instance from a TOML settings file plus keyword overrides.

        Args:
            env: Environment object forwarded to the constructor.
            settings_path: TOML file to read.
            section: Section of the file holding the arguments.
            **overrides: Values merged over the loaded arguments.
        """
        base = load_args(settings_path, section=section)
        # An empty overrides dict is passed on as None.
        merged = ensure_defaults(merge_args(base, overrides or None), env=env)
        return cls(env, args=merged)

    def to_toml(
        self,
        settings_path: str | Path = "app_settings.toml",
        section: str = "args",
        create_missing: bool = True,
    ) -> None:
        """Write the current arguments to *section* of *settings_path*."""
        dump_args(self.args, settings_path, section=section, create_missing=create_missing)

    def as_dict(self) -> dict[str, Any]:
        """Return the current arguments as a JSON-compatible dict."""
        return self.args.model_dump(mode="json")

    def _manifest_path(self, data_in: Path) -> Path:
        """Return the manifest file path inside *data_in*."""
        return data_in / "_execution_playground_manifest.json"

    def _dataset_manifest(self) -> dict[str, int]:
        """Return the argument values that determine the generated dataset.

        ``nfile`` is not part of the manifest. It only caps how many files
        ``build_distribution`` selects.
        """
        return {
            "n_partitions": int(self.args.n_partitions),
            "rows_per_file": int(self.args.rows_per_file),
            "n_groups": int(self.args.n_groups),
            "seed": int(self.args.seed),
        }

    def _ensure_dataset(self, data_in: Path) -> None:
        """Generate the synthetic CSV dataset in *data_in* when it is missing or stale.

        The dataset is regenerated when no file matches ``self.args.files``,
        when the manifest is absent, or when the manifest content differs
        from the current arguments. Otherwise the directory is left alone.
        """
        manifest_path = self._manifest_path(data_in)
        expected = self._dataset_manifest()
        existing_files = sorted(data_in.glob(self.args.files))
        regenerate = not existing_files

        if manifest_path.exists():
            try:
                current = json.loads(manifest_path.read_text(encoding="utf-8"))
            except Exception:
                # An unreadable manifest is treated as a mismatch.
                current = {}
            regenerate = regenerate or current != expected
        else:
            regenerate = True

        if not regenerate:
            return

        # NOTE(review): the cleanup pattern is hard-coded to "*.csv" while the
        # existence check above uses self.args.files. Confirm they are meant
        # to differ.
        for candidate in data_in.glob("*.csv"):
            candidate.unlink()

        segments = ("alpha", "beta", "gamma", "delta")
        # One file is written per partition index, so n_partitions sets the
        # number of generated files.
        for partition_idx in range(self.args.n_partitions):
            # Each file gets its own generator seeded from seed + index.
            rng = random.Random(self.args.seed + partition_idx)
            output = data_in / f"part_{partition_idx:02d}.csv"
            with output.open("w", newline="", encoding="utf-8") as stream:
                writer = csv.writer(stream)
                writer.writerow(
                    ["row_id", "group_id", "bucket", "segment", "x", "y", "signal", "weight"]
                )
                for row_idx in range(self.args.rows_per_file):
                    group_id = row_idx % self.args.n_groups
                    bucket = (group_id + partition_idx) % 8
                    segment = segments[group_id % len(segments)]
                    # The three rng.random() draws happen in the order
                    # x, y, signal. Reordering them changes the data.
                    x = round(rng.random() * 100.0 + group_id * 0.3, 6)
                    y = round(rng.random() * 50.0 + bucket * 0.7 + math.sin(row_idx / 17.0), 6)
                    signal = round(((row_idx % 97) - 48) * 0.15 + rng.random() * 0.25, 6)
                    # weight uses no random draw.
                    weight = round(1.0 + (row_idx % 11) * 0.05 + partition_idx * 0.01, 6)
                    writer.writerow(
                        [f"{partition_idx}-{row_idx}", group_id, bucket, segment, x, y, signal, weight]
                    )

        # Written last, after all data files have been produced.
        manifest_path.write_text(json.dumps(expected, indent=2), encoding="utf-8")

    def build_distribution(self, workers):
        """Split the input files into per-worker batches.

        Args:
            workers: Forwarded unchanged to ``WorkDispatcher.make_chunks``.

        Returns:
            A 5-tuple ``(work_plan, work_plan_metadata, "file", "size_kb", "KB")``.
            ``work_plan`` holds one entry per chunk, each a one-element list
            containing that chunk's file paths. ``work_plan_metadata`` holds
            a matching one-element list with a label and the summed size.

        Raises:
            FileNotFoundError: If no file in ``data_in`` matches the pattern.
        """
        files = sorted(self.args.data_in.glob(self.args.files))
        # A non-positive nfile means "use every matching file".
        if self.args.nfile > 0:
            files = files[: self.args.nfile]
        if not files:
            raise FileNotFoundError(
                f"No workload files found in {self.args.data_in} with pattern {self.args.files!r}"
            )

        # Weight is the file size in whole KiB, floored at 1.
        weights = [(str(path), max(int(path.stat().st_size // 1024), 1)) for path in files]
        if len(weights) == 1:
            # A single file bypasses the chunking helper.
            worker_chunks = [[weights[0]]]
        else:
            worker_chunks = WorkDispatcher.make_chunks(
                len(weights),
                weights,
                workers=workers,
                verbose=self.verbose,
                threshold=12,
            )

        work_plan = []
        work_plan_metadata = []
        for chunk in worker_chunks:
            file_batch = [file_path for file_path, _ in chunk]
            total_size_kb = sum(size_kb for _, size_kb in chunk)
            # Label is the file name for one file, otherwise a count.
            batch_label = (
                Path(file_batch[0]).name
                if len(file_batch) == 1
                else f"{len(file_batch)} files"
            )
            work_plan.append([file_batch])
            work_plan_metadata.append(
                [{"file": batch_label, "size_kb": total_size_kb}]
            )

        return work_plan, work_plan_metadata, "file", "size_kb", "KB"
181
+
182
+
183
class ExecutionPolarsApp(ExecutionPolars):
    """Compatibility alias retaining the historic *App suffix.

    Adds nothing to ``ExecutionPolars``; it only provides the older name.
    """
185
+
186
+
187
+ __all__ = ["ExecutionPolars", "ExecutionPolarsApp"]
@@ -0,0 +1,3 @@
1
+ """Compatibility wrapper for older imports."""
2
+
3
+ from .app_args import * # noqa: F401,F403
@@ -0,0 +1,191 @@
1
+ """Reduce-contract adoption for the built-in polars execution app."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from collections.abc import Sequence
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ import polars as pl
11
+
12
+ from agi_node.reduction import (
13
+ ReduceArtifact,
14
+ ReduceContract,
15
+ ReducePartial,
16
+ require_payload_keys,
17
+ )
18
+
19
+
20
# File name pattern for the per-worker reduce summary; ``worker_id`` is
# substituted with ``str.format``.
REDUCE_ARTIFACT_FILENAME_TEMPLATE = "reduce_summary_worker_{worker_id}.json"
# Artifact name handed to the reduce contract.
REDUCE_ARTIFACT_NAME = "execution_polars_reduce_summary"
# Name under which the reduce contract is declared.
REDUCER_NAME = "execution_polars.weighted-score.v1"

# Columns a worker result frame must expose before it can be summarised.
_REQUIRED_FRAME_COLUMNS = frozenset(
    (
        "row_count",
        "x_sum",
        "weight_sum",
        "weighted_score",
        "python_tail_checksum",
        "source_file",
        "engine",
        "execution_model",
    )
)

# Keys every partial payload must carry to be accepted by the contract.
_REQUIRED_PAYLOAD_KEYS = (
    # counters
    "row_count",
    "result_rows",
    "source_file_count",
    # floating-point totals
    "x_sum",
    "weight_sum",
    "weighted_score_sum",
    "python_tail_checksum",
    # label collections
    "source_files",
    "engines",
    "execution_models",
)
49
+
50
+
51
def _sorted_unique_strings(values: pl.Series) -> list[str]:
    """Return the distinct non-null entries of *values* as sorted strings."""
    distinct: set[str] = set()
    for item in values.drop_nulls().to_list():
        distinct.add(str(item))
    ordered = list(distinct)
    ordered.sort()
    return ordered
53
+
54
+
55
def _merge_execution_polars_partials(partials: Sequence[ReducePartial]) -> dict[str, Any]:
    """Combine worker partial payloads into one summary payload.

    Integer counters and float totals are added up across *partials*; the
    ``source_files``, ``engines`` and ``execution_models`` collections are
    merged as sets of strings and returned sorted. ``source_file_count`` is
    the number of distinct source file names seen.
    """
    counter_keys = ("row_count", "result_rows")
    total_keys = ("x_sum", "weight_sum", "weighted_score_sum", "python_tail_checksum")
    label_keys = ("source_files", "engines", "execution_models")

    # Seed the result in its final key order; the file count is filled in
    # once every partial has been seen.
    merged: dict[str, Any] = {"row_count": 0, "result_rows": 0, "source_file_count": 0}
    merged.update(dict.fromkeys(total_keys, 0.0))
    seen: dict[str, set[str]] = {key: set() for key in label_keys}

    for partial in partials:
        data = partial.payload
        for key in counter_keys:
            merged[key] += int(data[key])
        for key in total_keys:
            merged[key] += float(data[key])
        for key in label_keys:
            seen[key].update(str(item) for item in data[key])

    merged["source_files"] = sorted(seen["source_files"])
    merged["source_file_count"] = len(seen["source_files"])
    merged["engines"] = sorted(seen["engines"])
    merged["execution_models"] = sorted(seen["execution_models"])
    return merged
86
+
87
+
88
def _validate_execution_polars_artifact(artifact: ReduceArtifact) -> None:
    """Reject a merged artifact that carries no usable content.

    Raises:
        ValueError: if the payload reports no source rows, no source files,
            or an empty ``engines`` collection (checked in that order).
    """
    summary = artifact.payload
    problem = None
    # Checks are evaluated lazily so only the first failing one is reported.
    if int(summary["row_count"]) < 1:
        problem = "execution_polars reducer produced no source rows"
    elif int(summary["source_file_count"]) < 1:
        problem = "execution_polars reducer produced no source files"
    elif not summary["engines"]:
        problem = "execution_polars reducer produced no engine metadata"

    if problem is not None:
        raise ValueError(problem)
96
+
97
+
98
# Reduce contract for this app: wires the merge function and the two
# validators defined in this module to the declared reducer/artifact names.
EXECUTION_POLARS_REDUCE_CONTRACT = ReduceContract(
    name=REDUCER_NAME,
    artifact_name=REDUCE_ARTIFACT_NAME,
    merge=_merge_execution_polars_partials,
    # Each partial must carry every key the merge step reads.
    validate_partial=require_payload_keys(*_REQUIRED_PAYLOAD_KEYS),
    validate_artifact=_validate_execution_polars_artifact,
    metadata=dict(
        app="execution_polars_project",
        engine="polars",
        scope="worker-result",
    ),
)
110
+
111
+
112
def reduce_artifact_path(output_dir: Path | str, worker_id: int | str) -> Path:
    """Return the reduce-summary file path for *worker_id* inside *output_dir*.

    The file name is ``REDUCE_ARTIFACT_FILENAME_TEMPLATE`` with ``worker_id``
    substituted; nothing is created on disk.
    """
    return Path(output_dir).joinpath(
        REDUCE_ARTIFACT_FILENAME_TEMPLATE.format(worker_id=worker_id)
    )
115
+
116
+
117
def partial_from_result_frame(
    df: pl.DataFrame,
    *,
    partial_id: str,
    artifact_path: Path | str | None = None,
    metadata: dict[str, Any] | None = None,
) -> ReducePartial:
    """Summarise a worker result frame into a ``ReducePartial``.

    Args:
        df: Result frame; must be non-empty and contain every column listed
            in ``_REQUIRED_FRAME_COLUMNS``.
        partial_id: Identifier stored on the returned partial.
        artifact_path: Optional location of the artifact; stored as a string,
            or ``None`` when not given.
        metadata: Optional metadata stored on the partial (``{}`` if omitted).

    Returns:
        A ``ReducePartial`` whose payload holds the column sums, the number of
        result rows, and the sorted distinct source files, engines and
        execution models.

    Raises:
        ValueError: if *df* is ``None``/empty or lacks a required column.
    """
    if df is None or df.is_empty():
        raise ValueError("execution_polars reducer requires a non-empty result frame")

    missing = sorted(_REQUIRED_FRAME_COLUMNS.difference(df.columns))
    if missing:
        raise ValueError(f"execution_polars result frame missing columns: {', '.join(missing)}")

    source_files = _sorted_unique_strings(df["source_file"])
    # The checksum is counted once per source file rather than once per row.
    # maintain_order=True pins the row order of the de-duplicated frame:
    # without it the order is unspecified, and because float addition is not
    # associative the summed checksum could differ in its last bits between
    # runs over identical input.
    checksum_by_file = (
        df.select(["source_file", "python_tail_checksum"])
        .unique(subset=["source_file"], keep="first", maintain_order=True)
        .get_column("python_tail_checksum")
    )
    payload = {
        "row_count": int(df["row_count"].sum()),
        "result_rows": int(df.height),
        "source_file_count": len(source_files),
        "x_sum": float(df["x_sum"].sum()),
        "weight_sum": float(df["weight_sum"].sum()),
        "weighted_score_sum": float(df["weighted_score"].sum()),
        "python_tail_checksum": float(checksum_by_file.sum()),
        "source_files": source_files,
        "engines": _sorted_unique_strings(df["engine"]),
        "execution_models": _sorted_unique_strings(df["execution_model"]),
    }
    return ReducePartial(
        partial_id=partial_id,
        payload=payload,
        metadata=metadata or {},
        artifact_path=str(artifact_path) if artifact_path else None,
    )
155
+
156
+
157
def build_reduce_artifact(partials: Sequence[ReducePartial]) -> ReduceArtifact:
    """Build the reduce artifact for *partials* via the module's contract."""
    contract = EXECUTION_POLARS_REDUCE_CONTRACT
    return contract.build_artifact(partials)
159
+
160
+
161
def write_reduce_artifact(
    df: pl.DataFrame,
    output_dir: Path | str,
    *,
    worker_id: int | str,
) -> Path:
    """Write the reduce summary for one worker's result frame as JSON.

    Args:
        df: Worker result frame to summarise.
        output_dir: Directory that receives the summary file; it is created
            (with parents) if it does not exist yet.
        worker_id: Worker identifier, used in the file name, the partial id
            and the partial metadata.

    Returns:
        The path of the written JSON file.

    Raises:
        ValueError: propagated from ``partial_from_result_frame`` when *df*
            is empty or lacks required columns.
    """
    output_path = reduce_artifact_path(output_dir, worker_id)
    partial = partial_from_result_frame(
        df,
        partial_id=f"execution_polars_worker_{worker_id}",
        artifact_path=output_path,
        metadata={"worker_id": str(worker_id)},
    )
    artifact = build_reduce_artifact((partial,))
    # Create the target directory only after the frame has been accepted, so
    # a rejected frame leaves no empty directory behind and a missing
    # directory no longer surfaces as FileNotFoundError from write_text.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(
        json.dumps(artifact.to_dict(), indent=2, sort_keys=True) + "\n",
        encoding="utf-8",
    )
    return output_path
180
+
181
+
182
+ __all__ = [
183
+ "EXECUTION_POLARS_REDUCE_CONTRACT",
184
+ "REDUCE_ARTIFACT_FILENAME_TEMPLATE",
185
+ "REDUCE_ARTIFACT_NAME",
186
+ "REDUCER_NAME",
187
+ "build_reduce_artifact",
188
+ "partial_from_result_frame",
189
+ "reduce_artifact_path",
190
+ "write_reduce_artifact",
191
+ ]
@@ -0,0 +1,3 @@
1
+ from .execution_polars_worker import ExecutionPolarsWorker
2
+
3
+ __all__ = ["ExecutionPolarsWorker"]
@@ -0,0 +1,151 @@
1
+ """Polars-based worker for the execution playground."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from types import SimpleNamespace
7
+ import time
8
+
9
+ import polars as pl
10
+
11
+ from agi_node.polars_worker import PolarsWorker
12
+ from execution_polars.reduction import write_reduce_artifact
13
+
14
+ _runtime: dict[str, object] = {}
15
+
16
+
17
class ExecutionPolarsWorker(PolarsWorker):
    """Execute the benchmark workload through the PolarsWorker path."""

    # Class-level default; ``start`` replaces it with a per-instance dict.
    pool_vars: dict[str, object] = {}

    def start(self):
        """Normalise ``self.args`` and prepare the data directories.

        ``self.args`` is converted to a ``SimpleNamespace`` (from a dict or
        from any object supporting ``vars``). The input/output locations are
        then resolved through ``setup_data_directories`` and the resulting
        arguments are published in the module-level ``_runtime`` mapping.
        """
        global _runtime
        if isinstance(self.args, dict):
            self.args = SimpleNamespace(**self.args)
        elif not isinstance(self.args, SimpleNamespace):
            self.args = SimpleNamespace(**vars(self.args))

        data_paths = self.setup_data_directories(
            source_path=self.args.data_in,
            target_path=self.args.data_out,
            target_subdir="results",
            reset_target=bool(getattr(self.args, "reset_target", False)),
        )
        self.args.data_in = data_paths.normalized_input
        self.args.data_out = data_paths.normalized_output
        self.data_out = data_paths.output_path
        self.pool_vars = {"args": self.args}
        _runtime = self.pool_vars

    def pool_init(self, worker_vars):
        """Install *worker_vars* as the module-level runtime mapping."""
        global _runtime
        _runtime = worker_vars

    def _current_args(self) -> SimpleNamespace:
        """Return the active arguments, preferring the runtime mapping.

        Falls back to ``self.args`` when ``_runtime`` has no ``"args"`` entry;
        a dict value is wrapped in a ``SimpleNamespace``.
        """
        args = _runtime.get("args", self.args)
        if isinstance(args, dict):
            return SimpleNamespace(**args)
        return args

    def work_init(self) -> None:
        """Keep parity with the PolarsWorker execution contract."""
        return None

    def _python_tail_checksum(self, df: pl.DataFrame) -> float:
        """Add a small GIL-bound scalar tail so execution modes separate more clearly.

        Every 64th row of *df* is sampled; for each sampled row a scalar is
        iterated ``max(compute_passes, 1) * 8`` times in pure Python and the
        final values are summed into the returned checksum.
        """
        args = self._current_args()
        loop_passes = max(int(getattr(args, "compute_passes", 1)), 1) * 8
        sample_stride = 64
        checksum = 0.0
        x_values = df["x"].to_list()
        y_values = df["y"].to_list()
        signal_values = df["signal"].to_list()
        weight_values = df["weight"].to_list()
        for idx in range(0, len(x_values), sample_stride):
            value = float(x_values[idx]) + float(y_values[idx]) * 0.01
            signal = float(signal_values[idx])
            weight = float(weight_values[idx])
            for _ in range(loop_passes):
                value = abs((value * 1.0000007) + signal * 0.17 - weight * 0.03)
            checksum += value
        return checksum

    def works(self, workers_plan, workers_plan_metadata) -> float:
        """Treat pool and dask bits as parallel paths for this benchmark worker.

        Runs the plan through the multi-process path when bit 0 or bit 2 of
        ``self._mode`` is set, otherwise through the mono-process path, then
        calls ``stop``. Returns the seconds elapsed since ``PolarsWorker._t0``
        (initialised here to "now" if it was unset).
        """
        if workers_plan:
            if self._mode & 0b0101:
                self._exec_multi_process(workers_plan, workers_plan_metadata)
            else:
                self._exec_mono_process(workers_plan, workers_plan_metadata)

        self.stop()

        if getattr(PolarsWorker, "_t0", None) is None:
            PolarsWorker._t0 = time.time()
        return time.time() - PolarsWorker._t0

    def work_pool(self, file_path):
        """Read one CSV workload file and return its aggregated result frame.

        Adds ``compute_passes`` score columns, aggregates per
        ``(group_id, bucket, segment)``, joins the per-segment weight table
        and tags every row with the checksum, source file name, engine and
        execution model.
        """
        args = self._current_args()
        source = Path(str(file_path)).expanduser()
        df = pl.read_csv(source)

        passes = max(int(getattr(args, "compute_passes", 1)), 1)
        for idx in range(passes):
            column = f"score_{idx}"
            df = df.with_columns(
                (
                    (pl.col("x") * (idx + 1.3))
                    - (pl.col("y") * (0.35 + idx * 0.05))
                    + (pl.col("signal") * pl.col("weight"))
                )
                .abs()
                .alias(column)
            )

        agg = (
            df.group_by(["group_id", "bucket", "segment"])
            .agg(
                pl.len().alias("row_count"),
                pl.col("x").sum().alias("x_sum"),
                pl.col("y").mean().alias("y_mean"),
                pl.col("score_0").mean().alias("score_mean"),
                # The maximum is taken over the last score column produced.
                pl.col(f"score_{passes - 1}").max().alias("score_max"),
                pl.col("weight").sum().alias("weight_sum"),
            )
            .sort(["bucket", "group_id"])
        )

        segment_weights = pl.DataFrame(
            {
                "segment": ["alpha", "beta", "gamma", "delta"],
                "segment_weight": [1.00, 1.08, 1.14, 1.22],
            }
        )
        python_tail_checksum = self._python_tail_checksum(df)
        agg = (
            agg.join(segment_weights, on="segment", how="left")
            .with_columns(
                (pl.col("score_mean") * pl.col("segment_weight") * pl.col("row_count")).alias("weighted_score"),
                pl.lit(python_tail_checksum).alias("python_tail_checksum"),
                pl.lit(source.name).alias("source_file"),
                pl.lit("polars").alias("engine"),
                pl.lit("threads").alias("execution_model"),
            )
        )
        return agg

    def work_done(self, df: pl.DataFrame | None = None) -> None:
        """Persist the result frame and its reduce summary.

        Does nothing when *df* is ``None`` or empty. Otherwise writes
        ``<worker_id>_output`` as parquet when the ``output_format`` argument
        is ``"parquet"`` and as CSV for any other value, then writes the
        reduce artifact next to it.
        """
        if df is None or df.is_empty():
            return
        # Resolve the worker id once so the output file and the reduce
        # artifact always agree; the fallback to 0 now covers both.
        worker_id = getattr(self, "_worker_id", 0)
        output_format = getattr(self._current_args(), "output_format", "csv")
        output_path = Path(self.data_out) / f"{worker_id}_output"
        if output_format == "parquet":
            df.write_parquet(output_path.with_suffix(".parquet"))
        else:
            df.write_csv(output_path.with_suffix(".csv"))
        write_reduce_artifact(
            df,
            self.data_out,
            worker_id=worker_id,
        )
@@ -0,0 +1,10 @@
1
+ [project]
2
+ name = "execution_polars_project"
3
+ version = "0.1.0"
4
+ description = "Built-in AGILab execution playground using PolarsWorker"
5
+ requires-python = ">=3.11"
6
+ dependencies = ["agi-env>=2026.05.13.post3,<2027.0", "agi-node>=2026.05.13.post3,<2027.0", "polars", "pydantic", "streamlit>=1.56.0"]
7
+
8
+ [build-system]
9
+ requires = ["setuptools"]
10
+ build-backend = "setuptools.build_meta"
@@ -0,0 +1,5 @@
1
+ extra-index-url = ["https://pypi.nvidia.com"]
2
+
3
+ override-dependencies = [
4
+ "cuspatial-cu12==25.4.0; platform_system == 'Linux'",
5
+ ]
@@ -0,0 +1,52 @@
1
+ Metadata-Version: 2.4
2
+ Name: agi-app-polars-execution
3
+ Version: 0.1.0
4
+ Summary: AGILAB Polars execution app project
5
+ Author: Jean-Pierre Morard
6
+ Maintainer: Jean-Pierre Morard
7
+ License-Expression: BSD-3-Clause
8
+ Project-URL: Documentation, https://thalesgroup.github.io/agilab
9
+ Project-URL: Source, https://github.com/ThalesGroup/agilab/tree/main/src/agilab/lib/agi-app-polars-execution
10
+ Project-URL: Issues, https://github.com/ThalesGroup/agilab/issues
11
+ Project-URL: Homepage, https://github.com/ThalesGroup/agilab
12
+ Project-URL: Repository, https://github.com/ThalesGroup/agilab
13
+ Project-URL: Discussions, https://github.com/ThalesGroup/agilab/discussions
14
+ Project-URL: Changelog, https://github.com/ThalesGroup/agilab/releases
15
+ Keywords: agilab,apps,reproducibility,workflow-orchestration
16
+ Classifier: Intended Audience :: Developers
17
+ Classifier: Development Status :: 4 - Beta
18
+ Classifier: Programming Language :: Python :: 3
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Classifier: Operating System :: MacOS
23
+ Classifier: Operating System :: Microsoft :: Windows
24
+ Classifier: Operating System :: POSIX :: Linux
25
+ Requires-Python: >=3.11
26
+ Description-Content-Type: text/markdown
27
+ License-File: LICENSE
28
+ Requires-Dist: agi-core<2027.0,>=2026.05.13
29
+ Dynamic: license-file
30
+
31
+ # agi-app-polars-execution
32
+
33
+ [![PyPI version](https://img.shields.io/pypi/v/agi-app-polars-execution.svg?cacheSeconds=300)](https://pypi.org/project/agi-app-polars-execution/)
34
+ [![Python versions](https://img.shields.io/pypi/pyversions/agi-app-polars-execution.svg)](https://pypi.org/project/agi-app-polars-execution/)
35
+ [![License: BSD 3-Clause](https://img.shields.io/pypi/l/agi-app-polars-execution)](https://opensource.org/licenses/BSD-3-Clause)
36
+
37
+ `agi-app-polars-execution` publishes the `execution_polars_project` AGILAB app
38
+ project as a self-contained payload. The distribution name is PyPI-facing; the
39
+ installed AGILAB project name remains `execution_polars_project`.
40
+
41
+ The package advertises the project through the `agilab.apps` entry point group
42
+ so `AgiEnv(app="execution_polars_project")` can resolve it without a monorepo
43
+ checkout.
44
+
45
+ ## Install
46
+
47
+ ```bash
48
+ pip install agi-app-polars-execution
49
+ ```
50
+
51
+ Most users install these app packages through the umbrella `agi-apps` package or
52
+ through `agilab[ui]` / `agilab[examples]`.
@@ -0,0 +1,11 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ setup.py
5
+ src/agi_app_polars_execution/__init__.py
6
+ src/agi_app_polars_execution.egg-info/PKG-INFO
7
+ src/agi_app_polars_execution.egg-info/SOURCES.txt
8
+ src/agi_app_polars_execution.egg-info/dependency_links.txt
9
+ src/agi_app_polars_execution.egg-info/entry_points.txt
10
+ src/agi_app_polars_execution.egg-info/requires.txt
11
+ src/agi_app_polars_execution.egg-info/top_level.txt
@@ -0,0 +1,3 @@
1
+ [agilab.apps]
2
+ execution_polars = agi_app_polars_execution:project_root
3
+ execution_polars_project = agi_app_polars_execution:project_root
@@ -0,0 +1 @@
1
+ agi-core<2027.0,>=2026.05.13