agi-app-data-quality-gate 2026.6.23__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. agi_app_data_quality_gate-2026.6.23/LICENSE +22 -0
  2. agi_app_data_quality_gate-2026.6.23/PKG-INFO +124 -0
  3. agi_app_data_quality_gate-2026.6.23/README.md +93 -0
  4. agi_app_data_quality_gate-2026.6.23/pyproject.toml +93 -0
  5. agi_app_data_quality_gate-2026.6.23/setup.cfg +4 -0
  6. agi_app_data_quality_gate-2026.6.23/setup.py +47 -0
  7. agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/__init__.py +32 -0
  8. agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/README.md +103 -0
  9. agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/lab_stages.toml +30 -0
  10. agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/pipeline_view.dot +13 -0
  11. agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/pyproject.toml +23 -0
  12. agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/src/app_args_form.py +154 -0
  13. agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/src/app_settings.toml +33 -0
  14. agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/src/data_quality_gate/__init__.py +54 -0
  15. agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/src/data_quality_gate/app_args.py +9 -0
  16. agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/src/data_quality_gate/compat/__init__.py +1 -0
  17. agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/src/data_quality_gate/compat/module_shim.py +99 -0
  18. agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/src/data_quality_gate/core.py +9 -0
  19. agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/src/data_quality_gate/data_quality_gate.py +9 -0
  20. agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/src/data_quality_gate/domain/__init__.py +1 -0
  21. agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/src/data_quality_gate/domain/core.py +1014 -0
  22. agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/src/data_quality_gate/reduction.py +9 -0
  23. agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/src/data_quality_gate/runtime/__init__.py +1 -0
  24. agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/src/data_quality_gate/runtime/app_args.py +164 -0
  25. agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/src/data_quality_gate/runtime/data_quality_gate.py +111 -0
  26. agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/src/data_quality_gate/runtime/reduction.py +171 -0
  27. agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/src/data_quality_gate_worker/__init__.py +5 -0
  28. agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/src/data_quality_gate_worker/data_quality_gate_worker.py +166 -0
  29. agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/src/data_quality_gate_worker/pyproject.toml +10 -0
  30. agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/src/pre_prompt.json +10 -0
  31. agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate.egg-info/PKG-INFO +124 -0
  32. agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate.egg-info/SOURCES.txt +11 -0
  33. agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate.egg-info/dependency_links.txt +1 -0
  34. agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate.egg-info/entry_points.txt +3 -0
  35. agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate.egg-info/requires.txt +2 -0
  36. agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate.egg-info/top_level.txt +1 -0
@@ -0,0 +1,22 @@
1
+ BSD 3-Clause License
2
+
3
+ Copyright (c) 2025, Jean-Pierre Morard, THALES SIX GTS France SAS
4
+ All rights reserved.
5
+
6
+ Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
7
+ following conditions are met:
8
+
9
+ 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following
10
+ disclaimer.
11
+ 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
12
+ disclaimer in the documentation and/or other materials provided with the distribution.
13
+ 3. Neither the name of Jean-Pierre MORARD nor the names of its contributors, or THALES SIX GTS France SAS, may be used
14
+ to endorse or promote products derived from this software without specific prior written permission.
15
+
16
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
17
+ INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
19
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
21
+ WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,124 @@
1
+ Metadata-Version: 2.4
2
+ Name: agi-app-data-quality-gate
3
+ Version: 2026.6.23
4
+ Summary: AGILAB deterministic data contract, drift, leakage, and promotion gate
5
+ Author: Jean-Pierre Morard
6
+ Maintainer: Jean-Pierre Morard
7
+ License-Expression: BSD-3-Clause
8
+ Project-URL: Documentation, https://thalesgroup.github.io/agilab
9
+ Project-URL: Source, https://github.com/ThalesGroup/agilab/tree/main/src/agilab/lib/agi-app-data-quality-gate
10
+ Project-URL: Issues, https://github.com/ThalesGroup/agilab/issues
11
+ Project-URL: Homepage, https://github.com/ThalesGroup/agilab
12
+ Project-URL: Repository, https://github.com/ThalesGroup/agilab
13
+ Project-URL: Discussions, https://github.com/ThalesGroup/agilab/discussions
14
+ Project-URL: Changelog, https://github.com/ThalesGroup/agilab/releases
15
+ Keywords: agilab,apps,data-quality,drift,reproducibility,workflow-orchestration
16
+ Classifier: Intended Audience :: Developers
17
+ Classifier: Development Status :: 4 - Beta
18
+ Classifier: Programming Language :: Python :: 3
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Classifier: Operating System :: MacOS
23
+ Classifier: Operating System :: Microsoft :: Windows
24
+ Classifier: Operating System :: POSIX :: Linux
25
+ Requires-Python: >=3.11
26
+ Description-Content-Type: text/markdown
27
+ License-File: LICENSE
28
+ Requires-Dist: agi-env<2027.0,>=2026.05.31
29
+ Requires-Dist: agi-node<2027.0,>=2026.05.31
30
+ Dynamic: license-file
31
+
32
+ # agi-app-data-quality-gate
33
+
34
+ ![Release artifact](https://img.shields.io/badge/release%20artifact-wheel%2Bsdist-blue)
35
+ ![PyPI](https://img.shields.io/badge/PyPI-not%20promoted-lightgrey)
36
+ [![License: BSD 3-Clause](https://img.shields.io/badge/license-BSD%203--Clause-blue)](https://opensource.org/licenses/BSD-3-Clause)
37
+
38
+ `agi-app-data-quality-gate` packages the `data_quality_gate_project` AGILAB app.
39
+ It is a deterministic data contract, drift, leakage, and promotion-gate example
40
+ for teams that need a concrete proof before a candidate dataset reaches model
41
+ training or pilot promotion.
42
+
43
+ ## Purpose
44
+
45
+ Use this package to show how AGILAB can turn a data-readiness review into
46
+ replayable evidence. The app generates a baseline dataset and a candidate
47
+ dataset, validates the expected columns, profiles quality, measures drift, and
48
+ writes a decision that can be reviewed before another system takes ownership.
49
+
50
+ ## What You Learn
51
+
52
+ The packaged project demonstrates the same contract-first workflow without
53
+ requiring a source checkout. A first run shows the generated datasets, the
54
+ quality profiles, the drift table, the gate decision, and the manifest that ties
55
+ those artifacts together. It is intended to make a data promotion review easy to
56
+ rerun and easy to inspect from AGILAB.
57
+
58
+ ## Installed Project
59
+
60
+ The distribution name is `agi-app-data-quality-gate`; the AGILAB project name is
61
+ `data_quality_gate_project`. The package exposes both `data_quality_gate` and
62
+ `data_quality_gate_project` through the `agilab.apps` entry point group, so
63
+ `AgiEnv(app="data_quality_gate_project")` resolves the project without a
64
+ monorepo checkout once this payload package is installed.
65
+
66
+ ## Install
67
+
68
+ ```bash
69
+ pip install agi-app-data-quality-gate
70
+ ```
71
+
72
+ This command works only once this distribution has been promoted to PyPI, which
73
+ is not yet the case. Until then, install the wheel from the release artifact directly:
74
+
75
+ ```bash
76
+ pip install /path/to/agi_app_data_quality_gate-<version>-py3-none-any.whl
77
+ ```
78
+
79
+ This app project is built as wheel and source-distribution artifacts in the
80
+ GitHub Release archive, but it is not promoted to PyPI in the current release
81
+ plan and is not pulled by the `agi-apps` umbrella. Install it directly only when
82
+ validating the data quality gate package from a release artifact or a locally
83
+ built wheel.
84
+
85
+ ## Run In AGILAB
86
+
87
+ Select `data_quality_gate_project`, open `ORCHESTRATE`, then run `INSTALL` and
88
+ `EXECUTE`. Open `ANALYSIS` or inspect the exported evidence directory to review
89
+ the contract, drift metrics, gate decision, and artifact manifest.
90
+
91
+ ## Expected Inputs
92
+
93
+ The default run generates deterministic synthetic baseline and candidate
94
+ datasets. It does not require private data, a model registry, a cloud account,
95
+ an LLM, or an external network service.
96
+
97
+ ## Expected Outputs
98
+
99
+ The app writes baseline and candidate CSV files, JSON profiles, a data contract,
100
+ drift metrics, a gate decision, a Markdown evidence report, a run manifest, and
101
+ a data-quality summary with artifact hashes.
102
+
103
+ ## Change One Thing
104
+
105
+ Change only `drift_strength`, then rerun the app. Lower values should move the
106
+ gate toward `promote`; higher values should move it toward `manual-review` or
107
+ `block`. Keep `seed=2026` when you want artifact deltas that remain easy to
108
+ explain.
109
+
110
+ ## Troubleshooting
111
+
112
+ If the package resolves but custom data does not, rerun the default synthetic
113
+ case first. Then verify that CSV and JSON paths are AGILAB-share-relative and
114
+ that the candidate file contains every column required by the contract. A noisy
115
+ or unexpected `manual-review` decision usually means a drift threshold is
116
+ tighter than the candidate's measured drift, so inspect `drift_metrics.csv` before
117
+ loosening the gate.
118
+
119
+ ## Scope
120
+
121
+ This is a compact data-quality gate example. It does not replace a full data
122
+ observability platform, feature store, enterprise governance workflow, or
123
+ production approval authority. Its purpose is to make one data-readiness review
124
+ portable, deterministic, and evidence-backed.
@@ -0,0 +1,93 @@
1
+ # agi-app-data-quality-gate
2
+
3
+ ![Release artifact](https://img.shields.io/badge/release%20artifact-wheel%2Bsdist-blue)
4
+ ![PyPI](https://img.shields.io/badge/PyPI-not%20promoted-lightgrey)
5
+ [![License: BSD 3-Clause](https://img.shields.io/badge/license-BSD%203--Clause-blue)](https://opensource.org/licenses/BSD-3-Clause)
6
+
7
+ `agi-app-data-quality-gate` packages the `data_quality_gate_project` AGILAB app.
8
+ It is a deterministic data contract, drift, leakage, and promotion-gate example
9
+ for teams that need a concrete proof before a candidate dataset reaches model
10
+ training or pilot promotion.
11
+
12
+ ## Purpose
13
+
14
+ Use this package to show how AGILAB can turn a data-readiness review into
15
+ replayable evidence. The app generates a baseline dataset and a candidate
16
+ dataset, validates the expected columns, profiles quality, measures drift, and
17
+ writes a decision that can be reviewed before another system takes ownership.
18
+
19
+ ## What You Learn
20
+
21
+ The packaged project demonstrates the same contract-first workflow without
22
+ requiring a source checkout. A first run shows the generated datasets, the
23
+ quality profiles, the drift table, the gate decision, and the manifest that ties
24
+ those artifacts together. It is intended to make a data promotion review easy to
25
+ rerun and easy to inspect from AGILAB.
26
+
27
+ ## Installed Project
28
+
29
+ The distribution name is `agi-app-data-quality-gate`; the AGILAB project name is
30
+ `data_quality_gate_project`. The package exposes both `data_quality_gate` and
31
+ `data_quality_gate_project` through the `agilab.apps` entry point group, so
32
+ `AgiEnv(app="data_quality_gate_project")` resolves the project without a
33
+ monorepo checkout once this payload package is installed.
34
+
35
+ ## Install
36
+
37
+ ```bash
38
+ pip install agi-app-data-quality-gate
39
+ ```
40
+
41
+ This command works only once this distribution has been promoted to PyPI, which
42
+ is not yet the case. Until then, install the wheel from the release artifact directly:
43
+
44
+ ```bash
45
+ pip install /path/to/agi_app_data_quality_gate-<version>-py3-none-any.whl
46
+ ```
47
+
48
+ This app project is built as wheel and source-distribution artifacts in the
49
+ GitHub Release archive, but it is not promoted to PyPI in the current release
50
+ plan and is not pulled by the `agi-apps` umbrella. Install it directly only when
51
+ validating the data quality gate package from a release artifact or a locally
52
+ built wheel.
53
+
54
+ ## Run In AGILAB
55
+
56
+ Select `data_quality_gate_project`, open `ORCHESTRATE`, then run `INSTALL` and
57
+ `EXECUTE`. Open `ANALYSIS` or inspect the exported evidence directory to review
58
+ the contract, drift metrics, gate decision, and artifact manifest.
59
+
60
+ ## Expected Inputs
61
+
62
+ The default run generates deterministic synthetic baseline and candidate
63
+ datasets. It does not require private data, a model registry, a cloud account,
64
+ an LLM, or an external network service.
65
+
66
+ ## Expected Outputs
67
+
68
+ The app writes baseline and candidate CSV files, JSON profiles, a data contract,
69
+ drift metrics, a gate decision, a Markdown evidence report, a run manifest, and
70
+ a data-quality summary with artifact hashes.
71
+
72
+ ## Change One Thing
73
+
74
+ Change only `drift_strength`, then rerun the app. Lower values should move the
75
+ gate toward `promote`; higher values should move it toward `manual-review` or
76
+ `block`. Keep `seed=2026` when you want artifact deltas that remain easy to
77
+ explain.
78
+
79
+ ## Troubleshooting
80
+
81
+ If the package resolves but custom data does not, rerun the default synthetic
82
+ case first. Then verify that CSV and JSON paths are AGILAB-share-relative and
83
+ that the candidate file contains every column required by the contract. A noisy
84
+ or unexpected `manual-review` decision usually means a drift threshold is
85
+ tighter than the candidate's measured drift, so inspect `drift_metrics.csv` before
86
+ loosening the gate.
87
+
88
+ ## Scope
89
+
90
+ This is a compact data-quality gate example. It does not replace a full data
91
+ observability platform, feature store, enterprise governance workflow, or
92
+ production approval authority. Its purpose is to make one data-readiness review
93
+ portable, deterministic, and evidence-backed.
@@ -0,0 +1,93 @@
1
+ [project]
2
+ version = "2026.06.23"
3
+ name = "agi-app-data-quality-gate"
4
+ description = "AGILAB deterministic data contract, drift, leakage, and promotion gate"
5
+ requires-python = ">=3.11"
6
+ readme = "README.md"
7
+ authors = [
8
+ { name = "Jean-Pierre Morard" }
9
+ ]
10
+ maintainers = [{ name = "Jean-Pierre Morard" }]
11
+ license = "BSD-3-Clause"
12
+ license-files = ["LICENSE"]
13
+
14
+ classifiers = [
15
+ "Intended Audience :: Developers",
16
+ "Development Status :: 4 - Beta",
17
+ "Programming Language :: Python :: 3",
18
+ "Programming Language :: Python :: 3.11",
19
+ "Programming Language :: Python :: 3.12",
20
+ "Programming Language :: Python :: 3.13",
21
+ "Operating System :: MacOS",
22
+ "Operating System :: Microsoft :: Windows",
23
+ "Operating System :: POSIX :: Linux",
24
+ ]
25
+
26
+ keywords = [
27
+ "agilab",
28
+ "apps",
29
+ "data-quality",
30
+ "drift",
31
+ "reproducibility",
32
+ "workflow-orchestration",
33
+ ]
34
+
35
+ dependencies = ["agi-env>=2026.05.31,<2027.0", "agi-node>=2026.05.31,<2027.0"]
36
+
37
+ [project.urls]
38
+ Documentation = "https://thalesgroup.github.io/agilab"
39
+ Source = "https://github.com/ThalesGroup/agilab/tree/main/src/agilab/lib/agi-app-data-quality-gate"
40
+ Issues = "https://github.com/ThalesGroup/agilab/issues"
41
+ Homepage = "https://github.com/ThalesGroup/agilab"
42
+ Repository = "https://github.com/ThalesGroup/agilab"
43
+ Discussions = "https://github.com/ThalesGroup/agilab/discussions"
44
+ Changelog = "https://github.com/ThalesGroup/agilab/releases"
45
+
46
+ [project.entry-points."agilab.apps"]
47
+ data_quality_gate = "agi_app_data_quality_gate:project_root"
48
+ data_quality_gate_project = "agi_app_data_quality_gate:project_root"
49
+
50
+
51
+
52
+
53
+ [dependency-groups]
54
+ dev = [
55
+ "pytest",
56
+ ]
57
+
58
+ [tool.uv.sources.agi-env]
59
+ path = "../../core/agi-env"
60
+ editable = true
61
+
62
+ [tool.uv.sources.agi-node]
63
+ path = "../../core/agi-node"
64
+ editable = true
65
+
66
[build-system]
# The [project] table uses an SPDX `license` string together with
# `license-files` (PEP 639). setuptools only understands that form from the
# 77.x series onwards; older releases reject the string-valued `license`.
requires = ["setuptools>=77.0.3", "wheel"]
build-backend = "setuptools.build_meta"
69
+
70
+ [tool.setuptools]
71
+ include-package-data = false
72
+ package-dir = {"" = "src"}
73
+ packages = ["agi_app_data_quality_gate"]
74
+
75
+ [tool.setuptools.package-data]
76
+ "agi_app_data_quality_gate" = [
77
+ "project/**/*",
78
+ ]
79
+
80
+ [tool.setuptools.exclude-package-data]
81
+ "agi_app_data_quality_gate" = [
82
+ "project/**/.venv/**",
83
+ "project/**/__pycache__/**",
84
+ "project/**/*.pyc",
85
+ "project/**/*.pyo",
86
+ "project/**/*.pyx",
87
+ "project/**/*.c",
88
+ "project/**/*.so",
89
+ "project/**/uv.lock",
90
+ ]
91
+
92
+ [tool.pytest.ini_options]
93
+ testpaths = ["test"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,47 @@
1
+ from __future__ import annotations
2
+
3
+ import importlib.util
4
+ from pathlib import Path
5
+
6
+ from setuptools import setup
7
+ from setuptools.command.build_py import build_py as _build_py
8
+ from setuptools.command.sdist import sdist as _sdist
9
+
10
+ APP_PROJECT = "data_quality_gate_project"
11
+ PACKAGE_IMPORT = "agi_app_data_quality_gate"
12
+
13
+
14
def _load_build_support():
    """Load the optional ``app_project_build_support`` helper module.

    The helper is looked up at
    ``<ancestor>/src/agilab/lib/app_project_build_support.py`` where
    ``<ancestor>`` is the fifth parent (``parents[4]``) of this resolved
    ``setup.py``.

    Returns:
        The executed helper module, or ``None`` when the helper file cannot be
        located (either the directory tree is too shallow to have a fifth
        parent, or the file does not exist there).

    Raises:
        RuntimeError: If the helper file exists but no import spec or loader
            can be created for it.
    """
    ancestors = Path(__file__).resolve().parents
    # A shallow build directory (for example an sdist unpacked directly under
    # a temp dir) has fewer than five ancestors. Indexing parents[4] would
    # raise IndexError there, so treat it like a missing helper instead.
    if len(ancestors) <= 4:
        return None

    module_path = ancestors[4] / "src" / "agilab" / "lib" / "app_project_build_support.py"
    if not module_path.exists():
        return None

    spec = importlib.util.spec_from_file_location("agilab_app_project_build_support", module_path)
    if spec is None or spec.loader is None:
        raise RuntimeError(f"Unable to load app project build support from {module_path}")

    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module
24
+
25
+
26
def _copy_payload(target_root: Path) -> None:
    """Copy the app project payload into ``target_root`` if build support exists.

    Does nothing when ``_load_build_support()`` returns ``None``. Otherwise it
    calls the helper's ``copy_app_project_payload(APP_PROJECT, target_root)``
    and prints one line for every item in the returned iterable.

    Args:
        target_root: Directory handed to the helper as the copy destination.
    """
    helper = _load_build_support()
    if helper is not None:
        sanitized = helper.copy_app_project_payload(APP_PROJECT, target_root)
        for manifest in sanitized:
            print(f"[{PACKAGE_IMPORT}] sanitized packaged app manifest: {manifest}")
33
+
34
+
35
class build_py(_build_py):
    """``build_py`` command that also stages the app project payload."""

    def run(self):
        """Run the standard build, then copy the payload into the build tree.

        The payload destination is ``<build_lib>/<PACKAGE_IMPORT>/project``.
        """
        super().run()
        payload_dir = Path(self.build_lib).joinpath(PACKAGE_IMPORT, "project")
        _copy_payload(payload_dir)
39
+
40
+
41
class sdist(_sdist):
    """``sdist`` command that also stages the app project payload."""

    def make_release_tree(self, base_dir, files):
        """Build the release tree, then copy the payload into it.

        The payload destination is ``<base_dir>/src/<PACKAGE_IMPORT>/project``.
        """
        super().make_release_tree(base_dir, files)
        payload_dir = Path(base_dir).joinpath("src", PACKAGE_IMPORT, "project")
        _copy_payload(payload_dir)
45
+
46
+
47
# Register the payload-copying command overrides; all other metadata is left
# to the declarative project configuration.
setup(
    cmdclass={
        "build_py": build_py,
        "sdist": sdist,
    },
)
@@ -0,0 +1,32 @@
1
+ """Installed AGILAB app project provider for data_quality_gate_project."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+ APP_SLUG = "data_quality_gate"
8
+ PROJECT_NAME = "data_quality_gate_project"
9
+ PACKAGE_NAME = "agi-app-data-quality-gate"
10
+
11
+
12
def package_root() -> Path:
    """Return the resolved directory that contains this module's file."""
    module_file = Path(__file__).resolve()
    return module_file.parent
14
+
15
+
16
def project_root() -> Path:
    """Return the directory of the ``PROJECT_NAME`` app project.

    Resolution order:

    1. ``<ancestor>/apps/builtin/<PROJECT_NAME>``, where ``<ancestor>`` is the
       fifth parent (``parents[4]``) of this resolved file, if that directory
       exists (presumably a source checkout layout; confirm against the repo).
    2. ``package_root() / "project" / PROJECT_NAME`` otherwise.

    Returns:
        The selected project directory. The fallback path is returned without
        checking that it exists.
    """
    ancestors = Path(__file__).resolve().parents
    # Installs into a shallow location (e.g. ``pip install --target /deps``)
    # have fewer than five ancestors; parents[4] would raise IndexError there,
    # so skip the source-tree probe and use the packaged project instead.
    if len(ancestors) > 4:
        source_root = ancestors[4] / "apps" / "builtin" / PROJECT_NAME
        if source_root.exists():
            return source_root
    return package_root() / "project" / PROJECT_NAME
21
+
22
+
23
def metadata() -> dict[str, str]:
    """Describe this app package as a flat string-to-string mapping.

    Returns:
        A dict with the keys ``slug``, ``project``, ``package`` and
        ``project_root``; the last one is ``project_root()`` converted to
        ``str``.
    """
    info: dict[str, str] = {}
    info["slug"] = APP_SLUG
    info["project"] = PROJECT_NAME
    info["package"] = PACKAGE_NAME
    info["project_root"] = str(project_root())
    return info
30
+
31
+
32
+ __all__ = ["APP_SLUG", "PACKAGE_NAME", "PROJECT_NAME", "metadata", "package_root", "project_root"]
@@ -0,0 +1,103 @@
1
+ # Data Quality Gate Project
2
+
3
+ `data_quality_gate_project` is a built-in AGILAB app for a production-adjacent
4
+ data contract and drift gate. It turns the common "is this candidate dataset
5
+ safe to promote?" review into a deterministic run with machine-readable
6
+ evidence, a human report, and a clear gate decision.
7
+
8
+ ## Purpose
9
+
10
+ Use this project when you want a fast, understandable proof that AGILAB can
11
+ protect an AI/ML workflow before model training or promotion. The app generates
12
+ a baseline dataset and a candidate dataset, validates the contract, measures
13
+ quality and drift, then writes a gate decision that can be reviewed or wired
14
+ into a later CI/promotion step.
15
+
16
+ ## What You Learn
17
+
18
+ The first run shows how AGILAB turns a data-readiness question into replayable
19
+ evidence rather than a spreadsheet note. You see the app produce source data,
20
+ profile both sides of the comparison, apply a contract, score drift, and write a
21
+ decision card that names the failing or passing gate. It is a compact example of
22
+ how an experiment workbench can protect a downstream model workflow before
23
+ training begins.
24
+
25
+ ## Run In AGILAB
26
+
27
+ Select `data_quality_gate_project`, then open `ORCHESTRATE`. Keep the default
28
+ arguments for the first run, click `INSTALL`, then click `RUN`.
29
+
30
+ The default configuration creates a deterministic candidate dataset with a
31
+ small business distribution shift. The run should complete locally and write
32
+ the data quality evidence under `data_quality_gate/evidence`.
33
+
34
+ To gate your own data, place two CSV files under the AGILAB share and set
35
+ `baseline_csv` plus `candidate_csv` to their relative paths. Optional
36
+ `contract_json` and `thresholds_json` files can override the default column
37
+ contract and promotion thresholds without editing Python code.
38
+
39
+ ## Expected Inputs
40
+
41
+ No external data, API key, cloud service, notebook, model registry, or LLM is
42
+ required for the first run. The app can also read user-provided baseline and
43
+ candidate CSV files from the AGILAB share. Contract JSON accepts:
44
+
45
+ - `columns`: mapping from column name to `{kind, role, required, drift}`.
46
+ - `allow_unexpected_columns`: whether extra candidate columns are accepted.
47
+ - `target_column`, `identifier_columns`, and `leakage_name_patterns`.
48
+ - `thresholds`: optional overrides for PSI, KS, null-rate, duplicate-rate, row
49
+ count, mean-shift, and category-delta thresholds.
50
+
51
+ ## Expected Outputs
52
+
53
+ The worker writes:
54
+
55
+ - `baseline.csv`
56
+ - `candidate.csv`
57
+ - `baseline_profile.json`
58
+ - `candidate_profile.json`
59
+ - `data_contract.json`
60
+ - `drift_metrics.csv`
61
+ - `gate_decision.json`
62
+ - `decision_card.json`
63
+ - `data_quality_dashboard.html`
64
+ - `input_sources.json`
65
+ - `data_quality_report.md`
66
+ - `run_manifest.json`
67
+ - `data_quality_gate_summary.json`
68
+
69
+ The same evidence bundle is mirrored under the app analysis export directory so
70
+ generic artifact readers can inspect it later.
71
+
72
+ ## Change One Thing
73
+
74
+ After the default run works, change only one thing:
75
+
76
+ - Raise or lower `drift_strength` to see the synthetic decision move.
77
+ - Or set `baseline_csv` and `candidate_csv` to your own share-relative files.
78
+ - Or set `thresholds_json` to tighten/relax the gate without code changes.
79
+
80
+ Keep `seed=2026` for synthetic comparisons so artifact deltas remain easy to
81
+ explain.
82
+
83
+ ## Example Quality Plan
84
+
85
+ - Review artifact: Review `data_quality_report.md` and `gate_decision.json` first; they explain why a dataset is allowed, warned, or blocked before model work starts.
86
+ - Practice change: Change one threshold or one missing-value count in the seeded input and confirm the gate moves from pass to warn or fail with an actionable reason.
87
+ - Quality check: A mature run leaves a stable gate report, a concise summary, and no hidden dependency on private data or external services.
88
+
89
+ ## Troubleshooting
90
+
91
+ If custom CSV inputs fail, first run the defaults again to confirm the app and
92
+ worker install are healthy. Then check that `baseline_csv`, `candidate_csv`,
93
+ `contract_json`, and `thresholds_json` are relative to the AGILAB share, not to
94
+ the repository checkout. Contract errors usually mean a required column is
95
+ missing, a numeric column was parsed as text, or a threshold override used a name
96
+ that is not present in the generated `data_contract.json`.
97
+
98
+ ## Scope
99
+
100
+ This app is a deterministic data-quality and drift gate example. It is not a
101
+ full data observability platform, feature store, model registry, or production
102
+ governance system. Its job is to make one candidate dataset review reproducible,
103
+ portable, and evidence-backed before another system takes ownership.
@@ -0,0 +1,30 @@
1
+ [[stages]]
2
+ id = "build_candidate_data"
3
+ label = "Load or build baseline and candidate data"
4
+ kind = "data"
5
+ produces = ["baseline.csv", "candidate.csv", "input_sources.json"]
6
+ M = "user CSV inputs when provided, otherwise deterministic seeded tabular generator"
7
+
8
+ [[stages]]
9
+ id = "validate_contract"
10
+ label = "Validate data contract"
11
+ kind = "validation"
12
+ depends_on = ["build_candidate_data"]
13
+ produces = ["data_contract.json", "baseline_profile.json", "candidate_profile.json"]
14
+ M = "configurable schema, type, null, duplicate, and leakage checks"
15
+
16
+ [[stages]]
17
+ id = "measure_drift"
18
+ label = "Measure drift"
19
+ kind = "evidence"
20
+ depends_on = ["validate_contract"]
21
+ produces = ["drift_metrics.csv"]
22
+ M = "PSI, KS statistic, mean shift, and category delta"
23
+
24
+ [[stages]]
25
+ id = "write_gate_decision"
26
+ label = "Write gate decision"
27
+ kind = "decision"
28
+ depends_on = ["measure_drift"]
29
+ produces = ["gate_decision.json", "decision_card.json", "data_quality_dashboard.html", "run_manifest.json", "data_quality_report.md"]
30
+ M = "AGILAB data-quality evidence writer"
@@ -0,0 +1,13 @@
1
+ digraph data_quality_gate_project {
2
+ rankdir=LR;
3
+ node [shape=box, style="rounded"];
4
+
5
+ build_candidate_data [label="Load/build baseline + candidate"];
6
+ validate_contract [label="Validate contract"];
7
+ measure_drift [label="Measure drift"];
8
+ write_gate_decision [label="Write decision + dashboard"];
9
+
10
+ build_candidate_data -> validate_contract;
11
+ validate_contract -> measure_drift;
12
+ measure_drift -> write_gate_decision;
13
+ }
@@ -0,0 +1,23 @@
1
+ [project]
2
+ name = "data_quality_gate_project"
3
+ version = "2026.05.30.post1"
4
+ description = "Built-in AGILAB data contract and drift gate app with promotion evidence"
5
+ readme = "README.md"
6
+ requires-python = ">=3.11"
7
+ authors = [
8
+ { name = "Jean-Pierre Morard" }
9
+ ]
10
+ dependencies = ["agi-env>=2026.05.31", "agi-node>=2026.05.31", "pandas>=2.3.0,<4", "pydantic>=2.11,<2.13", "streamlit>=1.57,<1.58"]
11
+
12
+ [project.urls]
13
+ Documentation = "https://thalesgroup.github.io/agilab"
14
+ Source = "https://github.com/ThalesGroup/agilab/tree/main/src/agilab/apps/builtin/data_quality_gate_project"
15
+ Issues = "https://github.com/ThalesGroup/agilab/issues"
16
+ Homepage = "https://github.com/ThalesGroup/agilab"
17
+ Repository = "https://github.com/ThalesGroup/agilab"
18
+ Discussions = "https://github.com/ThalesGroup/agilab/discussions"
19
+ Changelog = "https://github.com/ThalesGroup/agilab/releases"
20
+
21
+ [build-system]
22
+ requires = ["setuptools"]
23
+ build-backend = "setuptools.build_meta"