agi-app-data-quality-gate 2026.6.23__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agi_app_data_quality_gate-2026.6.23/LICENSE +22 -0
- agi_app_data_quality_gate-2026.6.23/PKG-INFO +124 -0
- agi_app_data_quality_gate-2026.6.23/README.md +93 -0
- agi_app_data_quality_gate-2026.6.23/pyproject.toml +93 -0
- agi_app_data_quality_gate-2026.6.23/setup.cfg +4 -0
- agi_app_data_quality_gate-2026.6.23/setup.py +47 -0
- agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/__init__.py +32 -0
- agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/README.md +103 -0
- agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/lab_stages.toml +30 -0
- agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/pipeline_view.dot +13 -0
- agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/pyproject.toml +23 -0
- agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/src/app_args_form.py +154 -0
- agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/src/app_settings.toml +33 -0
- agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/src/data_quality_gate/__init__.py +54 -0
- agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/src/data_quality_gate/app_args.py +9 -0
- agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/src/data_quality_gate/compat/__init__.py +1 -0
- agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/src/data_quality_gate/compat/module_shim.py +99 -0
- agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/src/data_quality_gate/core.py +9 -0
- agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/src/data_quality_gate/data_quality_gate.py +9 -0
- agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/src/data_quality_gate/domain/__init__.py +1 -0
- agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/src/data_quality_gate/domain/core.py +1014 -0
- agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/src/data_quality_gate/reduction.py +9 -0
- agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/src/data_quality_gate/runtime/__init__.py +1 -0
- agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/src/data_quality_gate/runtime/app_args.py +164 -0
- agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/src/data_quality_gate/runtime/data_quality_gate.py +111 -0
- agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/src/data_quality_gate/runtime/reduction.py +171 -0
- agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/src/data_quality_gate_worker/__init__.py +5 -0
- agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/src/data_quality_gate_worker/data_quality_gate_worker.py +166 -0
- agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/src/data_quality_gate_worker/pyproject.toml +10 -0
- agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate/project/data_quality_gate_project/src/pre_prompt.json +10 -0
- agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate.egg-info/PKG-INFO +124 -0
- agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate.egg-info/SOURCES.txt +11 -0
- agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate.egg-info/dependency_links.txt +1 -0
- agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate.egg-info/entry_points.txt +3 -0
- agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate.egg-info/requires.txt +2 -0
- agi_app_data_quality_gate-2026.6.23/src/agi_app_data_quality_gate.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
BSD 3-Clause License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025, Jean-Pierre Morard, THALES SIX GTS France SAS
|
|
4
|
+
All rights reserved.
|
|
5
|
+
|
|
6
|
+
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
|
|
7
|
+
following conditions are met:
|
|
8
|
+
|
|
9
|
+
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following
|
|
10
|
+
disclaimer.
|
|
11
|
+
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
|
|
12
|
+
disclaimer in the documentation and/or other materials provided with the distribution.
|
|
13
|
+
3. Neither the name of Jean-Pierre MORARD nor the names of its contributors, or THALES SIX GTS France SAS, may be used
|
|
14
|
+
to endorse or promote products derived from this software without specific prior written permission.
|
|
15
|
+
|
|
16
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
|
|
17
|
+
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
18
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
19
|
+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
20
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
|
21
|
+
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
22
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: agi-app-data-quality-gate
|
|
3
|
+
Version: 2026.6.23
|
|
4
|
+
Summary: AGILAB deterministic data contract, drift, leakage, and promotion gate
|
|
5
|
+
Author: Jean-Pierre Morard
|
|
6
|
+
Maintainer: Jean-Pierre Morard
|
|
7
|
+
License-Expression: BSD-3-Clause
|
|
8
|
+
Project-URL: Documentation, https://thalesgroup.github.io/agilab
|
|
9
|
+
Project-URL: Source, https://github.com/ThalesGroup/agilab/tree/main/src/agilab/lib/agi-app-data-quality-gate
|
|
10
|
+
Project-URL: Issues, https://github.com/ThalesGroup/agilab/issues
|
|
11
|
+
Project-URL: Homepage, https://github.com/ThalesGroup/agilab
|
|
12
|
+
Project-URL: Repository, https://github.com/ThalesGroup/agilab
|
|
13
|
+
Project-URL: Discussions, https://github.com/ThalesGroup/agilab/discussions
|
|
14
|
+
Project-URL: Changelog, https://github.com/ThalesGroup/agilab/releases
|
|
15
|
+
Keywords: agilab,apps,data-quality,drift,reproducibility,workflow-orchestration
|
|
16
|
+
Classifier: Intended Audience :: Developers
|
|
17
|
+
Classifier: Development Status :: 4 - Beta
|
|
18
|
+
Classifier: Programming Language :: Python :: 3
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
22
|
+
Classifier: Operating System :: MacOS
|
|
23
|
+
Classifier: Operating System :: Microsoft :: Windows
|
|
24
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
25
|
+
Requires-Python: >=3.11
|
|
26
|
+
Description-Content-Type: text/markdown
|
|
27
|
+
License-File: LICENSE
|
|
28
|
+
Requires-Dist: agi-env<2027.0,>=2026.05.31
|
|
29
|
+
Requires-Dist: agi-node<2027.0,>=2026.05.31
|
|
30
|
+
Dynamic: license-file
|
|
31
|
+
|
|
32
|
+
# agi-app-data-quality-gate
|
|
33
|
+
|
|
34
|
+

|
|
35
|
+

|
|
36
|
+
[License: BSD 3-Clause](https://opensource.org/licenses/BSD-3-Clause)
|
|
37
|
+
|
|
38
|
+
`agi-app-data-quality-gate` packages the `data_quality_gate_project` AGILAB app.
|
|
39
|
+
It is a deterministic data contract, drift, leakage, and promotion-gate example
|
|
40
|
+
for teams that need a concrete proof before a candidate dataset reaches model
|
|
41
|
+
training or pilot promotion.
|
|
42
|
+
|
|
43
|
+
## Purpose
|
|
44
|
+
|
|
45
|
+
Use this package to show how AGILAB can turn a data-readiness review into
|
|
46
|
+
replayable evidence. The app generates a baseline dataset and a candidate
|
|
47
|
+
dataset, validates the expected columns, profiles quality, measures drift, and
|
|
48
|
+
writes a decision that can be reviewed before another system takes ownership.
|
|
49
|
+
|
|
50
|
+
## What You Learn
|
|
51
|
+
|
|
52
|
+
The packaged project demonstrates the same contract-first workflow without
|
|
53
|
+
requiring a source checkout. A first run shows the generated datasets, the
|
|
54
|
+
quality profiles, the drift table, the gate decision, and the manifest that ties
|
|
55
|
+
those artifacts together. It is intended to make a data promotion review easy to
|
|
56
|
+
rerun and easy to inspect from AGILAB.
|
|
57
|
+
|
|
58
|
+
## Installed Project
|
|
59
|
+
|
|
60
|
+
The distribution name is `agi-app-data-quality-gate`; the AGILAB project name is
|
|
61
|
+
`data_quality_gate_project`. The package exposes both `data_quality_gate` and
|
|
62
|
+
`data_quality_gate_project` through the `agilab.apps` entry point group, so
|
|
63
|
+
`AgiEnv(app="data_quality_gate_project")` resolves the project without a
|
|
64
|
+
monorepo checkout once this payload package is installed.
|
|
65
|
+
|
|
66
|
+
## Install
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
pip install agi-app-data-quality-gate
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
This is the stable package install shape once this distribution is promoted to
|
|
73
|
+
PyPI. For the current release artifact path, install the wheel directly:
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
pip install /path/to/agi_app_data_quality_gate-<version>-py3-none-any.whl
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
This app project is built as wheel and source-distribution artifacts in the
|
|
80
|
+
GitHub Release archive, but it is not promoted to PyPI in the current release
|
|
81
|
+
plan and is not pulled by the `agi-apps` umbrella. Install it directly only when
|
|
82
|
+
validating the data quality gate package from a release artifact or a locally
|
|
83
|
+
built wheel.
|
|
84
|
+
|
|
85
|
+
## Run In AGILAB
|
|
86
|
+
|
|
87
|
+
Select `data_quality_gate_project`, open `ORCHESTRATE`, then run `INSTALL` and
|
|
88
|
+
`EXECUTE`. Open `ANALYSIS` or inspect the exported evidence directory to review
|
|
89
|
+
the contract, drift metrics, gate decision, and artifact manifest.
|
|
90
|
+
|
|
91
|
+
## Expected Inputs
|
|
92
|
+
|
|
93
|
+
The default run generates deterministic synthetic baseline and candidate
|
|
94
|
+
datasets. It does not require private data, a model registry, a cloud account,
|
|
95
|
+
an LLM, or an external network service.
|
|
96
|
+
|
|
97
|
+
## Expected Outputs
|
|
98
|
+
|
|
99
|
+
The app writes baseline and candidate CSV files, JSON profiles, a data contract,
|
|
100
|
+
drift metrics, a gate decision, a Markdown evidence report, a run manifest, and
|
|
101
|
+
a data-quality summary with artifact hashes.
|
|
102
|
+
|
|
103
|
+
## Change One Thing
|
|
104
|
+
|
|
105
|
+
Change only `drift_strength`, then rerun the app. Lower values should move the
|
|
106
|
+
gate toward `promote`; higher values should move it toward `manual-review` or
|
|
107
|
+
`block`. Keep `seed=2026` when you want artifact deltas that remain easy to
|
|
108
|
+
explain.
|
|
109
|
+
|
|
110
|
+
## Troubleshooting
|
|
111
|
+
|
|
112
|
+
If the package resolves but custom data does not, rerun the default synthetic
|
|
113
|
+
case first. Then verify that CSV and JSON paths are AGILAB-share-relative and
|
|
114
|
+
that the candidate file contains every column required by the contract. A noisy
|
|
115
|
+
or unexpected `manual-review` decision usually means the drift threshold was
|
|
116
|
+
tighter than the candidate distribution, so inspect `drift_metrics.csv` before
|
|
117
|
+
loosening the gate.
|
|
118
|
+
|
|
119
|
+
## Scope
|
|
120
|
+
|
|
121
|
+
This is a compact data-quality gate example. It does not replace a full data
|
|
122
|
+
observability platform, feature store, enterprise governance workflow, or
|
|
123
|
+
production approval authority. Its purpose is to make one data-readiness review
|
|
124
|
+
portable, deterministic, and evidence-backed.
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
# agi-app-data-quality-gate
|
|
2
|
+
|
|
3
|
+

|
|
4
|
+

|
|
5
|
+
[License: BSD 3-Clause](https://opensource.org/licenses/BSD-3-Clause)
|
|
6
|
+
|
|
7
|
+
`agi-app-data-quality-gate` packages the `data_quality_gate_project` AGILAB app.
|
|
8
|
+
It is a deterministic data contract, drift, leakage, and promotion-gate example
|
|
9
|
+
for teams that need a concrete proof before a candidate dataset reaches model
|
|
10
|
+
training or pilot promotion.
|
|
11
|
+
|
|
12
|
+
## Purpose
|
|
13
|
+
|
|
14
|
+
Use this package to show how AGILAB can turn a data-readiness review into
|
|
15
|
+
replayable evidence. The app generates a baseline dataset and a candidate
|
|
16
|
+
dataset, validates the expected columns, profiles quality, measures drift, and
|
|
17
|
+
writes a decision that can be reviewed before another system takes ownership.
|
|
18
|
+
|
|
19
|
+
## What You Learn
|
|
20
|
+
|
|
21
|
+
The packaged project demonstrates the same contract-first workflow without
|
|
22
|
+
requiring a source checkout. A first run shows the generated datasets, the
|
|
23
|
+
quality profiles, the drift table, the gate decision, and the manifest that ties
|
|
24
|
+
those artifacts together. It is intended to make a data promotion review easy to
|
|
25
|
+
rerun and easy to inspect from AGILAB.
|
|
26
|
+
|
|
27
|
+
## Installed Project
|
|
28
|
+
|
|
29
|
+
The distribution name is `agi-app-data-quality-gate`; the AGILAB project name is
|
|
30
|
+
`data_quality_gate_project`. The package exposes both `data_quality_gate` and
|
|
31
|
+
`data_quality_gate_project` through the `agilab.apps` entry point group, so
|
|
32
|
+
`AgiEnv(app="data_quality_gate_project")` resolves the project without a
|
|
33
|
+
monorepo checkout once this payload package is installed.
|
|
34
|
+
|
|
35
|
+
## Install
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
pip install agi-app-data-quality-gate
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
This is the stable package install shape once this distribution is promoted to
|
|
42
|
+
PyPI. For the current release artifact path, install the wheel directly:
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
pip install /path/to/agi_app_data_quality_gate-<version>-py3-none-any.whl
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
This app project is built as wheel and source-distribution artifacts in the
|
|
49
|
+
GitHub Release archive, but it is not promoted to PyPI in the current release
|
|
50
|
+
plan and is not pulled by the `agi-apps` umbrella. Install it directly only when
|
|
51
|
+
validating the data quality gate package from a release artifact or a locally
|
|
52
|
+
built wheel.
|
|
53
|
+
|
|
54
|
+
## Run In AGILAB
|
|
55
|
+
|
|
56
|
+
Select `data_quality_gate_project`, open `ORCHESTRATE`, then run `INSTALL` and
|
|
57
|
+
`EXECUTE`. Open `ANALYSIS` or inspect the exported evidence directory to review
|
|
58
|
+
the contract, drift metrics, gate decision, and artifact manifest.
|
|
59
|
+
|
|
60
|
+
## Expected Inputs
|
|
61
|
+
|
|
62
|
+
The default run generates deterministic synthetic baseline and candidate
|
|
63
|
+
datasets. It does not require private data, a model registry, a cloud account,
|
|
64
|
+
an LLM, or an external network service.
|
|
65
|
+
|
|
66
|
+
## Expected Outputs
|
|
67
|
+
|
|
68
|
+
The app writes baseline and candidate CSV files, JSON profiles, a data contract,
|
|
69
|
+
drift metrics, a gate decision, a Markdown evidence report, a run manifest, and
|
|
70
|
+
a data-quality summary with artifact hashes.
|
|
71
|
+
|
|
72
|
+
## Change One Thing
|
|
73
|
+
|
|
74
|
+
Change only `drift_strength`, then rerun the app. Lower values should move the
|
|
75
|
+
gate toward `promote`; higher values should move it toward `manual-review` or
|
|
76
|
+
`block`. Keep `seed=2026` when you want artifact deltas that remain easy to
|
|
77
|
+
explain.
|
|
78
|
+
|
|
79
|
+
## Troubleshooting
|
|
80
|
+
|
|
81
|
+
If the package resolves but custom data does not, rerun the default synthetic
|
|
82
|
+
case first. Then verify that CSV and JSON paths are AGILAB-share-relative and
|
|
83
|
+
that the candidate file contains every column required by the contract. A noisy
|
|
84
|
+
or unexpected `manual-review` decision usually means the drift threshold was
|
|
85
|
+
tighter than the candidate distribution, so inspect `drift_metrics.csv` before
|
|
86
|
+
loosening the gate.
|
|
87
|
+
|
|
88
|
+
## Scope
|
|
89
|
+
|
|
90
|
+
This is a compact data-quality gate example. It does not replace a full data
|
|
91
|
+
observability platform, feature store, enterprise governance workflow, or
|
|
92
|
+
production approval authority. Its purpose is to make one data-readiness review
|
|
93
|
+
portable, deterministic, and evidence-backed.
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
version = "2026.06.23"
|
|
3
|
+
name = "agi-app-data-quality-gate"
|
|
4
|
+
description = "AGILAB deterministic data contract, drift, leakage, and promotion gate"
|
|
5
|
+
requires-python = ">=3.11"
|
|
6
|
+
readme = "README.md"
|
|
7
|
+
authors = [
|
|
8
|
+
{ name = "Jean-Pierre Morard" }
|
|
9
|
+
]
|
|
10
|
+
maintainers = [{ name = "Jean-Pierre Morard" }]
|
|
11
|
+
license = "BSD-3-Clause"
|
|
12
|
+
license-files = ["LICENSE"]
|
|
13
|
+
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Intended Audience :: Developers",
|
|
16
|
+
"Development Status :: 4 - Beta",
|
|
17
|
+
"Programming Language :: Python :: 3",
|
|
18
|
+
"Programming Language :: Python :: 3.11",
|
|
19
|
+
"Programming Language :: Python :: 3.12",
|
|
20
|
+
"Programming Language :: Python :: 3.13",
|
|
21
|
+
"Operating System :: MacOS",
|
|
22
|
+
"Operating System :: Microsoft :: Windows",
|
|
23
|
+
"Operating System :: POSIX :: Linux",
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
keywords = [
|
|
27
|
+
"agilab",
|
|
28
|
+
"apps",
|
|
29
|
+
"data-quality",
|
|
30
|
+
"drift",
|
|
31
|
+
"reproducibility",
|
|
32
|
+
"workflow-orchestration",
|
|
33
|
+
]
|
|
34
|
+
|
|
35
|
+
dependencies = ["agi-env>=2026.05.31,<2027.0", "agi-node>=2026.05.31,<2027.0"]
|
|
36
|
+
|
|
37
|
+
[project.urls]
|
|
38
|
+
Documentation = "https://thalesgroup.github.io/agilab"
|
|
39
|
+
Source = "https://github.com/ThalesGroup/agilab/tree/main/src/agilab/lib/agi-app-data-quality-gate"
|
|
40
|
+
Issues = "https://github.com/ThalesGroup/agilab/issues"
|
|
41
|
+
Homepage = "https://github.com/ThalesGroup/agilab"
|
|
42
|
+
Repository = "https://github.com/ThalesGroup/agilab"
|
|
43
|
+
Discussions = "https://github.com/ThalesGroup/agilab/discussions"
|
|
44
|
+
Changelog = "https://github.com/ThalesGroup/agilab/releases"
|
|
45
|
+
|
|
46
|
+
[project.entry-points."agilab.apps"]
|
|
47
|
+
data_quality_gate = "agi_app_data_quality_gate:project_root"
|
|
48
|
+
data_quality_gate_project = "agi_app_data_quality_gate:project_root"
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
[dependency-groups]
|
|
54
|
+
dev = [
|
|
55
|
+
"pytest",
|
|
56
|
+
]
|
|
57
|
+
|
|
58
|
+
[tool.uv.sources.agi-env]
|
|
59
|
+
path = "../../core/agi-env"
|
|
60
|
+
editable = true
|
|
61
|
+
|
|
62
|
+
[tool.uv.sources.agi-node]
|
|
63
|
+
path = "../../core/agi-node"
|
|
64
|
+
editable = true
|
|
65
|
+
|
|
66
|
+
[build-system]
|
|
67
|
+
requires = ["setuptools>=68", "wheel"]
|
|
68
|
+
build-backend = "setuptools.build_meta"
|
|
69
|
+
|
|
70
|
+
[tool.setuptools]
|
|
71
|
+
include-package-data = false
|
|
72
|
+
package-dir = {"" = "src"}
|
|
73
|
+
packages = ["agi_app_data_quality_gate"]
|
|
74
|
+
|
|
75
|
+
[tool.setuptools.package-data]
|
|
76
|
+
"agi_app_data_quality_gate" = [
|
|
77
|
+
"project/**/*",
|
|
78
|
+
]
|
|
79
|
+
|
|
80
|
+
[tool.setuptools.exclude-package-data]
|
|
81
|
+
"agi_app_data_quality_gate" = [
|
|
82
|
+
"project/**/.venv/**",
|
|
83
|
+
"project/**/__pycache__/**",
|
|
84
|
+
"project/**/*.pyc",
|
|
85
|
+
"project/**/*.pyo",
|
|
86
|
+
"project/**/*.pyx",
|
|
87
|
+
"project/**/*.c",
|
|
88
|
+
"project/**/*.so",
|
|
89
|
+
"project/**/uv.lock",
|
|
90
|
+
]
|
|
91
|
+
|
|
92
|
+
[tool.pytest.ini_options]
|
|
93
|
+
testpaths = ["test"]
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import importlib.util
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from setuptools import setup
|
|
7
|
+
from setuptools.command.build_py import build_py as _build_py
|
|
8
|
+
from setuptools.command.sdist import sdist as _sdist
|
|
9
|
+
|
|
10
|
+
APP_PROJECT = "data_quality_gate_project"
|
|
11
|
+
PACKAGE_IMPORT = "agi_app_data_quality_gate"
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _load_build_support():
    """Load the ``app_project_build_support`` helper module, if it is available.

    The helper is looked up at
    ``<parents[4] of this file>/src/agilab/lib/app_project_build_support.py``.

    Returns:
        The executed helper module, or ``None`` when the helper file does not
        exist or when this file has too few ancestor directories for the
        lookup to be possible.

    Raises:
        RuntimeError: If the helper file exists but no import spec/loader can
            be created for it.
    """
    ancestors = Path(__file__).resolve().parents
    # A shallow location (for example an sdist unpacked directly under a
    # temporary directory) has fewer than five ancestors; indexing parents[4]
    # would raise IndexError, so treat it like "helper not available".
    if len(ancestors) <= 4:
        return None
    module_path = ancestors[4] / "src" / "agilab" / "lib" / "app_project_build_support.py"
    if not module_path.exists():
        return None
    spec = importlib.util.spec_from_file_location("agilab_app_project_build_support", module_path)
    if spec is None or spec.loader is None:
        raise RuntimeError(f"Unable to load app project build support from {module_path}")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _copy_payload(target_root: Path) -> None:
    """Copy the app project payload into ``target_root`` when build support exists.

    Does nothing when ``_load_build_support()`` returns ``None``. Otherwise it
    calls ``copy_app_project_payload(APP_PROJECT, target_root)`` on the helper
    module and prints one line for every path that call returns.
    """
    build_support = _load_build_support()
    if build_support is not None:
        # NOTE(review): the helper presumably returns the manifests it
        # rewrote while copying; confirm against app_project_build_support.
        sanitized = build_support.copy_app_project_payload(APP_PROJECT, target_root)
        for manifest_path in sanitized:
            print(f"[{PACKAGE_IMPORT}] sanitized packaged app manifest: {manifest_path}")
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class build_py(_build_py):
    """``build_py`` command that also copies the app project payload."""

    def run(self):
        """Run the standard build, then copy the payload under ``build_lib``."""
        super().run()
        payload_dir = Path(self.build_lib).joinpath(PACKAGE_IMPORT, "project")
        _copy_payload(payload_dir)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class sdist(_sdist):
    """``sdist`` command that also copies the app project payload."""

    def make_release_tree(self, base_dir, files):
        """Build the release tree, then copy the payload under ``base_dir/src``."""
        super().make_release_tree(base_dir, files)
        payload_dir = Path(base_dir).joinpath("src", PACKAGE_IMPORT, "project")
        _copy_payload(payload_dir)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# Register the custom build_py and sdist commands defined in this file.
setup(cmdclass={"build_py": build_py, "sdist": sdist})
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""Installed AGILAB app project provider for data_quality_gate_project."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
APP_SLUG = "data_quality_gate"
|
|
8
|
+
PROJECT_NAME = "data_quality_gate_project"
|
|
9
|
+
PACKAGE_NAME = "agi-app-data-quality-gate"
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def package_root() -> Path:
    """Return the resolved directory that contains this module file."""
    module_file = Path(__file__).resolve()
    return module_file.parent
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def project_root() -> Path:
    """Return the directory of the ``PROJECT_NAME`` app project.

    Preference order:

    1. ``<parents[4] of this file>/apps/builtin/<PROJECT_NAME>`` when that
       directory exists.
    2. ``<package_root()>/project/<PROJECT_NAME>`` otherwise.

    The returned path is not checked for existence in the second case.
    """
    ancestors = Path(__file__).resolve().parents
    # A shallow install location (for example ``pip install --target /app``)
    # has fewer than five ancestors; indexing parents[4] there would raise
    # IndexError, so skip straight to the packaged project.
    if len(ancestors) > 4:
        source_root = ancestors[4] / "apps" / "builtin" / PROJECT_NAME
        if source_root.exists():
            return source_root
    return package_root() / "project" / PROJECT_NAME
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def metadata() -> dict[str, str]:
    """Return the app slug, project name, package name and project root as strings."""
    info = dict(slug=APP_SLUG, project=PROJECT_NAME, package=PACKAGE_NAME)
    # Resolved at call time so the result reflects the current filesystem.
    info["project_root"] = str(project_root())
    return info
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
__all__ = ["APP_SLUG", "PACKAGE_NAME", "PROJECT_NAME", "metadata", "package_root", "project_root"]
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# Data Quality Gate Project
|
|
2
|
+
|
|
3
|
+
`data_quality_gate_project` is a built-in AGILAB app for a production-adjacent
|
|
4
|
+
data contract and drift gate. It turns the common "is this candidate dataset
|
|
5
|
+
safe to promote?" review into a deterministic run with machine-readable
|
|
6
|
+
evidence, a human report, and a clear gate decision.
|
|
7
|
+
|
|
8
|
+
## Purpose
|
|
9
|
+
|
|
10
|
+
Use this project when you want a fast, understandable proof that AGILAB can
|
|
11
|
+
protect an AI/ML workflow before model training or promotion. The app generates
|
|
12
|
+
a baseline dataset and a candidate dataset, validates the contract, measures
|
|
13
|
+
quality and drift, then writes a gate decision that can be reviewed or wired
|
|
14
|
+
into a later CI/promotion step.
|
|
15
|
+
|
|
16
|
+
## What You Learn
|
|
17
|
+
|
|
18
|
+
The first run shows how AGILAB turns a data-readiness question into replayable
|
|
19
|
+
evidence rather than a spreadsheet note. You see the app produce source data,
|
|
20
|
+
profile both sides of the comparison, apply a contract, score drift, and write a
|
|
21
|
+
decision card that names the failing or passing gate. It is a compact example of
|
|
22
|
+
how an experiment workbench can protect a downstream model workflow before
|
|
23
|
+
training begins.
|
|
24
|
+
|
|
25
|
+
## Run In AGILAB
|
|
26
|
+
|
|
27
|
+
Select `data_quality_gate_project`, then open `ORCHESTRATE`. Keep the default
|
|
28
|
+
arguments for the first run, click `INSTALL`, then click `RUN`.
|
|
29
|
+
|
|
30
|
+
The default configuration creates a deterministic candidate dataset with a
|
|
31
|
+
small business distribution shift. The run should complete locally and write
|
|
32
|
+
the data quality evidence under `data_quality_gate/evidence`.
|
|
33
|
+
|
|
34
|
+
To gate your own data, place two CSV files under the AGILAB share and set
|
|
35
|
+
`baseline_csv` plus `candidate_csv` to their relative paths. Optional
|
|
36
|
+
`contract_json` and `thresholds_json` files can override the default column
|
|
37
|
+
contract and promotion thresholds without editing Python code.
|
|
38
|
+
|
|
39
|
+
## Expected Inputs
|
|
40
|
+
|
|
41
|
+
No external data, API key, cloud service, notebook, model registry, or LLM is
|
|
42
|
+
required for the first run. The app can also read user-provided baseline and
|
|
43
|
+
candidate CSV files from the AGILAB share. Contract JSON accepts:
|
|
44
|
+
|
|
45
|
+
- `columns`: mapping from column name to `{kind, role, required, drift}`.
|
|
46
|
+
- `allow_unexpected_columns`: whether extra candidate columns are accepted.
|
|
47
|
+
- `target_column`, `identifier_columns`, and `leakage_name_patterns`.
|
|
48
|
+
- `thresholds`: optional overrides for PSI, KS, null-rate, duplicate-rate, row
|
|
49
|
+
count, mean-shift, and category-delta thresholds.
|
|
50
|
+
|
|
51
|
+
## Expected Outputs
|
|
52
|
+
|
|
53
|
+
The worker writes:
|
|
54
|
+
|
|
55
|
+
- `baseline.csv`
|
|
56
|
+
- `candidate.csv`
|
|
57
|
+
- `baseline_profile.json`
|
|
58
|
+
- `candidate_profile.json`
|
|
59
|
+
- `data_contract.json`
|
|
60
|
+
- `drift_metrics.csv`
|
|
61
|
+
- `gate_decision.json`
|
|
62
|
+
- `decision_card.json`
|
|
63
|
+
- `data_quality_dashboard.html`
|
|
64
|
+
- `input_sources.json`
|
|
65
|
+
- `data_quality_report.md`
|
|
66
|
+
- `run_manifest.json`
|
|
67
|
+
- `data_quality_gate_summary.json`
|
|
68
|
+
|
|
69
|
+
The same evidence bundle is mirrored under the app analysis export directory so
|
|
70
|
+
generic artifact readers can inspect it later.
|
|
71
|
+
|
|
72
|
+
## Change One Thing
|
|
73
|
+
|
|
74
|
+
After the default run works, change only one thing:
|
|
75
|
+
|
|
76
|
+
- Raise or lower `drift_strength` to see the synthetic decision move.
|
|
77
|
+
- Or set `baseline_csv` and `candidate_csv` to your own share-relative files.
|
|
78
|
+
- Or set `thresholds_json` to tighten/relax the gate without code changes.
|
|
79
|
+
|
|
80
|
+
Keep `seed=2026` for synthetic comparisons so artifact deltas remain easy to
|
|
81
|
+
explain.
|
|
82
|
+
|
|
83
|
+
## Example Quality Plan
|
|
84
|
+
|
|
85
|
+
- Review artifact: Review `data_quality_report.md` and `gate_decision.json` first; they explain why a dataset is allowed, warned, or blocked before model work starts.
|
|
86
|
+
- Practice change: Change one threshold or one missing-value count in the seeded input and confirm the gate moves from pass to warn or fail with an actionable reason.
|
|
87
|
+
- Quality check: A mature run leaves a stable gate report, a concise summary, and no hidden dependency on private data or external services.
|
|
88
|
+
|
|
89
|
+
## Troubleshooting
|
|
90
|
+
|
|
91
|
+
If custom CSV inputs fail, first run the defaults again to confirm the app and
|
|
92
|
+
worker install are healthy. Then check that `baseline_csv`, `candidate_csv`,
|
|
93
|
+
`contract_json`, and `thresholds_json` are relative to the AGILAB share, not to
|
|
94
|
+
the repository checkout. Contract errors usually mean a required column is
|
|
95
|
+
missing, a numeric column was parsed as text, or a threshold override used a name
|
|
96
|
+
that is not present in the generated `data_contract.json`.
|
|
97
|
+
|
|
98
|
+
## Scope
|
|
99
|
+
|
|
100
|
+
This app is a deterministic data-quality and drift gate example. It is not a
|
|
101
|
+
full data observability platform, feature store, model registry, or production
|
|
102
|
+
governance system. Its job is to make one candidate dataset review reproducible,
|
|
103
|
+
portable, and evidence-backed before another system takes ownership.
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
[[stages]]
|
|
2
|
+
id = "build_candidate_data"
|
|
3
|
+
label = "Load or build baseline and candidate data"
|
|
4
|
+
kind = "data"
|
|
5
|
+
produces = ["baseline.csv", "candidate.csv", "input_sources.json"]
|
|
6
|
+
M = "user CSV inputs when provided, otherwise deterministic seeded tabular generator"
|
|
7
|
+
|
|
8
|
+
[[stages]]
|
|
9
|
+
id = "validate_contract"
|
|
10
|
+
label = "Validate data contract"
|
|
11
|
+
kind = "validation"
|
|
12
|
+
depends_on = ["build_candidate_data"]
|
|
13
|
+
produces = ["data_contract.json", "baseline_profile.json", "candidate_profile.json"]
|
|
14
|
+
M = "configurable schema, type, null, duplicate, and leakage checks"
|
|
15
|
+
|
|
16
|
+
[[stages]]
|
|
17
|
+
id = "measure_drift"
|
|
18
|
+
label = "Measure drift"
|
|
19
|
+
kind = "evidence"
|
|
20
|
+
depends_on = ["validate_contract"]
|
|
21
|
+
produces = ["drift_metrics.csv"]
|
|
22
|
+
M = "PSI, KS statistic, mean shift, and category delta"
|
|
23
|
+
|
|
24
|
+
[[stages]]
|
|
25
|
+
id = "write_gate_decision"
|
|
26
|
+
label = "Write gate decision"
|
|
27
|
+
kind = "decision"
|
|
28
|
+
depends_on = ["measure_drift"]
|
|
29
|
+
produces = ["gate_decision.json", "decision_card.json", "data_quality_dashboard.html", "run_manifest.json", "data_quality_report.md"]
|
|
30
|
+
M = "AGILAB data-quality evidence writer"
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
digraph data_quality_gate_project {
|
|
2
|
+
rankdir=LR;
|
|
3
|
+
node [shape=box, style="rounded"];
|
|
4
|
+
|
|
5
|
+
build_candidate_data [label="Load/build baseline + candidate"];
|
|
6
|
+
validate_contract [label="Validate contract"];
|
|
7
|
+
measure_drift [label="Measure drift"];
|
|
8
|
+
write_gate_decision [label="Write decision + dashboard"];
|
|
9
|
+
|
|
10
|
+
build_candidate_data -> validate_contract;
|
|
11
|
+
validate_contract -> measure_drift;
|
|
12
|
+
measure_drift -> write_gate_decision;
|
|
13
|
+
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "data_quality_gate_project"
|
|
3
|
+
version = "2026.05.30.post1"
|
|
4
|
+
description = "Built-in AGILAB data contract and drift gate app with promotion evidence"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.11"
|
|
7
|
+
authors = [
|
|
8
|
+
{ name = "Jean-Pierre Morard" }
|
|
9
|
+
]
|
|
10
|
+
dependencies = ["agi-env>=2026.05.31", "agi-node>=2026.05.31", "pandas>=2.3.0,<4", "pydantic>=2.11,<2.13", "streamlit>=1.57,<1.58"]
|
|
11
|
+
|
|
12
|
+
[project.urls]
|
|
13
|
+
Documentation = "https://thalesgroup.github.io/agilab"
|
|
14
|
+
Source = "https://github.com/ThalesGroup/agilab/tree/main/src/agilab/apps/builtin/data_quality_gate_project"
|
|
15
|
+
Issues = "https://github.com/ThalesGroup/agilab/issues"
|
|
16
|
+
Homepage = "https://github.com/ThalesGroup/agilab"
|
|
17
|
+
Repository = "https://github.com/ThalesGroup/agilab"
|
|
18
|
+
Discussions = "https://github.com/ThalesGroup/agilab/discussions"
|
|
19
|
+
Changelog = "https://github.com/ThalesGroup/agilab/releases"
|
|
20
|
+
|
|
21
|
+
[build-system]
|
|
22
|
+
requires = ["setuptools"]
|
|
23
|
+
build-backend = "setuptools.build_meta"
|