agent-ci-verify 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_ci_verify-0.1.0/LICENSE +21 -0
- agent_ci_verify-0.1.0/PKG-INFO +165 -0
- agent_ci_verify-0.1.0/README.md +133 -0
- agent_ci_verify-0.1.0/pyproject.toml +64 -0
- agent_ci_verify-0.1.0/setup.cfg +4 -0
- agent_ci_verify-0.1.0/src/agent_ci/__init__.py +3 -0
- agent_ci_verify-0.1.0/src/agent_ci/checkers/__init__.py +28 -0
- agent_ci_verify-0.1.0/src/agent_ci/checkers/base.py +5 -0
- agent_ci_verify-0.1.0/src/agent_ci/checkers/diff.py +162 -0
- agent_ci_verify-0.1.0/src/agent_ci/checkers/fact.py +300 -0
- agent_ci_verify-0.1.0/src/agent_ci/checkers/schema.py +210 -0
- agent_ci_verify-0.1.0/src/agent_ci/cli.py +156 -0
- agent_ci_verify-0.1.0/src/agent_ci/config.py +94 -0
- agent_ci_verify-0.1.0/src/agent_ci/pipeline.py +44 -0
- agent_ci_verify-0.1.0/src/agent_ci/types.py +82 -0
- agent_ci_verify-0.1.0/src/agent_ci_verify.egg-info/PKG-INFO +165 -0
- agent_ci_verify-0.1.0/src/agent_ci_verify.egg-info/SOURCES.txt +23 -0
- agent_ci_verify-0.1.0/src/agent_ci_verify.egg-info/dependency_links.txt +1 -0
- agent_ci_verify-0.1.0/src/agent_ci_verify.egg-info/entry_points.txt +2 -0
- agent_ci_verify-0.1.0/src/agent_ci_verify.egg-info/requires.txt +16 -0
- agent_ci_verify-0.1.0/src/agent_ci_verify.egg-info/top_level.txt +1 -0
- agent_ci_verify-0.1.0/tests/test_diff_checker.py +122 -0
- agent_ci_verify-0.1.0/tests/test_fact_checker.py +211 -0
- agent_ci_verify-0.1.0/tests/test_pipeline.py +85 -0
- agent_ci_verify-0.1.0/tests/test_schema_checker.py +134 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Lewis-404
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: agent-ci-verify
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: CI/CD verification pipeline for AI agent outputs — fact check, schema validation, diff verification
|
|
5
|
+
Author: Lewis-404
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/Lewis-404/agent-ci
|
|
8
|
+
Project-URL: Repository, https://github.com/Lewis-404/agent-ci.git
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Requires-Python: >=3.10
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
License-File: LICENSE
|
|
17
|
+
Requires-Dist: pyyaml>=6.0
|
|
18
|
+
Requires-Dist: jsonschema>=4.20
|
|
19
|
+
Requires-Dist: httpx>=0.27
|
|
20
|
+
Requires-Dist: rich>=13.0
|
|
21
|
+
Requires-Dist: click>=8.1
|
|
22
|
+
Provides-Extra: llm
|
|
23
|
+
Requires-Dist: openai>=1.0; extra == "llm"
|
|
24
|
+
Requires-Dist: litellm>=1.0; extra == "llm"
|
|
25
|
+
Provides-Extra: dev
|
|
26
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
27
|
+
Requires-Dist: pytest-cov>=5.0; extra == "dev"
|
|
28
|
+
Requires-Dist: pytest-asyncio>=0.24; extra == "dev"
|
|
29
|
+
Requires-Dist: ruff>=0.4; extra == "dev"
|
|
30
|
+
Requires-Dist: mypy>=1.8; extra == "dev"
|
|
31
|
+
Dynamic: license-file
|
|
32
|
+
|
|
33
|
+
# agent-ci
|
|
34
|
+
|
|
35
|
+
> CI/CD verification pipeline for AI agent outputs.
|
|
36
|
+
> **Don't trust your agent's output — verify it.**
|
|
37
|
+
|
|
38
|
+
[](https://github.com/Lewis-404/agent-ci/actions/workflows/ci.yml)
|
|
39
|
+
[PyPI version](https://pypi.org/project/agent-ci-verify/)
|
|
40
|
+
[Python versions](https://pypi.org/project/agent-ci-verify/)
|
|
41
|
+
[](LICENSE)
|
|
42
|
+
|
|
43
|
+
---
|
|
44
|
+
|
|
45
|
+
## Why agent-ci?
|
|
46
|
+
|
|
47
|
+
AI agents are entering production, but **no one can answer "can I trust this output?"**
|
|
48
|
+
|
|
49
|
+
Existing tools are all "eval libraries" — you import them and write tests yourself. That's self-review, not independent verification.
|
|
50
|
+
|
|
51
|
+
**agent-ci is your agent's CI/CD pipeline** — plug it in, and every agent output goes through an independent verification layer before it reaches your users.
|
|
52
|
+
|
|
53
|
+
## Quick Start
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
pip install agent-ci-verify
|
|
57
|
+
agent-ci ./agent-output/
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
```
|
|
61
|
+
agent-ci v0.1.0
|
|
62
|
+
Output dir: ./agent-output/
|
|
63
|
+
Checkers: schema, fact, diff
|
|
64
|
+
|
|
65
|
+
📋 Schema Checker
|
|
66
|
+
┏━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
|
|
67
|
+
┃ ✅ │ json_valid │ ┃
|
|
68
|
+
┃ ✅ │ yaml_valid │ ┃
|
|
69
|
+
┃ ✅ │ security_scan │ No secrets detected ┃
|
|
70
|
+
┗━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
|
|
71
|
+
|
|
72
|
+
🔍 Fact Checker
|
|
73
|
+
┏━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
|
|
74
|
+
┃ ✅ │ fact:file_count │ 1 files for '*.json' ┃
|
|
75
|
+
┃ ✅ │ fact:content_contains│ 'success' found in result.json ┃
|
|
76
|
+
┗━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
|
|
77
|
+
|
|
78
|
+
╭────────────────────────────────── Verdict ────────────────────────────────╮
|
|
79
|
+
│ ✅ PASS │
|
|
80
|
+
╰───────────────────────────────────────────────────────────────────────────╯
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## Three Verification Layers
|
|
84
|
+
|
|
85
|
+
| Layer | What it checks | Example |
|
|
86
|
+
|-------|---------------|---------|
|
|
87
|
+
| **Schema** | Format, structure, security | Valid JSON? API key leaked? Required files present? |
|
|
88
|
+
| **Fact** | File existence, API reconciliation, LLM judging | Agent claimed `result.json` exists — does it? API returned 200? |
|
|
89
|
+
| **Diff** | Regression detection, semantic drift | Output changed vs baseline? Similarity below threshold? |
|
|
90
|
+
|
|
91
|
+
## Configuration
|
|
92
|
+
|
|
93
|
+
Drop `.agent-ci.yaml` in your agent project root:
|
|
94
|
+
|
|
95
|
+
```yaml
|
|
96
|
+
pipeline:
|
|
97
|
+
enabled_checkers: [schema, fact, diff]
|
|
98
|
+
fail_fast: false
|
|
99
|
+
|
|
100
|
+
schema:
|
|
101
|
+
security:
|
|
102
|
+
enabled: true
|
|
103
|
+
required_files:
|
|
104
|
+
- "output/result.json"
|
|
105
|
+
json_schemas:
|
|
106
|
+
schemas/output.schema.json: "output/**/*.json"
|
|
107
|
+
|
|
108
|
+
fact:
|
|
109
|
+
files:
|
|
110
|
+
- pattern: "output/**/*.json"
|
|
111
|
+
expected_count: 1
|
|
112
|
+
min_size_bytes: 10
|
|
113
|
+
content_checks:
|
|
114
|
+
- type: contains
|
|
115
|
+
value: "success"
|
|
116
|
+
- type: not_contains
|
|
117
|
+
value: "error"
|
|
118
|
+
api:
|
|
119
|
+
- endpoint: "https://api.example.com/health"
|
|
120
|
+
expected_status: 200
|
|
121
|
+
llm_judge:
|
|
122
|
+
- file: "output/answer.md"
|
|
123
|
+
rubric: "Is the answer factually correct?"
|
|
124
|
+
model: "gpt-4o-mini"
|
|
125
|
+
|
|
126
|
+
diff:
|
|
127
|
+
baseline: "./baseline-output/"
|
|
128
|
+
semantic_threshold: 0.7
|
|
129
|
+
max_changed_files: 5
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
## Security Scanning
|
|
133
|
+
|
|
134
|
+
Built-in patterns detect:
|
|
135
|
+
- AWS Access Keys (`AKIA...`)
|
|
136
|
+
- GitHub Tokens (`ghp_...`)
|
|
137
|
+
- OpenAI API Keys (`sk-proj-...`)
|
|
138
|
+
- JWT Tokens
|
|
139
|
+
- Private Keys (RSA, EC, DSA, OpenSSH)
|
|
140
|
+
- Password/Secret assignments
|
|
141
|
+
|
|
142
|
+
## CI Integration
|
|
143
|
+
|
|
144
|
+
```yaml
|
|
145
|
+
# .github/workflows/agent-check.yml
|
|
146
|
+
- name: Verify agent output
|
|
147
|
+
run: |
|
|
148
|
+
pip install agent-ci-verify
|
|
149
|
+
agent-ci ./output/
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
## Development
|
|
153
|
+
|
|
154
|
+
```bash
|
|
155
|
+
git clone https://github.com/Lewis-404/agent-ci.git
|
|
156
|
+
cd agent-ci
|
|
157
|
+
python -m venv .venv
|
|
158
|
+
source .venv/bin/activate
|
|
159
|
+
pip install -e ".[dev]"
|
|
160
|
+
pytest tests/ -v
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
## License
|
|
164
|
+
|
|
165
|
+
MIT — see [LICENSE](./LICENSE)
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
# agent-ci
|
|
2
|
+
|
|
3
|
+
> CI/CD verification pipeline for AI agent outputs.
|
|
4
|
+
> **Don't trust your agent's output — verify it.**
|
|
5
|
+
|
|
6
|
+
[](https://github.com/Lewis-404/agent-ci/actions/workflows/ci.yml)
|
|
7
|
+
[PyPI version](https://pypi.org/project/agent-ci-verify/)
|
|
8
|
+
[Python versions](https://pypi.org/project/agent-ci-verify/)
|
|
9
|
+
[](LICENSE)
|
|
10
|
+
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
## Why agent-ci?
|
|
14
|
+
|
|
15
|
+
AI agents are entering production, but **no one can answer "can I trust this output?"**
|
|
16
|
+
|
|
17
|
+
Existing tools are all "eval libraries" — you import them and write tests yourself. That's self-review, not independent verification.
|
|
18
|
+
|
|
19
|
+
**agent-ci is your agent's CI/CD pipeline** — plug it in, and every agent output goes through an independent verification layer before it reaches your users.
|
|
20
|
+
|
|
21
|
+
## Quick Start
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
pip install agent-ci-verify
|
|
25
|
+
agent-ci ./agent-output/
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
```
|
|
29
|
+
agent-ci v0.1.0
|
|
30
|
+
Output dir: ./agent-output/
|
|
31
|
+
Checkers: schema, fact, diff
|
|
32
|
+
|
|
33
|
+
📋 Schema Checker
|
|
34
|
+
┏━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
|
|
35
|
+
┃ ✅ │ json_valid │ ┃
|
|
36
|
+
┃ ✅ │ yaml_valid │ ┃
|
|
37
|
+
┃ ✅ │ security_scan │ No secrets detected ┃
|
|
38
|
+
┗━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
|
|
39
|
+
|
|
40
|
+
🔍 Fact Checker
|
|
41
|
+
┏━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
|
|
42
|
+
┃ ✅ │ fact:file_count │ 1 files for '*.json' ┃
|
|
43
|
+
┃ ✅ │ fact:content_contains│ 'success' found in result.json ┃
|
|
44
|
+
┗━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
|
|
45
|
+
|
|
46
|
+
╭────────────────────────────────── Verdict ────────────────────────────────╮
|
|
47
|
+
│ ✅ PASS │
|
|
48
|
+
╰───────────────────────────────────────────────────────────────────────────╯
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Three Verification Layers
|
|
52
|
+
|
|
53
|
+
| Layer | What it checks | Example |
|
|
54
|
+
|-------|---------------|---------|
|
|
55
|
+
| **Schema** | Format, structure, security | Valid JSON? API key leaked? Required files present? |
|
|
56
|
+
| **Fact** | File existence, API reconciliation, LLM judging | Agent claimed `result.json` exists — does it? API returned 200? |
|
|
57
|
+
| **Diff** | Regression detection, semantic drift | Output changed vs baseline? Similarity below threshold? |
|
|
58
|
+
|
|
59
|
+
## Configuration
|
|
60
|
+
|
|
61
|
+
Drop `.agent-ci.yaml` in your agent project root:
|
|
62
|
+
|
|
63
|
+
```yaml
|
|
64
|
+
pipeline:
|
|
65
|
+
enabled_checkers: [schema, fact, diff]
|
|
66
|
+
fail_fast: false
|
|
67
|
+
|
|
68
|
+
schema:
|
|
69
|
+
security:
|
|
70
|
+
enabled: true
|
|
71
|
+
required_files:
|
|
72
|
+
- "output/result.json"
|
|
73
|
+
json_schemas:
|
|
74
|
+
schemas/output.schema.json: "output/**/*.json"
|
|
75
|
+
|
|
76
|
+
fact:
|
|
77
|
+
files:
|
|
78
|
+
- pattern: "output/**/*.json"
|
|
79
|
+
expected_count: 1
|
|
80
|
+
min_size_bytes: 10
|
|
81
|
+
content_checks:
|
|
82
|
+
- type: contains
|
|
83
|
+
value: "success"
|
|
84
|
+
- type: not_contains
|
|
85
|
+
value: "error"
|
|
86
|
+
api:
|
|
87
|
+
- endpoint: "https://api.example.com/health"
|
|
88
|
+
expected_status: 200
|
|
89
|
+
llm_judge:
|
|
90
|
+
- file: "output/answer.md"
|
|
91
|
+
rubric: "Is the answer factually correct?"
|
|
92
|
+
model: "gpt-4o-mini"
|
|
93
|
+
|
|
94
|
+
diff:
|
|
95
|
+
baseline: "./baseline-output/"
|
|
96
|
+
semantic_threshold: 0.7
|
|
97
|
+
max_changed_files: 5
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
## Security Scanning
|
|
101
|
+
|
|
102
|
+
Built-in patterns detect:
|
|
103
|
+
- AWS Access Keys (`AKIA...`)
|
|
104
|
+
- GitHub Tokens (`ghp_...`)
|
|
105
|
+
- OpenAI API Keys (`sk-proj-...`)
|
|
106
|
+
- JWT Tokens
|
|
107
|
+
- Private Keys (RSA, EC, DSA, OpenSSH)
|
|
108
|
+
- Password/Secret assignments
|
|
109
|
+
|
|
110
|
+
## CI Integration
|
|
111
|
+
|
|
112
|
+
```yaml
|
|
113
|
+
# .github/workflows/agent-check.yml
|
|
114
|
+
- name: Verify agent output
|
|
115
|
+
run: |
|
|
116
|
+
pip install agent-ci-verify
|
|
117
|
+
agent-ci ./output/
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
## Development
|
|
121
|
+
|
|
122
|
+
```bash
|
|
123
|
+
git clone https://github.com/Lewis-404/agent-ci.git
|
|
124
|
+
cd agent-ci
|
|
125
|
+
python -m venv .venv
|
|
126
|
+
source .venv/bin/activate
|
|
127
|
+
pip install -e ".[dev]"
|
|
128
|
+
pytest tests/ -v
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
## License
|
|
132
|
+
|
|
133
|
+
MIT — see [LICENSE](./LICENSE)
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[tool.setuptools.packages.find]
|
|
6
|
+
where = ["src"]
|
|
7
|
+
|
|
8
|
+
[project]
|
|
9
|
+
name = "agent-ci-verify"
|
|
10
|
+
version = "0.1.0"
|
|
11
|
+
description = "CI/CD verification pipeline for AI agent outputs — fact check, schema validation, diff verification"
|
|
12
|
+
readme = "README.md"
|
|
13
|
+
license = "MIT"
|
|
14
|
+
license-files = ["LICENSE"]
|
|
15
|
+
requires-python = ">=3.10"
|
|
16
|
+
authors = [
|
|
17
|
+
{ name = "Lewis-404" },
|
|
18
|
+
]
|
|
19
|
+
classifiers = [
|
|
20
|
+
"Development Status :: 3 - Alpha",
|
|
21
|
+
"Intended Audience :: Developers",
|
|
22
|
+
"Programming Language :: Python :: 3.10",
|
|
23
|
+
"Programming Language :: Python :: 3.11",
|
|
24
|
+
"Programming Language :: Python :: 3.12",
|
|
25
|
+
]
|
|
26
|
+
|
|
27
|
+
dependencies = [
|
|
28
|
+
"pyyaml>=6.0",
|
|
29
|
+
"jsonschema>=4.20",
|
|
30
|
+
"httpx>=0.27",
|
|
31
|
+
"rich>=13.0",
|
|
32
|
+
"click>=8.1",
|
|
33
|
+
]
|
|
34
|
+
|
|
35
|
+
[project.optional-dependencies]
|
|
36
|
+
llm = [
|
|
37
|
+
"openai>=1.0",
|
|
38
|
+
"litellm>=1.0",
|
|
39
|
+
]
|
|
40
|
+
dev = [
|
|
41
|
+
"pytest>=8.0",
|
|
42
|
+
"pytest-cov>=5.0",
|
|
43
|
+
"pytest-asyncio>=0.24",
|
|
44
|
+
"ruff>=0.4",
|
|
45
|
+
"mypy>=1.8",
|
|
46
|
+
]
|
|
47
|
+
|
|
48
|
+
[project.scripts]
|
|
49
|
+
agent-ci = "agent_ci.cli:main"
|
|
50
|
+
|
|
51
|
+
[project.urls]
|
|
52
|
+
Homepage = "https://github.com/Lewis-404/agent-ci"
|
|
53
|
+
Repository = "https://github.com/Lewis-404/agent-ci.git"
|
|
54
|
+
|
|
55
|
+
[tool.ruff]
|
|
56
|
+
line-length = 100
|
|
57
|
+
target-version = "py310"
|
|
58
|
+
|
|
59
|
+
[tool.ruff.lint]
|
|
60
|
+
select = ["E", "F", "I", "N", "W", "UP", "B", "SIM"]
|
|
61
|
+
|
|
62
|
+
[tool.pytest.ini_options]
|
|
63
|
+
testpaths = ["tests"]
|
|
64
|
+
addopts = "-v --tb=short"
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""Checkers package — verification checkers for agent outputs."""
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from agent_ci.types import CheckerReport
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class BaseChecker(ABC):
    """Common interface shared by every verification checker.

    Subclasses override ``name`` and implement :meth:`verify`.
    """

    name: str = "base"

    def __init__(self, config: dict[str, Any] | None = None):
        """Store ``config``; a missing or empty value becomes a new empty dict."""
        self.config = config if config else {}

    @abstractmethod
    async def verify(self, output_dir: Path) -> CheckerReport:
        """Check the contents of ``output_dir`` and return the resulting report."""
        ...

    def _resolve_path(self, output_dir: Path, pattern: str) -> list[Path]:
        """Return the paths matching glob ``pattern`` under ``output_dir``, sorted."""
        matches = list(output_dir.glob(pattern))
        matches.sort()
        return matches
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
__all__ = ["BaseChecker"]
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
"""Diff Checker — compares agent output against baseline to detect regressions."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from agent_ci.checkers.base import BaseChecker
|
|
7
|
+
from agent_ci.types import CheckResult, CheckerReport, Severity
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class DiffChecker(BaseChecker):
    """Compare the current agent output against a baseline directory.

    Reads the ``diff`` section of the checker config:

    * ``baseline`` -- path of the baseline directory (required for any check).
    * ``max_added_files`` / ``max_removed_files`` / ``max_changed_files`` --
      optional limits; ``0`` is a valid limit meaning "none allowed".
    * ``semantic_threshold`` -- similarity below which a changed file is
      reported as a warning (default ``0.7``).
    """

    name = "diff"

    # Only files with one of these suffixes are compared; all others are ignored.
    _TEXT_SUFFIXES = frozenset({
        ".json", ".yaml", ".yml", ".txt", ".md", ".py", ".js",
        ".ts", ".go", ".csv", ".html", ".xml", ".toml",
    })

    async def verify(self, output_dir: Path) -> CheckerReport:
        """Run the added / removed / changed / threshold checks.

        Args:
            output_dir: Directory holding the current agent output.

        Returns:
            A report with one entry per added, removed, or changed file (or a
            single PASS entry for a category with no findings), plus a
            ``diff:threshold`` failure when too many files changed.
        """
        report = CheckerReport(checker_name=self.name)
        # A ``diff`` key that is present but empty may come through as None.
        config = self.config.get("diff") or {}
        baseline_dir = config.get("baseline")
        if not baseline_dir:
            report.checks.append(CheckResult(
                checker=self.name, check_name="diff",
                severity=Severity.WARN,
                message="No baseline directory configured — skipping diff verification",
                detail="Set 'diff.baseline' in .agent-ci.yaml to enable diff checks.",
            ))
            return report

        baseline = Path(baseline_dir)
        # Require a directory: a regular file "exists" but cannot serve as a
        # baseline tree, so it must not be accepted as one.
        if not baseline.is_dir():
            report.checks.append(CheckResult(
                checker=self.name, check_name="diff",
                severity=Severity.FAIL,
                message=f"Baseline directory not found: {baseline}",
            ))
            return report

        current_files = self._collect_text_files(output_dir)
        baseline_files = self._collect_text_files(baseline)

        max_changed = config.get("max_changed_files")
        max_added = config.get("max_added_files")
        max_removed = config.get("max_removed_files")
        semantic_threshold = config.get("semantic_threshold", 0.7)

        # 1. New files (in current but not baseline).
        added = set(current_files) - set(baseline_files)
        # Compare against None so that an explicit limit of 0 is enforced.
        too_many_added = max_added is not None and len(added) > max_added
        added_severity = Severity.FAIL if too_many_added else Severity.WARN
        for fpath in sorted(added):
            report.checks.append(CheckResult(
                checker=self.name, check_name="diff:added",
                severity=added_severity,
                message=f"New file: {fpath}",
            ))
        if not added:
            report.checks.append(CheckResult(
                checker=self.name, check_name="diff:added",
                severity=Severity.PASS,
                message="No new files detected",
            ))

        # 2. Removed files (in baseline but not current).
        removed = set(baseline_files) - set(current_files)
        too_many_removed = max_removed is not None and len(removed) > max_removed
        removed_severity = Severity.FAIL if too_many_removed else Severity.WARN
        for fpath in sorted(removed):
            report.checks.append(CheckResult(
                checker=self.name, check_name="diff:removed",
                severity=removed_severity,
                message=f"Missing file (was in baseline): {fpath}",
            ))
        if not removed:
            report.checks.append(CheckResult(
                checker=self.name, check_name="diff:removed",
                severity=Severity.PASS,
                message="No files removed since baseline",
            ))

        # 3. Changed files (present on both sides with different content).
        common = set(current_files) & set(baseline_files)
        changed_count = 0
        for fpath in sorted(common):
            current_content = current_files[fpath].read_text(encoding="utf-8")
            baseline_content = baseline_files[fpath].read_text(encoding="utf-8")
            if current_content == baseline_content:
                continue

            changed_count += 1
            similarity = self._text_similarity(baseline_content, current_content)

            # Below 0.5 is always a failure; between 0.5 and the configured
            # threshold is a warning; anything more similar passes.
            severity = Severity.PASS
            if similarity < 0.5:
                severity = Severity.FAIL
            elif similarity < semantic_threshold:
                severity = Severity.WARN

            report.checks.append(CheckResult(
                checker=self.name, check_name="diff:changed",
                severity=severity,
                message=f"Changed: {fpath} (similarity: {similarity:.1%})",
                detail=self._generate_diff(baseline_content, current_content, fpath),
                file_path=str(fpath),
            ))

        if changed_count == 0:
            report.checks.append(CheckResult(
                checker=self.name, check_name="diff:changed",
                severity=Severity.PASS,
                message="No files changed since baseline",
            ))

        # 4. Threshold check on the number of changed files.
        if max_changed is not None and changed_count > max_changed:
            report.checks.append(CheckResult(
                checker=self.name, check_name="diff:threshold",
                severity=Severity.FAIL,
                message=f"Changed files ({changed_count}) exceed max ({max_changed})",
            ))

        return report

    # ── File collection ────────────────────────────────────────────

    @classmethod
    def _collect_text_files(cls, root: Path) -> dict[Path, Path]:
        """Map each text file under ``root`` from its root-relative path to its path."""
        return {
            path.relative_to(root): path
            for path in root.rglob("*")
            if path.is_file() and path.suffix in cls._TEXT_SUFFIXES
        }

    # ── Similarity ─────────────────────────────────────────────────

    @staticmethod
    def _text_similarity(text_a: str, text_b: str) -> float:
        """Return the Jaccard similarity of the two texts' word tokens.

        Tokens are the lowercased, whitespace-separated words of each text.
        Two empty strings score ``1.0``; if exactly one is empty, or neither
        contains any token, the score is ``0.0``.
        """
        if not text_a and not text_b:
            return 1.0
        if not text_a or not text_b:
            return 0.0
        tokens_a = set(text_a.lower().split())
        tokens_b = set(text_b.lower().split())
        intersection = tokens_a & tokens_b
        union = tokens_a | tokens_b
        return len(intersection) / len(union) if union else 0.0

    # ── Diff generation ────────────────────────────────────────────

    @staticmethod
    def _generate_diff(before: str, after: str, relpath: Path,
                       context_lines: int = 3) -> str:
        """Return a unified diff of ``before`` vs ``after``, capped for report size.

        Args:
            before: Baseline content.
            after: Current content.
            relpath: Relative path used to label both sides of the diff.
            context_lines: Number of context lines around each change.

        Returns:
            The diff text, or ``"(binary or identical)"`` when the diff is empty.
            A diff longer than 50 lines is cut to its first 47 lines followed
            by a truncation marker.
        """
        import difflib

        diff = list(difflib.unified_diff(
            before.splitlines(keepends=True),
            after.splitlines(keepends=True),
            fromfile=f"baseline/{relpath}",
            tofile=f"current/{relpath}",
            n=context_lines,
        ))
        if not diff:
            return "(binary or identical)"
        # Keep reports small: long diffs are cut short and marked as truncated.
        if len(diff) > 50:
            diff = diff[:47] + ["... (truncated)\n"]
        return "".join(diff)
|