ai-code-quality-auditor 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_code_quality_auditor-0.1.0.dist-info/METADATA +148 -0
- ai_code_quality_auditor-0.1.0.dist-info/RECORD +39 -0
- ai_code_quality_auditor-0.1.0.dist-info/WHEEL +5 -0
- ai_code_quality_auditor-0.1.0.dist-info/entry_points.txt +2 -0
- ai_code_quality_auditor-0.1.0.dist-info/licenses/LICENSE +21 -0
- ai_code_quality_auditor-0.1.0.dist-info/top_level.txt +1 -0
- auditor/__init__.py +0 -0
- auditor/adapters/__init__.py +0 -0
- auditor/adapters/antigravity_adapter.py +139 -0
- auditor/adapters/base_adapter.py +14 -0
- auditor/adapters/claude_code_adapter.py +151 -0
- auditor/adapters/cursor_agent_adapter.py +124 -0
- auditor/adapters/human_control_adapter.py +101 -0
- auditor/adapters/human_control_recorder.py +97 -0
- auditor/adapters/replit_agent_adapter.py +140 -0
- auditor/analyzers/__init__.py +0 -0
- auditor/analyzers/complexity_analyzer.py +34 -0
- auditor/analyzers/duplication_analyzer.py +55 -0
- auditor/analyzers/hallucination_analyzer.py +23 -0
- auditor/analyzers/keystroke_analyzer.py +12 -0
- auditor/analyzers/manifest_deriver.py +62 -0
- auditor/analyzers/security_analyzer.py +63 -0
- auditor/core/__init__.py +0 -0
- auditor/core/cli.py +52 -0
- auditor/core/config.py +21 -0
- auditor/core/experiment.py +149 -0
- auditor/core/logger.py +13 -0
- auditor/core/runner.py +52 -0
- auditor/dashboard/__init__.py +0 -0
- auditor/dashboard/app.py +191 -0
- auditor/dashboard/templates/index.html +139 -0
- auditor/dashboard/templates/report.html +484 -0
- auditor/governance/__init__.py +0 -0
- auditor/governance/compliance_checker.py +167 -0
- auditor/models/__init__.py +0 -0
- auditor/models/audit_result.py +29 -0
- auditor/reporting/__init__.py +0 -0
- auditor/reporting/csv_reporter.py +21 -0
- auditor/reporting/json_reporter.py +8 -0
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ai-code-quality-auditor
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Empirical Safety Harness for agentic AI coding systems. Scores AI-generated code on 5 metrics across 5 vendor conditions against one fixed spec.
|
|
5
|
+
Author-email: Dominic Rume <dominicrume@gmail.com>
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2026 Dominic Rume
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
|
|
28
|
+
Project-URL: Homepage, https://github.com/dominicrume/NEW-enterprise-ai-code-quality-auditor
|
|
29
|
+
Project-URL: Repository, https://github.com/dominicrume/NEW-enterprise-ai-code-quality-auditor
|
|
30
|
+
Project-URL: Issues, https://github.com/dominicrume/NEW-enterprise-ai-code-quality-auditor/issues
|
|
31
|
+
Project-URL: Documentation, https://github.com/dominicrume/NEW-enterprise-ai-code-quality-auditor/tree/main/docs
|
|
32
|
+
Keywords: ai,code-quality,llm,agents,evaluation,claude-code,cursor,sonarcloud,dissertation
|
|
33
|
+
Classifier: Development Status :: 4 - Beta
|
|
34
|
+
Classifier: Intended Audience :: Developers
|
|
35
|
+
Classifier: Intended Audience :: Science/Research
|
|
36
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
37
|
+
Classifier: Operating System :: OS Independent
|
|
38
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
39
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
40
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
41
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
42
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
43
|
+
Requires-Python: >=3.11
|
|
44
|
+
Description-Content-Type: text/markdown
|
|
45
|
+
License-File: LICENSE
|
|
46
|
+
Requires-Dist: pydantic>=2.0
|
|
47
|
+
Requires-Dist: pyyaml>=6.0
|
|
48
|
+
Requires-Dist: click>=8.1
|
|
49
|
+
Requires-Dist: rich>=13.0
|
|
50
|
+
Requires-Dist: radon>=6.0
|
|
51
|
+
Requires-Dist: python-dotenv>=1.0
|
|
52
|
+
Requires-Dist: bandit>=1.8
|
|
53
|
+
Provides-Extra: dev
|
|
54
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
55
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
|
|
56
|
+
Requires-Dist: ruff>=0.5; extra == "dev"
|
|
57
|
+
Requires-Dist: build>=1.2; extra == "dev"
|
|
58
|
+
Requires-Dist: twine>=5.0; extra == "dev"
|
|
59
|
+
Provides-Extra: notebook
|
|
60
|
+
Requires-Dist: pandas>=2.0; extra == "notebook"
|
|
61
|
+
Requires-Dist: scipy>=1.10; extra == "notebook"
|
|
62
|
+
Requires-Dist: matplotlib>=3.7; extra == "notebook"
|
|
63
|
+
Requires-Dist: jupyter>=1.0; extra == "notebook"
|
|
64
|
+
Provides-Extra: dashboard
|
|
65
|
+
Requires-Dist: flask>=3.0; extra == "dashboard"
|
|
66
|
+
Provides-Extra: recorder
|
|
67
|
+
Requires-Dist: pynput>=1.7; extra == "recorder"
|
|
68
|
+
Provides-Extra: all
|
|
69
|
+
Requires-Dist: pytest>=8.0; extra == "all"
|
|
70
|
+
Requires-Dist: pandas>=2.0; extra == "all"
|
|
71
|
+
Requires-Dist: scipy>=1.10; extra == "all"
|
|
72
|
+
Requires-Dist: matplotlib>=3.7; extra == "all"
|
|
73
|
+
Requires-Dist: jupyter>=1.0; extra == "all"
|
|
74
|
+
Requires-Dist: flask>=3.0; extra == "all"
|
|
75
|
+
Requires-Dist: pynput>=1.7; extra == "all"
|
|
76
|
+
Dynamic: license-file
|
|
77
|
+
|
|
78
|
+
# AI Code Quality Auditor — the Referee Tool
|
|
79
|
+
|
|
80
|
+
[CI status](https://github.com/dominicrume/NEW-enterprise-ai-code-quality-auditor/actions/workflows/ci.yml)
|
|
81
|
+
[PyPI package](https://pypi.org/project/ai-code-quality-auditor/)
|
|
82
|
+
[License: MIT](LICENSE)
|
|
83
|
+
[Live dashboard](https://auditor-dashboard.fly.dev)
|
|
84
|
+
|
|
85
|
+
> An empirical Safety Harness for agentic AI coding systems.
|
|
86
|
+
> Quantifies where AI-assisted development fails at governance, security,
|
|
87
|
+
> and ethical alignment — *before* the code reaches production.
|
|
88
|
+
|
|
89
|
+
**🟢 Try it in 30 seconds:**
|
|
90
|
+
```bash
|
|
91
|
+
pipx install ai-code-quality-auditor
|
|
92
|
+
auditor --help
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
**🚀 Or wire it into your CI in 6 lines** (`.github/workflows/auditor.yml`):
|
|
96
|
+
```yaml
|
|
97
|
+
jobs:
  audit:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: dominicrume/NEW-enterprise-ai-code-quality-auditor@main
        with:
          run-id: ${{ github.run_id }}
          conditions: claude_code,cursor_agent
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
**📊 Live dashboard:** https://auditor-dashboard.fly.dev *(pending deploy — see below)*
|
|
109
|
+
|
|
110
|
+
This is the experimental instrument for the MSc dissertation
|
|
111
|
+
**"AI-Assisted Coding Assessment Tool: Evaluating LLM Performance, Governance,
|
|
112
|
+
and Security in an Agent Education System"** (Aston University, MSc AI &
|
|
113
|
+
Business Strategy). The same instrument is the working prototype for the
|
|
114
|
+
PhD extension at the Aston-Capgemini Centre of Excellence for Enterprise AI.
|
|
115
|
+
|
|
116
|
+
---
|
|
117
|
+
|
|
118
|
+
## What it does
|
|
119
|
+
Given a fixed specification (the "spec box"), the Auditor:
|
|
120
|
+
1. Runs five experimental conditions against the same task (human control,
|
|
121
|
+
visualisation→Claude→Replit, Cursor IDE, autonomous agent).
|
|
122
|
+
2. Captures every output and every interaction event.
|
|
123
|
+
3. Scores each result on five empirical metrics: security vulnerability
|
|
124
|
+
density, cyclomatic complexity, code duplication, hallucination frequency
|
|
125
|
+
(features outside spec), and keystroke dynamics (correction frequency).
|
|
126
|
+
4. Emits CSV/JSON reports for statistical comparison.
|
|
127
|
+
|
|
128
|
+
## Quick start
|
|
129
|
+
```bash
|
|
130
|
+
cp .env.example .env
|
|
131
|
+
pip install -e .
|
|
132
|
+
auditor run --spec specs/agent_education_system.yaml --workflow human_control
|
|
133
|
+
auditor report --out data/reports/
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
## Read in this order
|
|
137
|
+
1. `docs/ARCHITECTURE.md` — how the pieces fit
|
|
138
|
+
2. `docs/METHODOLOGY.md` — how an experiment is run
|
|
139
|
+
3. `docs/METRICS.md` — what each metric means and how it's computed
|
|
140
|
+
4. `docs/ETHICS.md` — GDPR, synthetic data, academic integrity
|
|
141
|
+
5. `docs/DISSERTATION_LINKAGE.md` — which folder serves which proposal section
|
|
142
|
+
6. `docs/ROADMAP.md` — the PhD extension (API security + enterprise risk)
|
|
143
|
+
|
|
144
|
+
## Principles
|
|
145
|
+
- One analyzer per metric. One adapter per AI workflow. Single responsibility.
|
|
146
|
+
- The spec is data, not code — externalised in `specs/` for reproducibility.
|
|
147
|
+
- Synthetic data only. No PII, no proprietary corporate records, ever.
|
|
148
|
+
- Every analyzer has a test. Green tests = trustable experiment.
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
ai_code_quality_auditor-0.1.0.dist-info/licenses/LICENSE,sha256=D9Qe54txI-WgF2yA83uPVryQLnTT2fB05ySsIkg3-4Q,1069
|
|
2
|
+
auditor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
+
auditor/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
+
auditor/adapters/antigravity_adapter.py,sha256=yYiUQZM6lh8rd49QlKwvvQ3LFpRwr8D9K1gpd95VGhU,5504
|
|
5
|
+
auditor/adapters/base_adapter.py,sha256=oG3oHBb2CHW1o7Gu2_H7i-4Su8BAqsiXbD4tvGGRyRU,423
|
|
6
|
+
auditor/adapters/claude_code_adapter.py,sha256=YqbTW0HpkhUiYNXEJc4mfyaccmIiaJbLhsRHKyWaEUQ,5916
|
|
7
|
+
auditor/adapters/cursor_agent_adapter.py,sha256=h9-6k_z1fmaBBx_ffyDynYMq112YHM2bZE7WidAoEHs,4777
|
|
8
|
+
auditor/adapters/human_control_adapter.py,sha256=ry4GUSaZxbtUtzfIr_PnFS6FQn7JeZg5M-bD5_yTOS0,4051
|
|
9
|
+
auditor/adapters/human_control_recorder.py,sha256=I8UM2RyvQz_v2E_HugF6IJOYtfRhI0Iq7elq-gN8Ktw,3166
|
|
10
|
+
auditor/adapters/replit_agent_adapter.py,sha256=n951L0o5s6mZrGK9BvedufBHJ6Ew8evhN6z7zbF9Xdw,5579
|
|
11
|
+
auditor/analyzers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
|
+
auditor/analyzers/complexity_analyzer.py,sha256=NyEeezTbPG8G3YQuyJSXAcA7ffG4imYrf6XxzMIW7hY,1168
|
|
13
|
+
auditor/analyzers/duplication_analyzer.py,sha256=Kk9JRa9oIXMI6lU8tFBCVERduLwqF2eN2AP6sFstbeY,2098
|
|
14
|
+
auditor/analyzers/hallucination_analyzer.py,sha256=0fd0JZjBNFWzFeFychsCj9RZQO5Ipbo-sVLuk6FdoG0,977
|
|
15
|
+
auditor/analyzers/keystroke_analyzer.py,sha256=G-yq3XAxe-5AwlJdzfYkDpAPAbPyhSlGmQHnp1rp9Nk,516
|
|
16
|
+
auditor/analyzers/manifest_deriver.py,sha256=0H_gKZpLIWBTZj6bTyrMREwsqQ95_PXb9u4PYZomRPE,2398
|
|
17
|
+
auditor/analyzers/security_analyzer.py,sha256=t7oQY2GdqHALCcMuacKy_fR_I9h0zX8HA1ljUH-Gjkk,2397
|
|
18
|
+
auditor/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
19
|
+
auditor/core/cli.py,sha256=0y2VXeAVeEwyc-Z7Lh50B0vlLX793Jnl-_pNKyjAQyw,2408
|
|
20
|
+
auditor/core/config.py,sha256=6julYSA9kD2jw_b5P50aDkHdiDAcPL5qRqvOheQHdK8,581
|
|
21
|
+
auditor/core/experiment.py,sha256=rl0QVSAdg7Ksvz8rEcosxOZ-T1bh-_LFW2mpiH4OQDI,5500
|
|
22
|
+
auditor/core/logger.py,sha256=zL4aflL6h35I0FRJNVKAcnhc89oGDUzU2tVvP643F5E,446
|
|
23
|
+
auditor/core/runner.py,sha256=7YblOlIrxggv018h7_u6_VEDQIP6ca8VOuMDDVLfI_o,1488
|
|
24
|
+
auditor/dashboard/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
25
|
+
auditor/dashboard/app.py,sha256=yz_aYrCxqtER3v67YEPSIzWZ1uRiHAS2Hdv_NCleHJg,6239
|
|
26
|
+
auditor/dashboard/templates/index.html,sha256=OvVnd4CpZ2kCYfmTfZspGjsrmr2cHiSZB0WktbWtORQ,6292
|
|
27
|
+
auditor/dashboard/templates/report.html,sha256=0TydCVQIeHkopZOg_z7tvQrvxu5JB41bshX9vbJnA78,21357
|
|
28
|
+
auditor/governance/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
29
|
+
auditor/governance/compliance_checker.py,sha256=fomnTEicVo2fbIec6yno8t0c4LbntbwyZVb0eSA3Y4w,5969
|
|
30
|
+
auditor/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
31
|
+
auditor/models/audit_result.py,sha256=EB3RRSAsY1c1p-DXRV4zq0aMJ9rFbz5MrQbOek-Png4,645
|
|
32
|
+
auditor/reporting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
33
|
+
auditor/reporting/csv_reporter.py,sha256=e6ztPRC8zmrVppDHt_bVTxvzNgEDbwfkAAvyqJo6SbM,727
|
|
34
|
+
auditor/reporting/json_reporter.py,sha256=35FS2meVK4g0wErKqfBYe75P3AMNvQMWcgWX0E4-pTw,278
|
|
35
|
+
ai_code_quality_auditor-0.1.0.dist-info/METADATA,sha256=gaxc6z6F8u4kGHaTNpLU0lyeqgmQpKS4umgx3fCIBS8,6899
|
|
36
|
+
ai_code_quality_auditor-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
37
|
+
ai_code_quality_auditor-0.1.0.dist-info/entry_points.txt,sha256=Gxvra13kW5Mg0ra2d7b6_NdEySSjSCNs-hcOS7_NoZM,50
|
|
38
|
+
ai_code_quality_auditor-0.1.0.dist-info/top_level.txt,sha256=8Z_-H3ebzZbnAMChd_M4BQ8AZ7MiD2LX-gwa6p58Xls,8
|
|
39
|
+
ai_code_quality_auditor-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Dominic Rume
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
auditor
|
auditor/__init__.py
ADDED
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
"""antigravity adapter — vendor: Google Gemini Antigravity.
|
|
2
|
+
|
|
3
|
+
Drives the Antigravity agent CLI against the spec, captures its streamed
|
|
4
|
+
agent events, and reads the produced codebase from the work_dir.
|
|
5
|
+
|
|
6
|
+
Capture contract: see docs/METHODOLOGY.md. Antigravity is agentic, so every
|
|
7
|
+
captured event maps to ``agent_action``.
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import json
|
|
12
|
+
import shutil
|
|
13
|
+
import subprocess
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Callable, Iterable
|
|
16
|
+
|
|
17
|
+
from auditor.adapters.base_adapter import BaseAdapter
|
|
18
|
+
from auditor.core.config import settings
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# File suffixes that count as part of the generated codebase; _load_codebase
# skips every file whose suffix is not listed here.
_CODE_SUFFIXES = {
    ".py", ".js", ".jsx", ".ts", ".tsx",
    ".go", ".rs", ".java", ".rb",
    ".sql", ".yaml", ".yml", ".toml", ".md",
}

# Injectable runner signature: runner(prompt, work_dir) -> iterable of raw events.
Runner = Callable[[str, Path], Iterable[dict]]
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _default_runner(prompt: str, work_dir: Path, cli: str = "antigravity",
                    timeout: int = 600) -> list[dict]:
    """Run the Antigravity CLI once and return the JSON objects it printed.

    Executes ``<cli> run --prompt <prompt> --format jsonl`` with ``work_dir``
    as the working directory and parses stdout as JSON Lines.

    Args:
        prompt: Prompt text passed to the CLI via ``--prompt``.
        work_dir: Directory the CLI is run in.
        cli: Executable name or path of the CLI.
        timeout: Seconds after which ``subprocess.run`` aborts the call.

    Returns:
        One dict per stdout line that decodes to a JSON object, in output
        order. Blank lines, undecodable lines and non-object JSON values are
        dropped.

    Raises:
        subprocess.TimeoutExpired: The CLI did not finish within ``timeout``.
        OSError: The executable could not be started.
    """
    proc = subprocess.run(
        [cli, "run", "--prompt", prompt, "--format", "jsonl"],
        cwd=str(work_dir), capture_output=True, text=True,
        timeout=timeout, check=False,  # a non-zero exit status is not treated as an error
    )
    events: list[dict] = []
    for raw_line in proc.stdout.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        try:
            event = json.loads(line)
        except json.JSONDecodeError:
            # Best effort: lines that are not JSON are skipped, not fatal.
            continue
        # A line such as ``42`` or ``"ok"`` is valid JSON but not an event;
        # keeping it would break consumers that call ``.get`` on every entry.
        if isinstance(event, dict):
            events.append(event)
    return events
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _build_prompt(spec: dict) -> str:
    """Render ``spec`` into the single prompt string handed to the agent.

    The prompt is a fixed instruction preamble followed by the spec
    serialised as 2-space-indented JSON under a ``SPEC:`` header.
    """
    preamble = (
        "Build the following specification in the current working directory. "
        "Implement only the listed features.\n\n"
    )
    spec_json = json.dumps(spec, indent=2)
    return preamble + "SPEC:\n" + spec_json + "\n"
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _to_contract_events(raw_events: Iterable[dict]) -> list[dict]:
    """Map raw Antigravity events onto capture-contract events.

    Every event becomes an ``agent_action``. For each output key the first
    truthy value of a preferred/fallback pair of raw keys is used:
    ``kind``/``type`` -> ``subtype``, ``phase``/``status`` -> ``detail``,
    ``tool``/``action`` -> ``tool``.
    """
    def _pick(event: dict, preferred: str, fallback: str):
        # Same semantics as ``a or b``: a falsy preferred value falls through.
        return event.get(preferred) or event.get(fallback)

    return [
        {
            "type": "agent_action",
            "subtype": _pick(event, "kind", "type"),
            "detail": _pick(event, "phase", "status"),
            "tool": _pick(event, "tool", "action"),
        }
        for event in raw_events
    ]
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _load_codebase(work_dir: Path) -> dict:
    """Read the source tree under ``work_dir`` into a codebase dict.

    Every regular file below ``work_dir`` whose suffix is in
    ``_CODE_SUFFIXES`` is read as UTF-8, except files named
    ``manifest.json`` and anything whose relative path contains an excluded
    directory name or a ``*.egg-info`` component.

    Args:
        work_dir: Root directory of the generated project.

    Returns:
        ``{"files": {posix_relative_path: text}, "manifest": manifest}``,
        where ``manifest`` is the parsed ``<work_dir>/manifest.json`` or ``[]``
        when that file does not exist. Files are inserted in sorted path order.

    Raises:
        FileNotFoundError: ``work_dir`` is not an existing directory.
    """
    work_dir = Path(work_dir)
    if not work_dir.is_dir():
        raise FileNotFoundError(f"work_dir not found: {work_dir}")
    _EXCLUDE_DIRS = {".venv", "venv", "env", "__pycache__", "node_modules",
                     ".pytest_cache", ".git", "site-packages", "dist", "build",
                     ".mypy_cache", ".ruff_cache", "egg-info"}
    files: dict[str, str] = {}
    for path in sorted(work_dir.rglob("*")):
        if not path.is_file() or path.name == "manifest.json":
            continue
        if path.suffix not in _CODE_SUFFIXES:
            continue
        rel = path.relative_to(work_dir)
        if any(part in _EXCLUDE_DIRS or part.endswith(".egg-info")
               for part in rel.parts):
            continue
        files[rel.as_posix()] = path.read_text(encoding="utf-8")
    manifest_path = work_dir / "manifest.json"
    # Parse from bytes: json.loads then detects UTF-8/16/32 (BOM included)
    # itself instead of depending on the platform's locale encoding.
    manifest = json.loads(manifest_path.read_bytes()) if manifest_path.exists() else []
    return {"files": files, "manifest": manifest}
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
class AntigravityAdapter(BaseAdapter):
    """Adapter that obtains a codebase from the Antigravity agent.

    Two modes are supported: *live* (run the CLI inside ``work_dir``) and
    *replay* (load an already captured session from ``replay_dir``).
    """

    name = "antigravity"

    def __init__(self, work_dir: str | Path, cli: str = "antigravity",
                 run_id: str | None = None, raw_root: str | Path = "data/raw",
                 runner: Runner | None = None, timeout: int = 600,
                 replay_dir: str | Path | None = None):
        """Configure the adapter.

        Args:
            work_dir: Directory the agent writes the generated code into.
            cli: Executable used by the default runner.
            run_id: Identifier of this run; ``settings.run_id`` is used when
                it is falsy.
            raw_root: Root folder under which raw artefacts are persisted.
            runner: Optional replacement for the default CLI runner, called
                as ``runner(prompt, work_dir)``.
            timeout: Timeout in seconds passed to the default runner.
            replay_dir: if given, skip the CLI and load codebase from this
                folder plus an interaction log from ``<replay_dir>/log.json``.
                Use this to score sessions captured manually in the
                Antigravity web IDE.
        """
        self.work_dir = Path(work_dir)
        self.cli = cli
        self.run_id = run_id or settings.run_id
        self.raw_root = Path(raw_root)
        self.timeout = timeout
        self.replay_dir = Path(replay_dir) if replay_dir else None
        # The lambda reads self.cli / self.timeout when it is called, so later
        # changes to those attributes are honoured.
        self._runner: Runner = runner or (
            lambda prompt, wd: _default_runner(prompt, wd, cli=self.cli, timeout=self.timeout)
        )

    def _persist(self, codebase: dict, interaction_log: list[dict],
                 raw_events: list[dict]) -> Path:
        """Write the run's artefacts to ``<raw_root>/<run_id>/<name>``.

        Stores ``codebase.json``, ``interaction_log.json`` and
        ``raw_stream.json``, then replaces ``code/`` with a copy of the source
        directory (``replay_dir`` in replay mode, otherwise ``work_dir``) when
        that directory exists.

        Returns:
            The destination directory.
        """
        dest = self.raw_root / self.run_id / self.name
        dest.mkdir(parents=True, exist_ok=True)
        (dest / "codebase.json").write_text(json.dumps(codebase, indent=2))
        (dest / "interaction_log.json").write_text(json.dumps(interaction_log, indent=2))
        (dest / "raw_stream.json").write_text(json.dumps(raw_events, indent=2))
        code_copy = dest / "code"
        if code_copy.exists():
            # copytree refuses to write into an existing directory.
            shutil.rmtree(code_copy)
        source = self.replay_dir if self.replay_dir is not None else self.work_dir
        if source.exists():
            shutil.copytree(source, code_copy)
        return dest

    def generate(self, spec: dict) -> tuple[dict, list[dict]]:
        """Return ``(codebase, interaction_log)`` for ``spec``.

        In replay mode the codebase is loaded from ``replay_dir`` and the log
        from ``<replay_dir>/log.json`` (``[]`` when absent); ``spec`` is not
        used. Otherwise the runner is invoked in ``work_dir`` and its events
        are converted to contract events. Both paths persist their artefacts.
        """
        if self.replay_dir is not None:
            codebase = _load_codebase(self.replay_dir)
            log_path = self.replay_dir / "log.json"
            # Parse from bytes so json.loads detects the encoding (UTF-8/16/32,
            # BOM included) instead of relying on the locale default.
            interaction_log = (
                json.loads(log_path.read_bytes()) if log_path.exists() else []
            )
            self._persist(codebase, interaction_log, raw_events=[])
            return codebase, interaction_log
        self.work_dir.mkdir(parents=True, exist_ok=True)
        prompt = _build_prompt(spec)
        raw_events = list(self._runner(prompt, self.work_dir))
        interaction_log = _to_contract_events(raw_events)
        codebase = _load_codebase(self.work_dir)
        self._persist(codebase, interaction_log, raw_events)
        return codebase, interaction_log
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""Base contract every AI workflow adapter implements."""
|
|
2
|
+
from abc import ABC, abstractmethod
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class BaseAdapter(ABC):
    """Abstract interface shared by all AI workflow adapters."""

    # Identifier of the workflow; declared here, assigned by subclasses.
    name: str

    @abstractmethod
    def generate(self, spec: dict) -> tuple[dict, list[dict]]:
        """Produce the artefacts for ``spec`` as ``(codebase, interaction_log)``.

        codebase: ``{"files": {path: content}, "manifest": [feature_ids...]}``
        interaction_log: list of event dicts, each carrying at least a
        ``"type"`` key.
        """
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
"""claude_code adapter — vendor: Anthropic Claude Code (see docs/METHODOLOGY.md).
|
|
2
|
+
|
|
3
|
+
Drives the Claude Code CLI in non-interactive ("-p") mode against the spec,
|
|
4
|
+
captures the streamed agent events, and reads back the final codebase from
|
|
5
|
+
the working directory the CLI wrote into.
|
|
6
|
+
|
|
7
|
+
All vendor-specific glue lives in this file; the engine remains neutral.
|
|
8
|
+
|
|
9
|
+
Capture contract (docs/METHODOLOGY.md):
|
|
10
|
+
codebase : {"files": {path: content}, "manifest": [feature_ids]}
|
|
11
|
+
interaction_log : list of events, each with at minimum {"type": str}
|
|
12
|
+
where type ∈ {keystroke, backspace, delete, agent_action}.
|
|
13
|
+
|
|
14
|
+
Since Claude Code is an agentic system (no human keystrokes), every captured
|
|
15
|
+
event maps to ``agent_action`` with vendor-specific detail preserved in
|
|
16
|
+
sibling keys (``subtype``, ``tool``, etc.) for downstream forensics.
|
|
17
|
+
"""
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import json
|
|
21
|
+
import shutil
|
|
22
|
+
import subprocess
|
|
23
|
+
from pathlib import Path
|
|
24
|
+
from typing import Callable, Iterable
|
|
25
|
+
|
|
26
|
+
from auditor.adapters.base_adapter import BaseAdapter
|
|
27
|
+
from auditor.core.config import settings
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# File suffixes that count as part of the generated codebase; _load_codebase
# skips every file whose suffix is not listed here.
_CODE_SUFFIXES = {
    ".py", ".js", ".jsx", ".ts", ".tsx",
    ".go", ".rs", ".java", ".rb",
    ".sql", ".yaml", ".yml", ".toml", ".md",
}


Runner = Callable[[str, Path], Iterable[dict]]
"""Signature: runner(prompt, work_dir) -> iterable of raw Claude Code events."""
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _default_runner(prompt: str, work_dir: Path, cli: str = "claude",
                    timeout: int = 600) -> list[dict]:
    """Invoke the real Claude Code CLI and parse its streaming JSON output.

    Executes ``<cli> -p <prompt> --output-format stream-json --verbose
    --dangerously-skip-permissions`` with ``work_dir`` as the working
    directory and parses stdout line by line.

    Args:
        prompt: Prompt text passed to the CLI via ``-p``.
        work_dir: Directory the CLI is run in.
        cli: Executable name or path of the CLI.
        timeout: Seconds after which ``subprocess.run`` aborts the call.

    Returns:
        One dict per stdout line that decodes to a JSON object, in output
        order. Blank lines, undecodable lines and non-object JSON values are
        dropped.

    Raises:
        subprocess.TimeoutExpired: The CLI did not finish within ``timeout``.
        OSError: The executable could not be started.
    """
    # NOTE(review): --dangerously-skip-permissions presumably lets the agent
    # act without confirmation prompts; confirm against the vendor docs and
    # only point work_dir at a disposable directory.
    proc = subprocess.run(
        [cli, "-p", prompt, "--output-format", "stream-json", "--verbose",
         "--dangerously-skip-permissions"],
        cwd=str(work_dir),
        capture_output=True,
        text=True,
        timeout=timeout,
        check=False,  # a non-zero exit status is not treated as an error
    )
    events: list[dict] = []
    for raw_line in proc.stdout.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        try:
            event = json.loads(line)
        except json.JSONDecodeError:
            # Best effort: lines that are not JSON are skipped, not fatal.
            continue
        # A line such as ``42`` or ``"ok"`` is valid JSON but not an event;
        # keeping it would break consumers that call ``.get`` on every entry.
        if isinstance(event, dict):
            events.append(event)
    return events
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _build_prompt(spec: dict) -> str:
    """Turn the spec dict into the one prompt string sent to the agent.

    The prompt is a fixed instruction preamble followed by the spec
    serialised as 2-space-indented JSON under a ``SPEC:`` header.
    """
    preamble = (
        "You are implementing the following specification. Build the code in "
        "the current working directory. Stay strictly within the listed "
        "features — do not invent extras.\n\n"
    )
    spec_json = json.dumps(spec, indent=2)
    return preamble + "SPEC:\n" + spec_json + "\n"
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _to_contract_events(raw_events: Iterable[dict]) -> list[dict]:
    """Convert raw Claude Code events into capture-contract events.

    Each raw event yields one ``agent_action`` entry. The raw ``type`` is
    kept as ``subtype``, the raw ``subtype`` as ``detail``, and ``tool`` is
    the raw ``tool_name`` or, when that is falsy, the raw ``name``. Moving
    the identifying fields under new keys keeps the vendor schema out of the
    contract while leaving the detail available to analyzers.
    """
    def _convert(event: dict) -> dict:
        tool = event.get("tool_name")
        if not tool:
            tool = event.get("name")
        return {
            "type": "agent_action",
            "subtype": event.get("type"),
            "detail": event.get("subtype"),
            "tool": tool,
        }

    return [_convert(event) for event in raw_events]
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _load_codebase(work_dir: Path) -> dict:
    """Read the source tree under ``work_dir`` into a codebase dict.

    Every regular file below ``work_dir`` whose suffix is in
    ``_CODE_SUFFIXES`` is read as UTF-8, except files named
    ``manifest.json`` and anything whose relative path contains an excluded
    directory name or a ``*.egg-info`` component.

    Args:
        work_dir: Root directory of the generated project.

    Returns:
        ``{"files": {posix_relative_path: text}, "manifest": manifest}``,
        where ``manifest`` is the parsed ``<work_dir>/manifest.json`` or ``[]``
        when that file does not exist. Files are inserted in sorted path order.

    Raises:
        FileNotFoundError: ``work_dir`` is not an existing directory.
    """
    work_dir = Path(work_dir)
    if not work_dir.is_dir():
        raise FileNotFoundError(f"work_dir not found: {work_dir}")
    _EXCLUDE_DIRS = {".venv", "venv", "env", "__pycache__", "node_modules",
                     ".pytest_cache", ".git", "site-packages", "dist", "build",
                     ".mypy_cache", ".ruff_cache", "egg-info"}
    files: dict[str, str] = {}
    for path in sorted(work_dir.rglob("*")):
        if not path.is_file():
            continue
        if path.name == "manifest.json":
            continue
        if path.suffix not in _CODE_SUFFIXES:
            continue
        rel = path.relative_to(work_dir).as_posix()
        if any(part in _EXCLUDE_DIRS or part.endswith(".egg-info")
               for part in Path(rel).parts):
            continue
        files[rel] = path.read_text(encoding="utf-8")
    manifest_path = work_dir / "manifest.json"
    # Parse from bytes: json.loads then detects UTF-8/16/32 (BOM included)
    # itself instead of depending on the platform's locale encoding.
    manifest = json.loads(manifest_path.read_bytes()) if manifest_path.exists() else []
    return {"files": files, "manifest": manifest}
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
class ClaudeCodeAdapter(BaseAdapter):
    """Adapter that generates a codebase by driving the Claude Code CLI."""

    name = "claude_code"

    def __init__(self, work_dir: str | Path, cli: str = "claude",
                 run_id: str | None = None, raw_root: str | Path = "data/raw",
                 runner: Runner | None = None, timeout: int = 600):
        """Configure the adapter.

        Args:
            work_dir: Directory the agent writes the generated code into.
            cli: Executable used by the default runner.
            run_id: Identifier of this run; ``settings.run_id`` is used when
                it is falsy.
            raw_root: Root folder under which raw artefacts are persisted.
            runner: Optional replacement for the default CLI runner, called
                as ``runner(prompt, work_dir)``.
            timeout: Timeout in seconds passed to the default runner.
        """
        self.work_dir = Path(work_dir)
        self.cli = cli
        self.run_id = run_id or settings.run_id
        self.raw_root = Path(raw_root)
        self.timeout = timeout

        def _invoke_cli(prompt, wd):
            # self.cli / self.timeout are read at call time, so later changes
            # to those attributes are honoured.
            return _default_runner(prompt, wd, cli=self.cli, timeout=self.timeout)

        self._runner: Runner = runner or _invoke_cli

    def _persist(self, codebase: dict, interaction_log: list[dict],
                 raw_events: list[dict]) -> Path:
        """Write the run's artefacts to ``<raw_root>/<run_id>/<name>``.

        Stores the three JSON artefacts and replaces ``code/`` with a fresh
        copy of ``work_dir``. Returns the destination directory.
        """
        dest = self.raw_root / self.run_id / self.name
        dest.mkdir(parents=True, exist_ok=True)
        artefacts = {
            "codebase.json": codebase,
            "interaction_log.json": interaction_log,
            "raw_stream.json": raw_events,
        }
        for filename, payload in artefacts.items():
            (dest / filename).write_text(json.dumps(payload, indent=2))
        code_copy = dest / "code"
        if code_copy.exists():
            # copytree refuses to write into an existing directory.
            shutil.rmtree(code_copy)
        shutil.copytree(self.work_dir, code_copy)
        return dest

    def generate(self, spec: dict) -> tuple[dict, list[dict]]:
        """Run the agent on ``spec`` and return ``(codebase, interaction_log)``.

        Creates ``work_dir`` if needed, runs the runner with the rendered
        prompt, converts its events to contract events, reads the codebase
        back from ``work_dir`` and persists everything.
        """
        self.work_dir.mkdir(parents=True, exist_ok=True)
        raw_events = list(self._runner(_build_prompt(spec), self.work_dir))
        interaction_log = _to_contract_events(raw_events)
        codebase = _load_codebase(self.work_dir)
        self._persist(codebase, interaction_log, raw_events)
        return codebase, interaction_log
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
"""cursor_agent adapter — vendor: Cursor (agent mode CLI).
|
|
2
|
+
|
|
3
|
+
Drives ``cursor-agent`` non-interactively against the spec, captures the
|
|
4
|
+
streamed agent events, and reads the produced codebase from the work_dir.
|
|
5
|
+
|
|
6
|
+
Capture contract: see docs/METHODOLOGY.md. Cursor is agentic, so every
|
|
7
|
+
captured event maps to ``agent_action`` with vendor detail preserved as
|
|
8
|
+
sibling keys (``subtype``, ``tool``).
|
|
9
|
+
"""
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import json
|
|
13
|
+
import shutil
|
|
14
|
+
import subprocess
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
from typing import Callable, Iterable
|
|
17
|
+
|
|
18
|
+
from auditor.adapters.base_adapter import BaseAdapter
|
|
19
|
+
from auditor.core.config import settings
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# File suffixes that count as part of the generated codebase; _load_codebase
# skips every file whose suffix is not listed here.
_CODE_SUFFIXES = {
    ".py", ".js", ".jsx", ".ts", ".tsx",
    ".go", ".rs", ".java", ".rb",
    ".sql", ".yaml", ".yml", ".toml", ".md",
}

# Injectable runner signature: runner(prompt, work_dir) -> iterable of raw events.
Runner = Callable[[str, Path], Iterable[dict]]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _default_runner(prompt: str, work_dir: Path, cli: str = "cursor-agent",
                    timeout: int = 600) -> list[dict]:
    """Run the Cursor agent CLI once and return the JSON objects it printed.

    Executes ``<cli> -p --output-format stream-json --force --model auto
    <prompt>`` with ``work_dir`` as the working directory and parses stdout
    line by line.

    Args:
        prompt: Prompt text passed as the final positional argument.
        work_dir: Directory the CLI is run in.
        cli: Executable name or path of the CLI.
        timeout: Seconds after which ``subprocess.run`` aborts the call.

    Returns:
        One dict per stdout line that decodes to a JSON object, in output
        order. Blank lines, undecodable lines and non-object JSON values are
        dropped.

    Raises:
        subprocess.TimeoutExpired: The CLI did not finish within ``timeout``.
        OSError: The executable could not be started.
    """
    proc = subprocess.run(
        [cli, "-p", "--output-format", "stream-json", "--force",
         "--model", "auto", prompt],
        cwd=str(work_dir), capture_output=True, text=True,
        timeout=timeout, check=False,  # a non-zero exit status is not treated as an error
    )
    events: list[dict] = []
    for raw_line in proc.stdout.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        try:
            event = json.loads(line)
        except json.JSONDecodeError:
            # Best effort: lines that are not JSON are skipped, not fatal.
            continue
        # A line such as ``42`` or ``"ok"`` is valid JSON but not an event;
        # keeping it would break consumers that call ``.get`` on every entry.
        if isinstance(event, dict):
            events.append(event)
    return events
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _build_prompt(spec: dict) -> str:
    """Render ``spec`` into the single prompt string handed to the agent.

    The prompt is a fixed instruction preamble followed by the spec
    serialised as 2-space-indented JSON under a ``SPEC:`` header.
    """
    preamble = (
        "Implement the specification below in the current working directory. "
        "Do not introduce features outside the listed set.\n\n"
    )
    spec_json = json.dumps(spec, indent=2)
    return preamble + "SPEC:\n" + spec_json + "\n"
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _to_contract_events(raw_events: Iterable[dict]) -> list[dict]:
    """Map raw Cursor agent events onto capture-contract events.

    Every event becomes an ``agent_action``. For each output key the first
    truthy value of a preferred/fallback pair of raw keys is used:
    ``type``/``event`` -> ``subtype``, ``subtype``/``status`` -> ``detail``,
    ``tool``/``tool_name`` -> ``tool``.
    """
    # output key -> (preferred raw key, fallback raw key)
    key_pairs = (
        ("subtype", "type", "event"),
        ("detail", "subtype", "status"),
        ("tool", "tool", "tool_name"),
    )
    converted: list[dict] = []
    for event in raw_events:
        entry = {"type": "agent_action"}
        for target, preferred, fallback in key_pairs:
            # ``or`` semantics: a falsy preferred value falls through.
            entry[target] = event.get(preferred) or event.get(fallback)
        converted.append(entry)
    return converted
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _load_codebase(work_dir: Path) -> dict:
    """Read the source tree under ``work_dir`` into a codebase dict.

    Every regular file below ``work_dir`` whose suffix is in
    ``_CODE_SUFFIXES`` is read as UTF-8, except files named
    ``manifest.json`` and anything whose relative path contains an excluded
    directory name or a ``*.egg-info`` component.

    Args:
        work_dir: Root directory of the generated project.

    Returns:
        ``{"files": {posix_relative_path: text}, "manifest": manifest}``,
        where ``manifest`` is the parsed ``<work_dir>/manifest.json`` or ``[]``
        when that file does not exist. Files are inserted in sorted path order.

    Raises:
        FileNotFoundError: ``work_dir`` is not an existing directory.
    """
    work_dir = Path(work_dir)
    if not work_dir.is_dir():
        raise FileNotFoundError(f"work_dir not found: {work_dir}")
    _EXCLUDE_DIRS = {".venv", "venv", "env", "__pycache__", "node_modules",
                     ".pytest_cache", ".git", "site-packages", "dist", "build",
                     ".mypy_cache", ".ruff_cache", "egg-info"}
    files: dict[str, str] = {}
    for path in sorted(work_dir.rglob("*")):
        if not path.is_file() or path.name == "manifest.json":
            continue
        if path.suffix not in _CODE_SUFFIXES:
            continue
        rel = path.relative_to(work_dir)
        if any(part in _EXCLUDE_DIRS or part.endswith(".egg-info")
               for part in rel.parts):
            continue
        files[rel.as_posix()] = path.read_text(encoding="utf-8")
    manifest_path = work_dir / "manifest.json"
    # Parse from bytes: json.loads then detects UTF-8/16/32 (BOM included)
    # itself instead of depending on the platform's locale encoding.
    manifest = json.loads(manifest_path.read_bytes()) if manifest_path.exists() else []
    return {"files": files, "manifest": manifest}
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class CursorAgentAdapter(BaseAdapter):
    """Adapter that generates a codebase by driving the ``cursor-agent`` CLI."""

    name = "cursor_agent"

    def __init__(self, work_dir: str | Path, cli: str = "cursor-agent",
                 run_id: str | None = None, raw_root: str | Path = "data/raw",
                 runner: Runner | None = None, timeout: int = 600):
        """Configure the adapter.

        Args:
            work_dir: Directory the agent writes the generated code into.
            cli: Executable used by the default runner.
            run_id: Identifier of this run; ``settings.run_id`` is used when
                it is falsy.
            raw_root: Root folder under which raw artefacts are persisted.
            runner: Optional replacement for the default CLI runner, called
                as ``runner(prompt, work_dir)``.
            timeout: Timeout in seconds passed to the default runner.
        """
        self.work_dir = Path(work_dir)
        self.cli = cli
        self.run_id = run_id or settings.run_id
        self.raw_root = Path(raw_root)
        self.timeout = timeout

        def _invoke_cli(prompt, wd):
            # self.cli / self.timeout are read at call time, so later changes
            # to those attributes are honoured.
            return _default_runner(prompt, wd, cli=self.cli, timeout=self.timeout)

        self._runner: Runner = runner or _invoke_cli

    def _persist(self, codebase, interaction_log, raw_events) -> Path:
        """Write the run's artefacts to ``<raw_root>/<run_id>/<name>``.

        Stores the three JSON artefacts and replaces ``code/`` with a fresh
        copy of ``work_dir``. Returns the destination directory.
        """
        dest = self.raw_root / self.run_id / self.name
        dest.mkdir(parents=True, exist_ok=True)
        artefacts = (
            ("codebase.json", codebase),
            ("interaction_log.json", interaction_log),
            ("raw_stream.json", raw_events),
        )
        for filename, payload in artefacts:
            (dest / filename).write_text(json.dumps(payload, indent=2))
        code_copy = dest / "code"
        if code_copy.exists():
            # copytree refuses to write into an existing directory.
            shutil.rmtree(code_copy)
        shutil.copytree(self.work_dir, code_copy)
        return dest

    def generate(self, spec: dict) -> tuple[dict, list[dict]]:
        """Run the agent on ``spec`` and return ``(codebase, interaction_log)``.

        Creates ``work_dir`` if needed, runs the runner with the rendered
        prompt, converts its events to contract events, reads the codebase
        back from ``work_dir`` and persists everything.
        """
        self.work_dir.mkdir(parents=True, exist_ok=True)
        raw_events = list(self._runner(_build_prompt(spec), self.work_dir))
        interaction_log = _to_contract_events(raw_events)
        codebase = _load_codebase(self.work_dir)
        self._persist(codebase, interaction_log, raw_events)
        return codebase, interaction_log
|