code-audit-validator 1.2.0__tar.gz → 1.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {code_audit_validator-1.2.0/evals/code_audit_validator.egg-info → code_audit_validator-1.3.0}/PKG-INFO +21 -2
- {code_audit_validator-1.2.0 → code_audit_validator-1.3.0}/README.md +19 -8
- {code_audit_validator-1.2.0 → code_audit_validator-1.3.0}/evals/README.md +20 -1
- {code_audit_validator-1.2.0 → code_audit_validator-1.3.0/evals/code_audit_validator.egg-info}/PKG-INFO +21 -2
- {code_audit_validator-1.2.0 → code_audit_validator-1.3.0}/evals/code_audit_validator.py +286 -50
- {code_audit_validator-1.2.0 → code_audit_validator-1.3.0}/pyproject.toml +1 -1
- {code_audit_validator-1.2.0 → code_audit_validator-1.3.0}/LICENSE +0 -0
- {code_audit_validator-1.2.0 → code_audit_validator-1.3.0}/NOTICE +0 -0
- {code_audit_validator-1.2.0 → code_audit_validator-1.3.0}/evals/code_audit_validator.egg-info/SOURCES.txt +0 -0
- {code_audit_validator-1.2.0 → code_audit_validator-1.3.0}/evals/code_audit_validator.egg-info/dependency_links.txt +0 -0
- {code_audit_validator-1.2.0 → code_audit_validator-1.3.0}/evals/code_audit_validator.egg-info/entry_points.txt +0 -0
- {code_audit_validator-1.2.0 → code_audit_validator-1.3.0}/evals/code_audit_validator.egg-info/top_level.txt +0 -0
- {code_audit_validator-1.2.0 → code_audit_validator-1.3.0}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: code-audit-validator
|
|
3
|
-
Version: 1.2.0
|
|
3
|
+
Version: 1.3.0
|
|
4
4
|
Summary: Deterministic conformance checker for AUDIT.md agent-audit outputs — CyberSkill code-audit-framework
|
|
5
5
|
Author-email: CyberSkill <info@cyberskill.world>
|
|
6
6
|
License:
|
|
@@ -256,8 +256,9 @@ python3 evals/validate.py --all # full suite, human output
|
|
|
256
256
|
python3 evals/validate.py --all --json # machine-readable
|
|
257
257
|
./evals/run-evals.sh --record # run + pin baseline to current AUDIT.md
|
|
258
258
|
python3 evals/validate.py --run <dir> # validate a real run's docs/ output
|
|
259
|
-
python3 evals/validate.py --run <dir> --report json # structured findings export (
|
|
259
|
+
python3 evals/validate.py --run <dir> --report json # structured findings export (schemas/report.v1.json)
|
|
260
260
|
python3 evals/validate.py --run <dir> --report sarif # GitHub code-scanning format
|
|
261
|
+
python3 evals/validate.py --aggregate r1.json r2.json # portfolio roll-up over report JSONs
|
|
261
262
|
python3 evals/scripts/retro-summary.py # retro scores per protocol version (did each release help?)
|
|
262
263
|
```
|
|
263
264
|
|
|
@@ -265,6 +266,24 @@ Point `--run` at the target repo root (or its `docs/`): if the target's
|
|
|
265
266
|
`AUDIT.md` is found, its CONFIG is preflighted and `PROTECTED_AREAS` is loaded
|
|
266
267
|
automatically; `--protected` extends it.
|
|
267
268
|
|
|
269
|
+
**Waivers.** A target repo may carry `docs/AUDIT-WAIVERS.yaml` — audit-trailed,
|
|
270
|
+
*expiring* suppressions (`code` + optional `file`/`match` + `reason` +
|
|
271
|
+
`approved_by` + mandatory ISO `expires`). A valid waiver suppresses the matched
|
|
272
|
+
violation and is reported separately; an expired or undated one un-suppresses
|
|
273
|
+
it AND flags the stale waiver (`WAIVER-EXPIRED`). This is the sanctioned
|
|
274
|
+
exception channel — eval fixtures, by contrast, may never be weakened.
|
|
275
|
+
|
|
276
|
+
**Parsing notes (precision boundaries, pinned by fixtures).** Tables inside
|
|
277
|
+
``` fences are raw evidence, never artifacts (G07/B19). Tables must use
|
|
278
|
+
leading-pipe GFM rows — the exact Phase 2 template shape; pipeless variants
|
|
279
|
+
read as nonconformant. Protected-area matching is case-insensitive substring —
|
|
280
|
+
keep CONFIG entries specific (`src/billing/`, not `src/`). Artifacts must be
|
|
281
|
+
UTF-8 and ≤ 10 MB (`MALFORMED-FILE` otherwise, never a crash).
|
|
282
|
+
|
|
283
|
+
**Version pinning.** The validator checks the *current* protocol's template.
|
|
284
|
+
Validating artifacts produced under an older protocol? Pin the matching tag
|
|
285
|
+
(validator and protocol release in lockstep: `v1.2.0` ↔ protocol v1.2.0).
|
|
286
|
+
|
|
268
287
|
## Adding a fixture
|
|
269
288
|
|
|
270
289
|
1. Create `evals/fixtures/<Gnn|Bnn>-<slug>/` with `fixture.yaml` + `docs/BACKLOG.md` (and `docs/HANDOFF.md` if relevant).
|
|
@@ -13,10 +13,10 @@ weakens a rule.
|
|
|
13
13
|
|
|
14
14
|
| | |
|
|
15
15
|
|---|---|
|
|
16
|
-
| Protocol | [`AUDIT.md`](./AUDIT.md) — current release **v1.2.0** |
|
|
16
|
+
| Protocol | [`AUDIT.md`](./AUDIT.md) — current release **v1.3.0** |
|
|
17
17
|
| History | [`CHANGELOG.md`](./CHANGELOG.md) · immutable copies in [`improve/versions/`](./improve/versions/) |
|
|
18
18
|
| Self-improvement | [`improve/CRITIC.md`](./improve/CRITIC.md) — one evidenced change per cycle |
|
|
19
|
-
| Regression gate | [`evals/`](./evals/) — **
|
|
19
|
+
| Regression gate | [`evals/`](./evals/) — **34 fixtures, 34/34 green** at v1.3.0, stdlib-only Python; enforced in CI on every push |
|
|
20
20
|
| For agents | [`AGENTS.md`](./AGENTS.md) — machine-facing operating rules for this repo |
|
|
21
21
|
| License | [Apache-2.0](./LICENSE) · [`CONTRIBUTING.md`](./CONTRIBUTING.md) · [`SECURITY.md`](./SECURITY.md) |
|
|
22
22
|
|
|
@@ -152,8 +152,11 @@ the GitHub Action below. Re-running the same kickoff prompt resumes idempotently
|
|
|
152
152
|
**No clone needed — two distribution channels for step 3:**
|
|
153
153
|
|
|
154
154
|
```bash
|
|
155
|
-
#
|
|
156
|
-
|
|
155
|
+
# From PyPI (https://pypi.org/project/code-audit-validator/):
|
|
156
|
+
pipx install code-audit-validator # or: uvx code-audit-validate --run .
|
|
157
|
+
code-audit-validate --run . --report json
|
|
158
|
+
|
|
159
|
+
# Or straight from the repo (@v1 = floating major tag; pin a release tag for immutability):
|
|
157
160
|
uvx --from git+https://github.com/cyberskill-official/code-audit-framework@v1 \
|
|
158
161
|
code-audit-validate --run . --report json
|
|
159
162
|
```
|
|
@@ -166,8 +169,16 @@ uvx --from git+https://github.com/cyberskill-official/code-audit-framework@v1 \
|
|
|
166
169
|
report: json # optional; also writes audit-report.json
|
|
167
170
|
```
|
|
168
171
|
|
|
169
|
-
(The packaged entry point covers `--run`/`--report`; the fixture
|
|
170
|
-
`--all` stays repo-only, since fixtures ship with the repo, not the wheel.)
|
|
172
|
+
(The packaged entry point covers `--run`/`--report`/`--aggregate`; the fixture
|
|
173
|
+
suite `--all` stays repo-only, since fixtures ship with the repo, not the wheel.)
|
|
174
|
+
|
|
175
|
+
Two operational notes: accepted exceptions go in the target's
|
|
176
|
+
`docs/AUDIT-WAIVERS.yaml` — audit-trailed suppressions with a reason, an
|
|
177
|
+
approver, and a **mandatory expiry** (expired waivers re-raise the finding and
|
|
178
|
+
flag the stale waiver). And the validator is **offline by design**: stdlib-only,
|
|
179
|
+
no network calls, no telemetry — nothing about the audited codebase leaves the
|
|
180
|
+
machine, which makes it safe for air-gapped and regulated environments
|
|
181
|
+
(see [`COMPLIANCE.md`](./COMPLIANCE.md)).
|
|
171
182
|
|
|
172
183
|
**Improving the protocol itself, scripted the same way** (Job B in
|
|
173
184
|
[`AGENTS.md`](./AGENTS.md) — the file agents are pointed at once they're
|
|
@@ -215,7 +226,7 @@ regression-tested, and changed only with evidence.
|
|
|
215
226
|
improve/CRITIC.md ── ONE minimal change; PATCH/MINOR/MAJOR
|
|
216
227
|
│
|
|
217
228
|
▼
|
|
218
|
-
evals/validate.py --all ──
|
|
229
|
+
evals/validate.py --all ── 34 fixtures must stay green
|
|
219
230
|
│
|
|
220
231
|
▼
|
|
221
232
|
CHANGELOG.md + improve/versions/AUDIT-vX.Y.Z.md (immutable release)
|
|
@@ -262,7 +273,7 @@ Full evidence trail: [`CHANGELOG.md`](./CHANGELOG.md),
|
|
|
262
273
|
## The regression harness
|
|
263
274
|
|
|
264
275
|
```bash
|
|
265
|
-
python3 evals/validate.py --all #
|
|
276
|
+
python3 evals/validate.py --all # 34 fixtures: G* must pass, B* must trip
|
|
266
277
|
./evals/run-evals.sh --record # run + pin baseline.json to AUDIT.md's sha256
|
|
267
278
|
python3 evals/validate.py --run DIR # validate any real run's docs/ output
|
|
268
279
|
python3 evals/validate.py --run DIR --report json # structured findings export (or: sarif)
|
|
@@ -35,8 +35,9 @@ python3 evals/validate.py --all # full suite, human output
|
|
|
35
35
|
python3 evals/validate.py --all --json # machine-readable
|
|
36
36
|
./evals/run-evals.sh --record # run + pin baseline to current AUDIT.md
|
|
37
37
|
python3 evals/validate.py --run <dir> # validate a real run's docs/ output
|
|
38
|
-
python3 evals/validate.py --run <dir> --report json # structured findings export (
|
|
38
|
+
python3 evals/validate.py --run <dir> --report json # structured findings export (schemas/report.v1.json)
|
|
39
39
|
python3 evals/validate.py --run <dir> --report sarif # GitHub code-scanning format
|
|
40
|
+
python3 evals/validate.py --aggregate r1.json r2.json # portfolio roll-up over report JSONs
|
|
40
41
|
python3 evals/scripts/retro-summary.py # retro scores per protocol version (did each release help?)
|
|
41
42
|
```
|
|
42
43
|
|
|
@@ -44,6 +45,24 @@ Point `--run` at the target repo root (or its `docs/`): if the target's
|
|
|
44
45
|
`AUDIT.md` is found, its CONFIG is preflighted and `PROTECTED_AREAS` is loaded
|
|
45
46
|
automatically; `--protected` extends it.
|
|
46
47
|
|
|
48
|
+
**Waivers.** A target repo may carry `docs/AUDIT-WAIVERS.yaml` — audit-trailed,
|
|
49
|
+
*expiring* suppressions (`code` + optional `file`/`match` + `reason` +
|
|
50
|
+
`approved_by` + mandatory ISO `expires`). A valid waiver suppresses the matched
|
|
51
|
+
violation and is reported separately; an expired or undated one un-suppresses
|
|
52
|
+
it AND flags the stale waiver (`WAIVER-EXPIRED`). This is the sanctioned
|
|
53
|
+
exception channel — eval fixtures, by contrast, may never be weakened.
|
|
54
|
+
|
|
55
|
+
**Parsing notes (precision boundaries, pinned by fixtures).** Tables inside
|
|
56
|
+
``` fences are raw evidence, never artifacts (G07/B19). Tables must use
|
|
57
|
+
leading-pipe GFM rows — the exact Phase 2 template shape; pipeless variants
|
|
58
|
+
read as nonconformant. Protected-area matching is case-insensitive substring —
|
|
59
|
+
keep CONFIG entries specific (`src/billing/`, not `src/`). Artifacts must be
|
|
60
|
+
UTF-8 and ≤ 10 MB (`MALFORMED-FILE` otherwise, never a crash).
|
|
61
|
+
|
|
62
|
+
**Version pinning.** The validator checks the *current* protocol's template.
|
|
63
|
+
Validating artifacts produced under an older protocol? Pin the matching tag
|
|
64
|
+
(validator and protocol release in lockstep: `v1.2.0` ↔ protocol v1.2.0).
|
|
65
|
+
|
|
47
66
|
## Adding a fixture
|
|
48
67
|
|
|
49
68
|
1. Create `evals/fixtures/<Gnn|Bnn>-<slug>/` with `fixture.yaml` + `docs/BACKLOG.md` (and `docs/HANDOFF.md` if relevant).
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: code-audit-validator
|
|
3
|
-
Version: 1.2.0
|
|
3
|
+
Version: 1.3.0
|
|
4
4
|
Summary: Deterministic conformance checker for AUDIT.md agent-audit outputs — CyberSkill code-audit-framework
|
|
5
5
|
Author-email: CyberSkill <info@cyberskill.world>
|
|
6
6
|
License:
|
|
@@ -256,8 +256,9 @@ python3 evals/validate.py --all # full suite, human output
|
|
|
256
256
|
python3 evals/validate.py --all --json # machine-readable
|
|
257
257
|
./evals/run-evals.sh --record # run + pin baseline to current AUDIT.md
|
|
258
258
|
python3 evals/validate.py --run <dir> # validate a real run's docs/ output
|
|
259
|
-
python3 evals/validate.py --run <dir> --report json # structured findings export (
|
|
259
|
+
python3 evals/validate.py --run <dir> --report json # structured findings export (schemas/report.v1.json)
|
|
260
260
|
python3 evals/validate.py --run <dir> --report sarif # GitHub code-scanning format
|
|
261
|
+
python3 evals/validate.py --aggregate r1.json r2.json # portfolio roll-up over report JSONs
|
|
261
262
|
python3 evals/scripts/retro-summary.py # retro scores per protocol version (did each release help?)
|
|
262
263
|
```
|
|
263
264
|
|
|
@@ -265,6 +266,24 @@ Point `--run` at the target repo root (or its `docs/`): if the target's
|
|
|
265
266
|
`AUDIT.md` is found, its CONFIG is preflighted and `PROTECTED_AREAS` is loaded
|
|
266
267
|
automatically; `--protected` extends it.
|
|
267
268
|
|
|
269
|
+
**Waivers.** A target repo may carry `docs/AUDIT-WAIVERS.yaml` — audit-trailed,
|
|
270
|
+
*expiring* suppressions (`code` + optional `file`/`match` + `reason` +
|
|
271
|
+
`approved_by` + mandatory ISO `expires`). A valid waiver suppresses the matched
|
|
272
|
+
violation and is reported separately; an expired or undated one un-suppresses
|
|
273
|
+
it AND flags the stale waiver (`WAIVER-EXPIRED`). This is the sanctioned
|
|
274
|
+
exception channel — eval fixtures, by contrast, may never be weakened.
|
|
275
|
+
|
|
276
|
+
**Parsing notes (precision boundaries, pinned by fixtures).** Tables inside
|
|
277
|
+
``` fences are raw evidence, never artifacts (G07/B19). Tables must use
|
|
278
|
+
leading-pipe GFM rows — the exact Phase 2 template shape; pipeless variants
|
|
279
|
+
read as nonconformant. Protected-area matching is case-insensitive substring —
|
|
280
|
+
keep CONFIG entries specific (`src/billing/`, not `src/`). Artifacts must be
|
|
281
|
+
UTF-8 and ≤ 10 MB (`MALFORMED-FILE` otherwise, never a crash).
|
|
282
|
+
|
|
283
|
+
**Version pinning.** The validator checks the *current* protocol's template.
|
|
284
|
+
Validating artifacts produced under an older protocol? Pin the matching tag
|
|
285
|
+
(validator and protocol release in lockstep: `v1.2.0` ↔ protocol v1.2.0).
|
|
286
|
+
|
|
268
287
|
## Adding a fixture
|
|
269
288
|
|
|
270
289
|
1. Create `evals/fixtures/<Gnn|Bnn>-<slug>/` with `fixture.yaml` + `docs/BACKLOG.md` (and `docs/HANDOFF.md` if relevant).
|
|
@@ -29,8 +29,15 @@ machine-checkable subset of AUDIT.md's core rules:
|
|
|
29
29
|
unedited <placeholder> text (Phase 0 preflight)
|
|
30
30
|
CONFIG-BAD-ENUM MODE / DEPTH / BENCHMARK_MODE / SEVERITY_FLOOR outside its
|
|
31
31
|
allowed set (Phase 0 preflight)
|
|
32
|
+
MALFORMED-FILE artifact is not valid UTF-8 text or exceeds the size
|
|
33
|
+
ceiling — a verdict, never a traceback
|
|
34
|
+
WAIVER-EXPIRED a docs/AUDIT-WAIVERS.yaml entry matched a violation but is
|
|
35
|
+
expired/undated; the original violation stays active
|
|
32
36
|
|
|
33
37
|
A loop with zero findings is VALID (R7): absence of tasks is never a violation.
|
|
38
|
+
Waivers (docs/AUDIT-WAIVERS.yaml in the target repo) suppress matched
|
|
39
|
+
violations with an audit trail: code + reason + approved_by + expires (ISO
|
|
40
|
+
date, mandatory). Expired waivers un-suppress and are themselves flagged.
|
|
34
41
|
|
|
35
42
|
When the run directory (or its parent, if you point --run at docs/ itself)
|
|
36
43
|
contains the target's AUDIT.md, the CONFIG block is preflighted and
|
|
@@ -41,6 +48,7 @@ Usage:
|
|
|
41
48
|
python3 evals/validate.py --run <dir-containing-docs> [--protected p1,p2]
|
|
42
49
|
python3 evals/validate.py --run <dir> --report json # structured findings export
|
|
43
50
|
python3 evals/validate.py --run <dir> --report sarif # GitHub code-scanning format
|
|
51
|
+
python3 evals/validate.py --aggregate r1.json r2.json # portfolio roll-up of report JSONs
|
|
44
52
|
python3 evals/validate.py --all # run every fixture, compare to expectations
|
|
45
53
|
python3 evals/validate.py --all --json # machine-readable results
|
|
46
54
|
|
|
@@ -110,11 +118,25 @@ def split_cells(line: str):
|
|
|
110
118
|
|
|
111
119
|
|
|
112
120
|
def parse_tables(text: str):
|
|
113
|
-
"""Yield (header_cells, rows, end_line_idx) for every markdown table.
|
|
121
|
+
"""Yield (header_cells, rows, end_line_idx) for every markdown table.
|
|
122
|
+
|
|
123
|
+
Fence-aware (architect review F-1): R1 *requires* pasting raw tool output
|
|
124
|
+
into ``` fences, and that output may itself contain GFM-table-shaped lines
|
|
125
|
+
(markdown-emitting coverage/lint tools, `gh` CLI). Quoted lines inside a
|
|
126
|
+
fence are raw evidence, not run artifacts — they must neither trip
|
|
127
|
+
task/benchmark checks nor count toward template conformance.
|
|
128
|
+
`section_fences` keeps its own independent fence walk."""
|
|
114
129
|
lines = text.splitlines()
|
|
115
|
-
i = 0
|
|
130
|
+
i, in_fence = 0, False
|
|
116
131
|
while i < len(lines):
|
|
117
132
|
line = lines[i].strip()
|
|
133
|
+
if line.startswith("```"):
|
|
134
|
+
in_fence = not in_fence
|
|
135
|
+
i += 1
|
|
136
|
+
continue
|
|
137
|
+
if in_fence:
|
|
138
|
+
i += 1
|
|
139
|
+
continue
|
|
118
140
|
if line.startswith("|") and i + 1 < len(lines) and re.match(r"^\|[\s:|-]+\|?$", lines[i + 1].strip()):
|
|
119
141
|
header = split_cells(line)
|
|
120
142
|
rows, j = [], i + 2
|
|
@@ -214,11 +236,13 @@ def check_benchmark_like_table(header, rows, end_idx, text, violations, src, is_
|
|
|
214
236
|
fences = section_fences(text, end_idx)
|
|
215
237
|
if has_measured_row and not fences:
|
|
216
238
|
violations.append(("R1-NO-OUTPUT", src, "table has MEASURED/measured rows but no fenced raw-output block before next heading"))
|
|
217
|
-
# …and each measured row must be traceable to ITS verify command
|
|
239
|
+
# …and each measured row must be traceable to ITS verify command.
|
|
240
|
+
# Whitespace-normalized containment (architect review F-6): a long command
|
|
241
|
+
# re-wrapped across lines inside the fence is still the same command.
|
|
218
242
|
elif fences:
|
|
219
|
-
|
|
243
|
+
joined_ws = " ".join("\n".join(fences).split())
|
|
220
244
|
for metric, verify in measured_rows:
|
|
221
|
-
if verify and verify not in {"—", "-", ""} and verify not in
|
|
245
|
+
if verify and verify not in {"—", "-", ""} and " ".join(verify.split()) not in joined_ws:
|
|
222
246
|
violations.append(("R1-UNLINKED-OUTPUT", src, f"measured metric '{metric}': verify command '{verify}' appears in no fenced output block"))
|
|
223
247
|
|
|
224
248
|
|
|
@@ -242,14 +266,16 @@ def check_task_table(header, rows, violations, src, protected):
|
|
|
242
266
|
if status == "BLOCKED" and "root cause" not in " ".join(r).lower():
|
|
243
267
|
violations.append(("R6-NO-ROOTCAUSE", src, f"BLOCKED task '{tid}' has no 'Root cause:' note"))
|
|
244
268
|
if status == "DONE" and protected:
|
|
245
|
-
joined = " ".join(r)
|
|
269
|
+
joined = " ".join(r).casefold() # case-insensitive: src/Billing == src/billing (F-6)
|
|
246
270
|
for p in protected:
|
|
247
|
-
if p and p in joined:
|
|
271
|
+
if p and p.casefold() in joined:
|
|
248
272
|
violations.append(("R3-PROTECTED", src, f"DONE task '{tid}' touches protected path '{p}'"))
|
|
249
273
|
|
|
250
274
|
|
|
251
275
|
APPROVED_RE = re.compile(r"^Approved:\s*(.+)$", re.MULTILINE)
|
|
252
|
-
|
|
276
|
+
# `Mode:` may open the Scope line or follow another field (`Protocol: … | Mode: …`
|
|
277
|
+
# since v1.3.0) — match at line start or after a `|` separator, never mid-prose.
|
|
278
|
+
MODE_GATED_RE = re.compile(r"(?mi)(?:^|\|)\s*-?\s*Mode:\s*gated\b")
|
|
253
279
|
EXECUTED_STATUSES = {"DONE", "IN-PROGRESS", "BLOCKED"}
|
|
254
280
|
|
|
255
281
|
|
|
@@ -294,19 +320,30 @@ def check_secrets(text, violations, src):
|
|
|
294
320
|
violations.append(("R8-SECRET", src, f"unredacted {kind} matching '{m.group(0)[:12]}…'"))
|
|
295
321
|
|
|
296
322
|
|
|
297
|
-
MODE_LINE_RE = re.compile(r"(?mi)
|
|
323
|
+
MODE_LINE_RE = re.compile(r"(?mi)(?:^|\|)\s*-?\s*Mode:\s*\S+")
|
|
324
|
+
PROTO_LINE_RE = re.compile(r"(?mi)(?:^|\|)\s*-?\s*Protocol:\s*v(\d+)\.(\d+)\.(\d+)\b")
|
|
298
325
|
NO_FINDINGS_RE = re.compile(r"No significant findings", re.IGNORECASE)
|
|
299
326
|
|
|
327
|
+
# The template requirements this validator enforces, keyed to the protocol
|
|
328
|
+
# release it ships with (kept in lockstep by check-docs-sync.py). Artifacts
|
|
329
|
+
# that echo an older `Protocol:` are judged by THAT version's template —
|
|
330
|
+
# version-aware validation, architect review F-5. Artifacts without the echo
|
|
331
|
+
# are assumed current (and, from v1.3.0 on, flagged for omitting it).
|
|
332
|
+
CURRENT_PROTOCOL = (1, 3, 0)
|
|
333
|
+
MODE_ECHO_SINCE = (1, 1, 0)
|
|
334
|
+
PROTO_ECHO_SINCE = (1, 3, 0)
|
|
335
|
+
|
|
300
336
|
|
|
301
337
|
def check_template_conformance(text, violations, src):
|
|
302
338
|
"""BLINDSPOTS BS-12 — the meta-tripwire. Every other check activates only
|
|
303
339
|
when output LOOKS like the Phase 2 template (pipe tables, headings, Mode
|
|
304
340
|
echo); a run that emits prose instead silently escapes all of them. This
|
|
305
341
|
converts that silent escape into a violation, making the rest of the rule
|
|
306
|
-
set load-bearing. Per loop section the template requires
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
342
|
+
set load-bearing. Per loop section the template requires (since v1.3.0) a
|
|
343
|
+
`Protocol:` echo, (since v1.1.0) a `Mode:` line, and — all versions —
|
|
344
|
+
EITHER (benchmark table AND task table) OR the R7 "No significant
|
|
345
|
+
findings" line. Requirements are gated on the section's stated protocol
|
|
346
|
+
version, so older artifacts are judged by their own template (F-5)."""
|
|
310
347
|
sections = re.split(r"(?m)^##\s+(?=Loop\b)", text)[1:]
|
|
311
348
|
if not sections:
|
|
312
349
|
violations.append(("TEMPLATE-NONCONFORMANT", src,
|
|
@@ -314,7 +351,13 @@ def check_template_conformance(text, violations, src):
|
|
|
314
351
|
return
|
|
315
352
|
for sec in sections:
|
|
316
353
|
loop_id = (sec.splitlines() or ["?"])[0].strip()
|
|
317
|
-
|
|
354
|
+
pm = PROTO_LINE_RE.search(sec)
|
|
355
|
+
proto = tuple(int(g) for g in pm.groups()) if pm else CURRENT_PROTOCOL
|
|
356
|
+
if pm is None and CURRENT_PROTOCOL >= PROTO_ECHO_SINCE:
|
|
357
|
+
violations.append(("TEMPLATE-NONCONFORMANT", src,
|
|
358
|
+
f"'{loop_id}': Scope & method has no 'Protocol:' echo (required since v1.3.0; "
|
|
359
|
+
f"artifacts from older protocol versions validate with the matching release tag)"))
|
|
360
|
+
if proto >= MODE_ECHO_SINCE and not MODE_LINE_RE.search(sec):
|
|
318
361
|
violations.append(("TEMPLATE-NONCONFORMANT", src,
|
|
319
362
|
f"'{loop_id}': Scope & method has no 'Mode:' line (required since v1.1.0)"))
|
|
320
363
|
tables = list(parse_tables(sec))
|
|
@@ -332,15 +375,28 @@ CONFIG_ENUMS = {
|
|
|
332
375
|
"BENCHMARK_MODE": {"auto", "provided", "none"},
|
|
333
376
|
"SEVERITY_FLOOR": {"Critical", "High", "Medium", "Low"},
|
|
334
377
|
}
|
|
335
|
-
|
|
378
|
+
# Architect review F-4: `<...>` alone misreads Java/TS generics (List<OrderDTO>)
|
|
379
|
+
# and shell redirection (< seed.txt > out.log) as placeholders. A placeholder is
|
|
380
|
+
# either the WHOLE value wrapped in <...>, or text carrying one of the canonical
|
|
381
|
+
# template stems below (the literal phrasings shipped in AUDIT.md's CONFIG).
|
|
382
|
+
TEMPLATE_STEMS = ("<e.g.", "<one line", "<paths/", "<how to", "<constraints", "<optional:")
|
|
336
383
|
CONFIG_KEY_RE = re.compile(r"^([A-Z][A-Z_]+):\s*(.*)$")
|
|
337
384
|
|
|
338
385
|
|
|
386
|
+
def is_placeholder(value: str) -> bool:
    """Report whether a CONFIG value is still unedited template text.

    A value counts as a placeholder when, after trimming surrounding
    whitespace, either:

    * the whole value is wrapped in angle brackets with at least one
      character between them, or
    * its lower-cased form contains one of the stems in TEMPLATE_STEMS.

    Angle brackets that merely appear somewhere inside a value do not, on
    their own, make it a placeholder.
    """
    stripped = value.strip()
    fully_bracketed = (
        stripped[:1] == "<" and stripped[-1:] == ">" and len(stripped) > 2
    )
    if fully_bracketed:
        return True
    lowered = stripped.lower()
    for stem in TEMPLATE_STEMS:
        if stem in lowered:
            return True
    return False
|
|
392
|
+
|
|
393
|
+
|
|
339
394
|
def parse_audit_config(audit_md: Path):
|
|
340
395
|
"""Flat KEY: value parse of the CONFIG block in a target repo's AUDIT.md.
|
|
341
|
-
|
|
396
|
+
Comments are stripped only at >=2 spaces before '#' (the template's own
|
|
397
|
+
column style) so values like 'ticket #4211' survive intact (F-4)."""
|
|
342
398
|
cfg, in_config = {}, False
|
|
343
|
-
for line in audit_md.read_text(encoding="utf-8").splitlines():
|
|
399
|
+
for line in audit_md.read_text(encoding="utf-8", errors="replace").splitlines():
|
|
344
400
|
if re.match(r"^##\s*CONFIG\b", line):
|
|
345
401
|
in_config = True
|
|
346
402
|
continue
|
|
@@ -352,34 +408,136 @@ def parse_audit_config(audit_md: Path):
|
|
|
352
408
|
if not m:
|
|
353
409
|
continue
|
|
354
410
|
key, raw = m.groups()
|
|
355
|
-
cfg[key] = re.split(r"\s
|
|
411
|
+
cfg[key] = re.split(r"\s{2,}#", raw, 1)[0].strip()
|
|
356
412
|
return cfg
|
|
357
413
|
|
|
358
414
|
|
|
359
415
|
def check_config_preflight(target_root: Path, violations, protected):
|
|
360
416
|
"""Phase 0 CONFIG preflight (review gap G-D) + PROTECTED_AREAS auto-load
|
|
361
417
|
(gap G-F). Runs only when the target's AUDIT.md is present; placeholder
|
|
362
|
-
values never silently configure anything.
|
|
418
|
+
values never silently configure anything. Enum values are compared on the
|
|
419
|
+
first whitespace token, so an inline trailing comment can't fail the enum."""
|
|
363
420
|
audit = target_root / "AUDIT.md"
|
|
364
421
|
if not audit.exists():
|
|
365
422
|
return
|
|
366
423
|
cfg = parse_audit_config(audit)
|
|
367
424
|
for key, val in cfg.items():
|
|
368
|
-
if
|
|
425
|
+
if is_placeholder(val):
|
|
369
426
|
violations.append(("CONFIG-PLACEHOLDER", "AUDIT.md",
|
|
370
427
|
f"{key} still contains unedited template text: '{val[:60]}'"))
|
|
371
|
-
elif key in CONFIG_ENUMS and val
|
|
372
|
-
|
|
373
|
-
|
|
428
|
+
elif key in CONFIG_ENUMS and val:
|
|
429
|
+
token = val.split()[0]
|
|
430
|
+
if token not in CONFIG_ENUMS[key]:
|
|
431
|
+
violations.append(("CONFIG-BAD-ENUM", "AUDIT.md",
|
|
432
|
+
f"{key} '{token}' not in {sorted(CONFIG_ENUMS[key])}"))
|
|
374
433
|
areas = cfg.get("PROTECTED_AREAS", "")
|
|
375
|
-
if areas and not
|
|
434
|
+
if areas and not is_placeholder(areas):
|
|
376
435
|
for p in areas.split(","):
|
|
377
436
|
p = p.strip()
|
|
378
437
|
if p and p not in protected:
|
|
379
438
|
protected.append(p)
|
|
380
439
|
|
|
381
440
|
|
|
382
|
-
|
|
441
|
+
MAX_ARTIFACT_BYTES = 10 * 1024 * 1024 # 10 MB ceiling — a "report" beyond this is not a report
|
|
442
|
+
|
|
443
|
+
|
|
444
|
+
def read_artifact(path: Path, violations, src):
    """Read an artifact as UTF-8 text, turning read problems into verdicts.

    Guarded reader (architect review F-3): artifact problems must become
    VERDICTS, never tracebacks — a gate that crashes is neither pass nor fail
    and invites `|| true` workarounds.

    Args:
        path: file to read.
        violations: list that receives a ``("MALFORMED-FILE", src, detail)``
            tuple when the file cannot be used.
        src: artifact label recorded in the violation.

    Returns:
        The file's text, or None after recording a MALFORMED-FILE violation
        (file larger than MAX_ARTIFACT_BYTES, not valid UTF-8, or unreadable).
    """
    try:
        # stat() once: the size that is reported is the size that was checked.
        size = path.stat().st_size
        if size <= MAX_ARTIFACT_BYTES:
            return path.read_text(encoding="utf-8")
        ceiling_mb = MAX_ARTIFACT_BYTES // (1024*1024)
        problem = f"file is {size} bytes — exceeds the {ceiling_mb} MB artifact ceiling"
    except UnicodeDecodeError as err:
        problem = f"not valid UTF-8 (decode error at byte {err.start}) — artifacts must be UTF-8 text"
    except OSError as err:
        # Only the exception class is reported, as before.
        problem = f"unreadable: {err.__class__.__name__}"
    violations.append(("MALFORMED-FILE", src, problem))
    return None
|
|
462
|
+
|
|
463
|
+
|
|
464
|
+
# A value that is entirely one quoted scalar, optionally followed by a
# whitespace-separated `# comment`. Non-greedy so the FIRST closing quote that
# leaves only a comment (or nothing) behind it ends the scalar.
_WAIVER_QUOTED_RE = re.compile(r"""^(["'])(.*?)\1(?:\s+#.*)?$""")


def _waiver_scalar(raw: str) -> str:
    """Return the value part of one `key: value` waiver line, comment removed.

    Inside a quoted scalar `#` is literal text, so `"ticket #4211"` survives
    intact. For unquoted (or malformed-quote) values, ` #` starts a trailing
    comment and stray surrounding quotes are trimmed.
    """
    text = raw.strip()
    quoted = _WAIVER_QUOTED_RE.match(text)
    if quoted:
        return quoted.group(2)
    return text.split(" #")[0].strip().strip("\"'")


def load_waivers(docs: Path):
    """docs/AUDIT-WAIVERS.yaml in the TARGET repo — audit-trailed, expiring
    suppressions (architect review §3.1). Deliberately different from eval
    fixtures (which may never be weakened): waivers live in the audited repo,
    name an approver, and MUST expire. Minimal YAML subset, stdlib-only:

      - code: R2-UNCITED          # required: violation code to waive
        file: BACKLOG.md          # optional: artifact filename
        match: "Palantir"         # optional: substring of the detail
        reason: "approved comparison for marketing deck"
        approved_by: "name@company"
        expires: 2026-09-01       # required: ISO date

    Args:
        docs: directory expected to contain ``AUDIT-WAIVERS.yaml``.

    Returns:
        A list with one ``dict[str, str]`` per ``- `` entry, in file order;
        empty when the file does not exist. Blank lines, full-line comments,
        lines without a ``:`` and keys appearing before the first ``- `` entry
        are ignored. Values are returned as strings, with trailing comments
        and surrounding quotes removed; a `#` inside a quoted value is kept
        (so a quoted ``match:`` is never silently shortened, which would widen
        the waiver).
    """
    f = docs / "AUDIT-WAIVERS.yaml"
    entries, cur = [], None
    if not f.exists():
        return entries
    for raw in f.read_text(encoding="utf-8", errors="replace").splitlines():
        line = raw.strip()
        if not line or line.startswith("#"):
            continue
        if line.startswith("- "):
            # A new list item opens a new waiver; its first key may follow inline.
            cur = {}
            entries.append(cur)
            line = line[2:].strip()
        if cur is None or ":" not in line:
            continue
        k, _, v = line.partition(":")
        cur[k.strip()] = _waiver_scalar(v)
    return entries
|
|
494
|
+
|
|
495
|
+
|
|
496
|
+
def apply_waivers(docs: Path, violations, waived_out=None):
    """Split violations into those still active and those covered by a waiver.

    Waivers are loaded with ``load_waivers(docs)``. For each violation the
    FIRST waiver (in file order) whose ``code`` equals the violation code —
    and whose optional ``file`` equals the source and optional ``match`` is a
    substring of the detail — decides the outcome:

    * waiver's ``expires`` parses as an ISO date on or after today: the
      violation is dropped from the result and, when ``waived_out`` is a
      list, a record of it is appended there;
    * otherwise (expired, missing or unparsable date): the violation stays
      active and a single WAIVER-EXPIRED violation is added for that waiver,
      however many violations it matched.

    WAIVER-EXPIRED entries are produced here, after matching, so a waiver
    cannot suppress them.

    Returns:
        The input ``violations`` object unchanged when there are no waivers;
        otherwise a new list of active ``(code, src, detail)`` tuples.
    """
    import datetime

    waivers = load_waivers(docs)
    if not waivers:
        return violations

    today = datetime.date.today()

    def _covers(waiver, code, src, detail):
        # code is mandatory; file / match only restrict when present and non-empty
        if waiver.get("code") != code:
            return False
        wanted_file = waiver.get("file")
        if wanted_file and wanted_file != src:
            return False
        needle = waiver.get("match")
        return not (needle and needle not in detail)

    def _still_valid(waiver):
        try:
            return datetime.date.fromisoformat(waiver.get("expires", "")) >= today
        except ValueError:
            return False

    remaining, stale_seen = [], set()
    for code, src, detail in violations:
        hit = next(
            ((idx, w) for idx, w in enumerate(waivers) if _covers(w, code, src, detail)),
            None,
        )
        if hit is None:
            remaining.append((code, src, detail))
            continue
        idx, waiver = hit
        if _still_valid(waiver):
            if waived_out is not None:
                waived_out.append({"code": code, "file": src, "detail": detail,
                                   "reason": waiver.get("reason", ""),
                                   "approved_by": waiver.get("approved_by", ""),
                                   "expires": waiver.get("expires", "")})
            continue
        # Stale waiver: keep the finding, and report the waiver itself once.
        remaining.append((code, src, detail))
        if idx in stale_seen:
            continue
        stale_seen.add(idx)
        remaining.append(("WAIVER-EXPIRED", "AUDIT-WAIVERS.yaml",
                          f"waiver for {code} ('{waiver.get('reason', 'no reason')}') expired or has no valid 'expires:' date — renew it or fix the violation"))
    return remaining
|
|
538
|
+
|
|
539
|
+
|
|
540
|
+
def validate_run(run_dir: Path, protected=None, waived_out=None):
|
|
383
541
|
"""Validate one run directory (containing docs/BACKLOG.md, docs/HANDOFF.md)."""
|
|
384
542
|
protected = list(protected or [])
|
|
385
543
|
violations = []
|
|
@@ -392,26 +550,32 @@ def validate_run(run_dir: Path, protected=None):
|
|
|
392
550
|
if not backlog.exists():
|
|
393
551
|
violations.append(("MISSING-FILE", "docs/BACKLOG.md", "file not found"))
|
|
394
552
|
if backlog.exists():
|
|
395
|
-
text = backlog.
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
553
|
+
text = read_artifact(backlog, violations, "BACKLOG.md")
|
|
554
|
+
if text is not None:
|
|
555
|
+
check_template_conformance(text, violations, "BACKLOG.md")
|
|
556
|
+
check_secrets(text, violations, "BACKLOG.md")
|
|
557
|
+
check_approvals(text, violations, "BACKLOG.md")
|
|
558
|
+
for header, rows, end in parse_tables(text):
|
|
559
|
+
# Architect review F-2: a metric table in the BACKLOG is ALWAYS
|
|
560
|
+
# checked — column shape selects semantics, it never disables
|
|
561
|
+
# the check (the `Final`-column escape hatch is closed).
|
|
562
|
+
if col(header, "metric") is not None:
|
|
563
|
+
handoff_shaped = col(header, "final") is not None and col(header, "status") is not None
|
|
564
|
+
check_benchmark_like_table(header, rows, end, text, violations, "BACKLOG.md", is_handoff=handoff_shaped)
|
|
565
|
+
elif col(header, "status") is not None and col(header, "id") is not None:
|
|
566
|
+
check_task_table(header, rows, violations, "BACKLOG.md", protected)
|
|
404
567
|
if handoff.exists():
|
|
405
|
-
text = handoff.
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
568
|
+
text = read_artifact(handoff, violations, "HANDOFF.md")
|
|
569
|
+
if text is not None:
|
|
570
|
+
check_secrets(text, violations, "HANDOFF.md")
|
|
571
|
+
if not STOP_RE.search(text):
|
|
572
|
+
violations.append(("P5-NO-STOP-REASON", "HANDOFF.md", "no 'Stop condition: (a|b|c)' line"))
|
|
573
|
+
for header, rows, end in parse_tables(text):
|
|
574
|
+
if col(header, "metric") is not None:
|
|
575
|
+
check_benchmark_like_table(header, rows, end, text, violations, "HANDOFF.md", is_handoff=(col(header, "final") is not None))
|
|
576
|
+
elif col(header, "status") is not None and col(header, "id") is not None:
|
|
577
|
+
check_task_table(header, rows, violations, "HANDOFF.md", protected)
|
|
578
|
+
return apply_waivers(docs, violations, waived_out)
|
|
415
579
|
|
|
416
580
|
|
|
417
581
|
LOOP_HEAD_RE = re.compile(r"^Loop\s+(\d+)\s*(?:—|-)?\s*(.*)$")
|
|
@@ -446,11 +610,13 @@ def build_report(run_dir: Path, protected, violations):
|
|
|
446
610
|
for sec in re.split(r"(?m)^##\s+(?=Loop\b)", text)[1:]:
|
|
447
611
|
first = (sec.splitlines() or [""])[0]
|
|
448
612
|
hm = LOOP_HEAD_RE.match(first.strip())
|
|
449
|
-
mode_m = re.search(r"(?mi)
|
|
613
|
+
mode_m = re.search(r"(?mi)(?:^|\|)\s*-?\s*Mode:\s*(\S+)", sec)
|
|
614
|
+
proto_m = PROTO_LINE_RE.search(sec)
|
|
450
615
|
appr_m = APPROVED_RE.search(sec)
|
|
451
616
|
loop = {
|
|
452
617
|
"loop": int(hm.group(1)) if hm else None,
|
|
453
618
|
"date": (hm.group(2).strip() or None) if hm else None,
|
|
619
|
+
"protocol": f"v{'.'.join(proto_m.groups())}" if proto_m else None,
|
|
454
620
|
"mode": mode_m.group(1) if mode_m else None,
|
|
455
621
|
"approved": ([] if appr_m.group(1).strip().lower() == "none"
|
|
456
622
|
else [norm(x) for x in appr_m.group(1).split(",") if x.strip()]) if appr_m else None,
|
|
@@ -485,10 +651,17 @@ def build_report(run_dir: Path, protected, violations):
|
|
|
485
651
|
def cell(ix):
|
|
486
652
|
return r[ix] if ix is not None and ix < len(r) else ""
|
|
487
653
|
if cell(mi):
|
|
488
|
-
|
|
654
|
+
entry = {
|
|
489
655
|
"metric": cell(mi), "baseline": cell(bi), "final": cell(fi),
|
|
490
656
|
"delta": cell(di), "target": cell(ti), "verify": cell(vi), "status": cell(si),
|
|
491
|
-
}
|
|
657
|
+
}
|
|
658
|
+
# Computed delta when both ends parse as numbers (review §2):
|
|
659
|
+
# the reported Delta cell is echoed, never trusted as math.
|
|
660
|
+
num = lambda s: (re.search(r"-?\d+(?:\.\d+)?", s) or [None]) and re.search(r"-?\d+(?:\.\d+)?", s) # noqa: E731
|
|
661
|
+
b_m, f_m = num(entry["baseline"]), num(entry["final"])
|
|
662
|
+
if b_m and f_m:
|
|
663
|
+
entry["delta_computed"] = round(float(f_m.group(0)) - float(b_m.group(0)), 6)
|
|
664
|
+
report["metrics"].append(entry)
|
|
492
665
|
tasks = [t for l in report["loops"] for t in l["tasks"]]
|
|
493
666
|
by = lambda key: {k: sum(1 for t in tasks if t[key] == k) # noqa: E731
|
|
494
667
|
for k in sorted({t[key] for t in tasks if t[key]})}
|
|
@@ -552,6 +725,8 @@ def load_fixture_meta(fdir: Path):
|
|
|
552
725
|
meta[k] = [x.strip() for x in v.strip("[]").split(",") if x.strip()]
|
|
553
726
|
elif k in ("id", "expect", "description"):
|
|
554
727
|
meta[k] = v
|
|
728
|
+
if meta["expect"] not in ("pass", "fail"): # F-6: a typo must not silently change semantics
|
|
729
|
+
raise SystemExit(f"fixture {fdir.name}: expect '{meta['expect']}' must be 'pass' or 'fail'")
|
|
555
730
|
return meta
|
|
556
731
|
|
|
557
732
|
|
|
@@ -616,6 +791,38 @@ def run_all(as_json=False):
|
|
|
616
791
|
return 0 if ok else 1
|
|
617
792
|
|
|
618
793
|
|
|
794
|
+
def aggregate_reports(paths):
    """Build a portfolio roll-up over per-run report JSONs (architect review §3.2).

    Args:
        paths: iterable of paths to per-run ``--report json`` files.

    Returns:
        A dict with four keys: ``schema`` (fixed identifier), ``generated_at``
        (current UTC time, ISO-8601, second precision), ``runs`` (one summary
        row per input file, in input order) and ``totals`` (counts summed
        over all runs; the per-code and per-severity breakdowns are sorted
        by key).

    Raises:
        ValueError: a file does not contain valid JSON, or its top level is
            not a JSON object (for example the array that ``--run --json``
            prints when ``--report`` is not given).
        OSError: a file cannot be read.
    """
    import datetime

    runs, by_code, by_sev = [], {}, {}
    for p in paths:
        report = json.loads(Path(p).read_text(encoding="utf-8"))
        # Fail with a message that names the file instead of an opaque
        # AttributeError on the first `.get` below.
        if not isinstance(report, dict):
            raise ValueError(
                f"{p}: expected a JSON object (a '--report json' file), "
                f"got {type(report).__name__}"
            )
        # `or {}` / `or []` also covers an explicit JSON null, which
        # `.get(key, default)` alone would pass through as None.
        summary = report.get("summary") or {}
        runs.append({
            "run_dir": report.get("run_dir"),
            "protocol_version": report.get("protocol_version"),
            "clean": summary.get("clean"),
            "violations": summary.get("violations", 0),
            "waived": len(report.get("waived") or []),
            "tasks": summary.get("tasks", 0),
            "loops": summary.get("loops", 0),
        })
        for code, n in (summary.get("violations_by_code") or {}).items():
            by_code[code] = by_code.get(code, 0) + n
        for sev, n in (summary.get("tasks_by_severity") or {}).items():
            by_sev[sev] = by_sev.get(sev, 0) + n

    return {
        "schema": "code-audit-framework/portfolio@1",
        "generated_at": datetime.datetime.now(datetime.timezone.utc).isoformat(timespec="seconds"),
        "runs": runs,
        "totals": {
            "runs": len(runs),
            # A missing or null "clean" flag counts as not clean.
            "clean_runs": sum(1 for run in runs if run["clean"]),
            "violations": sum(run["violations"] for run in runs),
            "waived": sum(run["waived"] for run in runs),
            "violations_by_code": dict(sorted(by_code.items())),
            "tasks_by_severity": dict(sorted(by_sev.items())),
        },
    }
|
|
824
|
+
|
|
825
|
+
|
|
619
826
|
def main():
|
|
620
827
|
ap = argparse.ArgumentParser()
|
|
621
828
|
ap.add_argument("--run", help="validate one run directory (containing docs/)")
|
|
@@ -623,22 +830,51 @@ def main():
|
|
|
623
830
|
ap.add_argument("--json", action="store_true")
|
|
624
831
|
ap.add_argument("--report", choices=["json", "sarif"],
|
|
625
832
|
help="with --run: emit a structured findings report instead of plain violations")
|
|
833
|
+
ap.add_argument("--aggregate", nargs="+", metavar="REPORT_JSON",
|
|
834
|
+
help="portfolio roll-up over per-run --report json files")
|
|
626
835
|
ap.add_argument("--protected", default="", help="comma-separated protected paths (extends the target AUDIT.md's PROTECTED_AREAS)")
|
|
627
836
|
args = ap.parse_args()
|
|
628
837
|
if args.all:
|
|
629
838
|
sys.exit(run_all(as_json=args.json))
|
|
839
|
+
if args.aggregate:
|
|
840
|
+
missing = [p for p in args.aggregate if not Path(p).is_file()]
|
|
841
|
+
if missing:
|
|
842
|
+
print(f"usage error: report file(s) not found: {', '.join(missing)}", file=sys.stderr)
|
|
843
|
+
sys.exit(2)
|
|
844
|
+
agg = aggregate_reports(args.aggregate)
|
|
845
|
+
if args.json:
|
|
846
|
+
print(json.dumps(agg, indent=2))
|
|
847
|
+
else:
|
|
848
|
+
t = agg["totals"]
|
|
849
|
+
print(f"{'Run':40s} {'proto':8s} {'clean':5s} {'viol':>4s} {'waived':>6s} {'tasks':>5s}")
|
|
850
|
+
for r in agg["runs"]:
|
|
851
|
+
print(f"{str(r['run_dir'])[:40]:40s} {str(r['protocol_version']):8s} "
|
|
852
|
+
f"{'yes' if r['clean'] else 'NO':5s} {r['violations']:4d} {r['waived']:6d} {r['tasks']:5d}")
|
|
853
|
+
print(f"\n{t['clean_runs']}/{t['runs']} runs clean — {t['violations']} active violation(s), "
|
|
854
|
+
f"{t['waived']} waived — by code: {t['violations_by_code'] or '{}'}")
|
|
855
|
+
sys.exit(0)
|
|
630
856
|
if args.run:
|
|
857
|
+
run_path = Path(args.run)
|
|
858
|
+
if not run_path.exists():
|
|
859
|
+
print(f"usage error: --run path does not exist: {run_path}", file=sys.stderr)
|
|
860
|
+
sys.exit(2)
|
|
631
861
|
protected = [p for p in args.protected.split(",") if p]
|
|
632
|
-
|
|
862
|
+
waived = []
|
|
863
|
+
v = validate_run(run_path, protected=protected, waived_out=waived)
|
|
633
864
|
if args.report:
|
|
634
|
-
report = build_report(
|
|
865
|
+
report = build_report(run_path, protected, v)
|
|
866
|
+
report["waived"] = waived
|
|
867
|
+
report["summary"]["waived"] = len(waived)
|
|
635
868
|
print(json.dumps(to_sarif(report) if args.report == "sarif" else report, indent=2))
|
|
636
869
|
elif args.json:
|
|
637
870
|
print(json.dumps([{"code": c, "file": s, "detail": d} for c, s, d in v], indent=2))
|
|
638
871
|
else:
|
|
639
872
|
for c, s, d in v:
|
|
640
873
|
print(f"VIOLATION {c} [{s}] {d}")
|
|
641
|
-
|
|
874
|
+
for w in waived:
|
|
875
|
+
print(f"WAIVED {w['code']} [{w['file']}] until {w['expires']} — {w['reason']} (approved by {w['approved_by']})")
|
|
876
|
+
tail = "CLEAN — no violations" if not v else f"{len(v)} violation(s)"
|
|
877
|
+
print(tail + (f" ({len(waived)} waived)" if waived else ""))
|
|
642
878
|
sys.exit(0 if not v else 1)
|
|
643
879
|
ap.print_help()
|
|
644
880
|
sys.exit(2)
|
|
@@ -19,7 +19,7 @@ build-backend = "setuptools.build_meta"
|
|
|
19
19
|
|
|
20
20
|
[project]
|
|
21
21
|
name = "code-audit-validator"
|
|
22
|
-
version = "1.
|
|
22
|
+
version = "1.3.0"
|
|
23
23
|
description = "Deterministic conformance checker for AUDIT.md agent-audit outputs — CyberSkill code-audit-framework"
|
|
24
24
|
readme = { file = "evals/README.md", content-type = "text/markdown" }
|
|
25
25
|
license = { file = "LICENSE" }
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|