@intentsolutions/audit-harness 0.1.0 → 1.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,157 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ audit-harness gen-layer-applicability — project the canonical registry datum into
4
+ the human-readable layer-applicability matrix.
5
+
6
+ `schemas/audit-profile/registry.v1.json` is THE single source of truth for "which
7
+ gates apply to repo-type X, in which dimension, at what applicability". This
8
+ generator renders `schemas/audit-profile/layer-applicability.md` as a PROJECTION
9
+ of that datum so the doc can never silently drift from the registry the classifier
10
+ actually resolves against (PP-PLAN-040 Phase 0, bead c2b).
11
+
12
+ Modes:
13
+ (default) print the rendered markdown to stdout
14
+ --write write it to schemas/audit-profile/layer-applicability.md
15
+ --check regenerate in-memory and diff against the committed file;
16
+ exit 1 on drift (the CI `layer-applicability-drift` gate)
17
+
18
+ Stdlib only. Read-only except in --write mode (which only writes the one doc).
19
+ """
20
+ import argparse
21
+ import difflib
22
+ import hashlib
23
+ import json
24
+ import os
25
+ import sys
26
+
27
+ HERE = os.path.dirname(os.path.abspath(__file__))
28
+ REGISTRY = os.path.join(HERE, "..", "schemas", "audit-profile", "registry.v1.json")
29
+ DOC = os.path.join(HERE, "..", "schemas", "audit-profile", "layer-applicability.md")
30
+
31
+ GLYPH = {"required": "✅", "recommended": "⭕", "conditional": "⚠", "waived": "❌"}
32
+
33
+
34
def sha256_file(path):
    """Return the SHA-256 digest of the file at *path* as ``sha256:<hex>``.

    The file is opened in binary mode and consumed in 64 KiB chunks so the
    whole content never has to be held in memory at once.
    """
    digest = hashlib.sha256()
    with open(path, "rb") as stream:
        while True:
            block = stream.read(65536)
            if not block:
                break
            digest.update(block)
    return "sha256:" + digest.hexdigest()
40
+
41
+
42
def row(gate):
    """Render one gate dict as a single markdown table row.

    ``gate_id`` is required; ``dimension`` and ``applicability`` default to
    an empty string, ``enforcement`` defaults to ``advisory`` and a missing
    or falsy ``tool`` is shown as an em dash.
    """
    applicability = gate.get("applicability", "")
    gate_id = gate["gate_id"]
    dimension = gate.get("dimension", "")
    glyph = GLYPH.get(applicability, "")
    enforcement = gate.get("enforcement", "advisory")
    if gate.get("tool"):
        tool_cell = "`" + gate["tool"] + "`"
    else:
        tool_cell = "—"
    return (
        f"| `{gate_id}` | {dimension} | {glyph} {applicability} "
        f"| {enforcement} | {tool_cell} |"
    )
52
+
53
+
54
def table(gates):
    """Render *gates* as a markdown table sorted by (dimension, gate_id)."""

    def sort_key(gate):
        # A missing dimension sorts as the empty string; gate_id is mandatory.
        return (gate.get("dimension", ""), gate["gate_id"])

    rendered = [
        "| Gate | Dimension | Applicability | Enforcement | Tool |",
        "|---|---|---|---|---|",
    ]
    rendered.extend(row(gate) for gate in sorted(gates, key=sort_key))
    return "\n".join(rendered)
59
+
60
+
61
def render(registry, registry_hash):
    """Build the full layer-applicability markdown document as one string.

    *registry* is the parsed registry dict; *registry_hash* is embedded
    verbatim in the generated-file banner. The result has trailing
    whitespace stripped and ends with exactly one newline.
    """
    legend = " · ".join(
        f"{GLYPH[level]} {level}"
        for level in ("required", "recommended", "conditional", "waived")
    )
    doc = [
        "# Layer Applicability — GENERATED from `registry.v1.json`",
        "",
        "> ⚠️ **GENERATED FILE — do not edit by hand.**",
        ("> Source of truth: [`registry.v1.json`](registry.v1.json) "
         "(the canonical dimension→gate datum; `classify` resolves against it)."),
        ("> Regenerate: `audit-harness gen-layer-applicability --write` "
         "(or `python3 scripts/gen-layer-applicability.py --write`)."),
        "> CI gate `layer-applicability-drift` fails the build if this file drifts from the registry.",
        ">",
        f"> registry `{registry_hash}`",
        "",
        registry.get("description", "").strip(),
        "",
        "**Legend (applicability):** " + legend,
        "",
        ("Every gate defaults to `enforcement: advisory`. Blocking is **earned** — "
         "engineer-pinned in the target repo's `tests/TESTING.md`, FP-rate-gated "
         "(see [`gate-promotion.md`](../../docs/gate-promotion.md))."),
        "",
        "## Base gates (apply to every repo)",
        "",
        table(registry.get("base", [])),
        "",
        "## By classification",
        "",
        ("A repo carries the **UNION** of every classification it matches "
         "(`classify` never picks a single winner). Gates dedup by `gate_id`, "
         "keeping the highest applicability."),
        "",
    ]

    # One sub-section per classification, in sorted key order.
    for kind in sorted(registry.get("classifications", {})):
        doc += [f"### `{kind}`", "", table(registry["classifications"][kind]), ""]

    # The overlays section is emitted only when the registry defines any.
    overlays = registry.get("overlays", {})
    if overlays:
        doc += ["## Overlays", ""]
        for name in sorted(overlays):
            overlay = overlays[name]
            doc += [f"### `{name}`", "", overlay.get("description", "").strip()]
            promoted = overlay.get("promote_to_required", [])
            if promoted:
                quoted = ", ".join(f"`{dim}`" for dim in promoted)
                doc += ["", "Promotes to **required**: " + quoted + "."]
            doc.append("")

    return "\n".join(doc).rstrip() + "\n"
114
+
115
+
116
def main():
    """CLI entry point: render the doc, then print, write, or drift-check it.

    ``--check`` takes precedence over ``--write``. Both modes end the process
    via ``sys.exit``; the default mode writes the markdown to stdout.
    """
    parser = argparse.ArgumentParser(description="Project registry.v1.json -> layer-applicability.md")
    parser.add_argument("--write", action="store_true", help="write the doc to its canonical path")
    parser.add_argument("--check", action="store_true", help="fail (exit 1) if the committed doc drifts")
    parser.add_argument("--registry", default=REGISTRY)
    parser.add_argument("--out", default=DOC)
    args = parser.parse_args()

    registry_path = os.path.abspath(args.registry)
    with open(registry_path, "r", encoding="utf-8") as handle:
        registry = json.load(handle)
    # The banner hash is taken from the registry file's raw bytes.
    rendered = render(registry, sha256_file(registry_path))

    if args.check:
        try:
            with open(args.out, "r", encoding="utf-8") as handle:
                committed = handle.read()
        except FileNotFoundError:
            print(f"gen-layer-applicability: {args.out} missing — run --write", file=sys.stderr)
            sys.exit(1)
        if committed == rendered:
            print("gen-layer-applicability: layer-applicability.md matches the registry datum")
            sys.exit(0)
        # Drift: show a unified diff on stderr and fail the gate.
        delta = difflib.unified_diff(
            committed.splitlines(True),
            rendered.splitlines(True),
            fromfile="committed",
            tofile="generated",
        )
        sys.stderr.write("".join(delta))
        sys.stderr.write("\ngen-layer-applicability: DRIFT — regenerate with --write\n")
        sys.exit(1)

    if args.write:
        with open(args.out, "w", encoding="utf-8") as handle:
            handle.write(rendered)
        repo_root = os.path.join(HERE, "..")
        print(f"gen-layer-applicability: wrote {os.path.relpath(args.out, repo_root)}")
        sys.exit(0)

    sys.stdout.write(rendered)
155
+
156
+ if __name__ == "__main__":
157
+ main()
@@ -15,11 +15,13 @@ set -euo pipefail
15
15
 
16
16
  PATH_ARG="features/"
17
17
  STRICT=0
18
+ JSON_OUT=0
18
19
 
19
20
  while [[ $# -gt 0 ]]; do
20
21
  case "$1" in
21
22
  --path) PATH_ARG="$2"; shift 2 ;;
22
23
  --strict) STRICT=1; shift ;;
24
+ --json) JSON_OUT=1; shift ;;
23
25
  --help|-h)
24
26
  sed -n '2,15p' "$0"; exit 0 ;;
25
27
  *) echo "gherkin-lint: unknown flag $1" >&2; exit 2 ;;
@@ -27,15 +29,40 @@ while [[ $# -gt 0 ]]; do
27
29
  done
28
30
 
29
31
  if [[ ! -d "$PATH_ARG" ]]; then
32
+ if [[ "$JSON_OUT" -eq 1 ]]; then
33
+ printf '{"gate_id":"audit-harness:%s:gherkin-lint","result":"NOT_APPLICABLE","input_hash":"sha256:0000000000000000000000000000000000000000000000000000000000000000","policy_hash":"sha256:0000000000000000000000000000000000000000000000000000000000000000","metadata":{"reason":"path not found","path":"%s"}}\n' \
34
+ "${AUDIT_HARNESS_SIDE:-ci}" "$PATH_ARG"
35
+ fi
30
36
  echo "gherkin-lint: path not found: $PATH_ARG" >&2
31
37
  exit 2
32
38
  fi
33
39
 
40
+ INPUT_HASH=$(find "$PATH_ARG" -name "*.feature" -type f -exec sha256sum {} \; 2>/dev/null | sort | sha256sum | awk '{print "sha256:"$1}')
41
+
42
+ if [[ "$JSON_OUT" -eq 1 ]]; then
43
+ exec 3>&1
44
+ exec 1>&2
45
+ fi
46
+
34
47
  WARN_COUNT=0
35
48
  ERROR_COUNT=0
36
49
 
37
50
  warn() { echo "WARN $1:$2 $3"; WARN_COUNT=$((WARN_COUNT + 1)); }
38
- err() { echo "ERROR $1:$2 $3"; ERROR_COUNT=$((ERROR_COUNT + 1)); }
51
+
52
+ # process_awk_output — funnel awk-printed WARN/ERROR lines through the bash
53
+ # counters so the summary + exit code reflect awk-fallback findings (the
54
+ # subprocesses below can't otherwise touch the parent-shell counters).
55
+ # Single-pass awk counts both at once; no-match handled cleanly under
56
+ # set -euo pipefail via the `+0` numeric coercions.
57
# Count the WARN/ERROR lines in the captured text passed as $1, add them to
# the shell-level WARN_COUNT / ERROR_COUNT counters, then echo the text
# through unchanged. Empty input is a no-op that returns 0.
process_awk_output() {
  local findings="$1"
  if [[ -z "$findings" ]]; then
    return 0
  fi
  local warn_n=0 err_n=0
  # One awk pass yields both tallies; `+0` makes a no-match print 0.
  read -r warn_n err_n < <(awk '/^WARN /{w++} /^ERROR /{e++} END {print w+0, e+0}' <<< "$findings")
  WARN_COUNT=$((WARN_COUNT + warn_n))
  ERROR_COUNT=$((ERROR_COUNT + err_n))
  printf '%s\n' "$findings"
}
39
66
 
40
67
  # 1. Prefer official gherkin-lint if available
41
68
  if command -v gherkin-lint >/dev/null 2>&1; then
@@ -48,7 +75,7 @@ else
48
75
 
49
76
  while IFS= read -r -d '' feature; do
50
77
  # Imperative verbs / CSS selectors in steps (declarative warning)
51
- awk -v file="$feature" '
78
+ process_awk_output "$(awk -v file="$feature" '
52
79
  /^[[:space:]]*(Given|When|Then|And|But)/ {
53
80
  line = $0
54
81
  if (line ~ /click|type|fill[ _]in|press|select.*from[ _]dropdown/) {
@@ -58,10 +85,10 @@ else
58
85
  printf "WARN %s:%d CSS selector / xpath in step (prefer business language)\n", file, NR
59
86
  }
60
87
  }
61
- ' "$feature"
88
+ ' "$feature")"
62
89
 
63
90
  # Scenario length (> 10 steps)
64
- awk -v file="$feature" '
91
+ process_awk_output "$(awk -v file="$feature" '
65
92
  /^[[:space:]]*Scenario/ { sc = NR; steps = 0; sn = $0; next }
66
93
  /^[[:space:]]*(Given|When|Then|And|But)/ { if (sc) steps++ }
67
94
  /^[[:space:]]*Scenario|^[[:space:]]*Feature|^$/ {
@@ -75,7 +102,7 @@ else
75
102
  printf "WARN %s:%d scenario has %d steps (>10 is too long)\n", file, sc, steps
76
103
  }
77
104
  }
78
- ' "$feature"
105
+ ' "$feature")"
79
106
 
80
107
  # Repeated Givens without Background (3+ identical Given lines)
81
108
  dupe=$(awk '/^[[:space:]]*Given/ { print }' "$feature" | sort | uniq -c | awk '$1 >= 3 { print }')
@@ -84,9 +111,7 @@ else
84
111
  fi
85
112
 
86
113
  # "And" at scenario start (grammar error)
87
- awk -v file="$feature" '
88
- prev_blank = 1
89
- /^[[:space:]]*$/ { prev_blank = 1; next }
114
+ process_awk_output "$(awk -v file="$feature" '
90
115
  /^[[:space:]]*Scenario/ { in_scenario = 1; step_count = 0; next }
91
116
  /^[[:space:]]*(Given|When|Then|And|But)/ {
92
117
  if (in_scenario && step_count == 0 && /^[[:space:]]*And/) {
@@ -94,7 +119,7 @@ else
94
119
  }
95
120
  step_count++
96
121
  }
97
- ' "$feature"
122
+ ' "$feature")"
98
123
 
99
124
  done < <(find "$PATH_ARG" -name "*.feature" -print0)
100
125
  fi
@@ -102,6 +127,25 @@ fi
102
127
  echo ""
103
128
  echo "gherkin-lint summary: $WARN_COUNT warning(s), $ERROR_COUNT error(s)"
104
129
 
130
+ if [[ "$JSON_OUT" -eq 1 ]]; then
131
+ exec 1>&3 3>&-
132
+ result="PASS"
133
+ sev_block=""
134
+ if [[ "$ERROR_COUNT" -gt 0 ]]; then
135
+ result="FAIL"
136
+ elif [[ "$WARN_COUNT" -gt 0 ]]; then
137
+ if [[ "$STRICT" -eq 1 ]]; then
138
+ result="FAIL"
139
+ else
140
+ result="ADVISORY"
141
+ sev_block=',"advisory_severity":"warn"'
142
+ fi
143
+ fi
144
+ printf '{"gate_id":"audit-harness:%s:gherkin-lint","result":"%s"%s,"input_hash":"%s","policy_hash":"sha256:0000000000000000000000000000000000000000000000000000000000000000","metadata":{"warnings":%d,"errors":%d,"strict":%s,"path":"%s"}}\n' \
145
+ "${AUDIT_HARNESS_SIDE:-ci}" "$result" "$sev_block" "$INPUT_HASH" "$WARN_COUNT" "$ERROR_COUNT" \
146
+ "$([[ "$STRICT" -eq 1 ]] && echo true || echo false)" "$PATH_ARG"
147
+ fi
148
+
105
149
  if [[ "$ERROR_COUNT" -gt 0 ]]; then
106
150
  exit 1
107
151
  fi
@@ -6,19 +6,48 @@
6
6
  # causes escape-scan.sh to REFUSE the AI diff.
7
7
  #
8
8
  # Usage:
9
- # bash harness-hash.sh --init # write manifest (engineer-initiated)
10
- # bash harness-hash.sh --verify # compare current hashes to manifest
11
- # bash harness-hash.sh --list # show which files are pinned
9
+ # bash harness-hash.sh --init # write manifest (engineer-initiated)
10
+ # bash harness-hash.sh --verify # compare current hashes to manifest
11
+ # bash harness-hash.sh --verify --json # machine-readable JSON to stdout (verify only)
12
+ # bash harness-hash.sh --list # show which files are pinned
12
13
  #
13
14
  # Exit codes:
14
15
  # 0 — OK (pin matches, or init succeeded)
15
16
  # 2 — HARNESS_TAMPERED (hash mismatch)
16
17
  # 3 — no manifest found (--verify without --init)
18
+ #
19
+ # JSON mode:
20
+ # stdout = single JSON object suitable for piping to `audit-harness emit-evidence`
21
+ # stderr = unchanged human-readable summary (preserves backward-compat)
22
+ # exit codes unchanged
17
23
 
18
24
  set -euo pipefail
19
25
 
26
+ # Cross-platform SHA-256: `sha256sum` ships with GNU coreutils (Linux);
27
+ # macOS only has `shasum -a 256`. Both produce identical `<hash> <file>`
28
+ # output, so downstream awk parsing is unchanged.
29
+ if command -v sha256sum >/dev/null 2>&1; then
30
+ SHA256_CMD=(sha256sum)
31
+ elif command -v shasum >/dev/null 2>&1; then
32
+ SHA256_CMD=(shasum -a 256)
33
+ else
34
+ echo "harness-hash: neither sha256sum nor shasum found in PATH" >&2
35
+ exit 2
36
+ fi
37
+
20
38
  ROOT="${ROOT:-$(pwd)}"
21
39
  MANIFEST="${ROOT}/.harness-hash"
40
+ JSON_OUT=0
41
+
42
+ # Peel --json from anywhere in args (additive, doesn't disturb existing arg shape)
43
+ _filtered_args=()
44
+ for arg in "$@"; do
45
+ case "$arg" in
46
+ --json) JSON_OUT=1 ;;
47
+ *) _filtered_args+=("$arg") ;;
48
+ esac
49
+ done
50
+ set -- "${_filtered_args[@]+"${_filtered_args[@]}"}"
22
51
 
23
52
  PATTERNS=(
24
53
  # Wall 1: acceptance
@@ -42,6 +71,27 @@ PATTERNS=(
42
71
  "stryker.config.js"
43
72
  )
44
73
 
74
+ # Optional per-repo extra patterns appended from .harness-hash-extra-patterns
75
+ # at the repo root. Used by repos whose policy files don't match the default
76
+ # canonical patterns above — e.g., the audit-harness repo itself pins its own
77
+ # scripts (scripts/*.sh + scripts/*.py + bin/audit-harness.js), which are the
78
+ # policy enforcement surface but aren't covered by the consumer-facing
79
+ # defaults. Lines beginning with `#` are comments; blank lines are ignored.
80
+ # This mechanism is additive — repos without the file get exactly the
81
+ # default behavior, so consumer repos are not affected.
82
+ EXTRA_PATTERNS_FILE="${ROOT}/.harness-hash-extra-patterns"
83
+ if [[ -f "${EXTRA_PATTERNS_FILE}" ]]; then
84
+ while IFS= read -r line || [[ -n "${line}" ]]; do
85
+ # strip inline comments
86
+ line="${line%%#*}"
87
+ # trim leading + trailing whitespace
88
+ line="${line#"${line%%[![:space:]]*}"}"
89
+ line="${line%"${line##*[![:space:]]}"}"
90
+ [[ -z "${line}" ]] && continue
91
+ PATTERNS+=("${line}")
92
+ done < "${EXTRA_PATTERNS_FILE}"
93
+ fi
94
+
45
95
  collect_files() {
46
96
  local out=()
47
97
  shopt -s nullglob globstar
@@ -61,7 +111,7 @@ hash_files() {
61
111
  return 0
62
112
  fi
63
113
  while IFS= read -r f; do
64
- printf '%s %s\n' "$(sha256sum "$f" | awk '{print $1}')" "$f"
114
+ printf '%s %s\n' "$("${SHA256_CMD[@]}" "$f" | awk '{print $1}')" "$f"
65
115
  done <<< "$files"
66
116
  }
67
117
 
@@ -76,6 +126,10 @@ cmd_init() {
76
126
  cmd_verify() {
77
127
  cd "$ROOT"
78
128
  if [[ ! -f "$MANIFEST" ]]; then
129
+ if [[ "$JSON_OUT" -eq 1 ]]; then
130
+ printf '{"gate_id":"audit-harness:%s:harness-hash","result":"NOT_APPLICABLE","input_hash":"sha256:0000000000000000000000000000000000000000000000000000000000000000","policy_hash":"sha256:0000000000000000000000000000000000000000000000000000000000000000","metadata":{"reason":"no manifest at %s (run --init)"}}\n' \
131
+ "${AUDIT_HARNESS_SIDE:-ci}" "$MANIFEST"
132
+ fi
79
133
  echo "harness-hash: no manifest at $MANIFEST (run --init)" >&2
80
134
  exit 3
81
135
  fi
@@ -84,13 +138,32 @@ cmd_verify() {
84
138
  local expected
85
139
  expected=$(cat "$MANIFEST")
86
140
 
141
+ local manifest_hash
142
+ manifest_hash=$("${SHA256_CMD[@]}" "$MANIFEST" | awk '{print "sha256:"$1}')
143
+
144
+ local pinned_count
145
+ pinned_count=$(echo "$expected" | grep -c '^' || true)
146
+
87
147
  # Compare sorted manifests so order doesn't matter
88
148
  local diff_out
89
149
  diff_out=$(diff <(echo "$expected" | sort) <(echo "$current" | sort) || true)
90
150
  if [[ -z "$diff_out" ]]; then
91
- echo "harness-hash: OK"
151
+ if [[ "$JSON_OUT" -eq 1 ]]; then
152
+ printf '{"gate_id":"audit-harness:%s:harness-hash","result":"PASS","input_hash":"%s","policy_hash":"%s","metadata":{"pinned_count":%d}}\n' \
153
+ "${AUDIT_HARNESS_SIDE:-ci}" "$manifest_hash" "$manifest_hash" "$pinned_count"
154
+ echo "harness-hash: OK" >&2
155
+ else
156
+ echo "harness-hash: OK"
157
+ fi
92
158
  exit 0
93
159
  fi
160
+ if [[ "$JSON_OUT" -eq 1 ]]; then
161
+ # diff output may contain quotes/newlines; encode as a single-line escaped string
162
+ local diff_escaped
163
+ diff_escaped=$(printf '%s' "$diff_out" | python3 -c 'import sys, json; print(json.dumps(sys.stdin.read()))')
164
+ printf '{"gate_id":"audit-harness:%s:harness-hash","result":"FAIL","failure_mode":"HARNESS_TAMPERED","input_hash":"%s","policy_hash":"%s","metadata":{"pinned_count":%d,"diff":%s}}\n' \
165
+ "${AUDIT_HARNESS_SIDE:-ci}" "$manifest_hash" "$manifest_hash" "$pinned_count" "$diff_escaped"
166
+ fi
94
167
  echo "HARNESS_TAMPERED: pinned artifact changed" >&2
95
168
  echo "$diff_out" >&2
96
169
  exit 2
@@ -0,0 +1,228 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ audit-harness scan — read-only security / hygiene / skill-quality gate-runner
4
+ (PP-PLAN-040 Phase 4 / E6).
5
+
6
+ For every `dimension: security | hygiene | skill-quality` gate in a repo's
7
+ audit-profile/v1, scan runs the right external tool with the repo present and wraps
8
+ its exit code into a `gate-result/v1` row (JSON array, stdout). Advisory-first; a
9
+ missing tool degrades to ADVISORY indeterminate (never a false FAIL). It NEVER
10
+ fixes anything and NEVER reimplements a scanner.
11
+
12
+ Strategies:
13
+ - local hygiene-readme: deterministic README presence check (no tool).
14
+ - shell-out every gate carrying a `tool` (gitleaks, osv-scanner, semgrep, syft,
15
+ markdownlint, lychee, ...): run it if on PATH; clean exit -> PASS;
16
+ findings -> ADVISORY(error) (or FAIL under --strict / blocking);
17
+ tool absent -> ADVISORY indeterminate.
18
+ - consume skill-quality skill-behavioral (tool j-rig): CONSUME a j-rig
19
+ Evidence Bundle verdict row (--jrig-verdict PATH or a default
20
+ location). The harness does NOT run behavioral judgment itself —
21
+ it ingests j-rig's verdict. No verdict -> ADVISORY indeterminate.
22
+
23
+ Stdlib only. No network beyond whatever the shelled-out tool does (and the only
24
+ network-touching gates fail open to indeterminate). No filesystem mutation.
25
+ """
26
+ import argparse
27
+ import hashlib
28
+ import json
29
+ import os
30
+ import shutil
31
+ import subprocess
32
+ import sys
33
+ from datetime import datetime, timezone
34
+
35
+ HERE = os.path.dirname(os.path.abspath(__file__))
36
+ if HERE not in sys.path:
37
+ sys.path.insert(0, HERE)
38
+ import classify as C # noqa: E402
39
+
40
+ EMPTY_SHA = "sha256:" + hashlib.sha256(b"").hexdigest()
41
+ SCAN_DIMENSIONS = {"security", "hygiene", "skill-quality"}
42
+
43
+ # tool -> argv (run with cwd=repo). "generation" tools (syft) are PASS on exit 0,
44
+ # INDETERMINATE on failure (they produce an artifact, they don't pass/fail policy).
45
+ TOOL_CMD = {
46
+ "gitleaks": (["gitleaks", "detect", "--no-banner"], "scan"),
47
+ "osv-scanner": (["osv-scanner", "-r", "."], "scan"),
48
+ "semgrep": (["semgrep", "scan", "--error", "--quiet"], "scan"),
49
+ "syft": (["syft", "."], "generation"),
50
+ "markdownlint": (["markdownlint", "."], "scan"),
51
+ "lychee": (["lychee", "--offline", "--no-progress", "."], "scan"),
52
+ }
53
+
54
+
55
def sha256_str(s):
    """Return ``sha256:<hex>`` for the UTF-8 encoding of the string *s*."""
    encoded = s.encode("utf-8")
    return "sha256:{}".format(hashlib.sha256(encoded).hexdigest())
57
+
58
+
59
def make_row(gate_id, result, *, policy_hash, input_hash, commit_sha, runner,
             metadata=None, failure_mode=None, advisory_severity=None):
    """Assemble one result row dict, stamped with the current UTC time.

    ``metadata`` is attached only when truthy; ``failure_mode`` and
    ``advisory_severity`` are attached only when they are not ``None``.
    """
    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    record = dict(
        gate_id=gate_id,
        result=result,
        policy_hash=policy_hash,
        input_hash=input_hash,
        timestamp=stamp,
        runner=runner,
        commit_sha=commit_sha,
    )
    if metadata:
        record["metadata"] = metadata
    optional_fields = (
        ("failure_mode", failure_mode),
        ("advisory_severity", advisory_severity),
    )
    for key, value in optional_fields:
        if value is not None:
            record[key] = value
    return record
74
+
75
+
76
def gate_suffix(gate_id):
    """Return the text after the last ``:`` in *gate_id* (all of it if none)."""
    _head, _sep, tail = gate_id.rpartition(":")
    return tail
78
+
79
+
80
def indeterminate(gate, commit_sha, runner, reason, policy):
    """Build an ADVISORY row (severity ``warn``) flagged as indeterminate.

    *reason* is stored in the row metadata; *policy* is hashed into
    ``policy_hash``. The input hash is always the empty-input digest.
    """
    details = {"indeterminate": True, "reason": reason}
    return make_row(
        gate["gate_id"],
        "ADVISORY",
        policy_hash=sha256_str(policy),
        input_hash=EMPTY_SHA,
        commit_sha=commit_sha,
        runner=runner,
        metadata=details,
        advisory_severity="warn",
    )
85
+
86
+
87
def run_readme(repo, gate, commit_sha, runner, strict):
    """Check for a README file directly under *repo* and return a result row.

    PASS when any of README.md / README.rst / README.txt / README exists.
    Otherwise FAIL when *strict* is set or the gate's enforcement is
    ``blocking``, else ADVISORY with severity ``warn``.
    """
    blocking = gate.get("enforcement", "advisory") == "blocking"
    shared = dict(
        policy_hash=sha256_str("hygiene:readme"),
        input_hash=EMPTY_SHA,
        commit_sha=commit_sha,
        runner=runner,
    )
    for name in ("README.md", "README.rst", "README.txt", "README"):
        if os.path.isfile(os.path.join(repo, name)):
            return make_row(
                gate["gate_id"], "PASS",
                metadata={"method": "local-presence", "signal": "README present"},
                **shared,
            )

    missing = {"method": "local-presence", "reason": "no README found"}
    if strict or blocking:
        return make_row(gate["gate_id"], "FAIL",
                        failure_mode="hygiene:readme-missing",
                        metadata=missing, **shared)
    return make_row(gate["gate_id"], "ADVISORY",
                    advisory_severity="warn",
                    metadata=missing, **shared)
101
+
102
+
103
def run_tool(tool, repo, gate, commit_sha, runner, strict):
    """Run the external *tool* inside *repo* and map its exit code to a row.

    Outcomes:
      * tool has no entry in ``TOOL_CMD``, is not on PATH, or raises while
        being launched (including the 300 s timeout) -> indeterminate
      * exit code 0 -> PASS
      * non-zero exit from a "generation" tool -> indeterminate
      * non-zero exit otherwise -> FAIL when *strict* or the gate is
        ``blocking``, else ADVISORY with severity ``error``; up to 2000
        characters of the tool's output are kept in the metadata
    """
    blocking = gate.get("enforcement", "advisory") == "blocking"
    policy = f"tool:{tool}"

    if tool not in TOOL_CMD:
        return indeterminate(gate, commit_sha, runner,
                             f"no invocation wired for tool '{tool}'", policy)
    if shutil.which(tool) is None:
        return indeterminate(gate, commit_sha, runner,
                             f"{tool} not on PATH — {gate.get('dimension')} unmeasured", policy)

    argv, kind = TOOL_CMD[tool]
    try:
        completed = subprocess.run(argv, cwd=repo, capture_output=True, text=True, timeout=300)
    except Exception as exc:
        # Any launch problem leaves the gate unmeasured rather than failed.
        return indeterminate(gate, commit_sha, runner, f"{tool} failed to run: {exc}", policy)

    shared = dict(
        policy_hash=sha256_str(policy),
        input_hash=EMPTY_SHA,
        commit_sha=commit_sha,
        runner=runner,
    )
    exit_code = completed.returncode
    if exit_code == 0:
        return make_row(gate["gate_id"], "PASS",
                        metadata={"method": "shell-out", "tool": tool}, **shared)
    if kind == "generation":
        # syft etc. failing to generate is infra, not a policy violation
        return indeterminate(gate, commit_sha, runner,
                             f"{tool} could not generate artifact (exit {exit_code})", policy)

    # Prefer stdout; fall back to stderr when stdout is empty.
    output = completed.stdout or completed.stderr
    findings = {"method": "shell-out", "tool": tool, "detail": output.strip()[:2000]}
    if strict or blocking:
        return make_row(gate["gate_id"], "FAIL",
                        failure_mode=f"scan:{tool}-findings",
                        metadata=findings, **shared)
    return make_row(gate["gate_id"], "ADVISORY",
                    advisory_severity="error",
                    metadata=findings, **shared)
132
+
133
+
134
def consume_jrig(repo, gate, commit_sha, runner, strict, verdict_path):
    """Ingest a j-rig Evidence Bundle verdict row — never run judgment here.

    The verdict is taken from *verdict_path* when given and present on disk,
    otherwise from the first existing default location under *repo*
    (``.j-rig/verdict.json``, ``.jrig/verdict.json``, ``j-rig-verdict.json``).

    Returns a result row:
      * no verdict file, a verdict that is not a JSON object, or a verdict
        carrying neither ``result`` nor ``passed`` -> indeterminate
      * verdict resolves to ``PASS`` -> PASS
      * anything else -> FAIL when *strict* or the gate is ``blocking``,
        else ADVISORY with severity ``error``
    """
    policy = "consume:j-rig"
    candidates = [verdict_path] if verdict_path else []
    candidates += [os.path.join(repo, p) for p in
                   (".j-rig/verdict.json", ".jrig/verdict.json", "j-rig-verdict.json")]
    path = next((p for p in candidates if p and os.path.isfile(p)), None)
    if path is None:
        return indeterminate(gate, commit_sha, runner,
                             "no j-rig verdict available — run j-rig eval and pass --jrig-verdict",
                             policy)
    # NOTE(review): C.read_json presumably yields a non-dict when the file
    # cannot be parsed — confirm against classify.py.
    verdict = C.read_json(path)
    if not isinstance(verdict, dict):
        return indeterminate(gate, commit_sha, runner, f"unreadable j-rig verdict at {path}", policy)

    # Pass through j-rig's own result if present; otherwise interpret a boolean pass.
    enforcement = gate.get("enforcement", "advisory")
    jres = verdict.get("result")
    if not jres:
        if "passed" not in verdict:
            # A verdict that states no outcome at all must not be read as a
            # failure: treat it like a missing verdict instead.
            return indeterminate(gate, commit_sha, runner,
                                 f"j-rig verdict at {path} has neither 'result' nor 'passed'",
                                 policy)
        jres = "PASS" if verdict["passed"] else "FAIL"

    try:
        source = os.path.relpath(path, repo)
    except ValueError:
        # On Windows relpath raises when path and repo are on different drives.
        source = os.path.abspath(path)
    meta = {"method": "consume-j-rig", "source": source,
            "jrig": {k: verdict.get(k) for k in ("result", "passed", "layers_passed", "baseline_delta")
                     if k in verdict}}
    if jres == "PASS":
        return make_row(gate["gate_id"], "PASS", policy_hash=sha256_str(policy),
                        input_hash=EMPTY_SHA, commit_sha=commit_sha, runner=runner, metadata=meta)
    result, fm, sev = ("FAIL", "skill-quality:jrig-fail", None) if (strict or enforcement == "blocking") \
        else ("ADVISORY", None, "error")
    return make_row(gate["gate_id"], result, policy_hash=sha256_str(policy),
                    input_hash=EMPTY_SHA, commit_sha=commit_sha, runner=runner,
                    failure_mode=fm, advisory_severity=sev, metadata=meta)
162
+
163
+
164
def compute_profile(repo, registry_path, profile_arg):
    """Return the audit profile as parsed JSON.

    *profile_arg* selects the source: ``"-"`` reads JSON from stdin, any
    other truthy value is a path to a JSON file, and a falsy value runs the
    sibling ``classify.py`` on *repo* with ``--registry registry_path`` and
    parses its stdout. If the classifier exits non-zero its stderr is
    forwarded and ``SystemExit(2)`` is raised.
    """
    if profile_arg == "-":
        return json.loads(sys.stdin.read())
    if profile_arg:
        with open(profile_arg, "r", encoding="utf-8") as handle:
            return json.loads(handle.read())

    command = [
        sys.executable,
        os.path.join(HERE, "classify.py"),
        repo,
        "--registry",
        registry_path,
    ]
    classified = subprocess.run(command, capture_output=True, text=True)
    if classified.returncode != 0:
        sys.stderr.write(classified.stderr)
        raise SystemExit(2)
    return json.loads(classified.stdout)
176
+
177
+
178
def main():
    """CLI entry point: run every scan-dimension gate and print the rows.

    Prints a JSON array of result rows to stdout and a one-line tally to
    stderr, then exits 1 if any row is FAIL, else 0. When the kill-switch
    is active (override file ``disable`` flag or ``AUDIT_HARNESS_DISABLE=1``)
    it prints ``[]`` and exits 0 without running any gate.
    """
    parser = argparse.ArgumentParser(description="Security/hygiene/skill-quality gate-runner -> gate-result/v1")
    parser.add_argument("repo", nargs="?", default=".")
    parser.add_argument("--strict", action="store_true", help="treat a finding/gap as FAIL (exit 1)")
    parser.add_argument("--registry", default=C.DEFAULT_REGISTRY)
    parser.add_argument("--profile", default=None, help="pinned audit-profile/v1 (PATH or '-')")
    parser.add_argument("--jrig-verdict", default=None, help="path to a j-rig Evidence Bundle verdict to consume")
    args = parser.parse_args()

    repo = os.path.abspath(args.repo)
    runner = f"audit-harness@{C.harness_version()}"

    override_path = os.path.join(repo, ".audit-harness.yml")
    if os.path.isfile(override_path):
        override = C.parse_override(override_path)
    else:
        override = {"disable": False}
    if override.get("disable") or os.environ.get("AUDIT_HARNESS_DISABLE") == "1":
        sys.stderr.write("audit-harness: KILL-SWITCH active — scan skipped (no rows emitted)\n")
        print("[]")
        sys.exit(0)

    profile = compute_profile(repo, os.path.abspath(args.registry), args.profile)
    # Prefer the commit recorded in the profile; otherwise ask the classifier module.
    commit_sha = profile.get("subject", {}).get("commit_sha") or C.git_short_sha(repo)

    def evaluate(gate):
        # Pick the strategy for one gate: local README check, j-rig verdict
        # consumption, generic shell-out, or indeterminate when no tool is set.
        suffix = gate_suffix(gate["gate_id"])
        tool = gate.get("tool")
        if suffix == "hygiene-readme":
            return run_readme(repo, gate, commit_sha, runner, args.strict)
        if tool == "j-rig":
            return consume_jrig(repo, gate, commit_sha, runner, args.strict, args.jrig_verdict)
        if tool:
            return run_tool(tool, repo, gate, commit_sha, runner, args.strict)
        return indeterminate(gate, commit_sha, runner,
                             f"gate '{suffix}' has no tool wired in this harness version",
                             f"scan:{suffix}")

    rows = []
    for gate in profile.get("gates", []):
        if gate.get("dimension") not in SCAN_DIMENSIONS:
            continue
        if gate.get("enforcement") == "disabled":
            continue
        rows.append(evaluate(gate))

    print(json.dumps(rows, indent=2))
    tally = {"PASS": 0, "ADVISORY": 0, "FAIL": 0}
    for entry in rows:
        if entry["result"] in tally:
            tally[entry["result"]] += 1
    sys.stderr.write(f"audit-harness scan: {tally['PASS']} PASS, {tally['ADVISORY']} ADVISORY, {tally['FAIL']} FAIL "
                     f"across {len(rows)} gate(s)\n")
    sys.exit(1 if tally["FAIL"] else 0)
225
+
226
+
227
+ if __name__ == "__main__":
228
+ main()