@intentsolutions/audit-harness 1.1.7 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +65 -0
- package/bin/audit-harness.js +17 -6
- package/docs/cred-gate.md +131 -0
- package/package.json +8 -1
- package/schemas/currency/pins.v1.json +164 -22
- package/scripts/arch-check.sh +20 -2
- package/scripts/bias-count.sh +18 -1
- package/scripts/caa-check.sh +143 -0
- package/scripts/check-wrapper-sync.sh +120 -0
- package/scripts/crap-score.py +57 -6
- package/scripts/cred-gate.sh +238 -0
- package/scripts/currency.py +70 -25
- package/scripts/dnssec-check.sh +158 -0
- package/scripts/emit-evidence.sh +186 -14
- package/scripts/escape-scan.sh +28 -3
- package/scripts/gherkin-lint.sh +5 -0
- package/scripts/harness-hash.sh +5 -0
- package/scripts/kernel-shadow-check.sh +132 -0
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# caa-check.sh — verify a namespace publishes CAA records (and, when configured,
|
|
3
|
+
# pins the EXPECTED certificate authority) before a production signed attestation
|
|
4
|
+
# is anchored against it.
|
|
5
|
+
#
|
|
6
|
+
# WHY THIS EXISTS (CISO binding, DR-010 Q5 / ISEDC v1 Q1 2026-05-10):
|
|
7
|
+
# CAA (RFC 8659) records constrain which CAs may issue certificates for a
|
|
8
|
+
# namespace. Pinning the CA on evals.intentsolutions.io closes the mis-issuance
|
|
9
|
+
# path an attacker could otherwise use to obtain a look-alike cert and present
|
|
10
|
+
# forged attestation infrastructure. This must be verified BEFORE the first
|
|
11
|
+
# production attestation. This script is that gate — read-only, fail-closed.
|
|
12
|
+
#
|
|
13
|
+
# WHY IT QUERIES AN EXPLICIT RESOLVER (the bug this version fixes):
|
|
14
|
+
# Querying the LOCAL STUB RESOLVER (plain `dig`, no `@server`) FALSE-NEGATIVES
|
|
15
|
+
# on hosts whose stub resolver lags CAA propagation or strips the record type
|
|
16
|
+
# (systemd-resolved, many CI runners, dev boxes). On such a host a correctly
|
|
17
|
+
# CAA-pinned zone looks like it has no CAA, and the gate refuses a legitimate
|
|
18
|
+
# production sign. The fix is to query a TRUSTED PUBLIC resolver. The gate
|
|
19
|
+
# stays fail-closed: PASS only on a positive matching CAA record from a trusted
|
|
20
|
+
# resolver; absence / mismatch / unreachable => non-zero.
|
|
21
|
+
#
|
|
22
|
+
# Usage:
|
|
23
|
+
# bash scripts/caa-check.sh [DOMAIN]
|
|
24
|
+
# EXPECTED_CAA_ISSUER=letsencrypt.org bash scripts/caa-check.sh evals.intentsolutions.io
|
|
25
|
+
#
|
|
26
|
+
# Resolution order for the domain:
|
|
27
|
+
# 1. $1 (positional)
|
|
28
|
+
# 2. $CAA_CHECK_DOMAIN
|
|
29
|
+
# 3. default: evals.intentsolutions.io
|
|
30
|
+
#
|
|
31
|
+
# Issuer policy:
|
|
32
|
+
# - EXPECTED_CAA_ISSUER (env) — when set, at least one CAA `issue` (or
|
|
33
|
+
# `issuewild`) record MUST name this CA, else the check FAILS (exit 1).
|
|
34
|
+
# Default: letsencrypt.org (the CA the IS public-namespace certs are issued
|
|
35
|
+
# by). Override per-deployment.
|
|
36
|
+
# - EXPECTED_CAA_ISSUER=ANY (case-insensitive) — relax to "any CAA record is
|
|
37
|
+
# acceptable"; presence of ANY CAA record passes, absence fails, and a
|
|
38
|
+
# warning is emitted that no specific CA is being pinned.
|
|
39
|
+
#
|
|
40
|
+
# Exit codes:
|
|
41
|
+
# 0 — CAA verified (present at a trusted resolver, and matches
|
|
42
|
+
# EXPECTED_CAA_ISSUER when a specific issuer is required)
|
|
43
|
+
# 1 — CAA NOT verified (no CAA records, or expected issuer not present, from
|
|
44
|
+
# any trusted resolver)
|
|
45
|
+
# 2 — UNKNOWN/UNREACHABLE (no resolver tool installed)
|
|
46
|
+
#
|
|
47
|
+
# Override knobs:
|
|
48
|
+
# CAA_CHECK_RESOLVERS — space-separated list of trusted public resolvers to
|
|
49
|
+
# query in order (default: "1.1.1.1 8.8.8.8").
|
|
50
|
+
# CAA_CHECK_DIG_CMD — command used in place of `dig` (default: dig)
|
|
51
|
+
|
|
52
|
+
set -euo pipefail
|
|
53
|
+
|
|
54
|
+
# Domain under test: the first positional argument wins, then
# $CAA_CHECK_DOMAIN, then the production namespace.
DOMAIN="${CAA_CHECK_DOMAIN:-evals.intentsolutions.io}"
if [[ -n "${1:-}" ]]; then
  DOMAIN="$1"
fi

# CA that an issue/issuewild record must name (ANY relaxes to presence-only).
if [[ -z "${EXPECTED_CAA_ISSUER:-}" ]]; then
  EXPECTED_CAA_ISSUER="letsencrypt.org"
fi

# Lookup command; CAA_CHECK_DIG_CMD substitutes another command for dig.
DIG_CMD="dig"
if [[ -n "${CAA_CHECK_DIG_CMD:-}" ]]; then
  DIG_CMD="$CAA_CHECK_DIG_CMD"
fi

# Trusted public resolvers, queried in order, until one returns a CAA record.
RESOLVERS="1.1.1.1 8.8.8.8"
if [[ -n "${CAA_CHECK_RESOLVERS:-}" ]]; then
  RESOLVERS="$CAA_CHECK_RESOLVERS"
fi
|
|
59
|
+
|
|
60
|
+
# log MESSAGE -> writes "caa-check: MESSAGE" plus a newline to stderr, keeping
# stdout free of diagnostics.
log() {
  local message="$1"
  printf 'caa-check: %s\n' "${message}" >&2
}
|
|
61
|
+
|
|
62
|
+
# -h / --help (taken from the resolved DOMAIN value): print this script's
# header comment and exit 0.
#
# The header is located structurally — every line after the shebang up to the
# first line that is not a comment — instead of by a fixed line range. The
# previous fixed range (2-60) ran past the end of the header and printed the
# `set -euo pipefail` line, the variable assignments and log() as help text.
if [[ "$DOMAIN" == "-h" || "$DOMAIN" == "--help" ]]; then
  awk 'NR == 1 { next } !/^#/ { exit } { print }' "$0"
  exit 0
fi
|
|
66
|
+
|
|
67
|
+
# have NAME -> succeeds when `command -v` can resolve NAME, fails otherwise.
# Prints nothing on either stream.
have() {
  if command -v "$1" >/dev/null 2>&1; then
    return 0
  fi
  return 1
}
|
|
68
|
+
|
|
69
|
+
# Without the lookup command nothing can be verified: report UNKNOWN and stop
# with exit code 2 so the caller never treats "could not check" as a pass.
have "$DIG_CMD" || {
  log "UNKNOWN/UNREACHABLE — '$DIG_CMD' is not installed; cannot look up CAA for '$DOMAIN'"
  log " failing closed (production must not sign on UNKNOWN)"
  log " remediation: install bind9-dnsutils (provides dig) on the signing host"
  exit 2
}
|
|
75
|
+
|
|
76
|
+
# issuer_matches CAA_TEXT -> 0 if a matching issue/issuewild record is present.
#
# CAA_TEXT is `dig +short CAA` style output, one record per line:
#   <flags> <tag> "<value>"
# A record matches when its tag is `issue` or `issuewild` and the issuer domain
# name in its value EQUALS $EXPECTED_CAA_ISSUER, compared case-insensitively.
# Parameters after the first `;` (e.g. accounturi=...) and a trailing root dot
# are ignored for the comparison.
#
# Equality, not substring: the previous substring test accepted look-alike
# issuers such as "notletsencrypt.org" or "letsencrypt.org.evil.example" as
# satisfying a pin on "letsencrypt.org", which defeats the point of pinning.
#
# Globals:  EXPECTED_CAA_ISSUER (read)
# Returns:  0 on a match, 1 otherwise (including empty input)
#
# The body runs in a subshell so enabling nocasematch cannot leak to the caller.
issuer_matches() (
  shopt -s nocasematch
  local expected="${EXPECTED_CAA_ISSUER%.}"
  local record _flags tag value

  while IFS= read -r record; do
    # Split "<flags> <tag> <rest-of-line>"; the value keeps any inner spaces.
    read -r _flags tag value <<<"$record"
    [[ "$tag" == "issue" || "$tag" == "issuewild" ]] || continue

    value="${value#\"}"            # opening quote
    value="${value%%;*}"           # parameters after the issuer domain
    value="${value%\"}"            # closing quote (when there were no parameters)
    value="${value//[[:space:]]/}" # whitespace around the domain
    value="${value%.}"             # trailing root dot

    # An empty value (e.g. issuewild ";") authorises no CA and never matches.
    if [[ -n "$value" && "$value" == "$expected" ]]; then
      return 0
    fi
  done <<<"$1"

  return 1
)
|
|
84
|
+
|
|
85
|
+
# is_blank CAA_TEXT -> 0 when nothing is left after removing every space, tab,
# carriage return and newline from the text; 1 otherwise.
is_blank() {
  local remainder="$1"
  local ch
  for ch in ' ' $'\t' $'\r' $'\n'; do
    remainder="${remainder//"$ch"/}"
  done
  [[ -z "$remainder" ]]
}
|
|
89
|
+
|
|
90
|
+
# State carried out of the resolver loop and used for the final report.
saw_records=0   # becomes 1 once any trusted resolver returns CAA records
last_caa_out="" # records from the most recent resolver that returned any

# relax_any=1 when EXPECTED_CAA_ISSUER is "ANY" in any letter case: presence of
# a CAA record is then enough and no particular CA is required.
relax_any=0
case "$EXPECTED_CAA_ISSUER" in
  [Aa][Nn][Yy]) relax_any=1 ;;
esac
# Case-sensitive matching is in force for everything that follows.
shopt -u nocasematch
|
|
97
|
+
|
|
98
|
+
# caa_records_only DIG_OUTPUT -> prints only the lines shaped like a CAA record
# in `dig +short` form: <numeric flags> <alphanumeric tag> <value>.
#
# `dig +short` can put lines on stdout that are not records of the queried
# type — for example `;;` diagnostic lines when a server cannot be reached, or
# the target of a CNAME. Treating any non-blank output as "CAA records exist"
# let the EXPECTED_CAA_ISSUER=ANY mode pass on such output with no CAA record
# present, so the output is filtered before it is trusted.
caa_records_only() {
  local -r record_re='^[[:space:]]*[0-9]+[[:space:]]+[A-Za-z0-9]+[[:space:]]+[^[:space:]]'
  local line
  while IFS= read -r line; do
    if [[ "$line" =~ $record_re ]]; then
      printf '%s\n' "$line"
    fi
  done <<<"$1"
  return 0
}

# Query each trusted resolver in order. The first resolver whose records
# satisfy the policy ends the script with exit 0; otherwise fall through to the
# fail-closed report after the loop.
# RESOLVERS is deliberately unquoted: it is a space-separated list.
for resolver in $RESOLVERS; do
  log "looking up CAA records for '$DOMAIN' via $DIG_CMD @$resolver"
  # `dig @resolver +short CAA` prints one line per record, e.g.:
  #   0 issue "letsencrypt.org"
  #   0 issuewild ";"
  # A failing lookup is treated the same as an empty answer (|| true), which
  # keeps the loop moving to the next resolver instead of aborting under -e.
  raw_out="$("$DIG_CMD" "@$resolver" +short CAA "$DOMAIN" 2>/dev/null || true)"
  caa_out="$(caa_records_only "$raw_out")"

  if is_blank "$caa_out"; then
    if ! is_blank "$raw_out"; then
      log " ignoring output from @$resolver that is not a CAA record (resolver diagnostics or a CNAME target)"
    fi
    log " no CAA records returned by @$resolver"
    continue
  fi

  saw_records=1
  last_caa_out="$caa_out"

  # --- ANY-issuer relaxation: any CAA record present passes ---
  if [[ "$relax_any" -eq 1 ]]; then
    log "VERIFIED (presence only) — CAA records exist for '$DOMAIN' (via @$resolver)"
    log " WARNING: EXPECTED_CAA_ISSUER=ANY — no specific CA is being pinned."
    log " Records found:"
    printf '%s\n' "$caa_out" | sed 's/^/ /' >&2
    exit 0
  fi

  # --- Specific-issuer pinning ---
  if issuer_matches "$caa_out"; then
    log "VERIFIED — '$DOMAIN' pins issuance to '$EXPECTED_CAA_ISSUER' (via @$resolver)"
    exit 0
  fi

  log " CAA records exist at @$resolver but none pin '$EXPECTED_CAA_ISSUER'; trying next resolver"
done
|
|
130
|
+
|
|
131
|
+
# Reaching this point means no resolver produced an acceptable answer, so the
# gate fails closed with exit 1. The report distinguishes "nothing published"
# from "published, but the expected CA is not among the records".
if (( saw_records != 1 )); then
  log "NOT VERIFIED — no CAA records found for '$DOMAIN' (resolvers tried: $RESOLVERS)"
  log " remediation: publish a CAA record pinning the issuing CA, e.g.:"
  log " $DOMAIN. CAA 0 issue \"$EXPECTED_CAA_ISSUER\""
else
  log "NOT VERIFIED — CAA records exist for '$DOMAIN' but none pin '$EXPECTED_CAA_ISSUER'"
  log " Records found:"
  sed 's/^/ /' <<<"$last_caa_out" >&2
  log " remediation: add a CAA record pinning the expected CA, or set"
  log " EXPECTED_CAA_ISSUER to the CA actually published (or ANY to accept any CAA)."
fi
exit 1
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# check-wrapper-sync.sh — assert the bundled wrapper-script mirrors are byte-identical
|
|
3
|
+
# to their canonical source under scripts/.
|
|
4
|
+
#
|
|
5
|
+
# WHY THIS EXISTS
|
|
6
|
+
# ---------------
|
|
7
|
+
# The Node package (bin/audit-harness.js) dispatches to the CANONICAL scripts under
|
|
8
|
+
# scripts/. The Python wrapper (intent-audit-harness on PyPI) and the Rust wrapper
|
|
9
|
+
# (intent-audit-harness on crates.io) cannot reach those canonical files at install
|
|
10
|
+
# time, so each BUNDLES a copy:
|
|
11
|
+
#
|
|
12
|
+
# * python/src/intent_audit_harness/scripts/<name> (packaged into the wheel)
|
|
13
|
+
# * rust/scripts/<name> (include_bytes!'d into the binary)
|
|
14
|
+
#
|
|
15
|
+
# Those copies are hand-maintained. On 2026-05-24 they were found ~1 month stale:
|
|
16
|
+
# the bundled crap-score.py was missing v1.1.1's --json evidence envelope, the
|
|
17
|
+
# `which_or_none("go")` PATH guard (silent crash on Go-less hosts), and the
|
|
18
|
+
# rglob->os.walk directory pruning. A user running
|
|
19
|
+
# `pip install intent-audit-harness && audit-harness crap` got the OLD gate.
|
|
20
|
+
# (Tracking bead: iah-python-wrapper-scripts-sync / bd_000-projects-65k4.)
|
|
21
|
+
#
|
|
22
|
+
# This gate makes that class of drift IMPOSSIBLE to merge silently: every bundled
|
|
23
|
+
# mirror MUST be a byte-for-byte copy of its canonical source. There is no
|
|
24
|
+
# wrapper-only delta — both wrappers invoke the script verbatim via bash/python3.
|
|
25
|
+
#
|
|
26
|
+
# RESYNC (when this gate REDs)
|
|
27
|
+
# ----------------------------
|
|
28
|
+
# bash scripts/check-wrapper-sync.sh --fix # copy canonical -> both mirrors
|
|
29
|
+
# then review + commit the result.
|
|
30
|
+
#
|
|
31
|
+
# Exit codes:
|
|
32
|
+
# 0 all mirrors in sync (or --fix completed)
|
|
33
|
+
# 1 drift detected (and not in --fix mode)
|
|
34
|
+
set -euo pipefail
|
|
35
|
+
|
|
36
|
+
# Anchor every path on the location of this script file rather than on the
# caller's working directory, so the gate gives the same answer from anywhere.
#   SCRIPT_DIR    - absolute directory containing this script
#   REPO_ROOT     - its parent directory
#   CANONICAL_DIR - REPO_ROOT/scripts, the source of truth for the mirrors
SCRIPT_DIR="$(
  cd "$(dirname "${BASH_SOURCE[0]}")" && pwd
)"
REPO_ROOT="$(cd "${SCRIPT_DIR}" && cd .. && pwd)"
CANONICAL_DIR="${REPO_ROOT}/scripts"
|
|
41
|
+
|
|
42
|
+
# The set of scripts the Python + Rust wrappers DISPATCH. Keep this in lock-step
|
|
43
|
+
# with:
|
|
44
|
+
# * python/src/intent_audit_harness/cli.py (COMMANDS dict)
|
|
45
|
+
# * rust/src/main.rs (SCRIPTS array)
|
|
46
|
+
# If a wrapper starts dispatching a new canonical script, add it here AND to both
|
|
47
|
+
# wrapper sources, and copy it into both mirror dirs.
|
|
48
|
+
# File names (relative to scripts/) that every mirror directory must carry.
MIRRORED_SCRIPTS=(
  harness-hash.sh escape-scan.sh arch-check.sh
  bias-count.sh gherkin-lint.sh crap-score.py
)

# Mirror directories, relative to the repo root, that bundle those copies.
MIRROR_DIRS=(python/src/intent_audit_harness/scripts rust/scripts)
|
|
62
|
+
|
|
63
|
+
# FIX=1 only when the first argument is exactly --fix; anything else (or no
# argument) leaves the script in check-only mode.
FIX=0
case "${1:-}" in
  --fix) FIX=1 ;;
esac

# Outcome flags raised while walking the mirrors.
drift_found=0       # a mirror is missing or differs from its canonical file
missing_canonical=0 # a listed canonical file does not exist
|
|
70
|
+
|
|
71
|
+
# check_mirror CANONICAL MIRROR LABEL NAME
#
# Compares one mirror file against its canonical source, byte for byte.
#   CANONICAL - path of the canonical file (must exist)
#   MIRROR    - path of the bundled copy
#   LABEL     - repo-relative mirror path used in messages
#   NAME      - script file name used in messages
# Globals:  FIX (read), drift_found (set to 1 on a missing or differing mirror)
# Outputs:  a DRIFT line on stderr; in --fix mode also a "fixed:" line on stdout
# Returns:  0 unless the copy performed in --fix mode fails
#
# In --fix mode the mirror's parent directory is created first. Without that, a
# mirror whose directory does not exist yet made `cp` fail and `set -e` abort
# the run part-way through.
check_mirror() {
  local canonical="$1" mirror="$2" label="$3" name="$4"
  local action

  if [[ ! -f "${mirror}" ]]; then
    echo "DRIFT: missing mirror ${label} (expected a copy of scripts/${name})" >&2
    action="created"
  elif ! cmp -s "${canonical}" "${mirror}"; then
    echo "DRIFT: ${label} differs from canonical scripts/${name}" >&2
    action="resynced"
  else
    return 0
  fi

  drift_found=1
  if [[ "${FIX}" -eq 1 ]]; then
    mkdir -p -- "$(dirname -- "${mirror}")"
    cp -f -- "${canonical}" "${mirror}"
    echo " fixed: ${action} ${label}"
  fi
}

# Walk every (script, mirror dir) pair. A missing canonical file is recorded
# and its mirrors are skipped, since there is nothing to compare them against.
for name in "${MIRRORED_SCRIPTS[@]}"; do
  canonical="${CANONICAL_DIR}/${name}"
  if [[ ! -f "${canonical}" ]]; then
    echo "ERROR: canonical source missing: scripts/${name}" >&2
    missing_canonical=1
    continue
  fi
  for mdir in "${MIRROR_DIRS[@]}"; do
    check_mirror "${canonical}" "${REPO_ROOT}/${mdir}/${name}" "${mdir}/${name}" "${name}"
  done
done
|
|
99
|
+
|
|
100
|
+
# Verdict, in priority order:
#   1. a canonical file is missing -> exit 1 (nothing to verify against)
#   2. --fix mode                  -> exit 0 once the copies are done
#   3. drift in check mode         -> exit 1 with resync instructions
#   4. otherwise                   -> exit 0, everything matches
if (( missing_canonical == 1 )); then
  echo "FAIL: one or more canonical scripts are missing — cannot verify mirror sync." >&2
  exit 1
fi

if (( FIX == 1 )); then
  echo "check-wrapper-sync: --fix complete. Review + commit the resynced mirrors."
  exit 0
fi

if (( drift_found == 1 )); then
  cat >&2 <<'EOF'

FAIL: bundled wrapper mirrors are out of sync with canonical scripts/.
 The Python (PyPI) and Rust (crates.io) packages would ship STALE gates.
 Resync with: bash scripts/check-wrapper-sync.sh --fix
 then review + commit the result.
EOF
  exit 1
fi

echo "check-wrapper-sync: OK — all ${#MIRRORED_SCRIPTS[@]} bundled mirrors match canonical in ${#MIRROR_DIRS[@]} wrapper dirs."
exit 0
|
package/scripts/crap-score.py
CHANGED
|
@@ -50,6 +50,22 @@ EXCLUDED_DIRS = {
|
|
|
50
50
|
}
|
|
51
51
|
|
|
52
52
|
|
|
53
|
+
def is_excluded_dir(name: str) -> bool:
    """Return True when a directory called ``name`` must not be descended into.

    This is the one exclusion rule intended for both the candidate-discovery
    walk and the --json input-hash walk. Using a single predicate keeps the
    set of files that are scored identical to the set of files that are
    hashed; two separate rules could let a directory be hashed without ever
    being scored, or the reverse.

    A directory is excluded when either holds:
      * its name starts with a dot (for example ``.git``, ``.idea``, ``.svn``)
      * its name is listed in EXCLUDED_DIRS (the named build/vendor dirs)
    """
    if name.startswith("."):
        return True
    return name in EXCLUDED_DIRS
|
|
67
|
+
|
|
68
|
+
|
|
53
69
|
def crap(complexity: int, coverage_pct: float) -> float:
|
|
54
70
|
cov = max(0.0, min(100.0, coverage_pct)) / 100.0
|
|
55
71
|
return (complexity ** 2) * ((1.0 - cov) ** 3) + complexity
|
|
@@ -98,8 +114,7 @@ def score_python(root: Path, kind: str) -> list[MethodScore]:
|
|
|
98
114
|
scanned = [
|
|
99
115
|
p.name for p in root.iterdir()
|
|
100
116
|
if p.is_dir()
|
|
101
|
-
and not p.name
|
|
102
|
-
and p.name not in EXCLUDED_DIRS
|
|
117
|
+
and not is_excluded_dir(p.name)
|
|
103
118
|
and p.name not in test_dirs
|
|
104
119
|
and any(p.rglob("*.py"))
|
|
105
120
|
]
|
|
@@ -165,7 +180,15 @@ def score_go(root: Path, kind: str) -> list[MethodScore]:
|
|
|
165
180
|
print("[crap-score] gocyclo not installed", file=sys.stderr)
|
|
166
181
|
return []
|
|
167
182
|
|
|
168
|
-
|
|
183
|
+
# For kind="src", ignore *_test.go at the gocyclo level. For kind="test",
|
|
184
|
+
# do NOT pass -ignore: a pattern like `.*\.go$` matches every analyzable
|
|
185
|
+
# file (gocyclo only reads .go files), which silenced all test-kind output.
|
|
186
|
+
# The include-filter below keeps only *_test.go rows for kind="test".
|
|
187
|
+
gocyclo_cmd = ["gocyclo"]
|
|
188
|
+
if kind == "src":
|
|
189
|
+
gocyclo_cmd += ["-ignore", "_test.go"]
|
|
190
|
+
gocyclo_cmd.append(".")
|
|
191
|
+
rc, out, _ = run(gocyclo_cmd, root)
|
|
169
192
|
complexity: list[tuple[str, str, int]] = []
|
|
170
193
|
for line in out.splitlines():
|
|
171
194
|
parts = line.strip().split()
|
|
@@ -187,11 +210,28 @@ def score_go(root: Path, kind: str) -> list[MethodScore]:
|
|
|
187
210
|
if not cov_out.is_file() and which_or_none("go"):
|
|
188
211
|
run(["go", "test", "-coverprofile=coverage.out", "-covermode=atomic", "./..."], root)
|
|
189
212
|
if cov_out.is_file() and which_or_none("go"):
|
|
213
|
+
# `go tool cover -func` reports module-qualified paths
|
|
214
|
+
# (github.com/user/repo/pkg/file.go) while gocyclo reports repo-relative
|
|
215
|
+
# paths (pkg/file.go). Strip the module prefix read from go.mod so the
|
|
216
|
+
# coverage keys join the complexity keys.
|
|
217
|
+
module_prefix = ""
|
|
218
|
+
go_mod = root / "go.mod"
|
|
219
|
+
if go_mod.is_file():
|
|
220
|
+
try:
|
|
221
|
+
for mod_line in go_mod.read_text().splitlines():
|
|
222
|
+
mod_line = mod_line.strip()
|
|
223
|
+
if mod_line.startswith("module ") or mod_line.startswith("module\t"):
|
|
224
|
+
module_prefix = mod_line.split(None, 1)[1].strip() + "/"
|
|
225
|
+
break
|
|
226
|
+
except OSError:
|
|
227
|
+
pass
|
|
190
228
|
rc, out, _ = run(["go", "tool", "cover", "-func=coverage.out"], root)
|
|
191
229
|
for line in out.splitlines():
|
|
192
230
|
parts = line.split()
|
|
193
231
|
if len(parts) >= 3 and parts[-1].endswith("%"):
|
|
194
232
|
fpath = parts[0].split(":", 1)[0]
|
|
233
|
+
if module_prefix and fpath.startswith(module_prefix):
|
|
234
|
+
fpath = fpath[len(module_prefix):]
|
|
195
235
|
try:
|
|
196
236
|
pct = float(parts[-1].rstrip("%"))
|
|
197
237
|
except ValueError:
|
|
@@ -228,6 +268,17 @@ def score_js(root: Path, kind: str) -> list[MethodScore]:
|
|
|
228
268
|
except json.JSONDecodeError:
|
|
229
269
|
return []
|
|
230
270
|
|
|
271
|
+
# c8/istanbul's json-summary reporter keys files by ABSOLUTE path while
|
|
272
|
+
# complexity-report (run with a repo-relative target) reports repo-relative
|
|
273
|
+
# paths. Normalize both sides to repo-relative so the coverage join works.
|
|
274
|
+
def _rel_to_root(p: str) -> str:
|
|
275
|
+
if os.path.isabs(p):
|
|
276
|
+
try:
|
|
277
|
+
return os.path.relpath(p, str(root))
|
|
278
|
+
except ValueError:
|
|
279
|
+
return p # e.g. different drive on Windows — keep as-is
|
|
280
|
+
return p
|
|
281
|
+
|
|
231
282
|
cov_path = root / "coverage" / "coverage-summary.json"
|
|
232
283
|
coverage: dict[str, float] = {}
|
|
233
284
|
if cov_path.is_file():
|
|
@@ -237,14 +288,14 @@ def score_js(root: Path, kind: str) -> list[MethodScore]:
|
|
|
237
288
|
if fpath == "total":
|
|
238
289
|
continue
|
|
239
290
|
lines_pct = summary.get("lines", {}).get("pct", 0.0)
|
|
240
|
-
coverage[fpath] = float(lines_pct)
|
|
291
|
+
coverage[_rel_to_root(fpath)] = float(lines_pct)
|
|
241
292
|
except (OSError, json.JSONDecodeError):
|
|
242
293
|
pass
|
|
243
294
|
|
|
244
295
|
scores: list[MethodScore] = []
|
|
245
296
|
for report in data.get("reports", []):
|
|
246
297
|
fpath = report.get("path", "")
|
|
247
|
-
cov = coverage.get(fpath, 0.0)
|
|
298
|
+
cov = coverage.get(_rel_to_root(fpath), 0.0)
|
|
248
299
|
for func in report.get("functions", []):
|
|
249
300
|
c = int(func.get("cyclomatic", 1))
|
|
250
301
|
scores.append(
|
|
@@ -403,7 +454,7 @@ def main() -> int:
|
|
|
403
454
|
exts = (".py", ".ts", ".tsx", ".js", ".jsx", ".go", ".rs", ".java", ".kt", ".cs", ".php", ".rb")
|
|
404
455
|
collected: list[Path] = []
|
|
405
456
|
for dirpath, dirs, files in os.walk(root):
|
|
406
|
-
dirs[:] = [d for d in dirs if
|
|
457
|
+
dirs[:] = [d for d in dirs if not is_excluded_dir(d)]
|
|
407
458
|
for fn in files:
|
|
408
459
|
if fn.endswith(exts):
|
|
409
460
|
collected.append(Path(dirpath) / fn)
|
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# cred-gate.sh — Provider credential PASS/FAIL gate (iah-E08).
|
|
3
|
+
#
|
|
4
|
+
# CISO non-negotiable per DR-010 S1Q5: before any provider abstraction is allowed
|
|
5
|
+
# to flow data into an Evidence Bundle / OTel signal / gate-result envelope, two
|
|
6
|
+
# things MUST hold and are gated here, deterministically and offline:
|
|
7
|
+
#
|
|
8
|
+
# 1. CREDENTIAL REDACTION — no provider secret VALUE appears verbatim in the
|
|
9
|
+
# candidate artifact (the JSON the runner is about to sign, the OTel line it
|
|
10
|
+
# is about to emit, any log it captures). A leaked API key in a signed,
|
|
11
|
+
# Rekor-anchored Statement is irreversible.
|
|
12
|
+
#
|
|
13
|
+
# 2. ENV-VAR SPILLOVER — the candidate artifact does not blindly serialize the
|
|
14
|
+
# process environment (e.g. an `env` dump, a `process.env` spread, or a
|
|
15
|
+
# "context": {<all env>} block). A provider key need not be named to leak:
|
|
16
|
+
# a wholesale env dump spills every secret at once.
|
|
17
|
+
#
|
|
18
|
+
# This gate is READ-ONLY and OFFLINE. It never contacts a provider, never reads
|
|
19
|
+
# a real key from disk, and never writes. It inspects the candidate artifact you
|
|
20
|
+
# hand it (stdin or --input) against the secret values present in the environment
|
|
21
|
+
# (referenced by NAME via --secret-env, so the values never appear on the command
|
|
22
|
+
# line) plus a built-in catalog of provider-key SHAPES.
|
|
23
|
+
#
|
|
24
|
+
# It emits a gate-result/v1 envelope on stdout (--json) suitable for piping to
|
|
25
|
+
# emit-evidence, OR a human-readable PASS/FAIL summary (default).
|
|
26
|
+
#
|
|
27
|
+
# Usage:
|
|
28
|
+
# bash cred-gate.sh --input candidate.json
|
|
29
|
+
# <producer> | bash cred-gate.sh # candidate on stdin
|
|
30
|
+
# bash cred-gate.sh --secret-env ANTHROPIC_API_KEY --secret-env OPENAI_API_KEY < cand.json
|
|
31
|
+
# bash cred-gate.sh --json < candidate.json | bash emit-evidence.sh
|
|
32
|
+
#
|
|
33
|
+
# Flags:
|
|
34
|
+
# --input PATH Read the candidate artifact from PATH instead of stdin.
|
|
35
|
+
# --secret-env NAME Treat $NAME's VALUE as a secret that must NOT appear in the
|
|
36
|
+
# candidate. Repeatable. The value is read from the
|
|
37
|
+
# environment by name — it is never passed on argv.
|
|
38
|
+
# --json Emit a gate-result/v1 envelope (JSON) instead of text.
|
|
39
|
+
# --gate-id ID Override the gate_id in the envelope (default: provider-cred-gate).
|
|
40
|
+
# --help, -h Print help.
|
|
41
|
+
#
|
|
42
|
+
# Exit codes:
|
|
43
|
+
# 0 — PASS (no secret value present; no env-var spillover detected)
|
|
44
|
+
# 1 — FAIL (a secret value leaked OR an env-var spillover pattern matched)
|
|
45
|
+
# 2 — usage / input error (no candidate, unreadable --input)
|
|
46
|
+
#
|
|
47
|
+
# Failure-mode docs (iah-E08d): see docs/cred-gate.md for the catalog of detected
|
|
48
|
+
# shapes, the spillover heuristics, the false-positive posture, and remediation.
|
|
49
|
+
|
|
50
|
+
set -euo pipefail
|
|
51
|
+
|
|
52
|
+
# Bash version floor: align with the rest of the harness (jcgw).
# Anything other than a major version of 4 or newer stops here with exit 2.
if ! [ "${BASH_VERSINFO:-0}" -ge 4 ]; then
  echo 'audit-harness requires bash >= 4' >&2
  exit 2
fi
|
|
54
|
+
|
|
55
|
+
# Option defaults; parse_args overwrites them from the command line.
INPUT="-"                    # "-" means read the candidate from stdin
EMIT_JSON=0                  # 1 -> emit the gate-result envelope instead of text
GATE_ID="provider-cred-gate" # gate_id written into the envelope
SECRET_ENVS=()               # env-var NAMES whose values must not appear

# parse_args ARGS... -> fills INPUT, EMIT_JSON, GATE_ID and SECRET_ENVS.
#
# Exits 2 with a message on any usage error:
#   * an unknown flag
#   * a value-taking flag given as the last argument (previously this died
#     with bash's "unbound variable" error instead of a usage error)
#   * a non-empty --secret-env NAME that is not a plain variable name. The name
#     is later used for indirect expansion, where an arbitrary string is not
#     safe to expand, so only [A-Za-z_][A-Za-z0-9_]* is accepted. An empty NAME
#     is still accepted and ignored later, as before.
# Exits 0 after printing the header comment for --help / -h.
parse_args() {
  local -r name_re='^[A-Za-z_][A-Za-z0-9_]*$'

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --input | --secret-env | --gate-id)
        if [[ $# -lt 2 ]]; then
          echo "cred-gate: $1 requires a value" >&2
          exit 2
        fi
        case "$1" in
          --input)
            INPUT="$2"
            ;;
          --secret-env)
            if [[ -n "$2" && ! "$2" =~ $name_re ]]; then
              echo "cred-gate: --secret-env expects an environment variable name" >&2
              exit 2
            fi
            SECRET_ENVS+=("$2")
            ;;
          --gate-id)
            GATE_ID="$2"
            ;;
        esac
        shift 2
        ;;
      --json)
        EMIT_JSON=1
        shift
        ;;
      --help | -h)
        sed -n '2,46p' "$0"
        exit 0
        ;;
      *)
        echo "cred-gate: unknown flag $1" >&2
        exit 2
        ;;
    esac
  done
}

parse_args "$@"
|
|
70
|
+
|
|
71
|
+
# --- Read the candidate artifact ---
# INPUT "-" means stdin; anything else is a path that must be readable.
# An unreadable path or an empty artifact is an input error (exit 2).
if [[ "$INPUT" != "-" && ! -r "$INPUT" ]]; then
  echo "cred-gate: cannot read $INPUT" >&2
  exit 2
fi

case "$INPUT" in
  -) CANDIDATE="$(cat)" ;;
  *) CANDIDATE="$(cat "$INPUT")" ;;
esac

[[ -n "$CANDIDATE" ]] || {
  echo "cred-gate: empty candidate artifact" >&2
  exit 2
}
|
|
86
|
+
|
|
87
|
+
# INPUT_HASH: sha256 over the candidate text exactly as held in $CANDIDATE,
# i.e. over what the analysis below inspects.
INPUT_HASH="sha256:$(printf '%s' "$CANDIDATE" | sha256sum | awk '{print $1}')"

# POLICY_HASH: sha256 of this script file, so the envelope records which
# version of the gate logic produced the result.
POLICY_HASH="sha256:$(sha256sum "$0" | awk '{print $1}')"
|
|
92
|
+
|
|
93
|
+
# --- Collect the declared secrets and analyse the candidate ---
#
# analyze_candidate -> prints {"result": "PASS"|"FAIL", "findings": [...]}.
#
# Globals:  CANDIDATE (read), SECRET_ENVS (read)
# Outputs:  one JSON object on stdout
#
# The secrets are handed to python by NAME (newline-separated) and python
# reads each value from its own environment, so no secret value is ever
# placed on argv. The previous version joined the VALUES with $'\0' in a
# shell variable, but a bash string cannot contain a NUL byte: the delimiter
# vanished, two or more declared secrets were glued into one string, and a
# leak of any single one of them went undetected.
analyze_candidate() {
  local name
  local names=""

  for name in "${SECRET_ENVS[@]:-}"; do
    # The :- expansion yields one empty word for an empty array; skip it.
    if [[ -z "$name" ]]; then
      continue
    fi
    names+="${name}"$'\n'
  done

  CANDIDATE="$CANDIDATE" \
  CRED_GATE_SECRET_NAMES="$names" \
  python3 - <<'PY'
import hashlib
import json
import os
import re

candidate = os.environ["CANDIDATE"]

findings = []  # list of {"kind": ..., "detail": ...}

# --- 1. Credential redaction: explicit secret VALUES must not appear verbatim ---
secret_names = [
    n for n in os.environ.get("CRED_GATE_SECRET_NAMES", "").split("\n") if n
]
for secret_name in secret_names:
    val = os.environ.get(secret_name, "")
    # Skip unset / trivially short values: a 1-char "secret" would
    # false-positive on virtually any artifact and is not a real credential.
    if len(val) < 8:
        continue
    if val in candidate:
        # NEVER echo the secret. Report only its length + a non-reversible
        # fingerprint so the finding is actionable without re-leaking.
        fp = hashlib.sha256(val.encode("utf-8", "surrogateescape")).hexdigest()[:12]
        findings.append(
            {
                "kind": "secret-value-leak",
                "detail": (
                    "a declared secret value (len=%d, sha256:%s...) appears "
                    "verbatim in the candidate artifact" % (len(val), fp)
                ),
            }
        )

# --- 2. Credential redaction: provider-key SHAPES (value-agnostic catalog) ---
# Each pattern matches the literal on-the-wire shape of a known provider key.
# A match means a raw key is embedded even if it was not declared via
# --secret-env. Patterns are intentionally specific to keep the FP rate low.
SHAPE_PATTERNS = [
    ("anthropic-key", r"sk-ant-[A-Za-z0-9_-]{20,}"),
    # OpenAI keys start sk- but NOT sk-ant- (that's anthropic, matched above).
    # The negative lookahead keeps the two findings disjoint.
    ("openai-key", r"sk-(?!ant-)(?:proj-)?[A-Za-z0-9_-]{20,}"),
    ("groq-key", r"gsk_[A-Za-z0-9]{20,}"),
    ("nvidia-key", r"nvapi-[A-Za-z0-9_-]{20,}"),
    ("aws-access-key-id", r"AKIA[0-9A-Z]{16}"),
    ("google-api-key", r"AIza[0-9A-Za-z_-]{35}"),
    ("github-token", r"gh[posru]_[A-Za-z0-9]{36,}"),
    ("slack-token", r"xox[baprs]-[A-Za-z0-9-]{10,}"),
    ("private-key-block", r"-----BEGIN (?:RSA |EC |OPENSSH )?PRIVATE KEY-----"),
]
for name, pattern in SHAPE_PATTERNS:
    if re.search(pattern, candidate):
        findings.append(
            {
                "kind": "secret-shape-match",
                "detail": "candidate contains a value matching the %s key shape"
                % name,
            }
        )

# --- 3. Env-var spillover: wholesale environment serialization ---
# A provider key need not be NAMED to leak — a blanket env dump spills every
# secret at once. We flag the structural patterns that serialize the whole
# environment into the artifact.
SPILLOVER_PATTERNS = [
    ("process-env-spread", r"\.\.\.\s*process\.env\b"),
    ("os-environ-dump", r"\bdict\(\s*os\.environ\s*\)|\bos\.environ\b\s*[,}\]]"),
    ("env-block-key", r'"(?:env|environ|environment)"\s*:\s*\{'),
    # NOTE(review): the leading \b sits in front of "/", so the /usr/bin/env
    # alternative only matches when the preceding character is a word
    # character; a path preceded by a space or a quote does not match.
    # Confirm whether that is intended before changing the pattern.
    ("printenv-capture", r"\b(?:printenv|/usr/bin/env)\b"),
]
# These are heuristics, but any spillover match is recorded as a finding so the
# gate FAILs — an env dump in a to-be-signed artifact is exactly the
# irreversible leak this gate exists to stop.
for name, pattern in SPILLOVER_PATTERNS:
    if re.search(pattern, candidate):
        findings.append(
            {
                "kind": "env-spillover",
                "detail": "candidate serializes the process environment via "
                "the %s pattern" % name,
            }
        )

result = "FAIL" if findings else "PASS"
print(json.dumps({"result": result, "findings": findings}))
PY
}

RESULT=$(analyze_candidate)
|
|
199
|
+
|
|
200
|
+
# --- Parse the python result ---
# One python pass prints three lines — the verdict, the findings array
# re-serialised as JSON, and the number of findings — which are then read into
# GATE_RESULT, FINDINGS_JSON and FINDING_COUNT in that order.
parsed=$(printf '%s' "$RESULT" | python3 -c "
import json, sys
doc = json.load(sys.stdin)
print(doc['result'])
print(json.dumps(doc['findings']))
print(len(doc['findings']))
")
{
  IFS= read -r GATE_RESULT
  IFS= read -r FINDINGS_JSON
  IFS= read -r FINDING_COUNT
} <<<"$parsed"
|
|
204
|
+
|
|
205
|
+
# --- Emit ---
# Three output shapes:
#   --json      -> the compact envelope on stdout (failure_mode only on FAIL)
#   text, PASS  -> a single line on stdout
#   text, FAIL  -> a header, one line per finding, and a pointer to the docs,
#                  all on stderr
if [[ "$EMIT_JSON" -eq 1 ]]; then
  GATE_ID="$GATE_ID" GATE_RESULT="$GATE_RESULT" INPUT_HASH="$INPUT_HASH" \
    POLICY_HASH="$POLICY_HASH" FINDINGS_JSON="$FINDINGS_JSON" \
    python3 - <<'PY'
import json
import os

envelope = {}
for field, source in (
    ("gate_id", "GATE_ID"),
    ("result", "GATE_RESULT"),
    ("input_hash", "INPUT_HASH"),
    ("policy_hash", "POLICY_HASH"),
):
    envelope[field] = os.environ[source]
envelope["metadata"] = {"findings": json.loads(os.environ["FINDINGS_JSON"])}
if envelope["result"] == "FAIL":
    envelope["failure_mode"] = "provider_credential_leak"
print(json.dumps(envelope, separators=(",", ":")))
PY
elif [[ "$GATE_RESULT" == "PASS" ]]; then
  echo "cred-gate: PASS — no provider secret value present, no env-var spillover detected"
else
  echo "cred-gate: FAIL — $FINDING_COUNT credential finding(s):" >&2
  FINDINGS_JSON="$FINDINGS_JSON" python3 - <<'PY'
import json
import os
import sys

for finding in json.loads(os.environ["FINDINGS_JSON"]):
    sys.stderr.write(' ⛔ [%s] %s\n' % (finding['kind'], finding['detail']))
PY
  echo "cred-gate: see docs/cred-gate.md for remediation (iah-E08d)." >&2
fi
|
|
237
|
+
|
|
238
|
+
# Exit status mirrors the verdict: 0 for PASS, 1 for anything else.
if [[ "$GATE_RESULT" == "PASS" ]]; then
  exit 0
fi
exit 1
|