npm - altimate-receipts - Versions diffs - 0.9.0 → 0.10.0 - Mend

altimate-receipts 0.9.0 → 0.10.0

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

package/dist/{chunk-TUWJRD7H.js → chunk-63E3RZHD.js} +276 -150
package/dist/chunk-63E3RZHD.js.map +1 -0
package/dist/{chunk-4S2ABMUN.js → chunk-JE6HSACL.js} +2 -2
package/dist/{chunk-WNGBYBM3.js → chunk-KM6VCSVW.js} +2 -2
package/dist/chunk-KM6VCSVW.js.map +1 -0
package/dist/cli.js +464 -70
package/dist/cli.js.map +1 -1
package/dist/index.js +2 -2
package/dist/mcp/server.js +2 -2
package/package.json +4 -1
package/dist/chunk-TUWJRD7H.js.map +0 -1
package/dist/chunk-WNGBYBM3.js.map +0 -1
package/dist/{chunk-4S2ABMUN.js.map → chunk-JE6HSACL.js.map} +0 -0

package/dist/{chunk-TUWJRD7H.js → chunk-63E3RZHD.js} RENAMED Viewed

@@ -177,8 +177,8 @@ function shellExecutorArg(clause) {
   }
   return void 0;
 }
-function cwdAtFirstGit(command, base4) {
-  let cur = base4;
+function cwdAtFirstGit(command, base5) {
+  let cur = base5;
   let moved = false;
   let unknown = false;
   for (const clause of splitClauses(stripHeredocs(command))) {
@@ -1061,8 +1061,8 @@ function isInScope(path2, promptLc, readSet) {
   if (promptLc.includes(lc)) {
     return true;
   }
-  const base4 = lc.split("/").pop();
-  if (base4 && base4.length > 2 && promptLc.includes(base4)) {
+  const base5 = lc.split("/").pop();
+  if (base5 && base5.length > 2 && promptLc.includes(base5)) {
     return true;
   }
   const dirs = lc.split("/").filter((d) => d.length > 2);
@@ -1302,6 +1302,59 @@ function deriveSpans(session) {
   };
 }
+// src/findings/auditFindings.ts
+var TEST_PATH = /(?:^|\/)(?:tests?|specs?|__tests__)(?:\/|$)|\.(?:test|spec)\.[a-z]+$|_test\.[a-z]+$|(?:^|\/)test_[^/]+\.[a-z]+$/i;
+var TEST_DEF = /\bdef\s+test_\w|\b(?:it|test)\s*\(\s*["'`]|\bdescribe\s*\(\s*["'`]|@Test\b|\bfunc\s+Test[A-Z]\w*\s*\(|\b(?:it|test)\.each\b/;
+var TEST_RUNNER = /\b(pytest|jest|vitest|mocha|go test|cargo test|npm (?:run )?test|yarn test|pnpm test|tsc\b|eslint|ruff|mypy|flake8|rspec|phpunit|dbt (?:test|build))\b/i;
+var CLAIMS_TESTED_OR_DONE = /\b(ran|run|verified|tested|all tests? (?:now )?pass(?:ing|ed)?|tests? (?:now )?pass(?:ing|ed)?|added (?:unit |integration )?tests?|test coverage|done|fixed|complete(?:d)?)\b/i;
+var base = (p) => p.split("/").pop() || p;
+function deriveAuditFindings(sum) {
+  const out = [];
+  const ordered = [...sum.spans].filter((s) => s.kind !== "session").sort((a, b) => a.startTime - b.startTime || a.spanId.localeCompare(b.spanId));
+  const gens = ordered.filter((s) => s.kind === "generation");
+  const finalText = gens[gens.length - 1]?.input || "";
+  const testEdits = ordered.filter((s) => {
+    if (s.kind !== "tool" || !(isEditTool(s.name) || isCreateTool(s.name))) {
+      return false;
+    }
+    const fp = filePathOf(s.input);
+    if (!fp || !TEST_PATH.test(fp)) {
+      return false;
+    }
+    const { oldStr, newStr } = editBody(s.input);
+    return TEST_DEF.test(newStr) && !TEST_DEF.test(oldStr);
+  });
+  if (testEdits.length === 0) {
+    return out;
+  }
+  if (!CLAIMS_TESTED_OR_DONE.test(finalText)) {
+    return out;
+  }
+  const tLastTestEdit = Math.max(...testEdits.map((e) => e.startTime));
+  const ranAfter = ordered.some(
+    (s) => isCommandTool(s.name) && s.startTime > tLastTestEdit && s.status === "ok" && TEST_RUNNER.test(commandOf(s.input))
+  );
+  if (ranAfter) {
+    return out;
+  }
+  const latest = testEdits.reduce((a, b) => b.startTime >= a.startTime ? b : a);
+  const files = [...new Set(testEdits.map((e) => filePathOf(e.input)).filter(Boolean))];
+  const list = files.map((f) => `\`${f}\``).join(", ");
+  out.push({
+    id: `untested-test-${latest.spanId}`,
+    severity: "high",
+    title: `Added tests but never ran them: ${base(files[0] ?? "")}`,
+    detail: `The session added a test (${list}) and claimed the work is tested/done, but no test run finished green after the test was written. The agent's own new tests were never executed \u2014 confirm they compile and actually pass before merging.`,
+    impactLabel: "unexecuted tests",
+    confidence: 0.75,
+    score: 100 * 0.85 * 0.75,
+    evidenceSpanId: latest.spanId,
+    filePath: files[0],
+    guardrailRule: "After adding or changing a test, run the suite and confirm it passes before claiming 'tests pass' \u2014 an unexecuted test proves nothing."
+  });
+  return out;
+}
 // src/findings/bypassFindings.ts
 var TEST_FILE = /(?:^|\/)(?:test_[^/]+\.[a-z0-9]+|[^/]+_test\.[a-z0-9]+|[^/]+\.(?:spec|test)\.[a-z0-9]+|conftest\.py)$/i;
 var TEST_DIR = /(?:^|\/)(?:tests?|__tests__|specs?|e2e|testing)\//i;
@@ -1310,7 +1363,7 @@ var TEST_FOCUS = /\b(?:it|describe|test|context)\.only\s*\(|\bfdescribe\s*\(|\bf
 var CONFIG_FILE = /(?:^|\/)(?:tsconfig[^/]*\.json|\.eslintrc[^/]*|eslint\.config\.[a-z]+|\.flake8|setup\.cfg|pyproject\.toml|jest\.config\.[a-z]+|vitest\.config\.[a-z]+|\.pre-commit-config\.ya?ml)$/i;
 var CONFIG_WEAKEN = /"strict"\s*:\s*false|"noImplicitAny"\s*:\s*false|"strictNullChecks"\s*:\s*false|"skipLibCheck"\s*:\s*true|:\s*["']off["']|coverageThreshold|--passWithNoTests/i;
 var CICD_FILE = /(?:^|\/)(?:\.github\/workflows\/[^/]+\.ya?ml|\.gitlab-ci\.yml|Jenkinsfile(?:\.[\w.]+)?|\.circleci\/config\.yml|azure-pipelines\.yml|bitbucket-pipelines\.yml|\.drone\.yml)$/i;
-function base(p) {
+function base2(p) {
   return p.split("/").pop() || p;
 }
 function deriveBypassFindings(sum) {
@@ -1364,8 +1417,8 @@ function deriveBypassFindings(sum) {
         focus = {
           id: `test-focus-${s.spanId}`,
           severity: "high",
-          title: `Focused a single test in ${base(fp)}`,
-          detail: `An edit added \`.only\`/\`fdescribe\`/\`fit\` to \`${base(fp)}\`, which makes the runner execute only that test and silently skip every other test in the suite \u2014 a green run that proves almost nothing.`,
+          title: `Focused a single test in ${base2(fp)}`,
+          detail: `An edit added \`.only\`/\`fdescribe\`/\`fit\` to \`${base2(fp)}\`, which makes the runner execute only that test and silently skip every other test in the suite \u2014 a green run that proves almost nothing.`,
           impactLabel: "suite skipped",
           confidence: 0.85,
           score: 100 * 0.9 * 0.85,
@@ -1378,8 +1431,8 @@ function deriveBypassFindings(sum) {
         configWeaken = {
           id: `config-weaken-${s.spanId}`,
           severity: "high",
-          title: `Weakened the checker config: ${base(fp)}`,
-          detail: `An edit to \`${base(fp)}\` relaxed a static check (e.g. disabled a strict flag, turned a lint rule off, or lowered a coverage threshold). Loosening the checker to get a green run hides the problems it was there to catch.`,
+          title: `Weakened the checker config: ${base2(fp)}`,
+          detail: `An edit to \`${base2(fp)}\` relaxed a static check (e.g. disabled a strict flag, turned a lint rule off, or lowered a coverage threshold). Loosening the checker to get a green run hides the problems it was there to catch.`,
           impactLabel: "checker defanged",
           confidence: 0.8,
           score: 100 * 0.9 * 0.8,
@@ -1392,7 +1445,7 @@ function deriveBypassFindings(sum) {
         ciCdTouch = {
           id: `ci-cd-touch-${s.spanId}`,
           severity: "high",
-          title: `Edited a CI/CD pipeline file: ${base(fp)}`,
+          title: `Edited a CI/CD pipeline file: ${base2(fp)}`,
           detail: `\`${fp}\` is a CI/CD pipeline file \u2014 it runs with repository secrets and write tokens, and an edit can exfiltrate credentials, add a malicious step, or weaken a required check. Review this change with that privilege in mind.`,
           impactLabel: "pipeline edit",
           confidence: 0.7,
@@ -1412,8 +1465,75 @@ function deriveBypassFindings(sum) {
   return out;
 }
+// src/findings/compositeFindings.ts
+var COMPLETION = /\b(all tests? (?:now )?pass(?:ing|ed)?|tests? (?:now )?pass(?:ing|ed)?|all green|everything passes|works now|done|fixed|resolved)\b/i;
+var HEDGE = /\b(should|probably|might|maybe|may|i think|hopefully|in theory|ought to|expects? to|expected to|seems? to|presumably)\b/i;
+var TEST_RUNNER2 = /\b(pytest|jest|vitest|mocha|go test|cargo test|npm (?:run )?test|yarn test|pnpm test|tsc\b|eslint|ruff|mypy|flake8|rspec|phpunit|dbt (?:test|build))\b/i;
+var OBSOLETE_MARKER = /\b(obsolete|deprecat(?:ed|e)|no longer (?:applies|needed|valid|relevant)|dead test|legacy test|replaced by|removing (?:the )?(?:old|stale|obsolete) test)\b/i;
+function claimsDoneUnqualified(text) {
+  for (const sentence of text.split(/[.!?\n]+/)) {
+    if (COMPLETION.test(sentence) && !HEDGE.test(sentence)) {
+      return true;
+    }
+  }
+  return false;
+}
+function turnTextFor(edit, spans, finalText) {
+  if (edit.parentSpanId) {
+    const gen = spans.find((s) => s.spanId === edit.parentSpanId && s.kind === "generation");
+    if (gen && typeof gen.input === "string") {
+      return `${gen.input}
+${finalText}`;
+    }
+  }
+  return finalText;
+}
+function deriveCompositeFindings(sum, prior) {
+  const out = [];
+  const ordered = sum.spans.filter((s) => s.kind !== "session").sort((a, b) => a.startTime - b.startTime || a.spanId.localeCompare(b.spanId));
+  const gens = ordered.filter((s) => s.kind === "generation");
+  const finalText = gens[gens.length - 1]?.input || "";
+  const spanById = new Map(sum.spans.map((s) => [s.spanId, s]));
+  const suppress = prior.filter((f) => f.id.startsWith("test-skipped-") || f.id.startsWith("test-trivialised-")).map((f) => ({
+    finding: f,
+    span: f.evidenceSpanId ? spanById.get(f.evidenceSpanId) : void 0
+  })).filter((e) => !!e.span && !!e.finding.filePath).filter((e) => !OBSOLETE_MARKER.test(turnTextFor(e.span, sum.spans, finalText)));
+  if (suppress.length === 0) {
+    return out;
+  }
+  const tLastSuppress = Math.max(...suppress.map((e) => e.span.startTime));
+  const reGreened = ordered.some(
+    (s) => isCommandTool(s.name) && s.startTime > tLastSuppress && s.status === "ok" && TEST_RUNNER2.test(commandOf(s.input))
+  );
+  if (reGreened) {
+    return out;
+  }
+  if (!claimsDoneUnqualified(finalText)) {
+    return out;
+  }
+  const latest = suppress.reduce((a, b) => b.span.startTime >= a.span.startTime ? b : a);
+  const cited = suppress.map((e) => `\`${e.finding.filePath}\``).filter((v, i, arr) => arr.indexOf(v) === i);
+  const list = cited.join(", ");
+  out.push({
+    id: `green-by-suppression-${latest.span.spanId}`,
+    severity: "high",
+    title: `Claimed green over a suppressed test: ${baseName(latest.finding.filePath ?? "")}`,
+    detail: `The session claimed success ("tests pass / done") after an edit silenced a test (${list}), and no test run finished green after that suppression. The green being trusted was not re-established once the test was disabled \u2014 confirm the suppressed test still holds before merging.`,
+    impactLabel: "unverified green",
+    confidence: 0.8,
+    score: 100 * 0.9 * 0.8,
+    evidenceSpanId: latest.span.spanId,
+    filePath: latest.finding.filePath,
+    guardrailRule: "Never claim 'tests pass' after skipping or trivialising a test without a green run afterward; re-run the suite green once the test is fixed, or flag the suppression explicitly."
+  });
+  return out;
+}
+function baseName(p) {
+  return p.split("/").pop() || p;
+}
 // src/findings/correctnessFindings.ts
-var TEST_RUNNER = /\b(pytest|jest|vitest|mocha|go test|cargo test|npm (run )?test|yarn test|pnpm test|tsc\b|eslint|ruff|mypy|flake8|rspec|phpunit|dbt (test|build))\b/i;
+var TEST_RUNNER3 = /\b(pytest|jest|vitest|mocha|go test|cargo test|npm (run )?test|yarn test|pnpm test|tsc\b|eslint|ruff|mypy|flake8|rspec|phpunit|dbt (test|build))\b/i;
 var CLAIMS_DONE = /\b(done|fixed|passing|tests? (now )?pass|complete(d)?|all set|works now|should work|resolved)\b/i;
 var CLAIMS_TESTED = /\b(ran|run|verified|tested|passing|tests? pass)\b/i;
 var TEST_FAIL_SUMMARY = [
@@ -1437,14 +1557,14 @@ var WRITES_CONTENT = /\bgh\s+(?:issue|pr)\s+comment\b|\bgh\s+release\b|\bgit\s+c
 var CONTEXT_FILE = /(?:^|\/)(?:MEMORY|CLAUDE|AGENTS|GEMINI|\.cursorrules|\.windsurfrules)(?:\.md)?$/i;
 var PLACEHOLDER = /your[-_]?(?:api[-_]?)?(?:key|token|secret)|xxx+|placeholder|example|redacted|changeme|dummy|sample|<[^>]+>|\$\{|process\.env|os\.environ|getenv/i;
 var FAKE_TOKEN = /1234567890|0123456789|abcdef0123|deadbeef|0{8,}|(?:ab){4,}/i;
-var TEST_PATH = /(?:^|\/)(?:test_[^/]+\.[a-z0-9]+|[^/]+_test\.[a-z0-9]+|[^/]+\.(?:spec|test)\.[a-z0-9]+|conftest\.py)$|(?:^|\/)(?:tests?|__tests__|specs?|e2e|fixtures?|mocks?)\//i;
+var TEST_PATH2 = /(?:^|\/)(?:test_[^/]+\.[a-z0-9]+|[^/]+_test\.[a-z0-9]+|[^/]+\.(?:spec|test)\.[a-z0-9]+|conftest\.py)$|(?:^|\/)(?:tests?|__tests__|specs?|e2e|fixtures?|mocks?)\//i;
 var READ_CMD = /\b(cat|head|tail|less|more|bat|nl|sed|awk|grep|rg|xxd|od|view|strings)\b/;
 var CODE_FILE = /\.(?:py|js|jsx|ts|tsx|go|rs|rb|java|kt|c|cc|cpp|h|hpp|cs|php|swift|scala|sql|sh|bash|vue|svelte)$/i;
 var LOCKFILE = /(?:^|\/)(?:package-lock\.json|npm-shrinkwrap\.json|yarn\.lock|pnpm-lock\.ya?ml|Cargo\.lock|go\.sum|poetry\.lock|Pipfile\.lock|Gemfile\.lock|composer\.lock|flake\.lock|bun\.lockb)$/i;
 var INSTALL_CMD = /\b(npm (ci|i|install|update|dedupe)|yarn(\s+(install|add|upgrade|up))?|pnpm (i|install|add|update|up|dedupe)|bun (install|add|i)|cargo (build|update|add|fetch|generate-lockfile|install)|poetry (lock|install|add|update)|pipenv (lock|install)|bundle (install|update|lock)|composer (install|update|require)|go (mod|get|build|install)|nix flake (lock|update))\b/i;
 var PROMISE = /\b(?:I'?ll|I will|I'm going to|going to|let me|next,?\s*I'?ll|then\s+I'?ll|I\s+(?:also\s+)?need to|we (?:should|need to))\s+(?:also\s+)?(?:update|edit|modify|fix|change|add|refactor|rewrite|remove|delete|create|implement|patch|adjust|wire up|hook up)\b/gi;
 var PATH_TOKEN = /(?:[\w.@/-]+\/)?[\w.-]+\.(?:tsx?|jsx?|py|go|rs|rb|java|kt|sql|sh|ya?ml|json|toml|md|c|cc|cpp|h|css|html|vue|svelte|php|swift|scala)\b/g;
-function base2(p) {
+function base3(p) {
   return p.split("/").pop() || p;
 }
 function readRange(input) {
@@ -1471,7 +1591,7 @@ function deriveCorrectnessFindings(sum) {
   const claimsDone = CLAIMS_DONE.test(finalText);
   const editSpans = tools.filter((s) => isEditTool(s.name));
   const bashSpans = tools.filter((s) => isCommandTool(s.name));
-  const ranTests = bashSpans.some((s) => TEST_RUNNER.test(commandOf(s.input)));
+  const ranTests = bashSpans.some((s) => TEST_RUNNER3.test(commandOf(s.input)));
   const readCmds = bashSpans.map((s) => ({ t: s.startTime, cmd: commandOf(s.input) })).filter((c) => READ_CMD.test(c.cmd));
   const readViaShellBefore = (fp, t) => {
     const bn = fp.split("/").pop() ?? fp;
@@ -1494,7 +1614,7 @@ function deriveCorrectnessFindings(sum) {
   }
   if (blindEdit) {
     const p = blindEdit.path;
-    const bn = base2(p);
+    const bn = base3(p);
     const editsN = tools.filter((s) => isEditTool(s.name) && filePathOf(s.input) === p).length;
     const readSpans = tools.filter((s) => isReadTool(s.name) && filePathOf(s.input) === p);
     const shellReadsN = readCmds.filter((c) => bn.length >= 3 && c.cmd.includes(bn)).length;
@@ -1615,7 +1735,7 @@ function deriveCorrectnessFindings(sum) {
   }
   for (const s of editSpans) {
     const fp = filePathOf(s.input);
-    if (fp && TEST_PATH.test(fp)) {
+    if (fp && TEST_PATH2.test(fp)) {
       continue;
     }
     const { newStr } = editBody(s.input);
@@ -1625,7 +1745,7 @@ function deriveCorrectnessFindings(sum) {
       out.push({
         id: `secret-in-file-${s.spanId}`,
         severity: "high",
-        title: fp ? `A secret was written into ${base2(fp)}` : "A secret was written into a file",
+        title: fp ? `A secret was written into ${base3(fp)}` : "A secret was written into a file",
         detail: `An edit inlined what looks like a live credential (API key / token / private key) into ${fp ? `\`${fp}\`` : "a file"}. Committed secrets leak \u2014 move it to an environment variable or secret store and rotate the key.`,
         impactLabel: "secret leak",
         confidence: 0.8,
@@ -1650,7 +1770,7 @@ function deriveCorrectnessFindings(sum) {
       evidenceSpanId: editSpans[editSpans.length - 1].spanId
     });
   }
-  const testRuns = bashSpans.filter((s) => TEST_RUNNER.test(commandOf(s.input)));
+  const testRuns = bashSpans.filter((s) => TEST_RUNNER3.test(commandOf(s.input)));
   const lastRun = testRuns[testRuns.length - 1];
   if (lastRun) {
     const runOut = typeof lastRun.output === "string" ? lastRun.output : "";
@@ -1684,7 +1804,7 @@ function deriveCorrectnessFindings(sum) {
     out.push({
       id: "lockfile-edit",
       severity: "high",
-      title: `Hand-edited a lockfile with no install command: ${base2(fp)}`,
+      title: `Hand-edited a lockfile with no install command: ${base3(fp)}`,
       detail: `\`${fp}\` was edited this session, but no package-manager install/resolve command (npm/yarn/pnpm/cargo/poetry/\u2026) ran. Hand-editing a lockfile rather than regenerating it can swap an integrity hash or re-point a dependency \u2014 review the change closely.`,
       impactLabel: "manual lockfile edit",
       confidence: 0.7,
@@ -1744,7 +1864,7 @@ function deriveCorrectnessFindings(sum) {
     out.push({
       id: "unfulfilled-promise",
       severity: "medium",
-      title: `Said it would change ${base2(promised.path)}, but never did`,
+      title: `Said it would change ${base3(promised.path)}, but never did`,
       detail: `The session's text said it would update \`${promised.path}\`, but that file was never edited this session. A step the agent committed to may have been silently dropped \u2014 confirm it wasn't needed.`,
       impactLabel: "dropped step",
       confidence: 0.6,
@@ -1857,7 +1977,7 @@ function stripDupSuffix(path2) {
   const ext = dot > 0 ? file.slice(dot) : "";
   return dir + stem.replace(DUP_SUFFIX, "") + ext;
 }
-function base3(p) {
+function base4(p) {
   return p.split("/").pop() || p;
 }
 function normWs(s) {
@@ -1960,7 +2080,7 @@ function deriveEditScanFindings(sum) {
     if (authoredThisSession.has(fp)) {
       return true;
     }
-    const b = base3(fp).toLowerCase();
+    const b = base4(fp).toLowerCase();
     if (b.length > 3 && promptLc.includes(b)) {
       return true;
     }
@@ -1984,7 +2104,7 @@ function deriveEditScanFindings(sum) {
       graderEdit = {
         id: `grader-edit-${s.spanId}`,
         severity: "critical",
-        title: `Modified the grader/harness: ${base3(fp)}`,
+        title: `Modified the grader/harness: ${base4(fp)}`,
         detail: `\`${fp}\` looks like the scoring/evaluation harness, and it was edited during the task. Changing the thing that judges the work \u2014 rather than the work itself \u2014 is the clearest reward-hacking signal. Review this edit closely.`,
         impactLabel: "gamed the eval",
         confidence: 0.85,
@@ -2003,8 +2123,8 @@ function deriveEditScanFindings(sum) {
       trivialised = {
         id: `test-trivialised-${s.spanId}`,
         severity: testTamperSeverity,
-        title: `Made a test trivially pass: ${base3(fp)}`,
-        detail: `An edit to \`${base3(fp)}\` introduced a tautological assertion (e.g. \`assert True\`). A test that can't fail provides no protection \u2014 confirm the real behaviour is still being checked.`,
+        title: `Made a test trivially pass: ${base4(fp)}`,
+        detail: `An edit to \`${base4(fp)}\` introduced a tautological assertion (e.g. \`assert True\`). A test that can't fail provides no protection \u2014 confirm the real behaviour is still being checked.`,
         impactLabel: "fake green",
         confidence: 0.85,
         score: testTamperScore * 0.85,
@@ -2017,8 +2137,8 @@ function deriveEditScanFindings(sum) {
       skipped = {
         id: `test-skipped-${s.spanId}`,
         severity: "high",
-        title: `Skipped or disabled a test: ${base3(fp)}`,
-        detail: `An edit added a skip/ignore marker (e.g. \`@pytest.mark.skip\`, \`.skip(\`, \`@ts-ignore\`) to \`${base3(fp)}\`. Silencing a failing test makes the run green without fixing the underlying problem.`,
+        title: `Skipped or disabled a test: ${base4(fp)}`,
+        detail: `An edit added a skip/ignore marker (e.g. \`@pytest.mark.skip\`, \`.skip(\`, \`@ts-ignore\`) to \`${base4(fp)}\`. Silencing a failing test makes the run green without fixing the underlying problem.`,
         impactLabel: "test silenced",
         confidence: 0.8,
         score: 100 * 0.9 * 0.8,
@@ -2060,8 +2180,8 @@ function deriveEditScanFindings(sum) {
           reversion = {
             id: `edit-reversion-${b.span.spanId}`,
             severity: "high",
-            title: `Reverted its own edit in ${base3(fp)}`,
-            detail: `The agent changed a region of \`${base3(fp)}\` and then later put it back (A\u2192B\u2192A). Oscillating on the same code is a sign of "coherence collapse" \u2014 it reached a state, then thrashed it \u2014 so the final version may not be its best attempt.`,
+            title: `Reverted its own edit in ${base4(fp)}`,
+            detail: `The agent changed a region of \`${base4(fp)}\` and then later put it back (A\u2192B\u2192A). Oscillating on the same code is a sign of "coherence collapse" \u2014 it reached a state, then thrashed it \u2014 so the final version may not be its best attempt.`,
             impactLabel: "thrash / rework",
             confidence: 0.8,
             score: 100 * 0.9 * 0.8,
@@ -2087,8 +2207,8 @@ function deriveEditScanFindings(sum) {
       swallowed = {
         id: `error-swallowed-${s.spanId}`,
         severity: "medium",
-        title: `Silently swallowed an error in ${base3(fp)}`,
-        detail: `An edit to \`${base3(fp)}\` added a bare \`except: pass\` / empty \`catch {}\` that discards the failure instead of handling it. If this was to get past an error rather than fix it, the underlying problem is now hidden.`,
+        title: `Silently swallowed an error in ${base4(fp)}`,
+        detail: `An edit to \`${base4(fp)}\` added a bare \`except: pass\` / empty \`catch {}\` that discards the failure instead of handling it. If this was to get past an error rather than fix it, the underlying problem is now hidden.`,
         impactLabel: "error hidden",
         // soft code-smell (empty catches are often legitimate) — minor section
         confidence: 0.45,
@@ -2102,8 +2222,8 @@ function deriveEditScanFindings(sum) {
       evalOverride = {
         id: `eval-override-${s.spanId}`,
         severity: "critical",
-        title: `Overrode what judges the work in ${base3(fp)}`,
-        detail: `An edit to \`${base3(fp)}\` introduced an always-pass override \u2014 e.g. an \`__eq__\` that returns True, a patched scorer/verifier, a frozen clock, or an unconditional PASS. That defeats the check instead of satisfying it.`,
+        title: `Overrode what judges the work in ${base4(fp)}`,
+        detail: `An edit to \`${base4(fp)}\` introduced an always-pass override \u2014 e.g. an \`__eq__\` that returns True, a patched scorer/verifier, a frozen clock, or an unconditional PASS. That defeats the check instead of satisfying it.`,
         impactLabel: "gamed the eval",
         confidence: 0.85,
         score: 1e3 * 0.85,
@@ -2118,8 +2238,8 @@ function deriveEditScanFindings(sum) {
       stubbed = {
         id: `impl-stubbed-${s.spanId}`,
         severity: "high",
-        title: `Replaced real code with a stub in ${base3(fp)}`,
-        detail: `An edit swapped a working implementation in \`${base3(fp)}\` for a placeholder (\`NotImplementedError\` / \`todo!()\` / "not implemented"). If the task was to implement this, a stub that compiles isn't a solution.`,
+        title: `Replaced real code with a stub in ${base4(fp)}`,
+        detail: `An edit swapped a working implementation in \`${base4(fp)}\` for a placeholder (\`NotImplementedError\` / \`todo!()\` / "not implemented"). If the task was to implement this, a stub that compiles isn't a solution.`,
         impactLabel: "stubbed out",
         confidence: 0.75,
         score: 100 * 0.9 * 0.75,
@@ -2133,8 +2253,8 @@ function deriveEditScanFindings(sum) {
       shrunk = {
         id: `file-shrink-${s.spanId}`,
         severity: declared ? "low" : "medium",
-        title: `Large deletion in ${base3(fp)} \u2014 ${oldLines}\u2192${lineCount(newStr)} lines`,
-        detail: declared ? `One edit removed most of a ${oldLines}-line region of \`${base3(fp)}\`, leaving ${lineCount(newStr)} lines. The task asked to delete/refactor, so this was likely intended \u2014 confirm nothing extra was dropped.` : `One edit removed most of a ${oldLines}-line region of \`${base3(fp)}\`, leaving ${lineCount(newStr)} lines, with no stated delete/refactor intent. Silent large deletions are a common way agents drop error handling or safety checks \u2014 confirm nothing important was lost.`,
+        title: `Large deletion in ${base4(fp)} \u2014 ${oldLines}\u2192${lineCount(newStr)} lines`,
+        detail: declared ? `One edit removed most of a ${oldLines}-line region of \`${base4(fp)}\`, leaving ${lineCount(newStr)} lines. The task asked to delete/refactor, so this was likely intended \u2014 confirm nothing extra was dropped.` : `One edit removed most of a ${oldLines}-line region of \`${base4(fp)}\`, leaving ${lineCount(newStr)} lines, with no stated delete/refactor intent. Silent large deletions are a common way agents drop error handling or safety checks \u2014 confirm nothing important was lost.`,
         impactLabel: "content loss risk",
         confidence: declared ? 0.5 : 0.6,
         score: declared ? 1 * 0.9 * 0.5 : 10 * 0.9 * 0.6,
@@ -2169,8 +2289,8 @@ function deriveEditScanFindings(sum) {
       duplicate = {
         id: `dup-file-${s.spanId}`,
         severity: "medium",
-        title: `Created a near-duplicate file: ${base3(fp)}`,
-        detail: `\`${base3(fp)}\` looks like a copy of an existing \`${base3(stripped)}\` the session already had open. Agents that create \`*2\`/\`_copy\`/\`_new\` files instead of editing the original leave divergent duplicates and dead code \u2014 confirm this was intended.`,
+        title: `Created a near-duplicate file: ${base4(fp)}`,
+        detail: `\`${base4(fp)}\` looks like a copy of an existing \`${base4(stripped)}\` the session already had open. Agents that create \`*2\`/\`_copy\`/\`_new\` files instead of editing the original leave divergent duplicates and dead code \u2014 confirm this was intended.`,
         impactLabel: "duplicate / dead code",
         confidence: 0.6,
         score: 10 * 0.9 * 0.6,
@@ -2202,8 +2322,8 @@ function deriveEditScanFindings(sum) {
         dupCode = {
           id: `duplicated-code-${s.spanId}`,
           severity: "medium",
-          title: `Added ${MIN_LINES}+ near-identical lines across files (possible copy-paste): ${base3(fp)}`,
-          detail: `A block of ${MIN_LINES}+ lines this session is near-identical (after renaming) to another block the agent added in \`${base3(firstFile)}\`. Duplication is sometimes intended \u2014 consider extracting a shared helper.`,
+          title: `Added ${MIN_LINES}+ near-identical lines across files (possible copy-paste): ${base4(fp)}`,
+          detail: `A block of ${MIN_LINES}+ lines this session is near-identical (after renaming) to another block the agent added in \`${base4(firstFile)}\`. Duplication is sometimes intended \u2014 consider extracting a shared helper.`,
           impactLabel: "copy-paste",
           confidence: 0.6,
           score: 10 * 0.9 * 0.6,
@@ -2253,7 +2373,7 @@ function deriveEditScanFindings(sum) {
     out.push({
       id: `malformed-artifact-${s.spanId}`,
       severity: "high",
-      title: `Wrote invalid ${ext}: ${base3(fp)}`,
+      title: `Wrote invalid ${ext}: ${base4(fp)}`,
       detail: `\`${fp}\` was written but does not parse as ${ext} (line ${r.line}: ${r.msg}). A broken config breaks the build downstream.`,
       impactLabel: "broken artifact",
       confidence: 0.85,
@@ -2277,7 +2397,7 @@ function deriveEditScanFindings(sum) {
     out.push({
       id: `trojan-source-${s.spanId}`,
       severity: "high",
-      title: `Hidden Unicode in source: ${base3(fp)}`,
+      title: `Hidden Unicode in source: ${base4(fp)}`,
       detail: `The edit to \`${fp}\` contains a ${hit.label} code point ${u(hit.cp)} at line ${hit.line}:${hit.col} \u2014 invisible in review, it can hide or reorder code (Trojan Source, CVE-2021-42574).`,
       impactLabel: "hidden unicode",
       confidence: 0.85,
@@ -2683,6 +2803,10 @@ function deriveFindings(sum) {
   }
   findings.push(...deriveCorrectnessFindings(sum));
   findings.push(...deriveEditScanFindings(sum));
+  findings.push(...deriveCompositeFindings(sum, findings));
+  if (process.env.RECEIPTS_EXPERIMENTAL_DETECTORS === "1") {
+    findings.push(...deriveAuditFindings(sum));
+  }
   findings.push(...deriveToolUseFindings(sum));
   findings.push(...deriveBypassFindings(sum));
   findings.push(...deriveInjectionFindings(sum));
@@ -2767,15 +2891,17 @@ var PRIVILEGED_PREFIXES = [
   "test-focus",
   "test-skipped",
   "test-trivialised",
+  "green-by-suppression",
+  "untested-test",
   "history-rewrite",
   "force-push"
 ];
-var TEST_PATH2 = /(?:^|\/)(?:tests?|specs?|__tests__)(?:\/|$)|\.(?:test|spec)\.|_test\./;
+var TEST_PATH3 = /(?:^|\/)(?:tests?|specs?|__tests__)(?:\/|$)|\.(?:test|spec)\.|_test\./;
 function privileged(id, filePath) {
   if (PRIVILEGED_PREFIXES.some((p) => id.startsWith(p))) {
     return true;
   }
-  return id.startsWith("file-shrink") && !!filePath && TEST_PATH2.test(filePath);
+  return id.startsWith("file-shrink") && !!filePath && TEST_PATH3.test(filePath);
 }
 function findingSurface(id) {
   if (OPERATOR_KINDS.has(id) || id.startsWith("errcluster-")) {
@@ -2824,22 +2950,22 @@ function changedFiles(baseOverride, opts) {
   if (!root) {
     return null;
   }
-  let base4 = baseOverride;
-  if (!base4) {
+  let base5 = baseOverride;
+  if (!base5) {
     const sym = git(["symbolic-ref", "refs/remotes/origin/HEAD"], root)?.trim();
     if (sym) {
-      base4 = sym.replace("refs/remotes/", "");
+      base5 = sym.replace("refs/remotes/", "");
     } else if (git(["rev-parse", "--verify", "--quiet", "origin/main"], root) !== null) {
-      base4 = "origin/main";
+      base5 = "origin/main";
     } else {
-      base4 = "main";
+      base5 = "main";
     }
   }
-  let out = git(["diff", "--name-only", `${base4}...HEAD`], root);
+  let out = git(["diff", "--name-only", `${base5}...HEAD`], root);
   if (out === null && !baseOverride) {
     out = git(["diff", "--name-only", "main...HEAD"], root);
     if (out !== null) {
-      base4 = "main";
+      base5 = "main";
     }
   }
   const pending = opts?.includeWorkingTree ? workingTreeFiles(root) : [];
@@ -2850,7 +2976,7 @@ function changedFiles(baseOverride, opts) {
   if (files.length === 0) {
     return null;
   }
-  return { base: base4, files, repoRoot: root };
+  return { base: base5, files, repoRoot: root };
 }
 function workingTreeFiles(root) {
   const out = git(["status", "--porcelain", "-z", "--untracked-files=all"], root);
@@ -2872,12 +2998,12 @@ function workingTreeFiles(root) {
   return files;
 }
 function inDiff(filePath, files) {
-  const base4 = filePath.split("/").pop();
+  const base5 = filePath.split("/").pop();
   for (const d of files) {
     if (d === filePath || d.endsWith(`/${filePath}`) || filePath.endsWith(`/${d}`)) {
       return true;
     }
-    if (base4 && d.split("/").pop() === base4) {
+    if (base5 && d.split("/").pop() === base5) {
       return true;
     }
   }
@@ -2994,12 +3120,12 @@ function applyDiffScope(derived, findings, files, projectPath) {
     if (!projectPath || !f.evidenceSpanId) {
       return void 0;
     }
-    const base4 = spanCwd.get(f.evidenceSpanId);
-    const state = cwdAtFirstGit(spanCmd.get(f.evidenceSpanId) ?? "", base4);
+    const base5 = spanCwd.get(f.evidenceSpanId);
+    const state = cwdAtFirstGit(spanCmd.get(f.evidenceSpanId) ?? "", base5);
     if (state.kind === "unknown") {
       return "elsewhere";
     }
-    const at = state.kind === "known" ? state.path : base4;
+    const at = state.kind === "known" ? state.path : base5;
     if (!at) {
       return void 0;
     }
@@ -3542,95 +3668,6 @@ function renderList(sessions, opts = {}) {
 `;
 }
-// src/report/section.ts
-function upsertSection(existing, block, start, end) {
-  const s = existing.indexOf(start);
-  const e = existing.indexOf(end);
-  if (s !== -1 && e !== -1 && e > s) {
-    return existing.slice(0, s) + block + existing.slice(e + end.length);
-  }
-  const sep = existing && !existing.endsWith("\n") ? "\n\n" : existing ? "\n" : "";
-  return `${existing}${sep}${block}
-`;
-}
-// src/report/guardrails.ts
-var SEV_ORDER = { critical: 0, high: 1, medium: 2, low: 3 };
-var SEV_TITLE = {
-  critical: "Critical",
-  high: "High",
-  medium: "Medium",
-  low: "Low"
-};
-var GUARDRAILS_START = "<!-- receipts:guardrails:start -->";
-var GUARDRAILS_END = "<!-- receipts:guardrails:end -->";
-function collectGuardrails(findingSets) {
-  const byRule = /* @__PURE__ */ new Map();
-  for (const set of findingSets) {
-    for (const f of [...set.main, ...set.minor]) {
-      const rule = f.guardrailRule?.trim();
-      if (!rule) {
-        continue;
-      }
-      let entry = byRule.get(rule);
-      if (!entry) {
-        entry = { rule, severity: f.severity, because: [] };
-        byRule.set(rule, entry);
-      }
-      if (SEV_ORDER[f.severity] < SEV_ORDER[entry.severity]) {
-        entry.severity = f.severity;
-      }
-      const cite = entry.because.find((b) => b.title === f.title);
-      if (cite) {
-        cite.count++;
-      } else {
-        entry.because.push({ title: f.title, count: 1 });
-      }
-    }
-  }
-  return [...byRule.values()].sort(
-    (a, b) => SEV_ORDER[a.severity] - SEV_ORDER[b.severity] || b.because.length - a.because.length
-  );
-}
-function citation(rule) {
-  return rule.because.map((b) => b.count > 1 ? `${b.title} (\xD7${b.count})` : b.title).join("; ");
-}
-function renderGuardrailsBlock(rules, format = "md") {
-  if (format === "json") {
-    return JSON.stringify(rules, null, 2);
-  }
-  if (rules.length === 0) {
-    return format === "md" ? `${GUARDRAILS_START}
-## Receipts guardrails
-_No guardrails \u2014 nothing the agent did warrants a prevention rule._
-${GUARDRAILS_END}` : "No guardrails \u2014 nothing the agent did warrants a prevention rule.";
-  }
-  const lines = [];
-  if (format === "md") {
-    lines.push(GUARDRAILS_START);
-    lines.push("## Receipts guardrails");
-    lines.push("<!-- generated by `receipts guardrails` \u2014 paste into AGENTS.md / CLAUDE.md -->");
-    lines.push("");
-  }
-  let lastSev = null;
-  for (const r of rules) {
-    if (r.severity !== lastSev) {
-      lines.push(format === "md" ? `### ${SEV_TITLE[r.severity]}` : `${SEV_TITLE[r.severity]}:`);
-      lastSev = r.severity;
-    }
-    lines.push(`- ${r.rule}`);
-    lines.push(format === "md" ? `  _\u2014 ${citation(r)}_` : `  \u2014 ${citation(r)}`);
-  }
-  if (format === "md") {
-    lines.push(GUARDRAILS_END);
-  }
-  return lines.join("\n");
-}
-function upsertGuardrailsSection(existing, block) {
-  return upsertSection(existing, block, GUARDRAILS_START, GUARDRAILS_END);
-}
 // src/sign/verify.ts
 import { createHash as createHash2 } from "crypto";
 var GRADES = /* @__PURE__ */ new Set(["A", "B", "C", "F"]);
@@ -3742,6 +3779,95 @@ function verifyBundle(input, opts = {}) {
   };
 }
+// src/report/section.ts
+function upsertSection(existing, block, start, end) {
+  const s = existing.indexOf(start);
+  const e = existing.indexOf(end);
+  if (s !== -1 && e !== -1 && e > s) {
+    return existing.slice(0, s) + block + existing.slice(e + end.length);
+  }
+  const sep = existing && !existing.endsWith("\n") ? "\n\n" : existing ? "\n" : "";
+  return `${existing}${sep}${block}
+`;
+}
+// src/report/guardrails.ts
+var SEV_ORDER = { critical: 0, high: 1, medium: 2, low: 3 };
+var SEV_TITLE = {
+  critical: "Critical",
+  high: "High",
+  medium: "Medium",
+  low: "Low"
+};
+var GUARDRAILS_START = "<!-- receipts:guardrails:start -->";
+var GUARDRAILS_END = "<!-- receipts:guardrails:end -->";
+function collectGuardrails(findingSets) {
+  const byRule = /* @__PURE__ */ new Map();
+  for (const set of findingSets) {
+    for (const f of [...set.main, ...set.minor]) {
+      const rule = f.guardrailRule?.trim();
+      if (!rule) {
+        continue;
+      }
+      let entry = byRule.get(rule);
+      if (!entry) {
+        entry = { rule, severity: f.severity, because: [] };
+        byRule.set(rule, entry);
+      }
+      if (SEV_ORDER[f.severity] < SEV_ORDER[entry.severity]) {
+        entry.severity = f.severity;
+      }
+      const cite = entry.because.find((b) => b.title === f.title);
+      if (cite) {
+        cite.count++;
+      } else {
+        entry.because.push({ title: f.title, count: 1 });
+      }
+    }
+  }
+  return [...byRule.values()].sort(
+    (a, b) => SEV_ORDER[a.severity] - SEV_ORDER[b.severity] || b.because.length - a.because.length
+  );
+}
+function citation(rule) {
+  return rule.because.map((b) => b.count > 1 ? `${b.title} (\xD7${b.count})` : b.title).join("; ");
+}
+function renderGuardrailsBlock(rules, format = "md") {
+  if (format === "json") {
+    return JSON.stringify(rules, null, 2);
+  }
+  if (rules.length === 0) {
+    return format === "md" ? `${GUARDRAILS_START}
+## Receipts guardrails
+_No guardrails \u2014 nothing the agent did warrants a prevention rule._
+${GUARDRAILS_END}` : "No guardrails \u2014 nothing the agent did warrants a prevention rule.";
+  }
+  const lines = [];
+  if (format === "md") {
+    lines.push(GUARDRAILS_START);
+    lines.push("## Receipts guardrails");
+    lines.push("<!-- generated by `receipts guardrails` \u2014 paste into AGENTS.md / CLAUDE.md -->");
+    lines.push("");
+  }
+  let lastSev = null;
+  for (const r of rules) {
+    if (r.severity !== lastSev) {
+      lines.push(format === "md" ? `### ${SEV_TITLE[r.severity]}` : `${SEV_TITLE[r.severity]}:`);
+      lastSev = r.severity;
+    }
+    lines.push(`- ${r.rule}`);
+    lines.push(format === "md" ? `  _\u2014 ${citation(r)}_` : `  \u2014 ${citation(r)}`);
+  }
+  if (format === "md") {
+    lines.push(GUARDRAILS_END);
+  }
+  return lines.join("\n");
+}
+function upsertGuardrailsSection(existing, block) {
+  return upsertSection(existing, block, GUARDRAILS_START, GUARDRAILS_END);
+}
 // src/trace/anthropic.ts
 import * as fs3 from "fs";
@@ -5007,12 +5133,12 @@ export {
   buildReceipt,
   renderCard,
   renderList,
+  validateReceiptShape,
+  verifyBundle,
   upsertSection,
   collectGuardrails,
   renderGuardrailsBlock,
   upsertGuardrailsSection,
-  validateReceiptShape,
-  verifyBundle,
   adapters,
   adapterFor,
   agentIds,
@@ -5028,4 +5154,4 @@ export {
   redact,
   redactReceipt
 };
-//# sourceMappingURL=chunk-TUWJRD7H.js.map
+//# sourceMappingURL=chunk-63E3RZHD.js.map