altimate-receipts 0.9.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-TUWJRD7H.js → chunk-63E3RZHD.js} +276 -150
- package/dist/chunk-63E3RZHD.js.map +1 -0
- package/dist/{chunk-4S2ABMUN.js → chunk-JE6HSACL.js} +2 -2
- package/dist/{chunk-WNGBYBM3.js → chunk-KM6VCSVW.js} +2 -2
- package/dist/chunk-KM6VCSVW.js.map +1 -0
- package/dist/cli.js +464 -70
- package/dist/cli.js.map +1 -1
- package/dist/index.js +2 -2
- package/dist/mcp/server.js +2 -2
- package/package.json +4 -1
- package/dist/chunk-TUWJRD7H.js.map +0 -1
- package/dist/chunk-WNGBYBM3.js.map +0 -1
- /package/dist/{chunk-4S2ABMUN.js.map → chunk-JE6HSACL.js.map} +0 -0
|
@@ -177,8 +177,8 @@ function shellExecutorArg(clause) {
|
|
|
177
177
|
}
|
|
178
178
|
return void 0;
|
|
179
179
|
}
|
|
180
|
-
function cwdAtFirstGit(command,
|
|
181
|
-
let cur =
|
|
180
|
+
function cwdAtFirstGit(command, base5) {
|
|
181
|
+
let cur = base5;
|
|
182
182
|
let moved = false;
|
|
183
183
|
let unknown = false;
|
|
184
184
|
for (const clause of splitClauses(stripHeredocs(command))) {
|
|
@@ -1061,8 +1061,8 @@ function isInScope(path2, promptLc, readSet) {
|
|
|
1061
1061
|
if (promptLc.includes(lc)) {
|
|
1062
1062
|
return true;
|
|
1063
1063
|
}
|
|
1064
|
-
const
|
|
1065
|
-
if (
|
|
1064
|
+
const base5 = lc.split("/").pop();
|
|
1065
|
+
if (base5 && base5.length > 2 && promptLc.includes(base5)) {
|
|
1066
1066
|
return true;
|
|
1067
1067
|
}
|
|
1068
1068
|
const dirs = lc.split("/").filter((d) => d.length > 2);
|
|
@@ -1302,6 +1302,59 @@ function deriveSpans(session) {
|
|
|
1302
1302
|
};
|
|
1303
1303
|
}
|
|
1304
1304
|
|
|
1305
|
+
// src/findings/auditFindings.ts
|
|
1306
|
+
var TEST_PATH = /(?:^|\/)(?:tests?|specs?|__tests__)(?:\/|$)|\.(?:test|spec)\.[a-z]+$|_test\.[a-z]+$|(?:^|\/)test_[^/]+\.[a-z]+$/i;
|
|
1307
|
+
var TEST_DEF = /\bdef\s+test_\w|\b(?:it|test)\s*\(\s*["'`]|\bdescribe\s*\(\s*["'`]|@Test\b|\bfunc\s+Test[A-Z]\w*\s*\(|\b(?:it|test)\.each\b/;
|
|
1308
|
+
var TEST_RUNNER = /\b(pytest|jest|vitest|mocha|go test|cargo test|npm (?:run )?test|yarn test|pnpm test|tsc\b|eslint|ruff|mypy|flake8|rspec|phpunit|dbt (?:test|build))\b/i;
|
|
1309
|
+
var CLAIMS_TESTED_OR_DONE = /\b(ran|run|verified|tested|all tests? (?:now )?pass(?:ing|ed)?|tests? (?:now )?pass(?:ing|ed)?|added (?:unit |integration )?tests?|test coverage|done|fixed|complete(?:d)?)\b/i;
|
|
1310
|
+
var base = (p) => p.split("/").pop() || p;
|
|
1311
|
+
function deriveAuditFindings(sum) {
|
|
1312
|
+
const out = [];
|
|
1313
|
+
const ordered = [...sum.spans].filter((s) => s.kind !== "session").sort((a, b) => a.startTime - b.startTime || a.spanId.localeCompare(b.spanId));
|
|
1314
|
+
const gens = ordered.filter((s) => s.kind === "generation");
|
|
1315
|
+
const finalText = gens[gens.length - 1]?.input || "";
|
|
1316
|
+
const testEdits = ordered.filter((s) => {
|
|
1317
|
+
if (s.kind !== "tool" || !(isEditTool(s.name) || isCreateTool(s.name))) {
|
|
1318
|
+
return false;
|
|
1319
|
+
}
|
|
1320
|
+
const fp = filePathOf(s.input);
|
|
1321
|
+
if (!fp || !TEST_PATH.test(fp)) {
|
|
1322
|
+
return false;
|
|
1323
|
+
}
|
|
1324
|
+
const { oldStr, newStr } = editBody(s.input);
|
|
1325
|
+
return TEST_DEF.test(newStr) && !TEST_DEF.test(oldStr);
|
|
1326
|
+
});
|
|
1327
|
+
if (testEdits.length === 0) {
|
|
1328
|
+
return out;
|
|
1329
|
+
}
|
|
1330
|
+
if (!CLAIMS_TESTED_OR_DONE.test(finalText)) {
|
|
1331
|
+
return out;
|
|
1332
|
+
}
|
|
1333
|
+
const tLastTestEdit = Math.max(...testEdits.map((e) => e.startTime));
|
|
1334
|
+
const ranAfter = ordered.some(
|
|
1335
|
+
(s) => isCommandTool(s.name) && s.startTime > tLastTestEdit && s.status === "ok" && TEST_RUNNER.test(commandOf(s.input))
|
|
1336
|
+
);
|
|
1337
|
+
if (ranAfter) {
|
|
1338
|
+
return out;
|
|
1339
|
+
}
|
|
1340
|
+
const latest = testEdits.reduce((a, b) => b.startTime >= a.startTime ? b : a);
|
|
1341
|
+
const files = [...new Set(testEdits.map((e) => filePathOf(e.input)).filter(Boolean))];
|
|
1342
|
+
const list = files.map((f) => `\`${f}\``).join(", ");
|
|
1343
|
+
out.push({
|
|
1344
|
+
id: `untested-test-${latest.spanId}`,
|
|
1345
|
+
severity: "high",
|
|
1346
|
+
title: `Added tests but never ran them: ${base(files[0] ?? "")}`,
|
|
1347
|
+
detail: `The session added a test (${list}) and claimed the work is tested/done, but no test run finished green after the test was written. The agent's own new tests were never executed \u2014 confirm they compile and actually pass before merging.`,
|
|
1348
|
+
impactLabel: "unexecuted tests",
|
|
1349
|
+
confidence: 0.75,
|
|
1350
|
+
score: 100 * 0.85 * 0.75,
|
|
1351
|
+
evidenceSpanId: latest.spanId,
|
|
1352
|
+
filePath: files[0],
|
|
1353
|
+
guardrailRule: "After adding or changing a test, run the suite and confirm it passes before claiming 'tests pass' \u2014 an unexecuted test proves nothing."
|
|
1354
|
+
});
|
|
1355
|
+
return out;
|
|
1356
|
+
}
|
|
1357
|
+
|
|
1305
1358
|
// src/findings/bypassFindings.ts
|
|
1306
1359
|
var TEST_FILE = /(?:^|\/)(?:test_[^/]+\.[a-z0-9]+|[^/]+_test\.[a-z0-9]+|[^/]+\.(?:spec|test)\.[a-z0-9]+|conftest\.py)$/i;
|
|
1307
1360
|
var TEST_DIR = /(?:^|\/)(?:tests?|__tests__|specs?|e2e|testing)\//i;
|
|
@@ -1310,7 +1363,7 @@ var TEST_FOCUS = /\b(?:it|describe|test|context)\.only\s*\(|\bfdescribe\s*\(|\bf
|
|
|
1310
1363
|
var CONFIG_FILE = /(?:^|\/)(?:tsconfig[^/]*\.json|\.eslintrc[^/]*|eslint\.config\.[a-z]+|\.flake8|setup\.cfg|pyproject\.toml|jest\.config\.[a-z]+|vitest\.config\.[a-z]+|\.pre-commit-config\.ya?ml)$/i;
|
|
1311
1364
|
var CONFIG_WEAKEN = /"strict"\s*:\s*false|"noImplicitAny"\s*:\s*false|"strictNullChecks"\s*:\s*false|"skipLibCheck"\s*:\s*true|:\s*["']off["']|coverageThreshold|--passWithNoTests/i;
|
|
1312
1365
|
var CICD_FILE = /(?:^|\/)(?:\.github\/workflows\/[^/]+\.ya?ml|\.gitlab-ci\.yml|Jenkinsfile(?:\.[\w.]+)?|\.circleci\/config\.yml|azure-pipelines\.yml|bitbucket-pipelines\.yml|\.drone\.yml)$/i;
|
|
1313
|
-
function
|
|
1366
|
+
function base2(p) {
|
|
1314
1367
|
return p.split("/").pop() || p;
|
|
1315
1368
|
}
|
|
1316
1369
|
function deriveBypassFindings(sum) {
|
|
@@ -1364,8 +1417,8 @@ function deriveBypassFindings(sum) {
|
|
|
1364
1417
|
focus = {
|
|
1365
1418
|
id: `test-focus-${s.spanId}`,
|
|
1366
1419
|
severity: "high",
|
|
1367
|
-
title: `Focused a single test in ${
|
|
1368
|
-
detail: `An edit added \`.only\`/\`fdescribe\`/\`fit\` to \`${
|
|
1420
|
+
title: `Focused a single test in ${base2(fp)}`,
|
|
1421
|
+
detail: `An edit added \`.only\`/\`fdescribe\`/\`fit\` to \`${base2(fp)}\`, which makes the runner execute only that test and silently skip every other test in the suite \u2014 a green run that proves almost nothing.`,
|
|
1369
1422
|
impactLabel: "suite skipped",
|
|
1370
1423
|
confidence: 0.85,
|
|
1371
1424
|
score: 100 * 0.9 * 0.85,
|
|
@@ -1378,8 +1431,8 @@ function deriveBypassFindings(sum) {
|
|
|
1378
1431
|
configWeaken = {
|
|
1379
1432
|
id: `config-weaken-${s.spanId}`,
|
|
1380
1433
|
severity: "high",
|
|
1381
|
-
title: `Weakened the checker config: ${
|
|
1382
|
-
detail: `An edit to \`${
|
|
1434
|
+
title: `Weakened the checker config: ${base2(fp)}`,
|
|
1435
|
+
detail: `An edit to \`${base2(fp)}\` relaxed a static check (e.g. disabled a strict flag, turned a lint rule off, or lowered a coverage threshold). Loosening the checker to get a green run hides the problems it was there to catch.`,
|
|
1383
1436
|
impactLabel: "checker defanged",
|
|
1384
1437
|
confidence: 0.8,
|
|
1385
1438
|
score: 100 * 0.9 * 0.8,
|
|
@@ -1392,7 +1445,7 @@ function deriveBypassFindings(sum) {
|
|
|
1392
1445
|
ciCdTouch = {
|
|
1393
1446
|
id: `ci-cd-touch-${s.spanId}`,
|
|
1394
1447
|
severity: "high",
|
|
1395
|
-
title: `Edited a CI/CD pipeline file: ${
|
|
1448
|
+
title: `Edited a CI/CD pipeline file: ${base2(fp)}`,
|
|
1396
1449
|
detail: `\`${fp}\` is a CI/CD pipeline file \u2014 it runs with repository secrets and write tokens, and an edit can exfiltrate credentials, add a malicious step, or weaken a required check. Review this change with that privilege in mind.`,
|
|
1397
1450
|
impactLabel: "pipeline edit",
|
|
1398
1451
|
confidence: 0.7,
|
|
@@ -1412,8 +1465,75 @@ function deriveBypassFindings(sum) {
|
|
|
1412
1465
|
return out;
|
|
1413
1466
|
}
|
|
1414
1467
|
|
|
1468
|
+
// src/findings/compositeFindings.ts
|
|
1469
|
+
var COMPLETION = /\b(all tests? (?:now )?pass(?:ing|ed)?|tests? (?:now )?pass(?:ing|ed)?|all green|everything passes|works now|done|fixed|resolved)\b/i;
|
|
1470
|
+
var HEDGE = /\b(should|probably|might|maybe|may|i think|hopefully|in theory|ought to|expects? to|expected to|seems? to|presumably)\b/i;
|
|
1471
|
+
var TEST_RUNNER2 = /\b(pytest|jest|vitest|mocha|go test|cargo test|npm (?:run )?test|yarn test|pnpm test|tsc\b|eslint|ruff|mypy|flake8|rspec|phpunit|dbt (?:test|build))\b/i;
|
|
1472
|
+
var OBSOLETE_MARKER = /\b(obsolete|deprecat(?:ed|e)|no longer (?:applies|needed|valid|relevant)|dead test|legacy test|replaced by|removing (?:the )?(?:old|stale|obsolete) test)\b/i;
|
|
1473
|
+
function claimsDoneUnqualified(text) {
|
|
1474
|
+
for (const sentence of text.split(/[.!?\n]+/)) {
|
|
1475
|
+
if (COMPLETION.test(sentence) && !HEDGE.test(sentence)) {
|
|
1476
|
+
return true;
|
|
1477
|
+
}
|
|
1478
|
+
}
|
|
1479
|
+
return false;
|
|
1480
|
+
}
|
|
1481
|
+
function turnTextFor(edit, spans, finalText) {
|
|
1482
|
+
if (edit.parentSpanId) {
|
|
1483
|
+
const gen = spans.find((s) => s.spanId === edit.parentSpanId && s.kind === "generation");
|
|
1484
|
+
if (gen && typeof gen.input === "string") {
|
|
1485
|
+
return `${gen.input}
|
|
1486
|
+
${finalText}`;
|
|
1487
|
+
}
|
|
1488
|
+
}
|
|
1489
|
+
return finalText;
|
|
1490
|
+
}
|
|
1491
|
+
function deriveCompositeFindings(sum, prior) {
|
|
1492
|
+
const out = [];
|
|
1493
|
+
const ordered = sum.spans.filter((s) => s.kind !== "session").sort((a, b) => a.startTime - b.startTime || a.spanId.localeCompare(b.spanId));
|
|
1494
|
+
const gens = ordered.filter((s) => s.kind === "generation");
|
|
1495
|
+
const finalText = gens[gens.length - 1]?.input || "";
|
|
1496
|
+
const spanById = new Map(sum.spans.map((s) => [s.spanId, s]));
|
|
1497
|
+
const suppress = prior.filter((f) => f.id.startsWith("test-skipped-") || f.id.startsWith("test-trivialised-")).map((f) => ({
|
|
1498
|
+
finding: f,
|
|
1499
|
+
span: f.evidenceSpanId ? spanById.get(f.evidenceSpanId) : void 0
|
|
1500
|
+
})).filter((e) => !!e.span && !!e.finding.filePath).filter((e) => !OBSOLETE_MARKER.test(turnTextFor(e.span, sum.spans, finalText)));
|
|
1501
|
+
if (suppress.length === 0) {
|
|
1502
|
+
return out;
|
|
1503
|
+
}
|
|
1504
|
+
const tLastSuppress = Math.max(...suppress.map((e) => e.span.startTime));
|
|
1505
|
+
const reGreened = ordered.some(
|
|
1506
|
+
(s) => isCommandTool(s.name) && s.startTime > tLastSuppress && s.status === "ok" && TEST_RUNNER2.test(commandOf(s.input))
|
|
1507
|
+
);
|
|
1508
|
+
if (reGreened) {
|
|
1509
|
+
return out;
|
|
1510
|
+
}
|
|
1511
|
+
if (!claimsDoneUnqualified(finalText)) {
|
|
1512
|
+
return out;
|
|
1513
|
+
}
|
|
1514
|
+
const latest = suppress.reduce((a, b) => b.span.startTime >= a.span.startTime ? b : a);
|
|
1515
|
+
const cited = suppress.map((e) => `\`${e.finding.filePath}\``).filter((v, i, arr) => arr.indexOf(v) === i);
|
|
1516
|
+
const list = cited.join(", ");
|
|
1517
|
+
out.push({
|
|
1518
|
+
id: `green-by-suppression-${latest.span.spanId}`,
|
|
1519
|
+
severity: "high",
|
|
1520
|
+
title: `Claimed green over a suppressed test: ${baseName(latest.finding.filePath ?? "")}`,
|
|
1521
|
+
detail: `The session claimed success ("tests pass / done") after an edit silenced a test (${list}), and no test run finished green after that suppression. The green being trusted was not re-established once the test was disabled \u2014 confirm the suppressed test still holds before merging.`,
|
|
1522
|
+
impactLabel: "unverified green",
|
|
1523
|
+
confidence: 0.8,
|
|
1524
|
+
score: 100 * 0.9 * 0.8,
|
|
1525
|
+
evidenceSpanId: latest.span.spanId,
|
|
1526
|
+
filePath: latest.finding.filePath,
|
|
1527
|
+
guardrailRule: "Never claim 'tests pass' after skipping or trivialising a test without a green run afterward; re-run the suite green once the test is fixed, or flag the suppression explicitly."
|
|
1528
|
+
});
|
|
1529
|
+
return out;
|
|
1530
|
+
}
|
|
1531
|
+
function baseName(p) {
|
|
1532
|
+
return p.split("/").pop() || p;
|
|
1533
|
+
}
|
|
1534
|
+
|
|
1415
1535
|
// src/findings/correctnessFindings.ts
|
|
1416
|
-
var
|
|
1536
|
+
var TEST_RUNNER3 = /\b(pytest|jest|vitest|mocha|go test|cargo test|npm (run )?test|yarn test|pnpm test|tsc\b|eslint|ruff|mypy|flake8|rspec|phpunit|dbt (test|build))\b/i;
|
|
1417
1537
|
var CLAIMS_DONE = /\b(done|fixed|passing|tests? (now )?pass|complete(d)?|all set|works now|should work|resolved)\b/i;
|
|
1418
1538
|
var CLAIMS_TESTED = /\b(ran|run|verified|tested|passing|tests? pass)\b/i;
|
|
1419
1539
|
var TEST_FAIL_SUMMARY = [
|
|
@@ -1437,14 +1557,14 @@ var WRITES_CONTENT = /\bgh\s+(?:issue|pr)\s+comment\b|\bgh\s+release\b|\bgit\s+c
|
|
|
1437
1557
|
var CONTEXT_FILE = /(?:^|\/)(?:MEMORY|CLAUDE|AGENTS|GEMINI|\.cursorrules|\.windsurfrules)(?:\.md)?$/i;
|
|
1438
1558
|
var PLACEHOLDER = /your[-_]?(?:api[-_]?)?(?:key|token|secret)|xxx+|placeholder|example|redacted|changeme|dummy|sample|<[^>]+>|\$\{|process\.env|os\.environ|getenv/i;
|
|
1439
1559
|
var FAKE_TOKEN = /1234567890|0123456789|abcdef0123|deadbeef|0{8,}|(?:ab){4,}/i;
|
|
1440
|
-
var
|
|
1560
|
+
var TEST_PATH2 = /(?:^|\/)(?:test_[^/]+\.[a-z0-9]+|[^/]+_test\.[a-z0-9]+|[^/]+\.(?:spec|test)\.[a-z0-9]+|conftest\.py)$|(?:^|\/)(?:tests?|__tests__|specs?|e2e|fixtures?|mocks?)\//i;
|
|
1441
1561
|
var READ_CMD = /\b(cat|head|tail|less|more|bat|nl|sed|awk|grep|rg|xxd|od|view|strings)\b/;
|
|
1442
1562
|
var CODE_FILE = /\.(?:py|js|jsx|ts|tsx|go|rs|rb|java|kt|c|cc|cpp|h|hpp|cs|php|swift|scala|sql|sh|bash|vue|svelte)$/i;
|
|
1443
1563
|
var LOCKFILE = /(?:^|\/)(?:package-lock\.json|npm-shrinkwrap\.json|yarn\.lock|pnpm-lock\.ya?ml|Cargo\.lock|go\.sum|poetry\.lock|Pipfile\.lock|Gemfile\.lock|composer\.lock|flake\.lock|bun\.lockb)$/i;
|
|
1444
1564
|
var INSTALL_CMD = /\b(npm (ci|i|install|update|dedupe)|yarn(\s+(install|add|upgrade|up))?|pnpm (i|install|add|update|up|dedupe)|bun (install|add|i)|cargo (build|update|add|fetch|generate-lockfile|install)|poetry (lock|install|add|update)|pipenv (lock|install)|bundle (install|update|lock)|composer (install|update|require)|go (mod|get|build|install)|nix flake (lock|update))\b/i;
|
|
1445
1565
|
var PROMISE = /\b(?:I'?ll|I will|I'm going to|going to|let me|next,?\s*I'?ll|then\s+I'?ll|I\s+(?:also\s+)?need to|we (?:should|need to))\s+(?:also\s+)?(?:update|edit|modify|fix|change|add|refactor|rewrite|remove|delete|create|implement|patch|adjust|wire up|hook up)\b/gi;
|
|
1446
1566
|
var PATH_TOKEN = /(?:[\w.@/-]+\/)?[\w.-]+\.(?:tsx?|jsx?|py|go|rs|rb|java|kt|sql|sh|ya?ml|json|toml|md|c|cc|cpp|h|css|html|vue|svelte|php|swift|scala)\b/g;
|
|
1447
|
-
function
|
|
1567
|
+
function base3(p) {
|
|
1448
1568
|
return p.split("/").pop() || p;
|
|
1449
1569
|
}
|
|
1450
1570
|
function readRange(input) {
|
|
@@ -1471,7 +1591,7 @@ function deriveCorrectnessFindings(sum) {
|
|
|
1471
1591
|
const claimsDone = CLAIMS_DONE.test(finalText);
|
|
1472
1592
|
const editSpans = tools.filter((s) => isEditTool(s.name));
|
|
1473
1593
|
const bashSpans = tools.filter((s) => isCommandTool(s.name));
|
|
1474
|
-
const ranTests = bashSpans.some((s) =>
|
|
1594
|
+
const ranTests = bashSpans.some((s) => TEST_RUNNER3.test(commandOf(s.input)));
|
|
1475
1595
|
const readCmds = bashSpans.map((s) => ({ t: s.startTime, cmd: commandOf(s.input) })).filter((c) => READ_CMD.test(c.cmd));
|
|
1476
1596
|
const readViaShellBefore = (fp, t) => {
|
|
1477
1597
|
const bn = fp.split("/").pop() ?? fp;
|
|
@@ -1494,7 +1614,7 @@ function deriveCorrectnessFindings(sum) {
|
|
|
1494
1614
|
}
|
|
1495
1615
|
if (blindEdit) {
|
|
1496
1616
|
const p = blindEdit.path;
|
|
1497
|
-
const bn =
|
|
1617
|
+
const bn = base3(p);
|
|
1498
1618
|
const editsN = tools.filter((s) => isEditTool(s.name) && filePathOf(s.input) === p).length;
|
|
1499
1619
|
const readSpans = tools.filter((s) => isReadTool(s.name) && filePathOf(s.input) === p);
|
|
1500
1620
|
const shellReadsN = readCmds.filter((c) => bn.length >= 3 && c.cmd.includes(bn)).length;
|
|
@@ -1615,7 +1735,7 @@ function deriveCorrectnessFindings(sum) {
|
|
|
1615
1735
|
}
|
|
1616
1736
|
for (const s of editSpans) {
|
|
1617
1737
|
const fp = filePathOf(s.input);
|
|
1618
|
-
if (fp &&
|
|
1738
|
+
if (fp && TEST_PATH2.test(fp)) {
|
|
1619
1739
|
continue;
|
|
1620
1740
|
}
|
|
1621
1741
|
const { newStr } = editBody(s.input);
|
|
@@ -1625,7 +1745,7 @@ function deriveCorrectnessFindings(sum) {
|
|
|
1625
1745
|
out.push({
|
|
1626
1746
|
id: `secret-in-file-${s.spanId}`,
|
|
1627
1747
|
severity: "high",
|
|
1628
|
-
title: fp ? `A secret was written into ${
|
|
1748
|
+
title: fp ? `A secret was written into ${base3(fp)}` : "A secret was written into a file",
|
|
1629
1749
|
detail: `An edit inlined what looks like a live credential (API key / token / private key) into ${fp ? `\`${fp}\`` : "a file"}. Committed secrets leak \u2014 move it to an environment variable or secret store and rotate the key.`,
|
|
1630
1750
|
impactLabel: "secret leak",
|
|
1631
1751
|
confidence: 0.8,
|
|
@@ -1650,7 +1770,7 @@ function deriveCorrectnessFindings(sum) {
|
|
|
1650
1770
|
evidenceSpanId: editSpans[editSpans.length - 1].spanId
|
|
1651
1771
|
});
|
|
1652
1772
|
}
|
|
1653
|
-
const testRuns = bashSpans.filter((s) =>
|
|
1773
|
+
const testRuns = bashSpans.filter((s) => TEST_RUNNER3.test(commandOf(s.input)));
|
|
1654
1774
|
const lastRun = testRuns[testRuns.length - 1];
|
|
1655
1775
|
if (lastRun) {
|
|
1656
1776
|
const runOut = typeof lastRun.output === "string" ? lastRun.output : "";
|
|
@@ -1684,7 +1804,7 @@ function deriveCorrectnessFindings(sum) {
|
|
|
1684
1804
|
out.push({
|
|
1685
1805
|
id: "lockfile-edit",
|
|
1686
1806
|
severity: "high",
|
|
1687
|
-
title: `Hand-edited a lockfile with no install command: ${
|
|
1807
|
+
title: `Hand-edited a lockfile with no install command: ${base3(fp)}`,
|
|
1688
1808
|
detail: `\`${fp}\` was edited this session, but no package-manager install/resolve command (npm/yarn/pnpm/cargo/poetry/\u2026) ran. Hand-editing a lockfile rather than regenerating it can swap an integrity hash or re-point a dependency \u2014 review the change closely.`,
|
|
1689
1809
|
impactLabel: "manual lockfile edit",
|
|
1690
1810
|
confidence: 0.7,
|
|
@@ -1744,7 +1864,7 @@ function deriveCorrectnessFindings(sum) {
|
|
|
1744
1864
|
out.push({
|
|
1745
1865
|
id: "unfulfilled-promise",
|
|
1746
1866
|
severity: "medium",
|
|
1747
|
-
title: `Said it would change ${
|
|
1867
|
+
title: `Said it would change ${base3(promised.path)}, but never did`,
|
|
1748
1868
|
detail: `The session's text said it would update \`${promised.path}\`, but that file was never edited this session. A step the agent committed to may have been silently dropped \u2014 confirm it wasn't needed.`,
|
|
1749
1869
|
impactLabel: "dropped step",
|
|
1750
1870
|
confidence: 0.6,
|
|
@@ -1857,7 +1977,7 @@ function stripDupSuffix(path2) {
|
|
|
1857
1977
|
const ext = dot > 0 ? file.slice(dot) : "";
|
|
1858
1978
|
return dir + stem.replace(DUP_SUFFIX, "") + ext;
|
|
1859
1979
|
}
|
|
1860
|
-
function
|
|
1980
|
+
function base4(p) {
|
|
1861
1981
|
return p.split("/").pop() || p;
|
|
1862
1982
|
}
|
|
1863
1983
|
function normWs(s) {
|
|
@@ -1960,7 +2080,7 @@ function deriveEditScanFindings(sum) {
|
|
|
1960
2080
|
if (authoredThisSession.has(fp)) {
|
|
1961
2081
|
return true;
|
|
1962
2082
|
}
|
|
1963
|
-
const b =
|
|
2083
|
+
const b = base4(fp).toLowerCase();
|
|
1964
2084
|
if (b.length > 3 && promptLc.includes(b)) {
|
|
1965
2085
|
return true;
|
|
1966
2086
|
}
|
|
@@ -1984,7 +2104,7 @@ function deriveEditScanFindings(sum) {
|
|
|
1984
2104
|
graderEdit = {
|
|
1985
2105
|
id: `grader-edit-${s.spanId}`,
|
|
1986
2106
|
severity: "critical",
|
|
1987
|
-
title: `Modified the grader/harness: ${
|
|
2107
|
+
title: `Modified the grader/harness: ${base4(fp)}`,
|
|
1988
2108
|
detail: `\`${fp}\` looks like the scoring/evaluation harness, and it was edited during the task. Changing the thing that judges the work \u2014 rather than the work itself \u2014 is the clearest reward-hacking signal. Review this edit closely.`,
|
|
1989
2109
|
impactLabel: "gamed the eval",
|
|
1990
2110
|
confidence: 0.85,
|
|
@@ -2003,8 +2123,8 @@ function deriveEditScanFindings(sum) {
|
|
|
2003
2123
|
trivialised = {
|
|
2004
2124
|
id: `test-trivialised-${s.spanId}`,
|
|
2005
2125
|
severity: testTamperSeverity,
|
|
2006
|
-
title: `Made a test trivially pass: ${
|
|
2007
|
-
detail: `An edit to \`${
|
|
2126
|
+
title: `Made a test trivially pass: ${base4(fp)}`,
|
|
2127
|
+
detail: `An edit to \`${base4(fp)}\` introduced a tautological assertion (e.g. \`assert True\`). A test that can't fail provides no protection \u2014 confirm the real behaviour is still being checked.`,
|
|
2008
2128
|
impactLabel: "fake green",
|
|
2009
2129
|
confidence: 0.85,
|
|
2010
2130
|
score: testTamperScore * 0.85,
|
|
@@ -2017,8 +2137,8 @@ function deriveEditScanFindings(sum) {
|
|
|
2017
2137
|
skipped = {
|
|
2018
2138
|
id: `test-skipped-${s.spanId}`,
|
|
2019
2139
|
severity: "high",
|
|
2020
|
-
title: `Skipped or disabled a test: ${
|
|
2021
|
-
detail: `An edit added a skip/ignore marker (e.g. \`@pytest.mark.skip\`, \`.skip(\`, \`@ts-ignore\`) to \`${
|
|
2140
|
+
title: `Skipped or disabled a test: ${base4(fp)}`,
|
|
2141
|
+
detail: `An edit added a skip/ignore marker (e.g. \`@pytest.mark.skip\`, \`.skip(\`, \`@ts-ignore\`) to \`${base4(fp)}\`. Silencing a failing test makes the run green without fixing the underlying problem.`,
|
|
2022
2142
|
impactLabel: "test silenced",
|
|
2023
2143
|
confidence: 0.8,
|
|
2024
2144
|
score: 100 * 0.9 * 0.8,
|
|
@@ -2060,8 +2180,8 @@ function deriveEditScanFindings(sum) {
|
|
|
2060
2180
|
reversion = {
|
|
2061
2181
|
id: `edit-reversion-${b.span.spanId}`,
|
|
2062
2182
|
severity: "high",
|
|
2063
|
-
title: `Reverted its own edit in ${
|
|
2064
|
-
detail: `The agent changed a region of \`${
|
|
2183
|
+
title: `Reverted its own edit in ${base4(fp)}`,
|
|
2184
|
+
detail: `The agent changed a region of \`${base4(fp)}\` and then later put it back (A\u2192B\u2192A). Oscillating on the same code is a sign of "coherence collapse" \u2014 it reached a state, then thrashed it \u2014 so the final version may not be its best attempt.`,
|
|
2065
2185
|
impactLabel: "thrash / rework",
|
|
2066
2186
|
confidence: 0.8,
|
|
2067
2187
|
score: 100 * 0.9 * 0.8,
|
|
@@ -2087,8 +2207,8 @@ function deriveEditScanFindings(sum) {
|
|
|
2087
2207
|
swallowed = {
|
|
2088
2208
|
id: `error-swallowed-${s.spanId}`,
|
|
2089
2209
|
severity: "medium",
|
|
2090
|
-
title: `Silently swallowed an error in ${
|
|
2091
|
-
detail: `An edit to \`${
|
|
2210
|
+
title: `Silently swallowed an error in ${base4(fp)}`,
|
|
2211
|
+
detail: `An edit to \`${base4(fp)}\` added a bare \`except: pass\` / empty \`catch {}\` that discards the failure instead of handling it. If this was to get past an error rather than fix it, the underlying problem is now hidden.`,
|
|
2092
2212
|
impactLabel: "error hidden",
|
|
2093
2213
|
// soft code-smell (empty catches are often legitimate) — minor section
|
|
2094
2214
|
confidence: 0.45,
|
|
@@ -2102,8 +2222,8 @@ function deriveEditScanFindings(sum) {
|
|
|
2102
2222
|
evalOverride = {
|
|
2103
2223
|
id: `eval-override-${s.spanId}`,
|
|
2104
2224
|
severity: "critical",
|
|
2105
|
-
title: `Overrode what judges the work in ${
|
|
2106
|
-
detail: `An edit to \`${
|
|
2225
|
+
title: `Overrode what judges the work in ${base4(fp)}`,
|
|
2226
|
+
detail: `An edit to \`${base4(fp)}\` introduced an always-pass override \u2014 e.g. an \`__eq__\` that returns True, a patched scorer/verifier, a frozen clock, or an unconditional PASS. That defeats the check instead of satisfying it.`,
|
|
2107
2227
|
impactLabel: "gamed the eval",
|
|
2108
2228
|
confidence: 0.85,
|
|
2109
2229
|
score: 1e3 * 0.85,
|
|
@@ -2118,8 +2238,8 @@ function deriveEditScanFindings(sum) {
|
|
|
2118
2238
|
stubbed = {
|
|
2119
2239
|
id: `impl-stubbed-${s.spanId}`,
|
|
2120
2240
|
severity: "high",
|
|
2121
|
-
title: `Replaced real code with a stub in ${
|
|
2122
|
-
detail: `An edit swapped a working implementation in \`${
|
|
2241
|
+
title: `Replaced real code with a stub in ${base4(fp)}`,
|
|
2242
|
+
detail: `An edit swapped a working implementation in \`${base4(fp)}\` for a placeholder (\`NotImplementedError\` / \`todo!()\` / "not implemented"). If the task was to implement this, a stub that compiles isn't a solution.`,
|
|
2123
2243
|
impactLabel: "stubbed out",
|
|
2124
2244
|
confidence: 0.75,
|
|
2125
2245
|
score: 100 * 0.9 * 0.75,
|
|
@@ -2133,8 +2253,8 @@ function deriveEditScanFindings(sum) {
|
|
|
2133
2253
|
shrunk = {
|
|
2134
2254
|
id: `file-shrink-${s.spanId}`,
|
|
2135
2255
|
severity: declared ? "low" : "medium",
|
|
2136
|
-
title: `Large deletion in ${
|
|
2137
|
-
detail: declared ? `One edit removed most of a ${oldLines}-line region of \`${
|
|
2256
|
+
title: `Large deletion in ${base4(fp)} \u2014 ${oldLines}\u2192${lineCount(newStr)} lines`,
|
|
2257
|
+
detail: declared ? `One edit removed most of a ${oldLines}-line region of \`${base4(fp)}\`, leaving ${lineCount(newStr)} lines. The task asked to delete/refactor, so this was likely intended \u2014 confirm nothing extra was dropped.` : `One edit removed most of a ${oldLines}-line region of \`${base4(fp)}\`, leaving ${lineCount(newStr)} lines, with no stated delete/refactor intent. Silent large deletions are a common way agents drop error handling or safety checks \u2014 confirm nothing important was lost.`,
|
|
2138
2258
|
impactLabel: "content loss risk",
|
|
2139
2259
|
confidence: declared ? 0.5 : 0.6,
|
|
2140
2260
|
score: declared ? 1 * 0.9 * 0.5 : 10 * 0.9 * 0.6,
|
|
@@ -2169,8 +2289,8 @@ function deriveEditScanFindings(sum) {
|
|
|
2169
2289
|
duplicate = {
|
|
2170
2290
|
id: `dup-file-${s.spanId}`,
|
|
2171
2291
|
severity: "medium",
|
|
2172
|
-
title: `Created a near-duplicate file: ${
|
|
2173
|
-
detail: `\`${
|
|
2292
|
+
title: `Created a near-duplicate file: ${base4(fp)}`,
|
|
2293
|
+
detail: `\`${base4(fp)}\` looks like a copy of an existing \`${base4(stripped)}\` the session already had open. Agents that create \`*2\`/\`_copy\`/\`_new\` files instead of editing the original leave divergent duplicates and dead code \u2014 confirm this was intended.`,
|
|
2174
2294
|
impactLabel: "duplicate / dead code",
|
|
2175
2295
|
confidence: 0.6,
|
|
2176
2296
|
score: 10 * 0.9 * 0.6,
|
|
@@ -2202,8 +2322,8 @@ function deriveEditScanFindings(sum) {
|
|
|
2202
2322
|
dupCode = {
|
|
2203
2323
|
id: `duplicated-code-${s.spanId}`,
|
|
2204
2324
|
severity: "medium",
|
|
2205
|
-
title: `Added ${MIN_LINES}+ near-identical lines across files (possible copy-paste): ${
|
|
2206
|
-
detail: `A block of ${MIN_LINES}+ lines this session is near-identical (after renaming) to another block the agent added in \`${
|
|
2325
|
+
title: `Added ${MIN_LINES}+ near-identical lines across files (possible copy-paste): ${base4(fp)}`,
|
|
2326
|
+
detail: `A block of ${MIN_LINES}+ lines this session is near-identical (after renaming) to another block the agent added in \`${base4(firstFile)}\`. Duplication is sometimes intended \u2014 consider extracting a shared helper.`,
|
|
2207
2327
|
impactLabel: "copy-paste",
|
|
2208
2328
|
confidence: 0.6,
|
|
2209
2329
|
score: 10 * 0.9 * 0.6,
|
|
@@ -2253,7 +2373,7 @@ function deriveEditScanFindings(sum) {
|
|
|
2253
2373
|
out.push({
|
|
2254
2374
|
id: `malformed-artifact-${s.spanId}`,
|
|
2255
2375
|
severity: "high",
|
|
2256
|
-
title: `Wrote invalid ${ext}: ${
|
|
2376
|
+
title: `Wrote invalid ${ext}: ${base4(fp)}`,
|
|
2257
2377
|
detail: `\`${fp}\` was written but does not parse as ${ext} (line ${r.line}: ${r.msg}). A broken config breaks the build downstream.`,
|
|
2258
2378
|
impactLabel: "broken artifact",
|
|
2259
2379
|
confidence: 0.85,
|
|
@@ -2277,7 +2397,7 @@ function deriveEditScanFindings(sum) {
|
|
|
2277
2397
|
out.push({
|
|
2278
2398
|
id: `trojan-source-${s.spanId}`,
|
|
2279
2399
|
severity: "high",
|
|
2280
|
-
title: `Hidden Unicode in source: ${
|
|
2400
|
+
title: `Hidden Unicode in source: ${base4(fp)}`,
|
|
2281
2401
|
detail: `The edit to \`${fp}\` contains a ${hit.label} code point ${u(hit.cp)} at line ${hit.line}:${hit.col} \u2014 invisible in review, it can hide or reorder code (Trojan Source, CVE-2021-42574).`,
|
|
2282
2402
|
impactLabel: "hidden unicode",
|
|
2283
2403
|
confidence: 0.85,
|
|
@@ -2683,6 +2803,10 @@ function deriveFindings(sum) {
|
|
|
2683
2803
|
}
|
|
2684
2804
|
findings.push(...deriveCorrectnessFindings(sum));
|
|
2685
2805
|
findings.push(...deriveEditScanFindings(sum));
|
|
2806
|
+
findings.push(...deriveCompositeFindings(sum, findings));
|
|
2807
|
+
if (process.env.RECEIPTS_EXPERIMENTAL_DETECTORS === "1") {
|
|
2808
|
+
findings.push(...deriveAuditFindings(sum));
|
|
2809
|
+
}
|
|
2686
2810
|
findings.push(...deriveToolUseFindings(sum));
|
|
2687
2811
|
findings.push(...deriveBypassFindings(sum));
|
|
2688
2812
|
findings.push(...deriveInjectionFindings(sum));
|
|
@@ -2767,15 +2891,17 @@ var PRIVILEGED_PREFIXES = [
|
|
|
2767
2891
|
"test-focus",
|
|
2768
2892
|
"test-skipped",
|
|
2769
2893
|
"test-trivialised",
|
|
2894
|
+
"green-by-suppression",
|
|
2895
|
+
"untested-test",
|
|
2770
2896
|
"history-rewrite",
|
|
2771
2897
|
"force-push"
|
|
2772
2898
|
];
|
|
2773
|
-
var
|
|
2899
|
+
var TEST_PATH3 = /(?:^|\/)(?:tests?|specs?|__tests__)(?:\/|$)|\.(?:test|spec)\.|_test\./;
|
|
2774
2900
|
function privileged(id, filePath) {
|
|
2775
2901
|
if (PRIVILEGED_PREFIXES.some((p) => id.startsWith(p))) {
|
|
2776
2902
|
return true;
|
|
2777
2903
|
}
|
|
2778
|
-
return id.startsWith("file-shrink") && !!filePath &&
|
|
2904
|
+
return id.startsWith("file-shrink") && !!filePath && TEST_PATH3.test(filePath);
|
|
2779
2905
|
}
|
|
2780
2906
|
function findingSurface(id) {
|
|
2781
2907
|
if (OPERATOR_KINDS.has(id) || id.startsWith("errcluster-")) {
|
|
@@ -2824,22 +2950,22 @@ function changedFiles(baseOverride, opts) {
|
|
|
2824
2950
|
if (!root) {
|
|
2825
2951
|
return null;
|
|
2826
2952
|
}
|
|
2827
|
-
let
|
|
2828
|
-
if (!
|
|
2953
|
+
let base5 = baseOverride;
|
|
2954
|
+
if (!base5) {
|
|
2829
2955
|
const sym = git(["symbolic-ref", "refs/remotes/origin/HEAD"], root)?.trim();
|
|
2830
2956
|
if (sym) {
|
|
2831
|
-
|
|
2957
|
+
base5 = sym.replace("refs/remotes/", "");
|
|
2832
2958
|
} else if (git(["rev-parse", "--verify", "--quiet", "origin/main"], root) !== null) {
|
|
2833
|
-
|
|
2959
|
+
base5 = "origin/main";
|
|
2834
2960
|
} else {
|
|
2835
|
-
|
|
2961
|
+
base5 = "main";
|
|
2836
2962
|
}
|
|
2837
2963
|
}
|
|
2838
|
-
let out = git(["diff", "--name-only", `${
|
|
2964
|
+
let out = git(["diff", "--name-only", `${base5}...HEAD`], root);
|
|
2839
2965
|
if (out === null && !baseOverride) {
|
|
2840
2966
|
out = git(["diff", "--name-only", "main...HEAD"], root);
|
|
2841
2967
|
if (out !== null) {
|
|
2842
|
-
|
|
2968
|
+
base5 = "main";
|
|
2843
2969
|
}
|
|
2844
2970
|
}
|
|
2845
2971
|
const pending = opts?.includeWorkingTree ? workingTreeFiles(root) : [];
|
|
@@ -2850,7 +2976,7 @@ function changedFiles(baseOverride, opts) {
|
|
|
2850
2976
|
if (files.length === 0) {
|
|
2851
2977
|
return null;
|
|
2852
2978
|
}
|
|
2853
|
-
return { base:
|
|
2979
|
+
return { base: base5, files, repoRoot: root };
|
|
2854
2980
|
}
|
|
2855
2981
|
function workingTreeFiles(root) {
|
|
2856
2982
|
const out = git(["status", "--porcelain", "-z", "--untracked-files=all"], root);
|
|
@@ -2872,12 +2998,12 @@ function workingTreeFiles(root) {
|
|
|
2872
2998
|
return files;
|
|
2873
2999
|
}
|
|
2874
3000
|
function inDiff(filePath, files) {
|
|
2875
|
-
const
|
|
3001
|
+
const base5 = filePath.split("/").pop();
|
|
2876
3002
|
for (const d of files) {
|
|
2877
3003
|
if (d === filePath || d.endsWith(`/${filePath}`) || filePath.endsWith(`/${d}`)) {
|
|
2878
3004
|
return true;
|
|
2879
3005
|
}
|
|
2880
|
-
if (
|
|
3006
|
+
if (base5 && d.split("/").pop() === base5) {
|
|
2881
3007
|
return true;
|
|
2882
3008
|
}
|
|
2883
3009
|
}
|
|
@@ -2994,12 +3120,12 @@ function applyDiffScope(derived, findings, files, projectPath) {
|
|
|
2994
3120
|
if (!projectPath || !f.evidenceSpanId) {
|
|
2995
3121
|
return void 0;
|
|
2996
3122
|
}
|
|
2997
|
-
const
|
|
2998
|
-
const state = cwdAtFirstGit(spanCmd.get(f.evidenceSpanId) ?? "",
|
|
3123
|
+
const base5 = spanCwd.get(f.evidenceSpanId);
|
|
3124
|
+
const state = cwdAtFirstGit(spanCmd.get(f.evidenceSpanId) ?? "", base5);
|
|
2999
3125
|
if (state.kind === "unknown") {
|
|
3000
3126
|
return "elsewhere";
|
|
3001
3127
|
}
|
|
3002
|
-
const at = state.kind === "known" ? state.path :
|
|
3128
|
+
const at = state.kind === "known" ? state.path : base5;
|
|
3003
3129
|
if (!at) {
|
|
3004
3130
|
return void 0;
|
|
3005
3131
|
}
|
|
@@ -3542,95 +3668,6 @@ function renderList(sessions, opts = {}) {
|
|
|
3542
3668
|
`;
|
|
3543
3669
|
}
|
|
3544
3670
|
|
|
3545
|
-
// src/report/section.ts
|
|
3546
|
-
function upsertSection(existing, block, start, end) {
|
|
3547
|
-
const s = existing.indexOf(start);
|
|
3548
|
-
const e = existing.indexOf(end);
|
|
3549
|
-
if (s !== -1 && e !== -1 && e > s) {
|
|
3550
|
-
return existing.slice(0, s) + block + existing.slice(e + end.length);
|
|
3551
|
-
}
|
|
3552
|
-
const sep = existing && !existing.endsWith("\n") ? "\n\n" : existing ? "\n" : "";
|
|
3553
|
-
return `${existing}${sep}${block}
|
|
3554
|
-
`;
|
|
3555
|
-
}
|
|
3556
|
-
|
|
3557
|
-
// src/report/guardrails.ts
|
|
3558
|
-
var SEV_ORDER = { critical: 0, high: 1, medium: 2, low: 3 };
|
|
3559
|
-
var SEV_TITLE = {
|
|
3560
|
-
critical: "Critical",
|
|
3561
|
-
high: "High",
|
|
3562
|
-
medium: "Medium",
|
|
3563
|
-
low: "Low"
|
|
3564
|
-
};
|
|
3565
|
-
var GUARDRAILS_START = "<!-- receipts:guardrails:start -->";
|
|
3566
|
-
var GUARDRAILS_END = "<!-- receipts:guardrails:end -->";
|
|
3567
|
-
function collectGuardrails(findingSets) {
|
|
3568
|
-
const byRule = /* @__PURE__ */ new Map();
|
|
3569
|
-
for (const set of findingSets) {
|
|
3570
|
-
for (const f of [...set.main, ...set.minor]) {
|
|
3571
|
-
const rule = f.guardrailRule?.trim();
|
|
3572
|
-
if (!rule) {
|
|
3573
|
-
continue;
|
|
3574
|
-
}
|
|
3575
|
-
let entry = byRule.get(rule);
|
|
3576
|
-
if (!entry) {
|
|
3577
|
-
entry = { rule, severity: f.severity, because: [] };
|
|
3578
|
-
byRule.set(rule, entry);
|
|
3579
|
-
}
|
|
3580
|
-
if (SEV_ORDER[f.severity] < SEV_ORDER[entry.severity]) {
|
|
3581
|
-
entry.severity = f.severity;
|
|
3582
|
-
}
|
|
3583
|
-
const cite = entry.because.find((b) => b.title === f.title);
|
|
3584
|
-
if (cite) {
|
|
3585
|
-
cite.count++;
|
|
3586
|
-
} else {
|
|
3587
|
-
entry.because.push({ title: f.title, count: 1 });
|
|
3588
|
-
}
|
|
3589
|
-
}
|
|
3590
|
-
}
|
|
3591
|
-
return [...byRule.values()].sort(
|
|
3592
|
-
(a, b) => SEV_ORDER[a.severity] - SEV_ORDER[b.severity] || b.because.length - a.because.length
|
|
3593
|
-
);
|
|
3594
|
-
}
|
|
3595
|
-
function citation(rule) {
|
|
3596
|
-
return rule.because.map((b) => b.count > 1 ? `${b.title} (\xD7${b.count})` : b.title).join("; ");
|
|
3597
|
-
}
|
|
3598
|
-
function renderGuardrailsBlock(rules, format = "md") {
|
|
3599
|
-
if (format === "json") {
|
|
3600
|
-
return JSON.stringify(rules, null, 2);
|
|
3601
|
-
}
|
|
3602
|
-
if (rules.length === 0) {
|
|
3603
|
-
return format === "md" ? `${GUARDRAILS_START}
|
|
3604
|
-
## Receipts guardrails
|
|
3605
|
-
|
|
3606
|
-
_No guardrails \u2014 nothing the agent did warrants a prevention rule._
|
|
3607
|
-
${GUARDRAILS_END}` : "No guardrails \u2014 nothing the agent did warrants a prevention rule.";
|
|
3608
|
-
}
|
|
3609
|
-
const lines = [];
|
|
3610
|
-
if (format === "md") {
|
|
3611
|
-
lines.push(GUARDRAILS_START);
|
|
3612
|
-
lines.push("## Receipts guardrails");
|
|
3613
|
-
lines.push("<!-- generated by `receipts guardrails` \u2014 paste into AGENTS.md / CLAUDE.md -->");
|
|
3614
|
-
lines.push("");
|
|
3615
|
-
}
|
|
3616
|
-
let lastSev = null;
|
|
3617
|
-
for (const r of rules) {
|
|
3618
|
-
if (r.severity !== lastSev) {
|
|
3619
|
-
lines.push(format === "md" ? `### ${SEV_TITLE[r.severity]}` : `${SEV_TITLE[r.severity]}:`);
|
|
3620
|
-
lastSev = r.severity;
|
|
3621
|
-
}
|
|
3622
|
-
lines.push(`- ${r.rule}`);
|
|
3623
|
-
lines.push(format === "md" ? ` _\u2014 ${citation(r)}_` : ` \u2014 ${citation(r)}`);
|
|
3624
|
-
}
|
|
3625
|
-
if (format === "md") {
|
|
3626
|
-
lines.push(GUARDRAILS_END);
|
|
3627
|
-
}
|
|
3628
|
-
return lines.join("\n");
|
|
3629
|
-
}
|
|
3630
|
-
function upsertGuardrailsSection(existing, block) {
|
|
3631
|
-
return upsertSection(existing, block, GUARDRAILS_START, GUARDRAILS_END);
|
|
3632
|
-
}
|
|
3633
|
-
|
|
3634
3671
|
// src/sign/verify.ts
|
|
3635
3672
|
import { createHash as createHash2 } from "crypto";
|
|
3636
3673
|
// The letter grades "A", "B", "C" and "F" held in a Set for membership checks.
// NOTE(review): presumably the only grade values a receipt may carry — confirm against the verifier code that reads it.
var GRADES = /* @__PURE__ */ new Set(["A", "B", "C", "F"]);
|
|
@@ -3742,6 +3779,95 @@ function verifyBundle(input, opts = {}) {
|
|
|
3742
3779
|
};
|
|
3743
3780
|
}
|
|
3744
3781
|
|
|
3782
|
+
// src/report/section.ts
|
|
3783
|
+
/**
 * Insert or replace a marker-delimited section in a document.
 *
 * When `existing` contains `start` followed later by `end`, everything from
 * the start marker through the end marker is replaced by `block`. Otherwise
 * `block` is appended after a blank-line separator and a trailing newline is
 * added.
 *
 * @param {string} existing - Current document text (may be empty).
 * @param {string} block - Replacement text, expected to include its own markers.
 * @param {string} start - Marker that opens the section.
 * @param {string} end - Marker that closes the section.
 * @returns {string} The updated document text.
 */
function upsertSection(existing, block, start, end) {
  const s = existing.indexOf(start);
  // Search for the end marker only after the start marker. Searching from the
  // top of the document lets a stray end marker that precedes the section hide
  // the real one, which made the section get appended again instead of replaced.
  const e = s === -1 ? -1 : existing.indexOf(end, s + start.length);
  if (e !== -1) {
    return `${existing.slice(0, s)}${block}${existing.slice(e + end.length)}`;
  }
  // Separate the appended block from prior content with exactly one blank line;
  // an empty document gets no separator at all.
  const sep = existing && !existing.endsWith("\n") ? "\n\n" : existing ? "\n" : "";
  return `${existing}${sep}${block}\n`;
}
|
|
3793
|
+
|
|
3794
|
+
// src/report/guardrails.ts
|
|
3795
|
+
// Numeric rank per severity; collectGuardrails sorts ascending on it, so rank 0 comes first.
var SEV_ORDER = {
  critical: 0,
  high: 1,
  medium: 2,
  low: 3
};
// Heading text shown for each severity in rendered guardrail output.
var SEV_TITLE = { critical: "Critical", high: "High", medium: "Medium", low: "Low" };
// HTML comment markers that delimit the generated guardrails section in a document.
var GUARDRAILS_START = "<!-- receipts:guardrails:start -->";
var GUARDRAILS_END = "<!-- receipts:guardrails:end -->";
|
|
3804
|
+
/**
 * Merge the guardrail rules carried by findings into one de-duplicated list.
 *
 * Findings from each set's `main` and `minor` lists are grouped by their
 * trimmed `guardrailRule`; findings without a rule are ignored. Each group
 * keeps the lowest-ranked (per SEV_ORDER) severity seen and a per-title count
 * of the findings that cite it.
 *
 * @param {Iterable<{main: Array, minor: Array}>} findingSets - Finding sets to scan.
 * @returns {Array<{rule: string, severity: string, because: Array<{title: string, count: number}>}>}
 *   Rules ordered by severity rank, then by number of distinct citing titles (most first).
 */
function collectGuardrails(findingSets) {
  const merged = new Map();
  for (const { main, minor } of findingSets) {
    for (const finding of [...main, ...minor]) {
      const rule = finding.guardrailRule?.trim();
      if (!rule) {
        continue;
      }
      if (!merged.has(rule)) {
        merged.set(rule, { rule, severity: finding.severity, because: [] });
      }
      const entry = merged.get(rule);
      // Keep the lowest rank seen for this rule.
      if (SEV_ORDER[finding.severity] < SEV_ORDER[entry.severity]) {
        entry.severity = finding.severity;
      }
      // Count repeated titles instead of listing them more than once.
      const at = entry.because.findIndex((b) => b.title === finding.title);
      if (at === -1) {
        entry.because.push({ title: finding.title, count: 1 });
      } else {
        entry.because[at].count++;
      }
    }
  }
  const bySeverityThenBreadth = (a, b) => {
    const rankDelta = SEV_ORDER[a.severity] - SEV_ORDER[b.severity];
    return rankDelta || b.because.length - a.because.length;
  };
  return Array.from(merged.values()).sort(bySeverityThenBreadth);
}
|
|
3832
|
+
/**
 * Build the human-readable citation for a guardrail rule.
 *
 * Each entry of `rule.because` contributes its title; titles seen more than
 * once get a " (×N)" suffix. Entries are joined with "; ".
 *
 * @param {{because: Array<{title: string, count: number}>}} rule - Collected rule.
 * @returns {string} The joined citation text.
 */
function citation(rule) {
  const parts = [];
  for (const { title, count } of rule.because) {
    parts.push(count > 1 ? `${title} (\xD7${count})` : title);
  }
  return parts.join("; ");
}
|
|
3835
|
+
/**
 * Render collected guardrail rules as JSON, markdown or plain text.
 *
 * - "json": the rules serialised with two-space indentation.
 * - "md" (default): a section wrapped in GUARDRAILS_START / GUARDRAILS_END
 *   markers with a heading per severity.
 * - any other value: plain text with "<Severity>:" headings and no markers.
 *
 * A severity heading is emitted whenever the severity differs from the
 * previous rule's, so the output groups cleanly only when `rules` arrives
 * ordered by severity.
 *
 * @param {Array<{rule: string, severity: string, because: Array}>} rules - Rules to render.
 * @param {string} [format="md"] - Output format.
 * @returns {string} The rendered block.
 */
function renderGuardrailsBlock(rules, format = "md") {
  if (format === "json") {
    return JSON.stringify(rules, null, 2);
  }
  const asMarkdown = format === "md";
  if (rules.length === 0) {
    const emptyNote = "No guardrails \u2014 nothing the agent did warrants a prevention rule.";
    if (!asMarkdown) {
      return emptyNote;
    }
    // The empty markdown section still carries the markers and the heading.
    return [GUARDRAILS_START, "## Receipts guardrails", "", `_${emptyNote}_`, GUARDRAILS_END].join("\n");
  }
  const out = asMarkdown
    ? [
        GUARDRAILS_START,
        "## Receipts guardrails",
        "<!-- generated by `receipts guardrails` \u2014 paste into AGENTS.md / CLAUDE.md -->",
        ""
      ]
    : [];
  let currentSeverity = null;
  for (const entry of rules) {
    if (entry.severity !== currentSeverity) {
      currentSeverity = entry.severity;
      const heading = SEV_TITLE[currentSeverity];
      out.push(asMarkdown ? `### ${heading}` : `${heading}:`);
    }
    const why = citation(entry);
    out.push(`- ${entry.rule}`, asMarkdown ? ` _\u2014 ${why}_` : ` \u2014 ${why}`);
  }
  if (asMarkdown) {
    out.push(GUARDRAILS_END);
  }
  return out.join("\n");
}
|
|
3867
|
+
/**
 * Insert or replace the guardrails section of a document.
 *
 * Delegates to upsertSection using the guardrails start/end markers.
 *
 * @param {string} existing - Current document text.
 * @param {string} block - Rendered guardrails block to place in the document.
 * @returns {string} Whatever upsertSection returns for those arguments.
 */
function upsertGuardrailsSection(existing, block) {
  const markers = [GUARDRAILS_START, GUARDRAILS_END];
  return upsertSection(existing, block, ...markers);
}
|
|
3870
|
+
|
|
3745
3871
|
// src/trace/anthropic.ts
|
|
3746
3872
|
import * as fs3 from "fs";
|
|
3747
3873
|
|
|
@@ -5007,12 +5133,12 @@ export {
|
|
|
5007
5133
|
buildReceipt,
|
|
5008
5134
|
renderCard,
|
|
5009
5135
|
renderList,
|
|
5136
|
+
validateReceiptShape,
|
|
5137
|
+
verifyBundle,
|
|
5010
5138
|
upsertSection,
|
|
5011
5139
|
collectGuardrails,
|
|
5012
5140
|
renderGuardrailsBlock,
|
|
5013
5141
|
upsertGuardrailsSection,
|
|
5014
|
-
validateReceiptShape,
|
|
5015
|
-
verifyBundle,
|
|
5016
5142
|
adapters,
|
|
5017
5143
|
adapterFor,
|
|
5018
5144
|
agentIds,
|
|
@@ -5028,4 +5154,4 @@ export {
|
|
|
5028
5154
|
redact,
|
|
5029
5155
|
redactReceipt
|
|
5030
5156
|
};
|
|
5031
|
-
//# sourceMappingURL=chunk-
|
|
5157
|
+
//# sourceMappingURL=chunk-63E3RZHD.js.map
|