agentskeptic 5.0.0 → 6.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/actionableFailure.remediationExhaustive.test.js +1 -1
- package/dist/bootstrap/runBootstrapSubcommand.js +1 -1
- package/dist/cli.js +1 -1
- package/dist/commercial/trustDecisionRecord.schema.test.js +23 -3
- package/dist/commercial/trustDecisionRecord.schema.test.js.map +1 -1
- package/dist/compare.acceptance.test.js +1 -1
- package/dist/decisionEvidenceBundle/validateDecisionEvidenceBundle.js +1 -1
- package/dist/decisionEvidenceBundle/writeDecisionEvidenceBundle.js +1 -1
- package/dist/decisionEvidenceBundle.test.js +40 -3
- package/dist/decisionEvidenceBundle.test.js.map +1 -1
- package/dist/decisionGate.js +1 -1
- package/dist/evidenceCompleteness.d.ts.map +1 -1
- package/dist/evidenceCompleteness.js +2 -24
- package/dist/evidenceCompleteness.js.map +1 -1
- package/dist/execution-identity.v1.json +1 -1
- package/dist/failureSpine.d.ts +37 -0
- package/dist/failureSpine.d.ts.map +1 -0
- package/dist/failureSpine.js +218 -0
- package/dist/failureSpine.js.map +1 -0
- package/dist/failureSpine.test.d.ts +2 -0
- package/dist/failureSpine.test.d.ts.map +1 -0
- package/dist/failureSpine.test.js +28 -0
- package/dist/failureSpine.test.js.map +1 -0
- package/dist/formatFailureSpineHuman.d.ts +4 -0
- package/dist/formatFailureSpineHuman.d.ts.map +1 -0
- package/dist/formatFailureSpineHuman.js +18 -0
- package/dist/formatFailureSpineHuman.js.map +1 -0
- package/dist/governanceEvidence.test.js +21 -2
- package/dist/governanceEvidence.test.js.map +1 -1
- package/dist/index.d.ts +6 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +4 -1
- package/dist/index.js.map +1 -1
- package/dist/langGraphCheckpointTrustGate.js +1 -1
- package/dist/langGraphCheckpointTrustIneligibleCertificate.js +1 -1
- package/dist/outcomeCertificate.d.ts +16 -12
- package/dist/outcomeCertificate.d.ts.map +1 -1
- package/dist/outcomeCertificate.js +50 -10
- package/dist/outcomeCertificate.js.map +1 -1
- package/dist/outcomeCertificate.test.js +1 -1
- package/dist/publicDistribution.generated.d.ts +1 -1
- package/dist/publicDistribution.generated.js +1 -1
- package/dist/quickVerify/quickParamPointerNegatives.test.js +5 -2
- package/dist/quickVerify/quickParamPointerNegatives.test.js.map +1 -1
- package/dist/quickVerify/quickParamPointerPromotionGoldens.test.js +7 -4
- package/dist/quickVerify/quickParamPointerPromotionGoldens.test.js.map +1 -1
- package/dist/remediationConsumptionGuard.test.js +2 -0
- package/dist/remediationConsumptionGuard.test.js.map +1 -1
- package/dist/remediationMessage.d.ts +6 -0
- package/dist/remediationMessage.d.ts.map +1 -0
- package/dist/remediationMessage.js +30 -0
- package/dist/remediationMessage.js.map +1 -0
- package/dist/remediationWireSurfaceGuard.test.js +2 -0
- package/dist/remediationWireSurfaceGuard.test.js.map +1 -1
- package/dist/schemaLoad.d.ts +1 -1
- package/dist/schemaLoad.d.ts.map +1 -1
- package/dist/schemaLoad.js +13 -3
- package/dist/schemaLoad.js.map +1 -1
- package/dist/shareReport/postPublicVerificationReport.d.ts +3 -3
- package/dist/shareReport/postPublicVerificationReport.js +1 -1
- package/dist/shareReport/postPublicVerificationReport.js.map +1 -1
- package/dist/standardVerifyWorkflowCli.d.ts +7 -7
- package/dist/standardVerifyWorkflowCli.js +3 -3
- package/dist/trustDecision.d.ts +7 -2
- package/dist/trustDecision.d.ts.map +1 -1
- package/dist/trustDecision.js +14 -7
- package/dist/trustDecision.js.map +1 -1
- package/dist/verify/batchVerifyTelemetrySubcommand.js +2 -2
- package/package.json +1 -1
- package/schemas/failure-spine-v1.schema.json +38 -0
- package/schemas/openapi-commercial-v1.in.yaml +1 -1
- package/schemas/openapi-commercial-v1.yaml +2 -2
- package/schemas/outcome-certificate-v3.schema.json +97 -0
- package/schemas/public-verification-report-v3.schema.json +1 -1
- package/schemas/regression-artifact-v1.schema.json +1 -1
- package/scripts/discovery-payload.lib.cjs +140 -18
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$id": "https://agentskeptic.com/schemas/outcome-certificate-v3.schema.json",
|
|
3
|
+
"title": "OutcomeCertificateV3",
|
|
4
|
+
"type": "object",
|
|
5
|
+
"additionalProperties": false,
|
|
6
|
+
"required": [
|
|
7
|
+
"schemaVersion",
|
|
8
|
+
"workflowId",
|
|
9
|
+
"runKind",
|
|
10
|
+
"stateRelation",
|
|
11
|
+
"highStakesReliance",
|
|
12
|
+
"relianceRationale",
|
|
13
|
+
"intentSummary",
|
|
14
|
+
"explanation",
|
|
15
|
+
"steps",
|
|
16
|
+
"humanReport",
|
|
17
|
+
"evidenceCompleteness",
|
|
18
|
+
"failureSpine"
|
|
19
|
+
],
|
|
20
|
+
"properties": {
|
|
21
|
+
"schemaVersion": { "type": "integer", "const": 3 },
|
|
22
|
+
"workflowId": { "type": "string", "minLength": 1, "maxLength": 512 },
|
|
23
|
+
"runKind": {
|
|
24
|
+
"type": "string",
|
|
25
|
+
"enum": ["contract_sql", "contract_sql_langgraph_checkpoint_trust", "quick_preview"]
|
|
26
|
+
},
|
|
27
|
+
"stateRelation": {
|
|
28
|
+
"type": "string",
|
|
29
|
+
"enum": ["matches_expectations", "does_not_match", "not_established"]
|
|
30
|
+
},
|
|
31
|
+
"highStakesReliance": { "type": "string", "enum": ["permitted", "prohibited"] },
|
|
32
|
+
"relianceRationale": { "type": "string", "minLength": 1, "maxLength": 8192 },
|
|
33
|
+
"intentSummary": { "type": "string", "minLength": 1, "maxLength": 8192 },
|
|
34
|
+
"explanation": {
|
|
35
|
+
"type": "object",
|
|
36
|
+
"additionalProperties": false,
|
|
37
|
+
"required": ["headline", "details"],
|
|
38
|
+
"properties": {
|
|
39
|
+
"headline": { "type": "string", "minLength": 1, "maxLength": 2048 },
|
|
40
|
+
"details": {
|
|
41
|
+
"type": "array",
|
|
42
|
+
"items": {
|
|
43
|
+
"type": "object",
|
|
44
|
+
"additionalProperties": false,
|
|
45
|
+
"required": ["code", "message"],
|
|
46
|
+
"properties": {
|
|
47
|
+
"code": { "type": "string", "minLength": 1, "maxLength": 256 },
|
|
48
|
+
"message": { "type": "string", "minLength": 1, "maxLength": 4096 }
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
},
|
|
54
|
+
"steps": {
|
|
55
|
+
"type": "array",
|
|
56
|
+
"items": {
|
|
57
|
+
"type": "object",
|
|
58
|
+
"additionalProperties": false,
|
|
59
|
+
"required": ["seq", "declaredAction", "expectedOutcome", "observedOutcome"],
|
|
60
|
+
"properties": {
|
|
61
|
+
"seq": { "type": "integer", "minimum": 0 },
|
|
62
|
+
"toolId": { "type": "string", "maxLength": 512 },
|
|
63
|
+
"declaredAction": { "type": "string", "minLength": 1, "maxLength": 4096 },
|
|
64
|
+
"expectedOutcome": { "type": "string", "minLength": 1, "maxLength": 4096 },
|
|
65
|
+
"observedOutcome": { "type": "string", "minLength": 1, "maxLength": 8192 }
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
},
|
|
69
|
+
"humanReport": { "type": "string", "minLength": 1, "maxLength": 1048576 },
|
|
70
|
+
"checkpointVerdicts": {
|
|
71
|
+
"type": "array",
|
|
72
|
+
"items": {
|
|
73
|
+
"type": "object",
|
|
74
|
+
"additionalProperties": false,
|
|
75
|
+
"required": ["checkpointKey", "verdict", "seqs", "productionMeaning"],
|
|
76
|
+
"properties": {
|
|
77
|
+
"checkpointKey": { "type": "string", "minLength": 1, "maxLength": 2048 },
|
|
78
|
+
"verdict": {
|
|
79
|
+
"type": "string",
|
|
80
|
+
"enum": ["verified", "inconsistent", "incomplete"]
|
|
81
|
+
},
|
|
82
|
+
"seqs": {
|
|
83
|
+
"type": "array",
|
|
84
|
+
"items": { "type": "integer", "minimum": 0 }
|
|
85
|
+
},
|
|
86
|
+
"productionMeaning": { "type": "string", "minLength": 1, "maxLength": 8192 }
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
},
|
|
90
|
+
"evidenceCompleteness": {
|
|
91
|
+
"$ref": "https://agentskeptic.com/schemas/evidence-completeness-v1.schema.json"
|
|
92
|
+
},
|
|
93
|
+
"failureSpine": {
|
|
94
|
+
"$ref": "https://agentskeptic.com/schemas/failure-spine-v1.schema.json"
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
}
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
"properties": {
|
|
8
8
|
"schemaVersion": { "type": "integer", "const": 3 },
|
|
9
9
|
"certificate": {
|
|
10
|
-
"$ref": "https://agentskeptic.com/schemas/outcome-certificate-
|
|
10
|
+
"$ref": "https://agentskeptic.com/schemas/outcome-certificate-v3.schema.json"
|
|
11
11
|
},
|
|
12
12
|
"cliVersion": { "type": "string", "maxLength": 128 },
|
|
13
13
|
"createdFrom": { "type": "string", "maxLength": 256 }
|
|
@@ -82,7 +82,7 @@
|
|
|
82
82
|
"enum": ["contract_sql", "contract_sql_langgraph_checkpoint_trust"]
|
|
83
83
|
},
|
|
84
84
|
"certificateCanonicalDigest": { "type": "string", "pattern": "^[a-f0-9]{64}$" },
|
|
85
|
-
"certificate": { "$ref": "https://agentskeptic.com/schemas/outcome-certificate-
|
|
85
|
+
"certificate": { "$ref": "https://agentskeptic.com/schemas/outcome-certificate-v3.schema.json" }
|
|
86
86
|
}
|
|
87
87
|
}
|
|
88
88
|
},
|
|
@@ -27,6 +27,9 @@ const MAX_SUMMARY_UTF8_BYTES = 65536;
|
|
|
27
27
|
const MAX_PR_BODY_UTF8_BYTES = 10240;
|
|
28
28
|
const STDERR_TAIL_LINES = 20;
|
|
29
29
|
|
|
30
|
+
/** Max UTF-8 bytes of stdout parsed for Outcome Certificate JSON (`failureSpine` extraction). */
|
|
31
|
+
const MAX_STDOUT_PARSE_BYTES = 262144;
|
|
32
|
+
|
|
30
33
|
const REPO_ROOT = join(__dirname, "..");
|
|
31
34
|
const README_ADOPTION_START = "<!-- adoption-canonical:start -->";
|
|
32
35
|
const README_ADOPTION_END = "<!-- adoption-canonical:end -->";
|
|
@@ -311,8 +314,109 @@ function formatStderrBlock(stderrText) {
|
|
|
311
314
|
}
|
|
312
315
|
|
|
313
316
|
/**
 * Parse workflow stdout as a single Outcome Certificate JSON document and extract its `failureSpine`.
 *
 * The text is trimmed and size-checked (via `utf8ByteLength`) before it is handed to `JSON.parse`,
 * so oversized output is reported instead of parsed.
 *
 * @param {string} stdoutText Raw workflow stdout; `null`/`undefined` are treated as empty text.
 * @returns {{ ok: true; spine: Record<string, unknown> } | { malformed: true } | { oversized: true }}
 *   - `ok`: the parsed document is a JSON object whose `failureSpine` is itself a JSON object.
 *   - `malformed`: stdout is empty, is not valid JSON, is not a JSON object, or has no object-valued `failureSpine`.
 *   - `oversized`: the trimmed text is larger than `MAX_STDOUT_PARSE_BYTES`.
 */
function extractFailureSummaryFromStdout(stdoutText) {
  const trimmed = String(stdoutText ?? "").trim();
  if (trimmed.length === 0) return { malformed: true };
  if (utf8ByteLength(trimmed) > MAX_STDOUT_PARSE_BYTES) return { oversized: true };

  let parsed;
  try {
    parsed = JSON.parse(trimmed);
  } catch {
    // Not JSON at all: the caller decides how to surface the raw text.
    return { malformed: true };
  }

  // `typeof [] === "object"`, so arrays have to be rejected explicitly; otherwise a JSON array
  // would be returned as a "spine" even though the contract promises a keyed record.
  const isRecord = (value) => value !== null && typeof value === "object" && !Array.isArray(value);

  if (!isRecord(parsed)) return { malformed: true };
  const spine = parsed.failureSpine;
  if (!isRecord(spine)) return { malformed: true };
  return { ok: true, spine };
}
|
|
336
|
+
|
|
337
|
+
/**
 * Render a `failureSpine` object as the "Failure summary (agentskeptic)" markdown section.
 *
 * Fields are interpolated as-is. A missing or non-object `actionableFailure` is rendered with
 * `unknown` placeholders instead of throwing, so one incomplete spine cannot abort the whole
 * report being assembled around this section.
 *
 * @param {Record<string, unknown>} spine Failure spine; reads `trustDecision`, `summary`,
 *   `actionableFailure`, `primaryCodes`, `rerunGuidance` and `source`.
 * @returns {string} Markdown text: heading, blank line, one bullet per field, trailing newline.
 */
function renderFailureSummaryMarkdownFromSpine(spine) {
  const rawActionable = spine.actionableFailure;
  const af = /** @type {{ category: unknown; severity: unknown; recommendedAction: unknown; automationSafe: unknown }} */ ({
    category: "unknown",
    severity: "unknown",
    recommendedAction: "unknown",
    automationSafe: "unknown",
    // Real values (when present) override the placeholders above.
    ...(rawActionable !== null && typeof rawActionable === "object" ? rawActionable : {}),
  });
  // Non-array `primaryCodes` renders as an empty list rather than a stringified object.
  const codes = Array.isArray(spine.primaryCodes) ? spine.primaryCodes.join(",") : "";
  return [
    "## Failure summary (agentskeptic)",
    "",
    `- trust_decision: ${spine.trustDecision}`,
    `- summary: ${spine.summary}`,
    `- actionable_failure: category=${af.category} severity=${af.severity} recommended_action=${af.recommendedAction} automation_safe=${af.automationSafe}`,
    `- primary_codes: ${codes}`,
    `- rerun_guidance: ${spine.rerunGuidance}`,
    `- source: ${spine.source}`,
    "", // yields the trailing newline after join
  ].join("\n");
}
|
|
357
|
+
|
|
358
|
+
/**
 * Project a CLI error envelope onto the "Failure summary (agentskeptic)" markdown section.
 *
 * `trust_decision`, `primary_codes` and `source` are fixed operational values; the summary and
 * actionable-failure fields come from `envelope.failureDiagnosis`, and the rerun guidance is the
 * stringified `envelope.message`.
 *
 * @param {Record<string, unknown>} envelope — cli-error-envelope JSON
 * @returns {string} Markdown text ending with a newline.
 */
function projectCliEnvelopeToCiMarkdown(envelope) {
  const diagnosis = /** @type {{ summary: string; actionableFailure: { category: string; severity: string; recommendedAction: string; automationSafe: boolean } }} */ (
    envelope.failureDiagnosis
  );
  const { category, severity, recommendedAction, automationSafe } = diagnosis.actionableFailure;

  const lines = [];
  lines.push("## Failure summary (agentskeptic)");
  lines.push("");
  lines.push("- trust_decision: unknown");
  lines.push(`- summary: ${diagnosis.summary}`);
  lines.push(
    `- actionable_failure: category=${category} severity=${severity} recommended_action=${recommendedAction} automation_safe=${automationSafe}`,
  );
  lines.push("- primary_codes: _(operational)_");
  lines.push(`- rerun_guidance: ${String(envelope.message)}`);
  lines.push("- source: operational");
  lines.push(""); // trailing newline once joined
  return lines.join("\n");
}
|
|
378
|
+
|
|
379
|
+
/**
 * Try to interpret one line of text as a CLI error envelope.
 *
 * A line qualifies when, after trimming, it starts with `{`, parses as JSON, has
 * `schemaVersion === 2` and `kind === "execution_truth_layer_error"`, and carries an object
 * `failureDiagnosis` with a truthy `actionableFailure`.
 *
 * @param {string} line
 * @returns {Record<string, any> | null} The parsed envelope, or `null` when the line does not qualify.
 */
function tryParseCliErrorEnvelopeLine(line) {
  const candidate = String(line).trim();
  if (candidate[0] !== "{") return null;

  let parsed;
  try {
    parsed = JSON.parse(candidate);
  } catch {
    // Lines that merely look like JSON are not envelopes.
    return null;
  }

  if (!parsed || typeof parsed !== "object") return null;
  if (parsed.schemaVersion !== 2) return null;
  if (parsed.kind !== "execution_truth_layer_error") return null;

  const diagnosis = parsed.failureDiagnosis;
  if (!diagnosis || typeof diagnosis !== "object") return null;

  return diagnosis.actionableFailure ? parsed : null;
}
|
|
403
|
+
|
|
404
|
+
/**
 * Scan stderr line by line and render every line recognized as a CLI error envelope.
 *
 * @param {string} stderrText Captured stderr; split on `\n` or `\r\n`.
 * @returns {string[]} One markdown block per recognized envelope line, in input order
 *   (empty when no line is recognized).
 */
function extractOperationalFailureMarkdownFromStderr(stderrText) {
  return String(stderrText)
    .split(/\r?\n/)
    .flatMap((rawLine) => {
      const envelope = tryParseCliErrorEnvelopeLine(rawLine);
      // Lines that are not envelopes contribute nothing.
      return envelope ? [projectCliEnvelopeToCiMarkdown(envelope)] : [];
    });
}
|
|
416
|
+
|
|
417
|
+
/**
|
|
418
|
+
* Assemble PR body: header → optional stdout oversize note → failure summary (certificate spine and/or operational stderr) → stderr → footer → marker.
|
|
419
|
+
* Truncates stderr tail lines from the front until UTF-8 length ≤ max (failure summary retained).
|
|
316
420
|
*
|
|
317
421
|
* @param {Record<string, unknown>} payload
|
|
318
422
|
* @param {{ stderrText: string; workflowStdoutText: string }} capture
|
|
@@ -329,12 +433,27 @@ ${String(payload.identityOneLiner)}
|
|
|
329
433
|
|
|
330
434
|
`;
|
|
331
435
|
|
|
332
|
-
const
|
|
333
|
-
const
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
436
|
+
const ext = extractFailureSummaryFromStdout(workflowStdoutText);
|
|
437
|
+
const operationalBlocks = extractOperationalFailureMarkdownFromStderr(stderrText);
|
|
438
|
+
|
|
439
|
+
let oversizedNote = "";
|
|
440
|
+
if (ext.oversized) {
|
|
441
|
+
oversizedNote = `_(stdout exceeded 262144 UTF-8 bytes; failure summary skipped)_\n\n`;
|
|
442
|
+
}
|
|
443
|
+
|
|
444
|
+
const failureParts = [];
|
|
445
|
+
if ("ok" in ext && ext.ok) failureParts.push(renderFailureSummaryMarkdownFromSpine(ext.spine));
|
|
446
|
+
failureParts.push(...operationalBlocks);
|
|
447
|
+
|
|
448
|
+
const failureSummaryBlock = failureParts.length > 0 ? failureParts.join("\n") : "";
|
|
449
|
+
|
|
450
|
+
let unparsedStdoutBlock = "";
|
|
451
|
+
if (ext.malformed) {
|
|
452
|
+
const rawOut = String(workflowStdoutText).trim();
|
|
453
|
+
if (rawOut.length > 0) {
|
|
454
|
+
unparsedStdoutBlock = `## Verification stdout (unparsed)\n\n\`\`\`text\n${rawOut}\n\`\`\`\n\n`;
|
|
455
|
+
}
|
|
456
|
+
}
|
|
338
457
|
|
|
339
458
|
let stderrBlock = formatStderrBlock(stderrText);
|
|
340
459
|
|
|
@@ -350,12 +469,16 @@ ${String(payload.identityOneLiner)}
|
|
|
350
469
|
"",
|
|
351
470
|
].join("\n");
|
|
352
471
|
|
|
353
|
-
function assemble(
|
|
354
|
-
const raw = header +
|
|
472
|
+
function assemble(middle) {
|
|
473
|
+
const raw = header + middle + footer;
|
|
355
474
|
return normalizeDiscoveryText(raw);
|
|
356
475
|
}
|
|
357
476
|
|
|
358
|
-
|
|
477
|
+
function middleFrom(stderrBlk) {
|
|
478
|
+
return oversizedNote + failureSummaryBlock + unparsedStdoutBlock + stderrBlk;
|
|
479
|
+
}
|
|
480
|
+
|
|
481
|
+
let body = assemble(middleFrom(stderrBlock));
|
|
359
482
|
if (utf8ByteLength(body) <= MAX_PR_BODY_UTF8_BYTES) {
|
|
360
483
|
return body;
|
|
361
484
|
}
|
|
@@ -367,19 +490,14 @@ ${String(payload.identityOneLiner)}
|
|
|
367
490
|
stderrBlock = inner
|
|
368
491
|
? `## CLI stderr (last ${STDERR_TAIL_LINES} lines)\n\n\`\`\`text\n${inner}\n\`\`\`\n`
|
|
369
492
|
: "## CLI stderr (last 20 lines)\n\n_(no stderr)_\n";
|
|
370
|
-
body = assemble(
|
|
493
|
+
body = assemble(middleFrom(stderrBlock));
|
|
371
494
|
if (utf8ByteLength(body) <= MAX_PR_BODY_UTF8_BYTES) return body;
|
|
372
495
|
}
|
|
373
496
|
|
|
374
497
|
stderrBlock = "## CLI stderr (last 20 lines)\n\n_(no stderr)_\n";
|
|
375
|
-
body = assemble(
|
|
498
|
+
body = assemble(middleFrom(stderrBlock));
|
|
376
499
|
if (utf8ByteLength(body) <= MAX_PR_BODY_UTF8_BYTES) return body;
|
|
377
500
|
|
|
378
|
-
if (verdictSection) {
|
|
379
|
-
body = assemble("", stderrBlock);
|
|
380
|
-
if (utf8ByteLength(body) <= MAX_PR_BODY_UTF8_BYTES) return body;
|
|
381
|
-
}
|
|
382
|
-
|
|
383
501
|
throw new Error(
|
|
384
502
|
`discovery-payload: PR body still exceeds ${MAX_PR_BODY_UTF8_BYTES} bytes after truncation`,
|
|
385
503
|
);
|
|
@@ -408,6 +526,7 @@ module.exports = {
|
|
|
408
526
|
PR_MARKER_LINE_LEGACY,
|
|
409
527
|
MAX_SUMMARY_UTF8_BYTES,
|
|
410
528
|
MAX_PR_BODY_UTF8_BYTES,
|
|
529
|
+
MAX_STDOUT_PARSE_BYTES,
|
|
411
530
|
STDERR_TAIL_LINES,
|
|
412
531
|
buildDiscoveryPayload,
|
|
413
532
|
normalizeDiscoveryText,
|
|
@@ -417,6 +536,9 @@ module.exports = {
|
|
|
417
536
|
renderLlmsTextFromPayload,
|
|
418
537
|
renderCiSummaryMarkdownFromPayload,
|
|
419
538
|
renderCiPrBodyFromPayload,
|
|
539
|
+
extractFailureSummaryFromStdout,
|
|
540
|
+
renderFailureSummaryMarkdownFromSpine,
|
|
541
|
+
projectCliEnvelopeToCiMarkdown,
|
|
420
542
|
parseGithubRepoFromUrl,
|
|
421
543
|
selectPrCommentUpsert,
|
|
422
544
|
};
|