agentskeptic 5.0.0 → 6.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. package/dist/actionableFailure.remediationExhaustive.test.js +1 -1
  2. package/dist/bootstrap/runBootstrapSubcommand.js +1 -1
  3. package/dist/cli.js +1 -1
  4. package/dist/commercial/trustDecisionRecord.schema.test.js +23 -3
  5. package/dist/commercial/trustDecisionRecord.schema.test.js.map +1 -1
  6. package/dist/compare.acceptance.test.js +1 -1
  7. package/dist/decisionEvidenceBundle/validateDecisionEvidenceBundle.js +1 -1
  8. package/dist/decisionEvidenceBundle/writeDecisionEvidenceBundle.js +1 -1
  9. package/dist/decisionEvidenceBundle.test.js +40 -3
  10. package/dist/decisionEvidenceBundle.test.js.map +1 -1
  11. package/dist/decisionGate.js +1 -1
  12. package/dist/evidenceCompleteness.d.ts.map +1 -1
  13. package/dist/evidenceCompleteness.js +2 -24
  14. package/dist/evidenceCompleteness.js.map +1 -1
  15. package/dist/execution-identity.v1.json +1 -1
  16. package/dist/failureSpine.d.ts +37 -0
  17. package/dist/failureSpine.d.ts.map +1 -0
  18. package/dist/failureSpine.js +218 -0
  19. package/dist/failureSpine.js.map +1 -0
  20. package/dist/failureSpine.test.d.ts +2 -0
  21. package/dist/failureSpine.test.d.ts.map +1 -0
  22. package/dist/failureSpine.test.js +28 -0
  23. package/dist/failureSpine.test.js.map +1 -0
  24. package/dist/formatFailureSpineHuman.d.ts +4 -0
  25. package/dist/formatFailureSpineHuman.d.ts.map +1 -0
  26. package/dist/formatFailureSpineHuman.js +18 -0
  27. package/dist/formatFailureSpineHuman.js.map +1 -0
  28. package/dist/governanceEvidence.test.js +21 -2
  29. package/dist/governanceEvidence.test.js.map +1 -1
  30. package/dist/index.d.ts +6 -2
  31. package/dist/index.d.ts.map +1 -1
  32. package/dist/index.js +4 -1
  33. package/dist/index.js.map +1 -1
  34. package/dist/langGraphCheckpointTrustGate.js +1 -1
  35. package/dist/langGraphCheckpointTrustIneligibleCertificate.js +1 -1
  36. package/dist/outcomeCertificate.d.ts +16 -12
  37. package/dist/outcomeCertificate.d.ts.map +1 -1
  38. package/dist/outcomeCertificate.js +50 -10
  39. package/dist/outcomeCertificate.js.map +1 -1
  40. package/dist/outcomeCertificate.test.js +1 -1
  41. package/dist/publicDistribution.generated.d.ts +1 -1
  42. package/dist/publicDistribution.generated.js +1 -1
  43. package/dist/quickVerify/quickParamPointerNegatives.test.js +5 -2
  44. package/dist/quickVerify/quickParamPointerNegatives.test.js.map +1 -1
  45. package/dist/quickVerify/quickParamPointerPromotionGoldens.test.js +7 -4
  46. package/dist/quickVerify/quickParamPointerPromotionGoldens.test.js.map +1 -1
  47. package/dist/remediationConsumptionGuard.test.js +2 -0
  48. package/dist/remediationConsumptionGuard.test.js.map +1 -1
  49. package/dist/remediationMessage.d.ts +6 -0
  50. package/dist/remediationMessage.d.ts.map +1 -0
  51. package/dist/remediationMessage.js +30 -0
  52. package/dist/remediationMessage.js.map +1 -0
  53. package/dist/remediationWireSurfaceGuard.test.js +2 -0
  54. package/dist/remediationWireSurfaceGuard.test.js.map +1 -1
  55. package/dist/schemaLoad.d.ts +1 -1
  56. package/dist/schemaLoad.d.ts.map +1 -1
  57. package/dist/schemaLoad.js +13 -3
  58. package/dist/schemaLoad.js.map +1 -1
  59. package/dist/shareReport/postPublicVerificationReport.d.ts +3 -3
  60. package/dist/shareReport/postPublicVerificationReport.js +1 -1
  61. package/dist/shareReport/postPublicVerificationReport.js.map +1 -1
  62. package/dist/standardVerifyWorkflowCli.d.ts +7 -7
  63. package/dist/standardVerifyWorkflowCli.js +3 -3
  64. package/dist/trustDecision.d.ts +7 -2
  65. package/dist/trustDecision.d.ts.map +1 -1
  66. package/dist/trustDecision.js +14 -7
  67. package/dist/trustDecision.js.map +1 -1
  68. package/dist/verify/batchVerifyTelemetrySubcommand.js +2 -2
  69. package/package.json +1 -1
  70. package/schemas/failure-spine-v1.schema.json +38 -0
  71. package/schemas/openapi-commercial-v1.in.yaml +1 -1
  72. package/schemas/openapi-commercial-v1.yaml +2 -2
  73. package/schemas/outcome-certificate-v3.schema.json +97 -0
  74. package/schemas/public-verification-report-v3.schema.json +1 -1
  75. package/schemas/regression-artifact-v1.schema.json +1 -1
  76. package/scripts/discovery-payload.lib.cjs +140 -18
@@ -0,0 +1,97 @@
1
+ {
2
+ "$id": "https://agentskeptic.com/schemas/outcome-certificate-v3.schema.json",
3
+ "title": "OutcomeCertificateV3",
4
+ "type": "object",
5
+ "additionalProperties": false,
6
+ "required": [
7
+ "schemaVersion",
8
+ "workflowId",
9
+ "runKind",
10
+ "stateRelation",
11
+ "highStakesReliance",
12
+ "relianceRationale",
13
+ "intentSummary",
14
+ "explanation",
15
+ "steps",
16
+ "humanReport",
17
+ "evidenceCompleteness",
18
+ "failureSpine"
19
+ ],
20
+ "properties": {
21
+ "schemaVersion": { "type": "integer", "const": 3 },
22
+ "workflowId": { "type": "string", "minLength": 1, "maxLength": 512 },
23
+ "runKind": {
24
+ "type": "string",
25
+ "enum": ["contract_sql", "contract_sql_langgraph_checkpoint_trust", "quick_preview"]
26
+ },
27
+ "stateRelation": {
28
+ "type": "string",
29
+ "enum": ["matches_expectations", "does_not_match", "not_established"]
30
+ },
31
+ "highStakesReliance": { "type": "string", "enum": ["permitted", "prohibited"] },
32
+ "relianceRationale": { "type": "string", "minLength": 1, "maxLength": 8192 },
33
+ "intentSummary": { "type": "string", "minLength": 1, "maxLength": 8192 },
34
+ "explanation": {
35
+ "type": "object",
36
+ "additionalProperties": false,
37
+ "required": ["headline", "details"],
38
+ "properties": {
39
+ "headline": { "type": "string", "minLength": 1, "maxLength": 2048 },
40
+ "details": {
41
+ "type": "array",
42
+ "items": {
43
+ "type": "object",
44
+ "additionalProperties": false,
45
+ "required": ["code", "message"],
46
+ "properties": {
47
+ "code": { "type": "string", "minLength": 1, "maxLength": 256 },
48
+ "message": { "type": "string", "minLength": 1, "maxLength": 4096 }
49
+ }
50
+ }
51
+ }
52
+ }
53
+ },
54
+ "steps": {
55
+ "type": "array",
56
+ "items": {
57
+ "type": "object",
58
+ "additionalProperties": false,
59
+ "required": ["seq", "declaredAction", "expectedOutcome", "observedOutcome"],
60
+ "properties": {
61
+ "seq": { "type": "integer", "minimum": 0 },
62
+ "toolId": { "type": "string", "maxLength": 512 },
63
+ "declaredAction": { "type": "string", "minLength": 1, "maxLength": 4096 },
64
+ "expectedOutcome": { "type": "string", "minLength": 1, "maxLength": 4096 },
65
+ "observedOutcome": { "type": "string", "minLength": 1, "maxLength": 8192 }
66
+ }
67
+ }
68
+ },
69
+ "humanReport": { "type": "string", "minLength": 1, "maxLength": 1048576 },
70
+ "checkpointVerdicts": {
71
+ "type": "array",
72
+ "items": {
73
+ "type": "object",
74
+ "additionalProperties": false,
75
+ "required": ["checkpointKey", "verdict", "seqs", "productionMeaning"],
76
+ "properties": {
77
+ "checkpointKey": { "type": "string", "minLength": 1, "maxLength": 2048 },
78
+ "verdict": {
79
+ "type": "string",
80
+ "enum": ["verified", "inconsistent", "incomplete"]
81
+ },
82
+ "seqs": {
83
+ "type": "array",
84
+ "items": { "type": "integer", "minimum": 0 }
85
+ },
86
+ "productionMeaning": { "type": "string", "minLength": 1, "maxLength": 8192 }
87
+ }
88
+ }
89
+ },
90
+ "evidenceCompleteness": {
91
+ "$ref": "https://agentskeptic.com/schemas/evidence-completeness-v1.schema.json"
92
+ },
93
+ "failureSpine": {
94
+ "$ref": "https://agentskeptic.com/schemas/failure-spine-v1.schema.json"
95
+ }
96
+ }
97
+ }
@@ -7,7 +7,7 @@
7
7
  "properties": {
8
8
  "schemaVersion": { "type": "integer", "const": 3 },
9
9
  "certificate": {
10
- "$ref": "https://agentskeptic.com/schemas/outcome-certificate-v2.schema.json"
10
+ "$ref": "https://agentskeptic.com/schemas/outcome-certificate-v3.schema.json"
11
11
  },
12
12
  "cliVersion": { "type": "string", "maxLength": 128 },
13
13
  "createdFrom": { "type": "string", "maxLength": 256 }
@@ -82,7 +82,7 @@
82
82
  "enum": ["contract_sql", "contract_sql_langgraph_checkpoint_trust"]
83
83
  },
84
84
  "certificateCanonicalDigest": { "type": "string", "pattern": "^[a-f0-9]{64}$" },
85
- "certificate": { "$ref": "https://agentskeptic.com/schemas/outcome-certificate-v2.schema.json" }
85
+ "certificate": { "$ref": "https://agentskeptic.com/schemas/outcome-certificate-v3.schema.json" }
86
86
  }
87
87
  }
88
88
  },
@@ -27,6 +27,9 @@ const MAX_SUMMARY_UTF8_BYTES = 65536;
27
27
  const MAX_PR_BODY_UTF8_BYTES = 10240;
28
28
  const STDERR_TAIL_LINES = 20;
29
29
 
30
+ /** Max UTF-8 bytes of stdout parsed for Outcome Certificate JSON (`failureSpine` extraction). */
31
+ const MAX_STDOUT_PARSE_BYTES = 262144;
32
+
30
33
  const REPO_ROOT = join(__dirname, "..");
31
34
  const README_ADOPTION_START = "<!-- adoption-canonical:start -->";
32
35
  const README_ADOPTION_END = "<!-- adoption-canonical:end -->";
@@ -311,8 +314,109 @@ function formatStderrBlock(stderrText) {
311
314
  }
312
315
 
313
316
  /**
314
- * Assemble PR body: header optional verdict stderr footer → marker.
315
- * Truncates stderr block from the start only until UTF-8 length ≤ max.
317
+ * Parse workflow stdout for a single-line/single-object Outcome Certificate JSON and extract `failureSpine`.
318
+ * @param {string} stdoutText
319
+ * @returns {{ ok: true; spine: Record<string, unknown> } | { malformed: true } | { oversized: true }}
320
+ */
321
function extractFailureSummaryFromStdout(stdoutText) {
  // Outcomes:
  //   { ok: true, spine }  - stdout is one JSON object whose `failureSpine` is a non-array object
  //   { oversized: true }  - trimmed stdout is larger than MAX_STDOUT_PARSE_BYTES
  //   { malformed: true }  - anything else (empty, invalid JSON, wrong shape)
  const text = String(stdoutText ?? "").trim();
  if (text.length === 0) return { malformed: true };

  // The size gate runs before JSON.parse so an oversized payload is never parsed.
  if (utf8ByteLength(text) > MAX_STDOUT_PARSE_BYTES) return { oversized: true };

  let parsed;
  try {
    parsed = JSON.parse(text);
  } catch {
    // Non-JSON stdout is an expected condition here; report it instead of throwing.
    return { malformed: true };
  }

  // `typeof [] === "object"`, so arrays are excluded explicitly: an array has none of the
  // named spine fields (`actionableFailure`, `summary`, ...) that consumers read.
  const isRecord = (value) =>
    value !== null && typeof value === "object" && !Array.isArray(value);

  if (!isRecord(parsed)) return { malformed: true };
  const spine = parsed.failureSpine;
  if (!isRecord(spine)) return { malformed: true };
  return { ok: true, spine };
}
336
+
337
/**
 * Render the "Failure summary (agentskeptic)" markdown section from a `failureSpine` object.
 *
 * A missing or non-object `actionableFailure`, or a missing field inside it, is rendered
 * as `unknown` rather than throwing, so an incomplete spine still yields a summary block.
 * `primaryCodes` is joined with "," when it is an array and rendered empty otherwise.
 *
 * @param {Record<string, unknown>} spine
 * @returns {string} markdown lines joined with "\n", ending in a trailing newline
 */
function renderFailureSummaryMarkdownFromSpine(spine) {
  const rawActionable = spine.actionableFailure;
  const af = /** @type {Record<string, unknown>} */ (
    rawActionable !== null && typeof rawActionable === "object" ? rawActionable : {}
  );
  // `??` (not `||`) so a legitimate `automationSafe: false` is rendered as "false".
  const afField = (key) => String(af[key] ?? "unknown");
  const codes = Array.isArray(spine.primaryCodes) ? spine.primaryCodes.join(",") : "";
  return [
    "## Failure summary (agentskeptic)",
    "",
    `- trust_decision: ${spine.trustDecision}`,
    `- summary: ${spine.summary}`,
    `- actionable_failure: category=${afField("category")} severity=${afField("severity")} recommended_action=${afField("recommendedAction")} automation_safe=${afField("automationSafe")}`,
    `- primary_codes: ${codes}`,
    `- rerun_guidance: ${spine.rerunGuidance}`,
    `- source: ${spine.source}`,
    "",
  ].join("\n");
}
357
+
358
/**
 * Project a CLI error envelope onto the CI "Failure summary (agentskeptic)" markdown section.
 *
 * Reads `failureDiagnosis.summary`, the four `failureDiagnosis.actionableFailure` fields and
 * `message` from the envelope; trust decision, primary codes and source are fixed text.
 *
 * @param {Record<string, unknown>} envelope — cli-error-envelope JSON
 * @returns {string} markdown lines joined with "\n", ending in a trailing newline
 */
function projectCliEnvelopeToCiMarkdown(envelope) {
  const diagnosis = /** @type {{ summary: string; actionableFailure: { category: string; severity: string; recommendedAction: string; automationSafe: boolean } }} */ (
    envelope.failureDiagnosis
  );
  const { category, severity, recommendedAction, automationSafe } = diagnosis.actionableFailure;
  const actionableLine = `- actionable_failure: category=${category} severity=${severity} recommended_action=${recommendedAction} automation_safe=${automationSafe}`;

  const lines = [
    "## Failure summary (agentskeptic)",
    "",
    "- trust_decision: unknown",
    `- summary: ${diagnosis.summary}`,
    actionableLine,
    "- primary_codes: _(operational)_",
    `- rerun_guidance: ${String(envelope.message)}`,
    "- source: operational",
    "",
  ];
  return lines.join("\n");
}
378
+
379
/**
 * Try to read a single text line as a CLI error envelope.
 *
 * @param {string} line
 * @returns {Record<string, any> | null} the parsed object when the trimmed line starts with
 *   "{", parses as JSON, has `schemaVersion === 2`, `kind === "execution_truth_layer_error"`
 *   and an object `failureDiagnosis` with a truthy `actionableFailure`; otherwise `null`
 */
function tryParseCliErrorEnvelopeLine(line) {
  const candidate = String(line).trim();
  // Cheap pre-filter: ordinary log lines never reach JSON.parse.
  if (!candidate.startsWith("{")) return null;

  let parsed;
  try {
    parsed = JSON.parse(candidate);
  } catch {
    // A line that merely looks like JSON is not an envelope.
    return null;
  }

  if (!parsed || typeof parsed !== "object") return null;
  if (parsed.schemaVersion !== 2) return null;
  if (parsed.kind !== "execution_truth_layer_error") return null;

  const diagnosis = parsed.failureDiagnosis;
  if (!diagnosis || typeof diagnosis !== "object") return null;

  return diagnosis.actionableFailure ? parsed : null;
}
403
+
404
/**
 * Collect one rendered markdown block for every stderr line that parses as a CLI error
 * envelope. Lines are split on "\n" or "\r\n"; non-envelope lines are ignored.
 *
 * @param {string} stderrText
 * @returns {string[]} rendered blocks in stderr line order (empty when none match)
 */
function extractOperationalFailureMarkdownFromStderr(stderrText) {
  return String(stderrText)
    .split(/\r?\n/)
    .flatMap((rawLine) => {
      const envelope = tryParseCliErrorEnvelopeLine(rawLine);
      return envelope ? [projectCliEnvelopeToCiMarkdown(envelope)] : [];
    });
}
416
+
417
+ /**
418
+ * Assemble PR body: header → optional stdout oversize note → failure summary (certificate spine and/or operational stderr) → stderr → footer → marker.
419
+ * Truncates stderr tail lines from the front until UTF-8 length ≤ max (failure summary retained).
316
420
  *
317
421
  * @param {Record<string, unknown>} payload
318
422
  * @param {{ stderrText: string; workflowStdoutText: string }} capture
@@ -329,12 +433,27 @@ ${String(payload.identityOneLiner)}
329
433
 
330
434
  `;
331
435
 
332
- const verdictTrim = String(workflowStdoutText).trim();
333
- const oneLine =
334
- verdictTrim.length > 0 ? verdictTrim.split("\n")[0].slice(0, 500) : "";
335
- const verdictSection = oneLine
336
- ? ["## Verification stdout (first line)", "", "```", oneLine, "```", ""].join("\n")
337
- : "";
436
+ const ext = extractFailureSummaryFromStdout(workflowStdoutText);
437
+ const operationalBlocks = extractOperationalFailureMarkdownFromStderr(stderrText);
438
+
439
+ let oversizedNote = "";
440
+ if (ext.oversized) {
441
+ oversizedNote = `_(stdout exceeded 262144 UTF-8 bytes; failure summary skipped)_\n\n`;
442
+ }
443
+
444
+ const failureParts = [];
445
+ if ("ok" in ext && ext.ok) failureParts.push(renderFailureSummaryMarkdownFromSpine(ext.spine));
446
+ failureParts.push(...operationalBlocks);
447
+
448
+ const failureSummaryBlock = failureParts.length > 0 ? failureParts.join("\n") : "";
449
+
450
+ let unparsedStdoutBlock = "";
451
+ if (ext.malformed) {
452
+ const rawOut = String(workflowStdoutText).trim();
453
+ if (rawOut.length > 0) {
454
+ unparsedStdoutBlock = `## Verification stdout (unparsed)\n\n\`\`\`text\n${rawOut}\n\`\`\`\n\n`;
455
+ }
456
+ }
338
457
 
339
458
  let stderrBlock = formatStderrBlock(stderrText);
340
459
 
@@ -350,12 +469,16 @@ ${String(payload.identityOneLiner)}
350
469
  "",
351
470
  ].join("\n");
352
471
 
353
- function assemble(verdict, sb) {
354
- const raw = header + verdict + sb + footer;
472
+ function assemble(middle) {
473
+ const raw = header + middle + footer;
355
474
  return normalizeDiscoveryText(raw);
356
475
  }
357
476
 
358
- let body = assemble(verdictSection, stderrBlock);
477
+ function middleFrom(stderrBlk) {
478
+ return oversizedNote + failureSummaryBlock + unparsedStdoutBlock + stderrBlk;
479
+ }
480
+
481
+ let body = assemble(middleFrom(stderrBlock));
359
482
  if (utf8ByteLength(body) <= MAX_PR_BODY_UTF8_BYTES) {
360
483
  return body;
361
484
  }
@@ -367,19 +490,14 @@ ${String(payload.identityOneLiner)}
367
490
  stderrBlock = inner
368
491
  ? `## CLI stderr (last ${STDERR_TAIL_LINES} lines)\n\n\`\`\`text\n${inner}\n\`\`\`\n`
369
492
  : "## CLI stderr (last 20 lines)\n\n_(no stderr)_\n";
370
- body = assemble(verdictSection, stderrBlock);
493
+ body = assemble(middleFrom(stderrBlock));
371
494
  if (utf8ByteLength(body) <= MAX_PR_BODY_UTF8_BYTES) return body;
372
495
  }
373
496
 
374
497
  stderrBlock = "## CLI stderr (last 20 lines)\n\n_(no stderr)_\n";
375
- body = assemble(verdictSection, stderrBlock);
498
+ body = assemble(middleFrom(stderrBlock));
376
499
  if (utf8ByteLength(body) <= MAX_PR_BODY_UTF8_BYTES) return body;
377
500
 
378
- if (verdictSection) {
379
- body = assemble("", stderrBlock);
380
- if (utf8ByteLength(body) <= MAX_PR_BODY_UTF8_BYTES) return body;
381
- }
382
-
383
501
  throw new Error(
384
502
  `discovery-payload: PR body still exceeds ${MAX_PR_BODY_UTF8_BYTES} bytes after truncation`,
385
503
  );
@@ -408,6 +526,7 @@ module.exports = {
408
526
  PR_MARKER_LINE_LEGACY,
409
527
  MAX_SUMMARY_UTF8_BYTES,
410
528
  MAX_PR_BODY_UTF8_BYTES,
529
+ MAX_STDOUT_PARSE_BYTES,
411
530
  STDERR_TAIL_LINES,
412
531
  buildDiscoveryPayload,
413
532
  normalizeDiscoveryText,
@@ -417,6 +536,9 @@ module.exports = {
417
536
  renderLlmsTextFromPayload,
418
537
  renderCiSummaryMarkdownFromPayload,
419
538
  renderCiPrBodyFromPayload,
539
+ extractFailureSummaryFromStdout,
540
+ renderFailureSummaryMarkdownFromSpine,
541
+ projectCliEnvelopeToCiMarkdown,
420
542
  parseGithubRepoFromUrl,
421
543
  selectPrCommentUpsert,
422
544
  };