agentskeptic 5.0.0 → 6.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. package/dist/actionableFailure.remediationExhaustive.test.js +1 -1
  2. package/dist/bootstrap/runBootstrapSubcommand.js +1 -1
  3. package/dist/cli.js +1 -1
  4. package/dist/commercial/trustDecisionRecord.schema.test.js +23 -3
  5. package/dist/commercial/trustDecisionRecord.schema.test.js.map +1 -1
  6. package/dist/compare.acceptance.test.js +1 -1
  7. package/dist/decisionEvidenceBundle/validateDecisionEvidenceBundle.js +1 -1
  8. package/dist/decisionEvidenceBundle/writeDecisionEvidenceBundle.js +1 -1
  9. package/dist/decisionEvidenceBundle.test.js +40 -3
  10. package/dist/decisionEvidenceBundle.test.js.map +1 -1
  11. package/dist/decisionGate.js +1 -1
  12. package/dist/evidenceCompleteness.d.ts.map +1 -1
  13. package/dist/evidenceCompleteness.js +2 -24
  14. package/dist/evidenceCompleteness.js.map +1 -1
  15. package/dist/execution-identity.v1.json +1 -1
  16. package/dist/failureSpine.d.ts +37 -0
  17. package/dist/failureSpine.d.ts.map +1 -0
  18. package/dist/failureSpine.js +218 -0
  19. package/dist/failureSpine.js.map +1 -0
  20. package/dist/failureSpine.test.d.ts +2 -0
  21. package/dist/failureSpine.test.d.ts.map +1 -0
  22. package/dist/failureSpine.test.js +28 -0
  23. package/dist/failureSpine.test.js.map +1 -0
  24. package/dist/formatFailureSpineHuman.d.ts +4 -0
  25. package/dist/formatFailureSpineHuman.d.ts.map +1 -0
  26. package/dist/formatFailureSpineHuman.js +18 -0
  27. package/dist/formatFailureSpineHuman.js.map +1 -0
  28. package/dist/governanceEvidence.test.js +21 -2
  29. package/dist/governanceEvidence.test.js.map +1 -1
  30. package/dist/index.d.ts +6 -2
  31. package/dist/index.d.ts.map +1 -1
  32. package/dist/index.js +4 -1
  33. package/dist/index.js.map +1 -1
  34. package/dist/langGraphCheckpointTrustGate.js +1 -1
  35. package/dist/langGraphCheckpointTrustIneligibleCertificate.js +1 -1
  36. package/dist/outcomeCertificate.d.ts +16 -12
  37. package/dist/outcomeCertificate.d.ts.map +1 -1
  38. package/dist/outcomeCertificate.js +50 -10
  39. package/dist/outcomeCertificate.js.map +1 -1
  40. package/dist/outcomeCertificate.test.js +1 -1
  41. package/dist/publicDistribution.generated.d.ts +1 -1
  42. package/dist/publicDistribution.generated.js +1 -1
  43. package/dist/quickVerify/quickParamPointerNegatives.test.js +5 -2
  44. package/dist/quickVerify/quickParamPointerNegatives.test.js.map +1 -1
  45. package/dist/quickVerify/quickParamPointerPromotionGoldens.test.js +7 -4
  46. package/dist/quickVerify/quickParamPointerPromotionGoldens.test.js.map +1 -1
  47. package/dist/remediationConsumptionGuard.test.js +2 -0
  48. package/dist/remediationConsumptionGuard.test.js.map +1 -1
  49. package/dist/remediationMessage.d.ts +6 -0
  50. package/dist/remediationMessage.d.ts.map +1 -0
  51. package/dist/remediationMessage.js +30 -0
  52. package/dist/remediationMessage.js.map +1 -0
  53. package/dist/remediationWireSurfaceGuard.test.js +2 -0
  54. package/dist/remediationWireSurfaceGuard.test.js.map +1 -1
  55. package/dist/schemaLoad.d.ts +1 -1
  56. package/dist/schemaLoad.d.ts.map +1 -1
  57. package/dist/schemaLoad.js +13 -3
  58. package/dist/schemaLoad.js.map +1 -1
  59. package/dist/shareReport/postPublicVerificationReport.d.ts +3 -3
  60. package/dist/shareReport/postPublicVerificationReport.js +1 -1
  61. package/dist/shareReport/postPublicVerificationReport.js.map +1 -1
  62. package/dist/standardVerifyWorkflowCli.d.ts +7 -7
  63. package/dist/standardVerifyWorkflowCli.js +3 -3
  64. package/dist/trustDecision.d.ts +7 -2
  65. package/dist/trustDecision.d.ts.map +1 -1
  66. package/dist/trustDecision.js +14 -7
  67. package/dist/trustDecision.js.map +1 -1
  68. package/dist/verify/batchVerifyTelemetrySubcommand.js +2 -2
  69. package/package.json +1 -1
  70. package/schemas/failure-spine-v1.schema.json +38 -0
  71. package/schemas/openapi-commercial-v1.in.yaml +1 -1
  72. package/schemas/openapi-commercial-v1.yaml +2 -2
  73. package/schemas/outcome-certificate-v3.schema.json +97 -0
  74. package/schemas/public-verification-report-v3.schema.json +1 -1
  75. package/schemas/regression-artifact-v1.schema.json +1 -1
  76. package/scripts/discovery-payload.lib.cjs +140 -18
  77. package/scripts/emit-primary-marketing.cjs +36 -0
@@ -0,0 +1,97 @@
1
+ {
2
+ "$id": "https://agentskeptic.com/schemas/outcome-certificate-v3.schema.json",
3
+ "title": "OutcomeCertificateV3",
4
+ "type": "object",
5
+ "additionalProperties": false,
6
+ "required": [
7
+ "schemaVersion",
8
+ "workflowId",
9
+ "runKind",
10
+ "stateRelation",
11
+ "highStakesReliance",
12
+ "relianceRationale",
13
+ "intentSummary",
14
+ "explanation",
15
+ "steps",
16
+ "humanReport",
17
+ "evidenceCompleteness",
18
+ "failureSpine"
19
+ ],
20
+ "properties": {
21
+ "schemaVersion": { "type": "integer", "const": 3 },
22
+ "workflowId": { "type": "string", "minLength": 1, "maxLength": 512 },
23
+ "runKind": {
24
+ "type": "string",
25
+ "enum": ["contract_sql", "contract_sql_langgraph_checkpoint_trust", "quick_preview"]
26
+ },
27
+ "stateRelation": {
28
+ "type": "string",
29
+ "enum": ["matches_expectations", "does_not_match", "not_established"]
30
+ },
31
+ "highStakesReliance": { "type": "string", "enum": ["permitted", "prohibited"] },
32
+ "relianceRationale": { "type": "string", "minLength": 1, "maxLength": 8192 },
33
+ "intentSummary": { "type": "string", "minLength": 1, "maxLength": 8192 },
34
+ "explanation": {
35
+ "type": "object",
36
+ "additionalProperties": false,
37
+ "required": ["headline", "details"],
38
+ "properties": {
39
+ "headline": { "type": "string", "minLength": 1, "maxLength": 2048 },
40
+ "details": {
41
+ "type": "array",
42
+ "items": {
43
+ "type": "object",
44
+ "additionalProperties": false,
45
+ "required": ["code", "message"],
46
+ "properties": {
47
+ "code": { "type": "string", "minLength": 1, "maxLength": 256 },
48
+ "message": { "type": "string", "minLength": 1, "maxLength": 4096 }
49
+ }
50
+ }
51
+ }
52
+ }
53
+ },
54
+ "steps": {
55
+ "type": "array",
56
+ "items": {
57
+ "type": "object",
58
+ "additionalProperties": false,
59
+ "required": ["seq", "declaredAction", "expectedOutcome", "observedOutcome"],
60
+ "properties": {
61
+ "seq": { "type": "integer", "minimum": 0 },
62
+ "toolId": { "type": "string", "maxLength": 512 },
63
+ "declaredAction": { "type": "string", "minLength": 1, "maxLength": 4096 },
64
+ "expectedOutcome": { "type": "string", "minLength": 1, "maxLength": 4096 },
65
+ "observedOutcome": { "type": "string", "minLength": 1, "maxLength": 8192 }
66
+ }
67
+ }
68
+ },
69
+ "humanReport": { "type": "string", "minLength": 1, "maxLength": 1048576 },
70
+ "checkpointVerdicts": {
71
+ "type": "array",
72
+ "items": {
73
+ "type": "object",
74
+ "additionalProperties": false,
75
+ "required": ["checkpointKey", "verdict", "seqs", "productionMeaning"],
76
+ "properties": {
77
+ "checkpointKey": { "type": "string", "minLength": 1, "maxLength": 2048 },
78
+ "verdict": {
79
+ "type": "string",
80
+ "enum": ["verified", "inconsistent", "incomplete"]
81
+ },
82
+ "seqs": {
83
+ "type": "array",
84
+ "items": { "type": "integer", "minimum": 0 }
85
+ },
86
+ "productionMeaning": { "type": "string", "minLength": 1, "maxLength": 8192 }
87
+ }
88
+ }
89
+ },
90
+ "evidenceCompleteness": {
91
+ "$ref": "https://agentskeptic.com/schemas/evidence-completeness-v1.schema.json"
92
+ },
93
+ "failureSpine": {
94
+ "$ref": "https://agentskeptic.com/schemas/failure-spine-v1.schema.json"
95
+ }
96
+ }
97
+ }
@@ -7,7 +7,7 @@
7
7
  "properties": {
8
8
  "schemaVersion": { "type": "integer", "const": 3 },
9
9
  "certificate": {
10
- "$ref": "https://agentskeptic.com/schemas/outcome-certificate-v2.schema.json"
10
+ "$ref": "https://agentskeptic.com/schemas/outcome-certificate-v3.schema.json"
11
11
  },
12
12
  "cliVersion": { "type": "string", "maxLength": 128 },
13
13
  "createdFrom": { "type": "string", "maxLength": 256 }
@@ -82,7 +82,7 @@
82
82
  "enum": ["contract_sql", "contract_sql_langgraph_checkpoint_trust"]
83
83
  },
84
84
  "certificateCanonicalDigest": { "type": "string", "pattern": "^[a-f0-9]{64}$" },
85
- "certificate": { "$ref": "https://agentskeptic.com/schemas/outcome-certificate-v2.schema.json" }
85
+ "certificate": { "$ref": "https://agentskeptic.com/schemas/outcome-certificate-v3.schema.json" }
86
86
  }
87
87
  }
88
88
  },
@@ -27,6 +27,9 @@ const MAX_SUMMARY_UTF8_BYTES = 65536;
27
27
  const MAX_PR_BODY_UTF8_BYTES = 10240;
28
28
  const STDERR_TAIL_LINES = 20;
29
29
 
30
+ /** Max UTF-8 bytes of stdout parsed for Outcome Certificate JSON (`failureSpine` extraction). */
31
+ const MAX_STDOUT_PARSE_BYTES = 262144;
32
+
30
33
  const REPO_ROOT = join(__dirname, "..");
31
34
  const README_ADOPTION_START = "<!-- adoption-canonical:start -->";
32
35
  const README_ADOPTION_END = "<!-- adoption-canonical:end -->";
@@ -311,8 +314,109 @@ function formatStderrBlock(stderrText) {
311
314
  }
312
315
 
313
316
  /**
314
- * Assemble PR body: header optional verdict stderr footer → marker.
315
- * Truncates stderr block from the start only until UTF-8 length ≤ max.
317
+ * Parse workflow stdout for a single-line/single-object Outcome Certificate JSON and extract `failureSpine`.
318
+ * @param {string} stdoutText
319
+ * @returns {{ ok: true; spine: Record<string, unknown> } | { malformed: true } | { oversized: true }}
320
+ */
321
/**
 * Parse workflow stdout as one Outcome Certificate JSON value and pull out its `failureSpine`.
 *
 * @param {string} stdoutText Raw stdout; `null`/`undefined` are treated as empty text.
 * @returns {{ ok: true; spine: Record<string, unknown> } | { malformed: true } | { oversized: true }}
 *   `oversized` when the trimmed text is larger than `MAX_STDOUT_PARSE_BYTES` UTF-8 bytes (checked
 *   before any parsing); `malformed` when the text is empty, is not valid JSON, or does not carry a
 *   keyed-object `failureSpine`; otherwise `ok` with the spine object itself (not a copy).
 */
function extractFailureSummaryFromStdout(stdoutText) {
  const trimmed = String(stdoutText ?? "").trim();
  if (trimmed.length === 0) return { malformed: true };
  if (utf8ByteLength(trimmed) > MAX_STDOUT_PARSE_BYTES) return { oversized: true };

  let parsed;
  try {
    parsed = JSON.parse(trimmed);
  } catch {
    // Non-JSON stdout is an expected outcome here; it is reported to the caller as `malformed`.
    return { malformed: true };
  }
  if (parsed === null || typeof parsed !== "object") return { malformed: true };

  const spine = parsed.failureSpine;
  // Arrays also report typeof "object", but a spine is read by key, so an array is not a usable spine.
  if (spine === null || typeof spine !== "object" || Array.isArray(spine)) {
    return { malformed: true };
  }
  return { ok: true, spine };
}
336
+
337
/**
 * Render a certificate `failureSpine` as the "Failure summary (agentskeptic)" markdown section.
 *
 * Reads `trustDecision`, `summary`, `actionableFailure`, `primaryCodes`, `rerunGuidance` and `source`
 * from the spine and interpolates them as-is. `primaryCodes` is comma-joined when it is an array and
 * rendered empty otherwise. When `actionableFailure` is absent or not an object, its four fields are
 * rendered as `unknown` instead of throwing, so a partially populated spine still yields a summary.
 *
 * @param {Record<string, unknown>} spine
 * @returns {string} Markdown lines joined with "\n", ending in a trailing newline.
 */
function renderFailureSummaryMarkdownFromSpine(spine) {
  const rawActionable = spine.actionableFailure;
  const af = /** @type {{ category: unknown; severity: unknown; recommendedAction: unknown; automationSafe: unknown }} */ (
    rawActionable !== null && typeof rawActionable === "object"
      ? rawActionable
      : {
          category: "unknown",
          severity: "unknown",
          recommendedAction: "unknown",
          automationSafe: "unknown",
        }
  );
  const codes = Array.isArray(spine.primaryCodes) ? spine.primaryCodes.join(",") : "";
  return [
    "## Failure summary (agentskeptic)",
    "",
    `- trust_decision: ${spine.trustDecision}`,
    `- summary: ${spine.summary}`,
    `- actionable_failure: category=${af.category} severity=${af.severity} recommended_action=${af.recommendedAction} automation_safe=${af.automationSafe}`,
    `- primary_codes: ${codes}`,
    `- rerun_guidance: ${spine.rerunGuidance}`,
    `- source: ${spine.source}`,
    "", // trailing empty entry produces the final newline after join
  ].join("\n");
}
357
+
358
/**
 * Project a CLI error envelope onto the same "Failure summary (agentskeptic)" markdown shape used for
 * certificate spines. `trust_decision`, `primary_codes` and `source` are fixed operational values;
 * `summary` and the actionable-failure fields come from `envelope.failureDiagnosis`, and
 * `rerun_guidance` is `String(envelope.message)`.
 *
 * When `failureDiagnosis` or its `actionableFailure` is absent or not an object, the affected fields
 * are rendered as `unknown` instead of throwing, so callers that did not pre-validate the envelope
 * still get a summary.
 *
 * @param {Record<string, unknown>} envelope — cli-error-envelope JSON
 * @returns {string} Markdown lines joined with "\n", ending in a trailing newline.
 */
function projectCliEnvelopeToCiMarkdown(envelope) {
  const isRecord = (value) => value !== null && typeof value === "object";

  const rawDiagnosis = envelope.failureDiagnosis;
  const fd = /** @type {{ summary: unknown; actionableFailure?: unknown }} */ (
    isRecord(rawDiagnosis) ? rawDiagnosis : { summary: "unknown" }
  );
  const af = /** @type {{ category: unknown; severity: unknown; recommendedAction: unknown; automationSafe: unknown }} */ (
    isRecord(fd.actionableFailure)
      ? fd.actionableFailure
      : {
          category: "unknown",
          severity: "unknown",
          recommendedAction: "unknown",
          automationSafe: "unknown",
        }
  );
  return [
    "## Failure summary (agentskeptic)",
    "",
    "- trust_decision: unknown",
    `- summary: ${fd.summary}`,
    `- actionable_failure: category=${af.category} severity=${af.severity} recommended_action=${af.recommendedAction} automation_safe=${af.automationSafe}`,
    "- primary_codes: _(operational)_",
    `- rerun_guidance: ${String(envelope.message)}`,
    "- source: operational",
    "", // trailing empty entry produces the final newline after join
  ].join("\n");
}
378
+
379
/**
 * Try to interpret one stderr line as a CLI error envelope.
 *
 * A line qualifies only when, after trimming, it starts with "{", parses as JSON, has
 * `schemaVersion === 2` and `kind === "execution_truth_layer_error"`, and carries an object
 * `failureDiagnosis` whose `actionableFailure` is itself an object. Requiring an object (rather than
 * any truthy value) keeps envelopes whose `actionableFailure` cannot be read by field out of the
 * rendered summary.
 *
 * @param {string} line
 * @returns {Record<string, unknown> | null} The parsed envelope, or `null` when the line is anything else.
 */
function tryParseCliErrorEnvelopeLine(line) {
  const text = String(line).trim();
  // Cheap pre-filter: ordinary log lines never reach JSON.parse.
  if (!text.startsWith("{")) return null;

  let candidate;
  try {
    candidate = JSON.parse(text);
  } catch {
    // Deliberate best-effort: stderr is mostly free-form text, so unparseable lines are simply skipped.
    return null;
  }

  const isRecord = (value) => value !== null && typeof value === "object";
  if (!isRecord(candidate)) return null;
  if (candidate.schemaVersion !== 2) return null;
  if (candidate.kind !== "execution_truth_layer_error") return null;
  if (!isRecord(candidate.failureDiagnosis)) return null;
  if (!isRecord(candidate.failureDiagnosis.actionableFailure)) return null;
  return candidate;
}
403
+
404
/**
 * Scan stderr line by line and render every CLI error envelope found into a markdown block.
 * Lines are split on LF or CRLF; lines that are not recognised as envelopes contribute nothing.
 *
 * @param {string} stderrText
 * @returns {string[]} One markdown block per recognised envelope, in stderr order.
 */
function extractOperationalFailureMarkdownFromStderr(stderrText) {
  return String(stderrText)
    .split(/\r?\n/)
    .flatMap((stderrLine) => {
      const envelope = tryParseCliErrorEnvelopeLine(stderrLine);
      return envelope ? [projectCliEnvelopeToCiMarkdown(envelope)] : [];
    });
}
416
+
417
+ /**
418
+ * Assemble PR body: header → optional stdout oversize note → failure summary (certificate spine and/or operational stderr) → stderr → footer → marker.
419
+ * Truncates stderr tail lines from the front until UTF-8 length ≤ max (failure summary retained).
316
420
  *
317
421
  * @param {Record<string, unknown>} payload
318
422
  * @param {{ stderrText: string; workflowStdoutText: string }} capture
@@ -329,12 +433,27 @@ ${String(payload.identityOneLiner)}
329
433
 
330
434
  `;
331
435
 
332
- const verdictTrim = String(workflowStdoutText).trim();
333
- const oneLine =
334
- verdictTrim.length > 0 ? verdictTrim.split("\n")[0].slice(0, 500) : "";
335
- const verdictSection = oneLine
336
- ? ["## Verification stdout (first line)", "", "```", oneLine, "```", ""].join("\n")
337
- : "";
436
+ const ext = extractFailureSummaryFromStdout(workflowStdoutText);
437
+ const operationalBlocks = extractOperationalFailureMarkdownFromStderr(stderrText);
438
+
439
+ let oversizedNote = "";
440
+ if (ext.oversized) {
441
+ oversizedNote = `_(stdout exceeded 262144 UTF-8 bytes; failure summary skipped)_\n\n`;
442
+ }
443
+
444
+ const failureParts = [];
445
+ if ("ok" in ext && ext.ok) failureParts.push(renderFailureSummaryMarkdownFromSpine(ext.spine));
446
+ failureParts.push(...operationalBlocks);
447
+
448
+ const failureSummaryBlock = failureParts.length > 0 ? failureParts.join("\n") : "";
449
+
450
+ let unparsedStdoutBlock = "";
451
+ if (ext.malformed) {
452
+ const rawOut = String(workflowStdoutText).trim();
453
+ if (rawOut.length > 0) {
454
+ unparsedStdoutBlock = `## Verification stdout (unparsed)\n\n\`\`\`text\n${rawOut}\n\`\`\`\n\n`;
455
+ }
456
+ }
338
457
 
339
458
  let stderrBlock = formatStderrBlock(stderrText);
340
459
 
@@ -350,12 +469,16 @@ ${String(payload.identityOneLiner)}
350
469
  "",
351
470
  ].join("\n");
352
471
 
353
- function assemble(verdict, sb) {
354
- const raw = header + verdict + sb + footer;
472
+ function assemble(middle) {
473
+ const raw = header + middle + footer;
355
474
  return normalizeDiscoveryText(raw);
356
475
  }
357
476
 
358
- let body = assemble(verdictSection, stderrBlock);
477
+ function middleFrom(stderrBlk) {
478
+ return oversizedNote + failureSummaryBlock + unparsedStdoutBlock + stderrBlk;
479
+ }
480
+
481
+ let body = assemble(middleFrom(stderrBlock));
359
482
  if (utf8ByteLength(body) <= MAX_PR_BODY_UTF8_BYTES) {
360
483
  return body;
361
484
  }
@@ -367,19 +490,14 @@ ${String(payload.identityOneLiner)}
367
490
  stderrBlock = inner
368
491
  ? `## CLI stderr (last ${STDERR_TAIL_LINES} lines)\n\n\`\`\`text\n${inner}\n\`\`\`\n`
369
492
  : "## CLI stderr (last 20 lines)\n\n_(no stderr)_\n";
370
- body = assemble(verdictSection, stderrBlock);
493
+ body = assemble(middleFrom(stderrBlock));
371
494
  if (utf8ByteLength(body) <= MAX_PR_BODY_UTF8_BYTES) return body;
372
495
  }
373
496
 
374
497
  stderrBlock = "## CLI stderr (last 20 lines)\n\n_(no stderr)_\n";
375
- body = assemble(verdictSection, stderrBlock);
498
+ body = assemble(middleFrom(stderrBlock));
376
499
  if (utf8ByteLength(body) <= MAX_PR_BODY_UTF8_BYTES) return body;
377
500
 
378
- if (verdictSection) {
379
- body = assemble("", stderrBlock);
380
- if (utf8ByteLength(body) <= MAX_PR_BODY_UTF8_BYTES) return body;
381
- }
382
-
383
501
  throw new Error(
384
502
  `discovery-payload: PR body still exceeds ${MAX_PR_BODY_UTF8_BYTES} bytes after truncation`,
385
503
  );
@@ -408,6 +526,7 @@ module.exports = {
408
526
  PR_MARKER_LINE_LEGACY,
409
527
  MAX_SUMMARY_UTF8_BYTES,
410
528
  MAX_PR_BODY_UTF8_BYTES,
529
+ MAX_STDOUT_PARSE_BYTES,
411
530
  STDERR_TAIL_LINES,
412
531
  buildDiscoveryPayload,
413
532
  normalizeDiscoveryText,
@@ -417,6 +536,9 @@ module.exports = {
417
536
  renderLlmsTextFromPayload,
418
537
  renderCiSummaryMarkdownFromPayload,
419
538
  renderCiPrBodyFromPayload,
539
+ extractFailureSummaryFromStdout,
540
+ renderFailureSummaryMarkdownFromSpine,
541
+ projectCliEnvelopeToCiMarkdown,
420
542
  parseGithubRepoFromUrl,
421
543
  selectPrCommentUpsert,
422
544
  };
@@ -178,6 +178,42 @@ Normative **public distribution** and anchor sync: [\`docs/public-distribution.m
178
178
  - Verification Contract Manifest (repo raw): ${contractRaw}
179
179
  - Acquisition page (canonical): ${acquisitionUrl}
180
180
  - CI regeneration + drift pathspecs: [\`schemas/ci/verification-truth.manifest.json\`](schemas/ci/verification-truth.manifest.json) (validated by [\`test/verification-truth.closed-drift.contract.test.mjs\`](test/verification-truth.closed-drift.contract.test.mjs))
181
+
182
+ ## Cursor Cloud specific instructions
183
+
184
+ ### Architecture
185
+
186
+ This is an npm workspace monorepo with two packages:
187
+ - **Root** (\`agentskeptic\`): Core verification library + CLI. Framework-agnostic Node.js, ESM, TypeScript.
188
+ - **\`website/\`** (\`agentskeptic-web\`): Next.js 16 commercial SaaS app (Auth, Stripe, API keys, verification demo).
189
+
190
+ ### Prerequisites
191
+
192
+ - **Node.js 22.x** (required by \`engines\` field). Install via nvm: \`nvm install 22\`.
193
+ - **PostgreSQL 16** with two databases: \`wfv_website\` (core) and \`wfv_telemetry\` (telemetry).
194
+ - The env file at \`website/.env\` (gitignored) must have \`DATABASE_URL\` and \`TELEMETRY_DATABASE_URL\` pointing to local Postgres. See \`website/.env.example\` for all variables.
195
+
196
+ ### Common commands
197
+
198
+ | Task | Command | Notes |
199
+ |------|---------|-------|
200
+ | Install deps | \`npm install\` (repo root) | Installs root + website workspace |
201
+ | Build core | \`npm run build\` | TypeScript compile + asset generation |
202
+ | Run demo | \`npm start\` | Builds then runs bundled wf_complete/wf_missing verification |
203
+ | Website dev | \`npm run dev\` (repo root) | Delegates to \`next dev --turbopack\` in website workspace |
204
+ | Core vitest | \`npx vitest run\` (repo root) | Runs \`src/**/*.test.ts\` and \`test/**/*.test.ts\` |
205
+ | SQLite tests | \`npm run test:node:sqlite\` | Builds first, then runs node:test suite (fast, no Postgres) |
206
+ | Website vitest | \`npm run test:vitest -w agentskeptic-web\` | Needs \`DATABASE_URL\` + \`TELEMETRY_DATABASE_URL\` |
207
+ | Full CI gate | \`npm test\` (or \`npm run verification:truth\`) | Requires Postgres; see \`docs/testing.md\` |
208
+ | DB migrate | \`npm run db:migrate\` / \`npm run db:migrate:telemetry\` (from \`website/\`) | Requires \`DATABASE_URL\`/\`TELEMETRY_DATABASE_URL\` in env or \`website/.env\` |
209
+
210
+ ### Gotchas
211
+
212
+ - \`npm run build\` must complete before the CLI (\`node dist/cli.js\`) or website demo API (\`/api/verify\`) work.
213
+ - The website migration scripts (\`db-migrate.mjs\`) load \`website/.env\` but only for keys not already in \`process.env\`. If env vars are not exported in the shell, the \`.env\` file must exist.
214
+ - The commit hook (\`.husky/commit-msg\`) runs \`commitlint\` for Conventional Commits. Use \`--no-verify\` to skip if needed, but CI enforces the same rules on PRs.
215
+ - PostgreSQL must be running before migrations or website dev. Start with: \`pg_ctlcluster 16 main start\`.
216
+ - \`src/planTransition.test.ts\` has a known timeout-sensitive integration test that may flake in resource-constrained environments. This is pre-existing, not a setup issue.
181
217
  `;
182
218
  writeFileSync(join(ROOT, "AGENTS.md"), body, "utf8");
183
219
  }