@sanity/ailf 5.0.0 → 6.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115)
  1. package/config/airbyte/ai_literacy_framework.connector.yaml +276 -0
  2. package/config/bigquery/views/synthesis_parse_failure_rate_7d.sql +42 -0
  3. package/config/diagnosis-cards.ts +318 -0
  4. package/config/models.ts +12 -0
  5. package/dist/_vendor/ailf-core/grader/failure-modes/agent-harness.d.ts +13 -0
  6. package/dist/_vendor/ailf-core/grader/failure-modes/agent-harness.js +16 -0
  7. package/dist/_vendor/ailf-core/grader/failure-modes/common.d.ts +14 -0
  8. package/dist/_vendor/ailf-core/grader/failure-modes/common.js +18 -0
  9. package/dist/_vendor/ailf-core/grader/failure-modes/index.d.ts +45 -0
  10. package/dist/_vendor/ailf-core/grader/failure-modes/index.js +109 -0
  11. package/dist/_vendor/ailf-core/grader/failure-modes/knowledge-probe.d.ts +13 -0
  12. package/dist/_vendor/ailf-core/grader/failure-modes/knowledge-probe.js +17 -0
  13. package/dist/_vendor/ailf-core/grader/failure-modes/literacy.d.ts +13 -0
  14. package/dist/_vendor/ailf-core/grader/failure-modes/literacy.js +17 -0
  15. package/dist/_vendor/ailf-core/grader/failure-modes/mcp.d.ts +13 -0
  16. package/dist/_vendor/ailf-core/grader/failure-modes/mcp.js +17 -0
  17. package/dist/_vendor/ailf-core/index.d.ts +1 -0
  18. package/dist/_vendor/ailf-core/index.js +4 -0
  19. package/dist/_vendor/ailf-core/ports/context.d.ts +12 -0
  20. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +7 -0
  21. package/dist/_vendor/ailf-core/schemas/eval-config.js +8 -0
  22. package/dist/_vendor/ailf-core/services/diagnosis/card-validators.d.ts +41 -0
  23. package/dist/_vendor/ailf-core/services/diagnosis/card-validators.js +40 -0
  24. package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/area-summary.test.d.ts +7 -0
  25. package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/area-summary.test.js +131 -0
  26. package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/failure-mode-summary.test.d.ts +7 -0
  27. package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/failure-mode-summary.test.js +230 -0
  28. package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/no-issues.test.d.ts +7 -0
  29. package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/no-issues.test.js +155 -0
  30. package/dist/_vendor/ailf-core/services/diagnosis/cards/area-summary.d.ts +17 -0
  31. package/dist/_vendor/ailf-core/services/diagnosis/cards/area-summary.js +43 -0
  32. package/dist/_vendor/ailf-core/services/diagnosis/cards/doc-attribution-spotlight.d.ts +46 -0
  33. package/dist/_vendor/ailf-core/services/diagnosis/cards/doc-attribution-spotlight.js +108 -0
  34. package/dist/_vendor/ailf-core/services/diagnosis/cards/failure-mode-summary.d.ts +28 -0
  35. package/dist/_vendor/ailf-core/services/diagnosis/cards/failure-mode-summary.js +140 -0
  36. package/dist/_vendor/ailf-core/services/diagnosis/cards/index.d.ts +49 -0
  37. package/dist/_vendor/ailf-core/services/diagnosis/cards/index.js +65 -0
  38. package/dist/_vendor/ailf-core/services/diagnosis/cards/low-confidence-attribution.d.ts +27 -0
  39. package/dist/_vendor/ailf-core/services/diagnosis/cards/low-confidence-attribution.js +93 -0
  40. package/dist/_vendor/ailf-core/services/diagnosis/cards/no-issues.d.ts +32 -0
  41. package/dist/_vendor/ailf-core/services/diagnosis/cards/no-issues.js +71 -0
  42. package/dist/_vendor/ailf-core/services/diagnosis/cards/regression-vs-baseline.d.ts +44 -0
  43. package/dist/_vendor/ailf-core/services/diagnosis/cards/regression-vs-baseline.js +130 -0
  44. package/dist/_vendor/ailf-core/services/diagnosis/cards/top-recommendations.d.ts +41 -0
  45. package/dist/_vendor/ailf-core/services/diagnosis/cards/top-recommendations.js +111 -0
  46. package/dist/_vendor/ailf-core/services/diagnosis/cards/weakest-area.d.ts +43 -0
  47. package/dist/_vendor/ailf-core/services/diagnosis/cards/weakest-area.js +118 -0
  48. package/dist/_vendor/ailf-core/services/diagnosis/prompt-builders.d.ts +72 -0
  49. package/dist/_vendor/ailf-core/services/diagnosis/prompt-builders.js +286 -0
  50. package/dist/_vendor/ailf-core/services/diagnosis/prompts/doc-attribution-spotlight.system.d.ts +17 -0
  51. package/dist/_vendor/ailf-core/services/diagnosis/prompts/doc-attribution-spotlight.system.js +58 -0
  52. package/dist/_vendor/ailf-core/services/diagnosis/prompts/index.d.ts +10 -0
  53. package/dist/_vendor/ailf-core/services/diagnosis/prompts/index.js +10 -0
  54. package/dist/_vendor/ailf-core/services/diagnosis/prompts/low-confidence-attribution.system.d.ts +15 -0
  55. package/dist/_vendor/ailf-core/services/diagnosis/prompts/low-confidence-attribution.system.js +53 -0
  56. package/dist/_vendor/ailf-core/services/diagnosis/prompts/regression-vs-baseline.system.d.ts +14 -0
  57. package/dist/_vendor/ailf-core/services/diagnosis/prompts/regression-vs-baseline.system.js +63 -0
  58. package/dist/_vendor/ailf-core/services/diagnosis/prompts/top-recommendations.system.d.ts +16 -0
  59. package/dist/_vendor/ailf-core/services/diagnosis/prompts/top-recommendations.system.js +78 -0
  60. package/dist/_vendor/ailf-core/services/diagnosis/prompts/weakest-area.system.d.ts +18 -0
  61. package/dist/_vendor/ailf-core/services/diagnosis/prompts/weakest-area.system.js +74 -0
  62. package/dist/_vendor/ailf-core/services/diagnosis/registry.d.ts +10 -0
  63. package/dist/_vendor/ailf-core/services/diagnosis/registry.js +10 -0
  64. package/dist/_vendor/ailf-core/services/diagnosis-runner.d.ts +119 -2
  65. package/dist/_vendor/ailf-core/services/diagnosis-runner.js +136 -2
  66. package/dist/_vendor/ailf-core/services/index.d.ts +5 -1
  67. package/dist/_vendor/ailf-core/services/index.js +15 -2
  68. package/dist/_vendor/ailf-core/services/llm-client-factory.d.ts +64 -0
  69. package/dist/_vendor/ailf-core/services/llm-client-factory.js +54 -0
  70. package/dist/_vendor/ailf-core/types/diagnosis.d.ts +115 -10
  71. package/dist/_vendor/ailf-core/types/diagnosis.js +3 -1
  72. package/dist/_vendor/ailf-core/types/index.d.ts +8 -1
  73. package/dist/_vendor/ailf-core/types/repo-config.d.ts +16 -0
  74. package/dist/_vendor/ailf-core/types/synthesis-telemetry.d.ts +101 -0
  75. package/dist/_vendor/ailf-core/types/synthesis-telemetry.js +18 -0
  76. package/dist/adapters/config-sources/file-config-adapter.js +8 -6
  77. package/dist/adapters/llm/fake-llm-client.d.ts +20 -0
  78. package/dist/adapters/llm/fake-llm-client.js +38 -1
  79. package/dist/adapters/llm/index.d.ts +1 -1
  80. package/dist/adapters/llm/index.js +1 -1
  81. package/dist/adapters/llm/openai-llm-client.js +59 -5
  82. package/dist/adapters/llm/retry.d.ts +18 -0
  83. package/dist/adapters/llm/retry.js +21 -0
  84. package/dist/adapters/synthesis/synthesis-telemetry-schema.d.ts +49 -0
  85. package/dist/adapters/synthesis/synthesis-telemetry-schema.js +55 -0
  86. package/dist/adapters/task-sources/content-lake-task-source.js +10 -5
  87. package/dist/adapters/task-sources/repo-schemas.d.ts +7 -0
  88. package/dist/adapters/task-sources/repo-schemas.js +10 -0
  89. package/dist/cli-program.js +3 -0
  90. package/dist/commands/interpret.d.ts +70 -0
  91. package/dist/commands/interpret.js +221 -0
  92. package/dist/commands/pipeline-action.d.ts +44 -0
  93. package/dist/commands/pipeline-action.js +193 -1
  94. package/dist/commands/run.d.ts +2 -0
  95. package/dist/commands/run.js +2 -0
  96. package/dist/composition-root.d.ts +21 -23
  97. package/dist/composition-root.js +107 -41
  98. package/dist/config/diagnosis-cards.ts +318 -0
  99. package/dist/config/models.ts +12 -0
  100. package/dist/grader/agent-harness.d.ts +5 -10
  101. package/dist/grader/agent-harness.js +5 -13
  102. package/dist/grader/common.d.ts +5 -13
  103. package/dist/grader/common.js +5 -17
  104. package/dist/grader/index.d.ts +15 -29
  105. package/dist/grader/index.js +15 -66
  106. package/dist/grader/knowledge-probe.d.ts +5 -10
  107. package/dist/grader/knowledge-probe.js +5 -14
  108. package/dist/grader/literacy.d.ts +5 -9
  109. package/dist/grader/literacy.js +5 -13
  110. package/dist/grader/mcp.d.ts +5 -10
  111. package/dist/grader/mcp.js +5 -14
  112. package/dist/orchestration/pipeline-orchestrator.js +3 -0
  113. package/dist/report-store.d.ts +26 -0
  114. package/dist/report-store.js +63 -0
  115. package/package.json +2 -2
@@ -0,0 +1,18 @@
1
+ /**
2
+ * Synthesis cost telemetry types — canonical TS-first shapes for
3
+ * Phase 6 DIAG-06 cost and parse-failure observability.
4
+ *
5
+ * These interfaces are authored independently of their Zod adapter schema
6
+ * (Plan 06-02) per D0045: the Zod schema declares
7
+ * `satisfies z.ZodType<SynthesisCostTelemetry>` against this independent
8
+ * type so drift is a build error, not a runtime bug.
9
+ *
10
+ * The 14 attribute paths on `SynthesisCostTelemetry` + `SynthesisPerCardTelemetry`
11
+ * land on the `ailf.report` Sanity doc under `summary.synthesis.diagnosis.*`
12
+ * (D6-09). No new sibling doc type (D0033 / D6-09).
13
+ *
14
+ * @see docs/decisions/D0045-type-architecture-and-contract-enforcement.md
15
+ * @see .planning/phases/06-post-run-integration-cost-telemetry/06-CONTEXT.md §D6-09
16
+ * @see .planning/phases/06-post-run-integration-cost-telemetry/06-CONTEXT.md §D6-12
17
+ */
18
+ export {};
@@ -115,12 +115,10 @@ function mapEvalConfigToResolvedConfig(config, rootDir) {
115
115
  compareBaseline: config.compareBaseline,
116
116
  gapAnalysisEnabled: config.execution?.gapAnalysis ?? true,
117
117
  // W0077 Phase 4 — `publish` is now a policy object. Map the auto value
118
- // directly to a boolean for the file-config path; the runtime
119
- // smart-default logic in pipeline-action.ts isn't relevant here because
120
- // the user has explicitly handed us a config file.
121
- publishEnabled: config.publish?.auto === "never"
122
- ? false
123
- : config.publish?.auto !== undefined,
118
+ // to a boolean for the file-config path. Absence of publish.auto mirrors
119
+ // the CLI's "full-runs" default (enable publish; composition root gates on
120
+ // token availability). Only "never" explicitly disables auto-publish.
121
+ publishEnabled: config.publish?.auto !== "never",
124
122
  publishTag: config.publish?.tag,
125
123
  noCache: config.noCache ?? false,
126
124
  noRemoteCache: config.noRemoteCache ?? false,
@@ -150,5 +148,9 @@ function mapEvalConfigToResolvedConfig(config, rootDir) {
150
148
  ? resolve(rootDir, config.taskSource.repoTasksPath)
151
149
  : undefined,
152
150
  presets: config.presets,
151
+ // Phase 6 / DIAG-06 — thread summary.onRun into ResolvedConfig so the
152
+ // file-config exit branch in executePipeline can pass it to
153
+ // runPostPipelineHooks.
154
+ summaryOnRun: config.summary?.onRun,
153
155
  };
154
156
  }
@@ -40,9 +40,29 @@ export declare class FakeLLMClient implements LLMClient {
40
40
  readonly calls: FakeCallRecord[];
41
41
  private readonly completeQueue;
42
42
  private readonly structuredQueue;
43
+ /**
44
+ * Per-cardId keyed responses. A single-value entry is returned on every
45
+ * call for that cardId (repeated calls always get the same response). An
46
+ * array-value entry is consumed in order; once exhausted, calls for that
47
+ * cardId fall back to the FIFO structuredQueue.
48
+ *
49
+ * This is the substrate Plan 07's 17-fixture eval matrix uses to wire
50
+ * deterministic responses to specific LLM cards.
51
+ */
52
+ private readonly keyedResponses;
43
53
  constructor(args?: {
44
54
  completeResponses?: FakeCompletionResponse[];
45
55
  structuredResponses?: FakeStructuredResponse[];
56
+ /**
57
+ * Optional keyed-response map. Keys are `cardId` values from
58
+ * `args.context.cardId`. When a call matches a key the keyed entry is
59
+ * used instead of the FIFO queue.
60
+ *
61
+ * - Single-value entry: same response on every call for this cardId.
62
+ * - Array-value entry: entries consumed in insertion order; falls back
63
+ * to FIFO (or throws) when the array is exhausted.
64
+ */
65
+ keyedResponses?: Record<string, FakeStructuredResponse | FakeStructuredResponse[]>;
46
66
  });
47
67
  complete(args: LLMCompleteArgs): Promise<LLMCompletion>;
48
68
  completeStructured<T>(args: LLMCompleteStructuredArgs<T>): Promise<LLMStructuredCompletion<T>>;
@@ -11,9 +11,25 @@ export class FakeLLMClient {
11
11
  calls = [];
12
12
  completeQueue;
13
13
  structuredQueue;
14
+ /**
15
+ * Per-cardId keyed responses. A single-value entry is returned on every
16
+ * call for that cardId (repeated calls always get the same response). An
17
+ * array-value entry is consumed in order; once exhausted, calls for that
18
+ * cardId fall back to the FIFO structuredQueue.
19
+ *
20
+ * This is the substrate Plan 07's 17-fixture eval matrix uses to wire
21
+ * deterministic responses to specific LLM cards.
22
+ */
23
+ keyedResponses;
14
24
  constructor(args = {}) {
15
25
  this.completeQueue = [...(args.completeResponses ?? [])];
16
26
  this.structuredQueue = [...(args.structuredResponses ?? [])];
27
+ // Deep-copy arrays so the caller's fixture data is not mutated.
28
+ const keyed = {};
29
+ for (const [key, val] of Object.entries(args.keyedResponses ?? {})) {
30
+ keyed[key] = Array.isArray(val) ? [...val] : val;
31
+ }
32
+ this.keyedResponses = keyed;
17
33
  }
18
34
  async complete(args) {
19
35
  this.calls.push({
@@ -37,13 +53,34 @@ export class FakeLLMClient {
37
53
  };
38
54
  }
39
55
  async completeStructured(args) {
56
+ // Record every call first so test assertions on this.calls are never
57
+ // affected by which branch (keyed vs FIFO) handles the response.
40
58
  this.calls.push({
41
59
  kind: "completeStructured",
42
60
  model: args.model,
43
61
  prompt: args.prompt,
44
62
  ...(args.context ? { context: args.context } : {}),
45
63
  });
46
- const next = this.structuredQueue.shift();
64
+ let next;
65
+ const cardId = args.context?.cardId;
66
+ if (cardId !== undefined && cardId in this.keyedResponses) {
67
+ const entry = this.keyedResponses[cardId];
68
+ if (Array.isArray(entry)) {
69
+ // Array-value: consume one entry per call. When exhausted, fall
70
+ // through to the FIFO queue below.
71
+ if (entry.length > 0) {
72
+ next = entry.shift();
73
+ }
74
+ }
75
+ else {
76
+ // Single-value: return the same response on every call.
77
+ next = entry;
78
+ }
79
+ }
80
+ if (next === undefined) {
81
+ // FIFO fallback (existing behavior)
82
+ next = this.structuredQueue.shift();
83
+ }
47
84
  if (!next) {
48
85
  throw new Error("FakeLLMClient: no more queued structured responses (call exceeded queue)");
49
86
  }
@@ -5,5 +5,5 @@ export type { FakeCallRecord, FakeCompletionResponse, FakeStructuredResponse, }
5
5
  export { OpenAILLMClient } from "./openai-llm-client.js";
6
6
  export type { OpenAILLMClientOptions } from "./openai-llm-client.js";
7
7
  export type { ModelPricing } from "./pricing.js";
8
- export { DEFAULT_RETRY_POLICY, LLMHttpError, isRetryableStatus, parseRetryAfterSeconds, runWithRetry, } from "./retry.js";
8
+ export { DEFAULT_RETRY_POLICY, LLMHttpError, LLMParseError, isRetryableStatus, parseRetryAfterSeconds, runWithRetry, } from "./retry.js";
9
9
  export type { RetryPolicy } from "./retry.js";
@@ -1,4 +1,4 @@
1
1
  export { AnthropicLLMClient } from "./anthropic-llm-client.js";
2
2
  export { FakeLLMClient } from "./fake-llm-client.js";
3
3
  export { OpenAILLMClient } from "./openai-llm-client.js";
4
- export { DEFAULT_RETRY_POLICY, LLMHttpError, isRetryableStatus, parseRetryAfterSeconds, runWithRetry, } from "./retry.js";
4
+ export { DEFAULT_RETRY_POLICY, LLMHttpError, LLMParseError, isRetryableStatus, parseRetryAfterSeconds, runWithRetry, } from "./retry.js";
@@ -10,8 +10,9 @@
10
10
  * the adapter never reads `process.env`. The composition root maps env vars
11
11
  * to typed constructor args.
12
12
  */
13
+ import { z } from "zod";
13
14
  import { OpenAIChatResponseSchema, splitModelId, } from "../../_vendor/ailf-core/index.js";
14
- import { DEFAULT_RETRY_POLICY, parseRetryAfterSeconds, runWithRetry, } from "./retry.js";
15
+ import { DEFAULT_RETRY_POLICY, LLMParseError, parseRetryAfterSeconds, runWithRetry, } from "./retry.js";
15
16
  const DEFAULT_BASE_URL = "https://api.openai.com/v1/chat/completions";
16
17
  /**
17
18
  * Conservative defaults for the models in `packages/eval/config/models.ts`.
@@ -67,10 +68,25 @@ export class OpenAILLMClient {
67
68
  }
68
69
  async completeStructured(args) {
69
70
  const { modelName } = splitModelId(args.model);
71
+ // Derive the JSON Schema from the caller's Zod schema. Zod v4 natively
72
+ // emits `additionalProperties: false` on every nested z.object node —
73
+ // this is required for OpenAI strict-mode.
74
+ const jsonSchema = z.toJSONSchema(args.schema, { target: "draft-2020-12" });
75
+ // OpenAI strict-mode requires the root to be a plain object schema (no
76
+ // anyOf/oneOf/allOf at the top level). Discriminated unions produce
77
+ // anyOf at the root — callers must wrap them in a discriminator object.
78
+ assertSchemaIsObjectRoot(jsonSchema, args.model);
70
79
  const body = buildBody(modelName, args.prompt, {
71
- temperature: args.temperature,
72
- maxTokens: args.maxTokens,
73
- responseFormat: { type: "json_object" },
80
+ temperature: args.temperature ?? 0.1,
81
+ maxTokens: args.maxTokens ?? 2000,
82
+ responseFormat: {
83
+ type: "json_schema",
84
+ json_schema: {
85
+ name: args.context?.cardId ?? "structured_output",
86
+ schema: jsonSchema,
87
+ strict: true,
88
+ },
89
+ },
74
90
  });
75
91
  const data = await this.callApi(body);
76
92
  const raw = data.choices?.[0]?.message?.content;
@@ -82,8 +98,16 @@ export class OpenAILLMClient {
82
98
  parsed = JSON.parse(raw);
83
99
  }
84
100
  catch (err) {
85
- throw new Error(`OpenAI structured completion returned invalid JSON for model ${args.model}: ${err instanceof Error ? err.message : String(err)}`, { cause: err });
101
+ // Sanitize: SyntaxError.message embeds a snippet at the failure offset,
102
+ // which can leak prompt text or user content echoed back by the model.
103
+ // Keep the raw body on the instance for callers that opt in via .raw,
104
+ // mirroring the LLMHttpError pattern (verified by the "does not leak
105
+ // the response body" test in openai-llm-client.test.ts).
106
+ throw new LLMParseError(`OpenAI structured completion returned invalid JSON for model ${args.model}`, raw, { cause: err });
86
107
  }
108
+ // strict:true guarantees a valid-against-the-schema JSON document, but
109
+ // the Zod parse is still load-bearing — it brands the result as T and is
110
+ // the only contract the engine trusts (D0045 parse-don't-validate).
87
111
  const value = args.schema.parse(parsed);
88
112
  const usage = extractUsage(data.usage);
89
113
  const cost = this.computeCost(modelName, usage);
@@ -145,6 +169,36 @@ export class OpenAILLMClient {
145
169
  `cost_usd=${cost.toFixed(6)}`);
146
170
  }
147
171
  }
172
+ /**
173
+ * Assert that the JSON Schema root is a plain object type.
174
+ *
175
+ * OpenAI strict-mode requires the root schema to be `{ type: "object" }`.
176
+ * A discriminated union (`z.union([...])`) produces `{ anyOf: [...] }` at
177
+ * the root — callers must wrap the union in a discriminator object before
178
+ * passing it to `completeStructured`.
179
+ *
180
+ * Per AI-SPEC §3 Pitfall 6 + T-05-03-01: caught at request-build time to
181
+ * avoid wasting API budget on a guaranteed 400.
182
+ */
183
+ function assertSchemaIsObjectRoot(schema, modelId) {
184
+ if (typeof schema !== "object" || schema === null) {
185
+ throw new Error(`OpenAILLMClient: OpenAI strict-mode requires a single z.object at the ` +
186
+ `schema root for model ${modelId}; got non-object JSON Schema root.`);
187
+ }
188
+ const node = schema;
189
+ if (node.type !== "object") {
190
+ // Identify the kind so the error message is actionable.
191
+ const kind = "anyOf" in node
192
+ ? "z.union"
193
+ : "oneOf" in node
194
+ ? "z.discriminatedUnion"
195
+ : "allOf" in node
196
+ ? "z.intersection"
197
+ : String(node.type ?? "unknown");
198
+ throw new Error(`OpenAILLMClient: OpenAI strict-mode requires a single z.object at the ` +
199
+ `schema root; got ${kind}. Wrap the union in a discriminator object.`);
200
+ }
201
+ }
148
202
  function buildBody(modelName, prompt, opts) {
149
203
  const body = {
150
204
  model: modelName,
@@ -33,6 +33,24 @@ export declare class LLMHttpError extends Error {
33
33
  readonly body: string;
34
34
  constructor(status: number, body: string, attempts: number);
35
35
  }
36
+ /**
37
+ * Sanitized error raised when an LLM adapter receives an HTTP-200 response
38
+ * whose body is not valid JSON. The raw response body (which may echo back
39
+ * user prompt content or even API-key fragments from prompts) is kept on the
40
+ * instance for callers that opt in via `.raw`, NOT in the message string.
41
+ *
42
+ * Mirrors the LLMHttpError pattern verified by the
43
+ * "does not leak the response body" test in openai-llm-client.test.ts.
44
+ */
45
+ export declare class LLMParseError extends Error {
46
+ /** Full raw response body (kept on the instance, NOT in `message`). */
47
+ readonly raw: string;
48
+ /** Byte length of `raw` — safe to include in the message. */
49
+ readonly rawLength: number;
50
+ constructor(message: string, raw: string, options?: {
51
+ cause?: unknown;
52
+ });
53
+ }
36
54
  export declare function isRetryableStatus(status: number): boolean;
37
55
  export interface RunWithRetryArgs<T> {
38
56
  policy: RetryPolicy;
@@ -29,6 +29,27 @@ export class LLMHttpError extends Error {
29
29
  this.body = body;
30
30
  }
31
31
  }
32
+ /**
33
+ * Sanitized error raised when an LLM adapter receives an HTTP-200 response
34
+ * whose body is not valid JSON. The raw response body (which may echo back
35
+ * user prompt content or even API-key fragments from prompts) is kept on the
36
+ * instance for callers that opt in via `.raw`, NOT in the message string.
37
+ *
38
+ * Mirrors the LLMHttpError pattern verified by the
39
+ * "does not leak the response body" test in openai-llm-client.test.ts.
40
+ */
41
+ export class LLMParseError extends Error {
42
+ /** Full raw response body (kept on the instance, NOT in `message`). */
43
+ raw;
44
+ /** Byte length of `raw` — safe to include in the message. */
45
+ rawLength;
46
+ constructor(message, raw, options) {
47
+ super(`${message} (raw=${raw.length}B)`, options);
48
+ this.name = "LLMParseError";
49
+ this.raw = raw;
50
+ this.rawLength = raw.length;
51
+ }
52
+ }
32
53
  export function isRetryableStatus(status) {
33
54
  return status === 429 || (status >= 500 && status < 600);
34
55
  }
@@ -0,0 +1,49 @@
1
+ /**
2
+ * Zod adapter schema for SynthesisCostTelemetry at the trust boundary.
3
+ *
4
+ * This schema sits at `packages/eval/src/adapters/**` and is therefore
5
+ * scanned by `pnpm check-trust-boundary-satisfies` (D0045). The
6
+ * `satisfies z.ZodType<SynthesisCostTelemetry>` clause makes schema/type
7
+ * drift a build error, not a runtime bug.
8
+ *
9
+ * Used by:
10
+ * - Plan 06-04 `ReportStore.patchSynthesis` — validates telemetry before
11
+ * writing to Sanity (process memory → Sanity write boundary, T-06-04).
12
+ * - Any future Sanity-side reader of `summary.synthesis.diagnosis.*`
13
+ * (Sanity Content Lake → eval process boundary, T-06-04).
14
+ *
15
+ * Security constraints:
16
+ * - No `.passthrough()` — schema is closed to prevent PII leakage from
17
+ * card body text into the telemetry shape (T-06-05).
18
+ * - Satisfies clause is load-bearing (T-06-06); no exemption marker.
19
+ *
20
+ * @see packages/core/src/types/synthesis-telemetry.ts — independently authored domain types
21
+ * @see docs/decisions/D0045-type-architecture-and-contract-enforcement.md
22
+ * @see .planning/phases/06-post-run-integration-cost-telemetry/06-CONTEXT.md §D6-09
23
+ */
24
+ import { z } from "zod";
25
+ export declare const SynthesisCostTelemetrySchema: z.ZodObject<{
26
+ cost: z.ZodNumber;
27
+ parseFailureCount: z.ZodNumber;
28
+ parseFailureRate: z.ZodNumber;
29
+ perCard: z.ZodArray<z.ZodObject<{
30
+ cardType: z.ZodEnum<{
31
+ "area-summary": "area-summary";
32
+ "failure-mode-summary": "failure-mode-summary";
33
+ "no-issues": "no-issues";
34
+ "top-recommendations": "top-recommendations";
35
+ "weakest-area": "weakest-area";
36
+ "low-confidence-attribution": "low-confidence-attribution";
37
+ "doc-attribution-spotlight": "doc-attribution-spotlight";
38
+ "regression-vs-baseline": "regression-vs-baseline";
39
+ }>;
40
+ cost: z.ZodOptional<z.ZodNumber>;
41
+ parseFailed: z.ZodBoolean;
42
+ latencyMs: z.ZodOptional<z.ZodNumber>;
43
+ tokenInput: z.ZodOptional<z.ZodNumber>;
44
+ tokenOutput: z.ZodOptional<z.ZodNumber>;
45
+ cardVersion: z.ZodString;
46
+ generatedAt: z.ZodString;
47
+ }, z.core.$strip>>;
48
+ }, z.core.$strip>;
49
+ export type { SynthesisCostTelemetry, SynthesisPerCardTelemetry, } from "../../_vendor/ailf-core/index.d.ts";
@@ -0,0 +1,55 @@
1
+ /**
2
+ * Zod adapter schema for SynthesisCostTelemetry at the trust boundary.
3
+ *
4
+ * This schema sits at `packages/eval/src/adapters/**` and is therefore
5
+ * scanned by `pnpm check-trust-boundary-satisfies` (D0045). The
6
+ * `satisfies z.ZodType<SynthesisCostTelemetry>` clause makes schema/type
7
+ * drift a build error, not a runtime bug.
8
+ *
9
+ * Used by:
10
+ * - Plan 06-04 `ReportStore.patchSynthesis` — validates telemetry before
11
+ * writing to Sanity (process memory → Sanity write boundary, T-06-04).
12
+ * - Any future Sanity-side reader of `summary.synthesis.diagnosis.*`
13
+ * (Sanity Content Lake → eval process boundary, T-06-04).
14
+ *
15
+ * Security constraints:
16
+ * - No `.passthrough()` — schema is closed to prevent PII leakage from
17
+ * card body text into the telemetry shape (T-06-05).
18
+ * - Satisfies clause is load-bearing (T-06-06); no exemption marker.
19
+ *
20
+ * @see packages/core/src/types/synthesis-telemetry.ts — independently authored domain types
21
+ * @see docs/decisions/D0045-type-architecture-and-contract-enforcement.md
22
+ * @see .planning/phases/06-post-run-integration-cost-telemetry/06-CONTEXT.md §D6-09
23
+ */
24
+ import { z } from "zod";
25
+ /**
26
+ * Enum of all valid card types — mirrors `CardType` from diagnosis.ts.
27
+ * Using `z.enum()` (not `z.string()`) so the schema satisfies
28
+ * `z.ZodType<SynthesisPerCardTelemetry>` (which requires `cardType: CardType`).
29
+ */
30
+ const CardTypeSchema = z.enum([
31
+ "area-summary",
32
+ "failure-mode-summary",
33
+ "no-issues",
34
+ "top-recommendations",
35
+ "weakest-area",
36
+ "low-confidence-attribution",
37
+ "doc-attribution-spotlight",
38
+ "regression-vs-baseline",
39
+ ]);
40
+ const SynthesisPerCardSchema = z.object({
41
+ cardType: CardTypeSchema,
42
+ cost: z.number().nonnegative().optional(),
43
+ parseFailed: z.boolean(),
44
+ latencyMs: z.number().int().nonnegative().optional(),
45
+ tokenInput: z.number().int().nonnegative().optional(),
46
+ tokenOutput: z.number().int().nonnegative().optional(),
47
+ cardVersion: z.string(),
48
+ generatedAt: z.string().datetime({ offset: false }), // ISO 8601 UTC required
49
+ });
50
+ export const SynthesisCostTelemetrySchema = z.object({
51
+ cost: z.number().nonnegative(),
52
+ parseFailureCount: z.number().int().nonnegative(),
53
+ parseFailureRate: z.number().min(0).max(1),
54
+ perCard: z.array(SynthesisPerCardSchema),
55
+ });
@@ -286,16 +286,21 @@ function mapAssertions(raw) {
286
286
  .map((c) => ({ id: c.id, text: c.text })),
287
287
  template: a.template,
288
288
  type: "llm-rubric",
289
- ...(a.weight !== undefined ? { weight: a.weight } : {}),
289
+ // Use `!= null` (loose) so we drop both `undefined` AND `null`.
290
+ // GROQ projects missing scalar fields as `null`, but the domain
291
+ // schema's `z.number().optional()` accepts `T | undefined`, not
292
+ // `T | null` — a strict `!== undefined` check would forward
293
+ // `weight: null` and trigger Zod's "Invalid input" on assertions.
294
+ ...(a.weight != null ? { weight: a.weight } : {}),
290
295
  };
291
296
  }
292
- // Value-based assertion
297
+ // Value-based assertion — same null-vs-undefined hazard as above.
293
298
  const result = { type: a.type };
294
- if (a.value !== undefined)
299
+ if (a.value != null)
295
300
  result.value = a.value;
296
- if (a.threshold !== undefined)
301
+ if (a.threshold != null)
297
302
  result.threshold = a.threshold;
298
- if (a.weight !== undefined)
303
+ if (a.weight != null)
299
304
  result.weight = a.weight;
300
305
  return result;
301
306
  });
@@ -1561,6 +1561,13 @@ export declare const RepoConfigSchema: z.ZodObject<{
1561
1561
  dir: z.ZodOptional<z.ZodString>;
1562
1562
  exclude: z.ZodOptional<z.ZodArray<z.ZodString>>;
1563
1563
  }, z.core.$strip>>;
1564
+ summary: z.ZodOptional<z.ZodObject<{
1565
+ onRun: z.ZodOptional<z.ZodEnum<{
1566
+ never: "never";
1567
+ always: "always";
1568
+ auto: "auto";
1569
+ }>>;
1570
+ }, z.core.$strip>>;
1564
1571
  taskSource: z.ZodOptional<z.ZodObject<{
1565
1572
  type: z.ZodOptional<z.ZodEnum<{
1566
1573
  "content-lake": "content-lake";
@@ -646,6 +646,15 @@ const OwnerConfigSchema = z
646
646
  individual: z.string().min(1).optional(),
647
647
  })
648
648
  .optional();
649
+ /**
650
+ * Post-run diagnosis summary policy (Phase 6 / DIAG-06).
651
+ * Sits in the W0077 Phase-6a auto-load pathway.
652
+ */
653
+ const SummaryConfigSchema = z
654
+ .object({
655
+ onRun: z.enum(["auto", "always", "never"]).optional(),
656
+ })
657
+ .optional();
649
658
  /**
650
659
  * Agentic-mode configuration (W0077 Phase 6f). Replaces the retired
651
660
  * `--header` and `--allowed-origin` CLI flags. `headers` is a key/value
@@ -694,6 +703,7 @@ export const RepoConfigSchema = z.object({
694
703
  owner: OwnerConfigSchema,
695
704
  agentic: AgenticConfigSchema,
696
705
  artifacts: ArtifactsConfigSchema,
706
+ summary: SummaryConfigSchema,
697
707
  taskSource: TaskSourceConfigSchema,
698
708
  triggers: z
699
709
  .object({
@@ -32,6 +32,7 @@ import { createFetchDocsCommand } from "./commands/fetch-docs.js";
32
32
  import { createGenerateConfigsCommand } from "./commands/generate-configs.js";
33
33
  import { createGraderCommand } from "./commands/grader/index.js";
34
34
  import { createInitCommand } from "./commands/init.js";
35
+ import { createInterpretCommand } from "./commands/interpret.js";
35
36
  import { createInteractiveCommand } from "./commands/interactive.js";
36
37
  import { createLookupDocCommand } from "./commands/lookup-doc.js";
37
38
  import { createMeasureRetrievalCommand } from "./commands/measure-retrieval.js";
@@ -110,6 +111,8 @@ export function buildCliProgram(opts) {
110
111
  .addCommand(createWeeklyDigestCommand())
111
112
  .addCommand(createCheckStalenessCommand());
112
113
  program.addCommand(reportCommand.helpGroup(CommandGroup.AnalysisReports));
114
+ // `ailf interpret <reportId>` — top-level (not nested under report) per AI-SPEC
115
+ program.addCommand(createInterpretCommand().helpGroup(CommandGroup.AnalysisReports));
113
116
  // ── Grader Reliability ────────────────────────────────────────────────
114
117
  program.addCommand(createGraderCommand().helpGroup(CommandGroup.GraderReliability));
115
118
  // ── Setup & Configuration ─────────────────────────────────────────────
@@ -0,0 +1,70 @@
1
+ /**
2
+ * interpret command — generate a Diagnosis for a Report.
3
+ *
4
+ * Wraps `getDiagnosisRunner(ctx)` from the composition root in a Commander
5
+ * command for consistent CLI integration. Closest analog: compare.ts.
6
+ *
7
+ * Entry points:
8
+ * ailf interpret <reportId> — one-line-per-card summary
9
+ * ailf interpret <reportId> --json — full Diagnosis JSON
10
+ * ailf interpret latest — most recent report
11
+ * ailf interpret <id> --compare <ref> — DIAG-05 regression comparison
12
+ * ailf interpret <id> --refresh — bypass version-keyed cache
13
+ *
14
+ * @see packages/eval/src/commands/compare.ts — CLI factory analog
15
+ * @see packages/eval/src/composition-root.ts — getDiagnosisRunner
16
+ * @see .planning/phases/05-diagnosis-engine-cli-llm-cards/05-AI-SPEC.md §6
17
+ */
18
+ import { Command } from "commander";
19
+ import { type DiagnosisCard, type DiagnosisRunner, type VersionedInputs } from "../_vendor/ailf-core/index.d.ts";
20
+ interface MinimalReportStore {
21
+ read(id: string): Promise<unknown | null>;
22
+ latest(): Promise<unknown | null>;
23
+ }
24
+ export interface InterpretCommandOptions {
25
+ /**
26
+ * Override the runner factory for tests. When omitted, the command
27
+ * imports `getDiagnosisRunner` from the composition root at action time.
28
+ */
29
+ readonly runnerFactory?: (ctx: unknown) => DiagnosisRunner;
30
+ /**
31
+ * Override the store factory for tests. When omitted, the command
32
+ * creates the app context and uses `ctx.reportStore` at action time.
33
+ */
34
+ readonly storeFactory?: () => MinimalReportStore | null;
35
+ /**
36
+ * Override the versions resolver for tests. Receives the stored report
37
+ * record and returns the `VersionedInputs` needed by the runner.
38
+ * When omitted, the command derives versions from the report's metadata.
39
+ */
40
+ readonly versionsFromReport?: (report: unknown) => VersionedInputs;
41
+ }
42
+ /**
43
+ * Visual status markers — locked visual contract per plan Test 7:
44
+ * ready: "✓", degraded: "⚠", missing: "—"
45
+ *
46
+ * Exported so Plan 06-04's post-run hook imports the SAME object and
47
+ * D6-04's "single formatter, single visual contract" is physically
48
+ * enforced — no copy/paste drift possible.
49
+ */
50
+ export declare const STATUS_ICONS: Record<DiagnosisCard["status"], string>;
51
+ /**
52
+ * Format a single card as a one-line summary string.
53
+ *
54
+ * Format: `<icon> <cardType>: <summary>`
55
+ * Per AI-SPEC §6: distinct icons for ready / degraded / missing.
56
+ *
57
+ * Exported so Plan 06-04's post-run hook imports the SAME function and
58
+ * D6-04's "single formatter, single visual contract" is physically
59
+ * enforced — no copy/paste drift possible.
60
+ */
61
+ export declare function formatCardSummaryLine(card: DiagnosisCard): string;
62
+ /**
63
+ * Create the `ailf interpret <reportId>` Commander command.
64
+ *
65
+ * Accepts optional `InterpretCommandOptions` for testability — tests can
66
+ * inject a fake runner factory and store factory without touching module
67
+ * mocks (preferred per testing.md).
68
+ */
69
+ export declare function createInterpretCommand(options?: InterpretCommandOptions): Command;
70
+ export {};