@sanity/ailf 4.6.0 → 6.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185)
  1. package/canonical/grader-references/agent-harness-tools.yaml +42 -0
  2. package/canonical/grader-references/knowledge-probe-recall.yaml +36 -0
  3. package/canonical/grader-references/mcp-server-spec.yaml +51 -0
  4. package/canonical/grader-references/portable-text.yaml +48 -0
  5. package/config/diagnosis-cards.ts +318 -0
  6. package/config/models.ts +12 -0
  7. package/config/rubrics.ts +38 -2
  8. package/dist/_vendor/ailf-core/artifact-registry.d.ts +60 -2
  9. package/dist/_vendor/ailf-core/artifact-registry.js +288 -7
  10. package/dist/_vendor/ailf-core/examples/index.d.ts +125 -26
  11. package/dist/_vendor/ailf-core/examples/index.js +146 -47
  12. package/dist/_vendor/ailf-core/grader/failure-modes/agent-harness.d.ts +13 -0
  13. package/dist/_vendor/ailf-core/grader/failure-modes/agent-harness.js +16 -0
  14. package/dist/_vendor/ailf-core/grader/failure-modes/common.d.ts +14 -0
  15. package/dist/_vendor/ailf-core/grader/failure-modes/common.js +18 -0
  16. package/dist/_vendor/ailf-core/grader/failure-modes/index.d.ts +45 -0
  17. package/dist/_vendor/ailf-core/grader/failure-modes/index.js +109 -0
  18. package/dist/_vendor/ailf-core/grader/failure-modes/knowledge-probe.d.ts +13 -0
  19. package/dist/_vendor/ailf-core/grader/failure-modes/knowledge-probe.js +17 -0
  20. package/dist/_vendor/ailf-core/grader/failure-modes/literacy.d.ts +13 -0
  21. package/dist/_vendor/ailf-core/grader/failure-modes/literacy.js +17 -0
  22. package/dist/_vendor/ailf-core/grader/failure-modes/mcp.d.ts +13 -0
  23. package/dist/_vendor/ailf-core/grader/failure-modes/mcp.js +17 -0
  24. package/dist/_vendor/ailf-core/index.d.ts +1 -0
  25. package/dist/_vendor/ailf-core/index.js +4 -0
  26. package/dist/_vendor/ailf-core/ports/context.d.ts +8 -0
  27. package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +15 -0
  28. package/dist/_vendor/ailf-core/schemas/branded-string.d.ts +40 -0
  29. package/dist/_vendor/ailf-core/schemas/branded-string.js +45 -0
  30. package/dist/_vendor/ailf-core/schemas/confidence-schema.d.ts +36 -0
  31. package/dist/_vendor/ailf-core/schemas/confidence-schema.js +32 -0
  32. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
  33. package/dist/_vendor/ailf-core/schemas/eval-config.js +8 -4
  34. package/dist/_vendor/ailf-core/schemas/index.d.ts +2 -0
  35. package/dist/_vendor/ailf-core/schemas/index.js +9 -0
  36. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +1 -0
  37. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +1 -0
  38. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +34 -8
  39. package/dist/_vendor/ailf-core/schemas/pipeline.js +23 -1
  40. package/dist/_vendor/ailf-core/services/diagnosis/card-validators.d.ts +41 -0
  41. package/dist/_vendor/ailf-core/services/diagnosis/card-validators.js +40 -0
  42. package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/area-summary.test.d.ts +7 -0
  43. package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/area-summary.test.js +131 -0
  44. package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/failure-mode-summary.test.d.ts +7 -0
  45. package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/failure-mode-summary.test.js +171 -0
  46. package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/no-issues.test.d.ts +7 -0
  47. package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/no-issues.test.js +155 -0
  48. package/dist/_vendor/ailf-core/services/diagnosis/cards/area-summary.d.ts +17 -0
  49. package/dist/_vendor/ailf-core/services/diagnosis/cards/area-summary.js +43 -0
  50. package/dist/_vendor/ailf-core/services/diagnosis/cards/doc-attribution-spotlight.d.ts +46 -0
  51. package/dist/_vendor/ailf-core/services/diagnosis/cards/doc-attribution-spotlight.js +104 -0
  52. package/dist/_vendor/ailf-core/services/diagnosis/cards/failure-mode-summary.d.ts +28 -0
  53. package/dist/_vendor/ailf-core/services/diagnosis/cards/failure-mode-summary.js +96 -0
  54. package/dist/_vendor/ailf-core/services/diagnosis/cards/index.d.ts +39 -0
  55. package/dist/_vendor/ailf-core/services/diagnosis/cards/index.js +52 -0
  56. package/dist/_vendor/ailf-core/services/diagnosis/cards/low-confidence-attribution.d.ts +27 -0
  57. package/dist/_vendor/ailf-core/services/diagnosis/cards/low-confidence-attribution.js +77 -0
  58. package/dist/_vendor/ailf-core/services/diagnosis/cards/no-issues.d.ts +32 -0
  59. package/dist/_vendor/ailf-core/services/diagnosis/cards/no-issues.js +71 -0
  60. package/dist/_vendor/ailf-core/services/diagnosis/cards/regression-vs-baseline.d.ts +44 -0
  61. package/dist/_vendor/ailf-core/services/diagnosis/cards/regression-vs-baseline.js +126 -0
  62. package/dist/_vendor/ailf-core/services/diagnosis/cards/top-recommendations.d.ts +41 -0
  63. package/dist/_vendor/ailf-core/services/diagnosis/cards/top-recommendations.js +107 -0
  64. package/dist/_vendor/ailf-core/services/diagnosis/cards/weakest-area.d.ts +43 -0
  65. package/dist/_vendor/ailf-core/services/diagnosis/cards/weakest-area.js +114 -0
  66. package/dist/_vendor/ailf-core/services/diagnosis/prompt-builders.d.ts +72 -0
  67. package/dist/_vendor/ailf-core/services/diagnosis/prompt-builders.js +273 -0
  68. package/dist/_vendor/ailf-core/services/diagnosis/prompts/doc-attribution-spotlight.system.d.ts +17 -0
  69. package/dist/_vendor/ailf-core/services/diagnosis/prompts/doc-attribution-spotlight.system.js +58 -0
  70. package/dist/_vendor/ailf-core/services/diagnosis/prompts/index.d.ts +10 -0
  71. package/dist/_vendor/ailf-core/services/diagnosis/prompts/index.js +10 -0
  72. package/dist/_vendor/ailf-core/services/diagnosis/prompts/low-confidence-attribution.system.d.ts +15 -0
  73. package/dist/_vendor/ailf-core/services/diagnosis/prompts/low-confidence-attribution.system.js +53 -0
  74. package/dist/_vendor/ailf-core/services/diagnosis/prompts/regression-vs-baseline.system.d.ts +14 -0
  75. package/dist/_vendor/ailf-core/services/diagnosis/prompts/regression-vs-baseline.system.js +63 -0
  76. package/dist/_vendor/ailf-core/services/diagnosis/prompts/top-recommendations.system.d.ts +16 -0
  77. package/dist/_vendor/ailf-core/services/diagnosis/prompts/top-recommendations.system.js +78 -0
  78. package/dist/_vendor/ailf-core/services/diagnosis/prompts/weakest-area.system.d.ts +16 -0
  79. package/dist/_vendor/ailf-core/services/diagnosis/prompts/weakest-area.system.js +86 -0
  80. package/dist/_vendor/ailf-core/services/diagnosis/registry.d.ts +50 -0
  81. package/dist/_vendor/ailf-core/services/diagnosis/registry.js +35 -0
  82. package/dist/_vendor/ailf-core/services/diagnosis-runner.d.ts +136 -0
  83. package/dist/_vendor/ailf-core/services/diagnosis-runner.js +153 -0
  84. package/dist/_vendor/ailf-core/services/index.d.ts +6 -0
  85. package/dist/_vendor/ailf-core/services/index.js +18 -0
  86. package/dist/_vendor/ailf-core/services/llm-client-factory.d.ts +64 -0
  87. package/dist/_vendor/ailf-core/services/llm-client-factory.js +54 -0
  88. package/dist/_vendor/ailf-core/services/report-to-markdown.js +3 -2
  89. package/dist/_vendor/ailf-core/types/attribution.d.ts +82 -0
  90. package/dist/_vendor/ailf-core/types/attribution.js +18 -0
  91. package/dist/_vendor/ailf-core/types/branded-ids.d.ts +26 -1
  92. package/dist/_vendor/ailf-core/types/branded-ids.js +80 -4
  93. package/dist/_vendor/ailf-core/types/confidence.d.ts +1 -1
  94. package/dist/_vendor/ailf-core/types/confidence.js +7 -0
  95. package/dist/_vendor/ailf-core/types/diagnosis.d.ts +271 -0
  96. package/dist/_vendor/ailf-core/types/diagnosis.js +19 -0
  97. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +16 -1
  98. package/dist/_vendor/ailf-core/types/grader-judgment.d.ts +125 -0
  99. package/dist/_vendor/ailf-core/types/grader-judgment.js +30 -0
  100. package/dist/_vendor/ailf-core/types/index.d.ts +80 -29
  101. package/dist/_vendor/ailf-core/types/index.js +15 -1
  102. package/dist/_vendor/ailf-core/types/legacy-grader-judgment.d.ts +55 -0
  103. package/dist/_vendor/ailf-core/types/legacy-grader-judgment.js +30 -0
  104. package/dist/_vendor/ailf-core/types/pipeline-request.d.ts +1 -0
  105. package/dist/_vendor/ailf-core/types/repo-config.d.ts +8 -0
  106. package/dist/_vendor/ailf-shared/document-ref.d.ts +1 -1
  107. package/dist/adapters/api-client/build-request.d.ts +1 -0
  108. package/dist/adapters/api-client/build-request.js +3 -0
  109. package/dist/adapters/attribution/attribution-meta-writer.d.ts +35 -0
  110. package/dist/adapters/attribution/attribution-meta-writer.js +34 -0
  111. package/dist/adapters/attribution/index.d.ts +9 -0
  112. package/dist/adapters/attribution/index.js +8 -0
  113. package/dist/adapters/attribution/per-entry-attribution-writer.d.ts +56 -0
  114. package/dist/adapters/attribution/per-entry-attribution-writer.js +49 -0
  115. package/dist/adapters/config-sources/file-config-adapter.js +1 -0
  116. package/dist/adapters/grader-outputs/index.d.ts +10 -0
  117. package/dist/adapters/grader-outputs/index.js +8 -0
  118. package/dist/adapters/grader-outputs/legacy/index.d.ts +11 -0
  119. package/dist/adapters/grader-outputs/legacy/index.js +10 -0
  120. package/dist/adapters/grader-outputs/legacy/promptfoo-grader-output-legacy.d.ts +49 -0
  121. package/dist/adapters/grader-outputs/legacy/promptfoo-grader-output-legacy.js +48 -0
  122. package/dist/adapters/grader-outputs/promptfoo-grader-output.d.ts +102 -0
  123. package/dist/adapters/grader-outputs/promptfoo-grader-output.js +93 -0
  124. package/dist/adapters/index.d.ts +3 -0
  125. package/dist/adapters/index.js +4 -0
  126. package/dist/adapters/llm/fake-llm-client.d.ts +20 -0
  127. package/dist/adapters/llm/fake-llm-client.js +38 -1
  128. package/dist/adapters/llm/openai-llm-client.js +52 -3
  129. package/dist/adapters/task-sources/content-lake-task-source.d.ts +5 -1
  130. package/dist/adapters/task-sources/content-lake-task-source.js +28 -2
  131. package/dist/adapters/task-sources/repo-schemas.d.ts +79 -11
  132. package/dist/adapters/task-sources/repo-schemas.js +19 -2
  133. package/dist/cli-program.js +3 -0
  134. package/dist/commands/calculate-scores.js +1 -1
  135. package/dist/commands/explain-handler.js +1 -1
  136. package/dist/commands/interpret.d.ts +50 -0
  137. package/dist/commands/interpret.js +212 -0
  138. package/dist/commands/lookup-doc.d.ts +1 -1
  139. package/dist/commands/lookup-doc.js +3 -3
  140. package/dist/commands/pipeline-action.d.ts +6 -0
  141. package/dist/commands/pipeline-action.js +2 -0
  142. package/dist/commands/remote-pipeline.js +1 -0
  143. package/dist/composition-root.d.ts +57 -23
  144. package/dist/composition-root.js +155 -41
  145. package/dist/config/diagnosis-cards.ts +318 -0
  146. package/dist/config/models.ts +12 -0
  147. package/dist/config/rubrics.ts +38 -2
  148. package/dist/grader/agent-harness.d.ts +9 -0
  149. package/dist/grader/agent-harness.js +9 -0
  150. package/dist/grader/common.d.ts +9 -0
  151. package/dist/grader/common.js +9 -0
  152. package/dist/grader/index.d.ts +24 -0
  153. package/dist/grader/index.js +24 -0
  154. package/dist/grader/knowledge-probe.d.ts +9 -0
  155. package/dist/grader/knowledge-probe.js +9 -0
  156. package/dist/grader/literacy.d.ts +9 -0
  157. package/dist/grader/literacy.js +9 -0
  158. package/dist/grader/mcp.d.ts +9 -0
  159. package/dist/grader/mcp.js +9 -0
  160. package/dist/orchestration/build-app-context.js +1 -0
  161. package/dist/orchestration/build-step-sequence.js +5 -0
  162. package/dist/orchestration/steps/calculate-scores-step.js +23 -1
  163. package/dist/orchestration/steps/compute-attribution-step.d.ts +44 -0
  164. package/dist/orchestration/steps/compute-attribution-step.js +279 -0
  165. package/dist/orchestration/steps/gap-analysis-step.js +35 -7
  166. package/dist/orchestration/steps/index.d.ts +1 -0
  167. package/dist/orchestration/steps/index.js +1 -0
  168. package/dist/pipeline/attribution.d.ts +15 -0
  169. package/dist/pipeline/attribution.js +18 -9
  170. package/dist/pipeline/borderline-consensus-runner.d.ts +63 -0
  171. package/dist/pipeline/borderline-consensus-runner.js +124 -0
  172. package/dist/pipeline/borderline-detector.d.ts +24 -0
  173. package/dist/pipeline/borderline-detector.js +26 -0
  174. package/dist/pipeline/calculate-scores.d.ts +114 -3
  175. package/dist/pipeline/calculate-scores.js +426 -24
  176. package/dist/pipeline/compiler/literacy-bridge.d.ts +1 -1
  177. package/dist/pipeline/compiler/literacy-bridge.js +35 -17
  178. package/dist/pipeline/compiler/rubric-resolution.d.ts +15 -0
  179. package/dist/pipeline/compiler/rubric-resolution.js +9 -1
  180. package/dist/pipeline/compute-attribution.d.ts +80 -0
  181. package/dist/pipeline/compute-attribution.js +196 -0
  182. package/dist/pipeline/failure-modes.d.ts +52 -17
  183. package/dist/pipeline/failure-modes.js +178 -117
  184. package/dist/pipeline/map-request-to-config.js +1 -0
  185. package/package.json +7 -5
@@ -0,0 +1,49 @@
1
+ /**
2
+ * per-entry-attribution-writer.ts — Zod schema for the per-judgment
3
+ * attribution artifact (ATTR-01) emitted by Phase 4's
4
+ * `ComputeAttributionStep` and read back by Phase 5's diagnosis runner.
5
+ *
6
+ * The schema asserts `satisfies z.ZodType<JudgmentAttribution>` against
7
+ * the canonical domain type in `packages/core/src/types/attribution.ts`
8
+ * (D0045 / W0187) — drift between schema and type is a build error.
9
+ *
10
+ * Phase 1 lands the SHAPE only — no compute, no file I/O. Phase 4 wires
11
+ * the writer; Phase 5 wires the reader. Both are checked with `satisfies`
12
+ * against this single source-of-truth schema.
13
+ *
14
+ * `hallucinationCheckedAgainst` is REQUIRED (Pitfall #11): consumers
15
+ * must be able to audit citation grounding without re-deriving the
16
+ * resolvable-set. The canonical task field is `contextDocs`; do NOT
17
+ * invent `expectedDocs` / `usedDocs` synonyms.
18
+ *
19
+ * @see docs/decisions/D0045-type-architecture-and-contract-enforcement.md
20
+ * @see docs/decisions/D0049-shared-confidence-contract.md
21
+ * @see docs/decisions/D0052-judgment-ref-granularity.md
22
+ * @see docs/design-docs/actionability-ladder/04-per-document-attribution-ensemble.md
23
+ */
24
+ import { z } from "zod";
25
+ import { ConfidenceSchema } from "../../_vendor/ailf-core/schemas/index.js";
26
+ const DocAttributionSchema = z.object({
27
+ documentId: z.string().min(1),
28
+ slug: z.string().optional(),
29
+ score: z.number().min(0).max(1),
30
+ signals: z.object({
31
+ citation: z.number().min(0).max(1).optional(),
32
+ canonical: z.number().min(0).max(1).optional(),
33
+ retrieved: z.number().min(0).max(1).optional(),
34
+ }),
35
+ confidence: ConfidenceSchema,
36
+ });
37
+ /**
38
+ * Canonical schema for {@link JudgmentAttribution}. Persisted at
39
+ * `runs/{runId}/attribution/{entryKey}.json` (Phase 4) and parsed by
40
+ * the diagnosis runner on read (Phase 5).
41
+ */
42
+ export const JudgmentAttributionSchema = z.object({
43
+ judgmentRef: z.string().min(1),
44
+ taskId: z.string().min(1),
45
+ modelId: z.string().min(1),
46
+ dimension: z.string().min(1),
47
+ attributions: z.array(DocAttributionSchema),
48
+ hallucinationCheckedAgainst: z.array(z.string()),
49
+ });
@@ -125,6 +125,7 @@ function mapEvalConfigToResolvedConfig(config, rootDir) {
125
125
  noCache: config.noCache ?? false,
126
126
  noRemoteCache: config.noRemoteCache ?? false,
127
127
  graderReplications: config.execution?.graderReplications,
128
+ borderlineReplications: config.execution?.borderlineReplications,
128
129
  graderContext: config.grader?.context,
129
130
  urls: config.urls,
130
131
  headers: config.agentic?.headers,
@@ -0,0 +1,10 @@
1
+ /**
2
+ * grader-outputs adapter barrel — named re-exports only (W0124 / D0045).
3
+ *
4
+ * The grader-output schema lives here so it enters the D0045
5
+ * `pnpm check-trust-boundary-satisfies` SCAN_ROOTS gate.
6
+ */
7
+ export { GraderJudgmentSchema, graderJudgmentsVersion, } from "./promptfoo-grader-output.js";
8
+ export type { GraderJudgment } from "../../_vendor/ailf-core/index.d.ts";
9
+ export { LegacyGraderJudgmentSchema } from "./legacy/index.js";
10
+ export type { LegacyGraderJudgment } from "../../_vendor/ailf-core/index.d.ts";
@@ -0,0 +1,8 @@
1
+ /**
2
+ * grader-outputs adapter barrel — named re-exports only (W0124 / D0045).
3
+ *
4
+ * The grader-output schema lives here so it enters the D0045
5
+ * `pnpm check-trust-boundary-satisfies` SCAN_ROOTS gate.
6
+ */
7
+ export { GraderJudgmentSchema, graderJudgmentsVersion, } from "./promptfoo-grader-output.js";
8
+ export { LegacyGraderJudgmentSchema } from "./legacy/index.js";
@@ -0,0 +1,11 @@
1
+ /**
2
+ * legacy grader-outputs adapter sub-barrel — named re-exports only
3
+ * (W0124 / D0045).
4
+ *
5
+ * Read-only schema for the Phase 1 free-prose grader-output shape,
6
+ * invoked only by historical-report rendering paths through Phase 7
7
+ * (GRAD-06 cutover). The schema lives here so it enters the D0045
8
+ * `pnpm check-trust-boundary-satisfies` SCAN_ROOTS gate.
9
+ */
10
+ export { LegacyGraderJudgmentSchema } from "./promptfoo-grader-output-legacy.js";
11
+ export type { LegacyGraderJudgment } from "../../../_vendor/ailf-core/index.d.ts";
@@ -0,0 +1,10 @@
1
+ /**
2
+ * legacy grader-outputs adapter sub-barrel — named re-exports only
3
+ * (W0124 / D0045).
4
+ *
5
+ * Read-only schema for the Phase 1 free-prose grader-output shape,
6
+ * invoked only by historical-report rendering paths through Phase 7
7
+ * (GRAD-06 cutover). The schema lives here so it enters the D0045
8
+ * `pnpm check-trust-boundary-satisfies` SCAN_ROOTS gate.
9
+ */
10
+ export { LegacyGraderJudgmentSchema } from "./promptfoo-grader-output-legacy.js";
@@ -0,0 +1,49 @@
1
+ /**
2
+ * promptfoo-grader-output-legacy.ts — Zod schema for the Phase 1
3
+ * free-prose grader-output shape, used by historical-report rendering
4
+ * paths.
5
+ *
6
+ * READ-ONLY: invoked only by historical-report rendering paths through
7
+ * Phase 7 (GRAD-06 cutover). Reports are immutable events — once a
8
+ * Report is written to Content Lake, the structured grader-judgment
9
+ * shape it captures cannot be back-filled. The legacy schema exists so
10
+ * pre-Phase-3 reports continue to deserialize cleanly.
11
+ *
12
+ * Live grader output that fails the strict {@link GraderJudgmentSchema}
13
+ * parse must NOT fall back to this schema. Drop to
14
+ * `failureMode: "unclassified"` instead. Strict and legacy schemas are
15
+ * deliberate siblings, not a legacy/canonical pair to consolidate.
16
+ *
17
+ * The schema asserts `satisfies z.ZodType<LegacyGraderJudgment>` against
18
+ * the canonical domain type in
19
+ * `packages/core/src/types/legacy-grader-judgment.ts` (D0045 / W0187) —
20
+ * drift between schema and type is a build error. The domain type is
21
+ * authored independently in `@sanity/ailf-core`; this file authors ONLY
22
+ * the schema and never derives the domain type from the schema itself
23
+ * (no schema-derived self-reference allowed by D0045).
24
+ *
25
+ * @see docs/decisions/D0045-type-architecture-and-contract-enforcement.md
26
+ * @see ../promptfoo-grader-output.ts — the strict (live-path) sibling
27
+ * @see docs/design-docs/actionability-ladder/03-structured-grader-judgments.md
28
+ * §"Backwards compatibility"
29
+ */
30
+ import { z } from "zod";
31
+ /**
32
+ * Canonical schema for {@link LegacyGraderJudgment}. Mirrors the Phase 1
33
+ * superset core (`taskId`, `modelId`, `dimension`, `reason`, `score`,
34
+ * optional `outputFailure`). NO GRAD-02 additive fields — those are by
35
+ * construction absent on pre-Phase-3 output.
36
+ *
37
+ * Intentionally NOT `.strict()` — pre-Phase-3 reports may carry stray
38
+ * keys; the legacy parser tolerates them so historical-report rendering
39
+ * keeps working through the GRAD-06 cutover.
40
+ */
41
+ export declare const LegacyGraderJudgmentSchema: z.ZodObject<{
42
+ taskId: z.ZodString;
43
+ modelId: z.ZodString;
44
+ dimension: z.ZodString;
45
+ reason: z.ZodString;
46
+ score: z.ZodNumber;
47
+ outputFailure: z.ZodOptional<z.ZodBoolean>;
48
+ }, z.core.$strip>;
49
+ export type { LegacyGraderJudgment } from "../../../_vendor/ailf-core/index.d.ts";
@@ -0,0 +1,48 @@
1
+ /**
2
+ * promptfoo-grader-output-legacy.ts — Zod schema for the Phase 1
3
+ * free-prose grader-output shape, used by historical-report rendering
4
+ * paths.
5
+ *
6
+ * READ-ONLY: invoked only by historical-report rendering paths through
7
+ * Phase 7 (GRAD-06 cutover). Reports are immutable events — once a
8
+ * Report is written to Content Lake, the structured grader-judgment
9
+ * shape it captures cannot be back-filled. The legacy schema exists so
10
+ * pre-Phase-3 reports continue to deserialize cleanly.
11
+ *
12
+ * Live grader output that fails the strict {@link GraderJudgmentSchema}
13
+ * parse must NOT fall back to this schema. Drop to
14
+ * `failureMode: "unclassified"` instead. Strict and legacy schemas are
15
+ * deliberate siblings, not a legacy/canonical pair to consolidate.
16
+ *
17
+ * The schema asserts `satisfies z.ZodType<LegacyGraderJudgment>` against
18
+ * the canonical domain type in
19
+ * `packages/core/src/types/legacy-grader-judgment.ts` (D0045 / W0187) —
20
+ * drift between schema and type is a build error. The domain type is
21
+ * authored independently in `@sanity/ailf-core`; this file authors ONLY
22
+ * the schema and never derives the domain type from the schema itself
23
+ * (no schema-derived self-reference allowed by D0045).
24
+ *
25
+ * @see docs/decisions/D0045-type-architecture-and-contract-enforcement.md
26
+ * @see ../promptfoo-grader-output.ts — the strict (live-path) sibling
27
+ * @see docs/design-docs/actionability-ladder/03-structured-grader-judgments.md
28
+ * §"Backwards compatibility"
29
+ */
30
+ import { z } from "zod";
31
+ /**
32
+ * Canonical schema for {@link LegacyGraderJudgment}. Mirrors the Phase 1
33
+ * superset core (`taskId`, `modelId`, `dimension`, `reason`, `score`,
34
+ * optional `outputFailure`). NO GRAD-02 additive fields — those are by
35
+ * construction absent on pre-Phase-3 output.
36
+ *
37
+ * Intentionally NOT `.strict()` — pre-Phase-3 reports may carry stray
38
+ * keys; the legacy parser tolerates them so historical-report rendering
39
+ * keeps working through the GRAD-06 cutover.
40
+ */
41
+ export const LegacyGraderJudgmentSchema = z.object({
42
+ taskId: z.string().min(1),
43
+ modelId: z.string().min(1),
44
+ dimension: z.string().min(1),
45
+ reason: z.string(),
46
+ score: z.number(),
47
+ outputFailure: z.boolean().optional(),
48
+ });
@@ -0,0 +1,102 @@
1
+ /**
2
+ * promptfoo-grader-output.ts — Zod schema for the structured grader output
3
+ * (GRAD-02) emitted by the promptfoo grader process and consumed by the
4
+ * eval pipeline.
5
+ *
6
+ * The schema asserts `satisfies z.ZodType<GraderJudgment>` against the
7
+ * canonical domain type in `packages/core/src/types/grader-judgment.ts`
8
+ * (D0045 / W0187) — drift between schema and type is a build error.
9
+ * The domain type was authored independently in Plan 01-01; this file
10
+ * authors ONLY the schema and never derives the domain type from the
11
+ * schema itself (no schema-derived self-reference allowed by D0045).
12
+ *
13
+ * `graderJudgmentsVersion` is co-located with the schema (VER-01 D-02 —
14
+ * source-of-truth file owns its version constant). Bumped by hand when
15
+ * the grader rubric, prompt template, or judgment shape changes.
16
+ *
17
+ * Phase 3 will replace the inline `JSON.parse` at
18
+ * `pipeline/calculate-scores.ts:380-392` (Pitfall #4) so all grader
19
+ * output flows through this schema.
20
+ *
21
+ * @see docs/decisions/D0045-type-architecture-and-contract-enforcement.md
22
+ * @see docs/decisions/D0049-shared-confidence-contract.md
23
+ * @see docs/decisions/D0052-judgment-ref-granularity.md
24
+ * @see docs/design-docs/actionability-ladder/03-structured-grader-judgments.md
25
+ */
26
+ import { z } from "zod";
27
+ /**
28
+ * VER-01 D-02 — co-located version constant. Bumped by hand when the
29
+ * grader rubric, prompt template, or judgment shape changes in a way
30
+ * that should invalidate cached Diagnoses.
31
+ *
32
+ * Phase 3 GRAD-05 bumped this from `"0.1.0"` to `"1.0.0"` (semver
33
+ * major) — the additive GRAD-02 surface is now required + the schema
34
+ * is `.strict()`. AILF has no installed external base; the legacy
35
+ * parser at `./legacy/promptfoo-grader-output-legacy.ts` is the named
36
+ * consumer for already-stored historical reports.
37
+ */
38
+ export declare const graderJudgmentsVersion = "1.0.0";
39
+ /**
40
+ * Canonical schema for {@link GraderJudgment}. Required fields mirror
41
+ * the existing pipeline core (Doc 03 §"existing, unchanged"):
42
+ * `taskId`, `modelId`, `dimension`, `reason`, `score`. Phase 3 GRAD-05
43
+ * has tightened the additive surface to required and added `.strict()`
44
+ * — the schema rejects unknown top-level fields (defense-in-depth against future
45
+ * prompt-injection attempts that try to smuggle keys through the
46
+ * grader emission).
47
+ *
48
+ * Branded `JudgmentId` is represented at runtime by a non-empty string;
49
+ * the schema routes the brand through `brandedString<"JudgmentId">()`
50
+ * — the project's single audited cast site for branded-string
51
+ * schemas (project typescript rule: no `as` on `unknown`).
52
+ */
53
+ export declare const GraderJudgmentSchema: z.ZodObject<{
54
+ taskId: z.ZodString;
55
+ modelId: z.ZodString;
56
+ dimension: z.ZodString;
57
+ reason: z.ZodString;
58
+ score: z.ZodNumber;
59
+ outputFailure: z.ZodOptional<z.ZodBoolean>;
60
+ judgmentId: z.ZodType<import("@sanity/ailf-core").Brand<string, "JudgmentId">, unknown, z.core.$ZodTypeInternals<import("@sanity/ailf-core").Brand<string, "JudgmentId">, unknown>>;
61
+ subJudgments: z.ZodArray<z.ZodObject<{
62
+ criterionId: z.ZodString;
63
+ met: z.ZodBoolean;
64
+ evidence: z.ZodString;
65
+ confidence: z.ZodObject<{
66
+ level: z.ZodEnum<{
67
+ low: "low";
68
+ medium: "medium";
69
+ high: "high";
70
+ }>;
71
+ signalsPresent: z.ZodNumber;
72
+ derivation: z.ZodString;
73
+ }, z.core.$strip>;
74
+ }, z.core.$strip>>;
75
+ docCitations: z.ZodArray<z.ZodObject<{
76
+ documentId: z.ZodString;
77
+ slug: z.ZodOptional<z.ZodString>;
78
+ role: z.ZodEnum<{
79
+ supports: "supports";
80
+ contradicts: "contradicts";
81
+ missing: "missing";
82
+ irrelevant: "irrelevant";
83
+ }>;
84
+ hallucinated: z.ZodOptional<z.ZodBoolean>;
85
+ }, z.core.$strip>>;
86
+ failureMode: z.ZodString;
87
+ confidence: z.ZodObject<{
88
+ level: z.ZodEnum<{
89
+ low: "low";
90
+ medium: "medium";
91
+ high: "high";
92
+ }>;
93
+ signalsPresent: z.ZodNumber;
94
+ derivation: z.ZodString;
95
+ }, z.core.$strip>;
96
+ hallucinationCheckedAgainst: z.ZodArray<z.ZodString>;
97
+ metadata: z.ZodObject<{
98
+ graderModel: z.ZodString;
99
+ graderJudgmentsVersion: z.ZodString;
100
+ }, z.core.$strip>;
101
+ }, z.core.$strict>;
102
+ export type { GraderJudgment } from "../../_vendor/ailf-core/index.d.ts";
@@ -0,0 +1,93 @@
1
+ /**
2
+ * promptfoo-grader-output.ts — Zod schema for the structured grader output
3
+ * (GRAD-02) emitted by the promptfoo grader process and consumed by the
4
+ * eval pipeline.
5
+ *
6
+ * The schema asserts `satisfies z.ZodType<GraderJudgment>` against the
7
+ * canonical domain type in `packages/core/src/types/grader-judgment.ts`
8
+ * (D0045 / W0187) — drift between schema and type is a build error.
9
+ * The domain type was authored independently in Plan 01-01; this file
10
+ * authors ONLY the schema and never derives the domain type from the
11
+ * schema itself (no schema-derived self-reference allowed by D0045).
12
+ *
13
+ * `graderJudgmentsVersion` is co-located with the schema (VER-01 D-02 —
14
+ * source-of-truth file owns its version constant). Bumped by hand when
15
+ * the grader rubric, prompt template, or judgment shape changes.
16
+ *
17
+ * Phase 3 will replace the inline `JSON.parse` at
18
+ * `pipeline/calculate-scores.ts:380-392` (Pitfall #4) so all grader
19
+ * output flows through this schema.
20
+ *
21
+ * @see docs/decisions/D0045-type-architecture-and-contract-enforcement.md
22
+ * @see docs/decisions/D0049-shared-confidence-contract.md
23
+ * @see docs/decisions/D0052-judgment-ref-granularity.md
24
+ * @see docs/design-docs/actionability-ladder/03-structured-grader-judgments.md
25
+ */
26
+ import { z } from "zod";
27
+ import { brandedString, ConfidenceSchema } from "../../_vendor/ailf-core/schemas/index.js";
28
+ /**
29
+ * VER-01 D-02 — co-located version constant. Bumped by hand when the
30
+ * grader rubric, prompt template, or judgment shape changes in a way
31
+ * that should invalidate cached Diagnoses.
32
+ *
33
+ * Phase 3 GRAD-05 bumped this from `"0.1.0"` to `"1.0.0"` (semver
34
+ * major) — the additive GRAD-02 surface is now required + the schema
35
+ * is `.strict()`. AILF has no installed external base; the legacy
36
+ * parser at `./legacy/promptfoo-grader-output-legacy.ts` is the named
37
+ * consumer for already-stored historical reports.
38
+ */
39
+ export const graderJudgmentsVersion = "1.0.0";
40
+ const DocCitationRoleSchema = z.enum([
41
+ "supports",
42
+ "contradicts",
43
+ "missing",
44
+ "irrelevant",
45
+ ]);
46
+ const DocCitationSchema = z.object({
47
+ documentId: z.string().min(1),
48
+ slug: z.string().optional(),
49
+ role: DocCitationRoleSchema,
50
+ hallucinated: z.boolean().optional(),
51
+ });
52
+ const CriterionSubJudgmentSchema = z.object({
53
+ criterionId: z.string().min(1),
54
+ met: z.boolean(),
55
+ evidence: z.string().max(280),
56
+ confidence: ConfidenceSchema,
57
+ });
58
+ /**
59
+ * Canonical schema for {@link GraderJudgment}. Required fields mirror
60
+ * the existing pipeline core (Doc 03 §"existing, unchanged"):
61
+ * `taskId`, `modelId`, `dimension`, `reason`, `score`. Phase 3 GRAD-05
62
+ * has tightened the additive surface to required and added `.strict()`
63
+ * — the schema rejects unknown top-level fields (defense-in-depth against future
64
+ * prompt-injection attempts that try to smuggle keys through the
65
+ * grader emission).
66
+ *
67
+ * Branded `JudgmentId` is represented at runtime by a non-empty string;
68
+ * the schema routes the brand through `brandedString<"JudgmentId">()`
69
+ * — the project's single audited cast site for branded-string
70
+ * schemas (project typescript rule: no `as` on `unknown`).
71
+ */
72
+ export const GraderJudgmentSchema = z
73
+ .object({
74
+ // ── Existing pipeline core (required — Doc 03 §"existing, unchanged") ─
75
+ taskId: z.string().min(1),
76
+ modelId: z.string().min(1),
77
+ dimension: z.string().min(1),
78
+ reason: z.string(),
79
+ score: z.number(),
80
+ outputFailure: z.boolean().optional(),
81
+ // ── GRAD-02 additive — required from Phase 3 GRAD-05 ───────────────
82
+ judgmentId: brandedString(),
83
+ subJudgments: z.array(CriterionSubJudgmentSchema),
84
+ docCitations: z.array(DocCitationSchema),
85
+ failureMode: z.string(),
86
+ confidence: ConfidenceSchema,
87
+ hallucinationCheckedAgainst: z.array(z.string()),
88
+ metadata: z.object({
89
+ graderModel: z.string().min(1),
90
+ graderJudgmentsVersion: z.string().min(1),
91
+ }),
92
+ })
93
+ .strict();
@@ -10,3 +10,6 @@ export { PromptfooEvalAdapter } from "./eval-runners/index.js";
10
10
  export { ConsoleLogger, type ConsoleLoggerOptions, JsonLogger, QuietLogger, } from "./loggers/index.js";
11
11
  export { CliConfigAdapter, FileConfigAdapter } from "./config-sources/index.js";
12
12
  export { DtsPackageSurface, InMemoryPackageSurface, type DtsPackageSurfaceOptions, type PackageRootResolver, parseDtsExports, type ParsedDtsExports, } from "./package-surface/index.js";
13
+ export { GraderJudgmentSchema, graderJudgmentsVersion, } from "./grader-outputs/index.js";
14
+ export { AttributionMetaSchema, JudgmentAttributionSchema, } from "./attribution/index.js";
15
+ export type { AttributionMeta, DocAttribution, GraderJudgment, JudgmentAttribution, } from "../_vendor/ailf-core/index.d.ts";
@@ -10,3 +10,7 @@ export { PromptfooEvalAdapter } from "./eval-runners/index.js";
10
10
  export { ConsoleLogger, JsonLogger, QuietLogger, } from "./loggers/index.js";
11
11
  export { CliConfigAdapter, FileConfigAdapter } from "./config-sources/index.js";
12
12
  export { DtsPackageSurface, InMemoryPackageSurface, parseDtsExports, } from "./package-surface/index.js";
13
+ // Phase 1 Plan 02 — actionability-ladder adapter schemas (GRAD-02, ATTR-01).
14
+ // Named re-exports only (W0124 / D0045).
15
+ export { GraderJudgmentSchema, graderJudgmentsVersion, } from "./grader-outputs/index.js";
16
+ export { AttributionMetaSchema, JudgmentAttributionSchema, } from "./attribution/index.js";
@@ -40,9 +40,29 @@ export declare class FakeLLMClient implements LLMClient {
40
40
  readonly calls: FakeCallRecord[];
41
41
  private readonly completeQueue;
42
42
  private readonly structuredQueue;
43
+ /**
44
+ * Per-cardId keyed responses. A single-value entry is returned on every
45
+ * call for that cardId (repeated calls always get the same response). An
46
+ * array-value entry is consumed in order; once exhausted, calls for that
47
+ * cardId fall back to the FIFO structuredQueue.
48
+ *
49
+ * This is the substrate Plan 07's 17-fixture eval matrix uses to wire
50
+ * deterministic responses to specific LLM cards.
51
+ */
52
+ private readonly keyedResponses;
43
53
  constructor(args?: {
44
54
  completeResponses?: FakeCompletionResponse[];
45
55
  structuredResponses?: FakeStructuredResponse[];
56
+ /**
57
+ * Optional keyed-response map. Keys are `cardId` values from
58
+ * `args.context.cardId`. When a call matches a key the keyed entry is
59
+ * used instead of the FIFO queue.
60
+ *
61
+ * - Single-value entry: same response on every call for this cardId.
62
+ * - Array-value entry: entries consumed in insertion order; once the
63
+ * array is exhausted, falls back to the FIFO queue (throws if that queue is empty too).
64
+ */
65
+ keyedResponses?: Record<string, FakeStructuredResponse | FakeStructuredResponse[]>;
46
66
  });
47
67
  complete(args: LLMCompleteArgs): Promise<LLMCompletion>;
48
68
  completeStructured<T>(args: LLMCompleteStructuredArgs<T>): Promise<LLMStructuredCompletion<T>>;
@@ -11,9 +11,25 @@ export class FakeLLMClient {
11
11
  calls = [];
12
12
  completeQueue;
13
13
  structuredQueue;
14
+ /**
15
+ * Per-cardId keyed responses. A single-value entry is returned on every
16
+ * call for that cardId (repeated calls always get the same response). An
17
+ * array-value entry is consumed in order; once exhausted, calls for that
18
+ * cardId fall back to the FIFO structuredQueue.
19
+ *
20
+ * This is the substrate Plan 07's 17-fixture eval matrix uses to wire
21
+ * deterministic responses to specific LLM cards.
22
+ */
23
+ keyedResponses;
14
24
  constructor(args = {}) {
15
25
  this.completeQueue = [...(args.completeResponses ?? [])];
16
26
  this.structuredQueue = [...(args.structuredResponses ?? [])];
27
+ // Shallow-copy each array so that shift() in completeStructured does not mutate the caller's fixture arrays.
28
+ const keyed = {};
29
+ for (const [key, val] of Object.entries(args.keyedResponses ?? {})) {
30
+ keyed[key] = Array.isArray(val) ? [...val] : val;
31
+ }
32
+ this.keyedResponses = keyed;
17
33
  }
18
34
  async complete(args) {
19
35
  this.calls.push({
@@ -37,13 +53,34 @@ export class FakeLLMClient {
37
53
  };
38
54
  }
39
55
  async completeStructured(args) {
56
+ // Record every call first so test assertions on this.calls are never
57
+ // affected by which branch (keyed vs FIFO) handles the response.
40
58
  this.calls.push({
41
59
  kind: "completeStructured",
42
60
  model: args.model,
43
61
  prompt: args.prompt,
44
62
  ...(args.context ? { context: args.context } : {}),
45
63
  });
46
- const next = this.structuredQueue.shift();
64
+ let next;
65
+ const cardId = args.context?.cardId;
66
+ if (cardId !== undefined && cardId in this.keyedResponses) {
67
+ const entry = this.keyedResponses[cardId];
68
+ if (Array.isArray(entry)) {
69
+ // Array-value: consume one entry per call. When exhausted, fall
70
+ // through to the FIFO queue below.
71
+ if (entry.length > 0) {
72
+ next = entry.shift();
73
+ }
74
+ }
75
+ else {
76
+ // Single-value: return the same response on every call.
77
+ next = entry;
78
+ }
79
+ }
80
+ if (next === undefined) {
81
+ // FIFO fallback (existing behavior)
82
+ next = this.structuredQueue.shift();
83
+ }
47
84
  if (!next) {
48
85
  throw new Error("FakeLLMClient: no more queued structured responses (call exceeded queue)");
49
86
  }
@@ -10,6 +10,7 @@
10
10
  * the adapter never reads `process.env`. The composition root maps env vars
11
11
  * to typed constructor args.
12
12
  */
13
+ import { z } from "zod";
13
14
  import { OpenAIChatResponseSchema, splitModelId, } from "../../_vendor/ailf-core/index.js";
14
15
  import { DEFAULT_RETRY_POLICY, parseRetryAfterSeconds, runWithRetry, } from "./retry.js";
15
16
  const DEFAULT_BASE_URL = "https://api.openai.com/v1/chat/completions";
@@ -67,10 +68,25 @@ export class OpenAILLMClient {
67
68
  }
68
69
  async completeStructured(args) {
69
70
  const { modelName } = splitModelId(args.model);
71
+ // Derive the JSON Schema from the caller's Zod schema. Zod v4 natively
72
+ // emits `additionalProperties: false` on every nested z.object node —
73
+ // this is required for OpenAI strict-mode.
74
+ const jsonSchema = z.toJSONSchema(args.schema, { target: "draft-2020-12" });
75
+ // OpenAI strict-mode requires the root to be a plain object schema (no
76
+ // anyOf/oneOf/allOf at the top level). Union schemas produce anyOf or
77
+ // oneOf at the root — callers must wrap them in a discriminator object.
78
+ assertSchemaIsObjectRoot(jsonSchema, args.model);
70
79
  const body = buildBody(modelName, args.prompt, {
71
- temperature: args.temperature,
72
- maxTokens: args.maxTokens,
73
- responseFormat: { type: "json_object" },
80
+ temperature: args.temperature ?? 0.1,
81
+ maxTokens: args.maxTokens ?? 2000,
82
+ responseFormat: {
83
+ type: "json_schema",
84
+ json_schema: {
85
+ name: args.context?.cardId ?? "structured_output",
86
+ schema: jsonSchema,
87
+ strict: true,
88
+ },
89
+ },
74
90
  });
75
91
  const data = await this.callApi(body);
76
92
  const raw = data.choices?.[0]?.message?.content;
@@ -84,6 +100,9 @@ export class OpenAILLMClient {
84
100
  catch (err) {
85
101
  throw new Error(`OpenAI structured completion returned invalid JSON for model ${args.model}: ${err instanceof Error ? err.message : String(err)}`, { cause: err });
86
102
  }
103
+ // strict:true constrains generation to the schema (refusals and maxTokens
104
+ // truncation are the documented exceptions), but the Zod parse is still load-bearing — it brands the result as T and is
105
+ // the only contract the engine trusts (D0045 parse-don't-validate).
87
106
  const value = args.schema.parse(parsed);
88
107
  const usage = extractUsage(data.usage);
89
108
  const cost = this.computeCost(modelName, usage);
@@ -145,6 +164,36 @@ export class OpenAILLMClient {
145
164
  `cost_usd=${cost.toFixed(6)}`);
146
165
  }
147
166
  }
167
+ /**
168
+ * Assert that the JSON Schema root is a plain object type.
169
+ *
170
+ * OpenAI strict-mode requires the root schema to be `{ type: "object" }`.
171
+ * Any other root is rejected: `anyOf`, `oneOf` and `allOf` roots are reported as
172
+ * `z.union`, `z.discriminatedUnion` and `z.intersection` respectively — callers
173
+ * must wrap the union in a discriminator object before passing it to `completeStructured`.
174
+ *
175
+ * Per AI-SPEC §3 Pitfall 6 + T-05-03-01: caught at request-build time to
176
+ * avoid wasting API budget on a guaranteed 400.
177
+ */
178
+ function assertSchemaIsObjectRoot(schema, modelId) {
179
+ if (typeof schema !== "object" || schema === null) {
180
+ throw new Error(`OpenAILLMClient: OpenAI strict-mode requires a single z.object at the ` +
181
+ `schema root for model ${modelId}; got non-object JSON Schema root.`);
182
+ }
183
+ const node = schema;
184
+ if (node.type !== "object") {
185
+ // Identify the kind so the error message is actionable.
186
+ const kind = "anyOf" in node
187
+ ? "z.union"
188
+ : "oneOf" in node
189
+ ? "z.discriminatedUnion"
190
+ : "allOf" in node
191
+ ? "z.intersection"
192
+ : String(node.type ?? "unknown");
193
+ throw new Error(`OpenAILLMClient: OpenAI strict-mode requires a single z.object at the ` +
194
+ `schema root; got ${kind}. Wrap the union in a discriminator object.`);
195
+ }
196
+ }
148
197
  function buildBody(modelName, prompt, opts) {
149
198
  const body = {
150
199
  model: modelName,
@@ -55,9 +55,13 @@ interface ContentLakeCanonicalDoc {
55
55
  sectionSlug?: string;
56
56
  slug?: string;
57
57
  }
58
+ interface ContentLakeCriterion {
59
+ id?: string;
60
+ text?: string;
61
+ }
58
62
  /** Assertion shape from the Content Lake (mirrors the Studio schema). */
59
63
  interface ContentLakeAssertion {
60
- criteria?: string[];
64
+ criteria?: ContentLakeCriterion[];
61
65
  template?: string;
62
66
  threshold?: number;
63
67
  type?: string;
@@ -73,7 +73,13 @@ const TASKS_QUERY = /* groq */ `
73
73
  perspective,
74
74
  reason
75
75
  },
76
- "assertions": coalesce(assertions, assert),
76
+ "assertions": coalesce(assertions, assert)[] {
77
+ type, template, weight, value, threshold,
78
+ "criteria": criteria[] {
79
+ "id": coalesce(id.current, _key),
80
+ "text": coalesce(text, @)
81
+ }
82
+ },
77
83
  rawAssert,
78
84
  baseline,
79
85
  tags,
@@ -256,8 +262,28 @@ function mapAssertions(raw) {
256
262
  .filter((a) => !!a.type)
257
263
  .map((a) => {
258
264
  if (a.type === "llm-rubric" && a.template && a.criteria) {
265
+ // Tighten the runtime contract: the GROQ projection's
266
+ // `coalesce(text, @)` falls through to the entire criterion
267
+ // element when `text` is missing, so a partial legacy criterion
268
+ // like `{_key: "abc"}` arrives here as `{ id: "abc", text: {...} }`
269
+ // — `text` set to the whole `@` object. Explicit type checks
270
+ // drop those with a diagnostic, instead of letting the non-string
271
+ // `text` propagate until the outer ContentLakeAuthorableTaskSchema
272
+ // parse fails deep inside the assertions array (noisy diagnostic).
259
273
  return {
260
- criteria: a.criteria,
274
+ criteria: a.criteria
275
+ .filter((c) => {
276
+ if (!c)
277
+ return false;
278
+ const idOk = typeof c.id === "string" && c.id.length > 0;
279
+ const textOk = typeof c.text === "string" && c.text.length > 0;
280
+ if (!idOk || !textOk) {
281
+ console.warn(`[ContentLakeTaskSource] dropping malformed criterion: ${JSON.stringify(c).slice(0, 100)}`);
282
+ return false;
283
+ }
284
+ return true;
285
+ })
286
+ .map((c) => ({ id: c.id, text: c.text })),
261
287
  template: a.template,
262
288
  type: "llm-rubric",
263
289
  ...(a.weight !== undefined ? { weight: a.weight } : {}),