npm - @sanity/ailf - Versions diffs - 4.6.0 → 5.0.0 - Mend

@sanity/ailf 4.6.0 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (119) hide show

package/canonical/grader-references/agent-harness-tools.yaml +42 -0
package/canonical/grader-references/knowledge-probe-recall.yaml +36 -0
package/canonical/grader-references/mcp-server-spec.yaml +51 -0
package/canonical/grader-references/portable-text.yaml +48 -0
package/config/rubrics.ts +38 -2
package/dist/_vendor/ailf-core/artifact-registry.d.ts +60 -2
package/dist/_vendor/ailf-core/artifact-registry.js +288 -7
package/dist/_vendor/ailf-core/examples/index.d.ts +125 -26
package/dist/_vendor/ailf-core/examples/index.js +146 -47
package/dist/_vendor/ailf-core/ports/context.d.ts +8 -0
package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +15 -0
package/dist/_vendor/ailf-core/schemas/branded-string.d.ts +40 -0
package/dist/_vendor/ailf-core/schemas/branded-string.js +45 -0
package/dist/_vendor/ailf-core/schemas/confidence-schema.d.ts +36 -0
package/dist/_vendor/ailf-core/schemas/confidence-schema.js +32 -0
package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
package/dist/_vendor/ailf-core/schemas/eval-config.js +8 -4
package/dist/_vendor/ailf-core/schemas/index.d.ts +2 -0
package/dist/_vendor/ailf-core/schemas/index.js +9 -0
package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +1 -0
package/dist/_vendor/ailf-core/schemas/pipeline-request.js +1 -0
package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +34 -8
package/dist/_vendor/ailf-core/schemas/pipeline.js +23 -1
package/dist/_vendor/ailf-core/services/diagnosis/registry.d.ts +40 -0
package/dist/_vendor/ailf-core/services/diagnosis/registry.js +25 -0
package/dist/_vendor/ailf-core/services/diagnosis-runner.d.ts +19 -0
package/dist/_vendor/ailf-core/services/diagnosis-runner.js +19 -0
package/dist/_vendor/ailf-core/services/index.d.ts +2 -0
package/dist/_vendor/ailf-core/services/index.js +5 -0
package/dist/_vendor/ailf-core/services/report-to-markdown.js +3 -2
package/dist/_vendor/ailf-core/types/attribution.d.ts +82 -0
package/dist/_vendor/ailf-core/types/attribution.js +18 -0
package/dist/_vendor/ailf-core/types/branded-ids.d.ts +26 -1
package/dist/_vendor/ailf-core/types/branded-ids.js +80 -4
package/dist/_vendor/ailf-core/types/confidence.d.ts +1 -1
package/dist/_vendor/ailf-core/types/confidence.js +7 -0
package/dist/_vendor/ailf-core/types/diagnosis.d.ts +169 -0
package/dist/_vendor/ailf-core/types/diagnosis.js +17 -0
package/dist/_vendor/ailf-core/types/generalized-task.d.ts +16 -1
package/dist/_vendor/ailf-core/types/grader-judgment.d.ts +125 -0
package/dist/_vendor/ailf-core/types/grader-judgment.js +30 -0
package/dist/_vendor/ailf-core/types/index.d.ts +80 -29
package/dist/_vendor/ailf-core/types/index.js +15 -1
package/dist/_vendor/ailf-core/types/legacy-grader-judgment.d.ts +55 -0
package/dist/_vendor/ailf-core/types/legacy-grader-judgment.js +30 -0
package/dist/_vendor/ailf-core/types/pipeline-request.d.ts +1 -0
package/dist/_vendor/ailf-core/types/repo-config.d.ts +8 -0
package/dist/_vendor/ailf-shared/document-ref.d.ts +1 -1
package/dist/adapters/api-client/build-request.d.ts +1 -0
package/dist/adapters/api-client/build-request.js +3 -0
package/dist/adapters/attribution/attribution-meta-writer.d.ts +35 -0
package/dist/adapters/attribution/attribution-meta-writer.js +34 -0
package/dist/adapters/attribution/index.d.ts +9 -0
package/dist/adapters/attribution/index.js +8 -0
package/dist/adapters/attribution/per-entry-attribution-writer.d.ts +56 -0
package/dist/adapters/attribution/per-entry-attribution-writer.js +49 -0
package/dist/adapters/config-sources/file-config-adapter.js +1 -0
package/dist/adapters/grader-outputs/index.d.ts +10 -0
package/dist/adapters/grader-outputs/index.js +8 -0
package/dist/adapters/grader-outputs/legacy/index.d.ts +11 -0
package/dist/adapters/grader-outputs/legacy/index.js +10 -0
package/dist/adapters/grader-outputs/legacy/promptfoo-grader-output-legacy.d.ts +49 -0
package/dist/adapters/grader-outputs/legacy/promptfoo-grader-output-legacy.js +48 -0
package/dist/adapters/grader-outputs/promptfoo-grader-output.d.ts +102 -0
package/dist/adapters/grader-outputs/promptfoo-grader-output.js +93 -0
package/dist/adapters/index.d.ts +3 -0
package/dist/adapters/index.js +4 -0
package/dist/adapters/task-sources/content-lake-task-source.d.ts +5 -1
package/dist/adapters/task-sources/content-lake-task-source.js +28 -2
package/dist/adapters/task-sources/repo-schemas.d.ts +79 -11
package/dist/adapters/task-sources/repo-schemas.js +19 -2
package/dist/commands/calculate-scores.js +1 -1
package/dist/commands/explain-handler.js +1 -1
package/dist/commands/lookup-doc.d.ts +1 -1
package/dist/commands/lookup-doc.js +3 -3
package/dist/commands/pipeline-action.d.ts +6 -0
package/dist/commands/pipeline-action.js +2 -0
package/dist/commands/remote-pipeline.js +1 -0
package/dist/composition-root.d.ts +36 -0
package/dist/composition-root.js +48 -0
package/dist/config/rubrics.ts +38 -2
package/dist/grader/agent-harness.d.ts +14 -0
package/dist/grader/agent-harness.js +17 -0
package/dist/grader/common.d.ts +17 -0
package/dist/grader/common.js +21 -0
package/dist/grader/index.d.ts +38 -0
package/dist/grader/index.js +75 -0
package/dist/grader/knowledge-probe.d.ts +14 -0
package/dist/grader/knowledge-probe.js +18 -0
package/dist/grader/literacy.d.ts +13 -0
package/dist/grader/literacy.js +17 -0
package/dist/grader/mcp.d.ts +14 -0
package/dist/grader/mcp.js +18 -0
package/dist/orchestration/build-app-context.js +1 -0
package/dist/orchestration/build-step-sequence.js +5 -0
package/dist/orchestration/steps/calculate-scores-step.js +23 -1
package/dist/orchestration/steps/compute-attribution-step.d.ts +44 -0
package/dist/orchestration/steps/compute-attribution-step.js +279 -0
package/dist/orchestration/steps/gap-analysis-step.js +35 -7
package/dist/orchestration/steps/index.d.ts +1 -0
package/dist/orchestration/steps/index.js +1 -0
package/dist/pipeline/attribution.d.ts +15 -0
package/dist/pipeline/attribution.js +18 -9
package/dist/pipeline/borderline-consensus-runner.d.ts +63 -0
package/dist/pipeline/borderline-consensus-runner.js +124 -0
package/dist/pipeline/borderline-detector.d.ts +24 -0
package/dist/pipeline/borderline-detector.js +26 -0
package/dist/pipeline/calculate-scores.d.ts +114 -3
package/dist/pipeline/calculate-scores.js +426 -24
package/dist/pipeline/compiler/literacy-bridge.d.ts +1 -1
package/dist/pipeline/compiler/literacy-bridge.js +35 -17
package/dist/pipeline/compiler/rubric-resolution.d.ts +15 -0
package/dist/pipeline/compiler/rubric-resolution.js +9 -1
package/dist/pipeline/compute-attribution.d.ts +80 -0
package/dist/pipeline/compute-attribution.js +196 -0
package/dist/pipeline/failure-modes.d.ts +52 -17
package/dist/pipeline/failure-modes.js +178 -117
package/dist/pipeline/map-request-to-config.js +1 -0
package/package.json +6 -4

package/dist/_vendor/ailf-core/schemas/confidence-schema.d.ts ADDED Viewed

@@ -0,0 +1,36 @@
+/**
+ * confidence-schema.ts — shared Zod schema for the D0049 `Confidence` triple.
+ *
+ * Authored ONCE here so adapter schemas under
+ * `packages/eval/src/adapters/grader-outputs/` and
+ * `packages/eval/src/adapters/attribution/` import a single shared schema
+ * fragment instead of redeclaring the shape inline. The schema asserts
+ * `satisfies z.ZodType<Confidence>` against the domain type in
+ * `packages/core/src/types/confidence.ts` so drift is a build error.
+ *
+ * NON-BOUNDARY HELPER: this file lives outside the D0045 SCAN_ROOTS gate
+ * by intent — it is a reusable schema fragment, not a trust boundary.
+ * Consumers import via the pinned subpath export
+ * `@sanity/ailf-core/schemas` (declared in `packages/core/package.json`),
+ * NOT through the top-level barrel — that pin is the
+ * single legal access path so all adapter sites use the same specifier.
+ *
+ * @see docs/decisions/D0045-type-architecture-and-contract-enforcement.md
+ * @see docs/decisions/D0049-shared-confidence-contract.md
+ */
+import { z } from "zod";
+/**
+ * Shared schema for {@link Confidence}. The `derivation` field is the
+ * open `ConfidenceDerivation` tag; we accept any non-empty string so
+ * future emitters can mint their own identifiers without editing this
+ * package (matches `isConfidence`'s runtime guard).
+ *
+ * Declared shape: `level` is one of `"low" | "medium" | "high"`,
+ * `signalsPresent` is a number, and `derivation` is a string. This
+ * declaration only carries the static types; value-level constraints
+ * (such as the non-empty check mentioned above) belong to the runtime
+ * schema and cannot be read off these types.
+ */
+export declare const ConfidenceSchema: z.ZodObject<{
+    level: z.ZodEnum<{
+        low: "low";
+        medium: "medium";
+        high: "high";
+    }>;
+    signalsPresent: z.ZodNumber;
+    derivation: z.ZodString;
+}, z.core.$strip>;

package/dist/_vendor/ailf-core/schemas/confidence-schema.js ADDED Viewed

@@ -0,0 +1,32 @@
+/**
+ * confidence-schema.ts — shared Zod schema for the D0049 `Confidence` triple.
+ *
+ * Authored ONCE here so adapter schemas under
+ * `packages/eval/src/adapters/grader-outputs/` and
+ * `packages/eval/src/adapters/attribution/` import a single shared schema
+ * fragment instead of redeclaring the shape inline. The schema asserts
+ * `satisfies z.ZodType<Confidence>` against the domain type in
+ * `packages/core/src/types/confidence.ts` so drift is a build error.
+ *
+ * NON-BOUNDARY HELPER: this file lives outside the D0045 SCAN_ROOTS gate
+ * by intent — it is a reusable schema fragment, not a trust boundary.
+ * Consumers import via the pinned subpath export
+ * `@sanity/ailf-core/schemas` (declared in `packages/core/package.json`),
+ * NOT through the top-level barrel — that pin is the
+ * single legal access path so all adapter sites use the same specifier.
+ *
+ * @see docs/decisions/D0045-type-architecture-and-contract-enforcement.md
+ * @see docs/decisions/D0049-shared-confidence-contract.md
+ */
+import { z } from "zod";
+/**
+ * Shared schema for {@link Confidence}. The `derivation` field is the
+ * open `ConfidenceDerivation` tag; we accept any non-empty string so
+ * future emitters can mint their own identifiers without editing this
+ * package (matches `isConfidence`'s runtime guard).
+ *
+ * Validation rules applied by this schema:
+ * - `level` must be exactly `"high"`, `"medium"`, or `"low"`.
+ * - `signalsPresent` must be a non-negative integer.
+ * - `derivation` must be a string of at least one character.
+ */
+export const ConfidenceSchema = z.object({
+    level: z.enum(["high", "medium", "low"]),
+    signalsPresent: z.number().int().nonnegative(),
+    derivation: z.string().min(1),
+});

package/dist/_vendor/ailf-core/schemas/eval-config.d.ts CHANGED Viewed

@@ -41,6 +41,7 @@ export declare const EvalConfigSchema: z.ZodObject<{
     execution: z.ZodOptional<z.ZodObject<{
         concurrency: z.ZodOptional<z.ZodNumber>;
         graderReplications: z.ZodOptional<z.ZodNumber>;
+        borderlineReplications: z.ZodOptional<z.ZodNumber>;
         gapAnalysis: z.ZodOptional<z.ZodBoolean>;
         apiUrl: z.ZodOptional<z.ZodString>;
     }, z.core.$strip>>;

package/dist/_vendor/ailf-core/schemas/eval-config.js CHANGED Viewed

@@ -85,15 +85,19 @@ export const EvalConfigSchema = z
      * `concurrency`, `gapAnalysis`, and `graderReplications` fields and adds
      * `apiUrl` to the same group.
      *
-     * - `concurrency`         — max parallel API calls
-     * - `graderReplications`  — grader consistency replications
-     * - `gapAnalysis`         — enable failure-mode + impact analysis (default true)
-     * - `apiUrl`              — AILF API base URL (default https://ailf-api.sanity.build)
+     * - `concurrency`             — max parallel API calls
+     * - `graderReplications`      — grader consistency replications
+     * - `borderlineReplications`  — replications per borderline judgment
+     *                               for the GRAD-04 intra-grader consensus
+     *                               pass (default 3 in composition-root)
+     * - `gapAnalysis`             — enable failure-mode + impact analysis (default true)
+     * - `apiUrl`                  — AILF API base URL (default https://ailf-api.sanity.build)
      */
     execution: z
         .object({
         concurrency: z.number().int().positive().optional(),
         graderReplications: z.number().int().positive().optional(),
+        borderlineReplications: z.number().int().positive().optional(),
         gapAnalysis: z.boolean().optional(),
         apiUrl: z.string().url().optional(),
     })

package/dist/_vendor/ailf-core/schemas/index.d.ts CHANGED Viewed

@@ -19,3 +19,5 @@ export * from "./schedules.js";
 export * from "./sinks.js";
 export * from "./symbol-preflight-report.js";
 export * from "./test-budgets.js";
+export { ConfidenceSchema } from "./confidence-schema.js";
+export { brandedString } from "./branded-string.js";

package/dist/_vendor/ailf-core/schemas/index.js CHANGED Viewed

@@ -19,3 +19,12 @@ export * from "./schedules.js";
 export * from "./sinks.js";
 export * from "./symbol-preflight-report.js";
 export * from "./test-budgets.js";
+// Phase 1 Plan 02 — shared schema fragment for D0049 Confidence.
+// Named re-export only (W0124 / D0045) and pinned-subpath access path
+// `@sanity/ailf-core/schemas` for adapter consumers.
+export { ConfidenceSchema } from "./confidence-schema.js";
+// Phase 1 Plan 03 — single audited cast site for `Brand<string, T>`
+// schemas. Adapters MUST route branded-field declarations through this
+// helper instead of replicating `as unknown as z.ZodType<…>` at each
+// schema author site (project rule: no `as` on `unknown`).
+export { brandedString } from "./branded-string.js";

package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts CHANGED Viewed

@@ -48,6 +48,7 @@ export declare const PipelineRequestSchema: z.ZodObject<{
         "with-docs": "with-docs";
     }>>;
     graderReplications: z.ZodOptional<z.ZodNumber>;
+    borderlineReplications: z.ZodOptional<z.ZodNumber>;
     headers: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
     inlineTasks: z.ZodOptional<z.ZodArray<z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
     jobId: z.ZodOptional<z.ZodString>;

package/dist/_vendor/ailf-core/schemas/pipeline-request.js CHANGED Viewed

@@ -114,6 +114,7 @@ export const PipelineRequestSchema = z.object({
      */
     graderContext: z.enum(["rubric-only", "with-docs"]).optional(),
     graderReplications: z.number().int().positive().optional(),
+    borderlineReplications: z.number().int().positive().optional(),
     headers: z.record(z.string(), z.string()).optional(),
     inlineTasks: z.array(z.record(z.string(), z.unknown())).optional(),
     jobId: z.string().optional(),

package/dist/_vendor/ailf-core/schemas/pipeline.d.ts CHANGED Viewed

@@ -20,6 +20,7 @@ import { z } from "zod";
 export declare const RubricTemplateSchema: z.ZodObject<{
     criteria_label: z.ZodOptional<z.ZodNullable<z.ZodString>>;
     dimension: z.ZodOptional<z.ZodString>;
+    failureModes: z.ZodOptional<z.ZodArray<z.ZodString>>;
     header: z.ZodString;
     scale: z.ZodArray<z.ZodString>;
 }, z.core.$strip>;
@@ -52,6 +53,7 @@ export declare const RubricConfigSchema: z.ZodObject<{
     templates: z.ZodRecord<z.ZodString, z.ZodObject<{
         criteria_label: z.ZodOptional<z.ZodNullable<z.ZodString>>;
         dimension: z.ZodOptional<z.ZodString>;
+        failureModes: z.ZodOptional<z.ZodArray<z.ZodString>>;
         header: z.ZodString;
         scale: z.ZodArray<z.ZodString>;
     }, z.core.$strip>>;
@@ -112,7 +114,10 @@ export type FeatureRegistry = z.infer<typeof FeatureRegistrySchema>;
  * and provides task-specific criteria.
  */
 declare const TemplatedLlmRubricAssertSchema: z.ZodObject<{
-    criteria: z.ZodArray<z.ZodString>;
+    criteria: z.ZodArray<z.ZodObject<{
+        id: z.ZodString;
+        text: z.ZodString;
+    }, z.core.$strip>>;
     template: z.ZodString;
     type: z.ZodLiteral<"llm-rubric">;
     weight: z.ZodOptional<z.ZodNumber>;
@@ -129,7 +134,10 @@ export type TemplatedLlmRubricAssert = z.infer<typeof TemplatedLlmRubricAssertSc
  * is gone, but union is more flexible for future additions).
  */
 export declare const AssertionSchema: z.ZodUnion<readonly [z.ZodObject<{
-    criteria: z.ZodArray<z.ZodString>;
+    criteria: z.ZodArray<z.ZodObject<{
+        id: z.ZodString;
+        text: z.ZodString;
+    }, z.core.$strip>>;
     template: z.ZodString;
     type: z.ZodLiteral<"llm-rubric">;
     weight: z.ZodOptional<z.ZodNumber>;
@@ -174,7 +182,10 @@ export type CanonicalDoc = z.infer<typeof CanonicalDocSchema>;
  */
 export declare const SingleTaskSchema: z.ZodObject<{
     assert: z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
-        criteria: z.ZodArray<z.ZodString>;
+        criteria: z.ZodArray<z.ZodObject<{
+            id: z.ZodString;
+            text: z.ZodString;
+        }, z.core.$strip>>;
         template: z.ZodString;
         type: z.ZodLiteral<"llm-rubric">;
         weight: z.ZodOptional<z.ZodNumber>;
@@ -233,7 +244,10 @@ export type SingleTask = z.infer<typeof SingleTaskSchema>;
  */
 export declare const LegacyTaskSchema: z.ZodObject<{
     assert: z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
-        criteria: z.ZodArray<z.ZodString>;
+        criteria: z.ZodArray<z.ZodObject<{
+            id: z.ZodString;
+            text: z.ZodString;
+        }, z.core.$strip>>;
         template: z.ZodString;
         type: z.ZodLiteral<"llm-rubric">;
         weight: z.ZodOptional<z.ZodNumber>;
@@ -269,7 +283,10 @@ export type LegacyTask = z.infer<typeof LegacyTaskSchema>;
  */
 export declare const TaskEntrySchema: z.ZodUnion<readonly [z.ZodObject<{
     assert: z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
-        criteria: z.ZodArray<z.ZodString>;
+        criteria: z.ZodArray<z.ZodObject<{
+            id: z.ZodString;
+            text: z.ZodString;
+        }, z.core.$strip>>;
         template: z.ZodString;
         type: z.ZodLiteral<"llm-rubric">;
         weight: z.ZodOptional<z.ZodNumber>;
@@ -321,7 +338,10 @@ export declare const TaskEntrySchema: z.ZodUnion<readonly [z.ZodObject<{
     }, z.core.$loose>;
 }, z.core.$strip>, z.ZodObject<{
     assert: z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
-        criteria: z.ZodArray<z.ZodString>;
+        criteria: z.ZodArray<z.ZodObject<{
+            id: z.ZodString;
+            text: z.ZodString;
+        }, z.core.$strip>>;
         template: z.ZodString;
         type: z.ZodLiteral<"llm-rubric">;
         weight: z.ZodOptional<z.ZodNumber>;
@@ -355,7 +375,10 @@ export type TaskEntryParsed = z.infer<typeof TaskEntrySchema>;
  */
 export declare const TaskFileSchema: z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
     assert: z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
-        criteria: z.ZodArray<z.ZodString>;
+        criteria: z.ZodArray<z.ZodObject<{
+            id: z.ZodString;
+            text: z.ZodString;
+        }, z.core.$strip>>;
         template: z.ZodString;
         type: z.ZodLiteral<"llm-rubric">;
         weight: z.ZodOptional<z.ZodNumber>;
@@ -407,7 +430,10 @@ export declare const TaskFileSchema: z.ZodArray<z.ZodUnion<readonly [z.ZodObject
     }, z.core.$loose>;
 }, z.core.$strip>, z.ZodObject<{
     assert: z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
-        criteria: z.ZodArray<z.ZodString>;
+        criteria: z.ZodArray<z.ZodObject<{
+            id: z.ZodString;
+            text: z.ZodString;
+        }, z.core.$strip>>;
         template: z.ZodString;
         type: z.ZodLiteral<"llm-rubric">;
         weight: z.ZodOptional<z.ZodNumber>;

package/dist/_vendor/ailf-core/schemas/pipeline.js CHANGED Viewed

@@ -26,6 +26,14 @@ export const RubricTemplateSchema = z.object({
         .min(1, "criteria_label must be a non-empty string")
         .nullish(),
     dimension: z.string().min(1).optional(),
+    /**
+     * Plan 03-02 — per-dimension legal failure-mode list. When present and
+     * non-empty, the runtime rubric assembler announces the legal modes to
+     * the grader before the structured-shape footer (Plan 03-01).
+     * Stamped at config-load time by `failureModesForDimension(dimension)`
+     * from `packages/eval/src/grader/index.ts`.
+     */
+    failureModes: z.array(z.string().min(1)).optional(),
     header: z.string().min(1, "header must be a non-empty string"),
     scale: z
         .array(z.string().min(1))
@@ -118,13 +126,27 @@ export const FeatureRegistrySchema = z.object({
 // ---------------------------------------------------------------------------
 // Assertion schemas — one per Promptfoo assertion type
 // ---------------------------------------------------------------------------
+// TODO(GRAD-01 follow-up): This schema duplicates
+// packages/eval/src/adapters/task-sources/repo-schemas.ts:TemplatedAssertionSchema.
+// Retiring requires reverse-extracting the canonical schema into
+// packages/core/src/schemas/ (D0048 prevents importing from packages/eval).
+// Out of Phase 2 scope; tracked separately.
+//
+// The `satisfies z.ZodType<CriterionRef>` clause (present in the TypeScript source only; it is erased from this compiled output) asserts this duplicate
+// stays shape-compatible with the canonical domain type in
+// `@sanity/ailf-core` (D0045). If a future edit adds a third field to one
+// schema and not the other, this build error catches the drift.
+const CriterionRefShape = z.object({
+    id: z.string().min(1, "id must be a non-empty slug"),
+    text: z.string().min(1, "text must be a non-empty string"),
+});
 /**
  * Templated llm-rubric assertion — references a rubric template by key
  * and provides task-specific criteria.
  */
 const TemplatedLlmRubricAssertSchema = z.object({
     criteria: z
-        .array(z.string().min(1))
+        .array(CriterionRefShape)
         .min(1, "criteria must have at least one entry"),
     template: z.string().min(1, "template must be a non-empty string"),
     type: z.literal("llm-rubric"),

package/dist/_vendor/ailf-core/services/diagnosis/registry.d.ts ADDED Viewed

@@ -0,0 +1,40 @@
+/**
+ * Diagnosis card registry — placeholder home for Phase 5 cards.
+ *
+ * Phase 5 cards declare:
+ *
+ *   export const card = {
+ *     type, version, schema, generate
+ *   } satisfies CardDefinition
+ *
+ * The compound `cardVersion` (VER-01 / D-02) is built from per-card
+ * `version` by sorting `${type}@${version}` ascending and joining with
+ * `,`. Phase 1 lands the empty registry; Phase 5 registers cards via
+ * the composition root, not by mutating this binding.
+ *
+ * @see docs/decisions/D0045-type-architecture-and-contract-enforcement.md
+ * @see docs/decisions/D0048-engine-homes-for-cli-api-parity.md
+ * @see .planning/phases/01-foundation-contracts-cross-cutting-schemas/01-CONTEXT.md (D-02, D-08)
+ */
+import type { z } from "zod";
+import type { CardType, DiagnosisCard } from "../../types/diagnosis.js";
+/**
+ * Per-card definition. `schema` is the per-card body parser; `generate`
+ * is the runner-invoked builder. Phase 5 fills in the
+ * `report+attribution+llm` parameter list when card files land — Phase 1
+ * keeps the signature minimal so the registry compiles before any cards
+ * exist.
+ *
+ * `TBody` is the type the card `schema` parses to and defaults to
+ * `unknown`. All four members are `readonly`. In this version
+ * `generate` takes no arguments and resolves to a `DiagnosisCard`.
+ */
+export interface CardDefinition<TBody = unknown> {
+    readonly type: CardType;
+    readonly version: string;
+    readonly schema: z.ZodType<TBody>;
+    readonly generate: () => Promise<DiagnosisCard>;
+}
+/**
+ * Phase 1: empty entrypoint. Phase 5 cards register here through the
+ * composition root. The exported binding is a `ReadonlyMap` so
+ * downstream consumers cannot mutate it (would re-introduce the vitest
+ * worker-leak hazard).
+ *
+ * Keys are `CardType` values and entries are `CardDefinition` objects
+ * with the default `unknown` body type. `ReadonlyMap` is a type-level
+ * restriction: it hides the mutating methods from typed callers but
+ * does not freeze the underlying object.
+ */
+export declare const cardRegistry: ReadonlyMap<CardType, CardDefinition>;

package/dist/_vendor/ailf-core/services/diagnosis/registry.js ADDED Viewed

@@ -0,0 +1,25 @@
+/**
+ * Diagnosis card registry — placeholder home for Phase 5 cards.
+ *
+ * Phase 5 cards declare:
+ *
+ *   export const card = {
+ *     type, version, schema, generate
+ *   } satisfies CardDefinition
+ *
+ * The compound `cardVersion` (VER-01 / D-02) is built from per-card
+ * `version` by sorting `${type}@${version}` ascending and joining with
+ * `,`. Phase 1 lands the empty registry; Phase 5 registers cards via
+ * the composition root, not by mutating this binding.
+ *
+ * @see docs/decisions/D0045-type-architecture-and-contract-enforcement.md
+ * @see docs/decisions/D0048-engine-homes-for-cli-api-parity.md
+ * @see .planning/phases/01-foundation-contracts-cross-cutting-schemas/01-CONTEXT.md (D-02, D-08)
+ */
+/**
+ * Phase 1: empty entrypoint. Phase 5 cards register here through the
+ * composition root. The exported binding is a `ReadonlyMap` so
+ * downstream consumers cannot mutate it (would re-introduce the vitest
+ * worker-leak hazard).
+ *
+ * NOTE: at runtime this is a plain, initially empty `Map` instance.
+ * The read-only guarantee comes solely from the `ReadonlyMap` typing in
+ * the accompanying declaration file; untyped JavaScript callers can
+ * still call `set` or `delete` on it, so nothing here enforces
+ * immutability.
+ */
+export const cardRegistry = new Map();

package/dist/_vendor/ailf-core/services/diagnosis-runner.d.ts ADDED Viewed

@@ -0,0 +1,19 @@
+/**
+ * Diagnosis runner — engine entry point (D0048).
+ *
+ * Phase 1 lands the version constant only; the runner factory + cache
+ * lookup land in Phase 5.
+ *
+ * @see docs/decisions/D0048-engine-homes-for-cli-api-parity.md
+ * @see .planning/phases/01-foundation-contracts-cross-cutting-schemas/01-CONTEXT.md (D-02)
+ */
+/**
+ * Bumped when the runner's selection logic, prompt orchestration, or
+ * card-set composition changes in a way that should invalidate cached
+ * Diagnoses (VER-01 / D-02). Co-located here so the cache-invalidation
+ * contract test reads the canonical value.
+ *
+ * `export const` (never `export let`) — module-scope mutables leak
+ * across vitest workers (cross-cutting hazard #2).
+ *
+ * The declared type is the string literal `"0.1.0"` rather than
+ * `string`, so a version bump also changes the exported type.
+ */
+export declare const diagnosisVersion = "0.1.0";

package/dist/_vendor/ailf-core/services/diagnosis-runner.js ADDED Viewed

@@ -0,0 +1,19 @@
+/**
+ * Diagnosis runner — engine entry point (D0048).
+ *
+ * Phase 1 lands the version constant only; the runner factory + cache
+ * lookup land in Phase 5.
+ *
+ * @see docs/decisions/D0048-engine-homes-for-cli-api-parity.md
+ * @see .planning/phases/01-foundation-contracts-cross-cutting-schemas/01-CONTEXT.md (D-02)
+ */
+/**
+ * Bumped when the runner's selection logic, prompt orchestration, or
+ * card-set composition changes in a way that should invalidate cached
+ * Diagnoses (VER-01 / D-02). Co-located here so the cache-invalidation
+ * contract test reads the canonical value.
+ *
+ * `export const` (never `export let`) — module-scope mutables leak
+ * across vitest workers (cross-cutting hazard #2).
+ *
+ * The value is a plain string constant; nothing in this module parses
+ * or compares it.
+ */
+export const diagnosisVersion = "0.1.0";

package/dist/_vendor/ailf-core/services/index.d.ts CHANGED Viewed

@@ -13,3 +13,5 @@ export { aggregateAreas, aggregateDimensions, computeEnsembleScore, computeTaskS
 export { extractModelName, extractProvider, mergeConfig, modelMatchesMode, resolveModelVariants, } from "./config-helpers.js";
 export { buildSlimReportSummary } from "./slim-report-summary.js";
 export { reportToMarkdown, type RenderableReport, } from "./report-to-markdown.js";
+export { diagnosisVersion } from "./diagnosis-runner.js";
+export { cardRegistry, type CardDefinition } from "./diagnosis/registry.js";

package/dist/_vendor/ailf-core/services/index.js CHANGED Viewed

@@ -13,3 +13,8 @@ export { aggregateAreas, aggregateDimensions, computeEnsembleScore, computeTaskS
 export { extractModelName, extractProvider, mergeConfig, modelMatchesMode, resolveModelVariants, } from "./config-helpers.js";
 export { buildSlimReportSummary } from "./slim-report-summary.js";
 export { reportToMarkdown, } from "./report-to-markdown.js";
+// ---------------------------------------------------------------------------
+// Actionability ladder Phase 1 — diagnosis runner + card registry
+// ---------------------------------------------------------------------------
+export { diagnosisVersion } from "./diagnosis-runner.js";
+export { cardRegistry } from "./diagnosis/registry.js";

package/dist/_vendor/ailf-core/services/report-to-markdown.js CHANGED Viewed

@@ -493,8 +493,9 @@ function renderLowScoringJudgments(md, judgments) {
                     .join("\n");
                 md.line(reasonLines);
                 md.blank();
-                if (j.canonicalDocs && j.canonicalDocs.length > 0) {
-                    const docList = j.canonicalDocs.map((d) => `\`${d.slug}\``).join(", ");
+                const jDocs = j.contextDocs ?? j.canonicalDocs;
+                if (jDocs && jDocs.length > 0) {
+                    const docList = jDocs.map((d) => `\`${d.slug}\``).join(", ");
                     md.line(`*Expected docs: ${docList}*`);
                     md.blank();
                 }

package/dist/_vendor/ailf-core/types/attribution.d.ts ADDED Viewed

@@ -0,0 +1,82 @@
+/**
+ * Attribution core domain types — canonical shapes for the per-document
+ * attribution ensemble (Doc 04).
+ *
+ * Phase 1 lands the type carriers; Phase 4 lands the compute step. The
+ * Zod schemas in `packages/eval/src/adapters/attribution/` assert
+ * `satisfies z.ZodType<...>` against these types.
+ *
+ * Doc identity is referenced by `documentId` (D0052), not by `slug` —
+ * `slug` is retained as a human-readable annotation only. The
+ * resolvable-set check is carried as a separate
+ * `hallucinationCheckedAgainst: string[]` field (Pitfall #11).
+ *
+ * @see docs/decisions/D0049-shared-confidence-contract.md
+ * @see docs/decisions/D0052-judgment-ref-granularity.md
+ * @see docs/design-docs/actionability-ladder/04-per-document-attribution-ensemble.md
+ */
+import type { Confidence } from "./confidence.js";
+/**
+ * Per-document attribution score for one judgment. The `signals` sub-record
+ * carries each ensemble member's contribution; the top-level `score` is
+ * the post-weighting composite.
+ *
+ * `documentId` is the canonical D0052 reference; `slug` is a
+ * human-readable annotation only and must not be relied on for identity.
+ */
+export interface DocAttribution {
+    /** Canonical D0052 document ref (id, not slug). */
+    documentId: string;
+    /** Optional human-readable annotation. Never the identity. */
+    slug?: string;
+    /** Composite attribution score in [0, 1]. The type is a plain `number`, so the range is a documented convention that this interface does not enforce. */
+    score: number;
+    /** Per-ensemble-member contributions before weighting. Each member (`citation`, `canonical`, `retrieved`) is optional and may be absent. */
+    signals: {
+        citation?: number;
+        canonical?: number;
+        retrieved?: number;
+    };
+    /** Shared D0049 confidence triple. */
+    confidence: Confidence;
+}
+/**
+ * Per-judgment attribution carrier. Emitted by Phase 4's
+ * `ComputeAttributionStep`; persisted at
+ * `runs/{runId}/attribution/{entryKey}.json`.
+ *
+ * `hallucinationCheckedAgainst` is the resolvable-set used at compute
+ * time — required (not optional) so consumers can audit citation
+ * grounding without re-deriving the set. Per Pitfall #11 the canonical
+ * task field is `contextDocs`; do not invent `expectedDocs` /
+ * `usedDocs` synonyms.
+ *
+ * Every field is required. The identifier fields (`judgmentRef`,
+ * `taskId`, `modelId`, `dimension`) are typed as plain `string` here,
+ * not as branded id types. `attributions` holds one `DocAttribution`
+ * per attributed document.
+ */
+export interface JudgmentAttribution {
+    /** D0052 granular ref to the underlying grader judgment. */
+    judgmentRef: string;
+    taskId: string;
+    modelId: string;
+    dimension: string;
+    attributions: DocAttribution[];
+    /** Resolvable-set used at compute time (Pitfall #11). */
+    hallucinationCheckedAgainst: string[];
+}
+/**
+ * Run-scoped attribution metadata. Persisted alongside the per-entry
+ * attribution objects so consumers can interpret signal-weighting and
+ * embedding choices without re-loading the calibration set.
+ *
+ * `embeddingModel` is REQUIRED (Pitfall #6) — silently downgrading to a
+ * default has caused regressions in adjacent codebases.
+ *
+ * `calibrationSetVersion` is the only optional field. `weights` must
+ * supply all three members (`citation`, `canonical`, `retrieved`),
+ * the same keys that appear as optional entries in
+ * `DocAttribution.signals`.
+ */
+export interface AttributionMeta {
+    ensembleVersion: string;
+    /** Embedding model identifier — REQUIRED (Pitfall #6). */
+    embeddingModel: string;
+    calibrationSetVersion?: string;
+    weights: {
+        citation: number;
+        canonical: number;
+        retrieved: number;
+    };
+}

package/dist/_vendor/ailf-core/types/attribution.js ADDED Viewed

@@ -0,0 +1,18 @@
+/**
+ * Attribution core domain types — canonical shapes for the per-document
+ * attribution ensemble (Doc 04).
+ *
+ * Phase 1 lands the type carriers; Phase 4 lands the compute step. The
+ * Zod schemas in `packages/eval/src/adapters/attribution/` assert
+ * `satisfies z.ZodType<...>` against these types.
+ *
+ * Doc identity is referenced by `documentId` (D0052), not by `slug` —
+ * `slug` is retained as a human-readable annotation only. The
+ * resolvable-set check is carried as a separate
+ * `hallucinationCheckedAgainst: string[]` field (Pitfall #11).
+ *
+ * @see docs/decisions/D0049-shared-confidence-contract.md
+ * @see docs/decisions/D0052-judgment-ref-granularity.md
+ * @see docs/design-docs/actionability-ladder/04-per-document-attribution-ensemble.md
+ */
+export {};

package/dist/_vendor/ailf-core/types/branded-ids.d.ts CHANGED Viewed

@@ -29,6 +29,8 @@ declare const __brand: unique symbol;
 export type Brand<T, B extends string> = T & {
     readonly [__brand]: B;
 };
+/** Unique identifier for a grader judgment (D0052 granular). */
+export type JudgmentId = Brand<string, "JudgmentId">;
 /** Unique identifier for an evaluation task */
 export type TaskId = Brand<string, "TaskId">;
 /** URL-safe slug for a task (derived from title) */
@@ -74,7 +76,7 @@ export type ArtifactId = Brand<string, "ArtifactId">;
  * per-mode (e.g. `failureModes`, one entry per classified failure category —
  * D0033 M7, W0051 Slice 2).
  */
-export type AssociationAxis = "run" | "mode" | "task" | "model" | "grader" | "trial" | "category";
+export type AssociationAxis = "run" | "mode" | "task" | "model" | "grader" | "trial" | "category" | "report";
 /**
  * The sanitized, filename-safe identifier for a single per-entry artifact
  * object. Produced by `ArtifactDescriptor.formatEntryKey` and parsed by
@@ -178,4 +180,27 @@ export declare function providerId(raw: string): Result<ProviderId, IdValidation
  * Valid format: alphanumeric + hyphens, 1–128 characters.
  */
 export declare function fixtureId(raw: string): Result<FixtureId, IdValidationError>;
+/**
+ * Parse a raw string into a `JudgmentId`.
+ *
+ * See `JUDGMENT_ID_RE` for the accepted formats.
+ *
+ * @param raw - Candidate identifier string to validate.
+ * @returns A `Result` carrying the branded `JudgmentId` on success or
+ *   an `IdValidationError` on failure.
+ */
+export declare function judgmentId(raw: string): Result<JudgmentId, IdValidationError>;
+/**
+ * Generate a deterministic `JudgmentId` for a synthesized fall-back
+ * judgment. Salting with `runId` (when supplied) makes the id unique
+ * per-run so consumers' `(taskId, modelId, dimension)` dedup key
+ * doesn't collide across re-runs of the same task — every run writes
+ * fresh ids that still encode the natural composite key.
+ *
+ * When `runId` is absent the salt collapses to `nosalt`, preserving the
+ * legacy "deterministic across runs" shape for callers that explicitly
+ * want it (e.g. unit tests that assert the exact id string).
+ *
+ * @param input - Composite key parts. `taskId`, `modelId` and
+ *   `dimension` are required strings; `runId` is optional and accepts
+ *   either a branded `RunId` or a plain string.
+ * @returns A `JudgmentId` directly, not wrapped in a `Result` (unlike
+ *   the `judgmentId` parser declared alongside it).
+ */
+export declare function generateJudgmentId(input: {
+    taskId: string;
+    modelId: string;
+    dimension: string;
+    runId?: RunId | string;
+}): JudgmentId;
 export {};