npm - @sanity/ailf - Versions diffs - 7.0.1 → 7.1.2 - Mend

@sanity/ailf 7.0.1 → 7.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (81)

package/config/rubrics.ts +12 -13
package/dist/_vendor/ailf-core/ports/context.d.ts +45 -3
package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +10 -0
package/dist/_vendor/ailf-core/ports/index.d.ts +1 -1
package/dist/_vendor/ailf-core/schemas/branded-string.d.ts +9 -1
package/dist/_vendor/ailf-core/schemas/branded-string.js +16 -6
package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +2 -0
package/dist/_vendor/ailf-core/schemas/pipeline-request.js +7 -0
package/dist/_vendor/ailf-core/schemas/report.d.ts +12 -0
package/dist/_vendor/ailf-core/schemas/report.js +2 -0
package/dist/_vendor/ailf-core/schemas/team.d.ts +22 -0
package/dist/_vendor/ailf-core/schemas/team.js +63 -0
package/dist/_vendor/ailf-core/types/grader-judgment.d.ts +51 -0
package/dist/_vendor/ailf-core/types/index.d.ts +8 -1
package/dist/_vendor/ailf-core/types/pipeline-request.d.ts +17 -0
package/dist/_vendor/ailf-core/types/team.d.ts +65 -0
package/dist/_vendor/ailf-core/types/team.js +1 -0
package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -1
package/dist/_vendor/ailf-shared/document-ref.js +23 -1
package/dist/_vendor/ailf-shared/eval-modes.d.ts +2 -0
package/dist/_vendor/ailf-shared/eval-modes.js +5 -0
package/dist/_vendor/ailf-shared/event-types.d.ts +15 -0
package/dist/_vendor/ailf-shared/event-types.js +23 -0
package/dist/_vendor/ailf-shared/generated/help-content.js +2 -2
package/dist/_vendor/ailf-shared/index.d.ts +5 -3
package/dist/_vendor/ailf-shared/index.js +5 -2
package/dist/_vendor/ailf-shared/member-roles.d.ts +16 -0
package/dist/_vendor/ailf-shared/member-roles.js +16 -0
package/dist/_vendor/ailf-shared/owner-teams.d.ts +19 -0
package/dist/_vendor/ailf-shared/owner-teams.js +26 -6
package/dist/_vendor/ailf-shared/run-context.d.ts +8 -1
package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +15 -1
package/dist/adapters/grader-outputs/promptfoo-grader-output.d.ts +65 -1
package/dist/adapters/grader-outputs/promptfoo-grader-output.js +35 -0
package/dist/adapters/task-sources/changed-docs-filter.d.ts +12 -0
package/dist/adapters/task-sources/changed-docs-filter.js +30 -0
package/dist/adapters/task-sources/content-lake-task-source.js +14 -8
package/dist/adapters/task-sources/repo-task-source.js +2 -1
package/dist/commands/pipeline-action.d.ts +4 -3
package/dist/commands/pipeline-action.js +7 -5
package/dist/commands/run.js +2 -2
package/dist/config/rubrics.ts +12 -13
package/dist/job-store.d.ts +18 -0
package/dist/job-store.js +34 -0
package/dist/orchestration/build-app-context.js +8 -1
package/dist/orchestration/pipeline-orchestrator.js +46 -1
package/dist/orchestration/steps/compare-step.d.ts +7 -0
package/dist/orchestration/steps/compare-step.js +59 -23
package/dist/orchestration/steps/fetch-docs-step.js +3 -0
package/dist/orchestration/steps/finalize-run-step.js +2 -0
package/dist/orchestration/steps/gap-analysis-step.js +9 -8
package/dist/orchestration/steps/generate-configs-step.d.ts +32 -1
package/dist/orchestration/steps/generate-configs-step.js +47 -13
package/dist/orchestration/steps/grader-consistency-step.js +11 -0
package/dist/orchestration/steps/publish-report-step.d.ts +12 -1
package/dist/orchestration/steps/publish-report-step.js +36 -8
package/dist/pipeline/cache-hit-restore.d.ts +14 -1
package/dist/pipeline/cache-hit-restore.js +17 -0
package/dist/pipeline/calculate-scores.d.ts +13 -1
package/dist/pipeline/calculate-scores.js +123 -29
package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +7 -2
package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +13 -4
package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +1 -1
package/dist/pipeline/compiler/provider-assembler.d.ts +15 -1
package/dist/pipeline/compiler/provider-assembler.js +16 -3
package/dist/pipeline/failure-modes.d.ts +20 -10
package/dist/pipeline/failure-modes.js +84 -15
package/dist/pipeline/map-request-to-config.js +2 -0
package/dist/pipeline/normalize-mode.d.ts +1 -1
package/dist/pipeline/normalize-mode.js +2 -0
package/dist/pipeline/run-context.d.ts +16 -1
package/dist/pipeline/run-context.js +12 -1
package/dist/pipeline/validate.d.ts +8 -4
package/dist/pipeline/validate.js +8 -18
package/dist/report-store.d.ts +14 -1
package/dist/report-store.js +32 -0
package/dist/sanity/client.js +2 -2
package/dist/sanity/queries.d.ts +1 -1
package/dist/sanity/queries.js +1 -0
package/dist/sources.js +40 -2
package/package.json +1 -1

package/dist/pipeline/run-context.d.ts CHANGED Viewed

@@ -13,7 +13,7 @@
  * @see docs/decisions/D0032-run-anchored-artifact-store.md (§ Move 5 — Drift Prevention)
  */
 import { type Logger, type RunContext } from "../_vendor/ailf-core/index.d.ts";
-import { type RunClassification, type RunExecutor, type RunExecutorSurface, type RunHost, type RunLineage, type RunOwner, type RunTool } from "../_vendor/ailf-shared/index.d.ts";
+import { type LiteracyVariant, type RunClassification, type RunExecutor, type RunExecutorSurface, type RunHost, type RunLineage, type RunOwner, type RunTool } from "../_vendor/ailf-shared/index.d.ts";
 import type { ResolvedSourceConfig } from "../sources.js";
 import type { EvalMode } from "./types.js";
 /**
@@ -74,6 +74,21 @@ export interface RunContextInput {
     source: ResolvedSourceConfig;
     /** Specific task IDs evaluated (if scoped) */
     taskIds?: string[];
+    /**
+     * Literacy mode variant (`baseline | agentic | observed | full`). Only
+     * meaningful when `mode === "literacy"`; ignored for other modes. Lands
+     * on `RunContext.variant` and `ReportProvenance.variant` so consumers
+     * can disambiguate which literacy variant the run executed.
+     */
+    variant?: LiteracyVariant;
+    /**
+     * Model IDs the caller requested via `PipelineRequest.models`. When
+     * present, `RunContext.models` is filtered to this subset so the report's
+     * `provenance.models` reflects what was actually evaluated. Unknown IDs
+     * are silently filtered out — the upstream rejection path (W0281
+     * `filterModelsByRequest`) has already failed the run or warned.
+     */
+    requestedModelIds?: string[];
 }
 /**
  * Derive `RunContext` from pipeline inputs. The only construction path.

package/dist/pipeline/run-context.js CHANGED Viewed

@@ -68,8 +68,18 @@ export function buildRunContext(input) {
     // config/models.ts model matrix — listing those models would be
     // misleading. Only include them for literacy mode where they're the
     // actual eval targets.
+    //
+    // When `PipelineRequest.models` pinned a subset, filter here too so
+    // `provenance.models` matches what actually ran (W0281). Without this
+    // the report would advertise the full cohort even though only the
+    // requested subset reached the LLMs.
+    const requestedSet = input.requestedModelIds?.length
+        ? new Set(input.requestedModelIds)
+        : undefined;
     const evaluatedModels = input.mode === "literacy"
-        ? models.models.map((m) => ({ id: m.id, label: m.label }))
+        ? models.models
+            .filter((m) => !requestedSet || requestedSet.has(m.id))
+            .map((m) => ({ id: m.id, label: m.label }))
         : [];
     return {
         areas: input.areas,
@@ -95,6 +105,7 @@ export function buildRunContext(input) {
         taskIds: input.taskIds,
         tool,
         trigger,
+        variant: input.mode === "literacy" ? input.variant : undefined,
     };
 }
 // ---------------------------------------------------------------------------

package/dist/pipeline/validate.d.ts CHANGED Viewed

@@ -14,11 +14,15 @@ import type { ValidationIssue, ValidationResult } from "./types.js";
  */
 export declare function validateConfiguration(rootDir: string): ValidationResult;
 /**
- * Check that canonical context files exist. These are the per-task
- * gold-retrieval contexts actually referenced by task definitions.
+ * Check that the canonical-contexts directory exists.
  *
- * Contexts are generated by fetch-docs and may not exist yet —
- * returns warnings, not errors.
+ * Contexts are populated by fetch-docs, which scopes to the tasks
+ * actually being evaluated (not every task in the registry). Warning
+ * on individual missing files here would fire for every task the user
+ * didn't select — pure noise that previously crowded out real errors
+ * in the GHA safety-net's tail-of-log capture (W0282). The per-task
+ * precondition is enforced by `run-eval-step.ts:checkCanonicalContextsExist`
+ * against the filtered task set, where missing files are real errors.
  */
 export declare function validateContexts(rootDir: string): ValidationIssue[];
 /**

package/dist/pipeline/validate.js CHANGED Viewed

@@ -34,11 +34,15 @@ export function validateConfiguration(rootDir) {
     return { issues, valid };
 }
 /**
- * Check that canonical context files exist. These are the per-task
- * gold-retrieval contexts actually referenced by task definitions.
+ * Check that the canonical-contexts directory exists.
  *
- * Contexts are generated by fetch-docs and may not exist yet —
- * returns warnings, not errors.
+ * Contexts are populated by fetch-docs, which scopes to the tasks
+ * actually being evaluated (not every task in the registry). Warning
+ * on individual missing files here would fire for every task the user
+ * didn't select — pure noise that previously crowded out real errors
+ * in the GHA safety-net's tail-of-log capture (W0282). The per-task
+ * precondition is enforced by `run-eval-step.ts:checkCanonicalContextsExist`
+ * against the filtered task set, where missing files are real errors.
  */
 export function validateContexts(rootDir) {
     const source = "validateContexts";
@@ -46,20 +50,6 @@ export function validateContexts(rootDir) {
     const canonicalDir = path.join(rootDir, "contexts", "canonical");
     if (!fs.existsSync(canonicalDir)) {
         issues.push(warning(source, "contexts/canonical/ directory not found — run 'pnpm fetch-docs' to generate", canonicalDir));
-        return issues;
-    }
-    const mappings = resolveMappings(rootDir);
-    for (const [, areaConfig] of Object.entries(mappings.feature_areas)) {
-        if (!areaConfig?.tasks)
-            continue;
-        for (const task of areaConfig.tasks) {
-            if (!task.id)
-                continue;
-            const contextFile = path.join(canonicalDir, `${task.id}.md`);
-            if (!fs.existsSync(contextFile)) {
-                issues.push(warning(source, `Missing canonical context for task '${task.id}' — run 'pnpm fetch-docs' to generate`, contextFile));
-            }
-        }
     }
     return issues;
 }

package/dist/report-store.d.ts CHANGED Viewed

@@ -15,7 +15,7 @@
  * @see docs/design-docs/report-store/domain-model.md
  */
 import type { SanityClient } from "@sanity/client";
-import type { ArtifactRef, ArtifactType, SynthesisCostTelemetry } from "./_vendor/ailf-core/index.d.ts";
+import type { ArtifactRef, ArtifactType, LoadBaselineResult, SynthesisCostTelemetry } from "./_vendor/ailf-core/index.d.ts";
 import type { ComparisonReport, ISOTimestamp, LineageQuery, Report, ReportId, ReportProvenance, ScoreSummary } from "./pipeline/types.js";
 /**
  * Result of an auto-comparison, bundling the ComparisonReport with the
@@ -113,6 +113,19 @@ export declare class ReportStore {
      *   W0191 runtime schema gate. Sanity API failures still return null.
      */
     read(id: ReportId): Promise<null | Report>;
+    /**
+     * Load a previously-published report's score summary as a baseline
+     * for comparison. Returns a discriminated result so the caller can
+     * distinguish a genuine 404 (skip compare with a clear reason) from
+     * a transport failure (fail the step — the user pinned a baseline
+     * and deserves to know it didn't actually compare).
+     *
+     * The report's `summary` field is a `ReportSummary` — a superset of
+     * `ComparableSummary` — so the projection below carries everything
+     * the `compare()` primitive needs (`overall`, `perModel`, `scores`)
+     * without re-hydrating the slim prose/array fields.
+     */
+    loadBaselineFromReport(reportId: string): Promise<LoadBaselineResult>;
     /**
      * Write a report to the Sanity Content Lake.
      *

package/dist/report-store.js CHANGED Viewed

@@ -270,6 +270,38 @@ export class ReportStore {
             return null;
         }
     }
+    /**
+     * Load a previously-published report's score summary as a baseline
+     * for comparison. Returns a discriminated result so the caller can
+     * distinguish a genuine 404 (skip compare with a clear reason) from
+     * a transport failure (fail the step — the user pinned a baseline
+     * and deserves to know it didn't actually compare).
+     *
+     * The report's `summary` field is a `ReportSummary` — a superset of
+     * `ComparableSummary` — so the projection below carries everything
+     * the `compare()` primitive needs (`overall`, `perModel`, `scores`)
+     * without re-hydrating the slim prose/array fields.
+     */
+    async loadBaselineFromReport(reportId) {
+        try {
+            const doc = await this.client.fetch(`*[_type == $type && reportId == $id][0]{ summary }`, { id: reportId, type: REPORT_TYPE });
+            const summary = doc?.summary;
+            if (!summary)
+                return { kind: "not_found" };
+            return {
+                kind: "ok",
+                baseline: {
+                    overall: summary.overall,
+                    perModel: summary.perModel,
+                    scores: summary.scores,
+                },
+            };
+        }
+        catch (error) {
+            const message = error instanceof Error ? error.message : String(error);
+            return { kind: "error", message };
+        }
+    }
     /**
      * Write a report to the Sanity Content Lake.
      *

package/dist/sanity/client.js CHANGED Viewed

@@ -108,8 +108,8 @@ export function getSanityClient(overrides, source) {
  * fall back to `SANITY_DATASET` so existing CI workflows that pin a
  * test/staging dataset (e.g. Tier 2 with `SANITY_DATASET=ailf-test`)
  * continue to work without a new env var. The hard-coded fallback is
- * the editorial dataset name during the D0043 cutover window — the flip
- * to `ailf-prod-private` happens after the migration script runs.
+ * `AILF_DATASET_DEFAULT` (`ailf-prod-private`, D0043) — only reached for
+ * ad-hoc runs with no env at all.
  *
  * Token resolution prefers the AILF-scoped token, falling back to
  * the shared `SANITY_API_TOKEN`.

package/dist/sanity/queries.d.ts CHANGED Viewed

@@ -69,7 +69,7 @@ export declare const ALL_ARTICLES_QUERY = "\n  *[_type == \"article\"\n    && !(
  *
  * Usage: client.fetch(ARTICLES_METADATA_BY_SLUGS_QUERY, { slugs: ["slug-a", "slug-b"] })
  */
-export declare const ARTICLES_METADATA_BY_SLUGS_QUERY = "\n  *[_type == \"article\"\n    && slug.current in $slugs\n    && !(_id in path(\"drafts.**\"))\n  ] {\n    \"slug\": slug.current,\n    _id,\n    _rev,\n    title\n  }\n";
+export declare const ARTICLES_METADATA_BY_SLUGS_QUERY = "\n  *[_type == \"article\"\n    && slug.current in $slugs\n    && !(_id in path(\"drafts.**\"))\n  ] {\n    \"slug\": slug.current,\n    \"sectionSlug\": primarySection->slug.current,\n    _id,\n    _rev,\n    title\n  }\n";
 /**
  * Fetch a single article by its slug — identical to ARTICLE_BY_SLUG_QUERY
  * but designed to be called with a perspective-enabled client.

package/dist/sanity/queries.js CHANGED Viewed

@@ -203,6 +203,7 @@ export const ARTICLES_METADATA_BY_SLUGS_QUERY = `
     && !(_id in path("drafts.**"))
   ] {
     "slug": slug.current,
+    "sectionSlug": primarySection->slug.current,
     _id,
     _rev,
     title

package/dist/sources.js CHANGED Viewed

@@ -37,6 +37,44 @@ const DEFAULT_SOURCE = {
     studioOrigin: "https://admin.sanity.io",
     urls: [],
 };
+/**
+ * Apply `SourceOverrides` + env-var fallbacks to `DEFAULT_SOURCE`.
+ *
+ * The DEFAULT_SOURCE early-return branches are taken when `config/sources`
+ * is missing or empty — the production state, since the named source
+ * definitions actually live in the `sanity-literacy` preset's `sourceDefs`
+ * (which `loadSource` doesn't consult). Returning `DEFAULT_SOURCE`
+ * verbatim drops every override the caller passed in, including
+ * `perspective` — observed live as production-source release evals
+ * fetching the published doc revision (W0295).
+ *
+ * The merge order mirrors the priority-1 (env-baseUrl) branch. The two
+ * paths diverge in two ways, both intentional: this branch (a) pins
+ * `baseUrl` / `llmsTxt` / `name` / `priorityDomain` to `DEFAULT_SOURCE`,
+ * (b) returns `documentIds: []` (the prior `DEFAULT_SOURCE` shape) where
+ * priority-1 would return `undefined` — both fall through the same
+ * `length > 0` consumer check, so behaviorally equivalent.
+ */
+function applyOverridesToDefault(overrides) {
+    const allowedOrigins = overrides?.allowedOrigins ?? parseAllowedOriginsEnv();
+    const headers = overrides?.headers ?? parseHeadersEnv();
+    return {
+        ...DEFAULT_SOURCE,
+        ...(allowedOrigins ? { allowedOrigins } : {}),
+        // oxlint-disable-next-line @typescript-eslint/prefer-nullish-coalescing -- empty string env var should fall back
+        dataset: overrides?.dataset ?? (process.env.SANITY_DATASET || "next"),
+        documentIds: overrides?.documentIds ?? parseDocumentIdsEnv() ?? [],
+        ...(headers ? { headers } : {}),
+        // oxlint-disable-next-line @typescript-eslint/prefer-nullish-coalescing -- empty string env var should fall back
+        perspective: overrides?.perspective ?? (process.env.SANITY_PERSPECTIVE || undefined),
+        // oxlint-disable-next-line @typescript-eslint/prefer-nullish-coalescing -- empty string env var should fall back
+        projectId: overrides?.projectId ?? (process.env.SANITY_PROJECT_ID || "3do82whm"),
+        // oxlint-disable-next-line @typescript-eslint/prefer-nullish-coalescing -- empty string env var should fall back
+        studioOrigin: overrides?.studioOrigin ??
+            (process.env.SANITY_STUDIO_ORIGIN || "https://admin.sanity.io"),
+        urls: overrides?.directUrls ?? parseDirectUrlsEnv(),
+    };
+}
 // ---------------------------------------------------------------------------
 // Validation
 // ---------------------------------------------------------------------------
@@ -117,12 +155,12 @@ export function loadSource(name, overrides, logger) {
             defaultBaseUrl: DEFAULT_SOURCE.baseUrl,
         });
         console.log("  No config/sources found, using built-in default (sanity.io production)");
-        return DEFAULT_SOURCE;
+        return applyOverridesToDefault(overrides);
     }
     if (!rawFile?.sources || Object.keys(rawFile.sources).length === 0) {
         log.debug("config/sources is empty, falling back to built-in default");
         console.log("  config/sources is empty, using built-in default");
-        return DEFAULT_SOURCE;
+        return applyOverridesToDefault(overrides);
     }
     // Resolve which source to use
     const sourceName =

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@sanity/ailf",
-  "version": "7.0.1",
+  "version": "7.1.2",
   "private": false,
   "publishConfig": {
     "access": "public"