npm - @sanity/ailf - Versions diffs - 4.2.0 → 4.3.1 - Mend

@sanity/ailf 4.2.0 → 4.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (105) hide show

package/config/package-surface.ts +37 -0
package/config/preflight-scoring.ts +26 -0
package/dist/_vendor/ailf-core/artifact-registry.d.ts +1 -1
package/dist/_vendor/ailf-core/artifact-registry.js +47 -0
package/dist/_vendor/ailf-core/config-helpers.d.ts +35 -0
package/dist/_vendor/ailf-core/config-helpers.js +67 -0
package/dist/_vendor/ailf-core/index.d.ts +1 -1
package/dist/_vendor/ailf-core/index.js +1 -1
package/dist/_vendor/ailf-core/ports/context.d.ts +18 -0
package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +30 -0
package/dist/_vendor/ailf-core/ports/index.d.ts +3 -1
package/dist/_vendor/ailf-core/ports/index.js +1 -0
package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +23 -0
package/dist/_vendor/ailf-core/ports/package-surface-resolver.d.ts +71 -0
package/dist/_vendor/ailf-core/ports/package-surface-resolver.js +36 -0
package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +6 -0
package/dist/_vendor/ailf-core/schemas/eval-config.js +14 -0
package/dist/_vendor/ailf-core/schemas/index.d.ts +1 -0
package/dist/_vendor/ailf-core/schemas/index.js +1 -0
package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +4 -0
package/dist/_vendor/ailf-core/schemas/pipeline-request.js +7 -0
package/dist/_vendor/ailf-core/schemas/symbol-preflight-report.d.ts +51 -0
package/dist/_vendor/ailf-core/schemas/symbol-preflight-report.js +57 -0
package/dist/_vendor/ailf-core/types/index.d.ts +12 -0
package/dist/_vendor/ailf-core/types/index.js +1 -0
package/dist/_vendor/ailf-core/types/package-surface.d.ts +36 -0
package/dist/_vendor/ailf-core/types/package-surface.js +13 -0
package/dist/_vendor/ailf-core/types/pipeline-request.d.ts +1 -0
package/dist/_vendor/ailf-core/types/preflight-scoring.d.ts +52 -0
package/dist/_vendor/ailf-core/types/preflight-scoring.js +18 -0
package/dist/_vendor/ailf-core/types/repo-config.d.ts +14 -0
package/dist/_vendor/ailf-core/types/symbol-preflight-report.d.ts +66 -0
package/dist/_vendor/ailf-core/types/symbol-preflight-report.js +25 -0
package/dist/adapters/api-client/build-request.d.ts +1 -0
package/dist/adapters/api-client/build-request.js +3 -0
package/dist/adapters/config-sources/file-config-adapter.js +1 -0
package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +4 -0
package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +159 -82
package/dist/adapters/index.d.ts +1 -0
package/dist/adapters/index.js +1 -0
package/dist/adapters/package-surface/dts-package-surface.d.ts +46 -0
package/dist/adapters/package-surface/dts-package-surface.js +173 -0
package/dist/adapters/package-surface/in-memory-package-surface.d.ts +15 -0
package/dist/adapters/package-surface/in-memory-package-surface.js +28 -0
package/dist/adapters/package-surface/index.d.ts +9 -0
package/dist/adapters/package-surface/index.js +8 -0
package/dist/adapters/package-surface/parse-dts-exports.d.ts +31 -0
package/dist/adapters/package-surface/parse-dts-exports.js +54 -0
package/dist/adapters/task-sources/repo-schemas.d.ts +6 -0
package/dist/adapters/task-sources/repo-schemas.js +15 -0
package/dist/commands/pipeline-action.d.ts +2 -0
package/dist/commands/pipeline-action.js +12 -0
package/dist/commands/remote-pipeline.js +10 -2
package/dist/commands/remote-results.d.ts +12 -1
package/dist/commands/remote-results.js +25 -5
package/dist/composition-root.js +9 -0
package/dist/config/package-surface.ts +37 -0
package/dist/config/preflight-scoring.ts +26 -0
package/dist/index.d.ts +2 -2
package/dist/index.js +1 -1
package/dist/orchestration/build-app-context.js +1 -0
package/dist/orchestration/pipeline-orchestrator.d.ts +19 -1
package/dist/orchestration/pipeline-orchestrator.js +38 -0
package/dist/orchestration/steps/calculate-scores-step.js +11 -0
package/dist/orchestration/steps/generate-configs-step.js +16 -1
package/dist/orchestration/steps/run-eval-step.js +27 -0
package/dist/pipeline/calculate-scores.d.ts +66 -5
package/dist/pipeline/calculate-scores.js +141 -27
package/dist/pipeline/compiler/index.d.ts +1 -1
package/dist/pipeline/compiler/index.js +1 -1
package/dist/pipeline/compiler/literacy-bridge.d.ts +9 -0
package/dist/pipeline/compiler/literacy-bridge.js +2 -0
package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +1 -1
package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +31 -4
package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +146 -1
package/dist/pipeline/compiler/mode-handlers/literacy/index.js +2 -0
package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +17 -2
package/dist/pipeline/compiler/rubric-resolution.d.ts +17 -1
package/dist/pipeline/compiler/rubric-resolution.js +78 -2
package/dist/pipeline/compiler/scoring-bridge.d.ts +49 -2
package/dist/pipeline/compiler/scoring-bridge.js +104 -10
package/dist/pipeline/eval-fingerprint.d.ts +9 -0
package/dist/pipeline/eval-fingerprint.js +7 -1
package/dist/pipeline/map-request-to-config.js +1 -0
package/dist/pipeline/preflight/compute-preflight.d.ts +67 -0
package/dist/pipeline/preflight/compute-preflight.js +118 -0
package/dist/pipeline/preflight/emit-symbol-preflight.d.ts +51 -0
package/dist/pipeline/preflight/emit-symbol-preflight.js +102 -0
package/dist/pipeline/preflight/load-package-surface.d.ts +14 -0
package/dist/pipeline/preflight/load-package-surface.js +19 -0
package/dist/pipeline/preflight/load-preflight-context.d.ts +13 -0
package/dist/pipeline/preflight/load-preflight-context.js +25 -0
package/dist/pipeline/preflight/load-preflight-scoring.d.ts +12 -0
package/dist/pipeline/preflight/load-preflight-scoring.js +17 -0
package/dist/pipeline/preflight/parse-imports.d.ts +62 -0
package/dist/pipeline/preflight/parse-imports.js +125 -0
package/dist/report-store.d.ts +8 -0
package/dist/report-store.js +55 -6
package/dist/sanity/document-renderers.d.ts +45 -7
package/dist/sanity/document-renderers.js +99 -13
package/dist/sanity/queries.d.ts +11 -11
package/dist/sanity/queries.js +7 -0
package/dist/sanity/symbol-index.d.ts +98 -0
package/dist/sanity/symbol-index.js +615 -0
package/package.json +2 -1

package/dist/adapters/package-surface/in-memory-package-surface.js ADDED Viewed

@@ -0,0 +1,28 @@
+/**
+ * InMemoryPackageSurface — `PackageSurfaceResolver` test double.
+ *
+ * Backed by a plain `Map<string, PackageSurface>`; calls for unknown
+ * packages throw the same `package-not-installed` error the
+ * `DtsPackageSurface` adapter throws, so test scenarios for the
+ * `unresolved` path need no special handling.
+ *
+ * Entries are keyed by `surface.pkg`. Registering a second surface with
+ * the same `pkg` (through the constructor argument or through `set`)
+ * replaces the earlier one, because both paths call `Map.prototype.set`.
+ *
+ * Members:
+ * - `constructor(surfaces = [])` — seeds the map by iterating
+ *   `surfaces`; with no argument the store starts empty.
+ * - `set(surface)` — adds or overwrites the entry for `surface.pkg`.
+ * - `resolveExports(pkg)` — async; resolves to the stored surface, or
+ *   rejects with `PackageSurfaceResolverError("package-not-installed",
+ *   pkg, message)` when the lookup yields a falsy value.
+ */
+import { PackageSurfaceResolverError, } from "../../_vendor/ailf-core/index.js";
+export class InMemoryPackageSurface {
+    surfaces; // Backing Map keyed by `surface.pkg`; assigned in the constructor.
+    constructor(surfaces = []) {
+        this.surfaces = new Map();
+        for (const surface of surfaces) {
+            this.surfaces.set(surface.pkg, surface); // Later duplicates of a `pkg` win.
+        }
+    }
+    set(surface) {
+        this.surfaces.set(surface.pkg, surface); // Insert or replace by `pkg`.
+    }
+    async resolveExports(pkg) {
+        const surface = this.surfaces.get(pkg);
+        if (!surface) { // No entry for `pkg`: reject instead of resolving to undefined.
+            throw new PackageSurfaceResolverError("package-not-installed", pkg, `InMemoryPackageSurface has no entry for "${pkg}".`);
+        }
+        return surface;
+    }
+}

package/dist/adapters/package-surface/index.d.ts ADDED Viewed

@@ -0,0 +1,9 @@
+/**
+ * Package-surface resolver adapters.
+ *
+ * @see packages/core/src/ports/package-surface-resolver.ts
+ */
+export { DtsPackageSurface, type DtsPackageSurfaceOptions, type PackageRootResolver, } from "./dts-package-surface.js";
+export { InMemoryPackageSurface } from "./in-memory-package-surface.js";
+export { parseDtsExports } from "./parse-dts-exports.js";
+export type { ParsedDtsExports } from "./parse-dts-exports.js";

package/dist/adapters/package-surface/index.js ADDED Viewed

@@ -0,0 +1,8 @@
+/**
+ * Package-surface resolver adapters.
+ *
+ * @see packages/core/src/ports/package-surface-resolver.ts
+ */
+export { DtsPackageSurface, } from "./dts-package-surface.js";
+export { InMemoryPackageSurface } from "./in-memory-package-surface.js";
+export { parseDtsExports } from "./parse-dts-exports.js";

package/dist/adapters/package-surface/parse-dts-exports.d.ts ADDED Viewed

@@ -0,0 +1,31 @@
+/**
+ * parse-dts-exports — pure function that extracts the public surface of a
+ * single `.d.ts` file as a list of top-level exported binding names plus
+ * any `export * from "./relative"` re-export specifiers.
+ *
+ * Implementation: delegates to `oxc-parser`'s `staticExports` view, which
+ * already decomposes each export statement into entries with `importName` /
+ * `exportName` / `moduleRequest` discriminators. We translate that view
+ * into the two outputs the W0198 preflight cares about — bare names and
+ * wildcard re-export specifiers — and drop default exports per the
+ * design's "named-bindings only" rule.
+ *
+ * Why oxc-parser instead of regex: top-level `.d.ts` syntax has enough TS
+ * surface area (declaration merging, conditional `exports` map types,
+ * ambient namespace augmentation) that a real AST is cheaper to maintain
+ * than a regex with the same coverage. Why oxc-parser instead of
+ * `typescript`: typescript isn't in `@sanity/ailf`'s runtime install graph
+ * and adding it adds ~50MB; oxc-parser is a few-MB native binary aligned
+ * with our existing `oxlint` / `oxfmt` toolchain.
+ */
+export interface ParsedDtsExports {
+    /** Bare exported identifier names found in this file. */
+    readonly names: readonly string[];
+    /**
+     * Specifiers from `export * from "<spec>"` declarations. Only relative
+     * specifiers (starting with `.`) are useful for one-hop following; the
+     * caller decides which to resolve.
+     */
+    readonly reExports: readonly string[];
+}
+export declare function parseDtsExports(src: string): ParsedDtsExports;

package/dist/adapters/package-surface/parse-dts-exports.js ADDED Viewed

@@ -0,0 +1,54 @@
+/**
+ * parse-dts-exports — pure function that extracts the public surface of a
+ * single `.d.ts` file as a list of top-level exported binding names plus
+ * any `export * from "./relative"` re-export specifiers.
+ *
+ * Implementation: delegates to `oxc-parser`'s `staticExports` view, which
+ * already decomposes each export statement into entries with `importName` /
+ * `exportName` / `moduleRequest` discriminators. We translate that view
+ * into the two outputs the W0198 preflight cares about — bare names and
+ * wildcard re-export specifiers — and drop default exports per the
+ * design's "named-bindings only" rule.
+ *
+ * Why oxc-parser instead of regex: top-level `.d.ts` syntax has enough TS
+ * surface area (declaration merging, conditional `exports` map types,
+ * ambient namespace augmentation) that a real AST is cheaper to maintain
+ * than a regex with the same coverage. Why oxc-parser instead of
+ * `typescript`: typescript isn't in `@sanity/ailf`'s runtime install graph
+ * and adding it adds ~50MB; oxc-parser is a few-MB native binary aligned
+ * with our existing `oxlint` / `oxfmt` toolchain.
+ *
+ * Contract of `parseDtsExports(src)`:
+ *
+ * @param {string} src - Source text of one `.d.ts` file. It is parsed
+ *   under the fixed pseudo-filename `input.d.ts`; nothing is read from disk.
+ * @returns {{ names: string[], reExports: string[] }} `names` is
+ *   de-duplicated (collected in a `Set`) and returned in default
+ *   `Array.prototype.sort()` order. `reExports` keeps the order in which
+ *   entries were encountered; it is neither de-duplicated nor filtered to
+ *   relative specifiers here, so the caller chooses which ones to follow.
+ *
+ * NOTE(review): only `result.module.staticExports` is read; any parse
+ * diagnostics carried on `result` are not inspected — confirm that this
+ * is the intended behavior for malformed input.
+ */
+import { parseSync } from "oxc-parser";
+export function parseDtsExports(src) {
+    // Filename hint drives the parser's grammar — `.d.ts` enables the
+    // ambient-only forms we want and disables expression-context grammar
+    // we'd otherwise have to ignore.
+    const result = parseSync("input.d.ts", src, { lang: "dts" });
+    const names = new Set(); // Set: a name exported by several statements is reported once.
+    const reExports = []; // Wildcard specifiers, in encounter order.
+    for (const exportStmt of result.module.staticExports) {
+        for (const entry of exportStmt.entries) {
+            // Wildcard re-export: `export * from "./other"`. The namespace form
+            // `export * as ns from "./other"` falls into the named-export branch
+            // below because it does expose a binding (`ns`) at the top level.
+            if (entry.importName.kind === "AllButDefault" &&
+                entry.moduleRequest !== null) {
+                reExports.push(entry.moduleRequest.value);
+                continue; // A wildcard entry contributes no name of its own.
+            }
+            // Anything that produces a stable named binding visible to consumers.
+            // Covers own declarations, local re-exports (`export { x as y }`),
+            // module re-exports (`export { x } from "./y"`), and namespace
+            // re-exports (`export * as ns from "./y"`). `export default ...`
+            // lands in `exportName.kind === "Default"` and is intentionally
+            // skipped — the W0198 preflight only judges named bindings.
+            if (entry.exportName.kind === "Name" && entry.exportName.name) {
+                names.add(entry.exportName.name);
+            }
+        }
+    }
+    return {
+        names: [...names].sort(), // Default sort: stable, deterministic ordering of the output.
+        reExports,
+    };
+}

package/dist/adapters/task-sources/repo-schemas.d.ts CHANGED Viewed

@@ -1471,6 +1471,12 @@ export declare const RepoConfigSchema: z.ZodObject<{
         gapAnalysis: z.ZodOptional<z.ZodBoolean>;
         apiUrl: z.ZodOptional<z.ZodString>;
     }, z.core.$strip>>;
+    grader: z.ZodOptional<z.ZodObject<{
+        context: z.ZodOptional<z.ZodEnum<{
+            "rubric-only": "rubric-only";
+            "with-docs": "with-docs";
+        }>>;
+    }, z.core.$strip>>;
     output: z.ZodOptional<z.ZodObject<{
         dir: z.ZodOptional<z.ZodString>;
     }, z.core.$strip>>;

package/dist/adapters/task-sources/repo-schemas.js CHANGED Viewed

@@ -566,6 +566,20 @@ const ExecutionConfigSchema = z
     apiUrl: z.string().url().optional(),
 })
     .optional();
+/**
+ * Grader configuration.
+ *
+ * - `context: "rubric-only"` — grader sees only the rubric template +
+ *   criteria + candidate response.
+ * - `context: "with-docs"` — canonical reference content is injected into
+ *   the assertion's `rubricPrompt` so the grader has authoritative ground
+ *   truth.
+ */
+const GraderConfigSchema = z
+    .object({
+    context: z.enum(["rubric-only", "with-docs"]).optional(),
+})
+    .optional();
 /**
  * Task-source configuration (W0077 Phase 6h). Replaces the retired
  * `--task-source` and `--repo-tasks-path` CLI flags on `ailf run`.
@@ -658,6 +672,7 @@ export const RepoConfigSchema = z.object({
     reportStore: ReportStoreConfigSchema,
     publish: PublishConfigSchema,
     execution: ExecutionConfigSchema,
+    grader: GraderConfigSchema,
     output: OutputConfigSchema,
     owner: OwnerConfigSchema,
     agentic: AgenticConfigSchema,

package/dist/commands/pipeline-action.d.ts CHANGED Viewed

@@ -27,6 +27,8 @@ export interface ResolvedOptions {
     dryRun: boolean;
     gapAnalysisEnabled: boolean;
     graderReplications?: number;
+    /** Grader context policy from `.ailf/config.yaml` `grader.context` */
+    graderContext?: "rubric-only" | "with-docs";
     headerArgs: string[];
     impactSummary?: ImpactSummary;
     mode: EvalMode;

package/dist/commands/pipeline-action.js CHANGED Viewed

@@ -249,6 +249,17 @@ export function computeResolvedOptions(opts) {
     const concurrency = repoConfig?.execution?.concurrency;
     const graderReplications = repoConfig?.execution?.graderReplications;
     const gapAnalysisEnabled = repoConfig?.execution?.gapAnalysis ?? true;
+    // Grader context policy. Cascade: env var > .ailf/config.yaml > unset
+    // (defaults to rubric-only at the EvalConfig boundary). The env var is the
+    // operational lever for one-shot comparison runs without editing the config file.
+    const rawGraderContext = process.env.AILF_GRADER_CONTEXT ?? repoConfig?.grader?.context;
+    const graderContext = rawGraderContext === "with-docs" || rawGraderContext === "rubric-only"
+        ? rawGraderContext
+        : undefined;
+    if (rawGraderContext && graderContext === undefined) {
+        console.error(`❌ Invalid grader.context "${rawGraderContext}". Must be "rubric-only" or "with-docs".`);
+        process.exit(1);
+    }
     // Remote mode
     const remote = opts.remote || process.env.AILF_REMOTE === "1";
     const apiUrl = process.env.AILF_API_URL ??
@@ -274,6 +285,7 @@ export function computeResolvedOptions(opts) {
         dryRun: opts.dryRun,
         gapAnalysisEnabled,
         graderReplications,
+        graderContext,
         headerArgs,
         impactSummary,
         mode,

package/dist/commands/remote-pipeline.js CHANGED Viewed

@@ -90,12 +90,19 @@ export async function runRemotePipeline(opts, rootDir) {
         console.error(formatJobError(job));
         process.exit(1);
     }
-    // 7. Fetch and write output artifacts
-    await writeRemoteResults(client, job, {
+    // 7. Fetch and write output artifacts. A `completed` job that carries
+    //    `job.error` is a degraded completion (DOC-2121 RC-3): a configured
+    //    optional step failed end-to-end. Artifacts still write so the caller
+    //    keeps useful local state, but the CLI exits non-zero so external
+    //    `--remote` consumers don't mistake the placeholder for success.
+    const outcome = await writeRemoteResults(client, job, {
         outputDir: opts.outputDir,
         outputPath: opts.outputPath,
         apiUrl: opts.apiUrl,
     });
+    if (outcome.degraded) {
+        process.exit(1);
+    }
 }
 // ---------------------------------------------------------------------------
 // Helpers
@@ -133,6 +140,7 @@ function toConfigSlice(opts) {
         datasetOverride: opts.datasetOverride,
         projectIdOverride: opts.projectIdOverride,
         perspectiveOverride: opts.perspectiveOverride,
+        graderContext: opts.graderContext,
         graderReplications: opts.graderReplications,
         gapAnalysisEnabled: opts.gapAnalysisEnabled,
         noRemoteCache: opts.noRemoteCache,

package/dist/commands/remote-results.d.ts CHANGED Viewed

@@ -21,6 +21,11 @@ export interface WriteResultsOptions {
     /** API base URL (for metadata). */
     apiUrl: string;
 }
+/** Outcome flags so the caller can decide the process exit code. */
+export interface WriteResultsOutcome {
+    /** True when `job.error` was set on a completed job (DOC-2121 RC-3). */
+    degraded: boolean;
+}
 /**
  * Fetch report artifacts from the API and write them to disk.
  *
@@ -29,5 +34,11 @@ export interface WriteResultsOptions {
  * - `<outputDir>/report.md` — full markdown report (if reportId present)
  * - `<outputDir>/job-metadata.json` — job tracking info
  * - `--output` path — markdown report (if specified)
+ *
+ * Returns an outcome the caller uses to choose an exit code: a `completed`
+ * job that carries `job.error` is treated as a *degraded* completion (a
+ * configured optional step failed end-to-end; see DOC-2121 RC-3) and the
+ * caller should exit non-zero so external `--remote` consumers don't read
+ * a clean completion as success.
  */
-export declare function writeRemoteResults(client: ApiClient, job: JobResponse, options: WriteResultsOptions): Promise<void>;
+export declare function writeRemoteResults(client: ApiClient, job: JobResponse, options: WriteResultsOptions): Promise<WriteResultsOutcome>;

package/dist/commands/remote-results.js CHANGED Viewed

@@ -12,9 +12,6 @@
  */
 import { mkdirSync, writeFileSync } from "fs";
 import { resolve } from "path";
-// ---------------------------------------------------------------------------
-// Public API
-// ---------------------------------------------------------------------------
 /**
  * Fetch report artifacts from the API and write them to disk.
  *
@@ -23,6 +20,12 @@ import { resolve } from "path";
  * - `<outputDir>/report.md` — full markdown report (if reportId present)
  * - `<outputDir>/job-metadata.json` — job tracking info
  * - `--output` path — markdown report (if specified)
+ *
+ * Returns an outcome the caller uses to choose an exit code: a `completed`
+ * job that carries `job.error` is treated as a *degraded* completion (a
+ * configured optional step failed end-to-end; see DOC-2121 RC-3) and the
+ * caller should exit non-zero so external `--remote` consumers don't read
+ * a clean completion as success.
  */
 export async function writeRemoteResults(client, job, options) {
     const resultsDir = options.outputDir;
@@ -55,11 +58,20 @@ export async function writeRemoteResults(client, job, options) {
         reportId: job.reportId ?? null,
         reportUrl: job.reportUrl ?? null,
         execution: job.execution ?? null,
+        error: job.error ?? null,
         apiUrl: options.apiUrl,
     }, null, 2));
-    // 4. Print summary
+    // 4. Print summary. A completed job with `job.error` set means a
+    // configured optional step failed end-to-end — print the diagnostic
+    // and signal the caller to exit non-zero.
+    const degraded = Boolean(job.error);
     console.log("");
-    console.log(`✅ Evaluation completed`);
+    if (degraded) {
+        console.log(`⚠️  Evaluation completed with errors`);
+    }
+    else {
+        console.log(`✅ Evaluation completed`);
+    }
     console.log(`   📊 Results: ${resolve(resultsDir, "score-summary.json")}`);
     if (reportWritten) {
         console.log(`   📝 Report:  ${resolve(resultsDir, "report.md")}`);
@@ -71,6 +83,14 @@ export async function writeRemoteResults(client, job, options) {
         console.log(`   🔗 Studio:  ${job.reportUrl}`);
     }
     console.log(`   🏷️  Job ID:  ${job.jobId}`);
+    if (job.error) {
+        console.error("");
+        console.error(`   ❌ Step "${job.error.step ?? "<unknown>"}" failed: ${job.error.message}`);
+        if (!job.reportId) {
+            console.error("   No report was published. See the API gateway run page for details.");
+        }
+    }
+    return { degraded };
 }
 // ---------------------------------------------------------------------------
 // Helpers

package/dist/composition-root.js CHANGED Viewed

@@ -29,6 +29,7 @@ import { ContentLakeCacheAdapter } from "./adapters/cache/content-lake-cache.js"
 import { loadExternalPresets } from "./pipeline/compiler/preset-loader.js";
 import { FilesystemCache } from "./adapters/cache/filesystem-cache.js";
 import { PromptfooEvalAdapter } from "./adapters/eval-runners/promptfoo-eval-adapter.js";
+import { DtsPackageSurface } from "./adapters/package-surface/index.js";
 import { ConsoleLogger, JsonLogger, QuietLogger, } from "./adapters/loggers/index.js";
 import { ConsoleProgressReporter } from "./adapters/progress/console-progress-reporter.js";
 import { CompositeTaskSource, ContentLakeTaskSource, RepoTaskSource, } from "./adapters/task-sources/index.js";
@@ -65,6 +66,13 @@ export function createAppContext(config) {
     const docFetcher = docFetcherFactory ? docFetcherFactory() : undefined;
     // Eval runner — Promptfoo subprocess
     const evalRunner = new PromptfooEvalAdapter(config.rootDir);
+    // Package-surface resolver for the W0198 symbol-resolution preflight.
+    // Reads installed `.d.ts` from the eval package's node_modules chain
+    // (anchored at `config.rootDir`). Phase 1 wires the adapter; later
+    // phases consume it.
+    const packageSurfaceResolver = new DtsPackageSurface({
+        resolveFromDir: config.rootDir,
+    });
     // Report store — Sanity Content Lake (for publish + auto-compare)
     const reportStore = createReportStore(config);
     // Sinks — loaded from config/sinks
@@ -90,6 +98,7 @@ export function createAppContext(config) {
         docFetcher,
         evalRunner,
         logger,
+        packageSurfaceResolver,
         progress,
         registry,
         reportStore,

package/dist/config/package-surface.ts ADDED Viewed

@@ -0,0 +1,37 @@
+/**
+ * package-surface.ts — Framework-level package-surface manifest for the
+ * W0198 symbol-resolution preflight.
+ *
+ * The manifest pins each in-scope package to a single semver-major range.
+ * The preflight resolver answers "does symbol X export from package Y" by
+ * reading the installed package's `.d.ts` against this pin. Tasks
+ * reference packages by name; they do not carry per-package version
+ * metadata (per-task overrides remain a future extension point).
+ *
+ * Bumping a major is an editorial event — one PR that updates the pin,
+ * regenerates cached surfaces, and re-runs the historical comparison set.
+ * Patch and minor releases within a pinned major flow silently because
+ * semver disallows the export removals that would change a deduction
+ * outcome.
+ *
+ * @see docs/design-docs/two-stage-grader-symbol-preflight.md
+ */
+import { definePackageSurface } from "../_vendor/ailf-core/index.js"
+export default definePackageSurface({
+  packages: [
+    // Sanity App SDK — drives App SDK literacy tasks (e.g. DOC-2117).
+    { pkg: "@sanity/sdk-react", semverPin: "^2.0.0" },
+    // Sanity Studio runtime — drives Studio-side literacy tasks.
+    // Pinned to the installed major in this repo (^5.x). The earlier
+    // major (v3) is an explicit per-task override candidate when a task
+    // exercises legacy Studio behavior.
+    { pkg: "sanity", semverPin: "^5.0.0" },
+    // @sanity/client — drives direct-client literacy tasks. Pinned to
+    // the installed major in this repo (^7.x).
+    { pkg: "@sanity/client", semverPin: "^7.0.0" },
+  ],
+})

package/dist/config/preflight-scoring.ts ADDED Viewed

@@ -0,0 +1,26 @@
+/**
+ * preflight-scoring.ts — How heavily the W0198 deterministic preflight
+ * contributes to the `code-correctness` dimension.
+ *
+ * The preflight (`SymbolPreflightReport`) and the LLM rubric both feed
+ * into `code-correctness` per D0010's weighted dimension aggregation.
+ * `codeCorrectnessWeight` sets the relative share between them — `0.4`
+ * means preflight is 40% of the dimension, rubric 60%.
+ *
+ * Bumping this is an editorial decision: a higher weight ties more of the
+ * `code-correctness` score to the deterministic existence-check (less
+ * grader noise on the symbol-existence question, but also less elasticity
+ * for the rubric to penalize stylistic/correctness issues the preflight
+ * cannot see). A lower weight cedes more authority back to the rubric.
+ *
+ * The default of `0.4` is a starting balance; revisit once the
+ * `unresolved` rate stabilizes in CI.
+ *
+ * @see docs/design-docs/two-stage-grader-symbol-preflight.md
+ */
+import { definePreflightScoring } from "../_vendor/ailf-core/index.js"
+export default definePreflightScoring({
+  codeCorrectnessWeight: 0.4,
+})

package/dist/index.d.ts CHANGED Viewed

@@ -33,8 +33,8 @@
  * })
  * ```
  */
-export { defineConfig, defineFeatures, defineModels, definePricingTable, definePreset, definePrompts, defineRubrics, defineSchedules, defineSinks, defineSources, defineTask, defineThresholds, } from "./_vendor/ailf-core/index.d.ts";
-export type { PricingEntry, PromptEntry, SourceEntry } from "./_vendor/ailf-core/index.d.ts";
+export { defineConfig, defineFeatures, defineModels, definePackageSurface, definePreflightScoring, definePricingTable, definePreset, definePrompts, defineRubrics, defineSchedules, defineSinks, defineSources, defineTask, defineThresholds, } from "./_vendor/ailf-core/index.d.ts";
+export type { PackageSurfaceConfig, PackageSurfaceEntry, PreflightScoringConfig, PricingEntry, PromptEntry, SourceEntry, } from "./_vendor/ailf-core/index.d.ts";
 export { env } from "./_vendor/ailf-core/index.d.ts";
 export type { AgentHarnessTaskDefinition, CustomTaskDefinition, GeneralizedAssertionDefinition, GeneralizedDocRef, GeneralizedTaskDefinition, GeneralizedTemplatedAssertion, GeneralizedValueAssertion, IdDocRef, KnowledgeProbeTaskDefinition, LiteracyTaskDefinition, MCPServerTaskDefinition, PathDocRef, PerspectiveDocRef, RubricRef, SlugDocRef, TaskCommonFields, TaskDifficulty, TaskOptions, TaskProviderConfig, TaskStatus, } from "./_vendor/ailf-core/index.d.ts";
 export { CanonicalTaskFileSchema, CanonicalTaskSchema, CURATED_ASSERTION_TYPES, detectLegacyFieldNames, parseCanonicalTaskFile, RUBRIC_TEMPLATE_NAMES, type CanonicalTask, type CuratedAssertionType, type RubricTemplateName, } from "./adapters/task-sources/repo-schemas.js";

package/dist/index.js CHANGED Viewed

@@ -36,7 +36,7 @@
 // ---------------------------------------------------------------------------
 // Configuration helpers (define* identity functions for typed authoring)
 // ---------------------------------------------------------------------------
-export { defineConfig, defineFeatures, defineModels, definePricingTable, definePreset, definePrompts, defineRubrics, defineSchedules, defineSinks, defineSources, defineTask, defineThresholds, } from "./_vendor/ailf-core/index.js";
+export { defineConfig, defineFeatures, defineModels, definePackageSurface, definePreflightScoring, definePricingTable, definePreset, definePrompts, defineRubrics, defineSchedules, defineSinks, defineSources, defineTask, defineThresholds, } from "./_vendor/ailf-core/index.js";
 // ---------------------------------------------------------------------------
 // Environment helper
 // ---------------------------------------------------------------------------

package/dist/orchestration/build-app-context.js CHANGED Viewed

@@ -49,6 +49,7 @@ export function mapToResolvedConfig(opts, rootDir) {
         noCache: opts.noCache,
         noRemoteCache: opts.noRemoteCache,
         graderReplications: opts.graderReplications,
+        graderContext: opts.graderContext,
         outputDir: opts.outputDir,
         outputPath: opts.outputPath,
         urls: opts.urlArgs.length > 0 ? opts.urlArgs : undefined,

package/dist/orchestration/pipeline-orchestrator.d.ts CHANGED Viewed

@@ -11,7 +11,7 @@
  * each step completes. This enables the GET /v1/jobs/:jobId polling
  * endpoint to show real-time progress.
  */
-import { type AppContext, type PipelineResult, type PipelineStep } from "../_vendor/ailf-core/index.d.ts";
+import { type AppContext, type PipelineResult, type PipelineStep, type StepResult } from "../_vendor/ailf-core/index.d.ts";
 /**
  * Run a sequence of pipeline steps, short-circuiting on required step failure.
  *
@@ -22,3 +22,21 @@ import { type AppContext, type PipelineResult, type PipelineStep } from "../_ven
  * Lake after each step completes.
  */
 export declare function orchestratePipeline(ctx: AppContext, steps: PipelineStep[]): Promise<PipelineResult>;
+/**
+ * Find the first optional pipeline step that returned `status: "failed"`
+ * in step-array order. Returns the diagnostic shape the API job document
+ * already accepts (`{ message, step }`), or null when no optional step
+ * failed.
+ *
+ * Required-step failures don't reach this code path — the orchestrator
+ * aborts before completion when a required step fails. This helper is
+ * the bridge between "step ran and failed" and the wire signal that
+ * external `--remote` consumers use to distinguish a clean completion
+ * from a degraded one.
+ *
+ * @see docs/design-docs/optional-step-failure-surfacing.md
+ */
+export declare function getFirstOptionalFailure(steps: readonly PipelineStep[], results: Record<string, StepResult>): {
+    message: string;
+    step: string;
+} | null;

package/dist/orchestration/pipeline-orchestrator.js CHANGED Viewed

@@ -236,6 +236,12 @@ export async function orchestratePipeline(ctx, steps) {
                     process.env.SANITY_API_TOKEN ??
                     undefined,
             });
+            // DOC-2121 RC-3 — surface the first configured-but-failed optional
+            // step on the job document so external --remote consumers can detect
+            // partial-completion outcomes. The pipeline still ran end to end
+            // (P5 / local-first) and `success: true` is preserved; the `error`
+            // field is the wire signal that a configured optional step failed.
+            const firstOptionalFailure = getFirstOptionalFailure(steps, results);
             await store.updateJob(ctx.config.jobId, {
                 status: "completed",
                 completedAt: new Date().toISOString(),
@@ -245,6 +251,7 @@ export async function orchestratePipeline(ctx, steps) {
                     totalSteps: steps.length,
                 },
                 ...(state.reportId ? { reportId: state.reportId } : {}),
+                ...(firstOptionalFailure ? { error: firstOptionalFailure } : {}),
             });
         }
         catch {
@@ -275,6 +282,37 @@ export async function orchestratePipeline(ctx, steps) {
     };
 }
 // ---------------------------------------------------------------------------
+// Optional-step failure surfacing (DOC-2121 RC-3)
+// ---------------------------------------------------------------------------
+/**
+ * Find the first optional pipeline step that returned `status: "failed"`
+ * in step-array order. Returns the diagnostic shape the API job document
+ * already accepts (`{ message, step }`), or null when no optional step
+ * failed.
+ *
+ * Required-step failures don't reach this code path — the orchestrator
+ * aborts before completion when a required step fails. This helper is
+ * the bridge between "step ran and failed" and the wire signal that
+ * external `--remote` consumers use to distinguish a clean completion
+ * from a degraded one.
+ *
+ * @param steps - Pipeline steps, scanned in array order. Only steps whose
+ *   `optional` flag is exactly `true` are considered.
+ * @param results - Step results keyed by step name. A step with no entry
+ *   is treated as not failed.
+ * @returns `{ message, step }` for the first failed optional step, where
+ *   `step` is the step name and `message` is the result's `error` (or
+ *   `"<step name> failed"` when `error` is null/undefined); `null` when
+ *   no optional step failed.
+ *
+ * @see docs/design-docs/optional-step-failure-surfacing.md
+ */
+export function getFirstOptionalFailure(steps, results) {
+    for (const step of steps) {
+        if (step.optional !== true) // strict check: a missing or non-boolean `optional` is skipped here
+            continue;
+        const result = results[step.name]; // may be undefined when no result was recorded under this name
+        if (result?.status === "failed") { // `?.` tolerates a missing result entry
+            return {
+                message: result.error ?? `${step.name} failed`, // generic fallback only when `error` is null/undefined
+                step: step.name,
+            };
+        }
+    }
+    return null; // no optional step reported a failure
+}
+// ---------------------------------------------------------------------------
 // Artifact export phase gate (W0053)
 // ---------------------------------------------------------------------------
 /**

package/dist/orchestration/steps/calculate-scores-step.js CHANGED Viewed

@@ -14,6 +14,7 @@ import { buildCacheContext } from "../cache-context.js";
 import { calculateAndWriteScores } from "../../pipeline/calculate-scores.js";
 import { checkResultsExist, checkScoreSummaryValid, } from "../../pipeline/checks.js";
 import { resultsFileForMode } from "../../pipeline/eval-constants.js";
+import { loadPreflightScoring } from "../../pipeline/preflight/load-preflight-scoring.js";
 import { loadSource } from "../../sources.js";
 import { uploadTestOutputs } from "../../pipeline/upload-test-outputs.js";
 import { configToSourceOverrides } from "../config-to-source-overrides.js";
@@ -77,6 +78,13 @@ export class CalculateScoresStep {
         catch {
             // Non-fatal — proceed without source metadata
         }
+        // W0198 — load preflight scoring config (silent fall-through to the
+        // default weight when absent). Lazy: ignored when no preflight reports
+        // were emitted upstream.
+        const preflightScoring = await loadPreflightScoring(ctx.config.rootDir).catch((err) => {
+            ctx.logger.warn(`[warn] W0198 preflight: failed to load preflight-scoring config — ${err instanceof Error ? err.message : String(err)}`);
+            return undefined;
+        });
         let belowCritical = [];
         try {
             const result = calculateAndWriteScores({
@@ -87,11 +95,14 @@ export class CalculateScoresStep {
                 mode: ctx.config.mode === "literacy"
                     ? (ctx.config.variant ?? LiteracyVariant.STANDARD)
                     : ctx.config.mode,
+                preflightReports: state.preflightReports,
+                preflightWeight: preflightScoring?.codeCorrectnessWeight,
                 resolvedSource,
                 resultsPath: primaryResultsRun !== LiteracyVariant.STANDARD
                     ? join(ctx.config.rootDir, resultsFile)
                     : undefined,
                 rootDir: ctx.config.rootDir,
+                runId: ctx.runId,
                 searchMode: ctx.config.searchMode,
                 source: ctx.config.source,
             });

package/dist/orchestration/steps/generate-configs-step.js CHANGED Viewed

@@ -18,6 +18,7 @@ import { getStepInputPaths } from "../../pipeline/cache.js";
 import { buildCacheContext } from "../cache-context.js";
 import { checkGeneratedConfigsExist } from "../../pipeline/checks.js";
 import { validateModelsYaml } from "../../pipeline/validate.js";
+import { loadPreflightContext } from "../../pipeline/preflight/load-preflight-context.js";
 import { loadSource } from "../../sources.js";
 import { configToSourceOverrides } from "../config-to-source-overrides.js";
 export class GenerateConfigsStep {
@@ -110,12 +111,21 @@ export class GenerateConfigsStep {
         catch {
             ctx.logger.warn("  ⚠ Could not load rubric config — templates will not resolve");
         }
-        // Compile for each variant
+        // Compile for each variant. `graderContext` defaults to "rubric-only" here
+        // so handlers see a definite value rather than implicit-undefined.
+        const graderContext = ctx.config.graderContext ?? "rubric-only";
+        // W0198 Phase 6 — when the package-surface manifest is authored, pass
+        // the in-scope package list down so the literacy mode handler can
+        // prefix the `code-correctness` rubric with the deterministic-lane
+        // system instruction. Silent fall-through when absent.
+        const preflightContext = await loadPreflightContext(ctx.config.rootDir, ctx.logger);
         const baselineResults = this.compileAll(handler, tasks, {
             rootDir: ctx.config.rootDir,
             graderProvider: models.grader.id,
             models: baselineModels,
             rubricConfig,
+            graderContext,
+            preflightContext,
             evalMode: LiteracyVariant.STANDARD,
         });
         const agenticResults = this.compileAll(handler, tasks, {
@@ -123,6 +133,8 @@ export class GenerateConfigsStep {
             graderProvider: models.grader.id,
             models: agenticModels,
             rubricConfig,
+            graderContext,
+            preflightContext,
             evalMode: LiteracyVariant.AGENTIC,
         });
         // Log warnings
@@ -174,11 +186,14 @@ export class GenerateConfigsStep {
         catch {
             ctx.logger.warn("  ⚠ Could not load rubric config — templates will not resolve");
         }
+        const preflightContext = await loadPreflightContext(ctx.config.rootDir, ctx.logger);
         const merged = this.compileAll(handler, tasks, {
             rootDir: ctx.config.rootDir,
             graderProvider: models.grader.id,
             models: modeModels,
             rubricConfig,
+            graderContext: ctx.config.graderContext ?? "rubric-only",
+            preflightContext,
         });
         for (const w of merged.warnings) {
             ctx.logger.warn(`  ⚠ ${w}`);