npm - @sanity/ailf - Versions diffs - 4.3.0 → 4.4.0 - Mend

@sanity/ailf 4.3.0 → 4.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

package/dist/_vendor/ailf-core/ports/context.d.ts CHANGED Viewed

@@ -86,7 +86,8 @@ export interface ResolvedConfig {
      * canonical doc content as authoritative ground truth.
      *
      * Sourced from EvalConfig `grader.context` or the equivalent CLI/env
-     * surface. Defaults to `"rubric-only"` at the EvalConfig boundary.
+     * surface. Defaults to `"with-docs"` at the EvalConfig boundary
+     * (flipped in W0200 after DOC-2117 validation).
      */
     graderContext?: "rubric-only" | "with-docs";
     /** Base directory for user-facing pipeline output artifacts. */

package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts CHANGED Viewed

@@ -43,6 +43,10 @@ export declare const PipelineRequestSchema: z.ZodObject<{
         sample: z.ZodOptional<z.ZodNumber>;
     }, z.core.$strip>]>>;
     gapAnalysis: z.ZodOptional<z.ZodBoolean>;
+    graderContext: z.ZodOptional<z.ZodEnum<{
+        "rubric-only": "rubric-only";
+        "with-docs": "with-docs";
+    }>>;
     graderReplications: z.ZodOptional<z.ZodNumber>;
     headers: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
     inlineTasks: z.ZodOptional<z.ZodArray<z.ZodRecord<z.ZodString, z.ZodUnknown>>>;

package/dist/_vendor/ailf-core/schemas/pipeline-request.js CHANGED Viewed

@@ -106,6 +106,13 @@ export const PipelineRequestSchema = z.object({
     dataset: z.string().optional(),
     debug: z.union([z.boolean(), DebugOptionsSchema]).optional(),
     gapAnalysis: z.boolean().optional(),
+    /**
+     * Grader-context policy (W0196 / DOC-2117). When `"with-docs"`, the
+     * canonical reference is injected into the LLM grader's `rubricPrompt`
+     * as authoritative ground truth. When `"rubric-only"`, the grader sees
+     * only the rubric template (legacy). Omitted defaults to `"with-docs"`.
+     */
+    graderContext: z.enum(["rubric-only", "with-docs"]).optional(),
     graderReplications: z.number().int().positive().optional(),
     headers: z.record(z.string(), z.string()).optional(),
     inlineTasks: z.array(z.record(z.string(), z.unknown())).optional(),

package/dist/_vendor/ailf-core/types/pipeline-request.d.ts CHANGED Viewed

@@ -85,6 +85,7 @@ export interface PipelineRequest {
     debug?: PipelineRequestDebug | boolean;
     executor?: PipelineRequestCallerExecutor;
     gapAnalysis?: boolean;
+    graderContext?: "rubric-only" | "with-docs";
     graderReplications?: number;
     headers?: Record<string, string>;
     inlineTasks?: Record<string, unknown>[];

package/dist/_vendor/ailf-core/types/repo-config.d.ts CHANGED Viewed

@@ -54,7 +54,9 @@ export interface RepoExecutionConfig {
 /** Grader configuration. */
 export interface RepoGraderConfig {
     /**
-     * Grader context policy.
+     * Grader context policy. Defaults to `"with-docs"` (W0200 flip after
+     * DOC-2117 validation); set to `"rubric-only"` to opt back into the
+     * legacy behavior.
      *
      * - `"rubric-only"` — grader sees only the rubric template + criteria +
      *   candidate response.

package/dist/adapters/api-client/build-request.d.ts CHANGED Viewed

@@ -57,6 +57,7 @@ export interface RemoteConfigSlice {
     datasetOverride?: string;
     projectIdOverride?: string;
     perspectiveOverride?: string;
+    graderContext?: "rubric-only" | "with-docs";
     graderReplications?: number;
     gapAnalysisEnabled?: boolean;
     noRemoteCache?: boolean;

package/dist/adapters/api-client/build-request.js CHANGED Viewed

@@ -124,6 +124,9 @@ export async function buildRemoteRequest(options) {
     if (config.perspectiveOverride)
         raw.perspective = config.perspectiveOverride;
     // Advanced
+    if (config.graderContext) {
+        raw.graderContext = config.graderContext;
+    }
     if (config.graderReplications) {
         raw.graderReplications = config.graderReplications;
     }

package/dist/commands/remote-pipeline.js CHANGED Viewed

@@ -140,6 +140,7 @@ function toConfigSlice(opts) {
         datasetOverride: opts.datasetOverride,
         projectIdOverride: opts.projectIdOverride,
         perspectiveOverride: opts.perspectiveOverride,
+        graderContext: opts.graderContext,
         graderReplications: opts.graderReplications,
         gapAnalysisEnabled: opts.gapAnalysisEnabled,
         noRemoteCache: opts.noRemoteCache,

package/dist/orchestration/steps/generate-configs-step.js CHANGED Viewed

@@ -111,9 +111,10 @@ export class GenerateConfigsStep {
         catch {
             ctx.logger.warn("  ⚠ Could not load rubric config — templates will not resolve");
         }
-        // Compile for each variant. `graderContext` defaults to "rubric-only" here
-        // so handlers see a definite value rather than implicit-undefined.
-        const graderContext = ctx.config.graderContext ?? "rubric-only";
+        // Compile for each variant. `graderContext` defaults to "with-docs" here
+        // (flipped in W0200 after PR #555 validation receipts) so handlers see a
+        // definite value rather than implicit-undefined.
+        const graderContext = ctx.config.graderContext ?? "with-docs";
         // W0198 Phase 6 — when the package-surface manifest is authored, pass
         // the in-scope package list down so the literacy mode handler can
         // prefix the `code-correctness` rubric with the deterministic-lane
@@ -192,7 +193,7 @@ export class GenerateConfigsStep {
             graderProvider: models.grader.id,
             models: modeModels,
             rubricConfig,
-            graderContext: ctx.config.graderContext ?? "rubric-only",
+            graderContext: ctx.config.graderContext ?? "with-docs",
             preflightContext,
         });
         for (const w of merged.warnings) {

package/dist/pipeline/eval-fingerprint.d.ts CHANGED Viewed

@@ -46,8 +46,8 @@ export interface FingerprintInput {
      * content, so the cache must treat them as different evaluations even
      * when tasks + docs + grader model match.
      *
-     * Defaults to "rubric-only" inside the hash when undefined, matching
-     * the EvalConfig boundary default.
+     * Defaults to "with-docs" inside the hash when undefined, matching
+     * the EvalConfig boundary default (flipped in v4 / W0200).
      */
     graderContext?: "rubric-only" | "with-docs";
 }

package/dist/pipeline/eval-fingerprint.js CHANGED Viewed

@@ -42,8 +42,14 @@ import { join, relative, resolve } from "path";
  * v3 (2026-05-06): grader-context policy ("rubric-only" vs "with-docs")
  * affects rubricPrompt content and therefore eval output, so it must be
  * hashed. Bumping invalidates v2 fingerprints.
+ *
+ * v4 (2026-05-08): grader-context default flipped from "rubric-only" to
+ * "with-docs" (W0200 / DOC-2117) after PR #555 validation receipts. The
+ * undefined-equivalence at the EvalConfig boundary moves with it, so
+ * cached v3 fingerprints would no longer match the same EvalConfig.
+ * Bumping invalidates v3 fingerprints.
  */
-const FINGERPRINT_VERSION = "eval-fingerprint-v3";
+const FINGERPRINT_VERSION = "eval-fingerprint-v4";
 /**
  * Compute a deterministic SHA-256 fingerprint of all evaluation inputs.
  *
@@ -56,7 +62,7 @@ const FINGERPRINT_VERSION = "eval-fingerprint-v3";
  */
 export function computeEvalFingerprint(input) {
     const { graderModel, mode, rootDir, tasks } = input;
-    const graderContext = input.graderContext ?? "rubric-only";
+    const graderContext = input.graderContext ?? "with-docs";
     const hash = createHash("sha256");
     hash.update(`version:${FINGERPRINT_VERSION}\n`);
     hash.update(`mode:${mode}\n`);

package/dist/pipeline/map-request-to-config.js CHANGED Viewed

@@ -52,6 +52,7 @@ export function mapRequestToConfig(request, rootDir) {
         noAutoScope: request.noAutoScope ?? false,
         noCache: request.noCache ?? false,
         noRemoteCache: request.noRemoteCache ?? false,
+        graderContext: request.graderContext,
         graderReplications: request.graderReplications,
         urls: request.urls,
         headers: request.headers,

package/dist/pipeline/preflight/emit-symbol-preflight.js CHANGED Viewed

@@ -23,6 +23,7 @@
 import { readFileSync } from "node:fs";
 import { resolveVariantMode, } from "../../_vendor/ailf-core/index.js";
 import { computePreflight } from "./compute-preflight.js";
+import { extractCodeBlocks } from "./extract-code-blocks.js";
 /** Map a per-row preflight key to a stable string for in-memory lookup. */
 export function preflightKey(key) {
     return `${key.run}/${key.mode}/${key.task}/${key.model}`;
@@ -78,10 +79,15 @@ export async function emitSymbolPreflight(input) {
         const output = row.response?.output;
         if (typeof output !== "string" || output.length === 0)
             continue;
+        // Candidate outputs are typically markdown — prose with fenced code
+        // blocks. Feed `parseImports` only the TS/JS source so it doesn't
+        // choke on the surrounding markdown. Falls back to the raw text
+        // only when the output contains no fenced blocks at all.
+        const code = extractCodeBlocks(output);
         let report;
         try {
             report = await computePreflight({
-                code: output,
+                code,
                 candidate: { taskId: axisTask, testIndex: i },
                 packageSurface: input.packageSurface,
                 resolver: input.resolver,

package/dist/pipeline/preflight/extract-code-blocks.d.ts ADDED Viewed

@@ -0,0 +1,21 @@
+/**
+ * extract-code-blocks — pull TS/TSX/JS/JSX source out of a candidate
+ * response that's typically structured as markdown prose plus one or
+ * more fenced code blocks.
+ *
+ * AI candidates almost always wrap their code in triple-backtick fences
+ * with language hints (`\`\`\`tsx`, `\`\`\`typescript`, …). Feeding that
+ * raw text to `parseImports` (which calls a strict TS parser) extracts
+ * zero imports because the markdown surrounds — and breaks — the parse.
+ *
+ * Strategy: pull every fenced block whose language hint is in the
+ * allow-list (or unset, which we treat as TS-compatible by convention),
+ * concatenate the contents with blank-line separators, and return that.
+ * When the input has no fences at all, return it unchanged so raw code
+ * still parses; if fences exist but none is allow-listed, return "".
+ *
+ * Out of scope: indented (4-space) code blocks. The corpus uses fenced
+ * blocks exclusively, and supporting indented blocks would require a
+ * markdown parser; the candidate-corpus reality doesn't justify it.
+ */
+export declare function extractCodeBlocks(text: string): string;

package/dist/pipeline/preflight/extract-code-blocks.js ADDED Viewed

@@ -0,0 +1,48 @@
+/**
+ * extract-code-blocks — pull TS/TSX/JS/JSX source out of a candidate
+ * response that's typically structured as markdown prose plus one or
+ * more fenced code blocks.
+ *
+ * AI candidates almost always wrap their code in triple-backtick fences
+ * with language hints (`\`\`\`tsx`, `\`\`\`typescript`, …). Feeding that
+ * raw text to `parseImports` (which calls a strict TS parser) extracts
+ * zero imports because the markdown surrounds — and breaks — the parse.
+ *
+ * Strategy: pull every fenced block whose language hint is in the
+ * allow-list (or unset, which we treat as TS-compatible by convention),
+ * concatenate the contents with blank-line separators, and return that.
+ * When the input has no fences at all, return it unchanged so raw code
+ * still parses; if fences exist but none is allow-listed, return "".
+ *
+ * Out of scope: indented (4-space) code blocks. The corpus uses fenced
+ * blocks exclusively, and supporting indented blocks would require a
+ * markdown parser; the candidate-corpus reality doesn't justify it.
+ */
+const ALLOWED_LANGS = new Set([
+    "tsx",
+    "ts",
+    "typescript",
+    "jsx",
+    "js",
+    "javascript",
+    "",
+]);
+const FENCE_RE = /^([ \t]*)```([^\n`]*)\n([\s\S]*?)\n[ \t]*```\s*$/gm;
+export function extractCodeBlocks(text) {
+    let sawAnyFence = false;
+    const blocks = [];
+    for (const match of text.matchAll(FENCE_RE)) {
+        sawAnyFence = true;
+        const lang = (match[2] ?? "").trim().toLowerCase();
+        if (!ALLOWED_LANGS.has(lang))
+            continue;
+        blocks.push(match[3] ?? "");
+    }
+    // No fences at all → assume the candidate emitted raw source.
+    if (!sawAnyFence)
+        return text;
+    // Fences were present but none in a JS/TS-family language.
+    if (blocks.length === 0)
+        return "";
+    return blocks.join("\n\n");
+}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@sanity/ailf",
-  "version": "4.3.0",
+  "version": "4.4.0",
   "private": false,
   "publishConfig": {
     "access": "public"