npm - @sanity/ailf - Versions diffs - 0.1.25 → 0.1.27 - Mend

@sanity/ailf 0.1.25 → 0.1.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/dist/_vendor/ailf-core/types/index.d.ts +6 -0
package/dist/orchestration/steps/calculate-scores-step.d.ts +2 -2
package/dist/orchestration/steps/calculate-scores-step.js +23 -1
package/dist/orchestration/steps/run-eval-step.js +4 -0
package/dist/webhook/eval-request-handler.d.ts +37 -10
package/dist/webhook/eval-request-handler.js +97 -62
package/package.json +1 -1

package/dist/_vendor/ailf-core/types/index.d.ts CHANGED Viewed

@@ -438,6 +438,12 @@ export interface PipelineState {
     evalFingerprint?: string;
     /** Promptfoo share URLs produced by RunEvalStep, consumed by PublishReportStep */
     promptfooUrls?: PromptfooUrlEntry[];
+    /**
+     * Eval modes that were satisfied by a remote cache hit (score-summary.json
+     * was restored from the Content Lake). Produced by RunEvalStep, consumed by
+     * CalculateScoresStep to skip re-calculation when all required modes are cached.
+     */
+    remoteCacheHits?: Set<string>;
     /**
      * Release auto-scope metadata. Set by FetchDocsStep when a perspective
      * is active and release impact identifies affected documents.

package/dist/orchestration/steps/calculate-scores-step.d.ts CHANGED Viewed

@@ -4,10 +4,10 @@
  * Calls calculateAndWriteScores() from pipeline/calculate-scores.ts with
  * typed options derived from AppContext. No env bridge needed.
  */
-import type { AppContext, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
+import type { AppContext, PipelineState, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
 export declare class CalculateScoresStep implements PipelineStep {
     readonly name = "calculate-scores";
     check(): ValidationIssue[];
-    execute(ctx: AppContext): Promise<StepResult>;
+    execute(ctx: AppContext, state: PipelineState): Promise<StepResult>;
     cacheInputs(ctx: AppContext): string[];
 }

package/dist/orchestration/steps/calculate-scores-step.js CHANGED Viewed

@@ -5,6 +5,7 @@
  * typed options derived from AppContext. No env bridge needed.
  */
 import { join } from "path";
+import { FULL_MODE_SUBMODES } from "../../_vendor/ailf-shared/index.js";
 import { getStepInputPaths } from "../../pipeline/cache.js";
 import { calculateAndWriteScores } from "../../pipeline/calculate-scores.js";
 import { checkResultsExist, checkScoreSummaryValid, } from "../../pipeline/checks.js";
@@ -16,8 +17,29 @@ export class CalculateScoresStep {
     check() {
         return [];
     }
-    async execute(ctx) {
+    async execute(ctx, state) {
         const start = Date.now();
+        // When all required eval modes were satisfied by remote cache hits,
+        // score-summary.json was already restored from the cached report.
+        // Skip re-calculation — the raw eval-results files don't exist.
+        if (state.remoteCacheHits?.size) {
+            const requiredModes = ctx.config.mode === "full"
+                ? [...FULL_MODE_SUBMODES]
+                : [ctx.config.mode];
+            const allCached = requiredModes.every((m) => state.remoteCacheHits.has(m));
+            if (allCached) {
+                // Verify the restored score-summary.json is valid
+                const summaryIssues = checkScoreSummaryValid(ctx.config.rootDir);
+                const summaryErrors = summaryIssues.filter((i) => i.severity === "error");
+                if (summaryErrors.length === 0) {
+                    return {
+                        reason: "Remote cache hit — score-summary.json restored from cached report",
+                        status: "skipped",
+                    };
+                }
+                // If the summary is invalid, fall through to normal calculation
+            }
+        }
         const primaryMode = ctx.config.mode === "full"
             ? "baseline"
             : ctx.config.mode;

package/dist/orchestration/steps/run-eval-step.js CHANGED Viewed

@@ -102,6 +102,10 @@ export class RunEvalStep {
             ctx.reportStore) {
             const remoteCacheResult = await checkRemoteCache(evalFingerprint, ctx.reportStore, rootDir);
             if (remoteCacheResult) {
+                // Record the cache hit so CalculateScoresStep can skip when all
+                // required eval modes were satisfied from the remote cache.
+                state.remoteCacheHits ??= new Set();
+                state.remoteCacheHits.add(this.mode);
                 return {
                     durationMs: Date.now() - start,
                     status: "success",

package/dist/webhook/eval-request-handler.d.ts CHANGED Viewed

@@ -3,23 +3,44 @@
  *
  * Server-side handler for `ailf.evalRequest` documents from the Sanity
  * Content Lake. This is the counterpart to the Studio's "Request Evaluation"
- * action — when a content editor creates an eval request document via the
- * Studio UI, a Sanity webhook fires and calls this handler.
+ * and "Run Task Eval" actions — when a content editor creates an eval
+ * request document via the Studio UI, a Sanity webhook fires and calls
+ * this handler.
  *
  * Designed to run in any HTTP environment: Cloudflare Workers, Vercel
  * functions, Express, Hono, etc.
  *
+ * Supports two scoping modes:
+ * - **Release-scoped** — requires `perspective` field
+ * - **Task-scoped** — requires `tasks` array (optionally with `areas`)
+ *
+ * At least one of `perspective` or `tasks` must be present.
+ *
  * Flow:
  * 1. Receive eval request payload (from Sanity webhook projection)
  * 2. Validate: must be `ailf.evalRequest` type, `pending` status,
- *    non-empty `perspective`
- * 3. Dispatch a full evaluation to GitHub Actions via `repository_dispatch`
- *    with `external-eval` event type and release-scoped client payload
+ *    with either `perspective` or `tasks`
+ * 3. Dispatch evaluation to GitHub Actions via `repository_dispatch`
+ *    with `external-eval` event type and scoped client payload
  * 4. On success: PATCH the eval request document → `status: "dispatched"`
  * 5. On failure: PATCH the eval request document → `status: "failed"` + error
  * 6. Return a structured result
  *
- * @see packages/studio/src/actions/dispatch.ts — Studio-side dispatch
+ * ## Sanity Manage Webhook Configuration
+ *
+ * The Sanity webhook projection MUST include all fields consumed by
+ * EvalRequestPayload. The recommended projection is `{...}` (spread),
+ * which passes the entire document and avoids silently dropping fields
+ * when the schema evolves.
+ *
+ * Recommended projection:
+ * ```
+ * {...}
+ * ```
+ *
+ * @see packages/api/src/routes/webhooks.ts — API gateway webhook handler
+ * @see packages/studio/src/actions/RunEvaluationAction.tsx — release eval
+ * @see packages/studio/src/actions/RunTaskEvaluationAction.tsx — task eval
  * @see .github/workflows/external-eval.yml — receiving workflow
  * @see docs/design-docs/report-store/visibility-workflows.md
  */
@@ -29,24 +50,30 @@ export interface EvalRequestPayload {
     _id: string;
     /** The Sanity document _type (should be "ailf.evalRequest") */
     _type: string;
+    /** Feature areas to scope the evaluation (task-scoped evals) */
+    areas?: string[];
     /** Sanity dataset */
     dataset: string;
+    /** Run in debug mode */
+    debug?: boolean;
     /** Error message (only if status is "failed") */
     error?: string;
     /** Evaluation mode */
     mode: string;
-    /** Content release perspective ID */
-    perspective: string;
+    /** Content release perspective ID (release-scoped evals) */
+    perspective?: string;
     /** Sanity project ID */
     projectId: string;
     /** ISO datetime of when the request was created */
     requestedAt: string;
     /** User ID who requested */
     requestedBy?: string;
-    /** Publish tag */
-    tag?: string;
     /** Request status */
     status: string;
+    /** Publish tag */
+    tag?: string;
+    /** Specific task IDs to evaluate (task-scoped evals) */
+    tasks?: string[];
 }
 /** Configuration for the eval request handler. */
 export interface EvalRequestHandlerConfig {

package/dist/webhook/eval-request-handler.js CHANGED Viewed

@@ -3,23 +3,44 @@
  *
  * Server-side handler for `ailf.evalRequest` documents from the Sanity
  * Content Lake. This is the counterpart to the Studio's "Request Evaluation"
- * action — when a content editor creates an eval request document via the
- * Studio UI, a Sanity webhook fires and calls this handler.
+ * and "Run Task Eval" actions — when a content editor creates an eval
+ * request document via the Studio UI, a Sanity webhook fires and calls
+ * this handler.
  *
  * Designed to run in any HTTP environment: Cloudflare Workers, Vercel
  * functions, Express, Hono, etc.
  *
+ * Supports two scoping modes:
+ * - **Release-scoped** — requires `perspective` field
+ * - **Task-scoped** — requires `tasks` array (optionally with `areas`)
+ *
+ * At least one of `perspective` or `tasks` must be present.
+ *
  * Flow:
  * 1. Receive eval request payload (from Sanity webhook projection)
  * 2. Validate: must be `ailf.evalRequest` type, `pending` status,
- *    non-empty `perspective`
- * 3. Dispatch a full evaluation to GitHub Actions via `repository_dispatch`
- *    with `external-eval` event type and release-scoped client payload
+ *    with either `perspective` or `tasks`
+ * 3. Dispatch evaluation to GitHub Actions via `repository_dispatch`
+ *    with `external-eval` event type and scoped client payload
  * 4. On success: PATCH the eval request document → `status: "dispatched"`
  * 5. On failure: PATCH the eval request document → `status: "failed"` + error
  * 6. Return a structured result
  *
- * @see packages/studio/src/actions/dispatch.ts — Studio-side dispatch
+ * ## Sanity Manage Webhook Configuration
+ *
+ * The Sanity webhook projection MUST include all fields consumed by
+ * EvalRequestPayload. The recommended projection is `{...}` (spread),
+ * which passes the entire document and avoids silently dropping fields
+ * when the schema evolves.
+ *
+ * Recommended projection:
+ * ```
+ * {...}
+ * ```
+ *
+ * @see packages/api/src/routes/webhooks.ts — API gateway webhook handler
+ * @see packages/studio/src/actions/RunEvaluationAction.tsx — release eval
+ * @see packages/studio/src/actions/RunTaskEvaluationAction.tsx — task eval
  * @see .github/workflows/external-eval.yml — receiving workflow
  * @see docs/design-docs/report-store/visibility-workflows.md
  */
@@ -53,98 +74,112 @@ const GITHUB_API = "https://api.github.com";
 export async function handleEvalRequest(payload, config) {
     const requestId = payload._id ?? "unknown";
     // -------------------------------------------------------------------------
-    // 1. Validate payload
+    // 1. Create Sanity client early so validation failures can mark the
+    //    document as "failed" instead of leaving it stuck at "pending".
+    // -------------------------------------------------------------------------
+    const client = payload.projectId && payload.dataset
+        ? createClient({
+            apiVersion: "2026-03-11",
+            dataset: payload.dataset,
+            projectId: payload.projectId,
+            token: config.sanityToken,
+            useCdn: false,
+        })
+        : null;
+    // Helper: mark the eval request as failed in the Content Lake so the
+    // Studio UI can show the error instead of polling forever.
+    async function markFailed(errorMessage) {
+        if (client && payload._id) {
+            try {
+                await client
+                    .patch(payload._id)
+                    .set({ error: errorMessage, status: "failed" })
+                    .commit();
+            }
+            catch (err) {
+                console.warn(`  ⚠️  Failed to update document with error status: ${err instanceof Error ? err.message : String(err)}`);
+            }
+        }
+        return { error: errorMessage, ok: false, requestId };
+    }
+    // -------------------------------------------------------------------------
+    // 2. Validate payload
     // -------------------------------------------------------------------------
     if (payload._type !== "ailf.evalRequest") {
-        return {
-            error: `Unexpected document type: "${payload._type}" (expected "ailf.evalRequest")`,
-            ok: false,
-            requestId,
-        };
+        return markFailed(`Unexpected document type: "${payload._type}" (expected "ailf.evalRequest")`);
     }
     if (payload.status !== "pending") {
+        // Don't mark as failed — it's already in a non-pending state
         return {
             error: `Eval request is not pending (status: "${payload.status}")`,
             ok: false,
             requestId,
         };
     }
-    if (!payload.perspective) {
-        return {
-            error: "Missing required field: perspective",
-            ok: false,
-            requestId,
-        };
+    const hasPerspective = !!payload.perspective;
+    const hasTasks = Array.isArray(payload.tasks) && payload.tasks.length > 0;
+    if (!hasPerspective && !hasTasks) {
+        return markFailed("Missing required field: perspective or tasks. " +
+            "Provide a content release perspective for release evals, " +
+            "or a tasks array for task-scoped evals.");
     }
     // -------------------------------------------------------------------------
-    // 2. Dispatch evaluation via GitHub Actions
+    // 3. Dispatch evaluation via GitHub Actions
     // -------------------------------------------------------------------------
     const repo = config.githubRepo ?? DEFAULT_REPO;
     const dispatchResult = await dispatchGitHubEval(repo, payload, config);
     // -------------------------------------------------------------------------
-    // 3. Update eval request document status
+    // 4. Update eval request document status
     // -------------------------------------------------------------------------
-    const client = createClient({
-        apiVersion: "2026-03-11",
-        dataset: payload.dataset,
-        projectId: payload.projectId,
-        token: config.sanityToken,
-        useCdn: false,
-    });
     if (dispatchResult.ok) {
-        try {
-            await client
-                .patch(payload._id)
-                .set({
-                dispatchedAt: new Date().toISOString(),
-                status: "dispatched",
-            })
-                .commit();
-        }
-        catch (err) {
-            // Dispatch succeeded but status update failed — log and still return ok
-            console.warn(`  ⚠️  Dispatch succeeded but failed to update document status: ${err instanceof Error ? err.message : String(err)}`);
+        if (client) {
+            try {
+                await client
+                    .patch(payload._id)
+                    .set({
+                    dispatchedAt: new Date().toISOString(),
+                    status: "dispatched",
+                })
+                    .commit();
+            }
+            catch (err) {
+                // Dispatch succeeded but status update failed — log and still return ok
+                console.warn(`  ⚠️  Dispatch succeeded but failed to update document status: ${err instanceof Error ? err.message : String(err)}`);
+            }
         }
         return { ok: true, requestId };
     }
     // Dispatch failed — mark the document as failed
-    try {
-        await client
-            .patch(payload._id)
-            .set({
-            error: dispatchResult.error ?? "Unknown dispatch error",
-            status: "failed",
-        })
-            .commit();
-    }
-    catch (err) {
-        console.warn(`  ⚠️  Failed to update document with error status: ${err instanceof Error ? err.message : String(err)}`);
-    }
-    return {
-        error: dispatchResult.error,
-        ok: false,
-        requestId,
-    };
+    return markFailed(dispatchResult.error ?? "Unknown dispatch error");
 }
 /**
- * Dispatch a release-scoped evaluation via GitHub Actions repository_dispatch.
+ * Dispatch an evaluation via GitHub Actions repository_dispatch.
  *
- * Uses the `external-eval` event type with a client_payload conforming to
- * PipelineRequestSchema. The workflow passes it directly to the CLI via
- * `--config` without field translation.
+ * Supports both release-scoped (perspective) and task-scoped (tasks/areas)
+ * evaluations. Uses the `external-eval` event type with a client_payload
+ * conforming to PipelineRequestSchema. The workflow passes it directly to
+ * the CLI via `--config` without field translation.
  */
 async function dispatchGitHubEval(repo, payload, config) {
     const url = `${GITHUB_API}/repos/${repo}/dispatches`;
+    const hasPerspective = !!payload.perspective;
+    const hasTasks = Array.isArray(payload.tasks) && payload.tasks.length > 0;
+    const hasAreas = Array.isArray(payload.areas) && payload.areas.length > 0;
     const body = {
         client_payload: {
             caller_repo: "sanity-io/www-sanity-io",
             dataset: payload.dataset,
             mode: payload.mode,
-            perspective: payload.perspective,
             projectId: payload.projectId,
             publish: true,
-            ...(payload.tag ? { publishTag: payload.tag } : {}),
             source: "production",
+            // Release-scoped fields
+            ...(hasPerspective ? { perspective: payload.perspective } : {}),
+            // Task-scoped fields
+            ...(hasTasks ? { tasks: payload.tasks } : {}),
+            ...(hasAreas ? { areas: payload.areas } : {}),
+            ...(payload.debug ? { debug: true } : {}),
+            ...(payload.tag ? { publishTag: payload.tag } : {}),
         },
         event_type: "external-eval",
     };

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@sanity/ailf",
-  "version": "0.1.25",
+  "version": "0.1.27",
   "private": false,
   "publishConfig": {
     "access": "restricted"