@sanity/ailf 0.1.25 → 0.1.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -438,6 +438,12 @@ export interface PipelineState {
438
438
  evalFingerprint?: string;
439
439
  /** Promptfoo share URLs produced by RunEvalStep, consumed by PublishReportStep */
440
440
  promptfooUrls?: PromptfooUrlEntry[];
441
+ /**
442
+ * Eval modes that were satisfied by a remote cache hit (score-summary.json
443
+ * was restored from the Content Lake). Produced by RunEvalStep, consumed by
444
+ * CalculateScoresStep to skip re-calculation when all required modes are cached.
445
+ */
446
+ remoteCacheHits?: Set<string>;
441
447
  /**
442
448
  * Release auto-scope metadata. Set by FetchDocsStep when a perspective
443
449
  * is active and release impact identifies affected documents.
@@ -4,10 +4,10 @@
4
4
  * Calls calculateAndWriteScores() from pipeline/calculate-scores.ts with
5
5
  * typed options derived from AppContext. No env bridge needed.
6
6
  */
7
- import type { AppContext, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
7
+ import type { AppContext, PipelineState, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
8
8
  export declare class CalculateScoresStep implements PipelineStep {
9
9
  readonly name = "calculate-scores";
10
10
  check(): ValidationIssue[];
11
- execute(ctx: AppContext): Promise<StepResult>;
11
+ execute(ctx: AppContext, state: PipelineState): Promise<StepResult>;
12
12
  cacheInputs(ctx: AppContext): string[];
13
13
  }
@@ -5,6 +5,7 @@
5
5
  * typed options derived from AppContext. No env bridge needed.
6
6
  */
7
7
  import { join } from "path";
8
+ import { FULL_MODE_SUBMODES } from "../../_vendor/ailf-shared/index.js";
8
9
  import { getStepInputPaths } from "../../pipeline/cache.js";
9
10
  import { calculateAndWriteScores } from "../../pipeline/calculate-scores.js";
10
11
  import { checkResultsExist, checkScoreSummaryValid, } from "../../pipeline/checks.js";
@@ -16,8 +17,29 @@ export class CalculateScoresStep {
16
17
  check() {
17
18
  return [];
18
19
  }
19
- async execute(ctx) {
20
+ async execute(ctx, state) {
20
21
  const start = Date.now();
22
+ // When all required eval modes were satisfied by remote cache hits,
23
+ // score-summary.json was already restored from the cached report.
24
+ // Skip re-calculation — the raw eval-results files don't exist.
25
+ if (state.remoteCacheHits?.size) {
26
+ const requiredModes = ctx.config.mode === "full"
27
+ ? [...FULL_MODE_SUBMODES]
28
+ : [ctx.config.mode];
29
+ const allCached = requiredModes.every((m) => state.remoteCacheHits.has(m));
30
+ if (allCached) {
31
+ // Verify the restored score-summary.json is valid
32
+ const summaryIssues = checkScoreSummaryValid(ctx.config.rootDir);
33
+ const summaryErrors = summaryIssues.filter((i) => i.severity === "error");
34
+ if (summaryErrors.length === 0) {
35
+ return {
36
+ reason: "Remote cache hit — score-summary.json restored from cached report",
37
+ status: "skipped",
38
+ };
39
+ }
40
+ // If the summary is invalid, fall through to normal calculation
41
+ }
42
+ }
21
43
  const primaryMode = ctx.config.mode === "full"
22
44
  ? "baseline"
23
45
  : ctx.config.mode;
@@ -102,6 +102,10 @@ export class RunEvalStep {
102
102
  ctx.reportStore) {
103
103
  const remoteCacheResult = await checkRemoteCache(evalFingerprint, ctx.reportStore, rootDir);
104
104
  if (remoteCacheResult) {
105
+ // Record the cache hit so CalculateScoresStep can skip when all
106
+ // required eval modes were satisfied from the remote cache.
107
+ state.remoteCacheHits ??= new Set();
108
+ state.remoteCacheHits.add(this.mode);
105
109
  return {
106
110
  durationMs: Date.now() - start,
107
111
  status: "success",
@@ -3,23 +3,44 @@
3
3
  *
4
4
  * Server-side handler for `ailf.evalRequest` documents from the Sanity
5
5
  * Content Lake. This is the counterpart to the Studio's "Request Evaluation"
6
- * action — when a content editor creates an eval request document via the
7
- * Studio UI, a Sanity webhook fires and calls this handler.
6
+ * and "Run Task Eval" actions — when a content editor creates an eval
7
+ * request document via the Studio UI, a Sanity webhook fires and calls
8
+ * this handler.
8
9
  *
9
10
  * Designed to run in any HTTP environment: Cloudflare Workers, Vercel
10
11
  * functions, Express, Hono, etc.
11
12
  *
13
+ * Supports two scoping modes:
14
+ * - **Release-scoped** — requires `perspective` field
15
+ * - **Task-scoped** — requires `tasks` array (optionally with `areas`)
16
+ *
17
+ * At least one of `perspective` or `tasks` must be present.
18
+ *
12
19
  * Flow:
13
20
  * 1. Receive eval request payload (from Sanity webhook projection)
14
21
  * 2. Validate: must be `ailf.evalRequest` type, `pending` status,
15
- * non-empty `perspective`
16
- * 3. Dispatch a full evaluation to GitHub Actions via `repository_dispatch`
17
- * with `external-eval` event type and release-scoped client payload
22
+ * with either `perspective` or `tasks`
23
+ * 3. Dispatch evaluation to GitHub Actions via `repository_dispatch`
24
+ * with `external-eval` event type and scoped client payload
18
25
  * 4. On success: PATCH the eval request document → `status: "dispatched"`
19
26
  * 5. On failure: PATCH the eval request document → `status: "failed"` + error
20
27
  * 6. Return a structured result
21
28
  *
22
- * @see packages/studio/src/actions/dispatch.ts Studio-side dispatch
29
+ * ## Sanity Manage Webhook Configuration
30
+ *
31
+ * The Sanity webhook projection MUST include all fields consumed by
32
+ * EvalRequestPayload. The recommended projection is `{...}` (spread),
33
+ * which passes the entire document and avoids silently dropping fields
34
+ * when the schema evolves.
35
+ *
36
+ * Recommended projection:
37
+ * ```
38
+ * {...}
39
+ * ```
40
+ *
41
+ * @see packages/api/src/routes/webhooks.ts — API gateway webhook handler
42
+ * @see packages/studio/src/actions/RunEvaluationAction.tsx — release eval
43
+ * @see packages/studio/src/actions/RunTaskEvaluationAction.tsx — task eval
23
44
  * @see .github/workflows/external-eval.yml — receiving workflow
24
45
  * @see docs/design-docs/report-store/visibility-workflows.md
25
46
  */
@@ -29,24 +50,30 @@ export interface EvalRequestPayload {
29
50
  _id: string;
30
51
  /** The Sanity document _type (should be "ailf.evalRequest") */
31
52
  _type: string;
53
+ /** Feature areas to scope the evaluation (task-scoped evals) */
54
+ areas?: string[];
32
55
  /** Sanity dataset */
33
56
  dataset: string;
57
+ /** Run in debug mode */
58
+ debug?: boolean;
34
59
  /** Error message (only if status is "failed") */
35
60
  error?: string;
36
61
  /** Evaluation mode */
37
62
  mode: string;
38
- /** Content release perspective ID */
39
- perspective: string;
63
+ /** Content release perspective ID (release-scoped evals) */
64
+ perspective?: string;
40
65
  /** Sanity project ID */
41
66
  projectId: string;
42
67
  /** ISO datetime of when the request was created */
43
68
  requestedAt: string;
44
69
  /** User ID who requested */
45
70
  requestedBy?: string;
46
- /** Publish tag */
47
- tag?: string;
48
71
  /** Request status */
49
72
  status: string;
73
+ /** Publish tag */
74
+ tag?: string;
75
+ /** Specific task IDs to evaluate (task-scoped evals) */
76
+ tasks?: string[];
50
77
  }
51
78
  /** Configuration for the eval request handler. */
52
79
  export interface EvalRequestHandlerConfig {
@@ -3,23 +3,44 @@
3
3
  *
4
4
  * Server-side handler for `ailf.evalRequest` documents from the Sanity
5
5
  * Content Lake. This is the counterpart to the Studio's "Request Evaluation"
6
- * action — when a content editor creates an eval request document via the
7
- * Studio UI, a Sanity webhook fires and calls this handler.
6
+ * and "Run Task Eval" actions — when a content editor creates an eval
7
+ * request document via the Studio UI, a Sanity webhook fires and calls
8
+ * this handler.
8
9
  *
9
10
  * Designed to run in any HTTP environment: Cloudflare Workers, Vercel
10
11
  * functions, Express, Hono, etc.
11
12
  *
13
+ * Supports two scoping modes:
14
+ * - **Release-scoped** — requires `perspective` field
15
+ * - **Task-scoped** — requires `tasks` array (optionally with `areas`)
16
+ *
17
+ * At least one of `perspective` or `tasks` must be present.
18
+ *
12
19
  * Flow:
13
20
  * 1. Receive eval request payload (from Sanity webhook projection)
14
21
  * 2. Validate: must be `ailf.evalRequest` type, `pending` status,
15
- * non-empty `perspective`
16
- * 3. Dispatch a full evaluation to GitHub Actions via `repository_dispatch`
17
- * with `external-eval` event type and release-scoped client payload
22
+ * with either `perspective` or `tasks`
23
+ * 3. Dispatch evaluation to GitHub Actions via `repository_dispatch`
24
+ * with `external-eval` event type and scoped client payload
18
25
  * 4. On success: PATCH the eval request document → `status: "dispatched"`
19
26
  * 5. On failure: PATCH the eval request document → `status: "failed"` + error
20
27
  * 6. Return a structured result
21
28
  *
22
- * @see packages/studio/src/actions/dispatch.ts Studio-side dispatch
29
+ * ## Sanity Manage Webhook Configuration
30
+ *
31
+ * The Sanity webhook projection MUST include all fields consumed by
32
+ * EvalRequestPayload. The recommended projection is `{...}` (spread),
33
+ * which passes the entire document and avoids silently dropping fields
34
+ * when the schema evolves.
35
+ *
36
+ * Recommended projection:
37
+ * ```
38
+ * {...}
39
+ * ```
40
+ *
41
+ * @see packages/api/src/routes/webhooks.ts — API gateway webhook handler
42
+ * @see packages/studio/src/actions/RunEvaluationAction.tsx — release eval
43
+ * @see packages/studio/src/actions/RunTaskEvaluationAction.tsx — task eval
23
44
  * @see .github/workflows/external-eval.yml — receiving workflow
24
45
  * @see docs/design-docs/report-store/visibility-workflows.md
25
46
  */
@@ -53,98 +74,112 @@ const GITHUB_API = "https://api.github.com";
53
74
  export async function handleEvalRequest(payload, config) {
54
75
  const requestId = payload._id ?? "unknown";
55
76
  // -------------------------------------------------------------------------
56
- // 1. Validate payload
77
+ // 1. Create Sanity client early so validation failures can mark the
78
+ // document as "failed" instead of leaving it stuck at "pending".
79
+ // -------------------------------------------------------------------------
80
+ const client = payload.projectId && payload.dataset
81
+ ? createClient({
82
+ apiVersion: "2026-03-11",
83
+ dataset: payload.dataset,
84
+ projectId: payload.projectId,
85
+ token: config.sanityToken,
86
+ useCdn: false,
87
+ })
88
+ : null;
89
+ // Helper: mark the eval request as failed in the Content Lake so the
90
+ // Studio UI can show the error instead of polling forever.
91
+ async function markFailed(errorMessage) {
92
+ if (client && payload._id) {
93
+ try {
94
+ await client
95
+ .patch(payload._id)
96
+ .set({ error: errorMessage, status: "failed" })
97
+ .commit();
98
+ }
99
+ catch (err) {
100
+ console.warn(` ⚠️ Failed to update document with error status: ${err instanceof Error ? err.message : String(err)}`);
101
+ }
102
+ }
103
+ return { error: errorMessage, ok: false, requestId };
104
+ }
105
+ // -------------------------------------------------------------------------
106
+ // 2. Validate payload
57
107
  // -------------------------------------------------------------------------
58
108
  if (payload._type !== "ailf.evalRequest") {
59
- return {
60
- error: `Unexpected document type: "${payload._type}" (expected "ailf.evalRequest")`,
61
- ok: false,
62
- requestId,
63
- };
109
+ return markFailed(`Unexpected document type: "${payload._type}" (expected "ailf.evalRequest")`);
64
110
  }
65
111
  if (payload.status !== "pending") {
112
+ // Don't mark as failed — it's already in a non-pending state
66
113
  return {
67
114
  error: `Eval request is not pending (status: "${payload.status}")`,
68
115
  ok: false,
69
116
  requestId,
70
117
  };
71
118
  }
72
- if (!payload.perspective) {
73
- return {
74
- error: "Missing required field: perspective",
75
- ok: false,
76
- requestId,
77
- };
119
+ const hasPerspective = !!payload.perspective;
120
+ const hasTasks = Array.isArray(payload.tasks) && payload.tasks.length > 0;
121
+ if (!hasPerspective && !hasTasks) {
122
+ return markFailed("Missing required field: perspective or tasks. " +
123
+ "Provide a content release perspective for release evals, " +
124
+ "or a tasks array for task-scoped evals.");
78
125
  }
79
126
  // -------------------------------------------------------------------------
80
- // 2. Dispatch evaluation via GitHub Actions
127
+ // 3. Dispatch evaluation via GitHub Actions
81
128
  // -------------------------------------------------------------------------
82
129
  const repo = config.githubRepo ?? DEFAULT_REPO;
83
130
  const dispatchResult = await dispatchGitHubEval(repo, payload, config);
84
131
  // -------------------------------------------------------------------------
85
- // 3. Update eval request document status
132
+ // 4. Update eval request document status
86
133
  // -------------------------------------------------------------------------
87
- const client = createClient({
88
- apiVersion: "2026-03-11",
89
- dataset: payload.dataset,
90
- projectId: payload.projectId,
91
- token: config.sanityToken,
92
- useCdn: false,
93
- });
94
134
  if (dispatchResult.ok) {
95
- try {
96
- await client
97
- .patch(payload._id)
98
- .set({
99
- dispatchedAt: new Date().toISOString(),
100
- status: "dispatched",
101
- })
102
- .commit();
103
- }
104
- catch (err) {
105
- // Dispatch succeeded but status update failed — log and still return ok
106
- console.warn(` ⚠️ Dispatch succeeded but failed to update document status: ${err instanceof Error ? err.message : String(err)}`);
135
+ if (client) {
136
+ try {
137
+ await client
138
+ .patch(payload._id)
139
+ .set({
140
+ dispatchedAt: new Date().toISOString(),
141
+ status: "dispatched",
142
+ })
143
+ .commit();
144
+ }
145
+ catch (err) {
146
+ // Dispatch succeeded but status update failed log and still return ok
147
+ console.warn(` ⚠️ Dispatch succeeded but failed to update document status: ${err instanceof Error ? err.message : String(err)}`);
148
+ }
107
149
  }
108
150
  return { ok: true, requestId };
109
151
  }
110
152
  // Dispatch failed — mark the document as failed
111
- try {
112
- await client
113
- .patch(payload._id)
114
- .set({
115
- error: dispatchResult.error ?? "Unknown dispatch error",
116
- status: "failed",
117
- })
118
- .commit();
119
- }
120
- catch (err) {
121
- console.warn(` ⚠️ Failed to update document with error status: ${err instanceof Error ? err.message : String(err)}`);
122
- }
123
- return {
124
- error: dispatchResult.error,
125
- ok: false,
126
- requestId,
127
- };
153
+ return markFailed(dispatchResult.error ?? "Unknown dispatch error");
128
154
  }
129
155
  /**
130
- * Dispatch a release-scoped evaluation via GitHub Actions repository_dispatch.
156
+ * Dispatch an evaluation via GitHub Actions repository_dispatch.
131
157
  *
132
- * Uses the `external-eval` event type with a client_payload conforming to
133
- * PipelineRequestSchema. The workflow passes it directly to the CLI via
134
- * `--config` without field translation.
158
+ * Supports both release-scoped (perspective) and task-scoped (tasks/areas)
159
+ * evaluations. Uses the `external-eval` event type with a client_payload
160
+ * conforming to PipelineRequestSchema. The workflow passes it directly to
161
+ * the CLI via `--config` without field translation.
135
162
  */
136
163
  async function dispatchGitHubEval(repo, payload, config) {
137
164
  const url = `${GITHUB_API}/repos/${repo}/dispatches`;
165
+ const hasPerspective = !!payload.perspective;
166
+ const hasTasks = Array.isArray(payload.tasks) && payload.tasks.length > 0;
167
+ const hasAreas = Array.isArray(payload.areas) && payload.areas.length > 0;
138
168
  const body = {
139
169
  client_payload: {
140
170
  caller_repo: "sanity-io/www-sanity-io",
141
171
  dataset: payload.dataset,
142
172
  mode: payload.mode,
143
- perspective: payload.perspective,
144
173
  projectId: payload.projectId,
145
174
  publish: true,
146
- ...(payload.tag ? { publishTag: payload.tag } : {}),
147
175
  source: "production",
176
+ // Release-scoped fields
177
+ ...(hasPerspective ? { perspective: payload.perspective } : {}),
178
+ // Task-scoped fields
179
+ ...(hasTasks ? { tasks: payload.tasks } : {}),
180
+ ...(hasAreas ? { areas: payload.areas } : {}),
181
+ ...(payload.debug ? { debug: true } : {}),
182
+ ...(payload.tag ? { publishTag: payload.tag } : {}),
148
183
  },
149
184
  event_type: "external-eval",
150
185
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sanity/ailf",
3
- "version": "0.1.25",
3
+ "version": "0.1.27",
4
4
  "private": false,
5
5
  "publishConfig": {
6
6
  "access": "restricted"