@sanity/ailf 6.1.2 → 7.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,10 @@
1
+ /**
2
+ * Re-export of the build-generated help-topic table.
3
+ *
4
+ * The underlying file `src/generated/help-content.ts` is emitted by
5
+ * `scripts/extract-help.ts` and is gitignored. Run `pnpm extract-help`
6
+ * (invoked automatically by this package's `prebuild`) to (re)generate it.
7
+ *
8
+ * @see scripts/extract-help.ts
9
+ */
10
+ export { HELP_TOPICS } from "./generated/help-content.js";
@@ -0,0 +1,26 @@
1
+ /**
2
+ * Help topic type — extracted from `:::help` remark directives in
3
+ * `docs/**\/*.md` by `scripts/extract-help.ts`. Consumed by both the
4
+ * Studio plugin and the App SDK dashboard.
5
+ *
6
+ * The extraction script emits `src/generated/help-content.ts` (gitignored)
7
+ * within this package. Consumers import `HELP_TOPICS` from the package
8
+ * barrel rather than reaching into the generated path directly.
9
+ *
10
+ * @see scripts/extract-help.ts
11
+ * @see docs/design-docs/contextual-help-sidebar.md
12
+ */
13
+ export interface HelpTopic {
14
+ /** URL-safe identifier — matches the #id in the :::help directive */
15
+ id: string;
16
+ /** Display title shown in the drawer header */
17
+ title: string;
18
+ /** Markdown body content (rendered in the drawer) */
19
+ body: string;
20
+ /** Source file path (for debugging / "Edit this page" links) */
21
+ source: string;
22
+ /** Related topic IDs — rendered as "See also" links */
23
+ related?: string[];
24
+ /** Tags for search/filtering */
25
+ tags?: string[];
26
+ }
@@ -0,0 +1 @@
1
+ export {};
@@ -21,6 +21,10 @@ export { computeCanaryDrift, type CanaryDriftReport, type CanaryReportSlim, type
21
21
  export { type DocumentRef } from "./document-ref.js";
22
22
  export { makeEditorialReference, type EditorialReference, type MakeEditorialReferenceArgs, } from "./editorial-reference.js";
23
23
  export { FEATURE_FLAGS, type FeatureFlag, type FeatureFlagKey, } from "./feature-flags.js";
24
+ export { DEFAULT_GCS_ARTIFACT_BUCKET } from "./gcs-defaults.js";
25
+ export { GLOSSARY, type GlossaryEntry, type GlossarySlug } from "./glossary.js";
26
+ export { HELP_TOPICS } from "./help-content.js";
27
+ export { type HelpTopic } from "./help-topics.js";
24
28
  export { GRADE_BOUNDARIES, scoreGrade, type ScoreGrade, } from "./score-grades.js";
25
29
  export { NOISE_THRESHOLD } from "./noise-threshold.js";
26
30
  export { CANONICAL_EVAL_MODES, LEGACY_EVAL_MODE_ALIASES, LITERACY_VARIANTS, RAW_EVAL_MODES, type EvalMode, type LiteracyVariant, type RawEvalMode, } from "./eval-modes.js";
@@ -20,6 +20,9 @@
20
20
  export { computeCanaryDrift, } from "./canary-drift.js";
21
21
  export { makeEditorialReference, } from "./editorial-reference.js";
22
22
  export { FEATURE_FLAGS, } from "./feature-flags.js";
23
+ export { DEFAULT_GCS_ARTIFACT_BUCKET } from "./gcs-defaults.js";
24
+ export { GLOSSARY } from "./glossary.js";
25
+ export { HELP_TOPICS } from "./help-content.js";
23
26
  export { GRADE_BOUNDARIES, scoreGrade, } from "./score-grades.js";
24
27
  export { NOISE_THRESHOLD } from "./noise-threshold.js";
25
28
  export { CANONICAL_EVAL_MODES, LEGACY_EVAL_MODE_ALIASES, LITERACY_VARIANTS, RAW_EVAL_MODES, } from "./eval-modes.js";
@@ -18,6 +18,7 @@
18
18
  import { promises as fs } from "node:fs";
19
19
  import path from "node:path";
20
20
  import { ARTIFACT_EXPORT_PHASE_ID, DIAGNOSIS_CARD_GENERATORS, InMemoryPluginRegistry, NoOpArtifactWriter, NoOpProgressReporter, createDiagnosisRunner, createLLMClient, generateRunId, isArtifactType, modelId, } from "./_vendor/ailf-core/index.js";
21
+ import { DEFAULT_GCS_ARTIFACT_BUCKET } from "./_vendor/ailf-shared/index.js";
21
22
  import { JudgmentAttributionSchema } from "./adapters/attribution/per-entry-attribution-writer.js";
22
23
  import { AccumulatingArtifactWriter } from "./artifact-capture/accumulating-artifact-writer.js";
23
24
  import { ApiGatewayArtifactWriter } from "./artifact-capture/api-gateway-artifact-writer.js";
@@ -162,12 +163,13 @@ function createProgressReporter() {
162
163
  });
163
164
  }
164
165
  /**
165
- * Shared GCS bucket for report artifacts. Matches the gateway default at
166
- * packages/api/src/routes/artifacts.ts — both sides assume ailf-artifacts
167
- * unless explicitly overridden. The gateway's signing credentials are scoped
168
- * to this bucket, so alternate names require reconfiguring the gateway.
166
+ * Shared GCS bucket for report artifacts. Canonical default lives in
167
+ * `@sanity/ailf-shared` so the gateway (`packages/api/src/routes/runs.ts`)
168
+ * and the dashboard read the same value. The gateway's signing credentials
169
+ * are scoped to this bucket, so alternate names require reconfiguring the
170
+ * gateway in addition to setting `AILF_GCS_ARTIFACT_BUCKET`.
169
171
  */
170
- const DEFAULT_ARTIFACT_BUCKET = "ailf-artifacts";
172
+ const DEFAULT_ARTIFACT_BUCKET = DEFAULT_GCS_ARTIFACT_BUCKET;
171
173
  /**
172
174
  * D0033 M4 default root for local artifacts when `--artifacts-dir` is unset.
173
175
  * Mirrors the pre-W0050 capture root so existing dev tooling (Studio
@@ -10,12 +10,24 @@
10
10
  */
11
11
  import type { ArtifactManifest, RunId } from "../_vendor/ailf-core/index.d.ts";
12
12
  /**
13
- * Copy an artifact manifest verbatim and stamp `sourceRunId` on every ref.
13
+ * Copy an artifact manifest verbatim and stamp `sourceRunId` on every ref
14
+ * that doesn't already carry one.
14
15
  *
15
16
  * The ref's `path`, `bucket`, `entries`, `bytes`, `preview`, etc. travel
16
- * unchanged — they already point at the source run's storage. Only
17
- * `sourceRunId` is added so retention/GC and observability tooling can
18
- * follow the cross-run dependency.
17
+ * unchanged — they already point at the source run's storage. `sourceRunId`
18
+ * is added so retention/GC and observability tooling can follow the
19
+ * cross-run dependency.
20
+ *
21
+ * **Transitive lineage.** When a cached report's refs already carry a
22
+ * `sourceRunId` (because that report was itself a cache hit), we preserve it.
23
+ * `opts.sourceRunId` is only the *immediate* cache parent; if the cached
24
+ * report's refs already point at the ultimate source run, blindly overwriting
25
+ * would drop the lineage one hop per cache propagation and 404 readers that
26
+ * trust `sourceRunId` for path reconstruction.
27
+ *
28
+ * Invariant maintained across any number of cache hops: every ref's
29
+ * `sourceRunId` equals the runId encoded in its `path` (= where the bytes
30
+ * physically live).
19
31
  *
20
32
  * Pure function; safe to call without side effects.
21
33
  */
@@ -9,12 +9,24 @@
9
9
  * @see docs/design-docs/cache-hit-artifact-restoration.md
10
10
  */
11
11
  /**
12
- * Copy an artifact manifest verbatim and stamp `sourceRunId` on every ref.
12
+ * Copy an artifact manifest verbatim and stamp `sourceRunId` on every ref
13
+ * that doesn't already carry one.
13
14
  *
14
15
  * The ref's `path`, `bucket`, `entries`, `bytes`, `preview`, etc. travel
15
- * unchanged — they already point at the source run's storage. Only
16
- * `sourceRunId` is added so retention/GC and observability tooling can
17
- * follow the cross-run dependency.
16
+ * unchanged — they already point at the source run's storage. `sourceRunId`
17
+ * is added so retention/GC and observability tooling can follow the
18
+ * cross-run dependency.
19
+ *
20
+ * **Transitive lineage.** When a cached report's refs already carry a
21
+ * `sourceRunId` (because that report was itself a cache hit), we preserve it.
22
+ * `opts.sourceRunId` is only the *immediate* cache parent; if the cached
23
+ * report's refs already point at the ultimate source run, blindly overwriting
24
+ * would drop the lineage one hop per cache propagation and 404 readers that
25
+ * trust `sourceRunId` for path reconstruction.
26
+ *
27
+ * Invariant maintained across any number of cache hops: every ref's
28
+ * `sourceRunId` equals the runId encoded in its `path` (= where the bytes
29
+ * physically live).
18
30
  *
19
31
  * Pure function; safe to call without side effects.
20
32
  */
@@ -23,9 +35,10 @@ export function remapToCacheHitRefs(source, opts) {
23
35
  for (const [type, ref] of Object.entries(source)) {
24
36
  if (!ref)
25
37
  continue;
38
+ const typed = ref;
26
39
  out[type] = {
27
- ...ref,
28
- sourceRunId: opts.sourceRunId,
40
+ ...typed,
41
+ sourceRunId: typed.sourceRunId ?? opts.sourceRunId,
29
42
  };
30
43
  }
31
44
  return out;
@@ -10,21 +10,25 @@
10
10
  * Designed to run in any HTTP environment: Cloudflare Workers, Vercel
11
11
  * functions, Express, Hono, etc.
12
12
  *
13
- * Supports two scoping modes:
14
- * - **Release-scoped** requires `perspective` field
15
- * - **Task-scoped** requires `tasks` array (optionally with `areas`)
16
- *
17
- * At least one of `perspective` or `tasks` must be present.
13
+ * The eval-request document carries a canonical `PipelineRequest` JSON
14
+ * blob in its `pipelineRequest` field (see W0239). The handler parses it
15
+ * via `PipelineRequestSchema` from `@sanity/ailf-core` and forwards it
16
+ * to the dispatcher as-is. Scoping (release-scoped via `perspective`,
17
+ * task-scoped via `tasks`) is asserted on the parsed `PipelineRequest`
18
+ * — at least one must be present.
18
19
  *
19
20
  * Flow:
20
21
  * 1. Receive eval request payload (from Sanity webhook projection)
21
- * 2. Validate: must be `ailf.evalRequest` type, `pending` status,
22
- * with either `perspective` or `tasks`
23
- * 3. Dispatch evaluation to GitHub Actions via `repository_dispatch`
24
- * with `external-eval` event type and scoped client payload
25
- * 4. On success: PATCH the eval request document → `status: "dispatched"`
26
- * 5. On failure: PATCH the eval request document → `status: "failed"` + error
27
- * 6. Return a structured result
22
+ * 2. Validate envelope: must be `ailf.evalRequest` type, `pending` status,
23
+ * `pipelineRequest` present
24
+ * 3. Parse + Zod-validate `pipelineRequest` against `PipelineRequestSchema`
25
+ * 4. Assert scoping: parsed request must have `perspective` or `tasks`
26
+ * 5. Dispatch evaluation to GitHub Actions via `repository_dispatch`
27
+ * with `external-eval` event type — the parsed `PipelineRequest`
28
+ * rides as `client_payload.request` unchanged
29
+ * 6. On success: PATCH the eval request document → `status: "dispatched"`
30
+ * 7. On failure: PATCH the eval request document → `status: "failed"` + error
31
+ * 8. Return a structured result
28
32
  *
29
33
  * ## Sanity Manage Webhook Configuration
30
34
  *
@@ -44,38 +48,37 @@
44
48
  * @see .github/workflows/external-eval.yml — receiving workflow
45
49
  * @see docs/design-docs/report-store/visibility-workflows.md
46
50
  */
47
- /** Projected shape of an `ailf.evalRequest` document from a Sanity webhook. */
51
+ /**
52
+ * Projected shape of an `ailf.evalRequest` document from a Sanity webhook.
53
+ *
54
+ * Per the W0239 schema redesign, request-scope fields (mode, perspective,
55
+ * tasks, areas, debug, tag, etc.) ride inside the `pipelineRequest` JSON
56
+ * blob — the canonical `PipelineRequest` serialization. The handler parses
57
+ * it via `PipelineRequestSchema` from `@sanity/ailf-core` and forwards it
58
+ * to the dispatcher as-is.
59
+ */
48
60
  export interface EvalRequestPayload {
49
61
  /** The Sanity document _id */
50
62
  _id: string;
51
63
  /** The Sanity document _type (should be "ailf.evalRequest") */
52
64
  _type: string;
53
- /** Feature areas to scope the evaluation (task-scoped evals) */
54
- areas?: string[];
55
- /** Sanity dataset */
65
+ /** Sanity dataset hosting the eval-request document itself */
56
66
  dataset: string;
57
- /** Run in debug mode */
58
- debug?: boolean;
59
67
  /** Error message (only if status is "failed") */
60
68
  error?: string;
61
- /** Evaluation mode */
62
- mode: string;
63
- /** Content release perspective ID (release-scoped evals) */
64
- perspective?: string;
65
- /** Sanity project ID */
69
+ /**
70
+ * Canonical `PipelineRequest` JSON. Source of truth for the dispatch
71
+ * payload. Parses against `PipelineRequestSchema` from `@sanity/ailf-core`.
72
+ */
73
+ pipelineRequest: string;
74
+ /** Sanity project ID hosting the eval-request document itself */
66
75
  projectId: string;
67
76
  /** ISO datetime of when the request was created */
68
77
  requestedAt: string;
69
78
  /** User ID who requested */
70
79
  requestedBy?: string;
71
- /** Report ID that triggered this re-run (if any) */
72
- sourceReportId?: string;
73
80
  /** Request status */
74
81
  status: string;
75
- /** Publish tag */
76
- tag?: string;
77
- /** Specific task IDs to evaluate (task-scoped evals) */
78
- tasks?: string[];
79
82
  }
80
83
  /** Configuration for the eval request handler. */
81
84
  export interface EvalRequestHandlerConfig {
@@ -10,21 +10,25 @@
10
10
  * Designed to run in any HTTP environment: Cloudflare Workers, Vercel
11
11
  * functions, Express, Hono, etc.
12
12
  *
13
- * Supports two scoping modes:
14
- * - **Release-scoped** requires `perspective` field
15
- * - **Task-scoped** requires `tasks` array (optionally with `areas`)
16
- *
17
- * At least one of `perspective` or `tasks` must be present.
13
+ * The eval-request document carries a canonical `PipelineRequest` JSON
14
+ * blob in its `pipelineRequest` field (see W0239). The handler parses it
15
+ * via `PipelineRequestSchema` from `@sanity/ailf-core` and forwards it
16
+ * to the dispatcher as-is. Scoping (release-scoped via `perspective`,
17
+ * task-scoped via `tasks`) is asserted on the parsed `PipelineRequest`
18
+ * — at least one must be present.
18
19
  *
19
20
  * Flow:
20
21
  * 1. Receive eval request payload (from Sanity webhook projection)
21
- * 2. Validate: must be `ailf.evalRequest` type, `pending` status,
22
- * with either `perspective` or `tasks`
23
- * 3. Dispatch evaluation to GitHub Actions via `repository_dispatch`
24
- * with `external-eval` event type and scoped client payload
25
- * 4. On success: PATCH the eval request document → `status: "dispatched"`
26
- * 5. On failure: PATCH the eval request document → `status: "failed"` + error
27
- * 6. Return a structured result
22
+ * 2. Validate envelope: must be `ailf.evalRequest` type, `pending` status,
23
+ * `pipelineRequest` present
24
+ * 3. Parse + Zod-validate `pipelineRequest` against `PipelineRequestSchema`
25
+ * 4. Assert scoping: parsed request must have `perspective` or `tasks`
26
+ * 5. Dispatch evaluation to GitHub Actions via `repository_dispatch`
27
+ * with `external-eval` event type — the parsed `PipelineRequest`
28
+ * rides as `client_payload.request` unchanged
29
+ * 6. On success: PATCH the eval request document → `status: "dispatched"`
30
+ * 7. On failure: PATCH the eval request document → `status: "failed"` + error
31
+ * 8. Return a structured result
28
32
  *
29
33
  * ## Sanity Manage Webhook Configuration
30
34
  *
@@ -45,6 +49,7 @@
45
49
  * @see docs/design-docs/report-store/visibility-workflows.md
46
50
  */
47
51
  import { createClient } from "@sanity/client";
52
+ import { PipelineRequestSchema } from "../_vendor/ailf-core/index.js";
48
53
  // ---------------------------------------------------------------------------
49
54
  // Constants
50
55
  // ---------------------------------------------------------------------------
@@ -116,18 +121,33 @@ export async function handleEvalRequest(payload, config) {
116
121
  requestId,
117
122
  };
118
123
  }
119
- const hasPerspective = !!payload.perspective;
120
- const hasTasks = Array.isArray(payload.tasks) && payload.tasks.length > 0;
124
+ if (!payload.pipelineRequest) {
125
+ return markFailed("Missing required field: pipelineRequest. The eval-request document " +
126
+ "must carry a canonical PipelineRequest JSON serialization.");
127
+ }
128
+ let parsedRequest;
129
+ try {
130
+ parsedRequest = JSON.parse(payload.pipelineRequest);
131
+ }
132
+ catch (err) {
133
+ return markFailed(`pipelineRequest is not valid JSON: ${err instanceof Error ? err.message : String(err)}`);
134
+ }
135
+ const parseResult = PipelineRequestSchema.safeParse(parsedRequest);
136
+ if (!parseResult.success) {
137
+ return markFailed(`pipelineRequest failed PipelineRequestSchema validation: ${parseResult.error.message}`);
138
+ }
139
+ const request = reconcileCallerIdentity(parseResult.data, payload.requestedBy);
140
+ const hasPerspective = !!request.perspective;
141
+ const hasTasks = Array.isArray(request.tasks) && request.tasks.length > 0;
121
142
  if (!hasPerspective && !hasTasks) {
122
- return markFailed("Missing required field: perspective or tasks. " +
123
- "Provide a content release perspective for release evals, " +
124
- "or a tasks array for task-scoped evals.");
143
+ return markFailed("pipelineRequest must scope the evaluation: provide either " +
144
+ "`perspective` (release-scoped) or `tasks` (task-scoped).");
125
145
  }
126
146
  // -------------------------------------------------------------------------
127
147
  // 3. Dispatch evaluation via GitHub Actions
128
148
  // -------------------------------------------------------------------------
129
149
  const repo = config.githubRepo ?? DEFAULT_REPO;
130
- const dispatchResult = await dispatchGitHubEval(repo, payload, config);
150
+ const dispatchResult = await dispatchGitHubEval(repo, request, config);
131
151
  // -------------------------------------------------------------------------
132
152
  // 4. Update eval request document status
133
153
  // -------------------------------------------------------------------------
@@ -152,46 +172,66 @@ export async function handleEvalRequest(payload, config) {
152
172
  // Dispatch failed — mark the document as failed
153
173
  return markFailed(dispatchResult.error ?? "Unknown dispatch error");
154
174
  }
175
+ // ---------------------------------------------------------------------------
176
+ // Internal helpers
177
+ // ---------------------------------------------------------------------------
178
+ /**
179
+ * Reconcile caller-claimed identity against the trustworthy Sanity write
180
+ * context.
181
+ *
182
+ * The `pipelineRequest` blob is authored by whoever wrote the Sanity
183
+ * document — a browser writer (App SDK dashboard) can set
184
+ * `executor.name` / `owner.individual` to any string, including
185
+ * someone else's. The webhook's only trustworthy identity signal is
186
+ * `payload.requestedBy` (the Sanity-session-authenticated writer).
187
+ *
188
+ * Per D0037, `owner.team` is caller-supplied (the caller knows their
189
+ * team); `executor.surface` / `executor.type` are caller-supplied.
190
+ * Identity fields (`executor.name`, `executor.githubActor`,
191
+ * `owner.individual`) are overwritten or stripped server-side here so
192
+ * downstream provenance reflects who actually wrote the document, not
193
+ * what they claimed.
194
+ *
195
+ * When `requestedBy` is missing (legacy documents), the executor/owner
196
+ * identity fields are stripped — the pipeline's server-side detection
197
+ * fills them as best it can.
198
+ */
199
+ function reconcileCallerIdentity(request, requestedBy) {
200
+ const out = { ...request };
201
+ if (request.executor) {
202
+ out.executor = {
203
+ ...request.executor,
204
+ ...(requestedBy ? { name: requestedBy } : { name: undefined }),
205
+ githubActor: undefined,
206
+ };
207
+ }
208
+ if (request.owner) {
209
+ out.owner = {
210
+ ...request.owner,
211
+ ...(requestedBy
212
+ ? { individual: requestedBy }
213
+ : { individual: undefined }),
214
+ };
215
+ }
216
+ return out;
217
+ }
155
218
  /**
156
219
  * Dispatch an evaluation via GitHub Actions repository_dispatch.
157
220
  *
158
- * Supports both release-scoped (perspective) and task-scoped (tasks/areas)
159
- * evaluations. Uses the `external-eval` event type with a client_payload
160
- * conforming to PipelineRequestSchema. The workflow passes it directly to
161
- * the CLI via `--config` without field translation.
221
+ * Forwards the already-validated `PipelineRequest` as-is under
222
+ * `client_payload.request` — no field translation, no hardcoded
223
+ * overrides. The workflow passes the request to the CLI via `--config`.
224
+ *
225
+ * Workflow-level metadata (`caller_repo`) stays at the top level of
226
+ * `client_payload` for the workflow to read, separate from the
227
+ * pipeline-invocation contract.
162
228
  */
163
- async function dispatchGitHubEval(repo, payload, config) {
229
+ async function dispatchGitHubEval(repo, request, config) {
164
230
  const url = `${GITHUB_API}/repos/${repo}/dispatches`;
165
- const hasPerspective = !!payload.perspective;
166
- const hasTasks = Array.isArray(payload.tasks) && payload.tasks.length > 0;
167
- const hasAreas = Array.isArray(payload.areas) && payload.areas.length > 0;
168
- // Nest the PipelineRequest under `request` to stay within GitHub's
169
- // 10-property limit on client_payload. Workflow-level metadata
170
- // (caller_repo) stays at the top level for the workflow to read.
171
231
  const body = {
172
232
  client_payload: {
173
233
  caller_repo: "sanity-io/www-sanity-io",
174
- request: {
175
- dataset: payload.dataset,
176
- mode: payload.mode,
177
- projectId: payload.projectId,
178
- publish: true,
179
- source: "production",
180
- // Studio-initiated evals always use Content Lake as the task source.
181
- // Without this, the pipeline only loads filesystem .task.ts files and
182
- // Studio-owned tasks are invisible.
183
- taskMode: "content-lake",
184
- // Release-scoped fields
185
- ...(hasPerspective ? { perspective: payload.perspective } : {}),
186
- // Task-scoped fields
187
- ...(hasTasks ? { tasks: payload.tasks } : {}),
188
- ...(hasAreas ? { areas: payload.areas } : {}),
189
- ...(payload.debug ? { debug: true } : {}),
190
- ...(payload.tag ? { publishTag: payload.tag } : {}),
191
- ...(payload.sourceReportId
192
- ? { sourceReportId: payload.sourceReportId }
193
- : {}),
194
- },
234
+ request,
195
235
  },
196
236
  event_type: "external-eval",
197
237
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sanity/ailf",
3
- "version": "6.1.2",
3
+ "version": "7.0.1",
4
4
  "private": false,
5
5
  "publishConfig": {
6
6
  "access": "public"