@sanity/ailf 7.0.1 → 7.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. package/config/rubrics.ts +12 -13
  2. package/dist/_vendor/ailf-core/ports/context.d.ts +45 -3
  3. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +10 -0
  4. package/dist/_vendor/ailf-core/ports/index.d.ts +1 -1
  5. package/dist/_vendor/ailf-core/schemas/branded-string.d.ts +9 -1
  6. package/dist/_vendor/ailf-core/schemas/branded-string.js +16 -6
  7. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +2 -0
  8. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +7 -0
  9. package/dist/_vendor/ailf-core/schemas/report.d.ts +12 -0
  10. package/dist/_vendor/ailf-core/schemas/report.js +2 -0
  11. package/dist/_vendor/ailf-core/schemas/team.d.ts +22 -0
  12. package/dist/_vendor/ailf-core/schemas/team.js +63 -0
  13. package/dist/_vendor/ailf-core/types/grader-judgment.d.ts +51 -0
  14. package/dist/_vendor/ailf-core/types/index.d.ts +8 -1
  15. package/dist/_vendor/ailf-core/types/pipeline-request.d.ts +17 -0
  16. package/dist/_vendor/ailf-core/types/team.d.ts +65 -0
  17. package/dist/_vendor/ailf-core/types/team.js +1 -0
  18. package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -1
  19. package/dist/_vendor/ailf-shared/document-ref.js +23 -1
  20. package/dist/_vendor/ailf-shared/eval-modes.d.ts +2 -0
  21. package/dist/_vendor/ailf-shared/eval-modes.js +5 -0
  22. package/dist/_vendor/ailf-shared/event-types.d.ts +15 -0
  23. package/dist/_vendor/ailf-shared/event-types.js +23 -0
  24. package/dist/_vendor/ailf-shared/generated/help-content.js +2 -2
  25. package/dist/_vendor/ailf-shared/index.d.ts +5 -3
  26. package/dist/_vendor/ailf-shared/index.js +5 -2
  27. package/dist/_vendor/ailf-shared/member-roles.d.ts +16 -0
  28. package/dist/_vendor/ailf-shared/member-roles.js +16 -0
  29. package/dist/_vendor/ailf-shared/owner-teams.d.ts +19 -0
  30. package/dist/_vendor/ailf-shared/owner-teams.js +26 -6
  31. package/dist/_vendor/ailf-shared/run-context.d.ts +8 -1
  32. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +15 -1
  33. package/dist/adapters/grader-outputs/promptfoo-grader-output.d.ts +65 -1
  34. package/dist/adapters/grader-outputs/promptfoo-grader-output.js +35 -0
  35. package/dist/adapters/task-sources/changed-docs-filter.d.ts +12 -0
  36. package/dist/adapters/task-sources/changed-docs-filter.js +30 -0
  37. package/dist/adapters/task-sources/content-lake-task-source.js +14 -8
  38. package/dist/adapters/task-sources/repo-task-source.js +2 -1
  39. package/dist/commands/pipeline-action.d.ts +4 -3
  40. package/dist/commands/pipeline-action.js +7 -5
  41. package/dist/commands/run.js +2 -2
  42. package/dist/config/rubrics.ts +12 -13
  43. package/dist/job-store.d.ts +18 -0
  44. package/dist/job-store.js +34 -0
  45. package/dist/orchestration/build-app-context.js +8 -1
  46. package/dist/orchestration/pipeline-orchestrator.js +46 -1
  47. package/dist/orchestration/steps/compare-step.d.ts +7 -0
  48. package/dist/orchestration/steps/compare-step.js +59 -23
  49. package/dist/orchestration/steps/fetch-docs-step.js +3 -0
  50. package/dist/orchestration/steps/finalize-run-step.js +2 -0
  51. package/dist/orchestration/steps/gap-analysis-step.js +9 -8
  52. package/dist/orchestration/steps/generate-configs-step.d.ts +32 -1
  53. package/dist/orchestration/steps/generate-configs-step.js +47 -13
  54. package/dist/orchestration/steps/grader-consistency-step.js +11 -0
  55. package/dist/orchestration/steps/publish-report-step.d.ts +12 -1
  56. package/dist/orchestration/steps/publish-report-step.js +36 -8
  57. package/dist/pipeline/cache-hit-restore.d.ts +14 -1
  58. package/dist/pipeline/cache-hit-restore.js +17 -0
  59. package/dist/pipeline/calculate-scores.d.ts +13 -1
  60. package/dist/pipeline/calculate-scores.js +123 -29
  61. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +7 -2
  62. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +13 -4
  63. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +1 -1
  64. package/dist/pipeline/compiler/provider-assembler.d.ts +15 -1
  65. package/dist/pipeline/compiler/provider-assembler.js +16 -3
  66. package/dist/pipeline/failure-modes.d.ts +20 -10
  67. package/dist/pipeline/failure-modes.js +84 -15
  68. package/dist/pipeline/map-request-to-config.js +2 -0
  69. package/dist/pipeline/normalize-mode.d.ts +1 -1
  70. package/dist/pipeline/normalize-mode.js +2 -0
  71. package/dist/pipeline/run-context.d.ts +16 -1
  72. package/dist/pipeline/run-context.js +12 -1
  73. package/dist/pipeline/validate.d.ts +8 -4
  74. package/dist/pipeline/validate.js +8 -18
  75. package/dist/report-store.d.ts +14 -1
  76. package/dist/report-store.js +32 -0
  77. package/dist/sanity/client.js +2 -2
  78. package/dist/sanity/queries.d.ts +1 -1
  79. package/dist/sanity/queries.js +1 -0
  80. package/dist/sources.js +40 -2
  81. package/package.json +1 -1
@@ -13,7 +13,7 @@
13
13
  * @see docs/decisions/D0032-run-anchored-artifact-store.md (§ Move 5 — Drift Prevention)
14
14
  */
15
15
  import { type Logger, type RunContext } from "../_vendor/ailf-core/index.d.ts";
16
- import { type RunClassification, type RunExecutor, type RunExecutorSurface, type RunHost, type RunLineage, type RunOwner, type RunTool } from "../_vendor/ailf-shared/index.d.ts";
16
+ import { type LiteracyVariant, type RunClassification, type RunExecutor, type RunExecutorSurface, type RunHost, type RunLineage, type RunOwner, type RunTool } from "../_vendor/ailf-shared/index.d.ts";
17
17
  import type { ResolvedSourceConfig } from "../sources.js";
18
18
  import type { EvalMode } from "./types.js";
19
19
  /**
@@ -74,6 +74,21 @@ export interface RunContextInput {
74
74
  source: ResolvedSourceConfig;
75
75
  /** Specific task IDs evaluated (if scoped) */
76
76
  taskIds?: string[];
77
+ /**
78
+ * Literacy mode variant (`baseline | agentic | observed | full`). Only
79
+ * meaningful when `mode === "literacy"`; ignored for other modes. Lands
80
+ * on `RunContext.variant` and `ReportProvenance.variant` so consumers
81
+ * can disambiguate which literacy variant the run executed.
82
+ */
83
+ variant?: LiteracyVariant;
84
+ /**
85
+ * Model IDs the caller requested via `PipelineRequest.models`. When
86
+ * present, `RunContext.models` is filtered to this subset so the report's
87
+ * `provenance.models` reflects what was actually evaluated. Unknown IDs
88
+ * are silently filtered out — the upstream rejection path (W0281
89
+ * `filterModelsByRequest`) has already failed the run or warned.
90
+ */
91
+ requestedModelIds?: string[];
77
92
  }
78
93
  /**
79
94
  * Derive `RunContext` from pipeline inputs. The only construction path.
@@ -68,8 +68,18 @@ export function buildRunContext(input) {
68
68
  // config/models.ts model matrix — listing those models would be
69
69
  // misleading. Only include them for literacy mode where they're the
70
70
  // actual eval targets.
71
+ //
72
+ // When `PipelineRequest.models` pinned a subset, filter here too so
73
+ // `provenance.models` matches what actually ran (W0281). Without this
74
+ // the report would advertise the full cohort even though only the
75
+ // requested subset reached the LLMs.
76
+ const requestedSet = input.requestedModelIds?.length
77
+ ? new Set(input.requestedModelIds)
78
+ : undefined;
71
79
  const evaluatedModels = input.mode === "literacy"
72
- ? models.models.map((m) => ({ id: m.id, label: m.label }))
80
+ ? models.models
81
+ .filter((m) => !requestedSet || requestedSet.has(m.id))
82
+ .map((m) => ({ id: m.id, label: m.label }))
73
83
  : [];
74
84
  return {
75
85
  areas: input.areas,
@@ -95,6 +105,7 @@ export function buildRunContext(input) {
95
105
  taskIds: input.taskIds,
96
106
  tool,
97
107
  trigger,
108
+ variant: input.mode === "literacy" ? input.variant : undefined,
98
109
  };
99
110
  }
100
111
  // ---------------------------------------------------------------------------
@@ -14,11 +14,15 @@ import type { ValidationIssue, ValidationResult } from "./types.js";
14
14
  */
15
15
  export declare function validateConfiguration(rootDir: string): ValidationResult;
16
16
  /**
17
- * Check that canonical context files exist. These are the per-task
18
- * gold-retrieval contexts actually referenced by task definitions.
17
+ * Check that the canonical-contexts directory exists.
19
18
  *
20
- * Contexts are generated by fetch-docs and may not exist yet —
21
- * returns warnings, not errors.
19
+ * Contexts are populated by fetch-docs, which scopes to the tasks
20
+ * actually being evaluated (not every task in the registry). Warning
21
+ * on individual missing files here would fire for every task the user
22
+ * didn't select — pure noise that previously crowded out real errors
23
+ * in the GHA safety-net's tail-of-log capture (W0282). The per-task
24
+ * precondition is enforced by `run-eval-step.ts:checkCanonicalContextsExist`
25
+ * against the filtered task set, where missing files are real errors.
22
26
  */
23
27
  export declare function validateContexts(rootDir: string): ValidationIssue[];
24
28
  /**
@@ -34,11 +34,15 @@ export function validateConfiguration(rootDir) {
34
34
  return { issues, valid };
35
35
  }
36
36
  /**
37
- * Check that canonical context files exist. These are the per-task
38
- * gold-retrieval contexts actually referenced by task definitions.
37
+ * Check that the canonical-contexts directory exists.
39
38
  *
40
- * Contexts are generated by fetch-docs and may not exist yet —
41
- * returns warnings, not errors.
39
+ * Contexts are populated by fetch-docs, which scopes to the tasks
40
+ * actually being evaluated (not every task in the registry). Warning
41
+ * on individual missing files here would fire for every task the user
42
+ * didn't select — pure noise that previously crowded out real errors
43
+ * in the GHA safety-net's tail-of-log capture (W0282). The per-task
44
+ * precondition is enforced by `run-eval-step.ts:checkCanonicalContextsExist`
45
+ * against the filtered task set, where missing files are real errors.
42
46
  */
43
47
  export function validateContexts(rootDir) {
44
48
  const source = "validateContexts";
@@ -46,20 +50,6 @@ export function validateContexts(rootDir) {
46
50
  const canonicalDir = path.join(rootDir, "contexts", "canonical");
47
51
  if (!fs.existsSync(canonicalDir)) {
48
52
  issues.push(warning(source, "contexts/canonical/ directory not found — run 'pnpm fetch-docs' to generate", canonicalDir));
49
- return issues;
50
- }
51
- const mappings = resolveMappings(rootDir);
52
- for (const [, areaConfig] of Object.entries(mappings.feature_areas)) {
53
- if (!areaConfig?.tasks)
54
- continue;
55
- for (const task of areaConfig.tasks) {
56
- if (!task.id)
57
- continue;
58
- const contextFile = path.join(canonicalDir, `${task.id}.md`);
59
- if (!fs.existsSync(contextFile)) {
60
- issues.push(warning(source, `Missing canonical context for task '${task.id}' — run 'pnpm fetch-docs' to generate`, contextFile));
61
- }
62
- }
63
53
  }
64
54
  return issues;
65
55
  }
@@ -15,7 +15,7 @@
15
15
  * @see docs/design-docs/report-store/domain-model.md
16
16
  */
17
17
  import type { SanityClient } from "@sanity/client";
18
- import type { ArtifactRef, ArtifactType, SynthesisCostTelemetry } from "./_vendor/ailf-core/index.d.ts";
18
+ import type { ArtifactRef, ArtifactType, LoadBaselineResult, SynthesisCostTelemetry } from "./_vendor/ailf-core/index.d.ts";
19
19
  import type { ComparisonReport, ISOTimestamp, LineageQuery, Report, ReportId, ReportProvenance, ScoreSummary } from "./pipeline/types.js";
20
20
  /**
21
21
  * Result of an auto-comparison, bundling the ComparisonReport with the
@@ -113,6 +113,19 @@ export declare class ReportStore {
113
113
  * W0191 runtime schema gate. Sanity API failures still return null.
114
114
  */
115
115
  read(id: ReportId): Promise<null | Report>;
116
+ /**
117
+ * Load a previously-published report's score summary as a baseline
118
+ * for comparison. Returns a discriminated result so the caller can
119
+ * distinguish a genuine 404 (skip compare with a clear reason) from
120
+ * a transport failure (fail the step — the user pinned a baseline
121
+ * and deserves to know it didn't actually compare).
122
+ *
123
+ * The report's `summary` field is a `ReportSummary` — a superset of
124
+ * `ComparableSummary` — so the projection below carries everything
125
+ * the `compare()` primitive needs (`overall`, `perModel`, `scores`)
126
+ * without re-hydrating the slim prose/array fields.
127
+ */
128
+ loadBaselineFromReport(reportId: string): Promise<LoadBaselineResult>;
116
129
  /**
117
130
  * Write a report to the Sanity Content Lake.
118
131
  *
@@ -270,6 +270,38 @@ export class ReportStore {
270
270
  return null;
271
271
  }
272
272
  }
273
+ /**
274
+ * Load a previously-published report's score summary as a baseline
275
+ * for comparison. Returns a discriminated result so the caller can
276
+ * distinguish a genuine 404 (skip compare with a clear reason) from
277
+ * a transport failure (fail the step — the user pinned a baseline
278
+ * and deserves to know it didn't actually compare).
279
+ *
280
+ * The report's `summary` field is a `ReportSummary` — a superset of
281
+ * `ComparableSummary` — so the projection below carries everything
282
+ * the `compare()` primitive needs (`overall`, `perModel`, `scores`)
283
+ * without re-hydrating the slim prose/array fields.
284
+ */
285
+ async loadBaselineFromReport(reportId) {
286
+ try {
287
+ const doc = await this.client.fetch(`*[_type == $type && reportId == $id][0]{ summary }`, { id: reportId, type: REPORT_TYPE });
288
+ const summary = doc?.summary;
289
+ if (!summary)
290
+ return { kind: "not_found" };
291
+ return {
292
+ kind: "ok",
293
+ baseline: {
294
+ overall: summary.overall,
295
+ perModel: summary.perModel,
296
+ scores: summary.scores,
297
+ },
298
+ };
299
+ }
300
+ catch (error) {
301
+ const message = error instanceof Error ? error.message : String(error);
302
+ return { kind: "error", message };
303
+ }
304
+ }
273
305
  /**
274
306
  * Write a report to the Sanity Content Lake.
275
307
  *
@@ -108,8 +108,8 @@ export function getSanityClient(overrides, source) {
108
108
  * fall back to `SANITY_DATASET` so existing CI workflows that pin a
109
109
  * test/staging dataset (e.g. Tier 2 with `SANITY_DATASET=ailf-test`)
110
110
  * continue to work without a new env var. The hard-coded fallback is
111
- * the editorial dataset name during the D0043 cutover window the flip
112
- * to `ailf-prod-private` happens after the migration script runs.
111
+ * `AILF_DATASET_DEFAULT` (`ailf-prod-private`, D0043) only reached for
112
+ * ad-hoc runs with no env at all.
113
113
  *
114
114
  * Token resolution prefers the AILF-scoped token, falling back to
115
115
  * the shared `SANITY_API_TOKEN`.
@@ -69,7 +69,7 @@ export declare const ALL_ARTICLES_QUERY = "\n *[_type == \"article\"\n && !(
69
69
  *
70
70
  * Usage: client.fetch(ARTICLES_METADATA_BY_SLUGS_QUERY, { slugs: ["slug-a", "slug-b"] })
71
71
  */
72
- export declare const ARTICLES_METADATA_BY_SLUGS_QUERY = "\n *[_type == \"article\"\n && slug.current in $slugs\n && !(_id in path(\"drafts.**\"))\n ] {\n \"slug\": slug.current,\n _id,\n _rev,\n title\n }\n";
72
+ export declare const ARTICLES_METADATA_BY_SLUGS_QUERY = "\n *[_type == \"article\"\n && slug.current in $slugs\n && !(_id in path(\"drafts.**\"))\n ] {\n \"slug\": slug.current,\n \"sectionSlug\": primarySection->slug.current,\n _id,\n _rev,\n title\n }\n";
73
73
  /**
74
74
  * Fetch a single article by its slug — identical to ARTICLE_BY_SLUG_QUERY
75
75
  * but designed to be called with a perspective-enabled client.
@@ -203,6 +203,7 @@ export const ARTICLES_METADATA_BY_SLUGS_QUERY = `
203
203
  && !(_id in path("drafts.**"))
204
204
  ] {
205
205
  "slug": slug.current,
206
+ "sectionSlug": primarySection->slug.current,
206
207
  _id,
207
208
  _rev,
208
209
  title
package/dist/sources.js CHANGED
@@ -37,6 +37,44 @@ const DEFAULT_SOURCE = {
37
37
  studioOrigin: "https://admin.sanity.io",
38
38
  urls: [],
39
39
  };
40
+ /**
41
+ * Apply `SourceOverrides` + env-var fallbacks to `DEFAULT_SOURCE`.
42
+ *
43
+ * The DEFAULT_SOURCE early-return branches are taken when `config/sources`
44
+ * is missing or empty — the production state, since the named source
45
+ * definitions actually live in the `sanity-literacy` preset's `sourceDefs`
46
+ * (which `loadSource` doesn't consult). Returning `DEFAULT_SOURCE`
47
+ * verbatim drops every override the caller passed in, including
48
+ * `perspective` — observed live as production-source release evals
49
+ * fetching the published doc revision (W0295).
50
+ *
51
+ * The merge order mirrors the priority-1 (env-baseUrl) branch. The two
52
+ * paths diverge in two ways, both intentional: this branch (a) pins
53
+ * `baseUrl` / `llmsTxt` / `name` / `priorityDomain` to `DEFAULT_SOURCE`,
54
+ * (b) returns `documentIds: []` (the prior `DEFAULT_SOURCE` shape) where
55
+ * priority-1 would return `undefined` — both fall through the same
56
+ * `length > 0` consumer check, so behaviorally equivalent.
57
+ */
58
+ function applyOverridesToDefault(overrides) {
59
+ const allowedOrigins = overrides?.allowedOrigins ?? parseAllowedOriginsEnv();
60
+ const headers = overrides?.headers ?? parseHeadersEnv();
61
+ return {
62
+ ...DEFAULT_SOURCE,
63
+ ...(allowedOrigins ? { allowedOrigins } : {}),
64
+ // oxlint-disable-next-line @typescript-eslint/prefer-nullish-coalescing -- empty string env var should fall back
65
+ dataset: overrides?.dataset ?? (process.env.SANITY_DATASET || "next"),
66
+ documentIds: overrides?.documentIds ?? parseDocumentIdsEnv() ?? [],
67
+ ...(headers ? { headers } : {}),
68
+ // oxlint-disable-next-line @typescript-eslint/prefer-nullish-coalescing -- empty string env var should fall back
69
+ perspective: overrides?.perspective ?? (process.env.SANITY_PERSPECTIVE || undefined),
70
+ // oxlint-disable-next-line @typescript-eslint/prefer-nullish-coalescing -- empty string env var should fall back
71
+ projectId: overrides?.projectId ?? (process.env.SANITY_PROJECT_ID || "3do82whm"),
72
+ // oxlint-disable-next-line @typescript-eslint/prefer-nullish-coalescing -- empty string env var should fall back
73
+ studioOrigin: overrides?.studioOrigin ??
74
+ (process.env.SANITY_STUDIO_ORIGIN || "https://admin.sanity.io"),
75
+ urls: overrides?.directUrls ?? parseDirectUrlsEnv(),
76
+ };
77
+ }
40
78
  // ---------------------------------------------------------------------------
41
79
  // Validation
42
80
  // ---------------------------------------------------------------------------
@@ -117,12 +155,12 @@ export function loadSource(name, overrides, logger) {
117
155
  defaultBaseUrl: DEFAULT_SOURCE.baseUrl,
118
156
  });
119
157
  console.log(" No config/sources found, using built-in default (sanity.io production)");
120
- return DEFAULT_SOURCE;
158
+ return applyOverridesToDefault(overrides);
121
159
  }
122
160
  if (!rawFile?.sources || Object.keys(rawFile.sources).length === 0) {
123
161
  log.debug("config/sources is empty, falling back to built-in default");
124
162
  console.log(" config/sources is empty, using built-in default");
125
- return DEFAULT_SOURCE;
163
+ return applyOverridesToDefault(overrides);
126
164
  }
127
165
  // Resolve which source to use
128
166
  const sourceName =
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sanity/ailf",
3
- "version": "7.0.1",
3
+ "version": "7.1.2",
4
4
  "private": false,
5
5
  "publishConfig": {
6
6
  "access": "public"