@sanity/ailf 7.0.1 → 7.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. package/config/rubrics.ts +12 -13
  2. package/dist/_vendor/ailf-core/ports/context.d.ts +45 -3
  3. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +10 -0
  4. package/dist/_vendor/ailf-core/ports/index.d.ts +1 -1
  5. package/dist/_vendor/ailf-core/schemas/branded-string.d.ts +9 -1
  6. package/dist/_vendor/ailf-core/schemas/branded-string.js +16 -6
  7. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +2 -0
  8. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +7 -0
  9. package/dist/_vendor/ailf-core/schemas/report.d.ts +12 -0
  10. package/dist/_vendor/ailf-core/schemas/report.js +2 -0
  11. package/dist/_vendor/ailf-core/schemas/team.d.ts +22 -0
  12. package/dist/_vendor/ailf-core/schemas/team.js +63 -0
  13. package/dist/_vendor/ailf-core/types/grader-judgment.d.ts +51 -0
  14. package/dist/_vendor/ailf-core/types/index.d.ts +8 -1
  15. package/dist/_vendor/ailf-core/types/pipeline-request.d.ts +17 -0
  16. package/dist/_vendor/ailf-core/types/team.d.ts +65 -0
  17. package/dist/_vendor/ailf-core/types/team.js +1 -0
  18. package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -1
  19. package/dist/_vendor/ailf-shared/document-ref.js +23 -1
  20. package/dist/_vendor/ailf-shared/eval-modes.d.ts +2 -0
  21. package/dist/_vendor/ailf-shared/eval-modes.js +5 -0
  22. package/dist/_vendor/ailf-shared/event-types.d.ts +15 -0
  23. package/dist/_vendor/ailf-shared/event-types.js +23 -0
  24. package/dist/_vendor/ailf-shared/generated/help-content.js +2 -2
  25. package/dist/_vendor/ailf-shared/index.d.ts +5 -3
  26. package/dist/_vendor/ailf-shared/index.js +5 -2
  27. package/dist/_vendor/ailf-shared/member-roles.d.ts +16 -0
  28. package/dist/_vendor/ailf-shared/member-roles.js +16 -0
  29. package/dist/_vendor/ailf-shared/owner-teams.d.ts +19 -0
  30. package/dist/_vendor/ailf-shared/owner-teams.js +26 -6
  31. package/dist/_vendor/ailf-shared/run-context.d.ts +8 -1
  32. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +15 -1
  33. package/dist/adapters/grader-outputs/promptfoo-grader-output.d.ts +65 -1
  34. package/dist/adapters/grader-outputs/promptfoo-grader-output.js +35 -0
  35. package/dist/adapters/task-sources/changed-docs-filter.d.ts +12 -0
  36. package/dist/adapters/task-sources/changed-docs-filter.js +30 -0
  37. package/dist/adapters/task-sources/content-lake-task-source.js +14 -8
  38. package/dist/adapters/task-sources/repo-task-source.js +2 -1
  39. package/dist/commands/pipeline-action.d.ts +4 -3
  40. package/dist/commands/pipeline-action.js +7 -5
  41. package/dist/commands/run.js +2 -2
  42. package/dist/config/rubrics.ts +12 -13
  43. package/dist/job-store.d.ts +18 -0
  44. package/dist/job-store.js +34 -0
  45. package/dist/orchestration/build-app-context.js +8 -1
  46. package/dist/orchestration/pipeline-orchestrator.js +46 -1
  47. package/dist/orchestration/steps/compare-step.d.ts +7 -0
  48. package/dist/orchestration/steps/compare-step.js +59 -23
  49. package/dist/orchestration/steps/fetch-docs-step.js +3 -0
  50. package/dist/orchestration/steps/finalize-run-step.js +2 -0
  51. package/dist/orchestration/steps/gap-analysis-step.js +9 -8
  52. package/dist/orchestration/steps/generate-configs-step.d.ts +32 -1
  53. package/dist/orchestration/steps/generate-configs-step.js +47 -13
  54. package/dist/orchestration/steps/grader-consistency-step.js +11 -0
  55. package/dist/orchestration/steps/publish-report-step.d.ts +12 -1
  56. package/dist/orchestration/steps/publish-report-step.js +36 -8
  57. package/dist/pipeline/cache-hit-restore.d.ts +14 -1
  58. package/dist/pipeline/cache-hit-restore.js +17 -0
  59. package/dist/pipeline/calculate-scores.d.ts +13 -1
  60. package/dist/pipeline/calculate-scores.js +123 -29
  61. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +7 -2
  62. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +13 -4
  63. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +1 -1
  64. package/dist/pipeline/compiler/provider-assembler.d.ts +15 -1
  65. package/dist/pipeline/compiler/provider-assembler.js +16 -3
  66. package/dist/pipeline/failure-modes.d.ts +20 -10
  67. package/dist/pipeline/failure-modes.js +84 -15
  68. package/dist/pipeline/map-request-to-config.js +2 -0
  69. package/dist/pipeline/normalize-mode.d.ts +1 -1
  70. package/dist/pipeline/normalize-mode.js +2 -0
  71. package/dist/pipeline/run-context.d.ts +16 -1
  72. package/dist/pipeline/run-context.js +12 -1
  73. package/dist/pipeline/validate.d.ts +8 -4
  74. package/dist/pipeline/validate.js +8 -18
  75. package/dist/report-store.d.ts +14 -1
  76. package/dist/report-store.js +32 -0
  77. package/dist/sanity/client.js +2 -2
  78. package/dist/sanity/queries.d.ts +1 -1
  79. package/dist/sanity/queries.js +1 -0
  80. package/dist/sources.js +40 -2
  81. package/package.json +1 -1
package/config/rubrics.ts CHANGED
@@ -15,10 +15,6 @@ import { defineRubrics } from "@sanity/ailf-core"
15
15
  // template entry below. Source of truth lives in packages/eval/src/grader/;
16
16
  // the helper picks the right list by dimension family.
17
17
  import { failureModesForDimension } from "../src/grader/index.js"
18
- // Single source of truth for the wire-format version stamped into the
19
- // grader-prompt footer (VER-01 D-02). Interpolated below so the
20
- // announced version cannot drift from the schema's expected value.
21
- import { graderJudgmentsVersion } from "../src/adapters/grader-outputs/index.js"
22
18
 
23
19
  export default defineRubrics({
24
20
  templates: {
@@ -242,20 +238,23 @@ export default defineRubrics({
242
238
  "agent-harness": { gold: "agent-harness" },
243
239
  },
244
240
 
245
- // Phase 3 GRAD-05 (Plan 03-01) structured GraderJudgment JSON sketch.
246
- // Documents the target wire format the grader emits. The strict schema's
247
- // GRAD-02 additive fields stay optional in this plan; Plan 03-04 flips
248
- // them to required and bumps graderJudgmentsVersion to 1.0.0.
241
+ // W0273 the footer documents the wire-format subset of GraderJudgment
242
+ // that the grader LLM actually controls. The pipeline parses this against
243
+ // GraderEmittedJudgmentSchema and then synthesizes the four pipeline-owned
244
+ // fields (judgmentId, metadata.{graderModel,graderJudgmentsVersion},
245
+ // hallucinationCheckedAgainst) to build the storage GraderJudgment.
246
+ //
247
+ // See docs/audits/2026-05-22-empty-gap-analysis-regression.md for the
248
+ // rationale (Phase 3 GRAD-05 made these fields required + .strict(),
249
+ // and asking the LLM for pipeline-owned values caused 100% parse
250
+ // failures starting 2026-05-11).
249
251
  footer: `Return ONLY a JSON object with this exact shape:
250
252
  {
251
- "judgmentId": "<string>",
252
253
  "score": <number 0-100>,
253
254
  "reason": "<explanation, ≤500 chars>",
255
+ "failureMode": "<one of the declared modes for this dimension or 'unclassified'>",
254
256
  "subJudgments": [{ "criterionId": "<id>", "met": <bool>, "evidence": "<≤280 chars>", "confidence": { "level": "high|medium|low", "signalsPresent": <int>, "derivation": "<string>" } }],
255
257
  "docCitations": [{ "documentId": "<id>", "slug": "<optional slug>", "role": "supports|contradicts|missing|irrelevant", "hallucinated": <bool> }],
256
- "failureMode": "<one of the declared modes for this dimension or 'unclassified'>",
257
- "confidence": { "level": "high|medium|low", "signalsPresent": <int>, "derivation": "<string>" },
258
- "hallucinationCheckedAgainst": ["<doc id>"],
259
- "metadata": { "graderModel": "<string>", "graderJudgmentsVersion": "${graderJudgmentsVersion}" }
258
+ "confidence": { "level": "high|medium|low", "signalsPresent": <int>, "derivation": "<string>" }
260
259
  }`,
261
260
  })
@@ -11,7 +11,7 @@
11
11
  * Fields marked optional are transitional — they will become required
12
12
  * as downstream consumers are converted to use them.
13
13
  */
14
- import type { RunClassification, RunExecutorSurface } from "../../ailf-shared/index.d.ts";
14
+ import type { LiteracyVariant, RunClassification, RunExecutorSurface } from "../../ailf-shared/index.d.ts";
15
15
  import type { RunId } from "../types/branded-ids.js";
16
16
  import type { DebugOptions, EvalMode, PluginRegistry } from "../types/index.js";
17
17
  import type { ArtifactWriter } from "./artifact-writer.js";
@@ -42,12 +42,20 @@ export interface ResolvedConfig {
42
42
  * `mode: "literacy", variant: "baseline"`. This keeps the pipeline
43
43
  * mode-agnostic while preserving literacy's multi-variant behavior.
44
44
  *
45
- * Values: "baseline" | "agentic" | "observed" | "full" | undefined
46
45
  * Undefined means "use the default variant for the mode" (baseline for literacy).
47
46
  */
48
- variant?: string;
47
+ variant?: LiteracyVariant;
49
48
  /** Debug options */
50
49
  debug?: DebugOptions;
50
+ /**
51
+ * Filter the evaluated cohort to a subset of the configured model IDs.
52
+ *
53
+ * Each entry must match the `id` of a model declared in
54
+ * `config/models.ts`. Unknown IDs are dropped at the runner with a
55
+ * structured warning AND surfaced on the job's `error` field so callers
56
+ * can detect typos — silent strips are not acceptable.
57
+ */
58
+ models?: string[];
51
59
  /** Feature area filter */
52
60
  areas?: string[];
53
61
  /** Task ID filter */
@@ -68,6 +76,12 @@ export interface ResolvedConfig {
68
76
  compareThreshold?: number;
69
77
  /** Comparison baseline path */
70
78
  compareBaseline?: string;
79
+ /**
80
+ * Comparison baseline expressed as a previously-published
81
+ * `ailf.report` document id. Takes precedence over `compareBaseline`
82
+ * when both are set.
83
+ */
84
+ compareBaselineReportId?: string;
71
85
  /** Whether gap analysis is enabled */
72
86
  gapAnalysisEnabled: boolean;
73
87
  /** Whether publishing is enabled */
@@ -323,6 +337,26 @@ export interface AppContext {
323
337
  /** Task definition source (YAML, Content Lake, repo) */
324
338
  readonly taskSource: TaskSource;
325
339
  }
340
+ /**
341
+ * Discriminated result for `ReportStorePort.loadBaselineFromReport`.
342
+ *
343
+ * Lets the compare step distinguish a genuine 404 (the pinned report
344
+ * doesn't exist — skip with a clear reason) from a transport failure
345
+ * (Sanity 5xx, network blew up — fail the step so the user knows the
346
+ * pinned baseline didn't actually compare). The `baseline` payload is
347
+ * typed as `unknown` to keep the port surface decoupled from the eval
348
+ * package's `ComparableSummary` type — concrete implementations return
349
+ * a more specific shape, which is sound.
350
+ */
351
+ export type LoadBaselineResult = {
352
+ kind: "ok";
353
+ baseline: unknown;
354
+ } | {
355
+ kind: "not_found";
356
+ } | {
357
+ kind: "error";
358
+ message: string;
359
+ };
326
360
  /**
327
361
  * Minimal report store interface used by AppContext.
328
362
  *
@@ -341,6 +375,14 @@ export interface ReportStorePort {
341
375
  write(report: unknown): Promise<unknown>;
342
376
  /** Read a report by its ID (used by the post-run diagnosis hook). */
343
377
  read(id: string): Promise<null | unknown>;
378
+ /**
379
+ * Load a previously-published report's score summary as a baseline
380
+ * for the `compare` step. Returns a discriminated result so callers
381
+ * can distinguish a genuine 404 (skip with a clear reason) from a
382
+ * transport failure (fail the step — the user pinned a baseline and
383
+ * deserves to know it didn't actually compare).
384
+ */
385
+ loadBaselineFromReport(reportId: string): Promise<LoadBaselineResult>;
344
386
  /** Patch synthesis telemetry onto a published report (Phase 6 / DIAG-06). */
345
387
  patchSynthesis(id: string, telemetry: unknown): Promise<void>;
346
388
  /**
@@ -53,6 +53,16 @@ export interface DocumentManifestEntry {
53
53
  _id: string;
54
54
  _rev: string;
55
55
  slug: string;
56
+ /** Parent section slug (`primarySection->slug.current`), when resolvable. */
57
+ sectionSlug?: string;
58
+ /**
59
+ * Full URL path under `/docs/` (e.g. `content-lake/groq-introduction`)
60
+ * composed via `buildContextDocPath` from `sectionSlug + "/" + slug`.
61
+ * Optional — historical manifests written before W0287 only carry
62
+ * `slug`; downstream `DocumentRef` builders fall back to slug-only
63
+ * display when this is absent.
64
+ */
65
+ path?: string;
56
66
  title: string;
57
67
  }
58
68
  /** Impact of a content release on canonical documents */
@@ -8,7 +8,7 @@ export type { ArtifactEntry, ArtifactWriter } from "./artifact-writer.js";
8
8
  export { NoOpArtifactWriter } from "./artifact-writer.js";
9
9
  export type { CacheEntryMetadata, CacheKey, CacheLookupResult, CacheRecordInput, CacheStore, } from "./cache-store.js";
10
10
  export type { ConfigSource } from "./config-source.js";
11
- export type { AppContext, ReportSinkPort, ReportStorePort, ResolvedConfig, } from "./context.js";
11
+ export type { AppContext, LoadBaselineResult, ReportSinkPort, ReportStorePort, ResolvedConfig, } from "./context.js";
12
12
  export type { DocContext, DocFetcher, DocSourceConfig, DocumentManifestEntry, DocumentOverlaySummary, FetchMetadata, FetchResult, ReleaseImpact, SymbolIndexManifestEntry, UrlFetchEntry, UrlFetchSummary, } from "./doc-fetcher.js";
13
13
  export type { EvalRunConfig, EvalRunner } from "./eval-runner.js";
14
14
  export type { LLMCallContext, LLMClient, LLMCompleteArgs, LLMCompleteStructuredArgs, LLMCompletion, LLMStructuredCompletion, LLMUsage, ModelId, ModelProvider, ParsedModelId, } from "./llm-client.js";
@@ -36,5 +36,13 @@ import type { Brand } from "../types/branded-ids.js";
36
36
  * exit from the project's no-`as`-on-`unknown` rule. Adapters MUST
37
37
  * NOT replicate the cast at their own call sites — call this helper
38
38
  * instead so the rule violation stays centralized.
39
+ *
40
+ * Pass `regex` to enforce a stricter shape than non-empty. The
41
+ * runtime validator becomes `z.string().regex(regex)` instead of
42
+ * `z.string().min(1)`; the brand-cast at the call boundary is
43
+ * unchanged. Callers passing `regex` are responsible for ensuring
44
+ * it rejects the empty string (typically anchor with `^` and
45
+ * require at least one character via `+` or a non-`*` quantifier);
46
+ * the `.min(1)` floor is dropped when `regex` is supplied.
39
47
  */
40
- export declare function brandedString<TBrand extends string>(): z.ZodType<Brand<string, TBrand>>;
48
+ export declare function brandedString<TBrand extends string>(regex?: RegExp): z.ZodType<Brand<string, TBrand>>;
@@ -35,11 +35,21 @@ import { z } from "zod";
35
35
  * exit from the project's no-`as`-on-`unknown` rule. Adapters MUST
36
36
  * NOT replicate the cast at their own call sites — call this helper
37
37
  * instead so the rule violation stays centralized.
38
+ *
39
+ * Pass `regex` to enforce a stricter shape than non-empty. The
40
+ * runtime validator becomes `z.string().regex(regex)` instead of
41
+ * `z.string().min(1)`; the brand-cast at the call boundary is
42
+ * unchanged. Callers passing `regex` are responsible for ensuring
43
+ * it rejects the empty string (typically anchor with `^` and
44
+ * require at least one character via `+` or a non-`*` quantifier);
45
+ * the `.min(1)` floor is dropped when `regex` is supplied.
38
46
  */
39
- export function brandedString() {
40
- // The runtime is a plain non-empty string; the brand is a
41
- // compile-time-only nominal tag (see `Brand<>` in branded-ids.ts).
42
- // Zod 4's `.brand()` uses a different symbol shape, so a direct
43
- // composition does not yield the project's `Brand<…>` type.
44
- return z.string().min(1);
47
+ export function brandedString(regex) {
48
+ // The runtime is a plain string (non-empty or regex-validated);
49
+ // the brand is a compile-time-only nominal tag (see `Brand<>` in
50
+ // branded-ids.ts). Zod 4's `.brand()` uses a different symbol
51
+ // shape, so a direct composition does not yield the project's
52
+ // `Brand<…>` type.
53
+ const base = regex === undefined ? z.string().min(1) : z.string().regex(regex);
54
+ return base;
45
55
  }
@@ -33,6 +33,7 @@ export declare const PipelineRequestSchema: z.ZodObject<{
33
33
  changedDocs: z.ZodOptional<z.ZodArray<z.ZodString>>;
34
34
  compare: z.ZodOptional<z.ZodBoolean>;
35
35
  compareBaseline: z.ZodOptional<z.ZodString>;
36
+ compareBaselineReportId: z.ZodOptional<z.ZodString>;
36
37
  compareThreshold: z.ZodOptional<z.ZodNumber>;
37
38
  concurrency: z.ZodOptional<z.ZodNumber>;
38
39
  dataset: z.ZodOptional<z.ZodString>;
@@ -63,6 +64,7 @@ export declare const PipelineRequestSchema: z.ZodObject<{
63
64
  observed: "observed";
64
65
  full: "full";
65
66
  }>>;
67
+ models: z.ZodOptional<z.ZodArray<z.ZodString>>;
66
68
  noAutoScope: z.ZodOptional<z.ZodBoolean>;
67
69
  noCache: z.ZodOptional<z.ZodBoolean>;
68
70
  noRemoteCache: z.ZodOptional<z.ZodBoolean>;
@@ -101,6 +101,7 @@ export const PipelineRequestSchema = z.object({
101
101
  changedDocs: z.array(z.string()).optional(),
102
102
  compare: z.boolean().optional(),
103
103
  compareBaseline: z.string().optional(),
104
+ compareBaselineReportId: z.string().min(1).optional(),
104
105
  compareThreshold: z.number().min(0).optional(),
105
106
  concurrency: z.number().int().positive().optional(),
106
107
  dataset: z.string().optional(),
@@ -123,6 +124,12 @@ export const PipelineRequestSchema = z.object({
123
124
  * Legacy names must pass through normalizeMode() before entering typed pipeline code.
124
125
  */
125
126
  mode: z.enum(RAW_EVAL_MODES).optional(),
127
+ /**
128
+ * Filter the evaluation cohort to a subset of the configured model IDs
129
+ * (W0281). Unknown IDs are dropped at the runner with a structured
130
+ * warning + job-error patch.
131
+ */
132
+ models: z.array(z.string().min(1)).optional(),
126
133
  noAutoScope: z.boolean().optional(),
127
134
  noCache: z.boolean().optional(),
128
135
  noRemoteCache: z.boolean().optional(),
@@ -113,6 +113,12 @@ export declare const ReportProvenanceSchema: z.ZodObject<{
113
113
  documentId: z.ZodOptional<z.ZodString>;
114
114
  source: z.ZodString;
115
115
  }, z.core.$strict>], "type">;
116
+ variant: z.ZodOptional<z.ZodEnum<{
117
+ agentic: "agentic";
118
+ baseline: "baseline";
119
+ observed: "observed";
120
+ full: "full";
121
+ }>>;
116
122
  autoScope: z.ZodOptional<z.ZodObject<{
117
123
  enabled: z.ZodBoolean;
118
124
  affectedTaskIds: z.ZodArray<z.ZodString>;
@@ -222,6 +228,12 @@ export declare const ReportSchema: z.ZodObject<{
222
228
  documentId: z.ZodOptional<z.ZodString>;
223
229
  source: z.ZodString;
224
230
  }, z.core.$strict>], "type">;
231
+ variant: z.ZodOptional<z.ZodEnum<{
232
+ agentic: "agentic";
233
+ baseline: "baseline";
234
+ observed: "observed";
235
+ full: "full";
236
+ }>>;
225
237
  autoScope: z.ZodOptional<z.ZodObject<{
226
238
  enabled: z.ZodBoolean;
227
239
  affectedTaskIds: z.ZodArray<z.ZodString>;
@@ -24,6 +24,7 @@
24
24
  * @see docs/work-items/W0191-report-store-schema-gate.json
25
25
  */
26
26
  import { z } from "zod";
27
+ import { LITERACY_VARIANTS } from "../../ailf-shared/index.js";
27
28
  // ---------------------------------------------------------------------------
28
29
  // RunContext building blocks (mirrors packages/shared/src/run-context.ts)
29
30
  // ---------------------------------------------------------------------------
@@ -195,6 +196,7 @@ export const ReportProvenanceSchema = z
195
196
  taskIds: z.array(z.string()).optional(),
196
197
  tool: RunToolSchema.optional(),
197
198
  trigger: RunTriggerSchema,
199
+ variant: z.enum(LITERACY_VARIANTS).optional(),
198
200
  // ReportProvenance additions
199
201
  autoScope: ReportAutoScopeSchema.optional(),
200
202
  contextHash: z.string().optional(),
@@ -0,0 +1,22 @@
1
+ import { z } from "zod";
2
+ import type { NotificationChannel } from "../types/team.js";
3
+ export declare const TeamSchema: z.ZodObject<{
4
+ id: z.ZodType<import("../index.js").Brand<string, "TeamId">, unknown, z.core.$ZodTypeInternals<import("../index.js").Brand<string, "TeamId">, unknown>>;
5
+ slug: z.ZodType<import("../index.js").Brand<string, "TeamSlug">, unknown, z.core.$ZodTypeInternals<import("../index.js").Brand<string, "TeamSlug">, unknown>>;
6
+ displayName: z.ZodString;
7
+ description: z.ZodOptional<z.ZodString>;
8
+ status: z.ZodEnum<{
9
+ active: "active";
10
+ archived: "archived";
11
+ }>;
12
+ members: z.ZodArray<z.ZodObject<{
13
+ email: z.ZodOptional<z.ZodString>;
14
+ sanityUserId: z.ZodOptional<z.ZodString>;
15
+ githubUsername: z.ZodOptional<z.ZodString>;
16
+ displayName: z.ZodOptional<z.ZodString>;
17
+ role: z.ZodOptional<z.ZodString>;
18
+ lastVerifiedAt: z.ZodOptional<z.ZodString>;
19
+ }, z.core.$strip>>;
20
+ repos: z.ZodOptional<z.ZodArray<z.ZodString>>;
21
+ notifications: z.ZodOptional<z.ZodArray<z.ZodType<NotificationChannel, unknown, z.core.$ZodTypeInternals<NotificationChannel, unknown>>>>;
22
+ }, z.core.$strip>;
@@ -0,0 +1,63 @@
1
+ import { z } from "zod";
2
+ import { brandedString } from "./branded-string.js";
3
+ const SLUG_REGEX = /^[a-z0-9][a-z0-9-]*$/;
4
+ const TEAM_ID_REGEX = /^ailf\.team\.[a-z0-9][a-z0-9-]*$/;
5
+ const TeamMemberSchema = z
6
+ .object({
7
+ email: z.string().email().optional(),
8
+ sanityUserId: z.string().optional(),
9
+ githubUsername: z.string().optional(),
10
+ displayName: z.string().optional(),
11
+ role: z.string().optional(),
12
+ lastVerifiedAt: z.string().datetime().optional(),
13
+ })
14
+ .refine((m) => Boolean(m.email || m.sanityUserId || m.githubUsername), {
15
+ message: "TeamMember requires at least one of email, sanityUserId, githubUsername",
16
+ });
17
+ const ChannelScopeSchema = z.discriminatedUnion("type", [
18
+ z.object({ type: z.literal("owned") }),
19
+ z.object({ type: z.literal("all") }),
20
+ z.object({ type: z.literal("areas"), areas: z.array(z.string()) }),
21
+ z.object({ type: z.literal("repos"), repos: z.array(z.string()) }),
22
+ z.object({ type: z.literal("tags"), tags: z.array(z.string()) }),
23
+ ]);
24
+ const SlackChannelSchema = z.object({
25
+ _key: z.string(),
26
+ type: z.literal("slack"),
27
+ channelId: z.string().min(1),
28
+ channelName: z.string().optional(),
29
+ purpose: z.string().optional(),
30
+ events: z.array(z.string()).optional(),
31
+ scope: ChannelScopeSchema.optional(),
32
+ });
33
+ const EmailChannelSchema = z.object({
34
+ _key: z.string(),
35
+ type: z.literal("email"),
36
+ addresses: z.array(z.string().email()).min(1),
37
+ purpose: z.string().optional(),
38
+ events: z.array(z.string()).optional(),
39
+ scope: ChannelScopeSchema.optional(),
40
+ });
41
+ const WebhookChannelSchema = z.object({
42
+ _key: z.string(),
43
+ type: z.literal("webhook"),
44
+ logicalName: z.string().min(1),
45
+ purpose: z.string().optional(),
46
+ events: z.array(z.string()).optional(),
47
+ scope: ChannelScopeSchema.optional(),
48
+ });
49
+ const NotificationChannelSchema = z.discriminatedUnion("type", [
50
+ SlackChannelSchema,
51
+ EmailChannelSchema,
52
+ WebhookChannelSchema,
53
+ ]);
54
+ export const TeamSchema = z.object({
55
+ id: brandedString(TEAM_ID_REGEX),
56
+ slug: brandedString(SLUG_REGEX),
57
+ displayName: z.string().min(1),
58
+ description: z.string().optional(),
59
+ status: z.enum(["active", "archived"]),
60
+ members: z.array(TeamMemberSchema).min(1),
61
+ repos: z.array(z.string()).optional(),
62
+ notifications: z.array(NotificationChannelSchema).optional(),
63
+ });
@@ -123,3 +123,54 @@ export interface GraderJudgment {
123
123
  graderJudgmentsVersion: string;
124
124
  };
125
125
  }
126
+ /**
127
+ * Wire-format subset of {@link GraderJudgment} — the fields a grader LLM
128
+ * is responsible for emitting in its JSON response. The pipeline parses
129
+ * untrusted grader output against this shape, then synthesizes the
130
+ * remaining storage fields (`taskId`, `modelId`, `dimension`, `judgmentId`,
131
+ * `metadata.{graderModel, graderJudgmentsVersion}`, and
132
+ * `hallucinationCheckedAgainst`) from server-side context.
133
+ *
134
+ * The split exists because four of `GraderJudgment`'s required fields are
135
+ * pipeline-owned semantics the LLM cannot produce correctly:
136
+ *
137
+ * - `judgmentId` — D0052 branded id with `(taskId, modelId, dimension,
138
+ * runId)` uniqueness invariant. Minted by `generateJudgmentId`.
139
+ * - `metadata.graderJudgmentsVersion` — static constant co-located with
140
+ * the schema (`promptfoo-grader-output.ts:48`).
141
+ * - `metadata.graderModel` — the grader's deployment alias (pipeline
142
+ * knows from provider config; the LLM doesn't reliably know its own).
143
+ * - `hallucinationCheckedAgainst` — the resolvable-set union of
144
+ * `task.context.docs` and `run.documentManifest`, composed by
145
+ * `populateHallucinationFields` (gap-analysis-step.ts).
146
+ *
147
+ * Asking the LLM for any of these produces drift; `.strict()` on
148
+ * `GraderJudgmentSchema` amplifies that drift into 100% parse failures
149
+ * (the 2026-05-11 empty-gapReport regression — see W0273 and
150
+ * `docs/audits/2026-05-22-empty-gap-analysis-regression.md`).
151
+ *
152
+ * `taskId`, `modelId`, and `dimension` are also pipeline-supplied (from
153
+ * `result.description`, `result.providerId`, and the rubric-classifier
154
+ * output in `calculate-scores.ts:475-479`) — kept out of the wire shape
155
+ * for the same reason.
156
+ */
157
+ export interface GraderEmittedJudgment {
158
+ /** Numeric score in [0, 100] (normalized). */
159
+ score: number;
160
+ /** The grader's natural-language reasoning. */
161
+ reason: string;
162
+ /** Per-dimension failure mode (must match the legal-mode list in the rubric). */
163
+ failureMode: string;
164
+ /** Per-criterion sub-judgments. */
165
+ subJudgments: CriterionSubJudgment[];
166
+ /** Doc citations with role + hallucinated flag. */
167
+ docCitations: DocCitation[];
168
+ /** Grader self-confidence per D0049. */
169
+ confidence: Confidence;
170
+ /**
171
+ * True when the candidate response was empty/whitespace/refused. The
172
+ * pipeline also independently detects this from
173
+ * `result.response.output` — both signals are OR'd.
174
+ */
175
+ outputFailure?: boolean;
176
+ }
@@ -39,8 +39,9 @@ export type { AgentHarnessTaskDefinition, ContentLakeAuthorableMode, ContentLake
39
39
  export type { ActionSuggestion, AreaSummaryBody, CardMeta, CardType, Diagnosis, DiagnosisCard, DocAttributionSpotlightBody, FailureModeSummaryBody, JudgmentRef, LowConfidenceAttributionBody, NoIssuesBody, RegressionVsBaselineBody, TopRecommendationsBody, VersionedInputs, WeakestAreaBody, } from "./diagnosis.js";
40
40
  export type { SynthesisCostTelemetry, SynthesisPerCardTelemetry, } from "./synthesis-telemetry.js";
41
41
  export type { AttributionMeta, DocAttribution, JudgmentAttribution, } from "./attribution.js";
42
- export type { CriterionSubJudgment, DocCitation, DocCitationRole, GraderJudgment, } from "./grader-judgment.js";
42
+ export type { CriterionSubJudgment, DocCitation, DocCitationRole, GraderEmittedJudgment, GraderJudgment, } from "./grader-judgment.js";
43
43
  export type { LegacyGraderJudgment } from "./legacy-grader-judgment.js";
44
+ export type { BaseChannel, ChannelScope, EmailChannel, EventType, KnownEventType, KnownMemberRole, MemberRole, NotificationChannel, NotificationChannelType, SlackChannel, Team, TeamId, TeamMember, TeamRef, TeamSlug, TeamStatus, WebhookChannel, } from "./team.js";
44
45
  type DocumentRef = _DocumentRef;
45
46
  /** Aggregated retrieval metrics for a feature area */
46
47
  export interface AreaRetrievalMetrics {
@@ -259,6 +260,12 @@ export interface FilterOptions {
259
260
  tags?: string[];
260
261
  /** Specific task IDs to include (e.g., ["groq-blog-queries"]) */
261
262
  taskIds?: string[];
263
+ /**
264
+ * Doc slugs that changed in the calling context. When set, only tasks
265
+ * whose `context.docs[*].slug` intersects this list are returned.
266
+ * Empty array is a no-op (treated as undefined).
267
+ */
268
+ changedDocs?: readonly string[];
262
269
  }
263
270
  /** Full gap analysis report */
264
271
  export interface GapAnalysisReport {
@@ -79,6 +79,13 @@ export interface PipelineRequest {
79
79
  classification?: RunClassification;
80
80
  compare?: boolean;
81
81
  compareBaseline?: string;
82
+ /**
83
+ * Compare against a baseline extracted from a previously-published
84
+ * `ailf.report` document. Takes precedence over `compareBaseline`
85
+ * (local FS path). Dashboard-friendly: a report id is something the
86
+ * user can pick from a list.
87
+ */
88
+ compareBaselineReportId?: string;
82
89
  compareThreshold?: number;
83
90
  concurrency?: number;
84
91
  dataset?: string;
@@ -93,6 +100,16 @@ export interface PipelineRequest {
93
100
  jobId?: string;
94
101
  labels?: string[];
95
102
  mode?: RawEvalMode;
103
+ /**
104
+ * Filter the evaluation cohort to a subset of the configured model IDs.
105
+ *
106
+ * Each entry must match the `id` of a model declared in
107
+ * `packages/eval/config/models.ts`. IDs that don't match are dropped
108
+ * with a structured warning AND surfaced on the job's `error` field so
109
+ * callers can detect typos — silent strips are not acceptable
110
+ * (W0281 acceptance criterion 5).
111
+ */
112
+ models?: string[];
96
113
  noAutoScope?: boolean;
97
114
  noCache?: boolean;
98
115
  noRemoteCache?: boolean;
@@ -0,0 +1,65 @@
1
+ import type { Brand } from "./branded-ids.js";
2
+ export type TeamId = Brand<string, "TeamId">;
3
+ export type TeamSlug = Brand<string, "TeamSlug">;
4
+ export type TeamStatus = "active" | "archived";
5
+ export type KnownMemberRole = "lead" | "member" | "oncall";
6
+ export type MemberRole = KnownMemberRole | (string & {});
7
+ export type KnownEventType = "eval.failed" | "eval.completed" | "eval.threshold-breached" | "eval.score-regressed" | "task.created" | "task.archived" | "area.unowned-tasks";
8
+ export type EventType = KnownEventType | (string & {});
9
+ export type NotificationChannelType = "slack" | "email" | "webhook";
10
+ export interface TeamMember {
11
+ email?: string;
12
+ sanityUserId?: string;
13
+ githubUsername?: string;
14
+ displayName?: string;
15
+ role?: MemberRole;
16
+ lastVerifiedAt?: string;
17
+ }
18
+ export interface BaseChannel {
19
+ _key: string;
20
+ type: NotificationChannelType;
21
+ purpose?: string;
22
+ events?: EventType[];
23
+ scope?: ChannelScope;
24
+ }
25
+ export interface SlackChannel extends BaseChannel {
26
+ type: "slack";
27
+ channelId: string;
28
+ channelName?: string;
29
+ }
30
+ export interface EmailChannel extends BaseChannel {
31
+ type: "email";
32
+ addresses: string[];
33
+ }
34
+ export interface WebhookChannel extends BaseChannel {
35
+ type: "webhook";
36
+ logicalName: string;
37
+ }
38
+ export type NotificationChannel = SlackChannel | EmailChannel | WebhookChannel;
39
+ export type ChannelScope = {
40
+ type: "owned";
41
+ } | {
42
+ type: "all";
43
+ } | {
44
+ type: "areas";
45
+ areas: string[];
46
+ } | {
47
+ type: "repos";
48
+ repos: string[];
49
+ } | {
50
+ type: "tags";
51
+ tags: string[];
52
+ };
53
+ export interface Team {
54
+ id: TeamId;
55
+ slug: TeamSlug;
56
+ displayName: string;
57
+ description?: string;
58
+ status: TeamStatus;
59
+ members: TeamMember[];
60
+ repos?: string[];
61
+ notifications?: NotificationChannel[];
62
+ }
63
+ export type TeamRef = {
64
+ _ref: string;
65
+ };
@@ -0,0 +1 @@
1
+ export {};
@@ -22,8 +22,36 @@ export interface DocumentRef {
22
22
  * Named `revision` (not `_rev`) for the same Sanity reserved-name reason.
23
23
  */
24
24
  revision?: string;
25
- /** URL-path identifier (e.g., "groq-introduction") */
25
+ /** URL-path identifier (e.g., "groq-introduction") — leaf segment only. */
26
26
  slug: string;
27
+ /**
28
+ * Full URL path under `/docs/` (e.g., `content-lake/groq-introduction`).
29
+ * Composed from the article's `primarySection->slug.current` and
30
+ * `slug.current` via {@link buildContextDocPath}. Optional — historical
31
+ * reports written before W0287 carry only `slug`; consumers must fall
32
+ * back to `slug` for display when `path` is absent.
33
+ */
34
+ path?: string;
27
35
  /** Human-readable document title */
28
36
  title: string;
29
37
  }
38
+ /**
39
+ * Compose the canonical `/docs/`-relative path for a context-doc reference.
40
+ *
41
+ * Single source of truth across producers (eval doc fetcher, repo-task
42
+ * mirroring) and consumers (dashboard projections). Resolution order:
43
+ *
44
+ * 1. An explicit `path` (e.g. authored on a YAML/repo-mirrored task) wins.
45
+ * 2. Otherwise compose `sectionSlug + "/" + slug` when both are present.
46
+ * 3. Otherwise `null` — neither caller can build a working docs URL, so
47
+ * consumers should disable the link rather than emit a 404.
48
+ *
49
+ * The leaf `slug` alone is never returned as the path because
50
+ * `article.slug.current` is leaf-only on sanity.io/docs; the hierarchy
51
+ * lives on `primarySection->slug.current`.
52
+ */
53
+ export declare function buildContextDocPath(input: {
54
+ path?: string | null;
55
+ sectionSlug?: string | null;
56
+ slug?: string | null;
57
+ }): string | null;
@@ -1 +1,23 @@
1
- export {};
1
+ /**
2
+ * Compose the canonical `/docs/`-relative path for a context-doc reference.
3
+ *
4
+ * Single source of truth across producers (eval doc fetcher, repo-task
5
+ * mirroring) and consumers (dashboard projections). Resolution order:
6
+ *
7
+ * 1. An explicit `path` (e.g. authored on a YAML/repo-mirrored task) wins.
8
+ * 2. Otherwise compose `sectionSlug + "/" + slug` when both are present.
9
+ * 3. Otherwise `null` — neither caller can build a working docs URL, so
10
+ * consumers should disable the link rather than emit a 404.
11
+ *
12
+ * The leaf `slug` alone is never returned as the path because
13
+ * `article.slug.current` is leaf-only on sanity.io/docs; the hierarchy
14
+ * lives on `primarySection->slug.current`.
15
+ */
16
+ export function buildContextDocPath(input) {
17
+ if (input.path)
18
+ return input.path;
19
+ if (input.sectionSlug && input.slug) {
20
+ return `${input.sectionSlug}/${input.slug}`;
21
+ }
22
+ return null;
23
+ }
@@ -52,6 +52,8 @@ export declare const LEGACY_EVAL_MODE_ALIASES: readonly ["baseline", "agentic",
52
52
  export declare const LITERACY_VARIANTS: readonly ["baseline", "agentic", "observed", "full"];
53
53
  /** Union of all literacy variant string values. */
54
54
  export type LiteracyVariant = (typeof LITERACY_VARIANTS)[number];
55
+ /** Type guard for `LiteracyVariant` — true when `value` is one of the closed set. */
56
+ export declare function isLiteracyVariant(value: unknown): value is LiteracyVariant;
55
57
  /**
56
58
  * All accepted mode names for Zod enum construction.
57
59
  * Canonical modes first, then legacy aliases.
@@ -40,6 +40,11 @@ export const LITERACY_VARIANTS = [
40
40
  "observed",
41
41
  "full",
42
42
  ];
43
+ /** Type guard for `LiteracyVariant` — true when `value` is one of the closed set. */
44
+ export function isLiteracyVariant(value) {
45
+ return (typeof value === "string" &&
46
+ LITERACY_VARIANTS.includes(value));
47
+ }
43
48
  /**
44
49
  * All accepted mode names for Zod enum construction.
45
50
  * Canonical modes first, then legacy aliases.