@sanity/ailf 7.1.0 → 7.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +10 -0
  2. package/dist/_vendor/ailf-core/schemas/index.d.ts +1 -0
  3. package/dist/_vendor/ailf-core/schemas/index.js +4 -0
  4. package/dist/_vendor/ailf-core/schemas/report.d.ts +11 -0
  5. package/dist/_vendor/ailf-core/schemas/report.js +14 -0
  6. package/dist/_vendor/ailf-core/schemas/user.d.ts +22 -0
  7. package/dist/_vendor/ailf-core/schemas/user.js +23 -0
  8. package/dist/_vendor/ailf-core/types/index.d.ts +29 -0
  9. package/dist/_vendor/ailf-core/types/index.js +13 -0
  10. package/dist/_vendor/ailf-core/types/user.d.ts +49 -0
  11. package/dist/_vendor/ailf-core/types/user.js +1 -0
  12. package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -1
  13. package/dist/_vendor/ailf-shared/document-ref.js +23 -1
  14. package/dist/_vendor/ailf-shared/generated/help-content.js +26 -14
  15. package/dist/_vendor/ailf-shared/index.d.ts +1 -1
  16. package/dist/_vendor/ailf-shared/index.js +1 -0
  17. package/dist/_vendor/ailf-shared/owner-teams.js +19 -6
  18. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +15 -1
  19. package/dist/adapters/grader-outputs/promptfoo-grader-output.d.ts +2 -2
  20. package/dist/adapters/task-sources/content-lake-task-source.js +12 -7
  21. package/dist/orchestration/steps/compute-attribution-step.d.ts +2 -2
  22. package/dist/orchestration/steps/compute-attribution-step.js +17 -2
  23. package/dist/orchestration/steps/gap-analysis-step.d.ts +2 -2
  24. package/dist/orchestration/steps/gap-analysis-step.js +29 -10
  25. package/dist/orchestration/steps/publish-report-step.d.ts +15 -1
  26. package/dist/orchestration/steps/publish-report-step.js +63 -6
  27. package/dist/pipeline/calculate-scores.d.ts +13 -1
  28. package/dist/pipeline/calculate-scores.js +125 -22
  29. package/dist/pipeline/enrichment-preconditions.d.ts +52 -0
  30. package/dist/pipeline/enrichment-preconditions.js +84 -0
  31. package/dist/pipeline/extract-grader-judgments-resilient.d.ts +88 -0
  32. package/dist/pipeline/extract-grader-judgments-resilient.js +122 -0
  33. package/dist/report-store.d.ts +1 -0
  34. package/dist/report-store.js +2 -0
  35. package/dist/sanity/queries.d.ts +1 -1
  36. package/dist/sanity/queries.js +1 -0
  37. package/dist/sources.js +40 -2
  38. package/package.json +1 -1
@@ -53,6 +53,16 @@ export interface DocumentManifestEntry {
53
53
  _id: string;
54
54
  _rev: string;
55
55
  slug: string;
56
+ /** Parent section slug (`primarySection->slug.current`), when resolvable. */
57
+ sectionSlug?: string;
58
+ /**
59
+ * Full URL path under `/docs/` (e.g. `content-lake/groq-introduction`)
60
+ * composed via `buildContextDocPath` from `sectionSlug + "/" + slug`.
61
+ * Optional — historical manifests written before W0287 only carry
62
+ * `slug`; downstream `DocumentRef` builders fall back to slug-only
63
+ * display when this is absent.
64
+ */
65
+ path?: string;
56
66
  title: string;
57
67
  }
58
68
  /** Impact of a content release on canonical documents */
@@ -21,3 +21,4 @@ export * from "./symbol-preflight-report.js";
21
21
  export * from "./test-budgets.js";
22
22
  export { ConfidenceSchema } from "./confidence-schema.js";
23
23
  export { brandedString } from "./branded-string.js";
24
+ export { AilfUserSchema } from "./user.js";
@@ -28,3 +28,7 @@ export { ConfidenceSchema } from "./confidence-schema.js";
28
28
  // helper instead of replicating `as unknown as z.ZodType<…>` at each
29
29
  // schema author site (project rule: no `as` on `unknown`).
30
30
  export { brandedString } from "./branded-string.js";
31
+ // User-preferences subsystem (W0302). Named export — not `export *` — because
32
+ // the schema file re-exports the `AilfUser` domain type, and a star re-export
33
+ // would surface that type through two paths (W0124 DTS ambiguity).
34
+ export { AilfUserSchema } from "./user.js";
@@ -258,6 +258,17 @@ export declare const ReportSchema: z.ZodObject<{
258
258
  artifactManifest: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
259
259
  tag: z.ZodOptional<z.ZodNullable<z.ZodString>>;
260
260
  title: z.ZodOptional<z.ZodNullable<z.ZodString>>;
261
+ degraded: z.ZodOptional<z.ZodObject<{
262
+ reason: z.ZodLiteral<"enrichment-missing">;
263
+ missing: z.ZodArray<z.ZodEnum<{
264
+ documentManifest: "documentManifest";
265
+ failureModes: "failureModes";
266
+ lowScoringJudgments: "lowScoringJudgments";
267
+ recommendations: "recommendations";
268
+ testResults: "testResults";
269
+ }>>;
270
+ detail: z.ZodString;
271
+ }, z.core.$strict>>;
261
272
  }, z.core.$loose>;
262
273
  export type ReportSchemaInput = z.input<typeof ReportSchema>;
263
274
  export type ReportSchemaOutput = z.infer<typeof ReportSchema>;
@@ -25,6 +25,7 @@
25
25
  */
26
26
  import { z } from "zod";
27
27
  import { LITERACY_VARIANTS } from "../../ailf-shared/index.js";
28
+ import { DEGRADED_ENRICHMENT_FIELDS } from "../types/index.js";
28
29
  // ---------------------------------------------------------------------------
29
30
  // RunContext building blocks (mirrors packages/shared/src/run-context.ts)
30
31
  // ---------------------------------------------------------------------------
@@ -233,5 +234,18 @@ export const ReportSchema = z
233
234
  // `title: report.title ?? null`, so the schema accepts null on both.
234
235
  tag: z.string().nullable().optional(),
235
236
  title: z.string().nullable().optional(),
237
+ // Degraded marker (mirrors `ReportDegradation`): present only when a full
238
+ // eval scored tests but enrichment did not complete. Strict — unknown
239
+ // keys here signal real drift.
240
+ degraded: z
241
+ .object({
242
+ reason: z.literal("enrichment-missing"),
243
+ // Enum derived from the canonical DegradedEnrichmentField tuple so the
244
+ // schema cannot drift from the core type.
245
+ missing: z.array(z.enum(DEGRADED_ENRICHMENT_FIELDS)),
246
+ detail: z.string().min(1),
247
+ })
248
+ .strict()
249
+ .optional(),
236
250
  })
237
251
  .passthrough();
@@ -0,0 +1,22 @@
1
+ import { z } from "zod";
2
+ export declare const AilfUserSchema: z.ZodObject<{
3
+ _id: z.ZodString;
4
+ _type: z.ZodLiteral<"ailf.user">;
5
+ sanityUserId: z.ZodString;
6
+ email: z.ZodString;
7
+ displayName: z.ZodOptional<z.ZodString>;
8
+ teams: z.ZodArray<z.ZodObject<{
9
+ _type: z.ZodLiteral<"reference">;
10
+ _ref: z.ZodString;
11
+ _key: z.ZodOptional<z.ZodString>;
12
+ }, z.core.$strip>>;
13
+ preferences: z.ZodObject<{
14
+ primaryTeam: z.ZodOptional<z.ZodObject<{
15
+ _type: z.ZodLiteral<"reference">;
16
+ _ref: z.ZodString;
17
+ _key: z.ZodOptional<z.ZodString>;
18
+ }, z.core.$strip>>;
19
+ }, z.core.$strip>;
20
+ updatedAt: z.ZodString;
21
+ }, z.core.$strip>;
22
+ export type { AilfUser } from "../types/user.js";
@@ -0,0 +1,23 @@
1
+ import { z } from "zod";
2
+ // `_id` is constructed as `ailf.user.${CurrentUser.id}` at write time. The
3
+ // account id segment is opaque (may contain `|`, `.`, etc. for SSO providers),
4
+ // so the prefix is all we constrain here. The deterministic-id invariant
5
+ // (`_id === ailf.user.${sanityUserId}`) is enforced on the write path.
6
+ const USER_ID_REGEX = /^ailf\.user\..+$/;
7
+ const TeamReferenceSchema = z.object({
8
+ _type: z.literal("reference"),
9
+ _ref: z.string().min(1),
10
+ _key: z.string().optional(),
11
+ });
12
+ export const AilfUserSchema = z.object({
13
+ _id: z.string().regex(USER_ID_REGEX),
14
+ _type: z.literal("ailf.user"),
15
+ sanityUserId: z.string().min(1),
16
+ email: z.string().email(),
17
+ displayName: z.string().optional(),
18
+ teams: z.array(TeamReferenceSchema),
19
+ preferences: z.object({
20
+ primaryTeam: TeamReferenceSchema.optional(),
21
+ }),
22
+ updatedAt: z.string().datetime(),
23
+ });
@@ -42,6 +42,7 @@ export type { AttributionMeta, DocAttribution, JudgmentAttribution, } from "./at
42
42
  export type { CriterionSubJudgment, DocCitation, DocCitationRole, GraderEmittedJudgment, GraderJudgment, } from "./grader-judgment.js";
43
43
  export type { LegacyGraderJudgment } from "./legacy-grader-judgment.js";
44
44
  export type { BaseChannel, ChannelScope, EmailChannel, EventType, KnownEventType, KnownMemberRole, MemberRole, NotificationChannel, NotificationChannelType, SlackChannel, Team, TeamId, TeamMember, TeamRef, TeamSlug, TeamStatus, WebhookChannel, } from "./team.js";
45
+ export type { AilfUser, AilfUserPreferences, TeamReference } from "./user.js";
45
46
  type DocumentRef = _DocumentRef;
46
47
  /** Aggregated retrieval metrics for a feature area */
47
48
  export interface AreaRetrievalMetrics {
@@ -1488,8 +1489,36 @@ export interface ArtifactRef {
1488
1489
  * two becomes a compile error (W0049 review finding C1).
1489
1490
  */
1490
1491
  export type ArtifactManifest = Partial<Record<ArtifactType, ArtifactRef>>;
1492
+ /**
1493
+ * Enrichment surfaces that gap-analysis writes onto a report. When a full
1494
+ * eval scores tests but these are absent, the report renders as "no tests"
1495
+ * despite carrying a score — the degraded condition `ReportDegradation`
1496
+ * records.
1497
+ */
1498
+ export declare const DEGRADED_ENRICHMENT_FIELDS: readonly ["documentManifest", "failureModes", "lowScoringJudgments", "recommendations", "testResults"];
1499
+ export type DegradedEnrichmentField = (typeof DEGRADED_ENRICHMENT_FIELDS)[number];
1500
+ /**
1501
+ * Marks a published report as degraded: the eval ran and scored tests, but
1502
+ * one or more enrichment surfaces never landed (e.g. gap-analysis skipped
1503
+ * because `grader-judgments.json` was missing). Present so the dashboard and
1504
+ * Studio can show "enrichment failed" rather than a misleading empty
1505
+ * "no tests" state on a report that still has a score.
1506
+ */
1507
+ export interface ReportDegradation {
1508
+ /** Why the report is degraded. Single-variant union, widen as needed. */
1509
+ reason: "enrichment-missing";
1510
+ /** Enrichment surfaces absent on this report despite a full eval. */
1511
+ missing: DegradedEnrichmentField[];
1512
+ /** Human-readable explanation for dashboard / Studio empty-state copy. */
1513
+ detail: string;
1514
+ }
1491
1515
  /** A published evaluation report — the atomic unit of the report store */
1492
1516
  export interface Report {
1517
+ /**
1518
+ * Set when the report is published in a degraded state — a full eval
1519
+ * scored tests but enrichment did not complete. Absent on healthy reports.
1520
+ */
1521
+ degraded?: ReportDegradation;
1493
1522
  /**
1494
1523
  * Snapshot of the run manifest's `artifacts` slice at publish time (D0032).
1495
1524
  * The source of truth lives in `gs://…/runs/{runId}/manifest.json`; this
@@ -42,3 +42,16 @@ export function isLegacyFailureMode(mode) {
42
42
  * that imports it from @sanity/ailf-core.
43
43
  */
44
44
  export { NOISE_THRESHOLD as DEFAULT_NOISE_THRESHOLD } from "../../ailf-shared/index.js";
45
+ /**
46
+ * Enrichment surfaces that gap-analysis writes onto a report. When a full
47
+ * eval scores tests but these are absent, the report renders as "no tests"
48
+ * despite carrying a score — the degraded condition `ReportDegradation`
49
+ * records.
50
+ */
51
+ export const DEGRADED_ENRICHMENT_FIELDS = [
52
+ "documentManifest",
53
+ "failureModes",
54
+ "lowScoringJudgments",
55
+ "recommendations",
56
+ "testResults",
57
+ ];
@@ -0,0 +1,49 @@
1
+ /**
2
+ * A Sanity reference to an `ailf.team` document.
3
+ *
4
+ * Members of an array (`AilfUser.teams[]`) carry a `_key`; the single-valued
5
+ * `preferences.primaryTeam` does not. The team slug downstream consumers need
6
+ * is a derived, read-time value from a GROQ projection — never stored here.
7
+ */
8
+ export interface TeamReference {
9
+ _type: "reference";
10
+ _ref: string;
11
+ _key?: string;
12
+ }
13
+ /**
14
+ * Per-user UI preferences. Room to grow (default view, density, …) — kept
15
+ * minimal for v0 (YAGNI).
16
+ */
17
+ export interface AilfUserPreferences {
18
+ /**
19
+ * Reference to the user's default team — one of `AilfUser.teams[]`. Distinct
20
+ * from `teams[]` so "which team's view do I default to" can differ from "all
21
+ * teams I affiliate with". The slug is derived in GROQ at read time.
22
+ */
23
+ primaryTeam?: TeamReference;
24
+ }
25
+ /**
26
+ * Per-account user document — one per Sanity account, keyed by a deterministic
27
+ * `_id` of `ailf.user.${sanityUserId}`. Stores self-declared team affiliation
28
+ * (references to `ailf.team`) plus UI preferences, and is the primary source
29
+ * for dashboard personalization. Stores minimal PII: `sanityUserId`, `email`,
30
+ * and `displayName` only.
31
+ *
32
+ * @see docs/design-docs/user-settings.md
33
+ */
34
+ export interface AilfUser {
35
+ /** Deterministic: `ailf.user.${sanityUserId}`. */
36
+ _id: string;
37
+ _type: "ailf.user";
38
+ /** `CurrentUser.id` — the stable, globally-unique key, mirrored for GROQ. */
39
+ sanityUserId: string;
40
+ /** Denormalized for display / joins (lowercased at write time). */
41
+ email: string;
42
+ /** `CurrentUser.name` snapshot. */
43
+ displayName?: string;
44
+ /** Self-declared affiliation — drives personalization only. */
45
+ teams: TeamReference[];
46
+ preferences: AilfUserPreferences;
47
+ /** ISO 8601 UTC — stamped on each save. */
48
+ updatedAt: string;
49
+ }
@@ -0,0 +1 @@
1
+ export {};
@@ -22,8 +22,36 @@ export interface DocumentRef {
22
22
  * Named `revision` (not `_rev`) for the same Sanity reserved-name reason.
23
23
  */
24
24
  revision?: string;
25
- /** URL-path identifier (e.g., "groq-introduction") */
25
+ /** URL-path identifier (e.g., "groq-introduction") — leaf segment only. */
26
26
  slug: string;
27
+ /**
28
+ * Full URL path under `/docs/` (e.g., `content-lake/groq-introduction`).
29
+ * Composed from the article's `primarySection->slug.current` and
30
+ * `slug.current` via {@link buildContextDocPath}. Optional — historical
31
+ * reports written before W0287 carry only `slug`; consumers must fall
32
+ * back to `slug` for display when `path` is absent.
33
+ */
34
+ path?: string;
27
35
  /** Human-readable document title */
28
36
  title: string;
29
37
  }
38
+ /**
39
+ * Compose the canonical `/docs/`-relative path for a context-doc reference.
40
+ *
41
+ * Single source of truth across producers (eval doc fetcher, repo-task
42
+ * mirroring) and consumers (dashboard projections). Resolution order:
43
+ *
44
+ * 1. An explicit `path` (e.g. authored on a YAML/repo-mirrored task) wins.
45
+ * 2. Otherwise compose `sectionSlug + "/" + slug` when both are present.
46
+ * 3. Otherwise `null` — neither caller can build a working docs URL, so
47
+ * consumers should disable the link rather than emit a 404.
48
+ *
49
+ * The leaf `slug` alone is never returned as the path because
50
+ * `article.slug.current` is leaf-only on sanity.io/docs; the hierarchy
51
+ * lives on `primarySection->slug.current`.
52
+ */
53
+ export declare function buildContextDocPath(input: {
54
+ path?: string | null;
55
+ sectionSlug?: string | null;
56
+ slug?: string | null;
57
+ }): string | null;
@@ -1 +1,23 @@
1
- export {};
1
+ /**
2
+ * Compose the canonical `/docs/`-relative path for a context-doc reference.
3
+ *
4
+ * Single source of truth across producers (eval doc fetcher, repo-task
5
+ * mirroring) and consumers (dashboard projections). Resolution order:
6
+ *
7
+ * 1. An explicit `path` (e.g. authored on a YAML/repo-mirrored task) wins.
8
+ * 2. Otherwise compose `sectionSlug + "/" + slug` when both are present.
9
+ * 3. Otherwise `null` — neither caller can build a working docs URL, so
10
+ * consumers should disable the link rather than emit a 404.
11
+ *
12
+ * The leaf `slug` alone is never returned as the path because
13
+ * `article.slug.current` is leaf-only on sanity.io/docs; the hierarchy
14
+ * lives on `primarySection->slug.current`.
15
+ */
16
+ export function buildContextDocPath(input) {
17
+ if (input.path)
18
+ return input.path;
19
+ if (input.sectionSlug && input.slug) {
20
+ return `${input.sectionSlug}/${input.slug}`;
21
+ }
22
+ return null;
23
+ }
@@ -44,6 +44,17 @@ export const HELP_TOPICS = [
44
44
  "scoring-model"
45
45
  ]
46
46
  },
47
+ {
48
+ "id": "failure-modes",
49
+ "title": "Failure Modes",
50
+ "body": "## What this view is for\n\nThe Recommendations view tells you which fixes to make. This view tells you what\nkind of problem you have. It groups the run's weaknesses by the documentation\nissue behind them, so you can see patterns across the whole evaluation rather\nthan one fix at a time. If most of your weak spots are the same kind of problem,\nthat is a signal about how to spend your docs effort.\n\n## What you are looking at\n\nRecent reports show **interpretive cards** drawn from the run's diagnosis:\n\n- **Weakest area** names the single feature area dragging the score down most,\n the failure mode behind it, and a confidence level with the sample size, so\n you know how strong the signal is.\n- **Failure mode** highlights one category of problem, which scoring dimension\n it shows up in, and how often it occurred across the tests that were checked.\n- **Area summary** gives a plain-language read on how an area is doing and why.\n\nOlder reports show a **category breakdown** instead. Each failure category is a\nchip with a count. Selecting a chip lists the gaps in that category, and each\ngap shows an estimated score lift if fixed, a confidence level, a short\nremediation note, and the specific tasks that exposed it. You can click a task\nto jump to it.\n\n## The failure modes\n\nEach weakness is sorted into one of these categories. The category is the\nfastest way to know what kind of work the fix needs:\n\n- **Missing docs**: the doc the model needed does not exist or is not indexed.\n The fix is to write new documentation.\n- **Incorrect docs**: a doc has a factual error or a wrong example. The fix is\n to correct it.\n- **Outdated docs**: a doc exists but reflects a previous API surface. The fix\n is to bring it up to date.\n- **Poor structure**: the information is correct but hard for an agent to find\n or skim. 
The fix is to reorganize or clarify.\n- **Model limitation**: the model struggles even with correct docs available.\n This is not a documentation problem, so treat it as context rather than a\n to-do.\n- **Unclassified**: the run could not categorize the weakness. Use the linked\n tasks and the grader's notes to judge it yourself.\n\nDepending on the evaluation mode you may see additional categories, including\nones specific to agent behavior such as tool misuse or missing error handling.\n\n## How to use it\n\nStart with the category that has the most gaps or the highest combined lift. The\ncategory tells you the shape of the work before you open a single page: write,\ncorrect, update, or restructure. Categories that are not documentation problems,\nsuch as model limitation, are worth noting but are not yours to fix in the docs.\n\n## Related views\n\n- **Recommendations** turns these weaknesses into a ranked list of specific\n edits.\n- **Low-scoring judgments** shows the grader's raw notes on the tests that\n scored lowest, which is the most granular signal behind any failure mode.\n\n## When this view is empty\n\nIf a report shows no failure modes, the evaluation either classified nothing\nworth flagging or the run predates this view. A clean result here usually means\nthe docs held up across the evaluated tasks.",
51
+ "source": "docs/help/failure-modes.md",
52
+ "related": [
53
+ "recommendations",
54
+ "scoring-model",
55
+ "negative-doc-lift"
56
+ ]
57
+ },
47
58
  {
48
59
  "id": "getting-started",
49
60
  "title": "Getting Started",
@@ -57,11 +68,12 @@ export const HELP_TOPICS = [
57
68
  {
58
69
  "id": "interpreting-diagnostics",
59
70
  "title": "Interpreting Diagnostics",
60
- "body": "## The diagnostics tab\n\nWhen you open a report and click the **Diagnostics** tab, you see a health\nsummary of your documentation across all feature areas. This is the most\nactionable view in the dashboard — it tells you exactly where to focus your doc\nimprovement efforts.\n\n## Health categories\n\nFeature areas are grouped into three health bands:\n\n- **Strong (80+)** — Docs are working well. AI agents produce correct, complete\n implementations. No action needed unless you see regression.\n- **Needs Attention (70–79)** — Docs are okay but have gaps. There may be\n specific dimensions (like code correctness or doc coverage) dragging the score\n down. Worth investigating.\n- **Weak (below 70)** — Docs are not providing enough support. AI agents\n consistently struggle with these features. These need priority attention.\n\n## Strengths vs. Issues\n\nThe diagnostics tab has two sub-views:\n\n**Strengths** highlights what's working: high-scoring areas, strong dimensions,\nand areas where agents successfully find and use your docs. 
Use this to\nunderstand what good looks like in your docs and replicate it elsewhere.\n\n**Issues** lists the problems: weak areas, dimensions scoring below 50, negative\ndoc lift, retrieval problems, and (if gap analysis was run) specific\nrecommendations with estimated score lift.\n\n## Key diagnostic signals\n\n| Signal | What it means | What to do |\n| ------------------------------ | ------------------------------------------ | ---------------------------------------- |\n| **Negative doc lift** | Docs are worse than no docs | Rewrite or remove the offending docs |\n| **Large retrieval gap** | Good docs exist but agents can't find them | Improve page titles, metadata, SEO |\n| **Low code correctness** | Agents find the docs but produce bad code | Add or fix code examples |\n| **Low doc coverage** | The docs don't cover what the task needs | Write new documentation |\n| **Efficiency anomaly (>100%)** | Agents do better without gold docs | Injected docs may be confusing the model |",
71
+ "body": "## Reading the health of your docs\n\nA report scores each feature area on how well your documentation lets AI coding\ntools implement that feature. Reading those scores well is what turns a number\ninto a plan: it tells you where the docs are working, where they are not, and\nwhat kind of problem you are dealing with.\n\n## Health bands\n\nEach area's score falls into one of three bands:\n\n- **Strong (80 and above)**: docs are working well. Agents produce correct,\n complete implementations. No action needed unless you see a regression.\n- **Needs attention (70 to 79)**: docs are okay but have gaps. A specific\n dimension such as code correctness or doc coverage may be dragging the score\n down. Worth investigating.\n- **Weak (below 70)**: docs are not providing enough support. Agents\n consistently struggle with these features. These need priority attention.\n\n## Strong areas are signal too\n\nIt is easy to focus only on what is broken, but the strong areas are worth\nreading. They show what good looks like in your docs: clear structure, accurate\nexamples, the patterns agents can follow. When you fix a weak area, that is the\nbar to copy.\n\n## Key diagnostic signals\n\nA low score has a reason behind it. 
These signals tell you which reason, and\nwhat to do about it:\n\n| Signal | What it means | What to do |\n| ------------------------------ | ------------------------------------------- | ---------------------------------------- |\n| **Negative doc lift** | Docs are worse than no docs | Rewrite or remove the offending docs |\n| **Large retrieval gap** | Good docs exist but agents cannot find them | Improve page titles, metadata, structure |\n| **Low code correctness** | Agents find the docs but produce bad code | Add or fix code examples |\n| **Low doc coverage** | The docs do not cover what the task needs | Write new documentation |\n| **Efficiency anomaly (>100%)** | Agents do better without the docs | Injected docs may be confusing the model |\n\n## Where to go next\n\nWhen you know which areas are weak and why, the **Recommendations** view turns\nthat into a ranked list of specific edits, and the **Failure modes** view groups\nthe weaknesses by the kind of documentation problem behind them.",
61
72
  "source": "docs/help/interpreting-diagnostics.md",
62
73
  "related": [
63
- "scoring-model",
64
- "weaknesses-recommendations"
74
+ "recommendations",
75
+ "failure-modes",
76
+ "scoring-model"
65
77
  ]
66
78
  },
67
79
  {
@@ -74,6 +86,17 @@ export const HELP_TOPICS = [
74
86
  "comparing-runs"
75
87
  ]
76
88
  },
89
+ {
90
+ "id": "recommendations",
91
+ "title": "Recommendations",
92
+ "body": "## What this view is for\n\nThis is the \"what do I fix\" view. The scores tell you how well your\ndocumentation supports AI coding tools. This view turns those scores into a\nranked list of specific changes, so you can spend your time on the edits that\nshould move the score the most.\n\nEverything here comes from the same evaluation run you are looking at, and it\npoints at your own documentation pages rather than giving generic advice.\n\n## What you are looking at\n\nRecent reports show a set of **diagnosis cards**. Each card answers one question\nabout the run.\n\n**Top recommendations** is the main card. It opens with a short summary, then\nlists a few suggested changes ranked by priority. Each suggestion has:\n\n- A **priority** tag of high, medium, or low that tells you what to do first.\n- A **title** that names the change in one line.\n- A **description** of the specific fix, usually quoting the exact symbol,\n query, or pattern involved.\n- A **doc reference** showing which page, and the section when it is known, the\n change applies to. Every reference points to a real page that was part of this\n run, so you can open it and start editing.\n\nYou may also see supporting cards:\n\n- **Doc attribution spotlight** shows which documentation pages most influenced\n the results, and whether each one helped or hurt. Use it to confirm a\n recommendation is pointing at the right page.\n- **Low-confidence attribution** lists results where the link between a doc and\n an outcome was uncertain. Treat anything flagged here as a lead to verify, not\n a settled conclusion.\n- **Regression vs baseline** appears when you are comparing against an earlier\n run. It shows which areas moved up or down and the likely reason for each\n change.\n\n## How to use it\n\nWork top down. Start with the high-priority suggestions, open the referenced\npage, and make the change. 
Priority reflects how much each change is expected to\nhelp, so the top of the list is usually where your effort goes furthest.\n\nThe recommendations are written by a model that reads this run's results. They\nare grounded in your actual docs and cannot reference a page that was not in the\nrun, but they are still suggestions. Read the linked page before acting, and use\nthe confidence signals to decide how much to trust each item.\n\n## Where this comes from\n\nA recommendation is the end of a chain: a test scored low, the grader said why,\nthe run classified that into a failure mode, and this view proposes the edit. If\nyou want to see the failure modes themselves, grouped by category, open the\n**Failure modes** view. If you want the grader's raw notes on the lowest scores,\nopen the **Low-scoring judgments** view.\n\n## Older reports\n\nReports created before the diagnosis cards shipped show a simpler list instead.\nEach row names a feature area, the failure mode behind it, an estimated score\nlift if you fix it, a confidence level, and the tasks that exposed the gap. The\nestimated lift is conservative. It assumes fixing the gap raises the weak\ndimension only to the median of the others, so the real improvement can be\nhigher.\n\n## When this view is empty\n\nIf a report shows no recommendations, the evaluation either ran and found\nnothing worth flagging, or the run predates this feature. A score with no\nrecommendations is usually a good sign, because it means the docs held up across\nthe evaluated tasks.",
93
+ "source": "docs/help/recommendations.md",
94
+ "related": [
95
+ "failure-modes",
96
+ "interpreting-diagnostics",
97
+ "scoring-model"
98
+ ]
99
+ },
77
100
  {
78
101
  "id": "retrieval-gap",
79
102
  "title": "Retrieval Gap & Infrastructure Efficiency",
@@ -96,17 +119,6 @@ export const HELP_TOPICS = [
96
119
  "eval-modes"
97
120
  ]
98
121
  },
99
- {
100
- "id": "weaknesses-recommendations",
101
- "title": "Weaknesses & Recommendations",
102
- "body": "## Understanding weaknesses\n\nThe Issues sub-tab in Diagnostics lists every area or dimension that scored\nbelow threshold. Each weakness entry shows:\n\n- **The feature area** — Which product feature is affected (e.g., GROQ,\n Functions, Webhooks).\n- **The bottleneck dimension** — Which scoring dimension is dragging the area\n down: task completion, code correctness, or doc coverage.\n- **The score** — How far below threshold the dimension scored.\n\n## Gap analysis recommendations\n\nWhen an evaluation runs with gap analysis enabled, the dashboard shows\n**prioritized recommendations** — specific actions ranked by estimated impact.\n\nEach recommendation includes:\n\n- **Failure mode** — The type of doc problem identified:\n - `missing-docs` — The functionality isn't documented at all.\n - `incorrect-docs` — The docs contain factual errors.\n - `outdated-docs` — The docs describe an old API version or pattern.\n - `poor-structure` — The docs exist but are hard to find or understand.\n- **Estimated lift** — How many score points fixing this gap would add. Based on\n raising the bottleneck dimension to the median of non-bottleneck dimensions.\n Conservative estimate — actual improvement may be higher.\n- **Confidence** — How sure the analysis is about this diagnosis (high, medium,\n or low).\n- **Affected tasks** — Which specific evaluation tasks exposed this gap.\n\n## Diagnosis cards\n\nEvery published report now carries a **diagnosis artifact** — a set of cards\nproduced by the post-pipeline hook (`ailf interpret`). The Studio diagnosis\npanel renders these cards directly; the dashboard's Recommendations and\nFailure-modes panels migrate to the same source in a follow-up.\n\nThe hook runs by default for every pipeline invocation. 
To opt out for a single\nrun, pass `--no-summary`; to opt out in CI, set `AILF_INTERPRET_ON_RUN=0` in the\nworkflow env block; to opt out project-wide, set `summary.onRun: never` in\n`.ailf/config.yaml`.\n\n## Low-scoring judgments\n\nBelow the recommendations, you'll find the **grader's explanations** for tests\nthat scored below 70. These are the raw assessments from the grading model\nexplaining exactly what went wrong — missing API calls, incorrect patterns,\nhallucinated features, etc.\n\nEach judgment shows the task, the dimension, the score, and the grader's natural\nlanguage reason. These are the most granular diagnostic signal available and\noften point directly to the doc section that needs fixing.",
103
- "source": "docs/help/weaknesses-recommendations.md",
104
- "related": [
105
- "interpreting-diagnostics",
106
- "scoring-model",
107
- "negative-doc-lift"
108
- ]
109
- },
110
122
  {
111
123
  "id": "how-agents-work",
112
124
  "title": "How AI Agents Find Documentation",
@@ -18,7 +18,7 @@
18
18
  * surface against future regressions.
19
19
  */
20
20
  export { computeCanaryDrift, type CanaryDriftReport, type CanaryReportSlim, type DriftEntry, type DriftThresholds, type DriftVerdict, } from "./canary-drift.js";
21
- export { type DocumentRef } from "./document-ref.js";
21
+ export { buildContextDocPath, type DocumentRef } from "./document-ref.js";
22
22
  export { makeEditorialReference, type EditorialReference, type MakeEditorialReferenceArgs, } from "./editorial-reference.js";
23
23
  export { isKnownEventType, KNOWN_EVENT_TYPES, type EventType, type KnownEventType, } from "./event-types.js";
24
24
  export { FEATURE_FLAGS, type FeatureFlag, type FeatureFlagKey, } from "./feature-flags.js";
@@ -18,6 +18,7 @@
18
18
  * surface against future regressions.
19
19
  */
20
20
  export { computeCanaryDrift, } from "./canary-drift.js";
21
+ export { buildContextDocPath } from "./document-ref.js";
21
22
  export { makeEditorialReference, } from "./editorial-reference.js";
22
23
  export { isKnownEventType, KNOWN_EVENT_TYPES, } from "./event-types.js";
23
24
  export { FEATURE_FLAGS, } from "./feature-flags.js";
@@ -14,11 +14,21 @@
14
14
  * @see docs/decisions/D0037-run-classification-and-ownership-taxonomy.md
15
15
  */
16
16
  export const KNOWN_OWNER_TEAMS = [
17
+ "ai-growth",
18
+ "billing-and-integrations",
19
+ "content-agent",
17
20
  "content-lake",
18
- "core-docs",
19
- "growth",
20
- "media",
21
- "platform",
21
+ "data",
22
+ "design-and-research",
23
+ "docs",
24
+ "editorial-experience",
25
+ "engineering",
26
+ "identity",
27
+ "media-library",
28
+ "product",
29
+ "runtime",
30
+ "sdk",
31
+ "ssi",
22
32
  "studio",
23
33
  ];
24
34
  /**
@@ -26,8 +36,11 @@ export const KNOWN_OWNER_TEAMS = [
26
36
  * drift has been observed belong here. Unknown values pass through.
27
37
  */
28
38
  const OWNER_TEAM_ALIASES = {
29
- coredocs: "core-docs",
30
- docs: "core-docs",
39
+ "core-docs": "docs",
40
+ coredocs: "docs",
41
+ documentation: "docs",
42
+ growth: "ai-growth",
43
+ media: "media-library",
31
44
  studio_team: "studio",
32
45
  "studio-team": "studio",
33
46
  };
@@ -16,6 +16,7 @@
16
16
  import { mkdirSync, writeFileSync } from "fs";
17
17
  import { join } from "path";
18
18
  import { canonicalDocRefLabel, isIdRef, isPathRef, isPerspectiveRef, isSlugRef, } from "../../_vendor/ailf-core/index.js";
19
+ import { buildContextDocPath } from "../../_vendor/ailf-shared/index.js";
19
20
  import { fetchUrlContent, } from "../../pipeline/fetch-url-content.js";
20
21
  import { createPerspectiveClient, createPublishedClient, getSanityClient, } from "../../sanity/client.js";
21
22
  import { extractSymbolsForDoc, renderDocument, } from "../../sanity/document-renderers.js";
@@ -376,7 +377,20 @@ export class SanityDocFetcher {
376
377
  : getSanityClient(toSanityOverrides(source));
377
378
  const allMetadata = await client.fetch(ARTICLES_METADATA_BY_SLUGS_QUERY, { slugs: [...allSlugs] });
378
379
  return allMetadata
379
- .map((m) => ({ _id: m._id, _rev: m._rev, slug: m.slug, title: m.title }))
380
+ .map((m) => {
381
+ const path = buildContextDocPath({
382
+ sectionSlug: m.sectionSlug,
383
+ slug: m.slug,
384
+ });
385
+ return {
386
+ _id: m._id,
387
+ _rev: m._rev,
388
+ slug: m.slug,
389
+ ...(m.sectionSlug ? { sectionSlug: m.sectionSlug } : {}),
390
+ ...(path ? { path } : {}),
391
+ title: m.title,
392
+ };
393
+ })
380
394
  .sort((a, b) => a.slug.localeCompare(b.slug));
381
395
  }
382
396
  // -----------------------------------------------------------------------
@@ -76,9 +76,9 @@ export declare const GraderJudgmentSchema: z.ZodObject<{
76
76
  documentId: z.ZodString;
77
77
  slug: z.ZodOptional<z.ZodString>;
78
78
  role: z.ZodEnum<{
79
+ missing: "missing";
79
80
  supports: "supports";
80
81
  contradicts: "contradicts";
81
- missing: "missing";
82
82
  irrelevant: "irrelevant";
83
83
  }>;
84
84
  hallucinated: z.ZodOptional<z.ZodBoolean>;
@@ -145,9 +145,9 @@ export declare const GraderEmittedJudgmentSchema: z.ZodObject<{
145
145
  documentId: z.ZodString;
146
146
  slug: z.ZodOptional<z.ZodString>;
147
147
  role: z.ZodEnum<{
148
+ missing: "missing";
148
149
  supports: "supports";
149
150
  contradicts: "contradicts";
150
- missing: "missing";
151
151
  irrelevant: "irrelevant";
152
152
  }>;
153
153
  hallucinated: z.ZodOptional<z.ZodBoolean>;
@@ -15,6 +15,7 @@
15
15
  * @see packages/core/src/ports/task-source.ts — TaskSource port
16
16
  * @see docs/decisions/D0038-content-lake-authorable-task-modes.md
17
17
  */
18
+ import { buildContextDocPath } from "../../_vendor/ailf-shared/index.js";
18
19
  import { filterByChangedDocs } from "./changed-docs-filter.js";
19
20
  import { ContentLakeAuthorableTaskSchema } from "./repo-schemas.js";
20
21
  // ---------------------------------------------------------------------------
@@ -223,9 +224,11 @@ function mapCanonicalDocRef(raw) {
223
224
  case "slug":
224
225
  return raw.slug ? { slug: raw.slug, reason } : null;
225
226
  case "path": {
226
- // Prefer explicit path field; fall back to deriving from doc reference
227
- const path = raw.path ||
228
- (raw.sectionSlug && raw.slug ? `${raw.sectionSlug}/${raw.slug}` : null);
227
+ const path = buildContextDocPath({
228
+ path: raw.path,
229
+ sectionSlug: raw.sectionSlug,
230
+ slug: raw.slug,
231
+ });
229
232
  return path ? { path, reason } : null;
230
233
  }
231
234
  case "id": {
@@ -233,10 +236,12 @@ function mapCanonicalDocRef(raw) {
233
236
  const id = raw.docId || raw.docRefId || null;
234
237
  if (!id)
235
238
  return null;
236
- // Carry slug and derived path as optional DX annotations
237
- const derivedPath = raw.sectionSlug && raw.slug
238
- ? `${raw.sectionSlug}/${raw.slug}`
239
- : undefined;
239
+ // Carry slug and derived path as optional DX annotations — single
240
+ // source of truth in `buildContextDocPath` (@sanity/ailf-shared).
241
+ const derivedPath = buildContextDocPath({
242
+ sectionSlug: raw.sectionSlug,
243
+ slug: raw.slug,
244
+ });
240
245
  return {
241
246
  id,
242
247
  reason,
@@ -35,10 +35,10 @@
35
35
  * @see docs/decisions/D0050-per-entry-attribution-layout.md
36
36
  * @see docs/decisions/D0052-judgment-ref-granularity.md
37
37
  */
38
- import type { AppContext, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
38
+ import type { AppContext, PipelineState, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
39
39
  export declare class ComputeAttributionStep implements PipelineStep {
40
40
  readonly name = "compute-attribution";
41
41
  readonly optional = true;
42
42
  check(ctx: AppContext): ValidationIssue[];
43
- execute(ctx: AppContext, _state?: unknown): Promise<StepResult>;
43
+ execute(ctx: AppContext, state?: PipelineState): Promise<StepResult>;
44
44
  }
@@ -40,6 +40,7 @@ import { resolve } from "node:path";
40
40
  import { isSlugRef } from "../../_vendor/ailf-core/index.js";
41
41
  import { calibrationSetVersion, embeddingModel, ensembleVersion, } from "../../pipeline/attribution.js";
42
42
  import { V0_WEIGHTS, computeJudgmentAttribution, } from "../../pipeline/compute-attribution.js";
43
+ import { classifyEnrichmentInputs, degradedEnrichmentError, } from "../../pipeline/enrichment-preconditions.js";
43
44
  // ---------------------------------------------------------------------------
44
45
  // Step implementation
45
46
  // ---------------------------------------------------------------------------
@@ -79,12 +80,26 @@ export class ComputeAttributionStep {
79
80
  }
80
81
  return issues;
81
82
  }
82
- async execute(ctx, _state) {
83
+ async execute(ctx, state) {
83
84
  const start = Date.now();
84
85
  const root = ctx.config.rootDir;
85
86
  const judgmentsPath = resolve(root, "results", "latest", "grader-judgments.json");
86
87
  const summaryPath = resolve(root, "results", "latest", "score-summary.json");
87
- if (!existsSync(judgmentsPath)) {
88
+ // Mirror gap-analysis: a full eval that scored tests but persisted no
89
+ // grader judgments is a degraded run, not a benign skip. Fail loud so the
90
+ // outcome surfaces in pipeline-result and on the job document. A remote
91
+ // cache hit restores score-summary.json without grader-judgments.json, so
92
+ // its missing judgments are legitimate — never fail loud on a cache hit.
93
+ const fromRemoteCache = (state?.remoteCacheHits?.size ?? 0) > 0;
94
+ const inputs = classifyEnrichmentInputs(root);
95
+ if (inputs.kind === "judgments-missing-after-eval" && !fromRemoteCache) {
96
+ return {
97
+ durationMs: Date.now() - start,
98
+ status: "failed",
99
+ error: degradedEnrichmentError("compute-attribution", inputs.scoredTestCount),
100
+ };
101
+ }
102
+ if (inputs.kind !== "ready") {
88
103
  return { status: "skipped", reason: "No grader-judgments.json" };
89
104
  }
90
105
  if (!existsSync(summaryPath)) {
@@ -14,10 +14,10 @@
14
14
  *
15
15
  * This is an optional step — failure doesn't stop the pipeline.
16
16
  */
17
- import type { AppContext, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
17
+ import type { AppContext, PipelineState, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
18
18
  export declare class GapAnalysisStep implements PipelineStep {
19
19
  readonly name = "gap-analysis";
20
20
  readonly optional = true;
21
21
  check(ctx: AppContext): ValidationIssue[];
22
- execute(ctx: AppContext): Promise<StepResult>;
22
+ execute(ctx: AppContext, state?: PipelineState): Promise<StepResult>;
23
23
  }