npm - @sanity/ailf - Versions diffs - 7.1.0 → 7.2.0 - Mend

@sanity/ailf 7.1.0 → 7.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38)

package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +10 -0
package/dist/_vendor/ailf-core/schemas/index.d.ts +1 -0
package/dist/_vendor/ailf-core/schemas/index.js +4 -0
package/dist/_vendor/ailf-core/schemas/report.d.ts +11 -0
package/dist/_vendor/ailf-core/schemas/report.js +14 -0
package/dist/_vendor/ailf-core/schemas/user.d.ts +22 -0
package/dist/_vendor/ailf-core/schemas/user.js +23 -0
package/dist/_vendor/ailf-core/types/index.d.ts +29 -0
package/dist/_vendor/ailf-core/types/index.js +13 -0
package/dist/_vendor/ailf-core/types/user.d.ts +49 -0
package/dist/_vendor/ailf-core/types/user.js +1 -0
package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -1
package/dist/_vendor/ailf-shared/document-ref.js +23 -1
package/dist/_vendor/ailf-shared/generated/help-content.js +26 -14
package/dist/_vendor/ailf-shared/index.d.ts +1 -1
package/dist/_vendor/ailf-shared/index.js +1 -0
package/dist/_vendor/ailf-shared/owner-teams.js +19 -6
package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +15 -1
package/dist/adapters/grader-outputs/promptfoo-grader-output.d.ts +2 -2
package/dist/adapters/task-sources/content-lake-task-source.js +12 -7
package/dist/orchestration/steps/compute-attribution-step.d.ts +2 -2
package/dist/orchestration/steps/compute-attribution-step.js +17 -2
package/dist/orchestration/steps/gap-analysis-step.d.ts +2 -2
package/dist/orchestration/steps/gap-analysis-step.js +29 -10
package/dist/orchestration/steps/publish-report-step.d.ts +15 -1
package/dist/orchestration/steps/publish-report-step.js +63 -6
package/dist/pipeline/calculate-scores.d.ts +13 -1
package/dist/pipeline/calculate-scores.js +125 -22
package/dist/pipeline/enrichment-preconditions.d.ts +52 -0
package/dist/pipeline/enrichment-preconditions.js +84 -0
package/dist/pipeline/extract-grader-judgments-resilient.d.ts +88 -0
package/dist/pipeline/extract-grader-judgments-resilient.js +122 -0
package/dist/report-store.d.ts +1 -0
package/dist/report-store.js +2 -0
package/dist/sanity/queries.d.ts +1 -1
package/dist/sanity/queries.js +1 -0
package/dist/sources.js +40 -2
package/package.json +1 -1

package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts CHANGED Viewed

@@ -53,6 +53,16 @@ export interface DocumentManifestEntry {
     _id: string;
     _rev: string;
     slug: string;
+    /** Parent section slug (`primarySection->slug.current`), when resolvable. */
+    sectionSlug?: string;
+    /**
+     * Full URL path under `/docs/` (e.g. `content-lake/groq-introduction`)
+     * composed via `buildContextDocPath` from `sectionSlug + "/" + slug`.
+     * Optional — historical manifests written before W0287 only carry
+     * `slug`; downstream `DocumentRef` builders fall back to slug-only
+     * display when this is absent.
+     */
+    path?: string;
     title: string;
 }
 /** Impact of a content release on canonical documents */

package/dist/_vendor/ailf-core/schemas/index.d.ts CHANGED Viewed

@@ -21,3 +21,4 @@ export * from "./symbol-preflight-report.js";
 export * from "./test-budgets.js";
 export { ConfidenceSchema } from "./confidence-schema.js";
 export { brandedString } from "./branded-string.js";
+export { AilfUserSchema } from "./user.js";

package/dist/_vendor/ailf-core/schemas/index.js CHANGED Viewed

@@ -28,3 +28,7 @@ export { ConfidenceSchema } from "./confidence-schema.js";
 // helper instead of replicating `as unknown as z.ZodType<…>` at each
 // schema author site (project rule: no `as` on `unknown`).
 export { brandedString } from "./branded-string.js";
+// User-preferences subsystem (W0302). Named export — not `export *` — because
+// the schema file re-exports the `AilfUser` domain type, and a star re-export
+// would surface that type through two paths (W0124 DTS ambiguity).
+export { AilfUserSchema } from "./user.js";

package/dist/_vendor/ailf-core/schemas/report.d.ts CHANGED Viewed

@@ -258,6 +258,17 @@ export declare const ReportSchema: z.ZodObject<{
     artifactManifest: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
     tag: z.ZodOptional<z.ZodNullable<z.ZodString>>;
     title: z.ZodOptional<z.ZodNullable<z.ZodString>>;
+    degraded: z.ZodOptional<z.ZodObject<{
+        reason: z.ZodLiteral<"enrichment-missing">;
+        missing: z.ZodArray<z.ZodEnum<{
+            documentManifest: "documentManifest";
+            failureModes: "failureModes";
+            lowScoringJudgments: "lowScoringJudgments";
+            recommendations: "recommendations";
+            testResults: "testResults";
+        }>>;
+        detail: z.ZodString;
+    }, z.core.$strict>>;
 }, z.core.$loose>;
 export type ReportSchemaInput = z.input<typeof ReportSchema>;
 export type ReportSchemaOutput = z.infer<typeof ReportSchema>;

package/dist/_vendor/ailf-core/schemas/report.js CHANGED Viewed

@@ -25,6 +25,7 @@
  */
 import { z } from "zod";
 import { LITERACY_VARIANTS } from "../../ailf-shared/index.js";
+import { DEGRADED_ENRICHMENT_FIELDS } from "../types/index.js";
 // ---------------------------------------------------------------------------
 // RunContext building blocks (mirrors packages/shared/src/run-context.ts)
 // ---------------------------------------------------------------------------
@@ -233,5 +234,18 @@ export const ReportSchema = z
     // `title: report.title ?? null`, so the schema accepts null on both.
     tag: z.string().nullable().optional(),
     title: z.string().nullable().optional(),
+    // Degraded marker (mirrors `ReportDegradation`): present only when a full
+    // eval scored tests but enrichment did not complete. Strict — unknown
+    // keys here signal real drift.
+    degraded: z
+        .object({
+        reason: z.literal("enrichment-missing"),
+        // Enum derived from the canonical DegradedEnrichmentField tuple so the
+        // schema cannot drift from the core type.
+        missing: z.array(z.enum(DEGRADED_ENRICHMENT_FIELDS)),
+        detail: z.string().min(1),
+    })
+        .strict()
+        .optional(),
 })
     .passthrough();

package/dist/_vendor/ailf-core/schemas/user.d.ts ADDED Viewed

@@ -0,0 +1,22 @@
+import { z } from "zod";
+export declare const AilfUserSchema: z.ZodObject<{ // static type of the Zod validator for `ailf.user` documents
+    _id: z.ZodString; // runtime (user.js): must match /^ailf\.user\..+$/
+    _type: z.ZodLiteral<"ailf.user">;
+    sanityUserId: z.ZodString; // runtime (user.js): non-empty
+    email: z.ZodString; // runtime (user.js): validated with `.email()`
+    displayName: z.ZodOptional<z.ZodString>;
+    teams: z.ZodArray<z.ZodObject<{ // element shape is the shared team-reference schema
+        _type: z.ZodLiteral<"reference">;
+        _ref: z.ZodString; // runtime (user.js): non-empty
+        _key: z.ZodOptional<z.ZodString>;
+    }, z.core.$strip>>;
+    preferences: z.ZodObject<{
+        primaryTeam: z.ZodOptional<z.ZodObject<{ // same reference shape as `teams[]` items
+            _type: z.ZodLiteral<"reference">;
+            _ref: z.ZodString;
+            _key: z.ZodOptional<z.ZodString>;
+        }, z.core.$strip>>;
+    }, z.core.$strip>;
+    updatedAt: z.ZodString; // runtime (user.js): validated with `.datetime()`
+}, z.core.$strip>;
+export type { AilfUser } from "../types/user.js";

package/dist/_vendor/ailf-core/schemas/user.js ADDED Viewed

@@ -0,0 +1,23 @@
+import { z } from "zod";
+// `_id` is constructed as `ailf.user.${CurrentUser.id}` at write time. The
+// account id segment is opaque (may contain `|`, `.`, etc. for SSO providers),
+// so the prefix is all we constrain here. The deterministic-id invariant
+// (`_id === ailf.user.${sanityUserId}`) is enforced on the write path.
+const USER_ID_REGEX = /^ailf\.user\..+$/; // literal "ailf.user." prefix, then one or more non-newline characters
+const TeamReferenceSchema = z.object({ // shared by `teams[]` items and `preferences.primaryTeam` below
+    _type: z.literal("reference"),
+    _ref: z.string().min(1), // an empty reference target is rejected
+    _key: z.string().optional(), // optional so the same shape fits array members and the single-valued field
+});
+export const AilfUserSchema = z.object({ // runtime validator for `ailf.user` documents
+    _id: z.string().regex(USER_ID_REGEX), // prefix check only; equality with `sanityUserId` is not checked here
+    _type: z.literal("ailf.user"),
+    sanityUserId: z.string().min(1), // must be non-empty
+    email: z.string().email(), // format check only; the schema does no case normalization
+    displayName: z.string().optional(),
+    teams: z.array(TeamReferenceSchema), // no minimum length, so an empty array validates
+    preferences: z.object({ // the object itself is required; its only key is optional
+        primaryTeam: TeamReferenceSchema.optional(), // shape-checked only; not cross-checked against `teams`
+    }),
+    updatedAt: z.string().datetime(), // must be an ISO 8601 datetime string
+});

package/dist/_vendor/ailf-core/types/index.d.ts CHANGED Viewed

@@ -42,6 +42,7 @@ export type { AttributionMeta, DocAttribution, JudgmentAttribution, } from "./at
 export type { CriterionSubJudgment, DocCitation, DocCitationRole, GraderEmittedJudgment, GraderJudgment, } from "./grader-judgment.js";
 export type { LegacyGraderJudgment } from "./legacy-grader-judgment.js";
 export type { BaseChannel, ChannelScope, EmailChannel, EventType, KnownEventType, KnownMemberRole, MemberRole, NotificationChannel, NotificationChannelType, SlackChannel, Team, TeamId, TeamMember, TeamRef, TeamSlug, TeamStatus, WebhookChannel, } from "./team.js";
+export type { AilfUser, AilfUserPreferences, TeamReference } from "./user.js";
 type DocumentRef = _DocumentRef;
 /** Aggregated retrieval metrics for a feature area */
 export interface AreaRetrievalMetrics {
@@ -1488,8 +1489,36 @@ export interface ArtifactRef {
  * two becomes a compile error (W0049 review finding C1).
  */
 export type ArtifactManifest = Partial<Record<ArtifactType, ArtifactRef>>;
+/**
+ * Enrichment surfaces that gap-analysis writes onto a report. When a full
+ * eval scores tests but these are absent, the report renders as "no tests"
+ * despite carrying a score — the degraded condition `ReportDegradation`
+ * records. `schemas/report.js` derives its `degraded.missing` enum from it.
+ */
+export declare const DEGRADED_ENRICHMENT_FIELDS: readonly ["documentManifest", "failureModes", "lowScoringJudgments", "recommendations", "testResults"];
+export type DegradedEnrichmentField = (typeof DEGRADED_ENRICHMENT_FIELDS)[number]; // union of the tuple's string literals
+/**
+ * Marks a published report as degraded: the eval ran and scored tests, but
+ * one or more enrichment surfaces never landed (e.g. gap-analysis skipped
+ * because `grader-judgments.json` was missing). Present so the dashboard and
+ * Studio can show "enrichment failed" rather than a misleading empty
+ * "no tests" state on a report that still has a score.
+ */
+export interface ReportDegradation {
+    /** Why the report is degraded. One variant today; widen it together with the `z.literal` in `ReportSchema`. */
+    reason: "enrichment-missing";
+    /** Enrichment surfaces absent on this report despite a full eval. `ReportSchema` sets no minimum length. */
+    missing: DegradedEnrichmentField[];
+    /** Human-readable explanation for dashboard / Studio empty-state copy. `ReportSchema` rejects an empty string. */
+    detail: string;
+}
 /** A published evaluation report — the atomic unit of the report store */
 export interface Report {
+    /**
+     * Set when the report is published in a degraded state — a full eval
+     * scored tests but enrichment did not complete. Absent on healthy reports.
+     */
+    degraded?: ReportDegradation;
     /**
      * Snapshot of the run manifest's `artifacts` slice at publish time (D0032).
      * The source of truth lives in `gs://…/runs/{runId}/manifest.json`; this

package/dist/_vendor/ailf-core/types/index.js CHANGED Viewed

@@ -42,3 +42,16 @@ export function isLegacyFailureMode(mode) {
  * that imports it from @sanity/ailf-core.
  */
 export { NOISE_THRESHOLD as DEFAULT_NOISE_THRESHOLD } from "../../ailf-shared/index.js";
+/**
+ * Enrichment surfaces that gap-analysis writes onto a report. When a full
+ * eval scores tests but these are absent, the report renders as "no tests"
+ * despite carrying a score — the degraded condition `ReportDegradation`
+ * records. `schemas/report.js` derives its `degraded.missing` enum from it.
+ */
+export const DEGRADED_ENRICHMENT_FIELDS = [
+    "documentManifest",
+    "failureModes",
+    "lowScoringJudgments",
+    "recommendations",
+    "testResults",
+];

package/dist/_vendor/ailf-core/types/user.d.ts ADDED Viewed

@@ -0,0 +1,49 @@
+/**
+ * A Sanity reference to an `ailf.team` document.
+ *
+ * Members of an array (`AilfUser.teams[]`) carry a `_key`; the single-valued
+ * `preferences.primaryTeam` does not. The team slug downstream consumers need
+ * is a derived, read-time value from a GROQ projection — never stored here.
+ */
+export interface TeamReference {
+    _type: "reference";
+    _ref: string; // `TeamReferenceSchema` (schemas/user.js) rejects an empty string here
+    _key?: string; // optional so one shape serves both `teams[]` items and `primaryTeam`
+}
+/**
+ * Per-user UI preferences. Room to grow (default view, density, …) — kept
+ * minimal for v0 (YAGNI).
+ */
+export interface AilfUserPreferences {
+    /**
+     * Reference to the user's default team — one of `AilfUser.teams[]`. Distinct
+     * from `teams[]` so "which team's view do I default to" can differ from "all
+     * teams I affiliate with". The slug is derived in GROQ at read time.
+     */
+    primaryTeam?: TeamReference; // `AilfUserSchema` checks the shape only, not membership in `teams[]`
+}
+/**
+ * Per-account user document — one per Sanity account, keyed by a deterministic
+ * `_id` of `ailf.user.${sanityUserId}`. Stores self-declared team affiliation
+ * (references to `ailf.team`) plus UI preferences, and is the primary source
+ * for dashboard personalization. Stores minimal PII: `sanityUserId`, `email`,
+ * and `displayName` only.
+ *
+ * @see docs/design-docs/user-settings.md
+ */
+export interface AilfUser {
+    /** Deterministic: `ailf.user.${sanityUserId}`. `AilfUserSchema` checks only the `ailf.user.` prefix. */
+    _id: string;
+    _type: "ailf.user"; // literal document type; `AilfUserSchema` accepts no other value
+    /** `CurrentUser.id` — the stable, globally-unique key, mirrored for GROQ. */
+    sanityUserId: string;
+    /** Denormalized for display / joins (lowercased at write time). */
+    email: string;
+    /** `CurrentUser.name` snapshot. */
+    displayName?: string;
+    /** Self-declared affiliation — drives personalization only. */
+    teams: TeamReference[];
+    preferences: AilfUserPreferences; // always present; may be `{}` because its only field is optional
+    /** ISO 8601 UTC — stamped on each save. */
+    updatedAt: string;
+}

package/dist/_vendor/ailf-core/types/user.js ADDED Viewed

@@ -0,0 +1 @@
+export {};

package/dist/_vendor/ailf-shared/document-ref.d.ts CHANGED Viewed

@@ -22,8 +22,36 @@ export interface DocumentRef {
      * Named `revision` (not `_rev`) for the same Sanity reserved-name reason.
      */
     revision?: string;
-    /** URL-path identifier (e.g., "groq-introduction") */
+    /** URL-path identifier (e.g., "groq-introduction") — leaf segment only. */
     slug: string;
+    /**
+     * Full URL path under `/docs/` (e.g., `content-lake/groq-introduction`).
+     * Composed from the article's `primarySection->slug.current` and
+     * `slug.current` via {@link buildContextDocPath}. Optional — historical
+     * reports written before W0287 carry only `slug`; consumers must fall
+     * back to `slug` for display when `path` is absent.
+     */
+    path?: string;
     /** Human-readable document title */
     title: string;
 }
+/**
+ * Compose the canonical `/docs/`-relative path for a context-doc reference.
+ *
+ * Single source of truth across producers (eval doc fetcher, repo-task
+ * mirroring) and consumers (dashboard projections). Resolution order:
+ *
+ * 1. A non-empty `path` (e.g. authored on a YAML/repo-mirrored task) wins.
+ * 2. Otherwise compose `sectionSlug + "/" + slug` when both are non-empty.
+ * 3. Otherwise `null` — no caller can build a working docs URL, so
+ *    consumers should disable the link rather than emit a 404.
+ *
+ * The leaf `slug` alone is never returned as the path because
+ * `article.slug.current` is leaf-only on sanity.io/docs; the hierarchy
+ * lives on `primarySection->slug.current`.
+ */
+export declare function buildContextDocPath(input: {
+    path?: string | null;
+    sectionSlug?: string | null;
+    slug?: string | null;
+}): string | null;

package/dist/_vendor/ailf-shared/document-ref.js CHANGED Viewed

@@ -1 +1,23 @@
-export {};
+/**
+ * Compose the canonical `/docs/`-relative path for a context-doc reference.
+ *
+ * Single source of truth across producers (eval doc fetcher, repo-task
+ * mirroring) and consumers (dashboard projections). Resolution order:
+ *
+ * 1. A non-empty `path` (e.g. authored on a YAML/repo-mirrored task) wins.
+ * 2. Otherwise compose `sectionSlug + "/" + slug` when both are non-empty.
+ * 3. Otherwise `null` — no caller can build a working docs URL, so
+ *    consumers should disable the link rather than emit a 404.
+ *
+ * The leaf `slug` alone is never returned as the path because
+ * `article.slug.current` is leaf-only on sanity.io/docs; the hierarchy
+ * lives on `primarySection->slug.current`.
+ */
+export function buildContextDocPath(input) {
+    if (input.path) // truthiness test: null, undefined and "" all fall through
+        return input.path; // returned verbatim, with no normalization
+    if (input.sectionSlug && input.slug) { // both segments must be truthy
+        return `${input.sectionSlug}/${input.slug}`;
+    }
+    return null; // slug-only input is deliberately not turned into a path
+}

package/dist/_vendor/ailf-shared/generated/help-content.js CHANGED Viewed

@@ -44,6 +44,17 @@ export const HELP_TOPICS = [
             "scoring-model"
         ]
     },
+    {
+        "id": "failure-modes",
+        "title": "Failure Modes",
+        "body": "## What this view is for\n\nThe Recommendations view tells you which fixes to make. This view tells you what\nkind of problem you have. It groups the run's weaknesses by the documentation\nissue behind them, so you can see patterns across the whole evaluation rather\nthan one fix at a time. If most of your weak spots are the same kind of problem,\nthat is a signal about how to spend your docs effort.\n\n## What you are looking at\n\nRecent reports show **interpretive cards** drawn from the run's diagnosis:\n\n- **Weakest area** names the single feature area dragging the score down most,\n  the failure mode behind it, and a confidence level with the sample size, so\n  you know how strong the signal is.\n- **Failure mode** highlights one category of problem, which scoring dimension\n  it shows up in, and how often it occurred across the tests that were checked.\n- **Area summary** gives a plain-language read on how an area is doing and why.\n\nOlder reports show a **category breakdown** instead. Each failure category is a\nchip with a count. Selecting a chip lists the gaps in that category, and each\ngap shows an estimated score lift if fixed, a confidence level, a short\nremediation note, and the specific tasks that exposed it. You can click a task\nto jump to it.\n\n## The failure modes\n\nEach weakness is sorted into one of these categories. The category is the\nfastest way to know what kind of work the fix needs:\n\n- **Missing docs**: the doc the model needed does not exist or is not indexed.\n  The fix is to write new documentation.\n- **Incorrect docs**: a doc has a factual error or a wrong example. The fix is\n  to correct it.\n- **Outdated docs**: a doc exists but reflects a previous API surface. The fix\n  is to bring it up to date.\n- **Poor structure**: the information is correct but hard for an agent to find\n  or skim. 
The fix is to reorganize or clarify.\n- **Model limitation**: the model struggles even with correct docs available.\n  This is not a documentation problem, so treat it as context rather than a\n  to-do.\n- **Unclassified**: the run could not categorize the weakness. Use the linked\n  tasks and the grader's notes to judge it yourself.\n\nDepending on the evaluation mode you may see additional categories, including\nones specific to agent behavior such as tool misuse or missing error handling.\n\n## How to use it\n\nStart with the category that has the most gaps or the highest combined lift. The\ncategory tells you the shape of the work before you open a single page: write,\ncorrect, update, or restructure. Categories that are not documentation problems,\nsuch as model limitation, are worth noting but are not yours to fix in the docs.\n\n## Related views\n\n- **Recommendations** turns these weaknesses into a ranked list of specific\n  edits.\n- **Low-scoring judgments** shows the grader's raw notes on the tests that\n  scored lowest, which is the most granular signal behind any failure mode.\n\n## When this view is empty\n\nIf a report shows no failure modes, the evaluation either classified nothing\nworth flagging or the run predates this view. A clean result here usually means\nthe docs held up across the evaluated tasks.",
+        "source": "docs/help/failure-modes.md",
+        "related": [
+            "recommendations",
+            "scoring-model",
+            "negative-doc-lift"
+        ]
+    },
     {
         "id": "getting-started",
         "title": "Getting Started",
@@ -57,11 +68,12 @@ export const HELP_TOPICS = [
     {
         "id": "interpreting-diagnostics",
         "title": "Interpreting Diagnostics",
-        "body": "## The diagnostics tab\n\nWhen you open a report and click the **Diagnostics** tab, you see a health\nsummary of your documentation across all feature areas. This is the most\nactionable view in the dashboard — it tells you exactly where to focus your doc\nimprovement efforts.\n\n## Health categories\n\nFeature areas are grouped into three health bands:\n\n- **Strong (80+)** — Docs are working well. AI agents produce correct, complete\n  implementations. No action needed unless you see regression.\n- **Needs Attention (70–79)** — Docs are okay but have gaps. There may be\n  specific dimensions (like code correctness or doc coverage) dragging the score\n  down. Worth investigating.\n- **Weak (below 70)** — Docs are not providing enough support. AI agents\n  consistently struggle with these features. These need priority attention.\n\n## Strengths vs. Issues\n\nThe diagnostics tab has two sub-views:\n\n**Strengths** highlights what's working: high-scoring areas, strong dimensions,\nand areas where agents successfully find and use your docs. 
Use this to\nunderstand what good looks like in your docs — and replicate it elsewhere.\n\n**Issues** lists the problems: weak areas, dimensions scoring below 50, negative\ndoc lift, retrieval problems, and (if gap analysis was run) specific\nrecommendations with estimated score lift.\n\n## Key diagnostic signals\n\n| Signal                         | What it means                              | What to do                               |\n| ------------------------------ | ------------------------------------------ | ---------------------------------------- |\n| **Negative doc lift**          | Docs are worse than no docs                | Rewrite or remove the offending docs     |\n| **Large retrieval gap**        | Good docs exist but agents can't find them | Improve page titles, metadata, SEO       |\n| **Low code correctness**       | Agents find the docs but produce bad code  | Add or fix code examples                 |\n| **Low doc coverage**           | The docs don't cover what the task needs   | Write new documentation                  |\n| **Efficiency anomaly (>100%)** | Agents do better without gold docs         | Injected docs may be confusing the model |",
+        "body": "## Reading the health of your docs\n\nA report scores each feature area on how well your documentation lets AI coding\ntools implement that feature. Reading those scores well is what turns a number\ninto a plan: it tells you where the docs are working, where they are not, and\nwhat kind of problem you are dealing with.\n\n## Health bands\n\nEach area's score falls into one of three bands:\n\n- **Strong (80 and above)**: docs are working well. Agents produce correct,\n  complete implementations. No action needed unless you see a regression.\n- **Needs attention (70 to 79)**: docs are okay but have gaps. A specific\n  dimension such as code correctness or doc coverage may be dragging the score\n  down. Worth investigating.\n- **Weak (below 70)**: docs are not providing enough support. Agents\n  consistently struggle with these features. These need priority attention.\n\n## Strong areas are signal too\n\nIt is easy to focus only on what is broken, but the strong areas are worth\nreading. They show what good looks like in your docs: clear structure, accurate\nexamples, the patterns agents can follow. When you fix a weak area, that is the\nbar to copy.\n\n## Key diagnostic signals\n\nA low score has a reason behind it. 
These signals tell you which reason, and\nwhat to do about it:\n\n| Signal                         | What it means                               | What to do                               |\n| ------------------------------ | ------------------------------------------- | ---------------------------------------- |\n| **Negative doc lift**          | Docs are worse than no docs                 | Rewrite or remove the offending docs     |\n| **Large retrieval gap**        | Good docs exist but agents cannot find them | Improve page titles, metadata, structure |\n| **Low code correctness**       | Agents find the docs but produce bad code   | Add or fix code examples                 |\n| **Low doc coverage**           | The docs do not cover what the task needs   | Write new documentation                  |\n| **Efficiency anomaly (>100%)** | Agents do better without the docs           | Injected docs may be confusing the model |\n\n## Where to go next\n\nWhen you know which areas are weak and why, the **Recommendations** view turns\nthat into a ranked list of specific edits, and the **Failure modes** view groups\nthe weaknesses by the kind of documentation problem behind them.",
         "source": "docs/help/interpreting-diagnostics.md",
         "related": [
-            "scoring-model",
-            "weaknesses-recommendations"
+            "recommendations",
+            "failure-modes",
+            "scoring-model"
         ]
     },
     {
@@ -74,6 +86,17 @@ export const HELP_TOPICS = [
             "comparing-runs"
         ]
     },
+    {
+        "id": "recommendations",
+        "title": "Recommendations",
+        "body": "## What this view is for\n\nThis is the \"what do I fix\" view. The scores tell you how well your\ndocumentation supports AI coding tools. This view turns those scores into a\nranked list of specific changes, so you can spend your time on the edits that\nshould move the score the most.\n\nEverything here comes from the same evaluation run you are looking at, and it\npoints at your own documentation pages rather than giving generic advice.\n\n## What you are looking at\n\nRecent reports show a set of **diagnosis cards**. Each card answers one question\nabout the run.\n\n**Top recommendations** is the main card. It opens with a short summary, then\nlists a few suggested changes ranked by priority. Each suggestion has:\n\n- A **priority** tag of high, medium, or low that tells you what to do first.\n- A **title** that names the change in one line.\n- A **description** of the specific fix, usually quoting the exact symbol,\n  query, or pattern involved.\n- A **doc reference** showing which page, and the section when it is known, the\n  change applies to. Every reference points to a real page that was part of this\n  run, so you can open it and start editing.\n\nYou may also see supporting cards:\n\n- **Doc attribution spotlight** shows which documentation pages most influenced\n  the results, and whether each one helped or hurt. Use it to confirm a\n  recommendation is pointing at the right page.\n- **Low-confidence attribution** lists results where the link between a doc and\n  an outcome was uncertain. Treat anything flagged here as a lead to verify, not\n  a settled conclusion.\n- **Regression vs baseline** appears when you are comparing against an earlier\n  run. It shows which areas moved up or down and the likely reason for each\n  change.\n\n## How to use it\n\nWork top down. Start with the high-priority suggestions, open the referenced\npage, and make the change. 
Priority reflects how much each change is expected to\nhelp, so the top of the list is usually where your effort goes furthest.\n\nThe recommendations are written by a model that reads this run's results. They\nare grounded in your actual docs and cannot reference a page that was not in the\nrun, but they are still suggestions. Read the linked page before acting, and use\nthe confidence signals to decide how much to trust each item.\n\n## Where this comes from\n\nA recommendation is the end of a chain: a test scored low, the grader said why,\nthe run classified that into a failure mode, and this view proposes the edit. If\nyou want to see the failure modes themselves, grouped by category, open the\n**Failure modes** view. If you want the grader's raw notes on the lowest scores,\nopen the **Low-scoring judgments** view.\n\n## Older reports\n\nReports created before the diagnosis cards shipped show a simpler list instead.\nEach row names a feature area, the failure mode behind it, an estimated score\nlift if you fix it, a confidence level, and the tasks that exposed the gap. The\nestimated lift is conservative. It assumes fixing the gap raises the weak\ndimension only to the median of the others, so the real improvement can be\nhigher.\n\n## When this view is empty\n\nIf a report shows no recommendations, the evaluation either ran and found\nnothing worth flagging, or the run predates this feature. A score with no\nrecommendations is usually a good sign, because it means the docs held up across\nthe evaluated tasks.",
+        "source": "docs/help/recommendations.md",
+        "related": [
+            "failure-modes",
+            "interpreting-diagnostics",
+            "scoring-model"
+        ]
+    },
     {
         "id": "retrieval-gap",
         "title": "Retrieval Gap & Infrastructure Efficiency",
@@ -96,17 +119,6 @@ export const HELP_TOPICS = [
             "eval-modes"
         ]
     },
-    {
-        "id": "weaknesses-recommendations",
-        "title": "Weaknesses & Recommendations",
-        "body": "## Understanding weaknesses\n\nThe Issues sub-tab in Diagnostics lists every area or dimension that scored\nbelow threshold. Each weakness entry shows:\n\n- **The feature area** — Which product feature is affected (e.g., GROQ,\n  Functions, Webhooks).\n- **The bottleneck dimension** — Which scoring dimension is dragging the area\n  down: task completion, code correctness, or doc coverage.\n- **The score** — How far below threshold the dimension scored.\n\n## Gap analysis recommendations\n\nWhen an evaluation runs with gap analysis enabled, the dashboard shows\n**prioritized recommendations** — specific actions ranked by estimated impact.\n\nEach recommendation includes:\n\n- **Failure mode** — The type of doc problem identified:\n  - `missing-docs` — The functionality isn't documented at all.\n  - `incorrect-docs` — The docs contain factual errors.\n  - `outdated-docs` — The docs describe an old API version or pattern.\n  - `poor-structure` — The docs exist but are hard to find or understand.\n- **Estimated lift** — How many score points fixing this gap would add. Based on\n  raising the bottleneck dimension to the median of non-bottleneck dimensions.\n  Conservative estimate — actual improvement may be higher.\n- **Confidence** — How sure the analysis is about this diagnosis (high, medium,\n  or low).\n- **Affected tasks** — Which specific evaluation tasks exposed this gap.\n\n## Diagnosis cards\n\nEvery published report now carries a **diagnosis artifact** — a set of cards\nproduced by the post-pipeline hook (`ailf interpret`). The Studio diagnosis\npanel renders these cards directly; the dashboard's Recommendations and\nFailure-modes panels migrate to the same source in a follow-up.\n\nThe hook runs by default for every pipeline invocation. 
To opt out for a single\nrun, pass `--no-summary`; to opt out in CI, set `AILF_INTERPRET_ON_RUN=0` in the\nworkflow env block; to opt out project-wide, set `summary.onRun: never` in\n`.ailf/config.yaml`.\n\n## Low-scoring judgments\n\nBelow the recommendations, you'll find the **grader's explanations** for tests\nthat scored below 70. These are the raw assessments from the grading model\nexplaining exactly what went wrong — missing API calls, incorrect patterns,\nhallucinated features, etc.\n\nEach judgment shows the task, the dimension, the score, and the grader's natural\nlanguage reason. These are the most granular diagnostic signal available and\noften point directly to the doc section that needs fixing.",
-        "source": "docs/help/weaknesses-recommendations.md",
-        "related": [
-            "interpreting-diagnostics",
-            "scoring-model",
-            "negative-doc-lift"
-        ]
-    },
     {
         "id": "how-agents-work",
         "title": "How AI Agents Find Documentation",

package/dist/_vendor/ailf-shared/index.d.ts CHANGED Viewed

@@ -18,7 +18,7 @@
  * surface against future regressions.
  */
 export { computeCanaryDrift, type CanaryDriftReport, type CanaryReportSlim, type DriftEntry, type DriftThresholds, type DriftVerdict, } from "./canary-drift.js";
-export { type DocumentRef } from "./document-ref.js";
+export { buildContextDocPath, type DocumentRef } from "./document-ref.js";
 export { makeEditorialReference, type EditorialReference, type MakeEditorialReferenceArgs, } from "./editorial-reference.js";
 export { isKnownEventType, KNOWN_EVENT_TYPES, type EventType, type KnownEventType, } from "./event-types.js";
 export { FEATURE_FLAGS, type FeatureFlag, type FeatureFlagKey, } from "./feature-flags.js";

package/dist/_vendor/ailf-shared/index.js CHANGED Viewed

@@ -18,6 +18,7 @@
  * surface against future regressions.
  */
 export { computeCanaryDrift, } from "./canary-drift.js";
+export { buildContextDocPath } from "./document-ref.js";
 export { makeEditorialReference, } from "./editorial-reference.js";
 export { isKnownEventType, KNOWN_EVENT_TYPES, } from "./event-types.js";
 export { FEATURE_FLAGS, } from "./feature-flags.js";

package/dist/_vendor/ailf-shared/owner-teams.js CHANGED Viewed

@@ -14,11 +14,21 @@
  * @see docs/decisions/D0037-run-classification-and-ownership-taxonomy.md
  */
 export const KNOWN_OWNER_TEAMS = [
+    "ai-growth",
+    "billing-and-integrations",
+    "content-agent",
     "content-lake",
-    "core-docs",
-    "growth",
-    "media",
-    "platform",
+    "data",
+    "design-and-research",
+    "docs",
+    "editorial-experience",
+    "engineering",
+    "identity",
+    "media-library",
+    "product",
+    "runtime",
+    "sdk",
+    "ssi",
     "studio",
 ];
 /**
@@ -26,8 +36,11 @@ export const KNOWN_OWNER_TEAMS = [
  * drift has been observed belong here. Unknown values pass through.
  */
 const OWNER_TEAM_ALIASES = {
-    coredocs: "core-docs",
-    docs: "core-docs",
+    "core-docs": "docs",
+    coredocs: "docs",
+    documentation: "docs",
+    growth: "ai-growth",
+    media: "media-library",
     studio_team: "studio",
     "studio-team": "studio",
 };

package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js CHANGED Viewed

@@ -16,6 +16,7 @@
 import { mkdirSync, writeFileSync } from "fs";
 import { join } from "path";
 import { canonicalDocRefLabel, isIdRef, isPathRef, isPerspectiveRef, isSlugRef, } from "../../_vendor/ailf-core/index.js";
+import { buildContextDocPath } from "../../_vendor/ailf-shared/index.js";
 import { fetchUrlContent, } from "../../pipeline/fetch-url-content.js";
 import { createPerspectiveClient, createPublishedClient, getSanityClient, } from "../../sanity/client.js";
 import { extractSymbolsForDoc, renderDocument, } from "../../sanity/document-renderers.js";
@@ -376,7 +377,20 @@ export class SanityDocFetcher {
             : getSanityClient(toSanityOverrides(source));
         const allMetadata = await client.fetch(ARTICLES_METADATA_BY_SLUGS_QUERY, { slugs: [...allSlugs] });
         return allMetadata
-            .map((m) => ({ _id: m._id, _rev: m._rev, slug: m.slug, title: m.title }))
+            .map((m) => {
+            const path = buildContextDocPath({
+                sectionSlug: m.sectionSlug,
+                slug: m.slug,
+            });
+            return {
+                _id: m._id,
+                _rev: m._rev,
+                slug: m.slug,
+                ...(m.sectionSlug ? { sectionSlug: m.sectionSlug } : {}),
+                ...(path ? { path } : {}),
+                title: m.title,
+            };
+        })
             .sort((a, b) => a.slug.localeCompare(b.slug));
     }
     // -----------------------------------------------------------------------

package/dist/adapters/grader-outputs/promptfoo-grader-output.d.ts CHANGED Viewed

@@ -76,9 +76,9 @@ export declare const GraderJudgmentSchema: z.ZodObject<{
         documentId: z.ZodString;
         slug: z.ZodOptional<z.ZodString>;
         role: z.ZodEnum<{
+            missing: "missing";
             supports: "supports";
             contradicts: "contradicts";
-            missing: "missing";
             irrelevant: "irrelevant";
         }>;
         hallucinated: z.ZodOptional<z.ZodBoolean>;
@@ -145,9 +145,9 @@ export declare const GraderEmittedJudgmentSchema: z.ZodObject<{
         documentId: z.ZodString;
         slug: z.ZodOptional<z.ZodString>;
         role: z.ZodEnum<{
+            missing: "missing";
             supports: "supports";
             contradicts: "contradicts";
-            missing: "missing";
             irrelevant: "irrelevant";
         }>;
         hallucinated: z.ZodOptional<z.ZodBoolean>;

package/dist/adapters/task-sources/content-lake-task-source.js CHANGED Viewed

@@ -15,6 +15,7 @@
  * @see packages/core/src/ports/task-source.ts — TaskSource port
  * @see docs/decisions/D0038-content-lake-authorable-task-modes.md
  */
+import { buildContextDocPath } from "../../_vendor/ailf-shared/index.js";
 import { filterByChangedDocs } from "./changed-docs-filter.js";
 import { ContentLakeAuthorableTaskSchema } from "./repo-schemas.js";
 // ---------------------------------------------------------------------------
@@ -223,9 +224,11 @@ function mapCanonicalDocRef(raw) {
         case "slug":
             return raw.slug ? { slug: raw.slug, reason } : null;
         case "path": {
-            // Prefer explicit path field; fall back to deriving from doc reference
-            const path = raw.path ||
-                (raw.sectionSlug && raw.slug ? `${raw.sectionSlug}/${raw.slug}` : null);
+            const path = buildContextDocPath({
+                path: raw.path,
+                sectionSlug: raw.sectionSlug,
+                slug: raw.slug,
+            });
             return path ? { path, reason } : null;
         }
         case "id": {
@@ -233,10 +236,12 @@ function mapCanonicalDocRef(raw) {
             const id = raw.docId || raw.docRefId || null;
             if (!id)
                 return null;
-            // Carry slug and derived path as optional DX annotations
-            const derivedPath = raw.sectionSlug && raw.slug
-                ? `${raw.sectionSlug}/${raw.slug}`
-                : undefined;
+            // Carry slug and derived path as optional DX annotations — single
+            // source of truth in `buildContextDocPath` (@sanity/ailf-shared).
+            const derivedPath = buildContextDocPath({
+                sectionSlug: raw.sectionSlug,
+                slug: raw.slug,
+            });
             return {
                 id,
                 reason,

package/dist/orchestration/steps/compute-attribution-step.d.ts CHANGED Viewed

@@ -35,10 +35,10 @@
  * @see docs/decisions/D0050-per-entry-attribution-layout.md
  * @see docs/decisions/D0052-judgment-ref-granularity.md
  */
-import type { AppContext, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
+import type { AppContext, PipelineState, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
 export declare class ComputeAttributionStep implements PipelineStep {
     readonly name = "compute-attribution";
     readonly optional = true;
     check(ctx: AppContext): ValidationIssue[];
-    execute(ctx: AppContext, _state?: unknown): Promise<StepResult>;
+    execute(ctx: AppContext, state?: PipelineState): Promise<StepResult>;
 }

package/dist/orchestration/steps/compute-attribution-step.js CHANGED Viewed

@@ -40,6 +40,7 @@ import { resolve } from "node:path";
 import { isSlugRef } from "../../_vendor/ailf-core/index.js";
 import { calibrationSetVersion, embeddingModel, ensembleVersion, } from "../../pipeline/attribution.js";
 import { V0_WEIGHTS, computeJudgmentAttribution, } from "../../pipeline/compute-attribution.js";
+import { classifyEnrichmentInputs, degradedEnrichmentError, } from "../../pipeline/enrichment-preconditions.js";
 // ---------------------------------------------------------------------------
 // Step implementation
 // ---------------------------------------------------------------------------
@@ -79,12 +80,26 @@ export class ComputeAttributionStep {
         }
         return issues;
     }
-    async execute(ctx, _state) {
+    async execute(ctx, state) {
         const start = Date.now();
         const root = ctx.config.rootDir;
         const judgmentsPath = resolve(root, "results", "latest", "grader-judgments.json");
         const summaryPath = resolve(root, "results", "latest", "score-summary.json");
-        if (!existsSync(judgmentsPath)) {
+        // Mirror gap-analysis: a full eval that scored tests but persisted no
+        // grader judgments is a degraded run, not a benign skip. Fail loud so the
+        // outcome surfaces in pipeline-result and on the job document. A remote
+        // cache hit restores score-summary.json without grader-judgments.json, so
+        // its missing judgments are legitimate — never fail loud on a cache hit.
+        const fromRemoteCache = (state?.remoteCacheHits?.size ?? 0) > 0;
+        const inputs = classifyEnrichmentInputs(root);
+        if (inputs.kind === "judgments-missing-after-eval" && !fromRemoteCache) {
+            return {
+                durationMs: Date.now() - start,
+                status: "failed",
+                error: degradedEnrichmentError("compute-attribution", inputs.scoredTestCount),
+            };
+        }
+        if (inputs.kind !== "ready") {
             return { status: "skipped", reason: "No grader-judgments.json" };
         }
         if (!existsSync(summaryPath)) {

package/dist/orchestration/steps/gap-analysis-step.d.ts CHANGED Viewed

@@ -14,10 +14,10 @@
  *
  * This is an optional step — failure doesn't stop the pipeline.
  */
-import type { AppContext, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
+import type { AppContext, PipelineState, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
 export declare class GapAnalysisStep implements PipelineStep {
     readonly name = "gap-analysis";
     readonly optional = true;
     check(ctx: AppContext): ValidationIssue[];
-    execute(ctx: AppContext): Promise<StepResult>;
+    execute(ctx: AppContext, state?: PipelineState): Promise<StepResult>;
 }