@sanity/ailf 5.0.0 → 6.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. package/config/diagnosis-cards.ts +318 -0
  2. package/config/models.ts +12 -0
  3. package/dist/_vendor/ailf-core/grader/failure-modes/agent-harness.d.ts +13 -0
  4. package/dist/_vendor/ailf-core/grader/failure-modes/agent-harness.js +16 -0
  5. package/dist/_vendor/ailf-core/grader/failure-modes/common.d.ts +14 -0
  6. package/dist/_vendor/ailf-core/grader/failure-modes/common.js +18 -0
  7. package/dist/_vendor/ailf-core/grader/failure-modes/index.d.ts +45 -0
  8. package/dist/_vendor/ailf-core/grader/failure-modes/index.js +109 -0
  9. package/dist/_vendor/ailf-core/grader/failure-modes/knowledge-probe.d.ts +13 -0
  10. package/dist/_vendor/ailf-core/grader/failure-modes/knowledge-probe.js +17 -0
  11. package/dist/_vendor/ailf-core/grader/failure-modes/literacy.d.ts +13 -0
  12. package/dist/_vendor/ailf-core/grader/failure-modes/literacy.js +17 -0
  13. package/dist/_vendor/ailf-core/grader/failure-modes/mcp.d.ts +13 -0
  14. package/dist/_vendor/ailf-core/grader/failure-modes/mcp.js +17 -0
  15. package/dist/_vendor/ailf-core/index.d.ts +1 -0
  16. package/dist/_vendor/ailf-core/index.js +4 -0
  17. package/dist/_vendor/ailf-core/services/diagnosis/card-validators.d.ts +41 -0
  18. package/dist/_vendor/ailf-core/services/diagnosis/card-validators.js +40 -0
  19. package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/area-summary.test.d.ts +7 -0
  20. package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/area-summary.test.js +131 -0
  21. package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/failure-mode-summary.test.d.ts +7 -0
  22. package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/failure-mode-summary.test.js +171 -0
  23. package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/no-issues.test.d.ts +7 -0
  24. package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/no-issues.test.js +155 -0
  25. package/dist/_vendor/ailf-core/services/diagnosis/cards/area-summary.d.ts +17 -0
  26. package/dist/_vendor/ailf-core/services/diagnosis/cards/area-summary.js +43 -0
  27. package/dist/_vendor/ailf-core/services/diagnosis/cards/doc-attribution-spotlight.d.ts +46 -0
  28. package/dist/_vendor/ailf-core/services/diagnosis/cards/doc-attribution-spotlight.js +104 -0
  29. package/dist/_vendor/ailf-core/services/diagnosis/cards/failure-mode-summary.d.ts +28 -0
  30. package/dist/_vendor/ailf-core/services/diagnosis/cards/failure-mode-summary.js +96 -0
  31. package/dist/_vendor/ailf-core/services/diagnosis/cards/index.d.ts +39 -0
  32. package/dist/_vendor/ailf-core/services/diagnosis/cards/index.js +52 -0
  33. package/dist/_vendor/ailf-core/services/diagnosis/cards/low-confidence-attribution.d.ts +27 -0
  34. package/dist/_vendor/ailf-core/services/diagnosis/cards/low-confidence-attribution.js +77 -0
  35. package/dist/_vendor/ailf-core/services/diagnosis/cards/no-issues.d.ts +32 -0
  36. package/dist/_vendor/ailf-core/services/diagnosis/cards/no-issues.js +71 -0
  37. package/dist/_vendor/ailf-core/services/diagnosis/cards/regression-vs-baseline.d.ts +44 -0
  38. package/dist/_vendor/ailf-core/services/diagnosis/cards/regression-vs-baseline.js +126 -0
  39. package/dist/_vendor/ailf-core/services/diagnosis/cards/top-recommendations.d.ts +41 -0
  40. package/dist/_vendor/ailf-core/services/diagnosis/cards/top-recommendations.js +107 -0
  41. package/dist/_vendor/ailf-core/services/diagnosis/cards/weakest-area.d.ts +43 -0
  42. package/dist/_vendor/ailf-core/services/diagnosis/cards/weakest-area.js +114 -0
  43. package/dist/_vendor/ailf-core/services/diagnosis/prompt-builders.d.ts +72 -0
  44. package/dist/_vendor/ailf-core/services/diagnosis/prompt-builders.js +273 -0
  45. package/dist/_vendor/ailf-core/services/diagnosis/prompts/doc-attribution-spotlight.system.d.ts +17 -0
  46. package/dist/_vendor/ailf-core/services/diagnosis/prompts/doc-attribution-spotlight.system.js +58 -0
  47. package/dist/_vendor/ailf-core/services/diagnosis/prompts/index.d.ts +10 -0
  48. package/dist/_vendor/ailf-core/services/diagnosis/prompts/index.js +10 -0
  49. package/dist/_vendor/ailf-core/services/diagnosis/prompts/low-confidence-attribution.system.d.ts +15 -0
  50. package/dist/_vendor/ailf-core/services/diagnosis/prompts/low-confidence-attribution.system.js +53 -0
  51. package/dist/_vendor/ailf-core/services/diagnosis/prompts/regression-vs-baseline.system.d.ts +14 -0
  52. package/dist/_vendor/ailf-core/services/diagnosis/prompts/regression-vs-baseline.system.js +63 -0
  53. package/dist/_vendor/ailf-core/services/diagnosis/prompts/top-recommendations.system.d.ts +16 -0
  54. package/dist/_vendor/ailf-core/services/diagnosis/prompts/top-recommendations.system.js +78 -0
  55. package/dist/_vendor/ailf-core/services/diagnosis/prompts/weakest-area.system.d.ts +16 -0
  56. package/dist/_vendor/ailf-core/services/diagnosis/prompts/weakest-area.system.js +86 -0
  57. package/dist/_vendor/ailf-core/services/diagnosis/registry.d.ts +10 -0
  58. package/dist/_vendor/ailf-core/services/diagnosis/registry.js +10 -0
  59. package/dist/_vendor/ailf-core/services/diagnosis-runner.d.ts +119 -2
  60. package/dist/_vendor/ailf-core/services/diagnosis-runner.js +136 -2
  61. package/dist/_vendor/ailf-core/services/index.d.ts +5 -1
  62. package/dist/_vendor/ailf-core/services/index.js +15 -2
  63. package/dist/_vendor/ailf-core/services/llm-client-factory.d.ts +64 -0
  64. package/dist/_vendor/ailf-core/services/llm-client-factory.js +54 -0
  65. package/dist/_vendor/ailf-core/types/diagnosis.d.ts +112 -10
  66. package/dist/_vendor/ailf-core/types/diagnosis.js +3 -1
  67. package/dist/_vendor/ailf-core/types/index.d.ts +1 -1
  68. package/dist/adapters/llm/fake-llm-client.d.ts +20 -0
  69. package/dist/adapters/llm/fake-llm-client.js +38 -1
  70. package/dist/adapters/llm/openai-llm-client.js +52 -3
  71. package/dist/cli-program.js +3 -0
  72. package/dist/commands/interpret.d.ts +50 -0
  73. package/dist/commands/interpret.js +212 -0
  74. package/dist/composition-root.d.ts +21 -23
  75. package/dist/composition-root.js +107 -41
  76. package/dist/config/diagnosis-cards.ts +318 -0
  77. package/dist/config/models.ts +12 -0
  78. package/dist/grader/agent-harness.d.ts +5 -10
  79. package/dist/grader/agent-harness.js +5 -13
  80. package/dist/grader/common.d.ts +5 -13
  81. package/dist/grader/common.js +5 -17
  82. package/dist/grader/index.d.ts +15 -29
  83. package/dist/grader/index.js +15 -66
  84. package/dist/grader/knowledge-probe.d.ts +5 -10
  85. package/dist/grader/knowledge-probe.js +5 -14
  86. package/dist/grader/literacy.d.ts +5 -9
  87. package/dist/grader/literacy.js +5 -13
  88. package/dist/grader/mcp.d.ts +5 -10
  89. package/dist/grader/mcp.js +5 -14
  90. package/package.json +2 -2
@@ -1,21 +1,9 @@
1
1
  /**
2
- * Cross-cutting failure modes — valid for any dimension family.
2
+ * Cross-cutting failure modes — re-export shim (D-05).
3
3
  *
4
- * Phase 3 GRAD-03 (Plan 03-02). The four cross-cutting modes capture failures
5
- * that aren't tied to a specific dimension family: infrastructure failures,
6
- * model ceiling effects, false-floor (model already knew the answer; docs
7
- * added no value), and the low-confidence fallback. The per-dimension
8
- * taxonomies (literacy, MCP, knowledge-probe, agent-harness) extend this
9
- * cross-cutting list.
4
+ * Canonical data relocated to @sanity/ailf-core.
5
+ * Existing callers of this file continue to work unchanged.
10
6
  *
11
- * @see docs/design-docs/actionability-ladder/03-structured-grader-judgments.md
12
- * §"Per-dimension failure-mode taxonomies" (lines 239-283 — the v0 lists)
13
- * @see docs/decisions/D0005-grader-model-separation.md — single grader model;
14
- * taxonomies travel with the rubric prompt for reproducibility.
7
+ * @see packages/core/src/grader/failure-modes/common.ts
15
8
  */
16
- export const COMMON_FAILURE_MODES = [
17
- "api-error", // infrastructure failure, not a docs problem
18
- "model-limitation", // high ceiling, model can't reach it
19
- "false-floor", // model already knew the answer; docs added no value
20
- "unclassified", // grader could not pick a mode (low-confidence fallback)
21
- ];
9
+ export { COMMON_FAILURE_MODES } from "../_vendor/ailf-core/index.js";
@@ -1,38 +1,24 @@
1
1
  /**
2
2
  * Per-dimension failure-mode taxonomy barrel.
3
3
  *
4
+ * D-05: taxonomy data relocated to @sanity/ailf-core so card files in
5
+ * packages/core/src/services/diagnosis/cards/ can import without violating
6
+ * the core→eval import direction rule.
7
+ *
8
+ * This file is now a re-export shim — all behavior lives in
9
+ * packages/core/src/grader/failure-modes/. Existing eval-side callers
10
+ * (rubrics.ts, rubric-resolution.ts, calibration.test.ts) continue to
11
+ * work with zero source changes.
12
+ *
4
13
  * Named re-exports only (W0124 — never `export *`).
5
14
  *
6
15
  * Consumers:
7
- * - `packages/eval/config/rubrics.ts` — calls `failureModesForDimension()` to
8
- * stamp a per-template legal-mode list onto every rubric template entry.
9
- * - `packages/eval/src/pipeline/compiler/rubric-resolution.ts` — reads
10
- * `template.failureModes` at prompt-assembly time and announces the legal
11
- * modes to the grader before the structured-shape footer (Plan 03-01).
12
- * - `packages/eval/src/grader/__tests__/calibration.test.ts` — fixture-driven
13
- * ≥90% non-`unclassified` static calibration check (ROADMAP success
14
- * criterion 1).
16
+ * - `packages/eval/config/rubrics.ts` — calls `failureModesForDimension()`
17
+ * - `packages/eval/src/pipeline/compiler/rubric-resolution.ts`
18
+ * - `packages/eval/src/grader/__tests__/calibration.test.ts`
15
19
  *
20
+ * @see packages/core/src/grader/failure-modes/index.ts — canonical location
16
21
  * @see docs/design-docs/actionability-ladder/03-structured-grader-judgments.md
17
- * §"Per-dimension failure-mode taxonomies" (lines 239-283).
18
- * @see docs/decisions/D0005-grader-model-separation.md — single grader model;
19
- * taxonomies travel with the rubric prompt for reproducibility.
20
- */
21
- export { COMMON_FAILURE_MODES, type CommonFailureMode } from "./common.js";
22
- export { LITERACY_FAILURE_MODES, type LiteracyFailureMode } from "./literacy.js";
23
- export { MCP_FAILURE_MODES, type MCPFailureMode } from "./mcp.js";
24
- export { KP_FAILURE_MODES, type KPFailureMode } from "./knowledge-probe.js";
25
- export { AGENT_FAILURE_MODES, type AgentFailureMode } from "./agent-harness.js";
26
- /**
27
- * Return the legal failure-mode list for a given rubric dimension.
28
- *
29
- * Accepts both family-level keys (`mcp-behavior`, `knowledge-probe`,
30
- * `agent-harness`) and the per-template `dimension` strings used in
31
- * `config/rubrics.ts` (`task-completion`, `input-validation`,
32
- * `factual-correctness`, `process-quality`, …). The cross-cutting
33
- * `COMMON_FAILURE_MODES` is always included.
34
- *
35
- * Unknown dimensions fall through to `COMMON_FAILURE_MODES` only — safe
36
- * default, the grader can still pick `unclassified`.
22
+ * @see docs/decisions/D0005-grader-model-separation.md
37
23
  */
38
- export declare function failureModesForDimension(dimension: string): readonly string[];
24
+ export { AGENT_FAILURE_MODES, CANONICAL_DIMENSIONS, COMMON_FAILURE_MODES, KP_FAILURE_MODES, LITERACY_FAILURE_MODES, MCP_FAILURE_MODES, failureModesForDimension, isCanonicalFailureMode, type AgentFailureMode, type CommonFailureMode, type KPFailureMode, type LiteracyFailureMode, type MCPFailureMode, } from "../_vendor/ailf-core/index.d.ts";
@@ -1,75 +1,24 @@
1
1
  /**
2
2
  * Per-dimension failure-mode taxonomy barrel.
3
3
  *
4
+ * D-05: taxonomy data relocated to @sanity/ailf-core so card files in
5
+ * packages/core/src/services/diagnosis/cards/ can import without violating
6
+ * the core→eval import direction rule.
7
+ *
8
+ * This file is now a re-export shim — all behavior lives in
9
+ * packages/core/src/grader/failure-modes/. Existing eval-side callers
10
+ * (rubrics.ts, rubric-resolution.ts, calibration.test.ts) continue to
11
+ * work with zero source changes.
12
+ *
4
13
  * Named re-exports only (W0124 — never `export *`).
5
14
  *
6
15
  * Consumers:
7
- * - `packages/eval/config/rubrics.ts` — calls `failureModesForDimension()` to
8
- * stamp a per-template legal-mode list onto every rubric template entry.
9
- * - `packages/eval/src/pipeline/compiler/rubric-resolution.ts` — reads
10
- * `template.failureModes` at prompt-assembly time and announces the legal
11
- * modes to the grader before the structured-shape footer (Plan 03-01).
12
- * - `packages/eval/src/grader/__tests__/calibration.test.ts` — fixture-driven
13
- * ≥90% non-`unclassified` static calibration check (ROADMAP success
14
- * criterion 1).
16
+ * - `packages/eval/config/rubrics.ts` — calls `failureModesForDimension()`
17
+ * - `packages/eval/src/pipeline/compiler/rubric-resolution.ts`
18
+ * - `packages/eval/src/grader/__tests__/calibration.test.ts`
15
19
  *
20
+ * @see packages/core/src/grader/failure-modes/index.ts — canonical location
16
21
  * @see docs/design-docs/actionability-ladder/03-structured-grader-judgments.md
17
- * §"Per-dimension failure-mode taxonomies" (lines 239-283).
18
- * @see docs/decisions/D0005-grader-model-separation.md — single grader model;
19
- * taxonomies travel with the rubric prompt for reproducibility.
20
- */
21
- export { COMMON_FAILURE_MODES } from "./common.js";
22
- export { LITERACY_FAILURE_MODES } from "./literacy.js";
23
- export { MCP_FAILURE_MODES } from "./mcp.js";
24
- export { KP_FAILURE_MODES } from "./knowledge-probe.js";
25
- export { AGENT_FAILURE_MODES } from "./agent-harness.js";
26
- import { COMMON_FAILURE_MODES } from "./common.js";
27
- import { LITERACY_FAILURE_MODES } from "./literacy.js";
28
- import { MCP_FAILURE_MODES } from "./mcp.js";
29
- import { KP_FAILURE_MODES } from "./knowledge-probe.js";
30
- import { AGENT_FAILURE_MODES } from "./agent-harness.js";
31
- /**
32
- * Return the legal failure-mode list for a given rubric dimension.
33
- *
34
- * Accepts both family-level keys (`mcp-behavior`, `knowledge-probe`,
35
- * `agent-harness`) and the per-template `dimension` strings used in
36
- * `config/rubrics.ts` (`task-completion`, `input-validation`,
37
- * `factual-correctness`, `process-quality`, …). The cross-cutting
38
- * `COMMON_FAILURE_MODES` is always included.
39
- *
40
- * Unknown dimensions fall through to `COMMON_FAILURE_MODES` only — safe
41
- * default, the grader can still pick `unclassified`.
22
+ * @see docs/decisions/D0005-grader-model-separation.md
42
23
  */
43
- export function failureModesForDimension(dimension) {
44
- switch (dimension) {
45
- // ── Literacy family ──────────────────────────────────────
46
- case "task-completion":
47
- case "code-correctness":
48
- case "doc-coverage":
49
- return [...COMMON_FAILURE_MODES, ...LITERACY_FAILURE_MODES];
50
- // ── MCP family ───────────────────────────────────────────
51
- // `mcp-behavior` is the family-level key (profile / depends-on
52
- // shorthand). The per-template `dimension` strings are the four
53
- // entries from config/rubrics.ts mcp-* templates.
54
- case "mcp-behavior":
55
- case "input-validation":
56
- case "output-correctness":
57
- case "error-handling":
58
- case "security":
59
- return [...COMMON_FAILURE_MODES, ...MCP_FAILURE_MODES];
60
- // ── Knowledge-probe family ───────────────────────────────
61
- case "knowledge-probe":
62
- case "factual-correctness":
63
- case "completeness":
64
- case "currency":
65
- return [...COMMON_FAILURE_MODES, ...KP_FAILURE_MODES];
66
- // ── Agent-harness family ─────────────────────────────────
67
- case "agent-harness":
68
- case "process-quality":
69
- case "agent-output":
70
- case "tool-usage":
71
- return [...COMMON_FAILURE_MODES, ...AGENT_FAILURE_MODES];
72
- default:
73
- return COMMON_FAILURE_MODES;
74
- }
75
- }
24
+ export { AGENT_FAILURE_MODES, CANONICAL_DIMENSIONS, COMMON_FAILURE_MODES, KP_FAILURE_MODES, LITERACY_FAILURE_MODES, MCP_FAILURE_MODES, failureModesForDimension, isCanonicalFailureMode, } from "../_vendor/ailf-core/index.js";
@@ -1,14 +1,9 @@
1
1
  /**
2
- * Knowledge-probe failure modes — valid for the `knowledge-probe` dimension
3
- * family (factual-correctness, completeness, currency).
2
+ * Knowledge-probe failure modes — re-export shim (D-05).
4
3
  *
5
- * Phase 3 GRAD-03 (Plan 03-02). Knowledge-probe failures track the model's
6
- * ability to recall facts about Sanity's surface area; the v0 modes
7
- * differentiate factual errors from omissions, currency drift, and
8
- * hallucination.
4
+ * Canonical data relocated to @sanity/ailf-core.
5
+ * Existing callers of this file continue to work unchanged.
9
6
  *
10
- * @see docs/design-docs/actionability-ladder/03-structured-grader-judgments.md
11
- * §"Per-dimension failure-mode taxonomies" (lines 239-283).
7
+ * @see packages/core/src/grader/failure-modes/knowledge-probe.ts
12
8
  */
13
- export declare const KP_FAILURE_MODES: readonly ["factual-error", "incompleteness", "currency-violation", "hallucination"];
14
- export type KPFailureMode = (typeof KP_FAILURE_MODES)[number];
9
+ export { KP_FAILURE_MODES, type KPFailureMode } from "../_vendor/ailf-core/index.d.ts";
@@ -1,18 +1,9 @@
1
1
  /**
2
- * Knowledge-probe failure modes — valid for the `knowledge-probe` dimension
3
- * family (factual-correctness, completeness, currency).
2
+ * Knowledge-probe failure modes — re-export shim (D-05).
4
3
  *
5
- * Phase 3 GRAD-03 (Plan 03-02). Knowledge-probe failures track the model's
6
- * ability to recall facts about Sanity's surface area; the v0 modes
7
- * differentiate factual errors from omissions, currency drift, and
8
- * hallucination.
4
+ * Canonical data relocated to @sanity/ailf-core.
5
+ * Existing callers of this file continue to work unchanged.
9
6
  *
10
- * @see docs/design-docs/actionability-ladder/03-structured-grader-judgments.md
11
- * §"Per-dimension failure-mode taxonomies" (lines 239-283).
7
+ * @see packages/core/src/grader/failure-modes/knowledge-probe.ts
12
8
  */
13
- export const KP_FAILURE_MODES = [
14
- "factual-error", // assistant asserts something demonstrably false
15
- "incompleteness", // assistant covers part of the answer; misses key piece
16
- "currency-violation", // assistant cites stale facts beyond doc currency horizon
17
- "hallucination", // assistant invents details not present in any doc
18
- ];
9
+ export { KP_FAILURE_MODES } from "../_vendor/ailf-core/index.js";
@@ -1,13 +1,9 @@
1
1
  /**
2
- * Literacy failure modes — valid for `task-completion`, `code-correctness`,
3
- * `doc-coverage` (the literacy dimension family).
2
+ * Literacy failure modes — re-export shim (D-05).
4
3
  *
5
- * Phase 3 GRAD-03 (Plan 03-02). The v0 list is the four canonical
6
- * documentation-quality failure modes. Conservative on purpose; calibration
7
- * (Plan 03-02 Task 3) reveals whether expansion is needed in a follow-on.
4
+ * Canonical data relocated to @sanity/ailf-core.
5
+ * Existing callers of this file continue to work unchanged.
8
6
  *
9
- * @see docs/design-docs/actionability-ladder/03-structured-grader-judgments.md
10
- * §"Per-dimension failure-mode taxonomies" (lines 239-283).
7
+ * @see packages/core/src/grader/failure-modes/literacy.ts
11
8
  */
12
- export declare const LITERACY_FAILURE_MODES: readonly ["missing-docs", "outdated-docs", "incorrect-docs", "poor-structure"];
13
- export type LiteracyFailureMode = (typeof LITERACY_FAILURE_MODES)[number];
9
+ export { LITERACY_FAILURE_MODES, type LiteracyFailureMode, } from "../_vendor/ailf-core/index.d.ts";
@@ -1,17 +1,9 @@
1
1
  /**
2
- * Literacy failure modes — valid for `task-completion`, `code-correctness`,
3
- * `doc-coverage` (the literacy dimension family).
2
+ * Literacy failure modes — re-export shim (D-05).
4
3
  *
5
- * Phase 3 GRAD-03 (Plan 03-02). The v0 list is the four canonical
6
- * documentation-quality failure modes. Conservative on purpose; calibration
7
- * (Plan 03-02 Task 3) reveals whether expansion is needed in a follow-on.
4
+ * Canonical data relocated to @sanity/ailf-core.
5
+ * Existing callers of this file continue to work unchanged.
8
6
  *
9
- * @see docs/design-docs/actionability-ladder/03-structured-grader-judgments.md
10
- * §"Per-dimension failure-mode taxonomies" (lines 239-283).
7
+ * @see packages/core/src/grader/failure-modes/literacy.ts
11
8
  */
12
- export const LITERACY_FAILURE_MODES = [
13
- "missing-docs", // relevant doc didn't exist
14
- "outdated-docs", // doc reflects an older API/version
15
- "incorrect-docs", // doc states something factually wrong
16
- "poor-structure", // doc exists but is hard to find or follow
17
- ];
9
+ export { LITERACY_FAILURE_MODES, } from "../_vendor/ailf-core/index.js";
@@ -1,14 +1,9 @@
1
1
  /**
2
- * MCP failure modes — valid for the `mcp-behavior` dimension family
3
- * (input-validation, output-correctness, error-handling, security).
2
+ * MCP failure modes — re-export shim (D-05).
4
3
  *
5
- * Phase 3 GRAD-03 (Plan 03-02). `missing-docs` is intentionally re-exported
6
- * from the literacy family MCP server tasks frequently fail because the
7
- * MCP spec itself is under-documented; that's a literacy failure even when
8
- * surfaced through MCP grading.
4
+ * Canonical data relocated to @sanity/ailf-core.
5
+ * Existing callers of this file continue to work unchanged.
9
6
  *
10
- * @see docs/design-docs/actionability-ladder/03-structured-grader-judgments.md
11
- * §"Per-dimension failure-mode taxonomies" (lines 239-283).
7
+ * @see packages/core/src/grader/failure-modes/mcp.ts
12
8
  */
13
- export declare const MCP_FAILURE_MODES: readonly ["spec-mismatch", "missing-error-handling", "over-privileged", "missing-docs"];
14
- export type MCPFailureMode = (typeof MCP_FAILURE_MODES)[number];
9
+ export { MCP_FAILURE_MODES, type MCPFailureMode } from "../_vendor/ailf-core/index.d.ts";
@@ -1,18 +1,9 @@
1
1
  /**
2
- * MCP failure modes — valid for the `mcp-behavior` dimension family
3
- * (input-validation, output-correctness, error-handling, security).
2
+ * MCP failure modes — re-export shim (D-05).
4
3
  *
5
- * Phase 3 GRAD-03 (Plan 03-02). `missing-docs` is intentionally re-exported
6
- * from the literacy family MCP server tasks frequently fail because the
7
- * MCP spec itself is under-documented; that's a literacy failure even when
8
- * surfaced through MCP grading.
4
+ * Canonical data relocated to @sanity/ailf-core.
5
+ * Existing callers of this file continue to work unchanged.
9
6
  *
10
- * @see docs/design-docs/actionability-ladder/03-structured-grader-judgments.md
11
- * §"Per-dimension failure-mode taxonomies" (lines 239-283).
7
+ * @see packages/core/src/grader/failure-modes/mcp.ts
12
8
  */
13
- export const MCP_FAILURE_MODES = [
14
- "spec-mismatch", // tool/server output doesn't match published MCP spec
15
- "missing-error-handling", // tool failure path under-documented or absent
16
- "over-privileged", // tool exposes operations the doc didn't sanction
17
- "missing-docs", // re-export from literacy (cross-cutting)
18
- ];
9
+ export { MCP_FAILURE_MODES } from "../_vendor/ailf-core/index.js";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sanity/ailf",
3
- "version": "5.0.0",
3
+ "version": "6.0.0",
4
4
  "private": false,
5
5
  "publishConfig": {
6
6
  "access": "public"
@@ -61,7 +61,7 @@
61
61
  },
62
62
  "scripts": {
63
63
  "build": "tsc && tsc -p tsconfig.scripts.json && tsx scripts/bundle-workspace-deps.ts",
64
- "generate-configs": "tsx src/cli.ts generate-configs",
64
+ "generate-configs": "tsx src/cli.ts generate-configs && tsx scripts/generate-diagnosis-config.ts",
65
65
  "fetch-docs": "tsx src/cli.ts fetch-docs",
66
66
  "measure-retrieval": "tsx src/cli.ts measure-retrieval",
67
67
  "eval": "tsx src/cli.ts eval",