@sanity/ailf 5.0.0 → 6.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/diagnosis-cards.ts +318 -0
- package/config/models.ts +12 -0
- package/dist/_vendor/ailf-core/grader/failure-modes/agent-harness.d.ts +13 -0
- package/dist/_vendor/ailf-core/grader/failure-modes/agent-harness.js +16 -0
- package/dist/_vendor/ailf-core/grader/failure-modes/common.d.ts +14 -0
- package/dist/_vendor/ailf-core/grader/failure-modes/common.js +18 -0
- package/dist/_vendor/ailf-core/grader/failure-modes/index.d.ts +45 -0
- package/dist/_vendor/ailf-core/grader/failure-modes/index.js +109 -0
- package/dist/_vendor/ailf-core/grader/failure-modes/knowledge-probe.d.ts +13 -0
- package/dist/_vendor/ailf-core/grader/failure-modes/knowledge-probe.js +17 -0
- package/dist/_vendor/ailf-core/grader/failure-modes/literacy.d.ts +13 -0
- package/dist/_vendor/ailf-core/grader/failure-modes/literacy.js +17 -0
- package/dist/_vendor/ailf-core/grader/failure-modes/mcp.d.ts +13 -0
- package/dist/_vendor/ailf-core/grader/failure-modes/mcp.js +17 -0
- package/dist/_vendor/ailf-core/index.d.ts +1 -0
- package/dist/_vendor/ailf-core/index.js +4 -0
- package/dist/_vendor/ailf-core/services/diagnosis/card-validators.d.ts +41 -0
- package/dist/_vendor/ailf-core/services/diagnosis/card-validators.js +40 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/area-summary.test.d.ts +7 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/area-summary.test.js +131 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/failure-mode-summary.test.d.ts +7 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/failure-mode-summary.test.js +171 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/no-issues.test.d.ts +7 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/no-issues.test.js +155 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/area-summary.d.ts +17 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/area-summary.js +43 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/doc-attribution-spotlight.d.ts +46 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/doc-attribution-spotlight.js +104 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/failure-mode-summary.d.ts +28 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/failure-mode-summary.js +96 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/index.d.ts +39 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/index.js +52 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/low-confidence-attribution.d.ts +27 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/low-confidence-attribution.js +77 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/no-issues.d.ts +32 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/no-issues.js +71 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/regression-vs-baseline.d.ts +44 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/regression-vs-baseline.js +126 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/top-recommendations.d.ts +41 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/top-recommendations.js +107 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/weakest-area.d.ts +43 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/weakest-area.js +114 -0
- package/dist/_vendor/ailf-core/services/diagnosis/prompt-builders.d.ts +72 -0
- package/dist/_vendor/ailf-core/services/diagnosis/prompt-builders.js +273 -0
- package/dist/_vendor/ailf-core/services/diagnosis/prompts/doc-attribution-spotlight.system.d.ts +17 -0
- package/dist/_vendor/ailf-core/services/diagnosis/prompts/doc-attribution-spotlight.system.js +58 -0
- package/dist/_vendor/ailf-core/services/diagnosis/prompts/index.d.ts +10 -0
- package/dist/_vendor/ailf-core/services/diagnosis/prompts/index.js +10 -0
- package/dist/_vendor/ailf-core/services/diagnosis/prompts/low-confidence-attribution.system.d.ts +15 -0
- package/dist/_vendor/ailf-core/services/diagnosis/prompts/low-confidence-attribution.system.js +53 -0
- package/dist/_vendor/ailf-core/services/diagnosis/prompts/regression-vs-baseline.system.d.ts +14 -0
- package/dist/_vendor/ailf-core/services/diagnosis/prompts/regression-vs-baseline.system.js +63 -0
- package/dist/_vendor/ailf-core/services/diagnosis/prompts/top-recommendations.system.d.ts +16 -0
- package/dist/_vendor/ailf-core/services/diagnosis/prompts/top-recommendations.system.js +78 -0
- package/dist/_vendor/ailf-core/services/diagnosis/prompts/weakest-area.system.d.ts +16 -0
- package/dist/_vendor/ailf-core/services/diagnosis/prompts/weakest-area.system.js +86 -0
- package/dist/_vendor/ailf-core/services/diagnosis/registry.d.ts +10 -0
- package/dist/_vendor/ailf-core/services/diagnosis/registry.js +10 -0
- package/dist/_vendor/ailf-core/services/diagnosis-runner.d.ts +119 -2
- package/dist/_vendor/ailf-core/services/diagnosis-runner.js +136 -2
- package/dist/_vendor/ailf-core/services/index.d.ts +5 -1
- package/dist/_vendor/ailf-core/services/index.js +15 -2
- package/dist/_vendor/ailf-core/services/llm-client-factory.d.ts +64 -0
- package/dist/_vendor/ailf-core/services/llm-client-factory.js +54 -0
- package/dist/_vendor/ailf-core/types/diagnosis.d.ts +112 -10
- package/dist/_vendor/ailf-core/types/diagnosis.js +3 -1
- package/dist/_vendor/ailf-core/types/index.d.ts +1 -1
- package/dist/adapters/llm/fake-llm-client.d.ts +20 -0
- package/dist/adapters/llm/fake-llm-client.js +38 -1
- package/dist/adapters/llm/openai-llm-client.js +52 -3
- package/dist/cli-program.js +3 -0
- package/dist/commands/interpret.d.ts +50 -0
- package/dist/commands/interpret.js +212 -0
- package/dist/composition-root.d.ts +21 -23
- package/dist/composition-root.js +107 -41
- package/dist/config/diagnosis-cards.ts +318 -0
- package/dist/config/models.ts +12 -0
- package/dist/grader/agent-harness.d.ts +5 -10
- package/dist/grader/agent-harness.js +5 -13
- package/dist/grader/common.d.ts +5 -13
- package/dist/grader/common.js +5 -17
- package/dist/grader/index.d.ts +15 -29
- package/dist/grader/index.js +15 -66
- package/dist/grader/knowledge-probe.d.ts +5 -10
- package/dist/grader/knowledge-probe.js +5 -14
- package/dist/grader/literacy.d.ts +5 -9
- package/dist/grader/literacy.js +5 -13
- package/dist/grader/mcp.d.ts +5 -10
- package/dist/grader/mcp.js +5 -14
- package/package.json +2 -2
package/dist/grader/common.js
CHANGED
|
@@ -1,21 +1,9 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Cross-cutting failure modes —
|
|
2
|
+
* Cross-cutting failure modes — re-export shim (D-05).
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
* model ceiling effects, false-floor (model already knew the answer; docs
|
|
7
|
-
* added no value), and the low-confidence fallback. The per-dimension
|
|
8
|
-
* taxonomies (literacy, MCP, knowledge-probe, agent-harness) extend this
|
|
9
|
-
* cross-cutting list.
|
|
4
|
+
* Canonical data relocated to @sanity/ailf-core.
|
|
5
|
+
* Existing callers of this file continue to work unchanged.
|
|
10
6
|
*
|
|
11
|
-
* @see
|
|
12
|
-
* §"Per-dimension failure-mode taxonomies" (lines 239-283 — the v0 lists)
|
|
13
|
-
* @see docs/decisions/D0005-grader-model-separation.md — single grader model;
|
|
14
|
-
* taxonomies travel with the rubric prompt for reproducibility.
|
|
7
|
+
* @see packages/core/src/grader/failure-modes/common.ts
|
|
15
8
|
*/
|
|
16
|
-
export const COMMON_FAILURE_MODES = [
|
|
17
|
-
"api-error", // infrastructure failure, not a docs problem
|
|
18
|
-
"model-limitation", // high ceiling, model can't reach it
|
|
19
|
-
"false-floor", // model already knew the answer; docs added no value
|
|
20
|
-
"unclassified", // grader could not pick a mode (low-confidence fallback)
|
|
21
|
-
];
|
|
9
|
+
export { COMMON_FAILURE_MODES } from "../_vendor/ailf-core/index.js";
|
package/dist/grader/index.d.ts
CHANGED
|
@@ -1,38 +1,24 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Per-dimension failure-mode taxonomy barrel.
|
|
3
3
|
*
|
|
4
|
+
* D-05: taxonomy data relocated to @sanity/ailf-core so card files in
|
|
5
|
+
* packages/core/src/services/diagnosis/cards/ can import without violating
|
|
6
|
+
* the core→eval import direction rule.
|
|
7
|
+
*
|
|
8
|
+
* This file is now a re-export shim — all behavior lives in
|
|
9
|
+
* packages/core/src/grader/failure-modes/. Existing eval-side callers
|
|
10
|
+
* (rubrics.ts, rubric-resolution.ts, calibration.test.ts) continue to
|
|
11
|
+
* work with zero source changes.
|
|
12
|
+
*
|
|
4
13
|
* Named re-exports only (W0124 — never `export *`).
|
|
5
14
|
*
|
|
6
15
|
* Consumers:
|
|
7
|
-
* - `packages/eval/config/rubrics.ts` — calls `failureModesForDimension()`
|
|
8
|
-
*
|
|
9
|
-
* - `packages/eval/src/
|
|
10
|
-
* `template.failureModes` at prompt-assembly time and announces the legal
|
|
11
|
-
* modes to the grader before the structured-shape footer (Plan 03-01).
|
|
12
|
-
* - `packages/eval/src/grader/__tests__/calibration.test.ts` — fixture-driven
|
|
13
|
-
* ≥90% non-`unclassified` static calibration check (ROADMAP success
|
|
14
|
-
* criterion 1).
|
|
16
|
+
* - `packages/eval/config/rubrics.ts` — calls `failureModesForDimension()`
|
|
17
|
+
* - `packages/eval/src/pipeline/compiler/rubric-resolution.ts`
|
|
18
|
+
* - `packages/eval/src/grader/__tests__/calibration.test.ts`
|
|
15
19
|
*
|
|
20
|
+
* @see packages/core/src/grader/failure-modes/index.ts — canonical location
|
|
16
21
|
* @see docs/design-docs/actionability-ladder/03-structured-grader-judgments.md
|
|
17
|
-
*
|
|
18
|
-
* @see docs/decisions/D0005-grader-model-separation.md — single grader model;
|
|
19
|
-
* taxonomies travel with the rubric prompt for reproducibility.
|
|
20
|
-
*/
|
|
21
|
-
export { COMMON_FAILURE_MODES, type CommonFailureMode } from "./common.js";
|
|
22
|
-
export { LITERACY_FAILURE_MODES, type LiteracyFailureMode } from "./literacy.js";
|
|
23
|
-
export { MCP_FAILURE_MODES, type MCPFailureMode } from "./mcp.js";
|
|
24
|
-
export { KP_FAILURE_MODES, type KPFailureMode } from "./knowledge-probe.js";
|
|
25
|
-
export { AGENT_FAILURE_MODES, type AgentFailureMode } from "./agent-harness.js";
|
|
26
|
-
/**
|
|
27
|
-
* Return the legal failure-mode list for a given rubric dimension.
|
|
28
|
-
*
|
|
29
|
-
* Accepts both family-level keys (`mcp-behavior`, `knowledge-probe`,
|
|
30
|
-
* `agent-harness`) and the per-template `dimension` strings used in
|
|
31
|
-
* `config/rubrics.ts` (`task-completion`, `input-validation`,
|
|
32
|
-
* `factual-correctness`, `process-quality`, …). The cross-cutting
|
|
33
|
-
* `COMMON_FAILURE_MODES` is always included.
|
|
34
|
-
*
|
|
35
|
-
* Unknown dimensions fall through to `COMMON_FAILURE_MODES` only — safe
|
|
36
|
-
* default, the grader can still pick `unclassified`.
|
|
22
|
+
* @see docs/decisions/D0005-grader-model-separation.md
|
|
37
23
|
*/
|
|
38
|
-
export
|
|
24
|
+
export { AGENT_FAILURE_MODES, CANONICAL_DIMENSIONS, COMMON_FAILURE_MODES, KP_FAILURE_MODES, LITERACY_FAILURE_MODES, MCP_FAILURE_MODES, failureModesForDimension, isCanonicalFailureMode, type AgentFailureMode, type CommonFailureMode, type KPFailureMode, type LiteracyFailureMode, type MCPFailureMode, } from "../_vendor/ailf-core/index.d.ts";
|
package/dist/grader/index.js
CHANGED
|
@@ -1,75 +1,24 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Per-dimension failure-mode taxonomy barrel.
|
|
3
3
|
*
|
|
4
|
+
* D-05: taxonomy data relocated to @sanity/ailf-core so card files in
|
|
5
|
+
* packages/core/src/services/diagnosis/cards/ can import without violating
|
|
6
|
+
* the core→eval import direction rule.
|
|
7
|
+
*
|
|
8
|
+
* This file is now a re-export shim — all behavior lives in
|
|
9
|
+
* packages/core/src/grader/failure-modes/. Existing eval-side callers
|
|
10
|
+
* (rubrics.ts, rubric-resolution.ts, calibration.test.ts) continue to
|
|
11
|
+
* work with zero source changes.
|
|
12
|
+
*
|
|
4
13
|
* Named re-exports only (W0124 — never `export *`).
|
|
5
14
|
*
|
|
6
15
|
* Consumers:
|
|
7
|
-
* - `packages/eval/config/rubrics.ts` — calls `failureModesForDimension()`
|
|
8
|
-
*
|
|
9
|
-
* - `packages/eval/src/
|
|
10
|
-
* `template.failureModes` at prompt-assembly time and announces the legal
|
|
11
|
-
* modes to the grader before the structured-shape footer (Plan 03-01).
|
|
12
|
-
* - `packages/eval/src/grader/__tests__/calibration.test.ts` — fixture-driven
|
|
13
|
-
* ≥90% non-`unclassified` static calibration check (ROADMAP success
|
|
14
|
-
* criterion 1).
|
|
16
|
+
* - `packages/eval/config/rubrics.ts` — calls `failureModesForDimension()`
|
|
17
|
+
* - `packages/eval/src/pipeline/compiler/rubric-resolution.ts`
|
|
18
|
+
* - `packages/eval/src/grader/__tests__/calibration.test.ts`
|
|
15
19
|
*
|
|
20
|
+
* @see packages/core/src/grader/failure-modes/index.ts — canonical location
|
|
16
21
|
* @see docs/design-docs/actionability-ladder/03-structured-grader-judgments.md
|
|
17
|
-
*
|
|
18
|
-
* @see docs/decisions/D0005-grader-model-separation.md — single grader model;
|
|
19
|
-
* taxonomies travel with the rubric prompt for reproducibility.
|
|
20
|
-
*/
|
|
21
|
-
export { COMMON_FAILURE_MODES } from "./common.js";
|
|
22
|
-
export { LITERACY_FAILURE_MODES } from "./literacy.js";
|
|
23
|
-
export { MCP_FAILURE_MODES } from "./mcp.js";
|
|
24
|
-
export { KP_FAILURE_MODES } from "./knowledge-probe.js";
|
|
25
|
-
export { AGENT_FAILURE_MODES } from "./agent-harness.js";
|
|
26
|
-
import { COMMON_FAILURE_MODES } from "./common.js";
|
|
27
|
-
import { LITERACY_FAILURE_MODES } from "./literacy.js";
|
|
28
|
-
import { MCP_FAILURE_MODES } from "./mcp.js";
|
|
29
|
-
import { KP_FAILURE_MODES } from "./knowledge-probe.js";
|
|
30
|
-
import { AGENT_FAILURE_MODES } from "./agent-harness.js";
|
|
31
|
-
/**
|
|
32
|
-
* Return the legal failure-mode list for a given rubric dimension.
|
|
33
|
-
*
|
|
34
|
-
* Accepts both family-level keys (`mcp-behavior`, `knowledge-probe`,
|
|
35
|
-
* `agent-harness`) and the per-template `dimension` strings used in
|
|
36
|
-
* `config/rubrics.ts` (`task-completion`, `input-validation`,
|
|
37
|
-
* `factual-correctness`, `process-quality`, …). The cross-cutting
|
|
38
|
-
* `COMMON_FAILURE_MODES` is always included.
|
|
39
|
-
*
|
|
40
|
-
* Unknown dimensions fall through to `COMMON_FAILURE_MODES` only — safe
|
|
41
|
-
* default, the grader can still pick `unclassified`.
|
|
22
|
+
* @see docs/decisions/D0005-grader-model-separation.md
|
|
42
23
|
*/
|
|
43
|
-
export function failureModesForDimension(dimension) {
|
|
44
|
-
switch (dimension) {
|
|
45
|
-
// ── Literacy family ──────────────────────────────────────
|
|
46
|
-
case "task-completion":
|
|
47
|
-
case "code-correctness":
|
|
48
|
-
case "doc-coverage":
|
|
49
|
-
return [...COMMON_FAILURE_MODES, ...LITERACY_FAILURE_MODES];
|
|
50
|
-
// ── MCP family ───────────────────────────────────────────
|
|
51
|
-
// `mcp-behavior` is the family-level key (profile / depends-on
|
|
52
|
-
// shorthand). The per-template `dimension` strings are the four
|
|
53
|
-
// entries from config/rubrics.ts mcp-* templates.
|
|
54
|
-
case "mcp-behavior":
|
|
55
|
-
case "input-validation":
|
|
56
|
-
case "output-correctness":
|
|
57
|
-
case "error-handling":
|
|
58
|
-
case "security":
|
|
59
|
-
return [...COMMON_FAILURE_MODES, ...MCP_FAILURE_MODES];
|
|
60
|
-
// ── Knowledge-probe family ───────────────────────────────
|
|
61
|
-
case "knowledge-probe":
|
|
62
|
-
case "factual-correctness":
|
|
63
|
-
case "completeness":
|
|
64
|
-
case "currency":
|
|
65
|
-
return [...COMMON_FAILURE_MODES, ...KP_FAILURE_MODES];
|
|
66
|
-
// ── Agent-harness family ─────────────────────────────────
|
|
67
|
-
case "agent-harness":
|
|
68
|
-
case "process-quality":
|
|
69
|
-
case "agent-output":
|
|
70
|
-
case "tool-usage":
|
|
71
|
-
return [...COMMON_FAILURE_MODES, ...AGENT_FAILURE_MODES];
|
|
72
|
-
default:
|
|
73
|
-
return COMMON_FAILURE_MODES;
|
|
74
|
-
}
|
|
75
|
-
}
|
|
24
|
+
export { AGENT_FAILURE_MODES, CANONICAL_DIMENSIONS, COMMON_FAILURE_MODES, KP_FAILURE_MODES, LITERACY_FAILURE_MODES, MCP_FAILURE_MODES, failureModesForDimension, isCanonicalFailureMode, } from "../_vendor/ailf-core/index.js";
|
package/dist/grader/knowledge-probe.d.ts
CHANGED
|
@@ -1,14 +1,9 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Knowledge-probe failure modes —
|
|
3
|
-
* family (factual-correctness, completeness, currency).
|
|
2
|
+
* Knowledge-probe failure modes — re-export shim (D-05).
|
|
4
3
|
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
* differentiate factual errors from omissions, currency drift, and
|
|
8
|
-
* hallucination.
|
|
4
|
+
* Canonical data relocated to @sanity/ailf-core.
|
|
5
|
+
* Existing callers of this file continue to work unchanged.
|
|
9
6
|
*
|
|
10
|
-
* @see
|
|
11
|
-
* §"Per-dimension failure-mode taxonomies" (lines 239-283).
|
|
7
|
+
* @see packages/core/src/grader/failure-modes/knowledge-probe.ts
|
|
12
8
|
*/
|
|
13
|
-
export
|
|
14
|
-
export type KPFailureMode = (typeof KP_FAILURE_MODES)[number];
|
|
9
|
+
export { KP_FAILURE_MODES, type KPFailureMode } from "../_vendor/ailf-core/index.d.ts";
|
package/dist/grader/knowledge-probe.js
CHANGED
|
@@ -1,18 +1,9 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Knowledge-probe failure modes —
|
|
3
|
-
* family (factual-correctness, completeness, currency).
|
|
2
|
+
* Knowledge-probe failure modes — re-export shim (D-05).
|
|
4
3
|
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
* differentiate factual errors from omissions, currency drift, and
|
|
8
|
-
* hallucination.
|
|
4
|
+
* Canonical data relocated to @sanity/ailf-core.
|
|
5
|
+
* Existing callers of this file continue to work unchanged.
|
|
9
6
|
*
|
|
10
|
-
* @see
|
|
11
|
-
* §"Per-dimension failure-mode taxonomies" (lines 239-283).
|
|
7
|
+
* @see packages/core/src/grader/failure-modes/knowledge-probe.ts
|
|
12
8
|
*/
|
|
13
|
-
export const KP_FAILURE_MODES = [
|
|
14
|
-
"factual-error", // assistant asserts something demonstrably false
|
|
15
|
-
"incompleteness", // assistant covers part of the answer; misses key piece
|
|
16
|
-
"currency-violation", // assistant cites stale facts beyond doc currency horizon
|
|
17
|
-
"hallucination", // assistant invents details not present in any doc
|
|
18
|
-
];
|
|
9
|
+
export { KP_FAILURE_MODES } from "../_vendor/ailf-core/index.js";
|
package/dist/grader/literacy.d.ts
CHANGED
|
@@ -1,13 +1,9 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Literacy failure modes —
|
|
3
|
-
* `doc-coverage` (the literacy dimension family).
|
|
2
|
+
* Literacy failure modes — re-export shim (D-05).
|
|
4
3
|
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
* (Plan 03-02 Task 3) reveals whether expansion is needed in a follow-on.
|
|
4
|
+
* Canonical data relocated to @sanity/ailf-core.
|
|
5
|
+
* Existing callers of this file continue to work unchanged.
|
|
8
6
|
*
|
|
9
|
-
* @see
|
|
10
|
-
* §"Per-dimension failure-mode taxonomies" (lines 239-283).
|
|
7
|
+
* @see packages/core/src/grader/failure-modes/literacy.ts
|
|
11
8
|
*/
|
|
12
|
-
export
|
|
13
|
-
export type LiteracyFailureMode = (typeof LITERACY_FAILURE_MODES)[number];
|
|
9
|
+
export { LITERACY_FAILURE_MODES, type LiteracyFailureMode, } from "../_vendor/ailf-core/index.d.ts";
|
package/dist/grader/literacy.js
CHANGED
|
@@ -1,17 +1,9 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Literacy failure modes —
|
|
3
|
-
* `doc-coverage` (the literacy dimension family).
|
|
2
|
+
* Literacy failure modes — re-export shim (D-05).
|
|
4
3
|
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
* (Plan 03-02 Task 3) reveals whether expansion is needed in a follow-on.
|
|
4
|
+
* Canonical data relocated to @sanity/ailf-core.
|
|
5
|
+
* Existing callers of this file continue to work unchanged.
|
|
8
6
|
*
|
|
9
|
-
* @see
|
|
10
|
-
* §"Per-dimension failure-mode taxonomies" (lines 239-283).
|
|
7
|
+
* @see packages/core/src/grader/failure-modes/literacy.ts
|
|
11
8
|
*/
|
|
12
|
-
export const LITERACY_FAILURE_MODES = [
|
|
13
|
-
"missing-docs", // relevant doc didn't exist
|
|
14
|
-
"outdated-docs", // doc reflects an older API/version
|
|
15
|
-
"incorrect-docs", // doc states something factually wrong
|
|
16
|
-
"poor-structure", // doc exists but is hard to find or follow
|
|
17
|
-
];
|
|
9
|
+
export { LITERACY_FAILURE_MODES, } from "../_vendor/ailf-core/index.js";
|
package/dist/grader/mcp.d.ts
CHANGED
|
@@ -1,14 +1,9 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* MCP failure modes —
|
|
3
|
-
* (input-validation, output-correctness, error-handling, security).
|
|
2
|
+
* MCP failure modes — re-export shim (D-05).
|
|
4
3
|
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
* MCP spec itself is under-documented; that's a literacy failure even when
|
|
8
|
-
* surfaced through MCP grading.
|
|
4
|
+
* Canonical data relocated to @sanity/ailf-core.
|
|
5
|
+
* Existing callers of this file continue to work unchanged.
|
|
9
6
|
*
|
|
10
|
-
* @see
|
|
11
|
-
* §"Per-dimension failure-mode taxonomies" (lines 239-283).
|
|
7
|
+
* @see packages/core/src/grader/failure-modes/mcp.ts
|
|
12
8
|
*/
|
|
13
|
-
export
|
|
14
|
-
export type MCPFailureMode = (typeof MCP_FAILURE_MODES)[number];
|
|
9
|
+
export { MCP_FAILURE_MODES, type MCPFailureMode } from "../_vendor/ailf-core/index.d.ts";
|
package/dist/grader/mcp.js
CHANGED
|
@@ -1,18 +1,9 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* MCP failure modes —
|
|
3
|
-
* (input-validation, output-correctness, error-handling, security).
|
|
2
|
+
* MCP failure modes — re-export shim (D-05).
|
|
4
3
|
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
* MCP spec itself is under-documented; that's a literacy failure even when
|
|
8
|
-
* surfaced through MCP grading.
|
|
4
|
+
* Canonical data relocated to @sanity/ailf-core.
|
|
5
|
+
* Existing callers of this file continue to work unchanged.
|
|
9
6
|
*
|
|
10
|
-
* @see
|
|
11
|
-
* §"Per-dimension failure-mode taxonomies" (lines 239-283).
|
|
7
|
+
* @see packages/core/src/grader/failure-modes/mcp.ts
|
|
12
8
|
*/
|
|
13
|
-
export const MCP_FAILURE_MODES = [
|
|
14
|
-
"spec-mismatch", // tool/server output doesn't match published MCP spec
|
|
15
|
-
"missing-error-handling", // tool failure path under-documented or absent
|
|
16
|
-
"over-privileged", // tool exposes operations the doc didn't sanction
|
|
17
|
-
"missing-docs", // re-export from literacy (cross-cutting)
|
|
18
|
-
];
|
|
9
|
+
export { MCP_FAILURE_MODES } from "../_vendor/ailf-core/index.js";
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@sanity/ailf",
|
|
3
|
-
"version": "5.0.0",
|
|
3
|
+
"version": "6.0.0",
|
|
4
4
|
"private": false,
|
|
5
5
|
"publishConfig": {
|
|
6
6
|
"access": "public"
|
|
@@ -61,7 +61,7 @@
|
|
|
61
61
|
},
|
|
62
62
|
"scripts": {
|
|
63
63
|
"build": "tsc && tsc -p tsconfig.scripts.json && tsx scripts/bundle-workspace-deps.ts",
|
|
64
|
-
"generate-configs": "tsx src/cli.ts generate-configs",
|
|
64
|
+
"generate-configs": "tsx src/cli.ts generate-configs && tsx scripts/generate-diagnosis-config.ts",
|
|
65
65
|
"fetch-docs": "tsx src/cli.ts fetch-docs",
|
|
66
66
|
"measure-retrieval": "tsx src/cli.ts measure-retrieval",
|
|
67
67
|
"eval": "tsx src/cli.ts eval",
|