@sanity/ailf 4.6.0 → 6.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/canonical/grader-references/agent-harness-tools.yaml +42 -0
- package/canonical/grader-references/knowledge-probe-recall.yaml +36 -0
- package/canonical/grader-references/mcp-server-spec.yaml +51 -0
- package/canonical/grader-references/portable-text.yaml +48 -0
- package/config/diagnosis-cards.ts +318 -0
- package/config/models.ts +12 -0
- package/config/rubrics.ts +38 -2
- package/dist/_vendor/ailf-core/artifact-registry.d.ts +60 -2
- package/dist/_vendor/ailf-core/artifact-registry.js +288 -7
- package/dist/_vendor/ailf-core/examples/index.d.ts +125 -26
- package/dist/_vendor/ailf-core/examples/index.js +146 -47
- package/dist/_vendor/ailf-core/grader/failure-modes/agent-harness.d.ts +13 -0
- package/dist/_vendor/ailf-core/grader/failure-modes/agent-harness.js +16 -0
- package/dist/_vendor/ailf-core/grader/failure-modes/common.d.ts +14 -0
- package/dist/_vendor/ailf-core/grader/failure-modes/common.js +18 -0
- package/dist/_vendor/ailf-core/grader/failure-modes/index.d.ts +45 -0
- package/dist/_vendor/ailf-core/grader/failure-modes/index.js +109 -0
- package/dist/_vendor/ailf-core/grader/failure-modes/knowledge-probe.d.ts +13 -0
- package/dist/_vendor/ailf-core/grader/failure-modes/knowledge-probe.js +17 -0
- package/dist/_vendor/ailf-core/grader/failure-modes/literacy.d.ts +13 -0
- package/dist/_vendor/ailf-core/grader/failure-modes/literacy.js +17 -0
- package/dist/_vendor/ailf-core/grader/failure-modes/mcp.d.ts +13 -0
- package/dist/_vendor/ailf-core/grader/failure-modes/mcp.js +17 -0
- package/dist/_vendor/ailf-core/index.d.ts +1 -0
- package/dist/_vendor/ailf-core/index.js +4 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +8 -0
- package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +15 -0
- package/dist/_vendor/ailf-core/schemas/branded-string.d.ts +40 -0
- package/dist/_vendor/ailf-core/schemas/branded-string.js +45 -0
- package/dist/_vendor/ailf-core/schemas/confidence-schema.d.ts +36 -0
- package/dist/_vendor/ailf-core/schemas/confidence-schema.js +32 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.js +8 -4
- package/dist/_vendor/ailf-core/schemas/index.d.ts +2 -0
- package/dist/_vendor/ailf-core/schemas/index.js +9 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +1 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +1 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +34 -8
- package/dist/_vendor/ailf-core/schemas/pipeline.js +23 -1
- package/dist/_vendor/ailf-core/services/diagnosis/card-validators.d.ts +41 -0
- package/dist/_vendor/ailf-core/services/diagnosis/card-validators.js +40 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/area-summary.test.d.ts +7 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/area-summary.test.js +131 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/failure-mode-summary.test.d.ts +7 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/failure-mode-summary.test.js +171 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/no-issues.test.d.ts +7 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/no-issues.test.js +155 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/area-summary.d.ts +17 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/area-summary.js +43 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/doc-attribution-spotlight.d.ts +46 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/doc-attribution-spotlight.js +104 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/failure-mode-summary.d.ts +28 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/failure-mode-summary.js +96 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/index.d.ts +39 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/index.js +52 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/low-confidence-attribution.d.ts +27 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/low-confidence-attribution.js +77 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/no-issues.d.ts +32 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/no-issues.js +71 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/regression-vs-baseline.d.ts +44 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/regression-vs-baseline.js +126 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/top-recommendations.d.ts +41 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/top-recommendations.js +107 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/weakest-area.d.ts +43 -0
- package/dist/_vendor/ailf-core/services/diagnosis/cards/weakest-area.js +114 -0
- package/dist/_vendor/ailf-core/services/diagnosis/prompt-builders.d.ts +72 -0
- package/dist/_vendor/ailf-core/services/diagnosis/prompt-builders.js +273 -0
- package/dist/_vendor/ailf-core/services/diagnosis/prompts/doc-attribution-spotlight.system.d.ts +17 -0
- package/dist/_vendor/ailf-core/services/diagnosis/prompts/doc-attribution-spotlight.system.js +58 -0
- package/dist/_vendor/ailf-core/services/diagnosis/prompts/index.d.ts +10 -0
- package/dist/_vendor/ailf-core/services/diagnosis/prompts/index.js +10 -0
- package/dist/_vendor/ailf-core/services/diagnosis/prompts/low-confidence-attribution.system.d.ts +15 -0
- package/dist/_vendor/ailf-core/services/diagnosis/prompts/low-confidence-attribution.system.js +53 -0
- package/dist/_vendor/ailf-core/services/diagnosis/prompts/regression-vs-baseline.system.d.ts +14 -0
- package/dist/_vendor/ailf-core/services/diagnosis/prompts/regression-vs-baseline.system.js +63 -0
- package/dist/_vendor/ailf-core/services/diagnosis/prompts/top-recommendations.system.d.ts +16 -0
- package/dist/_vendor/ailf-core/services/diagnosis/prompts/top-recommendations.system.js +78 -0
- package/dist/_vendor/ailf-core/services/diagnosis/prompts/weakest-area.system.d.ts +16 -0
- package/dist/_vendor/ailf-core/services/diagnosis/prompts/weakest-area.system.js +86 -0
- package/dist/_vendor/ailf-core/services/diagnosis/registry.d.ts +50 -0
- package/dist/_vendor/ailf-core/services/diagnosis/registry.js +35 -0
- package/dist/_vendor/ailf-core/services/diagnosis-runner.d.ts +136 -0
- package/dist/_vendor/ailf-core/services/diagnosis-runner.js +153 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +6 -0
- package/dist/_vendor/ailf-core/services/index.js +18 -0
- package/dist/_vendor/ailf-core/services/llm-client-factory.d.ts +64 -0
- package/dist/_vendor/ailf-core/services/llm-client-factory.js +54 -0
- package/dist/_vendor/ailf-core/services/report-to-markdown.js +3 -2
- package/dist/_vendor/ailf-core/types/attribution.d.ts +82 -0
- package/dist/_vendor/ailf-core/types/attribution.js +18 -0
- package/dist/_vendor/ailf-core/types/branded-ids.d.ts +26 -1
- package/dist/_vendor/ailf-core/types/branded-ids.js +80 -4
- package/dist/_vendor/ailf-core/types/confidence.d.ts +1 -1
- package/dist/_vendor/ailf-core/types/confidence.js +7 -0
- package/dist/_vendor/ailf-core/types/diagnosis.d.ts +271 -0
- package/dist/_vendor/ailf-core/types/diagnosis.js +19 -0
- package/dist/_vendor/ailf-core/types/generalized-task.d.ts +16 -1
- package/dist/_vendor/ailf-core/types/grader-judgment.d.ts +125 -0
- package/dist/_vendor/ailf-core/types/grader-judgment.js +30 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +80 -29
- package/dist/_vendor/ailf-core/types/index.js +15 -1
- package/dist/_vendor/ailf-core/types/legacy-grader-judgment.d.ts +55 -0
- package/dist/_vendor/ailf-core/types/legacy-grader-judgment.js +30 -0
- package/dist/_vendor/ailf-core/types/pipeline-request.d.ts +1 -0
- package/dist/_vendor/ailf-core/types/repo-config.d.ts +8 -0
- package/dist/_vendor/ailf-shared/document-ref.d.ts +1 -1
- package/dist/adapters/api-client/build-request.d.ts +1 -0
- package/dist/adapters/api-client/build-request.js +3 -0
- package/dist/adapters/attribution/attribution-meta-writer.d.ts +35 -0
- package/dist/adapters/attribution/attribution-meta-writer.js +34 -0
- package/dist/adapters/attribution/index.d.ts +9 -0
- package/dist/adapters/attribution/index.js +8 -0
- package/dist/adapters/attribution/per-entry-attribution-writer.d.ts +56 -0
- package/dist/adapters/attribution/per-entry-attribution-writer.js +49 -0
- package/dist/adapters/config-sources/file-config-adapter.js +1 -0
- package/dist/adapters/grader-outputs/index.d.ts +10 -0
- package/dist/adapters/grader-outputs/index.js +8 -0
- package/dist/adapters/grader-outputs/legacy/index.d.ts +11 -0
- package/dist/adapters/grader-outputs/legacy/index.js +10 -0
- package/dist/adapters/grader-outputs/legacy/promptfoo-grader-output-legacy.d.ts +49 -0
- package/dist/adapters/grader-outputs/legacy/promptfoo-grader-output-legacy.js +48 -0
- package/dist/adapters/grader-outputs/promptfoo-grader-output.d.ts +102 -0
- package/dist/adapters/grader-outputs/promptfoo-grader-output.js +93 -0
- package/dist/adapters/index.d.ts +3 -0
- package/dist/adapters/index.js +4 -0
- package/dist/adapters/llm/fake-llm-client.d.ts +20 -0
- package/dist/adapters/llm/fake-llm-client.js +38 -1
- package/dist/adapters/llm/openai-llm-client.js +52 -3
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +5 -1
- package/dist/adapters/task-sources/content-lake-task-source.js +28 -2
- package/dist/adapters/task-sources/repo-schemas.d.ts +79 -11
- package/dist/adapters/task-sources/repo-schemas.js +19 -2
- package/dist/cli-program.js +3 -0
- package/dist/commands/calculate-scores.js +1 -1
- package/dist/commands/explain-handler.js +1 -1
- package/dist/commands/interpret.d.ts +50 -0
- package/dist/commands/interpret.js +212 -0
- package/dist/commands/lookup-doc.d.ts +1 -1
- package/dist/commands/lookup-doc.js +3 -3
- package/dist/commands/pipeline-action.d.ts +6 -0
- package/dist/commands/pipeline-action.js +2 -0
- package/dist/commands/remote-pipeline.js +1 -0
- package/dist/composition-root.d.ts +57 -23
- package/dist/composition-root.js +155 -41
- package/dist/config/diagnosis-cards.ts +318 -0
- package/dist/config/models.ts +12 -0
- package/dist/config/rubrics.ts +38 -2
- package/dist/grader/agent-harness.d.ts +9 -0
- package/dist/grader/agent-harness.js +9 -0
- package/dist/grader/common.d.ts +9 -0
- package/dist/grader/common.js +9 -0
- package/dist/grader/index.d.ts +24 -0
- package/dist/grader/index.js +24 -0
- package/dist/grader/knowledge-probe.d.ts +9 -0
- package/dist/grader/knowledge-probe.js +9 -0
- package/dist/grader/literacy.d.ts +9 -0
- package/dist/grader/literacy.js +9 -0
- package/dist/grader/mcp.d.ts +9 -0
- package/dist/grader/mcp.js +9 -0
- package/dist/orchestration/build-app-context.js +1 -0
- package/dist/orchestration/build-step-sequence.js +5 -0
- package/dist/orchestration/steps/calculate-scores-step.js +23 -1
- package/dist/orchestration/steps/compute-attribution-step.d.ts +44 -0
- package/dist/orchestration/steps/compute-attribution-step.js +279 -0
- package/dist/orchestration/steps/gap-analysis-step.js +35 -7
- package/dist/orchestration/steps/index.d.ts +1 -0
- package/dist/orchestration/steps/index.js +1 -0
- package/dist/pipeline/attribution.d.ts +15 -0
- package/dist/pipeline/attribution.js +18 -9
- package/dist/pipeline/borderline-consensus-runner.d.ts +63 -0
- package/dist/pipeline/borderline-consensus-runner.js +124 -0
- package/dist/pipeline/borderline-detector.d.ts +24 -0
- package/dist/pipeline/borderline-detector.js +26 -0
- package/dist/pipeline/calculate-scores.d.ts +114 -3
- package/dist/pipeline/calculate-scores.js +426 -24
- package/dist/pipeline/compiler/literacy-bridge.d.ts +1 -1
- package/dist/pipeline/compiler/literacy-bridge.js +35 -17
- package/dist/pipeline/compiler/rubric-resolution.d.ts +15 -0
- package/dist/pipeline/compiler/rubric-resolution.js +9 -1
- package/dist/pipeline/compute-attribution.d.ts +80 -0
- package/dist/pipeline/compute-attribution.js +196 -0
- package/dist/pipeline/failure-modes.d.ts +52 -17
- package/dist/pipeline/failure-modes.js +178 -117
- package/dist/pipeline/map-request-to-config.js +1 -0
- package/package.json +7 -5
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared Zod `.refine()` helpers for diagnosis card body schemas (D-05).
|
|
3
|
+
*
|
|
4
|
+
* Card body schemas that mention failure modes MUST encode `dimension` AND
|
|
5
|
+
* call `.refine(buildFailureModeRefinement())` to prevent cross-dimension
|
|
6
|
+
* hallucinations from passing Zod (CONTEXT D-05). The refinement turns a
|
|
7
|
+
* "Zod-passes, semantically wrong" LLM output into a `parseFailed: true`
|
|
8
|
+
* degraded card, which the budget logic then absorbs.
|
|
9
|
+
*
|
|
10
|
+
* This file is a sibling of cards/ (NOT inside cards/), so it is NOT inside
|
|
11
|
+
* the D0045 trust-boundary scan root. It is a pure utility, not a boundary
|
|
12
|
+
* parser.
|
|
13
|
+
*
|
|
14
|
+
* @see packages/core/src/grader/failure-modes/index.ts
|
|
15
|
+
* @see .planning/phases/05-diagnosis-engine-cli-llm-cards/05-CONTEXT.md (D-05)
|
|
16
|
+
*/
|
|
17
|
+
/**
 * Checks a card body's `(dimension, failureMode)` pair against the canonical
 * failure-mode taxonomy.
 *
 * @param body - Object carrying the `dimension` and the `failureMode` to check.
 * @returns `true` when `body.failureMode` belongs to the canonical taxonomy
 *   for `body.dimension`. Callable directly, and also the check that
 *   `buildFailureModeRefinement` wraps.
 */
export declare function isFailureModeInDimensionTaxonomy(body: { dimension: string; failureMode: string }): boolean;
|
|
25
|
+
/**
 * Builds a predicate for Zod's `.refine()` that accepts canonical
 * `(dimension, failureMode)` pairs and rejects cross-dimension ones.
 *
 * Example:
 * ```ts
 * export const FailureModeSummaryBodySchema = z
 *   .object({ dimension: z.string(), failureMode: z.string(), ... })
 *   .refine(buildFailureModeRefinement(), {
 *     message: "failureMode not in canonical taxonomy for this dimension",
 *   }) satisfies z.ZodType<FailureModeSummaryBody>
 * ```
 *
 * @typeParam TBody - Body shape; must expose string `dimension` and
 *   `failureMode` fields.
 * @returns A function mapping a body to `true` (canonical pair) or `false`.
 */
export declare function buildFailureModeRefinement<
    TBody extends { dimension: string; failureMode: string }
>(): (body: TBody) => boolean;
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared Zod `.refine()` helpers for diagnosis card body schemas (D-05).
|
|
3
|
+
*
|
|
4
|
+
* Card body schemas that mention failure modes MUST encode `dimension` AND
|
|
5
|
+
* call `.refine(buildFailureModeRefinement())` to prevent cross-dimension
|
|
6
|
+
* hallucinations from passing Zod (CONTEXT D-05). The refinement turns a
|
|
7
|
+
* "Zod-passes, semantically wrong" LLM output into a `parseFailed: true`
|
|
8
|
+
* degraded card, which the budget logic then absorbs.
|
|
9
|
+
*
|
|
10
|
+
* This file is a sibling of cards/ (NOT inside cards/), so it is NOT inside
|
|
11
|
+
* the D0045 trust-boundary scan root. It is a pure utility, not a boundary
|
|
12
|
+
* parser.
|
|
13
|
+
*
|
|
14
|
+
* @see packages/core/src/grader/failure-modes/index.ts
|
|
15
|
+
* @see .planning/phases/05-diagnosis-engine-cli-llm-cards/05-CONTEXT.md (D-05)
|
|
16
|
+
*/
|
|
17
|
+
import { failureModesForDimension } from "../../grader/failure-modes/index.js";
|
|
18
|
+
/**
 * Checks a card body's `(dimension, failureMode)` pair against the canonical
 * failure-mode taxonomy.
 *
 * @param body - Object with `dimension` and `failureMode` properties.
 * @returns `true` when the list returned by
 *   `failureModesForDimension(body.dimension)` includes `body.failureMode`.
 */
export function isFailureModeInDimensionTaxonomy(body) {
    // NOTE(review): relies on failureModesForDimension returning an array for
    // every dimension string it is given, including unknown ones — confirm.
    const canonicalModes = failureModesForDimension(body.dimension);
    return canonicalModes.includes(body.failureMode);
}
|
|
25
|
+
/**
 * Builds a predicate for Zod's `.refine()` that accepts canonical
 * `(dimension, failureMode)` pairs and rejects cross-dimension ones.
 *
 * Example:
 * ```ts
 * export const FailureModeSummaryBodySchema = z
 *   .object({ dimension: z.string(), failureMode: z.string(), ... })
 *   .refine(buildFailureModeRefinement(), {
 *     message: "failureMode not in canonical taxonomy for this dimension",
 *   }) satisfies z.ZodType<FailureModeSummaryBody>
 * ```
 *
 * @returns A one-argument function that forwards the body to
 *   `isFailureModeInDimensionTaxonomy` and returns its result.
 */
export function buildFailureModeRefinement() {
    // Each call hands back a fresh closure around the shared taxonomy check.
    const refinement = (body) => isFailureModeInDimensionTaxonomy(body);
    return refinement;
}
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* area-summary.test.ts — Tests 1-4 for the deterministic area-summary card.
|
|
3
|
+
*
|
|
4
|
+
* D0042: vitest only, no node:test.
|
|
5
|
+
* D0045: AreaSummaryBodySchema satisfies z.ZodType<AreaSummaryBody> compile-time gate.
|
|
6
|
+
*/
|
|
7
|
+
import { describe, expect, it } from "vitest";
|
|
8
|
+
import { AreaSummaryBodySchema, generateAreaSummary } from "../area-summary.js";
|
|
9
|
+
// Test 1 gate: alias of the schema kept for the compile-time check described
// in the file header; at runtime it is just a reference to the schema.
const _satisfiesCheck = AreaSummaryBodySchema;
// Logger stub: every method does nothing and returns undefined.
const silentLogger = {
    debug() { },
    info() { },
    warn() { },
    error() { },
    step() { },
    section() { },
    table() { },
};
// Progress-reporter stub: every phase hook does nothing.
const noopProgress = {
    phaseStart() { },
    phaseProgress() { },
    phaseComplete() { },
};
|
|
25
|
+
/**
 * Builds the context fixture passed to the card generator: no LLM client,
 * the silent logger, the no-op progress reporter, and fixed version, run and
 * report identifiers.
 *
 * @returns A fresh context object on every call.
 */
function makeCtx() {
    const versions = {
        graderJudgmentsVersion: "1.0.0",
        ensembleVersion: "1.0.0",
        diagnosisVersion: "0.1.0",
        cardVersion: "1.0.0",
    };
    const ctx = {
        llm: undefined,
        model: "anthropic:claude-sonnet-4-6",
        logger: silentLogger,
        progress: noopProgress,
        versions,
        runId: "run-001",
        reportId: "report-001",
    };
    return ctx;
}
|
|
41
|
+
function makeReportWithScores(scores) {
|
|
42
|
+
return {
|
|
43
|
+
id: "report-001",
|
|
44
|
+
completedAt: "2026-01-01T00:00:00Z",
|
|
45
|
+
durationMs: 1000,
|
|
46
|
+
provenance: {
|
|
47
|
+
runId: "run-001",
|
|
48
|
+
mode: "standard",
|
|
49
|
+
areas: [],
|
|
50
|
+
taskIds: [],
|
|
51
|
+
models: [],
|
|
52
|
+
graderModel: "gpt-4o",
|
|
53
|
+
source: "local",
|
|
54
|
+
evalFingerprint: "",
|
|
55
|
+
trigger: "manual",
|
|
56
|
+
git: undefined,
|
|
57
|
+
},
|
|
58
|
+
summary: {
|
|
59
|
+
belowCritical: [],
|
|
60
|
+
lowestArea: "groq",
|
|
61
|
+
lowestScore: Math.min(...(scores.length ? scores : [0])),
|
|
62
|
+
overall: {
|
|
63
|
+
avgCeilingScore: 80,
|
|
64
|
+
avgScore: 75,
|
|
65
|
+
avgDocLift: 5,
|
|
66
|
+
avgDocQualityGap: 20,
|
|
67
|
+
avgFloorScore: 70,
|
|
68
|
+
negativeDocLiftCount: 0,
|
|
69
|
+
},
|
|
70
|
+
scores: scores.map((s, i) => ({
|
|
71
|
+
feature: `area-${i + 1}`,
|
|
72
|
+
ceilingScore: 80,
|
|
73
|
+
codeCorrectness: s,
|
|
74
|
+
docCoverage: s,
|
|
75
|
+
docLift: 5,
|
|
76
|
+
docQualityGap: 20,
|
|
77
|
+
floorScore: s - 5,
|
|
78
|
+
negativeDocLift: false,
|
|
79
|
+
taskCompletion: s,
|
|
80
|
+
testCount: 10,
|
|
81
|
+
totalCost: 0.05,
|
|
82
|
+
totalScore: s,
|
|
83
|
+
})),
|
|
84
|
+
},
|
|
85
|
+
};
|
|
86
|
+
}
|
|
87
|
+
// Test 1: a minimal valid body parses, and the schema alias is defined.
describe("AreaSummaryBodySchema (Test 1)", () => {
    it("satisfies z.ZodType<AreaSummaryBody> (compile-time check assigned above)", () => {
        // Runtime half of the check: a payload with only `summary` is accepted.
        const validPayload = { summary: "test summary" };
        const { success } = AreaSummaryBodySchema.safeParse(validPayload);
        expect(success).toBe(true);
        // The alias declared at module level carries the compile-time half.
        expect(_satisfiesCheck).toBeDefined();
    });
});
|
|
96
|
+
// Test 2: three areas produce a ready card with the expected type and meta.
describe("generateAreaSummary (Test 2)", () => {
    it("returns a ready card with summary including area count and mean score for 3 areas at [80, 60, 70]", async () => {
        const card = await generateAreaSummary(makeReportWithScores([80, 60, 70]), makeCtx());
        expect(card.status).toBe("ready");
        // The assertion above already fails on any other status; this guard
        // only keeps the property reads below on the ready shape.
        if (card.status !== "ready") {
            return;
        }
        const { cardType, meta } = card;
        expect(cardType).toBe("area-summary");
        expect(meta.cardVersion).toBe("area-summary@0.1.0");
        expect(meta.generatedAt).toBeTruthy();
    });
});
|
|
108
|
+
// Test 3: the summary text mentions the area count and the mean score.
describe("generateAreaSummary — summary content (Test 3)", () => {
    it("summary includes BOTH the area count AND mean score to one decimal", async () => {
        const threeAreaReport = makeReportWithScores([80, 60, 70]);
        const card = await generateAreaSummary(threeAreaReport, makeCtx());
        expect(card.status).toBe("ready");
        if (card.status !== "ready") {
            return;
        }
        const { summary } = card.body;
        // Three areas; (80 + 60 + 70) / 3 = 70.0
        expect(summary).toContain("3");
        expect(summary).toContain("70.0");
    });
});
|
|
121
|
+
// Test 4: a report without areas yields a "missing" card with a reason.
describe("generateAreaSummary — zero areas guard (Test 4)", () => {
    it("returns missing card with 'report has no areas' reason for zero-area report", async () => {
        const emptyReport = makeReportWithScores([]);
        const card = await generateAreaSummary(emptyReport, makeCtx());
        expect(card.status).toBe("missing");
        if (card.status !== "missing") {
            return;
        }
        expect(card.cardType).toBe("area-summary");
        expect(card.reason).toContain("no areas");
    });
});
|
package/dist/_vendor/ailf-core/services/diagnosis/cards/__tests__/failure-mode-summary.test.js
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* failure-mode-summary.test.ts — Tests 5-9 for deterministic failure-mode-summary card.
|
|
3
|
+
*
|
|
4
|
+
* D0042: vitest only.
|
|
5
|
+
* D-05: schema must reject cross-dimension (dimension, failureMode) pairs.
|
|
6
|
+
*/
|
|
7
|
+
import { describe, expect, it } from "vitest";
|
|
8
|
+
import { FailureModeSummaryBodySchema, generateFailureModeSummary, } from "../failure-mode-summary.js";
|
|
9
|
+
import { failureModesForDimension } from "../../../../grader/failure-modes/index.js";
|
|
10
|
+
// Compile-time gate: alias of the schema; at runtime it is just a reference
// to the schema object.
const _satisfiesCheck = FailureModeSummaryBodySchema;
// Logger stub: every method does nothing and returns undefined.
const silentLogger = {
    debug() { },
    info() { },
    warn() { },
    error() { },
    step() { },
    section() { },
    table() { },
};
// Progress-reporter stub: every phase hook does nothing.
const noopProgress = {
    phaseStart() { },
    phaseProgress() { },
    phaseComplete() { },
};
|
|
26
|
+
/**
 * Builds the context fixture passed to the card generator: no LLM client,
 * the silent logger, the no-op progress reporter, and fixed version, run and
 * report identifiers.
 *
 * @returns A fresh context object on every call.
 */
function makeCtx() {
    const versions = {
        graderJudgmentsVersion: "1.0.0",
        ensembleVersion: "1.0.0",
        diagnosisVersion: "0.1.0",
        cardVersion: "1.0.0",
    };
    const ctx = {
        llm: undefined,
        model: "anthropic:claude-sonnet-4-6",
        logger: silentLogger,
        progress: noopProgress,
        versions,
        runId: "run-001",
        reportId: "report-001",
    };
    return ctx;
}
|
|
42
|
+
/**
 * Builds a single-area ("groq") report fixture whose `summary.failureModes`
 * is whatever the caller passes in.
 *
 * @param failureModes - Value stored as-is under `summary.failureModes`;
 *   the tests pass either a failure-mode aggregate or `undefined`.
 * @returns A plain report object; every other field is a fixed value.
 */
function makeReport(failureModes) {
    const provenance = {
        runId: "run-001",
        mode: "standard",
        areas: [],
        taskIds: [],
        models: [],
        graderModel: "gpt-4o",
        source: "local",
        evalFingerprint: "",
        trigger: "manual",
        git: undefined,
    };
    const overall = {
        avgCeilingScore: 80,
        avgScore: 70,
        avgDocLift: 5,
        avgDocQualityGap: 20,
        avgFloorScore: 65,
        negativeDocLiftCount: 0,
    };
    const groqScore = {
        feature: "groq",
        ceilingScore: 80,
        codeCorrectness: 70,
        docCoverage: 65,
        docLift: 5,
        docQualityGap: 20,
        floorScore: 65,
        negativeDocLift: false,
        taskCompletion: 70,
        testCount: 10,
        totalCost: 0.05,
        totalScore: 70,
    };
    const summary = {
        belowCritical: [],
        lowestArea: "groq",
        lowestScore: 50,
        overall,
        scores: [groqScore],
        failureModes,
    };
    return {
        id: "report-001",
        completedAt: "2026-01-01T00:00:00Z",
        durationMs: 1000,
        provenance,
        summary,
    };
}
|
|
91
|
+
// Test 5: a body carrying all five fields with a canonical mode parses.
describe("FailureModeSummaryBodySchema — field structure (Test 5)", () => {
    it("includes dimension, failureMode, count, sampleSize, summary fields", () => {
        const taskCompletionModes = failureModesForDimension("task-completion");
        const fullBody = {
            summary: "test summary",
            dimension: "task-completion",
            failureMode: taskCompletionModes[0],
            count: 5,
            sampleSize: 20,
        };
        const { success } = FailureModeSummaryBodySchema.safeParse(fullBody);
        expect(success).toBe(true);
        // Module-level alias kept for the compile-time gate.
        expect(_satisfiesCheck).toBeDefined();
    });
});
|
|
106
|
+
// Test 6 (D-05): the schema's refinement must reject a failure mode that does
// not belong to the body's dimension.
describe("FailureModeSummaryBodySchema — D-05 refine rejects cross-dimension (Test 6)", () => {
    it("rejects a failure mode from outside the task-completion taxonomy (spec-mismatch)", () => {
        // Simulates a cross-dimension hallucination.
        // NOTE(review): "spec-mismatch" is presumed to be an mcp-behavior mode —
        // confirm against the taxonomy.
        const crossDimensionMode = "spec-mismatch";
        const taskCompletionModes = failureModesForDimension("task-completion");
        // Premise: the mode under test really is outside this dimension.
        expect(taskCompletionModes).not.toContain(crossDimensionMode);
        // Same payload for both parses; only `failureMode` varies.
        const parseWithMode = (failureMode) => FailureModeSummaryBodySchema.safeParse({
            summary: "test",
            dimension: "task-completion",
            failureMode,
            count: 1,
            sampleSize: 5,
        });
        // Control: with a canonical mode the payload is accepted, so the
        // rejection below can only come from the (dimension, failureMode) pair.
        expect(parseWithMode(taskCompletionModes[0]).success).toBe(true);
        const result = parseWithMode(crossDimensionMode);
        expect(result.success).toBe(false);
    });
});
|
|
121
|
+
// Test 7: a (dimension, failureMode) pair taken from the taxonomy is accepted.
describe("FailureModeSummaryBodySchema — accepts canonical pair (Test 7)", () => {
    it("accepts a valid (dimension, failureMode) pair from the taxonomy", () => {
        const dimension = "task-completion";
        // First mode listed for the dimension.
        const failureMode = failureModesForDimension("task-completion")[0];
        const parsed = FailureModeSummaryBodySchema.safeParse({
            summary: "test summary about missing docs",
            dimension,
            failureMode,
            count: 3,
            sampleSize: 15,
        });
        expect(parsed.success).toBe(true);
    });
});
|
|
134
|
+
// Test 8: a report with one dominant failure mode yields a ready card.
describe("generateFailureModeSummary — happy path (Test 8)", () => {
    it("returns a ready card with the dominant failure mode for a report with failure modes", async () => {
        const canonicalMode = failureModesForDimension("task-completion")[0];
        // The only failure-mode entry in the report, with 7 occurrences.
        const dominantTitle = {
            id: `task-completion::${canonicalMode}`,
            category: canonicalMode,
            severity: "high",
            title: canonicalMode,
            count: 7,
        };
        const failureModes = {
            counts: { "task-completion": 7 },
            topTitles: [dominantTitle],
            totalJudgments: 20,
            classificationRate: 0.35,
        };
        const card = await generateFailureModeSummary(makeReport(failureModes), makeCtx());
        expect(card.status).toBe("ready");
        if (card.status !== "ready") {
            return;
        }
        expect(card.cardType).toBe("failure-mode-summary");
        expect(card.meta.cardVersion).toBe("failure-mode-summary@0.1.0");
        expect(card.body.count).toBeGreaterThan(0);
    });
});
|
|
161
|
+
// Test 9: a report whose `failureModes` is undefined yields a "missing" card.
describe("generateFailureModeSummary — empty failure modes (Test 9)", () => {
    it("returns missing card when report has no failure modes", async () => {
        const reportWithoutModes = makeReport(undefined);
        const card = await generateFailureModeSummary(reportWithoutModes, makeCtx());
        expect(card.status).toBe("missing");
        if (card.status !== "missing") {
            return;
        }
        expect(card.cardType).toBe("failure-mode-summary");
        expect(card.reason).toContain("no failure modes");
    });
});
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* no-issues.test.ts — Tests 10-12 for the deterministic no-issues card.
|
|
3
|
+
*
|
|
4
|
+
* D0042: vitest only.
|
|
5
|
+
* AI-SPEC §1b failure-mode #7: sycophancy guard — firing rate ≤30%.
|
|
6
|
+
*/
|
|
7
|
+
import { describe, expect, it } from "vitest";
|
|
8
|
+
import { NO_ISSUES_THRESHOLD, NoIssuesBodySchema, generateNoIssues, } from "../no-issues.js";
|
|
9
|
+
// Compile-time gate: alias of the schema; at runtime it is just a reference
// to the schema object.
const _satisfiesCheck = NoIssuesBodySchema;
// Logger stub: every method does nothing and returns undefined.
const silentLogger = {
    debug() { },
    info() { },
    warn() { },
    error() { },
    step() { },
    section() { },
    table() { },
};
// Progress-reporter stub: every phase hook does nothing.
const noopProgress = {
    phaseStart() { },
    phaseProgress() { },
    phaseComplete() { },
};
|
|
25
|
+
/**
 * Builds the context fixture passed to the card generator: no LLM client,
 * the silent logger, the no-op progress reporter, and fixed version, run and
 * report identifiers.
 *
 * @returns A fresh context object on every call.
 */
function makeCtx() {
    const versions = {
        graderJudgmentsVersion: "1.0.0",
        ensembleVersion: "1.0.0",
        diagnosisVersion: "0.1.0",
        cardVersion: "1.0.0",
    };
    const ctx = {
        llm: undefined,
        model: "anthropic:claude-sonnet-4-6",
        logger: silentLogger,
        progress: noopProgress,
        versions,
        runId: "run-001",
        reportId: "report-001",
    };
    return ctx;
}
|
|
41
|
+
function makeReportWithScores(scores) {
|
|
42
|
+
return {
|
|
43
|
+
id: "report-001",
|
|
44
|
+
completedAt: "2026-01-01T00:00:00Z",
|
|
45
|
+
durationMs: 1000,
|
|
46
|
+
provenance: {
|
|
47
|
+
runId: "run-001",
|
|
48
|
+
mode: "standard",
|
|
49
|
+
areas: [],
|
|
50
|
+
taskIds: [],
|
|
51
|
+
models: [],
|
|
52
|
+
graderModel: "gpt-4o",
|
|
53
|
+
source: "local",
|
|
54
|
+
evalFingerprint: "",
|
|
55
|
+
trigger: "manual",
|
|
56
|
+
git: undefined,
|
|
57
|
+
},
|
|
58
|
+
summary: {
|
|
59
|
+
belowCritical: [],
|
|
60
|
+
lowestArea: scores.length ? `area-0` : "",
|
|
61
|
+
lowestScore: scores.length ? Math.min(...scores) : 0,
|
|
62
|
+
overall: {
|
|
63
|
+
avgCeilingScore: 90,
|
|
64
|
+
avgScore: scores.length
|
|
65
|
+
? scores.reduce((a, b) => a + b, 0) / scores.length
|
|
66
|
+
: 0,
|
|
67
|
+
avgDocLift: 5,
|
|
68
|
+
avgDocQualityGap: 10,
|
|
69
|
+
avgFloorScore: 80,
|
|
70
|
+
negativeDocLiftCount: 0,
|
|
71
|
+
},
|
|
72
|
+
scores: scores.map((s, i) => ({
|
|
73
|
+
feature: `area-${i}`,
|
|
74
|
+
ceilingScore: 95,
|
|
75
|
+
codeCorrectness: s,
|
|
76
|
+
docCoverage: s,
|
|
77
|
+
docLift: 5,
|
|
78
|
+
docQualityGap: 5,
|
|
79
|
+
floorScore: s - 5,
|
|
80
|
+
negativeDocLift: false,
|
|
81
|
+
taskCompletion: s,
|
|
82
|
+
testCount: 10,
|
|
83
|
+
totalCost: 0.05,
|
|
84
|
+
totalScore: s,
|
|
85
|
+
})),
|
|
86
|
+
},
|
|
87
|
+
};
|
|
88
|
+
}
|
|
89
|
+
// Test 10: the exported threshold is a number in the range (0, 100].
describe("NO_ISSUES_THRESHOLD (Test 10)", () => {
    it("is exported as a const with a numeric value", () => {
        const threshold = NO_ISSUES_THRESHOLD;
        expect(typeof threshold).toBe("number");
        expect(threshold).toBeGreaterThan(0);
        expect(threshold).toBeLessThanOrEqual(100);
        // Module-level alias kept for the compile-time gate.
        expect(_satisfiesCheck).toBeDefined();
    });
});
|
|
98
|
+
// Test 11a: every area at or above the threshold produces a ready card.
describe("generateNoIssues — ready when all areas ≥ threshold (Test 11a)", () => {
    it("returns ready card with thresholdScore when all areas score ≥ threshold", async () => {
        // Exactly at the threshold, then 5 and 10 points above it.
        const passingScores = [0, 5, 10].map((offset) => NO_ISSUES_THRESHOLD + offset);
        const card = await generateNoIssues(makeReportWithScores(passingScores), makeCtx());
        expect(card.status).toBe("ready");
        if (card.status !== "ready") {
            return;
        }
        expect(card.cardType).toBe("no-issues");
        expect(card.meta.cardVersion).toBe("no-issues@0.1.0");
        const { body } = card;
        expect(body.thresholdScore).toBe(NO_ISSUES_THRESHOLD);
        expect(body.summary).toBeTruthy();
    });
});
|
|
117
|
+
// Test 11b — a single area one point under the threshold makes the card "missing".
describe("generateNoIssues — missing when any area below threshold (Test 11b)", () => {
    it("returns missing when at least one area scores below the threshold", async () => {
        const justBelow = NO_ISSUES_THRESHOLD - 1;
        const wellAbove = NO_ISSUES_THRESHOLD + 10;
        const card = await generateNoIssues(makeReportWithScores([justBelow, wellAbove]), makeCtx());
        expect(card.status).toBe("missing");
        // Guard clause: the remaining checks only apply to a missing card.
        if (card.status !== "missing") {
            return;
        }
        expect(card.cardType).toBe("no-issues");
        expect(card.reason).toContain("below threshold");
    });
});
|
|
129
|
+
// Test 12 — calibration: the no-issues card must not fire on more than 30% of
// a fixed ten-report fixture set.
describe("generateNoIssues — calibration firing rate ≤30% (Test 12)", () => {
    it("fires ≤30% on a 10-report fixture set spanning low to high scores", async () => {
        // Ten two-area reports, ordered from lowest to highest scores.
        // The first seven pairs top out at 82 or lower; the last three start at 90.
        // Assuming a card is "ready" only when every area is ≥ NO_ISSUES_THRESHOLD,
        // at most the last three reports can fire for any threshold above 82.
        const areaScorePairs = [
            [0, 10],
            [20, 30],
            [40, 50],
            [60, 65],
            [70, 75],
            [75, 80],
            [80, 82],
            [90, 92],
            [95, 97],
            [98, 99],
        ];
        const fixtureSet = areaScorePairs.map((pair) => makeReportWithScores(pair));
        const ctx = makeCtx();
        // Generate cards one at a time, sharing a single context, and count
        // how many come back ready.
        let readyCards = 0;
        for (const report of fixtureSet) {
            const card = await generateNoIssues(report, ctx);
            readyCards += card.status === "ready" ? 1 : 0;
        }
        expect(readyCards / fixtureSet.length).toBeLessThanOrEqual(0.3);
    });
});
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* area-summary card — deterministic projection over Report.summary.scores.
|
|
3
|
+
*
|
|
4
|
+
* Pure computation, no LLM call. Produces a human-readable sentence
|
|
5
|
+
* summarizing the number of evaluated areas and their mean composite score.
|
|
6
|
+
*
|
|
7
|
+
* Schema is in the D0045 trust-boundary scan root; `satisfies` clause is
|
|
8
|
+
* mandatory (cards/ is a SCAN_ROOT in check-trust-boundary-satisfies.ts).
|
|
9
|
+
*
|
|
10
|
+
* @see .planning/phases/05-diagnosis-engine-cli-llm-cards/05-AI-SPEC.md §3 lines 569-587
|
|
11
|
+
*/
|
|
12
|
+
import { z } from "zod";
|
|
13
|
+
import type { CardGenerator } from "../../diagnosis-runner.js";
|
|
14
|
+
/**
 * Zod schema for the body of an `area-summary` card.
 *
 * The body is an object with a single string field, `summary`; the schema is
 * declared with Zod's `$strip` object mode.
 */
export declare const AreaSummaryBodySchema: z.ZodObject<{
    summary: z.ZodString;
}, z.core.$strip>;
|
|
17
|
+
/**
 * Card generator for the `area-summary` card.
 *
 * Per this module's header it is a pure computation (no LLM call) that
 * summarizes the number of evaluated areas and their mean composite score.
 */
export declare const generateAreaSummary: CardGenerator;
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* area-summary card — deterministic projection over Report.summary.scores.
|
|
3
|
+
*
|
|
4
|
+
* Pure computation, no LLM call. Produces a human-readable sentence
|
|
5
|
+
* summarizing the number of evaluated areas and their mean composite score.
|
|
6
|
+
*
|
|
7
|
+
* Schema is in the D0045 trust-boundary scan root; `satisfies` clause is
|
|
8
|
+
* mandatory (cards/ is a SCAN_ROOT in check-trust-boundary-satisfies.ts).
|
|
9
|
+
*
|
|
10
|
+
* @see .planning/phases/05-diagnosis-engine-cli-llm-cards/05-AI-SPEC.md §3 lines 569-587
|
|
11
|
+
*/
|
|
12
|
+
import { z } from "zod";
|
|
13
|
+
// ---------------------------------------------------------------------------
|
|
14
|
+
// Body schema (D0045 trust boundary — satisfies required)
|
|
15
|
+
// ---------------------------------------------------------------------------
|
|
16
|
+
/**
 * Body schema for the area-summary card.
 *
 * `summary` must be a non-empty string of at most 500 characters.
 */
const areaSummaryText = z.string().min(1).max(500);
export const AreaSummaryBodySchema = z.object({ summary: areaSummaryText });
|
|
19
|
+
// ---------------------------------------------------------------------------
|
|
20
|
+
// Generator
|
|
21
|
+
// ---------------------------------------------------------------------------
|
|
22
|
+
/**
 * Builds the `area-summary` card from `report.summary.scores`.
 *
 * @param report - Report whose `summary.scores` entries each carry a `totalScore`.
 * @returns A `"missing"` card (reason "report has no areas") when there are no
 *   score entries; otherwise a `"ready"` card whose body holds a one-sentence
 *   summary of the area count and the mean `totalScore` (one decimal place).
 * @throws Whatever `AreaSummaryBodySchema.parse` throws if the summary fails validation.
 */
export const generateAreaSummary = async (report) => {
    const areaScores = report.summary.scores;
    const hasAreas = Boolean(areaScores) && areaScores.length !== 0;
    if (!hasAreas) {
        return {
            status: "missing",
            cardType: "area-summary",
            reason: "report has no areas",
        };
    }
    // Arithmetic mean of totalScore across all areas.
    const areaCount = areaScores.length;
    const scoreTotal = areaScores.reduce((running, area) => running + area.totalScore, 0);
    const mean = scoreTotal / areaCount;
    // Validate the body against the schema before returning it.
    const body = AreaSummaryBodySchema.parse({
        summary: `Across ${areaCount} areas, mean score ${mean.toFixed(1)}.`,
    });
    const generatedAt = new Date().toISOString();
    return {
        status: "ready",
        cardType: "area-summary",
        body,
        meta: { cardVersion: "area-summary@0.1.0", generatedAt },
    };
};
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* doc-attribution-spotlight card — LLM-driven doc-citation spotlight.
|
|
3
|
+
*
|
|
4
|
+
* Model: claude-sonnet-4-6 (routine per AI-SPEC §4 model routing)
|
|
5
|
+
* Version: doc-attribution-spotlight@0.1.0
|
|
6
|
+
*
|
|
7
|
+
* Landmine 11: reads `ctx.judgmentAttributions` (NOT Report.summary).
|
|
8
|
+
* Returns `status: "missing"` when attributions are undefined or empty.
|
|
9
|
+
*
|
|
10
|
+
* Mitigations:
|
|
11
|
+
* - failure-mode #5: docCitations[].docSlug refined against the manifest
|
|
12
|
+
* allow-list so hallucinated slugs fail Zod parse
|
|
13
|
+
*
|
|
14
|
+
* Schema is in the D0045 trust-boundary scan root; `satisfies` clause is
|
|
15
|
+
* mandatory.
|
|
16
|
+
*
|
|
17
|
+
* @see .planning/phases/05-diagnosis-engine-cli-llm-cards/05-RESEARCH.md (Landmine 11)
|
|
18
|
+
* @see docs/decisions/D0052-judgment-ref-granularity.md
|
|
19
|
+
*/
|
|
20
|
+
import { z } from "zod";
|
|
21
|
+
import type { CardGenerator } from "../../diagnosis-runner.js";
|
|
22
|
+
/**
 * Module-level static shape of the `doc-attribution-spotlight` card body.
 * Per-call adds the allow-list refine on `docSlug`.
 *
 * Fields:
 * - `summary`: string.
 * - `docCitations`: array of citations, each with
 *   - `docSlug`: string,
 *   - `confidence`: `{ level: "low" | "medium" | "high", signalsPresent: number, derivation: string }`,
 *   - `role`: `"missing" | "supports" | "contradicts" | "irrelevant"`.
 */
export declare const DocAttributionSpotlightBodySchema: z.ZodObject<{
    summary: z.ZodString;
    docCitations: z.ZodArray<z.ZodObject<{
        docSlug: z.ZodString;
        confidence: z.ZodObject<{
            level: z.ZodEnum<{
                low: "low";
                medium: "medium";
                high: "high";
            }>;
            signalsPresent: z.ZodNumber;
            derivation: z.ZodString;
        }, z.core.$strip>;
        role: z.ZodEnum<{
            missing: "missing";
            supports: "supports";
            contradicts: "contradicts";
            irrelevant: "irrelevant";
        }>;
    }, z.core.$strip>>;
}, z.core.$strip>;
|
|
46
|
+
/**
 * Card generator for the `doc-attribution-spotlight` card.
 *
 * Per this module's header it is LLM-driven, reads `ctx.judgmentAttributions`
 * (not `Report.summary`), and returns `status: "missing"` when attributions
 * are undefined or empty.
 */
export declare const generateDocAttributionSpotlight: CardGenerator;
|