npm - @sanity/ailf - Versions diffs - 5.0.0 → 6.1.0 - Mend

@sanity/ailf 5.0.0 → 6.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (115) hide show

package/dist/_vendor/ailf-core/services/diagnosis-runner.js CHANGED Viewed

@@ -1,12 +1,22 @@
 /**
  * Diagnosis runner — engine entry point (D0048).
  *
- * Phase 1 lands the version constant only; the runner factory + cache
- * lookup land in Phase 5.
+ * Phase 5 implements the factory body; Phase 1 shipped `diagnosisVersion` only.
+ * `GeneratorContext.judgmentAttributions` is sourced once per `.run({...})` via
+ * `deps.loadAttributions(runId)` reading Phase 4's
+ * `runs/{runId}/attribution/{entryKey}.json` per-entry artifacts (RESEARCH
+ * Landmine 11).
  *
  * @see docs/decisions/D0048-engine-homes-for-cli-api-parity.md
  * @see .planning/phases/01-foundation-contracts-cross-cutting-schemas/01-CONTEXT.md (D-02)
+ * @see .planning/phases/05-diagnosis-engine-cli-llm-cards/05-RESEARCH.md (Landmine 11)
+ * @see .planning/phases/05-diagnosis-engine-cli-llm-cards/05-CONTEXT.md (D-02, D-06, D-10)
  */
+import { z } from "zod";
+import { ARTIFACT_REGISTRY, encodeDiagnosisPathVersion, } from "../artifact-registry.js";
+// ---------------------------------------------------------------------------
+// Version constant (Phase 1 / VER-01 / D-02)
+// ---------------------------------------------------------------------------
 /**
  * Bumped when the runner's selection logic, prompt orchestration, or
  * card-set composition changes in a way that should invalidate cached
@@ -17,3 +27,127 @@
  * across vitest workers (cross-cutting hazard #2).
  */
 export const diagnosisVersion = "0.1.0";
+// ---------------------------------------------------------------------------
+// Private helpers
+// ---------------------------------------------------------------------------
+/**
+ * Build the deterministic cache key that incorporates all four version
+ * segments AND the model id (AI-SPEC §3 lines 463-473 + D-02).
+ *
+ * The artifact path from `ARTIFACT_REGISTRY.diagnosis.objectPath(...)` is
+ * already scoped by diagnosisVersion + cardVersion; `::grader=…`,
+ * `::ensemble=…` and `::model=…` are appended to cover the remaining axes.
+ */
+function buildCacheKey(report, versions, model) {
+    const artifactPath = ARTIFACT_REGISTRY.diagnosis.objectPath(report.provenance.runId, report.id, encodeDiagnosisPathVersion(versions.diagnosisVersion, versions.cardVersion));
+    // Embed the remaining two version axes + model in the key string. The
+    // artifact path already carries diagnosisVersion + cardVersion; the other
+    // two axes are appended here so any single-segment bump produces a
+    // distinct key.
+    return `${artifactPath}::grader=${versions.graderJudgmentsVersion}::ensemble=${versions.ensembleVersion}::model=${model}`;
+}
+/**
+ * Per-card invocation — generator errors are caught and returned as a
+ * "degraded" card; `parseFailed` is true only for ZodError (AI-SPEC §3 lines 530-552).
+ */
+async function runOne(generator, report, ctx, cardType) {
+    try {
+        return await generator(report, ctx);
+    }
+    catch (err) {
+        const meta = {
+            cardVersion: `${cardType}@unknown`,
+            generatedAt: new Date().toISOString(),
+        };
+        const isZodErr = err instanceof z.ZodError;
+        return {
+            status: "degraded",
+            cardType,
+            reason: err instanceof Error ? err.message : String(err),
+            parseFailed: isZodErr,
+            meta,
+        };
+    }
+}
+// ---------------------------------------------------------------------------
+// Factory (AI-SPEC §3 lines 458-523 + D-02 / Landmine-11 deltas)
+// ---------------------------------------------------------------------------
+/**
+ * Build a `DiagnosisRunner` whose `.run({report, versions, baseline?, refresh?})`
+ * produces a `Diagnosis` with cards in registry-order.
+ *
+ * No module-scope `let` — all state lives in the `deps` closure and per-run
+ * local variables (AI-SPEC §3 Pitfall 1).
+ */
+export function createDiagnosisRunner(deps) {
+    return {
+        async run({ report, versions, baseline, refresh }) {
+            const cachePath = buildCacheKey(report, versions, deps.model);
+            // Cache lookup (bypassed when --refresh).
+            if (!refresh) {
+                const cached = await deps.diagnosisReader(cachePath);
+                if (cached !== null)
+                    return cached;
+            }
+            // One-shot attribution load (Landmine 11 — Phase 4 per-entry artifacts).
+            let judgmentAttributions;
+            try {
+                judgmentAttributions = await deps.loadAttributions(report.provenance.runId);
+            }
+            catch (err) {
+                deps.logger.warn("diagnosis-runner: loadAttributions failed", {
+                    runId: report.provenance.runId,
+                    error: err instanceof Error ? err.message : String(err),
+                });
+                judgmentAttributions = undefined;
+            }
+            const ctx = {
+                llm: deps.llm,
+                model: deps.model,
+                logger: deps.logger,
+                progress: deps.progress,
+                versions,
+                runId: report.provenance.runId, // D-10: provenance.runId, NOT report.runId
+                reportId: report.id,
+                judgmentAttributions, // Landmine 11
+                ...(baseline ? { baseline } : {}),
+            };
+            const cardTypes = Object.keys(deps.registry);
+            const cards = [];
+            let parseFailures = 0;
+            for (const cardType of cardTypes) {
+                const generator = deps.registry[cardType];
+                // Parse-failure budget: the first parse-failing card is kept as
+                // "degraded"; every later parse-failing card is demoted to
+                // "missing" (AI-SPEC §3 lines 496-510 + must-have #4).
+                // The generator always runs for every card — the budget check
+                // happens AFTER the card result is obtained, never before.
+                const card = await runOne(generator, report, ctx, cardType);
+                if (card.status === "degraded" && card.parseFailed) {
+                    if (parseFailures >= 1) {
+                        // Budget exceeded — demote to missing.
+                        deps.logger.warn(`diagnosis-runner: parse-failure budget exceeded for card "${cardType}"; demoting to missing`, { reportId: report.id });
+                        cards.push({
+                            status: "missing",
+                            cardType,
+                            reason: "degraded-budget-exceeded",
+                        });
+                        continue;
+                    }
+                    parseFailures++;
+                }
+                cards.push(card);
+            }
+            const diagnosis = {
+                runId: report.provenance.runId, // D-10: provenance.runId
+                reportId: report.id,
+                inputs: versions,
+                cards,
+                generatedAt: new Date().toISOString(),
+            };
+            // Always written on this path (cache miss or refresh) — a refreshed call replaces the cached Diagnosis.
+            await deps.diagnosisWriter(cachePath, diagnosis);
+            return diagnosis;
+        },
+    };
+}

package/dist/_vendor/ailf-core/services/index.d.ts CHANGED Viewed

@@ -13,5 +13,9 @@ export { aggregateAreas, aggregateDimensions, computeEnsembleScore, computeTaskS
 export { extractModelName, extractProvider, mergeConfig, modelMatchesMode, resolveModelVariants, } from "./config-helpers.js";
 export { buildSlimReportSummary } from "./slim-report-summary.js";
 export { reportToMarkdown, type RenderableReport, } from "./report-to-markdown.js";
-export { diagnosisVersion } from "./diagnosis-runner.js";
+export { createDiagnosisRunner, diagnosisVersion, type CardGenerator, type CardRegistry, type DiagnosisRunner, type DiagnosisRunnerDeps, type DiagnosisRunnerRunArgs, type GeneratorContext, } from "./diagnosis-runner.js";
 export { cardRegistry, type CardDefinition } from "./diagnosis/registry.js";
+export { createLLMClient, type LLMClientAdapters, type LLMClientFactoryConfig, type LLMClientKeys, } from "./llm-client-factory.js";
+export { buildFailureModeRefinement, isFailureModeInDimensionTaxonomy, } from "./diagnosis/card-validators.js";
+export { CARD_REGISTRY_VERSION, DIAGNOSIS_CARD_GENERATORS, generateAreaSummary, generateFailureModeSummary, generateNoIssues, generateTopRecommendations, generateWeakestArea, generateLowConfidenceAttribution, generateDocAttributionSpotlight, generateRegressionVsBaseline, } from "./diagnosis/cards/index.js";
+export { buildTopRecommendationsPrompt, buildWeakestAreaPrompt, buildLowConfidenceAttributionPrompt, buildDocAttributionSpotlightPrompt, buildRegressionVsBaselinePrompt, buildDocSlugAllowList, } from "./diagnosis/prompt-builders.js";

package/dist/_vendor/ailf-core/services/index.js CHANGED Viewed

@@ -14,7 +14,20 @@ export { extractModelName, extractProvider, mergeConfig, modelMatchesMode, resol
 export { buildSlimReportSummary } from "./slim-report-summary.js";
 export { reportToMarkdown, } from "./report-to-markdown.js";
 // ---------------------------------------------------------------------------
-// Actionability ladder Phase 1 — diagnosis runner + card registry
+// Actionability ladder Phase 1 + Phase 5 — diagnosis runner + card registry
 // ---------------------------------------------------------------------------
-export { diagnosisVersion } from "./diagnosis-runner.js";
+export { createDiagnosisRunner, diagnosisVersion, } from "./diagnosis-runner.js";
 export { cardRegistry } from "./diagnosis/registry.js";
+// ---------------------------------------------------------------------------
+// Phase 5 — LLM client factory (D-01 hoist)
+// ---------------------------------------------------------------------------
+export { createLLMClient, } from "./llm-client-factory.js";
+// ---------------------------------------------------------------------------
+// Phase 5 — card validators (D-05 refine helpers)
+// ---------------------------------------------------------------------------
+export { buildFailureModeRefinement, isFailureModeInDimensionTaxonomy, } from "./diagnosis/card-validators.js";
+// ---------------------------------------------------------------------------
+// Phase 5 Plan 05 — card generators barrel + prompt builders
+// ---------------------------------------------------------------------------
+export { CARD_REGISTRY_VERSION, DIAGNOSIS_CARD_GENERATORS, generateAreaSummary, generateFailureModeSummary, generateNoIssues, generateTopRecommendations, generateWeakestArea, generateLowConfidenceAttribution, generateDocAttributionSpotlight, generateRegressionVsBaseline, } from "./diagnosis/cards/index.js";
+export { buildTopRecommendationsPrompt, buildWeakestAreaPrompt, buildLowConfidenceAttributionPrompt, buildDocAttributionSpotlightPrompt, buildRegressionVsBaselinePrompt, buildDocSlugAllowList, } from "./diagnosis/prompt-builders.js";

package/dist/_vendor/ailf-core/services/llm-client-factory.d.ts ADDED Viewed

@@ -0,0 +1,64 @@
+/**
+ * LLM client factory — hoisted from packages/eval/src/composition-root.ts
+ * so packages/api can build a DiagnosisRunner without importing eval (D-01).
+ *
+ * Adapter CLASSES stay in packages/eval/src/adapters/llm/. Only the factory
+ * function lives here. Adapter constructors are injected via `LLMClientAdapters`
+ * so core never static-imports vendor SDK code (D0051 invariant / T-05-01-01).
+ *
+ * @see docs/decisions/D0051-llm-client-port.md
+ * @see packages/eval/src/composition-root.ts — call site (updated to use this)
+ */
+import type { LLMClient } from "../ports/llm-client.js";
+import type { Logger } from "../ports/logger.js";
+/**
+ * Narrow config slice consumed by the LLM client factory.
+ * Does NOT depend on `ResolvedConfig` from packages/eval — only the
+ * llmProvider field is needed here.
+ */
+export interface LLMClientFactoryConfig {
+    readonly llmProvider?: "anthropic" | "openai";
+}
+/**
+ * Typed key bag passed to `createLLMClient`. The composition root reads
+ * env once and supplies values here; the factory stays pure so tests don't
+ * have to mutate `process.env`.
+ */
+export interface LLMClientKeys {
+    readonly anthropicApiKey?: string;
+    readonly openaiApiKey?: string;
+}
+/**
+ * Constructor callbacks for adapter classes that live in packages/eval.
+ * The eval composition root passes real constructors; tests pass spies.
+ *
+ * This pattern satisfies T-05-01-01: core never static-imports
+ * openai / @anthropic-ai/sdk. The vendor code stays in eval.
+ */
+export interface LLMClientAdapters {
+    readonly newAnthropicClient: (opts: {
+        apiKey: string;
+        logger: Logger;
+    }) => LLMClient;
+    readonly newOpenAIClient: (opts: {
+        apiKey: string;
+        logger: Logger;
+    }) => LLMClient;
+}
+/**
+ * Select the LLMClient adapter based on `config.llmProvider` (or, when unset,
+ * the first key present: Anthropic, then OpenAI). Returns `undefined` when the
+ * selected provider has no key — an explicit provider never falls back to the
+ * other one; `AppContext.llmClient` stays unset and consumers handle that.
+ *
+ * Adapters never read `process.env` themselves (per
+ * `.claude/rules/typescript.md`); env mapping happens at the call site
+ * (typically `createAppContext`).
+ *
+ * Adapter classes stay in packages/eval; they are passed in via `deps.adapters`
+ * so this factory has zero eval imports (D-01 / T-05-01-01).
+ */
+export declare function createLLMClient(config: LLMClientFactoryConfig, keys: LLMClientKeys, deps: {
+    logger: Logger;
+    adapters: LLMClientAdapters;
+}): LLMClient | undefined;

package/dist/_vendor/ailf-core/services/llm-client-factory.js ADDED Viewed

@@ -0,0 +1,54 @@
+/**
+ * LLM client factory — hoisted from packages/eval/src/composition-root.ts
+ * so packages/api can build a DiagnosisRunner without importing eval (D-01).
+ *
+ * Adapter CLASSES stay in packages/eval/src/adapters/llm/. Only the factory
+ * function lives here. Adapter constructors are injected via `LLMClientAdapters`
+ * so core never static-imports vendor SDK code (D0051 invariant / T-05-01-01).
+ *
+ * @see docs/decisions/D0051-llm-client-port.md
+ * @see packages/eval/src/composition-root.ts — call site (updated to use this)
+ */
+// ---------------------------------------------------------------------------
+// Factory function
+// ---------------------------------------------------------------------------
+/**
+ * Select the LLMClient adapter based on `config.llmProvider` (or, when unset,
+ * the first key present: Anthropic, then OpenAI). Returns `undefined` when the
+ * selected provider has no key — an explicit provider never falls back to the
+ * other one; `AppContext.llmClient` stays unset and consumers handle that.
+ *
+ * Adapters never read `process.env` themselves (per
+ * `.claude/rules/typescript.md`); env mapping happens at the call site
+ * (typically `createAppContext`).
+ *
+ * Adapter classes stay in packages/eval; they are passed in via `deps.adapters`
+ * so this factory has zero eval imports (D-01 / T-05-01-01).
+ */
+export function createLLMClient(config, keys, deps) {
+    const { logger, adapters } = deps;
+    const explicit = config.llmProvider;
+    const anthropicKey = keys.anthropicApiKey;
+    const openaiKey = keys.openaiApiKey;
+    // Auto-select: prefer Anthropic when both are present (matches the
+    // current grader's default model in config/models.ts).
+    const provider = explicit ?? (anthropicKey ? "anthropic" : openaiKey ? "openai" : undefined);
+    if (!provider) {
+        logger.debug("LLM client: not wired — no Anthropic or OpenAI API key supplied");
+        return undefined;
+    }
+    if (provider === "anthropic") {
+        if (!anthropicKey) {
+            logger.warn('llmProvider="anthropic" but no Anthropic API key supplied — LLMClient not wired');
+            return undefined;
+        }
+        logger.debug("LLM client: AnthropicLLMClient");
+        return adapters.newAnthropicClient({ apiKey: anthropicKey, logger });
+    }
+    if (!openaiKey) {
+        logger.warn('llmProvider="openai" but no OpenAI API key supplied — LLMClient not wired');
+        return undefined;
+    }
+    logger.debug("LLM client: OpenAILLMClient");
+    return adapters.newOpenAIClient({ apiKey: openaiKey, logger });
+}

package/dist/_vendor/ailf-core/types/diagnosis.d.ts CHANGED Viewed

@@ -8,14 +8,18 @@
  * discriminator inside the `ready` variant.
  *
  * Phase 1 lands placeholder body shapes; Phase 5 enriches each per
- * Doc 05 specs.
+ * AI-SPEC §3 and CONTEXT D-05/D-07. The `DiagnosisCard` discriminated
+ * union surface (arms + `cardType` literals) is stable — only the
+ * `body: <BodyInterface>` references resolve to richer shapes.
  *
  * @see docs/decisions/D0049-shared-confidence-contract.md
  * @see docs/decisions/D0052-judgment-ref-granularity.md
  * @see docs/decisions/D0050-artifact-registry-post-hoc-versioned-extension.md
  */
+import type { Confidence } from "./confidence.js";
 import type { RunId } from "./branded-ids.js";
 import type { ReportId } from "./index.js";
+import type { ModelId } from "../ports/llm-client.js";
 /**
  * The four-version cache envelope. Every cached `Diagnosis` carries the
  * versions of the inputs that produced it; any bump in any segment
@@ -51,47 +55,144 @@ export interface CardMeta {
     latencyMs?: number;
     /** ISO 8601 UTC timestamp. */
     generatedAt: string;
+    cost?: number;
+    model?: ModelId;
 }
 /**
  * A single actionable suggestion surfaced by a recommendations card.
- * The full Phase 5 shape may add fields (per Doc 05 specs); Phase 1
- * locks the minimum required surface.
+ *
+ * Phase 5 adds `docSlug` (the canonical doc page to rewrite) and
+ * `sectionHeading` (null when the suggestion targets the whole page)
+ * per AI-SPEC actionability-specificity rubric + failure-mode #2
+ * mitigation.
  */
 export interface ActionSuggestion {
     title: string;
     body: string;
     priority: "high" | "medium" | "low";
+    /** Canonical slug of the documentation page this suggestion targets. */
+    docSlug: string;
+    /**
+     * Heading within `docSlug` that should be revised, or `null` when the
+     * suggestion targets the page as a whole.
+     */
+    sectionHeading: string | null;
+}
+/**
+ * Minimal judgment reference per D0052 (taskId × modelId × dimension).
+ * Used by `LowConfidenceAttributionBody.judgmentRefs` to cite the
+ * specific judgments that drove a low-confidence finding.
+ */
+export interface JudgmentRef {
+    taskId: string;
+    modelId: string;
+    dimension: string;
 }
 /**
- * Phase 1 body placeholders. Each shape is intentionally minimal; Phase 5
- * card files enrich them per Doc 05 specs and assert
- * `satisfies z.ZodType<Extract<DiagnosisCard, { status: "ready"; cardType: "X" }>["body"]>`
- * against these declarations.
+ * Phase 5 enriched body shapes. Each keeps `summary: string` (load-bearing
+ * for CLI default render per AI-SPEC §6) and adds fields the corresponding
+ * Zod schema needs (asserting `satisfies z.ZodType<T>` in the card file).
  */
+/** area-summary: deterministic — keep only summary (no behavioral claims). */
 export interface AreaSummaryBody {
     summary: string;
 }
+/**
+ * failure-mode-summary: deterministic + D-05 dimension/failureMode gate.
+ * `count` = frequency in the report; `sampleSize` = judgment count for the
+ * dimension (per AI-SPEC failure-mode #3 mitigation).
+ */
 export interface FailureModeSummaryBody {
     summary: string;
+    /** Rubric dimension this summary targets (e.g. "task-completion"). */
+    dimension: string;
+    /** Canonical failure mode within this dimension. */
+    failureMode: string;
+    /** Number of judgments in this report with this failure mode. */
+    count: number;
+    /** Total judgments for this dimension — calibration denominator. */
+    sampleSize: number;
 }
+/**
+ * no-issues: deterministic + AI-SPEC failure-mode #7 sycophancy guard.
+ * `thresholdScore` surfaces the threshold used to qualify as "no issues"
+ * so readers can see the criterion behind the positive assessment.
+ */
 export interface NoIssuesBody {
     summary: string;
+    /** Minimum composite score that qualified this area as "no issues". */
+    thresholdScore: number;
 }
+/**
+ * top-recommendations: LLM-driven. `suggestions` reuses the enriched
+ * `ActionSuggestion` shape (docSlug + sectionHeading per AI-SPEC
+ * actionability-specificity rubric + failure-mode #2 mitigation).
+ */
 export interface TopRecommendationsBody {
     summary: string;
     suggestions: ActionSuggestion[];
 }
+/**
+ * weakest-area: LLM-driven. Adds area identification, dimension/failureMode
+ * context, and a small-sample calibration guard (AI-SPEC failure-mode #3).
+ */
 export interface WeakestAreaBody {
     summary: string;
+    /** Documentation area with the lowest composite score. */
+    area: string;
+    /** Primary dimension driving the low score. */
+    dimension: string;
+    /** Dominant failure mode in this area. */
+    failureMode: string;
+    /** Number of judgments sampled for this area — calibration denominator. */
+    sampleSize: number;
+    /** Calibrated confidence per D0049 (ensemble-stdev derivation). */
+    confidence: Confidence;
 }
-export interface LowConfidenceAttributionBody {
+/**
+ * regression-vs-baseline: LLM-driven. `deltas` is the per-area diff
+ * (JS-computed pre-call, max 10 entries); `drivers` is LLM prose;
+ * `overallTrend` is a 4-bucket summary per AI-SPEC §3 lines 605-613.
+ */
+export interface RegressionVsBaselineBody {
     summary: string;
+    /**
+     * Per-area score deltas (max 10). `drivers` carries the LLM's prose
+     * reasoning about what caused the change.
+     */
+    deltas: {
+        area: string;
+        direction: "improved" | "regressed" | "unchanged";
+        pointsDelta: number;
+        drivers: string[];
+    }[];
+    /** 4-bucket aggregate trend across all deltas. */
+    overallTrend: "net-improved" | "net-regressed" | "mixed" | "stable";
 }
-export interface DocAttributionSpotlightBody {
+/**
+ * low-confidence-attribution: LLM-driven. `judgmentRefs` cites the
+ * specific judgments (D0052 triple) that drove the low-confidence finding.
+ */
+export interface LowConfidenceAttributionBody {
     summary: string;
+    /** Judgment references (D0052) driving this low-confidence finding. */
+    judgmentRefs: JudgmentRef[];
 }
-export interface RegressionVsBaselineBody {
+/**
+ * doc-attribution-spotlight: LLM-driven. `docCitations` carries per-doc
+ * attribution roles and confidence calibration (AI-SPEC failure-mode #5).
+ */
+export interface DocAttributionSpotlightBody {
     summary: string;
+    /**
+     * Per-doc attribution records. `role` classifies how the doc contributed;
+     * `confidence` calibrates the attribution certainty (D0049).
+     */
+    docCitations: {
+        docSlug: string;
+        confidence: Confidence;
+        role: "supports" | "contradicts" | "missing" | "irrelevant";
+    }[];
 }
 /**
  * Outer-`status` discriminated union: 8 ready variants (one per
@@ -102,6 +203,10 @@ export interface RegressionVsBaselineBody {
  * No `not-yet-generated` variant — old-report fallback is a Phase 7
  * concern at the slim-shape boundary, handled at fetch-time, not in
  * `DiagnosisCard` itself.
+ *
+ * D-07: only the `body: <BodyInterface>` references resolve to richer
+ * shapes. The union arms, status literals, and cardType literals are
+ * identical to Phase 1.
  */
 export type DiagnosisCard = {
     status: "ready";

package/dist/_vendor/ailf-core/types/diagnosis.js CHANGED Viewed

@@ -8,7 +8,9 @@
  * discriminator inside the `ready` variant.
  *
  * Phase 1 lands placeholder body shapes; Phase 5 enriches each per
- * Doc 05 specs.
+ * AI-SPEC §3 and CONTEXT D-05/D-07. The `DiagnosisCard` discriminated
+ * union surface (arms + `cardType` literals) is stable — only the
+ * `body: <BodyInterface>` references resolve to richer shapes.
  *
  * @see docs/decisions/D0049-shared-confidence-contract.md
  * @see docs/decisions/D0052-judgment-ref-granularity.md

package/dist/_vendor/ailf-core/types/index.d.ts CHANGED Viewed

@@ -36,7 +36,8 @@ export { CONVENTIONAL_DERIVATIONS, isConfidence } from "./confidence.js";
 export type { ArtifactId, AssociationAxis, AssociationValues, Brand, EntryKey, Err, FixtureId, IdValidationError, JudgmentId, NewReportId, Ok, ProviderId, PromptId, Result, ResultId, RubricId, RunFingerprint, RunId, SuiteId, TaskId, TaskSlug, TraceId, } from "./branded-ids.js";
 export { err, fixtureId, generateJudgmentId, generateRunId, judgmentId, ok, providerId, resultId, runId, suiteId, taskId, traceId, } from "./branded-ids.js";
 export type { AgentHarnessTaskDefinition, ContentLakeAuthorableMode, ContentLakeAuthorableTask, CriterionRef, CustomTaskDefinition, GeneralizedAssertionDefinition, GeneralizedDocRef, GeneralizedTaskDefinition, GeneralizedTemplatedAssertion, GeneralizedValueAssertion, IdDocRef, KnowledgeProbeTaskDefinition, LiteracyTaskDefinition, MCPServerTaskDefinition, PromptVars, PathDocRef, PerspectiveDocRef, ReservedPromptVarKey, RubricRef, SlugDocRef, TaskCommonFields, TaskDifficulty, TaskOptions, TaskProviderConfig, TaskStatus, } from "./generalized-task.js";
-export type { ActionSuggestion, AreaSummaryBody, CardMeta, CardType, Diagnosis, DiagnosisCard, DocAttributionSpotlightBody, FailureModeSummaryBody, LowConfidenceAttributionBody, NoIssuesBody, RegressionVsBaselineBody, TopRecommendationsBody, VersionedInputs, WeakestAreaBody, } from "./diagnosis.js";
+export type { ActionSuggestion, AreaSummaryBody, CardMeta, CardType, Diagnosis, DiagnosisCard, DocAttributionSpotlightBody, FailureModeSummaryBody, JudgmentRef, LowConfidenceAttributionBody, NoIssuesBody, RegressionVsBaselineBody, TopRecommendationsBody, VersionedInputs, WeakestAreaBody, } from "./diagnosis.js";
+export type { SynthesisCostTelemetry, SynthesisPerCardTelemetry, } from "./synthesis-telemetry.js";
 export type { AttributionMeta, DocAttribution, JudgmentAttribution, } from "./attribution.js";
 export type { CriterionSubJudgment, DocCitation, DocCitationRole, GraderJudgment, } from "./grader-judgment.js";
 export type { LegacyGraderJudgment } from "./legacy-grader-judgment.js";
@@ -754,6 +755,12 @@ export interface PipelineResult {
     promptfooUrls?: PromptfooUrlEntry[];
     /** Results per step */
     steps: Record<string, StepResult>;
+    /** Report ID produced by PublishReportStep (when publish was enabled). Used by
+     * post-run hooks (e.g. runPostPipelineHooks) to target diagnosis and telemetry
+     * writeback at the correct Content Lake document. Absent when publish was
+     * skipped or the publish step did not produce a report. (Phase 6 / DIAG-06)
+     */
+    reportId?: string;
     /** Overall success (all non-skipped steps succeeded) */
     success: boolean;
     /** Summary of test execution outcomes. */

package/dist/_vendor/ailf-core/types/repo-config.d.ts CHANGED Viewed

@@ -106,6 +106,21 @@ export interface RepoTriggersConfig {
     "pr-task-change"?: TriggerConfig;
     schedule?: ScheduleTriggerConfig;
 }
+/**
+ * Post-run diagnosis summary policy (Phase 6 / DIAG-06).
+ * Controls whether `ailf run` auto-fires the in-process diagnosis runner
+ * at the end of a published pipeline. Precedence is resolved at the CLI
+ * layer — see `shouldRunPostSummary()` in `pipeline-action.ts`.
+ */
+export interface RepoSummaryConfig {
+    /**
+     * - `"auto"`   — fire only when `process.stdout.isTTY === true` AND
+     *                `CI !== "true"`.
+     * - `"always"` — fire unconditionally (bypasses TTY check).
+     * - `"never"`  — never fire.
+     */
+    onRun?: "auto" | "always" | "never";
+}
 /**
  * Parsed shape of `.ailf/config.yaml`.
  *
@@ -124,6 +139,7 @@ export interface RepoConfig {
     publish?: RepoPublishConfig;
     reportStore?: RepoReportStoreConfig;
     source?: RepoSourceConfig;
+    summary?: RepoSummaryConfig;
     taskSource?: RepoTaskSourceConfig;
     triggers?: RepoTriggersConfig;
 }

package/dist/_vendor/ailf-core/types/synthesis-telemetry.d.ts ADDED Viewed

@@ -0,0 +1,101 @@
+/**
+ * Synthesis cost telemetry types — canonical TS-first shapes for
+ * Phase 6 DIAG-06 cost and parse-failure observability.
+ *
+ * These interfaces are authored independently of their Zod adapter schema
+ * (Plan 06-02) per D0045: the Zod schema declares
+ * `satisfies z.ZodType<SynthesisCostTelemetry>` against this independent
+ * type so drift is a build error, not a runtime bug.
+ *
+ * The attribute paths on `SynthesisCostTelemetry` (4 top-level) and
+ * `SynthesisPerCardTelemetry` (8 per-card) land on the `ailf.report` Sanity doc
+ * under `summary.synthesis.diagnosis.*` (D6-09). No new sibling doc type (D0033 / D6-09).
+ *
+ * @see docs/decisions/D0045-type-architecture-and-contract-enforcement.md
+ * @see .planning/phases/06-post-run-integration-cost-telemetry/06-CONTEXT.md §D6-09
+ * @see .planning/phases/06-post-run-integration-cost-telemetry/06-CONTEXT.md §D6-12
+ */
+import type { CardType } from "./diagnosis.js";
+/**
+ * Per-card telemetry row for the `synthesis_per_card` Airbyte stream
+ * (D6-11) and the `summary.synthesis.diagnosis.perCard[]` Sanity doc path
+ * (D6-09).
+ *
+ * Fields map directly to the 8 per-card attribute paths in D6-09:
+ * `…perCard[].cardType`, `…perCard[].cost`, `…perCard[].parseFailed`,
+ * `…perCard[].latencyMs`, `…perCard[].tokenInput`, `…perCard[].tokenOutput`,
+ * `…perCard[].cardVersion`, `…perCard[].generatedAt`.
+ *
+ * `cost` is undefined when the card did not make an LLM call (deterministic
+ * cards) and contributes 0 to the roll-up.
+ */
+export interface SynthesisPerCardTelemetry {
+    /** Card archetype — reuses `CardType` from diagnosis.ts:55-63; not redeclared. */
+    cardType: CardType;
+    /**
+     * Per-call USD cost captured from `LLMStructuredCompletion.cost`.
+     * `undefined` for deterministic cards (area-summary, failure-mode-summary,
+     * no-issues) which make no LLM call.
+     */
+    cost?: number;
+    /**
+     * Whether the card's Zod schema parse failed (produces a degraded card).
+     * Used for the 7-day rolling parse-failure rate in BigQuery (D6-15).
+     */
+    parseFailed: boolean;
+    /**
+     * End-to-end latency for the LLM call in milliseconds.
+     * `undefined` for deterministic cards.
+     */
+    latencyMs?: number;
+    /**
+     * Prompt tokens consumed by the LLM call.
+     * `undefined` for deterministic cards.
+     */
+    tokenInput?: number;
+    /**
+     * Completion tokens produced by the LLM call.
+     * `undefined` for deterministic cards.
+     */
+    tokenOutput?: number;
+    /** Per-card version string (e.g. `"top-recommendations@0.1.0"`). */
+    cardVersion: string;
+    /** ISO 8601 UTC timestamp when this card was generated. */
+    generatedAt: string;
+}
+/**
+ * Aggregate synthesis cost telemetry for a single Diagnosis run.
+ * Lands on the `ailf.report` Sanity doc under `summary.synthesis.diagnosis.*`
+ * (D6-09 / D6-10: parallel to `summary.overall.cost` — not additive).
+ *
+ * Written by the post-run hook (D6-08); not written by standalone
+ * `ailf interpret`.
+ *
+ * Field set matches the 4 top-level D6-09 attribute paths:
+ * `summary.synthesis.diagnosis.cost`,
+ * `summary.synthesis.diagnosis.parseFailureCount`,
+ * `summary.synthesis.diagnosis.parseFailureRate`,
+ * `summary.synthesis.diagnosis.perCard`.
+ */
+export interface SynthesisCostTelemetry {
+    /**
+     * Total USD cost across all LLM cards in this Diagnosis run.
+     * Roll-up: `sum(perCard[].cost ?? 0)` for ready + degraded cards.
+     * Missing cards contribute 0.
+     */
+    cost: number;
+    /**
+     * Number of cards whose Zod parse failed in this Diagnosis run.
+     * Counted across all 8 card types (including deterministic cards;
+     * a deterministic-card parse failure indicates a code bug).
+     */
+    parseFailureCount: number;
+    /**
+     * Parse-failure rate: `parseFailureCount / 8` (8 = fixed card registry size).
+     * Range: 0–1. Used as the denominator for the D6-15 BigQuery 7-day
+     * rolling rate view (`synthesis_parse_failure_rate_7d.sql`).
+     */
+    parseFailureRate: number;
+    /** Per-card telemetry rows — one entry per card in registry-order. */
+    perCard: SynthesisPerCardTelemetry[];
+}