npm - @warmdrift/kgauto-compiler - Versions diffs - 2.0.0-alpha.6 → 2.0.0-alpha.8 - Mend

@warmdrift/kgauto-compiler 2.0.0-alpha.6 → 2.0.0-alpha.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/dist/index.d.mts +59 -3
package/dist/index.d.ts +59 -3
package/dist/index.js +77 -33
package/dist/index.mjs +77 -33
package/dist/{profiles-CQnLkQ7b.d.ts → profiles-B3eNQ2py.d.ts} +8 -0
package/dist/{profiles-zm6diETo.d.mts → profiles-Py8c7zjJ.d.mts} +8 -0
package/dist/profiles.d.mts +1 -1
package/dist/profiles.d.ts +1 -1
package/package.json +1 -1

package/dist/index.d.mts CHANGED Viewed

@@ -1,5 +1,5 @@
-import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory } from './profiles-zm6diETo.mjs';
-export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-zm6diETo.mjs';
+import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory } from './profiles-Py8c7zjJ.mjs';
+export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-Py8c7zjJ.mjs';
 export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, IntentArchetypeName, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.mjs';
 /**
@@ -19,6 +19,15 @@ interface CompileOptions {
     toolRelevanceThreshold?: number;
     /** History compression — turns count threshold (default 8). */
     compressHistoryAfter?: number;
+    /**
+     * History compression — token threshold (alpha.7). When total history
+     * tokens exceed this AND history has more turns than the recent ones
+     * kept, compress even if the turn count is within `compressHistoryAfter`.
+     * Catches fat-message bloat (tool-using agents pack many tool-call/result
+     * pairs into single assistant messages — count stays low, tokens explode).
+     * Default undefined (disabled — backward-compatible).
+     */
+    compressHistoryAboveTokens?: number;
     /**
      * Consumer-declared policy. Filters blocked models, enforces cost
      * ceiling, boosts preferred. See CompilePolicy in ir.ts.
@@ -114,6 +123,53 @@ declare function clearBrain(): void;
  * network error is swallowed/forwarded to onError.
  */
 declare function record(input: RecordInput): Promise<void>;
+/**
+ * Wire shape POSTed by `record()` to the brain proxy's `/outcomes` endpoint.
+ *
+ * Exported so consumer proxies can `import { OutcomePayload } from
+ * '@warmdrift/kgauto-compiler'` instead of redefining the shape — that way
+ * TypeScript catches future schema additions (cache fields, advisory
+ * telemetry, etc.) at consumer build time, not silently at runtime.
+ *
+ * **Forward-compat rule:** consumer proxies should pass the body through to
+ * Supabase rather than reconstructing field-by-field. The recommended shape
+ * is `const row = { ...body }` (or `await supabase.from('compile_outcomes')
+ * .insert(body)` directly). Filtering proxies break schema evolution
+ * silently — see s17 root-cause investigation 2026-05-10.
+ */
+interface OutcomePayload {
+    handle: string;
+    app_id?: string;
+    intent_archetype?: string;
+    /** The model that ACTUALLY RAN (post-fallback). */
+    model?: string;
+    /** The model v2 compile() originally targeted. NULL when no fallback. */
+    requested_model?: string;
+    provider?: string;
+    shape_key?: string;
+    learning_key?: string;
+    mutations_applied: string[];
+    tokens_in: number;
+    tokens_out: number;
+    estimated_tokens_in?: number;
+    latency_ms: number;
+    success: boolean;
+    empty_response: boolean;
+    error_type?: string;
+    tools_called?: string[];
+    oracle_score?: number;
+    oracle_dimensions?: Record<string, number>;
+    oracle_rationale?: string;
+    prompt_preview?: string;
+    response_preview?: string;
+    dialect_version: string;
+    cache_read_input_tokens?: number;
+    cache_creation_input_tokens?: number;
+    cost_usd_actual?: number;
+    ttft_ms?: number;
+    history_cacheable_tokens?: number;
+    history_tokens_at_compile?: number;
+}
 /**
  * Oracle contract — how an app tells the brain whether a response was good.
@@ -270,4 +326,4 @@ declare function runAdvisor(ir: PromptIR, result: AdvisorContext, profile: Model
  */
 declare function compile(ir: PromptIR, opts?: CompileOptions): CompileResult;
-export { ApiKeys, type AppOracle, BestPracticeAdvisory, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, runAdvisor, setTokenizer };
+export { ApiKeys, type AppOracle, BestPracticeAdvisory, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, type OutcomePayload, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, runAdvisor, setTokenizer };

package/dist/index.d.ts CHANGED Viewed

@@ -1,5 +1,5 @@
-import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory } from './profiles-CQnLkQ7b.js';
-export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CQnLkQ7b.js';
+import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory } from './profiles-B3eNQ2py.js';
+export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-B3eNQ2py.js';
 export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, IntentArchetypeName, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.js';
 /**
@@ -19,6 +19,15 @@ interface CompileOptions {
     toolRelevanceThreshold?: number;
     /** History compression — turns count threshold (default 8). */
     compressHistoryAfter?: number;
+    /**
+     * History compression — token threshold (alpha.7). When total history
+     * tokens exceed this AND history has more turns than the recent ones
+     * kept, compress even if the turn count is within `compressHistoryAfter`.
+     * Catches fat-message bloat (tool-using agents pack many tool-call/result
+     * pairs into single assistant messages — count stays low, tokens explode).
+     * Default undefined (disabled — backward-compatible).
+     */
+    compressHistoryAboveTokens?: number;
     /**
      * Consumer-declared policy. Filters blocked models, enforces cost
      * ceiling, boosts preferred. See CompilePolicy in ir.ts.
@@ -114,6 +123,53 @@ declare function clearBrain(): void;
  * network error is swallowed/forwarded to onError.
  */
 declare function record(input: RecordInput): Promise<void>;
+/**
+ * Wire shape POSTed by `record()` to the brain proxy's `/outcomes` endpoint.
+ *
+ * Exported so consumer proxies can `import { OutcomePayload } from
+ * '@warmdrift/kgauto-compiler'` instead of redefining the shape — that way
+ * TypeScript catches future schema additions (cache fields, advisory
+ * telemetry, etc.) at consumer build time, not silently at runtime.
+ *
+ * **Forward-compat rule:** consumer proxies should pass the body through to
+ * Supabase rather than reconstructing field-by-field. The recommended shape
+ * is `const row = { ...body }` (or `await supabase.from('compile_outcomes')
+ * .insert(body)` directly). Filtering proxies break schema evolution
+ * silently — see s17 root-cause investigation 2026-05-10.
+ */
+interface OutcomePayload {
+    handle: string;
+    app_id?: string;
+    intent_archetype?: string;
+    /** The model that ACTUALLY RAN (post-fallback). */
+    model?: string;
+    /** The model v2 compile() originally targeted. NULL when no fallback. */
+    requested_model?: string;
+    provider?: string;
+    shape_key?: string;
+    learning_key?: string;
+    mutations_applied: string[];
+    tokens_in: number;
+    tokens_out: number;
+    estimated_tokens_in?: number;
+    latency_ms: number;
+    success: boolean;
+    empty_response: boolean;
+    error_type?: string;
+    tools_called?: string[];
+    oracle_score?: number;
+    oracle_dimensions?: Record<string, number>;
+    oracle_rationale?: string;
+    prompt_preview?: string;
+    response_preview?: string;
+    dialect_version: string;
+    cache_read_input_tokens?: number;
+    cache_creation_input_tokens?: number;
+    cost_usd_actual?: number;
+    ttft_ms?: number;
+    history_cacheable_tokens?: number;
+    history_tokens_at_compile?: number;
+}
 /**
  * Oracle contract — how an app tells the brain whether a response was good.
@@ -270,4 +326,4 @@ declare function runAdvisor(ir: PromptIR, result: AdvisorContext, profile: Model
  */
 declare function compile(ir: PromptIR, opts?: CompileOptions): CompileResult;
-export { ApiKeys, type AppOracle, BestPracticeAdvisory, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, runAdvisor, setTokenizer };
+export { ApiKeys, type AppOracle, BestPracticeAdvisory, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, type OutcomePayload, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, runAdvisor, setTokenizer };

package/dist/index.js CHANGED Viewed

@@ -236,20 +236,37 @@ function passToolRelevance(ir, opts = {}) {
     ]
   };
 }
+function totalHistoryTokens(history) {
+  let total = 0;
+  for (const m of history) {
+    if (typeof m.content === "string") total += countTokens(m.content);
+  }
+  return total;
+}
 function passCompressHistory(ir, opts = {}) {
   const history = ir.history;
-  if (!history || history.length === 0) return { value: ir, mutations: [] };
+  if (!history || history.length === 0) {
+    return { value: ir, mutations: [], historyTokensTotal: 0 };
+  }
   const keepRecent = opts.keepRecent ?? 4;
   const summarizeOlderThan = opts.summarizeOlderThan ?? 8;
-  if (history.length <= summarizeOlderThan) return { value: ir, mutations: [] };
+  const summarizeAboveTokens = opts.summarizeAboveTokens;
+  const historyTokensTotal = totalHistoryTokens(history);
+  const countThresholdHit = history.length > summarizeOlderThan;
+  const tokenThresholdHit = summarizeAboveTokens !== void 0 && historyTokensTotal > summarizeAboveTokens && history.length > keepRecent;
+  if (!countThresholdHit && !tokenThresholdHit) {
+    return { value: ir, mutations: [], historyTokensTotal };
+  }
   const cutIndex = history.length - keepRecent;
   const old = history.slice(0, cutIndex);
   const recent = history.slice(cutIndex);
   const userTurns = old.filter((m) => m.role === "user");
   const firstUserLine = userTurns[0]?.content.split("\n")[0]?.slice(0, 200) ?? "";
+  const oldTokens = totalHistoryTokens(old);
+  const trigger = tokenThresholdHit && !countThresholdHit ? "tokens" : "count";
   const summary = {
     role: "system",
-    content: `[Earlier conversation: ${old.length} turns omitted. First user message: "${firstUserLine}"]`
+    content: `[Earlier conversation: ${old.length} turns omitted (~${oldTokens} tokens). First user message: "${firstUserLine}"]`
   };
   return {
     value: { ...ir, history: [summary, ...recent] },
@@ -258,9 +275,10 @@ function passCompressHistory(ir, opts = {}) {
         id: `compress-history-${old.length}`,
         source: "static_pass",
         passName: "compress_history",
-        description: `Compressed ${old.length} old turns into 1 summary line (kept ${keepRecent} recent)`
+        description: trigger === "tokens" ? `Compressed ${old.length} old turns (~${oldTokens} tokens) into 1 summary \u2014 token threshold ${summarizeAboveTokens} exceeded (kept ${keepRecent} recent)` : `Compressed ${old.length} old turns into 1 summary (kept ${keepRecent} recent)`
       }
-    ]
+    ],
+    historyTokensTotal
   };
 }
 function passApplyCliffs(ir, profile, estimatedInputTokens) {
@@ -506,7 +524,11 @@ function lowerAnthropic(ir, profile, hints) {
       system: systemBlocks,
       messages,
       tools,
-      max_tokens: hints.forceTerseOutput ? 200 : Math.min(profile.maxOutputTokens, 4096)
+      // alpha.8: trust profile.maxOutputTokens. The historical Math.min(_, 4096)
+      // cap surprised every consumer once (PB-Cairn contract-gaps brief, Gap 3).
+      // Profile is the single source of truth; consumers wanting a tighter
+      // budget can pass providerOverrides.anthropic.max_tokens explicitly.
+      max_tokens: hints.forceTerseOutput ? 200 : profile.maxOutputTokens
     },
     diagnostics: {
       cacheableTokens,
@@ -1280,7 +1302,8 @@ function compile(ir, opts = {}) {
     threshold: opts.toolRelevanceThreshold
   });
   const compressed = passCompressHistory(toolFiltered.value, {
-    summarizeOlderThan: opts.compressHistoryAfter
+    summarizeOlderThan: opts.compressHistoryAfter,
+    summarizeAboveTokens: opts.compressHistoryAboveTokens
   });
   let workingIR = compressed.value;
   const accumulatedMutations = [
@@ -1324,7 +1347,8 @@ function compile(ir, opts = {}) {
     historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
     cacheableTokens: lowered.diagnostics.cacheableTokens,
     estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd,
-    historyCacheableTokens: lowered.diagnostics.historyCacheableTokens
+    historyCacheableTokens: lowered.diagnostics.historyCacheableTokens,
+    historyTokensTotal: compressed.historyTokensTotal
   };
   const advisories = runAdvisor(
     ir,
@@ -1431,7 +1455,8 @@ function registerCompile(appId, archetype, ir, result) {
     estimatedTokensIn: tokens,
     mutationsApplied: result.mutationsApplied.map((m) => m.id),
     startedAt: Date.now(),
-    historyCacheableTokens: result.diagnostics.historyCacheableTokens
+    historyCacheableTokens: result.diagnostics.historyCacheableTokens,
+    historyTokensTotal: result.diagnostics.historyTokensTotal
   });
 }
 async function record(input) {
@@ -1505,7 +1530,8 @@ function buildPayload(input, reg) {
     cache_creation_input_tokens: input.cacheCreationInputTokens,
     cost_usd_actual: costUsdActual,
     ttft_ms: input.ttftMs,
-    history_cacheable_tokens: reg?.historyCacheableTokens
+    history_cacheable_tokens: reg?.historyCacheableTokens,
+    history_tokens_at_compile: reg?.historyTokensTotal
   };
 }
 function computeCostUsd(modelId, tokensIn, tokensOut) {
@@ -1784,31 +1810,31 @@ async function call(ir, opts = {}) {
       fetchImpl: opts.fetchImpl,
       providerOverrides: opts.providerOverrides
     });
-    if (exec.ok) {
+    const validated = exec.ok ? validateStructuredContract(exec, ir) : exec;
+    if (validated.ok) {
       attempts.push({ model: targetModel, status: "success" });
       const latencyMs2 = Date.now() - start;
-      const responseWithStructured = withStructuredOutput(exec.response, ir);
       await record({
         handle: initial.handle,
-        tokensIn: responseWithStructured.tokens.input,
-        tokensOut: responseWithStructured.tokens.output,
+        tokensIn: validated.response.tokens.input,
+        tokensOut: validated.response.tokens.output,
         latencyMs: latencyMs2,
         success: true,
-        emptyResponse: responseWithStructured.tokens.output === 0,
-        toolsCalled: responseWithStructured.toolCalls.map((tc) => tc.name),
+        emptyResponse: validated.response.tokens.output === 0,
+        toolsCalled: validated.response.toolCalls.map((tc) => tc.name),
         actualModel: targetModel !== initial.target ? targetModel : void 0,
         mutationsApplied: targetModel !== initial.target ? activeCompile.mutationsApplied.map((m) => m.id) : void 0,
         promptPreview: extractPromptPreview(ir),
-        responsePreview: responseWithStructured.text.slice(0, 200),
-        cacheReadInputTokens: responseWithStructured.tokens.cached,
-        cacheCreationInputTokens: responseWithStructured.tokens.cacheCreated
+        responsePreview: validated.response.text.slice(0, 200),
+        cacheReadInputTokens: validated.response.tokens.cached,
+        cacheCreationInputTokens: validated.response.tokens.cacheCreated
       });
       return {
         handle: initial.handle,
         actualModel: targetModel,
         requestedModel: initial.target,
         provider: activeCompile.provider,
-        response: responseWithStructured,
+        response: validated.response,
         latencyMs: latencyMs2,
         mutationsApplied: activeCompile.mutationsApplied,
         attempts
@@ -1816,12 +1842,12 @@ async function call(ir, opts = {}) {
     }
     attempts.push({
       model: targetModel,
-      status: exec.errorType,
-      errorCode: exec.errorCode,
-      message: exec.message
+      status: validated.errorType,
+      errorCode: validated.errorCode,
+      message: validated.message
     });
-    lastErr = exec;
-    if (exec.errorType === "terminal" || opts.noFallback) {
+    lastErr = validated;
+    if (validated.errorType === "terminal" || opts.noFallback) {
       break;
     }
   }
@@ -1858,17 +1884,35 @@ function extractPromptPreview(ir) {
   if (lastHist) return lastHist.slice(0, 200);
   return void 0;
 }
-function withStructuredOutput(response, ir) {
-  if (!ir.constraints?.structuredOutput) return response;
-  if (!response.text) return response;
+function validateStructuredContract(exec, ir) {
+  if (!ir.constraints?.structuredOutput) {
+    return { ok: true, response: exec.response };
+  }
+  const finish = (exec.response.finishReason ?? "").toLowerCase();
+  if (finish === "max_tokens" || finish === "length") {
+    return {
+      ok: false,
+      status: exec.status,
+      errorType: "retryable",
+      errorCode: "max_tokens_on_structured_output",
+      message: `Provider returned finishReason="${exec.response.finishReason}" on a structured-output call \u2014 output truncated mid-token, JSON cannot be valid`,
+      raw: exec.response.raw
+    };
+  }
+  if (!exec.response.text) {
+    return { ok: true, response: exec.response };
+  }
   try {
-    const parsed = JSON.parse(response.text);
-    return { ...response, structuredOutput: parsed };
+    const parsed = JSON.parse(exec.response.text);
+    return { ok: true, response: { ...exec.response, structuredOutput: parsed } };
   } catch (err) {
     return {
-      ...response,
-      structuredOutput: null,
-      parseError: err instanceof Error ? err.message : String(err)
+      ok: false,
+      status: exec.status,
+      errorType: "retryable",
+      errorCode: "structured_output_parse_failed",
+      message: err instanceof Error ? err.message : String(err),
+      raw: exec.response.raw
     };
   }
 }

package/dist/index.mjs CHANGED Viewed

@@ -120,20 +120,37 @@ function passToolRelevance(ir, opts = {}) {
     ]
   };
 }
+function totalHistoryTokens(history) {
+  let total = 0;
+  for (const m of history) {
+    if (typeof m.content === "string") total += countTokens(m.content);
+  }
+  return total;
+}
 function passCompressHistory(ir, opts = {}) {
   const history = ir.history;
-  if (!history || history.length === 0) return { value: ir, mutations: [] };
+  if (!history || history.length === 0) {
+    return { value: ir, mutations: [], historyTokensTotal: 0 };
+  }
   const keepRecent = opts.keepRecent ?? 4;
   const summarizeOlderThan = opts.summarizeOlderThan ?? 8;
-  if (history.length <= summarizeOlderThan) return { value: ir, mutations: [] };
+  const summarizeAboveTokens = opts.summarizeAboveTokens;
+  const historyTokensTotal = totalHistoryTokens(history);
+  const countThresholdHit = history.length > summarizeOlderThan;
+  const tokenThresholdHit = summarizeAboveTokens !== void 0 && historyTokensTotal > summarizeAboveTokens && history.length > keepRecent;
+  if (!countThresholdHit && !tokenThresholdHit) {
+    return { value: ir, mutations: [], historyTokensTotal };
+  }
   const cutIndex = history.length - keepRecent;
   const old = history.slice(0, cutIndex);
   const recent = history.slice(cutIndex);
   const userTurns = old.filter((m) => m.role === "user");
   const firstUserLine = userTurns[0]?.content.split("\n")[0]?.slice(0, 200) ?? "";
+  const oldTokens = totalHistoryTokens(old);
+  const trigger = tokenThresholdHit && !countThresholdHit ? "tokens" : "count";
   const summary = {
     role: "system",
-    content: `[Earlier conversation: ${old.length} turns omitted. First user message: "${firstUserLine}"]`
+    content: `[Earlier conversation: ${old.length} turns omitted (~${oldTokens} tokens). First user message: "${firstUserLine}"]`
   };
   return {
     value: { ...ir, history: [summary, ...recent] },
@@ -142,9 +159,10 @@ function passCompressHistory(ir, opts = {}) {
         id: `compress-history-${old.length}`,
         source: "static_pass",
         passName: "compress_history",
-        description: `Compressed ${old.length} old turns into 1 summary line (kept ${keepRecent} recent)`
+        description: trigger === "tokens" ? `Compressed ${old.length} old turns (~${oldTokens} tokens) into 1 summary \u2014 token threshold ${summarizeAboveTokens} exceeded (kept ${keepRecent} recent)` : `Compressed ${old.length} old turns into 1 summary (kept ${keepRecent} recent)`
       }
-    ]
+    ],
+    historyTokensTotal
   };
 }
 function passApplyCliffs(ir, profile, estimatedInputTokens) {
@@ -390,7 +408,11 @@ function lowerAnthropic(ir, profile, hints) {
       system: systemBlocks,
       messages,
       tools,
-      max_tokens: hints.forceTerseOutput ? 200 : Math.min(profile.maxOutputTokens, 4096)
+      // alpha.8: trust profile.maxOutputTokens. The historical Math.min(_, 4096)
+      // cap surprised every consumer once (PB-Cairn contract-gaps brief, Gap 3).
+      // Profile is the single source of truth; consumers wanting a tighter
+      // budget can pass providerOverrides.anthropic.max_tokens explicitly.
+      max_tokens: hints.forceTerseOutput ? 200 : profile.maxOutputTokens
     },
     diagnostics: {
       cacheableTokens,
@@ -762,7 +784,8 @@ function compile(ir, opts = {}) {
     threshold: opts.toolRelevanceThreshold
   });
   const compressed = passCompressHistory(toolFiltered.value, {
-    summarizeOlderThan: opts.compressHistoryAfter
+    summarizeOlderThan: opts.compressHistoryAfter,
+    summarizeAboveTokens: opts.compressHistoryAboveTokens
   });
   let workingIR = compressed.value;
   const accumulatedMutations = [
@@ -806,7 +829,8 @@ function compile(ir, opts = {}) {
     historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
     cacheableTokens: lowered.diagnostics.cacheableTokens,
     estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd,
-    historyCacheableTokens: lowered.diagnostics.historyCacheableTokens
+    historyCacheableTokens: lowered.diagnostics.historyCacheableTokens,
+    historyTokensTotal: compressed.historyTokensTotal
   };
   const advisories = runAdvisor(
     ir,
@@ -913,7 +937,8 @@ function registerCompile(appId, archetype, ir, result) {
     estimatedTokensIn: tokens,
     mutationsApplied: result.mutationsApplied.map((m) => m.id),
     startedAt: Date.now(),
-    historyCacheableTokens: result.diagnostics.historyCacheableTokens
+    historyCacheableTokens: result.diagnostics.historyCacheableTokens,
+    historyTokensTotal: result.diagnostics.historyTokensTotal
   });
 }
 async function record(input) {
@@ -987,7 +1012,8 @@ function buildPayload(input, reg) {
     cache_creation_input_tokens: input.cacheCreationInputTokens,
     cost_usd_actual: costUsdActual,
     ttft_ms: input.ttftMs,
-    history_cacheable_tokens: reg?.historyCacheableTokens
+    history_cacheable_tokens: reg?.historyCacheableTokens,
+    history_tokens_at_compile: reg?.historyTokensTotal
   };
 }
 function computeCostUsd(modelId, tokensIn, tokensOut) {
@@ -1266,31 +1292,31 @@ async function call(ir, opts = {}) {
       fetchImpl: opts.fetchImpl,
       providerOverrides: opts.providerOverrides
     });
-    if (exec.ok) {
+    const validated = exec.ok ? validateStructuredContract(exec, ir) : exec;
+    if (validated.ok) {
       attempts.push({ model: targetModel, status: "success" });
       const latencyMs2 = Date.now() - start;
-      const responseWithStructured = withStructuredOutput(exec.response, ir);
       await record({
         handle: initial.handle,
-        tokensIn: responseWithStructured.tokens.input,
-        tokensOut: responseWithStructured.tokens.output,
+        tokensIn: validated.response.tokens.input,
+        tokensOut: validated.response.tokens.output,
         latencyMs: latencyMs2,
         success: true,
-        emptyResponse: responseWithStructured.tokens.output === 0,
-        toolsCalled: responseWithStructured.toolCalls.map((tc) => tc.name),
+        emptyResponse: validated.response.tokens.output === 0,
+        toolsCalled: validated.response.toolCalls.map((tc) => tc.name),
         actualModel: targetModel !== initial.target ? targetModel : void 0,
         mutationsApplied: targetModel !== initial.target ? activeCompile.mutationsApplied.map((m) => m.id) : void 0,
         promptPreview: extractPromptPreview(ir),
-        responsePreview: responseWithStructured.text.slice(0, 200),
-        cacheReadInputTokens: responseWithStructured.tokens.cached,
-        cacheCreationInputTokens: responseWithStructured.tokens.cacheCreated
+        responsePreview: validated.response.text.slice(0, 200),
+        cacheReadInputTokens: validated.response.tokens.cached,
+        cacheCreationInputTokens: validated.response.tokens.cacheCreated
       });
       return {
         handle: initial.handle,
         actualModel: targetModel,
         requestedModel: initial.target,
         provider: activeCompile.provider,
-        response: responseWithStructured,
+        response: validated.response,
         latencyMs: latencyMs2,
         mutationsApplied: activeCompile.mutationsApplied,
         attempts
@@ -1298,12 +1324,12 @@ async function call(ir, opts = {}) {
     }
     attempts.push({
       model: targetModel,
-      status: exec.errorType,
-      errorCode: exec.errorCode,
-      message: exec.message
+      status: validated.errorType,
+      errorCode: validated.errorCode,
+      message: validated.message
     });
-    lastErr = exec;
-    if (exec.errorType === "terminal" || opts.noFallback) {
+    lastErr = validated;
+    if (validated.errorType === "terminal" || opts.noFallback) {
       break;
     }
   }
@@ -1340,17 +1366,35 @@ function extractPromptPreview(ir) {
   if (lastHist) return lastHist.slice(0, 200);
   return void 0;
 }
-function withStructuredOutput(response, ir) {
-  if (!ir.constraints?.structuredOutput) return response;
-  if (!response.text) return response;
+function validateStructuredContract(exec, ir) {
+  if (!ir.constraints?.structuredOutput) {
+    return { ok: true, response: exec.response };
+  }
+  const finish = (exec.response.finishReason ?? "").toLowerCase();
+  if (finish === "max_tokens" || finish === "length") {
+    return {
+      ok: false,
+      status: exec.status,
+      errorType: "retryable",
+      errorCode: "max_tokens_on_structured_output",
+      message: `Provider returned finishReason="${exec.response.finishReason}" on a structured-output call \u2014 output truncated mid-token, JSON cannot be valid`,
+      raw: exec.response.raw
+    };
+  }
+  if (!exec.response.text) {
+    return { ok: true, response: exec.response };
+  }
   try {
-    const parsed = JSON.parse(response.text);
-    return { ...response, structuredOutput: parsed };
+    const parsed = JSON.parse(exec.response.text);
+    return { ok: true, response: { ...exec.response, structuredOutput: parsed } };
   } catch (err) {
     return {
-      ...response,
-      structuredOutput: null,
-      parseError: err instanceof Error ? err.message : String(err)
+      ok: false,
+      status: exec.status,
+      errorType: "retryable",
+      errorCode: "structured_output_parse_failed",
+      message: err instanceof Error ? err.message : String(err),
+      raw: exec.response.raw
     };
   }
 }

package/dist/{profiles-CQnLkQ7b.d.ts → profiles-B3eNQ2py.d.ts} RENAMED Viewed

@@ -330,6 +330,14 @@ interface CompileResult {
          * from history caching. alpha.5.
          */
         historyCacheableTokens: number;
+        /**
+         * Total tokens in input `history` (pre-compression). Computed regardless
+         * of whether `passCompressHistory` fired — surfaces how close a tuple is
+         * to its `compressHistoryAboveTokens` threshold so dashboards / cost-
+         * watchers can see the bloat axis the count-based threshold misses.
+         * 0 when history is empty. alpha.7.
+         */
+        historyTokensTotal: number;
     };
 }
 /**

package/dist/{profiles-zm6diETo.d.mts → profiles-Py8c7zjJ.d.mts} RENAMED Viewed

@@ -330,6 +330,14 @@ interface CompileResult {
          * from history caching. alpha.5.
          */
         historyCacheableTokens: number;
+        /**
+         * Total tokens in input `history` (pre-compression). Computed regardless
+         * of whether `passCompressHistory` fired — surfaces how close a tuple is
+         * to its `compressHistoryAboveTokens` threshold so dashboards / cost-
+         * watchers can see the bloat axis the count-based threshold misses.
+         * 0 when history is empty. alpha.7.
+         */
+        historyTokensTotal: number;
     };
 }
 /**

package/dist/profiles.d.mts CHANGED Viewed

@@ -1,2 +1,2 @@
-export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-zm6diETo.mjs';
+export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-Py8c7zjJ.mjs';
 import './dialect.mjs';

package/dist/profiles.d.ts CHANGED Viewed

@@ -1,2 +1,2 @@
-export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CQnLkQ7b.js';
+export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-B3eNQ2py.js';
 import './dialect.js';

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@warmdrift/kgauto-compiler",
-  "version": "2.0.0-alpha.6",
+  "version": "2.0.0-alpha.8",
   "description": "Prompt compiler + central learning brain for multi-model AI apps. Swap models without rewriting prompts.",
   "main": "./dist/index.js",
   "module": "./dist/index.mjs",