npm - @warmdrift/kgauto-compiler - Versions diffs - 2.0.0-alpha.3 → 2.0.0-alpha.5 - Mend

@warmdrift/kgauto-compiler 2.0.0-alpha.3 → 2.0.0-alpha.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/README.md +3 -3
package/dist/index.d.mts +2 -2
package/dist/index.d.ts +2 -2
package/dist/index.js +116 -15
package/dist/index.mjs +116 -15
package/dist/{profiles-BiyrF36f.d.mts → profiles-DHdCRBVH.d.mts} +82 -0
package/dist/{profiles-C5lVqF8_.d.ts → profiles-MGq5Tnjv.d.ts} +82 -0
package/dist/profiles.d.mts +1 -1
package/dist/profiles.d.ts +1 -1
package/package.json +1 -1

package/README.md CHANGED Viewed

@@ -1,4 +1,4 @@
-# @warmdrift/kgauto-compiler — v2.0.0-alpha.3
+# @warmdrift/kgauto-compiler — v2.0.0-alpha.5
 > Prompt compiler + central learning brain for multi-model AI apps.
 > **Swap models without rewriting prompts.**
@@ -18,8 +18,8 @@ mutations.
 - **Package:** alpha — coexists with v1 (`@warmdrift/kgauto@1.2.0`) under
   the temporary name `@warmdrift/kgauto-compiler`. Renames to v2 final once
   v1 is fully retired from production.
-- **Tests:** 132/132 passing
-- **Build:** clean (43KB ESM, 60KB CJS)
+- **Tests:** 180/180 passing
+- **Build:** clean (47KB ESM, 64KB CJS)
 - **Brain:** schema ready (see `brain/migrations/001_initial_schema.sql`);
   awaiting dedicated Supabase provisioning.
 - **Mutation engine:** v2.1 (after enough outcome data accumulates).

package/dist/index.d.mts CHANGED Viewed

@@ -1,5 +1,5 @@
-import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult } from './profiles-BiyrF36f.mjs';
-export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-BiyrF36f.mjs';
+import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult } from './profiles-DHdCRBVH.mjs';
+export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-DHdCRBVH.mjs';
 export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, IntentArchetypeName, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.mjs';
 /**

package/dist/index.d.ts CHANGED Viewed

@@ -1,5 +1,5 @@
-import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult } from './profiles-C5lVqF8_.js';
-export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-C5lVqF8_.js';
+import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult } from './profiles-MGq5Tnjv.js';
+export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-MGq5Tnjv.js';
 export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, IntentArchetypeName, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.js';
 /**

package/dist/index.js CHANGED Viewed

@@ -489,10 +489,15 @@ function lower(ir, profile, hints = {}) {
 }
 function lowerAnthropic(ir, profile, hints) {
   const systemBlocks = buildAnthropicSystemBlocks(ir.sections, profile);
-  const messages = buildAnthropicMessages(ir.history ?? [], ir.currentTurn);
+  const history = ir.history ?? [];
+  const policy = ir.historyCachePolicy;
+  const markIndex = resolveHistoryMarkIndex(history.length, policy);
+  const messages = buildAnthropicMessages(history, ir.currentTurn, markIndex);
   const tools = ir.tools ? toAnthropicTools(ir.tools) : void 0;
   const cacheableTokens = computeCacheableTokens(systemBlocks);
-  const cacheSavings = cacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.1));
+  const historyCacheableTokens = markIndex >= 0 ? sumHistoryTokens(history, markIndex) : 0;
+  const totalCacheableTokens = cacheableTokens + historyCacheableTokens;
+  const cacheSavings = totalCacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.1));
   return {
     request: {
       provider: "anthropic",
@@ -504,6 +509,7 @@ function lowerAnthropic(ir, profile, hints) {
     },
     diagnostics: {
       cacheableTokens,
+      historyCacheableTokens,
       estimatedCacheSavingsUsd: cacheSavings
     }
   };
@@ -536,17 +542,64 @@ function buildAnthropicSystemBlocks(sections, profile) {
   }
   return blocks;
 }
-function buildAnthropicMessages(history, currentTurn) {
+function buildAnthropicMessages(history, currentTurn, markIndex) {
   const out = [];
-  for (const m of history) {
+  for (let i = 0; i < history.length; i++) {
+    const m = history[i];
     if (m.role === "system") continue;
-    out.push({ role: m.role, content: m.parts ?? m.content });
+    const shouldMark = i === markIndex;
+    out.push({
+      role: m.role,
+      content: shouldMark ? attachAnthropicCacheControl(m) : m.parts ?? m.content
+    });
   }
   if (currentTurn && currentTurn.role !== "system") {
     out.push({ role: currentTurn.role, content: currentTurn.parts ?? currentTurn.content });
   }
   return out;
 }
+function attachAnthropicCacheControl(m) {
+  if (Array.isArray(m.parts) && m.parts.length > 0) {
+    const blocks = m.parts;
+    const last = blocks[blocks.length - 1];
+    const withMarker = {
+      ...last,
+      cache_control: { type: "ephemeral" }
+    };
+    return [...blocks.slice(0, -1), withMarker];
+  }
+  return [
+    {
+      type: "text",
+      text: m.content,
+      cache_control: { type: "ephemeral" }
+    }
+  ];
+}
+function resolveHistoryMarkIndex(historyLen, policy) {
+  if (!policy || policy.strategy === "none") return -1;
+  if (historyLen === 0) return -1;
+  if (policy.strategy === "all-but-latest") {
+    return historyLen - 1;
+  }
+  const idx = historyLen - 1 - policy.suffix;
+  return idx >= 0 ? idx : -1;
+}
+function sumHistoryTokens(history, throughIndex) {
+  let total = 0;
+  for (let i = 0; i <= throughIndex && i < history.length; i++) {
+    const m = history[i];
+    if (m.role === "system") continue;
+    if (Array.isArray(m.parts)) {
+      for (const p of m.parts) {
+        if (typeof p.text === "string") total += countTokens(p.text);
+      }
+    } else if (typeof m.content === "string") {
+      total += countTokens(m.content);
+    }
+  }
+  return total;
+}
 function toAnthropicTools(tools) {
   return tools.map((t) => ({
     name: t.name,
@@ -581,6 +634,9 @@ function lowerGoogle(ir, profile, hints) {
   const minTokens = profile.lowering.cache.minTokens ?? 4096;
   const meetsMin = cacheableTokens >= minTokens;
   const cacheSavings = meetsMin ? cacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.25)) : 0;
+  const history = ir.history ?? [];
+  const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
+  const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
   return {
     request: {
       provider: "google",
@@ -592,6 +648,7 @@ function lowerGoogle(ir, profile, hints) {
     },
     diagnostics: {
       cacheableTokens: meetsMin ? cacheableTokens : 0,
+      historyCacheableTokens,
       estimatedCacheSavingsUsd: cacheSavings
     }
   };
@@ -639,6 +696,9 @@ function lowerOpenAI(ir, profile, hints) {
       content: ir.currentTurn.parts ?? ir.currentTurn.content
     });
   }
+  const history = ir.history ?? [];
+  const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
+  const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
   return {
     request: {
       provider: "openai",
@@ -648,7 +708,11 @@ function lowerOpenAI(ir, profile, hints) {
       response_format: ir.constraints?.structuredOutput ? { type: "json_object" } : void 0,
       reasoning_effort: hints.forceTerseOutput ? "low" : void 0
     },
-    diagnostics: { cacheableTokens: 0, estimatedCacheSavingsUsd: 0 }
+    diagnostics: {
+      cacheableTokens: 0,
+      historyCacheableTokens,
+      estimatedCacheSavingsUsd: 0
+    }
   };
 }
 function toOpenAITools(tools) {
@@ -675,6 +739,9 @@ function lowerDeepSeek(ir, profile) {
       content: ir.currentTurn.parts ?? ir.currentTurn.content
     });
   }
+  const history = ir.history ?? [];
+  const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
+  const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
   return {
     request: {
       provider: "deepseek",
@@ -689,7 +756,11 @@ function lowerDeepSeek(ir, profile) {
         }
       })) : void 0
     },
-    diagnostics: { cacheableTokens: 0, estimatedCacheSavingsUsd: 0 }
+    diagnostics: {
+      cacheableTokens: 0,
+      historyCacheableTokens,
+      estimatedCacheSavingsUsd: 0
+    }
   };
 }
 function sortSections(sections) {
@@ -1181,7 +1252,8 @@ function compile(ir, opts = {}) {
       historyKept: workingIR.history?.length ?? 0,
       historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
       cacheableTokens: lowered.diagnostics.cacheableTokens,
-      estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd
+      estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd,
+      historyCacheableTokens: lowered.diagnostics.historyCacheableTokens
     }
   };
 }
@@ -1266,7 +1338,8 @@ function registerCompile(appId, archetype, ir, result) {
     learningKey: learningKey(archetype, result.target, shape),
     estimatedTokensIn: tokens,
     mutationsApplied: result.mutationsApplied.map((m) => m.id),
-    startedAt: Date.now()
+    startedAt: Date.now(),
+    historyCacheableTokens: result.diagnostics.historyCacheableTokens
   });
 }
 async function record(input) {
@@ -1309,6 +1382,9 @@ function buildPayload(input, reg) {
   const compileTarget = reg?.model;
   const actual = input.actualModel ?? compileTarget;
   const requested = input.actualModel && compileTarget && input.actualModel !== compileTarget ? compileTarget : void 0;
+  const mutationsApplied = input.mutationsApplied ?? reg?.mutationsApplied ?? [];
+  const costModel = actual;
+  const costUsdActual = costModel ? computeCostUsd(costModel, input.tokensIn, input.tokensOut) : void 0;
   return {
     handle: input.handle,
     app_id: reg?.appId,
@@ -1318,7 +1394,7 @@ function buildPayload(input, reg) {
     provider: reg?.provider,
     shape_key: reg?.shapeKey,
     learning_key: reg?.learningKey,
-    mutations_applied: reg?.mutationsApplied ?? [],
+    mutations_applied: mutationsApplied,
     tokens_in: input.tokensIn,
     tokens_out: input.tokensOut,
     estimated_tokens_in: reg?.estimatedTokensIn,
@@ -1332,9 +1408,22 @@ function buildPayload(input, reg) {
     oracle_rationale: input.oracleScore?.rationale,
     prompt_preview: input.promptPreview,
     response_preview: input.responsePreview,
-    dialect_version: "v1"
+    dialect_version: "v1",
+    cache_read_input_tokens: input.cacheReadInputTokens,
+    cache_creation_input_tokens: input.cacheCreationInputTokens,
+    cost_usd_actual: costUsdActual,
+    ttft_ms: input.ttftMs,
+    history_cacheable_tokens: reg?.historyCacheableTokens
   };
 }
+function computeCostUsd(modelId, tokensIn, tokensOut) {
+  if (tokensIn === 0 && tokensOut === 0) return void 0;
+  const profile = tryGetProfile(modelId);
+  if (!profile) return void 0;
+  const inUsd = tokensIn / 1e6 * profile.costInputPer1m;
+  const outUsd = tokensOut / 1e6 * profile.costOutputPer1m;
+  return Math.round((inUsd + outUsd) * 1e6) / 1e6;
+}
 // src/ir.ts
 var CallError = class extends Error {
@@ -1607,7 +1696,7 @@ async function call(ir, opts = {}) {
       attempts.push({ model: targetModel, status: "success" });
       const latencyMs2 = Date.now() - start;
       const responseWithStructured = withStructuredOutput(exec.response, ir);
-      void record({
+      await record({
         handle: initial.handle,
         tokensIn: responseWithStructured.tokens.input,
         tokensOut: responseWithStructured.tokens.output,
@@ -1616,7 +1705,11 @@ async function call(ir, opts = {}) {
         emptyResponse: responseWithStructured.tokens.output === 0,
         toolsCalled: responseWithStructured.toolCalls.map((tc) => tc.name),
         actualModel: targetModel !== initial.target ? targetModel : void 0,
-        responsePreview: responseWithStructured.text.slice(0, 200)
+        mutationsApplied: targetModel !== initial.target ? activeCompile.mutationsApplied.map((m) => m.id) : void 0,
+        promptPreview: extractPromptPreview(ir),
+        responsePreview: responseWithStructured.text.slice(0, 200),
+        cacheReadInputTokens: responseWithStructured.tokens.cached,
+        cacheCreationInputTokens: responseWithStructured.tokens.cacheCreated
       });
       return {
         handle: initial.handle,
@@ -1641,13 +1734,14 @@ async function call(ir, opts = {}) {
     }
   }
   const latencyMs = Date.now() - start;
-  void record({
+  await record({
     handle: initial.handle,
     tokensIn: 0,
     tokensOut: 0,
     latencyMs,
     success: false,
-    errorType: lastErr?.errorCode
+    errorType: lastErr?.errorCode,
+    promptPreview: extractPromptPreview(ir)
   });
   throw new CallError(
     `call(): all attempts failed${lastErr ? ` \u2014 ${lastErr.errorCode}: ${lastErr.message}` : ""}`,
@@ -1665,6 +1759,13 @@ function compileAndRegister(ir, opts) {
   registerCompile(ir.appId, ir.intent.archetype, ir, result);
   return result;
 }
+function extractPromptPreview(ir) {
+  const turn = ir.currentTurn?.content;
+  if (turn) return turn.slice(0, 200);
+  const lastHist = ir.history?.[ir.history.length - 1]?.content;
+  if (lastHist) return lastHist.slice(0, 200);
+  return void 0;
+}
 function withStructuredOutput(response, ir) {
   if (!ir.constraints?.structuredOutput) return response;
   if (!response.text) return response;

package/dist/index.mjs CHANGED Viewed

@@ -374,10 +374,15 @@ function lower(ir, profile, hints = {}) {
 }
 function lowerAnthropic(ir, profile, hints) {
   const systemBlocks = buildAnthropicSystemBlocks(ir.sections, profile);
-  const messages = buildAnthropicMessages(ir.history ?? [], ir.currentTurn);
+  const history = ir.history ?? [];
+  const policy = ir.historyCachePolicy;
+  const markIndex = resolveHistoryMarkIndex(history.length, policy);
+  const messages = buildAnthropicMessages(history, ir.currentTurn, markIndex);
   const tools = ir.tools ? toAnthropicTools(ir.tools) : void 0;
   const cacheableTokens = computeCacheableTokens(systemBlocks);
-  const cacheSavings = cacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.1));
+  const historyCacheableTokens = markIndex >= 0 ? sumHistoryTokens(history, markIndex) : 0;
+  const totalCacheableTokens = cacheableTokens + historyCacheableTokens;
+  const cacheSavings = totalCacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.1));
   return {
     request: {
       provider: "anthropic",
@@ -389,6 +394,7 @@ function lowerAnthropic(ir, profile, hints) {
     },
     diagnostics: {
       cacheableTokens,
+      historyCacheableTokens,
       estimatedCacheSavingsUsd: cacheSavings
     }
   };
@@ -421,17 +427,64 @@ function buildAnthropicSystemBlocks(sections, profile) {
   }
   return blocks;
 }
-function buildAnthropicMessages(history, currentTurn) {
+function buildAnthropicMessages(history, currentTurn, markIndex) {
   const out = [];
-  for (const m of history) {
+  for (let i = 0; i < history.length; i++) {
+    const m = history[i];
     if (m.role === "system") continue;
-    out.push({ role: m.role, content: m.parts ?? m.content });
+    const shouldMark = i === markIndex;
+    out.push({
+      role: m.role,
+      content: shouldMark ? attachAnthropicCacheControl(m) : m.parts ?? m.content
+    });
   }
   if (currentTurn && currentTurn.role !== "system") {
     out.push({ role: currentTurn.role, content: currentTurn.parts ?? currentTurn.content });
   }
   return out;
 }
+function attachAnthropicCacheControl(m) {
+  if (Array.isArray(m.parts) && m.parts.length > 0) {
+    const blocks = m.parts;
+    const last = blocks[blocks.length - 1];
+    const withMarker = {
+      ...last,
+      cache_control: { type: "ephemeral" }
+    };
+    return [...blocks.slice(0, -1), withMarker];
+  }
+  return [
+    {
+      type: "text",
+      text: m.content,
+      cache_control: { type: "ephemeral" }
+    }
+  ];
+}
+function resolveHistoryMarkIndex(historyLen, policy) {
+  if (!policy || policy.strategy === "none") return -1;
+  if (historyLen === 0) return -1;
+  if (policy.strategy === "all-but-latest") {
+    return historyLen - 1;
+  }
+  const idx = historyLen - 1 - policy.suffix;
+  return idx >= 0 ? idx : -1;
+}
+function sumHistoryTokens(history, throughIndex) {
+  let total = 0;
+  for (let i = 0; i <= throughIndex && i < history.length; i++) {
+    const m = history[i];
+    if (m.role === "system") continue;
+    if (Array.isArray(m.parts)) {
+      for (const p of m.parts) {
+        if (typeof p.text === "string") total += countTokens(p.text);
+      }
+    } else if (typeof m.content === "string") {
+      total += countTokens(m.content);
+    }
+  }
+  return total;
+}
 function toAnthropicTools(tools) {
   return tools.map((t) => ({
     name: t.name,
@@ -466,6 +519,9 @@ function lowerGoogle(ir, profile, hints) {
   const minTokens = profile.lowering.cache.minTokens ?? 4096;
   const meetsMin = cacheableTokens >= minTokens;
   const cacheSavings = meetsMin ? cacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.25)) : 0;
+  const history = ir.history ?? [];
+  const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
+  const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
   return {
     request: {
       provider: "google",
@@ -477,6 +533,7 @@ function lowerGoogle(ir, profile, hints) {
     },
     diagnostics: {
       cacheableTokens: meetsMin ? cacheableTokens : 0,
+      historyCacheableTokens,
       estimatedCacheSavingsUsd: cacheSavings
     }
   };
@@ -524,6 +581,9 @@ function lowerOpenAI(ir, profile, hints) {
       content: ir.currentTurn.parts ?? ir.currentTurn.content
     });
   }
+  const history = ir.history ?? [];
+  const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
+  const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
   return {
     request: {
       provider: "openai",
@@ -533,7 +593,11 @@ function lowerOpenAI(ir, profile, hints) {
       response_format: ir.constraints?.structuredOutput ? { type: "json_object" } : void 0,
       reasoning_effort: hints.forceTerseOutput ? "low" : void 0
     },
-    diagnostics: { cacheableTokens: 0, estimatedCacheSavingsUsd: 0 }
+    diagnostics: {
+      cacheableTokens: 0,
+      historyCacheableTokens,
+      estimatedCacheSavingsUsd: 0
+    }
   };
 }
 function toOpenAITools(tools) {
@@ -560,6 +624,9 @@ function lowerDeepSeek(ir, profile) {
       content: ir.currentTurn.parts ?? ir.currentTurn.content
     });
   }
+  const history = ir.history ?? [];
+  const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
+  const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
   return {
     request: {
       provider: "deepseek",
@@ -574,7 +641,11 @@ function lowerDeepSeek(ir, profile) {
         }
       })) : void 0
     },
-    diagnostics: { cacheableTokens: 0, estimatedCacheSavingsUsd: 0 }
+    diagnostics: {
+      cacheableTokens: 0,
+      historyCacheableTokens,
+      estimatedCacheSavingsUsd: 0
+    }
   };
 }
 function sortSections(sections) {
@@ -664,7 +735,8 @@ function compile(ir, opts = {}) {
       historyKept: workingIR.history?.length ?? 0,
       historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
       cacheableTokens: lowered.diagnostics.cacheableTokens,
-      estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd
+      estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd,
+      historyCacheableTokens: lowered.diagnostics.historyCacheableTokens
     }
   };
 }
@@ -749,7 +821,8 @@ function registerCompile(appId, archetype, ir, result) {
     learningKey: learningKey(archetype, result.target, shape),
     estimatedTokensIn: tokens,
     mutationsApplied: result.mutationsApplied.map((m) => m.id),
-    startedAt: Date.now()
+    startedAt: Date.now(),
+    historyCacheableTokens: result.diagnostics.historyCacheableTokens
   });
 }
 async function record(input) {
@@ -792,6 +865,9 @@ function buildPayload(input, reg) {
   const compileTarget = reg?.model;
   const actual = input.actualModel ?? compileTarget;
   const requested = input.actualModel && compileTarget && input.actualModel !== compileTarget ? compileTarget : void 0;
+  const mutationsApplied = input.mutationsApplied ?? reg?.mutationsApplied ?? [];
+  const costModel = actual;
+  const costUsdActual = costModel ? computeCostUsd(costModel, input.tokensIn, input.tokensOut) : void 0;
   return {
     handle: input.handle,
     app_id: reg?.appId,
@@ -801,7 +877,7 @@ function buildPayload(input, reg) {
     provider: reg?.provider,
     shape_key: reg?.shapeKey,
     learning_key: reg?.learningKey,
-    mutations_applied: reg?.mutationsApplied ?? [],
+    mutations_applied: mutationsApplied,
     tokens_in: input.tokensIn,
     tokens_out: input.tokensOut,
     estimated_tokens_in: reg?.estimatedTokensIn,
@@ -815,9 +891,22 @@ function buildPayload(input, reg) {
     oracle_rationale: input.oracleScore?.rationale,
     prompt_preview: input.promptPreview,
     response_preview: input.responsePreview,
-    dialect_version: "v1"
+    dialect_version: "v1",
+    cache_read_input_tokens: input.cacheReadInputTokens,
+    cache_creation_input_tokens: input.cacheCreationInputTokens,
+    cost_usd_actual: costUsdActual,
+    ttft_ms: input.ttftMs,
+    history_cacheable_tokens: reg?.historyCacheableTokens
   };
 }
+function computeCostUsd(modelId, tokensIn, tokensOut) {
+  if (tokensIn === 0 && tokensOut === 0) return void 0;
+  const profile = tryGetProfile(modelId);
+  if (!profile) return void 0;
+  const inUsd = tokensIn / 1e6 * profile.costInputPer1m;
+  const outUsd = tokensOut / 1e6 * profile.costOutputPer1m;
+  return Math.round((inUsd + outUsd) * 1e6) / 1e6;
+}
 // src/ir.ts
 var CallError = class extends Error {
@@ -1090,7 +1179,7 @@ async function call(ir, opts = {}) {
       attempts.push({ model: targetModel, status: "success" });
       const latencyMs2 = Date.now() - start;
       const responseWithStructured = withStructuredOutput(exec.response, ir);
-      void record({
+      await record({
         handle: initial.handle,
         tokensIn: responseWithStructured.tokens.input,
         tokensOut: responseWithStructured.tokens.output,
@@ -1099,7 +1188,11 @@ async function call(ir, opts = {}) {
         emptyResponse: responseWithStructured.tokens.output === 0,
         toolsCalled: responseWithStructured.toolCalls.map((tc) => tc.name),
         actualModel: targetModel !== initial.target ? targetModel : void 0,
-        responsePreview: responseWithStructured.text.slice(0, 200)
+        mutationsApplied: targetModel !== initial.target ? activeCompile.mutationsApplied.map((m) => m.id) : void 0,
+        promptPreview: extractPromptPreview(ir),
+        responsePreview: responseWithStructured.text.slice(0, 200),
+        cacheReadInputTokens: responseWithStructured.tokens.cached,
+        cacheCreationInputTokens: responseWithStructured.tokens.cacheCreated
       });
       return {
         handle: initial.handle,
@@ -1124,13 +1217,14 @@ async function call(ir, opts = {}) {
     }
   }
   const latencyMs = Date.now() - start;
-  void record({
+  await record({
     handle: initial.handle,
     tokensIn: 0,
     tokensOut: 0,
     latencyMs,
     success: false,
-    errorType: lastErr?.errorCode
+    errorType: lastErr?.errorCode,
+    promptPreview: extractPromptPreview(ir)
   });
   throw new CallError(
     `call(): all attempts failed${lastErr ? ` \u2014 ${lastErr.errorCode}: ${lastErr.message}` : ""}`,
@@ -1148,6 +1242,13 @@ function compileAndRegister(ir, opts) {
   registerCompile(ir.appId, ir.intent.archetype, ir, result);
   return result;
 }
+function extractPromptPreview(ir) {
+  const turn = ir.currentTurn?.content;
+  if (turn) return turn.slice(0, 200);
+  const lastHist = ir.history?.[ir.history.length - 1]?.content;
+  if (lastHist) return lastHist.slice(0, 200);
+  return void 0;
+}
 function withStructuredOutput(response, ir) {
   if (!ir.constraints?.structuredOutput) return response;
   if (!response.text) return response;

package/dist/{profiles-BiyrF36f.d.mts → profiles-DHdCRBVH.d.mts} RENAMED Viewed

@@ -91,6 +91,40 @@ interface Constraints {
     /** Override target model selection — if set, compiler uses this instead of routing. */
     forceModel?: string;
 }
+/**
+ * Cache marker policy for the messages array (history + currentTurn).
+ *
+ * Anthropic positional caching: a `cache_control` marker on a content block
+ * tells the API "remember the prefix up through this block." On a subsequent
+ * request whose first N tokens match, those N tokens are billed at the cached rate
+ * (10% of the input price). Without a marker, every call re-pays for the
+ * entire history.
+ *
+ * - `'none'` (default when omitted): no history cache marker. System-level
+ *   cache markers from `PromptSection.cacheable=true` still apply.
+ * - `'all-but-latest'`: marks the message immediately preceding `currentTurn`
+ *   (the last history entry). On the next call, that entire history prefix
+ *   is cacheable. Good fit for chat/agent loops where every prior turn is
+ *   stable.
+ * - `'fixed-suffix'`: marks the message `suffix` positions from the end of
+ *   `history`. Use when the last few turns are volatile (e.g., scratchpad,
+ *   draft revisions) but the earlier prefix is stable.
+ *
+ * For non-Anthropic providers, no wire-format marker is emitted (Gemini /
+ * OpenAI / DeepSeek implicit caching takes effect automatically when a
+ * stable prefix is reused). The compiler still computes
+ * `diagnostics.historyCacheableTokens` for telemetry on every provider.
+ *
+ * alpha.5.
+ */
+type HistoryCachePolicy = {
+    strategy: 'none';
+} | {
+    strategy: 'all-but-latest';
+} | {
+    strategy: 'fixed-suffix';
+    suffix: number;
+};
 /**
  * Consumer-declared policy for model selection. Lives outside the IR
  * (passed via CompileOptions) because it's a SESSION/APP-level constraint,
@@ -146,6 +180,12 @@ interface PromptIR {
     models: string[];
     /** Compile constraints. */
     constraints?: Constraints;
+    /**
+     * Cache marker placement policy for the messages array. Default = no
+     * history cache markers. See `HistoryCachePolicy` for semantics.
+     * alpha.5.
+     */
+    historyCachePolicy?: HistoryCachePolicy;
 }
 type Provider = 'anthropic' | 'google' | 'openai' | 'deepseek' | 'mistral' | 'xai';
 /**
@@ -240,6 +280,16 @@ interface CompileResult {
         historyDropped: number;
         cacheableTokens: number;
         estimatedCacheSavingsUsd: number;
+        /**
+         * Tokens in `history` up to and including the marked message (never
+         * `currentTurn`), i.e. the cacheable prefix per `historyCachePolicy`. Always
+         * computed; only Anthropic actually emits a wire-format marker. For
+         * Gemini / OpenAI / DeepSeek, this represents the theoretical cacheable
+         * prefix that implicit caching may pick up — useful telemetry for the
+         * brain to learn which (app, model, archetype) tuples benefit most
+         * from history caching. alpha.5.
+         */
+        historyCacheableTokens: number;
     };
 }
 /**
@@ -386,6 +436,38 @@ interface RecordInput {
      * the originally-requested model.
      */
     actualModel?: string;
+    /**
+     * Override `mutations_applied` for this outcome. Set by `call()` when
+     * fallback fires — the served compile's mutations (which actually shaped
+     * the request that went on the wire) replace the initial compile's
+     * mutations (registered against the handle). Without this override, fallback
+     * traffic is attributed to the initial compile's mutations and the brain's
+     * mutation effectiveness stats become misleading.
+     *
+     * alpha.4: extends s11 truth-in-logging to mutations.
+     */
+    mutationsApplied?: string[];
+    /**
+     * Cache read input tokens, when supported by the provider.
+     * - Anthropic: `usage.cache_read_input_tokens`
+     * - Google (implicit caching): `usageMetadata.cachedContentTokenCount`
+     * - OpenAI: `usage.prompt_tokens_details.cached_tokens`
+     *
+     * Powers the cost-and-efficiency-watcher (interfaces/kgauto.md, alpha.4):
+     * `tokens_in - cache_read_input_tokens` is the un-cached new context per call.
+     */
+    cacheReadInputTokens?: number;
+    /**
+     * Cache creation input tokens (Anthropic-specific), taken from
+     * `usage.cache_creation_input_tokens`. Reported by the first call, which pays
+     * the 25% upcharge to write the cache entry; subsequent calls instead report
+     * cache reads via `cacheReadInputTokens`.
+     */
+    cacheCreationInputTokens?: number;
+    /**
+     * Time to first token (ms). Optional; populated when the provider/SDK
+     * surfaces it. Distinct from `latencyMs` (end-to-end wall clock).
+     */
+    ttftMs?: number;
 }
 /**

package/dist/{profiles-C5lVqF8_.d.ts → profiles-MGq5Tnjv.d.ts} RENAMED Viewed

@@ -91,6 +91,40 @@ interface Constraints {
     /** Override target model selection — if set, compiler uses this instead of routing. */
     forceModel?: string;
 }
+/**
+ * Cache marker policy for the messages array (history + currentTurn).
+ *
+ * Anthropic positional caching: a `cache_control` marker on a content block
+ * tells the API "remember the prefix up through this block." On a subsequent
+ * request whose first N tokens match, those N tokens are billed at the cached rate
+ * (10% of the input price). Without a marker, every call re-pays for the
+ * entire history.
+ *
+ * - `'none'` (default when omitted): no history cache marker. System-level
+ *   cache markers from `PromptSection.cacheable=true` still apply.
+ * - `'all-but-latest'`: marks the message immediately preceding `currentTurn`
+ *   (the last history entry). On the next call, that entire history prefix
+ *   is cacheable. Good fit for chat/agent loops where every prior turn is
+ *   stable.
+ * - `'fixed-suffix'`: marks the message `suffix` positions from the end of
+ *   `history`. Use when the last few turns are volatile (e.g., scratchpad,
+ *   draft revisions) but the earlier prefix is stable.
+ *
+ * For non-Anthropic providers, no wire-format marker is emitted (Gemini /
+ * OpenAI / DeepSeek implicit caching takes effect automatically when a
+ * stable prefix is reused). The compiler still computes
+ * `diagnostics.historyCacheableTokens` for telemetry on every provider.
+ *
+ * alpha.5.
+ */
+type HistoryCachePolicy = {
+    strategy: 'none';
+} | {
+    strategy: 'all-but-latest';
+} | {
+    strategy: 'fixed-suffix';
+    suffix: number;
+};
 /**
  * Consumer-declared policy for model selection. Lives outside the IR
  * (passed via CompileOptions) because it's a SESSION/APP-level constraint,
@@ -146,6 +180,12 @@ interface PromptIR {
     models: string[];
     /** Compile constraints. */
     constraints?: Constraints;
+    /**
+     * Cache marker placement policy for the messages array. Default = no
+     * history cache markers. See `HistoryCachePolicy` for semantics.
+     * alpha.5.
+     */
+    historyCachePolicy?: HistoryCachePolicy;
 }
 type Provider = 'anthropic' | 'google' | 'openai' | 'deepseek' | 'mistral' | 'xai';
 /**
@@ -240,6 +280,16 @@ interface CompileResult {
         historyDropped: number;
         cacheableTokens: number;
         estimatedCacheSavingsUsd: number;
+        /**
+         * Tokens in `history` (and `currentTurn` when before the marker) that
+         * fall within the cacheable prefix per `historyCachePolicy`. Always
+         * computed; only Anthropic actually emits a wire-format marker. For
+         * Gemini / OpenAI / DeepSeek, this represents the theoretical cacheable
+         * prefix that implicit caching may pick up — useful telemetry for the
+         * brain to learn which (app, model, archetype) tuples benefit most
+         * from history caching. alpha.5.
+         */
+        historyCacheableTokens: number;
     };
 }
 /**
@@ -386,6 +436,38 @@ interface RecordInput {
      * the originally-requested model.
      */
     actualModel?: string;
+    /**
+     * Override `mutations_applied` for this outcome. Set by `call()` when
+     * fallback fires — the served compile's mutations (which actually shaped
+     * the request that went on the wire) replace the initial compile's
+     * mutations (registered against the handle). Without this override, fallback
+     * traffic is attributed to the initial compile's mutations and the brain's
+     * mutation effectiveness stats become misleading.
+     *
+     * alpha.4: extends s11 truth-in-logging to mutations.
+     */
+    mutationsApplied?: string[];
+    /**
+     * Cache read input tokens, when supported by the provider.
+     * - Anthropic: `usage.cache_read_input_tokens`
+     * - Google (implicit caching): `usageMetadata.cachedContentTokenCount`
+     * - OpenAI: `usage.prompt_tokens_details.cached_tokens`
+     *
+     * Powers the cost-and-efficiency-watcher (interfaces/kgauto.md, alpha.4):
+     * `tokens_in - cache_read_input_tokens` is the un-cached new context per call.
+     */
+    cacheReadInputTokens?: number;
+    /**
+     * Cache creation input tokens (Anthropic-specific).
+     * `usage.cache_creation_input_tokens`. Reported by the first call, which pays the 25%
+     * upcharge to write a cache marker; subsequent calls hit `cacheReadInputTokens`.
+     */
+    cacheCreationInputTokens?: number;
+    /**
+     * Time to first token (ms). Optional; populated when the provider/SDK
+     * surfaces it. Distinct from `latencyMs` (end-to-end wall clock).
+     */
+    ttftMs?: number;
 }
 /**

package/dist/profiles.d.mts CHANGED Viewed

@@ -1,2 +1,2 @@
-export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-BiyrF36f.mjs';
+export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-DHdCRBVH.mjs';
 import './dialect.mjs';

package/dist/profiles.d.ts CHANGED Viewed

@@ -1,2 +1,2 @@
-export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-C5lVqF8_.js';
+export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-MGq5Tnjv.js';
 import './dialect.js';

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@warmdrift/kgauto-compiler",
-  "version": "2.0.0-alpha.3",
+  "version": "2.0.0-alpha.5",
   "description": "Prompt compiler + central learning brain for multi-model AI apps. Swap models without rewriting prompts.",
   "main": "./dist/index.js",
   "module": "./dist/index.mjs",