npm - @warmdrift/kgauto-compiler - Versions diffs - 2.0.0-alpha.4 → 2.0.0-alpha.5 - Mend

@warmdrift/kgauto-compiler 2.0.0-alpha.4 → 2.0.0-alpha.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

package/README.md +3 -3
package/dist/index.d.mts +2 -2
package/dist/index.d.ts +2 -2
package/dist/index.js +84 -10
package/dist/index.mjs +84 -10
package/dist/{profiles-CH_nKPjp.d.mts → profiles-DHdCRBVH.d.mts} +50 -0
package/dist/{profiles-CDttLtaD.d.ts → profiles-MGq5Tnjv.d.ts} +50 -0
package/dist/profiles.d.mts +1 -1
package/dist/profiles.d.ts +1 -1
package/package.json +1 -1

package/README.md CHANGED Viewed

@@ -1,4 +1,4 @@
-# @warmdrift/kgauto-compiler — v2.0.0-alpha.4
+# @warmdrift/kgauto-compiler — v2.0.0-alpha.5
 > Prompt compiler + central learning brain for multi-model AI apps.
 > **Swap models without rewriting prompts.**
@@ -18,8 +18,8 @@ mutations.
 - **Package:** alpha — coexists with v1 (`@warmdrift/kgauto@1.2.0`) under
   the temporary name `@warmdrift/kgauto-compiler`. Renames to v2 final once
   v1 is fully retired from production.
-- **Tests:** 147/147 passing
-- **Build:** clean (43KB ESM, 60KB CJS)
+- **Tests:** 180/180 passing
+- **Build:** clean (47KB ESM, 64KB CJS)
 - **Brain:** schema ready (see `brain/migrations/001_initial_schema.sql`);
   awaiting dedicated Supabase provisioning.
 - **Mutation engine:** v2.1 (after enough outcome data accumulates).

package/dist/index.d.mts CHANGED Viewed

@@ -1,5 +1,5 @@
-import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult } from './profiles-CH_nKPjp.mjs';
-export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CH_nKPjp.mjs';
+import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult } from './profiles-DHdCRBVH.mjs';
+export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-DHdCRBVH.mjs';
 export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, IntentArchetypeName, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.mjs';
 /**

package/dist/index.d.ts CHANGED Viewed

@@ -1,5 +1,5 @@
-import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult } from './profiles-CDttLtaD.js';
-export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CDttLtaD.js';
+import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult } from './profiles-MGq5Tnjv.js';
+export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-MGq5Tnjv.js';
 export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, IntentArchetypeName, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.js';
 /**

package/dist/index.js CHANGED Viewed

@@ -489,10 +489,15 @@ function lower(ir, profile, hints = {}) {
 }
 function lowerAnthropic(ir, profile, hints) {
   const systemBlocks = buildAnthropicSystemBlocks(ir.sections, profile);
-  const messages = buildAnthropicMessages(ir.history ?? [], ir.currentTurn);
+  const history = ir.history ?? [];
+  const policy = ir.historyCachePolicy;
+  const markIndex = resolveHistoryMarkIndex(history.length, policy);
+  const messages = buildAnthropicMessages(history, ir.currentTurn, markIndex);
   const tools = ir.tools ? toAnthropicTools(ir.tools) : void 0;
   const cacheableTokens = computeCacheableTokens(systemBlocks);
-  const cacheSavings = cacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.1));
+  const historyCacheableTokens = markIndex >= 0 ? sumHistoryTokens(history, markIndex) : 0;
+  const totalCacheableTokens = cacheableTokens + historyCacheableTokens;
+  const cacheSavings = totalCacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.1));
   return {
     request: {
       provider: "anthropic",
@@ -504,6 +509,7 @@ function lowerAnthropic(ir, profile, hints) {
     },
     diagnostics: {
       cacheableTokens,
+      historyCacheableTokens,
       estimatedCacheSavingsUsd: cacheSavings
     }
   };
@@ -536,17 +542,64 @@ function buildAnthropicSystemBlocks(sections, profile) {
   }
   return blocks;
 }
-function buildAnthropicMessages(history, currentTurn) {
+function buildAnthropicMessages(history, currentTurn, markIndex) {
   const out = [];
-  for (const m of history) {
+  for (let i = 0; i < history.length; i++) {
+    const m = history[i];
     if (m.role === "system") continue;
-    out.push({ role: m.role, content: m.parts ?? m.content });
+    const shouldMark = i === markIndex;
+    out.push({
+      role: m.role,
+      content: shouldMark ? attachAnthropicCacheControl(m) : m.parts ?? m.content
+    });
   }
   if (currentTurn && currentTurn.role !== "system") {
     out.push({ role: currentTurn.role, content: currentTurn.parts ?? currentTurn.content });
   }
   return out;
 }
+function attachAnthropicCacheControl(m) {
+  if (Array.isArray(m.parts) && m.parts.length > 0) {
+    const blocks = m.parts;
+    const last = blocks[blocks.length - 1];
+    const withMarker = {
+      ...last,
+      cache_control: { type: "ephemeral" }
+    };
+    return [...blocks.slice(0, -1), withMarker];
+  }
+  return [
+    {
+      type: "text",
+      text: m.content,
+      cache_control: { type: "ephemeral" }
+    }
+  ];
+}
+function resolveHistoryMarkIndex(historyLen, policy) {
+  if (!policy || policy.strategy === "none") return -1;
+  if (historyLen === 0) return -1;
+  if (policy.strategy === "all-but-latest") {
+    return historyLen - 1;
+  }
+  const idx = historyLen - 1 - policy.suffix;
+  return idx >= 0 ? idx : -1;
+}
+function sumHistoryTokens(history, throughIndex) {
+  let total = 0;
+  for (let i = 0; i <= throughIndex && i < history.length; i++) {
+    const m = history[i];
+    if (m.role === "system") continue;
+    if (Array.isArray(m.parts)) {
+      for (const p of m.parts) {
+        if (typeof p.text === "string") total += countTokens(p.text);
+      }
+    } else if (typeof m.content === "string") {
+      total += countTokens(m.content);
+    }
+  }
+  return total;
+}
 function toAnthropicTools(tools) {
   return tools.map((t) => ({
     name: t.name,
@@ -581,6 +634,9 @@ function lowerGoogle(ir, profile, hints) {
   const minTokens = profile.lowering.cache.minTokens ?? 4096;
   const meetsMin = cacheableTokens >= minTokens;
   const cacheSavings = meetsMin ? cacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.25)) : 0;
+  const history = ir.history ?? [];
+  const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
+  const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
   return {
     request: {
       provider: "google",
@@ -592,6 +648,7 @@ function lowerGoogle(ir, profile, hints) {
     },
     diagnostics: {
       cacheableTokens: meetsMin ? cacheableTokens : 0,
+      historyCacheableTokens,
       estimatedCacheSavingsUsd: cacheSavings
     }
   };
@@ -639,6 +696,9 @@ function lowerOpenAI(ir, profile, hints) {
       content: ir.currentTurn.parts ?? ir.currentTurn.content
     });
   }
+  const history = ir.history ?? [];
+  const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
+  const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
   return {
     request: {
       provider: "openai",
@@ -648,7 +708,11 @@ function lowerOpenAI(ir, profile, hints) {
       response_format: ir.constraints?.structuredOutput ? { type: "json_object" } : void 0,
       reasoning_effort: hints.forceTerseOutput ? "low" : void 0
     },
-    diagnostics: { cacheableTokens: 0, estimatedCacheSavingsUsd: 0 }
+    diagnostics: {
+      cacheableTokens: 0,
+      historyCacheableTokens,
+      estimatedCacheSavingsUsd: 0
+    }
   };
 }
 function toOpenAITools(tools) {
@@ -675,6 +739,9 @@ function lowerDeepSeek(ir, profile) {
       content: ir.currentTurn.parts ?? ir.currentTurn.content
     });
   }
+  const history = ir.history ?? [];
+  const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
+  const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
   return {
     request: {
       provider: "deepseek",
@@ -689,7 +756,11 @@ function lowerDeepSeek(ir, profile) {
         }
       })) : void 0
     },
-    diagnostics: { cacheableTokens: 0, estimatedCacheSavingsUsd: 0 }
+    diagnostics: {
+      cacheableTokens: 0,
+      historyCacheableTokens,
+      estimatedCacheSavingsUsd: 0
+    }
   };
 }
 function sortSections(sections) {
@@ -1181,7 +1252,8 @@ function compile(ir, opts = {}) {
       historyKept: workingIR.history?.length ?? 0,
       historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
       cacheableTokens: lowered.diagnostics.cacheableTokens,
-      estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd
+      estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd,
+      historyCacheableTokens: lowered.diagnostics.historyCacheableTokens
     }
   };
 }
@@ -1266,7 +1338,8 @@ function registerCompile(appId, archetype, ir, result) {
     learningKey: learningKey(archetype, result.target, shape),
     estimatedTokensIn: tokens,
     mutationsApplied: result.mutationsApplied.map((m) => m.id),
-    startedAt: Date.now()
+    startedAt: Date.now(),
+    historyCacheableTokens: result.diagnostics.historyCacheableTokens
   });
 }
 async function record(input) {
@@ -1339,7 +1412,8 @@ function buildPayload(input, reg) {
     cache_read_input_tokens: input.cacheReadInputTokens,
     cache_creation_input_tokens: input.cacheCreationInputTokens,
     cost_usd_actual: costUsdActual,
-    ttft_ms: input.ttftMs
+    ttft_ms: input.ttftMs,
+    history_cacheable_tokens: reg?.historyCacheableTokens
   };
 }
 function computeCostUsd(modelId, tokensIn, tokensOut) {

package/dist/index.mjs CHANGED Viewed

@@ -374,10 +374,15 @@ function lower(ir, profile, hints = {}) {
 }
 function lowerAnthropic(ir, profile, hints) {
   const systemBlocks = buildAnthropicSystemBlocks(ir.sections, profile);
-  const messages = buildAnthropicMessages(ir.history ?? [], ir.currentTurn);
+  const history = ir.history ?? [];
+  const policy = ir.historyCachePolicy;
+  const markIndex = resolveHistoryMarkIndex(history.length, policy);
+  const messages = buildAnthropicMessages(history, ir.currentTurn, markIndex);
   const tools = ir.tools ? toAnthropicTools(ir.tools) : void 0;
   const cacheableTokens = computeCacheableTokens(systemBlocks);
-  const cacheSavings = cacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.1));
+  const historyCacheableTokens = markIndex >= 0 ? sumHistoryTokens(history, markIndex) : 0;
+  const totalCacheableTokens = cacheableTokens + historyCacheableTokens;
+  const cacheSavings = totalCacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.1));
   return {
     request: {
       provider: "anthropic",
@@ -389,6 +394,7 @@ function lowerAnthropic(ir, profile, hints) {
     },
     diagnostics: {
       cacheableTokens,
+      historyCacheableTokens,
       estimatedCacheSavingsUsd: cacheSavings
     }
   };
@@ -421,17 +427,64 @@ function buildAnthropicSystemBlocks(sections, profile) {
   }
   return blocks;
 }
-function buildAnthropicMessages(history, currentTurn) {
+function buildAnthropicMessages(history, currentTurn, markIndex) {
   const out = [];
-  for (const m of history) {
+  for (let i = 0; i < history.length; i++) {
+    const m = history[i];
     if (m.role === "system") continue;
-    out.push({ role: m.role, content: m.parts ?? m.content });
+    const shouldMark = i === markIndex;
+    out.push({
+      role: m.role,
+      content: shouldMark ? attachAnthropicCacheControl(m) : m.parts ?? m.content
+    });
   }
   if (currentTurn && currentTurn.role !== "system") {
     out.push({ role: currentTurn.role, content: currentTurn.parts ?? currentTurn.content });
   }
   return out;
 }
+function attachAnthropicCacheControl(m) {
+  if (Array.isArray(m.parts) && m.parts.length > 0) {
+    const blocks = m.parts;
+    const last = blocks[blocks.length - 1];
+    const withMarker = {
+      ...last,
+      cache_control: { type: "ephemeral" }
+    };
+    return [...blocks.slice(0, -1), withMarker];
+  }
+  return [
+    {
+      type: "text",
+      text: m.content,
+      cache_control: { type: "ephemeral" }
+    }
+  ];
+}
+function resolveHistoryMarkIndex(historyLen, policy) {
+  if (!policy || policy.strategy === "none") return -1;
+  if (historyLen === 0) return -1;
+  if (policy.strategy === "all-but-latest") {
+    return historyLen - 1;
+  }
+  const idx = historyLen - 1 - policy.suffix;
+  return idx >= 0 ? idx : -1;
+}
+function sumHistoryTokens(history, throughIndex) {
+  let total = 0;
+  for (let i = 0; i <= throughIndex && i < history.length; i++) {
+    const m = history[i];
+    if (m.role === "system") continue;
+    if (Array.isArray(m.parts)) {
+      for (const p of m.parts) {
+        if (typeof p.text === "string") total += countTokens(p.text);
+      }
+    } else if (typeof m.content === "string") {
+      total += countTokens(m.content);
+    }
+  }
+  return total;
+}
 function toAnthropicTools(tools) {
   return tools.map((t) => ({
     name: t.name,
@@ -466,6 +519,9 @@ function lowerGoogle(ir, profile, hints) {
   const minTokens = profile.lowering.cache.minTokens ?? 4096;
   const meetsMin = cacheableTokens >= minTokens;
   const cacheSavings = meetsMin ? cacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.25)) : 0;
+  const history = ir.history ?? [];
+  const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
+  const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
   return {
     request: {
       provider: "google",
@@ -477,6 +533,7 @@ function lowerGoogle(ir, profile, hints) {
     },
     diagnostics: {
       cacheableTokens: meetsMin ? cacheableTokens : 0,
+      historyCacheableTokens,
       estimatedCacheSavingsUsd: cacheSavings
     }
   };
@@ -524,6 +581,9 @@ function lowerOpenAI(ir, profile, hints) {
       content: ir.currentTurn.parts ?? ir.currentTurn.content
     });
   }
+  const history = ir.history ?? [];
+  const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
+  const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
   return {
     request: {
       provider: "openai",
@@ -533,7 +593,11 @@ function lowerOpenAI(ir, profile, hints) {
       response_format: ir.constraints?.structuredOutput ? { type: "json_object" } : void 0,
       reasoning_effort: hints.forceTerseOutput ? "low" : void 0
     },
-    diagnostics: { cacheableTokens: 0, estimatedCacheSavingsUsd: 0 }
+    diagnostics: {
+      cacheableTokens: 0,
+      historyCacheableTokens,
+      estimatedCacheSavingsUsd: 0
+    }
   };
 }
 function toOpenAITools(tools) {
@@ -560,6 +624,9 @@ function lowerDeepSeek(ir, profile) {
       content: ir.currentTurn.parts ?? ir.currentTurn.content
     });
   }
+  const history = ir.history ?? [];
+  const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
+  const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
   return {
     request: {
       provider: "deepseek",
@@ -574,7 +641,11 @@ function lowerDeepSeek(ir, profile) {
         }
       })) : void 0
     },
-    diagnostics: { cacheableTokens: 0, estimatedCacheSavingsUsd: 0 }
+    diagnostics: {
+      cacheableTokens: 0,
+      historyCacheableTokens,
+      estimatedCacheSavingsUsd: 0
+    }
   };
 }
 function sortSections(sections) {
@@ -664,7 +735,8 @@ function compile(ir, opts = {}) {
       historyKept: workingIR.history?.length ?? 0,
       historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
       cacheableTokens: lowered.diagnostics.cacheableTokens,
-      estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd
+      estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd,
+      historyCacheableTokens: lowered.diagnostics.historyCacheableTokens
     }
   };
 }
@@ -749,7 +821,8 @@ function registerCompile(appId, archetype, ir, result) {
     learningKey: learningKey(archetype, result.target, shape),
     estimatedTokensIn: tokens,
     mutationsApplied: result.mutationsApplied.map((m) => m.id),
-    startedAt: Date.now()
+    startedAt: Date.now(),
+    historyCacheableTokens: result.diagnostics.historyCacheableTokens
   });
 }
 async function record(input) {
@@ -822,7 +895,8 @@ function buildPayload(input, reg) {
     cache_read_input_tokens: input.cacheReadInputTokens,
     cache_creation_input_tokens: input.cacheCreationInputTokens,
     cost_usd_actual: costUsdActual,
-    ttft_ms: input.ttftMs
+    ttft_ms: input.ttftMs,
+    history_cacheable_tokens: reg?.historyCacheableTokens
   };
 }
 function computeCostUsd(modelId, tokensIn, tokensOut) {

package/dist/{profiles-CH_nKPjp.d.mts → profiles-DHdCRBVH.d.mts} RENAMED Viewed

@@ -91,6 +91,40 @@ interface Constraints {
     /** Override target model selection — if set, compiler uses this instead of routing. */
     forceModel?: string;
 }
+/**
+ * Cache marker policy for the messages array (history + currentTurn).
+ *
+ * Anthropic positional caching: a `cache_control` marker on a content block
+ * tells the API "remember the prefix up through this block." On a subsequent
+ * request whose first N tokens match, those N tokens are billed at the cached rate
+ * (10% of the input price). Without a marker, every call re-pays for the
+ * entire history.
+ *
+ * - `'none'` (default when omitted): no history cache marker. System-level
+ *   cache markers from `PromptSection.cacheable=true` still apply.
+ * - `'all-but-latest'`: marks the message immediately preceding `currentTurn`
+ *   (the last history entry). On the next call, that entire history prefix
+ *   is cacheable. Good fit for chat/agent loops where every prior turn is
+ *   stable.
+ * - `'fixed-suffix'`: marks the message `suffix` positions from the end of
+ *   `history`. Use when the last few turns are volatile (e.g., scratchpad,
+ *   draft revisions) but the earlier prefix is stable.
+ *
+ * For non-Anthropic providers, no wire-format marker is emitted (Gemini /
+ * OpenAI / DeepSeek implicit caching takes effect automatically when a
+ * stable prefix is reused). The compiler still computes
+ * `diagnostics.historyCacheableTokens` for telemetry on every provider.
+ *
+ * alpha.5.
+ */
+type HistoryCachePolicy = {
+    strategy: 'none';
+} | {
+    strategy: 'all-but-latest';
+} | {
+    strategy: 'fixed-suffix';
+    suffix: number;
+};
 /**
  * Consumer-declared policy for model selection. Lives outside the IR
  * (passed via CompileOptions) because it's a SESSION/APP-level constraint,
@@ -146,6 +180,12 @@ interface PromptIR {
     models: string[];
     /** Compile constraints. */
     constraints?: Constraints;
+    /**
+     * Cache marker placement policy for the messages array. Default = no
+     * history cache markers. See `HistoryCachePolicy` for semantics.
+     * alpha.5.
+     */
+    historyCachePolicy?: HistoryCachePolicy;
 }
 type Provider = 'anthropic' | 'google' | 'openai' | 'deepseek' | 'mistral' | 'xai';
 /**
@@ -240,6 +280,16 @@ interface CompileResult {
         historyDropped: number;
         cacheableTokens: number;
         estimatedCacheSavingsUsd: number;
+        /**
+         * Tokens in `history`, from the first message through the marked one, that
+         * form the cacheable prefix per `historyCachePolicy` (`currentTurn` is not counted). Always
+         * computed; only Anthropic actually emits a wire-format marker. For
+         * Gemini / OpenAI / DeepSeek, this represents the theoretical cacheable
+         * prefix that implicit caching may pick up — useful telemetry for the
+         * brain to learn which (app, model, archetype) tuples benefit most
+         * from history caching. alpha.5.
+         */
+        historyCacheableTokens: number;
     };
 }
 /**

package/dist/{profiles-CDttLtaD.d.ts → profiles-MGq5Tnjv.d.ts} RENAMED Viewed

@@ -91,6 +91,40 @@ interface Constraints {
     /** Override target model selection — if set, compiler uses this instead of routing. */
     forceModel?: string;
 }
+/**
+ * Cache marker policy for the messages array (history + currentTurn).
+ *
+ * Anthropic positional caching: a `cache_control` marker on a content block
+ * tells the API "remember the prefix up through this block." On a subsequent
+ * request whose first N tokens match, those N tokens are billed at the cached rate
+ * (10% of the input price). Without a marker, every call re-pays for the
+ * entire history.
+ *
+ * - `'none'` (default when omitted): no history cache marker. System-level
+ *   cache markers from `PromptSection.cacheable=true` still apply.
+ * - `'all-but-latest'`: marks the message immediately preceding `currentTurn`
+ *   (the last history entry). On the next call, that entire history prefix
+ *   is cacheable. Good fit for chat/agent loops where every prior turn is
+ *   stable.
+ * - `'fixed-suffix'`: marks the message `suffix` positions from the end of
+ *   `history`. Use when the last few turns are volatile (e.g., scratchpad,
+ *   draft revisions) but the earlier prefix is stable.
+ *
+ * For non-Anthropic providers, no wire-format marker is emitted (Gemini /
+ * OpenAI / DeepSeek implicit caching takes effect automatically when a
+ * stable prefix is reused). The compiler still computes
+ * `diagnostics.historyCacheableTokens` for telemetry on every provider.
+ *
+ * alpha.5.
+ */
+type HistoryCachePolicy = {
+    strategy: 'none';
+} | {
+    strategy: 'all-but-latest';
+} | {
+    strategy: 'fixed-suffix';
+    suffix: number;
+};
 /**
  * Consumer-declared policy for model selection. Lives outside the IR
  * (passed via CompileOptions) because it's a SESSION/APP-level constraint,
@@ -146,6 +180,12 @@ interface PromptIR {
     models: string[];
     /** Compile constraints. */
     constraints?: Constraints;
+    /**
+     * Cache marker placement policy for the messages array. Default = no
+     * history cache markers. See `HistoryCachePolicy` for semantics.
+     * alpha.5.
+     */
+    historyCachePolicy?: HistoryCachePolicy;
 }
 type Provider = 'anthropic' | 'google' | 'openai' | 'deepseek' | 'mistral' | 'xai';
 /**
@@ -240,6 +280,16 @@ interface CompileResult {
         historyDropped: number;
         cacheableTokens: number;
         estimatedCacheSavingsUsd: number;
+        /**
+         * Tokens in `history`, from the first message through the marked one, that
+         * form the cacheable prefix per `historyCachePolicy` (`currentTurn` is not counted). Always
+         * computed; only Anthropic actually emits a wire-format marker. For
+         * Gemini / OpenAI / DeepSeek, this represents the theoretical cacheable
+         * prefix that implicit caching may pick up — useful telemetry for the
+         * brain to learn which (app, model, archetype) tuples benefit most
+         * from history caching. alpha.5.
+         */
+        historyCacheableTokens: number;
     };
 }
 /**

package/dist/profiles.d.mts CHANGED Viewed

@@ -1,2 +1,2 @@
-export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CH_nKPjp.mjs';
+export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-DHdCRBVH.mjs';
 import './dialect.mjs';

package/dist/profiles.d.ts CHANGED Viewed

@@ -1,2 +1,2 @@
-export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CDttLtaD.js';
+export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-MGq5Tnjv.js';
 import './dialect.js';

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@warmdrift/kgauto-compiler",
-  "version": "2.0.0-alpha.4",
+  "version": "2.0.0-alpha.5",
   "description": "Prompt compiler + central learning brain for multi-model AI apps. Swap models without rewriting prompts.",
   "main": "./dist/index.js",
   "module": "./dist/index.mjs",