npm - @warmdrift/kgauto-compiler - Versions diffs - 2.0.0-alpha.4 → 2.0.0-alpha.6 - Mend

@warmdrift/kgauto-compiler 2.0.0-alpha.4 → 2.0.0-alpha.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

package/README.md +45 -3
package/dist/index.d.mts +38 -3
package/dist/index.d.ts +38 -3
package/dist/index.js +186 -19
package/dist/index.mjs +185 -19
package/dist/{profiles-CDttLtaD.d.ts → profiles-CQnLkQ7b.d.ts} +91 -1
package/dist/{profiles-CH_nKPjp.d.mts → profiles-zm6diETo.d.mts} +91 -1
package/dist/profiles.d.mts +1 -1
package/dist/profiles.d.ts +1 -1
package/package.json +1 -1

package/README.md CHANGED Viewed

@@ -1,4 +1,4 @@
-# @warmdrift/kgauto-compiler — v2.0.0-alpha.4
+# @warmdrift/kgauto-compiler — v2.0.0-alpha.6
 > Prompt compiler + central learning brain for multi-model AI apps.
 > **Swap models without rewriting prompts.**
@@ -18,8 +18,8 @@ mutations.
 - **Package:** alpha — coexists with v1 (`@warmdrift/kgauto@1.2.0`) under
   the temporary name `@warmdrift/kgauto-compiler`. Renames to v2 final once
   v1 is fully retired from production.
-- **Tests:** 147/147 passing
-- **Build:** clean (43KB ESM, 60KB CJS)
+- **Tests:** 201/201 passing
+- **Build:** clean (47KB ESM, 68KB CJS)
 - **Brain:** schema ready (see `brain/migrations/001_initial_schema.sql`);
   awaiting dedicated Supabase provisioning.
 - **Mutation engine:** v2.1 (after enough outcome data accumulates).
@@ -154,6 +154,48 @@ The 5 prod empty-responses in tt-intelligence's `gemini-2.5-flash` dashboard
 calls? v2 catches those automatically — `expectedShortOutput` constraint plus
 the `force_thinking_budget_zero` cliff guard.
+## Tools
+Tools are first-class IR fields. The compiler's tool-relevance pass drops
+tools that don't apply to the current intent before lowering — saves
+context budget on every call.
+```ts
+const tools: ToolDefinition[] = [
+  {
+    name: 'web_search',
+    description: 'Search the public web',
+    parameters: { type: 'object', properties: { q: { type: 'string' } } },
+    relevanceByIntent: {
+      ask: 0.9,        // primary tool for ask
+      hunt: 0.9,
+      classify: 0.0,   // never useful for classification
+      summarize: 0.0,
+      extract: 0.1,
+    },
+  },
+  // ...
+];
+```
+Each tool declares per-intent relevance scores in the range 0..1. The pass
+keeps tools where `relevanceByIntent[currentIntent] >= toolRelevanceThreshold`
+(default `0.2`). Missing entries default to neutral (`0.5`) and are therefore
+kept at the default threshold. Set an explicit `0.0` to hard-exclude a tool.
+Tool definitions eat ~350 tokens of context per tool (L-051), so trimming
+matters: 12 declared tools, only 3 relevant → 9 × 350 = 3150 tokens
+recovered per call.
+The `tool-bloat` advisory (alpha.6) fires when more than 10 tools survive
+the relevance pass on a short-output archetype (`classify`, `extract`,
+`summarize`, `transform`, `critique`) — those archetypes typically use
+≤3 tools, so a kept-count >10 indicates either missing `relevanceByIntent`
+or scores set too generously.
+DeepSeek profiles cap tools to 1 (sequential-only). Other providers
+inherit the count from the IR after the relevance pass.
 ## Brain provisioning
 1. Create a NEW Supabase project (suggested name: `kgauto-brain`)

package/dist/index.d.mts CHANGED Viewed

@@ -1,5 +1,5 @@
-import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult } from './profiles-CH_nKPjp.mjs';
-export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CH_nKPjp.mjs';
+import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory } from './profiles-zm6diETo.mjs';
+export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-zm6diETo.mjs';
 export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, IntentArchetypeName, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.mjs';
 /**
@@ -189,6 +189,41 @@ declare function resetTokenizer(): void;
  */
 declare function countTokens(text: string): number;
+/**
+ * Best-practice advisor — alpha.6 Phase 1.
+ *
+ * Inspects an IR + the selected profile + compile diagnostics and emits a
+ * list of `BestPracticeAdvisory` entries describing detected gaps. Runs
+ * after `lower()` in the compile pipeline; the result lands on
+ * `CompileResult.advisories` for the consumer to log, surface, or filter.
+ *
+ * Driven by interfaces/kgauto.md `best-practice-advisories` (IC, 2026-05-07).
+ * Phase 1 ships 4 starter rules sourced from the s14 kgauto comment +
+ * s15 empirical seed of brain anti-patterns:
+ *
+ *   1. `caching-off-on-claude`        system ≥2000 chars on Anthropic, no cacheable=true
+ *   2. `single-chunk-system`          Anthropic, only one PromptSection >1000 chars
+ *   3. `tool-bloat`                   >10 tools on a short-output archetype
+ *   4. `history-uncached-on-claude`   Anthropic, ≥2 history messages, no historyCachePolicy
+ *
+ * Each rule is a pure function: (ir, result, profile) → BestPracticeAdvisory[].
+ * No side effects. No randomness. Deterministic for a given IR.
+ *
+ * The thresholds (2000 chars, 1000 chars, 10 tools, 2 history) are chosen
+ * to balance noise vs. signal — too low fires on innocuous calls, too high
+ * misses real waste. They may be tuned as brain evidence accumulates; for
+ * now they are literals in the rule bodies. Make them configurable once the
+ * cost-watcher's R-rules graduate into this advisor.
+ */
+/** Subset of CompileResult fields the advisor needs. */
+type AdvisorContext = Pick<CompileResult, 'target' | 'provider' | 'tokensIn' | 'diagnostics'>;
+/**
+ * Run all Phase 1 rules and return collected advisories. Order is fixed
+ * (same as the rule list above) so output is stable across runs.
+ */
+declare function runAdvisor(ir: PromptIR, result: AdvisorContext, profile: ModelProfile): BestPracticeAdvisory[];
 /**
  * @warmdrift/kgauto v2 — prompt compiler + central learning brain.
  *
@@ -235,4 +270,4 @@ declare function countTokens(text: string): number;
  */
 declare function compile(ir: PromptIR, opts?: CompileOptions): CompileResult;
-export { ApiKeys, type AppOracle, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, setTokenizer };
+export { ApiKeys, type AppOracle, BestPracticeAdvisory, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, runAdvisor, setTokenizer };

package/dist/index.d.ts CHANGED Viewed

@@ -1,5 +1,5 @@
-import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult } from './profiles-CDttLtaD.js';
-export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CDttLtaD.js';
+import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory } from './profiles-CQnLkQ7b.js';
+export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CQnLkQ7b.js';
 export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, IntentArchetypeName, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.js';
 /**
@@ -189,6 +189,41 @@ declare function resetTokenizer(): void;
  */
 declare function countTokens(text: string): number;
+/**
+ * Best-practice advisor — alpha.6 Phase 1.
+ *
+ * Inspects an IR + the selected profile + compile diagnostics and emits a
+ * list of `BestPracticeAdvisory` entries describing detected gaps. Runs
+ * after `lower()` in the compile pipeline; the result lands on
+ * `CompileResult.advisories` for the consumer to log, surface, or filter.
+ *
+ * Driven by interfaces/kgauto.md `best-practice-advisories` (IC, 2026-05-07).
+ * Phase 1 ships 4 starter rules sourced from the s14 kgauto comment +
+ * s15 empirical seed of brain anti-patterns:
+ *
+ *   1. `caching-off-on-claude`        system ≥2000 chars on Anthropic, no cacheable=true
+ *   2. `single-chunk-system`          Anthropic, only one PromptSection >1000 chars
+ *   3. `tool-bloat`                   >10 tools on a short-output archetype
+ *   4. `history-uncached-on-claude`   Anthropic, ≥2 history messages, no historyCachePolicy
+ *
+ * Each rule is a pure function: (ir, result, profile) → BestPracticeAdvisory[].
+ * No side effects. No randomness. Deterministic for a given IR.
+ *
+ * The thresholds (2000 chars, 1000 chars, 10 tools, 2 history) are chosen
+ * to balance noise vs. signal — too low fires on innocuous calls, too high
+ * misses real waste. They may be tuned as brain evidence accumulates; for
+ * now they are literals in the rule bodies. Make them configurable once the
+ * cost-watcher's R-rules graduate into this advisor.
+ */
+/** Subset of CompileResult fields the advisor needs. */
+type AdvisorContext = Pick<CompileResult, 'target' | 'provider' | 'tokensIn' | 'diagnostics'>;
+/**
+ * Run all Phase 1 rules and return collected advisories. Order is fixed
+ * (same as the rule list above) so output is stable across runs.
+ */
+declare function runAdvisor(ir: PromptIR, result: AdvisorContext, profile: ModelProfile): BestPracticeAdvisory[];
 /**
  * @warmdrift/kgauto v2 — prompt compiler + central learning brain.
  *
@@ -235,4 +270,4 @@ declare function countTokens(text: string): number;
  */
 declare function compile(ir: PromptIR, opts?: CompileOptions): CompileResult;
-export { ApiKeys, type AppOracle, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, setTokenizer };
+export { ApiKeys, type AppOracle, BestPracticeAdvisory, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, runAdvisor, setTokenizer };

package/dist/index.js CHANGED Viewed

@@ -43,6 +43,7 @@ __export(index_exports, {
   profilesByProvider: () => profilesByProvider,
   record: () => record,
   resetTokenizer: () => resetTokenizer,
+  runAdvisor: () => runAdvisor,
   setTokenizer: () => setTokenizer,
   tryGetProfile: () => tryGetProfile
 });
@@ -489,10 +490,15 @@ function lower(ir, profile, hints = {}) {
 }
 function lowerAnthropic(ir, profile, hints) {
   const systemBlocks = buildAnthropicSystemBlocks(ir.sections, profile);
-  const messages = buildAnthropicMessages(ir.history ?? [], ir.currentTurn);
+  const history = (ir.history ?? []).filter((m) => m.role !== "system");
+  const policy = ir.historyCachePolicy;
+  const markIndex = resolveHistoryMarkIndex(history.length, policy);
+  const messages = buildAnthropicMessages(history, ir.currentTurn, markIndex);
   const tools = ir.tools ? toAnthropicTools(ir.tools) : void 0;
   const cacheableTokens = computeCacheableTokens(systemBlocks);
-  const cacheSavings = cacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.1));
+  const historyCacheableTokens = markIndex >= 0 ? sumHistoryTokens(history, markIndex) : 0;
+  const totalCacheableTokens = cacheableTokens + historyCacheableTokens;
+  const cacheSavings = totalCacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.1));
   return {
     request: {
       provider: "anthropic",
@@ -504,6 +510,7 @@ function lowerAnthropic(ir, profile, hints) {
     },
     diagnostics: {
       cacheableTokens,
+      historyCacheableTokens,
       estimatedCacheSavingsUsd: cacheSavings
     }
   };
@@ -536,17 +543,64 @@ function buildAnthropicSystemBlocks(sections, profile) {
   }
   return blocks;
 }
-function buildAnthropicMessages(history, currentTurn) {
+function buildAnthropicMessages(history, currentTurn, markIndex) {
   const out = [];
-  for (const m of history) {
+  for (let i = 0; i < history.length; i++) {
+    const m = history[i];
     if (m.role === "system") continue;
-    out.push({ role: m.role, content: m.parts ?? m.content });
+    const shouldMark = i === markIndex;
+    out.push({
+      role: m.role,
+      content: shouldMark ? attachAnthropicCacheControl(m) : m.parts ?? m.content
+    });
   }
   if (currentTurn && currentTurn.role !== "system") {
     out.push({ role: currentTurn.role, content: currentTurn.parts ?? currentTurn.content });
   }
   return out;
 }
+function attachAnthropicCacheControl(m) {
+  if (Array.isArray(m.parts) && m.parts.length > 0) {
+    const blocks = m.parts;
+    const last = blocks[blocks.length - 1];
+    const withMarker = {
+      ...last,
+      cache_control: { type: "ephemeral" }
+    };
+    return [...blocks.slice(0, -1), withMarker];
+  }
+  return [
+    {
+      type: "text",
+      text: m.content,
+      cache_control: { type: "ephemeral" }
+    }
+  ];
+}
+function resolveHistoryMarkIndex(historyLen, policy) {
+  if (!policy || policy.strategy === "none") return -1;
+  if (historyLen === 0) return -1;
+  if (policy.strategy === "all-but-latest") {
+    return historyLen - 1;
+  }
+  const idx = historyLen - 1 - policy.suffix;
+  return idx >= 0 ? idx : -1;
+}
+function sumHistoryTokens(history, throughIndex) {
+  let total = 0;
+  for (let i = 0; i <= throughIndex && i < history.length; i++) {
+    const m = history[i];
+    if (m.role === "system") continue;
+    if (Array.isArray(m.parts)) {
+      for (const p of m.parts) {
+        if (typeof p.text === "string") total += countTokens(p.text);
+      }
+    } else if (typeof m.content === "string") {
+      total += countTokens(m.content);
+    }
+  }
+  return total;
+}
 function toAnthropicTools(tools) {
   return tools.map((t) => ({
     name: t.name,
@@ -581,6 +635,9 @@ function lowerGoogle(ir, profile, hints) {
   const minTokens = profile.lowering.cache.minTokens ?? 4096;
   const meetsMin = cacheableTokens >= minTokens;
   const cacheSavings = meetsMin ? cacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.25)) : 0;
+  const history = (ir.history ?? []).filter((m) => m.role !== "system");
+  const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
+  const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
   return {
     request: {
       provider: "google",
@@ -592,6 +649,7 @@ function lowerGoogle(ir, profile, hints) {
     },
     diagnostics: {
       cacheableTokens: meetsMin ? cacheableTokens : 0,
+      historyCacheableTokens,
       estimatedCacheSavingsUsd: cacheSavings
     }
   };
@@ -639,6 +697,9 @@ function lowerOpenAI(ir, profile, hints) {
       content: ir.currentTurn.parts ?? ir.currentTurn.content
     });
   }
+  const history = (ir.history ?? []).filter((m) => m.role !== "system");
+  const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
+  const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
   return {
     request: {
       provider: "openai",
@@ -648,7 +709,11 @@ function lowerOpenAI(ir, profile, hints) {
       response_format: ir.constraints?.structuredOutput ? { type: "json_object" } : void 0,
       reasoning_effort: hints.forceTerseOutput ? "low" : void 0
     },
-    diagnostics: { cacheableTokens: 0, estimatedCacheSavingsUsd: 0 }
+    diagnostics: {
+      cacheableTokens: 0,
+      historyCacheableTokens,
+      estimatedCacheSavingsUsd: 0
+    }
   };
 }
 function toOpenAITools(tools) {
@@ -675,6 +740,9 @@ function lowerDeepSeek(ir, profile) {
       content: ir.currentTurn.parts ?? ir.currentTurn.content
     });
   }
+  const history = (ir.history ?? []).filter((m) => m.role !== "system");
+  const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
+  const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
   return {
     request: {
       provider: "deepseek",
@@ -689,7 +757,11 @@ function lowerDeepSeek(ir, profile) {
         }
       })) : void 0
     },
-    diagnostics: { cacheableTokens: 0, estimatedCacheSavingsUsd: 0 }
+    diagnostics: {
+      cacheableTokens: 0,
+      historyCacheableTokens,
+      estimatedCacheSavingsUsd: 0
+    }
   };
 }
 function sortSections(sections) {
@@ -1114,6 +1186,85 @@ function profilesByProvider(provider) {
   return PROFILES_RAW.filter((p) => p.provider === provider);
 }
+// src/advisor.ts
+function runAdvisor(ir, result, profile) {
+  const out = [];
+  out.push(...detectCachingOff(ir, profile));
+  out.push(...detectSingleChunkSystem(ir, profile));
+  out.push(...detectToolBloat(ir, result));
+  out.push(...detectHistoryUncached(ir, profile));
+  return out;
+}
+function detectCachingOff(ir, profile) {
+  if (profile.provider !== "anthropic") return [];
+  const totalChars = ir.sections.reduce((s, sec) => s + sec.text.length, 0);
+  if (totalChars < 2e3) return [];
+  const anyCacheable = ir.sections.some((s) => s.cacheable === true);
+  if (anyCacheable) return [];
+  return [
+    {
+      level: "warn",
+      code: "caching-off-on-claude",
+      message: `System prompt is ${totalChars} chars on Anthropic but no PromptSection has cacheable=true. Anthropic prompt caching cuts cached-prefix input cost by ~90% on subsequent calls; without it, every turn re-pays full price for the static system context.`,
+      suggestion: "Mark stable system sections (role, persona, tool policy) with `cacheable: true`. The lowering pass concatenates cacheable sections into a single cache-controlled block before the dynamic ones.",
+      docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
+    }
+  ];
+}
+function detectSingleChunkSystem(ir, profile) {
+  if (profile.provider !== "anthropic") return [];
+  if (ir.sections.length !== 1) return [];
+  const only = ir.sections[0];
+  if (!only || only.text.length <= 1e3) return [];
+  return [
+    {
+      level: "info",
+      code: "single-chunk-system",
+      message: `System prompt is a single ${only.text.length}-char chunk. Splitting into NamedChunks (static role/persona vs dynamic context) gives the lowering pass a finer cache-marker boundary \u2014 only the static portion needs to be byte-stable for the cache to hit.`,
+      suggestion: "Refactor the system builder to return an array of `PromptSection` shaped { id, text, cacheable?: boolean }. Static chunks (role, persona, tool policy) get `cacheable: true`; dynamic ones (current context, today's date) don't.",
+      docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
+    }
+  ];
+}
+function detectToolBloat(ir, result) {
+  const SHORT_OUTPUT = /* @__PURE__ */ new Set([
+    "classify",
+    "extract",
+    "summarize",
+    "transform",
+    "critique"
+  ]);
+  if (!ir.tools || ir.tools.length === 0) return [];
+  const toolsKept = result.diagnostics.toolsKept;
+  if (toolsKept <= 10) return [];
+  if (!SHORT_OUTPUT.has(ir.intent.archetype)) return [];
+  return [
+    {
+      level: "warn",
+      code: "tool-bloat",
+      message: `${toolsKept} tools kept after the relevance pass for archetype="${ir.intent.archetype}" (consumer declared ${ir.tools.length}). This archetype is short-output and rarely needs more than 3 tools; each tool definition eats ~350 tokens of context budget.`,
+      suggestion: "Tighten `relevanceByIntent: { [archetype]: 0..1 }` per ToolDefinition. Tools below `toolRelevanceThreshold` (default 0.2) get dropped. Without `relevanceByIntent`, every tool defaults to neutral (0.5) and stays.",
+      docsUrl: "https://github.com/stue/kgauto/blob/main/v2/README.md#tools"
+    }
+  ];
+}
+function detectHistoryUncached(ir, profile) {
+  if (profile.provider !== "anthropic") return [];
+  if (!ir.history || ir.history.length < 2) return [];
+  if (ir.historyCachePolicy && ir.historyCachePolicy.strategy !== "none") {
+    return [];
+  }
+  return [
+    {
+      level: "warn",
+      code: "history-uncached-on-claude",
+      message: `${ir.history.length} history messages on Anthropic with no historyCachePolicy. Every turn re-pays for the full conversation context; with caching, subsequent turns hit the cache at ~10% the input cost.`,
+      suggestion: "Set `historyCachePolicy: { strategy: 'all-but-latest' }` on this IR. The lowering pass marks the message immediately preceding currentTurn with cache_control; subsequent turns whose history prefix matches byte-for-byte hit the cache.",
+      docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
+    }
+  ];
+}
 // src/compile.ts
 var counter = 0;
 function makeHandle() {
@@ -1164,6 +1315,27 @@ function compile(ir, opts = {}) {
   const handle = makeHandle();
   const finalShape = computeShape(workingIR, inputTokens);
   const _learningKey = learningKey(ir.intent.archetype, profile.id, finalShape);
+  const diagnostics = {
+    sectionsKept: workingIR.sections.length,
+    sectionsDropped: ir.sections.length - workingIR.sections.length,
+    toolsKept: workingIR.tools?.length ?? 0,
+    toolsDropped: (ir.tools?.length ?? 0) - (workingIR.tools?.length ?? 0),
+    historyKept: workingIR.history?.length ?? 0,
+    historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
+    cacheableTokens: lowered.diagnostics.cacheableTokens,
+    estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd,
+    historyCacheableTokens: lowered.diagnostics.historyCacheableTokens
+  };
+  const advisories = runAdvisor(
+    ir,
+    {
+      target: profile.id,
+      provider: profile.provider,
+      tokensIn: inputTokens,
+      diagnostics
+    },
+    profile
+  );
   return {
     handle,
     target: profile.id,
@@ -1173,16 +1345,8 @@ function compile(ir, opts = {}) {
     estimatedCostUsd: target.estimatedCostUsd,
     mutationsApplied: accumulatedMutations,
     fallbackChain,
-    diagnostics: {
-      sectionsKept: workingIR.sections.length,
-      sectionsDropped: ir.sections.length - workingIR.sections.length,
-      toolsKept: workingIR.tools?.length ?? 0,
-      toolsDropped: (ir.tools?.length ?? 0) - (workingIR.tools?.length ?? 0),
-      historyKept: workingIR.history?.length ?? 0,
-      historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
-      cacheableTokens: lowered.diagnostics.cacheableTokens,
-      estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd
-    }
+    advisories,
+    diagnostics
   };
 }
 function validateIR(ir) {
@@ -1266,7 +1430,8 @@ function registerCompile(appId, archetype, ir, result) {
     learningKey: learningKey(archetype, result.target, shape),
     estimatedTokensIn: tokens,
     mutationsApplied: result.mutationsApplied.map((m) => m.id),
-    startedAt: Date.now()
+    startedAt: Date.now(),
+    historyCacheableTokens: result.diagnostics.historyCacheableTokens
   });
 }
 async function record(input) {
@@ -1339,7 +1504,8 @@ function buildPayload(input, reg) {
     cache_read_input_tokens: input.cacheReadInputTokens,
     cache_creation_input_tokens: input.cacheCreationInputTokens,
     cost_usd_actual: costUsdActual,
-    ttft_ms: input.ttftMs
+    ttft_ms: input.ttftMs,
+    history_cacheable_tokens: reg?.historyCacheableTokens
   };
 }
 function computeCostUsd(modelId, tokensIn, tokensOut) {
@@ -1826,6 +1992,7 @@ function compile2(ir, opts) {
   profilesByProvider,
   record,
   resetTokenizer,
+  runAdvisor,
   setTokenizer,
   tryGetProfile
 });

package/dist/index.mjs CHANGED Viewed

@@ -374,10 +374,15 @@ function lower(ir, profile, hints = {}) {
 }
 function lowerAnthropic(ir, profile, hints) {
   const systemBlocks = buildAnthropicSystemBlocks(ir.sections, profile);
-  const messages = buildAnthropicMessages(ir.history ?? [], ir.currentTurn);
+  const history = (ir.history ?? []).filter((m) => m.role !== "system");
+  const policy = ir.historyCachePolicy;
+  const markIndex = resolveHistoryMarkIndex(history.length, policy);
+  const messages = buildAnthropicMessages(history, ir.currentTurn, markIndex);
   const tools = ir.tools ? toAnthropicTools(ir.tools) : void 0;
   const cacheableTokens = computeCacheableTokens(systemBlocks);
-  const cacheSavings = cacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.1));
+  const historyCacheableTokens = markIndex >= 0 ? sumHistoryTokens(history, markIndex) : 0;
+  const totalCacheableTokens = cacheableTokens + historyCacheableTokens;
+  const cacheSavings = totalCacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.1));
   return {
     request: {
       provider: "anthropic",
@@ -389,6 +394,7 @@ function lowerAnthropic(ir, profile, hints) {
     },
     diagnostics: {
       cacheableTokens,
+      historyCacheableTokens,
       estimatedCacheSavingsUsd: cacheSavings
     }
   };
@@ -421,17 +427,64 @@ function buildAnthropicSystemBlocks(sections, profile) {
   }
   return blocks;
 }
-function buildAnthropicMessages(history, currentTurn) {
+function buildAnthropicMessages(history, currentTurn, markIndex) {
   const out = [];
-  for (const m of history) {
+  for (let i = 0; i < history.length; i++) {
+    const m = history[i];
     if (m.role === "system") continue;
-    out.push({ role: m.role, content: m.parts ?? m.content });
+    const shouldMark = i === markIndex;
+    out.push({
+      role: m.role,
+      content: shouldMark ? attachAnthropicCacheControl(m) : m.parts ?? m.content
+    });
   }
   if (currentTurn && currentTurn.role !== "system") {
     out.push({ role: currentTurn.role, content: currentTurn.parts ?? currentTurn.content });
   }
   return out;
 }
+function attachAnthropicCacheControl(m) {
+  if (Array.isArray(m.parts) && m.parts.length > 0) {
+    const blocks = m.parts;
+    const last = blocks[blocks.length - 1];
+    const withMarker = {
+      ...last,
+      cache_control: { type: "ephemeral" }
+    };
+    return [...blocks.slice(0, -1), withMarker];
+  }
+  return [
+    {
+      type: "text",
+      text: m.content,
+      cache_control: { type: "ephemeral" }
+    }
+  ];
+}
+function resolveHistoryMarkIndex(historyLen, policy) {
+  if (!policy || policy.strategy === "none") return -1;
+  if (historyLen === 0) return -1;
+  if (policy.strategy === "all-but-latest") {
+    return historyLen - 1;
+  }
+  const idx = historyLen - 1 - policy.suffix;
+  return idx >= 0 ? idx : -1;
+}
+function sumHistoryTokens(history, throughIndex) {
+  let total = 0;
+  for (let i = 0; i <= throughIndex && i < history.length; i++) {
+    const m = history[i];
+    if (m.role === "system") continue;
+    if (Array.isArray(m.parts)) {
+      for (const p of m.parts) {
+        if (typeof p.text === "string") total += countTokens(p.text);
+      }
+    } else if (typeof m.content === "string") {
+      total += countTokens(m.content);
+    }
+  }
+  return total;
+}
 function toAnthropicTools(tools) {
   return tools.map((t) => ({
     name: t.name,
@@ -466,6 +519,9 @@ function lowerGoogle(ir, profile, hints) {
   const minTokens = profile.lowering.cache.minTokens ?? 4096;
   const meetsMin = cacheableTokens >= minTokens;
   const cacheSavings = meetsMin ? cacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.25)) : 0;
+  const history = (ir.history ?? []).filter((m) => m.role !== "system");
+  const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
+  const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
   return {
     request: {
       provider: "google",
@@ -477,6 +533,7 @@ function lowerGoogle(ir, profile, hints) {
     },
     diagnostics: {
       cacheableTokens: meetsMin ? cacheableTokens : 0,
+      historyCacheableTokens,
       estimatedCacheSavingsUsd: cacheSavings
     }
   };
@@ -524,6 +581,9 @@ function lowerOpenAI(ir, profile, hints) {
       content: ir.currentTurn.parts ?? ir.currentTurn.content
     });
   }
+  const history = (ir.history ?? []).filter((m) => m.role !== "system");
+  const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
+  const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
   return {
     request: {
       provider: "openai",
@@ -533,7 +593,11 @@ function lowerOpenAI(ir, profile, hints) {
       response_format: ir.constraints?.structuredOutput ? { type: "json_object" } : void 0,
       reasoning_effort: hints.forceTerseOutput ? "low" : void 0
     },
-    diagnostics: { cacheableTokens: 0, estimatedCacheSavingsUsd: 0 }
+    diagnostics: {
+      cacheableTokens: 0,
+      historyCacheableTokens,
+      estimatedCacheSavingsUsd: 0
+    }
   };
 }
 function toOpenAITools(tools) {
@@ -560,6 +624,9 @@ function lowerDeepSeek(ir, profile) {
       content: ir.currentTurn.parts ?? ir.currentTurn.content
     });
   }
+  const history = (ir.history ?? []).filter((m) => m.role !== "system");
+  const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
+  const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
   return {
     request: {
       provider: "deepseek",
@@ -574,7 +641,11 @@ function lowerDeepSeek(ir, profile) {
         }
       })) : void 0
     },
-    diagnostics: { cacheableTokens: 0, estimatedCacheSavingsUsd: 0 }
+    diagnostics: {
+      cacheableTokens: 0,
+      historyCacheableTokens,
+      estimatedCacheSavingsUsd: 0
+    }
   };
 }
 function sortSections(sections) {
@@ -597,6 +668,85 @@ function setNestedField(obj, path, value) {
   cursor[parts[parts.length - 1]] = value;
 }
+// src/advisor.ts
+/**
+ * Run each advisory detector and return everything they report as one flat
+ * list, in detector order: caching-off, single-chunk system, tool bloat,
+ * then uncached history.
+ *
+ * @param ir      Prompt IR handed to every detector.
+ * @param result  Compile result; only the tool-bloat detector receives it.
+ * @param profile Selected model profile, handed to the other three detectors.
+ * @returns A new array of advisories; empty when no detector reports anything.
+ */
+function runAdvisor(ir, result, profile) {
+  return [
+    ...detectCachingOff(ir, profile),
+    ...detectSingleChunkSystem(ir, profile),
+    ...detectToolBloat(ir, result),
+    ...detectHistoryUncached(ir, profile)
+  ];
+}
+/**
+ * Advisory: a large system prompt on Anthropic with no cacheable section.
+ *
+ * Reports only when the profile's provider is "anthropic", the combined
+ * character length of all section texts is strictly greater than 2000, and
+ * no section has `cacheable === true`.
+ *
+ * @param ir      Prompt IR; reads `sections[].text` and `sections[].cacheable`.
+ * @param profile Selected model profile; reads `provider`.
+ * @returns A one-element array holding the `caching-off-on-claude` warning,
+ *          or an empty array when the rule does not apply.
+ */
+function detectCachingOff(ir, profile) {
+  // The rule is specified as "system >2000 chars": a prompt of exactly 2000
+  // characters must stay silent, matching the strict ">" used by the
+  // single-chunk rule.
+  const MAX_UNFLAGGED_CHARS = 2e3;
+  if (profile.provider !== "anthropic") return [];
+  const totalChars = ir.sections.reduce((sum, section) => sum + section.text.length, 0);
+  if (totalChars <= MAX_UNFLAGGED_CHARS) return [];
+  if (ir.sections.some((section) => section.cacheable === true)) return [];
+  return [
+    {
+      level: "warn",
+      code: "caching-off-on-claude",
+      message: `System prompt is ${totalChars} chars on Anthropic but no PromptSection has cacheable=true. Anthropic prompt caching cuts cached-prefix input cost by ~90% on subsequent calls; without it, every turn re-pays full price for the static system context.`,
+      suggestion: "Mark stable system sections (role, persona, tool policy) with `cacheable: true`. The lowering pass concatenates cacheable sections into a single cache-controlled block before the dynamic ones.",
+      docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
+    }
+  ];
+}
+/**
+ * Advisory: the whole system prompt is one large section on Anthropic.
+ *
+ * Reports only when the profile's provider is "anthropic", the IR has
+ * exactly one section, and that section's text is longer than 1000
+ * characters.
+ *
+ * @param ir      Prompt IR; reads `sections`.
+ * @param profile Selected model profile; reads `provider`.
+ * @returns A one-element array holding the `single-chunk-system` info
+ *          advisory, or an empty array when the rule does not apply.
+ */
+function detectSingleChunkSystem(ir, profile) {
+  const onAnthropic = profile.provider === "anthropic";
+  if (!onAnthropic || ir.sections.length !== 1) return [];
+  const section = ir.sections[0];
+  // A missing entry is treated as an empty chunk, which never fires.
+  const chunkLength = section ? section.text.length : 0;
+  if (chunkLength <= 1e3) return [];
+  const advisory = {
+    level: "info",
+    code: "single-chunk-system",
+    message: `System prompt is a single ${chunkLength}-char chunk. Splitting into NamedChunks (static role/persona vs dynamic context) gives the lowering pass a finer cache-marker boundary \u2014 only the static portion needs to be byte-stable for the cache to hit.`,
+    suggestion: "Refactor the system builder to return an array of `PromptSection` shaped { id, text, cacheable?: boolean }. Static chunks (role, persona, tool policy) get `cacheable: true`; dynamic ones (current context, today's date) don't.",
+    docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
+  };
+  return [advisory];
+}
+/**
+ * Advisory: too many tools survive on a short-output archetype.
+ *
+ * Reports only when the IR declares at least one tool, more than 10 tools
+ * were kept according to `result.diagnostics.toolsKept`, and the intent
+ * archetype is one of classify / extract / summarize / transform / critique.
+ * The provider is not consulted.
+ *
+ * @param ir     Prompt IR; reads `tools` and `intent.archetype`.
+ * @param result Compile result; reads `diagnostics.toolsKept`.
+ * @returns A one-element array holding the `tool-bloat` warning, or an
+ *          empty array when the rule does not apply.
+ */
+function detectToolBloat(ir, result) {
+  const declaredTools = ir.tools;
+  if (!declaredTools || declaredTools.length === 0) return [];
+  const keptCount = result.diagnostics.toolsKept;
+  if (keptCount <= 10) return [];
+  const archetype = ir.intent.archetype;
+  const shortOutputArchetypes = ["classify", "extract", "summarize", "transform", "critique"];
+  if (!shortOutputArchetypes.includes(archetype)) return [];
+  const advisory = {
+    level: "warn",
+    code: "tool-bloat",
+    message: `${keptCount} tools kept after the relevance pass for archetype="${archetype}" (consumer declared ${declaredTools.length}). This archetype is short-output and rarely needs more than 3 tools; each tool definition eats ~350 tokens of context budget.`,
+    suggestion: "Tighten `relevanceByIntent: { [archetype]: 0..1 }` per ToolDefinition. Tools below `toolRelevanceThreshold` (default 0.2) get dropped. Without `relevanceByIntent`, every tool defaults to neutral (0.5) and stays.",
+    docsUrl: "https://github.com/stue/kgauto/blob/main/v2/README.md#tools"
+  };
+  return [advisory];
+}
+/**
+ * Advisory: multi-message history on Anthropic without a history cache policy.
+ *
+ * Reports only when the profile's provider is "anthropic", the IR carries at
+ * least two history messages, and `historyCachePolicy` is either absent or
+ * has strategy "none".
+ *
+ * @param ir      Prompt IR; reads `history` and `historyCachePolicy`.
+ * @param profile Selected model profile; reads `provider`.
+ * @returns A one-element array holding the `history-uncached-on-claude`
+ *          warning, or an empty array when the rule does not apply.
+ */
+function detectHistoryUncached(ir, profile) {
+  if (profile.provider !== "anthropic") return [];
+  const turns = ir.history;
+  const turnCount = turns ? turns.length : 0;
+  if (turnCount < 2) return [];
+  // An omitted policy behaves like an explicit "none".
+  const policy = ir.historyCachePolicy;
+  const strategy = policy ? policy.strategy : "none";
+  if (strategy !== "none") return [];
+  const advisory = {
+    level: "warn",
+    code: "history-uncached-on-claude",
+    message: `${turnCount} history messages on Anthropic with no historyCachePolicy. Every turn re-pays for the full conversation context; with caching, subsequent turns hit the cache at ~10% the input cost.`,
+    suggestion: "Set `historyCachePolicy: { strategy: 'all-but-latest' }` on this IR. The lowering pass marks the message immediately preceding currentTurn with cache_control; subsequent turns whose history prefix matches byte-for-byte hit the cache.",
+    docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
+  };
+  return [advisory];
+}
 // src/compile.ts
 var counter = 0;
 function makeHandle() {
@@ -647,6 +797,27 @@ function compile(ir, opts = {}) {
   const handle = makeHandle();
   const finalShape = computeShape(workingIR, inputTokens);
   const _learningKey = learningKey(ir.intent.archetype, profile.id, finalShape);
+  const diagnostics = {
+    sectionsKept: workingIR.sections.length,
+    sectionsDropped: ir.sections.length - workingIR.sections.length,
+    toolsKept: workingIR.tools?.length ?? 0,
+    toolsDropped: (ir.tools?.length ?? 0) - (workingIR.tools?.length ?? 0),
+    historyKept: workingIR.history?.length ?? 0,
+    historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
+    cacheableTokens: lowered.diagnostics.cacheableTokens,
+    estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd,
+    historyCacheableTokens: lowered.diagnostics.historyCacheableTokens
+  };
+  const advisories = runAdvisor(
+    ir,
+    {
+      target: profile.id,
+      provider: profile.provider,
+      tokensIn: inputTokens,
+      diagnostics
+    },
+    profile
+  );
   return {
     handle,
     target: profile.id,
@@ -656,16 +827,8 @@ function compile(ir, opts = {}) {
     estimatedCostUsd: target.estimatedCostUsd,
     mutationsApplied: accumulatedMutations,
     fallbackChain,
-    diagnostics: {
-      sectionsKept: workingIR.sections.length,
-      sectionsDropped: ir.sections.length - workingIR.sections.length,
-      toolsKept: workingIR.tools?.length ?? 0,
-      toolsDropped: (ir.tools?.length ?? 0) - (workingIR.tools?.length ?? 0),
-      historyKept: workingIR.history?.length ?? 0,
-      historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
-      cacheableTokens: lowered.diagnostics.cacheableTokens,
-      estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd
-    }
+    advisories,
+    diagnostics
   };
 }
 function validateIR(ir) {
@@ -749,7 +912,8 @@ function registerCompile(appId, archetype, ir, result) {
     learningKey: learningKey(archetype, result.target, shape),
     estimatedTokensIn: tokens,
     mutationsApplied: result.mutationsApplied.map((m) => m.id),
-    startedAt: Date.now()
+    startedAt: Date.now(),
+    historyCacheableTokens: result.diagnostics.historyCacheableTokens
   });
 }
 async function record(input) {
@@ -822,7 +986,8 @@ function buildPayload(input, reg) {
     cache_read_input_tokens: input.cacheReadInputTokens,
     cache_creation_input_tokens: input.cacheCreationInputTokens,
     cost_usd_actual: costUsdActual,
-    ttft_ms: input.ttftMs
+    ttft_ms: input.ttftMs,
+    history_cacheable_tokens: reg?.historyCacheableTokens
   };
 }
 function computeCostUsd(modelId, tokensIn, tokensOut) {
@@ -1308,6 +1473,7 @@ export {
   profilesByProvider,
   record,
   resetTokenizer,
+  runAdvisor,
   setTokenizer,
   tryGetProfile
 };

package/dist/{profiles-CDttLtaD.d.ts → profiles-CQnLkQ7b.d.ts} RENAMED Viewed

@@ -91,6 +91,40 @@ interface Constraints {
     /** Override target model selection — if set, compiler uses this instead of routing. */
     forceModel?: string;
 }
+/**
+ * Cache marker policy for the messages array (history + currentTurn).
+ *
+ * Anthropic positional caching: a `cache_control` marker on a content block
+ * tells the API "remember the prefix up through this block." On a subsequent
+ * request whose first N tokens match, those N tokens are billed at the cached rate
+ * (10% of the input price). Without a marker, every call re-pays for the
+ * entire history.
+ *
+ * - `'none'` (default when omitted): no history cache marker. System-level
+ *   cache markers from `PromptSection.cacheable=true` still apply.
+ * - `'all-but-latest'`: marks the message immediately preceding `currentTurn`
+ *   (the last history entry). On the next call, that entire history prefix
+ *   is cacheable. Good fit for chat/agent loops where every prior turn is
+ *   stable.
+ * - `'fixed-suffix'`: marks the message `suffix` positions from the end of
+ *   `history`. Use when the last few turns are volatile (e.g., scratchpad,
+ *   draft revisions) but the earlier prefix is stable.
+ *
+ * For non-Anthropic providers, no wire-format marker is emitted (Gemini /
+ * OpenAI / DeepSeek implicit caching takes effect automatically when a
+ * stable prefix is reused). The compiler still computes
+ * `diagnostics.historyCacheableTokens` for telemetry on every provider.
+ *
+ * alpha.5.
+ */
+type HistoryCachePolicy = {
+    strategy: 'none';
+} | {
+    strategy: 'all-but-latest';
+} | {
+    strategy: 'fixed-suffix';
+    suffix: number;
+};
 /**
  * Consumer-declared policy for model selection. Lives outside the IR
  * (passed via CompileOptions) because it's a SESSION/APP-level constraint,
@@ -146,6 +180,12 @@ interface PromptIR {
     models: string[];
     /** Compile constraints. */
     constraints?: Constraints;
+    /**
+     * Cache marker placement policy for the messages array. Default = no
+     * history cache markers. See `HistoryCachePolicy` for semantics.
+     * alpha.5.
+     */
+    historyCachePolicy?: HistoryCachePolicy;
 }
 type Provider = 'anthropic' | 'google' | 'openai' | 'deepseek' | 'mistral' | 'xai';
 /**
@@ -213,6 +253,41 @@ type CompiledRequest = {
     }>;
     tools?: unknown[];
 };
+/**
+ * Best-practice advisory emitted by the compiler at compile time. Non-fatal —
+ * consumers log, surface in dev tools, gate on `level === 'critical'` in CI,
+ * or ignore. The advisor inspects the IR + selected profile + diagnostics
+ * and emits one entry per detected gap.
+ *
+ * Codes are stable across releases. `suggestion` and `docsUrl` are optional
+ * but encouraged: suggestion = the actionable diff; docsUrl = the
+ * interfaces/kgauto.md anchor for context.
+ *
+ * alpha.6 Phase 1 starter rules:
+ *   - `caching-off-on-claude` (warn)       system >2000 chars on Anthropic, no cacheable=true
+ *   - `single-chunk-system` (info)         Anthropic, only one PromptSection >1000 chars
+ *   - `tool-bloat` (warn)                  >10 tools on a short-output archetype
+ *   - `history-uncached-on-claude` (warn)  Anthropic, ≥2 history messages, no historyCachePolicy
+ *
+ * Phase 2 (catalog as `bestPractices` block in profiles) and Phase 3 (brain
+ * telemetry on `advisories_fired`) are alpha.7+ territory.
+ */
+interface BestPracticeAdvisory {
+    /**
+     * Severity. `info` = informational; `warn` = behavioral pattern that's
+     * usually expensive or wrong; `critical` = likely bug or production-grade
+     * misuse. Phase 1 ships info + warn only.
+     */
+    level: 'info' | 'warn' | 'critical';
+    /** Stable kebab-case code. Consumers filter / gate by this. */
+    code: string;
+    /** Human-readable explanation of what was detected. */
+    message: string;
+    /** Optional: how to fix — actionable diff or pattern. */
+    suggestion?: string;
+    /** Optional: link to docs anchor for more context. */
+    docsUrl?: string;
+}
 interface CompileResult {
     /** Unique handle for this call — pass to record() to correlate the outcome. */
     handle: string;
@@ -230,6 +305,11 @@ interface CompileResult {
     mutationsApplied: MutationApplied[];
     /** Fallback chain — try these in order if target fails. */
     fallbackChain: string[];
+    /**
+     * Best-practice advisories emitted by the compiler. Non-fatal. Empty
+     * array when no rules fired. alpha.6 Phase 1.
+     */
+    advisories: BestPracticeAdvisory[];
     /** Diagnostics for caller-side logging. */
     diagnostics: {
         sectionsKept: number;
@@ -240,6 +320,16 @@ interface CompileResult {
         historyDropped: number;
         cacheableTokens: number;
         estimatedCacheSavingsUsd: number;
+        /**
+         * Tokens in `history` (and `currentTurn` when before the marker) that
+         * fall within the cacheable prefix per `historyCachePolicy`. Always
+         * computed; only Anthropic actually emits a wire-format marker. For
+         * Gemini / OpenAI / DeepSeek, this represents the theoretical cacheable
+         * prefix that implicit caching may pick up — useful telemetry for the
+         * brain to learn which (app, model, archetype) tuples benefit most
+         * from history caching. alpha.5.
+         */
+        historyCacheableTokens: number;
     };
 }
 /**
@@ -518,4 +608,4 @@ declare function tryGetProfile(id: string): ModelProfile | undefined;
 declare function allProfiles(): readonly ModelProfile[];
 declare function profilesByProvider(provider: Provider): readonly ModelProfile[];
-export { type ApiKeys as A, type CompilePolicy as C, type IntentDeclaration as I, type LoweringSpec as L, type ModelProfile as M, type NormalizedResponse as N, type OracleScore as O, type ProviderOverrides as P, type RecordInput as R, type StructuredOutputCapability as S, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type CompileResult as e, ALIASES as f, type CacheStrategy as g, type CallAttempt as h, CallError as i, type CliffRule as j, type Constraints as k, type Message as l, type MutationApplied as m, type NormalizedTokens as n, type PromptSection as o, type Provider as p, type RecoveryRule as q, type SystemPromptMode as r, type ToolDefinition as s, allProfiles as t, getProfile as u, profilesByProvider as v, tryGetProfile as w };
+export { type ApiKeys as A, type BestPracticeAdvisory as B, type CompilePolicy as C, type HistoryCachePolicy as H, type IntentDeclaration as I, type LoweringSpec as L, type ModelProfile as M, type NormalizedResponse as N, type OracleScore as O, type ProviderOverrides as P, type RecordInput as R, type StructuredOutputCapability as S, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type CompileResult as e, ALIASES as f, type CacheStrategy as g, type CallAttempt as h, CallError as i, type CliffRule as j, type Constraints as k, type Message as l, type MutationApplied as m, type NormalizedTokens as n, type PromptSection as o, type Provider as p, type RecoveryRule as q, type SystemPromptMode as r, type ToolDefinition as s, allProfiles as t, getProfile as u, profilesByProvider as v, tryGetProfile as w };

package/dist/{profiles-CH_nKPjp.d.mts → profiles-zm6diETo.d.mts} RENAMED Viewed

@@ -91,6 +91,40 @@ interface Constraints {
     /** Override target model selection — if set, compiler uses this instead of routing. */
     forceModel?: string;
 }
+/**
+ * Cache marker policy for the messages array (history + currentTurn).
+ *
+ * Anthropic positional caching: a `cache_control` marker on a content block
+ * tells the API "remember the prefix up through this block." On a subsequent
+ * request whose first N tokens match, those N tokens are billed at the cached rate
+ * (10% of the input price). Without a marker, every call re-pays for the
+ * entire history.
+ *
+ * - `'none'` (default when omitted): no history cache marker. System-level
+ *   cache markers from `PromptSection.cacheable=true` still apply.
+ * - `'all-but-latest'`: marks the message immediately preceding `currentTurn`
+ *   (the last history entry). On the next call, that entire history prefix
+ *   is cacheable. Good fit for chat/agent loops where every prior turn is
+ *   stable.
+ * - `'fixed-suffix'`: marks the message `suffix` positions from the end of
+ *   `history`. Use when the last few turns are volatile (e.g., scratchpad,
+ *   draft revisions) but the earlier prefix is stable.
+ *
+ * For non-Anthropic providers, no wire-format marker is emitted (Gemini /
+ * OpenAI / DeepSeek implicit caching takes effect automatically when a
+ * stable prefix is reused). The compiler still computes
+ * `diagnostics.historyCacheableTokens` for telemetry on every provider.
+ *
+ * alpha.5.
+ */
+type HistoryCachePolicy = {
+    strategy: 'none';
+} | {
+    strategy: 'all-but-latest';
+} | {
+    strategy: 'fixed-suffix';
+    suffix: number;
+};
 /**
  * Consumer-declared policy for model selection. Lives outside the IR
  * (passed via CompileOptions) because it's a SESSION/APP-level constraint,
@@ -146,6 +180,12 @@ interface PromptIR {
     models: string[];
     /** Compile constraints. */
     constraints?: Constraints;
+    /**
+     * Cache marker placement policy for the messages array. Default = no
+     * history cache markers. See `HistoryCachePolicy` for semantics.
+     * alpha.5.
+     */
+    historyCachePolicy?: HistoryCachePolicy;
 }
 type Provider = 'anthropic' | 'google' | 'openai' | 'deepseek' | 'mistral' | 'xai';
 /**
@@ -213,6 +253,41 @@ type CompiledRequest = {
     }>;
     tools?: unknown[];
 };
+/**
+ * Best-practice advisory emitted by the compiler at compile time. Non-fatal —
+ * consumers log, surface in dev tools, gate on `level === 'critical'` in CI,
+ * or ignore. The advisor inspects the IR + selected profile + diagnostics
+ * and emits one entry per detected gap.
+ *
+ * Codes are stable across releases. `suggestion` and `docsUrl` are optional
+ * but encouraged: suggestion = the actionable diff; docsUrl = the
+ * interfaces/kgauto.md anchor for context.
+ *
+ * alpha.6 Phase 1 starter rules:
+ *   - `caching-off-on-claude` (warn)       system >2000 chars on Anthropic, no cacheable=true
+ *   - `single-chunk-system` (info)         Anthropic, only one PromptSection >1000 chars
+ *   - `tool-bloat` (warn)                  >10 tools on a short-output archetype
+ *   - `history-uncached-on-claude` (warn)  Anthropic, ≥2 history messages, no historyCachePolicy
+ *
+ * Phase 2 (catalog as `bestPractices` block in profiles) and Phase 3 (brain
+ * telemetry on `advisories_fired`) are alpha.7+ territory.
+ */
+interface BestPracticeAdvisory {
+    /**
+     * Severity. `info` = informational; `warn` = behavioral pattern that's
+     * usually expensive or wrong; `critical` = likely bug or production-grade
+     * misuse. Phase 1 ships info + warn only.
+     */
+    level: 'info' | 'warn' | 'critical';
+    /** Stable kebab-case code. Consumers filter / gate by this. */
+    code: string;
+    /** Human-readable explanation of what was detected. */
+    message: string;
+    /** Optional: how to fix — actionable diff or pattern. */
+    suggestion?: string;
+    /** Optional: link to docs anchor for more context. */
+    docsUrl?: string;
+}
 interface CompileResult {
     /** Unique handle for this call — pass to record() to correlate the outcome. */
     handle: string;
@@ -230,6 +305,11 @@ interface CompileResult {
     mutationsApplied: MutationApplied[];
     /** Fallback chain — try these in order if target fails. */
     fallbackChain: string[];
+    /**
+     * Best-practice advisories emitted by the compiler. Non-fatal. Empty
+     * array when no rules fired. alpha.6 Phase 1.
+     */
+    advisories: BestPracticeAdvisory[];
     /** Diagnostics for caller-side logging. */
     diagnostics: {
         sectionsKept: number;
@@ -240,6 +320,16 @@ interface CompileResult {
         historyDropped: number;
         cacheableTokens: number;
         estimatedCacheSavingsUsd: number;
+        /**
+         * Tokens in `history` (and `currentTurn` when before the marker) that
+         * fall within the cacheable prefix per `historyCachePolicy`. Always
+         * computed; only Anthropic actually emits a wire-format marker. For
+         * Gemini / OpenAI / DeepSeek, this represents the theoretical cacheable
+         * prefix that implicit caching may pick up — useful telemetry for the
+         * brain to learn which (app, model, archetype) tuples benefit most
+         * from history caching. alpha.5.
+         */
+        historyCacheableTokens: number;
     };
 }
 /**
@@ -518,4 +608,4 @@ declare function tryGetProfile(id: string): ModelProfile | undefined;
 declare function allProfiles(): readonly ModelProfile[];
 declare function profilesByProvider(provider: Provider): readonly ModelProfile[];
-export { type ApiKeys as A, type CompilePolicy as C, type IntentDeclaration as I, type LoweringSpec as L, type ModelProfile as M, type NormalizedResponse as N, type OracleScore as O, type ProviderOverrides as P, type RecordInput as R, type StructuredOutputCapability as S, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type CompileResult as e, ALIASES as f, type CacheStrategy as g, type CallAttempt as h, CallError as i, type CliffRule as j, type Constraints as k, type Message as l, type MutationApplied as m, type NormalizedTokens as n, type PromptSection as o, type Provider as p, type RecoveryRule as q, type SystemPromptMode as r, type ToolDefinition as s, allProfiles as t, getProfile as u, profilesByProvider as v, tryGetProfile as w };
+export { type ApiKeys as A, type BestPracticeAdvisory as B, type CompilePolicy as C, type HistoryCachePolicy as H, type IntentDeclaration as I, type LoweringSpec as L, type ModelProfile as M, type NormalizedResponse as N, type OracleScore as O, type ProviderOverrides as P, type RecordInput as R, type StructuredOutputCapability as S, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type CompileResult as e, ALIASES as f, type CacheStrategy as g, type CallAttempt as h, CallError as i, type CliffRule as j, type Constraints as k, type Message as l, type MutationApplied as m, type NormalizedTokens as n, type PromptSection as o, type Provider as p, type RecoveryRule as q, type SystemPromptMode as r, type ToolDefinition as s, allProfiles as t, getProfile as u, profilesByProvider as v, tryGetProfile as w };

package/dist/profiles.d.mts CHANGED Viewed

@@ -1,2 +1,2 @@
-export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CH_nKPjp.mjs';
+export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-zm6diETo.mjs';
 import './dialect.mjs';

package/dist/profiles.d.ts CHANGED Viewed

@@ -1,2 +1,2 @@
-export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CDttLtaD.js';
+export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CQnLkQ7b.js';
 import './dialect.js';

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@warmdrift/kgauto-compiler",
-  "version": "2.0.0-alpha.4",
+  "version": "2.0.0-alpha.6",
   "description": "Prompt compiler + central learning brain for multi-model AI apps. Swap models without rewriting prompts.",
   "main": "./dist/index.js",
   "module": "./dist/index.mjs",