@warmdrift/kgauto-compiler 2.0.0-alpha.4 → 2.0.0-alpha.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # @warmdrift/kgauto-compiler — v2.0.0-alpha.4
1
+ # @warmdrift/kgauto-compiler — v2.0.0-alpha.6
2
2
 
3
3
  > Prompt compiler + central learning brain for multi-model AI apps.
4
4
  > **Swap models without rewriting prompts.**
@@ -18,8 +18,8 @@ mutations.
18
18
  - **Package:** alpha — coexists with v1 (`@warmdrift/kgauto@1.2.0`) under
19
19
  the temporary name `@warmdrift/kgauto-compiler`. Renames to v2 final once
20
20
  v1 is fully retired from production.
21
- - **Tests:** 147/147 passing
22
- - **Build:** clean (43KB ESM, 60KB CJS)
21
+ - **Tests:** 201/201 passing
22
+ - **Build:** clean (47KB ESM, 68KB CJS)
23
23
  - **Brain:** schema ready (see `brain/migrations/001_initial_schema.sql`);
24
24
  awaiting dedicated Supabase provisioning.
25
25
  - **Mutation engine:** v2.1 (after enough outcome data accumulates).
@@ -154,6 +154,48 @@ The 5 prod empty-responses in tt-intelligence's `gemini-2.5-flash` dashboard
154
154
  calls? v2 catches those automatically — `expectedShortOutput` constraint plus
155
155
  the `force_thinking_budget_zero` cliff guard.
156
156
 
157
+ ## Tools
158
+
159
+ Tools are first-class IR fields. The compiler's tool-relevance pass drops
160
+ tools that don't apply to the current intent before lowering — saves
161
+ context budget on every call.
162
+
163
+ ```ts
164
+ const tools: ToolDefinition[] = [
165
+ {
166
+ name: 'web_search',
167
+ description: 'Search the public web',
168
+ parameters: { type: 'object', properties: { q: { type: 'string' } } },
169
+ relevanceByIntent: {
170
+ ask: 0.9, // primary tool for ask
171
+ hunt: 0.9,
172
+ classify: 0.0, // never useful for classification
173
+ summarize: 0.0,
174
+ extract: 0.1,
175
+ },
176
+ },
177
+ // ...
178
+ ];
179
+ ```
180
+
181
+ Each tool declares per-intent relevance scores 0..1. The pass keeps tools
182
+ where `relevanceByIntent[currentIntent] >= toolRelevanceThreshold` (default
183
+ `0.2`). Missing entries default to neutral (`0.5`) — kept by default. Set
184
+ an explicit `0.0` to hard-exclude.
185
+
186
+ Tool definitions eat ~350 tokens of context per tool (L-051), so trimming
187
+ matters: 12 declared tools, only 3 relevant → 9 × 350 = 3150 tokens
188
+ recovered per call.
189
+
190
+ The `tool-bloat` advisory (alpha.6) fires when more than 10 tools survive
191
+ the relevance pass on a short-output archetype (`classify`, `extract`,
192
+ `summarize`, `transform`, `critique`) — those archetypes typically use
193
+ ≤3 tools, so a kept-count >10 indicates either missing `relevanceByIntent`
194
+ or scores set too generously.
195
+
196
+ DeepSeek profiles cap tools to 1 (sequential-only). Other providers
197
+ inherit the count from the IR after the relevance pass.
198
+
157
199
  ## Brain provisioning
158
200
 
159
201
  1. Create a NEW Supabase project (suggested name: `kgauto-brain`)
package/dist/index.d.mts CHANGED
@@ -1,5 +1,5 @@
1
- import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult } from './profiles-CH_nKPjp.mjs';
2
- export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CH_nKPjp.mjs';
1
+ import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory } from './profiles-zm6diETo.mjs';
2
+ export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-zm6diETo.mjs';
3
3
  export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, IntentArchetypeName, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.mjs';
4
4
 
5
5
  /**
@@ -189,6 +189,41 @@ declare function resetTokenizer(): void;
189
189
  */
190
190
  declare function countTokens(text: string): number;
191
191
 
192
+ /**
193
+ * Best-practice advisor — alpha.6 Phase 1.
194
+ *
195
+ * Inspects an IR + the selected profile + compile diagnostics and emits a
196
+ * list of `BestPracticeAdvisory` entries describing detected gaps. Runs
197
+ * after `lower()` in the compile pipeline; the result lands on
198
+ * `CompileResult.advisories` for the consumer to log, surface, or filter.
199
+ *
200
+ * Driven by interfaces/kgauto.md `best-practice-advisories` (IC, 2026-05-07).
201
+ * Phase 1 ships 4 starter rules sourced from the s14 kgauto comment +
202
+ * s15 empirical seed of brain anti-patterns:
203
+ *
204
+ * 1. `caching-off-on-claude` system >2000 chars on Anthropic, no cacheable=true
205
+ * 2. `single-chunk-system` Anthropic, only one PromptSection >1000 chars
206
+ * 3. `tool-bloat` >10 tools on a short-output archetype
207
+ * 4. `history-uncached-on-claude` Anthropic, ≥2 history messages, no historyCachePolicy
208
+ *
209
+ * Each rule is a pure function: (ir, result, profile) → BestPracticeAdvisory[].
210
+ * No side effects. No randomness. Deterministic for a given IR.
211
+ *
212
+ * The thresholds (2000 chars, 1000 chars, 10 tools, 2 history) are chosen
213
+ * to balance noise vs. signal — too low fires on innocuous calls, too high
214
+ * misses real waste. They may tune with brain evidence over time; for now
215
+ * they're literals in the rule bodies. Make them configurable when the
216
+ * cost-watcher's R-rules graduate to here.
217
+ */
218
+
219
+ /** Subset of CompileResult fields the advisor needs. */
220
+ type AdvisorContext = Pick<CompileResult, 'target' | 'provider' | 'tokensIn' | 'diagnostics'>;
221
+ /**
222
+ * Run all Phase 1 rules and return collected advisories. Order is fixed
223
+ * (same as the rule list above) so output is stable across runs.
224
+ */
225
+ declare function runAdvisor(ir: PromptIR, result: AdvisorContext, profile: ModelProfile): BestPracticeAdvisory[];
226
+
192
227
  /**
193
228
  * @warmdrift/kgauto v2 — prompt compiler + central learning brain.
194
229
  *
@@ -235,4 +270,4 @@ declare function countTokens(text: string): number;
235
270
  */
236
271
  declare function compile(ir: PromptIR, opts?: CompileOptions): CompileResult;
237
272
 
238
- export { ApiKeys, type AppOracle, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, setTokenizer };
273
+ export { ApiKeys, type AppOracle, BestPracticeAdvisory, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, runAdvisor, setTokenizer };
package/dist/index.d.ts CHANGED
@@ -1,5 +1,5 @@
1
- import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult } from './profiles-CDttLtaD.js';
2
- export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CDttLtaD.js';
1
+ import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory } from './profiles-CQnLkQ7b.js';
2
+ export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CQnLkQ7b.js';
3
3
  export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, IntentArchetypeName, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.js';
4
4
 
5
5
  /**
@@ -189,6 +189,41 @@ declare function resetTokenizer(): void;
189
189
  */
190
190
  declare function countTokens(text: string): number;
191
191
 
192
+ /**
193
+ * Best-practice advisor — alpha.6 Phase 1.
194
+ *
195
+ * Inspects an IR + the selected profile + compile diagnostics and emits a
196
+ * list of `BestPracticeAdvisory` entries describing detected gaps. Runs
197
+ * after `lower()` in the compile pipeline; the result lands on
198
+ * `CompileResult.advisories` for the consumer to log, surface, or filter.
199
+ *
200
+ * Driven by interfaces/kgauto.md `best-practice-advisories` (IC, 2026-05-07).
201
+ * Phase 1 ships 4 starter rules sourced from the s14 kgauto comment +
202
+ * s15 empirical seed of brain anti-patterns:
203
+ *
204
+ * 1. `caching-off-on-claude` system >2000 chars on Anthropic, no cacheable=true
205
+ * 2. `single-chunk-system` Anthropic, only one PromptSection >1000 chars
206
+ * 3. `tool-bloat` >10 tools on a short-output archetype
207
+ * 4. `history-uncached-on-claude` Anthropic, ≥2 history messages, no historyCachePolicy
208
+ *
209
+ * Each rule is a pure function: (ir, result, profile) → BestPracticeAdvisory[].
210
+ * No side effects. No randomness. Deterministic for a given IR.
211
+ *
212
+ * The thresholds (2000 chars, 1000 chars, 10 tools, 2 history) are chosen
213
+ * to balance noise vs. signal — too low fires on innocuous calls, too high
214
+ * misses real waste. They may tune with brain evidence over time; for now
215
+ * they're literals in the rule bodies. Make them configurable when the
216
+ * cost-watcher's R-rules graduate to here.
217
+ */
218
+
219
+ /** Subset of CompileResult fields the advisor needs. */
220
+ type AdvisorContext = Pick<CompileResult, 'target' | 'provider' | 'tokensIn' | 'diagnostics'>;
221
+ /**
222
+ * Run all Phase 1 rules and return collected advisories. Order is fixed
223
+ * (same as the rule list above) so output is stable across runs.
224
+ */
225
+ declare function runAdvisor(ir: PromptIR, result: AdvisorContext, profile: ModelProfile): BestPracticeAdvisory[];
226
+
192
227
  /**
193
228
  * @warmdrift/kgauto v2 — prompt compiler + central learning brain.
194
229
  *
@@ -235,4 +270,4 @@ declare function countTokens(text: string): number;
235
270
  */
236
271
  declare function compile(ir: PromptIR, opts?: CompileOptions): CompileResult;
237
272
 
238
- export { ApiKeys, type AppOracle, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, setTokenizer };
273
+ export { ApiKeys, type AppOracle, BestPracticeAdvisory, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, runAdvisor, setTokenizer };
package/dist/index.js CHANGED
@@ -43,6 +43,7 @@ __export(index_exports, {
43
43
  profilesByProvider: () => profilesByProvider,
44
44
  record: () => record,
45
45
  resetTokenizer: () => resetTokenizer,
46
+ runAdvisor: () => runAdvisor,
46
47
  setTokenizer: () => setTokenizer,
47
48
  tryGetProfile: () => tryGetProfile
48
49
  });
@@ -489,10 +490,15 @@ function lower(ir, profile, hints = {}) {
489
490
  }
490
491
  function lowerAnthropic(ir, profile, hints) {
491
492
  const systemBlocks = buildAnthropicSystemBlocks(ir.sections, profile);
492
- const messages = buildAnthropicMessages(ir.history ?? [], ir.currentTurn);
493
+ const history = (ir.history ?? []).filter((m) => m.role !== "system");
494
+ const policy = ir.historyCachePolicy;
495
+ const markIndex = resolveHistoryMarkIndex(history.length, policy);
496
+ const messages = buildAnthropicMessages(history, ir.currentTurn, markIndex);
493
497
  const tools = ir.tools ? toAnthropicTools(ir.tools) : void 0;
494
498
  const cacheableTokens = computeCacheableTokens(systemBlocks);
495
- const cacheSavings = cacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.1));
499
+ const historyCacheableTokens = markIndex >= 0 ? sumHistoryTokens(history, markIndex) : 0;
500
+ const totalCacheableTokens = cacheableTokens + historyCacheableTokens;
501
+ const cacheSavings = totalCacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.1));
496
502
  return {
497
503
  request: {
498
504
  provider: "anthropic",
@@ -504,6 +510,7 @@ function lowerAnthropic(ir, profile, hints) {
504
510
  },
505
511
  diagnostics: {
506
512
  cacheableTokens,
513
+ historyCacheableTokens,
507
514
  estimatedCacheSavingsUsd: cacheSavings
508
515
  }
509
516
  };
@@ -536,17 +543,64 @@ function buildAnthropicSystemBlocks(sections, profile) {
536
543
  }
537
544
  return blocks;
538
545
  }
539
- function buildAnthropicMessages(history, currentTurn) {
546
+ function buildAnthropicMessages(history, currentTurn, markIndex) {
540
547
  const out = [];
541
- for (const m of history) {
548
+ for (let i = 0; i < history.length; i++) {
549
+ const m = history[i];
542
550
  if (m.role === "system") continue;
543
- out.push({ role: m.role, content: m.parts ?? m.content });
551
+ const shouldMark = i === markIndex;
552
+ out.push({
553
+ role: m.role,
554
+ content: shouldMark ? attachAnthropicCacheControl(m) : m.parts ?? m.content
555
+ });
544
556
  }
545
557
  if (currentTurn && currentTurn.role !== "system") {
546
558
  out.push({ role: currentTurn.role, content: currentTurn.parts ?? currentTurn.content });
547
559
  }
548
560
  return out;
549
561
  }
562
+ function attachAnthropicCacheControl(m) {
563
+ if (Array.isArray(m.parts) && m.parts.length > 0) {
564
+ const blocks = m.parts;
565
+ const last = blocks[blocks.length - 1];
566
+ const withMarker = {
567
+ ...last,
568
+ cache_control: { type: "ephemeral" }
569
+ };
570
+ return [...blocks.slice(0, -1), withMarker];
571
+ }
572
+ return [
573
+ {
574
+ type: "text",
575
+ text: m.content,
576
+ cache_control: { type: "ephemeral" }
577
+ }
578
+ ];
579
+ }
580
+ function resolveHistoryMarkIndex(historyLen, policy) {
581
+ if (!policy || policy.strategy === "none") return -1;
582
+ if (historyLen === 0) return -1;
583
+ if (policy.strategy === "all-but-latest") {
584
+ return historyLen - 1;
585
+ }
586
+ const idx = historyLen - 1 - policy.suffix;
587
+ return idx >= 0 ? idx : -1;
588
+ }
589
+ function sumHistoryTokens(history, throughIndex) {
590
+ let total = 0;
591
+ for (let i = 0; i <= throughIndex && i < history.length; i++) {
592
+ const m = history[i];
593
+ if (m.role === "system") continue;
594
+ if (Array.isArray(m.parts)) {
595
+ for (const p of m.parts) {
596
+ if (typeof p.text === "string") total += countTokens(p.text);
597
+ }
598
+ } else if (typeof m.content === "string") {
599
+ total += countTokens(m.content);
600
+ }
601
+ }
602
+ return total;
603
+ }
550
604
  function toAnthropicTools(tools) {
551
605
  return tools.map((t) => ({
552
606
  name: t.name,
@@ -581,6 +635,9 @@ function lowerGoogle(ir, profile, hints) {
581
635
  const minTokens = profile.lowering.cache.minTokens ?? 4096;
582
636
  const meetsMin = cacheableTokens >= minTokens;
583
637
  const cacheSavings = meetsMin ? cacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.25)) : 0;
638
+ const history = (ir.history ?? []).filter((m) => m.role !== "system");
639
+ const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
640
+ const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
584
641
  return {
585
642
  request: {
586
643
  provider: "google",
@@ -592,6 +649,7 @@ function lowerGoogle(ir, profile, hints) {
592
649
  },
593
650
  diagnostics: {
594
651
  cacheableTokens: meetsMin ? cacheableTokens : 0,
652
+ historyCacheableTokens,
595
653
  estimatedCacheSavingsUsd: cacheSavings
596
654
  }
597
655
  };
@@ -639,6 +697,9 @@ function lowerOpenAI(ir, profile, hints) {
639
697
  content: ir.currentTurn.parts ?? ir.currentTurn.content
640
698
  });
641
699
  }
700
+ const history = (ir.history ?? []).filter((m) => m.role !== "system");
701
+ const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
702
+ const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
642
703
  return {
643
704
  request: {
644
705
  provider: "openai",
@@ -648,7 +709,11 @@ function lowerOpenAI(ir, profile, hints) {
648
709
  response_format: ir.constraints?.structuredOutput ? { type: "json_object" } : void 0,
649
710
  reasoning_effort: hints.forceTerseOutput ? "low" : void 0
650
711
  },
651
- diagnostics: { cacheableTokens: 0, estimatedCacheSavingsUsd: 0 }
712
+ diagnostics: {
713
+ cacheableTokens: 0,
714
+ historyCacheableTokens,
715
+ estimatedCacheSavingsUsd: 0
716
+ }
652
717
  };
653
718
  }
654
719
  function toOpenAITools(tools) {
@@ -675,6 +740,9 @@ function lowerDeepSeek(ir, profile) {
675
740
  content: ir.currentTurn.parts ?? ir.currentTurn.content
676
741
  });
677
742
  }
743
+ const history = (ir.history ?? []).filter((m) => m.role !== "system");
744
+ const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
745
+ const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
678
746
  return {
679
747
  request: {
680
748
  provider: "deepseek",
@@ -689,7 +757,11 @@ function lowerDeepSeek(ir, profile) {
689
757
  }
690
758
  })) : void 0
691
759
  },
692
- diagnostics: { cacheableTokens: 0, estimatedCacheSavingsUsd: 0 }
760
+ diagnostics: {
761
+ cacheableTokens: 0,
762
+ historyCacheableTokens,
763
+ estimatedCacheSavingsUsd: 0
764
+ }
693
765
  };
694
766
  }
695
767
  function sortSections(sections) {
@@ -1114,6 +1186,85 @@ function profilesByProvider(provider) {
1114
1186
  return PROFILES_RAW.filter((p) => p.provider === provider);
1115
1187
  }
1116
1188
 
1189
+ // src/advisor.ts
1190
+ function runAdvisor(ir, result, profile) {
1191
+ const out = [];
1192
+ out.push(...detectCachingOff(ir, profile));
1193
+ out.push(...detectSingleChunkSystem(ir, profile));
1194
+ out.push(...detectToolBloat(ir, result));
1195
+ out.push(...detectHistoryUncached(ir, profile));
1196
+ return out;
1197
+ }
1198
+ function detectCachingOff(ir, profile) {
1199
+ if (profile.provider !== "anthropic") return [];
1200
+ const totalChars = ir.sections.reduce((s, sec) => s + sec.text.length, 0);
1201
+ if (totalChars < 2e3) return [];
1202
+ const anyCacheable = ir.sections.some((s) => s.cacheable === true);
1203
+ if (anyCacheable) return [];
1204
+ return [
1205
+ {
1206
+ level: "warn",
1207
+ code: "caching-off-on-claude",
1208
+ message: `System prompt is ${totalChars} chars on Anthropic but no PromptSection has cacheable=true. Anthropic prompt caching cuts cached-prefix input cost by ~90% on subsequent calls; without it, every turn re-pays full price for the static system context.`,
1209
+ suggestion: "Mark stable system sections (role, persona, tool policy) with `cacheable: true`. The lowering pass concatenates cacheable sections into a single cache-controlled block before the dynamic ones.",
1210
+ docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
1211
+ }
1212
+ ];
1213
+ }
1214
+ function detectSingleChunkSystem(ir, profile) {
1215
+ if (profile.provider !== "anthropic") return [];
1216
+ if (ir.sections.length !== 1) return [];
1217
+ const only = ir.sections[0];
1218
+ if (!only || only.text.length <= 1e3) return [];
1219
+ return [
1220
+ {
1221
+ level: "info",
1222
+ code: "single-chunk-system",
1223
+ message: `System prompt is a single ${only.text.length}-char chunk. Splitting into NamedChunks (static role/persona vs dynamic context) gives the lowering pass a finer cache-marker boundary \u2014 only the static portion needs to be byte-stable for the cache to hit.`,
1224
+ suggestion: "Refactor the system builder to return an array of `PromptSection` shaped { id, text, cacheable?: boolean }. Static chunks (role, persona, tool policy) get `cacheable: true`; dynamic ones (current context, today's date) don't.",
1225
+ docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
1226
+ }
1227
+ ];
1228
+ }
1229
+ function detectToolBloat(ir, result) {
1230
+ const SHORT_OUTPUT = /* @__PURE__ */ new Set([
1231
+ "classify",
1232
+ "extract",
1233
+ "summarize",
1234
+ "transform",
1235
+ "critique"
1236
+ ]);
1237
+ if (!ir.tools || ir.tools.length === 0) return [];
1238
+ const toolsKept = result.diagnostics.toolsKept;
1239
+ if (toolsKept <= 10) return [];
1240
+ if (!SHORT_OUTPUT.has(ir.intent.archetype)) return [];
1241
+ return [
1242
+ {
1243
+ level: "warn",
1244
+ code: "tool-bloat",
1245
+ message: `${toolsKept} tools kept after the relevance pass for archetype="${ir.intent.archetype}" (consumer declared ${ir.tools.length}). This archetype is short-output and rarely needs more than 3 tools; each tool definition eats ~350 tokens of context budget.`,
1246
+ suggestion: "Tighten `relevanceByIntent: { [archetype]: 0..1 }` per ToolDefinition. Tools below `toolRelevanceThreshold` (default 0.2) get dropped. Without `relevanceByIntent`, every tool defaults to neutral (0.5) and stays.",
1247
+ docsUrl: "https://github.com/stue/kgauto/blob/main/v2/README.md#tools"
1248
+ }
1249
+ ];
1250
+ }
1251
+ function detectHistoryUncached(ir, profile) {
1252
+ if (profile.provider !== "anthropic") return [];
1253
+ if (!ir.history || ir.history.length < 2) return [];
1254
+ if (ir.historyCachePolicy && ir.historyCachePolicy.strategy !== "none") {
1255
+ return [];
1256
+ }
1257
+ return [
1258
+ {
1259
+ level: "warn",
1260
+ code: "history-uncached-on-claude",
1261
+ message: `${ir.history.length} history messages on Anthropic with no historyCachePolicy. Every turn re-pays for the full conversation context; with caching, subsequent turns hit the cache at ~10% the input cost.`,
1262
+ suggestion: "Set `historyCachePolicy: { strategy: 'all-but-latest' }` on this IR. The lowering pass marks the message immediately preceding currentTurn with cache_control; subsequent turns whose history prefix matches byte-for-byte hit the cache.",
1263
+ docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
1264
+ }
1265
+ ];
1266
+ }
1267
+
1117
1268
  // src/compile.ts
1118
1269
  var counter = 0;
1119
1270
  function makeHandle() {
@@ -1164,6 +1315,27 @@ function compile(ir, opts = {}) {
1164
1315
  const handle = makeHandle();
1165
1316
  const finalShape = computeShape(workingIR, inputTokens);
1166
1317
  const _learningKey = learningKey(ir.intent.archetype, profile.id, finalShape);
1318
+ const diagnostics = {
1319
+ sectionsKept: workingIR.sections.length,
1320
+ sectionsDropped: ir.sections.length - workingIR.sections.length,
1321
+ toolsKept: workingIR.tools?.length ?? 0,
1322
+ toolsDropped: (ir.tools?.length ?? 0) - (workingIR.tools?.length ?? 0),
1323
+ historyKept: workingIR.history?.length ?? 0,
1324
+ historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
1325
+ cacheableTokens: lowered.diagnostics.cacheableTokens,
1326
+ estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd,
1327
+ historyCacheableTokens: lowered.diagnostics.historyCacheableTokens
1328
+ };
1329
+ const advisories = runAdvisor(
1330
+ ir,
1331
+ {
1332
+ target: profile.id,
1333
+ provider: profile.provider,
1334
+ tokensIn: inputTokens,
1335
+ diagnostics
1336
+ },
1337
+ profile
1338
+ );
1167
1339
  return {
1168
1340
  handle,
1169
1341
  target: profile.id,
@@ -1173,16 +1345,8 @@ function compile(ir, opts = {}) {
1173
1345
  estimatedCostUsd: target.estimatedCostUsd,
1174
1346
  mutationsApplied: accumulatedMutations,
1175
1347
  fallbackChain,
1176
- diagnostics: {
1177
- sectionsKept: workingIR.sections.length,
1178
- sectionsDropped: ir.sections.length - workingIR.sections.length,
1179
- toolsKept: workingIR.tools?.length ?? 0,
1180
- toolsDropped: (ir.tools?.length ?? 0) - (workingIR.tools?.length ?? 0),
1181
- historyKept: workingIR.history?.length ?? 0,
1182
- historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
1183
- cacheableTokens: lowered.diagnostics.cacheableTokens,
1184
- estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd
1185
- }
1348
+ advisories,
1349
+ diagnostics
1186
1350
  };
1187
1351
  }
1188
1352
  function validateIR(ir) {
@@ -1266,7 +1430,8 @@ function registerCompile(appId, archetype, ir, result) {
1266
1430
  learningKey: learningKey(archetype, result.target, shape),
1267
1431
  estimatedTokensIn: tokens,
1268
1432
  mutationsApplied: result.mutationsApplied.map((m) => m.id),
1269
- startedAt: Date.now()
1433
+ startedAt: Date.now(),
1434
+ historyCacheableTokens: result.diagnostics.historyCacheableTokens
1270
1435
  });
1271
1436
  }
1272
1437
  async function record(input) {
@@ -1339,7 +1504,8 @@ function buildPayload(input, reg) {
1339
1504
  cache_read_input_tokens: input.cacheReadInputTokens,
1340
1505
  cache_creation_input_tokens: input.cacheCreationInputTokens,
1341
1506
  cost_usd_actual: costUsdActual,
1342
- ttft_ms: input.ttftMs
1507
+ ttft_ms: input.ttftMs,
1508
+ history_cacheable_tokens: reg?.historyCacheableTokens
1343
1509
  };
1344
1510
  }
1345
1511
  function computeCostUsd(modelId, tokensIn, tokensOut) {
@@ -1826,6 +1992,7 @@ function compile2(ir, opts) {
1826
1992
  profilesByProvider,
1827
1993
  record,
1828
1994
  resetTokenizer,
1995
+ runAdvisor,
1829
1996
  setTokenizer,
1830
1997
  tryGetProfile
1831
1998
  });
package/dist/index.mjs CHANGED
@@ -374,10 +374,15 @@ function lower(ir, profile, hints = {}) {
374
374
  }
375
375
  function lowerAnthropic(ir, profile, hints) {
376
376
  const systemBlocks = buildAnthropicSystemBlocks(ir.sections, profile);
377
- const messages = buildAnthropicMessages(ir.history ?? [], ir.currentTurn);
377
+ const history = (ir.history ?? []).filter((m) => m.role !== "system");
378
+ const policy = ir.historyCachePolicy;
379
+ const markIndex = resolveHistoryMarkIndex(history.length, policy);
380
+ const messages = buildAnthropicMessages(history, ir.currentTurn, markIndex);
378
381
  const tools = ir.tools ? toAnthropicTools(ir.tools) : void 0;
379
382
  const cacheableTokens = computeCacheableTokens(systemBlocks);
380
- const cacheSavings = cacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.1));
383
+ const historyCacheableTokens = markIndex >= 0 ? sumHistoryTokens(history, markIndex) : 0;
384
+ const totalCacheableTokens = cacheableTokens + historyCacheableTokens;
385
+ const cacheSavings = totalCacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.1));
381
386
  return {
382
387
  request: {
383
388
  provider: "anthropic",
@@ -389,6 +394,7 @@ function lowerAnthropic(ir, profile, hints) {
389
394
  },
390
395
  diagnostics: {
391
396
  cacheableTokens,
397
+ historyCacheableTokens,
392
398
  estimatedCacheSavingsUsd: cacheSavings
393
399
  }
394
400
  };
@@ -421,17 +427,64 @@ function buildAnthropicSystemBlocks(sections, profile) {
421
427
  }
422
428
  return blocks;
423
429
  }
424
- function buildAnthropicMessages(history, currentTurn) {
430
+ function buildAnthropicMessages(history, currentTurn, markIndex) {
425
431
  const out = [];
426
- for (const m of history) {
432
+ for (let i = 0; i < history.length; i++) {
433
+ const m = history[i];
427
434
  if (m.role === "system") continue;
428
- out.push({ role: m.role, content: m.parts ?? m.content });
435
+ const shouldMark = i === markIndex;
436
+ out.push({
437
+ role: m.role,
438
+ content: shouldMark ? attachAnthropicCacheControl(m) : m.parts ?? m.content
439
+ });
429
440
  }
430
441
  if (currentTurn && currentTurn.role !== "system") {
431
442
  out.push({ role: currentTurn.role, content: currentTurn.parts ?? currentTurn.content });
432
443
  }
433
444
  return out;
434
445
  }
446
+ function attachAnthropicCacheControl(m) {
447
+ if (Array.isArray(m.parts) && m.parts.length > 0) {
448
+ const blocks = m.parts;
449
+ const last = blocks[blocks.length - 1];
450
+ const withMarker = {
451
+ ...last,
452
+ cache_control: { type: "ephemeral" }
453
+ };
454
+ return [...blocks.slice(0, -1), withMarker];
455
+ }
456
+ return [
457
+ {
458
+ type: "text",
459
+ text: m.content,
460
+ cache_control: { type: "ephemeral" }
461
+ }
462
+ ];
463
+ }
464
+ function resolveHistoryMarkIndex(historyLen, policy) {
465
+ if (!policy || policy.strategy === "none") return -1;
466
+ if (historyLen === 0) return -1;
467
+ if (policy.strategy === "all-but-latest") {
468
+ return historyLen - 1;
469
+ }
470
+ const idx = historyLen - 1 - policy.suffix;
471
+ return idx >= 0 ? idx : -1;
472
+ }
473
+ function sumHistoryTokens(history, throughIndex) {
474
+ let total = 0;
475
+ for (let i = 0; i <= throughIndex && i < history.length; i++) {
476
+ const m = history[i];
477
+ if (m.role === "system") continue;
478
+ if (Array.isArray(m.parts)) {
479
+ for (const p of m.parts) {
480
+ if (typeof p.text === "string") total += countTokens(p.text);
481
+ }
482
+ } else if (typeof m.content === "string") {
483
+ total += countTokens(m.content);
484
+ }
485
+ }
486
+ return total;
487
+ }
435
488
  function toAnthropicTools(tools) {
436
489
  return tools.map((t) => ({
437
490
  name: t.name,
@@ -466,6 +519,9 @@ function lowerGoogle(ir, profile, hints) {
466
519
  const minTokens = profile.lowering.cache.minTokens ?? 4096;
467
520
  const meetsMin = cacheableTokens >= minTokens;
468
521
  const cacheSavings = meetsMin ? cacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.25)) : 0;
522
+ const history = (ir.history ?? []).filter((m) => m.role !== "system");
523
+ const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
524
+ const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
469
525
  return {
470
526
  request: {
471
527
  provider: "google",
@@ -477,6 +533,7 @@ function lowerGoogle(ir, profile, hints) {
477
533
  },
478
534
  diagnostics: {
479
535
  cacheableTokens: meetsMin ? cacheableTokens : 0,
536
+ historyCacheableTokens,
480
537
  estimatedCacheSavingsUsd: cacheSavings
481
538
  }
482
539
  };
@@ -524,6 +581,9 @@ function lowerOpenAI(ir, profile, hints) {
524
581
  content: ir.currentTurn.parts ?? ir.currentTurn.content
525
582
  });
526
583
  }
584
+ const history = (ir.history ?? []).filter((m) => m.role !== "system");
585
+ const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
586
+ const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
527
587
  return {
528
588
  request: {
529
589
  provider: "openai",
@@ -533,7 +593,11 @@ function lowerOpenAI(ir, profile, hints) {
533
593
  response_format: ir.constraints?.structuredOutput ? { type: "json_object" } : void 0,
534
594
  reasoning_effort: hints.forceTerseOutput ? "low" : void 0
535
595
  },
536
- diagnostics: { cacheableTokens: 0, estimatedCacheSavingsUsd: 0 }
596
+ diagnostics: {
597
+ cacheableTokens: 0,
598
+ historyCacheableTokens,
599
+ estimatedCacheSavingsUsd: 0
600
+ }
537
601
  };
538
602
  }
539
603
  function toOpenAITools(tools) {
@@ -560,6 +624,9 @@ function lowerDeepSeek(ir, profile) {
560
624
  content: ir.currentTurn.parts ?? ir.currentTurn.content
561
625
  });
562
626
  }
627
+ const history = (ir.history ?? []).filter((m) => m.role !== "system");
628
+ const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
629
+ const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
563
630
  return {
564
631
  request: {
565
632
  provider: "deepseek",
@@ -574,7 +641,11 @@ function lowerDeepSeek(ir, profile) {
574
641
  }
575
642
  })) : void 0
576
643
  },
577
- diagnostics: { cacheableTokens: 0, estimatedCacheSavingsUsd: 0 }
644
+ diagnostics: {
645
+ cacheableTokens: 0,
646
+ historyCacheableTokens,
647
+ estimatedCacheSavingsUsd: 0
648
+ }
578
649
  };
579
650
  }
580
651
  function sortSections(sections) {
@@ -597,6 +668,85 @@ function setNestedField(obj, path, value) {
597
668
  cursor[parts[parts.length - 1]] = value;
598
669
  }
599
670
 
671
+ // src/advisor.ts
672
/**
 * Runs every best-practice detector and returns their advisories in one
 * flat array, in a fixed order: caching-off, single-chunk system, tool
 * bloat, then uncached history.
 *
 * @param ir the prompt IR handed to the compiler
 * @param result compile result data; passed to the tool-bloat detector
 * @param profile the selected model profile
 * @returns advisories from all detectors; empty when none fired
 */
function runAdvisor(ir, result, profile) {
  return [
    ...detectCachingOff(ir, profile),
    ...detectSingleChunkSystem(ir, profile),
    ...detectToolBloat(ir, result),
    ...detectHistoryUncached(ir, profile)
  ];
}
680
/**
 * Advisory `caching-off-on-claude`: fires for Anthropic profiles when the
 * combined section text is at least 2000 characters and no section has
 * `cacheable === true`.
 *
 * @param ir prompt IR; reads `sections[].text` and `sections[].cacheable`
 * @param profile model profile; reads `provider`
 * @returns a one-element advisory array, or an empty array
 */
function detectCachingOff(ir, profile) {
  if (profile.provider !== "anthropic") return [];

  let totalChars = 0;
  for (const sec of ir.sections) {
    totalChars += sec.text.length;
  }
  if (totalChars < 2e3) return [];

  // A single explicitly cacheable section is enough to stay quiet.
  for (const sec of ir.sections) {
    if (sec.cacheable === true) return [];
  }

  const advisory = {
    level: "warn",
    code: "caching-off-on-claude",
    message: `System prompt is ${totalChars} chars on Anthropic but no PromptSection has cacheable=true. Anthropic prompt caching cuts cached-prefix input cost by ~90% on subsequent calls; without it, every turn re-pays full price for the static system context.`,
    suggestion: "Mark stable system sections (role, persona, tool policy) with `cacheable: true`. The lowering pass concatenates cacheable sections into a single cache-controlled block before the dynamic ones.",
    docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
  };
  return [advisory];
}
696
/**
 * Advisory `single-chunk-system`: fires for Anthropic profiles when the IR
 * has exactly one section and its text is longer than 1000 characters.
 *
 * @param ir prompt IR; reads `sections`
 * @param profile model profile; reads `provider`
 * @returns a one-element advisory array, or an empty array
 */
function detectSingleChunkSystem(ir, profile) {
  const onAnthropic = profile.provider === "anthropic";
  if (!onAnthropic || ir.sections.length !== 1) return [];

  const sole = ir.sections[0];
  if (!sole) return [];
  const chunkChars = sole.text.length;
  if (chunkChars <= 1e3) return [];

  const advisory = {
    level: "info",
    code: "single-chunk-system",
    message: `System prompt is a single ${chunkChars}-char chunk. Splitting into NamedChunks (static role/persona vs dynamic context) gives the lowering pass a finer cache-marker boundary \u2014 only the static portion needs to be byte-stable for the cache to hit.`,
    suggestion: "Refactor the system builder to return an array of `PromptSection` shaped { id, text, cacheable?: boolean }. Static chunks (role, persona, tool policy) get `cacheable: true`; dynamic ones (current context, today's date) don't.",
    docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
  };
  return [advisory];
}
711
/**
 * Advisory `tool-bloat`: fires when the IR declares tools, more than 10 of
 * them remain in `result.diagnostics.toolsKept`, and the intent archetype is
 * one of the short-output archetypes listed below.
 *
 * @param ir prompt IR; reads `tools` and `intent.archetype`
 * @param result compile result data; reads `diagnostics.toolsKept`
 * @returns a one-element advisory array, or an empty array
 */
function detectToolBloat(ir, result) {
  const declared = ir.tools;
  if (!declared || declared.length === 0) return [];

  const toolsKept = result.diagnostics.toolsKept;
  if (toolsKept <= 10) return [];

  // Archetypes treated as short-output for this rule.
  const shortOutputArchetypes = [
    "classify",
    "extract",
    "summarize",
    "transform",
    "critique"
  ];
  const archetype = ir.intent.archetype;
  if (!shortOutputArchetypes.includes(archetype)) return [];

  const advisory = {
    level: "warn",
    code: "tool-bloat",
    message: `${toolsKept} tools kept after the relevance pass for archetype="${archetype}" (consumer declared ${declared.length}). This archetype is short-output and rarely needs more than 3 tools; each tool definition eats ~350 tokens of context budget.`,
    suggestion: "Tighten `relevanceByIntent: { [archetype]: 0..1 }` per ToolDefinition. Tools below `toolRelevanceThreshold` (default 0.2) get dropped. Without `relevanceByIntent`, every tool defaults to neutral (0.5) and stays.",
    docsUrl: "https://github.com/stue/kgauto/blob/main/v2/README.md#tools"
  };
  return [advisory];
}
733
/**
 * Advisory `history-uncached-on-claude`: fires for Anthropic profiles when
 * the IR has at least two history entries and `historyCachePolicy` is either
 * absent or set to strategy "none".
 *
 * @param ir prompt IR; reads `history` and `historyCachePolicy`
 * @param profile model profile; reads `provider`
 * @returns a one-element advisory array, or an empty array
 */
function detectHistoryUncached(ir, profile) {
  if (profile.provider !== "anthropic") return [];

  const turns = ir.history;
  if (!turns || turns.length < 2) return [];

  // Any policy other than "none" counts as history caching being enabled.
  const policy = ir.historyCachePolicy;
  const cachingEnabled = Boolean(policy) && policy.strategy !== "none";
  if (cachingEnabled) return [];

  const advisory = {
    level: "warn",
    code: "history-uncached-on-claude",
    message: `${turns.length} history messages on Anthropic with no historyCachePolicy. Every turn re-pays for the full conversation context; with caching, subsequent turns hit the cache at ~10% the input cost.`,
    suggestion: "Set `historyCachePolicy: { strategy: 'all-but-latest' }` on this IR. The lowering pass marks the message immediately preceding currentTurn with cache_control; subsequent turns whose history prefix matches byte-for-byte hit the cache.",
    docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
  };
  return [advisory];
}
749
+
600
750
  // src/compile.ts
601
751
  var counter = 0;
602
752
  function makeHandle() {
@@ -647,6 +797,27 @@ function compile(ir, opts = {}) {
647
797
  const handle = makeHandle();
648
798
  const finalShape = computeShape(workingIR, inputTokens);
649
799
  const _learningKey = learningKey(ir.intent.archetype, profile.id, finalShape);
800
+ const diagnostics = {
801
+ sectionsKept: workingIR.sections.length,
802
+ sectionsDropped: ir.sections.length - workingIR.sections.length,
803
+ toolsKept: workingIR.tools?.length ?? 0,
804
+ toolsDropped: (ir.tools?.length ?? 0) - (workingIR.tools?.length ?? 0),
805
+ historyKept: workingIR.history?.length ?? 0,
806
+ historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
807
+ cacheableTokens: lowered.diagnostics.cacheableTokens,
808
+ estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd,
809
+ historyCacheableTokens: lowered.diagnostics.historyCacheableTokens
810
+ };
811
+ const advisories = runAdvisor(
812
+ ir,
813
+ {
814
+ target: profile.id,
815
+ provider: profile.provider,
816
+ tokensIn: inputTokens,
817
+ diagnostics
818
+ },
819
+ profile
820
+ );
650
821
  return {
651
822
  handle,
652
823
  target: profile.id,
@@ -656,16 +827,8 @@ function compile(ir, opts = {}) {
656
827
  estimatedCostUsd: target.estimatedCostUsd,
657
828
  mutationsApplied: accumulatedMutations,
658
829
  fallbackChain,
659
- diagnostics: {
660
- sectionsKept: workingIR.sections.length,
661
- sectionsDropped: ir.sections.length - workingIR.sections.length,
662
- toolsKept: workingIR.tools?.length ?? 0,
663
- toolsDropped: (ir.tools?.length ?? 0) - (workingIR.tools?.length ?? 0),
664
- historyKept: workingIR.history?.length ?? 0,
665
- historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
666
- cacheableTokens: lowered.diagnostics.cacheableTokens,
667
- estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd
668
- }
830
+ advisories,
831
+ diagnostics
669
832
  };
670
833
  }
671
834
  function validateIR(ir) {
@@ -749,7 +912,8 @@ function registerCompile(appId, archetype, ir, result) {
749
912
  learningKey: learningKey(archetype, result.target, shape),
750
913
  estimatedTokensIn: tokens,
751
914
  mutationsApplied: result.mutationsApplied.map((m) => m.id),
752
- startedAt: Date.now()
915
+ startedAt: Date.now(),
916
+ historyCacheableTokens: result.diagnostics.historyCacheableTokens
753
917
  });
754
918
  }
755
919
  async function record(input) {
@@ -822,7 +986,8 @@ function buildPayload(input, reg) {
822
986
  cache_read_input_tokens: input.cacheReadInputTokens,
823
987
  cache_creation_input_tokens: input.cacheCreationInputTokens,
824
988
  cost_usd_actual: costUsdActual,
825
- ttft_ms: input.ttftMs
989
+ ttft_ms: input.ttftMs,
990
+ history_cacheable_tokens: reg?.historyCacheableTokens
826
991
  };
827
992
  }
828
993
  function computeCostUsd(modelId, tokensIn, tokensOut) {
@@ -1308,6 +1473,7 @@ export {
1308
1473
  profilesByProvider,
1309
1474
  record,
1310
1475
  resetTokenizer,
1476
+ runAdvisor,
1311
1477
  setTokenizer,
1312
1478
  tryGetProfile
1313
1479
  };
@@ -91,6 +91,40 @@ interface Constraints {
91
91
  /** Override target model selection — if set, compiler uses this instead of routing. */
92
92
  forceModel?: string;
93
93
  }
94
+ /**
95
+ * Cache marker policy for the messages array (history + currentTurn).
96
+ *
97
+ * Anthropic positional caching: a `cache_control` marker on a content block
98
+ * tells the API "remember the prefix up through this block." On a subsequent
99
+ * request whose first N tokens match, those N tokens are billed at the cached rate
100
+ * (10% of the input price). Without a marker, every call re-pays for the
101
+ * entire history.
102
+ *
103
+ * - `'none'` (default when omitted): no history cache marker. System-level
104
+ * cache markers from `PromptSection.cacheable=true` still apply.
105
+ * - `'all-but-latest'`: marks the message immediately preceding `currentTurn`
106
+ * (the last history entry). On the next call, that entire history prefix
107
+ * is cacheable. Good fit for chat/agent loops where every prior turn is
108
+ * stable.
109
+ * - `'fixed-suffix'`: marks the message `suffix` positions from the end of
110
+ * `history`. Use when the last few turns are volatile (e.g., scratchpad,
111
+ * draft revisions) but the earlier prefix is stable.
112
+ *
113
+ * For non-Anthropic providers, no wire-format marker is emitted (Gemini /
114
+ * OpenAI / DeepSeek implicit caching takes effect automatically when a
115
+ * stable prefix is reused). The compiler still computes
116
+ * `diagnostics.historyCacheableTokens` for telemetry on every provider.
117
+ *
118
+ * alpha.5.
119
+ */
120
+ type HistoryCachePolicy = {
121
+ strategy: 'none';
122
+ } | {
123
+ strategy: 'all-but-latest';
124
+ } | {
125
+ strategy: 'fixed-suffix';
126
+ suffix: number;
127
+ };
94
128
  /**
95
129
  * Consumer-declared policy for model selection. Lives outside the IR
96
130
  * (passed via CompileOptions) because it's a SESSION/APP-level constraint,
@@ -146,6 +180,12 @@ interface PromptIR {
146
180
  models: string[];
147
181
  /** Compile constraints. */
148
182
  constraints?: Constraints;
183
+ /**
184
+ * Cache marker placement policy for the messages array. Default = no
185
+ * history cache markers. See `HistoryCachePolicy` for semantics.
186
+ * alpha.5.
187
+ */
188
+ historyCachePolicy?: HistoryCachePolicy;
149
189
  }
150
190
  type Provider = 'anthropic' | 'google' | 'openai' | 'deepseek' | 'mistral' | 'xai';
151
191
  /**
@@ -213,6 +253,41 @@ type CompiledRequest = {
213
253
  }>;
214
254
  tools?: unknown[];
215
255
  };
256
+ /**
257
+ * Best-practice advisory emitted by the compiler at compile time. Non-fatal —
258
+ * consumers log, surface in dev tools, gate on `level === 'critical'` in CI,
259
+ * or ignore. The advisor inspects the IR + selected profile + diagnostics
260
+ * and emits one entry per detected gap.
261
+ *
262
+ * Codes are stable across releases. `suggestion` and `docsUrl` are optional
263
+ * but encouraged: suggestion = the actionable diff; docsUrl = the
264
+ * interfaces/kgauto.md anchor for context.
265
+ *
266
+ * alpha.6 Phase 1 starter rules:
267
+ * - `caching-off-on-claude` (warn) system >2000 chars on Anthropic, no cacheable=true
268
+ * - `single-chunk-system` (info) Anthropic, only one PromptSection >1000 chars
269
+ * - `tool-bloat` (warn) >10 tools on a short-output archetype
270
+ * - `history-uncached-on-claude` (warn) Anthropic, ≥2 history messages, no historyCachePolicy
271
+ *
272
+ * Phase 2 (catalog as `bestPractices` block in profiles) and Phase 3 (brain
273
+ * telemetry on `advisories_fired`) are alpha.7+ territory.
274
+ */
275
+ interface BestPracticeAdvisory {
276
+ /**
277
+ * Severity. `info` = informational; `warn` = behavioral pattern that's
278
+ * usually expensive or wrong; `critical` = likely bug or production-grade
279
+ * misuse. Phase 1 ships info + warn only.
280
+ */
281
+ level: 'info' | 'warn' | 'critical';
282
+ /** Stable kebab-case code. Consumers filter / gate by this. */
283
+ code: string;
284
+ /** Human-readable explanation of what was detected. */
285
+ message: string;
286
+ /** Optional: how to fix — actionable diff or pattern. */
287
+ suggestion?: string;
288
+ /** Optional: link to docs anchor for more context. */
289
+ docsUrl?: string;
290
+ }
216
291
  interface CompileResult {
217
292
  /** Unique handle for this call — pass to record() to correlate the outcome. */
218
293
  handle: string;
@@ -230,6 +305,11 @@ interface CompileResult {
230
305
  mutationsApplied: MutationApplied[];
231
306
  /** Fallback chain — try these in order if target fails. */
232
307
  fallbackChain: string[];
308
+ /**
309
+ * Best-practice advisories emitted by the compiler. Non-fatal. Empty
310
+ * array when no rules fired. alpha.6 Phase 1.
311
+ */
312
+ advisories: BestPracticeAdvisory[];
233
313
  /** Diagnostics for caller-side logging. */
234
314
  diagnostics: {
235
315
  sectionsKept: number;
@@ -240,6 +320,16 @@ interface CompileResult {
240
320
  historyDropped: number;
241
321
  cacheableTokens: number;
242
322
  estimatedCacheSavingsUsd: number;
323
+ /**
324
+ * Tokens in `history` (and `currentTurn` when before the marker) that
325
+ * fall within the cacheable prefix per `historyCachePolicy`. Always
326
+ * computed; only Anthropic actually emits a wire-format marker. For
327
+ * Gemini / OpenAI / DeepSeek, this represents the theoretical cacheable
328
+ * prefix that implicit caching may pick up — useful telemetry for the
329
+ * brain to learn which (app, model, archetype) tuples benefit most
330
+ * from history caching. alpha.5.
331
+ */
332
+ historyCacheableTokens: number;
243
333
  };
244
334
  }
245
335
  /**
@@ -518,4 +608,4 @@ declare function tryGetProfile(id: string): ModelProfile | undefined;
518
608
  declare function allProfiles(): readonly ModelProfile[];
519
609
  declare function profilesByProvider(provider: Provider): readonly ModelProfile[];
520
610
 
521
- export { type ApiKeys as A, type CompilePolicy as C, type IntentDeclaration as I, type LoweringSpec as L, type ModelProfile as M, type NormalizedResponse as N, type OracleScore as O, type ProviderOverrides as P, type RecordInput as R, type StructuredOutputCapability as S, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type CompileResult as e, ALIASES as f, type CacheStrategy as g, type CallAttempt as h, CallError as i, type CliffRule as j, type Constraints as k, type Message as l, type MutationApplied as m, type NormalizedTokens as n, type PromptSection as o, type Provider as p, type RecoveryRule as q, type SystemPromptMode as r, type ToolDefinition as s, allProfiles as t, getProfile as u, profilesByProvider as v, tryGetProfile as w };
611
+ export { type ApiKeys as A, type BestPracticeAdvisory as B, type CompilePolicy as C, type HistoryCachePolicy as H, type IntentDeclaration as I, type LoweringSpec as L, type ModelProfile as M, type NormalizedResponse as N, type OracleScore as O, type ProviderOverrides as P, type RecordInput as R, type StructuredOutputCapability as S, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type CompileResult as e, ALIASES as f, type CacheStrategy as g, type CallAttempt as h, CallError as i, type CliffRule as j, type Constraints as k, type Message as l, type MutationApplied as m, type NormalizedTokens as n, type PromptSection as o, type Provider as p, type RecoveryRule as q, type SystemPromptMode as r, type ToolDefinition as s, allProfiles as t, getProfile as u, profilesByProvider as v, tryGetProfile as w };
@@ -91,6 +91,40 @@ interface Constraints {
91
91
  /** Override target model selection — if set, compiler uses this instead of routing. */
92
92
  forceModel?: string;
93
93
  }
94
+ /**
95
+ * Cache marker policy for the messages array (history + currentTurn).
96
+ *
97
+ * Anthropic positional caching: a `cache_control` marker on a content block
98
+ * tells the API "remember the prefix up through this block." On a subsequent
99
+ * request whose first N tokens match, those N tokens are billed at the cached rate
100
+ * (10% of the input price). Without a marker, every call re-pays for the
101
+ * entire history.
102
+ *
103
+ * - `'none'` (default when omitted): no history cache marker. System-level
104
+ * cache markers from `PromptSection.cacheable=true` still apply.
105
+ * - `'all-but-latest'`: marks the message immediately preceding `currentTurn`
106
+ * (the last history entry). On the next call, that entire history prefix
107
+ * is cacheable. Good fit for chat/agent loops where every prior turn is
108
+ * stable.
109
+ * - `'fixed-suffix'`: marks the message `suffix` positions from the end of
110
+ * `history`. Use when the last few turns are volatile (e.g., scratchpad,
111
+ * draft revisions) but the earlier prefix is stable.
112
+ *
113
+ * For non-Anthropic providers, no wire-format marker is emitted (Gemini /
114
+ * OpenAI / DeepSeek implicit caching takes effect automatically when a
115
+ * stable prefix is reused). The compiler still computes
116
+ * `diagnostics.historyCacheableTokens` for telemetry on every provider.
117
+ *
118
+ * alpha.5.
119
+ */
120
+ type HistoryCachePolicy = {
121
+ strategy: 'none';
122
+ } | {
123
+ strategy: 'all-but-latest';
124
+ } | {
125
+ strategy: 'fixed-suffix';
126
+ suffix: number;
127
+ };
94
128
  /**
95
129
  * Consumer-declared policy for model selection. Lives outside the IR
96
130
  * (passed via CompileOptions) because it's a SESSION/APP-level constraint,
@@ -146,6 +180,12 @@ interface PromptIR {
146
180
  models: string[];
147
181
  /** Compile constraints. */
148
182
  constraints?: Constraints;
183
+ /**
184
+ * Cache marker placement policy for the messages array. Default = no
185
+ * history cache markers. See `HistoryCachePolicy` for semantics.
186
+ * alpha.5.
187
+ */
188
+ historyCachePolicy?: HistoryCachePolicy;
149
189
  }
150
190
  type Provider = 'anthropic' | 'google' | 'openai' | 'deepseek' | 'mistral' | 'xai';
151
191
  /**
@@ -213,6 +253,41 @@ type CompiledRequest = {
213
253
  }>;
214
254
  tools?: unknown[];
215
255
  };
256
+ /**
257
+ * Best-practice advisory emitted by the compiler at compile time. Non-fatal —
258
+ * consumers log, surface in dev tools, gate on `level === 'critical'` in CI,
259
+ * or ignore. The advisor inspects the IR + selected profile + diagnostics
260
+ * and emits one entry per detected gap.
261
+ *
262
+ * Codes are stable across releases. `suggestion` and `docsUrl` are optional
263
+ * but encouraged: suggestion = the actionable diff; docsUrl = the
264
+ * interfaces/kgauto.md anchor for context.
265
+ *
266
+ * alpha.6 Phase 1 starter rules:
267
+ * - `caching-off-on-claude` (warn) system >2000 chars on Anthropic, no cacheable=true
268
+ * - `single-chunk-system` (info) Anthropic, only one PromptSection >1000 chars
269
+ * - `tool-bloat` (warn) >10 tools on a short-output archetype
270
+ * - `history-uncached-on-claude` (warn) Anthropic, ≥2 history messages, no historyCachePolicy
271
+ *
272
+ * Phase 2 (catalog as `bestPractices` block in profiles) and Phase 3 (brain
273
+ * telemetry on `advisories_fired`) are alpha.7+ territory.
274
+ */
275
+ interface BestPracticeAdvisory {
276
+ /**
277
+ * Severity. `info` = informational; `warn` = behavioral pattern that's
278
+ * usually expensive or wrong; `critical` = likely bug or production-grade
279
+ * misuse. Phase 1 ships info + warn only.
280
+ */
281
+ level: 'info' | 'warn' | 'critical';
282
+ /** Stable kebab-case code. Consumers filter / gate by this. */
283
+ code: string;
284
+ /** Human-readable explanation of what was detected. */
285
+ message: string;
286
+ /** Optional: how to fix — actionable diff or pattern. */
287
+ suggestion?: string;
288
+ /** Optional: link to docs anchor for more context. */
289
+ docsUrl?: string;
290
+ }
216
291
  interface CompileResult {
217
292
  /** Unique handle for this call — pass to record() to correlate the outcome. */
218
293
  handle: string;
@@ -230,6 +305,11 @@ interface CompileResult {
230
305
  mutationsApplied: MutationApplied[];
231
306
  /** Fallback chain — try these in order if target fails. */
232
307
  fallbackChain: string[];
308
+ /**
309
+ * Best-practice advisories emitted by the compiler. Non-fatal. Empty
310
+ * array when no rules fired. alpha.6 Phase 1.
311
+ */
312
+ advisories: BestPracticeAdvisory[];
233
313
  /** Diagnostics for caller-side logging. */
234
314
  diagnostics: {
235
315
  sectionsKept: number;
@@ -240,6 +320,16 @@ interface CompileResult {
240
320
  historyDropped: number;
241
321
  cacheableTokens: number;
242
322
  estimatedCacheSavingsUsd: number;
323
+ /**
324
+ * Tokens in `history` (and `currentTurn` when before the marker) that
325
+ * fall within the cacheable prefix per `historyCachePolicy`. Always
326
+ * computed; only Anthropic actually emits a wire-format marker. For
327
+ * Gemini / OpenAI / DeepSeek, this represents the theoretical cacheable
328
+ * prefix that implicit caching may pick up — useful telemetry for the
329
+ * brain to learn which (app, model, archetype) tuples benefit most
330
+ * from history caching. alpha.5.
331
+ */
332
+ historyCacheableTokens: number;
243
333
  };
244
334
  }
245
335
  /**
@@ -518,4 +608,4 @@ declare function tryGetProfile(id: string): ModelProfile | undefined;
518
608
  declare function allProfiles(): readonly ModelProfile[];
519
609
  declare function profilesByProvider(provider: Provider): readonly ModelProfile[];
520
610
 
521
- export { type ApiKeys as A, type CompilePolicy as C, type IntentDeclaration as I, type LoweringSpec as L, type ModelProfile as M, type NormalizedResponse as N, type OracleScore as O, type ProviderOverrides as P, type RecordInput as R, type StructuredOutputCapability as S, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type CompileResult as e, ALIASES as f, type CacheStrategy as g, type CallAttempt as h, CallError as i, type CliffRule as j, type Constraints as k, type Message as l, type MutationApplied as m, type NormalizedTokens as n, type PromptSection as o, type Provider as p, type RecoveryRule as q, type SystemPromptMode as r, type ToolDefinition as s, allProfiles as t, getProfile as u, profilesByProvider as v, tryGetProfile as w };
611
+ export { type ApiKeys as A, type BestPracticeAdvisory as B, type CompilePolicy as C, type HistoryCachePolicy as H, type IntentDeclaration as I, type LoweringSpec as L, type ModelProfile as M, type NormalizedResponse as N, type OracleScore as O, type ProviderOverrides as P, type RecordInput as R, type StructuredOutputCapability as S, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type CompileResult as e, ALIASES as f, type CacheStrategy as g, type CallAttempt as h, CallError as i, type CliffRule as j, type Constraints as k, type Message as l, type MutationApplied as m, type NormalizedTokens as n, type PromptSection as o, type Provider as p, type RecoveryRule as q, type SystemPromptMode as r, type ToolDefinition as s, allProfiles as t, getProfile as u, profilesByProvider as v, tryGetProfile as w };
@@ -1,2 +1,2 @@
1
- export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CH_nKPjp.mjs';
1
+ export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-zm6diETo.mjs';
2
2
  import './dialect.mjs';
@@ -1,2 +1,2 @@
1
- export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CDttLtaD.js';
1
+ export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CQnLkQ7b.js';
2
2
  import './dialect.js';
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@warmdrift/kgauto-compiler",
3
- "version": "2.0.0-alpha.4",
3
+ "version": "2.0.0-alpha.6",
4
4
  "description": "Prompt compiler + central learning brain for multi-model AI apps. Swap models without rewriting prompts.",
5
5
  "main": "./dist/index.js",
6
6
  "module": "./dist/index.mjs",