@warmdrift/kgauto-compiler 2.0.0-alpha.5 → 2.0.0-alpha.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # @warmdrift/kgauto-compiler — v2.0.0-alpha.5
1
+ # @warmdrift/kgauto-compiler — v2.0.0-alpha.6
2
2
 
3
3
  > Prompt compiler + central learning brain for multi-model AI apps.
4
4
  > **Swap models without rewriting prompts.**
@@ -18,8 +18,8 @@ mutations.
18
18
  - **Package:** alpha — coexists with v1 (`@warmdrift/kgauto@1.2.0`) under
19
19
  the temporary name `@warmdrift/kgauto-compiler`. Renames to v2 final once
20
20
  v1 is fully retired from production.
21
- - **Tests:** 180/180 passing
22
- - **Build:** clean (47KB ESM, 64KB CJS)
21
+ - **Tests:** 201/201 passing
22
+ - **Build:** clean (47KB ESM, 68KB CJS)
23
23
  - **Brain:** schema ready (see `brain/migrations/001_initial_schema.sql`);
24
24
  awaiting dedicated Supabase provisioning.
25
25
  - **Mutation engine:** v2.1 (after enough outcome data accumulates).
@@ -154,6 +154,48 @@ The 5 prod empty-responses in tt-intelligence's `gemini-2.5-flash` dashboard
154
154
  calls? v2 catches those automatically — `expectedShortOutput` constraint plus
155
155
  the `force_thinking_budget_zero` cliff guard.
156
156
 
157
+ ## Tools
158
+
159
+ Tools are first-class IR fields. The compiler's tool-relevance pass drops
160
+ tools that don't apply to the current intent before lowering — saves
161
+ context budget on every call.
162
+
163
+ ```ts
164
+ const tools: ToolDefinition[] = [
165
+ {
166
+ name: 'web_search',
167
+ description: 'Search the public web',
168
+ parameters: { type: 'object', properties: { q: { type: 'string' } } },
169
+ relevanceByIntent: {
170
+ ask: 0.9, // primary tool for ask
171
+ hunt: 0.9,
172
+ classify: 0.0, // never useful for classification
173
+ summarize: 0.0,
174
+ extract: 0.1,
175
+ },
176
+ },
177
+ // ...
178
+ ];
179
+ ```
180
+
181
+ Each tool declares per-intent relevance scores 0..1. The pass keeps tools
182
+ where `relevanceByIntent[currentIntent] >= toolRelevanceThreshold` (default
183
+ `0.2`). Missing entries default to neutral (`0.5`) — kept by default. Set
184
+ explicit `0.0` to hard-exclude.
185
+
186
+ Tool definitions eat ~350 tokens of context per tool (L-051), so trimming
187
+ matters: 12 declared tools, only 3 relevant → 9 × 350 = 3150 tokens
188
+ recovered per call.
189
+
190
+ The `tool-bloat` advisory (alpha.6) fires when more than 10 tools survive
191
+ the relevance pass on a short-output archetype (`classify`, `extract`,
192
+ `summarize`, `transform`, `critique`) — those archetypes typically use
193
+ ≤3 tools, so a kept-count >10 indicates either missing `relevanceByIntent`
194
+ or scores set too generously.
195
+
196
+ DeepSeek profiles cap tools to 1 (sequential-only). Other providers
197
+ inherit the count from the IR after the relevance pass.
198
+
157
199
  ## Brain provisioning
158
200
 
159
201
  1. Create a NEW Supabase project (suggested name: `kgauto-brain`)
package/dist/index.d.mts CHANGED
@@ -1,5 +1,5 @@
1
- import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult } from './profiles-DHdCRBVH.mjs';
2
- export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-DHdCRBVH.mjs';
1
+ import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory } from './profiles-zm6diETo.mjs';
2
+ export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-zm6diETo.mjs';
3
3
  export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, IntentArchetypeName, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.mjs';
4
4
 
5
5
  /**
@@ -189,6 +189,41 @@ declare function resetTokenizer(): void;
189
189
  */
190
190
  declare function countTokens(text: string): number;
191
191
 
192
+ /**
193
+ * Best-practice advisor — alpha.6 Phase 1.
194
+ *
195
+ * Inspects an IR + the selected profile + compile diagnostics and emits a
196
+ * list of `BestPracticeAdvisory` entries describing detected gaps. Runs
197
+ * after `lower()` in the compile pipeline; the result lands on
198
+ * `CompileResult.advisories` for the consumer to log, surface, or filter.
199
+ *
200
+ * Driven by interfaces/kgauto.md `best-practice-advisories` (IC, 2026-05-07).
201
+ * Phase 1 ships 4 starter rules sourced from the s14 kgauto comment +
202
+ * s15 empirical seed of brain anti-patterns:
203
+ *
204
+ * 1. `caching-off-on-claude` system ≥2000 chars on Anthropic, no cacheable=true
205
+ * 2. `single-chunk-system` Anthropic, exactly one PromptSection, and it is >1000 chars
206
+ * 3. `tool-bloat` >10 tools on a short-output archetype
207
+ * 4. `history-uncached-on-claude` Anthropic, ≥2 history messages, historyCachePolicy unset or strategy 'none'
208
+ *
209
+ * Each rule is a pure function: (ir, profile) or (ir, result) → BestPracticeAdvisory[].
210
+ * No side effects. No randomness. Deterministic for a given IR.
211
+ *
212
+ * The thresholds (2000 chars, 1000 chars, 10 tools, 2 history) are chosen
213
+ * to balance noise vs. signal — too low fires on innocuous calls, too high
214
+ * misses real waste. They may be tuned with brain evidence over time; for now
215
+ * they're literals in the rule bodies. Make them configurable when the
216
+ * cost-watcher's R-rules graduate to here.
217
+ */
218
+
219
+ /** Subset of CompileResult fields the advisor needs. */
220
+ type AdvisorContext = Pick<CompileResult, 'target' | 'provider' | 'tokensIn' | 'diagnostics'>;
221
+ /**
222
+ * Run all Phase 1 rules and return collected advisories. Order is fixed
223
+ * (same as the rule list above) so output is stable across runs.
224
+ */
225
+ declare function runAdvisor(ir: PromptIR, result: AdvisorContext, profile: ModelProfile): BestPracticeAdvisory[];
226
+
192
227
  /**
193
228
  * @warmdrift/kgauto v2 — prompt compiler + central learning brain.
194
229
  *
@@ -235,4 +270,4 @@ declare function countTokens(text: string): number;
235
270
  */
236
271
  declare function compile(ir: PromptIR, opts?: CompileOptions): CompileResult;
237
272
 
238
- export { ApiKeys, type AppOracle, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, setTokenizer };
273
+ export { ApiKeys, type AppOracle, BestPracticeAdvisory, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, runAdvisor, setTokenizer };
package/dist/index.d.ts CHANGED
@@ -1,5 +1,5 @@
1
- import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult } from './profiles-MGq5Tnjv.js';
2
- export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-MGq5Tnjv.js';
1
+ import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory } from './profiles-CQnLkQ7b.js';
2
+ export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CQnLkQ7b.js';
3
3
  export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, IntentArchetypeName, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.js';
4
4
 
5
5
  /**
@@ -189,6 +189,41 @@ declare function resetTokenizer(): void;
189
189
  */
190
190
  declare function countTokens(text: string): number;
191
191
 
192
+ /**
193
+ * Best-practice advisor — alpha.6 Phase 1.
194
+ *
195
+ * Inspects an IR + the selected profile + compile diagnostics and emits a
196
+ * list of `BestPracticeAdvisory` entries describing detected gaps. Runs
197
+ * after `lower()` in the compile pipeline; the result lands on
198
+ * `CompileResult.advisories` for the consumer to log, surface, or filter.
199
+ *
200
+ * Driven by interfaces/kgauto.md `best-practice-advisories` (IC, 2026-05-07).
201
+ * Phase 1 ships 4 starter rules sourced from the s14 kgauto comment +
202
+ * s15 empirical seed of brain anti-patterns:
203
+ *
204
+ * 1. `caching-off-on-claude` system ≥2000 chars on Anthropic, no cacheable=true
205
+ * 2. `single-chunk-system` Anthropic, exactly one PromptSection, and it is >1000 chars
206
+ * 3. `tool-bloat` >10 tools on a short-output archetype
207
+ * 4. `history-uncached-on-claude` Anthropic, ≥2 history messages, historyCachePolicy unset or strategy 'none'
208
+ *
209
+ * Each rule is a pure function: (ir, profile) or (ir, result) → BestPracticeAdvisory[].
210
+ * No side effects. No randomness. Deterministic for a given IR.
211
+ *
212
+ * The thresholds (2000 chars, 1000 chars, 10 tools, 2 history) are chosen
213
+ * to balance noise vs. signal — too low fires on innocuous calls, too high
214
+ * misses real waste. They may be tuned with brain evidence over time; for now
215
+ * they're literals in the rule bodies. Make them configurable when the
216
+ * cost-watcher's R-rules graduate to here.
217
+ */
218
+
219
+ /** Subset of CompileResult fields the advisor needs. */
220
+ type AdvisorContext = Pick<CompileResult, 'target' | 'provider' | 'tokensIn' | 'diagnostics'>;
221
+ /**
222
+ * Run all Phase 1 rules and return collected advisories. Order is fixed
223
+ * (same as the rule list above) so output is stable across runs.
224
+ */
225
+ declare function runAdvisor(ir: PromptIR, result: AdvisorContext, profile: ModelProfile): BestPracticeAdvisory[];
226
+
192
227
  /**
193
228
  * @warmdrift/kgauto v2 — prompt compiler + central learning brain.
194
229
  *
@@ -235,4 +270,4 @@ declare function countTokens(text: string): number;
235
270
  */
236
271
  declare function compile(ir: PromptIR, opts?: CompileOptions): CompileResult;
237
272
 
238
- export { ApiKeys, type AppOracle, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, setTokenizer };
273
+ export { ApiKeys, type AppOracle, BestPracticeAdvisory, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, runAdvisor, setTokenizer };
package/dist/index.js CHANGED
@@ -43,6 +43,7 @@ __export(index_exports, {
43
43
  profilesByProvider: () => profilesByProvider,
44
44
  record: () => record,
45
45
  resetTokenizer: () => resetTokenizer,
46
+ runAdvisor: () => runAdvisor,
46
47
  setTokenizer: () => setTokenizer,
47
48
  tryGetProfile: () => tryGetProfile
48
49
  });
@@ -489,7 +490,7 @@ function lower(ir, profile, hints = {}) {
489
490
  }
490
491
  function lowerAnthropic(ir, profile, hints) {
491
492
  const systemBlocks = buildAnthropicSystemBlocks(ir.sections, profile);
492
- const history = ir.history ?? [];
493
+ const history = (ir.history ?? []).filter((m) => m.role !== "system");
493
494
  const policy = ir.historyCachePolicy;
494
495
  const markIndex = resolveHistoryMarkIndex(history.length, policy);
495
496
  const messages = buildAnthropicMessages(history, ir.currentTurn, markIndex);
@@ -634,7 +635,7 @@ function lowerGoogle(ir, profile, hints) {
634
635
  const minTokens = profile.lowering.cache.minTokens ?? 4096;
635
636
  const meetsMin = cacheableTokens >= minTokens;
636
637
  const cacheSavings = meetsMin ? cacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.25)) : 0;
637
- const history = ir.history ?? [];
638
+ const history = (ir.history ?? []).filter((m) => m.role !== "system");
638
639
  const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
639
640
  const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
640
641
  return {
@@ -696,7 +697,7 @@ function lowerOpenAI(ir, profile, hints) {
696
697
  content: ir.currentTurn.parts ?? ir.currentTurn.content
697
698
  });
698
699
  }
699
- const history = ir.history ?? [];
700
+ const history = (ir.history ?? []).filter((m) => m.role !== "system");
700
701
  const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
701
702
  const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
702
703
  return {
@@ -739,7 +740,7 @@ function lowerDeepSeek(ir, profile) {
739
740
  content: ir.currentTurn.parts ?? ir.currentTurn.content
740
741
  });
741
742
  }
742
- const history = ir.history ?? [];
743
+ const history = (ir.history ?? []).filter((m) => m.role !== "system");
743
744
  const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
744
745
  const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
745
746
  return {
@@ -1185,6 +1186,85 @@ function profilesByProvider(provider) {
1185
1186
  return PROFILES_RAW.filter((p) => p.provider === provider);
1186
1187
  }
1187
1188
 
1189
+ // src/advisor.ts
1190
+ function runAdvisor(ir, result, profile) {
1191
+ const out = [];
1192
+ out.push(...detectCachingOff(ir, profile));
1193
+ out.push(...detectSingleChunkSystem(ir, profile));
1194
+ out.push(...detectToolBloat(ir, result));
1195
+ out.push(...detectHistoryUncached(ir, profile));
1196
+ return out;
1197
+ }
1198
+ function detectCachingOff(ir, profile) {
1199
+ if (profile.provider !== "anthropic") return [];
1200
+ const totalChars = ir.sections.reduce((s, sec) => s + sec.text.length, 0);
1201
+ if (totalChars < 2e3) return [];
1202
+ const anyCacheable = ir.sections.some((s) => s.cacheable === true);
1203
+ if (anyCacheable) return [];
1204
+ return [
1205
+ {
1206
+ level: "warn",
1207
+ code: "caching-off-on-claude",
1208
+ message: `System prompt is ${totalChars} chars on Anthropic but no PromptSection has cacheable=true. Anthropic prompt caching cuts cached-prefix input cost by ~90% on subsequent calls; without it, every turn re-pays full price for the static system context.`,
1209
+ suggestion: "Mark stable system sections (role, persona, tool policy) with `cacheable: true`. The lowering pass concatenates cacheable sections into a single cache-controlled block before the dynamic ones.",
1210
+ docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
1211
+ }
1212
+ ];
1213
+ }
1214
+ function detectSingleChunkSystem(ir, profile) {
1215
+ if (profile.provider !== "anthropic") return [];
1216
+ if (ir.sections.length !== 1) return [];
1217
+ const only = ir.sections[0];
1218
+ if (!only || only.text.length <= 1e3) return [];
1219
+ return [
1220
+ {
1221
+ level: "info",
1222
+ code: "single-chunk-system",
1223
+ message: `System prompt is a single ${only.text.length}-char chunk. Splitting into NamedChunks (static role/persona vs dynamic context) gives the lowering pass a finer cache-marker boundary \u2014 only the static portion needs to be byte-stable for the cache to hit.`,
1224
+ suggestion: "Refactor the system builder to return an array of `PromptSection` shaped { id, text, cacheable?: boolean }. Static chunks (role, persona, tool policy) get `cacheable: true`; dynamic ones (current context, today's date) don't.",
1225
+ docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
1226
+ }
1227
+ ];
1228
+ }
1229
+ function detectToolBloat(ir, result) {
1230
+ const SHORT_OUTPUT = /* @__PURE__ */ new Set([
1231
+ "classify",
1232
+ "extract",
1233
+ "summarize",
1234
+ "transform",
1235
+ "critique"
1236
+ ]);
1237
+ if (!ir.tools || ir.tools.length === 0) return [];
1238
+ const toolsKept = result.diagnostics.toolsKept;
1239
+ if (toolsKept <= 10) return [];
1240
+ if (!SHORT_OUTPUT.has(ir.intent.archetype)) return [];
1241
+ return [
1242
+ {
1243
+ level: "warn",
1244
+ code: "tool-bloat",
1245
+ message: `${toolsKept} tools kept after the relevance pass for archetype="${ir.intent.archetype}" (consumer declared ${ir.tools.length}). This archetype is short-output and rarely needs more than 3 tools; each tool definition eats ~350 tokens of context budget.`,
1246
+ suggestion: "Tighten `relevanceByIntent: { [archetype]: 0..1 }` per ToolDefinition. Tools below `toolRelevanceThreshold` (default 0.2) get dropped. Without `relevanceByIntent`, every tool defaults to neutral (0.5) and stays.",
1247
+ docsUrl: "https://github.com/stue/kgauto/blob/main/v2/README.md#tools"
1248
+ }
1249
+ ];
1250
+ }
1251
+ function detectHistoryUncached(ir, profile) {
1252
+ if (profile.provider !== "anthropic") return [];
1253
+ if (!ir.history || ir.history.length < 2) return [];
1254
+ if (ir.historyCachePolicy && ir.historyCachePolicy.strategy !== "none") {
1255
+ return [];
1256
+ }
1257
+ return [
1258
+ {
1259
+ level: "warn",
1260
+ code: "history-uncached-on-claude",
1261
+ message: `${ir.history.length} history messages on Anthropic with no historyCachePolicy. Every turn re-pays for the full conversation context; with caching, subsequent turns hit the cache at ~10% the input cost.`,
1262
+ suggestion: "Set `historyCachePolicy: { strategy: 'all-but-latest' }` on this IR. The lowering pass marks the message immediately preceding currentTurn with cache_control; subsequent turns whose history prefix matches byte-for-byte hit the cache.",
1263
+ docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
1264
+ }
1265
+ ];
1266
+ }
1267
+
1188
1268
  // src/compile.ts
1189
1269
  var counter = 0;
1190
1270
  function makeHandle() {
@@ -1235,6 +1315,27 @@ function compile(ir, opts = {}) {
1235
1315
  const handle = makeHandle();
1236
1316
  const finalShape = computeShape(workingIR, inputTokens);
1237
1317
  const _learningKey = learningKey(ir.intent.archetype, profile.id, finalShape);
1318
+ const diagnostics = {
1319
+ sectionsKept: workingIR.sections.length,
1320
+ sectionsDropped: ir.sections.length - workingIR.sections.length,
1321
+ toolsKept: workingIR.tools?.length ?? 0,
1322
+ toolsDropped: (ir.tools?.length ?? 0) - (workingIR.tools?.length ?? 0),
1323
+ historyKept: workingIR.history?.length ?? 0,
1324
+ historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
1325
+ cacheableTokens: lowered.diagnostics.cacheableTokens,
1326
+ estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd,
1327
+ historyCacheableTokens: lowered.diagnostics.historyCacheableTokens
1328
+ };
1329
+ const advisories = runAdvisor(
1330
+ ir,
1331
+ {
1332
+ target: profile.id,
1333
+ provider: profile.provider,
1334
+ tokensIn: inputTokens,
1335
+ diagnostics
1336
+ },
1337
+ profile
1338
+ );
1238
1339
  return {
1239
1340
  handle,
1240
1341
  target: profile.id,
@@ -1244,17 +1345,8 @@ function compile(ir, opts = {}) {
1244
1345
  estimatedCostUsd: target.estimatedCostUsd,
1245
1346
  mutationsApplied: accumulatedMutations,
1246
1347
  fallbackChain,
1247
- diagnostics: {
1248
- sectionsKept: workingIR.sections.length,
1249
- sectionsDropped: ir.sections.length - workingIR.sections.length,
1250
- toolsKept: workingIR.tools?.length ?? 0,
1251
- toolsDropped: (ir.tools?.length ?? 0) - (workingIR.tools?.length ?? 0),
1252
- historyKept: workingIR.history?.length ?? 0,
1253
- historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
1254
- cacheableTokens: lowered.diagnostics.cacheableTokens,
1255
- estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd,
1256
- historyCacheableTokens: lowered.diagnostics.historyCacheableTokens
1257
- }
1348
+ advisories,
1349
+ diagnostics
1258
1350
  };
1259
1351
  }
1260
1352
  function validateIR(ir) {
@@ -1900,6 +1992,7 @@ function compile2(ir, opts) {
1900
1992
  profilesByProvider,
1901
1993
  record,
1902
1994
  resetTokenizer,
1995
+ runAdvisor,
1903
1996
  setTokenizer,
1904
1997
  tryGetProfile
1905
1998
  });
package/dist/index.mjs CHANGED
@@ -374,7 +374,7 @@ function lower(ir, profile, hints = {}) {
374
374
  }
375
375
  function lowerAnthropic(ir, profile, hints) {
376
376
  const systemBlocks = buildAnthropicSystemBlocks(ir.sections, profile);
377
- const history = ir.history ?? [];
377
+ const history = (ir.history ?? []).filter((m) => m.role !== "system");
378
378
  const policy = ir.historyCachePolicy;
379
379
  const markIndex = resolveHistoryMarkIndex(history.length, policy);
380
380
  const messages = buildAnthropicMessages(history, ir.currentTurn, markIndex);
@@ -519,7 +519,7 @@ function lowerGoogle(ir, profile, hints) {
519
519
  const minTokens = profile.lowering.cache.minTokens ?? 4096;
520
520
  const meetsMin = cacheableTokens >= minTokens;
521
521
  const cacheSavings = meetsMin ? cacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.25)) : 0;
522
- const history = ir.history ?? [];
522
+ const history = (ir.history ?? []).filter((m) => m.role !== "system");
523
523
  const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
524
524
  const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
525
525
  return {
@@ -581,7 +581,7 @@ function lowerOpenAI(ir, profile, hints) {
581
581
  content: ir.currentTurn.parts ?? ir.currentTurn.content
582
582
  });
583
583
  }
584
- const history = ir.history ?? [];
584
+ const history = (ir.history ?? []).filter((m) => m.role !== "system");
585
585
  const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
586
586
  const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
587
587
  return {
@@ -624,7 +624,7 @@ function lowerDeepSeek(ir, profile) {
624
624
  content: ir.currentTurn.parts ?? ir.currentTurn.content
625
625
  });
626
626
  }
627
- const history = ir.history ?? [];
627
+ const history = (ir.history ?? []).filter((m) => m.role !== "system");
628
628
  const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
629
629
  const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
630
630
  return {
@@ -668,6 +668,85 @@ function setNestedField(obj, path, value) {
668
668
  cursor[parts[parts.length - 1]] = value;
669
669
  }
670
670
 
671
+ // src/advisor.ts
672
+ function runAdvisor(ir, result, profile) {
673
+ const out = [];
674
+ out.push(...detectCachingOff(ir, profile));
675
+ out.push(...detectSingleChunkSystem(ir, profile));
676
+ out.push(...detectToolBloat(ir, result));
677
+ out.push(...detectHistoryUncached(ir, profile));
678
+ return out;
679
+ }
680
+ function detectCachingOff(ir, profile) {
681
+ if (profile.provider !== "anthropic") return [];
682
+ const totalChars = ir.sections.reduce((s, sec) => s + sec.text.length, 0);
683
+ if (totalChars < 2e3) return [];
684
+ const anyCacheable = ir.sections.some((s) => s.cacheable === true);
685
+ if (anyCacheable) return [];
686
+ return [
687
+ {
688
+ level: "warn",
689
+ code: "caching-off-on-claude",
690
+ message: `System prompt is ${totalChars} chars on Anthropic but no PromptSection has cacheable=true. Anthropic prompt caching cuts cached-prefix input cost by ~90% on subsequent calls; without it, every turn re-pays full price for the static system context.`,
691
+ suggestion: "Mark stable system sections (role, persona, tool policy) with `cacheable: true`. The lowering pass concatenates cacheable sections into a single cache-controlled block before the dynamic ones.",
692
+ docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
693
+ }
694
+ ];
695
+ }
696
+ function detectSingleChunkSystem(ir, profile) {
697
+ if (profile.provider !== "anthropic") return [];
698
+ if (ir.sections.length !== 1) return [];
699
+ const only = ir.sections[0];
700
+ if (!only || only.text.length <= 1e3) return [];
701
+ return [
702
+ {
703
+ level: "info",
704
+ code: "single-chunk-system",
705
+ message: `System prompt is a single ${only.text.length}-char chunk. Splitting into NamedChunks (static role/persona vs dynamic context) gives the lowering pass a finer cache-marker boundary \u2014 only the static portion needs to be byte-stable for the cache to hit.`,
706
+ suggestion: "Refactor the system builder to return an array of `PromptSection` shaped { id, text, cacheable?: boolean }. Static chunks (role, persona, tool policy) get `cacheable: true`; dynamic ones (current context, today's date) don't.",
707
+ docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
708
+ }
709
+ ];
710
+ }
711
+ function detectToolBloat(ir, result) {
712
+ const SHORT_OUTPUT = /* @__PURE__ */ new Set([
713
+ "classify",
714
+ "extract",
715
+ "summarize",
716
+ "transform",
717
+ "critique"
718
+ ]);
719
+ if (!ir.tools || ir.tools.length === 0) return [];
720
+ const toolsKept = result.diagnostics.toolsKept;
721
+ if (toolsKept <= 10) return [];
722
+ if (!SHORT_OUTPUT.has(ir.intent.archetype)) return [];
723
+ return [
724
+ {
725
+ level: "warn",
726
+ code: "tool-bloat",
727
+ message: `${toolsKept} tools kept after the relevance pass for archetype="${ir.intent.archetype}" (consumer declared ${ir.tools.length}). This archetype is short-output and rarely needs more than 3 tools; each tool definition eats ~350 tokens of context budget.`,
728
+ suggestion: "Tighten `relevanceByIntent: { [archetype]: 0..1 }` per ToolDefinition. Tools below `toolRelevanceThreshold` (default 0.2) get dropped. Without `relevanceByIntent`, every tool defaults to neutral (0.5) and stays.",
729
+ docsUrl: "https://github.com/stue/kgauto/blob/main/v2/README.md#tools"
730
+ }
731
+ ];
732
+ }
733
+ function detectHistoryUncached(ir, profile) {
734
+ if (profile.provider !== "anthropic") return [];
735
+ if (!ir.history || ir.history.length < 2) return [];
736
+ if (ir.historyCachePolicy && ir.historyCachePolicy.strategy !== "none") {
737
+ return [];
738
+ }
739
+ return [
740
+ {
741
+ level: "warn",
742
+ code: "history-uncached-on-claude",
743
+ message: `${ir.history.length} history messages on Anthropic with no historyCachePolicy. Every turn re-pays for the full conversation context; with caching, subsequent turns hit the cache at ~10% the input cost.`,
744
+ suggestion: "Set `historyCachePolicy: { strategy: 'all-but-latest' }` on this IR. The lowering pass marks the message immediately preceding currentTurn with cache_control; subsequent turns whose history prefix matches byte-for-byte hit the cache.",
745
+ docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
746
+ }
747
+ ];
748
+ }
749
+
671
750
  // src/compile.ts
672
751
  var counter = 0;
673
752
  function makeHandle() {
@@ -718,6 +797,27 @@ function compile(ir, opts = {}) {
718
797
  const handle = makeHandle();
719
798
  const finalShape = computeShape(workingIR, inputTokens);
720
799
  const _learningKey = learningKey(ir.intent.archetype, profile.id, finalShape);
800
+ const diagnostics = {
801
+ sectionsKept: workingIR.sections.length,
802
+ sectionsDropped: ir.sections.length - workingIR.sections.length,
803
+ toolsKept: workingIR.tools?.length ?? 0,
804
+ toolsDropped: (ir.tools?.length ?? 0) - (workingIR.tools?.length ?? 0),
805
+ historyKept: workingIR.history?.length ?? 0,
806
+ historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
807
+ cacheableTokens: lowered.diagnostics.cacheableTokens,
808
+ estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd,
809
+ historyCacheableTokens: lowered.diagnostics.historyCacheableTokens
810
+ };
811
+ const advisories = runAdvisor(
812
+ ir,
813
+ {
814
+ target: profile.id,
815
+ provider: profile.provider,
816
+ tokensIn: inputTokens,
817
+ diagnostics
818
+ },
819
+ profile
820
+ );
721
821
  return {
722
822
  handle,
723
823
  target: profile.id,
@@ -727,17 +827,8 @@ function compile(ir, opts = {}) {
727
827
  estimatedCostUsd: target.estimatedCostUsd,
728
828
  mutationsApplied: accumulatedMutations,
729
829
  fallbackChain,
730
- diagnostics: {
731
- sectionsKept: workingIR.sections.length,
732
- sectionsDropped: ir.sections.length - workingIR.sections.length,
733
- toolsKept: workingIR.tools?.length ?? 0,
734
- toolsDropped: (ir.tools?.length ?? 0) - (workingIR.tools?.length ?? 0),
735
- historyKept: workingIR.history?.length ?? 0,
736
- historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
737
- cacheableTokens: lowered.diagnostics.cacheableTokens,
738
- estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd,
739
- historyCacheableTokens: lowered.diagnostics.historyCacheableTokens
740
- }
830
+ advisories,
831
+ diagnostics
741
832
  };
742
833
  }
743
834
  function validateIR(ir) {
@@ -1382,6 +1473,7 @@ export {
1382
1473
  profilesByProvider,
1383
1474
  record,
1384
1475
  resetTokenizer,
1476
+ runAdvisor,
1385
1477
  setTokenizer,
1386
1478
  tryGetProfile
1387
1479
  };
@@ -253,6 +253,41 @@ type CompiledRequest = {
253
253
  }>;
254
254
  tools?: unknown[];
255
255
  };
256
+ /**
257
+ * Best-practice advisory emitted by the compiler at compile time. Non-fatal —
258
+ * consumers log, surface in dev tools, gate on `level === 'critical'` in CI,
259
+ * or ignore. The advisor inspects the IR + selected profile + diagnostics
260
+ * and emits one entry per detected gap.
261
+ *
262
+ * Codes are stable across releases. `suggestion` and `docsUrl` are optional
263
+ * but encouraged: suggestion = the actionable diff; docsUrl = the
264
+ * interfaces/kgauto.md anchor for context.
265
+ *
266
+ * alpha.6 Phase 1 starter rules:
267
+ * - `caching-off-on-claude` (warn) system >2000 chars on Anthropic, no cacheable=true
268
+ * - `single-chunk-system` (info) Anthropic, only one PromptSection >1000 chars
269
+ * - `tool-bloat` (warn) >10 tools on a short-output archetype
270
+ * - `history-uncached-on-claude` (warn) Anthropic, ≥2 history messages, no historyCachePolicy
271
+ *
272
+ * Phase 2 (catalog as `bestPractices` block in profiles) and Phase 3 (brain
273
+ * telemetry on `advisories_fired`) are alpha.7+ territory.
274
+ */
275
+ interface BestPracticeAdvisory {
276
+ /**
277
+ * Severity. `info` = informational; `warn` = behavioral pattern that's
278
+ * usually expensive or wrong; `critical` = likely bug or production-grade
279
+ * misuse. Phase 1 ships info + warn only.
280
+ */
281
+ level: 'info' | 'warn' | 'critical';
282
+ /** Stable kebab-case code. Consumers filter / gate by this. */
283
+ code: string;
284
+ /** Human-readable explanation of what was detected. */
285
+ message: string;
286
+ /** Optional: how to fix — actionable diff or pattern. */
287
+ suggestion?: string;
288
+ /** Optional: link to docs anchor for more context. */
289
+ docsUrl?: string;
290
+ }
256
291
  interface CompileResult {
257
292
  /** Unique handle for this call — pass to record() to correlate the outcome. */
258
293
  handle: string;
@@ -270,6 +305,11 @@ interface CompileResult {
270
305
  mutationsApplied: MutationApplied[];
271
306
  /** Fallback chain — try these in order if target fails. */
272
307
  fallbackChain: string[];
308
+ /**
309
+ * Best-practice advisories emitted by the compiler. Non-fatal. Empty
310
+ * array when no rules fired. alpha.6 Phase 1.
311
+ */
312
+ advisories: BestPracticeAdvisory[];
273
313
  /** Diagnostics for caller-side logging. */
274
314
  diagnostics: {
275
315
  sectionsKept: number;
@@ -568,4 +608,4 @@ declare function tryGetProfile(id: string): ModelProfile | undefined;
568
608
  declare function allProfiles(): readonly ModelProfile[];
569
609
  declare function profilesByProvider(provider: Provider): readonly ModelProfile[];
570
610
 
571
- export { type ApiKeys as A, type CompilePolicy as C, type IntentDeclaration as I, type LoweringSpec as L, type ModelProfile as M, type NormalizedResponse as N, type OracleScore as O, type ProviderOverrides as P, type RecordInput as R, type StructuredOutputCapability as S, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type CompileResult as e, ALIASES as f, type CacheStrategy as g, type CallAttempt as h, CallError as i, type CliffRule as j, type Constraints as k, type Message as l, type MutationApplied as m, type NormalizedTokens as n, type PromptSection as o, type Provider as p, type RecoveryRule as q, type SystemPromptMode as r, type ToolDefinition as s, allProfiles as t, getProfile as u, profilesByProvider as v, tryGetProfile as w };
611
+ export { type ApiKeys as A, type BestPracticeAdvisory as B, type CompilePolicy as C, type HistoryCachePolicy as H, type IntentDeclaration as I, type LoweringSpec as L, type ModelProfile as M, type NormalizedResponse as N, type OracleScore as O, type ProviderOverrides as P, type RecordInput as R, type StructuredOutputCapability as S, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type CompileResult as e, ALIASES as f, type CacheStrategy as g, type CallAttempt as h, CallError as i, type CliffRule as j, type Constraints as k, type Message as l, type MutationApplied as m, type NormalizedTokens as n, type PromptSection as o, type Provider as p, type RecoveryRule as q, type SystemPromptMode as r, type ToolDefinition as s, allProfiles as t, getProfile as u, profilesByProvider as v, tryGetProfile as w };
@@ -253,6 +253,41 @@ type CompiledRequest = {
253
253
  }>;
254
254
  tools?: unknown[];
255
255
  };
256
+ /**
257
+ * Best-practice advisory emitted by the compiler at compile time. Non-fatal —
258
+ * consumers log, surface in dev tools, gate on `level === 'critical'` in CI,
259
+ * or ignore. The advisor inspects the IR + selected profile + diagnostics
260
+ * and emits one entry per detected gap.
261
+ *
262
+ * Codes are stable across releases. `suggestion` and `docsUrl` are optional
263
+ * but encouraged: suggestion = the actionable diff; docsUrl = the
264
+ * interfaces/kgauto.md anchor for context.
265
+ *
266
+ * alpha.6 Phase 1 starter rules:
267
+ * - `caching-off-on-claude` (warn) system >2000 chars on Anthropic, no cacheable=true
268
+ * - `single-chunk-system` (info) Anthropic, only one PromptSection >1000 chars
269
+ * - `tool-bloat` (warn) >10 tools on a short-output archetype
270
+ * - `history-uncached-on-claude` (warn) Anthropic, ≥2 history messages, no historyCachePolicy
271
+ *
272
+ * Phase 2 (catalog as `bestPractices` block in profiles) and Phase 3 (brain
273
+ * telemetry on `advisories_fired`) are alpha.7+ territory.
274
+ */
275
+ interface BestPracticeAdvisory {
276
+ /**
277
+ * Severity. `info` = informational; `warn` = behavioral pattern that's
278
+ * usually expensive or wrong; `critical` = likely bug or production-grade
279
+ * misuse. Phase 1 ships info + warn only.
280
+ */
281
+ level: 'info' | 'warn' | 'critical';
282
+ /** Stable kebab-case code. Consumers filter / gate by this. */
283
+ code: string;
284
+ /** Human-readable explanation of what was detected. */
285
+ message: string;
286
+ /** Optional: how to fix — actionable diff or pattern. */
287
+ suggestion?: string;
288
+ /** Optional: link to docs anchor for more context. */
289
+ docsUrl?: string;
290
+ }
256
291
  interface CompileResult {
257
292
  /** Unique handle for this call — pass to record() to correlate the outcome. */
258
293
  handle: string;
@@ -270,6 +305,11 @@ interface CompileResult {
270
305
  mutationsApplied: MutationApplied[];
271
306
  /** Fallback chain — try these in order if target fails. */
272
307
  fallbackChain: string[];
308
+ /**
309
+ * Best-practice advisories emitted by the compiler. Non-fatal. Empty
310
+ * array when no rules fired. alpha.6 Phase 1.
311
+ */
312
+ advisories: BestPracticeAdvisory[];
273
313
  /** Diagnostics for caller-side logging. */
274
314
  diagnostics: {
275
315
  sectionsKept: number;
@@ -568,4 +608,4 @@ declare function tryGetProfile(id: string): ModelProfile | undefined;
568
608
  declare function allProfiles(): readonly ModelProfile[];
569
609
  declare function profilesByProvider(provider: Provider): readonly ModelProfile[];
570
610
 
571
- export { type ApiKeys as A, type CompilePolicy as C, type IntentDeclaration as I, type LoweringSpec as L, type ModelProfile as M, type NormalizedResponse as N, type OracleScore as O, type ProviderOverrides as P, type RecordInput as R, type StructuredOutputCapability as S, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type CompileResult as e, ALIASES as f, type CacheStrategy as g, type CallAttempt as h, CallError as i, type CliffRule as j, type Constraints as k, type Message as l, type MutationApplied as m, type NormalizedTokens as n, type PromptSection as o, type Provider as p, type RecoveryRule as q, type SystemPromptMode as r, type ToolDefinition as s, allProfiles as t, getProfile as u, profilesByProvider as v, tryGetProfile as w };
611
+ export { type ApiKeys as A, type BestPracticeAdvisory as B, type CompilePolicy as C, type HistoryCachePolicy as H, type IntentDeclaration as I, type LoweringSpec as L, type ModelProfile as M, type NormalizedResponse as N, type OracleScore as O, type ProviderOverrides as P, type RecordInput as R, type StructuredOutputCapability as S, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type CompileResult as e, ALIASES as f, type CacheStrategy as g, type CallAttempt as h, CallError as i, type CliffRule as j, type Constraints as k, type Message as l, type MutationApplied as m, type NormalizedTokens as n, type PromptSection as o, type Provider as p, type RecoveryRule as q, type SystemPromptMode as r, type ToolDefinition as s, allProfiles as t, getProfile as u, profilesByProvider as v, tryGetProfile as w };
@@ -1,2 +1,2 @@
1
- export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-DHdCRBVH.mjs';
1
+ export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-zm6diETo.mjs';
2
2
  import './dialect.mjs';
@@ -1,2 +1,2 @@
1
- export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-MGq5Tnjv.js';
1
+ export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CQnLkQ7b.js';
2
2
  import './dialect.js';
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@warmdrift/kgauto-compiler",
3
- "version": "2.0.0-alpha.5",
3
+ "version": "2.0.0-alpha.6",
4
4
  "description": "Prompt compiler + central learning brain for multi-model AI apps. Swap models without rewriting prompts.",
5
5
  "main": "./dist/index.js",
6
6
  "module": "./dist/index.mjs",