@warmdrift/kgauto-compiler 2.0.0-alpha.5 → 2.0.0-alpha.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # @warmdrift/kgauto-compiler — v2.0.0-alpha.5
1
+ # @warmdrift/kgauto-compiler — v2.0.0-alpha.6
2
2
 
3
3
  > Prompt compiler + central learning brain for multi-model AI apps.
4
4
  > **Swap models without rewriting prompts.**
@@ -18,8 +18,8 @@ mutations.
18
18
  - **Package:** alpha — coexists with v1 (`@warmdrift/kgauto@1.2.0`) under
19
19
  the temporary name `@warmdrift/kgauto-compiler`. Renames to v2 final once
20
20
  v1 is fully retired from production.
21
- - **Tests:** 180/180 passing
22
- - **Build:** clean (47KB ESM, 64KB CJS)
21
+ - **Tests:** 201/201 passing
22
+ - **Build:** clean (47KB ESM, 68KB CJS)
23
23
  - **Brain:** schema ready (see `brain/migrations/001_initial_schema.sql`);
24
24
  awaiting dedicated Supabase provisioning.
25
25
  - **Mutation engine:** v2.1 (after enough outcome data accumulates).
@@ -154,6 +154,48 @@ The 5 prod empty-responses in tt-intelligence's `gemini-2.5-flash` dashboard
154
154
  calls? v2 catches those automatically — `expectedShortOutput` constraint plus
155
155
  the `force_thinking_budget_zero` cliff guard.
156
156
 
157
+ ## Tools
158
+
159
+ Tools are first-class IR fields. The compiler's tool-relevance pass drops
160
+ tools that don't apply to the current intent before lowering — saves
161
+ context budget on every call.
162
+
163
+ ```ts
164
+ const tools: ToolDefinition[] = [
165
+ {
166
+ name: 'web_search',
167
+ description: 'Search the public web',
168
+ parameters: { type: 'object', properties: { q: { type: 'string' } } },
169
+ relevanceByIntent: {
170
+ ask: 0.9, // primary tool for ask
171
+ hunt: 0.9,
172
+ classify: 0.0, // never useful for classification
173
+ summarize: 0.0,
174
+ extract: 0.1,
175
+ },
176
+ },
177
+ // ...
178
+ ];
179
+ ```
180
+
181
+ Each tool declares per-intent relevance scores 0..1. The pass keeps tools
182
+ where `relevanceByIntent[currentIntent] >= toolRelevanceThreshold` (default
183
+ `0.2`). Missing entries default to neutral (`0.5`) — kept by default. Set
184
+ explicit `0.0` to hard-exclude.
185
+
186
+ Tool definitions eat ~350 tokens of context per tool (L-051), so trimming
187
+ matters: 12 declared tools, only 3 relevant → 9 × 350 = 3150 tokens
188
+ recovered per call.
189
+
190
+ The `tool-bloat` advisory (alpha.6) fires when more than 10 tools survive
191
+ the relevance pass on a short-output archetype (`classify`, `extract`,
192
+ `summarize`, `transform`, `critique`) — those archetypes typically use
193
+ ≤3 tools, so a kept-count >10 indicates either missing `relevanceByIntent`
194
+ or scores set too generously.
195
+
196
+ DeepSeek profiles cap tools to 1 (sequential-only). Other providers
197
+ inherit the count from the IR after the relevance pass.
198
+
157
199
  ## Brain provisioning
158
200
 
159
201
  1. Create a NEW Supabase project (suggested name: `kgauto-brain`)
package/dist/index.d.mts CHANGED
@@ -1,5 +1,5 @@
1
- import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult } from './profiles-DHdCRBVH.mjs';
2
- export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-DHdCRBVH.mjs';
1
+ import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory } from './profiles-Py8c7zjJ.mjs';
2
+ export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-Py8c7zjJ.mjs';
3
3
  export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, IntentArchetypeName, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.mjs';
4
4
 
5
5
  /**
@@ -19,6 +19,15 @@ interface CompileOptions {
19
19
  toolRelevanceThreshold?: number;
20
20
  /** History compression — turns count threshold (default 8). */
21
21
  compressHistoryAfter?: number;
22
+ /**
23
+ * History compression — token threshold (alpha.7). When total history
24
+ * tokens exceed this AND the history is longer than the recent window kept verbatim,
25
+ * compress even when the turn count has not exceeded `compressHistoryAfter`. Catches
26
+ * fat-message bloat (tool-using agents pack many tool-call/result pairs
27
+ * into single assistant messages — count stays low, tokens explode).
28
+ * Default undefined (disabled — backward-compatible).
29
+ */
30
+ compressHistoryAboveTokens?: number;
22
31
  /**
23
32
  * Consumer-declared policy. Filters blocked models, enforces cost
24
33
  * ceiling, boosts preferred. See CompilePolicy in ir.ts.
@@ -114,6 +123,53 @@ declare function clearBrain(): void;
114
123
  * network error is swallowed/forwarded to onError.
115
124
  */
116
125
  declare function record(input: RecordInput): Promise<void>;
126
+ /**
127
+ * Wire shape POSTed by `record()` to the brain proxy's `/outcomes` endpoint.
128
+ *
129
+ * Exported so consumer proxies can `import { OutcomePayload } from
130
+ * '@warmdrift/kgauto-compiler'` instead of redefining the shape — that way
131
+ * TypeScript catches future schema additions (cache fields, advisory
132
+ * telemetry, etc.) at consumer build time, not silently at runtime.
133
+ *
134
+ * **Forward-compat rule:** consumer proxies should pass the body through to
135
+ * Supabase rather than reconstructing field-by-field. The recommended shape
136
+ * is `const row = { ...body }` (or `await supabase.from('compile_outcomes')
137
+ * .insert(body)` directly). Filtering proxies break schema evolution
138
+ * silently — see s17 root-cause investigation 2026-05-10.
139
+ */
140
+ interface OutcomePayload {
141
+ handle: string;
142
+ app_id?: string;
143
+ intent_archetype?: string;
144
+ /** The model that ACTUALLY RAN (post-fallback). */
145
+ model?: string;
146
+ /** The model v2 compile() originally targeted. NULL when no fallback. */
147
+ requested_model?: string;
148
+ provider?: string;
149
+ shape_key?: string;
150
+ learning_key?: string;
151
+ mutations_applied: string[];
152
+ tokens_in: number;
153
+ tokens_out: number;
154
+ estimated_tokens_in?: number;
155
+ latency_ms: number;
156
+ success: boolean;
157
+ empty_response: boolean;
158
+ error_type?: string;
159
+ tools_called?: string[];
160
+ oracle_score?: number;
161
+ oracle_dimensions?: Record<string, number>;
162
+ oracle_rationale?: string;
163
+ prompt_preview?: string;
164
+ response_preview?: string;
165
+ dialect_version: string;
166
+ cache_read_input_tokens?: number;
167
+ cache_creation_input_tokens?: number;
168
+ cost_usd_actual?: number;
169
+ ttft_ms?: number;
170
+ history_cacheable_tokens?: number;
171
+ history_tokens_at_compile?: number;
172
+ }
117
173
 
118
174
  /**
119
175
  * Oracle contract — how an app tells the brain whether a response was good.
@@ -189,6 +245,41 @@ declare function resetTokenizer(): void;
189
245
  */
190
246
  declare function countTokens(text: string): number;
191
247
 
248
+ /**
249
+ * Best-practice advisor — alpha.6 Phase 1.
250
+ *
251
+ * Inspects an IR + the selected profile + compile diagnostics and emits a
252
+ * list of `BestPracticeAdvisory` entries describing detected gaps. Runs
253
+ * after `lower()` in the compile pipeline; the result lands on
254
+ * `CompileResult.advisories` for the consumer to log, surface, or filter.
255
+ *
256
+ * Driven by interfaces/kgauto.md `best-practice-advisories` (IC, 2026-05-07).
257
+ * Phase 1 ships 4 starter rules sourced from the s14 kgauto comment +
258
+ * s15 empirical seed of brain anti-patterns:
259
+ *
260
+ * 1. `caching-off-on-claude` system ≥2000 chars on Anthropic, no cacheable=true
261
+ * 2. `single-chunk-system` Anthropic, only one PromptSection >1000 chars
262
+ * 3. `tool-bloat` >10 tools on a short-output archetype
263
+ * 4. `history-uncached-on-claude` Anthropic, ≥2 history messages, no historyCachePolicy
264
+ *
265
+ * Each rule is a pure function: (ir, result, profile) → BestPracticeAdvisory[].
266
+ * No side effects. No randomness. Deterministic for a given IR.
267
+ *
268
+ * The thresholds (2000 chars, 1000 chars, 10 tools, 2 history) are chosen
269
+ * to balance noise vs. signal — too low fires on innocuous calls, too high
270
+ * misses real waste. They may be tuned with brain evidence over time; for now
271
+ * they're literals in the rule bodies. Make them configurable when the
272
+ * cost-watcher's R-rules graduate to here.
273
+ */
274
+
275
+ /** Subset of CompileResult fields the advisor needs. */
276
+ type AdvisorContext = Pick<CompileResult, 'target' | 'provider' | 'tokensIn' | 'diagnostics'>;
277
+ /**
278
+ * Run all Phase 1 rules and return collected advisories. Order is fixed
279
+ * (same as the rule list above) so output is stable across runs.
280
+ */
281
+ declare function runAdvisor(ir: PromptIR, result: AdvisorContext, profile: ModelProfile): BestPracticeAdvisory[];
282
+
192
283
  /**
193
284
  * @warmdrift/kgauto v2 — prompt compiler + central learning brain.
194
285
  *
@@ -235,4 +326,4 @@ declare function countTokens(text: string): number;
235
326
  */
236
327
  declare function compile(ir: PromptIR, opts?: CompileOptions): CompileResult;
237
328
 
238
- export { ApiKeys, type AppOracle, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, setTokenizer };
329
+ export { ApiKeys, type AppOracle, BestPracticeAdvisory, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, type OutcomePayload, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, runAdvisor, setTokenizer };
package/dist/index.d.ts CHANGED
@@ -1,5 +1,5 @@
1
- import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult } from './profiles-MGq5Tnjv.js';
2
- export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-MGq5Tnjv.js';
1
+ import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory } from './profiles-B3eNQ2py.js';
2
+ export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-B3eNQ2py.js';
3
3
  export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, IntentArchetypeName, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.js';
4
4
 
5
5
  /**
@@ -19,6 +19,15 @@ interface CompileOptions {
19
19
  toolRelevanceThreshold?: number;
20
20
  /** History compression — turns count threshold (default 8). */
21
21
  compressHistoryAfter?: number;
22
+ /**
23
+ * History compression — token threshold (alpha.7). When total history
24
+ * tokens exceed this AND the history is longer than the recent window kept verbatim,
25
+ * compress even when the turn count has not exceeded `compressHistoryAfter`. Catches
26
+ * fat-message bloat (tool-using agents pack many tool-call/result pairs
27
+ * into single assistant messages — count stays low, tokens explode).
28
+ * Default undefined (disabled — backward-compatible).
29
+ */
30
+ compressHistoryAboveTokens?: number;
22
31
  /**
23
32
  * Consumer-declared policy. Filters blocked models, enforces cost
24
33
  * ceiling, boosts preferred. See CompilePolicy in ir.ts.
@@ -114,6 +123,53 @@ declare function clearBrain(): void;
114
123
  * network error is swallowed/forwarded to onError.
115
124
  */
116
125
  declare function record(input: RecordInput): Promise<void>;
126
+ /**
127
+ * Wire shape POSTed by `record()` to the brain proxy's `/outcomes` endpoint.
128
+ *
129
+ * Exported so consumer proxies can `import { OutcomePayload } from
130
+ * '@warmdrift/kgauto-compiler'` instead of redefining the shape — that way
131
+ * TypeScript catches future schema additions (cache fields, advisory
132
+ * telemetry, etc.) at consumer build time, not silently at runtime.
133
+ *
134
+ * **Forward-compat rule:** consumer proxies should pass the body through to
135
+ * Supabase rather than reconstructing field-by-field. The recommended shape
136
+ * is `const row = { ...body }` (or `await supabase.from('compile_outcomes')
137
+ * .insert(body)` directly). Filtering proxies break schema evolution
138
+ * silently — see s17 root-cause investigation 2026-05-10.
139
+ */
140
+ interface OutcomePayload {
141
+ handle: string;
142
+ app_id?: string;
143
+ intent_archetype?: string;
144
+ /** The model that ACTUALLY RAN (post-fallback). */
145
+ model?: string;
146
+ /** The model v2 compile() originally targeted. NULL when no fallback. */
147
+ requested_model?: string;
148
+ provider?: string;
149
+ shape_key?: string;
150
+ learning_key?: string;
151
+ mutations_applied: string[];
152
+ tokens_in: number;
153
+ tokens_out: number;
154
+ estimated_tokens_in?: number;
155
+ latency_ms: number;
156
+ success: boolean;
157
+ empty_response: boolean;
158
+ error_type?: string;
159
+ tools_called?: string[];
160
+ oracle_score?: number;
161
+ oracle_dimensions?: Record<string, number>;
162
+ oracle_rationale?: string;
163
+ prompt_preview?: string;
164
+ response_preview?: string;
165
+ dialect_version: string;
166
+ cache_read_input_tokens?: number;
167
+ cache_creation_input_tokens?: number;
168
+ cost_usd_actual?: number;
169
+ ttft_ms?: number;
170
+ history_cacheable_tokens?: number;
171
+ history_tokens_at_compile?: number;
172
+ }
117
173
 
118
174
  /**
119
175
  * Oracle contract — how an app tells the brain whether a response was good.
@@ -189,6 +245,41 @@ declare function resetTokenizer(): void;
189
245
  */
190
246
  declare function countTokens(text: string): number;
191
247
 
248
+ /**
249
+ * Best-practice advisor — alpha.6 Phase 1.
250
+ *
251
+ * Inspects an IR + the selected profile + compile diagnostics and emits a
252
+ * list of `BestPracticeAdvisory` entries describing detected gaps. Runs
253
+ * after `lower()` in the compile pipeline; the result lands on
254
+ * `CompileResult.advisories` for the consumer to log, surface, or filter.
255
+ *
256
+ * Driven by interfaces/kgauto.md `best-practice-advisories` (IC, 2026-05-07).
257
+ * Phase 1 ships 4 starter rules sourced from the s14 kgauto comment +
258
+ * s15 empirical seed of brain anti-patterns:
259
+ *
260
+ * 1. `caching-off-on-claude` system ≥2000 chars on Anthropic, no cacheable=true
261
+ * 2. `single-chunk-system` Anthropic, only one PromptSection >1000 chars
262
+ * 3. `tool-bloat` >10 tools on a short-output archetype
263
+ * 4. `history-uncached-on-claude` Anthropic, ≥2 history messages, no historyCachePolicy
264
+ *
265
+ * Each rule is a pure function: (ir, result, profile) → BestPracticeAdvisory[].
266
+ * No side effects. No randomness. Deterministic for a given IR.
267
+ *
268
+ * The thresholds (2000 chars, 1000 chars, 10 tools, 2 history) are chosen
269
+ * to balance noise vs. signal — too low fires on innocuous calls, too high
270
+ * misses real waste. They may be tuned with brain evidence over time; for now
271
+ * they're literals in the rule bodies. Make them configurable when the
272
+ * cost-watcher's R-rules graduate to here.
273
+ */
274
+
275
+ /** Subset of CompileResult fields the advisor needs. */
276
+ type AdvisorContext = Pick<CompileResult, 'target' | 'provider' | 'tokensIn' | 'diagnostics'>;
277
+ /**
278
+ * Run all Phase 1 rules and return collected advisories. Order is fixed
279
+ * (same as the rule list above) so output is stable across runs.
280
+ */
281
+ declare function runAdvisor(ir: PromptIR, result: AdvisorContext, profile: ModelProfile): BestPracticeAdvisory[];
282
+
192
283
  /**
193
284
  * @warmdrift/kgauto v2 — prompt compiler + central learning brain.
194
285
  *
@@ -235,4 +326,4 @@ declare function countTokens(text: string): number;
235
326
  */
236
327
  declare function compile(ir: PromptIR, opts?: CompileOptions): CompileResult;
237
328
 
238
- export { ApiKeys, type AppOracle, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, setTokenizer };
329
+ export { ApiKeys, type AppOracle, BestPracticeAdvisory, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, type OutcomePayload, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, runAdvisor, setTokenizer };
package/dist/index.js CHANGED
@@ -43,6 +43,7 @@ __export(index_exports, {
43
43
  profilesByProvider: () => profilesByProvider,
44
44
  record: () => record,
45
45
  resetTokenizer: () => resetTokenizer,
46
+ runAdvisor: () => runAdvisor,
46
47
  setTokenizer: () => setTokenizer,
47
48
  tryGetProfile: () => tryGetProfile
48
49
  });
@@ -235,20 +236,37 @@ function passToolRelevance(ir, opts = {}) {
235
236
  ]
236
237
  };
237
238
  }
239
+ function totalHistoryTokens(history) {
240
+ let total = 0;
241
+ for (const m of history) {
242
+ if (typeof m.content === "string") total += countTokens(m.content);
243
+ }
244
+ return total;
245
+ }
238
246
  function passCompressHistory(ir, opts = {}) {
239
247
  const history = ir.history;
240
- if (!history || history.length === 0) return { value: ir, mutations: [] };
248
+ if (!history || history.length === 0) {
249
+ return { value: ir, mutations: [], historyTokensTotal: 0 };
250
+ }
241
251
  const keepRecent = opts.keepRecent ?? 4;
242
252
  const summarizeOlderThan = opts.summarizeOlderThan ?? 8;
243
- if (history.length <= summarizeOlderThan) return { value: ir, mutations: [] };
253
+ const summarizeAboveTokens = opts.summarizeAboveTokens;
254
+ const historyTokensTotal = totalHistoryTokens(history);
255
+ const countThresholdHit = history.length > summarizeOlderThan;
256
+ const tokenThresholdHit = summarizeAboveTokens !== void 0 && historyTokensTotal > summarizeAboveTokens && history.length > keepRecent;
257
+ if (!countThresholdHit && !tokenThresholdHit) {
258
+ return { value: ir, mutations: [], historyTokensTotal };
259
+ }
244
260
  const cutIndex = history.length - keepRecent;
245
261
  const old = history.slice(0, cutIndex);
246
262
  const recent = history.slice(cutIndex);
247
263
  const userTurns = old.filter((m) => m.role === "user");
248
264
  const firstUserLine = userTurns[0]?.content.split("\n")[0]?.slice(0, 200) ?? "";
265
+ const oldTokens = totalHistoryTokens(old);
266
+ const trigger = tokenThresholdHit && !countThresholdHit ? "tokens" : "count";
249
267
  const summary = {
250
268
  role: "system",
251
- content: `[Earlier conversation: ${old.length} turns omitted. First user message: "${firstUserLine}"]`
269
+ content: `[Earlier conversation: ${old.length} turns omitted (~${oldTokens} tokens). First user message: "${firstUserLine}"]`
252
270
  };
253
271
  return {
254
272
  value: { ...ir, history: [summary, ...recent] },
@@ -257,9 +275,10 @@ function passCompressHistory(ir, opts = {}) {
257
275
  id: `compress-history-${old.length}`,
258
276
  source: "static_pass",
259
277
  passName: "compress_history",
260
- description: `Compressed ${old.length} old turns into 1 summary line (kept ${keepRecent} recent)`
278
+ description: trigger === "tokens" ? `Compressed ${old.length} old turns (~${oldTokens} tokens) into 1 summary \u2014 token threshold ${summarizeAboveTokens} exceeded (kept ${keepRecent} recent)` : `Compressed ${old.length} old turns into 1 summary (kept ${keepRecent} recent)`
261
279
  }
262
- ]
280
+ ],
281
+ historyTokensTotal
263
282
  };
264
283
  }
265
284
  function passApplyCliffs(ir, profile, estimatedInputTokens) {
@@ -489,7 +508,7 @@ function lower(ir, profile, hints = {}) {
489
508
  }
490
509
  function lowerAnthropic(ir, profile, hints) {
491
510
  const systemBlocks = buildAnthropicSystemBlocks(ir.sections, profile);
492
- const history = ir.history ?? [];
511
+ const history = (ir.history ?? []).filter((m) => m.role !== "system");
493
512
  const policy = ir.historyCachePolicy;
494
513
  const markIndex = resolveHistoryMarkIndex(history.length, policy);
495
514
  const messages = buildAnthropicMessages(history, ir.currentTurn, markIndex);
@@ -634,7 +653,7 @@ function lowerGoogle(ir, profile, hints) {
634
653
  const minTokens = profile.lowering.cache.minTokens ?? 4096;
635
654
  const meetsMin = cacheableTokens >= minTokens;
636
655
  const cacheSavings = meetsMin ? cacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.25)) : 0;
637
- const history = ir.history ?? [];
656
+ const history = (ir.history ?? []).filter((m) => m.role !== "system");
638
657
  const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
639
658
  const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
640
659
  return {
@@ -696,7 +715,7 @@ function lowerOpenAI(ir, profile, hints) {
696
715
  content: ir.currentTurn.parts ?? ir.currentTurn.content
697
716
  });
698
717
  }
699
- const history = ir.history ?? [];
718
+ const history = (ir.history ?? []).filter((m) => m.role !== "system");
700
719
  const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
701
720
  const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
702
721
  return {
@@ -739,7 +758,7 @@ function lowerDeepSeek(ir, profile) {
739
758
  content: ir.currentTurn.parts ?? ir.currentTurn.content
740
759
  });
741
760
  }
742
- const history = ir.history ?? [];
761
+ const history = (ir.history ?? []).filter((m) => m.role !== "system");
743
762
  const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
744
763
  const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
745
764
  return {
@@ -1185,6 +1204,85 @@ function profilesByProvider(provider) {
1185
1204
  return PROFILES_RAW.filter((p) => p.provider === provider);
1186
1205
  }
1187
1206
 
1207
+ // src/advisor.ts
1208
+ function runAdvisor(ir, result, profile) {
1209
+ const out = [];
1210
+ out.push(...detectCachingOff(ir, profile));
1211
+ out.push(...detectSingleChunkSystem(ir, profile));
1212
+ out.push(...detectToolBloat(ir, result));
1213
+ out.push(...detectHistoryUncached(ir, profile));
1214
+ return out;
1215
+ }
1216
+ function detectCachingOff(ir, profile) {
1217
+ if (profile.provider !== "anthropic") return [];
1218
+ const totalChars = ir.sections.reduce((s, sec) => s + sec.text.length, 0);
1219
+ if (totalChars < 2e3) return [];
1220
+ const anyCacheable = ir.sections.some((s) => s.cacheable === true);
1221
+ if (anyCacheable) return [];
1222
+ return [
1223
+ {
1224
+ level: "warn",
1225
+ code: "caching-off-on-claude",
1226
+ message: `System prompt is ${totalChars} chars on Anthropic but no PromptSection has cacheable=true. Anthropic prompt caching cuts cached-prefix input cost by ~90% on subsequent calls; without it, every turn re-pays full price for the static system context.`,
1227
+ suggestion: "Mark stable system sections (role, persona, tool policy) with `cacheable: true`. The lowering pass concatenates cacheable sections into a single cache-controlled block before the dynamic ones.",
1228
+ docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
1229
+ }
1230
+ ];
1231
+ }
1232
+ function detectSingleChunkSystem(ir, profile) {
1233
+ if (profile.provider !== "anthropic") return [];
1234
+ if (ir.sections.length !== 1) return [];
1235
+ const only = ir.sections[0];
1236
+ if (!only || only.text.length <= 1e3) return [];
1237
+ return [
1238
+ {
1239
+ level: "info",
1240
+ code: "single-chunk-system",
1241
+ message: `System prompt is a single ${only.text.length}-char chunk. Splitting into NamedChunks (static role/persona vs dynamic context) gives the lowering pass a finer cache-marker boundary \u2014 only the static portion needs to be byte-stable for the cache to hit.`,
1242
+ suggestion: "Refactor the system builder to return an array of `PromptSection` shaped { id, text, cacheable?: boolean }. Static chunks (role, persona, tool policy) get `cacheable: true`; dynamic ones (current context, today's date) don't.",
1243
+ docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
1244
+ }
1245
+ ];
1246
+ }
1247
+ function detectToolBloat(ir, result) {
1248
+ const SHORT_OUTPUT = /* @__PURE__ */ new Set([
1249
+ "classify",
1250
+ "extract",
1251
+ "summarize",
1252
+ "transform",
1253
+ "critique"
1254
+ ]);
1255
+ if (!ir.tools || ir.tools.length === 0) return [];
1256
+ const toolsKept = result.diagnostics.toolsKept;
1257
+ if (toolsKept <= 10) return [];
1258
+ if (!SHORT_OUTPUT.has(ir.intent.archetype)) return [];
1259
+ return [
1260
+ {
1261
+ level: "warn",
1262
+ code: "tool-bloat",
1263
+ message: `${toolsKept} tools kept after the relevance pass for archetype="${ir.intent.archetype}" (consumer declared ${ir.tools.length}). This archetype is short-output and rarely needs more than 3 tools; each tool definition eats ~350 tokens of context budget.`,
1264
+ suggestion: "Tighten `relevanceByIntent: { [archetype]: 0..1 }` per ToolDefinition. Tools below `toolRelevanceThreshold` (default 0.2) get dropped. Without `relevanceByIntent`, every tool defaults to neutral (0.5) and stays.",
1265
+ docsUrl: "https://github.com/stue/kgauto/blob/main/v2/README.md#tools"
1266
+ }
1267
+ ];
1268
+ }
1269
+ function detectHistoryUncached(ir, profile) {
1270
+ if (profile.provider !== "anthropic") return [];
1271
+ if (!ir.history || ir.history.length < 2) return [];
1272
+ if (ir.historyCachePolicy && ir.historyCachePolicy.strategy !== "none") {
1273
+ return [];
1274
+ }
1275
+ return [
1276
+ {
1277
+ level: "warn",
1278
+ code: "history-uncached-on-claude",
1279
+ message: `${ir.history.length} history messages on Anthropic with no historyCachePolicy. Every turn re-pays for the full conversation context; with caching, subsequent turns hit the cache at ~10% the input cost.`,
1280
+ suggestion: "Set `historyCachePolicy: { strategy: 'all-but-latest' }` on this IR. The lowering pass marks the message immediately preceding currentTurn with cache_control; subsequent turns whose history prefix matches byte-for-byte hit the cache.",
1281
+ docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
1282
+ }
1283
+ ];
1284
+ }
1285
+
1188
1286
  // src/compile.ts
1189
1287
  var counter = 0;
1190
1288
  function makeHandle() {
@@ -1200,7 +1298,8 @@ function compile(ir, opts = {}) {
1200
1298
  threshold: opts.toolRelevanceThreshold
1201
1299
  });
1202
1300
  const compressed = passCompressHistory(toolFiltered.value, {
1203
- summarizeOlderThan: opts.compressHistoryAfter
1301
+ summarizeOlderThan: opts.compressHistoryAfter,
1302
+ summarizeAboveTokens: opts.compressHistoryAboveTokens
1204
1303
  });
1205
1304
  let workingIR = compressed.value;
1206
1305
  const accumulatedMutations = [
@@ -1235,6 +1334,28 @@ function compile(ir, opts = {}) {
1235
1334
  const handle = makeHandle();
1236
1335
  const finalShape = computeShape(workingIR, inputTokens);
1237
1336
  const _learningKey = learningKey(ir.intent.archetype, profile.id, finalShape);
1337
+ const diagnostics = {
1338
+ sectionsKept: workingIR.sections.length,
1339
+ sectionsDropped: ir.sections.length - workingIR.sections.length,
1340
+ toolsKept: workingIR.tools?.length ?? 0,
1341
+ toolsDropped: (ir.tools?.length ?? 0) - (workingIR.tools?.length ?? 0),
1342
+ historyKept: workingIR.history?.length ?? 0,
1343
+ historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
1344
+ cacheableTokens: lowered.diagnostics.cacheableTokens,
1345
+ estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd,
1346
+ historyCacheableTokens: lowered.diagnostics.historyCacheableTokens,
1347
+ historyTokensTotal: compressed.historyTokensTotal
1348
+ };
1349
+ const advisories = runAdvisor(
1350
+ ir,
1351
+ {
1352
+ target: profile.id,
1353
+ provider: profile.provider,
1354
+ tokensIn: inputTokens,
1355
+ diagnostics
1356
+ },
1357
+ profile
1358
+ );
1238
1359
  return {
1239
1360
  handle,
1240
1361
  target: profile.id,
@@ -1244,17 +1365,8 @@ function compile(ir, opts = {}) {
1244
1365
  estimatedCostUsd: target.estimatedCostUsd,
1245
1366
  mutationsApplied: accumulatedMutations,
1246
1367
  fallbackChain,
1247
- diagnostics: {
1248
- sectionsKept: workingIR.sections.length,
1249
- sectionsDropped: ir.sections.length - workingIR.sections.length,
1250
- toolsKept: workingIR.tools?.length ?? 0,
1251
- toolsDropped: (ir.tools?.length ?? 0) - (workingIR.tools?.length ?? 0),
1252
- historyKept: workingIR.history?.length ?? 0,
1253
- historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
1254
- cacheableTokens: lowered.diagnostics.cacheableTokens,
1255
- estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd,
1256
- historyCacheableTokens: lowered.diagnostics.historyCacheableTokens
1257
- }
1368
+ advisories,
1369
+ diagnostics
1258
1370
  };
1259
1371
  }
1260
1372
  function validateIR(ir) {
@@ -1339,7 +1451,8 @@ function registerCompile(appId, archetype, ir, result) {
1339
1451
  estimatedTokensIn: tokens,
1340
1452
  mutationsApplied: result.mutationsApplied.map((m) => m.id),
1341
1453
  startedAt: Date.now(),
1342
- historyCacheableTokens: result.diagnostics.historyCacheableTokens
1454
+ historyCacheableTokens: result.diagnostics.historyCacheableTokens,
1455
+ historyTokensTotal: result.diagnostics.historyTokensTotal
1343
1456
  });
1344
1457
  }
1345
1458
  async function record(input) {
@@ -1413,7 +1526,8 @@ function buildPayload(input, reg) {
1413
1526
  cache_creation_input_tokens: input.cacheCreationInputTokens,
1414
1527
  cost_usd_actual: costUsdActual,
1415
1528
  ttft_ms: input.ttftMs,
1416
- history_cacheable_tokens: reg?.historyCacheableTokens
1529
+ history_cacheable_tokens: reg?.historyCacheableTokens,
1530
+ history_tokens_at_compile: reg?.historyTokensTotal
1417
1531
  };
1418
1532
  }
1419
1533
  function computeCostUsd(modelId, tokensIn, tokensOut) {
@@ -1900,6 +2014,7 @@ function compile2(ir, opts) {
1900
2014
  profilesByProvider,
1901
2015
  record,
1902
2016
  resetTokenizer,
2017
+ runAdvisor,
1903
2018
  setTokenizer,
1904
2019
  tryGetProfile
1905
2020
  });
package/dist/index.mjs CHANGED
@@ -120,20 +120,37 @@ function passToolRelevance(ir, opts = {}) {
120
120
  ]
121
121
  };
122
122
  }
123
+ function totalHistoryTokens(history) {
124
+ let total = 0;
125
+ for (const m of history) {
126
+ if (typeof m.content === "string") total += countTokens(m.content);
127
+ }
128
+ return total;
129
+ }
123
130
  function passCompressHistory(ir, opts = {}) {
124
131
  const history = ir.history;
125
- if (!history || history.length === 0) return { value: ir, mutations: [] };
132
+ if (!history || history.length === 0) {
133
+ return { value: ir, mutations: [], historyTokensTotal: 0 };
134
+ }
126
135
  const keepRecent = opts.keepRecent ?? 4;
127
136
  const summarizeOlderThan = opts.summarizeOlderThan ?? 8;
128
- if (history.length <= summarizeOlderThan) return { value: ir, mutations: [] };
137
+ const summarizeAboveTokens = opts.summarizeAboveTokens;
138
+ const historyTokensTotal = totalHistoryTokens(history);
139
+ const countThresholdHit = history.length > summarizeOlderThan;
140
+ const tokenThresholdHit = summarizeAboveTokens !== void 0 && historyTokensTotal > summarizeAboveTokens && history.length > keepRecent;
141
+ if (!countThresholdHit && !tokenThresholdHit) {
142
+ return { value: ir, mutations: [], historyTokensTotal };
143
+ }
129
144
  const cutIndex = history.length - keepRecent;
130
145
  const old = history.slice(0, cutIndex);
131
146
  const recent = history.slice(cutIndex);
132
147
  const userTurns = old.filter((m) => m.role === "user");
133
148
  const firstUserLine = userTurns[0]?.content.split("\n")[0]?.slice(0, 200) ?? "";
149
+ const oldTokens = totalHistoryTokens(old);
150
+ const trigger = tokenThresholdHit && !countThresholdHit ? "tokens" : "count";
134
151
  const summary = {
135
152
  role: "system",
136
- content: `[Earlier conversation: ${old.length} turns omitted. First user message: "${firstUserLine}"]`
153
+ content: `[Earlier conversation: ${old.length} turns omitted (~${oldTokens} tokens). First user message: "${firstUserLine}"]`
137
154
  };
138
155
  return {
139
156
  value: { ...ir, history: [summary, ...recent] },
@@ -142,9 +159,10 @@ function passCompressHistory(ir, opts = {}) {
142
159
  id: `compress-history-${old.length}`,
143
160
  source: "static_pass",
144
161
  passName: "compress_history",
145
- description: `Compressed ${old.length} old turns into 1 summary line (kept ${keepRecent} recent)`
162
+ description: trigger === "tokens" ? `Compressed ${old.length} old turns (~${oldTokens} tokens) into 1 summary \u2014 token threshold ${summarizeAboveTokens} exceeded (kept ${keepRecent} recent)` : `Compressed ${old.length} old turns into 1 summary (kept ${keepRecent} recent)`
146
163
  }
147
- ]
164
+ ],
165
+ historyTokensTotal
148
166
  };
149
167
  }
150
168
  function passApplyCliffs(ir, profile, estimatedInputTokens) {
@@ -374,7 +392,7 @@ function lower(ir, profile, hints = {}) {
374
392
  }
375
393
  function lowerAnthropic(ir, profile, hints) {
376
394
  const systemBlocks = buildAnthropicSystemBlocks(ir.sections, profile);
377
- const history = ir.history ?? [];
395
+ const history = (ir.history ?? []).filter((m) => m.role !== "system");
378
396
  const policy = ir.historyCachePolicy;
379
397
  const markIndex = resolveHistoryMarkIndex(history.length, policy);
380
398
  const messages = buildAnthropicMessages(history, ir.currentTurn, markIndex);
@@ -519,7 +537,7 @@ function lowerGoogle(ir, profile, hints) {
519
537
  const minTokens = profile.lowering.cache.minTokens ?? 4096;
520
538
  const meetsMin = cacheableTokens >= minTokens;
521
539
  const cacheSavings = meetsMin ? cacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.25)) : 0;
522
- const history = ir.history ?? [];
540
+ const history = (ir.history ?? []).filter((m) => m.role !== "system");
523
541
  const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
524
542
  const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
525
543
  return {
@@ -581,7 +599,7 @@ function lowerOpenAI(ir, profile, hints) {
581
599
  content: ir.currentTurn.parts ?? ir.currentTurn.content
582
600
  });
583
601
  }
584
- const history = ir.history ?? [];
602
+ const history = (ir.history ?? []).filter((m) => m.role !== "system");
585
603
  const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
586
604
  const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
587
605
  return {
@@ -624,7 +642,7 @@ function lowerDeepSeek(ir, profile) {
624
642
  content: ir.currentTurn.parts ?? ir.currentTurn.content
625
643
  });
626
644
  }
627
- const history = ir.history ?? [];
645
+ const history = (ir.history ?? []).filter((m) => m.role !== "system");
628
646
  const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
629
647
  const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
630
648
  return {
@@ -668,6 +686,85 @@ function setNestedField(obj, path, value) {
668
686
  cursor[parts[parts.length - 1]] = value;
669
687
  }
670
688
 
689
+ // src/advisor.ts
690
+ function runAdvisor(ir, result, profile) {
691
+ const out = [];
692
+ out.push(...detectCachingOff(ir, profile));
693
+ out.push(...detectSingleChunkSystem(ir, profile));
694
+ out.push(...detectToolBloat(ir, result));
695
+ out.push(...detectHistoryUncached(ir, profile));
696
+ return out;
697
+ }
698
+ function detectCachingOff(ir, profile) {
699
+ if (profile.provider !== "anthropic") return [];
700
+ const totalChars = ir.sections.reduce((s, sec) => s + sec.text.length, 0);
701
+ if (totalChars < 2e3) return [];
702
+ const anyCacheable = ir.sections.some((s) => s.cacheable === true);
703
+ if (anyCacheable) return [];
704
+ return [
705
+ {
706
+ level: "warn",
707
+ code: "caching-off-on-claude",
708
+ message: `System prompt is ${totalChars} chars on Anthropic but no PromptSection has cacheable=true. Anthropic prompt caching cuts cached-prefix input cost by ~90% on subsequent calls; without it, every turn re-pays full price for the static system context.`,
709
+ suggestion: "Mark stable system sections (role, persona, tool policy) with `cacheable: true`. The lowering pass concatenates cacheable sections into a single cache-controlled block before the dynamic ones.",
710
+ docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
711
+ }
712
+ ];
713
+ }
714
+ function detectSingleChunkSystem(ir, profile) {
715
+ if (profile.provider !== "anthropic") return [];
716
+ if (ir.sections.length !== 1) return [];
717
+ const only = ir.sections[0];
718
+ if (!only || only.text.length <= 1e3) return [];
719
+ return [
720
+ {
721
+ level: "info",
722
+ code: "single-chunk-system",
723
+ message: `System prompt is a single ${only.text.length}-char chunk. Splitting into NamedChunks (static role/persona vs dynamic context) gives the lowering pass a finer cache-marker boundary \u2014 only the static portion needs to be byte-stable for the cache to hit.`,
724
+ suggestion: "Refactor the system builder to return an array of `PromptSection` shaped { id, text, cacheable?: boolean }. Static chunks (role, persona, tool policy) get `cacheable: true`; dynamic ones (current context, today's date) don't.",
725
+ docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
726
+ }
727
+ ];
728
+ }
729
+ function detectToolBloat(ir, result) {
730
+ const SHORT_OUTPUT = /* @__PURE__ */ new Set([
731
+ "classify",
732
+ "extract",
733
+ "summarize",
734
+ "transform",
735
+ "critique"
736
+ ]);
737
+ if (!ir.tools || ir.tools.length === 0) return [];
738
+ const toolsKept = result.diagnostics.toolsKept;
739
+ if (toolsKept <= 10) return [];
740
+ if (!SHORT_OUTPUT.has(ir.intent.archetype)) return [];
741
+ return [
742
+ {
743
+ level: "warn",
744
+ code: "tool-bloat",
745
+ message: `${toolsKept} tools kept after the relevance pass for archetype="${ir.intent.archetype}" (consumer declared ${ir.tools.length}). This archetype is short-output and rarely needs more than 3 tools; each tool definition eats ~350 tokens of context budget.`,
746
+ suggestion: "Tighten `relevanceByIntent: { [archetype]: 0..1 }` per ToolDefinition. Tools below `toolRelevanceThreshold` (default 0.2) get dropped. Without `relevanceByIntent`, every tool defaults to neutral (0.5) and stays.",
747
+ docsUrl: "https://github.com/stue/kgauto/blob/main/v2/README.md#tools"
748
+ }
749
+ ];
750
+ }
751
+ function detectHistoryUncached(ir, profile) {
752
+ if (profile.provider !== "anthropic") return [];
753
+ if (!ir.history || ir.history.length < 2) return [];
754
+ if (ir.historyCachePolicy && ir.historyCachePolicy.strategy !== "none") {
755
+ return [];
756
+ }
757
+ return [
758
+ {
759
+ level: "warn",
760
+ code: "history-uncached-on-claude",
761
+ message: `${ir.history.length} history messages on Anthropic with no historyCachePolicy. Every turn re-pays for the full conversation context; with caching, subsequent turns hit the cache at ~10% the input cost.`,
762
+ suggestion: "Set `historyCachePolicy: { strategy: 'all-but-latest' }` on this IR. The lowering pass marks the message immediately preceding currentTurn with cache_control; subsequent turns whose history prefix matches byte-for-byte hit the cache.",
763
+ docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
764
+ }
765
+ ];
766
+ }
767
+
671
768
  // src/compile.ts
672
769
  var counter = 0;
673
770
  function makeHandle() {
@@ -683,7 +780,8 @@ function compile(ir, opts = {}) {
683
780
  threshold: opts.toolRelevanceThreshold
684
781
  });
685
782
  const compressed = passCompressHistory(toolFiltered.value, {
686
- summarizeOlderThan: opts.compressHistoryAfter
783
+ summarizeOlderThan: opts.compressHistoryAfter,
784
+ summarizeAboveTokens: opts.compressHistoryAboveTokens
687
785
  });
688
786
  let workingIR = compressed.value;
689
787
  const accumulatedMutations = [
@@ -718,6 +816,28 @@ function compile(ir, opts = {}) {
718
816
  const handle = makeHandle();
719
817
  const finalShape = computeShape(workingIR, inputTokens);
720
818
  const _learningKey = learningKey(ir.intent.archetype, profile.id, finalShape);
819
+ const diagnostics = {
820
+ sectionsKept: workingIR.sections.length,
821
+ sectionsDropped: ir.sections.length - workingIR.sections.length,
822
+ toolsKept: workingIR.tools?.length ?? 0,
823
+ toolsDropped: (ir.tools?.length ?? 0) - (workingIR.tools?.length ?? 0),
824
+ historyKept: workingIR.history?.length ?? 0,
825
+ historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
826
+ cacheableTokens: lowered.diagnostics.cacheableTokens,
827
+ estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd,
828
+ historyCacheableTokens: lowered.diagnostics.historyCacheableTokens,
829
+ historyTokensTotal: compressed.historyTokensTotal
830
+ };
831
+ const advisories = runAdvisor(
832
+ ir,
833
+ {
834
+ target: profile.id,
835
+ provider: profile.provider,
836
+ tokensIn: inputTokens,
837
+ diagnostics
838
+ },
839
+ profile
840
+ );
721
841
  return {
722
842
  handle,
723
843
  target: profile.id,
@@ -727,17 +847,8 @@ function compile(ir, opts = {}) {
727
847
  estimatedCostUsd: target.estimatedCostUsd,
728
848
  mutationsApplied: accumulatedMutations,
729
849
  fallbackChain,
730
- diagnostics: {
731
- sectionsKept: workingIR.sections.length,
732
- sectionsDropped: ir.sections.length - workingIR.sections.length,
733
- toolsKept: workingIR.tools?.length ?? 0,
734
- toolsDropped: (ir.tools?.length ?? 0) - (workingIR.tools?.length ?? 0),
735
- historyKept: workingIR.history?.length ?? 0,
736
- historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
737
- cacheableTokens: lowered.diagnostics.cacheableTokens,
738
- estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd,
739
- historyCacheableTokens: lowered.diagnostics.historyCacheableTokens
740
- }
850
+ advisories,
851
+ diagnostics
741
852
  };
742
853
  }
743
854
  function validateIR(ir) {
@@ -822,7 +933,8 @@ function registerCompile(appId, archetype, ir, result) {
822
933
  estimatedTokensIn: tokens,
823
934
  mutationsApplied: result.mutationsApplied.map((m) => m.id),
824
935
  startedAt: Date.now(),
825
- historyCacheableTokens: result.diagnostics.historyCacheableTokens
936
+ historyCacheableTokens: result.diagnostics.historyCacheableTokens,
937
+ historyTokensTotal: result.diagnostics.historyTokensTotal
826
938
  });
827
939
  }
828
940
  async function record(input) {
@@ -896,7 +1008,8 @@ function buildPayload(input, reg) {
896
1008
  cache_creation_input_tokens: input.cacheCreationInputTokens,
897
1009
  cost_usd_actual: costUsdActual,
898
1010
  ttft_ms: input.ttftMs,
899
- history_cacheable_tokens: reg?.historyCacheableTokens
1011
+ history_cacheable_tokens: reg?.historyCacheableTokens,
1012
+ history_tokens_at_compile: reg?.historyTokensTotal
900
1013
  };
901
1014
  }
902
1015
  function computeCostUsd(modelId, tokensIn, tokensOut) {
@@ -1382,6 +1495,7 @@ export {
1382
1495
  profilesByProvider,
1383
1496
  record,
1384
1497
  resetTokenizer,
1498
+ runAdvisor,
1385
1499
  setTokenizer,
1386
1500
  tryGetProfile
1387
1501
  };
@@ -253,6 +253,41 @@ type CompiledRequest = {
253
253
  }>;
254
254
  tools?: unknown[];
255
255
  };
256
+ /**
257
+ * Best-practice advisory emitted by the compiler at compile time. Non-fatal —
258
+ * consumers log, surface in dev tools, gate on `level === 'critical'` in CI,
259
+ * or ignore. The advisor inspects the IR + selected profile + diagnostics
260
+ * and emits one entry per detected gap.
261
+ *
262
+ * Codes are stable across releases. `suggestion` and `docsUrl` are optional
263
+ * but encouraged: suggestion = the actionable diff; docsUrl = the
264
+ * interfaces/kgauto.md anchor for context.
265
+ *
266
+ * alpha.6 Phase 1 starter rules:
267
+ * - `caching-off-on-claude` (warn) system >2000 chars on Anthropic, no cacheable=true
268
+ * - `single-chunk-system` (info) Anthropic, only one PromptSection >1000 chars
269
+ * - `tool-bloat` (warn) >10 tools on a short-output archetype
270
+ * - `history-uncached-on-claude` (warn) Anthropic, ≥2 history messages, no historyCachePolicy
271
+ *
272
+ * Phase 2 (catalog as `bestPractices` block in profiles) and Phase 3 (brain
273
+ * telemetry on `advisories_fired`) are alpha.7+ territory.
274
+ */
275
+ interface BestPracticeAdvisory {
276
+ /**
277
+ * Severity. `info` = informational; `warn` = behavioral pattern that's
278
+ * usually expensive or wrong; `critical` = likely bug or production-grade
279
+ * misuse. Phase 1 ships info + warn only.
280
+ */
281
+ level: 'info' | 'warn' | 'critical';
282
+ /** Stable kebab-case code. Consumers filter / gate by this. */
283
+ code: string;
284
+ /** Human-readable explanation of what was detected. */
285
+ message: string;
286
+ /** Optional: how to fix — actionable diff or pattern. */
287
+ suggestion?: string;
288
+ /** Optional: link to docs anchor for more context. */
289
+ docsUrl?: string;
290
+ }
256
291
  interface CompileResult {
257
292
  /** Unique handle for this call — pass to record() to correlate the outcome. */
258
293
  handle: string;
@@ -270,6 +305,11 @@ interface CompileResult {
270
305
  mutationsApplied: MutationApplied[];
271
306
  /** Fallback chain — try these in order if target fails. */
272
307
  fallbackChain: string[];
308
+ /**
309
+ * Best-practice advisories emitted by the compiler. Non-fatal. Empty
310
+ * array when no rules fired. alpha.6 Phase 1.
311
+ */
312
+ advisories: BestPracticeAdvisory[];
273
313
  /** Diagnostics for caller-side logging. */
274
314
  diagnostics: {
275
315
  sectionsKept: number;
@@ -290,6 +330,14 @@ interface CompileResult {
290
330
  * from history caching. alpha.5.
291
331
  */
292
332
  historyCacheableTokens: number;
333
+ /**
334
+ * Total tokens in input `history` (pre-compression). Computed regardless
335
+ * of whether `passCompressHistory` fired — surfaces how close a tuple is
336
+ * to its `compressHistoryAboveTokens` threshold so dashboards / cost-
337
+ * watchers can see the bloat axis the count-based threshold misses.
338
+ * 0 when history is empty. alpha.7.
339
+ */
340
+ historyTokensTotal: number;
293
341
  };
294
342
  }
295
343
  /**
@@ -568,4 +616,4 @@ declare function tryGetProfile(id: string): ModelProfile | undefined;
568
616
  declare function allProfiles(): readonly ModelProfile[];
569
617
  declare function profilesByProvider(provider: Provider): readonly ModelProfile[];
570
618
 
571
- export { type ApiKeys as A, type CompilePolicy as C, type IntentDeclaration as I, type LoweringSpec as L, type ModelProfile as M, type NormalizedResponse as N, type OracleScore as O, type ProviderOverrides as P, type RecordInput as R, type StructuredOutputCapability as S, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type CompileResult as e, ALIASES as f, type CacheStrategy as g, type CallAttempt as h, CallError as i, type CliffRule as j, type Constraints as k, type Message as l, type MutationApplied as m, type NormalizedTokens as n, type PromptSection as o, type Provider as p, type RecoveryRule as q, type SystemPromptMode as r, type ToolDefinition as s, allProfiles as t, getProfile as u, profilesByProvider as v, tryGetProfile as w };
619
+ export { type ApiKeys as A, type BestPracticeAdvisory as B, type CompilePolicy as C, type HistoryCachePolicy as H, type IntentDeclaration as I, type LoweringSpec as L, type ModelProfile as M, type NormalizedResponse as N, type OracleScore as O, type ProviderOverrides as P, type RecordInput as R, type StructuredOutputCapability as S, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type CompileResult as e, ALIASES as f, type CacheStrategy as g, type CallAttempt as h, CallError as i, type CliffRule as j, type Constraints as k, type Message as l, type MutationApplied as m, type NormalizedTokens as n, type PromptSection as o, type Provider as p, type RecoveryRule as q, type SystemPromptMode as r, type ToolDefinition as s, allProfiles as t, getProfile as u, profilesByProvider as v, tryGetProfile as w };
@@ -253,6 +253,41 @@ type CompiledRequest = {
253
253
  }>;
254
254
  tools?: unknown[];
255
255
  };
256
+ /**
257
+ * Best-practice advisory emitted by the compiler at compile time. Non-fatal —
258
+ * consumers log, surface in dev tools, gate on `level === 'critical'` in CI,
259
+ * or ignore. The advisor inspects the IR + selected profile + diagnostics
260
+ * and emits one entry per detected gap.
261
+ *
262
+ * Codes are stable across releases. `suggestion` and `docsUrl` are optional
263
+ * but encouraged: suggestion = the actionable diff; docsUrl = the
264
+ * interfaces/kgauto.md anchor for context.
265
+ *
266
+ * alpha.6 Phase 1 starter rules:
267
+ * - `caching-off-on-claude` (warn) system >2000 chars on Anthropic, no cacheable=true
268
+ * - `single-chunk-system` (info) Anthropic, only one PromptSection >1000 chars
269
+ * - `tool-bloat` (warn) >10 tools on a short-output archetype
270
+ * - `history-uncached-on-claude` (warn) Anthropic, ≥2 history messages, no historyCachePolicy
271
+ *
272
+ * Phase 2 (catalog as `bestPractices` block in profiles) and Phase 3 (brain
273
+ * telemetry on `advisories_fired`) are alpha.7+ territory.
274
+ */
275
+ interface BestPracticeAdvisory {
276
+ /**
277
+ * Severity. `info` = informational; `warn` = behavioral pattern that's
278
+ * usually expensive or wrong; `critical` = likely bug or production-grade
279
+ * misuse. Phase 1 ships info + warn only.
280
+ */
281
+ level: 'info' | 'warn' | 'critical';
282
+ /** Stable kebab-case code. Consumers filter / gate by this. */
283
+ code: string;
284
+ /** Human-readable explanation of what was detected. */
285
+ message: string;
286
+ /** Optional: how to fix — actionable diff or pattern. */
287
+ suggestion?: string;
288
+ /** Optional: link to docs anchor for more context. */
289
+ docsUrl?: string;
290
+ }
256
291
  interface CompileResult {
257
292
  /** Unique handle for this call — pass to record() to correlate the outcome. */
258
293
  handle: string;
@@ -270,6 +305,11 @@ interface CompileResult {
270
305
  mutationsApplied: MutationApplied[];
271
306
  /** Fallback chain — try these in order if target fails. */
272
307
  fallbackChain: string[];
308
+ /**
309
+ * Best-practice advisories emitted by the compiler. Non-fatal. Empty
310
+ * array when no rules fired. alpha.6 Phase 1.
311
+ */
312
+ advisories: BestPracticeAdvisory[];
273
313
  /** Diagnostics for caller-side logging. */
274
314
  diagnostics: {
275
315
  sectionsKept: number;
@@ -290,6 +330,14 @@ interface CompileResult {
290
330
  * from history caching. alpha.5.
291
331
  */
292
332
  historyCacheableTokens: number;
333
+ /**
334
+ * Total tokens in input `history` (pre-compression). Computed regardless
335
+ * of whether `passCompressHistory` fired — surfaces how close a tuple is
336
+ * to its `compressHistoryAboveTokens` threshold so dashboards / cost-
337
+ * watchers can see the bloat axis the count-based threshold misses.
338
+ * 0 when history is empty. alpha.7.
339
+ */
340
+ historyTokensTotal: number;
293
341
  };
294
342
  }
295
343
  /**
@@ -568,4 +616,4 @@ declare function tryGetProfile(id: string): ModelProfile | undefined;
568
616
  declare function allProfiles(): readonly ModelProfile[];
569
617
  declare function profilesByProvider(provider: Provider): readonly ModelProfile[];
570
618
 
571
- export { type ApiKeys as A, type CompilePolicy as C, type IntentDeclaration as I, type LoweringSpec as L, type ModelProfile as M, type NormalizedResponse as N, type OracleScore as O, type ProviderOverrides as P, type RecordInput as R, type StructuredOutputCapability as S, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type CompileResult as e, ALIASES as f, type CacheStrategy as g, type CallAttempt as h, CallError as i, type CliffRule as j, type Constraints as k, type Message as l, type MutationApplied as m, type NormalizedTokens as n, type PromptSection as o, type Provider as p, type RecoveryRule as q, type SystemPromptMode as r, type ToolDefinition as s, allProfiles as t, getProfile as u, profilesByProvider as v, tryGetProfile as w };
619
+ export { type ApiKeys as A, type BestPracticeAdvisory as B, type CompilePolicy as C, type HistoryCachePolicy as H, type IntentDeclaration as I, type LoweringSpec as L, type ModelProfile as M, type NormalizedResponse as N, type OracleScore as O, type ProviderOverrides as P, type RecordInput as R, type StructuredOutputCapability as S, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type CompileResult as e, ALIASES as f, type CacheStrategy as g, type CallAttempt as h, CallError as i, type CliffRule as j, type Constraints as k, type Message as l, type MutationApplied as m, type NormalizedTokens as n, type PromptSection as o, type Provider as p, type RecoveryRule as q, type SystemPromptMode as r, type ToolDefinition as s, allProfiles as t, getProfile as u, profilesByProvider as v, tryGetProfile as w };
@@ -1,2 +1,2 @@
1
- export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-DHdCRBVH.mjs';
1
+ export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-Py8c7zjJ.mjs';
2
2
  import './dialect.mjs';
@@ -1,2 +1,2 @@
1
- export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-MGq5Tnjv.js';
1
+ export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-B3eNQ2py.js';
2
2
  import './dialect.js';
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@warmdrift/kgauto-compiler",
3
- "version": "2.0.0-alpha.5",
3
+ "version": "2.0.0-alpha.7",
4
4
  "description": "Prompt compiler + central learning brain for multi-model AI apps. Swap models without rewriting prompts.",
5
5
  "main": "./dist/index.js",
6
6
  "module": "./dist/index.mjs",