npm - @warmdrift/kgauto-compiler - Versions diffs - 2.0.0-alpha.5 → 2.0.0-alpha.7 - Mend

@warmdrift/kgauto-compiler 2.0.0-alpha.5 → 2.0.0-alpha.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/README.md +45 -3
package/dist/index.d.mts +94 -3
package/dist/index.d.ts +94 -3
package/dist/index.js +138 -23
package/dist/index.mjs +137 -23
package/dist/{profiles-MGq5Tnjv.d.ts → profiles-B3eNQ2py.d.ts} +49 -1
package/dist/{profiles-DHdCRBVH.d.mts → profiles-Py8c7zjJ.d.mts} +49 -1
package/dist/profiles.d.mts +1 -1
package/dist/profiles.d.ts +1 -1
package/package.json +1 -1

package/README.md CHANGED Viewed

@@ -1,4 +1,4 @@
-# @warmdrift/kgauto-compiler — v2.0.0-alpha.5
+# @warmdrift/kgauto-compiler — v2.0.0-alpha.6
 > Prompt compiler + central learning brain for multi-model AI apps.
 > **Swap models without rewriting prompts.**
@@ -18,8 +18,8 @@ mutations.
 - **Package:** alpha — coexists with v1 (`@warmdrift/kgauto@1.2.0`) under
   the temporary name `@warmdrift/kgauto-compiler`. Renames to v2 final once
   v1 is fully retired from production.
-- **Tests:** 180/180 passing
-- **Build:** clean (47KB ESM, 64KB CJS)
+- **Tests:** 201/201 passing
+- **Build:** clean (47KB ESM, 68KB CJS)
 - **Brain:** schema ready (see `brain/migrations/001_initial_schema.sql`);
   awaiting dedicated Supabase provisioning.
 - **Mutation engine:** v2.1 (after enough outcome data accumulates).
@@ -154,6 +154,48 @@ The 5 prod empty-responses in tt-intelligence's `gemini-2.5-flash` dashboard
 calls? v2 catches those automatically — `expectedShortOutput` constraint plus
 the `force_thinking_budget_zero` cliff guard.
+## Tools
+Tools are first-class IR fields. The compiler's tool-relevance pass drops
+tools that don't apply to the current intent before lowering — saves
+context budget on every call.
+```ts
+const tools: ToolDefinition[] = [
+  {
+    name: 'web_search',
+    description: 'Search the public web',
+    parameters: { type: 'object', properties: { q: { type: 'string' } } },
+    relevanceByIntent: {
+      ask: 0.9,        // primary tool for ask
+      hunt: 0.9,
+      classify: 0.0,   // never useful for classification
+      summarize: 0.0,
+      extract: 0.1,
+    },
+  },
+  // ...
+];
+```
+Each tool declares per-intent relevance scores 0..1. The pass keeps tools
+where `relevanceByIntent[currentIntent] >= toolRelevanceThreshold` (default
+`0.2`). Missing entries default to neutral (`0.5`) — kept by default. Set
+explicit `0.0` to hard-exclude.
+Tool definitions eat ~350 tokens of context per tool (L-051), so trimming
+matters: 12 declared tools, only 3 relevant → 9 × 350 = 3150 tokens
+recovered per call.
+The `tool-bloat` advisory (alpha.6) fires when more than 10 tools survive
+the relevance pass on a short-output archetype (`classify`, `extract`,
+`summarize`, `transform`, `critique`) — those archetypes typically use
+≤3 tools, so a kept-count >10 indicates either missing `relevanceByIntent`
+or scores set too generously.
+DeepSeek profiles cap tools to 1 (sequential-only). Other providers
+inherit the count from the IR after the relevance pass.
 ## Brain provisioning
 1. Create a NEW Supabase project (suggested name: `kgauto-brain`)

package/dist/index.d.mts CHANGED Viewed

@@ -1,5 +1,5 @@
-import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult } from './profiles-DHdCRBVH.mjs';
-export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-DHdCRBVH.mjs';
+import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory } from './profiles-Py8c7zjJ.mjs';
+export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-Py8c7zjJ.mjs';
 export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, IntentArchetypeName, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.mjs';
 /**
@@ -19,6 +19,15 @@ interface CompileOptions {
     toolRelevanceThreshold?: number;
     /** History compression — turns count threshold (default 8). */
     compressHistoryAfter?: number;
+    /**
+     * History compression — token threshold (alpha.7). When total history
+     * tokens exceed this AND the history holds more turns than the recent
+     * window that is always kept, compress even when the turn count is at
+     * or below `compressHistoryAfter`. Catches fat-message bloat (tool-using
+     * agents pack many tool-call/result pairs into single assistant
+     * messages — count stays low, tokens explode).
+     * Default undefined (disabled — backward-compatible).
+     */
+    compressHistoryAboveTokens?: number;
     /**
      * Consumer-declared policy. Filters blocked models, enforces cost
      * ceiling, boosts preferred. See CompilePolicy in ir.ts.
@@ -114,6 +123,53 @@ declare function clearBrain(): void;
  * network error is swallowed/forwarded to onError.
  */
 declare function record(input: RecordInput): Promise<void>;
+/**
+ * Wire shape POSTed by `record()` to the brain proxy's `/outcomes` endpoint.
+ *
+ * Exported so consumer proxies can `import { OutcomePayload } from
+ * '@warmdrift/kgauto-compiler'` instead of redefining the shape — that way
+ * TypeScript catches future schema additions (cache fields, advisory
+ * telemetry, etc.) at consumer build time, not silently at runtime.
+ *
+ * **Forward-compat rule:** consumer proxies should pass the body through to
+ * Supabase rather than reconstructing field-by-field. The recommended shape
+ * is `const row = { ...body }` (or `await supabase.from('compile_outcomes')
+ * .insert(body)` directly). Filtering proxies break schema evolution
+ * silently — see s17 root-cause investigation 2026-05-10.
+ */
+interface OutcomePayload {
+    handle: string;
+    app_id?: string;
+    intent_archetype?: string;
+    /** The model that ACTUALLY RAN (post-fallback). */
+    model?: string;
+    /** The model v2 compile() originally targeted. NULL when no fallback. */
+    requested_model?: string;
+    provider?: string;
+    shape_key?: string;
+    learning_key?: string;
+    mutations_applied: string[];
+    tokens_in: number;
+    tokens_out: number;
+    estimated_tokens_in?: number;
+    latency_ms: number;
+    success: boolean;
+    empty_response: boolean;
+    error_type?: string;
+    tools_called?: string[];
+    oracle_score?: number;
+    oracle_dimensions?: Record<string, number>;
+    oracle_rationale?: string;
+    prompt_preview?: string;
+    response_preview?: string;
+    dialect_version: string;
+    cache_read_input_tokens?: number;
+    cache_creation_input_tokens?: number;
+    cost_usd_actual?: number;
+    ttft_ms?: number;
+    history_cacheable_tokens?: number;
+    history_tokens_at_compile?: number;
+}
 /**
  * Oracle contract — how an app tells the brain whether a response was good.
@@ -189,6 +245,41 @@ declare function resetTokenizer(): void;
  */
 declare function countTokens(text: string): number;
+/**
+ * Best-practice advisor — alpha.6 Phase 1.
+ *
+ * Inspects an IR + the selected profile + compile diagnostics and emits a
+ * list of `BestPracticeAdvisory` entries describing detected gaps. Runs
+ * after `lower()` in the compile pipeline; the result lands on
+ * `CompileResult.advisories` for the consumer to log, surface, or filter.
+ *
+ * Driven by interfaces/kgauto.md `best-practice-advisories` (IC, 2026-05-07).
+ * Phase 1 ships 4 starter rules sourced from the s14 kgauto comment +
+ * s15 empirical seed of brain anti-patterns:
+ *
+ *   1. `caching-off-on-claude`        system ≥2000 chars on Anthropic, no cacheable=true
+ *   2. `single-chunk-system`          Anthropic, only one PromptSection >1000 chars
+ *   3. `tool-bloat`                   >10 tools on a short-output archetype
+ *   4. `history-uncached-on-claude`   Anthropic, ≥2 history messages, no historyCachePolicy
+ *
+ * Each rule is a pure function: (ir, result, profile) → BestPracticeAdvisory[].
+ * No side effects. No randomness. Deterministic for a given IR.
+ *
+ * The thresholds (2000 chars, 1000 chars, 10 tools, 2 history) are chosen
+ * to balance noise vs. signal — too low fires on innocuous calls, too high
+ * misses real waste. They may be tuned with brain evidence over time; for now
+ * they're literals in the rule bodies. Make them configurable when the
+ * cost-watcher's R-rules graduate to here.
+ */
+/** Subset of CompileResult fields the advisor needs. */
+type AdvisorContext = Pick<CompileResult, 'target' | 'provider' | 'tokensIn' | 'diagnostics'>;
+/**
+ * Run all Phase 1 rules and return collected advisories. Order is fixed
+ * (same as the rule list above) so output is stable across runs.
+ */
+declare function runAdvisor(ir: PromptIR, result: AdvisorContext, profile: ModelProfile): BestPracticeAdvisory[];
 /**
  * @warmdrift/kgauto v2 — prompt compiler + central learning brain.
  *
@@ -235,4 +326,4 @@ declare function countTokens(text: string): number;
  */
 declare function compile(ir: PromptIR, opts?: CompileOptions): CompileResult;
-export { ApiKeys, type AppOracle, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, setTokenizer };
+export { ApiKeys, type AppOracle, BestPracticeAdvisory, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, type OutcomePayload, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, runAdvisor, setTokenizer };

package/dist/index.d.ts CHANGED Viewed

@@ -1,5 +1,5 @@
-import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult } from './profiles-MGq5Tnjv.js';
-export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-MGq5Tnjv.js';
+import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory } from './profiles-B3eNQ2py.js';
+export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-B3eNQ2py.js';
 export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, IntentArchetypeName, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.js';
 /**
@@ -19,6 +19,15 @@ interface CompileOptions {
     toolRelevanceThreshold?: number;
     /** History compression — turns count threshold (default 8). */
     compressHistoryAfter?: number;
+    /**
+     * History compression — token threshold (alpha.7). When total history
+     * tokens exceed this AND the history holds more turns than the recent
+     * window that is always kept, compress even when the turn count is at
+     * or below `compressHistoryAfter`. Catches fat-message bloat (tool-using
+     * agents pack many tool-call/result pairs into single assistant
+     * messages — count stays low, tokens explode).
+     * Default undefined (disabled — backward-compatible).
+     */
+    compressHistoryAboveTokens?: number;
     /**
      * Consumer-declared policy. Filters blocked models, enforces cost
      * ceiling, boosts preferred. See CompilePolicy in ir.ts.
@@ -114,6 +123,53 @@ declare function clearBrain(): void;
  * network error is swallowed/forwarded to onError.
  */
 declare function record(input: RecordInput): Promise<void>;
+/**
+ * Wire shape POSTed by `record()` to the brain proxy's `/outcomes` endpoint.
+ *
+ * Exported so consumer proxies can `import { OutcomePayload } from
+ * '@warmdrift/kgauto-compiler'` instead of redefining the shape — that way
+ * TypeScript catches future schema additions (cache fields, advisory
+ * telemetry, etc.) at consumer build time, not silently at runtime.
+ *
+ * **Forward-compat rule:** consumer proxies should pass the body through to
+ * Supabase rather than reconstructing field-by-field. The recommended shape
+ * is `const row = { ...body }` (or `await supabase.from('compile_outcomes')
+ * .insert(body)` directly). Filtering proxies break schema evolution
+ * silently — see s17 root-cause investigation 2026-05-10.
+ */
+interface OutcomePayload {
+    handle: string;
+    app_id?: string;
+    intent_archetype?: string;
+    /** The model that ACTUALLY RAN (post-fallback). */
+    model?: string;
+    /** The model v2 compile() originally targeted. NULL when no fallback. */
+    requested_model?: string;
+    provider?: string;
+    shape_key?: string;
+    learning_key?: string;
+    mutations_applied: string[];
+    tokens_in: number;
+    tokens_out: number;
+    estimated_tokens_in?: number;
+    latency_ms: number;
+    success: boolean;
+    empty_response: boolean;
+    error_type?: string;
+    tools_called?: string[];
+    oracle_score?: number;
+    oracle_dimensions?: Record<string, number>;
+    oracle_rationale?: string;
+    prompt_preview?: string;
+    response_preview?: string;
+    dialect_version: string;
+    cache_read_input_tokens?: number;
+    cache_creation_input_tokens?: number;
+    cost_usd_actual?: number;
+    ttft_ms?: number;
+    history_cacheable_tokens?: number;
+    history_tokens_at_compile?: number;
+}
 /**
  * Oracle contract — how an app tells the brain whether a response was good.
@@ -189,6 +245,41 @@ declare function resetTokenizer(): void;
  */
 declare function countTokens(text: string): number;
+/**
+ * Best-practice advisor — alpha.6 Phase 1.
+ *
+ * Inspects an IR + the selected profile + compile diagnostics and emits a
+ * list of `BestPracticeAdvisory` entries describing detected gaps. Runs
+ * after `lower()` in the compile pipeline; the result lands on
+ * `CompileResult.advisories` for the consumer to log, surface, or filter.
+ *
+ * Driven by interfaces/kgauto.md `best-practice-advisories` (IC, 2026-05-07).
+ * Phase 1 ships 4 starter rules sourced from the s14 kgauto comment +
+ * s15 empirical seed of brain anti-patterns:
+ *
+ *   1. `caching-off-on-claude`        system ≥2000 chars on Anthropic, no cacheable=true
+ *   2. `single-chunk-system`          Anthropic, only one PromptSection >1000 chars
+ *   3. `tool-bloat`                   >10 tools on a short-output archetype
+ *   4. `history-uncached-on-claude`   Anthropic, ≥2 history messages, no historyCachePolicy
+ *
+ * Each rule is a pure function: (ir, result, profile) → BestPracticeAdvisory[].
+ * No side effects. No randomness. Deterministic for a given IR.
+ *
+ * The thresholds (2000 chars, 1000 chars, 10 tools, 2 history) are chosen
+ * to balance noise vs. signal — too low fires on innocuous calls, too high
+ * misses real waste. They may be tuned with brain evidence over time; for now
+ * they're literals in the rule bodies. Make them configurable when the
+ * cost-watcher's R-rules graduate to here.
+ */
+/** Subset of CompileResult fields the advisor needs. */
+type AdvisorContext = Pick<CompileResult, 'target' | 'provider' | 'tokensIn' | 'diagnostics'>;
+/**
+ * Run all Phase 1 rules and return collected advisories. Order is fixed
+ * (same as the rule list above) so output is stable across runs.
+ */
+declare function runAdvisor(ir: PromptIR, result: AdvisorContext, profile: ModelProfile): BestPracticeAdvisory[];
 /**
  * @warmdrift/kgauto v2 — prompt compiler + central learning brain.
  *
@@ -235,4 +326,4 @@ declare function countTokens(text: string): number;
  */
 declare function compile(ir: PromptIR, opts?: CompileOptions): CompileResult;
-export { ApiKeys, type AppOracle, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, setTokenizer };
+export { ApiKeys, type AppOracle, BestPracticeAdvisory, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, type OutcomePayload, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, runAdvisor, setTokenizer };

package/dist/index.js CHANGED Viewed

@@ -43,6 +43,7 @@ __export(index_exports, {
   profilesByProvider: () => profilesByProvider,
   record: () => record,
   resetTokenizer: () => resetTokenizer,
+  runAdvisor: () => runAdvisor,
   setTokenizer: () => setTokenizer,
   tryGetProfile: () => tryGetProfile
 });
@@ -235,20 +236,37 @@ function passToolRelevance(ir, opts = {}) {
     ]
   };
 }
+function totalHistoryTokens(history) {
+  let total = 0;
+  for (const m of history) {
+    if (typeof m.content === "string") total += countTokens(m.content);
+  }
+  return total;
+}
 function passCompressHistory(ir, opts = {}) {
   const history = ir.history;
-  if (!history || history.length === 0) return { value: ir, mutations: [] };
+  if (!history || history.length === 0) {
+    return { value: ir, mutations: [], historyTokensTotal: 0 };
+  }
   const keepRecent = opts.keepRecent ?? 4;
   const summarizeOlderThan = opts.summarizeOlderThan ?? 8;
-  if (history.length <= summarizeOlderThan) return { value: ir, mutations: [] };
+  const summarizeAboveTokens = opts.summarizeAboveTokens;
+  const historyTokensTotal = totalHistoryTokens(history);
+  const countThresholdHit = history.length > summarizeOlderThan;
+  const tokenThresholdHit = summarizeAboveTokens !== void 0 && historyTokensTotal > summarizeAboveTokens && history.length > keepRecent;
+  if (!countThresholdHit && !tokenThresholdHit) {
+    return { value: ir, mutations: [], historyTokensTotal };
+  }
   const cutIndex = history.length - keepRecent;
   const old = history.slice(0, cutIndex);
   const recent = history.slice(cutIndex);
   const userTurns = old.filter((m) => m.role === "user");
   const firstUserLine = userTurns[0]?.content.split("\n")[0]?.slice(0, 200) ?? "";
+  const oldTokens = totalHistoryTokens(old);
+  const trigger = tokenThresholdHit && !countThresholdHit ? "tokens" : "count";
   const summary = {
     role: "system",
-    content: `[Earlier conversation: ${old.length} turns omitted. First user message: "${firstUserLine}"]`
+    content: `[Earlier conversation: ${old.length} turns omitted (~${oldTokens} tokens). First user message: "${firstUserLine}"]`
   };
   return {
     value: { ...ir, history: [summary, ...recent] },
@@ -257,9 +275,10 @@ function passCompressHistory(ir, opts = {}) {
         id: `compress-history-${old.length}`,
         source: "static_pass",
         passName: "compress_history",
-        description: `Compressed ${old.length} old turns into 1 summary line (kept ${keepRecent} recent)`
+        description: trigger === "tokens" ? `Compressed ${old.length} old turns (~${oldTokens} tokens) into 1 summary \u2014 token threshold ${summarizeAboveTokens} exceeded (kept ${keepRecent} recent)` : `Compressed ${old.length} old turns into 1 summary (kept ${keepRecent} recent)`
       }
-    ]
+    ],
+    historyTokensTotal
   };
 }
 function passApplyCliffs(ir, profile, estimatedInputTokens) {
@@ -489,7 +508,7 @@ function lower(ir, profile, hints = {}) {
 }
 function lowerAnthropic(ir, profile, hints) {
   const systemBlocks = buildAnthropicSystemBlocks(ir.sections, profile);
-  const history = ir.history ?? [];
+  const history = (ir.history ?? []).filter((m) => m.role !== "system");
   const policy = ir.historyCachePolicy;
   const markIndex = resolveHistoryMarkIndex(history.length, policy);
   const messages = buildAnthropicMessages(history, ir.currentTurn, markIndex);
@@ -634,7 +653,7 @@ function lowerGoogle(ir, profile, hints) {
   const minTokens = profile.lowering.cache.minTokens ?? 4096;
   const meetsMin = cacheableTokens >= minTokens;
   const cacheSavings = meetsMin ? cacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.25)) : 0;
-  const history = ir.history ?? [];
+  const history = (ir.history ?? []).filter((m) => m.role !== "system");
   const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
   const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
   return {
@@ -696,7 +715,7 @@ function lowerOpenAI(ir, profile, hints) {
       content: ir.currentTurn.parts ?? ir.currentTurn.content
     });
   }
-  const history = ir.history ?? [];
+  const history = (ir.history ?? []).filter((m) => m.role !== "system");
   const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
   const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
   return {
@@ -739,7 +758,7 @@ function lowerDeepSeek(ir, profile) {
       content: ir.currentTurn.parts ?? ir.currentTurn.content
     });
   }
-  const history = ir.history ?? [];
+  const history = (ir.history ?? []).filter((m) => m.role !== "system");
   const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
   const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
   return {
@@ -1185,6 +1204,85 @@ function profilesByProvider(provider) {
   return PROFILES_RAW.filter((p) => p.provider === provider);
 }
+// src/advisor.ts
+function runAdvisor(ir, result, profile) {
+  const out = [];
+  out.push(...detectCachingOff(ir, profile));
+  out.push(...detectSingleChunkSystem(ir, profile));
+  out.push(...detectToolBloat(ir, result));
+  out.push(...detectHistoryUncached(ir, profile));
+  return out;
+}
+function detectCachingOff(ir, profile) {
+  if (profile.provider !== "anthropic") return [];
+  const totalChars = ir.sections.reduce((s, sec) => s + sec.text.length, 0);
+  if (totalChars < 2e3) return [];
+  const anyCacheable = ir.sections.some((s) => s.cacheable === true);
+  if (anyCacheable) return [];
+  return [
+    {
+      level: "warn",
+      code: "caching-off-on-claude",
+      message: `System prompt is ${totalChars} chars on Anthropic but no PromptSection has cacheable=true. Anthropic prompt caching cuts cached-prefix input cost by ~90% on subsequent calls; without it, every turn re-pays full price for the static system context.`,
+      suggestion: "Mark stable system sections (role, persona, tool policy) with `cacheable: true`. The lowering pass concatenates cacheable sections into a single cache-controlled block before the dynamic ones.",
+      docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
+    }
+  ];
+}
+function detectSingleChunkSystem(ir, profile) {
+  if (profile.provider !== "anthropic") return [];
+  if (ir.sections.length !== 1) return [];
+  const only = ir.sections[0];
+  if (!only || only.text.length <= 1e3) return [];
+  return [
+    {
+      level: "info",
+      code: "single-chunk-system",
+      message: `System prompt is a single ${only.text.length}-char chunk. Splitting into NamedChunks (static role/persona vs dynamic context) gives the lowering pass a finer cache-marker boundary \u2014 only the static portion needs to be byte-stable for the cache to hit.`,
+      suggestion: "Refactor the system builder to return an array of `PromptSection` shaped { id, text, cacheable?: boolean }. Static chunks (role, persona, tool policy) get `cacheable: true`; dynamic ones (current context, today's date) don't.",
+      docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
+    }
+  ];
+}
+function detectToolBloat(ir, result) {
+  const SHORT_OUTPUT = /* @__PURE__ */ new Set([
+    "classify",
+    "extract",
+    "summarize",
+    "transform",
+    "critique"
+  ]);
+  if (!ir.tools || ir.tools.length === 0) return [];
+  const toolsKept = result.diagnostics.toolsKept;
+  if (toolsKept <= 10) return [];
+  if (!SHORT_OUTPUT.has(ir.intent.archetype)) return [];
+  return [
+    {
+      level: "warn",
+      code: "tool-bloat",
+      message: `${toolsKept} tools kept after the relevance pass for archetype="${ir.intent.archetype}" (consumer declared ${ir.tools.length}). This archetype is short-output and rarely needs more than 3 tools; each tool definition eats ~350 tokens of context budget.`,
+      suggestion: "Tighten `relevanceByIntent: { [archetype]: 0..1 }` per ToolDefinition. Tools below `toolRelevanceThreshold` (default 0.2) get dropped. Without `relevanceByIntent`, every tool defaults to neutral (0.5) and stays.",
+      docsUrl: "https://github.com/stue/kgauto/blob/main/v2/README.md#tools"
+    }
+  ];
+}
+function detectHistoryUncached(ir, profile) {
+  if (profile.provider !== "anthropic") return [];
+  if (!ir.history || ir.history.length < 2) return [];
+  if (ir.historyCachePolicy && ir.historyCachePolicy.strategy !== "none") {
+    return [];
+  }
+  return [
+    {
+      level: "warn",
+      code: "history-uncached-on-claude",
+      message: `${ir.history.length} history messages on Anthropic with no historyCachePolicy. Every turn re-pays for the full conversation context; with caching, subsequent turns hit the cache at ~10% the input cost.`,
+      suggestion: "Set `historyCachePolicy: { strategy: 'all-but-latest' }` on this IR. The lowering pass marks the message immediately preceding currentTurn with cache_control; subsequent turns whose history prefix matches byte-for-byte hit the cache.",
+      docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
+    }
+  ];
+}
 // src/compile.ts
 var counter = 0;
 function makeHandle() {
@@ -1200,7 +1298,8 @@ function compile(ir, opts = {}) {
     threshold: opts.toolRelevanceThreshold
   });
   const compressed = passCompressHistory(toolFiltered.value, {
-    summarizeOlderThan: opts.compressHistoryAfter
+    summarizeOlderThan: opts.compressHistoryAfter,
+    summarizeAboveTokens: opts.compressHistoryAboveTokens
   });
   let workingIR = compressed.value;
   const accumulatedMutations = [
@@ -1235,6 +1334,28 @@ function compile(ir, opts = {}) {
   const handle = makeHandle();
   const finalShape = computeShape(workingIR, inputTokens);
   const _learningKey = learningKey(ir.intent.archetype, profile.id, finalShape);
+  const diagnostics = {
+    sectionsKept: workingIR.sections.length,
+    sectionsDropped: ir.sections.length - workingIR.sections.length,
+    toolsKept: workingIR.tools?.length ?? 0,
+    toolsDropped: (ir.tools?.length ?? 0) - (workingIR.tools?.length ?? 0),
+    historyKept: workingIR.history?.length ?? 0,
+    historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
+    cacheableTokens: lowered.diagnostics.cacheableTokens,
+    estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd,
+    historyCacheableTokens: lowered.diagnostics.historyCacheableTokens,
+    historyTokensTotal: compressed.historyTokensTotal
+  };
+  const advisories = runAdvisor(
+    ir,
+    {
+      target: profile.id,
+      provider: profile.provider,
+      tokensIn: inputTokens,
+      diagnostics
+    },
+    profile
+  );
   return {
     handle,
     target: profile.id,
@@ -1244,17 +1365,8 @@ function compile(ir, opts = {}) {
     estimatedCostUsd: target.estimatedCostUsd,
     mutationsApplied: accumulatedMutations,
     fallbackChain,
-    diagnostics: {
-      sectionsKept: workingIR.sections.length,
-      sectionsDropped: ir.sections.length - workingIR.sections.length,
-      toolsKept: workingIR.tools?.length ?? 0,
-      toolsDropped: (ir.tools?.length ?? 0) - (workingIR.tools?.length ?? 0),
-      historyKept: workingIR.history?.length ?? 0,
-      historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
-      cacheableTokens: lowered.diagnostics.cacheableTokens,
-      estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd,
-      historyCacheableTokens: lowered.diagnostics.historyCacheableTokens
-    }
+    advisories,
+    diagnostics
   };
 }
 function validateIR(ir) {
@@ -1339,7 +1451,8 @@ function registerCompile(appId, archetype, ir, result) {
     estimatedTokensIn: tokens,
     mutationsApplied: result.mutationsApplied.map((m) => m.id),
     startedAt: Date.now(),
-    historyCacheableTokens: result.diagnostics.historyCacheableTokens
+    historyCacheableTokens: result.diagnostics.historyCacheableTokens,
+    historyTokensTotal: result.diagnostics.historyTokensTotal
   });
 }
 async function record(input) {
@@ -1413,7 +1526,8 @@ function buildPayload(input, reg) {
     cache_creation_input_tokens: input.cacheCreationInputTokens,
     cost_usd_actual: costUsdActual,
     ttft_ms: input.ttftMs,
-    history_cacheable_tokens: reg?.historyCacheableTokens
+    history_cacheable_tokens: reg?.historyCacheableTokens,
+    history_tokens_at_compile: reg?.historyTokensTotal
   };
 }
 function computeCostUsd(modelId, tokensIn, tokensOut) {
@@ -1900,6 +2014,7 @@ function compile2(ir, opts) {
   profilesByProvider,
   record,
   resetTokenizer,
+  runAdvisor,
   setTokenizer,
   tryGetProfile
 });

package/dist/index.mjs CHANGED Viewed

@@ -120,20 +120,37 @@ function passToolRelevance(ir, opts = {}) {
     ]
   };
 }
+function totalHistoryTokens(history) { // Sums countTokens() over the string-content messages in `history`; an empty list yields 0.
+  let total = 0;
+  for (const m of history) {
+    if (typeof m.content === "string") total += countTokens(m.content); // messages whose content is not a string are skipped and add nothing to the total
+  }
+  return total;
+}
 function passCompressHistory(ir, opts = {}) {
   const history = ir.history;
-  if (!history || history.length === 0) return { value: ir, mutations: [] };
+  if (!history || history.length === 0) {
+    return { value: ir, mutations: [], historyTokensTotal: 0 };
+  }
   const keepRecent = opts.keepRecent ?? 4;
   const summarizeOlderThan = opts.summarizeOlderThan ?? 8;
-  if (history.length <= summarizeOlderThan) return { value: ir, mutations: [] };
+  const summarizeAboveTokens = opts.summarizeAboveTokens;
+  const historyTokensTotal = totalHistoryTokens(history);
+  const countThresholdHit = history.length > summarizeOlderThan;
+  const tokenThresholdHit = summarizeAboveTokens !== void 0 && historyTokensTotal > summarizeAboveTokens && history.length > keepRecent;
+  if (!countThresholdHit && !tokenThresholdHit) {
+    return { value: ir, mutations: [], historyTokensTotal };
+  }
   const cutIndex = history.length - keepRecent;
   const old = history.slice(0, cutIndex);
   const recent = history.slice(cutIndex);
   const userTurns = old.filter((m) => m.role === "user");
   const firstUserLine = userTurns[0]?.content.split("\n")[0]?.slice(0, 200) ?? "";
+  const oldTokens = totalHistoryTokens(old);
+  const trigger = tokenThresholdHit && !countThresholdHit ? "tokens" : "count";
   const summary = {
     role: "system",
-    content: `[Earlier conversation: ${old.length} turns omitted. First user message: "${firstUserLine}"]`
+    content: `[Earlier conversation: ${old.length} turns omitted (~${oldTokens} tokens). First user message: "${firstUserLine}"]`
   };
   return {
     value: { ...ir, history: [summary, ...recent] },
@@ -142,9 +159,10 @@ function passCompressHistory(ir, opts = {}) {
         id: `compress-history-${old.length}`,
         source: "static_pass",
         passName: "compress_history",
-        description: `Compressed ${old.length} old turns into 1 summary line (kept ${keepRecent} recent)`
+        description: trigger === "tokens" ? `Compressed ${old.length} old turns (~${oldTokens} tokens) into 1 summary \u2014 token threshold ${summarizeAboveTokens} exceeded (kept ${keepRecent} recent)` : `Compressed ${old.length} old turns into 1 summary (kept ${keepRecent} recent)`
       }
-    ]
+    ],
+    historyTokensTotal
   };
 }
 function passApplyCliffs(ir, profile, estimatedInputTokens) {
@@ -374,7 +392,7 @@ function lower(ir, profile, hints = {}) {
 }
 function lowerAnthropic(ir, profile, hints) {
   const systemBlocks = buildAnthropicSystemBlocks(ir.sections, profile);
-  const history = ir.history ?? [];
+  const history = (ir.history ?? []).filter((m) => m.role !== "system");
   const policy = ir.historyCachePolicy;
   const markIndex = resolveHistoryMarkIndex(history.length, policy);
   const messages = buildAnthropicMessages(history, ir.currentTurn, markIndex);
@@ -519,7 +537,7 @@ function lowerGoogle(ir, profile, hints) {
   const minTokens = profile.lowering.cache.minTokens ?? 4096;
   const meetsMin = cacheableTokens >= minTokens;
   const cacheSavings = meetsMin ? cacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.25)) : 0;
-  const history = ir.history ?? [];
+  const history = (ir.history ?? []).filter((m) => m.role !== "system");
   const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
   const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
   return {
@@ -581,7 +599,7 @@ function lowerOpenAI(ir, profile, hints) {
       content: ir.currentTurn.parts ?? ir.currentTurn.content
     });
   }
-  const history = ir.history ?? [];
+  const history = (ir.history ?? []).filter((m) => m.role !== "system");
   const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
   const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
   return {
@@ -624,7 +642,7 @@ function lowerDeepSeek(ir, profile) {
       content: ir.currentTurn.parts ?? ir.currentTurn.content
     });
   }
-  const history = ir.history ?? [];
+  const history = (ir.history ?? []).filter((m) => m.role !== "system");
   const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
   const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
   return {
@@ -668,6 +686,85 @@ function setNestedField(obj, path, value) {
   cursor[parts[parts.length - 1]] = value;
 }
+// src/advisor.ts
+function runAdvisor(ir, result, profile) { // Runs each best-practice detector and returns their advisories concatenated in call order.
+  const out = [];
+  out.push(...detectCachingOff(ir, profile));
+  out.push(...detectSingleChunkSystem(ir, profile));
+  out.push(...detectToolBloat(ir, result)); // the only detector given `result` (it reads result.diagnostics) instead of `profile`
+  out.push(...detectHistoryUncached(ir, profile));
+  return out; // empty array when no detector fired
+}
+function detectCachingOff(ir, profile) { // Returns a one-element array with the "caching-off-on-claude" warn advisory, or [] when the rule does not apply.
+  if (profile.provider !== "anthropic") return []; // rule is Anthropic-only
+  const totalChars = ir.sections.reduce((s, sec) => s + sec.text.length, 0); // combined section text length in characters (not tokens)
+  if (totalChars < 2e3) return []; // stays silent below 2000 chars; exactly 2000 still fires
+  const anyCacheable = ir.sections.some((s) => s.cacheable === true); // strict comparison: only a literal `true` counts as opted in
+  if (anyCacheable) return []; // at least one section already opts in to caching
+  return [
+    {
+      level: "warn",
+      code: "caching-off-on-claude",
+      message: `System prompt is ${totalChars} chars on Anthropic but no PromptSection has cacheable=true. Anthropic prompt caching cuts cached-prefix input cost by ~90% on subsequent calls; without it, every turn re-pays full price for the static system context.`,
+      suggestion: "Mark stable system sections (role, persona, tool policy) with `cacheable: true`. The lowering pass concatenates cacheable sections into a single cache-controlled block before the dynamic ones.",
+      docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
+    }
+  ];
+}
+function detectSingleChunkSystem(ir, profile) { // Returns a one-element array with the "single-chunk-system" info advisory, or [] when the rule does not apply.
+  if (profile.provider !== "anthropic") return []; // rule is Anthropic-only
+  if (ir.sections.length !== 1) return []; // fires only when there is exactly one section
+  const only = ir.sections[0];
+  if (!only || only.text.length <= 1e3) return []; // the single section must be strictly longer than 1000 chars
+  return [
+    {
+      level: "info",
+      code: "single-chunk-system",
+      message: `System prompt is a single ${only.text.length}-char chunk. Splitting into NamedChunks (static role/persona vs dynamic context) gives the lowering pass a finer cache-marker boundary \u2014 only the static portion needs to be byte-stable for the cache to hit.`,
+      suggestion: "Refactor the system builder to return an array of `PromptSection` shaped { id, text, cacheable?: boolean }. Static chunks (role, persona, tool policy) get `cacheable: true`; dynamic ones (current context, today's date) don't.",
+      docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
+    }
+  ];
+}
+function detectToolBloat(ir, result) { // Returns a one-element array with the "tool-bloat" warn advisory, or [] when the rule does not apply; takes no profile, so it is not provider-gated.
+  const SHORT_OUTPUT = /* @__PURE__ */ new Set([ // archetypes this rule treats as short-output
+    "classify",
+    "extract",
+    "summarize",
+    "transform",
+    "critique"
+  ]);
+  if (!ir.tools || ir.tools.length === 0) return []; // no tools declared on the IR
+  const toolsKept = result.diagnostics.toolsKept; // count taken from the compile diagnostics, not the declared ir.tools.length
+  if (toolsKept <= 10) return []; // fires only when more than 10 tools were kept
+  if (!SHORT_OUTPUT.has(ir.intent.archetype)) return []; // archetypes outside the set are exempt
+  return [
+    {
+      level: "warn",
+      code: "tool-bloat",
+      message: `${toolsKept} tools kept after the relevance pass for archetype="${ir.intent.archetype}" (consumer declared ${ir.tools.length}). This archetype is short-output and rarely needs more than 3 tools; each tool definition eats ~350 tokens of context budget.`,
+      suggestion: "Tighten `relevanceByIntent: { [archetype]: 0..1 }` per ToolDefinition. Tools below `toolRelevanceThreshold` (default 0.2) get dropped. Without `relevanceByIntent`, every tool defaults to neutral (0.5) and stays.",
+      docsUrl: "https://github.com/stue/kgauto/blob/main/v2/README.md#tools"
+    }
+  ];
+}
+function detectHistoryUncached(ir, profile) { // Returns a one-element array with the "history-uncached-on-claude" warn advisory, or [] when the rule does not apply.
+  if (profile.provider !== "anthropic") return []; // rule is Anthropic-only
+  if (!ir.history || ir.history.length < 2) return []; // needs at least 2 history messages
+  if (ir.historyCachePolicy && ir.historyCachePolicy.strategy !== "none") { // a policy with any strategy other than "none" counts as caching enabled
+    return [];
+  }
+  return [ // reached when the policy is missing or its strategy is "none"
+    {
+      level: "warn",
+      code: "history-uncached-on-claude",
+      message: `${ir.history.length} history messages on Anthropic with no historyCachePolicy. Every turn re-pays for the full conversation context; with caching, subsequent turns hit the cache at ~10% the input cost.`,
+      suggestion: "Set `historyCachePolicy: { strategy: 'all-but-latest' }` on this IR. The lowering pass marks the message immediately preceding currentTurn with cache_control; subsequent turns whose history prefix matches byte-for-byte hit the cache.",
+      docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
+    }
+  ];
+}
 // src/compile.ts
 var counter = 0;
 function makeHandle() {
@@ -683,7 +780,8 @@ function compile(ir, opts = {}) {
     threshold: opts.toolRelevanceThreshold
   });
   const compressed = passCompressHistory(toolFiltered.value, {
-    summarizeOlderThan: opts.compressHistoryAfter
+    summarizeOlderThan: opts.compressHistoryAfter,
+    summarizeAboveTokens: opts.compressHistoryAboveTokens
   });
   let workingIR = compressed.value;
   const accumulatedMutations = [
@@ -718,6 +816,28 @@ function compile(ir, opts = {}) {
   const handle = makeHandle();
   const finalShape = computeShape(workingIR, inputTokens);
   const _learningKey = learningKey(ir.intent.archetype, profile.id, finalShape);
+  const diagnostics = {
+    sectionsKept: workingIR.sections.length,
+    sectionsDropped: ir.sections.length - workingIR.sections.length,
+    toolsKept: workingIR.tools?.length ?? 0,
+    toolsDropped: (ir.tools?.length ?? 0) - (workingIR.tools?.length ?? 0),
+    historyKept: workingIR.history?.length ?? 0,
+    historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
+    cacheableTokens: lowered.diagnostics.cacheableTokens,
+    estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd,
+    historyCacheableTokens: lowered.diagnostics.historyCacheableTokens,
+    historyTokensTotal: compressed.historyTokensTotal
+  };
+  const advisories = runAdvisor(
+    ir,
+    {
+      target: profile.id,
+      provider: profile.provider,
+      tokensIn: inputTokens,
+      diagnostics
+    },
+    profile
+  );
   return {
     handle,
     target: profile.id,
@@ -727,17 +847,8 @@ function compile(ir, opts = {}) {
     estimatedCostUsd: target.estimatedCostUsd,
     mutationsApplied: accumulatedMutations,
     fallbackChain,
-    diagnostics: {
-      sectionsKept: workingIR.sections.length,
-      sectionsDropped: ir.sections.length - workingIR.sections.length,
-      toolsKept: workingIR.tools?.length ?? 0,
-      toolsDropped: (ir.tools?.length ?? 0) - (workingIR.tools?.length ?? 0),
-      historyKept: workingIR.history?.length ?? 0,
-      historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
-      cacheableTokens: lowered.diagnostics.cacheableTokens,
-      estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd,
-      historyCacheableTokens: lowered.diagnostics.historyCacheableTokens
-    }
+    advisories,
+    diagnostics
   };
 }
 function validateIR(ir) {
@@ -822,7 +933,8 @@ function registerCompile(appId, archetype, ir, result) {
     estimatedTokensIn: tokens,
     mutationsApplied: result.mutationsApplied.map((m) => m.id),
     startedAt: Date.now(),
-    historyCacheableTokens: result.diagnostics.historyCacheableTokens
+    historyCacheableTokens: result.diagnostics.historyCacheableTokens,
+    historyTokensTotal: result.diagnostics.historyTokensTotal
   });
 }
 async function record(input) {
@@ -896,7 +1008,8 @@ function buildPayload(input, reg) {
     cache_creation_input_tokens: input.cacheCreationInputTokens,
     cost_usd_actual: costUsdActual,
     ttft_ms: input.ttftMs,
-    history_cacheable_tokens: reg?.historyCacheableTokens
+    history_cacheable_tokens: reg?.historyCacheableTokens,
+    history_tokens_at_compile: reg?.historyTokensTotal
   };
 }
 function computeCostUsd(modelId, tokensIn, tokensOut) {
@@ -1382,6 +1495,7 @@ export {
   profilesByProvider,
   record,
   resetTokenizer,
+  runAdvisor,
   setTokenizer,
   tryGetProfile
 };

package/dist/{profiles-MGq5Tnjv.d.ts → profiles-B3eNQ2py.d.ts} RENAMED Viewed

@@ -253,6 +253,41 @@ type CompiledRequest = {
     }>;
     tools?: unknown[];
 };
+/**
+ * Best-practice advisory emitted by the compiler at compile time. Non-fatal —
+ * consumers log, surface in dev tools, gate on `level === 'critical'` in CI,
+ * or ignore. The advisor inspects the IR + selected profile + diagnostics
+ * and emits one entry per detected gap.
+ *
+ * Codes are stable across releases. `suggestion` and `docsUrl` are optional
+ * but encouraged: suggestion = the actionable diff; docsUrl = the
+ * interfaces/kgauto.md anchor for context.
+ *
+ * alpha.6 Phase 1 starter rules:
+ *   - `caching-off-on-claude` (warn)       system ≥2000 chars on Anthropic, no cacheable=true
+ *   - `single-chunk-system` (info)         Anthropic, only one PromptSection >1000 chars
+ *   - `tool-bloat` (warn)                  >10 tools on a short-output archetype
+ *   - `history-uncached-on-claude` (warn)  Anthropic, ≥2 history messages, no historyCachePolicy
+ *
+ * Phase 2 (catalog as `bestPractices` block in profiles) and Phase 3 (brain
+ * telemetry on `advisories_fired`) are alpha.7+ territory.
+ */
+interface BestPracticeAdvisory {
+    /**
+     * Severity. `info` = informational; `warn` = behavioral pattern that's
+     * usually expensive or wrong; `critical` = likely bug or production-grade
+     * misuse. Phase 1 ships info + warn only.
+     */
+    level: 'info' | 'warn' | 'critical';
+    /** Stable kebab-case code. Consumers filter / gate by this. */
+    code: string;
+    /** Human-readable explanation of what was detected. */
+    message: string;
+    /** Optional: how to fix — actionable diff or pattern. */
+    suggestion?: string;
+    /** Optional: link to docs anchor for more context. */
+    docsUrl?: string;
+}
 interface CompileResult {
     /** Unique handle for this call — pass to record() to correlate the outcome. */
     handle: string;
@@ -270,6 +305,11 @@ interface CompileResult {
     mutationsApplied: MutationApplied[];
     /** Fallback chain — try these in order if target fails. */
     fallbackChain: string[];
+    /**
+     * Best-practice advisories emitted by the compiler. Non-fatal. Empty
+     * array when no rules fired. alpha.6 Phase 1.
+     */
+    advisories: BestPracticeAdvisory[];
     /** Diagnostics for caller-side logging. */
     diagnostics: {
         sectionsKept: number;
@@ -290,6 +330,14 @@ interface CompileResult {
          * from history caching. alpha.5.
          */
         historyCacheableTokens: number;
+        /**
+         * Total tokens in input `history` (pre-compression). Computed regardless
+         * of whether `passCompressHistory` fired — surfaces how close a tuple is
+         * to its `compressHistoryAboveTokens` threshold so dashboards / cost-
+         * watchers can see the bloat axis the count-based threshold misses.
+         * 0 when history is empty. alpha.7.
+         */
+        historyTokensTotal: number;
     };
 }
 /**
@@ -568,4 +616,4 @@ declare function tryGetProfile(id: string): ModelProfile | undefined;
 declare function allProfiles(): readonly ModelProfile[];
 declare function profilesByProvider(provider: Provider): readonly ModelProfile[];
-export { type ApiKeys as A, type CompilePolicy as C, type IntentDeclaration as I, type LoweringSpec as L, type ModelProfile as M, type NormalizedResponse as N, type OracleScore as O, type ProviderOverrides as P, type RecordInput as R, type StructuredOutputCapability as S, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type CompileResult as e, ALIASES as f, type CacheStrategy as g, type CallAttempt as h, CallError as i, type CliffRule as j, type Constraints as k, type Message as l, type MutationApplied as m, type NormalizedTokens as n, type PromptSection as o, type Provider as p, type RecoveryRule as q, type SystemPromptMode as r, type ToolDefinition as s, allProfiles as t, getProfile as u, profilesByProvider as v, tryGetProfile as w };
+export { type ApiKeys as A, type BestPracticeAdvisory as B, type CompilePolicy as C, type HistoryCachePolicy as H, type IntentDeclaration as I, type LoweringSpec as L, type ModelProfile as M, type NormalizedResponse as N, type OracleScore as O, type ProviderOverrides as P, type RecordInput as R, type StructuredOutputCapability as S, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type CompileResult as e, ALIASES as f, type CacheStrategy as g, type CallAttempt as h, CallError as i, type CliffRule as j, type Constraints as k, type Message as l, type MutationApplied as m, type NormalizedTokens as n, type PromptSection as o, type Provider as p, type RecoveryRule as q, type SystemPromptMode as r, type ToolDefinition as s, allProfiles as t, getProfile as u, profilesByProvider as v, tryGetProfile as w };

package/dist/{profiles-DHdCRBVH.d.mts → profiles-Py8c7zjJ.d.mts} RENAMED Viewed

@@ -253,6 +253,41 @@ type CompiledRequest = {
     }>;
     tools?: unknown[];
 };
+/**
+ * Best-practice advisory emitted by the compiler at compile time. Non-fatal —
+ * consumers log, surface in dev tools, gate on `level === 'critical'` in CI,
+ * or ignore. The advisor inspects the IR + selected profile + diagnostics
+ * and emits one entry per detected gap.
+ *
+ * Codes are stable across releases. `suggestion` and `docsUrl` are optional
+ * but encouraged: suggestion = the actionable diff; docsUrl = the
+ * interfaces/kgauto.md anchor for context.
+ *
+ * alpha.6 Phase 1 starter rules:
+ *   - `caching-off-on-claude` (warn)       system ≥2000 chars on Anthropic, no cacheable=true
+ *   - `single-chunk-system` (info)         Anthropic, only one PromptSection >1000 chars
+ *   - `tool-bloat` (warn)                  >10 tools on a short-output archetype
+ *   - `history-uncached-on-claude` (warn)  Anthropic, ≥2 history messages, no historyCachePolicy
+ *
+ * Phase 2 (catalog as `bestPractices` block in profiles) and Phase 3 (brain
+ * telemetry on `advisories_fired`) are alpha.7+ territory.
+ */
+interface BestPracticeAdvisory {
+    /**
+     * Severity. `info` = informational; `warn` = behavioral pattern that's
+     * usually expensive or wrong; `critical` = likely bug or production-grade
+     * misuse. Phase 1 ships info + warn only.
+     */
+    level: 'info' | 'warn' | 'critical';
+    /** Stable kebab-case code. Consumers filter / gate by this. */
+    code: string;
+    /** Human-readable explanation of what was detected. */
+    message: string;
+    /** Optional: how to fix — actionable diff or pattern. */
+    suggestion?: string;
+    /** Optional: link to docs anchor for more context. */
+    docsUrl?: string;
+}
 interface CompileResult {
     /** Unique handle for this call — pass to record() to correlate the outcome. */
     handle: string;
@@ -270,6 +305,11 @@ interface CompileResult {
     mutationsApplied: MutationApplied[];
     /** Fallback chain — try these in order if target fails. */
     fallbackChain: string[];
+    /**
+     * Best-practice advisories emitted by the compiler. Non-fatal. Empty
+     * array when no rules fired. alpha.6 Phase 1.
+     */
+    advisories: BestPracticeAdvisory[];
     /** Diagnostics for caller-side logging. */
     diagnostics: {
         sectionsKept: number;
@@ -290,6 +330,14 @@ interface CompileResult {
          * from history caching. alpha.5.
          */
         historyCacheableTokens: number;
+        /**
+         * Total tokens in input `history` (pre-compression). Computed regardless
+         * of whether `passCompressHistory` fired — surfaces how close a tuple is
+         * to its `compressHistoryAboveTokens` threshold so dashboards / cost-
+         * watchers can see the bloat axis the count-based threshold misses.
+         * 0 when history is empty. alpha.7.
+         */
+        historyTokensTotal: number;
     };
 }
 /**
@@ -568,4 +616,4 @@ declare function tryGetProfile(id: string): ModelProfile | undefined;
 declare function allProfiles(): readonly ModelProfile[];
 declare function profilesByProvider(provider: Provider): readonly ModelProfile[];
-export { type ApiKeys as A, type CompilePolicy as C, type IntentDeclaration as I, type LoweringSpec as L, type ModelProfile as M, type NormalizedResponse as N, type OracleScore as O, type ProviderOverrides as P, type RecordInput as R, type StructuredOutputCapability as S, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type CompileResult as e, ALIASES as f, type CacheStrategy as g, type CallAttempt as h, CallError as i, type CliffRule as j, type Constraints as k, type Message as l, type MutationApplied as m, type NormalizedTokens as n, type PromptSection as o, type Provider as p, type RecoveryRule as q, type SystemPromptMode as r, type ToolDefinition as s, allProfiles as t, getProfile as u, profilesByProvider as v, tryGetProfile as w };
+export { type ApiKeys as A, type BestPracticeAdvisory as B, type CompilePolicy as C, type HistoryCachePolicy as H, type IntentDeclaration as I, type LoweringSpec as L, type ModelProfile as M, type NormalizedResponse as N, type OracleScore as O, type ProviderOverrides as P, type RecordInput as R, type StructuredOutputCapability as S, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type CompileResult as e, ALIASES as f, type CacheStrategy as g, type CallAttempt as h, CallError as i, type CliffRule as j, type Constraints as k, type Message as l, type MutationApplied as m, type NormalizedTokens as n, type PromptSection as o, type Provider as p, type RecoveryRule as q, type SystemPromptMode as r, type ToolDefinition as s, allProfiles as t, getProfile as u, profilesByProvider as v, tryGetProfile as w };

package/dist/profiles.d.mts CHANGED Viewed

@@ -1,2 +1,2 @@
-export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-DHdCRBVH.mjs';
+export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-Py8c7zjJ.mjs';
 import './dialect.mjs';

package/dist/profiles.d.ts CHANGED Viewed

@@ -1,2 +1,2 @@
-export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-MGq5Tnjv.js';
+export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-B3eNQ2py.js';
 import './dialect.js';

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@warmdrift/kgauto-compiler",
-  "version": "2.0.0-alpha.5",
+  "version": "2.0.0-alpha.7",
   "description": "Prompt compiler + central learning brain for multi-model AI apps. Swap models without rewriting prompts.",
   "main": "./dist/index.js",
   "module": "./dist/index.mjs",