@warmdrift/kgauto-compiler 2.0.0-alpha.4 → 2.0.0-alpha.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +45 -3
- package/dist/index.d.mts +38 -3
- package/dist/index.d.ts +38 -3
- package/dist/index.js +186 -19
- package/dist/index.mjs +185 -19
- package/dist/{profiles-CDttLtaD.d.ts → profiles-CQnLkQ7b.d.ts} +91 -1
- package/dist/{profiles-CH_nKPjp.d.mts → profiles-zm6diETo.d.mts} +91 -1
- package/dist/profiles.d.mts +1 -1
- package/dist/profiles.d.ts +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# @warmdrift/kgauto-compiler — v2.0.0-alpha.
|
|
1
|
+
# @warmdrift/kgauto-compiler — v2.0.0-alpha.6
|
|
2
2
|
|
|
3
3
|
> Prompt compiler + central learning brain for multi-model AI apps.
|
|
4
4
|
> **Swap models without rewriting prompts.**
|
|
@@ -18,8 +18,8 @@ mutations.
|
|
|
18
18
|
- **Package:** alpha — coexists with v1 (`@warmdrift/kgauto@1.2.0`) under
|
|
19
19
|
the temporary name `@warmdrift/kgauto-compiler`. Renames to v2 final once
|
|
20
20
|
v1 is fully retired from production.
|
|
21
|
-
- **Tests:**
|
|
22
|
-
- **Build:** clean (
|
|
21
|
+
- **Tests:** 201/201 passing
|
|
22
|
+
- **Build:** clean (47KB ESM, 68KB CJS)
|
|
23
23
|
- **Brain:** schema ready (see `brain/migrations/001_initial_schema.sql`);
|
|
24
24
|
awaiting dedicated Supabase provisioning.
|
|
25
25
|
- **Mutation engine:** v2.1 (after enough outcome data accumulates).
|
|
@@ -154,6 +154,48 @@ The 5 prod empty-responses in tt-intelligence's `gemini-2.5-flash` dashboard
|
|
|
154
154
|
calls? v2 catches those automatically — `expectedShortOutput` constraint plus
|
|
155
155
|
the `force_thinking_budget_zero` cliff guard.
|
|
156
156
|
|
|
157
|
+
## Tools
|
|
158
|
+
|
|
159
|
+
Tools are first-class IR fields. The compiler's tool-relevance pass drops
|
|
160
|
+
tools that don't apply to the current intent before lowering — saves
|
|
161
|
+
context budget on every call.
|
|
162
|
+
|
|
163
|
+
```ts
|
|
164
|
+
const tools: ToolDefinition[] = [
|
|
165
|
+
{
|
|
166
|
+
name: 'web_search',
|
|
167
|
+
description: 'Search the public web',
|
|
168
|
+
parameters: { type: 'object', properties: { q: { type: 'string' } } },
|
|
169
|
+
relevanceByIntent: {
|
|
170
|
+
ask: 0.9, // primary tool for ask
|
|
171
|
+
hunt: 0.9,
|
|
172
|
+
classify: 0.0, // never useful for classification
|
|
173
|
+
summarize: 0.0,
|
|
174
|
+
extract: 0.1,
|
|
175
|
+
},
|
|
176
|
+
},
|
|
177
|
+
// ...
|
|
178
|
+
];
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
Each tool declares per-intent relevance scores 0..1. The pass keeps tools
|
|
182
|
+
where `relevanceByIntent[currentIntent] >= toolRelevanceThreshold` (default
|
|
183
|
+
`0.2`). Missing entries default to neutral (`0.5`) — kept by default. Set
|
|
184
|
+
explicit `0.0` to hard-exclude.
|
|
185
|
+
|
|
186
|
+
Tool definitions eat ~350 tokens of context per tool (L-051), so trimming
|
|
187
|
+
matters: 12 declared tools, only 3 relevant → 9 × 350 = 3150 tokens
|
|
188
|
+
recovered per call.
|
|
189
|
+
|
|
190
|
+
The `tool-bloat` advisory (alpha.6) fires when more than 10 tools survive
|
|
191
|
+
the relevance pass on a short-output archetype (`classify`, `extract`,
|
|
192
|
+
`summarize`, `transform`, `critique`) — those archetypes typically use
|
|
193
|
+
≤3 tools, so a kept-count >10 indicates either missing `relevanceByIntent`
|
|
194
|
+
or scores set too generously.
|
|
195
|
+
|
|
196
|
+
DeepSeek profiles cap tools to 1 (sequential-only). Other providers
|
|
197
|
+
inherit the count from the IR after the relevance pass.
|
|
198
|
+
|
|
157
199
|
## Brain provisioning
|
|
158
200
|
|
|
159
201
|
1. Create a NEW Supabase project (suggested name: `kgauto-brain`)
|
package/dist/index.d.mts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult } from './profiles-CH_nKPjp.mjs';
|
|
2
|
-
export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CH_nKPjp.mjs';
|
|
1
|
+
import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory } from './profiles-zm6diETo.mjs';
|
|
2
|
+
export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-zm6diETo.mjs';
|
|
3
3
|
export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, IntentArchetypeName, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.mjs';
|
|
4
4
|
|
|
5
5
|
/**
|
|
@@ -189,6 +189,41 @@ declare function resetTokenizer(): void;
|
|
|
189
189
|
*/
|
|
190
190
|
declare function countTokens(text: string): number;
|
|
191
191
|
|
|
192
|
+
/**
|
|
193
|
+
* Best-practice advisor — alpha.6 Phase 1.
|
|
194
|
+
*
|
|
195
|
+
* Inspects an IR + the selected profile + compile diagnostics and emits a
|
|
196
|
+
* list of `BestPracticeAdvisory` entries describing detected gaps. Runs
|
|
197
|
+
* after `lower()` in the compile pipeline; the result lands on
|
|
198
|
+
* `CompileResult.advisories` for the consumer to log, surface, or filter.
|
|
199
|
+
*
|
|
200
|
+
* Driven by interfaces/kgauto.md `best-practice-advisories` (IC, 2026-05-07).
|
|
201
|
+
* Phase 1 ships 4 starter rules sourced from the s14 kgauto comment +
|
|
202
|
+
* s15 empirical seed of brain anti-patterns:
|
|
203
|
+
*
|
|
204
|
+
* 1. `caching-off-on-claude` system ≥2000 chars on Anthropic, no cacheable=true
|
|
205
|
+
* 2. `single-chunk-system` Anthropic, only one PromptSection >1000 chars
|
|
206
|
+
* 3. `tool-bloat` >10 tools on a short-output archetype
|
|
207
|
+
* 4. `history-uncached-on-claude` Anthropic, ≥2 history messages, no historyCachePolicy
|
|
208
|
+
*
|
|
209
|
+
* Each rule is a pure function: (ir, result, profile) → BestPracticeAdvisory[].
|
|
210
|
+
* No side effects. No randomness. Deterministic for a given IR.
|
|
211
|
+
*
|
|
212
|
+
* The thresholds (2000 chars, 1000 chars, 10 tools, 2 history) are chosen
|
|
213
|
+
* to balance noise vs. signal — too low fires on innocuous calls, too high
|
|
214
|
+
* misses real waste. They may be tuned with brain evidence over time; for now
|
|
215
|
+
* they're literals in the rule bodies. Make them configurable when the
|
|
216
|
+
* cost-watcher's R-rules graduate to here.
|
|
217
|
+
*/
|
|
218
|
+
|
|
219
|
+
/** Subset of CompileResult fields the advisor needs. */
|
|
220
|
+
type AdvisorContext = Pick<CompileResult, 'target' | 'provider' | 'tokensIn' | 'diagnostics'>;
|
|
221
|
+
/**
|
|
222
|
+
* Run all Phase 1 rules and return collected advisories. Order is fixed
|
|
223
|
+
* (same as the rule list above) so output is stable across runs.
|
|
224
|
+
*/
|
|
225
|
+
declare function runAdvisor(ir: PromptIR, result: AdvisorContext, profile: ModelProfile): BestPracticeAdvisory[];
|
|
226
|
+
|
|
192
227
|
/**
|
|
193
228
|
* @warmdrift/kgauto v2 — prompt compiler + central learning brain.
|
|
194
229
|
*
|
|
@@ -235,4 +270,4 @@ declare function countTokens(text: string): number;
|
|
|
235
270
|
*/
|
|
236
271
|
declare function compile(ir: PromptIR, opts?: CompileOptions): CompileResult;
|
|
237
272
|
|
|
238
|
-
export { ApiKeys, type AppOracle, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, setTokenizer };
|
|
273
|
+
export { ApiKeys, type AppOracle, BestPracticeAdvisory, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, runAdvisor, setTokenizer };
|
package/dist/index.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult } from './profiles-CDttLtaD.js';
|
|
2
|
-
export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CDttLtaD.js';
|
|
1
|
+
import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory } from './profiles-CQnLkQ7b.js';
|
|
2
|
+
export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CQnLkQ7b.js';
|
|
3
3
|
export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, IntentArchetypeName, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.js';
|
|
4
4
|
|
|
5
5
|
/**
|
|
@@ -189,6 +189,41 @@ declare function resetTokenizer(): void;
|
|
|
189
189
|
*/
|
|
190
190
|
declare function countTokens(text: string): number;
|
|
191
191
|
|
|
192
|
+
/**
|
|
193
|
+
* Best-practice advisor — alpha.6 Phase 1.
|
|
194
|
+
*
|
|
195
|
+
* Inspects an IR + the selected profile + compile diagnostics and emits a
|
|
196
|
+
* list of `BestPracticeAdvisory` entries describing detected gaps. Runs
|
|
197
|
+
* after `lower()` in the compile pipeline; the result lands on
|
|
198
|
+
* `CompileResult.advisories` for the consumer to log, surface, or filter.
|
|
199
|
+
*
|
|
200
|
+
* Driven by interfaces/kgauto.md `best-practice-advisories` (IC, 2026-05-07).
|
|
201
|
+
* Phase 1 ships 4 starter rules sourced from the s14 kgauto comment +
|
|
202
|
+
* s15 empirical seed of brain anti-patterns:
|
|
203
|
+
*
|
|
204
|
+
* 1. `caching-off-on-claude` system ≥2000 chars on Anthropic, no cacheable=true
|
|
205
|
+
* 2. `single-chunk-system` Anthropic, only one PromptSection >1000 chars
|
|
206
|
+
* 3. `tool-bloat` >10 tools on a short-output archetype
|
|
207
|
+
* 4. `history-uncached-on-claude` Anthropic, ≥2 history messages, no historyCachePolicy
|
|
208
|
+
*
|
|
209
|
+
* Each rule is a pure function: (ir, result, profile) → BestPracticeAdvisory[].
|
|
210
|
+
* No side effects. No randomness. Deterministic for a given IR.
|
|
211
|
+
*
|
|
212
|
+
* The thresholds (2000 chars, 1000 chars, 10 tools, 2 history) are chosen
|
|
213
|
+
* to balance noise vs. signal — too low fires on innocuous calls, too high
|
|
214
|
+
* misses real waste. They may be tuned with brain evidence over time; for now
|
|
215
|
+
* they're literals in the rule bodies. Make them configurable when the
|
|
216
|
+
* cost-watcher's R-rules graduate to here.
|
|
217
|
+
*/
|
|
218
|
+
|
|
219
|
+
/** Subset of CompileResult fields the advisor needs. */
|
|
220
|
+
type AdvisorContext = Pick<CompileResult, 'target' | 'provider' | 'tokensIn' | 'diagnostics'>;
|
|
221
|
+
/**
|
|
222
|
+
* Run all Phase 1 rules and return collected advisories. Order is fixed
|
|
223
|
+
* (same as the rule list above) so output is stable across runs.
|
|
224
|
+
*/
|
|
225
|
+
declare function runAdvisor(ir: PromptIR, result: AdvisorContext, profile: ModelProfile): BestPracticeAdvisory[];
|
|
226
|
+
|
|
192
227
|
/**
|
|
193
228
|
* @warmdrift/kgauto v2 — prompt compiler + central learning brain.
|
|
194
229
|
*
|
|
@@ -235,4 +270,4 @@ declare function countTokens(text: string): number;
|
|
|
235
270
|
*/
|
|
236
271
|
declare function compile(ir: PromptIR, opts?: CompileOptions): CompileResult;
|
|
237
272
|
|
|
238
|
-
export { ApiKeys, type AppOracle, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, setTokenizer };
|
|
273
|
+
export { ApiKeys, type AppOracle, BestPracticeAdvisory, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, runAdvisor, setTokenizer };
|
package/dist/index.js
CHANGED
|
@@ -43,6 +43,7 @@ __export(index_exports, {
|
|
|
43
43
|
profilesByProvider: () => profilesByProvider,
|
|
44
44
|
record: () => record,
|
|
45
45
|
resetTokenizer: () => resetTokenizer,
|
|
46
|
+
runAdvisor: () => runAdvisor,
|
|
46
47
|
setTokenizer: () => setTokenizer,
|
|
47
48
|
tryGetProfile: () => tryGetProfile
|
|
48
49
|
});
|
|
@@ -489,10 +490,15 @@ function lower(ir, profile, hints = {}) {
|
|
|
489
490
|
}
|
|
490
491
|
function lowerAnthropic(ir, profile, hints) {
|
|
491
492
|
const systemBlocks = buildAnthropicSystemBlocks(ir.sections, profile);
|
|
492
|
-
const
|
|
493
|
+
const history = (ir.history ?? []).filter((m) => m.role !== "system");
|
|
494
|
+
const policy = ir.historyCachePolicy;
|
|
495
|
+
const markIndex = resolveHistoryMarkIndex(history.length, policy);
|
|
496
|
+
const messages = buildAnthropicMessages(history, ir.currentTurn, markIndex);
|
|
493
497
|
const tools = ir.tools ? toAnthropicTools(ir.tools) : void 0;
|
|
494
498
|
const cacheableTokens = computeCacheableTokens(systemBlocks);
|
|
495
|
-
const
|
|
499
|
+
const historyCacheableTokens = markIndex >= 0 ? sumHistoryTokens(history, markIndex) : 0;
|
|
500
|
+
const totalCacheableTokens = cacheableTokens + historyCacheableTokens;
|
|
501
|
+
const cacheSavings = totalCacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.1));
|
|
496
502
|
return {
|
|
497
503
|
request: {
|
|
498
504
|
provider: "anthropic",
|
|
@@ -504,6 +510,7 @@ function lowerAnthropic(ir, profile, hints) {
|
|
|
504
510
|
},
|
|
505
511
|
diagnostics: {
|
|
506
512
|
cacheableTokens,
|
|
513
|
+
historyCacheableTokens,
|
|
507
514
|
estimatedCacheSavingsUsd: cacheSavings
|
|
508
515
|
}
|
|
509
516
|
};
|
|
@@ -536,17 +543,64 @@ function buildAnthropicSystemBlocks(sections, profile) {
|
|
|
536
543
|
}
|
|
537
544
|
return blocks;
|
|
538
545
|
}
|
|
539
|
-
function buildAnthropicMessages(history, currentTurn) {
|
|
546
|
+
function buildAnthropicMessages(history, currentTurn, markIndex) {
|
|
540
547
|
const out = [];
|
|
541
|
-
for (
|
|
548
|
+
for (let i = 0; i < history.length; i++) {
|
|
549
|
+
const m = history[i];
|
|
542
550
|
if (m.role === "system") continue;
|
|
543
|
-
|
|
551
|
+
const shouldMark = i === markIndex;
|
|
552
|
+
out.push({
|
|
553
|
+
role: m.role,
|
|
554
|
+
content: shouldMark ? attachAnthropicCacheControl(m) : m.parts ?? m.content
|
|
555
|
+
});
|
|
544
556
|
}
|
|
545
557
|
if (currentTurn && currentTurn.role !== "system") {
|
|
546
558
|
out.push({ role: currentTurn.role, content: currentTurn.parts ?? currentTurn.content });
|
|
547
559
|
}
|
|
548
560
|
return out;
|
|
549
561
|
}
|
|
562
|
+
function attachAnthropicCacheControl(m) {
|
|
563
|
+
if (Array.isArray(m.parts) && m.parts.length > 0) {
|
|
564
|
+
const blocks = m.parts;
|
|
565
|
+
const last = blocks[blocks.length - 1];
|
|
566
|
+
const withMarker = {
|
|
567
|
+
...last,
|
|
568
|
+
cache_control: { type: "ephemeral" }
|
|
569
|
+
};
|
|
570
|
+
return [...blocks.slice(0, -1), withMarker];
|
|
571
|
+
}
|
|
572
|
+
return [
|
|
573
|
+
{
|
|
574
|
+
type: "text",
|
|
575
|
+
text: m.content,
|
|
576
|
+
cache_control: { type: "ephemeral" }
|
|
577
|
+
}
|
|
578
|
+
];
|
|
579
|
+
}
|
|
580
|
+
function resolveHistoryMarkIndex(historyLen, policy) {
|
|
581
|
+
if (!policy || policy.strategy === "none") return -1;
|
|
582
|
+
if (historyLen === 0) return -1;
|
|
583
|
+
if (policy.strategy === "all-but-latest") {
|
|
584
|
+
return historyLen - 1;
|
|
585
|
+
}
|
|
586
|
+
const idx = historyLen - 1 - policy.suffix;
|
|
587
|
+
return idx >= 0 ? idx : -1;
|
|
588
|
+
}
|
|
589
|
+
function sumHistoryTokens(history, throughIndex) {
|
|
590
|
+
let total = 0;
|
|
591
|
+
for (let i = 0; i <= throughIndex && i < history.length; i++) {
|
|
592
|
+
const m = history[i];
|
|
593
|
+
if (m.role === "system") continue;
|
|
594
|
+
if (Array.isArray(m.parts)) {
|
|
595
|
+
for (const p of m.parts) {
|
|
596
|
+
if (typeof p.text === "string") total += countTokens(p.text);
|
|
597
|
+
}
|
|
598
|
+
} else if (typeof m.content === "string") {
|
|
599
|
+
total += countTokens(m.content);
|
|
600
|
+
}
|
|
601
|
+
}
|
|
602
|
+
return total;
|
|
603
|
+
}
|
|
550
604
|
function toAnthropicTools(tools) {
|
|
551
605
|
return tools.map((t) => ({
|
|
552
606
|
name: t.name,
|
|
@@ -581,6 +635,9 @@ function lowerGoogle(ir, profile, hints) {
|
|
|
581
635
|
const minTokens = profile.lowering.cache.minTokens ?? 4096;
|
|
582
636
|
const meetsMin = cacheableTokens >= minTokens;
|
|
583
637
|
const cacheSavings = meetsMin ? cacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.25)) : 0;
|
|
638
|
+
const history = (ir.history ?? []).filter((m) => m.role !== "system");
|
|
639
|
+
const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
|
|
640
|
+
const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
|
|
584
641
|
return {
|
|
585
642
|
request: {
|
|
586
643
|
provider: "google",
|
|
@@ -592,6 +649,7 @@ function lowerGoogle(ir, profile, hints) {
|
|
|
592
649
|
},
|
|
593
650
|
diagnostics: {
|
|
594
651
|
cacheableTokens: meetsMin ? cacheableTokens : 0,
|
|
652
|
+
historyCacheableTokens,
|
|
595
653
|
estimatedCacheSavingsUsd: cacheSavings
|
|
596
654
|
}
|
|
597
655
|
};
|
|
@@ -639,6 +697,9 @@ function lowerOpenAI(ir, profile, hints) {
|
|
|
639
697
|
content: ir.currentTurn.parts ?? ir.currentTurn.content
|
|
640
698
|
});
|
|
641
699
|
}
|
|
700
|
+
const history = (ir.history ?? []).filter((m) => m.role !== "system");
|
|
701
|
+
const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
|
|
702
|
+
const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
|
|
642
703
|
return {
|
|
643
704
|
request: {
|
|
644
705
|
provider: "openai",
|
|
@@ -648,7 +709,11 @@ function lowerOpenAI(ir, profile, hints) {
|
|
|
648
709
|
response_format: ir.constraints?.structuredOutput ? { type: "json_object" } : void 0,
|
|
649
710
|
reasoning_effort: hints.forceTerseOutput ? "low" : void 0
|
|
650
711
|
},
|
|
651
|
-
diagnostics: {
|
|
712
|
+
diagnostics: {
|
|
713
|
+
cacheableTokens: 0,
|
|
714
|
+
historyCacheableTokens,
|
|
715
|
+
estimatedCacheSavingsUsd: 0
|
|
716
|
+
}
|
|
652
717
|
};
|
|
653
718
|
}
|
|
654
719
|
function toOpenAITools(tools) {
|
|
@@ -675,6 +740,9 @@ function lowerDeepSeek(ir, profile) {
|
|
|
675
740
|
content: ir.currentTurn.parts ?? ir.currentTurn.content
|
|
676
741
|
});
|
|
677
742
|
}
|
|
743
|
+
const history = (ir.history ?? []).filter((m) => m.role !== "system");
|
|
744
|
+
const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
|
|
745
|
+
const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
|
|
678
746
|
return {
|
|
679
747
|
request: {
|
|
680
748
|
provider: "deepseek",
|
|
@@ -689,7 +757,11 @@ function lowerDeepSeek(ir, profile) {
|
|
|
689
757
|
}
|
|
690
758
|
})) : void 0
|
|
691
759
|
},
|
|
692
|
-
diagnostics: {
|
|
760
|
+
diagnostics: {
|
|
761
|
+
cacheableTokens: 0,
|
|
762
|
+
historyCacheableTokens,
|
|
763
|
+
estimatedCacheSavingsUsd: 0
|
|
764
|
+
}
|
|
693
765
|
};
|
|
694
766
|
}
|
|
695
767
|
function sortSections(sections) {
|
|
@@ -1114,6 +1186,85 @@ function profilesByProvider(provider) {
|
|
|
1114
1186
|
return PROFILES_RAW.filter((p) => p.provider === provider);
|
|
1115
1187
|
}
|
|
1116
1188
|
|
|
1189
|
+
// src/advisor.ts
|
|
1190
|
+
function runAdvisor(ir, result, profile) {
|
|
1191
|
+
const out = [];
|
|
1192
|
+
out.push(...detectCachingOff(ir, profile));
|
|
1193
|
+
out.push(...detectSingleChunkSystem(ir, profile));
|
|
1194
|
+
out.push(...detectToolBloat(ir, result));
|
|
1195
|
+
out.push(...detectHistoryUncached(ir, profile));
|
|
1196
|
+
return out;
|
|
1197
|
+
}
|
|
1198
|
+
function detectCachingOff(ir, profile) {
|
|
1199
|
+
if (profile.provider !== "anthropic") return [];
|
|
1200
|
+
const totalChars = ir.sections.reduce((s, sec) => s + sec.text.length, 0);
|
|
1201
|
+
if (totalChars < 2e3) return [];
|
|
1202
|
+
const anyCacheable = ir.sections.some((s) => s.cacheable === true);
|
|
1203
|
+
if (anyCacheable) return [];
|
|
1204
|
+
return [
|
|
1205
|
+
{
|
|
1206
|
+
level: "warn",
|
|
1207
|
+
code: "caching-off-on-claude",
|
|
1208
|
+
message: `System prompt is ${totalChars} chars on Anthropic but no PromptSection has cacheable=true. Anthropic prompt caching cuts cached-prefix input cost by ~90% on subsequent calls; without it, every turn re-pays full price for the static system context.`,
|
|
1209
|
+
suggestion: "Mark stable system sections (role, persona, tool policy) with `cacheable: true`. The lowering pass concatenates cacheable sections into a single cache-controlled block before the dynamic ones.",
|
|
1210
|
+
docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
|
|
1211
|
+
}
|
|
1212
|
+
];
|
|
1213
|
+
}
|
|
1214
|
+
function detectSingleChunkSystem(ir, profile) {
|
|
1215
|
+
if (profile.provider !== "anthropic") return [];
|
|
1216
|
+
if (ir.sections.length !== 1) return [];
|
|
1217
|
+
const only = ir.sections[0];
|
|
1218
|
+
if (!only || only.text.length <= 1e3) return [];
|
|
1219
|
+
return [
|
|
1220
|
+
{
|
|
1221
|
+
level: "info",
|
|
1222
|
+
code: "single-chunk-system",
|
|
1223
|
+
message: `System prompt is a single ${only.text.length}-char chunk. Splitting into NamedChunks (static role/persona vs dynamic context) gives the lowering pass a finer cache-marker boundary \u2014 only the static portion needs to be byte-stable for the cache to hit.`,
|
|
1224
|
+
suggestion: "Refactor the system builder to return an array of `PromptSection` shaped { id, text, cacheable?: boolean }. Static chunks (role, persona, tool policy) get `cacheable: true`; dynamic ones (current context, today's date) don't.",
|
|
1225
|
+
docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
|
|
1226
|
+
}
|
|
1227
|
+
];
|
|
1228
|
+
}
|
|
1229
|
+
function detectToolBloat(ir, result) {
|
|
1230
|
+
const SHORT_OUTPUT = /* @__PURE__ */ new Set([
|
|
1231
|
+
"classify",
|
|
1232
|
+
"extract",
|
|
1233
|
+
"summarize",
|
|
1234
|
+
"transform",
|
|
1235
|
+
"critique"
|
|
1236
|
+
]);
|
|
1237
|
+
if (!ir.tools || ir.tools.length === 0) return [];
|
|
1238
|
+
const toolsKept = result.diagnostics.toolsKept;
|
|
1239
|
+
if (toolsKept <= 10) return [];
|
|
1240
|
+
if (!SHORT_OUTPUT.has(ir.intent.archetype)) return [];
|
|
1241
|
+
return [
|
|
1242
|
+
{
|
|
1243
|
+
level: "warn",
|
|
1244
|
+
code: "tool-bloat",
|
|
1245
|
+
message: `${toolsKept} tools kept after the relevance pass for archetype="${ir.intent.archetype}" (consumer declared ${ir.tools.length}). This archetype is short-output and rarely needs more than 3 tools; each tool definition eats ~350 tokens of context budget.`,
|
|
1246
|
+
suggestion: "Tighten `relevanceByIntent: { [archetype]: 0..1 }` per ToolDefinition. Tools below `toolRelevanceThreshold` (default 0.2) get dropped. Without `relevanceByIntent`, every tool defaults to neutral (0.5) and stays.",
|
|
1247
|
+
docsUrl: "https://github.com/stue/kgauto/blob/main/v2/README.md#tools"
|
|
1248
|
+
}
|
|
1249
|
+
];
|
|
1250
|
+
}
|
|
1251
|
+
function detectHistoryUncached(ir, profile) {
|
|
1252
|
+
if (profile.provider !== "anthropic") return [];
|
|
1253
|
+
if (!ir.history || ir.history.length < 2) return [];
|
|
1254
|
+
if (ir.historyCachePolicy && ir.historyCachePolicy.strategy !== "none") {
|
|
1255
|
+
return [];
|
|
1256
|
+
}
|
|
1257
|
+
return [
|
|
1258
|
+
{
|
|
1259
|
+
level: "warn",
|
|
1260
|
+
code: "history-uncached-on-claude",
|
|
1261
|
+
message: `${ir.history.length} history messages on Anthropic with no historyCachePolicy. Every turn re-pays for the full conversation context; with caching, subsequent turns hit the cache at ~10% the input cost.`,
|
|
1262
|
+
suggestion: "Set `historyCachePolicy: { strategy: 'all-but-latest' }` on this IR. The lowering pass marks the message immediately preceding currentTurn with cache_control; subsequent turns whose history prefix matches byte-for-byte hit the cache.",
|
|
1263
|
+
docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
|
|
1264
|
+
}
|
|
1265
|
+
];
|
|
1266
|
+
}
|
|
1267
|
+
|
|
1117
1268
|
// src/compile.ts
|
|
1118
1269
|
var counter = 0;
|
|
1119
1270
|
function makeHandle() {
|
|
@@ -1164,6 +1315,27 @@ function compile(ir, opts = {}) {
|
|
|
1164
1315
|
const handle = makeHandle();
|
|
1165
1316
|
const finalShape = computeShape(workingIR, inputTokens);
|
|
1166
1317
|
const _learningKey = learningKey(ir.intent.archetype, profile.id, finalShape);
|
|
1318
|
+
const diagnostics = {
|
|
1319
|
+
sectionsKept: workingIR.sections.length,
|
|
1320
|
+
sectionsDropped: ir.sections.length - workingIR.sections.length,
|
|
1321
|
+
toolsKept: workingIR.tools?.length ?? 0,
|
|
1322
|
+
toolsDropped: (ir.tools?.length ?? 0) - (workingIR.tools?.length ?? 0),
|
|
1323
|
+
historyKept: workingIR.history?.length ?? 0,
|
|
1324
|
+
historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
|
|
1325
|
+
cacheableTokens: lowered.diagnostics.cacheableTokens,
|
|
1326
|
+
estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd,
|
|
1327
|
+
historyCacheableTokens: lowered.diagnostics.historyCacheableTokens
|
|
1328
|
+
};
|
|
1329
|
+
const advisories = runAdvisor(
|
|
1330
|
+
ir,
|
|
1331
|
+
{
|
|
1332
|
+
target: profile.id,
|
|
1333
|
+
provider: profile.provider,
|
|
1334
|
+
tokensIn: inputTokens,
|
|
1335
|
+
diagnostics
|
|
1336
|
+
},
|
|
1337
|
+
profile
|
|
1338
|
+
);
|
|
1167
1339
|
return {
|
|
1168
1340
|
handle,
|
|
1169
1341
|
target: profile.id,
|
|
@@ -1173,16 +1345,8 @@ function compile(ir, opts = {}) {
|
|
|
1173
1345
|
estimatedCostUsd: target.estimatedCostUsd,
|
|
1174
1346
|
mutationsApplied: accumulatedMutations,
|
|
1175
1347
|
fallbackChain,
|
|
1176
|
-
|
|
1177
|
-
|
|
1178
|
-
sectionsDropped: ir.sections.length - workingIR.sections.length,
|
|
1179
|
-
toolsKept: workingIR.tools?.length ?? 0,
|
|
1180
|
-
toolsDropped: (ir.tools?.length ?? 0) - (workingIR.tools?.length ?? 0),
|
|
1181
|
-
historyKept: workingIR.history?.length ?? 0,
|
|
1182
|
-
historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
|
|
1183
|
-
cacheableTokens: lowered.diagnostics.cacheableTokens,
|
|
1184
|
-
estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd
|
|
1185
|
-
}
|
|
1348
|
+
advisories,
|
|
1349
|
+
diagnostics
|
|
1186
1350
|
};
|
|
1187
1351
|
}
|
|
1188
1352
|
function validateIR(ir) {
|
|
@@ -1266,7 +1430,8 @@ function registerCompile(appId, archetype, ir, result) {
|
|
|
1266
1430
|
learningKey: learningKey(archetype, result.target, shape),
|
|
1267
1431
|
estimatedTokensIn: tokens,
|
|
1268
1432
|
mutationsApplied: result.mutationsApplied.map((m) => m.id),
|
|
1269
|
-
startedAt: Date.now()
|
|
1433
|
+
startedAt: Date.now(),
|
|
1434
|
+
historyCacheableTokens: result.diagnostics.historyCacheableTokens
|
|
1270
1435
|
});
|
|
1271
1436
|
}
|
|
1272
1437
|
async function record(input) {
|
|
@@ -1339,7 +1504,8 @@ function buildPayload(input, reg) {
|
|
|
1339
1504
|
cache_read_input_tokens: input.cacheReadInputTokens,
|
|
1340
1505
|
cache_creation_input_tokens: input.cacheCreationInputTokens,
|
|
1341
1506
|
cost_usd_actual: costUsdActual,
|
|
1342
|
-
ttft_ms: input.ttftMs
|
|
1507
|
+
ttft_ms: input.ttftMs,
|
|
1508
|
+
history_cacheable_tokens: reg?.historyCacheableTokens
|
|
1343
1509
|
};
|
|
1344
1510
|
}
|
|
1345
1511
|
function computeCostUsd(modelId, tokensIn, tokensOut) {
|
|
@@ -1826,6 +1992,7 @@ function compile2(ir, opts) {
|
|
|
1826
1992
|
profilesByProvider,
|
|
1827
1993
|
record,
|
|
1828
1994
|
resetTokenizer,
|
|
1995
|
+
runAdvisor,
|
|
1829
1996
|
setTokenizer,
|
|
1830
1997
|
tryGetProfile
|
|
1831
1998
|
});
|
package/dist/index.mjs
CHANGED
|
@@ -374,10 +374,15 @@ function lower(ir, profile, hints = {}) {
|
|
|
374
374
|
}
|
|
375
375
|
function lowerAnthropic(ir, profile, hints) {
|
|
376
376
|
const systemBlocks = buildAnthropicSystemBlocks(ir.sections, profile);
|
|
377
|
-
const
|
|
377
|
+
const history = (ir.history ?? []).filter((m) => m.role !== "system");
|
|
378
|
+
const policy = ir.historyCachePolicy;
|
|
379
|
+
const markIndex = resolveHistoryMarkIndex(history.length, policy);
|
|
380
|
+
const messages = buildAnthropicMessages(history, ir.currentTurn, markIndex);
|
|
378
381
|
const tools = ir.tools ? toAnthropicTools(ir.tools) : void 0;
|
|
379
382
|
const cacheableTokens = computeCacheableTokens(systemBlocks);
|
|
380
|
-
const
|
|
383
|
+
const historyCacheableTokens = markIndex >= 0 ? sumHistoryTokens(history, markIndex) : 0;
|
|
384
|
+
const totalCacheableTokens = cacheableTokens + historyCacheableTokens;
|
|
385
|
+
const cacheSavings = totalCacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.1));
|
|
381
386
|
return {
|
|
382
387
|
request: {
|
|
383
388
|
provider: "anthropic",
|
|
@@ -389,6 +394,7 @@ function lowerAnthropic(ir, profile, hints) {
|
|
|
389
394
|
},
|
|
390
395
|
diagnostics: {
|
|
391
396
|
cacheableTokens,
|
|
397
|
+
historyCacheableTokens,
|
|
392
398
|
estimatedCacheSavingsUsd: cacheSavings
|
|
393
399
|
}
|
|
394
400
|
};
|
|
@@ -421,17 +427,64 @@ function buildAnthropicSystemBlocks(sections, profile) {
|
|
|
421
427
|
}
|
|
422
428
|
return blocks;
|
|
423
429
|
}
|
|
424
|
-
function buildAnthropicMessages(history, currentTurn) {
|
|
430
|
+
/**
 * Converts IR history plus the optional current turn into the Anthropic
 * `messages` array.
 *
 * System-role entries are skipped. The history entry whose position equals
 * `markIndex` has its content produced by `attachAnthropicCacheControl`;
 * every other entry passes `parts` through (or `content` when `parts` is
 * nullish). The current turn is appended last and is never marked.
 *
 * @param history     Prior messages, oldest first.
 * @param currentTurn Latest message; ignored when falsy or system-role.
 * @param markIndex   Position in `history` to mark; a value matching no
 *                    position (e.g. -1) marks nothing.
 * @returns Array of `{ role, content }` objects.
 */
function buildAnthropicMessages(history, currentTurn, markIndex) {
  const messages = [];
  for (const [position, entry] of history.entries()) {
    if (entry.role === "system") continue;
    let content;
    if (position === markIndex) {
      content = attachAnthropicCacheControl(entry);
    } else {
      content = entry.parts ?? entry.content;
    }
    messages.push({ role: entry.role, content });
  }
  const hasUsableTurn = currentTurn && currentTurn.role !== "system";
  if (hasUsableTurn) {
    messages.push({
      role: currentTurn.role,
      content: currentTurn.parts ?? currentTurn.content
    });
  }
  return messages;
}
|
|
446
|
+
/**
 * Builds the content-block array for message `m` with an ephemeral
 * `cache_control` marker on the final block.
 *
 * When `m.parts` is a non-empty array, a new array is returned in which only
 * the last block is replaced by a shallow clone carrying the marker; the
 * input array and its blocks are left untouched. Otherwise `m.content` is
 * wrapped in a single marked text block.
 *
 * @param m Message with `content` and, optionally, `parts`.
 * @returns A new array of content blocks.
 */
function attachAnthropicCacheControl(m) {
  const parts = m.parts;
  const hasParts = Array.isArray(parts) && parts.length > 0;
  if (!hasParts) {
    return [{ type: "text", text: m.content, cache_control: { type: "ephemeral" } }];
  }
  const marked = Array.from(parts);
  const lastPos = marked.length - 1;
  marked[lastPos] = { ...marked[lastPos], cache_control: { type: "ephemeral" } };
  return marked;
}
|
|
464
|
+
/**
 * Resolves which history position should carry the cache marker.
 *
 * @param historyLen Number of messages in the history array the index applies to.
 * @param policy     Optional history cache policy (`strategy` of "none",
 *                   "all-but-latest", or "fixed-suffix" with a `suffix` count).
 * @returns The 0-based index of the message to mark, or -1 when nothing
 *          should be marked (no policy, strategy "none", empty history, a
 *          suffix that reaches past the start of history, or an invalid suffix).
 */
function resolveHistoryMarkIndex(historyLen, policy) {
  if (!policy || policy.strategy === "none") return -1;
  if (historyLen === 0) return -1;
  if (policy.strategy === "all-but-latest") {
    // The marker sits on the last history entry.
    return historyLen - 1;
  }
  const suffix = policy.suffix;
  // A negative suffix would point past the end of history and a fractional one
  // would point between messages. Neither index matches any message, so no
  // marker could be attached, yet a caller testing `index >= 0` would still
  // treat the prefix as cached. Report "no marker" instead.
  if (!Number.isInteger(suffix) || suffix < 0) return -1;
  const idx = historyLen - 1 - suffix;
  return idx >= 0 ? idx : -1;
}
|
|
473
|
+
/**
 * Adds up the token counts of `history[0]` through `history[throughIndex]`
 * (inclusive), stopping early at the end of the array.
 *
 * System-role messages contribute nothing. For a message with a `parts`
 * array, only parts whose `text` is a string are counted; otherwise a string
 * `content` is counted. Token counting is delegated to `countTokens`.
 *
 * @param history      Messages to scan, oldest first.
 * @param throughIndex Last index to include.
 * @returns Total token count for the scanned prefix.
 */
function sumHistoryTokens(history, throughIndex) {
  let tokens = 0;
  let position = 0;
  while (position <= throughIndex && position < history.length) {
    const message = history[position];
    position += 1;
    if (message.role === "system") continue;
    if (Array.isArray(message.parts)) {
      for (const part of message.parts) {
        if (typeof part.text !== "string") continue;
        tokens += countTokens(part.text);
      }
      continue;
    }
    if (typeof message.content === "string") {
      tokens += countTokens(message.content);
    }
  }
  return tokens;
}
|
|
435
488
|
function toAnthropicTools(tools) {
|
|
436
489
|
return tools.map((t) => ({
|
|
437
490
|
name: t.name,
|
|
@@ -466,6 +519,9 @@ function lowerGoogle(ir, profile, hints) {
|
|
|
466
519
|
const minTokens = profile.lowering.cache.minTokens ?? 4096;
|
|
467
520
|
const meetsMin = cacheableTokens >= minTokens;
|
|
468
521
|
const cacheSavings = meetsMin ? cacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.25)) : 0;
|
|
522
|
+
const history = (ir.history ?? []).filter((m) => m.role !== "system");
|
|
523
|
+
const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
|
|
524
|
+
const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
|
|
469
525
|
return {
|
|
470
526
|
request: {
|
|
471
527
|
provider: "google",
|
|
@@ -477,6 +533,7 @@ function lowerGoogle(ir, profile, hints) {
|
|
|
477
533
|
},
|
|
478
534
|
diagnostics: {
|
|
479
535
|
cacheableTokens: meetsMin ? cacheableTokens : 0,
|
|
536
|
+
historyCacheableTokens,
|
|
480
537
|
estimatedCacheSavingsUsd: cacheSavings
|
|
481
538
|
}
|
|
482
539
|
};
|
|
@@ -524,6 +581,9 @@ function lowerOpenAI(ir, profile, hints) {
|
|
|
524
581
|
content: ir.currentTurn.parts ?? ir.currentTurn.content
|
|
525
582
|
});
|
|
526
583
|
}
|
|
584
|
+
const history = (ir.history ?? []).filter((m) => m.role !== "system");
|
|
585
|
+
const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
|
|
586
|
+
const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
|
|
527
587
|
return {
|
|
528
588
|
request: {
|
|
529
589
|
provider: "openai",
|
|
@@ -533,7 +593,11 @@ function lowerOpenAI(ir, profile, hints) {
|
|
|
533
593
|
response_format: ir.constraints?.structuredOutput ? { type: "json_object" } : void 0,
|
|
534
594
|
reasoning_effort: hints.forceTerseOutput ? "low" : void 0
|
|
535
595
|
},
|
|
536
|
-
diagnostics: {
|
|
596
|
+
diagnostics: {
|
|
597
|
+
cacheableTokens: 0,
|
|
598
|
+
historyCacheableTokens,
|
|
599
|
+
estimatedCacheSavingsUsd: 0
|
|
600
|
+
}
|
|
537
601
|
};
|
|
538
602
|
}
|
|
539
603
|
function toOpenAITools(tools) {
|
|
@@ -560,6 +624,9 @@ function lowerDeepSeek(ir, profile) {
|
|
|
560
624
|
content: ir.currentTurn.parts ?? ir.currentTurn.content
|
|
561
625
|
});
|
|
562
626
|
}
|
|
627
|
+
const history = (ir.history ?? []).filter((m) => m.role !== "system");
|
|
628
|
+
const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
|
|
629
|
+
const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
|
|
563
630
|
return {
|
|
564
631
|
request: {
|
|
565
632
|
provider: "deepseek",
|
|
@@ -574,7 +641,11 @@ function lowerDeepSeek(ir, profile) {
|
|
|
574
641
|
}
|
|
575
642
|
})) : void 0
|
|
576
643
|
},
|
|
577
|
-
diagnostics: {
|
|
644
|
+
diagnostics: {
|
|
645
|
+
cacheableTokens: 0,
|
|
646
|
+
historyCacheableTokens,
|
|
647
|
+
estimatedCacheSavingsUsd: 0
|
|
648
|
+
}
|
|
578
649
|
};
|
|
579
650
|
}
|
|
580
651
|
function sortSections(sections) {
|
|
@@ -597,6 +668,85 @@ function setNestedField(obj, path, value) {
|
|
|
597
668
|
cursor[parts[parts.length - 1]] = value;
|
|
598
669
|
}
|
|
599
670
|
|
|
671
|
+
// src/advisor.ts
|
|
672
|
+
/**
 * Runs every best-practice detector and returns their findings as one flat
 * list, in detector order: caching-off, single-chunk system, tool bloat,
 * uncached history.
 *
 * @param ir      Prompt IR passed to each detector.
 * @param result  Compile result data; only the tool-bloat detector receives it.
 * @param profile Selected model profile, passed to the provider-specific detectors.
 * @returns Array of advisories; empty when no detector fires.
 */
function runAdvisor(ir, result, profile) {
  return [
    ...detectCachingOff(ir, profile),
    ...detectSingleChunkSystem(ir, profile),
    ...detectToolBloat(ir, result),
    ...detectHistoryUncached(ir, profile)
  ];
}
|
|
680
|
+
/**
 * Advisory `caching-off-on-claude`: fires for Anthropic profiles when the
 * combined section text is at least 2000 characters and no section has
 * `cacheable === true`.
 *
 * @param ir      Prompt IR; reads `sections[].text` and `sections[].cacheable`.
 * @param profile Model profile; reads `provider`.
 * @returns A one-element advisory array, or an empty array when the rule does not apply.
 */
function detectCachingOff(ir, profile) {
  const onAnthropic = profile.provider === "anthropic";
  if (!onAnthropic) return [];

  let totalChars = 0;
  ir.sections.forEach((section) => {
    totalChars += section.text.length;
  });
  if (totalChars < 2e3) return [];

  const noneCacheable = ir.sections.every((section) => section.cacheable !== true);
  if (!noneCacheable) return [];

  const advisory = {
    level: "warn",
    code: "caching-off-on-claude",
    message: `System prompt is ${totalChars} chars on Anthropic but no PromptSection has cacheable=true. Anthropic prompt caching cuts cached-prefix input cost by ~90% on subsequent calls; without it, every turn re-pays full price for the static system context.`,
    suggestion: "Mark stable system sections (role, persona, tool policy) with `cacheable: true`. The lowering pass concatenates cacheable sections into a single cache-controlled block before the dynamic ones.",
    docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
  };
  return [advisory];
}
|
|
696
|
+
/**
 * Advisory `single-chunk-system`: fires for Anthropic profiles when the IR
 * has exactly one section and that section's text is longer than 1000
 * characters.
 *
 * @param ir      Prompt IR; reads `sections`.
 * @param profile Model profile; reads `provider`.
 * @returns A one-element advisory array, or an empty array when the rule does not apply.
 */
function detectSingleChunkSystem(ir, profile) {
  if (profile.provider !== "anthropic" || ir.sections.length !== 1) {
    return [];
  }
  const section = ir.sections[0];
  if (!section) return [];
  if (section.text.length <= 1e3) return [];

  const advisory = {
    level: "info",
    code: "single-chunk-system",
    message: `System prompt is a single ${section.text.length}-char chunk. Splitting into NamedChunks (static role/persona vs dynamic context) gives the lowering pass a finer cache-marker boundary \u2014 only the static portion needs to be byte-stable for the cache to hit.`,
    suggestion: "Refactor the system builder to return an array of `PromptSection` shaped { id, text, cacheable?: boolean }. Static chunks (role, persona, tool policy) get `cacheable: true`; dynamic ones (current context, today's date) don't.",
    docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
  };
  return [advisory];
}
|
|
711
|
+
function detectToolBloat(ir, result) {
|
|
712
|
+
const SHORT_OUTPUT = /* @__PURE__ */ new Set([
|
|
713
|
+
"classify",
|
|
714
|
+
"extract",
|
|
715
|
+
"summarize",
|
|
716
|
+
"transform",
|
|
717
|
+
"critique"
|
|
718
|
+
]);
|
|
719
|
+
if (!ir.tools || ir.tools.length === 0) return [];
|
|
720
|
+
const toolsKept = result.diagnostics.toolsKept;
|
|
721
|
+
if (toolsKept <= 10) return [];
|
|
722
|
+
if (!SHORT_OUTPUT.has(ir.intent.archetype)) return [];
|
|
723
|
+
return [
|
|
724
|
+
{
|
|
725
|
+
level: "warn",
|
|
726
|
+
code: "tool-bloat",
|
|
727
|
+
message: `${toolsKept} tools kept after the relevance pass for archetype="${ir.intent.archetype}" (consumer declared ${ir.tools.length}). This archetype is short-output and rarely needs more than 3 tools; each tool definition eats ~350 tokens of context budget.`,
|
|
728
|
+
suggestion: "Tighten `relevanceByIntent: { [archetype]: 0..1 }` per ToolDefinition. Tools below `toolRelevanceThreshold` (default 0.2) get dropped. Without `relevanceByIntent`, every tool defaults to neutral (0.5) and stays.",
|
|
729
|
+
docsUrl: "https://github.com/stue/kgauto/blob/main/v2/README.md#tools"
|
|
730
|
+
}
|
|
731
|
+
];
|
|
732
|
+
}
|
|
733
|
+
/**
 * Advisory `history-uncached-on-claude`: fires for Anthropic profiles when
 * the IR carries at least two history messages and either has no
 * `historyCachePolicy` or has one whose strategy is "none".
 *
 * @param ir      Prompt IR; reads `history` and `historyCachePolicy`.
 * @param profile Model profile; reads `provider`.
 * @returns A one-element advisory array, or an empty array when the rule does not apply.
 */
function detectHistoryUncached(ir, profile) {
  if (profile.provider !== "anthropic") return [];

  const tooShort = !ir.history || ir.history.length < 2;
  if (tooShort) return [];

  const policy = ir.historyCachePolicy;
  const cachingRequested = policy ? policy.strategy !== "none" : false;
  if (cachingRequested) return [];

  const advisory = {
    level: "warn",
    code: "history-uncached-on-claude",
    message: `${ir.history.length} history messages on Anthropic with no historyCachePolicy. Every turn re-pays for the full conversation context; with caching, subsequent turns hit the cache at ~10% the input cost.`,
    suggestion: "Set `historyCachePolicy: { strategy: 'all-but-latest' }` on this IR. The lowering pass marks the message immediately preceding currentTurn with cache_control; subsequent turns whose history prefix matches byte-for-byte hit the cache.",
    docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
  };
  return [advisory];
}
|
|
749
|
+
|
|
600
750
|
// src/compile.ts
|
|
601
751
|
var counter = 0;
|
|
602
752
|
function makeHandle() {
|
|
@@ -647,6 +797,27 @@ function compile(ir, opts = {}) {
|
|
|
647
797
|
const handle = makeHandle();
|
|
648
798
|
const finalShape = computeShape(workingIR, inputTokens);
|
|
649
799
|
const _learningKey = learningKey(ir.intent.archetype, profile.id, finalShape);
|
|
800
|
+
const diagnostics = {
|
|
801
|
+
sectionsKept: workingIR.sections.length,
|
|
802
|
+
sectionsDropped: ir.sections.length - workingIR.sections.length,
|
|
803
|
+
toolsKept: workingIR.tools?.length ?? 0,
|
|
804
|
+
toolsDropped: (ir.tools?.length ?? 0) - (workingIR.tools?.length ?? 0),
|
|
805
|
+
historyKept: workingIR.history?.length ?? 0,
|
|
806
|
+
historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
|
|
807
|
+
cacheableTokens: lowered.diagnostics.cacheableTokens,
|
|
808
|
+
estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd,
|
|
809
|
+
historyCacheableTokens: lowered.diagnostics.historyCacheableTokens
|
|
810
|
+
};
|
|
811
|
+
const advisories = runAdvisor(
|
|
812
|
+
ir,
|
|
813
|
+
{
|
|
814
|
+
target: profile.id,
|
|
815
|
+
provider: profile.provider,
|
|
816
|
+
tokensIn: inputTokens,
|
|
817
|
+
diagnostics
|
|
818
|
+
},
|
|
819
|
+
profile
|
|
820
|
+
);
|
|
650
821
|
return {
|
|
651
822
|
handle,
|
|
652
823
|
target: profile.id,
|
|
@@ -656,16 +827,8 @@ function compile(ir, opts = {}) {
|
|
|
656
827
|
estimatedCostUsd: target.estimatedCostUsd,
|
|
657
828
|
mutationsApplied: accumulatedMutations,
|
|
658
829
|
fallbackChain,
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
sectionsDropped: ir.sections.length - workingIR.sections.length,
|
|
662
|
-
toolsKept: workingIR.tools?.length ?? 0,
|
|
663
|
-
toolsDropped: (ir.tools?.length ?? 0) - (workingIR.tools?.length ?? 0),
|
|
664
|
-
historyKept: workingIR.history?.length ?? 0,
|
|
665
|
-
historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
|
|
666
|
-
cacheableTokens: lowered.diagnostics.cacheableTokens,
|
|
667
|
-
estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd
|
|
668
|
-
}
|
|
830
|
+
advisories,
|
|
831
|
+
diagnostics
|
|
669
832
|
};
|
|
670
833
|
}
|
|
671
834
|
function validateIR(ir) {
|
|
@@ -749,7 +912,8 @@ function registerCompile(appId, archetype, ir, result) {
|
|
|
749
912
|
learningKey: learningKey(archetype, result.target, shape),
|
|
750
913
|
estimatedTokensIn: tokens,
|
|
751
914
|
mutationsApplied: result.mutationsApplied.map((m) => m.id),
|
|
752
|
-
startedAt: Date.now()
|
|
915
|
+
startedAt: Date.now(),
|
|
916
|
+
historyCacheableTokens: result.diagnostics.historyCacheableTokens
|
|
753
917
|
});
|
|
754
918
|
}
|
|
755
919
|
async function record(input) {
|
|
@@ -822,7 +986,8 @@ function buildPayload(input, reg) {
|
|
|
822
986
|
cache_read_input_tokens: input.cacheReadInputTokens,
|
|
823
987
|
cache_creation_input_tokens: input.cacheCreationInputTokens,
|
|
824
988
|
cost_usd_actual: costUsdActual,
|
|
825
|
-
ttft_ms: input.ttftMs
|
|
989
|
+
ttft_ms: input.ttftMs,
|
|
990
|
+
history_cacheable_tokens: reg?.historyCacheableTokens
|
|
826
991
|
};
|
|
827
992
|
}
|
|
828
993
|
function computeCostUsd(modelId, tokensIn, tokensOut) {
|
|
@@ -1308,6 +1473,7 @@ export {
|
|
|
1308
1473
|
profilesByProvider,
|
|
1309
1474
|
record,
|
|
1310
1475
|
resetTokenizer,
|
|
1476
|
+
runAdvisor,
|
|
1311
1477
|
setTokenizer,
|
|
1312
1478
|
tryGetProfile
|
|
1313
1479
|
};
|
|
@@ -91,6 +91,40 @@ interface Constraints {
|
|
|
91
91
|
/** Override target model selection — if set, compiler uses this instead of routing. */
|
|
92
92
|
forceModel?: string;
|
|
93
93
|
}
|
|
94
|
+
/**
|
|
95
|
+
* Cache marker policy for the messages array (history + currentTurn).
|
|
96
|
+
*
|
|
97
|
+
* Anthropic positional caching: a `cache_control` marker on a content block
|
|
98
|
+
* tells the API "remember the prefix up through this block." On a subsequent
|
|
99
|
+
 * request whose first N tokens match, those N tokens are billed at the cached rate
|
|
100
|
+
* (10% of the input price). Without a marker, every call re-pays for the
|
|
101
|
+
* entire history.
|
|
102
|
+
*
|
|
103
|
+
* - `'none'` (default when omitted): no history cache marker. System-level
|
|
104
|
+
* cache markers from `PromptSection.cacheable=true` still apply.
|
|
105
|
+
* - `'all-but-latest'`: marks the message immediately preceding `currentTurn`
|
|
106
|
+
* (the last history entry). On the next call, that entire history prefix
|
|
107
|
+
* is cacheable. Good fit for chat/agent loops where every prior turn is
|
|
108
|
+
* stable.
|
|
109
|
+
* - `'fixed-suffix'`: marks the message `suffix` positions from the end of
|
|
110
|
+
* `history`. Use when the last few turns are volatile (e.g., scratchpad,
|
|
111
|
+
* draft revisions) but the earlier prefix is stable.
|
|
112
|
+
*
|
|
113
|
+
* For non-Anthropic providers, no wire-format marker is emitted (Gemini /
|
|
114
|
+
* OpenAI / DeepSeek implicit caching takes effect automatically when a
|
|
115
|
+
* stable prefix is reused). The compiler still computes
|
|
116
|
+
* `diagnostics.historyCacheableTokens` for telemetry on every provider.
|
|
117
|
+
*
|
|
118
|
+
* alpha.5.
|
|
119
|
+
*/
|
|
120
|
+
type HistoryCachePolicy =
    | { strategy: 'none' }
    | { strategy: 'all-but-latest' }
    | {
        strategy: 'fixed-suffix';
        /** How many positions back from the last `history` entry the marked message sits. */
        suffix: number;
    };
|
|
94
128
|
/**
|
|
95
129
|
* Consumer-declared policy for model selection. Lives outside the IR
|
|
96
130
|
* (passed via CompileOptions) because it's a SESSION/APP-level constraint,
|
|
@@ -146,6 +180,12 @@ interface PromptIR {
|
|
|
146
180
|
models: string[];
|
|
147
181
|
/** Compile constraints. */
|
|
148
182
|
constraints?: Constraints;
|
|
183
|
+
/**
|
|
184
|
+
* Cache marker placement policy for the messages array. Default = no
|
|
185
|
+
* history cache markers. See `HistoryCachePolicy` for semantics.
|
|
186
|
+
* alpha.5.
|
|
187
|
+
*/
|
|
188
|
+
historyCachePolicy?: HistoryCachePolicy;
|
|
149
189
|
}
|
|
150
190
|
type Provider = 'anthropic' | 'google' | 'openai' | 'deepseek' | 'mistral' | 'xai';
|
|
151
191
|
/**
|
|
@@ -213,6 +253,41 @@ type CompiledRequest = {
|
|
|
213
253
|
}>;
|
|
214
254
|
tools?: unknown[];
|
|
215
255
|
};
|
|
256
|
+
/**
|
|
257
|
+
* Best-practice advisory emitted by the compiler at compile time. Non-fatal —
|
|
258
|
+
* consumers log, surface in dev tools, gate on `level === 'critical'` in CI,
|
|
259
|
+
* or ignore. The advisor inspects the IR + selected profile + diagnostics
|
|
260
|
+
* and emits one entry per detected gap.
|
|
261
|
+
*
|
|
262
|
+
* Codes are stable across releases. `suggestion` and `docsUrl` are optional
|
|
263
|
+
* but encouraged: suggestion = the actionable diff; docsUrl = the
|
|
264
|
+
* interfaces/kgauto.md anchor for context.
|
|
265
|
+
*
|
|
266
|
+
* alpha.6 Phase 1 starter rules:
|
|
267
|
+
* - `caching-off-on-claude` (warn) system >2000 chars on Anthropic, no cacheable=true
|
|
268
|
+
* - `single-chunk-system` (info) Anthropic, only one PromptSection >1000 chars
|
|
269
|
+
* - `tool-bloat` (warn) >10 tools on a short-output archetype
|
|
270
|
+
* - `history-uncached-on-claude` (warn) Anthropic, ≥2 history messages, no historyCachePolicy
|
|
271
|
+
*
|
|
272
|
+
* Phase 2 (catalog as `bestPractices` block in profiles) and Phase 3 (brain
|
|
273
|
+
* telemetry on `advisories_fired`) are alpha.7+ territory.
|
|
274
|
+
*/
|
|
275
|
+
interface BestPracticeAdvisory {
    /**
     * How serious the finding is. `info` is informational; `warn` flags a
     * behavioral pattern that is usually expensive or wrong; `critical`
     * flags a likely bug or production-grade misuse. Phase 1 rules emit
     * only `info` and `warn`.
     */
    level: 'info' | 'warn' | 'critical';
    /** Stable kebab-case rule identifier; consumers filter or gate on it. */
    code: string;
    /** Human-readable description of what was detected. */
    message: string;
    /** Optional remediation hint: an actionable diff or pattern. */
    suggestion?: string;
    /** Optional link to a documentation anchor with more context. */
    docsUrl?: string;
}
|
|
216
291
|
interface CompileResult {
|
|
217
292
|
/** Unique handle for this call — pass to record() to correlate the outcome. */
|
|
218
293
|
handle: string;
|
|
@@ -230,6 +305,11 @@ interface CompileResult {
|
|
|
230
305
|
mutationsApplied: MutationApplied[];
|
|
231
306
|
/** Fallback chain — try these in order if target fails. */
|
|
232
307
|
fallbackChain: string[];
|
|
308
|
+
/**
|
|
309
|
+
* Best-practice advisories emitted by the compiler. Non-fatal. Empty
|
|
310
|
+
* array when no rules fired. alpha.6 Phase 1.
|
|
311
|
+
*/
|
|
312
|
+
advisories: BestPracticeAdvisory[];
|
|
233
313
|
/** Diagnostics for caller-side logging. */
|
|
234
314
|
diagnostics: {
|
|
235
315
|
sectionsKept: number;
|
|
@@ -240,6 +320,16 @@ interface CompileResult {
|
|
|
240
320
|
historyDropped: number;
|
|
241
321
|
cacheableTokens: number;
|
|
242
322
|
estimatedCacheSavingsUsd: number;
|
|
323
|
+
/**
|
|
324
|
+
 * Tokens in `history` that
|
|
325
|
+
* fall within the cacheable prefix per `historyCachePolicy`. Always
|
|
326
|
+
* computed; only Anthropic actually emits a wire-format marker. For
|
|
327
|
+
* Gemini / OpenAI / DeepSeek, this represents the theoretical cacheable
|
|
328
|
+
* prefix that implicit caching may pick up — useful telemetry for the
|
|
329
|
+
* brain to learn which (app, model, archetype) tuples benefit most
|
|
330
|
+
* from history caching. alpha.5.
|
|
331
|
+
*/
|
|
332
|
+
historyCacheableTokens: number;
|
|
243
333
|
};
|
|
244
334
|
}
|
|
245
335
|
/**
|
|
@@ -518,4 +608,4 @@ declare function tryGetProfile(id: string): ModelProfile | undefined;
|
|
|
518
608
|
declare function allProfiles(): readonly ModelProfile[];
|
|
519
609
|
declare function profilesByProvider(provider: Provider): readonly ModelProfile[];
|
|
520
610
|
|
|
521
|
-
export { type ApiKeys as A, type CompilePolicy as C, type IntentDeclaration as I, type LoweringSpec as L, type ModelProfile as M, type NormalizedResponse as N, type OracleScore as O, type ProviderOverrides as P, type RecordInput as R, type StructuredOutputCapability as S, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type CompileResult as e, ALIASES as f, type CacheStrategy as g, type CallAttempt as h, CallError as i, type CliffRule as j, type Constraints as k, type Message as l, type MutationApplied as m, type NormalizedTokens as n, type PromptSection as o, type Provider as p, type RecoveryRule as q, type SystemPromptMode as r, type ToolDefinition as s, allProfiles as t, getProfile as u, profilesByProvider as v, tryGetProfile as w };
|
|
611
|
+
export { type ApiKeys as A, type BestPracticeAdvisory as B, type CompilePolicy as C, type HistoryCachePolicy as H, type IntentDeclaration as I, type LoweringSpec as L, type ModelProfile as M, type NormalizedResponse as N, type OracleScore as O, type ProviderOverrides as P, type RecordInput as R, type StructuredOutputCapability as S, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type CompileResult as e, ALIASES as f, type CacheStrategy as g, type CallAttempt as h, CallError as i, type CliffRule as j, type Constraints as k, type Message as l, type MutationApplied as m, type NormalizedTokens as n, type PromptSection as o, type Provider as p, type RecoveryRule as q, type SystemPromptMode as r, type ToolDefinition as s, allProfiles as t, getProfile as u, profilesByProvider as v, tryGetProfile as w };
|
|
@@ -91,6 +91,40 @@ interface Constraints {
|
|
|
91
91
|
/** Override target model selection — if set, compiler uses this instead of routing. */
|
|
92
92
|
forceModel?: string;
|
|
93
93
|
}
|
|
94
|
+
/**
|
|
95
|
+
* Cache marker policy for the messages array (history + currentTurn).
|
|
96
|
+
*
|
|
97
|
+
* Anthropic positional caching: a `cache_control` marker on a content block
|
|
98
|
+
* tells the API "remember the prefix up through this block." On a subsequent
|
|
99
|
+
 * request whose first N tokens match, those N tokens are billed at the cached rate
|
|
100
|
+
* (10% of the input price). Without a marker, every call re-pays for the
|
|
101
|
+
* entire history.
|
|
102
|
+
*
|
|
103
|
+
* - `'none'` (default when omitted): no history cache marker. System-level
|
|
104
|
+
* cache markers from `PromptSection.cacheable=true` still apply.
|
|
105
|
+
* - `'all-but-latest'`: marks the message immediately preceding `currentTurn`
|
|
106
|
+
* (the last history entry). On the next call, that entire history prefix
|
|
107
|
+
* is cacheable. Good fit for chat/agent loops where every prior turn is
|
|
108
|
+
* stable.
|
|
109
|
+
* - `'fixed-suffix'`: marks the message `suffix` positions from the end of
|
|
110
|
+
* `history`. Use when the last few turns are volatile (e.g., scratchpad,
|
|
111
|
+
* draft revisions) but the earlier prefix is stable.
|
|
112
|
+
*
|
|
113
|
+
* For non-Anthropic providers, no wire-format marker is emitted (Gemini /
|
|
114
|
+
* OpenAI / DeepSeek implicit caching takes effect automatically when a
|
|
115
|
+
* stable prefix is reused). The compiler still computes
|
|
116
|
+
* `diagnostics.historyCacheableTokens` for telemetry on every provider.
|
|
117
|
+
*
|
|
118
|
+
* alpha.5.
|
|
119
|
+
*/
|
|
120
|
+
type HistoryCachePolicy =
    | { strategy: 'none' }
    | { strategy: 'all-but-latest' }
    | {
        strategy: 'fixed-suffix';
        /** How many positions back from the last `history` entry the marked message sits. */
        suffix: number;
    };
|
|
94
128
|
/**
|
|
95
129
|
* Consumer-declared policy for model selection. Lives outside the IR
|
|
96
130
|
* (passed via CompileOptions) because it's a SESSION/APP-level constraint,
|
|
@@ -146,6 +180,12 @@ interface PromptIR {
|
|
|
146
180
|
models: string[];
|
|
147
181
|
/** Compile constraints. */
|
|
148
182
|
constraints?: Constraints;
|
|
183
|
+
/**
|
|
184
|
+
* Cache marker placement policy for the messages array. Default = no
|
|
185
|
+
* history cache markers. See `HistoryCachePolicy` for semantics.
|
|
186
|
+
* alpha.5.
|
|
187
|
+
*/
|
|
188
|
+
historyCachePolicy?: HistoryCachePolicy;
|
|
149
189
|
}
|
|
150
190
|
type Provider = 'anthropic' | 'google' | 'openai' | 'deepseek' | 'mistral' | 'xai';
|
|
151
191
|
/**
|
|
@@ -213,6 +253,41 @@ type CompiledRequest = {
|
|
|
213
253
|
}>;
|
|
214
254
|
tools?: unknown[];
|
|
215
255
|
};
|
|
256
|
+
/**
|
|
257
|
+
* Best-practice advisory emitted by the compiler at compile time. Non-fatal —
|
|
258
|
+
* consumers log, surface in dev tools, gate on `level === 'critical'` in CI,
|
|
259
|
+
* or ignore. The advisor inspects the IR + selected profile + diagnostics
|
|
260
|
+
* and emits one entry per detected gap.
|
|
261
|
+
*
|
|
262
|
+
* Codes are stable across releases. `suggestion` and `docsUrl` are optional
|
|
263
|
+
* but encouraged: suggestion = the actionable diff; docsUrl = the
|
|
264
|
+
* interfaces/kgauto.md anchor for context.
|
|
265
|
+
*
|
|
266
|
+
* alpha.6 Phase 1 starter rules:
|
|
267
|
+
* - `caching-off-on-claude` (warn) system >2000 chars on Anthropic, no cacheable=true
|
|
268
|
+
* - `single-chunk-system` (info) Anthropic, only one PromptSection >1000 chars
|
|
269
|
+
* - `tool-bloat` (warn) >10 tools on a short-output archetype
|
|
270
|
+
* - `history-uncached-on-claude` (warn) Anthropic, ≥2 history messages, no historyCachePolicy
|
|
271
|
+
*
|
|
272
|
+
* Phase 2 (catalog as `bestPractices` block in profiles) and Phase 3 (brain
|
|
273
|
+
* telemetry on `advisories_fired`) are alpha.7+ territory.
|
|
274
|
+
*/
|
|
275
|
+
interface BestPracticeAdvisory {
    /**
     * How serious the finding is. `info` is informational; `warn` flags a
     * behavioral pattern that is usually expensive or wrong; `critical`
     * flags a likely bug or production-grade misuse. Phase 1 rules emit
     * only `info` and `warn`.
     */
    level: 'info' | 'warn' | 'critical';
    /** Stable kebab-case rule identifier; consumers filter or gate on it. */
    code: string;
    /** Human-readable description of what was detected. */
    message: string;
    /** Optional remediation hint: an actionable diff or pattern. */
    suggestion?: string;
    /** Optional link to a documentation anchor with more context. */
    docsUrl?: string;
}
|
|
216
291
|
interface CompileResult {
|
|
217
292
|
/** Unique handle for this call — pass to record() to correlate the outcome. */
|
|
218
293
|
handle: string;
|
|
@@ -230,6 +305,11 @@ interface CompileResult {
|
|
|
230
305
|
mutationsApplied: MutationApplied[];
|
|
231
306
|
/** Fallback chain — try these in order if target fails. */
|
|
232
307
|
fallbackChain: string[];
|
|
308
|
+
/**
|
|
309
|
+
* Best-practice advisories emitted by the compiler. Non-fatal. Empty
|
|
310
|
+
* array when no rules fired. alpha.6 Phase 1.
|
|
311
|
+
*/
|
|
312
|
+
advisories: BestPracticeAdvisory[];
|
|
233
313
|
/** Diagnostics for caller-side logging. */
|
|
234
314
|
diagnostics: {
|
|
235
315
|
sectionsKept: number;
|
|
@@ -240,6 +320,16 @@ interface CompileResult {
|
|
|
240
320
|
historyDropped: number;
|
|
241
321
|
cacheableTokens: number;
|
|
242
322
|
estimatedCacheSavingsUsd: number;
|
|
323
|
+
/**
|
|
324
|
+
 * Tokens in `history` that
|
|
325
|
+
* fall within the cacheable prefix per `historyCachePolicy`. Always
|
|
326
|
+
* computed; only Anthropic actually emits a wire-format marker. For
|
|
327
|
+
* Gemini / OpenAI / DeepSeek, this represents the theoretical cacheable
|
|
328
|
+
* prefix that implicit caching may pick up — useful telemetry for the
|
|
329
|
+
* brain to learn which (app, model, archetype) tuples benefit most
|
|
330
|
+
* from history caching. alpha.5.
|
|
331
|
+
*/
|
|
332
|
+
historyCacheableTokens: number;
|
|
243
333
|
};
|
|
244
334
|
}
|
|
245
335
|
/**
|
|
@@ -518,4 +608,4 @@ declare function tryGetProfile(id: string): ModelProfile | undefined;
|
|
|
518
608
|
declare function allProfiles(): readonly ModelProfile[];
|
|
519
609
|
declare function profilesByProvider(provider: Provider): readonly ModelProfile[];
|
|
520
610
|
|
|
521
|
-
export { type ApiKeys as A, type CompilePolicy as C, type IntentDeclaration as I, type LoweringSpec as L, type ModelProfile as M, type NormalizedResponse as N, type OracleScore as O, type ProviderOverrides as P, type RecordInput as R, type StructuredOutputCapability as S, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type CompileResult as e, ALIASES as f, type CacheStrategy as g, type CallAttempt as h, CallError as i, type CliffRule as j, type Constraints as k, type Message as l, type MutationApplied as m, type NormalizedTokens as n, type PromptSection as o, type Provider as p, type RecoveryRule as q, type SystemPromptMode as r, type ToolDefinition as s, allProfiles as t, getProfile as u, profilesByProvider as v, tryGetProfile as w };
|
|
611
|
+
export { type ApiKeys as A, type BestPracticeAdvisory as B, type CompilePolicy as C, type HistoryCachePolicy as H, type IntentDeclaration as I, type LoweringSpec as L, type ModelProfile as M, type NormalizedResponse as N, type OracleScore as O, type ProviderOverrides as P, type RecordInput as R, type StructuredOutputCapability as S, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type CompileResult as e, ALIASES as f, type CacheStrategy as g, type CallAttempt as h, CallError as i, type CliffRule as j, type Constraints as k, type Message as l, type MutationApplied as m, type NormalizedTokens as n, type PromptSection as o, type Provider as p, type RecoveryRule as q, type SystemPromptMode as r, type ToolDefinition as s, allProfiles as t, getProfile as u, profilesByProvider as v, tryGetProfile as w };
|
package/dist/profiles.d.mts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-
|
|
1
|
+
export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-zm6diETo.mjs';
|
|
2
2
|
import './dialect.mjs';
|
package/dist/profiles.d.ts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-
|
|
1
|
+
export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CQnLkQ7b.js';
|
|
2
2
|
import './dialect.js';
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@warmdrift/kgauto-compiler",
|
|
3
|
-
"version": "2.0.0-alpha.
|
|
3
|
+
"version": "2.0.0-alpha.6",
|
|
4
4
|
"description": "Prompt compiler + central learning brain for multi-model AI apps. Swap models without rewriting prompts.",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"module": "./dist/index.mjs",
|