@warmdrift/kgauto-compiler 2.0.0-alpha.5 → 2.0.0-alpha.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +45 -3
- package/dist/index.d.mts +94 -3
- package/dist/index.d.ts +94 -3
- package/dist/index.js +138 -23
- package/dist/index.mjs +137 -23
- package/dist/{profiles-MGq5Tnjv.d.ts → profiles-B3eNQ2py.d.ts} +49 -1
- package/dist/{profiles-DHdCRBVH.d.mts → profiles-Py8c7zjJ.d.mts} +49 -1
- package/dist/profiles.d.mts +1 -1
- package/dist/profiles.d.ts +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# @warmdrift/kgauto-compiler — v2.0.0-alpha.
|
|
1
|
+
# @warmdrift/kgauto-compiler — v2.0.0-alpha.6
|
|
2
2
|
|
|
3
3
|
> Prompt compiler + central learning brain for multi-model AI apps.
|
|
4
4
|
> **Swap models without rewriting prompts.**
|
|
@@ -18,8 +18,8 @@ mutations.
|
|
|
18
18
|
- **Package:** alpha — coexists with v1 (`@warmdrift/kgauto@1.2.0`) under
|
|
19
19
|
the temporary name `@warmdrift/kgauto-compiler`. Renames to v2 final once
|
|
20
20
|
v1 is fully retired from production.
|
|
21
|
-
- **Tests:**
|
|
22
|
-
- **Build:** clean (47KB ESM,
|
|
21
|
+
- **Tests:** 201/201 passing
|
|
22
|
+
- **Build:** clean (47KB ESM, 68KB CJS)
|
|
23
23
|
- **Brain:** schema ready (see `brain/migrations/001_initial_schema.sql`);
|
|
24
24
|
awaiting dedicated Supabase provisioning.
|
|
25
25
|
- **Mutation engine:** v2.1 (after enough outcome data accumulates).
|
|
@@ -154,6 +154,48 @@ The 5 prod empty-responses in tt-intelligence's `gemini-2.5-flash` dashboard
|
|
|
154
154
|
calls? v2 catches those automatically — `expectedShortOutput` constraint plus
|
|
155
155
|
the `force_thinking_budget_zero` cliff guard.
|
|
156
156
|
|
|
157
|
+
## Tools
|
|
158
|
+
|
|
159
|
+
Tools are first-class IR fields. The compiler's tool-relevance pass drops
|
|
160
|
+
tools that don't apply to the current intent before lowering — saves
|
|
161
|
+
context budget on every call.
|
|
162
|
+
|
|
163
|
+
```ts
|
|
164
|
+
const tools: ToolDefinition[] = [
|
|
165
|
+
{
|
|
166
|
+
name: 'web_search',
|
|
167
|
+
description: 'Search the public web',
|
|
168
|
+
parameters: { type: 'object', properties: { q: { type: 'string' } } },
|
|
169
|
+
relevanceByIntent: {
|
|
170
|
+
ask: 0.9, // primary tool for ask
|
|
171
|
+
hunt: 0.9,
|
|
172
|
+
classify: 0.0, // never useful for classification
|
|
173
|
+
summarize: 0.0,
|
|
174
|
+
extract: 0.1,
|
|
175
|
+
},
|
|
176
|
+
},
|
|
177
|
+
// ...
|
|
178
|
+
];
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
Each tool declares per-intent relevance scores 0..1. The pass keeps tools
|
|
182
|
+
where `relevanceByIntent[currentIntent] >= toolRelevanceThreshold` (default
|
|
183
|
+
`0.2`). Missing entries default to neutral (`0.5`) — kept by default. Set
|
|
184
|
+
explicit `0.0` to hard-exclude.
|
|
185
|
+
|
|
186
|
+
Tool definitions eat ~350 tokens of context per tool (L-051), so trimming
|
|
187
|
+
matters: 12 declared tools, only 3 relevant → 9 × 350 = 3150 tokens
|
|
188
|
+
recovered per call.
|
|
189
|
+
|
|
190
|
+
The `tool-bloat` advisory (alpha.6) fires when more than 10 tools survive
|
|
191
|
+
the relevance pass on a short-output archetype (`classify`, `extract`,
|
|
192
|
+
`summarize`, `transform`, `critique`) — those archetypes typically use
|
|
193
|
+
≤3 tools, so a kept-count >10 indicates either missing `relevanceByIntent`
|
|
194
|
+
or scores set too generously.
|
|
195
|
+
|
|
196
|
+
DeepSeek profiles cap tools to 1 (sequential-only). Other providers
|
|
197
|
+
inherit the count from the IR after the relevance pass.
|
|
198
|
+
|
|
157
199
|
## Brain provisioning
|
|
158
200
|
|
|
159
201
|
1. Create a NEW Supabase project (suggested name: `kgauto-brain`)
|
package/dist/index.d.mts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult } from './profiles-
|
|
2
|
-
export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-
|
|
1
|
+
import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory } from './profiles-Py8c7zjJ.mjs';
|
|
2
|
+
export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-Py8c7zjJ.mjs';
|
|
3
3
|
export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, IntentArchetypeName, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.mjs';
|
|
4
4
|
|
|
5
5
|
/**
|
|
@@ -19,6 +19,15 @@ interface CompileOptions {
|
|
|
19
19
|
toolRelevanceThreshold?: number;
|
|
20
20
|
/** History compression — turns count threshold (default 8). */
|
|
21
21
|
compressHistoryAfter?: number;
|
|
22
|
+
/**
|
|
23
|
+
* History compression — token threshold (alpha.7). When total history
|
|
24
|
+
* tokens exceed this AND history is longer than the recent turns kept, compress
|
|
25
|
+
* even when the turn count is below `compressHistoryAfter`. Catches
|
|
26
|
+
* fat-message bloat (tool-using agents pack many tool-call/result pairs
|
|
27
|
+
* into single assistant messages — count stays low, tokens explode).
|
|
28
|
+
* Default undefined (disabled — backward-compatible).
|
|
29
|
+
*/
|
|
30
|
+
compressHistoryAboveTokens?: number;
|
|
22
31
|
/**
|
|
23
32
|
* Consumer-declared policy. Filters blocked models, enforces cost
|
|
24
33
|
* ceiling, boosts preferred. See CompilePolicy in ir.ts.
|
|
@@ -114,6 +123,53 @@ declare function clearBrain(): void;
|
|
|
114
123
|
* network error is swallowed/forwarded to onError.
|
|
115
124
|
*/
|
|
116
125
|
declare function record(input: RecordInput): Promise<void>;
|
|
126
|
+
/**
|
|
127
|
+
* Wire shape POSTed by `record()` to the brain proxy's `/outcomes` endpoint.
|
|
128
|
+
*
|
|
129
|
+
* Exported so consumer proxies can `import { OutcomePayload } from
|
|
130
|
+
* '@warmdrift/kgauto-compiler'` instead of redefining the shape — that way
|
|
131
|
+
* TypeScript catches future schema additions (cache fields, advisory
|
|
132
|
+
* telemetry, etc.) at consumer build time, not silently at runtime.
|
|
133
|
+
*
|
|
134
|
+
* **Forward-compat rule:** consumer proxies should pass the body through to
|
|
135
|
+
* Supabase rather than reconstructing field-by-field. The recommended shape
|
|
136
|
+
* is `const row = { ...body }` (or `await supabase.from('compile_outcomes')
|
|
137
|
+
* .insert(body)` directly). Filtering proxies break schema evolution
|
|
138
|
+
* silently — see s17 root-cause investigation 2026-05-10.
|
|
139
|
+
*/
|
|
140
|
+
interface OutcomePayload {
|
|
141
|
+
handle: string;
|
|
142
|
+
app_id?: string;
|
|
143
|
+
intent_archetype?: string;
|
|
144
|
+
/** The model that ACTUALLY RAN (post-fallback). */
|
|
145
|
+
model?: string;
|
|
146
|
+
/** The model v2 compile() originally targeted. NULL when no fallback. */
|
|
147
|
+
requested_model?: string;
|
|
148
|
+
provider?: string;
|
|
149
|
+
shape_key?: string;
|
|
150
|
+
learning_key?: string;
|
|
151
|
+
mutations_applied: string[];
|
|
152
|
+
tokens_in: number;
|
|
153
|
+
tokens_out: number;
|
|
154
|
+
estimated_tokens_in?: number;
|
|
155
|
+
latency_ms: number;
|
|
156
|
+
success: boolean;
|
|
157
|
+
empty_response: boolean;
|
|
158
|
+
error_type?: string;
|
|
159
|
+
tools_called?: string[];
|
|
160
|
+
oracle_score?: number;
|
|
161
|
+
oracle_dimensions?: Record<string, number>;
|
|
162
|
+
oracle_rationale?: string;
|
|
163
|
+
prompt_preview?: string;
|
|
164
|
+
response_preview?: string;
|
|
165
|
+
dialect_version: string;
|
|
166
|
+
cache_read_input_tokens?: number;
|
|
167
|
+
cache_creation_input_tokens?: number;
|
|
168
|
+
cost_usd_actual?: number;
|
|
169
|
+
ttft_ms?: number;
|
|
170
|
+
history_cacheable_tokens?: number;
|
|
171
|
+
history_tokens_at_compile?: number;
|
|
172
|
+
}
|
|
117
173
|
|
|
118
174
|
/**
|
|
119
175
|
* Oracle contract — how an app tells the brain whether a response was good.
|
|
@@ -189,6 +245,41 @@ declare function resetTokenizer(): void;
|
|
|
189
245
|
*/
|
|
190
246
|
declare function countTokens(text: string): number;
|
|
191
247
|
|
|
248
|
+
/**
|
|
249
|
+
* Best-practice advisor — alpha.6 Phase 1.
|
|
250
|
+
*
|
|
251
|
+
* Inspects an IR + the selected profile + compile diagnostics and emits a
|
|
252
|
+
* list of `BestPracticeAdvisory` entries describing detected gaps. Runs
|
|
253
|
+
* after `lower()` in the compile pipeline; the result lands on
|
|
254
|
+
* `CompileResult.advisories` for the consumer to log, surface, or filter.
|
|
255
|
+
*
|
|
256
|
+
* Driven by interfaces/kgauto.md `best-practice-advisories` (IC, 2026-05-07).
|
|
257
|
+
* Phase 1 ships 4 starter rules sourced from the s14 kgauto comment +
|
|
258
|
+
* s15 empirical seed of brain anti-patterns:
|
|
259
|
+
*
|
|
260
|
+
* 1. `caching-off-on-claude` system ≥2000 chars on Anthropic, no cacheable=true
|
|
261
|
+
* 2. `single-chunk-system` Anthropic, only one PromptSection >1000 chars
|
|
262
|
+
* 3. `tool-bloat` >10 tools on a short-output archetype
|
|
263
|
+
* 4. `history-uncached-on-claude` Anthropic, ≥2 history messages, no historyCachePolicy
|
|
264
|
+
*
|
|
265
|
+
* Each rule is a pure function: (ir, result, profile) → BestPracticeAdvisory[].
|
|
266
|
+
* No side effects. No randomness. Deterministic for a given IR.
|
|
267
|
+
*
|
|
268
|
+
* The thresholds (2000 chars, 1000 chars, 10 tools, 2 history) are chosen
|
|
269
|
+
* to balance noise vs. signal — too low fires on innocuous calls, too high
|
|
270
|
+
* misses real waste. They may tune with brain evidence over time; for now
|
|
271
|
+
* they're literals in the rule bodies. Make them configurable when the
|
|
272
|
+
* cost-watcher's R-rules graduate to here.
|
|
273
|
+
*/
|
|
274
|
+
|
|
275
|
+
/** Subset of CompileResult fields the advisor needs. */
|
|
276
|
+
type AdvisorContext = Pick<CompileResult, 'target' | 'provider' | 'tokensIn' | 'diagnostics'>;
|
|
277
|
+
/**
|
|
278
|
+
* Run all Phase 1 rules and return collected advisories. Order is fixed
|
|
279
|
+
* (same as the rule list above) so output is stable across runs.
|
|
280
|
+
*/
|
|
281
|
+
declare function runAdvisor(ir: PromptIR, result: AdvisorContext, profile: ModelProfile): BestPracticeAdvisory[];
|
|
282
|
+
|
|
192
283
|
/**
|
|
193
284
|
* @warmdrift/kgauto v2 — prompt compiler + central learning brain.
|
|
194
285
|
*
|
|
@@ -235,4 +326,4 @@ declare function countTokens(text: string): number;
|
|
|
235
326
|
*/
|
|
236
327
|
declare function compile(ir: PromptIR, opts?: CompileOptions): CompileResult;
|
|
237
328
|
|
|
238
|
-
export { ApiKeys, type AppOracle, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, setTokenizer };
|
|
329
|
+
export { ApiKeys, type AppOracle, BestPracticeAdvisory, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, type OutcomePayload, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, runAdvisor, setTokenizer };
|
package/dist/index.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult } from './profiles-
|
|
2
|
-
export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-
|
|
1
|
+
import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory } from './profiles-B3eNQ2py.js';
|
|
2
|
+
export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-B3eNQ2py.js';
|
|
3
3
|
export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, IntentArchetypeName, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.js';
|
|
4
4
|
|
|
5
5
|
/**
|
|
@@ -19,6 +19,15 @@ interface CompileOptions {
|
|
|
19
19
|
toolRelevanceThreshold?: number;
|
|
20
20
|
/** History compression — turns count threshold (default 8). */
|
|
21
21
|
compressHistoryAfter?: number;
|
|
22
|
+
/**
|
|
23
|
+
* History compression — token threshold (alpha.7). When total history
|
|
24
|
+
* tokens exceed this AND history is longer than the recent turns kept, compress
|
|
25
|
+
* even when the turn count is below `compressHistoryAfter`. Catches
|
|
26
|
+
* fat-message bloat (tool-using agents pack many tool-call/result pairs
|
|
27
|
+
* into single assistant messages — count stays low, tokens explode).
|
|
28
|
+
* Default undefined (disabled — backward-compatible).
|
|
29
|
+
*/
|
|
30
|
+
compressHistoryAboveTokens?: number;
|
|
22
31
|
/**
|
|
23
32
|
* Consumer-declared policy. Filters blocked models, enforces cost
|
|
24
33
|
* ceiling, boosts preferred. See CompilePolicy in ir.ts.
|
|
@@ -114,6 +123,53 @@ declare function clearBrain(): void;
|
|
|
114
123
|
* network error is swallowed/forwarded to onError.
|
|
115
124
|
*/
|
|
116
125
|
declare function record(input: RecordInput): Promise<void>;
|
|
126
|
+
/**
|
|
127
|
+
* Wire shape POSTed by `record()` to the brain proxy's `/outcomes` endpoint.
|
|
128
|
+
*
|
|
129
|
+
* Exported so consumer proxies can `import { OutcomePayload } from
|
|
130
|
+
* '@warmdrift/kgauto-compiler'` instead of redefining the shape — that way
|
|
131
|
+
* TypeScript catches future schema additions (cache fields, advisory
|
|
132
|
+
* telemetry, etc.) at consumer build time, not silently at runtime.
|
|
133
|
+
*
|
|
134
|
+
* **Forward-compat rule:** consumer proxies should pass the body through to
|
|
135
|
+
* Supabase rather than reconstructing field-by-field. The recommended shape
|
|
136
|
+
* is `const row = { ...body }` (or `await supabase.from('compile_outcomes')
|
|
137
|
+
* .insert(body)` directly). Filtering proxies break schema evolution
|
|
138
|
+
* silently — see s17 root-cause investigation 2026-05-10.
|
|
139
|
+
*/
|
|
140
|
+
interface OutcomePayload {
|
|
141
|
+
handle: string;
|
|
142
|
+
app_id?: string;
|
|
143
|
+
intent_archetype?: string;
|
|
144
|
+
/** The model that ACTUALLY RAN (post-fallback). */
|
|
145
|
+
model?: string;
|
|
146
|
+
/** The model v2 compile() originally targeted. NULL when no fallback. */
|
|
147
|
+
requested_model?: string;
|
|
148
|
+
provider?: string;
|
|
149
|
+
shape_key?: string;
|
|
150
|
+
learning_key?: string;
|
|
151
|
+
mutations_applied: string[];
|
|
152
|
+
tokens_in: number;
|
|
153
|
+
tokens_out: number;
|
|
154
|
+
estimated_tokens_in?: number;
|
|
155
|
+
latency_ms: number;
|
|
156
|
+
success: boolean;
|
|
157
|
+
empty_response: boolean;
|
|
158
|
+
error_type?: string;
|
|
159
|
+
tools_called?: string[];
|
|
160
|
+
oracle_score?: number;
|
|
161
|
+
oracle_dimensions?: Record<string, number>;
|
|
162
|
+
oracle_rationale?: string;
|
|
163
|
+
prompt_preview?: string;
|
|
164
|
+
response_preview?: string;
|
|
165
|
+
dialect_version: string;
|
|
166
|
+
cache_read_input_tokens?: number;
|
|
167
|
+
cache_creation_input_tokens?: number;
|
|
168
|
+
cost_usd_actual?: number;
|
|
169
|
+
ttft_ms?: number;
|
|
170
|
+
history_cacheable_tokens?: number;
|
|
171
|
+
history_tokens_at_compile?: number;
|
|
172
|
+
}
|
|
117
173
|
|
|
118
174
|
/**
|
|
119
175
|
* Oracle contract — how an app tells the brain whether a response was good.
|
|
@@ -189,6 +245,41 @@ declare function resetTokenizer(): void;
|
|
|
189
245
|
*/
|
|
190
246
|
declare function countTokens(text: string): number;
|
|
191
247
|
|
|
248
|
+
/**
|
|
249
|
+
* Best-practice advisor — alpha.6 Phase 1.
|
|
250
|
+
*
|
|
251
|
+
* Inspects an IR + the selected profile + compile diagnostics and emits a
|
|
252
|
+
* list of `BestPracticeAdvisory` entries describing detected gaps. Runs
|
|
253
|
+
* after `lower()` in the compile pipeline; the result lands on
|
|
254
|
+
* `CompileResult.advisories` for the consumer to log, surface, or filter.
|
|
255
|
+
*
|
|
256
|
+
* Driven by interfaces/kgauto.md `best-practice-advisories` (IC, 2026-05-07).
|
|
257
|
+
* Phase 1 ships 4 starter rules sourced from the s14 kgauto comment +
|
|
258
|
+
* s15 empirical seed of brain anti-patterns:
|
|
259
|
+
*
|
|
260
|
+
* 1. `caching-off-on-claude` system ≥2000 chars on Anthropic, no cacheable=true
|
|
261
|
+
* 2. `single-chunk-system` Anthropic, only one PromptSection >1000 chars
|
|
262
|
+
* 3. `tool-bloat` >10 tools on a short-output archetype
|
|
263
|
+
* 4. `history-uncached-on-claude` Anthropic, ≥2 history messages, no historyCachePolicy
|
|
264
|
+
*
|
|
265
|
+
* Each rule is a pure function: (ir, result, profile) → BestPracticeAdvisory[].
|
|
266
|
+
* No side effects. No randomness. Deterministic for a given IR.
|
|
267
|
+
*
|
|
268
|
+
* The thresholds (2000 chars, 1000 chars, 10 tools, 2 history) are chosen
|
|
269
|
+
* to balance noise vs. signal — too low fires on innocuous calls, too high
|
|
270
|
+
* misses real waste. They may tune with brain evidence over time; for now
|
|
271
|
+
* they're literals in the rule bodies. Make them configurable when the
|
|
272
|
+
* cost-watcher's R-rules graduate to here.
|
|
273
|
+
*/
|
|
274
|
+
|
|
275
|
+
/** Subset of CompileResult fields the advisor needs. */
|
|
276
|
+
type AdvisorContext = Pick<CompileResult, 'target' | 'provider' | 'tokensIn' | 'diagnostics'>;
|
|
277
|
+
/**
|
|
278
|
+
* Run all Phase 1 rules and return collected advisories. Order is fixed
|
|
279
|
+
* (same as the rule list above) so output is stable across runs.
|
|
280
|
+
*/
|
|
281
|
+
declare function runAdvisor(ir: PromptIR, result: AdvisorContext, profile: ModelProfile): BestPracticeAdvisory[];
|
|
282
|
+
|
|
192
283
|
/**
|
|
193
284
|
* @warmdrift/kgauto v2 — prompt compiler + central learning brain.
|
|
194
285
|
*
|
|
@@ -235,4 +326,4 @@ declare function countTokens(text: string): number;
|
|
|
235
326
|
*/
|
|
236
327
|
declare function compile(ir: PromptIR, opts?: CompileOptions): CompileResult;
|
|
237
328
|
|
|
238
|
-
export { ApiKeys, type AppOracle, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, setTokenizer };
|
|
329
|
+
export { ApiKeys, type AppOracle, BestPracticeAdvisory, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, type OutcomePayload, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, runAdvisor, setTokenizer };
|
package/dist/index.js
CHANGED
|
@@ -43,6 +43,7 @@ __export(index_exports, {
|
|
|
43
43
|
profilesByProvider: () => profilesByProvider,
|
|
44
44
|
record: () => record,
|
|
45
45
|
resetTokenizer: () => resetTokenizer,
|
|
46
|
+
runAdvisor: () => runAdvisor,
|
|
46
47
|
setTokenizer: () => setTokenizer,
|
|
47
48
|
tryGetProfile: () => tryGetProfile
|
|
48
49
|
});
|
|
@@ -235,20 +236,37 @@ function passToolRelevance(ir, opts = {}) {
|
|
|
235
236
|
]
|
|
236
237
|
};
|
|
237
238
|
}
|
|
239
|
+
function totalHistoryTokens(history) {
|
|
240
|
+
let total = 0;
|
|
241
|
+
for (const m of history) {
|
|
242
|
+
if (typeof m.content === "string") total += countTokens(m.content);
|
|
243
|
+
}
|
|
244
|
+
return total;
|
|
245
|
+
}
|
|
238
246
|
function passCompressHistory(ir, opts = {}) {
|
|
239
247
|
const history = ir.history;
|
|
240
|
-
if (!history || history.length === 0)
|
|
248
|
+
if (!history || history.length === 0) {
|
|
249
|
+
return { value: ir, mutations: [], historyTokensTotal: 0 };
|
|
250
|
+
}
|
|
241
251
|
const keepRecent = opts.keepRecent ?? 4;
|
|
242
252
|
const summarizeOlderThan = opts.summarizeOlderThan ?? 8;
|
|
243
|
-
|
|
253
|
+
const summarizeAboveTokens = opts.summarizeAboveTokens;
|
|
254
|
+
const historyTokensTotal = totalHistoryTokens(history);
|
|
255
|
+
const countThresholdHit = history.length > summarizeOlderThan;
|
|
256
|
+
const tokenThresholdHit = summarizeAboveTokens !== void 0 && historyTokensTotal > summarizeAboveTokens && history.length > keepRecent;
|
|
257
|
+
if (!countThresholdHit && !tokenThresholdHit) {
|
|
258
|
+
return { value: ir, mutations: [], historyTokensTotal };
|
|
259
|
+
}
|
|
244
260
|
const cutIndex = history.length - keepRecent;
|
|
245
261
|
const old = history.slice(0, cutIndex);
|
|
246
262
|
const recent = history.slice(cutIndex);
|
|
247
263
|
const userTurns = old.filter((m) => m.role === "user");
|
|
248
264
|
const firstUserLine = userTurns[0]?.content.split("\n")[0]?.slice(0, 200) ?? "";
|
|
265
|
+
const oldTokens = totalHistoryTokens(old);
|
|
266
|
+
const trigger = tokenThresholdHit && !countThresholdHit ? "tokens" : "count";
|
|
249
267
|
const summary = {
|
|
250
268
|
role: "system",
|
|
251
|
-
content: `[Earlier conversation: ${old.length} turns omitted. First user message: "${firstUserLine}"]`
|
|
269
|
+
content: `[Earlier conversation: ${old.length} turns omitted (~${oldTokens} tokens). First user message: "${firstUserLine}"]`
|
|
252
270
|
};
|
|
253
271
|
return {
|
|
254
272
|
value: { ...ir, history: [summary, ...recent] },
|
|
@@ -257,9 +275,10 @@ function passCompressHistory(ir, opts = {}) {
|
|
|
257
275
|
id: `compress-history-${old.length}`,
|
|
258
276
|
source: "static_pass",
|
|
259
277
|
passName: "compress_history",
|
|
260
|
-
description: `Compressed ${old.length} old turns into 1 summary
|
|
278
|
+
description: trigger === "tokens" ? `Compressed ${old.length} old turns (~${oldTokens} tokens) into 1 summary \u2014 token threshold ${summarizeAboveTokens} exceeded (kept ${keepRecent} recent)` : `Compressed ${old.length} old turns into 1 summary (kept ${keepRecent} recent)`
|
|
261
279
|
}
|
|
262
|
-
]
|
|
280
|
+
],
|
|
281
|
+
historyTokensTotal
|
|
263
282
|
};
|
|
264
283
|
}
|
|
265
284
|
function passApplyCliffs(ir, profile, estimatedInputTokens) {
|
|
@@ -489,7 +508,7 @@ function lower(ir, profile, hints = {}) {
|
|
|
489
508
|
}
|
|
490
509
|
function lowerAnthropic(ir, profile, hints) {
|
|
491
510
|
const systemBlocks = buildAnthropicSystemBlocks(ir.sections, profile);
|
|
492
|
-
const history = ir.history ?? [];
|
|
511
|
+
const history = (ir.history ?? []).filter((m) => m.role !== "system");
|
|
493
512
|
const policy = ir.historyCachePolicy;
|
|
494
513
|
const markIndex = resolveHistoryMarkIndex(history.length, policy);
|
|
495
514
|
const messages = buildAnthropicMessages(history, ir.currentTurn, markIndex);
|
|
@@ -634,7 +653,7 @@ function lowerGoogle(ir, profile, hints) {
|
|
|
634
653
|
const minTokens = profile.lowering.cache.minTokens ?? 4096;
|
|
635
654
|
const meetsMin = cacheableTokens >= minTokens;
|
|
636
655
|
const cacheSavings = meetsMin ? cacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.25)) : 0;
|
|
637
|
-
const history = ir.history ?? [];
|
|
656
|
+
const history = (ir.history ?? []).filter((m) => m.role !== "system");
|
|
638
657
|
const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
|
|
639
658
|
const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
|
|
640
659
|
return {
|
|
@@ -696,7 +715,7 @@ function lowerOpenAI(ir, profile, hints) {
|
|
|
696
715
|
content: ir.currentTurn.parts ?? ir.currentTurn.content
|
|
697
716
|
});
|
|
698
717
|
}
|
|
699
|
-
const history = ir.history ?? [];
|
|
718
|
+
const history = (ir.history ?? []).filter((m) => m.role !== "system");
|
|
700
719
|
const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
|
|
701
720
|
const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
|
|
702
721
|
return {
|
|
@@ -739,7 +758,7 @@ function lowerDeepSeek(ir, profile) {
|
|
|
739
758
|
content: ir.currentTurn.parts ?? ir.currentTurn.content
|
|
740
759
|
});
|
|
741
760
|
}
|
|
742
|
-
const history = ir.history ?? [];
|
|
761
|
+
const history = (ir.history ?? []).filter((m) => m.role !== "system");
|
|
743
762
|
const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
|
|
744
763
|
const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
|
|
745
764
|
return {
|
|
@@ -1185,6 +1204,85 @@ function profilesByProvider(provider) {
|
|
|
1185
1204
|
return PROFILES_RAW.filter((p) => p.provider === provider);
|
|
1186
1205
|
}
|
|
1187
1206
|
|
|
1207
|
+
// src/advisor.ts
|
|
1208
|
+
function runAdvisor(ir, result, profile) {
|
|
1209
|
+
const out = [];
|
|
1210
|
+
out.push(...detectCachingOff(ir, profile));
|
|
1211
|
+
out.push(...detectSingleChunkSystem(ir, profile));
|
|
1212
|
+
out.push(...detectToolBloat(ir, result));
|
|
1213
|
+
out.push(...detectHistoryUncached(ir, profile));
|
|
1214
|
+
return out;
|
|
1215
|
+
}
|
|
1216
|
+
function detectCachingOff(ir, profile) {
|
|
1217
|
+
if (profile.provider !== "anthropic") return [];
|
|
1218
|
+
const totalChars = ir.sections.reduce((s, sec) => s + sec.text.length, 0);
|
|
1219
|
+
if (totalChars < 2e3) return [];
|
|
1220
|
+
const anyCacheable = ir.sections.some((s) => s.cacheable === true);
|
|
1221
|
+
if (anyCacheable) return [];
|
|
1222
|
+
return [
|
|
1223
|
+
{
|
|
1224
|
+
level: "warn",
|
|
1225
|
+
code: "caching-off-on-claude",
|
|
1226
|
+
message: `System prompt is ${totalChars} chars on Anthropic but no PromptSection has cacheable=true. Anthropic prompt caching cuts cached-prefix input cost by ~90% on subsequent calls; without it, every turn re-pays full price for the static system context.`,
|
|
1227
|
+
suggestion: "Mark stable system sections (role, persona, tool policy) with `cacheable: true`. The lowering pass concatenates cacheable sections into a single cache-controlled block before the dynamic ones.",
|
|
1228
|
+
docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
|
|
1229
|
+
}
|
|
1230
|
+
];
|
|
1231
|
+
}
|
|
1232
|
+
function detectSingleChunkSystem(ir, profile) {
|
|
1233
|
+
if (profile.provider !== "anthropic") return [];
|
|
1234
|
+
if (ir.sections.length !== 1) return [];
|
|
1235
|
+
const only = ir.sections[0];
|
|
1236
|
+
if (!only || only.text.length <= 1e3) return [];
|
|
1237
|
+
return [
|
|
1238
|
+
{
|
|
1239
|
+
level: "info",
|
|
1240
|
+
code: "single-chunk-system",
|
|
1241
|
+
message: `System prompt is a single ${only.text.length}-char chunk. Splitting into NamedChunks (static role/persona vs dynamic context) gives the lowering pass a finer cache-marker boundary \u2014 only the static portion needs to be byte-stable for the cache to hit.`,
|
|
1242
|
+
suggestion: "Refactor the system builder to return an array of `PromptSection` shaped { id, text, cacheable?: boolean }. Static chunks (role, persona, tool policy) get `cacheable: true`; dynamic ones (current context, today's date) don't.",
|
|
1243
|
+
docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
|
|
1244
|
+
}
|
|
1245
|
+
];
|
|
1246
|
+
}
|
|
1247
|
+
function detectToolBloat(ir, result) {
|
|
1248
|
+
const SHORT_OUTPUT = /* @__PURE__ */ new Set([
|
|
1249
|
+
"classify",
|
|
1250
|
+
"extract",
|
|
1251
|
+
"summarize",
|
|
1252
|
+
"transform",
|
|
1253
|
+
"critique"
|
|
1254
|
+
]);
|
|
1255
|
+
if (!ir.tools || ir.tools.length === 0) return [];
|
|
1256
|
+
const toolsKept = result.diagnostics.toolsKept;
|
|
1257
|
+
if (toolsKept <= 10) return [];
|
|
1258
|
+
if (!SHORT_OUTPUT.has(ir.intent.archetype)) return [];
|
|
1259
|
+
return [
|
|
1260
|
+
{
|
|
1261
|
+
level: "warn",
|
|
1262
|
+
code: "tool-bloat",
|
|
1263
|
+
message: `${toolsKept} tools kept after the relevance pass for archetype="${ir.intent.archetype}" (consumer declared ${ir.tools.length}). This archetype is short-output and rarely needs more than 3 tools; each tool definition eats ~350 tokens of context budget.`,
|
|
1264
|
+
suggestion: "Tighten `relevanceByIntent: { [archetype]: 0..1 }` per ToolDefinition. Tools below `toolRelevanceThreshold` (default 0.2) get dropped. Without `relevanceByIntent`, every tool defaults to neutral (0.5) and stays.",
|
|
1265
|
+
docsUrl: "https://github.com/stue/kgauto/blob/main/v2/README.md#tools"
|
|
1266
|
+
}
|
|
1267
|
+
];
|
|
1268
|
+
}
|
|
1269
|
+
/**
 * Advisory rule `history-uncached-on-claude`.
 *
 * Emits a single `warn` advisory when the target is Anthropic, the history
 * holds at least two cacheable messages, and history caching is not enabled
 * (either no `historyCachePolicy` or an explicit `strategy: "none"`).
 *
 * Only non-system messages are counted: the provider lowering passes strip
 * `role: "system"` entries from `history` before resolving the cache mark,
 * so system entries can never be part of the cached history prefix.
 *
 * @param ir      Prompt IR; reads `ir.history` and `ir.historyCachePolicy`.
 * @param profile Selected model profile; reads `profile.provider`.
 * @returns Zero or one advisory objects.
 */
function detectHistoryUncached(ir, profile) {
  if (profile.provider !== "anthropic") return [];
  const cacheableHistory = (ir.history ?? []).filter((m) => m.role !== "system");
  if (cacheableHistory.length < 2) return [];
  const policy = ir.historyCachePolicy;
  if (policy && policy.strategy !== "none") {
    return [];
  }
  // Distinguish "never configured" from "explicitly disabled" so the message
  // does not claim the policy is missing when the consumer set it to "none".
  const policyState = policy ? 'historyCachePolicy strategy "none"' : "no historyCachePolicy";
  return [
    {
      level: "warn",
      code: "history-uncached-on-claude",
      message: `${cacheableHistory.length} history messages on Anthropic with ${policyState}. Every turn re-pays for the full conversation context; with caching, subsequent turns hit the cache at ~10% the input cost.`,
      suggestion: "Set `historyCachePolicy: { strategy: 'all-but-latest' }` on this IR. The lowering pass marks the message immediately preceding currentTurn with cache_control; subsequent turns whose history prefix matches byte-for-byte hit the cache.",
      docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
    }
  ];
}
|
|
1285
|
+
|
|
1188
1286
|
// src/compile.ts
|
|
1189
1287
|
var counter = 0;
|
|
1190
1288
|
function makeHandle() {
|
|
@@ -1200,7 +1298,8 @@ function compile(ir, opts = {}) {
|
|
|
1200
1298
|
threshold: opts.toolRelevanceThreshold
|
|
1201
1299
|
});
|
|
1202
1300
|
const compressed = passCompressHistory(toolFiltered.value, {
|
|
1203
|
-
summarizeOlderThan: opts.compressHistoryAfter
|
|
1301
|
+
summarizeOlderThan: opts.compressHistoryAfter,
|
|
1302
|
+
summarizeAboveTokens: opts.compressHistoryAboveTokens
|
|
1204
1303
|
});
|
|
1205
1304
|
let workingIR = compressed.value;
|
|
1206
1305
|
const accumulatedMutations = [
|
|
@@ -1235,6 +1334,28 @@ function compile(ir, opts = {}) {
|
|
|
1235
1334
|
const handle = makeHandle();
|
|
1236
1335
|
const finalShape = computeShape(workingIR, inputTokens);
|
|
1237
1336
|
const _learningKey = learningKey(ir.intent.archetype, profile.id, finalShape);
|
|
1337
|
+
const diagnostics = {
|
|
1338
|
+
sectionsKept: workingIR.sections.length,
|
|
1339
|
+
sectionsDropped: ir.sections.length - workingIR.sections.length,
|
|
1340
|
+
toolsKept: workingIR.tools?.length ?? 0,
|
|
1341
|
+
toolsDropped: (ir.tools?.length ?? 0) - (workingIR.tools?.length ?? 0),
|
|
1342
|
+
historyKept: workingIR.history?.length ?? 0,
|
|
1343
|
+
historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
|
|
1344
|
+
cacheableTokens: lowered.diagnostics.cacheableTokens,
|
|
1345
|
+
estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd,
|
|
1346
|
+
historyCacheableTokens: lowered.diagnostics.historyCacheableTokens,
|
|
1347
|
+
historyTokensTotal: compressed.historyTokensTotal
|
|
1348
|
+
};
|
|
1349
|
+
const advisories = runAdvisor(
|
|
1350
|
+
ir,
|
|
1351
|
+
{
|
|
1352
|
+
target: profile.id,
|
|
1353
|
+
provider: profile.provider,
|
|
1354
|
+
tokensIn: inputTokens,
|
|
1355
|
+
diagnostics
|
|
1356
|
+
},
|
|
1357
|
+
profile
|
|
1358
|
+
);
|
|
1238
1359
|
return {
|
|
1239
1360
|
handle,
|
|
1240
1361
|
target: profile.id,
|
|
@@ -1244,17 +1365,8 @@ function compile(ir, opts = {}) {
|
|
|
1244
1365
|
estimatedCostUsd: target.estimatedCostUsd,
|
|
1245
1366
|
mutationsApplied: accumulatedMutations,
|
|
1246
1367
|
fallbackChain,
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
sectionsDropped: ir.sections.length - workingIR.sections.length,
|
|
1250
|
-
toolsKept: workingIR.tools?.length ?? 0,
|
|
1251
|
-
toolsDropped: (ir.tools?.length ?? 0) - (workingIR.tools?.length ?? 0),
|
|
1252
|
-
historyKept: workingIR.history?.length ?? 0,
|
|
1253
|
-
historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
|
|
1254
|
-
cacheableTokens: lowered.diagnostics.cacheableTokens,
|
|
1255
|
-
estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd,
|
|
1256
|
-
historyCacheableTokens: lowered.diagnostics.historyCacheableTokens
|
|
1257
|
-
}
|
|
1368
|
+
advisories,
|
|
1369
|
+
diagnostics
|
|
1258
1370
|
};
|
|
1259
1371
|
}
|
|
1260
1372
|
function validateIR(ir) {
|
|
@@ -1339,7 +1451,8 @@ function registerCompile(appId, archetype, ir, result) {
|
|
|
1339
1451
|
estimatedTokensIn: tokens,
|
|
1340
1452
|
mutationsApplied: result.mutationsApplied.map((m) => m.id),
|
|
1341
1453
|
startedAt: Date.now(),
|
|
1342
|
-
historyCacheableTokens: result.diagnostics.historyCacheableTokens
|
|
1454
|
+
historyCacheableTokens: result.diagnostics.historyCacheableTokens,
|
|
1455
|
+
historyTokensTotal: result.diagnostics.historyTokensTotal
|
|
1343
1456
|
});
|
|
1344
1457
|
}
|
|
1345
1458
|
async function record(input) {
|
|
@@ -1413,7 +1526,8 @@ function buildPayload(input, reg) {
|
|
|
1413
1526
|
cache_creation_input_tokens: input.cacheCreationInputTokens,
|
|
1414
1527
|
cost_usd_actual: costUsdActual,
|
|
1415
1528
|
ttft_ms: input.ttftMs,
|
|
1416
|
-
history_cacheable_tokens: reg?.historyCacheableTokens
|
|
1529
|
+
history_cacheable_tokens: reg?.historyCacheableTokens,
|
|
1530
|
+
history_tokens_at_compile: reg?.historyTokensTotal
|
|
1417
1531
|
};
|
|
1418
1532
|
}
|
|
1419
1533
|
function computeCostUsd(modelId, tokensIn, tokensOut) {
|
|
@@ -1900,6 +2014,7 @@ function compile2(ir, opts) {
|
|
|
1900
2014
|
profilesByProvider,
|
|
1901
2015
|
record,
|
|
1902
2016
|
resetTokenizer,
|
|
2017
|
+
runAdvisor,
|
|
1903
2018
|
setTokenizer,
|
|
1904
2019
|
tryGetProfile
|
|
1905
2020
|
});
|
package/dist/index.mjs
CHANGED
|
@@ -120,20 +120,37 @@ function passToolRelevance(ir, opts = {}) {
|
|
|
120
120
|
]
|
|
121
121
|
};
|
|
122
122
|
}
|
|
123
|
+
/**
 * Sums the token counts of every history message whose `content` is a plain
 * string. Messages with any other `content` type contribute nothing.
 *
 * @param history Array of history messages.
 * @returns Total token count as reported by `countTokens`.
 */
function totalHistoryTokens(history) {
  return history.reduce(
    (runningTotal, message) => typeof message.content === "string" ? runningTotal + countTokens(message.content) : runningTotal,
    0
  );
}
|
|
123
130
|
function passCompressHistory(ir, opts = {}) {
|
|
124
131
|
const history = ir.history;
|
|
125
|
-
if (!history || history.length === 0)
|
|
132
|
+
if (!history || history.length === 0) {
|
|
133
|
+
return { value: ir, mutations: [], historyTokensTotal: 0 };
|
|
134
|
+
}
|
|
126
135
|
const keepRecent = opts.keepRecent ?? 4;
|
|
127
136
|
const summarizeOlderThan = opts.summarizeOlderThan ?? 8;
|
|
128
|
-
|
|
137
|
+
const summarizeAboveTokens = opts.summarizeAboveTokens;
|
|
138
|
+
const historyTokensTotal = totalHistoryTokens(history);
|
|
139
|
+
const countThresholdHit = history.length > summarizeOlderThan;
|
|
140
|
+
const tokenThresholdHit = summarizeAboveTokens !== void 0 && historyTokensTotal > summarizeAboveTokens && history.length > keepRecent;
|
|
141
|
+
if (!countThresholdHit && !tokenThresholdHit) {
|
|
142
|
+
return { value: ir, mutations: [], historyTokensTotal };
|
|
143
|
+
}
|
|
129
144
|
const cutIndex = history.length - keepRecent;
|
|
130
145
|
const old = history.slice(0, cutIndex);
|
|
131
146
|
const recent = history.slice(cutIndex);
|
|
132
147
|
const userTurns = old.filter((m) => m.role === "user");
|
|
133
148
|
const firstUserLine = userTurns[0]?.content.split("\n")[0]?.slice(0, 200) ?? "";
|
|
149
|
+
const oldTokens = totalHistoryTokens(old);
|
|
150
|
+
const trigger = tokenThresholdHit && !countThresholdHit ? "tokens" : "count";
|
|
134
151
|
const summary = {
|
|
135
152
|
role: "system",
|
|
136
|
-
content: `[Earlier conversation: ${old.length} turns omitted. First user message: "${firstUserLine}"]`
|
|
153
|
+
content: `[Earlier conversation: ${old.length} turns omitted (~${oldTokens} tokens). First user message: "${firstUserLine}"]`
|
|
137
154
|
};
|
|
138
155
|
return {
|
|
139
156
|
value: { ...ir, history: [summary, ...recent] },
|
|
@@ -142,9 +159,10 @@ function passCompressHistory(ir, opts = {}) {
|
|
|
142
159
|
id: `compress-history-${old.length}`,
|
|
143
160
|
source: "static_pass",
|
|
144
161
|
passName: "compress_history",
|
|
145
|
-
description: `Compressed ${old.length} old turns into 1 summary
|
|
162
|
+
description: trigger === "tokens" ? `Compressed ${old.length} old turns (~${oldTokens} tokens) into 1 summary \u2014 token threshold ${summarizeAboveTokens} exceeded (kept ${keepRecent} recent)` : `Compressed ${old.length} old turns into 1 summary (kept ${keepRecent} recent)`
|
|
146
163
|
}
|
|
147
|
-
]
|
|
164
|
+
],
|
|
165
|
+
historyTokensTotal
|
|
148
166
|
};
|
|
149
167
|
}
|
|
150
168
|
function passApplyCliffs(ir, profile, estimatedInputTokens) {
|
|
@@ -374,7 +392,7 @@ function lower(ir, profile, hints = {}) {
|
|
|
374
392
|
}
|
|
375
393
|
function lowerAnthropic(ir, profile, hints) {
|
|
376
394
|
const systemBlocks = buildAnthropicSystemBlocks(ir.sections, profile);
|
|
377
|
-
const history = ir.history ?? [];
|
|
395
|
+
const history = (ir.history ?? []).filter((m) => m.role !== "system");
|
|
378
396
|
const policy = ir.historyCachePolicy;
|
|
379
397
|
const markIndex = resolveHistoryMarkIndex(history.length, policy);
|
|
380
398
|
const messages = buildAnthropicMessages(history, ir.currentTurn, markIndex);
|
|
@@ -519,7 +537,7 @@ function lowerGoogle(ir, profile, hints) {
|
|
|
519
537
|
const minTokens = profile.lowering.cache.minTokens ?? 4096;
|
|
520
538
|
const meetsMin = cacheableTokens >= minTokens;
|
|
521
539
|
const cacheSavings = meetsMin ? cacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.25)) : 0;
|
|
522
|
-
const history = ir.history ?? [];
|
|
540
|
+
const history = (ir.history ?? []).filter((m) => m.role !== "system");
|
|
523
541
|
const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
|
|
524
542
|
const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
|
|
525
543
|
return {
|
|
@@ -581,7 +599,7 @@ function lowerOpenAI(ir, profile, hints) {
|
|
|
581
599
|
content: ir.currentTurn.parts ?? ir.currentTurn.content
|
|
582
600
|
});
|
|
583
601
|
}
|
|
584
|
-
const history = ir.history ?? [];
|
|
602
|
+
const history = (ir.history ?? []).filter((m) => m.role !== "system");
|
|
585
603
|
const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
|
|
586
604
|
const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
|
|
587
605
|
return {
|
|
@@ -624,7 +642,7 @@ function lowerDeepSeek(ir, profile) {
|
|
|
624
642
|
content: ir.currentTurn.parts ?? ir.currentTurn.content
|
|
625
643
|
});
|
|
626
644
|
}
|
|
627
|
-
const history = ir.history ?? [];
|
|
645
|
+
const history = (ir.history ?? []).filter((m) => m.role !== "system");
|
|
628
646
|
const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
|
|
629
647
|
const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
|
|
630
648
|
return {
|
|
@@ -668,6 +686,85 @@ function setNestedField(obj, path, value) {
|
|
|
668
686
|
cursor[parts[parts.length - 1]] = value;
|
|
669
687
|
}
|
|
670
688
|
|
|
689
|
+
// src/advisor.ts
|
|
690
|
+
/**
 * Runs every best-practice detector and returns their advisories in one flat
 * array, in a fixed order: caching-off, single-chunk-system, tool-bloat,
 * history-uncached.
 *
 * @param ir      Prompt IR handed to each detector.
 * @param result  Compile result (only the tool-bloat detector reads it).
 * @param profile Selected model profile.
 * @returns All advisories that fired; empty when none did.
 */
function runAdvisor(ir, result, profile) {
  return [
    ...detectCachingOff(ir, profile),
    ...detectSingleChunkSystem(ir, profile),
    ...detectToolBloat(ir, result),
    ...detectHistoryUncached(ir, profile)
  ];
}
|
|
698
|
+
/**
 * Advisory rule `caching-off-on-claude`.
 *
 * Emits a single `warn` advisory when the target is Anthropic, the combined
 * section text is at least 2000 characters, and no section has
 * `cacheable === true`.
 *
 * @param ir      Prompt IR; reads `text` and `cacheable` of each section.
 * @param profile Selected model profile; reads `profile.provider`.
 * @returns Zero or one advisory objects.
 */
function detectCachingOff(ir, profile) {
  if (profile.provider !== "anthropic") {
    return [];
  }
  let totalChars = 0;
  for (const section of ir.sections) {
    totalChars += section.text.length;
  }
  if (totalChars < 2e3) {
    return [];
  }
  for (const section of ir.sections) {
    if (section.cacheable === true) {
      return [];
    }
  }
  const advisory = {
    level: "warn",
    code: "caching-off-on-claude",
    message: `System prompt is ${totalChars} chars on Anthropic but no PromptSection has cacheable=true. Anthropic prompt caching cuts cached-prefix input cost by ~90% on subsequent calls; without it, every turn re-pays full price for the static system context.`,
    suggestion: "Mark stable system sections (role, persona, tool policy) with `cacheable: true`. The lowering pass concatenates cacheable sections into a single cache-controlled block before the dynamic ones.",
    docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
  };
  return [advisory];
}
|
|
714
|
+
/**
 * Advisory rule `single-chunk-system`.
 *
 * Emits a single `info` advisory when the target is Anthropic and the IR has
 * exactly one section whose text is longer than 1000 characters.
 *
 * @param ir      Prompt IR; reads `ir.sections`.
 * @param profile Selected model profile; reads `profile.provider`.
 * @returns Zero or one advisory objects.
 */
function detectSingleChunkSystem(ir, profile) {
  const onAnthropic = profile.provider === "anthropic";
  if (!onAnthropic || ir.sections.length !== 1) {
    return [];
  }
  const soleSection = ir.sections[0];
  if (!soleSection) {
    return [];
  }
  const charCount = soleSection.text.length;
  if (charCount <= 1e3) {
    return [];
  }
  const advisory = {
    level: "info",
    code: "single-chunk-system",
    message: `System prompt is a single ${charCount}-char chunk. Splitting into NamedChunks (static role/persona vs dynamic context) gives the lowering pass a finer cache-marker boundary \u2014 only the static portion needs to be byte-stable for the cache to hit.`,
    suggestion: "Refactor the system builder to return an array of `PromptSection` shaped { id, text, cacheable?: boolean }. Static chunks (role, persona, tool policy) get `cacheable: true`; dynamic ones (current context, today's date) don't.",
    docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
  };
  return [advisory];
}
|
|
729
|
+
/**
 * Advisory rule `tool-bloat`.
 *
 * Emits a single `warn` advisory when more than 10 tools are still attached
 * (per `result.diagnostics.toolsKept`) for one of the short-output archetypes
 * listed below. Returns an empty array when the IR declares no tools, when 10
 * or fewer were kept, or when the archetype is not in the list.
 *
 * @param ir     Prompt IR; reads `ir.tools` and `ir.intent.archetype`.
 * @param result Compile result; reads `result.diagnostics.toolsKept`.
 * @returns Zero or one advisory objects.
 */
function detectToolBloat(ir, result) {
  const shortOutputArchetypes = [
    "classify",
    "extract",
    "summarize",
    "transform",
    "critique"
  ];
  const declared = ir.tools;
  if (!declared || declared.length === 0) {
    return [];
  }
  const { toolsKept } = result.diagnostics;
  if (toolsKept <= 10) {
    return [];
  }
  const isShortOutput = shortOutputArchetypes.includes(ir.intent.archetype);
  if (!isShortOutput) {
    return [];
  }
  const advisory = {
    level: "warn",
    code: "tool-bloat",
    message: `${toolsKept} tools kept after the relevance pass for archetype="${ir.intent.archetype}" (consumer declared ${ir.tools.length}). This archetype is short-output and rarely needs more than 3 tools; each tool definition eats ~350 tokens of context budget.`,
    suggestion: "Tighten `relevanceByIntent: { [archetype]: 0..1 }` per ToolDefinition. Tools below `toolRelevanceThreshold` (default 0.2) get dropped. Without `relevanceByIntent`, every tool defaults to neutral (0.5) and stays.",
    docsUrl: "https://github.com/stue/kgauto/blob/main/v2/README.md#tools"
  };
  return [advisory];
}
|
|
751
|
+
/**
 * Advisory rule `history-uncached-on-claude`.
 *
 * Emits a single `warn` advisory when the target is Anthropic, the history
 * holds at least two cacheable messages, and history caching is not enabled
 * (either no `historyCachePolicy` or an explicit `strategy: "none"`).
 *
 * Only non-system messages are counted: the provider lowering passes strip
 * `role: "system"` entries from `history` before resolving the cache mark,
 * so system entries can never be part of the cached history prefix.
 *
 * @param ir      Prompt IR; reads `ir.history` and `ir.historyCachePolicy`.
 * @param profile Selected model profile; reads `profile.provider`.
 * @returns Zero or one advisory objects.
 */
function detectHistoryUncached(ir, profile) {
  if (profile.provider !== "anthropic") return [];
  const cacheableHistory = (ir.history ?? []).filter((m) => m.role !== "system");
  if (cacheableHistory.length < 2) return [];
  const policy = ir.historyCachePolicy;
  if (policy && policy.strategy !== "none") {
    return [];
  }
  // Distinguish "never configured" from "explicitly disabled" so the message
  // does not claim the policy is missing when the consumer set it to "none".
  const policyState = policy ? 'historyCachePolicy strategy "none"' : "no historyCachePolicy";
  return [
    {
      level: "warn",
      code: "history-uncached-on-claude",
      message: `${cacheableHistory.length} history messages on Anthropic with ${policyState}. Every turn re-pays for the full conversation context; with caching, subsequent turns hit the cache at ~10% the input cost.`,
      suggestion: "Set `historyCachePolicy: { strategy: 'all-but-latest' }` on this IR. The lowering pass marks the message immediately preceding currentTurn with cache_control; subsequent turns whose history prefix matches byte-for-byte hit the cache.",
      docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
    }
  ];
}
|
|
767
|
+
|
|
671
768
|
// src/compile.ts
|
|
672
769
|
var counter = 0;
|
|
673
770
|
function makeHandle() {
|
|
@@ -683,7 +780,8 @@ function compile(ir, opts = {}) {
|
|
|
683
780
|
threshold: opts.toolRelevanceThreshold
|
|
684
781
|
});
|
|
685
782
|
const compressed = passCompressHistory(toolFiltered.value, {
|
|
686
|
-
summarizeOlderThan: opts.compressHistoryAfter
|
|
783
|
+
summarizeOlderThan: opts.compressHistoryAfter,
|
|
784
|
+
summarizeAboveTokens: opts.compressHistoryAboveTokens
|
|
687
785
|
});
|
|
688
786
|
let workingIR = compressed.value;
|
|
689
787
|
const accumulatedMutations = [
|
|
@@ -718,6 +816,28 @@ function compile(ir, opts = {}) {
|
|
|
718
816
|
const handle = makeHandle();
|
|
719
817
|
const finalShape = computeShape(workingIR, inputTokens);
|
|
720
818
|
const _learningKey = learningKey(ir.intent.archetype, profile.id, finalShape);
|
|
819
|
+
const diagnostics = {
|
|
820
|
+
sectionsKept: workingIR.sections.length,
|
|
821
|
+
sectionsDropped: ir.sections.length - workingIR.sections.length,
|
|
822
|
+
toolsKept: workingIR.tools?.length ?? 0,
|
|
823
|
+
toolsDropped: (ir.tools?.length ?? 0) - (workingIR.tools?.length ?? 0),
|
|
824
|
+
historyKept: workingIR.history?.length ?? 0,
|
|
825
|
+
historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
|
|
826
|
+
cacheableTokens: lowered.diagnostics.cacheableTokens,
|
|
827
|
+
estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd,
|
|
828
|
+
historyCacheableTokens: lowered.diagnostics.historyCacheableTokens,
|
|
829
|
+
historyTokensTotal: compressed.historyTokensTotal
|
|
830
|
+
};
|
|
831
|
+
const advisories = runAdvisor(
|
|
832
|
+
ir,
|
|
833
|
+
{
|
|
834
|
+
target: profile.id,
|
|
835
|
+
provider: profile.provider,
|
|
836
|
+
tokensIn: inputTokens,
|
|
837
|
+
diagnostics
|
|
838
|
+
},
|
|
839
|
+
profile
|
|
840
|
+
);
|
|
721
841
|
return {
|
|
722
842
|
handle,
|
|
723
843
|
target: profile.id,
|
|
@@ -727,17 +847,8 @@ function compile(ir, opts = {}) {
|
|
|
727
847
|
estimatedCostUsd: target.estimatedCostUsd,
|
|
728
848
|
mutationsApplied: accumulatedMutations,
|
|
729
849
|
fallbackChain,
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
sectionsDropped: ir.sections.length - workingIR.sections.length,
|
|
733
|
-
toolsKept: workingIR.tools?.length ?? 0,
|
|
734
|
-
toolsDropped: (ir.tools?.length ?? 0) - (workingIR.tools?.length ?? 0),
|
|
735
|
-
historyKept: workingIR.history?.length ?? 0,
|
|
736
|
-
historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
|
|
737
|
-
cacheableTokens: lowered.diagnostics.cacheableTokens,
|
|
738
|
-
estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd,
|
|
739
|
-
historyCacheableTokens: lowered.diagnostics.historyCacheableTokens
|
|
740
|
-
}
|
|
850
|
+
advisories,
|
|
851
|
+
diagnostics
|
|
741
852
|
};
|
|
742
853
|
}
|
|
743
854
|
function validateIR(ir) {
|
|
@@ -822,7 +933,8 @@ function registerCompile(appId, archetype, ir, result) {
|
|
|
822
933
|
estimatedTokensIn: tokens,
|
|
823
934
|
mutationsApplied: result.mutationsApplied.map((m) => m.id),
|
|
824
935
|
startedAt: Date.now(),
|
|
825
|
-
historyCacheableTokens: result.diagnostics.historyCacheableTokens
|
|
936
|
+
historyCacheableTokens: result.diagnostics.historyCacheableTokens,
|
|
937
|
+
historyTokensTotal: result.diagnostics.historyTokensTotal
|
|
826
938
|
});
|
|
827
939
|
}
|
|
828
940
|
async function record(input) {
|
|
@@ -896,7 +1008,8 @@ function buildPayload(input, reg) {
|
|
|
896
1008
|
cache_creation_input_tokens: input.cacheCreationInputTokens,
|
|
897
1009
|
cost_usd_actual: costUsdActual,
|
|
898
1010
|
ttft_ms: input.ttftMs,
|
|
899
|
-
history_cacheable_tokens: reg?.historyCacheableTokens
|
|
1011
|
+
history_cacheable_tokens: reg?.historyCacheableTokens,
|
|
1012
|
+
history_tokens_at_compile: reg?.historyTokensTotal
|
|
900
1013
|
};
|
|
901
1014
|
}
|
|
902
1015
|
function computeCostUsd(modelId, tokensIn, tokensOut) {
|
|
@@ -1382,6 +1495,7 @@ export {
|
|
|
1382
1495
|
profilesByProvider,
|
|
1383
1496
|
record,
|
|
1384
1497
|
resetTokenizer,
|
|
1498
|
+
runAdvisor,
|
|
1385
1499
|
setTokenizer,
|
|
1386
1500
|
tryGetProfile
|
|
1387
1501
|
};
|
|
@@ -253,6 +253,41 @@ type CompiledRequest = {
|
|
|
253
253
|
}>;
|
|
254
254
|
tools?: unknown[];
|
|
255
255
|
};
|
|
256
|
+
/**
|
|
257
|
+
* Best-practice advisory emitted by the compiler at compile time. Non-fatal —
|
|
258
|
+
* consumers log, surface in dev tools, gate on `level === 'critical'` in CI,
|
|
259
|
+
* or ignore. The advisor inspects the IR + selected profile + diagnostics
|
|
260
|
+
* and emits one entry per detected gap.
|
|
261
|
+
*
|
|
262
|
+
* Codes are stable across releases. `suggestion` and `docsUrl` are optional
|
|
263
|
+
* but encouraged: suggestion = the actionable diff; docsUrl = the
|
|
264
|
+
* interfaces/kgauto.md anchor for context.
|
|
265
|
+
*
|
|
266
|
+
* alpha.6 Phase 1 starter rules:
|
|
267
|
+
* - `caching-off-on-claude` (warn) system ≥2000 chars on Anthropic, no cacheable=true
|
|
268
|
+
* - `single-chunk-system` (info) Anthropic, only one PromptSection >1000 chars
|
|
269
|
+
* - `tool-bloat` (warn) >10 tools on a short-output archetype
|
|
270
|
+
* - `history-uncached-on-claude` (warn) Anthropic, ≥2 history messages, no historyCachePolicy
|
|
271
|
+
*
|
|
272
|
+
* Phase 2 (catalog as `bestPractices` block in profiles) and Phase 3 (brain
|
|
273
|
+
* telemetry on `advisories_fired`) are alpha.7+ territory.
|
|
274
|
+
*/
|
|
275
|
+
interface BestPracticeAdvisory {
|
|
276
|
+
/**
|
|
277
|
+
* Severity. `info` = informational; `warn` = behavioral pattern that's
|
|
278
|
+
* usually expensive or wrong; `critical` = likely bug or production-grade
|
|
279
|
+
* misuse. Phase 1 ships info + warn only.
|
|
280
|
+
*/
|
|
281
|
+
level: 'info' | 'warn' | 'critical';
|
|
282
|
+
/** Stable kebab-case code. Consumers filter / gate by this. */
|
|
283
|
+
code: string;
|
|
284
|
+
/** Human-readable explanation of what was detected. */
|
|
285
|
+
message: string;
|
|
286
|
+
/** Optional: how to fix — actionable diff or pattern. */
|
|
287
|
+
suggestion?: string;
|
|
288
|
+
/** Optional: link to docs anchor for more context. */
|
|
289
|
+
docsUrl?: string;
|
|
290
|
+
}
|
|
256
291
|
interface CompileResult {
|
|
257
292
|
/** Unique handle for this call — pass to record() to correlate the outcome. */
|
|
258
293
|
handle: string;
|
|
@@ -270,6 +305,11 @@ interface CompileResult {
|
|
|
270
305
|
mutationsApplied: MutationApplied[];
|
|
271
306
|
/** Fallback chain — try these in order if target fails. */
|
|
272
307
|
fallbackChain: string[];
|
|
308
|
+
/**
|
|
309
|
+
* Best-practice advisories emitted by the compiler. Non-fatal. Empty
|
|
310
|
+
* array when no rules fired. alpha.6 Phase 1.
|
|
311
|
+
*/
|
|
312
|
+
advisories: BestPracticeAdvisory[];
|
|
273
313
|
/** Diagnostics for caller-side logging. */
|
|
274
314
|
diagnostics: {
|
|
275
315
|
sectionsKept: number;
|
|
@@ -290,6 +330,14 @@ interface CompileResult {
|
|
|
290
330
|
* from history caching. alpha.5.
|
|
291
331
|
*/
|
|
292
332
|
historyCacheableTokens: number;
|
|
333
|
+
/**
|
|
334
|
+
* Total tokens in input `history` (pre-compression). Computed regardless
|
|
335
|
+
* of whether `passCompressHistory` fired — surfaces how close a tuple is
|
|
336
|
+
* to its `compressHistoryAboveTokens` threshold so dashboards / cost-
|
|
337
|
+
* watchers can see the bloat axis the count-based threshold misses.
|
|
338
|
+
* 0 when history is empty. alpha.7.
|
|
339
|
+
*/
|
|
340
|
+
historyTokensTotal: number;
|
|
293
341
|
};
|
|
294
342
|
}
|
|
295
343
|
/**
|
|
@@ -568,4 +616,4 @@ declare function tryGetProfile(id: string): ModelProfile | undefined;
|
|
|
568
616
|
declare function allProfiles(): readonly ModelProfile[];
|
|
569
617
|
declare function profilesByProvider(provider: Provider): readonly ModelProfile[];
|
|
570
618
|
|
|
571
|
-
export { type ApiKeys as A, type CompilePolicy as C, type IntentDeclaration as I, type LoweringSpec as L, type ModelProfile as M, type NormalizedResponse as N, type OracleScore as O, type ProviderOverrides as P, type RecordInput as R, type StructuredOutputCapability as S, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type CompileResult as e, ALIASES as f, type CacheStrategy as g, type CallAttempt as h, CallError as i, type CliffRule as j, type Constraints as k, type Message as l, type MutationApplied as m, type NormalizedTokens as n, type PromptSection as o, type Provider as p, type RecoveryRule as q, type SystemPromptMode as r, type ToolDefinition as s, allProfiles as t, getProfile as u, profilesByProvider as v, tryGetProfile as w };
|
|
619
|
+
export { type ApiKeys as A, type BestPracticeAdvisory as B, type CompilePolicy as C, type HistoryCachePolicy as H, type IntentDeclaration as I, type LoweringSpec as L, type ModelProfile as M, type NormalizedResponse as N, type OracleScore as O, type ProviderOverrides as P, type RecordInput as R, type StructuredOutputCapability as S, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type CompileResult as e, ALIASES as f, type CacheStrategy as g, type CallAttempt as h, CallError as i, type CliffRule as j, type Constraints as k, type Message as l, type MutationApplied as m, type NormalizedTokens as n, type PromptSection as o, type Provider as p, type RecoveryRule as q, type SystemPromptMode as r, type ToolDefinition as s, allProfiles as t, getProfile as u, profilesByProvider as v, tryGetProfile as w };
|
|
@@ -253,6 +253,41 @@ type CompiledRequest = {
|
|
|
253
253
|
}>;
|
|
254
254
|
tools?: unknown[];
|
|
255
255
|
};
|
|
256
|
+
/**
|
|
257
|
+
* Best-practice advisory emitted by the compiler at compile time. Non-fatal —
|
|
258
|
+
* consumers log, surface in dev tools, gate on `level === 'critical'` in CI,
|
|
259
|
+
* or ignore. The advisor inspects the IR + selected profile + diagnostics
|
|
260
|
+
* and emits one entry per detected gap.
|
|
261
|
+
*
|
|
262
|
+
* Codes are stable across releases. `suggestion` and `docsUrl` are optional
|
|
263
|
+
* but encouraged: suggestion = the actionable diff; docsUrl = the
|
|
264
|
+
* interfaces/kgauto.md anchor for context.
|
|
265
|
+
*
|
|
266
|
+
* alpha.6 Phase 1 starter rules:
|
|
267
|
+
* - `caching-off-on-claude` (warn) system ≥2000 chars on Anthropic, no cacheable=true
|
|
268
|
+
* - `single-chunk-system` (info) Anthropic, only one PromptSection >1000 chars
|
|
269
|
+
* - `tool-bloat` (warn) >10 tools on a short-output archetype
|
|
270
|
+
* - `history-uncached-on-claude` (warn) Anthropic, ≥2 history messages, no historyCachePolicy
|
|
271
|
+
*
|
|
272
|
+
* Phase 2 (catalog as `bestPractices` block in profiles) and Phase 3 (brain
|
|
273
|
+
* telemetry on `advisories_fired`) are alpha.7+ territory.
|
|
274
|
+
*/
|
|
275
|
+
interface BestPracticeAdvisory {
|
|
276
|
+
/**
|
|
277
|
+
* Severity. `info` = informational; `warn` = behavioral pattern that's
|
|
278
|
+
* usually expensive or wrong; `critical` = likely bug or production-grade
|
|
279
|
+
* misuse. Phase 1 ships info + warn only.
|
|
280
|
+
*/
|
|
281
|
+
level: 'info' | 'warn' | 'critical';
|
|
282
|
+
/** Stable kebab-case code. Consumers filter / gate by this. */
|
|
283
|
+
code: string;
|
|
284
|
+
/** Human-readable explanation of what was detected. */
|
|
285
|
+
message: string;
|
|
286
|
+
/** Optional: how to fix — actionable diff or pattern. */
|
|
287
|
+
suggestion?: string;
|
|
288
|
+
/** Optional: link to docs anchor for more context. */
|
|
289
|
+
docsUrl?: string;
|
|
290
|
+
}
|
|
256
291
|
interface CompileResult {
|
|
257
292
|
/** Unique handle for this call — pass to record() to correlate the outcome. */
|
|
258
293
|
handle: string;
|
|
@@ -270,6 +305,11 @@ interface CompileResult {
|
|
|
270
305
|
mutationsApplied: MutationApplied[];
|
|
271
306
|
/** Fallback chain — try these in order if target fails. */
|
|
272
307
|
fallbackChain: string[];
|
|
308
|
+
/**
|
|
309
|
+
* Best-practice advisories emitted by the compiler. Non-fatal. Empty
|
|
310
|
+
* array when no rules fired. alpha.6 Phase 1.
|
|
311
|
+
*/
|
|
312
|
+
advisories: BestPracticeAdvisory[];
|
|
273
313
|
/** Diagnostics for caller-side logging. */
|
|
274
314
|
diagnostics: {
|
|
275
315
|
sectionsKept: number;
|
|
@@ -290,6 +330,14 @@ interface CompileResult {
|
|
|
290
330
|
* from history caching. alpha.5.
|
|
291
331
|
*/
|
|
292
332
|
historyCacheableTokens: number;
|
|
333
|
+
/**
|
|
334
|
+
* Total tokens in input `history` (pre-compression). Computed regardless
|
|
335
|
+
* of whether `passCompressHistory` fired — surfaces how close a tuple is
|
|
336
|
+
* to its `compressHistoryAboveTokens` threshold so dashboards / cost-
|
|
337
|
+
* watchers can see the bloat axis the count-based threshold misses.
|
|
338
|
+
* 0 when history is empty. alpha.7.
|
|
339
|
+
*/
|
|
340
|
+
historyTokensTotal: number;
|
|
293
341
|
};
|
|
294
342
|
}
|
|
295
343
|
/**
|
|
@@ -568,4 +616,4 @@ declare function tryGetProfile(id: string): ModelProfile | undefined;
|
|
|
568
616
|
declare function allProfiles(): readonly ModelProfile[];
|
|
569
617
|
declare function profilesByProvider(provider: Provider): readonly ModelProfile[];
|
|
570
618
|
|
|
571
|
-
export { type ApiKeys as A, type CompilePolicy as C, type IntentDeclaration as I, type LoweringSpec as L, type ModelProfile as M, type NormalizedResponse as N, type OracleScore as O, type ProviderOverrides as P, type RecordInput as R, type StructuredOutputCapability as S, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type CompileResult as e, ALIASES as f, type CacheStrategy as g, type CallAttempt as h, CallError as i, type CliffRule as j, type Constraints as k, type Message as l, type MutationApplied as m, type NormalizedTokens as n, type PromptSection as o, type Provider as p, type RecoveryRule as q, type SystemPromptMode as r, type ToolDefinition as s, allProfiles as t, getProfile as u, profilesByProvider as v, tryGetProfile as w };
|
|
619
|
+
export { type ApiKeys as A, type BestPracticeAdvisory as B, type CompilePolicy as C, type HistoryCachePolicy as H, type IntentDeclaration as I, type LoweringSpec as L, type ModelProfile as M, type NormalizedResponse as N, type OracleScore as O, type ProviderOverrides as P, type RecordInput as R, type StructuredOutputCapability as S, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type CompileResult as e, ALIASES as f, type CacheStrategy as g, type CallAttempt as h, CallError as i, type CliffRule as j, type Constraints as k, type Message as l, type MutationApplied as m, type NormalizedTokens as n, type PromptSection as o, type Provider as p, type RecoveryRule as q, type SystemPromptMode as r, type ToolDefinition as s, allProfiles as t, getProfile as u, profilesByProvider as v, tryGetProfile as w };
|
package/dist/profiles.d.mts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-DHdCRBVH.mjs';
|
|
1
|
+
export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-Py8c7zjJ.mjs';
|
|
2
2
|
import './dialect.mjs';
|
package/dist/profiles.d.ts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-MGq5Tnjv.js';
|
|
1
|
+
export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-B3eNQ2py.js';
|
|
2
2
|
import './dialect.js';
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@warmdrift/kgauto-compiler",
|
|
3
|
-
"version": "2.0.0-alpha.5",
|
|
3
|
+
"version": "2.0.0-alpha.7",
|
|
4
4
|
"description": "Prompt compiler + central learning brain for multi-model AI apps. Swap models without rewriting prompts.",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"module": "./dist/index.mjs",
|