@warmdrift/kgauto-compiler 2.0.0-alpha.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +240 -0
- package/dist/chunk-3KVKELZN.mjs +657 -0
- package/dist/chunk-5TI6PNSK.mjs +95 -0
- package/dist/dialect.d.mts +99 -0
- package/dist/dialect.d.ts +99 -0
- package/dist/dialect.js +127 -0
- package/dist/dialect.mjs +22 -0
- package/dist/index.d.mts +509 -0
- package/dist/index.d.ts +509 -0
- package/dist/index.js +2559 -0
- package/dist/index.mjs +1784 -0
- package/dist/profiles-Bgri1pe7.d.ts +728 -0
- package/dist/profiles-DO6R9moS.d.mts +728 -0
- package/dist/profiles.d.mts +2 -0
- package/dist/profiles.d.ts +2 -0
- package/dist/profiles.js +685 -0
- package/dist/profiles.mjs +14 -0
- package/package.json +59 -0
|
@@ -0,0 +1,728 @@
|
|
|
1
|
+
import { IntentArchetypeName } from './dialect.mjs';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Intermediate Representation — the structured form of a prompt.
|
|
5
|
+
*
|
|
6
|
+
* Everything kgauto v2 does, it does on the IR. The IR is constructed by the
|
|
7
|
+
* caller (or by `parse()` from a string-style prompt for backwards-compat),
|
|
8
|
+
* transformed by the compiler passes, and lowered to a target-specific wire
|
|
9
|
+
* request only at the very end.
|
|
10
|
+
*
|
|
11
|
+
* The IR carries STRUCTURE, not just text. Sections are first-class. Tools
|
|
12
|
+
* carry per-intent relevance. History knows its turn-age. Constraints are
|
|
13
|
+
* explicit. This is what lets passes do real work.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
/**
 * A semantically-named section of the system prompt. Sections enable
 * intent-aware slicing (drop sections not tagged for this intent), dedupe
 * (collapse identical sections across files), and cache marking (identify
 * the stable prefix).
 */
interface PromptSection {
    /** Stable identifier — used for slicing, dedupe, and cache markers. */
    id: string;
    /** Section text. */
    text: string;
    /**
     * Which intents this section applies to. Empty = applies to all intents.
     * `compile()` drops sections whose non-empty `intents` array doesn't
     * include the current intent.
     */
    intents?: IntentArchetypeName[];
    /**
     * If true, this section is part of the stable cacheable prefix. The lower
     * pass uses this to place cache markers correctly per target.
     */
    cacheable?: boolean;
    /**
     * Section weight when ordering — lower = earlier in the assembled prompt.
     * Defaults to insertion order.
     */
    weight?: number;
}
|
|
44
|
+
/**
 * Definition of a tool offered to the model, plus optional per-intent
 * relevance metadata used by the compiler.
 */
interface ToolDefinition {
    name: string;
    description?: string;
    parameters?: Record<string, unknown>;
    /**
     * Per-intent relevance scores. Compile uses these to drop irrelevant tools.
     * Missing intents default to 0.5 (neutral).
     */
    relevanceByIntent?: Partial<Record<IntentArchetypeName, number>>;
    /** Pass-through for provider-specific fields (Anthropic input_schema, etc.). */
    [key: string]: unknown;
}
|
|
56
|
+
/** A single conversation message (used for `history` entries and `currentTurn`). */
interface Message {
    role: 'system' | 'user' | 'assistant' | 'tool';
    content: string;
    /** Optional structured parts (tool calls, results) — passed through to lowering. */
    parts?: unknown[];
    /** For tool messages — which tool this corresponds to. */
    toolName?: string;
    /** For tool messages — the call id. */
    toolCallId?: string;
}
|
|
66
|
+
/**
 * The compile-time intent declaration. `name` is the app's local label;
 * `archetype` is the canonical dialect-v1 archetype the app maps it to.
 *
 * Apps with their own intent vocabulary (tt-intelligence's "ask"/"hunt"/
 * "dashboard") declare the mapping here. The brain learns by archetype, not
 * by app-local name.
 */
interface IntentDeclaration {
    /** App-local intent name (free-form, for app's own debugging). */
    name: string;
    /** Canonical dialect-v1 archetype. Required for cross-app learning. */
    archetype: IntentArchetypeName;
}
|
|
80
|
+
/**
 * Per-call compile constraints and hints carried on the IR.
 *
 * NOTE(review): the original docs labelled `maxLatencyMs` and `maxCostUsd`
 * as both "Hard" and "Advisory". They are described as advisory here —
 * confirm against the compiler. The enforced cost ceiling is documented on
 * `CompilePolicy.maxCostPerCallUsd`.
 */
interface Constraints {
    /** Latency ceiling (ms) — compiler will down-rank slow models. Advisory. */
    maxLatencyMs?: number;
    /** Cost ceiling per call (USD). Advisory. */
    maxCostUsd?: number;
    /** Caller wants structured (JSON) output. */
    structuredOutput?: boolean;
    /** Hint: caller expects a short response (used to disable thinking on Gemini). */
    expectedShortOutput?: boolean;
    /** Hint: max response words. */
    maxResponseWords?: number;
    /** Override target model selection — if set, compiler uses this instead of routing. */
    forceModel?: string;
}
|
|
94
|
+
/**
 * Cache marker policy for the messages array (history + currentTurn).
 *
 * Anthropic positional caching: a `cache_control` marker on a content block
 * tells the API "remember the prefix up through this block." On a subsequent
 * request whose first N tokens match, those N tokens are billed at the
 * cached rate (10% of the input price). Without a marker, every call
 * re-pays for the entire history.
 *
 * - `'none'` (default when omitted): no history cache marker. System-level
 *   cache markers from `PromptSection.cacheable=true` still apply.
 * - `'all-but-latest'`: marks the message immediately preceding `currentTurn`
 *   (the last history entry). On the next call, that entire history prefix
 *   is cacheable. Good fit for chat/agent loops where every prior turn is
 *   stable.
 * - `'fixed-suffix'`: marks the message `suffix` positions from the end of
 *   `history`. Use when the last few turns are volatile (e.g., scratchpad,
 *   draft revisions) but the earlier prefix is stable.
 *
 * For non-Anthropic providers, no wire-format marker is emitted (Gemini /
 * OpenAI / DeepSeek implicit caching takes effect automatically when a
 * stable prefix is reused). The compiler still computes
 * `diagnostics.historyCacheableTokens` for telemetry on every provider.
 *
 * alpha.5.
 */
type HistoryCachePolicy = {
    strategy: 'none';
} | {
    strategy: 'all-but-latest';
} | {
    strategy: 'fixed-suffix';
    suffix: number;
};
|
|
128
|
+
/**
 * Consumer-declared policy for model selection. Lives outside the IR
 * (passed via CompileOptions) because it's a SESSION/APP-level constraint,
 * not a per-call shape.
 *
 * The original tt-intelligence scenario (s11): user capped Anthropic
 * spending on Sonnet for cost reasons. v2 compile() kept picking Sonnet
 * as the best target, Hunter's preflight hit the cap and fell back to
 * Flash — every single call. CompilePolicy.blockedModels lets the
 * consumer tell kgauto "don't pick Sonnet right now" and the compiler
 * routes to the next-best option directly. No wasted preflight tax.
 *
 * This is the "coach knows the constraints" feature — kgauto stops
 * recommending things the consumer has already ruled out.
 */
interface CompilePolicy {
    /**
     * Model IDs the consumer has gated. Compile() will never select these.
     * Use for: cost caps, account-level rate limits, "this model is broken
     * for our workload" decisions.
     */
    blockedModels?: string[];
    /**
     * Hard ceiling on estimated input cost per call (USD). Models whose
     * estimated cost exceeds this are rejected. Use for: budget enforcement
     * on high-volume routes.
     */
    maxCostPerCallUsd?: number;
    /**
     * Model IDs the consumer prefers. When multiple models fit, preferred
     * models get a rank boost (large enough to overcome small quality
     * differences but not large enough to override hard rejects).
     */
    preferredModels?: string[];
    /**
     * Customer-posture tag (master plan §1.2, alpha.9).
     *
     * - `'locked'`    — compliance/contract/brand-promise. Caller passes
     *                   exactly one model; no fallback is desired. kgauto
     *                   never walks the chain.
     * - `'preferred'` — user-selected primary, fallback chain as safety
     *                   net. On 429/5xx, walk the chain and surface
     *                   `fellOverFrom` so the consumer can show "Claude
     *                   was busy; we used Pro for this answer."
     * - `'open'`      — library picks the chain. Model identity is
     *                   irrelevant; output is the contract.
     *
     * The field is **informational** — kgauto's execution path is already
     * determined by the shape of `ir.models`. Posture surfaces in
     * telemetry so the cost-watcher can distinguish "locked failed, no
     * fallback was tried" from "open chain exhausted." Default: when
     * `ir.models.length === 1` posture is treated as `'locked'` by the
     * advisor; otherwise unspecified.
     */
    posture?: 'locked' | 'preferred' | 'open';
}
|
|
184
|
+
/**
 * The IR — the input to compile().
 */
interface PromptIR {
    /** App identifier — required for multi-tenant brain. */
    appId: string;
    /** Intent declaration — what is this call doing? */
    intent: IntentDeclaration;
    /** Structured system prompt sections. */
    sections: PromptSection[];
    /** Available tools (compiler may drop based on intent relevance + budget). */
    tools?: ToolDefinition[];
    /** Conversation history (compiler may compress old turns). */
    history?: Message[];
    /** The user's current turn — never dropped. */
    currentTurn?: Message;
    /** Allowed model IDs, in caller-preference order. Compiler picks among these. */
    models: string[];
    /** Compile constraints. */
    constraints?: Constraints;
    /**
     * Cache marker placement policy for the messages array. Default = no
     * history cache markers. See `HistoryCachePolicy` for semantics.
     * alpha.5.
     */
    historyCachePolicy?: HistoryCachePolicy;
}
|
|
211
|
+
/** Provider identifiers used on compile/call results and model profiles. */
type Provider = 'anthropic' | 'google' | 'openai' | 'deepseek' | 'mistral' | 'xai';
|
|
212
|
+
/**
 * Record of one mutation that fired during compile (its ID, source, the
 * pass that applied it, and a description). The list of applied mutations
 * is empty in v1 (no mutation engine yet) and is populated when the brain
 * is online and pushing mutations.
 */
type MutationApplied = {
    id: string;
    source: string;
    passName: string;
    description: string;
};
|
|
222
|
+
/**
 * Target-specific wire request. Shape varies by provider — caller passes the
 * right field to the right SDK. Discriminated on `provider`.
 *
 * NOTE(review): `Provider` also lists 'mistral' and 'xai', but no wire shape
 * is declared for them here — confirm how those providers are lowered.
 */
type CompiledRequest = {
    provider: 'anthropic';
    model: string;
    system: Array<{
        type: 'text';
        text: string;
        cache_control?: {
            type: 'ephemeral';
        };
    }>;
    messages: Array<{
        role: string;
        content: unknown;
    }>;
    tools?: unknown[];
    max_tokens?: number;
} | {
    provider: 'google';
    model: string;
    systemInstruction?: {
        role: 'system';
        parts: Array<{
            text: string;
        }>;
    };
    contents: Array<{
        role: string;
        parts: unknown[];
    }>;
    tools?: unknown[];
    generationConfig?: Record<string, unknown>;
    cachedContent?: string;
} | {
    provider: 'openai';
    model: string;
    messages: Array<{
        role: string;
        content: unknown;
    }>;
    tools?: unknown[];
    response_format?: unknown;
    reasoning_effort?: string;
} | {
    provider: 'deepseek';
    model: string;
    messages: Array<{
        role: string;
        content: unknown;
    }>;
    tools?: unknown[];
};
|
|
277
|
+
/**
 * Best-practice advisory emitted by the compiler at compile time. Non-fatal —
 * consumers log, surface in dev tools, gate on `level === 'critical'` in CI,
 * or ignore. The advisor inspects the IR + selected profile + diagnostics
 * and emits one entry per detected gap.
 *
 * Codes are stable across releases. `suggestion` and `docsUrl` are optional
 * but encouraged: suggestion = the actionable diff; docsUrl = the
 * interfaces/kgauto.md anchor for context.
 *
 * alpha.6 Phase 1 starter rules:
 *   - `caching-off-on-claude`      (warn) system >2000 chars on Anthropic, no cacheable=true
 *   - `single-chunk-system`        (info) Anthropic, only one PromptSection >1000 chars
 *   - `tool-bloat`                 (warn) >10 tools on a short-output archetype
 *   - `history-uncached-on-claude` (warn) Anthropic, ≥2 history messages, no historyCachePolicy
 *
 * Phase 2 (catalog as `bestPractices` block in profiles) and Phase 3 (brain
 * telemetry on `advisories_fired`) are alpha.7+ territory.
 */
interface BestPracticeAdvisory {
    /**
     * Severity. `info` = informational; `warn` = behavioral pattern that's
     * usually expensive or wrong; `critical` = likely bug or production-grade
     * misuse. Phase 1 ships info + warn only.
     */
    level: 'info' | 'warn' | 'critical';
    /** Stable kebab-case code. Consumers filter / gate by this. */
    code: string;
    /** Human-readable explanation of what was detected. */
    message: string;
    /** Optional: how to fix — actionable diff or pattern. */
    suggestion?: string;
    /** Optional: link to docs anchor for more context. */
    docsUrl?: string;
}
|
|
312
|
+
/**
 * Output of a compile: the selected target, the lowered wire request, and
 * estimates/diagnostics for caller-side logging.
 */
interface CompileResult {
    /** Unique handle for this call — pass to record() to correlate the outcome. */
    handle: string;
    /** Selected target model id. */
    target: string;
    /** Selected provider. */
    provider: Provider;
    /** The wire request — pass the appropriate fields to your SDK. */
    request: CompiledRequest;
    /** Estimated tokens (input). */
    tokensIn: number;
    /** Estimated cost in USD (input portion). */
    estimatedCostUsd: number;
    /** Mutations that fired during compile (informational). */
    mutationsApplied: MutationApplied[];
    /** Fallback chain — try these in order if target fails. */
    fallbackChain: string[];
    /**
     * Best-practice advisories emitted by the compiler. Non-fatal. Empty
     * array when no rules fired. alpha.6 Phase 1.
     */
    advisories: BestPracticeAdvisory[];
    /** Diagnostics for caller-side logging. */
    diagnostics: {
        sectionsKept: number;
        sectionsDropped: number;
        toolsKept: number;
        toolsDropped: number;
        historyKept: number;
        historyDropped: number;
        cacheableTokens: number;
        estimatedCacheSavingsUsd: number;
        /**
         * Tokens in `history` (and `currentTurn` when before the marker) that
         * fall within the cacheable prefix per `historyCachePolicy`. Always
         * computed; only Anthropic actually emits a wire-format marker. For
         * Gemini / OpenAI / DeepSeek, this represents the theoretical cacheable
         * prefix that implicit caching may pick up — useful telemetry for the
         * brain to learn which (app, model, archetype) tuples benefit most
         * from history caching. alpha.5.
         */
        historyCacheableTokens: number;
        /**
         * Total tokens in input `history` (pre-compression). Computed regardless
         * of whether `passCompressHistory` fired — surfaces how close a tuple is
         * to its `compressHistoryAboveTokens` threshold so dashboards / cost-
         * watchers can see the bloat axis the count-based threshold misses.
         * 0 when history is empty. alpha.7.
         */
        historyTokensTotal: number;
    };
}
|
|
364
|
+
/**
 * Token usage normalized across providers. `cached` and `cacheCreated` are
 * Anthropic prompt-cache reads/writes. Gemini implicit caching populates
 * `cached` from `usageMetadata.cachedContentTokenCount`; OpenAI populates
 * it from `prompt_tokens_details.cached_tokens`.
 */
interface NormalizedTokens {
    input: number;
    output: number;
    total: number;
    cached?: number;
    cacheCreated?: number;
}
|
|
377
|
+
/**
 * Tool call in a provider-agnostic shape. Anthropic `tool_use` blocks,
 * Google `functionCall` parts, and OpenAI/DeepSeek `tool_calls[]` all
 * collapse to this.
 */
interface ToolCall {
    id: string;
    name: string;
    args: Record<string, unknown>;
}
|
|
387
|
+
/** A provider response normalized into a provider-agnostic shape. */
interface NormalizedResponse {
    /** Main text body. Empty string if response had no text content. */
    text: string;
    /**
     * Parsed structured output. Populated when ir.constraints.structuredOutput
     * is true and JSON.parse(text) succeeds. Null otherwise.
     */
    structuredOutput: unknown | null;
    /** Tool calls in normalized shape. Empty array if none. */
    toolCalls: ToolCall[];
    tokens: NormalizedTokens;
    /** Provider-specific finish reason, passed through unchanged. */
    finishReason?: string;
    /** Untouched provider response — escape hatch for consumers needing fields not yet normalized. */
    raw: unknown;
    /** Set when structuredOutput parsing was attempted and failed. */
    parseError?: string;
}
|
|
405
|
+
/**
 * Per-provider API keys.
 *
 * NOTE(review): `Provider` also lists 'mistral' and 'xai', which have no
 * key slot here — confirm whether that is intentional.
 */
interface ApiKeys {
    anthropic?: string;
    google?: string;
    openai?: string;
    deepseek?: string;
}
|
|
411
|
+
/**
 * Per-provider override fields shallow-merged into the lowered request before
 * execution. Lets consumers reach Gemini `safetySettings`, Anthropic
 * `tool_choice`, OpenAI `seed` etc. without bypassing kgauto.
 */
interface ProviderOverrides {
    anthropic?: Record<string, unknown>;
    google?: Record<string, unknown>;
    openai?: Record<string, unknown>;
    deepseek?: Record<string, unknown>;
}
|
|
422
|
+
/** Options accepted by `call()`. */
interface CallOptions {
    /** Forwarded to compile(). */
    policy?: CompilePolicy;
    toolRelevanceThreshold?: number;
    compressHistoryAfter?: number;
    /** Override API keys (defaults: process.env). */
    apiKeys?: ApiKeys;
    /** Provider-specific request fields shallow-merged into the lowered request. */
    providerOverrides?: ProviderOverrides;
    /** Override fetch (for tests). */
    fetchImpl?: typeof fetch;
    /** Disable retry/fallback walk on retryable errors. Default: enabled. */
    noFallback?: boolean;
    /**
     * alpha.10. Disable the silent auto-filter of unreachable models from the
     * fallback walk. Default: false (filter ON). Opt-out exists for tests +
     * the rare consumer that wants the legacy "fail at execute() with auth
     * error" behavior. When ON (default), models whose provider has no
     * resolvable API key are dropped from `targetsToTry` before the first
     * network call; if the chain empties entirely, throws CallError with
     * `lastErrorCode = 'no_reachable_models'`.
     *
     * Reachability source: `apiKeys` (this CallOptions) + `process.env` (via
     * `PROVIDER_ENV_KEYS`). Overriding the env source (env.ts's
     * `ReachabilityOpts.envSource`) is not exposed here — `call()` always
     * uses process.env. Use
     * `getDefaultFallbackChain({ reachability: { envSource } })` upstream
     * for hermetic test runs.
     */
    noAutoFilter?: boolean;
}
|
|
452
|
+
/** One provider attempt made during a call's retry/fallback walk. */
interface CallAttempt {
    model: string;
    status: 'success' | 'retryable' | 'terminal';
    errorCode?: string;
    message?: string;
}
|
|
458
|
+
/**
 * Why fallback fired. Normalized for `CallResult.fallbackReason` (alpha.9).
 *
 * - `rate_limit`         provider returned 429
 * - `provider_error`     5xx, network, or other retryable upstream issue
 * - `cost_cap`           preflight policy.maxCostPerCallUsd rejected target
 * - `cliff`              alpha.8 contract violation (MAX_TOKENS on
 *                        structured output, parse-failed JSON)
 * - `contract_violation` other compile-time-contract failures (reserved
 *                        for alpha.10+ — e.g. mid-stream policy rejects)
 */
type FallbackReason = 'rate_limit' | 'provider_error' | 'cost_cap' | 'cliff' | 'contract_violation';
|
|
470
|
+
/**
 * Result of a successful `call()`: the normalized response plus which model
 * served it and the retry/fallback telemetry.
 */
interface CallResult {
    /** Compile handle (still valid for record() if consumer wants to add oracle scores later). */
    handle: string;
    /** The model that ACTUALLY served the response (post-fallback). */
    actualModel: string;
    /** What compile() originally targeted. */
    requestedModel: string;
    provider: Provider;
    response: NormalizedResponse;
    latencyMs: number;
    /** Mutations that fired during compile (informational, mirrors CompileResult.mutationsApplied). */
    mutationsApplied: MutationApplied[];
    /** One entry per provider attempt — observability for retry/fallback walks. */
    attempts: CallAttempt[];
    /**
     * Alpha.9 normalization of fallback-walk telemetry. When the chain
     * succeeded on the first attempt, these collapse to:
     *   - `servedBy === requestedModel`
     *   - `fellOverFrom` undefined
     *   - `fallbackReason` undefined
     *
     * When fallback fired:
     *   - `servedBy` = `actualModel` (the model that produced the response)
     *   - `fellOverFrom` = `requestedModel` (what the caller / compile() asked for)
     *   - `fallbackReason` = normalized cause derived from the first
     *     non-success attempt's `errorCode`
     *
     * Consumer UX use: show "Claude was busy; we used Pro for this answer"
     * when `fellOverFrom` is set (master plan §3.6).
     */
    /** Model that actually answered. Equal to `actualModel`; kept distinct for clarity. */
    servedBy: string;
    /** Set only when fallback fired. Equal to `requestedModel` in that case. */
    fellOverFrom?: string;
    /** Set only when fallback fired. Normalized cause. */
    fallbackReason?: FallbackReason;
    /**
     * alpha.10. Models that auto-filter dropped from the fallback walk because
     * their provider had no reachable API key. Empty when nothing was filtered
     * (the common case once consumers have all the keys they need). Surfaces
     * silent self-heal so consumers can log/audit what happened without
     * defeating the "kgauto just gets" UX.
     *
     * Empty array (not undefined) when filter ran but dropped nothing —
     * distinguishes "filter ran cleanly" from "filter was disabled" (`undefined`
     * when `noAutoFilter: true`).
     */
    unreachableFiltered?: string[];
}
|
|
519
|
+
/**
 * Thrown when call() exhausts the fallback chain without success.
 * `attempts` carries every model tried + classification.
 */
declare class CallError extends Error {
    readonly attempts: CallAttempt[];
    readonly lastErrorCode?: string;
    readonly lastStatus?: number;
    constructor(message: string, attempts: CallAttempt[], lastStatus?: number, lastErrorCode?: string);
}
|
|
529
|
+
/** Quality score for a recorded outcome (see `RecordInput.oracleScore`). */
interface OracleScore {
    /** 0..1 overall quality. */
    score: number;
    /** Optional per-dimension breakdown. */
    dimensions?: Record<string, number>;
    /** Free-form explanation for debugging. */
    rationale?: string;
}
|
|
537
|
+
/** Outcome of a call, reported back via record() and correlated by `handle`. */
interface RecordInput {
    /** Handle from CompileResult. */
    handle: string;
    /** Actual tokens consumed (post-call). */
    tokensIn: number;
    tokensOut: number;
    /** Wall-clock latency in ms. */
    latencyMs: number;
    /** True iff the call returned a usable response. */
    success: boolean;
    /** True iff the call returned 0 output tokens despite success. */
    emptyResponse?: boolean;
    /** Provider error code if any. */
    errorType?: string;
    /** Tools actually invoked by the model. */
    toolsCalled?: string[];
    /** Oracle quality score — required for learning to fire. */
    oracleScore?: OracleScore;
    /** Optional: scrubbed prompt/response previews for debugging. */
    promptPreview?: string;
    responsePreview?: string;
    /**
     * The model that ACTUALLY RAN. Set this when consumer-side fallback ran
     * a different model than v2 compile() targeted. Brain stores this as
     * `model` (the truth) and the original target as `requested_model`.
     *
     * Omit when no fallback occurred — brain stores compile target as `model`
     * (still the truth in that case) and `requested_model` stays NULL.
     *
     * s11 fix: prevents the brain from misattributing fallback traffic to
     * the originally-requested model.
     */
    actualModel?: string;
    /**
     * Override `mutations_applied` for this outcome. Set by `call()` when
     * fallback fires — the served compile's mutations (which actually shaped
     * the request that went on the wire) replace the initial compile's
     * mutations (registered against the handle). Without this override, fallback
     * traffic is attributed to the initial compile's mutations and the brain's
     * mutation effectiveness stats become misleading.
     *
     * alpha.4: extends s11 truth-in-logging to mutations.
     */
    mutationsApplied?: string[];
    /**
     * Cache read input tokens, when supported by the provider.
     *   - Anthropic: `usage.cache_read_input_tokens`
     *   - Google (implicit caching): `usageMetadata.cachedContentTokenCount`
     *   - OpenAI: `usage.prompt_tokens_details.cached_tokens`
     *
     * Powers the cost-and-efficiency-watcher (interfaces/kgauto.md, alpha.4):
     * `tokens_in - cache_read_input_tokens` is the un-cached new context per call.
     */
    cacheReadInputTokens?: number;
    /**
     * Cache creation input tokens (Anthropic-specific), from
     * `usage.cache_creation_input_tokens`. Counted on the call that pays the
     * 25% upcharge to write a cache marker; subsequent calls show up under
     * `cacheReadInputTokens` instead.
     */
    cacheCreationInputTokens?: number;
    /**
     * Time to first token (ms). Optional; populated when the provider/SDK
     * surfaces it. Distinct from `latencyMs` (end-to-end wall clock).
     */
    ttftMs?: number;
}
|
|
603
|
+
|
|
604
|
+
/**
|
|
605
|
+
* Model profiles — executable knowledge about each provider/model.
|
|
606
|
+
*
|
|
607
|
+
* Unlike v1 which carried `known_failures` as prose strings, v2 makes them
|
|
608
|
+
* executable: cliffs trigger guards, lowering describes the wire format,
|
|
609
|
+
* recovery handlers describe what to do after specific failures.
|
|
610
|
+
*
|
|
611
|
+
* Each profile is the answer to "if I want to call THIS model with THIS
|
|
612
|
+
* shape of work, what does it need from me, and what should I do when it
|
|
613
|
+
* fails?"
|
|
614
|
+
*/
|
|
615
|
+
|
|
616
|
+
/** Structured-output capability level recorded on a model profile. */
type StructuredOutputCapability = 'native' | 'grammar' | 'none';
/** Where/how the system prompt is placed for a model (see `LoweringSpec.system.mode`). */
type SystemPromptMode = 'inline' | 'separate' | 'as_developer' | 'unsupported';
/** Cache mechanism used when lowering for a model (see `LoweringSpec.cache.strategy`). */
type CacheStrategy = 'cache_control' | 'cachedContent' | 'unsupported';
|
|
619
|
+
/**
 * An executable "cliff" on a model profile: when `metric` crosses
 * `threshold`, the compiler applies `action`.
 */
interface CliffRule {
    /** What metric triggers this cliff. */
    metric: 'input_tokens' | 'tool_count' | 'history_turns' | 'thinking_with_short_output';
    /** Threshold — meaning depends on metric. */
    threshold: number;
    /** What action to take when triggered. */
    action: 'downgrade_quality_warning' | 'drop_to_top_relevant' | 'force_thinking_budget_zero' | 'force_terse_output' | 'escalate_target' | 'strip_tools';
    /**
     * Optional: only fire this cliff when the IR's intent.archetype matches.
     * Used for archetype-specific failure modes (e.g. Gemini Flash returns
     * empty when summarize is offered tools).
     */
    whenIntent?: IntentArchetypeName;
    /** Human-readable reason for digest reporting. */
    reason: string;
}
|
|
635
|
+
/** What to do after a specific failure signal from a model. */
interface RecoveryRule {
    /** What signal triggers recovery. */
    signal: 'empty_response_after_tool' | 'empty_response' | 'malformed_function_call' | 'rate_limit' | 'model_not_found' | 'context_overflow';
    /** Action: retry with adjusted params, or escalate to next fallback. */
    action: 'retry_with_params' | 'escalate' | 'log_only';
    /** When action=retry_with_params, the param adjustments to apply. */
    retryParams?: Record<string, unknown>;
    /** Max retries with this rule. */
    maxRetries?: number;
    /** Human-readable reason for digest reporting. */
    reason: string;
}
|
|
647
|
+
/**
 * Per-model lowering instructions: where the system prompt goes, how caching
 * is expressed, which tool format is used, and how the thinking knob (if any)
 * is addressed on the request. Attached to a model as `ModelProfile.lowering`.
 */
interface LoweringSpec {
  /** Where the system prompt goes. */
  system: {
    mode: SystemPromptMode;
    // NOTE(review): presumably the request field that carries the system
    // prompt — confirm against lower.ts.
    field?: string;
  };
  /** Cache strategy + parameters. */
  cache: {
    strategy: CacheStrategy;
    /** Min tokens before caching is worth it (provider rules). */
    minTokens?: number;
    /** Discount factor on cached input (0.1 = 10% of normal price). */
    discount?: number;
    /** TTL hint in seconds. */
    ttlSeconds?: number;
  };
  /** Tool format identifier — see lower.ts for supported formats. */
  tools?: {
    format: 'anthropic' | 'google' | 'openai' | 'deepseek';
  };
  /** Thinking config — present iff this model has a thinking knob. */
  thinking?: {
    /** Field path on the request. */
    field: string;
    /** Default value when caller hasn't specified. */
    default?: number | 'auto' | 'off';
  };
}
|
|
675
|
+
/**
 * Everything recorded about one model: identity, hard limits, capabilities,
 * pricing, cliffs, lowering instructions, recovery rules, and curated notes.
 */
interface ModelProfile {
  id: string;
  provider: Provider;
  status: 'current' | 'preview' | 'legacy';
  maxContextTokens: number;
  maxOutputTokens: number;
  maxTools: number;
  parallelToolCalls: boolean;
  structuredOutput: StructuredOutputCapability;
  systemPromptMode: SystemPromptMode;
  streaming: boolean;
  cliffs: CliffRule[];
  // NOTE(review): presumably price per 1M input / output tokens; the currency
  // is not stated in this declaration — confirm.
  costInputPer1m: number;
  costOutputPer1m: number;
  lowering: LoweringSpec;
  recovery: RecoveryRule[];
  strengths: string[];
  weaknesses: string[];
  notes?: string;
  // NOTE(review): presumably a date string — the archetypePerf notes below
  // measure staleness as days since this value. Confirm the format.
  verifiedAgainstDocs?: string;
  /**
   * Hand-curated per-archetype performance score on a 0-10 scale.
   *
   *   10 = frontier on this archetype (e.g. Opus 4.7 on critique)
   *    8 = strong second tier (Sonnet on plan, Pro on extract)
   *    7 = competent (Haiku on classify, Flash on hunt)
   *    5 = acceptable for tolerant archetypes (Flash-Lite on classify)
   *    3 = degraded (Flash on critique, DeepSeek on hunt)
   *
   * Missing archetypes default to `5` (no data, neutral). Each non-default
   * value should carry a one-line rationale in the profile's note or inline
   * comment citing brain evidence, family prior, or "starter hypothesis —
   * verify with telemetry."
   *
   * Source today: hand-curated from master plan §3.3 + §6.2 starter tables.
   * Source tomorrow (alpha.10+): brain `archetype_model_evidence` view.
   *
   * Anti-hallucination guardrail (master plan §2.5): when the watcher's
   * `--audit-fields` flag flags a profile stale (>90 days since
   * verifiedAgainstDocs), the archetypePerf values get re-audited
   * alongside capability fields. AI-trained intuition is NOT a valid
   * source — only docs or brain evidence.
   *
   * alpha.9.
   */
  archetypePerf?: Partial<Record<IntentArchetypeName, number>>;
}
|
|
722
|
+
/**
 * String-to-string lookup table.
 * NOTE(review): presumably maps alias model ids to canonical profile ids —
 * confirm against the implementation.
 */
declare const ALIASES: Record<string, string>;
|
|
723
|
+
/**
 * Returns the profile for `id`. The declared return type is never
 * `undefined`; `tryGetProfile` is the variant that may return `undefined`.
 * NOTE(review): presumably throws for an unknown id — confirm against the
 * implementation.
 */
declare function getProfile(id: string): ModelProfile;
|
|
724
|
+
/**
 * Returns the profile for `id`, or `undefined`.
 * NOTE(review): presumably `undefined` means no profile matched the id —
 * confirm against the implementation.
 */
declare function tryGetProfile(id: string): ModelProfile | undefined;
|
|
725
|
+
/**
 * Returns a read-only array of model profiles.
 * NOTE(review): presumably every registered profile — confirm against the
 * implementation.
 */
declare function allProfiles(): readonly ModelProfile[];
|
|
726
|
+
/**
 * Returns a read-only array of model profiles for the given `provider`.
 * NOTE(review): presumably those whose `provider` field equals the argument —
 * confirm against the implementation.
 */
declare function profilesByProvider(provider: Provider): readonly ModelProfile[];
|
|
727
|
+
|
|
728
|
+
export { type ApiKeys as A, type BestPracticeAdvisory as B, type CompilePolicy as C, type FallbackReason as F, type HistoryCachePolicy as H, type IntentDeclaration as I, type LoweringSpec as L, type ModelProfile as M, type NormalizedResponse as N, type OracleScore as O, type ProviderOverrides as P, type RecordInput as R, type StructuredOutputCapability as S, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type CompileResult as e, type Provider as f, ALIASES as g, type CacheStrategy as h, type CallAttempt as i, CallError as j, type CliffRule as k, type Constraints as l, type Message as m, type MutationApplied as n, type NormalizedTokens as o, type PromptSection as p, type RecoveryRule as q, type SystemPromptMode as r, type ToolDefinition as s, allProfiles as t, getProfile as u, profilesByProvider as v, tryGetProfile as w };
|