@warmdrift/kgauto-compiler 2.0.0-alpha.3 → 2.0.0-alpha.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +87 -3
- package/dist/chunk-JQGRWJZO.mjs +1216 -0
- package/dist/chunk-NBO4R5PC.mjs +313 -0
- package/dist/chunk-RO22VFIF.mjs +29 -0
- package/dist/chunk-WXCFWUCN.mjs +678 -0
- package/dist/glassbox/index.d.mts +59 -0
- package/dist/glassbox/index.d.ts +59 -0
- package/dist/glassbox/index.js +312 -0
- package/dist/glassbox/index.mjs +12 -0
- package/dist/glassbox-routes/index.d.mts +242 -0
- package/dist/glassbox-routes/index.d.ts +242 -0
- package/dist/glassbox-routes/index.js +2458 -0
- package/dist/glassbox-routes/index.mjs +658 -0
- package/dist/index.d.mts +1195 -11
- package/dist/index.d.ts +1195 -11
- package/dist/index.js +3503 -236
- package/dist/index.mjs +1588 -78
- package/dist/ir-BIAT9gJk.d.ts +1031 -0
- package/dist/ir-De2AQtlr.d.mts +1031 -0
- package/dist/profiles.d.mts +137 -2
- package/dist/profiles.d.ts +137 -2
- package/dist/profiles.js +820 -11
- package/dist/profiles.mjs +5 -1
- package/dist/types-BjrIFPGe.d.mts +131 -0
- package/dist/types-D_JAhCv4.d.ts +131 -0
- package/package.json +12 -2
- package/dist/chunk-MBEI5UOM.mjs +0 -409
- package/dist/profiles-BiyrF36f.d.mts +0 -489
- package/dist/profiles-C5lVqF8_.d.ts +0 -489
|
@@ -0,0 +1,1031 @@
|
|
|
1
|
+
import { IntentArchetypeName } from './dialect.js';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Intermediate Representation — the structured form of a prompt.
|
|
5
|
+
*
|
|
6
|
+
* Everything kgauto v2 does, it does on the IR. The IR is constructed by the
|
|
7
|
+
* caller (or by `parse()` from a string-style prompt for backwards-compat),
|
|
8
|
+
* transformed by the compiler passes, and lowered to a target-specific wire
|
|
9
|
+
* request only at the very end.
|
|
10
|
+
*
|
|
11
|
+
* The IR carries STRUCTURE, not just text. Sections are first-class. Tools
|
|
12
|
+
* carry per-intent relevance. History knows its turn-age. Constraints are
|
|
13
|
+
* explicit. This is what lets passes do real work.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
/**
 * A semantically-named section of the system prompt. Sections enable
 * intent-aware slicing (drop sections not tagged for this intent), dedupe
 * (collapse identical sections across files), and cache marking (identify
 * the stable prefix).
 */
interface PromptSection {
    /** Stable identifier — used for slicing, dedupe, and cache markers. */
    id: string;
    /** Section text. */
    text: string;
    /**
     * Which intents this section applies to. Empty = applies to all intents.
     * `compile()` drops sections whose `intents` array doesn't include the
     * current intent.
     */
    intents?: IntentArchetypeName[];
    /**
     * If true, this section is part of the stable cacheable prefix. The lower
     * pass uses this to place cache markers correctly per target.
     */
    cacheable?: boolean;
    /**
     * Section weight when ordering — lower = earlier in the assembled prompt.
     * Defaults to insertion order.
     */
    weight?: number;
    /**
     * alpha.29+ — declares the section's semantic kind so kgauto can apply
     * model-aware rewrites at compile time. Defaults to `'arbitrary'` when
     * unset, for full back-compat — pre-alpha.29 sections continue working
     * unchanged.
     *
     * alpha.29 ships rewrites for `tool_call_contract` only. Other kinds are
     * type-accepted but pass through. alpha.30+ will add rewrites for
     * `narration_contract`, `role_intro`, etc.
     *
     * See `translator.ts` for the rewrite engine that consumes this field.
     */
    kind?: SectionKind;
}
|
|
57
|
+
/**
 * alpha.29+ — semantic kind tag for a `PromptSection`. The translator
 * (`v2/src/translator.ts`) consumes this to apply model-aware rewrites at
 * compile time. CLOSED union; future kinds extend it explicitly in named
 * alpha releases.
 *
 * alpha.29 ships rewrites for `tool_call_contract` only. Other kinds are
 * type-accepted but pass through.
 *
 * - `role_intro`          — "You are a helpful assistant", persona blocks
 * - `tool_call_contract`  — tool-use rules ("call X then Y"); the alpha.29
 *                           translator rewrites this for models with a
 *                           sequential-tool cliff on the active archetype
 * - `narration_contract`  — output-format rules ("don't narrate your steps");
 *                           alpha.30+ candidate
 * - `user_turn`           — when sections carry user content rather than
 *                           system context (rare)
 * - `reference`           — supporting reference data the model may consult
 * - `arbitrary`           — explicit pass-through (default when unset)
 */
type SectionKind =
    | 'role_intro'
    | 'tool_call_contract'
    | 'narration_contract'
    | 'user_turn'
    | 'reference'
    | 'arbitrary';
|
|
78
|
+
/**
 * A tool made available to the model, as listed in `PromptIR.tools`.
 * Only `name` is required; unknown extra keys are accepted via the index
 * signature so provider-specific fields can ride along.
 */
interface ToolDefinition {
    name: string;
    description?: string;
    parameters?: Record<string, unknown>;
    /**
     * Per-intent relevance scores. Compile uses these to drop irrelevant tools.
     * Missing intents default to 0.5 (neutral).
     */
    relevanceByIntent?: Partial<Record<IntentArchetypeName, number>>;
    /** Pass-through for provider-specific fields (Anthropic input_schema, etc.). */
    [key: string]: unknown;
}
|
|
90
|
+
/**
 * A single conversation message. Used for both `PromptIR.history` entries
 * and `PromptIR.currentTurn`.
 */
interface Message {
    role: 'system' | 'user' | 'assistant' | 'tool';
    content: string;
    /** Optional structured parts (tool calls, results) — passed through to lowering. */
    parts?: unknown[];
    /** For tool messages — which tool this corresponds to. */
    toolName?: string;
    /** For tool messages — the call id. */
    toolCallId?: string;
}
|
|
100
|
+
/**
 * The compile-time intent declaration. `name` is the app's local label;
 * `archetype` is the canonical dialect-v1 archetype the app maps it to.
 *
 * Apps with their own intent vocabulary (tt-intelligence's "ask"/"hunt"/
 * "dashboard") declare the mapping here. The brain learns by archetype, not
 * by app-local name.
 */
interface IntentDeclaration {
    /** App-local intent name (free-form, for the app's own debugging). */
    name: string;
    /** Canonical dialect-v1 archetype. Required for cross-app learning. */
    archetype: IntentArchetypeName;
}
|
|
114
|
+
/**
 * Per-call compile constraints, supplied via `PromptIR.constraints`.
 * Every field is optional.
 */
interface Constraints {
    /**
     * Latency ceiling in milliseconds — compiler will down-rank slow models.
     * Advisory.
     * NOTE(review): previously described as both "hard" and "advisory";
     * confirm whether this ever rejects a model outright.
     */
    maxLatencyMs?: number;
    /**
     * Cost ceiling per call (USD). Advisory.
     * NOTE(review): previously described as both "hard" and "advisory";
     * confirm whether this ever rejects a model outright.
     */
    maxCostUsd?: number;
    /** Caller wants structured (JSON) output. */
    structuredOutput?: boolean;
    /** Hint: caller expects a short response (used to disable thinking on Gemini). */
    expectedShortOutput?: boolean;
    /** Hint: max response words. */
    maxResponseWords?: number;
    /** Override target model selection — if set, compiler uses this instead of routing. */
    forceModel?: string;
    /**
     * alpha.20: consumer-declared tool-orchestration shape for this call.
     * - 'parallel': model may fire multiple tool calls per step (current
     *   default behavior; the L-040 cliff applies — DeepSeek's
     *   `tool_count >= 1` cliff trims tools because parallel-tool throughput
     *   collapses to sequential semantics).
     * - 'sequential': consumer commits to one tool call per step (the agentic
     *   loop pattern). DeepSeek V4-Flash + V4-Pro can compete cleanly in
     *   this mode — the L-040 cliff is silenced and the hunt chain shifts
     *   to a DeepSeek-tier-1 ordering.
     * - 'either': consumer doesn't care; library picks the parallel chain
     *   (status-quo default) and may upgrade to brain-driven per-mode perf
     *   selection in a future release.
     *
     * Affects:
     * - Chain composition for `archetype: 'hunt'` (see
     *   `getDefaultFallbackChain` and `STARTER_CHAINS_BY_MODE`).
     * - L-040 cliff in `passApplyCliffs` (silent when 'sequential').
     *
     * Default (when undefined): equivalent to 'parallel' for back-compat
     * with every pre-alpha.20 caller.
     */
    toolOrchestration?: 'parallel' | 'sequential' | 'either';
}
|
|
151
|
+
/**
 * Cache marker policy for the messages array (history + currentTurn).
 *
 * Anthropic positional caching: a `cache_control` marker on a content block
 * tells the API "remember the prefix up through this block." On a subsequent
 * request whose first N tokens match, those N tokens are billed at the
 * cached rate (10% of the input price). Without a marker, every call re-pays
 * for the entire history.
 *
 * - `'none'` (default when omitted): no history cache marker. System-level
 *   cache markers from `PromptSection.cacheable=true` still apply.
 * - `'all-but-latest'`: marks the message immediately preceding `currentTurn`
 *   (the last history entry). On the next call, that entire history prefix
 *   is cacheable. Good fit for chat/agent loops where every prior turn is
 *   stable.
 * - `'fixed-suffix'`: marks the message `suffix` positions from the end of
 *   `history`. Use when the last few turns are volatile (e.g., scratchpad,
 *   draft revisions) but the earlier prefix is stable.
 *
 * For non-Anthropic providers, no wire-format marker is emitted (Gemini /
 * OpenAI / DeepSeek implicit caching takes effect automatically when a
 * stable prefix is reused). The compiler still computes
 * `diagnostics.historyCacheableTokens` for telemetry on every provider.
 *
 * alpha.5.
 */
type HistoryCachePolicy =
    | {
          strategy: 'none';
      }
    | {
          strategy: 'all-but-latest';
      }
    | {
          strategy: 'fixed-suffix';
          /** Number of positions from the end of `history` at which the marker is placed. */
          suffix: number;
      };
|
|
185
|
+
/**
 * Consumer-declared policy for model selection. Lives outside the IR
 * (passed via CompileOptions) because it's a SESSION/APP-level constraint,
 * not a per-call shape.
 *
 * The original tt-intelligence scenario (s11): user capped Anthropic
 * spending on Sonnet for cost reasons. v2 compile() kept picking Sonnet
 * as the best target, Hunter's preflight hit the cap and fell back to
 * Flash — every single call. CompilePolicy.blockedModels lets the
 * consumer tell kgauto "don't pick Sonnet right now" and the compiler
 * routes to the next-best option directly. No wasted preflight tax.
 *
 * This is the "coach knows the constraints" feature — kgauto stops
 * recommending things the consumer has already ruled out.
 */
interface CompilePolicy {
    /**
     * Model IDs the consumer has gated. Compile() will never select these.
     * Use for: cost caps, account-level rate limits, "this model is broken
     * for our workload" decisions.
     */
    blockedModels?: string[];
    /**
     * Hard ceiling on estimated input cost per call (USD). Models whose
     * estimated cost exceeds this are rejected. Use for: budget enforcement
     * on high-volume routes.
     */
    maxCostPerCallUsd?: number;
    /**
     * Model IDs the consumer prefers. When multiple models fit, preferred
     * models get a rank boost (large enough to overcome small quality
     * differences but not large enough to override hard rejects).
     */
    preferredModels?: string[];
    /**
     * Customer-posture tag (master plan §1.2, alpha.9).
     *
     * - `'locked'`    — compliance/contract/brand-promise. Caller passes
     *                   exactly one model; no fallback is desired. kgauto
     *                   never walks the chain.
     * - `'preferred'` — user-selected primary, fallback chain as safety
     *                   net. On 429/5xx, walk the chain and surface
     *                   `fellOverFrom` so the consumer can show "Claude
     *                   was busy; we used Pro for this answer."
     * - `'open'`      — library picks the chain. Model identity is
     *                   irrelevant; output is the contract.
     *
     * The field is **informational** — kgauto's execution path is already
     * determined by the shape of `ir.models`. Posture surfaces in
     * telemetry so the cost-watcher can distinguish "locked failed, no
     * fallback was tried" from "open chain exhausted." Default: when
     * `ir.models.length === 1` posture is treated as `'locked'` by the
     * advisor; otherwise unspecified.
     */
    posture?: 'locked' | 'preferred' | 'open';
}
|
|
241
|
+
/**
 * The IR — the input to compile().
 *
 * Required: `appId`, `intent`, `sections`, and `models`. Everything else is
 * optional.
 */
interface PromptIR {
    /** App identifier — required for multi-tenant brain. */
    appId: string;
    /** Intent declaration — what is this call doing? */
    intent: IntentDeclaration;
    /** Structured system prompt sections. */
    sections: PromptSection[];
    /** Available tools (compiler may drop based on intent relevance + budget). */
    tools?: ToolDefinition[];
    /** Conversation history (compiler may compress old turns). */
    history?: Message[];
    /** The user's current turn — never dropped. */
    currentTurn?: Message;
    /** Allowed model IDs, in caller-preference order. Compiler picks among these. */
    models: string[];
    /** Compile constraints. */
    constraints?: Constraints;
    /**
     * Cache marker placement policy for the messages array. Default = no
     * history cache markers. See `HistoryCachePolicy` for semantics.
     * alpha.5.
     */
    historyCachePolicy?: HistoryCachePolicy;
}
|
|
268
|
+
/**
 * Identifier of a model provider; the type of `CompileResult.provider`.
 *
 * NOTE(review): `CompiledRequest`, `ApiKeys`, and `ProviderOverrides` in this
 * module only enumerate the first four providers — confirm whether
 * `'mistral'` and `'xai'` are routable end-to-end yet.
 */
type Provider =
    | 'anthropic'
    | 'google'
    | 'openai'
    | 'deepseek'
    | 'mistral'
    | 'xai';
|
|
269
|
+
/**
 * Record of a mutation that fired during compile. The list on
 * `CompileResult.mutationsApplied` is empty in v1 (no mutation engine
 * yet) and is populated when the brain is online and pushing mutations.
 */
type MutationApplied = {
    /** Mutation ID. */
    id: string;
    source: string;
    passName: string;
    description: string;
};
|
|
279
|
+
/**
 * Target-specific wire request. Shape varies by provider — caller passes the
 * right field to the right SDK.
 *
 * Discriminated on `provider`; narrow on it before reading provider-specific
 * fields (e.g. `contents` exists only on the `'google'` variant).
 */
type CompiledRequest =
    | {
          provider: 'anthropic';
          model: string;
          system: Array<{
              type: 'text';
              text: string;
              cache_control?: {
                  type: 'ephemeral';
              };
          }>;
          messages: Array<{
              role: string;
              content: unknown;
          }>;
          tools?: unknown[];
          max_tokens?: number;
          /**
           * alpha.29 — emitted only when the translator's wire-overrides set
           * `parallelToolCalls = false`. Shape per Anthropic Messages API docs:
           * `{ type: 'auto', disable_parallel_tool_use: true }`. kgauto defaults
           * to omitting `tool_choice` entirely (Anthropic defaults to auto + parallel),
           * so this field's presence signals an explicit override.
           */
          tool_choice?: {
              type: 'auto' | 'any' | 'tool' | 'none';
              disable_parallel_tool_use?: boolean;
              name?: string;
          };
      }
    | {
          provider: 'google';
          model: string;
          systemInstruction?: {
              role: 'system';
              parts: Array<{
                  text: string;
              }>;
          };
          contents: Array<{
              role: string;
              parts: unknown[];
          }>;
          tools?: unknown[];
          generationConfig?: Record<string, unknown>;
          cachedContent?: string;
      }
    | {
          provider: 'openai';
          model: string;
          messages: Array<{
              role: string;
              content: unknown;
          }>;
          tools?: unknown[];
          response_format?: unknown;
          reasoning_effort?: string;
          /**
           * alpha.29 — emitted only when the translator's wire-overrides set
           * `parallelToolCalls = false`. OpenAI defaults parallel_tool_calls=true
           * server-side; we explicit-set to false only when overriding.
           */
          parallel_tool_calls?: boolean;
      }
    | {
          provider: 'deepseek';
          model: string;
          messages: Array<{
              role: string;
              content: unknown;
          }>;
          tools?: unknown[];
      };
|
|
352
|
+
/**
 * Best-practice advisory emitted by the compiler at compile time. Non-fatal —
 * consumers log, surface in dev tools, gate on `level === 'critical'` in CI,
 * or ignore. The advisor inspects the IR + selected profile + diagnostics
 * and emits one entry per detected gap.
 *
 * Codes are stable across releases. `suggestion` and `docsUrl` are optional
 * but encouraged: suggestion = the actionable diff; docsUrl = the
 * interfaces/kgauto.md anchor for context.
 *
 * alpha.6 Phase 1 starter rules:
 * - `caching-off-on-claude`      (warn) system >2000 chars on Anthropic, no cacheable=true
 * - `single-chunk-system`        (info) Anthropic, only one PromptSection >1000 chars
 * - `tool-bloat`                 (warn) >10 tools on a short-output archetype
 * - `history-uncached-on-claude` (warn) Anthropic, ≥2 history messages, no historyCachePolicy
 *
 * Phase 2 (catalog as `bestPractices` block in profiles) and Phase 3 (brain
 * telemetry on `advisories_fired`) are alpha.7+ territory.
 */
interface BestPracticeAdvisory {
    /**
     * Severity. `info` = informational; `warn` = behavioral pattern that's
     * usually expensive or wrong; `critical` = likely bug or production-grade
     * misuse. Phase 1 ships info + warn only.
     */
    level: 'info' | 'warn' | 'critical';
    /** Stable kebab-case code. Consumers filter / gate by this. */
    code: string;
    /** Human-readable explanation of what was detected. */
    message: string;
    /** Optional: how to fix — actionable diff or pattern. */
    suggestion?: string;
    /** Optional: link to docs anchor for more context. */
    docsUrl?: string;
    /**
     * alpha.20 — actionable category for routing/dashboard surfacing. When set,
     * the brain persists this as `recommendation_type` on
     * `compile_outcome_advisories` so consumers can filter "show me all
     * client-side issues that are caching-fix recommendations." Optional;
     * absent on legacy or uncategorized rules.
     *
     * - `'model-swap'`          — swap to a different model fixes this
     * - `'prompt-fix'`          — restructure prompt (sections, tools, format)
     * - `'caching-fix'`         — add cache markers (system or history)
     * - `'no-ai-needed'`        — the call shouldn't be using an AI model
     * - `'tier-down'`           — current model is overkill for this archetype
     * - `'architecture-change'` — the issue isn't fixable at the kgauto layer
     */
    recommendationType?: 'model-swap' | 'prompt-fix' | 'caching-fix' | 'no-ai-needed' | 'tier-down' | 'architecture-change';
    /**
     * alpha.28 — when a rule wants to surface a specific structural adaptation
     * (not just a swap or a prompt fix), it attaches the adapter shape here.
     * Shape is the canonical {@link Adapter} discriminated union defined in
     * this module; `compatibility.ts` re-exports it so
     * `getModelCompatibility()` and `BestPracticeAdvisory.suggestedAdaptation`
     * share one source of truth.
     *
     * Today fired by `archetype-perf-floor-breach` (alpha.28) when a
     * documented adapter exists for the chosen model's archetype cliff.
     * Absent on rules without a structural adapter (caching-off-on-claude,
     * tool-bloat, etc.) and on the `reject` branch of
     * `archetype-perf-floor-breach` where no adapter would help.
     *
     * CLOSED discriminated union (R3 from consultation doc) — future adapter
     * parameters extend the union in `compatibility.ts` in named alpha
     * releases. No `| string` escape hatch; consumer code can write
     * exhaustive `switch (suggestedAdaptation.parameter)`.
     *
     * Phase 2 cross-builder coherence: Builder A's
     * `AdvisoryRecord.suggestedAdaptation` (in `glassbox-routes/types.ts`)
     * MUST type to the same union. Phase 2 integration verifies.
     */
    suggestedAdaptation?: Adapter;
}
|
|
426
|
+
/**
 * alpha.28 — adapter shape attached to advisories and returned by
 * `getModelCompatibility()`. A CLOSED discriminated union: future adapter
 * parameters extend it explicitly in named alpha releases. NO `| string`
 * escape hatch — consumer policy code SHOULD write exhaustive
 * `switch (adapter.parameter)` and rely on the compiler to flag
 * "I added a new adapter parameter and forgot to update consumer policy."
 *
 * Defined here (in `ir.ts`, the foundational types module) and re-exported
 * from `compatibility.ts` for ergonomic consumer imports. Anchoring it
 * here avoids the import cycle that would form if both files tried to be
 * the source of truth (ir.ts → compatibility.ts → profiles.ts → ir.ts).
 *
 * alpha.28 variants:
 * - `{ parameter: 'toolOrchestration'; value: 'sequential'; consequence }`
 *   Lifts DeepSeek V4-family on `hunt` from the sequential-tool cliff
 *   (L-040). `consequence` is consumer-renderable plain English.
 *
 * Future alpha releases will add e.g. `parallelToolCalls`, `maxTools`,
 * `thinkingBudget` (per tt-intel-Cairn priority list).
 */
type Adapter = {
    /** Discriminant — the only parameter shipped so far. */
    parameter: 'toolOrchestration';
    value: 'sequential';
    /** Consumer-renderable plain-English description of the effect. */
    consequence: string;
};
|
|
452
|
+
/**
 * alpha.29+ — record of a single section rewrite fired by the translator at
 * compile time. Surfaces on `CompileResult.sectionRewritesApplied` and (in
 * scrubbed wire form, without original/transformed text) on
 * `TraceDetail.sectionRewritesApplied` for Glass-Box Coaching-card rendering.
 *
 * `originalText` / `transformedText` stay package-internal — they may carry
 * consumer PII. The wire-shape variant (`TraceSectionRewrite` in
 * `glassbox-routes/types.ts`) carries only `summary` for renderer use.
 */
interface SectionRewrite {
    /** Stable id of the `PromptSection` that was rewritten. */
    sectionId: string;
    /** The `kind` discriminator that matched the rewrite rule. */
    kind: SectionKind;
    /**
     * Stable identifier of the rule that fired (e.g.
     * `'sequential-tool-cliff-below-floor'`). Future rules add named ids; the
     * brain aggregates by this value for cross-app learning.
     */
    rule: string;
    /** The section's text BEFORE the rewrite fired. */
    originalText: string;
    /** The text the translator emitted into the IR for this section. */
    transformedText: string;
    /**
     * Wire-level overrides emitted alongside the text rewrite. Merged into
     * `CompileResult.wireOverrides` by `applySectionRewrites`. alpha.29 ships
     * `parallelToolCalls`; the union extends as more wire-overrides surface.
     */
    wireOverrides?: {
        parallelToolCalls?: boolean;
    };
}
|
|
486
|
+
/**
 * Outcome of a compile: the selected target and provider, the lowered wire
 * request, cost/token estimates, the fallback chain, advisories, and
 * diagnostics for caller-side logging.
 */
interface CompileResult {
    /** Unique handle for this call — pass to record() to correlate the outcome. */
    handle: string;
    /** Selected target model id. */
    target: string;
    /** Selected provider. */
    provider: Provider;
    /** The wire request — pass the appropriate fields to your SDK. */
    request: CompiledRequest;
    /** Estimated tokens (input). */
    tokensIn: number;
    /** Estimated cost in USD (input portion). */
    estimatedCostUsd: number;
    /** Mutations that fired during compile (informational). */
    mutationsApplied: MutationApplied[];
    /** Fallback chain — try these in order if target fails. */
    fallbackChain: string[];
    /**
     * Best-practice advisories emitted by the compiler. Non-fatal. Empty
     * array when no rules fired. alpha.6 Phase 1.
     */
    advisories: BestPracticeAdvisory[];
    /**
     * alpha.29+ — per-section rewrites applied by the translator at compile
     * time. Empty array means no rewrites fired (or pre-alpha.29 behavior —
     * all sections default `kind: 'arbitrary'`, which is pass-through).
     *
     * Surfaces to:
     * - Glass-Box Coaching card (via `TraceDetail.sectionRewritesApplied`,
     *   scrubbed of original/transformed text)
     * - brain `compile_outcomes.section_rewrites_applied` (migration 019)
     *   for cross-app learning
     */
    sectionRewritesApplied: SectionRewrite[];
    /**
     * alpha.29+ — wire-level overrides emitted by translator rewrites. The
     * provider lowering pass threads these through to the wire request before
     * emit. Today only `parallelToolCalls: boolean`; the type extends as more
     * wire-overrides surface.
     *
     * Undefined when no rewrite emitted overrides — the common case.
     */
    wireOverrides?: {
        parallelToolCalls?: boolean;
    };
    /** Diagnostics for caller-side logging. */
    diagnostics: {
        sectionsKept: number;
        sectionsDropped: number;
        toolsKept: number;
        toolsDropped: number;
        historyKept: number;
        historyDropped: number;
        cacheableTokens: number;
        estimatedCacheSavingsUsd: number;
        /**
         * Tokens in `history` (and `currentTurn` when before the marker) that
         * fall within the cacheable prefix per `historyCachePolicy`. Always
         * computed; only Anthropic actually emits a wire-format marker. For
         * Gemini / OpenAI / DeepSeek, this represents the theoretical cacheable
         * prefix that implicit caching may pick up — useful telemetry for the
         * brain to learn which (app, model, archetype) tuples benefit most
         * from history caching. alpha.5.
         */
        historyCacheableTokens: number;
        /**
         * Total tokens in input `history` (pre-compression). Computed regardless
         * of whether `passCompressHistory` fired — surfaces how close a tuple is
         * to its `compressHistoryAboveTokens` threshold so dashboards / cost-
         * watchers can see the bloat axis the count-based threshold misses.
         * 0 when history is empty. alpha.7.
         */
        historyTokensTotal: number;
        /**
         * alpha.20 E3. Consumer-declared tool-orchestration mode for this call,
         * mirrored from `ir.constraints.toolOrchestration` for downstream
         * observability (Glass-Box panel, brain telemetry, advisor logs).
         * Undefined when the consumer hasn't adopted the constraint yet —
         * treat as 'parallel' equivalent for back-compat.
         */
        toolOrchestration?: 'parallel' | 'sequential' | 'either';
    };
}
|
|
569
|
+
/**
 * Token usage normalized across providers. `cached` and `cacheCreated` are
 * Anthropic prompt-cache reads/writes (Gemini implicit caching populates
 * `cached` from `usageMetadata.cachedContentTokenCount`; OpenAI populates
 * from `prompt_tokens_details.cached_tokens`).
 */
interface NormalizedTokens {
    input: number;
    output: number;
    total: number;
    /** Prompt-cache read tokens, when the provider reports them. */
    cached?: number;
    /** Prompt-cache write tokens, when the provider reports them. */
    cacheCreated?: number;
}
|
|
582
|
+
/**
 * Tool call in a provider-agnostic shape. Anthropic `tool_use` blocks,
 * Google `functionCall` parts, and OpenAI/DeepSeek `tool_calls[]` all
 * collapse to this.
 */
interface ToolCall {
    id: string;
    name: string;
    args: Record<string, unknown>;
}
|
|
592
|
+
/**
 * Provider-agnostic response shape: text, optional parsed structured output,
 * normalized tool calls and token usage, plus the untouched provider payload
 * in `raw`.
 */
interface NormalizedResponse {
    /** Main text body. Empty string if response had no text content. */
    text: string;
    /**
     * Parsed structured output. Populated when ir.constraints.structuredOutput
     * is true and JSON.parse(text) succeeds. Null otherwise.
     */
    structuredOutput: unknown | null;
    /** Tool calls in normalized shape. Empty array if none. */
    toolCalls: ToolCall[];
    tokens: NormalizedTokens;
    /** Provider-specific finish reason, passed through unchanged. */
    finishReason?: string;
    /** Untouched provider response — escape hatch for consumers needing fields not yet normalized. */
    raw: unknown;
    /** Set when structuredOutput parsing was attempted and failed. */
    parseError?: string;
}
|
|
610
|
+
/**
 * Per-provider API keys. Supplied through `CallOptions.apiKeys` to override
 * the `process.env` defaults; every entry is optional.
 */
interface ApiKeys {
    anthropic?: string;
    google?: string;
    openai?: string;
    deepseek?: string;
}
|
|
616
|
+
/**
 * Per-provider override fields shallow-merged into the lowered request before
 * execution. Lets consumers reach Gemini `safetySettings`, Anthropic
 * `tool_choice`, OpenAI `seed` etc. without bypassing kgauto.
 */
interface ProviderOverrides {
    anthropic?: Record<string, unknown>;
    google?: Record<string, unknown>;
    openai?: Record<string, unknown>;
    deepseek?: Record<string, unknown>;
}
|
|
627
|
+
/** Options for a single `call()` invocation. Every field is optional. */
interface CallOptions {
  /** Forwarded to compile(). */
  policy?: CompilePolicy;
  /** NOTE(review): grouped with `policy` in the original, so presumably also forwarded to compile() — confirm. */
  toolRelevanceThreshold?: number;
  /** NOTE(review): grouped with `policy` in the original, so presumably also forwarded to compile() — confirm. */
  compressHistoryAfter?: number;
  /** Override API keys (defaults: process.env). */
  apiKeys?: ApiKeys;
  /** Provider-specific request fields shallow-merged into the lowered request. */
  providerOverrides?: ProviderOverrides;
  /** Override fetch (for tests). */
  fetchImpl?: typeof fetch;
  /** Disable retry/fallback walk on retryable errors. Default: enabled. */
  noFallback?: boolean;
  /**
   * alpha.10. Disable the silent auto-filter of unreachable models from the
   * fallback walk. Default: false (filter ON). Opt-out exists for tests +
   * the rare consumer that wants the legacy "fail at execute() with auth
   * error" behavior. While the filter is ON (the default), models whose
   * provider has no resolvable API key are dropped from `targetsToTry` before
   * the first network call; if the chain empties entirely, `call()` throws
   * CallError with `lastErrorCode = 'no_reachable_models'`.
   *
   * Reachability source: `apiKeys` (this CallOptions) + `process.env` (via
   * `PROVIDER_ENV_KEYS`). The env override from env.ts
   * (`ReachabilityOpts.envSource`) is not exposed here — `call()` always uses
   * process.env. Use `getDefaultFallbackChain({ reachability: { envSource } })`
   * upstream for hermetic test runs.
   */
  noAutoFilter?: boolean;
}
|
|
657
|
+
/**
 * One provider attempt made during a `call()`. `CallResult.attempts` and
 * `CallError.attempts` carry one of these per model tried.
 */
interface CallAttempt {
  /** Model this attempt targeted. */
  model: string;
  /** Classification of the attempt's outcome. */
  status: 'success' | 'retryable' | 'terminal';
  /** Error code for a non-success attempt (e.g. `'auth_inferred'` for skipped same-provider entries). */
  errorCode?: string;
  /** Human-readable detail accompanying `errorCode`. */
  message?: string;
}
|
|
663
|
+
/**
 * Why fallback fired. Normalized for `CallResult.fallbackReason` (alpha.9).
 *
 * - `rate_limit` — provider returned 429
 * - `provider_error` — 5xx, network, or other retryable upstream issue
 * - `cost_cap` — preflight policy.maxCostPerCallUsd rejected target
 * - `cliff` — alpha.8 contract violation (MAX_TOKENS on structured output,
 *   parse-failed JSON)
 * - `contract_violation` — other compile-time-contract failures (reserved
 *   for alpha.10+ — e.g. mid-stream policy rejects)
 * - `provider_auth_failed` — alpha.14 — initial provider returned 401/403
 *   (upstream key revocation, malformed-but-truthy key, billing lapse).
 *   The chain walks to the next non-same-provider target instead of
 *   short-circuiting; same-provider remaining entries skip with
 *   errorCode='auth_inferred'.
 */
type FallbackReason =
  | 'rate_limit'
  | 'provider_error'
  | 'cost_cap'
  | 'cliff'
  | 'contract_violation'
  | 'provider_auth_failed';
|
|
681
|
+
/** Successful result of a `call()` invocation. */
interface CallResult {
  /** Compile handle (still valid for record() if consumer wants to add oracle scores later). */
  handle: string;
  /** The model that ACTUALLY served the response (post-fallback). */
  actualModel: string;
  /** What compile() originally targeted. */
  requestedModel: string;
  /** Provider of the serving model. */
  provider: Provider;
  /** Provider response in normalized shape. */
  response: NormalizedResponse;
  /** Latency of the call in milliseconds. */
  latencyMs: number;
  /** Mutations that fired during compile (informational, mirrors CompileResult.mutationsApplied). */
  mutationsApplied: MutationApplied[];
  /** One entry per provider attempt — observability for retry/fallback walks. */
  attempts: CallAttempt[];
  /**
   * Model that actually answered. Equal to `actualModel`; kept distinct for clarity.
   *
   * `servedBy`, `fellOverFrom` and `fallbackReason` together form the alpha.9
   * normalization of fallback-walk telemetry. When the chain succeeded on the
   * first attempt, these collapse to:
   *   - `servedBy === requestedModel`
   *   - `fellOverFrom` undefined
   *   - `fallbackReason` undefined
   *
   * When fallback fired:
   *   - `servedBy` = `actualModel` (the model that produced the response)
   *   - `fellOverFrom` = `requestedModel` (what the caller / compile() asked for)
   *   - `fallbackReason` = normalized cause derived from the first
   *     non-success attempt's `errorCode`
   *
   * Consumer UX use: show "Claude was busy; we used Pro for this answer"
   * when `fellOverFrom` is set (master plan §3.6).
   */
  servedBy: string;
  /** Set only when fallback fired. Equal to `requestedModel` in that case. */
  fellOverFrom?: string;
  /** Set only when fallback fired. Normalized cause. */
  fallbackReason?: FallbackReason;
  /**
   * alpha.10. Models that auto-filter dropped from the fallback walk because
   * their provider had no reachable API key. Empty when nothing was filtered
   * (the common case once consumers have all the keys they need). Surfaces
   * silent self-heal so consumers can log/audit what happened without
   * defeating the "kgauto just gets" UX.
   *
   * Empty array (not undefined) when filter ran but dropped nothing —
   * distinguishes "filter ran cleanly" from "filter was disabled" (`undefined`
   * when `noAutoFilter: true`).
   */
  unreachableFiltered?: string[];
  /**
   * alpha.16. Models that policy.blockedModels filtering dropped from the
   * fallback walk. Defense-in-depth at the call() boundary — compile()'s
   * passScoreTargets already excludes blocked entries from the initial
   * target + fallbackChain, but if a consumer re-shapes the chain and
   * threads policy through only partially, this filter catches the gap.
   *
   * Resolves TT-40 follow-on `policy-block-not-enforced-on-fallback-chain`
   * (2026-05-15) where mutations_applied recorded the block intent but
   * the call walker landed on the blocked model anyway.
   *
   * Undefined when no filter ran (no blockedModels set). Populated only
   * when filter ran AND dropped at least one entry — empty drops are
   * stored as `undefined` to keep brain telemetry quiet on the common
   * case.
   */
  policyBlockedFiltered?: string[];
  /**
   * alpha.17. Unique identifier for this call() invocation, generated at
   * call() entry via crypto.randomUUID(). Returned on success and emitted
   * as the routing key for Glass-Box observability events
   * (compile.start, compile.done, execute.attempt, execute.success,
   * fallback.walked, advisory.fired). Pass the same id to
   * `subscribe(traceId)` from `@warmdrift/kgauto-compiler/glassbox` to
   * tap the in-flight event stream.
   *
   * Always present on success. Additive, non-breaking.
   */
  traceId: string;
}
|
|
759
|
+
/**
 * Thrown when call() exhausts the fallback chain without success.
 * `attempts` carries every model tried + classification.
 */
declare class CallError extends Error {
  /** Every model tried, with its classification. */
  readonly attempts: CallAttempt[];
  /** Error code of the final failure (e.g. `'no_reachable_models'` when auto-filter empties the chain). */
  readonly lastErrorCode?: string;
  /** NOTE(review): presumably the HTTP status of the last failed attempt — confirm against call.ts. */
  readonly lastStatus?: number;
  /**
   * @param message - Error message.
   * @param attempts - Attempts made before giving up.
   * @param lastStatus - Optional status for `lastStatus`.
   * @param lastErrorCode - Optional code for `lastErrorCode`.
   */
  constructor(message: string, attempts: CallAttempt[], lastStatus?: number, lastErrorCode?: string);
}
|
|
769
|
+
/** Quality score attached to a recorded outcome via `RecordInput.oracleScore`. */
interface OracleScore {
  /** 0..1 overall quality. */
  score: number;
  /** Optional per-dimension breakdown. */
  dimensions?: Record<string, number>;
  /** Free-form explanation for debugging. */
  rationale?: string;
}
|
|
777
|
+
/** Outcome of an executed call, keyed by the compile handle. */
interface RecordInput {
  /** Handle from CompileResult. */
  handle: string;
  /** Actual input tokens consumed (post-call). */
  tokensIn: number;
  /** Actual output tokens consumed (post-call). */
  tokensOut: number;
  /** Wall-clock latency in ms. */
  latencyMs: number;
  /** True iff the call returned a usable response. */
  success: boolean;
  /** True iff the call returned 0 output tokens despite success. */
  emptyResponse?: boolean;
  /** Provider error code if any. */
  errorType?: string;
  /** Tools actually invoked by the model. */
  toolsCalled?: string[];
  /** Oracle quality score — required for learning to fire. */
  oracleScore?: OracleScore;
  /** Optional: scrubbed prompt preview for debugging. */
  promptPreview?: string;
  /** Optional: scrubbed response preview for debugging. */
  responsePreview?: string;
  /**
   * The model that ACTUALLY RAN. Set this when consumer-side fallback ran
   * a different model than v2 compile() targeted. Brain stores this as
   * `model` (the truth) and the original target as `requested_model`.
   *
   * Omit when no fallback occurred — brain stores compile target as `model`
   * (still the truth in that case) and `requested_model` stays NULL.
   *
   * s11 fix: prevents the brain from misattributing fallback traffic to
   * the originally-requested model.
   */
  actualModel?: string;
  /**
   * Override `mutations_applied` for this outcome. Set by `call()` when
   * fallback fires — the served compile's mutations (which actually shaped
   * the request that went on the wire) replace the initial compile's
   * mutations (registered against the handle). Without this override, fallback
   * traffic is attributed to the initial compile's mutations and the brain's
   * mutation effectiveness stats become misleading.
   *
   * alpha.4: extends s11 truth-in-logging to mutations.
   */
  mutationsApplied?: string[];
  /**
   * Cache read input tokens, when supported by the provider.
   *   - Anthropic: `usage.cache_read_input_tokens`
   *   - Google (implicit caching): `usageMetadata.cachedContentTokenCount`
   *   - OpenAI: `usage.prompt_tokens_details.cached_tokens`
   *
   * Powers the cost-and-efficiency-watcher (interfaces/kgauto.md, alpha.4):
   * `tokens_in - cache_read_input_tokens` is the un-cached new context per call.
   */
  cacheReadInputTokens?: number;
  /**
   * Cache creation input tokens (Anthropic-specific):
   * `usage.cache_creation_input_tokens`. Reported on the first call, which
   * pays the 25% upcharge to write a cache marker; subsequent calls report
   * cache hits via `cacheReadInputTokens`.
   */
  cacheCreationInputTokens?: number;
  /**
   * Time to first token (ms). Optional; populated when the provider/SDK
   * surfaces it. Distinct from `latencyMs` (end-to-end wall clock).
   */
  ttftMs?: number;
  /**
   * alpha.20 — advisories fired at compile() time. Persisted to the brain's
   * `compile_outcome_advisories` sibling table via a second POST that fires
   * AFTER the primary outcome insert succeeds. Best-effort: a failed
   * advisory POST is logged via onError but does NOT throw or roll back the
   * primary outcome row.
   *
   * Pass `result.advisories` from the CompileResult directly. The brain
   * uses these to compute the `empty_rate_clean` comparator (rows with
   * zero advisories fired) so consumers can distinguish "model is bad"
   * from "client sent a bloated/uncached/malformed request."
   *
   * Empty array / undefined → no second POST fires.
   */
  advisories?: BestPracticeAdvisory[];

  // ---------------------------------------------------------------------
  // alpha.28 — Glass-Box renderer substrate fields (migration 018).
  //
  // All optional. When omitted, brain stores NULL and the renderer falls
  // back to "—" / hidden rows. Library callers (`call.ts`) populate what
  // they observe; adapter / SDK consumers can populate the rest from their
  // own provider response surface.
  // ---------------------------------------------------------------------

  /**
   * Provider finish reason. Captured from NormalizedResponse.finishReason
   * (Anthropic `stop_reason`, Google `finishReason`, OpenAI `finish_reason`).
   * Lower-case canonicalization is the brain's job; consumers can pass
   * raw provider strings.
   */
  finishReason?: string;
  /**
   * End-to-end wall-clock latency in ms. Distinct from `latencyMs` only
   * insofar as `latencyMs` was the historical name for the same metric;
   * `totalMs` is the new column on `compile_outcomes` (migration 018).
   * When omitted, brain mirrors `latency_ms`.
   */
  totalMs?: number;
  /** Tools kept after the tool-relevance pass. */
  toolsCount?: number;
  /** Number of history messages at compile time. */
  historyDepth?: number;
  /** Rendered system prompt size in characters. */
  systemPromptChars?: number;
  /** Model originally targeted when a fallback fired. */
  fellOverFrom?: string;
  /**
   * Why the fallback fired. Typed with the shared `FallbackReason` alias so it
   * cannot drift from `CallResult.fallbackReason`; the wire-contract enum
   * (TraceDetail.fallbackReason) must still be kept in sync by hand.
   */
  fallbackReason?: FallbackReason;
}
|
|
893
|
+
/**
 * alpha.20 Entry 4: kinds of consumer-declared outcomes feeding the quality
 * loop. Surfaces in `recordOutcome()` as the verdict the consumer's UX is
 * forwarding to the brain.
 *
 * - `approved` — user explicitly approved (thumbs up, "looks good", accepted)
 * - `rejected` — user explicitly rejected (thumbs down, "redo", discarded)
 * - `partial` — accepted with edits or partial use (mixed signal)
 * - `engaged` — user engaged with the output (copy/scroll/dwell)
 * - `abandoned` — user abandoned the response (closed, navigated away)
 * - `unknown` — verdict could not be inferred — recorded for completeness
 */
type OutcomeKind =
  | 'approved'
  | 'rejected'
  | 'partial'
  | 'engaged'
  | 'abandoned'
  | 'unknown';
|
|
906
|
+
/**
 * Input to `recordOutcome()` — consumer's verdict on a previously-compiled
 * call. Joins to the original `compile_outcomes` row via outcomeId,
 * enabling per-(model, archetype) approve-rate measurement once N ≥ 10
 * outcomes accumulate.
 */
interface RecordOutcomeInput {
  /** Joins to compile_outcomes.id. Returned by compile() via CompileResult.outcomeId. */
  outcomeId: number | string;
  /** What did the user / system do with this output? */
  outcome: OutcomeKind;
  /** Optional 1-5 user rating (e.g., thumbs up/down with intensity, NPS-style). */
  rating?: 1 | 2 | 3 | 4 | 5;
  /** Optional free-text reason (e.g., user-typed feedback, system-inferred cause). */
  reason?: string;
  /**
   * Optional model-reported confidence at compile time (0..1). Used for
   * Brier-score calibration in later phases (alpha.21+) — pair this with
   * the actual `outcome` to compute calibration error.
   */
  observedConfidence?: number;
}
|
|
928
|
+
/**
 * Return shape of `recordOutcome()`. Never throws — persistence failures
 * surface as `ok: false` with a stable `reason` string.
 */
interface OutcomeResult {
  /** True when the POST landed (2xx). False when brain not configured or POST failed. */
  ok: boolean;
  /** Stable reason code when ok=false. One of: 'brain_not_configured' | 'persistence_failed'. */
  reason?: string;
}
|
|
938
|
+
/**
 * alpha.21 (s78 Entry 1): provenance label on a chain entry. Surfaces WHY
 * an entry sits where it sits so consumers can distinguish:
 *
 * - 'measured' — brain has N>=10 rows with a measurable quality outcome
 *   backing this placement. The number lives on `ChainEntry.n`.
 * - 'capability-fact' — inclusion or exclusion driven by a published or
 *   measured CAPABILITY (L-040 cliff, ctx window cap, structured-output
 *   support). Not an opinion — a fact about what the model can/can't do.
 * - 'judgment' — engineer's pick, no measured backing yet. Cold-start
 *   prior; entirely valid until evidence accumulates.
 *
 * "Judgment" is HONEST, not a downgrade. Most of `STARTER_CHAINS` lands here
 * in alpha.21 — that's the point: consumers can SEE the grounding gap and
 * prioritize the measurement work that would graduate them to 'measured'.
 */
type Grounding = 'measured' | 'capability-fact' | 'judgment';
|
|
957
|
+
/**
 * alpha.21 (s78 Entry 1): a single position in a fallback chain, carrying its
 * provenance label and an optional human-readable reason. The shape replaces
 * the old `string[]` representation everywhere chains are surfaced externally.
 *
 * `n` is REQUIRED when `grounding === 'measured'` — the runtime helper
 * `makeMeasuredEntry()` enforces this (the type itself keeps `n` optional).
 * For 'capability-fact' and 'judgment' entries, `n` is undefined.
 */
interface ChainEntry {
  /** Canonical model id (post-alias). */
  id: string;
  /** Why this entry sits in this position. */
  grounding: Grounding;
  /**
   * Optional one-liner explaining the grounding decision. The inline comments
   * that historically lived next to STARTER_CHAINS entries are now expressed
   * here as machine-readable text.
   */
  reason?: string;
  /**
   * When `grounding === 'measured'`, the brain row count that backs this
   * placement. Undefined for 'capability-fact' and 'judgment' entries.
   */
  n?: number;
}
|
|
983
|
+
/**
 * alpha.21 introspection shape — a per-archetype chain with grounding on
 * every position. Consumers reading this never see naked string ids;
 * everything carries provenance.
 */
interface ChainWithGrounding {
  /** Archetype this chain belongs to. */
  archetype: IntentArchetypeName;
  /** Ordered: position 0 = primary, rising index = fallback positions. */
  entries: ChainEntry[];
}
|
|
993
|
+
/**
 * alpha.23 (s78 Phase 3): per-axis metrics returned by the brain RPC.
 *
 * One record describes a single (appId, archetype, model) tuple over a
 * `windowDays` window. Every aggregate is nullable.
 */
interface PerAxisMetrics {
  appId: string;
  archetype: string;
  model: string;
  windowDays: number;
  /** Total brain rows for this tuple in the window. */
  nRows: number;
  /** Subset of nRows with zero advisories fired — the "clean signal" comparator. */
  nRowsClean: number;
  /** Count of compile_outcome_quality entries joining to this tuple's outcomes. */
  nQualityOutcomes: number;
  /** Approve rate from quality outcomes. null when nQualityOutcomes === 0. */
  magicRate: number | null;
  /** Whether magicRate >= consumer-declared qualityFloor. null when no floor declared OR no outcomes. */
  qualityFloorMet: boolean | null;
  /** Cost and token averages; fields ending in `Clean` presumably restrict to the nRowsClean subset — confirm. */
  costEfficiency: {
    avgCostUsd: number | null;
    avgCostUsdClean: number | null;
    avgInputTokens: number | null;
    avgOutputTokens: number | null;
    inputTokenRatio: number | null;
  };
  /** Latency averages in milliseconds. */
  timeEfficiency: {
    avgLatencyMs: number | null;
    avgTtftMs: number | null;
  };
  /** Success and empty-response rates; fields ending in `Clean` presumably restrict to the nRowsClean subset — confirm. */
  reliability: {
    successRate: number | null;
    successRateClean: number | null;
    emptyRate: number | null;
    emptyRateClean: number | null;
  };
  /** NOTE(review): presumably the age in days of the newest backing evidence — confirm against the brain RPC. */
  evidenceFreshnessDays: number | null;
}
|
|
1028
|
+
/** Per-axis metrics keyed by model — used for chain-comparison views. */
type PerAxisMetricsByModel = Record<string, PerAxisMetrics>;
|
|
1030
|
+
|
|
1031
|
+
// Export surface of this declaration chunk. The short aliases look
// bundler-generated (presumably re-mapped to the real names by the entry
// `.d.ts` files) — confirm before editing by hand. `CallError` is the only
// non-type export because it is a class.
export {
  type ApiKeys as A,
  type BestPracticeAdvisory as B,
  type CompilePolicy as C,
  type FallbackReason as F,
  type Grounding as G,
  type HistoryCachePolicy as H,
  type IntentDeclaration as I,
  type Message as M,
  type NormalizedResponse as N,
  type OutcomeResult as O,
  type ProviderOverrides as P,
  type RecordInput as R,
  type SectionRewrite as S,
  type ToolCall as T,
  type CompiledRequest as a,
  type PromptIR as b,
  type CallOptions as c,
  type CallResult as d,
  type RecordOutcomeInput as e,
  type OracleScore as f,
  type CompileResult as g,
  type Adapter as h,
  type PerAxisMetrics as i,
  type Provider as j,
  type ChainEntry as k,
  type CallAttempt as l,
  CallError as m,
  type ChainWithGrounding as n,
  type Constraints as o,
  type MutationApplied as p,
  type NormalizedTokens as q,
  type OutcomeKind as r,
  type PerAxisMetricsByModel as s,
  type PromptSection as t,
  type SectionKind as u,
  type ToolDefinition as v,
};
|