@warmdrift/kgauto-compiler 2.0.0-alpha.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,728 @@
1
+ import { IntentArchetypeName } from './dialect.js';
2
+
3
+ /**
4
+ * Intermediate Representation — the structured form of a prompt.
5
+ *
6
+ * Everything kgauto v2 does, it does on the IR. The IR is constructed by the
7
+ * caller (or by `parse()` from a string-style prompt for backwards-compat),
8
+ * transformed by the compiler passes, and lowered to a target-specific wire
9
+ * request only at the very end.
10
+ *
11
+ * The IR carries STRUCTURE, not just text. Sections are first-class. Tools
12
+ * carry per-intent relevance. History knows its turn-age. Constraints are
13
+ * explicit. This is what lets passes do real work.
14
+ */
15
+
16
+ /**
17
+ * A semantically-named section of the system prompt. Sections enable
18
+ * intent-aware slicing (drop sections not tagged for this intent), dedupe
19
+ * (collapse identical sections across files), and cache marking (identify
20
+ * the stable prefix).
21
+ */
22
+ interface PromptSection {
23
+ /** Stable identifier — used for slicing, dedupe, and cache markers. */
24
+ id: string;
25
+ /** Section text. */
26
+ text: string;
27
+ /**
28
+ * Intent archetypes this section applies to. Empty = applies to all intents.
29
+ * When non-empty, `compile()` drops the section if the array doesn't include
30
+ * the current intent. (Omitted is presumably treated like empty — confirm.)
31
+ */
32
+ intents?: IntentArchetypeName[];
33
+ /**
34
+ * If true, this section is part of the stable cacheable prefix. The lower
35
+ * pass uses this to place cache markers correctly per target.
36
+ */
37
+ cacheable?: boolean;
38
+ /**
39
+ * Ordering weight — a lower value places the section earlier in the
40
+ * assembled prompt. Defaults to insertion order.
41
+ */
42
+ weight?: number;
43
+ }
44
+ interface ToolDefinition { // A tool offered to the model; extra provider-specific keys are permitted via the index signature.
45
+ name: string;
46
+ description?: string;
47
+ parameters?: Record<string, unknown>; // Untyped here; presumably a JSON-Schema-like object — confirm against the lowering code.
48
+ /**
49
+ * Per-intent relevance scores, keyed by `IntentArchetypeName`. Compile uses
50
+ * these to drop irrelevant tools. Missing intents default to 0.5 (neutral).
51
+ */
52
+ relevanceByIntent?: Partial<Record<IntentArchetypeName, number>>;
53
+ /** Pass-through for provider-specific fields (Anthropic input_schema, etc.). */
54
+ [key: string]: unknown;
55
+ }
56
+ interface Message { // One conversation turn; the element type of `PromptIR.history` and the type of `PromptIR.currentTurn`.
57
+ role: 'system' | 'user' | 'assistant' | 'tool';
58
+ content: string; // Text body; structured payloads travel in `parts`.
59
+ /** Optional structured parts (tool calls, results) — passed through to lowering. */
60
+ parts?: unknown[];
61
+ /** For `role: 'tool'` messages — which tool this corresponds to. */
62
+ toolName?: string;
63
+ /** For `role: 'tool'` messages — the call id. */
64
+ toolCallId?: string;
65
+ }
66
+ /**
67
+ * The compile-time intent declaration. `name` is the app's local label;
68
+ * `archetype` is the canonical dialect-v1 archetype the app maps it to.
69
+ *
70
+ * Apps with their own intent vocabulary (tt-intelligence's "ask"/"hunt"/
71
+ * "dashboard") declare the mapping here. The brain learns by archetype, not
72
+ * by app-local name.
73
+ */
74
+ interface IntentDeclaration {
75
+ /** App-local intent name (free-form, for the app's own debugging). */
76
+ name: string;
77
+ /** Canonical dialect-v1 archetype (`IntentArchetypeName`). Required for cross-app learning. */
78
+ archetype: IntentArchetypeName;
79
+ }
80
+ interface Constraints {
81
+ /** Latency ceiling (ms) — compiler will down-rank slow models. Advisory. NOTE(review): previously worded as a "hard" ceiling; confirm whether it is enforced. */
82
+ maxLatencyMs?: number;
83
+ /** Cost ceiling per call (USD). Advisory. NOTE(review): previously worded as "hard"; `CompilePolicy.maxCostPerCallUsd` is the field documented as rejecting models. */
84
+ maxCostUsd?: number;
85
+ /** Caller wants structured (JSON) output. */
86
+ structuredOutput?: boolean;
87
+ /** Hint: caller expects a short response (used to disable thinking on Gemini). */
88
+ expectedShortOutput?: boolean;
89
+ /** Hint: max response words. */
90
+ maxResponseWords?: number;
91
+ /** Override target model selection — if set, compiler uses this instead of routing. */
92
+ forceModel?: string;
93
+ }
94
+ /**
95
+ * Cache marker policy for the messages array (history + currentTurn).
96
+ *
97
+ * Anthropic positional caching: a `cache_control` marker on a content block
98
+ * tells the API "remember the prefix up through this block." On a subsequent
99
+ * request whose first N tokens match, those N tokens are billed at the cached
100
+ * rate (10% of the input price). Without a marker, every call re-pays for the
101
+ * entire history.
102
+ *
103
+ * - `'none'` (default when omitted): no history cache marker. System-level
104
+ * cache markers from `PromptSection.cacheable=true` still apply.
105
+ * - `'all-but-latest'`: marks the message immediately preceding `currentTurn`
106
+ * (the last history entry). On the next call, that entire history prefix
107
+ * is cacheable. Good fit for chat/agent loops where every prior turn is
108
+ * stable.
109
+ * - `'fixed-suffix'`: marks the message `suffix` positions from the end of
110
+ * `history`. Use when the last few turns are volatile (e.g., scratchpad,
111
+ * draft revisions) but the earlier prefix is stable.
112
+ *
113
+ * For non-Anthropic providers, no wire-format marker is emitted (Gemini /
114
+ * OpenAI / DeepSeek implicit caching takes effect automatically when a
115
+ * stable prefix is reused). The compiler still computes
116
+ * `diagnostics.historyCacheableTokens` for telemetry on every provider.
117
+ *
118
+ * alpha.5.
119
+ */
120
+ type HistoryCachePolicy = {
121
+ strategy: 'none';
122
+ } | {
123
+ strategy: 'all-but-latest';
124
+ } | {
125
+ strategy: 'fixed-suffix';
126
+ suffix: number; // Number of positions from the end of `history` at which the marker is placed.
127
+ };
128
+ /**
129
+ * Consumer-declared policy for model selection. Lives outside the IR
130
+ * (passed via `CallOptions.policy`) because it's a SESSION/APP-level
131
+ * constraint, not a per-call shape.
132
+ *
133
+ * The original tt-intelligence scenario (s11): user capped Anthropic
134
+ * spending on Sonnet for cost reasons. v2 compile() kept picking Sonnet
135
+ * as the best target, Hunter's preflight hit the cap and fell back to
136
+ * Flash — every single call. CompilePolicy.blockedModels lets the
137
+ * consumer tell kgauto "don't pick Sonnet right now" and the compiler
138
+ * routes to the next-best option directly. No wasted preflight tax.
139
+ *
140
+ * This is the "coach knows the constraints" feature — kgauto stops
141
+ * recommending things the consumer has already ruled out.
142
+ */
143
+ interface CompilePolicy {
144
+ /**
145
+ * Model IDs the consumer has gated. `compile()` will never select these.
146
+ * Use for: cost caps, account-level rate limits, "this model is broken
147
+ * for our workload" decisions.
148
+ */
149
+ blockedModels?: string[];
150
+ /**
151
+ * Hard ceiling on estimated input cost per call (USD). Models whose
152
+ * estimated cost exceeds this are rejected. Use for: budget enforcement
153
+ * on high-volume routes.
154
+ */
155
+ maxCostPerCallUsd?: number;
156
+ /**
157
+ * Model IDs the consumer prefers. When multiple models fit, preferred
158
+ * models get a rank boost (large enough to overcome small quality
159
+ * differences but not large enough to override hard rejects).
160
+ */
161
+ preferredModels?: string[];
162
+ /**
163
+ * Customer-posture tag (master plan §1.2, alpha.9).
164
+ *
165
+ * - `'locked'` — compliance/contract/brand-promise. Caller passes
166
+ * exactly one model; no fallback is desired. kgauto
167
+ * never walks the chain.
168
+ * - `'preferred'` — user-selected primary, fallback chain as safety
169
+ * net. On 429/5xx, walk the chain and surface
170
+ * `fellOverFrom` so the consumer can show "Claude
171
+ * was busy; we used Pro for this answer."
172
+ * - `'open'` — library picks the chain. Model identity is
173
+ * irrelevant; output is the contract.
174
+ *
175
+ * The field is **informational** — kgauto's execution path is already
176
+ * determined by the shape of `ir.models`. Posture surfaces in
177
+ * telemetry so the cost-watcher can distinguish "locked failed, no
178
+ * fallback was tried" from "open chain exhausted." Default: when
179
+ * `ir.models.length === 1` posture is treated as `'locked'` by the
180
+ * advisor; otherwise unspecified.
181
+ */
182
+ posture?: 'locked' | 'preferred' | 'open';
183
+ }
184
+ /**
185
+ * The IR — the structured prompt that is the input to compile().
186
+ */
187
+ interface PromptIR {
188
+ /** App identifier — required for multi-tenant brain. */
189
+ appId: string;
190
+ /** Intent declaration — what is this call doing? */
191
+ intent: IntentDeclaration;
192
+ /** Structured system prompt sections. */
193
+ sections: PromptSection[];
194
+ /** Available tools (compiler may drop based on intent relevance + budget). */
195
+ tools?: ToolDefinition[];
196
+ /** Conversation history (compiler may compress old turns). */
197
+ history?: Message[];
198
+ /** The user's current turn — never dropped. */
199
+ currentTurn?: Message;
200
+ /** Allowed model IDs, in caller-preference order. Compiler picks among these. A single-entry list is treated as `'locked'` posture by the advisor (see `CompilePolicy.posture`). */
201
+ models: string[];
202
+ /** Compile constraints. */
203
+ constraints?: Constraints;
204
+ /**
205
+ * Cache marker placement policy for the messages array. Default = no
206
+ * history cache markers. See `HistoryCachePolicy` for semantics.
207
+ * alpha.5.
208
+ */
209
+ historyCachePolicy?: HistoryCachePolicy;
210
+ }
211
+ type Provider = 'anthropic' | 'google' | 'openai' | 'deepseek' | 'mistral' | 'xai'; // NOTE(review): 'mistral' and 'xai' have no matching variant in CompiledRequest, ApiKeys, ProviderOverrides, or LoweringSpec.tools.format in this file — confirm whether they are callable.
212
+ /**
213
+ * One mutation that fired during compile (id plus source, pass name, and
214
+ * description). Lists of these are empty in v1 (no mutation engine yet) and
215
+ * are populated when the brain is online and pushing mutations.
216
+ */
216
+ type MutationApplied = {
217
+ id: string;
218
+ source: string;
219
+ passName: string;
220
+ description: string;
221
+ };
222
+ /**
223
+ * Target-specific wire request, discriminated on `provider`. Shape varies by
224
+ * provider — caller passes the right field to the right SDK.
225
+ */
226
+ type CompiledRequest = {
227
+ provider: 'anthropic'; // Discriminant: Anthropic-shaped request.
228
+ model: string;
229
+ system: Array<{
230
+ type: 'text';
231
+ text: string;
232
+ cache_control?: { // Optional cache marker on a system block.
233
+ type: 'ephemeral';
234
+ };
235
+ }>;
236
+ messages: Array<{
237
+ role: string;
238
+ content: unknown;
239
+ }>;
240
+ tools?: unknown[];
241
+ max_tokens?: number;
242
+ } | {
243
+ provider: 'google'; // Discriminant: Google-shaped request.
244
+ model: string;
245
+ systemInstruction?: {
246
+ role: 'system';
247
+ parts: Array<{
248
+ text: string;
249
+ }>;
250
+ };
251
+ contents: Array<{
252
+ role: string;
253
+ parts: unknown[];
254
+ }>;
255
+ tools?: unknown[];
256
+ generationConfig?: Record<string, unknown>;
257
+ cachedContent?: string;
258
+ } | {
259
+ provider: 'openai'; // Discriminant: OpenAI-shaped request.
260
+ model: string;
261
+ messages: Array<{
262
+ role: string;
263
+ content: unknown;
264
+ }>;
265
+ tools?: unknown[];
266
+ response_format?: unknown;
267
+ reasoning_effort?: string;
268
+ } | {
269
+ provider: 'deepseek'; // Discriminant: DeepSeek-shaped request.
270
+ model: string;
271
+ messages: Array<{
272
+ role: string;
273
+ content: unknown;
274
+ }>;
275
+ tools?: unknown[];
276
+ };
277
+ /**
278
+ * Best-practice advisory emitted by the compiler at compile time. Non-fatal —
279
+ * consumers log, surface in dev tools, gate on `level === 'critical'` in CI,
280
+ * or ignore. The advisor inspects the IR + selected profile + diagnostics
281
+ * and emits one entry per detected gap.
282
+ *
283
+ * Codes are stable across releases. `suggestion` and `docsUrl` are optional
284
+ * but encouraged: suggestion = the actionable diff; docsUrl = the
285
+ * interfaces/kgauto.md anchor for context.
286
+ *
287
+ * alpha.6 Phase 1 starter rules:
288
+ * - `caching-off-on-claude` (warn) system >2000 chars on Anthropic, no cacheable=true
289
+ * - `single-chunk-system` (info) Anthropic, only one PromptSection >1000 chars
290
+ * - `tool-bloat` (warn) >10 tools on a short-output archetype
291
+ * - `history-uncached-on-claude` (warn) Anthropic, ≥2 history messages, no historyCachePolicy
292
+ *
293
+ * Phase 2 (catalog as `bestPractices` block in profiles) and Phase 3 (brain
294
+ * telemetry on `advisories_fired`) are alpha.7+ territory.
295
+ */
296
+ interface BestPracticeAdvisory {
297
+ /**
298
+ * Severity. `info` = informational; `warn` = behavioral pattern that's
299
+ * usually expensive or wrong; `critical` = likely bug or production-grade
300
+ * misuse. Phase 1 ships info + warn only.
301
+ */
302
+ level: 'info' | 'warn' | 'critical';
303
+ /** Stable kebab-case code (e.g. `tool-bloat`). Consumers filter / gate by this. */
304
+ code: string;
305
+ /** Human-readable explanation of what was detected. */
306
+ message: string;
307
+ /** Optional: how to fix — actionable diff or pattern. */
308
+ suggestion?: string;
309
+ /** Optional: link to docs anchor for more context. */
310
+ docsUrl?: string;
311
+ }
312
+ interface CompileResult {
313
+ /** Unique handle for this call — pass to record() to correlate the outcome. */
314
+ handle: string;
315
+ /** Selected target model id. */
316
+ target: string;
317
+ /** Selected provider. */
318
+ provider: Provider;
319
+ /** The wire request — pass the appropriate fields to your SDK. */
320
+ request: CompiledRequest;
321
+ /** Estimated tokens (input). */
322
+ tokensIn: number;
323
+ /** Estimated cost in USD (input portion). */
324
+ estimatedCostUsd: number;
325
+ /** Mutations that fired during compile (informational). */
326
+ mutationsApplied: MutationApplied[];
327
+ /** Fallback chain — try these in order if target fails. */
328
+ fallbackChain: string[];
329
+ /**
330
+ * Best-practice advisories emitted by the compiler. Non-fatal. Empty
331
+ * array when no rules fired. alpha.6 Phase 1.
332
+ */
333
+ advisories: BestPracticeAdvisory[];
334
+ /** Diagnostics for caller-side logging. */
335
+ diagnostics: {
336
+ sectionsKept: number; // Kept/dropped pairs below count prompt sections, tools, and history entries.
337
+ sectionsDropped: number;
338
+ toolsKept: number;
339
+ toolsDropped: number;
340
+ historyKept: number;
341
+ historyDropped: number;
342
+ cacheableTokens: number; // Presumably tokens in the `PromptSection.cacheable` prefix — confirm.
343
+ estimatedCacheSavingsUsd: number;
344
+ /**
345
+ * Tokens in `history` (and `currentTurn` when before the marker) that
346
+ * fall within the cacheable prefix per `historyCachePolicy`. Always
347
+ * computed; only Anthropic actually emits a wire-format marker. For
348
+ * Gemini / OpenAI / DeepSeek, this represents the theoretical cacheable
349
+ * prefix that implicit caching may pick up — useful telemetry for the
350
+ * brain to learn which (app, model, archetype) tuples benefit most
351
+ * from history caching. alpha.5.
352
+ */
353
+ historyCacheableTokens: number;
354
+ /**
355
+ * Total tokens in input `history` (pre-compression). Computed regardless
356
+ * of whether `passCompressHistory` fired — surfaces how close a tuple is
357
+ * to its `compressHistoryAboveTokens` threshold so dashboards / cost-
358
+ * watchers can see the bloat axis the count-based threshold misses.
359
+ * 0 when history is empty. alpha.7.
360
+ */
361
+ historyTokensTotal: number;
362
+ };
363
+ }
364
+ /**
365
+ * Token usage normalized across providers. `cached` = cache-read tokens
366
+ * (Anthropic prompt-cache reads; Gemini `usageMetadata.cachedContentTokenCount`;
367
+ * OpenAI `prompt_tokens_details.cached_tokens`). `cacheCreated` = Anthropic
368
+ * prompt-cache writes.
369
+ */
370
+ interface NormalizedTokens {
371
+ input: number;
372
+ output: number;
373
+ total: number;
374
+ cached?: number;
375
+ cacheCreated?: number;
376
+ }
377
+ /**
378
+ * Tool call in a provider-agnostic shape. Anthropic `tool_use` blocks,
379
+ * Google `functionCall` parts, and OpenAI/DeepSeek `tool_calls[]` all
380
+ * collapse to this. Surfaced as `NormalizedResponse.toolCalls`.
381
+ */
382
+ interface ToolCall {
383
+ id: string;
384
+ name: string;
385
+ args: Record<string, unknown>; // Arguments as an object keyed by parameter name.
386
+ }
387
+ interface NormalizedResponse {
388
+ /** Main text body. Empty string if response had no text content. */
389
+ text: string;
390
+ /**
391
+ * Parsed structured output. Populated when ir.constraints.structuredOutput
392
+ * is true and JSON.parse(text) succeeds. Null otherwise (see `parseError`).
393
+ */
394
+ structuredOutput: unknown | null;
395
+ /** Tool calls in normalized shape. Empty array if none. */
396
+ toolCalls: ToolCall[];
397
+ tokens: NormalizedTokens; // Usage counts normalized across providers.
398
+ /** Provider-specific finish reason, passed through unchanged. */
399
+ finishReason?: string;
400
+ /** Untouched provider response — escape hatch for consumers needing fields not yet normalized. */
401
+ raw: unknown;
402
+ /** Set when structuredOutput parsing was attempted and failed. */
403
+ parseError?: string;
404
+ }
405
+ interface ApiKeys { // Per-provider API keys (see `CallOptions.apiKeys`). NOTE(review): no 'mistral'/'xai' entries although `Provider` lists them — confirm.
406
+ anthropic?: string;
407
+ google?: string;
408
+ openai?: string;
409
+ deepseek?: string;
410
+ }
411
+ /**
412
+ * Per-provider override fields shallow-merged into the lowered request before
413
+ * execution. Lets consumers reach Gemini `safetySettings`, Anthropic
414
+ * `tool_choice`, OpenAI `seed`, etc. without bypassing kgauto.
415
+ */
416
+ interface ProviderOverrides {
417
+ anthropic?: Record<string, unknown>;
418
+ google?: Record<string, unknown>;
419
+ openai?: Record<string, unknown>;
420
+ deepseek?: Record<string, unknown>;
421
+ }
422
+ interface CallOptions {
423
+ /** Forwarded to compile(). */
424
+ policy?: CompilePolicy;
425
+ toolRelevanceThreshold?: number; // Presumably also forwarded to compile(); likely the cut-off applied to `ToolDefinition.relevanceByIntent` — confirm.
426
+ compressHistoryAfter?: number; // Presumably also forwarded to compile(); units (turns vs. tokens) not shown here — confirm.
427
+ /** Override API keys (defaults: process.env). */
428
+ apiKeys?: ApiKeys;
429
+ /** Provider-specific request fields shallow-merged into the lowered request. */
430
+ providerOverrides?: ProviderOverrides;
431
+ /** Override fetch (for tests). */
432
+ fetchImpl?: typeof fetch;
433
+ /** Disable retry/fallback walk on retryable errors. Default: enabled. */
434
+ noFallback?: boolean;
435
+ /**
436
+ * alpha.10. Disable the silent auto-filter of unreachable models from the
437
+ * fallback walk. Default: false (filter ON). Opt-out exists for tests +
438
+ * the rare consumer that wants the legacy "fail at execute() with auth
439
+ * error" behavior. When ON (default), models whose provider has no
440
+ * resolvable API key are dropped from `targetsToTry` before the first
441
+ * network call; if the chain empties entirely, throws CallError with
442
+ * `lastErrorCode = 'no_reachable_models'`.
443
+ *
444
+ * Reachability source: `apiKeys` (this CallOptions) + `process.env` (via
445
+ * `PROVIDER_ENV_KEYS`). The env override in env.ts (`ReachabilityOpts.envSource`)
446
+ * is not exposed here — `call()` always uses process.env. Use
447
+ * `getDefaultFallbackChain({ reachability: { envSource } })` upstream
448
+ * for hermetic test runs.
449
+ */
450
+ noAutoFilter?: boolean;
451
+ }
452
+ interface CallAttempt { // One provider attempt; element type of `CallResult.attempts` and `CallError.attempts`.
453
+ model: string;
454
+ status: 'success' | 'retryable' | 'terminal'; // Presumably 'retryable' lets the fallback walk continue and 'terminal' stops it — confirm.
455
+ errorCode?: string;
456
+ message?: string;
457
+ }
458
+ /**
459
+ * Why fallback fired. Normalized value for `CallResult.fallbackReason` (alpha.9).
460
+ *
461
+ * - `rate_limit` provider returned 429
462
+ * - `provider_error` 5xx, network, or other retryable upstream issue
463
+ * - `cost_cap` preflight policy.maxCostPerCallUsd rejected target
464
+ * - `cliff` alpha.8 contract violation (MAX_TOKENS on
465
+ * structured output, parse-failed JSON)
466
+ * - `contract_violation` other compile-time-contract failures (reserved
467
+ * for alpha.10+ — e.g. mid-stream policy rejects)
468
+ */
469
+ type FallbackReason = 'rate_limit' | 'provider_error' | 'cost_cap' | 'cliff' | 'contract_violation';
470
+ interface CallResult {
471
+ /** Compile handle (still valid for record() if consumer wants to add oracle scores later). */
472
+ handle: string;
473
+ /** The model that ACTUALLY served the response (post-fallback). */
474
+ actualModel: string;
475
+ /** What compile() originally targeted. */
476
+ requestedModel: string;
477
+ provider: Provider;
478
+ response: NormalizedResponse;
479
+ latencyMs: number;
480
+ /** Mutations that fired during compile (informational, mirrors CompileResult.mutationsApplied). */
481
+ mutationsApplied: MutationApplied[];
482
+ /** One entry per provider attempt — observability for retry/fallback walks. */
483
+ attempts: CallAttempt[];
484
+ /*
485
+ * Group note for `servedBy`, `fellOverFrom`, and `fallbackReason` below:
486
+ * Alpha.9 normalization of fallback-walk telemetry. When the chain
487
+ * succeeded on the first attempt, these collapse to `servedBy === requestedModel`
488
+ * with `fellOverFrom` undefined
489
+ * and `fallbackReason` undefined.
490
+ *
491
+ * When fallback fired:
492
+ * - `servedBy` = `actualModel` (the model that produced the response)
493
+ * - `fellOverFrom` = `requestedModel` (what the caller / compile() asked for)
494
+ * - `fallbackReason` = normalized cause derived from the first
495
+ * non-success attempt's `errorCode`
496
+ *
497
+ * Consumer UX use: show "Claude was busy; we used Pro for this answer"
498
+ * when `fellOverFrom` is set (master plan §3.6).
499
+ */
500
+ /** Model that actually answered. Equal to `actualModel`; kept distinct for clarity. */
501
+ servedBy: string;
502
+ /** Set only when fallback fired. Equal to `requestedModel` in that case. */
503
+ fellOverFrom?: string;
504
+ /** Set only when fallback fired. Normalized cause. */
505
+ fallbackReason?: FallbackReason;
506
+ /**
507
+ * alpha.10. Models that auto-filter dropped from the fallback walk because
508
+ * their provider had no reachable API key. Empty when nothing was filtered
509
+ * (the common case once consumers have all the keys they need). Surfaces
510
+ * silent self-heal so consumers can log/audit what happened without
511
+ * defeating the "kgauto just gets" UX.
512
+ *
513
+ * Empty array (not undefined) when filter ran but dropped nothing —
514
+ * distinguishes "filter ran cleanly" from "filter was disabled" (`undefined`
515
+ * when `noAutoFilter: true`).
516
+ */
517
+ unreachableFiltered?: string[];
518
+ }
519
+ /**
520
+ * Thrown when call() exhausts the fallback chain without success.
521
+ * `attempts` carries every model tried + classification.
522
+ */
523
+ declare class CallError extends Error {
524
+ readonly attempts: CallAttempt[];
525
+ readonly lastErrorCode?: string; // e.g. 'no_reachable_models' when auto-filtering empties the chain (per `CallOptions.noAutoFilter`).
526
+ readonly lastStatus?: number; // Presumably the last HTTP status seen — confirm.
527
+ constructor(message: string, attempts: CallAttempt[], lastStatus?: number, lastErrorCode?: string); // Note: `lastStatus` precedes `lastErrorCode`, the reverse of the field declaration order.
528
+ }
529
+ interface OracleScore { // Quality judgment attached to an outcome via `RecordInput.oracleScore`.
530
+ /** Overall quality in the range 0..1. */
531
+ score: number;
532
+ /** Optional per-dimension breakdown, keyed by dimension name. */
533
+ dimensions?: Record<string, number>;
534
+ /** Free-form explanation for debugging. */
535
+ rationale?: string;
536
+ }
537
+ interface RecordInput {
538
+ /** Handle from CompileResult. */
539
+ handle: string;
540
+ /** Actual tokens consumed (post-call). */
541
+ tokensIn: number;
542
+ tokensOut: number;
543
+ /** Wall-clock latency in ms. */
544
+ latencyMs: number;
545
+ /** True iff the call returned a usable response. */
546
+ success: boolean;
547
+ /** True iff the call returned 0 output tokens despite success. */
548
+ emptyResponse?: boolean;
549
+ /** Provider error code if any. */
550
+ errorType?: string;
551
+ /** Tools actually invoked by the model. */
552
+ toolsCalled?: string[];
553
+ /** Oracle quality score — optional in the type, but learning only fires when it is present. */
554
+ oracleScore?: OracleScore;
555
+ /** Optional: scrubbed prompt/response previews for debugging. */
556
+ promptPreview?: string;
557
+ responsePreview?: string;
558
+ /**
559
+ * The model that ACTUALLY RAN. Set this when consumer-side fallback ran
560
+ * a different model than v2 compile() targeted. Brain stores this as
561
+ * `model` (the truth) and the original target as `requested_model`.
562
+ *
563
+ * Omit when no fallback occurred — brain stores compile target as `model`
564
+ * (still the truth in that case) and `requested_model` stays NULL.
565
+ *
566
+ * s11 fix: prevents the brain from misattributing fallback traffic to
567
+ * the originally-requested model.
568
+ */
569
+ actualModel?: string;
570
+ /**
571
+ * Override `mutations_applied` for this outcome. Set by `call()` when
572
+ * fallback fires — the served compile's mutations (which actually shaped
573
+ * the request that went on the wire) replace the initial compile's
574
+ * mutations (registered against the handle). Without this override, fallback
575
+ * traffic is attributed to the initial compile's mutations and the brain's
576
+ * mutation effectiveness stats become misleading.
577
+ *
578
+ * alpha.4: extends s11 truth-in-logging to mutations.
579
+ */
580
+ mutationsApplied?: string[];
581
+ /**
582
+ * Cache read input tokens, when supported by the provider.
583
+ * - Anthropic: `usage.cache_read_input_tokens`
584
+ * - Google (implicit caching): `usageMetadata.cachedContentTokenCount`
585
+ * - OpenAI: `usage.prompt_tokens_details.cached_tokens`
586
+ *
587
+ * Powers the cost-and-efficiency-watcher (interfaces/kgauto.md, alpha.4):
588
+ * `tokens_in - cache_read_input_tokens` is the un-cached new context per call.
589
+ */
590
+ cacheReadInputTokens?: number;
591
+ /**
592
+ * Cache creation input tokens (Anthropic-specific), from
593
+ * `usage.cache_creation_input_tokens`. Non-zero on the first call, which pays the 25%
594
+ * upcharge to write a cache marker; subsequent calls show up in `cacheReadInputTokens`.
595
+ */
596
+ cacheCreationInputTokens?: number;
597
+ /**
598
+ * Time to first token (ms). Optional; populated when the provider/SDK
599
+ * surfaces it. Distinct from `latencyMs` (end-to-end wall clock).
600
+ */
601
+ ttftMs?: number;
602
+ }
603
+
604
+ /**
605
+ * Model profiles — executable knowledge about each provider/model.
606
+ *
607
+ * Unlike v1 which carried `known_failures` as prose strings, v2 makes them
608
+ * executable: cliffs trigger guards, lowering describes the wire format,
609
+ * recovery handlers describe what to do after specific failures.
610
+ *
611
+ * Each profile is the answer to "if I want to call THIS model with THIS
612
+ * shape of work, what does it need from me, and what should I do when it
613
+ * fails?"
614
+ */
615
+
616
+ type StructuredOutputCapability = 'native' | 'grammar' | 'none'; // Value type of `ModelProfile.structuredOutput`.
617
+ type SystemPromptMode = 'inline' | 'separate' | 'as_developer' | 'unsupported'; // Used by `ModelProfile.systemPromptMode` and `LoweringSpec.system.mode`.
618
+ type CacheStrategy = 'cache_control' | 'cachedContent' | 'unsupported'; // Used by `LoweringSpec.cache.strategy`; the first two names match the Anthropic and Google fields in `CompiledRequest`.
619
+ interface CliffRule { // Element of `ModelProfile.cliffs`: pairs a trigger metric/threshold with the action to take.
620
+ /** What metric triggers this cliff. */
621
+ metric: 'input_tokens' | 'tool_count' | 'history_turns' | 'thinking_with_short_output';
622
+ /** Threshold — meaning depends on metric. */
623
+ threshold: number;
624
+ /** What action to take when triggered. */
625
+ action: 'downgrade_quality_warning' | 'drop_to_top_relevant' | 'force_thinking_budget_zero' | 'force_terse_output' | 'escalate_target' | 'strip_tools';
626
+ /**
627
+ * Optional: only fire this cliff when the IR's intent.archetype matches.
628
+ * Used for archetype-specific failure modes (e.g. Gemini Flash returns
629
+ * empty when summarize is offered tools). When omitted, no archetype filter is declared.
630
+ */
631
+ whenIntent?: IntentArchetypeName;
632
+ /** Human-readable reason for digest reporting. */
633
+ reason: string;
634
+ }
635
+ interface RecoveryRule { // Element of `ModelProfile.recovery`: what to do after a specific failure signal.
636
+ /** What signal triggers recovery. */
637
+ signal: 'empty_response_after_tool' | 'empty_response' | 'malformed_function_call' | 'rate_limit' | 'model_not_found' | 'context_overflow';
638
+ /** Action: retry with adjusted params, escalate to next fallback, or only log. */
639
+ action: 'retry_with_params' | 'escalate' | 'log_only';
640
+ /** When action=retry_with_params, the param adjustments to apply. */
641
+ retryParams?: Record<string, unknown>;
642
+ /** Max retries with this rule. */
643
+ maxRetries?: number;
644
+ /** Human-readable reason for digest reporting. */
645
+ reason: string;
646
+ }
647
+ interface LoweringSpec { // Describes the wire format for a model; referenced from `ModelProfile.lowering`.
648
+ /** Where the system prompt goes. */
649
+ system: {
650
+ mode: SystemPromptMode;
651
+ field?: string;
652
+ };
653
+ /** Cache strategy + parameters. */
654
+ cache: {
655
+ strategy: CacheStrategy;
656
+ /** Min tokens before caching is worth it (provider rules). */
657
+ minTokens?: number;
658
+ /** Discount factor on cached input (0.1 = 10% of normal price). */
659
+ discount?: number;
660
+ /** TTL hint in seconds. */
661
+ ttlSeconds?: number;
662
+ };
663
+ /** Tool format identifier — see lower.ts for supported formats. */
664
+ tools?: {
665
+ format: 'anthropic' | 'google' | 'openai' | 'deepseek'; // NOTE(review): narrower than `Provider` (no 'mistral'/'xai') — confirm intended.
666
+ };
667
+ /** Thinking config — present iff this model has a thinking knob. */
668
+ thinking?: {
669
+ /** Field path on the request. */
670
+ field: string;
671
+ /** Default value when caller hasn't specified. */
672
+ default?: number | 'auto' | 'off';
673
+ };
674
+ }
675
+ interface ModelProfile { // Knowledge about one provider/model: limits, capabilities, cliffs, lowering, and recovery rules.
676
+ id: string;
677
+ provider: Provider;
678
+ status: 'current' | 'preview' | 'legacy';
679
+ maxContextTokens: number;
680
+ maxOutputTokens: number;
681
+ maxTools: number;
682
+ parallelToolCalls: boolean;
683
+ structuredOutput: StructuredOutputCapability;
684
+ systemPromptMode: SystemPromptMode;
685
+ streaming: boolean;
686
+ cliffs: CliffRule[];
687
+ costInputPer1m: number; // Presumably USD per 1M input tokens — confirm currency/units.
688
+ costOutputPer1m: number; // Presumably USD per 1M output tokens — confirm currency/units.
689
+ lowering: LoweringSpec;
690
+ recovery: RecoveryRule[];
691
+ strengths: string[];
692
+ weaknesses: string[];
693
+ notes?: string;
694
+ verifiedAgainstDocs?: string; // Presumably a date string; the archetypePerf note below describes a >90-day staleness audit keyed on it — confirm format.
695
+ /**
696
+ * Hand-curated per-archetype performance score on a 0-10 scale.
697
+ *
698
+ * 10 = frontier on this archetype (e.g. Opus 4.7 on critique)
699
+ * 8 = strong second tier (Sonnet on plan, Pro on extract)
700
+ * 7 = competent (Haiku on classify, Flash on hunt)
701
+ * 5 = acceptable for tolerant archetypes (Flash-Lite on classify)
702
+ * 3 = degraded (Flash on critique, DeepSeek on hunt)
703
+ *
704
+ * Missing archetypes default to `5` (no data, neutral). Each non-default
705
+ * value should carry a one-line rationale in the profile's note or inline
706
+ * comment citing brain evidence, family prior, or "starter hypothesis —
707
+ * verify with telemetry."
708
+ *
709
+ * Source today: hand-curated from master plan §3.3 + §6.2 starter tables.
710
+ * Source tomorrow (alpha.10+): brain `archetype_model_evidence` view.
711
+ *
712
+ * Anti-hallucination guardrail (master plan §2.5): when the watcher's
713
+ * `--audit-fields` flag flags a profile stale (>90 days since
714
+ * verifiedAgainstDocs), the archetypePerf values get re-audited
715
+ * alongside capability fields. AI-trained intuition is NOT a valid
716
+ * source — only docs or brain evidence.
717
+ *
718
+ * alpha.9.
719
+ */
720
+ archetypePerf?: Partial<Record<IntentArchetypeName, number>>;
721
+ }
722
+ declare const ALIASES: Record<string, string>; // String-to-string map; presumably alias → canonical model id — confirm.
722
+ declare function getProfile(id: string): ModelProfile; // Non-optional return; presumably throws for unknown ids (contrast `tryGetProfile`) — confirm.
723
+ declare function tryGetProfile(id: string): ModelProfile | undefined; // May return `undefined` instead of a profile.
724
+ declare function allProfiles(): readonly ModelProfile[]; // Read-only list of profiles.
725
+ declare function profilesByProvider(provider: Provider): readonly ModelProfile[]; // Read-only list of profiles for the given `Provider`.
727
+
728
+ export { type ApiKeys as A, type BestPracticeAdvisory as B, type CompilePolicy as C, type FallbackReason as F, type HistoryCachePolicy as H, type IntentDeclaration as I, type LoweringSpec as L, type ModelProfile as M, type NormalizedResponse as N, type OracleScore as O, type ProviderOverrides as P, type RecordInput as R, type StructuredOutputCapability as S, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type CompileResult as e, type Provider as f, ALIASES as g, type CacheStrategy as h, type CallAttempt as i, CallError as j, type CliffRule as k, type Constraints as l, type Message as m, type MutationApplied as n, type NormalizedTokens as o, type PromptSection as p, type RecoveryRule as q, type SystemPromptMode as r, type ToolDefinition as s, allProfiles as t, getProfile as u, profilesByProvider as v, tryGetProfile as w };