@warmdrift/kgauto-compiler 2.0.0-alpha.3 → 2.0.0-alpha.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,489 +0,0 @@
1
- import { IntentArchetypeName } from './dialect.js';
2
-
3
- /**
4
- * Intermediate Representation — the structured form of a prompt.
5
- *
6
- * Everything kgauto v2 does, it does on the IR. The IR is constructed by the
7
- * caller (or by `parse()` from a string-style prompt for backwards-compat),
8
- * transformed by the compiler passes, and lowered to a target-specific wire
9
- * request only at the very end.
10
- *
11
- * The IR carries STRUCTURE, not just text. Sections are first-class. Tools
12
- * carry per-intent relevance. History knows its turn-age. Constraints are
13
- * explicit. This is what lets passes do real work.
14
- */
15
-
16
- /**
17
- * A semantically-named section of the system prompt. Sections enable
18
- * intent-aware slicing (drop sections not tagged for this intent), dedupe
19
- * (collapse identical sections across files), and cache marking (identify
20
- * the stable prefix).
21
- */
22
- interface PromptSection {
23
- /** Stable identifier — used for slicing, dedupe, and cache markers. */
24
- id: string;
25
- /** Section text. */
26
- text: string;
27
- /**
28
- * Which intents this section applies to. Empty = applies to all intents.
29
- * `compile()` will drop sections whose intents array doesn't include
30
- * the current intent.
31
- */
32
- intents?: IntentArchetypeName[];
33
- /**
34
- * If true, this section is part of the stable cacheable prefix. The lower
35
- * pass uses this to place cache markers correctly per target.
36
- */
37
- cacheable?: boolean;
38
- /**
39
- * Section weight when ordering — lower = earlier in the assembled prompt.
40
- * Defaults to insertion order.
41
- */
42
- weight?: number;
43
- }
44
- interface ToolDefinition {
45
- name: string;
46
- description?: string;
47
- parameters?: Record<string, unknown>;
48
- /**
49
- * Per-intent relevance scores. Compile uses these to drop irrelevant tools.
50
- * Missing intents default to 0.5 (neutral).
51
- */
52
- relevanceByIntent?: Partial<Record<IntentArchetypeName, number>>;
53
- /** Pass-through for provider-specific fields (Anthropic input_schema, etc.). */
54
- [key: string]: unknown;
55
- }
56
- interface Message {
57
- role: 'system' | 'user' | 'assistant' | 'tool';
58
- content: string;
59
- /** Optional structured parts (tool calls, results) — passed through to lowering. */
60
- parts?: unknown[];
61
- /** For tool messages — which tool this corresponds to. */
62
- toolName?: string;
63
- /** For tool messages — the call id. */
64
- toolCallId?: string;
65
- }
66
- /**
67
- * The compile-time intent declaration. `name` is the app's local label;
68
- * `archetype` is the canonical dialect-v1 archetype the app maps it to.
69
- *
70
- * Apps with their own intent vocabulary (tt-intelligence's "ask"/"hunt"/
71
- * "dashboard") declare the mapping here. The brain learns by archetype, not
72
- * by app-local name.
73
- */
74
- interface IntentDeclaration {
75
- /** App-local intent name (free-form, for app's own debugging). */
76
- name: string;
77
- /** Canonical dialect-v1 archetype. Required for cross-app learning. */
78
- archetype: IntentArchetypeName;
79
- }
80
- interface Constraints {
81
- /** Latency ceiling (ms). Advisory, not enforced — compiler will down-rank slow models. */
82
- maxLatencyMs?: number;
83
- /** Cost ceiling per call (USD). Advisory; for an enforced limit see CompilePolicy.maxCostPerCallUsd. */
84
- maxCostUsd?: number;
85
- /** Caller wants structured (JSON) output. */
86
- structuredOutput?: boolean;
87
- /** Hint: caller expects a short response (used to disable thinking on Gemini). */
88
- expectedShortOutput?: boolean;
89
- /** Hint: max response words. */
90
- maxResponseWords?: number;
91
- /** Override target model selection — if set, compiler uses this instead of routing. */
92
- forceModel?: string;
93
- }
94
- /**
95
- * Consumer-declared policy for model selection. Lives outside the IR
96
- * (passed via CompileOptions) because it's a SESSION/APP-level constraint,
97
- * not a per-call shape.
98
- *
99
- * The original tt-intelligence scenario (s11): user capped Anthropic
100
- * spending on Sonnet for cost reasons. v2 compile() kept picking Sonnet
101
- * as the best target; Hunter's preflight hit the cap and fell back to
102
- * Flash — every single call. CompilePolicy.blockedModels lets the
103
- * consumer tell kgauto "don't pick Sonnet right now" and the compiler
104
- * routes to the next-best option directly. No wasted preflight tax.
105
- *
106
- * This is the "coach knows the constraints" feature — kgauto stops
107
- * recommending things the consumer has already ruled out.
108
- */
109
- interface CompilePolicy {
110
- /**
111
- * Model IDs the consumer has gated. Compile() will never select these.
112
- * Use for: cost caps, account-level rate limits, "this model is broken
113
- * for our workload" decisions.
114
- */
115
- blockedModels?: string[];
116
- /**
117
- * Hard ceiling on estimated input cost per call (USD). Models whose
118
- * estimated cost exceeds this are rejected. Use for: budget enforcement
119
- * on high-volume routes.
120
- */
121
- maxCostPerCallUsd?: number;
122
- /**
123
- * Model IDs the consumer prefers. When multiple models fit, preferred
124
- * models get a rank boost (large enough to overcome small quality
125
- * differences but not large enough to override hard rejects).
126
- */
127
- preferredModels?: string[];
128
- }
129
- /**
130
- * The IR — the input to compile().
131
- */
132
- interface PromptIR {
133
- /** App identifier — required for multi-tenant brain. */
134
- appId: string;
135
- /** Intent declaration — what is this call doing? */
136
- intent: IntentDeclaration;
137
- /** Structured system prompt sections. */
138
- sections: PromptSection[];
139
- /** Available tools (compiler may drop based on intent relevance + budget). */
140
- tools?: ToolDefinition[];
141
- /** Conversation history (compiler may compress old turns). */
142
- history?: Message[];
143
- /** The user's current turn — never dropped. */
144
- currentTurn?: Message;
145
- /** Allowed model IDs, in caller-preference order. Compiler picks among these. */
146
- models: string[];
147
- /** Compile constraints. */
148
- constraints?: Constraints;
149
- }
150
- type Provider = 'anthropic' | 'google' | 'openai' | 'deepseek' | 'mistral' | 'xai';
151
- /**
152
- * Describes one mutation that fired during compile. The applied list is empty in v1 (no mutation engine
153
- * yet). Populated when the brain is online and pushing mutations.
154
- */
155
- type MutationApplied = {
156
- id: string;
157
- source: string;
158
- passName: string;
159
- description: string;
160
- };
161
- /**
162
- * Target-specific wire request. Shape varies by provider — caller passes the
163
- * right field to the right SDK.
164
- */
165
- type CompiledRequest = {
166
- provider: 'anthropic';
167
- model: string;
168
- system: Array<{
169
- type: 'text';
170
- text: string;
171
- cache_control?: {
172
- type: 'ephemeral';
173
- };
174
- }>;
175
- messages: Array<{
176
- role: string;
177
- content: unknown;
178
- }>;
179
- tools?: unknown[];
180
- max_tokens?: number;
181
- } | {
182
- provider: 'google';
183
- model: string;
184
- systemInstruction?: {
185
- role: 'system';
186
- parts: Array<{
187
- text: string;
188
- }>;
189
- };
190
- contents: Array<{
191
- role: string;
192
- parts: unknown[];
193
- }>;
194
- tools?: unknown[];
195
- generationConfig?: Record<string, unknown>;
196
- cachedContent?: string;
197
- } | {
198
- provider: 'openai';
199
- model: string;
200
- messages: Array<{
201
- role: string;
202
- content: unknown;
203
- }>;
204
- tools?: unknown[];
205
- response_format?: unknown;
206
- reasoning_effort?: string;
207
- } | {
208
- provider: 'deepseek';
209
- model: string;
210
- messages: Array<{
211
- role: string;
212
- content: unknown;
213
- }>;
214
- tools?: unknown[];
215
- };
216
- interface CompileResult {
217
- /** Unique handle for this call — pass to record() to correlate the outcome. */
218
- handle: string;
219
- /** Selected target model id. */
220
- target: string;
221
- /** Selected provider. */
222
- provider: Provider;
223
- /** The wire request — pass the appropriate fields to your SDK. */
224
- request: CompiledRequest;
225
- /** Estimated tokens (input). */
226
- tokensIn: number;
227
- /** Estimated cost in USD (input portion). */
228
- estimatedCostUsd: number;
229
- /** Mutations that fired during compile (informational). */
230
- mutationsApplied: MutationApplied[];
231
- /** Fallback chain — try these in order if target fails. */
232
- fallbackChain: string[];
233
- /** Diagnostics for caller-side logging. */
234
- diagnostics: {
235
- sectionsKept: number;
236
- sectionsDropped: number;
237
- toolsKept: number;
238
- toolsDropped: number;
239
- historyKept: number;
240
- historyDropped: number;
241
- cacheableTokens: number;
242
- estimatedCacheSavingsUsd: number;
243
- };
244
- }
245
- /**
246
- * Token usage normalized across providers. `cached` and `cacheCreated` are
247
- * Anthropic prompt-cache reads/writes (Gemini implicit caching populates
248
- * `cached` from `usageMetadata.cachedContentTokenCount`; OpenAI populates
249
- * from `prompt_tokens_details.cached_tokens`).
250
- */
251
- interface NormalizedTokens {
252
- input: number;
253
- output: number;
254
- total: number;
255
- cached?: number;
256
- cacheCreated?: number;
257
- }
258
- /**
259
- * Tool call in a provider-agnostic shape. Anthropic `tool_use` blocks,
260
- * Google `functionCall` parts, and OpenAI/DeepSeek `tool_calls[]` all
261
- * collapse to this.
262
- */
263
- interface ToolCall {
264
- id: string;
265
- name: string;
266
- args: Record<string, unknown>;
267
- }
268
- interface NormalizedResponse {
269
- /** Main text body. Empty string if response had no text content. */
270
- text: string;
271
- /**
272
- * Parsed structured output. Populated when ir.constraints.structuredOutput
273
- * is true and JSON.parse(text) succeeds. Null otherwise.
274
- */
275
- structuredOutput: unknown | null;
276
- /** Tool calls in normalized shape. Empty array if none. */
277
- toolCalls: ToolCall[];
278
- tokens: NormalizedTokens;
279
- /** Provider-specific finish reason, passed through unchanged. */
280
- finishReason?: string;
281
- /** Untouched provider response — escape hatch for consumers needing fields not yet normalized. */
282
- raw: unknown;
283
- /** Set when structuredOutput parsing was attempted and failed. */
284
- parseError?: string;
285
- }
286
- interface ApiKeys {
287
- anthropic?: string;
288
- google?: string;
289
- openai?: string;
290
- deepseek?: string;
291
- }
292
- /**
293
- * Per-provider override fields shallow-merged into the lowered request before
294
- * execution. Lets consumers reach Gemini `safetySettings`, Anthropic
295
- * `tool_choice`, OpenAI `seed` etc. without bypassing kgauto.
296
- */
297
- interface ProviderOverrides {
298
- anthropic?: Record<string, unknown>;
299
- google?: Record<string, unknown>;
300
- openai?: Record<string, unknown>;
301
- deepseek?: Record<string, unknown>;
302
- }
303
- interface CallOptions {
304
- /** Forwarded to compile(). */
305
- policy?: CompilePolicy;
306
- toolRelevanceThreshold?: number;
307
- compressHistoryAfter?: number;
308
- /** Override API keys (defaults: process.env). */
309
- apiKeys?: ApiKeys;
310
- /** Provider-specific request fields shallow-merged into the lowered request. */
311
- providerOverrides?: ProviderOverrides;
312
- /** Override fetch (for tests). */
313
- fetchImpl?: typeof fetch;
314
- /** Set to true to disable the retry/fallback walk on retryable errors. Default: fallback enabled. */
315
- noFallback?: boolean;
316
- }
317
- interface CallAttempt {
318
- model: string;
319
- status: 'success' | 'retryable' | 'terminal';
320
- errorCode?: string;
321
- message?: string;
322
- }
323
- interface CallResult {
324
- /** Compile handle (still valid for record() if consumer wants to add oracle scores later). */
325
- handle: string;
326
- /** The model that ACTUALLY served the response (post-fallback). */
327
- actualModel: string;
328
- /** What compile() originally targeted. */
329
- requestedModel: string;
330
- provider: Provider;
331
- response: NormalizedResponse;
332
- latencyMs: number;
333
- /** Mutations that fired during compile (informational, mirrors CompileResult.mutationsApplied). */
334
- mutationsApplied: MutationApplied[];
335
- /** One entry per provider attempt — observability for retry/fallback walks. */
336
- attempts: CallAttempt[];
337
- }
338
- /**
339
- * Thrown when call() exhausts the fallback chain without success.
340
- * `attempts` carries every model tried + classification.
341
- */
342
- declare class CallError extends Error {
343
- readonly attempts: CallAttempt[];
344
- readonly lastErrorCode?: string;
345
- readonly lastStatus?: number;
346
- constructor(message: string, attempts: CallAttempt[], lastStatus?: number, lastErrorCode?: string);
347
- }
348
- interface OracleScore {
349
- /** 0..1 overall quality. */
350
- score: number;
351
- /** Optional per-dimension breakdown. */
352
- dimensions?: Record<string, number>;
353
- /** Free-form explanation for debugging. */
354
- rationale?: string;
355
- }
356
- interface RecordInput {
357
- /** Handle from CompileResult. */
358
- handle: string;
359
- /** Actual tokens consumed (post-call). */
360
- tokensIn: number;
361
- tokensOut: number;
362
- /** Wall-clock latency in ms. */
363
- latencyMs: number;
364
- /** True iff the call returned a usable response. */
365
- success: boolean;
366
- /** True iff the call returned 0 output tokens despite success. */
367
- emptyResponse?: boolean;
368
- /** Provider error code if any. */
369
- errorType?: string;
370
- /** Tools actually invoked by the model. */
371
- toolsCalled?: string[];
372
- /** Oracle quality score — required for learning to fire. */
373
- oracleScore?: OracleScore;
374
- /** Optional: scrubbed prompt/response previews for debugging. */
375
- promptPreview?: string;
376
- responsePreview?: string;
377
- /**
378
- * The model that ACTUALLY RAN. Set this when consumer-side fallback ran
379
- * a different model than v2 compile() targeted. Brain stores this as
380
- * `model` (the truth) and the original target as `requested_model`.
381
- *
382
- * Omit when no fallback occurred — brain stores compile target as `model`
383
- * (still the truth in that case) and `requested_model` stays NULL.
384
- *
385
- * s11 fix: prevents the brain from misattributing fallback traffic to
386
- * the originally-requested model.
387
- */
388
- actualModel?: string;
389
- }
390
-
391
- /**
392
- * Model profiles — executable knowledge about each provider/model.
393
- *
394
- * Unlike v1 which carried `known_failures` as prose strings, v2 makes them
395
- * executable: cliffs trigger guards, lowering describes the wire format,
396
- * recovery handlers describe what to do after specific failures.
397
- *
398
- * Each profile is the answer to "if I want to call THIS model with THIS
399
- * shape of work, what does it need from me, and what should I do when it
400
- * fails?"
401
- */
402
-
403
- type StructuredOutputCapability = 'native' | 'grammar' | 'none';
404
- type SystemPromptMode = 'inline' | 'separate' | 'as_developer' | 'unsupported';
405
- type CacheStrategy = 'cache_control' | 'cachedContent' | 'unsupported';
406
- interface CliffRule {
407
- /** What metric triggers this cliff. */
408
- metric: 'input_tokens' | 'tool_count' | 'history_turns' | 'thinking_with_short_output';
409
- /** Threshold — meaning depends on metric. */
410
- threshold: number;
411
- /** What action to take when triggered. */
412
- action: 'downgrade_quality_warning' | 'drop_to_top_relevant' | 'force_thinking_budget_zero' | 'force_terse_output' | 'escalate_target' | 'strip_tools';
413
- /**
414
- * Optional: only fire this cliff when the IR's intent.archetype matches.
415
- * Used for archetype-specific failure modes (e.g. Gemini Flash returns
416
- * empty when summarize is offered tools).
417
- */
418
- whenIntent?: IntentArchetypeName;
419
- /** Human-readable reason for digest reporting. */
420
- reason: string;
421
- }
422
- interface RecoveryRule {
423
- /** What signal triggers recovery. */
424
- signal: 'empty_response_after_tool' | 'empty_response' | 'malformed_function_call' | 'rate_limit' | 'model_not_found' | 'context_overflow';
425
- /** Action: retry with adjusted params, or escalate to next fallback. */
426
- action: 'retry_with_params' | 'escalate' | 'log_only';
427
- /** When action=retry_with_params, the param adjustments to apply. */
428
- retryParams?: Record<string, unknown>;
429
- /** Max retries with this rule. */
430
- maxRetries?: number;
431
- /** Human-readable reason for digest reporting. */
432
- reason: string;
433
- }
434
- interface LoweringSpec {
435
- /** Where the system prompt goes. */
436
- system: {
437
- mode: SystemPromptMode;
438
- field?: string;
439
- };
440
- /** Cache strategy + parameters. */
441
- cache: {
442
- strategy: CacheStrategy;
443
- /** Min tokens before caching is worth it (provider rules). */
444
- minTokens?: number;
445
- /** Discount factor on cached input (0.1 = 10% of normal price). */
446
- discount?: number;
447
- /** TTL hint in seconds. */
448
- ttlSeconds?: number;
449
- };
450
- /** Tool format identifier — see lower.ts for supported formats. */
451
- tools?: {
452
- format: 'anthropic' | 'google' | 'openai' | 'deepseek';
453
- };
454
- /** Thinking config — present iff this model has a thinking knob. */
455
- thinking?: {
456
- /** Field path on the request. */
457
- field: string;
458
- /** Default value when caller hasn't specified. */
459
- default?: number | 'auto' | 'off';
460
- };
461
- }
462
- interface ModelProfile {
463
- id: string;
464
- provider: Provider;
465
- status: 'current' | 'preview' | 'legacy';
466
- maxContextTokens: number;
467
- maxOutputTokens: number;
468
- maxTools: number;
469
- parallelToolCalls: boolean;
470
- structuredOutput: StructuredOutputCapability;
471
- systemPromptMode: SystemPromptMode;
472
- streaming: boolean;
473
- cliffs: CliffRule[];
474
- costInputPer1m: number;
475
- costOutputPer1m: number;
476
- lowering: LoweringSpec;
477
- recovery: RecoveryRule[];
478
- strengths: string[];
479
- weaknesses: string[];
480
- notes?: string;
481
- verifiedAgainstDocs?: string;
482
- }
483
- declare const ALIASES: Record<string, string>;
484
- declare function getProfile(id: string): ModelProfile;
485
- declare function tryGetProfile(id: string): ModelProfile | undefined;
486
- declare function allProfiles(): readonly ModelProfile[];
487
- declare function profilesByProvider(provider: Provider): readonly ModelProfile[];
488
-
489
- export { type ApiKeys as A, type CompilePolicy as C, type IntentDeclaration as I, type LoweringSpec as L, type ModelProfile as M, type NormalizedResponse as N, type OracleScore as O, type ProviderOverrides as P, type RecordInput as R, type StructuredOutputCapability as S, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type CompileResult as e, ALIASES as f, type CacheStrategy as g, type CallAttempt as h, CallError as i, type CliffRule as j, type Constraints as k, type Message as l, type MutationApplied as m, type NormalizedTokens as n, type PromptSection as o, type Provider as p, type RecoveryRule as q, type SystemPromptMode as r, type ToolDefinition as s, allProfiles as t, getProfile as u, profilesByProvider as v, tryGetProfile as w };