@warmdrift/kgauto-compiler 2.0.0-alpha.8 → 2.0.0-alpha.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -51,7 +51,24 @@ var PROFILES_RAW = [
51
51
  ],
52
52
  strengths: ["reasoning", "agentic_coding", "long_context", "reliable_tool_use", "structured_output"],
53
53
  weaknesses: ["cost", "latency"],
54
- notes: "Frontier (2026-05). Step-change improvement over 4.6 in agentic coding. Adaptive thinking only \u2014 no extended-thinking toggle. 1M context, 128k max output."
54
+ notes: "Frontier (2026-05). Step-change improvement over 4.6 in agentic coding. Adaptive thinking only \u2014 no extended-thinking toggle. 1M context, 128k max output.",
55
+ // Frontier perf. Drops on archetypes where parallel-tool throughput
56
+ // (hunt) or low-budget cost-sensitivity (classify/summarize) matters
57
+ // more than reasoning depth.
58
+ archetypePerf: {
59
+ critique: 10,
60
+ plan: 10,
61
+ generate: 9,
62
+ ask: 9,
63
+ extract: 9,
64
+ transform: 9,
65
+ hunt: 8,
66
+ // strong but Flash dominates parallel tool throughput
67
+ summarize: 8,
68
+ // overkill for tolerant archetype; cost-out of frontier
69
+ classify: 8
70
+ // overkill; brain-validated cheaper models cover this
71
+ }
55
72
  },
56
73
  {
57
74
  id: "claude-opus-4-6",
@@ -83,7 +100,20 @@ var PROFILES_RAW = [
83
100
  ],
84
101
  strengths: ["reasoning", "long_context", "reliable_tool_use", "structured_output", "extended_thinking"],
85
102
  weaknesses: ["cost", "latency"],
86
- notes: "Predecessor to 4.7. Still current in Anthropic legacy table. Same pricing as 4.7 \u2014 choose 4.7 unless you need extended-thinking budget control (4.7 is adaptive-only)."
103
+ notes: "Predecessor to 4.7. Still current in Anthropic legacy table. Same pricing as 4.7 \u2014 choose 4.7 unless you need extended-thinking budget control (4.7 is adaptive-only).",
104
+ // One notch below 4.7 across the board — extended-thinking edge does
105
+ // not flip any archetype ranking. Legacy: chains should prefer 4.7.
106
+ archetypePerf: {
107
+ critique: 9,
108
+ plan: 9,
109
+ generate: 9,
110
+ ask: 9,
111
+ extract: 9,
112
+ transform: 9,
113
+ hunt: 7,
114
+ summarize: 8,
115
+ classify: 8
116
+ }
87
117
  },
88
118
  {
89
119
  id: "claude-sonnet-4-6",
@@ -107,7 +137,23 @@ var PROFILES_RAW = [
107
137
  ],
108
138
  strengths: ["quality", "tool_use", "long_context", "cache_friendly", "extended_thinking"],
109
139
  weaknesses: [],
110
- notes: "Workhorse. Best price/quality for most multi-turn agentic work. 1M context, 64k max output."
140
+ notes: "Workhorse. Best price/quality for most multi-turn agentic work. 1M context, 64k max output.",
141
+ // Master plan §6.2 anchor. Tier 0 for plan/generate/ask/extract/transform
142
+ // in starter chains; tier 1 cross-provider for hunt/summarize/classify.
143
+ archetypePerf: {
144
+ ask: 9,
145
+ generate: 9,
146
+ plan: 9,
147
+ critique: 9,
148
+ extract: 9,
149
+ transform: 9,
150
+ hunt: 7,
151
+ // strong but Flash beats on parallel tool throughput
152
+ summarize: 8,
153
+ // overkill for tolerant archetype
154
+ classify: 8
155
+ // overkill
156
+ }
111
157
  },
112
158
  {
113
159
  id: "claude-haiku-4-5",
@@ -137,7 +183,23 @@ var PROFILES_RAW = [
137
183
  ],
138
184
  strengths: ["speed", "cost", "classification", "cache_friendly", "extended_thinking"],
139
185
  weaknesses: ["complex_reasoning", "large_tool_sets"],
140
- notes: "Cheapest Anthropic. Great for classify, summarize, ask shapes. 200k context, 64k max output. API alias `claude-haiku-4-5` resolves to dated snapshot `claude-haiku-4-5-20251001`."
186
+ notes: "Cheapest Anthropic. Great for classify, summarize, ask shapes. 200k context, 64k max output. API alias `claude-haiku-4-5` resolves to dated snapshot `claude-haiku-4-5-20251001`.",
187
+ // Tier 1 cross-provider anchor for short-output chains (classify/
188
+ // summarize/extract/transform). Falls off on plan/critique where
189
+ // reasoning depth matters; competes with Pro on cost+latency.
190
+ archetypePerf: {
191
+ classify: 8,
192
+ summarize: 8,
193
+ ask: 7,
194
+ transform: 7,
195
+ extract: 7,
196
+ hunt: 6,
197
+ // tool reliability drops at 16 — cliff guard fires
198
+ generate: 6,
199
+ plan: 5,
200
+ critique: 4
201
+ // reasoning depth gap vs Sonnet/Opus
202
+ }
141
203
  },
142
204
  // ── Google ──
143
205
  {
@@ -215,7 +277,131 @@ var PROFILES_RAW = [
215
277
  ],
216
278
  strengths: ["speed", "volume", "classification", "1m_context", "cost"],
217
279
  weaknesses: ["complex_schemas", "large_tool_sets", "high_context_quality"],
218
- notes: "Fast and cheap with 1M context. Quality cliffs at 8K context and 20 tools \u2014 guard with cliffs."
280
+ notes: "Fast and cheap with 1M context. Quality cliffs at 8K context and 20 tools \u2014 guard with cliffs.",
281
+ // Master plan §6.2 anchor. Tier 0 for hunt (parallel tool throughput
282
+ // 15-75 calls/step beats Sonnet — L-040), summarize, classify.
283
+ archetypePerf: {
284
+ hunt: 9,
285
+ // L-040: parallel tool throughput 15-75/step
286
+ classify: 7,
287
+ // brain-validated, 218 rows
288
+ summarize: 7,
289
+ // brain-validated; cliff strips tools when present
290
+ transform: 7,
291
+ ask: 7,
292
+ generate: 6,
293
+ plan: 5,
294
+ extract: 6,
295
+ // alpha.8 MAX_TOKENS history on structured output
296
+ critique: 4
297
+ // reasoning shallower than Sonnet/Opus
298
+ }
299
+ },
300
+ {
301
+ // ── Gemini 2.5 Flash-Lite ──
302
+ // Onboarded 2026-05-13 (s22) after the model-release watcher surfaced
303
+ // it as a UNREGISTERED + NEW candidate. Released by Google July 2025,
304
+ // stable. Positioned BELOW Flash on the cost/perf frontier:
305
+ // input $0.10/M (Flash $0.30/M) — 3× cheaper
306
+ // output $0.40/M (Flash $2.50/M) — 6× cheaper
307
+ // cache $0.01/M — 1/10 of input (vs Flash 0.25 discount)
308
+ // Cliffs are HYPOTHESIZED from Flash's known failure modes — Flash-Lite
309
+ // is a smaller sibling, so we inherit Flash's cliffs at equal-or-tighter
310
+ // thresholds. The brain will validate/relax these as evidence accumulates
311
+ // per (archetype, model) tuple. Currently ZERO brain rows for this model.
312
+ id: "gemini-2.5-flash-lite",
313
+ verifiedAgainstDocs: "2026-05-13",
314
+ provider: "google",
315
+ status: "current",
316
+ maxContextTokens: 1048576,
317
+ maxOutputTokens: 65536,
318
+ maxTools: 128,
319
+ parallelToolCalls: true,
320
+ structuredOutput: "native",
321
+ systemPromptMode: "separate",
322
+ streaming: true,
323
+ cliffs: [
324
+ {
325
+ metric: "input_tokens",
326
+ threshold: 8e3,
327
+ action: "downgrade_quality_warning",
328
+ reason: "Inherited from Flash: quality degrades above ~8K. Smaller model \u2014 likely degrades faster. Re-tune from brain after n\u226520."
329
+ },
330
+ {
331
+ metric: "tool_count",
332
+ threshold: 10,
333
+ action: "drop_to_top_relevant",
334
+ reason: "Conservative: Flash drops at 20, Flash-Lite is smaller \u2014 assume tighter ceiling until brain proves otherwise."
335
+ },
336
+ {
337
+ metric: "thinking_with_short_output",
338
+ threshold: 1,
339
+ action: "force_thinking_budget_zero",
340
+ reason: "Thinking enabled per Google API (thinking: true). Same drain risk as Flash \u2014 thinking tokens consume maxOutputTokens."
341
+ },
342
+ {
343
+ // Strong prior: Flash hit 5/5 empty rate on summarize+tools (s11
344
+ // trust artifact, kgauto commit 3872832). Flash-Lite shares the
345
+ // same architectural family — almost certainly inherits this cliff.
346
+ // Ship the guard preemptively; brain telemetry confirms or relaxes.
347
+ metric: "tool_count",
348
+ threshold: 1,
349
+ whenIntent: "summarize",
350
+ action: "strip_tools",
351
+ reason: "Inherited from Flash s11 cliff: summarize+tools \u2192 empty response. Preemptive guard until brain evidence on Flash-Lite specifically."
352
+ }
353
+ ],
354
+ costInputPer1m: 0.1,
355
+ costOutputPer1m: 0.4,
356
+ lowering: {
357
+ ...GOOGLE_LOWERING_BASE,
358
+ // Cache discount 10× (vs Flash 4×) — Google's spec is $0.01/M cache vs
359
+ // $0.10/M input. Material for repeat-prompt workloads (classify shape).
360
+ cache: { ...GOOGLE_LOWERING_BASE.cache, discount: 0.1 },
361
+ thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
362
+ },
363
+ recovery: [
364
+ {
365
+ signal: "empty_response_after_tool",
366
+ action: "retry_with_params",
367
+ retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
368
+ maxRetries: 1,
369
+ reason: "Known on Flash family: empty after tool result \u2014 retry with thinking off."
370
+ },
371
+ {
372
+ signal: "empty_response",
373
+ action: "retry_with_params",
374
+ retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
375
+ maxRetries: 1,
376
+ reason: "Empty response \u2014 try with thinking off."
377
+ },
378
+ {
379
+ signal: "malformed_function_call",
380
+ action: "escalate",
381
+ reason: "MALFORMED_FUNCTION_CALL maps to stop \u2014 escalate to next target."
382
+ }
383
+ ],
384
+ strengths: ["lowest_cost", "speed", "volume", "classification", "summarize", "1m_context", "cache_friendly"],
385
+ weaknesses: ["complex_reasoning", "large_tool_sets", "complex_schemas", "structured_output_unproven", "long_context_quality"],
386
+ notes: "Bottom-frontier anchor on cost: $0.10/$0.40 per 1M tokens, 1M context, 65K max output. Released July 2025 (stable). Positioned for classify / summarize / transform archetypes where quality bar is forgiving. Cliffs inherited from Flash at equal-or-tighter thresholds \u2014 re-tune per (archetype) once brain has n\u226520 rows. Alpha.8 contract layer handles MAX_TOKENS-on-structured-output via fallback chain, so structuredOutput=native is safe to declare even though Flash had alpha.8 history. Cache discount in spec: $0.01/M = 1/10 of input (richer than Flash 25%) \u2014 meaningful for repeat-prompt workloads.",
387
+ // Tier 3 emergency floor for summarize/classify chains. ZERO brain
388
+ // rows — all values are starter hypotheses anchored to "smaller
389
+ // sibling of Flash, at-or-below Flash perf on every archetype." The
390
+ // first 50 brain rows per archetype will validate or relax these.
391
+ archetypePerf: {
392
+ classify: 6,
393
+ // starter hypothesis — verify (Flash is 7, lite likely ≤)
394
+ summarize: 6,
395
+ // starter hypothesis — verify; cliff strips tools
396
+ transform: 6,
397
+ // starter hypothesis — verify
398
+ ask: 5,
399
+ hunt: 5,
400
+ generate: 4,
401
+ extract: 4,
402
+ plan: 3,
403
+ critique: 3
404
+ }
219
405
  },
220
406
  {
221
407
  id: "gemini-2.5-pro",
@@ -251,7 +437,21 @@ var PROFILES_RAW = [
251
437
  }
252
438
  ],
253
439
  strengths: ["reasoning", "1m_context", "structured_output", "tool_use"],
254
- weaknesses: ["pricing_above_200k"]
440
+ weaknesses: ["pricing_above_200k"],
441
+ // Master plan §3.3 anchor: tier-2 cross-provider in almost every chain.
442
+ // Sits on the frontier at perf-9 — close to Sonnet but cheaper input.
443
+ archetypePerf: {
444
+ critique: 9,
445
+ plan: 9,
446
+ ask: 8,
447
+ generate: 8,
448
+ extract: 8,
449
+ transform: 8,
450
+ hunt: 8,
451
+ // tier 1 cross-provider for hunt chain
452
+ summarize: 7,
453
+ classify: 7
454
+ }
255
455
  },
256
456
  {
257
457
  id: "gemini-3.1-pro-preview",
@@ -289,7 +489,23 @@ var PROFILES_RAW = [
289
489
  ],
290
490
  strengths: ["reasoning", "1m_context", "agentic_coding", "structured_output", "tool_use"],
291
491
  weaknesses: ["cost", "preview_status", "pricing_above_200k"],
292
- notes: "Frontier Gemini (preview, 2026-Q2). Step-change agentic coding per Google. Cache discount 10\xD7 (vs 4\xD7 for 2.5 Pro). Use status=preview to flag rollback path until GA."
492
+ notes: "Frontier Gemini (preview, 2026-Q2). Step-change agentic coding per Google. Cache discount 10\xD7 (vs 4\xD7 for 2.5 Pro). Use status=preview to flag rollback path until GA.",
493
+ // Frontier-Gemini preview — bumped one notch over 2.5 Pro on agentic
494
+ // coding / reasoning per Google's release notes. Preview status:
495
+ // chains should stay on 2.5 Pro until GA. Starter hypothesis.
496
+ archetypePerf: {
497
+ critique: 10,
498
+ // Google claims step-change on reasoning
499
+ plan: 10,
500
+ ask: 9,
501
+ generate: 9,
502
+ extract: 9,
503
+ transform: 8,
504
+ hunt: 9,
505
+ // step-change agentic per Google
506
+ summarize: 8,
507
+ classify: 7
508
+ }
293
509
  },
294
510
  // ── DeepSeek ──
295
511
  // 2026-05-08 audit (L-073): DeepSeek's `deepseek-chat` was silently aliased
@@ -329,7 +545,24 @@ var PROFILES_RAW = [
329
545
  ],
330
546
  strengths: ["cost", "1m_context", "json_output", "code", "reasoning"],
331
547
  weaknesses: ["parallel_tools", "large_tool_sets"],
332
- notes: "Cheap workhorse. 1M context, 384k max output. Cache-hit input $0.0028/M (1/50\xD7 of miss). Aliased as `deepseek-chat` (non-thinking) and `deepseek-reasoner` (thinking) \u2014 see ALIASES."
548
+ notes: "Cheap workhorse. 1M context, 384k max output. Cache-hit input $0.0028/M (1/50\xD7 of miss). Aliased as `deepseek-chat` (non-thinking) and `deepseek-reasoner` (thinking) \u2014 see ALIASES.",
549
+ // Master plan §6.2 anchor. Brain-validated tier 1 cross-provider for
550
+ // classify (169 rows, 0% empty). Tier 0 for summarize-with-no-tools.
551
+ // Falls off on hunt (sequential tools — L-040) and reasoning depth.
552
+ archetypePerf: {
553
+ classify: 7,
554
+ // brain-validated, 169 rows
555
+ summarize: 7,
556
+ // archetype-tolerant, no brain evidence yet
557
+ ask: 6,
558
+ transform: 6,
559
+ generate: 5,
560
+ plan: 5,
561
+ extract: 5,
562
+ critique: 4,
563
+ hunt: 4
564
+ // sequential tool calls only — L-040
565
+ }
333
566
  },
334
567
  {
335
568
  id: "deepseek-v4-pro",
@@ -365,7 +598,22 @@ var PROFILES_RAW = [
365
598
  ],
366
599
  strengths: ["quality", "reasoning", "1m_context", "json_output", "code", "extended_thinking"],
367
600
  weaknesses: ["parallel_tools", "large_tool_sets"],
368
- notes: "Pro tier. 1M context, 384k max output. Regular pricing $1.74/$3.48; 75% promo through 2026-05-31 ($0.435/$0.87). Default mode = thinking."
601
+ notes: "Pro tier. 1M context, 384k max output. Regular pricing $1.74/$3.48; 75% promo through 2026-05-31 ($0.435/$0.87). Default mode = thinking.",
602
+ // Master plan §3.3: tier 3 cross-provider for plan chain. Reasoning
603
+ // bumped one notch over V4-Flash; same parallel-tool ceiling.
604
+ archetypePerf: {
605
+ plan: 7,
606
+ // §3.3 tier 3 for plan
607
+ critique: 6,
608
+ ask: 7,
609
+ generate: 6,
610
+ classify: 7,
611
+ summarize: 7,
612
+ extract: 6,
613
+ transform: 6,
614
+ hunt: 4
615
+ // sequential tools — same as V4-Flash
616
+ }
369
617
  }
370
618
  ];
371
619
  var ALIASES = {
package/dist/index.d.mts CHANGED
@@ -1,6 +1,7 @@
1
- import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory } from './profiles-Py8c7zjJ.mjs';
2
- export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-Py8c7zjJ.mjs';
3
- export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, IntentArchetypeName, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.mjs';
1
+ import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory } from './profiles-NUZOIzGr.mjs';
2
+ export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, F as FallbackReason, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-NUZOIzGr.mjs';
3
+ import { IntentArchetypeName } from './dialect.mjs';
4
+ export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.mjs';
4
5
 
5
6
  /**
6
7
  * compile() — the main orchestrator.
@@ -275,10 +276,91 @@ declare function countTokens(text: string): number;
275
276
  /** Subset of CompileResult fields the advisor needs. */
276
277
  type AdvisorContext = Pick<CompileResult, 'target' | 'provider' | 'tokensIn' | 'diagnostics'>;
277
278
  /**
278
- * Run all Phase 1 rules and return collected advisories. Order is fixed
279
- * (same as the rule list above) so output is stable across runs.
279
+ * Run all phased rules and return collected advisories. Order is fixed so
280
+ * output is stable across runs. The `policy` argument is new in alpha.9: the
281
+ * `single-model-array` rule needs to know whether the consumer explicitly
282
+ * declared `posture: 'locked'` (in which case single-model is intentional
283
+ * and shouldn't warn).
280
284
  */
281
- declare function runAdvisor(ir: PromptIR, result: AdvisorContext, profile: ModelProfile): BestPracticeAdvisory[];
285
+ declare function runAdvisor(ir: PromptIR, result: AdvisorContext, profile: ModelProfile, policy?: CompilePolicy): BestPracticeAdvisory[];
286
+
287
+ /**
288
+ * getDefaultFallbackChain — the alpha.9 cascading ship.
289
+ *
290
+ * Returns a per-archetype fallback chain that walks the cost/performance
291
+ * Pareto frontier (master plan §1.3 + §3). Three customer postures:
292
+ *
293
+ * locked — caller passes [theOneModel]; never call this function
294
+ * preferred — caller passes `primary`; chain returned is [primary, ...fallbacks]
295
+ * open — caller passes no `primary`; chain returned is [best, ...fallbacks]
296
+ *
297
+ * Each step in the chain:
298
+ * 1. Costs strictly less than the previous (no expensive sideways moves)
299
+ * 2. Comes from a different provider than the previous step where possible
300
+ * (correlated outages don't kill consecutive attempts)
301
+ * 3. Stays above the archetype's perf floor (skip models scored <baseline
302
+ * for archetypes where degradation would be unacceptable)
303
+ *
304
+ * In alpha.9 the chain is **hand-curated** per archetype (§3.3 starter
305
+ * table). Brain-query mode lands in alpha.10. Policy.blockedModels filters
306
+ * the result; policy.maxCostPerCallUsd is NOT applied here because the
307
+ * function doesn't see the IR's token counts — that filtering happens at
308
+ * `passScoreTargets()` time inside compile().
309
+ *
310
+ * The function is **pure** — no brain query, no I/O, no randomness. Same
311
+ * inputs always produce the same chain.
312
+ */
313
+
314
+ /**
315
+ * Posture passed into `getDefaultFallbackChain`. The chain function only
316
+ * sees `'open'` and `'preferred'` — callers in `'locked'` posture should
317
+ * pass `models: [theOneModel]` directly and skip this function entirely.
318
+ *
319
+ * Equivalent to `CompilePolicy.posture` minus `'locked'`. Kept distinct so
320
+ * the type system enforces "don't ask for a chain when you don't want one."
321
+ */
322
+ type FallbackPosture = 'open' | 'preferred';
323
+ interface GetDefaultFallbackChainOpts {
324
+ /** The archetype the call is performing. Drives chain shape. */
325
+ archetype: IntentArchetypeName;
326
+ /**
327
+ * The user-selected or caller-anchored primary model. When provided, it
328
+ * appears at position 0 of the returned chain and fallbacks follow.
329
+ * When omitted, the function picks the best-perf model for the archetype
330
+ * as position 0 (open posture).
331
+ */
332
+ primary?: string;
333
+ /**
334
+ * Informational. `'preferred'` and `'open'` produce the same chain shape
335
+ * given the same `primary`/no-primary input — posture is a tag the brain
336
+ * uses to distinguish "user-anchored" from "library-anchored" telemetry.
337
+ */
338
+ posture?: FallbackPosture;
339
+ /**
340
+ * Cap on chain length. Default 3. Min 1. Useful when the consumer wants
341
+ * to keep the worst-case latency low (each fallback adds a round-trip).
342
+ */
343
+ maxDepth?: number;
344
+ /**
345
+ * Consumer-side gating. `blockedModels` are filtered from the chain.
346
+ * `preferredModels` is informational (no boost applied at this layer —
347
+ * compile()'s `passScoreTargets` handles preference ranking).
348
+ * `maxCostPerCallUsd` is NOT applied here — needs IR-level token
349
+ * estimation. Use compile()'s policy plumbing instead.
350
+ */
351
+ policy?: CompilePolicy;
352
+ }
353
+ declare function getDefaultFallbackChain(opts: GetDefaultFallbackChainOpts): string[];
354
+ /**
355
+ * Returns a shallow copy of the hand-curated starter chain for an archetype.
356
+ * Useful for tests + the `scripts/digest.mjs` operator readout.
357
+ */
358
+ declare function getStarterChain(archetype: IntentArchetypeName): string[];
359
+ /**
360
+ * Returns a shallow copy of all starter chains keyed by archetype.
361
+ * Useful for the `digest.mjs` readout and consumer audits.
362
+ */
363
+ declare function getAllStarterChains(): Record<IntentArchetypeName, string[]>;
282
364
 
283
365
  /**
284
366
  * @warmdrift/kgauto v2 — prompt compiler + central learning brain.
@@ -326,4 +408,4 @@ declare function runAdvisor(ir: PromptIR, result: AdvisorContext, profile: Model
326
408
  */
327
409
  declare function compile(ir: PromptIR, opts?: CompileOptions): CompileResult;
328
410
 
329
- export { ApiKeys, type AppOracle, BestPracticeAdvisory, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, type OutcomePayload, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, runAdvisor, setTokenizer };
411
+ export { ApiKeys, type AppOracle, BestPracticeAdvisory, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type FallbackPosture, type GetDefaultFallbackChainOpts, IntentArchetypeName, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, type OutcomePayload, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, getAllStarterChains, getDefaultFallbackChain, getStarterChain, record, resetTokenizer, runAdvisor, setTokenizer };
package/dist/index.d.ts CHANGED
@@ -1,6 +1,7 @@
1
- import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory } from './profiles-B3eNQ2py.js';
2
- export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-B3eNQ2py.js';
3
- export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, IntentArchetypeName, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.js';
1
+ import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory } from './profiles-BYVOc1eW.js';
2
+ export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, F as FallbackReason, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-BYVOc1eW.js';
3
+ import { IntentArchetypeName } from './dialect.js';
4
+ export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.js';
4
5
 
5
6
  /**
6
7
  * compile() — the main orchestrator.
@@ -275,10 +276,91 @@ declare function countTokens(text: string): number;
275
276
  /** Subset of CompileResult fields the advisor needs. */
276
277
  type AdvisorContext = Pick<CompileResult, 'target' | 'provider' | 'tokensIn' | 'diagnostics'>;
277
278
  /**
278
- * Run all Phase 1 rules and return collected advisories. Order is fixed
279
- * (same as the rule list above) so output is stable across runs.
279
+ * Run all phased rules and return collected advisories. Order is fixed so
280
+ * output is stable across runs. The `policy` argument is new in alpha.9: the
281
+ * `single-model-array` rule needs to know whether the consumer explicitly
282
+ * declared `posture: 'locked'` (in which case single-model is intentional
283
+ * and shouldn't warn).
280
284
  */
281
- declare function runAdvisor(ir: PromptIR, result: AdvisorContext, profile: ModelProfile): BestPracticeAdvisory[];
285
+ declare function runAdvisor(ir: PromptIR, result: AdvisorContext, profile: ModelProfile, policy?: CompilePolicy): BestPracticeAdvisory[];
286
+
287
+ /**
288
+ * getDefaultFallbackChain — the alpha.9 cascading ship.
289
+ *
290
+ * Returns a per-archetype fallback chain that walks the cost/performance
291
+ * Pareto frontier (master plan §1.3 + §3). Three customer postures:
292
+ *
293
+ * locked — caller passes [theOneModel]; never call this function
294
+ * preferred — caller passes `primary`; chain returned is [primary, ...fallbacks]
295
+ * open — caller passes no `primary`; chain returned is [best, ...fallbacks]
296
+ *
297
+ * Each step in the chain:
298
+ * 1. Costs strictly less than the previous (no expensive sideways moves)
299
+ * 2. Comes from a different provider than the previous step where possible
300
+ * (correlated outages don't kill consecutive attempts)
301
+ * 3. Stays above the archetype's perf floor (skip models scored <baseline
302
+ * for archetypes where degradation would be unacceptable)
303
+ *
304
+ * In alpha.9 the chain is **hand-curated** per archetype (§3.3 starter
305
+ * table). Brain-query mode lands in alpha.10. Policy.blockedModels filters
306
+ * the result; policy.maxCostPerCallUsd is NOT applied here because the
307
+ * function doesn't see the IR's token counts — that filtering happens at
308
+ * `passScoreTargets()` time inside compile().
309
+ *
310
+ * The function is **pure** — no brain query, no I/O, no randomness. Same
311
+ * inputs always produce the same chain.
312
+ */
313
+
314
+ /**
315
+ * Posture passed into `getDefaultFallbackChain`. The chain function only
316
+ * sees `'open'` and `'preferred'` — callers in `'locked'` posture should
317
+ * pass `models: [theOneModel]` directly and skip this function entirely.
318
+ *
319
+ * Equivalent to `CompilePolicy.posture` minus `'locked'`. Kept distinct so
320
+ * the type system enforces "don't ask for a chain when you don't want one."
321
+ */
322
+ type FallbackPosture = 'open' | 'preferred';
323
+ interface GetDefaultFallbackChainOpts {
324
+ /** The archetype the call is performing. Drives chain shape. */
325
+ archetype: IntentArchetypeName;
326
+ /**
327
+ * The user-selected or caller-anchored primary model. When provided, it
328
+ * appears at position 0 of the returned chain and fallbacks follow.
329
+ * When omitted, the function picks the best-perf model for the archetype
330
+ * as position 0 (open posture).
331
+ */
332
+ primary?: string;
333
+ /**
334
+ * Informational. `'preferred'` and `'open'` produce the same chain shape
335
+ * given the same `primary`/no-primary input — posture is a tag the brain
336
+ * uses to distinguish "user-anchored" from "library-anchored" telemetry.
337
+ */
338
+ posture?: FallbackPosture;
339
+ /**
340
+ * Cap on chain length. Default 3. Min 1. Useful when the consumer wants
341
+ * to keep the worst-case latency low (each fallback adds a round-trip).
342
+ */
343
+ maxDepth?: number;
344
+ /**
345
+ * Consumer-side gating. `blockedModels` are filtered from the chain.
346
+ * `preferredModels` is informational (no boost applied at this layer —
347
+ * compile()'s `passScoreTargets` handles preference ranking).
348
+ * `maxCostPerCallUsd` is NOT applied here — needs IR-level token
349
+ * estimation. Use compile()'s policy plumbing instead.
350
+ */
351
+ policy?: CompilePolicy;
352
+ }
353
+ declare function getDefaultFallbackChain(opts: GetDefaultFallbackChainOpts): string[];
354
+ /**
355
+ * Returns a shallow copy of the hand-curated starter chain for an archetype.
356
+ * Useful for tests + the `scripts/digest.mjs` operator readout.
357
+ */
358
+ declare function getStarterChain(archetype: IntentArchetypeName): string[];
359
+ /**
360
+ * Returns a shallow copy of all starter chains keyed by archetype.
361
+ * Useful for the `digest.mjs` readout and consumer audits.
362
+ */
363
+ declare function getAllStarterChains(): Record<IntentArchetypeName, string[]>;
282
364
 
283
365
  /**
284
366
  * @warmdrift/kgauto v2 — prompt compiler + central learning brain.
@@ -326,4 +408,4 @@ declare function runAdvisor(ir: PromptIR, result: AdvisorContext, profile: Model
326
408
  */
327
409
  declare function compile(ir: PromptIR, opts?: CompileOptions): CompileResult;
328
410
 
329
- export { ApiKeys, type AppOracle, BestPracticeAdvisory, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, type OutcomePayload, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, runAdvisor, setTokenizer };
411
+ export { ApiKeys, type AppOracle, BestPracticeAdvisory, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type FallbackPosture, type GetDefaultFallbackChainOpts, IntentArchetypeName, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, type OutcomePayload, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, getAllStarterChains, getDefaultFallbackChain, getStarterChain, record, resetTokenizer, runAdvisor, setTokenizer };