@warmdrift/kgauto-compiler 2.0.0-alpha.8 → 2.0.0-alpha.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-MBEI5UOM.mjs → chunk-3KVKELZN.mjs} +257 -9
- package/dist/index.d.mts +89 -7
- package/dist/index.d.ts +89 -7
- package/dist/index.js +417 -12
- package/dist/index.mjs +158 -4
- package/dist/{profiles-B3eNQ2py.d.ts → profiles-BYVOc1eW.d.ts} +82 -1
- package/dist/{profiles-Py8c7zjJ.d.mts → profiles-NUZOIzGr.d.mts} +82 -1
- package/dist/profiles.d.mts +1 -1
- package/dist/profiles.d.ts +1 -1
- package/dist/profiles.js +257 -9
- package/dist/profiles.mjs +1 -1
- package/package.json +1 -1
|
@@ -51,7 +51,24 @@ var PROFILES_RAW = [
|
|
|
51
51
|
],
|
|
52
52
|
strengths: ["reasoning", "agentic_coding", "long_context", "reliable_tool_use", "structured_output"],
|
|
53
53
|
weaknesses: ["cost", "latency"],
|
|
54
|
-
notes: "Frontier (2026-05). Step-change improvement over 4.6 in agentic coding. Adaptive thinking only \u2014 no extended-thinking toggle. 1M context, 128k max output."
|
|
54
|
+
notes: "Frontier (2026-05). Step-change improvement over 4.6 in agentic coding. Adaptive thinking only \u2014 no extended-thinking toggle. 1M context, 128k max output.",
|
|
55
|
+
// Frontier perf. Drops on archetypes where parallel-tool throughput
|
|
56
|
+
// (hunt) or low-budget cost-sensitivity (classify/summarize) matters
|
|
57
|
+
// more than reasoning depth.
|
|
58
|
+
archetypePerf: {
|
|
59
|
+
critique: 10,
|
|
60
|
+
plan: 10,
|
|
61
|
+
generate: 9,
|
|
62
|
+
ask: 9,
|
|
63
|
+
extract: 9,
|
|
64
|
+
transform: 9,
|
|
65
|
+
hunt: 8,
|
|
66
|
+
// strong but Flash dominates parallel tool throughput
|
|
67
|
+
summarize: 8,
|
|
68
|
+
// overkill for tolerant archetype; cost-out of frontier
|
|
69
|
+
classify: 8
|
|
70
|
+
// overkill; brain-validated cheaper models cover this
|
|
71
|
+
}
|
|
55
72
|
},
|
|
56
73
|
{
|
|
57
74
|
id: "claude-opus-4-6",
|
|
@@ -83,7 +100,20 @@ var PROFILES_RAW = [
|
|
|
83
100
|
],
|
|
84
101
|
strengths: ["reasoning", "long_context", "reliable_tool_use", "structured_output", "extended_thinking"],
|
|
85
102
|
weaknesses: ["cost", "latency"],
|
|
86
|
-
notes: "Predecessor to 4.7. Still current in Anthropic legacy table. Same pricing as 4.7 \u2014 choose 4.7 unless you need extended-thinking budget control (4.7 is adaptive-only)."
|
|
103
|
+
notes: "Predecessor to 4.7. Still current in Anthropic legacy table. Same pricing as 4.7 \u2014 choose 4.7 unless you need extended-thinking budget control (4.7 is adaptive-only).",
|
|
104
|
+
// One notch below 4.7 on critique/plan/hunt, level elsewhere — extended-thinking edge does
|
|
105
|
+
// not flip any archetype ranking. Legacy: chains should prefer 4.7.
|
|
106
|
+
archetypePerf: {
|
|
107
|
+
critique: 9,
|
|
108
|
+
plan: 9,
|
|
109
|
+
generate: 9,
|
|
110
|
+
ask: 9,
|
|
111
|
+
extract: 9,
|
|
112
|
+
transform: 9,
|
|
113
|
+
hunt: 7,
|
|
114
|
+
summarize: 8,
|
|
115
|
+
classify: 8
|
|
116
|
+
}
|
|
87
117
|
},
|
|
88
118
|
{
|
|
89
119
|
id: "claude-sonnet-4-6",
|
|
@@ -107,7 +137,23 @@ var PROFILES_RAW = [
|
|
|
107
137
|
],
|
|
108
138
|
strengths: ["quality", "tool_use", "long_context", "cache_friendly", "extended_thinking"],
|
|
109
139
|
weaknesses: [],
|
|
110
|
-
notes: "Workhorse. Best price/quality for most multi-turn agentic work. 1M context, 64k max output."
|
|
140
|
+
notes: "Workhorse. Best price/quality for most multi-turn agentic work. 1M context, 64k max output.",
|
|
141
|
+
// Master plan §6.2 anchor. Tier 0 for plan/generate/ask/extract/transform
|
|
142
|
+
// in starter chains; tier 1 cross-provider for hunt/summarize/classify.
|
|
143
|
+
archetypePerf: {
|
|
144
|
+
ask: 9,
|
|
145
|
+
generate: 9,
|
|
146
|
+
plan: 9,
|
|
147
|
+
critique: 9,
|
|
148
|
+
extract: 9,
|
|
149
|
+
transform: 9,
|
|
150
|
+
hunt: 7,
|
|
151
|
+
// strong but Flash beats on parallel tool throughput
|
|
152
|
+
summarize: 8,
|
|
153
|
+
// overkill for tolerant archetype
|
|
154
|
+
classify: 8
|
|
155
|
+
// overkill
|
|
156
|
+
}
|
|
111
157
|
},
|
|
112
158
|
{
|
|
113
159
|
id: "claude-haiku-4-5",
|
|
@@ -137,7 +183,23 @@ var PROFILES_RAW = [
|
|
|
137
183
|
],
|
|
138
184
|
strengths: ["speed", "cost", "classification", "cache_friendly", "extended_thinking"],
|
|
139
185
|
weaknesses: ["complex_reasoning", "large_tool_sets"],
|
|
140
|
-
notes: "Cheapest Anthropic. Great for classify, summarize, ask shapes. 200k context, 64k max output. API alias `claude-haiku-4-5` resolves to dated snapshot `claude-haiku-4-5-20251001`."
|
|
186
|
+
notes: "Cheapest Anthropic. Great for classify, summarize, ask shapes. 200k context, 64k max output. API alias `claude-haiku-4-5` resolves to dated snapshot `claude-haiku-4-5-20251001`.",
|
|
187
|
+
// Tier 1 cross-provider anchor for short-output chains (classify/
|
|
188
|
+
// summarize/extract/transform). Falls off on plan/critique where
|
|
189
|
+
// reasoning depth matters; competes with Pro on cost+latency.
|
|
190
|
+
archetypePerf: {
|
|
191
|
+
classify: 8,
|
|
192
|
+
summarize: 8,
|
|
193
|
+
ask: 7,
|
|
194
|
+
transform: 7,
|
|
195
|
+
extract: 7,
|
|
196
|
+
hunt: 6,
|
|
197
|
+
// tool reliability drops at 16 — cliff guard fires
|
|
198
|
+
generate: 6,
|
|
199
|
+
plan: 5,
|
|
200
|
+
critique: 4
|
|
201
|
+
// reasoning depth gap vs Sonnet/Opus
|
|
202
|
+
}
|
|
141
203
|
},
|
|
142
204
|
// ── Google ──
|
|
143
205
|
{
|
|
@@ -215,7 +277,131 @@ var PROFILES_RAW = [
|
|
|
215
277
|
],
|
|
216
278
|
strengths: ["speed", "volume", "classification", "1m_context", "cost"],
|
|
217
279
|
weaknesses: ["complex_schemas", "large_tool_sets", "high_context_quality"],
|
|
218
|
-
notes: "Fast and cheap with 1M context. Quality cliffs at 8K context and 20 tools \u2014 guard with cliffs."
|
|
280
|
+
notes: "Fast and cheap with 1M context. Quality cliffs at 8K context and 20 tools \u2014 guard with cliffs.",
|
|
281
|
+
// Master plan §6.2 anchor. Tier 0 for hunt (parallel tool throughput
|
|
282
|
+
// 15-75 calls/step beats Sonnet — L-040), summarize, classify.
|
|
283
|
+
archetypePerf: {
|
|
284
|
+
hunt: 9,
|
|
285
|
+
// L-040: parallel tool throughput 15-75/step
|
|
286
|
+
classify: 7,
|
|
287
|
+
// brain-validated, 218 rows
|
|
288
|
+
summarize: 7,
|
|
289
|
+
// brain-validated; cliff strips tools when present
|
|
290
|
+
transform: 7,
|
|
291
|
+
ask: 7,
|
|
292
|
+
generate: 6,
|
|
293
|
+
plan: 5,
|
|
294
|
+
extract: 6,
|
|
295
|
+
// alpha.8 MAX_TOKENS history on structured output
|
|
296
|
+
critique: 4
|
|
297
|
+
// reasoning shallower than Sonnet/Opus
|
|
298
|
+
}
|
|
299
|
+
},
|
|
300
|
+
{
|
|
301
|
+
// ── Gemini 2.5 Flash-Lite ──
|
|
302
|
+
// Onboarded 2026-05-13 (s22) after the model-release watcher surfaced
|
|
303
|
+
// it as an UNREGISTERED + NEW candidate. Released by Google July 2025,
|
|
304
|
+
// stable. Positioned BELOW Flash on the cost/perf frontier:
|
|
305
|
+
// input $0.10/M (Flash $0.30/M) — 3× cheaper
|
|
306
|
+
// output $0.40/M (Flash $2.50/M) — 6× cheaper
|
|
307
|
+
// cache $0.01/M — 1/10 of input (vs Flash 0.25 discount)
|
|
308
|
+
// Cliffs are HYPOTHESIZED from Flash's known failure modes — Flash-Lite
|
|
309
|
+
// is a smaller sibling, so we inherit Flash's cliffs at equal-or-tighter
|
|
310
|
+
// thresholds. The brain will validate/relax these as evidence accumulates
|
|
311
|
+
// per (archetype, model) tuple. Currently ZERO brain rows for this model.
|
|
312
|
+
id: "gemini-2.5-flash-lite",
|
|
313
|
+
verifiedAgainstDocs: "2026-05-13",
|
|
314
|
+
provider: "google",
|
|
315
|
+
status: "current",
|
|
316
|
+
maxContextTokens: 1048576,
|
|
317
|
+
maxOutputTokens: 65536,
|
|
318
|
+
maxTools: 128,
|
|
319
|
+
parallelToolCalls: true,
|
|
320
|
+
structuredOutput: "native",
|
|
321
|
+
systemPromptMode: "separate",
|
|
322
|
+
streaming: true,
|
|
323
|
+
cliffs: [
|
|
324
|
+
{
|
|
325
|
+
metric: "input_tokens",
|
|
326
|
+
threshold: 8e3,
|
|
327
|
+
action: "downgrade_quality_warning",
|
|
328
|
+
reason: "Inherited from Flash: quality degrades above ~8K. Smaller model \u2014 likely degrades faster. Re-tune from brain after n\u226520."
|
|
329
|
+
},
|
|
330
|
+
{
|
|
331
|
+
metric: "tool_count",
|
|
332
|
+
threshold: 10,
|
|
333
|
+
action: "drop_to_top_relevant",
|
|
334
|
+
reason: "Conservative: Flash drops at 20, Flash-Lite is smaller \u2014 assume tighter ceiling until brain proves otherwise."
|
|
335
|
+
},
|
|
336
|
+
{
|
|
337
|
+
metric: "thinking_with_short_output",
|
|
338
|
+
threshold: 1,
|
|
339
|
+
action: "force_thinking_budget_zero",
|
|
340
|
+
reason: "Thinking enabled per Google API (thinking: true). Same drain risk as Flash \u2014 thinking tokens consume maxOutputTokens."
|
|
341
|
+
},
|
|
342
|
+
{
|
|
343
|
+
// Strong prior: Flash hit 5/5 empty rate on summarize+tools (s11
|
|
344
|
+
// trust artifact, kgauto commit 3872832). Flash-Lite shares the
|
|
345
|
+
// same architectural family — almost certainly inherits this cliff.
|
|
346
|
+
// Ship the guard preemptively; brain telemetry confirms or relaxes.
|
|
347
|
+
metric: "tool_count",
|
|
348
|
+
threshold: 1,
|
|
349
|
+
whenIntent: "summarize",
|
|
350
|
+
action: "strip_tools",
|
|
351
|
+
reason: "Inherited from Flash s11 cliff: summarize+tools \u2192 empty response. Preemptive guard until brain evidence on Flash-Lite specifically."
|
|
352
|
+
}
|
|
353
|
+
],
|
|
354
|
+
costInputPer1m: 0.1,
|
|
355
|
+
costOutputPer1m: 0.4,
|
|
356
|
+
lowering: {
|
|
357
|
+
...GOOGLE_LOWERING_BASE,
|
|
358
|
+
// Cache discount 10× (vs Flash 4×) — Google's spec is $0.01/M cache vs
|
|
359
|
+
// $0.10/M input. Material for repeat-prompt workloads (classify shape).
|
|
360
|
+
cache: { ...GOOGLE_LOWERING_BASE.cache, discount: 0.1 },
|
|
361
|
+
thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
|
|
362
|
+
},
|
|
363
|
+
recovery: [
|
|
364
|
+
{
|
|
365
|
+
signal: "empty_response_after_tool",
|
|
366
|
+
action: "retry_with_params",
|
|
367
|
+
retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
|
|
368
|
+
maxRetries: 1,
|
|
369
|
+
reason: "Known on Flash family: empty after tool result \u2014 retry with thinking off."
|
|
370
|
+
},
|
|
371
|
+
{
|
|
372
|
+
signal: "empty_response",
|
|
373
|
+
action: "retry_with_params",
|
|
374
|
+
retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
|
|
375
|
+
maxRetries: 1,
|
|
376
|
+
reason: "Empty response \u2014 try with thinking off."
|
|
377
|
+
},
|
|
378
|
+
{
|
|
379
|
+
signal: "malformed_function_call",
|
|
380
|
+
action: "escalate",
|
|
381
|
+
reason: "MALFORMED_FUNCTION_CALL maps to stop \u2014 escalate to next target."
|
|
382
|
+
}
|
|
383
|
+
],
|
|
384
|
+
strengths: ["lowest_cost", "speed", "volume", "classification", "summarize", "1m_context", "cache_friendly"],
|
|
385
|
+
weaknesses: ["complex_reasoning", "large_tool_sets", "complex_schemas", "structured_output_unproven", "long_context_quality"],
|
|
386
|
+
notes: "Bottom-frontier anchor on cost: $0.10/$0.40 per 1M tokens, 1M context, 65K max output. Released July 2025 (stable). Positioned for classify / summarize / transform archetypes where quality bar is forgiving. Cliffs inherited from Flash at equal-or-tighter thresholds \u2014 re-tune per (archetype) once brain has n\u226520 rows. Alpha.8 contract layer handles MAX_TOKENS-on-structured-output via fallback chain, so structuredOutput=native is safe to declare even though Flash had alpha.8 history. Cache discount in spec: $0.01/M = 1/10 of input (richer than Flash 25%) \u2014 meaningful for repeat-prompt workloads.",
|
|
387
|
+
// Tier 3 emergency floor for summarize/classify chains. ZERO brain
|
|
388
|
+
// rows — all values are starter hypotheses anchored to "smaller
|
|
389
|
+
// sibling of Flash, at-or-below Flash perf on every archetype." The
|
|
390
|
+
// first 50 brain rows per archetype will validate or relax these.
|
|
391
|
+
archetypePerf: {
|
|
392
|
+
classify: 6,
|
|
393
|
+
// starter hypothesis — verify (Flash is 7, lite likely ≤)
|
|
394
|
+
summarize: 6,
|
|
395
|
+
// starter hypothesis — verify; cliff strips tools
|
|
396
|
+
transform: 6,
|
|
397
|
+
// starter hypothesis — verify
|
|
398
|
+
ask: 5,
|
|
399
|
+
hunt: 5,
|
|
400
|
+
generate: 4,
|
|
401
|
+
extract: 4,
|
|
402
|
+
plan: 3,
|
|
403
|
+
critique: 3
|
|
404
|
+
}
|
|
219
405
|
},
|
|
220
406
|
{
|
|
221
407
|
id: "gemini-2.5-pro",
|
|
@@ -251,7 +437,21 @@ var PROFILES_RAW = [
|
|
|
251
437
|
}
|
|
252
438
|
],
|
|
253
439
|
strengths: ["reasoning", "1m_context", "structured_output", "tool_use"],
|
|
254
|
-
weaknesses: ["pricing_above_200k"]
|
|
440
|
+
weaknesses: ["pricing_above_200k"],
|
|
441
|
+
// Master plan §3.3 anchor: tier-2 cross-provider in almost every chain.
|
|
442
|
+
// Sits on the frontier at perf-9 — close to Sonnet but cheaper input.
|
|
443
|
+
archetypePerf: {
|
|
444
|
+
critique: 9,
|
|
445
|
+
plan: 9,
|
|
446
|
+
ask: 8,
|
|
447
|
+
generate: 8,
|
|
448
|
+
extract: 8,
|
|
449
|
+
transform: 8,
|
|
450
|
+
hunt: 8,
|
|
451
|
+
// tier 1 cross-provider for hunt chain
|
|
452
|
+
summarize: 7,
|
|
453
|
+
classify: 7
|
|
454
|
+
}
|
|
255
455
|
},
|
|
256
456
|
{
|
|
257
457
|
id: "gemini-3.1-pro-preview",
|
|
@@ -289,7 +489,23 @@ var PROFILES_RAW = [
|
|
|
289
489
|
],
|
|
290
490
|
strengths: ["reasoning", "1m_context", "agentic_coding", "structured_output", "tool_use"],
|
|
291
491
|
weaknesses: ["cost", "preview_status", "pricing_above_200k"],
|
|
292
|
-
notes: "Frontier Gemini (preview, 2026-Q2). Step-change agentic coding per Google. Cache discount 10\xD7 (vs 4\xD7 for 2.5 Pro). Use status=preview to flag rollback path until GA."
|
|
492
|
+
notes: "Frontier Gemini (preview, 2026-Q2). Step-change agentic coding per Google. Cache discount 10\xD7 (vs 4\xD7 for 2.5 Pro). Use status=preview to flag rollback path until GA.",
|
|
493
|
+
// Frontier-Gemini preview — bumped one notch over 2.5 Pro on agentic
|
|
494
|
+
// coding / reasoning per Google's release notes. Preview status:
|
|
495
|
+
// chains should stay on 2.5 Pro until GA. Starter hypothesis.
|
|
496
|
+
archetypePerf: {
|
|
497
|
+
critique: 10,
|
|
498
|
+
// Google claims step-change on reasoning
|
|
499
|
+
plan: 10,
|
|
500
|
+
ask: 9,
|
|
501
|
+
generate: 9,
|
|
502
|
+
extract: 9,
|
|
503
|
+
transform: 8,
|
|
504
|
+
hunt: 9,
|
|
505
|
+
// step-change agentic per Google
|
|
506
|
+
summarize: 8,
|
|
507
|
+
classify: 7
|
|
508
|
+
}
|
|
293
509
|
},
|
|
294
510
|
// ── DeepSeek ──
|
|
295
511
|
// 2026-05-08 audit (L-073): DeepSeek's `deepseek-chat` was silently aliased
|
|
@@ -329,7 +545,24 @@ var PROFILES_RAW = [
|
|
|
329
545
|
],
|
|
330
546
|
strengths: ["cost", "1m_context", "json_output", "code", "reasoning"],
|
|
331
547
|
weaknesses: ["parallel_tools", "large_tool_sets"],
|
|
332
|
-
notes: "Cheap workhorse. 1M context, 384k max output. Cache-hit input $0.0028/M (1/50\xD7 of miss). Aliased as `deepseek-chat` (non-thinking) and `deepseek-reasoner` (thinking) \u2014 see ALIASES."
|
|
548
|
+
notes: "Cheap workhorse. 1M context, 384k max output. Cache-hit input $0.0028/M (1/50\xD7 of miss). Aliased as `deepseek-chat` (non-thinking) and `deepseek-reasoner` (thinking) \u2014 see ALIASES.",
|
|
549
|
+
// Master plan §6.2 anchor. Brain-validated tier 1 cross-provider for
|
|
550
|
+
// classify (169 rows, 0% empty). Tier 0 for summarize-with-no-tools.
|
|
551
|
+
// Falls off on hunt (sequential tools — L-040) and reasoning depth.
|
|
552
|
+
archetypePerf: {
|
|
553
|
+
classify: 7,
|
|
554
|
+
// brain-validated, 169 rows
|
|
555
|
+
summarize: 7,
|
|
556
|
+
// archetype-tolerant, no brain evidence yet
|
|
557
|
+
ask: 6,
|
|
558
|
+
transform: 6,
|
|
559
|
+
generate: 5,
|
|
560
|
+
plan: 5,
|
|
561
|
+
extract: 5,
|
|
562
|
+
critique: 4,
|
|
563
|
+
hunt: 4
|
|
564
|
+
// sequential tool calls only — L-040
|
|
565
|
+
}
|
|
333
566
|
},
|
|
334
567
|
{
|
|
335
568
|
id: "deepseek-v4-pro",
|
|
@@ -365,7 +598,22 @@ var PROFILES_RAW = [
|
|
|
365
598
|
],
|
|
366
599
|
strengths: ["quality", "reasoning", "1m_context", "json_output", "code", "extended_thinking"],
|
|
367
600
|
weaknesses: ["parallel_tools", "large_tool_sets"],
|
|
368
|
-
notes: "Pro tier. 1M context, 384k max output. Regular pricing $1.74/$3.48; 75% promo through 2026-05-31 ($0.435/$0.87). Default mode = thinking."
|
|
601
|
+
notes: "Pro tier. 1M context, 384k max output. Regular pricing $1.74/$3.48; 75% promo through 2026-05-31 ($0.435/$0.87). Default mode = thinking.",
|
|
602
|
+
// Master plan §3.3: tier 3 cross-provider for plan chain. Reasoning
|
|
603
|
+
// bumped one notch over V4-Flash; same parallel-tool ceiling.
|
|
604
|
+
archetypePerf: {
|
|
605
|
+
plan: 7,
|
|
606
|
+
// §3.3 tier 3 for plan
|
|
607
|
+
critique: 6,
|
|
608
|
+
ask: 7,
|
|
609
|
+
generate: 6,
|
|
610
|
+
classify: 7,
|
|
611
|
+
summarize: 7,
|
|
612
|
+
extract: 6,
|
|
613
|
+
transform: 6,
|
|
614
|
+
hunt: 4
|
|
615
|
+
// sequential tools — same as V4-Flash
|
|
616
|
+
}
|
|
369
617
|
}
|
|
370
618
|
];
|
|
371
619
|
var ALIASES = {
|
package/dist/index.d.mts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory } from './profiles-Py8c7zjJ.mjs';
|
|
2
|
-
export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-Py8c7zjJ.mjs';
|
|
3
|
-
|
|
1
|
+
import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory } from './profiles-NUZOIzGr.mjs';
|
|
2
|
+
export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, F as FallbackReason, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-NUZOIzGr.mjs';
|
|
3
|
+
import { IntentArchetypeName } from './dialect.mjs';
|
|
4
|
+
export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.mjs';
|
|
4
5
|
|
|
5
6
|
/**
|
|
6
7
|
* compile() — the main orchestrator.
|
|
@@ -275,10 +276,91 @@ declare function countTokens(text: string): number;
|
|
|
275
276
|
/** Subset of CompileResult fields the advisor needs. */
|
|
276
277
|
type AdvisorContext = Pick<CompileResult, 'target' | 'provider' | 'tokensIn' | 'diagnostics'>;
|
|
277
278
|
/**
|
|
278
|
-
* Run all
|
|
279
|
-
*
|
|
279
|
+
* Run all phased rules and return collected advisories. Order is fixed so
|
|
280
|
+
* output is stable across runs. The `policy` argument is alpha.9 — the
|
|
281
|
+
* `single-model-array` rule needs to know whether the consumer explicitly
|
|
282
|
+
* declared `posture: 'locked'` (in which case single-model is intentional
|
|
283
|
+
* and shouldn't warn).
|
|
280
284
|
*/
|
|
281
|
-
declare function runAdvisor(ir: PromptIR, result: AdvisorContext, profile: ModelProfile): BestPracticeAdvisory[];
|
|
285
|
+
declare function runAdvisor(ir: PromptIR, result: AdvisorContext, profile: ModelProfile, policy?: CompilePolicy): BestPracticeAdvisory[];
|
|
286
|
+
|
|
287
|
+
/**
|
|
288
|
+
* getDefaultFallbackChain — the alpha.9 cascading ship.
|
|
289
|
+
*
|
|
290
|
+
* Returns a per-archetype fallback chain that walks the cost/performance
|
|
291
|
+
* Pareto frontier (master plan §1.3 + §3). Three customer postures:
|
|
292
|
+
*
|
|
293
|
+
* locked — caller passes [theOneModel]; never call this function
|
|
294
|
+
* preferred — caller passes `primary`; chain returned is [primary, ...fallbacks]
|
|
295
|
+
* open — caller passes no `primary`; chain returned is [best, ...fallbacks]
|
|
296
|
+
*
|
|
297
|
+
* The chain at each step:
|
|
298
|
+
* 1. Costs strictly less than the previous (no expensive sideways moves)
|
|
299
|
+
* 2. Comes from a different provider than the previous step where possible
|
|
300
|
+
* (correlated outages don't kill consecutive attempts)
|
|
301
|
+
* 3. Stays above the archetype's perf floor (skip models scored <baseline
|
|
302
|
+
* for archetypes where degradation would be unacceptable)
|
|
303
|
+
*
|
|
304
|
+
* In alpha.9 the chain is **hand-curated** per archetype (§3.3 starter
|
|
305
|
+
* table). Brain-query mode lands in alpha.10. Policy.blockedModels filters
|
|
306
|
+
* the result; policy.maxCostPerCallUsd is NOT applied here because the
|
|
307
|
+
* function doesn't see the IR's token counts — that filtering happens at
|
|
308
|
+
* `passScoreTargets()` time inside compile().
|
|
309
|
+
*
|
|
310
|
+
* The function is **pure** — no brain query, no I/O, no randomness. Same
|
|
311
|
+
* inputs always produce the same chain.
|
|
312
|
+
*/
|
|
313
|
+
|
|
314
|
+
/**
|
|
315
|
+
* Posture passed into `getDefaultFallbackChain`. The chain function only
|
|
316
|
+
* sees `'open'` and `'preferred'` — callers in `'locked'` posture should
|
|
317
|
+
* pass `models: [theOneModel]` directly and skip this function entirely.
|
|
318
|
+
*
|
|
319
|
+
* Equivalent to `CompilePolicy.posture` minus `'locked'`. Kept distinct so
|
|
320
|
+
* the type system enforces "don't ask for a chain when you don't want one."
|
|
321
|
+
*/
|
|
322
|
+
type FallbackPosture = 'open' | 'preferred';
|
|
323
|
+
interface GetDefaultFallbackChainOpts {
|
|
324
|
+
/** The archetype the call is performing. Drives chain shape. */
|
|
325
|
+
archetype: IntentArchetypeName;
|
|
326
|
+
/**
|
|
327
|
+
* The user-selected or caller-anchored primary model. When provided, it
|
|
328
|
+
* appears at position 0 of the returned chain and fallbacks follow.
|
|
329
|
+
* When omitted, the function picks the best-perf model for the archetype
|
|
330
|
+
* as position 0 (open posture).
|
|
331
|
+
*/
|
|
332
|
+
primary?: string;
|
|
333
|
+
/**
|
|
334
|
+
* Informational. `'preferred'` and `'open'` produce the same chain shape
|
|
335
|
+
* given the same `primary`/no-primary input — posture is a tag the brain
|
|
336
|
+
* uses to distinguish "user-anchored" from "library-anchored" telemetry.
|
|
337
|
+
*/
|
|
338
|
+
posture?: FallbackPosture;
|
|
339
|
+
/**
|
|
340
|
+
* Cap on chain length. Default 3. Min 1. Useful when the consumer wants
|
|
341
|
+
* to keep the worst-case latency low (each fallback adds a round-trip).
|
|
342
|
+
*/
|
|
343
|
+
maxDepth?: number;
|
|
344
|
+
/**
|
|
345
|
+
* Consumer-side gating. `blockedModels` are filtered from the chain.
|
|
346
|
+
* `preferredModels` is informational (no boost applied at this layer —
|
|
347
|
+
* compile()'s `passScoreTargets` handles preference ranking).
|
|
348
|
+
* `maxCostPerCallUsd` is NOT applied here — needs IR-level token
|
|
349
|
+
* estimation. Use compile()'s policy plumbing instead.
|
|
350
|
+
*/
|
|
351
|
+
policy?: CompilePolicy;
|
|
352
|
+
}
|
|
353
|
+
declare function getDefaultFallbackChain(opts: GetDefaultFallbackChainOpts): string[];
|
|
354
|
+
/**
|
|
355
|
+
* Returns a shallow copy of the hand-curated starter chain for an archetype.
|
|
356
|
+
* Useful for tests + the `scripts/digest.mjs` operator readout.
|
|
357
|
+
*/
|
|
358
|
+
declare function getStarterChain(archetype: IntentArchetypeName): string[];
|
|
359
|
+
/**
|
|
360
|
+
* Returns a shallow copy of all starter chains keyed by archetype.
|
|
361
|
+
* Useful for the `digest.mjs` readout and consumer audits.
|
|
362
|
+
*/
|
|
363
|
+
declare function getAllStarterChains(): Record<IntentArchetypeName, string[]>;
|
|
282
364
|
|
|
283
365
|
/**
|
|
284
366
|
* @warmdrift/kgauto v2 — prompt compiler + central learning brain.
|
|
@@ -326,4 +408,4 @@ declare function runAdvisor(ir: PromptIR, result: AdvisorContext, profile: Model
|
|
|
326
408
|
*/
|
|
327
409
|
declare function compile(ir: PromptIR, opts?: CompileOptions): CompileResult;
|
|
328
410
|
|
|
329
|
-
export { ApiKeys, type AppOracle, BestPracticeAdvisory, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, type OutcomePayload, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, runAdvisor, setTokenizer };
|
|
411
|
+
export { ApiKeys, type AppOracle, BestPracticeAdvisory, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type FallbackPosture, type GetDefaultFallbackChainOpts, IntentArchetypeName, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, type OutcomePayload, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, getAllStarterChains, getDefaultFallbackChain, getStarterChain, record, resetTokenizer, runAdvisor, setTokenizer };
|
package/dist/index.d.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory } from './profiles-B3eNQ2py.js';
|
|
2
|
-
export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-B3eNQ2py.js';
|
|
3
|
-
|
|
1
|
+
import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory } from './profiles-BYVOc1eW.js';
|
|
2
|
+
export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, F as FallbackReason, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-BYVOc1eW.js';
|
|
3
|
+
import { IntentArchetypeName } from './dialect.js';
|
|
4
|
+
export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.js';
|
|
4
5
|
|
|
5
6
|
/**
|
|
6
7
|
* compile() — the main orchestrator.
|
|
@@ -275,10 +276,91 @@ declare function countTokens(text: string): number;
|
|
|
275
276
|
/** Subset of CompileResult fields the advisor needs. */
|
|
276
277
|
type AdvisorContext = Pick<CompileResult, 'target' | 'provider' | 'tokensIn' | 'diagnostics'>;
|
|
277
278
|
/**
|
|
278
|
-
* Run all
|
|
279
|
-
*
|
|
279
|
+
* Run all phased rules and return collected advisories. Order is fixed so
|
|
280
|
+
* output is stable across runs. The `policy` argument is alpha.9 — the
|
|
281
|
+
* `single-model-array` rule needs to know whether the consumer explicitly
|
|
282
|
+
* declared `posture: 'locked'` (in which case single-model is intentional
|
|
283
|
+
* and shouldn't warn).
|
|
280
284
|
*/
|
|
281
|
-
declare function runAdvisor(ir: PromptIR, result: AdvisorContext, profile: ModelProfile): BestPracticeAdvisory[];
|
|
285
|
+
declare function runAdvisor(ir: PromptIR, result: AdvisorContext, profile: ModelProfile, policy?: CompilePolicy): BestPracticeAdvisory[];
|
|
286
|
+
|
|
287
|
+
/**
|
|
288
|
+
* getDefaultFallbackChain — the alpha.9 cascading ship.
|
|
289
|
+
*
|
|
290
|
+
* Returns a per-archetype fallback chain that walks the cost/performance
|
|
291
|
+
* Pareto frontier (master plan §1.3 + §3). Three customer postures:
|
|
292
|
+
*
|
|
293
|
+
* locked — caller passes [theOneModel]; never call this function
|
|
294
|
+
* preferred — caller passes `primary`; chain returned is [primary, ...fallbacks]
|
|
295
|
+
* open — caller passes no `primary`; chain returned is [best, ...fallbacks]
|
|
296
|
+
*
|
|
297
|
+
* The chain at each step:
|
|
298
|
+
* 1. Costs strictly less than the previous (no expensive sideways moves)
|
|
299
|
+
* 2. Comes from a different provider than the previous step where possible
|
|
300
|
+
* (correlated outages don't kill consecutive attempts)
|
|
301
|
+
* 3. Stays above the archetype's perf floor (skip models scored <baseline
|
|
302
|
+
* for archetypes where degradation would be unacceptable)
|
|
303
|
+
*
|
|
304
|
+
* In alpha.9 the chain is **hand-curated** per archetype (§3.3 starter
|
|
305
|
+
* table). Brain-query mode lands in alpha.10. Policy.blockedModels filters
|
|
306
|
+
* the result; policy.maxCostPerCallUsd is NOT applied here because the
|
|
307
|
+
* function doesn't see the IR's token counts — that filtering happens at
|
|
308
|
+
* `passScoreTargets()` time inside compile().
|
|
309
|
+
*
|
|
310
|
+
* The function is **pure** — no brain query, no I/O, no randomness. Same
|
|
311
|
+
* inputs always produce the same chain.
|
|
312
|
+
*/
|
|
313
|
+
|
|
314
|
+
/**
|
|
315
|
+
* Posture passed into `getDefaultFallbackChain`. The chain function only
|
|
316
|
+
* sees `'open'` and `'preferred'` — callers in `'locked'` posture should
|
|
317
|
+
* pass `models: [theOneModel]` directly and skip this function entirely.
|
|
318
|
+
*
|
|
319
|
+
* Equivalent to `CompilePolicy.posture` minus `'locked'`. Kept distinct so
|
|
320
|
+
* the type system enforces "don't ask for a chain when you don't want one."
|
|
321
|
+
*/
|
|
322
|
+
type FallbackPosture = 'open' | 'preferred';
|
|
323
|
+
interface GetDefaultFallbackChainOpts {
|
|
324
|
+
/** The archetype the call is performing. Drives chain shape. */
|
|
325
|
+
archetype: IntentArchetypeName;
|
|
326
|
+
/**
|
|
327
|
+
* The user-selected or caller-anchored primary model. When provided, it
|
|
328
|
+
* appears at position 0 of the returned chain and fallbacks follow.
|
|
329
|
+
* When omitted, the function picks the best-perf model for the archetype
|
|
330
|
+
* as position 0 (open posture).
|
|
331
|
+
*/
|
|
332
|
+
primary?: string;
|
|
333
|
+
/**
|
|
334
|
+
* Informational. `'preferred'` and `'open'` produce the same chain shape
|
|
335
|
+
* given the same `primary`/no-primary input — posture is a tag the brain
|
|
336
|
+
* uses to distinguish "user-anchored" from "library-anchored" telemetry.
|
|
337
|
+
*/
|
|
338
|
+
posture?: FallbackPosture;
|
|
339
|
+
/**
|
|
340
|
+
* Cap on chain length. Default 3. Min 1. Useful when the consumer wants
|
|
341
|
+
* to keep the worst-case latency low (each fallback adds a round-trip).
|
|
342
|
+
*/
|
|
343
|
+
maxDepth?: number;
|
|
344
|
+
/**
|
|
345
|
+
* Consumer-side gating. `blockedModels` are filtered from the chain.
|
|
346
|
+
* `preferredModels` is informational (no boost applied at this layer —
|
|
347
|
+
* compile()'s `passScoreTargets` handles preference ranking).
|
|
348
|
+
* `maxCostPerCallUsd` is NOT applied here — needs IR-level token
|
|
349
|
+
* estimation. Use compile()'s policy plumbing instead.
|
|
350
|
+
*/
|
|
351
|
+
policy?: CompilePolicy;
|
|
352
|
+
}
|
|
353
|
+
declare function getDefaultFallbackChain(opts: GetDefaultFallbackChainOpts): string[];
|
|
354
|
+
/**
|
|
355
|
+
* Returns a shallow copy of the hand-curated starter chain for an archetype.
|
|
356
|
+
* Useful for tests + the `scripts/digest.mjs` operator readout.
|
|
357
|
+
*/
|
|
358
|
+
declare function getStarterChain(archetype: IntentArchetypeName): string[];
|
|
359
|
+
/**
|
|
360
|
+
* Returns a shallow copy of all starter chains keyed by archetype.
|
|
361
|
+
* Useful for the `digest.mjs` readout and consumer audits.
|
|
362
|
+
*/
|
|
363
|
+
declare function getAllStarterChains(): Record<IntentArchetypeName, string[]>;
|
|
282
364
|
|
|
283
365
|
/**
|
|
284
366
|
* @warmdrift/kgauto v2 — prompt compiler + central learning brain.
|
|
@@ -326,4 +408,4 @@ declare function runAdvisor(ir: PromptIR, result: AdvisorContext, profile: Model
|
|
|
326
408
|
*/
|
|
327
409
|
declare function compile(ir: PromptIR, opts?: CompileOptions): CompileResult;
|
|
328
410
|
|
|
329
|
-
export { ApiKeys, type AppOracle, BestPracticeAdvisory, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, type OutcomePayload, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, runAdvisor, setTokenizer };
|
|
411
|
+
export { ApiKeys, type AppOracle, BestPracticeAdvisory, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type FallbackPosture, type GetDefaultFallbackChainOpts, IntentArchetypeName, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, type OutcomePayload, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, getAllStarterChains, getDefaultFallbackChain, getStarterChain, record, resetTokenizer, runAdvisor, setTokenizer };
|