@warmdrift/kgauto-compiler 2.0.0-alpha.7 → 2.0.0-alpha.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-MBEI5UOM.mjs → chunk-3KVKELZN.mjs} +257 -9
- package/dist/index.d.mts +89 -7
- package/dist/index.d.ts +89 -7
- package/dist/index.js +463 -36
- package/dist/index.mjs +204 -28
- package/dist/{profiles-B3eNQ2py.d.ts → profiles-BYVOc1eW.d.ts} +82 -1
- package/dist/{profiles-Py8c7zjJ.d.mts → profiles-NUZOIzGr.d.mts} +82 -1
- package/dist/profiles.d.mts +1 -1
- package/dist/profiles.d.ts +1 -1
- package/dist/profiles.js +257 -9
- package/dist/profiles.mjs +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -36,7 +36,10 @@ __export(index_exports, {
|
|
|
36
36
|
configureBrain: () => configureBrain,
|
|
37
37
|
countTokens: () => countTokens,
|
|
38
38
|
execute: () => execute,
|
|
39
|
+
getAllStarterChains: () => getAllStarterChains,
|
|
40
|
+
getDefaultFallbackChain: () => getDefaultFallbackChain,
|
|
39
41
|
getProfile: () => getProfile,
|
|
42
|
+
getStarterChain: () => getStarterChain,
|
|
40
43
|
hashShape: () => hashShape,
|
|
41
44
|
isArchetype: () => isArchetype,
|
|
42
45
|
learningKey: () => learningKey,
|
|
@@ -524,7 +527,11 @@ function lowerAnthropic(ir, profile, hints) {
|
|
|
524
527
|
system: systemBlocks,
|
|
525
528
|
messages,
|
|
526
529
|
tools,
|
|
527
|
-
|
|
530
|
+
// alpha.8: trust profile.maxOutputTokens. The historical Math.min(_, 4096)
|
|
531
|
+
// cap surprised every consumer once (PB-Cairn contract-gaps brief, Gap 3).
|
|
532
|
+
// Profile is the single source of truth; consumers wanting a tighter
|
|
533
|
+
// budget can pass providerOverrides.anthropic.max_tokens explicitly.
|
|
534
|
+
max_tokens: hints.forceTerseOutput ? 200 : profile.maxOutputTokens
|
|
528
535
|
},
|
|
529
536
|
diagnostics: {
|
|
530
537
|
cacheableTokens,
|
|
@@ -855,7 +862,24 @@ var PROFILES_RAW = [
|
|
|
855
862
|
],
|
|
856
863
|
strengths: ["reasoning", "agentic_coding", "long_context", "reliable_tool_use", "structured_output"],
|
|
857
864
|
weaknesses: ["cost", "latency"],
|
|
858
|
-
notes: "Frontier (2026-05). Step-change improvement over 4.6 in agentic coding. Adaptive thinking only \u2014 no extended-thinking toggle. 1M context, 128k max output."
|
|
865
|
+
notes: "Frontier (2026-05). Step-change improvement over 4.6 in agentic coding. Adaptive thinking only \u2014 no extended-thinking toggle. 1M context, 128k max output.",
|
|
866
|
+
// Frontier perf. Drops on archetypes where parallel-tool throughput
|
|
867
|
+
// (hunt) or low-budget cost-sensitivity (classify/summarize) matters
|
|
868
|
+
// more than reasoning depth.
|
|
869
|
+
archetypePerf: {
|
|
870
|
+
critique: 10,
|
|
871
|
+
plan: 10,
|
|
872
|
+
generate: 9,
|
|
873
|
+
ask: 9,
|
|
874
|
+
extract: 9,
|
|
875
|
+
transform: 9,
|
|
876
|
+
hunt: 8,
|
|
877
|
+
// strong but Flash dominates parallel tool throughput
|
|
878
|
+
summarize: 8,
|
|
879
|
+
// overkill for tolerant archetype; cost-out of frontier
|
|
880
|
+
classify: 8
|
|
881
|
+
// overkill; brain-validated cheaper models cover this
|
|
882
|
+
}
|
|
859
883
|
},
|
|
860
884
|
{
|
|
861
885
|
id: "claude-opus-4-6",
|
|
@@ -887,7 +911,20 @@ var PROFILES_RAW = [
|
|
|
887
911
|
],
|
|
888
912
|
strengths: ["reasoning", "long_context", "reliable_tool_use", "structured_output", "extended_thinking"],
|
|
889
913
|
weaknesses: ["cost", "latency"],
|
|
890
|
-
notes: "Predecessor to 4.7. Still current in Anthropic legacy table. Same pricing as 4.7 \u2014 choose 4.7 unless you need extended-thinking budget control (4.7 is adaptive-only)."
|
|
914
|
+
notes: "Predecessor to 4.7. Still current in Anthropic legacy table. Same pricing as 4.7 \u2014 choose 4.7 unless you need extended-thinking budget control (4.7 is adaptive-only).",
|
|
915
|
+
// One notch below 4.7 across the board — extended-thinking edge does
|
|
916
|
+
// not flip any archetype ranking. Legacy: chains should prefer 4.7.
|
|
917
|
+
archetypePerf: {
|
|
918
|
+
critique: 9,
|
|
919
|
+
plan: 9,
|
|
920
|
+
generate: 9,
|
|
921
|
+
ask: 9,
|
|
922
|
+
extract: 9,
|
|
923
|
+
transform: 9,
|
|
924
|
+
hunt: 7,
|
|
925
|
+
summarize: 8,
|
|
926
|
+
classify: 8
|
|
927
|
+
}
|
|
891
928
|
},
|
|
892
929
|
{
|
|
893
930
|
id: "claude-sonnet-4-6",
|
|
@@ -911,7 +948,23 @@ var PROFILES_RAW = [
|
|
|
911
948
|
],
|
|
912
949
|
strengths: ["quality", "tool_use", "long_context", "cache_friendly", "extended_thinking"],
|
|
913
950
|
weaknesses: [],
|
|
914
|
-
notes: "Workhorse. Best price/quality for most multi-turn agentic work. 1M context, 64k max output."
|
|
951
|
+
notes: "Workhorse. Best price/quality for most multi-turn agentic work. 1M context, 64k max output.",
|
|
952
|
+
// Master plan §6.2 anchor. Tier 0 for plan/generate/ask/extract/transform
|
|
953
|
+
// in starter chains; tier 1 cross-provider for hunt/summarize/classify.
|
|
954
|
+
archetypePerf: {
|
|
955
|
+
ask: 9,
|
|
956
|
+
generate: 9,
|
|
957
|
+
plan: 9,
|
|
958
|
+
critique: 9,
|
|
959
|
+
extract: 9,
|
|
960
|
+
transform: 9,
|
|
961
|
+
hunt: 7,
|
|
962
|
+
// strong but Flash beats on parallel tool throughput
|
|
963
|
+
summarize: 8,
|
|
964
|
+
// overkill for tolerant archetype
|
|
965
|
+
classify: 8
|
|
966
|
+
// overkill
|
|
967
|
+
}
|
|
915
968
|
},
|
|
916
969
|
{
|
|
917
970
|
id: "claude-haiku-4-5",
|
|
@@ -941,7 +994,23 @@ var PROFILES_RAW = [
|
|
|
941
994
|
],
|
|
942
995
|
strengths: ["speed", "cost", "classification", "cache_friendly", "extended_thinking"],
|
|
943
996
|
weaknesses: ["complex_reasoning", "large_tool_sets"],
|
|
944
|
-
notes: "Cheapest Anthropic. Great for classify, summarize, ask shapes. 200k context, 64k max output. API alias `claude-haiku-4-5` resolves to dated snapshot `claude-haiku-4-5-20251001`."
|
|
997
|
+
notes: "Cheapest Anthropic. Great for classify, summarize, ask shapes. 200k context, 64k max output. API alias `claude-haiku-4-5` resolves to dated snapshot `claude-haiku-4-5-20251001`.",
|
|
998
|
+
// Tier 1 cross-provider anchor for short-output chains (classify/
|
|
999
|
+
// summarize/extract/transform). Falls off on plan/critique where
|
|
1000
|
+
// reasoning depth matters; competes with Pro on cost+latency.
|
|
1001
|
+
archetypePerf: {
|
|
1002
|
+
classify: 8,
|
|
1003
|
+
summarize: 8,
|
|
1004
|
+
ask: 7,
|
|
1005
|
+
transform: 7,
|
|
1006
|
+
extract: 7,
|
|
1007
|
+
hunt: 6,
|
|
1008
|
+
// tool reliability drops at 16 — cliff guard fires
|
|
1009
|
+
generate: 6,
|
|
1010
|
+
plan: 5,
|
|
1011
|
+
critique: 4
|
|
1012
|
+
// reasoning depth gap vs Sonnet/Opus
|
|
1013
|
+
}
|
|
945
1014
|
},
|
|
946
1015
|
// ── Google ──
|
|
947
1016
|
{
|
|
@@ -1019,7 +1088,131 @@ var PROFILES_RAW = [
|
|
|
1019
1088
|
],
|
|
1020
1089
|
strengths: ["speed", "volume", "classification", "1m_context", "cost"],
|
|
1021
1090
|
weaknesses: ["complex_schemas", "large_tool_sets", "high_context_quality"],
|
|
1022
|
-
notes: "Fast and cheap with 1M context. Quality cliffs at 8K context and 20 tools \u2014 guard with cliffs."
|
|
1091
|
+
notes: "Fast and cheap with 1M context. Quality cliffs at 8K context and 20 tools \u2014 guard with cliffs.",
|
|
1092
|
+
// Master plan §6.2 anchor. Tier 0 for hunt (parallel tool throughput
|
|
1093
|
+
// 15-75 calls/step beats Sonnet — L-040), summarize, classify.
|
|
1094
|
+
archetypePerf: {
|
|
1095
|
+
hunt: 9,
|
|
1096
|
+
// L-040: parallel tool throughput 15-75/step
|
|
1097
|
+
classify: 7,
|
|
1098
|
+
// brain-validated, 218 rows
|
|
1099
|
+
summarize: 7,
|
|
1100
|
+
// brain-validated; cliff strips tools when present
|
|
1101
|
+
transform: 7,
|
|
1102
|
+
ask: 7,
|
|
1103
|
+
generate: 6,
|
|
1104
|
+
plan: 5,
|
|
1105
|
+
extract: 6,
|
|
1106
|
+
// alpha.8 MAX_TOKENS history on structured output
|
|
1107
|
+
critique: 4
|
|
1108
|
+
// reasoning shallower than Sonnet/Opus
|
|
1109
|
+
}
|
|
1110
|
+
},
|
|
1111
|
+
{
|
|
1112
|
+
// ── Gemini 2.5 Flash-Lite ──
|
|
1113
|
+
// Onboarded 2026-05-13 (s22) after the model-release watcher surfaced
|
|
1114
|
+
// it as a UNREGISTERED + NEW candidate. Released by Google July 2025,
|
|
1115
|
+
// stable. Positioned BELOW Flash on the cost/perf frontier:
|
|
1116
|
+
// input $0.10/M (Flash $0.30/M) — 3× cheaper
|
|
1117
|
+
// output $0.40/M (Flash $2.50/M) — 6× cheaper
|
|
1118
|
+
// cache $0.01/M — 1/10 of input (vs Flash 0.25 discount)
|
|
1119
|
+
// Cliffs are HYPOTHESIZED from Flash's known failure modes — Flash-Lite
|
|
1120
|
+
// is a smaller sibling, so we inherit Flash's cliffs at equal-or-tighter
|
|
1121
|
+
// thresholds. The brain will validate/relax these as evidence accumulates
|
|
1122
|
+
// per (archetype, model) tuple. Currently ZERO brain rows for this model.
|
|
1123
|
+
id: "gemini-2.5-flash-lite",
|
|
1124
|
+
verifiedAgainstDocs: "2026-05-13",
|
|
1125
|
+
provider: "google",
|
|
1126
|
+
status: "current",
|
|
1127
|
+
maxContextTokens: 1048576,
|
|
1128
|
+
maxOutputTokens: 65536,
|
|
1129
|
+
maxTools: 128,
|
|
1130
|
+
parallelToolCalls: true,
|
|
1131
|
+
structuredOutput: "native",
|
|
1132
|
+
systemPromptMode: "separate",
|
|
1133
|
+
streaming: true,
|
|
1134
|
+
cliffs: [
|
|
1135
|
+
{
|
|
1136
|
+
metric: "input_tokens",
|
|
1137
|
+
threshold: 8e3,
|
|
1138
|
+
action: "downgrade_quality_warning",
|
|
1139
|
+
reason: "Inherited from Flash: quality degrades above ~8K. Smaller model \u2014 likely degrades faster. Re-tune from brain after n\u226520."
|
|
1140
|
+
},
|
|
1141
|
+
{
|
|
1142
|
+
metric: "tool_count",
|
|
1143
|
+
threshold: 10,
|
|
1144
|
+
action: "drop_to_top_relevant",
|
|
1145
|
+
reason: "Conservative: Flash drops at 20, Flash-Lite is smaller \u2014 assume tighter ceiling until brain proves otherwise."
|
|
1146
|
+
},
|
|
1147
|
+
{
|
|
1148
|
+
metric: "thinking_with_short_output",
|
|
1149
|
+
threshold: 1,
|
|
1150
|
+
action: "force_thinking_budget_zero",
|
|
1151
|
+
reason: "Thinking enabled per Google API (thinking: true). Same drain risk as Flash \u2014 thinking tokens consume maxOutputTokens."
|
|
1152
|
+
},
|
|
1153
|
+
{
|
|
1154
|
+
// Strong prior: Flash hit 5/5 empty rate on summarize+tools (s11
|
|
1155
|
+
// trust artifact, kgauto commit 3872832). Flash-Lite shares the
|
|
1156
|
+
// same architectural family — almost certainly inherits this cliff.
|
|
1157
|
+
// Ship the guard preemptively; brain telemetry confirms or relaxes.
|
|
1158
|
+
metric: "tool_count",
|
|
1159
|
+
threshold: 1,
|
|
1160
|
+
whenIntent: "summarize",
|
|
1161
|
+
action: "strip_tools",
|
|
1162
|
+
reason: "Inherited from Flash s11 cliff: summarize+tools \u2192 empty response. Preemptive guard until brain evidence on Flash-Lite specifically."
|
|
1163
|
+
}
|
|
1164
|
+
],
|
|
1165
|
+
costInputPer1m: 0.1,
|
|
1166
|
+
costOutputPer1m: 0.4,
|
|
1167
|
+
lowering: {
|
|
1168
|
+
...GOOGLE_LOWERING_BASE,
|
|
1169
|
+
// Cache discount 10× (vs Flash 4×) — Google's spec is $0.01/M cache vs
|
|
1170
|
+
// $0.10/M input. Material for repeat-prompt workloads (classify shape).
|
|
1171
|
+
cache: { ...GOOGLE_LOWERING_BASE.cache, discount: 0.1 },
|
|
1172
|
+
thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
|
|
1173
|
+
},
|
|
1174
|
+
recovery: [
|
|
1175
|
+
{
|
|
1176
|
+
signal: "empty_response_after_tool",
|
|
1177
|
+
action: "retry_with_params",
|
|
1178
|
+
retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
|
|
1179
|
+
maxRetries: 1,
|
|
1180
|
+
reason: "Known on Flash family: empty after tool result \u2014 retry with thinking off."
|
|
1181
|
+
},
|
|
1182
|
+
{
|
|
1183
|
+
signal: "empty_response",
|
|
1184
|
+
action: "retry_with_params",
|
|
1185
|
+
retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
|
|
1186
|
+
maxRetries: 1,
|
|
1187
|
+
reason: "Empty response \u2014 try with thinking off."
|
|
1188
|
+
},
|
|
1189
|
+
{
|
|
1190
|
+
signal: "malformed_function_call",
|
|
1191
|
+
action: "escalate",
|
|
1192
|
+
reason: "MALFORMED_FUNCTION_CALL maps to stop \u2014 escalate to next target."
|
|
1193
|
+
}
|
|
1194
|
+
],
|
|
1195
|
+
strengths: ["lowest_cost", "speed", "volume", "classification", "summarize", "1m_context", "cache_friendly"],
|
|
1196
|
+
weaknesses: ["complex_reasoning", "large_tool_sets", "complex_schemas", "structured_output_unproven", "long_context_quality"],
|
|
1197
|
+
notes: "Bottom-frontier anchor on cost: $0.10/$0.40 per 1M tokens, 1M context, 65K max output. Released July 2025 (stable). Positioned for classify / summarize / transform archetypes where quality bar is forgiving. Cliffs inherited from Flash at equal-or-tighter thresholds \u2014 re-tune per (archetype) once brain has n\u226520 rows. Alpha.8 contract layer handles MAX_TOKENS-on-structured-output via fallback chain, so structuredOutput=native is safe to declare even though Flash had alpha.8 history. Cache discount in spec: $0.01/M = 1/10 of input (richer than Flash 25%) \u2014 meaningful for repeat-prompt workloads.",
|
|
1198
|
+
// Tier 3 emergency floor for summarize/classify chains. ZERO brain
|
|
1199
|
+
// rows — all values are starter hypotheses anchored to "smaller
|
|
1200
|
+
// sibling of Flash, at-or-below Flash perf on every archetype." The
|
|
1201
|
+
// first 50 brain rows per archetype will validate or relax these.
|
|
1202
|
+
archetypePerf: {
|
|
1203
|
+
classify: 6,
|
|
1204
|
+
// starter hypothesis — verify (Flash is 7, lite likely ≤)
|
|
1205
|
+
summarize: 6,
|
|
1206
|
+
// starter hypothesis — verify; cliff strips tools
|
|
1207
|
+
transform: 6,
|
|
1208
|
+
// starter hypothesis — verify
|
|
1209
|
+
ask: 5,
|
|
1210
|
+
hunt: 5,
|
|
1211
|
+
generate: 4,
|
|
1212
|
+
extract: 4,
|
|
1213
|
+
plan: 3,
|
|
1214
|
+
critique: 3
|
|
1215
|
+
}
|
|
1023
1216
|
},
|
|
1024
1217
|
{
|
|
1025
1218
|
id: "gemini-2.5-pro",
|
|
@@ -1055,7 +1248,21 @@ var PROFILES_RAW = [
|
|
|
1055
1248
|
}
|
|
1056
1249
|
],
|
|
1057
1250
|
strengths: ["reasoning", "1m_context", "structured_output", "tool_use"],
|
|
1058
|
-
weaknesses: ["pricing_above_200k"]
|
|
1251
|
+
weaknesses: ["pricing_above_200k"],
|
|
1252
|
+
// Master plan §3.3 anchor: tier-2 cross-provider in almost every chain.
|
|
1253
|
+
// Sits on the frontier at perf-9 — close to Sonnet but cheaper input.
|
|
1254
|
+
archetypePerf: {
|
|
1255
|
+
critique: 9,
|
|
1256
|
+
plan: 9,
|
|
1257
|
+
ask: 8,
|
|
1258
|
+
generate: 8,
|
|
1259
|
+
extract: 8,
|
|
1260
|
+
transform: 8,
|
|
1261
|
+
hunt: 8,
|
|
1262
|
+
// tier 1 cross-provider for hunt chain
|
|
1263
|
+
summarize: 7,
|
|
1264
|
+
classify: 7
|
|
1265
|
+
}
|
|
1059
1266
|
},
|
|
1060
1267
|
{
|
|
1061
1268
|
id: "gemini-3.1-pro-preview",
|
|
@@ -1093,7 +1300,23 @@ var PROFILES_RAW = [
|
|
|
1093
1300
|
],
|
|
1094
1301
|
strengths: ["reasoning", "1m_context", "agentic_coding", "structured_output", "tool_use"],
|
|
1095
1302
|
weaknesses: ["cost", "preview_status", "pricing_above_200k"],
|
|
1096
|
-
notes: "Frontier Gemini (preview, 2026-Q2). Step-change agentic coding per Google. Cache discount 10\xD7 (vs 4\xD7 for 2.5 Pro). Use status=preview to flag rollback path until GA."
|
|
1303
|
+
notes: "Frontier Gemini (preview, 2026-Q2). Step-change agentic coding per Google. Cache discount 10\xD7 (vs 4\xD7 for 2.5 Pro). Use status=preview to flag rollback path until GA.",
|
|
1304
|
+
// Frontier-Gemini preview — bumped one notch over 2.5 Pro on agentic
|
|
1305
|
+
// coding / reasoning per Google's release notes. Preview status:
|
|
1306
|
+
// chains should stay on 2.5 Pro until GA. Starter hypothesis.
|
|
1307
|
+
archetypePerf: {
|
|
1308
|
+
critique: 10,
|
|
1309
|
+
// Google claims step-change on reasoning
|
|
1310
|
+
plan: 10,
|
|
1311
|
+
ask: 9,
|
|
1312
|
+
generate: 9,
|
|
1313
|
+
extract: 9,
|
|
1314
|
+
transform: 8,
|
|
1315
|
+
hunt: 9,
|
|
1316
|
+
// step-change agentic per Google
|
|
1317
|
+
summarize: 8,
|
|
1318
|
+
classify: 7
|
|
1319
|
+
}
|
|
1097
1320
|
},
|
|
1098
1321
|
// ── DeepSeek ──
|
|
1099
1322
|
// 2026-05-08 audit (L-073): DeepSeek's `deepseek-chat` was silently aliased
|
|
@@ -1133,7 +1356,24 @@ var PROFILES_RAW = [
|
|
|
1133
1356
|
],
|
|
1134
1357
|
strengths: ["cost", "1m_context", "json_output", "code", "reasoning"],
|
|
1135
1358
|
weaknesses: ["parallel_tools", "large_tool_sets"],
|
|
1136
|
-
notes: "Cheap workhorse. 1M context, 384k max output. Cache-hit input $0.0028/M (1/50\xD7 of miss). Aliased as `deepseek-chat` (non-thinking) and `deepseek-reasoner` (thinking) \u2014 see ALIASES."
|
|
1359
|
+
notes: "Cheap workhorse. 1M context, 384k max output. Cache-hit input $0.0028/M (1/50\xD7 of miss). Aliased as `deepseek-chat` (non-thinking) and `deepseek-reasoner` (thinking) \u2014 see ALIASES.",
|
|
1360
|
+
// Master plan §6.2 anchor. Brain-validated tier 1 cross-provider for
|
|
1361
|
+
// classify (169 rows, 0% empty). Tier 0 for summarize-with-no-tools.
|
|
1362
|
+
// Falls off on hunt (sequential tools — L-040) and reasoning depth.
|
|
1363
|
+
archetypePerf: {
|
|
1364
|
+
classify: 7,
|
|
1365
|
+
// brain-validated, 169 rows
|
|
1366
|
+
summarize: 7,
|
|
1367
|
+
// archetype-tolerant, no brain evidence yet
|
|
1368
|
+
ask: 6,
|
|
1369
|
+
transform: 6,
|
|
1370
|
+
generate: 5,
|
|
1371
|
+
plan: 5,
|
|
1372
|
+
extract: 5,
|
|
1373
|
+
critique: 4,
|
|
1374
|
+
hunt: 4
|
|
1375
|
+
// sequential tool calls only — L-040
|
|
1376
|
+
}
|
|
1137
1377
|
},
|
|
1138
1378
|
{
|
|
1139
1379
|
id: "deepseek-v4-pro",
|
|
@@ -1169,7 +1409,22 @@ var PROFILES_RAW = [
|
|
|
1169
1409
|
],
|
|
1170
1410
|
strengths: ["quality", "reasoning", "1m_context", "json_output", "code", "extended_thinking"],
|
|
1171
1411
|
weaknesses: ["parallel_tools", "large_tool_sets"],
|
|
1172
|
-
notes: "Pro tier. 1M context, 384k max output. Regular pricing $1.74/$3.48; 75% promo through 2026-05-31 ($0.435/$0.87). Default mode = thinking."
|
|
1412
|
+
notes: "Pro tier. 1M context, 384k max output. Regular pricing $1.74/$3.48; 75% promo through 2026-05-31 ($0.435/$0.87). Default mode = thinking.",
|
|
1413
|
+
// Master plan §3.3: tier 3 cross-provider for plan chain. Reasoning
|
|
1414
|
+
// bumped one notch over V4-Flash; same parallel-tool ceiling.
|
|
1415
|
+
archetypePerf: {
|
|
1416
|
+
plan: 7,
|
|
1417
|
+
// §3.3 tier 3 for plan
|
|
1418
|
+
critique: 6,
|
|
1419
|
+
ask: 7,
|
|
1420
|
+
generate: 6,
|
|
1421
|
+
classify: 7,
|
|
1422
|
+
summarize: 7,
|
|
1423
|
+
extract: 6,
|
|
1424
|
+
transform: 6,
|
|
1425
|
+
hunt: 4
|
|
1426
|
+
// sequential tools — same as V4-Flash
|
|
1427
|
+
}
|
|
1173
1428
|
}
|
|
1174
1429
|
];
|
|
1175
1430
|
var ALIASES = {
|
|
@@ -1205,12 +1460,13 @@ function profilesByProvider(provider) {
|
|
|
1205
1460
|
}
|
|
1206
1461
|
|
|
1207
1462
|
// src/advisor.ts
|
|
1208
|
-
function runAdvisor(ir, result, profile) {
|
|
1463
|
+
function runAdvisor(ir, result, profile, policy) {
|
|
1209
1464
|
const out = [];
|
|
1210
1465
|
out.push(...detectCachingOff(ir, profile));
|
|
1211
1466
|
out.push(...detectSingleChunkSystem(ir, profile));
|
|
1212
1467
|
out.push(...detectToolBloat(ir, result));
|
|
1213
1468
|
out.push(...detectHistoryUncached(ir, profile));
|
|
1469
|
+
out.push(...detectSingleModelArray(ir, policy));
|
|
1214
1470
|
return out;
|
|
1215
1471
|
}
|
|
1216
1472
|
function detectCachingOff(ir, profile) {
|
|
@@ -1282,6 +1538,20 @@ function detectHistoryUncached(ir, profile) {
|
|
|
1282
1538
|
}
|
|
1283
1539
|
];
|
|
1284
1540
|
}
|
|
1541
|
+
/**
 * Advisor rule: warn when the request lists exactly one model and the caller
 * has not declared that to be intentional.
 *
 * @param {{ models: string[] }} ir - Request IR; only `ir.models` is read.
 * @param {{ posture?: string } | undefined} policy - Optional policy; a
 *   `posture` of "locked" suppresses the rule.
 * @returns {Array<object>} Empty when the rule does not apply, otherwise a
 *   single "warn"-level advisory with code "single-model-array".
 */
function detectSingleModelArray(ir, policy) {
  const isLocked = policy?.posture === "locked";
  if (ir.models.length !== 1 || isLocked) {
    return [];
  }

  const [only] = ir.models;
  const message = `\`ir.models\` has length 1 (only "${only}") and posture is not 'locked'. A single-model chain has no safety net \u2014 the first 429 / 5xx / cliff hits the user as a failure. Master plan \xA71.2 closes the reliability gap with a 2-step minimum.`;
  // The suggestion embeds the sole model id so it can be pasted as-is.
  const suggestion = `Use \`getDefaultFallbackChain({ archetype: ir.intent.archetype, primary: '${only}', posture: 'preferred' })\` for a user-anchored chain, or \`getDefaultFallbackChain({ archetype, posture: 'open' })\` for library-picked. If single-model is intentional (compliance/brand promise), set \`policy.posture = 'locked'\` to silence this rule.`;

  const advisory = {
    level: "warn",
    code: "single-model-array",
    message,
    suggestion,
    docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#single-model-array"
  };
  return [advisory];
}
|
|
1285
1555
|
|
|
1286
1556
|
// src/compile.ts
|
|
1287
1557
|
var counter = 0;
|
|
@@ -1354,7 +1624,8 @@ function compile(ir, opts = {}) {
|
|
|
1354
1624
|
tokensIn: inputTokens,
|
|
1355
1625
|
diagnostics
|
|
1356
1626
|
},
|
|
1357
|
-
profile
|
|
1627
|
+
profile,
|
|
1628
|
+
opts.policy
|
|
1358
1629
|
);
|
|
1359
1630
|
return {
|
|
1360
1631
|
handle,
|
|
@@ -1806,44 +2077,48 @@ async function call(ir, opts = {}) {
|
|
|
1806
2077
|
fetchImpl: opts.fetchImpl,
|
|
1807
2078
|
providerOverrides: opts.providerOverrides
|
|
1808
2079
|
});
|
|
1809
|
-
|
|
2080
|
+
const validated = exec.ok ? validateStructuredContract(exec, ir) : exec;
|
|
2081
|
+
if (validated.ok) {
|
|
1810
2082
|
attempts.push({ model: targetModel, status: "success" });
|
|
1811
2083
|
const latencyMs2 = Date.now() - start;
|
|
1812
|
-
const responseWithStructured = withStructuredOutput(exec.response, ir);
|
|
1813
2084
|
await record({
|
|
1814
2085
|
handle: initial.handle,
|
|
1815
|
-
tokensIn:
|
|
1816
|
-
tokensOut:
|
|
2086
|
+
tokensIn: validated.response.tokens.input,
|
|
2087
|
+
tokensOut: validated.response.tokens.output,
|
|
1817
2088
|
latencyMs: latencyMs2,
|
|
1818
2089
|
success: true,
|
|
1819
|
-
emptyResponse:
|
|
1820
|
-
toolsCalled:
|
|
2090
|
+
emptyResponse: validated.response.tokens.output === 0,
|
|
2091
|
+
toolsCalled: validated.response.toolCalls.map((tc) => tc.name),
|
|
1821
2092
|
actualModel: targetModel !== initial.target ? targetModel : void 0,
|
|
1822
2093
|
mutationsApplied: targetModel !== initial.target ? activeCompile.mutationsApplied.map((m) => m.id) : void 0,
|
|
1823
2094
|
promptPreview: extractPromptPreview(ir),
|
|
1824
|
-
responsePreview:
|
|
1825
|
-
cacheReadInputTokens:
|
|
1826
|
-
cacheCreationInputTokens:
|
|
2095
|
+
responsePreview: validated.response.text.slice(0, 200),
|
|
2096
|
+
cacheReadInputTokens: validated.response.tokens.cached,
|
|
2097
|
+
cacheCreationInputTokens: validated.response.tokens.cacheCreated
|
|
1827
2098
|
});
|
|
2099
|
+
const fellOver = targetModel !== initial.target;
|
|
1828
2100
|
return {
|
|
1829
2101
|
handle: initial.handle,
|
|
1830
2102
|
actualModel: targetModel,
|
|
1831
2103
|
requestedModel: initial.target,
|
|
1832
2104
|
provider: activeCompile.provider,
|
|
1833
|
-
response:
|
|
2105
|
+
response: validated.response,
|
|
1834
2106
|
latencyMs: latencyMs2,
|
|
1835
2107
|
mutationsApplied: activeCompile.mutationsApplied,
|
|
1836
|
-
attempts
|
|
2108
|
+
attempts,
|
|
2109
|
+
servedBy: targetModel,
|
|
2110
|
+
fellOverFrom: fellOver ? initial.target : void 0,
|
|
2111
|
+
fallbackReason: fellOver ? normalizeFallbackReason(attempts) : void 0
|
|
1837
2112
|
};
|
|
1838
2113
|
}
|
|
1839
2114
|
attempts.push({
|
|
1840
2115
|
model: targetModel,
|
|
1841
|
-
status:
|
|
1842
|
-
errorCode:
|
|
1843
|
-
message:
|
|
2116
|
+
status: validated.errorType,
|
|
2117
|
+
errorCode: validated.errorCode,
|
|
2118
|
+
message: validated.message
|
|
1844
2119
|
});
|
|
1845
|
-
lastErr =
|
|
1846
|
-
if (
|
|
2120
|
+
lastErr = validated;
|
|
2121
|
+
if (validated.errorType === "terminal" || opts.noFallback) {
|
|
1847
2122
|
break;
|
|
1848
2123
|
}
|
|
1849
2124
|
}
|
|
@@ -1880,20 +2155,49 @@ function extractPromptPreview(ir) {
|
|
|
1880
2155
|
if (lastHist) return lastHist.slice(0, 200);
|
|
1881
2156
|
return void 0;
|
|
1882
2157
|
}
|
|
1883
|
-
function
|
|
1884
|
-
if (!ir.constraints?.structuredOutput)
|
|
1885
|
-
|
|
2158
|
+
function validateStructuredContract(exec, ir) {
|
|
2159
|
+
if (!ir.constraints?.structuredOutput) {
|
|
2160
|
+
return { ok: true, response: exec.response };
|
|
2161
|
+
}
|
|
2162
|
+
const finish = (exec.response.finishReason ?? "").toLowerCase();
|
|
2163
|
+
if (finish === "max_tokens" || finish === "length") {
|
|
2164
|
+
return {
|
|
2165
|
+
ok: false,
|
|
2166
|
+
status: exec.status,
|
|
2167
|
+
errorType: "retryable",
|
|
2168
|
+
errorCode: "max_tokens_on_structured_output",
|
|
2169
|
+
message: `Provider returned finishReason="${exec.response.finishReason}" on a structured-output call \u2014 output truncated mid-token, JSON cannot be valid`,
|
|
2170
|
+
raw: exec.response.raw
|
|
2171
|
+
};
|
|
2172
|
+
}
|
|
2173
|
+
if (!exec.response.text) {
|
|
2174
|
+
return { ok: true, response: exec.response };
|
|
2175
|
+
}
|
|
1886
2176
|
try {
|
|
1887
|
-
const parsed = JSON.parse(response.text);
|
|
1888
|
-
return { ...response, structuredOutput: parsed };
|
|
2177
|
+
const parsed = JSON.parse(exec.response.text);
|
|
2178
|
+
return { ok: true, response: { ...exec.response, structuredOutput: parsed } };
|
|
1889
2179
|
} catch (err) {
|
|
1890
2180
|
return {
|
|
1891
|
-
|
|
1892
|
-
|
|
1893
|
-
|
|
2181
|
+
ok: false,
|
|
2182
|
+
status: exec.status,
|
|
2183
|
+
errorType: "retryable",
|
|
2184
|
+
errorCode: "structured_output_parse_failed",
|
|
2185
|
+
message: err instanceof Error ? err.message : String(err),
|
|
2186
|
+
raw: exec.response.raw
|
|
1894
2187
|
};
|
|
1895
2188
|
}
|
|
1896
2189
|
}
|
|
2190
|
+
/**
 * Collapse the first non-successful attempt into a coarse fallback reason.
 *
 * @param {Array<{ status: string, errorCode?: string }>} attempts - Attempt
 *   log in the order the attempts were made.
 * @returns {"rate_limit" | "cliff" | "cost_cap" | "provider_error" | undefined}
 *   `undefined` when every attempt succeeded; otherwise the category of the
 *   first failure's `errorCode` ("provider_error" for any unrecognised code).
 */
function normalizeFallbackReason(attempts) {
  const failure = attempts.find((attempt) => attempt.status !== "success");
  if (!failure) {
    return void 0;
  }
  switch (failure.errorCode ?? "") {
    case "rate_limit_429":
    case "rate_limit":
      return "rate_limit";
    // Both structured-output contract failures are reported as a cliff.
    case "max_tokens_on_structured_output":
    case "structured_output_parse_failed":
      return "cliff";
    case "cost_cap_exceeded":
      return "cost_cap";
    default:
      return "provider_error";
  }
}
|
|
1897
2201
|
|
|
1898
2202
|
// src/oracle.ts
|
|
1899
2203
|
var DEFAULT_DIMENSIONS = ["correctness", "completeness", "conciseness", "format"];
|
|
@@ -1983,6 +2287,126 @@ function clamp(n) {
|
|
|
1983
2287
|
return Math.max(0, Math.min(1, n));
|
|
1984
2288
|
}
|
|
1985
2289
|
|
|
2290
|
+
// src/fallback.ts
|
|
2291
|
+
/**
 * Starter fallback chains, keyed by archetype. Each value is an ordered list
 * of model ids: index 0 is the primary, later entries are tried in order.
 * Note that getDefaultFallbackChain truncates to `maxDepth` (default 3), so
 * the fourth entry of a four-model chain is only reached when the caller
 * raises `maxDepth` or earlier entries are removed by policy.
 */
var STARTER_CHAINS = {
  // Reasoning-critical: Opus is the primary, Sonnet the next hop, Gemini Pro
  // the last hop.
  critique: [
    "claude-opus-4-7",
    "claude-sonnet-4-6",
    "gemini-2.5-pro"
  ],
  // Reasoning matters — Sonnet primary; walk UP to Opus on 429 (rare exception
  // to "always cheaper"); then Gemini Pro; DeepSeek Pro as tier 3 floor.
  plan: [
    "claude-sonnet-4-6",
    "claude-opus-4-7",
    "gemini-2.5-pro",
    "deepseek-v4-pro"
  ],
  // Quality + cost match. Walk Sonnet → Haiku, then Gemini Pro,
  // Flash floor for the open-posture chain.
  generate: [
    "claude-sonnet-4-6",
    "claude-haiku-4-5",
    "gemini-2.5-pro",
    "gemini-2.5-flash"
  ],
  ask: [
    "claude-sonnet-4-6",
    "claude-haiku-4-5",
    "gemini-2.5-pro",
    "gemini-2.5-flash"
  ],
  // Structured-output archetype — Flash skipped (alpha.8 MAX_TOKENS cliff),
  // DeepSeek skipped (no brain evidence). Haiku is the lowest tier included;
  // Gemini Pro is the last hop.
  extract: [
    "claude-sonnet-4-6",
    "claude-haiku-4-5",
    "gemini-2.5-pro"
  ],
  // Forgiving archetype — Sonnet primary but Flash safely floors it.
  transform: [
    "claude-sonnet-4-6",
    "claude-haiku-4-5",
    "gemini-2.5-pro",
    "gemini-2.5-flash"
  ],
  // Parallel-tool throughput champion (Flash, L-040). Tier 1 Gemini Pro;
  // tier 2 Sonnet (quality safety net for blocked-Flash case); tier 3
  // Haiku (reduced tool budget — cliff at 16 fires).
  hunt: [
    "gemini-2.5-flash",
    "gemini-2.5-pro",
    "claude-sonnet-4-6",
    "claude-haiku-4-5"
  ],
  // Cost-sensitive + tolerant. DeepSeek brain-evidence tier 1; Haiku tier 2
  // for quality safety; Flash-Lite emergency floor (onboarded s22).
  summarize: [
    "gemini-2.5-flash",
    "deepseek-v4-flash",
    "claude-haiku-4-5",
    "gemini-2.5-flash-lite"
  ],
  // Brain-validated DeepSeek tier 1 (169 rows, 0% empty); Haiku tier 2;
  // Flash-Lite floor for repeat-prompt workloads (cache-discount 10×).
  classify: [
    "gemini-2.5-flash",
    "deepseek-v4-flash",
    "claude-haiku-4-5",
    "gemini-2.5-flash-lite"
  ]
};

/**
 * Build an ordered, de-duplicated fallback chain for an archetype.
 *
 * @param {object} opts
 * @param {string} opts.archetype - Key of STARTER_CHAINS.
 * @param {string} [opts.primary] - Model id to place first; the starter chain
 *   follows it with any duplicate of `primary` removed.
 * @param {number} [opts.maxDepth=3] - Maximum chain length; must be >= 1.
 * @param {{ blockedModels?: string[] }} [opts.policy] - Ids listed in
 *   `blockedModels` are removed from the chain (including `primary`).
 * @returns {string[]} A fresh array of at most `maxDepth` model ids. It can be
 *   shorter, or empty, when policy blocks entries.
 * @throws {Error} When `maxDepth` is not a number >= 1 (NaN included) or
 *   `archetype` is not an own key of STARTER_CHAINS.
 */
function getDefaultFallbackChain(opts) {
  const { archetype, primary, maxDepth = 3, policy } = opts;
  // `!(x >= 1)` also rejects NaN, which `x < 1` lets through and which would
  // otherwise make `slice(0, NaN)` return an empty chain without any error.
  if (!(maxDepth >= 1)) {
    throw new Error(
      `getDefaultFallbackChain: maxDepth must be >= 1, got ${maxDepth}`
    );
  }
  // Own-property check: a bare `STARTER_CHAINS[archetype]` would resolve
  // inherited names such as "constructor" and skip this error.
  if (!Object.prototype.hasOwnProperty.call(STARTER_CHAINS, archetype)) {
    throw new Error(
      `getDefaultFallbackChain: unknown archetype "${archetype}". Known: ${Object.keys(STARTER_CHAINS).join(", ")}`
    );
  }
  const starter = STARTER_CHAINS[archetype];
  const candidates = primary ? [primary, ...starter] : starter;
  const blocked = new Set(policy?.blockedModels || []);
  // A Set keeps first-insertion order, so `primary` stays in front while any
  // later duplicate of it is dropped.
  const chain = [...new Set(candidates)].filter((id) => !blocked.has(id));
  return chain.slice(0, maxDepth);
}

/**
 * Return a copy of the starter chain for one archetype.
 *
 * @param {string} archetype - Key of STARTER_CHAINS.
 * @returns {string[]} A fresh array; mutating it does not affect the table.
 * @throws {Error} When `archetype` is not an own key of STARTER_CHAINS.
 */
function getStarterChain(archetype) {
  if (!Object.prototype.hasOwnProperty.call(STARTER_CHAINS, archetype)) {
    throw new Error(
      `getStarterChain: unknown archetype "${archetype}"`
    );
  }
  return [...STARTER_CHAINS[archetype]];
}

/**
 * Return a copy of the whole starter-chain table.
 *
 * @returns {Record<string, string[]>} New object with a fresh array per
 *   archetype, so callers cannot mutate the shared table.
 */
function getAllStarterChains() {
  return Object.fromEntries(
    Object.entries(STARTER_CHAINS).map(([archetype, chain]) => [archetype, [...chain]])
  );
}
|
|
2409
|
+
|
|
1986
2410
|
// src/index.ts
|
|
1987
2411
|
function compile2(ir, opts) {
|
|
1988
2412
|
const result = compile(ir, opts);
|
|
@@ -2007,7 +2431,10 @@ function compile2(ir, opts) {
|
|
|
2007
2431
|
configureBrain,
|
|
2008
2432
|
countTokens,
|
|
2009
2433
|
execute,
|
|
2434
|
+
getAllStarterChains,
|
|
2435
|
+
getDefaultFallbackChain,
|
|
2010
2436
|
getProfile,
|
|
2437
|
+
getStarterChain,
|
|
2011
2438
|
hashShape,
|
|
2012
2439
|
isArchetype,
|
|
2013
2440
|
learningKey,
|