@warmdrift/kgauto-compiler 2.0.0-alpha.8 → 2.0.0-alpha.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -36,7 +36,10 @@ __export(index_exports, {
36
36
  configureBrain: () => configureBrain,
37
37
  countTokens: () => countTokens,
38
38
  execute: () => execute,
39
+ getAllStarterChains: () => getAllStarterChains,
40
+ getDefaultFallbackChain: () => getDefaultFallbackChain,
39
41
  getProfile: () => getProfile,
42
+ getStarterChain: () => getStarterChain,
40
43
  hashShape: () => hashShape,
41
44
  isArchetype: () => isArchetype,
42
45
  learningKey: () => learningKey,
@@ -859,7 +862,24 @@ var PROFILES_RAW = [
859
862
  ],
860
863
  strengths: ["reasoning", "agentic_coding", "long_context", "reliable_tool_use", "structured_output"],
861
864
  weaknesses: ["cost", "latency"],
862
- notes: "Frontier (2026-05). Step-change improvement over 4.6 in agentic coding. Adaptive thinking only \u2014 no extended-thinking toggle. 1M context, 128k max output."
865
+ notes: "Frontier (2026-05). Step-change improvement over 4.6 in agentic coding. Adaptive thinking only \u2014 no extended-thinking toggle. 1M context, 128k max output.",
866
+ // Frontier perf. Drops on archetypes where parallel-tool throughput
867
+ // (hunt) or low-budget cost-sensitivity (classify/summarize) matters
868
+ // more than reasoning depth.
869
+ archetypePerf: {
870
+ critique: 10,
871
+ plan: 10,
872
+ generate: 9,
873
+ ask: 9,
874
+ extract: 9,
875
+ transform: 9,
876
+ hunt: 8,
877
+ // strong but Flash dominates parallel tool throughput
878
+ summarize: 8,
879
+ // overkill for tolerant archetype; cost-out of frontier
880
+ classify: 8
881
+ // overkill; brain-validated cheaper models cover this
882
+ }
863
883
  },
864
884
  {
865
885
  id: "claude-opus-4-6",
@@ -891,7 +911,20 @@ var PROFILES_RAW = [
891
911
  ],
892
912
  strengths: ["reasoning", "long_context", "reliable_tool_use", "structured_output", "extended_thinking"],
893
913
  weaknesses: ["cost", "latency"],
894
- notes: "Predecessor to 4.7. Still current in Anthropic legacy table. Same pricing as 4.7 \u2014 choose 4.7 unless you need extended-thinking budget control (4.7 is adaptive-only)."
914
+ notes: "Predecessor to 4.7. Still current in Anthropic legacy table. Same pricing as 4.7 \u2014 choose 4.7 unless you need extended-thinking budget control (4.7 is adaptive-only).",
915
+ // One notch below 4.7 across the board — extended-thinking edge does
916
+ // not flip any archetype ranking. Legacy: chains should prefer 4.7.
917
+ archetypePerf: {
918
+ critique: 9,
919
+ plan: 9,
920
+ generate: 9,
921
+ ask: 9,
922
+ extract: 9,
923
+ transform: 9,
924
+ hunt: 7,
925
+ summarize: 8,
926
+ classify: 8
927
+ }
895
928
  },
896
929
  {
897
930
  id: "claude-sonnet-4-6",
@@ -915,7 +948,23 @@ var PROFILES_RAW = [
915
948
  ],
916
949
  strengths: ["quality", "tool_use", "long_context", "cache_friendly", "extended_thinking"],
917
950
  weaknesses: [],
918
- notes: "Workhorse. Best price/quality for most multi-turn agentic work. 1M context, 64k max output."
951
+ notes: "Workhorse. Best price/quality for most multi-turn agentic work. 1M context, 64k max output.",
952
+ // Master plan §6.2 anchor. Tier 0 for plan/generate/ask/extract/transform
953
+ // in starter chains; tier 1 cross-provider for hunt/summarize/classify.
954
+ archetypePerf: {
955
+ ask: 9,
956
+ generate: 9,
957
+ plan: 9,
958
+ critique: 9,
959
+ extract: 9,
960
+ transform: 9,
961
+ hunt: 7,
962
+ // strong but Flash beats on parallel tool throughput
963
+ summarize: 8,
964
+ // overkill for tolerant archetype
965
+ classify: 8
966
+ // overkill
967
+ }
919
968
  },
920
969
  {
921
970
  id: "claude-haiku-4-5",
@@ -945,7 +994,23 @@ var PROFILES_RAW = [
945
994
  ],
946
995
  strengths: ["speed", "cost", "classification", "cache_friendly", "extended_thinking"],
947
996
  weaknesses: ["complex_reasoning", "large_tool_sets"],
948
- notes: "Cheapest Anthropic. Great for classify, summarize, ask shapes. 200k context, 64k max output. API alias `claude-haiku-4-5` resolves to dated snapshot `claude-haiku-4-5-20251001`."
997
+ notes: "Cheapest Anthropic. Great for classify, summarize, ask shapes. 200k context, 64k max output. API alias `claude-haiku-4-5` resolves to dated snapshot `claude-haiku-4-5-20251001`.",
998
+ // Tier 1 cross-provider anchor for short-output chains (classify/
999
+ // summarize/extract/transform). Falls off on plan/critique where
1000
+ // reasoning depth matters; competes with Pro on cost+latency.
1001
+ archetypePerf: {
1002
+ classify: 8,
1003
+ summarize: 8,
1004
+ ask: 7,
1005
+ transform: 7,
1006
+ extract: 7,
1007
+ hunt: 6,
1008
+ // tool reliability drops at 16 — cliff guard fires
1009
+ generate: 6,
1010
+ plan: 5,
1011
+ critique: 4
1012
+ // reasoning depth gap vs Sonnet/Opus
1013
+ }
949
1014
  },
950
1015
  // ── Google ──
951
1016
  {
@@ -1023,7 +1088,131 @@ var PROFILES_RAW = [
1023
1088
  ],
1024
1089
  strengths: ["speed", "volume", "classification", "1m_context", "cost"],
1025
1090
  weaknesses: ["complex_schemas", "large_tool_sets", "high_context_quality"],
1026
- notes: "Fast and cheap with 1M context. Quality cliffs at 8K context and 20 tools \u2014 guard with cliffs."
1091
+ notes: "Fast and cheap with 1M context. Quality cliffs at 8K context and 20 tools \u2014 guard with cliffs.",
1092
+ // Master plan §6.2 anchor. Tier 0 for hunt (parallel tool throughput
1093
+ // 15-75 calls/step beats Sonnet — L-040), summarize, classify.
1094
+ archetypePerf: {
1095
+ hunt: 9,
1096
+ // L-040: parallel tool throughput 15-75/step
1097
+ classify: 7,
1098
+ // brain-validated, 218 rows
1099
+ summarize: 7,
1100
+ // brain-validated; cliff strips tools when present
1101
+ transform: 7,
1102
+ ask: 7,
1103
+ generate: 6,
1104
+ plan: 5,
1105
+ extract: 6,
1106
+ // alpha.8 MAX_TOKENS history on structured output
1107
+ critique: 4
1108
+ // reasoning shallower than Sonnet/Opus
1109
+ }
1110
+ },
1111
+ {
1112
+ // ── Gemini 2.5 Flash-Lite ──
1113
+ // Onboarded 2026-05-13 (s22) after the model-release watcher surfaced
1114
+ // it as a UNREGISTERED + NEW candidate. Released by Google July 2025,
1115
+ // stable. Positioned BELOW Flash on the cost/perf frontier:
1116
+ // input $0.10/M (Flash $0.30/M) — 3× cheaper
1117
+ // output $0.40/M (Flash $2.50/M) — 6× cheaper
1118
+ // cache $0.01/M — 1/10 of input (vs Flash 0.25 discount)
1119
+ // Cliffs are HYPOTHESIZED from Flash's known failure modes — Flash-Lite
1120
+ // is a smaller sibling, so we inherit Flash's cliffs at equal-or-tighter
1121
+ // thresholds. The brain will validate/relax these as evidence accumulates
1122
+ // per (archetype, model) tuple. Currently ZERO brain rows for this model.
1123
+ id: "gemini-2.5-flash-lite",
1124
+ verifiedAgainstDocs: "2026-05-13",
1125
+ provider: "google",
1126
+ status: "current",
1127
+ maxContextTokens: 1048576,
1128
+ maxOutputTokens: 65536,
1129
+ maxTools: 128,
1130
+ parallelToolCalls: true,
1131
+ structuredOutput: "native",
1132
+ systemPromptMode: "separate",
1133
+ streaming: true,
1134
+ cliffs: [
1135
+ {
1136
+ metric: "input_tokens",
1137
+ threshold: 8e3,
1138
+ action: "downgrade_quality_warning",
1139
+ reason: "Inherited from Flash: quality degrades above ~8K. Smaller model \u2014 likely degrades faster. Re-tune from brain after n\u226520."
1140
+ },
1141
+ {
1142
+ metric: "tool_count",
1143
+ threshold: 10,
1144
+ action: "drop_to_top_relevant",
1145
+ reason: "Conservative: Flash drops at 20, Flash-Lite is smaller \u2014 assume tighter ceiling until brain proves otherwise."
1146
+ },
1147
+ {
1148
+ metric: "thinking_with_short_output",
1149
+ threshold: 1,
1150
+ action: "force_thinking_budget_zero",
1151
+ reason: "Thinking enabled per Google API (thinking: true). Same drain risk as Flash \u2014 thinking tokens consume maxOutputTokens."
1152
+ },
1153
+ {
1154
+ // Strong prior: Flash hit 5/5 empty rate on summarize+tools (s11
1155
+ // trust artifact, kgauto commit 3872832). Flash-Lite shares the
1156
+ // same architectural family — almost certainly inherits this cliff.
1157
+ // Ship the guard preemptively; brain telemetry confirms or relaxes.
1158
+ metric: "tool_count",
1159
+ threshold: 1,
1160
+ whenIntent: "summarize",
1161
+ action: "strip_tools",
1162
+ reason: "Inherited from Flash s11 cliff: summarize+tools \u2192 empty response. Preemptive guard until brain evidence on Flash-Lite specifically."
1163
+ }
1164
+ ],
1165
+ costInputPer1m: 0.1,
1166
+ costOutputPer1m: 0.4,
1167
+ lowering: {
1168
+ ...GOOGLE_LOWERING_BASE,
1169
+ // Cache discount 10× (vs Flash 4×) — Google's spec is $0.01/M cache vs
1170
+ // $0.10/M input. Material for repeat-prompt workloads (classify shape).
1171
+ cache: { ...GOOGLE_LOWERING_BASE.cache, discount: 0.1 },
1172
+ thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
1173
+ },
1174
+ recovery: [
1175
+ {
1176
+ signal: "empty_response_after_tool",
1177
+ action: "retry_with_params",
1178
+ retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
1179
+ maxRetries: 1,
1180
+ reason: "Known on Flash family: empty after tool result \u2014 retry with thinking off."
1181
+ },
1182
+ {
1183
+ signal: "empty_response",
1184
+ action: "retry_with_params",
1185
+ retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
1186
+ maxRetries: 1,
1187
+ reason: "Empty response \u2014 try with thinking off."
1188
+ },
1189
+ {
1190
+ signal: "malformed_function_call",
1191
+ action: "escalate",
1192
+ reason: "MALFORMED_FUNCTION_CALL maps to stop \u2014 escalate to next target."
1193
+ }
1194
+ ],
1195
+ strengths: ["lowest_cost", "speed", "volume", "classification", "summarize", "1m_context", "cache_friendly"],
1196
+ weaknesses: ["complex_reasoning", "large_tool_sets", "complex_schemas", "structured_output_unproven", "long_context_quality"],
1197
+ notes: "Bottom-frontier anchor on cost: $0.10/$0.40 per 1M tokens, 1M context, 65K max output. Released July 2025 (stable). Positioned for classify / summarize / transform archetypes where quality bar is forgiving. Cliffs inherited from Flash at equal-or-tighter thresholds \u2014 re-tune per (archetype) once brain has n\u226520 rows. Alpha.8 contract layer handles MAX_TOKENS-on-structured-output via fallback chain, so structuredOutput=native is safe to declare even though Flash had alpha.8 history. Cache discount in spec: $0.01/M = 1/10 of input (richer than Flash 25%) \u2014 meaningful for repeat-prompt workloads.",
1198
+ // Tier 3 emergency floor for summarize/classify chains. ZERO brain
1199
+ // rows — all values are starter hypotheses anchored to "smaller
1200
+ // sibling of Flash, at-or-below Flash perf on every archetype." The
1201
+ // first 50 brain rows per archetype will validate or relax these.
1202
+ archetypePerf: {
1203
+ classify: 6,
1204
+ // starter hypothesis — verify (Flash is 7, lite likely ≤)
1205
+ summarize: 6,
1206
+ // starter hypothesis — verify; cliff strips tools
1207
+ transform: 6,
1208
+ // starter hypothesis — verify
1209
+ ask: 5,
1210
+ hunt: 5,
1211
+ generate: 4,
1212
+ extract: 4,
1213
+ plan: 3,
1214
+ critique: 3
1215
+ }
1027
1216
  },
1028
1217
  {
1029
1218
  id: "gemini-2.5-pro",
@@ -1059,7 +1248,21 @@ var PROFILES_RAW = [
1059
1248
  }
1060
1249
  ],
1061
1250
  strengths: ["reasoning", "1m_context", "structured_output", "tool_use"],
1062
- weaknesses: ["pricing_above_200k"]
1251
+ weaknesses: ["pricing_above_200k"],
1252
+ // Master plan §3.3 anchor: tier-2 cross-provider in almost every chain.
1253
+ // Sits on the frontier at perf-9 — close to Sonnet but cheaper input.
1254
+ archetypePerf: {
1255
+ critique: 9,
1256
+ plan: 9,
1257
+ ask: 8,
1258
+ generate: 8,
1259
+ extract: 8,
1260
+ transform: 8,
1261
+ hunt: 8,
1262
+ // tier 1 cross-provider for hunt chain
1263
+ summarize: 7,
1264
+ classify: 7
1265
+ }
1063
1266
  },
1064
1267
  {
1065
1268
  id: "gemini-3.1-pro-preview",
@@ -1097,7 +1300,23 @@ var PROFILES_RAW = [
1097
1300
  ],
1098
1301
  strengths: ["reasoning", "1m_context", "agentic_coding", "structured_output", "tool_use"],
1099
1302
  weaknesses: ["cost", "preview_status", "pricing_above_200k"],
1100
- notes: "Frontier Gemini (preview, 2026-Q2). Step-change agentic coding per Google. Cache discount 10\xD7 (vs 4\xD7 for 2.5 Pro). Use status=preview to flag rollback path until GA."
1303
+ notes: "Frontier Gemini (preview, 2026-Q2). Step-change agentic coding per Google. Cache discount 10\xD7 (vs 4\xD7 for 2.5 Pro). Use status=preview to flag rollback path until GA.",
1304
+ // Frontier-Gemini preview — bumped one notch over 2.5 Pro on agentic
1305
+ // coding / reasoning per Google's release notes. Preview status:
1306
+ // chains should stay on 2.5 Pro until GA. Starter hypothesis.
1307
+ archetypePerf: {
1308
+ critique: 10,
1309
+ // Google claims step-change on reasoning
1310
+ plan: 10,
1311
+ ask: 9,
1312
+ generate: 9,
1313
+ extract: 9,
1314
+ transform: 8,
1315
+ hunt: 9,
1316
+ // step-change agentic per Google
1317
+ summarize: 8,
1318
+ classify: 7
1319
+ }
1101
1320
  },
1102
1321
  // ── DeepSeek ──
1103
1322
  // 2026-05-08 audit (L-073): DeepSeek's `deepseek-chat` was silently aliased
@@ -1137,7 +1356,24 @@ var PROFILES_RAW = [
1137
1356
  ],
1138
1357
  strengths: ["cost", "1m_context", "json_output", "code", "reasoning"],
1139
1358
  weaknesses: ["parallel_tools", "large_tool_sets"],
1140
- notes: "Cheap workhorse. 1M context, 384k max output. Cache-hit input $0.0028/M (1/50\xD7 of miss). Aliased as `deepseek-chat` (non-thinking) and `deepseek-reasoner` (thinking) \u2014 see ALIASES."
1359
+ notes: "Cheap workhorse. 1M context, 384k max output. Cache-hit input $0.0028/M (1/50\xD7 of miss). Aliased as `deepseek-chat` (non-thinking) and `deepseek-reasoner` (thinking) \u2014 see ALIASES.",
1360
+ // Master plan §6.2 anchor. Brain-validated tier 1 cross-provider for
1361
+ // classify (169 rows, 0% empty). Tier 0 for summarize-with-no-tools.
1362
+ // Falls off on hunt (sequential tools — L-040) and reasoning depth.
1363
+ archetypePerf: {
1364
+ classify: 7,
1365
+ // brain-validated, 169 rows
1366
+ summarize: 7,
1367
+ // archetype-tolerant, no brain evidence yet
1368
+ ask: 6,
1369
+ transform: 6,
1370
+ generate: 5,
1371
+ plan: 5,
1372
+ extract: 5,
1373
+ critique: 4,
1374
+ hunt: 4
1375
+ // sequential tool calls only — L-040
1376
+ }
1141
1377
  },
1142
1378
  {
1143
1379
  id: "deepseek-v4-pro",
@@ -1173,7 +1409,22 @@ var PROFILES_RAW = [
1173
1409
  ],
1174
1410
  strengths: ["quality", "reasoning", "1m_context", "json_output", "code", "extended_thinking"],
1175
1411
  weaknesses: ["parallel_tools", "large_tool_sets"],
1176
- notes: "Pro tier. 1M context, 384k max output. Regular pricing $1.74/$3.48; 75% promo through 2026-05-31 ($0.435/$0.87). Default mode = thinking."
1412
+ notes: "Pro tier. 1M context, 384k max output. Regular pricing $1.74/$3.48; 75% promo through 2026-05-31 ($0.435/$0.87). Default mode = thinking.",
1413
+ // Master plan §3.3: tier 3 cross-provider for plan chain. Reasoning
1414
+ // bumped one notch over V4-Flash; same parallel-tool ceiling.
1415
+ archetypePerf: {
1416
+ plan: 7,
1417
+ // §3.3 tier 3 for plan
1418
+ critique: 6,
1419
+ ask: 7,
1420
+ generate: 6,
1421
+ classify: 7,
1422
+ summarize: 7,
1423
+ extract: 6,
1424
+ transform: 6,
1425
+ hunt: 4
1426
+ // sequential tools — same as V4-Flash
1427
+ }
1177
1428
  }
1178
1429
  ];
1179
1430
  var ALIASES = {
@@ -1209,12 +1460,13 @@ function profilesByProvider(provider) {
1209
1460
  }
1210
1461
 
1211
1462
  // src/advisor.ts
1212
/**
 * Runs every advisor rule against a compiled request and returns their
 * findings as one flat array, in rule order.
 *
 * @param {object} ir - Request IR; handed to every rule.
 * @param {object} result - Compile result; only the tool-bloat rule reads it.
 * @param {object} profile - Profile of the target model; used by the caching rules.
 * @param {object} [policy] - Optional policy; only the single-model-array rule reads it.
 * @returns {object[]} Concatenated advisories (empty when no rule fires).
 */
function runAdvisor(ir, result, profile, policy) {
  // Spread elements are evaluated left to right, so the rules still run in
  // the same order as before and the output order is unchanged.
  return [
    ...detectCachingOff(ir, profile),
    ...detectSingleChunkSystem(ir, profile),
    ...detectToolBloat(ir, result),
    ...detectHistoryUncached(ir, profile),
    ...detectSingleModelArray(ir, policy),
  ];
}
1220
1472
  function detectCachingOff(ir, profile) {
@@ -1286,6 +1538,20 @@ function detectHistoryUncached(ir, profile) {
1286
1538
  }
1287
1539
  ];
1288
1540
  }
1541
/**
 * Advisor rule `single-model-array`: warns when the request lists exactly one
 * model and the policy posture is not `"locked"`.
 *
 * @param {{ models: string[] }} ir - Request IR; only `ir.models` is read.
 * @param {{ posture?: string }} [policy] - Optional policy; a posture of
 *   `"locked"` silences the rule.
 * @returns {object[]} An empty array, or a one-element array holding the
 *   warning (`level`, `code`, `message`, `suggestion`, `docsUrl`).
 */
function detectSingleModelArray(ir, policy) {
  const isSingleModel = ir.models.length === 1;
  if (!isSingleModel || policy?.posture === "locked") {
    return [];
  }

  const soleModel = ir.models[0];
  const message = `\`ir.models\` has length 1 (only "${soleModel}") and posture is not 'locked'. A single-model chain has no safety net \u2014 the first 429 / 5xx / cliff hits the user as a failure. Master plan \xA71.2 closes the reliability gap with a 2-step minimum.`;
  const suggestion = `Use \`getDefaultFallbackChain({ archetype: ir.intent.archetype, primary: '${soleModel}', posture: 'preferred' })\` for a user-anchored chain, or \`getDefaultFallbackChain({ archetype, posture: 'open' })\` for library-picked. If single-model is intentional (compliance/brand promise), set \`policy.posture = 'locked'\` to silence this rule.`;

  return [
    {
      level: "warn",
      code: "single-model-array",
      message,
      suggestion,
      docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#single-model-array",
    },
  ];
}
1289
1555
 
1290
1556
  // src/compile.ts
1291
1557
  var counter = 0;
@@ -1358,7 +1624,8 @@ function compile(ir, opts = {}) {
1358
1624
  tokensIn: inputTokens,
1359
1625
  diagnostics
1360
1626
  },
1361
- profile
1627
+ profile,
1628
+ opts.policy
1362
1629
  );
1363
1630
  return {
1364
1631
  handle,
@@ -1829,6 +2096,7 @@ async function call(ir, opts = {}) {
1829
2096
  cacheReadInputTokens: validated.response.tokens.cached,
1830
2097
  cacheCreationInputTokens: validated.response.tokens.cacheCreated
1831
2098
  });
2099
+ const fellOver = targetModel !== initial.target;
1832
2100
  return {
1833
2101
  handle: initial.handle,
1834
2102
  actualModel: targetModel,
@@ -1837,7 +2105,10 @@ async function call(ir, opts = {}) {
1837
2105
  response: validated.response,
1838
2106
  latencyMs: latencyMs2,
1839
2107
  mutationsApplied: activeCompile.mutationsApplied,
1840
- attempts
2108
+ attempts,
2109
+ servedBy: targetModel,
2110
+ fellOverFrom: fellOver ? initial.target : void 0,
2111
+ fallbackReason: fellOver ? normalizeFallbackReason(attempts) : void 0
1841
2112
  };
1842
2113
  }
1843
2114
  attempts.push({
@@ -1916,6 +2187,17 @@ function validateStructuredContract(exec, ir) {
1916
2187
  };
1917
2188
  }
1918
2189
  }
2190
/**
 * Maps the first non-successful attempt onto a coarse fallback reason.
 *
 * @param {Array<{ status: string, errorCode?: string }>} attempts - Attempt
 *   records, scanned in array order.
 * @returns {"rate_limit" | "cliff" | "cost_cap" | "provider_error" | undefined}
 *   `undefined` when no attempt has a status other than `"success"`;
 *   otherwise the bucket for that attempt's `errorCode`. Unrecognised or
 *   missing codes map to `"provider_error"`.
 */
function normalizeFallbackReason(attempts) {
  const firstFailure = attempts.find((attempt) => attempt.status !== "success");
  if (!firstFailure) {
    return undefined;
  }

  switch (firstFailure.errorCode ?? "") {
    case "rate_limit_429":
    case "rate_limit":
      return "rate_limit";
    case "max_tokens_on_structured_output":
    case "structured_output_parse_failed":
      return "cliff";
    case "cost_cap_exceeded":
      return "cost_cap";
    default:
      return "provider_error";
  }
}
1919
2201
 
1920
2202
  // src/oracle.ts
1921
2203
  var DEFAULT_DIMENSIONS = ["correctness", "completeness", "conciseness", "format"];
@@ -2005,6 +2287,126 @@ function clamp(n) {
2005
2287
  return Math.max(0, Math.min(1, n));
2006
2288
  }
2007
2289
 
2290
+ // src/fallback.ts
2291
/**
 * Library-default fallback chains, one ordered list of model ids per
 * archetype. Index 0 is the primary; later entries are tried in order.
 * `getDefaultFallbackChain` truncates to `maxDepth` (default 3), so a fourth
 * entry is only reachable when the caller asks for `maxDepth >= 4`.
 *
 * Kept as `var` to match the other module-level declarations in this bundle.
 */
var STARTER_CHAINS = {
  // Reasoning floor: Opus primary, then Sonnet, then cross-provider via
  // Gemini Pro.
  // NOTE(review): the earlier comment said "walk UP on 429 to Opus", but Opus
  // is already the first entry here — confirm the intended order.
  critique: [
    "claude-opus-4-7",
    "claude-sonnet-4-6",
    "gemini-2.5-pro"
  ],
  // Reasoning matters — Sonnet primary; walk UP to Opus on 429 (rare exception
  // to "always cheaper"); cross-provider via Pro; DeepSeek Pro as tier 3 floor.
  plan: [
    "claude-sonnet-4-6",
    "claude-opus-4-7",
    "gemini-2.5-pro",
    "deepseek-v4-pro"
  ],
  // Quality + cost match. Walk Sonnet → Haiku same-provider, Pro cross,
  // Flash floor for the open-posture chain.
  generate: [
    "claude-sonnet-4-6",
    "claude-haiku-4-5",
    "gemini-2.5-pro",
    "gemini-2.5-flash"
  ],
  // Same ordering as `generate`.
  ask: [
    "claude-sonnet-4-6",
    "claude-haiku-4-5",
    "gemini-2.5-pro",
    "gemini-2.5-flash"
  ],
  // Structured-output archetype — Flash skipped (alpha.8 MAX_TOKENS cliff),
  // DeepSeek skipped (no brain evidence). Three entries only.
  extract: [
    "claude-sonnet-4-6",
    "claude-haiku-4-5",
    "gemini-2.5-pro"
  ],
  // Forgiving archetype — Sonnet primary, Flash as the last entry.
  transform: [
    "claude-sonnet-4-6",
    "claude-haiku-4-5",
    "gemini-2.5-pro",
    "gemini-2.5-flash"
  ],
  // Parallel-tool throughput champion (Flash, L-040) first; then Pro; then
  // Sonnet (quality safety net for the blocked-Flash case); then Haiku
  // (reduced tool budget — cliff at 16 fires).
  hunt: [
    "gemini-2.5-flash",
    "gemini-2.5-pro",
    "claude-sonnet-4-6",
    "claude-haiku-4-5"
  ],
  // Cost-sensitive + tolerant. Flash primary; DeepSeek brain-evidence tier 1;
  // Haiku tier 2 for quality safety; Flash-Lite emergency floor (onboarded s22).
  summarize: [
    "gemini-2.5-flash",
    "deepseek-v4-flash",
    "claude-haiku-4-5",
    "gemini-2.5-flash-lite"
  ],
  // Flash primary; brain-validated DeepSeek tier 1 (169 rows, 0% empty);
  // Haiku tier 2; Flash-Lite floor for repeat-prompt workloads
  // (cache-discount 10×).
  classify: [
    "gemini-2.5-flash",
    "deepseek-v4-flash",
    "claude-haiku-4-5",
    "gemini-2.5-flash-lite"
  ]
};

/**
 * Returns the starter chain stored under `archetype`, or `undefined`.
 *
 * Only OWN keys count: a bare `STARTER_CHAINS[archetype]` would also resolve
 * inherited names such as "constructor", "toString" or "__proto__", which
 * slips past the callers' unknown-archetype guards and fails later with an
 * opaque TypeError.
 */
function ownStarterChain(archetype) {
  return Object.prototype.hasOwnProperty.call(STARTER_CHAINS, archetype)
    ? STARTER_CHAINS[archetype]
    : undefined;
}

/**
 * Builds the fallback chain for an archetype.
 *
 * Steps, in order: start from the archetype's starter chain; if `primary` is
 * given, move/insert it at the front; drop every id listed in
 * `policy.blockedModels` (this can remove `primary` too); de-duplicate
 * keeping first occurrences; truncate to `maxDepth`.
 *
 * @param {object} opts
 * @param {string} opts.archetype - Key of `STARTER_CHAINS`.
 * @param {string} [opts.primary] - Model id to place first; need not be in the starter chain.
 * @param {number} [opts.maxDepth=3] - Maximum chain length; must be >= 1.
 * @param {{ blockedModels?: string[] }} [opts.policy] - Optional block list.
 * @returns {string[]} A fresh array; may be empty if every candidate is blocked.
 * @throws {Error} When `maxDepth` is not >= 1 (including NaN) or the archetype is unknown.
 */
function getDefaultFallbackChain(opts) {
  const { archetype, primary, maxDepth = 3, policy } = opts;

  // `!(x >= 1)` instead of `x < 1`: NaN compares false both ways, so the old
  // check let it through and `slice(0, NaN)` silently produced an empty chain.
  if (!(maxDepth >= 1)) {
    throw new Error(
      `getDefaultFallbackChain: maxDepth must be >= 1, got ${maxDepth}`
    );
  }

  const starter = ownStarterChain(archetype);
  if (!starter) {
    throw new Error(
      `getDefaultFallbackChain: unknown archetype "${archetype}". Known: ${Object.keys(STARTER_CHAINS).join(", ")}`
    );
  }

  let chain = primary
    ? [primary, ...starter.filter((id) => id !== primary)]
    : [...starter];

  if (policy?.blockedModels && policy.blockedModels.length > 0) {
    const blocked = new Set(policy.blockedModels);
    chain = chain.filter((id) => !blocked.has(id));
  }

  // A Set iterates in insertion order, so this keeps the first occurrence of
  // each id — same result as a manual seen-set loop.
  return [...new Set(chain)].slice(0, maxDepth);
}

/**
 * Returns a copy of the raw starter chain for one archetype (no primary,
 * policy or depth handling).
 *
 * @param {string} archetype - Key of `STARTER_CHAINS`.
 * @returns {string[]} A fresh array the caller may mutate.
 * @throws {Error} When the archetype is unknown.
 */
function getStarterChain(archetype) {
  const chain = ownStarterChain(archetype);
  if (!chain) {
    throw new Error(
      `getStarterChain: unknown archetype "${archetype}"`
    );
  }
  return [...chain];
}

/**
 * Returns a copy of every starter chain, keyed by archetype.
 *
 * @returns {Record<string, string[]>} New object with new arrays; mutating it
 *   does not affect the library defaults.
 */
function getAllStarterChains() {
  return Object.fromEntries(
    Object.entries(STARTER_CHAINS).map(([archetype, chain]) => [archetype, [...chain]])
  );
}
2409
+
2008
2410
  // src/index.ts
2009
2411
  function compile2(ir, opts) {
2010
2412
  const result = compile(ir, opts);
@@ -2029,7 +2431,10 @@ function compile2(ir, opts) {
2029
2431
  configureBrain,
2030
2432
  countTokens,
2031
2433
  execute,
2434
+ getAllStarterChains,
2435
+ getDefaultFallbackChain,
2032
2436
  getProfile,
2437
+ getStarterChain,
2033
2438
  hashShape,
2034
2439
  isArchetype,
2035
2440
  learningKey,