@warmdrift/kgauto-compiler 2.0.0-alpha.20 → 2.0.0-alpha.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -25,6 +25,7 @@ __export(index_exports, {
25
25
  CallError: () => CallError,
26
26
  DIALECT_VERSION: () => DIALECT_VERSION,
27
27
  INTENT_ARCHETYPES: () => INTENT_ARCHETYPES,
28
+ MEASURED_GROUNDING_MIN_N: () => MEASURED_GROUNDING_MIN_N,
28
29
  PROVIDER_ENV_KEYS: () => PROVIDER_ENV_KEYS,
29
30
  allProfiles: () => allProfiles,
30
31
  bucketContext: () => bucketContext,
@@ -38,18 +39,25 @@ __export(index_exports, {
38
39
  countTokens: () => countTokens,
39
40
  execute: () => execute,
40
41
  getAllStarterChains: () => getAllStarterChains,
42
+ getAllStarterChainsWithGrounding: () => getAllStarterChainsWithGrounding,
41
43
  getArchetypePerfScore: () => getArchetypePerfScore,
42
44
  getDefaultFallbackChain: () => getDefaultFallbackChain,
45
+ getDefaultFallbackChainWithGrounding: () => getDefaultFallbackChainWithGrounding,
43
46
  getProfile: () => getProfile,
44
47
  getReachabilityDiagnostic: () => getReachabilityDiagnostic,
48
+ getSequentialStarterChain: () => getSequentialStarterChain,
49
+ getSequentialStarterChainWithGrounding: () => getSequentialStarterChainWithGrounding,
45
50
  getStarterChain: () => getStarterChain,
51
+ getStarterChainWithGrounding: () => getStarterChainWithGrounding,
46
52
  hashShape: () => hashShape,
47
53
  isArchetype: () => isArchetype,
54
+ isBrainQueryActiveFor: () => isBrainQueryActiveFor,
48
55
  isModelReachable: () => isModelReachable,
49
56
  isProviderReachable: () => isProviderReachable,
50
57
  learningKey: () => learningKey,
51
58
  loadAliasesFromBrain: () => loadAliasesFromBrain,
52
59
  loadArchetypePerfFromBrain: () => loadArchetypePerfFromBrain,
60
+ loadArchetypePerfNFromBrain: () => loadArchetypePerfNFromBrain,
53
61
  loadChainsFromBrain: () => loadChainsFromBrain,
54
62
  loadModelsFromBrain: () => loadModelsFromBrain,
55
63
  loadPricingFromBrain: () => loadPricingFromBrain,
@@ -1937,14 +1945,160 @@ function profilesByProvider(provider) {
1937
1945
  return PROFILES_RAW.filter((p) => p.provider === provider);
1938
1946
  }
1939
1947
 
1948
+ // src/brain-query.ts
1949
// Pristine cache state: no payload fetched, already expired (expiresAt 0),
// no refresh in flight, and the one-time failure warning not yet emitted.
var FRESH_SNAPSHOT = {
  data: null,
  expiresAt: 0,
  refreshing: false,
  warned: false
};
// Live cache state read by every reader built with createBrainQueryCache.
// Always a copy, so the pristine template above is never mutated.
var snapshot = Object.assign({}, FRESH_SNAPSHOT);
// Active brain-query runtime; remains undefined until configureBrainQuery runs.
var runtime;
1957
/**
 * Installs (or replaces) the brain-query runtime and discards any cached data.
 *
 * @param {object} rt - Runtime object stored as the module-level `runtime`.
 */
function configureBrainQuery(rt) {
  // Reset to a fresh copy so data fetched under a previous runtime is never served.
  snapshot = Object.assign({}, FRESH_SNAPSHOT);
  runtime = rt;
}
1961
/**
 * Builds a synchronous reader for one brain table backed by the shared snapshot.
 *
 * The reader never awaits: when the snapshot has expired it starts a background
 * refresh and answers from whatever data is currently held.
 *
 * @param {object} opts
 * @param {string} opts.table - Key looked up in the fetched payload and in `runtime.enabledTables`.
 * @param {Function} opts.mapRows - Converts the raw, non-empty row array into the returned value.
 * @param {Function} opts.bundledFallback - Supplies the value whenever brain rows cannot be used.
 * @returns {Function} Zero-argument reader.
 */
function createBrainQueryCache(opts) {
  return () => {
    const activeRuntime = runtime;
    if (!(activeRuntime && activeRuntime.enabledTables.has(opts.table))) {
      return opts.bundledFallback();
    }
    // Start at most one background refresh once the snapshot has expired.
    const expired = snapshot.expiresAt <= Date.now();
    if (expired && !snapshot.refreshing) {
      snapshot.refreshing = true;
      void asyncRefresh(activeRuntime);
    }
    // Read the snapshot only after the refresh has been triggered.
    const rows = snapshot.data ? snapshot.data[opts.table] : void 0;
    if (!Array.isArray(rows) || rows.length === 0) {
      return opts.bundledFallback();
    }
    try {
      return opts.mapRows(rows);
    } catch {
      // A mapper failure degrades to the bundled data instead of propagating.
      return opts.bundledFallback();
    }
  };
}
1986
// Promise of the refresh currently in flight; undefined while idle.
var pendingRefresh;
/**
 * Runs one refresh for `rt` and exposes its promise through `pendingRefresh`
 * for as long as it is in flight.
 *
 * @param {object} rt - Runtime the refresh is performed for.
 * @returns {Promise<void>}
 */
async function asyncRefresh(rt) {
  const inFlight = doRefresh(rt);
  pendingRefresh = inFlight;
  try {
    await inFlight;
  } finally {
    // Another refresh may have replaced ours meanwhile; clear only our own marker.
    if (pendingRefresh === inFlight) {
      pendingRefresh = void 0;
    }
  }
}
1996
// Endpoint used when the runtime does not provide `configEndpoint`.
var DEFAULT_CONFIG_URL = "https://kgauto-dashboard.vercel.app/api/kgauto-v2/config";
/**
 * Fetches the config payload and installs it as the new snapshot.
 *
 * Failures are swallowed: the snapshot is kept, its expiry is pushed out by
 * `rt.ttlMs`, and the error handler is invoked only for the first failure.
 * Results arriving after the runtime was replaced are discarded.
 *
 * @param {object} rt - Runtime providing `fetchImpl`, `ttlMs`, and optionally `configEndpoint` / `onError`.
 * @returns {Promise<void>}
 */
async function doRefresh(rt) {
  const endpoint = rt.configEndpoint ?? DEFAULT_CONFIG_URL;
  try {
    const response = await rt.fetchImpl(endpoint, { method: "GET" });
    if (!response.ok) {
      throw new Error(`brain-query ${response.status}: ${response.statusText}`);
    }
    const payload = await response.json();
    // The runtime was swapped while we were waiting; this result is obsolete.
    if (runtime !== rt) return;
    snapshot = {
      data: payload,
      expiresAt: Date.now() + rt.ttlMs,
      refreshing: false,
      warned: snapshot.warned
    };
  } catch (failure) {
    if (runtime !== rt) return;
    snapshot.refreshing = false;
    // Wait a full TTL before the next attempt.
    snapshot.expiresAt = Date.now() + rt.ttlMs;
    if (snapshot.warned) return;
    snapshot.warned = true;
    const report = rt.onError ?? defaultOnError;
    report(failure);
  }
}
2022
/**
 * Fallback error reporter: logs the refresh failure as a console warning.
 *
 * @param {unknown} err - The error raised while refreshing.
 */
function defaultOnError(err) {
  const prefix = "[kgauto] brain-query failed (using bundled fallback):";
  console.warn(prefix, err);
}
2025
/**
 * Reports whether brain-query mode is configured and enabled for `table`.
 *
 * Any falsy runtime (undefined or null) counts as "not configured", matching
 * the guard used by the readers built with createBrainQueryCache. The previous
 * `!== undefined` check threw a TypeError when the runtime was null.
 *
 * @param {string} table - Table name to look up in `runtime.enabledTables`.
 * @returns {boolean} Result of `enabledTables.has(table)`, or false when no runtime is set.
 */
function isBrainQueryActiveFor(table) {
  const activeRuntime = runtime;
  if (!activeRuntime) return false;
  return activeRuntime.enabledTables.has(table);
}
2028
+
2029
+ // src/archetype-perf-brain.ts
2030
/**
 * Type guard for one archetype-perf row.
 *
 * A usable row is an object with string `model_id`, string `archetype`, and a
 * finite numeric `perf_score`. NaN and +/-Infinity are rejected: JSON such as
 * `1e999` parses to Infinity, and a non-finite score would bypass the nullish
 * default applied by consumers and distort every score comparison.
 *
 * @param {unknown} x - Candidate row.
 * @returns {boolean} True when `x` has the expected shape.
 */
function isPerfRow(x) {
  if (!x || typeof x !== "object") return false;
  const r = x;
  return typeof r.model_id === "string" && typeof r.archetype === "string" && Number.isFinite(r.perf_score);
}
2035
/**
 * Groups perf rows into `Map<model_id, { [archetype]: perf_score }>`.
 * Rows rejected by isPerfRow are ignored; a later row for the same
 * (model, archetype) pair overwrites an earlier one.
 *
 * @param {Iterable<unknown>} rows - Raw rows from the brain payload.
 * @returns {Map<string, object>} Scores keyed by model id, then archetype.
 */
function mapRowsToPerfMap(rows) {
  const scoresByModel = new Map();
  for (const candidate of rows) {
    if (isPerfRow(candidate)) {
      const perArchetype = scoresByModel.get(candidate.model_id) ?? {};
      perArchetype[candidate.archetype] = candidate.perf_score;
      scoresByModel.set(candidate.model_id, perArchetype);
    }
  }
  return scoresByModel;
}
2045
/**
 * Groups perf rows into `Map<model_id, { [archetype]: n }>`.
 * Only rows that pass isPerfRow and carry a numeric `n` contribute.
 *
 * @param {Iterable<unknown>} rows - Raw rows from the brain payload.
 * @returns {Map<string, object>} `n` values keyed by model id, then archetype.
 */
function mapRowsToNMap(rows) {
  const countsByModel = new Map();
  for (const candidate of rows) {
    const usable = isPerfRow(candidate) && typeof candidate.n === "number";
    if (usable) {
      const perArchetype = countsByModel.get(candidate.model_id) ?? {};
      perArchetype[candidate.archetype] = candidate.n;
      countsByModel.set(candidate.model_id, perArchetype);
    }
  }
  return countsByModel;
}
2056
/**
 * Builds the fallback perf map from the bundled profiles: one entry per
 * profile that defines `archetypePerf`, keyed by the profile id.
 *
 * @returns {Map<string, object>} Bundled scores keyed by model id.
 */
function bundledArchetypePerf() {
  const pairs = [];
  for (const profile of allProfiles()) {
    if (profile.archetypePerf) {
      pairs.push([profile.id, profile.archetypePerf]);
    }
  }
  return new Map(pairs);
}
2063
/**
 * Fallback for the `n` map: always a new, empty Map, so every lookup made
 * against it comes back undefined.
 *
 * @returns {Map<string, object>} A fresh empty map on every call.
 */
function bundledArchetypePerfN() {
  const noCounts = new Map();
  return noCounts;
}
2066
// Reader for perf scores: maps rows of "kgauto_archetype_perf" to scores and
// falls back to the scores bundled with the profiles.
var loadArchetypePerfFromBrain = createBrainQueryCache({
  bundledFallback: bundledArchetypePerf,
  mapRows: mapRowsToPerfMap,
  table: "kgauto_archetype_perf"
});
// Reader for the `n` values of the same table; its fallback is an empty map.
var loadArchetypePerfNFromBrain = createBrainQueryCache({
  bundledFallback: bundledArchetypePerfN,
  mapRows: mapRowsToNMap,
  table: "kgauto_archetype_perf"
});
2078
// Minimum `n` at which a score is labelled "measured" instead of "judgment".
var MEASURED_GROUNDING_MIN_N = 10;
/**
 * Looks up the perf score for a (model, archetype) pair together with its `n`
 * and the grounding label derived from that `n`.
 *
 * @param {string} modelId - Model identifier.
 * @param {string} archetype - Intent archetype.
 * @returns {{score: number, n: number, grounding: string}} `score` defaults to 5 and
 *   `n` to 0 when no value is found; `grounding` is "measured" once `n` reaches
 *   MEASURED_GROUNDING_MIN_N, otherwise "judgment".
 */
function getArchetypePerfScore(modelId, archetype) {
  const perfByArchetype = loadArchetypePerfFromBrain().get(modelId);
  const score = perfByArchetype?.[archetype] ?? 5;
  const countsByArchetype = loadArchetypePerfNFromBrain().get(modelId);
  const n = countsByArchetype?.[archetype] ?? 0;
  let grounding = "judgment";
  if (n >= MEASURED_GROUNDING_MIN_N) {
    grounding = "measured";
  }
  return { score, n, grounding };
}
2085
+
1940
2086
  // src/advisor.ts
1941
- function runAdvisor(ir, result, profile, policy) {
2087
// Alternatives scoring below this value are never recommended by the advisor.
var QUALITY_FLOOR_FOR_RECOMMENDATION = 6;
// Fraction of the chosen model's input cost an alternative must not exceed to
// count as a tier-down.
var TIER_DOWN_COST_RATIO = 0.5;
// A chosen model whose score is measured and at or above this value is not
// flagged by the cost-mismatched-archetype rule.
var COST_MISMATCHED_CHOSEN_SCORE_CEILING = 7;
2090
+ function runAdvisor(ir, result, profile, policy, phase2) {
1942
2091
  const out = [];
1943
2092
  out.push(...detectCachingOff(ir, profile));
1944
2093
  out.push(...detectSingleChunkSystem(ir, profile));
1945
2094
  out.push(...detectToolBloat(ir, result));
1946
2095
  out.push(...detectHistoryUncached(ir, profile));
1947
2096
  out.push(...detectSingleModelArray(ir, policy));
2097
+ if (policy?.posture !== "locked") {
2098
+ out.push(...detectCostMismatchedArchetype(ir, profile, phase2));
2099
+ out.push(...detectModelStaleEvidence(ir, profile));
2100
+ out.push(...detectTierDown(ir, profile, phase2));
2101
+ }
1948
2102
  return out;
1949
2103
  }
1950
2104
  function detectCachingOff(ir, profile) {
@@ -2030,6 +2184,91 @@ function detectSingleModelArray(ir, policy) {
2030
2184
  }
2031
2185
  ];
2032
2186
  }
2187
/**
 * Advisory rule: warns when the fallback chain contains a cheaper model whose
 * score for this archetype is at least as good as the chosen model's.
 *
 * The rule only runs when the chosen model's score is judgment-grounded or
 * below COST_MISMATCHED_CHOSEN_SCORE_CEILING. It stays silent when the best
 * alternative would also satisfy the tier-down rule (measured score at no more
 * than TIER_DOWN_COST_RATIO of the chosen input cost).
 *
 * @param {object} ir - Compiled request; `ir.intent.archetype` selects the score column.
 * @param {object} profile - Profile of the chosen model.
 * @param {object} [phase2] - `{ fallbackChain, profileResolver }`; the rule is a no-op without both.
 * @returns {Array<object>} Zero or one advisory.
 */
function detectCostMismatchedArchetype(ir, profile, phase2) {
  if (!phase2 || phase2.fallbackChain.length === 0) return [];
  if (!phase2.profileResolver) return [];
  const archetype = ir.intent.archetype;
  const chosenScore = getArchetypePerfScore(profile.id, archetype);
  const chosenHasRoomToGrow = chosenScore.grounding === "judgment" || chosenScore.score < COST_MISMATCHED_CHOSEN_SCORE_CEILING;
  if (!chosenHasRoomToGrow) return [];
  let bestAlt = null;
  for (const altId of phase2.fallbackChain) {
    const altProfile = phase2.profileResolver(altId);
    if (!altProfile) continue;
    if (altProfile.id === profile.id) continue;
    const altScore = getArchetypePerfScore(altProfile.id, archetype);
    if (altScore.score < QUALITY_FLOOR_FOR_RECOMMENDATION) continue;
    if (altScore.score < chosenScore.score) continue;
    // Positive form on purpose: a missing or NaN cost on either side makes the
    // comparison false, so an alternative is kept only when it is provably cheaper.
    if (!(altProfile.costInputPer1m < profile.costInputPer1m)) continue;
    // Prefer the highest score; break ties with the lower input cost.
    if (!bestAlt || altScore.score > bestAlt.score.score || altScore.score === bestAlt.score.score && altProfile.costInputPer1m < bestAlt.profile.costInputPer1m) {
      bestAlt = { id: altId, profile: altProfile, score: altScore };
    }
  }
  if (!bestAlt) return [];
  // Leave this alternative to the tier-down rule when it qualifies there.
  const tierDownWouldFire = bestAlt.score.grounding === "measured" && bestAlt.profile.costInputPer1m <= profile.costInputPer1m * TIER_DOWN_COST_RATIO;
  if (tierDownWouldFire) return [];
  const chosenGrounding = chosenScore.grounding === "judgment" ? `archetypePerf.${archetype}=judgment` : `archetypePerf.${archetype}=${chosenScore.score}`;
  const altGrounding = bestAlt.score.grounding === "measured" ? `archetypePerf.${archetype}=${bestAlt.score.score}, measured, n=${bestAlt.score.n}` : `archetypePerf.${archetype}=${bestAlt.score.score}, judgment`;
  return [
    {
      level: "warn",
      code: "cost-mismatched-archetype",
      message: `Cost-mismatched-archetype: target=${profile.id} (${chosenGrounding}) selected for ${archetype}. Alternative ${bestAlt.id} (${altGrounding}) is cheaper ($${bestAlt.profile.costInputPer1m}/$${bestAlt.profile.costOutputPer1m} vs $${profile.costInputPer1m}/$${profile.costOutputPer1m} per 1M) at equal-or-better quality.`,
      suggestion: `Consider declaring \`${bestAlt.id}\` as the primary model for this archetype, or relax to posture='open' to let kgauto select among the chain. If the chosen model is required for compliance/brand reasons, set \`policy.posture = 'locked'\` to silence this rule.`,
      recommendationType: profile.provider === bestAlt.profile.provider ? "tier-down" : "model-swap",
      docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
    }
  ];
}
2223
/**
 * Advisory rule: informs the caller that brain-query mode is active for the
 * archetype-perf table but the chosen (model, archetype) pair is still
 * judgment-grounded.
 *
 * The threshold quoted in the texts is taken from MEASURED_GROUNDING_MIN_N so
 * the wording cannot drift from the value that actually decides the grounding.
 *
 * @param {object} ir - Compiled request; `ir.intent.archetype` selects the score column.
 * @param {object} profile - Profile of the chosen model.
 * @returns {Array<object>} Zero or one advisory.
 */
function detectModelStaleEvidence(ir, profile) {
  if (!isBrainQueryActiveFor("kgauto_archetype_perf")) return [];
  const archetype = ir.intent.archetype;
  const chosen = getArchetypePerfScore(profile.id, archetype);
  if (chosen.grounding !== "judgment") return [];
  return [
    {
      level: "info",
      code: "model-stale-evidence",
      message: `Model-stale-evidence: target=${profile.id} archetype=${archetype} is judgment-grounded (n=${chosen.n}) despite brain-query mode being active. Measurement substrate is wired but the brain hasn't accumulated >=${MEASURED_GROUNDING_MIN_N} outcomes for this (model, archetype) tuple yet \u2014 routing decisions remain pre-measured for this slot.`,
      suggestion: `Verify that \`record()\` is being called on every call() outcome with the appropriate \`actualModel\` and \`mutationsApplied\` fields. Once the brain accumulates n>=${MEASURED_GROUNDING_MIN_N} rows on this tuple, the score promotes from judgment to measured automatically (5-min SWR cache). No code change required from your side \u2014 this is the substrate signaling the gap.`,
      recommendationType: "prompt-fix",
      docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
    }
  ];
}
2239
/**
 * Advisory rule: warns when the fallback chain contains a model with a
 * measured score that is at least as good as the chosen model's and whose
 * input cost is no more than TIER_DOWN_COST_RATIO of the chosen model's.
 *
 * Among qualifying alternatives the cheapest wins; ties on cost go to the
 * higher score.
 *
 * @param {object} ir - Compiled request; `ir.intent.archetype` selects the score column.
 * @param {object} profile - Profile of the chosen model.
 * @param {object} [phase2] - `{ fallbackChain, profileResolver }`; the rule is a no-op without both.
 * @returns {Array<object>} Zero or one advisory.
 */
function detectTierDown(ir, profile, phase2) {
  if (!phase2 || phase2.fallbackChain.length === 0) return [];
  if (!phase2.profileResolver) return [];
  const archetype = ir.intent.archetype;
  const chosenScore = getArchetypePerfScore(profile.id, archetype);
  const chosenCost = profile.costInputPer1m;
  const costCeiling = chosenCost * TIER_DOWN_COST_RATIO;
  let bestAlt = null;
  for (const altId of phase2.fallbackChain) {
    const altProfile = phase2.profileResolver(altId);
    if (!altProfile) continue;
    if (altProfile.id === profile.id) continue;
    const altScore = getArchetypePerfScore(altProfile.id, archetype);
    if (altScore.grounding !== "measured") continue;
    if (altScore.score < QUALITY_FLOOR_FOR_RECOMMENDATION) continue;
    if (altScore.score < chosenScore.score) continue;
    const altCost = altProfile.costInputPer1m;
    // Positive form on purpose: a missing or NaN cost fails both comparisons.
    // The strict `<` also stops a zero-cost chosen model from being "tiered
    // down" to an alternative that costs exactly the same.
    if (!(altCost <= costCeiling && altCost < chosenCost)) continue;
    if (!bestAlt || altCost < bestAlt.profile.costInputPer1m || altCost === bestAlt.profile.costInputPer1m && altScore.score > bestAlt.score.score) {
      bestAlt = { id: altId, profile: altProfile, score: altScore };
    }
  }
  if (!bestAlt) return [];
  const chosenDesc = chosenScore.grounding === "measured" ? `archetypePerf.${archetype}=${chosenScore.score} (measured, n=${chosenScore.n})` : `archetypePerf.${archetype}=${chosenScore.score} (${chosenScore.grounding})`;
  return [
    {
      level: "warn",
      code: "tier-down",
      message: `Tier-down: target=${profile.id} (${chosenDesc}) selected for ${archetype}. Brain shows ${bestAlt.id} delivers equal-or-better quality (archetypePerf.${archetype}=${bestAlt.score.score}, measured, n=${bestAlt.score.n}) at $${bestAlt.profile.costInputPer1m}/$${bestAlt.profile.costOutputPer1m} per 1M vs $${profile.costInputPer1m}/$${profile.costOutputPer1m} \u2014 a measured tier-down opportunity.`,
      suggestion: `Move \`${bestAlt.id}\` to primary for this archetype. The brain has n=${bestAlt.score.n} measured outcomes backing the recommendation; this is data, not opinion. If posture='locked' is required (compliance/brand promise), set it explicitly to silence this rule.`,
      recommendationType: "tier-down",
      docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
    }
  ];
}
2033
2272
 
2034
2273
  // src/compile.ts
2035
2274
  var counter = 0;
@@ -2105,6 +2344,13 @@ function compile(ir, opts = {}) {
2105
2344
  description: "ir.constraints.toolOrchestration='sequential' selected the DeepSeek-tier-0 hunt chain overlay (L-040 parallel-tool cliff doesn't apply at single-step granularity)."
2106
2345
  });
2107
2346
  }
2347
+ const phase2ProfileResolver = opts.profileResolver ? (id) => {
2348
+ try {
2349
+ return opts.profileResolver(id);
2350
+ } catch {
2351
+ return void 0;
2352
+ }
2353
+ } : tryGetProfile;
2108
2354
  const advisories = runAdvisor(
2109
2355
  ir,
2110
2356
  {
@@ -2114,7 +2360,11 @@ function compile(ir, opts = {}) {
2114
2360
  diagnostics
2115
2361
  },
2116
2362
  profile,
2117
- opts.policy
2363
+ opts.policy,
2364
+ {
2365
+ fallbackChain,
2366
+ profileResolver: phase2ProfileResolver
2367
+ }
2118
2368
  );
2119
2369
  return {
2120
2370
  handle,
@@ -2167,84 +2417,6 @@ function validateFinalFit(ir, profile, tokens) {
2167
2417
  }
2168
2418
  }
2169
2419
 
2170
- // src/brain-query.ts
2171
- var FRESH_SNAPSHOT = {
2172
- data: null,
2173
- expiresAt: 0,
2174
- refreshing: false,
2175
- warned: false
2176
- };
2177
- var snapshot = { ...FRESH_SNAPSHOT };
2178
- var runtime;
2179
- function configureBrainQuery(rt) {
2180
- runtime = rt;
2181
- snapshot = { ...FRESH_SNAPSHOT };
2182
- }
2183
- function createBrainQueryCache(opts) {
2184
- return () => {
2185
- const rt = runtime;
2186
- if (!rt || !rt.enabledTables.has(opts.table)) {
2187
- return opts.bundledFallback();
2188
- }
2189
- const now = Date.now();
2190
- const stale = snapshot.expiresAt <= now;
2191
- if (stale && !snapshot.refreshing) {
2192
- snapshot.refreshing = true;
2193
- void asyncRefresh(rt);
2194
- }
2195
- if (snapshot.data) {
2196
- const rows = snapshot.data[opts.table];
2197
- if (Array.isArray(rows) && rows.length > 0) {
2198
- try {
2199
- return opts.mapRows(rows);
2200
- } catch {
2201
- return opts.bundledFallback();
2202
- }
2203
- }
2204
- }
2205
- return opts.bundledFallback();
2206
- };
2207
- }
2208
- var pendingRefresh;
2209
- async function asyncRefresh(rt) {
2210
- const promise = doRefresh(rt);
2211
- pendingRefresh = promise;
2212
- try {
2213
- await promise;
2214
- } finally {
2215
- if (pendingRefresh === promise) pendingRefresh = void 0;
2216
- }
2217
- }
2218
- var DEFAULT_CONFIG_URL = "https://kgauto-dashboard.vercel.app/api/kgauto-v2/config";
2219
- async function doRefresh(rt) {
2220
- const url = rt.configEndpoint ?? DEFAULT_CONFIG_URL;
2221
- try {
2222
- const res = await rt.fetchImpl(url, { method: "GET" });
2223
- if (!res.ok) {
2224
- throw new Error(`brain-query ${res.status}: ${res.statusText}`);
2225
- }
2226
- const body = await res.json();
2227
- if (runtime !== rt) return;
2228
- snapshot = {
2229
- data: body,
2230
- expiresAt: Date.now() + rt.ttlMs,
2231
- refreshing: false,
2232
- warned: snapshot.warned
2233
- };
2234
- } catch (err) {
2235
- if (runtime !== rt) return;
2236
- snapshot.refreshing = false;
2237
- snapshot.expiresAt = Date.now() + rt.ttlMs;
2238
- if (!snapshot.warned) {
2239
- snapshot.warned = true;
2240
- (rt.onError ?? defaultOnError)(err);
2241
- }
2242
- }
2243
- }
2244
- function defaultOnError(err) {
2245
- console.warn("[kgauto] brain-query failed (using bundled fallback):", err);
2246
- }
2247
-
2248
2420
  // src/pricing-brain.ts
2249
2421
  function isPricingRow(x) {
2250
2422
  if (!x || typeof x !== "object") return false;
@@ -2898,101 +3070,119 @@ var loadChainsFromBrain = createBrainQueryCache({
2898
3070
  });
2899
3071
 
2900
3072
  // src/fallback.ts
2901
- var STARTER_CHAINS = {
3073
+ var STARTER_CHAINS_GROUNDED = {
2902
3074
  // Reasoning floor — never degrade. Walk UP on 429 to Opus → cross-provider.
2903
- // alpha.16: gpt-5.5 appended as third-provider critique floor (frontier-tier,
2904
- // archetypePerf=9). Cross-provider-tail invariant has somewhere to land when
2905
- // both Anthropic + Google are unreachable (consumer adds only OpenAI key).
2906
3075
  critique: [
2907
- "claude-opus-4-7",
2908
- "claude-sonnet-4-6",
2909
- "gemini-2.5-pro",
2910
- "gpt-5.5"
3076
+ { id: "claude-opus-4-7", grounding: "judgment", reason: "Highest reasoning bar, no degradation tier \u2014 engineer pick, awaiting measured backing" },
3077
+ { id: "claude-sonnet-4-6", grounding: "judgment", reason: "Same-provider walk-down from Opus on 429" },
3078
+ { id: "gemini-2.5-pro", grounding: "judgment", reason: "Cross-provider anchor in similar quality bracket" },
3079
+ { id: "gpt-5.5", grounding: "judgment", reason: "alpha.16: third-provider frontier-tier floor (archetypePerf=9)" }
2911
3080
  ],
2912
- // Reasoning matters — Sonnet primary; walk UP to Opus on 429 (rare exception
2913
- // to "always cheaper"); cross-provider via Pro; DeepSeek Pro as tier 3 floor.
3081
+ // Reasoning matters — Sonnet primary; walk UP to Opus on 429.
2914
3082
  plan: [
2915
- "claude-sonnet-4-6",
2916
- "claude-opus-4-7",
2917
- "gemini-2.5-pro",
2918
- "deepseek-v4-pro"
3083
+ { id: "claude-sonnet-4-6", grounding: "judgment", reason: "Reasoning + cost balance \u2014 engineer pick" },
3084
+ { id: "claude-opus-4-7", grounding: "judgment", reason: 'Same-provider walk-UP on 429 (rare exception to "always cheaper")' },
3085
+ { id: "gemini-2.5-pro", grounding: "judgment", reason: "Cross-provider anchor" },
3086
+ { id: "deepseek-v4-pro", grounding: "judgment", reason: "Tier 3 cost floor \u2014 no brain evidence yet" }
2919
3087
  ],
2920
- // Quality + cost match. Walk Sonnet → Haiku same-provider, Pro cross,
2921
- // gpt-5.4-mini as third-provider tail (alpha.16 — closes the mono-Anthropic
2922
- // gap when consumer has only ANTHROPIC + OPENAI keys; archetypePerf=7).
3088
+ // Quality + cost match.
2923
3089
  generate: [
2924
- "claude-sonnet-4-6",
2925
- "claude-haiku-4-5",
2926
- "gemini-2.5-pro",
2927
- "gpt-5.4-mini"
3090
+ { id: "claude-sonnet-4-6", grounding: "judgment", reason: "Quality + cost match \u2014 engineer pick" },
3091
+ { id: "claude-haiku-4-5", grounding: "judgment", reason: "Same-provider step-down" },
3092
+ { id: "gemini-2.5-pro", grounding: "judgment", reason: "Cross-provider anchor" },
3093
+ { id: "gpt-5.4-mini", grounding: "judgment", reason: "alpha.16: third-provider tail (archetypePerf=7) \u2014 closes mono-Anthropic gap" }
2928
3094
  ],
3095
+ // ask::sonnet — STARTER_CHAINS calls this "Quality + cost match" but
3096
+ // tt-intel s78 prod data showed 27% empty rate. Labeled 'judgment' until
3097
+ // evidence either validates or refutes the placement.
2929
3098
  ask: [
2930
- "claude-sonnet-4-6",
2931
- "claude-haiku-4-5",
2932
- "gemini-2.5-pro",
2933
- "gpt-5.4-mini"
3099
+ { id: "claude-sonnet-4-6", grounding: "judgment", reason: "Quality + cost match \u2014 engineer pick. NOTE: tt-intel s78 prod showed 27% empty rate; placement awaits measurement validation" },
3100
+ { id: "claude-haiku-4-5", grounding: "judgment", reason: "Same-provider step-down" },
3101
+ { id: "gemini-2.5-pro", grounding: "judgment", reason: "Cross-provider anchor" },
3102
+ { id: "gpt-5.4-mini", grounding: "judgment", reason: "alpha.16: third-provider tail (archetypePerf=7)" }
2934
3103
  ],
2935
- // Structured-output archetype — Flash skipped (alpha.8 MAX_TOKENS cliff),
2936
- // DeepSeek skipped (no brain evidence). Floor at Haiku. alpha.16: gpt-5.4
2937
- // appended as third-provider extract floor (archetypePerf=8, native
2938
- // structured-output support).
3104
+ // Structured-output archetype — Flash skipped (alpha.8 MAX_TOKENS cliff,
3105
+ // capability-fact); DeepSeek skipped (no brain evidence).
2939
3106
  extract: [
2940
- "claude-sonnet-4-6",
2941
- "claude-haiku-4-5",
2942
- "gemini-2.5-pro",
2943
- "gpt-5.4"
3107
+ { id: "claude-sonnet-4-6", grounding: "judgment", reason: "Reliable structured-output anchor \u2014 engineer pick" },
3108
+ { id: "claude-haiku-4-5", grounding: "judgment", reason: "Same-provider step-down with native structured output" },
3109
+ { id: "gemini-2.5-pro", grounding: "judgment", reason: "Cross-provider anchor with structured-output support" },
3110
+ { id: "gpt-5.4", grounding: "capability-fact", reason: "alpha.16: third-provider floor \u2014 native structured-output capability (archetypePerf=8)" }
2944
3111
  ],
2945
3112
  // Forgiving archetype — Sonnet primary but Flash safely floors it.
2946
3113
  transform: [
2947
- "claude-sonnet-4-6",
2948
- "claude-haiku-4-5",
2949
- "gemini-2.5-pro",
2950
- "gemini-2.5-flash"
3114
+ { id: "claude-sonnet-4-6", grounding: "judgment", reason: "Quality anchor \u2014 engineer pick" },
3115
+ { id: "claude-haiku-4-5", grounding: "judgment", reason: "Same-provider step-down" },
3116
+ { id: "gemini-2.5-pro", grounding: "judgment", reason: "Cross-provider anchor" },
3117
+ { id: "gemini-2.5-flash", grounding: "judgment", reason: "Cost floor \u2014 forgiving archetype tolerates Flash" }
2951
3118
  ],
2952
- // Parallel-tool throughput champion (Flash, L-040). Tier 1 cross-provider
2953
- // Pro; tier 2 Sonnet (quality safety net for blocked-Flash case); tier 3
2954
- // Haiku (reduced tool budget — cliff at 16 fires). This is the
2955
- // `toolOrchestration: 'parallel'` (default) hunt chain. The sequential
2956
- // variant lives in STARTER_CHAINS_BY_MODE.hunt.sequential below — see
2957
- // alpha.20 E3 / interfaces/kgauto.md `sequential-agentic-hunt-mode`.
3119
+ // Parallel-tool throughput champion: Flash leads on the L-040 cliff
3120
+ // (capability-fact: Flash 15-75 parallel calls/step vs DeepSeek 7-8).
2958
3121
  hunt: [
2959
- "gemini-2.5-flash",
2960
- "gemini-2.5-pro",
2961
- "claude-sonnet-4-6",
2962
- "claude-haiku-4-5"
3122
+ { id: "gemini-2.5-flash", grounding: "capability-fact", reason: "L-040 parallel-tool throughput champion (15-75 calls/step)" },
3123
+ { id: "gemini-2.5-pro", grounding: "capability-fact", reason: "Cross-provider tier 1 with strong parallel-tool support" },
3124
+ { id: "claude-sonnet-4-6", grounding: "judgment", reason: "Quality safety net for blocked-Flash case" },
3125
+ { id: "claude-haiku-4-5", grounding: "judgment", reason: "Reduced tool budget \u2014 cliff at 16 fires" }
2963
3126
  ],
2964
- // Cost-sensitive + tolerant. DeepSeek brain-evidence tier 1; Haiku tier 2
2965
- // for quality safety; Flash-Lite emergency floor (onboarded s22).
3127
+ // Cost-sensitive + tolerant. DeepSeek brain-evidence tier 1.
2966
3128
  summarize: [
2967
- "gemini-2.5-flash",
2968
- "deepseek-v4-flash",
2969
- "claude-haiku-4-5",
2970
- "gemini-2.5-flash-lite"
3129
+ { id: "gemini-2.5-flash", grounding: "judgment", reason: "Cost-sensitive primary \u2014 engineer pick" },
3130
+ { id: "deepseek-v4-flash", grounding: "measured", reason: "Brain-validated tier 1 for cost-sensitive summarize workloads", n: 169 },
3131
+ { id: "claude-haiku-4-5", grounding: "judgment", reason: "Quality safety net" },
3132
+ { id: "gemini-2.5-flash-lite", grounding: "judgment", reason: "Emergency floor \u2014 onboarded s22, no brain evidence yet" }
2971
3133
  ],
2972
- // Brain-validated DeepSeek tier 1 (169 rows, 0% empty); Haiku tier 2;
2973
- // Flash-Lite floor for repeat-prompt workloads (cache-discount 10×).
3134
+ // Brain-validated DeepSeek tier 1 (169 rows, 0% empty rate).
2974
3135
  classify: [
2975
- "gemini-2.5-flash",
2976
- "deepseek-v4-flash",
2977
- "claude-haiku-4-5",
2978
- "gemini-2.5-flash-lite"
3136
+ { id: "gemini-2.5-flash", grounding: "judgment", reason: "Cost-sensitive primary \u2014 engineer pick" },
3137
+ { id: "deepseek-v4-flash", grounding: "measured", reason: "Brain-validated tier 1 (169 rows, 0% empty rate)", n: 169 },
3138
+ { id: "claude-haiku-4-5", grounding: "judgment", reason: "Quality safety net" },
3139
+ { id: "gemini-2.5-flash-lite", grounding: "judgment", reason: "Cache-discount 10\xD7 floor for repeat-prompt workloads" }
2979
3140
  ]
2980
3141
  };
2981
- var STARTER_CHAINS_BY_MODE = {
3142
// Id-only view of STARTER_CHAINS_GROUNDED: archetype -> ordered list of model ids.
var STARTER_CHAINS = Object.fromEntries(
  Object.entries(STARTER_CHAINS_GROUNDED).map(([archetype, entries]) => [
    archetype,
    entries.map((entry) => entry.id)
  ])
);
3149
+ var STARTER_CHAINS_BY_MODE_GROUNDED = {
2982
3150
  hunt: {
2983
3151
  sequential: [
2984
- // V4-Pro: cheap + good reasoning at single-step granularity; no
2985
- // L-040 cliff applies when consumer commits to sequential.
2986
- "deepseek-v4-pro",
2987
- // V4-Flash: cheapest viable; sibling-provider fallback.
2988
- "deepseek-v4-flash",
2989
- // Cross-provider safety net — Sonnet handles sequential agentic loops
2990
- // cleanly; Pro as third-provider tail when no DeepSeek key reachable.
2991
- "claude-sonnet-4-6",
2992
- "gemini-2.5-pro"
3152
+ {
3153
+ id: "deepseek-v4-pro",
3154
+ grounding: "judgment",
3155
+ reason: "alpha.20 E3: cheap + good reasoning at single-step granularity; L-040 cliff silenced when sequential \u2014 hypothesis not yet measured"
3156
+ },
3157
+ {
3158
+ id: "deepseek-v4-flash",
3159
+ grounding: "judgment",
3160
+ reason: "Cheapest viable; sibling-provider fallback"
3161
+ },
3162
+ {
3163
+ id: "claude-sonnet-4-6",
3164
+ grounding: "judgment",
3165
+ reason: "Cross-provider safety net \u2014 Sonnet handles sequential agentic loops cleanly"
3166
+ },
3167
+ {
3168
+ id: "gemini-2.5-pro",
3169
+ grounding: "judgment",
3170
+ reason: "Third-provider tail when no DeepSeek key reachable"
3171
+ }
2993
3172
  ]
2994
3173
  }
2995
3174
  };
3175
// Id-only view of STARTER_CHAINS_BY_MODE_GROUNDED. Only archetypes that define a
// `sequential` overlay are carried over.
var STARTER_CHAINS_BY_MODE = Object.fromEntries(
  Object.entries(STARTER_CHAINS_BY_MODE_GROUNDED)
    .filter(([, modes]) => modes?.sequential)
    .map(([archetype, modes]) => [
      archetype,
      { sequential: modes.sequential.map((entry) => entry.id) }
    ])
);
2996
3186
  function resolveStarterForMode(archetype, toolOrchestration, allChains) {
2997
3187
  if (toolOrchestration === "sequential") {
2998
3188
  const overlay = STARTER_CHAINS_BY_MODE[archetype]?.sequential;
@@ -3054,6 +3244,114 @@ function getAllStarterChains() {
3054
3244
  }
3055
3245
  return out;
3056
3246
  }
3247
/**
 * Returns a copy of the sequential-mode starter chain (model ids) for an archetype.
 *
 * @param {string} archetype - Intent archetype.
 * @returns {string[]|undefined} Fresh array of ids, or undefined when no sequential overlay exists.
 */
function getSequentialStarterChain(archetype) {
  const sequentialIds = STARTER_CHAINS_BY_MODE[archetype]?.sequential;
  if (!sequentialIds) return void 0;
  return Array.from(sequentialIds);
}
3251
/**
 * Copies a grounded chain entry so callers never receive the static tables' objects.
 * `id` and `grounding` are always copied; `reason` and `n` only when defined.
 *
 * @param {{id: string, grounding: string, reason?: string, n?: number}} e - Entry to copy.
 * @returns {object} New object with keys in the order id, grounding, reason, n.
 */
function copyEntry(e) {
  const { id, grounding, reason, n } = e;
  return {
    id,
    grounding,
    ...(reason !== void 0 ? { reason } : {}),
    ...(n !== void 0 ? { n } : {})
  };
}
3257
/**
 * Finds the static grounded entry for a model id within an archetype, checking
 * the archetype's main chain first and its sequential overlay second.
 *
 * Only real arrays are searched. An archetype name that happens to match an
 * inherited Object.prototype member (for example "constructor") previously
 * produced a truthy non-array and crashed on `.find`; it now yields undefined.
 *
 * @param {string} id - Model id to look for.
 * @param {string} archetype - Intent archetype.
 * @returns {object|undefined} The static entry itself (not a copy), or undefined.
 */
function lookupStaticEntry(id, archetype) {
  const matchesId = (e) => e.id === id;
  const archetypeEntries = STARTER_CHAINS_GROUNDED[archetype];
  if (Array.isArray(archetypeEntries)) {
    const hit = archetypeEntries.find(matchesId);
    if (hit) return hit;
  }
  const seqOverlay = STARTER_CHAINS_BY_MODE_GROUNDED[archetype]?.sequential;
  if (Array.isArray(seqOverlay)) {
    const hit = seqOverlay.find(matchesId);
    if (hit) return hit;
  }
  return void 0;
}
3270
/**
 * Resolves the grounded starter chain for an archetype.
 *
 * With `toolOrchestration === "sequential"` and a sequential overlay present,
 * the overlay wins. Otherwise the ids come from loadChainsFromBrain() and each
 * id is paired with its static grounding when one is known, or labelled
 * "judgment" when not.
 *
 * Only real arrays are accepted as chains. An archetype matching an inherited
 * Object.prototype member, or a malformed non-array chain value, now resolves
 * to undefined instead of throwing on `.map`, so callers can report an
 * unknown archetype.
 *
 * @param {string} archetype - Intent archetype.
 * @param {string} [toolOrchestration] - "sequential" selects the overlay when available.
 * @returns {Array<object>|undefined} Fresh entry copies, or undefined for an unknown archetype.
 */
function resolveGroundedChainForArchetype(archetype, toolOrchestration) {
  if (toolOrchestration === "sequential") {
    const overlay = STARTER_CHAINS_BY_MODE_GROUNDED[archetype]?.sequential;
    if (Array.isArray(overlay)) return overlay.map(copyEntry);
  }
  const allChains = loadChainsFromBrain();
  const ids = allChains[archetype];
  if (!Array.isArray(ids)) return void 0;
  return ids.map((id) => {
    const known = lookupStaticEntry(id, archetype);
    if (known) return copyEntry(known);
    return { id, grounding: "judgment" };
  });
}
3284
/**
 * Builds the default fallback chain for an archetype as grounded entries.
 *
 * Steps, in order: resolve the starter chain; put `primary` (if given) first;
 * drop ids listed in `policy.blockedModels`; de-duplicate by id; drop models
 * that are not reachable (only when `reachability` is given); truncate to
 * `maxDepth` entries.
 *
 * @param {object} opts
 * @param {string} opts.archetype - Intent archetype.
 * @param {string} [opts.primary] - Model id to place at the head of the chain.
 * @param {number} [opts.maxDepth=3] - Maximum number of entries; must be a number >= 1.
 * @param {object} [opts.policy] - Optional policy; `blockedModels` ids are removed.
 * @param {object} [opts.reachability] - Passed to isModelReachable for each entry.
 * @param {string} [opts.toolOrchestration] - Forwarded to the starter-chain resolver.
 * @returns {Array<object>} Grounded entries, at most `maxDepth` long.
 * @throws {Error} When `maxDepth` is not >= 1 (NaN included) or the archetype is unknown.
 */
function getDefaultFallbackChainWithGrounding(opts) {
  const {
    archetype,
    primary,
    maxDepth = 3,
    policy,
    reachability,
    toolOrchestration
  } = opts;
  // Positive form on purpose: `maxDepth < 1` is false for NaN, which used to
  // slip through and silently truncate the chain to nothing.
  if (!(maxDepth >= 1)) {
    throw new Error(
      `getDefaultFallbackChainWithGrounding: maxDepth must be >= 1, got ${maxDepth}`
    );
  }
  const starter = resolveGroundedChainForArchetype(archetype, toolOrchestration);
  if (!starter) {
    throw new Error(
      `getDefaultFallbackChainWithGrounding: unknown archetype "${archetype}". Known: ${Object.keys(STARTER_CHAINS_GROUNDED).join(", ")}`
    );
  }
  let chain;
  if (primary) {
    // Grounding for the primary: from the resolved chain, else from the static
    // tables, else "judgment" for a model the tables do not know.
    const primaryEntry = (() => {
      const inStarter = starter.find((e) => e.id === primary);
      if (inStarter) return copyEntry(inStarter);
      const knownAnywhere = lookupStaticEntry(primary, archetype);
      if (knownAnywhere) return { ...copyEntry(knownAnywhere), id: primary };
      return { id: primary, grounding: "judgment" };
    })();
    chain = [primaryEntry, ...starter.filter((e) => e.id !== primary)];
  } else {
    chain = [...starter];
  }
  if (policy?.blockedModels && policy.blockedModels.length > 0) {
    const blocked = new Set(policy.blockedModels);
    chain = chain.filter((e) => !blocked.has(e.id));
  }
  // Keep the first occurrence of every id.
  const seen = /* @__PURE__ */ new Set();
  const deduped = [];
  for (const e of chain) {
    if (!seen.has(e.id)) {
      seen.add(e.id);
      deduped.push(e);
    }
  }
  let filtered = deduped;
  if (reachability) {
    filtered = deduped.filter((e) => isModelReachable(e.id, reachability));
  }
  return filtered.slice(0, maxDepth);
}
3335
/**
 * Returns copies of the static grounded starter chain for an archetype.
 *
 * The lookup accepts only real arrays, so a name that matches an inherited
 * Object.prototype member (for example "constructor") is reported as an
 * unknown archetype instead of failing with a TypeError on `.map`.
 *
 * @param {string} archetype - Intent archetype.
 * @returns {Array<object>} Fresh copies of the archetype's grounded entries.
 * @throws {Error} When the archetype has no static chain.
 */
function getStarterChainWithGrounding(archetype) {
  const entries = STARTER_CHAINS_GROUNDED[archetype];
  if (!Array.isArray(entries)) {
    throw new Error(
      `getStarterChainWithGrounding: unknown archetype "${archetype}"`
    );
  }
  return entries.map(copyEntry);
}
3344
/**
 * Returns every static grounded starter chain, keyed by archetype, with each
 * entry copied so callers cannot mutate the static table.
 *
 * @returns {object} archetype -> array of fresh entry copies.
 */
function getAllStarterChainsWithGrounding() {
  return Object.fromEntries(
    Object.entries(STARTER_CHAINS_GROUNDED).map(([archetype, entries]) => [
      archetype,
      entries.map(copyEntry)
    ])
  );
}
3351
/**
 * Returns copies of the grounded sequential-mode overlay for an archetype.
 *
 * @param {string} archetype - Intent archetype.
 * @returns {Array<object>|undefined} Fresh entry copies, or undefined when no overlay exists.
 */
function getSequentialStarterChainWithGrounding(archetype) {
  const sequentialEntries = STARTER_CHAINS_BY_MODE_GROUNDED[archetype]?.sequential;
  if (!sequentialEntries) return void 0;
  return sequentialEntries.map(copyEntry);
}
3057
3355
  function ensureCrossProviderTail(opts) {
3058
3356
  const { chain, archetype, apiKeys, envSource } = opts;
3059
3357
  if (chain.length < 1) return { chain };
@@ -3790,38 +4088,6 @@ function clamp(n) {
3790
4088
  return Math.max(0, Math.min(1, n));
3791
4089
  }
3792
4090
 
3793
- // src/archetype-perf-brain.ts
3794
- function isPerfRow(x) {
3795
- if (!x || typeof x !== "object") return false;
3796
- const r = x;
3797
- return typeof r.model_id === "string" && typeof r.archetype === "string" && typeof r.perf_score === "number";
3798
- }
3799
- function mapRowsToPerfMap(rows) {
3800
- const out = /* @__PURE__ */ new Map();
3801
- for (const row of rows) {
3802
- if (!isPerfRow(row)) continue;
3803
- const existing = out.get(row.model_id) ?? {};
3804
- existing[row.archetype] = row.perf_score;
3805
- out.set(row.model_id, existing);
3806
- }
3807
- return out;
3808
- }
3809
- function bundledArchetypePerf() {
3810
- const out = /* @__PURE__ */ new Map();
3811
- for (const profile of allProfiles()) {
3812
- if (profile.archetypePerf) out.set(profile.id, profile.archetypePerf);
3813
- }
3814
- return out;
3815
- }
3816
- var loadArchetypePerfFromBrain = createBrainQueryCache({
3817
- table: "kgauto_archetype_perf",
3818
- mapRows: mapRowsToPerfMap,
3819
- bundledFallback: bundledArchetypePerf
3820
- });
3821
- function getArchetypePerfScore(modelId, archetype) {
3822
- return loadArchetypePerfFromBrain().get(modelId)?.[archetype] ?? 5;
3823
- }
3824
-
3825
4091
  // src/models-brain.ts
3826
4092
  function isModelRow(x) {
3827
4093
  if (!x || typeof x !== "object") return false;
@@ -3954,6 +4220,7 @@ function compile2(ir, opts) {
3954
4220
  CallError,
3955
4221
  DIALECT_VERSION,
3956
4222
  INTENT_ARCHETYPES,
4223
+ MEASURED_GROUNDING_MIN_N,
3957
4224
  PROVIDER_ENV_KEYS,
3958
4225
  allProfiles,
3959
4226
  bucketContext,
@@ -3967,18 +4234,25 @@ function compile2(ir, opts) {
3967
4234
  countTokens,
3968
4235
  execute,
3969
4236
  getAllStarterChains,
4237
+ getAllStarterChainsWithGrounding,
3970
4238
  getArchetypePerfScore,
3971
4239
  getDefaultFallbackChain,
4240
+ getDefaultFallbackChainWithGrounding,
3972
4241
  getProfile,
3973
4242
  getReachabilityDiagnostic,
4243
+ getSequentialStarterChain,
4244
+ getSequentialStarterChainWithGrounding,
3974
4245
  getStarterChain,
4246
+ getStarterChainWithGrounding,
3975
4247
  hashShape,
3976
4248
  isArchetype,
4249
+ isBrainQueryActiveFor,
3977
4250
  isModelReachable,
3978
4251
  isProviderReachable,
3979
4252
  learningKey,
3980
4253
  loadAliasesFromBrain,
3981
4254
  loadArchetypePerfFromBrain,
4255
+ loadArchetypePerfNFromBrain,
3982
4256
  loadChainsFromBrain,
3983
4257
  loadModelsFromBrain,
3984
4258
  loadPricingFromBrain,