@warmdrift/kgauto-compiler 2.0.0-alpha.7 → 2.0.0-alpha.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-MBEI5UOM.mjs → chunk-3KVKELZN.mjs} +257 -9
- package/dist/index.d.mts +89 -7
- package/dist/index.d.ts +89 -7
- package/dist/index.js +463 -36
- package/dist/index.mjs +204 -28
- package/dist/{profiles-B3eNQ2py.d.ts → profiles-BYVOc1eW.d.ts} +82 -1
- package/dist/{profiles-Py8c7zjJ.d.mts → profiles-NUZOIzGr.d.mts} +82 -1
- package/dist/profiles.d.mts +1 -1
- package/dist/profiles.d.ts +1 -1
- package/dist/profiles.js +257 -9
- package/dist/profiles.mjs +1 -1
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -15,7 +15,7 @@ import {
|
|
|
15
15
|
getProfile,
|
|
16
16
|
profilesByProvider,
|
|
17
17
|
tryGetProfile
|
|
18
|
-
} from "./chunk-
|
|
18
|
+
} from "./chunk-3KVKELZN.mjs";
|
|
19
19
|
|
|
20
20
|
// src/tokenizer.ts
|
|
21
21
|
var tokenizerImpl = defaultCharBasedCounter;
|
|
@@ -408,7 +408,11 @@ function lowerAnthropic(ir, profile, hints) {
|
|
|
408
408
|
system: systemBlocks,
|
|
409
409
|
messages,
|
|
410
410
|
tools,
|
|
411
|
-
|
|
411
|
+
// alpha.8: trust profile.maxOutputTokens. The historical Math.min(_, 4096)
|
|
412
|
+
// floor surprised every consumer once (PB-Cairn contract-gaps brief, Gap 3).
|
|
413
|
+
// Profile is the single source of truth; consumers wanting a tighter
|
|
414
|
+
// budget can pass providerOverrides.anthropic.max_tokens explicitly.
|
|
415
|
+
max_tokens: hints.forceTerseOutput ? 200 : profile.maxOutputTokens
|
|
412
416
|
},
|
|
413
417
|
diagnostics: {
|
|
414
418
|
cacheableTokens,
|
|
@@ -687,12 +691,13 @@ function setNestedField(obj, path, value) {
|
|
|
687
691
|
}
|
|
688
692
|
|
|
689
693
|
// src/advisor.ts
|
|
690
|
-
function runAdvisor(ir, result, profile) {
|
|
694
|
+
/**
 * Runs every best-practice detector and returns their advisories as one
 * flat array.
 *
 * Detectors are invoked in a fixed order, so the returned advisories keep
 * that order too: caching-off, single-chunk-system, tool-bloat,
 * history-uncached, single-model-array.
 *
 * @param ir      Prompt IR; passed to every detector.
 * @param result  Compile result; only the tool-bloat detector receives it.
 * @param profile Model profile; passed to the caching/system/history detectors.
 * @param policy  Compile policy (may be undefined); only the
 *                single-model-array detector receives it.
 * @returns A new array holding every advisory produced.
 */
function runAdvisor(ir, result, profile, policy) {
  return [
    ...detectCachingOff(ir, profile),
    ...detectSingleChunkSystem(ir, profile),
    ...detectToolBloat(ir, result),
    ...detectHistoryUncached(ir, profile),
    ...detectSingleModelArray(ir, policy)
  ];
}
|
|
698
703
|
function detectCachingOff(ir, profile) {
|
|
@@ -764,6 +769,20 @@ function detectHistoryUncached(ir, profile) {
|
|
|
764
769
|
}
|
|
765
770
|
];
|
|
766
771
|
}
|
|
772
|
+
/**
 * Advisory rule `single-model-array`.
 *
 * Emits one warning when `ir.models` holds exactly one entry and
 * `policy.posture` is anything other than `"locked"` (including when no
 * policy is supplied). Setting the posture to `"locked"` is the only way
 * to silence the rule for a one-model array.
 *
 * @param ir     Prompt IR; only `ir.models` is read.
 * @param policy Optional compile policy; only `policy.posture` is read.
 * @returns An empty array, or an array with a single advisory object.
 */
function detectSingleModelArray(ir, policy) {
  const hasFallback = ir.models.length !== 1;
  const isLocked = policy?.posture === "locked";
  if (hasFallback || isLocked) {
    return [];
  }

  const only = ir.models[0];
  const advisory = {
    level: "warn",
    code: "single-model-array",
    message: `\`ir.models\` has length 1 (only "${only}") and posture is not 'locked'. A single-model chain has no safety net \u2014 the first 429 / 5xx / cliff hits the user as a failure. Master plan \xA71.2 closes the reliability gap with a 2-step minimum.`,
    suggestion: `Use \`getDefaultFallbackChain({ archetype: ir.intent.archetype, primary: '${only}', posture: 'preferred' })\` for a user-anchored chain, or \`getDefaultFallbackChain({ archetype, posture: 'open' })\` for library-picked. If single-model is intentional (compliance/brand promise), set \`policy.posture = 'locked'\` to silence this rule.`,
    docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#single-model-array"
  };
  return [advisory];
}
|
|
767
786
|
|
|
768
787
|
// src/compile.ts
|
|
769
788
|
var counter = 0;
|
|
@@ -836,7 +855,8 @@ function compile(ir, opts = {}) {
|
|
|
836
855
|
tokensIn: inputTokens,
|
|
837
856
|
diagnostics
|
|
838
857
|
},
|
|
839
|
-
profile
|
|
858
|
+
profile,
|
|
859
|
+
opts.policy
|
|
840
860
|
);
|
|
841
861
|
return {
|
|
842
862
|
handle,
|
|
@@ -1288,44 +1308,48 @@ async function call(ir, opts = {}) {
|
|
|
1288
1308
|
fetchImpl: opts.fetchImpl,
|
|
1289
1309
|
providerOverrides: opts.providerOverrides
|
|
1290
1310
|
});
|
|
1291
|
-
|
|
1311
|
+
const validated = exec.ok ? validateStructuredContract(exec, ir) : exec;
|
|
1312
|
+
if (validated.ok) {
|
|
1292
1313
|
attempts.push({ model: targetModel, status: "success" });
|
|
1293
1314
|
const latencyMs2 = Date.now() - start;
|
|
1294
|
-
const responseWithStructured = withStructuredOutput(exec.response, ir);
|
|
1295
1315
|
await record({
|
|
1296
1316
|
handle: initial.handle,
|
|
1297
|
-
tokensIn:
|
|
1298
|
-
tokensOut:
|
|
1317
|
+
tokensIn: validated.response.tokens.input,
|
|
1318
|
+
tokensOut: validated.response.tokens.output,
|
|
1299
1319
|
latencyMs: latencyMs2,
|
|
1300
1320
|
success: true,
|
|
1301
|
-
emptyResponse:
|
|
1302
|
-
toolsCalled:
|
|
1321
|
+
emptyResponse: validated.response.tokens.output === 0,
|
|
1322
|
+
toolsCalled: validated.response.toolCalls.map((tc) => tc.name),
|
|
1303
1323
|
actualModel: targetModel !== initial.target ? targetModel : void 0,
|
|
1304
1324
|
mutationsApplied: targetModel !== initial.target ? activeCompile.mutationsApplied.map((m) => m.id) : void 0,
|
|
1305
1325
|
promptPreview: extractPromptPreview(ir),
|
|
1306
|
-
responsePreview:
|
|
1307
|
-
cacheReadInputTokens:
|
|
1308
|
-
cacheCreationInputTokens:
|
|
1326
|
+
responsePreview: validated.response.text.slice(0, 200),
|
|
1327
|
+
cacheReadInputTokens: validated.response.tokens.cached,
|
|
1328
|
+
cacheCreationInputTokens: validated.response.tokens.cacheCreated
|
|
1309
1329
|
});
|
|
1330
|
+
const fellOver = targetModel !== initial.target;
|
|
1310
1331
|
return {
|
|
1311
1332
|
handle: initial.handle,
|
|
1312
1333
|
actualModel: targetModel,
|
|
1313
1334
|
requestedModel: initial.target,
|
|
1314
1335
|
provider: activeCompile.provider,
|
|
1315
|
-
response:
|
|
1336
|
+
response: validated.response,
|
|
1316
1337
|
latencyMs: latencyMs2,
|
|
1317
1338
|
mutationsApplied: activeCompile.mutationsApplied,
|
|
1318
|
-
attempts
|
|
1339
|
+
attempts,
|
|
1340
|
+
servedBy: targetModel,
|
|
1341
|
+
fellOverFrom: fellOver ? initial.target : void 0,
|
|
1342
|
+
fallbackReason: fellOver ? normalizeFallbackReason(attempts) : void 0
|
|
1319
1343
|
};
|
|
1320
1344
|
}
|
|
1321
1345
|
attempts.push({
|
|
1322
1346
|
model: targetModel,
|
|
1323
|
-
status:
|
|
1324
|
-
errorCode:
|
|
1325
|
-
message:
|
|
1347
|
+
status: validated.errorType,
|
|
1348
|
+
errorCode: validated.errorCode,
|
|
1349
|
+
message: validated.message
|
|
1326
1350
|
});
|
|
1327
|
-
lastErr =
|
|
1328
|
-
if (
|
|
1351
|
+
lastErr = validated;
|
|
1352
|
+
if (validated.errorType === "terminal" || opts.noFallback) {
|
|
1329
1353
|
break;
|
|
1330
1354
|
}
|
|
1331
1355
|
}
|
|
@@ -1362,20 +1386,49 @@ function extractPromptPreview(ir) {
|
|
|
1362
1386
|
if (lastHist) return lastHist.slice(0, 200);
|
|
1363
1387
|
return void 0;
|
|
1364
1388
|
}
|
|
1365
|
-
function
|
|
1366
|
-
if (!ir.constraints?.structuredOutput)
|
|
1367
|
-
|
|
1389
|
+
/**
 * Checks a successful provider execution against the structured-output
 * contract and, where possible, attaches the parsed JSON.
 *
 * Outcomes:
 *  - `ir.constraints.structuredOutput` not set  -> pass-through success.
 *  - finishReason is "max_tokens" / "length" (case-insensitive)
 *        -> retryable failure `max_tokens_on_structured_output`.
 *  - response text is empty/missing              -> pass-through success
 *        (no `structuredOutput` is attached).
 *  - text parses as JSON -> success, response copied with `structuredOutput`.
 *  - text fails to parse -> retryable failure `structured_output_parse_failed`.
 *
 * @param exec Execution result; `exec.response` and `exec.status` are read.
 * @param ir   Prompt IR; only `ir.constraints?.structuredOutput` is read.
 * @returns `{ ok: true, response }` or
 *          `{ ok: false, status, errorType, errorCode, message, raw }`.
 */
function validateStructuredContract(exec, ir) {
  const { response, status } = exec;

  // Both failure paths share every field except the code and the message.
  const reject = (errorCode, message) => ({
    ok: false,
    status,
    errorType: "retryable",
    errorCode,
    message,
    raw: response.raw
  });

  if (!ir.constraints?.structuredOutput) {
    return { ok: true, response };
  }

  const finishReason = (response.finishReason ?? "").toLowerCase();
  if (finishReason === "max_tokens" || finishReason === "length") {
    return reject(
      "max_tokens_on_structured_output",
      `Provider returned finishReason="${response.finishReason}" on a structured-output call \u2014 output truncated mid-token, JSON cannot be valid`
    );
  }

  if (!response.text) {
    return { ok: true, response };
  }

  let parsed;
  try {
    parsed = JSON.parse(response.text);
  } catch (err) {
    return reject(
      "structured_output_parse_failed",
      err instanceof Error ? err.message : String(err)
    );
  }
  return { ok: true, response: { ...response, structuredOutput: parsed } };
}
|
|
1421
|
+
/**
 * Maps the first non-success attempt in a fallback walk to a normalized
 * reason string.
 *
 * @param attempts Attempt records; each is read for `status` and `errorCode`.
 * @returns `undefined` when no attempt has a status other than "success";
 *          otherwise "rate_limit", "cliff", "cost_cap", or - for any other
 *          or missing error code - "provider_error".
 */
function normalizeFallbackReason(attempts) {
  const firstFailure = attempts.find((attempt) => attempt.status !== "success");
  if (!firstFailure) {
    return void 0;
  }

  switch (firstFailure.errorCode ?? "") {
    case "rate_limit_429":
    case "rate_limit":
      return "rate_limit";
    case "max_tokens_on_structured_output":
    case "structured_output_parse_failed":
      return "cliff";
    case "cost_cap_exceeded":
      return "cost_cap";
    default:
      return "provider_error";
  }
}
|
|
1379
1432
|
|
|
1380
1433
|
// src/oracle.ts
|
|
1381
1434
|
var DEFAULT_DIMENSIONS = ["correctness", "completeness", "conciseness", "format"];
|
|
@@ -1465,6 +1518,126 @@ function clamp(n) {
|
|
|
1465
1518
|
return Math.max(0, Math.min(1, n));
|
|
1466
1519
|
}
|
|
1467
1520
|
|
|
1521
|
+
// src/fallback.ts
|
|
1522
|
+
/**
 * Starter fallback chains, keyed by intent archetype. Each array is an
 * ordered list of model ids; index 0 is the default primary and later
 * entries are tried in order. The accessors below always hand out copies,
 * never these arrays themselves.
 */
var STARTER_CHAINS = {
  // Opus primary, then Sonnet, then Gemini Pro.
  // NOTE(review): the earlier note here read "never degrade; walk UP on 429
  // to Opus", but Opus is already the first entry and the next step is
  // Sonnet - confirm which of comment or data reflects the intent.
  critique: [
    "claude-opus-4-7",
    "claude-sonnet-4-6",
    "gemini-2.5-pro"
  ],
  // Reasoning matters - Sonnet primary; walk UP to Opus on 429 (rare exception
  // to "always cheaper"); cross-provider via Pro; DeepSeek Pro as tier 3 floor.
  plan: [
    "claude-sonnet-4-6",
    "claude-opus-4-7",
    "gemini-2.5-pro",
    "deepseek-v4-pro"
  ],
  // Quality + cost match. Walk Sonnet -> Haiku same-provider, Pro cross,
  // Flash floor for the open-posture chain.
  generate: [
    "claude-sonnet-4-6",
    "claude-haiku-4-5",
    "gemini-2.5-pro",
    "gemini-2.5-flash"
  ],
  // Same order as `generate`.
  ask: [
    "claude-sonnet-4-6",
    "claude-haiku-4-5",
    "gemini-2.5-pro",
    "gemini-2.5-flash"
  ],
  // Structured-output archetype - Flash skipped (alpha.8 MAX_TOKENS cliff),
  // DeepSeek skipped (no brain evidence).
  // NOTE(review): the earlier note said "floor at Haiku", but the last entry
  // is gemini-2.5-pro - confirm the intended floor.
  extract: [
    "claude-sonnet-4-6",
    "claude-haiku-4-5",
    "gemini-2.5-pro"
  ],
  // Forgiving archetype - Sonnet primary but Flash safely floors it.
  transform: [
    "claude-sonnet-4-6",
    "claude-haiku-4-5",
    "gemini-2.5-pro",
    "gemini-2.5-flash"
  ],
  // Parallel-tool throughput champion (Flash, L-040). Tier 1 Pro; tier 2
  // Sonnet (quality safety net for blocked-Flash case); tier 3 Haiku
  // (reduced tool budget - cliff at 16 fires).
  // NOTE(review): the earlier note called tier 1 "cross-provider", yet both
  // the primary and tier 1 are gemini-2.5-* ids - confirm.
  hunt: [
    "gemini-2.5-flash",
    "gemini-2.5-pro",
    "claude-sonnet-4-6",
    "claude-haiku-4-5"
  ],
  // Cost-sensitive + tolerant. DeepSeek brain-evidence tier 1; Haiku tier 2
  // for quality safety; Flash-Lite emergency floor (onboarded s22).
  summarize: [
    "gemini-2.5-flash",
    "deepseek-v4-flash",
    "claude-haiku-4-5",
    "gemini-2.5-flash-lite"
  ],
  // Brain-validated DeepSeek tier 1 (169 rows, 0% empty); Haiku tier 2;
  // Flash-Lite floor for repeat-prompt workloads (cache-discount 10x).
  classify: [
    "gemini-2.5-flash",
    "deepseek-v4-flash",
    "claude-haiku-4-5",
    "gemini-2.5-flash-lite"
  ]
};

/**
 * Own-key lookup into STARTER_CHAINS.
 *
 * A bare `STARTER_CHAINS[archetype]` also resolves inherited keys such as
 * "constructor" or "toString", which would slip past a truthiness check and
 * fail later with an unrelated TypeError.
 *
 * @param archetype Candidate archetype name.
 * @returns The stored chain array, or `undefined` for an unknown archetype.
 */
function lookupStarterChain(archetype) {
  return Object.prototype.hasOwnProperty.call(STARTER_CHAINS, archetype)
    ? STARTER_CHAINS[archetype]
    : void 0;
}

/**
 * Builds a fallback chain for an archetype.
 *
 * Steps: start from the archetype's starter chain; if `primary` is given,
 * move/insert it at the front; drop ids listed in `policy.blockedModels`
 * (this can remove `primary` itself); de-duplicate keeping first
 * occurrences; truncate to `maxDepth` entries.
 *
 * @param opts.archetype Archetype name; must be a key of STARTER_CHAINS.
 * @param opts.primary   Optional model id to place first.
 * @param opts.maxDepth  Maximum chain length, default 3. Must be >= 1.
 * @param opts.policy    Optional policy; only `blockedModels` is read.
 * @returns A new array of model ids (possibly empty if everything is blocked).
 * @throws {Error} If `maxDepth` is not a number >= 1, or the archetype is unknown.
 */
function getDefaultFallbackChain(opts) {
  const { archetype, primary, maxDepth = 3, policy } = opts;
  // `!(x >= 1)` rather than `x < 1`: NaN fails both comparisons, and a NaN
  // depth would otherwise reach slice() and silently yield an empty chain.
  if (!(maxDepth >= 1)) {
    throw new Error(
      `getDefaultFallbackChain: maxDepth must be >= 1, got ${maxDepth}`
    );
  }
  const starter = lookupStarterChain(archetype);
  if (!starter) {
    throw new Error(
      `getDefaultFallbackChain: unknown archetype "${archetype}". Known: ${Object.keys(STARTER_CHAINS).join(", ")}`
    );
  }
  // A Set keeps first-insertion order, so a caller-supplied primary stays in
  // front and its later duplicate from the starter chain is dropped.
  let chain = [...new Set(primary ? [primary, ...starter] : starter)];
  const blockedIds = policy?.blockedModels;
  if (blockedIds?.length > 0) {
    const blocked = new Set(blockedIds);
    chain = chain.filter((id) => !blocked.has(id));
  }
  return chain.slice(0, maxDepth);
}

/**
 * Returns a copy of the starter chain for one archetype.
 *
 * @param archetype Archetype name; must be a key of STARTER_CHAINS.
 * @returns A new array of model ids.
 * @throws {Error} If the archetype is unknown.
 */
function getStarterChain(archetype) {
  const chain = lookupStarterChain(archetype);
  if (!chain) {
    throw new Error(
      `getStarterChain: unknown archetype "${archetype}"`
    );
  }
  return [...chain];
}

/**
 * Returns a copy of the whole starter table.
 *
 * @returns A new object mapping each archetype to a new array of model ids.
 */
function getAllStarterChains() {
  return Object.fromEntries(
    Object.entries(STARTER_CHAINS).map(([archetype, chain]) => [archetype, [...chain]])
  );
}
|
|
1640
|
+
|
|
1468
1641
|
// src/index.ts
|
|
1469
1642
|
function compile2(ir, opts) {
|
|
1470
1643
|
const result = compile(ir, opts);
|
|
@@ -1488,7 +1661,10 @@ export {
|
|
|
1488
1661
|
configureBrain,
|
|
1489
1662
|
countTokens,
|
|
1490
1663
|
execute,
|
|
1664
|
+
getAllStarterChains,
|
|
1665
|
+
getDefaultFallbackChain,
|
|
1491
1666
|
getProfile,
|
|
1667
|
+
getStarterChain,
|
|
1492
1668
|
hashShape,
|
|
1493
1669
|
isArchetype,
|
|
1494
1670
|
learningKey,
|
|
@@ -159,6 +159,27 @@ interface CompilePolicy {
|
|
|
159
159
|
* differences but not large enough to override hard rejects).
|
|
160
160
|
*/
|
|
161
161
|
preferredModels?: string[];
|
|
162
|
+
/**
|
|
163
|
+
* Customer-posture tag (master plan §1.2, alpha.9).
|
|
164
|
+
*
|
|
165
|
+
* - `'locked'` — compliance/contract/brand-promise. Caller passes
|
|
166
|
+
* exactly one model; no fallback is desired. kgauto
|
|
167
|
+
* never walks the chain.
|
|
168
|
+
* - `'preferred'` — user-selected primary, fallback chain as safety
|
|
169
|
+
* net. On 429/5xx, walk the chain and surface
|
|
170
|
+
* `fellOverFrom` so the consumer can show "Claude
|
|
171
|
+
* was busy; we used Pro for this answer."
|
|
172
|
+
* - `'open'` — library picks the chain. Model identity is
|
|
173
|
+
* irrelevant; output is the contract.
|
|
174
|
+
*
|
|
175
|
+
* The field is **informational** — kgauto's execution path is already
|
|
176
|
+
* determined by the shape of `ir.models`. Posture surfaces in
|
|
177
|
+
* telemetry so the cost-watcher can distinguish "locked failed, no
|
|
178
|
+
* fallback was tried" from "open chain exhausted." Default: unspecified.
|
|
179
|
+
* The advisor does not infer a posture from `ir.models`: its
|
|
180
|
+
* `single-model-array` warning is silenced only by an explicit `'locked'`.
|
|
181
|
+
*/
|
|
182
|
+
posture?: 'locked' | 'preferred' | 'open';
|
|
162
183
|
}
|
|
163
184
|
/**
|
|
164
185
|
* The IR — the input to compile().
|
|
@@ -418,6 +439,18 @@ interface CallAttempt {
|
|
|
418
439
|
errorCode?: string;
|
|
419
440
|
message?: string;
|
|
420
441
|
}
|
|
442
|
+
/**
|
|
443
|
+
* Why fallback fired. Normalized for `CallResult.fallbackReason` (alpha.9).
|
|
444
|
+
*
|
|
445
|
+
* - `rate_limit` provider returned 429
|
|
446
|
+
* - `provider_error` 5xx, network, or other retryable upstream issue
|
|
447
|
+
* - `cost_cap` preflight policy.maxCostPerCallUsd rejected target
|
|
448
|
+
* - `cliff` alpha.8 contract violation (MAX_TOKENS on
|
|
449
|
+
* structured output, parse-failed JSON)
|
|
450
|
+
* - `contract_violation` other compile-time-contract failures (reserved
|
|
451
|
+
* for alpha.10+ — e.g. mid-stream policy rejects)
|
|
452
|
+
*/
|
|
453
|
+
type FallbackReason = 'rate_limit' | 'provider_error' | 'cost_cap' | 'cliff' | 'contract_violation';
|
|
421
454
|
interface CallResult {
|
|
422
455
|
/** Compile handle (still valid for record() if consumer wants to add oracle scores later). */
|
|
423
456
|
handle: string;
|
|
@@ -432,6 +465,28 @@ interface CallResult {
|
|
|
432
465
|
mutationsApplied: MutationApplied[];
|
|
433
466
|
/** One entry per provider attempt — observability for retry/fallback walks. */
|
|
434
467
|
attempts: CallAttempt[];
|
|
468
|
+
/**
|
|
469
|
+
* Alpha.9 normalization of fallback-walk telemetry. When the chain
|
|
470
|
+
* succeeded on the first attempt, these collapse to:
|
|
471
|
+
* - `servedBy === requestedModel`
|
|
472
|
+
* - `fellOverFrom` undefined
|
|
473
|
+
* - `fallbackReason` undefined
|
|
474
|
+
*
|
|
475
|
+
* When fallback fired:
|
|
476
|
+
* - `servedBy` = `actualModel` (the model that produced the response)
|
|
477
|
+
* - `fellOverFrom` = `requestedModel` (what the caller / compile() asked for)
|
|
478
|
+
* - `fallbackReason` = normalized cause derived from the first
|
|
479
|
+
* non-success attempt's `errorCode`
|
|
480
|
+
*
|
|
481
|
+
* Consumer UX use: show "Claude was busy; we used Pro for this answer"
|
|
482
|
+
* when `fellOverFrom` is set (master plan §3.6).
|
|
483
|
+
*/
|
|
484
|
+
/** Model that actually answered. Equal to `actualModel`; kept distinct for clarity. */
|
|
485
|
+
servedBy: string;
|
|
486
|
+
/** Set only when fallback fired. Equal to `requestedModel` in that case. */
|
|
487
|
+
fellOverFrom?: string;
|
|
488
|
+
/** Set only when fallback fired. Normalized cause. */
|
|
489
|
+
fallbackReason?: FallbackReason;
|
|
435
490
|
}
|
|
436
491
|
/**
|
|
437
492
|
* Thrown when call() exhausts the fallback chain without success.
|
|
@@ -609,6 +664,32 @@ interface ModelProfile {
|
|
|
609
664
|
weaknesses: string[];
|
|
610
665
|
notes?: string;
|
|
611
666
|
verifiedAgainstDocs?: string;
|
|
667
|
+
/**
|
|
668
|
+
* Hand-curated per-archetype performance score on a 0-10 scale.
|
|
669
|
+
*
|
|
670
|
+
* 10 = frontier on this archetype (e.g. Opus 4.7 on critique)
|
|
671
|
+
* 8 = strong second tier (Sonnet on plan, Pro on extract)
|
|
672
|
+
* 7 = competent (Haiku on classify, Flash on hunt)
|
|
673
|
+
* 5 = acceptable for tolerant archetypes (Flash-Lite on classify)
|
|
674
|
+
* 3 = degraded (Flash on critique, DeepSeek on hunt)
|
|
675
|
+
*
|
|
676
|
+
* Missing archetypes default to `5` (no data, neutral). Each non-default
|
|
677
|
+
* value should carry a one-line rationale in the profile's note or inline
|
|
678
|
+
* comment citing brain evidence, family prior, or "starter hypothesis —
|
|
679
|
+
* verify with telemetry."
|
|
680
|
+
*
|
|
681
|
+
* Source today: hand-curated from master plan §3.3 + §6.2 starter tables.
|
|
682
|
+
* Source tomorrow (alpha.10+): brain `archetype_model_evidence` view.
|
|
683
|
+
*
|
|
684
|
+
* Anti-hallucination guardrail (master plan §2.5): when the watcher's
|
|
685
|
+
* `--audit-fields` flag flags a profile stale (>90 days since
|
|
686
|
+
* verifiedAgainstDocs), the archetypePerf values get re-audited
|
|
687
|
+
* alongside capability fields. AI-trained intuition is NOT a valid
|
|
688
|
+
* source — only docs or brain evidence.
|
|
689
|
+
*
|
|
690
|
+
* alpha.9.
|
|
691
|
+
*/
|
|
692
|
+
archetypePerf?: Partial<Record<IntentArchetypeName, number>>;
|
|
612
693
|
}
|
|
613
694
|
declare const ALIASES: Record<string, string>;
|
|
614
695
|
declare function getProfile(id: string): ModelProfile;
|
|
@@ -616,4 +697,4 @@ declare function tryGetProfile(id: string): ModelProfile | undefined;
|
|
|
616
697
|
declare function allProfiles(): readonly ModelProfile[];
|
|
617
698
|
declare function profilesByProvider(provider: Provider): readonly ModelProfile[];
|
|
618
699
|
|
|
619
|
-
export { type ApiKeys as A, type BestPracticeAdvisory as B, type CompilePolicy as C, type HistoryCachePolicy as H, type IntentDeclaration as I, type LoweringSpec as L, type ModelProfile as M, type NormalizedResponse as N, type OracleScore as O, type ProviderOverrides as P, type RecordInput as R, type StructuredOutputCapability as S, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type CompileResult as e, ALIASES as f, type CacheStrategy as g, type CallAttempt as h, CallError as i, type CliffRule as j, type Constraints as k, type Message as l, type MutationApplied as m, type NormalizedTokens as n, type PromptSection as o, type Provider as p, type RecoveryRule as q, type SystemPromptMode as r, type ToolDefinition as s, allProfiles as t, getProfile as u, profilesByProvider as v, tryGetProfile as w };
|
|
700
|
+
export { type ApiKeys as A, type BestPracticeAdvisory as B, type CompilePolicy as C, type FallbackReason as F, type HistoryCachePolicy as H, type IntentDeclaration as I, type LoweringSpec as L, type ModelProfile as M, type NormalizedResponse as N, type OracleScore as O, type ProviderOverrides as P, type RecordInput as R, type StructuredOutputCapability as S, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type CompileResult as e, ALIASES as f, type CacheStrategy as g, type CallAttempt as h, CallError as i, type CliffRule as j, type Constraints as k, type Message as l, type MutationApplied as m, type NormalizedTokens as n, type PromptSection as o, type Provider as p, type RecoveryRule as q, type SystemPromptMode as r, type ToolDefinition as s, allProfiles as t, getProfile as u, profilesByProvider as v, tryGetProfile as w };
|
|
@@ -159,6 +159,27 @@ interface CompilePolicy {
|
|
|
159
159
|
* differences but not large enough to override hard rejects).
|
|
160
160
|
*/
|
|
161
161
|
preferredModels?: string[];
|
|
162
|
+
/**
|
|
163
|
+
* Customer-posture tag (master plan §1.2, alpha.9).
|
|
164
|
+
*
|
|
165
|
+
* - `'locked'` — compliance/contract/brand-promise. Caller passes
|
|
166
|
+
* exactly one model; no fallback is desired. kgauto
|
|
167
|
+
* never walks the chain.
|
|
168
|
+
* - `'preferred'` — user-selected primary, fallback chain as safety
|
|
169
|
+
* net. On 429/5xx, walk the chain and surface
|
|
170
|
+
* `fellOverFrom` so the consumer can show "Claude
|
|
171
|
+
* was busy; we used Pro for this answer."
|
|
172
|
+
* - `'open'` — library picks the chain. Model identity is
|
|
173
|
+
* irrelevant; output is the contract.
|
|
174
|
+
*
|
|
175
|
+
* The field is **informational** — kgauto's execution path is already
|
|
176
|
+
* determined by the shape of `ir.models`. Posture surfaces in
|
|
177
|
+
* telemetry so the cost-watcher can distinguish "locked failed, no
|
|
178
|
+
* fallback was tried" from "open chain exhausted." Default: unspecified.
|
|
179
|
+
* The advisor does not infer a posture from `ir.models`: its
|
|
180
|
+
* `single-model-array` warning is silenced only by an explicit `'locked'`.
|
|
181
|
+
*/
|
|
182
|
+
posture?: 'locked' | 'preferred' | 'open';
|
|
162
183
|
}
|
|
163
184
|
/**
|
|
164
185
|
* The IR — the input to compile().
|
|
@@ -418,6 +439,18 @@ interface CallAttempt {
|
|
|
418
439
|
errorCode?: string;
|
|
419
440
|
message?: string;
|
|
420
441
|
}
|
|
442
|
+
/**
|
|
443
|
+
* Why fallback fired. Normalized for `CallResult.fallbackReason` (alpha.9).
|
|
444
|
+
*
|
|
445
|
+
* - `rate_limit` provider returned 429
|
|
446
|
+
* - `provider_error` 5xx, network, or other retryable upstream issue
|
|
447
|
+
* - `cost_cap` preflight policy.maxCostPerCallUsd rejected target
|
|
448
|
+
* - `cliff` alpha.8 contract violation (MAX_TOKENS on
|
|
449
|
+
* structured output, parse-failed JSON)
|
|
450
|
+
* - `contract_violation` other compile-time-contract failures (reserved
|
|
451
|
+
* for alpha.10+ — e.g. mid-stream policy rejects)
|
|
452
|
+
*/
|
|
453
|
+
type FallbackReason = 'rate_limit' | 'provider_error' | 'cost_cap' | 'cliff' | 'contract_violation';
|
|
421
454
|
interface CallResult {
|
|
422
455
|
/** Compile handle (still valid for record() if consumer wants to add oracle scores later). */
|
|
423
456
|
handle: string;
|
|
@@ -432,6 +465,28 @@ interface CallResult {
|
|
|
432
465
|
mutationsApplied: MutationApplied[];
|
|
433
466
|
/** One entry per provider attempt — observability for retry/fallback walks. */
|
|
434
467
|
attempts: CallAttempt[];
|
|
468
|
+
/**
|
|
469
|
+
* Alpha.9 normalization of fallback-walk telemetry. When the chain
|
|
470
|
+
* succeeded on the first attempt, these collapse to:
|
|
471
|
+
* - `servedBy === requestedModel`
|
|
472
|
+
* - `fellOverFrom` undefined
|
|
473
|
+
* - `fallbackReason` undefined
|
|
474
|
+
*
|
|
475
|
+
* When fallback fired:
|
|
476
|
+
* - `servedBy` = `actualModel` (the model that produced the response)
|
|
477
|
+
* - `fellOverFrom` = `requestedModel` (what the caller / compile() asked for)
|
|
478
|
+
* - `fallbackReason` = normalized cause derived from the first
|
|
479
|
+
* non-success attempt's `errorCode`
|
|
480
|
+
*
|
|
481
|
+
* Consumer UX use: show "Claude was busy; we used Pro for this answer"
|
|
482
|
+
* when `fellOverFrom` is set (master plan §3.6).
|
|
483
|
+
*/
|
|
484
|
+
/** Model that actually answered. Equal to `actualModel`; kept distinct for clarity. */
|
|
485
|
+
servedBy: string;
|
|
486
|
+
/** Set only when fallback fired. Equal to `requestedModel` in that case. */
|
|
487
|
+
fellOverFrom?: string;
|
|
488
|
+
/** Set only when fallback fired. Normalized cause. */
|
|
489
|
+
fallbackReason?: FallbackReason;
|
|
435
490
|
}
|
|
436
491
|
/**
|
|
437
492
|
* Thrown when call() exhausts the fallback chain without success.
|
|
@@ -609,6 +664,32 @@ interface ModelProfile {
|
|
|
609
664
|
weaknesses: string[];
|
|
610
665
|
notes?: string;
|
|
611
666
|
verifiedAgainstDocs?: string;
|
|
667
|
+
/**
|
|
668
|
+
* Hand-curated per-archetype performance score on a 0-10 scale.
|
|
669
|
+
*
|
|
670
|
+
* 10 = frontier on this archetype (e.g. Opus 4.7 on critique)
|
|
671
|
+
* 8 = strong second tier (Sonnet on plan, Pro on extract)
|
|
672
|
+
* 7 = competent (Haiku on classify, Flash on hunt)
|
|
673
|
+
* 5 = acceptable for tolerant archetypes (Flash-Lite on classify)
|
|
674
|
+
* 3 = degraded (Flash on critique, DeepSeek on hunt)
|
|
675
|
+
*
|
|
676
|
+
* Missing archetypes default to `5` (no data, neutral). Each non-default
|
|
677
|
+
* value should carry a one-line rationale in the profile's note or inline
|
|
678
|
+
* comment citing brain evidence, family prior, or "starter hypothesis —
|
|
679
|
+
* verify with telemetry."
|
|
680
|
+
*
|
|
681
|
+
* Source today: hand-curated from master plan §3.3 + §6.2 starter tables.
|
|
682
|
+
* Source tomorrow (alpha.10+): brain `archetype_model_evidence` view.
|
|
683
|
+
*
|
|
684
|
+
* Anti-hallucination guardrail (master plan §2.5): when the watcher's
|
|
685
|
+
* `--audit-fields` flag flags a profile stale (>90 days since
|
|
686
|
+
* verifiedAgainstDocs), the archetypePerf values get re-audited
|
|
687
|
+
* alongside capability fields. AI-trained intuition is NOT a valid
|
|
688
|
+
* source — only docs or brain evidence.
|
|
689
|
+
*
|
|
690
|
+
* alpha.9.
|
|
691
|
+
*/
|
|
692
|
+
archetypePerf?: Partial<Record<IntentArchetypeName, number>>;
|
|
612
693
|
}
|
|
613
694
|
declare const ALIASES: Record<string, string>;
|
|
614
695
|
declare function getProfile(id: string): ModelProfile;
|
|
@@ -616,4 +697,4 @@ declare function tryGetProfile(id: string): ModelProfile | undefined;
|
|
|
616
697
|
declare function allProfiles(): readonly ModelProfile[];
|
|
617
698
|
declare function profilesByProvider(provider: Provider): readonly ModelProfile[];
|
|
618
699
|
|
|
619
|
-
export { type ApiKeys as A, type BestPracticeAdvisory as B, type CompilePolicy as C, type HistoryCachePolicy as H, type IntentDeclaration as I, type LoweringSpec as L, type ModelProfile as M, type NormalizedResponse as N, type OracleScore as O, type ProviderOverrides as P, type RecordInput as R, type StructuredOutputCapability as S, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type CompileResult as e, ALIASES as f, type CacheStrategy as g, type CallAttempt as h, CallError as i, type CliffRule as j, type Constraints as k, type Message as l, type MutationApplied as m, type NormalizedTokens as n, type PromptSection as o, type Provider as p, type RecoveryRule as q, type SystemPromptMode as r, type ToolDefinition as s, allProfiles as t, getProfile as u, profilesByProvider as v, tryGetProfile as w };
|
|
700
|
+
export { type ApiKeys as A, type BestPracticeAdvisory as B, type CompilePolicy as C, type FallbackReason as F, type HistoryCachePolicy as H, type IntentDeclaration as I, type LoweringSpec as L, type ModelProfile as M, type NormalizedResponse as N, type OracleScore as O, type ProviderOverrides as P, type RecordInput as R, type StructuredOutputCapability as S, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type CompileResult as e, ALIASES as f, type CacheStrategy as g, type CallAttempt as h, CallError as i, type CliffRule as j, type Constraints as k, type Message as l, type MutationApplied as m, type NormalizedTokens as n, type PromptSection as o, type Provider as p, type RecoveryRule as q, type SystemPromptMode as r, type ToolDefinition as s, allProfiles as t, getProfile as u, profilesByProvider as v, tryGetProfile as w };
|
package/dist/profiles.d.mts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-
|
|
1
|
+
export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-NUZOIzGr.mjs';
|
|
2
2
|
import './dialect.mjs';
|
package/dist/profiles.d.ts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-
|
|
1
|
+
export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-BYVOc1eW.js';
|
|
2
2
|
import './dialect.js';
|