@warmdrift/kgauto-compiler 2.0.0-alpha.26 → 2.0.0-alpha.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-7MTHFSNY.mjs → chunk-JQGRWJZO.mjs} +181 -48
- package/dist/chunk-WXCFWUCN.mjs +678 -0
- package/dist/glassbox/index.d.mts +3 -3
- package/dist/glassbox/index.d.ts +3 -3
- package/dist/glassbox-routes/index.d.mts +88 -6
- package/dist/glassbox-routes/index.d.ts +88 -6
- package/dist/glassbox-routes/index.js +1820 -8
- package/dist/glassbox-routes/index.mjs +320 -8
- package/dist/index.d.mts +184 -3
- package/dist/index.d.ts +184 -3
- package/dist/index.js +342 -53
- package/dist/index.mjs +108 -581
- package/dist/{ir-B_XX2LAO.d.ts → ir-5W0efxt9.d.ts} +86 -1
- package/dist/{ir-B9zqlwjH.d.mts → ir-MXCJA8L7.d.mts} +86 -1
- package/dist/profiles.d.mts +1 -1
- package/dist/profiles.d.ts +1 -1
- package/dist/profiles.js +181 -48
- package/dist/profiles.mjs +1 -1
- package/dist/{types-bt0aVJb8.d.ts → types-CiZ9HLIU.d.ts} +1 -1
- package/dist/{types-o9etg93a.d.mts → types-sDZQzPM6.d.mts} +1 -1
- package/package.json +1 -1
|
@@ -1,3 +1,10 @@
|
|
|
1
|
+
import {
|
|
2
|
+
ARCHETYPE_FLOOR_DEFAULT,
|
|
3
|
+
getDefaultFallbackChain
|
|
4
|
+
} from "../chunk-WXCFWUCN.mjs";
|
|
5
|
+
import {
|
|
6
|
+
tryGetProfile
|
|
7
|
+
} from "../chunk-JQGRWJZO.mjs";
|
|
1
8
|
import {
|
|
2
9
|
subscribe,
|
|
3
10
|
subscribeApp
|
|
@@ -38,6 +45,151 @@ function checkAuth(req, config) {
|
|
|
38
45
|
return null;
|
|
39
46
|
}
|
|
40
47
|
|
|
48
|
+
// src/glassbox-routes/counterfactuals.ts
|
|
49
|
+
/** Minimum share of the served cost an alternative must save to be listed. */
var COUNTERFACTUAL_MIN_SAVINGS_RATIO = 0.1;
/** Maximum number of alternatives returned for one trace. */
var COUNTERFACTUAL_MAX_RESULTS = 2;

/**
 * Lists cheaper models that could have served the same call.
 *
 * Walks the default fallback chain for the archetype (posture "open",
 * depth 10), skips the served model, unregistered models and models whose
 * archetype score is below ARCHETYPE_FLOOR_DEFAULT, re-prices the call on
 * each remaining profile, and keeps those saving at least
 * COUNTERFACTUAL_MIN_SAVINGS_RATIO of the served cost.
 *
 * @param {object} args
 * @param {string} args.servedModel - Model that actually served the call.
 * @param {number} args.servedCostUsd - Cost of the served call in USD.
 * @param {string} args.archetype - Intent archetype of the call.
 * @param {number} args.tokensIn - Input tokens of the served call.
 * @param {number} args.tokensOut - Output tokens of the served call.
 * @param {number} [args.cacheReadInputTokens=0] - Input tokens read from cache.
 * @param {string} [args.toolOrchestration] - Forwarded to the chain lookup.
 * @returns {Array<object>} Up to COUNTERFACTUAL_MAX_RESULTS entries, cheapest
 *   first; empty when inputs are unusable or the chain lookup throws.
 */
function computeCounterfactuals(args) {
  const {
    servedModel,
    servedCostUsd,
    archetype,
    tokensIn,
    tokensOut,
    cacheReadInputTokens = 0,
    toolOrchestration
  } = args;
  // NaN and Infinity compare false against `<= 0`, so they must be rejected
  // explicitly; otherwise candidates are emitted with NaN savings.
  if (!Number.isFinite(tokensIn) || tokensIn <= 0) return [];
  if (!Number.isFinite(servedCostUsd) || servedCostUsd <= 0) return [];
  let chain;
  try {
    chain = getDefaultFallbackChain({
      archetype,
      posture: "open",
      maxDepth: 10,
      toolOrchestration
    });
  } catch {
    // Best-effort feature: a failing chain lookup just means "no suggestions".
    return [];
  }
  const candidates = [];
  const minSavings = servedCostUsd * COUNTERFACTUAL_MIN_SAVINGS_RATIO;
  for (const modelId of chain) {
    if (modelId === servedModel) continue;
    const profile = tryGetProfile(modelId);
    if (!profile) continue;
    // A profile without a score for this archetype is treated as 5.
    const perf = profile.archetypePerf?.[archetype] ?? 5;
    if (perf < ARCHETYPE_FLOOR_DEFAULT) continue;
    const estimated = estimateCostUsd({
      profile,
      tokensIn,
      tokensOut,
      cacheReadInputTokens
    });
    if (estimated === void 0) continue;
    const savings = servedCostUsd - estimated;
    if (savings < minSavings) continue;
    const savingsPercent = Math.round(savings / servedCostUsd * 100);
    const reason = buildReason({
      modelId,
      archetype,
      perf,
      profile
    });
    candidates.push({
      modelId,
      estimatedCostUsd: round6(estimated),
      savingsUsd: round6(savings),
      savingsPercent,
      reason
    });
  }
  candidates.sort((a, b) => a.estimatedCostUsd - b.estimatedCostUsd);
  return candidates.slice(0, COUNTERFACTUAL_MAX_RESULTS);
}
|
|
109
|
+
/**
 * Prices a call on a given model profile.
 *
 * Cached input tokens are billed at `costInputPer1m * discount`; all other
 * input tokens at `costInputPer1m`; output tokens at `costOutputPer1m`.
 *
 * @param {object} args
 * @param {object} args.profile - Profile with `costInputPer1m`,
 *   `costOutputPer1m` and, optionally, `lowering.cache.discount`.
 * @param {number} args.tokensIn - Total input tokens.
 * @param {number} args.tokensOut - Output tokens.
 * @param {number} args.cacheReadInputTokens - Input tokens read from cache.
 * @returns {number|undefined} Estimated USD cost, or undefined when the
 *   result is not a finite, non-negative number.
 */
function estimateCostUsd(args) {
  const { profile, tokensIn, tokensOut, cacheReadInputTokens } = args;
  // Clamp cached tokens into [0, tokensIn]: a negative count would otherwise
  // inflate the uncached portion beyond the real input size.
  const cacheableIn = Math.max(Math.min(cacheReadInputTokens, tokensIn), 0);
  const nonCachedIn = Math.max(tokensIn - cacheableIn, 0);
  // A profile without cache lowering data simply gets no cache discount
  // instead of throwing a TypeError at the caller.
  const discount = profile.lowering?.cache?.discount ?? 1;
  const inUsd = nonCachedIn / 1e6 * profile.costInputPer1m + cacheableIn / 1e6 * profile.costInputPer1m * discount;
  const outUsd = tokensOut / 1e6 * profile.costOutputPer1m;
  const total = inUsd + outUsd;
  if (!Number.isFinite(total)) return void 0;
  if (total < 0) return void 0;
  return total;
}
|
|
121
|
+
/**
 * Rounds a number to six decimal places.
 *
 * @param {number} n - Value to round.
 * @returns {number} `n` rounded to the nearest millionth.
 */
function round6(n) {
  const scaled = Math.round(n * 1e6);
  return scaled / 1e6;
}
|
|
124
|
+
/**
 * Builds the one-line explanation attached to a counterfactual entry.
 *
 * @param {object} args
 * @param {string} args.modelId - Candidate model id.
 * @param {string} args.archetype - Intent archetype of the call.
 * @param {number} args.perf - Archetype score used for the candidate.
 * @param {object} args.profile - Candidate profile; its first `strengths`
 *   entry, if any, is appended with underscores replaced by spaces.
 * @returns {string} The reason text.
 */
function buildReason(args) {
  const firstStrength = args.profile.strengths?.[0];
  let text = `${args.modelId} on ${args.archetype}: archetypePerf=${args.perf}`;
  if (firstStrength) {
    text += `, ${firstStrength.replace(/_/g, " ")}`;
  }
  return text;
}
|
|
130
|
+
|
|
131
|
+
// src/glassbox-routes/projected-cost.ts
|
|
132
|
+
/** Below this average number of calls per day no projection is produced. */
var INSUFFICIENT_VOLUME_THRESHOLD = 5;
/** Length of the look-back window, in days. */
var WINDOW_DAYS = 7;

/**
 * Projects a daily cost for one (app, archetype) pair.
 *
 * Asks the brain's `compile_outcomes` endpoint for the exact number of rows
 * created in the last WINDOW_DAYS days (count read from the `Content-Range`
 * response header; `limit=0` so no rows are transferred), averages it per
 * day and multiplies by the served call's cost.
 *
 * Best-effort: every failure yields `undefined`, never a rejection.
 *
 * @param {object} args
 * @param {string} args.appId - Application id to filter on.
 * @param {string} args.archetype - Intent archetype to filter on.
 * @param {number} args.servedCostUsd - Cost of the served call in USD.
 * @param {string} args.brainEndpoint - Base URL of the brain.
 * @param {string} args.brainJwt - Bearer token sent as `Authorization`.
 * @param {string} args.brainAnonKey - Value sent as the `apikey` header.
 * @param {Function} [args.fetch] - fetch implementation; defaults to
 *   `globalThis.fetch`.
 * @returns {Promise<number|undefined>} Projected USD per day rounded to six
 *   decimals, or undefined when inputs are unusable, the request fails, the
 *   count is unavailable, or volume is below the threshold.
 */
async function computeProjectedDailyCost(args) {
  const {
    appId,
    archetype,
    servedCostUsd,
    brainEndpoint,
    brainJwt,
    brainAnonKey,
    fetch: fetchImpl
  } = args;
  if (!appId || !archetype) return void 0;
  if (!Number.isFinite(servedCostUsd) || servedCostUsd <= 0) return void 0;
  // Without a usable endpoint `.replace` below would throw and reject the
  // promise, breaking the "undefined on failure" contract.
  if (typeof brainEndpoint !== "string" || brainEndpoint.length === 0) return void 0;
  const doFetch = fetchImpl ?? ((...a) => globalThis.fetch(...a));
  const base = brainEndpoint.replace(/\/+$/, "");
  const cutoffIso = new Date(
    Date.now() - WINDOW_DAYS * 24 * 60 * 60 * 1e3
  ).toISOString();
  const qs = new URLSearchParams();
  qs.set("app_id", `eq.${appId}`);
  qs.set("intent_archetype", `eq.${archetype}`);
  qs.set("created_at", `gte.${cutoffIso}`);
  qs.set("select", "handle");
  qs.set("limit", "0");
  const url = `${base}/rest/v1/compile_outcomes?${qs.toString()}`;
  let res;
  try {
    res = await doFetch(url, {
      method: "GET",
      headers: {
        Authorization: `Bearer ${brainJwt}`,
        apikey: brainAnonKey,
        Accept: "application/json",
        // Triggers PostgREST exact count in Content-Range header.
        Prefer: "count=exact"
      }
    });
  } catch {
    // Network failure: the projection is optional, so report "unknown".
    return void 0;
  }
  if (!res.ok) return void 0;
  const contentRange = res.headers.get("content-range");
  const count = parseContentRangeCount(contentRange);
  if (count === void 0) return void 0;
  const avgPerDay = count / WINDOW_DAYS;
  if (avgPerDay < INSUFFICIENT_VOLUME_THRESHOLD) return void 0;
  const projected = avgPerDay * servedCostUsd;
  return Math.round(projected * 1e6) / 1e6;
}
|
|
182
|
+
/**
 * Extracts the total count from a `Content-Range` header value, i.e. the
 * part after the last `/` (for example `0-24/3573` or `*​/3573`).
 *
 * @param {string|null|undefined} header - Raw header value.
 * @returns {number|undefined} The non-negative integer total, or undefined
 *   when the header is absent, has no `/`, reports an unknown total (`*`),
 *   or the total is not a plain run of decimal digits.
 */
function parseContentRangeCount(header) {
  if (typeof header !== "string" || header === "") return void 0;
  const slash = header.lastIndexOf("/");
  if (slash < 0) return void 0;
  const tail = header.slice(slash + 1).trim();
  // parseInt alone would accept "12abc" or "1e3" by reading a numeric
  // prefix; require the whole token to be digits so garbage is rejected.
  if (!/^\d+$/.test(tail)) return void 0;
  const n = Number.parseInt(tail, 10);
  return Number.isSafeInteger(n) ? n : void 0;
}
|
|
192
|
+
|
|
41
193
|
// src/glassbox-routes/proxy.ts
|
|
42
194
|
var JSON_HEADERS2 = {
|
|
43
195
|
"Content-Type": "application/json",
|
|
@@ -77,15 +229,151 @@ function rowToSummary(row) {
|
|
|
77
229
|
estimatedCostUsd: typeof row.cost_usd_actual === "number" ? row.cost_usd_actual : 0
|
|
78
230
|
};
|
|
79
231
|
}
|
|
232
|
+
var INPUT_RATIO_YELLOW = 0.65;
|
|
233
|
+
var INPUT_RATIO_RED = 0.85;
|
|
234
|
+
var CACHE_HEALTH_MIN_TOKENS = 1e3;
|
|
235
|
+
var CACHE_RATIO_GREEN = 0.5;
|
|
236
|
+
var CACHE_RATIO_YELLOW = 0.1;
|
|
237
|
+
var FALLBACK_REASONS = /* @__PURE__ */ new Set([
|
|
238
|
+
"rate_limit",
|
|
239
|
+
"provider_auth_failed",
|
|
240
|
+
"provider_error",
|
|
241
|
+
"cliff",
|
|
242
|
+
"cost_cap",
|
|
243
|
+
"contract_violation"
|
|
244
|
+
]);
|
|
245
|
+
/**
 * Narrows an unknown value to a non-empty string.
 *
 * @param {unknown} v - Value to inspect.
 * @returns {string|undefined} `v` when it is a string with at least one
 *   character, otherwise undefined.
 */
function asString(v) {
  if (typeof v !== "string") return void 0;
  return v.length > 0 ? v : void 0;
}
|
|
248
|
+
/**
 * Narrows an unknown value to a finite number.
 *
 * @param {unknown} v - Value to inspect.
 * @returns {number|undefined} `v` when it is a finite number, otherwise
 *   undefined.
 */
function asNumber(v) {
  // Number.isFinite does not coerce, so non-number inputs are rejected too.
  if (Number.isFinite(v)) return v;
  return void 0;
}
|
|
251
|
+
/**
 * Narrows an unknown value to a finite number, defaulting to zero.
 *
 * @param {unknown} v - Value to inspect.
 * @returns {number} `v` when it is a finite number, otherwise 0.
 */
function asNumberOrZero(v) {
  // Number.isFinite does not coerce, so non-number inputs fall back to 0.
  if (!Number.isFinite(v)) return 0;
  return v;
}
|
|
254
|
+
/**
 * Narrows an unknown value to an array of strings.
 *
 * @param {unknown} v - Value to inspect.
 * @returns {string[]} A new array holding only the string elements of `v`,
 *   in order; empty when `v` is not an array.
 */
function asStringArray(v) {
  return Array.isArray(v) ? v.filter((item) => typeof item === "string") : [];
}
|
|
262
|
+
/**
 * Maps a raw value onto the known fallback reasons.
 *
 * @param {unknown} v - Raw reason value.
 * @returns {string|undefined} undefined for non-strings; the value itself
 *   when it is a member of FALLBACK_REASONS; otherwise "provider_error".
 */
function asFallbackReason(v) {
  if (typeof v !== "string") return void 0;
  // Unrecognised strings (including the empty string) collapse to the
  // generic reason.
  return FALLBACK_REASONS.has(v) ? v : "provider_error";
}
|
|
268
|
+
/**
 * Converts one raw advisory record into its normalized shape.
 *
 * @param {unknown} raw - Candidate advisory object.
 * @returns {object|undefined} `{ level, code, message }` plus optional
 *   `suggestion`, `docsUrl` and `suggestedAdaptation`; undefined when `raw`
 *   is not an object, `level` is not "info" / "warn" / "critical", or
 *   `code` / `message` are not strings.
 */
function rowToAdvisory(raw) {
  if (raw === null || typeof raw !== "object") return void 0;
  const { level, code, message } = raw;
  const levelIsKnown = level === "info" || level === "warn" || level === "critical";
  if (!levelIsKnown) return void 0;
  if (typeof code !== "string" || typeof message !== "string") return void 0;
  const advisory = { level, code, message };
  const suggestion = asString(raw.suggestion);
  if (suggestion) {
    advisory.suggestion = suggestion;
  }
  // Both snake_case and camelCase spellings are accepted for these fields.
  const docsUrl = asString(raw.docs_url ?? raw.docsUrl);
  if (docsUrl) {
    advisory.docsUrl = docsUrl;
  }
  const adapter = toAdapter(raw.suggested_adaptation ?? raw.suggestedAdaptation);
  if (adapter) {
    advisory.suggestedAdaptation = adapter;
  }
  return advisory;
}
|
|
286
|
+
/**
 * Validates a raw adapter suggestion.
 *
 * @param {unknown} raw - Candidate adapter object.
 * @returns {object|undefined} A fresh
 *   `{ parameter: "toolOrchestration", value: "sequential", consequence }`
 *   object when `raw` has exactly that parameter/value pair and a string
 *   `consequence`; otherwise undefined.
 */
function toAdapter(raw) {
  if (raw === null || typeof raw !== "object") return void 0;
  if (raw.parameter !== "toolOrchestration") return void 0;
  if (raw.value !== "sequential") return void 0;
  const { consequence } = raw;
  if (typeof consequence !== "string") return void 0;
  return {
    parameter: "toolOrchestration",
    value: "sequential",
    consequence
  };
}
|
|
298
|
+
/**
 * Derives the three traffic-light indicators shown for a trace.
 *
 * - inputRatioStatus: share of input tokens in the total; "red" above
 *   INPUT_RATIO_RED, "yellow" above INPUT_RATIO_YELLOW, else "green"
 *   (a zero total counts as ratio 0).
 * - cacheStatus: "na" when historyCacheableTokens is at or below
 *   CACHE_HEALTH_MIN_TOKENS; otherwise graded on inputCacheHitRatio against
 *   CACHE_RATIO_GREEN / CACHE_RATIO_YELLOW.
 * - fallbackStatus: "red" when fellOverFrom is set and differs from target.
 *
 * @param {object} args - tokensIn, tokensOut, historyCacheableTokens,
 *   inputCacheHitRatio, fellOverFrom, target.
 * @returns {{inputRatioStatus: string, cacheStatus: string, fallbackStatus: string}}
 */
function computeHealth(args) {
  const tokenSum = args.tokensIn + args.tokensOut;
  const inputShare = tokenSum > 0 ? args.tokensIn / tokenSum : 0;
  const inputRatioStatus = inputShare > INPUT_RATIO_RED ? "red" : inputShare > INPUT_RATIO_YELLOW ? "yellow" : "green";

  // Start from the worst grade and upgrade as thresholds are met.
  let cacheStatus = "red";
  if (args.historyCacheableTokens <= CACHE_HEALTH_MIN_TOKENS) {
    cacheStatus = "na";
  } else if (args.inputCacheHitRatio >= CACHE_RATIO_GREEN) {
    cacheStatus = "green";
  } else if (args.inputCacheHitRatio >= CACHE_RATIO_YELLOW) {
    cacheStatus = "yellow";
  }

  const fellOver = args.fellOverFrom !== void 0 && args.fellOverFrom !== args.target;
  return {
    inputRatioStatus,
    cacheStatus,
    fallbackStatus: fellOver ? "red" : "green"
  };
}
|
|
80
326
|
/**
 * Expands a raw trace row into the detail payload.
 *
 * Starts from rowToSummary(row) and adds mutation/advisory lists, request
 * and response previews, timing and size figures, cache token counters with
 * the derived input-cache hit ratio, fallback information and the health
 * indicators computed by computeHealth.
 *
 * @param {object} row - Raw trace row.
 * @returns {object} The summary fields plus the detail fields listed above.
 */
function rowToDetail(row) {
  const summary = rowToSummary(row);
  const { tokensIn, tokensOut } = summary;

  // Cache counters default to 0 when absent or not finite numbers.
  const cacheReadInputTokens = asNumberOrZero(row.cache_read_input_tokens);
  const cacheCreationInputTokens = asNumberOrZero(row.cache_creation_input_tokens);
  const historyCacheableTokens = asNumberOrZero(row.history_cacheable_tokens);
  const inputCacheHitRatio = tokensIn > 0 ? cacheReadInputTokens / tokensIn : 0;

  // A fallback reason is only reported when a fall-over actually happened.
  const fellOverFrom = asString(row.fell_over_from);
  const rawReason = row.fallback_reason;
  const fallbackReason = fellOverFrom ? asFallbackReason(rawReason) : void 0;
  const requestedModel = asString(row.requested_model) ?? fellOverFrom;

  // Keep only advisories that normalize successfully.
  const advisorySource = Array.isArray(row.advisories) ? row.advisories : [];
  const advisories = advisorySource
    .map((entry) => rowToAdvisory(entry))
    .filter((entry) => Boolean(entry));

  const health = computeHealth({
    tokensIn,
    tokensOut,
    cacheReadInputTokens,
    historyCacheableTokens,
    inputCacheHitRatio,
    fellOverFrom,
    target: summary.target
  });

  return {
    ...summary,
    mutationsApplied: asStringArray(row.mutations_applied),
    advisories,
    rawRequest: asString(row.prompt_preview),
    rawResponse: asString(row.response_preview),
    requestedModel,
    finishReason: asString(row.finish_reason),
    ttftMs: asNumber(row.ttft_ms),
    // total_ms is preferred; latency_ms is the fallback field.
    totalMs: asNumber(row.total_ms) ?? asNumber(row.latency_ms),
    toolsCount: asNumber(row.tools_count),
    historyDepth: asNumber(row.history_depth),
    systemPromptChars: asNumber(row.system_prompt_chars),
    cacheReadInputTokens,
    cacheCreationInputTokens,
    historyCacheableTokens,
    inputCacheHitRatio,
    fellOverFrom,
    fallbackReason,
    health
  };
}
|
|
90
378
|
function createProxyHandler(config) {
|
|
91
379
|
const {
|
|
@@ -157,7 +445,31 @@ function createProxyHandler(config) {
|
|
|
157
445
|
if (traceId) {
|
|
158
446
|
const first = scrubbed[0];
|
|
159
447
|
if (!first) return jsonError2(404, "not_found");
|
|
160
|
-
|
|
448
|
+
const detail = rowToDetail(first);
|
|
449
|
+
const counterfactuals = computeCounterfactuals({
|
|
450
|
+
servedModel: detail.target,
|
|
451
|
+
servedCostUsd: detail.estimatedCostUsd,
|
|
452
|
+
archetype: detail.archetype,
|
|
453
|
+
tokensIn: detail.tokensIn,
|
|
454
|
+
tokensOut: detail.tokensOut,
|
|
455
|
+
cacheReadInputTokens: detail.cacheReadInputTokens
|
|
456
|
+
});
|
|
457
|
+
detail.counterfactuals = counterfactuals;
|
|
458
|
+
if (detail.estimatedCostUsd > 0) {
|
|
459
|
+
const projected = await computeProjectedDailyCost({
|
|
460
|
+
appId: detail.appId,
|
|
461
|
+
archetype: detail.archetype,
|
|
462
|
+
servedCostUsd: detail.estimatedCostUsd,
|
|
463
|
+
brainEndpoint: base,
|
|
464
|
+
brainJwt,
|
|
465
|
+
brainAnonKey,
|
|
466
|
+
fetch: doFetch
|
|
467
|
+
});
|
|
468
|
+
if (projected !== void 0) {
|
|
469
|
+
detail.projectedDailyCostUsd = projected;
|
|
470
|
+
}
|
|
471
|
+
}
|
|
472
|
+
return jsonResponse(200, detail);
|
|
161
473
|
}
|
|
162
474
|
return jsonResponse(200, { traces: scrubbed.map(rowToSummary) });
|
|
163
475
|
};
|
package/dist/index.d.mts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, e as RecordOutcomeInput, O as OutcomeResult, f as OracleScore, g as CompileResult, B as BestPracticeAdvisory, h as
|
|
2
|
-
export {
|
|
1
|
+
import { C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, e as RecordOutcomeInput, O as OutcomeResult, f as OracleScore, g as CompileResult, B as BestPracticeAdvisory, h as Adapter, i as PerAxisMetrics, j as Provider, k as ChainEntry, G as Grounding } from './ir-MXCJA8L7.mjs';
|
|
2
|
+
export { l as CallAttempt, m as CallError, n as ChainWithGrounding, o as Constraints, F as FallbackReason, H as HistoryCachePolicy, I as IntentDeclaration, M as Message, p as MutationApplied, q as NormalizedTokens, r as OutcomeKind, s as PerAxisMetricsByModel, t as PromptSection, T as ToolCall, u as ToolDefinition } from './ir-MXCJA8L7.mjs';
|
|
3
3
|
import { ModelProfile } from './profiles.mjs';
|
|
4
4
|
export { ALIASES, CacheStrategy, CliffRule, LoweringSpec, RecoveryRule, StructuredOutputCapability, SystemPromptMode, allProfiles, getProfile, profilesByProvider, tryGetProfile } from './profiles.mjs';
|
|
5
5
|
import { IntentArchetypeName } from './dialect.mjs';
|
|
@@ -205,6 +205,13 @@ interface OutcomePayload {
|
|
|
205
205
|
* cliff lumps DeepSeek sequential perf with parallel without this).
|
|
206
206
|
*/
|
|
207
207
|
tool_orchestration?: 'parallel' | 'sequential' | 'either' | null;
|
|
208
|
+
finish_reason?: string;
|
|
209
|
+
total_ms?: number;
|
|
210
|
+
tools_count?: number;
|
|
211
|
+
history_depth?: number;
|
|
212
|
+
system_prompt_chars?: number;
|
|
213
|
+
fell_over_from?: string;
|
|
214
|
+
fallback_reason?: 'rate_limit' | 'provider_auth_failed' | 'provider_error' | 'cliff' | 'cost_cap' | 'contract_violation';
|
|
208
215
|
}
|
|
209
216
|
/**
|
|
210
217
|
* alpha.20 Entry 4: record a quality outcome for a previously-compiled call.
|
|
@@ -358,6 +365,180 @@ interface RunAdvisorPhase2Context {
|
|
|
358
365
|
*/
|
|
359
366
|
declare function runAdvisor(ir: PromptIR, result: AdvisorContext, profile: ModelProfile, policy?: CompilePolicy, phase2?: RunAdvisorPhase2Context): BestPracticeAdvisory[];
|
|
360
367
|
|
|
368
|
+
/**
|
|
369
|
+
* Archetype-cliff compatibility — alpha.28 (tt-intel-Cairn ratified).
|
|
370
|
+
*
|
|
371
|
+
* One question, one answer: *given this model and this intent, can it work
|
|
372
|
+
* — and if not, what adapter (if any) would make it work?*
|
|
373
|
+
*
|
|
374
|
+
* Replaces the silent-archetype-cliff failure mode where a consumer picks a
|
|
375
|
+
* model that's structurally wrong for the intent and kgauto compiles cleanly
|
|
376
|
+
* without surfacing the gap. The triggering incident: tt-intel shipped
|
|
377
|
+
* `deepseek-v4-pro` as the hunt default per a local spec; the kgauto coord
|
|
378
|
+
* doc said L-040 — V4 is structurally wrong for hunt (sequential tools).
|
|
379
|
+
* `archetypePerf.hunt = 4` was already in the profile data. The compiler
|
|
380
|
+
* stayed silent. This API and the matching advisor rule surface it.
|
|
381
|
+
*
|
|
382
|
+
* Pure function. No network. No brain query. No side effects. ~1ms.
|
|
383
|
+
*
|
|
384
|
+
* Consultation doc:
|
|
385
|
+
* command-center/advisory/kgauto/2026-05-21_archetype-cliff-advisor.md
|
|
386
|
+
*
|
|
387
|
+
* Refinements applied per tt-intel-Cairn ratification (2026-05-21):
|
|
388
|
+
* R1: every variant carries `archetypePerf: number` (raw score) — and
|
|
389
|
+
* `requires-adapter` adds `archetypePerfWithAdapter: number` so
|
|
390
|
+
* consumer policy can be expressed as "accept adapter only when score
|
|
391
|
+
* crosses some threshold WITH the adapter on."
|
|
392
|
+
* R2: every variant carries a plain-English `reason: string`. No internal
|
|
393
|
+
* jargon ("L-040", "archetypePerf=4") — the consumer chooses whether
|
|
394
|
+
* to render it as UI hint or operator-tooling tooltip.
|
|
395
|
+
* R3: `Adapter` is a CLOSED discriminated union, not `| string`. alpha.28
|
|
396
|
+
* ships ONE variant (`toolOrchestration: 'sequential'`). Future
|
|
397
|
+
* adapter parameters extend the union explicitly in named releases.
|
|
398
|
+
* NO escape hatch — the whole point is catching "I added a new
|
|
399
|
+
* adapter and forgot to update consumer policy" at compile time.
|
|
400
|
+
*/
|
|
401
|
+
|
|
402
|
+
/**
|
|
403
|
+
* Minimum `archetypePerf[archetype]` score to count as `compatible` under
|
|
404
|
+
* Option A (default policy). Below this, a documented adapter is needed
|
|
405
|
+
* to lift the model above the floor; if no adapter exists, the model is
|
|
406
|
+
* rejected.
|
|
407
|
+
*
|
|
408
|
+
* Matches `QUALITY_FLOOR_FOR_RECOMMENDATION` in `advisor.ts` — kgauto's
|
|
409
|
+
* library-wide convention for "below this score, swap recommendations stop."
|
|
410
|
+
*/
|
|
411
|
+
declare const ARCHETYPE_FLOOR_DEFAULT = 6;
|
|
412
|
+
/**
|
|
413
|
+
* Absolute floor — below this, the cliff is too steep for ANY adapter to
|
|
414
|
+
* lift cleanly. Reserved under Option A (unused today; every below-floor
|
|
415
|
+
* case is gated by adapter availability). Would gate `reject` vs
|
|
416
|
+
* `requires-adapter` under a future Option B per consultation doc Q1.
|
|
417
|
+
*
|
|
418
|
+
* Exported so consumer-side policy can read it (e.g. "accept adapter only
|
|
419
|
+
* when archetypePerf >= ABSOLUTE_FLOOR + 1"). Not used internally by
|
|
420
|
+
* `getModelCompatibility` today — the gate is "does an adapter exist for
|
|
421
|
+
* this cliff?", not score-based.
|
|
422
|
+
*/
|
|
423
|
+
declare const ABSOLUTE_FLOOR = 4;
|
|
424
|
+
/**
|
|
425
|
+
* The intent the call is expressing — archetype + optional orchestration
|
|
426
|
+
* mode. Same shape as `ir.intent.archetype` + `ir.constraints.toolOrchestration`
|
|
427
|
+
* so a consumer can pass `{ archetype: ir.intent.archetype,
|
|
428
|
+
* toolOrchestration: ir.constraints?.toolOrchestration }` directly.
|
|
429
|
+
*/
|
|
430
|
+
interface CompatibilityIntent {
|
|
431
|
+
archetype: IntentArchetypeName;
|
|
432
|
+
toolOrchestration?: 'parallel' | 'sequential' | 'either';
|
|
433
|
+
}
|
|
434
|
+
/**
|
|
435
|
+
* `Adapter` — re-exported above. Canonical definition lives in `ir.ts` to
|
|
436
|
+
* avoid an import cycle (compatibility.ts → profiles.ts → ir.ts).
|
|
437
|
+
*
|
|
438
|
+
* **CLOSED discriminated union per R3.** Future adapter parameters extend
|
|
439
|
+
* the union explicitly in named alpha releases. No `| string` escape hatch
|
|
440
|
+
* — consumer policy code SHOULD write exhaustive `switch (adapter.parameter)`
|
|
441
|
+
* and rely on the compiler to flag "I added a new adapter parameter and
|
|
442
|
+
* forgot to update the consumer's policy."
|
|
443
|
+
*
|
|
444
|
+
* alpha.28 variants:
|
|
445
|
+
* - `{ parameter: 'toolOrchestration'; value: 'sequential'; consequence }`
|
|
446
|
+
* Lifts DeepSeek V4-family on `hunt` from sequential-tool cliff (L-040).
|
|
447
|
+
* The "consequence" field states the trade-off in plain English:
|
|
448
|
+
* "Tool calls run one at a time — slower but reliable."
|
|
449
|
+
*
|
|
450
|
+
* alpha.29+ likely additions (per tt-intel-Cairn priority list):
|
|
451
|
+
* - `{ parameter: 'parallelToolCalls'; value: false; consequence }`
|
|
452
|
+
* - `{ parameter: 'maxTools'; value: number; consequence }`
|
|
453
|
+
* - `{ parameter: 'thinkingBudget'; value: 0; consequence }`
|
|
454
|
+
*
|
|
455
|
+
* Each new variant lands in its own named release with the union extended
|
|
456
|
+
* in `ir.ts`. Consumers see the change at compile time.
|
|
457
|
+
*/
|
|
458
|
+
/**
|
|
459
|
+
* The compatibility verdict for a (model, intent) pair. Discriminated union
|
|
460
|
+
* on `status` — `compatible` | `requires-adapter` | `reject`.
|
|
461
|
+
*
|
|
462
|
+
* **Every variant carries `archetypePerf` (R1) + `reason` (R2):**
|
|
463
|
+
* - `archetypePerf` — the raw 0-10 score for (model, archetype). Lets
|
|
464
|
+
* consumers build their own thresholds without re-importing the profile
|
|
465
|
+
* registry.
|
|
466
|
+
* - `reason` — plain-English, consumer-renderable. NOT internal jargon
|
|
467
|
+
* like "L-040 cliff" or "archetypePerf=4". Examples in R2 ratification:
|
|
468
|
+
* - compatible: "Suited for hunt-style parallel discovery."
|
|
469
|
+
* - requires-adapter: "Best with sequential tool calls for hunt — slower but works."
|
|
470
|
+
* - reject: "Not suited for hunt — would underperform significantly."
|
|
471
|
+
*
|
|
472
|
+
* `requires-adapter` additionally carries:
|
|
473
|
+
* - `archetypePerfWithAdapter` — estimated post-adapter score. May be an
|
|
474
|
+
* estimate (we don't measure post-adapter scores yet); kgauto's prior is
|
|
475
|
+
* "adapter lifts to ARCHETYPE_FLOOR_DEFAULT + 1" unless brain-evidenced.
|
|
476
|
+
* - `adapter` — the closed-union variant describing the structural change.
|
|
477
|
+
*
|
|
478
|
+
* Backward-compat: an unknown model returns `reject` with a "model not
|
|
479
|
+
* registered" reason; callers never throw. Unknown archetype is impossible
|
|
480
|
+
* at the type level (`IntentArchetypeName` is a closed union).
|
|
481
|
+
*/
|
|
482
|
+
type ModelCompatibility = {
|
|
483
|
+
status: 'compatible';
|
|
484
|
+
reason: string;
|
|
485
|
+
archetypePerf: number;
|
|
486
|
+
} | {
|
|
487
|
+
status: 'requires-adapter';
|
|
488
|
+
reason: string;
|
|
489
|
+
archetypePerf: number;
|
|
490
|
+
archetypePerfWithAdapter: number;
|
|
491
|
+
adapter: Adapter;
|
|
492
|
+
} | {
|
|
493
|
+
status: 'reject';
|
|
494
|
+
reason: string;
|
|
495
|
+
archetypePerf: number;
|
|
496
|
+
};
|
|
497
|
+
/**
|
|
498
|
+
* Compatibility query — *does this model fit this intent, and if not,
|
|
499
|
+
* what would?*
|
|
500
|
+
*
|
|
501
|
+
* **Rules (Option A from consultation doc Q1):**
|
|
502
|
+
* 1. If model is unregistered → `reject` with "model not registered" reason.
|
|
503
|
+
* 2. If intent provides `toolOrchestration: 'sequential'` AND that adapter
|
|
504
|
+
* silences the cliff (because the cliff IS the sequential-tool one)
|
|
505
|
+
* → return `compatible` with raw score (NOT the post-adapter estimate
|
|
506
|
+
* — caller already paid the adapter, score reflects reality).
|
|
507
|
+
* 3. If raw `archetypePerf[archetype] >= ARCHETYPE_FLOOR_DEFAULT`
|
|
508
|
+
* → `compatible`.
|
|
509
|
+
* 4. If below floor BUT a documented adapter exists that lifts to floor
|
|
510
|
+
* → `requires-adapter` with adapter + estimated post-adapter score.
|
|
511
|
+
* 5. If below floor AND no adapter exists → `reject`.
|
|
512
|
+
*
|
|
513
|
+
* **Pure function.** Deterministic for `(modelId, intent)`. No I/O.
|
|
514
|
+
*
|
|
515
|
+
* @example
|
|
516
|
+
* ```ts
|
|
517
|
+
* import { getModelCompatibility } from '@warmdrift/kgauto-compiler';
|
|
518
|
+
*
|
|
519
|
+
* const c = getModelCompatibility('deepseek-v4-pro', { archetype: 'hunt' });
|
|
520
|
+
* // → { status: 'requires-adapter',
|
|
521
|
+
* // reason: 'Best with sequential tool calls for hunt — slower but works.',
|
|
522
|
+
* // archetypePerf: 4,
|
|
523
|
+
* // archetypePerfWithAdapter: 7,
|
|
524
|
+
* // adapter: {
|
|
525
|
+
* // parameter: 'toolOrchestration',
|
|
526
|
+
* // value: 'sequential',
|
|
527
|
+
* // consequence: 'Tool calls run one at a time...'
|
|
528
|
+
* // } }
|
|
529
|
+
*
|
|
530
|
+
* // With the adapter already declared:
|
|
531
|
+
* const c2 = getModelCompatibility('deepseek-v4-pro', {
|
|
532
|
+
* archetype: 'hunt',
|
|
533
|
+
* toolOrchestration: 'sequential',
|
|
534
|
+
* });
|
|
535
|
+
* // → { status: 'compatible',
|
|
536
|
+
* // reason: 'Suited for hunt with sequential tool calls.',
|
|
537
|
+
* // archetypePerf: 4 }
|
|
538
|
+
* ```
|
|
539
|
+
*/
|
|
540
|
+
declare function getModelCompatibility(modelId: string, intent: CompatibilityIntent): ModelCompatibility;
|
|
541
|
+
|
|
361
542
|
/**
|
|
362
543
|
* alpha.22 — sync introspection: is brain-query mode active for a given
|
|
363
544
|
* table? Used by the advisor (`model-stale-evidence` rule) to decide
|
|
@@ -943,4 +1124,4 @@ declare const loadAliasesFromBrain: () => Record<string, string>;
|
|
|
943
1124
|
*/
|
|
944
1125
|
declare function compile(ir: PromptIR, opts?: CompileOptions): CompileResult;
|
|
945
1126
|
|
|
946
|
-
export { ApiKeys, type AppOracle, type ArchetypePerfMap, type ArchetypePerfNMap, type ArchetypePerfScoreResult, BestPracticeAdvisory, type BrainConfig, type BrainQueryConfig, CallOptions, CallResult, ChainEntry, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type FallbackPosture, type GetDefaultFallbackChainOpts, type GetPerAxisMetricsOpts, Grounding, IntentArchetypeName, type LLMJudgeOptions, MEASURED_GROUNDING_MIN_N, type ModelBrainRow, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, type OutcomePayload, OutcomeResult, PROVIDER_ENV_KEYS, PerAxisMetrics, type PricingRow, type ProfileToRowOptions, PromptIR, Provider, ProviderOverrides, type ProviderReachability, type ReachabilityOpts, RecordInput, RecordOutcomeInput, type RunAdvisorPhase2Context, type SupportedProvider, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, getAllStarterChains, getAllStarterChainsWithGrounding, getArchetypePerfScore, getDefaultFallbackChain, getDefaultFallbackChainWithGrounding, getPerAxisMetrics, getReachabilityDiagnostic, getSequentialStarterChain, getSequentialStarterChainWithGrounding, getStarterChain, getStarterChainWithGrounding, isBrainQueryActiveFor, isModelReachable, isProviderReachable, loadAliasesFromBrain, loadArchetypePerfFromBrain, loadArchetypePerfNFromBrain, loadChainsFromBrain, loadModelsFromBrain, loadPricingFromBrain, profileToRow, record, recordOutcome, resetTokenizer, resolvePricingAt, resolveProviderKey, runAdvisor, setTokenizer };
|
|
1127
|
+
export { ABSOLUTE_FLOOR, ARCHETYPE_FLOOR_DEFAULT, Adapter, ApiKeys, type AppOracle, type ArchetypePerfMap, type ArchetypePerfNMap, type ArchetypePerfScoreResult, BestPracticeAdvisory, type BrainConfig, type BrainQueryConfig, CallOptions, CallResult, ChainEntry, type CompatibilityIntent, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type FallbackPosture, type GetDefaultFallbackChainOpts, type GetPerAxisMetricsOpts, Grounding, IntentArchetypeName, type LLMJudgeOptions, MEASURED_GROUNDING_MIN_N, type ModelBrainRow, type ModelCompatibility, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, type OutcomePayload, OutcomeResult, PROVIDER_ENV_KEYS, PerAxisMetrics, type PricingRow, type ProfileToRowOptions, PromptIR, Provider, ProviderOverrides, type ProviderReachability, type ReachabilityOpts, RecordInput, RecordOutcomeInput, type RunAdvisorPhase2Context, type SupportedProvider, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, getAllStarterChains, getAllStarterChainsWithGrounding, getArchetypePerfScore, getDefaultFallbackChain, getDefaultFallbackChainWithGrounding, getModelCompatibility, getPerAxisMetrics, getReachabilityDiagnostic, getSequentialStarterChain, getSequentialStarterChainWithGrounding, getStarterChain, getStarterChainWithGrounding, isBrainQueryActiveFor, isModelReachable, isProviderReachable, loadAliasesFromBrain, loadArchetypePerfFromBrain, loadArchetypePerfNFromBrain, loadChainsFromBrain, loadModelsFromBrain, loadPricingFromBrain, profileToRow, record, recordOutcome, resetTokenizer, resolvePricingAt, resolveProviderKey, runAdvisor, setTokenizer };
|