@warmdrift/kgauto-compiler 2.0.0-alpha.27 → 2.0.0-alpha.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +42 -0
- package/dist/chunk-WXCFWUCN.mjs +678 -0
- package/dist/glassbox/index.d.mts +3 -3
- package/dist/glassbox/index.d.ts +3 -3
- package/dist/glassbox-routes/index.d.mts +117 -6
- package/dist/glassbox-routes/index.d.ts +117 -6
- package/dist/glassbox-routes/index.js +1859 -8
- package/dist/glassbox-routes/index.mjs +359 -8
- package/dist/index.d.mts +473 -13
- package/dist/index.d.ts +473 -13
- package/dist/index.js +543 -10
- package/dist/index.mjs +484 -585
- package/dist/{ir-B_XX2LAO.d.ts → ir-BIAT9gJk.d.ts} +195 -1
- package/dist/{ir-B9zqlwjH.d.mts → ir-De2AQtlr.d.mts} +195 -1
- package/dist/profiles.d.mts +1 -1
- package/dist/profiles.d.ts +1 -1
- package/dist/{types-o9etg93a.d.mts → types-BjrIFPGe.d.mts} +1 -1
- package/dist/{types-bt0aVJb8.d.ts → types-D_JAhCv4.d.ts} +1 -1
- package/package.json +1 -1
|
@@ -1,3 +1,10 @@
|
|
|
1
|
+
import {
|
|
2
|
+
ARCHETYPE_FLOOR_DEFAULT,
|
|
3
|
+
getDefaultFallbackChain
|
|
4
|
+
} from "../chunk-WXCFWUCN.mjs";
|
|
5
|
+
import {
|
|
6
|
+
tryGetProfile
|
|
7
|
+
} from "../chunk-JQGRWJZO.mjs";
|
|
1
8
|
import {
|
|
2
9
|
subscribe,
|
|
3
10
|
subscribeApp
|
|
@@ -38,6 +45,151 @@ function checkAuth(req, config) {
|
|
|
38
45
|
return null;
|
|
39
46
|
}
|
|
40
47
|
|
|
48
|
+
// src/glassbox-routes/counterfactuals.ts
|
|
49
|
+
// Fraction of the served cost an alternative must save to be suggested:
// computeCounterfactuals skips candidates whose savings fall below
// servedCostUsd * this ratio.
var COUNTERFACTUAL_MIN_SAVINGS_RATIO = 0.1;
|
|
50
|
+
// Upper bound on how many counterfactual suggestions computeCounterfactuals
// returns (the cheapest ones are kept).
var COUNTERFACTUAL_MAX_RESULTS = 2;
|
|
51
|
+
/**
 * Suggests cheaper models that could have served the same call.
 *
 * Walks the default fallback chain for the archetype (posture "open",
 * maxDepth 10), prices every other model that has a profile against the
 * observed token counts, and keeps those that clear both the archetype
 * performance floor and the minimum-savings ratio.
 *
 * @param {object} args
 * @param {string} args.servedModel - Model that actually served the call; never suggested.
 * @param {number} args.servedCostUsd - Observed cost of the call in USD.
 * @param {string} args.archetype - Archetype used for chain lookup and perf scoring.
 * @param {number} args.tokensIn - Input tokens of the served call.
 * @param {number} args.tokensOut - Output tokens of the served call.
 * @param {number} [args.cacheReadInputTokens=0] - Input tokens read from cache.
 * @param {*} [args.toolOrchestration] - Forwarded unchanged to getDefaultFallbackChain.
 * @returns {Array<{modelId: string, estimatedCostUsd: number, savingsUsd: number,
 *   savingsPercent: number, reason: string}>} At most COUNTERFACTUAL_MAX_RESULTS
 *   entries, cheapest first; empty when nothing qualifies or inputs are unusable.
 */
function computeCounterfactuals(args) {
  const {
    servedModel,
    servedCostUsd,
    archetype,
    tokensIn,
    tokensOut,
    cacheReadInputTokens = 0,
    toolOrchestration
  } = args;
  // Number.isFinite rejects NaN/Infinity/non-numbers. A bare `<= 0` check lets
  // NaN through, which would later produce candidates with NaN savings.
  if (!Number.isFinite(tokensIn) || tokensIn <= 0) return [];
  if (!Number.isFinite(servedCostUsd) || servedCostUsd <= 0) return [];
  let chain;
  try {
    chain = getDefaultFallbackChain({
      archetype,
      posture: "open",
      maxDepth: 10,
      toolOrchestration
    });
  } catch {
    // Best-effort enrichment: no chain means no suggestions.
    return [];
  }
  const candidates = [];
  const minSavings = servedCostUsd * COUNTERFACTUAL_MIN_SAVINGS_RATIO;
  // Seeded with the served model so it is skipped; also guards against a chain
  // that lists the same model twice filling both result slots.
  const seen = new Set([servedModel]);
  for (const modelId of chain) {
    if (seen.has(modelId)) continue;
    seen.add(modelId);
    const profile = tryGetProfile(modelId);
    if (!profile) continue;
    // Models without a score for this archetype are treated as perf 5.
    const perf = profile.archetypePerf?.[archetype] ?? 5;
    if (perf < ARCHETYPE_FLOOR_DEFAULT) continue;
    const estimated = estimateCostUsd({
      profile,
      tokensIn,
      tokensOut,
      cacheReadInputTokens
    });
    if (estimated === void 0) continue;
    const savings = servedCostUsd - estimated;
    if (savings < minSavings) continue;
    const savingsPercent = Math.round(savings / servedCostUsd * 100);
    const reason = buildReason({
      modelId,
      archetype,
      perf,
      profile
    });
    candidates.push({
      modelId,
      estimatedCostUsd: round6(estimated),
      savingsUsd: round6(savings),
      savingsPercent,
      reason
    });
  }
  candidates.sort((a, b) => a.estimatedCostUsd - b.estimatedCostUsd);
  return candidates.slice(0, COUNTERFACTUAL_MAX_RESULTS);
}
|
|
109
|
+
/**
 * Prices a call against a model profile.
 *
 * Input tokens are split into a cache-read share and a non-cached share. The
 * cache-read share is billed at `costInputPer1m * discount`, where `discount`
 * comes from `profile.lowering.cache.discount` and defaults to 1 (no discount).
 *
 * @param {object} args
 * @param {object} args.profile - Needs `costInputPer1m` and `costOutputPer1m`;
 *   `lowering.cache.discount` is optional.
 * @param {number} args.tokensIn
 * @param {number} args.tokensOut
 * @param {number} [args.cacheReadInputTokens=0]
 * @returns {number|undefined} Estimated USD, or undefined when the result is
 *   not a finite, non-negative number (e.g. the profile lacks prices).
 */
function estimateCostUsd(args) {
  const { profile, tokensIn, tokensOut, cacheReadInputTokens = 0 } = args;
  // Clamp the cached share to [0, tokensIn]: a negative value would otherwise
  // be subtracted from tokensIn and inflate the non-cached share.
  const cacheableIn = Math.max(Math.min(cacheReadInputTokens, tokensIn), 0);
  const nonCachedIn = Math.max(tokensIn - cacheableIn, 0);
  // Optional chaining: a profile without lowering/cache config must not throw
  // out of a best-effort estimate.
  const discount = profile.lowering?.cache?.discount ?? 1;
  const inUsd =
    nonCachedIn / 1e6 * profile.costInputPer1m +
    cacheableIn / 1e6 * profile.costInputPer1m * discount;
  const outUsd = tokensOut / 1e6 * profile.costOutputPer1m;
  const total = inUsd + outUsd;
  if (!Number.isFinite(total) || total < 0) return void 0;
  return total;
}
|
|
121
|
+
/**
 * Rounds a number to six decimal places.
 *
 * @param {number} n
 * @returns {number}
 */
function round6(n) {
  const scale = 1e6;
  const scaled = Math.round(n * scale);
  return scaled / scale;
}
|
|
124
|
+
/**
 * Builds the one-line explanation attached to a counterfactual suggestion.
 *
 * Format: "<modelId> on <archetype>: archetypePerf=<perf>", followed by
 * ", <first strength>" (underscores shown as spaces) when the profile lists one.
 *
 * @param {{modelId: string, archetype: string, perf: number, profile: object}} args
 * @returns {string}
 */
function buildReason(args) {
  const { modelId, archetype, perf, profile } = args;
  const headline = `${modelId} on ${archetype}: archetypePerf=${perf}`;
  const firstStrength = profile.strengths?.[0];
  if (!firstStrength) return headline;
  const readable = firstStrength.replace(/_/g, " ");
  return `${headline}, ${readable}`;
}
|
|
130
|
+
|
|
131
|
+
// src/glassbox-routes/projected-cost.ts
|
|
132
|
+
// Minimum average calls per day required before computeProjectedDailyCost
// reports a projection; below this it returns undefined.
var INSUFFICIENT_VOLUME_THRESHOLD = 5;
|
|
133
|
+
// Look-back window, in days, for the outcome count: used both to build the
// created_at cutoff and as the divisor for the per-day average.
var WINDOW_DAYS = 7;
|
|
134
|
+
/**
 * Projects a daily cost for an (app, archetype) pair from recent call volume.
 *
 * Counts `compile_outcomes` rows from the last WINDOW_DAYS days through the
 * REST endpoint (limit=0 with `Prefer: count=exact`, reading the total from the
 * Content-Range header), averages them per day and multiplies by the cost of
 * the served call.
 *
 * Best-effort: every failure mode (bad arguments, network error, non-OK
 * response, unreadable count, too little volume) resolves to undefined.
 *
 * @param {object} args
 * @param {string} args.appId
 * @param {string} args.archetype
 * @param {number} args.servedCostUsd - Must be finite and > 0.
 * @param {string} args.brainEndpoint - Base URL; trailing slashes are stripped.
 * @param {string} args.brainJwt - Sent as the Bearer token.
 * @param {string} args.brainAnonKey - Sent as the `apikey` header.
 * @param {Function} [args.fetch] - fetch implementation; defaults to globalThis.fetch.
 * @returns {Promise<number|undefined>} Projected USD per day, rounded to 6 decimals.
 */
async function computeProjectedDailyCost(args) {
  const {
    appId,
    archetype,
    servedCostUsd,
    brainEndpoint,
    brainJwt,
    brainAnonKey,
    fetch: fetchImpl
  } = args;
  if (!appId || !archetype) return void 0;
  if (!Number.isFinite(servedCostUsd) || servedCostUsd <= 0) return void 0;
  // Without this guard a missing endpoint throws at `.replace`, turning an
  // optional enrichment into a failed request for the caller.
  if (typeof brainEndpoint !== "string" || brainEndpoint.length === 0) return void 0;
  const doFetch = fetchImpl ?? ((...a) => globalThis.fetch(...a));
  const base = brainEndpoint.replace(/\/+$/, "");
  const cutoffIso = new Date(
    Date.now() - WINDOW_DAYS * 24 * 60 * 60 * 1e3
  ).toISOString();
  const qs = new URLSearchParams();
  qs.set("app_id", `eq.${appId}`);
  qs.set("intent_archetype", `eq.${archetype}`);
  qs.set("created_at", `gte.${cutoffIso}`);
  qs.set("select", "handle");
  // limit=0: only the count in the response header is needed, not the rows.
  qs.set("limit", "0");
  const url = `${base}/rest/v1/compile_outcomes?${qs.toString()}`;
  let res;
  try {
    res = await doFetch(url, {
      method: "GET",
      headers: {
        Authorization: `Bearer ${brainJwt}`,
        apikey: brainAnonKey,
        Accept: "application/json",
        // Triggers PostgREST exact count in Content-Range header.
        Prefer: "count=exact"
      }
    });
  } catch {
    return void 0;
  }
  if (!res.ok) return void 0;
  const contentRange = res.headers.get("content-range");
  const count = parseContentRangeCount(contentRange);
  if (count === void 0) return void 0;
  const avgPerDay = count / WINDOW_DAYS;
  if (avgPerDay < INSUFFICIENT_VOLUME_THRESHOLD) return void 0;
  const projected = avgPerDay * servedCostUsd;
  return Math.round(projected * 1e6) / 1e6;
}
|
|
182
|
+
/**
 * Extracts the total row count from a Content-Range header value such as
 * "0-24/3573" or "*\/42" (the part after the last "/").
 *
 * @param {string|null|undefined} header
 * @returns {number|undefined} The count, or undefined when the header is
 *   missing, has no "/", reports an unknown total ("*"), or the total is not a
 *   plain non-negative integer.
 */
function parseContentRangeCount(header) {
  if (typeof header !== "string" || header === "") return void 0;
  const slash = header.lastIndexOf("/");
  if (slash < 0) return void 0;
  const tail = header.slice(slash + 1).trim();
  // Digits only: parseInt would silently accept "12abc" as 12 or "1e3" as 1.
  // This also rejects "*", "" and negative values.
  if (!/^\d+$/.test(tail)) return void 0;
  const n = Number(tail);
  return Number.isSafeInteger(n) ? n : void 0;
}
|
|
192
|
+
|
|
41
193
|
// src/glassbox-routes/proxy.ts
|
|
42
194
|
var JSON_HEADERS2 = {
|
|
43
195
|
"Content-Type": "application/json",
|
|
@@ -77,16 +229,191 @@ function rowToSummary(row) {
|
|
|
77
229
|
estimatedCostUsd: typeof row.cost_usd_actual === "number" ? row.cost_usd_actual : 0
|
|
78
230
|
};
|
|
79
231
|
}
|
|
80
|
-
|
|
232
|
+
// computeHealth marks the input ratio (tokensIn / (tokensIn + tokensOut))
// "yellow" when it exceeds this value but not INPUT_RATIO_RED.
var INPUT_RATIO_YELLOW = 0.65;
|
|
233
|
+
// computeHealth marks the input ratio "red" when it exceeds this value.
var INPUT_RATIO_RED = 0.85;
|
|
234
|
+
// When historyCacheableTokens is at or below this value, computeHealth
// reports the cache status as "na" instead of grading the hit ratio.
var CACHE_HEALTH_MIN_TOKENS = 1e3;
|
|
235
|
+
// Cache hit ratio at or above which computeHealth reports "green".
var CACHE_RATIO_GREEN = 0.5;
|
|
236
|
+
// Cache hit ratio at or above which (but below CACHE_RATIO_GREEN)
// computeHealth reports "yellow"; anything lower is "red".
var CACHE_RATIO_YELLOW = 0.1;
|
|
237
|
+
// Fallback reason codes that asFallbackReason passes through unchanged; any
// other string is reported as "provider_error".
var FALLBACK_REASONS = /* @__PURE__ */ new Set([
  "rate_limit", "provider_auth_failed", "provider_error",
  "cliff", "cost_cap", "contract_violation"
]);
|
|
245
|
+
/**
 * Returns `v` when it is a non-empty string, otherwise undefined.
 *
 * @param {*} v
 * @returns {string|undefined}
 */
function asString(v) {
  if (typeof v !== "string") return void 0;
  return v === "" ? void 0 : v;
}
|
|
248
|
+
/**
 * Returns `v` when it is a finite number, otherwise undefined.
 *
 * @param {*} v
 * @returns {number|undefined}
 */
function asNumber(v) {
  // Number.isFinite does not coerce, so it is false for every non-number.
  if (Number.isFinite(v)) return v;
  return void 0;
}
|
|
251
|
+
/**
 * Returns `v` when it is a finite number, otherwise 0.
 *
 * @param {*} v
 * @returns {number}
 */
function asNumberOrZero(v) {
  // Number.isFinite does not coerce, so it is false for every non-number.
  if (Number.isFinite(v)) return v;
  return 0;
}
|
|
254
|
+
/**
 * Returns a new array holding only the string elements of `v`, in order.
 * Anything that is not an array yields an empty array.
 *
 * @param {*} v
 * @returns {string[]}
 */
function asStringArray(v) {
  return Array.isArray(v) ? v.filter((item) => typeof item === "string") : [];
}
|
|
262
|
+
/**
 * Normalizes a raw fallback reason.
 *
 * @param {*} v
 * @returns {string|undefined} undefined for non-strings; the value itself when
 *   it is one of FALLBACK_REASONS; "provider_error" for any other string
 *   (including the empty string).
 */
function asFallbackReason(v) {
  if (typeof v !== "string") return void 0;
  const known = v !== "" && FALLBACK_REASONS.has(v);
  return known ? v : "provider_error";
}
|
|
268
|
+
/**
 * Validates one raw advisory entry and converts it to the output shape.
 *
 * Required: `level` ("info" | "warn" | "critical"), string `code`, string
 * `message`. Optional, copied only when present and valid: `suggestion`,
 * `docsUrl` (read from `docs_url` or `docsUrl`) and `suggestedAdaptation`
 * (read from `suggested_adaptation` or `suggestedAdaptation`, via toAdapter).
 *
 * @param {*} raw
 * @returns {object|undefined} The advisory, or undefined when a required
 *   field is missing or invalid.
 */
function rowToAdvisory(raw) {
  if (!raw || typeof raw !== "object") return void 0;
  const { level, code, message } = raw;
  const levelOk = level === "info" || level === "warn" || level === "critical";
  if (!levelOk || typeof code !== "string" || typeof message !== "string") {
    return void 0;
  }
  const advisory = { level, code, message };

  const suggestion = asString(raw.suggestion);
  if (suggestion) advisory.suggestion = suggestion;

  const docsUrl = asString(raw.docs_url ?? raw.docsUrl);
  if (docsUrl) advisory.docsUrl = docsUrl;

  const adaptation = toAdapter(raw.suggested_adaptation ?? raw.suggestedAdaptation);
  if (adaptation) advisory.suggestedAdaptation = adaptation;

  return advisory;
}
|
|
286
|
+
// Section kinds rowToSectionRewrite accepts; entries with any other `kind`
// are dropped.
var SECTION_KINDS = /* @__PURE__ */ new Set([
  "role_intro", "tool_call_contract", "narration_contract",
  "user_turn", "reference", "arbitrary"
]);
|
|
294
|
+
/**
 * Produces the human-readable summary for an applied section rewrite.
 *
 * The sequential-tool cliff rule on a tool_call_contract section gets a
 * dedicated sentence; every other combination gets a generic one naming the
 * rule and the section kind.
 *
 * @param {string} kind
 * @param {string} rule
 * @returns {string}
 */
function summarizeSectionRewrite(kind, rule) {
  const isSequentialCliffFix =
    kind === "tool_call_contract" && rule === "sequential-tool-cliff-below-floor";
  return isSequentialCliffFix
    ? "Sequential tool pattern applied (model cliff cleared at compile time)."
    : `Translator applied rule "${rule}" to ${kind} section.`;
}
|
|
300
|
+
/**
 * Validates one raw section-rewrite entry and converts it to the output shape.
 *
 * Requires a non-empty string section id (`sectionId` or `section_id`), a
 * `kind` listed in SECTION_KINDS and a non-empty string `rule`.
 *
 * @param {*} raw
 * @returns {{sectionId: string, kind: string, rule: string, summary: string}|undefined}
 *   undefined when any required field is missing or invalid.
 */
function rowToSectionRewrite(raw) {
  if (!raw || typeof raw !== "object") return void 0;

  const isNonEmptyString = (value) => typeof value === "string" && value.length > 0;

  const sectionId = raw.sectionId ?? raw.section_id;
  if (!isNonEmptyString(sectionId)) return void 0;

  const { kind } = raw;
  if (typeof kind !== "string" || !SECTION_KINDS.has(kind)) return void 0;

  const { rule } = raw;
  if (!isNonEmptyString(rule)) return void 0;

  const summary = summarizeSectionRewrite(kind, rule);
  return { sectionId, kind, rule, summary };
}
|
|
318
|
+
/**
 * Validates a raw suggested-adaptation object.
 *
 * Only one shape is recognized: parameter "toolOrchestration" with value
 * "sequential" and a string `consequence`. The result is a fresh object with
 * exactly those three fields.
 *
 * @param {*} raw
 * @returns {{parameter: string, value: string, consequence: string}|undefined}
 */
function toAdapter(raw) {
  if (!raw || typeof raw !== "object") return void 0;
  const { parameter, value, consequence } = raw;
  if (parameter !== "toolOrchestration") return void 0;
  if (value !== "sequential") return void 0;
  if (typeof consequence !== "string") return void 0;
  return { parameter: "toolOrchestration", value: "sequential", consequence };
}
|
|
330
|
+
/**
 * Grades a trace on three axes.
 *
 * - inputRatioStatus: share of input tokens in (tokensIn + tokensOut), taken
 *   as 0 when the total is not positive; "red" above INPUT_RATIO_RED,
 *   "yellow" above INPUT_RATIO_YELLOW, else "green".
 * - cacheStatus: "na" when historyCacheableTokens is at or below
 *   CACHE_HEALTH_MIN_TOKENS; otherwise graded on inputCacheHitRatio against
 *   CACHE_RATIO_GREEN / CACHE_RATIO_YELLOW.
 * - fallbackStatus: "red" when fellOverFrom is defined and differs from
 *   target, else "green".
 *
 * @param {object} args
 * @returns {{inputRatioStatus: string, cacheStatus: string, fallbackStatus: string}}
 */
function computeHealth(args) {
  const {
    tokensIn,
    tokensOut,
    historyCacheableTokens,
    inputCacheHitRatio,
    fellOverFrom,
    target
  } = args;

  const tokenSum = tokensIn + tokensOut;
  const inputShare = tokenSum > 0 ? tokensIn / tokenSum : 0;
  const inputRatioStatus =
    inputShare > INPUT_RATIO_RED ? "red"
    : inputShare > INPUT_RATIO_YELLOW ? "yellow"
    : "green";

  const cacheStatus =
    historyCacheableTokens <= CACHE_HEALTH_MIN_TOKENS ? "na"
    : inputCacheHitRatio >= CACHE_RATIO_GREEN ? "green"
    : inputCacheHitRatio >= CACHE_RATIO_YELLOW ? "yellow"
    : "red";

  const fellOver = fellOverFrom !== void 0 && fellOverFrom !== target;
  const fallbackStatus = fellOver ? "red" : "green";

  return { inputRatioStatus, cacheStatus, fallbackStatus };
}
|
|
358
|
+
/**
 * Expands a raw trace row into the detail payload.
 *
 * Starts from rowToSummary(row) and adds: sanitized mutation/advisory/section
 * rewrite lists, request/response previews, timing and size fields, cache
 * token counters with the derived hit ratio (cache-read tokens / tokensIn, 0
 * when tokensIn is not positive), fallback information and the health grades.
 *
 * `fallbackReason` is only populated when `fell_over_from` is present, and
 * `requestedModel` falls back to that same value when `requested_model` is
 * absent.
 *
 * @param {object} row - Raw row; snake_case fields are read defensively.
 * @returns {object} Summary fields plus the detail fields listed above.
 */
function rowToDetail(row) {
  const summary = rowToSummary(row);
  const { tokensIn, tokensOut } = summary;

  // Runs each entry of an array-valued field through a converter and keeps
  // the entries the converter accepted; non-arrays yield an empty list.
  const convertList = (value, convert) => {
    const accepted = [];
    if (!Array.isArray(value)) return accepted;
    for (const entry of value) {
      const converted = convert(entry);
      if (converted) accepted.push(converted);
    }
    return accepted;
  };

  const cacheReadInputTokens = asNumberOrZero(row.cache_read_input_tokens);
  const cacheCreationInputTokens = asNumberOrZero(row.cache_creation_input_tokens);
  const historyCacheableTokens = asNumberOrZero(row.history_cacheable_tokens);
  let inputCacheHitRatio = 0;
  if (tokensIn > 0) inputCacheHitRatio = cacheReadInputTokens / tokensIn;

  const fellOverFrom = asString(row.fell_over_from);
  let fallbackReason;
  if (fellOverFrom) fallbackReason = asFallbackReason(row.fallback_reason);
  const requestedModel = asString(row.requested_model) ?? fellOverFrom;

  const advisories = convertList(row.advisories, (entry) => rowToAdvisory(entry));
  const health = computeHealth({
    tokensIn,
    tokensOut,
    cacheReadInputTokens,
    historyCacheableTokens,
    inputCacheHitRatio,
    fellOverFrom,
    target: summary.target
  });
  const sectionRewritesApplied = convertList(
    row.section_rewrites_applied,
    (entry) => rowToSectionRewrite(entry)
  );

  return {
    ...summary,
    mutationsApplied: asStringArray(row.mutations_applied),
    advisories,
    rawRequest: asString(row.prompt_preview),
    rawResponse: asString(row.response_preview),
    requestedModel,
    finishReason: asString(row.finish_reason),
    ttftMs: asNumber(row.ttft_ms),
    // total_ms wins; latency_ms is the fallback column.
    totalMs: asNumber(row.total_ms) ?? asNumber(row.latency_ms),
    toolsCount: asNumber(row.tools_count),
    historyDepth: asNumber(row.history_depth),
    systemPromptChars: asNumber(row.system_prompt_chars),
    cacheReadInputTokens,
    cacheCreationInputTokens,
    historyCacheableTokens,
    inputCacheHitRatio,
    fellOverFrom,
    fallbackReason,
    sectionRewritesApplied,
    health
  };
}
|
|
90
417
|
function createProxyHandler(config) {
|
|
91
418
|
const {
|
|
92
419
|
installToken,
|
|
@@ -157,7 +484,31 @@ function createProxyHandler(config) {
|
|
|
157
484
|
if (traceId) {
|
|
158
485
|
const first = scrubbed[0];
|
|
159
486
|
if (!first) return jsonError2(404, "not_found");
|
|
160
|
-
|
|
487
|
+
const detail = rowToDetail(first);
|
|
488
|
+
const counterfactuals = computeCounterfactuals({
|
|
489
|
+
servedModel: detail.target,
|
|
490
|
+
servedCostUsd: detail.estimatedCostUsd,
|
|
491
|
+
archetype: detail.archetype,
|
|
492
|
+
tokensIn: detail.tokensIn,
|
|
493
|
+
tokensOut: detail.tokensOut,
|
|
494
|
+
cacheReadInputTokens: detail.cacheReadInputTokens
|
|
495
|
+
});
|
|
496
|
+
detail.counterfactuals = counterfactuals;
|
|
497
|
+
if (detail.estimatedCostUsd > 0) {
|
|
498
|
+
const projected = await computeProjectedDailyCost({
|
|
499
|
+
appId: detail.appId,
|
|
500
|
+
archetype: detail.archetype,
|
|
501
|
+
servedCostUsd: detail.estimatedCostUsd,
|
|
502
|
+
brainEndpoint: base,
|
|
503
|
+
brainJwt,
|
|
504
|
+
brainAnonKey,
|
|
505
|
+
fetch: doFetch
|
|
506
|
+
});
|
|
507
|
+
if (projected !== void 0) {
|
|
508
|
+
detail.projectedDailyCostUsd = projected;
|
|
509
|
+
}
|
|
510
|
+
}
|
|
511
|
+
return jsonResponse(200, detail);
|
|
161
512
|
}
|
|
162
513
|
return jsonResponse(200, { traces: scrubbed.map(rowToSummary) });
|
|
163
514
|
};
|