aiopt 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +47 -19
- package/dist/cli.js +476 -366
- package/dist/cli.js.map +1 -1
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -33,6 +33,59 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
|
|
|
33
33
|
mod
|
|
34
34
|
));
|
|
35
35
|
|
|
36
|
+
// src/cost.ts
|
|
37
|
+
/**
 * Look up the per-million-token rates for a provider/model pair.
 *
 * @param {object} rt - Rate table; reads `rt.providers[<provider>].models[<model>]`
 *   and `rt.providers[<provider>].default_estimated`.
 * @param {*} provider - Provider name; stringified and lower-cased before lookup.
 * @param {string} model - Model name, matched exactly against the provider's model map.
 * @returns {{kind: string, input: number, output: number} | null}
 *   `kind: "official"` for a listed model (or a zero-priced self-hosted provider),
 *   `kind: "estimated"` when only the provider's default rates apply, and `null`
 *   when the provider is not in the table or has no default rates to fall back on.
 */
function getRates(rt, provider, model) {
  const prov = String(provider || "").toLowerCase();
  // Self-hosted back ends are priced at zero.
  if (prov === "local" || prov === "ollama" || prov === "vllm") {
    return { kind: "official", input: 0, output: 0 };
  }
  // Own-property checks keep names such as "constructor" or "__proto__" from
  // resolving to inherited Object.prototype members instead of table entries.
  const hasOwn = (obj, key) => obj != null && Object.prototype.hasOwnProperty.call(obj, key);
  const p = hasOwn(rt.providers, prov) ? rt.providers[prov] : void 0;
  if (!p) return null;
  const m = hasOwn(p.models, model) ? p.models[model] : void 0;
  if (m) return { kind: "official", input: m.input, output: m.output };
  const fallback = p.default_estimated;
  // Without default rates there is nothing to estimate from; report "unknown".
  if (!fallback) return null;
  return { kind: "estimated", input: fallback.input, output: fallback.output };
}
|
|
48
|
+
/**
 * Compute the cost of a single usage event.
 *
 * Precedence: a finite numeric `ev.billed_cost` is used verbatim; otherwise the
 * rates returned by `getRates` are applied; if `getRates` returns null a flat
 * fallback of 1 (input) / 4 (output) per million tokens is used.
 *
 * @param {object} rt - Rate table, passed through to `getRates`.
 * @param {object} ev - Event; reads `billed_cost`, `provider`, `model`,
 *   `input_tokens` and `output_tokens`.
 * @returns {{cost: number, used_rate: {kind: string, provider: *, model: *, input_per_m: number, output_per_m: number}}}
 */
function costOfEvent(rt, ev) {
  if (typeof ev.billed_cost === "number" && Number.isFinite(ev.billed_cost)) {
    return {
      cost: ev.billed_cost,
      used_rate: {
        kind: "billed_cost",
        provider: ev.provider,
        model: ev.model,
        input_per_m: 0,
        output_per_m: 0
      }
    };
  }
  // Missing or non-numeric token counts count as zero so one malformed event
  // cannot turn every aggregated total into NaN.
  const toTokens = (value) => {
    const n = Number(value);
    return Number.isFinite(n) ? n : 0;
  };
  const inputTokens = toTokens(ev.input_tokens);
  const outputTokens = toTokens(ev.output_tokens);
  const priceAt = (inputPerM, outputPerM) => inputTokens / 1e6 * inputPerM + outputTokens / 1e6 * outputPerM;

  const r = getRates(rt, ev.provider, ev.model);
  if (!r) {
    // Provider not resolvable: flat fallback rates.
    const input_per_m = 1;
    const output_per_m = 4;
    return {
      cost: priceAt(input_per_m, output_per_m),
      used_rate: { kind: "estimated", provider: String(ev.provider || "").toLowerCase(), model: ev.model, input_per_m, output_per_m }
    };
  }
  return {
    cost: priceAt(r.input, r.output),
    used_rate: {
      kind: r.kind,
      provider: ev.provider,
      model: ev.model,
      input_per_m: r.input,
      output_per_m: r.output
    }
  };
}
|
|
83
|
+
// Bundler-generated lazy initializer for src/cost.ts. The module body holds
// only the strict-mode directive, so running it has no other effect.
var init_cost = __esm({
  "src/cost.ts": function () {
    "use strict";
  }
});
|
|
88
|
+
|
|
36
89
|
// src/solutions.ts
|
|
37
90
|
var solutions_exports = {};
|
|
38
91
|
__export(solutions_exports, {
|
|
@@ -111,12 +164,258 @@ var init_solutions = __esm({
|
|
|
111
164
|
}
|
|
112
165
|
});
|
|
113
166
|
|
|
167
|
+
// src/scan.ts
|
|
168
|
+
/**
 * Return the `n` most expensive buckets of a cost map.
 *
 * @param {Map<string, {cost: number, events: number}>} map - Aggregated buckets.
 * @param {number} n - Maximum number of rows to return.
 * @returns {Array<{key: string, cost: number, events: number}>} Rows sorted by
 *   descending cost.
 */
function topN(map, n) {
  const rows = [];
  for (const [key, bucket] of map.entries()) {
    rows.push({ key, cost: bucket.cost, events: bucket.events });
  }
  rows.sort((left, right) => right.cost - left.cost);
  return rows.slice(0, n);
}
|
|
171
|
+
/**
 * Aggregate usage events into a cost analysis, a savings estimate and a policy.
 *
 * Two input shapes are distinguished:
 *  - "attempt-log": some event carries a non-empty `trace_id` or an `attempt` > 0;
 *    every line is one attempt and is counted once.
 *  - "legacy": each line stands for a request and is scaled by `1 + retries`.
 *
 * Savings are allocated per event in the order routing -> context -> retry,
 * each capped by what is left of that event's total cost, so the three
 * categories never add up to more than the event cost.
 *
 * @param {object} rt - Rate table (`providers`, `version`, `date`).
 * @param {Array<object>} events - Usage events.
 * @returns {{analysis: object, savings: object, policy: object, meta: {mode: string}}}
 */
function analyze(rt, events) {
  const byModel = new Map();
  const byFeature = new Map();
  const unknownModels = [];
  const perEventCosts = [];
  const isAttemptLog = events.some((e) => e.trace_id && String(e.trace_id).length > 0 || e.attempt !== void 0 && Number(e.attempt) > 0);
  const retriesOf = (ev) => Math.max(0, Number(ev.retries || 0));
  // One multiplier for total, routing and context, so the three stay comparable.
  const attemptMultiplier = (ev) => isAttemptLog ? 1 : 1 + retriesOf(ev);

  let total = 0;
  for (const ev of events) {
    const cr = costOfEvent(rt, ev);
    total += cr.cost * attemptMultiplier(ev);
    perEventCosts.push({ ev, cost: cr.cost });

    const mk = `${ev.provider}:${ev.model}`;
    const fk = ev.feature_tag || "(none)";
    const mv = byModel.get(mk) || { cost: 0, events: 0 };
    mv.cost += cr.cost;
    mv.events += 1;
    byModel.set(mk, mv);
    const fv = byFeature.get(fk) || { cost: 0, events: 0 };
    fv.cost += cr.cost;
    fv.events += 1;
    byFeature.set(fk, fv);

    const rr = getRates(rt, ev.provider, ev.model);
    if (!rr) {
      unknownModels.push({ provider: ev.provider, model: ev.model, reason: "unknown provider (estimated)" });
    } else if (rr.kind === "estimated") {
      unknownModels.push({ provider: ev.provider, model: ev.model, reason: "unknown model (estimated)" });
    }
  }

  // Cheapest model per provider (lowest mean of input/output rate), computed
  // once per provider instead of once per event.
  const cheapestByProvider = new Map();
  const cheapestRates = (providerKey) => {
    if (!cheapestByProvider.has(providerKey)) {
      const known = rt.providers != null && Object.prototype.hasOwnProperty.call(rt.providers, providerKey);
      const p = known ? rt.providers[providerKey] : void 0;
      const entries = p && p.models ? Object.entries(p.models) : [];
      const cheapest = entries.map(([name, r]) => ({ name, score: (r.input + r.output) / 2, r })).sort((a, b) => a.score - b.score)[0];
      cheapestByProvider.set(providerKey, cheapest ? cheapest.r : null);
    }
    return cheapestByProvider.get(providerKey);
  };

  const potByIdx = [];
  for (const { ev, cost } of perEventCosts) {
    const retries = retriesOf(ev);
    const attempt = Number(ev.attempt || 1);
    const multiplier = attemptMultiplier(ev);
    const total_i = cost * multiplier;
    // Attempt-log: every attempt after the first is waste. Legacy: the retried share.
    const waste_i = isAttemptLog ? (attempt >= 2 ? cost : 0) : cost * retries;
    let routing_i = 0;
    if (ROUTE_TO_CHEAP_FEATURES.has(String(ev.feature_tag || "").toLowerCase())) {
      // Lower-cased to match the key getRates uses for the same provider.
      const providerKey = String(ev.provider || "").toLowerCase();
      const cheap = cheapestRates(providerKey);
      const currentRate = cheap ? getRates(rt, providerKey, ev.model) : null;
      // Estimated rates are too uncertain to claim a routing saving against.
      if (currentRate && currentRate.kind !== "estimated") {
        const inputTokens = Number(ev.input_tokens || 0);
        const outputTokens = Number(ev.output_tokens || 0);
        const currentCost = inputTokens / 1e6 * currentRate.input + outputTokens / 1e6 * currentRate.output;
        const cheapCost = inputTokens / 1e6 * cheap.input + outputTokens / 1e6 * cheap.output;
        routing_i = Math.max(0, (currentCost - cheapCost) * multiplier);
      }
    }
    potByIdx.push({ routing: routing_i, context: 0, retry: waste_i, total: total_i, waste: waste_i });
  }

  // Context savings: the top 20% of events by input size (first attempts only
  // in attempt-log mode; at least one event) are assumed to shed 25% of input.
  const sortedIdx = events.map((e, i) => ({ i, input: Number(e.input_tokens || 0), ok: !isAttemptLog || Number(e.attempt || 1) === 1 })).filter((x) => x.ok).sort((a, b) => b.input - a.input);
  const k = Math.max(1, Math.floor(sortedIdx.length * 0.2));
  for (const { i } of sortedIdx.slice(0, k)) {
    const ev = events[i];
    const r = getRates(rt, ev.provider, ev.model);
    if (!r) continue;
    const saveTokens = Number(ev.input_tokens || 0) * 0.25;
    potByIdx[i].context = Math.max(0, saveTokens / 1e6 * r.input * attemptMultiplier(ev));
  }

  let routingSavings = 0;
  let contextSavings = 0;
  let retryWaste = 0;
  for (const p of potByIdx) {
    let remaining = p.total;
    const rSave = Math.min(p.routing, remaining);
    remaining -= rSave;
    routingSavings += rSave;
    const cSave = Math.min(p.context, remaining);
    remaining -= cSave;
    contextSavings += cSave;
    const retrySave = Math.min(p.retry, remaining);
    retryWaste += retrySave;
  }
  const estimatedSavingsTotal = routingSavings + contextSavings + retryWaste;
  const guardedSavingsTotal = Math.min(estimatedSavingsTotal, total);

  const analysis = {
    total_cost: round22(total),
    by_model_top: topN(byModel, 10).map((x) => ({ ...x, cost: round22(x.cost) })),
    by_feature_top: topN(byFeature, 10).map((x) => ({ ...x, cost: round22(x.cost) })),
    unknown_models: uniqUnknown(unknownModels),
    rate_table_version: rt.version,
    rate_table_date: rt.date
  };
  const savings = {
    estimated_savings_total: round22(guardedSavingsTotal),
    routing_savings: round22(routingSavings),
    context_savings: round22(contextSavings),
    retry_waste: round22(retryWaste),
    notes: [
      `a) \uBAA8\uB378 \uB77C\uC6B0\uD305 \uC808\uAC10(\uCD94\uC815): $${round22(routingSavings)}`,
      `b) \uCEE8\uD14D\uC2A4\uD2B8 \uAC10\uCD95(\uCD94\uC815): $${round22(contextSavings)} (\uC0C1\uC704 20% input\uC5D0 25% \uAC10\uCD95 \uAC00\uC815)`,
      `c) \uC7AC\uC2DC\uB3C4/\uC624\uB958 \uB0AD\uBE44(\uC0C1\uD55C \uC801\uC6A9): $${round22(retryWaste)} (retries \uAE30\uBC18)`
    ]
  };
  const policy = buildPolicy(rt, events);
  return { analysis, savings, policy, meta: { mode: isAttemptLog ? "attempt-log" : "legacy" } };
}
|
|
284
|
+
/**
 * Build the cost-policy document from the rate table and observed events.
 *
 * - `default_provider` is the provider seen most often in `events`
 *   (first-seen wins ties; "openai" when there is none or it is falsy).
 * - For every rate-table provider with at least one model, a rule recommends
 *   the model with the lowest mean of input and output rate for the
 *   summarize / classify / translate feature tags.
 * - A final rule keeps unknown models untouched.
 *
 * @param {object} rt - Rate table (`providers`, `currency`, `version`).
 * @param {Array<object>} events - Usage events; only `provider` is read.
 * @returns {object} Policy object.
 */
function buildPolicy(rt, events) {
  const seenCounts = new Map();
  for (const { provider } of events) {
    seenCounts.set(provider, (seenCounts.get(provider) || 0) + 1);
  }
  let topProvider;
  let topCount = -Infinity;
  for (const [name, count] of seenCounts) {
    // Strict ">" keeps the earliest-seen provider when counts tie.
    if (count > topCount) {
      topProvider = name;
      topCount = count;
    }
  }
  const defaultProvider = topProvider || "openai";

  const rules = [];
  for (const [provider, table] of Object.entries(rt.providers)) {
    const ranked = Object.entries(table.models).map(([name, r]) => ({ name, score: (r.input + r.output) / 2, r }));
    if (ranked.length === 0) continue;
    ranked.sort((a, b) => a.score - b.score);
    const cheapest = ranked[0];
    rules.push({
      match: { provider, feature_tag_in: ["summarize", "classify", "translate"] },
      action: { recommend_model: cheapest.name, reason: "cheap-feature routing" }
    });
  }
  rules.push({ match: { model_unknown: true }, action: { keep: true, reason: "unknown model -> no policy applied" } });

  const budgets = { currency: rt.currency, notes: "MVP: budgets not enforced" };
  const generatedFrom = { rate_table_version: rt.version, input: "./aiopt-input/usage.jsonl" };
  return {
    version: 1,
    default_provider: defaultProvider,
    rules,
    budgets,
    generated_from: generatedFrom
  };
}
|
|
308
|
+
/**
 * Drop repeated unknown-model records, keeping the first occurrence of each
 * provider / model / reason combination in its original order.
 *
 * @param {Array<{provider: *, model: *, reason: string}>} list
 * @returns {Array<object>} The surviving records (same object references).
 */
function uniqUnknown(list) {
  const seenKeys = new Set();
  return list.filter((item) => {
    const key = `${item.provider}:${item.model}:${item.reason}`;
    if (seenKeys.has(key)) return false;
    seenKeys.add(key);
    return true;
  });
}
|
|
319
|
+
/**
 * Round a number to two decimal places.
 *
 * @param {number} n
 * @returns {number}
 */
function round22(n) {
  const hundredths = Math.round(n * 100);
  return hundredths / 100;
}
|
|
322
|
+
/**
 * Write every report artifact for one scan into `outDir`.
 *
 * Files written, in order: analysis.json, report.json, report.md, report.txt,
 * cost-policy.json; afterwards the patch files are produced through
 * `buildTopFixes` / `writePatches`.
 *
 * @param {string} outDir - Output directory (created recursively if missing).
 * @param {object} analysis - Analysis section as produced by `analyze`.
 * @param {object} savings - Savings section as produced by `analyze`.
 * @param {object} policy - Policy document, serialized to cost-policy.json.
 * @param {{mode?: string}} [meta] - Scan metadata; `mode` defaults to "legacy".
 */
function writeOutputs(outDir, analysis, savings, policy, meta) {
  const mode = meta?.mode || "legacy";
  const isAttemptLog = mode === "attempt-log";
  const writeOut = (fileName, text) => {
    import_fs3.default.writeFileSync(import_path3.default.join(outDir, fileName), text);
  };
  const toJson = (value) => JSON.stringify(value, null, 2);

  import_fs3.default.mkdirSync(outDir, { recursive: true });
  writeOut("analysis.json", toJson(analysis));

  // Confidence drops as more models had to be priced with estimated rates.
  const unknownCount = analysis.unknown_models?.length || 0;
  let confidence = "LOW";
  if (unknownCount === 0) {
    confidence = "HIGH";
  } else if (unknownCount <= 3) {
    confidence = "MED";
  }

  // Savings close to the whole bill usually point at overlap or missing rates.
  const ratio = analysis.total_cost > 0 ? savings.estimated_savings_total / analysis.total_cost : 0;
  const savingsNearTotal = ratio >= 0.9;
  const warnings = [];
  if (savingsNearTotal) warnings.push("estimated savings >= 90%");
  if (unknownCount > 0) warnings.push("unknown models/providers detected (estimated pricing used)");

  const reportJson = {
    version: 3,
    generated_at: new Date().toISOString(),
    confidence,
    warnings,
    assumptions: {
      no_double_counting: "routing -> context -> retry allocation per-event with remaining-cost caps",
      retry_cost_model: isAttemptLog ? "attempt-log mode: total_cost is sum of attempt lines; retry_waste is sum of attempts>=2" : "legacy mode: total_cost includes retries as extra attempts (base_cost*(1+retries))",
      context_model: "top 20% by input_tokens assume 25% input reduction",
      estimated_pricing_note: unknownCount > 0 ? "some items use estimated rates; treat savings as a band" : "all items used known rates"
    },
    summary: {
      total_cost_usd: analysis.total_cost,
      estimated_savings_usd: savings.estimated_savings_total,
      routing_savings_usd: savings.routing_savings,
      context_savings_usd: savings.context_savings,
      retry_waste_usd: savings.retry_waste
    },
    top: {
      by_model: analysis.by_model_top,
      by_feature: analysis.by_feature_top
    },
    unknown_models: analysis.unknown_models,
    notes: savings.notes
  };
  writeOut("report.json", toJson(reportJson));

  const markdownWarnings = savingsNearTotal ? ["- WARNING: estimated savings >= 90% \u2014 check overlap/missing rate table"] : [];
  const unknownLine = unknownCount > 0 ? `- Unknown models: ${unknownCount} (estimated pricing used)` : "- Unknown models: 0";
  const retryModelLine = isAttemptLog ? "- Retry cost model: attempt-log mode (total_cost=sum attempts, retry_waste=sum attempt>=2)." : "- Retry cost model: legacy mode (total_cost=base_cost*(1+retries)).";
  const reportMd = [
    "# AIOpt Report",
    "",
    `- Total cost: $${analysis.total_cost}`,
    `- Estimated savings: $${savings.estimated_savings_total} (guarded <= total_cost)`,
    `- Confidence: ${confidence}`,
    unknownLine,
    ...markdownWarnings,
    "",
    "## ASSUMPTIONS",
    "- No double-counting: routing \u2192 context \u2192 retry savings allocated per-event with remaining-cost caps.",
    retryModelLine,
    "- Context savings: top 20% input_tokens events assume 25% input reduction.",
    "",
    "## WHAT TO CHANGE",
    "1) Retry tuning \u2192 edit `aiopt/policies/retry.json`",
    "2) Output cap \u2192 edit `aiopt/policies/output.json`",
    "3) Routing rule \u2192 edit `aiopt/policies/routing.json`",
    "",
    "## OUTPUTS",
    "- `aiopt-output/analysis.json`",
    "- `aiopt-output/report.json`",
    "- `aiopt-output/patches/*`",
    ""
  ].join("\n");
  writeOut("report.md", reportMd);

  // Plain-text summary (Korean labels): total, estimated savings, three notes.
  const reportTxt = [
    `\uCD1D\uBE44\uC6A9: $${analysis.total_cost}`,
    `\uC808\uAC10 \uAC00\uB2A5 \uAE08\uC561(Estimated): $${savings.estimated_savings_total}`,
    `\uC808\uAC10 \uADFC\uAC70 3\uC904:`,
    savings.notes[0],
    savings.notes[1],
    savings.notes[2],
    ""
  ].join("\n");
  writeOut("report.txt", reportTxt);

  writeOut("cost-policy.json", toJson(policy));

  const fixes = buildTopFixes(analysis, savings);
  writePatches(outDir, fixes);
}
|
|
401
|
+
// Module-scope bindings for src/scan.ts; they stay undefined until init_scan runs.
var import_fs3, import_path3, ROUTE_TO_CHEAP_FEATURES;
// Bundler-generated lazy initializer for src/scan.ts: loads fs and path,
// initializes the cost and solutions modules, then builds the set of feature
// tags eligible for cheap-model routing.
var init_scan = __esm({
  "src/scan.ts": function () {
    "use strict";
    import_fs3 = __toESM(require("fs"));
    import_path3 = __toESM(require("path"));
    init_cost();
    init_solutions();
    ROUTE_TO_CHEAP_FEATURES = new Set(["summarize", "classify", "translate"]);
  }
});
|
|
412
|
+
|
|
114
413
|
// package.json
|
|
115
414
|
var require_package = __commonJS({
|
|
116
415
|
"package.json"(exports2, module2) {
|
|
117
416
|
module2.exports = {
|
|
118
417
|
name: "aiopt",
|
|
119
|
-
version: "0.2.
|
|
418
|
+
version: "0.2.3",
|
|
120
419
|
description: "Serverless local CLI MVP for AI API cost analysis & cost-policy generation",
|
|
121
420
|
bin: {
|
|
122
421
|
aiopt: "dist/cli.js"
|
|
@@ -510,387 +809,176 @@ function runDoctor(cwd) {
|
|
|
510
809
|
attempt: j.attempt,
|
|
511
810
|
feature_tag: j?.meta?.feature_tag
|
|
512
811
|
};
|
|
513
|
-
} catch {
|
|
514
|
-
return {};
|
|
515
|
-
}
|
|
516
|
-
});
|
|
517
|
-
const last50 = tailLines(usagePath, 50);
|
|
518
|
-
let missing = 0;
|
|
519
|
-
let total50 = 0;
|
|
520
|
-
for (const l of last50) {
|
|
521
|
-
total50++;
|
|
522
|
-
try {
|
|
523
|
-
const j = JSON.parse(l);
|
|
524
|
-
const ft = j?.meta?.feature_tag;
|
|
525
|
-
if (!ft || String(ft).trim() === "") missing++;
|
|
526
|
-
} catch {
|
|
527
|
-
missing++;
|
|
528
|
-
}
|
|
529
|
-
}
|
|
530
|
-
if (total50 > 0 && missing > 0) {
|
|
531
|
-
checks.push({ name: "feature_tag quality (last50)", ok: false, detail: `${missing}/${total50} missing meta.feature_tag` });
|
|
532
|
-
} else {
|
|
533
|
-
checks.push({ name: "feature_tag quality (last50)", ok: true, detail: "meta.feature_tag present" });
|
|
534
|
-
}
|
|
535
|
-
const ok = checks.every((c) => c.ok);
|
|
536
|
-
return { ok, checks, last5 };
|
|
537
|
-
}
|
|
538
|
-
var import_fs5, import_path5;
|
|
539
|
-
var init_doctor = __esm({
|
|
540
|
-
"src/doctor.ts"() {
|
|
541
|
-
"use strict";
|
|
542
|
-
import_fs5 = __toESM(require("fs"));
|
|
543
|
-
import_path5 = __toESM(require("path"));
|
|
544
|
-
}
|
|
545
|
-
});
|
|
546
|
-
|
|
547
|
-
// src/cli.ts
|
|
548
|
-
var import_fs6 = __toESM(require("fs"));
|
|
549
|
-
var import_path6 = __toESM(require("path"));
|
|
550
|
-
var import_commander = require("commander");
|
|
551
|
-
|
|
552
|
-
// src/io.ts
|
|
553
|
-
var import_fs = __toESM(require("fs"));
|
|
554
|
-
var import_path = __toESM(require("path"));
|
|
555
|
-
var import_sync = require("csv-parse/sync");
|
|
556
|
-
function ensureDir(p) {
|
|
557
|
-
import_fs.default.mkdirSync(p, { recursive: true });
|
|
558
|
-
}
|
|
559
|
-
function readJsonl(filePath) {
|
|
560
|
-
const raw = import_fs.default.readFileSync(filePath, "utf8");
|
|
561
|
-
const lines = raw.split(/\r?\n/).filter((l) => l.trim().length > 0);
|
|
562
|
-
const out = [];
|
|
563
|
-
for (const line of lines) {
|
|
564
|
-
const obj = JSON.parse(line);
|
|
565
|
-
out.push(normalizeEvent(obj));
|
|
566
|
-
}
|
|
567
|
-
return out;
|
|
568
|
-
}
|
|
569
|
-
function readCsv(filePath) {
|
|
570
|
-
const raw = import_fs.default.readFileSync(filePath, "utf8");
|
|
571
|
-
const records = (0, import_sync.parse)(raw, { columns: true, skip_empty_lines: true, trim: true });
|
|
572
|
-
return records.map((r) => normalizeEvent(r));
|
|
573
|
-
}
|
|
574
|
-
function toNum(x, def = 0) {
|
|
575
|
-
const n = Number(x);
|
|
576
|
-
return Number.isFinite(n) ? n : def;
|
|
577
|
-
}
|
|
578
|
-
function normalizeEvent(x) {
|
|
579
|
-
const inputTokens = x.input_tokens ?? x.prompt_tokens;
|
|
580
|
-
const outputTokens = x.output_tokens ?? x.completion_tokens;
|
|
581
|
-
const featureTag = x.feature_tag ?? x?.meta?.feature_tag ?? x.endpoint ?? "";
|
|
582
|
-
const retries = x.retries ?? (x.attempt !== void 0 ? Math.max(0, toNum(x.attempt) - 1) : 0);
|
|
583
|
-
const billed = x.billed_cost ?? x.cost_usd;
|
|
584
|
-
return {
|
|
585
|
-
ts: String(x.ts ?? ""),
|
|
586
|
-
provider: String(x.provider ?? "").toLowerCase(),
|
|
587
|
-
model: String(x.model ?? ""),
|
|
588
|
-
input_tokens: toNum(inputTokens),
|
|
589
|
-
output_tokens: toNum(outputTokens),
|
|
590
|
-
feature_tag: String(featureTag ?? ""),
|
|
591
|
-
retries: toNum(retries),
|
|
592
|
-
status: String(x.status ?? ""),
|
|
593
|
-
billed_cost: billed === void 0 || billed === "" ? void 0 : toNum(billed),
|
|
594
|
-
trace_id: x.trace_id ? String(x.trace_id) : void 0,
|
|
595
|
-
request_id: x.request_id ? String(x.request_id) : void 0,
|
|
596
|
-
attempt: x.attempt === void 0 ? void 0 : toNum(x.attempt),
|
|
597
|
-
endpoint: x.endpoint ? String(x.endpoint) : void 0
|
|
598
|
-
};
|
|
599
|
-
}
|
|
600
|
-
function isCsvPath(p) {
|
|
601
|
-
return import_path.default.extname(p).toLowerCase() === ".csv";
|
|
602
|
-
}
|
|
603
|
-
|
|
604
|
-
// src/scan.ts
|
|
605
|
-
var import_fs3 = __toESM(require("fs"));
|
|
606
|
-
var import_path3 = __toESM(require("path"));
|
|
607
|
-
|
|
608
|
-
// src/cost.ts
|
|
609
|
-
function getRates(rt, provider, model) {
|
|
610
|
-
const prov = String(provider || "").toLowerCase();
|
|
611
|
-
if (prov === "local" || prov === "ollama" || prov === "vllm") {
|
|
612
|
-
return { kind: "official", input: 0, output: 0 };
|
|
613
|
-
}
|
|
614
|
-
const p = rt.providers[prov];
|
|
615
|
-
if (!p) return null;
|
|
616
|
-
const m = p.models[model];
|
|
617
|
-
if (m) return { kind: "official", input: m.input, output: m.output };
|
|
618
|
-
return { kind: "estimated", input: p.default_estimated.input, output: p.default_estimated.output };
|
|
619
|
-
}
|
|
620
|
-
function costOfEvent(rt, ev) {
|
|
621
|
-
if (typeof ev.billed_cost === "number" && Number.isFinite(ev.billed_cost)) {
|
|
622
|
-
return {
|
|
623
|
-
cost: ev.billed_cost,
|
|
624
|
-
used_rate: {
|
|
625
|
-
kind: "billed_cost",
|
|
626
|
-
provider: ev.provider,
|
|
627
|
-
model: ev.model,
|
|
628
|
-
input_per_m: 0,
|
|
629
|
-
output_per_m: 0
|
|
630
|
-
}
|
|
631
|
-
};
|
|
632
|
-
}
|
|
633
|
-
const r = getRates(rt, ev.provider, ev.model);
|
|
634
|
-
if (!r) {
|
|
635
|
-
const input_per_m = 1;
|
|
636
|
-
const output_per_m = 4;
|
|
637
|
-
const cost2 = ev.input_tokens / 1e6 * input_per_m + ev.output_tokens / 1e6 * output_per_m;
|
|
638
|
-
return {
|
|
639
|
-
cost: cost2,
|
|
640
|
-
used_rate: { kind: "estimated", provider: String(ev.provider || "").toLowerCase(), model: ev.model, input_per_m, output_per_m }
|
|
641
|
-
};
|
|
642
|
-
}
|
|
643
|
-
const cost = ev.input_tokens / 1e6 * r.input + ev.output_tokens / 1e6 * r.output;
|
|
644
|
-
return {
|
|
645
|
-
cost,
|
|
646
|
-
used_rate: {
|
|
647
|
-
kind: r.kind,
|
|
648
|
-
provider: ev.provider,
|
|
649
|
-
model: ev.model,
|
|
650
|
-
input_per_m: r.input,
|
|
651
|
-
output_per_m: r.output
|
|
652
|
-
}
|
|
653
|
-
};
|
|
654
|
-
}
|
|
655
|
-
|
|
656
|
-
// src/scan.ts
|
|
657
|
-
init_solutions();
|
|
658
|
-
var ROUTE_TO_CHEAP_FEATURES = /* @__PURE__ */ new Set(["summarize", "classify", "translate"]);
|
|
659
|
-
function topN(map, n) {
|
|
660
|
-
return [...map.entries()].map(([key, v]) => ({ key, cost: v.cost, events: v.events })).sort((a, b) => b.cost - a.cost).slice(0, n);
|
|
661
|
-
}
|
|
662
|
-
function analyze(rt, events) {
|
|
663
|
-
const byModel = /* @__PURE__ */ new Map();
|
|
664
|
-
const byFeature = /* @__PURE__ */ new Map();
|
|
665
|
-
const unknownModels = [];
|
|
666
|
-
const perEventCosts = [];
|
|
667
|
-
const isAttemptLog = events.some((e) => e.trace_id && String(e.trace_id).length > 0 || e.attempt !== void 0 && Number(e.attempt) > 0);
|
|
668
|
-
let baseTotal = 0;
|
|
669
|
-
let total = 0;
|
|
670
|
-
for (const ev of events) {
|
|
671
|
-
const cr = costOfEvent(rt, ev);
|
|
672
|
-
baseTotal += cr.cost;
|
|
673
|
-
if (isAttemptLog) {
|
|
674
|
-
total += cr.cost;
|
|
675
|
-
} else {
|
|
676
|
-
const retries = Math.max(0, Number(ev.retries || 0));
|
|
677
|
-
total += cr.cost * (1 + retries);
|
|
678
|
-
}
|
|
679
|
-
perEventCosts.push({ ev, cost: cr.cost });
|
|
680
|
-
const mk = `${ev.provider}:${ev.model}`;
|
|
681
|
-
const fk = ev.feature_tag || "(none)";
|
|
682
|
-
const mv = byModel.get(mk) || { cost: 0, events: 0 };
|
|
683
|
-
mv.cost += cr.cost;
|
|
684
|
-
mv.events += 1;
|
|
685
|
-
byModel.set(mk, mv);
|
|
686
|
-
const fv = byFeature.get(fk) || { cost: 0, events: 0 };
|
|
687
|
-
fv.cost += cr.cost;
|
|
688
|
-
fv.events += 1;
|
|
689
|
-
byFeature.set(fk, fv);
|
|
690
|
-
const rr = getRates(rt, ev.provider, ev.model);
|
|
691
|
-
if (!rr) {
|
|
692
|
-
unknownModels.push({ provider: ev.provider, model: ev.model, reason: "unknown provider (estimated)" });
|
|
693
|
-
} else if (rr.kind === "estimated") {
|
|
694
|
-
unknownModels.push({ provider: ev.provider, model: ev.model, reason: "unknown model (estimated)" });
|
|
695
|
-
}
|
|
696
|
-
}
|
|
697
|
-
const potByIdx = [];
|
|
698
|
-
for (const { ev, cost } of perEventCosts) {
|
|
699
|
-
const retries = Math.max(0, Number(ev.retries || 0));
|
|
700
|
-
const attempt = Number(ev.attempt || 1);
|
|
701
|
-
const total_i = isAttemptLog ? cost : cost * (1 + retries);
|
|
702
|
-
const waste_i = isAttemptLog ? attempt >= 2 ? cost : 0 : cost * retries;
|
|
703
|
-
let routing_i = 0;
|
|
704
|
-
if (ROUTE_TO_CHEAP_FEATURES.has(String(ev.feature_tag || "").toLowerCase())) {
|
|
705
|
-
const provider = ev.provider;
|
|
706
|
-
const p = rt.providers[provider];
|
|
707
|
-
if (p) {
|
|
708
|
-
const entries = Object.entries(p.models);
|
|
709
|
-
if (entries.length > 0) {
|
|
710
|
-
const cheapest = entries.map(([name, r]) => ({ name, score: (r.input + r.output) / 2, r })).sort((a, b) => a.score - b.score)[0];
|
|
711
|
-
const currentRate = getRates(rt, provider, ev.model);
|
|
712
|
-
if (currentRate && currentRate.kind !== "estimated") {
|
|
713
|
-
const currentCost = ev.input_tokens / 1e6 * currentRate.input + ev.output_tokens / 1e6 * currentRate.output;
|
|
714
|
-
const cheapCost = ev.input_tokens / 1e6 * cheapest.r.input + ev.output_tokens / 1e6 * cheapest.r.output;
|
|
715
|
-
const diff = (currentCost - cheapCost) * (1 + retries);
|
|
716
|
-
routing_i = Math.max(0, diff);
|
|
717
|
-
}
|
|
718
|
-
}
|
|
719
|
-
}
|
|
812
|
+
} catch {
|
|
813
|
+
return {};
|
|
814
|
+
}
|
|
815
|
+
});
|
|
816
|
+
const last50 = tailLines(usagePath, 50);
|
|
817
|
+
let missing = 0;
|
|
818
|
+
let total50 = 0;
|
|
819
|
+
for (const l of last50) {
|
|
820
|
+
total50++;
|
|
821
|
+
try {
|
|
822
|
+
const j = JSON.parse(l);
|
|
823
|
+
const ft = j?.meta?.feature_tag;
|
|
824
|
+
if (!ft || String(ft).trim() === "") missing++;
|
|
825
|
+
} catch {
|
|
826
|
+
missing++;
|
|
720
827
|
}
|
|
721
|
-
potByIdx.push({ routing: routing_i, context: 0, retry: waste_i, total: total_i, waste: waste_i });
|
|
722
828
|
}
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
if (!topIdx.has(i)) continue;
|
|
728
|
-
const ev = events[i];
|
|
729
|
-
const retries = Math.max(0, Number(ev.retries || 0));
|
|
730
|
-
const r = getRates(rt, ev.provider, ev.model);
|
|
731
|
-
if (!r) continue;
|
|
732
|
-
const saveTokens = Number(ev.input_tokens || 0) * 0.25;
|
|
733
|
-
const multiplier = isAttemptLog ? 1 : 1 + retries;
|
|
734
|
-
const diff = saveTokens / 1e6 * r.input * multiplier;
|
|
735
|
-
potByIdx[i].context = Math.max(0, diff);
|
|
829
|
+
if (total50 > 0 && missing > 0) {
|
|
830
|
+
checks.push({ name: "feature_tag quality (last50)", ok: false, detail: `${missing}/${total50} missing meta.feature_tag` });
|
|
831
|
+
} else {
|
|
832
|
+
checks.push({ name: "feature_tag quality (last50)", ok: true, detail: "meta.feature_tag present" });
|
|
736
833
|
}
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
remaining -= cSave;
|
|
747
|
-
contextSavings += cSave;
|
|
748
|
-
const retrySave = Math.min(p.retry, remaining);
|
|
749
|
-
retryWaste += retrySave;
|
|
834
|
+
const ok = checks.every((c) => c.ok);
|
|
835
|
+
return { ok, checks, last5 };
|
|
836
|
+
}
|
|
837
|
+
var import_fs5, import_path5;
|
|
838
|
+
var init_doctor = __esm({
|
|
839
|
+
"src/doctor.ts"() {
|
|
840
|
+
"use strict";
|
|
841
|
+
import_fs5 = __toESM(require("fs"));
|
|
842
|
+
import_path5 = __toESM(require("path"));
|
|
750
843
|
}
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
};
|
|
761
|
-
const savings = {
|
|
762
|
-
estimated_savings_total: round22(guardedSavingsTotal),
|
|
763
|
-
routing_savings: round22(routingSavings),
|
|
764
|
-
context_savings: round22(contextSavings),
|
|
765
|
-
retry_waste: round22(retryWaste),
|
|
766
|
-
notes: [
|
|
767
|
-
`a) \uBAA8\uB378 \uB77C\uC6B0\uD305 \uC808\uAC10(\uCD94\uC815): $${round22(routingSavings)}`,
|
|
768
|
-
`b) \uCEE8\uD14D\uC2A4\uD2B8 \uAC10\uCD95(\uCD94\uC815): $${round22(contextSavings)} (\uC0C1\uC704 20% input\uC5D0 25% \uAC10\uCD95 \uAC00\uC815)`,
|
|
769
|
-
`c) \uC7AC\uC2DC\uB3C4/\uC624\uB958 \uB0AD\uBE44(\uC0C1\uD55C \uC801\uC6A9): $${round22(retryWaste)} (retries \uAE30\uBC18)`
|
|
770
|
-
]
|
|
771
|
-
};
|
|
772
|
-
const policy = buildPolicy(rt, events);
|
|
773
|
-
return { analysis, savings, policy, meta: { mode: isAttemptLog ? "attempt-log" : "legacy" } };
|
|
844
|
+
});
|
|
845
|
+
|
|
846
|
+
// src/guard.ts
|
|
847
|
+
var guard_exports = {};
|
|
848
|
+
__export(guard_exports, {
|
|
849
|
+
runGuard: () => runGuard
|
|
850
|
+
});
|
|
851
|
+
/**
 * Round a number to two decimal places.
 *
 * @param {number} n
 * @returns {number}
 */
function round23(n) {
  const scaled = n * 100;
  return Math.round(scaled) / 100;
}
|
|
775
|
-
function
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
const
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
854
|
+
/**
 * Scale a cost delta to a monthly figure.
 *
 * NOTE(review): the multiplication implies `delta` is a per-day amount, i.e.
 * that the baseline usage log covers roughly one day — confirm with callers.
 *
 * @param {number} delta - Cost difference for the sampled period.
 * @param {number} [daysPerMonth=30] - Scaling factor; the default keeps the
 *   single-argument call identical to the previous fixed 30-day estimate.
 * @returns {number} `delta * daysPerMonth`.
 */
function monthEstimate(delta, daysPerMonth = 30) {
  return delta * daysPerMonth;
}
|
|
857
|
+
/**
 * Project a candidate change onto a list of events, returning new event
 * objects (the inputs are not mutated).
 *
 * @param {Array<object>} events - Baseline events.
 * @param {object} cand - Candidate change; reads `provider`, `model`,
 *   `contextMultiplier` (default 1), `outputMultiplier` (default 1) and
 *   `retriesDelta` (default 0).
 * @returns {Array<object>} Events with provider/model overridden when given,
 *   token counts scaled and rounded (never below 0), retries shifted (never
 *   below 0) and `billed_cost` cleared.
 */
function applyCandidate(events, cand) {
  const contextFactor = cand.contextMultiplier ?? 1;
  const outputFactor = cand.outputMultiplier ?? 1;
  const retryShift = cand.retriesDelta ?? 0;
  const nonNegativeInt = (value) => Math.max(0, Math.round(value));

  return events.map((ev) => {
    const next = { ...ev };
    next.provider = cand.provider ? String(cand.provider).toLowerCase() : ev.provider;
    next.model = cand.model ? String(cand.model) : ev.model;
    next.input_tokens = nonNegativeInt((ev.input_tokens || 0) * contextFactor);
    next.output_tokens = nonNegativeInt((ev.output_tokens || 0) * outputFactor);
    next.retries = nonNegativeInt((ev.retries || 0) + retryShift);
    // clear billed_cost so pricing recalculates for new model/provider
    next.billed_cost = void 0;
    return next;
  });
}
|
|
872
|
+
/**
 * Classify a candidate change and list what it changes.
 *
 * Level precedence: a retries change gives "High"; otherwise a model or
 * provider change gives "Medium"; otherwise a context-length change gives
 * "Low"; anything else gives "Medium".
 *
 * An `outputMultiplier` other than 1 is reported as "output length change"
 * rather than falling through to "unknown change". Multipliers are compared
 * against their neutral value, so a multiplier of 0 counts as a change.
 *
 * @param {object} cand - Candidate; reads `retriesDelta`, `model`, `provider`,
 *   `contextMultiplier` and `outputMultiplier`.
 * @returns {{level: string, reasons: string[]}}
 */
function confidenceFromChange(cand) {
  const differsFrom = (value, neutral) => value != null && value !== neutral;
  const retriesChanged = differsFrom(cand.retriesDelta, 0);
  const targetChanged = Boolean(cand.model || cand.provider);
  const contextChanged = differsFrom(cand.contextMultiplier, 1);
  const outputChanged = differsFrom(cand.outputMultiplier, 1);

  const reasons = [];
  if (retriesChanged) reasons.push("retries change");
  if (cand.model) reasons.push("model change");
  if (cand.provider) reasons.push("provider change");
  if (contextChanged) reasons.push("context length change");
  if (outputChanged) reasons.push("output length change");

  if (retriesChanged) return { level: "High", reasons };
  if (targetChanged) return { level: "Medium", reasons };
  if (contextChanged) return { level: "Low", reasons };
  return { level: "Medium", reasons: reasons.length ? reasons : ["unknown change"] };
}
|
|
883
|
+
/**
 * Compare the cost of baseline usage against the same usage with a candidate
 * change applied, and turn the difference into an exit code plus a message.
 *
 * Exit codes: 3 when the baseline is empty or the monthly increase estimate
 * is at least 100; 2 when it is at least 10; 0 otherwise. Cost decreases are
 * clamped to a zero monthly impact.
 *
 * NOTE(review): events keep their `attempt` / `trace_id` fields here, and
 * `analyze` does not scale by retries in attempt-log mode — a `retriesDelta`
 * candidate may therefore show no cost impact on attempt logs; confirm.
 *
 * @param {object} rt - Rate table, passed to `analyze`.
 * @param {{baselineEvents: Array<object>, candidate: object}} input
 * @returns {{exitCode: number, message: string}}
 */
function runGuard(rt, input) {
  const rawBaseline = input.baselineEvents;
  if (!rawBaseline || rawBaseline.length === 0) {
    return { exitCode: 3, message: "FAIL: baseline usage is empty (need aiopt-output/usage.jsonl)" };
  }

  // Billed costs are dropped so both sides are priced from the rate table.
  const baselineEvents = rawBaseline.map((event) => ({ ...event, billed_cost: void 0 }));
  const baseCost = analyze(rt, baselineEvents).analysis.total_cost;
  const candidateEvents = applyCandidate(baselineEvents, input.candidate);
  const candCost = analyze(rt, candidateEvents).analysis.total_cost;
  const delta = candCost - baseCost;

  const conf = confidenceFromChange(input.candidate);
  const monthly = monthEstimate(Math.max(0, delta));
  const monthlyRounded = round23(monthly);

  let exitCode;
  let headline;
  if (monthly >= 100) {
    exitCode = 3;
    headline = "FAIL: high risk of LLM cost accident";
  } else if (monthly >= 10) {
    exitCode = 2;
    headline = "WARN: possible LLM cost accident";
  } else {
    exitCode = 0;
    headline = "OK: no cost accident risk detected";
  }

  const reasons = conf.reasons.length ? conf.reasons.join(", ") : "n/a";
  const lines = [];
  lines.push(headline);
  lines.push(`Summary: baseline=$${baseCost} \u2192 candidate=$${candCost} (\u0394=$${round23(delta)})`);
  lines.push(`Impact (monthly est): +$${monthlyRounded}`);
  lines.push(`Confidence: ${conf.level} (${reasons})`);
  lines.push("Recommendation: review model/provider/retry/context changes before deploy.");
  return { exitCode, message: lines.join("\n") };
}
|
|
799
|
-
|
|
800
|
-
|
|
916
|
+
// Bundler-generated initializer for the "src/guard.ts" module. Calling
// init_guard() runs the wrapped function, which initializes the scan module
// via init_scan() before guard's exports are used.
// NOTE(review): __esm is defined outside this chunk; presumably it is the
// bundler's run-once lazy-init wrapper - confirm before restructuring.
var init_guard = __esm({
|
|
917
|
+
"src/guard.ts"() {
|
|
918
|
+
"use strict";
|
|
919
|
+
init_scan();
|
|
920
|
+
}
|
|
921
|
+
});
|
|
922
|
+
|
|
923
|
+
// src/cli.ts
|
|
924
|
+
var import_fs6 = __toESM(require("fs"));
|
|
925
|
+
var import_path6 = __toESM(require("path"));
|
|
926
|
+
var import_commander = require("commander");
|
|
927
|
+
|
|
928
|
+
// src/io.ts
|
|
929
|
+
var import_fs = __toESM(require("fs"));
|
|
930
|
+
var import_path = __toESM(require("path"));
|
|
931
|
+
var import_sync = require("csv-parse/sync");
|
|
932
|
+
/**
 * Make sure directory `p` exists, creating any missing parent directories.
 * Does nothing if the directory is already there.
 *
 * @param {string} p - Directory path to create.
 * @returns {void}
 */
function ensureDir(p) {
  const mkdirOptions = { recursive: true };
  import_fs.default.mkdirSync(p, mkdirOptions);
}
|
|
935
|
+
/**
 * Read a JSON Lines usage file and normalize every record.
 *
 * Blank lines are skipped. Each remaining line is trimmed before parsing so
 * a leading UTF-8 BOM or stray whitespace does not break `JSON.parse`.
 *
 * @param {string} filePath - Path to the `.jsonl` file.
 * @returns {object[]} One normalized event per non-blank line, in file order.
 * @throws {SyntaxError} If a line is not valid JSON; the message carries
 *   `file:line` and the original error is attached as `cause`.
 * @throws {TypeError} If a line parses to something other than an object.
 */
function readJsonl(filePath) {
  const raw = import_fs.default.readFileSync(filePath, "utf8");
  const out = [];
  // Iterate over ALL lines (not a pre-filtered list) so reported line
  // numbers match the file on disk.
  for (const [index, line] of raw.split(/\r?\n/).entries()) {
    const text = line.trim();
    if (text.length === 0) continue;
    const where = `${filePath}:${index + 1}`;
    let obj;
    try {
      obj = JSON.parse(text);
    } catch (err) {
      const wrapped = new SyntaxError(`${where}: invalid JSON (${err instanceof Error ? err.message : String(err)})`);
      wrapped.cause = err;
      throw wrapped;
    }
    if (obj === null || typeof obj !== "object") {
      throw new TypeError(`${where}: expected a JSON object per line`);
    }
    out.push(normalizeEvent(obj));
  }
  return out;
}
|
|
810
|
-
function
|
|
811
|
-
|
|
945
|
+
/**
 * Read a CSV usage file (first row = column names) and normalize every row.
 *
 * @param {string} filePath - Path to the `.csv` file.
 * @returns {object[]} One normalized event per data row, in file order.
 */
function readCsv(filePath) {
  const raw = import_fs.default.readFileSync(filePath, "utf8");
  const records = (0, import_sync.parse)(raw, {
    // Strip a leading UTF-8 BOM (common in spreadsheet exports); otherwise it
    // is glued onto the first column name and that column is never matched.
    bom: true,
    columns: true,
    skip_empty_lines: true,
    trim: true
  });
  return records.map((record) => normalizeEvent(record));
}
|
|
813
|
-
function
|
|
814
|
-
const
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
const
|
|
819
|
-
const
|
|
820
|
-
const
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
routing_savings_usd: savings.routing_savings,
|
|
838
|
-
context_savings_usd: savings.context_savings,
|
|
839
|
-
retry_waste_usd: savings.retry_waste
|
|
840
|
-
},
|
|
841
|
-
top: {
|
|
842
|
-
by_model: analysis.by_model_top,
|
|
843
|
-
by_feature: analysis.by_feature_top
|
|
844
|
-
},
|
|
845
|
-
unknown_models: analysis.unknown_models,
|
|
846
|
-
notes: savings.notes
|
|
950
|
+
/**
 * Coerce a loosely-typed value (JSON field, CSV cell) to a finite number.
 *
 * @param {*} x - Value to convert.
 * @param {number} [def=0] - Fallback returned when `x` is missing or not a
 *   finite number.
 * @returns {number} The numeric value of `x`, or `def`.
 */
function toNum(x, def = 0) {
  // Number(null) and Number("") are both 0, which would silently bypass the
  // caller's default; treat missing/blank values as "no value" instead.
  if (x === null || x === void 0) return def;
  if (typeof x === "string" && x.trim() === "") return def;
  const n = Number(x);
  return Number.isFinite(n) ? n : def;
}
|
|
954
|
+
/**
 * Convert one raw usage record (JSONL object or CSV row) into the event
 * shape used by the rest of the tool.
 *
 * Accepted aliases: `prompt_tokens` for `input_tokens`, `completion_tokens`
 * for `output_tokens`, `cost_usd` for `billed_cost`, and
 * `meta.feature_tag` / `endpoint` for `feature_tag`. When `retries` is
 * absent it is derived from `attempt` as `attempt - 1` (never negative).
 *
 * @param {object} x - Raw record.
 * @returns {object} Normalized event. `billed_cost` is a finite number or
 *   `undefined`; optional ids are strings or `undefined`.
 */
function normalizeEvent(x) {
  const inputTokens = x.input_tokens ?? x.prompt_tokens;
  const outputTokens = x.output_tokens ?? x.completion_tokens;
  const featureTag = x.feature_tag ?? x?.meta?.feature_tag ?? x.endpoint ?? "";
  const retries = x.retries ?? (x.attempt !== void 0 ? Math.max(0, toNum(x.attempt) - 1) : 0);

  // A missing, blank, null or non-numeric billed cost must stay undefined.
  // Coercing it to 0 would make pricing treat the event as "billed $0"
  // instead of falling back to the rate table.
  const billedRaw = x.billed_cost ?? x.cost_usd;
  const billedMissing =
    billedRaw === null ||
    billedRaw === void 0 ||
    (typeof billedRaw === "string" && billedRaw.trim() === "");
  const billedNum = billedMissing ? NaN : Number(billedRaw);

  return {
    ts: String(x.ts ?? ""),
    provider: String(x.provider ?? "").toLowerCase(),
    model: String(x.model ?? ""),
    input_tokens: toNum(inputTokens),
    output_tokens: toNum(outputTokens),
    feature_tag: String(featureTag ?? ""),
    retries: toNum(retries),
    status: String(x.status ?? ""),
    billed_cost: Number.isFinite(billedNum) ? billedNum : void 0,
    trace_id: x.trace_id ? String(x.trace_id) : void 0,
    request_id: x.request_id ? String(x.request_id) : void 0,
    attempt: x.attempt === void 0 ? void 0 : toNum(x.attempt),
    endpoint: x.endpoint ? String(x.endpoint) : void 0
  };
}
|
|
976
|
+
/**
 * Tell whether a path points at a CSV file, judged only by its extension
 * (case-insensitive).
 *
 * @param {string} p - File path to inspect.
 * @returns {boolean} `true` when the extension is `.csv`.
 */
function isCsvPath(p) {
  const extension = import_path.default.extname(p);
  const normalized = extension.toLowerCase();
  return normalized === ".csv";
}
|
|
892
979
|
|
|
893
980
|
// src/cli.ts
|
|
981
|
+
// Run the scan module's initializer before the CLI commands are wired up.
init_scan();
|
|
894
982
|
// Root commander program; subcommands (e.g. "guard") are registered on it below.
var program = new import_commander.Command();
|
|
895
983
|
// Default usage-log path; used as the default value of guard's --input option.
var DEFAULT_INPUT = "./aiopt-output/usage.jsonl";
|
|
896
984
|
// Default output directory.
// NOTE(review): its consumers are outside this chunk - confirm usage there.
var DEFAULT_OUTPUT_DIR = "./aiopt-output";
|
|
@@ -963,5 +1051,27 @@ program.command("doctor").description("Check installation + print last 5 usage e
|
|
|
963
1051
|
console.log(JSON.stringify(x));
|
|
964
1052
|
}
|
|
965
1053
|
});
|
|
1054
|
+
// `aiopt guard`: compare the recorded baseline usage against a hypothetical
// candidate change (provider/model override, token multipliers, extra
// retries) and report the estimated cost impact.
// Exit codes: 0 = OK, 2 = WARN, 3 = FAIL (including unusable input).
program
  .command("guard")
  .description("Pre-deploy guardrail: compare baseline usage vs candidate change and print warnings (exit codes 0/2/3)")
  .option("--input <path>", "baseline usage jsonl/csv (default: ./aiopt-output/usage.jsonl)", DEFAULT_INPUT)
  .option("--provider <provider>", "candidate provider override")
  .option("--model <model>", "candidate model override")
  .option("--context-mult <n>", "multiply input_tokens by n", (v) => Number(v))
  .option("--output-mult <n>", "multiply output_tokens by n", (v) => Number(v))
  .option("--retries-delta <n>", "add n to retries", (v) => Number(v))
  .action(async (opts) => {
    // Number("abc") is NaN; reject it here so a typo cannot flow into the
    // cost math and be reported as "no risk".
    const numericFlags = [
      ["--context-mult", opts.contextMult],
      ["--output-mult", opts.outputMult],
      ["--retries-delta", opts.retriesDelta]
    ];
    for (const [flag, value] of numericFlags) {
      if (value !== void 0 && !Number.isFinite(value)) {
        console.error(`FAIL: ${flag} must be a finite number`);
        process.exit(3);
      }
    }

    const rt = loadRateTable();
    const inputPath = String(opts.input);
    if (!import_fs6.default.existsSync(inputPath)) {
      console.error(`FAIL: baseline not found: ${inputPath}`);
      process.exit(3);
    }

    // A malformed baseline must surface as a guard failure (exit 3), not as
    // an unhandled rejection with an undocumented exit code.
    let events;
    try {
      events = isCsvPath(inputPath) ? readCsv(inputPath) : readJsonl(inputPath);
    } catch (err) {
      const detail = err instanceof Error ? err.message : String(err);
      console.error(`FAIL: could not read baseline ${inputPath}: ${detail}`);
      process.exit(3);
    }

    // Bundler form of a lazy import of the guard module.
    const { runGuard: runGuard2 } = await Promise.resolve().then(() => (init_guard(), guard_exports));
    const r = runGuard2(rt, {
      baselineEvents: events,
      candidate: {
        provider: opts.provider,
        model: opts.model,
        contextMultiplier: opts.contextMult,
        outputMultiplier: opts.outputMult,
        retriesDelta: opts.retriesDelta
      }
    });
    console.log(r.message);
    process.exit(r.exitCode);
  });
|
|
966
1076
|
// Hand the process arguments to commander, which runs the matching command.
// NOTE(review): the "guard" action is async; commander documents parseAsync()
// for async action handlers - confirm whether parse() is intentional here.
program.parse(process.argv);
|
|
967
1077
|
//# sourceMappingURL=cli.js.map
|