aiopt 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -33,6 +33,59 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
33
33
  mod
34
34
  ));
35
35
 
36
+ // src/cost.ts
37
+ function getRates(rt, provider, model) {
38
+ const prov = String(provider || "").toLowerCase();
39
+ if (prov === "local" || prov === "ollama" || prov === "vllm") {
40
+ return { kind: "official", input: 0, output: 0 };
41
+ }
42
+ const p = rt.providers[prov];
43
+ if (!p) return null;
44
+ const m = p.models[model];
45
+ if (m) return { kind: "official", input: m.input, output: m.output };
46
+ return { kind: "estimated", input: p.default_estimated.input, output: p.default_estimated.output };
47
+ }
48
+ function costOfEvent(rt, ev) {
49
+ if (typeof ev.billed_cost === "number" && Number.isFinite(ev.billed_cost)) {
50
+ return {
51
+ cost: ev.billed_cost,
52
+ used_rate: {
53
+ kind: "billed_cost",
54
+ provider: ev.provider,
55
+ model: ev.model,
56
+ input_per_m: 0,
57
+ output_per_m: 0
58
+ }
59
+ };
60
+ }
61
+ const r = getRates(rt, ev.provider, ev.model);
62
+ if (!r) {
63
+ const input_per_m = 1;
64
+ const output_per_m = 4;
65
+ const cost2 = ev.input_tokens / 1e6 * input_per_m + ev.output_tokens / 1e6 * output_per_m;
66
+ return {
67
+ cost: cost2,
68
+ used_rate: { kind: "estimated", provider: String(ev.provider || "").toLowerCase(), model: ev.model, input_per_m, output_per_m }
69
+ };
70
+ }
71
+ const cost = ev.input_tokens / 1e6 * r.input + ev.output_tokens / 1e6 * r.output;
72
+ return {
73
+ cost,
74
+ used_rate: {
75
+ kind: r.kind,
76
+ provider: ev.provider,
77
+ model: ev.model,
78
+ input_per_m: r.input,
79
+ output_per_m: r.output
80
+ }
81
+ };
82
+ }
83
+ var init_cost = __esm({
84
+ "src/cost.ts"() {
85
+ "use strict";
86
+ }
87
+ });
88
+
36
89
  // src/solutions.ts
37
90
  var solutions_exports = {};
38
91
  __export(solutions_exports, {
@@ -111,12 +164,258 @@ var init_solutions = __esm({
111
164
  }
112
165
  });
113
166
 
167
+ // src/scan.ts
168
+ function topN(map, n) {
169
+ return [...map.entries()].map(([key, v]) => ({ key, cost: v.cost, events: v.events })).sort((a, b) => b.cost - a.cost).slice(0, n);
170
+ }
171
+ function analyze(rt, events) {
172
+ const byModel = /* @__PURE__ */ new Map();
173
+ const byFeature = /* @__PURE__ */ new Map();
174
+ const unknownModels = [];
175
+ const perEventCosts = [];
176
+ const isAttemptLog = events.some((e) => e.trace_id && String(e.trace_id).length > 0 || e.attempt !== void 0 && Number(e.attempt) > 0);
177
+ let baseTotal = 0;
178
+ let total = 0;
179
+ for (const ev of events) {
180
+ const cr = costOfEvent(rt, ev);
181
+ baseTotal += cr.cost;
182
+ if (isAttemptLog) {
183
+ total += cr.cost;
184
+ } else {
185
+ const retries = Math.max(0, Number(ev.retries || 0));
186
+ total += cr.cost * (1 + retries);
187
+ }
188
+ perEventCosts.push({ ev, cost: cr.cost });
189
+ const mk = `${ev.provider}:${ev.model}`;
190
+ const fk = ev.feature_tag || "(none)";
191
+ const mv = byModel.get(mk) || { cost: 0, events: 0 };
192
+ mv.cost += cr.cost;
193
+ mv.events += 1;
194
+ byModel.set(mk, mv);
195
+ const fv = byFeature.get(fk) || { cost: 0, events: 0 };
196
+ fv.cost += cr.cost;
197
+ fv.events += 1;
198
+ byFeature.set(fk, fv);
199
+ const rr = getRates(rt, ev.provider, ev.model);
200
+ if (!rr) {
201
+ unknownModels.push({ provider: ev.provider, model: ev.model, reason: "unknown provider (estimated)" });
202
+ } else if (rr.kind === "estimated") {
203
+ unknownModels.push({ provider: ev.provider, model: ev.model, reason: "unknown model (estimated)" });
204
+ }
205
+ }
206
+ const potByIdx = [];
207
+ for (const { ev, cost } of perEventCosts) {
208
+ const retries = Math.max(0, Number(ev.retries || 0));
209
+ const attempt = Number(ev.attempt || 1);
210
+ const total_i = isAttemptLog ? cost : cost * (1 + retries);
211
+ const waste_i = isAttemptLog ? attempt >= 2 ? cost : 0 : cost * retries;
212
+ let routing_i = 0;
213
+ if (ROUTE_TO_CHEAP_FEATURES.has(String(ev.feature_tag || "").toLowerCase())) {
214
+ const provider = ev.provider;
215
+ const p = rt.providers[provider];
216
+ if (p) {
217
+ const entries = Object.entries(p.models);
218
+ if (entries.length > 0) {
219
+ const cheapest = entries.map(([name, r]) => ({ name, score: (r.input + r.output) / 2, r })).sort((a, b) => a.score - b.score)[0];
220
+ const currentRate = getRates(rt, provider, ev.model);
221
+ if (currentRate && currentRate.kind !== "estimated") {
222
+ const currentCost = ev.input_tokens / 1e6 * currentRate.input + ev.output_tokens / 1e6 * currentRate.output;
223
+ const cheapCost = ev.input_tokens / 1e6 * cheapest.r.input + ev.output_tokens / 1e6 * cheapest.r.output;
224
+ const diff = (currentCost - cheapCost) * (1 + retries);
225
+ routing_i = Math.max(0, diff);
226
+ }
227
+ }
228
+ }
229
+ }
230
+ potByIdx.push({ routing: routing_i, context: 0, retry: waste_i, total: total_i, waste: waste_i });
231
+ }
232
+ const sortedIdx = [...events.map((e, i) => ({ i, input: Number(e.input_tokens || 0), ok: !isAttemptLog || Number(e.attempt || 1) === 1 }))].filter((x) => x.ok).sort((a, b) => b.input - a.input);
233
+ const k = Math.max(1, Math.floor(sortedIdx.length * 0.2));
234
+ const topIdx = new Set(sortedIdx.slice(0, k).map((x) => x.i));
235
+ for (let i = 0; i < events.length; i++) {
236
+ if (!topIdx.has(i)) continue;
237
+ const ev = events[i];
238
+ const retries = Math.max(0, Number(ev.retries || 0));
239
+ const r = getRates(rt, ev.provider, ev.model);
240
+ if (!r) continue;
241
+ const saveTokens = Number(ev.input_tokens || 0) * 0.25;
242
+ const multiplier = isAttemptLog ? 1 : 1 + retries;
243
+ const diff = saveTokens / 1e6 * r.input * multiplier;
244
+ potByIdx[i].context = Math.max(0, diff);
245
+ }
246
+ let routingSavings = 0;
247
+ let contextSavings = 0;
248
+ let retryWaste = 0;
249
+ for (const p of potByIdx) {
250
+ let remaining = p.total;
251
+ const rSave = Math.min(p.routing, remaining);
252
+ remaining -= rSave;
253
+ routingSavings += rSave;
254
+ const cSave = Math.min(p.context, remaining);
255
+ remaining -= cSave;
256
+ contextSavings += cSave;
257
+ const retrySave = Math.min(p.retry, remaining);
258
+ retryWaste += retrySave;
259
+ }
260
+ const estimatedSavingsTotal = routingSavings + contextSavings + retryWaste;
261
+ const guardedSavingsTotal = Math.min(estimatedSavingsTotal, total);
262
+ const analysis = {
263
+ total_cost: round22(total),
264
+ by_model_top: topN(byModel, 10).map((x) => ({ ...x, cost: round22(x.cost) })),
265
+ by_feature_top: topN(byFeature, 10).map((x) => ({ ...x, cost: round22(x.cost) })),
266
+ unknown_models: uniqUnknown(unknownModels),
267
+ rate_table_version: rt.version,
268
+ rate_table_date: rt.date
269
+ };
270
+ const savings = {
271
+ estimated_savings_total: round22(guardedSavingsTotal),
272
+ routing_savings: round22(routingSavings),
273
+ context_savings: round22(contextSavings),
274
+ retry_waste: round22(retryWaste),
275
+ notes: [
276
+ `a) \uBAA8\uB378 \uB77C\uC6B0\uD305 \uC808\uAC10(\uCD94\uC815): $${round22(routingSavings)}`,
277
+ `b) \uCEE8\uD14D\uC2A4\uD2B8 \uAC10\uCD95(\uCD94\uC815): $${round22(contextSavings)} (\uC0C1\uC704 20% input\uC5D0 25% \uAC10\uCD95 \uAC00\uC815)`,
278
+ `c) \uC7AC\uC2DC\uB3C4/\uC624\uB958 \uB0AD\uBE44(\uC0C1\uD55C \uC801\uC6A9): $${round22(retryWaste)} (retries \uAE30\uBC18)`
279
+ ]
280
+ };
281
+ const policy = buildPolicy(rt, events);
282
+ return { analysis, savings, policy, meta: { mode: isAttemptLog ? "attempt-log" : "legacy" } };
283
+ }
284
+ function buildPolicy(rt, events) {
285
+ const freq = /* @__PURE__ */ new Map();
286
+ for (const ev of events) freq.set(ev.provider, (freq.get(ev.provider) || 0) + 1);
287
+ const defaultProvider = [...freq.entries()].sort((a, b) => b[1] - a[1])[0]?.[0] || "openai";
288
+ const rules = [];
289
+ for (const provider of Object.keys(rt.providers)) {
290
+ const p = rt.providers[provider];
291
+ const entries = Object.entries(p.models);
292
+ if (entries.length === 0) continue;
293
+ const cheapest = entries.map(([name, r]) => ({ name, score: (r.input + r.output) / 2, r })).sort((a, b) => a.score - b.score)[0];
294
+ rules.push({
295
+ match: { provider, feature_tag_in: ["summarize", "classify", "translate"] },
296
+ action: { recommend_model: cheapest.name, reason: "cheap-feature routing" }
297
+ });
298
+ }
299
+ rules.push({ match: { model_unknown: true }, action: { keep: true, reason: "unknown model -> no policy applied" } });
300
+ return {
301
+ version: 1,
302
+ default_provider: defaultProvider,
303
+ rules,
304
+ budgets: { currency: rt.currency, notes: "MVP: budgets not enforced" },
305
+ generated_from: { rate_table_version: rt.version, input: "./aiopt-input/usage.jsonl" }
306
+ };
307
+ }
308
+ function uniqUnknown(list) {
309
+ const seen = /* @__PURE__ */ new Set();
310
+ const out = [];
311
+ for (const x of list) {
312
+ const k = `${x.provider}:${x.model}:${x.reason}`;
313
+ if (seen.has(k)) continue;
314
+ seen.add(k);
315
+ out.push(x);
316
+ }
317
+ return out;
318
+ }
319
+ function round22(n) {
320
+ return Math.round(n * 100) / 100;
321
+ }
322
+ function writeOutputs(outDir, analysis, savings, policy, meta) {
323
+ const mode = meta?.mode || "legacy";
324
+ import_fs3.default.mkdirSync(outDir, { recursive: true });
325
+ import_fs3.default.writeFileSync(import_path3.default.join(outDir, "analysis.json"), JSON.stringify(analysis, null, 2));
326
+ const unknownCount = analysis.unknown_models?.length || 0;
327
+ const confidence = unknownCount === 0 ? "HIGH" : unknownCount <= 3 ? "MED" : "LOW";
328
+ const ratio = analysis.total_cost > 0 ? savings.estimated_savings_total / analysis.total_cost : 0;
329
+ const warnings = [];
330
+ if (ratio >= 0.9) warnings.push("estimated savings >= 90%");
331
+ if (unknownCount > 0) warnings.push("unknown models/providers detected (estimated pricing used)");
332
+ const reportJson = {
333
+ version: 3,
334
+ generated_at: (/* @__PURE__ */ new Date()).toISOString(),
335
+ confidence,
336
+ warnings,
337
+ assumptions: {
338
+ no_double_counting: "routing -> context -> retry allocation per-event with remaining-cost caps",
339
+ retry_cost_model: mode === "attempt-log" ? "attempt-log mode: total_cost is sum of attempt lines; retry_waste is sum of attempts>=2" : "legacy mode: total_cost includes retries as extra attempts (base_cost*(1+retries))",
340
+ context_model: "top 20% by input_tokens assume 25% input reduction",
341
+ estimated_pricing_note: unknownCount > 0 ? "some items use estimated rates; treat savings as a band" : "all items used known rates"
342
+ },
343
+ summary: {
344
+ total_cost_usd: analysis.total_cost,
345
+ estimated_savings_usd: savings.estimated_savings_total,
346
+ routing_savings_usd: savings.routing_savings,
347
+ context_savings_usd: savings.context_savings,
348
+ retry_waste_usd: savings.retry_waste
349
+ },
350
+ top: {
351
+ by_model: analysis.by_model_top,
352
+ by_feature: analysis.by_feature_top
353
+ },
354
+ unknown_models: analysis.unknown_models,
355
+ notes: savings.notes
356
+ };
357
+ import_fs3.default.writeFileSync(import_path3.default.join(outDir, "report.json"), JSON.stringify(reportJson, null, 2));
358
+ const ratioMd = analysis.total_cost > 0 ? savings.estimated_savings_total / analysis.total_cost : 0;
359
+ const warningsMd = [];
360
+ if (ratioMd >= 0.9) warningsMd.push("WARNING: estimated savings >= 90% \u2014 check overlap/missing rate table");
361
+ const reportMd = [
362
+ "# AIOpt Report",
363
+ "",
364
+ `- Total cost: $${analysis.total_cost}`,
365
+ `- Estimated savings: $${savings.estimated_savings_total} (guarded <= total_cost)`,
366
+ `- Confidence: ${confidence}`,
367
+ unknownCount > 0 ? `- Unknown models: ${unknownCount} (estimated pricing used)` : "- Unknown models: 0",
368
+ ...warningsMd.map((w) => `- ${w}`),
369
+ "",
370
+ "## ASSUMPTIONS",
371
+ "- No double-counting: routing \u2192 context \u2192 retry savings allocated per-event with remaining-cost caps.",
372
+ mode === "attempt-log" ? "- Retry cost model: attempt-log mode (total_cost=sum attempts, retry_waste=sum attempt>=2)." : "- Retry cost model: legacy mode (total_cost=base_cost*(1+retries)).",
373
+ "- Context savings: top 20% input_tokens events assume 25% input reduction.",
374
+ "",
375
+ "## WHAT TO CHANGE",
376
+ "1) Retry tuning \u2192 edit `aiopt/policies/retry.json`",
377
+ "2) Output cap \u2192 edit `aiopt/policies/output.json`",
378
+ "3) Routing rule \u2192 edit `aiopt/policies/routing.json`",
379
+ "",
380
+ "## OUTPUTS",
381
+ "- `aiopt-output/analysis.json`",
382
+ "- `aiopt-output/report.json`",
383
+ "- `aiopt-output/patches/*`",
384
+ ""
385
+ ].join("\n");
386
+ import_fs3.default.writeFileSync(import_path3.default.join(outDir, "report.md"), reportMd);
387
+ const reportTxt = [
388
+ `\uCD1D\uBE44\uC6A9: $${analysis.total_cost}`,
389
+ `\uC808\uAC10 \uAC00\uB2A5 \uAE08\uC561(Estimated): $${savings.estimated_savings_total}`,
390
+ `\uC808\uAC10 \uADFC\uAC70 3\uC904:`,
391
+ savings.notes[0],
392
+ savings.notes[1],
393
+ savings.notes[2],
394
+ ""
395
+ ].join("\n");
396
+ import_fs3.default.writeFileSync(import_path3.default.join(outDir, "report.txt"), reportTxt);
397
+ import_fs3.default.writeFileSync(import_path3.default.join(outDir, "cost-policy.json"), JSON.stringify(policy, null, 2));
398
+ const fixes = buildTopFixes(analysis, savings);
399
+ writePatches(outDir, fixes);
400
+ }
401
+ var import_fs3, import_path3, ROUTE_TO_CHEAP_FEATURES;
402
+ var init_scan = __esm({
403
+ "src/scan.ts"() {
404
+ "use strict";
405
+ import_fs3 = __toESM(require("fs"));
406
+ import_path3 = __toESM(require("path"));
407
+ init_cost();
408
+ init_solutions();
409
+ ROUTE_TO_CHEAP_FEATURES = /* @__PURE__ */ new Set(["summarize", "classify", "translate"]);
410
+ }
411
+ });
412
+
114
413
  // package.json
115
414
  var require_package = __commonJS({
116
415
  "package.json"(exports2, module2) {
117
416
  module2.exports = {
118
417
  name: "aiopt",
119
- version: "0.2.1",
418
+ version: "0.2.3",
120
419
  description: "Serverless local CLI MVP for AI API cost analysis & cost-policy generation",
121
420
  bin: {
122
421
  aiopt: "dist/cli.js"
@@ -510,387 +809,176 @@ function runDoctor(cwd) {
510
809
  attempt: j.attempt,
511
810
  feature_tag: j?.meta?.feature_tag
512
811
  };
513
- } catch {
514
- return {};
515
- }
516
- });
517
- const last50 = tailLines(usagePath, 50);
518
- let missing = 0;
519
- let total50 = 0;
520
- for (const l of last50) {
521
- total50++;
522
- try {
523
- const j = JSON.parse(l);
524
- const ft = j?.meta?.feature_tag;
525
- if (!ft || String(ft).trim() === "") missing++;
526
- } catch {
527
- missing++;
528
- }
529
- }
530
- if (total50 > 0 && missing > 0) {
531
- checks.push({ name: "feature_tag quality (last50)", ok: false, detail: `${missing}/${total50} missing meta.feature_tag` });
532
- } else {
533
- checks.push({ name: "feature_tag quality (last50)", ok: true, detail: "meta.feature_tag present" });
534
- }
535
- const ok = checks.every((c) => c.ok);
536
- return { ok, checks, last5 };
537
- }
538
- var import_fs5, import_path5;
539
- var init_doctor = __esm({
540
- "src/doctor.ts"() {
541
- "use strict";
542
- import_fs5 = __toESM(require("fs"));
543
- import_path5 = __toESM(require("path"));
544
- }
545
- });
546
-
547
- // src/cli.ts
548
- var import_fs6 = __toESM(require("fs"));
549
- var import_path6 = __toESM(require("path"));
550
- var import_commander = require("commander");
551
-
552
- // src/io.ts
553
- var import_fs = __toESM(require("fs"));
554
- var import_path = __toESM(require("path"));
555
- var import_sync = require("csv-parse/sync");
556
- function ensureDir(p) {
557
- import_fs.default.mkdirSync(p, { recursive: true });
558
- }
559
- function readJsonl(filePath) {
560
- const raw = import_fs.default.readFileSync(filePath, "utf8");
561
- const lines = raw.split(/\r?\n/).filter((l) => l.trim().length > 0);
562
- const out = [];
563
- for (const line of lines) {
564
- const obj = JSON.parse(line);
565
- out.push(normalizeEvent(obj));
566
- }
567
- return out;
568
- }
569
- function readCsv(filePath) {
570
- const raw = import_fs.default.readFileSync(filePath, "utf8");
571
- const records = (0, import_sync.parse)(raw, { columns: true, skip_empty_lines: true, trim: true });
572
- return records.map((r) => normalizeEvent(r));
573
- }
574
- function toNum(x, def = 0) {
575
- const n = Number(x);
576
- return Number.isFinite(n) ? n : def;
577
- }
578
- function normalizeEvent(x) {
579
- const inputTokens = x.input_tokens ?? x.prompt_tokens;
580
- const outputTokens = x.output_tokens ?? x.completion_tokens;
581
- const featureTag = x.feature_tag ?? x?.meta?.feature_tag ?? x.endpoint ?? "";
582
- const retries = x.retries ?? (x.attempt !== void 0 ? Math.max(0, toNum(x.attempt) - 1) : 0);
583
- const billed = x.billed_cost ?? x.cost_usd;
584
- return {
585
- ts: String(x.ts ?? ""),
586
- provider: String(x.provider ?? "").toLowerCase(),
587
- model: String(x.model ?? ""),
588
- input_tokens: toNum(inputTokens),
589
- output_tokens: toNum(outputTokens),
590
- feature_tag: String(featureTag ?? ""),
591
- retries: toNum(retries),
592
- status: String(x.status ?? ""),
593
- billed_cost: billed === void 0 || billed === "" ? void 0 : toNum(billed),
594
- trace_id: x.trace_id ? String(x.trace_id) : void 0,
595
- request_id: x.request_id ? String(x.request_id) : void 0,
596
- attempt: x.attempt === void 0 ? void 0 : toNum(x.attempt),
597
- endpoint: x.endpoint ? String(x.endpoint) : void 0
598
- };
599
- }
600
- function isCsvPath(p) {
601
- return import_path.default.extname(p).toLowerCase() === ".csv";
602
- }
603
-
604
- // src/scan.ts
605
- var import_fs3 = __toESM(require("fs"));
606
- var import_path3 = __toESM(require("path"));
607
-
608
- // src/cost.ts
609
- function getRates(rt, provider, model) {
610
- const prov = String(provider || "").toLowerCase();
611
- if (prov === "local" || prov === "ollama" || prov === "vllm") {
612
- return { kind: "official", input: 0, output: 0 };
613
- }
614
- const p = rt.providers[prov];
615
- if (!p) return null;
616
- const m = p.models[model];
617
- if (m) return { kind: "official", input: m.input, output: m.output };
618
- return { kind: "estimated", input: p.default_estimated.input, output: p.default_estimated.output };
619
- }
620
- function costOfEvent(rt, ev) {
621
- if (typeof ev.billed_cost === "number" && Number.isFinite(ev.billed_cost)) {
622
- return {
623
- cost: ev.billed_cost,
624
- used_rate: {
625
- kind: "billed_cost",
626
- provider: ev.provider,
627
- model: ev.model,
628
- input_per_m: 0,
629
- output_per_m: 0
630
- }
631
- };
632
- }
633
- const r = getRates(rt, ev.provider, ev.model);
634
- if (!r) {
635
- const input_per_m = 1;
636
- const output_per_m = 4;
637
- const cost2 = ev.input_tokens / 1e6 * input_per_m + ev.output_tokens / 1e6 * output_per_m;
638
- return {
639
- cost: cost2,
640
- used_rate: { kind: "estimated", provider: String(ev.provider || "").toLowerCase(), model: ev.model, input_per_m, output_per_m }
641
- };
642
- }
643
- const cost = ev.input_tokens / 1e6 * r.input + ev.output_tokens / 1e6 * r.output;
644
- return {
645
- cost,
646
- used_rate: {
647
- kind: r.kind,
648
- provider: ev.provider,
649
- model: ev.model,
650
- input_per_m: r.input,
651
- output_per_m: r.output
652
- }
653
- };
654
- }
655
-
656
- // src/scan.ts
657
- init_solutions();
658
- var ROUTE_TO_CHEAP_FEATURES = /* @__PURE__ */ new Set(["summarize", "classify", "translate"]);
659
- function topN(map, n) {
660
- return [...map.entries()].map(([key, v]) => ({ key, cost: v.cost, events: v.events })).sort((a, b) => b.cost - a.cost).slice(0, n);
661
- }
662
- function analyze(rt, events) {
663
- const byModel = /* @__PURE__ */ new Map();
664
- const byFeature = /* @__PURE__ */ new Map();
665
- const unknownModels = [];
666
- const perEventCosts = [];
667
- const isAttemptLog = events.some((e) => e.trace_id && String(e.trace_id).length > 0 || e.attempt !== void 0 && Number(e.attempt) > 0);
668
- let baseTotal = 0;
669
- let total = 0;
670
- for (const ev of events) {
671
- const cr = costOfEvent(rt, ev);
672
- baseTotal += cr.cost;
673
- if (isAttemptLog) {
674
- total += cr.cost;
675
- } else {
676
- const retries = Math.max(0, Number(ev.retries || 0));
677
- total += cr.cost * (1 + retries);
678
- }
679
- perEventCosts.push({ ev, cost: cr.cost });
680
- const mk = `${ev.provider}:${ev.model}`;
681
- const fk = ev.feature_tag || "(none)";
682
- const mv = byModel.get(mk) || { cost: 0, events: 0 };
683
- mv.cost += cr.cost;
684
- mv.events += 1;
685
- byModel.set(mk, mv);
686
- const fv = byFeature.get(fk) || { cost: 0, events: 0 };
687
- fv.cost += cr.cost;
688
- fv.events += 1;
689
- byFeature.set(fk, fv);
690
- const rr = getRates(rt, ev.provider, ev.model);
691
- if (!rr) {
692
- unknownModels.push({ provider: ev.provider, model: ev.model, reason: "unknown provider (estimated)" });
693
- } else if (rr.kind === "estimated") {
694
- unknownModels.push({ provider: ev.provider, model: ev.model, reason: "unknown model (estimated)" });
695
- }
696
- }
697
- const potByIdx = [];
698
- for (const { ev, cost } of perEventCosts) {
699
- const retries = Math.max(0, Number(ev.retries || 0));
700
- const attempt = Number(ev.attempt || 1);
701
- const total_i = isAttemptLog ? cost : cost * (1 + retries);
702
- const waste_i = isAttemptLog ? attempt >= 2 ? cost : 0 : cost * retries;
703
- let routing_i = 0;
704
- if (ROUTE_TO_CHEAP_FEATURES.has(String(ev.feature_tag || "").toLowerCase())) {
705
- const provider = ev.provider;
706
- const p = rt.providers[provider];
707
- if (p) {
708
- const entries = Object.entries(p.models);
709
- if (entries.length > 0) {
710
- const cheapest = entries.map(([name, r]) => ({ name, score: (r.input + r.output) / 2, r })).sort((a, b) => a.score - b.score)[0];
711
- const currentRate = getRates(rt, provider, ev.model);
712
- if (currentRate && currentRate.kind !== "estimated") {
713
- const currentCost = ev.input_tokens / 1e6 * currentRate.input + ev.output_tokens / 1e6 * currentRate.output;
714
- const cheapCost = ev.input_tokens / 1e6 * cheapest.r.input + ev.output_tokens / 1e6 * cheapest.r.output;
715
- const diff = (currentCost - cheapCost) * (1 + retries);
716
- routing_i = Math.max(0, diff);
717
- }
718
- }
719
- }
812
+ } catch {
813
+ return {};
814
+ }
815
+ });
816
+ const last50 = tailLines(usagePath, 50);
817
+ let missing = 0;
818
+ let total50 = 0;
819
+ for (const l of last50) {
820
+ total50++;
821
+ try {
822
+ const j = JSON.parse(l);
823
+ const ft = j?.meta?.feature_tag;
824
+ if (!ft || String(ft).trim() === "") missing++;
825
+ } catch {
826
+ missing++;
720
827
  }
721
- potByIdx.push({ routing: routing_i, context: 0, retry: waste_i, total: total_i, waste: waste_i });
722
828
  }
723
- const sortedIdx = [...events.map((e, i) => ({ i, input: Number(e.input_tokens || 0), ok: !isAttemptLog || Number(e.attempt || 1) === 1 }))].filter((x) => x.ok).sort((a, b) => b.input - a.input);
724
- const k = Math.max(1, Math.floor(sortedIdx.length * 0.2));
725
- const topIdx = new Set(sortedIdx.slice(0, k).map((x) => x.i));
726
- for (let i = 0; i < events.length; i++) {
727
- if (!topIdx.has(i)) continue;
728
- const ev = events[i];
729
- const retries = Math.max(0, Number(ev.retries || 0));
730
- const r = getRates(rt, ev.provider, ev.model);
731
- if (!r) continue;
732
- const saveTokens = Number(ev.input_tokens || 0) * 0.25;
733
- const multiplier = isAttemptLog ? 1 : 1 + retries;
734
- const diff = saveTokens / 1e6 * r.input * multiplier;
735
- potByIdx[i].context = Math.max(0, diff);
829
+ if (total50 > 0 && missing > 0) {
830
+ checks.push({ name: "feature_tag quality (last50)", ok: false, detail: `${missing}/${total50} missing meta.feature_tag` });
831
+ } else {
832
+ checks.push({ name: "feature_tag quality (last50)", ok: true, detail: "meta.feature_tag present" });
736
833
  }
737
- let routingSavings = 0;
738
- let contextSavings = 0;
739
- let retryWaste = 0;
740
- for (const p of potByIdx) {
741
- let remaining = p.total;
742
- const rSave = Math.min(p.routing, remaining);
743
- remaining -= rSave;
744
- routingSavings += rSave;
745
- const cSave = Math.min(p.context, remaining);
746
- remaining -= cSave;
747
- contextSavings += cSave;
748
- const retrySave = Math.min(p.retry, remaining);
749
- retryWaste += retrySave;
834
+ const ok = checks.every((c) => c.ok);
835
+ return { ok, checks, last5 };
836
+ }
837
+ var import_fs5, import_path5;
838
+ var init_doctor = __esm({
839
+ "src/doctor.ts"() {
840
+ "use strict";
841
+ import_fs5 = __toESM(require("fs"));
842
+ import_path5 = __toESM(require("path"));
750
843
  }
751
- const estimatedSavingsTotal = routingSavings + contextSavings + retryWaste;
752
- const guardedSavingsTotal = Math.min(estimatedSavingsTotal, total);
753
- const analysis = {
754
- total_cost: round22(total),
755
- by_model_top: topN(byModel, 10).map((x) => ({ ...x, cost: round22(x.cost) })),
756
- by_feature_top: topN(byFeature, 10).map((x) => ({ ...x, cost: round22(x.cost) })),
757
- unknown_models: uniqUnknown(unknownModels),
758
- rate_table_version: rt.version,
759
- rate_table_date: rt.date
760
- };
761
- const savings = {
762
- estimated_savings_total: round22(guardedSavingsTotal),
763
- routing_savings: round22(routingSavings),
764
- context_savings: round22(contextSavings),
765
- retry_waste: round22(retryWaste),
766
- notes: [
767
- `a) \uBAA8\uB378 \uB77C\uC6B0\uD305 \uC808\uAC10(\uCD94\uC815): $${round22(routingSavings)}`,
768
- `b) \uCEE8\uD14D\uC2A4\uD2B8 \uAC10\uCD95(\uCD94\uC815): $${round22(contextSavings)} (\uC0C1\uC704 20% input\uC5D0 25% \uAC10\uCD95 \uAC00\uC815)`,
769
- `c) \uC7AC\uC2DC\uB3C4/\uC624\uB958 \uB0AD\uBE44(\uC0C1\uD55C \uC801\uC6A9): $${round22(retryWaste)} (retries \uAE30\uBC18)`
770
- ]
771
- };
772
- const policy = buildPolicy(rt, events);
773
- return { analysis, savings, policy, meta: { mode: isAttemptLog ? "attempt-log" : "legacy" } };
844
+ });
845
+
846
+ // src/guard.ts
847
+ var guard_exports = {};
848
+ __export(guard_exports, {
849
+ runGuard: () => runGuard
850
+ });
851
+ function round23(n) {
852
+ return Math.round(n * 100) / 100;
774
853
  }
775
- function buildPolicy(rt, events) {
776
- const freq = /* @__PURE__ */ new Map();
777
- for (const ev of events) freq.set(ev.provider, (freq.get(ev.provider) || 0) + 1);
778
- const defaultProvider = [...freq.entries()].sort((a, b) => b[1] - a[1])[0]?.[0] || "openai";
779
- const rules = [];
780
- for (const provider of Object.keys(rt.providers)) {
781
- const p = rt.providers[provider];
782
- const entries = Object.entries(p.models);
783
- if (entries.length === 0) continue;
784
- const cheapest = entries.map(([name, r]) => ({ name, score: (r.input + r.output) / 2, r })).sort((a, b) => a.score - b.score)[0];
785
- rules.push({
786
- match: { provider, feature_tag_in: ["summarize", "classify", "translate"] },
787
- action: { recommend_model: cheapest.name, reason: "cheap-feature routing" }
788
- });
854
+ function monthEstimate(delta) {
855
+ return delta * 30;
856
+ }
857
+ function applyCandidate(events, cand) {
858
+ const ctxM = cand.contextMultiplier ?? 1;
859
+ const outM = cand.outputMultiplier ?? 1;
860
+ const rDelta = cand.retriesDelta ?? 0;
861
+ return events.map((ev) => ({
862
+ ...ev,
863
+ provider: cand.provider ? String(cand.provider).toLowerCase() : ev.provider,
864
+ model: cand.model ? String(cand.model) : ev.model,
865
+ input_tokens: Math.max(0, Math.round((ev.input_tokens || 0) * ctxM)),
866
+ output_tokens: Math.max(0, Math.round((ev.output_tokens || 0) * outM)),
867
+ retries: Math.max(0, Math.round((ev.retries || 0) + rDelta)),
868
+ // clear billed_cost so pricing recalculates for new model/provider
869
+ billed_cost: void 0
870
+ }));
871
+ }
872
+ function confidenceFromChange(cand) {
873
+ const reasons = [];
874
+ if (cand.retriesDelta && cand.retriesDelta !== 0) reasons.push("retries change");
875
+ if (cand.model) reasons.push("model change");
876
+ if (cand.provider) reasons.push("provider change");
877
+ if (cand.contextMultiplier && cand.contextMultiplier !== 1) reasons.push("context length change");
878
+ if (cand.retriesDelta && cand.retriesDelta !== 0) return { level: "High", reasons };
879
+ if (cand.model || cand.provider) return { level: "Medium", reasons };
880
+ if (cand.contextMultiplier && cand.contextMultiplier !== 1) return { level: "Low", reasons };
881
+ return { level: "Medium", reasons: reasons.length ? reasons : ["unknown change"] };
882
+ }
883
+ function runGuard(rt, input) {
884
+ if (!input.baselineEvents || input.baselineEvents.length === 0) {
885
+ return { exitCode: 3, message: "FAIL: baseline usage is empty (need aiopt-output/usage.jsonl)" };
789
886
  }
790
- rules.push({ match: { model_unknown: true }, action: { keep: true, reason: "unknown model -> no policy applied" } });
791
- return {
792
- version: 1,
793
- default_provider: defaultProvider,
794
- rules,
795
- budgets: { currency: rt.currency, notes: "MVP: budgets not enforced" },
796
- generated_from: { rate_table_version: rt.version, input: "./aiopt-input/usage.jsonl" }
797
- };
887
+ const baselineEvents = input.baselineEvents.map((e) => ({ ...e, billed_cost: void 0 }));
888
+ const base = analyze(rt, baselineEvents);
889
+ const candidateEvents = applyCandidate(baselineEvents, input.candidate);
890
+ const cand = analyze(rt, candidateEvents);
891
+ const baseCost = base.analysis.total_cost;
892
+ const candCost = cand.analysis.total_cost;
893
+ const delta = candCost - baseCost;
894
+ const conf = confidenceFromChange(input.candidate);
895
+ const monthly = monthEstimate(Math.max(0, delta));
896
+ const monthlyRounded = round23(monthly);
897
+ let exitCode = 0;
898
+ let headline = "OK: no cost accident risk detected";
899
+ if (monthly >= 100) {
900
+ exitCode = 3;
901
+ headline = "FAIL: high risk of LLM cost accident";
902
+ } else if (monthly >= 10) {
903
+ exitCode = 2;
904
+ headline = "WARN: possible LLM cost accident";
905
+ }
906
+ const reasons = conf.reasons.length ? conf.reasons.join(", ") : "n/a";
907
+ const msg = [
908
+ headline,
909
+ `Summary: baseline=$${baseCost} \u2192 candidate=$${candCost} (\u0394=$${round23(delta)})`,
910
+ `Impact (monthly est): +$${monthlyRounded}`,
911
+ `Confidence: ${conf.level} (${reasons})`,
912
+ "Recommendation: review model/provider/retry/context changes before deploy."
913
+ ].join("\n");
914
+ return { exitCode, message: msg };
798
915
  }
799
- function uniqUnknown(list) {
800
- const seen = /* @__PURE__ */ new Set();
916
+ var init_guard = __esm({
917
+ "src/guard.ts"() {
918
+ "use strict";
919
+ init_scan();
920
+ }
921
+ });
922
+
923
+ // src/cli.ts
924
+ var import_fs6 = __toESM(require("fs"));
925
+ var import_path6 = __toESM(require("path"));
926
+ var import_commander = require("commander");
927
+
928
+ // src/io.ts
929
+ var import_fs = __toESM(require("fs"));
930
+ var import_path = __toESM(require("path"));
931
+ var import_sync = require("csv-parse/sync");
932
+ function ensureDir(p) {
933
+ import_fs.default.mkdirSync(p, { recursive: true });
934
+ }
935
+ function readJsonl(filePath) {
936
+ const raw = import_fs.default.readFileSync(filePath, "utf8");
937
+ const lines = raw.split(/\r?\n/).filter((l) => l.trim().length > 0);
801
938
  const out = [];
802
- for (const x of list) {
803
- const k = `${x.provider}:${x.model}:${x.reason}`;
804
- if (seen.has(k)) continue;
805
- seen.add(k);
806
- out.push(x);
939
+ for (const line of lines) {
940
+ const obj = JSON.parse(line);
941
+ out.push(normalizeEvent(obj));
807
942
  }
808
943
  return out;
809
944
  }
810
- function round22(n) {
811
- return Math.round(n * 100) / 100;
945
/**
 * Read a CSV usage log (first row = column names) and return one normalized event per record.
 *
 * @param {string} filePath - Path of the .csv file to read (decoded as UTF-8).
 * @returns {Array<object>} Events produced by passing each parsed record to normalizeEvent.
 */
function readCsv(filePath) {
  const raw = import_fs.default.readFileSync(filePath, "utf8");
  const records = (0, import_sync.parse)(raw, {
    // Strip a leading byte-order mark; otherwise it is glued onto the first column
    // name and that column is never matched by normalizeEvent.
    bom: true,
    columns: true,
    skip_empty_lines: true,
    trim: true
  });
  return records.map((r) => normalizeEvent(r));
}
813
- function writeOutputs(outDir, analysis, savings, policy, meta) {
814
- const mode = meta?.mode || "legacy";
815
- import_fs3.default.mkdirSync(outDir, { recursive: true });
816
- import_fs3.default.writeFileSync(import_path3.default.join(outDir, "analysis.json"), JSON.stringify(analysis, null, 2));
817
- const unknownCount = analysis.unknown_models?.length || 0;
818
- const confidence = unknownCount === 0 ? "HIGH" : unknownCount <= 3 ? "MED" : "LOW";
819
- const ratio = analysis.total_cost > 0 ? savings.estimated_savings_total / analysis.total_cost : 0;
820
- const warnings = [];
821
- if (ratio >= 0.9) warnings.push("estimated savings >= 90%");
822
- if (unknownCount > 0) warnings.push("unknown models/providers detected (estimated pricing used)");
823
- const reportJson = {
824
- version: 3,
825
- generated_at: (/* @__PURE__ */ new Date()).toISOString(),
826
- confidence,
827
- warnings,
828
- assumptions: {
829
- no_double_counting: "routing -> context -> retry allocation per-event with remaining-cost caps",
830
- retry_cost_model: mode === "attempt-log" ? "attempt-log mode: total_cost is sum of attempt lines; retry_waste is sum of attempts>=2" : "legacy mode: total_cost includes retries as extra attempts (base_cost*(1+retries))",
831
- context_model: "top 20% by input_tokens assume 25% input reduction",
832
- estimated_pricing_note: unknownCount > 0 ? "some items use estimated rates; treat savings as a band" : "all items used known rates"
833
- },
834
- summary: {
835
- total_cost_usd: analysis.total_cost,
836
- estimated_savings_usd: savings.estimated_savings_total,
837
- routing_savings_usd: savings.routing_savings,
838
- context_savings_usd: savings.context_savings,
839
- retry_waste_usd: savings.retry_waste
840
- },
841
- top: {
842
- by_model: analysis.by_model_top,
843
- by_feature: analysis.by_feature_top
844
- },
845
- unknown_models: analysis.unknown_models,
846
- notes: savings.notes
950
/**
 * Coerce a loosely-typed value to a finite number, falling back to `def`.
 *
 * Missing values (`undefined`, `null`, empty or whitespace-only strings) and anything
 * that does not convert to a finite number all yield `def`. Without the explicit
 * missing-value check, `Number(null)` and `Number("")` would both produce 0 and
 * bypass a non-zero default.
 *
 * @param {*} x - Value to convert.
 * @param {number} [def=0] - Fallback returned when `x` is missing or not finite.
 * @returns {number} The converted number, or `def`.
 */
function toNum(x, def = 0) {
  if (x === null || x === void 0) return def;
  if (typeof x === "string" && x.trim() === "") return def;
  const n = Number(x);
  return Number.isFinite(n) ? n : def;
}
954
/**
 * Map one raw usage record (a parsed JSONL object or a CSV row) onto the event shape
 * built below.
 *
 * Accepted aliases: `prompt_tokens` for `input_tokens`, `completion_tokens` for
 * `output_tokens`, `cost_usd` for `billed_cost`; `feature_tag` falls back to
 * `meta.feature_tag`, then `endpoint`, then "". When `retries` is absent it is
 * derived from `attempt` as `max(0, attempt - 1)`.
 *
 * @param {object} x - Raw record.
 * @returns {object} Normalized event. `billed_cost` is a finite number or undefined.
 */
function normalizeEvent(x) {
  const inputTokens = x.input_tokens ?? x.prompt_tokens;
  const outputTokens = x.output_tokens ?? x.completion_tokens;
  const featureTag = x.feature_tag ?? x.meta?.feature_tag ?? x.endpoint ?? "";
  const retries = x.retries ?? (x.attempt !== void 0 ? Math.max(0, toNum(x.attempt) - 1) : 0);
  const billed = x.billed_cost ?? x.cost_usd;
  // A billed cost that is missing, null, blank or not numeric must stay undefined.
  // Coercing it to 0 would make costOfEvent treat the event as billed at $0 and
  // skip the token-based price, hiding real cost.
  const billedIsBlank = billed === void 0 || billed === null || (typeof billed === "string" && billed.trim() === "");
  const billedNum = billedIsBlank ? NaN : Number(billed);
  return {
    ts: String(x.ts ?? ""),
    provider: String(x.provider ?? "").toLowerCase(),
    model: String(x.model ?? ""),
    input_tokens: toNum(inputTokens),
    output_tokens: toNum(outputTokens),
    feature_tag: String(featureTag),
    retries: toNum(retries),
    status: String(x.status ?? ""),
    billed_cost: Number.isFinite(billedNum) ? billedNum : void 0,
    trace_id: x.trace_id ? String(x.trace_id) : void 0,
    request_id: x.request_id ? String(x.request_id) : void 0,
    attempt: x.attempt === void 0 ? void 0 : toNum(x.attempt),
    endpoint: x.endpoint ? String(x.endpoint) : void 0
  };
}
976
/**
 * Tell whether a path's file extension is `.csv`, ignoring letter case.
 *
 * @param {string} p - File path to inspect.
 * @returns {boolean} True when the extension is `.csv`.
 */
function isCsvPath(p) {
  const extension = import_path.default.extname(p);
  return extension.toLowerCase() === ".csv";
}
892
979
 
893
980
  // src/cli.ts
981
+ init_scan();
894
982
  var program = new import_commander.Command();
895
983
  var DEFAULT_INPUT = "./aiopt-output/usage.jsonl";
896
984
  var DEFAULT_OUTPUT_DIR = "./aiopt-output";
@@ -963,5 +1051,27 @@ program.command("doctor").description("Check installation + print last 5 usage e
963
1051
  console.log(JSON.stringify(x));
964
1052
  }
965
1053
  });
1054
// `aiopt guard`: pre-deploy guardrail. Loads the baseline usage log, hands it to runGuard
// together with the candidate overrides from the command line, prints runGuard's message
// and exits with its exit code. Setup failures (bad flag value, missing or unreadable
// baseline) exit with 3.
program
  .command("guard")
  .description("Pre-deploy guardrail: compare baseline usage vs candidate change and print warnings (exit codes 0/2/3)")
  .option("--input <path>", "baseline usage jsonl/csv (default: ./aiopt-output/usage.jsonl)", DEFAULT_INPUT)
  .option("--provider <provider>", "candidate provider override")
  .option("--model <model>", "candidate model override")
  .option("--context-mult <n>", "multiply input_tokens by n", (v) => Number(v))
  .option("--output-mult <n>", "multiply output_tokens by n", (v) => Number(v))
  .option("--retries-delta <n>", "add n to retries", (v) => Number(v))
  .action(async (opts) => {
    // Number("abc") is NaN. Reject it up front: a NaN that reached the cost math would
    // compare false against every threshold and could be reported as "OK".
    const numericFlags = [
      ["--context-mult", opts.contextMult],
      ["--output-mult", opts.outputMult],
      ["--retries-delta", opts.retriesDelta]
    ];
    for (const [flag, value] of numericFlags) {
      if (value !== void 0 && !Number.isFinite(value)) {
        console.error(`FAIL: ${flag} must be a finite number`);
        process.exit(3);
      }
    }
    const rt = loadRateTable();
    const inputPath = String(opts.input);
    if (!import_fs6.default.existsSync(inputPath)) {
      console.error(`FAIL: baseline not found: ${inputPath}`);
      process.exit(3);
    }
    let events;
    try {
      events = isCsvPath(inputPath) ? readCsv(inputPath) : readJsonl(inputPath);
    } catch (err) {
      // An unreadable or malformed baseline is a guard failure, not an unhandled rejection.
      const detail = err instanceof Error ? err.message : String(err);
      console.error(`FAIL: could not read baseline ${inputPath}: ${detail}`);
      process.exit(3);
    }
    // Bundler output for a lazy import of src/guard.ts.
    const { runGuard: runGuard2 } = await Promise.resolve().then(() => (init_guard(), guard_exports));
    const r = runGuard2(rt, {
      baselineEvents: events,
      candidate: {
        provider: opts.provider,
        model: opts.model,
        contextMultiplier: opts.contextMult,
        outputMultiplier: opts.outputMult,
        retriesDelta: opts.retriesDelta
      }
    });
    console.log(r.message);
    process.exit(r.exitCode);
  });
966
1076
  program.parse(process.argv);
967
1077
  //# sourceMappingURL=cli.js.map