@relayplane/proxy 0.1.9 → 0.1.10

This diff shows the changes between publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their public registries.
package/dist/cli.mjs CHANGED
@@ -1687,6 +1687,63 @@ var VERSION = "0.1.9";
  var recentRuns = [];
  var MAX_RECENT_RUNS = 100;
  var modelCounts = {};
+ var tokenStats = {};
+ var MODEL_PRICING2 = {
+   // Anthropic
+   "claude-3-haiku-20240307": { input: 0.25, output: 1.25 },
+   "claude-3-5-haiku-20241022": { input: 1, output: 5 },
+   "claude-3-5-haiku-latest": { input: 1, output: 5 },
+   "claude-3-5-sonnet-20241022": { input: 3, output: 15 },
+   "claude-sonnet-4-20250514": { input: 3, output: 15 },
+   "claude-3-opus-20240229": { input: 15, output: 75 },
+   "claude-opus-4-5-20250514": { input: 15, output: 75 },
+   // OpenAI
+   "gpt-4o": { input: 2.5, output: 10 },
+   "gpt-4o-mini": { input: 0.15, output: 0.6 },
+   "gpt-4-turbo": { input: 10, output: 30 },
+   // Defaults for unknown models
+   "default-cheap": { input: 1, output: 5 },
+   "default-expensive": { input: 15, output: 75 }
+ };
+ function trackTokens(model, inputTokens, outputTokens) {
+   if (!tokenStats[model]) {
+     tokenStats[model] = { inputTokens: 0, outputTokens: 0, requests: 0 };
+   }
+   tokenStats[model].inputTokens += inputTokens;
+   tokenStats[model].outputTokens += outputTokens;
+   tokenStats[model].requests += 1;
+ }
+ function calculateCosts() {
+   let totalInputTokens = 0;
+   let totalOutputTokens = 0;
+   let actualCostUsd = 0;
+   const byModel = {};
+   for (const [model, stats] of Object.entries(tokenStats)) {
+     totalInputTokens += stats.inputTokens;
+     totalOutputTokens += stats.outputTokens;
+     const pricing = MODEL_PRICING2[model] || MODEL_PRICING2["default-cheap"];
+     const cost = stats.inputTokens / 1e6 * pricing.input + stats.outputTokens / 1e6 * pricing.output;
+     actualCostUsd += cost;
+     byModel[model] = {
+       inputTokens: stats.inputTokens,
+       outputTokens: stats.outputTokens,
+       costUsd: parseFloat(cost.toFixed(4))
+     };
+   }
+   const opusPricing = MODEL_PRICING2["claude-opus-4-5-20250514"];
+   const opusCostUsd = totalInputTokens / 1e6 * opusPricing.input + totalOutputTokens / 1e6 * opusPricing.output;
+   const savingsUsd = opusCostUsd - actualCostUsd;
+   const savingsPercent = opusCostUsd > 0 ? (savingsUsd / opusCostUsd * 100).toFixed(1) + "%" : "0%";
+   return {
+     totalInputTokens,
+     totalOutputTokens,
+     actualCostUsd: parseFloat(actualCostUsd.toFixed(4)),
+     opusCostUsd: parseFloat(opusCostUsd.toFixed(4)),
+     savingsUsd: parseFloat(savingsUsd.toFixed(4)),
+     savingsPercent,
+     byModel
+   };
+ }
  var serverStartTime = 0;
  var currentConfig = loadConfig();
  var DEFAULT_ENDPOINTS = {
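
The new helpers are plain per-million-token arithmetic over MODEL_PRICING2 (USD per million tokens), with claude-opus-4-5 as the fixed savings baseline. A minimal sketch of how they compose, using illustrative token counts that do not come from the package:

// Illustrative traffic: 200k input / 50k output tokens at Haiku pricing ($1 / $5 per MTok).
trackTokens("claude-3-5-haiku-latest", 200000, 50000);
const report = calculateCosts();
// actualCostUsd: 200000/1e6 * 1  + 50000/1e6 * 5  = 0.45
// opusCostUsd:   200000/1e6 * 15 + 50000/1e6 * 75 = 6.75
// savingsUsd: 6.3, savingsPercent: "93.3%"

Note that unknown models silently fall back to the "default-cheap" rate, so actualCostUsd is a lower-bound estimate when an unlisted expensive model is in play.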
@@ -2314,6 +2371,7 @@ function convertAnthropicStreamEvent(eventType, eventData, messageId, model, toolState) {
      return null;
    }
  }
+ var lastStreamingUsage = null;
  async function* convertAnthropicStream(response, model) {
    const reader = response.body?.getReader();
    if (!reader) {
@@ -2326,6 +2384,8 @@ async function* convertAnthropicStream(response, model) {
      currentToolIndex: 0,
      tools: /* @__PURE__ */ new Map()
    };
+   let streamInputTokens = 0;
+   let streamOutputTokens = 0;
    try {
      while (true) {
        const { done, value } = await reader.read();
@@ -2343,6 +2403,17 @@ async function* convertAnthropicStream(response, model) {
          } else if (line === "" && eventType && eventData) {
            try {
              const parsed = JSON.parse(eventData);
+             if (eventType === "message_start") {
+               const msg = parsed["message"];
+               if (msg?.usage?.input_tokens) {
+                 streamInputTokens = msg.usage.input_tokens;
+               }
+             } else if (eventType === "message_delta") {
+               const usage = parsed["usage"];
+               if (usage?.output_tokens) {
+                 streamOutputTokens = usage.output_tokens;
+               }
+             }
              const converted = convertAnthropicStreamEvent(eventType, parsed, messageId, model, toolState);
              if (converted) {
                yield converted;
@@ -2354,6 +2425,7 @@ async function* convertAnthropicStream(response, model) {
          }
        }
      }
+     lastStreamingUsage = { inputTokens: streamInputTokens, outputTokens: streamOutputTokens };
    } finally {
      reader.releaseLock();
    }
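
The usage counters come from the two Anthropic SSE event types parsed above; the generator stashes the final totals in the module-level lastStreamingUsage for the streaming handler to drain once the stream ends. Abridged event payloads, with illustrative numbers, look roughly like:

// event: message_start — carries the prompt-side count once, up front:
// data: {"type":"message_start","message":{"id":"msg_01...","usage":{"input_tokens":421,"output_tokens":1}}}

// event: message_delta — carries the running completion count; the last one wins:
// data: {"type":"message_delta","delta":{"stop_reason":"end_turn"},"usage":{"output_tokens":187}}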
@@ -2451,23 +2523,32 @@ async function startProxy(config = {}) {
      }
      if (req.method === "GET" && pathname === "/stats") {
        const stats = relay.stats();
-       const savings = relay.savingsReport(30);
+       const costs = calculateCosts();
        const totalRuns = Object.values(modelCounts).reduce((a, b) => a + b, 0);
        const modelDistribution = {};
        for (const [model, count] of Object.entries(modelCounts)) {
+         const modelName = model.split("/")[1] || model;
+         const tokenData = costs.byModel[modelName];
          modelDistribution[model] = {
            count,
-           percentage: totalRuns > 0 ? (count / totalRuns * 100).toFixed(1) + "%" : "0%"
+           percentage: totalRuns > 0 ? (count / totalRuns * 100).toFixed(1) + "%" : "0%",
+           tokens: tokenData ? { input: tokenData.inputTokens, output: tokenData.outputTokens } : void 0,
+           costUsd: tokenData?.costUsd
          };
        }
        res.writeHead(200, { "Content-Type": "application/json" });
        res.end(JSON.stringify({
          totalRuns,
-         savings: {
-           estimatedSavingsPercent: savings.savingsPercent.toFixed(1) + "%",
-           actualCostUsd: savings.actualCost.toFixed(4),
-           baselineCostUsd: savings.baselineCost.toFixed(4),
-           savedUsd: savings.savings.toFixed(4)
+         tokens: {
+           input: costs.totalInputTokens,
+           output: costs.totalOutputTokens,
+           total: costs.totalInputTokens + costs.totalOutputTokens
+         },
+         costs: {
+           actualUsd: costs.actualCostUsd,
+           opusBaselineUsd: costs.opusCostUsd,
+           savingsUsd: costs.savingsUsd,
+           savingsPercent: costs.savingsPercent
          },
          modelDistribution,
          byTaskType: stats.byTaskType,
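
Put together, a GET /stats response should now look roughly like this (illustrative values continuing the example above; the "anthropic/" provider prefix is assumed, and trailing fields such as byTaskType are carried over unchanged and elided here):

{
  "totalRuns": 12,
  "tokens": { "input": 200000, "output": 50000, "total": 250000 },
  "costs": {
    "actualUsd": 0.45,
    "opusBaselineUsd": 6.75,
    "savingsUsd": 6.3,
    "savingsPercent": "93.3%"
  },
  "modelDistribution": {
    "anthropic/claude-3-5-haiku-latest": {
      "count": 12,
      "percentage": "100.0%",
      "tokens": { "input": 200000, "output": 50000 },
      "costUsd": 0.45
    }
  }
}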
@@ -2723,6 +2804,11 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
    const durationMs = Date.now() - startTime;
    const modelKey = `${targetProvider}/${targetModel}`;
    modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
+   if (lastStreamingUsage && (lastStreamingUsage.inputTokens > 0 || lastStreamingUsage.outputTokens > 0)) {
+     trackTokens(targetModel, lastStreamingUsage.inputTokens, lastStreamingUsage.outputTokens);
+     log(`Tokens: ${lastStreamingUsage.inputTokens} in, ${lastStreamingUsage.outputTokens} out`);
+     lastStreamingUsage = null;
+   }
    relay.run({
      prompt: promptText.slice(0, 500),
      taskType,
@@ -2814,6 +2900,11 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetModel,
    const durationMs = Date.now() - startTime;
    const modelKey = `${targetProvider}/${targetModel}`;
    modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
+   const usage = responseData["usage"];
+   if (usage?.prompt_tokens || usage?.completion_tokens) {
+     trackTokens(targetModel, usage.prompt_tokens ?? 0, usage.completion_tokens ?? 0);
+     log(`Tokens: ${usage.prompt_tokens ?? 0} in, ${usage.completion_tokens ?? 0} out`);
+   }
    try {
      const runResult = await relay.run({
        prompt: promptText.slice(0, 500),
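
The non-streaming path reads an OpenAI-style usage object off the converted response body. A hypothetical responseData (abridged; values are illustrative, only the field names match what the handler reads) and what the guard above would record:

// Hypothetical OpenAI-compatible response body:
const responseData = {
  model: "gpt-4o-mini",
  usage: { prompt_tokens: 1200, completion_tokens: 340 }
};
// => trackTokens(targetModel, 1200, 340) and the log line "Tokens: 1200 in, 340 out".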