@relayplane/proxy 0.1.9 → 0.1.10

package/dist/index.mjs CHANGED
@@ -1689,6 +1689,63 @@ var VERSION = "0.1.9";
  var recentRuns = [];
  var MAX_RECENT_RUNS = 100;
  var modelCounts = {};
+ var tokenStats = {};
+ var MODEL_PRICING2 = {
+   // Anthropic
+   "claude-3-haiku-20240307": { input: 0.25, output: 1.25 },
+   "claude-3-5-haiku-20241022": { input: 1, output: 5 },
+   "claude-3-5-haiku-latest": { input: 1, output: 5 },
+   "claude-3-5-sonnet-20241022": { input: 3, output: 15 },
+   "claude-sonnet-4-20250514": { input: 3, output: 15 },
+   "claude-3-opus-20240229": { input: 15, output: 75 },
+   "claude-opus-4-5-20250514": { input: 15, output: 75 },
+   // OpenAI
+   "gpt-4o": { input: 2.5, output: 10 },
+   "gpt-4o-mini": { input: 0.15, output: 0.6 },
+   "gpt-4-turbo": { input: 10, output: 30 },
+   // Defaults for unknown models
+   "default-cheap": { input: 1, output: 5 },
+   "default-expensive": { input: 15, output: 75 }
+ };
+ function trackTokens(model, inputTokens, outputTokens) {
+   if (!tokenStats[model]) {
+     tokenStats[model] = { inputTokens: 0, outputTokens: 0, requests: 0 };
+   }
+   tokenStats[model].inputTokens += inputTokens;
+   tokenStats[model].outputTokens += outputTokens;
+   tokenStats[model].requests += 1;
+ }
+ function calculateCosts() {
+   let totalInputTokens = 0;
+   let totalOutputTokens = 0;
+   let actualCostUsd = 0;
+   const byModel = {};
+   for (const [model, stats] of Object.entries(tokenStats)) {
+     totalInputTokens += stats.inputTokens;
+     totalOutputTokens += stats.outputTokens;
+     const pricing = MODEL_PRICING2[model] || MODEL_PRICING2["default-cheap"];
+     const cost = stats.inputTokens / 1e6 * pricing.input + stats.outputTokens / 1e6 * pricing.output;
+     actualCostUsd += cost;
+     byModel[model] = {
+       inputTokens: stats.inputTokens,
+       outputTokens: stats.outputTokens,
+       costUsd: parseFloat(cost.toFixed(4))
+     };
+   }
+   const opusPricing = MODEL_PRICING2["claude-opus-4-5-20250514"];
+   const opusCostUsd = totalInputTokens / 1e6 * opusPricing.input + totalOutputTokens / 1e6 * opusPricing.output;
+   const savingsUsd = opusCostUsd - actualCostUsd;
+   const savingsPercent = opusCostUsd > 0 ? (savingsUsd / opusCostUsd * 100).toFixed(1) + "%" : "0%";
+   return {
+     totalInputTokens,
+     totalOutputTokens,
+     actualCostUsd: parseFloat(actualCostUsd.toFixed(4)),
+     opusCostUsd: parseFloat(opusCostUsd.toFixed(4)),
+     savingsUsd: parseFloat(savingsUsd.toFixed(4)),
+     savingsPercent,
+     byModel
+   };
+ }
  var serverStartTime = 0;
  var currentConfig = loadConfig();
  var DEFAULT_ENDPOINTS = {
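
This hunk gives the proxy an in-memory per-model token ledger, priced in USD per million tokens (hence the / 1e6 in calculateCosts), with claude-opus-4-5 as the "what you would have paid" baseline. A quick worked example with invented token counts (not output from the package):

    // Hypothetical: one tracked model, 2,000,000 input and 500,000 output tokens.
    trackTokens("gpt-4o-mini", 2000000, 500000);
    const report = calculateCosts();
    // actual:   2 * 0.15 + 0.5 * 0.6 = 0.60 USD
    // baseline: 2 * 15   + 0.5 * 75  = 67.50 USD (claude-opus-4-5-20250514 rates)
    // savings:  67.50 - 0.60 = 66.90 USD => savingsPercent "99.1%"

Models missing from MODEL_PRICING2 are billed at the "default-cheap" rate ({ input: 1, output: 5 }).
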
@@ -2316,6 +2373,7 @@ function convertAnthropicStreamEvent(eventType, eventData, messageId, model, too
  return null;
  }
  }
+ var lastStreamingUsage = null;
  async function* convertAnthropicStream(response, model) {
  const reader = response.body?.getReader();
  if (!reader) {
@@ -2328,6 +2386,8 @@ async function* convertAnthropicStream(response, model) {
  currentToolIndex: 0,
  tools: /* @__PURE__ */ new Map()
  };
+ let streamInputTokens = 0;
+ let streamOutputTokens = 0;
  try {
  while (true) {
  const { done, value } = await reader.read();
@@ -2345,6 +2405,17 @@ async function* convertAnthropicStream(response, model) {
  } else if (line === "" && eventType && eventData) {
  try {
  const parsed = JSON.parse(eventData);
+ if (eventType === "message_start") {
+   const msg = parsed["message"];
+   if (msg?.usage?.input_tokens) {
+     streamInputTokens = msg.usage.input_tokens;
+   }
+ } else if (eventType === "message_delta") {
+   const usage = parsed["usage"];
+   if (usage?.output_tokens) {
+     streamOutputTokens = usage.output_tokens;
+   }
+ }
  const converted = convertAnthropicStreamEvent(eventType, parsed, messageId, model, toolState);
  if (converted) {
  yield converted;
@@ -2356,6 +2427,7 @@ async function* convertAnthropicStream(response, model) {
  }
  }
  }
+ lastStreamingUsage = { inputTokens: streamInputTokens, outputTokens: streamOutputTokens };
  } finally {
  reader.releaseLock();
  }
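
For reference, the two Anthropic SSE events those new branches read are shaped roughly like this (abridged, invented values):

    event: message_start
    data: {"type":"message_start","message":{"usage":{"input_tokens":412,"output_tokens":1}}}

    event: message_delta
    data: {"type":"message_delta","delta":{"stop_reason":"end_turn"},"usage":{"output_tokens":93}}

message_start reports the prompt's input_tokens once, while message_delta carries a cumulative output_tokens count, so plain assignment (rather than +=) correctly keeps the final total.
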
@@ -2453,23 +2525,32 @@ async function startProxy(config = {}) {
  }
  if (req.method === "GET" && pathname === "/stats") {
  const stats = relay.stats();
- const savings = relay.savingsReport(30);
+ const costs = calculateCosts();
  const totalRuns = Object.values(modelCounts).reduce((a, b) => a + b, 0);
  const modelDistribution = {};
  for (const [model, count] of Object.entries(modelCounts)) {
+ const modelName = model.split("/")[1] || model;
+ const tokenData = costs.byModel[modelName];
  modelDistribution[model] = {
  count,
- percentage: totalRuns > 0 ? (count / totalRuns * 100).toFixed(1) + "%" : "0%"
+ percentage: totalRuns > 0 ? (count / totalRuns * 100).toFixed(1) + "%" : "0%",
+ tokens: tokenData ? { input: tokenData.inputTokens, output: tokenData.outputTokens } : void 0,
+ costUsd: tokenData?.costUsd
  };
  }
  res.writeHead(200, { "Content-Type": "application/json" });
  res.end(JSON.stringify({
  totalRuns,
- savings: {
-   estimatedSavingsPercent: savings.savingsPercent.toFixed(1) + "%",
-   actualCostUsd: savings.actualCost.toFixed(4),
-   baselineCostUsd: savings.baselineCost.toFixed(4),
-   savedUsd: savings.savings.toFixed(4)
+ tokens: {
+   input: costs.totalInputTokens,
+   output: costs.totalOutputTokens,
+   total: costs.totalInputTokens + costs.totalOutputTokens
+ },
+ costs: {
+   actualUsd: costs.actualCostUsd,
+   opusBaselineUsd: costs.opusCostUsd,
+   savingsUsd: costs.savingsUsd,
+   savingsPercent: costs.savingsPercent
  },
  modelDistribution,
  byTaskType: stats.byTaskType,
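
Put together, the reshaped /stats payload comes out roughly like this (a sketch reusing the figures from the worked example above; remaining fields elided):

    {
      "totalRuns": 12,
      "tokens": { "input": 2000000, "output": 500000, "total": 2500000 },
      "costs": {
        "actualUsd": 0.6,
        "opusBaselineUsd": 67.5,
        "savingsUsd": 66.9,
        "savingsPercent": "99.1%"
      },
      "modelDistribution": {
        "openai/gpt-4o-mini": {
          "count": 12,
          "percentage": "100.0%",
          "tokens": { "input": 2000000, "output": 500000 },
          "costUsd": 0.6
        }
      },
      ...
    }

Two details worth noting: modelCounts keys are provider-qualified while tokenStats keys are bare model names, hence the model.split("/")[1] lookup; and tokens is set to void 0 when no token data exists, which JSON.stringify drops from the output entirely.
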
@@ -2725,6 +2806,11 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
  const durationMs = Date.now() - startTime;
  const modelKey = `${targetProvider}/${targetModel}`;
  modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
+ if (lastStreamingUsage && (lastStreamingUsage.inputTokens > 0 || lastStreamingUsage.outputTokens > 0)) {
+   trackTokens(targetModel, lastStreamingUsage.inputTokens, lastStreamingUsage.outputTokens);
+   log(`Tokens: ${lastStreamingUsage.inputTokens} in, ${lastStreamingUsage.outputTokens} out`);
+   lastStreamingUsage = null;
+ }
  relay.run({
  prompt: promptText.slice(0, 500),
  taskType,
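
A design note on this handoff: convertAnthropicStream only writes lastStreamingUsage after the stream is fully drained, and the handler claims the counts exactly once before resetting the slot to null, so a later stream that reports no usage cannot be credited with these tokens. Since the slot is module-global, concurrent streams could still overwrite one another, making the figures best-effort rather than exact accounting.
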
@@ -2816,6 +2902,11 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
  const durationMs = Date.now() - startTime;
  const modelKey = `${targetProvider}/${targetModel}`;
  modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
+ const usage = responseData["usage"];
+ if (usage?.prompt_tokens || usage?.completion_tokens) {
+   trackTokens(targetModel, usage.prompt_tokens ?? 0, usage.completion_tokens ?? 0);
+   log(`Tokens: ${usage.prompt_tokens ?? 0} in, ${usage.completion_tokens ?? 0} out`);
+ }
  try {
  const runResult = await relay.run({
  prompt: promptText.slice(0, 500),
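
This branch reads the OpenAI-style usage block that non-streaming chat-completion responses carry, shaped roughly like this (invented numbers):

    "usage": { "prompt_tokens": 412, "completion_tokens": 93, "total_tokens": 505 }

The usage?.prompt_tokens || usage?.completion_tokens guard also skips responses that report 0 for both fields, mirroring the > 0 check on the streaming path.
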