@relayplane/proxy 0.1.9 → 0.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +123 -71
- package/dist/cli.js +98 -7
- package/dist/cli.js.map +1 -1
- package/dist/cli.mjs +98 -7
- package/dist/cli.mjs.map +1 -1
- package/dist/index.js +98 -7
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +98 -7
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -1747,6 +1747,63 @@ var VERSION = "0.1.9";
|
|
|
1747
1747
|
var recentRuns = [];
|
|
1748
1748
|
var MAX_RECENT_RUNS = 100;
|
|
1749
1749
|
var modelCounts = {};
|
|
1750
|
+
// Per-model token usage accumulated since proxy start:
// model name -> { inputTokens, outputTokens, requests }.
// Populated by trackTokens(), read by calculateCosts().
var tokenStats = {};

// USD price per 1M tokens, keyed by model identifier.
// "default-cheap" / "default-expensive" are fallbacks for models not listed.
var MODEL_PRICING2 = {
  // Anthropic
  "claude-3-haiku-20240307": { input: 0.25, output: 1.25 },
  "claude-3-5-haiku-20241022": { input: 1, output: 5 },
  "claude-3-5-haiku-latest": { input: 1, output: 5 },
  "claude-3-5-sonnet-20241022": { input: 3, output: 15 },
  "claude-sonnet-4-20250514": { input: 3, output: 15 },
  "claude-3-opus-20240229": { input: 15, output: 75 },
  "claude-opus-4-5-20250514": { input: 15, output: 75 },
  // OpenAI
  "gpt-4o": { input: 2.5, output: 10 },
  "gpt-4o-mini": { input: 0.15, output: 0.6 },
  "gpt-4-turbo": { input: 10, output: 30 },
  // Defaults for unknown models
  "default-cheap": { input: 1, output: 5 },
  "default-expensive": { input: 15, output: 75 }
};
|
|
1768
|
+
/**
 * Accumulate token usage for one request into the module-level tokenStats map.
 * Creates the per-model entry on first use.
 * @param {string} model - Model identifier (e.g. "gpt-4o").
 * @param {number} inputTokens - Prompt tokens consumed by this request.
 * @param {number} outputTokens - Completion tokens produced by this request.
 */
function trackTokens(model, inputTokens, outputTokens) {
  // Lazily initialize the accumulator for models seen for the first time.
  tokenStats[model] ??= { inputTokens: 0, outputTokens: 0, requests: 0 };
  const entry = tokenStats[model];
  entry.inputTokens += inputTokens;
  entry.outputTokens += outputTokens;
  entry.requests += 1;
}
|
|
1776
|
+
/**
 * Summarize accumulated token usage (tokenStats) into a cost report.
 * Each model is priced via MODEL_PRICING2 (unknown models fall back to
 * "default-cheap"), and the actual spend is compared against what the same
 * token volume would have cost on claude-opus-4-5 to report savings.
 * @returns {{totalInputTokens: number, totalOutputTokens: number,
 *   actualCostUsd: number, opusCostUsd: number, savingsUsd: number,
 *   savingsPercent: string, byModel: Object}} aggregate cost figures;
 *   USD values rounded to 4 decimal places, savingsPercent like "84.4%".
 */
function calculateCosts() {
  // USD cost of an input/output token pair at a per-1M-token price entry.
  const costFor = (inTok, outTok, price) => inTok / 1e6 * price.input + outTok / 1e6 * price.output;
  // Round a USD amount to 4 decimal places, returned as a number.
  const round4 = (usd) => parseFloat(usd.toFixed(4));

  let sumIn = 0;
  let sumOut = 0;
  let spentUsd = 0;
  const byModel = {};
  for (const [name, usage] of Object.entries(tokenStats)) {
    sumIn += usage.inputTokens;
    sumOut += usage.outputTokens;
    const price = MODEL_PRICING2[name] || MODEL_PRICING2["default-cheap"];
    const modelUsd = costFor(usage.inputTokens, usage.outputTokens, price);
    spentUsd += modelUsd;
    byModel[name] = {
      inputTokens: usage.inputTokens,
      outputTokens: usage.outputTokens,
      costUsd: round4(modelUsd)
    };
  }

  // Baseline: the same traffic routed entirely to the priciest Opus model.
  const opusUsd = costFor(sumIn, sumOut, MODEL_PRICING2["claude-opus-4-5-20250514"]);
  const savedUsd = opusUsd - spentUsd;
  const savedPct = opusUsd > 0 ? (savedUsd / opusUsd * 100).toFixed(1) + "%" : "0%";

  return {
    totalInputTokens: sumIn,
    totalOutputTokens: sumOut,
    actualCostUsd: round4(spentUsd),
    opusCostUsd: round4(opusUsd),
    savingsUsd: round4(savedUsd),
    savingsPercent: savedPct,
    byModel
  };
}
|
|
1750
1807
|
var serverStartTime = 0;
|
|
1751
1808
|
var currentConfig = loadConfig();
|
|
1752
1809
|
var DEFAULT_ENDPOINTS = {
|
|
@@ -2374,6 +2431,7 @@ function convertAnthropicStreamEvent(eventType, eventData, messageId, model, too
|
|
|
2374
2431
|
return null;
|
|
2375
2432
|
}
|
|
2376
2433
|
}
|
|
2434
|
+
var lastStreamingUsage = null;
|
|
2377
2435
|
async function* convertAnthropicStream(response, model) {
|
|
2378
2436
|
const reader = response.body?.getReader();
|
|
2379
2437
|
if (!reader) {
|
|
@@ -2386,6 +2444,8 @@ async function* convertAnthropicStream(response, model) {
|
|
|
2386
2444
|
currentToolIndex: 0,
|
|
2387
2445
|
tools: /* @__PURE__ */ new Map()
|
|
2388
2446
|
};
|
|
2447
|
+
let streamInputTokens = 0;
|
|
2448
|
+
let streamOutputTokens = 0;
|
|
2389
2449
|
try {
|
|
2390
2450
|
while (true) {
|
|
2391
2451
|
const { done, value } = await reader.read();
|
|
@@ -2403,6 +2463,17 @@ async function* convertAnthropicStream(response, model) {
|
|
|
2403
2463
|
} else if (line === "" && eventType && eventData) {
|
|
2404
2464
|
try {
|
|
2405
2465
|
const parsed = JSON.parse(eventData);
|
|
2466
|
+
if (eventType === "message_start") {
|
|
2467
|
+
const msg = parsed["message"];
|
|
2468
|
+
if (msg?.usage?.input_tokens) {
|
|
2469
|
+
streamInputTokens = msg.usage.input_tokens;
|
|
2470
|
+
}
|
|
2471
|
+
} else if (eventType === "message_delta") {
|
|
2472
|
+
const usage = parsed["usage"];
|
|
2473
|
+
if (usage?.output_tokens) {
|
|
2474
|
+
streamOutputTokens = usage.output_tokens;
|
|
2475
|
+
}
|
|
2476
|
+
}
|
|
2406
2477
|
const converted = convertAnthropicStreamEvent(eventType, parsed, messageId, model, toolState);
|
|
2407
2478
|
if (converted) {
|
|
2408
2479
|
yield converted;
|
|
@@ -2414,6 +2485,7 @@ async function* convertAnthropicStream(response, model) {
|
|
|
2414
2485
|
}
|
|
2415
2486
|
}
|
|
2416
2487
|
}
|
|
2488
|
+
lastStreamingUsage = { inputTokens: streamInputTokens, outputTokens: streamOutputTokens };
|
|
2417
2489
|
} finally {
|
|
2418
2490
|
reader.releaseLock();
|
|
2419
2491
|
}
|
|
@@ -2511,23 +2583,32 @@ async function startProxy(config = {}) {
|
|
|
2511
2583
|
}
|
|
2512
2584
|
if (req.method === "GET" && pathname === "/stats") {
|
|
2513
2585
|
const stats = relay.stats();
|
|
2514
|
-
const
|
|
2586
|
+
const costs = calculateCosts();
|
|
2515
2587
|
const totalRuns = Object.values(modelCounts).reduce((a, b) => a + b, 0);
|
|
2516
2588
|
const modelDistribution = {};
|
|
2517
2589
|
for (const [model, count] of Object.entries(modelCounts)) {
|
|
2590
|
+
const modelName = model.split("/")[1] || model;
|
|
2591
|
+
const tokenData = costs.byModel[modelName];
|
|
2518
2592
|
modelDistribution[model] = {
|
|
2519
2593
|
count,
|
|
2520
|
-
percentage: totalRuns > 0 ? (count / totalRuns * 100).toFixed(1) + "%" : "0%"
|
|
2594
|
+
percentage: totalRuns > 0 ? (count / totalRuns * 100).toFixed(1) + "%" : "0%",
|
|
2595
|
+
tokens: tokenData ? { input: tokenData.inputTokens, output: tokenData.outputTokens } : void 0,
|
|
2596
|
+
costUsd: tokenData?.costUsd
|
|
2521
2597
|
};
|
|
2522
2598
|
}
|
|
2523
2599
|
res.writeHead(200, { "Content-Type": "application/json" });
|
|
2524
2600
|
res.end(JSON.stringify({
|
|
2525
2601
|
totalRuns,
|
|
2526
|
-
|
|
2527
|
-
|
|
2528
|
-
|
|
2529
|
-
|
|
2530
|
-
|
|
2602
|
+
tokens: {
|
|
2603
|
+
input: costs.totalInputTokens,
|
|
2604
|
+
output: costs.totalOutputTokens,
|
|
2605
|
+
total: costs.totalInputTokens + costs.totalOutputTokens
|
|
2606
|
+
},
|
|
2607
|
+
costs: {
|
|
2608
|
+
actualUsd: costs.actualCostUsd,
|
|
2609
|
+
opusBaselineUsd: costs.opusCostUsd,
|
|
2610
|
+
savingsUsd: costs.savingsUsd,
|
|
2611
|
+
savingsPercent: costs.savingsPercent
|
|
2531
2612
|
},
|
|
2532
2613
|
modelDistribution,
|
|
2533
2614
|
byTaskType: stats.byTaskType,
|
|
@@ -2783,6 +2864,11 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
2783
2864
|
const durationMs = Date.now() - startTime;
|
|
2784
2865
|
const modelKey = `${targetProvider}/${targetModel}`;
|
|
2785
2866
|
modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
|
|
2867
|
+
if (lastStreamingUsage && (lastStreamingUsage.inputTokens > 0 || lastStreamingUsage.outputTokens > 0)) {
|
|
2868
|
+
trackTokens(targetModel, lastStreamingUsage.inputTokens, lastStreamingUsage.outputTokens);
|
|
2869
|
+
log(`Tokens: ${lastStreamingUsage.inputTokens} in, ${lastStreamingUsage.outputTokens} out`);
|
|
2870
|
+
lastStreamingUsage = null;
|
|
2871
|
+
}
|
|
2786
2872
|
relay.run({
|
|
2787
2873
|
prompt: promptText.slice(0, 500),
|
|
2788
2874
|
taskType,
|
|
@@ -2874,6 +2960,11 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
|
|
|
2874
2960
|
const durationMs = Date.now() - startTime;
|
|
2875
2961
|
const modelKey = `${targetProvider}/${targetModel}`;
|
|
2876
2962
|
modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
|
|
2963
|
+
const usage = responseData["usage"];
|
|
2964
|
+
if (usage?.prompt_tokens || usage?.completion_tokens) {
|
|
2965
|
+
trackTokens(targetModel, usage.prompt_tokens ?? 0, usage.completion_tokens ?? 0);
|
|
2966
|
+
log(`Tokens: ${usage.prompt_tokens ?? 0} in, ${usage.completion_tokens ?? 0} out`);
|
|
2967
|
+
}
|
|
2877
2968
|
try {
|
|
2878
2969
|
const runResult = await relay.run({
|
|
2879
2970
|
prompt: promptText.slice(0, 500),
|