@relayplane/proxy 0.1.9 → 0.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +123 -71
- package/dist/cli.js +98 -7
- package/dist/cli.js.map +1 -1
- package/dist/cli.mjs +98 -7
- package/dist/cli.mjs.map +1 -1
- package/dist/index.js +98 -7
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +98 -7
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -1689,6 +1689,63 @@ var VERSION = "0.1.9";
|
|
|
// Recent proxied runs; presumably trimmed to MAX_RECENT_RUNS entries —
// the trimming code is not visible in this excerpt, confirm at call sites.
var recentRuns = [];
var MAX_RECENT_RUNS = 100;
// Run count per "provider/model" key, incremented by the request handlers.
var modelCounts = {};
// Cumulative token usage per model: { inputTokens, outputTokens, requests }.
var tokenStats = {};
// USD price per 1M tokens (input / output) keyed by model id. The
// "default-*" entries are fallbacks for models not listed here.
var MODEL_PRICING2 = {
  // Anthropic
  "claude-3-haiku-20240307": { input: 0.25, output: 1.25 },
  "claude-3-5-haiku-20241022": { input: 1, output: 5 },
  "claude-3-5-haiku-latest": { input: 1, output: 5 },
  "claude-3-5-sonnet-20241022": { input: 3, output: 15 },
  "claude-sonnet-4-20250514": { input: 3, output: 15 },
  "claude-3-opus-20240229": { input: 15, output: 75 },
  "claude-opus-4-5-20250514": { input: 15, output: 75 },
  // OpenAI
  "gpt-4o": { input: 2.5, output: 10 },
  "gpt-4o-mini": { input: 0.15, output: 0.6 },
  "gpt-4-turbo": { input: 10, output: 30 },
  // Defaults for unknown models
  "default-cheap": { input: 1, output: 5 },
  "default-expensive": { input: 15, output: 75 }
};
/**
 * Accumulate token usage for a model into the module-level `tokenStats` map.
 *
 * @param {string} model - Model identifier (e.g. "gpt-4o-mini").
 * @param {number} inputTokens - Prompt tokens consumed by the request.
 * @param {number} outputTokens - Completion tokens produced by the request.
 */
function trackTokens(model, inputTokens, outputTokens) {
  if (!tokenStats[model]) {
    tokenStats[model] = { inputTokens: 0, outputTokens: 0, requests: 0 };
  }
  // Coerce missing or non-finite counts to 0: a single malformed usage
  // payload must not poison the cumulative totals with NaN forever.
  tokenStats[model].inputTokens += Number.isFinite(inputTokens) ? inputTokens : 0;
  tokenStats[model].outputTokens += Number.isFinite(outputTokens) ? outputTokens : 0;
  tokenStats[model].requests += 1;
}
/**
 * Summarize accumulated token usage into a cost report.
 *
 * Reads the module-level `tokenStats` and `MODEL_PRICING2` tables and
 * returns totals, the actual spend, an "everything on Opus" baseline,
 * the savings versus that baseline, and a per-model breakdown.
 *
 * @returns {{totalInputTokens: number, totalOutputTokens: number,
 *   actualCostUsd: number, opusCostUsd: number, savingsUsd: number,
 *   savingsPercent: string, byModel: Object}} cost summary
 */
function calculateCosts() {
  // Round a dollar amount to 4 decimal places for JSON output.
  const round4 = (v) => parseFloat(v.toFixed(4));
  // Dollar cost of `tokens` at a per-million-token rate.
  const priceFor = (tokens, perMillion) => tokens / 1e6 * perMillion;

  const byModel = {};
  let inputTotal = 0;
  let outputTotal = 0;
  let actualUsd = 0;

  for (const [modelId, usage] of Object.entries(tokenStats)) {
    // Unknown models fall back to the cheap default rate.
    const rate = MODEL_PRICING2[modelId] || MODEL_PRICING2["default-cheap"];
    const modelUsd = priceFor(usage.inputTokens, rate.input) + priceFor(usage.outputTokens, rate.output);
    inputTotal += usage.inputTokens;
    outputTotal += usage.outputTokens;
    actualUsd += modelUsd;
    byModel[modelId] = {
      inputTokens: usage.inputTokens,
      outputTokens: usage.outputTokens,
      costUsd: round4(modelUsd)
    };
  }

  // Baseline: what the same traffic would have cost entirely on Opus.
  const opusRate = MODEL_PRICING2["claude-opus-4-5-20250514"];
  const baselineUsd = priceFor(inputTotal, opusRate.input) + priceFor(outputTotal, opusRate.output);
  const savedUsd = baselineUsd - actualUsd;
  const savingsPercent = baselineUsd > 0 ? (savedUsd / baselineUsd * 100).toFixed(1) + "%" : "0%";

  return {
    totalInputTokens: inputTotal,
    totalOutputTokens: outputTotal,
    actualCostUsd: round4(actualUsd),
    opusCostUsd: round4(baselineUsd),
    savingsUsd: round4(savedUsd),
    savingsPercent,
    byModel
  };
}
// Epoch ms when the proxy server started; 0 until startup sets it.
var serverStartTime = 0;
// Active proxy configuration, loaded once at module init
// (loadConfig is defined elsewhere in this file).
var currentConfig = loadConfig();
1694
1751
|
var DEFAULT_ENDPOINTS = {
|
|
@@ -2316,6 +2373,7 @@ function convertAnthropicStreamEvent(eventType, eventData, messageId, model, too
|
|
|
2316
2373
|
return null;
|
|
2317
2374
|
}
|
|
2318
2375
|
}
|
|
// Token usage captured from the most recent Anthropic SSE stream
// ({ inputTokens, outputTokens }); written by convertAnthropicStream,
// consumed and reset to null by handleStreamingRequest.
var lastStreamingUsage = null;
|
2319
2377
|
async function* convertAnthropicStream(response, model) {
|
|
2320
2378
|
const reader = response.body?.getReader();
|
|
2321
2379
|
if (!reader) {
|
|
@@ -2328,6 +2386,8 @@ async function* convertAnthropicStream(response, model) {
|
|
|
2328
2386
|
currentToolIndex: 0,
|
|
2329
2387
|
tools: /* @__PURE__ */ new Map()
|
|
2330
2388
|
};
|
|
2389
|
+
let streamInputTokens = 0;
|
|
2390
|
+
let streamOutputTokens = 0;
|
|
2331
2391
|
try {
|
|
2332
2392
|
while (true) {
|
|
2333
2393
|
const { done, value } = await reader.read();
|
|
@@ -2345,6 +2405,17 @@ async function* convertAnthropicStream(response, model) {
|
|
|
2345
2405
|
} else if (line === "" && eventType && eventData) {
|
|
2346
2406
|
try {
|
|
2347
2407
|
const parsed = JSON.parse(eventData);
|
|
2408
|
+
if (eventType === "message_start") {
|
|
2409
|
+
const msg = parsed["message"];
|
|
2410
|
+
if (msg?.usage?.input_tokens) {
|
|
2411
|
+
streamInputTokens = msg.usage.input_tokens;
|
|
2412
|
+
}
|
|
2413
|
+
} else if (eventType === "message_delta") {
|
|
2414
|
+
const usage = parsed["usage"];
|
|
2415
|
+
if (usage?.output_tokens) {
|
|
2416
|
+
streamOutputTokens = usage.output_tokens;
|
|
2417
|
+
}
|
|
2418
|
+
}
|
|
2348
2419
|
const converted = convertAnthropicStreamEvent(eventType, parsed, messageId, model, toolState);
|
|
2349
2420
|
if (converted) {
|
|
2350
2421
|
yield converted;
|
|
@@ -2356,6 +2427,7 @@ async function* convertAnthropicStream(response, model) {
|
|
|
2356
2427
|
}
|
|
2357
2428
|
}
|
|
2358
2429
|
}
|
|
2430
|
+
lastStreamingUsage = { inputTokens: streamInputTokens, outputTokens: streamOutputTokens };
|
|
2359
2431
|
} finally {
|
|
2360
2432
|
reader.releaseLock();
|
|
2361
2433
|
}
|
|
@@ -2453,23 +2525,32 @@ async function startProxy(config = {}) {
|
|
|
2453
2525
|
}
|
|
2454
2526
|
if (req.method === "GET" && pathname === "/stats") {
|
|
2455
2527
|
const stats = relay.stats();
|
|
2456
|
-
const
|
|
2528
|
+
const costs = calculateCosts();
|
|
2457
2529
|
const totalRuns = Object.values(modelCounts).reduce((a, b) => a + b, 0);
|
|
2458
2530
|
const modelDistribution = {};
|
|
2459
2531
|
for (const [model, count] of Object.entries(modelCounts)) {
|
|
2532
|
+
const modelName = model.split("/")[1] || model;
|
|
2533
|
+
const tokenData = costs.byModel[modelName];
|
|
2460
2534
|
modelDistribution[model] = {
|
|
2461
2535
|
count,
|
|
2462
|
-
percentage: totalRuns > 0 ? (count / totalRuns * 100).toFixed(1) + "%" : "0%"
|
|
2536
|
+
percentage: totalRuns > 0 ? (count / totalRuns * 100).toFixed(1) + "%" : "0%",
|
|
2537
|
+
tokens: tokenData ? { input: tokenData.inputTokens, output: tokenData.outputTokens } : void 0,
|
|
2538
|
+
costUsd: tokenData?.costUsd
|
|
2463
2539
|
};
|
|
2464
2540
|
}
|
|
2465
2541
|
res.writeHead(200, { "Content-Type": "application/json" });
|
|
2466
2542
|
res.end(JSON.stringify({
|
|
2467
2543
|
totalRuns,
|
|
2468
|
-
|
|
2469
|
-
|
|
2470
|
-
|
|
2471
|
-
|
|
2472
|
-
|
|
2544
|
+
tokens: {
|
|
2545
|
+
input: costs.totalInputTokens,
|
|
2546
|
+
output: costs.totalOutputTokens,
|
|
2547
|
+
total: costs.totalInputTokens + costs.totalOutputTokens
|
|
2548
|
+
},
|
|
2549
|
+
costs: {
|
|
2550
|
+
actualUsd: costs.actualCostUsd,
|
|
2551
|
+
opusBaselineUsd: costs.opusCostUsd,
|
|
2552
|
+
savingsUsd: costs.savingsUsd,
|
|
2553
|
+
savingsPercent: costs.savingsPercent
|
|
2473
2554
|
},
|
|
2474
2555
|
modelDistribution,
|
|
2475
2556
|
byTaskType: stats.byTaskType,
|
|
@@ -2725,6 +2806,11 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
2725
2806
|
const durationMs = Date.now() - startTime;
|
|
2726
2807
|
const modelKey = `${targetProvider}/${targetModel}`;
|
|
2727
2808
|
modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
|
|
2809
|
+
if (lastStreamingUsage && (lastStreamingUsage.inputTokens > 0 || lastStreamingUsage.outputTokens > 0)) {
|
|
2810
|
+
trackTokens(targetModel, lastStreamingUsage.inputTokens, lastStreamingUsage.outputTokens);
|
|
2811
|
+
log(`Tokens: ${lastStreamingUsage.inputTokens} in, ${lastStreamingUsage.outputTokens} out`);
|
|
2812
|
+
lastStreamingUsage = null;
|
|
2813
|
+
}
|
|
2728
2814
|
relay.run({
|
|
2729
2815
|
prompt: promptText.slice(0, 500),
|
|
2730
2816
|
taskType,
|
|
@@ -2816,6 +2902,11 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
|
|
|
2816
2902
|
const durationMs = Date.now() - startTime;
|
|
2817
2903
|
const modelKey = `${targetProvider}/${targetModel}`;
|
|
2818
2904
|
modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
|
|
2905
|
+
const usage = responseData["usage"];
|
|
2906
|
+
if (usage?.prompt_tokens || usage?.completion_tokens) {
|
|
2907
|
+
trackTokens(targetModel, usage.prompt_tokens ?? 0, usage.completion_tokens ?? 0);
|
|
2908
|
+
log(`Tokens: ${usage.prompt_tokens ?? 0} in, ${usage.completion_tokens ?? 0} out`);
|
|
2909
|
+
}
|
|
2819
2910
|
try {
|
|
2820
2911
|
const runResult = await relay.run({
|
|
2821
2912
|
prompt: promptText.slice(0, 500),
|