@relayplane/proxy 0.1.8 → 0.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +144 -43
- package/dist/cli.js +158 -23
- package/dist/cli.js.map +1 -1
- package/dist/cli.mjs +158 -23
- package/dist/cli.mjs.map +1 -1
- package/dist/index.d.mts +23 -0
- package/dist/index.d.ts +23 -0
- package/dist/index.js +158 -23
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +158 -23
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/index.d.mts
CHANGED
|
@@ -1071,6 +1071,19 @@ declare const ConfigSchema: z.ZodObject<{
|
|
|
1071
1071
|
qualityModel?: string | undefined;
|
|
1072
1072
|
costModel?: string | undefined;
|
|
1073
1073
|
}>>;
|
|
1074
|
+
auth: z.ZodOptional<z.ZodObject<{
|
|
1075
|
+
anthropicApiKey: z.ZodOptional<z.ZodString>;
|
|
1076
|
+
anthropicMaxToken: z.ZodOptional<z.ZodString>;
|
|
1077
|
+
useMaxForModels: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
|
|
1078
|
+
}, "strip", z.ZodTypeAny, {
|
|
1079
|
+
anthropicApiKey?: string | undefined;
|
|
1080
|
+
anthropicMaxToken?: string | undefined;
|
|
1081
|
+
useMaxForModels?: string[] | undefined;
|
|
1082
|
+
}, {
|
|
1083
|
+
anthropicApiKey?: string | undefined;
|
|
1084
|
+
anthropicMaxToken?: string | undefined;
|
|
1085
|
+
useMaxForModels?: string[] | undefined;
|
|
1086
|
+
}>>;
|
|
1074
1087
|
}, "strip", z.ZodTypeAny, {
|
|
1075
1088
|
strategies?: Record<string, {
|
|
1076
1089
|
model: string;
|
|
@@ -1081,6 +1094,11 @@ declare const ConfigSchema: z.ZodObject<{
|
|
|
1081
1094
|
qualityModel?: string | undefined;
|
|
1082
1095
|
costModel?: string | undefined;
|
|
1083
1096
|
} | undefined;
|
|
1097
|
+
auth?: {
|
|
1098
|
+
anthropicApiKey?: string | undefined;
|
|
1099
|
+
anthropicMaxToken?: string | undefined;
|
|
1100
|
+
useMaxForModels?: string[] | undefined;
|
|
1101
|
+
} | undefined;
|
|
1084
1102
|
}, {
|
|
1085
1103
|
strategies?: Record<string, {
|
|
1086
1104
|
model: string;
|
|
@@ -1091,6 +1109,11 @@ declare const ConfigSchema: z.ZodObject<{
|
|
|
1091
1109
|
qualityModel?: string | undefined;
|
|
1092
1110
|
costModel?: string | undefined;
|
|
1093
1111
|
} | undefined;
|
|
1112
|
+
auth?: {
|
|
1113
|
+
anthropicApiKey?: string | undefined;
|
|
1114
|
+
anthropicMaxToken?: string | undefined;
|
|
1115
|
+
useMaxForModels?: string[] | undefined;
|
|
1116
|
+
} | undefined;
|
|
1094
1117
|
}>;
|
|
1095
1118
|
type StrategyConfig = z.infer<typeof StrategySchema>;
|
|
1096
1119
|
type Config = z.infer<typeof ConfigSchema>;
|
package/dist/index.d.ts
CHANGED
|
@@ -1071,6 +1071,19 @@ declare const ConfigSchema: z.ZodObject<{
|
|
|
1071
1071
|
qualityModel?: string | undefined;
|
|
1072
1072
|
costModel?: string | undefined;
|
|
1073
1073
|
}>>;
|
|
1074
|
+
auth: z.ZodOptional<z.ZodObject<{
|
|
1075
|
+
anthropicApiKey: z.ZodOptional<z.ZodString>;
|
|
1076
|
+
anthropicMaxToken: z.ZodOptional<z.ZodString>;
|
|
1077
|
+
useMaxForModels: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
|
|
1078
|
+
}, "strip", z.ZodTypeAny, {
|
|
1079
|
+
anthropicApiKey?: string | undefined;
|
|
1080
|
+
anthropicMaxToken?: string | undefined;
|
|
1081
|
+
useMaxForModels?: string[] | undefined;
|
|
1082
|
+
}, {
|
|
1083
|
+
anthropicApiKey?: string | undefined;
|
|
1084
|
+
anthropicMaxToken?: string | undefined;
|
|
1085
|
+
useMaxForModels?: string[] | undefined;
|
|
1086
|
+
}>>;
|
|
1074
1087
|
}, "strip", z.ZodTypeAny, {
|
|
1075
1088
|
strategies?: Record<string, {
|
|
1076
1089
|
model: string;
|
|
@@ -1081,6 +1094,11 @@ declare const ConfigSchema: z.ZodObject<{
|
|
|
1081
1094
|
qualityModel?: string | undefined;
|
|
1082
1095
|
costModel?: string | undefined;
|
|
1083
1096
|
} | undefined;
|
|
1097
|
+
auth?: {
|
|
1098
|
+
anthropicApiKey?: string | undefined;
|
|
1099
|
+
anthropicMaxToken?: string | undefined;
|
|
1100
|
+
useMaxForModels?: string[] | undefined;
|
|
1101
|
+
} | undefined;
|
|
1084
1102
|
}, {
|
|
1085
1103
|
strategies?: Record<string, {
|
|
1086
1104
|
model: string;
|
|
@@ -1091,6 +1109,11 @@ declare const ConfigSchema: z.ZodObject<{
|
|
|
1091
1109
|
qualityModel?: string | undefined;
|
|
1092
1110
|
costModel?: string | undefined;
|
|
1093
1111
|
} | undefined;
|
|
1112
|
+
auth?: {
|
|
1113
|
+
anthropicApiKey?: string | undefined;
|
|
1114
|
+
anthropicMaxToken?: string | undefined;
|
|
1115
|
+
useMaxForModels?: string[] | undefined;
|
|
1116
|
+
} | undefined;
|
|
1094
1117
|
}>;
|
|
1095
1118
|
type StrategyConfig = z.infer<typeof StrategySchema>;
|
|
1096
1119
|
type Config = z.infer<typeof ConfigSchema>;
|
package/dist/index.js
CHANGED
|
@@ -1638,12 +1638,19 @@ var StrategySchema = import_zod.z.object({
|
|
|
1638
1638
|
minConfidence: import_zod.z.number().min(0).max(1).optional(),
|
|
1639
1639
|
fallback: import_zod.z.string().optional()
|
|
1640
1640
|
});
|
|
1641
|
+
/**
 * Schema for the optional `auth` section of the proxy config.
 *
 * Every field is optional, and so is the section itself:
 *  - `anthropicApiKey`:   string
 *  - `anthropicMaxToken`: string
 *  - `useMaxForModels`:   array of strings (Default: ['opus'])
 */
var AuthSchema = import_zod.z.optional(
  import_zod.z.object({
    anthropicApiKey: import_zod.z.optional(import_zod.z.string()),
    anthropicMaxToken: import_zod.z.optional(import_zod.z.string()),
    // Default: ['opus']
    useMaxForModels: import_zod.z.optional(import_zod.z.array(import_zod.z.string()))
  })
);
|
|
1641
1647
|
var ConfigSchema = import_zod.z.object({
|
|
1642
1648
|
strategies: import_zod.z.record(import_zod.z.string(), StrategySchema).optional(),
|
|
1643
1649
|
defaults: import_zod.z.object({
|
|
1644
1650
|
qualityModel: import_zod.z.string().optional(),
|
|
1645
1651
|
costModel: import_zod.z.string().optional()
|
|
1646
|
-
}).optional()
|
|
1652
|
+
}).optional(),
|
|
1653
|
+
auth: AuthSchema
|
|
1647
1654
|
});
|
|
1648
1655
|
var DEFAULT_CONFIG = {
|
|
1649
1656
|
strategies: {
|
|
@@ -1703,6 +1710,19 @@ function loadConfig() {
|
|
|
1703
1710
|
/**
 * Looks up the routing strategy configured for a task type.
 *
 * @param config   Loaded configuration object; only `config.strategies` is read.
 * @param taskType Key into `config.strategies`.
 * @returns The strategy entry, or `null` when there is no `strategies` map or
 *          the entry is null/undefined.
 */
function getStrategy(config, taskType) {
  const strategies = config.strategies;
  if (strategies == null) {
    return null;
  }
  const strategy = strategies[taskType];
  return strategy == null ? null : strategy;
}
|
|
1713
|
+
/**
 * Picks the credential to use for an Anthropic request.
 *
 * Resolution order:
 *  1. `config.auth.anthropicMaxToken`, when the model name contains
 *     (case-insensitively) one of the `config.auth.useMaxForModels` patterns.
 *     The pattern list defaults to `["opus"]`.
 *  2. `config.auth.anthropicApiKey`, when it is a non-empty string.
 *  3. The `ANTHROPIC_API_KEY` environment variable.
 *
 * @param config Loaded proxy configuration; only `config.auth` is read.
 * @param model  Target model name.
 * @returns `{ type: "max", value }` or `{ type: "apiKey", value }`, or `null`
 *          when no usable credential is configured.
 */
function getAnthropicAuth(config, model) {
  const auth = config?.auth;
  // Lower-case once instead of once per pattern; a non-string model matches nothing.
  const modelName = typeof model === "string" ? model.toLowerCase() : "";
  const patterns = auth?.useMaxForModels ?? ["opus"];
  const shouldUseMax = patterns.some((pattern) => {
    // Blank patterns are ignored: "anything".includes("") is true, so an empty
    // entry would otherwise route every model through the MAX token.
    if (typeof pattern !== "string" || pattern.trim() === "") {
      return false;
    }
    return modelName.includes(pattern.toLowerCase());
  });
  if (shouldUseMax && auth?.anthropicMaxToken) {
    return { type: "max", value: auth.anthropicMaxToken };
  }
  // `||` rather than `??`: an empty-string key in the config must not hide a
  // valid key supplied through the environment.
  const apiKey = auth?.anthropicApiKey || process.env["ANTHROPIC_API_KEY"];
  if (apiKey) {
    return { type: "apiKey", value: apiKey };
  }
  return null;
}
|
|
1706
1726
|
function watchConfig(onChange) {
|
|
1707
1727
|
const configPath = getConfigPath();
|
|
1708
1728
|
const dir = path2.dirname(configPath);
|
|
@@ -1723,10 +1743,67 @@ function watchConfig(onChange) {
|
|
|
1723
1743
|
}
|
|
1724
1744
|
|
|
1725
1745
|
// src/proxy.ts
|
|
1726
|
-
var VERSION = "0.1.
|
|
1746
|
+
var VERSION = "0.1.9";
|
|
1727
1747
|
var recentRuns = [];
|
|
1728
1748
|
var MAX_RECENT_RUNS = 100;
|
|
1729
1749
|
var modelCounts = {};
|
|
1750
|
+
var tokenStats = {};
|
|
1751
|
+
/**
 * Price table keyed by model id. Each value is `{ input, output }`; the cost
 * calculation in this file multiplies these by token counts divided by 1e6.
 * The two `default-*` rows are generic fallback entries for unknown models.
 */
var MODEL_PRICING2 = Object.fromEntries(
  [
    // [model id, input price, output price]
    // Anthropic
    ["claude-3-haiku-20240307", 0.25, 1.25],
    ["claude-3-5-haiku-20241022", 1, 5],
    ["claude-3-5-haiku-latest", 1, 5],
    ["claude-3-5-sonnet-20241022", 3, 15],
    ["claude-sonnet-4-20250514", 3, 15],
    ["claude-3-opus-20240229", 15, 75],
    ["claude-opus-4-5-20250514", 15, 75],
    // OpenAI
    ["gpt-4o", 2.5, 10],
    ["gpt-4o-mini", 0.15, 0.6],
    ["gpt-4-turbo", 10, 30],
    // Defaults for unknown models
    ["default-cheap", 1, 5],
    ["default-expensive", 15, 75]
  ].map(([modelId, input, output]) => [modelId, { input, output }])
);
|
|
1768
|
+
function trackTokens(model, inputTokens, outputTokens) {
|
|
1769
|
+
if (!tokenStats[model]) {
|
|
1770
|
+
tokenStats[model] = { inputTokens: 0, outputTokens: 0, requests: 0 };
|
|
1771
|
+
}
|
|
1772
|
+
tokenStats[model].inputTokens += inputTokens;
|
|
1773
|
+
tokenStats[model].outputTokens += outputTokens;
|
|
1774
|
+
tokenStats[model].requests += 1;
|
|
1775
|
+
}
|
|
1776
|
+
function calculateCosts() {
|
|
1777
|
+
let totalInputTokens = 0;
|
|
1778
|
+
let totalOutputTokens = 0;
|
|
1779
|
+
let actualCostUsd = 0;
|
|
1780
|
+
const byModel = {};
|
|
1781
|
+
for (const [model, stats] of Object.entries(tokenStats)) {
|
|
1782
|
+
totalInputTokens += stats.inputTokens;
|
|
1783
|
+
totalOutputTokens += stats.outputTokens;
|
|
1784
|
+
const pricing = MODEL_PRICING2[model] || MODEL_PRICING2["default-cheap"];
|
|
1785
|
+
const cost = stats.inputTokens / 1e6 * pricing.input + stats.outputTokens / 1e6 * pricing.output;
|
|
1786
|
+
actualCostUsd += cost;
|
|
1787
|
+
byModel[model] = {
|
|
1788
|
+
inputTokens: stats.inputTokens,
|
|
1789
|
+
outputTokens: stats.outputTokens,
|
|
1790
|
+
costUsd: parseFloat(cost.toFixed(4))
|
|
1791
|
+
};
|
|
1792
|
+
}
|
|
1793
|
+
const opusPricing = MODEL_PRICING2["claude-opus-4-5-20250514"];
|
|
1794
|
+
const opusCostUsd = totalInputTokens / 1e6 * opusPricing.input + totalOutputTokens / 1e6 * opusPricing.output;
|
|
1795
|
+
const savingsUsd = opusCostUsd - actualCostUsd;
|
|
1796
|
+
const savingsPercent = opusCostUsd > 0 ? (savingsUsd / opusCostUsd * 100).toFixed(1) + "%" : "0%";
|
|
1797
|
+
return {
|
|
1798
|
+
totalInputTokens,
|
|
1799
|
+
totalOutputTokens,
|
|
1800
|
+
actualCostUsd: parseFloat(actualCostUsd.toFixed(4)),
|
|
1801
|
+
opusCostUsd: parseFloat(opusCostUsd.toFixed(4)),
|
|
1802
|
+
savingsUsd: parseFloat(savingsUsd.toFixed(4)),
|
|
1803
|
+
savingsPercent,
|
|
1804
|
+
byModel
|
|
1805
|
+
};
|
|
1806
|
+
}
|
|
1730
1807
|
var serverStartTime = 0;
|
|
1731
1808
|
var currentConfig = loadConfig();
|
|
1732
1809
|
var DEFAULT_ENDPOINTS = {
|
|
@@ -1791,13 +1868,17 @@ function extractPromptText(messages) {
|
|
|
1791
1868
|
return "";
|
|
1792
1869
|
}).join("\n");
|
|
1793
1870
|
}
|
|
1794
|
-
async function forwardToAnthropic(request, targetModel,
|
|
1871
|
+
async function forwardToAnthropic(request, targetModel, auth, betaHeaders) {
|
|
1795
1872
|
const anthropicBody = buildAnthropicBody(request, targetModel, false);
|
|
1796
1873
|
const headers = {
|
|
1797
1874
|
"Content-Type": "application/json",
|
|
1798
|
-
"x-api-key": apiKey,
|
|
1799
1875
|
"anthropic-version": "2023-06-01"
|
|
1800
1876
|
};
|
|
1877
|
+
if (auth.type === "max") {
|
|
1878
|
+
headers["Authorization"] = `Bearer ${auth.value}`;
|
|
1879
|
+
} else {
|
|
1880
|
+
headers["x-api-key"] = auth.value;
|
|
1881
|
+
}
|
|
1801
1882
|
if (betaHeaders) {
|
|
1802
1883
|
headers["anthropic-beta"] = betaHeaders;
|
|
1803
1884
|
}
|
|
@@ -1808,13 +1889,17 @@ async function forwardToAnthropic(request, targetModel, apiKey, betaHeaders) {
|
|
|
1808
1889
|
});
|
|
1809
1890
|
return response;
|
|
1810
1891
|
}
|
|
1811
|
-
async function forwardToAnthropicStream(request, targetModel,
|
|
1892
|
+
async function forwardToAnthropicStream(request, targetModel, auth, betaHeaders) {
|
|
1812
1893
|
const anthropicBody = buildAnthropicBody(request, targetModel, true);
|
|
1813
1894
|
const headers = {
|
|
1814
1895
|
"Content-Type": "application/json",
|
|
1815
|
-
"x-api-key": apiKey,
|
|
1816
1896
|
"anthropic-version": "2023-06-01"
|
|
1817
1897
|
};
|
|
1898
|
+
if (auth.type === "max") {
|
|
1899
|
+
headers["Authorization"] = `Bearer ${auth.value}`;
|
|
1900
|
+
} else {
|
|
1901
|
+
headers["x-api-key"] = auth.value;
|
|
1902
|
+
}
|
|
1818
1903
|
if (betaHeaders) {
|
|
1819
1904
|
headers["anthropic-beta"] = betaHeaders;
|
|
1820
1905
|
}
|
|
@@ -2346,6 +2431,7 @@ function convertAnthropicStreamEvent(eventType, eventData, messageId, model, too
|
|
|
2346
2431
|
return null;
|
|
2347
2432
|
}
|
|
2348
2433
|
}
|
|
2434
|
+
var lastStreamingUsage = null;
|
|
2349
2435
|
async function* convertAnthropicStream(response, model) {
|
|
2350
2436
|
const reader = response.body?.getReader();
|
|
2351
2437
|
if (!reader) {
|
|
@@ -2358,6 +2444,8 @@ async function* convertAnthropicStream(response, model) {
|
|
|
2358
2444
|
currentToolIndex: 0,
|
|
2359
2445
|
tools: /* @__PURE__ */ new Map()
|
|
2360
2446
|
};
|
|
2447
|
+
let streamInputTokens = 0;
|
|
2448
|
+
let streamOutputTokens = 0;
|
|
2361
2449
|
try {
|
|
2362
2450
|
while (true) {
|
|
2363
2451
|
const { done, value } = await reader.read();
|
|
@@ -2375,6 +2463,17 @@ async function* convertAnthropicStream(response, model) {
|
|
|
2375
2463
|
} else if (line === "" && eventType && eventData) {
|
|
2376
2464
|
try {
|
|
2377
2465
|
const parsed = JSON.parse(eventData);
|
|
2466
|
+
if (eventType === "message_start") {
|
|
2467
|
+
const msg = parsed["message"];
|
|
2468
|
+
if (msg?.usage?.input_tokens) {
|
|
2469
|
+
streamInputTokens = msg.usage.input_tokens;
|
|
2470
|
+
}
|
|
2471
|
+
} else if (eventType === "message_delta") {
|
|
2472
|
+
const usage = parsed["usage"];
|
|
2473
|
+
if (usage?.output_tokens) {
|
|
2474
|
+
streamOutputTokens = usage.output_tokens;
|
|
2475
|
+
}
|
|
2476
|
+
}
|
|
2378
2477
|
const converted = convertAnthropicStreamEvent(eventType, parsed, messageId, model, toolState);
|
|
2379
2478
|
if (converted) {
|
|
2380
2479
|
yield converted;
|
|
@@ -2386,6 +2485,7 @@ async function* convertAnthropicStream(response, model) {
|
|
|
2386
2485
|
}
|
|
2387
2486
|
}
|
|
2388
2487
|
}
|
|
2488
|
+
lastStreamingUsage = { inputTokens: streamInputTokens, outputTokens: streamOutputTokens };
|
|
2389
2489
|
} finally {
|
|
2390
2490
|
reader.releaseLock();
|
|
2391
2491
|
}
|
|
@@ -2483,23 +2583,32 @@ async function startProxy(config = {}) {
|
|
|
2483
2583
|
}
|
|
2484
2584
|
if (req.method === "GET" && pathname === "/stats") {
|
|
2485
2585
|
const stats = relay.stats();
|
|
2486
|
-
const
|
|
2586
|
+
const costs = calculateCosts();
|
|
2487
2587
|
const totalRuns = Object.values(modelCounts).reduce((a, b) => a + b, 0);
|
|
2488
2588
|
const modelDistribution = {};
|
|
2489
2589
|
for (const [model, count] of Object.entries(modelCounts)) {
|
|
2590
|
+
const modelName = model.split("/")[1] || model;
|
|
2591
|
+
const tokenData = costs.byModel[modelName];
|
|
2490
2592
|
modelDistribution[model] = {
|
|
2491
2593
|
count,
|
|
2492
|
-
percentage: totalRuns > 0 ? (count / totalRuns * 100).toFixed(1) + "%" : "0%"
|
|
2594
|
+
percentage: totalRuns > 0 ? (count / totalRuns * 100).toFixed(1) + "%" : "0%",
|
|
2595
|
+
tokens: tokenData ? { input: tokenData.inputTokens, output: tokenData.outputTokens } : void 0,
|
|
2596
|
+
costUsd: tokenData?.costUsd
|
|
2493
2597
|
};
|
|
2494
2598
|
}
|
|
2495
2599
|
res.writeHead(200, { "Content-Type": "application/json" });
|
|
2496
2600
|
res.end(JSON.stringify({
|
|
2497
2601
|
totalRuns,
|
|
2498
|
-
|
|
2499
|
-
|
|
2500
|
-
|
|
2501
|
-
|
|
2502
|
-
|
|
2602
|
+
tokens: {
|
|
2603
|
+
input: costs.totalInputTokens,
|
|
2604
|
+
output: costs.totalOutputTokens,
|
|
2605
|
+
total: costs.totalInputTokens + costs.totalOutputTokens
|
|
2606
|
+
},
|
|
2607
|
+
costs: {
|
|
2608
|
+
actualUsd: costs.actualCostUsd,
|
|
2609
|
+
opusBaselineUsd: costs.opusCostUsd,
|
|
2610
|
+
savingsUsd: costs.savingsUsd,
|
|
2611
|
+
savingsPercent: costs.savingsPercent
|
|
2503
2612
|
},
|
|
2504
2613
|
modelDistribution,
|
|
2505
2614
|
byTaskType: stats.byTaskType,
|
|
@@ -2620,12 +2729,24 @@ async function startProxy(config = {}) {
|
|
|
2620
2729
|
}
|
|
2621
2730
|
}
|
|
2622
2731
|
log(`Routing to: ${targetProvider}/${targetModel}`);
|
|
2623
|
-
|
|
2624
|
-
|
|
2625
|
-
if (
|
|
2626
|
-
|
|
2627
|
-
|
|
2628
|
-
|
|
2732
|
+
let apiKey;
|
|
2733
|
+
let anthropicAuth = null;
|
|
2734
|
+
if (targetProvider === "anthropic") {
|
|
2735
|
+
anthropicAuth = getAnthropicAuth(currentConfig, targetModel);
|
|
2736
|
+
if (!anthropicAuth) {
|
|
2737
|
+
res.writeHead(500, { "Content-Type": "application/json" });
|
|
2738
|
+
res.end(JSON.stringify({ error: "No Anthropic auth configured (set ANTHROPIC_API_KEY or config.auth.anthropicMaxToken)" }));
|
|
2739
|
+
return;
|
|
2740
|
+
}
|
|
2741
|
+
log(`Using ${anthropicAuth.type === "max" ? "MAX token" : "API key"} auth for ${targetModel}`);
|
|
2742
|
+
} else {
|
|
2743
|
+
const apiKeyEnv = DEFAULT_ENDPOINTS[targetProvider]?.apiKeyEnv ?? `${targetProvider.toUpperCase()}_API_KEY`;
|
|
2744
|
+
apiKey = process.env[apiKeyEnv];
|
|
2745
|
+
if (!apiKey) {
|
|
2746
|
+
res.writeHead(500, { "Content-Type": "application/json" });
|
|
2747
|
+
res.end(JSON.stringify({ error: `Missing ${apiKeyEnv} environment variable` }));
|
|
2748
|
+
return;
|
|
2749
|
+
}
|
|
2629
2750
|
}
|
|
2630
2751
|
const startTime = Date.now();
|
|
2631
2752
|
const betaHeaders = req.headers["anthropic-beta"];
|
|
@@ -2636,6 +2757,7 @@ async function startProxy(config = {}) {
|
|
|
2636
2757
|
targetProvider,
|
|
2637
2758
|
targetModel,
|
|
2638
2759
|
apiKey,
|
|
2760
|
+
anthropicAuth,
|
|
2639
2761
|
relay,
|
|
2640
2762
|
promptText,
|
|
2641
2763
|
taskType,
|
|
@@ -2652,6 +2774,7 @@ async function startProxy(config = {}) {
|
|
|
2652
2774
|
targetProvider,
|
|
2653
2775
|
targetModel,
|
|
2654
2776
|
apiKey,
|
|
2777
|
+
anthropicAuth,
|
|
2655
2778
|
relay,
|
|
2656
2779
|
promptText,
|
|
2657
2780
|
taskType,
|
|
@@ -2681,12 +2804,13 @@ async function startProxy(config = {}) {
|
|
|
2681
2804
|
});
|
|
2682
2805
|
});
|
|
2683
2806
|
}
|
|
2684
|
-
async function handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, relay, promptText, taskType, confidence, routingMode, startTime, log, betaHeaders) {
|
|
2807
|
+
async function handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, anthropicAuth, relay, promptText, taskType, confidence, routingMode, startTime, log, betaHeaders) {
|
|
2685
2808
|
let providerResponse;
|
|
2686
2809
|
try {
|
|
2687
2810
|
switch (targetProvider) {
|
|
2688
2811
|
case "anthropic":
|
|
2689
|
-
|
|
2812
|
+
if (!anthropicAuth) throw new Error("No Anthropic auth");
|
|
2813
|
+
providerResponse = await forwardToAnthropicStream(request, targetModel, anthropicAuth, betaHeaders);
|
|
2690
2814
|
break;
|
|
2691
2815
|
case "google":
|
|
2692
2816
|
providerResponse = await forwardToGeminiStream(request, targetModel, apiKey);
|
|
@@ -2740,6 +2864,11 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
2740
2864
|
const durationMs = Date.now() - startTime;
|
|
2741
2865
|
const modelKey = `${targetProvider}/${targetModel}`;
|
|
2742
2866
|
modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
|
|
2867
|
+
if (lastStreamingUsage && (lastStreamingUsage.inputTokens > 0 || lastStreamingUsage.outputTokens > 0)) {
|
|
2868
|
+
trackTokens(targetModel, lastStreamingUsage.inputTokens, lastStreamingUsage.outputTokens);
|
|
2869
|
+
log(`Tokens: ${lastStreamingUsage.inputTokens} in, ${lastStreamingUsage.outputTokens} out`);
|
|
2870
|
+
lastStreamingUsage = null;
|
|
2871
|
+
}
|
|
2743
2872
|
relay.run({
|
|
2744
2873
|
prompt: promptText.slice(0, 500),
|
|
2745
2874
|
taskType,
|
|
@@ -2764,13 +2893,14 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
2764
2893
|
});
|
|
2765
2894
|
res.end();
|
|
2766
2895
|
}
|
|
2767
|
-
async function handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, relay, promptText, taskType, confidence, routingMode, startTime, log, betaHeaders) {
|
|
2896
|
+
async function handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, anthropicAuth, relay, promptText, taskType, confidence, routingMode, startTime, log, betaHeaders) {
|
|
2768
2897
|
let providerResponse;
|
|
2769
2898
|
let responseData;
|
|
2770
2899
|
try {
|
|
2771
2900
|
switch (targetProvider) {
|
|
2772
2901
|
case "anthropic": {
|
|
2773
|
-
|
|
2902
|
+
if (!anthropicAuth) throw new Error("No Anthropic auth");
|
|
2903
|
+
providerResponse = await forwardToAnthropic(request, targetModel, anthropicAuth, betaHeaders);
|
|
2774
2904
|
const rawData = await providerResponse.json();
|
|
2775
2905
|
if (!providerResponse.ok) {
|
|
2776
2906
|
res.writeHead(providerResponse.status, { "Content-Type": "application/json" });
|
|
@@ -2830,6 +2960,11 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
|
|
|
2830
2960
|
const durationMs = Date.now() - startTime;
|
|
2831
2961
|
const modelKey = `${targetProvider}/${targetModel}`;
|
|
2832
2962
|
modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
|
|
2963
|
+
const usage = responseData["usage"];
|
|
2964
|
+
if (usage?.prompt_tokens || usage?.completion_tokens) {
|
|
2965
|
+
trackTokens(targetModel, usage.prompt_tokens ?? 0, usage.completion_tokens ?? 0);
|
|
2966
|
+
log(`Tokens: ${usage.prompt_tokens ?? 0} in, ${usage.completion_tokens ?? 0} out`);
|
|
2967
|
+
}
|
|
2833
2968
|
try {
|
|
2834
2969
|
const runResult = await relay.run({
|
|
2835
2970
|
prompt: promptText.slice(0, 500),
|