@relayplane/proxy 0.1.8 → 0.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +144 -43
- package/dist/cli.js +158 -23
- package/dist/cli.js.map +1 -1
- package/dist/cli.mjs +158 -23
- package/dist/cli.mjs.map +1 -1
- package/dist/index.d.mts +23 -0
- package/dist/index.d.ts +23 -0
- package/dist/index.js +158 -23
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +158 -23
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -4,6 +4,19 @@
|
|
|
4
4
|
|
|
5
5
|
Intelligent AI model routing that cuts costs by 50-80% while maintaining quality.
|
|
6
6
|
|
|
7
|
+
> **Note:** Designed for standard API key users (`ANTHROPIC_API_KEY`, `OPENAI_API_KEY`). MAX subscription OAuth is not currently supported — MAX users should continue using their provider directly.
|
|
8
|
+
|
|
9
|
+
> ⚠️ **Cost Monitoring Required**
|
|
10
|
+
>
|
|
11
|
+
> RelayPlane routes requests to LLM providers using your API keys. **This incurs real costs.**
|
|
12
|
+
>
|
|
13
|
+
> - Set up billing alerts with your providers (Anthropic, OpenAI, etc.)
|
|
14
|
+
> - Monitor usage through your provider's dashboard
|
|
15
|
+
> - Use `/relayplane stats` or `curl localhost:3001/control/stats` to track usage
|
|
16
|
+
> - Start with test requests to understand routing behavior
|
|
17
|
+
>
|
|
18
|
+
> RelayPlane provides cost *optimization*, not cost *elimination*. You are responsible for monitoring your actual spending.
|
|
19
|
+
|
|
7
20
|
[CI status](https://github.com/RelayPlane/proxy/actions/workflows/ci.yml)
|
|
8
21
|
[npm package](https://www.npmjs.com/package/@relayplane/proxy)
|
|
9
22
|
[License: MIT](https://opensource.org/licenses/MIT)
|
|
@@ -39,6 +52,23 @@ npx @relayplane/proxy stats --days 30
|
|
|
39
52
|
npx @relayplane/proxy --help
|
|
40
53
|
```
|
|
41
54
|
|
|
55
|
+
## OpenClaw Slash Commands
|
|
56
|
+
|
|
57
|
+
If you're using OpenClaw, these chat commands are available:
|
|
58
|
+
|
|
59
|
+
| Command | Description |
|
|
60
|
+
|---------|-------------|
|
|
61
|
+
| `/relayplane stats` | Show usage statistics and cost savings |
|
|
62
|
+
| `/relayplane status` | Show proxy health and configuration |
|
|
63
|
+
| `/relayplane switch <mode>` | Change routing mode (auto\|cost\|fast\|quality) |
|
|
64
|
+
| `/relayplane models` | List available routing models |
|
|
65
|
+
|
|
66
|
+
Example:
|
|
67
|
+
```
|
|
68
|
+
/relayplane stats
|
|
69
|
+
/relayplane switch cost
|
|
70
|
+
```
|
|
71
|
+
|
|
42
72
|
## Quick Start
|
|
43
73
|
|
|
44
74
|
### 1. Set your API keys
|
|
@@ -110,11 +140,11 @@ Unlike static routing rules, RelayPlane adapts to **your** usage patterns.
|
|
|
110
140
|
|
|
111
141
|
| Provider | Models | Streaming | Tools |
|
|
112
142
|
|----------|--------|-----------|-------|
|
|
113
|
-
| **Anthropic** | Claude
|
|
114
|
-
| **OpenAI** | GPT-
|
|
115
|
-
| **Google** | Gemini 2.0 Flash,
|
|
116
|
-
| **xAI** | Grok
|
|
117
|
-
| **Moonshot** | v1
|
|
143
|
+
| **Anthropic** | Claude 3.5 Haiku, Sonnet 4, Opus 4.5 | ✓ | ✓ |
|
|
144
|
+
| **OpenAI** | GPT-4o, GPT-4o-mini, GPT-4.1, o1, o3 | ✓ | ✓ |
|
|
145
|
+
| **Google** | Gemini 2.0 Flash, Gemini Pro | ✓ | ✓ |
|
|
146
|
+
| **xAI** | Grok (grok-*) | ✓ | ✓ |
|
|
147
|
+
| **Moonshot** | Moonshot v1 (8k, 32k, 128k) | ✓ | ✓ |
|
|
118
148
|
|
|
119
149
|
## Routing Modes
|
|
120
150
|
|
|
@@ -178,73 +208,144 @@ Options:
|
|
|
178
208
|
|
|
179
209
|
## REST API
|
|
180
210
|
|
|
181
|
-
The proxy exposes endpoints for stats and monitoring:
|
|
211
|
+
The proxy exposes control endpoints for stats and monitoring:
|
|
182
212
|
|
|
183
|
-
### `GET /
|
|
213
|
+
### `GET /control/status`
|
|
184
214
|
|
|
185
|
-
|
|
215
|
+
Proxy status and current configuration.
|
|
186
216
|
|
|
187
217
|
```bash
|
|
188
|
-
curl http://localhost:3001/
|
|
218
|
+
curl http://localhost:3001/control/status
|
|
189
219
|
```
|
|
190
220
|
|
|
191
221
|
```json
|
|
192
222
|
{
|
|
193
|
-
"
|
|
194
|
-
"
|
|
195
|
-
"
|
|
196
|
-
"providers": { "anthropic": true, "openai": true, "google": false },
|
|
197
|
-
"totalRuns": 142
|
|
223
|
+
"enabled": true,
|
|
224
|
+
"mode": "cascade",
|
|
225
|
+
"modelOverrides": {}
|
|
198
226
|
}
|
|
199
227
|
```
|
|
200
228
|
|
|
201
|
-
### `GET /stats`
|
|
229
|
+
### `GET /control/stats`
|
|
202
230
|
|
|
203
|
-
Aggregated statistics and
|
|
231
|
+
Aggregated statistics and routing counts.
|
|
204
232
|
|
|
205
233
|
```bash
|
|
206
|
-
curl http://localhost:3001/stats
|
|
234
|
+
curl http://localhost:3001/control/stats
|
|
207
235
|
```
|
|
208
236
|
|
|
209
237
|
```json
|
|
210
238
|
{
|
|
211
|
-
"
|
|
212
|
-
"
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
239
|
+
"uptimeMs": 3600000,
|
|
240
|
+
"uptimeFormatted": "60m 0s",
|
|
241
|
+
"totalRequests": 142,
|
|
242
|
+
"successfulRequests": 138,
|
|
243
|
+
"failedRequests": 4,
|
|
244
|
+
"successRate": "97.2%",
|
|
245
|
+
"avgLatencyMs": 1203,
|
|
246
|
+
"escalations": 12,
|
|
247
|
+
"routingCounts": {
|
|
248
|
+
"auto": 100,
|
|
249
|
+
"cost": 30,
|
|
250
|
+
"passthrough": 12
|
|
217
251
|
},
|
|
218
|
-
"
|
|
219
|
-
"anthropic/claude-3-5-haiku-latest":
|
|
220
|
-
"anthropic/claude-sonnet-4-20250514":
|
|
252
|
+
"modelCounts": {
|
|
253
|
+
"anthropic/claude-3-5-haiku-latest": 98,
|
|
254
|
+
"anthropic/claude-sonnet-4-20250514": 44
|
|
221
255
|
}
|
|
222
256
|
}
|
|
223
257
|
```
|
|
224
258
|
|
|
225
|
-
### `
|
|
259
|
+
### `POST /control/enable` / `POST /control/disable`
|
|
260
|
+
|
|
261
|
+
Enable or disable routing (passthrough mode when disabled).
|
|
262
|
+
|
|
263
|
+
```bash
|
|
264
|
+
curl -X POST http://localhost:3001/control/enable
|
|
265
|
+
curl -X POST http://localhost:3001/control/disable
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
### `POST /control/config`
|
|
226
269
|
|
|
227
|
-
|
|
270
|
+
Update configuration (hot-reload, merges with existing).
|
|
228
271
|
|
|
229
272
|
```bash
|
|
230
|
-
curl
|
|
273
|
+
curl -X POST http://localhost:3001/control/config \
|
|
274
|
+
-H "Content-Type: application/json" \
|
|
275
|
+
-d '{"routing": {"mode": "cascade"}}'
|
|
231
276
|
```
|
|
232
277
|
|
|
278
|
+
## Configuration
|
|
279
|
+
|
|
280
|
+
RelayPlane creates a config file on first run at `~/.relayplane/config.json`:
|
|
281
|
+
|
|
233
282
|
```json
|
|
234
283
|
{
|
|
235
|
-
"
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
"
|
|
240
|
-
"
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
284
|
+
"enabled": true,
|
|
285
|
+
"routing": {
|
|
286
|
+
"mode": "cascade",
|
|
287
|
+
"cascade": {
|
|
288
|
+
"enabled": true,
|
|
289
|
+
"models": [
|
|
290
|
+
"claude-3-haiku-20240307",
|
|
291
|
+
"claude-3-5-sonnet-20241022",
|
|
292
|
+
"claude-3-opus-20240229"
|
|
293
|
+
],
|
|
294
|
+
"escalateOn": "uncertainty",
|
|
295
|
+
"maxEscalations": 1
|
|
296
|
+
},
|
|
297
|
+
"complexity": {
|
|
298
|
+
"enabled": true,
|
|
299
|
+
"simple": "claude-3-haiku-20240307",
|
|
300
|
+
"moderate": "claude-3-5-sonnet-20241022",
|
|
301
|
+
"complex": "claude-3-opus-20240229"
|
|
245
302
|
}
|
|
246
|
-
|
|
247
|
-
"
|
|
303
|
+
},
|
|
304
|
+
"reliability": {
|
|
305
|
+
"cooldowns": {
|
|
306
|
+
"enabled": true,
|
|
307
|
+
"allowedFails": 3,
|
|
308
|
+
"windowSeconds": 60,
|
|
309
|
+
"cooldownSeconds": 120
|
|
310
|
+
}
|
|
311
|
+
},
|
|
312
|
+
"modelOverrides": {}
|
|
313
|
+
}
|
|
314
|
+
```
|
|
315
|
+
|
|
316
|
+
**Edit and save — changes apply instantly** (hot-reload, no restart needed).
|
|
317
|
+
|
|
318
|
+
### Configuration Options
|
|
319
|
+
|
|
320
|
+
| Field | Description |
|
|
321
|
+
|-------|-------------|
|
|
322
|
+
| `enabled` | Enable/disable routing (false = passthrough mode) |
|
|
323
|
+
| `routing.mode` | `"cascade"` or `"standard"` |
|
|
324
|
+
| `routing.cascade.models` | Ordered list of models to try (cheapest first) |
|
|
325
|
+
| `routing.cascade.escalateOn` | When to escalate: `"uncertainty"`, `"refusal"`, or `"error"` |
|
|
326
|
+
| `routing.complexity.simple/moderate/complex` | Models for each complexity level |
|
|
327
|
+
| `reliability.cooldowns` | Auto-disable failing providers temporarily |
|
|
328
|
+
| `modelOverrides` | Map input model names to different targets |
|
|
329
|
+
|
|
330
|
+
### Examples
|
|
331
|
+
|
|
332
|
+
Use GPT-4o for complex tasks:
|
|
333
|
+
```json
|
|
334
|
+
{
|
|
335
|
+
"routing": {
|
|
336
|
+
"complexity": {
|
|
337
|
+
"complex": "gpt-4o"
|
|
338
|
+
}
|
|
339
|
+
}
|
|
340
|
+
}
|
|
341
|
+
```
|
|
342
|
+
|
|
343
|
+
Override a specific model:
|
|
344
|
+
```json
|
|
345
|
+
{
|
|
346
|
+
"modelOverrides": {
|
|
347
|
+
"claude-3-opus": "claude-3-5-sonnet-20241022"
|
|
348
|
+
}
|
|
248
349
|
}
|
|
249
350
|
```
|
|
250
351
|
|
|
@@ -262,9 +363,9 @@ sqlite3 ~/.relayplane/data.db "SELECT * FROM routing_rules"
|
|
|
262
363
|
|
|
263
364
|
## Links
|
|
264
365
|
|
|
265
|
-
- [
|
|
366
|
+
- [RelayPlane Proxy](https://relayplane.com/integrations/openclaw)
|
|
266
367
|
- [GitHub](https://github.com/RelayPlane/proxy)
|
|
267
|
-
- [RelayPlane
|
|
368
|
+
- [RelayPlane](https://relayplane.com/)
|
|
268
369
|
|
|
269
370
|
## License
|
|
270
371
|
|
package/dist/cli.js
CHANGED
|
@@ -1601,12 +1601,19 @@ var StrategySchema = import_zod.z.object({
|
|
|
1601
1601
|
minConfidence: import_zod.z.number().min(0).max(1).optional(),
|
|
1602
1602
|
fallback: import_zod.z.string().optional()
|
|
1603
1603
|
});
|
|
1604
|
+
var AuthSchema = import_zod.z.object({
|
|
1605
|
+
anthropicApiKey: import_zod.z.string().optional(),
|
|
1606
|
+
anthropicMaxToken: import_zod.z.string().optional(),
|
|
1607
|
+
useMaxForModels: import_zod.z.array(import_zod.z.string()).optional()
|
|
1608
|
+
// Default: ['opus']
|
|
1609
|
+
}).optional();
|
|
1604
1610
|
var ConfigSchema = import_zod.z.object({
|
|
1605
1611
|
strategies: import_zod.z.record(import_zod.z.string(), StrategySchema).optional(),
|
|
1606
1612
|
defaults: import_zod.z.object({
|
|
1607
1613
|
qualityModel: import_zod.z.string().optional(),
|
|
1608
1614
|
costModel: import_zod.z.string().optional()
|
|
1609
|
-
}).optional()
|
|
1615
|
+
}).optional(),
|
|
1616
|
+
auth: AuthSchema
|
|
1610
1617
|
});
|
|
1611
1618
|
var DEFAULT_CONFIG = {
|
|
1612
1619
|
strategies: {
|
|
@@ -1666,6 +1673,19 @@ function loadConfig() {
|
|
|
1666
1673
|
function getStrategy(config, taskType) {
|
|
1667
1674
|
return config.strategies?.[taskType] ?? null;
|
|
1668
1675
|
}
|
|
1676
|
+
function getAnthropicAuth(config, model) {
|
|
1677
|
+
const auth = config.auth;
|
|
1678
|
+
const useMaxForModels = auth?.useMaxForModels ?? ["opus"];
|
|
1679
|
+
const shouldUseMax = useMaxForModels.some((m) => model.toLowerCase().includes(m.toLowerCase()));
|
|
1680
|
+
if (shouldUseMax && auth?.anthropicMaxToken) {
|
|
1681
|
+
return { type: "max", value: auth.anthropicMaxToken };
|
|
1682
|
+
}
|
|
1683
|
+
const apiKey = auth?.anthropicApiKey ?? process.env["ANTHROPIC_API_KEY"];
|
|
1684
|
+
if (apiKey) {
|
|
1685
|
+
return { type: "apiKey", value: apiKey };
|
|
1686
|
+
}
|
|
1687
|
+
return null;
|
|
1688
|
+
}
|
|
1669
1689
|
function watchConfig(onChange) {
|
|
1670
1690
|
const configPath = getConfigPath();
|
|
1671
1691
|
const dir = path2.dirname(configPath);
|
|
@@ -1686,10 +1706,67 @@ function watchConfig(onChange) {
|
|
|
1686
1706
|
}
|
|
1687
1707
|
|
|
1688
1708
|
// src/proxy.ts
|
|
1689
|
-
var VERSION = "0.1.
|
|
1709
|
+
var VERSION = "0.1.9";
|
|
1690
1710
|
var recentRuns = [];
|
|
1691
1711
|
var MAX_RECENT_RUNS = 100;
|
|
1692
1712
|
var modelCounts = {};
|
|
1713
|
+
var tokenStats = {};
|
|
1714
|
+
var MODEL_PRICING2 = {
|
|
1715
|
+
// Anthropic
|
|
1716
|
+
"claude-3-haiku-20240307": { input: 0.25, output: 1.25 },
|
|
1717
|
+
"claude-3-5-haiku-20241022": { input: 1, output: 5 },
|
|
1718
|
+
"claude-3-5-haiku-latest": { input: 1, output: 5 },
|
|
1719
|
+
"claude-3-5-sonnet-20241022": { input: 3, output: 15 },
|
|
1720
|
+
"claude-sonnet-4-20250514": { input: 3, output: 15 },
|
|
1721
|
+
"claude-3-opus-20240229": { input: 15, output: 75 },
|
|
1722
|
+
"claude-opus-4-5-20250514": { input: 15, output: 75 },
|
|
1723
|
+
// OpenAI
|
|
1724
|
+
"gpt-4o": { input: 2.5, output: 10 },
|
|
1725
|
+
"gpt-4o-mini": { input: 0.15, output: 0.6 },
|
|
1726
|
+
"gpt-4-turbo": { input: 10, output: 30 },
|
|
1727
|
+
// Defaults for unknown models
|
|
1728
|
+
"default-cheap": { input: 1, output: 5 },
|
|
1729
|
+
"default-expensive": { input: 15, output: 75 }
|
|
1730
|
+
};
|
|
1731
|
+
function trackTokens(model, inputTokens, outputTokens) {
|
|
1732
|
+
if (!tokenStats[model]) {
|
|
1733
|
+
tokenStats[model] = { inputTokens: 0, outputTokens: 0, requests: 0 };
|
|
1734
|
+
}
|
|
1735
|
+
tokenStats[model].inputTokens += inputTokens;
|
|
1736
|
+
tokenStats[model].outputTokens += outputTokens;
|
|
1737
|
+
tokenStats[model].requests += 1;
|
|
1738
|
+
}
|
|
1739
|
+
function calculateCosts() {
|
|
1740
|
+
let totalInputTokens = 0;
|
|
1741
|
+
let totalOutputTokens = 0;
|
|
1742
|
+
let actualCostUsd = 0;
|
|
1743
|
+
const byModel = {};
|
|
1744
|
+
for (const [model, stats] of Object.entries(tokenStats)) {
|
|
1745
|
+
totalInputTokens += stats.inputTokens;
|
|
1746
|
+
totalOutputTokens += stats.outputTokens;
|
|
1747
|
+
const pricing = MODEL_PRICING2[model] || MODEL_PRICING2["default-cheap"];
|
|
1748
|
+
const cost = stats.inputTokens / 1e6 * pricing.input + stats.outputTokens / 1e6 * pricing.output;
|
|
1749
|
+
actualCostUsd += cost;
|
|
1750
|
+
byModel[model] = {
|
|
1751
|
+
inputTokens: stats.inputTokens,
|
|
1752
|
+
outputTokens: stats.outputTokens,
|
|
1753
|
+
costUsd: parseFloat(cost.toFixed(4))
|
|
1754
|
+
};
|
|
1755
|
+
}
|
|
1756
|
+
const opusPricing = MODEL_PRICING2["claude-opus-4-5-20250514"];
|
|
1757
|
+
const opusCostUsd = totalInputTokens / 1e6 * opusPricing.input + totalOutputTokens / 1e6 * opusPricing.output;
|
|
1758
|
+
const savingsUsd = opusCostUsd - actualCostUsd;
|
|
1759
|
+
const savingsPercent = opusCostUsd > 0 ? (savingsUsd / opusCostUsd * 100).toFixed(1) + "%" : "0%";
|
|
1760
|
+
return {
|
|
1761
|
+
totalInputTokens,
|
|
1762
|
+
totalOutputTokens,
|
|
1763
|
+
actualCostUsd: parseFloat(actualCostUsd.toFixed(4)),
|
|
1764
|
+
opusCostUsd: parseFloat(opusCostUsd.toFixed(4)),
|
|
1765
|
+
savingsUsd: parseFloat(savingsUsd.toFixed(4)),
|
|
1766
|
+
savingsPercent,
|
|
1767
|
+
byModel
|
|
1768
|
+
};
|
|
1769
|
+
}
|
|
1693
1770
|
var serverStartTime = 0;
|
|
1694
1771
|
var currentConfig = loadConfig();
|
|
1695
1772
|
var DEFAULT_ENDPOINTS = {
|
|
@@ -1754,13 +1831,17 @@ function extractPromptText(messages) {
|
|
|
1754
1831
|
return "";
|
|
1755
1832
|
}).join("\n");
|
|
1756
1833
|
}
|
|
1757
|
-
async function forwardToAnthropic(request, targetModel,
|
|
1834
|
+
async function forwardToAnthropic(request, targetModel, auth, betaHeaders) {
|
|
1758
1835
|
const anthropicBody = buildAnthropicBody(request, targetModel, false);
|
|
1759
1836
|
const headers = {
|
|
1760
1837
|
"Content-Type": "application/json",
|
|
1761
|
-
"x-api-key": apiKey,
|
|
1762
1838
|
"anthropic-version": "2023-06-01"
|
|
1763
1839
|
};
|
|
1840
|
+
if (auth.type === "max") {
|
|
1841
|
+
headers["Authorization"] = `Bearer ${auth.value}`;
|
|
1842
|
+
} else {
|
|
1843
|
+
headers["x-api-key"] = auth.value;
|
|
1844
|
+
}
|
|
1764
1845
|
if (betaHeaders) {
|
|
1765
1846
|
headers["anthropic-beta"] = betaHeaders;
|
|
1766
1847
|
}
|
|
@@ -1771,13 +1852,17 @@ async function forwardToAnthropic(request, targetModel, apiKey, betaHeaders) {
|
|
|
1771
1852
|
});
|
|
1772
1853
|
return response;
|
|
1773
1854
|
}
|
|
1774
|
-
async function forwardToAnthropicStream(request, targetModel,
|
|
1855
|
+
async function forwardToAnthropicStream(request, targetModel, auth, betaHeaders) {
|
|
1775
1856
|
const anthropicBody = buildAnthropicBody(request, targetModel, true);
|
|
1776
1857
|
const headers = {
|
|
1777
1858
|
"Content-Type": "application/json",
|
|
1778
|
-
"x-api-key": apiKey,
|
|
1779
1859
|
"anthropic-version": "2023-06-01"
|
|
1780
1860
|
};
|
|
1861
|
+
if (auth.type === "max") {
|
|
1862
|
+
headers["Authorization"] = `Bearer ${auth.value}`;
|
|
1863
|
+
} else {
|
|
1864
|
+
headers["x-api-key"] = auth.value;
|
|
1865
|
+
}
|
|
1781
1866
|
if (betaHeaders) {
|
|
1782
1867
|
headers["anthropic-beta"] = betaHeaders;
|
|
1783
1868
|
}
|
|
@@ -2309,6 +2394,7 @@ function convertAnthropicStreamEvent(eventType, eventData, messageId, model, too
|
|
|
2309
2394
|
return null;
|
|
2310
2395
|
}
|
|
2311
2396
|
}
|
|
2397
|
+
var lastStreamingUsage = null;
|
|
2312
2398
|
async function* convertAnthropicStream(response, model) {
|
|
2313
2399
|
const reader = response.body?.getReader();
|
|
2314
2400
|
if (!reader) {
|
|
@@ -2321,6 +2407,8 @@ async function* convertAnthropicStream(response, model) {
|
|
|
2321
2407
|
currentToolIndex: 0,
|
|
2322
2408
|
tools: /* @__PURE__ */ new Map()
|
|
2323
2409
|
};
|
|
2410
|
+
let streamInputTokens = 0;
|
|
2411
|
+
let streamOutputTokens = 0;
|
|
2324
2412
|
try {
|
|
2325
2413
|
while (true) {
|
|
2326
2414
|
const { done, value } = await reader.read();
|
|
@@ -2338,6 +2426,17 @@ async function* convertAnthropicStream(response, model) {
|
|
|
2338
2426
|
} else if (line === "" && eventType && eventData) {
|
|
2339
2427
|
try {
|
|
2340
2428
|
const parsed = JSON.parse(eventData);
|
|
2429
|
+
if (eventType === "message_start") {
|
|
2430
|
+
const msg = parsed["message"];
|
|
2431
|
+
if (msg?.usage?.input_tokens) {
|
|
2432
|
+
streamInputTokens = msg.usage.input_tokens;
|
|
2433
|
+
}
|
|
2434
|
+
} else if (eventType === "message_delta") {
|
|
2435
|
+
const usage = parsed["usage"];
|
|
2436
|
+
if (usage?.output_tokens) {
|
|
2437
|
+
streamOutputTokens = usage.output_tokens;
|
|
2438
|
+
}
|
|
2439
|
+
}
|
|
2341
2440
|
const converted = convertAnthropicStreamEvent(eventType, parsed, messageId, model, toolState);
|
|
2342
2441
|
if (converted) {
|
|
2343
2442
|
yield converted;
|
|
@@ -2349,6 +2448,7 @@ async function* convertAnthropicStream(response, model) {
|
|
|
2349
2448
|
}
|
|
2350
2449
|
}
|
|
2351
2450
|
}
|
|
2451
|
+
lastStreamingUsage = { inputTokens: streamInputTokens, outputTokens: streamOutputTokens };
|
|
2352
2452
|
} finally {
|
|
2353
2453
|
reader.releaseLock();
|
|
2354
2454
|
}
|
|
@@ -2446,23 +2546,32 @@ async function startProxy(config = {}) {
|
|
|
2446
2546
|
}
|
|
2447
2547
|
if (req.method === "GET" && pathname === "/stats") {
|
|
2448
2548
|
const stats = relay.stats();
|
|
2449
|
-
const
|
|
2549
|
+
const costs = calculateCosts();
|
|
2450
2550
|
const totalRuns = Object.values(modelCounts).reduce((a, b) => a + b, 0);
|
|
2451
2551
|
const modelDistribution = {};
|
|
2452
2552
|
for (const [model, count] of Object.entries(modelCounts)) {
|
|
2553
|
+
const modelName = model.split("/")[1] || model;
|
|
2554
|
+
const tokenData = costs.byModel[modelName];
|
|
2453
2555
|
modelDistribution[model] = {
|
|
2454
2556
|
count,
|
|
2455
|
-
percentage: totalRuns > 0 ? (count / totalRuns * 100).toFixed(1) + "%" : "0%"
|
|
2557
|
+
percentage: totalRuns > 0 ? (count / totalRuns * 100).toFixed(1) + "%" : "0%",
|
|
2558
|
+
tokens: tokenData ? { input: tokenData.inputTokens, output: tokenData.outputTokens } : void 0,
|
|
2559
|
+
costUsd: tokenData?.costUsd
|
|
2456
2560
|
};
|
|
2457
2561
|
}
|
|
2458
2562
|
res.writeHead(200, { "Content-Type": "application/json" });
|
|
2459
2563
|
res.end(JSON.stringify({
|
|
2460
2564
|
totalRuns,
|
|
2461
|
-
|
|
2462
|
-
|
|
2463
|
-
|
|
2464
|
-
|
|
2465
|
-
|
|
2565
|
+
tokens: {
|
|
2566
|
+
input: costs.totalInputTokens,
|
|
2567
|
+
output: costs.totalOutputTokens,
|
|
2568
|
+
total: costs.totalInputTokens + costs.totalOutputTokens
|
|
2569
|
+
},
|
|
2570
|
+
costs: {
|
|
2571
|
+
actualUsd: costs.actualCostUsd,
|
|
2572
|
+
opusBaselineUsd: costs.opusCostUsd,
|
|
2573
|
+
savingsUsd: costs.savingsUsd,
|
|
2574
|
+
savingsPercent: costs.savingsPercent
|
|
2466
2575
|
},
|
|
2467
2576
|
modelDistribution,
|
|
2468
2577
|
byTaskType: stats.byTaskType,
|
|
@@ -2583,12 +2692,24 @@ async function startProxy(config = {}) {
|
|
|
2583
2692
|
}
|
|
2584
2693
|
}
|
|
2585
2694
|
log(`Routing to: ${targetProvider}/${targetModel}`);
|
|
2586
|
-
|
|
2587
|
-
|
|
2588
|
-
if (
|
|
2589
|
-
|
|
2590
|
-
|
|
2591
|
-
|
|
2695
|
+
let apiKey;
|
|
2696
|
+
let anthropicAuth = null;
|
|
2697
|
+
if (targetProvider === "anthropic") {
|
|
2698
|
+
anthropicAuth = getAnthropicAuth(currentConfig, targetModel);
|
|
2699
|
+
if (!anthropicAuth) {
|
|
2700
|
+
res.writeHead(500, { "Content-Type": "application/json" });
|
|
2701
|
+
res.end(JSON.stringify({ error: "No Anthropic auth configured (set ANTHROPIC_API_KEY or config.auth.anthropicMaxToken)" }));
|
|
2702
|
+
return;
|
|
2703
|
+
}
|
|
2704
|
+
log(`Using ${anthropicAuth.type === "max" ? "MAX token" : "API key"} auth for ${targetModel}`);
|
|
2705
|
+
} else {
|
|
2706
|
+
const apiKeyEnv = DEFAULT_ENDPOINTS[targetProvider]?.apiKeyEnv ?? `${targetProvider.toUpperCase()}_API_KEY`;
|
|
2707
|
+
apiKey = process.env[apiKeyEnv];
|
|
2708
|
+
if (!apiKey) {
|
|
2709
|
+
res.writeHead(500, { "Content-Type": "application/json" });
|
|
2710
|
+
res.end(JSON.stringify({ error: `Missing ${apiKeyEnv} environment variable` }));
|
|
2711
|
+
return;
|
|
2712
|
+
}
|
|
2592
2713
|
}
|
|
2593
2714
|
const startTime = Date.now();
|
|
2594
2715
|
const betaHeaders = req.headers["anthropic-beta"];
|
|
@@ -2599,6 +2720,7 @@ async function startProxy(config = {}) {
|
|
|
2599
2720
|
targetProvider,
|
|
2600
2721
|
targetModel,
|
|
2601
2722
|
apiKey,
|
|
2723
|
+
anthropicAuth,
|
|
2602
2724
|
relay,
|
|
2603
2725
|
promptText,
|
|
2604
2726
|
taskType,
|
|
@@ -2615,6 +2737,7 @@ async function startProxy(config = {}) {
|
|
|
2615
2737
|
targetProvider,
|
|
2616
2738
|
targetModel,
|
|
2617
2739
|
apiKey,
|
|
2740
|
+
anthropicAuth,
|
|
2618
2741
|
relay,
|
|
2619
2742
|
promptText,
|
|
2620
2743
|
taskType,
|
|
@@ -2644,12 +2767,13 @@ async function startProxy(config = {}) {
|
|
|
2644
2767
|
});
|
|
2645
2768
|
});
|
|
2646
2769
|
}
|
|
2647
|
-
async function handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, relay, promptText, taskType, confidence, routingMode, startTime, log, betaHeaders) {
|
|
2770
|
+
async function handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, anthropicAuth, relay, promptText, taskType, confidence, routingMode, startTime, log, betaHeaders) {
|
|
2648
2771
|
let providerResponse;
|
|
2649
2772
|
try {
|
|
2650
2773
|
switch (targetProvider) {
|
|
2651
2774
|
case "anthropic":
|
|
2652
|
-
|
|
2775
|
+
if (!anthropicAuth) throw new Error("No Anthropic auth");
|
|
2776
|
+
providerResponse = await forwardToAnthropicStream(request, targetModel, anthropicAuth, betaHeaders);
|
|
2653
2777
|
break;
|
|
2654
2778
|
case "google":
|
|
2655
2779
|
providerResponse = await forwardToGeminiStream(request, targetModel, apiKey);
|
|
@@ -2703,6 +2827,11 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
2703
2827
|
const durationMs = Date.now() - startTime;
|
|
2704
2828
|
const modelKey = `${targetProvider}/${targetModel}`;
|
|
2705
2829
|
modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
|
|
2830
|
+
if (lastStreamingUsage && (lastStreamingUsage.inputTokens > 0 || lastStreamingUsage.outputTokens > 0)) {
|
|
2831
|
+
trackTokens(targetModel, lastStreamingUsage.inputTokens, lastStreamingUsage.outputTokens);
|
|
2832
|
+
log(`Tokens: ${lastStreamingUsage.inputTokens} in, ${lastStreamingUsage.outputTokens} out`);
|
|
2833
|
+
lastStreamingUsage = null;
|
|
2834
|
+
}
|
|
2706
2835
|
relay.run({
|
|
2707
2836
|
prompt: promptText.slice(0, 500),
|
|
2708
2837
|
taskType,
|
|
@@ -2727,13 +2856,14 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
|
|
|
2727
2856
|
});
|
|
2728
2857
|
res.end();
|
|
2729
2858
|
}
|
|
2730
|
-
async function handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, relay, promptText, taskType, confidence, routingMode, startTime, log, betaHeaders) {
|
|
2859
|
+
async function handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, anthropicAuth, relay, promptText, taskType, confidence, routingMode, startTime, log, betaHeaders) {
|
|
2731
2860
|
let providerResponse;
|
|
2732
2861
|
let responseData;
|
|
2733
2862
|
try {
|
|
2734
2863
|
switch (targetProvider) {
|
|
2735
2864
|
case "anthropic": {
|
|
2736
|
-
|
|
2865
|
+
if (!anthropicAuth) throw new Error("No Anthropic auth");
|
|
2866
|
+
providerResponse = await forwardToAnthropic(request, targetModel, anthropicAuth, betaHeaders);
|
|
2737
2867
|
const rawData = await providerResponse.json();
|
|
2738
2868
|
if (!providerResponse.ok) {
|
|
2739
2869
|
res.writeHead(providerResponse.status, { "Content-Type": "application/json" });
|
|
@@ -2793,6 +2923,11 @@ async function handleNonStreamingRequest(res, request, targetProvider, targetMod
|
|
|
2793
2923
|
const durationMs = Date.now() - startTime;
|
|
2794
2924
|
const modelKey = `${targetProvider}/${targetModel}`;
|
|
2795
2925
|
modelCounts[modelKey] = (modelCounts[modelKey] || 0) + 1;
|
|
2926
|
+
const usage = responseData["usage"];
|
|
2927
|
+
if (usage?.prompt_tokens || usage?.completion_tokens) {
|
|
2928
|
+
trackTokens(targetModel, usage.prompt_tokens ?? 0, usage.completion_tokens ?? 0);
|
|
2929
|
+
log(`Tokens: ${usage.prompt_tokens ?? 0} in, ${usage.completion_tokens ?? 0} out`);
|
|
2930
|
+
}
|
|
2796
2931
|
try {
|
|
2797
2932
|
const runResult = await relay.run({
|
|
2798
2933
|
prompt: promptText.slice(0, 500),
|