@relayplane/proxy 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -176,13 +176,134 @@ Options:
176
176
  -h, --help Show help
177
177
  ```
178
178
 
179
+ ## REST API
180
+
181
+ The proxy exposes endpoints for stats and monitoring:
182
+
183
+ ### `GET /health`
184
+
185
+ Server health and version info.
186
+
187
+ ```bash
188
+ curl http://localhost:3001/health
189
+ ```
190
+
191
+ ```json
192
+ {
193
+ "status": "ok",
194
+ "version": "0.1.7",
195
+ "uptime": "2h 15m 30s",
196
+ "providers": { "anthropic": true, "openai": true, "google": false },
197
+ "totalRuns": 142
198
+ }
199
+ ```
200
+
201
+ ### `GET /stats`
202
+
203
+ Aggregated statistics and cost savings.
204
+
205
+ ```bash
206
+ curl http://localhost:3001/stats
207
+ ```
208
+
209
+ ```json
210
+ {
211
+ "totalRuns": 142,
212
+ "savings": {
213
+ "estimatedSavingsPercent": "73.2%",
214
+ "actualCostUsd": "0.0234",
215
+ "baselineCostUsd": "0.0873",
216
+ "savedUsd": "0.0639"
217
+ },
218
+ "modelDistribution": {
219
+ "anthropic/claude-3-5-haiku-latest": { "count": 98, "percentage": "69.0%" },
220
+ "anthropic/claude-sonnet-4-20250514": { "count": 44, "percentage": "31.0%" }
221
+ }
222
+ }
223
+ ```
224
+
225
+ ### `GET /runs`
226
+
227
+ Recent routing decisions.
228
+
229
+ ```bash
230
+ curl "http://localhost:3001/runs?limit=10"
231
+ ```
232
+
233
+ ```json
234
+ {
235
+ "runs": [
236
+ {
237
+ "runId": "abc123",
238
+ "timestamp": "2026-02-03T13:26:03Z",
239
+ "model": "anthropic/claude-3-5-haiku-latest",
240
+ "taskType": "code_generation",
241
+ "confidence": 0.92,
242
+ "mode": "auto",
243
+ "durationMs": 1203,
244
+ "promptPreview": "Write a function that..."
245
+ }
246
+ ],
247
+ "total": 142
248
+ }
249
+ ```
250
+
251
+ ## Configuration
252
+
253
+ RelayPlane creates a config file on first run at `~/.relayplane/config.json`:
254
+
255
+ ```json
256
+ {
257
+ "strategies": {
258
+ "code_review": { "model": "anthropic:claude-sonnet-4-20250514" },
259
+ "code_generation": { "model": "anthropic:claude-3-5-haiku-latest" },
260
+ "analysis": { "model": "anthropic:claude-sonnet-4-20250514" },
261
+ "summarization": { "model": "anthropic:claude-3-5-haiku-latest" },
262
+ "creative_writing": { "model": "anthropic:claude-sonnet-4-20250514" },
263
+ "data_extraction": { "model": "anthropic:claude-3-5-haiku-latest" },
264
+ "translation": { "model": "anthropic:claude-3-5-haiku-latest" },
265
+ "question_answering": { "model": "anthropic:claude-3-5-haiku-latest" },
266
+ "general": { "model": "anthropic:claude-3-5-haiku-latest" }
267
+ },
268
+ "defaults": {
269
+ "qualityModel": "claude-sonnet-4-20250514",
270
+ "costModel": "claude-3-5-haiku-latest"
271
+ }
272
+ }
273
+ ```
274
+
275
+ **Edit and save — changes apply instantly** (hot-reload, no restart needed).
276
+
277
+ ### Strategy Options
278
+
279
+ | Field | Description |
280
+ |-------|-------------|
281
+ | `model` | Provider and model in format `provider:model` |
282
+ | `minConfidence` | Optional. Only use this strategy if confidence >= threshold |
283
+ | `fallback` | Optional. Fallback model if primary fails |
284
+
285
+ ### Examples
286
+
287
+ Route all analysis tasks to GPT-4o:
288
+ ```json
289
+ "analysis": { "model": "openai:gpt-4o" }
290
+ ```
291
+
292
+ Use Opus for code review with fallback:
293
+ ```json
294
+ "code_review": {
295
+ "model": "anthropic:claude-opus-4-5-20250514",
296
+ "fallback": "anthropic:claude-sonnet-4-20250514"
297
+ }
298
+ ```
299
+
179
300
  ## Data Storage
180
301
 
181
302
  All data stored locally at `~/.relayplane/data.db` (SQLite).
182
303
 
183
304
  ```bash
184
305
  # View recent runs
185
- sqlite3 ~/.relayplane/data.db "SELECT * FROM runs ORDER BY timestamp DESC LIMIT 10"
306
+ sqlite3 ~/.relayplane/data.db "SELECT * FROM runs ORDER BY created_at DESC LIMIT 10"
186
307
 
187
308
  # Check routing rules
188
309
  sqlite3 ~/.relayplane/data.db "SELECT * FROM routing_rules"
package/dist/cli.js CHANGED
@@ -1591,12 +1591,127 @@ ${input.prompt}` : input.prompt;
1591
1591
  }
1592
1592
  };
1593
1593
 
1594
+ // src/config.ts
1595
+ var fs2 = __toESM(require("fs"));
1596
+ var path2 = __toESM(require("path"));
1597
+ var os2 = __toESM(require("os"));
1598
+ var import_zod = require("zod");
1599
// Schema for one routing strategy entry (the values of `strategies`):
// `model` is a required string; `minConfidence` is an optional number
// constrained to [0, 1]; `fallback` is an optional string.
// NOTE(review): nothing visible in this chunk reads `minConfidence` or
// `fallback` -- confirm they are honoured elsewhere before relying on them.
+ var StrategySchema = import_zod.z.object({
1600
+ model: import_zod.z.string(),
1601
+ minConfidence: import_zod.z.number().min(0).max(1).optional(),
1602
+ fallback: import_zod.z.string().optional()
1603
+ });
1604
// Schema for the optional `auth` section. Every field is optional, and the
// section itself may be omitted. getAnthropicAuth() reads all three fields;
// when `useMaxForModels` is absent it falls back to ["opus"].
+ var AuthSchema = import_zod.z.object({
1605
+ anthropicApiKey: import_zod.z.string().optional(),
1606
+ anthropicMaxToken: import_zod.z.string().optional(),
1607
+ useMaxForModels: import_zod.z.array(import_zod.z.string()).optional()
1608
+ // Default: ['opus']
1609
+ }).optional();
1610
// Top-level schema for the config file: `strategies` is a string-keyed record
// of StrategySchema entries, `defaults` carries the optional quality/cost
// model names, and `auth` is the section above. All three are optional.
+ var ConfigSchema = import_zod.z.object({
1611
+ strategies: import_zod.z.record(import_zod.z.string(), StrategySchema).optional(),
1612
+ defaults: import_zod.z.object({
1613
+ qualityModel: import_zod.z.string().optional(),
1614
+ costModel: import_zod.z.string().optional()
1615
+ }).optional(),
1616
+ auth: AuthSchema
1617
+ });
1618
// Built-in configuration: written to disk on first run and used as the
// fallback whenever the user's config file cannot be loaded. Key order is
// significant because this object is serialised verbatim into config.json.
var DEFAULT_CONFIG = (() => {
  const sonnet = "anthropic:claude-sonnet-4-20250514";
  const haiku = "anthropic:claude-3-5-haiku-latest";
  // [task type, model] pairs, listed in the order they appear in the file.
  const routes = [
    ["code_review", sonnet],
    ["code_generation", haiku],
    ["analysis", sonnet],
    ["summarization", haiku],
    ["creative_writing", sonnet],
    ["data_extraction", haiku],
    ["translation", haiku],
    ["question_answering", haiku],
    ["general", haiku]
  ];
  const strategies = {};
  for (const [taskType, model] of routes) {
    // A fresh object per task so entries never alias one another.
    strategies[taskType] = { model };
  }
  return {
    strategies,
    defaults: {
      qualityModel: "claude-sonnet-4-20250514",
      costModel: "claude-3-5-haiku-latest"
    }
  };
})();
1635
/**
 * Location of the user-level config file.
 *
 * @returns {string} `<home directory>/.relayplane/config.json`
 */
function getConfigPath() {
  const homeDir = os2.homedir();
  const segments = [homeDir, ".relayplane", "config.json"];
  return path2.join(...segments);
}
1638
/**
 * Create the config file (see getConfigPath) populated with DEFAULT_CONFIG
 * when it does not exist yet. An existing file is never overwritten.
 *
 * The file is opened with the exclusive `wx` flag, so the existence check and
 * the creation are a single operation: two processes starting at the same
 * time cannot both write it, and a file that appears between a check and a
 * write cannot be clobbered. It is created with mode 0o600 because the
 * `auth` section of this file may hold API keys / tokens (the mode is ignored
 * on platforms without POSIX permissions).
 *
 * @throws {Error} Any filesystem error other than "file already exists".
 */
function writeDefaultConfig() {
  const configPath = getConfigPath();
  // With `recursive: true` this is a no-op when the directory already exists.
  fs2.mkdirSync(path2.dirname(configPath), { recursive: true });
  try {
    fs2.writeFileSync(
      configPath,
      JSON.stringify(DEFAULT_CONFIG, null, 2) + "\n",
      { encoding: "utf-8", flag: "wx", mode: 0o600 }
    );
  } catch (err) {
    // EEXIST is the normal "user already has a config" path, not an error.
    if (err && err.code === "EEXIST") return;
    throw err;
  }
  console.log(`[relayplane] Created default config at ${configPath}`);
}
1653
/**
 * Load the user config from disk, creating the default file first if needed.
 *
 * Never throws: any failure (directory or file cannot be created, file cannot
 * be read, invalid JSON, schema violation) is logged and DEFAULT_CONFIG is
 * returned instead. This matters because the function runs at module load
 * and from the config watcher's timer, where an exception would take the
 * whole process down.
 *
 * @returns {object} The validated config, or DEFAULT_CONFIG on any failure.
 */
function loadConfig() {
  try {
    // Kept inside the try so that an unwritable home directory degrades to
    // the in-memory defaults instead of throwing out of loadConfig().
    writeDefaultConfig();
    const raw = fs2.readFileSync(getConfigPath(), "utf-8");
    return ConfigSchema.parse(JSON.parse(raw));
  } catch (err) {
    if (err instanceof import_zod.z.ZodError) {
      console.error(`[relayplane] Invalid config: ${err.message}`);
    } else if (err instanceof SyntaxError) {
      console.error(`[relayplane] Config JSON parse error: ${err.message}`);
    } else {
      console.error(`[relayplane] Failed to load config: ${err}`);
    }
    console.log("[relayplane] Using default config");
    return DEFAULT_CONFIG;
  }
}
1673
/**
 * Look up the configured strategy for a task type.
 *
 * Only own properties of `config.strategies` count. A plain bracket lookup
 * would also return inherited Object.prototype members for task types such as
 * "constructor" or "toString", which callers would then treat as a strategy.
 *
 * @param {object} config - Loaded config; `strategies` may be absent.
 * @param {string} taskType - Task type key, e.g. "code_review".
 * @returns {object|null} The strategy entry, or null when none is configured.
 */
function getStrategy(config, taskType) {
  const strategies = config.strategies;
  if (strategies == null) return null;
  if (!Object.prototype.hasOwnProperty.call(strategies, taskType)) return null;
  return strategies[taskType] ?? null;
}
1676
/**
 * Decide which Anthropic credential to use for a given model.
 *
 * Resolution order:
 *   1. `auth.anthropicMaxToken`, when it is set and the model name contains
 *      (case-insensitively) one of `auth.useMaxForModels` (default ["opus"]).
 *   2. `auth.anthropicApiKey` from the config.
 *   3. The ANTHROPIC_API_KEY environment variable.
 *
 * An empty-string key in the config is treated as "not set", so it does not
 * hide a valid ANTHROPIC_API_KEY from the environment.
 *
 * @param {object} config - Loaded config; `auth` may be absent.
 * @param {string} model - Target model name.
 * @returns {{type: "max"|"apiKey", value: string}|null} The credential to
 *   send, or null when nothing usable is configured.
 */
function getAnthropicAuth(config, model) {
  const auth = config.auth;
  const maxToken = auth?.anthropicMaxToken;
  if (maxToken) {
    const useMaxForModels = auth?.useMaxForModels ?? ["opus"];
    const modelName = model.toLowerCase();
    const shouldUseMax = useMaxForModels.some((m) => modelName.includes(m.toLowerCase()));
    if (shouldUseMax) {
      return { type: "max", value: maxToken };
    }
  }
  // `||` rather than `??`: an empty string must fall through to the env var.
  const apiKey = auth?.anthropicApiKey || process.env["ANTHROPIC_API_KEY"];
  if (apiKey) {
    return { type: "apiKey", value: apiKey };
  }
  return null;
}
1689
/**
 * Watch the config directory and invoke `onChange` with a freshly loaded
 * config whenever `config.json` changes.
 *
 * The directory is watched rather than the file itself. Events are debounced
 * by 100 ms so a burst of events results in a single reload.
 *
 * Failures after start-up are logged rather than thrown: a watcher error or
 * an exception from `onChange` must not terminate the process, since
 * hot-reload is a convenience feature.
 *
 * @param {(config: object) => void} onChange - Receives the reloaded config.
 * @returns {object} The fs.FSWatcher, so callers can `close()` it on
 *   shutdown. Earlier versions returned nothing; ignoring the value is fine.
 * @throws {Error} If the directory cannot be created or watching cannot start.
 */
function watchConfig(onChange) {
  const configPath = getConfigPath();
  const dir = path2.dirname(configPath);
  // With `recursive: true` this is a no-op when the directory already exists.
  fs2.mkdirSync(dir, { recursive: true });
  let debounceTimer = null;
  const watcher = fs2.watch(dir, (_eventType, filename) => {
    if (filename !== "config.json") return;
    if (debounceTimer) clearTimeout(debounceTimer);
    debounceTimer = setTimeout(() => {
      debounceTimer = null;
      console.log("[relayplane] Config file changed, reloading...");
      try {
        onChange(loadConfig());
      } catch (err) {
        console.error(`[relayplane] Config reload failed: ${err}`);
      }
    }, 100);
  });
  // Without a listener, an 'error' event on the watcher is thrown as an
  // uncaught exception.
  watcher.on("error", (err) => {
    console.error(`[relayplane] Config watcher error: ${err}`);
  });
  return watcher;
}
1707
+
1594
1708
  // src/proxy.ts
1595
- var VERSION = "0.1.7";
1709
+ var VERSION = "0.1.9";
1596
1710
  var recentRuns = [];
1597
1711
  var MAX_RECENT_RUNS = 100;
1598
1712
  var modelCounts = {};
1599
1713
  var serverStartTime = 0;
1714
+ var currentConfig = loadConfig();
1600
1715
  var DEFAULT_ENDPOINTS = {
1601
1716
  anthropic: {
1602
1717
  baseUrl: "https://api.anthropic.com/v1",
@@ -1659,13 +1774,17 @@ function extractPromptText(messages) {
1659
1774
  return "";
1660
1775
  }).join("\n");
1661
1776
  }
1662
- async function forwardToAnthropic(request, targetModel, apiKey, betaHeaders) {
1777
+ async function forwardToAnthropic(request, targetModel, auth, betaHeaders) {
1663
1778
  const anthropicBody = buildAnthropicBody(request, targetModel, false);
1664
1779
  const headers = {
1665
1780
  "Content-Type": "application/json",
1666
- "x-api-key": apiKey,
1667
1781
  "anthropic-version": "2023-06-01"
1668
1782
  };
1783
+ if (auth.type === "max") {
1784
+ headers["Authorization"] = `Bearer ${auth.value}`;
1785
+ } else {
1786
+ headers["x-api-key"] = auth.value;
1787
+ }
1669
1788
  if (betaHeaders) {
1670
1789
  headers["anthropic-beta"] = betaHeaders;
1671
1790
  }
@@ -1676,13 +1795,17 @@ async function forwardToAnthropic(request, targetModel, apiKey, betaHeaders) {
1676
1795
  });
1677
1796
  return response;
1678
1797
  }
1679
- async function forwardToAnthropicStream(request, targetModel, apiKey, betaHeaders) {
1798
+ async function forwardToAnthropicStream(request, targetModel, auth, betaHeaders) {
1680
1799
  const anthropicBody = buildAnthropicBody(request, targetModel, true);
1681
1800
  const headers = {
1682
1801
  "Content-Type": "application/json",
1683
- "x-api-key": apiKey,
1684
1802
  "anthropic-version": "2023-06-01"
1685
1803
  };
1804
+ if (auth.type === "max") {
1805
+ headers["Authorization"] = `Bearer ${auth.value}`;
1806
+ } else {
1807
+ headers["x-api-key"] = auth.value;
1808
+ }
1686
1809
  if (betaHeaders) {
1687
1810
  headers["anthropic-beta"] = betaHeaders;
1688
1811
  }
@@ -2447,42 +2570,65 @@ async function startProxy(config = {}) {
2447
2570
  const confidence = getInferenceConfidence(promptText, taskType);
2448
2571
  log(`Inferred task: ${taskType} (confidence: ${confidence.toFixed(2)})`);
2449
2572
  if (routingMode !== "passthrough") {
2450
- const rule = relay.routing.get(taskType);
2451
- if (rule && rule.preferredModel) {
2452
- const parsed = parsePreferredModel(rule.preferredModel);
2573
+ const configStrategy = getStrategy(currentConfig, taskType);
2574
+ if (configStrategy) {
2575
+ const parsed = parsePreferredModel(configStrategy.model);
2453
2576
  if (parsed) {
2454
2577
  targetProvider = parsed.provider;
2455
2578
  targetModel = parsed.model;
2456
- log(`Using learned rule: ${rule.preferredModel}`);
2579
+ log(`Using config strategy: ${configStrategy.model}`);
2580
+ }
2581
+ }
2582
+ if (!configStrategy) {
2583
+ const rule = relay.routing.get(taskType);
2584
+ if (rule && rule.preferredModel) {
2585
+ const parsed = parsePreferredModel(rule.preferredModel);
2586
+ if (parsed) {
2587
+ targetProvider = parsed.provider;
2588
+ targetModel = parsed.model;
2589
+ log(`Using learned rule: ${rule.preferredModel}`);
2590
+ } else {
2591
+ const defaultRoute = DEFAULT_ROUTING[taskType];
2592
+ targetProvider = defaultRoute.provider;
2593
+ targetModel = defaultRoute.model;
2594
+ }
2457
2595
  } else {
2458
2596
  const defaultRoute = DEFAULT_ROUTING[taskType];
2459
2597
  targetProvider = defaultRoute.provider;
2460
2598
  targetModel = defaultRoute.model;
2461
2599
  }
2462
- } else {
2463
- const defaultRoute = DEFAULT_ROUTING[taskType];
2464
- targetProvider = defaultRoute.provider;
2465
- targetModel = defaultRoute.model;
2466
2600
  }
2467
2601
  if (routingMode === "cost") {
2468
- const simpleTasks = ["summarization", "data_extraction", "translation", "question_answering"];
2469
- if (simpleTasks.includes(taskType)) {
2470
- targetModel = "claude-3-5-haiku-latest";
2471
- targetProvider = "anthropic";
2472
- }
2602
+ const costModel = currentConfig.defaults?.costModel || "claude-3-5-haiku-latest";
2603
+ targetModel = costModel;
2604
+ targetProvider = "anthropic";
2605
+ log(`Cost mode: using ${costModel}`);
2473
2606
  } else if (routingMode === "quality") {
2474
- const qualityModel = process.env["RELAYPLANE_QUALITY_MODEL"] || "claude-sonnet-4-20250514";
2607
+ const qualityModel = currentConfig.defaults?.qualityModel || process.env["RELAYPLANE_QUALITY_MODEL"] || "claude-sonnet-4-20250514";
2475
2608
  targetModel = qualityModel;
2476
2609
  targetProvider = "anthropic";
2610
+ log(`Quality mode: using ${qualityModel}`);
2477
2611
  }
2478
2612
  }
2479
2613
  log(`Routing to: ${targetProvider}/${targetModel}`);
2480
- const apiKeyEnv = DEFAULT_ENDPOINTS[targetProvider]?.apiKeyEnv ?? `${targetProvider.toUpperCase()}_API_KEY`;
2481
- const apiKey = process.env[apiKeyEnv];
2482
- if (!apiKey) {
2483
- res.writeHead(500, { "Content-Type": "application/json" });
2484
- res.end(JSON.stringify({ error: `Missing ${apiKeyEnv} environment variable` }));
2485
- return;
2614
+ let apiKey;
2615
+ let anthropicAuth = null;
2616
+ if (targetProvider === "anthropic") {
2617
+ anthropicAuth = getAnthropicAuth(currentConfig, targetModel);
2618
+ if (!anthropicAuth) {
2619
+ res.writeHead(500, { "Content-Type": "application/json" });
2620
+ res.end(JSON.stringify({ error: "No Anthropic auth configured (set ANTHROPIC_API_KEY or config.auth.anthropicMaxToken)" }));
2621
+ return;
2622
+ }
2623
+ log(`Using ${anthropicAuth.type === "max" ? "MAX token" : "API key"} auth for ${targetModel}`);
2624
+ } else {
2625
+ const apiKeyEnv = DEFAULT_ENDPOINTS[targetProvider]?.apiKeyEnv ?? `${targetProvider.toUpperCase()}_API_KEY`;
2626
+ apiKey = process.env[apiKeyEnv];
2627
+ if (!apiKey) {
2628
+ res.writeHead(500, { "Content-Type": "application/json" });
2629
+ res.end(JSON.stringify({ error: `Missing ${apiKeyEnv} environment variable` }));
2630
+ return;
2631
+ }
2486
2632
  }
2487
2633
  const startTime = Date.now();
2488
2634
  const betaHeaders = req.headers["anthropic-beta"];
@@ -2493,6 +2639,7 @@ async function startProxy(config = {}) {
2493
2639
  targetProvider,
2494
2640
  targetModel,
2495
2641
  apiKey,
2642
+ anthropicAuth,
2496
2643
  relay,
2497
2644
  promptText,
2498
2645
  taskType,
@@ -2509,6 +2656,7 @@ async function startProxy(config = {}) {
2509
2656
  targetProvider,
2510
2657
  targetModel,
2511
2658
  apiKey,
2659
+ anthropicAuth,
2512
2660
  relay,
2513
2661
  promptText,
2514
2662
  taskType,
@@ -2520,6 +2668,10 @@ async function startProxy(config = {}) {
2520
2668
  );
2521
2669
  }
2522
2670
  });
2671
+ watchConfig((newConfig) => {
2672
+ currentConfig = newConfig;
2673
+ console.log("[relayplane] Config reloaded");
2674
+ });
2523
2675
  return new Promise((resolve, reject) => {
2524
2676
  server.on("error", reject);
2525
2677
  server.listen(port, host, () => {
@@ -2528,17 +2680,19 @@ async function startProxy(config = {}) {
2528
2680
  console.log(` Models: relayplane:auto, relayplane:cost, relayplane:quality`);
2529
2681
  console.log(` Endpoint: POST /v1/chat/completions`);
2530
2682
  console.log(` Stats: GET /stats, /runs, /health`);
2683
+ console.log(` Config: ~/.relayplane/config.json (hot-reload enabled)`);
2531
2684
  console.log(` Streaming: \u2705 Enabled`);
2532
2685
  resolve(server);
2533
2686
  });
2534
2687
  });
2535
2688
  }
2536
- async function handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, relay, promptText, taskType, confidence, routingMode, startTime, log, betaHeaders) {
2689
+ async function handleStreamingRequest(res, request, targetProvider, targetModel, apiKey, anthropicAuth, relay, promptText, taskType, confidence, routingMode, startTime, log, betaHeaders) {
2537
2690
  let providerResponse;
2538
2691
  try {
2539
2692
  switch (targetProvider) {
2540
2693
  case "anthropic":
2541
- providerResponse = await forwardToAnthropicStream(request, targetModel, apiKey, betaHeaders);
2694
+ if (!anthropicAuth) throw new Error("No Anthropic auth");
2695
+ providerResponse = await forwardToAnthropicStream(request, targetModel, anthropicAuth, betaHeaders);
2542
2696
  break;
2543
2697
  case "google":
2544
2698
  providerResponse = await forwardToGeminiStream(request, targetModel, apiKey);
@@ -2616,13 +2770,14 @@ async function handleStreamingRequest(res, request, targetProvider, targetModel,
2616
2770
  });
2617
2771
  res.end();
2618
2772
  }
2619
- async function handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, relay, promptText, taskType, confidence, routingMode, startTime, log, betaHeaders) {
2773
+ async function handleNonStreamingRequest(res, request, targetProvider, targetModel, apiKey, anthropicAuth, relay, promptText, taskType, confidence, routingMode, startTime, log, betaHeaders) {
2620
2774
  let providerResponse;
2621
2775
  let responseData;
2622
2776
  try {
2623
2777
  switch (targetProvider) {
2624
2778
  case "anthropic": {
2625
- providerResponse = await forwardToAnthropic(request, targetModel, apiKey, betaHeaders);
2779
+ if (!anthropicAuth) throw new Error("No Anthropic auth");
2780
+ providerResponse = await forwardToAnthropic(request, targetModel, anthropicAuth, betaHeaders);
2626
2781
  const rawData = await providerResponse.json();
2627
2782
  if (!providerResponse.ok) {
2628
2783
  res.writeHead(providerResponse.status, { "Content-Type": "application/json" });