@relayplane/proxy 0.2.0 → 1.1.0

This diff shows the changes between publicly available versions of this package as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/dist/server.js CHANGED
@@ -1,1089 +1,637 @@
1
- "use strict";
1
+ #!/usr/bin/env node
2
2
  /**
3
- * RelayPlane Agent Ops Proxy Server
4
- *
5
- * OpenAI-compatible proxy server with integrated observability via the Learning Ledger
6
- * and auth enforcement via Auth Gate.
3
+ * RelayPlane Local LLM Proxy Server
7
4
  *
5
+ * Routes OpenAI-compatible requests to multiple providers.
8
6
  * Features:
9
- * - OpenAI-compatible `/v1/chat/completions` endpoint
10
- * - Auth Gate integration for consumer vs API auth detection
11
- * - Learning Ledger integration for run tracking
12
- * - Timing capture (latency_ms, ttft_ms)
13
- * - Structured error handling
14
- *
15
- * @packageDocumentation
7
+ * - /health endpoint for monitoring
8
+ * - Usage tracking with spending warnings
9
+ * - Model aliases (rp:fast, rp:cheap, rp:best)
10
+ * - Dry-run mode for testing
16
11
  */
17
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
18
- if (k2 === undefined) k2 = k;
19
- var desc = Object.getOwnPropertyDescriptor(m, k);
20
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
21
- desc = { enumerable: true, get: function() { return m[k]; } };
22
- }
23
- Object.defineProperty(o, k2, desc);
24
- }) : (function(o, m, k, k2) {
25
- if (k2 === undefined) k2 = k;
26
- o[k2] = m[k];
27
- }));
28
- var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
29
- Object.defineProperty(o, "default", { enumerable: true, value: v });
30
- }) : function(o, v) {
31
- o["default"] = v;
32
- });
33
- var __importStar = (this && this.__importStar) || (function () {
34
- var ownKeys = function(o) {
35
- ownKeys = Object.getOwnPropertyNames || function (o) {
36
- var ar = [];
37
- for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
38
- return ar;
39
- };
40
- return ownKeys(o);
41
- };
42
- return function (mod) {
43
- if (mod && mod.__esModule) return mod;
44
- var result = {};
45
- if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
46
- __setModuleDefault(result, mod);
47
- return result;
48
- };
49
- })();
50
- Object.defineProperty(exports, "__esModule", { value: true });
51
- exports.ProxyServer = void 0;
52
- exports.createProxyServer = createProxyServer;
53
- const http = __importStar(require("node:http"));
54
- const ledger_1 = require("@relayplane/ledger");
55
- const auth_gate_1 = require("@relayplane/auth-gate");
56
- const policy_engine_1 = require("@relayplane/policy-engine");
57
- const routing_engine_1 = require("@relayplane/routing-engine");
58
- const explainability_1 = require("@relayplane/explainability");
12
+ import * as http from 'http';
13
+ import * as https from 'https';
14
+ import * as fs from 'fs';
15
+ import * as path from 'path';
16
+ import * as os from 'os';
17
+ const VERSION = '1.1.0';
18
+ const startTime = Date.now();
19
+ // Configuration
20
+ const PORT = parseInt(process.env.RELAYPLANE_PROXY_PORT || '8787', 10);
21
+ const HOST = process.env.RELAYPLANE_PROXY_HOST || '127.0.0.1';
22
+ const CONFIG_DIR = process.env.RELAYPLANE_CONFIG_DIR || path.join(os.homedir(), '.relayplane');
23
+ const stats = {
24
+ requestsHandled: 0,
25
+ requestsSuccessful: 0,
26
+ requestsFailed: 0,
27
+ totalInputTokens: 0,
28
+ totalOutputTokens: 0,
29
+ totalCost: 0,
30
+ byModel: new Map(),
31
+ byProvider: new Map(),
32
+ };
59
33
  /**
60
- * Provider endpoint configuration
34
+ * Model alias mappings
61
35
  */
62
- const PROVIDER_ENDPOINTS = {
36
+ const MODEL_ALIASES = {
37
+ 'rp:fast': { model: 'llama-3.1-8b-instant', provider: 'groq' },
38
+ 'rp:cheap': { model: 'llama-3.1-8b-instant', provider: 'groq' },
39
+ 'rp:best': { model: 'claude-3-5-sonnet-20241022', provider: 'anthropic' },
40
+ 'rp:balanced': { model: 'gpt-4o-mini', provider: 'openai' },
41
+ };
42
+ const PROVIDERS = {
43
+ openai: {
44
+ baseUrl: 'https://api.openai.com',
45
+ apiKeyEnv: 'OPENAI_API_KEY',
46
+ headerName: 'Authorization',
47
+ headerPrefix: 'Bearer ',
48
+ },
63
49
  anthropic: {
64
- baseUrl: 'https://api.anthropic.com/v1',
65
- authHeader: 'x-api-key',
50
+ baseUrl: 'https://api.anthropic.com',
51
+ apiKeyEnv: 'ANTHROPIC_API_KEY',
52
+ headerName: 'x-api-key',
53
+ headerPrefix: '',
54
+ extraHeaders: {
55
+ 'anthropic-version': '2023-06-01',
56
+ },
66
57
  },
67
- openai: {
68
- baseUrl: 'https://api.openai.com/v1',
69
- authHeader: 'Authorization',
58
+ groq: {
59
+ baseUrl: 'https://api.groq.com/openai',
60
+ apiKeyEnv: 'GROQ_API_KEY',
61
+ headerName: 'Authorization',
62
+ headerPrefix: 'Bearer ',
63
+ },
64
+ together: {
65
+ baseUrl: 'https://api.together.xyz',
66
+ apiKeyEnv: 'TOGETHER_API_KEY',
67
+ headerName: 'Authorization',
68
+ headerPrefix: 'Bearer ',
70
69
  },
71
70
  openrouter: {
72
- baseUrl: 'https://openrouter.ai/api/v1',
73
- authHeader: 'Authorization',
71
+ baseUrl: 'https://openrouter.ai/api',
72
+ apiKeyEnv: 'OPENROUTER_API_KEY',
73
+ headerName: 'Authorization',
74
+ headerPrefix: 'Bearer ',
74
75
  },
75
76
  };
76
77
  /**
77
- * Model to provider mapping
78
+ * Model to provider mapping (simplified)
78
79
  */
79
- function getProviderForModel(model) {
80
- if (model.startsWith('claude') || model.startsWith('anthropic')) {
81
- return 'anthropic';
82
- }
83
- if (model.startsWith('gpt') || model.startsWith('o1') || model.startsWith('o3')) {
80
+ function detectProvider(model) {
81
+ if (model.startsWith('gpt-') || model.startsWith('o1-'))
84
82
  return 'openai';
85
- }
86
- // Default to openrouter for other models
87
- return 'openrouter';
83
+ if (model.startsWith('claude-'))
84
+ return 'anthropic';
85
+ if (model.startsWith('llama-') || model.startsWith('mixtral-'))
86
+ return 'groq';
87
+ if (model.startsWith('meta-llama/') || model.startsWith('mistralai/'))
88
+ return 'together';
89
+ if (model.includes('/'))
90
+ return 'openrouter';
91
+ return 'openai'; // Default fallback
88
92
  }
89
93
  /**
90
- * RelayPlane Agent Ops Proxy Server
94
+ * Track which warnings we've already logged to avoid spam
91
95
  */
92
- class ProxyServer {
93
- server = null;
94
- ledger;
95
- authGate;
96
- policyEngine;
97
- routingEngine;
98
- capabilityRegistry;
99
- providerManager;
100
- explainer;
101
- comparator;
102
- simulator;
103
- config;
104
- constructor(config = {}) {
105
- this.config = {
106
- port: config.port ?? 3001,
107
- host: config.host ?? '127.0.0.1',
108
- verbose: config.verbose ?? false,
109
- defaultWorkspaceId: config.defaultWorkspaceId ?? 'default',
110
- defaultAgentId: config.defaultAgentId ?? 'default',
111
- defaultAuthEnforcementMode: config.defaultAuthEnforcementMode ?? 'recommended',
112
- enforcePolicies: config.enforcePolicies ?? true,
113
- enableRouting: config.enableRouting ?? true,
114
- ...config,
115
- };
116
- // Initialize ledger
117
- this.ledger = config.ledger ?? (0, ledger_1.createLedger)();
118
- // Initialize auth storage and gate
119
- const authStorage = config.authStorage ?? new auth_gate_1.MemoryAuthProfileStorage();
120
- this.authGate = (0, auth_gate_1.createAuthGate)({
121
- storage: authStorage,
122
- ledger: this.ledger,
123
- defaultSettings: {
124
- auth_enforcement_mode: this.config.defaultAuthEnforcementMode,
125
- },
126
- });
127
- // Initialize policy engine
128
- const policyStorage = config.policyStorage ?? new policy_engine_1.MemoryPolicyStorage();
129
- this.policyEngine = config.policyEngine ?? (0, policy_engine_1.createPolicyEngine)({
130
- storage: policyStorage,
131
- ledger: this.ledger,
132
- });
133
- // Initialize routing engine (Phase 3)
134
- this.capabilityRegistry = config.capabilityRegistry ?? (0, routing_engine_1.createCapabilityRegistry)({
135
- providerOverrides: this.buildProviderOverrides(),
136
- });
137
- this.providerManager = config.providerManager ?? (0, routing_engine_1.createProviderManagerWithBuiltIns)();
138
- this.routingEngine = config.routingEngine ?? (0, routing_engine_1.createRoutingEngine)({
139
- registry: this.capabilityRegistry,
140
- ledger: this.ledger,
141
- });
142
- // Initialize explainability components (Phase 4)
143
- this.explainer = (0, explainability_1.createExplanationEngine)({
144
- ledger: this.ledger,
145
- policyEngine: this.policyEngine,
146
- routingEngine: this.routingEngine,
147
- capabilityRegistry: this.capabilityRegistry,
148
- });
149
- this.comparator = (0, explainability_1.createRunComparator)({
150
- ledger: this.ledger,
151
- explanationEngine: this.explainer,
152
- });
153
- this.simulator = (0, explainability_1.createSimulator)({
154
- policyEngine: this.policyEngine,
155
- routingEngine: this.routingEngine,
156
- capabilityRegistry: this.capabilityRegistry,
157
- });
158
- // Set API keys from config
159
- this.configureProviderApiKeys();
160
- }
161
- /**
162
- * Build provider overrides from config
163
- */
164
- buildProviderOverrides() {
165
- const overrides = {};
166
- const providers = this.config.providers ?? {};
167
- if (providers.anthropic) {
168
- overrides.anthropic = {
169
- enabled: true,
170
- base_url: providers.anthropic.baseUrl,
171
- };
172
- }
173
- if (providers.openai) {
174
- overrides.openai = {
175
- enabled: true,
176
- base_url: providers.openai.baseUrl,
177
- };
178
- }
179
- if (providers.openrouter) {
180
- overrides.openrouter = {
181
- enabled: true,
182
- base_url: providers.openrouter.baseUrl,
183
- };
184
- }
185
- if (providers.google) {
186
- overrides.google = {
187
- enabled: true,
188
- base_url: providers.google.baseUrl,
189
- };
190
- }
191
- if (providers.together) {
192
- overrides.together = {
193
- enabled: true,
194
- base_url: providers.together.baseUrl,
195
- };
196
- }
197
- if (providers.deepseek) {
198
- overrides.deepseek = {
199
- enabled: true,
200
- base_url: providers.deepseek.baseUrl,
201
- };
202
- }
203
- return overrides;
204
- }
205
- /**
206
- * Configure provider API keys from config
207
- */
208
- configureProviderApiKeys() {
209
- const providers = this.config.providers ?? {};
210
- if (providers.anthropic?.apiKey) {
211
- this.providerManager.setApiKey('anthropic', providers.anthropic.apiKey);
212
- }
213
- if (providers.openai?.apiKey) {
214
- this.providerManager.setApiKey('openai', providers.openai.apiKey);
215
- }
216
- if (providers.openrouter?.apiKey) {
217
- this.providerManager.setApiKey('openrouter', providers.openrouter.apiKey);
218
- }
219
- if (providers.google?.apiKey) {
220
- this.providerManager.setApiKey('google', providers.google.apiKey);
221
- }
222
- if (providers.together?.apiKey) {
223
- this.providerManager.setApiKey('together', providers.together.apiKey);
224
- }
225
- if (providers.deepseek?.apiKey) {
226
- this.providerManager.setApiKey('deepseek', providers.deepseek.apiKey);
96
+ const loggedWarnings = {
97
+ daily80: false,
98
+ daily90: false,
99
+ daily100: false,
100
+ monthly80: false,
101
+ monthly90: false,
102
+ monthly100: false,
103
+ };
104
+ function loadConfig() {
105
+ const configPath = path.join(CONFIG_DIR, 'config.json');
106
+ try {
107
+ if (fs.existsSync(configPath)) {
108
+ return JSON.parse(fs.readFileSync(configPath, 'utf-8'));
227
109
  }
228
- // Also try environment variables
229
- const configs = this.capabilityRegistry.getEnabledProviders();
230
- this.providerManager.setApiKeysFromEnv(configs);
231
110
  }
232
- /**
233
- * Start the proxy server
234
- */
235
- async start() {
236
- return new Promise((resolve) => {
237
- this.server = http.createServer((req, res) => {
238
- this.handleRequest(req, res).catch((err) => {
239
- this.log('error', `Unhandled error: ${err}`);
240
- this.sendError(res, 500, 'internal_error', 'Internal server error');
241
- });
242
- });
243
- this.server.listen(this.config.port, this.config.host, () => {
244
- this.log('info', `RelayPlane Proxy listening on http://${this.config.host}:${this.config.port}`);
245
- resolve();
246
- });
247
- });
248
- }
249
- /**
250
- * Stop the proxy server
251
- */
252
- async stop() {
253
- return new Promise((resolve) => {
254
- if (this.server) {
255
- this.server.close(() => {
256
- this.log('info', 'Proxy server stopped');
257
- resolve();
258
- });
259
- }
260
- else {
261
- resolve();
262
- }
263
- });
111
+ catch {
112
+ // Ignore errors
264
113
  }
265
- /**
266
- * Handle incoming request
267
- */
268
- async handleRequest(req, res) {
269
- const url = new URL(req.url ?? '/', `http://${req.headers.host}`);
270
- // CORS headers
271
- res.setHeader('Access-Control-Allow-Origin', '*');
272
- res.setHeader('Access-Control-Allow-Methods', 'GET, POST, OPTIONS');
273
- res.setHeader('Access-Control-Allow-Headers', 'Content-Type, Authorization, X-RelayPlane-Workspace, X-RelayPlane-Agent, X-RelayPlane-Session, X-RelayPlane-Automated');
274
- // Handle preflight
275
- if (req.method === 'OPTIONS') {
276
- res.writeHead(204);
277
- res.end();
278
- return;
279
- }
280
- // Health check
281
- if (url.pathname === '/health' || url.pathname === '/') {
282
- res.writeHead(200, { 'Content-Type': 'application/json' });
283
- res.end(JSON.stringify({ status: 'ok', version: '0.1.0' }));
284
- return;
285
- }
286
- // OpenAI-compatible chat completions
287
- if (url.pathname === '/v1/chat/completions' && req.method === 'POST') {
288
- await this.handleChatCompletions(req, res);
289
- return;
290
- }
291
- // Models endpoint (for client compatibility)
292
- if (url.pathname === '/v1/models' && req.method === 'GET') {
293
- res.writeHead(200, { 'Content-Type': 'application/json' });
294
- res.end(JSON.stringify({
295
- object: 'list',
296
- data: [
297
- { id: 'claude-3-5-sonnet', object: 'model', owned_by: 'anthropic' },
298
- { id: 'claude-3-5-haiku', object: 'model', owned_by: 'anthropic' },
299
- { id: 'gpt-4o', object: 'model', owned_by: 'openai' },
300
- { id: 'gpt-4o-mini', object: 'model', owned_by: 'openai' },
301
- ],
302
- }));
303
- return;
304
- }
305
- // Policy Management API (Phase 2)
306
- if (url.pathname === '/v1/policies' && req.method === 'GET') {
307
- await this.handleListPolicies(req, res);
308
- return;
309
- }
310
- if (url.pathname === '/v1/policies' && req.method === 'POST') {
311
- await this.handleCreatePolicy(req, res);
312
- return;
313
- }
314
- if (url.pathname.startsWith('/v1/policies/') && req.method === 'GET') {
315
- const policyId = url.pathname.split('/')[3];
316
- if (policyId) {
317
- await this.handleGetPolicy(res, policyId);
318
- return;
319
- }
320
- }
321
- if (url.pathname.startsWith('/v1/policies/') && req.method === 'PATCH') {
322
- const policyId = url.pathname.split('/')[3];
323
- if (policyId) {
324
- await this.handleUpdatePolicy(req, res, policyId);
325
- return;
326
- }
327
- }
328
- if (url.pathname.startsWith('/v1/policies/') && req.method === 'DELETE') {
329
- const policyId = url.pathname.split('/')[3];
330
- if (policyId) {
331
- await this.handleDeletePolicy(res, policyId);
332
- return;
114
+ return null;
115
+ }
116
+ function loadDailyUsage() {
117
+ const today = new Date().toISOString().split('T')[0];
118
+ const usagePath = path.join(CONFIG_DIR, 'daily-usage.json');
119
+ try {
120
+ if (fs.existsSync(usagePath)) {
121
+ const data = JSON.parse(fs.readFileSync(usagePath, 'utf-8'));
122
+ if (data.date === today) {
123
+ return data;
333
124
  }
334
125
  }
335
- if (url.pathname === '/v1/policies/test' && req.method === 'POST') {
336
- await this.handlePolicyTest(req, res);
337
- return;
338
- }
339
- // Budget state endpoint
340
- if (url.pathname === '/v1/budget' && req.method === 'GET') {
341
- await this.handleGetBudget(req, res);
342
- return;
343
- }
344
- // ========================================================================
345
- // Explainability API (Phase 4)
346
- // ========================================================================
347
- // GET /v1/runs/{id}/explain - Full decision chain explanation
348
- if (url.pathname.match(/^\/v1\/runs\/[^/]+\/explain$/) && req.method === 'GET') {
349
- const runId = url.pathname.split('/')[3];
350
- const format = url.searchParams.get('format') ?? 'full';
351
- await this.handleExplainRun(res, runId, format);
352
- return;
353
- }
354
- // GET /v1/runs/{id}/timeline - Timeline view only
355
- if (url.pathname.match(/^\/v1\/runs\/[^/]+\/timeline$/) && req.method === 'GET') {
356
- const runId = url.pathname.split('/')[3];
357
- await this.handleRunTimeline(res, runId);
358
- return;
359
- }
360
- // GET /v1/runs/{id}/decisions - Raw decision chain
361
- if (url.pathname.match(/^\/v1\/runs\/[^/]+\/decisions$/) && req.method === 'GET') {
362
- const runId = url.pathname.split('/')[3];
363
- await this.handleRunDecisions(res, runId);
364
- return;
365
- }
366
- // GET /v1/runs/{id} - Run inspector (all details)
367
- if (url.pathname.match(/^\/v1\/runs\/[^/]+$/) && req.method === 'GET') {
368
- const runId = url.pathname.split('/')[3];
369
- await this.handleRunInspector(res, runId);
370
- return;
371
- }
372
- // GET /v1/runs/compare?ids=run1,run2 - Run comparison
373
- if (url.pathname === '/v1/runs/compare' && req.method === 'GET') {
374
- const idsParam = url.searchParams.get('ids');
375
- const includeDecisions = url.searchParams.get('include_decisions') === 'true';
376
- await this.handleCompareRuns(res, idsParam, includeDecisions);
377
- return;
378
- }
379
- // POST /v1/simulate/policy - Policy simulation
380
- if (url.pathname === '/v1/simulate/policy' && req.method === 'POST') {
381
- await this.handleSimulatePolicy(req, res);
382
- return;
383
- }
384
- // POST /v1/simulate/routing - Routing simulation
385
- if (url.pathname === '/v1/simulate/routing' && req.method === 'POST') {
386
- await this.handleSimulateRouting(req, res);
387
- return;
388
- }
389
- // 404 for unknown routes
390
- this.sendError(res, 404, 'not_found', `Unknown endpoint: ${url.pathname}`);
391
126
  }
392
- // ============================================================================
393
- // Policy Management Handlers (Phase 2)
394
- // ============================================================================
395
- async handleListPolicies(req, res) {
396
- try {
397
- const workspaceId = req.headers['x-relayplane-workspace'] ?? this.config.defaultWorkspaceId;
398
- const policies = await this.policyEngine.listPolicies(workspaceId);
399
- res.writeHead(200, { 'Content-Type': 'application/json' });
400
- res.end(JSON.stringify({ policies }));
401
- }
402
- catch (err) {
403
- this.sendError(res, 500, 'internal_error', err instanceof Error ? err.message : 'Unknown error');
404
- }
405
- }
406
- async handleCreatePolicy(req, res) {
407
- try {
408
- const body = await this.readBody(req);
409
- const policy = JSON.parse(body);
410
- const policyId = await this.policyEngine.createPolicy(policy);
411
- const created = await this.policyEngine.getPolicy(policyId);
412
- res.writeHead(201, { 'Content-Type': 'application/json' });
413
- res.end(JSON.stringify({ policy: created }));
414
- }
415
- catch (err) {
416
- this.sendError(res, 400, 'invalid_request', err instanceof Error ? err.message : 'Invalid policy');
417
- }
127
+ catch {
128
+ // Ignore errors
418
129
  }
419
- async handleGetPolicy(res, policyId) {
420
- try {
421
- const policy = await this.policyEngine.getPolicy(policyId);
422
- if (!policy) {
423
- this.sendError(res, 404, 'not_found', 'Policy not found');
424
- return;
425
- }
426
- res.writeHead(200, { 'Content-Type': 'application/json' });
427
- res.end(JSON.stringify({ policy }));
428
- }
429
- catch (err) {
430
- this.sendError(res, 500, 'internal_error', err instanceof Error ? err.message : 'Unknown error');
130
+ return { date: today, cost: 0, requests: 0 };
131
+ }
132
+ /**
133
+ * Save daily usage to file
134
+ */
135
+ function saveDailyUsage(usage) {
136
+ const usagePath = path.join(CONFIG_DIR, 'daily-usage.json');
137
+ try {
138
+ if (!fs.existsSync(CONFIG_DIR)) {
139
+ fs.mkdirSync(CONFIG_DIR, { recursive: true, mode: 0o700 });
431
140
  }
141
+ fs.writeFileSync(usagePath, JSON.stringify(usage, null, 2));
432
142
  }
433
- async handleUpdatePolicy(req, res, policyId) {
434
- try {
435
- const body = await this.readBody(req);
436
- const updates = JSON.parse(body);
437
- await this.policyEngine.updatePolicy(policyId, updates);
438
- const updated = await this.policyEngine.getPolicy(policyId);
439
- res.writeHead(200, { 'Content-Type': 'application/json' });
440
- res.end(JSON.stringify({ policy: updated }));
441
- }
442
- catch (err) {
443
- this.sendError(res, 400, 'invalid_request', err instanceof Error ? err.message : 'Invalid update');
444
- }
143
+ catch {
144
+ // Ignore errors
445
145
  }
446
- async handleDeletePolicy(res, policyId) {
447
- try {
448
- await this.policyEngine.deletePolicy(policyId);
449
- res.writeHead(204);
450
- res.end();
451
- }
452
- catch (err) {
453
- this.sendError(res, 500, 'internal_error', err instanceof Error ? err.message : 'Unknown error');
146
+ }
147
+ function loadMonthlyUsage() {
148
+ const currentMonth = new Date().toISOString().slice(0, 7); // YYYY-MM
149
+ const usagePath = path.join(CONFIG_DIR, 'monthly-usage.json');
150
+ try {
151
+ if (fs.existsSync(usagePath)) {
152
+ const data = JSON.parse(fs.readFileSync(usagePath, 'utf-8'));
153
+ if (data.month === currentMonth) {
154
+ return data;
155
+ }
454
156
  }
455
157
  }
456
- async handlePolicyTest(req, res) {
457
- try {
458
- const body = await this.readBody(req);
459
- const testRequest = JSON.parse(body);
460
- const decision = await this.policyEngine.dryRun(testRequest);
461
- res.writeHead(200, { 'Content-Type': 'application/json' });
462
- res.end(JSON.stringify({ decision }));
463
- }
464
- catch (err) {
465
- this.sendError(res, 400, 'invalid_request', err instanceof Error ? err.message : 'Invalid test request');
466
- }
158
+ catch {
159
+ // Ignore errors
467
160
  }
468
- async handleGetBudget(req, res) {
469
- try {
470
- const workspaceId = req.headers['x-relayplane-workspace'] ?? this.config.defaultWorkspaceId;
471
- const url = new URL(req.url ?? '/', `http://${req.headers.host}`);
472
- const scopeType = (url.searchParams.get('scope_type') ?? 'workspace');
473
- const scopeId = url.searchParams.get('scope_id') ?? workspaceId;
474
- const period = (url.searchParams.get('period') ?? 'day');
475
- const state = await this.policyEngine.getBudgetState(workspaceId, scopeType, scopeId, period);
476
- if (!state) {
477
- // Return empty state if no budget configured
478
- res.writeHead(200, { 'Content-Type': 'application/json' });
479
- res.end(JSON.stringify({ budget_state: null, message: 'No budget state found' }));
480
- return;
481
- }
482
- res.writeHead(200, { 'Content-Type': 'application/json' });
483
- res.end(JSON.stringify({ budget_state: state }));
484
- }
485
- catch (err) {
486
- this.sendError(res, 500, 'internal_error', err instanceof Error ? err.message : 'Unknown error');
161
+ return { month: currentMonth, cost: 0, requests: 0 };
162
+ }
163
+ function saveMonthlyUsage(usage) {
164
+ const usagePath = path.join(CONFIG_DIR, 'monthly-usage.json');
165
+ try {
166
+ if (!fs.existsSync(CONFIG_DIR)) {
167
+ fs.mkdirSync(CONFIG_DIR, { recursive: true, mode: 0o700 });
487
168
  }
169
+ fs.writeFileSync(usagePath, JSON.stringify(usage, null, 2));
488
170
  }
489
- // ============================================================================
490
- // Explainability Handlers (Phase 4)
491
- // ============================================================================
492
- /**
493
- * Handle GET /v1/runs/{id}/explain - Full decision chain explanation
494
- */
495
- async handleExplainRun(res, runId, format) {
496
- try {
497
- const explanation = await this.explainer.explain(runId, format);
498
- if (!explanation) {
499
- this.sendError(res, 404, 'not_found', `Run not found: ${runId}`);
500
- return;
501
- }
502
- res.writeHead(200, { 'Content-Type': 'application/json' });
503
- res.end(JSON.stringify({
504
- run_id: runId,
505
- format,
506
- chain: explanation.chain,
507
- timeline: explanation.timeline,
508
- narrative: explanation.narrative,
509
- debug_info: explanation.debug_info,
510
- }));
511
- }
512
- catch (err) {
513
- this.sendError(res, 500, 'internal_error', err instanceof Error ? err.message : 'Unknown error');
514
- }
171
+ catch {
172
+ // Ignore errors
515
173
  }
516
- /**
517
- * Handle GET /v1/runs/{id}/timeline - Timeline view only
518
- */
519
- async handleRunTimeline(res, runId) {
520
- try {
521
- const timeline = await this.explainer.getTimeline(runId);
522
- if (timeline.length === 0) {
523
- this.sendError(res, 404, 'not_found', `Run not found: ${runId}`);
524
- return;
525
- }
526
- res.writeHead(200, { 'Content-Type': 'application/json' });
527
- res.end(JSON.stringify({ run_id: runId, timeline }));
528
- }
529
- catch (err) {
530
- this.sendError(res, 500, 'internal_error', err instanceof Error ? err.message : 'Unknown error');
174
+ }
175
+ /**
176
+ * Check spending limits and log warnings
177
+ */
178
+ function checkAndWarnLimits(dailyUsage, monthlyUsage, config) {
179
+ const warnings = [];
180
+ // Check daily limits
181
+ if (config?.limits?.daily) {
182
+ const dailyPercent = (dailyUsage.cost / config.limits.daily) * 100;
183
+ if (dailyPercent >= 100 && !loggedWarnings.daily100) {
184
+ console.warn(`⚠️ DAILY LIMIT REACHED: $${dailyUsage.cost.toFixed(2)} / $${config.limits.daily} (100%)`);
185
+ loggedWarnings.daily100 = true;
186
+ warnings.push(`Daily limit reached: $${dailyUsage.cost.toFixed(2)} of $${config.limits.daily}`);
187
+ }
188
+ else if (dailyPercent >= 90 && !loggedWarnings.daily90) {
189
+ console.warn(`⚠️ Daily spending at 90%: $${dailyUsage.cost.toFixed(2)} / $${config.limits.daily}`);
190
+ loggedWarnings.daily90 = true;
191
+ warnings.push(`⚠️ You've used $${dailyUsage.cost.toFixed(2)} of your $${config.limits.daily} daily limit`);
192
+ }
193
+ else if (dailyPercent >= 80 && !loggedWarnings.daily80) {
194
+ console.warn(`⚠️ Daily spending at 80%: $${dailyUsage.cost.toFixed(2)} / $${config.limits.daily}`);
195
+ loggedWarnings.daily80 = true;
196
+ warnings.push(`⚠️ You've used $${dailyUsage.cost.toFixed(2)} of your $${config.limits.daily} daily limit`);
197
+ }
198
+ }
199
+ // Check monthly limits
200
+ if (config?.limits?.monthly) {
201
+ const monthlyPercent = (monthlyUsage.cost / config.limits.monthly) * 100;
202
+ if (monthlyPercent >= 100 && !loggedWarnings.monthly100) {
203
+ console.warn(`⚠️ MONTHLY LIMIT REACHED: $${monthlyUsage.cost.toFixed(2)} / $${config.limits.monthly} (100%)`);
204
+ loggedWarnings.monthly100 = true;
205
+ warnings.push(`Monthly limit reached: $${monthlyUsage.cost.toFixed(2)} of $${config.limits.monthly}`);
206
+ }
207
+ else if (monthlyPercent >= 90 && !loggedWarnings.monthly90) {
208
+ console.warn(`⚠️ Monthly spending at 90%: $${monthlyUsage.cost.toFixed(2)} / $${config.limits.monthly}`);
209
+ loggedWarnings.monthly90 = true;
210
+ warnings.push(`⚠️ You've used $${monthlyUsage.cost.toFixed(2)} of your $${config.limits.monthly} monthly limit`);
211
+ }
212
+ else if (monthlyPercent >= 80 && !loggedWarnings.monthly80) {
213
+ console.warn(`⚠️ Monthly spending at 80%: $${monthlyUsage.cost.toFixed(2)} / $${config.limits.monthly}`);
214
+ loggedWarnings.monthly80 = true;
215
+ warnings.push(`⚠️ You've used $${monthlyUsage.cost.toFixed(2)} of your $${config.limits.monthly} monthly limit`);
216
+ }
217
+ }
218
+ return warnings.length > 0 ? warnings.join('; ') : null;
219
+ }
220
+ /**
221
+ * Log usage to JSONL file
222
+ */
223
+ function logUsage(record) {
224
+ const usagePath = path.join(CONFIG_DIR, 'usage.jsonl');
225
+ try {
226
+ if (!fs.existsSync(CONFIG_DIR)) {
227
+ fs.mkdirSync(CONFIG_DIR, { recursive: true, mode: 0o700 });
531
228
  }
229
+ fs.appendFileSync(usagePath, JSON.stringify(record) + '\n');
532
230
  }
533
- /**
534
- * Handle GET /v1/runs/{id}/decisions - Raw decision chain
535
- */
536
- async handleRunDecisions(res, runId) {
537
- try {
538
- const chain = await this.explainer.getDecisionChain(runId);
539
- if (!chain) {
540
- this.sendError(res, 404, 'not_found', `Run not found: ${runId}`);
541
- return;
542
- }
543
- res.writeHead(200, { 'Content-Type': 'application/json' });
544
- res.end(JSON.stringify({
545
- run_id: runId,
546
- decisions: chain.decisions,
547
- summary: chain.summary,
548
- insights: chain.insights,
549
- }));
550
- }
551
- catch (err) {
552
- this.sendError(res, 500, 'internal_error', err instanceof Error ? err.message : 'Unknown error');
553
- }
231
+ catch {
232
+ // Ignore errors
554
233
  }
555
- /**
556
- * Handle GET /v1/runs/{id} - Run inspector (all details)
557
- */
558
- async handleRunInspector(res, runId) {
559
- try {
560
- // Get run from ledger
561
- const run = await this.ledger.getRun(runId);
562
- if (!run) {
563
- this.sendError(res, 404, 'not_found', `Run not found: ${runId}`);
564
- return;
565
- }
566
- // Get events
567
- const events = await this.ledger.getRunEvents(runId);
568
- // Get decision chain
569
- const chain = await this.explainer.getDecisionChain(runId);
570
- res.writeHead(200, { 'Content-Type': 'application/json' });
571
- res.end(JSON.stringify({
572
- run,
573
- events,
574
- decision_chain: chain,
575
- }));
576
- }
577
- catch (err) {
578
- this.sendError(res, 500, 'internal_error', err instanceof Error ? err.message : 'Unknown error');
579
- }
234
+ }
235
+ /**
236
+ * Estimate cost for a request (simplified pricing)
237
+ */
238
+ function estimateCost(model, inputTokens, outputTokens) {
239
+ // Simplified pricing per 1M tokens
240
+ const pricing = {
241
+ 'gpt-4o': { input: 5.0, output: 15.0 },
242
+ 'gpt-4o-mini': { input: 0.15, output: 0.6 },
243
+ 'gpt-4-turbo': { input: 10.0, output: 30.0 },
244
+ 'gpt-3.5-turbo': { input: 0.5, output: 1.5 },
245
+ 'claude-3-5-sonnet-20241022': { input: 3.0, output: 15.0 },
246
+ 'claude-3-opus-20240229': { input: 15.0, output: 75.0 },
247
+ 'claude-3-haiku-20240307': { input: 0.25, output: 1.25 },
248
+ 'llama-3.1-8b-instant': { input: 0.05, output: 0.08 },
249
+ 'llama-3.1-70b-versatile': { input: 0.59, output: 0.79 },
250
+ 'mixtral-8x7b-32768': { input: 0.24, output: 0.24 },
251
+ };
252
+ const price = pricing[model] || { input: 1.0, output: 2.0 };
253
+ return (inputTokens / 1_000_000) * price.input + (outputTokens / 1_000_000) * price.output;
254
+ }
255
+ /**
256
+ * Parse request body
257
+ */
258
+ async function parseBody(req) {
259
+ return new Promise((resolve, reject) => {
260
+ let body = '';
261
+ req.on('data', chunk => (body += chunk.toString()));
262
+ req.on('end', () => resolve(body));
263
+ req.on('error', reject);
264
+ });
265
+ }
266
+ /**
267
+ * Send JSON response
268
+ */
269
+ function sendJson(res, status, data) {
270
+ res.writeHead(status, {
271
+ 'Content-Type': 'application/json',
272
+ 'Access-Control-Allow-Origin': '*',
273
+ 'Access-Control-Allow-Methods': 'GET, POST, OPTIONS',
274
+ 'Access-Control-Allow-Headers': 'Content-Type, Authorization, X-Dry-Run',
275
+ });
276
+ res.end(JSON.stringify(data));
277
+ }
278
+ /**
279
+ * Get provider configuration status
280
+ */
281
+ function getProviderStatus() {
282
+ const providerStatus = {};
283
+ for (const [name, config] of Object.entries(PROVIDERS)) {
284
+ const apiKey = process.env[config.apiKeyEnv];
285
+ providerStatus[name] = apiKey ? 'configured' : 'not_configured';
580
286
  }
581
- /**
582
- * Handle GET /v1/runs/compare?ids=run1,run2 - Run comparison
583
- */
584
- async handleCompareRuns(res, idsParam, includeDecisions) {
287
+ return providerStatus;
288
+ }
289
+ /**
290
+ * Handle /health endpoint
291
+ */
292
+ function handleHealth(res) {
293
+ const config = loadConfig();
294
+ const dailyUsage = loadDailyUsage();
295
+ const monthlyUsage = loadMonthlyUsage();
296
+ const health = {
297
+ status: 'ok',
298
+ uptime: Math.floor((Date.now() - startTime) / 1000),
299
+ version: VERSION,
300
+ providers: getProviderStatus(),
301
+ requestsHandled: stats.requestsHandled,
302
+ requestsSuccessful: stats.requestsSuccessful,
303
+ requestsFailed: stats.requestsFailed,
304
+ dailyCost: dailyUsage.cost,
305
+ dailyLimit: config?.limits?.daily,
306
+ monthlyCost: monthlyUsage.cost,
307
+ monthlyLimit: config?.limits?.monthly,
308
+ usage: {
309
+ inputTokens: stats.totalInputTokens,
310
+ outputTokens: stats.totalOutputTokens,
311
+ totalCost: stats.totalCost,
312
+ },
313
+ };
314
+ sendJson(res, 200, health);
315
+ }
316
+ /**
317
+ * Handle /v1/models endpoint
318
+ */
319
+ function handleModels(res) {
320
+ const models = [
321
+ // OpenAI
322
+ { id: 'gpt-4o', provider: 'openai', alias: null },
323
+ { id: 'gpt-4o-mini', provider: 'openai', alias: 'rp:balanced' },
324
+ { id: 'gpt-4-turbo', provider: 'openai', alias: null },
325
+ { id: 'gpt-3.5-turbo', provider: 'openai', alias: null },
326
+ // Anthropic
327
+ { id: 'claude-3-5-sonnet-20241022', provider: 'anthropic', alias: 'rp:best' },
328
+ { id: 'claude-3-opus-20240229', provider: 'anthropic', alias: null },
329
+ { id: 'claude-3-haiku-20240307', provider: 'anthropic', alias: null },
330
+ // Groq
331
+ { id: 'llama-3.1-70b-versatile', provider: 'groq', alias: null },
332
+ { id: 'llama-3.1-8b-instant', provider: 'groq', alias: 'rp:fast, rp:cheap' },
333
+ { id: 'mixtral-8x7b-32768', provider: 'groq', alias: null },
334
+ // Aliases
335
+ { id: 'rp:fast', provider: 'groq', alias: '→ llama-3.1-8b-instant' },
336
+ { id: 'rp:cheap', provider: 'groq', alias: '→ llama-3.1-8b-instant' },
337
+ { id: 'rp:best', provider: 'anthropic', alias: '→ claude-3-5-sonnet-20241022' },
338
+ { id: 'rp:balanced', provider: 'openai', alias: '→ gpt-4o-mini' },
339
+ ];
340
+ sendJson(res, 200, {
341
+ object: 'list',
342
+ data: models.map(m => ({
343
+ id: m.id,
344
+ object: 'model',
345
+ owned_by: m.provider,
346
+ relayplane_alias: m.alias,
347
+ })),
348
+ });
349
+ }
350
+ /**
351
+ * Handle chat completions (and other API endpoints)
352
+ */
353
+ async function handleProxy(req, res, pathname) {
354
+ const startMs = Date.now();
355
+ const isDryRun = req.headers['x-dry-run'] === 'true';
356
+ try {
357
+ const body = await parseBody(req);
358
+ let data;
585
359
  try {
586
- if (!idsParam) {
587
- this.sendError(res, 400, 'invalid_request', 'Missing required parameter: ids');
588
- return;
589
- }
590
- const runIds = idsParam.split(',').map(id => id.trim()).filter(Boolean);
591
- if (runIds.length < 2) {
592
- this.sendError(res, 400, 'invalid_request', 'At least 2 run IDs required for comparison');
593
- return;
594
- }
595
- const comparison = await this.comparator.compare(runIds, {
596
- includeDecisionDiff: includeDecisions,
597
- });
598
- if (!comparison) {
599
- this.sendError(res, 404, 'not_found', 'One or more runs not found');
600
- return;
601
- }
602
- res.writeHead(200, { 'Content-Type': 'application/json' });
603
- res.end(JSON.stringify(comparison));
604
- }
605
- catch (err) {
606
- this.sendError(res, 500, 'internal_error', err instanceof Error ? err.message : 'Unknown error');
360
+ data = JSON.parse(body);
607
361
  }
608
- }
609
- /**
610
- * Handle POST /v1/simulate/policy - Policy simulation
611
- */
612
- async handleSimulatePolicy(req, res) {
613
- try {
614
- const body = await this.readBody(req);
615
- const request = JSON.parse(body);
616
- // Use workspace from header if not in body
617
- if (!request.workspace_id) {
618
- request.workspace_id = req.headers['x-relayplane-workspace'] ?? this.config.defaultWorkspaceId;
619
- }
620
- const result = await this.simulator.simulatePolicy(request);
621
- res.writeHead(200, { 'Content-Type': 'application/json' });
622
- res.end(JSON.stringify(result));
362
+ catch {
363
+ sendJson(res, 400, { error: { message: 'Invalid JSON body' } });
364
+ return;
623
365
  }
624
- catch (err) {
625
- this.sendError(res, 400, 'invalid_request', err instanceof Error ? err.message : 'Invalid simulation request');
366
+ let model = data.model || 'gpt-4o';
367
+ let provider;
368
+ // Resolve model alias
369
+ if (MODEL_ALIASES[model]) {
370
+ const alias = MODEL_ALIASES[model];
371
+ model = alias.model;
372
+ provider = alias.provider;
373
+ data.model = model;
626
374
  }
627
- }
628
- /**
629
- * Handle POST /v1/simulate/routing - Routing simulation
630
- */
631
- async handleSimulateRouting(req, res) {
632
- try {
633
- const body = await this.readBody(req);
634
- const request = JSON.parse(body);
635
- // Use workspace from header if not in body
636
- if (!request.workspace_id) {
637
- request.workspace_id = req.headers['x-relayplane-workspace'] ?? this.config.defaultWorkspaceId;
638
- }
639
- const result = await this.simulator.simulateRouting(request);
640
- res.writeHead(200, { 'Content-Type': 'application/json' });
641
- res.end(JSON.stringify(result));
375
+ else {
376
+ provider = detectProvider(model);
642
377
  }
643
- catch (err) {
644
- this.sendError(res, 400, 'invalid_request', err instanceof Error ? err.message : 'Invalid simulation request');
378
+ const providerConfig = PROVIDERS[provider];
379
+ if (!providerConfig) {
380
+ sendJson(res, 400, { error: { message: `Unknown provider for model: ${model}` } });
381
+ return;
645
382
  }
646
- }
647
- /**
648
- * Handle /v1/chat/completions
649
- */
650
- async handleChatCompletions(req, res) {
651
- const startTime = Date.now();
652
- let runId = null;
653
- try {
654
- // Parse request body
655
- const body = await this.readBody(req);
656
- const request = JSON.parse(body);
657
- // Extract metadata from headers
658
- const workspaceId = req.headers['x-relayplane-workspace'] ?? this.config.defaultWorkspaceId;
659
- const agentId = req.headers['x-relayplane-agent'] ?? this.config.defaultAgentId;
660
- const sessionId = req.headers['x-relayplane-session'];
661
- const isAutomated = req.headers['x-relayplane-automated'] === 'true';
662
- // Determine provider
663
- const provider = getProviderForModel(request.model);
664
- // Detect auth type from Authorization header
665
- const authHeader = req.headers['authorization'];
666
- const authType = this.detectAuthType(authHeader);
667
- const executionMode = isAutomated ? 'background' : 'interactive';
668
- // Validate auth via Auth Gate
669
- const authResult = await this.authGate.validate({
670
- workspace_id: workspaceId,
671
- metadata: {
672
- session_type: isAutomated ? 'background' : 'interactive',
673
- headers: {
674
- 'X-RelayPlane-Automated': isAutomated ? 'true' : 'false',
675
- },
676
- },
677
- });
678
- // Start ledger run
679
- runId = await this.ledger.startRun({
680
- workspace_id: workspaceId,
681
- agent_id: agentId,
682
- session_id: sessionId,
683
- provider,
684
- model: request.model,
685
- auth_type: authType,
686
- execution_mode: executionMode,
687
- compliance_mode: this.config.defaultAuthEnforcementMode,
688
- auth_risk: authResult.ledger_flags.auth_risk,
689
- policy_override: authResult.ledger_flags.policy_override,
383
+ const apiKey = process.env[providerConfig.apiKeyEnv];
384
+ if (!apiKey) {
385
+ sendJson(res, 401, {
386
+ error: { message: `Missing API key: ${providerConfig.apiKeyEnv}` },
690
387
  });
691
- // Record auth validation
692
- await this.authGate.emitAuthEvent(runId, authResult);
693
- // Check if auth was denied
694
- if (!authResult.allow) {
695
- const latencyMs = Date.now() - startTime;
696
- await this.ledger.completeRun(runId, {
697
- status: 'failed',
698
- input_tokens: 0,
699
- output_tokens: 0,
700
- total_tokens: 0,
701
- cost_usd: 0,
702
- latency_ms: latencyMs,
703
- error: {
704
- code: 'auth_denied',
705
- message: authResult.reason ?? 'Authentication denied',
706
- retryable: false,
707
- },
708
- });
709
- this.sendError(res, 403, 'auth_denied', authResult.reason ?? 'Authentication denied', runId, authResult.guidance_url);
710
- return;
711
- }
712
- // Evaluate policies (Phase 2)
713
- if (this.config.enforcePolicies) {
714
- const estimatedCost = this.policyEngine.estimateCost(request.model, provider, request.messages?.reduce((sum, m) => sum + (m.content?.length ?? 0) / 4, 0) ?? 1000, // Rough token estimate
715
- request.max_tokens ?? 1000);
716
- const policyDecision = await this.policyEngine.evaluate({
717
- workspace_id: workspaceId,
718
- agent_id: agentId,
719
- session_id: sessionId,
720
- run_id: runId,
721
- request: {
722
- model: request.model,
723
- provider,
724
- estimated_cost_usd: estimatedCost,
725
- estimated_tokens: request.max_tokens,
726
- context_size: request.messages?.reduce((sum, m) => sum + (m.content?.length ?? 0), 0),
727
- tools_requested: request.tools?.map((t) => t.function?.name).filter((n) => !!n),
728
- },
729
- });
730
- // Record policy evaluation in ledger
731
- await this.ledger.recordPolicyEvaluation(runId, policyDecision.policies_evaluated.map((p) => ({
732
- policy_id: p.policy_id,
733
- policy_name: p.policy_name,
734
- matched: p.matched,
735
- action_taken: p.action_taken,
736
- })));
737
- // Check if policy denied the request
738
- if (!policyDecision.allow) {
739
- const latencyMs = Date.now() - startTime;
740
- await this.ledger.completeRun(runId, {
741
- status: 'failed',
742
- input_tokens: 0,
743
- output_tokens: 0,
744
- total_tokens: 0,
745
- cost_usd: 0,
746
- latency_ms: latencyMs,
747
- error: {
748
- code: policyDecision.approval_required ? 'approval_required' : 'policy_denied',
749
- message: policyDecision.reason ?? 'Policy denied the request',
750
- retryable: false,
751
- },
752
- });
753
- if (policyDecision.approval_required) {
754
- this.sendError(res, 403, 'approval_required', policyDecision.reason ?? 'Approval required', runId);
755
- }
756
- else {
757
- this.sendError(res, 403, 'policy_denied', policyDecision.reason ?? 'Policy denied the request', runId);
758
- }
759
- return;
760
- }
761
- // Apply any modifications from policy (e.g., model downgrade, context cap)
762
- if (policyDecision.modified_request) {
763
- if (policyDecision.modified_request.model) {
764
- request.model = policyDecision.modified_request.model;
765
- }
766
- }
767
- // Log budget warning if present
768
- if (policyDecision.budget_warning) {
769
- this.log('info', `Budget warning for ${workspaceId}: ${policyDecision.budget_warning}`);
770
- }
771
- }
772
- // Record routing decision
773
- await this.ledger.recordRouting(runId, {
774
- selected_provider: provider,
775
- selected_model: request.model,
776
- reason: 'Direct model selection by client',
777
- });
778
- // Forward to provider
779
- const providerConfig = this.config.providers?.[provider];
780
- if (!providerConfig?.apiKey) {
781
- const latencyMs = Date.now() - startTime;
782
- await this.ledger.completeRun(runId, {
783
- status: 'failed',
784
- input_tokens: 0,
785
- output_tokens: 0,
786
- total_tokens: 0,
787
- cost_usd: 0,
788
- latency_ms: latencyMs,
388
+ return;
389
+ }
390
+ // Estimate tokens (rough estimate)
391
+ const inputTokens = Math.ceil(JSON.stringify(data.messages || data).length / 4);
392
+ const expectedOutputTokens = data.max_tokens || 500;
393
+ const estimatedCost = estimateCost(model, inputTokens, expectedOutputTokens);
394
+ // Check spending limits
395
+ const config = loadConfig();
396
+ const dailyUsage = loadDailyUsage();
397
+ const monthlyUsage = loadMonthlyUsage();
398
+ if (config?.limits?.daily) {
399
+ if (dailyUsage.cost + estimatedCost > config.limits.daily) {
400
+ sendJson(res, 429, {
789
401
  error: {
790
- code: 'provider_not_configured',
791
- message: `Provider ${provider} is not configured`,
792
- retryable: false,
402
+ message: `Daily spending limit reached ($${dailyUsage.cost.toFixed(2)} / $${config.limits.daily})`,
403
+ code: 'spending_limit_exceeded',
793
404
  },
794
405
  });
795
- this.sendError(res, 500, 'provider_not_configured', `Provider ${provider} is not configured`, runId);
796
406
  return;
797
407
  }
798
- // Make provider request
799
- const providerResponse = await this.forwardToProvider(provider, request, providerConfig, runId);
800
- const latencyMs = Date.now() - startTime;
801
- if (providerResponse.success) {
802
- const costUsd = this.estimateCost(provider, providerResponse.usage);
803
- // Complete run successfully
804
- await this.ledger.completeRun(runId, {
805
- status: 'completed',
806
- input_tokens: providerResponse.usage?.prompt_tokens ?? 0,
807
- output_tokens: providerResponse.usage?.completion_tokens ?? 0,
808
- total_tokens: providerResponse.usage?.total_tokens ?? 0,
809
- cost_usd: costUsd,
810
- latency_ms: latencyMs,
811
- ttft_ms: providerResponse.ttft_ms,
812
- });
813
- // Record spend for budget tracking (Phase 2)
814
- if (this.config.enforcePolicies) {
815
- await this.policyEngine.recordSpend(workspaceId, agentId, runId, costUsd);
816
- }
817
- // Add run_id to response
818
- const responseData = providerResponse.data;
819
- const responseWithMeta = {
820
- ...responseData,
821
- relayplane: {
822
- run_id: runId,
823
- latency_ms: latencyMs,
824
- ttft_ms: providerResponse.ttft_ms,
825
- },
826
- };
827
- res.writeHead(200, { 'Content-Type': 'application/json' });
828
- res.end(JSON.stringify(responseWithMeta));
829
- }
830
- else {
831
- // Complete run with failure
832
- await this.ledger.completeRun(runId, {
833
- status: 'failed',
834
- input_tokens: 0,
835
- output_tokens: 0,
836
- total_tokens: 0,
837
- cost_usd: 0,
838
- latency_ms: latencyMs,
839
- error: {
840
- code: providerResponse.error?.code ?? 'provider_error',
841
- message: providerResponse.error?.message ?? 'Provider request failed',
842
- provider_error: providerResponse.error?.raw,
843
- retryable: providerResponse.error?.retryable ?? false,
844
- },
845
- });
846
- this.sendError(res, providerResponse.error?.status ?? 500, providerResponse.error?.code ?? 'provider_error', providerResponse.error?.message ?? 'Provider request failed', runId);
847
- }
848
408
  }
849
- catch (err) {
850
- const latencyMs = Date.now() - startTime;
851
- const errorMessage = err instanceof Error ? err.message : 'Unknown error';
852
- if (runId) {
853
- await this.ledger.completeRun(runId, {
854
- status: 'failed',
855
- input_tokens: 0,
856
- output_tokens: 0,
857
- total_tokens: 0,
858
- cost_usd: 0,
859
- latency_ms: latencyMs,
409
+ if (config?.limits?.monthly) {
410
+ if (monthlyUsage.cost + estimatedCost > config.limits.monthly) {
411
+ sendJson(res, 429, {
860
412
  error: {
861
- code: 'internal_error',
862
- message: errorMessage,
863
- retryable: false,
413
+ message: `Monthly spending limit reached ($${monthlyUsage.cost.toFixed(2)} / $${config.limits.monthly})`,
414
+ code: 'spending_limit_exceeded',
864
415
  },
865
416
  });
417
+ return;
866
418
  }
867
- this.sendError(res, 500, 'internal_error', errorMessage, runId ?? undefined);
868
419
  }
869
- }
870
- /**
871
- * Forward request to provider
872
- */
873
- async forwardToProvider(provider, request, config, runId) {
874
- const endpoint = PROVIDER_ENDPOINTS[provider];
875
- if (!endpoint) {
876
- return {
877
- success: false,
878
- error: {
879
- code: 'unknown_provider',
880
- message: `Unknown provider: ${provider}`,
881
- status: 500,
882
- retryable: false,
420
+ // Dry run mode - return estimate without making actual request
421
+ if (isDryRun) {
422
+ sendJson(res, 200, {
423
+ dry_run: true,
424
+ routing: {
425
+ model,
426
+ provider,
427
+ endpoint: `${providerConfig.baseUrl}${pathname}`,
428
+ },
429
+ estimate: {
430
+ inputTokens,
431
+ expectedOutputTokens,
432
+ estimatedCost,
433
+ currency: 'USD',
883
434
  },
884
- };
435
+ limits: {
436
+ daily: config?.limits?.daily,
437
+ dailyUsed: dailyUsage.cost,
438
+ dailyRemaining: config?.limits?.daily
439
+ ? config.limits.daily - dailyUsage.cost
440
+ : null,
441
+ monthly: config?.limits?.monthly,
442
+ monthlyUsed: monthlyUsage.cost,
443
+ monthlyRemaining: config?.limits?.monthly
444
+ ? config.limits.monthly - monthlyUsage.cost
445
+ : null,
446
+ },
447
+ });
448
+ return;
885
449
  }
886
- const baseUrl = config.baseUrl ?? endpoint.baseUrl;
887
- const url = `${baseUrl}/chat/completions`;
450
+ // Make actual request to provider
451
+ const url = new URL(pathname, providerConfig.baseUrl);
888
452
  const headers = {
889
453
  'Content-Type': 'application/json',
454
+ 'User-Agent': `relayplane-proxy/${VERSION}`,
455
+ [providerConfig.headerName]: providerConfig.headerPrefix + apiKey,
456
+ ...providerConfig.extraHeaders,
890
457
  };
891
- // Set auth header
892
- if (provider === 'anthropic') {
893
- headers['x-api-key'] = config.apiKey;
894
- headers['anthropic-version'] = '2023-06-01';
895
- }
896
- else {
897
- headers['Authorization'] = `Bearer ${config.apiKey}`;
898
- }
899
- try {
900
- const ttftStart = Date.now();
901
- const response = await fetch(url, {
902
- method: 'POST',
903
- headers,
904
- body: JSON.stringify(request),
905
- });
906
- // Record provider call
907
- await this.ledger.recordProviderCall(runId, {
908
- provider,
909
- model: request.model,
910
- attempt: 1,
911
- ttft_ms: Date.now() - ttftStart,
912
- });
913
- if (!response.ok) {
914
- const errorBody = await response.text();
915
- let parsedError;
458
+ const proxyReq = (url.protocol === 'https:' ? https : http).request(url, {
459
+ method: 'POST',
460
+ headers,
461
+ }, proxyRes => {
462
+ const chunks = [];
463
+ proxyRes.on('data', chunk => chunks.push(chunk));
464
+ proxyRes.on('end', () => {
465
+ const responseBody = Buffer.concat(chunks).toString();
466
+ const latencyMs = Date.now() - startMs;
467
+ // Parse response for usage tracking
468
+ let outputTokens = expectedOutputTokens;
469
+ let success = proxyRes.statusCode === 200;
916
470
  try {
917
- parsedError = JSON.parse(errorBody);
471
+ const respData = JSON.parse(responseBody);
472
+ if (respData.usage) {
473
+ outputTokens = respData.usage.completion_tokens || outputTokens;
474
+ }
918
475
  }
919
476
  catch {
920
- parsedError = errorBody;
477
+ // Ignore parse errors
921
478
  }
922
- return {
923
- success: false,
924
- error: {
925
- code: `provider_${response.status}`,
926
- message: `Provider returned ${response.status}`,
927
- status: response.status,
928
- retryable: response.status >= 500 || response.status === 429,
929
- raw: parsedError,
930
- },
479
+ const actualCost = estimateCost(model, inputTokens, outputTokens);
480
+ // Update stats
481
+ stats.requestsHandled++;
482
+ if (success) {
483
+ stats.requestsSuccessful++;
484
+ }
485
+ else {
486
+ stats.requestsFailed++;
487
+ }
488
+ stats.totalInputTokens += inputTokens;
489
+ stats.totalOutputTokens += outputTokens;
490
+ stats.totalCost += actualCost;
491
+ // Update model stats
492
+ const modelStats = stats.byModel.get(model) || { requests: 0, tokens: 0, cost: 0 };
493
+ modelStats.requests++;
494
+ modelStats.tokens += inputTokens + outputTokens;
495
+ modelStats.cost += actualCost;
496
+ stats.byModel.set(model, modelStats);
497
+ // Update provider stats
498
+ const providerStats = stats.byProvider.get(provider) || { requests: 0, tokens: 0, cost: 0 };
499
+ providerStats.requests++;
500
+ providerStats.tokens += inputTokens + outputTokens;
501
+ providerStats.cost += actualCost;
502
+ stats.byProvider.set(provider, providerStats);
503
+ // Update daily usage
504
+ dailyUsage.cost += actualCost;
505
+ dailyUsage.requests++;
506
+ saveDailyUsage(dailyUsage);
507
+ // Update monthly usage
508
+ monthlyUsage.cost += actualCost;
509
+ monthlyUsage.requests++;
510
+ saveMonthlyUsage(monthlyUsage);
511
+ // Log usage
512
+ logUsage({
513
+ timestamp: new Date().toISOString(),
514
+ model,
515
+ provider,
516
+ inputTokens,
517
+ outputTokens,
518
+ cost: actualCost,
519
+ latencyMs,
520
+ success,
521
+ });
522
+ // Check limits and log warnings to console
523
+ const usageWarning = checkAndWarnLimits(dailyUsage, monthlyUsage, config);
524
+ // Add usage warning header if approaching limit
525
+ const responseHeaders = {
526
+ 'Content-Type': 'application/json',
527
+ 'Access-Control-Allow-Origin': '*',
528
+ 'X-RelayPlane-Cost': actualCost.toFixed(6),
529
+ 'X-RelayPlane-Latency': latencyMs.toString(),
931
530
  };
932
- }
933
- const data = await response.json();
934
- const ttftMs = Date.now() - ttftStart;
935
- return {
936
- success: true,
937
- data,
938
- usage: data.usage,
939
- ttft_ms: ttftMs,
940
- };
941
- }
942
- catch (err) {
943
- return {
531
+ if (config?.limits?.daily) {
532
+ responseHeaders['X-RelayPlane-Daily-Usage'] = `${dailyUsage.cost.toFixed(2)}/${config.limits.daily}`;
533
+ }
534
+ if (config?.limits?.monthly) {
535
+ responseHeaders['X-RelayPlane-Monthly-Usage'] = `${monthlyUsage.cost.toFixed(2)}/${config.limits.monthly}`;
536
+ }
537
+ if (usageWarning) {
538
+ responseHeaders['X-RelayPlane-Usage-Warning'] = usageWarning;
539
+ }
540
+ res.writeHead(proxyRes.statusCode || 200, responseHeaders);
541
+ res.end(responseBody);
542
+ });
543
+ });
544
+ proxyReq.on('error', error => {
545
+ stats.requestsHandled++;
546
+ stats.requestsFailed++;
547
+ logUsage({
548
+ timestamp: new Date().toISOString(),
549
+ model,
550
+ provider,
551
+ inputTokens,
552
+ outputTokens: 0,
553
+ cost: 0,
554
+ latencyMs: Date.now() - startMs,
944
555
  success: false,
945
- error: {
946
- code: 'network_error',
947
- message: err instanceof Error ? err.message : 'Network error',
948
- status: 500,
949
- retryable: true,
950
- raw: err,
951
- },
952
- };
953
- }
954
- }
955
- /**
956
- * Detect auth type from Authorization header
957
- */
958
- detectAuthType(authHeader) {
959
- if (!authHeader)
960
- return 'api';
961
- // Consumer auth typically uses session tokens or OAuth
962
- // API auth uses API keys starting with specific prefixes
963
- if (authHeader.includes('sk-ant-') ||
964
- authHeader.includes('sk-') ||
965
- authHeader.includes('Bearer sk-')) {
966
- return 'api';
967
- }
968
- // Default to consumer if it looks like a session token
969
- if (authHeader.startsWith('Bearer ') && authHeader.length > 100) {
970
- return 'consumer';
971
- }
972
- return 'api';
973
- }
974
- /**
975
- * Estimate cost based on provider and usage
976
- */
977
- estimateCost(provider, usage) {
978
- if (!usage)
979
- return 0;
980
- // Approximate pricing per 1K tokens
981
- const pricing = {
982
- anthropic: { input: 0.003, output: 0.015 }, // Claude 3.5 Sonnet
983
- openai: { input: 0.005, output: 0.015 }, // GPT-4o
984
- openrouter: { input: 0.003, output: 0.015 }, // Varies
985
- };
986
- const rates = pricing[provider] ?? { input: 0.003, output: 0.015 };
987
- return ((usage.prompt_tokens / 1000) * rates.input + (usage.completion_tokens / 1000) * rates.output);
988
- }
989
- /**
990
- * Read request body
991
- */
992
- readBody(req) {
993
- return new Promise((resolve, reject) => {
994
- let body = '';
995
- req.on('data', (chunk) => (body += chunk));
996
- req.on('end', () => resolve(body));
997
- req.on('error', reject);
556
+ });
557
+ sendJson(res, 502, { error: { message: `Proxy error: ${error.message}` } });
998
558
  });
559
+ proxyReq.write(JSON.stringify(data));
560
+ proxyReq.end();
999
561
  }
1000
- /**
1001
- * Send error response
1002
- */
1003
- sendError(res, status, code, message, runId, guidanceUrl) {
1004
- const error = {
1005
- error: {
1006
- message,
1007
- type: 'relayplane_error',
1008
- code,
1009
- run_id: runId,
1010
- },
1011
- };
1012
- if (guidanceUrl) {
1013
- error.error['guidance_url'] = guidanceUrl;
1014
- }
1015
- res.writeHead(status, { 'Content-Type': 'application/json' });
1016
- res.end(JSON.stringify(error));
1017
- }
1018
- /**
1019
- * Log message
1020
- */
1021
- log(level, message) {
1022
- if (this.config.verbose || level === 'error') {
1023
- const timestamp = new Date().toISOString();
1024
- console.log(`[${timestamp}] [${level.toUpperCase()}] ${message}`);
1025
- }
1026
- }
1027
- /**
1028
- * Get the ledger instance (useful for testing)
1029
- */
1030
- getLedger() {
1031
- return this.ledger;
1032
- }
1033
- /**
1034
- * Get the auth gate instance (useful for testing)
1035
- */
1036
- getAuthGate() {
1037
- return this.authGate;
1038
- }
1039
- /**
1040
- * Get the policy engine instance (useful for testing and policy management)
1041
- */
1042
- getPolicyEngine() {
1043
- return this.policyEngine;
1044
- }
1045
- /**
1046
- * Get the routing engine instance (Phase 3)
1047
- */
1048
- getRoutingEngine() {
1049
- return this.routingEngine;
1050
- }
1051
- /**
1052
- * Get the capability registry instance (Phase 3)
1053
- */
1054
- getCapabilityRegistry() {
1055
- return this.capabilityRegistry;
1056
- }
1057
- /**
1058
- * Get the provider manager instance (Phase 3)
1059
- */
1060
- getProviderManager() {
1061
- return this.providerManager;
1062
- }
1063
- /**
1064
- * Get the explanation engine instance (Phase 4)
1065
- */
1066
- getExplainer() {
1067
- return this.explainer;
1068
- }
1069
- /**
1070
- * Get the run comparator instance (Phase 4)
1071
- */
1072
- getComparator() {
1073
- return this.comparator;
1074
- }
1075
- /**
1076
- * Get the simulator instance (Phase 4)
1077
- */
1078
- getSimulator() {
1079
- return this.simulator;
562
+ catch (error) {
563
+ sendJson(res, 500, { error: { message: error.message } });
1080
564
  }
1081
565
  }
1082
- exports.ProxyServer = ProxyServer;
1083
566
  /**
1084
- * Create a new proxy server
567
+ * Main request handler
1085
568
  */
1086
- function createProxyServer(config) {
1087
- return new ProxyServer(config);
569
+ function handleRequest(req, res) {
570
+ const url = new URL(req.url || '/', `http://${HOST}:${PORT}`);
571
+ const pathname = url.pathname;
572
+ // CORS preflight
573
+ if (req.method === 'OPTIONS') {
574
+ res.writeHead(204, {
575
+ 'Access-Control-Allow-Origin': '*',
576
+ 'Access-Control-Allow-Methods': 'GET, POST, OPTIONS',
577
+ 'Access-Control-Allow-Headers': 'Content-Type, Authorization, X-Dry-Run',
578
+ });
579
+ res.end();
580
+ return;
581
+ }
582
+ // Health check endpoint
583
+ if (pathname === '/health' && req.method === 'GET') {
584
+ handleHealth(res);
585
+ return;
586
+ }
587
+ // Models endpoint
588
+ if (pathname === '/v1/models' && req.method === 'GET') {
589
+ handleModels(res);
590
+ return;
591
+ }
592
+ // Chat completions and other API endpoints
593
+ if (pathname.startsWith('/v1/') && req.method === 'POST') {
594
+ handleProxy(req, res, pathname);
595
+ return;
596
+ }
597
+ // Root endpoint
598
+ if (pathname === '/' && req.method === 'GET') {
599
+ sendJson(res, 200, {
600
+ name: 'RelayPlane Proxy',
601
+ version: VERSION,
602
+ status: 'ok',
603
+ endpoints: {
604
+ health: '/health',
605
+ models: '/v1/models',
606
+ chat: '/v1/chat/completions',
607
+ },
608
+ });
609
+ return;
610
+ }
611
+ // 404 for unknown routes
612
+ sendJson(res, 404, { error: { message: 'Not found' } });
1088
613
  }
614
+ // Create and start server
615
+ const server = http.createServer(handleRequest);
616
+ server.listen(PORT, HOST, () => {
617
+ console.log(`🚀 RelayPlane Proxy v${VERSION}`);
618
+ console.log(` Listening on http://${HOST}:${PORT}`);
619
+ console.log(` Health check: http://${HOST}:${PORT}/health`);
620
+ console.log(` Press Ctrl+C to stop`);
621
+ });
622
+ // Graceful shutdown
623
+ process.on('SIGTERM', () => {
624
+ console.log('\nShutting down...');
625
+ server.close(() => {
626
+ console.log('Server stopped.');
627
+ process.exit(0);
628
+ });
629
+ });
630
+ process.on('SIGINT', () => {
631
+ console.log('\nShutting down...');
632
+ server.close(() => {
633
+ console.log('Server stopped.');
634
+ process.exit(0);
635
+ });
636
+ });
1089
637
  //# sourceMappingURL=server.js.map