api-key-lb 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,125 @@
1
+ # api-key-lb
2
+
3
+ Transparent API key load balancer with session-aware sticky routing. Works with any OpenAI-compatible API.
4
+
5
+ **Why?** Agentic systems (Hermes, OpenCode, Claude Code, etc.) build up context caches per API key. If you round-robin between keys, you lose cache affinity on every other request. This proxy uses sticky routing — same session always hits the same key.
6
+
7
+ ## Features
8
+
9
+ - **Session-aware sticky routing** — same session fingerprint → same key (cache-friendly)
10
+ - **Automatic 429 fallback** — throttled key triggers fallback, reverts when unthrottled
11
+ - **Works with anything** — Hermes, OpenCode/kimaki, Claude Code, curl, any OpenAI-compatible client
12
+ - **Zero-config target** — proxy is transparent, forwards any path to the target API
13
+ - **Health endpoint** — `GET /health` for monitoring
14
+ - **macOS LaunchAgent** — auto-starts on login, auto-restarts on crash
15
+
16
+ ## Quick Start
17
+
18
+ ```bash
19
+ # Install globally (or use directly from this dir)
20
+ npm install -g .
21
+
22
+ # Setup — saves config, patches known configs, installs LaunchAgent
23
+ api-key-lb setup \
24
+ --keys "sk-key1,sk-key2" \
25
+ --target "https://api.z.ai" \
26
+ --port 4577
27
+
28
+ # Check status
29
+ api-key-lb status
30
+
31
+ # Stop
32
+ api-key-lb stop
33
+ ```
34
+
35
+ ## Config
36
+
37
+ Priority: CLI flags → env vars → config file → defaults
38
+
39
+ **Config file:** `~/.config/api-key-lb/config.json`
40
+
41
+ ```json
42
+ {
43
+ "target": "https://api.z.ai",
44
+ "keys": "key1,key2",
45
+ "port": 4577,
46
+ "cooldown_ms": 60000,
47
+ "session_ttl_ms": 3600000
48
+ }
49
+ ```
50
+
51
+ **Environment variables:**
52
+
53
+ | Variable | Default | Description |
54
+ |---|---|---|
55
+ | `API_KEYS` | required | Comma-separated API keys |
56
+ | `TARGET` | `https://api.openai.com` | Target API base URL |
57
+ | `PORT` | `4577` | Proxy listen port |
58
+ | `COOLDOWN_MS` | `60000` | 429 cooldown per key |
59
+ | `SESSION_TTL_MS` | `3600000` | Session sticky TTL |
60
+ | `API_KEY_LB_CONFIG` | — | Path to config file |
61
+
62
+ ## How Sticky Routing Works
63
+
64
+ 1. Extracts a session fingerprint from the request body (session_id, conversation_id, or model+system prompt hash)
65
+ 2. Hashes the fingerprint to deterministically pick a key
66
+ 3. Same fingerprint always routes to the same key
67
+ 4. Different sessions get distributed across keys
68
+ 5. On 429: falls back to alternate key, reverts to sticky when unthrottled
69
+
70
+ ## Connecting Your Tools
71
+
72
+ Just change the base URL to point at the proxy:
73
+
74
+ **Hermes** (`~/.hermes/config.yaml`):
75
+ ```yaml
76
+ model:
77
+ base_url: http://127.0.0.1:4577/api/coding/paas/v4
78
+ ```
79
+
80
+ **OpenCode** (`~/.config/opencode/opencode.json`):
81
+ ```json
82
+ {
83
+ "provider": {
84
+ "zai": {
85
+ "options": {
86
+ "baseURL": "http://127.0.0.1:4577/api/coding/paas/v4"
87
+ }
88
+ }
89
+ }
90
+ }
91
+ ```
92
+
93
+ **Any OpenAI-compatible client:**
94
+ ```bash
95
+ curl http://127.0.0.1:4577/v1/chat/completions \
96
+ -H "Authorization: Bearer anything" \
97
+ -d '{"model":"gpt-4","messages":[...]}'
98
+ ```
99
+
100
+ The `Authorization` header gets replaced by the proxy — the key you pass doesn't matter.
101
+
102
+ ## Health Check
103
+
104
+ ```bash
105
+ curl http://127.0.0.1:4577/health
106
+ ```
107
+
108
+ Returns per-key stats: requests, errors, cache hits, throttle status, active sessions.
109
+
110
+ ## Architecture
111
+
112
+ ```
113
+ ┌─────────┐ ┌──────────────────┐ ┌──────────┐
114
+ │ Client │────▶│ api-key-lb proxy │────▶│ API │
115
+ │ (Hermes) │ │ :4577 │ │ (z.ai) │
116
+ │ (OpenCode)│ │ sticky routing │ │ │
117
+ │ (curl) │ │ 429 fallback │ │ │
118
+ └─────────┘ └──────────────────┘ └──────────┘
119
+ ```
120
+
121
+ The proxy is transparent — it forwards whatever path and headers the client sends, replacing only the `Authorization` bearer token and the `Host` header (the `Connection` header is dropped before forwarding).
122
+
123
+ ## License
124
+
125
+ MIT
package/package.json ADDED
@@ -0,0 +1,28 @@
1
+ {
2
+ "name": "api-key-lb",
3
+ "version": "1.0.0",
4
+ "description": "Transparent API key load balancer with session-aware sticky routing. Works with any OpenAI-compatible API provider.",
5
+ "main": "src/proxy.mjs",
6
+ "bin": {
7
+ "api-key-lb": "./src/cli.mjs"
8
+ },
9
+ "files": [
10
+ "src/",
11
+ "README.md"
12
+ ],
13
+ "scripts": {
14
+ "start": "node src/proxy.mjs",
15
+ "setup": "node src/cli.mjs setup",
16
+ "status": "node src/cli.mjs status"
17
+ },
18
+ "keywords": ["proxy", "load-balancer", "api-keys", "openai", "openrouter", "cache-affinity", "rate-limit", "sticky-routing"],
19
+ "author": "jairodri",
20
+ "license": "MIT",
21
+ "engines": {
22
+ "node": ">=18"
23
+ },
24
+ "repository": {
25
+ "type": "git",
26
+ "url": "https://github.com/jairodri/api-key-lb"
27
+ }
28
+ }
package/src/cli.mjs ADDED
@@ -0,0 +1,299 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * api-key-lb CLI — setup, status, and config management
4
+ */
5
+
6
+ import fs from 'node:fs';
7
+ import path from 'node:path';
8
+ import os from 'node:os';
9
+ import { execSync } from 'node:child_process';
10
+ import { fileURLToPath } from 'node:url';
11
+
12
+ const __dirname = path.dirname(fileURLToPath(import.meta.url));
13
+ const command = process.argv[2] || 'help';
14
+ const args = process.argv.slice(3);
15
+
16
+ const HOME = os.homedir();
17
+ const CONFIG_DIR = path.join(HOME, '.config', 'api-key-lb');
18
+ const CONFIG_PATH = path.join(CONFIG_DIR, 'config.json');
19
+ const DEFAULT_PORT = 4577;
20
+
21
+ // ─── Commands ──────────────────────────────────────────────────────────
22
+
23
/**
 * `setup` command: merge CLI flags into the saved config, persist it, then
 * patch known client configs and install the macOS LaunchAgent.
 *
 * Flags (read through findArg): --keys/-k, --target/-t, --port/-p.
 * Exits with status 1 when the existing config cannot be parsed, when
 * --port is not a valid TCP port, or when keys/target are missing.
 */
function cmdSetup() {
  console.log('🔧 api-key-lb setup\n');

  // 1. Ensure config dir
  fs.mkdirSync(CONFIG_DIR, { recursive: true });

  // 2. Load or create config
  let cfg = {};
  if (fs.existsSync(CONFIG_PATH)) {
    try {
      const parsed = JSON.parse(fs.readFileSync(CONFIG_PATH, 'utf8'));
      // Anything other than a plain object cannot carry settings; start fresh.
      if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
        cfg = parsed;
      }
    } catch (err) {
      console.error(` ERROR: Could not read ${CONFIG_PATH}: ${err.message}`);
      process.exit(1);
    }
    console.log(` Found existing config at ${CONFIG_PATH}`);
  } else {
    console.log(` Creating config at ${CONFIG_PATH}`);
  }

  // Parse CLI args
  const keysArg = findArg('--keys') || findArg('-k');
  const targetArg = findArg('--target') || findArg('-t');
  const portArg = findArg('--port') || findArg('-p');

  if (keysArg) cfg.keys = keysArg;
  if (targetArg) cfg.target = targetArg;
  if (portArg) {
    const port = Number(portArg);
    if (!Number.isInteger(port) || port < 1 || port > 65535) {
      console.error(` ERROR: invalid --port "${portArg}" (expected an integer between 1 and 65535)`);
      process.exit(1);
    }
    cfg.port = port;
  }

  cfg.port = cfg.port || DEFAULT_PORT;

  // Normalize keys to an array and drop blanks such as the gap in "a,,b".
  const rawKeys = typeof cfg.keys === 'string' ? cfg.keys.split(',') : cfg.keys;
  cfg.keys = Array.isArray(rawKeys)
    ? rawKeys.map((k) => String(k).trim()).filter(Boolean)
    : [];

  if (cfg.keys.length === 0) {
    console.error(' ERROR: --keys required. Usage: api-key-lb setup --keys key1,key2 [--target URL] [--port PORT]');
    process.exit(1);
  }
  if (!cfg.target) {
    console.error(' ERROR: --target required. Usage: api-key-lb setup --keys key1,key2 --target https://api.z.ai');
    process.exit(1);
  }

  // 3. Write config. The file holds API keys, so keep it owner-only.
  //    `mode` only applies when the file is created; chmod covers rewrites.
  fs.writeFileSync(CONFIG_PATH, JSON.stringify(cfg, null, 2), { mode: 0o600 });
  fs.chmodSync(CONFIG_PATH, 0o600);
  console.log(` ✅ Config saved (${cfg.keys.length} keys, target: ${cfg.target})`);

  // 4. Patch agentic system configs
  patchHermes(cfg);
  patchOpenCode(cfg);

  // 5. Install LaunchAgent (macOS)
  installLaunchAgent(cfg);

  console.log('\n ✅ Setup complete. Start with: api-key-lb start');
}
74
+
75
/**
 * `status` command: query the proxy's /health endpoint with curl and print
 * a summary plus one line pair per key.
 *
 * Any failure (curl error, timeout, unparsable body) is reported as the
 * proxy not running.
 */
function cmdStatus() {
  const port = getPort();
  try {
    const result = execSync(`curl -s http://127.0.0.1:${port}/health`, { timeout: 3000 }).toString();
    const health = JSON.parse(result);
    console.log('📊 api-key-lb status\n');
    console.log(` Status: ${health.status}`);
    console.log(` Target: ${health.target}`);
    console.log(` Sessions: ${health.sessions}`);
    console.log(` Uptime: ${(health.uptime_ms / 1000 / 60).toFixed(1)} min\n`);
    for (const key of health.keys ?? []) {
      // The health endpoint already appends "..." to the key prefix; strip
      // any trailing dots first so the label carries exactly one ellipsis.
      const prefix = String(key.key).replace(/\.+$/, '');
      console.log(` Key #${key.id} (${prefix}...)`);
      console.log(` Requests: ${key.requests} Errors: ${key.errors} Cache hits: ${key.cache_hits} Status: ${key.status}`);
    }
  } catch {
    console.log('❌ Proxy not running on port ' + port);
    console.log(' Start with: api-key-lb start');
  }
}
94
+
95
/**
 * `start` command: report an already-running proxy, otherwise load
 * proxy.mjs into this process so it starts listening.
 */
function cmdStart() {
  const port = getPort();
  // Check if already running
  try {
    const result = execSync(`curl -s http://127.0.0.1:${port}/health`, { timeout: 2000 }).toString();
    const health = JSON.parse(result);
    console.log(`✅ Already running on port ${port} (${health.keys.length} keys, ${health.sessions} sessions)`);
    return;
  } catch {
    // Not running — start it
  }

  // Point the proxy at the CLI-managed config. Only do so when the file
  // exists and the caller has not chosen another one: proxy.mjs reads the
  // path named by API_KEY_LB_CONFIG without checking that it exists.
  if (!process.env.API_KEY_LB_CONFIG && fs.existsSync(CONFIG_PATH)) {
    process.env.API_KEY_LB_CONFIG = CONFIG_PATH;
  }

  console.log(`Starting api-key-lb on port ${port}...`);
  // Resolve the sibling module as a file: URL so the specifier is valid on
  // every platform, and surface load failures instead of dropping them.
  import(new URL('./proxy.mjs', import.meta.url).href).catch((err) => {
    console.error(`❌ Failed to start proxy: ${err.message}`);
    process.exitCode = 1;
  });
}
115
+
116
/**
 * `stop` command: find the process(es) listening on the proxy port with
 * lsof and send each one SIGTERM.
 */
function cmdStop() {
  const port = getPort();

  let listing;
  try {
    // Find the proxy process
    listing = execSync(`lsof -ti:${port} -sTCP:LISTEN`, { encoding: 'utf8' });
  } catch {
    console.log('Proxy not running');
    return;
  }

  // lsof -t prints one PID per line and may report more than one listener.
  const pids = listing
    .split(/\s+/)
    .filter(Boolean)
    .map((text) => Number.parseInt(text, 10))
    .filter((pid) => Number.isInteger(pid) && pid > 0);

  if (pids.length === 0) {
    console.log('Proxy not running');
    return;
  }

  for (const pid of pids) {
    try {
      process.kill(pid, 'SIGTERM');
      console.log(`✅ Stopped proxy (PID ${pid})`);
    } catch (err) {
      // Report signal failures as what they are, not as "not running".
      console.log(`⚠️ Could not stop PID ${pid}: ${err.message}`);
    }
  }
}
129
+
130
/**
 * `help` command: print the usage text (commands, setup flags, file
 * locations and environment variables) with a single console.log call.
 */
function cmdHelp() {
  const usage = `
api-key-lb — Transparent API Key Load Balancer

Usage:
api-key-lb setup --keys key1,key2 --target https://api.z.ai [--port 4577]
api-key-lb start Start the proxy (or show status if running)
api-key-lb stop Stop the proxy
api-key-lb status Show proxy health and key stats

Setup options:
-k, --keys Comma-separated API keys
-t, --target Target API base URL (e.g. https://api.z.ai, https://openrouter.ai/api/v1)
-p, --port Proxy port (default: 4577)

Config file: ~/.config/api-key-lb/config.json
LaunchAgent: ~/Library/LaunchAgents/com.api-key-lb.plist (macOS)

Environment variables (override config):
API_KEYS=key1,key2 API keys to balance
TARGET=https://... Target API URL
PORT=4577 Proxy port
COOLDOWN_MS=60000 429 cooldown in ms
SESSION_TTL_MS=3600000 Session sticky TTL in ms
`;
  console.log(usage);
}
156
+
157
+ // ─── Config Patching ───────────────────────────────────────────────────
158
+
159
/**
 * Rewrite `base_url:` entries in ~/.hermes/config.yaml that point at the
 * target host so they point at the local proxy instead.
 *
 * Only the scheme, host and optional port are replaced; the rest of the
 * URL (path, query) is kept. Optionally quoted values are handled.
 *
 * @param {{ port: number, target: string }} cfg - proxy port and target URL
 */
function patchHermes(cfg) {
  const hermesConfig = path.join(HOME, '.hermes', 'config.yaml');
  if (!fs.existsSync(hermesConfig)) {
    console.log(' ⏭ Hermes config not found — skipping');
    return;
  }

  const proxyUrl = `http://127.0.0.1:${cfg.port}`;

  try {
    const original = fs.readFileSync(hermesConfig, 'utf8');
    const targetHost = new URL(cfg.target).hostname;
    const escapedHost = targetHost.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

    // `(?::\d+)?` consumes an explicit port so it is not left dangling after
    // the proxy URL. The lookahead requires the host to end there, so
    // "api.z.ai.example.com" is not mistaken for "api.z.ai".
    const pattern = new RegExp(
      `(base_url:\\s*["']?)https?://${escapedHost}(?::\\d+)?(?=[/?#"'\\s]|$)`,
      'gm',
    );

    // Replace first and compare, rather than calling .test() on a /g regex,
    // whose lastIndex is stateful between calls.
    const patched = original.replace(pattern, (_match, prefix) => `${prefix}${proxyUrl}`);

    if (patched !== original) {
      fs.writeFileSync(hermesConfig, patched);
      console.log(` ✅ Patched Hermes config → ${proxyUrl}`);
    } else {
      console.log(` ⏭ Hermes config doesn't reference ${targetHost} — skipping`);
    }
  } catch (e) {
    console.log(` ⚠️ Could not patch Hermes: ${e.message}`);
  }
}
185
+
186
/**
 * Rewrite provider `options.baseURL` entries in
 * ~/.config/opencode/opencode.json whose host equals the target host so
 * they point at the local proxy, keeping the original URL path.
 *
 * Providers without a usable baseURL are skipped individually, so one
 * malformed entry does not prevent the others from being patched.
 *
 * @param {{ port: number, target: string }} cfg - proxy port and target URL
 */
function patchOpenCode(cfg) {
  const opencodeConfig = path.join(HOME, '.config', 'opencode', 'opencode.json');
  if (!fs.existsSync(opencodeConfig)) {
    console.log(' ⏭ OpenCode config not found — skipping');
    return;
  }

  try {
    const content = JSON.parse(fs.readFileSync(opencodeConfig, 'utf8'));
    const proxyUrl = `http://127.0.0.1:${cfg.port}`;
    const targetHost = new URL(cfg.target).hostname;
    let patched = false;

    // Walk all providers — find any whose baseURL targets our API host
    for (const [name, provider] of Object.entries(content?.provider ?? {})) {
      const baseURL = provider?.options?.baseURL;
      if (typeof baseURL !== 'string') continue;

      let parsed;
      try {
        parsed = new URL(baseURL);
      } catch {
        continue; // not an absolute URL — nothing to rewrite
      }

      // Compare hostnames exactly: a substring test would also match
      // look-alike hosts such as "notapi.z.ai". Entries already pointing at
      // the loopback address are left alone.
      if (parsed.hostname !== targetHost || parsed.hostname === '127.0.0.1') continue;

      // Replace the target host with proxy, keeping the path
      provider.options.baseURL = `${proxyUrl}${parsed.pathname}`;
      console.log(` ✅ Patched OpenCode provider "${name}" → ${provider.options.baseURL}`);
      patched = true;
    }

    if (patched) {
      fs.writeFileSync(opencodeConfig, JSON.stringify(content, null, 2));
    } else {
      console.log(` ⏭ OpenCode config doesn't reference ${targetHost} directly — skipping`);
    }
  } catch (e) {
    console.log(` ⚠️ Could not patch OpenCode: ${e.message}`);
  }
}
221
+
222
/**
 * Write ~/Library/LaunchAgents/com.api-key-lb.plist so launchd runs
 * proxy.mjs with the current Node binary, then (re)load it with launchctl.
 * Does nothing except log on platforms other than macOS.
 *
 * @param {object} cfg - resolved setup config (not read by this function)
 */
function installLaunchAgent(cfg) {
  if (process.platform !== 'darwin') {
    console.log(' ⏭ LaunchAgent only supported on macOS — skipping');
    return;
  }

  // Paths are interpolated into XML and into shell commands; escape for each
  // so a home directory containing spaces, quotes, '&' or '<' cannot break
  // the plist or the launchctl invocation.
  const escapeXml = (text) => String(text)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;');
  const shellQuote = (text) => `'${String(text).replace(/'/g, "'\\''")}'`;

  const agentsDir = path.join(HOME, 'Library', 'LaunchAgents');
  const plistPath = path.join(agentsDir, 'com.api-key-lb.plist');
  const proxyPath = path.resolve(__dirname, 'proxy.mjs');
  const logPath = path.join(CONFIG_DIR, 'proxy.log');

  const plist = `<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
  <key>Label</key>
  <string>com.api-key-lb</string>
  <key>ProgramArguments</key>
  <array>
    <string>${escapeXml(process.execPath)}</string>
    <string>${escapeXml(proxyPath)}</string>
  </array>
  <key>EnvironmentVariables</key>
  <dict>
    <key>API_KEY_LB_CONFIG</key>
    <string>${escapeXml(CONFIG_PATH)}</string>
  </dict>
  <key>RunAtLoad</key>
  <true/>
  <key>KeepAlive</key>
  <true/>
  <key>StandardOutPath</key>
  <string>${escapeXml(logPath)}</string>
  <key>StandardErrorPath</key>
  <string>${escapeXml(logPath)}</string>
</dict>
</plist>`;

  // The LaunchAgents directory may not exist yet for this user.
  fs.mkdirSync(agentsDir, { recursive: true });
  fs.writeFileSync(plistPath, plist);
  console.log(` ✅ LaunchAgent installed at ${plistPath}`);

  // Unload old if exists, load new
  try {
    execSync(`launchctl unload ${shellQuote(plistPath)} 2>/dev/null`, { stdio: 'pipe' });
  } catch {
    // Nothing was loaded under this label — expected on first install.
  }
  try {
    execSync(`launchctl load ${shellQuote(plistPath)}`);
    console.log(' ✅ LaunchAgent loaded (starts on login)');
  } catch (e) {
    console.log(` ⚠️ Could not load LaunchAgent: ${e.message}`);
    console.log(` Run manually: launchctl load ${shellQuote(plistPath)}`);
  }
}
274
+
275
+ // ─── Helpers ───────────────────────────────────────────────────────────
276
+
277
/**
 * Look up the value that follows `flag` in the CLI arguments.
 *
 * @param {string} flag - option name, e.g. '--keys'
 * @returns {string|null} the next argument, or null when the flag is absent
 *   or is not followed by a non-empty value
 */
function findArg(flag) {
  const position = args.indexOf(flag);
  if (position === -1) return null;

  const value = args[position + 1];
  return value ? value : null;
}
281
+
282
/**
 * Resolve the proxy port the CLI should talk to.
 *
 * Precedence is PORT env var → config file → DEFAULT_PORT, the same order
 * the proxy uses when it resolves its own port. Values that are not
 * integers in 1..65535 are ignored, which also keeps arbitrary config text
 * out of the shell commands that interpolate the returned port.
 *
 * @returns {number} a valid TCP port
 */
function getPort() {
  const toPort = (raw) => {
    if (typeof raw !== 'number' && typeof raw !== 'string') return null;
    const value = Number(raw);
    return Number.isInteger(value) && value >= 1 && value <= 65535 ? value : null;
  };

  const fromEnv = toPort(process.env.PORT);
  if (fromEnv !== null) return fromEnv;

  if (fs.existsSync(CONFIG_PATH)) {
    try {
      const fromFile = toPort(JSON.parse(fs.readFileSync(CONFIG_PATH, 'utf8'))?.port);
      if (fromFile !== null) return fromFile;
    } catch {
      // Unreadable or malformed config — fall through to the default.
    }
  }
  return DEFAULT_PORT;
}
290
+
291
+ // ─── Dispatch ──────────────────────────────────────────────────────────
292
// Command name → handler. A Map is used so that names inherited from
// Object.prototype (e.g. "constructor") are not mistaken for commands.
const dispatch = new Map([
  ['setup', cmdSetup],
  ['start', cmdStart],
  ['stop', cmdStop],
  ['status', cmdStatus],
  ['help', cmdHelp],
  ['--help', cmdHelp],
  ['-h', cmdHelp],
]);

const handler = dispatch.get(command);
if (handler) {
  handler();
} else {
  // Unrecognised command: say so, then show the usage text.
  console.log(`Unknown command: ${command}\n`);
  cmdHelp();
}
package/src/proxy.mjs ADDED
@@ -0,0 +1,334 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * api-key-lb — Transparent API Key Load Balancer
4
+ *
5
+ * Sits between any agentic system and an OpenAI-compatible API.
6
+ * Routes requests with session-aware sticky routing for context cache affinity.
7
+ * Falls back to alternate keys on 429s, then reverts to sticky key.
8
+ *
9
+ * Features:
10
+ * - Works with any OpenAI-compatible API (z.ai, OpenRouter, OpenAI, etc.)
11
+ * - Session-aware sticky routing (hashes request fingerprint → same key)
12
+ * - Automatic 429 fallback with configurable cooldown
13
+ * - Zero-config: point your client at the proxy, it handles the rest
14
+ * - Health endpoint for monitoring
15
+ *
16
+ * Usage:
17
+ * API_KEYS=key1,key2 TARGET=https://api.z.ai PORT=4577 node proxy.mjs
18
+ *
19
+ * Or via config file:
20
+ * node proxy.mjs --config ./api-key-lb.json
21
+ */
22
+
23
+ import http from 'node:http';
24
+ import https from 'node:https';
25
+ import crypto from 'node:crypto';
26
+ import fs from 'node:fs';
27
+ import path from 'node:path';
28
+ import { URL } from 'node:url';
29
+ import { fileURLToPath } from 'node:url';
30
+
31
+ // ─── Config ────────────────────────────────────────────────────────────
32
+ const __dirname = path.dirname(fileURLToPath(import.meta.url));
33
+
34
/**
 * Locate and parse the JSON config file, if any.
 *
 * Lookup order:
 *   1. `--config <path>` on the command line
 *   2. the path in the API_KEY_LB_CONFIG environment variable
 *   3. the first existing file among ./api-key-lb.json (cwd),
 *      ../api-key-lb.json (relative to this module) and
 *      <home>/.config/api-key-lb/config.json
 *
 * @returns {object} parsed config, or {} when no file is found (pure
 *   environment-variable configuration)
 * @throws when an explicitly named or discovered file cannot be read or
 *   parsed as JSON
 */
function loadConfig() {
  const readJson = (file) => JSON.parse(fs.readFileSync(file, 'utf8'));

  // 1. CLI --config flag
  const configIdx = process.argv.indexOf('--config');
  if (configIdx !== -1 && process.argv[configIdx + 1]) {
    return readJson(path.resolve(process.argv[configIdx + 1]));
  }

  // 2. ENV-based config file path
  if (process.env.API_KEY_LB_CONFIG) {
    return readJson(process.env.API_KEY_LB_CONFIG);
  }

  // 3. Look for config in common locations
  const searchPaths = [
    path.join(process.cwd(), 'api-key-lb.json'),
    path.join(__dirname, '..', 'api-key-lb.json'),
  ];
  // fs does not expand "~", so a literal "~" fallback can never find the
  // per-user config. Use HOME, then USERPROFILE, and skip the per-user
  // location when neither is set.
  const home = process.env.HOME || process.env.USERPROFILE;
  if (home) {
    searchPaths.push(path.join(home, '.config', 'api-key-lb', 'config.json'));
  }

  const found = searchPaths.find((candidate) => fs.existsSync(candidate));

  // 4. Pure env vars
  return found ? readJson(found) : {};
}
62
+
63
/**
 * Merge environment variables, file config and defaults into the runtime
 * configuration. A non-empty environment variable wins over the file
 * value, which wins over the default.
 *
 * Numeric settings that are missing, non-numeric or out of range fall back
 * to their defaults (with a warning) instead of becoming NaN.
 *
 * @param {object} fileConfig - parsed config file contents (may be {})
 * @returns {{ keys: string[], target: string, port: number,
 *             cooldownMs: number, sessionTTL: number }}
 */
function resolveConfig(fileConfig) {
  const pick = (envName, fileKey) => {
    if (process.env[envName]) return process.env[envName];
    const fromFile = fileConfig?.[fileKey];
    return fromFile === undefined || fromFile === null ? undefined : String(fromFile);
  };

  const pickInt = (envName, fileKey, fallback, min, max) => {
    const raw = pick(envName, fileKey);
    if (raw === undefined) return fallback;
    const value = Number.parseInt(raw, 10);
    if (Number.isInteger(value) && value >= min && value <= max) return value;
    console.warn(`[CONFIG] Ignoring invalid ${envName}/${fileKey} value "${raw}", using ${fallback}`);
    return fallback;
  };

  // Keys may arrive as a comma-separated string or as an array; coerce each
  // entry to a string so stray non-string values cannot throw on .trim().
  const rawKeys = process.env.API_KEYS || fileConfig?.keys || '';
  const keyList = Array.isArray(rawKeys) ? rawKeys : String(rawKeys).split(',');
  const keys = keyList.map((k) => String(k).trim()).filter(Boolean);

  const target = pick('TARGET', 'target') || 'https://api.openai.com';
  const port = pickInt('PORT', 'port', 4577, 0, 65535);
  const cooldownMs = pickInt('COOLDOWN_MS', 'cooldown_ms', 60000, 0, Number.MAX_SAFE_INTEGER);
  const sessionTTL = pickInt('SESSION_TTL_MS', 'session_ttl_ms', 3600000, 0, Number.MAX_SAFE_INTEGER);

  return { keys, target, port, cooldownMs, sessionTTL };
}
79
+
80
// Read the optional JSON file first, then overlay env vars and defaults.
const fileConfig = loadConfig();
const config = resolveConfig(fileConfig);

// With no keys there is nothing to balance — explain and stop.
if (!config.keys.length) {
  const messages = [
    'ERROR: No API keys. Set API_KEYS=key1,key2,... or use a config file.',
    ' See: api-key-lb --help',
  ];
  for (const message of messages) console.error(message);
  process.exit(1);
}

// ─── Key State ─────────────────────────────────────────────────────────
// One mutable record per configured key: its index, the key itself, the
// timestamp until which it is considered throttled, and running counters.
const keyState = config.keys.map((key, id) => {
  return {
    id,
    key,
    throttledUntil: 0,
    requestCount: 0,
    errorCount: 0,
    cacheHits: 0,
  };
});
98
+
99
+ // ─── Session Tracking ──────────────────────────────────────────────────
100
+ const sessionKeyMap = new Map();
101
+ const SESSION_MAX = 2000;
102
+
103
/**
 * Map a string to a key index deterministically.
 *
 * Takes the first byte of the string's SHA-256 digest modulo the number of
 * configured keys.
 *
 * @param {string} str - session identifier or fingerprint
 * @returns {number} index into the configured key list
 */
function hashToKeyIndex(str) {
  const digest = crypto.createHash('sha256').update(str).digest();
  const [leadingByte] = digest;
  return leadingByte % config.keys.length;
}
107
+
108
/**
 * Derive a session identifier from a request body so that requests of the
 * same session can be routed to the same key.
 *
 * Tried in order:
 *   1. top-level session_id / sessionId / conversation_id /
 *      conversationId / thread_id
 *   2. custom_fields.session_id or metadata.session_id on any message
 *   3. a fingerprint built from the model name and the first 500
 *      characters of the first system message
 *
 * @param {Buffer|null|undefined} body - raw request body
 * @returns {string|null} always a string when an id is found (numeric ids
 *   are stringified, because callers slice and hash the value); null for
 *   empty, non-JSON or unidentifiable bodies
 */
function extractSessionId(body) {
  if (!body || body.length === 0) return null;

  let json;
  try {
    json = JSON.parse(body.toString('utf8'));
  } catch {
    // Not JSON — ignore
    return null;
  }
  if (json === null || typeof json !== 'object') return null;

  // Accept only non-empty strings and finite numbers as identifiers.
  const asId = (value) => {
    if (typeof value === 'string' && value !== '') return value;
    if (typeof value === 'number' && Number.isFinite(value)) return String(value);
    return null;
  };

  // 1. Explicit session identifiers (OpenCode, custom agents)
  const candidates = [
    json.session_id,
    json.sessionId,
    json.conversation_id,
    json.conversationId,
    json.thread_id,
  ];
  for (const candidate of candidates) {
    const id = asId(candidate);
    if (id !== null) return id;
  }

  // 2. Check messages metadata. Null or non-object entries are skipped so
  //    that one malformed message does not disable fingerprinting.
  const msgs = Array.isArray(json.messages) ? json.messages : [];
  for (const msg of msgs) {
    const id = asId(msg?.custom_fields?.session_id) ?? asId(msg?.metadata?.session_id);
    if (id !== null) return id;
  }

  // 3. Fingerprint: model + system prompt prefix (stable across a session)
  const systemMsg = msgs.find((m) => m?.role === 'system');
  if (systemMsg) {
    const { content } = systemMsg;
    let text = null;
    if (typeof content === 'string') {
      text = content;
    } else if (content !== null && typeof content === 'object') {
      // Structured content (e.g. an array of parts): serialise it so the
      // fingerprint reflects the actual text rather than "[object Object]".
      text = JSON.stringify(content);
    }
    if (text !== null) return `${json.model || ''}:${text.slice(0, 500)}`;
  }

  return null;
}
144
+
145
/**
 * Bound the session map: once it holds more than SESSION_MAX entries,
 * rebuild it from the most recently inserted SESSION_MAX / 2 entries
 * (Map iteration follows insertion order).
 */
function pruneSessions() {
  if (sessionKeyMap.size <= SESSION_MAX) return;

  const keepCount = Math.floor(SESSION_MAX / 2);
  const survivors = Array.from(sessionKeyMap.entries()).slice(-keepCount);

  sessionKeyMap.clear();
  for (const [sessionId, keyIndex] of survivors) {
    sessionKeyMap.set(sessionId, keyIndex);
  }
}
152
+
153
/**
 * Choose the key record to use for a request.
 *
 * A session's sticky key (remembered in sessionKeyMap, otherwise derived
 * with hashToKeyIndex and stored) is used when it is not throttled.
 * Otherwise the first unthrottled key in index order is used; when every
 * key is throttled, the one whose throttle expires soonest is returned.
 *
 * @param {string|null} sessionId - session identifier, or null/empty for
 *   requests without one (these never get a sticky key)
 * @returns {{ state: object, isSticky: boolean }}
 */
function getKeyForSession(sessionId) {
  let stickyIndex = -1;
  if (sessionId) {
    if (sessionKeyMap.has(sessionId)) {
      stickyIndex = sessionKeyMap.get(sessionId);
    } else {
      stickyIndex = hashToKeyIndex(sessionId);
      sessionKeyMap.set(sessionId, stickyIndex);
      pruneSessions();
    }
  }

  const now = Date.now();
  const isAvailable = (entry) => entry.throttledUntil <= now;

  // Sticky key first
  if (stickyIndex >= 0) {
    const sticky = keyState[stickyIndex];
    if (isAvailable(sticky)) {
      return { state: sticky, isSticky: true };
    }
    console.log(`[STICKY] Key #${sticky.id} throttled, trying alternate for session ${sessionId?.slice(0, 30)}...`);
  }

  // Otherwise the first key, in index order, that is not throttled
  let index = 0;
  while (index < config.keys.length) {
    const candidate = keyState[index];
    if (isAvailable(candidate)) {
      return { state: candidate, isSticky: false };
    }
    index += 1;
  }

  // Everything is throttled — take the key that unlocks soonest
  const soonest = keyState.reduce((best, entry) => {
    return entry.throttledUntil < best.throttledUntil ? entry : best;
  });
  return { state: soonest, isSticky: false };
}
189
+
190
/**
 * Record that a key was rate-limited: set its throttle deadline and bump
 * its error counter.
 *
 * @param {object} state - the key's record from keyState
 * @param {number} [retryAfterMs] - cooldown in ms; the configured
 *   cooldownMs is used when this is missing or 0
 */
function markThrottled(state, retryAfterMs) {
  const cooldown = retryAfterMs ? retryAfterMs : config.cooldownMs;
  state.throttledUntil = Date.now() + cooldown;
  state.errorCount += 1;

  const until = new Date(state.throttledUntil).toISOString();
  console.log(`[THROTTLE] Key #${state.id} throttled until ${until}`);
}
195
+
196
+ // ─── Request Handler ───────────────────────────────────────────────────
197
/**
 * Proxy one client request to the target API.
 *
 * The request body is buffered, a session id is extracted from it, and a
 * key is chosen with getKeyForSession. The request is then forwarded with
 * the Authorization and Host headers replaced and Connection removed.
 *
 * - 429 from upstream: the key is marked throttled (honouring Retry-After)
 *   and the request is retried with another key, up to one attempt per
 *   configured key; the last 429 is passed through when all are exhausted.
 * - Upstream network error: retried the same way, else answered with 502.
 * - Anything else: status, headers and body are streamed to the client.
 *
 * @param {import('node:http').IncomingMessage} req
 * @param {import('node:http').ServerResponse} res
 */
function handleRequest(req, res) {
  const chunks = [];
  req.on('data', (chunk) => chunks.push(chunk));
  req.on('end', () => {
    const body = Buffer.concat(chunks);

    // Resolve the upstream once per request. Scheme, host and port always
    // come from the configured target; only path and query are taken from
    // the client, so a request URL such as "//other-host/x" cannot redirect
    // the API key to a different host.
    let base;
    let requested;
    try {
      base = new URL(config.target);
      requested = new URL(req.url, base);
    } catch (err) {
      res.writeHead(502, { 'content-type': 'application/json' });
      res.end(JSON.stringify({ error: 'proxy_error', message: err.message }));
      return;
    }
    const isPlainHttp = base.protocol === 'http:';
    // Pick the client matching the target scheme so http:// targets work too.
    const transport = isPlainHttp ? http : https;

    const sessionId = extractSessionId(body);
    const { state: initialState, isSticky } = getKeyForSession(sessionId);
    const attempts = [];

    function tryRequest(ks, wasSticky) {
      attempts.push(ks.id);
      ks.requestCount++;

      if (sessionId && attempts.length === 1) {
        console.log(`[ROUTE] session=${String(sessionId).slice(0, 30)}... → key #${ks.id} (${wasSticky ? 'sticky' : 'fallback'})`);
      }

      const headers = { ...req.headers };
      headers['authorization'] = `Bearer ${ks.key}`;
      headers['host'] = base.host;
      delete headers['connection'];

      const options = {
        hostname: base.hostname,
        port: base.port || (isPlainHttp ? 80 : 443),
        path: requested.pathname + requested.search,
        method: req.method,
        headers,
      };

      const proxyReq = transport.request(options, (proxyRes) => {
        if (proxyRes.statusCode === 429) {
          const retryAfter = parseInt(proxyRes.headers['retry-after'] || '0', 10);
          const retryAfterMs = retryAfter > 0 ? retryAfter * 1000 : config.cooldownMs;
          markThrottled(ks, retryAfterMs);

          // Buffer the 429 body: it is only sent if no key is left to try.
          const drainChunks = [];
          proxyRes.on('data', (c) => drainChunks.push(c));
          proxyRes.on('end', () => {
            if (attempts.length < config.keys.length) {
              const { state: nextKey } = getKeyForSession(sessionId);
              console.log(`[RETRY] 429 on key #${ks.id}, falling back to key #${nextKey.id}`);
              tryRequest(nextKey, false);
            } else {
              console.log(`[FAIL] All keys exhausted for ${req.method} ${req.url}`);
              res.writeHead(proxyRes.statusCode, proxyRes.headers);
              res.end(Buffer.concat(drainChunks));
            }
          });
          return;
        }

        // Detect cache hits
        const cacheHeader = proxyRes.headers['x-cache'] || proxyRes.headers['x-cached'];
        if (cacheHeader === 'HIT' || cacheHeader === 'hit') {
          ks.cacheHits++;
        }

        if (proxyRes.statusCode >= 500) {
          console.log(`[ERROR] Key #${ks.id} got ${proxyRes.statusCode} for ${req.method} ${req.url}`);
        } else if (attempts.length > 1) {
          console.log(`[OK] Key #${ks.id} succeeded after ${attempts.length} attempts`);
        }

        res.writeHead(proxyRes.statusCode, proxyRes.headers);
        proxyRes.pipe(res);
      });

      proxyReq.on('error', (err) => {
        console.error(`[ERROR] Key #${ks.id} request failed:`, err.message);
        // Once part of a response has been sent it can be neither retried
        // nor replaced with a 502; just terminate the client connection.
        if (res.headersSent) {
          res.destroy(err);
          return;
        }
        if (attempts.length < config.keys.length) {
          const { state: nextKey } = getKeyForSession(sessionId);
          tryRequest(nextKey, false);
        } else {
          res.writeHead(502, { 'content-type': 'application/json' });
          res.end(JSON.stringify({ error: 'proxy_error', message: err.message }));
        }
      });

      if (body.length > 0) proxyReq.write(body);
      proxyReq.end();
    }

    tryRequest(initialState, isSticky);
  });
}
284
+
285
+ // ─── Health Endpoint ───────────────────────────────────────────────────
286
/**
 * Answer GET /health with a JSON snapshot: overall status, the target URL
 * (with any userinfo masked), per-key counters and throttle status, the
 * number of tracked sessions and the process uptime in milliseconds.
 *
 * Only the first 8 characters of each key are exposed.
 *
 * @param {import('node:http').IncomingMessage} req - unused
 * @param {import('node:http').ServerResponse} res
 */
function handleHealth(req, res) {
  const now = Date.now();
  const info = keyState.map((s) => ({
    id: s.id,
    key: s.key.slice(0, 8) + '...',
    requests: s.requestCount,
    errors: s.errorCount,
    cache_hits: s.cacheHits,
    status: s.throttledUntil > now ? 'throttled' : 'ready',
  }));
  res.writeHead(200, { 'content-type': 'application/json' });
  res.end(JSON.stringify({
    status: 'ok',
    target: config.target.replace(/\/\/[^@]+@/, '//***@'),
    keys: info,
    sessions: sessionKeyMap.size,
    // Math.floor instead of `| 0`: the bitwise form truncates to a signed
    // 32-bit integer, which overflows to a negative value after ~24.8 days.
    uptime_ms: Math.floor(process.uptime() * 1000),
  }, null, 2));
}
305
+
306
+ // ─── Server ────────────────────────────────────────────────────────────
307
// HTTP entry point: GET /health is answered locally, everything else is
// proxied to the target.
const server = http.createServer((req, res) => {
  const isHealthProbe = req.method === 'GET' && req.url === '/health';
  if (isHealthProbe) {
    handleHealth(req, res);
    return;
  }
  handleRequest(req, res);
});

// Listen on the loopback interface only and print a startup banner.
server.listen(config.port, '127.0.0.1', () => {
  const banner = [
    `[START] api-key-lb proxy listening on http://127.0.0.1:${config.port}`,
    `[START] ${config.keys.length} key(s) loaded`,
    `[START] Target: ${config.target}`,
    `[START] Routing: session-aware sticky`,
    `[START] Cooldown: ${config.cooldownMs}ms`,
    `[START] Health: http://127.0.0.1:${config.port}/health`,
  ];
  for (const line of banner) {
    console.log(line);
  }
});

// Status log every 5 minutes
setInterval(() => {
  const now = Date.now();
  const status = keyState.map((s) => {
    let state = 'ready';
    if (s.throttledUntil > now) {
      state = `throttled ${((s.throttledUntil - now) / 1000)|0}s`;
    }
    return {
      id: s.id,
      reqs: s.requestCount,
      errs: s.errorCount,
      cache: s.cacheHits,
      status: state,
    };
  });
  console.log(`[STATUS] ${JSON.stringify(status)} sessions=${sessionKeyMap.size}`);
}, 300000);