api-key-lb 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +125 -0
- package/package.json +28 -0
- package/src/cli.mjs +299 -0
- package/src/proxy.mjs +334 -0
package/README.md
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
# api-key-lb
|
|
2
|
+
|
|
3
|
+
Transparent API key load balancer with session-aware sticky routing. Works with any OpenAI-compatible API.
|
|
4
|
+
|
|
5
|
+
**Why?** Agentic systems (Hermes, OpenCode, Claude Code, etc.) build up context caches per API key. If you round-robin between keys, you lose cache affinity on every other request. This proxy uses sticky routing — same session always hits the same key.
|
|
6
|
+
|
|
7
|
+
## Features
|
|
8
|
+
|
|
9
|
+
- **Session-aware sticky routing** — same session fingerprint → same key (cache-friendly)
|
|
10
|
+
- **Automatic 429 fallback** — throttled key triggers fallback, reverts when unthrottled
|
|
11
|
+
- **Works with anything** — Hermes, OpenCode/kimaki, Claude Code, curl, any OpenAI-compatible client
|
|
12
|
+
- **Zero-config target** — proxy is transparent, forwards any path to the target API
|
|
13
|
+
- **Health endpoint** — `GET /health` for monitoring
|
|
14
|
+
- **macOS LaunchAgent** — auto-starts on login, auto-restarts on crash
|
|
15
|
+
|
|
16
|
+
## Quick Start
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
# Install globally (or use directly from this dir)
|
|
20
|
+
npm install -g .
|
|
21
|
+
|
|
22
|
+
# Setup — saves config, patches known configs, installs LaunchAgent
|
|
23
|
+
api-key-lb setup \
|
|
24
|
+
--keys "sk-key1,sk-key2" \
|
|
25
|
+
--target "https://api.z.ai" \
|
|
26
|
+
--port 4577
|
|
27
|
+
|
|
28
|
+
# Check status
|
|
29
|
+
api-key-lb status
|
|
30
|
+
|
|
31
|
+
# Stop
|
|
32
|
+
api-key-lb stop
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## Config
|
|
36
|
+
|
|
37
|
+
Priority: CLI flags → env vars → config file → defaults
|
|
38
|
+
|
|
39
|
+
**Config file:** `~/.config/api-key-lb/config.json`
|
|
40
|
+
|
|
41
|
+
```json
|
|
42
|
+
{
|
|
43
|
+
"target": "https://api.z.ai",
|
|
44
|
+
"keys": "key1,key2",
|
|
45
|
+
"port": 4577,
|
|
46
|
+
"cooldown_ms": 60000,
|
|
47
|
+
"session_ttl_ms": 3600000
|
|
48
|
+
}
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
**Environment variables:**
|
|
52
|
+
|
|
53
|
+
| Variable | Default | Description |
|
|
54
|
+
|---|---|---|
|
|
55
|
+
| `API_KEYS` | required | Comma-separated API keys |
|
|
56
|
+
| `TARGET` | `https://api.openai.com` | Target API base URL |
|
|
57
|
+
| `PORT` | `4577` | Proxy listen port |
|
|
58
|
+
| `COOLDOWN_MS` | `60000` | 429 cooldown per key |
|
|
59
|
+
| `SESSION_TTL_MS` | `3600000` | Session sticky TTL (parsed by the proxy, but not yet enforced in 1.0.0) |
|
|
60
|
+
| `API_KEY_LB_CONFIG` | — | Path to config file |
|
|
61
|
+
|
|
62
|
+
## How Sticky Routing Works
|
|
63
|
+
|
|
64
|
+
1. Extracts a session fingerprint from the request body (session_id, conversation_id, or model+system prompt hash)
|
|
65
|
+
2. Hashes the fingerprint to deterministically pick a key
|
|
66
|
+
3. Same fingerprint always routes to the same key
|
|
67
|
+
4. Different sessions get distributed across keys
|
|
68
|
+
5. On 429: falls back to alternate key, reverts to sticky when unthrottled
|
|
69
|
+
|
|
70
|
+
## Connecting Your Tools
|
|
71
|
+
|
|
72
|
+
Just change the base URL to point at the proxy:
|
|
73
|
+
|
|
74
|
+
**Hermes** (`~/.hermes/config.yaml`):
|
|
75
|
+
```yaml
|
|
76
|
+
model:
|
|
77
|
+
base_url: http://127.0.0.1:4577/api/coding/paas/v4
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
**OpenCode** (`~/.config/opencode/opencode.json`):
|
|
81
|
+
```json
|
|
82
|
+
{
|
|
83
|
+
"provider": {
|
|
84
|
+
"zai": {
|
|
85
|
+
"options": {
|
|
86
|
+
"baseURL": "http://127.0.0.1:4577/api/coding/paas/v4"
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
**Any OpenAI-compatible client:**
|
|
94
|
+
```bash
|
|
95
|
+
curl http://127.0.0.1:4577/v1/chat/completions \
|
|
96
|
+
-H "Authorization: Bearer anything" \
|
|
97
|
+
-d '{"model":"gpt-4","messages":[...]}'
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
The `Authorization` header gets replaced by the proxy — the key you pass doesn't matter.
|
|
101
|
+
|
|
102
|
+
## Health Check
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
curl http://127.0.0.1:4577/health
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
Returns per-key stats: requests, errors, cache hits, throttle status, active sessions.
|
|
109
|
+
|
|
110
|
+
## Architecture
|
|
111
|
+
|
|
112
|
+
```
|
|
113
|
+
┌─────────┐ ┌──────────────────┐ ┌──────────┐
|
|
114
|
+
│ Client │────▶│ api-key-lb proxy │────▶│ API │
|
|
115
|
+
│ (Hermes) │ │ :4577 │ │ (z.ai) │
|
|
116
|
+
│ (OpenCode)│ │ sticky routing │ │ │
|
|
117
|
+
│ (curl) │ │ 429 fallback │ │ │
|
|
118
|
+
└─────────┘ └──────────────────┘ └──────────┘
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
The proxy is fully transparent — it forwards whatever path and headers the client sends, replacing only the `Authorization` bearer token and the `Host` header, and dropping the hop-by-hop `Connection` header.
|
|
122
|
+
|
|
123
|
+
## License
|
|
124
|
+
|
|
125
|
+
MIT
|
package/package.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "api-key-lb",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Transparent API key load balancer with session-aware sticky routing. Works with any OpenAI-compatible API provider.",
|
|
5
|
+
"main": "src/proxy.mjs",
|
|
6
|
+
"bin": {
|
|
7
|
+
"api-key-lb": "./src/cli.mjs"
|
|
8
|
+
},
|
|
9
|
+
"files": [
|
|
10
|
+
"src/",
|
|
11
|
+
"README.md"
|
|
12
|
+
],
|
|
13
|
+
"scripts": {
|
|
14
|
+
"start": "node src/proxy.mjs",
|
|
15
|
+
"setup": "node src/cli.mjs setup",
|
|
16
|
+
"status": "node src/cli.mjs status"
|
|
17
|
+
},
|
|
18
|
+
"keywords": ["proxy", "load-balancer", "api-keys", "openai", "openrouter", "cache-affinity", "rate-limit", "sticky-routing"],
|
|
19
|
+
"author": "jairodri",
|
|
20
|
+
"license": "MIT",
|
|
21
|
+
"engines": {
|
|
22
|
+
"node": ">=18"
|
|
23
|
+
},
|
|
24
|
+
"repository": {
|
|
25
|
+
"type": "git",
|
|
26
|
+
"url": "https://github.com/jairodri/api-key-lb"
|
|
27
|
+
}
|
|
28
|
+
}
|
package/src/cli.mjs
ADDED
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* api-key-lb CLI — setup, status, and config management
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import fs from 'node:fs';
|
|
7
|
+
import path from 'node:path';
|
|
8
|
+
import os from 'node:os';
|
|
9
|
+
import { execSync } from 'node:child_process';
|
|
10
|
+
import { fileURLToPath } from 'node:url';
|
|
11
|
+
|
|
12
|
+
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
13
|
+
const command = process.argv[2] || 'help';
|
|
14
|
+
const args = process.argv.slice(3);
|
|
15
|
+
|
|
16
|
+
const HOME = os.homedir();
|
|
17
|
+
const CONFIG_DIR = path.join(HOME, '.config', 'api-key-lb');
|
|
18
|
+
const CONFIG_PATH = path.join(CONFIG_DIR, 'config.json');
|
|
19
|
+
const DEFAULT_PORT = 4577;
|
|
20
|
+
|
|
21
|
+
// ─── Commands ──────────────────────────────────────────────────────────
|
|
22
|
+
|
|
23
|
+
/**
 * `setup` command: merge CLI flags into the stored config, persist it, patch
 * known client configs, and install the macOS LaunchAgent.
 *
 * Exits the process with status 1 when the existing config cannot be parsed,
 * or when no API keys or no target URL are available after merging.
 */
function cmdSetup() {
  console.log('🔧 api-key-lb setup\n');

  // 1. Ensure config dir
  fs.mkdirSync(CONFIG_DIR, { recursive: true });

  // 2. Load or create config
  let cfg = {};
  if (fs.existsSync(CONFIG_PATH)) {
    try {
      cfg = JSON.parse(fs.readFileSync(CONFIG_PATH, 'utf8'));
    } catch (e) {
      // A corrupt config is a user-fixable problem; report it instead of crashing.
      console.error(` ERROR: Could not parse ${CONFIG_PATH}: ${e.message}`);
      process.exit(1);
    }
    console.log(` Found existing config at ${CONFIG_PATH}`);
  } else {
    console.log(` Creating config at ${CONFIG_PATH}`);
  }

  // Parse CLI args (flags win over values already stored in the file)
  const keysArg = findArg('--keys') || findArg('-k');
  const targetArg = findArg('--target') || findArg('-t');
  const portArg = findArg('--port') || findArg('-p');

  if (keysArg) cfg.keys = keysArg;
  if (targetArg) cfg.target = targetArg;
  if (portArg) cfg.port = parseInt(portArg, 10);

  // A NaN or missing port falls back to the default.
  cfg.port = cfg.port || DEFAULT_PORT;

  // Normalize keys to an array and drop blanks such as the trailing entry in "k1,k2,".
  const rawKeys = typeof cfg.keys === 'string' ? cfg.keys.split(',') : cfg.keys;
  cfg.keys = Array.isArray(rawKeys)
    ? rawKeys.map((k) => String(k).trim()).filter(Boolean)
    : [];

  if (cfg.keys.length === 0) {
    console.error(' ERROR: --keys required. Usage: api-key-lb setup --keys key1,key2 [--target URL] [--port PORT]');
    process.exit(1);
  }
  if (!cfg.target) {
    console.error(' ERROR: --target required. Usage: api-key-lb setup --keys key1,key2 --target https://api.z.ai');
    process.exit(1);
  }

  // 3. Write config. The file holds API keys, so keep it owner-only.
  fs.writeFileSync(CONFIG_PATH, JSON.stringify(cfg, null, 2), { mode: 0o600 });
  try {
    // `mode` only applies when the file is created; tighten an existing file too.
    fs.chmodSync(CONFIG_PATH, 0o600);
  } catch (e) {
    console.log(` ⚠️ Could not restrict permissions on ${CONFIG_PATH}: ${e.message}`);
  }
  console.log(` ✅ Config saved (${cfg.keys.length} keys, target: ${cfg.target})`);

  // 4. Patch agentic system configs
  patchHermes(cfg);
  patchOpenCode(cfg);

  // 5. Install LaunchAgent (macOS)
  installLaunchAgent(cfg);

  console.log('\n ✅ Setup complete. Start with: api-key-lb start');
}
|
|
74
|
+
|
|
75
|
+
/**
 * `status` command: query the local proxy's `/health` endpoint and print a
 * summary of overall state plus per-key counters.
 *
 * Any failure (proxy down, curl timeout, unparsable response) is reported as
 * "not running".
 */
function cmdStatus() {
  const port = getPort();
  try {
    const result = execSync(`curl -s http://127.0.0.1:${port}/health`, { timeout: 3000 }).toString();
    const health = JSON.parse(result);
    console.log('📊 api-key-lb status\n');
    console.log(` Status: ${health.status}`);
    console.log(` Target: ${health.target}`);
    console.log(` Sessions: ${health.sessions}`);
    console.log(` Uptime: ${(health.uptime_ms / 1000 / 60).toFixed(1)} min\n`);
    for (const key of health.keys) {
      // The health endpoint already returns the key prefix suffixed with
      // "...", so no second ellipsis is added here.
      console.log(` Key #${key.id} (${key.key})`);
      console.log(` Requests: ${key.requests} Errors: ${key.errors} Cache hits: ${key.cache_hits} Status: ${key.status}`);
    }
  } catch {
    console.log('❌ Proxy not running on port ' + port);
    console.log(' Start with: api-key-lb start');
  }
}
|
|
94
|
+
|
|
95
|
+
/**
 * `start` command: report an already-running proxy, otherwise load the proxy
 * module into this process so it starts listening.
 */
function cmdStart() {
  const port = getPort();

  // Check if already running
  try {
    const result = execSync(`curl -s http://127.0.0.1:${port}/health`, { timeout: 2000 }).toString();
    const health = JSON.parse(result);
    console.log(`✅ Already running on port ${port} (${health.keys.length} keys, ${health.sessions} sessions)`);
    return;
  } catch {
    // Not running — start it
  }

  // The proxy is imported into this process, so it sees process.env directly.
  // Point it at the CLI-managed config unless the caller already chose one;
  // only do so when the file exists, because the proxy reads that path
  // unconditionally once the variable is set.
  if (!process.env.API_KEY_LB_CONFIG && fs.existsSync(CONFIG_PATH)) {
    process.env.API_KEY_LB_CONFIG = CONFIG_PATH;
  }

  console.log(`Starting api-key-lb on port ${port}...`);
  // Resolve the sibling module as a file: URL, which dynamic import() accepts
  // on every platform, and surface load failures instead of leaving the
  // promise unhandled.
  import(new URL('./proxy.mjs', import.meta.url).href).catch((err) => {
    console.error(`❌ Failed to start proxy: ${err.message}`);
    process.exitCode = 1;
  });
}
|
|
115
|
+
|
|
116
|
+
/**
 * `stop` command: send SIGTERM to every process listening on the proxy port.
 *
 * NOTE(review): the LaunchAgent written by installLaunchAgent sets KeepAlive,
 * so launchd may restart the proxy after this signal — confirm whether `stop`
 * should also unload the agent.
 */
function cmdStop() {
  const port = getPort();

  let pids;
  try {
    // lsof prints one PID per line and exits non-zero when nothing matches.
    pids = execSync(`lsof -ti:${port} -sTCP:LISTEN`, { encoding: 'utf8' })
      .split('\n')
      .map((line) => Number.parseInt(line.trim(), 10))
      .filter((pid) => Number.isInteger(pid));
  } catch {
    console.log('Proxy not running');
    return;
  }

  if (pids.length === 0) {
    console.log('Proxy not running');
    return;
  }

  for (const pid of pids) {
    try {
      process.kill(pid, 'SIGTERM');
      console.log(`✅ Stopped proxy (PID ${pid})`);
    } catch (e) {
      console.log(`⚠️ Could not stop PID ${pid}: ${e.message}`);
    }
  }
}
|
|
129
|
+
|
|
130
|
+
/**
 * `help` command: print the usage text (commands, setup flags, file
 * locations, and supported environment variables) to stdout.
 */
function cmdHelp() {
  const usageText = `
api-key-lb — Transparent API Key Load Balancer

Usage:
api-key-lb setup --keys key1,key2 --target https://api.z.ai [--port 4577]
api-key-lb start Start the proxy (or show status if running)
api-key-lb stop Stop the proxy
api-key-lb status Show proxy health and key stats

Setup options:
-k, --keys Comma-separated API keys
-t, --target Target API base URL (e.g. https://api.z.ai, https://openrouter.ai/api/v1)
-p, --port Proxy port (default: 4577)

Config file: ~/.config/api-key-lb/config.json
LaunchAgent: ~/Library/LaunchAgents/com.api-key-lb.plist (macOS)

Environment variables (override config):
API_KEYS=key1,key2 API keys to balance
TARGET=https://... Target API URL
PORT=4577 Proxy port
COOLDOWN_MS=60000 429 cooldown in ms
SESSION_TTL_MS=3600000 Session sticky TTL in ms
`;
  console.log(usageText);
}
|
|
156
|
+
|
|
157
|
+
// ─── Config Patching ───────────────────────────────────────────────────
|
|
158
|
+
|
|
159
|
+
/**
 * Rewrite `base_url:` entries in `~/.hermes/config.yaml` that point at the
 * target API host so they point at the local proxy instead.
 *
 * Best-effort: a missing file or any failure is logged and skipped.
 *
 * @param {{ target: string, port: number }} cfg - Saved proxy config.
 */
function patchHermes(cfg) {
  const hermesConfig = path.join(HOME, '.hermes', 'config.yaml');
  if (!fs.existsSync(hermesConfig)) {
    console.log(' ⏭ Hermes config not found — skipping');
    return;
  }

  const proxyUrl = `http://127.0.0.1:${cfg.port}`;

  try {
    const content = fs.readFileSync(hermesConfig, 'utf8');
    const targetHost = new URL(cfg.target).hostname;
    const escapedHost = targetHost.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

    // Match `base_url: <scheme>://<host>[:port]`, optionally quoted. The port
    // is consumed so it does not dangle after the proxy URL, and the lookahead
    // stops the match from firing on a longer hostname that merely starts
    // with the target host.
    const pattern = new RegExp(
      `(base_url:\\s*["']?)https?://${escapedHost}(?::\\d+)?(?![\\w.-])`,
      'g',
    );
    const patched = content.replace(pattern, (_match, prefix) => `${prefix}${proxyUrl}`);

    if (patched !== content) {
      fs.writeFileSync(hermesConfig, patched);
      console.log(` ✅ Patched Hermes config → ${proxyUrl}`);
    } else {
      console.log(` ⏭ Hermes config doesn't reference ${targetHost} — skipping`);
    }
  } catch (e) {
    console.log(` ⚠️ Could not patch Hermes: ${e.message}`);
  }
}
|
|
185
|
+
|
|
186
|
+
/**
 * Rewrite provider `options.baseURL` values in
 * `~/.config/opencode/opencode.json` whose host is the target API host so
 * they point at the local proxy, keeping the original path.
 *
 * Best-effort: a missing file or any failure is logged and skipped.
 *
 * @param {{ target: string, port: number }} cfg - Saved proxy config.
 */
function patchOpenCode(cfg) {
  const opencodeConfig = path.join(HOME, '.config', 'opencode', 'opencode.json');
  if (!fs.existsSync(opencodeConfig)) {
    console.log(' ⏭ OpenCode config not found — skipping');
    return;
  }

  try {
    const content = JSON.parse(fs.readFileSync(opencodeConfig, 'utf8'));
    const proxyUrl = `http://127.0.0.1:${cfg.port}`;
    const targetHost = new URL(cfg.target).hostname;
    let patched = false;

    // Walk all providers — find any whose baseURL targets our API host
    for (const [name, provider] of Object.entries(content.provider ?? {})) {
      const baseURL = provider?.options?.baseURL;
      if (typeof baseURL !== 'string') continue;

      let parsed;
      try {
        parsed = new URL(baseURL);
      } catch {
        // One malformed provider URL should not abort patching the others.
        continue;
      }

      // Compare parsed hostnames rather than substrings, so a host that only
      // contains the target name is left alone. Never re-patch an entry that
      // already points at the proxy.
      if (parsed.hostname !== targetHost || parsed.hostname === '127.0.0.1') continue;

      // Replace the target host with proxy, keeping the path
      provider.options.baseURL = `${proxyUrl}${parsed.pathname}`;
      console.log(` ✅ Patched OpenCode provider "${name}" → ${provider.options.baseURL}`);
      patched = true;
    }

    if (patched) {
      fs.writeFileSync(opencodeConfig, JSON.stringify(content, null, 2));
    } else {
      console.log(` ⏭ OpenCode config doesn't reference ${targetHost} directly — skipping`);
    }
  } catch (e) {
    console.log(` ⚠️ Could not patch OpenCode: ${e.message}`);
  }
}
|
|
221
|
+
|
|
222
|
+
/**
 * Write and load a per-user macOS LaunchAgent that runs the proxy with
 * `API_KEY_LB_CONFIG` pointing at the CLI-managed config, starting at load
 * and kept alive by launchd. Does nothing on other platforms.
 *
 * @param {object} cfg - Saved proxy config (currently unused here).
 */
function installLaunchAgent(cfg) {
  if (process.platform !== 'darwin') {
    console.log(' ⏭ LaunchAgent only supported on macOS — skipping');
    return;
  }

  const agentsDir = path.join(HOME, 'Library', 'LaunchAgents');
  const plistPath = path.join(agentsDir, 'com.api-key-lb.plist');
  const proxyPath = path.resolve(__dirname, 'proxy.mjs');
  const logPath = path.join(CONFIG_DIR, 'proxy.log');

  // Paths may contain characters that are special in XML or in the shell.
  const xml = (value) => String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;');
  const sh = (value) => `'${String(value).replace(/'/g, `'\\''`)}'`;

  const plist = `<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
  <key>Label</key>
  <string>com.api-key-lb</string>
  <key>ProgramArguments</key>
  <array>
    <string>${xml(process.execPath)}</string>
    <string>${xml(proxyPath)}</string>
  </array>
  <key>EnvironmentVariables</key>
  <dict>
    <key>API_KEY_LB_CONFIG</key>
    <string>${xml(CONFIG_PATH)}</string>
  </dict>
  <key>RunAtLoad</key>
  <true/>
  <key>KeepAlive</key>
  <true/>
  <key>StandardOutPath</key>
  <string>${xml(logPath)}</string>
  <key>StandardErrorPath</key>
  <string>${xml(logPath)}</string>
</dict>
</plist>`;

  // The directory is absent on accounts that never installed an agent.
  fs.mkdirSync(agentsDir, { recursive: true });
  fs.writeFileSync(plistPath, plist);
  console.log(` ✅ LaunchAgent installed at ${plistPath}`);

  // Unload old if exists, load new
  try {
    execSync(`launchctl unload ${sh(plistPath)} 2>/dev/null`, { stdio: 'pipe' });
  } catch {
    // Nothing was loaded under this label yet — expected on first install.
  }
  try {
    execSync(`launchctl load ${sh(plistPath)}`);
    console.log(' ✅ LaunchAgent loaded (starts on login)');
  } catch (e) {
    console.log(` ⚠️ Could not load LaunchAgent: ${e.message}`);
    console.log(` Run manually: launchctl load ${sh(plistPath)}`);
  }
}
|
|
274
|
+
|
|
275
|
+
// ─── Helpers ───────────────────────────────────────────────────────────
|
|
276
|
+
|
|
277
|
+
/**
 * Look up the value that follows `flag` in the command's argument list.
 *
 * @param {string} flag - Flag to search for, e.g. `--keys`.
 * @returns {string|null} The following argument, or null when the flag is
 *   absent or has no non-empty value after it.
 */
function findArg(flag) {
  const position = args.indexOf(flag);
  if (position === -1) {
    return null;
  }
  const value = args[position + 1];
  return value ? value : null;
}
|
|
281
|
+
|
|
282
|
+
/**
 * Resolve the port the CLI should talk to: the saved config's `port`, else
 * the `PORT` environment variable, else the default.
 *
 * The result is always a valid integer port, because callers interpolate it
 * into shell commands (`curl`, `lsof`).
 *
 * NOTE(review): the proxy resolves `PORT` from the environment before the
 * config file, while this helper prefers the config file — confirm which
 * order is intended.
 *
 * @returns {number} TCP port in the range 1-65535.
 */
function getPort() {
  const toPort = (value) => {
    const n = Number.parseInt(value, 10);
    return Number.isInteger(n) && n > 0 && n < 65536 ? n : null;
  };

  if (fs.existsSync(CONFIG_PATH)) {
    try {
      const saved = JSON.parse(fs.readFileSync(CONFIG_PATH, 'utf8')).port;
      return toPort(saved) ?? DEFAULT_PORT;
    } catch {
      // Unreadable or invalid config: fall through to the environment.
    }
  }
  return toPort(process.env.PORT) ?? DEFAULT_PORT;
}
|
|
290
|
+
|
|
291
|
+
// ─── Dispatch ──────────────────────────────────────────────────────────
|
|
292
|
+
// Map each accepted command word to its handler; anything else prints an
// "unknown command" notice followed by the help text.
const commandHandlers = {
  setup: cmdSetup,
  start: cmdStart,
  stop: cmdStop,
  status: cmdStatus,
  help: cmdHelp,
  '--help': cmdHelp,
  '-h': cmdHelp,
};

if (Object.hasOwn(commandHandlers, command)) {
  const runCommand = commandHandlers[command];
  runCommand();
} else {
  console.log(`Unknown command: ${command}\n`);
  cmdHelp();
}
|
package/src/proxy.mjs
ADDED
|
@@ -0,0 +1,334 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* api-key-lb — Transparent API Key Load Balancer
|
|
4
|
+
*
|
|
5
|
+
* Sits between any agentic system and an OpenAI-compatible API.
|
|
6
|
+
* Routes requests with session-aware sticky routing for context cache affinity.
|
|
7
|
+
* Falls back to alternate keys on 429s, then reverts to sticky key.
|
|
8
|
+
*
|
|
9
|
+
* Features:
|
|
10
|
+
* - Works with any OpenAI-compatible API (z.ai, OpenRouter, OpenAI, etc.)
|
|
11
|
+
* - Session-aware sticky routing (hashes request fingerprint → same key)
|
|
12
|
+
* - Automatic 429 fallback with configurable cooldown
|
|
13
|
+
* - Zero-config: point your client at the proxy, it handles the rest
|
|
14
|
+
* - Health endpoint for monitoring
|
|
15
|
+
*
|
|
16
|
+
* Usage:
|
|
17
|
+
 *   API_KEYS=key1,key2 TARGET=https://api.z.ai PORT=4577 node proxy.mjs
|
|
18
|
+
*
|
|
19
|
+
* Or via config file:
|
|
20
|
+
* node proxy.mjs --config ./api-key-lb.json
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
import http from 'node:http';
|
|
24
|
+
import https from 'node:https';
|
|
25
|
+
import crypto from 'node:crypto';
|
|
26
|
+
import fs from 'node:fs';
|
|
27
|
+
import path from 'node:path';
|
|
28
|
+
import { URL } from 'node:url';
|
|
29
|
+
import { fileURLToPath } from 'node:url';
|
|
30
|
+
|
|
31
|
+
// ─── Config ────────────────────────────────────────────────────────────
|
|
32
|
+
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
33
|
+
|
|
34
|
+
/**
 * Load the JSON config file, trying in order: the `--config <path>` CLI flag,
 * the `API_KEY_LB_CONFIG` environment variable, then a list of well-known
 * locations. Returns an empty object when no file is found, so that
 * configuration can come purely from environment variables.
 *
 * @returns {object} Parsed config file contents, or `{}`.
 * @throws {Error} If a selected file cannot be read or is not valid JSON.
 */
function loadConfig() {
  const readJson = (file) => JSON.parse(fs.readFileSync(file, 'utf8'));

  // 1. CLI --config flag
  const configIdx = process.argv.indexOf('--config');
  if (configIdx !== -1 && process.argv[configIdx + 1]) {
    return readJson(path.resolve(process.argv[configIdx + 1]));
  }

  // 2. ENV-based config file path
  if (process.env.API_KEY_LB_CONFIG) {
    return readJson(process.env.API_KEY_LB_CONFIG);
  }

  // 3. Look for config in common locations
  const searchPaths = [
    path.join(process.cwd(), 'api-key-lb.json'),
    path.join(__dirname, '..', 'api-key-lb.json'),
  ];
  // fs does not expand "~", so only add the per-user location when a real
  // home directory is known (HOME on POSIX, USERPROFILE on Windows).
  const home = process.env.HOME || process.env.USERPROFILE;
  if (home) {
    searchPaths.push(path.join(home, '.config', 'api-key-lb', 'config.json'));
  }
  for (const p of searchPaths) {
    if (fs.existsSync(p)) {
      return readJson(p);
    }
  }

  // 4. Pure env vars
  return {};
}
|
|
62
|
+
|
|
63
|
+
/**
 * Merge environment variables, config-file values, and defaults into the
 * effective runtime configuration. For every setting a non-empty environment
 * variable wins over the file value, which wins over the default.
 *
 * Numeric settings that do not parse as integers fall back to their defaults
 * instead of propagating NaN.
 *
 * @param {object} fileConfig - Parsed config file contents (may be `{}`).
 * @returns {{ keys: string[], target: string, port: number,
 *   cooldownMs: number, sessionTTL: number }}
 */
function resolveConfig(fileConfig) {
  const pick = (envName, fileKey, fallback) => {
    if (process.env[envName]) return process.env[envName];
    if (fileConfig[fileKey] !== undefined) return String(fileConfig[fileKey]);
    return fallback;
  };

  const pickInt = (envName, fileKey, fallback) => {
    const parsed = Number.parseInt(pick(envName, fileKey, String(fallback)), 10);
    return Number.isNaN(parsed) ? fallback : parsed;
  };

  // Keys may be a comma-separated string (env or file) or an array (file).
  const rawKeys = process.env.API_KEYS || fileConfig.keys || '';
  const keys = (Array.isArray(rawKeys) ? rawKeys : String(rawKeys).split(','))
    .map((k) => String(k).trim())
    .filter(Boolean);

  return {
    keys,
    target: pick('TARGET', 'target', 'https://api.openai.com'),
    port: pickInt('PORT', 'port', 4577),
    cooldownMs: pickInt('COOLDOWN_MS', 'cooldown_ms', 60000),
    sessionTTL: pickInt('SESSION_TTL_MS', 'session_ttl_ms', 3600000),
  };
}
|
|
79
|
+
|
|
80
|
+
// Effective configuration: file values merged with env vars and defaults.
const fileConfig = loadConfig();
const config = resolveConfig(fileConfig);

// The proxy cannot do anything useful without at least one key.
if (!(config.keys.length > 0)) {
  console.error('ERROR: No API keys. Set API_KEYS=key1,key2,... or use a config file.');
  console.error(' See: api-key-lb --help');
  process.exit(1);
}

// ─── Key State ─────────────────────────────────────────────────────────
// One mutable record per configured key: throttle deadline plus counters.
const keyState = Array.from(config.keys, (key, id) => ({
  id,
  key,
  throttledUntil: 0,
  requestCount: 0,
  errorCount: 0,
  cacheHits: 0,
}));

// ─── Session Tracking ──────────────────────────────────────────────────
// session id → index of its sticky key; pruned once it exceeds SESSION_MAX.
const sessionKeyMap = new Map();
const SESSION_MAX = 2000;
|
|
102
|
+
|
|
103
|
+
/**
 * Deterministically map a session fingerprint to a key index.
 *
 * Uses the first 32 bits of the SHA-256 digest rather than a single byte, so
 * every index stays reachable with more than 256 keys and modulo bias is
 * negligible.
 *
 * @param {string} str - Session fingerprint.
 * @returns {number} Index into the configured key list.
 */
function hashToKeyIndex(str) {
  const digest = crypto.createHash('sha256').update(str).digest();
  return digest.readUInt32BE(0) % config.keys.length;
}
|
|
107
|
+
|
|
108
|
+
/**
 * Derive a session fingerprint from a JSON request body.
 *
 * Tried in order:
 *   1. top-level `session_id`, `sessionId`, `conversation_id`,
 *      `conversationId`, `thread_id`;
 *   2. the first message carrying `custom_fields.session_id` or
 *      `metadata.session_id`;
 *   3. `<model>:<first 500 chars of the system message content>`.
 *
 * The result is always a string or null. Numeric ids are stringified and
 * other non-string ids are ignored, because callers slice and hash the value.
 *
 * @param {Buffer|null|undefined} body - Raw request body.
 * @returns {string|null} Fingerprint, or null when the body is empty, is not
 *   JSON, or yields none of the above.
 */
function extractSessionId(body) {
  if (!body || body.length === 0) return null;

  let json;
  try {
    json = JSON.parse(body.toString('utf8'));
  } catch {
    // Not JSON — no fingerprint available.
    return null;
  }
  if (json === null || typeof json !== 'object') return null;

  const asId = (value) => {
    if (typeof value === 'string' && value !== '') return value;
    if (typeof value === 'number' && Number.isFinite(value)) return String(value);
    return null;
  };

  // 1. Explicit session identifiers (OpenCode, custom agents)
  const candidates = [
    json.session_id,
    json.sessionId,
    json.conversation_id,
    json.conversationId,
    json.thread_id,
  ];
  for (const candidate of candidates) {
    const id = asId(candidate);
    if (id !== null) return id;
  }

  // 2. Check messages metadata
  const msgs = Array.isArray(json.messages) ? json.messages : [];
  for (const msg of msgs) {
    const id = asId(msg?.custom_fields?.session_id) ?? asId(msg?.metadata?.session_id);
    if (id !== null) return id;
  }

  // 3. Fingerprint: model + system prompt prefix (stable across a session)
  const systemMsg = msgs.find((m) => m?.role === 'system');
  if (systemMsg) {
    const { content } = systemMsg;
    if (content == null) return null;
    // Content may be an array of parts; serialize it so different prompts
    // still produce different fingerprints.
    const text = typeof content === 'string' ? content : JSON.stringify(content);
    return `${json.model || ''}:${text.slice(0, 500)}`;
  }

  return null;
}
|
|
144
|
+
|
|
145
|
+
/**
 * Bound the session map: once it holds more than SESSION_MAX entries, rebuild
 * it from only the most recently inserted half.
 */
function pruneSessions() {
  if (sessionKeyMap.size <= SESSION_MAX) {
    return;
  }
  // Map iteration follows insertion order, so the tail is the newest entries.
  const keepCount = Math.floor(SESSION_MAX / 2);
  const survivors = Array.from(sessionKeyMap).slice(-keepCount);
  sessionKeyMap.clear();
  for (const [sessionId, keyIndex] of survivors) {
    sessionKeyMap.set(sessionId, keyIndex);
  }
}
|
|
152
|
+
|
|
153
|
+
/**
 * Choose the key a request should use.
 *
 * A session id is pinned to a key index (remembered in sessionKeyMap, or
 * hashed and stored on first sight). The pinned key is used while it is not
 * throttled; otherwise the first unthrottled key is used, and if every key is
 * throttled, the one whose throttle expires soonest.
 *
 * @param {string|null} sessionId - Session fingerprint, or null if unknown.
 * @returns {{ state: object, isSticky: boolean }} The chosen key record and
 *   whether it is the session's pinned key.
 */
function getKeyForSession(sessionId) {
  let stickyIndex = -1;
  if (sessionId) {
    if (sessionKeyMap.has(sessionId)) {
      stickyIndex = sessionKeyMap.get(sessionId);
    } else {
      stickyIndex = hashToKeyIndex(sessionId);
      sessionKeyMap.set(sessionId, stickyIndex);
      pruneSessions();
    }
  }

  const now = Date.now();

  // Prefer the pinned key while it is usable.
  if (stickyIndex >= 0) {
    const sticky = keyState[stickyIndex];
    if (sticky.throttledUntil <= now) {
      return { state: sticky, isSticky: true };
    }
    console.log(`[STICKY] Key #${sticky.id} throttled, trying alternate for session ${sessionId?.slice(0, 30)}...`);
  }

  // Otherwise take the first key that is not throttled.
  const available = keyState.find((candidate) => candidate.throttledUntil <= now);
  if (available) {
    return { state: available, isSticky: false };
  }

  // Everything is throttled: pick the key that unlocks first (earliest wins ties).
  const soonest = keyState.reduce((best, candidate) =>
    (best.throttledUntil <= candidate.throttledUntil ? best : candidate)
  );
  return { state: soonest, isSticky: false };
}
|
|
189
|
+
|
|
190
|
+
/**
 * Record that a key was rate-limited: set its throttle deadline, bump its
 * error counter, and log the deadline.
 *
 * @param {object} state - Key record to update in place.
 * @param {number} [retryAfterMs] - Back-off in milliseconds; the configured
 *   cooldown is used when this is missing or zero.
 */
function markThrottled(state, retryAfterMs) {
  const backoffMs = retryAfterMs ? retryAfterMs : config.cooldownMs;
  state.throttledUntil = Date.now() + backoffMs;
  state.errorCount += 1;

  const unlockAt = new Date(state.throttledUntil).toISOString();
  console.log(`[THROTTLE] Key #${state.id} throttled until ${unlockAt}`);
}
|
|
195
|
+
|
|
196
|
+
// ─── Request Handler ───────────────────────────────────────────────────
|
|
197
|
+
/**
 * Proxy one client request to the target API.
 *
 * The request body is buffered, a key is chosen from its session fingerprint,
 * and the request is forwarded with the `Authorization` and `Host` headers
 * rewritten. A 429 from upstream throttles the key and retries on another
 * one, up to one attempt per configured key; upstream connection errors are
 * retried the same way. Any other response is streamed back unchanged.
 *
 * @param {import('node:http').IncomingMessage} req - Client request.
 * @param {import('node:http').ServerResponse} res - Client response.
 */
function handleRequest(req, res) {
  const chunks = [];
  req.on('data', (chunk) => chunks.push(chunk));
  // Without a listener, an aborted upload would raise an unhandled 'error' event.
  req.on('error', (err) => {
    console.error('[ERROR] Client request failed:', err.message);
  });
  req.on('end', () => {
    const body = Buffer.concat(chunks);
    const sessionId = extractSessionId(body);
    const { state: initialState, isSticky } = getKeyForSession(sessionId);
    const attempts = [];

    function tryRequest(ks, wasSticky) {
      attempts.push(ks.id);
      ks.requestCount++;

      if (sessionId && attempts.length === 1) {
        // String() guards the log line against a non-string fingerprint.
        console.log(`[ROUTE] session=${String(sessionId).slice(0, 30)}... → key #${ks.id} (${wasSticky ? 'sticky' : 'fallback'})`);
      }

      const targetUrl = new URL(req.url, config.target);
      // Honor plain-HTTP targets (local gateways, tests); default to HTTPS.
      const useHttp = targetUrl.protocol === 'http:';
      const transport = useHttp ? http : https;

      const headers = {
        ...req.headers,
        authorization: `Bearer ${ks.key}`,
        host: targetUrl.host,
      };
      // Connection is hop-by-hop and must not be forwarded upstream.
      delete headers['connection'];

      const options = {
        hostname: targetUrl.hostname,
        port: targetUrl.port || (useHttp ? 80 : 443),
        path: targetUrl.pathname + targetUrl.search,
        method: req.method,
        headers,
      };

      const proxyReq = transport.request(options, (proxyRes) => {
        if (proxyRes.statusCode === 429) {
          // Retry-After in seconds when numeric; otherwise the configured cooldown.
          const retryAfter = parseInt(proxyRes.headers['retry-after'] || '0', 10);
          const retryAfterMs = retryAfter > 0 ? retryAfter * 1000 : config.cooldownMs;
          markThrottled(ks, retryAfterMs);

          // Buffer the 429 body so it can be relayed if no key is left to try.
          const drainChunks = [];
          proxyRes.on('data', (c) => drainChunks.push(c));
          proxyRes.on('end', () => {
            if (attempts.length < config.keys.length) {
              const { state: nextKey } = getKeyForSession(sessionId);
              console.log(`[RETRY] 429 on key #${ks.id}, falling back to key #${nextKey.id}`);
              tryRequest(nextKey, false);
            } else {
              console.log(`[FAIL] All keys exhausted for ${req.method} ${req.url}`);
              res.writeHead(proxyRes.statusCode, proxyRes.headers);
              res.end(Buffer.concat(drainChunks));
            }
          });
          return;
        }

        // Detect cache hits
        const cacheHeader = proxyRes.headers['x-cache'] || proxyRes.headers['x-cached'];
        if (cacheHeader === 'HIT' || cacheHeader === 'hit') {
          ks.cacheHits++;
        }

        if (proxyRes.statusCode >= 500) {
          console.log(`[ERROR] Key #${ks.id} got ${proxyRes.statusCode} for ${req.method} ${req.url}`);
        } else if (attempts.length > 1) {
          console.log(`[OK] Key #${ks.id} succeeded after ${attempts.length} attempts`);
        }

        res.writeHead(proxyRes.statusCode, proxyRes.headers);
        proxyRes.pipe(res);
      });

      proxyReq.on('error', (err) => {
        console.error(`[ERROR] Key #${ks.id} request failed:`, err.message);
        if (res.headersSent) {
          // The response is already streaming: a retry or a 502 is no longer
          // possible, and a second writeHead would throw. Abort the client
          // connection instead.
          res.destroy(err);
          return;
        }
        if (attempts.length < config.keys.length) {
          const { state: nextKey } = getKeyForSession(sessionId);
          tryRequest(nextKey, false);
        } else {
          res.writeHead(502, { 'content-type': 'application/json' });
          res.end(JSON.stringify({ error: 'proxy_error', message: err.message }));
        }
      });

      if (body.length > 0) proxyReq.write(body);
      proxyReq.end();
    }

    tryRequest(initialState, isSticky);
  });
}
|
|
284
|
+
|
|
285
|
+
// ─── Health Endpoint ───────────────────────────────────────────────────
|
|
286
|
+
/**
 * Serve the health report as pretty-printed JSON: overall status, the target
 * URL with any embedded credentials masked, per-key counters and throttle
 * status, the number of tracked sessions, and process uptime in milliseconds.
 *
 * @param {import('node:http').IncomingMessage} req - Client request (unused).
 * @param {import('node:http').ServerResponse} res - Client response.
 */
function handleHealth(req, res) {
  const now = Date.now();
  const info = keyState.map((s) => ({
    id: s.id,
    // Only a short prefix of the key is exposed.
    key: s.key.slice(0, 8) + '...',
    requests: s.requestCount,
    errors: s.errorCount,
    cache_hits: s.cacheHits,
    status: s.throttledUntil > now ? 'throttled' : 'ready',
  }));
  res.writeHead(200, { 'content-type': 'application/json' });
  res.end(JSON.stringify({
    status: 'ok',
    target: config.target.replace(/\/\/[^@]+@/, '//***@'),
    keys: info,
    sessions: sessionKeyMap.size,
    // Math.floor instead of `| 0`: the bitwise form truncates to a signed
    // 32-bit integer and turns negative after about 24.8 days of uptime.
    uptime_ms: Math.floor(process.uptime() * 1000),
  }, null, 2));
}
|
|
305
|
+
|
|
306
|
+
// ─── Server ────────────────────────────────────────────────────────────
|
|
307
|
+
// Single HTTP listener: `GET /health` is answered locally, everything else is
// proxied to the target API.
const server = http.createServer((req, res) => {
  const isHealthProbe = req.method === 'GET' && req.url === '/health';
  if (isHealthProbe) {
    handleHealth(req, res);
    return;
  }
  handleRequest(req, res);
});

// Bind to loopback only and print the startup banner once listening.
server.listen(config.port, '127.0.0.1', () => {
  const banner = [
    `[START] api-key-lb proxy listening on http://127.0.0.1:${config.port}`,
    `[START] ${config.keys.length} key(s) loaded`,
    `[START] Target: ${config.target}`,
    `[START] Routing: session-aware sticky`,
    `[START] Cooldown: ${config.cooldownMs}ms`,
    `[START] Health: http://127.0.0.1:${config.port}/health`,
  ];
  for (const line of banner) {
    console.log(line);
  }
});
|
|
322
|
+
|
|
323
|
+
// Status log every 5 minutes
|
|
324
|
+
setInterval(() => {
  const now = Date.now();
  // One compact record per key; throttled keys show whole seconds remaining.
  const status = keyState.map((s) => {
    const remainingMs = s.throttledUntil - now;
    return {
      id: s.id,
      reqs: s.requestCount,
      errs: s.errorCount,
      cache: s.cacheHits,
      status: remainingMs > 0 ? `throttled ${(remainingMs / 1000) | 0}s` : 'ready',
    };
  });
  console.log(`[STATUS] ${JSON.stringify(status)} sessions=${sessionKeyMap.size}`);
}, 5 * 60 * 1000);
|