@prakashpro1/auto-modal 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +10 -0
- package/LICENSE +21 -0
- package/README.md +282 -0
- package/bin/cli.mjs +138 -0
- package/claude-router.sh +28 -0
- package/config.default.yaml +23 -0
- package/package.json +63 -0
- package/scripts/free-port.mjs +26 -0
- package/src/anthropic.js +186 -0
- package/src/config.js +101 -0
- package/src/dashboard.js +560 -0
- package/src/envfile.js +60 -0
- package/src/loadenv.js +5 -0
- package/src/server.js +543 -0
- package/src/usage.js +131 -0
package/.env.example
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
# Copy to .env and fill in your keys.
|
|
2
|
+
# Add as MANY keys as you have, comma-separated — the router rotates through all
|
|
3
|
+
# of them when one hits its rate/usage limit. No limit on how many you list.
|
|
4
|
+
# Scale by editing this file only; you never need to touch config.yaml.
|
|
5
|
+
|
|
6
|
+
# OpenRouter keys: https://openrouter.ai/keys
|
|
7
|
+
OPENROUTER_API_KEYS=sk-or-v1-aaa...,sk-or-v1-bbb...,sk-or-v1-ccc...
|
|
8
|
+
|
|
9
|
+
# HuggingFace tokens (read scope): https://huggingface.co/settings/tokens
|
|
10
|
+
HF_API_KEYS=hf_aaa...,hf_bbb...
|
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 godlikebgis
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,282 @@
|
|
|
1
|
+
# Auto Modal
|
|
2
|
+
|
|
3
|
+

|
|
4
|
+
|
|
5
|
+
A local proxy that **automatically switches models AND API keys when a usage/rate
|
|
6
|
+
limit is exceeded**. Each model has a pool of keys; when a model+key returns `429`
|
|
7
|
+
(rate limit) or `402` (out of credits), the router cools that slot down and rotates
|
|
8
|
+
to the next **key**, then the next **model** — transparently, in the same request.
|
|
9
|
+
It only fails once *every key of every model* is spent.
|
|
10
|
+
|
|
11
|
+
It speaks **both** API dialects from one endpoint:
|
|
12
|
+
|
|
13
|
+
| Client | API it expects | Router endpoint |
|
|
14
|
+
|---|---|---|
|
|
15
|
+
| **Continue**, OpenAI SDKs, curl | OpenAI | `/v1/chat/completions`, `/v1/completions` |
|
|
16
|
+
| **Claude Code CLI** | Anthropic | `/v1/messages` (translated ⇄ OpenAI) |
|
|
17
|
+
|
|
18
|
+
```
|
|
19
|
+
client ──► router ──► nemotron-free (OpenRouter) key0 →429→ key1 →429→ ┐
|
|
20
|
+
owl-alpha (OpenRouter) key0 →429→ key1 →429→ ┤ rotate key,
|
|
21
|
+
qwen-72b (HuggingFace) key0 ✅ ◄┘ then model
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## Features
|
|
27
|
+
|
|
28
|
+
- **Model + key rotation** on `429` / `402` / network errors — each model holds a
|
|
29
|
+
key pool; a limited key is skipped and the next (even a different account) is tried.
|
|
30
|
+
- **Per-day caps** — counts requests per `(model, key)` (persisted to `usage.json`)
|
|
31
|
+
and proactively skips a slot at its `dailyLimit`.
|
|
32
|
+
- **Per-minute rate limit** — a token bucket per slot (`rpm`) rotates *before* the
|
|
33
|
+
upstream returns `429`, smoothing load.
|
|
34
|
+
- **Cooldown** — a slot that just hit a limit is skipped for `cooldownMs`.
|
|
35
|
+
- **Transient retries** — `5xx` / timeouts retry the same slot before rotating.
|
|
36
|
+
- **Streaming** (SSE) and non-streaming, on every endpoint.
|
|
37
|
+
- **Two API dialects** — OpenAI (Continue) and Anthropic (Claude Code), same routing.
|
|
38
|
+
- **Live dashboard** — add/test/edit/reorder/delete models, add/remove keys, watch
|
|
39
|
+
per-slot usage, sparklines, and the real OpenRouter free-pool — all hot-reloaded.
|
|
40
|
+
|
|
41
|
+
---
|
|
42
|
+
|
|
43
|
+
## Install as a CLI
|
|
44
|
+
|
|
45
|
+
**Global** (one `automodal` command everywhere):
|
|
46
|
+
```bash
|
|
47
|
+
npm install -g @prakashpro1/auto-modal # → `automodal` on your PATH
|
|
48
|
+
automodal init # creates ~/.auto-modal/{config.yaml,.env}
|
|
49
|
+
# add your keys to ~/.auto-modal/.env (or use the dashboard 🔑 panel)
|
|
50
|
+
automodal # start the router → http://localhost:8787
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
**Per-project** (scoped to one repo, run via `npx`):
|
|
54
|
+
```bash
|
|
55
|
+
cd your-project
|
|
56
|
+
npm install @prakashpro1/auto-modal # add as a dependency
|
|
57
|
+
npx automodal init --local # creates ./.auto-modal/ for THIS project
|
|
58
|
+
npx automodal # start it
|
|
59
|
+
```
|
|
60
|
+
Add `.auto-modal/` to the project's `.gitignore` — it holds your keys.
|
|
61
|
+
|
|
62
|
+
Commands (run from anywhere):
|
|
63
|
+
| Command | Does |
|
|
64
|
+
|---|---|
|
|
65
|
+
| `automodal` / `automodal start` | start the router (auto-kills any stale instance first) |
|
|
66
|
+
| `automodal claude [args]` | launch Claude Code through the router |
|
|
67
|
+
| `automodal init [--local]` | create global `~/.auto-modal` (or project `./.auto-modal`) |
|
|
68
|
+
| `automodal where` | print which config / `.env` / `usage.json` is in effect |
|
|
69
|
+
| `automodal --help` | usage |
|
|
70
|
+
|
|
71
|
+
**Config resolution** (highest priority first):
|
|
72
|
+
`AUTOMODAL_HOME` env → nearest `./.auto-modal` (walking up from cwd) → `~/.auto-modal`.
|
|
73
|
+
|
|
74
|
+
So a project with its own `./.auto-modal/` uses that (its own chain + keys); anywhere
|
|
75
|
+
else falls back to the global `~/.auto-modal/`. The CLI never writes inside the package.
|
|
76
|
+
|
|
77
|
+
## Setup (from source / dev)
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
cd <path-to-automodal>
|
|
81
|
+
npm install
|
|
82
|
+
cp .env.example .env # add your keys (comma-separated for multiple)
|
|
83
|
+
npm start # → http://localhost:8787
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
`.env`:
|
|
87
|
+
```bash
|
|
88
|
+
OPENROUTER_API_KEYS=sk-or-1...,sk-or-2... # as many as you have; rotated automatically
|
|
89
|
+
HF_API_KEYS=hf_1...,hf_2...
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
Verify: `curl -s http://localhost:8787/health` → `{"ok":true,"models":N}`.
|
|
93
|
+
Open the dashboard at <http://localhost:8787/> to manage everything visually.
|
|
94
|
+
|
|
95
|
+
> `npm start` self-guards: a `prestart` hook kills any stale instance already on
|
|
96
|
+
> the port first, so you never end up with two routers fighting over state. Use
|
|
97
|
+
> `npm run restart` to force a clean restart.
|
|
98
|
+
|
|
99
|
+
---
|
|
100
|
+
|
|
101
|
+
## Use it with Continue (VS Code)
|
|
102
|
+
|
|
103
|
+
Add to `~/.continue/config.yaml`:
|
|
104
|
+
```yaml
|
|
105
|
+
models:
|
|
106
|
+
- name: Pro Model
|
|
107
|
+
provider: openai
|
|
108
|
+
model: auto # router picks the model (chain + rotation)
|
|
109
|
+
apiBase: http://localhost:8787/v1
|
|
110
|
+
apiKey: dummy # router holds the real keys
|
|
111
|
+
roles: [chat, edit, apply, autocomplete]
|
|
112
|
+
```
|
|
113
|
+
- Skip `embed` / `rerank` — the router proxies completions only; use separate models.
|
|
114
|
+
- **Autocomplete** uses `/v1/completions`, routed the same way.
|
|
115
|
+
- **Images:** point a dedicated entry at a vision model with `capabilities: [image_input]`
|
|
116
|
+
(free text models can't accept images). Route it through the router with
|
|
117
|
+
`model: <vision-chain-id>` to keep key rotation.
|
|
118
|
+
|
|
119
|
+
---
|
|
120
|
+
|
|
121
|
+
## Use it with Claude Code CLI
|
|
122
|
+
|
|
123
|
+
Claude Code speaks the **Anthropic Messages API**; the router exposes `POST /v1/messages`
|
|
124
|
+
(+ `/v1/messages/count_tokens`) and translates request/response/streaming-SSE and tool
|
|
125
|
+
calls ⇄ OpenAI, then routes through the same chain.
|
|
126
|
+
|
|
127
|
+
**Prerequisites:** router installed (`npm install`), keys in `.env`, and the `claude`
|
|
128
|
+
CLI installed.
|
|
129
|
+
|
|
130
|
+
### Step 1 — Start the router (Terminal A)
|
|
131
|
+
```bash
|
|
132
|
+
cd <path-to-automodal>
|
|
133
|
+
npm start
|
|
134
|
+
curl -s http://localhost:8787/health # → {"ok":true,"models":N}
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
### Step 2 — Launch Claude Code through the router (Terminal B)
|
|
138
|
+
If you installed the global CLI, just run from anywhere:
|
|
139
|
+
```bash
|
|
140
|
+
automodal claude # interactive
|
|
141
|
+
automodal claude -p "explain this repo" # headless / one-shot
|
|
142
|
+
```
|
|
143
|
+
From source (no global install), use the bundled launcher:
|
|
144
|
+
```bash
|
|
145
|
+
cd <path-to-automodal>
|
|
146
|
+
./claude-router.sh # interactive
|
|
147
|
+
./claude-router.sh -p "explain this repo" # headless / one-shot
|
|
148
|
+
```
|
|
149
|
+
The launcher checks the router is up, then sets these **for that invocation only**
|
|
150
|
+
(your normal `claude` stays on real Anthropic):
|
|
151
|
+
```bash
|
|
152
|
+
ANTHROPIC_BASE_URL=http://localhost:8787
|
|
153
|
+
ANTHROPIC_AUTH_TOKEN=dummy # router holds the real provider keys
|
|
154
|
+
ANTHROPIC_MODEL=auto # full chain + rotation
|
|
155
|
+
ANTHROPIC_SMALL_FAST_MODEL=auto
|
|
156
|
+
```
|
|
157
|
+
Prefer manual env vars instead of the script? Export the four above and run `claude`.
|
|
158
|
+
|
|
159
|
+
### Step 3 (optional) — Pick a model
|
|
160
|
+
- **Auto chain** (default): `ANTHROPIC_MODEL=auto` — rotates models + keys.
|
|
161
|
+
- **Pin one**: `ANTHROPIC_MODEL=claude-owl-alpha ./claude-router.sh` — the router
|
|
162
|
+
strips the `claude-` prefix to match a chain id.
|
|
163
|
+
- **Switch in-session**: `CLAUDE_CODE_ENABLE_GATEWAY_MODEL_DISCOVERY=1 ./claude-router.sh`,
|
|
164
|
+
then type `/model` and pick a `claude-<id>` entry (advertised via `GET /v1/models`).
|
|
165
|
+
|
|
166
|
+
### Step 4 — Watch it work
|
|
167
|
+
Open <http://localhost:8787/> while you use Claude Code — requests land on slots,
|
|
168
|
+
sparklines grow, daily/free-pool counters tick.
|
|
169
|
+
|
|
170
|
+
### Sanity test
|
|
171
|
+
```bash
|
|
172
|
+
./claude-router.sh -p "What is 2+2? Reply with just the number." # → 4
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
### Troubleshooting
|
|
176
|
+
| Symptom | Fix |
|
|
177
|
+
|---|---|
|
|
178
|
+
| `✗ Router not reachable` | Start the router (Step 1). |
|
|
179
|
+
| `overloaded_error` / 529 | All slots exhausted (e.g. free daily pool spent) — add a key in the 🔑 dashboard, or wait for 00:00 UTC. |
|
|
180
|
+
| Flaky tool calls / odd multi-step | Expected on free models — pin a tool-capable one (`claude-owl-alpha`). |
|
|
181
|
+
|
|
182
|
+
> ⚠️ Claude Code is tuned for Claude models (heavy agentic tool use, big context).
|
|
183
|
+
> Free OpenRouter/HF models connect and respond but are far weaker at tool calling
|
|
184
|
+
> and long agent loops — a "make it work" path, not Claude parity.
|
|
185
|
+
|
|
186
|
+
---
|
|
187
|
+
|
|
188
|
+
## Dashboard (`GET /`)
|
|
189
|
+
|
|
190
|
+
Auto-refreshes every 2s. One card per model, a row per key-slot.
|
|
191
|
+
|
|
192
|
+
- **Add a model** — searchable picker pulling each provider's live catalog
|
|
193
|
+
(filter to free, sorted by context); ID + key-env auto-filled. Active free
|
|
194
|
+
OpenRouter models auto-set `dailyLimit` from your account tier.
|
|
195
|
+
- **🔑 API keys** — list (masked) / add / remove keys; writes `.env`, hot-reloads,
|
|
196
|
+
activates key-less models instantly.
|
|
197
|
+
- **Drag-to-reorder** priority · **Test** / **Test all** (real ping, ok/latency) ·
|
|
198
|
+
**Edit** `rpm`/`dailyLimit` inline · **Delete**.
|
|
199
|
+
- **Per-slot sparkline** (last 30 min) + **"Today: N of T left"** per model.
|
|
200
|
+
- **OpenRouter free pool** header — the real shared cap (50/day free tier, 1000/day
|
|
201
|
+
once ≥10 credits bought) summed across all `:free` models.
|
|
202
|
+
|
|
203
|
+
All edits write `config.yaml` / `.env` and hot-reload — no restart. (Dashboard edits
|
|
204
|
+
normalize `config.yaml` and drop inline comments.)
|
|
205
|
+
|
|
206
|
+
---
|
|
207
|
+
|
|
208
|
+
## API endpoints
|
|
209
|
+
|
|
210
|
+
| Endpoint | Purpose |
|
|
211
|
+
|---|---|
|
|
212
|
+
| `POST /v1/chat/completions` | OpenAI chat (Continue chat/edit/apply) |
|
|
213
|
+
| `POST /v1/completions` | OpenAI text/FIM (Continue autocomplete) |
|
|
214
|
+
| `POST /v1/messages` | Anthropic Messages (Claude Code), translated |
|
|
215
|
+
| `POST /v1/messages/count_tokens` | Token estimate for Claude Code |
|
|
216
|
+
| `GET /v1/models` | Chain as model list (+ `claude-<id>` aliases) |
|
|
217
|
+
| `GET /` | Live dashboard |
|
|
218
|
+
| `GET /status` | Chain + live usage (powers the dashboard) |
|
|
219
|
+
| `GET /usage` | Raw per-slot counts + cooldown |
|
|
220
|
+
| `GET /health` | Liveness + model count |
|
|
221
|
+
| `GET /admin/credits` | OpenRouter tier + credit usage + derived free cap |
|
|
222
|
+
| `GET /admin/catalog?provider=` | Live provider model catalog (picker) |
|
|
223
|
+
| `POST/PATCH/DELETE /admin/models[...]` | Add / edit / delete / reorder / test models |
|
|
224
|
+
| `GET/POST/DELETE /admin/keys` | Manage key pools |
|
|
225
|
+
|
|
226
|
+
Response headers: `X-Router-Model`, `X-Router-Key`, `X-Router-Upstream` reveal which
|
|
227
|
+
slot answered. Slots in `/usage` are labelled `modelId#keyIndex`.
|
|
228
|
+
|
|
229
|
+
---
|
|
230
|
+
|
|
231
|
+
## How "limit exceeded" is detected
|
|
232
|
+
|
|
233
|
+
| Upstream status | Action |
|
|
234
|
+
|---|---|
|
|
235
|
+
| `429`, `402` | cool down this `(model, key)` → **rotate key, then model** |
|
|
236
|
+
| `500/502/503/504` | retry same slot (`transientRetries`), then rotate |
|
|
237
|
+
| `404` | endpoint unsupported by this slot → rotate (don't fail) |
|
|
238
|
+
| other `4xx` | return to client (won't be fixed by switching) |
|
|
239
|
+
| `2xx` | success → record usage for that `(model, key)` |
|
|
240
|
+
|
|
241
|
+
---
|
|
242
|
+
|
|
243
|
+
## Configuration (`config.yaml`)
|
|
244
|
+
|
|
245
|
+
```yaml
|
|
246
|
+
port: 8787
|
|
247
|
+
cooldownMs: 3600000 # how long a limited slot rests
|
|
248
|
+
transientRetries: 2 # retries on 5xx/timeout before rotating
|
|
249
|
+
chain: # order = priority
|
|
250
|
+
- id: nemotron-super-free
|
|
251
|
+
provider: openrouter # "openrouter" | "huggingface"
|
|
252
|
+
model: nvidia/nemotron-3-super-120b-a12b:free
|
|
253
|
+
apiKeys: ${OPENROUTER_API_KEYS} # one env var, comma-separated, rotated
|
|
254
|
+
dailyLimit: 50 # per key (optional)
|
|
255
|
+
rpm: 20 # per-key requests/min token bucket (optional)
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
**Requesting a specific model:** if a request's `model` matches a chain **id** or
|
|
259
|
+
**slug** (or `claude-<id>`), it routes to just that model (keeping key rotation);
|
|
260
|
+
`auto`/unknown uses the full chain.
|
|
261
|
+
|
|
262
|
+
**Env overrides:** `ROUTER_CONFIG`, `ROUTER_ENV`, `ROUTER_USAGE` relocate
|
|
263
|
+
`config.yaml` / `.env` / `usage.json`.
|
|
264
|
+
|
|
265
|
+
---
|
|
266
|
+
|
|
267
|
+
## Notes
|
|
268
|
+
|
|
269
|
+
- Daily counters roll over at **UTC midnight**.
|
|
270
|
+
- OpenRouter free limit is **per key, shared across all `:free` models** — the
|
|
271
|
+
dashboard's free-pool line reflects this real cap; per-model `dailyLimit` is what
|
|
272
|
+
the router itself enforces to rotate early.
|
|
273
|
+
- Token buckets and request history are in-memory (reset on restart); daily counts
|
|
274
|
+
and cooldowns persist in `usage.json`.
|
|
275
|
+
|
|
276
|
+
## Development
|
|
277
|
+
|
|
278
|
+
```bash
|
|
279
|
+
npm test # 10 integration test suites (routing, rotation, rpm, limits,
|
|
280
|
+
# completions, admin/keys/reorder/edit, history, Anthropic /v1/messages)
|
|
281
|
+
npm run dev # auto-restart on change
|
|
282
|
+
```
|
package/bin/cli.mjs
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Global CLI for Auto Modal (the auto model router).
|
|
3
|
+
// automodal [start] start the router (default)
|
|
4
|
+
// automodal claude [...] launch Claude Code through the router
|
|
5
|
+
//   automodal init [--local] create ~/.auto-modal (or ./.auto-modal with --local) and show where to add keys
|
|
6
|
+
// automodal where print the config/.env/usage paths
|
|
7
|
+
// automodal --help
|
|
8
|
+
//
|
|
9
|
+
// Config, keys and usage live in ~/.auto-modal (override with AUTOMODAL_HOME), so
|
|
10
|
+
// a global install never writes inside the package dir.
|
|
11
|
+
import { fileURLToPath, pathToFileURL } from "node:url";
|
|
12
|
+
import { dirname, join } from "node:path";
|
|
13
|
+
import { homedir } from "node:os";
|
|
14
|
+
import { existsSync, mkdirSync, copyFileSync, writeFileSync } from "node:fs";
|
|
15
|
+
import { spawn } from "node:child_process";
|
|
16
|
+
|
|
17
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
18
|
+
const PKG = join(__dirname, "..");
|
|
19
|
+
const HOME_ENV = process.env.AUTOMODAL_HOME;
|
|
20
|
+
const GLOBAL_HOME = HOME_ENV || join(homedir(), ".auto-modal");
|
|
21
|
+
const URL = process.env.ROUTER_URL || "http://localhost:8787";
|
|
22
|
+
|
|
23
|
+
// Walk up from `cwd` looking for a project-local `.auto-modal/` dir.
|
|
24
|
+
function findProjectHome(startDir) {
|
|
25
|
+
let dir = startDir;
|
|
26
|
+
for (;;) {
|
|
27
|
+
const candidate = join(dir, ".auto-modal");
|
|
28
|
+
if (existsSync(candidate)) return candidate;
|
|
29
|
+
const parent = dirname(dir);
|
|
30
|
+
if (parent === dir) return null; // hit filesystem root
|
|
31
|
+
dir = parent;
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
// Config home resolution (highest priority first):
|
|
36
|
+
// 1. AUTOMODAL_HOME env var
|
|
37
|
+
// 2. a project-local ./.auto-modal found by walking up from cwd (per-project)
|
|
38
|
+
// 3. ~/.auto-modal (global)
|
|
39
|
+
function resolveHome() {
|
|
40
|
+
if (HOME_ENV) return { home: HOME_ENV, scope: "env" };
|
|
41
|
+
const proj = findProjectHome(process.cwd());
|
|
42
|
+
if (proj) return { home: proj, scope: "project" };
|
|
43
|
+
return { home: GLOBAL_HOME, scope: "global" };
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
function ensureHome(home) {
|
|
47
|
+
if (!existsSync(home)) mkdirSync(home, { recursive: true });
|
|
48
|
+
const cfg = join(home, "config.yaml");
|
|
49
|
+
if (!existsSync(cfg)) {
|
|
50
|
+
// Ship config.default.yaml; fall back to config.yaml when running from source.
|
|
51
|
+
const tmpl = existsSync(join(PKG, "config.default.yaml"))
|
|
52
|
+
? join(PKG, "config.default.yaml") : join(PKG, "config.yaml");
|
|
53
|
+
copyFileSync(tmpl, cfg);
|
|
54
|
+
}
|
|
55
|
+
const env = join(home, ".env");
|
|
56
|
+
if (!existsSync(env)) {
|
|
57
|
+
const example = join(PKG, ".env.example");
|
|
58
|
+
if (existsSync(example)) copyFileSync(example, env);
|
|
59
|
+
else writeFileSync(env, "OPENROUTER_API_KEYS=\nHF_API_KEYS=\n");
|
|
60
|
+
}
|
|
61
|
+
// Point the server modules at this home (unless already overridden).
|
|
62
|
+
process.env.ROUTER_CONFIG ||= cfg;
|
|
63
|
+
process.env.ROUTER_ENV ||= env;
|
|
64
|
+
process.env.ROUTER_USAGE ||= join(home, "usage.json");
|
|
65
|
+
return { cfg, env };
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
const [cmd, ...rest] = process.argv.slice(2);
|
|
69
|
+
|
|
70
|
+
if (cmd === "--help" || cmd === "-h" || cmd === "help") {
|
|
71
|
+
console.log(`auto-modal — auto model router (key/model switching proxy)
|
|
72
|
+
|
|
73
|
+
Usage:
|
|
74
|
+
automodal [start] Start the router (default; http://localhost:8787)
|
|
75
|
+
automodal claude [args] Launch Claude Code through the router
|
|
76
|
+
automodal init Create global config (~/.auto-modal) + show where to add keys
|
|
77
|
+
automodal init --local Create project-local config (./.auto-modal) for this project
|
|
78
|
+
automodal where Print which config / .env / usage is in effect
|
|
79
|
+
automodal --help
|
|
80
|
+
|
|
81
|
+
Config resolution (highest first): AUTOMODAL_HOME env > nearest ./.auto-modal > ~/.auto-modal
|
|
82
|
+
Install: npm install -g auto-modal (global) | npm install auto-modal + npx automodal (per-project)`);
|
|
83
|
+
process.exit(0);
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
if (cmd === "init") {
|
|
87
|
+
const local = rest.includes("--local") || rest.includes("--project");
|
|
88
|
+
const home = local ? join(process.cwd(), ".auto-modal") : GLOBAL_HOME;
|
|
89
|
+
const { cfg, env } = ensureHome(home);
|
|
90
|
+
console.log(`Initialized ${local ? "project-local" : "global"} config at ${home}`);
|
|
91
|
+
console.log(` config: ${cfg}`);
|
|
92
|
+
console.log(` keys: ${env} ← add OPENROUTER_API_KEYS / HF_API_KEYS here`);
|
|
93
|
+
if (local) console.log(` tip: add ".auto-modal/" to .gitignore (it holds your keys)`);
|
|
94
|
+
console.log(`Then run: automodal`);
|
|
95
|
+
process.exit(0);
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
const { home, scope } = resolveHome();
|
|
99
|
+
const { cfg, env } = ensureHome(home);
|
|
100
|
+
|
|
101
|
+
if (cmd === "where") {
|
|
102
|
+
console.log(`scope: ${scope} (${home})`);
|
|
103
|
+
console.log(`config: ${process.env.ROUTER_CONFIG}`);
|
|
104
|
+
console.log(`env: ${process.env.ROUTER_ENV}`);
|
|
105
|
+
console.log(`usage: ${process.env.ROUTER_USAGE}`);
|
|
106
|
+
process.exit(0);
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
if (cmd === "claude") {
|
|
110
|
+
// Launch Claude Code pointed at the router (router must be running).
|
|
111
|
+
try {
|
|
112
|
+
const r = await fetch(`${URL}/health`);
|
|
113
|
+
if (!r.ok) throw new Error("bad status");
|
|
114
|
+
} catch {
|
|
115
|
+
console.error(`✗ Router not reachable at ${URL} — start it first: automodal start`);
|
|
116
|
+
process.exit(1);
|
|
117
|
+
}
|
|
118
|
+
console.log(`→ Claude Code via Auto Modal (${URL})`);
|
|
119
|
+
const child = spawn("claude", rest, {
|
|
120
|
+
stdio: "inherit",
|
|
121
|
+
env: {
|
|
122
|
+
...process.env,
|
|
123
|
+
ANTHROPIC_BASE_URL: URL,
|
|
124
|
+
ANTHROPIC_AUTH_TOKEN: "dummy",
|
|
125
|
+
ANTHROPIC_MODEL: process.env.ANTHROPIC_MODEL || "auto",
|
|
126
|
+
ANTHROPIC_SMALL_FAST_MODEL: process.env.ANTHROPIC_SMALL_FAST_MODEL || "auto",
|
|
127
|
+
},
|
|
128
|
+
});
|
|
129
|
+
// spawn() reports a missing or unlaunchable binary through "error", not "exit";
// handle it so the user gets a hint instead of an uncaught exception.
child.on("error", (err) => {
  console.error(`✗ Could not launch "claude": ${err.message} — is the Claude Code CLI installed and on your PATH?`);
  process.exit(1);
});
// `code` is null when the child was terminated by a signal; report that as a
// failure rather than exiting 0.
child.on("exit", (code) => process.exit(code ?? 1));
|
|
130
|
+
} else if (!cmd || cmd === "start") {
|
|
131
|
+
// Free a stale instance on the port, then start the server.
|
|
132
|
+
console.log(`config: ${scope} (${home})`);
|
|
133
|
+
// import() takes a URL specifier: a raw absolute path fails on Windows and
// mis-parses paths containing %, # or ?, so convert it to a file:// URL first.
await import(pathToFileURL(join(PKG, "scripts", "free-port.mjs")).href);
|
|
134
|
+
// import() takes a URL specifier: a raw absolute path fails on Windows and
// mis-parses paths containing %, # or ?, so convert it to a file:// URL first.
await import(pathToFileURL(join(PKG, "src", "server.js")).href);
|
|
135
|
+
} else {
|
|
136
|
+
console.error(`Unknown command: ${cmd}\nTry: automodal --help`);
|
|
137
|
+
process.exit(1);
|
|
138
|
+
}
|
package/claude-router.sh
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Launch Claude Code against the Auto Modal router instead of the real Anthropic API.
|
|
3
|
+
# Env vars are set for THIS invocation only — your normal `claude` is unaffected.
|
|
4
|
+
#
|
|
5
|
+
# Usage: ./claude-router.sh (interactive)
|
|
6
|
+
# ./claude-router.sh -p "..." (headless, args pass through)
|
|
7
|
+
#
|
|
8
|
+
# The router must be running first: npm start (http://localhost:8787)
|
|
9
|
+
|
|
10
|
+
ROUTER_URL="${ROUTER_URL:-http://localhost:8787}"
|
|
11
|
+
|
|
12
|
+
# Fail early with a clear message if the router isn't up.
|
|
13
|
+
# -f makes curl exit non-zero on HTTP error responses, so an unhealthy router
# (or some other service answering on the port) counts as "not reachable".
if ! curl -sf -o /dev/null "${ROUTER_URL}/health"; then
|
|
14
|
+
echo "✗ Router not reachable at ${ROUTER_URL} — start it with: npm start" >&2
|
|
15
|
+
exit 1
|
|
16
|
+
fi
|
|
17
|
+
|
|
18
|
+
# ANTHROPIC_AUTH_TOKEN -> sets 'Authorization: Bearer' (the router ignores it; it
|
|
19
|
+
# holds the real provider keys). Pin a specific chain model with ANTHROPIC_MODEL,
|
|
20
|
+
# or leave it for the router's auto chain + key rotation.
|
|
21
|
+
export ANTHROPIC_BASE_URL="${ROUTER_URL}"
|
|
22
|
+
export ANTHROPIC_AUTH_TOKEN="dummy"
|
|
23
|
+
export ANTHROPIC_MODEL="${ANTHROPIC_MODEL:-auto}"
|
|
24
|
+
# Use a free model for the small/fast (background) calls too.
|
|
25
|
+
export ANTHROPIC_SMALL_FAST_MODEL="${ANTHROPIC_SMALL_FAST_MODEL:-auto}"
|
|
26
|
+
|
|
27
|
+
echo "→ Claude Code via Auto Modal (${ROUTER_URL}), model=${ANTHROPIC_MODEL}"
|
|
28
|
+
exec claude "$@"
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# Auto Modal — default config.
|
|
2
|
+
# Edit here, or manage models live from the dashboard at http://localhost:8787/
|
|
3
|
+
# ("+ Add a model" pulls each provider's catalog). Keys come from .env
|
|
4
|
+
# (OPENROUTER_API_KEYS / HF_API_KEYS), comma-separated, and are rotated automatically.
|
|
5
|
+
|
|
6
|
+
port: 8787
|
|
7
|
+
cooldownMs: 3600000 # how long a rate-limited slot rests before retry
|
|
8
|
+
transientRetries: 2 # retries on 5xx/timeout before rotating
|
|
9
|
+
|
|
10
|
+
chain: # order = priority (tried top to bottom)
|
|
11
|
+
- id: nemotron-super-free
|
|
12
|
+
provider: openrouter
|
|
13
|
+
model: nvidia/nemotron-3-super-120b-a12b:free
|
|
14
|
+
apiKeys: ${OPENROUTER_API_KEYS}
|
|
15
|
+
dailyLimit: 50 # per key (OpenRouter free tier ~50/day)
|
|
16
|
+
rpm: 20 # per-key requests/min
|
|
17
|
+
|
|
18
|
+
# Add a HuggingFace fallback once HF_API_KEYS is set (auto-skipped until then):
|
|
19
|
+
- id: hf-qwen
|
|
20
|
+
provider: huggingface
|
|
21
|
+
model: Qwen/Qwen2.5-72B-Instruct
|
|
22
|
+
apiKeys: ${HF_API_KEYS}
|
|
23
|
+
dailyLimit: 200
|
package/package.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@prakashpro1/auto-modal",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Local proxy that auto-switches models AND API keys when a rate/usage limit is hit. Serves both the OpenAI API (Continue) and the Anthropic Messages API (Claude Code), with a live dashboard.",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "src/server.js",
|
|
7
|
+
"bin": {
|
|
8
|
+
"automodal": "bin/cli.mjs"
|
|
9
|
+
},
|
|
10
|
+
"files": [
|
|
11
|
+
"src",
|
|
12
|
+
"bin",
|
|
13
|
+
"scripts",
|
|
14
|
+
"config.default.yaml",
|
|
15
|
+
".env.example",
|
|
16
|
+
"claude-router.sh",
|
|
17
|
+
"README.md"
|
|
18
|
+
],
|
|
19
|
+
"preferGlobal": true,
|
|
20
|
+
"publishConfig": {
|
|
21
|
+
"access": "public"
|
|
22
|
+
},
|
|
23
|
+
"keywords": [
|
|
24
|
+
"openrouter",
|
|
25
|
+
"huggingface",
|
|
26
|
+
"llm",
|
|
27
|
+
"proxy",
|
|
28
|
+
"router",
|
|
29
|
+
"rate-limit",
|
|
30
|
+
"fallback",
|
|
31
|
+
"api-key-rotation",
|
|
32
|
+
"continue",
|
|
33
|
+
"claude-code",
|
|
34
|
+
"anthropic",
|
|
35
|
+
"openai-compatible"
|
|
36
|
+
],
|
|
37
|
+
"author": "prakash.saeculumsolutions@gmail.com",
|
|
38
|
+
"license": "MIT",
|
|
39
|
+
"homepage": "https://github.com/prakashpro3/pro-modal#readme",
|
|
40
|
+
"repository": {
|
|
41
|
+
"type": "git",
|
|
42
|
+
"url": "git+https://github.com/prakashpro3/pro-modal.git"
|
|
43
|
+
},
|
|
44
|
+
"bugs": {
|
|
45
|
+
"url": "https://github.com/prakashpro3/pro-modal/issues"
|
|
46
|
+
},
|
|
47
|
+
"scripts": {
|
|
48
|
+
"prestart": "node scripts/free-port.mjs",
|
|
49
|
+
"start": "node src/server.js",
|
|
50
|
+
"restart": "node scripts/free-port.mjs && node src/server.js",
|
|
51
|
+
"predev": "node scripts/free-port.mjs",
|
|
52
|
+
"dev": "node --watch src/server.js",
|
|
53
|
+
"test": "node test/fallback.test.mjs && node test/rpm.test.mjs && node test/completions.test.mjs && node test/admin.test.mjs && node test/keys.test.mjs && node test/reorder.test.mjs && node test/edit-test.test.mjs && node test/history.test.mjs && node test/requested-model.test.mjs && node test/messages.test.mjs"
|
|
54
|
+
},
|
|
55
|
+
"dependencies": {
|
|
56
|
+
"dotenv": "^16.4.5",
|
|
57
|
+
"express": "^4.19.2",
|
|
58
|
+
"yaml": "^2.5.0"
|
|
59
|
+
},
|
|
60
|
+
"engines": {
|
|
61
|
+
"node": ">=20"
|
|
62
|
+
}
|
|
63
|
+
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
// Kill any stale process already listening on the router's port, so `npm start`
|
|
2
|
+
// never ends up with two instances fighting over the port + usage state.
|
|
3
|
+
// Runs automatically as `prestart` / `predev` (and via `npm run restart`); the CLI's `start` command imports it too.
|
|
4
|
+
import { execSync } from "node:child_process";
|
|
5
|
+
import { readFileSync } from "node:fs";
|
|
6
|
+
import { fileURLToPath } from "node:url";
|
|
7
|
+
import { dirname, join } from "node:path";
|
|
8
|
+
import YAML from "yaml";
|
|
9
|
+
|
|
10
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
11
|
+
const cfgPath = process.env.ROUTER_CONFIG || join(__dirname, "..", "config.yaml");
|
|
12
|
+
|
|
13
|
+
let port = 8787;
|
|
14
|
+
try { port = YAML.parse(readFileSync(cfgPath, "utf8"))?.port || 8787; } catch { /* default */ }
|
|
15
|
+
|
|
16
|
+
try {
|
|
17
|
+
// lsof exits non-zero when nothing is listening — that throws and we no-op.
|
|
18
|
+
// -sTCP:LISTEN limits the match to listening sockets. Without it lsof also
// returns clients connected to the port (e.g. a browser tab on the dashboard),
// and those processes would be killed below.
const pids = execSync(`lsof -ti tcp:${port} -sTCP:LISTEN`, { stdio: ["ignore", "pipe", "ignore"] })
|
|
19
|
+
.toString().trim().split("\n").filter(Boolean);
|
|
20
|
+
if (pids.length) {
|
|
21
|
+
execSync(`kill -9 ${pids.join(" ")}`);
|
|
22
|
+
console.log(`free-port: stopped ${pids.length} stale instance(s) on :${port} (${pids.join(", ")})`);
|
|
23
|
+
}
|
|
24
|
+
} catch {
|
|
25
|
+
// Nothing on the port (or lsof unavailable) — nothing to do.
|
|
26
|
+
}
|