openclaw-freerouter 2.0.0 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +228 -105
  2. package/package.json +4 -2
  3. package/src/service.ts +5 -1
package/README.md CHANGED
@@ -1,121 +1,98 @@
1
- # FreeRouter — OpenClaw Plugin
1
+ # FreeRouter — Smart LLM Router for OpenClaw
2
2
 
3
- Smart LLM router that classifies your requests and routes them to the best model automatically. Uses a 14-dimension weighted scorer (<1ms) with configurable tier→model mapping.
3
+ **Stop overpaying for AI.** Every message to your AI assistant costs money — a simple "hello" shouldn't cost the same as "prove the Riemann hypothesis." FreeRouter automatically routes each request to the right model based on complexity.
4
4
 
5
- ## Install
5
+ ## The Problem
6
6
 
7
- ```bash
8
- # From local directory
9
- openclaw plugins install -l /path/to/freerouter-plugin
7
+ - 🔥 **Wasted money** — Running Claude Opus ($15/$75 per 1M tokens) for every message, even "what's 2+2?"
8
+ - 🤷 **No control** — Can't switch models mid-conversation without editing config files and restarting
9
+ - 📊 **Blind routing** — Your AI shows "freerouter/auto" instead of telling you which model actually answered
10
+ - 🧠 **No adaptive thinking** — OpenClaw's built-in thinking levels (`/think low`, `/think high`) are just prompt hints. Anthropic deprecated manual thinking for Opus 4.6 — it now requires the native `thinking: { type: "adaptive" }` API parameter, which OpenClaw doesn't send
11
+ - 🔧 **Complex setup** — Existing routers need separate servers, Docker, complex infra
10
12
 
11
- # Or copy to extensions
12
- openclaw plugins install /path/to/freerouter-plugin
13
- ```
13
+ ## The Solution
14
14
 
15
- ## Configure
15
+ FreeRouter is an OpenClaw plugin that:
16
+ - **Classifies every request in <1ms** using a 14-dimension weighted scorer (no LLM needed for classification)
17
+ - **Routes to the cheapest model that can handle it** — Kimi for "hello", Opus for architecture design
18
+ - **Sends native adaptive thinking** — Automatically passes `thinking: { type: "adaptive" }` to Anthropic's API for Opus 4.6, and `thinking: { type: "enabled", budget_tokens: N }` for Sonnet. No prompt hacks — real API-level thinking control that OpenClaw doesn't support natively
19
+ - **Reports the real model name** — You see `anthropic/claude-opus-4-6`, not `freerouter/auto`
20
+ - **Lets you override anytime** — Just say "use opus" in plain English
16
21
 
17
- After install, configure tiers in `openclaw.json`:
22
+ ## Install
18
23
 
19
- ```json5
20
- {
21
- // Set FreeRouter as your default model
22
- agents: {
23
- defaults: {
24
- model: { primary: "freerouter/auto" }
25
- }
26
- },
24
+ ```bash
25
+ openclaw plugins install openclaw-freerouter
26
+ ```
27
27
 
28
- // Add FreeRouter as a provider pointing to its HTTP proxy
29
- providers: {
30
- freerouter: {
31
- baseUrl: "http://127.0.0.1:18801/v1",
32
- api: "openai-completions"
33
- }
34
- },
28
+ Then run the setup wizard:
35
29
 
36
- // Plugin config
37
- plugins: {
38
- entries: {
39
- freerouter: {
40
- enabled: true,
41
- config: {
42
- port: 18801, // HTTP proxy port (0 = disabled)
43
- host: "127.0.0.1", // Bind address
44
-
45
- // Customize which models handle each tier
46
- tiers: {
47
- SIMPLE: { primary: "kimi-coding/kimi-for-coding", fallback: ["anthropic/claude-haiku-4-5"] },
48
- MEDIUM: { primary: "anthropic/claude-sonnet-4-5", fallback: ["anthropic/claude-opus-4-6"] },
49
- COMPLEX: { primary: "anthropic/claude-opus-4-6", fallback: [] },
50
- REASONING: { primary: "anthropic/claude-opus-4-6", fallback: [] }
51
- },
30
+ ```bash
31
+ openclaw freerouter setup
32
+ ```
52
33
 
53
- // Thinking/reasoning config
54
- thinking: {
55
- adaptive: ["claude-opus-4-6"],
56
- enabled: { models: ["claude-sonnet-4-5"], budget: 4096 }
57
- },
34
+ Or configure manually — see [Configuration](#configure) below.
58
35
 
59
- // Default tier for ambiguous requests
60
- defaultTier: "MEDIUM"
61
- }
62
- }
63
- }
64
- }
65
- }
66
- ```
36
+ ## Switch Models Anytime
67
37
 
68
- ## How It Works
38
+ The killer feature: **switch models using natural language, slash commands, or session locks.**
69
39
 
70
- 1. OpenClaw sends a request with model `freerouter/auto`
71
- 2. FreeRouter's HTTP proxy receives it
72
- 3. The 14-dimension classifier scores the request in <1ms
73
- 4. Routes to the best model for the task (e.g., Kimi for simple, Opus for reasoning)
74
- 5. Forwards to the real provider API
75
- 6. **Returns the actual model name** (e.g., `anthropic/claude-opus-4-6`) so OpenClaw displays what's really running
40
+ ### Just Say It (Natural Language)
76
41
 
77
- ## Tiers
42
+ No slash commands needed. Just talk:
78
43
 
79
- | Tier | Default Model | Use Case |
80
- |------|--------------|----------|
81
- | SIMPLE | kimi-coding/kimi-for-coding | Quick lookups, translations, simple Q&A |
82
- | MEDIUM | anthropic/claude-sonnet-4-5 | Code generation, creative writing, moderate complexity |
83
- | COMPLEX | anthropic/claude-opus-4-6 | Architecture design, multi-step reasoning |
84
- | REASONING | anthropic/claude-opus-4-6 | Mathematical proofs, formal logic, deep analysis |
44
+ | What you say | What happens |
45
+ |---|---|
46
+ | `use opus` | Switches to Claude Opus for this message |
47
+ | `switch to sonnet` | Switches to Claude Sonnet |
48
+ | `try kimi` | Switches to Kimi |
49
+ | `let's use opus` | Switches to Opus |
50
+ | `please use sonnet` | Switches to Sonnet |
51
+ | `can you use haiku` | Switches to Haiku |
52
+ | `use opus: explain quantum computing` | Uses Opus for this specific prompt |
53
+ | `use opus, what is 2+2?` | Same — model + prompt in one message |
54
+ | `go back to auto` | Return to automatic routing |
85
55
 
86
- ## Per-Prompt Model Override
56
+ ### Lock a Model for the Whole Session
87
57
 
88
- Force a specific model for one message:
58
+ When you know the task is important and you want Opus (or any model) for everything:
89
59
 
90
- - `/opus Explain quantum computing` Claude Opus 4.6
91
- - `/sonnet Write a poem` → Claude Sonnet 4.5
92
- - `/kimi What's 2+2?` Kimi K2.5
93
- - `/haiku Translate this` Claude Haiku 4.5
94
- - `[opus] Deep analysis...` Claude Opus 4.6
60
+ | What you say | What happens |
61
+ |---|---|
62
+ | `use opus for this session` | 🔒 Locks ALL messages to Opus |
63
+ | `switch to sonnet from now on` | 🔒 Locks to Sonnet |
64
+ | `stick with opus` | 🔒 Locks to Opus |
65
+ | `keep using sonnet` | 🔒 Locks to Sonnet |
66
+ | `/lock opus` | 🔒 Same thing, slash command |
67
+ | `/unlock` | 🔓 Back to auto-routing |
68
+ | `/lock status` | Shows current lock state |
95
69
 
96
- ## Per-Prompt Tier Override
70
+ Session locks expire after 4 hours of inactivity.
97
71
 
98
- Force a tier (uses that tier's primary model):
72
+ ### When FreeRouter Isn't Sure
99
73
 
100
- - `/simple What's 2+2?` SIMPLE tier
101
- - `/max Prove the Riemann hypothesis` → REASONING tier
102
- - `[reasoning] Analyze this code...` → REASONING tier
74
+ If your request is ambiguous, FreeRouter asks before switching:
103
75
 
104
- ## Session Lock
76
+ > **You:** "opus please"
77
+ > **FreeRouter:** 🤔 Did you want to switch to **anthropic/claude-opus-4-6**? Reply **yes** or **no**.
78
+ > **You:** "yes"
79
+ > **FreeRouter:** ✅ Confirmed!
105
80
 
106
- Lock an entire session to a specific model:
81
+ This prevents accidental switches when you're just talking *about* a model.
107
82
 
108
- - `/lock opus` 🔒 All messages use Opus until unlocked
109
- - `/lock sonnet` → 🔒 All messages use Sonnet
110
- - `/lock simple` → 🔒 Lock to SIMPLE tier's primary model
111
- - `/lock anthropic/claude-opus-4-6` → 🔒 Full model ID
112
- - `/unlock` → 🔓 Return to auto-routing
113
- - `/lock auto` → 🔓 Same as unlock
114
- - `/lock status` → Show current lock state
83
+ ### Slash Commands (Power Users)
115
84
 
116
- Session locks expire after 4 hours of inactivity.
85
+ | Command | Effect |
86
+ |---|---|
87
+ | `/opus What is 2+2?` | Per-prompt: Opus for this message only |
88
+ | `/sonnet Write a poem` | Per-prompt: Sonnet |
89
+ | `/kimi Quick answer` | Per-prompt: Kimi |
90
+ | `/haiku Translate this` | Per-prompt: Haiku |
91
+ | `/simple What is 2+2?` | Per-prompt: use SIMPLE tier model |
92
+ | `/max Prove this theorem` | Per-prompt: use REASONING tier model |
93
+ | `[opus] Deep analysis` | Bracket syntax (same as /opus) |
117
94
 
118
- ### Supported Aliases
95
+ ### Supported Model Aliases
119
96
 
120
97
  | Alias | Model |
121
98
  |-------|-------|
@@ -125,37 +102,183 @@ Session locks expire after 4 hours of inactivity.
125
102
  | `haiku`, `haiku-4`, `haiku-4.5` | anthropic/claude-haiku-4-5 |
126
103
  | `kimi`, `kimi-k2`, `k2.5` | kimi-coding/kimi-for-coding |
127
104
 
128
- ## Scoring Dimensions
105
+ ## Adaptive Thinking (Native API-Level)
106
+
107
+ This is a key reason FreeRouter exists.
108
+
109
+ **The problem:** OpenClaw's built-in `/think low|medium|high` commands are prompt-level hints — they add text like "think harder" to your prompt. This is unreliable and doesn't use Anthropic's actual thinking API. Worse, **Anthropic deprecated manual thinking (`type: "enabled"`) for Opus 4.6** — it now only supports `type: "adaptive"`, where the model decides how much to think based on the task.
110
+
111
+ **What FreeRouter does:** Sends the real `thinking` parameter directly to Anthropic's API:
112
+
113
+ | Model | Thinking Mode | What's Sent |
114
+ |---|---|---|
115
+ | Claude Opus 4.6 | **Adaptive** (always) | `thinking: { type: "adaptive" }` |
116
+ | Claude Sonnet 4.5 | Enabled with budget | `thinking: { type: "enabled", budget_tokens: 4096 }` |
117
+ | Others (Kimi, Haiku) | Off | No thinking parameter |
118
+
119
+ **Why this matters:**
120
+ - Adaptive thinking lets Opus 4.6 decide how much reasoning it needs — simple questions get quick answers, complex proofs get deep thinking chains
121
+ - You get the full benefit of Claude's extended thinking without managing budgets
122
+ - The `X-FreeRouter-Thinking` response header tells you exactly which thinking mode was used
123
+
124
+ Configure in your plugin config:
125
+ ```json5
126
+ "thinking": {
127
+ "adaptive": ["claude-opus-4-6"], // Models that use adaptive (always-on)
128
+ "enabled": {
129
+ "models": ["claude-sonnet-4-5"], // Models that get explicit thinking
130
+ "budget": 4096 // Token budget for thinking
131
+ }
132
+ }
133
+ ```
134
+
135
+ ## How Routing Works
136
+
137
+ 1. You send a message → OpenClaw forwards to FreeRouter
138
+ 2. FreeRouter's 14-dimension classifier scores the request in **<1ms** (0.035ms average)
139
+ 3. Based on the score, it picks the best tier:
140
+
141
+ | Tier | Default Model | Use Case | Cost |
142
+ |------|--------------|----------|------|
143
+ | SIMPLE | Kimi K2.5 | Quick lookups, translations, "hello" | $0.50/1M |
144
+ | MEDIUM | Claude Sonnet 4.5 | Code, creative writing, moderate tasks | $3/$15/1M |
145
+ | COMPLEX | Claude Opus 4.6 | Architecture, deep analysis | $15/$75/1M |
146
+ | REASONING | Claude Opus 4.6 | Proofs, formal logic, step-by-step | $15/$75/1M |
147
+
148
+ 4. Forwards to the real provider API (Anthropic, Kimi, OpenAI, etc.)
149
+ 5. Returns the response with the **actual model name** — not "freerouter/auto"
129
150
 
130
- The classifier evaluates 14 weighted dimensions:
151
+ ### Scoring Dimensions
152
+
153
+ The classifier evaluates 14 weighted dimensions without calling any LLM:
131
154
  - Token count, code presence, reasoning markers, technical terms
132
155
  - Creative markers, simple indicators, multi-step patterns
133
156
  - Question complexity, imperative verbs, constraints
134
157
  - Output format, references, negation, domain specificity
135
- - Agentic task indicators
158
+ - Agentic task indicators (multi-tool, multi-step workflows)
159
+
160
+ Supports multilingual classification: English, Chinese, Japanese, Russian, German, Vietnamese, Arabic, Korean, and more.
161
+
162
+ ## Configure
163
+
164
+ After install, add to your `openclaw.json`:
165
+
166
+ ```json5
167
+ {
168
+ // 1. Set FreeRouter as your default model
169
+ "agents": {
170
+ "defaults": {
171
+ "model": {
172
+ "primary": "freerouter/freerouter/auto",
173
+ "fallbacks": ["anthropic/claude-opus-4-6"]
174
+ }
175
+ }
176
+ },
177
+
178
+ // 2. Add FreeRouter as a provider
179
+ "providers": {
180
+ "freerouter": {
181
+ "baseUrl": "http://127.0.0.1:18801/v1",
182
+ "api": "openai-completions"
183
+ }
184
+ },
185
+
186
+ // 3. Plugin config
187
+ "plugins": {
188
+ "entries": {
189
+ "freerouter": {
190
+ "enabled": true,
191
+ "config": {
192
+ "port": 18801,
193
+ "host": "127.0.0.1",
194
+ "tiers": {
195
+ "SIMPLE": { "primary": "kimi-coding/kimi-for-coding", "fallback": ["anthropic/claude-haiku-4-5"] },
196
+ "MEDIUM": { "primary": "anthropic/claude-sonnet-4-5", "fallback": ["anthropic/claude-opus-4-6"] },
197
+ "COMPLEX": { "primary": "anthropic/claude-opus-4-6", "fallback": [] },
198
+ "REASONING": { "primary": "anthropic/claude-opus-4-6", "fallback": [] }
199
+ },
200
+ "thinking": {
201
+ "adaptive": ["claude-opus-4-6"],
202
+ "enabled": { "models": ["claude-sonnet-4-5"], "budget": 4096 }
203
+ },
204
+ "defaultTier": "MEDIUM"
205
+ }
206
+ }
207
+ }
208
+ }
209
+ }
210
+ ```
211
+
212
+ Then restart: `openclaw gateway restart`
213
+
214
+ ## CLI Commands
215
+
216
+ | Command | Description |
217
+ |---|---|
218
+ | `openclaw freerouter status` | Show config, tier mapping, and live stats |
219
+ | `openclaw freerouter setup` | Interactive setup wizard |
220
+ | `openclaw freerouter test` | Quick 5-query classification smoke test |
221
+ | `openclaw freerouter doctor` | Diagnose issues (port conflicts, missing config, etc.) |
222
+ | `openclaw freerouter port <n>` | Change the proxy port |
223
+ | `openclaw freerouter reset` | Show default config for recovery |
224
+
225
+ ### Chat Commands
226
+
227
+ | Command | Description |
228
+ |---|---|
229
+ | `/freerouter` | Show routing stats in chat |
230
+ | `/freerouter-doctor` | Quick health check |
136
231
 
137
232
  ## Port Conflicts
138
233
 
139
- If port 18801 is in use, change it in the plugin config:
234
+ If port 18801 is in use, change it:
140
235
 
141
236
  ```json5
142
- { plugins: { entries: { freerouter: { config: { port: 18802 } } } } }
237
+ { "plugins": { "entries": { "freerouter": { "config": { "port": 18802 } } } } }
143
238
  ```
144
239
 
145
- Set `port: 0` to disable the HTTP proxy entirely.
240
+ Set `"port": 0` to disable the HTTP proxy entirely.
146
241
 
147
- ## Commands
242
+ ## Troubleshooting
243
+
244
+ ```bash
245
+ # Check if everything is working
246
+ openclaw freerouter doctor
148
247
 
149
- - `/freerouter` Show routing stats in chat
150
- - `openclaw freerouter status` — CLI status and stats
248
+ # Quick classification test
249
+ openclaw freerouter test
250
+
251
+ # Reset to defaults
252
+ openclaw freerouter reset
253
+ ```
151
254
 
152
255
  ## Response Headers
153
256
 
154
- Every proxied response includes:
155
- - `X-FreeRouter-Model` — Actual model used
156
- - `X-FreeRouter-Tier` — Classification tier
257
+ Every proxied response includes metadata headers:
258
+ - `X-FreeRouter-Model` — Actual model used (e.g., `anthropic/claude-opus-4-6`)
259
+ - `X-FreeRouter-Tier` — Classification tier (SIMPLE/MEDIUM/COMPLEX/REASONING)
157
260
  - `X-FreeRouter-Thinking` — Thinking mode (off/adaptive/enabled)
158
- - `X-FreeRouter-Reasoning` — Classification reasoning
261
+ - `X-FreeRouter-Reasoning` — Why this model was chosen
262
+
263
+ ## API Endpoints
264
+
265
+ The HTTP proxy exposes:
266
+ - `POST /v1/chat/completions` — OpenAI-compatible chat endpoint
267
+ - `GET /v1/models` — List available models
268
+ - `GET /health` — Health check
269
+ - `GET /stats` — Routing statistics
270
+ - `GET /sessions/locks` — Active session locks
271
+ - `DELETE /sessions/locks` — Clear all session locks
272
+
273
+ ## Tests
274
+
275
+ ```bash
276
+ npm test # Runs all 212 tests
277
+ ```
278
+
279
+ - `test-freerouter.mjs` — Classification, multilingual, edge cases, performance
280
+ - `test-resilience.mjs` — Fallback chains, bad configs, recovery, port conflicts
281
+ - `test-overrides.mjs` — Model overrides, session locks, natural language switching
159
282
 
160
283
  ## License
161
284
 
package/package.json CHANGED
@@ -1,10 +1,12 @@
1
1
  {
2
2
  "name": "openclaw-freerouter",
3
- "version": "2.0.0",
3
+ "version": "2.0.2",
4
4
  "description": "Smart LLM router plugin for OpenClaw — classify requests and route to the best model using your own API keys. 14-dimension scoring, <1ms classification, per-prompt/session model switching.",
5
5
  "type": "module",
6
6
  "openclaw": {
7
- "extensions": ["./src/index.ts"]
7
+ "extensions": [
8
+ "./src/index.ts"
9
+ ]
8
10
  },
9
11
  "main": "src/index.ts",
10
12
  "scripts": {
package/src/service.ts CHANGED
@@ -624,11 +624,15 @@ export function createProxyServer(options: ProxyOptions): { server: Server; stat
624
624
  }
625
625
 
626
626
  // Determine thinking mode
627
+ // Opus 4.6 REQUIRES adaptive thinking — manual mode is deprecated by Anthropic
628
+ // Other models can use enabled(budget) for explicit thinking control
627
629
  const thinkingCfg = pluginConfig.thinking as any;
628
630
  const adaptivePatterns = thinkingCfg?.adaptive ?? ["claude-opus-4-6"];
629
631
  const enabledCfg = thinkingCfg?.enabled;
630
632
 
631
- if (adaptivePatterns.some((p: string) => routedModel.includes(p)) && (tier === "COMPLEX" || tier === "REASONING")) {
633
+ if (adaptivePatterns.some((p: string) => routedModel.includes(p))) {
634
+ // Adaptive thinking models (e.g., Opus 4.6) — always use adaptive
635
+ // Anthropic deprecated manual thinking for Opus 4.6
632
636
  thinkingMode = "adaptive";
633
637
  } else if (enabledCfg?.models?.some((p: string) => routedModel.includes(p))) {
634
638
  thinkingMode = `enabled(${enabledCfg.budget ?? 4096})`;