@rungate/llmrouter 0.1.0 → 0.1.2

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/README.md CHANGED
@@ -35,6 +35,12 @@ openclaw plugins install @rungate/llmrouter
35
35
  openclaw gateway restart
36
36
  ```
37
37
 
38
+ Or use the installer script from this repo:
39
+
40
+ ```bash
41
+ bash scripts/install-openclaw.sh
42
+ ```
43
+
38
44
  Recommended production environment:
39
45
 
40
46
  ```bash
@@ -44,6 +50,8 @@ X402_NETWORK=eip155:8453
44
50
 
45
51
  `llm_router` remains local-only in this setup. OpenClaw still talks to the local proxy at `http://127.0.0.1:3000/v1`; the remote upstream is configured by environment.
46
52
 
53
+ If `INFERENCE_PROVIDER_BASE_URL` is unset, the router defaults to `https://api.rungate.ai`.
54
+
47
55
  ## Local OpenClaw Install
48
56
 
49
57
  For local unpublished development, install from a packaged tarball:
@@ -61,8 +69,12 @@ For Docker/tempclaw-style testing, stage the tarball into the container and inst
61
69
  openclaw plugins install /staging/rungate-llmrouter-0.1.0.tgz
62
70
  ```
63
71
 
72
+ The installer script is for real OpenClaw installs, not tempclaw. Tempclaw should keep using the explicit install flow so restart and verification stay visible.
73
+
64
74
  ## Environment
65
75
 
76
+ For local development, override the production default upstream:
77
+
66
78
  ```bash
67
79
  LLM_ROUTER_HOST=127.0.0.1
68
80
  LLM_ROUTER_PORT=3000
@@ -70,20 +82,14 @@ INFERENCE_PROVIDER_BASE_URL=http://127.0.0.1:8787
70
82
  X402_NETWORK=eip155:84532
71
83
  ```
72
84
 
73
- For production, point the upstream at Rungate and use Base mainnet:
74
-
75
- ```bash
76
- INFERENCE_PROVIDER_BASE_URL=https://api.rungate.ai
77
- X402_NETWORK=eip155:8453
78
- ```
79
-
80
85
  ## OpenClaw Models
81
86
 
82
87
  - `llmrouter/auto`
83
88
  - `llmrouter/simple`
84
89
  - `llmrouter/coding`
85
90
  - `llmrouter/reasoning`
86
- - `llmrouter/vision`
91
+
92
+ Image requests still route automatically to the vision-capable upstream model through `llmrouter/auto`.
87
93
 
88
94
  ## Release Workflow
89
95
 
@@ -3,7 +3,6 @@ const MODEL_LIST = [
3
3
  { id: 'simple', name: 'LLM Router Simple', reasoning: false },
4
4
  { id: 'coding', name: 'LLM Router Coding', reasoning: true },
5
5
  { id: 'reasoning', name: 'LLM Router Reasoning', reasoning: true },
6
- { id: 'vision', name: 'LLM Router Vision', reasoning: true },
7
6
  ];
8
7
  // Inject the provider block and default model so OpenClaw can talk to the local proxy.
9
8
  export function ensureOpenClawProviderConfig(config, baseUrl) {
@@ -29,10 +28,18 @@ export function ensureOpenClawProviderConfig(config, baseUrl) {
29
28
  const agents = config.agents ?? {};
30
29
  const defaults = agents.defaults ?? {};
31
30
  const modelConfig = defaults.model ?? {};
31
+ const allowedModels = defaults.models ?? {};
32
32
  if (typeof modelConfig.primary !== 'string' || modelConfig.primary.length === 0) {
33
33
  modelConfig.primary = 'llmrouter/auto';
34
34
  }
35
+ for (const model of MODEL_LIST) {
36
+ const key = `llmrouter/${model.id}`;
37
+ if (!(key in allowedModels)) {
38
+ allowedModels[key] = {};
39
+ }
40
+ }
35
41
  defaults.model = modelConfig;
42
+ defaults.models = allowedModels;
36
43
  agents.defaults = defaults;
37
44
  config.agents = agents;
38
45
  }
@@ -95,6 +95,51 @@ function copyResponseHeaders(upstream, res) {
95
95
  res.setHeader(key, value);
96
96
  }
97
97
  }
98
+ function isRetryableUpstreamResponse(response) {
99
+ return response.status === 404 || response.status === 408 || response.status === 409 || response.status === 425
100
+ || response.status === 429 || response.status >= 500;
101
+ }
102
+ async function collectResponseText(response) {
103
+ try {
104
+ return await response.clone().text();
105
+ }
106
+ catch {
107
+ return '';
108
+ }
109
+ }
110
+ async function tryUpstreamModels(req, upstreamBaseUrl, payFetch, body, candidateModels) {
111
+ const attempts = [];
112
+ for (const model of candidateModels) {
113
+ const upstreamBody = {
114
+ ...body,
115
+ model,
116
+ };
117
+ try {
118
+ const response = await payFetch(new URL('/v1/chat/completions', upstreamBaseUrl), {
119
+ method: 'POST',
120
+ headers: {
121
+ ...copyRequestHeaders(req),
122
+ 'content-type': 'application/json',
123
+ },
124
+ body: JSON.stringify(upstreamBody),
125
+ });
126
+ const attempt = { model, response };
127
+ attempts.push(attempt);
128
+ if (!isRetryableUpstreamResponse(response) || model === candidateModels[candidateModels.length - 1]) {
129
+ return { attempt, attempts };
130
+ }
131
+ }
132
+ catch (error) {
133
+ const attempt = { model, error };
134
+ attempts.push(attempt);
135
+ if (model === candidateModels[candidateModels.length - 1]) {
136
+ return { attempt, attempts };
137
+ }
138
+ }
139
+ }
140
+ const attempt = attempts[attempts.length - 1] ?? { model: body.model, error: new Error('No upstream attempt executed') };
141
+ return { attempt, attempts };
142
+ }
98
143
  // Handle the only routed endpoint in this minimal version: chat completions.
99
144
  async function handleChat(req, res, upstreamBaseUrl, payFetch) {
100
145
  const raw = await collectBody(req);
@@ -105,10 +150,7 @@ async function handleChat(req, res, upstreamBaseUrl, payFetch) {
105
150
  }
106
151
  const normalizedLatestUser = normalizeLatestUserMessageForRouting(body);
107
152
  const decision = routeRequest(toRouterRequest(normalizedLatestUser.body));
108
- const upstreamBody = {
109
- ...body,
110
- model: decision.resolvedModel,
111
- };
153
+ const { attempt, attempts } = await tryUpstreamModels(req, upstreamBaseUrl, payFetch, body, decision.candidateModels);
112
154
  console.info(JSON.stringify({
113
155
  component: 'llm_router',
114
156
  event: 'route_request',
@@ -118,23 +160,38 @@ async function handleChat(req, res, upstreamBaseUrl, payFetch) {
118
160
  logicalModel: decision.logicalModel,
119
161
  category: decision.category,
120
162
  resolvedModel: decision.resolvedModel,
163
+ candidateModels: decision.candidateModels,
164
+ attemptedModels: attempts.map((current) => current.model),
121
165
  reason: decision.reason,
122
166
  hasTools: decision.hasTools,
123
167
  wantsJson: decision.wantsJson,
124
168
  hasImage: decision.hasImage,
125
169
  }));
126
- const upstreamResponse = await payFetch(new URL('/v1/chat/completions', upstreamBaseUrl), {
127
- method: 'POST',
128
- headers: {
129
- ...copyRequestHeaders(req),
130
- 'content-type': 'application/json',
131
- },
132
- body: JSON.stringify(upstreamBody),
133
- });
170
+ if (attempt.error) {
171
+ throw attempt.error;
172
+ }
173
+ const upstreamResponse = attempt.response;
174
+ if (!upstreamResponse) {
175
+ throw new Error('Upstream returned no response');
176
+ }
177
+ if (attempts.length > 1) {
178
+ console.info(JSON.stringify({
179
+ component: 'llm_router',
180
+ event: 'route_fallback_result',
181
+ requestPath: req.url ?? '/v1/chat/completions',
182
+ finalModel: attempt.model,
183
+ attempts: await Promise.all(attempts.map(async (current) => ({
184
+ model: current.model,
185
+ status: current.response?.status,
186
+ error: current.error instanceof Error ? current.error.message : undefined,
187
+ bodyPreview: current.response ? (await collectResponseText(current.response)).slice(0, 200) : undefined,
188
+ }))),
189
+ }));
190
+ }
134
191
  copyResponseHeaders(upstreamResponse, res);
135
192
  res.setHeader('x-llm-router-logical-model', decision.logicalModel);
136
193
  res.setHeader('x-llm-router-category', decision.category);
137
- res.setHeader('x-llm-router-resolved-model', decision.resolvedModel);
194
+ res.setHeader('x-llm-router-resolved-model', attempt.model);
138
195
  res.statusCode = upstreamResponse.status;
139
196
  if (!upstreamResponse.body) {
140
197
  res.end();
@@ -146,7 +203,7 @@ async function handleChat(req, res, upstreamBaseUrl, payFetch) {
146
203
  export async function startProxyServer(options = {}) {
147
204
  const host = options.host ?? process.env.LLM_ROUTER_HOST ?? '127.0.0.1';
148
205
  const port = options.port ?? Number(process.env.LLM_ROUTER_PORT ?? 3000);
149
- const upstreamBaseUrl = options.upstreamBaseUrl ?? process.env.INFERENCE_PROVIDER_BASE_URL ?? 'http://127.0.0.1:8787';
206
+ const upstreamBaseUrl = options.upstreamBaseUrl ?? process.env.INFERENCE_PROVIDER_BASE_URL ?? 'https://api.rungate.ai';
150
207
  const payFetch = createPaymentFetch(fetch, process.env);
151
208
  const server = createServer(async (req, res) => {
152
209
  try {
@@ -1,4 +1,5 @@
1
1
  import type { RouteCategory } from '../types.js';
2
- export declare const LOGICAL_MODELS: readonly ["llmrouter/auto", "llmrouter/simple", "llmrouter/coding", "llmrouter/reasoning", "llmrouter/vision"];
2
+ export declare const LOGICAL_MODELS: readonly ["llmrouter/auto", "llmrouter/simple", "llmrouter/coding", "llmrouter/reasoning"];
3
+ export declare const CATEGORY_MODEL_CANDIDATES: Record<RouteCategory, string[]>;
3
4
  export declare const CATEGORY_MODEL_MAP: Record<RouteCategory, string>;
4
5
  export declare function logicalModelToCategory(model: string): RouteCategory | undefined;
@@ -3,14 +3,34 @@ export const LOGICAL_MODELS = [
3
3
  'llmrouter/simple',
4
4
  'llmrouter/coding',
5
5
  'llmrouter/reasoning',
6
- 'llmrouter/vision',
7
6
  ];
8
- export const CATEGORY_MODEL_MAP = {
9
- simple: 'deepseek/deepseek-chat',
10
- coding: 'qwen/qwen3-coder-next',
11
- reasoning: 'deepseek/deepseek-v3.2',
12
- vision: 'qwen/qwen3-vl-235b-a22b-thinking',
7
+ export const CATEGORY_MODEL_CANDIDATES = {
8
+ simple: [
9
+ 'deepseek/deepseek-chat',
10
+ 'xiaomi/mimo-v2-flash',
11
+ 'minimax/minimax-m2.1',
12
+ 'deepseek/deepseek-chat-v3.1',
13
+ 'deepseek/deepseek-chat-v3-0324',
14
+ ],
15
+ coding: [
16
+ 'qwen/qwen3-coder-next',
17
+ 'deepseek/deepseek-v3.2',
18
+ 'openai/gpt-oss-120b',
19
+ 'moonshotai/kimi-k2.5',
20
+ ],
21
+ reasoning: [
22
+ 'deepseek/deepseek-v3.2',
23
+ 'deepseek/deepseek-r1',
24
+ 'deepseek/deepseek-r1-0528',
25
+ 'qwen/qwen3-235b-a22b-thinking-2507',
26
+ 'moonshotai/kimi-k2.5',
27
+ 'moonshotai/kimi-k2-0905',
28
+ 'z-ai/glm-5',
29
+ 'minimax/minimax-m2.5',
30
+ ],
31
+ vision: ['qwen/qwen3-vl-235b-a22b-thinking'],
13
32
  };
33
+ export const CATEGORY_MODEL_MAP = Object.fromEntries(Object.entries(CATEGORY_MODEL_CANDIDATES).map(([category, models]) => [category, models[0]]));
14
34
  // Map logical OpenClaw-facing model names to fixed route categories.
15
35
  export function logicalModelToCategory(model) {
16
36
  if (model === 'llmrouter/simple' || model === 'simple')
@@ -19,8 +39,6 @@ export function logicalModelToCategory(model) {
19
39
  return 'coding';
20
40
  if (model === 'llmrouter/reasoning' || model === 'reasoning')
21
41
  return 'reasoning';
22
- if (model === 'llmrouter/vision' || model === 'vision')
23
- return 'vision';
24
42
  if (model === 'llmrouter/auto' || model === 'auto')
25
43
  return undefined;
26
44
  return undefined;
@@ -1,5 +1,5 @@
1
1
  import { classifyPrompt, requestSignals } from './classify.js';
2
- import { CATEGORY_MODEL_MAP, logicalModelToCategory } from './models.js';
2
+ import { CATEGORY_MODEL_CANDIDATES, CATEGORY_MODEL_MAP, logicalModelToCategory } from './models.js';
3
3
  function forcedClassification(request, category) {
4
4
  return {
5
5
  category,
@@ -17,6 +17,7 @@ export function routeRequest(request) {
17
17
  logicalModel: request.model,
18
18
  category: classification.category,
19
19
  resolvedModel: CATEGORY_MODEL_MAP[classification.category],
20
+ candidateModels: CATEGORY_MODEL_CANDIDATES[classification.category],
20
21
  reason: classification.reason,
21
22
  hasTools: classification.hasTools,
22
23
  wantsJson: classification.wantsJson,
@@ -49,6 +49,7 @@ export type RouteDecision = {
49
49
  logicalModel: string;
50
50
  category: RouteCategory;
51
51
  resolvedModel: string;
52
+ candidateModels: string[];
52
53
  reason: string;
53
54
  hasTools: boolean;
54
55
  wantsJson: boolean;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@rungate/llmrouter",
3
- "version": "0.1.0",
3
+ "version": "0.1.2",
4
4
  "type": "module",
5
5
  "main": "./dist/src/index.js",
6
6
  "types": "./dist/src/index.d.ts",