claw-llm-router 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/classifier.ts ADDED
@@ -0,0 +1,516 @@
1
+ /**
2
+ * Claw LLM Router — Prompt Classifier
3
+ *
4
+ * 15-dimension weighted scoring. Runs 100% locally in <1ms.
5
+ * No external API calls. Translated from classifier.py.
6
+ */
7
+
8
+ // ── Keyword lists ─────────────────────────────────────────────────────────────
9
+
10
// All lists below are matched as plain substrings of the lower-cased prompt
// (see countKeywords), so short entries also hit inside longer words —
// e.g. "def" matches "define". List order matters: signal strings show the
// first few matches in list order.

/** Tokens hinting that the prompt contains or asks about source code. */
const CODE_KEYWORDS: string[] = [
  "function", "class", "import", "def", "select", "async",
  "await", "const", "let", "var", "return", "```",
];

/** Phrases hinting at formal or step-wise reasoning. */
const REASONING_KEYWORDS: string[] = [
  "prove", "theorem", "derive", "step by step", "chain of thought",
  "formally", "mathematical", "proof", "logically",
];

/** Phrases typical of short factual questions (scored as a negative signal). */
const SIMPLE_KEYWORDS: string[] = [
  "what is", "define", "translate", "hello", "yes or no",
  "capital of", "how old", "who is", "when was",
];

/** Engineering and infrastructure vocabulary. */
const TECHNICAL_KEYWORDS: string[] = [
  "algorithm", "optimize", "architecture", "distributed",
  "kubernetes", "microservice", "database", "infrastructure",
];

/** Markers of creative-writing requests. */
const CREATIVE_KEYWORDS: string[] = [
  "story", "poem", "compose", "brainstorm", "creative", "imagine", "write a",
];

/** Verbs that ask for something to be built or set up. */
const IMPERATIVE_VERBS: string[] = [
  "build", "create", "implement", "design", "develop",
  "construct", "generate", "deploy", "configure", "set up",
];

/** Words that introduce limits or bounds on the answer. */
const CONSTRAINT_INDICATORS: string[] = [
  "under", "at most", "at least", "within", "no more than",
  "maximum", "minimum", "limit", "budget",
];

/** Requested output formats. */
const OUTPUT_FORMAT_KEYWORDS: string[] = [
  "json", "yaml", "xml", "table", "csv",
  "markdown", "schema", "format as", "structured",
];

/** References to other material (earlier context, docs, attachments). */
const REFERENCE_KEYWORDS: string[] = [
  "above", "below", "previous", "following", "the docs",
  "the api", "the code", "earlier", "attached",
];

/** Negations and exclusions. */
const NEGATION_KEYWORDS: string[] = [
  "don't", "do not", "avoid", "never", "without",
  "except", "exclude", "no longer",
];

/** Vocabulary from specialised technical domains. */
const DOMAIN_SPECIFIC_KEYWORDS: string[] = [
  "quantum", "fpga", "vlsi", "risc-v", "asic", "photonics",
  "genomics", "proteomics", "topological", "homomorphic", "zero-knowledge",
];

/** Phrases suggesting tool use or multi-step work (files, commands, verification). */
const AGENTIC_TASK_KEYWORDS: string[] = [
  "read file", "look at", "check the", "open the", "edit", "modify",
  "update the", "change the", "write to", "create file", "execute",
  "deploy", "install", "npm", "pip", "compile", "after that", "once done",
  "step 1", "step 2", "fix", "debug", "until it works", "iterate",
  "make sure", "verify", "confirm",
];

/** Regexes for sequenced instructions: "first … then", "step N", and "N. " list items. */
const MULTI_STEP_PATTERNS: RegExp[] = [/first.*then/i, /step\s+\d/i, /\d\.\s/];
163
+
164
+ // ── Weights (must sum to 1.0) ─────────────────────────────────────────────────
165
+
166
+ // Weights aligned with ClawRouter (14 dims), scaled to fit our 15th (agenticTask)
167
/**
 * Per-dimension weight applied to each dimension score in `classify`.
 * The fifteen values add up to 1.0. Trailing comments give the ClawRouter
 * reference weight each value was scaled from.
 */
const WEIGHTS: Record<string, number> = {
  reasoningMarkers: 0.17,    // ClawRouter: 0.18
  codePresence: 0.14,        // ClawRouter: 0.15
  simpleIndicators: 0.11,    // ClawRouter: 0.12 (was 0.02 — key change)
  multiStepPatterns: 0.11,   // ClawRouter: 0.12
  technicalTerms: 0.09,      // ClawRouter: 0.10
  tokenCount: 0.08,          // ClawRouter: 0.08
  agenticTask: 0.06,         // ours only (not in ClawRouter)
  creativeMarkers: 0.05,     // ClawRouter: 0.05
  questionComplexity: 0.04,  // ClawRouter: 0.05
  constraintCount: 0.04,     // ClawRouter: 0.04
  imperativeVerbs: 0.03,     // ClawRouter: 0.03
  outputFormat: 0.03,        // ClawRouter: 0.03
  domainSpecificity: 0.02,   // ClawRouter: 0.02
  referenceComplexity: 0.02, // ClawRouter: 0.02
  negationComplexity: 0.01,  // ClawRouter: 0.01
};
184
+
185
+ // ── Tier boundaries ───────────────────────────────────────────────────────────
186
+
187
/** Weighted scores below this map to SIMPLE. */
const SIMPLE_MEDIUM_BOUNDARY = 0;
/** Weighted scores below this (and at or above the previous boundary) map to MEDIUM. */
const MEDIUM_COMPLEX_BOUNDARY = 0.3;
/** Weighted scores below this map to COMPLEX; scores at or above it map to REASONING. */
const COMPLEX_REASONING_BOUNDARY = 0.5;
/** Slope factor used by `sigmoid` when turning a distance or score into a confidence. */
const CONFIDENCE_STEEPNESS = 12;
/** Estimated-token count above which `classify` forces the COMPLEX tier. */
const MAX_TOKENS_FORCE_COMPLEX = 100000;
192
+
193
/** Routing tiers, from cheapest to most capable. */
export type Tier = "SIMPLE" | "MEDIUM" | "COMPLEX" | "REASONING";

/** Outcome of classifying a single prompt. */
export type ClassificationResult = {
  /** Tier selected for the prompt. */
  tier: Tier;
  /** Confidence in the selected tier, as computed by `classify`. */
  confidence: number;
  /** Weighted sum of the per-dimension scores. */
  score: number;
  /** Human-readable labels for every dimension or override that fired. */
  signals: string[];
  /** Number of reasoning keywords found in the prompt. */
  reasoningMatches: number;
};
202
+
203
/**
 * Logistic function with slope `CONFIDENCE_STEEPNESS`.
 *
 * @param x Signed input; 0 maps to 0.5 and larger values approach 1.
 * @returns A value in the open interval (0, 1) for finite inputs.
 */
function sigmoid(x: number): number {
  const exponent = -CONFIDENCE_STEEPNESS * x;
  return 1 / (1 + Math.exp(exponent));
}
206
+
207
/**
 * Collect the keywords that occur in `text`.
 *
 * Despite the name, this returns the matching keywords themselves (in the
 * order they appear in `keywords`), not a count. Matching is plain,
 * case-sensitive substring containment, so "def" also matches inside
 * "define"; callers are expected to lower-case `text` beforehand.
 *
 * @param text Text to search.
 * @param keywords Candidate keywords.
 * @returns The subset of `keywords` contained in `text`.
 */
function countKeywords(text: string, keywords: string[]): string[] {
  const found: string[] = [];
  for (const keyword of keywords) {
    if (text.includes(keyword)) {
      found.push(keyword);
    }
  }
  return found;
}
210
+
211
/**
 * Score one keyword dimension by how many keywords from `keywords` occur in `text`.
 *
 * @param text Lower-cased prompt text.
 * @param keywords Keyword list for this dimension.
 * @param lowThreshold Minimum match count that earns `scoreLow`.
 * @param highThreshold Minimum match count that earns `scoreHigh`.
 * @param scoreNone Score returned when neither threshold is reached.
 * @param scoreLow Score for reaching `lowThreshold` only.
 * @param scoreHigh Score for reaching `highThreshold`.
 * @param label Prefix for the signal string.
 * @returns `[score, signal]`; `signal` is `null` when no threshold was met,
 *          otherwise `"<label> (<up to three matched keywords>)"`.
 */
function scoreKeywords(
  text: string,
  keywords: string[],
  lowThreshold: number,
  highThreshold: number,
  scoreNone: number,
  scoreLow: number,
  scoreHigh: number,
  label: string,
): [number, string | null] {
  const hits = countKeywords(text, keywords);
  const hitCount = hits.length;

  // The high threshold is checked first so it wins when both are satisfied.
  const earned =
    hitCount >= highThreshold ? scoreHigh : hitCount >= lowThreshold ? scoreLow : null;
  if (earned === null) {
    return [scoreNone, null];
  }

  const preview = hits.slice(0, 3).join(", ");
  return [earned, `${label} (${preview})`];
}
230
+
231
/**
 * Classify a prompt into a routing tier using fifteen weighted heuristic dimensions.
 *
 * Each dimension yields a score that is multiplied by its entry in `WEIGHTS`
 * and summed. Three overrides are then checked in order:
 *   1. estimated tokens above `MAX_TOKENS_FORCE_COMPLEX` → COMPLEX (confidence 0.95);
 *   2. two or more reasoning keywords → REASONING (confidence at least 0.85);
 *   3. four or more technical/imperative/agentic matches together with a
 *      multi-step pattern or a prompt longer than 300 characters → COMPLEX
 *      (confidence at least 0.85).
 * If none applies, the weighted score is mapped to a tier via the boundary
 * constants and the confidence is `sigmoid` of the distance to the nearest
 * boundary of that tier.
 *
 * @param prompt User prompt text. Keyword matching runs on its lower-cased form.
 * @param _systemPrompt Accepted for interface compatibility; not used by the classifier.
 * @returns The chosen tier, its confidence, the weighted score, the signals
 *          that fired, and the number of reasoning keywords matched.
 */
export function classify(prompt: string, _systemPrompt?: string): ClassificationResult {
  const userText = prompt.toLowerCase();

  // Rough token estimate: about four characters per token.
  const estimatedTokens = Math.floor(userText.length / 4);
  const signals: string[] = [];
  // Insertion order is kept identical to the numbered dimensions below so the
  // floating-point weighted sum is accumulated in a stable order.
  const dimScores: Record<string, number> = {};

  /** Stores a `scoreKeywords` result for `dim` and records its signal, if any. */
  const record = (dim: string, [score, signal]: [number, string | null]): void => {
    dimScores[dim] = score;
    if (signal) signals.push(signal);
  };

  // 1. Token count
  if (estimatedTokens < 50) {
    dimScores.tokenCount = -1.0;
    signals.push(`short (${estimatedTokens} tokens)`);
  } else if (estimatedTokens > 500) {
    dimScores.tokenCount = 1.0;
    signals.push(`long (${estimatedTokens} tokens)`);
  } else {
    dimScores.tokenCount = 0.0;
  }

  // 2. Code presence
  record("codePresence", scoreKeywords(userText, CODE_KEYWORDS, 1, 2, 0, 0.5, 1.0, "code"));

  // 3. Reasoning markers (user prompt only). The match list is reused by the
  //    REASONING override further down.
  const reasoningHits = countKeywords(userText, REASONING_KEYWORDS);
  const reasoningMatchCount = reasoningHits.length;
  if (reasoningMatchCount >= 2) {
    dimScores.reasoningMarkers = 1.0;
    signals.push(`reasoning (${reasoningHits.slice(0, 3).join(", ")})`);
  } else if (reasoningMatchCount === 1) {
    dimScores.reasoningMarkers = 0.7;
    signals.push(`reasoning (${reasoningHits[0]})`);
  } else {
    dimScores.reasoningMarkers = 0.0;
  }

  // 4. Technical terms
  record(
    "technicalTerms",
    scoreKeywords(userText, TECHNICAL_KEYWORDS, 2, 4, 0, 0.5, 1.0, "technical"),
  );

  // 5. Creative markers
  record(
    "creativeMarkers",
    scoreKeywords(userText, CREATIVE_KEYWORDS, 1, 2, 0, 0.5, 0.7, "creative"),
  );

  // 6. Simple indicators (negative signal)
  record(
    "simpleIndicators",
    scoreKeywords(userText, SIMPLE_KEYWORDS, 1, 2, 0, -1.0, -1.0, "simple"),
  );

  // 7. Multi-step patterns (result reused by the COMPLEX override)
  const hasMultiStep = MULTI_STEP_PATTERNS.some((p) => p.test(userText));
  if (hasMultiStep) {
    dimScores.multiStepPatterns = 0.5;
    signals.push("multi-step");
  } else {
    dimScores.multiStepPatterns = 0.0;
  }

  // 8. Question complexity
  const qCount = (prompt.match(/\?/g) ?? []).length;
  dimScores.questionComplexity = qCount > 3 ? 0.5 : 0.0;
  if (qCount > 3) signals.push(`${qCount} questions`);

  // 9. Imperative verbs
  record(
    "imperativeVerbs",
    scoreKeywords(userText, IMPERATIVE_VERBS, 1, 2, 0, 0.3, 0.5, "imperative"),
  );

  // 10. Constraint indicators
  record(
    "constraintCount",
    scoreKeywords(userText, CONSTRAINT_INDICATORS, 1, 3, 0, 0.3, 0.7, "constraints"),
  );

  // 11. Output format keywords
  record(
    "outputFormat",
    scoreKeywords(userText, OUTPUT_FORMAT_KEYWORDS, 1, 2, 0, 0.4, 0.7, "format"),
  );

  // 12. Reference complexity
  record(
    "referenceComplexity",
    scoreKeywords(userText, REFERENCE_KEYWORDS, 1, 2, 0, 0.3, 0.5, "references"),
  );

  // 13. Negation complexity
  record(
    "negationComplexity",
    scoreKeywords(userText, NEGATION_KEYWORDS, 2, 3, 0, 0.3, 0.5, "negation"),
  );

  // 14. Domain specificity
  record(
    "domainSpecificity",
    scoreKeywords(userText, DOMAIN_SPECIFIC_KEYWORDS, 1, 2, 0, 0.5, 0.8, "domain-specific"),
  );

  // 15. Agentic task (match list reused by the COMPLEX override)
  const agenticMatches = countKeywords(userText, AGENTIC_TASK_KEYWORDS);
  if (agenticMatches.length >= 4) {
    dimScores.agenticTask = 1.0;
    signals.push(`agentic (${agenticMatches.slice(0, 3).join(", ")})`);
  } else if (agenticMatches.length >= 2) {
    dimScores.agenticTask = 0.5;
    signals.push(`agentic (${agenticMatches.slice(0, 2).join(", ")})`);
  } else {
    dimScores.agenticTask = 0.0;
  }

  // ── Weighted sum ──────────────────────────────────────────────────────────
  const weightedScore = Object.entries(dimScores).reduce(
    (sum, [dim, score]) => sum + (WEIGHTS[dim] ?? 0) * score,
    0,
  );

  // ── Special overrides ─────────────────────────────────────────────────────

  // Large context → force COMPLEX
  if (estimatedTokens > MAX_TOKENS_FORCE_COMPLEX) {
    signals.push(`large context (${estimatedTokens} tokens) → COMPLEX`);
    return {
      tier: "COMPLEX",
      confidence: 0.95,
      score: weightedScore,
      signals,
      reasoningMatches: reasoningMatchCount,
    };
  }

  // 2+ reasoning keywords → force REASONING
  if (reasoningMatchCount >= 2) {
    signals.push(`reasoning override (${reasoningMatchCount} markers)`);
    return {
      tier: "REASONING",
      confidence: Math.max(sigmoid(weightedScore), 0.85),
      score: weightedScore,
      signals,
      reasoningMatches: reasoningMatchCount,
    };
  }

  // Strong complexity signals → force COMPLEX
  // (mirrors the REASONING override pattern)
  const techMatches = countKeywords(userText, TECHNICAL_KEYWORDS);
  const imperativeMatches = countKeywords(userText, IMPERATIVE_VERBS);
  // NOTE(review): "deploy" is in both IMPERATIVE_VERBS and AGENTIC_TASK_KEYWORDS,
  // so it contributes two signals here. Left as-is to keep routing unchanged.
  const complexitySignals = techMatches.length + imperativeMatches.length + agenticMatches.length;
  const isLongPrompt = userText.length > 300;

  if (complexitySignals >= 4 && (hasMultiStep || isLongPrompt)) {
    // Show agentic matches too: they count toward the threshold, so leaving
    // them out could produce an empty keyword list in the diagnostic.
    const shown = Array.from(
      new Set([...techMatches, ...imperativeMatches, ...agenticMatches]),
    ).slice(0, 3);
    signals.push(`complex override (${complexitySignals} signals: ${shown.join(", ")})`);
    return {
      tier: "COMPLEX",
      confidence: Math.max(sigmoid(weightedScore), 0.85),
      score: weightedScore,
      signals,
      reasoningMatches: reasoningMatchCount,
    };
  }

  // ── Map score to tier ─────────────────────────────────────────────────────
  // `distance` is how far the score sits from the nearest boundary of its tier.
  let tier: Tier;
  let distance: number;

  if (weightedScore < SIMPLE_MEDIUM_BOUNDARY) {
    tier = "SIMPLE";
    distance = SIMPLE_MEDIUM_BOUNDARY - weightedScore;
  } else if (weightedScore < MEDIUM_COMPLEX_BOUNDARY) {
    tier = "MEDIUM";
    distance = Math.min(
      weightedScore - SIMPLE_MEDIUM_BOUNDARY,
      MEDIUM_COMPLEX_BOUNDARY - weightedScore,
    );
  } else if (weightedScore < COMPLEX_REASONING_BOUNDARY) {
    tier = "COMPLEX";
    distance = Math.min(
      weightedScore - MEDIUM_COMPLEX_BOUNDARY,
      COMPLEX_REASONING_BOUNDARY - weightedScore,
    );
  } else {
    tier = "REASONING";
    distance = weightedScore - COMPLEX_REASONING_BOUNDARY;
  }

  return {
    tier,
    confidence: sigmoid(distance),
    score: weightedScore,
    signals,
    reasoningMatches: reasoningMatchCount,
  };
}
497
+
498
/**
 * Map a virtual model id to a forced tier override.
 *
 * The id is lower-cased, the `claw-llm-router/` prefix is removed if present,
 * and the remainder is compared against the four tier names.
 *
 * @param modelId Virtual model id, e.g. `claw-llm-router/simple` or `COMPLEX`.
 * @returns The forced tier, or `undefined` when the id names no tier
 *          (meaning: use the classifier).
 */
export function tierFromModelId(modelId: string): Tier | undefined {
  // Lower-case before stripping so the prefix is matched case-insensitively,
  // the same way the tier name is.
  const id = modelId.toLowerCase().replace("claw-llm-router/", "");

  // A switch rather than an object-literal lookup: indexing a plain object
  // with untrusted keys such as "constructor" or "__proto__" returns inherited
  // Object.prototype members instead of undefined.
  switch (id) {
    case "simple":
      return "SIMPLE";
    case "medium":
      return "MEDIUM";
    case "complex":
      return "COMPLEX";
    case "reasoning":
      return "REASONING";
    default:
      return undefined;
  }
}
509
+
510
/**
 * Fallback chain per tier: the ordered list of tiers to try, starting with
 * the tier itself and then moving up. Note that SIMPLE and MEDIUM stop at
 * COMPLEX, and REASONING has no fallback.
 */
export const FALLBACK_CHAIN: Record<Tier, Tier[]> = {
  SIMPLE: ["SIMPLE", "MEDIUM", "COMPLEX"],
  MEDIUM: ["MEDIUM", "COMPLEX"],
  COMPLEX: ["COMPLEX", "REASONING"],
  REASONING: ["REASONING"],
};
@@ -0,0 +1,82 @@
1
+ # Architecture
2
+
3
+ ```mermaid
4
+ flowchart LR
5
+ subgraph Plugin ["claw-llm-router plugin"]
6
+ IDX[index.ts<br/>Plugin Entry] --> PROXY[proxy.ts<br/>HTTP Proxy :8401]
7
+ PROXY --> CLS[classifier.ts<br/>Rule-Based]
8
+ PROXY --> CALL[providers/index.ts<br/>Provider Registry]
9
+ CALL --> OAI[OpenAI-Compatible<br/>Provider]
10
+ CALL --> ANT[Anthropic<br/>Provider]
11
+ CALL --> GW[Gateway<br/>Provider]
12
+ CALL --> GWO[Gateway + Override<br/>Provider]
13
+ end
14
+
15
+ OAI -->|Direct API| GOOGLE[Google Gemini]
16
+ OAI -->|Direct API| OPENAI[OpenAI]
17
+ OAI -->|Direct API| GROQ[Groq]
18
+ OAI -->|Direct API| XAI[xAI Grok]
19
+ OAI -->|Direct API| MINIMAX[MiniMax]
20
+ OAI -->|Direct API| MOONSHOT[MoonShot Kimi]
21
+ ANT -->|Direct API| ANTAPI[Anthropic API]
22
+ GW -->|Via Gateway| GWSVC[OpenClaw Gateway]
23
+ GWO -->|Via Gateway<br/>+ model override hook| GWSVC
24
+ ```
25
+
26
+ ## Provider Strategy
27
+
28
+ All providers implement the `LLMProvider` interface:
29
+
30
+ ```typescript
31
+ interface LLMProvider {
32
+ readonly name: string;
33
+ chatCompletion(
34
+ body: Record<string, unknown>,
35
+ spec: { modelId: string; apiKey: string; baseUrl: string },
36
+ stream: boolean,
37
+ res: ServerResponse,
38
+ log: PluginLogger,
39
+ ): Promise<void>;
40
+ }
41
+ ```
42
+
43
+ Provider resolution:
44
+
45
+ | Condition | Provider | How It Works |
46
+ | ---------------------------------------------- | -------------------------- | -------------------------------------------------------------------- |
47
+ | Any provider + OAuth token | `GatewayProvider` | Routes through OpenClaw gateway (handles token refresh + API format) |
48
+ | Any provider + OAuth + router is primary model | `gateway-with-override` | Gateway call with `before_model_resolve` hook to prevent recursion |
49
+ | Anthropic + direct API key | `AnthropicProvider` | Converts OpenAI format to Anthropic Messages API |
50
+ | All other providers | `OpenAICompatibleProvider` | POST to `{baseUrl}/chat/completions` with Bearer auth |
51
+
52
+ ## OAuth Model Override (Recursion Prevention)
53
+
54
+ When the router is set as OpenClaw's primary model and the target provider (Anthropic in the example below) authenticates with an OAuth token, a naive gateway call would cause infinite recursion:
55
+
56
+ ```mermaid
57
+ sequenceDiagram
58
+ participant U as User / OpenClaw
59
+ participant GW as OpenClaw Gateway
60
+ participant R as Router Proxy :8401
61
+ participant OVR as before_model_resolve Hook
62
+ participant A as Anthropic API
63
+
64
+ U->>GW: POST /v1/chat/completions<br/>model: claw-llm-router/auto
65
+ GW->>R: Forward to router proxy
66
+ R->>R: Classify → MEDIUM
67
+ R->>R: Resolve: Anthropic + OAuth + router is primary
68
+ R->>R: Store pending override<br/>(prompt → anthropic/claude-haiku)
69
+ R->>GW: POST /v1/chat/completions<br/>model: anthropic/claude-haiku
70
+ Note over GW: Gateway creates agent session<br/>Normally uses primary model (router) → recursion!
71
+ GW->>OVR: before_model_resolve fires
72
+ OVR->>OVR: Match pending override by prompt
73
+ OVR-->>GW: modelOverride: claude-haiku<br/>providerOverride: anthropic
74
+ Note over GW: Model overridden ✓<br/>No recursion back to router
75
+ GW->>A: Call Anthropic with OAuth
76
+ A-->>GW: Response
77
+ GW-->>R: Response
78
+ R-->>GW: Response
79
+ GW-->>U: Response
80
+ ```
81
+
82
+ The override uses an in-process `Map` keyed by the first 500 characters of the user prompt. Entries auto-expire after 30 seconds.