npm - claw-llm-router - Versions diffs - 1.0.0 - Mend

claw-llm-router 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

package/LICENSE +21 -0
package/README.md +336 -0
package/classifier.ts +516 -0
package/docs/ARCHITECTURE.md +82 -0
package/docs/CLASSIFIER.md +146 -0
package/docs/PROVIDERS.md +228 -0
package/index.ts +602 -0
package/models.ts +104 -0
package/openclaw.plugin.json +55 -0
package/package.json +52 -0
package/provider.ts +30 -0
package/providers/anthropic.ts +332 -0
package/providers/gateway.ts +128 -0
package/providers/index.ts +135 -0
package/providers/model-override.ts +81 -0
package/providers/openai-compatible.ts +126 -0
package/providers/types.ts +29 -0
package/proxy.ts +282 -0
package/router-logger.ts +101 -0
package/tier-config.ts +288 -0

package/openclaw.plugin.json ADDED Viewed

@@ -0,0 +1,55 @@
+{
+  "id": "claw-llm-router",
+  "name": "Claw LLM Router",
+  "description": "Local prompt classifier that routes to the cheapest capable model. 15-dimension weighted scoring, <1ms local routing. Supports any OpenAI-compatible provider + Anthropic native API. Direct to providers via your own API keys — no third parties.",
+  "version": "1.0.0",
+  "author": "Donn Felker",
+  "license": "MIT",
+  "repository": "https://github.com/donnfelker/claw-llm-router",
+  "entryPoint": "index.ts",
+  "providers": ["claw-llm-router"],
+  "configSchema": {
+    "type": "object",
+    "additionalProperties": false,
+    "properties": {
+      "port": {
+        "type": "number",
+        "description": "Local proxy port (default: 8401)"
+      },
+      "defaultModel": {
+        "type": "string",
+        "description": "Default routing model (default: claw-llm-router/auto)"
+      },
+      "tiers": {
+        "type": "object",
+        "description": "Model assignment per tier (format: provider/model-id)",
+        "properties": {
+          "SIMPLE": {
+            "type": "string",
+            "description": "Model for simple tasks (e.g., google/gemini-2.5-flash)"
+          },
+          "MEDIUM": {
+            "type": "string",
+            "description": "Model for medium tasks (e.g., anthropic/claude-haiku-4-5-20251001)"
+          },
+          "COMPLEX": {
+            "type": "string",
+            "description": "Model for complex tasks (e.g., anthropic/claude-sonnet-4-6)"
+          },
+          "REASONING": {
+            "type": "string",
+            "description": "Model for reasoning tasks (e.g., anthropic/claude-opus-4-6)"
+          }
+        }
+      },
+      "classifierModel": {
+        "type": "string",
+        "description": "Model for LLM-assisted classification when rule-based confidence is low (e.g., google/gemini-2.5-flash)"
+      }
+    }
+  },
+  "uiHints": {
+    "port": { "label": "Proxy Port", "placeholder": "8401" },
+    "defaultModel": { "label": "Default Model", "placeholder": "claw-llm-router/auto" }
+  }
+}

package/package.json ADDED Viewed

@@ -0,0 +1,52 @@
+{
+  "name": "claw-llm-router",
+  "version": "1.0.0",
+  "description": "OpenClaw plugin that cuts LLM costs 40-80% by routing prompts to the cheapest capable model. 15-dimension classifier, <1ms local routing, automatic fallback chain.",
+  "type": "module",
+  "author": "Donn Felker",
+  "license": "MIT",
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/donnfelker/claw-llm-router.git"
+  },
+  "keywords": [
+    "openclaw",
+    "openclaw-plugin",
+    "llm",
+    "router",
+    "classifier",
+    "ai"
+  ],
+  "openclaw": {
+    "extensions": [
+      "./index.ts"
+    ]
+  },
+  "files": [
+    "*.ts",
+    "providers/",
+    "openclaw.plugin.json",
+    "README.md",
+    "LICENSE",
+    "docs/"
+  ],
+  "scripts": {
+    "test": "npx tsx --test tests/providers/*.test.ts tests/classifier.test.ts tests/proxy.test.ts tests/tier-config.test.ts tests/router-logger.test.ts",
+    "typecheck": "tsc --noEmit",
+    "lint": "eslint .",
+    "format": "prettier --check .",
+    "format:fix": "prettier --write .",
+    "check": "npm run format && npm run lint && npm run typecheck && npm run test"
+  },
+  "dependencies": {
+    "tsx": "^4.0.0"
+  },
+  "devDependencies": {
+    "@eslint/js": "^9.0.0",
+    "@types/node": "^22.0.0",
+    "eslint": "^9.0.0",
+    "prettier": "^3.0.0",
+    "typescript": "^5.5.0",
+    "typescript-eslint": "^8.0.0"
+  }
+}

package/provider.ts ADDED Viewed

@@ -0,0 +1,30 @@
+/**
+ * Claw LLM Router — Provider Plugin Definition
+ *
+ * Registered at runtime via api.registerProvider().
+ * No auth required from OpenClaw's perspective — the proxy handles
+ * routing to providers using credentials from OpenClaw's auth stores
+ * (auth-profiles.json, auth.json, env vars). Never stores credentials itself.
+ */
+import { buildProviderConfig, PROVIDER_ID } from "./models.js";
+/**
+ * Shape of the provider definition object exported by this module.
+ */
+export type ProviderPlugin = {
+  /** Provider identifier. */
+  id: string;
+  /** Display label for the provider. */
+  label: string;
+  /** Optional documentation location (this plugin supplies a URL). */
+  docsPath?: string;
+  /** Optional alternative names for the provider. */
+  aliases?: string[];
+  /** Optional list of environment variable names associated with the provider. */
+  envVars?: string[];
+  /** Optional model configuration, typed as whatever `buildProviderConfig` returns. */
+  models?: ReturnType<typeof buildProviderConfig>;
+  /** Auth entries; the element shape is not constrained here (`unknown`). */
+  auth: unknown[];
+};
+/**
+ * Provider definition for the Claw LLM Router.
+ *
+ * `id` and `models` come from `./models.js`; `auth` is intentionally empty
+ * because the proxy, not this definition, deals with provider credentials.
+ */
+export const clawRouterProvider: ProviderPlugin = {
+  id: PROVIDER_ID,
+  label: "Claw LLM Router",
+  docsPath: "https://github.com/donnfelker/claw-llm-router",
+  aliases: ["clawrouter", "router"],
+  envVars: ["ANTHROPIC_API_KEY", "GEMINI_API_KEY"],
+  models: buildProviderConfig(),
+  auth: [], // No auth needed — proxy handles it
+};

package/providers/anthropic.ts ADDED Viewed

@@ -0,0 +1,332 @@
+/**
+ * Claw LLM Router — Anthropic Messages API Provider
+ *
+ * Handles Anthropic models when the user has a direct API key (not OAuth).
+ * Converts OpenAI chat completion format ↔ Anthropic Messages API format.
+ */
+import type { ServerResponse } from "node:http";
+import {
+  REQUEST_TIMEOUT_MS,
+  type LLMProvider,
+  type PluginLogger,
+  type ChatMessage,
+} from "./types.js";
+import { RouterLogger } from "../router-logger.js";
+// ── OpenAI → Anthropic request conversion ────────────────────────────────────
+/** Message shape produced for Anthropic: user/assistant roles only, plain string content. */
+type AnthropicMessage = { role: "user" | "assistant"; content: string };
+/**
+ * Converts OpenAI-style chat messages into Anthropic Messages API inputs.
+ *
+ * - `system` messages are collected and joined with a blank line into `system`.
+ * - `user` / `assistant` messages are kept in order; every other role is dropped.
+ * - Array content is flattened to the concatenation of its `text` parts.
+ * - Messages whose flattened text is empty or whitespace-only are skipped,
+ *   because Anthropic rejects messages with empty content (e.g. an assistant
+ *   turn with `content: null`, or an array holding only non-text parts).
+ *
+ * @param messages OpenAI chat messages from the incoming request.
+ * @returns The combined system prompt (`undefined` when there is none) and the
+ *          converted user/assistant messages.
+ */
+function convertMessages(messages: ChatMessage[]): {
+  system: string | undefined;
+  messages: AnthropicMessage[];
+} {
+  const systemParts: string[] = [];
+  const converted: AnthropicMessage[] = [];
+  for (const msg of messages) {
+    const content =
+      typeof msg.content === "string"
+        ? msg.content
+        : Array.isArray(msg.content)
+          ? (msg.content as Array<{ type: string; text?: string }>)
+              .filter((c) => c.type === "text")
+              .map((c) => c.text ?? "")
+              .join("")
+          : String(msg.content ?? "");
+    // Nothing to send: forwarding an empty message would fail the whole request.
+    if (content.trim() === "") continue;
+    if (msg.role === "system") {
+      systemParts.push(content);
+    } else if (msg.role === "user" || msg.role === "assistant") {
+      converted.push({ role: msg.role, content });
+    }
+  }
+  return {
+    system: systemParts.length > 0 ? systemParts.join("\n\n") : undefined,
+    messages: converted,
+  };
+}
+// OpenAI-only params that Anthropic doesn't accept; they are dropped from the request.
+// The legacy function-calling params are listed alongside `tools` / `tool_choice`.
+const OPENAI_ONLY_PARAMS = new Set([
+  "n",
+  "frequency_penalty",
+  "presence_penalty",
+  "logprobs",
+  "top_logprobs",
+  "logit_bias",
+  "response_format",
+  "seed",
+  "service_tier",
+  "tools",
+  "tool_choice",
+  "parallel_tool_calls",
+  "functions",
+  "function_call",
+  "user",
+  "store",
+  "metadata",
+  "stream_options",
+]);
+// OpenAI params that have an Anthropic equivalent under a different name.
+// They are translated below instead of being copied verbatim.
+const TRANSLATED_PARAMS = new Set(["stop", "max_completion_tokens"]);
+/**
+ * Normalizes an OpenAI `stop` value (string, string[], or null) to an
+ * Anthropic `stop_sequences` array; returns `undefined` when nothing usable remains.
+ */
+function toStopSequences(stop: unknown): string[] | undefined {
+  const candidates: unknown[] =
+    typeof stop === "string" ? [stop] : Array.isArray(stop) ? stop : [];
+  const sequences = candidates.filter(
+    (s): s is string => typeof s === "string" && s.length > 0,
+  );
+  return sequences.length > 0 ? sequences : undefined;
+}
+/**
+ * Builds the Anthropic Messages API request body from an OpenAI-style body.
+ *
+ * Copies every param except `model`, `messages`, `stream`, the OpenAI-only
+ * params, and the translated ones; then sets `model`, `messages`, `system`
+ * (when non-empty), `stop_sequences` and `max_tokens`.
+ *
+ * @param body       Incoming OpenAI chat-completion request body.
+ * @param modelId    Anthropic model id to send.
+ * @param systemText Combined system prompt, if any.
+ * @param messages   Already-converted user/assistant messages.
+ * @returns The request body to POST to Anthropic (without `stream`).
+ */
+function buildAnthropicBody(
+  body: Record<string, unknown>,
+  modelId: string,
+  systemText: string | undefined,
+  messages: AnthropicMessage[],
+): Record<string, unknown> {
+  const anthropicBody: Record<string, unknown> = {};
+  // Copy pass-through params
+  for (const [key, value] of Object.entries(body)) {
+    if (
+      key === "model" ||
+      key === "messages" ||
+      key === "stream" ||
+      OPENAI_ONLY_PARAMS.has(key) ||
+      TRANSLATED_PARAMS.has(key)
+    ) {
+      continue;
+    }
+    anthropicBody[key] = value;
+  }
+  anthropicBody.model = modelId;
+  anthropicBody.messages = messages;
+  if (systemText) anthropicBody.system = systemText;
+  // OpenAI `stop` → Anthropic `stop_sequences`; an explicit `stop_sequences` wins.
+  if (anthropicBody.stop_sequences === undefined) {
+    const stopSequences = toStopSequences(body.stop);
+    if (stopSequences) anthropicBody.stop_sequences = stopSequences;
+  }
+  // `max_tokens` is always set; fall back to the newer OpenAI name, then to 8192.
+  anthropicBody.max_tokens = (body.max_tokens ?? body.max_completion_tokens ?? 8192) as number;
+  return anthropicBody;
+}
+// ── Anthropic → OpenAI response conversion (non-streaming) ──────────────────
+/** Fields of a non-streaming Anthropic Messages API response that this module reads. */
+type AnthropicResponse = {
+  id: string;
+  type: "message";
+  role: "assistant";
+  // Content blocks; only blocks with type "text" are used when converting.
+  content: Array<{ type: string; text?: string }>;
+  model: string;
+  // Translated to an OpenAI finish_reason by mapStopReason().
+  stop_reason: string | null;
+  usage: { input_tokens: number; output_tokens: number };
+};
+/**
+ * Translates an Anthropic `stop_reason` into an OpenAI `finish_reason`.
+ *
+ * `"max_tokens"` becomes `"length"`; every other value (including `null`)
+ * becomes `"stop"`.
+ */
+function mapStopReason(reason: string | null): string {
+  return reason === "max_tokens" ? "length" : "stop";
+}
+/**
+ * Converts a non-streaming Anthropic response into an OpenAI
+ * `chat.completion` object with a single choice.
+ *
+ * The choice's content is the concatenation of all `text` blocks, and token
+ * usage is carried over with `total_tokens` computed as input + output.
+ */
+function toOpenAIResponse(anthropic: AnthropicResponse): Record<string, unknown> {
+  // Concatenate the text blocks; non-text blocks are ignored.
+  let text = "";
+  for (const block of anthropic.content) {
+    if (block.type === "text") text += block.text ?? "";
+  }
+  const choice = {
+    index: 0,
+    message: { role: "assistant", content: text },
+    finish_reason: mapStopReason(anthropic.stop_reason),
+  };
+  const { input_tokens: promptTokens, output_tokens: completionTokens } = anthropic.usage;
+  return {
+    id: `chatcmpl-${anthropic.id}`,
+    object: "chat.completion",
+    created: Math.floor(Date.now() / 1000),
+    model: anthropic.model,
+    choices: [choice],
+    usage: {
+      prompt_tokens: promptTokens,
+      completion_tokens: completionTokens,
+      total_tokens: promptTokens + completionTokens,
+    },
+  };
+}
+// ── Anthropic → OpenAI streaming SSE conversion ─────────────────────────────
+/**
+ * Serializes one OpenAI `chat.completion.chunk` as an SSE `data:` frame.
+ *
+ * @param id           Upstream message id; emitted with a `chatcmpl-` prefix.
+ * @param model        Model name reported in the chunk.
+ * @param delta        Delta payload for the single choice.
+ * @param finishReason Finish reason for the choice, `null` while streaming.
+ * @returns The frame text, terminated by a blank line.
+ */
+function buildStreamChunk(
+  id: string,
+  model: string,
+  delta: Record<string, unknown>,
+  finishReason: string | null = null,
+): string {
+  const choice = { index: 0, delta, finish_reason: finishReason };
+  const payload = JSON.stringify({
+    id: "chatcmpl-" + id,
+    object: "chat.completion.chunk",
+    created: Math.floor(Date.now() / 1000),
+    model,
+    choices: [choice],
+  });
+  return "data: " + payload + "\n\n";
+}
+/**
+ * Relays an Anthropic SSE stream to `res` as OpenAI-style
+ * `chat.completion.chunk` events.
+ *
+ * Event mapping:
+ *   - `message_start`       → first chunk carrying `{ role: "assistant" }`
+ *   - `content_block_delta` → content chunk (only for `text_delta` deltas)
+ *   - `message_delta`       → empty-delta chunk with the mapped finish reason
+ *   - `message_stop`        → `data: [DONE]`
+ *   - `error`               → logged as a warning; nothing is written to `res`
+ *
+ * Lines that are not `data: ` lines, or whose payload is not a JSON object,
+ * are ignored. A final line without a trailing newline goes through the same
+ * translation as every other line. `res` is ended (if still open) once the
+ * upstream reader is exhausted.
+ *
+ * @param reader Reader over the upstream Anthropic response body.
+ * @param res    Client response; writes are skipped once it has ended.
+ * @param _log   Logger used to report upstream `error` events.
+ */
+async function convertAnthropicStream(
+  reader: ReadableStreamDefaultReader<Uint8Array>,
+  res: ServerResponse,
+  _log: PluginLogger,
+): Promise<void> {
+  const decoder = new TextDecoder();
+  let buffer = "";
+  let messageId = "";
+  let model = "";
+  // Never write to a response that has already been ended.
+  const emit = (payload: string): void => {
+    if (!res.writableEnded) res.write(payload);
+  };
+  // Translates one SSE line; shared by the read loop and the end-of-stream flush.
+  const handleLine = (line: string): void => {
+    if (!line.startsWith("data: ")) return;
+    const dataStr = line.slice(6).trim();
+    if (!dataStr || dataStr === "[DONE]") return;
+    let parsed: unknown;
+    try {
+      parsed = JSON.parse(dataStr);
+    } catch {
+      return; // best-effort: skip malformed payloads
+    }
+    // Valid JSON that is not an object (null, number, ...) carries no event.
+    if (typeof parsed !== "object" || parsed === null) return;
+    const event = parsed as Record<string, unknown>;
+    // `delta` may be absent on malformed events; default to an empty object.
+    const delta = (event.delta ?? {}) as Record<string, unknown>;
+    switch (event.type) {
+      case "message_start": {
+        const msg = (event.message ?? {}) as Record<string, unknown>;
+        messageId = (msg.id as string) ?? "";
+        model = (msg.model as string) ?? "";
+        // Send initial chunk with role
+        emit(buildStreamChunk(messageId, model, { role: "assistant" }));
+        break;
+      }
+      case "content_block_delta":
+        if (delta.type === "text_delta") {
+          emit(buildStreamChunk(messageId, model, { content: delta.text as string }));
+        }
+        break;
+      case "message_delta": {
+        const stopReason = (delta.stop_reason as string | null) ?? null;
+        emit(buildStreamChunk(messageId, model, {}, mapStopReason(stopReason)));
+        break;
+      }
+      case "message_stop":
+        emit("data: [DONE]\n\n");
+        break;
+      case "error":
+        // Surface upstream failures instead of silently truncating the stream.
+        _log.warn(
+          `Anthropic stream error event: ${JSON.stringify(event.error ?? event).slice(0, 300)}`,
+        );
+        break;
+      default:
+        break;
+    }
+  };
+  while (true) {
+    const { done, value } = await reader.read();
+    if (done) break;
+    buffer += decoder.decode(value, { stream: true });
+    const lines = buffer.split("\n");
+    // The last element is a partial line (or ""); keep it for the next read.
+    buffer = lines.pop() ?? "";
+    for (const line of lines) handleLine(line);
+  }
+  // Flush bytes still held by the decoder, then any final unterminated line.
+  buffer += decoder.decode();
+  const trailing = buffer.trim();
+  if (trailing) handleLine(trailing);
+  if (!res.writableEnded) res.end();
+}
+// ── Provider implementation ──────────────────────────────────────────────────
+/**
+ * LLM provider that talks to the Anthropic Messages API directly and answers
+ * in OpenAI chat-completion format.
+ */
+export class AnthropicProvider implements LLMProvider {
+  readonly name = "anthropic";
+  /**
+   * Sends an OpenAI-style chat completion to Anthropic and writes the
+   * converted result to `res`.
+   *
+   * @param body   Incoming OpenAI chat-completion request body.
+   * @param spec   Target model id, API key and base URL (`/messages` is appended).
+   * @param stream Whether to request and relay a streaming (SSE) response.
+   * @param res    HTTP response that receives the converted output.
+   * @param log    Plugin logger, wrapped in a `RouterLogger` for the completion record.
+   * @throws Error when Anthropic answers with a non-2xx status (message carries
+   *         the status and the first 300 characters of the error body), when a
+   *         streaming response has no body, or when the request is aborted by
+   *         the `REQUEST_TIMEOUT_MS` timer.
+   */
+  async chatCompletion(
+    body: Record<string, unknown>,
+    spec: { modelId: string; apiKey: string; baseUrl: string },
+    stream: boolean,
+    res: ServerResponse,
+    log: PluginLogger,
+  ): Promise<void> {
+    const messages = (body.messages ?? []) as ChatMessage[];
+    const { system, messages: convertedMessages } = convertMessages(messages);
+    const anthropicBody = buildAnthropicBody(body, spec.modelId, system, convertedMessages);
+    anthropicBody.stream = stream;
+    // Tolerate a configured base URL with trailing slashes (avoids ".../v1//messages").
+    const url = `${spec.baseUrl.replace(/\/+$/, "")}/messages`;
+    const controller = new AbortController();
+    // The timer is only cleared in `finally`, i.e. after the response has been relayed.
+    const timeoutId = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS);
+    try {
+      const resp = await fetch(url, {
+        method: "POST",
+        headers: {
+          "x-api-key": spec.apiKey,
+          "anthropic-version": "2023-06-01",
+          "content-type": "application/json",
+        },
+        body: JSON.stringify(anthropicBody),
+        signal: controller.signal,
+      });
+      if (!resp.ok) {
+        const errText = await resp.text();
+        throw new Error(`Anthropic ${spec.modelId} ${resp.status}: ${errText.slice(0, 300)}`);
+      }
+      const rlog = new RouterLogger(log);
+      if (stream) {
+        res.writeHead(200, {
+          "Content-Type": "text/event-stream",
+          "Cache-Control": "no-cache",
+          "X-Accel-Buffering": "no",
+        });
+        const reader = resp.body?.getReader();
+        if (!reader) throw new Error(`No response body from Anthropic ${spec.modelId}`);
+        await convertAnthropicStream(reader, res, log);
+        rlog.done({ model: spec.modelId, via: "anthropic", streamed: true });
+      } else {
+        const data = (await resp.json()) as AnthropicResponse;
+        // Convert before writing headers so a malformed payload fails cleanly.
+        const openaiResponse = toOpenAIResponse(data);
+        res.writeHead(200, { "Content-Type": "application/json" });
+        res.end(JSON.stringify(openaiResponse));
+        rlog.done({
+          model: spec.modelId,
+          via: "anthropic",
+          streamed: false,
+          tokensIn: data.usage.input_tokens,
+          tokensOut: data.usage.output_tokens,
+        });
+      }
+    } catch (err) {
+      // Report an abort triggered by the timer as an explicit timeout error.
+      if (err instanceof Error && err.name === "AbortError") {
+        throw new Error(
+          `Anthropic ${spec.modelId} request timed out after ${REQUEST_TIMEOUT_MS}ms`,
+        );
+      }
+      throw err;
+    } finally {
+      clearTimeout(timeoutId);
+    }
+  }
+}
+// Exported for testing
+export { convertMessages, buildAnthropicBody, toOpenAIResponse, mapStopReason, buildStreamChunk };

package/providers/gateway.ts ADDED Viewed

@@ -0,0 +1,128 @@
+/**
+ * Claw LLM Router — Gateway Fallback Provider
+ *
+ * Routes requests through the OpenClaw gateway instead of calling providers directly.
+ * Used when:
+ *   - Anthropic OAuth token (sk-ant-oat01-*) is detected (can't be used directly)
+ *   - Direct provider calls fail and we need a fallback
+ *
+ * The gateway handles all provider-specific auth and format conversion.
+ * Auth: Bearer {gateway.token} (gateway token, not provider token)
+ * Model: {provider}/{modelId}
+ */
+import { readFileSync } from "node:fs";
+import type { ServerResponse } from "node:http";
+import { REQUEST_TIMEOUT_MS, type LLMProvider, type PluginLogger } from "./types.js";
+import { RouterLogger } from "../router-logger.js";
+// Home directory taken from the environment; loading this module throws when HOME is unset.
+const HOME = process.env.HOME;
+if (!HOME) throw new Error("[claw-llm-router] HOME environment variable not set");
+// Location of the OpenClaw config file under the home directory.
+const OPENCLAW_CONFIG_PATH = `${HOME}/.openclaw/openclaw.json`;
+/** Gateway connection details: port and auth token. */
+type GatewayInfo = { port: number; token: string };
+/**
+ * Reads the gateway port and auth token from the OpenClaw config file.
+ *
+ * Missing fields fall back to port 18789 and an empty token; the same
+ * defaults are returned when the file cannot be read or parsed.
+ */
+export function getGatewayInfo(): GatewayInfo {
+  const fallback: GatewayInfo = { port: 18789, token: "" };
+  let gateway: { port?: number; auth?: { token?: string } } | undefined;
+  try {
+    const parsed = JSON.parse(readFileSync(OPENCLAW_CONFIG_PATH, "utf8")) as {
+      gateway?: { port?: number; auth?: { token?: string } };
+    };
+    gateway = parsed.gateway;
+  } catch {
+    return fallback;
+  }
+  return {
+    port: gateway?.port ?? fallback.port,
+    token: gateway?.auth?.token ?? fallback.token,
+  };
+}
+// Module-level flag: set once the gateway-fallback warning has been logged, so it is not repeated.
+let hasWarnedGatewayFallback = false;
+/**
+ * LLM provider that forwards chat completions to the local OpenClaw gateway
+ * instead of calling the upstream provider directly.
+ */
+export class GatewayProvider implements LLMProvider {
+  readonly name = "gateway";
+  /**
+   * Forwards the request to `http://127.0.0.1:{port}/v1/chat/completions`
+   * using the gateway token and relays the response to `res` unchanged.
+   *
+   * @param body   Incoming OpenAI chat-completion request body.
+   * @param spec   Target model; `provider` (when present) prefixes the model id.
+   * @param stream Whether to request and relay a streaming response.
+   * @param res    HTTP response that receives the gateway output.
+   * @param log    Plugin logger; also receives the one-time fallback warning.
+   * @throws Error when the gateway answers with a non-2xx status, when a
+   *         streaming response has no body, or when the request is aborted by
+   *         the `REQUEST_TIMEOUT_MS` timer.
+   */
+  async chatCompletion(
+    body: Record<string, unknown>,
+    spec: { modelId: string; apiKey: string; baseUrl: string; provider?: string },
+    stream: boolean,
+    res: ServerResponse,
+    log: PluginLogger,
+  ): Promise<void> {
+    const provider = spec.provider;
+    if (!hasWarnedGatewayFallback) {
+      log.warn(
+        `Using gateway fallback for ${provider ?? "unknown"} — direct API key recommended for use as primary model`,
+      );
+      hasWarnedGatewayFallback = true;
+    }
+    const gw = getGatewayInfo();
+    // Prefix with the provider only when one is known; an empty provider used
+    // to yield a model id with a leading slash ("/model").
+    const modelId = provider ? `${provider}/${spec.modelId}` : spec.modelId;
+    const url = `http://127.0.0.1:${gw.port}/v1/chat/completions`;
+    const payload = { ...body, model: modelId, stream };
+    const controller = new AbortController();
+    const timeoutId = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS);
+    try {
+      const resp = await fetch(url, {
+        method: "POST",
+        headers: {
+          Authorization: `Bearer ${gw.token}`,
+          "Content-Type": "application/json",
+        },
+        body: JSON.stringify(payload),
+        signal: controller.signal,
+      });
+      if (!resp.ok) {
+        const errText = await resp.text();
+        throw new Error(`Gateway → ${modelId} ${resp.status}: ${errText.slice(0, 300)}`);
+      }
+      const rlog = new RouterLogger(log);
+      if (stream) {
+        res.writeHead(200, {
+          "Content-Type": "text/event-stream",
+          "Cache-Control": "no-cache",
+          "X-Accel-Buffering": "no",
+        });
+        const reader = resp.body?.getReader();
+        if (!reader) throw new Error(`No response body from gateway for ${modelId}`);
+        const decoder = new TextDecoder();
+        // Relay the gateway stream as-is; writes stop once the response has ended.
+        while (true) {
+          const { done, value } = await reader.read();
+          if (done) break;
+          if (!res.writableEnded) res.write(decoder.decode(value, { stream: true }));
+        }
+        if (!res.writableEnded) res.end();
+        rlog.done({ model: modelId, via: "gateway", streamed: true });
+      } else {
+        const data = (await resp.json()) as Record<string, unknown>;
+        res.writeHead(200, { "Content-Type": "application/json" });
+        res.end(JSON.stringify(data));
+        const usage = (data.usage ?? {}) as Record<string, number>;
+        rlog.done({
+          model: modelId,
+          via: "gateway",
+          streamed: false,
+          tokensIn: usage.prompt_tokens ?? "?",
+          tokensOut: usage.completion_tokens ?? "?",
+        });
+      }
+    } catch (err) {
+      // Report an abort triggered by the timer as an explicit timeout error.
+      if (err instanceof Error && err.name === "AbortError") {
+        throw new Error(`Gateway → ${modelId} request timed out after ${REQUEST_TIMEOUT_MS}ms`);
+      }
+      throw err;
+    } finally {
+      clearTimeout(timeoutId);
+    }
+  }
+}
+/**
+ * Clears the one-time warning flag so the next gateway call logs the
+ * fallback warning again. Exported for testing.
+ */
+export function resetGatewayWarning(): void {
+  hasWarnedGatewayFallback = false;
+}