claw-llm-router 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +336 -0
- package/classifier.ts +516 -0
- package/docs/ARCHITECTURE.md +82 -0
- package/docs/CLASSIFIER.md +146 -0
- package/docs/PROVIDERS.md +228 -0
- package/index.ts +602 -0
- package/models.ts +104 -0
- package/openclaw.plugin.json +55 -0
- package/package.json +52 -0
- package/provider.ts +30 -0
- package/providers/anthropic.ts +332 -0
- package/providers/gateway.ts +128 -0
- package/providers/index.ts +135 -0
- package/providers/model-override.ts +81 -0
- package/providers/openai-compatible.ts +126 -0
- package/providers/types.ts +29 -0
- package/proxy.ts +282 -0
- package/router-logger.ts +101 -0
- package/tier-config.ts +288 -0
package/providers/index.ts
ADDED
@@ -0,0 +1,135 @@
/**
 * Claw LLM Router — Provider Registry
 *
 * Resolves the correct provider based on model spec:
 * 1. Anthropic + OAuth token (sk-ant-oat01-*) → GatewayProvider
 *    - When router is NOT the primary model → plain gateway call
 *    - When router IS the primary model → gateway with model override
 *      (uses before_model_resolve hook to break recursion)
 * 2. Anthropic + direct API key → AnthropicProvider
 * 3. Everything else → OpenAICompatibleProvider
 */

import { readFileSync } from "node:fs";
import type { ServerResponse } from "node:http";
import type { LLMProvider, PluginLogger } from "./types.js";
import type { TierModelSpec } from "../tier-config.js";
import { envVarName } from "../tier-config.js";
import { OpenAICompatibleProvider } from "./openai-compatible.js";
import { AnthropicProvider } from "./anthropic.js";
import { GatewayProvider } from "./gateway.js";
import { setPendingOverride, extractUserPromptFromBody } from "./model-override.js";
import { RouterLogger } from "../router-logger.js";

const openaiCompatibleProvider = new OpenAICompatibleProvider();
const anthropicProvider = new AnthropicProvider();
const gatewayProvider = new GatewayProvider();

const HOME = process.env.HOME ?? "";
const OPENCLAW_CONFIG_PATH = `${HOME}/.openclaw/openclaw.json`;

/**
 * Check if the router is set as the primary model.
 * When it is, gateway calls will recurse (gateway creates agent sessions
 * using the primary model → calls the router → calls gateway → loop).
 */
function isRouterPrimaryModel(): boolean {
  try {
    const raw = readFileSync(OPENCLAW_CONFIG_PATH, "utf8");
    const config = JSON.parse(raw) as {
      agents?: { defaults?: { model?: { primary?: string } } };
    };
    const primary = config.agents?.defaults?.model?.primary ?? "";
    return primary.startsWith("claw-llm-router");
  } catch {
    return false;
  }
}

let cachedIsRouterPrimary: boolean | undefined;
function getIsRouterPrimary(): boolean {
  if (cachedIsRouterPrimary === undefined) {
    cachedIsRouterPrimary = isRouterPrimaryModel();
  }
  return cachedIsRouterPrimary;
}

// Refresh the cache periodically (every 30s) in case config changes
const _cacheInterval = setInterval(() => {
  cachedIsRouterPrimary = undefined;
}, 30_000);
_cacheInterval.unref?.();

/**
 * Gateway-with-override provider: routes through the gateway but sets a
 * pending model override so the before_model_resolve hook can redirect
 * the agent session to the actual Anthropic model (breaking the recursion).
 */
const gatewayOverrideProvider: LLMProvider = {
  name: "gateway-with-override",
  async chatCompletion(body, spec, stream, res, log): Promise<void> {
    const rlog = new RouterLogger(log);
    const fullSpec = spec as TierModelSpec;
    const userPrompt = extractUserPromptFromBody(body);
    if (!userPrompt) {
      log.warn("Gateway override: no user prompt found — override may not match");
    }
    rlog.override({ provider: fullSpec.provider, model: spec.modelId });
    setPendingOverride(userPrompt, spec.modelId, fullSpec.provider);
    await gatewayProvider.chatCompletion(body, spec, stream, res, log);
  },
};

export class MissingApiKeyError extends Error {
  public readonly provider: string;
  public readonly modelId: string;
  public readonly envVar: string;

  constructor(provider: string, modelId: string, envVar: string) {
    super(
      `No API key for ${provider}/${modelId}. ` +
        `Set ${envVar} or run /auth to add ${provider} credentials. ` +
        `Details: /router doctor`,
    );
    this.name = "MissingApiKeyError";
    this.provider = provider;
    this.modelId = modelId;
    this.envVar = envVar;
  }
}

export function resolveProvider(spec: TierModelSpec): LLMProvider {
  // Any provider with OAuth credentials → route through gateway
  // (gateway handles token refresh and API format conversion)
  if (spec.isOAuth) {
    if (getIsRouterPrimary()) {
      return gatewayOverrideProvider;
    }
    return gatewayProvider;
  }
  if (spec.isAnthropic) {
    return anthropicProvider;
  }
  return openaiCompatibleProvider;
}

export async function callProvider(
  spec: TierModelSpec,
  body: Record<string, unknown>,
  stream: boolean,
  res: ServerResponse,
  log: PluginLogger,
): Promise<void> {
  if (!spec.apiKey) {
    throw new MissingApiKeyError(spec.provider, spec.modelId, envVarName(spec.provider));
  }
  const rlog = new RouterLogger(log);
  const provider = resolveProvider(spec);
  rlog.provider({ name: provider.name, provider: spec.provider, model: spec.modelId });
  await provider.chatCompletion(body, spec, stream, res, log);
}

export { openaiCompatibleProvider, anthropicProvider, gatewayProvider };

// Export for testing
export { getIsRouterPrimary };
package/providers/model-override.ts
ADDED
@@ -0,0 +1,81 @@
/**
 * Claw LLM Router — In-Process Model Override Store
 *
 * When the router is the primary model AND Anthropic OAuth is detected,
 * direct gateway calls cause recursion (gateway creates agent sessions
 * using the primary model → routes back through the router).
 *
 * Solution: Before calling the gateway, store a pending model override
 * keyed by the user prompt. The plugin's `before_model_resolve` hook
 * consumes the override and tells the gateway to use the actual Anthropic
 * model instead of routing back through the router.
 *
 * Key = first 500 chars of the user prompt (enough for uniqueness).
 * Entries auto-expire after 30 seconds.
 */

const pendingOverrides = new Map<string, { model: string; provider: string; expires: number }>();

function makeKey(prompt: string): string {
  return prompt.slice(0, 500);
}

export function setPendingOverride(prompt: string, model: string, provider: string): void {
  const key = makeKey(prompt);
  pendingOverrides.set(key, {
    model,
    provider,
    expires: Date.now() + 30_000,
  });
}

export function consumeOverride(prompt: string): { model: string; provider: string } | undefined {
  const key = makeKey(prompt);
  const entry = pendingOverrides.get(key);
  if (!entry) return undefined;
  pendingOverrides.delete(key);
  if (Date.now() > entry.expires) return undefined;
  return { model: entry.model, provider: entry.provider };
}

/**
 * Extract the last user message from a chat completion request body.
 * Used to generate the override key.
 */
export function extractUserPromptFromBody(body: Record<string, unknown>): string {
  const messages = (body.messages ?? []) as Array<{
    role: string;
    content: string | unknown;
  }>;
  for (let i = messages.length - 1; i >= 0; i--) {
    if (messages[i].role === "user") {
      const content = messages[i].content;
      if (typeof content === "string") return content;
      if (Array.isArray(content)) {
        return (content as Array<{ type: string; text?: string }>)
          .filter((c) => c.type === "text")
          .map((c) => c.text ?? "")
          .join(" ");
      }
    }
  }
  return "";
}

// Cleanup expired entries periodically
const cleanupInterval = setInterval(() => {
  const now = Date.now();
  for (const [key, val] of pendingOverrides) {
    if (now > val.expires) pendingOverrides.delete(key);
  }
}, 60_000);
cleanupInterval.unref?.();

// For testing
export function clearOverrides(): void {
  pendingOverrides.clear();
}

export function pendingCount(): number {
  return pendingOverrides.size;
}
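
A small usage sketch of the override handshake. The model ID below is a placeholder, and the consuming side runs inside the plugin's before_model_resolve hook, which is defined elsewhere in the package.

import { setPendingOverride, consumeOverride } from "./model-override.js";

const prompt = "Summarize the attached log file";

// Producer side (gateway-with-override provider): record where this prompt
// should really go before the gateway call is made.
setPendingOverride(prompt, "claude-example-model", "anthropic");

// Consumer side (before_model_resolve hook): the same prompt text retrieves
// the override exactly once.
consumeOverride(prompt); // { model: "claude-example-model", provider: "anthropic" }
consumeOverride(prompt); // undefined (single use; also undefined after the 30s expiry)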
package/providers/openai-compatible.ts
ADDED
@@ -0,0 +1,126 @@
/**
 * Claw LLM Router — OpenAI-Compatible Provider
 *
 * Handles: Google Gemini, OpenAI, Groq, Mistral, DeepSeek, Together,
 * Fireworks, Perplexity, xAI, and any other OpenAI-compatible API.
 *
 * POST to {baseUrl}/chat/completions with Bearer auth.
 * Forwards request body with only standard OpenAI chat completion params
 * (non-standard fields like `store` cause 400 errors on Google/Groq/etc).
 */

import type { ServerResponse } from "node:http";
import { REQUEST_TIMEOUT_MS, type LLMProvider, type PluginLogger } from "./types.js";
import { RouterLogger } from "../router-logger.js";

// Standard OpenAI chat completion parameters that providers generally accept.
// Non-standard or provider-specific fields (e.g. `store`, `metadata`) are stripped
// to avoid 400 errors from providers like Google Gemini.
const ALLOWED_PARAMS = new Set([
  "messages",
  "model",
  "stream",
  "max_tokens",
  "max_completion_tokens",
  "temperature",
  "top_p",
  "n",
  "stop",
  "presence_penalty",
  "frequency_penalty",
  "logit_bias",
  "logprobs",
  "top_logprobs",
  "response_format",
  "seed",
  "tools",
  "tool_choice",
  "parallel_tool_calls",
  "user",
  "stream_options",
  "service_tier",
]);

function sanitizeBody(body: Record<string, unknown>): Record<string, unknown> {
  const clean: Record<string, unknown> = {};
  for (const [key, value] of Object.entries(body)) {
    if (ALLOWED_PARAMS.has(key)) {
      clean[key] = value;
    }
  }
  return clean;
}

export class OpenAICompatibleProvider implements LLMProvider {
  readonly name = "openai-compatible";

  async chatCompletion(
    body: Record<string, unknown>,
    spec: { modelId: string; apiKey: string; baseUrl: string },
    stream: boolean,
    res: ServerResponse,
    log: PluginLogger,
  ): Promise<void> {
    const url = `${spec.baseUrl}/chat/completions`;
    const payload = { ...sanitizeBody(body), model: spec.modelId, stream };

    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS);

    try {
      const resp = await fetch(url, {
        method: "POST",
        headers: {
          Authorization: `Bearer ${spec.apiKey}`,
          "Content-Type": "application/json",
        },
        body: JSON.stringify(payload),
        signal: controller.signal,
      });

      if (!resp.ok) {
        const errText = await resp.text();
        throw new Error(`${spec.modelId} ${resp.status}: ${errText.slice(0, 300)}`);
      }

      const rlog = new RouterLogger(log);

      if (stream) {
        res.writeHead(200, {
          "Content-Type": "text/event-stream",
          "Cache-Control": "no-cache",
          "X-Accel-Buffering": "no",
        });
        const reader = resp.body?.getReader();
        if (!reader) throw new Error(`No response body from ${spec.modelId}`);
        const decoder = new TextDecoder();
        while (true) {
          const { done, value } = await reader.read();
          if (done) break;
          if (!res.writableEnded) res.write(decoder.decode(value, { stream: true }));
        }
        if (!res.writableEnded) res.end();
        rlog.done({ model: spec.modelId, via: "direct", streamed: true });
      } else {
        const data = (await resp.json()) as Record<string, unknown>;
        res.writeHead(200, { "Content-Type": "application/json" });
        res.end(JSON.stringify(data));
        const usage = (data.usage ?? {}) as Record<string, number>;
        rlog.done({
          model: spec.modelId,
          via: "direct",
          streamed: false,
          tokensIn: usage.prompt_tokens ?? "?",
          tokensOut: usage.completion_tokens ?? "?",
        });
      }
    } catch (err) {
      if (err instanceof Error && err.name === "AbortError") {
        throw new Error(`${spec.modelId} request timed out after ${REQUEST_TIMEOUT_MS}ms`);
      }
      throw err;
    } finally {
      clearTimeout(timeoutId);
    }
  }
}
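
To make the filtering concrete, here is roughly what happens to a request body. sanitizeBody is module-private, so this is an illustration of its mapping rather than an importable example; the values are made up.

// Input as it might arrive from OpenClaw:
const incoming = {
  model: "claw-llm-router/auto",
  messages: [{ role: "user", content: "hi" }],
  temperature: 0.2,
  stream: true,
  store: true,                  // non-standard, stripped
  metadata: { session: "abc" }, // non-standard, stripped
};

// After sanitizeBody(incoming) plus the override in chatCompletion, the
// upstream provider receives:
//   { messages, temperature: 0.2, model: spec.modelId, stream }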
package/providers/types.ts
ADDED
@@ -0,0 +1,29 @@
/**
 * Claw LLM Router — Provider Types
 *
 * Shared interface and types for all LLM providers.
 */

import type { ServerResponse } from "node:http";

export type PluginLogger = {
  info: (msg: string) => void;
  warn: (msg: string) => void;
  error: (msg: string) => void;
};

export type ChatMessage = { role: string; content: string | unknown };

/** Default timeout for outbound provider requests (3 minutes). */
export const REQUEST_TIMEOUT_MS = 180_000;

export interface LLMProvider {
  readonly name: string;
  chatCompletion(
    body: Record<string, unknown>,
    spec: { modelId: string; apiKey: string; baseUrl: string },
    stream: boolean,
    res: ServerResponse,
    log: PluginLogger,
  ): Promise<void>;
}
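
For reference, a minimal class that satisfies this interface. The echo behavior is invented for illustration and is not part of the package.

import type { ServerResponse } from "node:http";
import type { LLMProvider, PluginLogger } from "./types.js";

// Toy provider that fulfills the contract by echoing the request back.
export class EchoProvider implements LLMProvider {
  readonly name = "echo";

  async chatCompletion(
    body: Record<string, unknown>,
    spec: { modelId: string; apiKey: string; baseUrl: string },
    stream: boolean,
    res: ServerResponse,
    log: PluginLogger,
  ): Promise<void> {
    log.info(`echo: ${spec.modelId}`);
    res.writeHead(200, { "Content-Type": "application/json" });
    res.end(JSON.stringify({ model: spec.modelId, echoed: body, stream }));
  }
}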
package/proxy.ts
ADDED
@@ -0,0 +1,282 @@
/**
 * Claw LLM Router — In-Process HTTP Proxy
 *
 * Runs inside the OpenClaw gateway process (no subprocess).
 * Classifies prompts locally, then routes to the right model via
 * direct provider calls (OpenAI-compatible, Anthropic Messages API,
 * or gateway fallback for OAuth tokens).
 *
 * Auth is NEVER stored in the plugin — keys are read from OpenClaw's auth stores.
 */

import { createServer, type Server, type IncomingMessage, type ServerResponse } from "node:http";
import { classify, tierFromModelId, FALLBACK_CHAIN, type Tier } from "./classifier.js";
import { PROXY_PORT } from "./models.js";
import { loadTierConfig } from "./tier-config.js";
import { callProvider, MissingApiKeyError } from "./providers/index.js";
import type { PluginLogger, ChatMessage } from "./providers/types.js";
import { RouterLogger } from "./router-logger.js";

// ── Message extraction ───────────────────────────────────────────────────────

function extractUserPrompt(messages: ChatMessage[]): string {
  for (let i = messages.length - 1; i >= 0; i--) {
    const m = messages[i];
    if (m.role === "user") {
      if (typeof m.content === "string") return m.content;
      if (Array.isArray(m.content)) {
        return (m.content as Array<{ type: string; text?: string }>)
          .filter((c) => c.type === "text")
          .map((c) => c.text ?? "")
          .join(" ");
      }
    }
  }
  return "";
}

function extractSystemPrompt(messages: ChatMessage[]): string {
  return messages
    .filter((m) => m.role === "system")
    .map((m) => (typeof m.content === "string" ? m.content : ""))
    .join(" ");
}

// ── Request router ────────────────────────────────────────────────────────────

async function handleChatCompletion(
  _req: IncomingMessage,
  res: ServerResponse,
  body: Record<string, unknown>,
  log: PluginLogger,
): Promise<void> {
  const messages = (body.messages ?? []) as ChatMessage[];
  const stream = (body.stream as boolean) ?? false;
  const modelId = ((body.model as string) ?? "auto").replace("claw-llm-router/", "");

  const rlog = new RouterLogger(log);
  const userPrompt = extractUserPrompt(messages);
  const systemPrompt = extractSystemPrompt(messages);

  // ── Extract classifiable prompt ──────────────────────────────────────────
  // The user message may contain more than just the user's input:
  //   1. Packed context (group chats / subagents): history + current message
  //   2. Embedded system prompt: system instructions prepended to user text
  // We need to isolate the actual user text for accurate classification.

  const isPackedContext =
    userPrompt.startsWith("[Chat messages since") || userPrompt.startsWith("[chat messages since");

  let classifiablePrompt = userPrompt;

  // Case 0: Strip "Conversation info (untrusted metadata)" wrapper
  // OpenClaw prepends message metadata as a fenced JSON block:
  //   Conversation info (untrusted metadata): ```json { ... }```\n\nActual prompt
  // Strip it before classification so ```/json don't pollute scoring.
  const metadataPrefix = "Conversation info (untrusted metadata):";
  if (classifiablePrompt.startsWith(metadataPrefix)) {
    const closingFence = classifiablePrompt.indexOf("```", metadataPrefix.length + 4); // skip opening ```
    if (closingFence !== -1) {
      classifiablePrompt = classifiablePrompt.slice(closingFence + 3).trim();
    }
  }

  if (isPackedContext) {
    // Case 1: Packed context — extract text after the current-message marker
    const marker = "[Current message - respond to this]";
    const markerIdx = userPrompt.indexOf(marker);
    if (markerIdx !== -1) {
      classifiablePrompt = userPrompt.slice(markerIdx + marker.length).trim();
    }
  } else if (systemPrompt && userPrompt.length > systemPrompt.length) {
    // Case 2: System prompt embedded in user message — strip it
    // Some paths (e.g. webchat) prepend the system prompt to the user message
    // instead of sending it as a separate system-role message.
    const sysIdx = userPrompt.indexOf(systemPrompt);
    if (sysIdx !== -1) {
      const stripped = (
        userPrompt.slice(0, sysIdx) + userPrompt.slice(sysIdx + systemPrompt.length)
      ).trim();
      if (stripped) classifiablePrompt = stripped;
    }
  } else if (!systemPrompt && userPrompt.length > 500) {
    // Case 3: No separate system message and user message is suspiciously long.
    // The system prompt is likely embedded. The actual user text is at the end,
    // after the last paragraph break.
    const lastBreak = userPrompt.lastIndexOf("\n\n");
    if (lastBreak !== -1) {
      const tail = userPrompt.slice(lastBreak).trim();
      if (tail && tail.length < 500) {
        classifiablePrompt = tail;
      }
    }
  }

  // ── Classify ────────────────────────────────────────────────────────────
  const extracted = classifiablePrompt !== userPrompt;
  rlog.request({
    model: modelId,
    stream,
    prompt: classifiablePrompt,
    extraction: extracted ? { from: userPrompt.length, to: classifiablePrompt.length } : undefined,
  });

  let tier: Tier;
  let classificationMethod: string;

  const tierOverride = tierFromModelId(modelId);
  if (tierOverride) {
    tier = tierOverride;
    classificationMethod = "forced";
    rlog.classify({ tier, method: "forced", detail: `(model=${modelId})` });
  } else if (isPackedContext && !classifiablePrompt) {
    tier = "MEDIUM";
    classificationMethod = "packed-default";
    rlog.classify({
      tier: "MEDIUM",
      method: "packed-default",
      detail: `(no current-message marker in ${userPrompt.length}-char packed context)`,
    });
  } else {
    const result = classify(classifiablePrompt);
    tier = result.tier;
    classificationMethod = "rule-based";
    rlog.classify({
      tier,
      method: "rule-based",
      score: result.score,
      confidence: result.confidence,
      signals: result.signals,
    });
  }

  // ── Route ──────────────────────────────────────────────────────────────
  const tierConfig = loadTierConfig();
  const chain = FALLBACK_CHAIN[tier];
  const targetSpec = tierConfig[tier];
  rlog.route({
    tier,
    provider: targetSpec.provider,
    model: targetSpec.modelId,
    method: classificationMethod,
    chain,
  });

  let lastError: Error | undefined;
  let allMissingKeys = true;

  for (const attemptTier of chain) {
    const spec = tierConfig[attemptTier];
    try {
      await callProvider(spec, body, stream, res, log);
      return; // success
    } catch (err) {
      lastError = err instanceof Error ? err : new Error(String(err));
      if (!(err instanceof MissingApiKeyError)) allMissingKeys = false;
      rlog.fallback({
        tier: attemptTier,
        provider: spec.provider,
        model: spec.modelId,
        error: lastError.message,
      });
    }
  }

  rlog.failed({ chain, error: lastError?.message ?? "unknown" });
  if (!res.headersSent) {
    res.writeHead(502, { "Content-Type": "application/json" });
  }
  if (!res.writableEnded) {
    const message = allMissingKeys
      ? `No API keys configured. Run /router doctor to see what's needed, or set API keys for your providers (e.g. GEMINI_API_KEY, ANTHROPIC_API_KEY). See: https://github.com/anthropics/claw-llm-router#setup`
      : `All providers failed: ${lastError?.message}`;
    res.end(
      JSON.stringify({
        error: { message, type: "router_error" },
      }),
    );
  }
}

// ── Server ────────────────────────────────────────────────────────────────────

function readBody(req: IncomingMessage): Promise<Buffer> {
  return new Promise((resolve, reject) => {
    const chunks: Buffer[] = [];
    req.on("data", (chunk: Buffer) => chunks.push(chunk));
    req.on("end", () => resolve(Buffer.concat(chunks)));
    req.on("error", reject);
  });
}

const CREATED_AT = Math.floor(Date.now() / 1000);

export async function startProxy(log: PluginLogger): Promise<Server> {
  const server = createServer(async (req: IncomingMessage, res: ServerResponse) => {
    try {
      // Health check
      if (req.url === "/health" || req.url?.startsWith("/health?")) {
        res.writeHead(200, { "Content-Type": "application/json" });
        res.end(JSON.stringify({ status: "ok", version: "1.0.0" }));
        return;
      }

      // Models list
      if (req.url === "/v1/models" && req.method === "GET") {
        const { ROUTER_MODELS, PROVIDER_ID } = await import("./models.js");
        const models = ROUTER_MODELS.map((m) => ({
          id: m.id,
          object: "model",
          created: CREATED_AT,
          owned_by: PROVIDER_ID,
        }));
        res.writeHead(200, { "Content-Type": "application/json" });
        res.end(JSON.stringify({ object: "list", data: models }));
        return;
      }

      // Chat completions
      if (req.url === "/v1/chat/completions" && req.method === "POST") {
        const rawBody = await readBody(req);
        let body: Record<string, unknown>;
        try {
          body = JSON.parse(rawBody.toString()) as Record<string, unknown>;
        } catch {
          res.writeHead(400, { "Content-Type": "application/json" });
          res.end(JSON.stringify({ error: { message: "Invalid JSON", type: "invalid_request" } }));
          return;
        }
        await handleChatCompletion(req, res, body, log);
        return;
      }

      res.writeHead(404, { "Content-Type": "application/json" });
      res.end(JSON.stringify({ error: { message: "Not found", type: "not_found" } }));
    } catch (err) {
      const msg = err instanceof Error ? err.message : String(err);
      log.error(`Proxy error: ${msg}`);
      if (!res.headersSent) {
        res.writeHead(502, { "Content-Type": "application/json" });
      }
      if (!res.writableEnded) {
        res.end(JSON.stringify({ error: { message: msg, type: "proxy_error" } }));
      }
    }
  });

  return new Promise((resolve, reject) => {
    server.on("error", (err: NodeJS.ErrnoException) => {
      if (err.code === "EADDRINUSE") {
        log.warn(`Port ${PROXY_PORT} already in use — proxy may already be running`);
        reject(err);
      } else {
        reject(err);
      }
    });

    server.listen(PROXY_PORT, "127.0.0.1", () => {
      log.info(`Proxy started on http://127.0.0.1:${PROXY_PORT}`);
      resolve(server);
    });
  });
}
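
A quick smoke-test sketch against a running proxy. PROXY_PORT is defined in models.ts, whose value is not shown in this diff, so the port below is a placeholder.

const base = "http://127.0.0.1:3000"; // substitute the real PROXY_PORT

const health = await fetch(`${base}/health`);
console.log(await health.json()); // { status: "ok", version: "1.0.0" }

const completion = await fetch(`${base}/v1/chat/completions`, {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({
    model: "claw-llm-router/auto", // the default model id; tier selection is left to the classifier
    messages: [{ role: "user", content: "What is 2 + 2?" }],
    stream: false,
  }),
});
console.log(await completion.json());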