npm - nvicode - Versions diffs - 0.1.2 → 0.1.6 - Mend

nvicode 0.1.2 → 0.1.6

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.

Files changed (7) hide show

package/dist/config.js CHANGED Viewed

@@ -3,10 +3,46 @@ import { promises as fs } from "node:fs";
 import os from "node:os";
 import path from "node:path";
 const DEFAULT_PROXY_PORT = 8788;
-const DEFAULT_MODEL = "moonshotai/kimi-k2.5";
+const DEFAULT_PROVIDER = "nvidia";
+const DEFAULT_NVIDIA_MODEL = "moonshotai/kimi-k2.5";
+const DEFAULT_OPENROUTER_MODEL = "anthropic/claude-sonnet-4.6";
+const DEFAULT_MAX_REQUESTS_PER_MINUTE = 40;
+const getEnvNumber = (name) => {
+    const raw = process.env[name];
+    if (!raw) {
+        return null;
+    }
+    const parsed = Number(raw);
+    if (!Number.isFinite(parsed) || parsed <= 0) {
+        return null;
+    }
+    return Math.floor(parsed);
+};
+const getDefaultConfigHome = () => {
+    if (process.env.XDG_CONFIG_HOME) {
+        return process.env.XDG_CONFIG_HOME;
+    }
+    if (process.platform === "win32") {
+        return (process.env.APPDATA ||
+            process.env.LOCALAPPDATA ||
+            path.join(os.homedir(), ".local", "share"));
+    }
+    return path.join(os.homedir(), ".local", "share");
+};
+const getDefaultStateHome = () => {
+    if (process.env.XDG_STATE_HOME) {
+        return process.env.XDG_STATE_HOME;
+    }
+    if (process.platform === "win32") {
+        return (process.env.LOCALAPPDATA ||
+            process.env.APPDATA ||
+            path.join(os.homedir(), ".local", "state"));
+    }
+    return path.join(os.homedir(), ".local", "state");
+};
 export const getNvicodePaths = () => {
-    const configHome = process.env.XDG_CONFIG_HOME || path.join(os.homedir(), ".local", "share");
-    const stateHome = process.env.XDG_STATE_HOME || path.join(os.homedir(), ".local", "state");
+    const configHome = getDefaultConfigHome();
+    const stateHome = getDefaultStateHome();
     const configDir = path.join(configHome, "nvicode");
     const stateDir = path.join(stateHome, "nvicode");
     return {
@@ -15,17 +51,31 @@ export const getNvicodePaths = () => {
         stateDir,
         logFile: path.join(stateDir, "proxy.log"),
         pidFile: path.join(stateDir, "proxy.pid"),
+        usageLogFile: path.join(stateDir, "usage.jsonl"),
+    };
+};
+const withDefaults = (config) => {
+    const envMaxRequestsPerMinute = getEnvNumber("NVICODE_MAX_RPM");
+    const legacyApiKey = config.apiKey?.trim() || "";
+    const legacyModel = config.model?.trim() || DEFAULT_NVIDIA_MODEL;
+    return {
+        provider: config.provider === "openrouter" ? "openrouter" : DEFAULT_PROVIDER,
+        nvidiaApiKey: config.nvidiaApiKey?.trim() || legacyApiKey,
+        nvidiaModel: config.nvidiaModel?.trim() || legacyModel,
+        openrouterApiKey: config.openrouterApiKey?.trim() || "",
+        openrouterModel: config.openrouterModel?.trim() || DEFAULT_OPENROUTER_MODEL,
+        proxyPort: Number.isInteger(config.proxyPort) && config.proxyPort > 0
+            ? config.proxyPort
+            : DEFAULT_PROXY_PORT,
+        proxyToken: config.proxyToken?.trim() || randomUUID(),
+        thinking: config.thinking ?? false,
+        maxRequestsPerMinute: envMaxRequestsPerMinute ||
+            (Number.isInteger(config.maxRequestsPerMinute) &&
+                config.maxRequestsPerMinute > 0
+                ? config.maxRequestsPerMinute
+                : DEFAULT_MAX_REQUESTS_PER_MINUTE),
     };
 };
-const withDefaults = (config) => ({
-    apiKey: config.apiKey?.trim() || "",
-    model: config.model?.trim() || DEFAULT_MODEL,
-    proxyPort: Number.isInteger(config.proxyPort) && config.proxyPort > 0
-        ? config.proxyPort
-        : DEFAULT_PROXY_PORT,
-    proxyToken: config.proxyToken?.trim() || randomUUID(),
-    thinking: config.thinking ?? false,
-});
 export const loadConfig = async () => {
     const paths = getNvicodePaths();
     try {
@@ -54,3 +104,5 @@ export const updateConfig = async (patch) => {
         ...patch,
     });
 };
+export const getActiveApiKey = (config) => config.provider === "openrouter" ? config.openrouterApiKey : config.nvidiaApiKey;
+export const getActiveModel = (config) => config.provider === "openrouter" ? config.openrouterModel : config.nvidiaModel;

package/dist/models.js CHANGED Viewed

@@ -1,4 +1,4 @@
-export const CURATED_MODELS = [
+export const NVIDIA_CURATED_MODELS = [
     {
         id: "moonshotai/kimi-k2.5",
         label: "Kimi K2.5",
@@ -30,6 +30,28 @@ export const CURATED_MODELS = [
         description: "Smaller coding-focused Qwen model.",
     },
 ];
+export const OPENROUTER_CURATED_MODELS = [
+    {
+        id: "qwen/qwen3.6-plus-preview:free",
+        label: "Qwen 3.6 Plus Preview (Free)",
+        description: "Free OpenRouter Qwen preview model.",
+    },
+    {
+        id: "anthropic/claude-sonnet-4.6",
+        label: "Claude Sonnet 4.6",
+        description: "Recommended OpenRouter model for Claude Code compatibility.",
+    },
+    {
+        id: "anthropic/claude-opus-4.6",
+        label: "Claude Opus 4.6",
+        description: "Higher-end Anthropic model through OpenRouter.",
+    },
+    {
+        id: "anthropic/claude-haiku-4.5",
+        label: "Claude Haiku 4.5",
+        description: "Faster lower-cost Anthropic model through OpenRouter.",
+    },
+];
 const MODELS_URL = "https://integrate.api.nvidia.com/v1/models";
 export const fetchAvailableModelIds = async (apiKey) => {
     const response = await fetch(MODELS_URL, {
@@ -49,13 +71,16 @@ export const fetchAvailableModelIds = async (apiKey) => {
     }
     return ids;
 };
-export const getRecommendedModels = async (apiKey) => {
+export const getRecommendedModels = async (provider, apiKey) => {
+    if (provider === "openrouter") {
+        return OPENROUTER_CURATED_MODELS;
+    }
     try {
         const available = await fetchAvailableModelIds(apiKey);
-        const curated = CURATED_MODELS.filter((model) => available.has(model.id));
-        return curated.length > 0 ? curated : CURATED_MODELS;
+        const curated = NVIDIA_CURATED_MODELS.filter((model) => available.has(model.id));
+        return curated.length > 0 ? curated : NVIDIA_CURATED_MODELS;
     }
     catch {
-        return CURATED_MODELS;
+        return NVIDIA_CURATED_MODELS;
     }
 };

package/dist/proxy.js CHANGED Viewed

@@ -1,6 +1,46 @@
 import { randomUUID } from "node:crypto";
 import { createServer } from "node:http";
+import { appendUsageRecord, buildUsageRecord, getPricingSnapshot, } from "./usage.js";
 const NVIDIA_URL = "https://integrate.api.nvidia.com/v1/chat/completions";
+const DEFAULT_RETRY_DELAY_MS = 2_000;
+const MAX_NVIDIA_RETRIES = 3;
+const sleep = async (ms) => {
+    if (ms <= 0) {
+        return;
+    }
+    await new Promise((resolve) => setTimeout(resolve, ms));
+};
+const parseRetryAfterMs = (value) => {
+    if (!value) {
+        return null;
+    }
+    const seconds = Number(value);
+    if (Number.isFinite(seconds) && seconds >= 0) {
+        return Math.ceil(seconds * 1000);
+    }
+    const timestamp = Date.parse(value);
+    if (Number.isNaN(timestamp)) {
+        return null;
+    }
+    return Math.max(0, timestamp - Date.now());
+};
+const createRequestScheduler = (maxRequestsPerMinute) => {
+    const intervalMs = Math.max(1, Math.ceil(60_000 / maxRequestsPerMinute));
+    let nextAvailableAt = 0;
+    let queue = Promise.resolve();
+    return async (task) => {
+        const runTask = async () => {
+            const now = Date.now();
+            const scheduledAt = Math.max(now, nextAvailableAt);
+            nextAvailableAt = scheduledAt + intervalMs;
+            await sleep(scheduledAt - now);
+            return task();
+        };
+        const result = queue.then(runTask, runTask);
+        queue = result.then(() => undefined, () => undefined);
+        return result;
+    };
+};
 const sendJson = (response, statusCode, payload) => {
     response.writeHead(statusCode, {
         "Content-Type": "application/json",
@@ -296,10 +336,60 @@ const estimateTokens = (payload) => {
     const raw = JSON.stringify(payload);
     return Math.max(1, Math.ceil(raw.length / 4));
 };
-const callNvidia = async (config, payload) => {
-    const targetModel = payload.model && payload.model.includes("/") && !payload.model.startsWith("claude-")
-        ? payload.model
-        : config.model;
+const getCurrentTurnMessages = (messages) => {
+    const entries = messages ?? [];
+    for (let index = entries.length - 1; index >= 0; index -= 1) {
+        if (entries[index]?.role === "assistant") {
+            return entries.slice(index + 1);
+        }
+    }
+    return entries;
+};
+const extractPromptInput = (messages) => {
+    const parts = [];
+    for (const message of messages) {
+        if (message.role !== "user") {
+            continue;
+        }
+        if (typeof message.content === "string") {
+            if (message.content.trim().length > 0) {
+                parts.push(message.content);
+            }
+            continue;
+        }
+        for (const block of message.content) {
+            if (block.type === "text" && block.text.trim().length > 0) {
+                parts.push(block.text);
+                continue;
+            }
+            if (block.type === "image" && block.source?.data) {
+                parts.push({
+                    type: "image_url",
+                    image_url: {
+                        url: `data:${block.source.media_type || "application/octet-stream"};base64,${block.source.data}`,
+                    },
+                });
+            }
+        }
+    }
+    return parts;
+};
+const estimateTurnInputTokens = (payload) => {
+    const currentTurnMessages = getCurrentTurnMessages(payload.messages);
+    const promptInput = extractPromptInput(currentTurnMessages);
+    if (promptInput.length === 0) {
+        return 0;
+    }
+    return estimateTokens({
+        prompt: promptInput,
+    });
+};
+const estimateTurnOutputTokens = (content) => estimateTokens(content);
+const resolveTargetModel = (config, payload) => payload.model && payload.model.includes("/") && !payload.model.startsWith("claude-")
+    ? payload.model
+    : config.nvidiaModel;
+const callNvidia = async (config, scheduleRequest, payload) => {
+    const targetModel = resolveTargetModel(config, payload);
     const requestBody = {
         model: targetModel,
         messages: mapMessages(payload),
@@ -328,25 +418,38 @@ const callNvidia = async (config, payload) => {
             thinking: true,
         };
     }
-    const response = await fetch(NVIDIA_URL, {
-        method: "POST",
-        headers: {
-            Authorization: `Bearer ${config.apiKey}`,
-            Accept: "application/json",
-            "Content-Type": "application/json",
-        },
-        body: JSON.stringify(requestBody),
-    });
-    const raw = await response.text();
-    if (!response.ok) {
-        throw new Error(`NVIDIA API HTTP ${response.status}: ${raw}`);
-    }
+    const invoke = async () => {
+        for (let attempt = 0; attempt <= MAX_NVIDIA_RETRIES; attempt += 1) {
+            const response = await fetch(NVIDIA_URL, {
+                method: "POST",
+                headers: {
+                    Authorization: `Bearer ${config.nvidiaApiKey}`,
+                    Accept: "application/json",
+                    "Content-Type": "application/json",
+                },
+                body: JSON.stringify(requestBody),
+            });
+            const raw = await response.text();
+            if (response.ok) {
+                return JSON.parse(raw);
+            }
+            if (response.status === 429 && attempt < MAX_NVIDIA_RETRIES) {
+                const retryAfterMs = parseRetryAfterMs(response.headers.get("retry-after")) ||
+                    DEFAULT_RETRY_DELAY_MS * 2 ** attempt;
+                await sleep(retryAfterMs);
+                continue;
+            }
+            throw new Error(`NVIDIA API HTTP ${response.status}: ${raw}`);
+        }
+        throw new Error("NVIDIA API retry loop exhausted unexpectedly.");
+    };
     return {
         targetModel,
-        upstream: JSON.parse(raw),
+        upstream: await scheduleRequest(invoke),
     };
 };
 export const createProxyServer = (config) => {
+    const scheduleNvidiaRequest = createRequestScheduler(config.maxRequestsPerMinute);
     return createServer(async (request, response) => {
         try {
             const url = new URL(request.url || "/", "http://127.0.0.1");
@@ -358,9 +461,10 @@ export const createProxyServer = (config) => {
             if (url.pathname === "/health") {
                 sendJson(response, 200, {
                     ok: true,
-                    model: config.model,
+                    model: config.nvidiaModel,
                     port: config.proxyPort,
                     thinking: config.thinking,
+                    maxRequestsPerMinute: config.maxRequestsPerMinute,
                 });
                 return;
             }
@@ -384,114 +488,149 @@ export const createProxyServer = (config) => {
             if (request.method === "POST" && url.pathname === "/v1/messages") {
                 const rawBody = await readRequestBody(request);
                 const payload = JSON.parse(rawBody);
-                const { upstream, targetModel } = await callNvidia(config, payload);
-                const choice = upstream.choices?.[0];
-                const mappedContent = mapResponseContent(choice);
-                const anthropicResponse = {
-                    id: upstream.id || `msg_${randomUUID()}`,
-                    type: "message",
-                    role: "assistant",
-                    model: targetModel,
-                    content: mappedContent,
-                    stop_reason: mapStopReason(choice?.finish_reason),
-                    stop_sequence: null,
-                    usage: {
-                        input_tokens: upstream.usage?.prompt_tokens ??
-                            estimateTokens({
-                                system: payload.system ?? null,
-                                messages: payload.messages ?? [],
-                                tools: payload.tools ?? [],
-                            }),
-                        output_tokens: upstream.usage?.completion_tokens ?? 0,
-                    },
-                };
-                if (!payload.stream) {
-                    sendJson(response, 200, anthropicResponse);
-                    return;
-                }
-                response.writeHead(200, {
-                    "Cache-Control": "no-cache, no-transform",
-                    Connection: "keep-alive",
-                    "Content-Type": "text/event-stream",
+                const targetModel = resolveTargetModel(config, payload);
+                const estimatedInputTokens = estimateTokens({
+                    system: payload.system ?? null,
+                    messages: payload.messages ?? [],
+                    tools: payload.tools ?? [],
                 });
-                writeSse(response, "message_start", {
-                    type: "message_start",
-                    message: {
-                        ...anthropicResponse,
-                        content: [],
-                        stop_reason: null,
+                const estimatedTurnInputTokens = estimateTurnInputTokens(payload);
+                const startedAt = Date.now();
+                const pricing = getPricingSnapshot();
+                try {
+                    const { upstream } = await callNvidia(config, scheduleNvidiaRequest, payload);
+                    const choice = upstream.choices?.[0];
+                    const mappedContent = mapResponseContent(choice);
+                    const estimatedTurnOutputTokens = estimateTurnOutputTokens(mappedContent);
+                    const anthropicResponse = {
+                        id: upstream.id || `msg_${randomUUID()}`,
+                        type: "message",
+                        role: "assistant",
+                        model: targetModel,
+                        content: mappedContent,
+                        stop_reason: mapStopReason(choice?.finish_reason),
+                        stop_sequence: null,
                         usage: {
-                            input_tokens: anthropicResponse.usage.input_tokens,
-                            output_tokens: 0,
+                            input_tokens: upstream.usage?.prompt_tokens ?? estimatedInputTokens,
+                            output_tokens: upstream.usage?.completion_tokens ?? 0,
                         },
-                    },
-                });
-                mappedContent.forEach((block, index) => {
-                    if (block.type === "text") {
-                        writeSse(response, "content_block_start", {
-                            type: "content_block_start",
-                            index,
-                            content_block: {
-                                type: "text",
-                                text: "",
+                    };
+                    await appendUsageRecord(buildUsageRecord({
+                        id: anthropicResponse.id,
+                        status: "success",
+                        model: targetModel,
+                        inputTokens: anthropicResponse.usage.input_tokens,
+                        outputTokens: anthropicResponse.usage.output_tokens,
+                        turnInputTokens: estimatedTurnInputTokens,
+                        turnOutputTokens: estimatedTurnOutputTokens,
+                        latencyMs: Date.now() - startedAt,
+                        stopReason: anthropicResponse.stop_reason,
+                        pricing,
+                    }));
+                    if (!payload.stream) {
+                        sendJson(response, 200, anthropicResponse);
+                        return;
+                    }
+                    response.writeHead(200, {
+                        "Cache-Control": "no-cache, no-transform",
+                        Connection: "keep-alive",
+                        "Content-Type": "text/event-stream",
+                    });
+                    writeSse(response, "message_start", {
+                        type: "message_start",
+                        message: {
+                            ...anthropicResponse,
+                            content: [],
+                            stop_reason: null,
+                            usage: {
+                                input_tokens: anthropicResponse.usage.input_tokens,
+                                output_tokens: 0,
                             },
-                        });
-                        for (const chunk of chunkText(block.text)) {
+                        },
+                    });
+                    mappedContent.forEach((block, index) => {
+                        if (block.type === "text") {
+                            writeSse(response, "content_block_start", {
+                                type: "content_block_start",
+                                index,
+                                content_block: {
+                                    type: "text",
+                                    text: "",
+                                },
+                            });
+                            for (const chunk of chunkText(block.text)) {
+                                writeSse(response, "content_block_delta", {
+                                    type: "content_block_delta",
+                                    index,
+                                    delta: {
+                                        type: "text_delta",
+                                        text: chunk,
+                                    },
+                                });
+                            }
+                            writeSse(response, "content_block_stop", {
+                                type: "content_block_stop",
+                                index,
+                            });
+                            return;
+                        }
+                        if (block.type === "tool_use") {
+                            writeSse(response, "content_block_start", {
+                                type: "content_block_start",
+                                index,
+                                content_block: {
+                                    type: "tool_use",
+                                    id: block.id,
+                                    name: block.name,
+                                    input: {},
+                                },
+                            });
                             writeSse(response, "content_block_delta", {
                                 type: "content_block_delta",
                                 index,
                                 delta: {
-                                    type: "text_delta",
-                                    text: chunk,
+                                    type: "input_json_delta",
+                                    partial_json: JSON.stringify(block.input ?? {}),
                                 },
                             });
+                            writeSse(response, "content_block_stop", {
+                                type: "content_block_stop",
+                                index,
+                            });
                         }
-                        writeSse(response, "content_block_stop", {
-                            type: "content_block_stop",
-                            index,
-                        });
-                        return;
-                    }
-                    if (block.type === "tool_use") {
-                        writeSse(response, "content_block_start", {
-                            type: "content_block_start",
-                            index,
-                            content_block: {
-                                type: "tool_use",
-                                id: block.id,
-                                name: block.name,
-                                input: {},
-                            },
-                        });
-                        writeSse(response, "content_block_delta", {
-                            type: "content_block_delta",
-                            index,
-                            delta: {
-                                type: "input_json_delta",
-                                partial_json: JSON.stringify(block.input ?? {}),
-                            },
-                        });
-                        writeSse(response, "content_block_stop", {
-                            type: "content_block_stop",
-                            index,
-                        });
-                    }
-                });
-                writeSse(response, "message_delta", {
-                    type: "message_delta",
-                    delta: {
-                        stop_reason: anthropicResponse.stop_reason,
-                        stop_sequence: null,
-                    },
-                    usage: {
-                        output_tokens: anthropicResponse.usage.output_tokens,
-                    },
-                });
-                writeSse(response, "message_stop", {
-                    type: "message_stop",
-                });
-                response.end();
-                return;
+                    });
+                    writeSse(response, "message_delta", {
+                        type: "message_delta",
+                        delta: {
+                            stop_reason: anthropicResponse.stop_reason,
+                            stop_sequence: null,
+                        },
+                        usage: {
+                            output_tokens: anthropicResponse.usage.output_tokens,
+                        },
+                    });
+                    writeSse(response, "message_stop", {
+                        type: "message_stop",
+                    });
+                    response.end();
+                    return;
+                }
+                catch (error) {
+                    const message = error instanceof Error ? error.message : String(error);
+                    await appendUsageRecord(buildUsageRecord({
+                        id: `err_${randomUUID()}`,
+                        status: "error",
+                        model: targetModel,
+                        inputTokens: estimatedInputTokens,
+                        outputTokens: 0,
+                        turnInputTokens: estimatedTurnInputTokens,
+                        turnOutputTokens: 0,
+                        latencyMs: Date.now() - startedAt,
+                        error: message,
+                        pricing,
+                    }));
+                    throw error;
+                }
             }
             sendAnthropicError(response, 404, "not_found_error", `Unsupported route: ${request.method || "GET"} ${url.pathname}`);
         }