npm - nvicode - Versions diffs - 0.1.1 → 0.1.5 - Mend

nvicode 0.1.1 → 0.1.5

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/README.md CHANGED Viewed

@@ -2,6 +2,12 @@
 Run Claude Code through NVIDIA-hosted models using a local Anthropic-compatible gateway.
+Supported environments:
+- macOS
+- Ubuntu/Linux
+- WSL
+- Native Windows with Claude Code installed and working from PowerShell, CMD, or Git Bash
 ## Quickstart
 Install the published package:
@@ -12,6 +18,8 @@ npm install -g nvicode
 Save your NVIDIA API key:
+Get a free key from [NVIDIA Build API Keys](https://build.nvidia.com/settings/api-keys).
 ```sh
 nvicode auth
 ```
@@ -28,11 +36,28 @@ Launch Claude Code through NVIDIA:
 nvicode launch claude
 ```
+## Screenshots
+### Save your API key
+![nvicode auth](https://raw.githubusercontent.com/dineshpotla/nvicode/main/assets/screenshots/auth.png)
+### Choose a model
+![nvicode select model](https://raw.githubusercontent.com/dineshpotla/nvicode/main/assets/screenshots/select-model.png)
+### Launch Claude Code through NVIDIA
+![nvicode launch claude](https://raw.githubusercontent.com/dineshpotla/nvicode/main/assets/screenshots/launch.png)
 ## Commands
 Useful commands:
 ```sh
+nvicode dashboard
+nvicode usage
+nvicode activity
 nvicode models
 nvicode config
 nvicode auth
@@ -42,11 +67,15 @@ nvicode launch claude -p "Reply with exactly OK"
 The launcher starts a local proxy on `127.0.0.1:8788`, points Claude Code at it with `ANTHROPIC_BASE_URL`, and forwards requests to NVIDIA `chat/completions`.
 If no NVIDIA API key is saved yet, `nvicode` prompts for one on first use.
+By default, the proxy paces upstream NVIDIA requests at `40 RPM`. Override that with `NVICODE_MAX_RPM` if your account has a different limit.
+The usage dashboard compares your local NVIDIA run cost against Claude Opus 4.6 at `$5 / MTok input` and `$25 / MTok output`, based on Anthropic pricing as of `2026-03-30`.
+If your NVIDIA endpoint is not free, override local cost estimates with `NVICODE_INPUT_USD_PER_MTOK` and `NVICODE_OUTPUT_USD_PER_MTOK`.
 ## Requirements
 - Claude Code must already be installed on the machine.
 - Node.js 20 or newer is required to install `nvicode`.
+- On native Windows, Claude Code itself requires Git for Windows. See the [Claude Code setup docs](https://code.claude.com/docs/en/setup).
 ## Local Development
@@ -55,11 +84,12 @@ These steps are only for contributors working from a git checkout. End users do
 ```sh
 npm install
 npm run build
-ln -sf "$(pwd)/dist/cli.js" ~/.local/bin/nvicode
+npm link
 ```
 ## Notes
 - `thinking` is disabled by default because some NVIDIA reasoning models can consume the entire output budget and return no visible answer to Claude Code.
 - The proxy supports basic text, tool calls, tool results, and token count estimation.
+- The proxy includes upstream request pacing and retries on NVIDIA `429` responses.
 - Claude Code remains the frontend; the selected NVIDIA model becomes the backend.

package/dist/cli.js CHANGED Viewed

@@ -10,6 +10,7 @@ import { fileURLToPath } from "node:url";
 import { getNvicodePaths, loadConfig, saveConfig, } from "./config.js";
 import { createProxyServer } from "./proxy.js";
 import { CURATED_MODELS, getRecommendedModels } from "./models.js";
+import { filterRecordsSince, formatDuration, formatInteger, formatTimestamp, formatUsd, readUsageRecords, summarizeUsage, } from "./usage.js";
 const __filename = fileURLToPath(import.meta.url);
 const usage = () => {
     console.log(`nvicode
@@ -19,10 +20,26 @@ Commands:
   nvicode models              Show recommended coding models
   nvicode auth                Save or update NVIDIA API key
   nvicode config              Show current nvicode config
+  nvicode usage               Show token usage and cost comparison
+  nvicode activity            Show recent request activity
+  nvicode dashboard           Show usage summary and recent activity
   nvicode launch claude [...] Launch Claude Code through nvicode
   nvicode serve               Run the local proxy in the foreground
 `);
 };
+const isWindows = process.platform === "win32";
+const getPathExts = () => {
+    if (!isWindows) {
+        return [""];
+    }
+    const raw = process.env.PATHEXT || ".COM;.EXE;.BAT;.CMD";
+    return raw
+        .split(";")
+        .map((ext) => ext.trim())
+        .filter(Boolean)
+        .map((ext) => ext.toLowerCase());
+};
+const unique = (values) => [...new Set(values)];
 const question = async (prompt) => {
     const rl = createInterface({
         input: process.stdin,
@@ -114,11 +131,91 @@ const runConfig = async () => {
     const paths = getNvicodePaths();
     console.log(`Config file: ${paths.configFile}`);
     console.log(`State dir:   ${paths.stateDir}`);
+    console.log(`Usage log:   ${paths.usageLogFile}`);
     console.log(`Model:       ${config.model}`);
     console.log(`Proxy port:  ${config.proxyPort}`);
+    console.log(`Max RPM:     ${config.maxRequestsPerMinute}`);
     console.log(`Thinking:    ${config.thinking ? "on" : "off"}`);
     console.log(`API key:     ${config.apiKey ? "saved" : "missing"}`);
 };
+const printUsageBlock = (label, records) => {
+    const summary = summarizeUsage(records);
+    console.log(label);
+    console.log(`Requests: ${formatInteger(summary.requests)} (${formatInteger(summary.successes)} ok, ${formatInteger(summary.errors)} error)`);
+    console.log(`Input tokens: ${formatInteger(summary.inputTokens)}`);
+    console.log(`Output tokens: ${formatInteger(summary.outputTokens)}`);
+    console.log(`NVIDIA cost: ${formatUsd(summary.providerCostUsd)}`);
+    console.log(`Opus 4.6 equivalent: ${formatUsd(summary.compareCostUsd)}`);
+    console.log(`Estimated savings: ${formatUsd(summary.savingsUsd)}`);
+};
+const runUsage = async () => {
+    const records = await readUsageRecords();
+    if (records.length === 0) {
+        console.log("No usage recorded yet.");
+        return;
+    }
+    const now = Date.now();
+    const latestPricing = records[0]?.pricing;
+    if (latestPricing) {
+        console.log("Pricing basis:");
+        console.log(`- NVIDIA configured cost: ${formatUsd(latestPricing.providerInputUsdPerMTok)} / MTok input, ${formatUsd(latestPricing.providerOutputUsdPerMTok)} / MTok output`);
+        console.log(`- ${latestPricing.compareModel}: ${formatUsd(latestPricing.compareInputUsdPerMTok)} / MTok input, ${formatUsd(latestPricing.compareOutputUsdPerMTok)} / MTok output`);
+        console.log(`- Comparison source: ${latestPricing.comparePricingSource} (${latestPricing.comparePricingUpdatedAt})`);
+        console.log("");
+    }
+    const windows = [
+        { label: "Last 1 hour", durationMs: 1 * 60 * 60 * 1000 },
+        { label: "Last 6 hours", durationMs: 6 * 60 * 60 * 1000 },
+        { label: "Last 12 hours", durationMs: 12 * 60 * 60 * 1000 },
+        { label: "Last 1 day", durationMs: 24 * 60 * 60 * 1000 },
+        { label: "Last 1 week", durationMs: 7 * 24 * 60 * 60 * 1000 },
+        { label: "Last 1 month", durationMs: 30 * 24 * 60 * 60 * 1000 },
+    ];
+    const rows = windows.map((window) => {
+        const summary = summarizeUsage(filterRecordsSince(records, now - window.durationMs));
+        return {
+            window: window.label,
+            requests: `${formatInteger(summary.requests)} (${formatInteger(summary.successes)} ok/${formatInteger(summary.errors)} err)`,
+            inputTokens: formatInteger(summary.inputTokens),
+            outputTokens: formatInteger(summary.outputTokens),
+            nvidiaCost: formatUsd(summary.providerCostUsd),
+            savings: formatUsd(summary.savingsUsd),
+        };
+    });
+    console.log("Window        Requests         Input Tok  Output Tok  NVIDIA      Saved");
+    rows.forEach((row) => {
+        console.log(`${row.window.padEnd(13)} ${row.requests.padEnd(16)} ${row.inputTokens.padStart(10)} ${row.outputTokens.padStart(11)} ${row.nvidiaCost.padStart(10)} ${row.savings.padStart(10)}`);
+    });
+};
+const runActivity = async () => {
+    const records = await readUsageRecords();
+    if (records.length === 0) {
+        console.log("No activity recorded yet.");
+        return;
+    }
+    console.log("Timestamp             Status  Model                           In Tok  Out Tok  Latency  NVIDIA     Saved");
+    for (const record of records.slice(0, 15)) {
+        const model = record.model.length > 30 ? `${record.model.slice(0, 27)}...` : record.model;
+        const status = record.status === "success" ? "ok" : "error";
+        console.log(`${formatTimestamp(record.timestamp).padEnd(21)} ${status.padEnd(6)} ${model.padEnd(31)} ${formatInteger(record.inputTokens).padStart(7)} ${formatInteger(record.outputTokens).padStart(8)} ${formatDuration(record.latencyMs).padStart(8)} ${formatUsd(record.providerCostUsd).padStart(10)} ${formatUsd(record.savingsUsd).padStart(10)}`);
+        if (record.error) {
+            console.log(`  error: ${record.error}`);
+        }
+    }
+};
+const runDashboard = async () => {
+    const records = await readUsageRecords();
+    if (records.length === 0) {
+        console.log("No usage recorded yet.");
+        return;
+    }
+    const last7Days = filterRecordsSince(records, Date.now() - 7 * 24 * 60 * 60 * 1000);
+    printUsageBlock("Usage (7d)", last7Days);
+    console.log("");
+    console.log("Recent activity");
+    console.log("");
+    await runActivity();
+};
 const waitForHealthyProxy = async (port) => {
     for (let attempt = 0; attempt < 50; attempt += 1) {
         try {
@@ -147,6 +244,7 @@ const ensureProxyRunning = async (config) => {
             ...process.env,
         },
         stdio: ["ignore", logFd, logFd],
+        windowsHide: true,
     });
     child.unref();
     await fs.writeFile(paths.pidFile, `${child.pid}\n`);
@@ -156,17 +254,63 @@ const ensureProxyRunning = async (config) => {
 };
 const isExecutable = async (filePath) => {
     try {
-        await fs.access(filePath, constants.X_OK);
+        await fs.access(filePath, isWindows ? constants.F_OK : constants.X_OK);
         return true;
     }
     catch {
         return false;
     }
 };
+const buildExecutableCandidates = (entry, name) => {
+    const base = path.join(entry, name);
+    if (!isWindows) {
+        return [base];
+    }
+    if (path.extname(name)) {
+        return [base];
+    }
+    return unique([base, ...getPathExts().map((ext) => `${base}${ext}`)]);
+};
+const resolveClaudeVersionEntry = async (entryPath) => {
+    if (await isExecutable(entryPath)) {
+        return entryPath;
+    }
+    const nestedCandidates = isWindows
+        ? ["claude.exe", "claude.cmd", "claude.bat", "claude"]
+        : ["claude"];
+    for (const candidateName of nestedCandidates) {
+        const candidate = path.join(entryPath, candidateName);
+        if (await isExecutable(candidate)) {
+            return candidate;
+        }
+    }
+    return null;
+};
 const resolveClaudeBinary = async () => {
-    const nativeInPath = await findExecutableInPath("claude-native");
-    if (nativeInPath) {
-        return nativeInPath;
+    const nativeNames = isWindows
+        ? ["claude-native.exe", "claude-native.cmd", "claude-native.bat", "claude-native"]
+        : ["claude-native"];
+    for (const name of nativeNames) {
+        const nativeInPath = await findExecutableInPath(name);
+        if (nativeInPath) {
+            return nativeInPath;
+        }
+    }
+    const homeBinCandidates = isWindows
+        ? [
+            path.join(os.homedir(), ".local", "bin", "claude.exe"),
+            path.join(os.homedir(), ".local", "bin", "claude.cmd"),
+            path.join(os.homedir(), ".local", "bin", "claude.bat"),
+            path.join(os.homedir(), ".local", "bin", "claude"),
+        ]
+        : [
+            path.join(os.homedir(), ".local", "bin", "claude-native"),
+            path.join(os.homedir(), ".local", "bin", "claude"),
+        ];
+    for (const candidate of homeBinCandidates) {
+        if (await isExecutable(candidate)) {
+            return candidate;
+        }
     }
     const versionsDir = path.join(os.homedir(), ".local", "share", "claude", "versions");
     try {
@@ -176,15 +320,23 @@ const resolveClaudeBinary = async () => {
             sensitivity: "base",
         })).at(-1);
         if (latest) {
-            return path.join(versionsDir, latest);
+            const resolved = await resolveClaudeVersionEntry(path.join(versionsDir, latest));
+            if (resolved) {
+                return resolved;
+            }
         }
     }
     catch {
         // continue
     }
-    const claudeInPath = await findExecutableInPath("claude");
-    if (claudeInPath) {
-        return claudeInPath;
+    const cliNames = isWindows
+        ? ["claude.exe", "claude.cmd", "claude.bat", "claude"]
+        : ["claude"];
+    for (const name of cliNames) {
+        const claudeInPath = await findExecutableInPath(name);
+        if (claudeInPath) {
+            return claudeInPath;
+        }
     }
     throw new Error("Unable to locate Claude Code binary.");
 };
@@ -194,30 +346,43 @@ const findExecutableInPath = async (name) => {
         if (!entry) {
             continue;
         }
-        const candidate = path.join(entry, name);
-        if (await isExecutable(candidate)) {
-            return candidate;
+        for (const candidate of buildExecutableCandidates(entry, name)) {
+            if (await isExecutable(candidate)) {
+                return candidate;
+            }
         }
     }
     return null;
 };
+const spawnClaudeProcess = (claudeBinary, args, env) => {
+    if (isWindows && /\.(cmd|bat)$/i.test(claudeBinary)) {
+        return spawn(claudeBinary, args, {
+            stdio: "inherit",
+            env,
+            shell: true,
+            windowsHide: true,
+        });
+    }
+    return spawn(claudeBinary, args, {
+        stdio: "inherit",
+        env,
+        windowsHide: true,
+    });
+};
 const runLaunchClaude = async (args) => {
     const config = await ensureConfigured();
     await ensureProxyRunning(config);
     const claudeBinary = await resolveClaudeBinary();
-    const child = spawn(claudeBinary, args, {
-        stdio: "inherit",
-        env: {
-            ...process.env,
-            ANTHROPIC_BASE_URL: `http://127.0.0.1:${config.proxyPort}`,
-            ANTHROPIC_AUTH_TOKEN: config.proxyToken,
-            ANTHROPIC_API_KEY: "",
-            ANTHROPIC_MODEL: config.model,
-            CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS: "1",
-            ANTHROPIC_CUSTOM_MODEL_OPTION: config.model,
-            ANTHROPIC_CUSTOM_MODEL_OPTION_NAME: "nvicode custom model",
-            ANTHROPIC_CUSTOM_MODEL_OPTION_DESCRIPTION: "Claude Code via local NVIDIA gateway",
-        },
+    const child = spawnClaudeProcess(claudeBinary, args, {
+        ...process.env,
+        ANTHROPIC_BASE_URL: `http://127.0.0.1:${config.proxyPort}`,
+        ANTHROPIC_AUTH_TOKEN: config.proxyToken,
+        ANTHROPIC_API_KEY: "",
+        ANTHROPIC_MODEL: config.model,
+        CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS: "1",
+        ANTHROPIC_CUSTOM_MODEL_OPTION: config.model,
+        ANTHROPIC_CUSTOM_MODEL_OPTION_NAME: "nvicode custom model",
+        ANTHROPIC_CUSTOM_MODEL_OPTION_DESCRIPTION: "Claude Code via local NVIDIA gateway",
     });
     await new Promise((resolve, reject) => {
         child.on("exit", (code, signal) => {
@@ -269,6 +434,18 @@ const main = async () => {
         await runConfig();
         return;
     }
+    if (command === "usage") {
+        await runUsage();
+        return;
+    }
+    if (command === "activity") {
+        await runActivity();
+        return;
+    }
+    if (command === "dashboard") {
+        await runDashboard();
+        return;
+    }
     if ((command === "select" && rest[0] === "model") ||
         command === "select-model") {
         await runSelectModel();

package/dist/config.js CHANGED Viewed

@@ -4,9 +4,43 @@ import os from "node:os";
 import path from "node:path";
 const DEFAULT_PROXY_PORT = 8788;
 const DEFAULT_MODEL = "moonshotai/kimi-k2.5";
+const DEFAULT_MAX_REQUESTS_PER_MINUTE = 40;
+const getEnvNumber = (name) => {
+    const raw = process.env[name];
+    if (!raw) {
+        return null;
+    }
+    const parsed = Number(raw);
+    if (!Number.isFinite(parsed) || parsed <= 0) {
+        return null;
+    }
+    return Math.floor(parsed);
+};
+const getDefaultConfigHome = () => {
+    if (process.env.XDG_CONFIG_HOME) {
+        return process.env.XDG_CONFIG_HOME;
+    }
+    if (process.platform === "win32") {
+        return (process.env.APPDATA ||
+            process.env.LOCALAPPDATA ||
+            path.join(os.homedir(), ".local", "share"));
+    }
+    return path.join(os.homedir(), ".local", "share");
+};
+const getDefaultStateHome = () => {
+    if (process.env.XDG_STATE_HOME) {
+        return process.env.XDG_STATE_HOME;
+    }
+    if (process.platform === "win32") {
+        return (process.env.LOCALAPPDATA ||
+            process.env.APPDATA ||
+            path.join(os.homedir(), ".local", "state"));
+    }
+    return path.join(os.homedir(), ".local", "state");
+};
 export const getNvicodePaths = () => {
-    const configHome = process.env.XDG_CONFIG_HOME || path.join(os.homedir(), ".local", "share");
-    const stateHome = process.env.XDG_STATE_HOME || path.join(os.homedir(), ".local", "state");
+    const configHome = getDefaultConfigHome();
+    const stateHome = getDefaultStateHome();
     const configDir = path.join(configHome, "nvicode");
     const stateDir = path.join(stateHome, "nvicode");
     return {
@@ -15,17 +49,26 @@ export const getNvicodePaths = () => {
         stateDir,
         logFile: path.join(stateDir, "proxy.log"),
         pidFile: path.join(stateDir, "proxy.pid"),
+        usageLogFile: path.join(stateDir, "usage.jsonl"),
+    };
+};
+const withDefaults = (config) => {
+    const envMaxRequestsPerMinute = getEnvNumber("NVICODE_MAX_RPM");
+    return {
+        apiKey: config.apiKey?.trim() || "",
+        model: config.model?.trim() || DEFAULT_MODEL,
+        proxyPort: Number.isInteger(config.proxyPort) && config.proxyPort > 0
+            ? config.proxyPort
+            : DEFAULT_PROXY_PORT,
+        proxyToken: config.proxyToken?.trim() || randomUUID(),
+        thinking: config.thinking ?? false,
+        maxRequestsPerMinute: envMaxRequestsPerMinute ||
+            (Number.isInteger(config.maxRequestsPerMinute) &&
+                config.maxRequestsPerMinute > 0
+                ? config.maxRequestsPerMinute
+                : DEFAULT_MAX_REQUESTS_PER_MINUTE),
     };
 };
-const withDefaults = (config) => ({
-    apiKey: config.apiKey?.trim() || "",
-    model: config.model?.trim() || DEFAULT_MODEL,
-    proxyPort: Number.isInteger(config.proxyPort) && config.proxyPort > 0
-        ? config.proxyPort
-        : DEFAULT_PROXY_PORT,
-    proxyToken: config.proxyToken?.trim() || randomUUID(),
-    thinking: config.thinking ?? false,
-});
 export const loadConfig = async () => {
     const paths = getNvicodePaths();
     try {

package/dist/proxy.js CHANGED Viewed

@@ -1,6 +1,46 @@
 import { randomUUID } from "node:crypto";
 import { createServer } from "node:http";
+import { appendUsageRecord, buildUsageRecord, getPricingSnapshot, } from "./usage.js";
 const NVIDIA_URL = "https://integrate.api.nvidia.com/v1/chat/completions";
+const DEFAULT_RETRY_DELAY_MS = 2_000;
+const MAX_NVIDIA_RETRIES = 3;
+const sleep = async (ms) => {
+    if (ms <= 0) {
+        return;
+    }
+    await new Promise((resolve) => setTimeout(resolve, ms));
+};
+const parseRetryAfterMs = (value) => {
+    if (!value) {
+        return null;
+    }
+    const seconds = Number(value);
+    if (Number.isFinite(seconds) && seconds >= 0) {
+        return Math.ceil(seconds * 1000);
+    }
+    const timestamp = Date.parse(value);
+    if (Number.isNaN(timestamp)) {
+        return null;
+    }
+    return Math.max(0, timestamp - Date.now());
+};
+const createRequestScheduler = (maxRequestsPerMinute) => {
+    const intervalMs = Math.max(1, Math.ceil(60_000 / maxRequestsPerMinute));
+    let nextAvailableAt = 0;
+    let queue = Promise.resolve();
+    return async (task) => {
+        const runTask = async () => {
+            const now = Date.now();
+            const scheduledAt = Math.max(now, nextAvailableAt);
+            nextAvailableAt = scheduledAt + intervalMs;
+            await sleep(scheduledAt - now);
+            return task();
+        };
+        const result = queue.then(runTask, runTask);
+        queue = result.then(() => undefined, () => undefined);
+        return result;
+    };
+};
 const sendJson = (response, statusCode, payload) => {
     response.writeHead(statusCode, {
         "Content-Type": "application/json",
@@ -296,10 +336,11 @@ const estimateTokens = (payload) => {
     const raw = JSON.stringify(payload);
     return Math.max(1, Math.ceil(raw.length / 4));
 };
-const callNvidia = async (config, payload) => {
-    const targetModel = payload.model && payload.model.includes("/") && !payload.model.startsWith("claude-")
-        ? payload.model
-        : config.model;
+const resolveTargetModel = (config, payload) => payload.model && payload.model.includes("/") && !payload.model.startsWith("claude-")
+    ? payload.model
+    : config.model;
+const callNvidia = async (config, scheduleRequest, payload) => {
+    const targetModel = resolveTargetModel(config, payload);
     const requestBody = {
         model: targetModel,
         messages: mapMessages(payload),
@@ -328,25 +369,38 @@ const callNvidia = async (config, payload) => {
             thinking: true,
         };
     }
-    const response = await fetch(NVIDIA_URL, {
-        method: "POST",
-        headers: {
-            Authorization: `Bearer ${config.apiKey}`,
-            Accept: "application/json",
-            "Content-Type": "application/json",
-        },
-        body: JSON.stringify(requestBody),
-    });
-    const raw = await response.text();
-    if (!response.ok) {
-        throw new Error(`NVIDIA API HTTP ${response.status}: ${raw}`);
-    }
+    const invoke = async () => {
+        for (let attempt = 0; attempt <= MAX_NVIDIA_RETRIES; attempt += 1) {
+            const response = await fetch(NVIDIA_URL, {
+                method: "POST",
+                headers: {
+                    Authorization: `Bearer ${config.apiKey}`,
+                    Accept: "application/json",
+                    "Content-Type": "application/json",
+                },
+                body: JSON.stringify(requestBody),
+            });
+            const raw = await response.text();
+            if (response.ok) {
+                return JSON.parse(raw);
+            }
+            if (response.status === 429 && attempt < MAX_NVIDIA_RETRIES) {
+                const retryAfterMs = parseRetryAfterMs(response.headers.get("retry-after")) ||
+                    DEFAULT_RETRY_DELAY_MS * 2 ** attempt;
+                await sleep(retryAfterMs);
+                continue;
+            }
+            throw new Error(`NVIDIA API HTTP ${response.status}: ${raw}`);
+        }
+        throw new Error("NVIDIA API retry loop exhausted unexpectedly.");
+    };
     return {
         targetModel,
-        upstream: JSON.parse(raw),
+        upstream: await scheduleRequest(invoke),
     };
 };
 export const createProxyServer = (config) => {
+    const scheduleNvidiaRequest = createRequestScheduler(config.maxRequestsPerMinute);
     return createServer(async (request, response) => {
         try {
             const url = new URL(request.url || "/", "http://127.0.0.1");
@@ -361,6 +415,7 @@ export const createProxyServer = (config) => {
                     model: config.model,
                     port: config.proxyPort,
                     thinking: config.thinking,
+                    maxRequestsPerMinute: config.maxRequestsPerMinute,
                 });
                 return;
             }
@@ -384,114 +439,143 @@ export const createProxyServer = (config) => {
             if (request.method === "POST" && url.pathname === "/v1/messages") {
                 const rawBody = await readRequestBody(request);
                 const payload = JSON.parse(rawBody);
-                const { upstream, targetModel } = await callNvidia(config, payload);
-                const choice = upstream.choices?.[0];
-                const mappedContent = mapResponseContent(choice);
-                const anthropicResponse = {
-                    id: upstream.id || `msg_${randomUUID()}`,
-                    type: "message",
-                    role: "assistant",
-                    model: targetModel,
-                    content: mappedContent,
-                    stop_reason: mapStopReason(choice?.finish_reason),
-                    stop_sequence: null,
-                    usage: {
-                        input_tokens: upstream.usage?.prompt_tokens ??
-                            estimateTokens({
-                                system: payload.system ?? null,
-                                messages: payload.messages ?? [],
-                                tools: payload.tools ?? [],
-                            }),
-                        output_tokens: upstream.usage?.completion_tokens ?? 0,
-                    },
-                };
-                if (!payload.stream) {
-                    sendJson(response, 200, anthropicResponse);
-                    return;
-                }
-                response.writeHead(200, {
-                    "Cache-Control": "no-cache, no-transform",
-                    Connection: "keep-alive",
-                    "Content-Type": "text/event-stream",
+                const targetModel = resolveTargetModel(config, payload);
+                const estimatedInputTokens = estimateTokens({
+                    system: payload.system ?? null,
+                    messages: payload.messages ?? [],
+                    tools: payload.tools ?? [],
                 });
-                writeSse(response, "message_start", {
-                    type: "message_start",
-                    message: {
-                        ...anthropicResponse,
-                        content: [],
-                        stop_reason: null,
+                const startedAt = Date.now();
+                const pricing = getPricingSnapshot();
+                try {
+                    const { upstream } = await callNvidia(config, scheduleNvidiaRequest, payload);
+                    const choice = upstream.choices?.[0];
+                    const mappedContent = mapResponseContent(choice);
+                    const anthropicResponse = {
+                        id: upstream.id || `msg_${randomUUID()}`,
+                        type: "message",
+                        role: "assistant",
+                        model: targetModel,
+                        content: mappedContent,
+                        stop_reason: mapStopReason(choice?.finish_reason),
+                        stop_sequence: null,
                         usage: {
-                            input_tokens: anthropicResponse.usage.input_tokens,
-                            output_tokens: 0,
+                            input_tokens: upstream.usage?.prompt_tokens ?? estimatedInputTokens,
+                            output_tokens: upstream.usage?.completion_tokens ?? 0,
                         },
-                    },
-                });
-                mappedContent.forEach((block, index) => {
-                    if (block.type === "text") {
-                        writeSse(response, "content_block_start", {
-                            type: "content_block_start",
-                            index,
-                            content_block: {
-                                type: "text",
-                                text: "",
+                    };
+                    await appendUsageRecord(buildUsageRecord({
+                        id: anthropicResponse.id,
+                        status: "success",
+                        model: targetModel,
+                        inputTokens: anthropicResponse.usage.input_tokens,
+                        outputTokens: anthropicResponse.usage.output_tokens,
+                        latencyMs: Date.now() - startedAt,
+                        stopReason: anthropicResponse.stop_reason,
+                        pricing,
+                    }));
+                    if (!payload.stream) {
+                        sendJson(response, 200, anthropicResponse);
+                        return;
+                    }
+                    response.writeHead(200, {
+                        "Cache-Control": "no-cache, no-transform",
+                        Connection: "keep-alive",
+                        "Content-Type": "text/event-stream",
+                    });
+                    writeSse(response, "message_start", {
+                        type: "message_start",
+                        message: {
+                            ...anthropicResponse,
+                            content: [],
+                            stop_reason: null,
+                            usage: {
+                                input_tokens: anthropicResponse.usage.input_tokens,
+                                output_tokens: 0,
                             },
-                        });
-                        for (const chunk of chunkText(block.text)) {
+                        },
+                    });
+                    mappedContent.forEach((block, index) => {
+                        if (block.type === "text") {
+                            writeSse(response, "content_block_start", {
+                                type: "content_block_start",
+                                index,
+                                content_block: {
+                                    type: "text",
+                                    text: "",
+                                },
+                            });
+                            for (const chunk of chunkText(block.text)) {
+                                writeSse(response, "content_block_delta", {
+                                    type: "content_block_delta",
+                                    index,
+                                    delta: {
+                                        type: "text_delta",
+                                        text: chunk,
+                                    },
+                                });
+                            }
+                            writeSse(response, "content_block_stop", {
+                                type: "content_block_stop",
+                                index,
+                            });
+                            return;
+                        }
+                        if (block.type === "tool_use") {
+                            writeSse(response, "content_block_start", {
+                                type: "content_block_start",
+                                index,
+                                content_block: {
+                                    type: "tool_use",
+                                    id: block.id,
+                                    name: block.name,
+                                    input: {},
+                                },
+                            });
                             writeSse(response, "content_block_delta", {
                                 type: "content_block_delta",
                                 index,
                                 delta: {
-                                    type: "text_delta",
-                                    text: chunk,
+                                    type: "input_json_delta",
+                                    partial_json: JSON.stringify(block.input ?? {}),
                                 },
                             });
+                            writeSse(response, "content_block_stop", {
+                                type: "content_block_stop",
+                                index,
+                            });
                         }
-                        writeSse(response, "content_block_stop", {
-                            type: "content_block_stop",
-                            index,
-                        });
-                        return;
-                    }
-                    if (block.type === "tool_use") {
-                        writeSse(response, "content_block_start", {
-                            type: "content_block_start",
-                            index,
-                            content_block: {
-                                type: "tool_use",
-                                id: block.id,
-                                name: block.name,
-                                input: {},
-                            },
-                        });
-                        writeSse(response, "content_block_delta", {
-                            type: "content_block_delta",
-                            index,
-                            delta: {
-                                type: "input_json_delta",
-                                partial_json: JSON.stringify(block.input ?? {}),
-                            },
-                        });
-                        writeSse(response, "content_block_stop", {
-                            type: "content_block_stop",
-                            index,
-                        });
-                    }
-                });
-                writeSse(response, "message_delta", {
-                    type: "message_delta",
-                    delta: {
-                        stop_reason: anthropicResponse.stop_reason,
-                        stop_sequence: null,
-                    },
-                    usage: {
-                        output_tokens: anthropicResponse.usage.output_tokens,
-                    },
-                });
-                writeSse(response, "message_stop", {
-                    type: "message_stop",
-                });
-                response.end();
-                return;
+                    });
+                    writeSse(response, "message_delta", {
+                        type: "message_delta",
+                        delta: {
+                            stop_reason: anthropicResponse.stop_reason,
+                            stop_sequence: null,
+                        },
+                        usage: {
+                            output_tokens: anthropicResponse.usage.output_tokens,
+                        },
+                    });
+                    writeSse(response, "message_stop", {
+                        type: "message_stop",
+                    });
+                    response.end();
+                    return;
+                }
+                catch (error) {
+                    const message = error instanceof Error ? error.message : String(error);
+                    await appendUsageRecord(buildUsageRecord({
+                        id: `err_${randomUUID()}`,
+                        status: "error",
+                        model: targetModel,
+                        inputTokens: estimatedInputTokens,
+                        outputTokens: 0,
+                        latencyMs: Date.now() - startedAt,
+                        error: message,
+                        pricing,
+                    }));
+                    throw error;
+                }
             }
             sendAnthropicError(response, 404, "not_found_error", `Unsupported route: ${request.method || "GET"} ${url.pathname}`);
         }

package/dist/usage.js ADDED Viewed

@@ -0,0 +1,120 @@
+import { promises as fs } from "node:fs";
+import { getNvicodePaths } from "./config.js";
+// Comparison-model rates in USD per million tokens; getPricingSnapshot() exposes
+// them as compareInputUsdPerMTok / compareOutputUsdPerMTok.
+// NOTE(review): presumably copied from the pricing page referenced below — confirm
+// the figures whenever OPUS_4_6_PRICING_UPDATED_AT is bumped.
+const OPUS_4_6_INPUT_USD_PER_MTOK = 5;
+const OPUS_4_6_OUTPUT_USD_PER_MTOK = 25;
+// Provenance metadata reported alongside the comparison rates
+// (comparePricingSource / comparePricingUpdatedAt in the pricing snapshot).
+const OPUS_4_6_PRICING_SOURCE = "https://www.anthropic.com/claude/opus";
+const OPUS_4_6_PRICING_UPDATED_AT = "2026-03-30";
+/**
+ * Reads a non-negative numeric rate from an environment variable.
+ *
+ * @param {string} name - Environment variable to look up.
+ * @param {number} fallback - Returned when the variable is unset, blank,
+ *   not a finite number, or negative.
+ * @returns {number} The parsed rate, or `fallback`.
+ */
+const getEnvUsdRate = (name, fallback) => {
+    const text = process.env[name];
+    const isBlank = text === undefined || text === null || text.trim() === "";
+    if (isBlank) {
+        return fallback;
+    }
+    const rate = Number(text);
+    return Number.isFinite(rate) && rate >= 0 ? rate : fallback;
+};
+/**
+ * Captures the pricing inputs used for cost estimates at call time.
+ *
+ * Provider rates come from NVICODE_INPUT_USD_PER_MTOK and
+ * NVICODE_OUTPUT_USD_PER_MTOK (each falling back to 0); the comparison
+ * rates and their provenance come from the module-level constants.
+ *
+ * @returns {object} A fresh snapshot object on every call.
+ */
+export const getPricingSnapshot = () => {
+    const providerInputUsdPerMTok = getEnvUsdRate("NVICODE_INPUT_USD_PER_MTOK", 0);
+    const providerOutputUsdPerMTok = getEnvUsdRate("NVICODE_OUTPUT_USD_PER_MTOK", 0);
+    return {
+        providerInputUsdPerMTok,
+        providerOutputUsdPerMTok,
+        compareModel: "Claude Opus 4.6",
+        compareInputUsdPerMTok: OPUS_4_6_INPUT_USD_PER_MTOK,
+        compareOutputUsdPerMTok: OPUS_4_6_OUTPUT_USD_PER_MTOK,
+        comparePricingSource: OPUS_4_6_PRICING_SOURCE,
+        comparePricingUpdatedAt: OPUS_4_6_PRICING_UPDATED_AT,
+    };
+};
+/**
+ * Estimates the USD cost of a request from token counts and per-million-token rates.
+ *
+ * @param {number} inputTokens - Number of input tokens.
+ * @param {number} outputTokens - Number of output tokens.
+ * @param {number} inputUsdPerMTok - USD charged per million input tokens.
+ * @param {number} outputUsdPerMTok - USD charged per million output tokens.
+ * @returns {number} Input cost plus output cost, in USD.
+ */
+export const estimateCostUsd = (inputTokens, outputTokens, inputUsdPerMTok, outputUsdPerMTok) => {
+    const inputCostUsd = (inputTokens / 1_000_000) * inputUsdPerMTok;
+    const outputCostUsd = (outputTokens / 1_000_000) * outputUsdPerMTok;
+    return inputCostUsd + outputCostUsd;
+};
+/**
+ * Builds one usage-log record, including provider cost, comparison cost and
+ * the difference between them.
+ *
+ * @param {object} fields
+ * @param {string} fields.id - Record identifier.
+ * @param {string} [fields.timestamp] - Defaults to the current time as an ISO string.
+ * @param {string} fields.status - Outcome label stored on the record.
+ * @param {string} fields.model - Model name stored on the record.
+ * @param {number} fields.inputTokens
+ * @param {number} fields.outputTokens
+ * @param {number} fields.latencyMs
+ * @param {string} [fields.stopReason] - Stored as `null` when nullish.
+ * @param {*} [fields.error] - Only stored when truthy.
+ * @param {object} [fields.pricing] - Defaults to `getPricingSnapshot()`.
+ * @returns {object} The assembled record.
+ */
+export const buildUsageRecord = ({
+    id,
+    timestamp = new Date().toISOString(),
+    status,
+    model,
+    inputTokens,
+    outputTokens,
+    latencyMs,
+    stopReason,
+    error,
+    pricing = getPricingSnapshot(),
+}) => {
+    const {
+        providerInputUsdPerMTok,
+        providerOutputUsdPerMTok,
+        compareInputUsdPerMTok,
+        compareOutputUsdPerMTok,
+    } = pricing;
+    const providerCostUsd = estimateCostUsd(inputTokens, outputTokens, providerInputUsdPerMTok, providerOutputUsdPerMTok);
+    const compareCostUsd = estimateCostUsd(inputTokens, outputTokens, compareInputUsdPerMTok, compareOutputUsdPerMTok);
+    const record = {
+        id,
+        timestamp,
+        status,
+        model,
+        inputTokens,
+        outputTokens,
+        latencyMs,
+        providerCostUsd,
+        compareCostUsd,
+        savingsUsd: compareCostUsd - providerCostUsd,
+        stopReason: stopReason ?? null,
+    };
+    // Keys are added in this order so the serialized record keeps `error`
+    // (when present) between `stopReason` and `pricing`.
+    if (error) {
+        record.error = error;
+    }
+    record.pricing = pricing;
+    return record;
+};
+/**
+ * Appends one record to the usage log as a single JSON line, creating the
+ * state directory first if it does not exist.
+ *
+ * @param {object} record - Value serialized with JSON.stringify.
+ * @returns {Promise<void>}
+ */
+export const appendUsageRecord = async (record) => {
+    const nvicodePaths = getNvicodePaths();
+    await fs.mkdir(nvicodePaths.stateDir, { recursive: true });
+    const logFile = nvicodePaths.usageLogFile;
+    const line = `${JSON.stringify(record)}\n`;
+    await fs.appendFile(logFile, line, "utf8");
+};
+/**
+ * Loads every usage record from the JSON-lines usage log, newest first.
+ *
+ * Blank lines, lines that are not valid JSON, and values without a string
+ * `timestamp` are skipped, so one corrupted or partially written line cannot
+ * make the rest of the history unreadable.
+ *
+ * @returns {Promise<object[]>} Records sorted by `timestamp` descending;
+ *   an empty array when the log file does not exist.
+ * @throws Any read error other than ENOENT.
+ */
+export const readUsageRecords = async () => {
+    const paths = getNvicodePaths();
+    let raw;
+    try {
+        raw = await fs.readFile(paths.usageLogFile, "utf8");
+    }
+    catch (error) {
+        if (error?.code === "ENOENT") {
+            return [];
+        }
+        throw error;
+    }
+    const records = [];
+    for (const line of raw.split("\n")) {
+        const trimmed = line.trim();
+        if (trimmed === "") {
+            continue;
+        }
+        let record;
+        try {
+            record = JSON.parse(trimmed);
+        }
+        catch {
+            // Torn or corrupted line: drop it rather than failing the whole read.
+            continue;
+        }
+        // `?.` also guards a literal `null` line, which parses successfully.
+        if (typeof record?.timestamp === "string") {
+            records.push(record);
+        }
+    }
+    return records.sort((left, right) => right.timestamp.localeCompare(left.timestamp));
+};
+/**
+ * Totals request counts, token counts and cost figures across usage records.
+ *
+ * @param {object[]} records - Usage records to aggregate.
+ * @returns {object} Totals: requests, successes, errors, inputTokens,
+ *   outputTokens, providerCostUsd, compareCostUsd and savingsUsd.
+ */
+export const summarizeUsage = (records) => {
+    const totals = {
+        requests: 0,
+        successes: 0,
+        errors: 0,
+        inputTokens: 0,
+        outputTokens: 0,
+        providerCostUsd: 0,
+        compareCostUsd: 0,
+        savingsUsd: 0,
+    };
+    records.forEach((entry) => {
+        totals.requests += 1;
+        if (entry.status === "success") {
+            totals.successes += 1;
+        }
+        if (entry.status === "error") {
+            totals.errors += 1;
+        }
+        totals.inputTokens += entry.inputTokens;
+        totals.outputTokens += entry.outputTokens;
+        totals.providerCostUsd += entry.providerCostUsd;
+        totals.compareCostUsd += entry.compareCostUsd;
+        totals.savingsUsd += entry.savingsUsd;
+    });
+    return totals;
+};
+/**
+ * Keeps the records whose timestamp parses to a time at or after `sinceMs`.
+ *
+ * @param {object[]} records - Records carrying a `timestamp` string.
+ * @param {number} sinceMs - Inclusive lower bound, in epoch milliseconds.
+ * @returns {object[]} A new array; records with unparseable timestamps are dropped.
+ */
+export const filterRecordsSince = (records, sinceMs) => records.filter(({ timestamp }) => {
+    const parsedMs = Date.parse(timestamp);
+    if (Number.isNaN(parsedMs)) {
+        return false;
+    }
+    return parsedMs >= sinceMs;
+});
+// Shared en-US formatters, built once at module load and reused by the
+// helpers below.
+const wholeNumberFormat = new Intl.NumberFormat("en-US");
+const usdFormatOptions = {
+    style: "currency",
+    currency: "USD",
+    minimumFractionDigits: 4,
+    maximumFractionDigits: 4,
+};
+const usdFormat = new Intl.NumberFormat("en-US", usdFormatOptions);
+/**
+ * Formats a number as a rounded integer with en-US digit grouping.
+ *
+ * @param {number} value
+ * @returns {string}
+ */
+export const formatInteger = (value) => {
+    const rounded = Math.round(value);
+    return wholeNumberFormat.format(rounded);
+};
+/**
+ * Formats a number as US dollars with exactly four fraction digits.
+ *
+ * @param {number} value
+ * @returns {string}
+ */
+export const formatUsd = (value) => {
+    return usdFormat.format(value);
+};
+/**
+ * Formats a millisecond duration as a short human-readable string.
+ *
+ * Below one second the value is shown as-is in milliseconds; otherwise it is
+ * shown in seconds with one decimal, or in minutes with one decimal once the
+ * rounded seconds value reaches 60.
+ *
+ * @param {number} ms - Duration in milliseconds.
+ * @returns {string} For example "250ms", "1.5s" or "1.5m".
+ */
+export const formatDuration = (ms) => {
+    if (ms < 1_000) {
+        return `${ms}ms`;
+    }
+    const seconds = (ms / 1_000).toFixed(1);
+    // Decide on the rounded value: 59,999 ms rounds to "60.0", which should be
+    // reported as "1.0m" instead of the out-of-range "60.0s".
+    if (Number(seconds) < 60) {
+        return `${seconds}s`;
+    }
+    return `${(ms / 60_000).toFixed(1)}m`;
+};
+/**
+ * Renders a date-like value as "YYYY-MM-DD HH:MM:SS" (UTC, from the ISO form).
+ *
+ * @param {*} value - Anything accepted by the Date constructor.
+ * @returns {*} The formatted string, or `value` unchanged when it does not
+ *   produce a valid date.
+ */
+export const formatTimestamp = (value) => {
+    const parsed = new Date(value);
+    const isValid = !Number.isNaN(parsed.getTime());
+    if (!isValid) {
+        return value;
+    }
+    const isoPrefix = parsed.toISOString().slice(0, 19);
+    return isoPrefix.replace("T", " ");
+};

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "nvicode",
-  "version": "0.1.1",
+  "version": "0.1.5",
   "description": "Run Claude Code through NVIDIA-hosted models using a local Anthropic-compatible gateway.",
   "author": "Dinesh Potla",
   "keywords": [