nvicode 0.1.2 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/config.js CHANGED
@@ -3,10 +3,46 @@ import { promises as fs } from "node:fs";
3
3
  import os from "node:os";
4
4
  import path from "node:path";
5
5
  const DEFAULT_PROXY_PORT = 8788;
6
- const DEFAULT_MODEL = "moonshotai/kimi-k2.5";
6
+ const DEFAULT_PROVIDER = "nvidia";
7
+ const DEFAULT_NVIDIA_MODEL = "moonshotai/kimi-k2.5";
8
+ const DEFAULT_OPENROUTER_MODEL = "anthropic/claude-sonnet-4.6";
9
+ const DEFAULT_MAX_REQUESTS_PER_MINUTE = 40;
10
+ const getEnvNumber = (name) => {
11
+ const raw = process.env[name];
12
+ if (!raw) {
13
+ return null;
14
+ }
15
+ const parsed = Number(raw);
16
+ if (!Number.isFinite(parsed) || parsed <= 0) {
17
+ return null;
18
+ }
19
+ return Math.floor(parsed);
20
+ };
21
+ const getDefaultConfigHome = () => {
22
+ if (process.env.XDG_CONFIG_HOME) {
23
+ return process.env.XDG_CONFIG_HOME;
24
+ }
25
+ if (process.platform === "win32") {
26
+ return (process.env.APPDATA ||
27
+ process.env.LOCALAPPDATA ||
28
+ path.join(os.homedir(), ".local", "share"));
29
+ }
30
+ return path.join(os.homedir(), ".local", "share");
31
+ };
32
+ const getDefaultStateHome = () => {
33
+ if (process.env.XDG_STATE_HOME) {
34
+ return process.env.XDG_STATE_HOME;
35
+ }
36
+ if (process.platform === "win32") {
37
+ return (process.env.LOCALAPPDATA ||
38
+ process.env.APPDATA ||
39
+ path.join(os.homedir(), ".local", "state"));
40
+ }
41
+ return path.join(os.homedir(), ".local", "state");
42
+ };
7
43
  export const getNvicodePaths = () => {
8
- const configHome = process.env.XDG_CONFIG_HOME || path.join(os.homedir(), ".local", "share");
9
- const stateHome = process.env.XDG_STATE_HOME || path.join(os.homedir(), ".local", "state");
44
+ const configHome = getDefaultConfigHome();
45
+ const stateHome = getDefaultStateHome();
10
46
  const configDir = path.join(configHome, "nvicode");
11
47
  const stateDir = path.join(stateHome, "nvicode");
12
48
  return {
@@ -15,17 +51,31 @@ export const getNvicodePaths = () => {
15
51
  stateDir,
16
52
  logFile: path.join(stateDir, "proxy.log"),
17
53
  pidFile: path.join(stateDir, "proxy.pid"),
54
+ usageLogFile: path.join(stateDir, "usage.jsonl"),
55
+ };
56
+ };
57
+ const withDefaults = (config) => {
58
+ const envMaxRequestsPerMinute = getEnvNumber("NVICODE_MAX_RPM");
59
+ const legacyApiKey = config.apiKey?.trim() || "";
60
+ const legacyModel = config.model?.trim() || DEFAULT_NVIDIA_MODEL;
61
+ return {
62
+ provider: config.provider === "openrouter" ? "openrouter" : DEFAULT_PROVIDER,
63
+ nvidiaApiKey: config.nvidiaApiKey?.trim() || legacyApiKey,
64
+ nvidiaModel: config.nvidiaModel?.trim() || legacyModel,
65
+ openrouterApiKey: config.openrouterApiKey?.trim() || "",
66
+ openrouterModel: config.openrouterModel?.trim() || DEFAULT_OPENROUTER_MODEL,
67
+ proxyPort: Number.isInteger(config.proxyPort) && config.proxyPort > 0
68
+ ? config.proxyPort
69
+ : DEFAULT_PROXY_PORT,
70
+ proxyToken: config.proxyToken?.trim() || randomUUID(),
71
+ thinking: config.thinking ?? false,
72
+ maxRequestsPerMinute: envMaxRequestsPerMinute ||
73
+ (Number.isInteger(config.maxRequestsPerMinute) &&
74
+ config.maxRequestsPerMinute > 0
75
+ ? config.maxRequestsPerMinute
76
+ : DEFAULT_MAX_REQUESTS_PER_MINUTE),
18
77
  };
19
78
  };
20
- const withDefaults = (config) => ({
21
- apiKey: config.apiKey?.trim() || "",
22
- model: config.model?.trim() || DEFAULT_MODEL,
23
- proxyPort: Number.isInteger(config.proxyPort) && config.proxyPort > 0
24
- ? config.proxyPort
25
- : DEFAULT_PROXY_PORT,
26
- proxyToken: config.proxyToken?.trim() || randomUUID(),
27
- thinking: config.thinking ?? false,
28
- });
29
79
  export const loadConfig = async () => {
30
80
  const paths = getNvicodePaths();
31
81
  try {
@@ -54,3 +104,5 @@ export const updateConfig = async (patch) => {
54
104
  ...patch,
55
105
  });
56
106
  };
107
+ export const getActiveApiKey = (config) => config.provider === "openrouter" ? config.openrouterApiKey : config.nvidiaApiKey;
108
+ export const getActiveModel = (config) => config.provider === "openrouter" ? config.openrouterModel : config.nvidiaModel;
package/dist/models.js CHANGED
@@ -1,4 +1,4 @@
1
- export const CURATED_MODELS = [
1
+ export const NVIDIA_CURATED_MODELS = [
2
2
  {
3
3
  id: "moonshotai/kimi-k2.5",
4
4
  label: "Kimi K2.5",
@@ -30,6 +30,28 @@ export const CURATED_MODELS = [
30
30
  description: "Smaller coding-focused Qwen model.",
31
31
  },
32
32
  ];
33
+ export const OPENROUTER_CURATED_MODELS = [
34
+ {
35
+ id: "qwen/qwen3.6-plus-preview:free",
36
+ label: "Qwen 3.6 Plus Preview (Free)",
37
+ description: "Free OpenRouter Qwen preview model.",
38
+ },
39
+ {
40
+ id: "anthropic/claude-sonnet-4.6",
41
+ label: "Claude Sonnet 4.6",
42
+ description: "Recommended OpenRouter model for Claude Code compatibility.",
43
+ },
44
+ {
45
+ id: "anthropic/claude-opus-4.6",
46
+ label: "Claude Opus 4.6",
47
+ description: "Higher-end Anthropic model through OpenRouter.",
48
+ },
49
+ {
50
+ id: "anthropic/claude-haiku-4.5",
51
+ label: "Claude Haiku 4.5",
52
+ description: "Faster lower-cost Anthropic model through OpenRouter.",
53
+ },
54
+ ];
33
55
  const MODELS_URL = "https://integrate.api.nvidia.com/v1/models";
34
56
  export const fetchAvailableModelIds = async (apiKey) => {
35
57
  const response = await fetch(MODELS_URL, {
@@ -49,13 +71,16 @@ export const fetchAvailableModelIds = async (apiKey) => {
49
71
  }
50
72
  return ids;
51
73
  };
52
- export const getRecommendedModels = async (apiKey) => {
74
+ export const getRecommendedModels = async (provider, apiKey) => {
75
+ if (provider === "openrouter") {
76
+ return OPENROUTER_CURATED_MODELS;
77
+ }
53
78
  try {
54
79
  const available = await fetchAvailableModelIds(apiKey);
55
- const curated = CURATED_MODELS.filter((model) => available.has(model.id));
56
- return curated.length > 0 ? curated : CURATED_MODELS;
80
+ const curated = NVIDIA_CURATED_MODELS.filter((model) => available.has(model.id));
81
+ return curated.length > 0 ? curated : NVIDIA_CURATED_MODELS;
57
82
  }
58
83
  catch {
59
- return CURATED_MODELS;
84
+ return NVIDIA_CURATED_MODELS;
60
85
  }
61
86
  };
package/dist/proxy.js CHANGED
@@ -1,6 +1,46 @@
1
1
  import { randomUUID } from "node:crypto";
2
2
  import { createServer } from "node:http";
3
+ import { appendUsageRecord, buildUsageRecord, getPricingSnapshot, } from "./usage.js";
3
4
  const NVIDIA_URL = "https://integrate.api.nvidia.com/v1/chat/completions";
5
+ const DEFAULT_RETRY_DELAY_MS = 2_000;
6
+ const MAX_NVIDIA_RETRIES = 3;
7
+ const sleep = async (ms) => {
8
+ if (ms <= 0) {
9
+ return;
10
+ }
11
+ await new Promise((resolve) => setTimeout(resolve, ms));
12
+ };
13
+ const parseRetryAfterMs = (value) => {
14
+ if (!value) {
15
+ return null;
16
+ }
17
+ const seconds = Number(value);
18
+ if (Number.isFinite(seconds) && seconds >= 0) {
19
+ return Math.ceil(seconds * 1000);
20
+ }
21
+ const timestamp = Date.parse(value);
22
+ if (Number.isNaN(timestamp)) {
23
+ return null;
24
+ }
25
+ return Math.max(0, timestamp - Date.now());
26
+ };
27
+ const createRequestScheduler = (maxRequestsPerMinute) => {
28
+ const intervalMs = Math.max(1, Math.ceil(60_000 / maxRequestsPerMinute));
29
+ let nextAvailableAt = 0;
30
+ let queue = Promise.resolve();
31
+ return async (task) => {
32
+ const runTask = async () => {
33
+ const now = Date.now();
34
+ const scheduledAt = Math.max(now, nextAvailableAt);
35
+ nextAvailableAt = scheduledAt + intervalMs;
36
+ await sleep(scheduledAt - now);
37
+ return task();
38
+ };
39
+ const result = queue.then(runTask, runTask);
40
+ queue = result.then(() => undefined, () => undefined);
41
+ return result;
42
+ };
43
+ };
4
44
  const sendJson = (response, statusCode, payload) => {
5
45
  response.writeHead(statusCode, {
6
46
  "Content-Type": "application/json",
@@ -296,10 +336,60 @@ const estimateTokens = (payload) => {
296
336
  const raw = JSON.stringify(payload);
297
337
  return Math.max(1, Math.ceil(raw.length / 4));
298
338
  };
299
- const callNvidia = async (config, payload) => {
300
- const targetModel = payload.model && payload.model.includes("/") && !payload.model.startsWith("claude-")
301
- ? payload.model
302
- : config.model;
339
+ const getCurrentTurnMessages = (messages) => {
340
+ const entries = messages ?? [];
341
+ for (let index = entries.length - 1; index >= 0; index -= 1) {
342
+ if (entries[index]?.role === "assistant") {
343
+ return entries.slice(index + 1);
344
+ }
345
+ }
346
+ return entries;
347
+ };
348
+ const extractPromptInput = (messages) => {
349
+ const parts = [];
350
+ for (const message of messages) {
351
+ if (message.role !== "user") {
352
+ continue;
353
+ }
354
+ if (typeof message.content === "string") {
355
+ if (message.content.trim().length > 0) {
356
+ parts.push(message.content);
357
+ }
358
+ continue;
359
+ }
360
+ for (const block of message.content) {
361
+ if (block.type === "text" && block.text.trim().length > 0) {
362
+ parts.push(block.text);
363
+ continue;
364
+ }
365
+ if (block.type === "image" && block.source?.data) {
366
+ parts.push({
367
+ type: "image_url",
368
+ image_url: {
369
+ url: `data:${block.source.media_type || "application/octet-stream"};base64,${block.source.data}`,
370
+ },
371
+ });
372
+ }
373
+ }
374
+ }
375
+ return parts;
376
+ };
377
+ const estimateTurnInputTokens = (payload) => {
378
+ const currentTurnMessages = getCurrentTurnMessages(payload.messages);
379
+ const promptInput = extractPromptInput(currentTurnMessages);
380
+ if (promptInput.length === 0) {
381
+ return 0;
382
+ }
383
+ return estimateTokens({
384
+ prompt: promptInput,
385
+ });
386
+ };
387
+ const estimateTurnOutputTokens = (content) => estimateTokens(content);
388
+ const resolveTargetModel = (config, payload) => payload.model && payload.model.includes("/") && !payload.model.startsWith("claude-")
389
+ ? payload.model
390
+ : config.nvidiaModel;
391
+ const callNvidia = async (config, scheduleRequest, payload) => {
392
+ const targetModel = resolveTargetModel(config, payload);
303
393
  const requestBody = {
304
394
  model: targetModel,
305
395
  messages: mapMessages(payload),
@@ -328,25 +418,38 @@ const callNvidia = async (config, payload) => {
328
418
  thinking: true,
329
419
  };
330
420
  }
331
- const response = await fetch(NVIDIA_URL, {
332
- method: "POST",
333
- headers: {
334
- Authorization: `Bearer ${config.apiKey}`,
335
- Accept: "application/json",
336
- "Content-Type": "application/json",
337
- },
338
- body: JSON.stringify(requestBody),
339
- });
340
- const raw = await response.text();
341
- if (!response.ok) {
342
- throw new Error(`NVIDIA API HTTP ${response.status}: ${raw}`);
343
- }
421
+ const invoke = async () => {
422
+ for (let attempt = 0; attempt <= MAX_NVIDIA_RETRIES; attempt += 1) {
423
+ const response = await fetch(NVIDIA_URL, {
424
+ method: "POST",
425
+ headers: {
426
+ Authorization: `Bearer ${config.nvidiaApiKey}`,
427
+ Accept: "application/json",
428
+ "Content-Type": "application/json",
429
+ },
430
+ body: JSON.stringify(requestBody),
431
+ });
432
+ const raw = await response.text();
433
+ if (response.ok) {
434
+ return JSON.parse(raw);
435
+ }
436
+ if (response.status === 429 && attempt < MAX_NVIDIA_RETRIES) {
437
+ const retryAfterMs = parseRetryAfterMs(response.headers.get("retry-after")) ||
438
+ DEFAULT_RETRY_DELAY_MS * 2 ** attempt;
439
+ await sleep(retryAfterMs);
440
+ continue;
441
+ }
442
+ throw new Error(`NVIDIA API HTTP ${response.status}: ${raw}`);
443
+ }
444
+ throw new Error("NVIDIA API retry loop exhausted unexpectedly.");
445
+ };
344
446
  return {
345
447
  targetModel,
346
- upstream: JSON.parse(raw),
448
+ upstream: await scheduleRequest(invoke),
347
449
  };
348
450
  };
349
451
  export const createProxyServer = (config) => {
452
+ const scheduleNvidiaRequest = createRequestScheduler(config.maxRequestsPerMinute);
350
453
  return createServer(async (request, response) => {
351
454
  try {
352
455
  const url = new URL(request.url || "/", "http://127.0.0.1");
@@ -358,9 +461,10 @@ export const createProxyServer = (config) => {
358
461
  if (url.pathname === "/health") {
359
462
  sendJson(response, 200, {
360
463
  ok: true,
361
- model: config.model,
464
+ model: config.nvidiaModel,
362
465
  port: config.proxyPort,
363
466
  thinking: config.thinking,
467
+ maxRequestsPerMinute: config.maxRequestsPerMinute,
364
468
  });
365
469
  return;
366
470
  }
@@ -384,114 +488,149 @@ export const createProxyServer = (config) => {
384
488
  if (request.method === "POST" && url.pathname === "/v1/messages") {
385
489
  const rawBody = await readRequestBody(request);
386
490
  const payload = JSON.parse(rawBody);
387
- const { upstream, targetModel } = await callNvidia(config, payload);
388
- const choice = upstream.choices?.[0];
389
- const mappedContent = mapResponseContent(choice);
390
- const anthropicResponse = {
391
- id: upstream.id || `msg_${randomUUID()}`,
392
- type: "message",
393
- role: "assistant",
394
- model: targetModel,
395
- content: mappedContent,
396
- stop_reason: mapStopReason(choice?.finish_reason),
397
- stop_sequence: null,
398
- usage: {
399
- input_tokens: upstream.usage?.prompt_tokens ??
400
- estimateTokens({
401
- system: payload.system ?? null,
402
- messages: payload.messages ?? [],
403
- tools: payload.tools ?? [],
404
- }),
405
- output_tokens: upstream.usage?.completion_tokens ?? 0,
406
- },
407
- };
408
- if (!payload.stream) {
409
- sendJson(response, 200, anthropicResponse);
410
- return;
411
- }
412
- response.writeHead(200, {
413
- "Cache-Control": "no-cache, no-transform",
414
- Connection: "keep-alive",
415
- "Content-Type": "text/event-stream",
491
+ const targetModel = resolveTargetModel(config, payload);
492
+ const estimatedInputTokens = estimateTokens({
493
+ system: payload.system ?? null,
494
+ messages: payload.messages ?? [],
495
+ tools: payload.tools ?? [],
416
496
  });
417
- writeSse(response, "message_start", {
418
- type: "message_start",
419
- message: {
420
- ...anthropicResponse,
421
- content: [],
422
- stop_reason: null,
497
+ const estimatedTurnInputTokens = estimateTurnInputTokens(payload);
498
+ const startedAt = Date.now();
499
+ const pricing = getPricingSnapshot();
500
+ try {
501
+ const { upstream } = await callNvidia(config, scheduleNvidiaRequest, payload);
502
+ const choice = upstream.choices?.[0];
503
+ const mappedContent = mapResponseContent(choice);
504
+ const estimatedTurnOutputTokens = estimateTurnOutputTokens(mappedContent);
505
+ const anthropicResponse = {
506
+ id: upstream.id || `msg_${randomUUID()}`,
507
+ type: "message",
508
+ role: "assistant",
509
+ model: targetModel,
510
+ content: mappedContent,
511
+ stop_reason: mapStopReason(choice?.finish_reason),
512
+ stop_sequence: null,
423
513
  usage: {
424
- input_tokens: anthropicResponse.usage.input_tokens,
425
- output_tokens: 0,
514
+ input_tokens: upstream.usage?.prompt_tokens ?? estimatedInputTokens,
515
+ output_tokens: upstream.usage?.completion_tokens ?? 0,
426
516
  },
427
- },
428
- });
429
- mappedContent.forEach((block, index) => {
430
- if (block.type === "text") {
431
- writeSse(response, "content_block_start", {
432
- type: "content_block_start",
433
- index,
434
- content_block: {
435
- type: "text",
436
- text: "",
517
+ };
518
+ await appendUsageRecord(buildUsageRecord({
519
+ id: anthropicResponse.id,
520
+ status: "success",
521
+ model: targetModel,
522
+ inputTokens: anthropicResponse.usage.input_tokens,
523
+ outputTokens: anthropicResponse.usage.output_tokens,
524
+ turnInputTokens: estimatedTurnInputTokens,
525
+ turnOutputTokens: estimatedTurnOutputTokens,
526
+ latencyMs: Date.now() - startedAt,
527
+ stopReason: anthropicResponse.stop_reason,
528
+ pricing,
529
+ }));
530
+ if (!payload.stream) {
531
+ sendJson(response, 200, anthropicResponse);
532
+ return;
533
+ }
534
+ response.writeHead(200, {
535
+ "Cache-Control": "no-cache, no-transform",
536
+ Connection: "keep-alive",
537
+ "Content-Type": "text/event-stream",
538
+ });
539
+ writeSse(response, "message_start", {
540
+ type: "message_start",
541
+ message: {
542
+ ...anthropicResponse,
543
+ content: [],
544
+ stop_reason: null,
545
+ usage: {
546
+ input_tokens: anthropicResponse.usage.input_tokens,
547
+ output_tokens: 0,
437
548
  },
438
- });
439
- for (const chunk of chunkText(block.text)) {
549
+ },
550
+ });
551
+ mappedContent.forEach((block, index) => {
552
+ if (block.type === "text") {
553
+ writeSse(response, "content_block_start", {
554
+ type: "content_block_start",
555
+ index,
556
+ content_block: {
557
+ type: "text",
558
+ text: "",
559
+ },
560
+ });
561
+ for (const chunk of chunkText(block.text)) {
562
+ writeSse(response, "content_block_delta", {
563
+ type: "content_block_delta",
564
+ index,
565
+ delta: {
566
+ type: "text_delta",
567
+ text: chunk,
568
+ },
569
+ });
570
+ }
571
+ writeSse(response, "content_block_stop", {
572
+ type: "content_block_stop",
573
+ index,
574
+ });
575
+ return;
576
+ }
577
+ if (block.type === "tool_use") {
578
+ writeSse(response, "content_block_start", {
579
+ type: "content_block_start",
580
+ index,
581
+ content_block: {
582
+ type: "tool_use",
583
+ id: block.id,
584
+ name: block.name,
585
+ input: {},
586
+ },
587
+ });
440
588
  writeSse(response, "content_block_delta", {
441
589
  type: "content_block_delta",
442
590
  index,
443
591
  delta: {
444
- type: "text_delta",
445
- text: chunk,
592
+ type: "input_json_delta",
593
+ partial_json: JSON.stringify(block.input ?? {}),
446
594
  },
447
595
  });
596
+ writeSse(response, "content_block_stop", {
597
+ type: "content_block_stop",
598
+ index,
599
+ });
448
600
  }
449
- writeSse(response, "content_block_stop", {
450
- type: "content_block_stop",
451
- index,
452
- });
453
- return;
454
- }
455
- if (block.type === "tool_use") {
456
- writeSse(response, "content_block_start", {
457
- type: "content_block_start",
458
- index,
459
- content_block: {
460
- type: "tool_use",
461
- id: block.id,
462
- name: block.name,
463
- input: {},
464
- },
465
- });
466
- writeSse(response, "content_block_delta", {
467
- type: "content_block_delta",
468
- index,
469
- delta: {
470
- type: "input_json_delta",
471
- partial_json: JSON.stringify(block.input ?? {}),
472
- },
473
- });
474
- writeSse(response, "content_block_stop", {
475
- type: "content_block_stop",
476
- index,
477
- });
478
- }
479
- });
480
- writeSse(response, "message_delta", {
481
- type: "message_delta",
482
- delta: {
483
- stop_reason: anthropicResponse.stop_reason,
484
- stop_sequence: null,
485
- },
486
- usage: {
487
- output_tokens: anthropicResponse.usage.output_tokens,
488
- },
489
- });
490
- writeSse(response, "message_stop", {
491
- type: "message_stop",
492
- });
493
- response.end();
494
- return;
601
+ });
602
+ writeSse(response, "message_delta", {
603
+ type: "message_delta",
604
+ delta: {
605
+ stop_reason: anthropicResponse.stop_reason,
606
+ stop_sequence: null,
607
+ },
608
+ usage: {
609
+ output_tokens: anthropicResponse.usage.output_tokens,
610
+ },
611
+ });
612
+ writeSse(response, "message_stop", {
613
+ type: "message_stop",
614
+ });
615
+ response.end();
616
+ return;
617
+ }
618
+ catch (error) {
619
+ const message = error instanceof Error ? error.message : String(error);
620
+ await appendUsageRecord(buildUsageRecord({
621
+ id: `err_${randomUUID()}`,
622
+ status: "error",
623
+ model: targetModel,
624
+ inputTokens: estimatedInputTokens,
625
+ outputTokens: 0,
626
+ turnInputTokens: estimatedTurnInputTokens,
627
+ turnOutputTokens: 0,
628
+ latencyMs: Date.now() - startedAt,
629
+ error: message,
630
+ pricing,
631
+ }));
632
+ throw error;
633
+ }
495
634
  }
496
635
  sendAnthropicError(response, 404, "not_found_error", `Unsupported route: ${request.method || "GET"} ${url.pathname}`);
497
636
  }