@juspay/neurolink 9.59.6 → 9.60.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. package/CHANGELOG.md +6 -0
  2. package/README.md +11 -7
  3. package/dist/adapters/providerImageAdapter.js +52 -2
  4. package/dist/browser/neurolink.min.js +352 -352
  5. package/dist/cli/factories/commandFactory.js +15 -1
  6. package/dist/cli/utils/interactiveSetup.js +64 -0
  7. package/dist/constants/contextWindows.d.ts +5 -1
  8. package/dist/constants/contextWindows.js +67 -3
  9. package/dist/constants/enums.d.ts +52 -0
  10. package/dist/constants/enums.js +63 -0
  11. package/dist/core/baseProvider.d.ts +15 -6
  12. package/dist/core/baseProvider.js +28 -0
  13. package/dist/factories/providerRegistry.js +25 -1
  14. package/dist/lib/adapters/providerImageAdapter.js +52 -2
  15. package/dist/lib/constants/contextWindows.d.ts +5 -1
  16. package/dist/lib/constants/contextWindows.js +67 -3
  17. package/dist/lib/constants/enums.d.ts +52 -0
  18. package/dist/lib/constants/enums.js +63 -0
  19. package/dist/lib/core/baseProvider.d.ts +15 -6
  20. package/dist/lib/core/baseProvider.js +28 -0
  21. package/dist/lib/factories/providerRegistry.js +25 -1
  22. package/dist/lib/providers/deepseek.d.ts +29 -0
  23. package/dist/lib/providers/deepseek.js +216 -0
  24. package/dist/lib/providers/index.d.ts +4 -0
  25. package/dist/lib/providers/index.js +4 -0
  26. package/dist/lib/providers/llamaCpp.d.ts +34 -0
  27. package/dist/lib/providers/llamaCpp.js +315 -0
  28. package/dist/lib/providers/lmStudio.d.ts +34 -0
  29. package/dist/lib/providers/lmStudio.js +306 -0
  30. package/dist/lib/providers/nvidiaNim.d.ts +31 -0
  31. package/dist/lib/providers/nvidiaNim.js +354 -0
  32. package/dist/lib/proxy/proxyFetch.d.ts +9 -0
  33. package/dist/lib/proxy/proxyFetch.js +6 -1
  34. package/dist/lib/types/providers.d.ts +37 -2
  35. package/dist/lib/types/providers.js +1 -1
  36. package/dist/lib/utils/modelChoices.js +68 -4
  37. package/dist/lib/utils/pricing.d.ts +5 -0
  38. package/dist/lib/utils/pricing.js +94 -3
  39. package/dist/lib/utils/providerConfig.d.ts +16 -0
  40. package/dist/lib/utils/providerConfig.js +82 -0
  41. package/dist/providers/deepseek.d.ts +29 -0
  42. package/dist/providers/deepseek.js +215 -0
  43. package/dist/providers/index.d.ts +4 -0
  44. package/dist/providers/index.js +4 -0
  45. package/dist/providers/llamaCpp.d.ts +34 -0
  46. package/dist/providers/llamaCpp.js +314 -0
  47. package/dist/providers/lmStudio.d.ts +34 -0
  48. package/dist/providers/lmStudio.js +305 -0
  49. package/dist/providers/nvidiaNim.d.ts +31 -0
  50. package/dist/providers/nvidiaNim.js +353 -0
  51. package/dist/proxy/proxyFetch.d.ts +9 -0
  52. package/dist/proxy/proxyFetch.js +6 -1
  53. package/dist/types/providers.d.ts +37 -2
  54. package/dist/utils/modelChoices.js +68 -4
  55. package/dist/utils/pricing.d.ts +5 -0
  56. package/dist/utils/pricing.js +94 -3
  57. package/dist/utils/providerConfig.d.ts +16 -0
  58. package/dist/utils/providerConfig.js +82 -0
  59. package/package.json +19 -12
@@ -0,0 +1,31 @@
+ import { type LanguageModel } from "ai";
+ import type { AIProviderName } from "../constants/enums.js";
+ import { BaseProvider } from "../core/baseProvider.js";
+ import type { NeurolinkCredentials, StreamOptions, StreamResult, ValidationSchema } from "../types/index.js";
+ /**
+  * NVIDIA NIM Provider
+  * Wraps NVIDIA's hosted (or self-hosted) inference endpoints via OpenAI-compat.
+  * Passes NIM-specific extras (top_k, min_p, repetition_penalty,
+  * chat_template_kwargs.reasoning_budget) via providerOptions.openai.body.
+  * Implements one-retry-on-400 to drop unsupported extras gracefully.
+  */
+ export declare class NvidiaNimProvider extends BaseProvider {
+     private model;
+     private apiKey;
+     private baseURL;
+     constructor(modelName?: string, sdk?: unknown, _region?: string, credentials?: NeurolinkCredentials["nvidiaNim"]);
+     protected executeStream(options: StreamOptions, _analysisSchema?: ValidationSchema): Promise<StreamResult>;
+     private executeStreamInner;
+     protected getProviderName(): AIProviderName;
+     protected getDefaultModel(): string;
+     protected getAISDKModel(): LanguageModel;
+     protected formatProviderError(error: unknown): Error;
+     validateConfiguration(): Promise<boolean>;
+     getConfiguration(): {
+         provider: AIProviderName;
+         model: string;
+         defaultModel: string;
+         baseURL: string;
+     };
+ }
+ export default NvidiaNimProvider;
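
For orientation, a minimal usage sketch against the declaration above. This is illustrative only and not part of the diff; the deep import path and the NVIDIA_NIM_API_KEY env var name are assumptions (the package may re-export the class from its main entry, and key lookup actually happens in providerConfig), and the credential value is a placeholder.

// Hypothetical usage of the new provider, based only on the declared signatures.
import NvidiaNimProvider from "@juspay/neurolink/dist/providers/nvidiaNim.js"; // assumed path

// constructor(modelName?, sdk?, _region?, credentials?: NeurolinkCredentials["nvidiaNim"])
const provider = new NvidiaNimProvider(
  "meta/llama-3.3-70b-instruct",
  undefined,
  undefined,
  { apiKey: process.env.NVIDIA_NIM_API_KEY }, // assumed env var name; omit to fall back to env-based config
);

const ok = await provider.validateConfiguration(); // true when a non-blank API key was resolved
console.log(ok, provider.getConfiguration());      // { provider, model, defaultModel, baseURL }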
@@ -0,0 +1,353 @@
+ import { createOpenAI } from "@ai-sdk/openai";
+ import { stepCountIs, streamText } from "ai";
+ import { NvidiaNimModels } from "../constants/enums.js";
+ import { BaseProvider } from "../core/baseProvider.js";
+ import { DEFAULT_MAX_STEPS } from "../core/constants.js";
+ import { streamAnalyticsCollector } from "../core/streamAnalytics.js";
+ import { createProxyFetch, maskProxyUrl } from "../proxy/proxyFetch.js";
+ import { tracers, ATTR, withClientSpan } from "../telemetry/index.js";
+ import { logger } from "../utils/logger.js";
+ import { createNvidiaNimConfig, getProviderModel, validateApiKey, } from "../utils/providerConfig.js";
+ import { composeAbortSignals, createTimeoutController, TimeoutError, } from "../utils/timeout.js";
+ import { emitToolEndFromStepFinish } from "../utils/toolEndEmitter.js";
+ const makeLoggingFetch = (provider) => {
+     const base = createProxyFetch();
+     return (async (input, init) => {
+         const url = typeof input === "string"
+             ? input
+             : input instanceof URL
+                 ? input.toString()
+                 : input.url;
+         const reqSize = init?.body && typeof init.body === "string" ? init.body.length : 0;
+         const response = await base(input, init);
+         if (!response.ok) {
+             // If maskProxyUrl can't safely sanitize the URL (returns null), don't
+             // log the raw URL — that defeats the redaction. Use a placeholder so
+             // operators still get the warning without leaking credentials.
+             const safeUrl = maskProxyUrl(url) ?? "<redacted>";
+             if (process.env.NEUROLINK_DEBUG_HTTP === "1") {
+                 const clone = response.clone();
+                 const body = await clone.text().catch(() => "<unreadable>");
+                 logger.warn(`[${provider}] upstream ${response.status}`, {
+                     url: safeUrl,
+                     body: body.slice(0, 800),
+                     reqSize,
+                 });
+             }
+             else {
+                 logger.warn(`[${provider}] upstream ${response.status} url=${safeUrl} reqSize=${reqSize}`);
+             }
+         }
+         return response;
+     });
+ };
+ import { resolveToolChoice } from "../utils/toolChoice.js";
+ import { toAnalyticsStreamResult } from "./providerTypeUtils.js";
+ const NVIDIA_NIM_DEFAULT_BASE_URL = "https://integrate.api.nvidia.com/v1";
+ const envInt = (k) => {
+     const v = process.env[k];
+     if (!v) {
+         return undefined;
+     }
+     const parsed = Number.parseInt(v, 10);
+     return Number.isFinite(parsed) ? parsed : undefined;
+ };
+ const envFloat = (k) => {
+     const v = process.env[k];
+     if (!v) {
+         return undefined;
+     }
+     const parsed = Number.parseFloat(v);
+     return Number.isFinite(parsed) ? parsed : undefined;
+ };
+ const buildNvidiaNimExtraBody = (thinkingEnabled, maxTokens) => {
+     const extra = {};
+     const topK = envInt("NVIDIA_NIM_TOP_K");
+     if (topK !== undefined && topK !== -1) {
+         extra.top_k = topK;
+     }
+     const minP = envFloat("NVIDIA_NIM_MIN_P");
+     if (minP !== undefined && minP !== 0) {
+         extra.min_p = minP;
+     }
+     const repPenalty = envFloat("NVIDIA_NIM_REPETITION_PENALTY");
+     if (repPenalty !== undefined && repPenalty !== 1) {
+         extra.repetition_penalty = repPenalty;
+     }
+     const minTokens = envInt("NVIDIA_NIM_MIN_TOKENS");
+     if (minTokens !== undefined && minTokens !== 0) {
+         extra.min_tokens = minTokens;
+     }
+     const chatTemplate = process.env.NVIDIA_NIM_CHAT_TEMPLATE;
+     if (chatTemplate) {
+         extra.chat_template = chatTemplate;
+     }
+     if (thinkingEnabled) {
+         extra.chat_template_kwargs = {
+             thinking: true,
+             enable_thinking: true,
+             ...(maxTokens ? { reasoning_budget: maxTokens } : {}),
+         };
+     }
+     return extra;
+ };
+ const stripReasoningBudget = (body) => {
+     const cloned = { ...body };
+     if (cloned.chat_template_kwargs) {
+         const { reasoning_budget: _ignored, ...rest } = cloned.chat_template_kwargs;
+         cloned.chat_template_kwargs = rest;
+         if (Object.keys(cloned.chat_template_kwargs).length === 0) {
+             delete cloned.chat_template_kwargs;
+         }
+     }
+     return cloned;
+ };
+ const stripChatTemplate = (body) => {
+     const { chat_template: _ignored, ...rest } = body;
+     return rest;
+ };
+ const getNimApiKey = () => {
+     return validateApiKey(createNvidiaNimConfig());
+ };
+ const getDefaultNimModel = () => {
+     return getProviderModel("NVIDIA_NIM_MODEL", NvidiaNimModels.LLAMA_3_3_70B_INSTRUCT);
+ };
+ /**
+  * NVIDIA NIM Provider
+  * Wraps NVIDIA's hosted (or self-hosted) inference endpoints via OpenAI-compat.
+  * Passes NIM-specific extras (top_k, min_p, repetition_penalty,
+  * chat_template_kwargs.reasoning_budget) via providerOptions.openai.body.
+  * Implements one-retry-on-400 to drop unsupported extras gracefully.
+  */
+ export class NvidiaNimProvider extends BaseProvider {
+     model;
+     apiKey;
+     baseURL;
+     constructor(modelName, sdk, _region, credentials) {
+         const validatedNeurolink = sdk && typeof sdk === "object" && "getInMemoryServers" in sdk
+             ? sdk
+             : undefined;
+         super(modelName, "nvidia-nim", validatedNeurolink);
+         // Trim the override before applying precedence. A blank/whitespace
+         // `credentials.apiKey` should NOT bypass `getNimApiKey()` — that would
+         // build a client with an unusable bearer token and fail at request time
+         // with a confusing 401 instead of at construction time.
+         const overrideApiKey = credentials?.apiKey?.trim();
+         this.apiKey =
+             overrideApiKey && overrideApiKey.length > 0
+                 ? overrideApiKey
+                 : getNimApiKey();
+         this.baseURL =
+             credentials?.baseURL ??
+                 process.env.NVIDIA_NIM_BASE_URL ??
+                 NVIDIA_NIM_DEFAULT_BASE_URL;
+         const nim = createOpenAI({
+             apiKey: this.apiKey,
+             baseURL: this.baseURL,
+             fetch: makeLoggingFetch("nvidia-nim"),
+         });
+         // .chat() — NIM exposes /v1/chat/completions, not /v1/responses
+         this.model = nim.chat(this.modelName);
+         logger.debug("NVIDIA NIM Provider initialized", {
+             modelName: this.modelName,
+             providerName: this.providerName,
+             baseURL: this.baseURL,
+         });
+     }
+     async executeStream(options, _analysisSchema) {
+         return withClientSpan({
+             name: "neurolink.provider.stream",
+             tracer: tracers.provider,
+             attributes: {
+                 [ATTR.GEN_AI_SYSTEM]: "nvidia-nim",
+                 [ATTR.GEN_AI_MODEL]: this.modelName,
+                 [ATTR.GEN_AI_OPERATION]: "stream",
+                 [ATTR.NL_STREAM_MODE]: true,
+             },
+         }, async () => this.executeStreamInner(options));
+     }
+     async executeStreamInner(options) {
+         this.validateStreamOptions(options);
+         const startTime = Date.now();
+         const timeout = this.getTimeout(options);
+         const timeoutController = createTimeoutController(timeout, this.providerName, "stream");
+         try {
+             const shouldUseTools = !options.disableTools && this.supportsTools();
+             const tools = shouldUseTools
+                 ? options.tools || (await this.getAllTools())
+                 : {};
+             const messages = await this.buildMessagesForStream(options);
+             const model = await this.getAISDKModelWithMiddleware(options);
+             // Callers pass `thinkingLevel` directly on generate/stream options
+             // (matching Anthropic / Gemini 2.5+ / Gemini 3 conventions). Fall back
+             // to the legacy `thinkingConfig.thinkingLevel` shape for compatibility.
+             const tl = options.thinkingLevel ??
+                 options.thinkingConfig?.thinkingLevel;
+             const thinkingEnabled = tl !== undefined && tl !== "minimal";
+             let extraBody = buildNvidiaNimExtraBody(thinkingEnabled, options.maxTokens);
+             // Inline the retry-strip union — CLAUDE.md rule 2 forbids type aliases
+             // outside src/lib/types/. The two literals match the 400 error keys NIM
+             // returns for the only two extras we know how to drop and retry.
+             const callStream = (body, stripped = []) => streamText({
+                 model,
+                 messages,
+                 temperature: options.temperature,
+                 maxOutputTokens: options.maxTokens,
+                 tools,
+                 stopWhen: stepCountIs(options.maxSteps || DEFAULT_MAX_STEPS),
+                 toolChoice: resolveToolChoice(options, tools, shouldUseTools),
+                 abortSignal: composeAbortSignals(options.abortSignal, timeoutController?.controller.signal),
+                 providerOptions: (() => {
+                     // StreamOptions doesn't formally type providerOptions but the
+                     // upstream Vercel AI SDK accepts it. Read it via an indexed access
+                     // and merge with NIM extras instead of overwriting any per-call
+                     // openai.body.
+                     const callerBase = options
+                         .providerOptions ?? {};
+                     const callerOpenai = callerBase.openai ?? {};
+                     const callerBody = callerOpenai.body ?? {};
+                     // Per-call overrides win over env/NIM defaults — defaults first,
+                     // overrides last. chat_template_kwargs is merged shallowly too so
+                     // a request that only sets `reasoning_budget` doesn't drop the
+                     // env-driven `thinking: true` flag (and vice versa).
+                     const defaultsBody = body;
+                     const mergedBody = {
+                         ...defaultsBody,
+                         ...callerBody,
+                     };
+                     const mergedKwargs = {
+                         ...(defaultsBody.chat_template_kwargs ?? {}),
+                         ...(callerBody.chat_template_kwargs ?? {}),
+                     };
+                     // Apply retry-strip AFTER merging so caller-supplied copies of
+                     // the offending field are also dropped (otherwise the retry would
+                     // re-send the field that NIM just rejected).
+                     if (stripped.includes("chat_template")) {
+                         delete mergedBody.chat_template;
+                     }
+                     if (stripped.includes("reasoning_budget")) {
+                         delete mergedKwargs.reasoning_budget;
+                     }
+                     if (Object.keys(mergedKwargs).length > 0) {
+                         mergedBody.chat_template_kwargs = mergedKwargs;
+                     }
+                     else {
+                         delete mergedBody.chat_template_kwargs;
+                     }
+                     if (Object.keys(callerBase).length === 0 &&
+                         Object.keys(mergedBody).length === 0) {
+                         return undefined;
+                     }
+                     return {
+                         ...callerBase,
+                         openai: {
+                             ...callerOpenai,
+                             body: mergedBody,
+                         },
+                         // eslint-disable-next-line @typescript-eslint/no-explicit-any
+                     };
+                 })(),
+                 experimental_telemetry: this.telemetryHandler.getTelemetryConfig(options),
+                 experimental_repairToolCall: this.getToolCallRepairFn(options),
+                 onStepFinish: ({ toolCalls, toolResults }) => {
+                     emitToolEndFromStepFinish(this.neurolink?.getEventEmitter(), toolResults);
+                     this.handleToolExecutionStorage(toolCalls, toolResults, options, new Date()).catch((error) => {
+                         logger.warn("[NvidiaNimProvider] Failed to store tool executions", {
+                             provider: this.providerName,
+                             error: error instanceof Error ? error.message : String(error),
+                         });
+                     });
+                 },
+             });
+             let result;
+             try {
+                 result = await callStream(extraBody);
+             }
+             catch (error) {
+                 const errMsg = error instanceof Error ? error.message : String(error);
+                 const status = error?.statusCode;
+                 if (status === 400) {
+                     const lower = errMsg.toLowerCase();
+                     if (lower.includes("reasoning_budget")) {
+                         logger.warn("NIM rejected reasoning_budget; retrying without it");
+                         extraBody = stripReasoningBudget(extraBody);
+                         result = await callStream(extraBody, ["reasoning_budget"]);
+                     }
+                     else if (lower.includes("chat_template")) {
+                         logger.warn("NIM rejected chat_template; retrying without it");
+                         extraBody = stripChatTemplate(extraBody);
+                         result = await callStream(extraBody, ["chat_template"]);
+                     }
+                     else {
+                         throw error;
+                     }
+                 }
+                 else {
+                     throw error;
+                 }
+             }
+             timeoutController?.cleanup();
+             const transformedStream = this.createTextStream(result);
+             const analyticsPromise = streamAnalyticsCollector.createAnalytics(this.providerName, this.modelName, toAnalyticsStreamResult(result), Date.now() - startTime, {
+                 requestId: `nvidia-nim-stream-${Date.now()}`,
+                 streamingMode: true,
+             });
+             return {
+                 stream: transformedStream,
+                 provider: this.providerName,
+                 model: this.modelName,
+                 analytics: analyticsPromise,
+                 metadata: { startTime, streamId: `nvidia-nim-${Date.now()}` },
+             };
+         }
+         catch (error) {
+             timeoutController?.cleanup();
+             throw this.handleProviderError(error);
+         }
+     }
+     getProviderName() {
+         return this.providerName;
+     }
+     getDefaultModel() {
+         return getDefaultNimModel();
+     }
+     getAISDKModel() {
+         return this.model;
+     }
+     formatProviderError(error) {
+         if (error instanceof TimeoutError) {
+             return new Error(`NVIDIA NIM request timed out: ${error.message}`);
+         }
+         const errorRecord = error;
+         const message = typeof errorRecord?.message === "string"
+             ? errorRecord.message
+             : "Unknown error";
+         if (message.includes("Invalid API key") ||
+             message.includes("401") ||
+             message.includes("Unauthorized")) {
+             return new Error("Invalid NVIDIA NIM API key. Get one at https://build.nvidia.com/settings/api-keys");
+         }
+         if (message.includes("rate limit") || message.includes("429")) {
+             return new Error("NVIDIA NIM rate limit exceeded");
+         }
+         if (message.includes("404") || message.includes("model_not_found")) {
+             return new Error(`NVIDIA NIM model '${this.modelName}' not available. Browse the catalog at https://build.nvidia.com/models`);
+         }
+         if (message.includes("quota") || message.includes("403")) {
+             return new Error("NVIDIA NIM quota exceeded for your account");
+         }
+         return new Error(`NVIDIA NIM error: ${message}`);
+     }
+     async validateConfiguration() {
+         return typeof this.apiKey === "string" && this.apiKey.trim().length > 0;
+     }
+     getConfiguration() {
+         return {
+             provider: this.providerName,
+             model: this.modelName,
+             defaultModel: getDefaultNimModel(),
+             baseURL: this.baseURL,
+         };
+     }
+ }
+ export default NvidiaNimProvider;
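
To make the env-var plumbing and merge precedence concrete, a small sketch of the extras the code above assembles. The env var names and body fields come from buildNvidiaNimExtraBody; the StreamOptions fields (input, thinkingLevel, maxTokens, providerOptions) are assumed to be forwarded unchanged by the BaseProvider stream path.

// Sketch: env-derived defaults plus a per-call override, merged as in callStream above.
process.env.NVIDIA_NIM_TOP_K = "40";               // -> top_k: 40 (ignored when set to -1)
process.env.NVIDIA_NIM_REPETITION_PENALTY = "1.1"; // -> repetition_penalty: 1.1 (ignored when 1)

const options = {
  input: { text: "Summarise this release." }, // assumed StreamOptions shape
  thinkingLevel: "high",                      // any value other than "minimal" enables chat_template_kwargs
  maxTokens: 2048,                            // also becomes the default reasoning_budget
  providerOptions: {
    openai: {
      // Per-call values win over env defaults; chat_template_kwargs merges shallowly,
      // so thinking/enable_thinking from the defaults are kept.
      body: { chat_template_kwargs: { reasoning_budget: 512 } },
    },
  },
};
// Effective extras sent to NIM for this call:
// { top_k: 40, repetition_penalty: 1.1,
//   chat_template_kwargs: { thinking: true, enable_thinking: true, reasoning_budget: 512 } }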
@@ -8,6 +8,15 @@
   * Supports HTTP/HTTPS, SOCKS4/5, authentication, and NO_PROXY bypass
   */
  export declare function createProxyFetch(): typeof fetch;
+ /**
+  * Mask credentials in a proxy URL for safe logging/reporting.
+  *
+  * Exported so provider-side fetch loggers (lmStudio, llamaCpp, deepseek,
+  * nvidiaNim) can sanitize upstream URLs before emitting warnings — reverse-
+  * proxied deployments can embed credentials or signed query params in the
+  * base URL, and those should never reach application logs verbatim.
+  */
+ export declare function maskProxyUrl(url: string | null | undefined): string | null;
  /**
   * Get enhanced proxy status information
   */
@@ -614,8 +614,13 @@ export function createProxyFetch() {
  }
  /**
   * Mask credentials in a proxy URL for safe logging/reporting.
+  *
+  * Exported so provider-side fetch loggers (lmStudio, llamaCpp, deepseek,
+  * nvidiaNim) can sanitize upstream URLs before emitting warnings — reverse-
+  * proxied deployments can embed credentials or signed query params in the
+  * base URL, and those should never reach application logs verbatim.
   */
- function maskProxyUrl(url) {
+ export function maskProxyUrl(url) {
      if (!url) {
          return null;
      }
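
The signature makes the null case explicit: callers must supply their own fallback when the URL cannot be sanitized, which is what the provider fetch loggers added in this release do. A minimal sketch (import path assumed; the exact masked output format is not shown in this diff):

import { maskProxyUrl } from "@juspay/neurolink/dist/proxy/proxyFetch.js"; // assumed path

// Returns null for empty input, so pair it with a placeholder before logging.
const safeUrl = maskProxyUrl(process.env.HTTPS_PROXY) ?? "<redacted>";
console.log(`upstream proxy: ${safeUrl}`); // credentials embedded in the URL never reach logs verbatim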
@@ -3,7 +3,7 @@
   */
  import type { UnknownRecord, JsonValue, StreamingCapability } from "./common.js";
  import type { ProviderError } from "./errors.js";
- import { AIProviderName, AnthropicModels, BedrockModels, GoogleAIModels, OpenAIModels, VertexModels } from "../constants/enums.js";
+ import { AIProviderName, AnthropicModels, BedrockModels, DeepSeekModels, GoogleAIModels, LlamaCppModels, LMStudioModels, NvidiaNimModels, OpenAIModels, VertexModels } from "../constants/enums.js";
  import type { Tool } from "ai";
  import type { ValidationSchema } from "./aliases.js";
  import type { EnhancedGenerateResult, GenerateResult, TextGenerationOptions } from "./generate.js";
@@ -20,7 +20,7 @@ export type AISDKModel = {
  /**
   * Union type of all supported model names
   */
- export type SupportedModelName = BedrockModels | OpenAIModels | VertexModels | GoogleAIModels | AnthropicModels;
+ export type SupportedModelName = BedrockModels | DeepSeekModels | OpenAIModels | VertexModels | GoogleAIModels | AnthropicModels | NvidiaNimModels | LMStudioModels | LlamaCppModels;
  /**
   * Extract provider names from enum
   */
@@ -147,6 +147,40 @@ export type NeurolinkCredentials = {
      ollama?: {
          baseURL?: string;
      };
+     deepseek?: {
+         apiKey?: string;
+         baseURL?: string;
+     };
+     nvidiaNim?: {
+         apiKey?: string;
+         baseURL?: string;
+     };
+     lmStudio?: {
+         apiKey?: string;
+         baseURL?: string;
+     };
+     llamacpp?: {
+         apiKey?: string;
+         baseURL?: string;
+     };
+ };
+ /**
+  * NVIDIA NIM extra request body parameters passed via `providerOptions.openai.body`.
+  * Lives here (not in providers/nvidiaNim.ts) per CLAUDE.md rule 2.
+  */
+ export type NvidiaNimExtraBody = {
+     top_k?: number;
+     min_p?: number;
+     repetition_penalty?: number;
+     min_tokens?: number;
+     chat_template?: string;
+     request_id?: string;
+     ignore_eos?: boolean;
+     chat_template_kwargs?: {
+         thinking?: boolean;
+         enable_thinking?: boolean;
+         reasoning_budget?: number;
+     };
  };
  /**
   * AWS Credential Validation Result
@@ -398,6 +432,7 @@ export type ProviderConfigOptions = {
      description: string;
      instructions: string[];
      fallbackEnvVars?: string[];
+     optional?: boolean;
  };
  /**
   * AI Provider type with flexible parameter support
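
As a quick illustration of the new type surface, a literal conforming to the declarations above (a sketch; the export path and all values are placeholders, not code from the package):

import type { NeurolinkCredentials, NvidiaNimExtraBody } from "@juspay/neurolink"; // assumed export path

const credentials: NeurolinkCredentials = {
  // New in 9.60.0: credential blocks for the added providers.
  deepseek: { apiKey: "sk-..." },
  nvidiaNim: { apiKey: "nvapi-...", baseURL: "https://integrate.api.nvidia.com/v1" },
  lmStudio: { baseURL: "http://localhost:1234/v1" }, // local servers usually need no key
  llamacpp: { baseURL: "http://localhost:8080/v1" },
};

const extras: NvidiaNimExtraBody = {
  top_k: 40,
  repetition_penalty: 1.1,
  chat_template_kwargs: { thinking: true, reasoning_budget: 1024 },
};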
@@ -2,7 +2,7 @@
   * Centralized model choices for CLI commands
   * Derives choices from model enums to ensure consistency
   */
- import { AIProviderName, OpenAIModels, AnthropicModels, GoogleAIModels, BedrockModels, VertexModels, MistralModels, OllamaModels, AzureOpenAIModels, LiteLLMModels, HuggingFaceModels, SageMakerModels, OpenRouterModels, } from "../constants/enums.js";
+ import { AIProviderName, OpenAIModels, AnthropicModels, GoogleAIModels, BedrockModels, VertexModels, MistralModels, OllamaModels, AzureOpenAIModels, LiteLLMModels, HuggingFaceModels, SageMakerModels, OpenRouterModels, DeepSeekModels, NvidiaNimModels, } from "../constants/enums.js";
  /**
   * Top models per provider with descriptions for CLI prompts
   * These are curated lists of the most commonly used/recommended models
@@ -230,6 +230,47 @@ const TOP_MODELS_CONFIG = {
          { model: "gpt-4-turbo", description: "Turbo compatible model" },
          { model: "gpt-3.5-turbo", description: "Legacy compatible model" },
      ],
+     [AIProviderName.DEEPSEEK]: [
+         { model: "deepseek-chat", description: "DeepSeek-V3 general chat" },
+         {
+             model: "deepseek-reasoner",
+             description: "DeepSeek-R1 reasoning (slower, deeper)",
+         },
+     ],
+     [AIProviderName.NVIDIA_NIM]: [
+         {
+             model: "meta/llama-3.3-70b-instruct",
+             description: "Recommended - Llama 3.3 70B",
+         },
+         {
+             model: "nvidia/llama-3.3-nemotron-super-49b-v1",
+             description: "Nemotron Super (reasoning)",
+         },
+         {
+             model: "deepseek-ai/deepseek-r1",
+             description: "DeepSeek-R1 hosted on NIM",
+         },
+         {
+             model: "meta/llama-3.2-90b-vision-instruct",
+             description: "Llama 3.2 vision",
+         },
+         {
+             model: "mistralai/mixtral-8x22b-instruct-v0.1",
+             description: "Mixtral 8x22B",
+         },
+     ],
+     [AIProviderName.LM_STUDIO]: [
+         {
+             model: "",
+             description: "Auto-discover loaded model from /v1/models",
+         },
+     ],
+     [AIProviderName.LLAMACPP]: [
+         {
+             model: "",
+             description: "Use whatever model llama-server has loaded",
+         },
+     ],
      [AIProviderName.AUTO]: [],
  };
  /**
@@ -249,6 +290,12 @@ export const DEFAULT_MODELS = {
      [AIProviderName.SAGEMAKER]: SageMakerModels.LLAMA_4_MAVERICK_17B_128E,
      [AIProviderName.OPENROUTER]: OpenRouterModels.CLAUDE_3_5_SONNET,
      [AIProviderName.OPENAI_COMPATIBLE]: "gpt-4o",
+     [AIProviderName.DEEPSEEK]: DeepSeekModels.DEEPSEEK_CHAT,
+     [AIProviderName.NVIDIA_NIM]: NvidiaNimModels.LLAMA_3_3_70B_INSTRUCT,
+     // LM Studio + llama.cpp auto-discover their loaded model from /v1/models;
+     // an empty default is the documented signal to use that path.
+     [AIProviderName.LM_STUDIO]: "",
+     [AIProviderName.LLAMACPP]: "",
  };
  /**
   * Model enum mappings for getAllModels
@@ -267,6 +314,10 @@ const MODEL_ENUMS = {
      [AIProviderName.SAGEMAKER]: SageMakerModels,
      [AIProviderName.OPENROUTER]: OpenRouterModels,
      [AIProviderName.OPENAI_COMPATIBLE]: null,
+     [AIProviderName.DEEPSEEK]: DeepSeekModels,
+     [AIProviderName.NVIDIA_NIM]: NvidiaNimModels,
+     [AIProviderName.LM_STUDIO]: null,
+     [AIProviderName.LLAMACPP]: null,
      [AIProviderName.AUTO]: null,
  };
  /**
@@ -283,7 +334,14 @@ export function getTopModelChoices(provider, limit = 5) {
          return [];
      }
      const choices = config.slice(0, limit).map((item) => ({
-         name: `${item.model} (${item.description})`,
+         // Empty-string entries are auto-discovery sentinels for LM Studio /
+         // llama.cpp. Surface them with a friendly label so the CLI doesn't show a
+         // blank row, but keep `value: ""` so it matches `DEFAULT_MODELS` (which
+         // also uses `""`) and any caller that preselects the active choice via
+         // the default model still resolves to this entry.
+         name: item.model.length > 0
+             ? `${item.model} (${item.description})`
+             : `Auto-discover loaded model (${item.description})`,
          value: item.model,
          description: item.description,
      }));
@@ -387,8 +445,14 @@ export function getPopularModelsAcrossProviders() {
      const popularModels = [];
      for (const [provider, config] of Object.entries(TOP_MODELS_CONFIG)) {
          if (config && config.length > 0) {
-             // Take top 2 from each provider
-             config.slice(0, 2).forEach((item) => {
+             // Take top 2 from each provider, ignoring blank auto-discovery sentinels.
+             // (Auto-discovery is surfaced separately by `getTopModelChoices` for
+             // LM Studio / llama.cpp; we don't want it to appear in the cross-
+             // provider popular-models list as an empty value.)
+             config
+                 .filter((item) => item.model.length > 0)
+                 .slice(0, 2)
+                 .forEach((item) => {
                  popularModels.push({
                      provider: provider,
                      model: item.model,
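
The practical effect on the CLI choice list, as a sketch (import paths assumed; only the first returned entry is shown, since the rest of getTopModelChoices is outside this hunk):

import { AIProviderName } from "@juspay/neurolink/dist/constants/enums.js";                         // assumed path
import { getTopModelChoices, DEFAULT_MODELS } from "@juspay/neurolink/dist/utils/modelChoices.js"; // assumed path

const choices = getTopModelChoices(AIProviderName.LM_STUDIO);
// choices[0] is roughly:
// { name: "Auto-discover loaded model (Auto-discover loaded model from /v1/models)",
//   value: "", description: "Auto-discover loaded model from /v1/models" }

// The empty value matches the provider default, so a prompt that preselects
// DEFAULT_MODELS[AIProviderName.LM_STUDIO] ("") resolves to this entry.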
@@ -8,5 +8,10 @@ export declare function calculateCost(provider: string, model: string, usage: To
   * Check if pricing is available for a provider/model combination.
   * Checks the rate table directly instead of computing a cost,
   * so even very cheap models (e.g. gemini-1.5-flash) are detected correctly.
+  *
+  * Zero-rate entries (the local-provider `_default` for lm-studio / llamacpp)
+  * count as "no pricing" — those providers explicitly don't have an upstream
+  * USD price, and any caller gated by `hasPricing()` should treat them as
+  * non-billable rather than zero-cost-billable.
   */
  export declare function hasPricing(provider: string, model: string): boolean;
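
A short sketch of the intended caller behaviour (import path assumed; the calculateCost usage argument is truncated in this diff, so its shape here is a guess, and the provider keys are illustrative):

import { hasPricing, calculateCost } from "@juspay/neurolink/dist/utils/pricing.js"; // assumed path

hasPricing("lm-studio", "local-model");      // false: zero-rate `_default`, treated as non-billable
hasPricing("google-ai", "gemini-1.5-flash"); // true even though the per-token rate is tiny

const provider = "lm-studio";
const model = "local-model";
const usage = { inputTokens: 1200, outputTokens: 300 }; // assumed shape of the truncated usage type
const cost = hasPricing(provider, model)
  ? calculateCost(provider, model, usage)
  : undefined; // report "not billed" instead of a misleading $0.00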