@juspay/neurolink 9.59.5 → 9.60.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/README.md +11 -7
- package/dist/adapters/providerImageAdapter.js +52 -2
- package/dist/browser/neurolink.min.js +352 -352
- package/dist/cli/factories/commandFactory.js +15 -1
- package/dist/cli/utils/interactiveSetup.js +64 -0
- package/dist/constants/contextWindows.d.ts +5 -1
- package/dist/constants/contextWindows.js +67 -3
- package/dist/constants/enums.d.ts +52 -0
- package/dist/constants/enums.js +63 -0
- package/dist/core/baseProvider.d.ts +15 -6
- package/dist/core/baseProvider.js +28 -0
- package/dist/factories/providerRegistry.js +25 -1
- package/dist/lib/adapters/providerImageAdapter.js +52 -2
- package/dist/lib/constants/contextWindows.d.ts +5 -1
- package/dist/lib/constants/contextWindows.js +67 -3
- package/dist/lib/constants/enums.d.ts +52 -0
- package/dist/lib/constants/enums.js +63 -0
- package/dist/lib/core/baseProvider.d.ts +15 -6
- package/dist/lib/core/baseProvider.js +28 -0
- package/dist/lib/factories/providerRegistry.js +25 -1
- package/dist/lib/neurolink.js +1 -1
- package/dist/lib/providers/deepseek.d.ts +29 -0
- package/dist/lib/providers/deepseek.js +216 -0
- package/dist/lib/providers/index.d.ts +4 -0
- package/dist/lib/providers/index.js +4 -0
- package/dist/lib/providers/llamaCpp.d.ts +34 -0
- package/dist/lib/providers/llamaCpp.js +315 -0
- package/dist/lib/providers/lmStudio.d.ts +34 -0
- package/dist/lib/providers/lmStudio.js +306 -0
- package/dist/lib/providers/nvidiaNim.d.ts +31 -0
- package/dist/lib/providers/nvidiaNim.js +354 -0
- package/dist/lib/proxy/proxyFetch.d.ts +9 -0
- package/dist/lib/proxy/proxyFetch.js +6 -1
- package/dist/lib/types/providers.d.ts +37 -2
- package/dist/lib/types/providers.js +1 -1
- package/dist/lib/utils/modelChoices.js +68 -4
- package/dist/lib/utils/pricing.d.ts +5 -0
- package/dist/lib/utils/pricing.js +94 -3
- package/dist/lib/utils/providerConfig.d.ts +16 -0
- package/dist/lib/utils/providerConfig.js +82 -0
- package/dist/neurolink.js +1 -1
- package/dist/providers/deepseek.d.ts +29 -0
- package/dist/providers/deepseek.js +215 -0
- package/dist/providers/index.d.ts +4 -0
- package/dist/providers/index.js +4 -0
- package/dist/providers/llamaCpp.d.ts +34 -0
- package/dist/providers/llamaCpp.js +314 -0
- package/dist/providers/lmStudio.d.ts +34 -0
- package/dist/providers/lmStudio.js +305 -0
- package/dist/providers/nvidiaNim.d.ts +31 -0
- package/dist/providers/nvidiaNim.js +353 -0
- package/dist/proxy/proxyFetch.d.ts +9 -0
- package/dist/proxy/proxyFetch.js +6 -1
- package/dist/types/providers.d.ts +37 -2
- package/dist/utils/modelChoices.js +68 -4
- package/dist/utils/pricing.d.ts +5 -0
- package/dist/utils/pricing.js +94 -3
- package/dist/utils/providerConfig.d.ts +16 -0
- package/dist/utils/providerConfig.js +82 -0
- package/package.json +19 -12
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import { type LanguageModel } from "ai";
|
|
2
|
+
import type { AIProviderName } from "../constants/enums.js";
|
|
3
|
+
import { BaseProvider } from "../core/baseProvider.js";
|
|
4
|
+
import type { NeurolinkCredentials, StreamOptions, StreamResult, ValidationSchema } from "../types/index.js";
|
|
5
|
+
/**
 * Type declarations for the NVIDIA NIM provider.
 *
 * The provider talks to NVIDIA's hosted (or self-hosted) inference endpoints
 * through the OpenAI-compatible chat API. NIM-specific request extras
 * (top_k, min_p, repetition_penalty, chat_template_kwargs.reasoning_budget)
 * travel in `providerOptions.openai.body`, and a request rejected with HTTP
 * 400 for one of the droppable extras is retried once without that extra.
 */
export declare class NvidiaNimProvider extends BaseProvider {
    /** Chat model handle created by the constructor and returned by `getAISDKModel()`. */
    private model;
    /** API key resolved at construction time; checked by `validateConfiguration()`. */
    private apiKey;
    /** Endpoint base URL resolved at construction time; reported by `getConfiguration()`. */
    private baseURL;
    /**
     * @param modelName   Model identifier forwarded to the base provider.
     * @param sdk         Optional NeuroLink instance; it is only forwarded to the
     *                    base class when it is an object exposing `getInMemoryServers`.
     * @param _region     Accepted for signature parity; not read by this provider.
     * @param credentials Optional per-instance `apiKey` / `baseURL` overrides.
     */
    constructor(modelName?: string, sdk?: unknown, _region?: string, credentials?: NeurolinkCredentials["nvidiaNim"]);
    /** Runs the streaming request inside a `neurolink.provider.stream` client span. */
    protected executeStream(options: StreamOptions, _analysisSchema?: ValidationSchema): Promise<StreamResult>;
    /** Builds the request, issues it, and performs the single retry on a 400 for a droppable extra. */
    private executeStreamInner;
    /** Returns the provider name held by the base class. */
    protected getProviderName(): AIProviderName;
    /** Returns the default model, resolved from `NVIDIA_NIM_MODEL` with an enum fallback. */
    protected getDefaultModel(): string;
    /** Returns the chat model handle built in the constructor. */
    protected getAISDKModel(): LanguageModel;
    /** Converts timeout, auth, rate-limit, missing-model and quota failures into descriptive `Error`s. */
    protected formatProviderError(error: unknown): Error;
    /** Resolves to `true` when the resolved API key is a non-blank string. */
    validateConfiguration(): Promise<boolean>;
    /** Snapshot of the provider name, active model, default model and base URL. */
    getConfiguration(): {
        provider: AIProviderName;
        model: string;
        defaultModel: string;
        baseURL: string;
    };
}
|
|
31
|
+
export default NvidiaNimProvider;
|
|
@@ -0,0 +1,353 @@
|
|
|
1
|
+
import { createOpenAI } from "@ai-sdk/openai";
|
|
2
|
+
import { stepCountIs, streamText } from "ai";
|
|
3
|
+
import { NvidiaNimModels } from "../constants/enums.js";
|
|
4
|
+
import { BaseProvider } from "../core/baseProvider.js";
|
|
5
|
+
import { DEFAULT_MAX_STEPS } from "../core/constants.js";
|
|
6
|
+
import { streamAnalyticsCollector } from "../core/streamAnalytics.js";
|
|
7
|
+
import { createProxyFetch, maskProxyUrl } from "../proxy/proxyFetch.js";
|
|
8
|
+
import { tracers, ATTR, withClientSpan } from "../telemetry/index.js";
|
|
9
|
+
import { logger } from "../utils/logger.js";
|
|
10
|
+
import { createNvidiaNimConfig, getProviderModel, validateApiKey, } from "../utils/providerConfig.js";
|
|
11
|
+
import { composeAbortSignals, createTimeoutController, TimeoutError, } from "../utils/timeout.js";
|
|
12
|
+
import { emitToolEndFromStepFinish } from "../utils/toolEndEmitter.js";
|
|
13
|
+
/**
 * Wraps the proxy-aware fetch so that every non-OK upstream response is
 * reported through `logger.warn`, tagged with the provider name.
 *
 * The request URL is passed through `maskProxyUrl` before logging; when that
 * returns null/undefined the literal "<redacted>" is logged instead of the raw
 * URL. With NEUROLINK_DEBUG_HTTP=1 the first 800 characters of the response
 * body (read from a clone, so the caller can still consume the original) are
 * included as structured fields; otherwise a single-line summary is logged.
 *
 * @param provider Label placed in the `[provider]` prefix of each warning.
 * @returns A fetch-compatible async function that returns the upstream response unchanged.
 */
const makeLoggingFetch = (provider) => {
    const proxiedFetch = createProxyFetch();
    // Accepts the three fetch input shapes: string, URL, or a Request-like object.
    const describeTarget = (input) => {
        if (typeof input === "string") {
            return input;
        }
        if (input instanceof URL) {
            return input.toString();
        }
        return input.url;
    };
    return async (input, init) => {
        const target = describeTarget(input);
        // Only string bodies are measured; any other body kind is reported as 0.
        const requestChars = typeof init?.body === "string" ? init.body.length : 0;
        const upstream = await proxiedFetch(input, init);
        if (upstream.ok) {
            return upstream;
        }
        const loggedUrl = maskProxyUrl(target) ?? "<redacted>";
        if (process.env.NEUROLINK_DEBUG_HTTP !== "1") {
            logger.warn(`[${provider}] upstream ${upstream.status} url=${loggedUrl} reqSize=${requestChars}`);
            return upstream;
        }
        const bodyText = await upstream
            .clone()
            .text()
            .catch(() => "<unreadable>");
        logger.warn(`[${provider}] upstream ${upstream.status}`, {
            url: loggedUrl,
            body: bodyText.slice(0, 800),
            reqSize: requestChars,
        });
        return upstream;
    };
};
|
|
44
|
+
import { resolveToolChoice } from "../utils/toolChoice.js";
|
|
45
|
+
import { toAnalyticsStreamResult } from "./providerTypeUtils.js";
|
|
46
|
+
// Base URL the provider falls back to when neither `credentials.baseURL` nor
// the NVIDIA_NIM_BASE_URL environment variable supplies one.
const NVIDIA_NIM_DEFAULT_BASE_URL = "https://integrate.api.nvidia.com/v1";
|
|
47
|
+
/**
 * Reads an environment variable as a base-10 integer.
 *
 * @param k Name of the environment variable.
 * @returns The parsed integer, or `undefined` when the variable is unset,
 *          empty, or does not start with a parseable integer.
 */
const envInt = (k) => {
    const raw = process.env[k];
    // An unset/empty value is routed through NaN so both "missing" and
    // "unparseable" collapse into the same `undefined` result.
    const parsed = raw ? Number.parseInt(raw, 10) : Number.NaN;
    return Number.isFinite(parsed) ? parsed : undefined;
};
|
|
55
|
+
/**
 * Reads an environment variable as a floating-point number.
 *
 * @param k Name of the environment variable.
 * @returns The parsed finite number, or `undefined` when the variable is
 *          unset, empty, unparseable, or parses to a non-finite value.
 */
const envFloat = (k) => {
    const raw = process.env[k];
    // An unset/empty value is routed through NaN so both "missing" and
    // "unparseable" collapse into the same `undefined` result.
    const parsed = raw ? Number.parseFloat(raw) : Number.NaN;
    return Number.isFinite(parsed) ? parsed : undefined;
};
|
|
63
|
+
/**
 * Assembles the NIM-specific request-body extras from environment variables
 * and the caller's thinking settings.
 *
 * Numeric extras are only emitted when the corresponding variable is set AND
 * differs from the value this function treats as "nothing to send":
 *   NVIDIA_NIM_TOP_K (-1), NVIDIA_NIM_MIN_P (0),
 *   NVIDIA_NIM_REPETITION_PENALTY (1), NVIDIA_NIM_MIN_TOKENS (0).
 * NVIDIA_NIM_CHAT_TEMPLATE is forwarded verbatim when non-empty.
 *
 * @param thinkingEnabled When truthy, adds `chat_template_kwargs` with both
 *                        `thinking` and `enable_thinking` set to true.
 * @param maxTokens       When truthy (and thinking is enabled), is sent as
 *                        `chat_template_kwargs.reasoning_budget`.
 * @returns A fresh plain object; empty when nothing applies.
 */
const buildNvidiaNimExtraBody = (thinkingEnabled, maxTokens) => {
    // [request field, configured value, value that means "omit the field"]
    const numericExtras = [
        ["top_k", envInt("NVIDIA_NIM_TOP_K"), -1],
        ["min_p", envFloat("NVIDIA_NIM_MIN_P"), 0],
        ["repetition_penalty", envFloat("NVIDIA_NIM_REPETITION_PENALTY"), 1],
        ["min_tokens", envInt("NVIDIA_NIM_MIN_TOKENS"), 0],
    ];
    const payload = Object.fromEntries(numericExtras
        .filter(([, value, omitWhen]) => value !== undefined && value !== omitWhen)
        .map(([field, value]) => [field, value]));
    const template = process.env.NVIDIA_NIM_CHAT_TEMPLATE;
    if (template) {
        payload.chat_template = template;
    }
    if (thinkingEnabled) {
        payload.chat_template_kwargs = {
            thinking: true,
            enable_thinking: true,
            ...(maxTokens ? { reasoning_budget: maxTokens } : {}),
        };
    }
    return payload;
};
|
|
94
|
+
/**
 * Returns a copy of `body` with `chat_template_kwargs.reasoning_budget`
 * removed. If that leaves `chat_template_kwargs` without any keys, the whole
 * `chat_template_kwargs` entry is dropped as well. The input is not mutated.
 *
 * @param body Extra-body object to copy.
 * @returns A new object (the top level and `chat_template_kwargs` are copied).
 */
const stripReasoningBudget = (body) => {
    const result = { ...body };
    const kwargs = result.chat_template_kwargs;
    if (!kwargs) {
        return result;
    }
    const { reasoning_budget: _dropped, ...remaining } = kwargs;
    if (Object.keys(remaining).length > 0) {
        result.chat_template_kwargs = remaining;
    }
    else {
        delete result.chat_template_kwargs;
    }
    return result;
};
|
|
105
|
+
/**
 * Returns a shallow copy of `body` without its `chat_template` entry.
 * All other entries are carried over; the input is not mutated.
 *
 * @param body Extra-body object to copy.
 * @returns A new object lacking `chat_template`.
 */
const stripChatTemplate = (body) => {
    const { chat_template: _dropped, ...remaining } = body;
    return remaining;
};
|
|
109
|
+
/**
 * Resolves the NVIDIA NIM API key by running `validateApiKey` over the
 * provider config produced by `createNvidiaNimConfig()`.
 */
const getNimApiKey = () => validateApiKey(createNvidiaNimConfig());
|
|
112
|
+
/**
 * Resolves the default model via `getProviderModel`, keyed on the
 * NVIDIA_NIM_MODEL environment variable with
 * `NvidiaNimModels.LLAMA_3_3_70B_INSTRUCT` as the fallback argument.
 */
const getDefaultNimModel = () => getProviderModel("NVIDIA_NIM_MODEL", NvidiaNimModels.LLAMA_3_3_70B_INSTRUCT);
|
|
115
|
+
/**
 * NVIDIA NIM Provider
 *
 * Wraps NVIDIA's hosted (or self-hosted) inference endpoints via the
 * OpenAI-compatible chat API. NIM-specific extras (top_k, min_p,
 * repetition_penalty, chat_template_kwargs.reasoning_budget, ...) are passed
 * via `providerOptions.openai.body`. When the request is rejected with HTTP
 * 400 and the error message names `reasoning_budget` or `chat_template`, the
 * request is retried exactly once without that extra.
 */
export class NvidiaNimProvider extends BaseProvider {
    model;
    apiKey;
    baseURL;
    /**
     * @param modelName   Model identifier forwarded to the base provider.
     * @param sdk         Optional NeuroLink instance; only forwarded to the base
     *                    class when it is an object exposing `getInMemoryServers`.
     * @param _region     Unused; kept for signature parity with other providers.
     * @param credentials Optional `{ apiKey, baseURL }` overrides.
     */
    constructor(modelName, sdk, _region, credentials) {
        const validatedNeurolink = sdk && typeof sdk === "object" && "getInMemoryServers" in sdk
            ? sdk
            : undefined;
        super(modelName, "nvidia-nim", validatedNeurolink);
        // Trim the override before applying precedence. A blank/whitespace
        // `credentials.apiKey` should NOT bypass `getNimApiKey()` — that would
        // build a client with an unusable bearer token and fail at request time
        // with a confusing 401 instead of at construction time.
        const overrideApiKey = credentials?.apiKey?.trim();
        this.apiKey =
            overrideApiKey && overrideApiKey.length > 0
                ? overrideApiKey
                : getNimApiKey();
        // Same precedence rule for the endpoint: a blank `credentials.baseURL`
        // or an empty `NVIDIA_NIM_BASE_URL=` line in a .env file is not nullish,
        // so `??` would hand an empty base URL to the client. Treat blank values
        // as "not provided" and fall through to the next source.
        const overrideBaseURL = credentials?.baseURL?.trim();
        const envBaseURL = process.env.NVIDIA_NIM_BASE_URL?.trim();
        this.baseURL =
            overrideBaseURL || envBaseURL || NVIDIA_NIM_DEFAULT_BASE_URL;
        const nim = createOpenAI({
            apiKey: this.apiKey,
            baseURL: this.baseURL,
            fetch: makeLoggingFetch("nvidia-nim"),
        });
        // .chat() — NIM exposes /v1/chat/completions, not /v1/responses
        this.model = nim.chat(this.modelName);
        logger.debug("NVIDIA NIM Provider initialized", {
            modelName: this.modelName,
            providerName: this.providerName,
            // Base URLs of reverse-proxied deployments can embed credentials, so
            // log the masked form — same policy as the fetch logger above.
            baseURL: maskProxyUrl(this.baseURL) ?? "<redacted>",
        });
    }
    /**
     * Streaming entry point: runs `executeStreamInner` inside a
     * `neurolink.provider.stream` client span tagged with provider and model.
     *
     * @param options         Stream options supplied by the caller.
     * @param _analysisSchema Unused by this provider.
     */
    async executeStream(options, _analysisSchema) {
        return withClientSpan({
            name: "neurolink.provider.stream",
            tracer: tracers.provider,
            attributes: {
                [ATTR.GEN_AI_SYSTEM]: "nvidia-nim",
                [ATTR.GEN_AI_MODEL]: this.modelName,
                [ATTR.GEN_AI_OPERATION]: "stream",
                [ATTR.NL_STREAM_MODE]: true,
            },
        }, async () => this.executeStreamInner(options));
    }
    /**
     * Builds the streamText request (tools, messages, NIM extras), issues it,
     * retries once on a 400 that names a droppable extra, and wraps the result
     * with analytics.
     *
     * @param options Stream options supplied by the caller.
     * @returns `{ stream, provider, model, analytics, metadata }`.
     * @throws The value returned by `handleProviderError` for any failure.
     */
    async executeStreamInner(options) {
        this.validateStreamOptions(options);
        const startTime = Date.now();
        const timeout = this.getTimeout(options);
        const timeoutController = createTimeoutController(timeout, this.providerName, "stream");
        try {
            const shouldUseTools = !options.disableTools && this.supportsTools();
            const tools = shouldUseTools
                ? options.tools || (await this.getAllTools())
                : {};
            const messages = await this.buildMessagesForStream(options);
            const model = await this.getAISDKModelWithMiddleware(options);
            // Callers pass `thinkingLevel` directly on generate/stream options
            // (matching Anthropic / Gemini 2.5+ / Gemini 3 conventions). Fall back
            // to the legacy `thinkingConfig.thinkingLevel` shape for compatibility.
            const tl = options.thinkingLevel ??
                options.thinkingConfig?.thinkingLevel;
            const thinkingEnabled = tl !== undefined && tl !== "minimal";
            let extraBody = buildNvidiaNimExtraBody(thinkingEnabled, options.maxTokens);
            // `stripped` lists the extras to remove from the merged request body
            // on a retry. Only "chat_template" and "reasoning_budget" are
            // recognised — the two extras the retry logic below knows how to drop.
            const callStream = (body, stripped = []) => streamText({
                model,
                messages,
                temperature: options.temperature,
                maxOutputTokens: options.maxTokens,
                tools,
                stopWhen: stepCountIs(options.maxSteps || DEFAULT_MAX_STEPS),
                toolChoice: resolveToolChoice(options, tools, shouldUseTools),
                abortSignal: composeAbortSignals(options.abortSignal, timeoutController?.controller.signal),
                providerOptions: (() => {
                    // Read any caller-supplied providerOptions and merge the NIM
                    // extras into `openai.body` instead of overwriting a per-call
                    // body.
                    const callerBase = options
                        .providerOptions ?? {};
                    const callerOpenai = callerBase.openai ?? {};
                    const callerBody = callerOpenai.body ?? {};
                    // Per-call overrides win over the computed defaults — defaults
                    // first, overrides last. chat_template_kwargs is merged
                    // shallowly too so a request that only sets `reasoning_budget`
                    // doesn't drop the default `thinking: true` flag (and vice
                    // versa).
                    const defaultsBody = body;
                    const mergedBody = {
                        ...defaultsBody,
                        ...callerBody,
                    };
                    const mergedKwargs = {
                        ...(defaultsBody.chat_template_kwargs ?? {}),
                        ...(callerBody.chat_template_kwargs ?? {}),
                    };
                    // Apply retry-strip AFTER merging so caller-supplied copies of
                    // the offending field are also dropped (otherwise the retry would
                    // re-send the field that NIM just rejected).
                    if (stripped.includes("chat_template")) {
                        delete mergedBody.chat_template;
                    }
                    if (stripped.includes("reasoning_budget")) {
                        delete mergedKwargs.reasoning_budget;
                    }
                    if (Object.keys(mergedKwargs).length > 0) {
                        mergedBody.chat_template_kwargs = mergedKwargs;
                    }
                    else {
                        delete mergedBody.chat_template_kwargs;
                    }
                    // Nothing from the caller and nothing to add: send no
                    // providerOptions at all.
                    if (Object.keys(callerBase).length === 0 &&
                        Object.keys(mergedBody).length === 0) {
                        return undefined;
                    }
                    return {
                        ...callerBase,
                        openai: {
                            ...callerOpenai,
                            body: mergedBody,
                        },
                    };
                })(),
                experimental_telemetry: this.telemetryHandler.getTelemetryConfig(options),
                experimental_repairToolCall: this.getToolCallRepairFn(options),
                onStepFinish: ({ toolCalls, toolResults }) => {
                    emitToolEndFromStepFinish(this.neurolink?.getEventEmitter(), toolResults);
                    // Storage is best-effort: a failure is logged, never thrown
                    // into the stream.
                    this.handleToolExecutionStorage(toolCalls, toolResults, options, new Date()).catch((error) => {
                        logger.warn("[NvidiaNimProvider] Failed to store tool executions", {
                            provider: this.providerName,
                            error: error instanceof Error ? error.message : String(error),
                        });
                    });
                },
            });
            let result;
            try {
                result = await callStream(extraBody);
            }
            catch (error) {
                // NOTE(review): this retry only runs when `streamText` throws at
                // call time; confirm against the AI SDK version in use that HTTP
                // 400s surface here rather than later while the stream is read.
                const errMsg = error instanceof Error ? error.message : String(error);
                const status = error?.statusCode;
                if (status !== 400) {
                    throw error;
                }
                const lower = errMsg.toLowerCase();
                if (lower.includes("reasoning_budget")) {
                    logger.warn("NIM rejected reasoning_budget; retrying without it");
                    extraBody = stripReasoningBudget(extraBody);
                    result = await callStream(extraBody, ["reasoning_budget"]);
                }
                else if (lower.includes("chat_template")) {
                    logger.warn("NIM rejected chat_template; retrying without it");
                    extraBody = stripChatTemplate(extraBody);
                    result = await callStream(extraBody, ["chat_template"]);
                }
                else {
                    throw error;
                }
            }
            timeoutController?.cleanup();
            const transformedStream = this.createTextStream(result);
            const analyticsPromise = streamAnalyticsCollector.createAnalytics(this.providerName, this.modelName, toAnalyticsStreamResult(result), Date.now() - startTime, {
                requestId: `nvidia-nim-stream-${Date.now()}`,
                streamingMode: true,
            });
            return {
                stream: transformedStream,
                provider: this.providerName,
                model: this.modelName,
                analytics: analyticsPromise,
                metadata: { startTime, streamId: `nvidia-nim-${Date.now()}` },
            };
        }
        catch (error) {
            timeoutController?.cleanup();
            throw this.handleProviderError(error);
        }
    }
    /** Returns the provider name held by the base class. */
    getProviderName() {
        return this.providerName;
    }
    /** Returns the default model (NVIDIA_NIM_MODEL or the enum fallback). */
    getDefaultModel() {
        return getDefaultNimModel();
    }
    /** Returns the chat model handle built in the constructor. */
    getAISDKModel() {
        return this.model;
    }
    /**
     * Maps a raw failure to a descriptive `Error`.
     *
     * Matching is done on substrings of `error.message` (case-sensitive), in
     * this order: timeout, auth (401), rate limit (429), missing model (404),
     * quota (403); anything else is wrapped as a generic NVIDIA NIM error.
     *
     * @param error The thrown value; a missing/non-string message becomes "Unknown error".
     * @returns A new `Error` carrying the mapped message.
     */
    formatProviderError(error) {
        if (error instanceof TimeoutError) {
            return new Error(`NVIDIA NIM request timed out: ${error.message}`);
        }
        const errorRecord = error;
        const message = typeof errorRecord?.message === "string"
            ? errorRecord.message
            : "Unknown error";
        if (message.includes("Invalid API key") ||
            message.includes("401") ||
            message.includes("Unauthorized")) {
            return new Error("Invalid NVIDIA NIM API key. Get one at https://build.nvidia.com/settings/api-keys");
        }
        if (message.includes("rate limit") || message.includes("429")) {
            return new Error("NVIDIA NIM rate limit exceeded");
        }
        if (message.includes("404") || message.includes("model_not_found")) {
            return new Error(`NVIDIA NIM model '${this.modelName}' not available. Browse the catalog at https://build.nvidia.com/models`);
        }
        if (message.includes("quota") || message.includes("403")) {
            return new Error("NVIDIA NIM quota exceeded for your account");
        }
        return new Error(`NVIDIA NIM error: ${message}`);
    }
    /** Resolves to `true` when the resolved API key is a non-blank string. */
    async validateConfiguration() {
        return typeof this.apiKey === "string" && this.apiKey.trim().length > 0;
    }
    /** Snapshot of the provider name, active model, default model and base URL. */
    getConfiguration() {
        return {
            provider: this.providerName,
            model: this.modelName,
            defaultModel: getDefaultNimModel(),
            baseURL: this.baseURL,
        };
    }
}
|
|
353
|
+
export default NvidiaNimProvider;
|
|
@@ -8,6 +8,15 @@
|
|
|
8
8
|
* Supports HTTP/HTTPS, SOCKS4/5, authentication, and NO_PROXY bypass
|
|
9
9
|
*/
|
|
10
10
|
export declare function createProxyFetch(): typeof fetch;
|
|
11
|
+
/**
|
|
12
|
+
* Mask credentials in a proxy URL for safe logging/reporting.
|
|
13
|
+
*
|
|
14
|
+
* Exported so provider-side fetch loggers (lmStudio, llamaCpp, deepseek,
|
|
15
|
+
* nvidiaNim) can sanitize upstream URLs before emitting warnings — reverse-
|
|
16
|
+
* proxied deployments can embed credentials or signed query params in the
|
|
17
|
+
* base URL, and those should never reach application logs verbatim.
|
|
18
|
+
*/
|
|
19
|
+
export declare function maskProxyUrl(url: string | null | undefined): string | null;
|
|
11
20
|
/**
|
|
12
21
|
* Get enhanced proxy status information
|
|
13
22
|
*/
|
package/dist/proxy/proxyFetch.js
CHANGED
|
@@ -614,8 +614,13 @@ export function createProxyFetch() {
|
|
|
614
614
|
}
|
|
615
615
|
/**
|
|
616
616
|
* Mask credentials in a proxy URL for safe logging/reporting.
|
|
617
|
+
*
|
|
618
|
+
* Exported so provider-side fetch loggers (lmStudio, llamaCpp, deepseek,
|
|
619
|
+
* nvidiaNim) can sanitize upstream URLs before emitting warnings — reverse-
|
|
620
|
+
* proxied deployments can embed credentials or signed query params in the
|
|
621
|
+
* base URL, and those should never reach application logs verbatim.
|
|
617
622
|
*/
|
|
618
|
-
function maskProxyUrl(url) {
|
|
623
|
+
export function maskProxyUrl(url) {
|
|
619
624
|
if (!url) {
|
|
620
625
|
return null;
|
|
621
626
|
}
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
*/
|
|
4
4
|
import type { UnknownRecord, JsonValue, StreamingCapability } from "./common.js";
|
|
5
5
|
import type { ProviderError } from "./errors.js";
|
|
6
|
-
import { AIProviderName, AnthropicModels, BedrockModels, GoogleAIModels, OpenAIModels, VertexModels } from "../constants/enums.js";
|
|
6
|
+
import { AIProviderName, AnthropicModels, BedrockModels, DeepSeekModels, GoogleAIModels, LlamaCppModels, LMStudioModels, NvidiaNimModels, OpenAIModels, VertexModels } from "../constants/enums.js";
|
|
7
7
|
import type { Tool } from "ai";
|
|
8
8
|
import type { ValidationSchema } from "./aliases.js";
|
|
9
9
|
import type { EnhancedGenerateResult, GenerateResult, TextGenerationOptions } from "./generate.js";
|
|
@@ -20,7 +20,7 @@ export type AISDKModel = {
|
|
|
20
20
|
/**
|
|
21
21
|
* Union type of all supported model names
|
|
22
22
|
*/
|
|
23
|
-
export type SupportedModelName = BedrockModels | OpenAIModels | VertexModels | GoogleAIModels | AnthropicModels;
|
|
23
|
+
export type SupportedModelName = BedrockModels | DeepSeekModels | OpenAIModels | VertexModels | GoogleAIModels | AnthropicModels | NvidiaNimModels | LMStudioModels | LlamaCppModels;
|
|
24
24
|
/**
|
|
25
25
|
* Extract provider names from enum
|
|
26
26
|
*/
|
|
@@ -147,6 +147,40 @@ export type NeurolinkCredentials = {
|
|
|
147
147
|
ollama?: {
|
|
148
148
|
baseURL?: string;
|
|
149
149
|
};
|
|
150
|
+
deepseek?: {
|
|
151
|
+
apiKey?: string;
|
|
152
|
+
baseURL?: string;
|
|
153
|
+
};
|
|
154
|
+
nvidiaNim?: {
|
|
155
|
+
apiKey?: string;
|
|
156
|
+
baseURL?: string;
|
|
157
|
+
};
|
|
158
|
+
lmStudio?: {
|
|
159
|
+
apiKey?: string;
|
|
160
|
+
baseURL?: string;
|
|
161
|
+
};
|
|
162
|
+
llamacpp?: {
|
|
163
|
+
apiKey?: string;
|
|
164
|
+
baseURL?: string;
|
|
165
|
+
};
|
|
166
|
+
};
|
|
167
|
+
/**
|
|
168
|
+
* NVIDIA NIM extra request body parameters passed via `providerOptions.openai.body`.
|
|
169
|
+
* Lives here (not in providers/nvidiaNim.ts) per CLAUDE.md rule 2.
|
|
170
|
+
*/
|
|
171
|
+
export type NvidiaNimExtraBody = {
    /** Sampling extra; the provider fills it from NVIDIA_NIM_TOP_K. */
    top_k?: number;
    /** Sampling extra; the provider fills it from NVIDIA_NIM_MIN_P. */
    min_p?: number;
    /** Sampling extra; the provider fills it from NVIDIA_NIM_REPETITION_PENALTY. */
    repetition_penalty?: number;
    /** Generation extra; the provider fills it from NVIDIA_NIM_MIN_TOKENS. */
    min_tokens?: number;
    /** Chat template override; the provider fills it from NVIDIA_NIM_CHAT_TEMPLATE. */
    chat_template?: string;
    /** Caller-supplied only — presumably a request correlation id; verify against NIM docs. */
    request_id?: string;
    /** Caller-supplied only — presumably disables end-of-sequence stopping; verify against NIM docs. */
    ignore_eos?: boolean;
    /** Template arguments; the provider sets these when thinking is enabled. */
    chat_template_kwargs?: {
        thinking?: boolean;
        enable_thinking?: boolean;
        /** Set from the request's `maxTokens` when thinking is enabled. */
        reasoning_budget?: number;
    };
};
|
|
151
185
|
/**
|
|
152
186
|
* AWS Credential Validation Result
|
|
@@ -398,6 +432,7 @@ export type ProviderConfigOptions = {
|
|
|
398
432
|
description: string;
|
|
399
433
|
instructions: string[];
|
|
400
434
|
fallbackEnvVars?: string[];
|
|
435
|
+
optional?: boolean;
|
|
401
436
|
};
|
|
402
437
|
/**
|
|
403
438
|
* AI Provider type with flexible parameter support
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* Centralized model choices for CLI commands
|
|
3
3
|
* Derives choices from model enums to ensure consistency
|
|
4
4
|
*/
|
|
5
|
-
import { AIProviderName, OpenAIModels, AnthropicModels, GoogleAIModels, BedrockModels, VertexModels, MistralModels, OllamaModels, AzureOpenAIModels, LiteLLMModels, HuggingFaceModels, SageMakerModels, OpenRouterModels, } from "../constants/enums.js";
|
|
5
|
+
import { AIProviderName, OpenAIModels, AnthropicModels, GoogleAIModels, BedrockModels, VertexModels, MistralModels, OllamaModels, AzureOpenAIModels, LiteLLMModels, HuggingFaceModels, SageMakerModels, OpenRouterModels, DeepSeekModels, NvidiaNimModels, } from "../constants/enums.js";
|
|
6
6
|
/**
|
|
7
7
|
* Top models per provider with descriptions for CLI prompts
|
|
8
8
|
* These are curated lists of the most commonly used/recommended models
|
|
@@ -230,6 +230,47 @@ const TOP_MODELS_CONFIG = {
|
|
|
230
230
|
{ model: "gpt-4-turbo", description: "Turbo compatible model" },
|
|
231
231
|
{ model: "gpt-3.5-turbo", description: "Legacy compatible model" },
|
|
232
232
|
],
|
|
233
|
+
[AIProviderName.DEEPSEEK]: [
|
|
234
|
+
{ model: "deepseek-chat", description: "DeepSeek-V3 general chat" },
|
|
235
|
+
{
|
|
236
|
+
model: "deepseek-reasoner",
|
|
237
|
+
description: "DeepSeek-R1 reasoning (slower, deeper)",
|
|
238
|
+
},
|
|
239
|
+
],
|
|
240
|
+
[AIProviderName.NVIDIA_NIM]: [
|
|
241
|
+
{
|
|
242
|
+
model: "meta/llama-3.3-70b-instruct",
|
|
243
|
+
description: "Recommended - Llama 3.3 70B",
|
|
244
|
+
},
|
|
245
|
+
{
|
|
246
|
+
model: "nvidia/llama-3.3-nemotron-super-49b-v1",
|
|
247
|
+
description: "Nemotron Super (reasoning)",
|
|
248
|
+
},
|
|
249
|
+
{
|
|
250
|
+
model: "deepseek-ai/deepseek-r1",
|
|
251
|
+
description: "DeepSeek-R1 hosted on NIM",
|
|
252
|
+
},
|
|
253
|
+
{
|
|
254
|
+
model: "meta/llama-3.2-90b-vision-instruct",
|
|
255
|
+
description: "Llama 3.2 vision",
|
|
256
|
+
},
|
|
257
|
+
{
|
|
258
|
+
model: "mistralai/mixtral-8x22b-instruct-v0.1",
|
|
259
|
+
description: "Mixtral 8x22B",
|
|
260
|
+
},
|
|
261
|
+
],
|
|
262
|
+
[AIProviderName.LM_STUDIO]: [
|
|
263
|
+
{
|
|
264
|
+
model: "",
|
|
265
|
+
description: "Auto-discover loaded model from /v1/models",
|
|
266
|
+
},
|
|
267
|
+
],
|
|
268
|
+
[AIProviderName.LLAMACPP]: [
|
|
269
|
+
{
|
|
270
|
+
model: "",
|
|
271
|
+
description: "Use whatever model llama-server has loaded",
|
|
272
|
+
},
|
|
273
|
+
],
|
|
233
274
|
[AIProviderName.AUTO]: [],
|
|
234
275
|
};
|
|
235
276
|
/**
|
|
@@ -249,6 +290,12 @@ export const DEFAULT_MODELS = {
|
|
|
249
290
|
[AIProviderName.SAGEMAKER]: SageMakerModels.LLAMA_4_MAVERICK_17B_128E,
|
|
250
291
|
[AIProviderName.OPENROUTER]: OpenRouterModels.CLAUDE_3_5_SONNET,
|
|
251
292
|
[AIProviderName.OPENAI_COMPATIBLE]: "gpt-4o",
|
|
293
|
+
[AIProviderName.DEEPSEEK]: DeepSeekModels.DEEPSEEK_CHAT,
|
|
294
|
+
[AIProviderName.NVIDIA_NIM]: NvidiaNimModels.LLAMA_3_3_70B_INSTRUCT,
|
|
295
|
+
// LM Studio + llama.cpp auto-discover their loaded model from /v1/models;
|
|
296
|
+
// an empty default is the documented signal to use that path.
|
|
297
|
+
[AIProviderName.LM_STUDIO]: "",
|
|
298
|
+
[AIProviderName.LLAMACPP]: "",
|
|
252
299
|
};
|
|
253
300
|
/**
|
|
254
301
|
* Model enum mappings for getAllModels
|
|
@@ -267,6 +314,10 @@ const MODEL_ENUMS = {
|
|
|
267
314
|
[AIProviderName.SAGEMAKER]: SageMakerModels,
|
|
268
315
|
[AIProviderName.OPENROUTER]: OpenRouterModels,
|
|
269
316
|
[AIProviderName.OPENAI_COMPATIBLE]: null,
|
|
317
|
+
[AIProviderName.DEEPSEEK]: DeepSeekModels,
|
|
318
|
+
[AIProviderName.NVIDIA_NIM]: NvidiaNimModels,
|
|
319
|
+
[AIProviderName.LM_STUDIO]: null,
|
|
320
|
+
[AIProviderName.LLAMACPP]: null,
|
|
270
321
|
[AIProviderName.AUTO]: null,
|
|
271
322
|
};
|
|
272
323
|
/**
|
|
@@ -283,7 +334,14 @@ export function getTopModelChoices(provider, limit = 5) {
|
|
|
283
334
|
return [];
|
|
284
335
|
}
|
|
285
336
|
const choices = config.slice(0, limit).map((item) => ({
|
|
286
|
-
|
|
337
|
+
// Empty-string entries are auto-discovery sentinels for LM Studio /
|
|
338
|
+
// llama.cpp. Surface them with a friendly label so the CLI doesn't show a
|
|
339
|
+
// blank row, but keep `value: ""` so it matches `DEFAULT_MODELS` (which
|
|
340
|
+
// also uses `""`) and any caller that preselects the active choice via
|
|
341
|
+
// the default model still resolves to this entry.
|
|
342
|
+
name: item.model.length > 0
|
|
343
|
+
? `${item.model} (${item.description})`
|
|
344
|
+
: `Auto-discover loaded model (${item.description})`,
|
|
287
345
|
value: item.model,
|
|
288
346
|
description: item.description,
|
|
289
347
|
}));
|
|
@@ -387,8 +445,14 @@ export function getPopularModelsAcrossProviders() {
|
|
|
387
445
|
const popularModels = [];
|
|
388
446
|
for (const [provider, config] of Object.entries(TOP_MODELS_CONFIG)) {
|
|
389
447
|
if (config && config.length > 0) {
|
|
390
|
-
// Take top 2 from each provider
|
|
391
|
-
|
|
448
|
+
// Take top 2 from each provider, ignoring blank auto-discovery sentinels.
|
|
449
|
+
// (Auto-discovery is surfaced separately by `getTopModelChoices` for
|
|
450
|
+
// LM Studio / llama.cpp; we don't want it to appear in the cross-
|
|
451
|
+
// provider popular-models list as an empty value.)
|
|
452
|
+
config
|
|
453
|
+
.filter((item) => item.model.length > 0)
|
|
454
|
+
.slice(0, 2)
|
|
455
|
+
.forEach((item) => {
|
|
392
456
|
popularModels.push({
|
|
393
457
|
provider: provider,
|
|
394
458
|
model: item.model,
|
package/dist/utils/pricing.d.ts
CHANGED
|
@@ -8,5 +8,10 @@ export declare function calculateCost(provider: string, model: string, usage: To
|
|
|
8
8
|
* Check if pricing is available for a provider/model combination.
|
|
9
9
|
* Checks the rate table directly instead of computing a cost,
|
|
10
10
|
* so even very cheap models (e.g. gemini-1.5-flash) are detected correctly.
|
|
11
|
+
*
|
|
12
|
+
* Zero-rate entries (the local-provider `_default` for lm-studio / llamacpp)
|
|
13
|
+
* count as "no pricing" — those providers explicitly don't have an upstream
|
|
14
|
+
* USD price, and any caller gated by `hasPricing()` should treat them as
|
|
15
|
+
* non-billable rather than zero-cost-billable.
|
|
11
16
|
*/
|
|
12
17
|
export declare function hasPricing(provider: string, model: string): boolean;
|