@juspay/neurolink 9.59.5 → 9.60.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/README.md +11 -7
- package/dist/adapters/providerImageAdapter.js +52 -2
- package/dist/browser/neurolink.min.js +352 -352
- package/dist/cli/factories/commandFactory.js +15 -1
- package/dist/cli/utils/interactiveSetup.js +64 -0
- package/dist/constants/contextWindows.d.ts +5 -1
- package/dist/constants/contextWindows.js +67 -3
- package/dist/constants/enums.d.ts +52 -0
- package/dist/constants/enums.js +63 -0
- package/dist/core/baseProvider.d.ts +15 -6
- package/dist/core/baseProvider.js +28 -0
- package/dist/factories/providerRegistry.js +25 -1
- package/dist/lib/adapters/providerImageAdapter.js +52 -2
- package/dist/lib/constants/contextWindows.d.ts +5 -1
- package/dist/lib/constants/contextWindows.js +67 -3
- package/dist/lib/constants/enums.d.ts +52 -0
- package/dist/lib/constants/enums.js +63 -0
- package/dist/lib/core/baseProvider.d.ts +15 -6
- package/dist/lib/core/baseProvider.js +28 -0
- package/dist/lib/factories/providerRegistry.js +25 -1
- package/dist/lib/neurolink.js +1 -1
- package/dist/lib/providers/deepseek.d.ts +29 -0
- package/dist/lib/providers/deepseek.js +216 -0
- package/dist/lib/providers/index.d.ts +4 -0
- package/dist/lib/providers/index.js +4 -0
- package/dist/lib/providers/llamaCpp.d.ts +34 -0
- package/dist/lib/providers/llamaCpp.js +315 -0
- package/dist/lib/providers/lmStudio.d.ts +34 -0
- package/dist/lib/providers/lmStudio.js +306 -0
- package/dist/lib/providers/nvidiaNim.d.ts +31 -0
- package/dist/lib/providers/nvidiaNim.js +354 -0
- package/dist/lib/proxy/proxyFetch.d.ts +9 -0
- package/dist/lib/proxy/proxyFetch.js +6 -1
- package/dist/lib/types/providers.d.ts +37 -2
- package/dist/lib/types/providers.js +1 -1
- package/dist/lib/utils/modelChoices.js +68 -4
- package/dist/lib/utils/pricing.d.ts +5 -0
- package/dist/lib/utils/pricing.js +94 -3
- package/dist/lib/utils/providerConfig.d.ts +16 -0
- package/dist/lib/utils/providerConfig.js +82 -0
- package/dist/neurolink.js +1 -1
- package/dist/providers/deepseek.d.ts +29 -0
- package/dist/providers/deepseek.js +215 -0
- package/dist/providers/index.d.ts +4 -0
- package/dist/providers/index.js +4 -0
- package/dist/providers/llamaCpp.d.ts +34 -0
- package/dist/providers/llamaCpp.js +314 -0
- package/dist/providers/lmStudio.d.ts +34 -0
- package/dist/providers/lmStudio.js +305 -0
- package/dist/providers/nvidiaNim.d.ts +31 -0
- package/dist/providers/nvidiaNim.js +353 -0
- package/dist/proxy/proxyFetch.d.ts +9 -0
- package/dist/proxy/proxyFetch.js +6 -1
- package/dist/types/providers.d.ts +37 -2
- package/dist/utils/modelChoices.js +68 -4
- package/dist/utils/pricing.d.ts +5 -0
- package/dist/utils/pricing.js +94 -3
- package/dist/utils/providerConfig.d.ts +16 -0
- package/dist/utils/providerConfig.js +82 -0
- package/package.json +19 -12
|
@@ -51,6 +51,16 @@ export class CLICommandFactory {
|
|
|
51
51
|
"mistral",
|
|
52
52
|
"litellm",
|
|
53
53
|
"sagemaker",
|
|
54
|
+
"deepseek",
|
|
55
|
+
"ds",
|
|
56
|
+
"nvidia-nim",
|
|
57
|
+
"nim",
|
|
58
|
+
"nvidia",
|
|
59
|
+
"lm-studio",
|
|
60
|
+
"lmstudio",
|
|
61
|
+
"lms",
|
|
62
|
+
"llamacpp",
|
|
63
|
+
"llama.cpp",
|
|
54
64
|
],
|
|
55
65
|
default: "auto",
|
|
56
66
|
description: "AI provider to use (auto-selects best available). Use 'anthropic-subscription' for Claude subscription plans.",
|
|
@@ -1322,6 +1332,10 @@ export class CLICommandFactory {
|
|
|
1322
1332
|
"vertex",
|
|
1323
1333
|
"huggingface",
|
|
1324
1334
|
"mistral",
|
|
1335
|
+
"deepseek",
|
|
1336
|
+
"nvidia-nim",
|
|
1337
|
+
"lm-studio",
|
|
1338
|
+
"llamacpp",
|
|
1325
1339
|
],
|
|
1326
1340
|
})
|
|
1327
1341
|
.option("list", {
|
|
@@ -2877,7 +2891,7 @@ export class CLICommandFactory {
|
|
|
2877
2891
|
" generate|gen)\n" +
|
|
2878
2892
|
' case "${prev}" in\n' +
|
|
2879
2893
|
" --provider|-p)\n" +
|
|
2880
|
-
' COMPREPLY=( $(compgen -W "auto openai bedrock vertex googleVertex anthropic azure google-ai huggingface ollama mistral litellm" -- ${cur}) )\n' +
|
|
2894
|
+
' COMPREPLY=( $(compgen -W "auto openai openai-compatible openrouter or bedrock vertex googleVertex anthropic anthropic-subscription azure google-ai google-ai-studio huggingface ollama mistral litellm sagemaker deepseek ds nvidia-nim nim lm-studio lmstudio llamacpp llama.cpp" -- ${cur}) )\n' +
|
|
2881
2895
|
" return 0\n" +
|
|
2882
2896
|
" ;;\n" +
|
|
2883
2897
|
" --format|-f|--output-format)\n" +
|
|
@@ -204,6 +204,70 @@ export const PROVIDER_CONFIGS = [
|
|
|
204
204
|
},
|
|
205
205
|
],
|
|
206
206
|
},
|
|
207
|
+
{
|
|
208
|
+
id: AIProviderName.DEEPSEEK,
|
|
209
|
+
name: "DeepSeek",
|
|
210
|
+
description: "Cost-efficient frontier models (deepseek-chat V3, deepseek-reasoner R1)",
|
|
211
|
+
envVars: [
|
|
212
|
+
{
|
|
213
|
+
key: "DEEPSEEK_API_KEY",
|
|
214
|
+
prompt: "DeepSeek API Key (get one at https://platform.deepseek.com)",
|
|
215
|
+
secure: true,
|
|
216
|
+
},
|
|
217
|
+
],
|
|
218
|
+
},
|
|
219
|
+
{
|
|
220
|
+
id: AIProviderName.NVIDIA_NIM,
|
|
221
|
+
name: "NVIDIA NIM",
|
|
222
|
+
description: "NVIDIA-hosted Llama, Nemotron, Mistral, and DeepSeek-R1 models",
|
|
223
|
+
envVars: [
|
|
224
|
+
{
|
|
225
|
+
key: "NVIDIA_NIM_API_KEY",
|
|
226
|
+
prompt: "NVIDIA NIM API Key (get one at https://build.nvidia.com/settings/api-keys)",
|
|
227
|
+
secure: true,
|
|
228
|
+
},
|
|
229
|
+
],
|
|
230
|
+
},
|
|
231
|
+
{
|
|
232
|
+
id: AIProviderName.LM_STUDIO,
|
|
233
|
+
name: "LM Studio",
|
|
234
|
+
description: "Local inference via LM Studio desktop app (https://lmstudio.ai)",
|
|
235
|
+
envVars: [
|
|
236
|
+
{
|
|
237
|
+
key: "LM_STUDIO_BASE_URL",
|
|
238
|
+
prompt: "LM Studio server URL",
|
|
239
|
+
default: "http://localhost:1234/v1",
|
|
240
|
+
secure: false,
|
|
241
|
+
optional: true,
|
|
242
|
+
},
|
|
243
|
+
{
|
|
244
|
+
key: "LM_STUDIO_API_KEY",
|
|
245
|
+
prompt: "LM Studio API Key (leave blank — only needed behind an auth proxy)",
|
|
246
|
+
secure: false,
|
|
247
|
+
optional: true,
|
|
248
|
+
},
|
|
249
|
+
],
|
|
250
|
+
},
|
|
251
|
+
{
|
|
252
|
+
id: AIProviderName.LLAMACPP,
|
|
253
|
+
name: "llama.cpp",
|
|
254
|
+
description: "Local inference via llama-server (https://github.com/ggerganov/llama.cpp). Start with: ./llama-server -m model.gguf --port 8080 --jinja",
|
|
255
|
+
envVars: [
|
|
256
|
+
{
|
|
257
|
+
key: "LLAMACPP_BASE_URL",
|
|
258
|
+
prompt: "llama-server URL",
|
|
259
|
+
default: "http://localhost:8080/v1",
|
|
260
|
+
secure: false,
|
|
261
|
+
optional: true,
|
|
262
|
+
},
|
|
263
|
+
{
|
|
264
|
+
key: "LLAMACPP_API_KEY",
|
|
265
|
+
prompt: "llama-server API Key (leave blank — only needed behind an auth proxy)",
|
|
266
|
+
secure: false,
|
|
267
|
+
optional: true,
|
|
268
|
+
},
|
|
269
|
+
],
|
|
270
|
+
},
|
|
207
271
|
];
|
|
208
272
|
/**
|
|
209
273
|
* Run the interactive setup wizard
|
|
@@ -36,9 +36,13 @@ export declare function getContextWindowSize(provider: string, model?: string):
|
|
|
36
36
|
/**
|
|
37
37
|
* Calculate output token reserve for a given context window.
|
|
38
38
|
*
|
|
39
|
+
* Returns the *real* token count that will be reserved for output so callers
|
|
40
|
+
* (`getAvailableInputTokens`, `BudgetChecker`, conversation-memory pruning, file
|
|
41
|
+
* summarisation) compute input budget against the actual outgoing maxTokens.
|
|
42
|
+
*
|
|
39
43
|
* @param contextWindow - Total context window size
|
|
40
44
|
* @param maxTokens - Explicit maxTokens from user config (if set)
|
|
41
|
-
* @returns Number of tokens reserved for output
|
|
45
|
+
* @returns Number of tokens reserved for output (matches what's sent upstream)
|
|
42
46
|
*/
|
|
43
47
|
export declare function getOutputReserve(contextWindow: number, maxTokens?: number): number;
|
|
44
48
|
/**
|
|
@@ -23,6 +23,34 @@ export const DEFAULT_OUTPUT_RESERVE_RATIO = 0.35;
|
|
|
23
23
|
* The "_default" key is the fallback for unknown models within a provider.
|
|
24
24
|
*/
|
|
25
25
|
export const MODEL_CONTEXT_WINDOWS = {
|
|
26
|
+
deepseek: {
|
|
27
|
+
_default: 64_000,
|
|
28
|
+
"deepseek-chat": 64_000,
|
|
29
|
+
"deepseek-reasoner": 64_000,
|
|
30
|
+
},
|
|
31
|
+
"nvidia-nim": {
|
|
32
|
+
_default: 128_000,
|
|
33
|
+
"meta/llama-3.3-70b-instruct": 128_000,
|
|
34
|
+
"meta/llama-3.1-405b-instruct": 128_000,
|
|
35
|
+
"meta/llama-3.1-70b-instruct": 128_000,
|
|
36
|
+
"meta/llama-3.2-90b-vision-instruct": 128_000,
|
|
37
|
+
"meta/llama-3.2-11b-vision-instruct": 128_000,
|
|
38
|
+
"nvidia/llama-3.3-nemotron-super-49b-v1": 128_000,
|
|
39
|
+
"nvidia/llama-3.1-nemotron-nano-8b-v1": 128_000,
|
|
40
|
+
"nvidia/llama-3.1-nemotron-70b-instruct": 128_000,
|
|
41
|
+
"deepseek-ai/deepseek-r1": 128_000,
|
|
42
|
+
"deepseek-ai/deepseek-r1-distill-llama-70b": 128_000,
|
|
43
|
+
"mistralai/mixtral-8x22b-instruct-v0.1": 65_536,
|
|
44
|
+
"mistralai/mixtral-8x7b-instruct-v0.1": 32_768,
|
|
45
|
+
"microsoft/phi-4": 16_384,
|
|
46
|
+
"google/gemma-3-27b-it": 8_192,
|
|
47
|
+
},
|
|
48
|
+
"lm-studio": {
|
|
49
|
+
_default: 8_192,
|
|
50
|
+
},
|
|
51
|
+
llamacpp: {
|
|
52
|
+
_default: 8_192,
|
|
53
|
+
},
|
|
26
54
|
anthropic: {
|
|
27
55
|
_default: 200_000,
|
|
28
56
|
// Claude 4.6 (Feb 2026) — 1M context window
|
|
@@ -223,6 +251,36 @@ export const MODEL_CONTEXT_WINDOWS = {
|
|
|
223
251
|
"qwen3-vl-8b-instruct": 32_768,
|
|
224
252
|
},
|
|
225
253
|
};
|
|
254
|
+
/**
|
|
255
|
+
* Map of provider aliases to canonical MODEL_CONTEXT_WINDOWS keys.
|
|
256
|
+
*
|
|
257
|
+
* Callers reach `getContextWindowSize` via the unnormalized form on
|
|
258
|
+
* `options.provider` (e.g. CLI `--provider lmstudio`, alias `llama.cpp`),
|
|
259
|
+
* and `ProviderFactory.normalizeProviderName` runs only at instantiation —
|
|
260
|
+
* its output never reaches budget calculations. Without this normalization
|
|
261
|
+
* those alias forms miss the table and fall back to `DEFAULT_CONTEXT_WINDOW`,
|
|
262
|
+
* understating the budget for LM Studio / llama.cpp / NVIDIA NIM.
|
|
263
|
+
*
|
|
264
|
+
* The keys here are the result of stripping non-alpha characters, so
|
|
265
|
+
* `lm-studio` -> `lmstudio`, `nvidia-nim` -> `nvidianim`, `llama.cpp` -> `llamacpp`.
|
|
266
|
+
*/
|
|
267
|
+
const PROVIDER_ALIAS_MAP = {
|
|
268
|
+
googleaistudio: "google-ai-studio",
|
|
269
|
+
lmstudio: "lm-studio",
|
|
270
|
+
llamacpp: "llamacpp",
|
|
271
|
+
nvidianim: "nvidia-nim",
|
|
272
|
+
nim: "nvidia-nim",
|
|
273
|
+
nvidia: "nvidia-nim",
|
|
274
|
+
deepseek: "deepseek",
|
|
275
|
+
};
|
|
276
|
+
function normalizeProviderForLookup(provider) {
|
|
277
|
+
const stripped = provider.toLowerCase().replace(/[^a-z]/g, "");
|
|
278
|
+
// On alias miss, return the *stripped* key — not the raw input — so case /
|
|
279
|
+
// separator variants ("OpenAI", "open-ai", "Vertex AI") still find their
|
|
280
|
+
// table entry under the lowercase canonical key instead of falling through
|
|
281
|
+
// to DEFAULT_CONTEXT_WINDOW.
|
|
282
|
+
return PROVIDER_ALIAS_MAP[stripped] ?? stripped;
|
|
283
|
+
}
|
|
226
284
|
/**
|
|
227
285
|
* Resolve context window size for a provider/model combination.
|
|
228
286
|
*
|
|
@@ -252,8 +310,10 @@ export function getContextWindowSize(provider, model) {
|
|
|
252
310
|
// Dynamic registry not initialized yet — fall through to static lookup
|
|
253
311
|
}
|
|
254
312
|
}
|
|
255
|
-
// Static fallback chain
|
|
256
|
-
|
|
313
|
+
// Static fallback chain — normalize aliases first so "lmstudio" / "llama.cpp" /
|
|
314
|
+
// "nvidianim" find their canonical entries instead of falling back to default.
|
|
315
|
+
const canonical = normalizeProviderForLookup(provider);
|
|
316
|
+
const providerWindows = MODEL_CONTEXT_WINDOWS[canonical] ?? MODEL_CONTEXT_WINDOWS[provider];
|
|
257
317
|
if (!providerWindows) {
|
|
258
318
|
return DEFAULT_CONTEXT_WINDOW;
|
|
259
319
|
}
|
|
@@ -273,9 +333,13 @@ export function getContextWindowSize(provider, model) {
|
|
|
273
333
|
/**
|
|
274
334
|
* Calculate output token reserve for a given context window.
|
|
275
335
|
*
|
|
336
|
+
* Returns the *real* token count that will be reserved for output so callers
|
|
337
|
+
* (`getAvailableInputTokens`, `BudgetChecker`, conversation-memory pruning, file
|
|
338
|
+
* summarisation) compute input budget against the actual outgoing maxTokens.
|
|
339
|
+
*
|
|
276
340
|
* @param contextWindow - Total context window size
|
|
277
341
|
* @param maxTokens - Explicit maxTokens from user config (if set)
|
|
278
|
-
* @returns Number of tokens reserved for output
|
|
342
|
+
* @returns Number of tokens reserved for output (matches what's sent upstream)
|
|
279
343
|
*/
|
|
280
344
|
export function getOutputReserve(contextWindow, maxTokens) {
|
|
281
345
|
if (maxTokens !== undefined && maxTokens > 0) {
|
|
@@ -15,6 +15,10 @@ export declare enum AIProviderName {
|
|
|
15
15
|
MISTRAL = "mistral",
|
|
16
16
|
LITELLM = "litellm",
|
|
17
17
|
SAGEMAKER = "sagemaker",
|
|
18
|
+
DEEPSEEK = "deepseek",
|
|
19
|
+
NVIDIA_NIM = "nvidia-nim",
|
|
20
|
+
LM_STUDIO = "lm-studio",
|
|
21
|
+
LLAMACPP = "llamacpp",
|
|
18
22
|
AUTO = "auto"
|
|
19
23
|
}
|
|
20
24
|
/**
|
|
@@ -622,6 +626,54 @@ export declare enum AnthropicBetaFeature {
|
|
|
622
626
|
INTERLEAVED_THINKING = "interleaved-thinking-2025-05-14",
|
|
623
627
|
FINE_GRAINED_STREAMING = "fine-grained-tool-streaming-2025-05-14"
|
|
624
628
|
}
|
|
629
|
+
/**
|
|
630
|
+
* Supported Models for DeepSeek
|
|
631
|
+
* Docs: https://api-docs.deepseek.com/quick_start/pricing
|
|
632
|
+
*/
|
|
633
|
+
export declare enum DeepSeekModels {
|
|
634
|
+
/** General-purpose chat (DeepSeek-V3) */
|
|
635
|
+
DEEPSEEK_CHAT = "deepseek-chat",
|
|
636
|
+
/** Reasoning model (DeepSeek-R1) — exposes reasoning_content */
|
|
637
|
+
DEEPSEEK_REASONER = "deepseek-reasoner"
|
|
638
|
+
}
|
|
639
|
+
/**
|
|
640
|
+
* Selected NVIDIA NIM Models
|
|
641
|
+
* Full catalog: https://build.nvidia.com/models
|
|
642
|
+
* Note: NIM hosts hundreds of models; pass arbitrary IDs via --model.
|
|
643
|
+
*/
|
|
644
|
+
export declare enum NvidiaNimModels {
|
|
645
|
+
LLAMA_3_3_70B_INSTRUCT = "meta/llama-3.3-70b-instruct",
|
|
646
|
+
LLAMA_3_1_405B_INSTRUCT = "meta/llama-3.1-405b-instruct",
|
|
647
|
+
LLAMA_3_1_70B_INSTRUCT = "meta/llama-3.1-70b-instruct",
|
|
648
|
+
LLAMA_3_2_90B_VISION = "meta/llama-3.2-90b-vision-instruct",
|
|
649
|
+
LLAMA_3_2_11B_VISION = "meta/llama-3.2-11b-vision-instruct",
|
|
650
|
+
NEMOTRON_SUPER_49B = "nvidia/llama-3.3-nemotron-super-49b-v1",
|
|
651
|
+
NEMOTRON_NANO_8B = "nvidia/llama-3.1-nemotron-nano-8b-v1",
|
|
652
|
+
NEMOTRON_70B_INSTRUCT = "nvidia/llama-3.1-nemotron-70b-instruct",
|
|
653
|
+
DEEPSEEK_R1 = "deepseek-ai/deepseek-r1",
|
|
654
|
+
DEEPSEEK_R1_DISTILL_LLAMA_70B = "deepseek-ai/deepseek-r1-distill-llama-70b",
|
|
655
|
+
MIXTRAL_8X22B_INSTRUCT = "mistralai/mixtral-8x22b-instruct-v0.1",
|
|
656
|
+
MIXTRAL_8X7B_INSTRUCT = "mistralai/mixtral-8x7b-instruct-v0.1",
|
|
657
|
+
PHI_4 = "microsoft/phi-4",
|
|
658
|
+
GEMMA_3_27B_IT = "google/gemma-3-27b-it",
|
|
659
|
+
GLM_4_5 = "z-ai/glm4.5"
|
|
660
|
+
}
|
|
661
|
+
/**
|
|
662
|
+
* LM Studio loads any GGUF model the user has downloaded.
|
|
663
|
+
* Default: empty string → triggers /v1/models auto-discovery.
|
|
664
|
+
*/
|
|
665
|
+
export declare enum LMStudioModels {
|
|
666
|
+
/** Sentinel value — triggers auto-discovery from /v1/models */
|
|
667
|
+
AUTO_DISCOVER = ""
|
|
668
|
+
}
|
|
669
|
+
/**
|
|
670
|
+
* llama.cpp serves a single model loaded at server startup.
|
|
671
|
+
* Default: empty string → uses whatever is loaded.
|
|
672
|
+
*/
|
|
673
|
+
export declare enum LlamaCppModels {
|
|
674
|
+
/** Sentinel value — uses the model loaded by the llama-server process */
|
|
675
|
+
AUTO_DISCOVER = ""
|
|
676
|
+
}
|
|
625
677
|
/**
|
|
626
678
|
* Buffer time in milliseconds before token expiry to trigger refresh
|
|
627
679
|
*
|
package/dist/constants/enums.js
CHANGED
|
@@ -19,6 +19,10 @@ export var AIProviderName;
|
|
|
19
19
|
AIProviderName["MISTRAL"] = "mistral";
|
|
20
20
|
AIProviderName["LITELLM"] = "litellm";
|
|
21
21
|
AIProviderName["SAGEMAKER"] = "sagemaker";
|
|
22
|
+
AIProviderName["DEEPSEEK"] = "deepseek";
|
|
23
|
+
AIProviderName["NVIDIA_NIM"] = "nvidia-nim";
|
|
24
|
+
AIProviderName["LM_STUDIO"] = "lm-studio";
|
|
25
|
+
AIProviderName["LLAMACPP"] = "llamacpp";
|
|
22
26
|
AIProviderName["AUTO"] = "auto";
|
|
23
27
|
})(AIProviderName || (AIProviderName = {}));
|
|
24
28
|
/**
|
|
@@ -848,6 +852,65 @@ export var AnthropicBetaFeature;
|
|
|
848
852
|
AnthropicBetaFeature["INTERLEAVED_THINKING"] = "interleaved-thinking-2025-05-14";
|
|
849
853
|
AnthropicBetaFeature["FINE_GRAINED_STREAMING"] = "fine-grained-tool-streaming-2025-05-14";
|
|
850
854
|
})(AnthropicBetaFeature || (AnthropicBetaFeature = {}));
|
|
855
|
+
/**
|
|
856
|
+
* Supported Models for DeepSeek
|
|
857
|
+
* Docs: https://api-docs.deepseek.com/quick_start/pricing
|
|
858
|
+
*/
|
|
859
|
+
export var DeepSeekModels;
|
|
860
|
+
(function (DeepSeekModels) {
|
|
861
|
+
/** General-purpose chat (DeepSeek-V3) */
|
|
862
|
+
DeepSeekModels["DEEPSEEK_CHAT"] = "deepseek-chat";
|
|
863
|
+
/** Reasoning model (DeepSeek-R1) — exposes reasoning_content */
|
|
864
|
+
DeepSeekModels["DEEPSEEK_REASONER"] = "deepseek-reasoner";
|
|
865
|
+
})(DeepSeekModels || (DeepSeekModels = {}));
|
|
866
|
+
/**
|
|
867
|
+
* Selected NVIDIA NIM Models
|
|
868
|
+
* Full catalog: https://build.nvidia.com/models
|
|
869
|
+
* Note: NIM hosts hundreds of models; pass arbitrary IDs via --model.
|
|
870
|
+
*/
|
|
871
|
+
export var NvidiaNimModels;
|
|
872
|
+
(function (NvidiaNimModels) {
|
|
873
|
+
// Meta Llama
|
|
874
|
+
NvidiaNimModels["LLAMA_3_3_70B_INSTRUCT"] = "meta/llama-3.3-70b-instruct";
|
|
875
|
+
NvidiaNimModels["LLAMA_3_1_405B_INSTRUCT"] = "meta/llama-3.1-405b-instruct";
|
|
876
|
+
NvidiaNimModels["LLAMA_3_1_70B_INSTRUCT"] = "meta/llama-3.1-70b-instruct";
|
|
877
|
+
NvidiaNimModels["LLAMA_3_2_90B_VISION"] = "meta/llama-3.2-90b-vision-instruct";
|
|
878
|
+
NvidiaNimModels["LLAMA_3_2_11B_VISION"] = "meta/llama-3.2-11b-vision-instruct";
|
|
879
|
+
// NVIDIA Nemotron (reasoning)
|
|
880
|
+
NvidiaNimModels["NEMOTRON_SUPER_49B"] = "nvidia/llama-3.3-nemotron-super-49b-v1";
|
|
881
|
+
NvidiaNimModels["NEMOTRON_NANO_8B"] = "nvidia/llama-3.1-nemotron-nano-8b-v1";
|
|
882
|
+
NvidiaNimModels["NEMOTRON_70B_INSTRUCT"] = "nvidia/llama-3.1-nemotron-70b-instruct";
|
|
883
|
+
// DeepSeek hosted on NIM
|
|
884
|
+
NvidiaNimModels["DEEPSEEK_R1"] = "deepseek-ai/deepseek-r1";
|
|
885
|
+
NvidiaNimModels["DEEPSEEK_R1_DISTILL_LLAMA_70B"] = "deepseek-ai/deepseek-r1-distill-llama-70b";
|
|
886
|
+
// Mistral / Mixtral
|
|
887
|
+
NvidiaNimModels["MIXTRAL_8X22B_INSTRUCT"] = "mistralai/mixtral-8x22b-instruct-v0.1";
|
|
888
|
+
NvidiaNimModels["MIXTRAL_8X7B_INSTRUCT"] = "mistralai/mixtral-8x7b-instruct-v0.1";
|
|
889
|
+
// Microsoft Phi
|
|
890
|
+
NvidiaNimModels["PHI_4"] = "microsoft/phi-4";
|
|
891
|
+
// Google Gemma
|
|
892
|
+
NvidiaNimModels["GEMMA_3_27B_IT"] = "google/gemma-3-27b-it";
|
|
893
|
+
// Z.AI GLM
|
|
894
|
+
NvidiaNimModels["GLM_4_5"] = "z-ai/glm4.5";
|
|
895
|
+
})(NvidiaNimModels || (NvidiaNimModels = {}));
|
|
896
|
+
/**
|
|
897
|
+
* LM Studio loads any GGUF model the user has downloaded.
|
|
898
|
+
* Default: empty string → triggers /v1/models auto-discovery.
|
|
899
|
+
*/
|
|
900
|
+
export var LMStudioModels;
|
|
901
|
+
(function (LMStudioModels) {
|
|
902
|
+
/** Sentinel value — triggers auto-discovery from /v1/models */
|
|
903
|
+
LMStudioModels["AUTO_DISCOVER"] = "";
|
|
904
|
+
})(LMStudioModels || (LMStudioModels = {}));
|
|
905
|
+
/**
|
|
906
|
+
* llama.cpp serves a single model loaded at server startup.
|
|
907
|
+
* Default: empty string → uses whatever is loaded.
|
|
908
|
+
*/
|
|
909
|
+
export var LlamaCppModels;
|
|
910
|
+
(function (LlamaCppModels) {
|
|
911
|
+
/** Sentinel value — uses the model loaded by the llama-server process */
|
|
912
|
+
LlamaCppModels["AUTO_DISCOVER"] = "";
|
|
913
|
+
})(LlamaCppModels || (LlamaCppModels = {}));
|
|
851
914
|
// ============================================================================
|
|
852
915
|
// ANTHROPIC OAUTH CONSTANTS
|
|
853
916
|
// ============================================================================
|
|
@@ -9,7 +9,7 @@ import { TelemetryHandler } from "./modules/TelemetryHandler.js";
|
|
|
9
9
|
* Tools are integrated as first-class citizens - always available by default
|
|
10
10
|
*/
|
|
11
11
|
export declare abstract class BaseProvider implements AIProvider {
|
|
12
|
-
protected
|
|
12
|
+
protected modelName: string;
|
|
13
13
|
protected readonly providerName: AIProviderName;
|
|
14
14
|
protected readonly defaultTimeout: number;
|
|
15
15
|
protected middlewareOptions?: MiddlewareFactoryOptions;
|
|
@@ -29,13 +29,22 @@ export declare abstract class BaseProvider implements AIProvider {
|
|
|
29
29
|
traceId: string;
|
|
30
30
|
parentSpanId: string;
|
|
31
31
|
} | null): void;
|
|
32
|
-
private
|
|
33
|
-
private
|
|
34
|
-
private
|
|
35
|
-
protected
|
|
36
|
-
private
|
|
32
|
+
private messageBuilder;
|
|
33
|
+
private streamHandler;
|
|
34
|
+
private generationHandler;
|
|
35
|
+
protected telemetryHandler: TelemetryHandler;
|
|
36
|
+
private utilities;
|
|
37
37
|
private readonly toolsManager;
|
|
38
38
|
constructor(modelName?: string, providerName?: AIProviderName, neurolink?: NeuroLink, middleware?: MiddlewareFactoryOptions);
|
|
39
|
+
/**
|
|
40
|
+
* Update modelName and rebuild composition handlers with the new value.
|
|
41
|
+
*
|
|
42
|
+
* Auto-discovery providers (lm-studio, llamacpp) call this once they have
|
|
43
|
+
* resolved the loaded model from `/v1/models`. Without this, handlers
|
|
44
|
+
* (TelemetryHandler, MessageBuilder, ...) keep the pre-discovery name and
|
|
45
|
+
* pricing / span / log metadata reports the stale value.
|
|
46
|
+
*/
|
|
47
|
+
protected refreshHandlersForModel(model: string): void;
|
|
39
48
|
/**
|
|
40
49
|
* Check if this provider supports tool/function calling
|
|
41
50
|
* Override in subclasses to disable tools for specific providers or models
|
|
@@ -23,6 +23,10 @@ import { Utilities } from "./modules/Utilities.js";
|
|
|
23
23
|
* Tools are integrated as first-class citizens - always available by default
|
|
24
24
|
*/
|
|
25
25
|
export class BaseProvider {
|
|
26
|
+
// Not `readonly` because providers that auto-discover the model from a
|
|
27
|
+
// /v1/models endpoint (lm-studio, llamacpp) need to update modelName after
|
|
28
|
+
// construction so handlers (TelemetryHandler, MessageBuilder) cache the
|
|
29
|
+
// resolved name. All other providers treat this as effectively readonly.
|
|
26
30
|
modelName;
|
|
27
31
|
providerName;
|
|
28
32
|
defaultTimeout = 30000; // 30 seconds
|
|
@@ -43,6 +47,11 @@ export class BaseProvider {
|
|
|
43
47
|
this._traceContext = ctx;
|
|
44
48
|
}
|
|
45
49
|
// Composition modules - Single Responsibility Principle
|
|
50
|
+
// Handlers below are not `readonly` so that providers which auto-discover
|
|
51
|
+
// their model after construction (lm-studio, llamacpp) can rebuild them
|
|
52
|
+
// via `refreshHandlersForModel(...)` and propagate the resolved name into
|
|
53
|
+
// pricing / telemetry / span attributes. All other providers leave these
|
|
54
|
+
// alone.
|
|
46
55
|
messageBuilder;
|
|
47
56
|
streamHandler;
|
|
48
57
|
generationHandler;
|
|
@@ -67,6 +76,25 @@ export class BaseProvider {
|
|
|
67
76
|
fixSchemaForOpenAIStrictMode: (schema) => this.fixSchemaForOpenAIStrictMode(schema),
|
|
68
77
|
});
|
|
69
78
|
}
|
|
79
|
+
/**
|
|
80
|
+
* Update modelName and rebuild composition handlers with the new value.
|
|
81
|
+
*
|
|
82
|
+
* Auto-discovery providers (lm-studio, llamacpp) call this once they have
|
|
83
|
+
* resolved the loaded model from `/v1/models`. Without this, handlers
|
|
84
|
+
* (TelemetryHandler, MessageBuilder, ...) keep the pre-discovery name and
|
|
85
|
+
* pricing / span / log metadata reports the stale value.
|
|
86
|
+
*/
|
|
87
|
+
refreshHandlersForModel(model) {
|
|
88
|
+
this.modelName = model;
|
|
89
|
+
trace
|
|
90
|
+
.getSpan(context.active())
|
|
91
|
+
?.setAttribute(ATTR.GEN_AI_MODEL, this.modelName);
|
|
92
|
+
this.messageBuilder = new MessageBuilder(this.providerName, this.modelName);
|
|
93
|
+
this.streamHandler = new StreamHandler(this.providerName, this.modelName);
|
|
94
|
+
this.telemetryHandler = new TelemetryHandler(this.providerName, this.modelName, this.neurolink);
|
|
95
|
+
this.generationHandler = new GenerationHandler(this.providerName, this.modelName, () => this.supportsTools(), (options, type) => this.telemetryHandler.getTelemetryConfig(options, type), (toolCalls, toolResults, options, timestamp) => this.handleToolExecutionStorage(toolCalls, toolResults, options, timestamp), () => this.neurolink?.getEventEmitter());
|
|
96
|
+
this.utilities = new Utilities(this.providerName, this.modelName, this.defaultTimeout, this.middlewareOptions);
|
|
97
|
+
}
|
|
70
98
|
/**
|
|
71
99
|
* Check if this provider supports tool/function calling
|
|
72
100
|
* Override in subclasses to disable tools for specific providers or models
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { ProviderFactory } from "./providerFactory.js";
|
|
2
2
|
import { logger } from "../utils/logger.js";
|
|
3
|
-
import { AIProviderName, GoogleAIModels, OpenAIModels, AnthropicModels, VertexModels, MistralModels, OllamaModels, LiteLLMModels, HuggingFaceModels, } from "../constants/enums.js";
|
|
3
|
+
import { AIProviderName, GoogleAIModels, OpenAIModels, AnthropicModels, VertexModels, MistralModels, OllamaModels, LiteLLMModels, HuggingFaceModels, DeepSeekModels, NvidiaNimModels, } from "../constants/enums.js";
|
|
4
4
|
/**
|
|
5
5
|
* Provider Registry - registers all providers with the factory
|
|
6
6
|
* This is where we migrate providers one by one to the new pattern
|
|
@@ -128,6 +128,30 @@ export class ProviderRegistry {
|
|
|
128
128
|
const { AmazonSageMakerProvider } = await import("../providers/amazonSagemaker.js");
|
|
129
129
|
return new AmazonSageMakerProvider(modelName, undefined, region, undefined, sagemakerCreds);
|
|
130
130
|
}, process.env.SAGEMAKER_MODEL || "sagemaker-model", ["sagemaker", "aws-sagemaker"]);
|
|
131
|
+
// Register DeepSeek provider
|
|
132
|
+
ProviderFactory.registerProvider(AIProviderName.DEEPSEEK, async (modelName, _providerName, sdk, _region, credentials) => {
|
|
133
|
+
const deepseekCreds = credentials;
|
|
134
|
+
const { DeepSeekProvider } = await import("../providers/deepseek.js");
|
|
135
|
+
return new DeepSeekProvider(modelName, sdk, undefined, deepseekCreds);
|
|
136
|
+
}, process.env.DEEPSEEK_MODEL || DeepSeekModels.DEEPSEEK_CHAT, ["deepseek", "ds"]);
|
|
137
|
+
// Register NVIDIA NIM provider
|
|
138
|
+
ProviderFactory.registerProvider(AIProviderName.NVIDIA_NIM, async (modelName, _providerName, sdk, _region, credentials) => {
|
|
139
|
+
const nimCreds = credentials;
|
|
140
|
+
const { NvidiaNimProvider } = await import("../providers/nvidiaNim.js");
|
|
141
|
+
return new NvidiaNimProvider(modelName, sdk, undefined, nimCreds);
|
|
142
|
+
}, process.env.NVIDIA_NIM_MODEL || NvidiaNimModels.LLAMA_3_3_70B_INSTRUCT, ["nvidia", "nim", "nvidia-nim"]);
|
|
143
|
+
// Register LM Studio provider (local)
|
|
144
|
+
ProviderFactory.registerProvider(AIProviderName.LM_STUDIO, async (modelName, _providerName, sdk, _region, credentials) => {
|
|
145
|
+
const lmStudioCreds = credentials;
|
|
146
|
+
const { LMStudioProvider } = await import("../providers/lmStudio.js");
|
|
147
|
+
return new LMStudioProvider(modelName, sdk, undefined, lmStudioCreds);
|
|
148
|
+
}, process.env.LM_STUDIO_MODEL || undefined, ["lmstudio", "lm-studio", "lms"]);
|
|
149
|
+
// Register llama.cpp provider (local)
|
|
150
|
+
ProviderFactory.registerProvider(AIProviderName.LLAMACPP, async (modelName, _providerName, sdk, _region, credentials) => {
|
|
151
|
+
const llamaCppCreds = credentials;
|
|
152
|
+
const { LlamaCppProvider } = await import("../providers/llamaCpp.js");
|
|
153
|
+
return new LlamaCppProvider(modelName, sdk, undefined, llamaCppCreds);
|
|
154
|
+
}, process.env.LLAMACPP_MODEL || undefined, ["llamacpp", "llama.cpp", "llama-cpp"]);
|
|
131
155
|
logger.debug("All providers registered successfully");
|
|
132
156
|
this.registered = true;
|
|
133
157
|
// ===== TTS HANDLER REGISTRATION =====
|
|
@@ -50,7 +50,19 @@ const PROXY_PROVIDERS = new Set(["litellm", "openrouter"]);
|
|
|
50
50
|
*/
|
|
51
51
|
function normalizeVisionProvider(provider) {
|
|
52
52
|
const lower = provider.toLowerCase();
|
|
53
|
-
|
|
53
|
+
// Strip non-alpha characters so alias forms (e.g. "lm-studio", "lm_studio",
|
|
54
|
+
// "llama.cpp", "nvidia_nim") all collapse onto a canonical key. Mirrors
|
|
55
|
+
// the alias-normalization pattern used in pricing.ts and contextWindows.ts.
|
|
56
|
+
const stripped = lower.replace(/[^a-z]/g, "");
|
|
57
|
+
switch (stripped) {
|
|
58
|
+
case "lmstudio":
|
|
59
|
+
return "lm-studio";
|
|
60
|
+
case "llamacpp":
|
|
61
|
+
return "llamacpp";
|
|
62
|
+
case "nvidianim":
|
|
63
|
+
return "nvidia-nim";
|
|
64
|
+
case "googleaistudio":
|
|
65
|
+
return "google-ai";
|
|
54
66
|
case "or":
|
|
55
67
|
return "openrouter";
|
|
56
68
|
default:
|
|
@@ -436,6 +448,32 @@ const VISION_CAPABILITIES = {
|
|
|
436
448
|
"meta-llama-4-maverick-17b-128e-instruct",
|
|
437
449
|
"meta-llama-4-scout-17b-16e-instruct",
|
|
438
450
|
],
|
|
451
|
+
// DeepSeek has no vision support — empty list
|
|
452
|
+
deepseek: [],
|
|
453
|
+
"nvidia-nim": [
|
|
454
|
+
"meta/llama-3.2-90b-vision-instruct",
|
|
455
|
+
"meta/llama-3.2-11b-vision-instruct",
|
|
456
|
+
],
|
|
457
|
+
// LM Studio + llama.cpp: vision depends on the loaded model.
|
|
458
|
+
// Substrings must point at known multimodal variants only — bare
|
|
459
|
+
// "llama-3.2" matches the text-only Llama-3.2-1B/3B chat models.
|
|
460
|
+
"lm-studio": [
|
|
461
|
+
"llava",
|
|
462
|
+
"llama-3.2-11b-vision",
|
|
463
|
+
"llama-3.2-90b-vision",
|
|
464
|
+
"vision-instruct",
|
|
465
|
+
"qwen2-vl",
|
|
466
|
+
"qwen2.5-vl",
|
|
467
|
+
"phi-3-vision",
|
|
468
|
+
],
|
|
469
|
+
llamacpp: [
|
|
470
|
+
"llava",
|
|
471
|
+
"llama-3.2-11b-vision",
|
|
472
|
+
"llama-3.2-90b-vision",
|
|
473
|
+
"vision-instruct",
|
|
474
|
+
"qwen2-vl",
|
|
475
|
+
"phi-3-vision",
|
|
476
|
+
],
|
|
439
477
|
};
|
|
440
478
|
/**
|
|
441
479
|
* Provider Image Adapter - Smart routing and formatting
|
|
@@ -535,6 +573,13 @@ export class ProviderImageAdapter {
|
|
|
535
573
|
if (!supportedModels) {
|
|
536
574
|
return false;
|
|
537
575
|
}
|
|
576
|
+
// An empty list means the provider has NO vision support (e.g. deepseek).
|
|
577
|
+
// Without this guard, the no-model branch below would return `true` for
|
|
578
|
+
// every provider that has an entry in VISION_CAPABILITIES — even an empty
|
|
579
|
+
// one — letting vision requests through to a text-only API.
|
|
580
|
+
if (supportedModels.length === 0) {
|
|
581
|
+
return false;
|
|
582
|
+
}
|
|
538
583
|
if (!model) {
|
|
539
584
|
return true; // Provider supports vision, but need to check specific model
|
|
540
585
|
}
|
|
@@ -562,7 +607,12 @@ export class ProviderImageAdapter {
|
|
|
562
607
|
* Get all vision-capable providers
|
|
563
608
|
*/
|
|
564
609
|
static getVisionProviders() {
|
|
565
|
-
|
|
610
|
+
// Filter out providers whose allowlist is empty (e.g. deepseek). They're
|
|
611
|
+
// listed in VISION_CAPABILITIES so supportsVision can return false for
|
|
612
|
+
// them, but they should not be advertised as vision-capable.
|
|
613
|
+
return Object.entries(VISION_CAPABILITIES)
|
|
614
|
+
.filter(([, models]) => models.length > 0)
|
|
615
|
+
.map(([provider]) => provider);
|
|
566
616
|
}
|
|
567
617
|
/**
|
|
568
618
|
* Count total "images" in a message (actual images + PDF pages)
|
|
@@ -36,9 +36,13 @@ export declare function getContextWindowSize(provider: string, model?: string):
|
|
|
36
36
|
/**
|
|
37
37
|
* Calculate output token reserve for a given context window.
|
|
38
38
|
*
|
|
39
|
+
* Returns the *real* token count that will be reserved for output so callers
|
|
40
|
+
* (`getAvailableInputTokens`, `BudgetChecker`, conversation-memory pruning, file
|
|
41
|
+
* summarisation) compute input budget against the actual outgoing maxTokens.
|
|
42
|
+
*
|
|
39
43
|
* @param contextWindow - Total context window size
|
|
40
44
|
* @param maxTokens - Explicit maxTokens from user config (if set)
|
|
41
|
-
* @returns Number of tokens reserved for output
|
|
45
|
+
* @returns Number of tokens reserved for output (matches what's sent upstream)
|
|
42
46
|
*/
|
|
43
47
|
export declare function getOutputReserve(contextWindow: number, maxTokens?: number): number;
|
|
44
48
|
/**
|