@juspay/neurolink 9.59.5 → 9.60.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. package/CHANGELOG.md +12 -0
  2. package/README.md +11 -7
  3. package/dist/adapters/providerImageAdapter.js +52 -2
  4. package/dist/browser/neurolink.min.js +352 -352
  5. package/dist/cli/factories/commandFactory.js +15 -1
  6. package/dist/cli/utils/interactiveSetup.js +64 -0
  7. package/dist/constants/contextWindows.d.ts +5 -1
  8. package/dist/constants/contextWindows.js +67 -3
  9. package/dist/constants/enums.d.ts +52 -0
  10. package/dist/constants/enums.js +63 -0
  11. package/dist/core/baseProvider.d.ts +15 -6
  12. package/dist/core/baseProvider.js +28 -0
  13. package/dist/factories/providerRegistry.js +25 -1
  14. package/dist/lib/adapters/providerImageAdapter.js +52 -2
  15. package/dist/lib/constants/contextWindows.d.ts +5 -1
  16. package/dist/lib/constants/contextWindows.js +67 -3
  17. package/dist/lib/constants/enums.d.ts +52 -0
  18. package/dist/lib/constants/enums.js +63 -0
  19. package/dist/lib/core/baseProvider.d.ts +15 -6
  20. package/dist/lib/core/baseProvider.js +28 -0
  21. package/dist/lib/factories/providerRegistry.js +25 -1
  22. package/dist/lib/neurolink.js +1 -1
  23. package/dist/lib/providers/deepseek.d.ts +29 -0
  24. package/dist/lib/providers/deepseek.js +216 -0
  25. package/dist/lib/providers/index.d.ts +4 -0
  26. package/dist/lib/providers/index.js +4 -0
  27. package/dist/lib/providers/llamaCpp.d.ts +34 -0
  28. package/dist/lib/providers/llamaCpp.js +315 -0
  29. package/dist/lib/providers/lmStudio.d.ts +34 -0
  30. package/dist/lib/providers/lmStudio.js +306 -0
  31. package/dist/lib/providers/nvidiaNim.d.ts +31 -0
  32. package/dist/lib/providers/nvidiaNim.js +354 -0
  33. package/dist/lib/proxy/proxyFetch.d.ts +9 -0
  34. package/dist/lib/proxy/proxyFetch.js +6 -1
  35. package/dist/lib/types/providers.d.ts +37 -2
  36. package/dist/lib/types/providers.js +1 -1
  37. package/dist/lib/utils/modelChoices.js +68 -4
  38. package/dist/lib/utils/pricing.d.ts +5 -0
  39. package/dist/lib/utils/pricing.js +94 -3
  40. package/dist/lib/utils/providerConfig.d.ts +16 -0
  41. package/dist/lib/utils/providerConfig.js +82 -0
  42. package/dist/neurolink.js +1 -1
  43. package/dist/providers/deepseek.d.ts +29 -0
  44. package/dist/providers/deepseek.js +215 -0
  45. package/dist/providers/index.d.ts +4 -0
  46. package/dist/providers/index.js +4 -0
  47. package/dist/providers/llamaCpp.d.ts +34 -0
  48. package/dist/providers/llamaCpp.js +314 -0
  49. package/dist/providers/lmStudio.d.ts +34 -0
  50. package/dist/providers/lmStudio.js +305 -0
  51. package/dist/providers/nvidiaNim.d.ts +31 -0
  52. package/dist/providers/nvidiaNim.js +353 -0
  53. package/dist/proxy/proxyFetch.d.ts +9 -0
  54. package/dist/proxy/proxyFetch.js +6 -1
  55. package/dist/types/providers.d.ts +37 -2
  56. package/dist/utils/modelChoices.js +68 -4
  57. package/dist/utils/pricing.d.ts +5 -0
  58. package/dist/utils/pricing.js +94 -3
  59. package/dist/utils/providerConfig.d.ts +16 -0
  60. package/dist/utils/providerConfig.js +82 -0
  61. package/package.json +19 -12
@@ -51,6 +51,16 @@ export class CLICommandFactory {
  "mistral",
  "litellm",
  "sagemaker",
+ "deepseek",
+ "ds",
+ "nvidia-nim",
+ "nim",
+ "nvidia",
+ "lm-studio",
+ "lmstudio",
+ "lms",
+ "llamacpp",
+ "llama.cpp",
  ],
  default: "auto",
  description: "AI provider to use (auto-selects best available). Use 'anthropic-subscription' for Claude subscription plans.",
@@ -1322,6 +1332,10 @@ export class CLICommandFactory {
  "vertex",
  "huggingface",
  "mistral",
+ "deepseek",
+ "nvidia-nim",
+ "lm-studio",
+ "llamacpp",
  ],
  })
  .option("list", {
@@ -2877,7 +2891,7 @@ export class CLICommandFactory {
  " generate|gen)\n" +
  ' case "${prev}" in\n' +
  " --provider|-p)\n" +
- ' COMPREPLY=( $(compgen -W "auto openai bedrock vertex googleVertex anthropic azure google-ai huggingface ollama mistral litellm" -- ${cur}) )\n' +
+ ' COMPREPLY=( $(compgen -W "auto openai openai-compatible openrouter or bedrock vertex googleVertex anthropic anthropic-subscription azure google-ai google-ai-studio huggingface ollama mistral litellm sagemaker deepseek ds nvidia-nim nim lm-studio lmstudio llamacpp llama.cpp" -- ${cur}) )\n' +
  " return 0\n" +
  " ;;\n" +
  " --format|-f|--output-format)\n" +
@@ -204,6 +204,70 @@ export const PROVIDER_CONFIGS = [
  },
  ],
  },
+ {
+ id: AIProviderName.DEEPSEEK,
+ name: "DeepSeek",
+ description: "Cost-efficient frontier models (deepseek-chat V3, deepseek-reasoner R1)",
+ envVars: [
+ {
+ key: "DEEPSEEK_API_KEY",
+ prompt: "DeepSeek API Key (get one at https://platform.deepseek.com)",
+ secure: true,
+ },
+ ],
+ },
+ {
+ id: AIProviderName.NVIDIA_NIM,
+ name: "NVIDIA NIM",
+ description: "NVIDIA-hosted Llama, Nemotron, Mistral, and DeepSeek-R1 models",
+ envVars: [
+ {
+ key: "NVIDIA_NIM_API_KEY",
+ prompt: "NVIDIA NIM API Key (get one at https://build.nvidia.com/settings/api-keys)",
+ secure: true,
+ },
+ ],
+ },
+ {
+ id: AIProviderName.LM_STUDIO,
+ name: "LM Studio",
+ description: "Local inference via LM Studio desktop app (https://lmstudio.ai)",
+ envVars: [
+ {
+ key: "LM_STUDIO_BASE_URL",
+ prompt: "LM Studio server URL",
+ default: "http://localhost:1234/v1",
+ secure: false,
+ optional: true,
+ },
+ {
+ key: "LM_STUDIO_API_KEY",
+ prompt: "LM Studio API Key (leave blank — only needed behind an auth proxy)",
+ secure: false,
+ optional: true,
+ },
+ ],
+ },
+ {
+ id: AIProviderName.LLAMACPP,
+ name: "llama.cpp",
+ description: "Local inference via llama-server (https://github.com/ggerganov/llama.cpp). Start with: ./llama-server -m model.gguf --port 8080 --jinja",
+ envVars: [
+ {
+ key: "LLAMACPP_BASE_URL",
+ prompt: "llama-server URL",
+ default: "http://localhost:8080/v1",
+ secure: false,
+ optional: true,
+ },
+ {
+ key: "LLAMACPP_API_KEY",
+ prompt: "llama-server API Key (leave blank — only needed behind an auth proxy)",
+ secure: false,
+ optional: true,
+ },
+ ],
+ },
  ];
  /**
  * Run the interactive setup wizard
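
A small sketch of how these optional settings resolve at runtime. The env var names and localhost defaults come straight from the setup entries above; the resolution snippet itself is illustrative, not the package's code:

    // Local providers fall back to their documented localhost defaults when the
    // setup wizard is skipped; API keys stay empty unless a reverse proxy adds auth.
    const lmStudioBaseURL = process.env.LM_STUDIO_BASE_URL || "http://localhost:1234/v1";
    const llamaCppBaseURL = process.env.LLAMACPP_BASE_URL || "http://localhost:8080/v1";
    const lmStudioApiKey = process.env.LM_STUDIO_API_KEY || ""; // only needed behind an auth proxy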
@@ -36,9 +36,13 @@ export declare function getContextWindowSize(provider: string, model?: string):
  /**
  * Calculate output token reserve for a given context window.
  *
+ * Returns the *real* token count that will be reserved for output so callers
+ * (`getAvailableInputTokens`, `BudgetChecker`, conversation-memory pruning, file
+ * summarisation) compute input budget against the actual outgoing maxTokens.
+ *
  * @param contextWindow - Total context window size
  * @param maxTokens - Explicit maxTokens from user config (if set)
- * @returns Number of tokens reserved for output
+ * @returns Number of tokens reserved for output (matches what's sent upstream)
  */
  export declare function getOutputReserve(contextWindow: number, maxTokens?: number): number;
  /**
@@ -23,6 +23,34 @@ export const DEFAULT_OUTPUT_RESERVE_RATIO = 0.35;
  * The "_default" key is the fallback for unknown models within a provider.
  */
  export const MODEL_CONTEXT_WINDOWS = {
+ deepseek: {
+ _default: 64_000,
+ "deepseek-chat": 64_000,
+ "deepseek-reasoner": 64_000,
+ },
+ "nvidia-nim": {
+ _default: 128_000,
+ "meta/llama-3.3-70b-instruct": 128_000,
+ "meta/llama-3.1-405b-instruct": 128_000,
+ "meta/llama-3.1-70b-instruct": 128_000,
+ "meta/llama-3.2-90b-vision-instruct": 128_000,
+ "meta/llama-3.2-11b-vision-instruct": 128_000,
+ "nvidia/llama-3.3-nemotron-super-49b-v1": 128_000,
+ "nvidia/llama-3.1-nemotron-nano-8b-v1": 128_000,
+ "nvidia/llama-3.1-nemotron-70b-instruct": 128_000,
+ "deepseek-ai/deepseek-r1": 128_000,
+ "deepseek-ai/deepseek-r1-distill-llama-70b": 128_000,
+ "mistralai/mixtral-8x22b-instruct-v0.1": 65_536,
+ "mistralai/mixtral-8x7b-instruct-v0.1": 32_768,
+ "microsoft/phi-4": 16_384,
+ "google/gemma-3-27b-it": 8_192,
+ },
+ "lm-studio": {
+ _default: 8_192,
+ },
+ llamacpp: {
+ _default: 8_192,
+ },
  anthropic: {
  _default: 200_000,
  // Claude 4.6 (Feb 2026) — 1M context window
@@ -223,6 +251,36 @@ export const MODEL_CONTEXT_WINDOWS = {
  "qwen3-vl-8b-instruct": 32_768,
  },
  };
+ /**
+ * Map of provider aliases to canonical MODEL_CONTEXT_WINDOWS keys.
+ *
+ * Callers reach `getContextWindowSize` via the unnormalized form on
+ * `options.provider` (e.g. CLI `--provider lmstudio`, alias `llama.cpp`),
+ * and `ProviderFactory.normalizeProviderName` runs only at instantiation —
+ * its output never reaches budget calculations. Without this normalization
+ * those alias forms miss the table and fall back to `DEFAULT_CONTEXT_WINDOW`,
+ * understating the budget for LM Studio / llama.cpp / NVIDIA NIM.
+ *
+ * The keys here are the result of stripping non-alpha characters, so
+ * `lm-studio` -> `lmstudio`, `nvidia-nim` -> `nvidianim`, `llama.cpp` -> `llamacpp`.
+ */
+ const PROVIDER_ALIAS_MAP = {
+ googleaistudio: "google-ai-studio",
+ lmstudio: "lm-studio",
+ llamacpp: "llamacpp",
+ nvidianim: "nvidia-nim",
+ nim: "nvidia-nim",
+ nvidia: "nvidia-nim",
+ deepseek: "deepseek",
+ };
+ function normalizeProviderForLookup(provider) {
+ const stripped = provider.toLowerCase().replace(/[^a-z]/g, "");
+ // On alias miss, return the *stripped* key — not the raw input — so case /
+ // separator variants ("OpenAI", "open-ai", "Vertex AI") still find their
+ // table entry under the lowercase canonical key instead of falling through
+ // to DEFAULT_CONTEXT_WINDOW.
+ return PROVIDER_ALIAS_MAP[stripped] ?? stripped;
+ }
  /**
  * Resolve context window size for a provider/model combination.
  *
@@ -252,8 +310,10 @@ export function getContextWindowSize(provider, model) {
  // Dynamic registry not initialized yet — fall through to static lookup
  }
  }
- // Static fallback chain
- const providerWindows = MODEL_CONTEXT_WINDOWS[provider];
+ // Static fallback chain — normalize aliases first so "lmstudio" / "llama.cpp" /
+ // "nvidianim" find their canonical entries instead of falling back to default.
+ const canonical = normalizeProviderForLookup(provider);
+ const providerWindows = MODEL_CONTEXT_WINDOWS[canonical] ?? MODEL_CONTEXT_WINDOWS[provider];
  if (!providerWindows) {
  return DEFAULT_CONTEXT_WINDOW;
  }
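
A quick sketch of what the alias normalization buys at lookup time. The helper and table keys are the ones shown in the hunks above; the expected values assume the static fallback path (dynamic registry not initialized) and the exact-key-then-`_default` model lookup described in the table's docstring:

    // Alias spellings of the same provider now land on one canonical table entry,
    // so the budget calculation sees the real window instead of DEFAULT_CONTEXT_WINDOW.
    getContextWindowSize("lm-studio");                       // 8_192 (canonical key "lm-studio")
    getContextWindowSize("lmstudio");                        // 8_192 (alias -> "lm-studio")
    getContextWindowSize("llama.cpp");                       // 8_192 (stripped -> "llamacpp")
    getContextWindowSize("nvidianim", "microsoft/phi-4");    // 16_384 (alias -> "nvidia-nim")
    getContextWindowSize("deepseek", "deepseek-reasoner");   // 64_000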
@@ -273,9 +333,13 @@ export function getContextWindowSize(provider, model) {
  /**
  * Calculate output token reserve for a given context window.
  *
+ * Returns the *real* token count that will be reserved for output so callers
+ * (`getAvailableInputTokens`, `BudgetChecker`, conversation-memory pruning, file
+ * summarisation) compute input budget against the actual outgoing maxTokens.
+ *
  * @param contextWindow - Total context window size
  * @param maxTokens - Explicit maxTokens from user config (if set)
- * @returns Number of tokens reserved for output
+ * @returns Number of tokens reserved for output (matches what's sent upstream)
  */
  export function getOutputReserve(contextWindow, maxTokens) {
  if (maxTokens !== undefined && maxTokens > 0) {
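
Rough arithmetic for what the reserve means in practice. The explicit-maxTokens branch is visible in the hunk above; the no-maxTokens case assumes the `DEFAULT_OUTPUT_RESERVE_RATIO = 0.35` fallback shown in the earlier hunk context, since the rest of the function body is outside this diff:

    // Explicit maxTokens: the reserve matches the value sent upstream.
    getOutputReserve(64_000, 8_000);  // presumably 8_000 reserved, ~56_000 left for input
    // No maxTokens: assuming the 0.35 ratio fallback applies,
    getOutputReserve(64_000);         // ~22_400 reserved, ~41_600 left for input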
@@ -15,6 +15,10 @@ export declare enum AIProviderName {
  MISTRAL = "mistral",
  LITELLM = "litellm",
  SAGEMAKER = "sagemaker",
+ DEEPSEEK = "deepseek",
+ NVIDIA_NIM = "nvidia-nim",
+ LM_STUDIO = "lm-studio",
+ LLAMACPP = "llamacpp",
  AUTO = "auto"
  }
  /**
@@ -622,6 +626,54 @@ export declare enum AnthropicBetaFeature {
  INTERLEAVED_THINKING = "interleaved-thinking-2025-05-14",
  FINE_GRAINED_STREAMING = "fine-grained-tool-streaming-2025-05-14"
  }
+ /**
+ * Supported Models for DeepSeek
+ * Docs: https://api-docs.deepseek.com/quick_start/pricing
+ */
+ export declare enum DeepSeekModels {
+ /** General-purpose chat (DeepSeek-V3) */
+ DEEPSEEK_CHAT = "deepseek-chat",
+ /** Reasoning model (DeepSeek-R1) — exposes reasoning_content */
+ DEEPSEEK_REASONER = "deepseek-reasoner"
+ }
+ /**
+ * Selected NVIDIA NIM Models
+ * Full catalog: https://build.nvidia.com/models
+ * Note: NIM hosts hundreds of models; pass arbitrary IDs via --model.
+ */
+ export declare enum NvidiaNimModels {
+ LLAMA_3_3_70B_INSTRUCT = "meta/llama-3.3-70b-instruct",
+ LLAMA_3_1_405B_INSTRUCT = "meta/llama-3.1-405b-instruct",
+ LLAMA_3_1_70B_INSTRUCT = "meta/llama-3.1-70b-instruct",
+ LLAMA_3_2_90B_VISION = "meta/llama-3.2-90b-vision-instruct",
+ LLAMA_3_2_11B_VISION = "meta/llama-3.2-11b-vision-instruct",
+ NEMOTRON_SUPER_49B = "nvidia/llama-3.3-nemotron-super-49b-v1",
+ NEMOTRON_NANO_8B = "nvidia/llama-3.1-nemotron-nano-8b-v1",
+ NEMOTRON_70B_INSTRUCT = "nvidia/llama-3.1-nemotron-70b-instruct",
+ DEEPSEEK_R1 = "deepseek-ai/deepseek-r1",
+ DEEPSEEK_R1_DISTILL_LLAMA_70B = "deepseek-ai/deepseek-r1-distill-llama-70b",
+ MIXTRAL_8X22B_INSTRUCT = "mistralai/mixtral-8x22b-instruct-v0.1",
+ MIXTRAL_8X7B_INSTRUCT = "mistralai/mixtral-8x7b-instruct-v0.1",
+ PHI_4 = "microsoft/phi-4",
+ GEMMA_3_27B_IT = "google/gemma-3-27b-it",
+ GLM_4_5 = "z-ai/glm4.5"
+ }
+ /**
+ * LM Studio loads any GGUF model the user has downloaded.
+ * Default: empty string → triggers /v1/models auto-discovery.
+ */
+ export declare enum LMStudioModels {
+ /** Sentinel value — triggers auto-discovery from /v1/models */
+ AUTO_DISCOVER = ""
+ }
+ /**
+ * llama.cpp serves a single model loaded at server startup.
+ * Default: empty string → uses whatever is loaded.
+ */
+ export declare enum LlamaCppModels {
+ /** Sentinel value — uses the model loaded by the llama-server process */
+ AUTO_DISCOVER = ""
+ }
  /**
  * Buffer time in milliseconds before token expiry to trigger refresh
  *
@@ -19,6 +19,10 @@ export var AIProviderName;
  AIProviderName["MISTRAL"] = "mistral";
  AIProviderName["LITELLM"] = "litellm";
  AIProviderName["SAGEMAKER"] = "sagemaker";
+ AIProviderName["DEEPSEEK"] = "deepseek";
+ AIProviderName["NVIDIA_NIM"] = "nvidia-nim";
+ AIProviderName["LM_STUDIO"] = "lm-studio";
+ AIProviderName["LLAMACPP"] = "llamacpp";
  AIProviderName["AUTO"] = "auto";
  })(AIProviderName || (AIProviderName = {}));
  /**
@@ -848,6 +852,65 @@ export var AnthropicBetaFeature;
  AnthropicBetaFeature["INTERLEAVED_THINKING"] = "interleaved-thinking-2025-05-14";
  AnthropicBetaFeature["FINE_GRAINED_STREAMING"] = "fine-grained-tool-streaming-2025-05-14";
  })(AnthropicBetaFeature || (AnthropicBetaFeature = {}));
+ /**
+ * Supported Models for DeepSeek
+ * Docs: https://api-docs.deepseek.com/quick_start/pricing
+ */
+ export var DeepSeekModels;
+ (function (DeepSeekModels) {
+ /** General-purpose chat (DeepSeek-V3) */
+ DeepSeekModels["DEEPSEEK_CHAT"] = "deepseek-chat";
+ /** Reasoning model (DeepSeek-R1) — exposes reasoning_content */
+ DeepSeekModels["DEEPSEEK_REASONER"] = "deepseek-reasoner";
+ })(DeepSeekModels || (DeepSeekModels = {}));
+ /**
+ * Selected NVIDIA NIM Models
+ * Full catalog: https://build.nvidia.com/models
+ * Note: NIM hosts hundreds of models; pass arbitrary IDs via --model.
+ */
+ export var NvidiaNimModels;
+ (function (NvidiaNimModels) {
+ // Meta Llama
+ NvidiaNimModels["LLAMA_3_3_70B_INSTRUCT"] = "meta/llama-3.3-70b-instruct";
+ NvidiaNimModels["LLAMA_3_1_405B_INSTRUCT"] = "meta/llama-3.1-405b-instruct";
+ NvidiaNimModels["LLAMA_3_1_70B_INSTRUCT"] = "meta/llama-3.1-70b-instruct";
+ NvidiaNimModels["LLAMA_3_2_90B_VISION"] = "meta/llama-3.2-90b-vision-instruct";
+ NvidiaNimModels["LLAMA_3_2_11B_VISION"] = "meta/llama-3.2-11b-vision-instruct";
+ // NVIDIA Nemotron (reasoning)
+ NvidiaNimModels["NEMOTRON_SUPER_49B"] = "nvidia/llama-3.3-nemotron-super-49b-v1";
+ NvidiaNimModels["NEMOTRON_NANO_8B"] = "nvidia/llama-3.1-nemotron-nano-8b-v1";
+ NvidiaNimModels["NEMOTRON_70B_INSTRUCT"] = "nvidia/llama-3.1-nemotron-70b-instruct";
+ // DeepSeek hosted on NIM
+ NvidiaNimModels["DEEPSEEK_R1"] = "deepseek-ai/deepseek-r1";
+ NvidiaNimModels["DEEPSEEK_R1_DISTILL_LLAMA_70B"] = "deepseek-ai/deepseek-r1-distill-llama-70b";
+ // Mistral / Mixtral
+ NvidiaNimModels["MIXTRAL_8X22B_INSTRUCT"] = "mistralai/mixtral-8x22b-instruct-v0.1";
+ NvidiaNimModels["MIXTRAL_8X7B_INSTRUCT"] = "mistralai/mixtral-8x7b-instruct-v0.1";
+ // Microsoft Phi
+ NvidiaNimModels["PHI_4"] = "microsoft/phi-4";
+ // Google Gemma
+ NvidiaNimModels["GEMMA_3_27B_IT"] = "google/gemma-3-27b-it";
+ // Z.AI GLM
+ NvidiaNimModels["GLM_4_5"] = "z-ai/glm4.5";
+ })(NvidiaNimModels || (NvidiaNimModels = {}));
+ /**
+ * LM Studio loads any GGUF model the user has downloaded.
+ * Default: empty string → triggers /v1/models auto-discovery.
+ */
+ export var LMStudioModels;
+ (function (LMStudioModels) {
+ /** Sentinel value — triggers auto-discovery from /v1/models */
+ LMStudioModels["AUTO_DISCOVER"] = "";
+ })(LMStudioModels || (LMStudioModels = {}));
+ /**
+ * llama.cpp serves a single model loaded at server startup.
+ * Default: empty string → uses whatever is loaded.
+ */
+ export var LlamaCppModels;
+ (function (LlamaCppModels) {
+ /** Sentinel value — uses the model loaded by the llama-server process */
+ LlamaCppModels["AUTO_DISCOVER"] = "";
+ })(LlamaCppModels || (LlamaCppModels = {}));
  // ============================================================================
  // ANTHROPIC OAUTH CONSTANTS
  // ============================================================================
@@ -9,7 +9,7 @@ import { TelemetryHandler } from "./modules/TelemetryHandler.js";
  * Tools are integrated as first-class citizens - always available by default
  */
  export declare abstract class BaseProvider implements AIProvider {
- protected readonly modelName: string;
+ protected modelName: string;
  protected readonly providerName: AIProviderName;
  protected readonly defaultTimeout: number;
  protected middlewareOptions?: MiddlewareFactoryOptions;
@@ -29,13 +29,22 @@ export declare abstract class BaseProvider implements AIProvider {
  traceId: string;
  parentSpanId: string;
  } | null): void;
- private readonly messageBuilder;
- private readonly streamHandler;
- private readonly generationHandler;
- protected readonly telemetryHandler: TelemetryHandler;
- private readonly utilities;
+ private messageBuilder;
+ private streamHandler;
+ private generationHandler;
+ protected telemetryHandler: TelemetryHandler;
+ private utilities;
  private readonly toolsManager;
  constructor(modelName?: string, providerName?: AIProviderName, neurolink?: NeuroLink, middleware?: MiddlewareFactoryOptions);
+ /**
+ * Update modelName and rebuild composition handlers with the new value.
+ *
+ * Auto-discovery providers (lm-studio, llamacpp) call this once they have
+ * resolved the loaded model from `/v1/models`. Without this, handlers
+ * (TelemetryHandler, MessageBuilder, ...) keep the pre-discovery name and
+ * pricing / span / log metadata reports the stale value.
+ */
+ protected refreshHandlersForModel(model: string): void;
  /**
  * Check if this provider supports tool/function calling
  * Override in subclasses to disable tools for specific providers or models
@@ -23,6 +23,10 @@ import { Utilities } from "./modules/Utilities.js";
  * Tools are integrated as first-class citizens - always available by default
  */
  export class BaseProvider {
+ // Not `readonly` because providers that auto-discover the model from a
+ // /v1/models endpoint (lm-studio, llamacpp) need to update modelName after
+ // construction so handlers (TelemetryHandler, MessageBuilder) cache the
+ // resolved name. All other providers treat this as effectively readonly.
  modelName;
  providerName;
  defaultTimeout = 30000; // 30 seconds
@@ -43,6 +47,11 @@ export class BaseProvider {
  this._traceContext = ctx;
  }
  // Composition modules - Single Responsibility Principle
+ // Handlers below are not `readonly` so that providers which auto-discover
+ // their model after construction (lm-studio, llamacpp) can rebuild them
+ // via `refreshHandlersForModel(...)` and propagate the resolved name into
+ // pricing / telemetry / span attributes. All other providers leave these
+ // alone.
  messageBuilder;
  streamHandler;
  generationHandler;
@@ -67,6 +76,25 @@ export class BaseProvider {
  fixSchemaForOpenAIStrictMode: (schema) => this.fixSchemaForOpenAIStrictMode(schema),
  });
  }
+ /**
+ * Update modelName and rebuild composition handlers with the new value.
+ *
+ * Auto-discovery providers (lm-studio, llamacpp) call this once they have
+ * resolved the loaded model from `/v1/models`. Without this, handlers
+ * (TelemetryHandler, MessageBuilder, ...) keep the pre-discovery name and
+ * pricing / span / log metadata reports the stale value.
+ */
+ refreshHandlersForModel(model) {
+ this.modelName = model;
+ trace
+ .getSpan(context.active())
+ ?.setAttribute(ATTR.GEN_AI_MODEL, this.modelName);
+ this.messageBuilder = new MessageBuilder(this.providerName, this.modelName);
+ this.streamHandler = new StreamHandler(this.providerName, this.modelName);
+ this.telemetryHandler = new TelemetryHandler(this.providerName, this.modelName, this.neurolink);
+ this.generationHandler = new GenerationHandler(this.providerName, this.modelName, () => this.supportsTools(), (options, type) => this.telemetryHandler.getTelemetryConfig(options, type), (toolCalls, toolResults, options, timestamp) => this.handleToolExecutionStorage(toolCalls, toolResults, options, timestamp), () => this.neurolink?.getEventEmitter());
+ this.utilities = new Utilities(this.providerName, this.modelName, this.defaultTimeout, this.middlewareOptions);
+ }
  /**
  * Check if this provider supports tool/function calling
  * Override in subclasses to disable tools for specific providers or models
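
A hedged sketch of how an auto-discovery provider might use this hook. Only `refreshHandlersForModel` comes from the hunk above; the subclass, the `/v1/models` fetch, and the `discoverLoadedModel` helper name are illustrative assumptions:

    class ExampleLocalProvider extends BaseProvider {
      // Illustrative helper (not from the package): ask the local OpenAI-compatible
      // server which model it actually has loaded, then rebuild handlers so pricing,
      // telemetry, and span metadata report the discovered name.
      async discoverLoadedModel(baseURL) {
        const res = await fetch(`${baseURL}/models`);
        const { data } = await res.json();
        const discovered = data?.[0]?.id; // e.g. "qwen2.5-7b-instruct"
        if (discovered) {
          this.refreshHandlersForModel(discovered); // hook added in this release
        }
        return discovered;
      }
    }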
@@ -1,6 +1,6 @@
  import { ProviderFactory } from "./providerFactory.js";
  import { logger } from "../utils/logger.js";
- import { AIProviderName, GoogleAIModels, OpenAIModels, AnthropicModels, VertexModels, MistralModels, OllamaModels, LiteLLMModels, HuggingFaceModels, } from "../constants/enums.js";
+ import { AIProviderName, GoogleAIModels, OpenAIModels, AnthropicModels, VertexModels, MistralModels, OllamaModels, LiteLLMModels, HuggingFaceModels, DeepSeekModels, NvidiaNimModels, } from "../constants/enums.js";
  /**
  * Provider Registry - registers all providers with the factory
  * This is where we migrate providers one by one to the new pattern
@@ -128,6 +128,30 @@ export class ProviderRegistry {
  const { AmazonSageMakerProvider } = await import("../providers/amazonSagemaker.js");
  return new AmazonSageMakerProvider(modelName, undefined, region, undefined, sagemakerCreds);
  }, process.env.SAGEMAKER_MODEL || "sagemaker-model", ["sagemaker", "aws-sagemaker"]);
+ // Register DeepSeek provider
+ ProviderFactory.registerProvider(AIProviderName.DEEPSEEK, async (modelName, _providerName, sdk, _region, credentials) => {
+ const deepseekCreds = credentials;
+ const { DeepSeekProvider } = await import("../providers/deepseek.js");
+ return new DeepSeekProvider(modelName, sdk, undefined, deepseekCreds);
+ }, process.env.DEEPSEEK_MODEL || DeepSeekModels.DEEPSEEK_CHAT, ["deepseek", "ds"]);
+ // Register NVIDIA NIM provider
+ ProviderFactory.registerProvider(AIProviderName.NVIDIA_NIM, async (modelName, _providerName, sdk, _region, credentials) => {
+ const nimCreds = credentials;
+ const { NvidiaNimProvider } = await import("../providers/nvidiaNim.js");
+ return new NvidiaNimProvider(modelName, sdk, undefined, nimCreds);
+ }, process.env.NVIDIA_NIM_MODEL || NvidiaNimModels.LLAMA_3_3_70B_INSTRUCT, ["nvidia", "nim", "nvidia-nim"]);
+ // Register LM Studio provider (local)
+ ProviderFactory.registerProvider(AIProviderName.LM_STUDIO, async (modelName, _providerName, sdk, _region, credentials) => {
+ const lmStudioCreds = credentials;
+ const { LMStudioProvider } = await import("../providers/lmStudio.js");
+ return new LMStudioProvider(modelName, sdk, undefined, lmStudioCreds);
+ }, process.env.LM_STUDIO_MODEL || undefined, ["lmstudio", "lm-studio", "lms"]);
+ // Register llama.cpp provider (local)
+ ProviderFactory.registerProvider(AIProviderName.LLAMACPP, async (modelName, _providerName, sdk, _region, credentials) => {
+ const llamaCppCreds = credentials;
+ const { LlamaCppProvider } = await import("../providers/llamaCpp.js");
+ return new LlamaCppProvider(modelName, sdk, undefined, llamaCppCreds);
+ }, process.env.LLAMACPP_MODEL || undefined, ["llamacpp", "llama.cpp", "llama-cpp"]);
  logger.debug("All providers registered successfully");
  this.registered = true;
  // ===== TTS HANDLER REGISTRATION =====
@@ -50,7 +50,19 @@ const PROXY_PROVIDERS = new Set(["litellm", "openrouter"]);
  */
  function normalizeVisionProvider(provider) {
  const lower = provider.toLowerCase();
- switch (lower) {
+ // Strip non-alpha characters so alias forms (e.g. "lm-studio", "lm_studio",
+ // "llama.cpp", "nvidia_nim") all collapse onto a canonical key. Mirrors
+ // the alias-normalization pattern used in pricing.ts and contextWindows.ts.
+ const stripped = lower.replace(/[^a-z]/g, "");
+ switch (stripped) {
+ case "lmstudio":
+ return "lm-studio";
+ case "llamacpp":
+ return "llamacpp";
+ case "nvidianim":
+ return "nvidia-nim";
+ case "googleaistudio":
+ return "google-ai";
  case "or":
  return "openrouter";
  default:
@@ -436,6 +448,32 @@ const VISION_CAPABILITIES = {
  "meta-llama-4-maverick-17b-128e-instruct",
  "meta-llama-4-scout-17b-16e-instruct",
  ],
+ // DeepSeek has no vision support — empty list
+ deepseek: [],
+ "nvidia-nim": [
+ "meta/llama-3.2-90b-vision-instruct",
+ "meta/llama-3.2-11b-vision-instruct",
+ ],
+ // LM Studio + llama.cpp: vision depends on the loaded model.
+ // Substrings must point at known multimodal variants only — bare
+ // "llama-3.2" matches the text-only Llama-3.2-1B/3B chat models.
+ "lm-studio": [
+ "llava",
+ "llama-3.2-11b-vision",
+ "llama-3.2-90b-vision",
+ "vision-instruct",
+ "qwen2-vl",
+ "qwen2.5-vl",
+ "phi-3-vision",
+ ],
+ llamacpp: [
+ "llava",
+ "llama-3.2-11b-vision",
+ "llama-3.2-90b-vision",
+ "vision-instruct",
+ "qwen2-vl",
+ "phi-3-vision",
+ ],
  };
  /**
  * Provider Image Adapter - Smart routing and formatting
@@ -535,6 +573,13 @@ export class ProviderImageAdapter {
  if (!supportedModels) {
  return false;
  }
+ // An empty list means the provider has NO vision support (e.g. deepseek).
+ // Without this guard, the no-model branch below would return `true` for
+ // every provider that has an entry in VISION_CAPABILITIES — even an empty
+ // one — letting vision requests through to a text-only API.
+ if (supportedModels.length === 0) {
+ return false;
+ }
  if (!model) {
  return true; // Provider supports vision, but need to check specific model
  }
@@ -562,7 +607,12 @@ export class ProviderImageAdapter {
  * Get all vision-capable providers
  */
  static getVisionProviders() {
- return Object.keys(VISION_CAPABILITIES);
+ // Filter out providers whose allowlist is empty (e.g. deepseek). They're
+ // listed in VISION_CAPABILITIES so supportsVision can return false for
+ // them, but they should not be advertised as vision-capable.
+ return Object.entries(VISION_CAPABILITIES)
+ .filter(([, models]) => models.length > 0)
+ .map(([provider]) => provider);
  }
  /**
  * Count total "images" in a message (actual images + PDF pages)
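
A hedged illustration of what the two changes above combine to do. `getVisionProviders` and the name `supportsVision` appear in the hunks; the exact `supportsVision` call signature and its model-matching behaviour are assumptions:

    // deepseek keeps an entry in VISION_CAPABILITIES but with an empty allowlist,
    // so it is no longer advertised and now fails the capability check outright.
    ProviderImageAdapter.getVisionProviders();       // includes "nvidia-nim", "lm-studio", "llamacpp"; no "deepseek"
    ProviderImageAdapter.supportsVision("deepseek"); // false (empty-allowlist guard)
    ProviderImageAdapter.supportsVision("nvidia-nim");                    // true (provider-level support, model checked later)
    ProviderImageAdapter.supportsVision("nvidia-nim", "microsoft/phi-4"); // presumably false, not in the vision allowlist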
@@ -36,9 +36,13 @@ export declare function getContextWindowSize(provider: string, model?: string):
  /**
  * Calculate output token reserve for a given context window.
  *
+ * Returns the *real* token count that will be reserved for output so callers
+ * (`getAvailableInputTokens`, `BudgetChecker`, conversation-memory pruning, file
+ * summarisation) compute input budget against the actual outgoing maxTokens.
+ *
  * @param contextWindow - Total context window size
  * @param maxTokens - Explicit maxTokens from user config (if set)
- * @returns Number of tokens reserved for output
+ * @returns Number of tokens reserved for output (matches what's sent upstream)
  */
  export declare function getOutputReserve(contextWindow: number, maxTokens?: number): number;
  /**