@juspay/neurolink 9.59.6 → 9.60.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. package/CHANGELOG.md +12 -0
  2. package/README.md +11 -7
  3. package/dist/adapters/providerImageAdapter.js +52 -2
  4. package/dist/browser/neurolink.min.js +352 -352
  5. package/dist/cli/commands/proxy.js +54 -11
  6. package/dist/cli/factories/commandFactory.js +15 -1
  7. package/dist/cli/utils/interactiveSetup.js +64 -0
  8. package/dist/constants/contextWindows.d.ts +5 -1
  9. package/dist/constants/contextWindows.js +67 -3
  10. package/dist/constants/enums.d.ts +52 -0
  11. package/dist/constants/enums.js +63 -0
  12. package/dist/core/baseProvider.d.ts +15 -6
  13. package/dist/core/baseProvider.js +28 -0
  14. package/dist/factories/providerRegistry.js +25 -1
  15. package/dist/lib/adapters/providerImageAdapter.js +52 -2
  16. package/dist/lib/constants/contextWindows.d.ts +5 -1
  17. package/dist/lib/constants/contextWindows.js +67 -3
  18. package/dist/lib/constants/enums.d.ts +52 -0
  19. package/dist/lib/constants/enums.js +63 -0
  20. package/dist/lib/core/baseProvider.d.ts +15 -6
  21. package/dist/lib/core/baseProvider.js +28 -0
  22. package/dist/lib/factories/providerRegistry.js +25 -1
  23. package/dist/lib/providers/deepseek.d.ts +29 -0
  24. package/dist/lib/providers/deepseek.js +216 -0
  25. package/dist/lib/providers/index.d.ts +4 -0
  26. package/dist/lib/providers/index.js +4 -0
  27. package/dist/lib/providers/llamaCpp.d.ts +34 -0
  28. package/dist/lib/providers/llamaCpp.js +315 -0
  29. package/dist/lib/providers/lmStudio.d.ts +34 -0
  30. package/dist/lib/providers/lmStudio.js +306 -0
  31. package/dist/lib/providers/nvidiaNim.d.ts +31 -0
  32. package/dist/lib/providers/nvidiaNim.js +354 -0
  33. package/dist/lib/proxy/proxyFetch.d.ts +9 -0
  34. package/dist/lib/proxy/proxyFetch.js +6 -1
  35. package/dist/lib/types/providers.d.ts +37 -2
  36. package/dist/lib/types/providers.js +1 -1
  37. package/dist/lib/utils/modelChoices.js +68 -4
  38. package/dist/lib/utils/pricing.d.ts +5 -0
  39. package/dist/lib/utils/pricing.js +94 -3
  40. package/dist/lib/utils/providerConfig.d.ts +16 -0
  41. package/dist/lib/utils/providerConfig.js +82 -0
  42. package/dist/providers/deepseek.d.ts +29 -0
  43. package/dist/providers/deepseek.js +215 -0
  44. package/dist/providers/index.d.ts +4 -0
  45. package/dist/providers/index.js +4 -0
  46. package/dist/providers/llamaCpp.d.ts +34 -0
  47. package/dist/providers/llamaCpp.js +314 -0
  48. package/dist/providers/lmStudio.d.ts +34 -0
  49. package/dist/providers/lmStudio.js +305 -0
  50. package/dist/providers/nvidiaNim.d.ts +31 -0
  51. package/dist/providers/nvidiaNim.js +353 -0
  52. package/dist/proxy/proxyFetch.d.ts +9 -0
  53. package/dist/proxy/proxyFetch.js +6 -1
  54. package/dist/types/providers.d.ts +37 -2
  55. package/dist/utils/modelChoices.js +68 -4
  56. package/dist/utils/pricing.d.ts +5 -0
  57. package/dist/utils/pricing.js +94 -3
  58. package/dist/utils/providerConfig.d.ts +16 -0
  59. package/dist/utils/providerConfig.js +82 -0
  60. package/package.json +19 -12
@@ -0,0 +1,314 @@
1
+ import { createOpenAI } from "@ai-sdk/openai";
2
+ import { stepCountIs, streamText } from "ai";
3
+ import { BaseProvider } from "../core/baseProvider.js";
4
+ import { DEFAULT_MAX_STEPS } from "../core/constants.js";
5
+ import { streamAnalyticsCollector } from "../core/streamAnalytics.js";
6
+ import { createProxyFetch, maskProxyUrl } from "../proxy/proxyFetch.js";
7
+ import { tracers, ATTR, withClientSpan } from "../telemetry/index.js";
8
+ import { logger } from "../utils/logger.js";
9
+ import { composeAbortSignals, createTimeoutController, TimeoutError, } from "../utils/timeout.js";
10
+ import { emitToolEndFromStepFinish } from "../utils/toolEndEmitter.js";
11
+ import { resolveToolChoice } from "../utils/toolChoice.js";
12
+ import { toAnalyticsStreamResult } from "./providerTypeUtils.js";
13
+ const makeLoggingFetch = (provider) => {
14
+ const base = createProxyFetch();
15
+ return (async (input, init) => {
16
+ const url = typeof input === "string"
17
+ ? input
18
+ : input instanceof URL
19
+ ? input.toString()
20
+ : input.url;
21
+ const reqSize = init?.body && typeof init.body === "string" ? init.body.length : 0;
22
+ const response = await base(input, init);
23
+ if (!response.ok) {
24
+ // Mask any embedded credentials / signed query params before logging.
25
+ // Fall back to "<redacted>" rather than the raw URL on a masking miss.
26
+ const safeUrl = maskProxyUrl(url) ?? "<redacted>";
27
+ // Don't log the raw upstream body — it can echo prompt fragments or
28
+ // tool payloads. Log size + status + URL only. Opt into bodies via
29
+ // NEUROLINK_DEBUG_HTTP=1 for local debugging.
30
+ if (process.env.NEUROLINK_DEBUG_HTTP === "1") {
31
+ const clone = response.clone();
32
+ const body = await clone.text().catch(() => "<unreadable>");
33
+ logger.warn(`[${provider}] upstream ${response.status}`, {
34
+ url: safeUrl,
35
+ body: body.slice(0, 800),
36
+ reqSize,
37
+ });
38
+ }
39
+ else {
40
+ logger.warn(`[${provider}] upstream ${response.status} url=${safeUrl} reqSize=${reqSize}`);
41
+ }
42
+ }
43
+ return response;
44
+ });
45
+ };
46
// Default llama-server endpoint and the placeholder key the AI SDK requires
// even though llama-server itself performs no authentication.
const LLAMACPP_DEFAULT_BASE_URL = "http://localhost:8080/v1";
const LLAMACPP_PLACEHOLDER_KEY = "llamacpp";
const FALLBACK_MODEL = "loaded-model";
/**
 * Resolves the llama-server base URL: LLAMACPP_BASE_URL env var when set and
 * non-empty, otherwise the localhost default.
 */
const getLlamaCppBaseURL = () => {
    const fromEnv = process.env.LLAMACPP_BASE_URL;
    return fromEnv ? fromEnv : LLAMACPP_DEFAULT_BASE_URL;
};
52
/**
 * llama.cpp Provider
 * Wraps a llama-server process (https://github.com/ggerganov/llama.cpp) that
 * exposes an OpenAI-compatible API at http://localhost:8080/v1 by default.
 * llama-server hosts ONE model loaded at startup; /v1/models returns just that.
 */
export class LlamaCppProvider extends BaseProvider {
    // Memoized AI SDK model handle; set only after a successful resolution.
    model;
    // Caller-supplied model name — never overwritten by discovery, so a
    // FALLBACK_MODEL miss can't poison the explicit-vs-discover branch on
    // subsequent calls.
    requestedModelName;
    baseURL;
    apiKey;
    discoveredModel;
    llamaCppClient;
    /**
     * @param {string} [modelName] - Explicit model id; empty/undefined triggers auto-discovery.
     * @param {unknown} [sdk] - Optional NeuroLink instance (duck-typed via "getInMemoryServers").
     * @param {string} [_region] - Unused; kept for provider-constructor signature parity.
     * @param {{baseURL?: string, apiKey?: string}} [credentials] - Server URL / auth overrides.
     */
    constructor(modelName, sdk, _region, credentials) {
        const validatedNeurolink = sdk && typeof sdk === "object" && "getInMemoryServers" in sdk
            ? sdk
            : undefined;
        super(modelName, "llamacpp", validatedNeurolink);
        this.requestedModelName = modelName;
        this.baseURL = credentials?.baseURL ?? getLlamaCppBaseURL();
        // llama-server doesn't authenticate, but the AI SDK's createOpenAI() requires
        // an apiKey. Allow override via credentials/env for users who run llama-server
        // behind an auth-proxying reverse-proxy.
        this.apiKey =
            credentials?.apiKey ??
                process.env.LLAMACPP_API_KEY ??
                LLAMACPP_PLACEHOLDER_KEY;
        this.llamaCppClient = createOpenAI({
            baseURL: this.baseURL,
            apiKey: this.apiKey,
            fetch: makeLoggingFetch("llamacpp"),
        });
        logger.debug("llama.cpp Provider initialized", {
            modelName: this.modelName,
            providerName: this.providerName,
            baseURL: this.baseURL,
        });
    }
    /**
     * Lists model ids exposed by llama-server's /v1/models endpoint.
     * @param {AbortSignal} [callerSignal] - Caller cancellation, composed with a 5s cap.
     * @returns {Promise<string[]>} Model ids (llama-server returns exactly one).
     * @throws {Error} On non-2xx responses or a malformed payload.
     */
    async getAvailableModels(callerSignal) {
        const url = `${this.baseURL.replace(/\/$/, "")}/models`;
        // Use the proxy-aware fetch + bearer auth so users running llama-server
        // behind an auth-proxying reverse-proxy can still discover the model.
        // Compose the caller's request signal (per-request timeout / abort) with
        // a fixed 5s discovery cap so cancellation propagates AND a hung server
        // can't stall provider initialization.
        const proxyFetch = createProxyFetch();
        const discoveryTimeout = AbortSignal.timeout(5000);
        const composedSignal = callerSignal
            ? AbortSignal.any([callerSignal, discoveryTimeout])
            : discoveryTimeout;
        const response = await proxyFetch(url, {
            headers: this.apiKey && this.apiKey !== LLAMACPP_PLACEHOLDER_KEY
                ? { Authorization: `Bearer ${this.apiKey}` }
                : undefined,
            signal: composedSignal,
        });
        if (!response.ok) {
            throw new Error(`llama-server /v1/models returned ${response.status}: ${response.statusText}`);
        }
        const data = (await response.json());
        // Guard against malformed payloads (e.g. a reverse-proxy error page that
        // still returns 200): fail with a clear message instead of an opaque
        // "Cannot read properties of undefined" TypeError.
        if (!Array.isArray(data?.data)) {
            throw new Error("llama-server /v1/models returned an unexpected payload (missing 'data' array)");
        }
        return data.data.map((m) => m.id);
    }
    /**
     * Resolves the AI SDK model handle, auto-discovering the loaded model when
     * no explicit name was requested. Memoizes only on success so a discovery
     * miss can be retried on the next call.
     * @param {AbortSignal} [signal] - Caller cancellation for the discovery probe.
     */
    async getAISDKModel(signal) {
        if (this.model) {
            return this.model;
        }
        let modelToUse;
        let discoverySucceeded = false;
        // Use requestedModelName, not this.modelName — refreshHandlersForModel()
        // mutates this.modelName, so on a retry after a discovery miss the
        // FALLBACK_MODEL would look like an explicit user choice. See lmStudio.ts.
        const explicit = this.requestedModelName;
        if (explicit && explicit.trim() !== "") {
            modelToUse = explicit;
            discoverySucceeded = true; // explicit user choice — treat as success
        }
        else {
            try {
                const models = await this.getAvailableModels(signal);
                if (models.length > 0) {
                    this.discoveredModel = models[0];
                    modelToUse = this.discoveredModel;
                    discoverySucceeded = true;
                    logger.info(`llama.cpp loaded model: ${modelToUse}`);
                }
                else {
                    modelToUse = FALLBACK_MODEL;
                }
            }
            catch (error) {
                logger.warn(`llama.cpp model discovery failed: ${error instanceof Error ? error.message : String(error)}`);
                modelToUse = FALLBACK_MODEL;
            }
        }
        // Persist resolved model on the instance and rebuild the composed
        // handlers (TelemetryHandler, MessageBuilder, etc.) so pricing /
        // telemetry / span attributes report the discovered model name. Plain
        // assignment to `this.modelName` is not enough — handlers cached the
        // pre-discovery value at construction time.
        this.refreshHandlersForModel(modelToUse);
        // .chat() — llama-server exposes /v1/chat/completions, not /v1/responses
        const resolvedModel = this.llamaCppClient.chat(modelToUse);
        // Only memoize on success — see lmStudio.ts for the same rationale: a
        // discovery miss should let the next call retry instead of being stuck
        // on FALLBACK_MODEL until the provider instance is recreated.
        if (discoverySucceeded) {
            this.model = resolvedModel;
        }
        return resolvedModel;
    }
    /**
     * Opens the provider stream span and delegates to executeStreamInner.
     * @param {StreamOptions} options
     * @param {unknown} [_analysisSchema] - Unused; kept for interface parity.
     */
    async executeStream(options, _analysisSchema) {
        // Resolve the llama.cpp model BEFORE opening the span so OTEL
        // attributes, MessageBuilder, and downstream image/tool adapters all see
        // the discovered model id rather than the empty pre-discovery placeholder.
        // Pass the caller's abort signal so user cancellation / per-request
        // timeouts are honored during the discovery probe.
        await this.getAISDKModel(options.abortSignal);
        return withClientSpan({
            name: "neurolink.provider.stream",
            tracer: tracers.provider,
            attributes: {
                [ATTR.GEN_AI_SYSTEM]: "llamacpp",
                [ATTR.GEN_AI_MODEL]: this.modelName || this.discoveredModel || FALLBACK_MODEL,
                [ATTR.GEN_AI_OPERATION]: "stream",
                [ATTR.NL_STREAM_MODE]: true,
            },
        }, async () => this.executeStreamInner(options));
    }
    /**
     * Runs streamText against llama-server with tools, timeout, and analytics
     * wiring. Throws a provider-formatted error on failure.
     * @param {StreamOptions} options
     */
    async executeStreamInner(options) {
        this.validateStreamOptions(options);
        const startTime = Date.now();
        const timeout = this.getTimeout(options);
        const timeoutController = createTimeoutController(timeout, this.providerName, "stream");
        try {
            const shouldUseTools = !options.disableTools && this.supportsTools();
            const tools = shouldUseTools
                ? options.tools || (await this.getAllTools())
                : {};
            // Resolve the AI SDK model BEFORE building messages so message/image
            // adapters see the same handlers/model that streamText will use. See
            // lmStudio.ts for the same rationale.
            const model = await this.getAISDKModelWithMiddleware(options);
            const messages = await this.buildMessagesForStream(options);
            const result = await streamText({
                model,
                messages,
                temperature: options.temperature,
                maxOutputTokens: options.maxTokens,
                tools,
                stopWhen: stepCountIs(options.maxSteps || DEFAULT_MAX_STEPS),
                toolChoice: resolveToolChoice(options, tools, shouldUseTools),
                abortSignal: composeAbortSignals(options.abortSignal, timeoutController?.controller.signal),
                experimental_telemetry: this.telemetryHandler.getTelemetryConfig(options),
                experimental_repairToolCall: this.getToolCallRepairFn(options),
                onStepFinish: ({ toolCalls, toolResults }) => {
                    emitToolEndFromStepFinish(this.neurolink?.getEventEmitter(), toolResults);
                    // Best-effort persistence — a storage failure must not kill the stream.
                    this.handleToolExecutionStorage(toolCalls, toolResults, options, new Date()).catch((error) => {
                        logger.warn("[LlamaCppProvider] Failed to store tool executions", {
                            provider: this.providerName,
                            error: error instanceof Error ? error.message : String(error),
                        });
                    });
                },
            });
            timeoutController?.cleanup();
            const transformedStream = this.createTextStream(result);
            const analyticsPromise = streamAnalyticsCollector.createAnalytics(this.providerName, this.modelName || this.discoveredModel || FALLBACK_MODEL, toAnalyticsStreamResult(result), Date.now() - startTime, {
                requestId: `llamacpp-stream-${Date.now()}`,
                streamingMode: true,
            });
            return {
                stream: transformedStream,
                provider: this.providerName,
                model: this.modelName || this.discoveredModel || FALLBACK_MODEL,
                analytics: analyticsPromise,
                metadata: { startTime, streamId: `llamacpp-${Date.now()}` },
            };
        }
        catch (error) {
            timeoutController?.cleanup();
            throw this.handleProviderError(error);
        }
    }
    getProviderName() {
        return this.providerName;
    }
    /** Returns LLAMACPP_MODEL from the environment, or "" to trigger auto-discovery. */
    getDefaultModel() {
        return process.env.LLAMACPP_MODEL || "";
    }
    /**
     * Maps timeouts / connection failures / 400s to user-actionable errors.
     * @param {unknown} error
     * @returns {Error}
     */
    formatProviderError(error) {
        if (error instanceof TimeoutError) {
            return new Error(`llama.cpp request timed out: ${error.message}`);
        }
        const errorRecord = error;
        const message = typeof errorRecord?.message === "string"
            ? errorRecord.message
            : "Unknown error";
        const cause = errorRecord?.cause ?? {};
        const code = (errorRecord?.code ?? cause?.code);
        if (code === "ECONNREFUSED" ||
            message.includes("ECONNREFUSED") ||
            message.includes("Failed to fetch") ||
            message.includes("fetch failed")) {
            return new Error(`llama.cpp server not reachable at ${this.baseURL}. ` +
                "Start it with: ./llama-server -m model.gguf --port 8080");
        }
        if (message.includes("400")) {
            return new Error("llama.cpp rejected the request. Common cause: model doesn't support tools (start llama-server with --jinja for tool support).");
        }
        return new Error(`llama.cpp error: ${message}`);
    }
    /**
     * Probes /health, then /v1/models, up to 3 attempts with 500ms backoff.
     * @returns {Promise<boolean>} true as soon as either endpoint responds 2xx.
     */
    async validateConfiguration() {
        // Retry up to 3x with 500ms backoff. llama-server can be briefly unresponsive
        // under load (CPU inference saturates the event loop). Use the proxy-aware
        // fetch + bearer auth header so reverse-proxied setups still validate.
        const healthURL = this.baseURL.replace(/\/v1\/?$/, "/health");
        const modelsURL = `${this.baseURL.replace(/\/$/, "")}/models`;
        const proxyFetch = createProxyFetch();
        const headers = this.apiKey && this.apiKey !== LLAMACPP_PLACEHOLDER_KEY
            ? { Authorization: `Bearer ${this.apiKey}` }
            : undefined;
        for (let attempt = 0; attempt < 3; attempt++) {
            try {
                const r = await proxyFetch(healthURL, {
                    headers,
                    signal: AbortSignal.timeout(2000),
                });
                if (r.ok) {
                    return true;
                }
            }
            catch {
                /* fall through */
            }
            try {
                const r2 = await proxyFetch(modelsURL, {
                    headers,
                    signal: AbortSignal.timeout(2000),
                });
                if (r2.ok) {
                    return true;
                }
            }
            catch {
                /* fall through */
            }
            // Only back off BETWEEN attempts — sleeping after the final failed
            // attempt just added 500ms of dead latency before returning false.
            if (attempt < 2) {
                await new Promise((resolve) => setTimeout(resolve, 500));
            }
        }
        return false;
    }
    /** Snapshot of the provider's resolved configuration for diagnostics. */
    getConfiguration() {
        return {
            provider: this.providerName,
            model: this.modelName || this.discoveredModel || FALLBACK_MODEL,
            defaultModel: this.getDefaultModel(),
            baseURL: this.baseURL,
        };
    }
}
export default LlamaCppProvider;
@@ -0,0 +1,34 @@
1
+ import { type LanguageModel } from "ai";
2
+ import type { AIProviderName } from "../constants/enums.js";
3
+ import { BaseProvider } from "../core/baseProvider.js";
4
+ import type { NeurolinkCredentials, StreamOptions, StreamResult, ValidationSchema } from "../types/index.js";
5
/**
 * LM Studio Provider
 * Wraps the LM Studio local server (https://lmstudio.ai/) which exposes an
 * OpenAI-compatible API at http://localhost:1234/v1 by default.
 * Auto-discovers the loaded model via /v1/models if no model specified.
 */
export declare class LMStudioProvider extends BaseProvider {
    /** Memoized AI SDK model handle; set only after a successful resolution. */
    private model?;
    /** Model name passed by the caller; never overwritten by auto-discovery. */
    private readonly requestedModelName?;
    /** Server base URL: credentials override, else LM_STUDIO_BASE_URL, else localhost:1234/v1. */
    private baseURL;
    /** API key sent as bearer auth; a placeholder by default (LM Studio itself does not authenticate). */
    private apiKey;
    /** Model id found via /v1/models when no explicit model was requested. */
    private discoveredModel?;
    /** OpenAI-compatible AI SDK client bound to baseURL/apiKey. */
    private lmstudioClient;
    constructor(modelName?: string, sdk?: unknown, _region?: string, credentials?: NeurolinkCredentials["lmStudio"]);
    /** Fetches the model ids currently loaded in LM Studio via GET /v1/models. */
    private getAvailableModels;
    /** Resolves (and memoizes on success) the language model, auto-discovering when no model was requested. */
    protected getAISDKModel(signal?: AbortSignal): Promise<LanguageModel>;
    /** Streams a completion; resolves the model before opening the telemetry span. */
    protected executeStream(options: StreamOptions, _analysisSchema?: ValidationSchema): Promise<StreamResult>;
    private executeStreamInner;
    protected getProviderName(): AIProviderName;
    /** Returns LM_STUDIO_MODEL from the environment, or "" to trigger auto-discovery. */
    protected getDefaultModel(): string;
    /** Maps timeouts / connection failures / missing models to user-actionable errors. */
    protected formatProviderError(error: unknown): Error;
    /** True only when the server is reachable AND at least one model is loaded. */
    validateConfiguration(): Promise<boolean>;
    /** Snapshot of the provider's resolved configuration for diagnostics. */
    getConfiguration(): {
        provider: AIProviderName;
        model: string;
        defaultModel: string;
        baseURL: string;
    };
}
export default LMStudioProvider;
@@ -0,0 +1,305 @@
1
+ import { createOpenAI } from "@ai-sdk/openai";
2
+ import { stepCountIs, streamText } from "ai";
3
+ import { BaseProvider } from "../core/baseProvider.js";
4
+ import { DEFAULT_MAX_STEPS } from "../core/constants.js";
5
+ import { streamAnalyticsCollector } from "../core/streamAnalytics.js";
6
+ import { createProxyFetch, maskProxyUrl } from "../proxy/proxyFetch.js";
7
+ import { tracers, ATTR, withClientSpan } from "../telemetry/index.js";
8
+ import { logger } from "../utils/logger.js";
9
+ import { composeAbortSignals, createTimeoutController, TimeoutError, } from "../utils/timeout.js";
10
+ import { emitToolEndFromStepFinish } from "../utils/toolEndEmitter.js";
11
+ import { resolveToolChoice } from "../utils/toolChoice.js";
12
+ import { toAnalyticsStreamResult } from "./providerTypeUtils.js";
13
/**
 * Wraps the proxy-aware fetch so non-OK upstream responses are logged with a
 * sanitized URL and payload size. Full bodies are only logged when
 * NEUROLINK_DEBUG_HTTP=1 is set.
 */
const makeLoggingFetch = (provider) => {
    const proxiedFetch = createProxyFetch();
    return (async (resource, options) => {
        // Normalize string / URL / Request inputs to a plain URL string.
        let targetUrl;
        if (typeof resource === "string") {
            targetUrl = resource;
        }
        else if (resource instanceof URL) {
            targetUrl = resource.toString();
        }
        else {
            targetUrl = resource.url;
        }
        const requestBytes = options?.body && typeof options.body === "string"
            ? options.body.length
            : 0;
        const upstream = await proxiedFetch(resource, options);
        if (!upstream.ok) {
            // Mask any embedded credentials / signed query params before logging.
            // Fall back to "<redacted>" rather than the raw URL on a masking miss —
            // logging the unsanitized form would defeat the redaction.
            const sanitized = maskProxyUrl(targetUrl) ?? "<redacted>";
            // Don't log the raw upstream body or request payload — they can contain
            // user prompts, tool arguments, and other sensitive data. Log size +
            // status + URL only. Set NEUROLINK_DEBUG_HTTP=1 to opt into raw bodies.
            if (process.env.NEUROLINK_DEBUG_HTTP === "1") {
                const bodyText = await upstream
                    .clone()
                    .text()
                    .catch(() => "<unreadable>");
                logger.warn(`[${provider}] upstream ${upstream.status}`, {
                    url: sanitized,
                    body: bodyText.slice(0, 400),
                    reqSize: requestBytes,
                });
            }
            else {
                logger.warn(`[${provider}] upstream ${upstream.status} url=${sanitized} reqSize=${requestBytes}`);
            }
        }
        return upstream;
    });
};
47
// Default LM Studio endpoint and the placeholder key the AI SDK requires even
// though the local LM Studio server performs no authentication.
const LM_STUDIO_DEFAULT_BASE_URL = "http://localhost:1234/v1";
const LM_STUDIO_PLACEHOLDER_KEY = "lm-studio";
const FALLBACK_MODEL = "local-model";
/**
 * Resolves the LM Studio base URL: LM_STUDIO_BASE_URL env var when set and
 * non-empty, otherwise the localhost default.
 */
const getLmStudioBaseURL = () => {
    const fromEnv = process.env.LM_STUDIO_BASE_URL;
    return fromEnv ? fromEnv : LM_STUDIO_DEFAULT_BASE_URL;
};
53
/**
 * LM Studio Provider
 * Wraps the LM Studio local server (https://lmstudio.ai/) which exposes an
 * OpenAI-compatible API at http://localhost:1234/v1 by default.
 * Auto-discovers the loaded model via /v1/models if no model specified.
 */
export class LMStudioProvider extends BaseProvider {
    // Memoized AI SDK model handle; set only after a successful resolution.
    model;
    // The model name passed by the caller — never overwritten by auto-discovery,
    // so a discovery-miss FALLBACK_MODEL never poisons the next call's branch
    // through `if (explicit && explicit.trim() !== "")`.
    requestedModelName;
    baseURL;
    apiKey;
    discoveredModel;
    lmstudioClient;
    /**
     * @param {string} [modelName] - Explicit model id; empty/undefined triggers auto-discovery.
     * @param {unknown} [sdk] - Optional NeuroLink instance (duck-typed via "getInMemoryServers").
     * @param {string} [_region] - Unused; kept for provider-constructor signature parity.
     * @param {{baseURL?: string, apiKey?: string}} [credentials] - Server URL / auth overrides.
     */
    constructor(modelName, sdk, _region, credentials) {
        const validatedNeurolink = sdk && typeof sdk === "object" && "getInMemoryServers" in sdk
            ? sdk
            : undefined;
        super(modelName, "lm-studio", validatedNeurolink);
        this.requestedModelName = modelName;
        this.baseURL = credentials?.baseURL ?? getLmStudioBaseURL();
        // LM Studio's local server doesn't authenticate, but the AI SDK's
        // createOpenAI() requires an apiKey. Allow override via credentials/env
        // for users who run LM Studio behind an auth-proxying reverse-proxy.
        this.apiKey =
            credentials?.apiKey ??
                process.env.LM_STUDIO_API_KEY ??
                LM_STUDIO_PLACEHOLDER_KEY;
        this.lmstudioClient = createOpenAI({
            baseURL: this.baseURL,
            apiKey: this.apiKey,
            fetch: makeLoggingFetch("lm-studio"),
        });
        logger.debug("LM Studio Provider initialized", {
            modelName: this.modelName,
            providerName: this.providerName,
            baseURL: this.baseURL,
        });
    }
    /**
     * Lists model ids currently loaded in LM Studio via GET /v1/models.
     * @param {AbortSignal} [callerSignal] - Caller cancellation, composed with a 5s cap.
     * @returns {Promise<string[]>} Loaded model ids.
     * @throws {Error} On non-2xx responses or a malformed payload.
     */
    async getAvailableModels(callerSignal) {
        const url = `${this.baseURL.replace(/\/$/, "")}/models`;
        // Use the proxy-aware fetch + bearer auth header so users running LM
        // Studio behind an auth-proxying reverse-proxy can still discover models.
        // Compose the caller's request signal (per-request timeout / abort) with
        // a fixed 5s discovery cap so cancellation propagates AND a hung server
        // can't stall provider initialization.
        const proxyFetch = createProxyFetch();
        const discoveryTimeout = AbortSignal.timeout(5000);
        const composedSignal = callerSignal
            ? AbortSignal.any([callerSignal, discoveryTimeout])
            : discoveryTimeout;
        const response = await proxyFetch(url, {
            headers: this.apiKey && this.apiKey !== LM_STUDIO_PLACEHOLDER_KEY
                ? { Authorization: `Bearer ${this.apiKey}` }
                : undefined,
            signal: composedSignal,
        });
        if (!response.ok) {
            throw new Error(`LM Studio /v1/models returned ${response.status}: ${response.statusText}`);
        }
        const data = (await response.json());
        // Guard against malformed payloads (e.g. a reverse-proxy error page that
        // still returns 200): fail with a clear message instead of an opaque
        // "Cannot read properties of undefined" TypeError.
        if (!Array.isArray(data?.data)) {
            throw new Error("LM Studio /v1/models returned an unexpected payload (missing 'data' array)");
        }
        return data.data.map((m) => m.id);
    }
    /**
     * Resolves the AI SDK model handle, auto-discovering the loaded model when
     * no explicit name was requested. Memoizes only on success so a discovery
     * miss can be retried on the next call.
     * @param {AbortSignal} [signal] - Caller cancellation for the discovery probe.
     */
    async getAISDKModel(signal) {
        if (this.model) {
            return this.model;
        }
        let modelToUse;
        let discoverySucceeded = false;
        // Use requestedModelName, not this.modelName — refreshHandlersForModel()
        // mutates this.modelName, so on a retry after a discovery miss the
        // FALLBACK_MODEL would look like an explicit user choice and we'd never
        // re-attempt /v1/models. The constructor-captured name preserves intent.
        const explicit = this.requestedModelName;
        if (explicit && explicit.trim() !== "") {
            modelToUse = explicit;
            discoverySucceeded = true; // explicit user choice — treat as success
        }
        else {
            try {
                const models = await this.getAvailableModels(signal);
                if (models.length > 0) {
                    this.discoveredModel = models[0];
                    modelToUse = this.discoveredModel;
                    discoverySucceeded = true;
                    logger.info(`LM Studio auto-discovered model: ${modelToUse} (${models.length} loaded)`);
                }
                else {
                    modelToUse = FALLBACK_MODEL;
                    logger.warn("LM Studio /v1/models returned no models. Load a model in the LM Studio app.");
                }
            }
            catch (error) {
                logger.warn(`LM Studio model auto-discovery failed: ${error instanceof Error ? error.message : String(error)}`);
                modelToUse = FALLBACK_MODEL;
            }
        }
        // Persist resolved model on the instance and rebuild the composed
        // handlers (TelemetryHandler, MessageBuilder, etc.) so pricing /
        // telemetry / span attributes report the discovered model name. Plain
        // assignment to `this.modelName` is not enough — handlers cached the
        // pre-discovery value at construction time.
        this.refreshHandlersForModel(modelToUse);
        // .chat() — LM Studio exposes /v1/chat/completions, not /v1/responses
        const resolvedModel = this.lmstudioClient.chat(modelToUse);
        // Only memoize on actual success. After a discovery miss (server down,
        // empty /v1/models, /models 5xx), starting LM Studio or loading a model
        // should let the next call re-attempt discovery instead of being stuck
        // on FALLBACK_MODEL for the lifetime of this provider instance.
        if (discoverySucceeded) {
            this.model = resolvedModel;
        }
        return resolvedModel;
    }
    /**
     * Opens the provider stream span and delegates to executeStreamInner.
     * @param {StreamOptions} options
     * @param {unknown} [_analysisSchema] - Unused; kept for interface parity.
     */
    async executeStream(options, _analysisSchema) {
        // Resolve the LM Studio model BEFORE opening the span so OTEL
        // attributes, MessageBuilder, and downstream image/tool adapters all see
        // the discovered model id rather than the empty pre-discovery placeholder.
        // Pass the caller's abort signal so user cancellation / per-request
        // timeouts are honored during the discovery probe (not just after it).
        await this.getAISDKModel(options.abortSignal);
        return withClientSpan({
            name: "neurolink.provider.stream",
            tracer: tracers.provider,
            attributes: {
                [ATTR.GEN_AI_SYSTEM]: "lm-studio",
                [ATTR.GEN_AI_MODEL]: this.modelName || this.discoveredModel || FALLBACK_MODEL,
                [ATTR.GEN_AI_OPERATION]: "stream",
                [ATTR.NL_STREAM_MODE]: true,
            },
        }, async () => this.executeStreamInner(options));
    }
    /**
     * Runs streamText against LM Studio with tools, timeout, and analytics
     * wiring. Throws a provider-formatted error on failure.
     * @param {StreamOptions} options
     */
    async executeStreamInner(options) {
        this.validateStreamOptions(options);
        const startTime = Date.now();
        const timeout = this.getTimeout(options);
        const timeoutController = createTimeoutController(timeout, this.providerName, "stream");
        try {
            const shouldUseTools = !options.disableTools && this.supportsTools();
            const tools = shouldUseTools
                ? options.tools || (await this.getAllTools())
                : {};
            // Resolve the AI SDK model BEFORE building messages so message/image
            // adapters see the same handlers/model that streamText will use. Without
            // this, a fallback warm-up + late-server-start pattern could build
            // messages under FALLBACK_MODEL handlers and stream under a different
            // discovered model — and pay an extra `/v1/models` probe each time.
            const model = await this.getAISDKModelWithMiddleware(options);
            const messages = await this.buildMessagesForStream(options);
            const result = await streamText({
                model,
                messages,
                temperature: options.temperature,
                maxOutputTokens: options.maxTokens,
                tools,
                stopWhen: stepCountIs(options.maxSteps || DEFAULT_MAX_STEPS),
                toolChoice: resolveToolChoice(options, tools, shouldUseTools),
                abortSignal: composeAbortSignals(options.abortSignal, timeoutController?.controller.signal),
                experimental_telemetry: this.telemetryHandler.getTelemetryConfig(options),
                experimental_repairToolCall: this.getToolCallRepairFn(options),
                onStepFinish: ({ toolCalls, toolResults }) => {
                    emitToolEndFromStepFinish(this.neurolink?.getEventEmitter(), toolResults);
                    // Best-effort persistence — a storage failure must not kill the stream.
                    this.handleToolExecutionStorage(toolCalls, toolResults, options, new Date()).catch((error) => {
                        logger.warn("[LMStudioProvider] Failed to store tool executions", {
                            provider: this.providerName,
                            error: error instanceof Error ? error.message : String(error),
                        });
                    });
                },
            });
            timeoutController?.cleanup();
            const transformedStream = this.createTextStream(result);
            const analyticsPromise = streamAnalyticsCollector.createAnalytics(this.providerName, this.modelName || this.discoveredModel || FALLBACK_MODEL, toAnalyticsStreamResult(result), Date.now() - startTime, {
                requestId: `lmstudio-stream-${Date.now()}`,
                streamingMode: true,
            });
            return {
                stream: transformedStream,
                provider: this.providerName,
                model: this.modelName || this.discoveredModel || FALLBACK_MODEL,
                analytics: analyticsPromise,
                metadata: { startTime, streamId: `lmstudio-${Date.now()}` },
            };
        }
        catch (error) {
            timeoutController?.cleanup();
            throw this.handleProviderError(error);
        }
    }
    getProviderName() {
        return this.providerName;
    }
    /** Returns LM_STUDIO_MODEL from the environment, or "" to trigger auto-discovery. */
    getDefaultModel() {
        return process.env.LM_STUDIO_MODEL || "";
    }
    /**
     * Maps timeouts / connection failures / missing models to user-actionable errors.
     * @param {unknown} error
     * @returns {Error}
     */
    formatProviderError(error) {
        if (error instanceof TimeoutError) {
            return new Error(`LM Studio request timed out: ${error.message}`);
        }
        const errorRecord = error;
        const message = typeof errorRecord?.message === "string"
            ? errorRecord.message
            : "Unknown error";
        const cause = errorRecord?.cause ?? {};
        const code = (errorRecord?.code ?? cause?.code);
        if (code === "ECONNREFUSED" ||
            message.includes("ECONNREFUSED") ||
            message.includes("Failed to fetch") ||
            message.includes("fetch failed")) {
            return new Error(`LM Studio server not reachable at ${this.baseURL}. ` +
                `Open the LM Studio app, load a model, and click "Start Server".`);
        }
        if (message.includes("model_not_found") || message.includes("404")) {
            return new Error(`LM Studio model '${this.modelName}' is not loaded. Load it in the LM Studio app first.`);
        }
        return new Error(`LM Studio error: ${message}`);
    }
    /**
     * Health check: the server must respond on /v1/models AND report at least
     * one loaded model to count as usable.
     * @returns {Promise<boolean>}
     */
    async validateConfiguration() {
        try {
            const url = `${this.baseURL.replace(/\/$/, "")}/models`;
            const proxyFetch = createProxyFetch();
            const r = await proxyFetch(url, {
                headers: this.apiKey && this.apiKey !== LM_STUDIO_PLACEHOLDER_KEY
                    ? { Authorization: `Bearer ${this.apiKey}` }
                    : undefined,
                signal: AbortSignal.timeout(5000),
            });
            if (!r.ok) {
                return false;
            }
            // A 200 with an empty data array means LM Studio is up but no model is
            // loaded — `getAISDKModel()` will fall back to FALLBACK_MODEL and the
            // first real request will fail. Require at least one loaded model so
            // health checks honestly reflect whether the provider is usable.
            const data = (await r.json().catch(() => null));
            return Boolean(data?.data?.some((m) => typeof m?.id === "string" && m.id.trim().length > 0));
        }
        catch {
            return false;
        }
    }
    /** Snapshot of the provider's resolved configuration for diagnostics. */
    getConfiguration() {
        return {
            provider: this.providerName,
            model: this.modelName || this.discoveredModel || FALLBACK_MODEL,
            defaultModel: this.getDefaultModel(),
            baseURL: this.baseURL,
        };
    }
}
export default LMStudioProvider;