@juspay/neurolink 9.59.6 → 9.60.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/README.md +11 -7
- package/dist/adapters/providerImageAdapter.js +52 -2
- package/dist/browser/neurolink.min.js +352 -352
- package/dist/cli/factories/commandFactory.js +15 -1
- package/dist/cli/utils/interactiveSetup.js +64 -0
- package/dist/constants/contextWindows.d.ts +5 -1
- package/dist/constants/contextWindows.js +67 -3
- package/dist/constants/enums.d.ts +52 -0
- package/dist/constants/enums.js +63 -0
- package/dist/core/baseProvider.d.ts +15 -6
- package/dist/core/baseProvider.js +28 -0
- package/dist/factories/providerRegistry.js +25 -1
- package/dist/lib/adapters/providerImageAdapter.js +52 -2
- package/dist/lib/constants/contextWindows.d.ts +5 -1
- package/dist/lib/constants/contextWindows.js +67 -3
- package/dist/lib/constants/enums.d.ts +52 -0
- package/dist/lib/constants/enums.js +63 -0
- package/dist/lib/core/baseProvider.d.ts +15 -6
- package/dist/lib/core/baseProvider.js +28 -0
- package/dist/lib/factories/providerRegistry.js +25 -1
- package/dist/lib/providers/deepseek.d.ts +29 -0
- package/dist/lib/providers/deepseek.js +216 -0
- package/dist/lib/providers/index.d.ts +4 -0
- package/dist/lib/providers/index.js +4 -0
- package/dist/lib/providers/llamaCpp.d.ts +34 -0
- package/dist/lib/providers/llamaCpp.js +315 -0
- package/dist/lib/providers/lmStudio.d.ts +34 -0
- package/dist/lib/providers/lmStudio.js +306 -0
- package/dist/lib/providers/nvidiaNim.d.ts +31 -0
- package/dist/lib/providers/nvidiaNim.js +354 -0
- package/dist/lib/proxy/proxyFetch.d.ts +9 -0
- package/dist/lib/proxy/proxyFetch.js +6 -1
- package/dist/lib/types/providers.d.ts +37 -2
- package/dist/lib/types/providers.js +1 -1
- package/dist/lib/utils/modelChoices.js +68 -4
- package/dist/lib/utils/pricing.d.ts +5 -0
- package/dist/lib/utils/pricing.js +94 -3
- package/dist/lib/utils/providerConfig.d.ts +16 -0
- package/dist/lib/utils/providerConfig.js +82 -0
- package/dist/providers/deepseek.d.ts +29 -0
- package/dist/providers/deepseek.js +215 -0
- package/dist/providers/index.d.ts +4 -0
- package/dist/providers/index.js +4 -0
- package/dist/providers/llamaCpp.d.ts +34 -0
- package/dist/providers/llamaCpp.js +314 -0
- package/dist/providers/lmStudio.d.ts +34 -0
- package/dist/providers/lmStudio.js +305 -0
- package/dist/providers/nvidiaNim.d.ts +31 -0
- package/dist/providers/nvidiaNim.js +353 -0
- package/dist/proxy/proxyFetch.d.ts +9 -0
- package/dist/proxy/proxyFetch.js +6 -1
- package/dist/types/providers.d.ts +37 -2
- package/dist/utils/modelChoices.js +68 -4
- package/dist/utils/pricing.d.ts +5 -0
- package/dist/utils/pricing.js +94 -3
- package/dist/utils/providerConfig.d.ts +16 -0
- package/dist/utils/providerConfig.js +82 -0
- package/package.json +19 -12

package/dist/lib/providers/llamaCpp.d.ts
@@ -0,0 +1,34 @@
+import { type LanguageModel } from "ai";
+import type { AIProviderName } from "../constants/enums.js";
+import { BaseProvider } from "../core/baseProvider.js";
+import type { NeurolinkCredentials, StreamOptions, StreamResult, ValidationSchema } from "../types/index.js";
+/**
+ * llama.cpp Provider
+ * Wraps a llama-server process (https://github.com/ggerganov/llama.cpp) that
+ * exposes an OpenAI-compatible API at http://localhost:8080/v1 by default.
+ * llama-server hosts ONE model loaded at startup; /v1/models returns just that.
+ */
+export declare class LlamaCppProvider extends BaseProvider {
+    private model?;
+    private readonly requestedModelName?;
+    private baseURL;
+    private apiKey;
+    private discoveredModel?;
+    private llamaCppClient;
+    constructor(modelName?: string, sdk?: unknown, _region?: string, credentials?: NeurolinkCredentials["llamacpp"]);
+    private getAvailableModels;
+    protected getAISDKModel(signal?: AbortSignal): Promise<LanguageModel>;
+    protected executeStream(options: StreamOptions, _analysisSchema?: ValidationSchema): Promise<StreamResult>;
+    private executeStreamInner;
+    protected getProviderName(): AIProviderName;
+    protected getDefaultModel(): string;
+    protected formatProviderError(error: unknown): Error;
+    validateConfiguration(): Promise<boolean>;
+    getConfiguration(): {
+        provider: AIProviderName;
+        model: string;
+        defaultModel: string;
+        baseURL: string;
+    };
+}
+export default LlamaCppProvider;
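
For orientation, a minimal usage sketch of the surface declared above. The deep-import path and the direct `new LlamaCppProvider(...)` call are illustrative assumptions; in the published package, providers are normally resolved through the provider registry (`factories/providerRegistry.js`) rather than constructed by hand.

```ts
import { LlamaCppProvider } from "@juspay/neurolink/dist/lib/providers/llamaCpp.js"; // assumed path

// Omitting the model name defers model selection to discovery via GET /v1/models.
const provider = new LlamaCppProvider(undefined, undefined, undefined, {
  baseURL: "http://localhost:8080/v1", // credentials shape per NeurolinkCredentials["llamacpp"]
});

// validateConfiguration() probes /health and falls back to /v1/models (see the
// implementation in the next hunk); it resolves to a boolean rather than throwing.
if (await provider.validateConfiguration()) {
  console.log(provider.getConfiguration()); // { provider, model, defaultModel, baseURL }
}
```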

package/dist/lib/providers/llamaCpp.js
@@ -0,0 +1,315 @@
+import { createOpenAI } from "@ai-sdk/openai";
+import { stepCountIs, streamText } from "ai";
+import { BaseProvider } from "../core/baseProvider.js";
+import { DEFAULT_MAX_STEPS } from "../core/constants.js";
+import { streamAnalyticsCollector } from "../core/streamAnalytics.js";
+import { createProxyFetch, maskProxyUrl } from "../proxy/proxyFetch.js";
+import { tracers, ATTR, withClientSpan } from "../telemetry/index.js";
+import { logger } from "../utils/logger.js";
+import { composeAbortSignals, createTimeoutController, TimeoutError, } from "../utils/timeout.js";
+import { emitToolEndFromStepFinish } from "../utils/toolEndEmitter.js";
+import { resolveToolChoice } from "../utils/toolChoice.js";
+import { toAnalyticsStreamResult } from "./providerTypeUtils.js";
+const makeLoggingFetch = (provider) => {
+    const base = createProxyFetch();
+    return (async (input, init) => {
+        const url = typeof input === "string"
+            ? input
+            : input instanceof URL
+                ? input.toString()
+                : input.url;
+        const reqSize = init?.body && typeof init.body === "string" ? init.body.length : 0;
+        const response = await base(input, init);
+        if (!response.ok) {
+            // Mask any embedded credentials / signed query params before logging.
+            // Fall back to "<redacted>" rather than the raw URL on a masking miss.
+            const safeUrl = maskProxyUrl(url) ?? "<redacted>";
+            // Don't log the raw upstream body — it can echo prompt fragments or
+            // tool payloads. Log size + status + URL only. Opt into bodies via
+            // NEUROLINK_DEBUG_HTTP=1 for local debugging.
+            if (process.env.NEUROLINK_DEBUG_HTTP === "1") {
+                const clone = response.clone();
+                const body = await clone.text().catch(() => "<unreadable>");
+                logger.warn(`[${provider}] upstream ${response.status}`, {
+                    url: safeUrl,
+                    body: body.slice(0, 800),
+                    reqSize,
+                });
+            }
+            else {
+                logger.warn(`[${provider}] upstream ${response.status} url=${safeUrl} reqSize=${reqSize}`);
+            }
+        }
+        return response;
+    });
+};
+const LLAMACPP_DEFAULT_BASE_URL = "http://localhost:8080/v1";
+const LLAMACPP_PLACEHOLDER_KEY = "llamacpp";
+const FALLBACK_MODEL = "loaded-model";
+const getLlamaCppBaseURL = () => {
+    return process.env.LLAMACPP_BASE_URL || LLAMACPP_DEFAULT_BASE_URL;
+};
+/**
+ * llama.cpp Provider
+ * Wraps a llama-server process (https://github.com/ggerganov/llama.cpp) that
+ * exposes an OpenAI-compatible API at http://localhost:8080/v1 by default.
+ * llama-server hosts ONE model loaded at startup; /v1/models returns just that.
+ */
+export class LlamaCppProvider extends BaseProvider {
+    model;
+    // Caller-supplied model name — never overwritten by discovery, so a
+    // FALLBACK_MODEL miss can't poison the explicit-vs-discover branch on
+    // subsequent calls.
+    requestedModelName;
+    baseURL;
+    apiKey;
+    discoveredModel;
+    llamaCppClient;
+    constructor(modelName, sdk, _region, credentials) {
+        const validatedNeurolink = sdk && typeof sdk === "object" && "getInMemoryServers" in sdk
+            ? sdk
+            : undefined;
+        super(modelName, "llamacpp", validatedNeurolink);
+        this.requestedModelName = modelName;
+        this.baseURL = credentials?.baseURL ?? getLlamaCppBaseURL();
+        // llama-server doesn't authenticate, but the AI SDK's createOpenAI() requires
+        // an apiKey. Allow override via credentials/env for users who run llama-server
+        // behind an auth-proxying reverse-proxy.
+        this.apiKey =
+            credentials?.apiKey ??
+                process.env.LLAMACPP_API_KEY ??
+                LLAMACPP_PLACEHOLDER_KEY;
+        this.llamaCppClient = createOpenAI({
+            baseURL: this.baseURL,
+            apiKey: this.apiKey,
+            fetch: makeLoggingFetch("llamacpp"),
+        });
+        logger.debug("llama.cpp Provider initialized", {
+            modelName: this.modelName,
+            providerName: this.providerName,
+            baseURL: this.baseURL,
+        });
+    }
+    async getAvailableModels(callerSignal) {
+        const url = `${this.baseURL.replace(/\/$/, "")}/models`;
+        // Use the proxy-aware fetch + bearer auth so users running llama-server
+        // behind an auth-proxying reverse-proxy can still discover the model.
+        // Compose the caller's request signal (per-request timeout / abort) with
+        // a fixed 5s discovery cap so cancellation propagates AND a hung server
+        // can't stall provider initialization.
+        const proxyFetch = createProxyFetch();
+        const discoveryTimeout = AbortSignal.timeout(5000);
+        const composedSignal = callerSignal
+            ? AbortSignal.any([callerSignal, discoveryTimeout])
+            : discoveryTimeout;
+        const response = await proxyFetch(url, {
+            headers: this.apiKey && this.apiKey !== LLAMACPP_PLACEHOLDER_KEY
+                ? { Authorization: `Bearer ${this.apiKey}` }
+                : undefined,
+            signal: composedSignal,
+        });
+        if (!response.ok) {
+            throw new Error(`llama-server /v1/models returned ${response.status}: ${response.statusText}`);
+        }
+        const data = (await response.json());
+        return data.data.map((m) => m.id);
+    }
+    async getAISDKModel(signal) {
+        if (this.model) {
+            return this.model;
+        }
+        let modelToUse;
+        let discoverySucceeded = false;
+        // Use requestedModelName, not this.modelName — refreshHandlersForModel()
+        // mutates this.modelName, so on a retry after a discovery miss the
+        // FALLBACK_MODEL would look like an explicit user choice. See lmStudio.ts.
+        const explicit = this.requestedModelName;
+        if (explicit && explicit.trim() !== "") {
+            modelToUse = explicit;
+            discoverySucceeded = true; // explicit user choice — treat as success
+        }
+        else {
+            try {
+                const models = await this.getAvailableModels(signal);
+                if (models.length > 0) {
+                    this.discoveredModel = models[0];
+                    modelToUse = this.discoveredModel;
+                    discoverySucceeded = true;
+                    logger.info(`llama.cpp loaded model: ${modelToUse}`);
+                }
+                else {
+                    modelToUse = FALLBACK_MODEL;
+                }
+            }
+            catch (error) {
+                logger.warn(`llama.cpp model discovery failed: ${error instanceof Error ? error.message : String(error)}`);
+                modelToUse = FALLBACK_MODEL;
+            }
+        }
+        // Persist resolved model on the instance and rebuild the composed
+        // handlers (TelemetryHandler, MessageBuilder, etc.) so pricing /
+        // telemetry / span attributes report the discovered model name. Plain
+        // assignment to `this.modelName` is not enough — handlers cached the
+        // pre-discovery value at construction time.
+        this.refreshHandlersForModel(modelToUse);
+        // .chat() — llama-server exposes /v1/chat/completions, not /v1/responses
+        const resolvedModel = this.llamaCppClient.chat(modelToUse);
+        // Only memoize on success — see lmStudio.ts for the same rationale: a
+        // discovery miss should let the next call retry instead of being stuck
+        // on FALLBACK_MODEL until the provider instance is recreated.
+        if (discoverySucceeded) {
+            this.model = resolvedModel;
+        }
+        return resolvedModel;
+    }
+    async executeStream(options, _analysisSchema) {
+        // Resolve the llama.cpp model BEFORE opening the span so OTEL
+        // attributes, MessageBuilder, and downstream image/tool adapters all see
+        // the discovered model id rather than the empty pre-discovery placeholder.
+        // Pass the caller's abort signal so user cancellation / per-request
+        // timeouts are honored during the discovery probe.
+        await this.getAISDKModel(options.abortSignal);
+        return withClientSpan({
+            name: "neurolink.provider.stream",
+            tracer: tracers.provider,
+            attributes: {
+                [ATTR.GEN_AI_SYSTEM]: "llamacpp",
+                [ATTR.GEN_AI_MODEL]: this.modelName || this.discoveredModel || FALLBACK_MODEL,
+                [ATTR.GEN_AI_OPERATION]: "stream",
+                [ATTR.NL_STREAM_MODE]: true,
+            },
+        }, async () => this.executeStreamInner(options));
+    }
+    async executeStreamInner(options) {
+        this.validateStreamOptions(options);
+        const startTime = Date.now();
+        const timeout = this.getTimeout(options);
+        const timeoutController = createTimeoutController(timeout, this.providerName, "stream");
+        try {
+            const shouldUseTools = !options.disableTools && this.supportsTools();
+            const tools = shouldUseTools
+                ? options.tools || (await this.getAllTools())
+                : {};
+            // Resolve the AI SDK model BEFORE building messages so message/image
+            // adapters see the same handlers/model that streamText will use. See
+            // lmStudio.ts for the same rationale.
+            const model = await this.getAISDKModelWithMiddleware(options);
+            const messages = await this.buildMessagesForStream(options);
+            const result = await streamText({
+                model,
+                messages,
+                temperature: options.temperature,
+                maxOutputTokens: options.maxTokens,
+                tools,
+                stopWhen: stepCountIs(options.maxSteps || DEFAULT_MAX_STEPS),
+                toolChoice: resolveToolChoice(options, tools, shouldUseTools),
+                abortSignal: composeAbortSignals(options.abortSignal, timeoutController?.controller.signal),
+                experimental_telemetry: this.telemetryHandler.getTelemetryConfig(options),
+                experimental_repairToolCall: this.getToolCallRepairFn(options),
+                onStepFinish: ({ toolCalls, toolResults }) => {
+                    emitToolEndFromStepFinish(this.neurolink?.getEventEmitter(), toolResults);
+                    this.handleToolExecutionStorage(toolCalls, toolResults, options, new Date()).catch((error) => {
+                        logger.warn("[LlamaCppProvider] Failed to store tool executions", {
+                            provider: this.providerName,
+                            error: error instanceof Error ? error.message : String(error),
+                        });
+                    });
+                },
+            });
+            timeoutController?.cleanup();
+            const transformedStream = this.createTextStream(result);
+            const analyticsPromise = streamAnalyticsCollector.createAnalytics(this.providerName, this.modelName || this.discoveredModel || FALLBACK_MODEL, toAnalyticsStreamResult(result), Date.now() - startTime, {
+                requestId: `llamacpp-stream-${Date.now()}`,
+                streamingMode: true,
+            });
+            return {
+                stream: transformedStream,
+                provider: this.providerName,
+                model: this.modelName || this.discoveredModel || FALLBACK_MODEL,
+                analytics: analyticsPromise,
+                metadata: { startTime, streamId: `llamacpp-${Date.now()}` },
+            };
+        }
+        catch (error) {
+            timeoutController?.cleanup();
+            throw this.handleProviderError(error);
+        }
+    }
+    getProviderName() {
+        return this.providerName;
+    }
+    getDefaultModel() {
+        return process.env.LLAMACPP_MODEL || "";
+    }
+    formatProviderError(error) {
+        if (error instanceof TimeoutError) {
+            return new Error(`llama.cpp request timed out: ${error.message}`);
+        }
+        const errorRecord = error;
+        const message = typeof errorRecord?.message === "string"
+            ? errorRecord.message
+            : "Unknown error";
+        const cause = errorRecord?.cause ?? {};
+        const code = (errorRecord?.code ?? cause?.code);
+        if (code === "ECONNREFUSED" ||
+            message.includes("ECONNREFUSED") ||
+            message.includes("Failed to fetch") ||
+            message.includes("fetch failed")) {
+            return new Error(`llama.cpp server not reachable at ${this.baseURL}. ` +
+                "Start it with: ./llama-server -m model.gguf --port 8080");
+        }
+        if (message.includes("400")) {
+            return new Error("llama.cpp rejected the request. Common cause: model doesn't support tools (start llama-server with --jinja for tool support).");
+        }
+        return new Error(`llama.cpp error: ${message}`);
+    }
+    async validateConfiguration() {
+        // Retry up to 3x with 500ms backoff. llama-server can be briefly unresponsive
+        // under load (CPU inference saturates the event loop). Use the proxy-aware
+        // fetch + bearer auth header so reverse-proxied setups still validate.
+        const healthURL = this.baseURL.replace(/\/v1\/?$/, "/health");
+        const modelsURL = `${this.baseURL.replace(/\/$/, "")}/models`;
+        const proxyFetch = createProxyFetch();
+        const headers = this.apiKey && this.apiKey !== LLAMACPP_PLACEHOLDER_KEY
+            ? { Authorization: `Bearer ${this.apiKey}` }
+            : undefined;
+        for (let attempt = 0; attempt < 3; attempt++) {
+            try {
+                const r = await proxyFetch(healthURL, {
+                    headers,
+                    signal: AbortSignal.timeout(2000),
+                });
+                if (r.ok) {
+                    return true;
+                }
+            }
+            catch {
+                /* fall through */
+            }
+            try {
+                const r2 = await proxyFetch(modelsURL, {
+                    headers,
+                    signal: AbortSignal.timeout(2000),
+                });
+                if (r2.ok) {
+                    return true;
+                }
+            }
+            catch {
+                /* fall through */
+            }
+            await new Promise((resolve) => setTimeout(resolve, 500));
+        }
+        return false;
+    }
+    getConfiguration() {
+        return {
+            provider: this.providerName,
+            model: this.modelName || this.discoveredModel || FALLBACK_MODEL,
+            defaultModel: this.getDefaultModel(),
+            baseURL: this.baseURL,
+        };
+    }
+}
+export default LlamaCppProvider;
+//# sourceMappingURL=llamaCpp.js.map
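
The discovery probe in `getAvailableModels` composes the caller's abort signal with a fixed 5-second cap via `AbortSignal.any` and `AbortSignal.timeout`, so user cancellation still propagates while a hung llama-server cannot stall provider initialization. The same pattern in isolation (a standalone sketch, not code from the package; `AbortSignal.any` needs Node 20+):

```ts
// Fetch JSON with both a caller-controlled signal and a hard per-call cap.
async function fetchJsonWithCap(url: string, callerSignal?: AbortSignal, capMs = 5000) {
  const cap = AbortSignal.timeout(capMs); // aborts the request after capMs
  const signal = callerSignal ? AbortSignal.any([callerSignal, cap]) : cap;
  const response = await fetch(url, { signal }); // rejects if either signal fires
  if (!response.ok) {
    throw new Error(`${url} returned ${response.status}: ${response.statusText}`);
  }
  return response.json();
}

// e.g. const models = await fetchJsonWithCap("http://localhost:8080/v1/models");
```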

package/dist/lib/providers/lmStudio.d.ts
@@ -0,0 +1,34 @@
+import { type LanguageModel } from "ai";
+import type { AIProviderName } from "../constants/enums.js";
+import { BaseProvider } from "../core/baseProvider.js";
+import type { NeurolinkCredentials, StreamOptions, StreamResult, ValidationSchema } from "../types/index.js";
+/**
+ * LM Studio Provider
+ * Wraps the LM Studio local server (https://lmstudio.ai/) which exposes an
+ * OpenAI-compatible API at http://localhost:1234/v1 by default.
+ * Auto-discovers the loaded model via /v1/models if no model specified.
+ */
+export declare class LMStudioProvider extends BaseProvider {
+    private model?;
+    private readonly requestedModelName?;
+    private baseURL;
+    private apiKey;
+    private discoveredModel?;
+    private lmstudioClient;
+    constructor(modelName?: string, sdk?: unknown, _region?: string, credentials?: NeurolinkCredentials["lmStudio"]);
+    private getAvailableModels;
+    protected getAISDKModel(signal?: AbortSignal): Promise<LanguageModel>;
+    protected executeStream(options: StreamOptions, _analysisSchema?: ValidationSchema): Promise<StreamResult>;
+    private executeStreamInner;
+    protected getProviderName(): AIProviderName;
+    protected getDefaultModel(): string;
+    protected formatProviderError(error: unknown): Error;
+    validateConfiguration(): Promise<boolean>;
+    getConfiguration(): {
+        provider: AIProviderName;
+        model: string;
+        defaultModel: string;
+        baseURL: string;
+    };
+}
+export default LMStudioProvider;
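
The LM Studio declaration mirrors the llama.cpp one, differing mainly in its default endpoint (http://localhost:1234/v1) and its credentials key (`NeurolinkCredentials["lmStudio"]` instead of `"llamacpp"`). A parallel sketch, again with an assumed import path:

```ts
import { LMStudioProvider } from "@juspay/neurolink/dist/lib/providers/lmStudio.js"; // assumed path

// With no model name, the provider auto-discovers whatever LM Studio has loaded
// by calling GET http://localhost:1234/v1/models.
const lmStudio = new LMStudioProvider();
console.log(await lmStudio.validateConfiguration()); // expected true once the local server is reachable
```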