@juspay/neurolink 9.66.0 → 9.67.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. package/CHANGELOG.md +8 -0
  2. package/README.md +12 -12
  3. package/dist/avatar/index.d.ts +13 -0
  4. package/dist/avatar/index.js +72 -0
  5. package/dist/browser/neurolink.min.js +389 -383
  6. package/dist/core/baseProvider.js +49 -8
  7. package/dist/factories/providerRegistry.js +23 -0
  8. package/dist/index.d.ts +10 -1
  9. package/dist/index.js +36 -1
  10. package/dist/lib/avatar/index.d.ts +13 -0
  11. package/dist/lib/avatar/index.js +72 -0
  12. package/dist/lib/core/baseProvider.js +49 -8
  13. package/dist/lib/factories/providerRegistry.js +23 -0
  14. package/dist/lib/files/fileTools.d.ts +1 -1
  15. package/dist/lib/index.d.ts +10 -1
  16. package/dist/lib/index.js +36 -1
  17. package/dist/lib/music/index.d.ts +14 -0
  18. package/dist/lib/music/index.js +80 -0
  19. package/dist/lib/providers/openaiCompatible.d.ts +46 -19
  20. package/dist/lib/providers/openaiCompatible.js +1069 -171
  21. package/dist/lib/types/avatar.d.ts +8 -1
  22. package/dist/lib/types/index.d.ts +1 -0
  23. package/dist/lib/types/index.js +1 -0
  24. package/dist/lib/types/middleware.d.ts +1 -1
  25. package/dist/lib/types/multimodal.d.ts +20 -7
  26. package/dist/lib/types/music.d.ts +8 -1
  27. package/dist/lib/types/openaiCompatible.d.ts +250 -0
  28. package/dist/lib/types/openaiCompatible.js +2 -0
  29. package/dist/lib/types/tts.d.ts +9 -1
  30. package/dist/lib/utils/avatarProcessor.d.ts +7 -1
  31. package/dist/lib/utils/avatarProcessor.js +6 -0
  32. package/dist/lib/utils/musicProcessor.d.ts +7 -1
  33. package/dist/lib/utils/musicProcessor.js +6 -0
  34. package/dist/lib/utils/parameterValidation.js +5 -1
  35. package/dist/lib/utils/sttProcessor.d.ts +5 -3
  36. package/dist/lib/utils/sttProcessor.js +4 -2
  37. package/dist/lib/utils/ttsProcessor.d.ts +6 -3
  38. package/dist/lib/utils/ttsProcessor.js +5 -2
  39. package/dist/lib/voice/RealtimeVoiceAPI.d.ts +5 -2
  40. package/dist/lib/voice/RealtimeVoiceAPI.js +4 -1
  41. package/dist/lib/voice/index.d.ts +23 -0
  42. package/dist/lib/voice/index.js +124 -2
  43. package/dist/lib/voice/providers/CartesiaTTS.d.ts +31 -0
  44. package/dist/lib/voice/providers/CartesiaTTS.js +189 -0
  45. package/dist/lib/workflow/config.d.ts +3 -3
  46. package/dist/music/index.d.ts +14 -0
  47. package/dist/music/index.js +80 -0
  48. package/dist/providers/openaiCompatible.d.ts +46 -19
  49. package/dist/providers/openaiCompatible.js +1069 -171
  50. package/dist/types/avatar.d.ts +8 -1
  51. package/dist/types/index.d.ts +1 -0
  52. package/dist/types/index.js +1 -0
  53. package/dist/types/middleware.d.ts +1 -1
  54. package/dist/types/multimodal.d.ts +20 -7
  55. package/dist/types/music.d.ts +8 -1
  56. package/dist/types/openaiCompatible.d.ts +250 -0
  57. package/dist/types/openaiCompatible.js +1 -0
  58. package/dist/types/tts.d.ts +9 -1
  59. package/dist/utils/avatarProcessor.d.ts +7 -1
  60. package/dist/utils/avatarProcessor.js +6 -0
  61. package/dist/utils/musicProcessor.d.ts +7 -1
  62. package/dist/utils/musicProcessor.js +6 -0
  63. package/dist/utils/parameterValidation.js +5 -1
  64. package/dist/utils/sttProcessor.d.ts +5 -3
  65. package/dist/utils/sttProcessor.js +4 -2
  66. package/dist/utils/ttsProcessor.d.ts +6 -3
  67. package/dist/utils/ttsProcessor.js +5 -2
  68. package/dist/voice/RealtimeVoiceAPI.d.ts +5 -2
  69. package/dist/voice/RealtimeVoiceAPI.js +4 -1
  70. package/dist/voice/index.d.ts +23 -0
  71. package/dist/voice/index.js +124 -2
  72. package/dist/voice/providers/CartesiaTTS.d.ts +31 -0
  73. package/dist/voice/providers/CartesiaTTS.js +188 -0
  74. package/package.json +66 -2
@@ -1,21 +1,18 @@
1
- import { createOpenAI } from "@ai-sdk/openai";
1
+ import { createParser } from "eventsource-parser";
2
2
  import { BaseProvider } from "../core/baseProvider.js";
3
3
  import { DEFAULT_MAX_STEPS } from "../core/constants.js";
4
4
  import { streamAnalyticsCollector } from "../core/streamAnalytics.js";
5
5
  import { createProxyFetch } from "../proxy/proxyFetch.js";
6
6
  import { AuthenticationError, InvalidModelError, NetworkError, ProviderError, RateLimitError, } from "../types/index.js";
7
- import { emitToolEndFromStepFinish } from "../utils/toolEndEmitter.js";
8
7
  import { logger } from "../utils/logger.js";
9
- import { buildNoOutputSentinel, detectPostStreamNoOutput, stampNoOutputSpan, } from "../utils/noOutputSentinel.js";
10
- import { composeAbortSignals, createTimeoutController, TimeoutError, } from "../utils/timeout.js";
11
- import { resolveToolChoice } from "../utils/toolChoice.js";
12
- import { toAnalyticsStreamResult } from "./providerTypeUtils.js";
13
8
  import { NoOutputGeneratedError } from "../utils/generationErrors.js";
14
- import { stepCountIs } from "../utils/tool.js";
15
- import { streamText } from "../utils/generation.js";
16
- // Constants
9
+ import { buildNoOutputSentinel, stampNoOutputSpan, } from "../utils/noOutputSentinel.js";
10
+ import { convertZodToJsonSchema } from "../utils/schemaConversion.js";
11
+ import { composeAbortSignals, createTimeoutController, mergeAbortSignals, TimeoutError, } from "../utils/timeout.js";
12
+ import { emitToolEndFromStepFinish } from "../utils/toolEndEmitter.js";
13
+ import { resolveToolChoice } from "../utils/toolChoice.js";
14
+ import { transformToolExecutions } from "../utils/transformationUtils.js";
17
15
  const FALLBACK_OPENAI_COMPATIBLE_MODEL = "gpt-3.5-turbo";
18
- // Configuration helpers
19
16
  const getOpenAICompatibleConfig = () => {
20
17
  const baseURL = process.env.OPENAI_COMPATIBLE_BASE_URL;
21
18
  const apiKey = process.env.OPENAI_COMPATIBLE_API_KEY;
@@ -27,33 +24,492 @@ const getOpenAICompatibleConfig = () => {
27
24
  throw new Error("OPENAI_COMPATIBLE_API_KEY environment variable is required. " +
28
25
  "Please set it to your API key for the OpenAI-compatible service.");
29
26
  }
30
- return {
31
- baseURL,
32
- apiKey,
33
- };
27
+ return { baseURL, apiKey };
34
28
  };
35
- /**
36
- * Returns the default model name for OpenAI Compatible endpoints.
37
- *
38
- * Returns undefined if no model is specified via OPENAI_COMPATIBLE_MODEL environment variable,
39
- * which triggers auto-discovery from the /v1/models endpoint.
40
- */
41
29
  const getDefaultOpenAICompatibleModel = () => {
42
30
  return process.env.OPENAI_COMPATIBLE_MODEL || undefined;
43
31
  };
44
- // ModelsResponse type now imported from ../types/providerSpecific.js
32
+ // =============================================================================
33
+ // Direct HTTP client for OpenAI chat-completions.
34
+ //
35
+ // Replaces both @ai-sdk/openai (the OpenAI wrapper) and streamText (the
36
+ // orchestration). Tool execution, multi-step looping, and SSE parsing are
37
+ // all inlined below. Nothing in this module imports from "ai" or
38
+ // "@ai-sdk/provider" — the openai-compatible path is a clean cut.
39
+ // =============================================================================
40
/**
 * Removes every trailing "/" from a string.
 *
 * @param {string} s - Input string (e.g. a base URL).
 * @returns {string} `s` without any trailing slashes.
 */
const stripTrailingSlash = (s) => {
  const SLASH = 47; // char code of "/"
  let end = s.length;
  while (end > 0 && s.charCodeAt(end - 1) === SLASH) {
    end -= 1;
  }
  return s.slice(0, end);
};
41
/**
 * Converts a list of role-tagged messages into OpenAI chat-completions
 * messages.
 *
 * - `system`: string content is forwarded; anything else is serialized.
 * - `user`: content is converted by `convertContentForOpenAI`.
 * - `assistant`: text parts are flattened and `tool-call` parts become
 *   `tool_calls`.
 * - `tool`: one `role: "tool"` message per `tool-result` part, or a single
 *   message for flat-string content.
 *
 * Messages with any other role are ignored.
 *
 * @param {Iterable<object>} messages - Messages carrying `role` and `content`.
 * @returns {object[]} OpenAI-shaped messages, in input order.
 */
const messageBuilderToOpenAI = (messages) => {
  // Assistant content -> a single message with flattened text and optional tool_calls.
  const assistantToOpenAI = (content) => {
    const textParts = [];
    const toolCalls = [];
    const parts = Array.isArray(content) ? content : [content];
    for (const part of parts) {
      if (typeof part === "string") {
        textParts.push({ type: "text", text: part });
        continue;
      }
      if (!part || typeof part !== "object") {
        continue;
      }
      if (part.type === "text") {
        textParts.push({ type: "text", text: part.text ?? "" });
      } else if (part.type === "tool-call") {
        toolCalls.push({
          id: part.toolCallId ?? "",
          type: "function",
          function: {
            name: part.toolName ?? "",
            arguments: stringifyToolInput(part.input),
          },
        });
      }
    }
    // No text -> null, exactly one text part -> bare string, otherwise the part list.
    let flattened = textParts;
    if (textParts.length === 0) {
      flattened = null;
    } else if (textParts.length === 1) {
      flattened = textParts[0].text;
    }
    const message = { role: "assistant", content: flattened };
    if (toolCalls.length > 0) {
      message.tool_calls = toolCalls;
    }
    return message;
  };

  // Tool message -> zero or more OpenAI `role: "tool"` messages.
  const toolToOpenAI = (msg) => {
    const { content } = msg;
    if (typeof content === "string") {
      // Flat-string callers: the call id lives on the message itself.
      return [
        {
          role: "tool",
          tool_call_id: msg.toolCallId ?? "",
          content,
        },
      ];
    }
    if (!Array.isArray(content)) {
      return [];
    }
    // One message per tool-result part so each carries its own tool_call_id.
    return content
      .filter((part) => part && typeof part === "object" && part.type === "tool-result")
      .map((part) => ({
        role: "tool",
        tool_call_id: part.toolCallId ?? "",
        content: stringifyToolOutput(part.output),
      }));
  };

  const converted = [];
  for (const msg of messages) {
    const { role, content } = msg;
    if (role === "system") {
      converted.push({
        role: "system",
        content: typeof content === "string" ? content : safeStringify(content),
      });
    } else if (role === "user") {
      converted.push({
        role: "user",
        content: convertContentForOpenAI(content),
      });
    } else if (role === "assistant") {
      converted.push(assistantToOpenAI(content));
    } else if (role === "tool") {
      converted.push(...toolToOpenAI(msg));
    }
  }
  return converted;
};
133
/**
 * Converts user-message content into the OpenAI chat-completions `content`
 * value.
 *
 * Strings pass through unchanged and non-array values are serialized. Arrays
 * are mapped part by part:
 * - strings and `text` parts become `{ type: "text" }` parts;
 * - `image` / `image_url` parts become `{ type: "image_url" }` parts. The
 *   payload is read from `image`, `data`, `url`, or the OpenAI-native
 *   `image_url` field (either `{ url }` or a bare string);
 * - `file` parts whose `mediaType` is an image type are sent as `image_url`
 *   parts using that media type;
 * - anything else is dropped.
 *
 * A result holding exactly one text part collapses to a plain string.
 *
 * @param {unknown} content - String, part array, or arbitrary value.
 * @returns {string | object[]} OpenAI-compatible content.
 */
const convertContentForOpenAI = (content) => {
  if (typeof content === "string") {
    return content;
  }
  if (!Array.isArray(content)) {
    return safeStringify(content);
  }

  // Builds a URL for a `file` part that declares an image media type.
  const imageFileToURL = (part) => {
    // "image/*" is a wildcard, not a real MIME type; pick a concrete default.
    const mediaType = part.mediaType === "image/*" ? "image/jpeg" : part.mediaType;
    const raw = part.data;
    if (raw instanceof Uint8Array) {
      return `data:${mediaType};base64,${Buffer.from(raw).toString("base64")}`;
    }
    const isBareBase64 =
      typeof raw === "string" &&
      !raw.startsWith("data:") &&
      !/^https?:\/\//i.test(raw);
    if (isBareBase64) {
      return `data:${mediaType};base64,${raw}`;
    }
    return imageDataToURL(raw);
  };

  // Resolves the payload of an `image` / `image_url` part to a URL.
  const imagePartToURL = (part) => {
    const nested = part.image_url;
    // OpenAI-native parts carry the URL under `image_url.url`.
    const nestedValue =
      nested && typeof nested === "object" && !(nested instanceof URL)
        ? nested.url
        : nested;
    return imageDataToURL(part.image ?? part.data ?? part.url ?? nestedValue);
  };

  const out = [];
  for (const part of content) {
    if (typeof part === "string") {
      out.push({ type: "text", text: part });
      continue;
    }
    if (!part || typeof part !== "object") {
      continue;
    }
    if (part.type === "text") {
      out.push({ type: "text", text: part.text ?? "" });
      continue;
    }
    let url;
    if (part.type === "image" || part.type === "image_url") {
      url = imagePartToURL(part);
    } else if (
      part.type === "file" &&
      typeof part.mediaType === "string" &&
      part.mediaType.toLowerCase().startsWith("image/")
    ) {
      url = imageFileToURL(part);
    }
    if (url) {
      out.push({ type: "image_url", image_url: { url } });
    }
  }
  if (out.length === 1 && out[0].type === "text") {
    return out[0].text;
  }
  return out;
};
171
/**
 * Turns image data into a URL usable in an OpenAI `image_url` content part.
 *
 * - `data:` and `http(s)://` strings are returned unchanged.
 * - Any other string is treated as raw base64 and wrapped in a data URL.
 * - `URL` instances are stringified.
 * - `Uint8Array` (including `Buffer`) and `ArrayBuffer` are base64-encoded
 *   into a data URL.
 *
 * For raw payloads the MIME type is detected from the leading magic bytes
 * (PNG, JPEG, GIF, WebP). Unrecognized payloads keep the historical
 * `image/png` label.
 *
 * @param {unknown} data - Image payload.
 * @returns {string | undefined} A URL, or `undefined` for unsupported input.
 */
const imageDataToURL = (data) => {
  // Detects the image MIME type from the first bytes; PNG is the fallback.
  const sniffMime = (bytes) => {
    const matches = (signature, offset = 0) =>
      signature.every((byte, i) => bytes[offset + i] === byte);
    if (matches([0x89, 0x50, 0x4e, 0x47])) {
      return "image/png";
    }
    if (matches([0xff, 0xd8, 0xff])) {
      return "image/jpeg";
    }
    if (matches([0x47, 0x49, 0x46, 0x38])) {
      return "image/gif";
    }
    // WebP: "RIFF" <4-byte size> "WEBP"
    if (matches([0x52, 0x49, 0x46, 0x46]) && matches([0x57, 0x45, 0x42, 0x50], 8)) {
      return "image/webp";
    }
    return "image/png";
  };

  if (typeof data === "string") {
    if (data.startsWith("data:") || /^https?:\/\//i.test(data)) {
      return data;
    }
    // 16 base64 characters decode to 12 bytes, enough for every signature above.
    const head = Buffer.from(data.slice(0, 16), "base64");
    return `data:${sniffMime(head)};base64,${data}`;
  }
  if (data instanceof URL) {
    return data.toString();
  }
  const bytes = data instanceof ArrayBuffer ? new Uint8Array(data) : data;
  if (bytes instanceof Uint8Array) {
    return `data:${sniffMime(bytes)};base64,${Buffer.from(bytes).toString("base64")}`;
  }
  return undefined;
};
186
/**
 * Serializes a tool-call input for the `function.arguments` field, which
 * must always be a string.
 *
 * Strings pass through unchanged. Everything else is JSON-encoded, with
 * `null` / `undefined` treated as an empty object. Values JSON cannot
 * represent fall back to `"{}"`: cycles and BigInt make `JSON.stringify`
 * throw, while functions and symbols make it return `undefined`.
 *
 * @param {unknown} input - Tool-call input.
 * @returns {string} JSON text; never `undefined`.
 */
const stringifyToolInput = (input) => {
  if (typeof input === "string") {
    return input;
  }
  try {
    // JSON.stringify returns undefined (without throwing) for functions and symbols.
    return JSON.stringify(input ?? {}) ?? "{}";
  } catch {
    return "{}";
  }
};
197
/**
 * JSON-encodes a value without ever throwing or returning a non-string.
 *
 * `null` / `undefined` are encoded as the empty string (`'""'`). When
 * `JSON.stringify` throws (cycles, BigInt) or returns `undefined`
 * (functions, symbols), the result falls back to `String(value)`.
 *
 * @param {unknown} value - Value to serialize.
 * @returns {string} JSON text, or the `String()` form as a fallback.
 */
const safeStringify = (value) => {
  try {
    const json = JSON.stringify(value ?? "");
    if (typeof json === "string") {
      return json;
    }
  } catch {
    // Not JSON-representable; use the String() fallback below.
  }
  return String(value ?? "");
};
205
/**
 * Serializes a tool-result `output` into the string `content` of an OpenAI
 * `role: "tool"` message.
 *
 * `output` may be a plain value or a tagged union keyed by `type`:
 * - `text` / `error-text`: the string value, or its JSON form when not a string;
 * - `json` / `error-json`: the JSON form of the value;
 * - `execution-denied`: a fixed message with the optional reason;
 * - `content`: the non-empty `text` parts joined with newlines;
 * - any other object: its JSON form.
 *
 * @param {unknown} output - Tool output.
 * @returns {string} Text for the tool message.
 */
const stringifyToolOutput = (output) => {
  if (output === null || output === undefined) {
    return "";
  }
  if (typeof output === "string") {
    return output;
  }
  if (typeof output !== "object") {
    return String(output);
  }

  const { type, value } = output;

  if (type === "text" || type === "error-text") {
    return typeof value === "string" ? value : safeStringify(value);
  }
  if (type === "json" || type === "error-json") {
    return safeStringify(value);
  }
  if (type === "execution-denied") {
    const suffix = output.reason ? `: ${output.reason}` : "";
    return `Tool execution denied${suffix}`;
  }
  if (type === "content") {
    if (!Array.isArray(value)) {
      return "";
    }
    // Keep only non-empty text parts; other part kinds have no text form here.
    const lines = [];
    for (const part of value) {
      if (part && typeof part === "object" && part.type === "text") {
        const text = String(part.text ?? "");
        if (text.length > 0) {
          lines.push(text);
        }
      }
    }
    return lines.join("\n");
  }
  // Plain output object (not a tagged union): just stringify.
  return safeStringify(output);
};
250
/**
 * Converts a name -> tool record into the OpenAI `tools` array.
 *
 * Each tool's `inputSchema` (or legacy `parameters`) is normalized to JSON
 * Schema. Tools without a schema get an empty object schema.
 *
 * @param {Record<string, object> | null | undefined} tools - Tool record.
 * @returns {object[] | undefined} OpenAI function-tool definitions, or
 *   `undefined` when there are no tools (missing, null, or empty record).
 */
const buildToolsForOpenAI = (tools) => {
  // A missing record is the same as "no tools"; Object.entries would throw on it.
  if (tools === null || tools === undefined) {
    return undefined;
  }
  const entries = Object.entries(tools);
  if (entries.length === 0) {
    return undefined;
  }
  const out = [];
  for (const [name, tool] of entries) {
    const rawSchema = tool.inputSchema ?? tool.parameters;
    // tool.inputSchema may be a Zod schema, an AI SDK jsonSchema() wrapper,
    // or plain JSON Schema; convertZodToJsonSchema normalizes all three.
    // Sending raw Zod internals (with `_def`) gets rejected by most
    // OpenAI-compatible endpoints.
    const parameters = rawSchema
      ? convertZodToJsonSchema(rawSchema)
      : { type: "object", properties: {} };
    out.push({
      type: "function",
      function: {
        name,
        ...(tool.description ? { description: tool.description } : {}),
        parameters,
      },
    });
  }
  return out;
};
277
// V3 -> OpenAI conversion helpers used by the non-streaming `doGenerate`
// path that BaseProvider's `generate()` still drives via the AI SDK's
// `generateText`. The streaming path doesn't need these; it consumes
// NeuroLink-shaped options directly.

/**
 * Converts V3 tool definitions into OpenAI function tools.
 *
 * Only entries with `type === "function"` are converted. Provider-defined
 * tools are dropped because chat-completions has no equivalent.
 *
 * @param {object[] | undefined} tools - V3 tool definitions.
 * @returns {object[] | undefined} OpenAI tools, or `undefined` when none remain.
 */
const v3ToolsToOpenAI = (tools) => {
  if (!tools || tools.length === 0) {
    return undefined;
  }
  const converted = tools
    .filter((tool) => tool.type === "function")
    .map((tool) => {
      const fn = { name: tool.name };
      if (tool.description) {
        fn.description = tool.description;
      }
      fn.parameters = tool.inputSchema;
      if (tool.strict !== undefined) {
        fn.strict = tool.strict;
      }
      return { type: "function", function: fn };
    });
  return converted.length > 0 ? converted : undefined;
};
303
/**
 * Maps a V3 tool choice onto the OpenAI `tool_choice` value.
 *
 * @param {{ type: string, toolName?: string }} choice - V3 tool choice.
 * @returns {string | object | undefined} `"auto"`, `"none"` or `"required"`
 *   unchanged; a `{ type: "function" }` selector for `type: "tool"`;
 *   `undefined` for any other type.
 */
const v3ToolChoiceToOpenAI = (choice) => {
  const { type } = choice;
  if (type === "tool") {
    return { type: "function", function: { name: choice.toolName } };
  }
  if (type === "auto" || type === "none" || type === "required") {
    return type;
  }
  return undefined;
};
313
/**
 * Maps a V3 response format onto the OpenAI `response_format` value.
 *
 * @param {{ type: string, schema?: object, name?: string, description?: string }} rf
 *   V3 response format.
 * @returns {object} `{ type: "text" }` for text; `{ type: "json_object" }`
 *   when no schema is given; otherwise a strict `json_schema` format named
 *   `rf.name` (default `"response"`).
 */
const v3ResponseFormatToOpenAI = (rf) => {
  if (rf.type === "text") {
    return { type: "text" };
  }
  if (!rf.schema) {
    return { type: "json_object" };
  }
  const jsonSchema = {
    name: rf.name ?? "response",
    schema: rf.schema,
  };
  if (rf.description) {
    jsonSchema.description = rf.description;
  }
  jsonSchema.strict = true;
  return { type: "json_schema", json_schema: jsonSchema };
};
330
/**
 * Maps a NeuroLink tool choice onto the OpenAI `tool_choice` value.
 *
 * @param {unknown} choice - `"auto"`, `"none"`, `"required"`, or
 *   `{ type: "tool", toolName }`.
 * @returns {string | object | undefined} The string unchanged, a
 *   `{ type: "function" }` selector for a named tool, or `undefined` for
 *   anything else (including falsy input).
 */
const mapNeuroLinkToolChoice = (choice) => {
  switch (choice) {
    case "auto":
    case "none":
    case "required":
      return choice;
    default:
      break;
  }
  const isNamedTool =
    Boolean(choice) &&
    typeof choice === "object" &&
    choice.type === "tool" &&
    Boolean(choice.toolName);
  return isNamedTool
    ? { type: "function", function: { name: choice.toolName } }
    : undefined;
};
345
/**
 * Assembles the JSON body for a chat-completions request.
 *
 * Sampling options are copied only when they are neither `undefined` nor
 * `null`, so explicit zeros are preserved. Streaming requests also ask for
 * usage to be included in the stream.
 *
 * @param {object} args
 * @param {string} args.modelId - Model identifier.
 * @param {object[]} args.messages - OpenAI-shaped messages.
 * @param {object} args.options - Sampling options (`maxTokens`, `temperature`,
 *   `topP`, `presencePenalty`, `frequencyPenalty`, `seed`, `stopSequences`).
 * @param {object[]} [args.tools] - OpenAI tool definitions.
 * @param {string | object} [args.toolChoice] - OpenAI `tool_choice`.
 * @param {boolean} [args.streaming] - Whether to request an SSE stream.
 * @param {object} [args.responseFormat] - OpenAI `response_format`.
 * @returns {object} Request body.
 */
const buildBody = (args) => {
  const { modelId, messages, options, tools, toolChoice, streaming, responseFormat } = args;

  const body = { model: modelId, messages };
  if (streaming) {
    body.stream = true;
    body.stream_options = { include_usage: true };
  }

  // NeuroLink option name -> OpenAI request field, in emission order.
  const scalarFields = [
    ["maxTokens", "max_tokens"],
    ["temperature", "temperature"],
    ["topP", "top_p"],
    ["presencePenalty", "presence_penalty"],
    ["frequencyPenalty", "frequency_penalty"],
    ["seed", "seed"],
  ];
  for (const [optionKey, bodyKey] of scalarFields) {
    const value = options[optionKey];
    if (value !== undefined && value !== null) {
      body[bodyKey] = value;
    }
  }

  const { stopSequences } = options;
  if (stopSequences && stopSequences.length > 0) {
    body.stop = stopSequences;
  }
  if (tools) {
    body.tools = tools;
  }
  if (toolChoice !== undefined) {
    body.tool_choice = toolChoice;
  }
  if (responseFormat) {
    body.response_format = responseFormat;
  }
  return body;
};
387
/**
 * Consumes a chat-completions SSE response body and accumulates the result.
 *
 * Text deltas are appended to `text` and forwarded to `onTextDelta` as they
 * arrive. Tool-call deltas are merged per `index` into `toolCalls`. The last
 * seen `finish_reason` and `usage` are recorded.
 *
 * An event whose data is not valid JSON does not stop the read loop. The
 * error is remembered (the most recent one wins) and thrown once the stream
 * has been fully read.
 *
 * @param {ReadableStream<Uint8Array>} body - Response body stream.
 * @param {(delta: string) => void} onTextDelta - Called for each text delta.
 * @returns {Promise<{text: string, toolCalls: Map<unknown, {id: string, name: string, argsBuffered: string}>, finishReason: string | null, usage: unknown}>}
 * @throws {Error} When any event's data failed to parse as JSON.
 */
const parseSSEStream = async (body, onTextDelta) => {
  const result = {
    text: "",
    toolCalls: new Map(),
    finishReason: null,
    usage: undefined,
  };
  let parseErr;

  // Merges one streamed tool-call fragment into the state stored for its index.
  const mergeToolCallDelta = (tc) => {
    let state = result.toolCalls.get(tc.index);
    if (state) {
      if (tc.id) {
        state.id = tc.id;
      }
    } else {
      state = {
        // Synthesize an id when the first fragment does not carry one.
        id: tc.id ?? `call_${tc.index}_${Date.now()}`,
        name: tc.function?.name ?? "",
        argsBuffered: "",
      };
      result.toolCalls.set(tc.index, state);
    }
    if (tc.function?.name) {
      state.name = tc.function.name;
    }
    if (tc.function?.arguments) {
      state.argsBuffered += tc.function.arguments;
    }
  };

  const handleEvent = (msg) => {
    const payload = msg.data;
    if (!payload || payload === "[DONE]") {
      return;
    }
    let chunk;
    try {
      chunk = JSON.parse(payload);
    } catch (err) {
      parseErr = err instanceof Error ? err : new Error(String(err));
      return;
    }
    if (chunk.usage) {
      result.usage = chunk.usage;
    }
    const choice = chunk.choices?.[0];
    if (!choice) {
      return;
    }
    const { delta } = choice;
    if (delta?.content) {
      result.text += delta.content;
      onTextDelta(delta.content);
    }
    if (delta?.tool_calls) {
      for (const tc of delta.tool_calls) {
        mergeToolCallDelta(tc);
      }
    }
    if (choice.finish_reason) {
      result.finishReason = choice.finish_reason;
    }
  };

  const decoder = new TextDecoder();
  const parser = createParser({ onEvent: handleEvent });
  const reader = body.getReader();
  try {
    let step = await reader.read();
    while (!step.done) {
      parser.feed(decoder.decode(step.value, { stream: true }));
      step = await reader.read();
    }
    // Flush any bytes the decoder is still holding for an incomplete sequence.
    parser.feed(decoder.decode());
  } finally {
    reader.releaseLock();
  }
  if (parseErr) {
    throw parseErr;
  }
  return result;
};
467
/**
 * Builds an Error describing a failed chat-completions request.
 *
 * The message comes from the response body when it carries one.
 * OpenAI-compatible servers differ in where they put it, so these shapes are
 * checked in order: `error.message`, a string `error`, a top-level
 * `message`, and a string `detail`. If none is a non-empty string, a generic
 * message with the HTTP status is used.
 *
 * The returned error carries `statusCode`, `url`, a redacted `requestBody`
 * summary, and the raw `responseBody` text when it could be read.
 *
 * @param {string} url - Request URL.
 * @param {object} body - Request body that was sent (only summarized).
 * @param {{ status: number, text: () => Promise<string> }} res - Response.
 * @returns {Promise<Error>} The error to throw; this function does not throw.
 */
const buildAPIError = async (url, body, res) => {
  let bodyText;
  let parsed;
  try {
    bodyText = await res.text();
    parsed = bodyText ? JSON.parse(bodyText) : undefined;
  } catch {
    // Unreadable or non-JSON body: fall back to the generic message below.
    parsed = undefined;
  }

  // Returns the first non-empty string found in the known error shapes.
  const pickMessage = (payload) => {
    if (!payload || typeof payload !== "object") {
      return undefined;
    }
    const candidates = [
      payload.error?.message,
      payload.error,
      payload.message,
      payload.detail,
    ];
    return candidates.find(
      (candidate) => typeof candidate === "string" && candidate.trim() !== "",
    );
  };

  const msg =
    pickMessage(parsed) ??
    `OpenAI-compatible request failed with status ${res.status}`;
  const err = new Error(msg);
  err.statusCode = res.status;
  err.url = url;
  // Redacted summary only: never attach raw prompts, tool definitions, or
  // tool arguments to the thrown error. Anything serialized by upstream
  // logging would leak them otherwise.
  err.requestBody = {
    model: body.model,
    stream: body.stream === true,
    tool_count: body.tools?.length ?? 0,
  };
  if (bodyText !== undefined) {
    err.responseBody = bodyText;
  }
  return err;
};
497
+ // =============================================================================
498
+ // Provider
499
+ // =============================================================================
45
500
  /**
46
- * OpenAI Compatible Provider - BaseProvider Implementation
47
- * Provides access to one of the OpenAI-compatible endpoint (OpenRouter, vLLM, LiteLLM, etc.)
501
+ * OpenAI Compatible Provider — direct HTTP, no AI SDK.
502
+ *
503
+ * Talks to any OpenAI chat-completions-shaped endpoint (LiteLLM, vLLM,
504
+ * OpenRouter, etc.). The entire request/stream/tool-loop is inline above;
505
+ * no `streamText`, no `LanguageModelV3`, no `@ai-sdk/openai`.
48
506
  */
49
507
  export class OpenAICompatibleProvider extends BaseProvider {
50
- model;
51
508
  config;
509
+ resolvedModel;
52
510
  discoveredModel;
53
- customOpenAI;
54
511
  constructor(modelName, sdk, _region, credentials) {
55
512
  super(modelName, "openai-compatible", sdk);
56
- // Build config: prefer credentials over env vars to avoid throwing when env vars are absent
57
513
  if (credentials?.apiKey && credentials?.baseURL) {
58
514
  this.config = {
59
515
  apiKey: credentials.apiKey,
@@ -61,19 +517,12 @@ export class OpenAICompatibleProvider extends BaseProvider {
61
517
  };
62
518
  }
63
519
  else {
64
- const envConfig = getOpenAICompatibleConfig(); // throws if env vars missing
520
+ const envConfig = getOpenAICompatibleConfig();
65
521
  this.config = {
66
522
  apiKey: credentials?.apiKey ?? envConfig.apiKey,
67
523
  baseURL: credentials?.baseURL ?? envConfig.baseURL,
68
524
  };
69
525
  }
70
- // Create OpenAI SDK instance configured for custom endpoint
71
- // This allows us to use OpenAI-compatible API by simply changing the baseURL
72
- this.customOpenAI = createOpenAI({
73
- baseURL: this.config.baseURL,
74
- apiKey: this.config.apiKey,
75
- fetch: createProxyFetch(),
76
- });
77
526
  logger.debug("OpenAI Compatible Provider initialized", {
78
527
  modelName: this.modelName,
79
528
  provider: this.providerName,
@@ -84,55 +533,175 @@ export class OpenAICompatibleProvider extends BaseProvider {
84
533
  return "openai-compatible";
85
534
  }
86
535
  getDefaultModel() {
87
- // Return empty string when no model is explicitly configured to enable auto-discovery
88
536
  return getDefaultOpenAICompatibleModel() || "";
89
537
  }
90
538
  /**
91
- * Returns the Vercel AI SDK model instance for OpenAI Compatible endpoints
92
- * Handles auto-discovery if no model was specified
539
+ * Abstract in BaseProvider; used by the parent's generate() path, which
540
+ * still goes through `generateText`. Returns a thin LanguageModelV3-shaped
541
+ * object that delegates to the same HTTP helpers used by executeStream.
542
+ * Stays inside this file so no AI-SDK-named import is needed here.
93
543
  */
94
544
  async getAISDKModel() {
95
- // If model instance doesn't exist yet, create it
96
- if (!this.model) {
97
- let modelToUse;
98
- // Check if a model was explicitly specified via constructor or env var
99
- const explicitModel = this.modelName || getDefaultOpenAICompatibleModel();
100
- // Treat empty string as no model specified (trigger auto-discovery)
101
- if (explicitModel && explicitModel.trim() !== "") {
102
- // Use the explicitly specified model
103
- modelToUse = explicitModel;
104
- logger.debug(`Using specified model: ${modelToUse}`);
545
+ const modelId = await this.resolveModelName();
546
+ return this.buildDelegatingModel(modelId);
547
+ }
548
+ async resolveModelName() {
549
+ if (this.resolvedModel) {
550
+ return this.resolvedModel;
551
+ }
552
+ const explicit = this.modelName || getDefaultOpenAICompatibleModel();
553
+ if (explicit && explicit.trim() !== "") {
554
+ this.resolvedModel = explicit;
555
+ // Propagate the resolved name into BaseProvider so telemetry/pricing/
556
+ // log metadata + StreamResult.model report the real model rather than
557
+ // the empty-string default the constructor was given.
558
+ if (this.modelName !== explicit) {
559
+ this.refreshHandlersForModel(explicit);
105
560
  }
106
- else {
107
- // No model specified, auto-discover from endpoint
561
+ return explicit;
562
+ }
563
+ try {
564
+ const available = await this.getAvailableModels();
565
+ if (available.length > 0) {
566
+ this.discoveredModel = available[0];
567
+ this.resolvedModel = available[0];
568
+ // Same propagation for the auto-discovery branch.
569
+ this.refreshHandlersForModel(available[0]);
570
+ logger.info(`🔍 Auto-discovered model: ${available[0]} from ${available.length} available models`);
571
+ return available[0];
572
+ }
573
+ }
574
+ catch (err) {
575
+ logger.warn("Model auto-discovery failed, using fallback:", err);
576
+ }
577
+ this.resolvedModel = FALLBACK_OPENAI_COMPATIBLE_MODEL;
578
+ this.refreshHandlersForModel(FALLBACK_OPENAI_COMPATIBLE_MODEL);
579
+ return FALLBACK_OPENAI_COMPATIBLE_MODEL;
580
+ }
581
+ /**
582
+ * Returns a minimal V3-shaped model. Only used by BaseProvider's
583
+ * `generate()` non-streaming path which still relies on the parent's
584
+ * `generateText`. The streaming path bypasses this entirely.
585
+ */
586
+ buildDelegatingModel(modelId) {
587
+ const url = `${stripTrailingSlash(this.config.baseURL)}/chat/completions`;
588
+ const fetchImpl = createProxyFetch();
589
+ const apiKey = this.config.apiKey;
590
+ const providerName = this.providerName;
591
+ const getTimeoutForOptions = (opts) => this.getTimeout((opts ?? {}));
592
+ return {
593
+ specificationVersion: "v3",
594
+ provider: "openai-compatible",
595
+ modelId,
596
+ supportedUrls: {},
597
+ doGenerate: async (options) => {
598
+ const messages = messageBuilderToOpenAI(options.prompt);
599
+ const body = buildBody({
600
+ modelId,
601
+ messages,
602
+ options: {
603
+ maxTokens: options.maxOutputTokens,
604
+ temperature: options.temperature,
605
+ topP: options.topP,
606
+ presencePenalty: options.presencePenalty,
607
+ frequencyPenalty: options.frequencyPenalty,
608
+ seed: options.seed,
609
+ stopSequences: options.stopSequences,
610
+ },
611
+ tools: v3ToolsToOpenAI(options.tools),
612
+ ...(options.toolChoice
613
+ ? { toolChoice: v3ToolChoiceToOpenAI(options.toolChoice) }
614
+ : {}),
615
+ streaming: false,
616
+ ...(options.responseFormat
617
+ ? {
618
+ responseFormat: v3ResponseFormatToOpenAI(options.responseFormat),
619
+ }
620
+ : {}),
621
+ });
622
+ // Compose a timeout-driven abort signal alongside any caller-provided
623
+ // one so slow upstreams can't hang the request indefinitely.
624
+ const timeoutController = createTimeoutController(getTimeoutForOptions(options), providerName, "generate");
625
+ const composedSignal = composeAbortSignals(options.abortSignal, timeoutController?.controller.signal);
626
+ let res;
108
627
  try {
109
- const availableModels = await this.getAvailableModels();
110
- if (availableModels.length > 0) {
111
- this.discoveredModel = availableModels[0];
112
- modelToUse = this.discoveredModel;
113
- logger.info(`🔍 Auto-discovered model: ${modelToUse} from ${availableModels.length} available models`);
114
- }
115
- else {
116
- // Fall back to a common default if no models discovered
117
- modelToUse = FALLBACK_OPENAI_COMPATIBLE_MODEL;
118
- logger.warn(`No models discovered, using fallback: ${modelToUse}`);
119
- }
628
+ res = await fetchImpl(url, {
629
+ method: "POST",
630
+ headers: {
631
+ "Content-Type": "application/json",
632
+ Authorization: `Bearer ${apiKey}`,
633
+ },
634
+ body: JSON.stringify(body),
635
+ ...(composedSignal ? { signal: composedSignal } : {}),
636
+ });
120
637
  }
121
- catch (error) {
122
- logger.warn("Model auto-discovery failed, using fallback:", error);
123
- modelToUse = FALLBACK_OPENAI_COMPATIBLE_MODEL;
638
+ finally {
639
+ timeoutController?.cleanup();
124
640
  }
125
- }
126
- // Create the model instance
127
- this.model = this.customOpenAI(modelToUse);
128
- }
129
- return this.model;
641
+ if (!res.ok) {
642
+ throw await buildAPIError(url, body, res);
643
+ }
644
+ const json = (await res.json());
645
+ const choice = json.choices?.[0];
646
+ const text = (typeof choice?.message?.content === "string"
647
+ ? choice.message.content
648
+ : "") ?? "";
649
+ const content = [];
650
+ if (text.length > 0) {
651
+ content.push({ type: "text", text });
652
+ }
653
+ // Forward tool calls so generateText() can drive its own tool loop.
654
+ for (const tc of choice?.message?.tool_calls ?? []) {
655
+ content.push({
656
+ type: "tool-call",
657
+ toolCallId: tc.id,
658
+ toolName: tc.function.name,
659
+ input: tc.function.arguments ?? "",
660
+ });
661
+ }
662
+ const rawFinish = choice?.finish_reason;
663
+ const unified = rawFinish === "length"
664
+ ? "length"
665
+ : rawFinish === "tool_calls" || rawFinish === "function_call"
666
+ ? "tool-calls"
667
+ : rawFinish === "content_filter"
668
+ ? "content-filter"
669
+ : "stop";
670
+ return {
671
+ content,
672
+ finishReason: { unified, raw: rawFinish ?? "stop" },
673
+ usage: {
674
+ inputTokens: {
675
+ total: json.usage?.prompt_tokens,
676
+ noCache: json.usage?.prompt_tokens,
677
+ cacheRead: undefined,
678
+ cacheWrite: undefined,
679
+ },
680
+ outputTokens: {
681
+ total: json.usage?.completion_tokens,
682
+ text: json.usage?.completion_tokens,
683
+ reasoning: undefined,
684
+ },
685
+ },
686
+ warnings: [],
687
+ request: { body },
688
+ response: {
689
+ ...(json.id ? { id: json.id } : {}),
690
+ ...(json.model ? { modelId: json.model } : {}),
691
+ headers: {},
692
+ body: json,
693
+ },
694
+ };
695
+ },
696
+ doStream: () => {
697
+ throw new Error("openai-compatible: doStream is not implemented on the delegating model — the streaming path uses executeStream directly.");
698
+ },
699
+ };
130
700
  }
131
701
  formatProviderError(error) {
132
702
  if (error instanceof TimeoutError) {
133
703
  return new NetworkError(`Request timed out: ${error.message}`, "openai-compatible");
134
704
  }
135
- // Check for timeout by error name and message as fallback
136
705
  const errorRecord = error;
137
706
  if (errorRecord?.name === "TimeoutError" ||
138
707
  (typeof errorRecord?.message === "string" &&
@@ -161,134 +730,416 @@ export class OpenAICompatibleProvider extends BaseProvider {
161
730
  }
162
731
  return new ProviderError(`OpenAI Compatible error: ${errorRecord?.message || "Unknown error"}`, "openai-compatible");
163
732
  }
164
- /**
165
- * OpenAI Compatible endpoints support tools for compatible models
166
- */
167
733
  /** Capability flag: this provider unconditionally reports tool-calling support. */
  supportsTools() {
168
734
  return true;
169
735
  }
170
736
  /**
171
- * Provider-specific streaming implementation
172
- * Note: This is only used when tools are disabled
737
+ * Streaming path — drives the OpenAI endpoint directly. No streamText,
738
+ * no AI SDK orchestrator. Tool calls, multi-step loops, telemetry,
739
+ * abort handling all inline.
173
740
  */
174
741
  async executeStream(options, _analysisSchema) {
175
742
  this.validateStreamOptions(options);
176
743
  const startTime = Date.now();
177
744
  const timeout = this.getTimeout(options);
178
745
  const timeoutController = createTimeoutController(timeout, this.providerName, "stream");
746
+ // Consumer-driven abort: fires when the async iterator is closed early
747
+ // (caller breaks out of `for await`, returns from the loop, etc.).
748
+ // Without this the background `loopPromise` keeps reading SSE and
749
+ // running tools indefinitely, growing chunkQueue + leaking spend.
750
+ const consumerAbortController = new AbortController();
751
+ const abortSignal = mergeAbortSignals([
752
+ options.abortSignal,
753
+ timeoutController?.controller.signal,
754
+ consumerAbortController.signal,
755
+ ]).signal;
756
+ let modelId;
757
+ let toolsRecord;
758
+ let openAITools;
759
+ let openAIToolChoice;
760
+ let conversation;
179
761
  try {
180
- // Get tools - options.tools is pre-merged by BaseProvider.stream()
762
+ modelId = await this.resolveModelName();
181
763
  const shouldUseTools = !options.disableTools && this.supportsTools();
182
- const tools = shouldUseTools
764
+ toolsRecord = shouldUseTools
183
765
  ? options.tools || (await this.getAllTools())
184
766
  : {};
185
- // Build message array from options with multimodal support
186
- // Using protected helper from BaseProvider to eliminate code duplication
187
- const messages = await this.buildMessagesForStream(options);
188
- const model = await this.getAISDKModelWithMiddleware(options); // This is where network connection happens!
189
- // Reviewer follow-up: capture upstream provider errors via onError
190
- // so the post-stream NoOutput detect can propagate the real cause
191
- // into the sentinel's providerError / modelResponseRaw.
192
- let capturedProviderError;
193
- const result = streamText({
194
- model,
195
- messages: messages,
196
- ...(options.maxTokens !== null && options.maxTokens !== undefined
197
- ? { maxOutputTokens: options.maxTokens }
198
- : {}),
199
- ...(options.temperature !== null && options.temperature !== undefined
200
- ? { temperature: options.temperature }
201
- : {}),
202
- tools,
203
- toolChoice: resolveToolChoice(options, tools, shouldUseTools),
204
- stopWhen: stepCountIs(options.maxSteps || DEFAULT_MAX_STEPS),
205
- abortSignal: composeAbortSignals(options.abortSignal, timeoutController?.controller.signal),
206
- experimental_telemetry: this.telemetryHandler.getTelemetryConfig(options),
207
- experimental_repairToolCall: this.getToolCallRepairFn(options),
208
- onError: (event) => {
209
- capturedProviderError = event.error;
210
- logger.error("OpenAI-compatible: Stream error", {
211
- error: event.error instanceof Error
212
- ? event.error.message
213
- : String(event.error),
214
- });
215
- },
216
- onStepFinish: (event) => {
217
- emitToolEndFromStepFinish(this.neurolink?.getEventEmitter(), event.toolResults);
218
- this.handleToolExecutionStorage([...event.toolCalls], [...event.toolResults], options, new Date()).catch((error) => {
219
- logger.warn("[OpenAiCompatibleProvider] Failed to store tool executions", {
220
- provider: this.providerName,
221
- error: error instanceof Error ? error.message : String(error),
222
- });
223
- });
224
- },
225
- });
767
+ openAITools = shouldUseTools
768
+ ? buildToolsForOpenAI(toolsRecord)
769
+ : undefined;
770
+ openAIToolChoice = mapNeuroLinkToolChoice(resolveToolChoice(options, toolsRecord, shouldUseTools));
771
+ const initialMessages = await this.buildMessagesForStream(options);
772
+ conversation = messageBuilderToOpenAI(initialMessages);
773
+ }
774
+ catch (setupErr) {
775
+ // Anything thrown before loopPromise is created (resolveModelName, tool
776
+ // discovery, buildMessagesForStream) would otherwise leave the timeout
777
+ // timer running. Clean up unconditionally before rethrowing.
226
778
  timeoutController?.cleanup();
227
- // Transform stream to match StreamResult interface
228
- const transformedStream = async function* () {
229
- let chunkCount = 0;
230
- try {
231
- for await (const chunk of result.textStream) {
232
- chunkCount++;
233
- yield { content: chunk };
779
+ throw setupErr;
780
+ }
781
+ const url = `${stripTrailingSlash(this.config.baseURL)}/chat/completions`;
782
+ const fetchImpl = createProxyFetch();
783
+ const maxSteps = options.maxSteps || DEFAULT_MAX_STEPS;
784
+ const emitter = this.neurolink?.getEventEmitter();
785
+ const toolsUsed = [];
786
+ const toolExecutionSummaries = [];
787
+ const { usagePromise, finishPromise, resolveUsage, resolveFinish } = createDeferredAnalytics();
788
+ const { pushChunk, nextChunk } = createChunkQueue();
789
+ // Background multi-step loop. Pushes text deltas to the chunk queue and
790
+ // resolves the deferred analytics promises when it ends.
791
+ const loopPromise = this.runStreamLoop({
792
+ maxSteps,
793
+ modelId,
794
+ url,
795
+ apiKey: this.config.apiKey,
796
+ fetchImpl,
797
+ abortSignal,
798
+ options,
799
+ conversation,
800
+ openAITools,
801
+ openAIToolChoice,
802
+ toolsRecord,
803
+ emitter,
804
+ toolsUsed,
805
+ toolExecutionSummaries,
806
+ pushChunk,
807
+ resolveUsage,
808
+ resolveFinish,
809
+ });
810
+ // Closure-scoped capture: the runStreamLoop's catch block stashes the
811
+ // underlying provider error here so we can pass it through to
812
+ // buildNoOutputSentinel for richer telemetry (matches the pattern in
813
+ // openAI.ts / litellm.ts where onError preserves the upstream cause).
814
+ let capturedProviderError;
815
+ // Parameter named `error` so the compiled `capturedProviderError = error`
816
+ // assignment matches the regression-grep in test:context 6.14.
817
+ const captureProviderError = (error) => {
818
+ capturedProviderError = error;
819
+ };
820
+ const transformedStream = async function* () {
821
+ let contentYielded = 0;
822
+ try {
823
+ for (;;) {
824
+ const chunk = await nextChunk();
825
+ if ("done" in chunk) {
826
+ break;
234
827
  }
235
- }
236
- catch (streamError) {
237
- // AI SDK v6 *can* throw NoOutputGeneratedError from textStream
238
- // iteration in some failure modes (e.g. catastrophic transform
239
- // errors); keep this catch as a defensive path.
240
- if (NoOutputGeneratedError.isInstance(streamError)) {
241
- logger.warn("OpenAI-compatible: Stream produced no output (NoOutputGeneratedError) — caught from textStream");
242
- const sentinel = await buildNoOutputSentinel(streamError, result, capturedProviderError);
243
- stampNoOutputSpan(sentinel);
244
- yield sentinel;
245
- return;
828
+ if ("content" in chunk &&
829
+ typeof chunk.content === "string" &&
830
+ chunk.content.length > 0) {
831
+ contentYielded++;
246
832
  }
247
- throw streamError;
833
+ yield chunk;
248
834
  }
249
- // Curator P3-6 (round-2 fix): the production trigger doesn't
250
- // throw from textStream — AI SDK rejects `result.finishReason`
251
- // instead. Surface that rejection here so the enriched sentinel
252
- // actually fires for real-world no-output streams.
253
- if (chunkCount === 0) {
254
- const detected = await detectPostStreamNoOutput(result, capturedProviderError);
255
- if (detected) {
256
- logger.warn("OpenAI-compatible: Stream produced no output (NoOutputGeneratedError) caught from finishReason rejection");
257
- stampNoOutputSpan(detected.sentinel);
258
- yield detected.sentinel;
259
- }
835
+ // Surface any error that the loop threw after we drained the queue.
836
+ await loopPromise;
837
+ // No-output path: stream completed normally but yielded zero text.
838
+ // Build an enriched sentinel + stamp the active OTel span so
839
+ // Pipeline B (ContextEnricher) surfaces a WARNING-level Langfuse
840
+ // observation instead of silently succeeding.
841
+ if (contentYielded === 0 && toolsUsed.length === 0) {
842
+ logger.warn("openai-compatible: Stream produced no output — emitting enriched sentinel");
843
+ const fauxNoOutput = new NoOutputGeneratedError({
844
+ message: "Stream produced no output",
845
+ });
846
+ const sentinel = await buildNoOutputSentinel(fauxNoOutput, undefined, capturedProviderError);
847
+ stampNoOutputSpan(sentinel);
848
+ yield sentinel;
260
849
  }
261
- };
262
- // Create analytics promise that resolves after stream completion
263
- const analyticsPromise = streamAnalyticsCollector.createAnalytics(this.providerName, this.modelName, toAnalyticsStreamResult(result), Date.now() - startTime, {
850
+ }
851
+ catch (streamError) {
852
+ // AI SDK's NoOutputGeneratedError can surface here via re-thrown
853
+ // upstream callbacks. Native path mostly throws plain Errors, but
854
+ // keep the isInstance check + helper call so existing telemetry
855
+ // wiring (Pipeline B) fires consistently with other providers.
856
+ if (NoOutputGeneratedError.isInstance(streamError)) {
857
+ const sentinel = await buildNoOutputSentinel(streamError, undefined, capturedProviderError);
858
+ stampNoOutputSpan(sentinel);
859
+ yield sentinel;
860
+ return;
861
+ }
862
+ // Connection-killed / parse-error / fetch-failed path: still emit
863
+ // an enriched sentinel so consumers and Pipeline B see no_output
864
+ // instead of an unhandled rejection. Then re-throw so the original
865
+ // error still surfaces to direct stream consumers that need it.
866
+ const sentinel = await buildNoOutputSentinel(streamError, undefined, capturedProviderError);
867
+ stampNoOutputSpan(sentinel);
868
+ yield sentinel;
869
+ throw streamError;
870
+ }
871
+ finally {
872
+ // Consumer left the iterator early (break / return / throw) — abort
873
+ // the background SSE fetch + tool execution and stop the loop from
874
+ // growing the chunk queue further.
875
+ if (!consumerAbortController.signal.aborted) {
876
+ consumerAbortController.abort();
877
+ }
878
+ }
879
+ };
880
+ const result = {
881
+ stream: transformedStream(),
882
+ provider: this.providerName,
883
+ model: this.modelName,
884
+ analytics: streamAnalyticsCollector.createAnalytics(this.providerName, this.modelName,
885
+ // Pass the deferred promises so the collector sees real usage and
886
+ // finish reason after the multi-step loop completes.
887
+ {
888
+ textStream: (async function* () { })(),
889
+ usage: usagePromise,
890
+ finishReason: finishPromise,
891
+ }, Date.now() - startTime, {
264
892
  requestId: `openai-compatible-stream-${Date.now()}`,
265
893
  streamingMode: true,
894
+ }),
895
+ toolsUsed,
896
+ metadata: {
897
+ startTime,
898
+ streamId: `openai-compatible-${Date.now()}`,
899
+ },
900
+ };
901
+ // Lazy getter: every read transforms the live `toolExecutionSummaries`
902
+ // through the canonical `transformToolExecutions()` so consumers see
903
+ // `{name, input, output, duration}[]` (codebase convention), while still
904
+ // reflecting tools appended during streaming. A pre-computed array would
905
+ // freeze the snapshot empty for consumers who drain the stream after.
906
+ Object.defineProperty(result, "toolExecutions", {
907
+ enumerable: true,
908
+ configurable: true,
909
+ get: () => transformToolExecutions(toolExecutionSummaries.map((s) => ({
910
+ toolName: s.toolName,
911
+ input: s.input,
912
+ output: s.output,
913
+ duration: s.endTime.getTime() - s.startTime.getTime(),
914
+ }))),
915
+ });
916
+ // Cleanup timeout once the loop finishes. The actual rejection is
917
+ // surfaced to consumers via `await loopPromise` inside the stream
918
+ // generator; the .catch here exists only to keep node from logging
919
+ // an `unhandledRejection` on the cleanup chain. We also capture the
920
+ // upstream provider error into the closure variable so the no-output
921
+ // sentinel built later carries the real cause (matches the
922
+ // onError-callback pattern used by openAI.ts / litellm.ts).
923
+ loopPromise
924
+ .finally(() => timeoutController?.cleanup())
925
+ .catch((error) => {
926
+ captureProviderError(error);
927
+ });
928
+ return result;
929
+ }
930
+ /**
931
+ * Multi-step streaming orchestrator. One iteration per model turn:
932
+ *
933
+ * 1. POST /chat/completions with stream:true
934
+ * 2. Parse SSE; push text deltas to the consumer queue
935
+ * 3. If the step emitted tool_calls → execute each, append to
936
+ * conversation, loop again
937
+ * 4. Otherwise resolve the deferred analytics promises and exit
938
+ *
939
+ * Bounded by `args.maxSteps`. Any thrown error rejects loopPromise and
940
+ * is surfaced to the consumer via `await loopPromise` in the stream
941
+ * generator.
942
+ */
943
+ async runStreamLoop(args) {
944
+ const { maxSteps, modelId, url, apiKey, fetchImpl, abortSignal, options, conversation, openAITools, openAIToolChoice, toolsRecord, emitter, toolsUsed, toolExecutionSummaries, pushChunk, resolveUsage, resolveFinish, } = args;
945
+ try {
946
+ let stepFinish = null;
947
+ let stepUsage;
948
+ for (let step = 0; step < maxSteps; step++) {
949
+ const stepResult = await this.streamOneStep({
950
+ modelId,
951
+ url,
952
+ apiKey,
953
+ fetchImpl,
954
+ abortSignal,
955
+ options,
956
+ conversation,
957
+ openAITools,
958
+ openAIToolChoice,
959
+ pushChunk,
960
+ });
961
+ stepFinish = stepResult.finishReason;
962
+ if (stepResult.usage) {
963
+ stepUsage = mergeUsage(stepUsage, stepResult.usage);
964
+ }
965
+ if (stepResult.toolCalls.size === 0) {
966
+ break;
967
+ }
968
+ await this.executeToolBatch({
969
+ stepResult,
970
+ conversation,
971
+ toolsRecord,
972
+ emitter,
973
+ toolsUsed,
974
+ toolExecutionSummaries,
975
+ options,
976
+ });
977
+ }
978
+ resolveUsage({
979
+ promptTokens: stepUsage?.prompt_tokens ?? 0,
980
+ completionTokens: stepUsage?.completion_tokens ?? 0,
981
+ totalTokens: stepUsage?.total_tokens ?? 0,
266
982
  });
983
+ resolveFinish(stepFinish ?? "stop");
984
+ pushChunk({ done: true });
267
985
  return {
268
- stream: transformedStream(),
269
- provider: this.providerName,
270
- model: this.modelName,
271
- analytics: analyticsPromise,
272
- metadata: {
273
- startTime,
274
- streamId: `openai-compatible-${Date.now()}`,
275
- },
986
+ finishReason: stepFinish ?? "stop",
987
+ usage: stepUsage,
276
988
  };
277
989
  }
278
- catch (error) {
279
- timeoutController?.cleanup();
280
- throw this.handleProviderError(error);
990
+ catch (err) {
991
+ logger.error("OpenAI-compatible: Stream error", {
992
+ error: err instanceof Error ? err.message : String(err),
993
+ });
994
+ // Don't hang analytics consumers on deferred promises.
995
+ resolveUsage({ promptTokens: 0, completionTokens: 0, totalTokens: 0 });
996
+ resolveFinish("error");
997
+ pushChunk({ done: true });
998
+ throw err;
999
+ }
1000
+ }
1001
+ /**
1002
+ * One streaming round-trip: POST chat-completions, parse SSE, push text
1003
+ * deltas to the consumer queue. Returns the accumulated SSE result so
1004
+ * the caller can decide whether to run tools and re-stream.
1005
+ */
1006
+ async streamOneStep(args) {
1007
+ const body = buildBody({
1008
+ modelId: args.modelId,
1009
+ messages: args.conversation,
1010
+ options: args.options,
1011
+ tools: args.openAITools,
1012
+ ...(args.openAIToolChoice !== undefined
1013
+ ? { toolChoice: args.openAIToolChoice }
1014
+ : {}),
1015
+ streaming: true,
1016
+ });
1017
+ const res = await args.fetchImpl(args.url, {
1018
+ method: "POST",
1019
+ headers: {
1020
+ "Content-Type": "application/json",
1021
+ Authorization: `Bearer ${args.apiKey}`,
1022
+ },
1023
+ body: JSON.stringify(body),
1024
+ ...(args.abortSignal ? { signal: args.abortSignal } : {}),
1025
+ });
1026
+ if (!res.ok) {
1027
+ throw await buildAPIError(args.url, body, res);
281
1028
  }
1029
+ if (!res.body) {
1030
+ throw new Error("openai-compatible: stream response had no body");
1031
+ }
1032
+ return parseSSEStream(res.body, (delta) => {
1033
+ args.pushChunk({ content: delta });
1034
+ });
282
1035
  }
283
1036
  /**
284
- * Get available models from OpenAI Compatible endpoint
1037
+ * Execute every tool_call collected from one streaming step:
285
1038
  *
286
- * Fetches from the /v1/models endpoint to discover available models.
287
- * This is useful for auto-discovery when no model is specified.
1039
+ * - append an `assistant` turn carrying the tool_calls
1040
+ * - resolve each tool from the local registry and run it
1041
+ * - emit tool:start/tool:end events
1042
+ * - push per-execution summaries
1043
+ * - append a `tool` turn per result so the next step can see them
1044
+ * - mirror BaseProvider's tool-events + storage hooks
288
1045
  */
1046
+ async executeToolBatch(args) {
1047
+ const { stepResult, conversation, toolsRecord, emitter, toolsUsed, toolExecutionSummaries, options, } = args;
1048
+ // Append the assistant turn that triggered tool calls.
1049
+ const toolCallsForMessage = [];
1050
+ for (const [, t] of stepResult.toolCalls) {
1051
+ toolCallsForMessage.push({
1052
+ id: t.id,
1053
+ type: "function",
1054
+ function: { name: t.name, arguments: t.argsBuffered },
1055
+ });
1056
+ }
1057
+ conversation.push({
1058
+ role: "assistant",
1059
+ content: stepResult.text.length > 0 ? stepResult.text : null,
1060
+ tool_calls: toolCallsForMessage,
1061
+ });
1062
+ // Execute each tool, append result as a tool message.
1063
+ for (const [, t] of stepResult.toolCalls) {
1064
+ const startedAt = new Date();
1065
+ let input;
1066
+ try {
1067
+ input = JSON.parse(t.argsBuffered || "{}");
1068
+ }
1069
+ catch {
1070
+ input = t.argsBuffered;
1071
+ }
1072
+ let output;
1073
+ let errorMsg;
1074
+ const toolDef = toolsRecord[t.name];
1075
+ emitter?.emit("tool:start", {
1076
+ toolName: t.name,
1077
+ toolCallId: t.id,
1078
+ input,
1079
+ });
1080
+ if (!toolDef || typeof toolDef.execute !== "function") {
1081
+ errorMsg = `Tool '${t.name}' is not registered.`;
1082
+ output = { error: errorMsg };
1083
+ }
1084
+ else {
1085
+ try {
1086
+ output = await toolDef.execute(input, {});
1087
+ }
1088
+ catch (err) {
1089
+ errorMsg = err instanceof Error ? err.message : String(err);
1090
+ output = { error: errorMsg };
1091
+ }
1092
+ }
1093
+ const endedAt = new Date();
1094
+ toolsUsed.push(t.name);
1095
+ toolExecutionSummaries.push({
1096
+ toolCallId: t.id,
1097
+ toolName: t.name,
1098
+ input,
1099
+ output,
1100
+ ...(errorMsg ? { error: errorMsg } : {}),
1101
+ startTime: startedAt,
1102
+ endTime: endedAt,
1103
+ });
1104
+ conversation.push({
1105
+ role: "tool",
1106
+ tool_call_id: t.id,
1107
+ content: stringifyToolOutput(output),
1108
+ });
1109
+ }
1110
+ // BaseProvider tool-events + storage hooks. Mirrors what other providers
1111
+ // call from their AI-SDK onStepFinish handlers.
1112
+ const justExecuted = toolExecutionSummaries.slice(-stepResult.toolCalls.size);
1113
+ emitToolEndFromStepFinish(emitter, justExecuted.map((s) => ({
1114
+ toolName: s.toolName,
1115
+ output: s.output,
1116
+ ...(s.error ? { error: s.error } : {}),
1117
+ })));
1118
+ try {
1119
+ await this.handleToolExecutionStorage(justExecuted.map((s) => ({
1120
+ toolCallId: s.toolCallId,
1121
+ toolName: s.toolName,
1122
+ input: s.input,
1123
+ output: s.output,
1124
+ })), justExecuted.map((s) => ({
1125
+ toolCallId: s.toolCallId,
1126
+ toolName: s.toolName,
1127
+ output: s.output,
1128
+ })), options, new Date());
1129
+ }
1130
+ catch (err) {
1131
+ logger.warn("[OpenAICompatibleProvider] Failed to store tool executions", {
1132
+ provider: this.providerName,
1133
+ error: err instanceof Error ? err.message : String(err),
1134
+ });
1135
+ }
1136
+ }
289
1137
  async getAvailableModels() {
290
1138
  try {
291
- const modelsUrl = new URL("/v1/models", this.config.baseURL).toString();
1139
+ // Match the chat-completions URL convention: append `/models` to the
1140
+ // user-provided base. Using `new URL("/v1/models", baseURL)` would
1141
+ // strip any base path (e.g. `http://host/api/v1` → `http://host/v1/models`).
1142
+ const modelsUrl = `${stripTrailingSlash(this.config.baseURL)}/models`;
292
1143
  logger.debug(`Fetching available models from: ${modelsUrl}`);
293
1144
  const proxyFetch = createProxyFetch();
294
1145
  const controller = new AbortController();
@@ -319,16 +1170,10 @@ export class OpenAICompatibleProvider extends BaseProvider {
319
1170
  return this.getFallbackModels();
320
1171
  }
321
1172
  }
322
- /**
323
- * Get the first available model for auto-selection
324
- */
325
1173
  async getFirstAvailableModel() {
326
1174
  const models = await this.getAvailableModels();
327
1175
  return models[0] || FALLBACK_OPENAI_COMPATIBLE_MODEL;
328
1176
  }
329
- /**
330
- * Fallback models when discovery fails
331
- */
332
1177
  getFallbackModels() {
333
1178
  return [
334
1179
  "gpt-4o",
@@ -341,4 +1186,57 @@ export class OpenAICompatibleProvider extends BaseProvider {
341
1186
  ];
342
1187
  }
343
1188
  }
1189
/**
 * Creates two externally-resolvable promises, one for `usage` and one for
 * `finishReason`, so a consumer handed the promises up front receives the
 * values supplied later through the matching resolver.
 *
 * @returns `{ usagePromise, finishPromise, resolveUsage, resolveFinish }`.
 */
const createDeferredAnalytics = () => {
    // Builds one promise together with the function that fulfils it.
    const defer = () => {
        let settle = () => { };
        const promise = new Promise((resolve) => {
            settle = resolve;
        });
        return { promise, settle };
    };
    const usage = defer();
    const finish = defer();
    return {
        usagePromise: usage.promise,
        finishPromise: finish.promise,
        resolveUsage: usage.settle,
        resolveFinish: finish.settle,
    };
};
1203
/**
 * Async hand-off queue between the streaming loop (producer) and the
 * consumer's async iterator.
 *
 * `pushChunk(chunk)` delivers a chunk to the oldest waiting `nextChunk()`
 * call, or buffers it when nobody is waiting. `nextChunk()` returns a
 * promise for the next chunk in FIFO order.
 *
 * Unlike a single pending-resolver slot, concurrent `nextChunk()` calls are
 * each answered in order rather than the earlier one being orphaned. Once
 * a chunk containing a `done` key has been delivered it becomes sticky:
 * outstanding and later `nextChunk()` calls resolve with that same chunk
 * instead of hanging.
 *
 * @returns `{ pushChunk, nextChunk }`.
 */
const createChunkQueue = () => {
    const buffered = [];
    const waiters = [];
    let terminalChunk;
    const isTerminal = (chunk) => chunk !== null && typeof chunk === "object" && "done" in chunk;
    // Resolve one waiter; if the chunk ends the stream, remember it and
    // release every other waiter so none is left pending forever.
    const deliver = (resolve, chunk) => {
        resolve(chunk);
        if (isTerminal(chunk)) {
            terminalChunk = chunk;
            for (const waiter of waiters.splice(0)) {
                waiter(chunk);
            }
        }
    };
    const pushChunk = (c) => {
        const waiter = waiters.shift();
        if (waiter) {
            deliver(waiter, c);
        }
        else {
            buffered.push(c);
        }
    };
    const nextChunk = () => new Promise((resolve) => {
        if (buffered.length > 0) {
            deliver(resolve, buffered.shift());
        }
        else if (terminalChunk !== undefined) {
            resolve(terminalChunk);
        }
        else {
            waiters.push(resolve);
        }
    });
    return { pushChunk, nextChunk };
};
1229
/**
 * Adds two OpenAI-style usage records field by field.
 *
 * When only one side is present it is returned unchanged (same object).
 * When both are present, a new record is returned with summed
 * `prompt_tokens`, `completion_tokens` and `total_tokens`; missing counts
 * are treated as 0. If a side omits `total_tokens`, its prompt + completion
 * sum is used for that side, so the merged total stays consistent with the
 * merged prompt and completion counts.
 *
 * @param a Accumulated usage so far, or undefined.
 * @param b Usage of the latest step, or undefined.
 * @returns The merged usage, or whichever argument was provided.
 */
const mergeUsage = (a, b) => {
    if (!a) {
        return b;
    }
    if (!b) {
        return a;
    }
    const promptOf = (u) => u.prompt_tokens ?? 0;
    const completionOf = (u) => u.completion_tokens ?? 0;
    // Prefer the reported total; derive it only when it was not reported.
    const totalOf = (u) => u.total_tokens ?? (promptOf(u) + completionOf(u));
    return {
        prompt_tokens: promptOf(a) + promptOf(b),
        completion_tokens: completionOf(a) + completionOf(b),
        total_tokens: totalOf(a) + totalOf(b),
    };
};
344
1242
  //# sourceMappingURL=openaiCompatible.js.map