@vybestack/llxprt-code-core 0.1.23-nightly.250829.6bacfcba → 0.1.23-nightly.250902.c3d3686d
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/code_assist/codeAssist.js +17 -6
- package/dist/src/code_assist/codeAssist.js.map +1 -1
- package/dist/src/code_assist/server.js +15 -4
- package/dist/src/code_assist/server.js.map +1 -1
- package/dist/src/code_assist/setup.js +7 -0
- package/dist/src/code_assist/setup.js.map +1 -1
- package/dist/src/core/ContentGeneratorAdapter.d.ts +37 -0
- package/dist/src/core/ContentGeneratorAdapter.js +58 -0
- package/dist/src/core/ContentGeneratorAdapter.js.map +1 -0
- package/dist/src/core/client.d.ts +9 -2
- package/dist/src/core/client.js +111 -36
- package/dist/src/core/client.js.map +1 -1
- package/dist/src/core/compression-config.d.ts +10 -0
- package/dist/src/core/compression-config.js +18 -0
- package/dist/src/core/compression-config.js.map +1 -0
- package/dist/src/core/geminiChat.d.ts +8 -2
- package/dist/src/core/geminiChat.js +148 -32
- package/dist/src/core/geminiChat.js.map +1 -1
- package/dist/src/core/prompts.js +4 -2
- package/dist/src/core/prompts.js.map +1 -1
- package/dist/src/index.d.ts +2 -0
- package/dist/src/index.js +2 -0
- package/dist/src/index.js.map +1 -1
- package/dist/src/providers/BaseProvider.d.ts +1 -1
- package/dist/src/providers/BaseProvider.js.map +1 -1
- package/dist/src/providers/anthropic/AnthropicProvider.js +1 -1
- package/dist/src/providers/anthropic/AnthropicProvider.js.map +1 -1
- package/dist/src/providers/gemini/GeminiProvider.js +100 -36
- package/dist/src/providers/gemini/GeminiProvider.js.map +1 -1
- package/dist/src/providers/openai/OpenAIProvider.d.ts +54 -25
- package/dist/src/providers/openai/OpenAIProvider.js +528 -984
- package/dist/src/providers/openai/OpenAIProvider.js.map +1 -1
- package/dist/src/providers/openai-responses/OpenAIResponsesProvider.d.ts +91 -0
- package/dist/src/providers/openai-responses/OpenAIResponsesProvider.js +440 -0
- package/dist/src/providers/openai-responses/OpenAIResponsesProvider.js.map +1 -0
- package/dist/src/providers/openai-responses/index.d.ts +1 -0
- package/dist/src/providers/openai-responses/index.js +2 -0
- package/dist/src/providers/openai-responses/index.js.map +1 -0
- package/dist/src/services/history/ContentConverters.d.ts +38 -0
- package/dist/src/services/history/ContentConverters.js +188 -0
- package/dist/src/services/history/ContentConverters.js.map +1 -0
- package/dist/src/services/history/HistoryEvents.d.ts +32 -0
- package/dist/src/services/history/HistoryEvents.js +17 -0
- package/dist/src/services/history/HistoryEvents.js.map +1 -0
- package/dist/src/services/history/HistoryService.d.ts +168 -0
- package/dist/src/services/history/HistoryService.js +521 -0
- package/dist/src/services/history/HistoryService.js.map +1 -0
- package/dist/src/services/history/IContent.d.ts +179 -0
- package/dist/src/services/history/IContent.js +104 -0
- package/dist/src/services/history/IContent.js.map +1 -0
- package/package.json +1 -1
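The headline change in this release is visible in the file list above: the Responses API path moves out of OpenAIProvider into a new standalone provider under providers/openai-responses/, and a new history layer (HistoryService, IContent, ContentConverters, HistoryEvents) appears under services/history/. The two-line growth of index.d.ts/index.js suggests the new modules are re-exported from the package root; the sketch below assumes that, and its constructor arguments are illustrative, not confirmed by this diff.

import {
  OpenAIResponsesProvider, // new standalone Responses API provider
  HistoryService,          // new conversation history service
} from '@vybestack/llxprt-code-core';

// Hypothetical wiring; the real signatures live in the new .d.ts files.
const provider = new OpenAIResponsesProvider(/* provider config */);
const history = new HistoryService();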
@@ -22,12 +22,6 @@ import { ContentGeneratorRole } from '../ContentGeneratorRole.js';
 import { GemmaToolCallParser } from '../../parsers/TextToolCallParser.js';
 import { ToolFormatter } from '../../tools/ToolFormatter.js';
 import OpenAI from 'openai';
-import { RESPONSES_API_MODELS } from './RESPONSES_API_MODELS.js';
-import { ConversationCache } from './ConversationCache.js';
-import { estimateMessagesTokens, estimateRemoteTokens, } from './estimateRemoteTokens.js';
-// ConversationContext removed - using inline conversation ID generation
-import { parseResponsesStream, parseErrorResponse, } from './parseResponsesStream.js';
-import { buildResponsesRequest } from './buildResponsesRequest.js';
 import { BaseProvider } from '../BaseProvider.js';
 import { isQwenEndpoint, generateOAuthEndpointMismatchError, } from '../../config/endpoints.js';
 import { getSettingsService } from '../../settings/settingsServiceInstance.js';
@@ -39,7 +33,6 @@ export class OpenAIProvider extends BaseProvider {
     providerConfig;
     toolFormatter;
     toolFormatOverride;
-    conversationCache;
     modelParams;
     _cachedClient;
     _cachedClientKey;
@@ -78,7 +71,6 @@ export class OpenAIProvider extends BaseProvider {
         this.baseURL = baseURL;
         this.providerConfig = config;
         this.toolFormatter = new ToolFormatter();
-        this.conversationCache = new ConversationCache();
         // Initialize from SettingsService
         this.initializeFromSettings().catch((error) => {
             this.logger.debug(() => `Failed to initialize from SettingsService: ${error}`);
@@ -148,7 +140,9 @@ export class OpenAIProvider extends BaseProvider {
         const oauthManager = this.baseProviderConfig.oauthManager;
         if (oauthManager?.getOAuthToken) {
             const oauthToken = await oauthManager.getOAuthToken('qwen');
-            this.logger.debug(() => `OAuth token retrieved
+            this.logger.debug(() => `OAuth token retrieved:\n` +
+                ` resource_url: ${oauthToken?.resource_url}\n` +
+                ` access_token: ${oauthToken?.access_token?.substring(0, 10)}...`);
             if (oauthToken?.resource_url) {
                 // Use the resource_url from the OAuth token
                 effectiveBaseURL = `https://${oauthToken.resource_url}/v1`;
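This hunk only reshapes the debug logging, but the surrounding context lines document the resolution rule for Qwen OAuth: when the token carries a resource_url, that host replaces the default base URL. A minimal sketch of that rule (names are illustrative):

interface QwenOAuthToken {
  resource_url?: string;
  access_token?: string;
}

function resolveEffectiveBaseURL(token: QwenOAuthToken | undefined, fallback: string): string {
  // Use the resource_url from the OAuth token when present.
  return token?.resource_url ? `https://${token.resource_url}/v1` : fallback;
}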
@@ -210,254 +204,6 @@ export class OpenAIProvider extends BaseProvider {
         // Default to OpenAI format
         return 'openai';
     }
-    shouldUseResponses(model) {
-        // Check env flag override (highest priority)
-        if (process.env.OPENAI_RESPONSES_DISABLE === 'true') {
-            return false;
-        }
-        // Check settings override - if explicitly set to false, always respect that
-        if (this.providerConfig?.openaiResponsesEnabled === false) {
-            return false;
-        }
-        // Never use Responses API for non-OpenAI providers (those with custom base URLs)
-        const baseURL = this.baseURL || 'https://api.openai.com/v1';
-        if (baseURL !== 'https://api.openai.com/v1') {
-            return false;
-        }
-        // Default: Check if model starts with any of the responses API model prefixes
-        return RESPONSES_API_MODELS.some((responsesModel) => model.startsWith(responsesModel));
-    }
-    async callResponsesEndpoint(messages, tools, options) {
-        // Check if API key is available (using resolved authentication)
-        const apiKey = await this.getAuthToken();
-        if (!apiKey) {
-            const endpoint = this.baseURL || 'https://api.openai.com/v1';
-            if (this.isOAuthEnabled() && !this.supportsOAuth()) {
-                throw new Error(generateOAuthEndpointMismatchError(endpoint, 'qwen'));
-            }
-            throw new Error('OpenAI API key is required to make API calls');
-        }
-        // Remove the stateful mode error to allow O3 to work with conversation IDs
-        // Check context usage and warn if getting close to limit
-        if (options?.conversationId && options?.parentId) {
-            const contextInfo = this.estimateContextUsage(options.conversationId, options.parentId, messages);
-            // Warn if less than 4k tokens remaining
-            if (contextInfo.tokensRemaining < 4000) {
-                this.logger.debug(() => `Warning: Only ${contextInfo.tokensRemaining} tokens remaining (${contextInfo.contextUsedPercent.toFixed(1)}% context used). Consider starting a new conversation.`);
-            }
-        }
-        // Check cache for existing conversation
-        if (options?.conversationId && options?.parentId) {
-            const cachedMessages = this.conversationCache.get(options.conversationId, options.parentId);
-            if (cachedMessages) {
-                // Return cached messages as an async iterable
-                return (async function* () {
-                    for (const message of cachedMessages) {
-                        yield message;
-                    }
-                })();
-            }
-        }
-        // Format tools for Responses API
-        const formattedTools = tools
-            ? this.toolFormatter.toResponsesTool(tools)
-            : undefined;
-        // Patch messages to include synthetic responses for cancelled tools
-        const { SyntheticToolResponseHandler } = await import('./syntheticToolResponses.js');
-        const patchedMessages = SyntheticToolResponseHandler.patchMessageHistory(messages);
-        // Build the request
-        const request = buildResponsesRequest({
-            model: this.currentModel,
-            messages: patchedMessages,
-            tools: formattedTools,
-            stream: options?.stream ?? true,
-            conversationId: options?.conversationId,
-            parentId: options?.parentId,
-            tool_choice: options?.tool_choice,
-            ...(this.modelParams || {}),
-        });
-        // Make the API call
-        const baseURL = this.baseURL || 'https://api.openai.com/v1';
-        const responsesURL = `${baseURL}/responses`;
-        // Ensure proper UTF-8 encoding for the request body
-        // This is crucial for handling multibyte characters (e.g., Japanese, Chinese)
-        const requestBody = JSON.stringify(request);
-        const bodyBlob = new Blob([requestBody], {
-            type: 'application/json; charset=utf-8',
-        });
-        const response = await fetch(responsesURL, {
-            method: 'POST',
-            headers: {
-                Authorization: `Bearer ${apiKey}`,
-                'Content-Type': 'application/json; charset=utf-8',
-            },
-            body: bodyBlob,
-        });
-        // Handle errors
-        if (!response.ok) {
-            const errorBody = await response.text();
-            // Handle 422 context_length_exceeded error
-            if (response.status === 422 &&
-                errorBody.includes('context_length_exceeded')) {
-                this.logger.debug(() => 'Context length exceeded, invalidating cache and retrying stateless...');
-                // Invalidate the cache for this conversation
-                if (options?.conversationId && options?.parentId) {
-                    this.conversationCache.invalidate(options.conversationId, options.parentId);
-                }
-                // Retry without conversation context (pure stateless)
-                const retryRequest = buildResponsesRequest({
-                    model: this.currentModel,
-                    messages,
-                    tools: formattedTools,
-                    stream: options?.stream ?? true,
-                    // Omit conversationId and parentId for stateless retry
-                    tool_choice: options?.tool_choice,
-                    ...(this.modelParams || {}),
-                });
-                // Ensure proper UTF-8 encoding for retry request as well
-                const retryRequestBody = JSON.stringify(retryRequest);
-                const retryBodyBlob = new Blob([retryRequestBody], {
-                    type: 'application/json; charset=utf-8',
-                });
-                const retryResponse = await fetch(responsesURL, {
-                    method: 'POST',
-                    headers: {
-                        Authorization: `Bearer ${apiKey}`,
-                        'Content-Type': 'application/json; charset=utf-8',
-                    },
-                    body: retryBodyBlob,
-                });
-                if (!retryResponse.ok) {
-                    const retryErrorBody = await retryResponse.text();
-                    throw parseErrorResponse(retryResponse.status, retryErrorBody, this.name);
-                }
-                // Use the retry response
-                return this.handleResponsesApiResponse(retryResponse, messages, undefined, // No conversation context on retry
-                undefined, options?.stream !== false);
-            }
-            throw parseErrorResponse(response.status, errorBody, this.name);
-        }
-        // Handle the response
-        return this.handleResponsesApiResponse(response, messages, options?.conversationId, options?.parentId, options?.stream !== false);
-    }
-    async handleResponsesApiResponse(response, messages, conversationId, parentId, isStreaming) {
-        // Handle streaming response
-        if (isStreaming && response.body) {
-            const collectedMessages = [];
-            const cache = this.conversationCache;
-            return (async function* () {
-                for await (const message of parseResponsesStream(response.body)) {
-                    // Collect messages for caching
-                    if (message.content || message.tool_calls) {
-                        collectedMessages.push(message);
-                    }
-                    else if (message.usage && collectedMessages.length === 0) {
-                        // If we only got a usage message with no content, add a placeholder
-                        collectedMessages.push({
-                            role: ContentGeneratorRole.ASSISTANT,
-                            content: '',
-                        });
-                    }
-                    // Update the parentId in the context as soon as we get a message ID
-                    if (message.id) {
-                        // ConversationContext.setParentId(message.id);
-                        // TODO: Handle parent ID updates when ConversationContext is available
-                    }
-                    yield message;
-                }
-                // Cache the collected messages with token count
-                if (conversationId && parentId && collectedMessages.length > 0) {
-                    // Get previous accumulated tokens
-                    const previousTokens = cache.getAccumulatedTokens(conversationId, parentId);
-                    // Calculate tokens for this request (messages + response)
-                    const requestTokens = estimateMessagesTokens(messages);
-                    const responseTokens = estimateMessagesTokens(collectedMessages);
-                    const totalTokensForRequest = requestTokens + responseTokens;
-                    // Update cache with new accumulated total
-                    cache.set(conversationId, parentId, collectedMessages, previousTokens + totalTokensForRequest);
-                }
-            })();
-        }
-        const data = (await response.json());
-        const resultMessages = [];
-        // DEFENSIVE FIX: Handle potential array response from providers that violate OpenAI spec
-        // Some providers (like Cerebras) may return an array of responses instead of a single response
-        if (Array.isArray(data)) {
-            this.logger.error(() => '[Cerebras Corruption] Detected malformed array response from provider, aggregating...', {
-                provider: this.baseURL,
-                arrayLength: data.length,
-            });
-            const aggregatedContent = [];
-            let aggregatedToolCalls = [];
-            let aggregatedUsage = undefined;
-            for (const item of data) {
-                if (item.choices?.[0]?.message?.content) {
-                    aggregatedContent.push(item.choices[0].message.content);
-                }
-                if (item.choices?.[0]?.message?.tool_calls) {
-                    aggregatedToolCalls = item.choices[0].message.tool_calls;
-                }
-                if (item.usage) {
-                    aggregatedUsage = item.usage;
-                }
-            }
-            const message = {
-                role: ContentGeneratorRole.ASSISTANT,
-                content: aggregatedContent.join(''),
-            };
-            if (aggregatedToolCalls.length > 0) {
-                message.tool_calls = aggregatedToolCalls;
-            }
-            if (aggregatedUsage) {
-                message.usage = {
-                    prompt_tokens: aggregatedUsage.prompt_tokens || 0,
-                    completion_tokens: aggregatedUsage.completion_tokens || 0,
-                    total_tokens: aggregatedUsage.total_tokens || 0,
-                };
-            }
-            resultMessages.push(message);
-            // Convert to async iterator for consistent return type
-            return (async function* () {
-                for (const msg of resultMessages) {
-                    yield msg;
-                }
-            })();
-        }
-        if (data.choices && data.choices.length > 0) {
-            const choice = data.choices[0];
-            const message = {
-                role: choice.message.role,
-                content: choice.message.content || '',
-            };
-            if (choice.message.tool_calls) {
-                message.tool_calls = choice.message.tool_calls;
-            }
-            if (data.usage) {
-                message.usage = {
-                    prompt_tokens: data.usage.prompt_tokens || 0,
-                    completion_tokens: data.usage.completion_tokens || 0,
-                    total_tokens: data.usage.total_tokens || 0,
-                };
-            }
-            resultMessages.push(message);
-        }
-        // Cache the result with token count
-        if (conversationId && parentId && resultMessages.length > 0) {
-            // Get previous accumulated tokens
-            const previousTokens = this.conversationCache.getAccumulatedTokens(conversationId, parentId);
-            // Calculate tokens for this request
-            const requestTokens = estimateMessagesTokens(messages);
-            const responseTokens = estimateMessagesTokens(resultMessages);
-            const totalTokensForRequest = requestTokens + responseTokens;
-            // Update cache with new accumulated total
-            this.conversationCache.set(conversationId, parentId, resultMessages, previousTokens + totalTokensForRequest);
-        }
-        return (async function* () {
-            for (const message of resultMessages) {
-                yield message;
-            }
-        })();
-    }
     async getModels() {
         // Check if API key is available (using resolved authentication)
         const apiKey = await this.getAuthToken();
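Everything removed in this hunk is the Responses API path that now lives in the standalone OpenAIResponsesProvider: model routing (shouldUseResponses), the fetch-based /responses call with a stateless retry on 422 context_length_exceeded, and a per-conversation message cache with a running token estimate. The sketch below reconstructs the observable contract of that cache from the call sites above (get/set/getAccumulatedTokens/invalidate keyed by conversationId and parentId); ConversationCache itself was deleted in this release, so this is an illustration, not the original implementation.

class ConversationCacheSketch<M> {
  private store = new Map<string, { messages: M[]; tokens: number }>();

  private key(conversationId: string, parentId: string): string {
    return `${conversationId}:${parentId}`;
  }

  get(conversationId: string, parentId: string): M[] | undefined {
    return this.store.get(this.key(conversationId, parentId))?.messages;
  }

  getAccumulatedTokens(conversationId: string, parentId: string): number {
    return this.store.get(this.key(conversationId, parentId))?.tokens ?? 0;
  }

  // Callers pass previousTokens + tokens estimated for the new request/response.
  set(conversationId: string, parentId: string, messages: M[], tokens: number): void {
    this.store.set(this.key(conversationId, parentId), { messages, tokens });
  }

  invalidate(conversationId: string, parentId: string): void {
    this.store.delete(this.key(conversationId, parentId));
  }
}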
@@ -530,739 +276,122 @@ export class OpenAIProvider extends BaseProvider {
         }
     }
     async *generateChatCompletion(messages, tools, _toolFormat) {
-        //
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        const patchedMessages = messages;
-        // Validate tool messages have required tool_call_id
-        const toolMessages = patchedMessages.filter((msg) => msg.role === 'tool');
-        const missingIds = toolMessages.filter((msg) => !msg.tool_call_id);
-        if (missingIds.length > 0) {
-            this.logger.error(() => `FATAL: Tool messages missing tool_call_id: ${JSON.stringify(missingIds)}`);
-            throw new Error(`OpenAI API requires tool_call_id for all tool messages. Found ${missingIds.length} tool message(s) without IDs.`);
-        }
-        // Log synthetic responses for debugging
-        const syntheticMessages = patchedMessages.filter((msg) => msg._synthetic);
-        if (syntheticMessages.length > 0) {
-            this.logger.debug(() => `[Synthetic] Added ${syntheticMessages.length} synthetic tool responses`);
-            // Check for ordering issues - using debug logger which only executes when enabled
-            this.logger.debug(() => {
-                const orderingErrors = [];
-                const orderingWarnings = [];
-                for (let i = 0; i < patchedMessages.length - 1; i++) {
-                    const current = patchedMessages[i];
-                    const next = patchedMessages[i + 1];
-                    // Check if a tool response comes before its corresponding tool call
-                    if (current.role === 'tool' && current.tool_call_id) {
-                        // Find the assistant message with this tool call
-                        const callIndex = patchedMessages.findIndex((m) => m.role === 'assistant' &&
-                            m.tool_calls?.some((tc) => tc.id === current.tool_call_id));
-                        if (callIndex === -1 || callIndex > i) {
-                            orderingErrors.push(`Tool response ${current.tool_call_id} appears before its tool call or call not found`);
-                        }
-                    }
-                    // Check if we have consecutive assistant messages with tool calls
-                    if (current.role === 'assistant' &&
-                        current.tool_calls &&
-                        next.role === 'assistant' &&
-                        next.tool_calls) {
-                        orderingWarnings.push(`Consecutive assistant messages with tool calls at indices ${i} and ${i + 1}`);
-                    }
-                }
-                if (orderingErrors.length > 0) {
-                    return `[Synthetic Order Check] Errors found: ${orderingErrors.join('; ')}`;
-                }
-                else if (orderingWarnings.length > 0) {
-                    return `[Synthetic Order Check] Warnings: ${orderingWarnings.join('; ')}`;
-                }
-                else {
-                    return '[Synthetic Order Check] No issues found';
-                }
-            });
-        }
-        const parser = this.requiresTextToolCallParsing()
-            ? new GemmaToolCallParser()
-            : null;
-        // Get current tool format (with override support)
-        const currentToolFormat = this.getToolFormat();
-        // Format tools using formatToolsForAPI method
-        const formattedTools = tools ? this.formatToolsForAPI(tools) : undefined;
-        // Get stream_options from ephemeral settings (not model params)
-        const streamOptions = this.providerConfig?.getEphemeralSettings?.()?.['stream-options'];
-        // Default stream_options to { include_usage: true } unless explicitly set
-        const finalStreamOptions = streamOptions !== undefined ? streamOptions : { include_usage: true };
-        // Get streaming setting from ephemeral settings (default: enabled)
-        const streamingSetting = this.providerConfig?.getEphemeralSettings?.()?.['streaming'];
-        let streamingEnabled = streamingSetting !== 'disabled';
-        // Get resolved authentication and update client if needed
-        await this.updateClientWithResolvedAuth();
-        // Strip internal tracking fields that some APIs don't accept
-        // We keep the synthetic responses but remove the metadata fields
-        const cleanedMessages = patchedMessages.map((msg) => {
-            // Create a shallow copy and remove internal fields
-            const { _synthetic, _cancelled, ...cleanMsg } = msg;
-            // Log synthetic tool responses for debugging
-            if (msg._synthetic) {
-                this.logger.debug(() => `[Synthetic Tool Response] ${JSON.stringify(cleanMsg)}`);
-            }
-            return cleanMsg;
-        });
-        this.logger.debug(() => `About to make API call with model: ${this.currentModel}, baseURL: ${this.openai.baseURL}, apiKey: ${this.openai.apiKey?.substring(0, 10)}..., streaming: ${streamingEnabled}`);
-        // Debug: Log message roles being sent
-        this.logger.debug(() => `Messages being sent to OpenAI (${cleanedMessages.length} total): ${cleanedMessages
-            .map((m) => `${m.role}${m.role === 'system' ? ` (length: ${m.content?.length})` : ''}`)
-            .join(', ')}`);
-        let response;
-        try {
-            // Build request params with exact order from original
-            response = await this.openai.chat.completions.create({
-                model: this.currentModel,
-                messages: cleanedMessages,
-                stream: streamingEnabled,
-                ...(streamingEnabled && finalStreamOptions
-                    ? { stream_options: finalStreamOptions }
-                    : {}),
-                tools: formattedTools,
-                tool_choice: this.getToolChoiceForFormat(tools),
-                ...this.modelParams,
-            });
-        }
-        catch (error) {
-            // Debug the error
-            const errorStatus = error?.status ||
-                error?.response?.status;
-            const errorLabel = errorStatus === 400 ? '[API Error 400]' : '[API Error]';
-            this.logger.error(() => `${errorLabel} Error caught in API call: ${error}`);
-            this.logger.error(() => `${errorLabel} Error type: ${error?.constructor?.name}`);
-            this.logger.error(() => `${errorLabel} Error status: ${errorStatus}`);
-            this.logger.error(() => `${errorLabel} Error response data: ${JSON.stringify(error?.response?.data, null, 2)}`);
-            // Log the last few messages to understand what's being sent
-            if (errorStatus === 400) {
-                // Log additional diagnostics for 400 errors
-                const hasSyntheticMessages = cleanedMessages.some((msg) => msg.role === 'tool' &&
-                    msg.content === 'Tool execution cancelled by user');
-                const hasPendingToolCalls = cleanedMessages.some((msg, idx) => {
-                    if (msg.role === 'assistant' && msg.tool_calls) {
-                        // Check if there's a matching tool response
-                        const toolCallIds = msg.tool_calls.map((tc) => tc.id);
-                        const hasResponses = toolCallIds.every((id) => cleanedMessages
-                            .slice(idx + 1)
-                            .some((m) => m.role === 'tool' && m.tool_call_id === id));
-                        return !hasResponses;
-                    }
-                    return false;
-                });
-                this.logger.error(() => `${errorLabel} Last 5 messages being sent:`);
-                this.logger.error(() => `${errorLabel} Has synthetic messages: ${hasSyntheticMessages}`);
-                this.logger.error(() => `${errorLabel} Has pending tool calls without responses: ${hasPendingToolCalls}`);
-                const lastMessages = cleanedMessages.slice(-5);
-                lastMessages.forEach((msg, idx) => {
-                    this.logger.error(() => ` [${cleanedMessages.length - 5 + idx}] ${msg.role}${msg.tool_call_id ? ` (tool response for ${msg.tool_call_id})` : ''}${msg.tool_calls ? ` (${msg.tool_calls.length} tool calls)` : ''}`);
-                    if (msg.tool_calls) {
-                        msg.tool_calls.forEach((tc) => {
-                            this.logger.error(() => ` - Tool call: ${tc.id} -> ${tc.function.name}`);
-                        });
-                    }
-                });
-            }
-            // Check for JSONResponse mutation errors
-            const errorMessage = error instanceof Error ? error.message : String(error);
-            if (errorMessage?.includes('JSONResponse') &&
-                errorMessage?.includes('does not support item assignment')) {
-                this.logger.debug(() => '[JSONResponse Error] Detected JSONResponse mutation error, retrying without streaming');
-                this.logger.error(() => '[Cerebras Corruption] JSONResponse mutation error detected. This typically occurs with certain providers like Cerebras. Falling back to non-streaming mode.', {
-                    errorMessage,
-                    provider: this.baseURL,
-                    streamingEnabled,
-                });
-                // Retry with streaming disabled
-                response = await this.openai.chat.completions.create({
-                    model: this.currentModel,
-                    messages: cleanedMessages,
-                    stream: false, // Force non-streaming
-                    tools: formattedTools,
-                    tool_choice: this.getToolChoiceForFormat(tools),
-                    ...this.modelParams,
-                });
-                // Override streamingEnabled for the rest of this function
-                streamingEnabled = false;
-            }
-            else {
-                this.logger.debug(() => `${errorLabel} Re-throwing error (not a JSONResponse mutation)`);
-                // Re-throw other errors
-                throw error;
-            }
-        }
-        let fullContent = '';
-        const accumulatedToolCalls = [];
-        let hasStreamedContent = false;
-        let usageData;
-        // For Qwen streaming, buffer whitespace-only chunks to preserve spacing across chunk boundaries
-        let pendingWhitespace = null;
-        // Handle streaming vs non-streaming response
-        if (streamingEnabled) {
-            // We need to buffer all chunks to detect and handle malformed streams
-            // Some providers (like Cerebras) send message format instead of delta
-            const allChunks = [];
-            this.logger.debug(() => '[Stream Detection] Starting to buffer chunks for corruption detection', {
-                provider: this.baseURL,
-                streamingEnabled,
-                isUsingQwen: this.isUsingQwen(),
-                currentModel: this.currentModel,
-            });
-            try {
-                for await (const chunk of response) {
-                    // CRITICAL: Create a deep copy to avoid JSONResponse mutation issues
-                    // Cerebras and other providers may return immutable JSONResponse objects
-                    // Cast to unknown first to bypass type checking, then to our extended type
-                    const extendedChunk = chunk;
-                    const safeChunk = {
-                        choices: extendedChunk.choices?.map((choice) => ({
-                            delta: choice.delta
-                                ? {
-                                    content: choice.delta.content ?? undefined,
-                                    role: choice.delta.role,
-                                    tool_calls: choice.delta.tool_calls?.map((tc, idx) => ({
-                                        id: tc.id,
-                                        type: tc.type,
-                                        function: tc.function
-                                            ? {
-                                                name: tc.function.name,
-                                                arguments: tc.function.arguments,
-                                            }
-                                            : undefined,
-                                        index: tc.index !== undefined ? tc.index : idx,
-                                    })),
-                                }
-                                : undefined,
-                            message: choice.message
-                                ? {
-                                    content: choice.message.content ?? undefined,
-                                    role: choice.message.role,
-                                    tool_calls: choice.message.tool_calls?.map((tc) => ({
-                                        id: tc.id,
-                                        type: tc.type,
-                                        function: tc.function
-                                            ? {
-                                                name: tc.function.name,
-                                                arguments: tc.function.arguments,
-                                            }
-                                            : undefined,
-                                    })),
-                                }
-                                : undefined,
-                            index: choice.index,
-                            finish_reason: choice.finish_reason,
-                        })),
-                        usage: extendedChunk.usage
-                            ? {
-                                prompt_tokens: extendedChunk.usage.prompt_tokens,
-                                completion_tokens: extendedChunk.usage.completion_tokens,
-                                total_tokens: extendedChunk.usage.total_tokens,
-                            }
-                            : undefined,
-                    };
-                    allChunks.push(safeChunk);
-                }
-                this.logger.debug(() => `[Stream Buffering Complete] Collected ${allChunks.length} chunks`, {
-                    chunkCount: allChunks.length,
-                    hasContent: allChunks.some((c) => c.choices?.[0]?.delta?.content),
-                    hasToolCalls: allChunks.some((c) => c.choices?.[0]?.delta?.tool_calls),
-                });
-            }
-            catch (error) {
-                // Handle JSONResponse mutation errors that occur during iteration
-                const errorMessage = error instanceof Error ? error.message : String(error);
-                if (errorMessage?.includes('JSONResponse') &&
-                    errorMessage?.includes('does not support item assignment')) {
-                    this.logger.error(() => '[Cerebras Corruption] JSONResponse mutation error during stream iteration. This is a known issue with Cerebras. The OpenAI client library is trying to mutate immutable response objects. Falling back to non-streaming mode.', {
-                        error: errorMessage,
-                        provider: this.baseURL,
-                        chunksCollected: allChunks.length,
-                    });
-                    // Retry the entire request with streaming disabled
-                    // This is the nuclear option but ensures we get a response
-                    const nonStreamingResponse = await this.openai.chat.completions.create({
-                        model: this.currentModel,
-                        messages: cleanedMessages,
-                        stream: false, // Force non-streaming
-                        tools: formattedTools,
-                        tool_choice: this.getToolChoiceForFormat(tools),
-                        ...this.modelParams,
-                    });
-                    // Handle as non-streaming response
-                    const completionResponse = nonStreamingResponse;
-                    const choice = completionResponse.choices[0];
-                    if (choice?.message.content) {
-                        fullContent = choice.message.content;
-                    }
-                    if (choice?.message.tool_calls) {
-                        for (const toolCall of choice.message.tool_calls) {
-                            if (toolCall.type === 'function' && toolCall.function) {
-                                accumulatedToolCalls.push({
-                                    id: toolCall.id,
-                                    type: 'function',
-                                    function: toolCall.function,
-                                });
-                            }
+        // 1. Validate authentication and messages
+        await this.validateRequestPreconditions(messages);
+        // 2. Prepare request configuration
+        const requestConfig = this.prepareApiRequest(messages, tools);
+        // 3. Make API call with error handling
+        const response = await this.executeApiCall(messages, tools, requestConfig);
+        // 4. Process response based on streaming mode
+        let processedData = {
+            fullContent: '',
+            accumulatedToolCalls: [],
+            hasStreamedContent: false,
+            usageData: undefined,
+            pendingWhitespace: null,
+        };
+        if (requestConfig.streamingEnabled) {
+            // Need to yield streaming content as it comes
+            const streamResponse = response;
+            for await (const chunk of streamResponse) {
+                const delta = chunk.choices?.[0]?.delta;
+                if (delta?.content && !requestConfig.parser) {
+                    if (this.isUsingQwen()) {
+                        // Handle Qwen whitespace buffering inline for yielding
+                        // This is needed because we yield during streaming
+                        // We'll refactor this separately if needed
+                        const whitespaceResult = this.handleQwenStreamingWhitespace(delta, processedData.pendingWhitespace, processedData.fullContent);
+                        if (whitespaceResult.shouldYield) {
+                            yield {
+                                role: ContentGeneratorRole.ASSISTANT,
+                                content: whitespaceResult.content,
+                            };
                         }
-
-
-
-
-
-
+                        // Update our tracking of processed data
+                        processedData = {
+                            fullContent: whitespaceResult.updatedFullContent,
+                            accumulatedToolCalls: processedData.accumulatedToolCalls,
+                            hasStreamedContent: processedData.hasStreamedContent ||
+                                whitespaceResult.shouldYield,
+                            usageData: processedData.usageData,
+                            pendingWhitespace: whitespaceResult.updatedPendingWhitespace,
                         };
                     }
-
-                    yield {
-                        role: ContentGeneratorRole.ASSISTANT,
-                        content: fullContent || '',
-                        tool_calls: accumulatedToolCalls.length > 0
-                            ? accumulatedToolCalls
-                            : undefined,
-                        usage: usageData,
-                    };
-                    return;
-                }
-                // Re-throw other errors
-                throw error;
-            }
-            // Check first chunk to see if we have malformed stream
-            let detectedMalformedStream = false;
-            if (allChunks.length > 0) {
-                const firstChunk = allChunks[0];
-                if (firstChunk.choices?.[0]?.message &&
-                    !firstChunk.choices?.[0]?.delta) {
-                    detectedMalformedStream = true;
-                    this.logger.debug(() => 'Detected malformed stream (message instead of delta), using aggregation mode');
-                }
-            }
-            // If we detected issues, aggregate everything
-            if (detectedMalformedStream) {
-                const contentParts = [];
-                let aggregatedToolCalls = [];
-                let finalUsageData = undefined;
-                // Process all buffered chunks
-                for (const chunk of allChunks) {
-                    const message = chunk.choices?.[0]?.message || chunk.choices?.[0]?.delta;
-                    if (message?.content) {
-                        contentParts.push(message.content);
-                    }
-                    if (message?.tool_calls) {
-                        // Ensure tool_calls match the expected format
-                        aggregatedToolCalls = message.tool_calls.map((tc) => ({
-                            id: tc.id || `call_${Date.now()}`,
-                            type: (tc.type || 'function'),
-                            function: {
-                                name: tc.function?.name || '',
-                                arguments: tc.function?.arguments || '',
-                            },
-                        }));
-                    }
-                    if (chunk.usage) {
-                        finalUsageData = {
-                            prompt_tokens: chunk.usage.prompt_tokens || 0,
-                            completion_tokens: chunk.usage.completion_tokens || 0,
-                            total_tokens: chunk.usage.total_tokens || 0,
-                        };
-                    }
-                }
-                // Yield single reconstructed message
-                yield {
-                    role: ContentGeneratorRole.ASSISTANT,
-                    content: contentParts.join(''),
-                    tool_calls: aggregatedToolCalls.length > 0 ? aggregatedToolCalls : undefined,
-                    usage: finalUsageData,
-                };
-                return;
-            }
-            // Process chunks normally - stream them as they come
-            this.logger.debug(() => `[Processing Chunks] Starting to process ${allChunks.length} buffered chunks`, {
-                isUsingQwen: this.isUsingQwen(),
-            });
-            let chunkIndex = 0;
-            for (const chunk of allChunks) {
-                chunkIndex++;
-                // Since we created safe copies during buffering, chunks are now mutable
-                // Check if this chunk has message format instead of delta (malformed stream)
-                let processedChunk = chunk;
-                if (chunk.choices?.[0]?.message && !chunk.choices?.[0]?.delta) {
-                    this.logger.error(() => '[Cerebras Corruption] Converting malformed chunk from message to delta format', {
-                        provider: this.baseURL,
-                        hasMessage: true,
-                        hasDelta: false,
-                        messageContent: chunk.choices[0].message?.content?.substring(0, 100),
-                    });
-                    // Convert message format to delta format for consistent processing
-                    const message = chunk.choices[0].message;
-                    processedChunk = {
-                        choices: [
-                            {
-                                delta: {
-                                    content: message?.content ?? undefined,
-                                    role: message?.role,
-                                    tool_calls: message?.tool_calls,
-                                },
-                            },
-                        ],
-                        usage: chunk.usage,
-                    };
-                }
-                const delta = processedChunk.choices?.[0]?.delta;
-                if (delta?.content) {
-                    // Enhanced debug logging to understand streaming behavior
-                    if (this.isUsingQwen()) {
-                        this.logger.debug(() => `Chunk: ${JSON.stringify({
-                            content: delta.content,
-                            contentLength: delta.content?.length ?? 0,
-                            isWhitespaceOnly: delta.content?.trim() === '',
-                            chunkIndex: 0,
-                        })}`);
-                    }
-                    // For text-based models, don't yield content chunks yet
-                    if (!parser && delta.content) {
-                        this.logger.debug(() => `[Content Processing] Chunk ${chunkIndex}/${allChunks.length} has content`, {
-                            contentLength: delta.content.length,
-                            contentPreview: delta.content.substring(0, 50),
-                            isUsingQwen: this.isUsingQwen(),
-                            willBuffer: this.isUsingQwen() && delta.content.trim() === '',
-                        });
-                        if (this.isUsingQwen()) {
-                            const isWhitespaceOnly = delta.content.trim() === '';
-                            if (isWhitespaceOnly) {
-                                // Buffer whitespace-only chunk
-                                pendingWhitespace = (pendingWhitespace || '') + delta.content;
-                                this.logger.debug(() => `[Whitespace Buffering] Buffered whitespace-only chunk (len=${delta.content?.length ?? 0}). pendingWhitespace now len=${pendingWhitespace?.length ?? 0}`, {
-                                    chunkIndex,
-                                    totalChunks: allChunks.length,
-                                    isLastChunk: chunkIndex === allChunks.length,
-                                    contentHex: Buffer.from(delta.content).toString('hex'),
-                                });
-                                continue;
-                            }
-                            else if (pendingWhitespace) {
-                                // Flush buffered whitespace before non-empty chunk to preserve spacing
-                                this.logger.debug(() => `Flushing pending whitespace (len=${pendingWhitespace?.length ?? 0}) before non-empty chunk`);
-                                yield {
-                                    role: ContentGeneratorRole.ASSISTANT,
-                                    content: pendingWhitespace,
-                                };
-                                hasStreamedContent = true;
-                                fullContent += pendingWhitespace;
-                                pendingWhitespace = null;
-                            }
-                        }
-                        this.logger.debug(() => `[Yielding Content] Yielding chunk ${chunkIndex}/${allChunks.length}`, {
-                            contentLength: delta.content.length,
-                            hasStreamedContent,
-                        });
+                    else {
                         yield {
                             role: ContentGeneratorRole.ASSISTANT,
                             content: delta.content,
                         };
-
+                        processedData = {
+                            fullContent: processedData.fullContent + delta.content,
+                            accumulatedToolCalls: processedData.accumulatedToolCalls,
+                            hasStreamedContent: true,
+                            usageData: processedData.usageData,
+                            pendingWhitespace: null,
+                        };
                     }
-                    fullContent += delta.content;
                 }
+                else if (delta?.content) {
+                    // Parser mode - just accumulate
+                    processedData = {
+                        fullContent: processedData.fullContent + delta.content,
+                        accumulatedToolCalls: processedData.accumulatedToolCalls,
+                        hasStreamedContent: processedData.hasStreamedContent,
+                        usageData: processedData.usageData,
+                        pendingWhitespace: processedData.pendingWhitespace,
+                    };
+                }
+                // Handle tool calls
                 if (delta?.tool_calls) {
+                    const accumulated = processedData.accumulatedToolCalls;
                     for (const toolCall of delta.tool_calls) {
-                        this.toolFormatter.accumulateStreamingToolCall(toolCall,
+                        this.toolFormatter.accumulateStreamingToolCall(toolCall, accumulated, requestConfig.currentToolFormat);
                     }
+                    processedData = {
+                        ...processedData,
+                        accumulatedToolCalls: accumulated,
+                    };
                 }
-                // Check for usage data
-                if (
-
-
-
-
+                // Check for usage data
+                if (chunk.usage) {
+                    processedData = {
+                        ...processedData,
+                        usageData: {
+                            prompt_tokens: chunk.usage.prompt_tokens || 0,
+                            completion_tokens: chunk.usage.completion_tokens || 0,
+                            total_tokens: chunk.usage.total_tokens || 0,
+                        },
                     };
                 }
             }
         }
         else {
-            // Non-streaming response
-
-
-            if (
-                fullContent = choice.message.content;
-            }
-            if (choice?.message.tool_calls) {
-                // Convert tool calls to the standard format
-                for (const toolCall of choice.message.tool_calls) {
-                    if (toolCall.type === 'function' && toolCall.function) {
-                        // Don't fix double stringification here - it's handled later in the final processing
-                        accumulatedToolCalls.push({
-                            id: toolCall.id,
-                            type: 'function',
-                            function: toolCall.function,
-                        });
-                    }
-                }
-            }
-            if (completionResponse.usage) {
-                usageData = {
-                    prompt_tokens: completionResponse.usage.prompt_tokens,
-                    completion_tokens: completionResponse.usage.completion_tokens,
-                    total_tokens: completionResponse.usage.total_tokens,
-                };
-            }
-            // For non-streaming, we yield the full content at once if there's no parser
-            if (!parser && fullContent) {
+            // Non-streaming response
+            processedData = this.processNonStreamingResponse(response);
+            // For non-streaming, yield content if no parser
+            if (!requestConfig.parser && processedData.fullContent) {
                 yield {
                     role: ContentGeneratorRole.ASSISTANT,
-                    content: fullContent,
+                    content: processedData.fullContent,
                 };
-                hasStreamedContent = true;
+                processedData.hasStreamedContent = true;
             }
         }
-        // Flush
-        if (pendingWhitespace &&
-            this.
+        // 5. Flush pending whitespace if needed (for Qwen)
+        if (processedData.pendingWhitespace &&
+            this.isUsingQwen() &&
+            !requestConfig.parser) {
+            this.logger.debug(() => `Flushing trailing pending whitespace (len=${processedData.pendingWhitespace?.length ?? 0}) at stream end`);
             yield {
                 role: ContentGeneratorRole.ASSISTANT,
-                content: pendingWhitespace,
+                content: processedData.pendingWhitespace,
             };
-            hasStreamedContent = true;
-            fullContent += pendingWhitespace;
-            pendingWhitespace = null;
-        }
-        // After stream ends, parse text-based tool calls if needed
-        if (parser && fullContent) {
-            const { cleanedContent, toolCalls } = parser.parse(fullContent);
-            if (toolCalls.length > 0) {
-                // Convert to standard format
-                const standardToolCalls = toolCalls.map((tc, index) => ({
-                    id: `call_${Date.now()}_${index}`,
-                    type: 'function',
-                    function: {
-                        name: tc.name,
-                        arguments: JSON.stringify(tc.arguments),
-                    },
-                }));
-                yield {
-                    role: ContentGeneratorRole.ASSISTANT,
-                    content: cleanedContent,
-                    tool_calls: standardToolCalls,
-                    usage: usageData,
-                };
-            }
-            else {
-                // No tool calls found, yield cleaned content
-                yield {
-                    role: ContentGeneratorRole.ASSISTANT,
-                    content: cleanedContent,
-                    usage: usageData,
-                };
-            }
-        }
-        else {
-            // Standard OpenAI tool call handling
-            if (accumulatedToolCalls.length > 0) {
-                // Fix double stringification for Qwen tool calls
-                // Qwen models pre-stringify arguments values, but later in the process
-                // they are being JSON.stringify'd again
-                let fixedToolCalls = accumulatedToolCalls;
-                if (this.isUsingQwen()) {
-                    this.logger.debug(() => `[Qwen Fix] Processing ${accumulatedToolCalls.length} tool calls for double-stringification fix`);
-                    fixedToolCalls = accumulatedToolCalls.map((toolCall, index) => {
-                        this.logger.debug(() => `[Qwen Fix] Tool call ${index}: ${JSON.stringify({
-                            name: toolCall.function.name,
-                            argumentsType: typeof toolCall.function.arguments,
-                            argumentsLength: toolCall.function.arguments?.length,
-                            argumentsSample: toolCall.function.arguments?.substring(0, 100),
-                        })}`);
-                        // For Qwen, check for nested double-stringification
-                        // Qwen models stringify array/object values WITHIN the JSON arguments
-                        if (toolCall.function.arguments &&
-                            typeof toolCall.function.arguments === 'string') {
-                            try {
-                                // First, parse the arguments to get the JSON object
-                                const parsedArgs = JSON.parse(toolCall.function.arguments);
-                                let hasNestedStringification = false;
-                                // Check each property to see if it's a stringified array/object/number
-                                const fixedArgs = {};
-                                for (const [key, value] of Object.entries(parsedArgs)) {
-                                    if (typeof value === 'string') {
-                                        const trimmed = value.trim();
-                                        // Check if it's a stringified number (integer or float)
-                                        if (/^-?\d+(\.\d+)?$/.test(trimmed)) {
-                                            const numValue = trimmed.includes('.')
-                                                ? parseFloat(trimmed)
-                                                : parseInt(trimmed, 10);
-                                            fixedArgs[key] = numValue;
-                                            hasNestedStringification = true;
-                                            this.logger.debug(() => `[Qwen Fix] Fixed stringified number in property '${key}' for ${toolCall.function.name}: "${value}" -> ${numValue}`);
-                                        }
-                                        // Check if it looks like a stringified array or object
-                                        // Also check for Python-style dictionaries with single quotes
-                                        else if ((trimmed.startsWith('[') && trimmed.endsWith(']')) ||
-                                            (trimmed.startsWith('{') && trimmed.endsWith('}'))) {
-                                            try {
-                                                // Try to parse it as JSON
-                                                const nestedParsed = JSON.parse(value);
-                                                fixedArgs[key] = nestedParsed;
-                                                hasNestedStringification = true;
-                                                this.logger.debug(() => `[Qwen Fix] Fixed nested stringification in property '${key}' for ${toolCall.function.name}`);
-                                            }
-                                            catch {
-                                                // Try to convert Python-style to JSON (single quotes to double quotes)
-                                                try {
-                                                    const jsonified = value
-                                                        .replace(/'/g, '"')
-                                                        .replace(/: True/g, ': true')
-                                                        .replace(/: False/g, ': false')
-                                                        .replace(/: None/g, ': null');
-                                                    const nestedParsed = JSON.parse(jsonified);
-                                                    fixedArgs[key] = nestedParsed;
-                                                    hasNestedStringification = true;
-                                                    this.logger.debug(() => `[Qwen Fix] Fixed Python-style nested stringification in property '${key}' for ${toolCall.function.name}`);
-                                                }
-                                                catch {
-                                                    // Not valid JSON even after conversion, keep as string
-                                                    fixedArgs[key] = value;
-                                                }
-                                            }
-                                        }
-                                        else {
-                                            fixedArgs[key] = value;
-                                        }
-                                    }
-                                    else {
-                                        fixedArgs[key] = value;
-                                    }
-                                }
-                                if (hasNestedStringification) {
-                                    this.logger.debug(() => `[Qwen Fix] Fixed nested double-stringification for ${toolCall.function.name}`);
-                                    return {
-                                        ...toolCall,
-                                        function: {
-                                            ...toolCall.function,
-                                            arguments: JSON.stringify(fixedArgs),
-                                        },
-                                    };
-                                }
-                            }
-                            catch (_e) {
-                                // If parsing fails, check for old-style double-stringification
-                                if (toolCall.function.arguments.startsWith('"') &&
-                                    toolCall.function.arguments.endsWith('"')) {
-                                    try {
-                                        // Old fix: entire arguments were double-stringified
-                                        const parsedArgs = JSON.parse(toolCall.function.arguments);
-                                        this.logger.debug(() => `[Qwen Fix] Fixed whole-argument double-stringification for ${toolCall.function.name}`);
-                                        return {
-                                            ...toolCall,
-                                            function: {
-                                                ...toolCall.function,
-                                                arguments: JSON.stringify(parsedArgs),
-                                            },
-                                        };
-                                    }
-                                    catch {
-                                        // Leave as-is if we can't parse
-                                    }
-                                }
-                            }
-                        }
-                        // No fix needed
-                        this.logger.debug(() => `[Qwen Fix] No double-stringification detected for ${toolCall.function.name}, keeping original`);
-                        return toolCall;
-                    });
-                }
-                if (this.isUsingQwen()) {
-                    this.logger.debug(() => `Final message with tool calls: ${JSON.stringify({
-                        contentLength: fullContent.length,
-                        content: fullContent.substring(0, 200) +
-                            (fullContent.length > 200 ? '...' : ''),
-                        toolCallCount: accumulatedToolCalls.length,
-                        hasStreamedContent,
-                    })}`);
-                }
-                // For Qwen models, don't duplicate content if we've already streamed it
-                // BUT Cerebras needs at least a space to continue after tool responses
-                const isCerebras = this.baseURL?.toLowerCase().includes('cerebras.ai');
-                if (isCerebras) {
-                    this.logger.debug(() => '[Cerebras] Special handling for Cerebras provider after tool responses', {
-                        hasStreamedContent,
-                        willSendSpace: hasStreamedContent,
-                    });
-                }
-                const shouldOmitContent = hasStreamedContent && this.isUsingQwen() && !isCerebras;
-                this.logger.debug(() => '[Tool Call Handling] Deciding how to yield tool calls', {
-                    hasStreamedContent,
-                    isUsingQwen: this.isUsingQwen(),
-                    isCerebras,
-                    shouldOmitContent,
-                    fullContentLength: fullContent.length,
-                    toolCallCount: fixedToolCalls?.length || 0,
-                });
-                if (shouldOmitContent) {
-                    // Qwen: Send just a space (like Cerebras) to prevent stream stopping
-                    yield {
-                        role: ContentGeneratorRole.ASSISTANT,
-                        content: ' ', // Single space instead of empty to keep stream alive
-                        tool_calls: fixedToolCalls,
-                        usage: usageData,
-                    };
-                }
-                else if (isCerebras && hasStreamedContent) {
-                    // Cerebras: Send just a space to prevent duplication but allow continuation
-                    // This prevents the repeated "Let me search..." text
-                    this.logger.debug(() => '[Cerebras] Sending minimal space content to prevent duplication');
-                    yield {
-                        role: ContentGeneratorRole.ASSISTANT,
-                        content: ' ', // Single space instead of full content
-                        tool_calls: fixedToolCalls,
-                        usage: usageData,
-                    };
-                }
-                else {
-                    // Include full content with tool calls
-                    yield {
-                        role: ContentGeneratorRole.ASSISTANT,
-                        content: fullContent || '',
-                        tool_calls: fixedToolCalls,
-                        usage: usageData,
-                    };
-                }
-            }
-            else if (usageData) {
-                // Always emit usage data so downstream consumers can update stats
-                yield {
-                    role: ContentGeneratorRole.ASSISTANT,
-                    content: '',
-                    usage: usageData,
-                };
-            }
+            processedData.hasStreamedContent = true;
+            processedData.fullContent += processedData.pendingWhitespace;
+            processedData.pendingWhitespace = null;
         }
+        // 6. Process and yield final results
+        yield* this.processFinalResponse(processedData, requestConfig.parser);
     }
     setModel(modelId) {
         // Update SettingsService as the source of truth
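The rewritten generateChatCompletion above delegates Qwen whitespace handling to a handleQwenStreamingWhitespace helper that this diff calls but does not show. Its contract can be inferred from the call site and from the inline logic it replaces (buffer whitespace-only chunks, flush the buffer ahead of the next non-empty chunk); the body below is an assumption built to satisfy that call site, not the shipped implementation.

interface QwenWhitespaceResult {
  shouldYield: boolean;
  content: string;
  updatedFullContent: string;
  updatedPendingWhitespace: string | null;
}

function handleQwenStreamingWhitespace(
  delta: { content?: string },
  pendingWhitespace: string | null,
  fullContent: string,
): QwenWhitespaceResult {
  const chunk = delta.content ?? '';
  if (chunk.trim() === '') {
    // Whitespace-only chunk: buffer it instead of yielding.
    return {
      shouldYield: false,
      content: '',
      updatedFullContent: fullContent,
      updatedPendingWhitespace: (pendingWhitespace ?? '') + chunk,
    };
  }
  // Non-empty chunk: flush any buffered whitespace ahead of it.
  const content = (pendingWhitespace ?? '') + chunk;
  return {
    shouldYield: true,
    content,
    updatedFullContent: fullContent + content,
    updatedPendingWhitespace: null,
  };
}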
@@ -1297,7 +426,7 @@ export class OpenAIProvider extends BaseProvider {
     }
     setApiKey(apiKey) {
         // Call base provider implementation
-        super.setApiKey
+        super.setApiKey(apiKey);
         // Persist to SettingsService if available
         this.setApiKeyInSettings(apiKey).catch((error) => {
             this.logger.debug(() => `Failed to persist API key to SettingsService: ${error}`);
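The one-line setApiKey change reads like a bug fix: as rendered, the removed line references super.setApiKey without invoking it, a no-op expression, so the base provider would never receive the key. A minimal reproduction of the difference:

class Base {
  protected key = '';
  setApiKey(key: string): void {
    this.key = key;
  }
}

class Derived extends Base {
  setApiKey(key: string): void {
    super.setApiKey; // no-op: evaluates the method reference and discards it
    super.setApiKey(key); // actually runs the base implementation
  }
}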
@@ -1349,24 +478,6 @@ export class OpenAIProvider extends BaseProvider {
     setToolFormatOverride(format) {
         this.toolFormatOverride = format || undefined;
     }
-    /**
-     * Estimates the remote context usage for the current conversation
-     * @param conversationId The conversation ID
-     * @param parentId The parent message ID
-     * @param promptMessages The messages being sent in the current prompt
-     * @returns Context usage information including remote tokens
-     */
-    estimateContextUsage(conversationId, parentId, promptMessages) {
-        const promptTokens = estimateMessagesTokens(promptMessages);
-        return estimateRemoteTokens(this.currentModel, this.conversationCache, conversationId, parentId, promptTokens);
-    }
-    /**
-     * Get the conversation cache instance
-     * @returns The conversation cache
-     */
-    getConversationCache() {
-        return this.conversationCache;
-    }
     /**
      * OpenAI always requires payment (API key)
      */
@@ -1374,8 +485,7 @@ export class OpenAIProvider extends BaseProvider {
         return true;
     }
     clearState() {
-        //
-        this.conversationCache.clear();
+        // No state to clear in base OpenAI provider
     }
     /**
      * Get the list of server tools supported by this provider
@@ -1530,5 +640,439 @@ export class OpenAIProvider extends BaseProvider {
|
|
1530
640
|
// For now, return the response as-is
|
1531
641
|
return response;
|
1532
642
|
}
|
643
|
+
/**
|
644
|
+
* Validate authentication and message preconditions for API calls
|
645
|
+
*/
|
646
|
+
async validateRequestPreconditions(messages) {
|
647
|
+
// Check if API key is available (using resolved authentication)
|
648
|
+
const apiKey = await this.getAuthToken();
|
649
|
+
if (!apiKey) {
|
650
|
+
const endpoint = this.baseURL || 'https://api.openai.com/v1';
|
651
|
+
if (this.isOAuthEnabled() && !this.supportsOAuth()) {
|
652
|
+
throw new Error(generateOAuthEndpointMismatchError(endpoint, 'qwen'));
|
653
|
+
}
|
654
|
+
throw new Error('OpenAI API key is required to generate completions');
|
655
|
+
}
|
656
|
+
// Validate tool messages have required tool_call_id
|
657
|
+
const toolMessages = messages.filter((msg) => msg.role === 'tool');
|
658
|
+
const missingIds = toolMessages.filter((msg) => !msg.tool_call_id);
|
659
|
+
if (missingIds.length > 0) {
|
660
|
+
this.logger.error(() => `FATAL: Tool messages missing tool_call_id: ${JSON.stringify(missingIds)}`);
|
661
|
+
throw new Error(`OpenAI API requires tool_call_id for all tool messages. Found ${missingIds.length} tool message(s) without IDs.`);
|
662
|
+
}
|
663
|
+
}
|
+    /**
+     * Prepare API request configuration
+     */
+    prepareApiRequest(messages, tools) {
+        const parser = this.requiresTextToolCallParsing()
+            ? new GemmaToolCallParser()
+            : null;
+        // Get current tool format (with override support)
+        const currentToolFormat = this.getToolFormat();
+        // Format tools using formatToolsForAPI method
+        const formattedTools = tools ? this.formatToolsForAPI(tools) : undefined;
+        // Get stream_options from ephemeral settings (not model params)
+        const streamOptions = this.providerConfig?.getEphemeralSettings?.()?.['stream-options'];
+        // Default stream_options to { include_usage: true } unless explicitly set
+        const finalStreamOptions = streamOptions !== undefined ? streamOptions : { include_usage: true };
+        // Get streaming setting from ephemeral settings (default: enabled)
+        const streamingSetting = this.providerConfig?.getEphemeralSettings?.()?.['streaming'];
+        const streamingEnabled = streamingSetting !== 'disabled';
+        return {
+            parser,
+            currentToolFormat,
+            formattedTools,
+            finalStreamOptions,
+            streamingEnabled,
+        };
+    }
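
Two ephemeral settings steer the request shape here: `streaming` (on unless set to 'disabled') and `stream-options` (defaulting to `{ include_usage: true }`). A small sketch of the same defaulting logic, with an invented settings snapshot:

const ephemeral = { streaming: 'disabled' }; // hypothetical settings snapshot
const streamingEnabled = ephemeral['streaming'] !== 'disabled'; // false
const streamOptions = ephemeral['stream-options'] !== undefined
    ? ephemeral['stream-options']
    : { include_usage: true }; // unset here, so the default applies
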
+    /**
+     * Execute API call with error handling
+     */
+    async executeApiCall(messages, tools, requestConfig) {
+        // Get resolved authentication and update client if needed
+        await this.updateClientWithResolvedAuth();
+        this.logger.debug(() => `About to make API call with model: ${this.currentModel}, baseURL: ${this.openai.baseURL}, apiKey: ${this.openai.apiKey?.substring(0, 10)}..., streaming: ${requestConfig.streamingEnabled}, messages (${messages.length} total): ${messages
+            .map((m) => `${m.role}${m.role === 'system' ? ` (length: ${m.content?.length})` : ''}`)
+            .join(', ')}`);
+        try {
+            // Build request params with exact order from original
+            return await this.openai.chat.completions.create({
+                model: this.currentModel,
+                messages: messages,
+                stream: requestConfig.streamingEnabled,
+                ...(requestConfig.streamingEnabled && requestConfig.finalStreamOptions
+                    ? { stream_options: requestConfig.finalStreamOptions }
+                    : {}),
+                tools: requestConfig.formattedTools,
+                tool_choice: this.getToolChoiceForFormat(tools),
+                ...this.modelParams,
+            });
+        }
+        catch (error) {
+            this.handleApiError(error, messages);
+            throw error; // Re-throw after logging
+        }
+    }
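
For reference, the same request can be reproduced against the public openai SDK directly; this standalone sketch (model and prompt are placeholders) shows where `stream` and `stream_options` end up:

import OpenAI from 'openai';

const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
const stream = await openai.chat.completions.create({
    model: 'gpt-4o-mini',
    messages: [{ role: 'user', content: 'hello' }],
    stream: true,
    stream_options: { include_usage: true }, // the default this provider applies
});
for await (const chunk of stream) {
    process.stdout.write(chunk.choices[0]?.delta?.content ?? '');
    if (chunk.usage) console.log('\ntotal tokens:', chunk.usage.total_tokens);
}
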
+    /**
+     * Handle and log API errors
+     */
+    handleApiError(error, messages) {
+        const errorStatus = error?.status ||
+            error?.response?.status;
+        const errorLabel = errorStatus === 400 ? '[API Error 400]' : '[API Error]';
+        this.logger.error(() => `${errorLabel} Error caught in API call:\n` +
+            ` Error: ${error}\n` +
+            ` Type: ${error?.constructor?.name}\n` +
+            ` Status: ${errorStatus}\n` +
+            ` Response data: ${JSON.stringify(error?.response?.data, null, 2)}`);
+        // Log the last few messages to understand what's being sent
+        if (errorStatus === 400) {
+            // Log additional diagnostics for 400 errors
+            const hasPendingToolCalls = messages.some((msg, idx) => {
+                if (msg.role === 'assistant' && msg.tool_calls) {
+                    // Check if there's a matching tool response
+                    const toolCallIds = msg.tool_calls.map((tc) => tc.id);
+                    const hasResponses = toolCallIds.every((id) => messages
+                        .slice(idx + 1)
+                        .some((m) => m.role === 'tool' && m.tool_call_id === id));
+                    return !hasResponses;
+                }
+                return false;
+            });
+            this.logger.error(() => `${errorLabel} Last 5 messages being sent:\n` +
+                ` Has pending tool calls without responses: ${hasPendingToolCalls}`);
+            const lastMessages = messages.slice(-5);
+            lastMessages.forEach((msg, idx) => {
+                this.logger.error(() => ` [${messages.length - 5 + idx}] ${msg.role}${msg.tool_call_id ? ` (tool response for ${msg.tool_call_id})` : ''}${msg.tool_calls ? ` (${msg.tool_calls.length} tool calls)` : ''}`);
+                if (msg.tool_calls) {
+                    msg.tool_calls.forEach((tc) => {
+                        this.logger.error(() => ` - Tool call: ${tc.id} -> ${tc.function.name}`);
+                    });
+                }
+            });
+        }
+    }
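
The 400-path diagnostic hinges on one invariant: an assistant `tool_calls` entry with no later `tool` response is the usual cause of a rejected request. The same check, extracted into a standalone helper (hypothetical, and ignoring message ordering for brevity):

function findUnansweredToolCalls(messages) {
    // Collect the ids that already have a tool response...
    const answered = new Set(
        messages.filter((m) => m.role === 'tool').map((m) => m.tool_call_id),
    );
    // ...and return every assistant tool call that lacks one.
    return messages
        .filter((m) => m.role === 'assistant' && m.tool_calls)
        .flatMap((m) => m.tool_calls)
        .filter((tc) => !answered.has(tc.id));
}
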
+    /**
+     * Process non-streaming response
+     */
+    processNonStreamingResponse(response) {
+        const choice = response.choices[0];
+        let fullContent = '';
+        const accumulatedToolCalls = [];
+        let usageData;
+        if (choice?.message.content) {
+            fullContent = choice.message.content;
+        }
+        if (choice?.message.tool_calls) {
+            // Convert tool calls to the standard format
+            for (const toolCall of choice.message.tool_calls) {
+                if (toolCall.type === 'function' && toolCall.function) {
+                    // Don't fix double stringification here - it's handled later in the final processing
+                    accumulatedToolCalls.push({
+                        id: toolCall.id,
+                        type: 'function',
+                        function: toolCall.function,
+                    });
+                }
+            }
+        }
+        if (response.usage) {
+            usageData = {
+                prompt_tokens: response.usage.prompt_tokens,
+                completion_tokens: response.usage.completion_tokens,
+                total_tokens: response.usage.total_tokens,
+            };
+        }
+        return {
+            fullContent,
+            accumulatedToolCalls,
+            hasStreamedContent: false, // Non-streaming never has streamed content
+            usageData,
+            pendingWhitespace: null,
+        };
+    }
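
The normalizer above reduces a non-streaming Chat Completions response to the same internal shape the streaming path produces. An invented input/output pair:

const response = {
    choices: [{ message: { content: 'done' } }],
    usage: { prompt_tokens: 12, completion_tokens: 3, total_tokens: 15 },
};
// processNonStreamingResponse(response) returns:
// {
//     fullContent: 'done',
//     accumulatedToolCalls: [],
//     hasStreamedContent: false,
//     usageData: { prompt_tokens: 12, completion_tokens: 3, total_tokens: 15 },
//     pendingWhitespace: null,
// }
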
+    /**
+     * Process and build final response messages
+     */
+    *processFinalResponse(processedData, parser) {
+        const { fullContent, accumulatedToolCalls, hasStreamedContent, usageData, pendingWhitespace, } = processedData;
+        // Flush any remaining pending whitespace for Qwen
+        let finalFullContent = fullContent;
+        if (pendingWhitespace && this.isUsingQwen() && !parser) {
+            this.logger.debug(() => `Flushing trailing pending whitespace (len=${pendingWhitespace?.length ?? 0}) at stream end`);
+            finalFullContent += pendingWhitespace;
+        }
+        // After stream ends, parse text-based tool calls if needed
+        if (parser && finalFullContent) {
+            const { cleanedContent, toolCalls } = parser.parse(finalFullContent);
+            if (toolCalls.length > 0) {
+                // Convert to standard format
+                const standardToolCalls = toolCalls.map((tc, index) => ({
+                    id: `call_${Date.now()}_${index}`,
+                    type: 'function',
+                    function: {
+                        name: tc.name,
+                        arguments: JSON.stringify(tc.arguments),
+                    },
+                }));
+                yield {
+                    role: ContentGeneratorRole.ASSISTANT,
+                    content: cleanedContent,
+                    tool_calls: standardToolCalls,
+                    usage: usageData,
+                };
+            }
+            else {
+                // No tool calls found, yield cleaned content
+                yield {
+                    role: ContentGeneratorRole.ASSISTANT,
+                    content: cleanedContent,
+                    usage: usageData,
+                };
+            }
+        }
+        else {
+            // Standard OpenAI tool call handling
+            if (accumulatedToolCalls.length > 0) {
+                // Process tool calls with Qwen-specific fixes if needed
+                const fixedToolCalls = this.processQwenToolCalls(accumulatedToolCalls);
+                if (this.isUsingQwen()) {
+                    this.logger.debug(() => `Final message with tool calls: ${JSON.stringify({
+                        contentLength: finalFullContent.length,
+                        content: finalFullContent.substring(0, 200) +
+                            (finalFullContent.length > 200 ? '...' : ''),
+                        toolCallCount: accumulatedToolCalls.length,
+                        hasStreamedContent,
+                    })}`);
+                }
+                // Build the final message based on provider-specific requirements
+                const finalMessage = this.buildFinalToolCallMessage(hasStreamedContent, finalFullContent, fixedToolCalls, usageData);
+                yield finalMessage;
+            }
+            else if (usageData) {
+                // Always emit usage data so downstream consumers can update stats
+                yield {
+                    role: ContentGeneratorRole.ASSISTANT,
+                    content: '',
+                    usage: usageData,
+                };
+            }
+        }
+    }
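
When a text-based parser is active (e.g. for Gemma-style models), tool calls arrive as plain text and get synthetic ids. A sketch of the conversion, assuming the parser yields `{ name, arguments }` pairs as the code above expects:

const parsed = { name: 'read_file', arguments: { path: 'src/index.ts' } }; // assumed parser output
const standardToolCall = {
    id: `call_${Date.now()}_0`, // synthetic id, as generated above
    type: 'function',
    function: { name: parsed.name, arguments: JSON.stringify(parsed.arguments) },
};
// standardToolCall.function.arguments === '{"path":"src/index.ts"}'
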
+    /**
+     * Handle Qwen-specific whitespace buffering during streaming
+     * @param delta The stream delta containing content
+     * @param pendingWhitespace Current buffered whitespace
+     * @param fullContent Accumulated full content
+     * @returns Object with updated state and whether to yield content
+     */
+    handleQwenStreamingWhitespace(delta, pendingWhitespace, fullContent) {
+        if (!delta.content) {
+            return {
+                shouldYield: false,
+                content: '',
+                updatedPendingWhitespace: pendingWhitespace,
+                updatedFullContent: fullContent,
+            };
+        }
+        const isWhitespaceOnly = delta.content.trim() === '';
+        if (isWhitespaceOnly) {
+            // Buffer whitespace-only chunk
+            const newPendingWhitespace = (pendingWhitespace || '') + delta.content;
+            this.logger.debug(() => `[Whitespace Buffering] Buffered whitespace-only chunk (len=${delta.content?.length ?? 0}). pendingWhitespace now len=${newPendingWhitespace?.length ?? 0}`);
+            return {
+                shouldYield: false,
+                content: '',
+                updatedPendingWhitespace: newPendingWhitespace,
+                updatedFullContent: fullContent + delta.content,
+            };
+        }
+        // Non-whitespace content - flush any pending whitespace first
+        if (pendingWhitespace) {
+            this.logger.debug(() => `Flushing pending whitespace (len=${pendingWhitespace?.length ?? 0}) before non-empty chunk`);
+            return {
+                shouldYield: true,
+                content: pendingWhitespace + delta.content,
+                updatedPendingWhitespace: null,
+                updatedFullContent: fullContent + pendingWhitespace + delta.content,
+            };
+        }
+        return {
+            shouldYield: true,
+            content: delta.content,
+            updatedPendingWhitespace: null,
+            updatedFullContent: fullContent + delta.content,
+        };
+    }
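
A minimal driver sketch (deltas invented, `provider` an assumed instance) showing how the buffer behaves: whitespace-only chunks are held back and only emitted when real text follows, so a stream that ends in blank chunks never yields trailing whitespace.

let pending = null;
let full = '';
for (const delta of [{ content: 'Hi' }, { content: '\n\n' }, { content: 'there' }]) {
    const r = provider.handleQwenStreamingWhitespace(delta, pending, full);
    pending = r.updatedPendingWhitespace;
    full = r.updatedFullContent;
    if (r.shouldYield) process.stdout.write(r.content); // "Hi", then "\n\nthere"
}
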
+    /**
+     * Process tool calls for Qwen models, fixing double stringification
+     * @param toolCalls The tool calls to process
+     * @returns Processed tool calls with fixes applied
+     */
+    processQwenToolCalls(toolCalls) {
+        if (!this.isUsingQwen()) {
+            return toolCalls;
+        }
+        this.logger.debug(() => `[Qwen Fix] Processing ${toolCalls.length} tool calls for double-stringification fix`);
+        return toolCalls.map((toolCall, index) => {
+            this.logger.debug(() => `[Qwen Fix] Tool call ${index}: ${JSON.stringify({
+                name: toolCall.function.name,
+                argumentsType: typeof toolCall.function.arguments,
+                argumentsLength: toolCall.function.arguments?.length,
+                argumentsSample: toolCall.function.arguments?.substring(0, 100),
+            })}`);
+            return this.fixQwenDoubleStringification(toolCall);
+        });
+    }
+    /**
+     * Determine how to yield the final message with tool calls based on provider quirks
+     * @param hasStreamedContent Whether content was already streamed
+     * @param fullContent The complete content
+     * @param toolCalls The tool calls to include
+     * @param usageData Optional usage statistics
+     * @returns The message to yield
+     */
+    buildFinalToolCallMessage(hasStreamedContent, fullContent, toolCalls, usageData) {
+        const isCerebras = this.baseURL?.toLowerCase().includes('cerebras.ai');
+        if (isCerebras) {
+            this.logger.debug(() => '[Cerebras] Special handling for Cerebras provider after tool responses', {
+                hasStreamedContent,
+                willSendSpace: hasStreamedContent,
+            });
+        }
+        const shouldOmitContent = hasStreamedContent && this.isUsingQwen() && !isCerebras;
+        this.logger.debug(() => '[Tool Call Handling] Deciding how to yield tool calls', {
+            hasStreamedContent,
+            isUsingQwen: this.isUsingQwen(),
+            isCerebras,
+            shouldOmitContent,
+            fullContentLength: fullContent.length,
+            toolCallCount: toolCalls?.length || 0,
+        });
+        if (shouldOmitContent || (isCerebras && hasStreamedContent)) {
+            // Send just a space to prevent stream stopping or duplication
+            if (isCerebras && hasStreamedContent) {
+                this.logger.debug(() => '[Cerebras] Sending minimal space content to prevent duplication');
+            }
+            return {
+                role: ContentGeneratorRole.ASSISTANT,
+                content: ' ',
+                tool_calls: toolCalls,
+                usage: usageData,
+            };
+        }
+        // Include full content with tool calls
+        return {
+            role: ContentGeneratorRole.ASSISTANT,
+            content: fullContent || '',
+            tool_calls: toolCalls,
+            usage: usageData,
+        };
+    }
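
The branching above compresses to a small decision table; restated as comments (combinations enumerated to cover each branch):

// hasStreamedContent && isUsingQwen() && !isCerebras -> content: ' '  (text already streamed; avoid re-sending it)
// hasStreamedContent && isCerebras                   -> content: ' '  (minimal content prevents duplication)
// anything else                                      -> content: fullContent || ''  (full text travels with the tool calls)
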
+    /**
+     * Fix Qwen's double stringification of tool call arguments
+     * Qwen models stringify array/object values WITHIN the JSON arguments
+     * @param toolCall The tool call to fix
+     * @returns The fixed tool call or the original if no fix is needed
+     */
+    fixQwenDoubleStringification(toolCall) {
+        if (!toolCall.function.arguments ||
+            typeof toolCall.function.arguments !== 'string') {
+            return toolCall;
+        }
+        try {
+            // First, parse the arguments to get the JSON object
+            const parsedArgs = JSON.parse(toolCall.function.arguments);
+            let hasNestedStringification = false;
+            // Check each property to see if it's a stringified array/object/number
+            const fixedArgs = {};
+            for (const [key, value] of Object.entries(parsedArgs)) {
+                if (typeof value === 'string') {
+                    const trimmed = value.trim();
+                    // Check if it's a stringified number (integer or float)
+                    if (/^-?\d+(\.\d+)?$/.test(trimmed)) {
+                        const numValue = trimmed.includes('.')
+                            ? parseFloat(trimmed)
+                            : parseInt(trimmed, 10);
+                        fixedArgs[key] = numValue;
+                        hasNestedStringification = true;
+                        this.logger.debug(() => `[Qwen Fix] Fixed stringified number in property '${key}' for ${toolCall.function.name}: "${value}" -> ${numValue}`);
+                    }
+                    // Check if it looks like a stringified array or object
+                    // Also check for Python-style dictionaries with single quotes
+                    else if ((trimmed.startsWith('[') && trimmed.endsWith(']')) ||
+                        (trimmed.startsWith('{') && trimmed.endsWith('}'))) {
+                        try {
+                            // Try to parse it as JSON
+                            const nestedParsed = JSON.parse(value);
+                            fixedArgs[key] = nestedParsed;
+                            hasNestedStringification = true;
+                            this.logger.debug(() => `[Qwen Fix] Fixed nested stringification in property '${key}' for ${toolCall.function.name}`);
+                        }
+                        catch {
+                            // Try to convert Python-style to JSON (single quotes to double quotes)
+                            try {
+                                const jsonified = value
+                                    .replace(/'/g, '"')
+                                    .replace(/: True/g, ': true')
+                                    .replace(/: False/g, ': false')
+                                    .replace(/: None/g, ': null');
+                                const nestedParsed = JSON.parse(jsonified);
+                                fixedArgs[key] = nestedParsed;
+                                hasNestedStringification = true;
+                                this.logger.debug(() => `[Qwen Fix] Fixed Python-style nested stringification in property '${key}' for ${toolCall.function.name}`);
+                            }
+                            catch {
+                                // Not valid JSON even after conversion, keep as string
+                                fixedArgs[key] = value;
+                            }
+                        }
+                    }
+                    else {
+                        fixedArgs[key] = value;
+                    }
+                }
+                else {
+                    fixedArgs[key] = value;
+                }
+            }
+            if (hasNestedStringification) {
+                this.logger.debug(() => `[Qwen Fix] Fixed nested double-stringification for ${toolCall.function.name}`);
+                return {
+                    ...toolCall,
+                    function: {
+                        ...toolCall.function,
+                        arguments: JSON.stringify(fixedArgs),
+                    },
+                };
+            }
+        }
+        catch (_e) {
+            // If parsing fails, check for old-style double-stringification
+            if (toolCall.function.arguments.startsWith('"') &&
+                toolCall.function.arguments.endsWith('"')) {
+                try {
+                    // Old fix: entire arguments were double-stringified
+                    const parsedArgs = JSON.parse(toolCall.function.arguments);
+                    this.logger.debug(() => `[Qwen Fix] Fixed whole-argument double-stringification for ${toolCall.function.name}`);
+                    return {
+                        ...toolCall,
+                        function: {
+                            ...toolCall.function,
+                            arguments: JSON.stringify(parsedArgs),
+                        },
+                    };
+                }
+                catch {
+                    // Leave as-is if we can't parse
+                }
+            }
+        }
+        // No fix needed
+        this.logger.debug(() => `[Qwen Fix] No double-stringification detected for ${toolCall.function.name}, keeping original`);
+        return toolCall;
+    }
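
A worked example of the repair (argument values invented). Qwen sometimes JSON-encodes values inside already-JSON arguments; the fix re-parses them:

const broken = {
    id: 'call_1',
    type: 'function',
    function: {
        name: 'search', // hypothetical tool
        arguments: '{"limit": "10", "paths": "[\'src\', \'test\']"}',
    },
};
// fixQwenDoubleStringification(broken).function.arguments
//   === '{"limit":10,"paths":["src","test"]}'
// "10" becomes the number 10, and the Python-style list is parsed into a real array.
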
 }
 //# sourceMappingURL=OpenAIProvider.js.map