@vybestack/llxprt-code-core 0.1.23-nightly.250904.97906524 → 0.1.23-nightly.250905.67589d14

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (82)
  1. package/dist/src/adapters/IStreamAdapter.d.ts +3 -3
  2. package/dist/src/auth/types.d.ts +4 -4
  3. package/dist/src/config/index.d.ts +7 -0
  4. package/dist/src/config/index.js +8 -0
  5. package/dist/src/config/index.js.map +1 -0
  6. package/dist/src/core/client.d.ts +9 -21
  7. package/dist/src/core/client.js +46 -144
  8. package/dist/src/core/client.js.map +1 -1
  9. package/dist/src/core/compression-config.d.ts +1 -1
  10. package/dist/src/core/compression-config.js +4 -5
  11. package/dist/src/core/compression-config.js.map +1 -1
  12. package/dist/src/core/coreToolScheduler.js +50 -15
  13. package/dist/src/core/coreToolScheduler.js.map +1 -1
  14. package/dist/src/core/geminiChat.d.ts +51 -2
  15. package/dist/src/core/geminiChat.js +592 -93
  16. package/dist/src/core/geminiChat.js.map +1 -1
  17. package/dist/src/core/nonInteractiveToolExecutor.js +70 -19
  18. package/dist/src/core/nonInteractiveToolExecutor.js.map +1 -1
  19. package/dist/src/index.d.ts +1 -2
  20. package/dist/src/index.js +2 -2
  21. package/dist/src/index.js.map +1 -1
  22. package/dist/src/providers/BaseProvider.d.ts +8 -3
  23. package/dist/src/providers/BaseProvider.js.map +1 -1
  24. package/dist/src/providers/IProvider.d.ts +9 -3
  25. package/dist/src/providers/LoggingProviderWrapper.d.ts +10 -3
  26. package/dist/src/providers/LoggingProviderWrapper.js +33 -27
  27. package/dist/src/providers/LoggingProviderWrapper.js.map +1 -1
  28. package/dist/src/providers/ProviderContentGenerator.d.ts +2 -2
  29. package/dist/src/providers/ProviderContentGenerator.js +9 -6
  30. package/dist/src/providers/ProviderContentGenerator.js.map +1 -1
  31. package/dist/src/providers/anthropic/AnthropicProvider.d.ts +12 -17
  32. package/dist/src/providers/anthropic/AnthropicProvider.js +238 -447
  33. package/dist/src/providers/anthropic/AnthropicProvider.js.map +1 -1
  34. package/dist/src/providers/gemini/GeminiProvider.d.ts +12 -6
  35. package/dist/src/providers/gemini/GeminiProvider.js +184 -458
  36. package/dist/src/providers/gemini/GeminiProvider.js.map +1 -1
  37. package/dist/src/providers/openai/ConversationCache.d.ts +3 -3
  38. package/dist/src/providers/openai/IChatGenerateParams.d.ts +9 -4
  39. package/dist/src/providers/openai/OpenAIProvider.d.ts +14 -61
  40. package/dist/src/providers/openai/OpenAIProvider.js +270 -575
  41. package/dist/src/providers/openai/OpenAIProvider.js.map +1 -1
  42. package/dist/src/providers/openai/buildResponsesRequest.d.ts +3 -3
  43. package/dist/src/providers/openai/buildResponsesRequest.js +67 -37
  44. package/dist/src/providers/openai/buildResponsesRequest.js.map +1 -1
  45. package/dist/src/providers/openai/estimateRemoteTokens.d.ts +2 -2
  46. package/dist/src/providers/openai/estimateRemoteTokens.js +21 -8
  47. package/dist/src/providers/openai/estimateRemoteTokens.js.map +1 -1
  48. package/dist/src/providers/openai/parseResponsesStream.d.ts +6 -2
  49. package/dist/src/providers/openai/parseResponsesStream.js +99 -391
  50. package/dist/src/providers/openai/parseResponsesStream.js.map +1 -1
  51. package/dist/src/providers/openai/syntheticToolResponses.d.ts +5 -5
  52. package/dist/src/providers/openai/syntheticToolResponses.js +102 -91
  53. package/dist/src/providers/openai/syntheticToolResponses.js.map +1 -1
  54. package/dist/src/providers/openai-responses/OpenAIResponsesProvider.d.ts +16 -17
  55. package/dist/src/providers/openai-responses/OpenAIResponsesProvider.js +222 -224
  56. package/dist/src/providers/openai-responses/OpenAIResponsesProvider.js.map +1 -1
  57. package/dist/src/providers/types.d.ts +1 -1
  58. package/dist/src/services/history/ContentConverters.d.ts +6 -1
  59. package/dist/src/services/history/ContentConverters.js +155 -18
  60. package/dist/src/services/history/ContentConverters.js.map +1 -1
  61. package/dist/src/services/history/HistoryService.d.ts +52 -0
  62. package/dist/src/services/history/HistoryService.js +245 -93
  63. package/dist/src/services/history/HistoryService.js.map +1 -1
  64. package/dist/src/services/history/IContent.d.ts +4 -0
  65. package/dist/src/services/history/IContent.js.map +1 -1
  66. package/dist/src/telemetry/types.d.ts +16 -4
  67. package/dist/src/telemetry/types.js.map +1 -1
  68. package/dist/src/tools/IToolFormatter.d.ts +2 -2
  69. package/dist/src/tools/ToolFormatter.d.ts +3 -3
  70. package/dist/src/tools/ToolFormatter.js +80 -37
  71. package/dist/src/tools/ToolFormatter.js.map +1 -1
  72. package/dist/src/tools/todo-schemas.d.ts +4 -4
  73. package/package.json +8 -7
  74. package/dist/src/core/ContentGeneratorAdapter.d.ts +0 -37
  75. package/dist/src/core/ContentGeneratorAdapter.js +0 -58
  76. package/dist/src/core/ContentGeneratorAdapter.js.map +0 -1
  77. package/dist/src/providers/IMessage.d.ts +0 -38
  78. package/dist/src/providers/IMessage.js +0 -17
  79. package/dist/src/providers/IMessage.js.map +0 -1
  80. package/dist/src/providers/adapters/GeminiCompatibleWrapper.d.ts +0 -69
  81. package/dist/src/providers/adapters/GeminiCompatibleWrapper.js +0 -577
  82. package/dist/src/providers/adapters/GeminiCompatibleWrapper.js.map +0 -1
@@ -18,13 +18,12 @@
18
18
  * @requirement REQ-INT-001.1
19
19
  */
20
20
  import { DebugLogger } from '../../debug/index.js';
21
- import { ContentGeneratorRole } from '../ContentGeneratorRole.js';
22
- import { GemmaToolCallParser } from '../../parsers/TextToolCallParser.js';
23
21
  import { ToolFormatter } from '../../tools/ToolFormatter.js';
24
22
  import OpenAI from 'openai';
25
23
  import { BaseProvider } from '../BaseProvider.js';
26
24
  import { isQwenEndpoint, generateOAuthEndpointMismatchError, } from '../../config/endpoints.js';
27
25
  import { getSettingsService } from '../../settings/settingsServiceInstance.js';
26
+ import { retryWithBackoff } from '../../utils/retry.js';
28
27
  export class OpenAIProvider extends BaseProvider {
29
28
  logger;
30
29
  openai;
@@ -68,7 +67,7 @@ export class OpenAIProvider extends BaseProvider {
68
67
  super(baseConfig);
69
68
  this.logger = new DebugLogger('llxprt:providers:openai');
70
69
  this.logger.debug(() => `Constructor - baseURL: ${baseURL}, apiKey: ${apiKey?.substring(0, 10) || 'none'}, oauthManager: ${!!oauthManager}, shouldEnableQwenOAuth: ${shouldEnableQwenOAuth}`);
71
- this.baseURL = baseURL;
70
+ this.baseURL = baseURL || 'https://api.openai.com/v1';
72
71
  this.providerConfig = config;
73
72
  this.toolFormatter = new ToolFormatter();
74
73
  // Initialize from SettingsService
@@ -170,19 +169,6 @@ export class OpenAIProvider extends BaseProvider {
170
169
  }
171
170
  }
172
171
  }
173
- requiresTextToolCallParsing() {
174
- if (this.providerConfig?.enableTextToolCallParsing === false) {
175
- return false;
176
- }
177
- // Check if current tool format requires text-based parsing
178
- const currentFormat = this.getToolFormat();
179
- const textBasedFormats = ['hermes', 'xml', 'llama'];
180
- if (textBasedFormats.includes(currentFormat)) {
181
- return true;
182
- }
183
- const configuredModels = this.providerConfig?.textToolCallModels || [];
184
- return configuredModels.includes(this.currentModel);
185
- }
186
172
  getToolFormat() {
187
173
  // Check manual override first
188
174
  if (this.toolFormatOverride) {
@@ -275,124 +261,6 @@ export class OpenAIProvider extends BaseProvider {
275
261
  ];
276
262
  }
277
263
  }
278
- async *generateChatCompletion(messages, tools, _toolFormat) {
279
- // 1. Validate authentication and messages
280
- await this.validateRequestPreconditions(messages);
281
- // 2. Prepare request configuration
282
- const requestConfig = this.prepareApiRequest(messages, tools);
283
- // 3. Make API call with error handling
284
- const response = await this.executeApiCall(messages, tools, requestConfig);
285
- // 4. Process response based on streaming mode
286
- let processedData = {
287
- fullContent: '',
288
- accumulatedToolCalls: [],
289
- hasStreamedContent: false,
290
- usageData: undefined,
291
- pendingWhitespace: null,
292
- };
293
- if (requestConfig.streamingEnabled) {
294
- // Need to yield streaming content as it comes
295
- const streamResponse = response;
296
- for await (const chunk of streamResponse) {
297
- const delta = chunk.choices?.[0]?.delta;
298
- if (delta?.content && !requestConfig.parser) {
299
- if (this.isUsingQwen()) {
300
- // Handle Qwen whitespace buffering inline for yielding
301
- // This is needed because we yield during streaming
302
- // We'll refactor this separately if needed
303
- const whitespaceResult = this.handleQwenStreamingWhitespace(delta, processedData.pendingWhitespace, processedData.fullContent);
304
- if (whitespaceResult.shouldYield) {
305
- yield {
306
- role: ContentGeneratorRole.ASSISTANT,
307
- content: whitespaceResult.content,
308
- };
309
- }
310
- // Update our tracking of processed data
311
- processedData = {
312
- fullContent: whitespaceResult.updatedFullContent,
313
- accumulatedToolCalls: processedData.accumulatedToolCalls,
314
- hasStreamedContent: processedData.hasStreamedContent ||
315
- whitespaceResult.shouldYield,
316
- usageData: processedData.usageData,
317
- pendingWhitespace: whitespaceResult.updatedPendingWhitespace,
318
- };
319
- }
320
- else {
321
- yield {
322
- role: ContentGeneratorRole.ASSISTANT,
323
- content: delta.content,
324
- };
325
- processedData = {
326
- fullContent: processedData.fullContent + delta.content,
327
- accumulatedToolCalls: processedData.accumulatedToolCalls,
328
- hasStreamedContent: true,
329
- usageData: processedData.usageData,
330
- pendingWhitespace: null,
331
- };
332
- }
333
- }
334
- else if (delta?.content) {
335
- // Parser mode - just accumulate
336
- processedData = {
337
- fullContent: processedData.fullContent + delta.content,
338
- accumulatedToolCalls: processedData.accumulatedToolCalls,
339
- hasStreamedContent: processedData.hasStreamedContent,
340
- usageData: processedData.usageData,
341
- pendingWhitespace: processedData.pendingWhitespace,
342
- };
343
- }
344
- // Handle tool calls
345
- if (delta?.tool_calls) {
346
- const accumulated = processedData.accumulatedToolCalls;
347
- for (const toolCall of delta.tool_calls) {
348
- this.toolFormatter.accumulateStreamingToolCall(toolCall, accumulated, requestConfig.currentToolFormat);
349
- }
350
- processedData = {
351
- ...processedData,
352
- accumulatedToolCalls: accumulated,
353
- };
354
- }
355
- // Check for usage data
356
- if (chunk.usage) {
357
- processedData = {
358
- ...processedData,
359
- usageData: {
360
- prompt_tokens: chunk.usage.prompt_tokens || 0,
361
- completion_tokens: chunk.usage.completion_tokens || 0,
362
- total_tokens: chunk.usage.total_tokens || 0,
363
- },
364
- };
365
- }
366
- }
367
- }
368
- else {
369
- // Non-streaming response
370
- processedData = this.processNonStreamingResponse(response);
371
- // For non-streaming, yield content if no parser
372
- if (!requestConfig.parser && processedData.fullContent) {
373
- yield {
374
- role: ContentGeneratorRole.ASSISTANT,
375
- content: processedData.fullContent,
376
- };
377
- processedData.hasStreamedContent = true;
378
- }
379
- }
380
- // 5. Flush pending whitespace if needed (for Qwen)
381
- if (processedData.pendingWhitespace &&
382
- this.isUsingQwen() &&
383
- !requestConfig.parser) {
384
- this.logger.debug(() => `Flushing trailing pending whitespace (len=${processedData.pendingWhitespace?.length ?? 0}) at stream end`);
385
- yield {
386
- role: ContentGeneratorRole.ASSISTANT,
387
- content: processedData.pendingWhitespace,
388
- };
389
- processedData.hasStreamedContent = true;
390
- processedData.fullContent += processedData.pendingWhitespace;
391
- processedData.pendingWhitespace = null;
392
- }
393
- // 6. Process and yield final results
394
- yield* this.processFinalResponse(processedData, requestConfig.parser);
395
- }
396
264
  setModel(modelId) {
397
265
  // Update SettingsService as the source of truth
398
266
  this.setModelInSettings(modelId).catch((error) => {
@@ -444,8 +312,9 @@ export class OpenAIProvider extends BaseProvider {
444
312
  this._cachedClientKey = apiKey; // Update cached key
445
313
  }
446
314
  setBaseUrl(baseUrl) {
447
- // If no baseUrl is provided, clear to default (undefined)
448
- this.baseURL = baseUrl && baseUrl.trim() !== '' ? baseUrl : undefined;
315
+ // If no baseUrl is provided, use default OpenAI URL
316
+ this.baseURL =
317
+ baseUrl && baseUrl.trim() !== '' ? baseUrl : 'https://api.openai.com/v1';
449
318
  // Persist to SettingsService if available
450
319
  this.setBaseUrlInSettings(this.baseURL).catch((error) => {
451
320
  this.logger.debug(() => `Failed to persist base URL to SettingsService: ${error}`);
@@ -522,6 +391,220 @@ export class OpenAIProvider extends BaseProvider {
522
391
  getModelParams() {
523
392
  return this.modelParams;
524
393
  }
394
+ /**
395
+ * Generate chat completion with IContent interface
396
+ * Internally converts to OpenAI API format, but only yields IContent
397
+ */
398
+ async *generateChatCompletion(content, tools) {
399
+ // Convert IContent directly to OpenAI API format (no IMessage!)
400
+ const apiMessages = [];
401
+ for (const c of content) {
402
+ if (c.speaker === 'human') {
403
+ const textBlock = c.blocks.find((b) => b.type === 'text');
404
+ apiMessages.push({
405
+ role: 'user',
406
+ content: textBlock?.text || '',
407
+ });
408
+ }
409
+ else if (c.speaker === 'ai') {
410
+ const textBlocks = c.blocks.filter((b) => b.type === 'text');
411
+ const toolCallBlocks = c.blocks.filter((b) => b.type === 'tool_call');
412
+ const contentText = textBlocks.map((b) => b.text).join('');
413
+ const toolCalls = toolCallBlocks.length > 0
414
+ ? toolCallBlocks.map((tc) => ({
415
+ id: tc.id,
416
+ type: 'function',
417
+ function: {
418
+ name: tc.name,
419
+ arguments: JSON.stringify(tc.parameters),
420
+ },
421
+ }))
422
+ : undefined;
423
+ apiMessages.push({
424
+ role: 'assistant',
425
+ content: contentText || null,
426
+ tool_calls: toolCalls,
427
+ });
428
+ }
429
+ else if (c.speaker === 'tool') {
430
+ const toolResponseBlock = c.blocks.find((b) => b.type === 'tool_response');
431
+ if (!toolResponseBlock) {
432
+ throw new Error('Tool content must have a tool_response block');
433
+ }
434
+ apiMessages.push({
435
+ role: 'tool',
436
+ content: JSON.stringify(toolResponseBlock.result),
437
+ tool_call_id: toolResponseBlock.callId,
438
+ });
439
+ }
440
+ else {
441
+ throw new Error(`Unknown speaker type: ${c.speaker}`);
442
+ }
443
+ }
444
+ // Debug log the converted messages
445
+ this.logger.debug(() => `Converted messages for OpenAI API: ${JSON.stringify(apiMessages, null, 2)}`);
446
+ // Convert Gemini format tools to OpenAI format
447
+ // Handle both legacy 'parameters' and new 'parametersJsonSchema' formats
448
+ const apiTools = tools
449
+ ? tools[0].functionDeclarations.map((decl) => {
450
+ // Support both old 'parameters' and new 'parametersJsonSchema' formats
451
+ // DeclarativeTool uses parametersJsonSchema, while legacy tools use parameters
452
+ const toolParameters = 'parametersJsonSchema' in decl
453
+ ? decl
454
+ .parametersJsonSchema
455
+ : decl.parameters;
456
+ return {
457
+ type: 'function',
458
+ function: {
459
+ name: decl.name,
460
+ description: decl.description || '',
461
+ parameters: toolParameters || {},
462
+ },
463
+ };
464
+ })
465
+ : undefined;
466
+ // Get auth token
467
+ const apiKey = await this.getAuthToken();
468
+ if (!apiKey) {
469
+ throw new Error('OpenAI API key is required');
470
+ }
471
+ // Build request
472
+ const requestBody = {
473
+ model: this.currentModel || 'gpt-4o-mini',
474
+ messages: apiMessages,
475
+ ...(apiTools && { tools: apiTools }),
476
+ stream: true,
477
+ ...(this.modelParams || {}),
478
+ };
479
+ // Wrap the API call with retry logic
480
+ const makeApiCall = async () => {
481
+ const response = await fetch(`${this.baseURL}/chat/completions`, {
482
+ method: 'POST',
483
+ headers: {
484
+ 'Content-Type': 'application/json',
485
+ Authorization: `Bearer ${apiKey}`,
486
+ },
487
+ body: JSON.stringify(requestBody),
488
+ });
489
+ if (!response.ok) {
490
+ const errorText = await response.text();
491
+ // Create an error object that matches what we check for in isRetryableError
492
+ const error = new Error(`OpenAI API error: ${errorText}`);
493
+ error.status = response.status;
494
+ // Try to parse the error response
495
+ try {
496
+ const errorObj = JSON.parse(errorText);
497
+ error.error = errorObj;
498
+ }
499
+ catch {
500
+ // If not JSON, keep as text
501
+ }
502
+ this.logger.debug(() => `API call error in generateChatCompletion: status=${response.status}, error=${errorText}`);
503
+ throw error;
504
+ }
505
+ return response;
506
+ };
507
+ // Use retry logic with longer delays for rate limits
508
+ const response = await retryWithBackoff(makeApiCall, {
509
+ shouldRetry: (error) => {
510
+ const shouldRetry = this.isRetryableError(error);
511
+ this.logger.debug(() => `Retry decision in generateChatCompletion: shouldRetry=${shouldRetry}, error=${String(error).substring(0, 200)}`);
512
+ return shouldRetry;
513
+ },
514
+ maxAttempts: 6, // Allow up to 6 attempts (initial + 5 retries)
515
+ initialDelayMs: 4000, // Start with 4 seconds for 429 errors
516
+ maxDelayMs: 65000, // Allow up to 65 seconds delay
517
+ });
518
+ // Parse streaming response and emit IContent
519
+ const reader = response.body?.getReader();
520
+ if (!reader)
521
+ throw new Error('No response body');
522
+ const decoder = new TextDecoder();
523
+ let buffer = '';
524
+ const accumulatedToolCalls = [];
525
+ while (true) {
526
+ const { done, value } = await reader.read();
527
+ if (done)
528
+ break;
529
+ buffer += decoder.decode(value, { stream: true });
530
+ const lines = buffer.split('\n');
531
+ buffer = lines.pop() || '';
532
+ for (const line of lines) {
533
+ if (!line.startsWith('data: '))
534
+ continue;
535
+ const data = line.slice(6);
536
+ if (data === '[DONE]')
537
+ continue;
538
+ try {
539
+ const parsed = JSON.parse(data);
540
+ const delta = parsed.choices?.[0]?.delta;
541
+ if (delta?.content) {
542
+ // Emit text content immediately as IContent
543
+ yield {
544
+ speaker: 'ai',
545
+ blocks: [{ type: 'text', text: delta.content }],
546
+ };
547
+ }
548
+ if (delta?.tool_calls) {
549
+ // Accumulate tool calls
550
+ for (const toolCall of delta.tool_calls) {
551
+ if (toolCall.index !== undefined) {
552
+ if (!accumulatedToolCalls[toolCall.index]) {
553
+ accumulatedToolCalls[toolCall.index] = {
554
+ id: toolCall.id || '',
555
+ type: 'function',
556
+ function: { name: '', arguments: '' },
557
+ };
558
+ }
559
+ const tc = accumulatedToolCalls[toolCall.index];
560
+ if (toolCall.id)
561
+ tc.id = toolCall.id;
562
+ if (toolCall.function?.name)
563
+ tc.function.name = toolCall.function.name;
564
+ if (toolCall.function?.arguments)
565
+ tc.function.arguments += toolCall.function.arguments;
566
+ }
567
+ }
568
+ }
569
+ }
570
+ catch (e) {
571
+ // Skip invalid JSON lines
572
+ this.logger.debug(() => `Failed to parse SSE line: ${e}`);
573
+ }
574
+ }
575
+ }
576
+ // Emit accumulated tool calls as IContent if any
577
+ if (accumulatedToolCalls.length > 0) {
578
+ const blocks = [];
579
+ for (const tc of accumulatedToolCalls) {
580
+ if (!tc)
581
+ continue;
582
+ try {
583
+ blocks.push({
584
+ type: 'tool_call',
585
+ id: tc.id,
586
+ name: tc.function.name,
587
+ parameters: JSON.parse(tc.function.arguments),
588
+ });
589
+ }
590
+ catch (_e) {
591
+ // If parsing fails, emit with string parameters
592
+ blocks.push({
593
+ type: 'tool_call',
594
+ id: tc.id,
595
+ name: tc.function.name,
596
+ parameters: tc.function.arguments,
597
+ });
598
+ }
599
+ }
600
+ if (blocks.length > 0) {
601
+ yield {
602
+ speaker: 'ai',
603
+ blocks,
604
+ };
605
+ }
606
+ }
607
+ }
525
608
  /**
526
609
  * Initialize provider configuration from SettingsService
527
610
  */
@@ -535,7 +618,7 @@ export class OpenAIProvider extends BaseProvider {
535
618
  // Load saved base URL if available
536
619
  const savedBaseUrl = await this.getBaseUrlFromSettings();
537
620
  if (savedBaseUrl !== undefined) {
538
- this.baseURL = savedBaseUrl;
621
+ this.baseURL = savedBaseUrl || 'https://api.openai.com/v1';
539
622
  }
540
623
  // Load saved model parameters if available
541
624
  const savedParams = await this.getModelParamsFromSettings();
@@ -597,19 +680,6 @@ export class OpenAIProvider extends BaseProvider {
597
680
  return 'openai';
598
681
  }
599
682
  }
600
- /**
601
- * Get appropriate tool_choice value based on detected tool format
602
- * @param tools Array of tools (if any)
603
- * @returns Appropriate tool_choice value for the current format
604
- */
605
- getToolChoiceForFormat(tools) {
606
- if (!tools || tools.length === 0) {
607
- return undefined;
608
- }
609
- // For all formats, use 'auto' (standard behavior)
610
- // Future enhancement: different formats may need different tool_choice values
611
- return 'auto';
612
- }
613
683
  /**
614
684
  * Format tools for API based on detected tool format
615
685
  * @param tools Array of tools to format
@@ -641,438 +711,63 @@ export class OpenAIProvider extends BaseProvider {
641
711
  return response;
642
712
  }
643
713
  /**
644
- * Validate authentication and message preconditions for API calls
645
- */
646
- async validateRequestPreconditions(messages) {
647
- // Check if API key is available (using resolved authentication)
648
- const apiKey = await this.getAuthToken();
649
- if (!apiKey) {
650
- const endpoint = this.baseURL || 'https://api.openai.com/v1';
651
- if (this.isOAuthEnabled() && !this.supportsOAuth()) {
652
- throw new Error(generateOAuthEndpointMismatchError(endpoint, 'qwen'));
653
- }
654
- throw new Error('OpenAI API key is required to generate completions');
655
- }
656
- // Validate tool messages have required tool_call_id
657
- const toolMessages = messages.filter((msg) => msg.role === 'tool');
658
- const missingIds = toolMessages.filter((msg) => !msg.tool_call_id);
659
- if (missingIds.length > 0) {
660
- this.logger.error(() => `FATAL: Tool messages missing tool_call_id: ${JSON.stringify(missingIds)}`);
661
- throw new Error(`OpenAI API requires tool_call_id for all tool messages. Found ${missingIds.length} tool message(s) without IDs.`);
662
- }
663
- }
664
- /**
665
- * Prepare API request configuration
666
- */
667
- prepareApiRequest(messages, tools) {
668
- const parser = this.requiresTextToolCallParsing()
669
- ? new GemmaToolCallParser()
670
- : null;
671
- // Get current tool format (with override support)
672
- const currentToolFormat = this.getToolFormat();
673
- // Format tools using formatToolsForAPI method
674
- const formattedTools = tools ? this.formatToolsForAPI(tools) : undefined;
675
- // Get stream_options from ephemeral settings (not model params)
676
- const streamOptions = this.providerConfig?.getEphemeralSettings?.()?.['stream-options'];
677
- // Default stream_options to { include_usage: true } unless explicitly set
678
- const finalStreamOptions = streamOptions !== undefined ? streamOptions : { include_usage: true };
679
- // Get streaming setting from ephemeral settings (default: enabled)
680
- const streamingSetting = this.providerConfig?.getEphemeralSettings?.()?.['streaming'];
681
- const streamingEnabled = streamingSetting !== 'disabled';
682
- return {
683
- parser,
684
- currentToolFormat,
685
- formattedTools,
686
- finalStreamOptions,
687
- streamingEnabled,
688
- };
689
- }
690
- /**
691
- * Execute API call with error handling
692
- */
693
- async executeApiCall(messages, tools, requestConfig) {
694
- // Get resolved authentication and update client if needed
695
- await this.updateClientWithResolvedAuth();
696
- this.logger.debug(() => `About to make API call with model: ${this.currentModel}, baseURL: ${this.openai.baseURL}, apiKey: ${this.openai.apiKey?.substring(0, 10)}..., streaming: ${requestConfig.streamingEnabled}, messages (${messages.length} total): ${messages
697
- .map((m) => `${m.role}${m.role === 'system' ? ` (length: ${m.content?.length})` : ''}`)
698
- .join(', ')}`);
699
- try {
700
- // Build request params with exact order from original
701
- return await this.openai.chat.completions.create({
702
- model: this.currentModel,
703
- messages: messages,
704
- stream: requestConfig.streamingEnabled,
705
- ...(requestConfig.streamingEnabled && requestConfig.finalStreamOptions
706
- ? { stream_options: requestConfig.finalStreamOptions }
707
- : {}),
708
- tools: requestConfig.formattedTools,
709
- tool_choice: this.getToolChoiceForFormat(tools),
710
- ...this.modelParams,
711
- });
712
- }
713
- catch (error) {
714
- this.handleApiError(error, messages);
715
- throw error; // Re-throw after logging
716
- }
717
- }
718
- /**
719
- * Handle and log API errors
720
- */
721
- handleApiError(error, messages) {
722
- const errorStatus = error?.status ||
723
- error?.response?.status;
724
- const errorLabel = errorStatus === 400 ? '[API Error 400]' : '[API Error]';
725
- this.logger.error(() => `${errorLabel} Error caught in API call:\n` +
726
- ` Error: ${error}\n` +
727
- ` Type: ${error?.constructor?.name}\n` +
728
- ` Status: ${errorStatus}\n` +
729
- ` Response data: ${JSON.stringify(error?.response?.data, null, 2)}`);
730
- // Log the last few messages to understand what's being sent
731
- if (errorStatus === 400) {
732
- // Log additional diagnostics for 400 errors
733
- const hasPendingToolCalls = messages.some((msg, idx) => {
734
- if (msg.role === 'assistant' && msg.tool_calls) {
735
- // Check if there's a matching tool response
736
- const toolCallIds = msg.tool_calls.map((tc) => tc.id);
737
- const hasResponses = toolCallIds.every((id) => messages
738
- .slice(idx + 1)
739
- .some((m) => m.role === 'tool' && m.tool_call_id === id));
740
- return !hasResponses;
741
- }
742
- return false;
743
- });
744
- this.logger.error(() => `${errorLabel} Last 5 messages being sent:\n` +
745
- ` Has pending tool calls without responses: ${hasPendingToolCalls}`);
746
- const lastMessages = messages.slice(-5);
747
- lastMessages.forEach((msg, idx) => {
748
- this.logger.error(() => ` [${messages.length - 5 + idx}] ${msg.role}${msg.tool_call_id ? ` (tool response for ${msg.tool_call_id})` : ''}${msg.tool_calls ? ` (${msg.tool_calls.length} tool calls)` : ''}`);
749
- if (msg.tool_calls) {
750
- msg.tool_calls.forEach((tc) => {
751
- this.logger.error(() => ` - Tool call: ${tc.id} -> ${tc.function.name}`);
752
- });
753
- }
754
- });
755
- }
756
- }
757
- /**
758
- * Process non-streaming response
759
- */
760
- processNonStreamingResponse(response) {
761
- const choice = response.choices[0];
762
- let fullContent = '';
763
- const accumulatedToolCalls = [];
764
- let usageData;
765
- if (choice?.message.content) {
766
- fullContent = choice.message.content;
767
- }
768
- if (choice?.message.tool_calls) {
769
- // Convert tool calls to the standard format
770
- for (const toolCall of choice.message.tool_calls) {
771
- if (toolCall.type === 'function' && toolCall.function) {
772
- // Don't fix double stringification here - it's handled later in the final processing
773
- accumulatedToolCalls.push({
774
- id: toolCall.id,
775
- type: 'function',
776
- function: toolCall.function,
777
- });
778
- }
779
- }
780
- }
781
- if (response.usage) {
782
- usageData = {
783
- prompt_tokens: response.usage.prompt_tokens,
784
- completion_tokens: response.usage.completion_tokens,
785
- total_tokens: response.usage.total_tokens,
786
- };
787
- }
788
- return {
789
- fullContent,
790
- accumulatedToolCalls,
791
- hasStreamedContent: false, // Non-streaming never has streamed content
792
- usageData,
793
- pendingWhitespace: null,
794
- };
795
- }
796
- /**
797
- * Process and build final response messages
798
- */
799
- *processFinalResponse(processedData, parser) {
800
- const { fullContent, accumulatedToolCalls, hasStreamedContent, usageData, pendingWhitespace, } = processedData;
801
- // Flush any remaining pending whitespace for Qwen
802
- let finalFullContent = fullContent;
803
- if (pendingWhitespace && this.isUsingQwen() && !parser) {
804
- this.logger.debug(() => `Flushing trailing pending whitespace (len=${pendingWhitespace?.length ?? 0}) at stream end`);
805
- finalFullContent += pendingWhitespace;
806
- }
807
- // After stream ends, parse text-based tool calls if needed
808
- if (parser && finalFullContent) {
809
- const { cleanedContent, toolCalls } = parser.parse(finalFullContent);
810
- if (toolCalls.length > 0) {
811
- // Convert to standard format
812
- const standardToolCalls = toolCalls.map((tc, index) => ({
813
- id: `call_${Date.now()}_${index}`,
814
- type: 'function',
815
- function: {
816
- name: tc.name,
817
- arguments: JSON.stringify(tc.arguments),
818
- },
819
- }));
820
- yield {
821
- role: ContentGeneratorRole.ASSISTANT,
822
- content: cleanedContent,
823
- tool_calls: standardToolCalls,
824
- usage: usageData,
825
- };
826
- }
827
- else {
828
- // No tool calls found, yield cleaned content
829
- yield {
830
- role: ContentGeneratorRole.ASSISTANT,
831
- content: cleanedContent,
832
- usage: usageData,
833
- };
834
- }
835
- }
836
- else {
837
- // Standard OpenAI tool call handling
838
- if (accumulatedToolCalls.length > 0) {
839
- // Process tool calls with Qwen-specific fixes if needed
840
- const fixedToolCalls = this.processQwenToolCalls(accumulatedToolCalls);
841
- if (this.isUsingQwen()) {
842
- this.logger.debug(() => `Final message with tool calls: ${JSON.stringify({
843
- contentLength: finalFullContent.length,
844
- content: finalFullContent.substring(0, 200) +
845
- (finalFullContent.length > 200 ? '...' : ''),
846
- toolCallCount: accumulatedToolCalls.length,
847
- hasStreamedContent,
848
- })}`);
849
- }
850
- // Build the final message based on provider-specific requirements
851
- const finalMessage = this.buildFinalToolCallMessage(hasStreamedContent, finalFullContent, fixedToolCalls, usageData);
852
- yield finalMessage;
853
- }
854
- else if (usageData) {
855
- // Always emit usage data so downstream consumers can update stats
856
- yield {
857
- role: ContentGeneratorRole.ASSISTANT,
858
- content: '',
859
- usage: usageData,
860
- };
861
- }
862
- }
863
- }
864
- /**
865
- * Handle Qwen-specific whitespace buffering during streaming
866
- * @param delta The stream delta containing content
867
- * @param pendingWhitespace Current buffered whitespace
868
- * @param fullContent Accumulated full content
869
- * @returns Object with updated state and whether to yield content
714
+ * Determines if an error should trigger a retry
870
715
  */
871
- handleQwenStreamingWhitespace(delta, pendingWhitespace, fullContent) {
872
- if (!delta.content) {
873
- return {
874
- shouldYield: false,
875
- content: '',
876
- updatedPendingWhitespace: pendingWhitespace,
877
- updatedFullContent: fullContent,
878
- };
879
- }
880
- const isWhitespaceOnly = delta.content.trim() === '';
881
- if (isWhitespaceOnly) {
882
- // Buffer whitespace-only chunk
883
- const newPendingWhitespace = (pendingWhitespace || '') + delta.content;
884
- this.logger.debug(() => `[Whitespace Buffering] Buffered whitespace-only chunk (len=${delta.content?.length ?? 0}). pendingWhitespace now len=${newPendingWhitespace?.length ?? 0}`);
885
- return {
886
- shouldYield: false,
887
- content: '',
888
- updatedPendingWhitespace: newPendingWhitespace,
889
- updatedFullContent: fullContent + delta.content,
890
- };
891
- }
892
- // Non-whitespace content - flush any pending whitespace first
893
- if (pendingWhitespace) {
894
- this.logger.debug(() => `Flushing pending whitespace (len=${pendingWhitespace?.length ?? 0}) before non-empty chunk`);
895
- return {
896
- shouldYield: true,
897
- content: pendingWhitespace + delta.content,
898
- updatedPendingWhitespace: null,
899
- updatedFullContent: fullContent + pendingWhitespace + delta.content,
900
- };
901
- }
902
- return {
903
- shouldYield: true,
904
- content: delta.content,
905
- updatedPendingWhitespace: null,
906
- updatedFullContent: fullContent + delta.content,
907
- };
908
- }
909
- /**
910
- * Process tool calls for Qwen models, fixing double stringification
911
- * @param toolCalls The tool calls to process
912
- * @returns Processed tool calls with fixes applied
913
- */
914
- processQwenToolCalls(toolCalls) {
915
- if (!this.isUsingQwen()) {
916
- return toolCalls;
917
- }
918
- this.logger.debug(() => `[Qwen Fix] Processing ${toolCalls.length} tool calls for double-stringification fix`);
919
- return toolCalls.map((toolCall, index) => {
920
- this.logger.debug(() => `[Qwen Fix] Tool call ${index}: ${JSON.stringify({
921
- name: toolCall.function.name,
922
- argumentsType: typeof toolCall.function.arguments,
923
- argumentsLength: toolCall.function.arguments?.length,
924
- argumentsSample: toolCall.function.arguments?.substring(0, 100),
925
- })}`);
926
- return this.fixQwenDoubleStringification(toolCall);
927
- });
928
- }
929
- /**
930
- * Determine how to yield the final message with tool calls based on provider quirks
931
- * @param hasStreamedContent Whether content was already streamed
932
- * @param fullContent The complete content
933
- * @param toolCalls The tool calls to include
934
- * @param usageData Optional usage statistics
935
- * @returns The message to yield
936
- */
937
- buildFinalToolCallMessage(hasStreamedContent, fullContent, toolCalls, usageData) {
938
- const isCerebras = this.baseURL?.toLowerCase().includes('cerebras.ai');
939
- if (isCerebras) {
940
- this.logger.debug(() => '[Cerebras] Special handling for Cerebras provider after tool responses', {
941
- hasStreamedContent,
942
- willSendSpace: hasStreamedContent,
943
- });
944
- }
945
- const shouldOmitContent = hasStreamedContent && this.isUsingQwen() && !isCerebras;
946
- this.logger.debug(() => '[Tool Call Handling] Deciding how to yield tool calls', {
947
- hasStreamedContent,
948
- isUsingQwen: this.isUsingQwen(),
949
- isCerebras,
950
- shouldOmitContent,
951
- fullContentLength: fullContent.length,
952
- toolCallCount: toolCalls?.length || 0,
953
- });
954
- if (shouldOmitContent || (isCerebras && hasStreamedContent)) {
955
- // Send just a space to prevent stream stopping or duplication
956
- if (isCerebras && hasStreamedContent) {
957
- this.logger.debug(() => '[Cerebras] Sending minimal space content to prevent duplication');
716
+ isRetryableError(error) {
717
+ // Check for OpenAI SDK specific error types
718
+ // The OpenAI SDK throws specific error classes for different error types
719
+ if (error && typeof error === 'object') {
720
+ const errorName = error.constructor?.name;
721
+ // Check for OpenAI SDK RateLimitError or InternalServerError
722
+ if (errorName === 'RateLimitError' ||
723
+ errorName === 'InternalServerError') {
724
+ this.logger.debug(() => `Retryable OpenAI SDK error detected: ${errorName}`);
725
+ return true;
958
726
  }
959
- return {
960
- role: ContentGeneratorRole.ASSISTANT,
961
- content: ' ',
962
- tool_calls: toolCalls,
963
- usage: usageData,
964
- };
965
- }
966
- // Include full content with tool calls
967
- return {
968
- role: ContentGeneratorRole.ASSISTANT,
969
- content: fullContent || '',
970
- tool_calls: toolCalls,
971
- usage: usageData,
972
- };
973
- }
974
- /**
975
- * Fix Qwen's double stringification of tool call arguments
976
- * Qwen models stringify array/object values WITHIN the JSON arguments
977
- * @param toolCall The tool call to fix
978
- * @returns The fixed tool call or the original if no fix is needed
979
- */
980
- fixQwenDoubleStringification(toolCall) {
981
- if (!toolCall.function.arguments ||
982
- typeof toolCall.function.arguments !== 'string') {
983
- return toolCall;
984
- }
985
- try {
986
- // First, parse the arguments to get the JSON object
987
- const parsedArgs = JSON.parse(toolCall.function.arguments);
988
- let hasNestedStringification = false;
989
- // Check each property to see if it's a stringified array/object/number
990
- const fixedArgs = {};
991
- for (const [key, value] of Object.entries(parsedArgs)) {
992
- if (typeof value === 'string') {
993
- const trimmed = value.trim();
994
- // Check if it's a stringified number (integer or float)
995
- if (/^-?\d+(\.\d+)?$/.test(trimmed)) {
996
- const numValue = trimmed.includes('.')
997
- ? parseFloat(trimmed)
998
- : parseInt(trimmed, 10);
999
- fixedArgs[key] = numValue;
1000
- hasNestedStringification = true;
1001
- this.logger.debug(() => `[Qwen Fix] Fixed stringified number in property '${key}' for ${toolCall.function.name}: "${value}" -> ${numValue}`);
1002
- }
1003
- // Check if it looks like a stringified array or object
1004
- // Also check for Python-style dictionaries with single quotes
1005
- else if ((trimmed.startsWith('[') && trimmed.endsWith(']')) ||
1006
- (trimmed.startsWith('{') && trimmed.endsWith('}'))) {
1007
- try {
1008
- // Try to parse it as JSON
1009
- const nestedParsed = JSON.parse(value);
1010
- fixedArgs[key] = nestedParsed;
1011
- hasNestedStringification = true;
1012
- this.logger.debug(() => `[Qwen Fix] Fixed nested stringification in property '${key}' for ${toolCall.function.name}`);
1013
- }
1014
- catch {
1015
- // Try to convert Python-style to JSON (single quotes to double quotes)
1016
- try {
1017
- const jsonified = value
1018
- .replace(/'/g, '"')
1019
- .replace(/: True/g, ': true')
1020
- .replace(/: False/g, ': false')
1021
- .replace(/: None/g, ': null');
1022
- const nestedParsed = JSON.parse(jsonified);
1023
- fixedArgs[key] = nestedParsed;
1024
- hasNestedStringification = true;
1025
- this.logger.debug(() => `[Qwen Fix] Fixed Python-style nested stringification in property '${key}' for ${toolCall.function.name}`);
1026
- }
1027
- catch {
1028
- // Not valid JSON even after conversion, keep as string
1029
- fixedArgs[key] = value;
1030
- }
1031
- }
1032
- }
1033
- else {
1034
- fixedArgs[key] = value;
1035
- }
727
+ // Check for status property (OpenAI APIError has a status property)
728
+ if ('status' in error) {
729
+ const status = error.status;
730
+ // Retry on 429 (rate limit) and 5xx errors
731
+ if (status === 429 || (status >= 500 && status < 600)) {
732
+ this.logger.debug(() => `Retryable error detected - status: ${status}`);
733
+ return true;
1036
734
  }
1037
- else {
1038
- fixedArgs[key] = value;
1039
- }
1040
- }
1041
- if (hasNestedStringification) {
1042
- this.logger.debug(() => `[Qwen Fix] Fixed nested double-stringification for ${toolCall.function.name}`);
1043
- return {
1044
- ...toolCall,
1045
- function: {
1046
- ...toolCall.function,
1047
- arguments: JSON.stringify(fixedArgs),
1048
- },
1049
- };
1050
735
  }
1051
- }
1052
- catch (_e) {
1053
- // If parsing fails, check for old-style double-stringification
1054
- if (toolCall.function.arguments.startsWith('"') &&
1055
- toolCall.function.arguments.endsWith('"')) {
1056
- try {
1057
- // Old fix: entire arguments were double-stringified
1058
- const parsedArgs = JSON.parse(toolCall.function.arguments);
1059
- this.logger.debug(() => `[Qwen Fix] Fixed whole-argument double-stringification for ${toolCall.function.name}`);
1060
- return {
1061
- ...toolCall,
1062
- function: {
1063
- ...toolCall.function,
1064
- arguments: JSON.stringify(parsedArgs),
1065
- },
1066
- };
1067
- }
1068
- catch {
1069
- // Leave as-is if we can't parse
736
+ // Check for nested error object (some OpenAI errors have error.error structure)
737
+ if ('error' in error &&
738
+ typeof error.error === 'object') {
739
+ const nestedError = error.error;
740
+ if (nestedError?.code === 'token_quota_exceeded' ||
741
+ nestedError?.type === 'too_many_tokens_error' ||
742
+ nestedError?.code === 'rate_limit_exceeded') {
743
+ this.logger.debug(() => `Retryable error detected from error code: ${nestedError.code || nestedError.type}`);
744
+ return true;
1070
745
  }
1071
746
  }
1072
747
  }
1073
- // No fix needed
1074
- this.logger.debug(() => `[Qwen Fix] No double-stringification detected for ${toolCall.function.name}, keeping original`);
1075
- return toolCall;
748
+ // Check error message for rate limit indicators
749
+ const errorMessage = String(error).toLowerCase();
750
+ const retryablePatterns = [
751
+ 'rate limit',
752
+ 'rate_limit',
753
+ 'quota_exceeded',
754
+ 'too_many_tokens',
755
+ 'too many requests',
756
+ '429',
757
+ 'overloaded',
758
+ 'server_error',
759
+ 'service_unavailable',
760
+ 'internal server error',
761
+ '500',
762
+ '502',
763
+ '503',
764
+ '504',
765
+ ];
766
+ const shouldRetry = retryablePatterns.some((pattern) => errorMessage.includes(pattern));
767
+ if (shouldRetry) {
768
+ this.logger.debug(() => `Retryable error detected from message pattern: ${errorMessage}`);
769
+ }
770
+ return shouldRetry;
1076
771
  }
1077
772
  }
1078
773
  //# sourceMappingURL=OpenAIProvider.js.map