@vybestack/llxprt-code-core 0.5.0-nightly.251109.557a0fe7 → 0.5.0-nightly.251110.c0116408

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. package/dist/src/core/coreToolScheduler.js +5 -5
  2. package/dist/src/core/coreToolScheduler.js.map +1 -1
  3. package/dist/src/core/prompts.js +53 -0
  4. package/dist/src/core/prompts.js.map +1 -1
  5. package/dist/src/core/turn.js +19 -2
  6. package/dist/src/core/turn.js.map +1 -1
  7. package/dist/src/parsers/TextToolCallParser.d.ts +3 -1
  8. package/dist/src/parsers/TextToolCallParser.js +68 -20
  9. package/dist/src/parsers/TextToolCallParser.js.map +1 -1
  10. package/dist/src/providers/anthropic/AnthropicProvider.js +12 -18
  11. package/dist/src/providers/anthropic/AnthropicProvider.js.map +1 -1
  12. package/dist/src/providers/gemini/GeminiProvider.js +9 -1
  13. package/dist/src/providers/gemini/GeminiProvider.js.map +1 -1
  14. package/dist/src/providers/openai/OpenAIProvider.d.ts +27 -5
  15. package/dist/src/providers/openai/OpenAIProvider.js +1078 -117
  16. package/dist/src/providers/openai/OpenAIProvider.js.map +1 -1
  17. package/dist/src/providers/openai/ToolCallCollector.d.ts +77 -0
  18. package/dist/src/providers/openai/ToolCallCollector.js +150 -0
  19. package/dist/src/providers/openai/ToolCallCollector.js.map +1 -0
  20. package/dist/src/providers/openai/ToolCallExecutor.d.ts +65 -0
  21. package/dist/src/providers/openai/ToolCallExecutor.js +120 -0
  22. package/dist/src/providers/openai/ToolCallExecutor.js.map +1 -0
  23. package/dist/src/providers/openai/ToolCallNormalizer.d.ts +47 -0
  24. package/dist/src/providers/openai/ToolCallNormalizer.js +101 -0
  25. package/dist/src/providers/openai/ToolCallNormalizer.js.map +1 -0
  26. package/dist/src/providers/openai/ToolCallPipeline.d.ts +80 -0
  27. package/dist/src/providers/openai/ToolCallPipeline.js +137 -0
  28. package/dist/src/providers/openai/ToolCallPipeline.js.map +1 -0
  29. package/dist/src/providers/openai/ToolCallValidator.d.ts +55 -0
  30. package/dist/src/providers/openai/ToolCallValidator.js +108 -0
  31. package/dist/src/providers/openai/ToolCallValidator.js.map +1 -0
  32. package/dist/src/providers/openai/ToolNameValidator.d.ts +38 -0
  33. package/dist/src/providers/openai/ToolNameValidator.js +90 -0
  34. package/dist/src/providers/openai/ToolNameValidator.js.map +1 -0
  35. package/dist/src/providers/openai/test-types.d.ts +37 -0
  36. package/dist/src/providers/openai/test-types.js +3 -0
  37. package/dist/src/providers/openai/test-types.js.map +1 -0
  38. package/dist/src/providers/openai/toolNameUtils.d.ts +57 -0
  39. package/dist/src/providers/openai/toolNameUtils.js +180 -0
  40. package/dist/src/providers/openai/toolNameUtils.js.map +1 -0
  41. package/dist/src/providers/types/IProviderConfig.d.ts +6 -0
  42. package/dist/src/providers/utils/toolResponsePayload.d.ts +18 -0
  43. package/dist/src/providers/utils/toolResponsePayload.js +130 -0
  44. package/dist/src/providers/utils/toolResponsePayload.js.map +1 -0
  45. package/dist/src/runtime/AgentRuntimeLoader.js +5 -5
  46. package/dist/src/runtime/AgentRuntimeLoader.js.map +1 -1
  47. package/dist/src/services/history/HistoryService.js +7 -19
  48. package/dist/src/services/history/HistoryService.js.map +1 -1
  49. package/dist/src/tools/ToolFormatter.js +9 -40
  50. package/dist/src/tools/ToolFormatter.js.map +1 -1
  51. package/dist/src/tools/tool-registry.js +20 -9
  52. package/dist/src/tools/tool-registry.js.map +1 -1
  53. package/dist/src/tools/toolNameUtils.d.ts +43 -0
  54. package/dist/src/tools/toolNameUtils.js +126 -0
  55. package/dist/src/tools/toolNameUtils.js.map +1 -0
  56. package/package.json +1 -1
@@ -18,20 +18,31 @@
  * @requirement REQ-INT-001.1
  */
  import OpenAI from 'openai';
+ import crypto from 'node:crypto';
  import * as http from 'http';
  import * as https from 'https';
  import * as net from 'net';
  import { BaseProvider, } from '../BaseProvider.js';
  import { DebugLogger } from '../../debug/index.js';
  import { ToolFormatter } from '../../tools/ToolFormatter.js';
+ import { GemmaToolCallParser } from '../../parsers/TextToolCallParser.js';
  import { processToolParameters } from '../../tools/doubleEscapeUtils.js';
  import { getCoreSystemPromptAsync } from '../../core/prompts.js';
  import { retryWithBackoff } from '../../utils/retry.js';
  import { resolveUserMemory } from '../utils/userMemory.js';
  import { resolveRuntimeAuthToken } from '../utils/authToken.js';
  import { filterOpenAIRequestParams } from './openaiRequestParams.js';
+ import { ensureJsonSafe } from '../../utils/unicodeUtils.js';
+ import { ToolCallPipeline } from './ToolCallPipeline.js';
+ import { buildToolResponsePayload, EMPTY_TOOL_RESULT_PLACEHOLDER, } from '../utils/toolResponsePayload.js';
+ const MAX_TOOL_RESPONSE_CHARS = 1024;
+ const MAX_TOOL_RESPONSE_RETRY_CHARS = 512;
+ const TOOL_ARGS_PREVIEW_LENGTH = 500;
+ const TEXTUAL_TOOL_REPLAY_MODELS = new Set(['openrouter/polaris-alpha']);
  export class OpenAIProvider extends BaseProvider {
- name = 'openai';
+ textToolParser = new GemmaToolCallParser();
+ toolCallPipeline = new ToolCallPipeline();
+ toolCallProcessingMode;
  getLogger() {
  return new DebugLogger('llxprt:provider:openai');
  }
@@ -75,6 +86,8 @@ export class OpenAIProvider extends BaseProvider {
  oauthProvider: isQwenEndpoint || forceQwenOAuth ? 'qwen' : undefined,
  oauthManager,
  }, config);
+ // Initialize tool call processing mode - default to 'pipeline' (optimized)
+ this.toolCallProcessingMode = config?.toolCallProcessingMode ?? 'pipeline';
  // @plan:PLAN-20251023-STATELESS-HARDENING.P08
  // @requirement:REQ-SP4-002
  // No constructor-captured state - all values sourced from normalized options per call
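Note on the hunk above: the constructor now reads toolCallProcessingMode from the provider config (the field this release adds to IProviderConfig.d.ts) and defaults to 'pipeline'. A minimal opt-out sketch, assuming the same config object the constructor already forwards to BaseProvider; the other constructor arguments are illustrative, not confirmed API:

  // Hypothetical usage; only 'toolCallProcessingMode' is the field this diff adds.
  const provider = new OpenAIProvider(apiKey, baseURL, {
    toolCallProcessingMode: 'legacy', // any other value routes to the new pipeline path
  });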
@@ -385,22 +398,24 @@ export class OpenAIProvider extends BaseProvider {
  * Handles IDs from OpenAI (call_xxx), Anthropic (toolu_xxx), and history (hist_tool_xxx)
  */
  normalizeToOpenAIToolId(id) {
+ const sanitize = (value) => value.replace(/[^a-zA-Z0-9_]/g, '') ||
+ 'call_' + crypto.randomUUID().replace(/-/g, '');
  // If already in OpenAI format, return as-is
  if (id.startsWith('call_')) {
- return id;
+ return sanitize(id);
  }
  // For history format, extract the UUID and add OpenAI prefix
  if (id.startsWith('hist_tool_')) {
  const uuid = id.substring('hist_tool_'.length);
- return 'call_' + uuid;
+ return sanitize('call_' + uuid);
  }
  // For Anthropic format, extract the UUID and add OpenAI prefix
  if (id.startsWith('toolu_')) {
  const uuid = id.substring('toolu_'.length);
- return 'call_' + uuid;
+ return sanitize('call_' + uuid);
  }
  // Unknown format - assume it's a raw UUID
- return 'call_' + id;
+ return sanitize('call_' + id);
  }
  /**
  * Normalize tool IDs from OpenAI format to history format
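The new sanitize closure strips every character outside [a-zA-Z0-9_] and only mints a fresh 'call_' + crypto.randomUUID() id if nothing survives the strip. Expected behavior of the rewritten method, with illustrative inputs:

  provider.normalizeToOpenAIToolId('call_abc-123');   // 'call_abc123' (dash stripped)
  provider.normalizeToOpenAIToolId('hist_tool_9f2e'); // 'call_9f2e'
  provider.normalizeToOpenAIToolId('toolu_01AB$CD');  // 'call_01ABCD'
  provider.normalizeToOpenAIToolId('raw uuid');       // 'call_rawuuid' (unknown format: prefixed, then sanitized)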
@@ -458,10 +473,124 @@ export class OpenAIProvider extends BaseProvider {
  yield item;
  }
  }
+ normalizeToolCallArguments(parameters) {
+ if (parameters === undefined || parameters === null) {
+ return '{}';
+ }
+ if (typeof parameters === 'string') {
+ const trimmed = parameters.trim();
+ if (!trimmed) {
+ return '{}';
+ }
+ try {
+ const parsed = JSON.parse(trimmed);
+ if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
+ return JSON.stringify(parsed);
+ }
+ return JSON.stringify({ value: parsed });
+ }
+ catch {
+ return JSON.stringify({ raw: trimmed });
+ }
+ }
+ if (typeof parameters === 'object') {
+ try {
+ return JSON.stringify(parameters);
+ }
+ catch {
+ return JSON.stringify({ raw: '[unserializable object]' });
+ }
+ }
+ return JSON.stringify({ value: parameters });
+ }
+ determineToolReplayMode(model) {
+ if (!model) {
+ return 'native';
+ }
+ const normalized = model.toLowerCase();
+ if (TEXTUAL_TOOL_REPLAY_MODELS.has(normalized)) {
+ return 'textual';
+ }
+ return 'native';
+ }
+ describeToolCallForText(block) {
+ const normalizedArgs = this.normalizeToolCallArguments(block.parameters);
+ const preview = normalizedArgs.length > MAX_TOOL_RESPONSE_CHARS
+ ? `${normalizedArgs.slice(0, MAX_TOOL_RESPONSE_CHARS)}… [truncated ${normalizedArgs.length - MAX_TOOL_RESPONSE_CHARS} chars]`
+ : normalizedArgs;
+ const callId = block.id ? ` ${this.normalizeToOpenAIToolId(block.id)}` : '';
+ return `[TOOL CALL${callId ? ` ${callId}` : ''}] ${block.name ?? 'unknown_tool'} args=${preview}`;
+ }
+ describeToolResponseForText(block, config) {
+ const payload = buildToolResponsePayload(block, config);
+ const header = `[TOOL RESULT] ${payload.toolName ?? block.toolName ?? 'unknown_tool'} (${payload.status ?? 'unknown'})`;
+ const bodyParts = [];
+ if (payload.error) {
+ bodyParts.push(`error: ${payload.error}`);
+ }
+ if (payload.result && payload.result !== EMPTY_TOOL_RESULT_PLACEHOLDER) {
+ bodyParts.push(payload.result);
+ }
+ if (payload.limitMessage) {
+ bodyParts.push(payload.limitMessage);
+ }
+ return bodyParts.length > 0 ? `${header}\n${bodyParts.join('\n')}` : header;
+ }
+ buildToolResponseContent(block, config) {
+ const payload = buildToolResponsePayload(block, config);
+ return ensureJsonSafe(JSON.stringify(payload));
+ }
+ shouldCompressToolMessages(error, logger) {
+ if (error &&
+ typeof error === 'object' &&
+ 'status' in error &&
+ error.status === 400) {
+ const raw = error &&
+ typeof error === 'object' &&
+ 'error' in error &&
+ typeof error.error ===
+ 'object'
+ ? (error.error ?? {})
+ .metadata?.raw
+ : undefined;
+ if (raw === 'ERROR') {
+ logger.debug(() => `[OpenAIProvider] Detected OpenRouter 400 response with raw metadata. Will attempt tool-response compression.`);
+ return true;
+ }
+ }
+ return false;
+ }
+ compressToolMessages(messages, maxLength, logger) {
+ let modified = false;
+ messages.forEach((message, index) => {
+ if (message.role !== 'tool' || typeof message.content !== 'string') {
+ return;
+ }
+ const original = message.content;
+ if (original.length <= maxLength) {
+ return;
+ }
+ let nextContent = original;
+ try {
+ const parsed = JSON.parse(original);
+ parsed.result = `[omitted ${original.length} chars due to provider limits]`;
+ parsed.truncated = true;
+ parsed.originalLength = original.length;
+ nextContent = JSON.stringify(parsed);
+ }
+ catch {
+ nextContent = `${original.slice(0, maxLength)}… [truncated ${original.length - maxLength} chars]`;
+ }
+ message.content = ensureJsonSafe(nextContent);
+ modified = true;
+ logger.debug(() => `[OpenAIProvider] Compressed tool message #${index} from ${original.length} chars to ${message.content.length} chars`);
+ });
+ return modified;
+ }
  /**
  * Convert IContent array to OpenAI ChatCompletionMessageParam array
  */
- convertToOpenAIMessages(contents) {
+ convertToOpenAIMessages(contents, mode = 'native', config) {
  const messages = [];
  for (const content of contents) {
  if (content.speaker === 'human') {
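normalizeToolCallArguments (added above) guarantees the arguments string handed to the API is always a JSON object or an explicit wrapper, never raw text. Its behavior follows directly from the code; inputs here are illustrative:

  normalizeToolCallArguments(undefined);          // '{}'
  normalizeToolCallArguments('   ');              // '{}'
  normalizeToolCallArguments('{"path":"/tmp"}');  // '{"path":"/tmp"}' (re-serialized object)
  normalizeToolCallArguments('42');               // '{"value":42}'   (bare JSON value wrapped)
  normalizeToolCallArguments('["a"]');            // '{"value":["a"]}'
  normalizeToolCallArguments('not json');         // '{"raw":"not json"}'
  normalizeToolCallArguments({ path: '/tmp' });   // '{"path":"/tmp"}'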
@@ -478,28 +607,43 @@ export class OpenAIProvider extends BaseProvider {
  else if (content.speaker === 'ai') {
  // Convert AI messages
  const textBlocks = content.blocks.filter((b) => b.type === 'text');
+ const text = textBlocks.map((b) => b.text).join('\n');
  const toolCalls = content.blocks.filter((b) => b.type === 'tool_call');
  if (toolCalls.length > 0) {
- // Assistant message with tool calls
- const text = textBlocks.map((b) => b.text).join('\n');
- messages.push({
- role: 'assistant',
- content: text || null,
- tool_calls: toolCalls.map((tc) => ({
- id: this.normalizeToOpenAIToolId(tc.id),
- type: 'function',
- function: {
- name: tc.name,
- arguments: typeof tc.parameters === 'string'
- ? tc.parameters
- : JSON.stringify(tc.parameters),
- },
- })),
- });
+ if (mode === 'textual') {
+ const segments = [];
+ if (text) {
+ segments.push(text);
+ }
+ for (const tc of toolCalls) {
+ segments.push(this.describeToolCallForText(tc));
+ }
+ const combined = segments.join('\n\n').trim();
+ if (combined) {
+ messages.push({
+ role: 'assistant',
+ content: combined,
+ });
+ }
+ }
+ else {
+ // Assistant message with tool calls
+ messages.push({
+ role: 'assistant',
+ content: text || null,
+ tool_calls: toolCalls.map((tc) => ({
+ id: this.normalizeToOpenAIToolId(tc.id),
+ type: 'function',
+ function: {
+ name: tc.name,
+ arguments: this.normalizeToolCallArguments(tc.parameters),
+ },
+ })),
+ });
+ }
  }
  else if (textBlocks.length > 0) {
  // Plain assistant message
- const text = textBlocks.map((b) => b.text).join('\n');
  messages.push({
  role: 'assistant',
  content: text,
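In textual replay mode (enabled only for models in TEXTUAL_TOOL_REPLAY_MODELS, currently just 'openrouter/polaris-alpha'), a historical assistant turn with tool calls is replayed as one plain-text message built by describeToolCallForText instead of structured tool_calls. Roughly, with illustrative values:

  { role: 'assistant',
    content: 'Let me check that file.\n\n[TOOL CALL  call_9f2e] read_file args={"path":"/tmp/a.txt"}' }

(The doubled space after 'CALL' is real: callId already carries a leading space and the template adds another.)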
@@ -509,27 +653,72 @@ export class OpenAIProvider extends BaseProvider {
  else if (content.speaker === 'tool') {
  // Convert tool responses
  const toolResponses = content.blocks.filter((b) => b.type === 'tool_response');
- for (const tr of toolResponses) {
- messages.push({
- role: 'tool',
- content: typeof tr.result === 'string'
- ? tr.result
- : JSON.stringify(tr.result),
- tool_call_id: this.normalizeToOpenAIToolId(tr.callId),
- });
+ if (mode === 'textual') {
+ const segments = toolResponses
+ .map((tr) => this.describeToolResponseForText(tr, config))
+ .filter(Boolean);
+ if (segments.length > 0) {
+ messages.push({
+ role: 'user',
+ content: segments.join('\n\n'),
+ });
+ }
+ }
+ else {
+ for (const tr of toolResponses) {
+ messages.push({
+ role: 'tool',
+ content: this.buildToolResponseContent(tr, config),
+ tool_call_id: this.normalizeToOpenAIToolId(tr.callId),
+ });
+ }
  }
  }
  }
  return messages;
  }
+ getContentPreview(content, maxLength = 200) {
+ if (content === null || content === undefined) {
+ return undefined;
+ }
+ if (typeof content === 'string') {
+ if (content.length <= maxLength) {
+ return content;
+ }
+ return `${content.slice(0, maxLength)}…`;
+ }
+ if (Array.isArray(content)) {
+ const textParts = content
+ .filter((part) => typeof part === 'object' && part !== null && 'type' in part)
+ .map((part) => part.type === 'text' && typeof part.text === 'string'
+ ? part.text
+ : JSON.stringify(part));
+ const joined = textParts.join('\n');
+ if (joined.length <= maxLength) {
+ return joined;
+ }
+ return `${joined.slice(0, maxLength)}…`;
+ }
+ try {
+ const serialized = JSON.stringify(content);
+ if (serialized.length <= maxLength) {
+ return serialized;
+ }
+ return `${serialized.slice(0, maxLength)}…`;
+ }
+ catch {
+ return '[unserializable content]';
+ }
+ }
  /**
  * @plan:PLAN-20251023-STATELESS-HARDENING.P08
  * @requirement:REQ-SP4-003
- * Internal implementation for chat completion using normalized options
+ * Legacy implementation for chat completion using accumulated tool calls approach
  */
- async *generateChatCompletionImpl(options, toolFormatter, client, logger) {
+ async *generateLegacyChatCompletionImpl(options, toolFormatter, client, logger) {
  const { contents, tools, metadata } = options;
  const model = options.resolved.model || this.getDefaultModel();
+ const toolReplayMode = this.determineToolReplayMode(model);
  const abortSignal = metadata?.abortSignal;
  const ephemeralSettings = options.invocation?.ephemerals ?? {};
  if (logger.enabled) {
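Tool responses change shape in both modes in this release: the native path now serializes every response through buildToolResponsePayload + ensureJsonSafe instead of passing tr.result through raw, and the textual path replays results as a synthetic user message. A sketch of the two shapes; the payload field names come from the helpers above, the values are illustrative:

  // native mode, role 'tool':
  {"toolName":"read_file","status":"success","result":"...file contents..."}
  // textual mode, role 'user':
  [TOOL RESULT] read_file (success)
  ...file contents...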
@@ -546,7 +735,11 @@ export class OpenAIProvider extends BaseProvider {
  });
  }
  // Convert IContent to OpenAI messages format
- const messages = this.convertToOpenAIMessages(contents);
+ const configForMessages = options.config ?? options.runtime?.config ?? this.globalConfig;
+ const messages = this.convertToOpenAIMessages(contents, toolReplayMode, configForMessages);
+ if (logger.enabled && toolReplayMode !== 'native') {
+ logger.debug(() => `[OpenAIProvider] Using textual tool replay mode for model '${model}'`);
+ }
  // Detect the tool format to use (once at the start of the method)
  const detectedFormat = this.detectToolFormat();
  // Log the detected format for debugging
@@ -581,6 +774,9 @@ export class OpenAIProvider extends BaseProvider {
  outputToolsLength: formattedTools?.length,
  outputToolNames: formattedTools?.map((t) => t.function.name),
  });
+ logger.debug(() => `[OpenAIProvider] Tool conversion detail`, {
+ tools: formattedTools,
+ });
  }
  // Get streaming setting from ephemeral settings (default: enabled)
  const streamingSetting = ephemeralSettings['streaming'];
@@ -602,6 +798,40 @@ export class OpenAIProvider extends BaseProvider {
  { role: 'system', content: systemPrompt },
  ...messages,
  ];
+ if (logger.enabled) {
+ logger.debug(() => `[OpenAIProvider] Chat payload snapshot`, {
+ messageCount: messagesWithSystem.length,
+ messages: messagesWithSystem.map((msg) => ({
+ role: msg.role,
+ contentPreview: this.getContentPreview(msg.content),
+ contentLength: typeof msg.content === 'string' ? msg.content.length : undefined,
+ rawContent: typeof msg.content === 'string' ? msg.content : undefined,
+ toolCallCount: 'tool_calls' in msg && Array.isArray(msg.tool_calls)
+ ? msg.tool_calls.length
+ : undefined,
+ toolCalls: 'tool_calls' in msg && Array.isArray(msg.tool_calls)
+ ? msg.tool_calls.map((call) => {
+ if (call.type === 'function') {
+ const args = call.function.arguments ?? '';
+ const preview = typeof args === 'string' &&
+ args.length > TOOL_ARGS_PREVIEW_LENGTH
+ ? `${args.slice(0, TOOL_ARGS_PREVIEW_LENGTH)}…`
+ : args;
+ return {
+ id: call.id,
+ name: call.function.name,
+ argumentsPreview: preview,
+ };
+ }
+ return { id: call.id, type: call.type };
+ })
+ : undefined,
+ toolCallId: 'tool_call_id' in msg
+ ? msg.tool_call_id
+ : undefined,
+ })),
+ });
+ }
  const maxTokens = metadata?.maxTokens ??
  ephemeralSettings['max-tokens'];
  // Build request - only include tools if they exist and are not empty
@@ -674,8 +904,10 @@ export class OpenAIProvider extends BaseProvider {
  requestHasSystemPrompt: Boolean(systemPrompt?.length),
  messageCount: messagesWithSystem.length,
  });
+ logger.debug(() => `[OpenAIProvider] Request body detail`, {
+ body: requestBody,
+ });
  }
- let response;
  // Debug log throttle tracker status
  logger.debug(() => `Retry configuration:`, {
  hasThrottleTracker: !!this.throttleTracker,
@@ -684,6 +916,11 @@ export class OpenAIProvider extends BaseProvider {
  initialDelayMs,
  });
  const customHeaders = this.getCustomHeaders();
+ if (logger.enabled && customHeaders) {
+ logger.debug(() => `[OpenAIProvider] Applying custom headers`, {
+ headerKeys: Object.keys(customHeaders),
+ });
+ }
  if (logger.enabled) {
  logger.debug(() => `[OpenAIProvider] Request body preview`, {
  model: requestBody.model,
@@ -693,53 +930,80 @@ export class OpenAIProvider extends BaseProvider {
  overrideKeys: requestOverrides ? Object.keys(requestOverrides) : [],
  });
  }
- try {
- response = await retryWithBackoff(() => client.chat.completions.create(requestBody, {
- ...(abortSignal ? { signal: abortSignal } : {}),
- ...(customHeaders ? { headers: customHeaders } : {}),
- }), {
+ const executeRequest = () => client.chat.completions.create(requestBody, {
+ ...(abortSignal ? { signal: abortSignal } : {}),
+ ...(customHeaders ? { headers: customHeaders } : {}),
+ });
+ let response;
+ if (streamingEnabled) {
+ response = await retryWithBackoff(executeRequest, {
  maxAttempts: maxRetries,
  initialDelayMs,
  shouldRetry: this.shouldRetryResponse.bind(this),
  trackThrottleWaitTime: this.throttleTracker,
  });
  }
- catch (error) {
- // Special handling for Cerebras/Qwen "Tool not present" errors
- const errorMessage = String(error);
- if (errorMessage.includes('Tool is not present in the tools list') &&
- (model.toLowerCase().includes('qwen') ||
- this.getBaseURL()?.includes('cerebras'))) {
- logger.error('Cerebras/Qwen API error: Tool not found despite being in request. This is a known API issue.', {
- error,
- model,
- toolsProvided: formattedTools?.length || 0,
- toolNames: formattedTools?.map((t) => t.function.name),
- streamingEnabled,
- });
- // Re-throw but with better context
- const enhancedError = new Error(`Cerebras/Qwen API bug: Tool not found in list. We sent ${formattedTools?.length || 0} tools. Known API issue.`);
- enhancedError.originalError =
- error;
- throw enhancedError;
+ else {
+ let compressedOnce = false;
+ while (true) {
+ try {
+ response = (await retryWithBackoff(executeRequest, {
+ maxAttempts: maxRetries,
+ initialDelayMs,
+ shouldRetry: this.shouldRetryResponse.bind(this),
+ trackThrottleWaitTime: this.throttleTracker,
+ }));
+ break;
+ }
+ catch (error) {
+ const errorMessage = String(error);
+ logger.debug(() => `[OpenAIProvider] Chat request error`, {
+ errorType: error?.constructor?.name,
+ status: typeof error === 'object' && error && 'status' in error
+ ? error.status
+ : undefined,
+ errorKeys: error && typeof error === 'object' ? Object.keys(error) : [],
+ });
+ const isCerebrasToolError = errorMessage.includes('Tool is not present in the tools list') &&
+ (model.toLowerCase().includes('qwen') ||
+ this.getBaseURL()?.includes('cerebras'));
+ if (isCerebrasToolError) {
+ logger.error('Cerebras/Qwen API error: Tool not found despite being in request. This is a known API issue.', {
+ error,
+ model,
+ toolsProvided: formattedTools?.length || 0,
+ toolNames: formattedTools?.map((t) => t.function.name),
+ streamingEnabled,
+ });
+ const enhancedError = new Error(`Cerebras/Qwen API bug: Tool not found in list. We sent ${formattedTools?.length || 0} tools. Known API issue.`);
+ enhancedError.originalError = error;
+ throw enhancedError;
+ }
+ if (!compressedOnce &&
+ this.shouldCompressToolMessages(error, logger) &&
+ this.compressToolMessages(requestBody.messages, MAX_TOOL_RESPONSE_RETRY_CHARS, logger)) {
+ compressedOnce = true;
+ logger.warn(() => `[OpenAIProvider] Retrying request after compressing tool responses due to provider 400`);
+ continue;
+ }
+ const capturedErrorMessage = error instanceof Error ? error.message : String(error);
+ const status = typeof error === 'object' &&
+ error !== null &&
+ 'status' in error &&
+ typeof error.status === 'number'
+ ? error.status
+ : undefined;
+ logger.error(() => `[OpenAIProvider] Chat completion failed for model '${model}' at '${baseURL ?? this.getBaseURL() ?? 'default'}': ${capturedErrorMessage}`, {
+ model,
+ baseURL: baseURL ?? this.getBaseURL(),
+ streamingEnabled,
+ hasTools: formattedTools?.length ?? 0,
+ requestHasSystemPrompt: !!systemPrompt,
+ status,
+ });
+ throw error;
+ }
  }
- // Re-throw other errors as-is
- const capturedErrorMessage = error instanceof Error ? error.message : String(error);
- const status = typeof error === 'object' &&
- error !== null &&
- 'status' in error &&
- typeof error.status === 'number'
- ? error.status
- : undefined;
- logger.error(() => `[OpenAIProvider] Chat completion failed for model '${model}' at '${baseURL ?? this.getBaseURL() ?? 'default'}': ${capturedErrorMessage}`, {
- model,
- baseURL: baseURL ?? this.getBaseURL(),
- streamingEnabled,
- hasTools: formattedTools?.length ?? 0,
- requestHasSystemPrompt: !!systemPrompt,
- status,
- });
- throw error;
  }
  // Check if response is streaming or not
  if (streamingEnabled) {
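The non-streaming branch of the rewritten request loop retries at most once: when shouldCompressToolMessages recognizes an OpenRouter-style 400 (error.metadata.raw === 'ERROR'), compressToolMessages shrinks every tool message longer than MAX_TOOL_RESPONSE_RETRY_CHARS (512) and the request is resent. A compressed tool message ends up shaped like this (illustrative lengths):

  {"toolName":"read_file","status":"success","result":"[omitted 20480 chars due to provider limits]","truncated":true,"originalLength":20480}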
@@ -869,13 +1133,12 @@ export class OpenAIProvider extends BaseProvider {
  };
  }
  }
- // Handle tool calls
+ // Handle tool calls using legacy accumulated approach
  const deltaToolCalls = choice.delta?.tool_calls;
  if (deltaToolCalls && deltaToolCalls.length > 0) {
  for (const deltaToolCall of deltaToolCalls) {
  if (deltaToolCall.index === undefined)
  continue;
- // Initialize or update accumulated tool call
  if (!accumulatedToolCalls[deltaToolCall.index]) {
  accumulatedToolCalls[deltaToolCall.index] = {
  id: deltaToolCall.id || '',
@@ -905,37 +1168,16 @@ export class OpenAIProvider extends BaseProvider {
  if (!toolCall || toolCall.type !== 'function') {
  return;
  }
- let targetIndex = index;
- const annotated = toolCall;
- if (typeof annotated.index === 'number') {
- targetIndex = annotated.index;
- }
- else if (toolCall.id) {
- const matchIndex = accumulatedToolCalls.findIndex((existing) => existing && existing.id === toolCall.id);
- if (matchIndex >= 0) {
- targetIndex = matchIndex;
- }
- }
- if (!accumulatedToolCalls[targetIndex]) {
- accumulatedToolCalls[targetIndex] = {
+ if (!accumulatedToolCalls[index]) {
+ accumulatedToolCalls[index] = {
  id: toolCall.id || '',
  type: 'function',
  function: {
  name: toolCall.function?.name || '',
- arguments: '',
+ arguments: toolCall.function?.arguments || '',
  },
  };
  }
- const target = accumulatedToolCalls[targetIndex];
- if (toolCall.id) {
- target.id = toolCall.id;
- }
- if (toolCall.function?.name) {
- target.function.name = toolCall.function.name;
- }
- if (toolCall.function?.arguments !== undefined) {
- target.function.arguments = toolCall.function.arguments ?? '';
- }
  });
  }
  }
@@ -965,23 +1207,52 @@ export class OpenAIProvider extends BaseProvider {
  throw error;
  }
  }
- // Flush any remaining buffered text
+ // Check buffered text for <tool_call> format before flushing as plain text
  if (textBuffer.length > 0) {
- yield {
- speaker: 'ai',
- blocks: [
- {
- type: 'text',
- text: textBuffer,
- },
- ],
- };
+ // Try to parse <tool_call> format from buffered text
+ let parsedToolCalls = [];
+ let cleanedText = textBuffer;
+ try {
+ const parsedResult = this.textToolParser.parse(textBuffer);
+ if (parsedResult.toolCalls.length > 0) {
+ // Convert parsed tool calls to ToolCallBlock format
+ parsedToolCalls = parsedResult.toolCalls.map((call) => ({
+ type: 'tool_call',
+ id: `text_tool_${Date.now()}_${Math.random().toString(36).substring(7)}`,
+ name: call.name,
+ parameters: call.arguments,
+ }));
+ cleanedText = parsedResult.cleanedContent;
+ }
+ }
+ catch (error) {
+ const logger = this.getLogger();
+ logger.debug(() => `TextToolCallParser failed on buffered text: ${error}`);
+ }
+ // Emit tool calls from text parsing first
+ if (parsedToolCalls.length > 0) {
+ yield {
+ speaker: 'ai',
+ blocks: parsedToolCalls,
+ };
+ }
+ // Then emit any remaining cleaned text
+ if (cleanedText.trim().length > 0) {
+ yield {
+ speaker: 'ai',
+ blocks: [
+ {
+ type: 'text',
+ text: cleanedText,
+ },
+ ],
+ };
+ }
  textBuffer = '';
  }
- // Emit accumulated tool calls as IContent if any
+ // Process and emit tool calls using legacy accumulated approach
  if (accumulatedToolCalls.length > 0) {
  const blocks = [];
- // Use the same detected format from earlier for consistency
  for (const tc of accumulatedToolCalls) {
  if (!tc)
  continue;
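The flush step now routes buffered text through GemmaToolCallParser, so models that emit tool calls inline as text still yield real tool_call blocks. Assuming the common <tool_call> convention this parser family targets (the exact grammar accepted lives in TextToolCallParser.js), a buffered chunk like the following would produce one tool_call block plus the surrounding prose as cleaned text:

  I'll list the directory first.
  <tool_call>
  {"name": "list_directory", "arguments": {"path": "/tmp"}}
  </tool_call>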
@@ -1067,17 +1338,57 @@ export class OpenAIProvider extends BaseProvider {
  // Use the same detected format from earlier for consistency
  for (const toolCall of choice.message.tool_calls) {
  if (toolCall.type === 'function') {
+ // Use tool name directly without normalization for legacy compatibility
+ const toolName = toolCall.function.name || '';
  // Process tool parameters with double-escape handling
- const processedParameters = processToolParameters(toolCall.function.arguments || '', toolCall.function.name || '', detectedFormat);
+ const processedParameters = processToolParameters(toolCall.function.arguments || '', toolName, detectedFormat);
  blocks.push({
  type: 'tool_call',
  id: this.normalizeToHistoryToolId(toolCall.id),
- name: toolCall.function.name || '',
+ name: toolName,
  parameters: processedParameters,
  });
  }
  }
  }
+ // Additionally check for <tool_call> format in text content
+ if (choice.message?.content &&
+ typeof choice.message.content === 'string') {
+ try {
+ const parsedResult = this.textToolParser.parse(choice.message.content);
+ if (parsedResult.toolCalls.length > 0) {
+ // Add tool calls found in text content
+ for (const call of parsedResult.toolCalls) {
+ blocks.push({
+ type: 'tool_call',
+ id: `text_tool_${Date.now()}_${Math.random().toString(36).substring(7)}`,
+ name: call.name,
+ parameters: call.arguments,
+ });
+ }
+ // Update the text content to remove the tool call parts
+ if (choice.message.content !== parsedResult.cleanedContent) {
+ // Find the text block and update it
+ const textBlockIndex = blocks.findIndex((block) => block.type === 'text');
+ if (textBlockIndex >= 0) {
+ blocks[textBlockIndex].text =
+ parsedResult.cleanedContent;
+ }
+ else if (parsedResult.cleanedContent.trim()) {
+ // Add cleaned text if it doesn't exist
+ blocks.unshift({
+ type: 'text',
+ text: parsedResult.cleanedContent,
+ });
+ }
+ }
+ }
+ }
+ catch (error) {
+ const logger = this.getLogger();
+ logger.debug(() => `TextToolCallParser failed on message content: ${error}`);
+ }
+ }
  // Emit the complete response as a single IContent
  if (blocks.length > 0) {
  const responseContent = {
@@ -1167,8 +1478,655 @@ export class OpenAIProvider extends BaseProvider {
  /**
  * @plan:PLAN-20251023-STATELESS-HARDENING.P08
  * @requirement:REQ-SP4-003
- * Get the tool format for this provider using normalized options
- * @returns The tool format to use
+ * Internal implementation for chat completion using normalized options
+ * Routes to appropriate implementation based on toolCallProcessingMode
+ */
+ async *generateChatCompletionImpl(options, toolFormatter, client, logger) {
+ if (this.toolCallProcessingMode === 'legacy') {
+ yield* this.generateLegacyChatCompletionImpl(options, toolFormatter, client, logger);
+ }
+ else {
+ yield* this.generatePipelineChatCompletionImpl(options, toolFormatter, client, logger);
+ }
+ }
+ /**
+ * @plan:PLAN-20251023-STATELESS-HARDENING.P08
+ * @requirement:REQ-SP4-003
+ * Pipeline implementation for chat completion using optimized tool call pipeline
+ */
+ async *generatePipelineChatCompletionImpl(options, toolFormatter, client, logger) {
+ const { contents, tools, metadata } = options;
+ const model = options.resolved.model || this.getDefaultModel();
+ const abortSignal = metadata?.abortSignal;
+ const ephemeralSettings = options.invocation?.ephemerals ?? {};
+ if (logger.enabled) {
+ const resolved = options.resolved;
+ logger.debug(() => `[OpenAIProvider] Resolved request context`, {
+ provider: this.name,
+ model,
+ resolvedModel: resolved.model,
+ resolvedBaseUrl: resolved.baseURL,
+ authTokenPresent: Boolean(resolved.authToken),
+ messageCount: contents.length,
+ toolCount: tools?.length ?? 0,
+ metadataKeys: Object.keys(metadata ?? {}),
+ });
+ }
+ // Convert IContent to OpenAI messages format
+ const messages = this.convertToOpenAIMessages(contents);
+ // Detect the tool format to use (once at the start of the method)
+ const detectedFormat = this.detectToolFormat();
+ // Log the detected format for debugging
+ logger.debug(() => `[OpenAIProvider] Using tool format '${detectedFormat}' for model '${model}'`, {
+ model,
+ detectedFormat,
+ provider: this.name,
+ });
+ // Convert Gemini format tools to the detected format
+ let formattedTools = toolFormatter.convertGeminiToFormat(tools, detectedFormat);
+ // CRITICAL FIX: Ensure we never pass an empty tools array
+ // The OpenAI API errors when tools=[] but a tool call is attempted
+ if (Array.isArray(formattedTools) && formattedTools.length === 0) {
+ logger.warn(() => `[OpenAIProvider] CRITICAL: Formatted tools is empty array! Setting to undefined to prevent API errors.`, {
+ model,
+ inputTools: tools,
+ inputToolsLength: tools?.length,
+ inputFirstGroup: tools?.[0],
+ stackTrace: new Error().stack,
+ });
+ formattedTools = undefined;
+ }
+ // Debug log the conversion result - enhanced logging for intermittent issues
+ if (logger.enabled && formattedTools) {
+ logger.debug(() => `[OpenAIProvider] Tool conversion summary:`, {
+ detectedFormat,
+ inputHadTools: !!tools,
+ inputToolsLength: tools?.length,
+ inputFirstGroup: tools?.[0],
+ inputFunctionDeclarationsLength: tools?.[0]?.functionDeclarations?.length,
+ outputHasTools: !!formattedTools,
+ outputToolsLength: formattedTools?.length,
+ outputToolNames: formattedTools?.map((t) => t.function.name),
+ });
+ }
+ // Get streaming setting from ephemeral settings (default: enabled)
+ const streamingSetting = ephemeralSettings['streaming'];
+ const streamingEnabled = streamingSetting !== 'disabled';
+ // Get the system prompt
+ const flattenedToolNames = tools?.flatMap((group) => group.functionDeclarations
+ .map((decl) => decl.name)
+ .filter((name) => !!name)) ?? [];
+ const toolNamesArg = tools === undefined ? undefined : Array.from(new Set(flattenedToolNames));
+ /**
+ * @plan:PLAN-20251023-STATELESS-HARDENING.P08
+ * @requirement:REQ-SP4-003
+ * Source user memory from normalized options instead of global config
+ */
+ const userMemory = await resolveUserMemory(options.userMemory, () => options.invocation?.userMemory);
+ const systemPrompt = await getCoreSystemPromptAsync(userMemory, model, toolNamesArg);
+ // Add system prompt as the first message in the array
+ const messagesWithSystem = [
+ { role: 'system', content: systemPrompt },
+ ...messages,
+ ];
+ const maxTokens = metadata?.maxTokens ??
+ ephemeralSettings['max-tokens'];
+ // Build request - only include tools if they exist and are not empty
+ // IMPORTANT: Create a deep copy of tools to prevent mutation issues
+ const requestBody = {
+ model,
+ messages: messagesWithSystem,
+ stream: streamingEnabled,
+ };
+ if (formattedTools && formattedTools.length > 0) {
+ requestBody.tools = JSON.parse(JSON.stringify(formattedTools));
+ requestBody.tool_choice = 'auto';
+ }
+ /**
+ * @plan:PLAN-20251023-STATELESS-HARDENING.P08
+ * @requirement:REQ-SP4-002
+ * Extract per-call request overrides from normalized options instead of cached state
+ */
+ const requestOverrides = this.extractModelParamsFromOptions(options);
+ if (requestOverrides) {
+ if (logger.enabled) {
+ logger.debug(() => `[OpenAIProvider] Applying request overrides`, {
+ overrideKeys: Object.keys(requestOverrides),
+ });
+ }
+ Object.assign(requestBody, requestOverrides);
+ }
+ if (typeof maxTokens === 'number' && Number.isFinite(maxTokens)) {
+ requestBody.max_tokens = maxTokens;
+ }
+ // Debug log request summary for Cerebras/Qwen
+ const baseURL = options.resolved.baseURL ?? this.getBaseURL();
+ if (logger.enabled &&
+ (model.toLowerCase().includes('qwen') || baseURL?.includes('cerebras'))) {
+ logger.debug(() => `Request to ${baseURL} for model ${model}:`, {
+ baseURL,
+ model,
+ streamingEnabled,
+ hasTools: 'tools' in requestBody,
+ toolCount: formattedTools?.length || 0,
+ messageCount: messages.length,
+ toolsInRequest: 'tools' in requestBody ? requestBody.tools?.length : 'not included',
+ });
+ }
+ // Get retry settings from ephemeral settings
+ const maxRetries = ephemeralSettings['retries'] ?? 6; // Default for OpenAI
+ const initialDelayMs = ephemeralSettings['retrywait'] ?? 4000; // Default for OpenAI
+ // Get stream options from ephemeral settings (default: include usage for token tracking)
+ const streamOptions = ephemeralSettings['stream-options'] || { include_usage: true };
+ // Add stream options to request if streaming is enabled
+ if (streamingEnabled && streamOptions) {
+ Object.assign(requestBody, { stream_options: streamOptions });
+ }
+ // Log the exact tools being sent for debugging
+ if (logger.enabled && 'tools' in requestBody) {
+ logger.debug(() => `[OpenAIProvider] Exact tools being sent to API:`, {
+ toolCount: requestBody.tools?.length,
+ toolNames: requestBody.tools?.map((t) => 'function' in t ? t.function?.name : undefined),
+ firstTool: requestBody.tools?.[0],
+ });
+ }
+ // Wrap the API call with retry logic using centralized retry utility
+ if (logger.enabled) {
+ logger.debug(() => `[OpenAIProvider] Sending chat request`, {
+ model,
+ baseURL: baseURL ?? this.getBaseURL(),
+ streamingEnabled,
+ toolCount: formattedTools?.length ?? 0,
+ hasAuthToken: Boolean(options.resolved.authToken),
+ requestHasSystemPrompt: Boolean(systemPrompt?.length),
+ messageCount: messagesWithSystem.length,
+ });
+ }
+ let response;
+ // Debug log throttle tracker status
+ logger.debug(() => `Retry configuration:`, {
+ hasThrottleTracker: !!this.throttleTracker,
+ throttleTrackerType: typeof this.throttleTracker,
+ maxRetries,
+ initialDelayMs,
+ });
+ const customHeaders = this.getCustomHeaders();
+ if (logger.enabled) {
+ logger.debug(() => `[OpenAIProvider] Request body preview`, {
+ model: requestBody.model,
+ hasStop: 'stop' in requestBody,
+ hasMaxTokens: 'max_tokens' in requestBody,
+ hasResponseFormat: 'response_format' in requestBody,
+ overrideKeys: requestOverrides ? Object.keys(requestOverrides) : [],
+ });
+ }
+ try {
+ response = await retryWithBackoff(() => client.chat.completions.create(requestBody, {
+ ...(abortSignal ? { signal: abortSignal } : {}),
+ ...(customHeaders ? { headers: customHeaders } : {}),
+ }), {
+ maxAttempts: maxRetries,
+ initialDelayMs,
+ shouldRetry: this.shouldRetryResponse.bind(this),
+ trackThrottleWaitTime: this.throttleTracker,
+ });
+ }
+ catch (error) {
+ // Special handling for Cerebras/Qwen "Tool not present" errors
+ const errorMessage = String(error);
+ if (errorMessage.includes('Tool is not present in the tools list') &&
+ (model.toLowerCase().includes('qwen') ||
+ this.getBaseURL()?.includes('cerebras'))) {
+ logger.error('Cerebras/Qwen API error: Tool not found despite being in request. This is a known API issue.', {
+ error,
+ model,
+ toolsProvided: formattedTools?.length || 0,
+ toolNames: formattedTools?.map((t) => t.function.name),
+ streamingEnabled,
+ });
+ // Re-throw but with better context
+ const enhancedError = new Error(`Cerebras/Qwen API bug: Tool not found in list. We sent ${formattedTools?.length || 0} tools. Known API issue.`);
+ enhancedError.originalError =
+ error;
+ throw enhancedError;
+ }
+ // Re-throw other errors as-is
+ const capturedErrorMessage = error instanceof Error ? error.message : String(error);
+ const status = typeof error === 'object' &&
+ error !== null &&
+ 'status' in error &&
+ typeof error.status === 'number'
+ ? error.status
+ : undefined;
+ logger.error(() => `[OpenAIProvider] Chat completion failed for model '${model}' at '${baseURL ?? this.getBaseURL() ?? 'default'}': ${capturedErrorMessage}`, {
+ model,
+ baseURL: baseURL ?? this.getBaseURL(),
+ streamingEnabled,
+ hasTools: formattedTools?.length ?? 0,
+ requestHasSystemPrompt: !!systemPrompt,
+ status,
+ });
+ throw error;
+ }
+ // Check if response is streaming or not
+ if (streamingEnabled) {
+ // Process streaming response
+ let _accumulatedText = '';
+ // Initialize tool call pipeline for this streaming session
+ this.toolCallPipeline.reset();
+ // Buffer for accumulating text chunks for providers that need it
+ let textBuffer = '';
+ // Use the same detected format from earlier for consistency
+ // Buffer text for Qwen format providers to avoid stanza formatting
+ const shouldBufferText = detectedFormat === 'qwen';
+ // Track token usage from streaming chunks
+ let streamingUsage = null;
+ const allChunks = []; // Collect all chunks first
+ try {
+ // Handle streaming response - collect all chunks
+ for await (const chunk of response) {
+ if (abortSignal?.aborted) {
+ break;
+ }
+ allChunks.push(chunk);
+ }
+ // Now process all collected chunks
+ for (const chunk of allChunks) {
+ const chunkRecord = chunk;
+ let parsedData;
+ const rawData = chunkRecord?.data;
+ if (typeof rawData === 'string') {
+ try {
+ parsedData = JSON.parse(rawData);
+ }
+ catch {
+ parsedData = undefined;
+ }
+ }
+ else if (rawData && typeof rawData === 'object') {
+ parsedData = rawData;
+ }
+ const streamingError = chunkRecord?.error ??
+ parsedData?.error ??
+ parsedData?.data?.error;
+ const streamingEvent = (chunkRecord?.event ?? parsedData?.event);
+ const streamingErrorMessage = streamingError?.message ??
+ streamingError?.error ??
+ parsedData?.message;
+ if (streamingEvent === 'error' ||
+ (streamingError && typeof streamingError === 'object')) {
+ const errorMessage = streamingErrorMessage ??
+ (typeof streamingError === 'string'
+ ? streamingError
+ : 'Streaming response reported an error.');
+ throw new Error(errorMessage);
+ }
+ // Extract usage information if present (typically in final chunk)
+ if (chunk.usage) {
+ streamingUsage = chunk.usage;
+ }
+ const choice = chunk.choices?.[0];
+ if (!choice)
+ continue;
+ // Check for finish_reason to detect proper stream ending
+ if (choice.finish_reason) {
+ logger.debug(() => `[Streaming] Stream finished with reason: ${choice.finish_reason}`, {
+ model,
+ finishReason: choice.finish_reason,
+ hasAccumulatedText: _accumulatedText.length > 0,
+ hasAccumulatedTools: this.toolCallPipeline.getStats().collector.totalCalls > 0,
+ hasBufferedText: textBuffer.length > 0,
+ });
+ // If finish_reason is 'length', the response was cut off
+ if (choice.finish_reason === 'length') {
+ logger.debug(() => `Response truncated due to length limit for model ${model}`);
+ }
+ // Flush any buffered text when stream finishes
+ if (textBuffer.length > 0) {
+ yield {
+ speaker: 'ai',
+ blocks: [
+ {
+ type: 'text',
+ text: textBuffer,
+ },
+ ],
+ };
+ textBuffer = '';
+ }
+ }
+ // Handle text content - buffer for Qwen format, emit immediately for others
+ const deltaContent = choice.delta?.content;
+ if (deltaContent) {
+ _accumulatedText += deltaContent;
+ // Debug log for providers that need buffering
+ if (shouldBufferText) {
+ logger.debug(() => `[Streaming] Chunk content for ${detectedFormat} format:`, {
+ deltaContent,
+ length: deltaContent.length,
+ hasNewline: deltaContent.includes('\n'),
+ escaped: JSON.stringify(deltaContent),
+ bufferSize: textBuffer.length,
+ });
+ // Buffer text to avoid stanza formatting
+ textBuffer += deltaContent;
+ // Emit buffered text when we have a complete sentence or paragraph
+ // Look for natural break points
+ if (textBuffer.includes('\n') ||
+ textBuffer.endsWith('. ') ||
+ textBuffer.endsWith('! ') ||
+ textBuffer.endsWith('? ') ||
+ textBuffer.length > 100) {
+ yield {
+ speaker: 'ai',
+ blocks: [
+ {
+ type: 'text',
+ text: textBuffer,
+ },
+ ],
+ };
+ textBuffer = '';
+ }
+ }
+ else {
+ // For other providers, emit text immediately as before
+ yield {
+ speaker: 'ai',
+ blocks: [
+ {
+ type: 'text',
+ text: deltaContent,
+ },
+ ],
+ };
+ }
+ }
+ // Handle tool calls using the new pipeline
+ const deltaToolCalls = choice.delta?.tool_calls;
+ if (deltaToolCalls && deltaToolCalls.length > 0) {
+ for (const deltaToolCall of deltaToolCalls) {
+ if (deltaToolCall.index === undefined)
+ continue;
+ // Add fragment to pipeline instead of accumulating strings
+ this.toolCallPipeline.addFragment(deltaToolCall.index, {
+ name: deltaToolCall.function?.name,
+ args: deltaToolCall.function?.arguments,
+ });
+ }
+ }
+ const choiceMessage = choice.message;
+ const messageToolCalls = choiceMessage?.tool_calls;
+ if (messageToolCalls && messageToolCalls.length > 0) {
+ messageToolCalls.forEach((toolCall, index) => {
+ if (!toolCall || toolCall.type !== 'function') {
+ return;
+ }
+ // Add final complete tool call to pipeline
+ this.toolCallPipeline.addFragment(index, {
+ name: toolCall.function?.name,
+ args: toolCall.function?.arguments,
+ });
+ });
+ }
+ }
+ }
+ catch (error) {
+ if (abortSignal?.aborted) {
+ throw error;
+ }
+ else {
+ // Special handling for Cerebras/Qwen "Tool not present" errors
+ const errorMessage = String(error);
+ if (errorMessage.includes('Tool is not present in the tools list') &&
+ (model.toLowerCase().includes('qwen') ||
+ this.getBaseURL()?.includes('cerebras'))) {
+ logger.error('Cerebras/Qwen API error: Tool not found despite being in request. This is a known API issue.', {
+ error,
+ model,
+ toolsProvided: formattedTools?.length || 0,
+ toolNames: formattedTools?.map((t) => t.function.name),
+ streamingEnabled,
+ });
+ // Re-throw but with better context
+ const enhancedError = new Error(`Cerebras/Qwen API bug: Tool not found in list during streaming. We sent ${formattedTools?.length || 0} tools. Known API issue.`);
+ enhancedError.originalError = error;
+ throw enhancedError;
+ }
+ logger.error('Error processing streaming response:', error);
+ throw error;
+ }
+ }
+ // Check buffered text for <tool_call> format before flushing as plain text
+ if (textBuffer.length > 0) {
+ // Try to parse <tool_call> format from buffered text
+ let parsedToolCalls = [];
+ let cleanedText = textBuffer;
+ try {
+ const parsedResult = this.textToolParser.parse(textBuffer);
+ if (parsedResult.toolCalls.length > 0) {
+ // Convert parsed tool calls to ToolCallBlock format
+ parsedToolCalls = parsedResult.toolCalls.map((call) => ({
+ type: 'tool_call',
+ id: `text_tool_${Date.now()}_${Math.random().toString(36).substring(7)}`,
+ name: call.name,
+ parameters: call.arguments,
+ }));
+ cleanedText = parsedResult.cleanedContent;
+ }
+ }
+ catch (error) {
+ const logger = this.getLogger();
+ logger.debug(() => `TextToolCallParser failed on buffered text: ${error}`);
+ }
+ // Emit tool calls from text parsing first
+ if (parsedToolCalls.length > 0) {
+ yield {
+ speaker: 'ai',
+ blocks: parsedToolCalls,
+ };
+ }
+ // Then emit any remaining cleaned text
+ if (cleanedText.trim().length > 0) {
+ yield {
+ speaker: 'ai',
+ blocks: [
+ {
+ type: 'text',
+ text: cleanedText,
+ },
+ ],
+ };
+ }
+ textBuffer = '';
+ }
+ // Process and emit tool calls using the pipeline
+ const pipelineResult = await this.toolCallPipeline.process();
+ if (pipelineResult.executed.length > 0 ||
+ pipelineResult.failed.length > 0) {
+ const blocks = [];
+ // Process successful tool calls
+ for (const normalizedCall of pipelineResult.normalized) {
+ // Process tool parameters with double-escape handling
+ const processedParameters = processToolParameters(normalizedCall.originalArgs || JSON.stringify(normalizedCall.args), normalizedCall.name, detectedFormat);
+ blocks.push({
+ type: 'tool_call',
+ id: this.normalizeToHistoryToolId(`call_${normalizedCall.index}`),
+ name: normalizedCall.name,
+ parameters: processedParameters,
+ });
+ }
+ // Handle failed tool calls (could emit as errors or warnings)
+ for (const failed of pipelineResult.failed) {
+ this.getLogger().warn(`Tool call validation failed for index ${failed.index}: ${failed.validationErrors.join(', ')}`);
+ }
+ if (blocks.length > 0) {
+ const toolCallsContent = {
+ speaker: 'ai',
+ blocks,
+ };
+ // Add usage metadata if we captured it from streaming
+ if (streamingUsage) {
+ toolCallsContent.metadata = {
+ usage: {
+ promptTokens: streamingUsage.prompt_tokens || 0,
+ completionTokens: streamingUsage.completion_tokens || 0,
+ totalTokens: streamingUsage.total_tokens ||
+ (streamingUsage.prompt_tokens || 0) +
+ (streamingUsage.completion_tokens || 0),
+ },
+ };
+ }
+ yield toolCallsContent;
+ }
+ }
+ // If we have usage information but no tool calls, emit a metadata-only response
+ if (streamingUsage &&
+ this.toolCallPipeline.getStats().collector.totalCalls === 0) {
+ yield {
+ speaker: 'ai',
+ blocks: [],
+ metadata: {
+ usage: {
+ promptTokens: streamingUsage.prompt_tokens || 0,
+ completionTokens: streamingUsage.completion_tokens || 0,
+ totalTokens: streamingUsage.total_tokens ||
+ (streamingUsage.prompt_tokens || 0) +
+ (streamingUsage.completion_tokens || 0),
+ },
+ },
+ };
+ }
+ }
+ else {
+ // Handle non-streaming response
+ const completion = response;
+ const choice = completion.choices?.[0];
+ if (!choice) {
+ throw new Error('No choices in completion response');
+ }
+ // Log finish reason for debugging Qwen issues
+ if (choice.finish_reason) {
+ logger.debug(() => `[Non-streaming] Response finish_reason: ${choice.finish_reason}`, {
+ model,
+ finishReason: choice.finish_reason,
+ hasContent: !!choice.message?.content,
+ hasToolCalls: !!(choice.message?.tool_calls && choice.message.tool_calls.length > 0),
+ contentLength: choice.message?.content?.length || 0,
+ toolCallCount: choice.message?.tool_calls?.length || 0,
+ detectedFormat,
+ });
+ // Warn if the response was truncated
+ if (choice.finish_reason === 'length') {
+ logger.warn(() => `Response truncated due to max_tokens limit for model ${model}. Consider increasing max_tokens.`);
+ }
+ }
+ const blocks = [];
+ // Handle text content
+ if (choice.message?.content) {
+ blocks.push({
+ type: 'text',
+ text: choice.message.content,
+ });
+ }
+ // Handle tool calls
+ if (choice.message?.tool_calls && choice.message.tool_calls.length > 0) {
+ // Use the same detected format from earlier for consistency
+ for (const toolCall of choice.message.tool_calls) {
+ if (toolCall.type === 'function') {
+ // Normalize tool name for consistency with streaming path
+ const normalizedName = this.toolCallPipeline.normalizeToolName(toolCall.function.name, toolCall.function.arguments);
+ // Process tool parameters with double-escape handling
+ const processedParameters = processToolParameters(toolCall.function.arguments || '', normalizedName, detectedFormat);
+ blocks.push({
+ type: 'tool_call',
+ id: this.normalizeToHistoryToolId(toolCall.id),
+ name: normalizedName,
+ parameters: processedParameters,
+ });
+ }
+ }
+ }
+ // Additionally check for <tool_call> format in text content
+ if (choice.message?.content &&
+ typeof choice.message.content === 'string') {
+ try {
+ const parsedResult = this.textToolParser.parse(choice.message.content);
+ if (parsedResult.toolCalls.length > 0) {
+ // Add tool calls found in text content
+ for (const call of parsedResult.toolCalls) {
+ blocks.push({
+ type: 'tool_call',
+ id: `text_tool_${Date.now()}_${Math.random().toString(36).substring(7)}`,
+ name: call.name,
+ parameters: call.arguments,
+ });
+ }
+ // Update the text content to remove the tool call parts
+ if (choice.message.content !== parsedResult.cleanedContent) {
+ // Find the text block and update it
+ const textBlockIndex = blocks.findIndex((block) => block.type === 'text');
+ if (textBlockIndex >= 0) {
+ blocks[textBlockIndex].text =
+ parsedResult.cleanedContent;
+ }
+ else if (parsedResult.cleanedContent.trim()) {
+ // Add cleaned text if it doesn't exist
+ blocks.unshift({
+ type: 'text',
+ text: parsedResult.cleanedContent,
+ });
+ }
+ }
+ }
+ }
+ catch (error) {
+ const logger = this.getLogger();
+ logger.debug(() => `TextToolCallParser failed on message content: ${error}`);
+ }
+ }
+ // Emit the complete response as a single IContent
+ if (blocks.length > 0) {
+ const responseContent = {
+ speaker: 'ai',
+ blocks,
+ };
+ // Add usage metadata from non-streaming response
+ if (completion.usage) {
+ responseContent.metadata = {
+ usage: {
+ promptTokens: completion.usage.prompt_tokens || 0,
+ completionTokens: completion.usage.completion_tokens || 0,
+ totalTokens: completion.usage.total_tokens ||
+ (completion.usage.prompt_tokens || 0) +
+ (completion.usage.completion_tokens || 0),
+ },
+ };
+ }
+ yield responseContent;
+ }
+ else if (completion.usage) {
+ // Emit metadata-only response if no content blocks but have usage info
+ yield {
+ speaker: 'ai',
+ blocks: [],
+ metadata: {
+ usage: {
+ promptTokens: completion.usage.prompt_tokens || 0,
+ completionTokens: completion.usage.completion_tokens || 0,
+ totalTokens: completion.usage.total_tokens ||
+ (completion.usage.prompt_tokens || 0) +
+ (completion.usage.completion_tokens || 0),
+ },
+ },
+ };
+ }
+ }
+ }
+ /**
+ * @plan:PLAN-20251023-STATELESS-HARDENING.P08
+ * @requirement:REQ-SP4-003
+ * Legacy implementation for chat completion using accumulated tool calls approach
  */
  getToolFormat() {
  const format = this.detectToolFormat();
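The pipeline implementation above replaces the hand-rolled accumulatedToolCalls array: streaming deltas are fed straight into ToolCallPipeline, and assembly, normalization, and validation happen in process(). Condensed from the code above (no additional API surface is assumed):

  this.toolCallPipeline.reset();                 // once per streaming session
  // for each streamed delta with a defined index:
  this.toolCallPipeline.addFragment(delta.index, {
    name: delta.function?.name,
    args: delta.function?.arguments,
  });
  // after the stream ends:
  const result = await this.toolCallPipeline.process();
  // result.normalized -> { index, name, args, originalArgs } entries, emitted as tool_call blocks
  // result.failed     -> entries logged via their validationErrors; result.executed is also reported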
@@ -1254,6 +2212,9 @@ export class OpenAIProvider extends BaseProvider {
  status,
  errorMessage: error instanceof Error ? error.message : String(error),
  errorKeys: error && typeof error === 'object' ? Object.keys(error) : [],
+ errorData: error && typeof error === 'object' && 'error' in error
+ ? error.error
+ : undefined,
  });
  // Retry on 429 rate limit errors or 5xx server errors
  const shouldRetry = Boolean(status === 429 || status === 503 || status === 504);