@vybestack/llxprt-code-core 0.1.23-nightly.250904.97906524 → 0.1.23-nightly.250905.67589d14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. package/dist/src/adapters/IStreamAdapter.d.ts +3 -3
  2. package/dist/src/auth/types.d.ts +4 -4
  3. package/dist/src/config/index.d.ts +7 -0
  4. package/dist/src/config/index.js +8 -0
  5. package/dist/src/config/index.js.map +1 -0
  6. package/dist/src/core/client.d.ts +9 -21
  7. package/dist/src/core/client.js +46 -144
  8. package/dist/src/core/client.js.map +1 -1
  9. package/dist/src/core/compression-config.d.ts +1 -1
  10. package/dist/src/core/compression-config.js +4 -5
  11. package/dist/src/core/compression-config.js.map +1 -1
  12. package/dist/src/core/coreToolScheduler.js +50 -15
  13. package/dist/src/core/coreToolScheduler.js.map +1 -1
  14. package/dist/src/core/geminiChat.d.ts +51 -2
  15. package/dist/src/core/geminiChat.js +592 -93
  16. package/dist/src/core/geminiChat.js.map +1 -1
  17. package/dist/src/core/nonInteractiveToolExecutor.js +70 -19
  18. package/dist/src/core/nonInteractiveToolExecutor.js.map +1 -1
  19. package/dist/src/index.d.ts +1 -2
  20. package/dist/src/index.js +2 -2
  21. package/dist/src/index.js.map +1 -1
  22. package/dist/src/providers/BaseProvider.d.ts +8 -3
  23. package/dist/src/providers/BaseProvider.js.map +1 -1
  24. package/dist/src/providers/IProvider.d.ts +9 -3
  25. package/dist/src/providers/LoggingProviderWrapper.d.ts +10 -3
  26. package/dist/src/providers/LoggingProviderWrapper.js +33 -27
  27. package/dist/src/providers/LoggingProviderWrapper.js.map +1 -1
  28. package/dist/src/providers/ProviderContentGenerator.d.ts +2 -2
  29. package/dist/src/providers/ProviderContentGenerator.js +9 -6
  30. package/dist/src/providers/ProviderContentGenerator.js.map +1 -1
  31. package/dist/src/providers/anthropic/AnthropicProvider.d.ts +12 -17
  32. package/dist/src/providers/anthropic/AnthropicProvider.js +238 -447
  33. package/dist/src/providers/anthropic/AnthropicProvider.js.map +1 -1
  34. package/dist/src/providers/gemini/GeminiProvider.d.ts +12 -6
  35. package/dist/src/providers/gemini/GeminiProvider.js +184 -458
  36. package/dist/src/providers/gemini/GeminiProvider.js.map +1 -1
  37. package/dist/src/providers/openai/ConversationCache.d.ts +3 -3
  38. package/dist/src/providers/openai/IChatGenerateParams.d.ts +9 -4
  39. package/dist/src/providers/openai/OpenAIProvider.d.ts +14 -61
  40. package/dist/src/providers/openai/OpenAIProvider.js +270 -575
  41. package/dist/src/providers/openai/OpenAIProvider.js.map +1 -1
  42. package/dist/src/providers/openai/buildResponsesRequest.d.ts +3 -3
  43. package/dist/src/providers/openai/buildResponsesRequest.js +67 -37
  44. package/dist/src/providers/openai/buildResponsesRequest.js.map +1 -1
  45. package/dist/src/providers/openai/estimateRemoteTokens.d.ts +2 -2
  46. package/dist/src/providers/openai/estimateRemoteTokens.js +21 -8
  47. package/dist/src/providers/openai/estimateRemoteTokens.js.map +1 -1
  48. package/dist/src/providers/openai/parseResponsesStream.d.ts +6 -2
  49. package/dist/src/providers/openai/parseResponsesStream.js +99 -391
  50. package/dist/src/providers/openai/parseResponsesStream.js.map +1 -1
  51. package/dist/src/providers/openai/syntheticToolResponses.d.ts +5 -5
  52. package/dist/src/providers/openai/syntheticToolResponses.js +102 -91
  53. package/dist/src/providers/openai/syntheticToolResponses.js.map +1 -1
  54. package/dist/src/providers/openai-responses/OpenAIResponsesProvider.d.ts +16 -17
  55. package/dist/src/providers/openai-responses/OpenAIResponsesProvider.js +222 -224
  56. package/dist/src/providers/openai-responses/OpenAIResponsesProvider.js.map +1 -1
  57. package/dist/src/providers/types.d.ts +1 -1
  58. package/dist/src/services/history/ContentConverters.d.ts +6 -1
  59. package/dist/src/services/history/ContentConverters.js +155 -18
  60. package/dist/src/services/history/ContentConverters.js.map +1 -1
  61. package/dist/src/services/history/HistoryService.d.ts +52 -0
  62. package/dist/src/services/history/HistoryService.js +245 -93
  63. package/dist/src/services/history/HistoryService.js.map +1 -1
  64. package/dist/src/services/history/IContent.d.ts +4 -0
  65. package/dist/src/services/history/IContent.js.map +1 -1
  66. package/dist/src/telemetry/types.d.ts +16 -4
  67. package/dist/src/telemetry/types.js.map +1 -1
  68. package/dist/src/tools/IToolFormatter.d.ts +2 -2
  69. package/dist/src/tools/ToolFormatter.d.ts +3 -3
  70. package/dist/src/tools/ToolFormatter.js +80 -37
  71. package/dist/src/tools/ToolFormatter.js.map +1 -1
  72. package/dist/src/tools/todo-schemas.d.ts +4 -4
  73. package/package.json +8 -7
  74. package/dist/src/core/ContentGeneratorAdapter.d.ts +0 -37
  75. package/dist/src/core/ContentGeneratorAdapter.js +0 -58
  76. package/dist/src/core/ContentGeneratorAdapter.js.map +0 -1
  77. package/dist/src/providers/IMessage.d.ts +0 -38
  78. package/dist/src/providers/IMessage.js +0 -17
  79. package/dist/src/providers/IMessage.js.map +0 -1
  80. package/dist/src/providers/adapters/GeminiCompatibleWrapper.d.ts +0 -69
  81. package/dist/src/providers/adapters/GeminiCompatibleWrapper.js +0 -577
  82. package/dist/src/providers/adapters/GeminiCompatibleWrapper.js.map +0 -1
@@ -17,6 +17,9 @@ import { ApiErrorEvent, ApiRequestEvent, ApiResponseEvent, } from '../telemetry/
17
17
  import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js';
18
18
  import { hasCycleInSchema } from '../tools/tools.js';
19
19
  import { isStructuredError } from '../utils/quotaErrorDetection.js';
20
+ import { DebugLogger } from '../debug/index.js';
21
+ import { getCompressionPrompt } from './prompts.js';
22
+ import { COMPRESSION_TOKEN_THRESHOLD, COMPRESSION_PRESERVE_THRESHOLD, } from './compression-config.js';
20
23
  /**
21
24
  * Custom createUserContent function that properly handles function response arrays.
22
25
  * This fixes the issue where multiple function responses are incorrectly nested.
@@ -56,9 +59,6 @@ function createUserContentWithFunctionResponseFix(message) {
56
59
  }
57
60
  else if (Array.isArray(item)) {
58
61
  // Nested array case - flatten it
59
- if (process.env.DEBUG) {
60
- console.log('[DEBUG] createUserContentWithFunctionResponseFix - flattening nested array:', JSON.stringify(item, null, 2));
61
- }
62
62
  for (const subItem of item) {
63
63
  parts.push(subItem);
64
64
  }
@@ -184,27 +184,59 @@ export class EmptyStreamError extends Error {
184
184
  */
185
185
  export class GeminiChat {
186
186
  config;
187
- contentGenerator;
188
187
  generationConfig;
189
188
  // A promise to represent the current state of the message being sent to the
190
189
  // model.
191
190
  sendPromise = Promise.resolve();
191
+ // A promise to represent any ongoing compression operation
192
+ compressionPromise = null;
192
193
  historyService;
194
+ logger = new DebugLogger('llxprt:gemini:chat');
195
+ // Cache the compression threshold to avoid recalculating
196
+ cachedCompressionThreshold = null;
193
197
  constructor(config, contentGenerator, generationConfig = {}, initialHistory = [], historyService) {
194
198
  this.config = config;
195
- this.contentGenerator = contentGenerator;
196
199
  this.generationConfig = generationConfig;
197
200
  validateHistory(initialHistory);
198
201
  // Use provided HistoryService or create a new one
199
202
  this.historyService = historyService || new HistoryService();
203
+ this.logger.debug('GeminiChat initialized:', {
204
+ model: this.config.getModel(),
205
+ initialHistoryLength: initialHistory.length,
206
+ hasHistoryService: !!historyService,
207
+ });
200
208
  // Convert and add initial history if provided
201
209
  if (initialHistory.length > 0) {
202
210
  const currentModel = this.config.getModel();
211
+ this.logger.debug('Adding initial history to service:', {
212
+ count: initialHistory.length,
213
+ });
214
+ const idGen = this.historyService.getIdGeneratorCallback();
203
215
  for (const content of initialHistory) {
204
- this.historyService.add(ContentConverters.toIContent(content), currentModel);
216
+ const matcher = this.makePositionMatcher();
217
+ this.historyService.add(ContentConverters.toIContent(content, idGen, matcher), currentModel);
205
218
  }
206
219
  }
207
220
  }
221
+ /**
222
+ * Create a position-based matcher for Gemini tool responses.
223
+ * It returns the next unmatched tool call from the current history.
224
+ */
225
+ makePositionMatcher() {
226
+ const queue = this.historyService
227
+ .findUnmatchedToolCalls()
228
+ .map((b) => ({ historyId: b.id, toolName: b.name }));
229
+ // Return undefined if there are no unmatched tool calls
230
+ if (queue.length === 0) {
231
+ return undefined;
232
+ }
233
+ // Return a function that always returns a valid value (never undefined)
234
+ return () => {
235
+ const result = queue.shift();
236
+ // If queue is empty, return a fallback value
237
+ return result || { historyId: '', toolName: undefined };
238
+ };
239
+ }
208
240
  _getRequestTextFromContents(contents) {
209
241
  return JSON.stringify(contents);
210
242
  }
@@ -288,28 +320,62 @@ export class GeminiChat {
288
320
  */
289
321
  async sendMessage(params, prompt_id) {
290
322
  await this.sendPromise;
323
+ // Check compression - first check if already compressing, then check if needed
324
+ if (this.compressionPromise) {
325
+ this.logger.debug('Waiting for ongoing compression to complete');
326
+ await this.compressionPromise;
327
+ }
328
+ else if (this.shouldCompress()) {
329
+ // Only check shouldCompress if not already compressing
330
+ this.logger.debug('Triggering compression before message send');
331
+ this.compressionPromise = this.performCompression(prompt_id);
332
+ await this.compressionPromise;
333
+ this.compressionPromise = null;
334
+ }
291
335
  const userContent = createUserContentWithFunctionResponseFix(params.message);
292
- // Add user content to history service
293
- this.historyService.add(ContentConverters.toIContent(userContent), this.config.getModel());
294
- // Get curated history and convert to Content[] for the request
295
- const iContents = this.historyService.getCurated();
296
- const requestContents = ContentConverters.toGeminiContents(iContents);
297
- this._logApiRequest(requestContents, this.config.getModel(), prompt_id);
336
+ // DO NOT add user content to history yet - use send-then-commit pattern
337
+ // Get the active provider
338
+ const provider = this.getActiveProvider();
339
+ if (!provider) {
340
+ throw new Error('No active provider configured');
341
+ }
342
+ // Check if provider supports IContent interface
343
+ if (!this.providerSupportsIContent(provider)) {
344
+ throw new Error(`Provider ${provider.name} does not support IContent interface`);
345
+ }
346
+ // Get curated history WITHOUT the new user message
347
+ const currentHistory = this.historyService.getCuratedForProvider();
348
+ // Convert user content to IContent
349
+ const idGen = this.historyService.getIdGeneratorCallback();
350
+ const matcher = this.makePositionMatcher();
351
+ const userIContent = ContentConverters.toIContent(userContent, idGen, matcher);
352
+ // Build request with history + new message
353
+ const iContents = [...currentHistory, userIContent];
354
+ this._logApiRequest(ContentConverters.toGeminiContents(iContents), this.config.getModel(), prompt_id);
298
355
  const startTime = Date.now();
299
356
  let response;
300
357
  try {
301
- const apiCall = () => {
358
+ const apiCall = async () => {
302
359
  const modelToUse = this.config.getModel() || DEFAULT_GEMINI_FLASH_MODEL;
303
360
  // Prevent Flash model calls immediately after quota error
304
361
  if (this.config.getQuotaErrorOccurred() &&
305
362
  modelToUse === DEFAULT_GEMINI_FLASH_MODEL) {
306
363
  throw new Error('Please submit a new query to continue with the Flash model.');
307
364
  }
308
- return this.contentGenerator.generateContent({
309
- model: modelToUse,
310
- contents: requestContents,
311
- config: { ...this.generationConfig, ...params.config },
312
- }, prompt_id);
365
+ // Get tools in the format the provider expects
366
+ const tools = this.generationConfig.tools;
367
+ // Call the provider directly with IContent
368
+ const streamResponse = provider.generateChatCompletion(iContents, tools);
369
+ // Collect all chunks from the stream
370
+ let lastResponse;
371
+ for await (const iContent of streamResponse) {
372
+ lastResponse = iContent;
373
+ }
374
+ if (!lastResponse) {
375
+ throw new Error('No response from provider');
376
+ }
377
+ // Convert the final IContent to GenerateContentResponse
378
+ return this.convertIContentToResponse(lastResponse);
313
379
  };
314
380
  response = await retryWithBackoff(apiCall, {
315
381
  shouldRetry: (error) => {
@@ -331,51 +397,46 @@ export class GeminiChat {
331
397
  await this._logApiResponse(durationMs, prompt_id, response.usageMetadata, JSON.stringify(response));
332
398
  this.sendPromise = (async () => {
333
399
  const outputContent = response.candidates?.[0]?.content;
334
- // Because the AFC input contains the entire curated chat history in
335
- // addition to the new user input, we need to truncate the AFC history
336
- // to deduplicate the existing chat history.
400
+ // Send-then-commit: Now that we have a successful response, add both user and model messages
401
+ const currentModel = this.config.getModel();
402
+ // Handle AFC history or regular history
337
403
  const fullAutomaticFunctionCallingHistory = response.automaticFunctionCallingHistory;
338
- const curatedHistory = this.historyService.getCurated();
339
- const index = ContentConverters.toGeminiContents(curatedHistory).length;
340
- let automaticFunctionCallingHistory = [];
341
- if (fullAutomaticFunctionCallingHistory != null) {
342
- automaticFunctionCallingHistory =
343
- fullAutomaticFunctionCallingHistory.slice(index) ?? [];
344
- }
345
- // Note: modelOutput variable no longer used directly since we handle
346
- // responses inline below
347
- // Remove the user content we added and handle AFC history if present
348
- // Only do this if AFC history actually has content
349
- if (automaticFunctionCallingHistory &&
350
- automaticFunctionCallingHistory.length > 0) {
351
- // Pop the user content and replace with AFC history
352
- const allHistory = this.historyService.getAll();
353
- const trimmedHistory = allHistory.slice(0, -1);
354
- this.historyService.clear();
355
- const currentModel = this.config.getModel();
356
- for (const content of trimmedHistory) {
357
- this.historyService.add(content, currentModel);
358
- }
404
+ if (fullAutomaticFunctionCallingHistory &&
405
+ fullAutomaticFunctionCallingHistory.length > 0) {
406
+ // AFC case: Add the AFC history which includes the user input
407
+ const curatedHistory = this.historyService.getCurated();
408
+ const index = ContentConverters.toGeminiContents(curatedHistory).length;
409
+ const automaticFunctionCallingHistory = fullAutomaticFunctionCallingHistory.slice(index) ?? [];
359
410
  for (const content of automaticFunctionCallingHistory) {
360
- this.historyService.add(ContentConverters.toIContent(content), currentModel);
411
+ const idGen = this.historyService.getIdGeneratorCallback();
412
+ const matcher = this.makePositionMatcher();
413
+ this.historyService.add(ContentConverters.toIContent(content, idGen, matcher), currentModel);
361
414
  }
362
415
  }
416
+ else {
417
+ // Regular case: Add user content first
418
+ const idGen = this.historyService.getIdGeneratorCallback();
419
+ const matcher = this.makePositionMatcher();
420
+ this.historyService.add(ContentConverters.toIContent(userContent, idGen, matcher), currentModel);
421
+ }
363
422
  // Add model response if we have one (but filter out pure thinking responses)
364
423
  if (outputContent) {
365
424
  // Check if this is pure thinking content that should be filtered
366
425
  if (!this.isThoughtContent(outputContent)) {
367
426
  // Not pure thinking, add it
368
- this.historyService.add(ContentConverters.toIContent(outputContent), this.config.getModel());
427
+ const idGen = this.historyService.getIdGeneratorCallback();
428
+ this.historyService.add(ContentConverters.toIContent(outputContent, idGen), currentModel);
369
429
  }
370
430
  // If it's pure thinking content, don't add it to history
371
431
  }
372
432
  else if (response.candidates && response.candidates.length > 0) {
373
433
  // We have candidates but no content - add empty model response
374
434
  // This handles the case where the model returns empty content
375
- if (!automaticFunctionCallingHistory ||
376
- automaticFunctionCallingHistory.length === 0) {
435
+ if (!fullAutomaticFunctionCallingHistory ||
436
+ fullAutomaticFunctionCallingHistory.length === 0) {
377
437
  const emptyModelContent = { role: 'model', parts: [] };
378
- this.historyService.add(ContentConverters.toIContent(emptyModelContent), this.config.getModel());
438
+ const idGen = this.historyService.getIdGeneratorCallback();
439
+ this.historyService.add(ContentConverters.toIContent(emptyModelContent, idGen), currentModel);
379
440
  }
380
441
  }
381
442
  // If no candidates at all, don't add anything (error case)
@@ -431,23 +492,62 @@ export class GeminiChat {
431
492
  console.log('DEBUG: GeminiChat.sendMessageStream params.message:', JSON.stringify(params.message, null, 2));
432
493
  }
433
494
  await this.sendPromise;
495
+ // Check compression - first check if already compressing, then check if needed
496
+ if (this.compressionPromise) {
497
+ this.logger.debug('Waiting for ongoing compression to complete');
498
+ await this.compressionPromise;
499
+ }
500
+ else if (this.shouldCompress()) {
501
+ // Only check shouldCompress if not already compressing
502
+ this.logger.debug('Triggering compression before message send in stream');
503
+ this.compressionPromise = this.performCompression(prompt_id);
504
+ await this.compressionPromise;
505
+ this.compressionPromise = null;
506
+ }
507
+ // Check if this is a paired tool call/response array
508
+ let userContent;
509
+ // Quick check for paired tool call/response
510
+ const messageArray = Array.isArray(params.message) ? params.message : null;
511
+ const isPairedToolResponse = messageArray &&
512
+ messageArray.length === 2 &&
513
+ messageArray[0] &&
514
+ typeof messageArray[0] === 'object' &&
515
+ 'functionCall' in messageArray[0] &&
516
+ messageArray[1] &&
517
+ typeof messageArray[1] === 'object' &&
518
+ 'functionResponse' in messageArray[1];
519
+ if (isPairedToolResponse && messageArray) {
520
+ // This is a paired tool call/response from the executor
521
+ // Create separate Content objects with correct roles
522
+ userContent = [
523
+ {
524
+ role: 'model',
525
+ parts: [messageArray[0]],
526
+ },
527
+ {
528
+ role: 'user',
529
+ parts: [messageArray[1]],
530
+ },
531
+ ];
532
+ }
533
+ else {
534
+ userContent = createUserContentWithFunctionResponseFix(params.message);
535
+ }
536
+ // DO NOT add anything to history here - wait until after successful send!
537
+ // Tool responses will be handled in recordHistory after the model responds
434
538
  let streamDoneResolver;
435
539
  const streamDonePromise = new Promise((resolve) => {
436
540
  streamDoneResolver = resolve;
437
541
  });
438
542
  this.sendPromise = streamDonePromise;
439
- const userContent = createUserContentWithFunctionResponseFix(params.message);
440
- // Add user content to history ONCE before any attempts.
441
- this.historyService.add(ContentConverters.toIContent(userContent), this.config.getModel());
442
- // Note: requestContents is no longer needed as adapter gets history from HistoryService
443
- // eslint-disable-next-line @typescript-eslint/no-this-alias
444
- const self = this;
445
- return (async function* () {
543
+ // DO NOT add user content to history yet - wait until successful send
544
+ // This is the send-then-commit pattern to avoid orphaned tool calls
545
+ return (async function* (instance) {
446
546
  try {
447
547
  let lastError = new Error('Request failed after all retries.');
448
548
  for (let attempt = 0; attempt <= INVALID_CONTENT_RETRY_OPTIONS.maxAttempts; attempt++) {
449
549
  try {
450
- const stream = await self.makeApiCallAndProcessStream(params, prompt_id, userContent);
550
+ const stream = await instance.makeApiCallAndProcessStream(params, prompt_id, userContent);
451
551
  for await (const chunk of stream) {
452
552
  yield chunk;
453
553
  }
@@ -469,31 +569,27 @@ export class GeminiChat {
469
569
  }
470
570
  }
471
571
  if (lastError) {
472
- // If the stream fails, remove the user message that was added.
473
- const allHistory = self.historyService.getAll();
474
- const lastIContent = allHistory[allHistory.length - 1];
475
- const userIContent = ContentConverters.toIContent(userContent);
476
- // Check if the last content is the user content we just added
477
- if (lastIContent?.speaker === userIContent.speaker &&
478
- JSON.stringify(lastIContent?.blocks) ===
479
- JSON.stringify(userIContent.blocks)) {
480
- // Remove the last item from history
481
- const trimmedHistory = allHistory.slice(0, -1);
482
- self.historyService.clear();
483
- for (const content of trimmedHistory) {
484
- self.historyService.add(content, self.config.getModel());
485
- }
486
- }
572
+ // With send-then-commit pattern, we don't add to history until success,
573
+ // so there's nothing to remove on failure
487
574
  throw lastError;
488
575
  }
489
576
  }
490
577
  finally {
491
578
  streamDoneResolver();
492
579
  }
493
- })();
580
+ })(this);
494
581
  }
495
- async makeApiCallAndProcessStream(params, prompt_id, userContent) {
496
- const apiCall = () => {
582
+ async makeApiCallAndProcessStream(_params, _prompt_id, userContent) {
583
+ // Get the active provider
584
+ const provider = this.getActiveProvider();
585
+ if (!provider) {
586
+ throw new Error('No active provider configured');
587
+ }
588
+ // Check if provider supports IContent interface
589
+ if (!this.providerSupportsIContent(provider)) {
590
+ throw new Error(`Provider ${provider.name} does not support IContent interface`);
591
+ }
592
+ const apiCall = async () => {
497
593
  const modelToUse = this.config.getModel();
498
594
  const authType = this.config.getContentGeneratorConfig()?.authType;
499
595
  // Prevent Flash model calls immediately after quota error (only for Gemini providers)
@@ -502,14 +598,39 @@ export class GeminiChat {
502
598
  modelToUse === DEFAULT_GEMINI_FLASH_MODEL) {
503
599
  throw new Error('Please submit a new query to continue with the Flash model.');
504
600
  }
505
- // Get curated history for the request
506
- const iContents = this.historyService.getCurated();
507
- const requestContents = ContentConverters.toGeminiContents(iContents);
508
- return this.contentGenerator.generateContentStream({
509
- model: modelToUse,
510
- contents: requestContents,
511
- config: { ...this.generationConfig, ...params.config },
512
- }, prompt_id);
601
+ // Convert user content to IContent first so we can check if it's a tool response
602
+ const idGen = this.historyService.getIdGeneratorCallback();
603
+ const matcher = this.makePositionMatcher();
604
+ let requestContents;
605
+ if (Array.isArray(userContent)) {
606
+ // This is a paired tool call/response - convert each separately
607
+ const userIContents = userContent.map((content) => ContentConverters.toIContent(content, idGen, matcher));
608
+ // Get curated history WITHOUT the new user message (since we haven't added it yet)
609
+ const currentHistory = this.historyService.getCuratedForProvider();
610
+ // Build request with history + new messages (but don't commit to history yet)
611
+ requestContents = [...currentHistory, ...userIContents];
612
+ }
613
+ else {
614
+ const userIContent = ContentConverters.toIContent(userContent, idGen, matcher);
615
+ // Get curated history WITHOUT the new user message (since we haven't added it yet)
616
+ const currentHistory = this.historyService.getCuratedForProvider();
617
+ // Build request with history + new message (but don't commit to history yet)
618
+ requestContents = [...currentHistory, userIContent];
619
+ }
620
+ // DEBUG: Check for malformed entries
621
+ if (process.env.DEBUG) {
622
+ console.log('[DEBUG] geminiChat IContent request (history + new message):', JSON.stringify(requestContents, null, 2));
623
+ }
624
+ // Get tools in the format the provider expects
625
+ const tools = this.generationConfig.tools;
626
+ // Call the provider directly with IContent
627
+ const streamResponse = provider.generateChatCompletion(requestContents, tools);
628
+ // Convert the IContent stream to GenerateContentResponse stream
629
+ return (async function* (instance) {
630
+ for await (const iContent of streamResponse) {
631
+ yield instance.convertIContentToResponse(iContent);
632
+ }
633
+ })(this);
513
634
  };
514
635
  const streamResponse = await retryWithBackoff(apiCall, {
515
636
  shouldRetry: (error) => {
@@ -584,6 +705,188 @@ export class GeminiChat {
584
705
  setTools(tools) {
585
706
  this.generationConfig.tools = tools;
586
707
  }
708
+ /**
709
+ * Check if compression is needed based on token count
710
+ */
711
+ shouldCompress() {
712
+ // Calculate compression threshold only if not cached
713
+ if (this.cachedCompressionThreshold === null) {
714
+ const threshold = this.config.getEphemeralSetting('compression-threshold') ?? COMPRESSION_TOKEN_THRESHOLD;
715
+ const contextLimit = this.config.getEphemeralSetting('context-limit') ?? 60000; // Default context limit
716
+ this.cachedCompressionThreshold = threshold * contextLimit;
717
+ this.logger.debug('Calculated compression threshold:', {
718
+ threshold,
719
+ contextLimit,
720
+ compressionThreshold: this.cachedCompressionThreshold,
721
+ });
722
+ }
723
+ const currentTokens = this.historyService.getTotalTokens();
724
+ const shouldCompress = currentTokens >= this.cachedCompressionThreshold;
725
+ if (shouldCompress) {
726
+ this.logger.debug('Compression needed:', {
727
+ currentTokens,
728
+ threshold: this.cachedCompressionThreshold,
729
+ });
730
+ }
731
+ return shouldCompress;
732
+ }
733
+ /**
734
+ * Perform compression of chat history
735
+ * Made public to allow manual compression triggering
736
+ */
737
+ async performCompression(prompt_id) {
738
+ this.logger.debug('Starting compression');
739
+ // Reset cached threshold after compression in case settings changed
740
+ this.cachedCompressionThreshold = null;
741
+ // Lock history service
742
+ this.historyService.startCompression();
743
+ try {
744
+ // Get compression split
745
+ const { toCompress, toKeep } = this.getCompressionSplit();
746
+ if (toCompress.length === 0) {
747
+ this.logger.debug('Nothing to compress');
748
+ return;
749
+ }
750
+ // Perform direct compression API call
751
+ const summary = await this.directCompressionCall(toCompress, prompt_id);
752
+ // Apply compression atomically
753
+ this.applyCompression(summary, toKeep);
754
+ this.logger.debug('Compression completed successfully');
755
+ }
756
+ catch (error) {
757
+ this.logger.error('Compression failed:', error);
758
+ throw error;
759
+ }
760
+ finally {
761
+ // Always unlock
762
+ this.historyService.endCompression();
763
+ }
764
+ }
765
+ /**
766
+ * Get the split point for compression
767
+ */
768
+ getCompressionSplit() {
769
+ const curated = this.historyService.getCurated();
770
+ // Calculate split point (keep last 30%)
771
+ const preserveThreshold = this.config.getEphemeralSetting('compression-preserve-threshold') ?? COMPRESSION_PRESERVE_THRESHOLD;
772
+ let splitIndex = Math.floor(curated.length * (1 - preserveThreshold));
773
+ // Adjust for tool call boundaries
774
+ splitIndex = this.adjustForToolCallBoundary(curated, splitIndex);
775
+ // Never compress if too few messages
776
+ if (splitIndex < 4) {
777
+ return { toCompress: [], toKeep: curated };
778
+ }
779
+ return {
780
+ toCompress: curated.slice(0, splitIndex),
781
+ toKeep: curated.slice(splitIndex),
782
+ };
783
+ }
784
+ /**
785
+ * Adjust compression boundary to not split tool call/response pairs
786
+ */
787
+ adjustForToolCallBoundary(history, index) {
788
+ // Don't split tool responses from their calls
789
+ while (index < history.length && history[index].speaker === 'tool') {
790
+ index++;
791
+ }
792
+ // Check if previous message has unmatched tool calls
793
+ if (index > 0) {
794
+ const prev = history[index - 1];
795
+ if (prev.speaker === 'ai') {
796
+ const toolCalls = prev.blocks.filter((b) => b.type === 'tool_call');
797
+ if (toolCalls.length > 0) {
798
+ // Check if there are matching tool responses in the kept portion
799
+ const keptHistory = history.slice(index);
800
+ const hasMatchingResponses = toolCalls.every((call) => {
801
+ const toolCall = call;
802
+ return keptHistory.some((msg) => msg.speaker === 'tool' &&
803
+ msg.blocks.some((b) => b.type === 'tool_response' &&
804
+ b.callId === toolCall.id));
805
+ });
806
+ if (!hasMatchingResponses) {
807
+ // Include the AI message with unmatched calls in the compression
808
+ return index - 1;
809
+ }
810
+ }
811
+ }
812
+ }
813
+ return index;
814
+ }
815
+ /**
816
+ * Direct API call for compression, bypassing normal message flow
817
+ */
818
+ async directCompressionCall(historyToCompress, _prompt_id) {
819
+ const provider = this.getActiveProvider();
820
+ if (!provider || !this.providerSupportsIContent(provider)) {
821
+ throw new Error('Provider does not support compression');
822
+ }
823
+ // Build compression request with system prompt and user history
824
+ const compressionRequest = [
825
+ // Add system instruction as the first message
826
+ {
827
+ speaker: 'human',
828
+ blocks: [
829
+ {
830
+ type: 'text',
831
+ text: getCompressionPrompt(),
832
+ },
833
+ ],
834
+ },
835
+ // Add the history to compress
836
+ ...historyToCompress,
837
+ // Add the trigger instruction
838
+ {
839
+ speaker: 'human',
840
+ blocks: [
841
+ {
842
+ type: 'text',
843
+ text: 'First, reason in your scratchpad. Then, generate the <state_snapshot>.',
844
+ },
845
+ ],
846
+ },
847
+ ];
848
+ // Direct provider call without tools for compression
849
+ const stream = provider.generateChatCompletion(compressionRequest, undefined);
850
+ // Collect response
851
+ let summary = '';
852
+ for await (const chunk of stream) {
853
+ if (chunk.blocks) {
854
+ for (const block of chunk.blocks) {
855
+ if (block.type === 'text') {
856
+ summary += block.text;
857
+ }
858
+ }
859
+ }
860
+ }
861
+ return summary;
862
+ }
863
+ /**
864
+ * Apply compression results to history
865
+ */
866
+ applyCompression(summary, toKeep) {
867
+ // Clear and rebuild history atomically
868
+ this.historyService.clear();
869
+ const currentModel = this.config.getModel();
870
+ // Add compressed summary as user message
871
+ this.historyService.add({
872
+ speaker: 'human',
873
+ blocks: [{ type: 'text', text: summary }],
874
+ }, currentModel);
875
+ // Add acknowledgment from AI
876
+ this.historyService.add({
877
+ speaker: 'ai',
878
+ blocks: [
879
+ {
880
+ type: 'text',
881
+ text: 'Got it. Thanks for the additional context!',
882
+ },
883
+ ],
884
+ }, currentModel);
885
+ // Add back the kept messages
886
+ for (const content of toKeep) {
887
+ this.historyService.add(content, currentModel);
888
+ }
889
+ }
587
890
  getFinalUsageMetadata(chunks) {
588
891
  const lastChunkWithMetadata = chunks
589
892
  .slice()
@@ -655,16 +958,20 @@ export class GeminiChat {
655
958
  }
656
959
  }
657
960
  else {
658
- // Guard for streaming calls where the user input might already be in the history.
659
- const allHistory = this.historyService.getAll();
660
- const lastEntry = allHistory[allHistory.length - 1];
661
- const userIContent = ContentConverters.toIContent(userInput);
662
- // Check if user input is already in history
663
- const isAlreadyInHistory = lastEntry &&
664
- lastEntry.speaker === userIContent.speaker &&
665
- JSON.stringify(lastEntry.blocks) ===
666
- JSON.stringify(userIContent.blocks);
667
- if (!isAlreadyInHistory) {
961
+ // Handle both single Content and Content[] (for paired tool call/response)
962
+ const idGen = this.historyService.getIdGeneratorCallback();
963
+ const matcher = this.makePositionMatcher();
964
+ if (Array.isArray(userInput)) {
965
+ // This is a paired tool call/response from the executor
966
+ // Add each part to history
967
+ for (const content of userInput) {
968
+ const userIContent = ContentConverters.toIContent(content, idGen, matcher);
969
+ newHistoryEntries.push(userIContent);
970
+ }
971
+ }
972
+ else {
973
+ // Normal user message
974
+ const userIContent = ContentConverters.toIContent(userInput, idGen, matcher);
668
975
  newHistoryEntries.push(userIContent);
669
976
  }
670
977
  }
@@ -675,6 +982,7 @@ export class GeminiChat {
675
982
  outputContents = nonThoughtModelOutput;
676
983
  }
677
984
  else if (modelOutput.length === 0 &&
985
+ !Array.isArray(userInput) &&
678
986
  !isFunctionResponse(userInput) &&
679
987
  !automaticFunctionCallingHistory) {
680
988
  // Add an empty model response if the model truly returned nothing.
@@ -702,7 +1010,15 @@ export class GeminiChat {
702
1010
  this.historyService.add(entry, currentModel);
703
1011
  }
704
1012
  for (const content of consolidatedOutputContents) {
705
- this.historyService.add(ContentConverters.toIContent(content), currentModel);
1013
+ // Check if this contains tool calls
1014
+ const hasToolCalls = content.parts?.some((part) => part && typeof part === 'object' && 'functionCall' in part);
1015
+ if (!hasToolCalls) {
1016
+ // Only add non-tool-call responses to history immediately
1017
+ // Tool calls will be added when the executor returns with the response
1018
+ this.historyService.add(ContentConverters.toIContent(content), currentModel);
1019
+ }
1020
+ // Tool calls are NOT added here - they'll come back from the executor
1021
+ // along with their responses and be added together
706
1022
  }
707
1023
  }
708
1024
  hasTextContent(content) {
@@ -855,7 +1171,7 @@ export class GeminiChat {
855
1171
  // Check for potentially problematic cyclic tools with cyclic schemas
856
1172
  // and include a recommendation to remove potentially problematic tools.
857
1173
  if (isStructuredError(error) && isSchemaDepthError(error.message)) {
858
- const tools = (await this.config.getToolRegistry()).getAllTools();
1174
+ const tools = this.config.getToolRegistry().getAllTools();
859
1175
  const cyclicSchemaTools = [];
860
1176
  for (const tool of tools) {
861
1177
  if ((tool.schema.parametersJsonSchema &&
@@ -872,6 +1188,189 @@ export class GeminiChat {
872
1188
  }
873
1189
  }
874
1190
  }
1191
+ /**
1192
+ * Convert PartListUnion (user input) to an IContent-style object `{ speaker, blocks }` for provider/history.
+ *
+ * - string input: a 'human' entry with a single text block.
+ * - every part is a truthy object with a `functionResponse` key: a 'tool' entry with one
+ *   tool_response block per part whose functionResponse value is truthy.
+ * - anything else: string parts and parts with a defined `text` become text blocks,
+ *   functionCall parts become tool_call blocks, functionResponse parts become tool_response
+ *   blocks; the speaker is 'ai' if at least one functionCall part was converted, otherwise 'human'.
+ *   The branches are an else-if chain, so a part with a defined `text` is treated as text only.
+ *
+ * Missing ids/names fall back to '' and missing args/response fall back to {} (via `||`).
+ * The method does not read or modify `this`.
+ *
+ * NOTE(review): `every` returns true for an empty array, so `[]` yields
+ * `{ speaker: 'tool', blocks: [] }` - confirm that is intended.
+ * NOTE(review): in the mixed branch a null/undefined (or other non-string primitive) part
+ * reaches `'text' in part`, which throws a TypeError - confirm callers never pass such parts.
+ *
+ * @param input - a string, a single part, or an array of parts.
+ * @returns an object with `speaker` ('human', 'tool' or 'ai') and the converted `blocks`.
1193
+ */
1194
+ convertPartListUnionToIContent(input) {
1195
+ const blocks = [];
1196
+ if (typeof input === 'string') {
1197
+ // Plain string: wrap it as a single text block spoken by 'human'
1198
+ return {
1199
+ speaker: 'human',
1200
+ blocks: [{ type: 'text', text: input }],
1201
+ };
1202
+ }
1203
+ // Normalize a single part to a one-element array so both shapes share the code below
1204
+ const parts = Array.isArray(input) ? input : [input];
1205
+ // True when every part is a truthy object with a `functionResponse` key (also true for an empty array)
1206
+ const allFunctionResponses = parts.every((part) => part && typeof part === 'object' && 'functionResponse' in part);
1207
+ if (allFunctionResponses) {
1208
+ // Tool responses - speaker is 'tool'; a part whose functionResponse value is falsy adds no block
1209
+ for (const part of parts) {
1210
+ if (typeof part === 'object' &&
1211
+ 'functionResponse' in part &&
1212
+ part.functionResponse) {
1213
+ blocks.push({
1214
+ type: 'tool_response',
1215
+ callId: part.functionResponse.id || '',
1216
+ toolName: part.functionResponse.name || '',
1217
+ result: part.functionResponse.response || {},
1218
+ error: undefined,
1219
+ });
1220
+ }
1221
+ }
1222
+ return {
1223
+ speaker: 'tool',
1224
+ blocks,
1225
+ };
1226
+ }
1227
+ // Mixed content: the speaker stays 'human' unless a functionCall part is converted below
1228
+ let hasAIContent = false;
1229
+ for (const part of parts) {
1230
+ if (typeof part === 'string') {
1231
+ blocks.push({ type: 'text', text: part });
1232
+ }
1233
+ else if ('text' in part && part.text !== undefined) {
1234
+ blocks.push({ type: 'text', text: part.text });
1235
+ }
1236
+ else if ('functionCall' in part && part.functionCall) {
1237
+ hasAIContent = true; // a converted functionCall is the only thing that makes the speaker 'ai'
1238
+ blocks.push({
1239
+ type: 'tool_call',
1240
+ id: part.functionCall.id || '',
1241
+ name: part.functionCall.name || '',
1242
+ parameters: part.functionCall.args || {},
1243
+ });
1244
+ }
1245
+ else if ('functionResponse' in part && part.functionResponse) {
1246
+ // A function response that appears alongside other kinds of parts
1247
+ blocks.push({
1248
+ type: 'tool_response',
1249
+ callId: part.functionResponse.id || '',
1250
+ toolName: part.functionResponse.name || '',
1251
+ result: part.functionResponse.response || {},
1252
+ error: undefined,
1253
+ });
1254
+ }
1255
+ }
1256
+ // 'ai' when at least one function call was converted; otherwise the input is attributed to 'human'
1257
+ return {
1258
+ speaker: hasAIContent ? 'ai' : 'human',
1259
+ blocks,
1260
+ };
1261
+ }
1262
+ /**
1263
+ * Convert IContent (from provider) to a GenerateContentResponse-shaped object for SDK compatibility.
+ *
+ * Block-to-part mapping: text -> { text }, tool_call -> { functionCall },
+ * tool_response -> { functionResponse }, thinking -> { thought: true, text };
+ * any other block type is skipped.
+ *
+ * Shape of the result:
+ * - `candidates`: one candidate whose content has role 'model' and the converted parts
+ *   (input.speaker is not consulted).
+ * - `text`: getter returning the text of the first part that has a `text` key, or '' when
+ *   there is none or that text is falsy.
+ * - `functionCalls`: array computed once here from the functionCall parts (empty when none).
+ * - `executableCode` / `codeExecutionResult`: always undefined.
+ * - `data`: non-enumerable getter returning the response object itself.
+ * - `usageMetadata`: set only when input.metadata.usage is truthy.
+ *
+ * NOTE(review): thinking parts also carry a `text` key, so the `text` getter returns the
+ * thought when a thinking block comes first, and it never joins multiple text parts -
+ * confirm this matches what consumers expect.
+ *
+ * @param input - IContent-style object with `blocks` and optional `metadata.usage`.
+ * @returns the response-shaped object described above.
1264
+ */
1265
+ convertIContentToResponse(input) {
1266
+ // Translate each IContent block into the matching part object
1267
+ const parts = [];
1268
+ for (const block of input.blocks) {
1269
+ switch (block.type) {
1270
+ case 'text':
1271
+ parts.push({ text: block.text });
1272
+ break;
1273
+ case 'tool_call': {
1274
+ const toolCall = block;
1275
+ parts.push({
1276
+ functionCall: {
1277
+ id: toolCall.id,
1278
+ name: toolCall.name,
1279
+ args: toolCall.parameters,
1280
+ },
1281
+ });
1282
+ break;
1283
+ }
1284
+ case 'tool_response': {
1285
+ const toolResponse = block;
1286
+ parts.push({
1287
+ functionResponse: {
1288
+ id: toolResponse.callId,
1289
+ name: toolResponse.toolName,
1290
+ response: toolResponse.result,
1291
+ },
1292
+ });
1293
+ break;
1294
+ }
1295
+ case 'thinking':
1296
+ // Thinking blocks become parts flagged `thought: true`, carrying the thought string as `text`
1297
+ parts.push({
1298
+ thought: true,
1299
+ text: block.thought,
1300
+ });
1301
+ break;
1302
+ default:
1303
+ // Any other block type contributes no part
1304
+ break;
1305
+ }
1306
+ }
1307
+ // Assemble the response object around the converted parts
1308
+ const response = {
1309
+ candidates: [
1310
+ {
1311
+ content: {
1312
+ role: 'model',
1313
+ parts,
1314
+ },
1315
+ },
1316
+ ],
1317
+ // Convenience members; presumably required by the SDK response type - TODO confirm
1318
+ get text() {
1319
+ return parts.find((p) => 'text' in p)?.text || '';
1320
+ },
1321
+ functionCalls: parts
1322
+ .filter((p) => 'functionCall' in p)
1323
+ .map((p) => p.functionCall),
1324
+ executableCode: undefined,
1325
+ codeExecutionResult: undefined,
1326
+ // `data` is attached after construction via Object.defineProperty (see next statement)
1327
+ };
1328
+ // `data` is a getter that hands back the response object itself
1329
+ // It is non-enumerable so the self-reference does not make JSON.stringify hit a cycle
1330
+ Object.defineProperty(response, 'data', {
1331
+ get() {
1332
+ return response;
1333
+ },
1334
+ enumerable: false, // keeps the self-reference out of JSON.stringify and key enumeration
1335
+ configurable: true,
1336
+ });
1337
+ // Copy token counts when the input carries usage metadata; missing or falsy counts become 0
1338
+ if (input.metadata?.usage) {
1339
+ response.usageMetadata = {
1340
+ promptTokenCount: input.metadata.usage.promptTokens || 0,
1341
+ candidatesTokenCount: input.metadata.usage.completionTokens || 0,
1342
+ totalTokenCount: input.metadata.usage.totalTokens || 0,
1343
+ };
1344
+ }
1345
+ return response;
1346
+ }
1347
+ /**
1348
+ * Get the active provider from the ProviderManager via Config.
+ *
+ * Returns undefined when this.config.getProviderManager() yields a falsy value, and also
+ * when providerManager.getActiveProvider() throws: the exception is caught and discarded,
+ * so callers cannot tell "no provider" apart from any other failure.
+ *
+ * @returns whatever providerManager.getActiveProvider() returns, or undefined.
1349
+ */
1350
+ getActiveProvider() {
1351
+ const providerManager = this.config.getProviderManager();
1352
+ if (!providerManager) {
1353
+ return undefined;
1354
+ }
1355
+ try {
1356
+ return providerManager.getActiveProvider();
1357
+ }
1358
+ catch {
1359
+ // Any exception is treated as "no active provider set" (presumably the only expected cause - confirm)
1360
+ return undefined;
1361
+ }
1362
+ }
1363
+ /**
1364
+ * Check if a provider supports the IContent interface.
+ *
+ * The check is purely structural: a falsy provider gives false; otherwise the result is
+ * true exactly when `provider.generateChatCompletion` is a function.
+ *
+ * @param provider - provider object to inspect; may be undefined or null.
+ * @returns true if the provider exposes a generateChatCompletion function, else false.
1365
+ */
1366
+ providerSupportsIContent(provider) {
1367
+ if (!provider) {
1368
+ return false;
1369
+ }
1370
+ // Only the presence of a callable generateChatCompletion is tested; its signature is not inspected
1371
+ return (typeof provider
1372
+ .generateChatCompletion === 'function');
1373
+ }
875
1374
  }
876
1375
  /** Visible for Testing */
877
1376
  export function isSchemaDepthError(errorMessage) {