@vybestack/llxprt-code-core 0.5.0-nightly.251106.c2b44a77 → 0.5.0-nightly.251107.2c6eee18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -44,6 +44,7 @@ export declare class GeminiClient {
  private lastComplexitySuggestionTurn?;
  private toolActivityCount;
  private toolCallReminderLevel;
+ private lastTodoSnapshot?;
  /**
  * At any point in this conversation, was compression triggered without
  * being forced and did it fail?
@@ -77,6 +78,13 @@ export declare class GeminiClient {
  private isTodoToolCall;
  private appendTodoSuffixToRequest;
  private recordModelActivity;
+ private readTodoSnapshot;
+ private getActiveTodos;
+ private areTodoSnapshotsEqual;
+ private getTodoReminderForCurrentState;
+ private appendSystemReminderToRequest;
+ private shouldDeferStreamEvent;
+ private isTodoPauseResponse;
  addHistory(content: Content): Promise<void>;
  getChat(): GeminiChat;
  /**
@@ -107,7 +115,7 @@ export declare class GeminiClient {
  generateDirectMessage(params: SendMessageParameters, promptId: string): Promise<GenerateContentResponse>;
  startChat(extraHistory?: Content[]): Promise<GeminiChat>;
  private getIdeContextParts;
- sendMessageStream(request: PartListUnion, signal: AbortSignal, prompt_id: string, turns?: number, originalModel?: string): AsyncGenerator<ServerGeminiStreamEvent, Turn>;
+ sendMessageStream(initialRequest: PartListUnion, signal: AbortSignal, prompt_id: string, turns?: number): AsyncGenerator<ServerGeminiStreamEvent, Turn>;
  generateJson(contents: Content[], schema: Record<string, unknown>, abortSignal: AbortSignal, model: string, config?: GenerateContentConfig): Promise<Record<string, unknown>>;
  generateContent(contents: Content[], generationConfig: GenerateContentConfig, abortSignal: AbortSignal, model: string): Promise<GenerateContentResponse>;
  generateEmbedding(texts: string[]): Promise<number[][]>;
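
The declaration change above removes the trailing originalModel parameter from sendMessageStream and renames the first parameter to initialRequest. A minimal sketch of a caller against the new signature, assuming the class is importable from the package root and that signal and promptId come from the surrounding application:

import { GeminiClient } from '@vybestack/llxprt-code-core'; // import path assumed for illustration

async function runPrompt(client: GeminiClient, signal: AbortSignal, promptId: string) {
  // initialRequest is a PartListUnion; turns is optional and bounded internally.
  const stream = client.sendMessageStream(
    [{ text: 'Summarize the remaining work.' }],
    signal,
    promptId,
  );
  for await (const event of stream) {
    // ServerGeminiStreamEvent values arrive here; the generator's return
    // value is the completed Turn (not captured by for-await).
    console.log(event.type);
  }
}
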
@@ -25,13 +25,12 @@ import { LoopDetectionService } from '../services/loopDetectionService.js';
  import { ideContext } from '../ide/ideContext.js';
  import { ComplexityAnalyzer, } from '../services/complexity-analyzer.js';
  import { TodoReminderService } from '../services/todo-reminder-service.js';
+ import { TodoStore } from '../tools/todo-store.js';
  import { isFunctionResponse } from '../utils/messageInspectors.js';
  import { estimateTokens as estimateTextTokens } from '../utils/toolOutputLimiter.js';
  import { subscribeToAgentRuntimeState } from '../runtime/AgentRuntimeState.js';
  const COMPLEXITY_ESCALATION_TURN_THRESHOLD = 3;
  const TODO_PROMPT_SUFFIX = 'Use TODO List to organize this effort.';
- const TOOL_BASE_TODO_MESSAGE = 'After this next tool call I need to call todo_write and create a todo list to organize this effort.';
- const TOOL_ESCALATED_TODO_MESSAGE = 'I have already made several tool calls without a todo list. Immediately call todo_write after this next tool call to organize the work.';
  function isThinkingSupported(model) {
  if (model.startsWith('gemini-2.5'))
  return true;
@@ -101,6 +100,7 @@ export class GeminiClient {
  lastComplexitySuggestionTurn;
  toolActivityCount = 0;
  toolCallReminderLevel = 'none';
+ lastTodoSnapshot;
  /**
  * At any point in this conversation, was compression triggered without
  * being forced and did it fail?
@@ -263,8 +263,7 @@ export class GeminiClient {
  if (!this.todoToolsAvailable) {
  return;
  }
- if (event.type !== GeminiEventType.Content &&
- event.type !== GeminiEventType.ToolCallRequest) {
+ if (event.type !== GeminiEventType.ToolCallResponse) {
  return;
  }
  this.toolActivityCount += 1;
@@ -276,6 +275,87 @@ export class GeminiClient {
  this.toolCallReminderLevel = 'base';
  }
  }
+ async readTodoSnapshot() {
+ try {
+ const sessionId = this.config.getSessionId();
+ const store = new TodoStore(sessionId, DEFAULT_AGENT_ID);
+ return await store.readTodos();
+ }
+ catch (_error) {
+ return [];
+ }
+ }
+ getActiveTodos(todos) {
+ const inProgress = todos.filter((todo) => todo.status === 'in_progress');
+ const pending = todos.filter((todo) => todo.status === 'pending');
+ return [...inProgress, ...pending];
+ }
+ areTodoSnapshotsEqual(a, b) {
+ if (a.length !== b.length) {
+ return false;
+ }
+ const normalize = (todos) => todos
+ .map((todo) => ({
+ id: `${todo.id ?? ''}`,
+ status: (todo.status ?? 'pending').toLowerCase(),
+ content: todo.content ?? '',
+ priority: todo.priority ?? 'medium',
+ }))
+ .sort((left, right) => left.id.localeCompare(right.id));
+ const normalizedA = normalize(a);
+ const normalizedB = normalize(b);
+ return normalizedA.every((todo, index) => JSON.stringify(todo) === JSON.stringify(normalizedB[index]));
+ }
+ async getTodoReminderForCurrentState(options) {
+ const todos = options?.todoSnapshot ?? (await this.readTodoSnapshot());
+ const activeTodos = options?.activeTodos ?? this.getActiveTodos(todos);
+ let reminder = null;
+ if (todos.length === 0) {
+ reminder = this.todoReminderService.getCreateListReminder([]);
+ }
+ else if (activeTodos.length > 0) {
+ reminder = options?.escalate
+ ? this.todoReminderService.getEscalatedActiveTodoReminder(activeTodos[0])
+ : this.todoReminderService.getUpdateActiveTodoReminder(activeTodos[0]);
+ }
+ return { reminder, todos, activeTodos };
+ }
+ appendSystemReminderToRequest(request, reminderText) {
+ if (Array.isArray(request)) {
+ const cloned = [...request];
+ const alreadyPresent = cloned.some((part) => typeof part === 'object' &&
+ part !== null &&
+ 'text' in part &&
+ typeof part.text === 'string' &&
+ part.text === reminderText);
+ if (!alreadyPresent) {
+ cloned.push({ text: reminderText });
+ }
+ return cloned;
+ }
+ return [{ text: reminderText }];
+ }
+ shouldDeferStreamEvent(event) {
+ return (event.type === GeminiEventType.Content ||
+ event.type === GeminiEventType.Finished ||
+ event.type === GeminiEventType.Citation);
+ }
+ isTodoPauseResponse(response) {
+ if (!response?.responseParts) {
+ return false;
+ }
+ return response.responseParts.some((part) => {
+ if (part &&
+ typeof part === 'object' &&
+ 'functionResponse' in part &&
+ part.functionResponse &&
+ typeof part.functionResponse === 'object') {
+ const name = part.functionResponse.name;
+ return typeof name === 'string' && name.toLowerCase() === 'todo_pause';
+ }
+ return false;
+ });
+ }
  async addHistory(content) {
  // Ensure chat is initialized before adding history
  if (!this.hasChatInitialized()) {
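
The areTodoSnapshotsEqual helper added above treats two todo lists as equal when their normalized entries match regardless of order. A self-contained sketch of that comparison, with the Todo shape assumed for illustration rather than taken from the package's own types:

// Standalone restatement of the snapshot comparison strategy shown above.
interface Todo {
  id?: string | number;
  content?: string;
  status?: string;
  priority?: string;
}

function todoSnapshotsEqual(a: Todo[], b: Todo[]): boolean {
  if (a.length !== b.length) return false;
  // Normalize optional fields and sort by id so ordering differences
  // do not count as a change.
  const normalize = (todos: Todo[]) =>
    todos
      .map((todo) => ({
        id: `${todo.id ?? ''}`,
        status: (todo.status ?? 'pending').toLowerCase(),
        content: todo.content ?? '',
        priority: todo.priority ?? 'medium',
      }))
      .sort((left, right) => left.id.localeCompare(right.id));
  const na = normalize(a);
  const nb = normalize(b);
  return na.every((todo, i) => JSON.stringify(todo) === JSON.stringify(nb[i]));
}
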
@@ -710,21 +790,18 @@ export class GeminiClient {
  };
  }
  }
- async *sendMessageStream(request, signal, prompt_id, turns = this.MAX_TURNS, originalModel) {
+ async *sendMessageStream(initialRequest, signal, prompt_id, turns = this.MAX_TURNS) {
  const logger = new DebugLogger('llxprt:client:stream');
  logger.debug(() => 'DEBUG: GeminiClient.sendMessageStream called');
- logger.debug(() => `DEBUG: GeminiClient.sendMessageStream request: ${JSON.stringify(request, null, 2)}`);
- logger.debug(() => `DEBUG: GeminiClient.sendMessageStream typeof request: ${typeof request}`);
- logger.debug(() => `DEBUG: GeminiClient.sendMessageStream Array.isArray(request): ${Array.isArray(request)}`);
+ logger.debug(() => `DEBUG: GeminiClient.sendMessageStream request: ${JSON.stringify(initialRequest, null, 2)}`);
+ logger.debug(() => `DEBUG: GeminiClient.sendMessageStream typeof request: ${typeof initialRequest}`);
+ logger.debug(() => `DEBUG: GeminiClient.sendMessageStream Array.isArray(request): ${Array.isArray(initialRequest)}`);
  await this.lazyInitialize();
- // Ensure chat is initialized after lazyInitialize
  if (!this.chat) {
- // If we have previous history, restore it when creating the chat
  if (this._previousHistory && this._previousHistory.length > 0) {
  this.logger.debug('Restoring previous history during prompt generation', {
  historyLength: this._previousHistory.length,
  });
- // Extract the conversation history after the initial environment setup
  const conversationHistory = this._previousHistory.slice(2);
  this.chat = await this.startChat(conversationHistory);
  this.logger.debug('Chat started with restored history', {
@@ -750,7 +827,6 @@ export class GeminiClient {
  const providerName = providerManager?.getActiveProviderName() || 'backend';
  return new Turn(this.getChat(), prompt_id, DEFAULT_AGENT_ID, providerName);
  }
- // Ensure turns never exceeds MAX_TURNS to prevent infinite loops
  const boundedTurns = Math.min(turns, this.MAX_TURNS);
  if (!boundedTurns) {
  const contentGenConfig = this.config.getContentGeneratorConfig();
@@ -758,19 +834,10 @@ export class GeminiClient {
  const providerName = providerManager?.getActiveProviderName() || 'backend';
  return new Turn(this.getChat(), prompt_id, DEFAULT_AGENT_ID, providerName);
  }
- // Track the original model from the first call to detect model switching
- // @plan PLAN-20251027-STATELESS5.P10
- // @requirement REQ-STAT5-003.1
- const initialModel = originalModel || this.runtimeState.model;
  const compressed = await this.tryCompressChat(prompt_id);
  if (compressed.compressionStatus === CompressionStatus.COMPRESSED) {
  yield { type: GeminiEventType.ChatCompressed, value: compressed };
  }
- // Prevent context updates from being sent while a tool call is
- // waiting for a response. The Gemini API requires that a functionResponse
- // part from the user immediately follows a functionCall part from the model
- // in the conversation history . The IDE context is not discarded; it will
- // be included in the next regular message sent to the model.
  const history = await this.getHistory();
  const lastMessage = history.length > 0 ? history[history.length - 1] : undefined;
  const hasPendingToolCall = !!lastMessage &&
@@ -787,85 +854,207 @@ export class GeminiClient {
  this.lastSentIdeContext = newIdeContext;
  this.forceFullIdeContext = false;
  }
- let shouldAppendTodoSuffix = false;
- if (Array.isArray(request) && request.length > 0) {
- const userMessage = request
- .filter((part) => typeof part === 'object' && 'text' in part)
- .map((part) => part.text)
- .join(' ')
- .trim();
- if (userMessage.length > 0) {
- const analysis = this.complexityAnalyzer.analyzeComplexity(userMessage);
- const complexityReminder = this.processComplexityAnalysis(analysis);
- if (complexityReminder) {
- shouldAppendTodoSuffix = true;
+ let baseRequest = Array.isArray(initialRequest)
+ ? [...initialRequest]
+ : initialRequest;
+ let retryCount = 0;
+ const MAX_RETRIES = 2;
+ let lastTurn;
+ let hadToolCallsThisTurn = false; // Track if model executed tools - preserve across retries
+ while (retryCount < MAX_RETRIES) {
+ let request = Array.isArray(baseRequest)
+ ? [...baseRequest]
+ : baseRequest;
+ // Complexity analysis only on first iteration
+ if (retryCount === 0) {
+ let shouldAppendTodoSuffix = false;
+ if (Array.isArray(request) && request.length > 0) {
+ const userMessage = request
+ .filter((part) => typeof part === 'object' && 'text' in part)
+ .map((part) => part.text)
+ .join(' ')
+ .trim();
+ if (userMessage.length > 0) {
+ const analysis = this.complexityAnalyzer.analyzeComplexity(userMessage);
+ const complexityReminder = this.processComplexityAnalysis(analysis);
+ if (complexityReminder) {
+ shouldAppendTodoSuffix = true;
+ }
+ }
+ else {
+ this.consecutiveComplexTurns = 0;
+ }
+ }
+ else {
+ this.consecutiveComplexTurns = 0;
  }
+ if (shouldAppendTodoSuffix) {
+ request = this.appendTodoSuffixToRequest(request);
+ }
+ baseRequest = Array.isArray(request)
+ ? [...request]
+ : request;
  }
  else {
  this.consecutiveComplexTurns = 0;
  }
- }
- else {
- this.consecutiveComplexTurns = 0;
- }
- if (shouldAppendTodoSuffix) {
- request = this.appendTodoSuffixToRequest(request);
- }
- // Get provider name for error messages
- const contentGenConfig = this.config.getContentGeneratorConfig();
- const providerManager = contentGenConfig?.providerManager;
- const providerName = providerManager?.getActiveProviderName() || 'backend';
- const turn = new Turn(this.getChat(), prompt_id, DEFAULT_AGENT_ID, providerName);
- const loopDetected = await this.loopDetector.turnStarted(signal);
- if (loopDetected) {
- yield { type: GeminiEventType.LoopDetected };
- return turn;
- }
- const resultStream = turn.run(request, signal);
- for await (const event of resultStream) {
- if (this.loopDetector.addAndCheck(event)) {
+ // Apply todo reminder if one is pending from previous iteration
+ if (this.todoToolsAvailable && this.toolCallReminderLevel !== 'none') {
+ const reminderResult = await this.getTodoReminderForCurrentState({
+ todoSnapshot: this.lastTodoSnapshot,
+ escalate: this.toolCallReminderLevel === 'escalated',
+ });
+ if (reminderResult.reminder) {
+ request = this.appendSystemReminderToRequest(request, reminderResult.reminder);
+ this.lastTodoSnapshot = reminderResult.todos;
+ }
+ this.toolCallReminderLevel = 'none';
+ this.toolActivityCount = 0;
+ }
+ const contentGenConfig = this.config.getContentGeneratorConfig();
+ const providerManager = contentGenConfig?.providerManager;
+ const providerName = providerManager?.getActiveProviderName() || 'backend';
+ const turn = new Turn(this.getChat(), prompt_id, DEFAULT_AGENT_ID, providerName);
+ lastTurn = turn;
+ const loopDetected = await this.loopDetector.turnStarted(signal);
+ if (loopDetected) {
  yield { type: GeminiEventType.LoopDetected };
  return turn;
  }
- this.recordModelActivity(event);
- yield event;
- if (event.type === GeminiEventType.ToolCallRequest &&
- this.isTodoToolCall(event.value?.name)) {
- this.lastTodoToolTurn = this.sessionTurnCount;
- this.consecutiveComplexTurns = 0;
+ // Reset flags for this iteration (hadToolCallsThisTurn persists across duplicate todo retries)
+ let todoPauseSeen = false;
+ const deferredEvents = [];
+ const resultStream = turn.run(request, signal);
+ // Stream events, deferring Content/Finished/Citation until we decide on a retry
+ for await (const event of resultStream) {
+ if (this.loopDetector.addAndCheck(event)) {
+ yield { type: GeminiEventType.LoopDetected };
+ return turn;
+ }
+ this.recordModelActivity(event);
+ // Track tool execution during this turn
+ if (event.type === GeminiEventType.ToolCallRequest) {
+ hadToolCallsThisTurn = true;
+ }
+ if (event.type === GeminiEventType.ToolCallResponse) {
+ if (this.isTodoPauseResponse(event.value)) {
+ todoPauseSeen = true;
+ }
+ }
+ // Handle duplicate todo writes
+ if (event.type === GeminiEventType.ToolCallRequest &&
+ this.isTodoToolCall(event.value?.name)) {
+ this.lastTodoToolTurn = this.sessionTurnCount;
+ this.consecutiveComplexTurns = 0;
+ const requestedTodos = Array.isArray(event.value?.args?.todos)
+ ? event.value.args.todos
+ : [];
+ if (requestedTodos.length > 0) {
+ this.lastTodoSnapshot = requestedTodos.map((todo) => ({
+ id: `${todo.id ?? ''}`,
+ content: todo.content ?? '',
+ status: todo.status ?? 'pending',
+ priority: todo.priority ?? 'medium',
+ }));
+ }
+ }
+ if (this.shouldDeferStreamEvent(event)) {
+ deferredEvents.push(event);
+ }
+ else {
+ yield event;
+ }
+ if (event.type === GeminiEventType.Error) {
+ for (const deferred of deferredEvents) {
+ yield deferred;
+ }
+ return turn;
+ }
  }
- if (event.type === GeminiEventType.Error) {
+ // Turn stream is now complete. Decide if we should retry.
+ // Check if model made progress by executing tools FIRST
+ if (hadToolCallsThisTurn) {
+ // Model executed tools - that's progress, flush deferred events and exit
+ const reminderState = await this.getTodoReminderForCurrentState();
+ for (const deferred of deferredEvents) {
+ yield deferred;
+ }
+ this.lastTodoSnapshot = reminderState.todos;
+ this.toolCallReminderLevel = 'none';
+ this.toolActivityCount = 0;
  return turn;
  }
- }
- if (this.todoToolsAvailable && this.toolCallReminderLevel !== 'none') {
- const reminderText = this.toolCallReminderLevel === 'escalated'
- ? TOOL_ESCALATED_TODO_MESSAGE
- : TOOL_BASE_TODO_MESSAGE;
- this.getChat().addHistory({
- role: 'model',
- parts: [{ text: reminderText }],
- });
- const currentTime = Date.now();
- this.lastComplexitySuggestionTime = currentTime;
- this.lastComplexitySuggestionTurn = this.sessionTurnCount;
- this.consecutiveComplexTurns = 0;
- this.toolCallReminderLevel = 'none';
- this.toolActivityCount = 0;
- }
- if (!turn.pendingToolCalls.length && signal && !signal.aborted) {
- // Check if model was switched during the call (likely due to quota error)
- // @plan PLAN-20251027-STATELESS5.P10
- // @requirement REQ-STAT5-003.1
- const currentModel = this.runtimeState.model;
- if (currentModel !== initialModel) {
- // Model was switched (likely due to quota error fallback)
- // Don't continue with recursive call to prevent unwanted Flash execution
+ // No tool work detected - check todo/pause state
+ const reminderState = await this.getTodoReminderForCurrentState();
+ const latestSnapshot = reminderState.todos;
+ const activeTodos = reminderState.activeTodos;
+ if (todoPauseSeen) {
+ // Model explicitly paused - respect that
+ for (const deferred of deferredEvents) {
+ yield deferred;
+ }
+ this.lastTodoSnapshot = latestSnapshot;
+ this.toolCallReminderLevel = 'none';
+ this.toolActivityCount = 0;
+ return turn;
+ }
+ // Check if todos are still pending
+ const todosStillPending = activeTodos.length > 0;
+ const hasPendingReminder = this.todoToolsAvailable && this.toolCallReminderLevel !== 'none';
+ if (!todosStillPending && !hasPendingReminder) {
+ // All todos complete or list is empty, and no reminder pending - return normally
+ for (const deferred of deferredEvents) {
+ yield deferred;
+ }
+ this.lastTodoSnapshot = latestSnapshot;
+ this.toolCallReminderLevel = 'none';
+ this.toolActivityCount = 0;
  return turn;
  }
- // nextSpeakerChecker disabled
+ // Model tried to return with incomplete todos or has pending reminder - check if we should retry
+ retryCount++;
+ if (retryCount >= MAX_RETRIES) {
+ // Hit retry limit - return anyway, let continuation service handle it
+ for (const deferred of deferredEvents) {
+ yield deferred;
+ }
+ this.lastTodoSnapshot = latestSnapshot;
+ this.toolCallReminderLevel = 'none';
+ this.toolActivityCount = 0;
+ return turn;
+ }
+ // If we have a pending reminder (from toolActivityCount), it will be injected
+ // at the start of the next iteration. Otherwise, prepare a followUp reminder.
+ if (!hasPendingReminder) {
+ // Prepare retry with escalated reminder
+ const previousSnapshot = this.lastTodoSnapshot ?? [];
+ const snapshotUnchanged = this.areTodoSnapshotsEqual(previousSnapshot, latestSnapshot);
+ const followUpReminder = (await this.getTodoReminderForCurrentState({
+ todoSnapshot: latestSnapshot,
+ activeTodos,
+ escalate: snapshotUnchanged,
+ })).reminder;
+ this.lastTodoSnapshot = latestSnapshot;
+ if (!followUpReminder) {
+ // No reminder to add - flush and return
+ for (const deferred of deferredEvents) {
+ yield deferred;
+ }
+ this.toolCallReminderLevel = 'none';
+ this.toolActivityCount = 0;
+ return turn;
+ }
+ // Set up retry request with reminder
+ baseRequest = this.appendSystemReminderToRequest(baseRequest, followUpReminder);
+ }
+ else {
+ // hasPendingReminder is true - reminder will be injected at loop start
+ this.lastTodoSnapshot = latestSnapshot;
+ }
+ // Loop back for one more try
  }
- return turn;
+ // Shouldn't reach here, but return last turn if we do
+ return lastTurn;
  }
  async generateJson(contents, schema, abortSignal, model, config = {}) {
  await this.lazyInitialize();
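
The rewritten sendMessageStream loop above buffers Content, Finished, and Citation events in deferredEvents and only flushes them once it decides the turn will not be retried; on a retry the buffer is discarded. A minimal, generic sketch of that defer-then-flush pattern, with placeholder event types that are not the package's real GeminiEventType values:

type DemoEvent = { type: 'content' | 'tool_call' | 'finished'; value?: unknown };

async function* withDeferredFlush(
  source: AsyncIterable<DemoEvent>,
  shouldDefer: (event: DemoEvent) => boolean,
): AsyncGenerator<DemoEvent> {
  const deferred: DemoEvent[] = [];
  for await (const event of source) {
    if (shouldDefer(event)) {
      // Hold presentation events back until the retry decision is made.
      deferred.push(event);
    } else {
      // Progress events (e.g. tool calls) pass through immediately.
      yield event;
    }
  }
  // Decision point: this sketch always flushes; the real loop may instead
  // drop the buffer and start another model turn with an injected reminder.
  for (const event of deferred) {
    yield event;
  }
}
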