orquesta-cli 0.2.70 → 0.2.71

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,4 @@
1
1
  import { Message, LLMRequestOptions } from '../../types/index.js';
2
- export declare function fetchWithRetry(url: string, options: RequestInit, maxRetries?: number): Promise<Response>;
3
2
  export interface LLMResponse {
4
3
  id: string;
5
4
  object: string;
@@ -100,20 +100,6 @@ function captureBatutaHeaders(headers) {
100
100
  setLastBatutaRoute({ tier, routedTo, routedFrom });
101
101
  }
102
102
  }
103
- export async function fetchWithRetry(url, options, maxRetries = 3) {
104
- for (let attempt = 0; attempt <= maxRetries; attempt++) {
105
- const res = await fetch(url, options);
106
- if (res.ok || attempt === maxRetries)
107
- return res;
108
- if (res.status === 429 || res.status >= 500) {
109
- const delay = Math.min(1000 * 2 ** attempt, 30000);
110
- await new Promise(r => setTimeout(r, delay));
111
- continue;
112
- }
113
- return res;
114
- }
115
- throw new Error('Unreachable');
116
- }
117
103
  export class LLMClient {
118
104
  axiosInstance;
119
105
  baseUrl;
@@ -152,33 +138,29 @@ export class LLMClient {
152
138
  });
153
139
  }
154
140
  preprocessMessages(messages, modelId) {
155
- const isGptOss = /^gpt-oss-(120b|20b)$/i.test(modelId);
156
141
  return messages.map((msg) => {
157
- const multimodal = msg.multimodal;
142
+ let processedMsg = { ...msg };
143
+ const multimodal = processedMsg.multimodal;
158
144
  if (multimodal && Array.isArray(multimodal)) {
159
- const processedMsg = { ...msg };
160
145
  processedMsg.content = multimodal;
161
146
  delete processedMsg.multimodal;
162
147
  return processedMsg;
163
148
  }
164
149
  if (msg.role !== 'assistant') {
165
- return msg;
166
- }
167
- const msgAny = msg;
168
- const needsReasoningFix = msgAny.reasoning_content && (!msg.content || msg.content.trim() === '');
169
- const needsHarmonyFix = isGptOss && msg.tool_calls && msg.tool_calls.length > 0 && (!msg.content || msg.content.trim() === '');
170
- const needsNullFix = msg.content === undefined || msg.content === null;
171
- if (!needsReasoningFix && !needsHarmonyFix && !needsNullFix) {
172
- return msg;
150
+ return processedMsg;
173
151
  }
174
- const processedMsg = { ...msg };
175
- if (needsReasoningFix) {
152
+ const msgAny = processedMsg;
153
+ if (msgAny.reasoning_content && (!msg.content || msg.content.trim() === '')) {
176
154
  processedMsg.content = msgAny.reasoning_content;
177
155
  delete processedMsg.reasoning_content;
178
156
  }
179
- if (needsHarmonyFix) {
180
- const toolNames = msg.tool_calls.map(tc => tc.function.name).join(', ');
181
- processedMsg.content = msgAny.reasoning || `Calling tools: ${toolNames}`;
157
+ if (/^gpt-oss-(120b|20b)$/i.test(modelId)) {
158
+ if (msg.tool_calls && msg.tool_calls.length > 0) {
159
+ if (!processedMsg.content || processedMsg.content.trim() === '') {
160
+ const toolNames = msg.tool_calls.map(tc => tc.function.name).join(', ');
161
+ processedMsg.content = msgAny.reasoning || `Calling tools: ${toolNames}`;
162
+ }
163
+ }
182
164
  }
183
165
  if (processedMsg.content === undefined || processedMsg.content === null) {
184
166
  processedMsg.content = '';
@@ -202,26 +184,13 @@ export class LLMClient {
202
184
  const modelId = options.model || this.model;
203
185
  const processedMessages = options.messages ?
204
186
  this.preprocessMessages(options.messages, modelId) : [];
205
- logger.vars({ name: 'modelId', value: modelId }, { name: 'originalMessages', value: options.messages?.length || 0 }, { name: 'processedMessages', value: processedMessages.length }, { name: 'temperature', value: options.temperature ?? 0 });
206
- let systemCached = false;
207
- for (const msg of processedMessages) {
208
- if (!systemCached && msg.role === 'system') {
209
- msg.cache_control = { type: 'ephemeral' };
210
- systemCached = true;
211
- }
212
- else if (msg.role === 'user' && typeof msg.content === 'string' && msg.content.length > 2000) {
213
- msg.cache_control = { type: 'ephemeral' };
214
- }
215
- }
216
- const isClaudeModel = /claude|sonnet|opus|haiku/i.test(modelId);
217
- const supportsThinking = isClaudeModel && /anthropic|getorquesta|batuta/i.test(this.baseUrl);
187
+ logger.vars({ name: 'modelId', value: modelId }, { name: 'originalMessages', value: options.messages?.length || 0 }, { name: 'processedMessages', value: processedMessages.length }, { name: 'temperature', value: options.temperature ?? 0.7 });
218
188
  const requestBody = {
219
189
  model: modelId,
220
190
  messages: processedMessages,
221
191
  temperature: options.temperature ?? 0,
222
192
  max_tokens: options.max_tokens,
223
193
  stream: false,
224
- ...(supportsThinking && { thinking: { type: 'enabled', budget_tokens: 10000 } }),
225
194
  ...(options.tools && {
226
195
  tools: options.tools,
227
196
  parallel_tool_calls: false,
@@ -482,15 +451,12 @@ export class LLMClient {
482
451
  const modelId = options.model || this.model;
483
452
  const processedMessages = options.messages ?
484
453
  this.preprocessMessages(options.messages, modelId) : [];
485
- const isClaudeModel = /claude|sonnet|opus|haiku/i.test(modelId);
486
- const supportsThinking = isClaudeModel && /anthropic|getorquesta|batuta/i.test(this.baseUrl);
487
454
  const requestBody = {
488
455
  model: modelId,
489
456
  messages: processedMessages,
490
457
  temperature: options.temperature ?? 0,
491
458
  max_tokens: options.max_tokens,
492
459
  stream: true,
493
- ...(supportsThinking && { thinking: { type: 'enabled', budget_tokens: 10000 } }),
494
460
  ...(options.tools && {
495
461
  tools: options.tools,
496
462
  ...(options.tool_choice && { tool_choice: options.tool_choice }),
@@ -629,9 +595,10 @@ export class LLMClient {
629
595
  const toolCallHistory = [];
630
596
  let iterations = 0;
631
597
  let contextLengthRecoveryAttempted = false;
598
+ let noToolCallRetries = 0;
632
599
  let finalResponseFailures = 0;
600
+ const MAX_NO_TOOL_CALL_RETRIES = 3;
633
601
  const MAX_FINAL_RESPONSE_FAILURES = 3;
634
- const { executeFileTool, requestToolApproval, emitAssistantResponse } = await import('../../tools/llm/simple/file-tools.js');
635
602
  const recentToolSignatures = [];
636
603
  const recentNormalizedSignatures = [];
637
604
  const LOOP_WINDOW = 5;
@@ -657,7 +624,7 @@ export class LLMClient {
657
624
  response = await this.chatCompletion({
658
625
  messages: workingMessages,
659
626
  tools,
660
- tool_choice: 'auto',
627
+ tool_choice: 'required',
661
628
  ...(roleModel ? { model: roleModel } : {}),
662
629
  });
663
630
  }
@@ -708,11 +675,6 @@ export class LLMClient {
708
675
  throw new Error('Cannot find choice in response.');
709
676
  }
710
677
  const assistantMessage = choice.message;
711
- const reasoning = assistantMessage.reasoning_content;
712
- if (reasoning && typeof reasoning === 'string' && reasoning.trim()) {
713
- const { emitReasoning } = await import('../../tools/llm/simple/file-tools.js');
714
- emitReasoning(reasoning.trim());
715
- }
716
678
  if ((!assistantMessage.tool_calls || assistantMessage.tool_calls.length === 0) &&
717
679
  typeof assistantMessage.content === 'string') {
718
680
  const coerced = coerceSyntheticToolCalls(assistantMessage.content);
@@ -776,6 +738,7 @@ export class LLMClient {
776
738
  });
777
739
  continue;
778
740
  }
741
+ const { executeFileTool, requestToolApproval } = await import('../../tools/llm/simple/file-tools.js');
779
742
  const approvalResult = await requestToolApproval(toolName, toolArgs);
780
743
  if (approvalResult && typeof approvalResult === 'object' && approvalResult.reject) {
781
744
  logger.flow(`Tool rejected by user: ${toolName}`);
@@ -830,6 +793,7 @@ export class LLMClient {
830
793
  if (finalResponseFailures >= MAX_FINAL_RESPONSE_FAILURES) {
831
794
  logger.warn('Max final_response failures exceeded - forcing completion');
832
795
  const fallbackMessage = toolArgs['message'] || 'Task completed with incomplete TODOs.';
796
+ const { emitAssistantResponse } = await import('../../tools/llm/simple/file-tools.js');
833
797
  emitAssistantResponse(fallbackMessage);
834
798
  return {
835
799
  message: { role: 'assistant', content: fallbackMessage },
@@ -869,14 +833,34 @@ export class LLMClient {
869
833
  continue;
870
834
  }
871
835
  else {
872
- const finalContent = assistantMessage.content || 'Task completed.';
873
- const { emitAssistantResponse } = await import('../../tools/llm/simple/file-tools.js');
874
- emitAssistantResponse(finalContent);
875
- return {
876
- message: { role: 'assistant', content: finalContent },
877
- toolCalls: toolCallHistory,
878
- allMessages: workingMessages,
879
- };
836
+ noToolCallRetries++;
837
+ logger.flow(`No tool call - enforcing tool usage (attempt ${noToolCallRetries}/${MAX_NO_TOOL_CALL_RETRIES})`);
838
+ if (noToolCallRetries > MAX_NO_TOOL_CALL_RETRIES) {
839
+ logger.warn('Max no-tool-call retries exceeded - returning content as final response');
840
+ const fallbackContent = assistantMessage.content || 'Task completed.';
841
+ const { emitAssistantResponse } = await import('../../tools/llm/simple/file-tools.js');
842
+ emitAssistantResponse(fallbackContent);
843
+ return {
844
+ message: { role: 'assistant', content: fallbackContent },
845
+ toolCalls: toolCallHistory,
846
+ allMessages: workingMessages,
847
+ };
848
+ }
849
+ const hasMalformedToolCall = assistantMessage.content &&
850
+ (/<tool_call>/i.test(assistantMessage.content) ||
851
+ /<arg_key>/i.test(assistantMessage.content) ||
852
+ /<arg_value>/i.test(assistantMessage.content) ||
853
+ /<\/tool_call>/i.test(assistantMessage.content) ||
854
+ /bash<arg_key>/i.test(assistantMessage.content));
855
+ const retryMessage = hasMalformedToolCall
856
+ ? 'Your previous response contained a malformed tool call (XML tags in content). You MUST use the proper tool_calls API format. Use final_response tool to deliver your message to the user.'
857
+ : 'You must use tools for all actions. Use final_response tool to deliver your final message to the user after completing all tasks.';
858
+ workingMessages.push({
859
+ role: 'user',
860
+ content: retryMessage,
861
+ });
862
+ logger.debug('Enforcing tool call - added retry message');
863
+ continue;
880
864
  }
881
865
  }
882
866
  }
@@ -931,13 +915,13 @@ export class LLMClient {
931
915
  errorMessage,
932
916
  errorType,
933
917
  errorCode,
934
- responseBody: typeof data === 'string' ? data.slice(0, 2000) : JSON.stringify(data, null, 2)?.slice(0, 2000),
918
+ responseBody: JSON.stringify(data, null, 2),
935
919
  requestMethod: requestContext?.method,
936
920
  requestUrl: requestContext?.url,
937
921
  requestBody: requestContext?.body
938
922
  ? JSON.stringify(requestContext.body, null, 2).substring(0, 5000)
939
923
  : undefined,
940
- responseHeaders: Object.fromEntries(Object.entries(axiosError.response.headers || {}).filter(([, v]) => typeof v === 'string' || typeof v === 'number')),
924
+ responseHeaders: axiosError.response.headers,
941
925
  });
942
926
  logger.httpResponse(status, axiosError.response.statusText, data);
943
927
  if (errorType === 'invalid_request_error' &&
@@ -5,9 +5,7 @@ import type { StateCallbacks } from './types.js';
5
5
  export declare function setAppendedSystemPrompt(text: string): void;
6
6
  export declare class PlanExecutor {
7
7
  private currentLLMClient;
8
- private cachedSystemPrompt;
9
8
  constructor();
10
- private getSystemPrompt;
11
9
  executePlanMode(userMessage: string, llmClient: LLMClient, messages: Message[], isInterruptedRef: {
12
10
  current: boolean;
13
11
  }, callbacks: StateCallbacks): Promise<void>;
@@ -16,7 +16,6 @@ import { GIT_COMMIT_RULES } from '../prompts/shared/git-rules.js';
16
16
  import { logger } from '../utils/logger.js';
17
17
  import { getStreamLogger } from '../utils/json-stream-logger.js';
18
18
  import { detectGitRepo } from '../utils/git-utils.js';
19
- import { getRelevantContext } from '../core/embeddings-context.js';
20
19
  import { formatErrorMessage, buildTodoContext, findActiveTodo, getTodoStats } from './utils.js';
21
20
  import { BaseError } from '../errors/base.js';
22
21
  import { runParallelGraph, shouldUseParallelOrchestrator } from './parallel-orchestrator.js';
@@ -39,20 +38,10 @@ function buildSystemPrompt() {
39
38
  const appended = appendedSystemPrompt ? `\n\n${appendedSystemPrompt}` : '';
40
39
  return base + buildEnvironmentContext() + projectContext + getMemoryPrompt() + getGitContextPrompt() + appended;
41
40
  }
42
- function buildLightSystemPrompt() {
43
- return `You are Orquesta, an AI coding assistant. Respond concisely and naturally. Match the user's language.` + getMemoryPrompt();
44
- }
45
41
  export class PlanExecutor {
46
42
  currentLLMClient = null;
47
- cachedSystemPrompt = null;
48
43
  constructor() {
49
44
  }
50
- getSystemPrompt() {
51
- if (!this.cachedSystemPrompt) {
52
- this.cachedSystemPrompt = buildSystemPrompt();
53
- }
54
- return this.cachedSystemPrompt;
55
- }
56
45
  async executePlanMode(userMessage, llmClient, messages, isInterruptedRef, callbacks) {
57
46
  const planningStartTime = Date.now();
58
47
  const streamLogger = getStreamLogger();
@@ -93,112 +82,80 @@ export class PlanExecutor {
93
82
  throw new Error('INTERRUPTED');
94
83
  }
95
84
  let currentMessages = messages;
96
- const isSimpleTask = userMessage.length < 80 &&
97
- /^(ping|pong|hi|hello|hola|hey|thanks|gracias|ok|bye|adios|test)\s*[.!?]?$/i.test(userMessage.trim());
98
- const isConversational = userMessage.length < 100 &&
99
- /^(ping|hi|hello|hola|hey|thanks|ok|si|yes|no|que|how|what|why|when|who)\b/i.test(userMessage);
100
- if (isSimpleTask) {
101
- logger.flow('Simple task detected — skipping planner, executor will handle directly');
102
- streamLogger?.logPlanningEnd(0, [], false, 0);
103
- const lastMsg = currentMessages[currentMessages.length - 1];
104
- if (!(lastMsg?.role === 'user' && lastMsg?.content === userMessage)) {
105
- currentMessages = [...currentMessages, { role: 'user', content: userMessage }];
106
- }
85
+ callbacks.setCurrentActivity('Thinking');
86
+ const plannerModel = configManager.getRoleModel('planner');
87
+ const planningLLM = new PlanningLLM(llmClient, plannerModel ?? undefined);
88
+ const plannerStartedAt = Date.now();
89
+ if (callbacks.askUser) {
90
+ planningLLM.setAskUserCallback(callbacks.askUser);
107
91
  }
108
- else {
109
- callbacks.setCurrentActivity('Thinking');
110
- const plannerModel = configManager.getRoleModel('planner');
111
- const planningLLM = new PlanningLLM(llmClient, plannerModel ?? undefined);
112
- const plannerStartedAt = Date.now();
113
- if (callbacks.askUser) {
114
- planningLLM.setAskUserCallback(callbacks.askUser);
115
- }
116
- const planResult = await planningLLM.generateTODOListWithDocsDecision(userMessage, currentMessages);
117
- auditLog.emit(auditSid, 'planner.complete', {
118
- runId,
119
- model: plannerModel,
120
- durationMs: Date.now() - plannerStartedAt,
121
- todoCount: planResult.todos.length,
122
- directResponse: !!planResult.directResponse,
92
+ const planResult = await planningLLM.generateTODOListWithDocsDecision(userMessage, currentMessages);
93
+ auditLog.emit(auditSid, 'planner.complete', {
94
+ runId,
95
+ model: plannerModel,
96
+ durationMs: Date.now() - plannerStartedAt,
97
+ todoCount: planResult.todos.length,
98
+ directResponse: !!planResult.directResponse,
99
+ });
100
+ if (planResult.clarificationMessages?.length) {
101
+ currentMessages = [...currentMessages, ...planResult.clarificationMessages];
102
+ callbacks.setMessages([...currentMessages]);
103
+ logger.flow('Added planning clarification messages to history', {
104
+ count: planResult.clarificationMessages.length,
123
105
  });
124
- if (planResult.clarificationMessages?.length) {
125
- currentMessages = [...currentMessages, ...planResult.clarificationMessages];
126
- callbacks.setMessages([...currentMessages]);
127
- logger.flow('Added planning clarification messages to history', {
128
- count: planResult.clarificationMessages.length,
129
- });
130
- }
131
- if (planResult.directResponse) {
132
- logger.flow('Direct response - no execution needed');
133
- streamLogger?.logPlanningEnd(0, [], true, Date.now() - planningStartTime);
134
- const lastMsg = currentMessages[currentMessages.length - 1];
135
- const needsUserMessage = !(lastMsg?.role === 'user' && lastMsg?.content === userMessage);
136
- const updatedMessages = needsUserMessage
137
- ? [
138
- ...currentMessages,
139
- { role: 'user', content: userMessage },
140
- { role: 'assistant', content: planResult.directResponse }
141
- ]
142
- : [
143
- ...currentMessages,
144
- { role: 'assistant', content: planResult.directResponse }
145
- ];
146
- emitAssistantResponse(planResult.directResponse);
147
- callbacks.setMessages([...updatedMessages]);
148
- sessionManager.autoSaveCurrentSession(updatedMessages);
149
- callbacks.setExecutionPhase('idle');
150
- logger.exit('PlanExecutor.executePlanMode', { directResponse: true });
151
- return;
152
- }
153
- currentTodos = planResult.todos;
154
- streamLogger?.logPlanningEnd(currentTodos.length, currentTodos.map(t => ({ id: t.id, title: t.title, status: t.status })), false, Date.now() - planningStartTime);
155
- logger.vars({ name: 'todoCount', value: currentTodos.length }, { name: 'docsSearchNeeded', value: planResult.docsSearchNeeded });
156
- callbacks.setTodos(currentTodos);
157
- emitPlanCreated(currentTodos.map(t => t.title));
158
- const planMessage = planResult.docsSearchNeeded
159
- ? `🎼 Created ${currentTodos.length} tasks (including docs search). Starting execution...`
160
- : `🎼 Created ${currentTodos.length} tasks. Starting execution...`;
161
- const lastMsgForPlan = currentMessages[currentMessages.length - 1];
162
- const needsUserMessageForPlan = !(lastMsgForPlan?.role === 'user' && lastMsgForPlan?.content === userMessage);
163
- currentMessages = needsUserMessageForPlan
106
+ }
107
+ if (planResult.directResponse) {
108
+ logger.flow('Direct response - no execution needed');
109
+ streamLogger?.logPlanningEnd(0, [], true, Date.now() - planningStartTime);
110
+ const lastMsg = currentMessages[currentMessages.length - 1];
111
+ const needsUserMessage = !(lastMsg?.role === 'user' && lastMsg?.content === userMessage);
112
+ const updatedMessages = needsUserMessage
164
113
  ? [
165
114
  ...currentMessages,
166
115
  { role: 'user', content: userMessage },
167
- { role: 'assistant', content: planMessage }
116
+ { role: 'assistant', content: planResult.directResponse }
168
117
  ]
169
118
  : [
170
119
  ...currentMessages,
171
- { role: 'assistant', content: planMessage }
120
+ { role: 'assistant', content: planResult.directResponse }
172
121
  ];
173
- callbacks.setMessages(currentMessages);
122
+ emitAssistantResponse(planResult.directResponse);
123
+ callbacks.setMessages([...updatedMessages]);
124
+ sessionManager.autoSaveCurrentSession(updatedMessages);
125
+ callbacks.setExecutionPhase('idle');
126
+ logger.exit('PlanExecutor.executePlanMode', { directResponse: true });
127
+ return;
174
128
  }
129
+ currentTodos = planResult.todos;
130
+ streamLogger?.logPlanningEnd(currentTodos.length, currentTodos.map(t => ({ id: t.id, title: t.title, status: t.status })), false, Date.now() - planningStartTime);
131
+ logger.vars({ name: 'todoCount', value: currentTodos.length }, { name: 'docsSearchNeeded', value: planResult.docsSearchNeeded });
132
+ callbacks.setTodos(currentTodos);
133
+ emitPlanCreated(currentTodos.map(t => t.title));
134
+ const planMessage = planResult.docsSearchNeeded
135
+ ? `📋 Created ${currentTodos.length} tasks (including docs search). Starting execution...`
136
+ : `📋 Created ${currentTodos.length} tasks. Starting execution...`;
137
+ const lastMsgForPlan = currentMessages[currentMessages.length - 1];
138
+ const needsUserMessageForPlan = !(lastMsgForPlan?.role === 'user' && lastMsgForPlan?.content === userMessage);
139
+ currentMessages = needsUserMessageForPlan
140
+ ? [
141
+ ...currentMessages,
142
+ { role: 'user', content: userMessage },
143
+ { role: 'assistant', content: planMessage }
144
+ ]
145
+ : [
146
+ ...currentMessages,
147
+ { role: 'assistant', content: planMessage }
148
+ ];
149
+ callbacks.setMessages(currentMessages);
175
150
  this.setupTodoCallbacks(currentTodos, callbacks, (updated) => {
176
151
  currentTodos = updated;
177
152
  });
178
153
  callbacks.setExecutionPhase('executing');
179
- const allTools = toolRegistry.getLLMToolDefinitions();
180
- let tools;
181
- if (isConversational) {
182
- tools = [];
183
- }
184
- else if (currentTodos.length === 0) {
185
- const coreTools = new Set(['read_file', 'create_file', 'edit_file', 'list_files', 'find_files', 'search_content', 'bash', 'tell_to_user']);
186
- tools = allTools.filter((t) => coreTools.has(t.function?.name));
187
- }
188
- else {
189
- tools = allTools;
190
- }
191
- const systemContent = isConversational
192
- ? buildLightSystemPrompt()
193
- : this.getSystemPrompt() + getRelevantContext(userMessage);
194
- const sysIdx = currentMessages.findIndex(m => m.role === 'system');
195
- if (sysIdx >= 0) {
196
- currentMessages = [...currentMessages];
197
- currentMessages[sysIdx] = { role: 'system', content: systemContent };
198
- }
199
- else {
154
+ const tools = toolRegistry.getLLMToolDefinitions();
155
+ const hasSystemMessage = currentMessages.some(m => m.role === 'system');
156
+ if (!hasSystemMessage) {
200
157
  currentMessages = [
201
- { role: 'system', content: systemContent },
158
+ { role: 'system', content: buildSystemPrompt() },
202
159
  ...currentMessages
203
160
  ];
204
161
  }
@@ -226,7 +183,7 @@ export class PlanExecutor {
226
183
  });
227
184
  if (useParallel && sessionId) {
228
185
  logger.flow('Dispatching parallel orchestrator', { todoCount: currentTodos.length });
229
- const baseSystem = currentMessages.find(m => m.role === 'system')?.content || this.getSystemPrompt();
186
+ const baseSystem = currentMessages.find(m => m.role === 'system')?.content || buildSystemPrompt();
230
187
  const graphResult = await runParallelGraph({
231
188
  llmClient,
232
189
  todos: currentTodos,
@@ -243,9 +200,12 @@ export class PlanExecutor {
243
200
  }
244
201
  else {
245
202
  const todoContext = buildTodoContext(currentTodos);
246
- const messagesForLLM = todoContext
247
- ? [...currentMessages, { role: 'user', content: `[Current task status]${todoContext}` }]
248
- : currentMessages;
203
+ const lastUserMsgIndex = currentMessages.map(m => m.role).lastIndexOf('user');
204
+ const messagesForLLM = lastUserMsgIndex >= 0
205
+ ? currentMessages.map((m, i) => i === lastUserMsgIndex
206
+ ? { ...m, content: m.content + todoContext }
207
+ : m)
208
+ : [...currentMessages, { role: 'user', content: `Execute the TODO list.${todoContext}` }];
249
209
  const executorModel = configManager.getRoleModel('executor');
250
210
  const result = await llmClient.chatCompletionWithTools(messagesForLLM, tools, {
251
211
  getPendingMessage: callbacks.getPendingMessage,
@@ -320,7 +280,6 @@ export class PlanExecutor {
320
280
  clearFinalResponseCallbacks();
321
281
  clearDocsSearchLLMClientGetter();
322
282
  this.currentLLMClient = null;
323
- this.cachedSystemPrompt = null;
324
283
  }
325
284
  }
326
285
  async resumeTodoExecution(userMessage, llmClient, messages, todos, isInterruptedRef, callbacks) {
@@ -346,9 +305,8 @@ export class PlanExecutor {
346
305
  const tools = toolRegistry.getLLMToolDefinitions();
347
306
  const hasSystemMessage = currentMessages.some(m => m.role === 'system');
348
307
  if (!hasSystemMessage) {
349
- const relevantContext = getRelevantContext(userMessage);
350
308
  currentMessages = [
351
- { role: 'system', content: this.getSystemPrompt() + relevantContext },
309
+ { role: 'system', content: buildSystemPrompt() },
352
310
  ...currentMessages
353
311
  ];
354
312
  }
@@ -358,9 +316,12 @@ export class PlanExecutor {
358
316
  const activeTodo = findActiveTodo(currentTodos);
359
317
  callbacks.setCurrentActivity(activeTodo?.title || 'Working on tasks');
360
318
  const todoContext = buildTodoContext(currentTodos);
361
- const messagesForLLM = todoContext
362
- ? [...currentMessages, { role: 'user', content: `[Current task status]${todoContext}` }]
363
- : currentMessages;
319
+ const lastUserMsgIndex = currentMessages.map(m => m.role).lastIndexOf('user');
320
+ const messagesForLLM = lastUserMsgIndex >= 0
321
+ ? currentMessages.map((m, i) => i === lastUserMsgIndex
322
+ ? { ...m, content: m.content + todoContext }
323
+ : m)
324
+ : [...currentMessages, { role: 'user', content: `Resume the TODO list.${todoContext}` }];
364
325
  const executorModel = configManager.getRoleModel('executor');
365
326
  const result = await llmClient.chatCompletionWithTools(messagesForLLM, tools, {
366
327
  getPendingMessage: callbacks.getPendingMessage,
@@ -392,7 +353,6 @@ export class PlanExecutor {
392
353
  clearFinalResponseCallbacks();
393
354
  clearDocsSearchLLMClientGetter();
394
355
  this.currentLLMClient = null;
395
- this.cachedSystemPrompt = null;
396
356
  }
397
357
  }
398
358
  async executeAutoMode(userMessage, llmClient, messages, _todos, isInterruptedRef, callbacks) {
@@ -22,6 +22,7 @@ export const AVAILABLE_TOOLS_WITH_TODO = `
22
22
  - **tell_to_user**: Send status updates to the user
23
23
  - **ask_to_user**: Ask user a question with multiple choice options
24
24
  - **write_todos**: Update entire TODO list (replaces current list)
25
+ - **call_docs_search_agent**: Search local documentation (~/.local-cli/docs)
25
26
  `.trim();
26
27
  export const TOOL_REASON_GUIDE = `
27
28
  ## CRITICAL - Tool "reason" Parameter
@@ -1,77 +1,83 @@
1
1
  import { LANGUAGE_PRIORITY_RULE } from '../shared/language-rules.js';
2
+ import { AVAILABLE_TOOLS_WITH_TODO, TOOL_REASON_GUIDE } from '../shared/tool-usage.js';
2
3
  import { CODEBASE_FIRST_RULE } from '../shared/codebase-rules.js';
3
- export const PLAN_EXECUTE_SYSTEM_PROMPT = `You are Orquesta, an expert AI coding assistant working in the user's terminal. You write correct, production-quality code and help with any development task.
4
+ export const PLAN_EXECUTE_SYSTEM_PROMPT = `You are an AI assistant executing a TODO-based plan.
4
5
 
5
6
  ${LANGUAGE_PRIORITY_RULE}
6
7
 
7
- ## How You Work
8
+ ## TODO Workflow
8
9
 
9
- 1. **Understand first** — Read relevant code before modifying it. Never guess file contents.
10
- 2. **Act, don't describe** — Use tools to do the work. Don't say "I would do X", just do X.
11
- 3. **Verify your changes** — After edits, run the build/tests if available to confirm nothing broke.
12
- 4. **Be concise** — Short answers for simple questions. Thorough work for complex tasks.
13
- 5. **Match the user's intent** — Do what was asked, no more. Don't add unrequested features or refactors.
10
+ 1. Work through TODOs systematically
11
+ 2. Update status using \`write_todos\` (include ALL todos with current status)
12
+ 3. **DONE when ALL TODOs are "completed"**
14
13
 
15
- ## Decision Framework
14
+ **CRITICAL: Keep TODO status in sync with your actual progress!**
15
+ - When starting a task → mark it "in_progress" IMMEDIATELY
16
+ - When finishing a task → mark it "completed" IMMEDIATELY
17
+ - The user sees the TODO list in real-time - mismatched status is confusing
18
+ - Call \`write_todos\` FREQUENTLY, not just at the end
16
19
 
17
- - **Simple questions** (what is X, explain Y): Respond directly with knowledge.
18
- - **Code tasks** (fix, add, edit, refactor): Read → Edit → Verify. Use tools.
19
- - **Investigation** (why is this failing, what does X do): Read code, search, then explain.
20
- - **Ambiguous requests**: Infer the most useful action and proceed. Only ask if truly blocked.
20
+ ${AVAILABLE_TOOLS_WITH_TODO}
21
21
 
22
- ## Tool Usage
22
+ ${TOOL_REASON_GUIDE}
23
23
 
24
- Use tools for all file operations, commands, and code changes. Your available tools:
25
- - **read_file**: Always read before editing
26
- - **edit_file**: Modify existing files (match exact content for old_string)
27
- - **create_file**: Create new files
28
- - **list_files** / **find_files** / **search_content**: Navigate the codebase
29
- - **bash**: Run commands (build, test, git, etc.)
30
- - **tell_to_user**: Show progress updates
31
- - **write_todos**: Track task progress (for multi-step work)
24
+ ## Execution Rules
32
25
 
33
- Every tool has a "reason" parameter shown to the user. Write it naturally in the user's language.
26
+ 1. **Read before modify** - Always read existing code first
27
+ 2. **Use tools** - Perform actual work, don't just describe
28
+ 3. **Retry on error** - Up to 3 attempts before marking "failed"
29
+ 4. **Stay focused** - Only work on TODOs, no unrelated features
34
30
 
35
- ## Code Quality Rules
31
+ ${CODEBASE_FIRST_RULE}
36
32
 
37
- - Write minimal, correct code that solves the problem
38
- - Follow existing project conventions (style, naming, patterns)
39
- - Use secure coding practices by default
40
- - Don't introduce new dependencies unless necessary
41
- - Include error handling where appropriate
33
+ ## CRITICAL: Tool Error Handling
42
34
 
43
- ## Error Handling
35
+ **If a tool returns an error, you MUST retry the same tool with corrected parameters.**
44
36
 
45
- If a tool fails:
46
- 1. Read the error carefully
47
- 2. Investigate (read_file to check actual content)
48
- 3. Retry with corrected parameters
49
- 4. Only give up after 3 failed attempts
37
+ 1. STOP - Read the error message carefully
38
+ 2. Investigate - Use \`read_file\` to check actual file content
39
+ 3. **RETRY THE SAME TOOL** with corrected parameters (DO NOT skip or move on)
40
+ 4. Repeat until success or 3 failures
50
41
 
51
- Common edit_file failures: wrong old_string → re-read file, copy exact text, retry.
42
+ **You are NOT allowed to:**
43
+ - Skip the failed tool and move to next task
44
+ - Say "I'll try a different approach" without actually retrying
45
+ - Mark TODO as complete if the tool failed
52
46
 
53
- ${CODEBASE_FIRST_RULE}
47
+ Example flow:
48
+ 1. \`edit_file\` fails: "Line 77 content does not match"
49
+ 2. Call \`read_file\` to see actual content
50
+ 3. **Call \`edit_file\` again** with correct \`old_string\`
51
+ 4. Only proceed after edit succeeds
52
+
53
+ ## CRITICAL: When to Respond
54
+
55
+ **ONLY respond when ALL TODOs are "completed" or "failed".**
56
+
57
+ - Responding early = execution ends prematurely
58
+ - Use \`tell_to_user\` to communicate progress during execution
59
+ - \`write_todos\` only updates internal state
60
+
61
+ **Before final response, verify:**
62
+ - All TODOs completed?
63
+ - All tool calls successful?
64
+ - User's request fulfilled?
54
65
 
55
- ## Response Style
66
+ ## CRITICAL: Final Response
56
67
 
57
- - Direct and concise. No filler phrases.
58
- - Code in markdown blocks with language tags.
59
- - When summarizing completed work: state what was done in 1-3 sentences.
60
- - Match the user's language (if they write in Spanish, respond in Spanish).
68
+ Your final response MUST contain the **actual answer or result**:
69
+ - Question → Answer with information found
70
+ - Task → Summarize what was done
61
71
 
62
- ## TODO Workflow (for multi-step tasks)
72
+ **DO NOT** just say "Task complete" or give task statistics.
63
73
 
64
- When working on a plan with TODOs:
65
- - Update status via write_todos as you progress
66
- - Mark "in_progress" when starting, "completed" when done
67
- - Stay focused on the current task
74
+ Example:
75
+ - User: "What's the project name?" → "This project is **LOCAL-CLI**."
76
+ - User: "Add a debug function" → "Added debug function to logger.ts."
68
77
 
69
- ## IMPORTANT
78
+ ## Loop Detection
70
79
 
71
- - You can respond directly without using any tool — do so for simple questions or when you're done working.
72
- - After completing all requested work, give a brief summary of what was done.
73
- - When a tool produces output the user asked to see (bash command results, file contents), INCLUDE the relevant output in your response. Never say just "Task completed" — show the actual data.
74
- - Never fabricate file contents, paths, or command outputs. If unsure, investigate first.
80
+ If TODO context keeps repeating but work is done → IMMEDIATELY mark all as "completed".
75
81
  `;
76
82
  export default PLAN_EXECUTE_SYSTEM_PROMPT;
77
83
  //# sourceMappingURL=plan-execute.js.map
@@ -29,16 +29,23 @@ const FINAL_RESPONSE_DEFINITION = {
29
29
  type: 'function',
30
30
  function: {
31
31
  name: 'final_response',
32
- description: `Deliver a final summary to the user after completing work. Optional — you can also respond directly without this tool.
32
+ description: `Use this tool to deliver your final response to the user after completing all tasks.
33
33
 
34
- Use this when you want to explicitly signal task completion with a summary.
35
- Any incomplete TODOs will be auto-marked as done.`,
34
+ IMPORTANT:
35
+ - You MUST complete all TODOs before calling this tool
36
+ - If any TODO is not completed, this tool will return an error
37
+ - After all tasks are done, use this tool to summarize what was accomplished
38
+
39
+ Example:
40
+ {
41
+ "message": "I've completed all the requested tasks:\\n\\n1. Fixed the bug in the login form\\n2. Added input validation\\n3. Updated the tests\\n\\nAll changes have been committed."
42
+ }`,
36
43
  parameters: {
37
44
  type: 'object',
38
45
  properties: {
39
46
  message: {
40
47
  type: 'string',
41
- description: 'Your final response message to the user.',
48
+ description: 'Your final response message to the user. Summarize what was accomplished.',
42
49
  },
43
50
  },
44
51
  required: ['message'],
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "orquesta-cli",
3
- "version": "0.2.70",
3
+ "version": "0.2.71",
4
4
  "description": "Orquesta CLI - AI-powered coding assistant with team collaboration",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",