npm - orquesta-cli - Versions diffs - 0.2.69 → 0.2.71 - Mend

orquesta-cli 0.2.69 → 0.2.71

This diff shows the changes between publicly available versions of this package as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (8)

package/dist/core/llm/llm-client.d.ts +0 -1
package/dist/core/llm/llm-client.js +48 -64
package/dist/orchestration/plan-executor.d.ts +0 -2
package/dist/orchestration/plan-executor.js +73 -113
package/dist/prompts/shared/tool-usage.js +1 -0
package/dist/prompts/system/plan-execute.js +57 -51
package/dist/tools/llm/simple/final-response-tool.js +11 -4
package/package.json +1 -1

package/dist/core/llm/llm-client.d.ts CHANGED Viewed

@@ -1,5 +1,4 @@
 import { Message, LLMRequestOptions } from '../../types/index.js';
-export declare function fetchWithRetry(url: string, options: RequestInit, maxRetries?: number): Promise<Response>;
 export interface LLMResponse {
     id: string;
     object: string;

package/dist/core/llm/llm-client.js CHANGED Viewed

@@ -100,20 +100,6 @@ function captureBatutaHeaders(headers) {
         setLastBatutaRoute({ tier, routedTo, routedFrom });
     }
 }
-export async function fetchWithRetry(url, options, maxRetries = 3) {
-    for (let attempt = 0; attempt <= maxRetries; attempt++) {
-        const res = await fetch(url, options);
-        if (res.ok || attempt === maxRetries)
-            return res;
-        if (res.status === 429 || res.status >= 500) {
-            const delay = Math.min(1000 * 2 ** attempt, 30000);
-            await new Promise(r => setTimeout(r, delay));
-            continue;
-        }
-        return res;
-    }
-    throw new Error('Unreachable');
-}
 export class LLMClient {
     axiosInstance;
     baseUrl;
@@ -152,33 +138,29 @@ export class LLMClient {
         });
     }
     preprocessMessages(messages, modelId) {
-        const isGptOss = /^gpt-oss-(120b|20b)$/i.test(modelId);
         return messages.map((msg) => {
-            const multimodal = msg.multimodal;
+            let processedMsg = { ...msg };
+            const multimodal = processedMsg.multimodal;
             if (multimodal && Array.isArray(multimodal)) {
-                const processedMsg = { ...msg };
                 processedMsg.content = multimodal;
                 delete processedMsg.multimodal;
                 return processedMsg;
             }
             if (msg.role !== 'assistant') {
-                return msg;
-            }
-            const msgAny = msg;
-            const needsReasoningFix = msgAny.reasoning_content && (!msg.content || msg.content.trim() === '');
-            const needsHarmonyFix = isGptOss && msg.tool_calls && msg.tool_calls.length > 0 && (!msg.content || msg.content.trim() === '');
-            const needsNullFix = msg.content === undefined || msg.content === null;
-            if (!needsReasoningFix && !needsHarmonyFix && !needsNullFix) {
-                return msg;
+                return processedMsg;
             }
-            const processedMsg = { ...msg };
-            if (needsReasoningFix) {
+            const msgAny = processedMsg;
+            if (msgAny.reasoning_content && (!msg.content || msg.content.trim() === '')) {
                 processedMsg.content = msgAny.reasoning_content;
                 delete processedMsg.reasoning_content;
             }
-            if (needsHarmonyFix) {
-                const toolNames = msg.tool_calls.map(tc => tc.function.name).join(', ');
-                processedMsg.content = msgAny.reasoning || `Calling tools: ${toolNames}`;
+            if (/^gpt-oss-(120b|20b)$/i.test(modelId)) {
+                if (msg.tool_calls && msg.tool_calls.length > 0) {
+                    if (!processedMsg.content || processedMsg.content.trim() === '') {
+                        const toolNames = msg.tool_calls.map(tc => tc.function.name).join(', ');
+                        processedMsg.content = msgAny.reasoning || `Calling tools: ${toolNames}`;
+                    }
+                }
             }
             if (processedMsg.content === undefined || processedMsg.content === null) {
                 processedMsg.content = '';
@@ -202,26 +184,13 @@ export class LLMClient {
             const modelId = options.model || this.model;
             const processedMessages = options.messages ?
                 this.preprocessMessages(options.messages, modelId) : [];
-            logger.vars({ name: 'modelId', value: modelId }, { name: 'originalMessages', value: options.messages?.length || 0 }, { name: 'processedMessages', value: processedMessages.length }, { name: 'temperature', value: options.temperature ?? 0 });
-            let systemCached = false;
-            for (const msg of processedMessages) {
-                if (!systemCached && msg.role === 'system') {
-                    msg.cache_control = { type: 'ephemeral' };
-                    systemCached = true;
-                }
-                else if (msg.role === 'user' && typeof msg.content === 'string' && msg.content.length > 2000) {
-                    msg.cache_control = { type: 'ephemeral' };
-                }
-            }
-            const isClaudeModel = /claude|sonnet|opus|haiku/i.test(modelId);
-            const supportsThinking = isClaudeModel && /anthropic|getorquesta|batuta/i.test(this.baseUrl);
+            logger.vars({ name: 'modelId', value: modelId }, { name: 'originalMessages', value: options.messages?.length || 0 }, { name: 'processedMessages', value: processedMessages.length }, { name: 'temperature', value: options.temperature ?? 0.7 });
             const requestBody = {
                 model: modelId,
                 messages: processedMessages,
                 temperature: options.temperature ?? 0,
                 max_tokens: options.max_tokens,
                 stream: false,
-                ...(supportsThinking && { thinking: { type: 'enabled', budget_tokens: 10000 } }),
                 ...(options.tools && {
                     tools: options.tools,
                     parallel_tool_calls: false,
@@ -482,15 +451,12 @@ export class LLMClient {
             const modelId = options.model || this.model;
             const processedMessages = options.messages ?
                 this.preprocessMessages(options.messages, modelId) : [];
-            const isClaudeModel = /claude|sonnet|opus|haiku/i.test(modelId);
-            const supportsThinking = isClaudeModel && /anthropic|getorquesta|batuta/i.test(this.baseUrl);
             const requestBody = {
                 model: modelId,
                 messages: processedMessages,
                 temperature: options.temperature ?? 0,
                 max_tokens: options.max_tokens,
                 stream: true,
-                ...(supportsThinking && { thinking: { type: 'enabled', budget_tokens: 10000 } }),
                 ...(options.tools && {
                     tools: options.tools,
                     ...(options.tool_choice && { tool_choice: options.tool_choice }),
@@ -629,9 +595,10 @@ export class LLMClient {
         const toolCallHistory = [];
         let iterations = 0;
         let contextLengthRecoveryAttempted = false;
+        let noToolCallRetries = 0;
         let finalResponseFailures = 0;
+        const MAX_NO_TOOL_CALL_RETRIES = 3;
         const MAX_FINAL_RESPONSE_FAILURES = 3;
-        const { executeFileTool, requestToolApproval, emitAssistantResponse } = await import('../../tools/llm/simple/file-tools.js');
         const recentToolSignatures = [];
         const recentNormalizedSignatures = [];
         const LOOP_WINDOW = 5;
@@ -657,7 +624,7 @@ export class LLMClient {
                 response = await this.chatCompletion({
                     messages: workingMessages,
                     tools,
-                    tool_choice: 'auto',
+                    tool_choice: 'required',
                     ...(roleModel ? { model: roleModel } : {}),
                 });
             }
@@ -708,11 +675,6 @@ export class LLMClient {
                 throw new Error('Cannot find choice in response.');
             }
             const assistantMessage = choice.message;
-            const reasoning = assistantMessage.reasoning_content;
-            if (reasoning && typeof reasoning === 'string' && reasoning.trim()) {
-                const { emitReasoning } = await import('../../tools/llm/simple/file-tools.js');
-                emitReasoning(reasoning.trim());
-            }
             if ((!assistantMessage.tool_calls || assistantMessage.tool_calls.length === 0) &&
                 typeof assistantMessage.content === 'string') {
                 const coerced = coerceSyntheticToolCalls(assistantMessage.content);
@@ -776,6 +738,7 @@ export class LLMClient {
                         });
                         continue;
                     }
+                    const { executeFileTool, requestToolApproval } = await import('../../tools/llm/simple/file-tools.js');
                     const approvalResult = await requestToolApproval(toolName, toolArgs);
                     if (approvalResult && typeof approvalResult === 'object' && approvalResult.reject) {
                         logger.flow(`Tool rejected by user: ${toolName}`);
@@ -830,6 +793,7 @@ export class LLMClient {
                                 if (finalResponseFailures >= MAX_FINAL_RESPONSE_FAILURES) {
                                     logger.warn('Max final_response failures exceeded - forcing completion');
                                     const fallbackMessage = toolArgs['message'] || 'Task completed with incomplete TODOs.';
+                                    const { emitAssistantResponse } = await import('../../tools/llm/simple/file-tools.js');
                                     emitAssistantResponse(fallbackMessage);
                                     return {
                                         message: { role: 'assistant', content: fallbackMessage },
@@ -869,14 +833,34 @@ export class LLMClient {
                 continue;
             }
             else {
-                const finalContent = assistantMessage.content || 'Task completed.';
-                const { emitAssistantResponse } = await import('../../tools/llm/simple/file-tools.js');
-                emitAssistantResponse(finalContent);
-                return {
-                    message: { role: 'assistant', content: finalContent },
-                    toolCalls: toolCallHistory,
-                    allMessages: workingMessages,
-                };
+                noToolCallRetries++;
+                logger.flow(`No tool call - enforcing tool usage (attempt ${noToolCallRetries}/${MAX_NO_TOOL_CALL_RETRIES})`);
+                if (noToolCallRetries > MAX_NO_TOOL_CALL_RETRIES) {
+                    logger.warn('Max no-tool-call retries exceeded - returning content as final response');
+                    const fallbackContent = assistantMessage.content || 'Task completed.';
+                    const { emitAssistantResponse } = await import('../../tools/llm/simple/file-tools.js');
+                    emitAssistantResponse(fallbackContent);
+                    return {
+                        message: { role: 'assistant', content: fallbackContent },
+                        toolCalls: toolCallHistory,
+                        allMessages: workingMessages,
+                    };
+                }
+                const hasMalformedToolCall = assistantMessage.content &&
+                    (/<tool_call>/i.test(assistantMessage.content) ||
+                        /<arg_key>/i.test(assistantMessage.content) ||
+                        /<arg_value>/i.test(assistantMessage.content) ||
+                        /<\/tool_call>/i.test(assistantMessage.content) ||
+                        /bash<arg_key>/i.test(assistantMessage.content));
+                const retryMessage = hasMalformedToolCall
+                    ? 'Your previous response contained a malformed tool call (XML tags in content). You MUST use the proper tool_calls API format. Use final_response tool to deliver your message to the user.'
+                    : 'You must use tools for all actions. Use final_response tool to deliver your final message to the user after completing all tasks.';
+                workingMessages.push({
+                    role: 'user',
+                    content: retryMessage,
+                });
+                logger.debug('Enforcing tool call - added retry message');
+                continue;
             }
         }
     }
@@ -931,13 +915,13 @@ export class LLMClient {
                     errorMessage,
                     errorType,
                     errorCode,
-                    responseBody: typeof data === 'string' ? data.slice(0, 2000) : JSON.stringify(data, null, 2)?.slice(0, 2000),
+                    responseBody: JSON.stringify(data, null, 2),
                     requestMethod: requestContext?.method,
                     requestUrl: requestContext?.url,
                     requestBody: requestContext?.body
                         ? JSON.stringify(requestContext.body, null, 2).substring(0, 5000)
                         : undefined,
-                    responseHeaders: Object.fromEntries(Object.entries(axiosError.response.headers || {}).filter(([, v]) => typeof v === 'string' || typeof v === 'number')),
+                    responseHeaders: axiosError.response.headers,
                 });
                 logger.httpResponse(status, axiosError.response.statusText, data);
                 if (errorType === 'invalid_request_error' &&

package/dist/orchestration/plan-executor.d.ts CHANGED Viewed

@@ -5,9 +5,7 @@ import type { StateCallbacks } from './types.js';
 export declare function setAppendedSystemPrompt(text: string): void;
 export declare class PlanExecutor {
     private currentLLMClient;
-    private cachedSystemPrompt;
     constructor();
-    private getSystemPrompt;
     executePlanMode(userMessage: string, llmClient: LLMClient, messages: Message[], isInterruptedRef: {
         current: boolean;
     }, callbacks: StateCallbacks): Promise<void>;

package/dist/orchestration/plan-executor.js CHANGED Viewed

@@ -16,7 +16,6 @@ import { GIT_COMMIT_RULES } from '../prompts/shared/git-rules.js';
 import { logger } from '../utils/logger.js';
 import { getStreamLogger } from '../utils/json-stream-logger.js';
 import { detectGitRepo } from '../utils/git-utils.js';
-import { getRelevantContext } from '../core/embeddings-context.js';
 import { formatErrorMessage, buildTodoContext, findActiveTodo, getTodoStats } from './utils.js';
 import { BaseError } from '../errors/base.js';
 import { runParallelGraph, shouldUseParallelOrchestrator } from './parallel-orchestrator.js';
@@ -39,20 +38,10 @@ function buildSystemPrompt() {
     const appended = appendedSystemPrompt ? `\n\n${appendedSystemPrompt}` : '';
     return base + buildEnvironmentContext() + projectContext + getMemoryPrompt() + getGitContextPrompt() + appended;
 }
-function buildLightSystemPrompt() {
-    return `You are Orquesta, an AI coding assistant. Respond concisely and naturally. Match the user's language.` + getMemoryPrompt();
-}
 export class PlanExecutor {
     currentLLMClient = null;
-    cachedSystemPrompt = null;
     constructor() {
     }
-    getSystemPrompt() {
-        if (!this.cachedSystemPrompt) {
-            this.cachedSystemPrompt = buildSystemPrompt();
-        }
-        return this.cachedSystemPrompt;
-    }
     async executePlanMode(userMessage, llmClient, messages, isInterruptedRef, callbacks) {
         const planningStartTime = Date.now();
         const streamLogger = getStreamLogger();
@@ -93,112 +82,80 @@ export class PlanExecutor {
                 throw new Error('INTERRUPTED');
             }
             let currentMessages = messages;
-            const COMPLEX_PATTERNS = /\b(and then|after that|first.*then|step \d|multiple|several|refactor.*entire|migrate|rewrite.*all|crea.*app|create.*app|build.*app|construye|implement|set up|setup|scaffold|initialize|init.*project)\b/i;
-            const isSimpleTask = userMessage.length < 200 && !COMPLEX_PATTERNS.test(userMessage);
-            const isConversational = userMessage.length < 100 &&
-                /^(ping|hi|hello|hola|hey|thanks|ok|si|yes|no|que|how|what|why|when|who)\b/i.test(userMessage);
-            if (isSimpleTask) {
-                logger.flow('Simple task detected — skipping planner, executor will handle directly');
-                streamLogger?.logPlanningEnd(0, [], false, 0);
-                const lastMsg = currentMessages[currentMessages.length - 1];
-                if (!(lastMsg?.role === 'user' && lastMsg?.content === userMessage)) {
-                    currentMessages = [...currentMessages, { role: 'user', content: userMessage }];
-                }
+            callbacks.setCurrentActivity('Thinking');
+            const plannerModel = configManager.getRoleModel('planner');
+            const planningLLM = new PlanningLLM(llmClient, plannerModel ?? undefined);
+            const plannerStartedAt = Date.now();
+            if (callbacks.askUser) {
+                planningLLM.setAskUserCallback(callbacks.askUser);
             }
-            else {
-                callbacks.setCurrentActivity('Thinking');
-                const plannerModel = configManager.getRoleModel('planner');
-                const planningLLM = new PlanningLLM(llmClient, plannerModel ?? undefined);
-                const plannerStartedAt = Date.now();
-                if (callbacks.askUser) {
-                    planningLLM.setAskUserCallback(callbacks.askUser);
-                }
-                const planResult = await planningLLM.generateTODOListWithDocsDecision(userMessage, currentMessages);
-                auditLog.emit(auditSid, 'planner.complete', {
-                    runId,
-                    model: plannerModel,
-                    durationMs: Date.now() - plannerStartedAt,
-                    todoCount: planResult.todos.length,
-                    directResponse: !!planResult.directResponse,
+            const planResult = await planningLLM.generateTODOListWithDocsDecision(userMessage, currentMessages);
+            auditLog.emit(auditSid, 'planner.complete', {
+                runId,
+                model: plannerModel,
+                durationMs: Date.now() - plannerStartedAt,
+                todoCount: planResult.todos.length,
+                directResponse: !!planResult.directResponse,
+            });
+            if (planResult.clarificationMessages?.length) {
+                currentMessages = [...currentMessages, ...planResult.clarificationMessages];
+                callbacks.setMessages([...currentMessages]);
+                logger.flow('Added planning clarification messages to history', {
+                    count: planResult.clarificationMessages.length,
                 });
-                if (planResult.clarificationMessages?.length) {
-                    currentMessages = [...currentMessages, ...planResult.clarificationMessages];
-                    callbacks.setMessages([...currentMessages]);
-                    logger.flow('Added planning clarification messages to history', {
-                        count: planResult.clarificationMessages.length,
-                    });
-                }
-                if (planResult.directResponse) {
-                    logger.flow('Direct response - no execution needed');
-                    streamLogger?.logPlanningEnd(0, [], true, Date.now() - planningStartTime);
-                    const lastMsg = currentMessages[currentMessages.length - 1];
-                    const needsUserMessage = !(lastMsg?.role === 'user' && lastMsg?.content === userMessage);
-                    const updatedMessages = needsUserMessage
-                        ? [
-                            ...currentMessages,
-                            { role: 'user', content: userMessage },
-                            { role: 'assistant', content: planResult.directResponse }
-                        ]
-                        : [
-                            ...currentMessages,
-                            { role: 'assistant', content: planResult.directResponse }
-                        ];
-                    emitAssistantResponse(planResult.directResponse);
-                    callbacks.setMessages([...updatedMessages]);
-                    sessionManager.autoSaveCurrentSession(updatedMessages);
-                    callbacks.setExecutionPhase('idle');
-                    logger.exit('PlanExecutor.executePlanMode', { directResponse: true });
-                    return;
-                }
-                currentTodos = planResult.todos;
-                streamLogger?.logPlanningEnd(currentTodos.length, currentTodos.map(t => ({ id: t.id, title: t.title, status: t.status })), false, Date.now() - planningStartTime);
-                logger.vars({ name: 'todoCount', value: currentTodos.length }, { name: 'docsSearchNeeded', value: planResult.docsSearchNeeded });
-                callbacks.setTodos(currentTodos);
-                emitPlanCreated(currentTodos.map(t => t.title));
-                const planMessage = planResult.docsSearchNeeded
-                    ? `🎼 Created ${currentTodos.length} tasks (including docs search). Starting execution...`
-                    : `🎼 Created ${currentTodos.length} tasks. Starting execution...`;
-                const lastMsgForPlan = currentMessages[currentMessages.length - 1];
-                const needsUserMessageForPlan = !(lastMsgForPlan?.role === 'user' && lastMsgForPlan?.content === userMessage);
-                currentMessages = needsUserMessageForPlan
+            }
+            if (planResult.directResponse) {
+                logger.flow('Direct response - no execution needed');
+                streamLogger?.logPlanningEnd(0, [], true, Date.now() - planningStartTime);
+                const lastMsg = currentMessages[currentMessages.length - 1];
+                const needsUserMessage = !(lastMsg?.role === 'user' && lastMsg?.content === userMessage);
+                const updatedMessages = needsUserMessage
                     ? [
                         ...currentMessages,
                         { role: 'user', content: userMessage },
-                        { role: 'assistant', content: planMessage }
+                        { role: 'assistant', content: planResult.directResponse }
                     ]
                     : [
                         ...currentMessages,
-                        { role: 'assistant', content: planMessage }
+                        { role: 'assistant', content: planResult.directResponse }
                     ];
-                callbacks.setMessages(currentMessages);
+                emitAssistantResponse(planResult.directResponse);
+                callbacks.setMessages([...updatedMessages]);
+                sessionManager.autoSaveCurrentSession(updatedMessages);
+                callbacks.setExecutionPhase('idle');
+                logger.exit('PlanExecutor.executePlanMode', { directResponse: true });
+                return;
             }
+            currentTodos = planResult.todos;
+            streamLogger?.logPlanningEnd(currentTodos.length, currentTodos.map(t => ({ id: t.id, title: t.title, status: t.status })), false, Date.now() - planningStartTime);
+            logger.vars({ name: 'todoCount', value: currentTodos.length }, { name: 'docsSearchNeeded', value: planResult.docsSearchNeeded });
+            callbacks.setTodos(currentTodos);
+            emitPlanCreated(currentTodos.map(t => t.title));
+            const planMessage = planResult.docsSearchNeeded
+                ? `📋 Created ${currentTodos.length} tasks (including docs search). Starting execution...`
+                : `📋 Created ${currentTodos.length} tasks. Starting execution...`;
+            const lastMsgForPlan = currentMessages[currentMessages.length - 1];
+            const needsUserMessageForPlan = !(lastMsgForPlan?.role === 'user' && lastMsgForPlan?.content === userMessage);
+            currentMessages = needsUserMessageForPlan
+                ? [
+                    ...currentMessages,
+                    { role: 'user', content: userMessage },
+                    { role: 'assistant', content: planMessage }
+                ]
+                : [
+                    ...currentMessages,
+                    { role: 'assistant', content: planMessage }
+                ];
+            callbacks.setMessages(currentMessages);
             this.setupTodoCallbacks(currentTodos, callbacks, (updated) => {
                 currentTodos = updated;
             });
             callbacks.setExecutionPhase('executing');
-            const allTools = toolRegistry.getLLMToolDefinitions();
-            let tools;
-            if (isConversational) {
-                tools = [];
-            }
-            else if (currentTodos.length === 0) {
-                const coreTools = new Set(['read_file', 'create_file', 'edit_file', 'list_files', 'find_files', 'search_content', 'bash', 'tell_to_user']);
-                tools = allTools.filter((t) => coreTools.has(t.function?.name));
-            }
-            else {
-                tools = allTools;
-            }
-            const systemContent = isConversational
-                ? buildLightSystemPrompt()
-                : this.getSystemPrompt() + getRelevantContext(userMessage);
-            const sysIdx = currentMessages.findIndex(m => m.role === 'system');
-            if (sysIdx >= 0) {
-                currentMessages = [...currentMessages];
-                currentMessages[sysIdx] = { role: 'system', content: systemContent };
-            }
-            else {
+            const tools = toolRegistry.getLLMToolDefinitions();
+            const hasSystemMessage = currentMessages.some(m => m.role === 'system');
+            if (!hasSystemMessage) {
                 currentMessages = [
-                    { role: 'system', content: systemContent },
+                    { role: 'system', content: buildSystemPrompt() },
                     ...currentMessages
                 ];
             }
@@ -226,7 +183,7 @@ export class PlanExecutor {
             });
             if (useParallel && sessionId) {
                 logger.flow('Dispatching parallel orchestrator', { todoCount: currentTodos.length });
-                const baseSystem = currentMessages.find(m => m.role === 'system')?.content || this.getSystemPrompt();
+                const baseSystem = currentMessages.find(m => m.role === 'system')?.content || buildSystemPrompt();
                 const graphResult = await runParallelGraph({
                     llmClient,
                     todos: currentTodos,
@@ -243,9 +200,12 @@ export class PlanExecutor {
             }
             else {
                 const todoContext = buildTodoContext(currentTodos);
-                const messagesForLLM = todoContext
-                    ? [...currentMessages, { role: 'user', content: `[Current task status]${todoContext}` }]
-                    : currentMessages;
+                const lastUserMsgIndex = currentMessages.map(m => m.role).lastIndexOf('user');
+                const messagesForLLM = lastUserMsgIndex >= 0
+                    ? currentMessages.map((m, i) => i === lastUserMsgIndex
+                        ? { ...m, content: m.content + todoContext }
+                        : m)
+                    : [...currentMessages, { role: 'user', content: `Execute the TODO list.${todoContext}` }];
                 const executorModel = configManager.getRoleModel('executor');
                 const result = await llmClient.chatCompletionWithTools(messagesForLLM, tools, {
                     getPendingMessage: callbacks.getPendingMessage,
@@ -320,7 +280,6 @@ export class PlanExecutor {
             clearFinalResponseCallbacks();
             clearDocsSearchLLMClientGetter();
             this.currentLLMClient = null;
-            this.cachedSystemPrompt = null;
         }
     }
     async resumeTodoExecution(userMessage, llmClient, messages, todos, isInterruptedRef, callbacks) {
@@ -346,9 +305,8 @@ export class PlanExecutor {
             const tools = toolRegistry.getLLMToolDefinitions();
             const hasSystemMessage = currentMessages.some(m => m.role === 'system');
             if (!hasSystemMessage) {
-                const relevantContext = getRelevantContext(userMessage);
                 currentMessages = [
-                    { role: 'system', content: this.getSystemPrompt() + relevantContext },
+                    { role: 'system', content: buildSystemPrompt() },
                     ...currentMessages
                 ];
             }
@@ -358,9 +316,12 @@ export class PlanExecutor {
             const activeTodo = findActiveTodo(currentTodos);
             callbacks.setCurrentActivity(activeTodo?.title || 'Working on tasks');
             const todoContext = buildTodoContext(currentTodos);
-            const messagesForLLM = todoContext
-                ? [...currentMessages, { role: 'user', content: `[Current task status]${todoContext}` }]
-                : currentMessages;
+            const lastUserMsgIndex = currentMessages.map(m => m.role).lastIndexOf('user');
+            const messagesForLLM = lastUserMsgIndex >= 0
+                ? currentMessages.map((m, i) => i === lastUserMsgIndex
+                    ? { ...m, content: m.content + todoContext }
+                    : m)
+                : [...currentMessages, { role: 'user', content: `Resume the TODO list.${todoContext}` }];
             const executorModel = configManager.getRoleModel('executor');
             const result = await llmClient.chatCompletionWithTools(messagesForLLM, tools, {
                 getPendingMessage: callbacks.getPendingMessage,
@@ -392,7 +353,6 @@ export class PlanExecutor {
             clearFinalResponseCallbacks();
             clearDocsSearchLLMClientGetter();
             this.currentLLMClient = null;
-            this.cachedSystemPrompt = null;
         }
     }
     async executeAutoMode(userMessage, llmClient, messages, _todos, isInterruptedRef, callbacks) {

package/dist/prompts/shared/tool-usage.js CHANGED Viewed

@@ -22,6 +22,7 @@ export const AVAILABLE_TOOLS_WITH_TODO = `
 - **tell_to_user**: Send status updates to the user
 - **ask_to_user**: Ask user a question with multiple choice options
 - **write_todos**: Update entire TODO list (replaces current list)
+- **call_docs_search_agent**: Search local documentation (~/.local-cli/docs)
 `.trim();
 export const TOOL_REASON_GUIDE = `
 ## CRITICAL - Tool "reason" Parameter

package/dist/prompts/system/plan-execute.js CHANGED Viewed

@@ -1,77 +1,83 @@
 import { LANGUAGE_PRIORITY_RULE } from '../shared/language-rules.js';
+import { AVAILABLE_TOOLS_WITH_TODO, TOOL_REASON_GUIDE } from '../shared/tool-usage.js';
 import { CODEBASE_FIRST_RULE } from '../shared/codebase-rules.js';
-export const PLAN_EXECUTE_SYSTEM_PROMPT = `You are Orquesta, an expert AI coding assistant working in the user's terminal. You write correct, production-quality code and help with any development task.
+export const PLAN_EXECUTE_SYSTEM_PROMPT = `You are an AI assistant executing a TODO-based plan.
 ${LANGUAGE_PRIORITY_RULE}
-## How You Work
+## TODO Workflow
-1. **Understand first** — Read relevant code before modifying it. Never guess file contents.
-2. **Act, don't describe** — Use tools to do the work. Don't say "I would do X", just do X.
-3. **Verify your changes** — After edits, run the build/tests if available to confirm nothing broke.
-4. **Be concise** — Short answers for simple questions. Thorough work for complex tasks.
-5. **Match the user's intent** — Do what was asked, no more. Don't add unrequested features or refactors.
+1. Work through TODOs systematically
+2. Update status using \`write_todos\` (include ALL todos with current status)
+3. **DONE when ALL TODOs are "completed"**
-## Decision Framework
+**CRITICAL: Keep TODO status in sync with your actual progress!**
+- When starting a task → mark it "in_progress" IMMEDIATELY
+- When finishing a task → mark it "completed" IMMEDIATELY
+- The user sees the TODO list in real-time - mismatched status is confusing
+- Call \`write_todos\` FREQUENTLY, not just at the end
-- **Simple questions** (what is X, explain Y): Respond directly with knowledge.
-- **Code tasks** (fix, add, edit, refactor): Read → Edit → Verify. Use tools.
-- **Investigation** (why is this failing, what does X do): Read code, search, then explain.
-- **Ambiguous requests**: Infer the most useful action and proceed. Only ask if truly blocked.
+${AVAILABLE_TOOLS_WITH_TODO}
-## Tool Usage
+${TOOL_REASON_GUIDE}
-Use tools for all file operations, commands, and code changes. Your available tools:
-- **read_file**: Always read before editing
-- **edit_file**: Modify existing files (match exact content for old_string)
-- **create_file**: Create new files
-- **list_files** / **find_files** / **search_content**: Navigate the codebase
-- **bash**: Run commands (build, test, git, etc.)
-- **tell_to_user**: Show progress updates
-- **write_todos**: Track task progress (for multi-step work)
+## Execution Rules
-Every tool has a "reason" parameter shown to the user. Write it naturally in the user's language.
+1. **Read before modify** - Always read existing code first
+2. **Use tools** - Perform actual work, don't just describe
+3. **Retry on error** - Up to 3 attempts before marking "failed"
+4. **Stay focused** - Only work on TODOs, no unrelated features
-## Code Quality Rules
+${CODEBASE_FIRST_RULE}
-- Write minimal, correct code that solves the problem
-- Follow existing project conventions (style, naming, patterns)
-- Use secure coding practices by default
-- Don't introduce new dependencies unless necessary
-- Include error handling where appropriate
+## CRITICAL: Tool Error Handling
-## Error Handling
+**If a tool returns an error, you MUST retry the same tool with corrected parameters.**
-If a tool fails:
-1. Read the error carefully
-2. Investigate (read_file to check actual content)
-3. Retry with corrected parameters
-4. Only give up after 3 failed attempts
+1. STOP - Read the error message carefully
+2. Investigate - Use \`read_file\` to check actual file content
+3. **RETRY THE SAME TOOL** with corrected parameters (DO NOT skip or move on)
+4. Repeat until success or 3 failures
-Common edit_file failures: wrong old_string → re-read file, copy exact text, retry.
+**You are NOT allowed to:**
+- Skip the failed tool and move to next task
+- Say "I'll try a different approach" without actually retrying
+- Mark TODO as complete if the tool failed
-${CODEBASE_FIRST_RULE}
+Example flow:
+1. \`edit_file\` fails: "Line 77 content does not match"
+2. Call \`read_file\` to see actual content
+3. **Call \`edit_file\` again** with correct \`old_string\`
+4. Only proceed after edit succeeds
+## CRITICAL: When to Respond
+**ONLY respond when ALL TODOs are "completed" or "failed".**
+- Responding early = execution ends prematurely
+- Use \`tell_to_user\` to communicate progress during execution
+- \`write_todos\` only updates internal state
+**Before final response, verify:**
+- All TODOs completed?
+- All tool calls successful?
+- User's request fulfilled?
-## Response Style
+## CRITICAL: Final Response
-- Direct and concise. No filler phrases.
-- Code in markdown blocks with language tags.
-- When summarizing completed work: state what was done in 1-3 sentences.
-- Match the user's language (if they write in Spanish, respond in Spanish).
+Your final response MUST contain the **actual answer or result**:
+- Question → Answer with information found
+- Task → Summarize what was done
-## TODO Workflow (for multi-step tasks)
+**DO NOT** just say "Task complete" or give task statistics.
-When working on a plan with TODOs:
-- Update status via write_todos as you progress
-- Mark "in_progress" when starting, "completed" when done
-- Stay focused on the current task
+Example:
+- User: "What's the project name?" → "This project is **LOCAL-CLI**."
+- User: "Add a debug function" → "Added debug function to logger.ts."
-## IMPORTANT
+## Loop Detection
-- You can respond directly without using any tool — do so for simple questions or when you're done working.
-- After completing all requested work, give a brief summary of what was done.
-- When a tool produces output the user asked to see (bash command results, file contents), INCLUDE the relevant output in your response. Never say just "Task completed" — show the actual data.
-- Never fabricate file contents, paths, or command outputs. If unsure, investigate first.
+If TODO context keeps repeating but work is done → IMMEDIATELY mark all as "completed".
 `;
 export default PLAN_EXECUTE_SYSTEM_PROMPT;
 //# sourceMappingURL=plan-execute.js.map

package/dist/tools/llm/simple/final-response-tool.js CHANGED Viewed

@@ -29,16 +29,23 @@ const FINAL_RESPONSE_DEFINITION = {
     type: 'function',
     function: {
         name: 'final_response',
-        description: `Deliver a final summary to the user after completing work. Optional — you can also respond directly without this tool.
+        description: `Use this tool to deliver your final response to the user after completing all tasks.
-Use this when you want to explicitly signal task completion with a summary.
-Any incomplete TODOs will be auto-marked as done.`,
+IMPORTANT:
+- You MUST complete all TODOs before calling this tool
+- If any TODO is not completed, this tool will return an error
+- After all tasks are done, use this tool to summarize what was accomplished
+Example:
+{
+  "message": "I've completed all the requested tasks:\\n\\n1. Fixed the bug in the login form\\n2. Added input validation\\n3. Updated the tests\\n\\nAll changes have been committed."
+}`,
         parameters: {
             type: 'object',
             properties: {
                 message: {
                     type: 'string',
-                    description: 'Your final response message to the user.',
+                    description: 'Your final response message to the user. Summarize what was accomplished.',
                 },
             },
             required: ['message'],

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "orquesta-cli",
-  "version": "0.2.69",
+  "version": "0.2.71",
   "description": "Orquesta CLI - AI-powered coding assistant with team collaboration",
   "type": "module",
   "main": "dist/index.js",