npm - @probelabs/probe - Versions diffs - 0.6.0-rc203 → 0.6.0-rc205 - Mend

@probelabs/probe 0.6.0-rc203 → 0.6.0-rc205

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44)

package/bin/binaries/probe-v0.6.0-rc205-aarch64-apple-darwin.tar.gz +0 -0
package/bin/binaries/probe-v0.6.0-rc205-aarch64-unknown-linux-musl.tar.gz +0 -0
package/bin/binaries/probe-v0.6.0-rc205-x86_64-apple-darwin.tar.gz +0 -0
package/bin/binaries/probe-v0.6.0-rc205-x86_64-pc-windows-msvc.zip +0 -0
package/bin/binaries/probe-v0.6.0-rc205-x86_64-unknown-linux-musl.tar.gz +0 -0
package/build/agent/ProbeAgent.d.ts +2 -0
package/build/agent/ProbeAgent.js +233 -40
package/build/agent/index.js +1566 -84
package/build/agent/simpleTelemetry.js +12 -0
package/build/agent/tasks/TaskManager.js +604 -0
package/build/agent/tasks/index.js +15 -0
package/build/agent/tasks/taskTool.js +476 -0
package/build/agent/tools.js +11 -0
package/build/delegate.js +7 -2
package/build/index.js +14 -1
package/build/search.js +19 -5
package/build/tools/common.js +67 -0
package/build/tools/vercel.js +28 -12
package/build/utils/error-types.js +303 -0
package/build/utils/path-validation.js +21 -3
package/cjs/agent/ProbeAgent.cjs +8940 -6393
package/cjs/agent/simpleTelemetry.cjs +10 -0
package/cjs/index.cjs +8960 -6393
package/package.json +2 -2
package/src/agent/ProbeAgent.d.ts +2 -0
package/src/agent/ProbeAgent.js +233 -40
package/src/agent/index.js +14 -2
package/src/agent/simpleTelemetry.js +12 -0
package/src/agent/tasks/TaskManager.js +604 -0
package/src/agent/tasks/index.js +15 -0
package/src/agent/tasks/taskTool.js +476 -0
package/src/agent/tools.js +11 -0
package/src/delegate.js +7 -2
package/src/index.js +14 -1
package/src/search.js +19 -5
package/src/tools/common.js +67 -0
package/src/tools/vercel.js +28 -12
package/src/utils/error-types.js +303 -0
package/src/utils/path-validation.js +21 -3
package/bin/binaries/probe-v0.6.0-rc203-aarch64-apple-darwin.tar.gz +0 -0
package/bin/binaries/probe-v0.6.0-rc203-aarch64-unknown-linux-musl.tar.gz +0 -0
package/bin/binaries/probe-v0.6.0-rc203-x86_64-apple-darwin.tar.gz +0 -0
package/bin/binaries/probe-v0.6.0-rc203-x86_64-pc-windows-msvc.zip +0 -0
package/bin/binaries/probe-v0.6.0-rc203-x86_64-unknown-linux-musl.tar.gz +0 -0

package/bin/binaries/probe-v0.6.0-rc205-aarch64-apple-darwin.tar.gz ADDED Viewed

Binary file

package/bin/binaries/probe-v0.6.0-rc205-aarch64-unknown-linux-musl.tar.gz ADDED Viewed

Binary file

package/bin/binaries/probe-v0.6.0-rc205-x86_64-apple-darwin.tar.gz ADDED Viewed

Binary file

package/bin/binaries/probe-v0.6.0-rc205-x86_64-pc-windows-msvc.zip ADDED Viewed

Binary file

package/bin/binaries/probe-v0.6.0-rc205-x86_64-unknown-linux-musl.tar.gz ADDED Viewed

Binary file

package/build/agent/ProbeAgent.d.ts CHANGED Viewed

@@ -76,6 +76,8 @@ export interface ProbeAgentOptions {
   skillDirs?: string[];
   /** Custom prompt to run after attempt_completion for validation/review (runs before mermaid/JSON validation) */
   completionPrompt?: string;
+  /** Enable task management system for tracking multi-step progress */
+  enableTasks?: boolean;
 }
 /**

package/build/agent/ProbeAgent.js CHANGED Viewed

@@ -37,7 +37,7 @@ import {
   attemptCompletionSchema,
   parseXmlToolCallWithThinking
 } from './tools.js';
-import { createMessagePreview } from '../tools/common.js';
+import { createMessagePreview, detectUnrecognizedToolCall } from '../tools/common.js';
 import {
   createWrappedTools,
   listFilesToolInstance,
@@ -69,6 +69,15 @@ import { createSkillToolInstances } from './skills/tools.js';
 import { RetryManager, createRetryManagerFromEnv } from './RetryManager.js';
 import { FallbackManager, createFallbackManagerFromEnv, buildFallbackProvidersFromEnv } from './FallbackManager.js';
 import { handleContextLimitError } from './contextCompactor.js';
+import { formatErrorForAI, ParameterError } from '../utils/error-types.js';
+import {
+  TaskManager,
+  createTaskTool,
+  taskToolDefinition,
+  taskSystemPrompt,
+  taskGuidancePrompt,
+  createTaskCompletionBlockedMessage
+} from './tasks/index.js';
 // Maximum tool iterations to prevent infinite loops - configurable via MAX_TOOL_ITERATIONS env var
 const MAX_TOOL_ITERATIONS = (() => {
@@ -118,6 +127,7 @@ export class ProbeAgent {
    * @param {string} [options.mcpConfigPath] - Path to MCP configuration file
    * @param {Object} [options.mcpConfig] - MCP configuration object (overrides mcpConfigPath)
    * @param {Array} [options.mcpServers] - Deprecated, use mcpConfig instead
+   * @param {boolean} [options.enableTasks=false] - Enable task management system for tracking progress
    * @param {Object} [options.storageAdapter] - Custom storage adapter for history management
    * @param {Object} [options.hooks] - Hook callbacks for events (e.g., {'tool:start': callback})
    * @param {Array<string>|null} [options.allowedTools] - List of allowed tool names. Use ['*'] for all tools (default), [] or null for no tools (raw AI mode), or specific tool names like ['search', 'query', 'extract']. Supports exclusion with '!' prefix (e.g., ['*', '!bash'])
@@ -255,6 +265,10 @@ export class ProbeAgent {
     this.mcpBridge = null;
     this._mcpInitialized = false; // Track if MCP initialization has been attempted
+    // Task management configuration
+    this.enableTasks = !!options.enableTasks;
+    this.taskManager = null; // Initialized per-request in answer()
     // Retry configuration
     this.retryConfig = options.retry || {};
     this.retryManager = null; // Will be initialized lazily when needed
@@ -580,6 +594,9 @@ export class ProbeAgent {
     // Store wrapped tools for ACP system
     this.wrappedTools = wrappedTools;
+    // Note: Task tool is registered dynamically in answer() when enableTasks is true
+    // This is because TaskManager is created per-request (request-scoped)
     // Log available tools in debug mode
     if (this.debug) {
       console.error('\n[DEBUG] ========================================');
@@ -1987,6 +2004,11 @@ ${extractGuidance}
       toolDefinitions += `${bashToolDefinition}\n`;
     }
+    // Task tool (require both enableTasks flag AND allowedTools permission)
+    if (this.enableTasks && isToolAllowed('task')) {
+      toolDefinitions += `${taskToolDefinition}\n`;
+    }
     // Always include attempt_completion (unless explicitly disabled in raw AI mode)
     if (isToolAllowed('attempt_completion')) {
       toolDefinitions += `${attemptCompletionToolDefinition}\n`;
@@ -1998,7 +2020,81 @@ ${extractGuidance}
       toolDefinitions += `${delegateToolDefinition}\n`;
     }
-    // Build XML tool guidelines
+    // Build XML tool guidelines with dynamic examples based on allowed tools
+    // Build examples only for allowed tools
+    let toolExamples = '';
+    if (isToolAllowed('search')) {
+      toolExamples += `
+<search>
+<query>error handling</query>
+<path>src/search</path>
+</search>
+`;
+    }
+    if (isToolAllowed('extract')) {
+      toolExamples += `
+<extract>
+<targets>src/config.js:15-25</targets>
+</extract>
+`;
+    }
+    if (isToolAllowed('attempt_completion')) {
+      toolExamples += `
+<attempt_completion>
+The configuration is loaded from src/config.js lines 15-25 which contains the database settings.
+</attempt_completion>
+`;
+    }
+    // Build available tools list dynamically based on allowedTools
+    let availableToolsList = '';
+    if (isToolAllowed('search')) {
+      availableToolsList += `- search: Search code using keyword queries${this.searchDelegate ? ' (returns extracted code blocks via a dedicated subagent)' : ''}.\n`;
+    }
+    if (isToolAllowed('query')) {
+      availableToolsList += '- query: Search code using structural AST patterns.\n';
+    }
+    if (isToolAllowed('extract')) {
+      availableToolsList += '- extract: Extract specific code blocks or lines from files.\n';
+    }
+    if (isToolAllowed('listFiles')) {
+      availableToolsList += '- listFiles: List files and directories in a specified location.\n';
+    }
+    if (isToolAllowed('searchFiles')) {
+      availableToolsList += '- searchFiles: Find files matching a glob pattern with recursive search capability.\n';
+    }
+    if (this.enableSkills && isToolAllowed('listSkills')) {
+      availableToolsList += '- listSkills: List available agent skills discovered in the repository.\n';
+    }
+    if (this.enableSkills && isToolAllowed('useSkill')) {
+      availableToolsList += '- useSkill: Load and activate a specific skill\'s instructions.\n';
+    }
+    if (isToolAllowed('readImage')) {
+      availableToolsList += '- readImage: Read and load an image file for AI analysis.\n';
+    }
+    if (this.allowEdit && isToolAllowed('implement')) {
+      availableToolsList += '- implement: Implement a feature or fix a bug using aider.\n';
+    }
+    if (this.allowEdit && isToolAllowed('edit')) {
+      availableToolsList += '- edit: Edit files using exact string replacement.\n';
+    }
+    if (this.allowEdit && isToolAllowed('create')) {
+      availableToolsList += '- create: Create new files with specified content.\n';
+    }
+    if (this.enableDelegate && isToolAllowed('delegate')) {
+      availableToolsList += '- delegate: Delegate big distinct tasks to specialized probe subagents.\n';
+    }
+    if (this.enableBash && isToolAllowed('bash')) {
+      availableToolsList += '- bash: Execute bash commands for system operations.\n';
+    }
+    if (this.enableTasks && isToolAllowed('task')) {
+      availableToolsList += '- task: Manage tasks for tracking progress (create, update, complete, delete, list).\n';
+    }
+    if (isToolAllowed('attempt_completion')) {
+      availableToolsList += '- attempt_completion: Finalize the task and provide the result to the user.\n';
+      availableToolsList += '- attempt_complete: Quick completion using previous response (shorthand).\n';
+    }
     let xmlToolGuidelines = `
 # Tool Use Formatting
@@ -2013,20 +2109,7 @@ Structure (note the closing tags):
 ...
 </tool_name>
-Examples:
-<search>
-<query>error handling</query>
-<path>src/search</path>
-</search>
-<extract>
-<targets>src/config.js:15-25</targets>
-</extract>
-<attempt_completion>
-The configuration is loaded from src/config.js lines 15-25 which contains the database settings.
-</attempt_completion>
+Examples:${toolExamples}
 # Special Case: Quick Completion
 If your previous response was already correct and complete, you may respond with just:
 <attempt_complete>
@@ -2055,16 +2138,7 @@ I need to find code related to error handling in the search module. The most app
 10. If your previous response was already correct and complete, you may use \`<attempt_complete>\` as a shorthand.
 Available Tools:
-- search: Search code using keyword queries${this.searchDelegate ? ' (returns extracted code blocks via a dedicated subagent)' : ''}.
-- query: Search code using structural AST patterns.
-- extract: Extract specific code blocks or lines from files.
-- listFiles: List files and directories in a specified location.
-- searchFiles: Find files matching a glob pattern with recursive search capability.
-${this.enableSkills ? '- listSkills: List available agent skills discovered in the repository.\n- useSkill: Load and activate a specific skill\'s instructions.\n' : ''}- readImage: Read and load an image file for AI analysis.
-${this.allowEdit ? '- implement: Implement a feature or fix a bug using aider.\n- edit: Edit files using exact string replacement.\n- create: Create new files with specified content.\n' : ''}${this.enableDelegate ? '- delegate: Delegate big distinct tasks to specialized probe subagents.\n' : ''}${this.enableBash ? '- bash: Execute bash commands for system operations.\n' : ''}
-- attempt_completion: Finalize the task and provide the result to the user.
-- attempt_complete: Quick completion using previous response (shorthand).
-`;
+${availableToolsList}`;
     // Common instructions
     const commonInstructions = `<instructions>
@@ -2126,6 +2200,11 @@ Follow these instructions carefully:
       }
     }
+    // Add task management system prompt if enabled
+    if (this.enableTasks) {
+      systemMessage += `\n${taskSystemPrompt}\n`;
+    }
     // Add MCP tools if available (filtered by allowedTools)
     if (this.mcpBridge && this.mcpBridge.getToolNames().length > 0) {
       const allMcpTools = this.mcpBridge.getToolNames();
@@ -2199,6 +2278,39 @@ Follow these instructions carefully:
       // Track initial history length for storage
       const oldHistoryLength = this.history.length;
+      // START CHECKPOINT: Initialize task management for this request
+      if (this.enableTasks) {
+        try {
+          // Create fresh TaskManager for each request (request-scoped)
+          this.taskManager = new TaskManager({ debug: this.debug });
+          // Register task tool for this request
+          const isToolAllowed = (toolName) => this.allowedTools.isEnabled(toolName);
+          if (isToolAllowed('task')) {
+            this.toolImplementations.task = createTaskTool({
+              taskManager: this.taskManager,
+              tracer: this.tracer,
+              debug: this.debug
+            });
+          }
+          // Record telemetry for task initialization
+          if (this.tracer && typeof this.tracer.recordTaskEvent === 'function') {
+            this.tracer.recordTaskEvent('session_started', {
+              'task.enabled': true
+            });
+          }
+          if (this.debug) {
+            console.log('[DEBUG] Task management initialized for this request');
+          }
+        } catch (taskInitError) {
+          // Log error but don't fail the request - task management is optional
+          console.error('[ProbeAgent] Failed to initialize task management:', taskInitError.message);
+          this.taskManager = null;
+        }
+      }
       // Emit user message hook
       await this.hooks.emit(HOOK_TYPES.MESSAGE_USER, {
         sessionId: this.sessionId,
@@ -2212,6 +2324,14 @@ Follow these instructions carefully:
       // Create user message with optional image support
       let userMessage = { role: 'user', content: message.trim() };
+      // START CHECKPOINT: Inject task guidance if tasks are enabled
+      if (this.enableTasks) {
+        userMessage.content = userMessage.content + '\n\n' + taskGuidancePrompt;
+        if (this.debug) {
+          console.log('[DEBUG] Task guidance injected into user message');
+        }
+      }
       // If schema is provided, prepend JSON format requirement to user message
       if (options.schema && !options._schemaFormatted) {
         const schemaInstructions = generateSchemaInstructions(options.schema, { debug: this.debug });
@@ -2518,9 +2638,16 @@ Follow these instructions carefully:
             };
             if (this.tracer) {
+              // Prepare input preview for tracing (truncate if very long)
+              const inputPreview = message.length > 1000
+                ? message.substring(0, 1000) + '... [truncated]'
+                : message;
               await this.tracer.withSpan('ai.request', executeAIRequest, {
                 'ai.model': this.model,
                 'ai.provider': this.clientApiProvider || 'auto',
+                'ai.input': inputPreview,
+                'ai.input_length': message.length,
                 'iteration': currentIteration,
                 'max_tokens': maxResponseTokens,
                 'temperature': 0.3,
@@ -2638,6 +2765,10 @@ Follow these instructions carefully:
           if (this.enableDelegate && this.allowedTools.isEnabled('delegate')) {
             validTools.push('delegate');
           }
+          // Task tool (require both enableTasks flag AND allowedTools permission)
+          if (this.enableTasks && this.allowedTools.isEnabled('task')) {
+            validTools.push('task');
+          }
         }
         // Try parsing with hybrid parser that supports both native and MCP tools
@@ -2653,6 +2784,40 @@ Follow these instructions carefully:
           if (toolName === 'attempt_completion') {
             completionAttempted = true;
+            // END CHECKPOINT: Block completion if there are incomplete tasks
+            if (this.enableTasks && this.taskManager && this.taskManager.hasIncompleteTasks()) {
+              const taskSummary = this.taskManager.getTaskSummary();
+              const blockedMessage = createTaskCompletionBlockedMessage(taskSummary);
+              const incompleteTasks = this.taskManager.getIncompleteTasks();
+              // Record telemetry for blocked completion
+              if (this.tracer && typeof this.tracer.recordTaskEvent === 'function') {
+                this.tracer.recordTaskEvent('completion_blocked', {
+                  'task.incomplete_count': incompleteTasks.length,
+                  'task.incomplete_ids': incompleteTasks.map(t => t.id).join(', '),
+                  'task.iteration': currentIteration
+                });
+              }
+              if (this.debug) {
+                console.log('[DEBUG] Task checkpoint: Blocking completion due to incomplete tasks');
+                console.log('[DEBUG] Incomplete tasks:', taskSummary);
+              }
+              // Add reminder message and continue the loop
+              currentMessages.push({
+                role: 'assistant',
+                content: assistantResponseContent
+              });
+              currentMessages.push({
+                role: 'user',
+                content: blockedMessage
+              });
+              completionAttempted = false; // Reset to allow more iterations
+              continue; // Skip the break and continue the loop
+            }
             // Handle attempt_complete shorthand - use previous response
             if (params.result === '__PREVIOUS_RESPONSE__') {
               // Find the last assistant message with actual content (not tool calls)
@@ -2732,7 +2897,6 @@ Follow these instructions carefully:
                 currentMessages.push({ role: 'user', content: `<tool_result>\n${toolResultContent}\n</tool_result>` });
               } catch (error) {
                 console.error(`Error executing MCP tool ${toolName}:`, error);
-                const toolResultContent = `Error executing MCP tool ${toolName}: ${error.message}`;
                 // Log MCP tool error in debug mode
                 if (this.debug) {
@@ -2742,7 +2906,9 @@ Follow these instructions carefully:
                   console.error(`[DEBUG] ========================================\n`);
                 }
-                currentMessages.push({ role: 'user', content: `<tool_result>\n${toolResultContent}\n</tool_result>` });
+                // Format error with structured information for AI
+                const errorXml = formatErrorForAI(error);
+                currentMessages.push({ role: 'user', content: `<tool_result>\n${errorXml}\n</tool_result>` });
               }
             } else if (this.toolImplementations[toolName]) {
               // Execute native tool
@@ -2810,6 +2976,7 @@ Follow these instructions carefully:
                       provider: this.apiType,           // Inherit AI provider (string identifier)
                       model: this.model,                // Inherit model
                       searchDelegate: this.searchDelegate,
+                      enableTasks: this.enableTasks,    // Inherit task management (subagent gets isolated TaskManager)
                       debug: this.debug,
                       tracer: this.tracer
                     };
@@ -2912,9 +3079,11 @@ Follow these instructions carefully:
               } catch (error) {
                 console.error(`[ERROR] Tool execution failed for ${toolName}:`, error);
                 currentMessages.push({ role: 'assistant', content: assistantResponseContent });
+                // Format error with structured information for AI
+                const errorXml = formatErrorForAI(error);
                 currentMessages.push({
-                  role: 'user',
-                  content: `<tool_result>\nError: ${error.message}\n</tool_result>`
+                  role: 'user',
+                  content: `<tool_result>\n${errorXml}\n</tool_result>`
                 });
               }
             } else {
@@ -2928,7 +3097,7 @@ Follow these instructions carefully:
               currentMessages.push({
                 role: 'user',
-                content: `<tool_result>\nError: Unknown tool '${toolName}'. Available tools: ${allAvailableTools.join(', ')}\n</tool_result>`
+                content: `<tool_result>\n<error type="parameter_error" recoverable="true">\n<message>Unknown tool '${toolName}'</message>\n<suggestion>Available tools: ${allAvailableTools.join(', ')}. Please use one of these tools.</suggestion>\n</error>\n</tool_result>`
               });
             }
           }
@@ -2952,8 +3121,22 @@ Follow these instructions carefully:
           // Add assistant response and ask for tool usage
           currentMessages.push({ role: 'assistant', content: assistantResponseContent });
-          // Standard reminder - schema was already provided in initial message
-          const reminderContent = `Please use one of the available tools to help answer the question, or use attempt_completion if you have enough information to provide a final answer.
+          // Check if the AI tried to use a tool that's not in the valid tools list
+          const unrecognizedTool = detectUnrecognizedToolCall(assistantResponseContent, validTools);
+          let reminderContent;
+          if (unrecognizedTool) {
+            // AI tried to use a tool that's not available - provide clear error
+            if (this.debug) {
+              console.log(`[DEBUG] Detected unrecognized tool '${unrecognizedTool}' in assistant response.`);
+            }
+            const toolError = new ParameterError(`Tool '${unrecognizedTool}' is not available in this context.`, {
+              suggestion: `Available tools: ${validTools.join(', ')}. Please use one of these tools instead.`
+            });
+            reminderContent = `<tool_result>\n${formatErrorForAI(toolError)}\n</tool_result>`;
+          } else {
+            // Standard reminder - no tool call detected at all
+            reminderContent = `Please use one of the available tools to help answer the question, or use attempt_completion if you have enough information to provide a final answer.
 Remember: Use proper XML format with BOTH opening and closing tags:
@@ -2961,17 +3144,27 @@ Remember: Use proper XML format with BOTH opening and closing tags:
 <parameter>value</parameter>
 </tool_name>
-Or for quick completion if your previous response was already correct and complete:
-<attempt_complete>
+Available tools: ${validTools.join(', ')}
-IMPORTANT: When using <attempt_complete>, this must be the ONLY content in your response. No additional text, explanations, or other content should be included. This tag signals to reuse your previous response as the final answer.`;
+To complete with a direct answer:
+<attempt_completion>Your final answer here</attempt_completion>
+Or if your previous response already contains a complete, direct answer (not a thinking block or JSON):
+<attempt_complete></attempt_complete>
+Note: <attempt_complete></attempt_complete> reuses your PREVIOUS assistant message as the final answer. Only use this if that message was already a valid, complete response to the user's question.`;
+          }
           currentMessages.push({
             role: 'user',
             content: reminderContent
           });
           if (this.debug) {
-            console.log(`[DEBUG] No tool call detected in assistant response. Prompting for tool use.`);
+            if (unrecognizedTool) {
+              console.log(`[DEBUG] Unrecognized tool '${unrecognizedTool}' used. Providing error feedback.`);
+            } else {
+              console.log(`[DEBUG] No tool call detected in assistant response. Prompting for tool use.`);
+            }
           }
         }
@@ -3872,9 +4065,9 @@ Convert your previous response content into actual JSON data that follows this s
       return true;
     }
-    // Empty attempt_complete reminders
-    if (content.includes('When using <attempt_complete>') &&
-        content.includes('this must be the ONLY content in your response')) {
+    // Empty attempt_complete reminders (legacy and new format)
+    if (content.includes('<attempt_complete></attempt_complete>') &&
+        content.includes('reuses your PREVIOUS assistant message')) {
       return true;
     }