@probelabs/probe 0.6.0-rc265 → 0.6.0-rc267

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/bin/binaries/probe-v0.6.0-rc267-aarch64-apple-darwin.tar.gz +0 -0
  2. package/bin/binaries/probe-v0.6.0-rc267-aarch64-unknown-linux-musl.tar.gz +0 -0
  3. package/bin/binaries/probe-v0.6.0-rc267-x86_64-apple-darwin.tar.gz +0 -0
  4. package/bin/binaries/probe-v0.6.0-rc267-x86_64-pc-windows-msvc.zip +0 -0
  5. package/bin/binaries/probe-v0.6.0-rc267-x86_64-unknown-linux-musl.tar.gz +0 -0
  6. package/build/agent/ProbeAgent.js +644 -1442
  7. package/build/agent/engines/enhanced-vercel.js +0 -7
  8. package/build/agent/index.js +3941 -5940
  9. package/build/agent/mcp/index.js +6 -15
  10. package/build/agent/mcp/xmlBridge.js +24 -324
  11. package/build/agent/tasks/index.js +0 -1
  12. package/build/agent/tools.js +11 -181
  13. package/build/index.js +13 -35
  14. package/build/tools/common.js +15 -707
  15. package/build/tools/executePlan.js +2 -2
  16. package/build/tools/index.js +8 -11
  17. package/cjs/agent/ProbeAgent.cjs +3734 -5831
  18. package/cjs/index.cjs +4797 -6869
  19. package/package.json +1 -1
  20. package/src/agent/ProbeAgent.js +644 -1442
  21. package/src/agent/engines/enhanced-vercel.js +0 -7
  22. package/src/agent/index.js +10 -2
  23. package/src/agent/mcp/index.js +6 -15
  24. package/src/agent/mcp/xmlBridge.js +24 -324
  25. package/src/agent/tasks/index.js +0 -1
  26. package/src/agent/tools.js +11 -181
  27. package/src/index.js +13 -35
  28. package/src/tools/common.js +15 -707
  29. package/src/tools/executePlan.js +2 -2
  30. package/src/tools/index.js +8 -11
  31. package/bin/binaries/probe-v0.6.0-rc265-aarch64-apple-darwin.tar.gz +0 -0
  32. package/bin/binaries/probe-v0.6.0-rc265-aarch64-unknown-linux-musl.tar.gz +0 -0
  33. package/bin/binaries/probe-v0.6.0-rc265-x86_64-apple-darwin.tar.gz +0 -0
  34. package/bin/binaries/probe-v0.6.0-rc265-x86_64-pc-windows-msvc.zip +0 -0
  35. package/bin/binaries/probe-v0.6.0-rc265-x86_64-unknown-linux-musl.tar.gz +0 -0
  36. package/build/agent/xmlParsingUtils.js +0 -221
  37. package/src/agent/xmlParsingUtils.js +0 -221
@@ -31,7 +31,7 @@ import { createAnthropic } from '@ai-sdk/anthropic';
31
31
  import { createOpenAI } from '@ai-sdk/openai';
32
32
  import { createGoogleGenerativeAI } from '@ai-sdk/google';
33
33
  import { createAmazonBedrock } from '@ai-sdk/amazon-bedrock';
34
- import { streamText } from 'ai';
34
+ import { streamText, tool, stepCountIs, jsonSchema } from 'ai';
35
35
  import { randomUUID } from 'crypto';
36
36
  import { EventEmitter } from 'events';
37
37
  import { existsSync } from 'fs';
@@ -43,29 +43,26 @@ import { HookManager, HOOK_TYPES } from './hooks/HookManager.js';
43
43
  import { SUPPORTED_IMAGE_EXTENSIONS, IMAGE_MIME_TYPES, isFormatSupportedByProvider } from './imageConfig.js';
44
44
  import {
45
45
  createTools,
46
- searchToolDefinition,
47
- queryToolDefinition,
48
- extractToolDefinition,
49
- delegateToolDefinition,
50
- analyzeAllToolDefinition,
51
- getExecutePlanToolDefinition,
52
- getCleanupExecutePlanToolDefinition,
53
- bashToolDefinition,
54
- listFilesToolDefinition,
55
- searchFilesToolDefinition,
56
- listSkillsToolDefinition,
57
- useSkillToolDefinition,
58
- readImageToolDefinition,
59
- attemptCompletionToolDefinition,
60
- editToolDefinition,
61
- createToolDefinition,
62
- multiEditToolDefinition,
63
- googleSearchToolDefinition,
64
- urlContextToolDefinition,
65
46
  attemptCompletionSchema,
66
- parseXmlToolCallWithThinking
47
+ searchSchema,
48
+ querySchema,
49
+ extractSchema,
50
+ delegateSchema,
51
+ analyzeAllSchema,
52
+ executePlanSchema,
53
+ cleanupExecutePlanSchema,
54
+ bashSchema,
55
+ editSchema,
56
+ createSchema,
57
+ multiEditSchema,
58
+ listFilesSchema,
59
+ searchFilesSchema,
60
+ readImageSchema,
61
+ listSkillsSchema,
62
+ useSkillSchema
67
63
  } from './tools.js';
68
- import { createMessagePreview, detectUnrecognizedToolCall, detectStuckResponse, areBothStuckResponses } from '../tools/common.js';
64
+ import { createMessagePreview } from '../tools/common.js';
65
+ import { taskSchema } from './tasks/taskTool.js';
69
66
  import { FileTracker } from '../tools/fileTracker.js';
70
67
  import {
71
68
  createWrappedTools,
@@ -87,15 +84,13 @@ import {
87
84
  tryAutoWrapForSimpleSchema,
88
85
  tryExtractValidJsonPrefix
89
86
  } from './schemaUtils.js';
90
- import { removeThinkingTags, extractThinkingContent } from './xmlParsingUtils.js';
91
87
  import { predefinedPrompts } from './shared/prompts.js';
92
88
  import {
93
89
  MCPXmlBridge,
94
- parseHybridXmlToolCall,
95
90
  loadMCPConfigurationFromPath
96
91
  } from './mcp/index.js';
97
92
  import { SkillRegistry } from './skills/registry.js';
98
- import { formatAvailableSkillsXml } from './skills/formatting.js';
93
+ import { formatAvailableSkillsXml as formatAvailableSkills } from './skills/formatting.js';
99
94
  import { createSkillToolInstances } from './skills/tools.js';
100
95
  import { RetryManager, createRetryManagerFromEnv } from './RetryManager.js';
101
96
  import { FallbackManager, createFallbackManagerFromEnv, buildFallbackProvidersFromEnv } from './FallbackManager.js';
@@ -108,11 +103,11 @@ import { extractRawOutputBlocks } from '../tools/executePlan.js';
108
103
  import {
109
104
  TaskManager,
110
105
  createTaskTool,
111
- taskToolDefinition,
112
106
  taskSystemPrompt,
113
107
  taskGuidancePrompt,
114
108
  createTaskCompletionBlockedMessage
115
109
  } from './tasks/index.js';
110
+ import { z } from 'zod';
116
111
 
117
112
  // Maximum tool iterations to prevent infinite loops - configurable via MAX_TOOL_ITERATIONS env var
118
113
  const MAX_TOOL_ITERATIONS = (() => {
@@ -125,44 +120,6 @@ const MAX_TOOL_ITERATIONS = (() => {
125
120
  })();
126
121
  const MAX_HISTORY_MESSAGES = 100;
127
122
 
128
- /**
129
- * Extract tool name from wrapped_tool:toolName format
130
- * @param {string} wrappedToolError - Error string in format 'wrapped_tool:toolName'
131
- * @returns {string} The extracted tool name or 'unknown' if format is invalid
132
- */
133
- function extractWrappedToolName(wrappedToolError) {
134
- if (!wrappedToolError || typeof wrappedToolError !== 'string') {
135
- return 'unknown';
136
- }
137
- const colonIndex = wrappedToolError.indexOf(':');
138
- return colonIndex !== -1 ? wrappedToolError.slice(colonIndex + 1) : 'unknown';
139
- }
140
-
141
- /**
142
- * Check if an error indicates a wrapped tool format error
143
- * @param {string|null} error - Error from detectUnrecognizedToolCall
144
- * @returns {boolean} True if it's a wrapped tool error
145
- */
146
- function isWrappedToolError(error) {
147
- return error && typeof error === 'string' && error.startsWith('wrapped_tool:');
148
- }
149
-
150
- /**
151
- * Create error message for wrapped tool format issues
152
- * @param {string} wrappedToolName - The tool name that was incorrectly wrapped
153
- * @returns {string} User-friendly error message with correct format instructions
154
- */
155
- function createWrappedToolErrorMessage(wrappedToolName) {
156
- return `Your response contained an incorrectly formatted tool call (${wrappedToolName} wrapped in XML tags). This cannot be used.
157
-
158
- Please use the CORRECT format:
159
-
160
- <${wrappedToolName}>
161
- Your content here
162
- </${wrappedToolName}>
163
-
164
- Do NOT wrap in other tags like <api_call>, <tool_name>, <function>, etc.`;
165
- }
166
123
 
167
124
  // Supported image file extensions (imported from shared config)
168
125
 
@@ -223,6 +180,7 @@ export class ProbeAgent {
223
180
  * @param {number} [options.maxOutputTokens] - Maximum tokens for tool output before truncation (default: 20000, can also be set via PROBE_MAX_OUTPUT_TOKENS env var)
224
181
  * @param {number} [options.requestTimeout] - Timeout in ms for AI requests (default: 120000 or REQUEST_TIMEOUT env var). Used to abort hung requests.
225
182
  * @param {number} [options.maxOperationTimeout] - Maximum timeout in ms for the entire operation including all retries and fallbacks (default: 300000 or MAX_OPERATION_TIMEOUT env var). This is the absolute maximum time for streamTextWithRetryAndFallback.
183
+ * @param {string|number} [options.thinkingEffort] - Native thinking/reasoning effort level: 'low', 'medium', 'high', or a number (budget tokens). When set, passes provider-specific thinking options to the LLM via providerOptions.
226
184
  */
227
185
  constructor(options = {}) {
228
186
  // Basic configuration
@@ -264,6 +222,10 @@ export class ProbeAgent {
264
222
  // Completion prompt for post-completion validation/review
265
223
  this.completionPrompt = options.completionPrompt || null;
266
224
 
225
+ // Native thinking/reasoning effort for LLM providers
226
+ // Accepted values: 'off' (default), 'low', 'medium', 'high', or a number (budget tokens)
227
+ this.thinkingEffort = options.thinkingEffort || null;
228
+
267
229
  // Tool filtering configuration
268
230
  // Parse allowedTools option: ['*'] = all tools, [] or null = no tools, ['tool1', 'tool2'] = specific tools
269
231
  // Supports exclusion with '!' prefix: ['*', '!bash'] = all tools except bash
@@ -543,30 +505,6 @@ export class ProbeAgent {
543
505
  }
544
506
  }
545
507
 
546
- /**
547
- * Record AI thinking content for telemetry
548
- * @param {string} thinkingContent - The thinking content
549
- * @param {number} iteration - Current iteration number
550
- * @private
551
- */
552
- _recordThinkingTelemetry(thinkingContent, iteration) {
553
- if (!this.tracer || !thinkingContent) return;
554
-
555
- if (this._isAppTracerStyle() && typeof this.tracer.recordThinkingContent === 'function') {
556
- // AppTracer style: (sessionId, iteration, content)
557
- this.tracer.recordThinkingContent(this.sessionId, iteration, thinkingContent);
558
- } else if (typeof this.tracer.recordThinkingContent === 'function') {
559
- // SimpleAppTracer style: (content, metadata)
560
- this.tracer.recordThinkingContent(thinkingContent, { iteration });
561
- } else {
562
- this.tracer.addEvent('ai.thinking', {
563
- 'ai.thinking.content': thinkingContent.substring(0, 50000),
564
- 'ai.thinking.length': thinkingContent.length,
565
- 'iteration': iteration
566
- });
567
- }
568
- }
569
-
570
508
  /**
571
509
  * Record AI tool decision for telemetry
572
510
  * @param {string} toolName - The tool name
@@ -1621,6 +1559,478 @@ export class ProbeAgent {
1621
1559
  return Object.keys(tools).length > 0 ? tools : undefined;
1622
1560
  }
1623
1561
 
1562
+ /**
1563
+ * Build providerOptions for native thinking/reasoning based on thinkingEffort setting.
1564
+ * Maps effort levels to provider-specific parameters.
1565
+ * @param {number} maxResponseTokens - Current max response tokens for budget calculation
1566
+ * @returns {Object|undefined} providerOptions object or undefined if thinking is off
1567
+ * @private
1568
+ */
1569
+ _buildThinkingProviderOptions(maxResponseTokens) {
1570
+ if (!this.thinkingEffort) return undefined;
1571
+
1572
+ const effort = this.thinkingEffort;
1573
+
1574
+ // Map string effort levels to budget tokens
1575
+ const effortToBudget = {
1576
+ low: 4000,
1577
+ medium: 10000,
1578
+ high: 32000,
1579
+ };
1580
+
1581
+ if (this.apiType === 'anthropic') {
1582
+ const budgetTokens = typeof effort === 'number'
1583
+ ? effort
1584
+ : effortToBudget[effort];
1585
+ if (!budgetTokens) return undefined;
1586
+ return {
1587
+ anthropic: {
1588
+ thinking: { type: 'enabled', budgetTokens },
1589
+ },
1590
+ };
1591
+ }
1592
+
1593
+ if (this.apiType === 'openai') {
1594
+ // OpenAI reasoning models use reasoningEffort: 'low' | 'medium' | 'high'
1595
+ const reasoningEffort = typeof effort === 'number'
1596
+ ? (effort <= 4000 ? 'low' : effort <= 10000 ? 'medium' : 'high')
1597
+ : effort;
1598
+ if (!['low', 'medium', 'high'].includes(reasoningEffort)) return undefined;
1599
+ return {
1600
+ openai: {
1601
+ reasoningEffort,
1602
+ },
1603
+ };
1604
+ }
1605
+
1606
+ if (this.apiType === 'google') {
1607
+ const thinkingBudget = typeof effort === 'number'
1608
+ ? effort
1609
+ : effortToBudget[effort];
1610
+ if (!thinkingBudget) return undefined;
1611
+ return {
1612
+ google: {
1613
+ thinkingConfig: { thinkingBudget },
1614
+ },
1615
+ };
1616
+ }
1617
+
1618
+ return undefined;
1619
+ }
1620
+
1621
+ /**
1622
+ * Build native Vercel AI SDK tools object for use with streamText().
1623
+ * Each tool wraps the existing toolImplementations with:
1624
+ * - sessionId and workingDirectory injection
1625
+ * - Event emission
1626
+ * - Output truncation
1627
+ * - Raw output block extraction
1628
+ * - Telemetry recording
1629
+ * - Delegate tool param injection
1630
+ *
1631
+ * @param {Object} options - Options from the answer() call
1632
+ * @param {Function} onComplete - Callback when attempt_completion is called (receives result string)
1633
+ * @param {Object} context - Execution context { maxIterations, currentMessages }
1634
+ * @returns {Object} Tools object for streamText()
1635
+ * @private
1636
+ */
1637
+ _buildNativeTools(options, onComplete, context = {}) {
1638
+ const { maxIterations = 30 } = context;
1639
+ const nativeTools = {};
1640
+ const isToolAllowed = (toolName) => this.allowedTools.isEnabled(toolName);
1641
+
1642
+ // Helper to wrap a tool implementation into a Vercel AI SDK tool
1643
+ const wrapTool = (toolName, schema, description, executeFn) => {
1644
+ // Auto-wrap plain JSON Schema objects with jsonSchema() for AI SDK 5 compatibility
1645
+ // Zod schemas have a _def property; plain objects need wrapping
1646
+ const resolvedSchema = schema && schema._def ? schema : jsonSchema(schema);
1647
+ return tool({
1648
+ description,
1649
+ inputSchema: resolvedSchema,
1650
+ execute: async (params) => {
1651
+ // Add sessionId and workingDirectory to params
1652
+ let resolvedWorkingDirectory = this.workspaceRoot || this.cwd || (this.allowedFolders && this.allowedFolders[0]) || process.cwd();
1653
+ if (params.workingDirectory) {
1654
+ const requestedDir = safeRealpath(isAbsolute(params.workingDirectory)
1655
+ ? resolve(params.workingDirectory)
1656
+ : resolve(resolvedWorkingDirectory, params.workingDirectory));
1657
+ const isWithinAllowed = !this.allowedFolders || this.allowedFolders.length === 0 ||
1658
+ this.allowedFolders.some(folder => {
1659
+ const resolvedFolder = safeRealpath(folder);
1660
+ return requestedDir === resolvedFolder || requestedDir.startsWith(resolvedFolder + sep);
1661
+ });
1662
+ if (isWithinAllowed) {
1663
+ resolvedWorkingDirectory = requestedDir;
1664
+ } else if (this.debug) {
1665
+ console.error(`[DEBUG] Rejected workingDirectory "${params.workingDirectory}" - not within allowed folders`);
1666
+ }
1667
+ }
1668
+ const toolParams = {
1669
+ ...params,
1670
+ sessionId: this.sessionId,
1671
+ workingDirectory: resolvedWorkingDirectory
1672
+ };
1673
+
1674
+ // Log tool execution in debug mode
1675
+ if (this.debug) {
1676
+ console.error(`\n[DEBUG] ========================================`);
1677
+ console.error(`[DEBUG] Executing tool: ${toolName}`);
1678
+ console.error(`[DEBUG] Arguments:`);
1679
+ for (const [key, value] of Object.entries(params)) {
1680
+ const displayValue = typeof value === 'string' && value.length > 100
1681
+ ? value.substring(0, 100) + '...'
1682
+ : value;
1683
+ console.error(`[DEBUG] ${key}: ${JSON.stringify(displayValue)}`);
1684
+ }
1685
+ console.error(`[DEBUG] ========================================\n`);
1686
+ }
1687
+
1688
+ // Emit tool start event
1689
+ this.events.emit('toolCall', {
1690
+ timestamp: new Date().toISOString(),
1691
+ name: toolName,
1692
+ args: toolParams,
1693
+ status: 'started',
1694
+ pauseStream: true
1695
+ });
1696
+
1697
+ const toolStartTime = Date.now();
1698
+ try {
1699
+ // For delegate tool, inject additional params
1700
+ let result;
1701
+ if (toolName === 'delegate') {
1702
+ let allowedToolsForDelegate = null;
1703
+ if (this.allowedTools.mode === 'whitelist') {
1704
+ allowedToolsForDelegate = [...this.allowedTools.allowed];
1705
+ } else if (this.allowedTools.mode === 'none') {
1706
+ allowedToolsForDelegate = [];
1707
+ } else if (this.allowedTools.mode === 'all' && this.allowedTools.exclusions?.length > 0) {
1708
+ allowedToolsForDelegate = ['*', ...this.allowedTools.exclusions.map(t => '!' + t)];
1709
+ }
1710
+
1711
+ const enhancedParams = {
1712
+ ...toolParams,
1713
+ currentIteration: context.currentIteration || 0,
1714
+ maxIterations,
1715
+ parentSessionId: this.sessionId,
1716
+ path: this.searchPath,
1717
+ provider: this.apiType,
1718
+ model: this.model,
1719
+ searchDelegate: this.searchDelegate,
1720
+ enableTasks: this.enableTasks,
1721
+ enableMcp: !!this.mcpBridge,
1722
+ mcpConfig: this.mcpConfig,
1723
+ mcpConfigPath: this.mcpConfigPath,
1724
+ enableBash: this.enableBash,
1725
+ bashConfig: this.bashConfig,
1726
+ allowEdit: this.allowEdit,
1727
+ allowedTools: allowedToolsForDelegate,
1728
+ debug: this.debug,
1729
+ tracer: this.tracer
1730
+ };
1731
+
1732
+ if (this.debug) {
1733
+ console.log(`[DEBUG] Executing delegate tool`);
1734
+ console.log(`[DEBUG] Parent session: ${this.sessionId}`);
1735
+ }
1736
+
1737
+ if (this.tracer) {
1738
+ this.tracer.recordDelegationEvent('tool_started', {
1739
+ 'delegation.task_preview': toolParams.task?.substring(0, 200)
1740
+ });
1741
+ }
1742
+
1743
+ result = await executeFn(enhancedParams);
1744
+ } else {
1745
+ result = await executeFn(toolParams);
1746
+ }
1747
+
1748
+ const toolDurationMs = Date.now() - toolStartTime;
1749
+ this._recordToolResultTelemetry(toolName, result, true, toolDurationMs, context.currentIteration || 0);
1750
+
1751
+ // Emit tool success event
1752
+ this.events.emit('toolCall', {
1753
+ timestamp: new Date().toISOString(),
1754
+ name: toolName,
1755
+ args: toolParams,
1756
+ resultPreview: typeof result === 'string'
1757
+ ? (result.length > 200 ? result.substring(0, 200) + '...' : result)
1758
+ : (result ? JSON.stringify(result).substring(0, 200) + '...' : 'No Result'),
1759
+ status: 'completed'
1760
+ });
1761
+
1762
+ let toolResultContent = typeof result === 'string' ? result : JSON.stringify(result, null, 2);
1763
+
1764
+ // Convert absolute workspace paths to relative
1765
+ if (this.workspaceRoot && toolResultContent) {
1766
+ const wsPrefix = this.workspaceRoot.endsWith(sep) ? this.workspaceRoot : this.workspaceRoot + sep;
1767
+ toolResultContent = toolResultContent.split(wsPrefix).join('');
1768
+ }
1769
+
1770
+ // Extract raw output blocks from tool result (before truncation)
1771
+ const { cleanedContent, extractedBlocks } = extractRawOutputBlocks(toolResultContent);
1772
+ if (extractedBlocks.length > 0) {
1773
+ toolResultContent = cleanedContent;
1774
+ this._extractedRawBlocks.push(...extractedBlocks);
1775
+ if (this.debug) {
1776
+ console.log(`[DEBUG] Extracted ${extractedBlocks.length} raw output blocks from tool result`);
1777
+ }
1778
+ }
1779
+
1780
+ // Truncate if output exceeds token limit
1781
+ try {
1782
+ const truncateResult = await truncateIfNeeded(toolResultContent, this.tokenCounter, this.sessionId, this.maxOutputTokens);
1783
+ if (truncateResult.truncated) {
1784
+ toolResultContent = truncateResult.content;
1785
+ if (this.debug) {
1786
+ console.log(`[DEBUG] Tool output truncated: ${truncateResult.originalTokens} tokens`);
1787
+ }
1788
+ }
1789
+ } catch (truncateError) {
1790
+ console.error(`[WARN] Tool output truncation failed: ${truncateError.message}`);
1791
+ }
1792
+
1793
+ if (this.debug) {
1794
+ console.log(`[DEBUG] Tool ${toolName} executed successfully. Result length: ${toolResultContent.length}`);
1795
+ }
1796
+
1797
+ return toolResultContent;
1798
+ } catch (error) {
1799
+ const toolDurationMs = Date.now() - toolStartTime;
1800
+ this._recordToolResultTelemetry(toolName, null, false, toolDurationMs, context.currentIteration || 0);
1801
+
1802
+ // Emit tool error event
1803
+ this.events.emit('toolCall', {
1804
+ timestamp: new Date().toISOString(),
1805
+ name: toolName,
1806
+ args: toolParams,
1807
+ error: error.message || 'Unknown error',
1808
+ status: 'error'
1809
+ });
1810
+
1811
+ if (this.debug) {
1812
+ console.error(`[DEBUG] Tool '${toolName}' failed: ${error.message}`);
1813
+ }
1814
+
1815
+ // Format error for AI
1816
+ const errorMsg = formatErrorForAI(error);
1817
+ return errorMsg;
1818
+ }
1819
+ }
1820
+ });
1821
+ };
1822
+
1823
+ // Only include attempt_completion when _disableTools is set
1824
+ if (options._disableTools) {
1825
+ nativeTools.attempt_completion = tool({
1826
+ description: 'Signal task completion and provide the final result to the user',
1827
+ inputSchema: z.object({
1828
+ result: z.string().describe('The final result to present to the user')
1829
+ }),
1830
+ execute: async ({ result }) => {
1831
+ onComplete(result);
1832
+ return result;
1833
+ }
1834
+ });
1835
+ return nativeTools;
1836
+ }
1837
+
1838
+ // Add all enabled tools from toolImplementations
1839
+ // Note: MCP tools are also in toolImplementations but have no schema in _getToolSchemaAndDescription.
1840
+ // They are handled separately via mcpBridge.getVercelTools() below, so we skip them here.
1841
+ for (const [toolName, toolImpl] of Object.entries(this.toolImplementations)) {
1842
+ // Get schema and description for this tool
1843
+ const toolInfo = this._getToolSchemaAndDescription(toolName);
1844
+ if (!toolInfo) continue;
1845
+ const { schema, description } = toolInfo;
1846
+ if (schema && description) {
1847
+ nativeTools[toolName] = wrapTool(toolName, schema, description, toolImpl.execute);
1848
+ }
1849
+ }
1850
+
1851
+ // Always add attempt_completion
1852
+ nativeTools.attempt_completion = tool({
1853
+ description: 'Signal task completion and provide the final result to the user',
1854
+ inputSchema: z.object({
1855
+ result: z.string().describe('The final result to present to the user')
1856
+ }),
1857
+ execute: async ({ result }) => {
1858
+ // Task completion blocking
1859
+ if (this.enableTasks && this.taskManager && this.taskManager.hasIncompleteTasks()) {
1860
+ const incompleteTasks = this.taskManager.getIncompleteTasks();
1861
+ const highIterationCount = (context.currentIteration || 0) > maxIterations * 0.7;
1862
+
1863
+ if (!highIterationCount) {
1864
+ const taskSummary = this.taskManager.getTaskSummary();
1865
+ const blockedMessage = createTaskCompletionBlockedMessage(taskSummary);
1866
+ if (this.debug) {
1867
+ console.log('[DEBUG] Task checkpoint: Blocking completion due to incomplete tasks');
1868
+ }
1869
+ return blockedMessage;
1870
+ }
1871
+ }
1872
+
1873
+ onComplete(result);
1874
+ return result;
1875
+ }
1876
+ });
1877
+
1878
+ // Add MCP tools if available
1879
+ if (this.mcpBridge && !options._disableTools) {
1880
+ const mcpTools = this.mcpBridge.getVercelTools(this._filterMcpTools(this.mcpBridge.getToolNames()));
1881
+ for (const [name, mcpTool] of Object.entries(mcpTools)) {
1882
+ nativeTools[name] = mcpTool;
1883
+ }
1884
+ }
1885
+
1886
+ // Add Gemini provider tools as wrapper function tools.
1887
+ // The Gemini API does not allow mixing provider-defined tools with function tools
1888
+ // in the same request. To work around this, we create regular function tools that
1889
+ // internally make a separate API call using only the provider-defined tool.
1890
+ if (this.apiType === 'google' && this._geminiToolsEnabled && !options._disableTools) {
1891
+ const { googleSearch, urlContext } = this._geminiToolsEnabled;
1892
+
1893
+ if (googleSearch && isToolAllowed('gemini_google_search')) {
1894
+ nativeTools.google_search = tool({
1895
+ description: 'Search the web using Google Search for current information, recent events, or real-time data.',
1896
+ inputSchema: z.object({
1897
+ query: z.string().describe('The search query to find information on the web')
1898
+ }),
1899
+ execute: async ({ query }) => {
1900
+ if (this.debug) {
1901
+ console.log(`[DEBUG] google_search wrapper: querying "${query}"`);
1902
+ }
1903
+ try {
1904
+ const { generateText: genText } = await import('ai');
1905
+ const searchResult = await genText({
1906
+ model: this.provider(this.model.includes('flash') ? this.model : this.model.replace('pro', 'flash')),
1907
+ messages: [{ role: 'user', content: query }],
1908
+ tools: { google_search: this.provider.tools.googleSearch({}) },
1909
+ stopWhen: stepCountIs(2),
1910
+ maxTokens: 4000
1911
+ });
1912
+ return searchResult.text || 'No search results found.';
1913
+ } catch (err) {
1914
+ if (this.debug) console.error(`[DEBUG] google_search wrapper error:`, err.message);
1915
+ return `Search failed: ${err.message}`;
1916
+ }
1917
+ }
1918
+ });
1919
+ }
1920
+
1921
+ if (urlContext && isToolAllowed('gemini_url_context')) {
1922
+ nativeTools.url_context = tool({
1923
+ description: 'Fetch and analyze content from a specific URL. Use this to read web pages, documentation, or online resources.',
1924
+ inputSchema: z.object({
1925
+ url: z.string().describe('The URL to fetch and analyze')
1926
+ }),
1927
+ execute: async ({ url }) => {
1928
+ if (this.debug) {
1929
+ console.log(`[DEBUG] url_context wrapper: fetching "${url}"`);
1930
+ }
1931
+ try {
1932
+ const { generateText: genText } = await import('ai');
1933
+ const fetchResult = await genText({
1934
+ model: this.provider(this.model.includes('flash') ? this.model : this.model.replace('pro', 'flash')),
1935
+ messages: [{ role: 'user', content: `Summarize the content at this URL: ${url}` }],
1936
+ tools: { url_context: this.provider.tools.urlContext({}) },
1937
+ stopWhen: stepCountIs(2),
1938
+ maxTokens: 4000
1939
+ });
1940
+ return fetchResult.text || 'Could not fetch URL content.';
1941
+ } catch (err) {
1942
+ if (this.debug) console.error(`[DEBUG] url_context wrapper error:`, err.message);
1943
+ return `URL fetch failed: ${err.message}`;
1944
+ }
1945
+ }
1946
+ });
1947
+ }
1948
+ }
1949
+
1950
+ return nativeTools;
1951
+ }
1952
+
1953
+ /**
1954
+ * Get the Zod schema and description for a tool by name
1955
+ * @param {string} toolName - Tool name
1956
+ * @returns {{ schema: z.ZodObject, description: string } | null}
1957
+ * @private
1958
+ */
1959
+ _getToolSchemaAndDescription(toolName) {
1960
+ const toolMap = {
1961
+ search: {
1962
+ schema: searchSchema,
1963
+ description: 'Search code in the repository using keyword queries with Elasticsearch syntax.'
1964
+ },
1965
+ query: {
1966
+ schema: querySchema,
1967
+ description: 'Search code using ast-grep structural pattern matching.'
1968
+ },
1969
+ extract: {
1970
+ schema: extractSchema,
1971
+ description: 'Extract code blocks from files based on file paths and optional line numbers.'
1972
+ },
1973
+ delegate: {
1974
+ schema: delegateSchema,
1975
+ description: 'Delegate big distinct tasks to specialized probe subagents.'
1976
+ },
1977
+ analyze_all: {
1978
+ schema: analyzeAllSchema,
1979
+ description: 'Process ALL data matching a query using map-reduce for aggregate questions.'
1980
+ },
1981
+ execute_plan: {
1982
+ schema: executePlanSchema,
1983
+ description: 'Execute a DSL program to orchestrate tool calls.'
1984
+ },
1985
+ cleanup_execute_plan: {
1986
+ schema: cleanupExecutePlanSchema,
1987
+ description: 'Clean up output buffer and session store from previous execute_plan calls.'
1988
+ },
1989
+ bash: {
1990
+ schema: bashSchema,
1991
+ description: 'Execute bash commands for system exploration and development tasks.'
1992
+ },
1993
+ edit: {
1994
+ schema: editSchema,
1995
+ description: 'Edit files using text replacement, AST-aware symbol operations, or line-targeted editing.'
1996
+ },
1997
+ create: {
1998
+ schema: createSchema,
1999
+ description: 'Create new files with specified content.'
2000
+ },
2001
+ multi_edit: {
2002
+ schema: multiEditSchema,
2003
+ description: 'Apply multiple file edits in one call using a JSON array of operations.'
2004
+ },
2005
+ listFiles: {
2006
+ schema: listFilesSchema,
2007
+ description: 'List files and directories in a specified location.'
2008
+ },
2009
+ searchFiles: {
2010
+ schema: searchFilesSchema,
2011
+ description: 'Find files matching a glob pattern with recursive search capability.'
2012
+ },
2013
+ readImage: {
2014
+ schema: readImageSchema,
2015
+ description: 'Read and load an image file for AI analysis.'
2016
+ },
2017
+ listSkills: {
2018
+ schema: listSkillsSchema,
2019
+ description: 'List available agent skills discovered in the repository.'
2020
+ },
2021
+ useSkill: {
2022
+ schema: useSkillSchema,
2023
+ description: 'Load and activate a specific skill\'s instructions.'
2024
+ },
2025
+ task: {
2026
+ schema: taskSchema,
2027
+ description: 'Manage tasks for tracking progress (create, update, complete, delete, list).'
2028
+ }
2029
+ };
2030
+
2031
+ return toolMap[toolName] || null;
2032
+ }
2033
+
1624
2034
  /**
1625
2035
  * Initialize AWS Bedrock model
1626
2036
  */
@@ -2369,7 +2779,7 @@ export class ProbeAgent {
2369
2779
  async _getAvailableSkillsXml() {
2370
2780
  const skills = await this._loadSkillsMetadata();
2371
2781
  if (!skills.length) return '';
2372
- return formatAvailableSkillsXml(skills);
2782
+ return formatAvailableSkills(skills);
2373
2783
  }
2374
2784
 
2375
2785
  /**
@@ -2527,245 +2937,16 @@ ${extractGuidance}
2527
2937
  }
2528
2938
  }
2529
2939
 
2530
- // Build tool definitions based on allowedTools configuration
2531
- let toolDefinitions = '';
2532
-
2533
- // Helper to check if a tool is allowed
2534
- const isToolAllowed = (toolName) => this.allowedTools.isEnabled(toolName);
2535
-
2536
- // Core tools (filtered by allowedTools)
2537
- if (isToolAllowed('search')) {
2538
- const searchDefinition = this.searchDelegate
2539
- ? `${searchToolDefinition}\n**Note:** This search tool delegates code searching to a dedicated subagent and returns extracted code blocks. Use extract only to expand context or if search returns no code.`
2540
- : searchToolDefinition;
2541
- toolDefinitions += `${searchDefinition}\n`;
2542
- }
2543
- if (isToolAllowed('query')) {
2544
- toolDefinitions += `${queryToolDefinition}\n`;
2545
- }
2546
- if (isToolAllowed('extract')) {
2547
- toolDefinitions += `${extractToolDefinition}\n`;
2548
- }
2549
- if (isToolAllowed('listFiles')) {
2550
- toolDefinitions += `${listFilesToolDefinition}\n`;
2551
- }
2552
- if (isToolAllowed('searchFiles')) {
2553
- toolDefinitions += `${searchFilesToolDefinition}\n`;
2554
- }
2555
- if (this.enableSkills && isToolAllowed('listSkills')) {
2556
- toolDefinitions += `${listSkillsToolDefinition}\n`;
2557
- }
2558
- if (this.enableSkills && isToolAllowed('useSkill')) {
2559
- toolDefinitions += `${useSkillToolDefinition}\n`;
2560
- }
2561
- if (isToolAllowed('readImage')) {
2562
- toolDefinitions += `${readImageToolDefinition}\n`;
2563
- }
2564
-
2565
- // Edit tools (require both allowEdit flag AND allowedTools permission)
2566
- if (this.allowEdit && isToolAllowed('edit')) {
2567
- toolDefinitions += `${editToolDefinition}\n`;
2568
- }
2569
- if (this.allowEdit && isToolAllowed('create')) {
2570
- toolDefinitions += `${createToolDefinition}\n`;
2571
- }
2572
- if (this.allowEdit && isToolAllowed('multi_edit')) {
2573
- toolDefinitions += `${multiEditToolDefinition}\n`;
2574
- }
2575
- // Bash tool (require both enableBash flag AND allowedTools permission)
2576
- if (this.enableBash && isToolAllowed('bash')) {
2577
- toolDefinitions += `${bashToolDefinition}\n`;
2578
- }
2579
-
2580
- // Task tool (require both enableTasks flag AND allowedTools permission)
2581
- if (this.enableTasks && isToolAllowed('task')) {
2582
- toolDefinitions += `${taskToolDefinition}\n`;
2583
- }
2584
-
2585
- // Always include attempt_completion unconditionally - it's a completion signal, not a tool
2586
- // This ensures agents can always complete their work, regardless of tool restrictions
2587
- toolDefinitions += `${attemptCompletionToolDefinition}\n`;
2588
-
2589
- // Delegate tool (require both enableDelegate flag AND allowedTools permission)
2590
- // Place after attempt_completion as it's an optional tool
2591
- if (this.enableDelegate && isToolAllowed('delegate')) {
2592
- toolDefinitions += `${delegateToolDefinition}\n`;
2593
- }
2594
-
2595
- // Execute Plan tool for DSL-based orchestration (requires enableExecutePlan flag, supersedes analyze_all)
2596
- if (this.enableExecutePlan && isToolAllowed('execute_plan')) {
2597
- // Build available function list based on what tools are registered
2598
- const dslFunctions = ['LLM', 'map', 'chunk', 'batch', 'log', 'range', 'flatten', 'unique', 'groupBy', 'parseJSON', 'storeSet', 'storeGet', 'storeAppend', 'storeKeys', 'storeGetAll', 'output'];
2599
- if (isToolAllowed('search')) dslFunctions.unshift('search');
2600
- if (isToolAllowed('query')) dslFunctions.unshift('query');
2601
- if (isToolAllowed('extract')) dslFunctions.unshift('extract');
2602
- if (isToolAllowed('listFiles')) dslFunctions.push('listFiles');
2603
- if (this.enableBash && isToolAllowed('bash')) dslFunctions.push('bash');
2604
- toolDefinitions += `${getExecutePlanToolDefinition(dslFunctions)}\n`;
2605
- // cleanup_execute_plan is enabled together with execute_plan
2606
- if (isToolAllowed('cleanup_execute_plan')) {
2607
- toolDefinitions += `${getCleanupExecutePlanToolDefinition()}\n`;
2608
- }
2609
- } else if (isToolAllowed('analyze_all')) {
2610
- // Fallback: only register analyze_all if execute_plan is not available
2611
- toolDefinitions += `${analyzeAllToolDefinition}\n`;
2612
- }
2613
-
2614
- // Gemini built-in tools (only when using Google provider)
2615
- if (this._geminiToolsEnabled?.googleSearch && isToolAllowed('gemini_google_search')) {
2616
- toolDefinitions += `${googleSearchToolDefinition}\n`;
2617
- }
2618
- if (this._geminiToolsEnabled?.urlContext && isToolAllowed('gemini_url_context')) {
2619
- toolDefinitions += `${urlContextToolDefinition}\n`;
2620
- }
2621
-
2622
- // Build XML tool guidelines with dynamic examples based on allowed tools
2623
- // Build examples only for allowed tools
2624
- let toolExamples = '';
2625
- if (isToolAllowed('search')) {
2626
- toolExamples += `
2627
- <search>
2628
- <query>error handling</query>
2629
- <path>src/search</path>
2630
- </search>
2631
- `;
2632
- }
2633
- if (isToolAllowed('extract')) {
2634
- toolExamples += `
2635
- <extract>
2636
- <targets>src/config.js:15-25</targets>
2637
- </extract>
2638
- `;
2639
- }
2640
- if (isToolAllowed('attempt_completion')) {
2641
- toolExamples += `
2642
- <attempt_completion>
2643
- The configuration is loaded from src/config.js lines 15-25 which contains the database settings.
2644
- </attempt_completion>
2645
- `;
2646
- }
2647
-
2648
- // Build available tools list dynamically based on allowedTools
2649
- let availableToolsList = '';
2650
- if (isToolAllowed('search')) {
2651
- availableToolsList += `- search: Search code using keyword queries${this.searchDelegate ? ' (returns extracted code blocks via a dedicated subagent)' : ''}.\n`;
2652
- }
2653
- if (isToolAllowed('query')) {
2654
- availableToolsList += '- query: Search code using structural AST patterns.\n';
2655
- }
2656
- if (isToolAllowed('extract')) {
2657
- availableToolsList += '- extract: Extract specific code blocks or lines from files. Use with symbol targets (e.g. "file.js#funcName") to get line numbers for line-targeted editing.\n';
2658
- }
2659
- if (isToolAllowed('listFiles')) {
2660
- availableToolsList += '- listFiles: List files and directories in a specified location.\n';
2661
- }
2662
- if (isToolAllowed('searchFiles')) {
2663
- availableToolsList += '- searchFiles: Find files matching a glob pattern with recursive search capability.\n';
2664
- }
2665
- if (this.enableSkills && isToolAllowed('listSkills')) {
2666
- availableToolsList += '- listSkills: List available agent skills discovered in the repository.\n';
2667
- }
2668
- if (this.enableSkills && isToolAllowed('useSkill')) {
2669
- availableToolsList += '- useSkill: Load and activate a specific skill\'s instructions.\n';
2670
- }
2671
- if (isToolAllowed('readImage')) {
2672
- availableToolsList += '- readImage: Read and load an image file for AI analysis.\n';
2673
- }
2674
- if (this.allowEdit && isToolAllowed('edit')) {
2675
- availableToolsList += '- edit: Edit files using text replacement, AST-aware symbol operations, or line-targeted editing.\n';
2676
- }
2677
- if (this.allowEdit && isToolAllowed('create')) {
2678
- availableToolsList += '- create: Create new files with specified content.\n';
2679
- }
2680
- if (this.allowEdit && isToolAllowed('multi_edit')) {
2681
- availableToolsList += '- multi_edit: Apply multiple file edits in one call using a JSON array of operations.\n';
2682
- }
2683
- if (this.enableDelegate && isToolAllowed('delegate')) {
2684
- availableToolsList += '- delegate: Delegate big distinct tasks to specialized probe subagents.\n';
2685
- }
2686
- if (this.enableExecutePlan && isToolAllowed('execute_plan')) {
2687
- availableToolsList += '- execute_plan: Execute a DSL program to orchestrate tool calls. ALWAYS use this for: questions containing "all"/"every"/"comprehensive"/"complete inventory", multi-topic analysis, open-ended discovery questions, or any task requiring full codebase coverage.\n';
2688
- if (isToolAllowed('cleanup_execute_plan')) {
2689
- availableToolsList += '- cleanup_execute_plan: Clean up output buffer and session store from previous execute_plan calls.\n';
2690
- }
2691
- } else if (isToolAllowed('analyze_all')) {
2692
- availableToolsList += '- analyze_all: Process ALL data matching a query using map-reduce (for aggregate questions needing 100% coverage).\n';
2693
- }
2694
- if (this.enableBash && isToolAllowed('bash')) {
2695
- availableToolsList += '- bash: Execute bash commands for system operations.\n';
2696
- }
2697
- if (this.enableTasks && isToolAllowed('task')) {
2698
- availableToolsList += '- task: Manage tasks for tracking progress (create, update, complete, delete, list).\n';
2699
- }
2700
- if (isToolAllowed('attempt_completion')) {
2701
- availableToolsList += '- attempt_completion: Finalize the task and provide the result to the user.\n';
2702
- availableToolsList += '- attempt_complete: Quick completion using previous response (shorthand).\n';
2703
- }
2704
- if (this._geminiToolsEnabled?.googleSearch && isToolAllowed('gemini_google_search')) {
2705
- availableToolsList += '- gemini_google_search: (auto) Web search via Google — invoked automatically by the model when it needs current information.\n';
2706
- }
2707
- if (this._geminiToolsEnabled?.urlContext && isToolAllowed('gemini_url_context')) {
2708
- availableToolsList += '- gemini_url_context: (auto) URL content reader via Google — automatically fetches and reads URLs mentioned in the conversation.\n';
2709
- }
2710
-
2711
- let xmlToolGuidelines = `
2712
- # Tool Use Formatting
2713
-
2714
- Tool use MUST be formatted using XML-style tags. Each tool call requires BOTH opening and closing tags with the exact tool name. Each parameter is similarly enclosed within its own set of opening and closing tags. You MUST use exactly ONE tool call per message until you are ready to complete the task.
2715
-
2716
- **CRITICAL: Every XML tag MUST have both opening <tag> and closing </tag> parts.**
2717
-
2718
- Structure (note the closing tags):
2719
- <tool_name>
2720
- <parameter1_name>value1</parameter1_name>
2721
- <parameter2_name>value2</parameter2_name>
2722
- ...
2723
- </tool_name>
2724
-
2725
- Examples:${toolExamples}
2726
- # Special Case: Quick Completion
2727
- If your previous response was already correct and complete, you may respond with just:
2728
- <attempt_complete>
2729
- This signals to use your previous response as the final answer without repeating content.
2730
-
2731
- # Thinking Process
2732
-
2733
- Before using a tool, analyze the situation within <thinking></thinking> tags. This helps you organize your thoughts and make better decisions.
2734
-
2735
- Example:
2736
- <thinking>
2737
- I need to find code related to error handling in the search module. The most appropriate tool for this is the search tool, which requires a query parameter and a path parameter. I have both the query ("error handling") and the path ("src/search"), so I can proceed with the search.
2738
- </thinking>
2739
-
2740
- # Tool Use Guidelines
2741
-
2742
- 1. Think step-by-step about how to achieve the user's goal.
2743
- 2. Use <thinking></thinking> tags to analyze the situation and determine the appropriate tool.
2744
- 3. Choose **one** tool that helps achieve the current step.
2745
- 4. Format the tool call using the specified XML format with BOTH opening and closing tags. Ensure all required parameters are included.
2746
- 5. **You MUST respond with exactly one tool call in the specified XML format in each turn.**
2747
- 6. Wait for the tool execution result, which will be provided in the next message (within a <tool_result> block).
2748
- 7. Analyze the tool result and decide the next step. If more tool calls are needed, repeat steps 2-6.
2749
- 8. If the task is fully complete and all previous steps were successful, use the \`<attempt_completion>\` tool to provide the final answer. This is the ONLY way to finish the task.
2750
- 9. If you cannot proceed (e.g., missing information, invalid request), use \`<attempt_completion>\` to explain the issue clearly with an appropriate message directly inside the tags.
2751
- 10. If your previous response was already correct and complete, you may use \`<attempt_complete>\` as a shorthand.
2752
-
2753
- Available Tools:
2754
- ${availableToolsList}`;
2755
-
2756
- // Common instructions
2940
+ // Common instructions (simplified - tools are now provided via native tool calling)
2757
2941
  const commonInstructions = `<instructions>
2758
2942
  Follow these instructions carefully:
2759
2943
  1. Analyze the user's request.
2760
- 2. Use <thinking></thinking> tags to analyze the situation and determine the appropriate tool for each step.
2761
- 3. Use the available tools step-by-step to fulfill the request.
2762
- 4. You should always prefer the \`search\` tool for code-related questions.${this.searchDelegate ? ' It already returns extracted code blocks; use \`extract\` only to expand context or read full files.' : ' Read full files only if really necessary.'}
2763
- 5. Ensure to get really deep and understand the full picture before answering.
2764
- 6. You MUST respond with exactly ONE tool call per message, using the specified XML format, until the task is complete.
2765
- 7. Wait for the tool execution result (provided in the next user message in a <tool_result> block) before proceeding to the next step.
2766
- 8. Once the task is fully completed, use the '<attempt_completion>' tool to provide the final result. This is the ONLY way to signal completion.
2767
- 9. Prefer concise and focused search queries. Use specific keywords and phrases to narrow down results.${this.allowEdit ? `
2768
- 10. When modifying files, choose the appropriate tool:
2944
+ 2. Use the available tools step-by-step to fulfill the request.
2945
+ 3. You should always prefer the search tool for code-related questions.${this.searchDelegate ? ' It already returns extracted code blocks; use extract only to expand context or read full files.' : ' Read full files only if really necessary.'}
2946
+ 4. Ensure to get really deep and understand the full picture before answering.
2947
+ 5. Once the task is fully completed, use the attempt_completion tool to provide the final result.
2948
+ 6. Prefer concise and focused search queries. Use specific keywords and phrases to narrow down results.${this.allowEdit ? `
2949
+ 7. When modifying files, choose the appropriate tool:
2769
2950
  - Use 'edit' for all code modifications:
2770
2951
  * For small changes (a line or a few lines), use old_string + new_string — copy old_string verbatim from the file.
2771
2952
  * For rewriting entire functions/classes/methods, use the symbol parameter instead (no exact text matching needed).
@@ -2794,7 +2975,6 @@ Follow these instructions carefully:
2794
2975
  if (this.debug) {
2795
2976
  console.log(`[DEBUG] Using predefined prompt: ${this.promptType}`);
2796
2977
  }
2797
- // Add common instructions to predefined prompts
2798
2978
  systemMessage += commonInstructions;
2799
2979
  } else {
2800
2980
  // Use the default prompt (code explorer) if no prompt type is specified
@@ -2802,16 +2982,9 @@ Follow these instructions carefully:
2802
2982
  if (this.debug) {
2803
2983
  console.log(`[DEBUG] Using default prompt: code explorer`);
2804
2984
  }
2805
- // Add common instructions to the default prompt
2806
2985
  systemMessage += commonInstructions;
2807
2986
  }
2808
2987
 
2809
- // Add XML Tool Guidelines
2810
- systemMessage += `\n${xmlToolGuidelines}\n`;
2811
-
2812
- // Add Tool Definitions
2813
- systemMessage += `\n# Tools Available\n${toolDefinitions}\n`;
2814
-
2815
2988
  // Add available skills (metadata only)
2816
2989
  if (this.enableSkills) {
2817
2990
  const skillsXml = await this._getAvailableSkillsXml();
@@ -2825,19 +2998,6 @@ Follow these instructions carefully:
2825
2998
  systemMessage += `\n${taskSystemPrompt}\n`;
2826
2999
  }
2827
3000
 
2828
- // Add MCP tools if available (filtered by allowedTools)
2829
- if (this.mcpBridge && this.mcpBridge.getToolNames().length > 0) {
2830
- const allMcpTools = this.mcpBridge.getToolNames();
2831
- const allowedMcpTools = this._filterMcpTools(allMcpTools);
2832
-
2833
- if (allowedMcpTools.length > 0) {
2834
- systemMessage += `\n## MCP Tools (JSON parameters in <params> tag)\n`;
2835
- // Get only allowed MCP tool definitions
2836
- systemMessage += this.mcpBridge.getXmlToolDefinitions(allowedMcpTools);
2837
- systemMessage += `\n\nFor MCP tools, use JSON format within the params tag, e.g.:\n<mcp_tool>\n<params>\n{"key": "value"}\n</params>\n</mcp_tool>\n`;
2838
- }
2839
- }
2840
-
2841
3001
  // Add folder information using workspace root and relative paths
2842
3002
  const searchDirectory = this.workspaceRoot;
2843
3003
  if (this.debug) {
@@ -3195,1127 +3355,190 @@ Follow these instructions carefully:
3195
3355
  }
3196
3356
  }
3197
3357
 
3198
- // Circuit breaker for repeated format errors
3199
- let lastFormatErrorType = null;
3200
- let sameFormatErrorCount = 0;
3201
- const MAX_REPEATED_FORMAT_ERRORS = 3;
3202
-
3203
- // Circuit breaker for repeated identical responses without tool calls
3204
- let lastNoToolResponse = null;
3205
- let sameResponseCount = 0;
3206
- const MAX_REPEATED_IDENTICAL_RESPONSES = 3;
3358
+ // Iteration counter for telemetry
3207
3359
 
3208
- // Circuit breaker for consecutive no-tool responses (regardless of content)
3209
- // This catches cases where agent alternates between similar "stuck" messages
3210
- let consecutiveNoToolCount = 0;
3211
- const MAX_CONSECUTIVE_NO_TOOL = 5;
3360
+ // Native tool calling via Vercel AI SDK streamText + maxSteps
3361
+ let completionResult = null;
3362
+ const toolContext = { maxIterations, currentIteration: 0, currentMessages };
3212
3363
 
3213
- // Tool iteration loop (only for non-CLI engines like Vercel/Anthropic/OpenAI)
3214
- while (currentIteration < maxIterations && !completionAttempted) {
3215
- currentIteration++;
3216
- if (this.cancelled) throw new Error('Request was cancelled by the user');
3364
+ const tools = this._buildNativeTools(options, (result) => {
3365
+ completionResult = result;
3366
+ completionAttempted = true;
3367
+ }, toolContext);
3217
3368
 
3218
- if (this.debug) {
3219
- console.log(`\n[DEBUG] --- Tool Loop Iteration ${currentIteration}/${maxIterations} ---`);
3220
- console.log(`[DEBUG] Current messages count for AI call: ${currentMessages.length}`);
3221
-
3222
- // Log preview of the latest user message (helpful for debugging loops)
3223
- const lastUserMessage = [...currentMessages].reverse().find(msg => msg.role === 'user');
3224
- if (lastUserMessage && lastUserMessage.content) {
3225
- const userPreview = createMessagePreview(lastUserMessage.content);
3226
- console.log(`[DEBUG] Latest user message (${lastUserMessage.content.length} chars): ${userPreview}`);
3227
- }
3369
+ let maxResponseTokens = this.maxResponseTokens;
3370
+ if (!maxResponseTokens) {
3371
+ maxResponseTokens = 4000;
3372
+ if (this.model && this.model.includes('opus') || this.model && this.model.includes('sonnet') || this.model && this.model.startsWith('gpt-4') || this.model && this.model.startsWith('gpt-5')) {
3373
+ maxResponseTokens = 8192;
3374
+ } else if (this.model && this.model.startsWith('gemini')) {
3375
+ maxResponseTokens = 32000;
3228
3376
  }
3377
+ }
3229
3378
 
3230
- // Add iteration tracing event
3231
- if (this.tracer) {
3232
- this.tracer.addEvent('iteration.start', {
3233
- 'iteration': currentIteration,
3234
- 'max_iterations': maxIterations,
3235
- 'message_count': currentMessages.length
3236
- });
3237
- }
3379
+ // Context compaction retry loop
3380
+ let compactionAttempted = false;
3381
+ while (true) {
3382
+ try {
3383
+ const messagesForAI = this.prepareMessagesWithImages(currentMessages);
3384
+
3385
+ const streamOptions = {
3386
+ model: this.provider ? this.provider(this.model) : this.model,
3387
+ messages: messagesForAI,
3388
+ tools,
3389
+ stopWhen: stepCountIs(maxIterations),
3390
+ maxTokens: maxResponseTokens,
3391
+ temperature: 0.3,
3392
+ onStepFinish: ({ toolResults, text, finishReason, usage }) => {
3393
+ currentIteration++;
3394
+ toolContext.currentIteration = currentIteration;
3395
+
3396
+ // Record telemetry
3397
+ if (this.tracer) {
3398
+ this.tracer.addEvent('iteration.step', {
3399
+ 'iteration': currentIteration,
3400
+ 'max_iterations': maxIterations,
3401
+ 'finish_reason': finishReason,
3402
+ 'has_tool_calls': !!(toolResults && toolResults.length > 0)
3403
+ });
3404
+ }
3238
3405
 
3239
- // Add warning message when reaching the last iteration
3240
- if (currentIteration === maxIterations) {
3241
- const warningMessage = `⚠️ WARNING: You have reached the maximum tool iterations limit (${maxIterations}). This is your final message. Please respond with the data you have so far. If something was not completed, honestly state what was not done and provide any partial results or recommendations you can offer.`;
3242
-
3243
- currentMessages.push({
3244
- role: 'user',
3245
- content: warningMessage
3246
- });
3247
-
3248
- if (this.debug) {
3249
- console.log(`[DEBUG] Added max iterations warning message at iteration ${currentIteration}`);
3250
- }
3251
- }
3406
+ // Record token usage
3407
+ if (usage) {
3408
+ this.tokenCounter.recordUsage(usage);
3409
+ }
3252
3410
 
3253
- // Calculate context size
3254
- this.tokenCounter.calculateContextSize(currentMessages);
3255
- if (this.debug) {
3256
- console.log(`[DEBUG] Estimated context tokens BEFORE LLM call (Iter ${currentIteration}): ${this.tokenCounter.contextSize}`);
3257
- }
3258
-
3259
- let maxResponseTokens = this.maxResponseTokens;
3260
- if (!maxResponseTokens) {
3261
- // Use model-based defaults if not explicitly configured
3262
- maxResponseTokens = 4000;
3263
- if (this.model && this.model.includes('opus') || this.model && this.model.includes('sonnet') || this.model && this.model.startsWith('gpt-4') || this.model && this.model.startsWith('gpt-5')) {
3264
- maxResponseTokens = 8192;
3265
- } else if (this.model && this.model.startsWith('gemini')) {
3266
- maxResponseTokens = 32000;
3267
- }
3268
- }
3269
-
3270
- // Make AI request
3271
- let assistantResponseContent = '';
3272
- let compactionAttempted = false;
3273
-
3274
- // Retry loop for context compaction - separate from streamTextWithRetryAndFallback
3275
- // which handles transient errors (rate limits, network issues, etc.)
3276
- while (true) {
3277
- try {
3278
- // Wrap AI request with tracing if available
3279
- const executeAIRequest = async () => {
3280
- // Prepare messages with potential image content
3281
- const messagesForAI = this.prepareMessagesWithImages(currentMessages);
3282
-
3283
- // Build streamText options, including Gemini provider-defined tools if applicable
3284
- const streamOptions = {
3285
- model: this.provider ? this.provider(this.model) : this.model,
3286
- messages: messagesForAI,
3287
- maxTokens: maxResponseTokens,
3288
- temperature: 0.3,
3289
- };
3290
-
3291
- // Inject Gemini built-in tools (gemini_google_search, gemini_url_context) when using Google provider
3292
- const geminiProviderTools = this._buildGeminiProviderTools();
3293
- if (geminiProviderTools) {
3294
- streamOptions.tools = geminiProviderTools;
3295
- }
3296
-
3297
- const result = await this.streamTextWithRetryAndFallback(streamOptions);
3298
-
3299
- // Get the promise reference BEFORE consuming stream (doesn't lock it)
3300
- const usagePromise = result.usage;
3301
-
3302
- // Collect the streamed response - stream all content for now
3303
- for await (const delta of result.textStream) {
3304
- assistantResponseContent += delta;
3305
- // For now, stream everything - we'll handle segmentation after tools execute
3306
- if (options.onStream) {
3307
- options.onStream(delta);
3308
- }
3309
- }
3310
-
3311
- // Record token usage - await the promise AFTER stream is consumed
3312
- const usage = await usagePromise;
3313
- if (usage) {
3314
- this.tokenCounter.recordUsage(usage, result.experimental_providerMetadata);
3411
+ // Stream text to callback if present
3412
+ if (options.onStream && text) {
3413
+ options.onStream(text);
3315
3414
  }
3316
3415
 
3317
- return result;
3318
- };
3319
-
3320
- if (this.tracer) {
3321
- // Prepare input preview for tracing (truncate if very long)
3322
- const inputPreview = message.length > 1000
3323
- ? message.substring(0, 1000) + '... [truncated]'
3324
- : message;
3325
-
3326
- await this.tracer.withSpan('ai.request', executeAIRequest, {
3327
- 'ai.model': this.model,
3328
- 'ai.provider': this.clientApiProvider || 'auto',
3329
- 'ai.input': inputPreview,
3330
- 'ai.input_length': message.length,
3331
- 'iteration': currentIteration,
3332
- 'max_tokens': maxResponseTokens,
3333
- 'temperature': 0.3,
3334
- 'message_count': currentMessages.length
3335
- });
3336
- } else {
3337
- await executeAIRequest();
3338
- }
3339
-
3340
- // Success - break out of compaction retry loop
3341
- break;
3342
-
3343
- } catch (error) {
3344
- // Check if this is a context limit error (only try compaction once per iteration)
3345
- if (!compactionAttempted && handleContextLimitError) {
3346
- const compactionResult = handleContextLimitError(error, currentMessages, {
3347
- keepLastSegment: true,
3348
- minSegmentsToKeep: 1
3349
- });
3350
-
3351
- if (compactionResult) {
3352
- // Context limit error detected - compact and retry once
3353
- const { messages: compactedMessages, stats } = compactionResult;
3354
-
3355
- // Check if compaction actually reduced message count
3356
- if (stats.removed === 0) {
3357
- // No messages removed - compaction won't help, fail immediately
3358
- console.error(`[ERROR] Context window exceeded but no messages can be compacted.`);
3359
- console.error(`[ERROR] The conversation history is already minimal (${stats.originalCount} messages).`);
3360
- finalResult = `Error: Context window limit exceeded and conversation cannot be compacted further. Consider starting a new session or reducing system message size.`;
3361
- throw new Error(finalResult);
3362
- }
3363
-
3364
- compactionAttempted = true;
3365
-
3366
- console.log(`[INFO] Context window limit exceeded. Compacting conversation...`);
3367
- console.log(`[INFO] Removed ${stats.removed} messages (${stats.reductionPercent}% reduction)`);
3368
- console.log(`[INFO] Estimated token savings: ${stats.tokensSaved} tokens`);
3369
-
3370
- if (this.debug) {
3371
- console.log(`[DEBUG] Compaction stats:`, stats);
3372
- console.log(`[DEBUG] Original message count: ${stats.originalCount}`);
3373
- console.log(`[DEBUG] Compacted message count: ${stats.compactedCount}`);
3374
- }
3375
-
3376
- // Replace currentMessages with compacted version (creates new array reference)
3377
- // This ensures we don't mutate the original history array
3378
- currentMessages = [...compactedMessages];
3379
-
3380
- // Log compaction event if tracer is available
3381
- if (this.tracer) {
3382
- this.tracer.addEvent('context.compacted', {
3383
- 'iteration': currentIteration,
3384
- 'original_count': stats.originalCount,
3385
- 'compacted_count': stats.compactedCount,
3386
- 'reduction_percent': stats.reductionPercent,
3387
- 'tokens_saved': stats.tokensSaved
3388
- });
3389
- }
3390
-
3391
- // Continue to retry with compacted messages
3392
- continue;
3416
+ if (this.debug) {
3417
+ console.log(`[DEBUG] Step ${currentIteration}/${maxIterations} finished (reason: ${finishReason}, tools: ${toolResults?.length || 0})`);
3393
3418
  }
3394
3419
  }
3420
+ };
3395
3421
 
3396
- // Not a context limit error, compaction already attempted, or compaction not available
3397
- // IMPORTANT: This break prevents infinite loop if compacted messages still exceed limit
3398
- console.error(`Error during streamText (Iter ${currentIteration}):`, error);
3399
- finalResult = `Error: Failed to get response from AI model during iteration ${currentIteration}. ${error.message}`;
3400
- throw new Error(finalResult);
3401
- }
3402
- }
3403
-
3404
- // Log preview of assistant response for debugging loops
3405
- if (this.debug && assistantResponseContent) {
3406
- const assistantPreview = createMessagePreview(assistantResponseContent);
3407
- console.log(`[DEBUG] Assistant response (${assistantResponseContent.length} chars): ${assistantPreview}`);
3408
- }
3409
-
3410
- // Images in assistant responses are not automatically processed
3411
- // AI can use the readImage tool to explicitly request reading an image
3412
-
3413
- // Parse tool call from response with valid tools list
3414
- // Build validTools based on allowedTools configuration (same pattern as getSystemMessage)
3415
- // When _disableTools is set, only allow attempt_completion for JSON correction flows
3416
- const validTools = [];
3417
- if (options._disableTools) {
3418
- // Only allow attempt_completion for JSON correction - no search/query/edit tools
3419
- validTools.push('attempt_completion');
3420
- if (this.debug) {
3421
- console.log(`[DEBUG] Tools disabled for this call - only attempt_completion allowed`);
3422
- }
3423
- } else {
3424
- if (this.allowedTools.isEnabled('search')) validTools.push('search');
3425
- if (this.allowedTools.isEnabled('query')) validTools.push('query');
3426
- if (this.allowedTools.isEnabled('extract')) validTools.push('extract');
3427
- if (this.allowedTools.isEnabled('listFiles')) validTools.push('listFiles');
3428
- if (this.allowedTools.isEnabled('searchFiles')) validTools.push('searchFiles');
3429
- if (this.enableSkills && this.allowedTools.isEnabled('listSkills')) validTools.push('listSkills');
3430
- if (this.enableSkills && this.allowedTools.isEnabled('useSkill')) validTools.push('useSkill');
3431
- if (this.allowedTools.isEnabled('readImage')) validTools.push('readImage');
3432
- // Always allow attempt_completion in validTools - it's a completion signal, not a tool
3433
- // This ensures agents can complete even when disableTools: true is set (fixes #333)
3434
- // The tool DEFINITION may be hidden in raw AI mode, but we still need to recognize it
3435
- validTools.push('attempt_completion');
3436
-
3437
- // Edit tools (require both allowEdit flag AND allowedTools permission)
3438
- if (this.allowEdit && this.allowedTools.isEnabled('edit')) {
3439
- validTools.push('edit');
3440
- }
3441
- if (this.allowEdit && this.allowedTools.isEnabled('create')) {
3442
- validTools.push('create');
3443
- }
3444
- if (this.allowEdit && this.allowedTools.isEnabled('multi_edit')) {
3445
- validTools.push('multi_edit');
3422
+ // Add native thinking/reasoning providerOptions when thinkingEffort is set
3423
+ const providerOpts = this._buildThinkingProviderOptions(maxResponseTokens);
3424
+ if (providerOpts) {
3425
+ streamOptions.providerOptions = providerOpts;
3446
3426
  }
3447
- // Bash tool (require both enableBash flag AND allowedTools permission)
3448
- if (this.enableBash && this.allowedTools.isEnabled('bash')) {
3449
- validTools.push('bash');
3450
- }
3451
- // Delegate tool (require both enableDelegate flag AND allowedTools permission)
3452
- if (this.enableDelegate && this.allowedTools.isEnabled('delegate')) {
3453
- validTools.push('delegate');
3454
- }
3455
- // Execute Plan tool (requires enableExecutePlan flag, supersedes analyze_all)
3456
- if (this.enableExecutePlan && this.allowedTools.isEnabled('execute_plan')) {
3457
- validTools.push('execute_plan');
3458
- // cleanup_execute_plan is enabled together with execute_plan
3459
- if (this.allowedTools.isEnabled('cleanup_execute_plan')) {
3460
- validTools.push('cleanup_execute_plan');
3461
- }
3462
- } else if (this.allowedTools.isEnabled('analyze_all')) {
3463
- validTools.push('analyze_all');
3464
- }
3465
- // Task tool (require both enableTasks flag AND allowedTools permission)
3466
- if (this.enableTasks && this.allowedTools.isEnabled('task')) {
3467
- validTools.push('task');
3468
- }
3469
- }
3470
-
3471
- // Try parsing with hybrid parser that supports both native and MCP tools
3472
- // When _disableTools is set, skip MCP tools entirely
3473
- const nativeTools = validTools;
3474
- const parsedTool = (this.mcpBridge && !options._disableTools)
3475
- ? parseHybridXmlToolCall(assistantResponseContent, nativeTools, this.mcpBridge)
3476
- : parseXmlToolCallWithThinking(assistantResponseContent, validTools);
3477
-
3478
- // Capture AI thinking content if present (for debugging and telemetry)
3479
- if (parsedTool?.thinkingContent) {
3480
- this._recordThinkingTelemetry(parsedTool.thinkingContent, currentIteration);
3481
- }
3482
-
3483
- if (parsedTool) {
3484
- const { toolName, params } = parsedTool;
3485
-
3486
- // Record AI tool decision for telemetry
3487
- this._recordToolDecisionTelemetry(toolName, params, assistantResponseContent.length, currentIteration);
3488
-
3489
- if (this.debug) console.log(`[DEBUG] Parsed tool call: ${toolName} with params:`, params);
3490
-
3491
- // Reset consecutive no-tool counter since we got a valid tool call
3492
- consecutiveNoToolCount = 0;
3493
-
3494
- if (toolName === 'attempt_completion') {
3495
- completionAttempted = true;
3496
-
3497
- // END CHECKPOINT: Block completion if there are incomplete tasks
3498
- // However, allow completion if the agent is stuck and genuinely cannot proceed
3499
- if (this.enableTasks && this.taskManager && this.taskManager.hasIncompleteTasks()) {
3500
- const completionResult = typeof params.result === 'string' ? params.result : '';
3501
- const isStuckCompletion = detectStuckResponse(completionResult);
3502
- const highIterationCount = currentIteration > maxIterations * 0.7; // >70% of max iterations
3503
-
3504
- // Allow stuck completions after many iterations to prevent infinite loops
3505
- if (isStuckCompletion && highIterationCount) {
3506
- if (this.debug) {
3507
- console.log('[DEBUG] Task checkpoint: Allowing stuck completion (agent genuinely cannot proceed)');
3508
- console.log('[DEBUG] Incomplete tasks will remain:', this.taskManager.getTaskSummary());
3509
- }
3510
- // Record telemetry for forced completion
3511
- if (this.tracer && typeof this.tracer.recordTaskEvent === 'function') {
3512
- this.tracer.recordTaskEvent('forced_stuck_completion', {
3513
- 'task.incomplete_count': this.taskManager.getIncompleteTasks().length,
3514
- 'task.iteration': currentIteration,
3515
- 'task.max_iterations': maxIterations
3516
- });
3517
- }
3518
- // Continue to process the completion instead of blocking
3519
- } else {
3520
- const taskSummary = this.taskManager.getTaskSummary();
3521
- const blockedMessage = createTaskCompletionBlockedMessage(taskSummary);
3522
- const incompleteTasks = this.taskManager.getIncompleteTasks();
3523
-
3524
- // Record telemetry for blocked completion
3525
- if (this.tracer && typeof this.tracer.recordTaskEvent === 'function') {
3526
- this.tracer.recordTaskEvent('completion_blocked', {
3527
- 'task.incomplete_count': incompleteTasks.length,
3528
- 'task.incomplete_ids': incompleteTasks.map(t => t.id).join(', '),
3529
- 'task.iteration': currentIteration
3530
- });
3531
- }
3532
-
3533
- if (this.debug) {
3534
- console.log('[DEBUG] Task checkpoint: Blocking completion due to incomplete tasks');
3535
- console.log('[DEBUG] Incomplete tasks:', taskSummary);
3536
- }
3537
-
3538
- // Add reminder message and continue the loop
3539
- currentMessages.push({
3540
- role: 'assistant',
3541
- content: assistantResponseContent
3542
- });
3543
- currentMessages.push({
3544
- role: 'user',
3545
- content: blockedMessage
3546
- });
3547
-
3548
- completionAttempted = false; // Reset to allow more iterations
3549
- continue; // Skip the break and continue the loop
3550
- }
3551
- }
3552
-
3553
- // Handle attempt_complete shorthand - use previous response
3554
- if (params.result === '__PREVIOUS_RESPONSE__') {
3555
- // Find the last assistant message with actual content (not tool calls)
3556
- const lastAssistantMessage = [...currentMessages].reverse().find(msg =>
3557
- msg.role === 'assistant' &&
3558
- msg.content &&
3559
- !(this.mcpBridge
3560
- ? parseHybridXmlToolCall(msg.content, validTools, this.mcpBridge)
3561
- : parseXmlToolCallWithThinking(msg.content, validTools))
3562
- );
3563
-
3564
- if (lastAssistantMessage) {
3565
- const prevContent = lastAssistantMessage.content;
3566
-
3567
- // Check for patterns indicating a failed/wrapped tool call attempt
3568
- // Use detectUnrecognizedToolCall for consistent detection logic
3569
- const wrappedToolError = detectUnrecognizedToolCall(prevContent, validTools);
3570
-
3571
- if (isWrappedToolError(wrappedToolError)) {
3572
- // Previous response was a broken tool call attempt - don't reuse it
3573
- const wrappedToolName = extractWrappedToolName(wrappedToolError);
3574
- if (this.debug) {
3575
- console.log(`[DEBUG] Previous response contains wrapped tool '${wrappedToolName}' - rejecting for __PREVIOUS_RESPONSE__`);
3576
- }
3577
- currentMessages.push({ role: 'assistant', content: assistantResponseContent });
3578
- currentMessages.push({
3579
- role: 'user',
3580
- content: createWrappedToolErrorMessage(wrappedToolName)
3581
- });
3582
- completionAttempted = false;
3583
- continue; // Don't use broken response, continue the loop
3584
- }
3585
-
3586
- // Pre-strip thinking tags to avoid losing content at final cleanup stage
3587
- const strippedContent = removeThinkingTags(prevContent);
3588
- if (strippedContent.length > 50) {
3589
- // Enough content outside thinking tags — use stripped version directly
3590
- finalResult = strippedContent;
3591
- if (this.debug) console.log(`[DEBUG] Using previous response (thinking-stripped) as completion: ${finalResult.substring(0, 100)}...`);
3592
- } else {
3593
- // Content was mostly/entirely inside thinking tags.
3594
- // Extract thinking content and use it as the actual answer.
3595
- // extractThinkingContent now handles nested thinking tags (issue #439)
3596
- let thinkingContent = extractThinkingContent(prevContent);
3597
- // Also apply removeThinkingTags as extra safety to catch any edge cases
3598
- if (thinkingContent) {
3599
- thinkingContent = removeThinkingTags(thinkingContent) || thinkingContent.replace(/<\/?thinking>/g, '');
3600
- }
3601
- if (thinkingContent && thinkingContent.length > 50) {
3602
- finalResult = thinkingContent;
3603
- if (this.debug) console.log(`[DEBUG] Previous response was mostly in thinking tags — using thinking content as completion: ${finalResult.substring(0, 100)}...`);
3604
- } else {
3605
- // Neither stripped nor thinking content is substantive — use raw as fallback
3606
- finalResult = prevContent;
3607
- if (this.debug) console.log(`[DEBUG] Using previous response as completion (raw): ${finalResult.substring(0, 100)}...`);
3608
- }
3609
- }
3610
- } else {
3611
- finalResult = 'Error: No previous response found to use as completion.';
3612
- if (this.debug) console.log(`[DEBUG] No suitable previous response found for attempt_complete shorthand`);
3613
- }
3614
- } else {
3615
- // Standard attempt_completion handling
3616
- const validation = attemptCompletionSchema.safeParse(params);
3617
- if (validation.success) {
3618
- finalResult = validation.data.result;
3619
-
3620
- // Stream the final result if callback is provided
3621
- if (options.onStream && finalResult) {
3622
- const chunkSize = 50; // Characters per chunk for smoother streaming
3623
- for (let i = 0; i < finalResult.length; i += chunkSize) {
3624
- const chunk = finalResult.slice(i, Math.min(i + chunkSize, finalResult.length));
3625
- options.onStream(chunk);
3626
- }
3627
- }
3628
-
3629
- if (this.debug) console.log(`[DEBUG] Task completed successfully with result: ${finalResult.substring(0, 100)}...`);
3630
- } else {
3631
- console.error(`[ERROR] Invalid attempt_completion parameters:`, validation.error);
3632
- finalResult = 'Error: Invalid completion attempt. The task could not be completed properly.';
3633
- }
3634
- }
3635
- break;
3636
- } else {
3637
- // Check tool type and execute accordingly
3638
- const { type } = parsedTool;
3639
-
3640
- if (type === 'mcp' && this.mcpBridge && this.mcpBridge.isMcpTool(toolName)) {
3641
- // Execute MCP tool
3642
- const mcpStartTime = Date.now();
3643
- this._recordMcpToolTelemetry('start', toolName, params, currentIteration);
3644
-
3645
- try {
3646
- // Log MCP tool execution in debug mode
3647
- if (this.debug) {
3648
- console.error(`\n[DEBUG] ========================================`);
3649
- console.error(`[DEBUG] Executing MCP tool: ${toolName}`);
3650
- console.error(`[DEBUG] Arguments:`);
3651
- for (const [key, value] of Object.entries(params)) {
3652
- const displayValue = typeof value === 'string' && value.length > 100
3653
- ? value.substring(0, 100) + '...'
3654
- : value;
3655
- console.error(`[DEBUG] ${key}: ${JSON.stringify(displayValue)}`);
3656
- }
3657
- console.error(`[DEBUG] ========================================\n`);
3658
- }
3659
-
3660
- // Execute MCP tool through the bridge
3661
- const executionResult = await this.mcpBridge.mcpTools[toolName].execute(params);
3662
-
3663
- let toolResultContent = typeof executionResult === 'string' ? executionResult : JSON.stringify(executionResult, null, 2);
3664
-
3665
- // Extract raw output blocks from tool result (before truncation)
3666
- // This prevents LLM from processing/hallucinating large structured output from execute_plan
3667
- // Push to _extractedRawBlocks (NOT _outputBuffer) to prevent the cycle where:
3668
- // formatSuccess wraps → extract re-adds → next execute_plan re-wraps (issue #438)
3669
- const { cleanedContent, extractedBlocks } = extractRawOutputBlocks(toolResultContent);
3670
- if (extractedBlocks.length > 0) {
3671
- toolResultContent = cleanedContent;
3672
- // Accumulate extracted blocks separately from DSL output() buffer
3673
- this._extractedRawBlocks.push(...extractedBlocks);
3674
- if (this.debug) {
3675
- console.log(`[DEBUG] Extracted ${extractedBlocks.length} raw output blocks (${extractedBlocks.reduce((sum, b) => sum + b.length, 0)} chars) from tool result`);
3676
- }
3677
- }
3678
-
3679
- // Truncate if output exceeds token limit
3680
- try {
3681
- const truncateResult = await truncateIfNeeded(toolResultContent, this.tokenCounter, this.sessionId, this.maxOutputTokens);
3682
- if (truncateResult.truncated) {
3683
- toolResultContent = truncateResult.content;
3684
- if (this.debug) {
3685
- console.log(`[DEBUG] Tool output truncated: ${truncateResult.originalTokens} tokens -> saved to ${truncateResult.tempFilePath || 'N/A'}`);
3686
- if (truncateResult.error) {
3687
- console.log(`[DEBUG] Truncation file error: ${truncateResult.error}`);
3688
- }
3689
- }
3690
- }
3691
- } catch (truncateError) {
3692
- // If truncation fails entirely, log and continue with original content
3693
- console.error(`[WARN] Tool output truncation failed: ${truncateError.message}`);
3694
- }
3695
-
3696
- // Record MCP tool end event (success)
3697
- const mcpDurationMs = Date.now() - mcpStartTime;
3698
- this._recordMcpToolTelemetry('end', toolName, null, currentIteration, {
3699
- result: toolResultContent,
3700
- success: true,
3701
- durationMs: mcpDurationMs,
3702
- error: null
3703
- });
3704
-
3705
- // Log MCP tool result in debug mode
3706
- if (this.debug) {
3707
- const preview = toolResultContent.length > 500 ? toolResultContent.substring(0, 500) + '...' : toolResultContent;
3708
- console.error(`[DEBUG] ========================================`);
3709
- console.error(`[DEBUG] MCP tool '${toolName}' completed successfully`);
3710
- console.error(`[DEBUG] Result preview:`);
3711
- console.error(preview);
3712
- console.error(`[DEBUG] ========================================\n`);
3713
- }
3714
-
3715
- // Add assistant message with tool call (matching native tool pattern)
3716
- currentMessages.push({ role: 'assistant', content: assistantResponseContent });
3717
- currentMessages.push({ role: 'user', content: `<tool_result>\n${toolResultContent}\n</tool_result>` });
3718
- } catch (error) {
3719
- // Record MCP tool end event (failure)
3720
- const mcpDurationMs = Date.now() - mcpStartTime;
3721
- this._recordMcpToolTelemetry('end', toolName, null, currentIteration, {
3722
- result: null,
3723
- success: false,
3724
- durationMs: mcpDurationMs,
3725
- error: error.message
3726
- });
3727
-
3728
- console.error(`Error executing MCP tool ${toolName}:`, error);
3729
-
3730
- // Log MCP tool error in debug mode
3731
- if (this.debug) {
3732
- console.error(`[DEBUG] ========================================`);
3733
- console.error(`[DEBUG] MCP tool '${toolName}' failed with error:`);
3734
- console.error(`[DEBUG] ${error.message}`);
3735
- console.error(`[DEBUG] ========================================\n`);
3736
- }
3737
-
3738
- // Format error with structured information for AI
3739
- const errorXml = formatErrorForAI(error);
3740
- // Add assistant message with tool call (matching native tool pattern)
3741
- currentMessages.push({ role: 'assistant', content: assistantResponseContent });
3742
- currentMessages.push({ role: 'user', content: `<tool_result>\n${errorXml}\n</tool_result>` });
3743
- }
3744
- } else if (this.toolImplementations[toolName]) {
3745
- // Execute native tool
3746
- try {
3747
- // Add sessionId and workingDirectory to params for tool execution
3748
- // Validate and resolve workingDirectory using safeRealpath for symlink security
3749
- // Consistent fallback chain: workspaceRoot > cwd > allowedFolders[0] > process.cwd()
3750
- let resolvedWorkingDirectory = this.workspaceRoot || this.cwd || (this.allowedFolders && this.allowedFolders[0]) || process.cwd();
3751
- if (params.workingDirectory) {
3752
- // Resolve relative paths against the current working directory context, not process.cwd()
3753
- // Use safeRealpath to resolve symlinks and prevent bypass attacks
3754
- const requestedDir = safeRealpath(isAbsolute(params.workingDirectory)
3755
- ? resolve(params.workingDirectory)
3756
- : resolve(resolvedWorkingDirectory, params.workingDirectory));
3757
- // Check if the requested directory is within allowed folders
3758
- const isWithinAllowed = !this.allowedFolders || this.allowedFolders.length === 0 ||
3759
- this.allowedFolders.some(folder => {
3760
- const resolvedFolder = safeRealpath(folder);
3761
- return requestedDir === resolvedFolder || requestedDir.startsWith(resolvedFolder + sep);
3762
- });
3763
- if (isWithinAllowed) {
3764
- resolvedWorkingDirectory = requestedDir;
3765
- } else if (this.debug) {
3766
- console.error(`[DEBUG] Rejected workingDirectory "${params.workingDirectory}" - not within allowed folders`);
3767
- }
3768
- }
3769
- const toolParams = {
3770
- ...params,
3771
- sessionId: this.sessionId,
3772
- workingDirectory: resolvedWorkingDirectory
3773
- };
3774
-
3775
- // Log tool execution in debug mode
3776
- if (this.debug) {
3777
- console.error(`\n[DEBUG] ========================================`);
3778
- console.error(`[DEBUG] Executing tool: ${toolName}`);
3779
- console.error(`[DEBUG] Arguments:`);
3780
- for (const [key, value] of Object.entries(params)) {
3781
- const displayValue = typeof value === 'string' && value.length > 100
3782
- ? value.substring(0, 100) + '...'
3783
- : value;
3784
- console.error(`[DEBUG] ${key}: ${JSON.stringify(displayValue)}`);
3785
- }
3786
- console.error(`[DEBUG] ========================================\n`);
3787
- }
3788
-
3789
- // Emit tool start event with stream pause signal
3790
- this.events.emit('toolCall', {
3791
- timestamp: new Date().toISOString(),
3792
- name: toolName,
3793
- args: toolParams,
3794
- status: 'started',
3795
- pauseStream: true // Signal to pause text streaming
3796
- });
3797
-
3798
- // Execute tool with tracing if available
3799
- const executeToolCall = async () => {
3800
- // For delegate tool, pass current iteration, max iterations, session ID, and config
3801
- if (toolName === 'delegate') {
3802
- // Reconstruct allowedTools array preserving all modes (same logic as clone())
3803
- let allowedToolsForDelegate = null;
3804
- if (this.allowedTools.mode === 'whitelist') {
3805
- allowedToolsForDelegate = [...this.allowedTools.allowed];
3806
- } else if (this.allowedTools.mode === 'none') {
3807
- allowedToolsForDelegate = [];
3808
- } else if (this.allowedTools.mode === 'all' && this.allowedTools.exclusions?.length > 0) {
3809
- allowedToolsForDelegate = ['*', ...this.allowedTools.exclusions.map(t => '!' + t)];
3810
- }
3811
- // If mode is 'all' with no exclusions, leave as null (default)
3812
-
3813
- const enhancedParams = {
3814
- ...toolParams,
3815
- currentIteration,
3816
- maxIterations,
3817
- parentSessionId: this.sessionId, // Pass parent session ID for tracking
3818
- path: this.searchPath, // Inherit search path
3819
- provider: this.apiType, // Inherit AI provider (string identifier)
3820
- model: this.model, // Inherit model
3821
- searchDelegate: this.searchDelegate,
3822
- enableTasks: this.enableTasks, // Inherit task management (subagent gets isolated TaskManager)
3823
- enableMcp: !!this.mcpBridge, // Inherit MCP enablement
3824
- mcpConfig: this.mcpConfig, // Inherit MCP configuration
3825
- mcpConfigPath: this.mcpConfigPath, // Inherit MCP config path
3826
- enableBash: this.enableBash, // Inherit bash enablement
3827
- bashConfig: this.bashConfig, // Inherit bash configuration
3828
- allowEdit: this.allowEdit, // Inherit edit/create permission
3829
- allowedTools: allowedToolsForDelegate, // Inherit allowed tools from parent
3830
- debug: this.debug,
3831
- tracer: this.tracer
3832
- };
3833
-
3834
- if (this.debug) {
3835
- console.log(`[DEBUG] Executing delegate tool at iteration ${currentIteration}/${maxIterations}`);
3836
- console.log(`[DEBUG] Parent session: ${this.sessionId}`);
3837
- console.log(`[DEBUG] Inherited config: path=${this.searchPath}, provider=${this.apiType}, model=${this.model}`);
3838
- console.log(`[DEBUG] Delegate task: ${toolParams.task?.substring(0, 100)}...`);
3839
- }
3840
-
3841
- // Record delegation start in telemetry
3842
- if (this.tracer) {
3843
- this.tracer.recordDelegationEvent('tool_started', {
3844
- 'delegation.iteration': currentIteration,
3845
- 'delegation.max_iterations': maxIterations,
3846
- 'delegation.task_preview': toolParams.task?.substring(0, 200) + (toolParams.task?.length > 200 ? '...' : '')
3847
- });
3848
- }
3849
-
3850
- return await this.toolImplementations[toolName].execute(enhancedParams);
3851
- }
3852
- return await this.toolImplementations[toolName].execute(toolParams);
3853
- };
3854
-
3855
- let toolResult;
3856
- const toolStartTime = Date.now();
3857
- try {
3858
- if (this.tracer) {
3859
- toolResult = await this.tracer.withSpan('tool.call', executeToolCall, {
3860
- 'tool.name': toolName,
3861
- 'tool.params': JSON.stringify(toolParams).substring(0, 500),
3862
- 'iteration': currentIteration
3863
- });
3864
- } else {
3865
- toolResult = await executeToolCall();
3866
- }
3867
-
3868
- // Record tool result in telemetry
3869
- const toolDurationMs = Date.now() - toolStartTime;
3870
- this._recordToolResultTelemetry(toolName, toolResult, true, toolDurationMs, currentIteration);
3871
3427
 
3872
- // Log tool result in debug mode
3873
- if (this.debug) {
3874
- const resultPreview = typeof toolResult === 'string'
3875
- ? (toolResult.length > 500 ? toolResult.substring(0, 500) + '...' : toolResult)
3876
- : (toolResult ? JSON.stringify(toolResult, null, 2).substring(0, 500) + '...' : 'No Result');
3877
- console.error(`[DEBUG] ========================================`);
3878
- console.error(`[DEBUG] Tool '${toolName}' completed successfully`);
3879
- console.error(`[DEBUG] Result preview:`);
3880
- console.error(resultPreview);
3881
- console.error(`[DEBUG] ========================================\n`);
3882
- }
3428
+ const executeAIRequest = async () => {
3429
+ const result = await this.streamTextWithRetryAndFallback(streamOptions);
3883
3430
 
3884
- // Emit tool success event
3885
- this.events.emit('toolCall', {
3886
- timestamp: new Date().toISOString(),
3887
- name: toolName,
3888
- args: toolParams,
3889
- resultPreview: typeof toolResult === 'string'
3890
- ? (toolResult.length > 200 ? toolResult.substring(0, 200) + '...' : toolResult)
3891
- : (toolResult ? JSON.stringify(toolResult).substring(0, 200) + '...' : 'No Result'),
3892
- status: 'completed'
3893
- });
3894
-
3895
- } catch (toolError) {
3896
- // Log tool error in debug mode
3897
- if (this.debug) {
3898
- console.error(`[DEBUG] ========================================`);
3899
- console.error(`[DEBUG] Tool '${toolName}' failed with error:`);
3900
- console.error(`[DEBUG] ${toolError.message}`);
3901
- console.error(`[DEBUG] ========================================\n`);
3902
- }
3903
-
3904
- // Emit tool error event
3905
- this.events.emit('toolCall', {
3906
- timestamp: new Date().toISOString(),
3907
- name: toolName,
3908
- args: toolParams,
3909
- error: toolError.message || 'Unknown error',
3910
- status: 'error'
3911
- });
3912
- throw toolError; // Re-throw to be handled by outer catch
3913
- }
3914
-
3915
- // Add assistant response and tool result to conversation
3916
- currentMessages.push({ role: 'assistant', content: assistantResponseContent });
3431
+ // Collect the final text
3432
+ const finalText = await result.text;
3917
3433
 
3918
- let toolResultContent = typeof toolResult === 'string' ? toolResult : JSON.stringify(toolResult, null, 2);
3919
-
3920
- // Convert absolute workspace paths to relative in tool results
3921
- if (this.workspaceRoot && toolResultContent) {
3922
- const wsPrefix = this.workspaceRoot.endsWith(sep) ? this.workspaceRoot : this.workspaceRoot + sep;
3923
- toolResultContent = toolResultContent.split(wsPrefix).join('');
3924
- }
3925
-
3926
- // Extract raw output blocks from tool result (before truncation)
3927
- // This prevents LLM from processing/hallucinating large structured output from execute_plan
3928
- // Push to _extractedRawBlocks (NOT _outputBuffer) to prevent the cycle where:
3929
- // formatSuccess wraps → extract re-adds → next execute_plan re-wraps (issue #438)
3930
- const { cleanedContent, extractedBlocks } = extractRawOutputBlocks(toolResultContent);
3931
- if (extractedBlocks.length > 0) {
3932
- toolResultContent = cleanedContent;
3933
- // Accumulate extracted blocks separately from DSL output() buffer
3934
- this._extractedRawBlocks.push(...extractedBlocks);
3935
- if (this.debug) {
3936
- console.log(`[DEBUG] Extracted ${extractedBlocks.length} raw output blocks (${extractedBlocks.reduce((sum, b) => sum + b.length, 0)} chars) from tool result`);
3937
- }
3938
- }
3939
-
3940
- // Truncate if output exceeds token limit
3941
- try {
3942
- const truncateResult = await truncateIfNeeded(toolResultContent, this.tokenCounter, this.sessionId, this.maxOutputTokens);
3943
- if (truncateResult.truncated) {
3944
- toolResultContent = truncateResult.content;
3945
- if (this.debug) {
3946
- console.log(`[DEBUG] Tool output truncated: ${truncateResult.originalTokens} tokens -> saved to ${truncateResult.tempFilePath || 'N/A'}`);
3947
- if (truncateResult.error) {
3948
- console.log(`[DEBUG] Truncation file error: ${truncateResult.error}`);
3949
- }
3950
- }
3951
- }
3952
- } catch (truncateError) {
3953
- // If truncation fails entirely, log and continue with original content
3954
- console.error(`[WARN] Tool output truncation failed: ${truncateError.message}`);
3955
- }
3956
-
3957
- const toolResultMessage = `<tool_result>\n${toolResultContent}\n</tool_result>`;
3958
-
3959
- currentMessages.push({
3960
- role: 'user',
3961
- content: toolResultMessage
3962
- });
3963
-
3964
- // Record conversation turns in telemetry
3965
- if (this.tracer) {
3966
- if (typeof this.tracer.recordConversationTurn === 'function') {
3967
- this.tracer.recordConversationTurn('assistant', assistantResponseContent, {
3968
- iteration: currentIteration,
3969
- has_tool_call: true,
3970
- tool_name: toolName
3971
- });
3972
- this.tracer.recordConversationTurn('tool_result', toolResultContent, {
3973
- iteration: currentIteration,
3974
- tool_name: toolName,
3975
- tool_success: true
3976
- });
3977
- }
3978
- }
3979
-
3980
- // NOTE: Automatic image processing removed (GitHub issue #305)
3981
- // Images are now only loaded when the AI explicitly calls the readImage tool
3982
- // This prevents: 1) implicit behavior that users don't expect
3983
- // 2) crashes with unsupported MIME types (e.g., SVG on Gemini)
3984
-
3985
- if (this.debug) {
3986
- console.log(`[DEBUG] Tool ${toolName} executed successfully. Result length: ${typeof toolResult === 'string' ? toolResult.length : JSON.stringify(toolResult).length}`);
3987
- }
3988
- } catch (error) {
3989
- console.error(`[ERROR] Tool execution failed for ${toolName}:`, error);
3990
- currentMessages.push({ role: 'assistant', content: assistantResponseContent });
3991
- // Format error with structured information for AI
3992
- const errorXml = formatErrorForAI(error);
3993
- currentMessages.push({
3994
- role: 'user',
3995
- content: `<tool_result>\n${errorXml}\n</tool_result>`
3996
- });
3997
- }
3998
- } else {
3999
- console.error(`[ERROR] Unknown tool: ${toolName}`);
4000
- currentMessages.push({ role: 'assistant', content: assistantResponseContent });
4001
-
4002
- // Build list of available tools including MCP tools
4003
- const nativeTools = Object.keys(this.toolImplementations);
4004
- const mcpTools = this.mcpBridge ? this.mcpBridge.getToolNames() : [];
4005
- const allAvailableTools = [...nativeTools, ...mcpTools];
4006
-
4007
- currentMessages.push({
4008
- role: 'user',
4009
- content: `<tool_result>\n<error type="parameter_error" recoverable="true">\n<message>Unknown tool '${toolName}'</message>\n<suggestion>Available tools: ${allAvailableTools.join(', ')}. Please use one of these tools.</suggestion>\n</error>\n</tool_result>`
4010
- });
4011
- }
4012
- }
4013
- } else {
4014
- // No tool call found
4015
- // Special case: If response contains a mermaid code block and no schema was provided,
4016
- // treat it as a valid completion (for mermaid diagram fixing workflow)
4017
- const hasMermaidCodeBlock = /```mermaid\s*\n[\s\S]*?\n```/.test(assistantResponseContent);
4018
- const hasNoSchemaOrTools = !options.schema && validTools.length === 0;
4019
-
4020
- if (hasMermaidCodeBlock && hasNoSchemaOrTools) {
4021
- // Accept mermaid code block as final answer for diagram fixing
4022
- finalResult = assistantResponseContent;
4023
- completionAttempted = true;
4024
3434
  if (this.debug) {
4025
- console.error(`[DEBUG] Accepting mermaid code block as valid completion (no schema, no tools)`);
3435
+ const steps = await result.steps;
3436
+ console.log(`[DEBUG] streamText completed: ${steps?.length || 0} steps, finalText=${finalText?.length || 0} chars, completion=${!!completionResult}`);
4026
3437
  }
4027
- break;
4028
- }
4029
3438
 
4030
- // Issue #443: Check if response contains valid schema-matching JSON
4031
- // Before triggering error.no_tool_call, strip markdown fences and validate
4032
- // This handles cases where AI returns valid JSON without using attempt_completion
4033
- if (options.schema) {
4034
- // Remove thinking tags first
4035
- let contentToCheck = assistantResponseContent;
4036
- contentToCheck = contentToCheck.replace(/<thinking>[\s\S]*?<\/thinking>/gi, '').trim();
4037
- contentToCheck = contentToCheck.replace(/<thinking>[\s\S]*$/gi, '').trim();
4038
-
4039
- // Try to extract and validate JSON
4040
- const cleanedJson = cleanSchemaResponse(contentToCheck);
4041
- try {
4042
- JSON.parse(cleanedJson);
4043
- const validation = validateJsonResponse(cleanedJson, { debug: this.debug, schema: options.schema });
4044
- if (validation.isValid) {
4045
- if (this.debug) {
4046
- console.log(`[DEBUG] Issue #443: Accepting valid JSON response without attempt_completion (${cleanedJson.length} chars)`);
4047
- }
4048
- finalResult = cleanedJson;
4049
- completionAttempted = true;
4050
- break;
4051
- }
4052
- } catch {
4053
- // Not valid JSON - continue to standard no_tool_call handling
3439
+ // Record final token usage
3440
+ const usage = await result.usage;
3441
+ if (usage) {
3442
+ this.tokenCounter.recordUsage(usage, result.experimental_providerMetadata);
4054
3443
  }
4055
- }
4056
-
4057
- // Increment consecutive no-tool counter (catches alternating stuck responses)
4058
- consecutiveNoToolCount++;
4059
3444
 
4060
- // Check for repeated identical responses OR semantically similar "stuck" responses
4061
- // This catches cases where AI alternates between slightly different "I cannot proceed" messages
4062
- const isIdentical = lastNoToolResponse !== null && assistantResponseContent === lastNoToolResponse;
4063
- const isSemanticallyStuck = lastNoToolResponse !== null && areBothStuckResponses(lastNoToolResponse, assistantResponseContent);
3445
+ return { finalText, result };
3446
+ };
4064
3447
 
4065
- if (isIdentical || isSemanticallyStuck) {
4066
- sameResponseCount++;
4067
- if (this.debug && isSemanticallyStuck && !isIdentical) {
4068
- console.log(`[DEBUG] Detected semantically similar stuck response (count: ${sameResponseCount})`);
4069
- }
4070
- if (sameResponseCount >= MAX_REPEATED_IDENTICAL_RESPONSES) {
4071
- // Clean up the response - remove thinking tags
4072
- let cleanedResponse = assistantResponseContent;
4073
- cleanedResponse = cleanedResponse.replace(/<thinking>[\s\S]*?<\/thinking>/gi, '').trim();
4074
- cleanedResponse = cleanedResponse.replace(/<thinking>[\s\S]*$/gi, '').trim();
4075
-
4076
- const hasSubstantialContent = cleanedResponse.length > 50 &&
4077
- !cleanedResponse.includes('<api_call>') &&
4078
- !cleanedResponse.includes('<tool_name>') &&
4079
- !cleanedResponse.includes('<function>');
4080
-
4081
- if (hasSubstantialContent) {
4082
- if (this.debug) {
4083
- console.log(`[DEBUG] ${isIdentical ? 'Same' : 'Stuck'} response repeated ${sameResponseCount} times - accepting as final answer (${cleanedResponse.length} chars)`);
4084
- }
4085
- finalResult = cleanedResponse;
4086
- completionAttempted = true;
4087
- break;
4088
- }
4089
- }
3448
+ let aiResult;
3449
+ if (this.tracer) {
3450
+ const inputPreview = message.length > 1000
3451
+ ? message.substring(0, 1000) + '... [truncated]'
3452
+ : message;
3453
+
3454
+ aiResult = await this.tracer.withSpan('ai.request', executeAIRequest, {
3455
+ 'ai.model': this.model,
3456
+ 'ai.provider': this.clientApiProvider || 'auto',
3457
+ 'ai.input': inputPreview,
3458
+ 'ai.input_length': message.length,
3459
+ 'max_steps': maxIterations,
3460
+ 'max_tokens': maxResponseTokens,
3461
+ 'temperature': 0.3,
3462
+ 'message_count': currentMessages.length
3463
+ });
4090
3464
  } else {
4091
- // Different response (and not both stuck), reset counter
4092
- lastNoToolResponse = assistantResponseContent;
4093
- sameResponseCount = 1;
3465
+ aiResult = await executeAIRequest();
4094
3466
  }
4095
3467
 
4096
- // Circuit breaker: If we've had MAX_CONSECUTIVE_NO_TOOL iterations without any tool call,
4097
- // force completion to avoid infinite loops (e.g., agent alternating between "can't proceed" variations)
4098
- if (consecutiveNoToolCount >= MAX_CONSECUTIVE_NO_TOOL) {
4099
- let cleanedResponse = assistantResponseContent;
4100
- cleanedResponse = cleanedResponse.replace(/<thinking>[\s\S]*?<\/thinking>/gi, '').trim();
4101
- cleanedResponse = cleanedResponse.replace(/<thinking>[\s\S]*$/gi, '').trim();
3468
+ // Use completion result if available, otherwise use final text
3469
+ if (completionResult) {
3470
+ finalResult = completionResult;
4102
3471
 
4103
- if (cleanedResponse.length > 50) {
4104
- if (this.debug) {
4105
- console.log(`[DEBUG] Circuit breaker: ${consecutiveNoToolCount} consecutive no-tool responses - forcing completion`);
3472
+ // Stream the final result if callback is provided
3473
+ if (options.onStream && finalResult) {
3474
+ const chunkSize = 50;
3475
+ for (let i = 0; i < finalResult.length; i += chunkSize) {
3476
+ const chunk = finalResult.slice(i, Math.min(i + chunkSize, finalResult.length));
3477
+ options.onStream(chunk);
4106
3478
  }
4107
- // Record this in telemetry
4108
- this._recordErrorTelemetry('consecutive_no_tool_circuit_breaker', `Forced completion after ${consecutiveNoToolCount} consecutive no-tool responses`, { responsePreview: cleanedResponse.substring(0, 500) }, currentIteration);
4109
- finalResult = cleanedResponse;
4110
- completionAttempted = true;
4111
- break;
4112
3479
  }
3480
+ } else if (aiResult.finalText) {
3481
+ finalResult = aiResult.finalText;
3482
+ completionAttempted = true;
4113
3483
  }
4114
3484
 
4115
- // Add assistant response and ask for tool usage
4116
- currentMessages.push({ role: 'assistant', content: assistantResponseContent });
4117
-
4118
- // Check if the AI tried to use a tool that's not in the valid tools list
4119
- const unrecognizedTool = detectUnrecognizedToolCall(assistantResponseContent, validTools);
4120
-
4121
- let reminderContent;
4122
- if (isWrappedToolError(unrecognizedTool)) {
4123
- // AI wrapped a valid tool name in arbitrary XML tags - provide clear format error
4124
- const wrappedToolName = extractWrappedToolName(unrecognizedTool);
4125
- if (this.debug) {
4126
- console.log(`[DEBUG] Detected wrapped tool '${wrappedToolName}' in assistant response - wrong XML format.`);
4127
- }
4128
-
4129
- // Record wrapped tool error in telemetry
4130
- this._recordErrorTelemetry('wrapped_tool', 'Tool call wrapped in markdown', { toolName: wrappedToolName }, currentIteration);
4131
-
4132
- const toolError = new ParameterError(
4133
- `Tool '${wrappedToolName}' found but in WRONG FORMAT - do not wrap tools in other XML tags.`,
4134
- {
4135
- suggestion: `Use the tool tag DIRECTLY without any wrapper:
4136
-
4137
- CORRECT FORMAT:
4138
- <${wrappedToolName}>
4139
- <param>value</param>
4140
- </${wrappedToolName}>
4141
-
4142
- WRONG (what you did - do not wrap in other tags):
4143
- <api_call><tool_name>${wrappedToolName}</tool_name>...</api_call>
4144
- <function>${wrappedToolName}</function>
4145
- <call name="${wrappedToolName}">...</call>
4146
-
4147
- Remove ALL wrapper tags and use <${wrappedToolName}> directly as the outermost tag.`
4148
- }
4149
- );
4150
- reminderContent = `<tool_result>\n${formatErrorForAI(toolError)}\n</tool_result>`;
4151
- } else if (unrecognizedTool) {
4152
- // AI tried to use a tool that's not available - provide clear error
4153
- if (this.debug) {
4154
- console.log(`[DEBUG] Detected unrecognized tool '${unrecognizedTool}' in assistant response.`);
3485
+ // Update currentMessages from the result for history storage
3486
+ // The SDK manages the full message history internally
3487
+ const resultMessages = await aiResult.result.response?.messages;
3488
+ if (resultMessages) {
3489
+ // Append the AI-generated messages to our message list
3490
+ for (const msg of resultMessages) {
3491
+ currentMessages.push(msg);
4155
3492
  }
3493
+ }
4156
3494
 
4157
- // Record unrecognized tool error in telemetry
4158
- this._recordErrorTelemetry('unrecognized_tool', `Unknown tool: ${unrecognizedTool}`, { toolName: unrecognizedTool, validTools }, currentIteration);
3495
+ break; // Success
4159
3496
 
4160
- const toolError = new ParameterError(`Tool '${unrecognizedTool}' is not available in this context.`, {
4161
- suggestion: `Available tools: ${validTools.join(', ')}. Please use one of these tools instead.`
3497
+ } catch (error) {
3498
+ // Handle context-limit error: compact messages and retry (once)
3499
+ if (!compactionAttempted && handleContextLimitError) {
3500
+ const compactionResult = handleContextLimitError(error, currentMessages, {
3501
+ keepLastSegment: true,
3502
+ minSegmentsToKeep: 1
4162
3503
  });
4163
- reminderContent = `<tool_result>\n${formatErrorForAI(toolError)}\n</tool_result>`;
4164
- } else {
4165
- // No tool call detected at all - record in telemetry
4166
- this._recordErrorTelemetry('no_tool_call', 'AI response did not contain tool call', { responsePreview: assistantResponseContent.substring(0, 500) }, currentIteration);
4167
-
4168
- // Check if this is the last iteration
4169
- // On the last iteration, if the AI gave a substantive response without using
4170
- // attempt_completion, accept it as the final answer rather than losing the content
4171
- if (currentIteration >= maxIterations) {
4172
- // Clean up the response - remove thinking tags
4173
- let cleanedResponse = assistantResponseContent;
4174
- // Remove <thinking>...</thinking> blocks
4175
- cleanedResponse = cleanedResponse.replace(/<thinking>[\s\S]*?<\/thinking>/gi, '').trim();
4176
- // Also remove unclosed thinking tags
4177
- cleanedResponse = cleanedResponse.replace(/<thinking>[\s\S]*$/gi, '').trim();
4178
-
4179
- // Only use if there's substantial content (not just a failed tool call attempt)
4180
- const hasSubstantialContent = cleanedResponse.length > 50 &&
4181
- !cleanedResponse.includes('<api_call>') &&
4182
- !cleanedResponse.includes('<tool_name>') &&
4183
- !cleanedResponse.includes('<function>');
4184
-
4185
- if (hasSubstantialContent) {
4186
- if (this.debug) {
4187
- console.log(`[DEBUG] Max iterations reached - accepting AI response as final answer (${cleanedResponse.length} chars)`);
4188
- }
4189
- finalResult = cleanedResponse;
4190
- completionAttempted = true;
4191
- break;
4192
- }
4193
- }
4194
-
4195
- // Standard reminder - no tool call detected at all
4196
- reminderContent = `Please use one of the available tools to help answer the question, or use attempt_completion if you have enough information to provide a final answer.
4197
3504
 
4198
- Remember: Use proper XML format with BOTH opening and closing tags:
3505
+ if (compactionResult) {
3506
+ const { messages: compactedMessages, stats } = compactionResult;
4199
3507
 
4200
- <tool_name>
4201
- <parameter>value</parameter>
4202
- </tool_name>
4203
-
4204
- Available tools: ${validTools.join(', ')}
4205
-
4206
- To complete with a direct answer:
4207
- <attempt_completion>Your final answer here</attempt_completion>
3508
+ if (stats.removed === 0) {
3509
+ console.error(`[ERROR] Context window exceeded but no messages can be compacted.`);
3510
+ finalResult = `Error: Context window limit exceeded and conversation cannot be compacted further.`;
3511
+ throw new Error(finalResult);
3512
+ }
4208
3513
 
4209
- Or if your previous response already contains a complete, direct answer (not a thinking block or JSON):
4210
- <attempt_complete></attempt_complete>
3514
+ compactionAttempted = true;
3515
+ console.log(`[INFO] Context window limit exceeded. Compacting conversation...`);
3516
+ console.log(`[INFO] Removed ${stats.removed} messages (${stats.reductionPercent}% reduction)`);
4211
3517
 
4212
- Note: <attempt_complete></attempt_complete> reuses your PREVIOUS assistant message as the final answer. Only use this if that message was already a valid, complete response to the user's question.`;
4213
- }
3518
+ currentMessages = [...compactedMessages];
4214
3519
 
4215
- // Check if we should replace the previous reminder instead of appending
4216
- // After pushing assistant message, the previous user message (if a reminder) is at length - 2
4217
- // Message pattern: [..., prev_assistant, prev_user_reminder, current_assistant]
4218
- const prevUserMsgIndex = currentMessages.length - 2;
4219
- const prevUserMsg = currentMessages[prevUserMsgIndex];
4220
- const isExistingReminder = prevUserMsg && prevUserMsg.role === 'user' &&
4221
- (prevUserMsg.content.includes('Please use one of the available tools') ||
4222
- prevUserMsg.content.includes('<tool_result>'));
4223
-
4224
- if (isExistingReminder && sameResponseCount > 1) {
4225
- // Replace the previous reminder with updated content and remove duplicated assistant message
4226
- // This prevents context bloat from repeated identical exchanges
4227
- // Pattern: [..., prev_assistant, prev_user_reminder, current_assistant] -> [..., current_assistant, new_reminder]
4228
- const prevAssistantIndex = prevUserMsgIndex - 1;
4229
-
4230
- // Validate the expected pattern before splicing:
4231
- // 1. prevAssistantIndex must be valid (>= 0)
4232
- // 2. If there's a system message at index 0, don't remove it (prevAssistantIndex > 0)
4233
- // 3. Must be an assistant message at prevAssistantIndex
4234
- // 4. After removal, array should have at least 2 messages (current assistant + new reminder)
4235
- const hasSystemMessage = currentMessages.length > 0 && currentMessages[0].role === 'system';
4236
- const minValidIndex = hasSystemMessage ? 1 : 0;
4237
- const canSafelyRemove = prevAssistantIndex >= minValidIndex &&
4238
- currentMessages[prevAssistantIndex] &&
4239
- currentMessages[prevAssistantIndex].role === 'assistant' &&
4240
- (currentMessages.length - 2) >= (hasSystemMessage ? 2 : 1); // After removal: at least system+assistant or just assistant
4241
-
4242
- if (canSafelyRemove) {
4243
- // Remove the duplicate assistant and old reminder (2 messages starting at prevAssistantIndex)
4244
- currentMessages.splice(prevAssistantIndex, 2);
4245
- if (this.debug) {
4246
- console.log(`[DEBUG] Removed duplicate assistant+reminder pair (iteration ${currentIteration}, same response #${sameResponseCount})`);
3520
+ if (this.tracer) {
3521
+ this.tracer.addEvent('context.compacted', {
3522
+ 'original_count': stats.originalCount,
3523
+ 'compacted_count': stats.compactedCount,
3524
+ 'reduction_percent': stats.reductionPercent,
3525
+ 'tokens_saved': stats.tokensSaved
3526
+ });
4247
3527
  }
4248
- } else if (this.debug) {
4249
- console.log(`[DEBUG] Skipped deduplication: pattern validation failed (prevAssistantIndex=${prevAssistantIndex}, arrayLength=${currentMessages.length})`);
4250
- }
4251
3528
 
4252
- // Add iteration context to help the AI understand this is a repeated attempt
4253
- const iterationHint = `\n\n(Attempt #${sameResponseCount}: Your previous ${sameResponseCount} responses were identical. If you have a complete answer, use <attempt_complete></attempt_complete> to finalize it.)`;
4254
- currentMessages.push({
4255
- role: 'user',
4256
- content: reminderContent + iterationHint
4257
- });
4258
- } else {
4259
- currentMessages.push({
4260
- role: 'user',
4261
- content: reminderContent
4262
- });
4263
- }
4264
-
4265
- if (this.debug) {
4266
- if (unrecognizedTool) {
4267
- console.log(`[DEBUG] Unrecognized tool '${unrecognizedTool}' used. Providing error feedback.`);
4268
- } else {
4269
- console.log(`[DEBUG] No tool call detected in assistant response. Prompting for tool use.`);
3529
+ continue; // Retry with compacted messages
4270
3530
  }
4271
3531
  }
4272
3532
 
4273
- // Circuit breaker: track repeated format errors and break early
4274
- // For wrapped_tool errors, track them as a category (any wrapped_tool counts)
4275
- // For other errors, track the exact error type
4276
- if (unrecognizedTool) {
4277
- const isWrapped = isWrappedToolError(unrecognizedTool);
4278
- const errorCategory = isWrapped ? 'wrapped_tool' : unrecognizedTool;
4279
-
4280
- if (errorCategory === lastFormatErrorType) {
4281
- sameFormatErrorCount++;
4282
- if (sameFormatErrorCount >= MAX_REPEATED_FORMAT_ERRORS) {
4283
- const errorDesc = isWrapped ? 'wrapped tool format' : unrecognizedTool;
4284
-
4285
- // Record circuit breaker error in telemetry
4286
- this._recordErrorTelemetry('circuit_breaker', 'Format error limit exceeded', { formatErrorCount: sameFormatErrorCount, errorCategory }, currentIteration);
4287
-
4288
- console.error(`[ERROR] Format error category '${errorCategory}' repeated ${sameFormatErrorCount} times. Breaking loop early to prevent infinite iteration.`);
4289
- finalResult = `Error: Unable to complete request. The AI model repeatedly used incorrect tool call format (${errorDesc}). Please try rephrasing your question or using a different model.`;
4290
- break;
4291
- }
4292
- } else {
4293
- lastFormatErrorType = errorCategory;
4294
- sameFormatErrorCount = 1;
4295
- }
4296
- } else {
4297
- // Reset counter if it's a different kind of "no tool call" situation
4298
- lastFormatErrorType = null;
4299
- sameFormatErrorCount = 0;
3533
+ // Handle AbortError from attempt_completion gracefully
3534
+ if (completionResult) {
3535
+ finalResult = completionResult;
3536
+ break;
4300
3537
  }
4301
- }
4302
3538
 
4303
- // Record iteration end event
4304
- this._recordIterationTelemetry('end', currentIteration, {
4305
- 'iteration.completed': completionAttempted,
4306
- 'iteration.message_count': currentMessages.length
4307
- });
4308
-
4309
- // Keep message history manageable
4310
- if (currentMessages.length > MAX_HISTORY_MESSAGES) {
4311
- const messagesBefore = currentMessages.length;
4312
- const systemMsg = currentMessages[0]; // Keep system message
4313
- const recentMessages = currentMessages.slice(-MAX_HISTORY_MESSAGES + 1);
4314
- currentMessages = [systemMsg, ...recentMessages];
4315
-
4316
- if (this.debug) {
4317
- console.log(`[DEBUG] Trimmed message history from ${messagesBefore} to ${currentMessages.length} messages`);
4318
- }
3539
+ console.error(`Error during streamText:`, error);
3540
+ finalResult = `Error: Failed to get response from AI model. ${error.message}`;
3541
+ throw new Error(finalResult);
4319
3542
  }
4320
3543
  }
4321
3544
 
@@ -4854,28 +4077,7 @@ Convert your previous response content into actual JSON data that follows this s
4854
4077
  console.log(`[DEBUG] Mermaid validation: Skipped final validation due to disableMermaidValidation option`);
4855
4078
  }
4856
4079
 
4857
- // Remove thinking tags from final result before returning to user
4858
- // Skip for valid JSON to avoid destroying JSON structure when <thinking> appears
4859
- // inside string values (e.g., after tryAutoWrapForSimpleSchema embeds content with
4860
- // residual thinking tag fragments — issue #439)
4861
- if (!options._schemaFormatted) {
4862
- let isValidJson = false;
4863
- try {
4864
- JSON.parse(finalResult);
4865
- isValidJson = true;
4866
- } catch {
4867
- // Not valid JSON, proceed with thinking tag removal
4868
- }
4869
4080
 
4870
- if (!isValidJson) {
4871
- finalResult = removeThinkingTags(finalResult);
4872
- if (this.debug) {
4873
- console.log(`[DEBUG] Removed thinking tags from final result`);
4874
- }
4875
- } else if (this.debug) {
4876
- console.log(`[DEBUG] Skipped thinking tag removal for valid JSON result (issue #439)`);
4877
- }
4878
- }
4879
4081
 
4880
4082
  // Append DSL output buffer directly to response (bypasses LLM rewriting)
4881
4083
  // Skip during _completionPromptProcessed — only the parent answer() should append the buffer.