@probelabs/probe 0.6.0-rc264 → 0.6.0-rc266

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/bin/binaries/probe-v0.6.0-rc266-aarch64-apple-darwin.tar.gz +0 -0
  2. package/bin/binaries/probe-v0.6.0-rc266-aarch64-unknown-linux-musl.tar.gz +0 -0
  3. package/bin/binaries/probe-v0.6.0-rc266-x86_64-apple-darwin.tar.gz +0 -0
  4. package/bin/binaries/probe-v0.6.0-rc266-x86_64-pc-windows-msvc.zip +0 -0
  5. package/bin/binaries/probe-v0.6.0-rc266-x86_64-unknown-linux-musl.tar.gz +0 -0
  6. package/build/agent/ProbeAgent.js +640 -1441
  7. package/build/agent/engines/enhanced-vercel.js +0 -7
  8. package/build/agent/index.js +3972 -5938
  9. package/build/agent/mcp/index.js +6 -15
  10. package/build/agent/mcp/xmlBridge.js +24 -324
  11. package/build/agent/shared/prompts.js +25 -2
  12. package/build/agent/tasks/index.js +0 -1
  13. package/build/agent/tools.js +11 -181
  14. package/build/index.js +13 -35
  15. package/build/tools/common.js +15 -707
  16. package/build/tools/edit.js +7 -0
  17. package/build/tools/executePlan.js +2 -2
  18. package/build/tools/index.js +8 -11
  19. package/cjs/agent/ProbeAgent.cjs +3503 -5461
  20. package/cjs/index.cjs +4429 -6362
  21. package/package.json +2 -2
  22. package/src/agent/ProbeAgent.js +640 -1441
  23. package/src/agent/engines/enhanced-vercel.js +0 -7
  24. package/src/agent/index.js +10 -2
  25. package/src/agent/mcp/index.js +6 -15
  26. package/src/agent/mcp/xmlBridge.js +24 -324
  27. package/src/agent/shared/prompts.js +25 -2
  28. package/src/agent/tasks/index.js +0 -1
  29. package/src/agent/tools.js +11 -181
  30. package/src/index.js +13 -35
  31. package/src/tools/common.js +15 -707
  32. package/src/tools/edit.js +7 -0
  33. package/src/tools/executePlan.js +2 -2
  34. package/src/tools/index.js +8 -11
  35. package/bin/binaries/probe-v0.6.0-rc264-aarch64-apple-darwin.tar.gz +0 -0
  36. package/bin/binaries/probe-v0.6.0-rc264-aarch64-unknown-linux-musl.tar.gz +0 -0
  37. package/bin/binaries/probe-v0.6.0-rc264-x86_64-apple-darwin.tar.gz +0 -0
  38. package/bin/binaries/probe-v0.6.0-rc264-x86_64-pc-windows-msvc.zip +0 -0
  39. package/bin/binaries/probe-v0.6.0-rc264-x86_64-unknown-linux-musl.tar.gz +0 -0
  40. package/build/agent/xmlParsingUtils.js +0 -221
  41. package/src/agent/xmlParsingUtils.js +0 -221
@@ -31,7 +31,7 @@ import { createAnthropic } from '@ai-sdk/anthropic';
31
31
  import { createOpenAI } from '@ai-sdk/openai';
32
32
  import { createGoogleGenerativeAI } from '@ai-sdk/google';
33
33
  import { createAmazonBedrock } from '@ai-sdk/amazon-bedrock';
34
- import { streamText } from 'ai';
34
+ import { streamText, tool, stepCountIs, jsonSchema } from 'ai';
35
35
  import { randomUUID } from 'crypto';
36
36
  import { EventEmitter } from 'events';
37
37
  import { existsSync } from 'fs';
@@ -43,29 +43,26 @@ import { HookManager, HOOK_TYPES } from './hooks/HookManager.js';
43
43
  import { SUPPORTED_IMAGE_EXTENSIONS, IMAGE_MIME_TYPES, isFormatSupportedByProvider } from './imageConfig.js';
44
44
  import {
45
45
  createTools,
46
- searchToolDefinition,
47
- queryToolDefinition,
48
- extractToolDefinition,
49
- delegateToolDefinition,
50
- analyzeAllToolDefinition,
51
- getExecutePlanToolDefinition,
52
- getCleanupExecutePlanToolDefinition,
53
- bashToolDefinition,
54
- listFilesToolDefinition,
55
- searchFilesToolDefinition,
56
- listSkillsToolDefinition,
57
- useSkillToolDefinition,
58
- readImageToolDefinition,
59
- attemptCompletionToolDefinition,
60
- editToolDefinition,
61
- createToolDefinition,
62
- multiEditToolDefinition,
63
- googleSearchToolDefinition,
64
- urlContextToolDefinition,
65
46
  attemptCompletionSchema,
66
- parseXmlToolCallWithThinking
47
+ searchSchema,
48
+ querySchema,
49
+ extractSchema,
50
+ delegateSchema,
51
+ analyzeAllSchema,
52
+ executePlanSchema,
53
+ cleanupExecutePlanSchema,
54
+ bashSchema,
55
+ editSchema,
56
+ createSchema,
57
+ multiEditSchema,
58
+ listFilesSchema,
59
+ searchFilesSchema,
60
+ readImageSchema,
61
+ listSkillsSchema,
62
+ useSkillSchema
67
63
  } from './tools.js';
68
- import { createMessagePreview, detectUnrecognizedToolCall, detectStuckResponse, areBothStuckResponses } from '../tools/common.js';
64
+ import { createMessagePreview } from '../tools/common.js';
65
+ import { taskSchema } from './tasks/taskTool.js';
69
66
  import { FileTracker } from '../tools/fileTracker.js';
70
67
  import {
71
68
  createWrappedTools,
@@ -87,15 +84,13 @@ import {
87
84
  tryAutoWrapForSimpleSchema,
88
85
  tryExtractValidJsonPrefix
89
86
  } from './schemaUtils.js';
90
- import { removeThinkingTags, extractThinkingContent } from './xmlParsingUtils.js';
91
87
  import { predefinedPrompts } from './shared/prompts.js';
92
88
  import {
93
89
  MCPXmlBridge,
94
- parseHybridXmlToolCall,
95
90
  loadMCPConfigurationFromPath
96
91
  } from './mcp/index.js';
97
92
  import { SkillRegistry } from './skills/registry.js';
98
- import { formatAvailableSkillsXml } from './skills/formatting.js';
93
+ import { formatAvailableSkillsXml as formatAvailableSkills } from './skills/formatting.js';
99
94
  import { createSkillToolInstances } from './skills/tools.js';
100
95
  import { RetryManager, createRetryManagerFromEnv } from './RetryManager.js';
101
96
  import { FallbackManager, createFallbackManagerFromEnv, buildFallbackProvidersFromEnv } from './FallbackManager.js';
@@ -108,11 +103,11 @@ import { extractRawOutputBlocks } from '../tools/executePlan.js';
108
103
  import {
109
104
  TaskManager,
110
105
  createTaskTool,
111
- taskToolDefinition,
112
106
  taskSystemPrompt,
113
107
  taskGuidancePrompt,
114
108
  createTaskCompletionBlockedMessage
115
109
  } from './tasks/index.js';
110
+ import { z } from 'zod';
116
111
 
117
112
  // Maximum tool iterations to prevent infinite loops - configurable via MAX_TOOL_ITERATIONS env var
118
113
  const MAX_TOOL_ITERATIONS = (() => {
@@ -125,44 +120,6 @@ const MAX_TOOL_ITERATIONS = (() => {
125
120
  })();
126
121
  const MAX_HISTORY_MESSAGES = 100;
127
122
 
128
- /**
129
- * Extract tool name from wrapped_tool:toolName format
130
- * @param {string} wrappedToolError - Error string in format 'wrapped_tool:toolName'
131
- * @returns {string} The extracted tool name or 'unknown' if format is invalid
132
- */
133
- function extractWrappedToolName(wrappedToolError) {
134
- if (!wrappedToolError || typeof wrappedToolError !== 'string') {
135
- return 'unknown';
136
- }
137
- const colonIndex = wrappedToolError.indexOf(':');
138
- return colonIndex !== -1 ? wrappedToolError.slice(colonIndex + 1) : 'unknown';
139
- }
140
-
141
- /**
142
- * Check if an error indicates a wrapped tool format error
143
- * @param {string|null} error - Error from detectUnrecognizedToolCall
144
- * @returns {boolean} True if it's a wrapped tool error
145
- */
146
- function isWrappedToolError(error) {
147
- return error && typeof error === 'string' && error.startsWith('wrapped_tool:');
148
- }
149
-
150
- /**
151
- * Create error message for wrapped tool format issues
152
- * @param {string} wrappedToolName - The tool name that was incorrectly wrapped
153
- * @returns {string} User-friendly error message with correct format instructions
154
- */
155
- function createWrappedToolErrorMessage(wrappedToolName) {
156
- return `Your response contained an incorrectly formatted tool call (${wrappedToolName} wrapped in XML tags). This cannot be used.
157
-
158
- Please use the CORRECT format:
159
-
160
- <${wrappedToolName}>
161
- Your content here
162
- </${wrappedToolName}>
163
-
164
- Do NOT wrap in other tags like <api_call>, <tool_name>, <function>, etc.`;
165
- }
166
123
 
167
124
  // Supported image file extensions (imported from shared config)
168
125
 
@@ -223,6 +180,7 @@ export class ProbeAgent {
223
180
  * @param {number} [options.maxOutputTokens] - Maximum tokens for tool output before truncation (default: 20000, can also be set via PROBE_MAX_OUTPUT_TOKENS env var)
224
181
  * @param {number} [options.requestTimeout] - Timeout in ms for AI requests (default: 120000 or REQUEST_TIMEOUT env var). Used to abort hung requests.
225
182
  * @param {number} [options.maxOperationTimeout] - Maximum timeout in ms for the entire operation including all retries and fallbacks (default: 300000 or MAX_OPERATION_TIMEOUT env var). This is the absolute maximum time for streamTextWithRetryAndFallback.
183
+ * @param {string|number} [options.thinkingEffort] - Native thinking/reasoning effort level: 'low', 'medium', 'high', or a number (budget tokens). When set, passes provider-specific thinking options to the LLM via providerOptions.
226
184
  */
227
185
  constructor(options = {}) {
228
186
  // Basic configuration
@@ -264,6 +222,10 @@ export class ProbeAgent {
264
222
  // Completion prompt for post-completion validation/review
265
223
  this.completionPrompt = options.completionPrompt || null;
266
224
 
225
+ // Native thinking/reasoning effort for LLM providers
226
+ // Accepted values: 'off' (default), 'low', 'medium', 'high', or a number (budget tokens)
227
+ this.thinkingEffort = options.thinkingEffort || null;
228
+
267
229
  // Tool filtering configuration
268
230
  // Parse allowedTools option: ['*'] = all tools, [] or null = no tools, ['tool1', 'tool2'] = specific tools
269
231
  // Supports exclusion with '!' prefix: ['*', '!bash'] = all tools except bash
@@ -543,30 +505,6 @@ export class ProbeAgent {
543
505
  }
544
506
  }
545
507
 
546
- /**
547
- * Record AI thinking content for telemetry
548
- * @param {string} thinkingContent - The thinking content
549
- * @param {number} iteration - Current iteration number
550
- * @private
551
- */
552
- _recordThinkingTelemetry(thinkingContent, iteration) {
553
- if (!this.tracer || !thinkingContent) return;
554
-
555
- if (this._isAppTracerStyle() && typeof this.tracer.recordThinkingContent === 'function') {
556
- // AppTracer style: (sessionId, iteration, content)
557
- this.tracer.recordThinkingContent(this.sessionId, iteration, thinkingContent);
558
- } else if (typeof this.tracer.recordThinkingContent === 'function') {
559
- // SimpleAppTracer style: (content, metadata)
560
- this.tracer.recordThinkingContent(thinkingContent, { iteration });
561
- } else {
562
- this.tracer.addEvent('ai.thinking', {
563
- 'ai.thinking.content': thinkingContent.substring(0, 50000),
564
- 'ai.thinking.length': thinkingContent.length,
565
- 'iteration': iteration
566
- });
567
- }
568
- }
569
-
570
508
  /**
571
509
  * Record AI tool decision for telemetry
572
510
  * @param {string} toolName - The tool name
@@ -1621,6 +1559,474 @@ export class ProbeAgent {
1621
1559
  return Object.keys(tools).length > 0 ? tools : undefined;
1622
1560
  }
1623
1561
 
1562
+ /**
1563
+ * Build providerOptions for native thinking/reasoning based on thinkingEffort setting.
1564
+ * Maps effort levels to provider-specific parameters.
1565
+ * @param {number} maxResponseTokens - Current max response tokens for budget calculation
1566
+ * @returns {Object|undefined} providerOptions object or undefined if thinking is off
1567
+ * @private
1568
+ */
1569
+ _buildThinkingProviderOptions(maxResponseTokens) {
1570
+ if (!this.thinkingEffort) return undefined;
1571
+
1572
+ const effort = this.thinkingEffort;
1573
+
1574
+ // Map string effort levels to budget tokens
1575
+ const effortToBudget = {
1576
+ low: 4000,
1577
+ medium: 10000,
1578
+ high: 32000,
1579
+ };
1580
+
1581
+ if (this.apiType === 'anthropic') {
1582
+ const budgetTokens = typeof effort === 'number'
1583
+ ? effort
1584
+ : effortToBudget[effort];
1585
+ if (!budgetTokens) return undefined;
1586
+ return {
1587
+ anthropic: {
1588
+ thinking: { type: 'enabled', budgetTokens },
1589
+ },
1590
+ };
1591
+ }
1592
+
1593
+ if (this.apiType === 'openai') {
1594
+ // OpenAI reasoning models use reasoningEffort: 'low' | 'medium' | 'high'
1595
+ const reasoningEffort = typeof effort === 'number'
1596
+ ? (effort <= 4000 ? 'low' : effort <= 10000 ? 'medium' : 'high')
1597
+ : effort;
1598
+ if (!['low', 'medium', 'high'].includes(reasoningEffort)) return undefined;
1599
+ return {
1600
+ openai: {
1601
+ reasoningEffort,
1602
+ },
1603
+ };
1604
+ }
1605
+
1606
+ if (this.apiType === 'google') {
1607
+ const thinkingBudget = typeof effort === 'number'
1608
+ ? effort
1609
+ : effortToBudget[effort];
1610
+ if (!thinkingBudget) return undefined;
1611
+ return {
1612
+ google: {
1613
+ thinkingConfig: { thinkingBudget },
1614
+ },
1615
+ };
1616
+ }
1617
+
1618
+ return undefined;
1619
+ }
1620
+
1621
+ /**
1622
+ * Build native Vercel AI SDK tools object for use with streamText().
1623
+ * Each tool wraps the existing toolImplementations with:
1624
+ * - sessionId and workingDirectory injection
1625
+ * - Event emission
1626
+ * - Output truncation
1627
+ * - Raw output block extraction
1628
+ * - Telemetry recording
1629
+ * - Delegate tool param injection
1630
+ *
1631
+ * @param {Object} options - Options from the answer() call
1632
+ * @param {Function} onComplete - Callback when attempt_completion is called (receives result string)
1633
+ * @param {Object} context - Execution context { maxIterations, currentMessages }
1634
+ * @returns {Object} Tools object for streamText()
1635
+ * @private
1636
+ */
1637
+ _buildNativeTools(options, onComplete, context = {}) {
1638
+ const { maxIterations = 30 } = context;
1639
+ const nativeTools = {};
1640
+ const isToolAllowed = (toolName) => this.allowedTools.isEnabled(toolName);
1641
+
1642
+ // Helper to wrap a tool implementation into a Vercel AI SDK tool
1643
+ const wrapTool = (toolName, schema, description, executeFn) => {
1644
+ // Auto-wrap plain JSON Schema objects with jsonSchema() for AI SDK 5 compatibility
1645
+ // Zod schemas have a _def property; plain objects need wrapping
1646
+ const resolvedSchema = schema && schema._def ? schema : jsonSchema(schema);
1647
+ return tool({
1648
+ description,
1649
+ inputSchema: resolvedSchema,
1650
+ execute: async (params) => {
1651
+ // Add sessionId and workingDirectory to params
1652
+ let resolvedWorkingDirectory = this.workspaceRoot || this.cwd || (this.allowedFolders && this.allowedFolders[0]) || process.cwd();
1653
+ if (params.workingDirectory) {
1654
+ const requestedDir = safeRealpath(isAbsolute(params.workingDirectory)
1655
+ ? resolve(params.workingDirectory)
1656
+ : resolve(resolvedWorkingDirectory, params.workingDirectory));
1657
+ const isWithinAllowed = !this.allowedFolders || this.allowedFolders.length === 0 ||
1658
+ this.allowedFolders.some(folder => {
1659
+ const resolvedFolder = safeRealpath(folder);
1660
+ return requestedDir === resolvedFolder || requestedDir.startsWith(resolvedFolder + sep);
1661
+ });
1662
+ if (isWithinAllowed) {
1663
+ resolvedWorkingDirectory = requestedDir;
1664
+ } else if (this.debug) {
1665
+ console.error(`[DEBUG] Rejected workingDirectory "${params.workingDirectory}" - not within allowed folders`);
1666
+ }
1667
+ }
1668
+ const toolParams = {
1669
+ ...params,
1670
+ sessionId: this.sessionId,
1671
+ workingDirectory: resolvedWorkingDirectory
1672
+ };
1673
+
1674
+ // Log tool execution in debug mode
1675
+ if (this.debug) {
1676
+ console.error(`\n[DEBUG] ========================================`);
1677
+ console.error(`[DEBUG] Executing tool: ${toolName}`);
1678
+ console.error(`[DEBUG] Arguments:`);
1679
+ for (const [key, value] of Object.entries(params)) {
1680
+ const displayValue = typeof value === 'string' && value.length > 100
1681
+ ? value.substring(0, 100) + '...'
1682
+ : value;
1683
+ console.error(`[DEBUG] ${key}: ${JSON.stringify(displayValue)}`);
1684
+ }
1685
+ console.error(`[DEBUG] ========================================\n`);
1686
+ }
1687
+
1688
+ // Emit tool start event
1689
+ this.events.emit('toolCall', {
1690
+ timestamp: new Date().toISOString(),
1691
+ name: toolName,
1692
+ args: toolParams,
1693
+ status: 'started',
1694
+ pauseStream: true
1695
+ });
1696
+
1697
+ const toolStartTime = Date.now();
1698
+ try {
1699
+ // For delegate tool, inject additional params
1700
+ let result;
1701
+ if (toolName === 'delegate') {
1702
+ let allowedToolsForDelegate = null;
1703
+ if (this.allowedTools.mode === 'whitelist') {
1704
+ allowedToolsForDelegate = [...this.allowedTools.allowed];
1705
+ } else if (this.allowedTools.mode === 'none') {
1706
+ allowedToolsForDelegate = [];
1707
+ } else if (this.allowedTools.mode === 'all' && this.allowedTools.exclusions?.length > 0) {
1708
+ allowedToolsForDelegate = ['*', ...this.allowedTools.exclusions.map(t => '!' + t)];
1709
+ }
1710
+
1711
+ const enhancedParams = {
1712
+ ...toolParams,
1713
+ currentIteration: context.currentIteration || 0,
1714
+ maxIterations,
1715
+ parentSessionId: this.sessionId,
1716
+ path: this.searchPath,
1717
+ provider: this.apiType,
1718
+ model: this.model,
1719
+ searchDelegate: this.searchDelegate,
1720
+ enableTasks: this.enableTasks,
1721
+ enableMcp: !!this.mcpBridge,
1722
+ mcpConfig: this.mcpConfig,
1723
+ mcpConfigPath: this.mcpConfigPath,
1724
+ enableBash: this.enableBash,
1725
+ bashConfig: this.bashConfig,
1726
+ allowEdit: this.allowEdit,
1727
+ allowedTools: allowedToolsForDelegate,
1728
+ debug: this.debug,
1729
+ tracer: this.tracer
1730
+ };
1731
+
1732
+ if (this.debug) {
1733
+ console.log(`[DEBUG] Executing delegate tool`);
1734
+ console.log(`[DEBUG] Parent session: ${this.sessionId}`);
1735
+ }
1736
+
1737
+ if (this.tracer) {
1738
+ this.tracer.recordDelegationEvent('tool_started', {
1739
+ 'delegation.task_preview': toolParams.task?.substring(0, 200)
1740
+ });
1741
+ }
1742
+
1743
+ result = await executeFn(enhancedParams);
1744
+ } else {
1745
+ result = await executeFn(toolParams);
1746
+ }
1747
+
1748
+ const toolDurationMs = Date.now() - toolStartTime;
1749
+ this._recordToolResultTelemetry(toolName, result, true, toolDurationMs, context.currentIteration || 0);
1750
+
1751
+ // Emit tool success event
1752
+ this.events.emit('toolCall', {
1753
+ timestamp: new Date().toISOString(),
1754
+ name: toolName,
1755
+ args: toolParams,
1756
+ resultPreview: typeof result === 'string'
1757
+ ? (result.length > 200 ? result.substring(0, 200) + '...' : result)
1758
+ : (result ? JSON.stringify(result).substring(0, 200) + '...' : 'No Result'),
1759
+ status: 'completed'
1760
+ });
1761
+
1762
+ let toolResultContent = typeof result === 'string' ? result : JSON.stringify(result, null, 2);
1763
+
1764
+ // Convert absolute workspace paths to relative
1765
+ if (this.workspaceRoot && toolResultContent) {
1766
+ const wsPrefix = this.workspaceRoot.endsWith(sep) ? this.workspaceRoot : this.workspaceRoot + sep;
1767
+ toolResultContent = toolResultContent.split(wsPrefix).join('');
1768
+ }
1769
+
1770
+ // Extract raw output blocks from tool result (before truncation)
1771
+ const { cleanedContent, extractedBlocks } = extractRawOutputBlocks(toolResultContent);
1772
+ if (extractedBlocks.length > 0) {
1773
+ toolResultContent = cleanedContent;
1774
+ this._extractedRawBlocks.push(...extractedBlocks);
1775
+ if (this.debug) {
1776
+ console.log(`[DEBUG] Extracted ${extractedBlocks.length} raw output blocks from tool result`);
1777
+ }
1778
+ }
1779
+
1780
+ // Truncate if output exceeds token limit
1781
+ try {
1782
+ const truncateResult = await truncateIfNeeded(toolResultContent, this.tokenCounter, this.sessionId, this.maxOutputTokens);
1783
+ if (truncateResult.truncated) {
1784
+ toolResultContent = truncateResult.content;
1785
+ if (this.debug) {
1786
+ console.log(`[DEBUG] Tool output truncated: ${truncateResult.originalTokens} tokens`);
1787
+ }
1788
+ }
1789
+ } catch (truncateError) {
1790
+ console.error(`[WARN] Tool output truncation failed: ${truncateError.message}`);
1791
+ }
1792
+
1793
+ if (this.debug) {
1794
+ console.log(`[DEBUG] Tool ${toolName} executed successfully. Result length: ${toolResultContent.length}`);
1795
+ }
1796
+
1797
+ return toolResultContent;
1798
+ } catch (error) {
1799
+ const toolDurationMs = Date.now() - toolStartTime;
1800
+ this._recordToolResultTelemetry(toolName, null, false, toolDurationMs, context.currentIteration || 0);
1801
+
1802
+ // Emit tool error event
1803
+ this.events.emit('toolCall', {
1804
+ timestamp: new Date().toISOString(),
1805
+ name: toolName,
1806
+ args: toolParams,
1807
+ error: error.message || 'Unknown error',
1808
+ status: 'error'
1809
+ });
1810
+
1811
+ if (this.debug) {
1812
+ console.error(`[DEBUG] Tool '${toolName}' failed: ${error.message}`);
1813
+ }
1814
+
1815
+ // Format error for AI
1816
+ const errorMsg = formatErrorForAI(error);
1817
+ return errorMsg;
1818
+ }
1819
+ }
1820
+ });
1821
+ };
1822
+
1823
+ // Only include attempt_completion when _disableTools is set
1824
+ if (options._disableTools) {
1825
+ nativeTools.attempt_completion = tool({
1826
+ description: 'Signal task completion and provide the final result to the user',
1827
+ inputSchema: z.object({
1828
+ result: z.string().describe('The final result to present to the user')
1829
+ }),
1830
+ execute: async ({ result }) => {
1831
+ onComplete(result);
1832
+ return result;
1833
+ }
1834
+ });
1835
+ return nativeTools;
1836
+ }
1837
+
1838
+ // Add all enabled tools from toolImplementations
1839
+ for (const [toolName, toolImpl] of Object.entries(this.toolImplementations)) {
1840
+ // Get schema and description for this tool
1841
+ const { schema, description } = this._getToolSchemaAndDescription(toolName);
1842
+ if (schema && description) {
1843
+ nativeTools[toolName] = wrapTool(toolName, schema, description, toolImpl.execute);
1844
+ }
1845
+ }
1846
+
1847
+ // Always add attempt_completion
1848
+ nativeTools.attempt_completion = tool({
1849
+ description: 'Signal task completion and provide the final result to the user',
1850
+ inputSchema: z.object({
1851
+ result: z.string().describe('The final result to present to the user')
1852
+ }),
1853
+ execute: async ({ result }) => {
1854
+ // Task completion blocking
1855
+ if (this.enableTasks && this.taskManager && this.taskManager.hasIncompleteTasks()) {
1856
+ const incompleteTasks = this.taskManager.getIncompleteTasks();
1857
+ const highIterationCount = (context.currentIteration || 0) > maxIterations * 0.7;
1858
+
1859
+ if (!highIterationCount) {
1860
+ const taskSummary = this.taskManager.getTaskSummary();
1861
+ const blockedMessage = createTaskCompletionBlockedMessage(taskSummary);
1862
+ if (this.debug) {
1863
+ console.log('[DEBUG] Task checkpoint: Blocking completion due to incomplete tasks');
1864
+ }
1865
+ return blockedMessage;
1866
+ }
1867
+ }
1868
+
1869
+ onComplete(result);
1870
+ return result;
1871
+ }
1872
+ });
1873
+
1874
+ // Add MCP tools if available
1875
+ if (this.mcpBridge && !options._disableTools) {
1876
+ const mcpTools = this.mcpBridge.getVercelTools(this._filterMcpTools(this.mcpBridge.getToolNames()));
1877
+ for (const [name, mcpTool] of Object.entries(mcpTools)) {
1878
+ nativeTools[name] = mcpTool;
1879
+ }
1880
+ }
1881
+
1882
+ // Add Gemini provider tools as wrapper function tools.
1883
+ // The Gemini API does not allow mixing provider-defined tools with function tools
1884
+ // in the same request. To work around this, we create regular function tools that
1885
+ // internally make a separate API call using only the provider-defined tool.
1886
+ if (this.apiType === 'google' && this._geminiToolsEnabled && !options._disableTools) {
1887
+ const { googleSearch, urlContext } = this._geminiToolsEnabled;
1888
+
1889
+ if (googleSearch && isToolAllowed('gemini_google_search')) {
1890
+ nativeTools.google_search = tool({
1891
+ description: 'Search the web using Google Search for current information, recent events, or real-time data.',
1892
+ inputSchema: z.object({
1893
+ query: z.string().describe('The search query to find information on the web')
1894
+ }),
1895
+ execute: async ({ query }) => {
1896
+ if (this.debug) {
1897
+ console.log(`[DEBUG] google_search wrapper: querying "${query}"`);
1898
+ }
1899
+ try {
1900
+ const { generateText: genText } = await import('ai');
1901
+ const searchResult = await genText({
1902
+ model: this.provider(this.model.includes('flash') ? this.model : this.model.replace('pro', 'flash')),
1903
+ messages: [{ role: 'user', content: query }],
1904
+ tools: { google_search: this.provider.tools.googleSearch({}) },
1905
+ stopWhen: stepCountIs(2),
1906
+ maxTokens: 4000
1907
+ });
1908
+ return searchResult.text || 'No search results found.';
1909
+ } catch (err) {
1910
+ if (this.debug) console.error(`[DEBUG] google_search wrapper error:`, err.message);
1911
+ return `Search failed: ${err.message}`;
1912
+ }
1913
+ }
1914
+ });
1915
+ }
1916
+
1917
+ if (urlContext && isToolAllowed('gemini_url_context')) {
1918
+ nativeTools.url_context = tool({
1919
+ description: 'Fetch and analyze content from a specific URL. Use this to read web pages, documentation, or online resources.',
1920
+ inputSchema: z.object({
1921
+ url: z.string().describe('The URL to fetch and analyze')
1922
+ }),
1923
+ execute: async ({ url }) => {
1924
+ if (this.debug) {
1925
+ console.log(`[DEBUG] url_context wrapper: fetching "${url}"`);
1926
+ }
1927
+ try {
1928
+ const { generateText: genText } = await import('ai');
1929
+ const fetchResult = await genText({
1930
+ model: this.provider(this.model.includes('flash') ? this.model : this.model.replace('pro', 'flash')),
1931
+ messages: [{ role: 'user', content: `Summarize the content at this URL: ${url}` }],
1932
+ tools: { url_context: this.provider.tools.urlContext({}) },
1933
+ stopWhen: stepCountIs(2),
1934
+ maxTokens: 4000
1935
+ });
1936
+ return fetchResult.text || 'Could not fetch URL content.';
1937
+ } catch (err) {
1938
+ if (this.debug) console.error(`[DEBUG] url_context wrapper error:`, err.message);
1939
+ return `URL fetch failed: ${err.message}`;
1940
+ }
1941
+ }
1942
+ });
1943
+ }
1944
+ }
1945
+
1946
+ return nativeTools;
1947
+ }
1948
+
1949
+ /**
1950
+ * Get the Zod schema and description for a tool by name
1951
+ * @param {string} toolName - Tool name
1952
+ * @returns {{ schema: z.ZodObject, description: string } | null}
1953
+ * @private
1954
+ */
1955
+ _getToolSchemaAndDescription(toolName) {
1956
+ const toolMap = {
1957
+ search: {
1958
+ schema: searchSchema,
1959
+ description: 'Search code in the repository using keyword queries with Elasticsearch syntax.'
1960
+ },
1961
+ query: {
1962
+ schema: querySchema,
1963
+ description: 'Search code using ast-grep structural pattern matching.'
1964
+ },
1965
+ extract: {
1966
+ schema: extractSchema,
1967
+ description: 'Extract code blocks from files based on file paths and optional line numbers.'
1968
+ },
1969
+ delegate: {
1970
+ schema: delegateSchema,
1971
+ description: 'Delegate big distinct tasks to specialized probe subagents.'
1972
+ },
1973
+ analyze_all: {
1974
+ schema: analyzeAllSchema,
1975
+ description: 'Process ALL data matching a query using map-reduce for aggregate questions.'
1976
+ },
1977
+ execute_plan: {
1978
+ schema: executePlanSchema,
1979
+ description: 'Execute a DSL program to orchestrate tool calls.'
1980
+ },
1981
+ cleanup_execute_plan: {
1982
+ schema: cleanupExecutePlanSchema,
1983
+ description: 'Clean up output buffer and session store from previous execute_plan calls.'
1984
+ },
1985
+ bash: {
1986
+ schema: bashSchema,
1987
+ description: 'Execute bash commands for system exploration and development tasks.'
1988
+ },
1989
+ edit: {
1990
+ schema: editSchema,
1991
+ description: 'Edit files using text replacement, AST-aware symbol operations, or line-targeted editing.'
1992
+ },
1993
+ create: {
1994
+ schema: createSchema,
1995
+ description: 'Create new files with specified content.'
1996
+ },
1997
+ multi_edit: {
1998
+ schema: multiEditSchema,
1999
+ description: 'Apply multiple file edits in one call using a JSON array of operations.'
2000
+ },
2001
+ listFiles: {
2002
+ schema: listFilesSchema,
2003
+ description: 'List files and directories in a specified location.'
2004
+ },
2005
+ searchFiles: {
2006
+ schema: searchFilesSchema,
2007
+ description: 'Find files matching a glob pattern with recursive search capability.'
2008
+ },
2009
+ readImage: {
2010
+ schema: readImageSchema,
2011
+ description: 'Read and load an image file for AI analysis.'
2012
+ },
2013
+ listSkills: {
2014
+ schema: listSkillsSchema,
2015
+ description: 'List available agent skills discovered in the repository.'
2016
+ },
2017
+ useSkill: {
2018
+ schema: useSkillSchema,
2019
+ description: 'Load and activate a specific skill\'s instructions.'
2020
+ },
2021
+ task: {
2022
+ schema: taskSchema,
2023
+ description: 'Manage tasks for tracking progress (create, update, complete, delete, list).'
2024
+ }
2025
+ };
2026
+
2027
+ return toolMap[toolName] || null;
2028
+ }
2029
+
1624
2030
  /**
1625
2031
  * Initialize AWS Bedrock model
1626
2032
  */
@@ -2369,7 +2775,7 @@ export class ProbeAgent {
2369
2775
  async _getAvailableSkillsXml() {
2370
2776
  const skills = await this._loadSkillsMetadata();
2371
2777
  if (!skills.length) return '';
2372
- return formatAvailableSkillsXml(skills);
2778
+ return formatAvailableSkills(skills);
2373
2779
  }
2374
2780
 
2375
2781
  /**
@@ -2527,250 +2933,22 @@ ${extractGuidance}
2527
2933
  }
2528
2934
  }
2529
2935
 
2530
- // Build tool definitions based on allowedTools configuration
2531
- let toolDefinitions = '';
2532
-
2533
- // Helper to check if a tool is allowed
2534
- const isToolAllowed = (toolName) => this.allowedTools.isEnabled(toolName);
2535
-
2536
- // Core tools (filtered by allowedTools)
2537
- if (isToolAllowed('search')) {
2538
- const searchDefinition = this.searchDelegate
2539
- ? `${searchToolDefinition}\n**Note:** This search tool delegates code searching to a dedicated subagent and returns extracted code blocks. Use extract only to expand context or if search returns no code.`
2540
- : searchToolDefinition;
2541
- toolDefinitions += `${searchDefinition}\n`;
2542
- }
2543
- if (isToolAllowed('query')) {
2544
- toolDefinitions += `${queryToolDefinition}\n`;
2545
- }
2546
- if (isToolAllowed('extract')) {
2547
- toolDefinitions += `${extractToolDefinition}\n`;
2548
- }
2549
- if (isToolAllowed('listFiles')) {
2550
- toolDefinitions += `${listFilesToolDefinition}\n`;
2551
- }
2552
- if (isToolAllowed('searchFiles')) {
2553
- toolDefinitions += `${searchFilesToolDefinition}\n`;
2554
- }
2555
- if (this.enableSkills && isToolAllowed('listSkills')) {
2556
- toolDefinitions += `${listSkillsToolDefinition}\n`;
2557
- }
2558
- if (this.enableSkills && isToolAllowed('useSkill')) {
2559
- toolDefinitions += `${useSkillToolDefinition}\n`;
2560
- }
2561
- if (isToolAllowed('readImage')) {
2562
- toolDefinitions += `${readImageToolDefinition}\n`;
2563
- }
2564
-
2565
- // Edit tools (require both allowEdit flag AND allowedTools permission)
2566
- if (this.allowEdit && isToolAllowed('edit')) {
2567
- toolDefinitions += `${editToolDefinition}\n`;
2568
- }
2569
- if (this.allowEdit && isToolAllowed('create')) {
2570
- toolDefinitions += `${createToolDefinition}\n`;
2571
- }
2572
- if (this.allowEdit && isToolAllowed('multi_edit')) {
2573
- toolDefinitions += `${multiEditToolDefinition}\n`;
2574
- }
2575
- // Bash tool (require both enableBash flag AND allowedTools permission)
2576
- if (this.enableBash && isToolAllowed('bash')) {
2577
- toolDefinitions += `${bashToolDefinition}\n`;
2578
- }
2579
-
2580
- // Task tool (require both enableTasks flag AND allowedTools permission)
2581
- if (this.enableTasks && isToolAllowed('task')) {
2582
- toolDefinitions += `${taskToolDefinition}\n`;
2583
- }
2584
-
2585
- // Always include attempt_completion unconditionally - it's a completion signal, not a tool
2586
- // This ensures agents can always complete their work, regardless of tool restrictions
2587
- toolDefinitions += `${attemptCompletionToolDefinition}\n`;
2588
-
2589
- // Delegate tool (require both enableDelegate flag AND allowedTools permission)
2590
- // Place after attempt_completion as it's an optional tool
2591
- if (this.enableDelegate && isToolAllowed('delegate')) {
2592
- toolDefinitions += `${delegateToolDefinition}\n`;
2593
- }
2594
-
2595
- // Execute Plan tool for DSL-based orchestration (requires enableExecutePlan flag, supersedes analyze_all)
2596
- if (this.enableExecutePlan && isToolAllowed('execute_plan')) {
2597
- // Build available function list based on what tools are registered
2598
- const dslFunctions = ['LLM', 'map', 'chunk', 'batch', 'log', 'range', 'flatten', 'unique', 'groupBy', 'parseJSON', 'storeSet', 'storeGet', 'storeAppend', 'storeKeys', 'storeGetAll', 'output'];
2599
- if (isToolAllowed('search')) dslFunctions.unshift('search');
2600
- if (isToolAllowed('query')) dslFunctions.unshift('query');
2601
- if (isToolAllowed('extract')) dslFunctions.unshift('extract');
2602
- if (isToolAllowed('listFiles')) dslFunctions.push('listFiles');
2603
- if (this.enableBash && isToolAllowed('bash')) dslFunctions.push('bash');
2604
- toolDefinitions += `${getExecutePlanToolDefinition(dslFunctions)}\n`;
2605
- // cleanup_execute_plan is enabled together with execute_plan
2606
- if (isToolAllowed('cleanup_execute_plan')) {
2607
- toolDefinitions += `${getCleanupExecutePlanToolDefinition()}\n`;
2608
- }
2609
- } else if (isToolAllowed('analyze_all')) {
2610
- // Fallback: only register analyze_all if execute_plan is not available
2611
- toolDefinitions += `${analyzeAllToolDefinition}\n`;
2612
- }
2613
-
2614
- // Gemini built-in tools (only when using Google provider)
2615
- if (this._geminiToolsEnabled?.googleSearch && isToolAllowed('gemini_google_search')) {
2616
- toolDefinitions += `${googleSearchToolDefinition}\n`;
2617
- }
2618
- if (this._geminiToolsEnabled?.urlContext && isToolAllowed('gemini_url_context')) {
2619
- toolDefinitions += `${urlContextToolDefinition}\n`;
2620
- }
2621
-
2622
- // Build XML tool guidelines with dynamic examples based on allowed tools
2623
- // Build examples only for allowed tools
2624
- let toolExamples = '';
2625
- if (isToolAllowed('search')) {
2626
- toolExamples += `
2627
- <search>
2628
- <query>error handling</query>
2629
- <path>src/search</path>
2630
- </search>
2631
- `;
2632
- }
2633
- if (isToolAllowed('extract')) {
2634
- toolExamples += `
2635
- <extract>
2636
- <targets>src/config.js:15-25</targets>
2637
- </extract>
2638
- `;
2639
- }
2640
- if (isToolAllowed('attempt_completion')) {
2641
- toolExamples += `
2642
- <attempt_completion>
2643
- The configuration is loaded from src/config.js lines 15-25 which contains the database settings.
2644
- </attempt_completion>
2645
- `;
2646
- }
2647
-
2648
- // Build available tools list dynamically based on allowedTools
2649
- let availableToolsList = '';
2650
- if (isToolAllowed('search')) {
2651
- availableToolsList += `- search: Search code using keyword queries${this.searchDelegate ? ' (returns extracted code blocks via a dedicated subagent)' : ''}.\n`;
2652
- }
2653
- if (isToolAllowed('query')) {
2654
- availableToolsList += '- query: Search code using structural AST patterns.\n';
2655
- }
2656
- if (isToolAllowed('extract')) {
2657
- availableToolsList += '- extract: Extract specific code blocks or lines from files. Use with symbol targets (e.g. "file.js#funcName") to get line numbers for line-targeted editing.\n';
2658
- }
2659
- if (isToolAllowed('listFiles')) {
2660
- availableToolsList += '- listFiles: List files and directories in a specified location.\n';
2661
- }
2662
- if (isToolAllowed('searchFiles')) {
2663
- availableToolsList += '- searchFiles: Find files matching a glob pattern with recursive search capability.\n';
2664
- }
2665
- if (this.enableSkills && isToolAllowed('listSkills')) {
2666
- availableToolsList += '- listSkills: List available agent skills discovered in the repository.\n';
2667
- }
2668
- if (this.enableSkills && isToolAllowed('useSkill')) {
2669
- availableToolsList += '- useSkill: Load and activate a specific skill\'s instructions.\n';
2670
- }
2671
- if (isToolAllowed('readImage')) {
2672
- availableToolsList += '- readImage: Read and load an image file for AI analysis.\n';
2673
- }
2674
- if (this.allowEdit && isToolAllowed('edit')) {
2675
- availableToolsList += '- edit: Edit files using text replacement, AST-aware symbol operations, or line-targeted editing.\n';
2676
- }
2677
- if (this.allowEdit && isToolAllowed('create')) {
2678
- availableToolsList += '- create: Create new files with specified content.\n';
2679
- }
2680
- if (this.allowEdit && isToolAllowed('multi_edit')) {
2681
- availableToolsList += '- multi_edit: Apply multiple file edits in one call using a JSON array of operations.\n';
2682
- }
2683
- if (this.enableDelegate && isToolAllowed('delegate')) {
2684
- availableToolsList += '- delegate: Delegate big distinct tasks to specialized probe subagents.\n';
2685
- }
2686
- if (this.enableExecutePlan && isToolAllowed('execute_plan')) {
2687
- availableToolsList += '- execute_plan: Execute a DSL program to orchestrate tool calls. ALWAYS use this for: questions containing "all"/"every"/"comprehensive"/"complete inventory", multi-topic analysis, open-ended discovery questions, or any task requiring full codebase coverage.\n';
2688
- if (isToolAllowed('cleanup_execute_plan')) {
2689
- availableToolsList += '- cleanup_execute_plan: Clean up output buffer and session store from previous execute_plan calls.\n';
2690
- }
2691
- } else if (isToolAllowed('analyze_all')) {
2692
- availableToolsList += '- analyze_all: Process ALL data matching a query using map-reduce (for aggregate questions needing 100% coverage).\n';
2693
- }
2694
- if (this.enableBash && isToolAllowed('bash')) {
2695
- availableToolsList += '- bash: Execute bash commands for system operations.\n';
2696
- }
2697
- if (this.enableTasks && isToolAllowed('task')) {
2698
- availableToolsList += '- task: Manage tasks for tracking progress (create, update, complete, delete, list).\n';
2699
- }
2700
- if (isToolAllowed('attempt_completion')) {
2701
- availableToolsList += '- attempt_completion: Finalize the task and provide the result to the user.\n';
2702
- availableToolsList += '- attempt_complete: Quick completion using previous response (shorthand).\n';
2703
- }
2704
- if (this._geminiToolsEnabled?.googleSearch && isToolAllowed('gemini_google_search')) {
2705
- availableToolsList += '- gemini_google_search: (auto) Web search via Google — invoked automatically by the model when it needs current information.\n';
2706
- }
2707
- if (this._geminiToolsEnabled?.urlContext && isToolAllowed('gemini_url_context')) {
2708
- availableToolsList += '- gemini_url_context: (auto) URL content reader via Google — automatically fetches and reads URLs mentioned in the conversation.\n';
2709
- }
2710
-
2711
- let xmlToolGuidelines = `
2712
- # Tool Use Formatting
2713
-
2714
- Tool use MUST be formatted using XML-style tags. Each tool call requires BOTH opening and closing tags with the exact tool name. Each parameter is similarly enclosed within its own set of opening and closing tags. You MUST use exactly ONE tool call per message until you are ready to complete the task.
2715
-
2716
- **CRITICAL: Every XML tag MUST have both opening <tag> and closing </tag> parts.**
2717
-
2718
- Structure (note the closing tags):
2719
- <tool_name>
2720
- <parameter1_name>value1</parameter1_name>
2721
- <parameter2_name>value2</parameter2_name>
2722
- ...
2723
- </tool_name>
2724
-
2725
- Examples:${toolExamples}
2726
- # Special Case: Quick Completion
2727
- If your previous response was already correct and complete, you may respond with just:
2728
- <attempt_complete>
2729
- This signals to use your previous response as the final answer without repeating content.
2730
-
2731
- # Thinking Process
2732
-
2733
- Before using a tool, analyze the situation within <thinking></thinking> tags. This helps you organize your thoughts and make better decisions.
2734
-
2735
- Example:
2736
- <thinking>
2737
- I need to find code related to error handling in the search module. The most appropriate tool for this is the search tool, which requires a query parameter and a path parameter. I have both the query ("error handling") and the path ("src/search"), so I can proceed with the search.
2738
- </thinking>
2739
-
2740
- # Tool Use Guidelines
2741
-
2742
- 1. Think step-by-step about how to achieve the user's goal.
2743
- 2. Use <thinking></thinking> tags to analyze the situation and determine the appropriate tool.
2744
- 3. Choose **one** tool that helps achieve the current step.
2745
- 4. Format the tool call using the specified XML format with BOTH opening and closing tags. Ensure all required parameters are included.
2746
- 5. **You MUST respond with exactly one tool call in the specified XML format in each turn.**
2747
- 6. Wait for the tool execution result, which will be provided in the next message (within a <tool_result> block).
2748
- 7. Analyze the tool result and decide the next step. If more tool calls are needed, repeat steps 2-6.
2749
- 8. If the task is fully complete and all previous steps were successful, use the \`<attempt_completion>\` tool to provide the final answer. This is the ONLY way to finish the task.
2750
- 9. If you cannot proceed (e.g., missing information, invalid request), use \`<attempt_completion>\` to explain the issue clearly with an appropriate message directly inside the tags.
2751
- 10. If your previous response was already correct and complete, you may use \`<attempt_complete>\` as a shorthand.
2752
-
2753
- Available Tools:
2754
- ${availableToolsList}`;
2755
-
2756
- // Common instructions
2936
+ // Common instructions (simplified - tools are now provided via native tool calling)
2757
2937
  const commonInstructions = `<instructions>
2758
2938
  Follow these instructions carefully:
2759
2939
  1. Analyze the user's request.
2760
- 2. Use <thinking></thinking> tags to analyze the situation and determine the appropriate tool for each step.
2761
- 3. Use the available tools step-by-step to fulfill the request.
2762
- 4. You should always prefer the \`search\` tool for code-related questions.${this.searchDelegate ? ' It already returns extracted code blocks; use \`extract\` only to expand context or read full files.' : ' Read full files only if really necessary.'}
2763
- 5. Ensure to get really deep and understand the full picture before answering.
2764
- 6. You MUST respond with exactly ONE tool call per message, using the specified XML format, until the task is complete.
2765
- 7. Wait for the tool execution result (provided in the next user message in a <tool_result> block) before proceeding to the next step.
2766
- 8. Once the task is fully completed, use the '<attempt_completion>' tool to provide the final result. This is the ONLY way to signal completion.
2767
- 9. Prefer concise and focused search queries. Use specific keywords and phrases to narrow down results.${this.allowEdit ? `
2768
- 10. When modifying files, choose the appropriate tool:
2940
+ 2. Use the available tools step-by-step to fulfill the request.
2941
+ 3. You should always prefer the search tool for code-related questions.${this.searchDelegate ? ' It already returns extracted code blocks; use extract only to expand context or read full files.' : ' Read full files only if really necessary.'}
2942
+ 4. Ensure to get really deep and understand the full picture before answering.
2943
+ 5. Once the task is fully completed, use the attempt_completion tool to provide the final result.
2944
+ 6. Prefer concise and focused search queries. Use specific keywords and phrases to narrow down results.${this.allowEdit ? `
2945
+ 7. When modifying files, choose the appropriate tool:
2769
2946
  - Use 'edit' for all code modifications:
2770
2947
  * For small changes (a line or a few lines), use old_string + new_string — copy old_string verbatim from the file.
2771
2948
  * For rewriting entire functions/classes/methods, use the symbol parameter instead (no exact text matching needed).
2772
2949
  * For editing specific lines from search/extract output, use start_line (and optionally end_line) with the line numbers shown in the output.${this.hashLines ? ' Line references include content hashes (e.g. "42:ab") for integrity verification.' : ''}
2773
2950
  * For editing inside large functions: first use extract with the symbol target (e.g. "file.js#myFunction") to see the function with line numbers${this.hashLines ? ' and hashes' : ''}, then use start_line/end_line to surgically edit specific lines within it.
2951
+ * IMPORTANT: Keep old_string as small as possible — include only the lines you need to change plus minimal context for uniqueness. For replacing large blocks (10+ lines), prefer line-targeted editing with start_line/end_line to constrain scope.
2774
2952
  - Use 'create' for new files or complete file rewrites.
2775
2953
  - If an edit fails, read the error message — it tells you exactly how to fix the call and retry.
2776
2954
  - The system tracks which files you've seen via search/extract. If you try to edit a file you haven't read, or one that changed since you last read it, the edit will fail with instructions to re-read first. Always use extract before editing to ensure you have current file content.` : ''}
@@ -2793,7 +2971,6 @@ Follow these instructions carefully:
2793
2971
  if (this.debug) {
2794
2972
  console.log(`[DEBUG] Using predefined prompt: ${this.promptType}`);
2795
2973
  }
2796
- // Add common instructions to predefined prompts
2797
2974
  systemMessage += commonInstructions;
2798
2975
  } else {
2799
2976
  // Use the default prompt (code explorer) if no prompt type is specified
@@ -2801,16 +2978,9 @@ Follow these instructions carefully:
2801
2978
  if (this.debug) {
2802
2979
  console.log(`[DEBUG] Using default prompt: code explorer`);
2803
2980
  }
2804
- // Add common instructions to the default prompt
2805
2981
  systemMessage += commonInstructions;
2806
2982
  }
2807
2983
 
2808
- // Add XML Tool Guidelines
2809
- systemMessage += `\n${xmlToolGuidelines}\n`;
2810
-
2811
- // Add Tool Definitions
2812
- systemMessage += `\n# Tools Available\n${toolDefinitions}\n`;
2813
-
2814
2984
  // Add available skills (metadata only)
2815
2985
  if (this.enableSkills) {
2816
2986
  const skillsXml = await this._getAvailableSkillsXml();
@@ -2824,19 +2994,6 @@ Follow these instructions carefully:
2824
2994
  systemMessage += `\n${taskSystemPrompt}\n`;
2825
2995
  }
2826
2996
 
2827
- // Add MCP tools if available (filtered by allowedTools)
2828
- if (this.mcpBridge && this.mcpBridge.getToolNames().length > 0) {
2829
- const allMcpTools = this.mcpBridge.getToolNames();
2830
- const allowedMcpTools = this._filterMcpTools(allMcpTools);
2831
-
2832
- if (allowedMcpTools.length > 0) {
2833
- systemMessage += `\n## MCP Tools (JSON parameters in <params> tag)\n`;
2834
- // Get only allowed MCP tool definitions
2835
- systemMessage += this.mcpBridge.getXmlToolDefinitions(allowedMcpTools);
2836
- systemMessage += `\n\nFor MCP tools, use JSON format within the params tag, e.g.:\n<mcp_tool>\n<params>\n{"key": "value"}\n</params>\n</mcp_tool>\n`;
2837
- }
2838
- }
2839
-
2840
2997
  // Add folder information using workspace root and relative paths
2841
2998
  const searchDirectory = this.workspaceRoot;
2842
2999
  if (this.debug) {
@@ -3194,1127 +3351,190 @@ Follow these instructions carefully:
3194
3351
  }
3195
3352
  }
3196
3353
 
3197
- // Circuit breaker for repeated format errors
3198
- let lastFormatErrorType = null;
3199
- let sameFormatErrorCount = 0;
3200
- const MAX_REPEATED_FORMAT_ERRORS = 3;
3201
-
3202
- // Circuit breaker for repeated identical responses without tool calls
3203
- let lastNoToolResponse = null;
3204
- let sameResponseCount = 0;
3205
- const MAX_REPEATED_IDENTICAL_RESPONSES = 3;
3354
+ // Iteration counter for telemetry
3206
3355
 
3207
- // Circuit breaker for consecutive no-tool responses (regardless of content)
3208
- // This catches cases where agent alternates between similar "stuck" messages
3209
- let consecutiveNoToolCount = 0;
3210
- const MAX_CONSECUTIVE_NO_TOOL = 5;
3356
+ // Native tool calling via Vercel AI SDK streamText + maxSteps
3357
+ let completionResult = null;
3358
+ const toolContext = { maxIterations, currentIteration: 0, currentMessages };
3211
3359
 
3212
- // Tool iteration loop (only for non-CLI engines like Vercel/Anthropic/OpenAI)
3213
- while (currentIteration < maxIterations && !completionAttempted) {
3214
- currentIteration++;
3215
- if (this.cancelled) throw new Error('Request was cancelled by the user');
3360
+ const tools = this._buildNativeTools(options, (result) => {
3361
+ completionResult = result;
3362
+ completionAttempted = true;
3363
+ }, toolContext);
3216
3364
 
3217
- if (this.debug) {
3218
- console.log(`\n[DEBUG] --- Tool Loop Iteration ${currentIteration}/${maxIterations} ---`);
3219
- console.log(`[DEBUG] Current messages count for AI call: ${currentMessages.length}`);
3220
-
3221
- // Log preview of the latest user message (helpful for debugging loops)
3222
- const lastUserMessage = [...currentMessages].reverse().find(msg => msg.role === 'user');
3223
- if (lastUserMessage && lastUserMessage.content) {
3224
- const userPreview = createMessagePreview(lastUserMessage.content);
3225
- console.log(`[DEBUG] Latest user message (${lastUserMessage.content.length} chars): ${userPreview}`);
3226
- }
3365
+ let maxResponseTokens = this.maxResponseTokens;
3366
+ if (!maxResponseTokens) {
3367
+ maxResponseTokens = 4000;
3368
+ if (this.model && this.model.includes('opus') || this.model && this.model.includes('sonnet') || this.model && this.model.startsWith('gpt-4') || this.model && this.model.startsWith('gpt-5')) {
3369
+ maxResponseTokens = 8192;
3370
+ } else if (this.model && this.model.startsWith('gemini')) {
3371
+ maxResponseTokens = 32000;
3227
3372
  }
3373
+ }
3228
3374
 
3229
- // Add iteration tracing event
3230
- if (this.tracer) {
3231
- this.tracer.addEvent('iteration.start', {
3232
- 'iteration': currentIteration,
3233
- 'max_iterations': maxIterations,
3234
- 'message_count': currentMessages.length
3235
- });
3236
- }
3375
+ // Context compaction retry loop
3376
+ let compactionAttempted = false;
3377
+ while (true) {
3378
+ try {
3379
+ const messagesForAI = this.prepareMessagesWithImages(currentMessages);
3380
+
3381
+ const streamOptions = {
3382
+ model: this.provider ? this.provider(this.model) : this.model,
3383
+ messages: messagesForAI,
3384
+ tools,
3385
+ stopWhen: stepCountIs(maxIterations),
3386
+ maxTokens: maxResponseTokens,
3387
+ temperature: 0.3,
3388
+ onStepFinish: ({ toolResults, text, finishReason, usage }) => {
3389
+ currentIteration++;
3390
+ toolContext.currentIteration = currentIteration;
3391
+
3392
+ // Record telemetry
3393
+ if (this.tracer) {
3394
+ this.tracer.addEvent('iteration.step', {
3395
+ 'iteration': currentIteration,
3396
+ 'max_iterations': maxIterations,
3397
+ 'finish_reason': finishReason,
3398
+ 'has_tool_calls': !!(toolResults && toolResults.length > 0)
3399
+ });
3400
+ }
3237
3401
 
3238
- // Add warning message when reaching the last iteration
3239
- if (currentIteration === maxIterations) {
3240
- const warningMessage = `⚠️ WARNING: You have reached the maximum tool iterations limit (${maxIterations}). This is your final message. Please respond with the data you have so far. If something was not completed, honestly state what was not done and provide any partial results or recommendations you can offer.`;
3241
-
3242
- currentMessages.push({
3243
- role: 'user',
3244
- content: warningMessage
3245
- });
3246
-
3247
- if (this.debug) {
3248
- console.log(`[DEBUG] Added max iterations warning message at iteration ${currentIteration}`);
3249
- }
3250
- }
3251
-
3252
- // Calculate context size
3253
- this.tokenCounter.calculateContextSize(currentMessages);
3254
- if (this.debug) {
3255
- console.log(`[DEBUG] Estimated context tokens BEFORE LLM call (Iter ${currentIteration}): ${this.tokenCounter.contextSize}`);
3256
- }
3257
-
3258
- let maxResponseTokens = this.maxResponseTokens;
3259
- if (!maxResponseTokens) {
3260
- // Use model-based defaults if not explicitly configured
3261
- maxResponseTokens = 4000;
3262
- if (this.model && this.model.includes('opus') || this.model && this.model.includes('sonnet') || this.model && this.model.startsWith('gpt-4') || this.model && this.model.startsWith('gpt-5')) {
3263
- maxResponseTokens = 8192;
3264
- } else if (this.model && this.model.startsWith('gemini')) {
3265
- maxResponseTokens = 32000;
3266
- }
3267
- }
3268
-
3269
- // Make AI request
3270
- let assistantResponseContent = '';
3271
- let compactionAttempted = false;
3272
-
3273
- // Retry loop for context compaction - separate from streamTextWithRetryAndFallback
3274
- // which handles transient errors (rate limits, network issues, etc.)
3275
- while (true) {
3276
- try {
3277
- // Wrap AI request with tracing if available
3278
- const executeAIRequest = async () => {
3279
- // Prepare messages with potential image content
3280
- const messagesForAI = this.prepareMessagesWithImages(currentMessages);
3281
-
3282
- // Build streamText options, including Gemini provider-defined tools if applicable
3283
- const streamOptions = {
3284
- model: this.provider ? this.provider(this.model) : this.model,
3285
- messages: messagesForAI,
3286
- maxTokens: maxResponseTokens,
3287
- temperature: 0.3,
3288
- };
3289
-
3290
- // Inject Gemini built-in tools (gemini_google_search, gemini_url_context) when using Google provider
3291
- const geminiProviderTools = this._buildGeminiProviderTools();
3292
- if (geminiProviderTools) {
3293
- streamOptions.tools = geminiProviderTools;
3294
- }
3295
-
3296
- const result = await this.streamTextWithRetryAndFallback(streamOptions);
3297
-
3298
- // Get the promise reference BEFORE consuming stream (doesn't lock it)
3299
- const usagePromise = result.usage;
3300
-
3301
- // Collect the streamed response - stream all content for now
3302
- for await (const delta of result.textStream) {
3303
- assistantResponseContent += delta;
3304
- // For now, stream everything - we'll handle segmentation after tools execute
3305
- if (options.onStream) {
3306
- options.onStream(delta);
3307
- }
3308
- }
3309
-
3310
- // Record token usage - await the promise AFTER stream is consumed
3311
- const usage = await usagePromise;
3402
+ // Record token usage
3312
3403
  if (usage) {
3313
- this.tokenCounter.recordUsage(usage, result.experimental_providerMetadata);
3314
- }
3315
-
3316
- return result;
3317
- };
3318
-
3319
- if (this.tracer) {
3320
- // Prepare input preview for tracing (truncate if very long)
3321
- const inputPreview = message.length > 1000
3322
- ? message.substring(0, 1000) + '... [truncated]'
3323
- : message;
3324
-
3325
- await this.tracer.withSpan('ai.request', executeAIRequest, {
3326
- 'ai.model': this.model,
3327
- 'ai.provider': this.clientApiProvider || 'auto',
3328
- 'ai.input': inputPreview,
3329
- 'ai.input_length': message.length,
3330
- 'iteration': currentIteration,
3331
- 'max_tokens': maxResponseTokens,
3332
- 'temperature': 0.3,
3333
- 'message_count': currentMessages.length
3334
- });
3335
- } else {
3336
- await executeAIRequest();
3337
- }
3338
-
3339
- // Success - break out of compaction retry loop
3340
- break;
3341
-
3342
- } catch (error) {
3343
- // Check if this is a context limit error (only try compaction once per iteration)
3344
- if (!compactionAttempted && handleContextLimitError) {
3345
- const compactionResult = handleContextLimitError(error, currentMessages, {
3346
- keepLastSegment: true,
3347
- minSegmentsToKeep: 1
3348
- });
3349
-
3350
- if (compactionResult) {
3351
- // Context limit error detected - compact and retry once
3352
- const { messages: compactedMessages, stats } = compactionResult;
3353
-
3354
- // Check if compaction actually reduced message count
3355
- if (stats.removed === 0) {
3356
- // No messages removed - compaction won't help, fail immediately
3357
- console.error(`[ERROR] Context window exceeded but no messages can be compacted.`);
3358
- console.error(`[ERROR] The conversation history is already minimal (${stats.originalCount} messages).`);
3359
- finalResult = `Error: Context window limit exceeded and conversation cannot be compacted further. Consider starting a new session or reducing system message size.`;
3360
- throw new Error(finalResult);
3361
- }
3362
-
3363
- compactionAttempted = true;
3364
-
3365
- console.log(`[INFO] Context window limit exceeded. Compacting conversation...`);
3366
- console.log(`[INFO] Removed ${stats.removed} messages (${stats.reductionPercent}% reduction)`);
3367
- console.log(`[INFO] Estimated token savings: ${stats.tokensSaved} tokens`);
3368
-
3369
- if (this.debug) {
3370
- console.log(`[DEBUG] Compaction stats:`, stats);
3371
- console.log(`[DEBUG] Original message count: ${stats.originalCount}`);
3372
- console.log(`[DEBUG] Compacted message count: ${stats.compactedCount}`);
3373
- }
3374
-
3375
- // Replace currentMessages with compacted version (creates new array reference)
3376
- // This ensures we don't mutate the original history array
3377
- currentMessages = [...compactedMessages];
3378
-
3379
- // Log compaction event if tracer is available
3380
- if (this.tracer) {
3381
- this.tracer.addEvent('context.compacted', {
3382
- 'iteration': currentIteration,
3383
- 'original_count': stats.originalCount,
3384
- 'compacted_count': stats.compactedCount,
3385
- 'reduction_percent': stats.reductionPercent,
3386
- 'tokens_saved': stats.tokensSaved
3387
- });
3388
- }
3389
-
3390
- // Continue to retry with compacted messages
3391
- continue;
3392
- }
3393
- }
3394
-
3395
- // Not a context limit error, compaction already attempted, or compaction not available
3396
- // IMPORTANT: This break prevents infinite loop if compacted messages still exceed limit
3397
- console.error(`Error during streamText (Iter ${currentIteration}):`, error);
3398
- finalResult = `Error: Failed to get response from AI model during iteration ${currentIteration}. ${error.message}`;
3399
- throw new Error(finalResult);
3400
- }
3401
- }
3402
-
3403
- // Log preview of assistant response for debugging loops
3404
- if (this.debug && assistantResponseContent) {
3405
- const assistantPreview = createMessagePreview(assistantResponseContent);
3406
- console.log(`[DEBUG] Assistant response (${assistantResponseContent.length} chars): ${assistantPreview}`);
3407
- }
3408
-
3409
- // Images in assistant responses are not automatically processed
3410
- // AI can use the readImage tool to explicitly request reading an image
3411
-
3412
- // Parse tool call from response with valid tools list
3413
- // Build validTools based on allowedTools configuration (same pattern as getSystemMessage)
3414
- // When _disableTools is set, only allow attempt_completion for JSON correction flows
3415
- const validTools = [];
3416
- if (options._disableTools) {
3417
- // Only allow attempt_completion for JSON correction - no search/query/edit tools
3418
- validTools.push('attempt_completion');
3419
- if (this.debug) {
3420
- console.log(`[DEBUG] Tools disabled for this call - only attempt_completion allowed`);
3421
- }
3422
- } else {
3423
- if (this.allowedTools.isEnabled('search')) validTools.push('search');
3424
- if (this.allowedTools.isEnabled('query')) validTools.push('query');
3425
- if (this.allowedTools.isEnabled('extract')) validTools.push('extract');
3426
- if (this.allowedTools.isEnabled('listFiles')) validTools.push('listFiles');
3427
- if (this.allowedTools.isEnabled('searchFiles')) validTools.push('searchFiles');
3428
- if (this.enableSkills && this.allowedTools.isEnabled('listSkills')) validTools.push('listSkills');
3429
- if (this.enableSkills && this.allowedTools.isEnabled('useSkill')) validTools.push('useSkill');
3430
- if (this.allowedTools.isEnabled('readImage')) validTools.push('readImage');
3431
- // Always allow attempt_completion in validTools - it's a completion signal, not a tool
3432
- // This ensures agents can complete even when disableTools: true is set (fixes #333)
3433
- // The tool DEFINITION may be hidden in raw AI mode, but we still need to recognize it
3434
- validTools.push('attempt_completion');
3435
-
3436
- // Edit tools (require both allowEdit flag AND allowedTools permission)
3437
- if (this.allowEdit && this.allowedTools.isEnabled('edit')) {
3438
- validTools.push('edit');
3439
- }
3440
- if (this.allowEdit && this.allowedTools.isEnabled('create')) {
3441
- validTools.push('create');
3442
- }
3443
- if (this.allowEdit && this.allowedTools.isEnabled('multi_edit')) {
3444
- validTools.push('multi_edit');
3445
- }
3446
- // Bash tool (require both enableBash flag AND allowedTools permission)
3447
- if (this.enableBash && this.allowedTools.isEnabled('bash')) {
3448
- validTools.push('bash');
3449
- }
3450
- // Delegate tool (require both enableDelegate flag AND allowedTools permission)
3451
- if (this.enableDelegate && this.allowedTools.isEnabled('delegate')) {
3452
- validTools.push('delegate');
3453
- }
3454
- // Execute Plan tool (requires enableExecutePlan flag, supersedes analyze_all)
3455
- if (this.enableExecutePlan && this.allowedTools.isEnabled('execute_plan')) {
3456
- validTools.push('execute_plan');
3457
- // cleanup_execute_plan is enabled together with execute_plan
3458
- if (this.allowedTools.isEnabled('cleanup_execute_plan')) {
3459
- validTools.push('cleanup_execute_plan');
3460
- }
3461
- } else if (this.allowedTools.isEnabled('analyze_all')) {
3462
- validTools.push('analyze_all');
3463
- }
3464
- // Task tool (require both enableTasks flag AND allowedTools permission)
3465
- if (this.enableTasks && this.allowedTools.isEnabled('task')) {
3466
- validTools.push('task');
3467
- }
3468
- }
3469
-
3470
- // Try parsing with hybrid parser that supports both native and MCP tools
3471
- // When _disableTools is set, skip MCP tools entirely
3472
- const nativeTools = validTools;
3473
- const parsedTool = (this.mcpBridge && !options._disableTools)
3474
- ? parseHybridXmlToolCall(assistantResponseContent, nativeTools, this.mcpBridge)
3475
- : parseXmlToolCallWithThinking(assistantResponseContent, validTools);
3476
-
3477
- // Capture AI thinking content if present (for debugging and telemetry)
3478
- if (parsedTool?.thinkingContent) {
3479
- this._recordThinkingTelemetry(parsedTool.thinkingContent, currentIteration);
3480
- }
3481
-
3482
- if (parsedTool) {
3483
- const { toolName, params } = parsedTool;
3484
-
3485
- // Record AI tool decision for telemetry
3486
- this._recordToolDecisionTelemetry(toolName, params, assistantResponseContent.length, currentIteration);
3487
-
3488
- if (this.debug) console.log(`[DEBUG] Parsed tool call: ${toolName} with params:`, params);
3489
-
3490
- // Reset consecutive no-tool counter since we got a valid tool call
3491
- consecutiveNoToolCount = 0;
3492
-
3493
- if (toolName === 'attempt_completion') {
3494
- completionAttempted = true;
3495
-
3496
- // END CHECKPOINT: Block completion if there are incomplete tasks
3497
- // However, allow completion if the agent is stuck and genuinely cannot proceed
3498
- if (this.enableTasks && this.taskManager && this.taskManager.hasIncompleteTasks()) {
3499
- const completionResult = typeof params.result === 'string' ? params.result : '';
3500
- const isStuckCompletion = detectStuckResponse(completionResult);
3501
- const highIterationCount = currentIteration > maxIterations * 0.7; // >70% of max iterations
3502
-
3503
- // Allow stuck completions after many iterations to prevent infinite loops
3504
- if (isStuckCompletion && highIterationCount) {
3505
- if (this.debug) {
3506
- console.log('[DEBUG] Task checkpoint: Allowing stuck completion (agent genuinely cannot proceed)');
3507
- console.log('[DEBUG] Incomplete tasks will remain:', this.taskManager.getTaskSummary());
3508
- }
3509
- // Record telemetry for forced completion
3510
- if (this.tracer && typeof this.tracer.recordTaskEvent === 'function') {
3511
- this.tracer.recordTaskEvent('forced_stuck_completion', {
3512
- 'task.incomplete_count': this.taskManager.getIncompleteTasks().length,
3513
- 'task.iteration': currentIteration,
3514
- 'task.max_iterations': maxIterations
3515
- });
3516
- }
3517
- // Continue to process the completion instead of blocking
3518
- } else {
3519
- const taskSummary = this.taskManager.getTaskSummary();
3520
- const blockedMessage = createTaskCompletionBlockedMessage(taskSummary);
3521
- const incompleteTasks = this.taskManager.getIncompleteTasks();
3522
-
3523
- // Record telemetry for blocked completion
3524
- if (this.tracer && typeof this.tracer.recordTaskEvent === 'function') {
3525
- this.tracer.recordTaskEvent('completion_blocked', {
3526
- 'task.incomplete_count': incompleteTasks.length,
3527
- 'task.incomplete_ids': incompleteTasks.map(t => t.id).join(', '),
3528
- 'task.iteration': currentIteration
3529
- });
3530
- }
3531
-
3532
- if (this.debug) {
3533
- console.log('[DEBUG] Task checkpoint: Blocking completion due to incomplete tasks');
3534
- console.log('[DEBUG] Incomplete tasks:', taskSummary);
3535
- }
3536
-
3537
- // Add reminder message and continue the loop
3538
- currentMessages.push({
3539
- role: 'assistant',
3540
- content: assistantResponseContent
3541
- });
3542
- currentMessages.push({
3543
- role: 'user',
3544
- content: blockedMessage
3545
- });
3546
-
3547
- completionAttempted = false; // Reset to allow more iterations
3548
- continue; // Skip the break and continue the loop
3404
+ this.tokenCounter.recordUsage(usage);
3549
3405
  }
3550
- }
3551
-
3552
- // Handle attempt_complete shorthand - use previous response
3553
- if (params.result === '__PREVIOUS_RESPONSE__') {
3554
- // Find the last assistant message with actual content (not tool calls)
3555
- const lastAssistantMessage = [...currentMessages].reverse().find(msg =>
3556
- msg.role === 'assistant' &&
3557
- msg.content &&
3558
- !(this.mcpBridge
3559
- ? parseHybridXmlToolCall(msg.content, validTools, this.mcpBridge)
3560
- : parseXmlToolCallWithThinking(msg.content, validTools))
3561
- );
3562
-
3563
- if (lastAssistantMessage) {
3564
- const prevContent = lastAssistantMessage.content;
3565
-
3566
- // Check for patterns indicating a failed/wrapped tool call attempt
3567
- // Use detectUnrecognizedToolCall for consistent detection logic
3568
- const wrappedToolError = detectUnrecognizedToolCall(prevContent, validTools);
3569
-
3570
- if (isWrappedToolError(wrappedToolError)) {
3571
- // Previous response was a broken tool call attempt - don't reuse it
3572
- const wrappedToolName = extractWrappedToolName(wrappedToolError);
3573
- if (this.debug) {
3574
- console.log(`[DEBUG] Previous response contains wrapped tool '${wrappedToolName}' - rejecting for __PREVIOUS_RESPONSE__`);
3575
- }
3576
- currentMessages.push({ role: 'assistant', content: assistantResponseContent });
3577
- currentMessages.push({
3578
- role: 'user',
3579
- content: createWrappedToolErrorMessage(wrappedToolName)
3580
- });
3581
- completionAttempted = false;
3582
- continue; // Don't use broken response, continue the loop
3583
- }
3584
3406
 
3585
- // Pre-strip thinking tags to avoid losing content at final cleanup stage
3586
- const strippedContent = removeThinkingTags(prevContent);
3587
- if (strippedContent.length > 50) {
3588
- // Enough content outside thinking tags — use stripped version directly
3589
- finalResult = strippedContent;
3590
- if (this.debug) console.log(`[DEBUG] Using previous response (thinking-stripped) as completion: ${finalResult.substring(0, 100)}...`);
3591
- } else {
3592
- // Content was mostly/entirely inside thinking tags.
3593
- // Extract thinking content and use it as the actual answer.
3594
- // extractThinkingContent now handles nested thinking tags (issue #439)
3595
- let thinkingContent = extractThinkingContent(prevContent);
3596
- // Also apply removeThinkingTags as extra safety to catch any edge cases
3597
- if (thinkingContent) {
3598
- thinkingContent = removeThinkingTags(thinkingContent) || thinkingContent.replace(/<\/?thinking>/g, '');
3599
- }
3600
- if (thinkingContent && thinkingContent.length > 50) {
3601
- finalResult = thinkingContent;
3602
- if (this.debug) console.log(`[DEBUG] Previous response was mostly in thinking tags — using thinking content as completion: ${finalResult.substring(0, 100)}...`);
3603
- } else {
3604
- // Neither stripped nor thinking content is substantive — use raw as fallback
3605
- finalResult = prevContent;
3606
- if (this.debug) console.log(`[DEBUG] Using previous response as completion (raw): ${finalResult.substring(0, 100)}...`);
3607
- }
3608
- }
3609
- } else {
3610
- finalResult = 'Error: No previous response found to use as completion.';
3611
- if (this.debug) console.log(`[DEBUG] No suitable previous response found for attempt_complete shorthand`);
3407
+ // Stream text to callback if present
3408
+ if (options.onStream && text) {
3409
+ options.onStream(text);
3612
3410
  }
3613
- } else {
3614
- // Standard attempt_completion handling
3615
- const validation = attemptCompletionSchema.safeParse(params);
3616
- if (validation.success) {
3617
- finalResult = validation.data.result;
3618
-
3619
- // Stream the final result if callback is provided
3620
- if (options.onStream && finalResult) {
3621
- const chunkSize = 50; // Characters per chunk for smoother streaming
3622
- for (let i = 0; i < finalResult.length; i += chunkSize) {
3623
- const chunk = finalResult.slice(i, Math.min(i + chunkSize, finalResult.length));
3624
- options.onStream(chunk);
3625
- }
3626
- }
3627
3411
 
3628
- if (this.debug) console.log(`[DEBUG] Task completed successfully with result: ${finalResult.substring(0, 100)}...`);
3629
- } else {
3630
- console.error(`[ERROR] Invalid attempt_completion parameters:`, validation.error);
3631
- finalResult = 'Error: Invalid completion attempt. The task could not be completed properly.';
3412
+ if (this.debug) {
3413
+ console.log(`[DEBUG] Step ${currentIteration}/${maxIterations} finished (reason: ${finishReason}, tools: ${toolResults?.length || 0})`);
3632
3414
  }
3633
3415
  }
3634
- break;
3635
- } else {
3636
- // Check tool type and execute accordingly
3637
- const { type } = parsedTool;
3638
-
3639
- if (type === 'mcp' && this.mcpBridge && this.mcpBridge.isMcpTool(toolName)) {
3640
- // Execute MCP tool
3641
- const mcpStartTime = Date.now();
3642
- this._recordMcpToolTelemetry('start', toolName, params, currentIteration);
3643
-
3644
- try {
3645
- // Log MCP tool execution in debug mode
3646
- if (this.debug) {
3647
- console.error(`\n[DEBUG] ========================================`);
3648
- console.error(`[DEBUG] Executing MCP tool: ${toolName}`);
3649
- console.error(`[DEBUG] Arguments:`);
3650
- for (const [key, value] of Object.entries(params)) {
3651
- const displayValue = typeof value === 'string' && value.length > 100
3652
- ? value.substring(0, 100) + '...'
3653
- : value;
3654
- console.error(`[DEBUG] ${key}: ${JSON.stringify(displayValue)}`);
3655
- }
3656
- console.error(`[DEBUG] ========================================\n`);
3657
- }
3658
-
3659
- // Execute MCP tool through the bridge
3660
- const executionResult = await this.mcpBridge.mcpTools[toolName].execute(params);
3661
-
3662
- let toolResultContent = typeof executionResult === 'string' ? executionResult : JSON.stringify(executionResult, null, 2);
3663
-
3664
- // Extract raw output blocks from tool result (before truncation)
3665
- // This prevents LLM from processing/hallucinating large structured output from execute_plan
3666
- // Push to _extractedRawBlocks (NOT _outputBuffer) to prevent the cycle where:
3667
- // formatSuccess wraps → extract re-adds → next execute_plan re-wraps (issue #438)
3668
- const { cleanedContent, extractedBlocks } = extractRawOutputBlocks(toolResultContent);
3669
- if (extractedBlocks.length > 0) {
3670
- toolResultContent = cleanedContent;
3671
- // Accumulate extracted blocks separately from DSL output() buffer
3672
- this._extractedRawBlocks.push(...extractedBlocks);
3673
- if (this.debug) {
3674
- console.log(`[DEBUG] Extracted ${extractedBlocks.length} raw output blocks (${extractedBlocks.reduce((sum, b) => sum + b.length, 0)} chars) from tool result`);
3675
- }
3676
- }
3677
-
3678
- // Truncate if output exceeds token limit
3679
- try {
3680
- const truncateResult = await truncateIfNeeded(toolResultContent, this.tokenCounter, this.sessionId, this.maxOutputTokens);
3681
- if (truncateResult.truncated) {
3682
- toolResultContent = truncateResult.content;
3683
- if (this.debug) {
3684
- console.log(`[DEBUG] Tool output truncated: ${truncateResult.originalTokens} tokens -> saved to ${truncateResult.tempFilePath || 'N/A'}`);
3685
- if (truncateResult.error) {
3686
- console.log(`[DEBUG] Truncation file error: ${truncateResult.error}`);
3687
- }
3688
- }
3689
- }
3690
- } catch (truncateError) {
3691
- // If truncation fails entirely, log and continue with original content
3692
- console.error(`[WARN] Tool output truncation failed: ${truncateError.message}`);
3693
- }
3694
-
3695
- // Record MCP tool end event (success)
3696
- const mcpDurationMs = Date.now() - mcpStartTime;
3697
- this._recordMcpToolTelemetry('end', toolName, null, currentIteration, {
3698
- result: toolResultContent,
3699
- success: true,
3700
- durationMs: mcpDurationMs,
3701
- error: null
3702
- });
3703
-
3704
- // Log MCP tool result in debug mode
3705
- if (this.debug) {
3706
- const preview = toolResultContent.length > 500 ? toolResultContent.substring(0, 500) + '...' : toolResultContent;
3707
- console.error(`[DEBUG] ========================================`);
3708
- console.error(`[DEBUG] MCP tool '${toolName}' completed successfully`);
3709
- console.error(`[DEBUG] Result preview:`);
3710
- console.error(preview);
3711
- console.error(`[DEBUG] ========================================\n`);
3712
- }
3713
-
3714
- // Add assistant message with tool call (matching native tool pattern)
3715
- currentMessages.push({ role: 'assistant', content: assistantResponseContent });
3716
- currentMessages.push({ role: 'user', content: `<tool_result>\n${toolResultContent}\n</tool_result>` });
3717
- } catch (error) {
3718
- // Record MCP tool end event (failure)
3719
- const mcpDurationMs = Date.now() - mcpStartTime;
3720
- this._recordMcpToolTelemetry('end', toolName, null, currentIteration, {
3721
- result: null,
3722
- success: false,
3723
- durationMs: mcpDurationMs,
3724
- error: error.message
3725
- });
3726
-
3727
- console.error(`Error executing MCP tool ${toolName}:`, error);
3728
-
3729
- // Log MCP tool error in debug mode
3730
- if (this.debug) {
3731
- console.error(`[DEBUG] ========================================`);
3732
- console.error(`[DEBUG] MCP tool '${toolName}' failed with error:`);
3733
- console.error(`[DEBUG] ${error.message}`);
3734
- console.error(`[DEBUG] ========================================\n`);
3735
- }
3736
-
3737
- // Format error with structured information for AI
3738
- const errorXml = formatErrorForAI(error);
3739
- // Add assistant message with tool call (matching native tool pattern)
3740
- currentMessages.push({ role: 'assistant', content: assistantResponseContent });
3741
- currentMessages.push({ role: 'user', content: `<tool_result>\n${errorXml}\n</tool_result>` });
3742
- }
3743
- } else if (this.toolImplementations[toolName]) {
3744
- // Execute native tool
3745
- try {
3746
- // Add sessionId and workingDirectory to params for tool execution
3747
- // Validate and resolve workingDirectory using safeRealpath for symlink security
3748
- // Consistent fallback chain: workspaceRoot > cwd > allowedFolders[0] > process.cwd()
3749
- let resolvedWorkingDirectory = this.workspaceRoot || this.cwd || (this.allowedFolders && this.allowedFolders[0]) || process.cwd();
3750
- if (params.workingDirectory) {
3751
- // Resolve relative paths against the current working directory context, not process.cwd()
3752
- // Use safeRealpath to resolve symlinks and prevent bypass attacks
3753
- const requestedDir = safeRealpath(isAbsolute(params.workingDirectory)
3754
- ? resolve(params.workingDirectory)
3755
- : resolve(resolvedWorkingDirectory, params.workingDirectory));
3756
- // Check if the requested directory is within allowed folders
3757
- const isWithinAllowed = !this.allowedFolders || this.allowedFolders.length === 0 ||
3758
- this.allowedFolders.some(folder => {
3759
- const resolvedFolder = safeRealpath(folder);
3760
- return requestedDir === resolvedFolder || requestedDir.startsWith(resolvedFolder + sep);
3761
- });
3762
- if (isWithinAllowed) {
3763
- resolvedWorkingDirectory = requestedDir;
3764
- } else if (this.debug) {
3765
- console.error(`[DEBUG] Rejected workingDirectory "${params.workingDirectory}" - not within allowed folders`);
3766
- }
3767
- }
3768
- const toolParams = {
3769
- ...params,
3770
- sessionId: this.sessionId,
3771
- workingDirectory: resolvedWorkingDirectory
3772
- };
3773
-
3774
- // Log tool execution in debug mode
3775
- if (this.debug) {
3776
- console.error(`\n[DEBUG] ========================================`);
3777
- console.error(`[DEBUG] Executing tool: ${toolName}`);
3778
- console.error(`[DEBUG] Arguments:`);
3779
- for (const [key, value] of Object.entries(params)) {
3780
- const displayValue = typeof value === 'string' && value.length > 100
3781
- ? value.substring(0, 100) + '...'
3782
- : value;
3783
- console.error(`[DEBUG] ${key}: ${JSON.stringify(displayValue)}`);
3784
- }
3785
- console.error(`[DEBUG] ========================================\n`);
3786
- }
3787
-
3788
- // Emit tool start event with stream pause signal
3789
- this.events.emit('toolCall', {
3790
- timestamp: new Date().toISOString(),
3791
- name: toolName,
3792
- args: toolParams,
3793
- status: 'started',
3794
- pauseStream: true // Signal to pause text streaming
3795
- });
3796
-
3797
- // Execute tool with tracing if available
3798
- const executeToolCall = async () => {
3799
- // For delegate tool, pass current iteration, max iterations, session ID, and config
3800
- if (toolName === 'delegate') {
3801
- // Reconstruct allowedTools array preserving all modes (same logic as clone())
3802
- let allowedToolsForDelegate = null;
3803
- if (this.allowedTools.mode === 'whitelist') {
3804
- allowedToolsForDelegate = [...this.allowedTools.allowed];
3805
- } else if (this.allowedTools.mode === 'none') {
3806
- allowedToolsForDelegate = [];
3807
- } else if (this.allowedTools.mode === 'all' && this.allowedTools.exclusions?.length > 0) {
3808
- allowedToolsForDelegate = ['*', ...this.allowedTools.exclusions.map(t => '!' + t)];
3809
- }
3810
- // If mode is 'all' with no exclusions, leave as null (default)
3811
-
3812
- const enhancedParams = {
3813
- ...toolParams,
3814
- currentIteration,
3815
- maxIterations,
3816
- parentSessionId: this.sessionId, // Pass parent session ID for tracking
3817
- path: this.searchPath, // Inherit search path
3818
- provider: this.apiType, // Inherit AI provider (string identifier)
3819
- model: this.model, // Inherit model
3820
- searchDelegate: this.searchDelegate,
3821
- enableTasks: this.enableTasks, // Inherit task management (subagent gets isolated TaskManager)
3822
- enableMcp: !!this.mcpBridge, // Inherit MCP enablement
3823
- mcpConfig: this.mcpConfig, // Inherit MCP configuration
3824
- mcpConfigPath: this.mcpConfigPath, // Inherit MCP config path
3825
- enableBash: this.enableBash, // Inherit bash enablement
3826
- bashConfig: this.bashConfig, // Inherit bash configuration
3827
- allowEdit: this.allowEdit, // Inherit edit/create permission
3828
- allowedTools: allowedToolsForDelegate, // Inherit allowed tools from parent
3829
- debug: this.debug,
3830
- tracer: this.tracer
3831
- };
3832
-
3833
- if (this.debug) {
3834
- console.log(`[DEBUG] Executing delegate tool at iteration ${currentIteration}/${maxIterations}`);
3835
- console.log(`[DEBUG] Parent session: ${this.sessionId}`);
3836
- console.log(`[DEBUG] Inherited config: path=${this.searchPath}, provider=${this.apiType}, model=${this.model}`);
3837
- console.log(`[DEBUG] Delegate task: ${toolParams.task?.substring(0, 100)}...`);
3838
- }
3839
-
3840
- // Record delegation start in telemetry
3841
- if (this.tracer) {
3842
- this.tracer.recordDelegationEvent('tool_started', {
3843
- 'delegation.iteration': currentIteration,
3844
- 'delegation.max_iterations': maxIterations,
3845
- 'delegation.task_preview': toolParams.task?.substring(0, 200) + (toolParams.task?.length > 200 ? '...' : '')
3846
- });
3847
- }
3848
-
3849
- return await this.toolImplementations[toolName].execute(enhancedParams);
3850
- }
3851
- return await this.toolImplementations[toolName].execute(toolParams);
3852
- };
3853
-
3854
- let toolResult;
3855
- const toolStartTime = Date.now();
3856
- try {
3857
- if (this.tracer) {
3858
- toolResult = await this.tracer.withSpan('tool.call', executeToolCall, {
3859
- 'tool.name': toolName,
3860
- 'tool.params': JSON.stringify(toolParams).substring(0, 500),
3861
- 'iteration': currentIteration
3862
- });
3863
- } else {
3864
- toolResult = await executeToolCall();
3865
- }
3866
-
3867
- // Record tool result in telemetry
3868
- const toolDurationMs = Date.now() - toolStartTime;
3869
- this._recordToolResultTelemetry(toolName, toolResult, true, toolDurationMs, currentIteration);
3870
-
3871
- // Log tool result in debug mode
3872
- if (this.debug) {
3873
- const resultPreview = typeof toolResult === 'string'
3874
- ? (toolResult.length > 500 ? toolResult.substring(0, 500) + '...' : toolResult)
3875
- : (toolResult ? JSON.stringify(toolResult, null, 2).substring(0, 500) + '...' : 'No Result');
3876
- console.error(`[DEBUG] ========================================`);
3877
- console.error(`[DEBUG] Tool '${toolName}' completed successfully`);
3878
- console.error(`[DEBUG] Result preview:`);
3879
- console.error(resultPreview);
3880
- console.error(`[DEBUG] ========================================\n`);
3881
- }
3882
-
3883
- // Emit tool success event
3884
- this.events.emit('toolCall', {
3885
- timestamp: new Date().toISOString(),
3886
- name: toolName,
3887
- args: toolParams,
3888
- resultPreview: typeof toolResult === 'string'
3889
- ? (toolResult.length > 200 ? toolResult.substring(0, 200) + '...' : toolResult)
3890
- : (toolResult ? JSON.stringify(toolResult).substring(0, 200) + '...' : 'No Result'),
3891
- status: 'completed'
3892
- });
3893
-
3894
- } catch (toolError) {
3895
- // Log tool error in debug mode
3896
- if (this.debug) {
3897
- console.error(`[DEBUG] ========================================`);
3898
- console.error(`[DEBUG] Tool '${toolName}' failed with error:`);
3899
- console.error(`[DEBUG] ${toolError.message}`);
3900
- console.error(`[DEBUG] ========================================\n`);
3901
- }
3416
+ };
3902
3417
 
3903
- // Emit tool error event
3904
- this.events.emit('toolCall', {
3905
- timestamp: new Date().toISOString(),
3906
- name: toolName,
3907
- args: toolParams,
3908
- error: toolError.message || 'Unknown error',
3909
- status: 'error'
3910
- });
3911
- throw toolError; // Re-throw to be handled by outer catch
3912
- }
3913
-
3914
- // Add assistant response and tool result to conversation
3915
- currentMessages.push({ role: 'assistant', content: assistantResponseContent });
3916
-
3917
- let toolResultContent = typeof toolResult === 'string' ? toolResult : JSON.stringify(toolResult, null, 2);
3918
-
3919
- // Convert absolute workspace paths to relative in tool results
3920
- if (this.workspaceRoot && toolResultContent) {
3921
- const wsPrefix = this.workspaceRoot.endsWith(sep) ? this.workspaceRoot : this.workspaceRoot + sep;
3922
- toolResultContent = toolResultContent.split(wsPrefix).join('');
3923
- }
3924
-
3925
- // Extract raw output blocks from tool result (before truncation)
3926
- // This prevents LLM from processing/hallucinating large structured output from execute_plan
3927
- // Push to _extractedRawBlocks (NOT _outputBuffer) to prevent the cycle where:
3928
- // formatSuccess wraps → extract re-adds → next execute_plan re-wraps (issue #438)
3929
- const { cleanedContent, extractedBlocks } = extractRawOutputBlocks(toolResultContent);
3930
- if (extractedBlocks.length > 0) {
3931
- toolResultContent = cleanedContent;
3932
- // Accumulate extracted blocks separately from DSL output() buffer
3933
- this._extractedRawBlocks.push(...extractedBlocks);
3934
- if (this.debug) {
3935
- console.log(`[DEBUG] Extracted ${extractedBlocks.length} raw output blocks (${extractedBlocks.reduce((sum, b) => sum + b.length, 0)} chars) from tool result`);
3936
- }
3937
- }
3938
-
3939
- // Truncate if output exceeds token limit
3940
- try {
3941
- const truncateResult = await truncateIfNeeded(toolResultContent, this.tokenCounter, this.sessionId, this.maxOutputTokens);
3942
- if (truncateResult.truncated) {
3943
- toolResultContent = truncateResult.content;
3944
- if (this.debug) {
3945
- console.log(`[DEBUG] Tool output truncated: ${truncateResult.originalTokens} tokens -> saved to ${truncateResult.tempFilePath || 'N/A'}`);
3946
- if (truncateResult.error) {
3947
- console.log(`[DEBUG] Truncation file error: ${truncateResult.error}`);
3948
- }
3949
- }
3950
- }
3951
- } catch (truncateError) {
3952
- // If truncation fails entirely, log and continue with original content
3953
- console.error(`[WARN] Tool output truncation failed: ${truncateError.message}`);
3954
- }
3955
-
3956
- const toolResultMessage = `<tool_result>\n${toolResultContent}\n</tool_result>`;
3957
-
3958
- currentMessages.push({
3959
- role: 'user',
3960
- content: toolResultMessage
3961
- });
3418
+ // Add native thinking/reasoning providerOptions when thinkingEffort is set
3419
+ const providerOpts = this._buildThinkingProviderOptions(maxResponseTokens);
3420
+ if (providerOpts) {
3421
+ streamOptions.providerOptions = providerOpts;
3422
+ }
3962
3423
 
3963
- // Record conversation turns in telemetry
3964
- if (this.tracer) {
3965
- if (typeof this.tracer.recordConversationTurn === 'function') {
3966
- this.tracer.recordConversationTurn('assistant', assistantResponseContent, {
3967
- iteration: currentIteration,
3968
- has_tool_call: true,
3969
- tool_name: toolName
3970
- });
3971
- this.tracer.recordConversationTurn('tool_result', toolResultContent, {
3972
- iteration: currentIteration,
3973
- tool_name: toolName,
3974
- tool_success: true
3975
- });
3976
- }
3977
- }
3424
+ const executeAIRequest = async () => {
3425
+ const result = await this.streamTextWithRetryAndFallback(streamOptions);
3978
3426
 
3979
- // NOTE: Automatic image processing removed (GitHub issue #305)
3980
- // Images are now only loaded when the AI explicitly calls the readImage tool
3981
- // This prevents: 1) implicit behavior that users don't expect
3982
- // 2) crashes with unsupported MIME types (e.g., SVG on Gemini)
3427
+ // Collect the final text
3428
+ const finalText = await result.text;
3983
3429
 
3984
- if (this.debug) {
3985
- console.log(`[DEBUG] Tool ${toolName} executed successfully. Result length: ${typeof toolResult === 'string' ? toolResult.length : JSON.stringify(toolResult).length}`);
3986
- }
3987
- } catch (error) {
3988
- console.error(`[ERROR] Tool execution failed for ${toolName}:`, error);
3989
- currentMessages.push({ role: 'assistant', content: assistantResponseContent });
3990
- // Format error with structured information for AI
3991
- const errorXml = formatErrorForAI(error);
3992
- currentMessages.push({
3993
- role: 'user',
3994
- content: `<tool_result>\n${errorXml}\n</tool_result>`
3995
- });
3996
- }
3997
- } else {
3998
- console.error(`[ERROR] Unknown tool: ${toolName}`);
3999
- currentMessages.push({ role: 'assistant', content: assistantResponseContent });
4000
-
4001
- // Build list of available tools including MCP tools
4002
- const nativeTools = Object.keys(this.toolImplementations);
4003
- const mcpTools = this.mcpBridge ? this.mcpBridge.getToolNames() : [];
4004
- const allAvailableTools = [...nativeTools, ...mcpTools];
4005
-
4006
- currentMessages.push({
4007
- role: 'user',
4008
- content: `<tool_result>\n<error type="parameter_error" recoverable="true">\n<message>Unknown tool '${toolName}'</message>\n<suggestion>Available tools: ${allAvailableTools.join(', ')}. Please use one of these tools.</suggestion>\n</error>\n</tool_result>`
4009
- });
4010
- }
4011
- }
4012
- } else {
4013
- // No tool call found
4014
- // Special case: If response contains a mermaid code block and no schema was provided,
4015
- // treat it as a valid completion (for mermaid diagram fixing workflow)
4016
- const hasMermaidCodeBlock = /```mermaid\s*\n[\s\S]*?\n```/.test(assistantResponseContent);
4017
- const hasNoSchemaOrTools = !options.schema && validTools.length === 0;
4018
-
4019
- if (hasMermaidCodeBlock && hasNoSchemaOrTools) {
4020
- // Accept mermaid code block as final answer for diagram fixing
4021
- finalResult = assistantResponseContent;
4022
- completionAttempted = true;
4023
3430
  if (this.debug) {
4024
- console.error(`[DEBUG] Accepting mermaid code block as valid completion (no schema, no tools)`);
3431
+ const steps = await result.steps;
3432
+ console.log(`[DEBUG] streamText completed: ${steps?.length || 0} steps, finalText=${finalText?.length || 0} chars, completion=${!!completionResult}`);
4025
3433
  }
4026
- break;
4027
- }
4028
3434
 
4029
- // Issue #443: Check if response contains valid schema-matching JSON
4030
- // Before triggering error.no_tool_call, strip markdown fences and validate
4031
- // This handles cases where AI returns valid JSON without using attempt_completion
4032
- if (options.schema) {
4033
- // Remove thinking tags first
4034
- let contentToCheck = assistantResponseContent;
4035
- contentToCheck = contentToCheck.replace(/<thinking>[\s\S]*?<\/thinking>/gi, '').trim();
4036
- contentToCheck = contentToCheck.replace(/<thinking>[\s\S]*$/gi, '').trim();
4037
-
4038
- // Try to extract and validate JSON
4039
- const cleanedJson = cleanSchemaResponse(contentToCheck);
4040
- try {
4041
- JSON.parse(cleanedJson);
4042
- const validation = validateJsonResponse(cleanedJson, { debug: this.debug, schema: options.schema });
4043
- if (validation.isValid) {
4044
- if (this.debug) {
4045
- console.log(`[DEBUG] Issue #443: Accepting valid JSON response without attempt_completion (${cleanedJson.length} chars)`);
4046
- }
4047
- finalResult = cleanedJson;
4048
- completionAttempted = true;
4049
- break;
4050
- }
4051
- } catch {
4052
- // Not valid JSON - continue to standard no_tool_call handling
3435
+ // Record final token usage
3436
+ const usage = await result.usage;
3437
+ if (usage) {
3438
+ this.tokenCounter.recordUsage(usage, result.experimental_providerMetadata);
4053
3439
  }
4054
- }
4055
-
4056
- // Increment consecutive no-tool counter (catches alternating stuck responses)
4057
- consecutiveNoToolCount++;
4058
3440
 
4059
- // Check for repeated identical responses OR semantically similar "stuck" responses
4060
- // This catches cases where AI alternates between slightly different "I cannot proceed" messages
4061
- const isIdentical = lastNoToolResponse !== null && assistantResponseContent === lastNoToolResponse;
4062
- const isSemanticallyStuck = lastNoToolResponse !== null && areBothStuckResponses(lastNoToolResponse, assistantResponseContent);
3441
+ return { finalText, result };
3442
+ };
4063
3443
 
4064
- if (isIdentical || isSemanticallyStuck) {
4065
- sameResponseCount++;
4066
- if (this.debug && isSemanticallyStuck && !isIdentical) {
4067
- console.log(`[DEBUG] Detected semantically similar stuck response (count: ${sameResponseCount})`);
4068
- }
4069
- if (sameResponseCount >= MAX_REPEATED_IDENTICAL_RESPONSES) {
4070
- // Clean up the response - remove thinking tags
4071
- let cleanedResponse = assistantResponseContent;
4072
- cleanedResponse = cleanedResponse.replace(/<thinking>[\s\S]*?<\/thinking>/gi, '').trim();
4073
- cleanedResponse = cleanedResponse.replace(/<thinking>[\s\S]*$/gi, '').trim();
4074
-
4075
- const hasSubstantialContent = cleanedResponse.length > 50 &&
4076
- !cleanedResponse.includes('<api_call>') &&
4077
- !cleanedResponse.includes('<tool_name>') &&
4078
- !cleanedResponse.includes('<function>');
4079
-
4080
- if (hasSubstantialContent) {
4081
- if (this.debug) {
4082
- console.log(`[DEBUG] ${isIdentical ? 'Same' : 'Stuck'} response repeated ${sameResponseCount} times - accepting as final answer (${cleanedResponse.length} chars)`);
4083
- }
4084
- finalResult = cleanedResponse;
4085
- completionAttempted = true;
4086
- break;
4087
- }
4088
- }
3444
+ let aiResult;
3445
+ if (this.tracer) {
3446
+ const inputPreview = message.length > 1000
3447
+ ? message.substring(0, 1000) + '... [truncated]'
3448
+ : message;
3449
+
3450
+ aiResult = await this.tracer.withSpan('ai.request', executeAIRequest, {
3451
+ 'ai.model': this.model,
3452
+ 'ai.provider': this.clientApiProvider || 'auto',
3453
+ 'ai.input': inputPreview,
3454
+ 'ai.input_length': message.length,
3455
+ 'max_steps': maxIterations,
3456
+ 'max_tokens': maxResponseTokens,
3457
+ 'temperature': 0.3,
3458
+ 'message_count': currentMessages.length
3459
+ });
4089
3460
  } else {
4090
- // Different response (and not both stuck), reset counter
4091
- lastNoToolResponse = assistantResponseContent;
4092
- sameResponseCount = 1;
3461
+ aiResult = await executeAIRequest();
4093
3462
  }
4094
3463
 
4095
- // Circuit breaker: If we've had MAX_CONSECUTIVE_NO_TOOL iterations without any tool call,
4096
- // force completion to avoid infinite loops (e.g., agent alternating between "can't proceed" variations)
4097
- if (consecutiveNoToolCount >= MAX_CONSECUTIVE_NO_TOOL) {
4098
- let cleanedResponse = assistantResponseContent;
4099
- cleanedResponse = cleanedResponse.replace(/<thinking>[\s\S]*?<\/thinking>/gi, '').trim();
4100
- cleanedResponse = cleanedResponse.replace(/<thinking>[\s\S]*$/gi, '').trim();
3464
+ // Use completion result if available, otherwise use final text
3465
+ if (completionResult) {
3466
+ finalResult = completionResult;
4101
3467
 
4102
- if (cleanedResponse.length > 50) {
4103
- if (this.debug) {
4104
- console.log(`[DEBUG] Circuit breaker: ${consecutiveNoToolCount} consecutive no-tool responses - forcing completion`);
3468
+ // Stream the final result if callback is provided
3469
+ if (options.onStream && finalResult) {
3470
+ const chunkSize = 50;
3471
+ for (let i = 0; i < finalResult.length; i += chunkSize) {
3472
+ const chunk = finalResult.slice(i, Math.min(i + chunkSize, finalResult.length));
3473
+ options.onStream(chunk);
4105
3474
  }
4106
- // Record this in telemetry
4107
- this._recordErrorTelemetry('consecutive_no_tool_circuit_breaker', `Forced completion after ${consecutiveNoToolCount} consecutive no-tool responses`, { responsePreview: cleanedResponse.substring(0, 500) }, currentIteration);
4108
- finalResult = cleanedResponse;
4109
- completionAttempted = true;
4110
- break;
4111
3475
  }
3476
+ } else if (aiResult.finalText) {
3477
+ finalResult = aiResult.finalText;
3478
+ completionAttempted = true;
4112
3479
  }
4113
3480
 
4114
- // Add assistant response and ask for tool usage
4115
- currentMessages.push({ role: 'assistant', content: assistantResponseContent });
4116
-
4117
- // Check if the AI tried to use a tool that's not in the valid tools list
4118
- const unrecognizedTool = detectUnrecognizedToolCall(assistantResponseContent, validTools);
4119
-
4120
- let reminderContent;
4121
- if (isWrappedToolError(unrecognizedTool)) {
4122
- // AI wrapped a valid tool name in arbitrary XML tags - provide clear format error
4123
- const wrappedToolName = extractWrappedToolName(unrecognizedTool);
4124
- if (this.debug) {
4125
- console.log(`[DEBUG] Detected wrapped tool '${wrappedToolName}' in assistant response - wrong XML format.`);
4126
- }
4127
-
4128
- // Record wrapped tool error in telemetry
4129
- this._recordErrorTelemetry('wrapped_tool', 'Tool call wrapped in markdown', { toolName: wrappedToolName }, currentIteration);
4130
-
4131
- const toolError = new ParameterError(
4132
- `Tool '${wrappedToolName}' found but in WRONG FORMAT - do not wrap tools in other XML tags.`,
4133
- {
4134
- suggestion: `Use the tool tag DIRECTLY without any wrapper:
4135
-
4136
- CORRECT FORMAT:
4137
- <${wrappedToolName}>
4138
- <param>value</param>
4139
- </${wrappedToolName}>
4140
-
4141
- WRONG (what you did - do not wrap in other tags):
4142
- <api_call><tool_name>${wrappedToolName}</tool_name>...</api_call>
4143
- <function>${wrappedToolName}</function>
4144
- <call name="${wrappedToolName}">...</call>
4145
-
4146
- Remove ALL wrapper tags and use <${wrappedToolName}> directly as the outermost tag.`
4147
- }
4148
- );
4149
- reminderContent = `<tool_result>\n${formatErrorForAI(toolError)}\n</tool_result>`;
4150
- } else if (unrecognizedTool) {
4151
- // AI tried to use a tool that's not available - provide clear error
4152
- if (this.debug) {
4153
- console.log(`[DEBUG] Detected unrecognized tool '${unrecognizedTool}' in assistant response.`);
3481
+ // Update currentMessages from the result for history storage
3482
+ // The SDK manages the full message history internally
3483
+ const resultMessages = await aiResult.result.response?.messages;
3484
+ if (resultMessages) {
3485
+ // Append the AI-generated messages to our message list
3486
+ for (const msg of resultMessages) {
3487
+ currentMessages.push(msg);
4154
3488
  }
3489
+ }
4155
3490
 
4156
- // Record unrecognized tool error in telemetry
4157
- this._recordErrorTelemetry('unrecognized_tool', `Unknown tool: ${unrecognizedTool}`, { toolName: unrecognizedTool, validTools }, currentIteration);
3491
+ break; // Success
4158
3492
 
4159
- const toolError = new ParameterError(`Tool '${unrecognizedTool}' is not available in this context.`, {
4160
- suggestion: `Available tools: ${validTools.join(', ')}. Please use one of these tools instead.`
3493
+ } catch (error) {
3494
+ // Handle context-limit error: compact messages and retry (once)
3495
+ if (!compactionAttempted && handleContextLimitError) {
3496
+ const compactionResult = handleContextLimitError(error, currentMessages, {
3497
+ keepLastSegment: true,
3498
+ minSegmentsToKeep: 1
4161
3499
  });
4162
- reminderContent = `<tool_result>\n${formatErrorForAI(toolError)}\n</tool_result>`;
4163
- } else {
4164
- // No tool call detected at all - record in telemetry
4165
- this._recordErrorTelemetry('no_tool_call', 'AI response did not contain tool call', { responsePreview: assistantResponseContent.substring(0, 500) }, currentIteration);
4166
-
4167
- // Check if this is the last iteration
4168
- // On the last iteration, if the AI gave a substantive response without using
4169
- // attempt_completion, accept it as the final answer rather than losing the content
4170
- if (currentIteration >= maxIterations) {
4171
- // Clean up the response - remove thinking tags
4172
- let cleanedResponse = assistantResponseContent;
4173
- // Remove <thinking>...</thinking> blocks
4174
- cleanedResponse = cleanedResponse.replace(/<thinking>[\s\S]*?<\/thinking>/gi, '').trim();
4175
- // Also remove unclosed thinking tags
4176
- cleanedResponse = cleanedResponse.replace(/<thinking>[\s\S]*$/gi, '').trim();
4177
-
4178
- // Only use if there's substantial content (not just a failed tool call attempt)
4179
- const hasSubstantialContent = cleanedResponse.length > 50 &&
4180
- !cleanedResponse.includes('<api_call>') &&
4181
- !cleanedResponse.includes('<tool_name>') &&
4182
- !cleanedResponse.includes('<function>');
4183
-
4184
- if (hasSubstantialContent) {
4185
- if (this.debug) {
4186
- console.log(`[DEBUG] Max iterations reached - accepting AI response as final answer (${cleanedResponse.length} chars)`);
4187
- }
4188
- finalResult = cleanedResponse;
4189
- completionAttempted = true;
4190
- break;
4191
- }
4192
- }
4193
-
4194
- // Standard reminder - no tool call detected at all
4195
- reminderContent = `Please use one of the available tools to help answer the question, or use attempt_completion if you have enough information to provide a final answer.
4196
3500
 
4197
- Remember: Use proper XML format with BOTH opening and closing tags:
3501
+ if (compactionResult) {
3502
+ const { messages: compactedMessages, stats } = compactionResult;
4198
3503
 
4199
- <tool_name>
4200
- <parameter>value</parameter>
4201
- </tool_name>
4202
-
4203
- Available tools: ${validTools.join(', ')}
4204
-
4205
- To complete with a direct answer:
4206
- <attempt_completion>Your final answer here</attempt_completion>
3504
+ if (stats.removed === 0) {
3505
+ console.error(`[ERROR] Context window exceeded but no messages can be compacted.`);
3506
+ finalResult = `Error: Context window limit exceeded and conversation cannot be compacted further.`;
3507
+ throw new Error(finalResult);
3508
+ }
4207
3509
 
4208
- Or if your previous response already contains a complete, direct answer (not a thinking block or JSON):
4209
- <attempt_complete></attempt_complete>
3510
+ compactionAttempted = true;
3511
+ console.log(`[INFO] Context window limit exceeded. Compacting conversation...`);
3512
+ console.log(`[INFO] Removed ${stats.removed} messages (${stats.reductionPercent}% reduction)`);
4210
3513
 
4211
- Note: <attempt_complete></attempt_complete> reuses your PREVIOUS assistant message as the final answer. Only use this if that message was already a valid, complete response to the user's question.`;
4212
- }
3514
+ currentMessages = [...compactedMessages];
4213
3515
 
4214
- // Check if we should replace the previous reminder instead of appending
4215
- // After pushing assistant message, the previous user message (if a reminder) is at length - 2
4216
- // Message pattern: [..., prev_assistant, prev_user_reminder, current_assistant]
4217
- const prevUserMsgIndex = currentMessages.length - 2;
4218
- const prevUserMsg = currentMessages[prevUserMsgIndex];
4219
- const isExistingReminder = prevUserMsg && prevUserMsg.role === 'user' &&
4220
- (prevUserMsg.content.includes('Please use one of the available tools') ||
4221
- prevUserMsg.content.includes('<tool_result>'));
4222
-
4223
- if (isExistingReminder && sameResponseCount > 1) {
4224
- // Replace the previous reminder with updated content and remove duplicated assistant message
4225
- // This prevents context bloat from repeated identical exchanges
4226
- // Pattern: [..., prev_assistant, prev_user_reminder, current_assistant] -> [..., current_assistant, new_reminder]
4227
- const prevAssistantIndex = prevUserMsgIndex - 1;
4228
-
4229
- // Validate the expected pattern before splicing:
4230
- // 1. prevAssistantIndex must be valid (>= 0)
4231
- // 2. If there's a system message at index 0, don't remove it (prevAssistantIndex > 0)
4232
- // 3. Must be an assistant message at prevAssistantIndex
4233
- // 4. After removal, array should have at least 2 messages (current assistant + new reminder)
4234
- const hasSystemMessage = currentMessages.length > 0 && currentMessages[0].role === 'system';
4235
- const minValidIndex = hasSystemMessage ? 1 : 0;
4236
- const canSafelyRemove = prevAssistantIndex >= minValidIndex &&
4237
- currentMessages[prevAssistantIndex] &&
4238
- currentMessages[prevAssistantIndex].role === 'assistant' &&
4239
- (currentMessages.length - 2) >= (hasSystemMessage ? 2 : 1); // After removal: at least system+assistant or just assistant
4240
-
4241
- if (canSafelyRemove) {
4242
- // Remove the duplicate assistant and old reminder (2 messages starting at prevAssistantIndex)
4243
- currentMessages.splice(prevAssistantIndex, 2);
4244
- if (this.debug) {
4245
- console.log(`[DEBUG] Removed duplicate assistant+reminder pair (iteration ${currentIteration}, same response #${sameResponseCount})`);
3516
+ if (this.tracer) {
3517
+ this.tracer.addEvent('context.compacted', {
3518
+ 'original_count': stats.originalCount,
3519
+ 'compacted_count': stats.compactedCount,
3520
+ 'reduction_percent': stats.reductionPercent,
3521
+ 'tokens_saved': stats.tokensSaved
3522
+ });
4246
3523
  }
4247
- } else if (this.debug) {
4248
- console.log(`[DEBUG] Skipped deduplication: pattern validation failed (prevAssistantIndex=${prevAssistantIndex}, arrayLength=${currentMessages.length})`);
4249
- }
4250
3524
 
4251
- // Add iteration context to help the AI understand this is a repeated attempt
4252
- const iterationHint = `\n\n(Attempt #${sameResponseCount}: Your previous ${sameResponseCount} responses were identical. If you have a complete answer, use <attempt_complete></attempt_complete> to finalize it.)`;
4253
- currentMessages.push({
4254
- role: 'user',
4255
- content: reminderContent + iterationHint
4256
- });
4257
- } else {
4258
- currentMessages.push({
4259
- role: 'user',
4260
- content: reminderContent
4261
- });
4262
- }
4263
-
4264
- if (this.debug) {
4265
- if (unrecognizedTool) {
4266
- console.log(`[DEBUG] Unrecognized tool '${unrecognizedTool}' used. Providing error feedback.`);
4267
- } else {
4268
- console.log(`[DEBUG] No tool call detected in assistant response. Prompting for tool use.`);
3525
+ continue; // Retry with compacted messages
4269
3526
  }
4270
3527
  }
4271
3528
 
4272
- // Circuit breaker: track repeated format errors and break early
4273
- // For wrapped_tool errors, track them as a category (any wrapped_tool counts)
4274
- // For other errors, track the exact error type
4275
- if (unrecognizedTool) {
4276
- const isWrapped = isWrappedToolError(unrecognizedTool);
4277
- const errorCategory = isWrapped ? 'wrapped_tool' : unrecognizedTool;
4278
-
4279
- if (errorCategory === lastFormatErrorType) {
4280
- sameFormatErrorCount++;
4281
- if (sameFormatErrorCount >= MAX_REPEATED_FORMAT_ERRORS) {
4282
- const errorDesc = isWrapped ? 'wrapped tool format' : unrecognizedTool;
4283
-
4284
- // Record circuit breaker error in telemetry
4285
- this._recordErrorTelemetry('circuit_breaker', 'Format error limit exceeded', { formatErrorCount: sameFormatErrorCount, errorCategory }, currentIteration);
4286
-
4287
- console.error(`[ERROR] Format error category '${errorCategory}' repeated ${sameFormatErrorCount} times. Breaking loop early to prevent infinite iteration.`);
4288
- finalResult = `Error: Unable to complete request. The AI model repeatedly used incorrect tool call format (${errorDesc}). Please try rephrasing your question or using a different model.`;
4289
- break;
4290
- }
4291
- } else {
4292
- lastFormatErrorType = errorCategory;
4293
- sameFormatErrorCount = 1;
4294
- }
4295
- } else {
4296
- // Reset counter if it's a different kind of "no tool call" situation
4297
- lastFormatErrorType = null;
4298
- sameFormatErrorCount = 0;
3529
+ // Handle AbortError from attempt_completion gracefully
3530
+ if (completionResult) {
3531
+ finalResult = completionResult;
3532
+ break;
4299
3533
  }
4300
- }
4301
3534
 
4302
- // Record iteration end event
4303
- this._recordIterationTelemetry('end', currentIteration, {
4304
- 'iteration.completed': completionAttempted,
4305
- 'iteration.message_count': currentMessages.length
4306
- });
4307
-
4308
- // Keep message history manageable
4309
- if (currentMessages.length > MAX_HISTORY_MESSAGES) {
4310
- const messagesBefore = currentMessages.length;
4311
- const systemMsg = currentMessages[0]; // Keep system message
4312
- const recentMessages = currentMessages.slice(-MAX_HISTORY_MESSAGES + 1);
4313
- currentMessages = [systemMsg, ...recentMessages];
4314
-
4315
- if (this.debug) {
4316
- console.log(`[DEBUG] Trimmed message history from ${messagesBefore} to ${currentMessages.length} messages`);
4317
- }
3535
+ console.error(`Error during streamText:`, error);
3536
+ finalResult = `Error: Failed to get response from AI model. ${error.message}`;
3537
+ throw new Error(finalResult);
4318
3538
  }
4319
3539
  }
4320
3540
 
@@ -4853,28 +4073,7 @@ Convert your previous response content into actual JSON data that follows this s
4853
4073
  console.log(`[DEBUG] Mermaid validation: Skipped final validation due to disableMermaidValidation option`);
4854
4074
  }
4855
4075
 
4856
- // Remove thinking tags from final result before returning to user
4857
- // Skip for valid JSON to avoid destroying JSON structure when <thinking> appears
4858
- // inside string values (e.g., after tryAutoWrapForSimpleSchema embeds content with
4859
- // residual thinking tag fragments — issue #439)
4860
- if (!options._schemaFormatted) {
4861
- let isValidJson = false;
4862
- try {
4863
- JSON.parse(finalResult);
4864
- isValidJson = true;
4865
- } catch {
4866
- // Not valid JSON, proceed with thinking tag removal
4867
- }
4868
4076
 
4869
- if (!isValidJson) {
4870
- finalResult = removeThinkingTags(finalResult);
4871
- if (this.debug) {
4872
- console.log(`[DEBUG] Removed thinking tags from final result`);
4873
- }
4874
- } else if (this.debug) {
4875
- console.log(`[DEBUG] Skipped thinking tag removal for valid JSON result (issue #439)`);
4876
- }
4877
- }
4878
4077
 
4879
4078
  // Append DSL output buffer directly to response (bypasses LLM rewriting)
4880
4079
  // Skip during _completionPromptProcessed — only the parent answer() should append the buffer.