cowork-os 0.3.21 → 0.3.25

This diff compares the contents of two publicly released versions of a package as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (252)
  1. package/README.md +372 -10
  2. package/connectors/README.md +20 -0
  3. package/connectors/asana-mcp/README.md +24 -0
  4. package/connectors/asana-mcp/dist/index.js +427 -0
  5. package/connectors/asana-mcp/package.json +15 -0
  6. package/connectors/asana-mcp/src/index.ts +553 -0
  7. package/connectors/asana-mcp/tsconfig.json +13 -0
  8. package/connectors/hubspot-mcp/README.md +35 -0
  9. package/connectors/hubspot-mcp/dist/index.js +454 -0
  10. package/connectors/hubspot-mcp/package.json +15 -0
  11. package/connectors/hubspot-mcp/src/index.ts +562 -0
  12. package/connectors/hubspot-mcp/tsconfig.json +13 -0
  13. package/connectors/jira-mcp/README.md +49 -0
  14. package/connectors/jira-mcp/dist/index.js +588 -0
  15. package/connectors/jira-mcp/package.json +15 -0
  16. package/connectors/jira-mcp/src/index.ts +711 -0
  17. package/connectors/jira-mcp/tsconfig.json +13 -0
  18. package/connectors/linear-mcp/README.md +22 -0
  19. package/connectors/linear-mcp/dist/index.js +402 -0
  20. package/connectors/linear-mcp/package.json +15 -0
  21. package/connectors/linear-mcp/src/index.ts +522 -0
  22. package/connectors/linear-mcp/tsconfig.json +13 -0
  23. package/connectors/okta-mcp/README.md +24 -0
  24. package/connectors/okta-mcp/dist/index.js +411 -0
  25. package/connectors/okta-mcp/package.json +15 -0
  26. package/connectors/okta-mcp/src/index.ts +520 -0
  27. package/connectors/okta-mcp/tsconfig.json +13 -0
  28. package/connectors/salesforce-mcp/README.md +47 -0
  29. package/connectors/salesforce-mcp/dist/index.js +584 -0
  30. package/connectors/salesforce-mcp/package.json +15 -0
  31. package/connectors/salesforce-mcp/src/index.ts +722 -0
  32. package/connectors/salesforce-mcp/tsconfig.json +13 -0
  33. package/connectors/servicenow-mcp/README.md +26 -0
  34. package/connectors/servicenow-mcp/dist/index.js +400 -0
  35. package/connectors/servicenow-mcp/package.json +15 -0
  36. package/connectors/servicenow-mcp/src/index.ts +500 -0
  37. package/connectors/servicenow-mcp/tsconfig.json +13 -0
  38. package/connectors/templates/mcp-connector/README.md +31 -0
  39. package/connectors/templates/mcp-connector/package.json +15 -0
  40. package/connectors/templates/mcp-connector/src/index.ts +330 -0
  41. package/connectors/templates/mcp-connector/tsconfig.json +13 -0
  42. package/connectors/zendesk-mcp/README.md +40 -0
  43. package/connectors/zendesk-mcp/dist/index.js +431 -0
  44. package/connectors/zendesk-mcp/package.json +15 -0
  45. package/connectors/zendesk-mcp/src/index.ts +543 -0
  46. package/connectors/zendesk-mcp/tsconfig.json +13 -0
  47. package/dist/electron/electron/agent/custom-skill-loader.js +31 -1
  48. package/dist/electron/electron/agent/daemon.js +189 -13
  49. package/dist/electron/electron/agent/executor.js +895 -78
  50. package/dist/electron/electron/agent/llm/anthropic-compatible-provider.js +177 -0
  51. package/dist/electron/electron/agent/llm/azure-openai-provider.js +328 -0
  52. package/dist/electron/electron/agent/llm/bedrock-provider.js +49 -9
  53. package/dist/electron/electron/agent/llm/github-copilot-provider.js +97 -0
  54. package/dist/electron/electron/agent/llm/groq-provider.js +33 -0
  55. package/dist/electron/electron/agent/llm/index.js +13 -1
  56. package/dist/electron/electron/agent/llm/kimi-provider.js +33 -0
  57. package/dist/electron/electron/agent/llm/openai-compatible-provider.js +116 -0
  58. package/dist/electron/electron/agent/llm/openai-compatible.js +111 -0
  59. package/dist/electron/electron/agent/llm/openai-oauth.js +2 -1
  60. package/dist/electron/electron/agent/llm/openrouter-provider.js +1 -1
  61. package/dist/electron/electron/agent/llm/provider-factory.js +350 -4
  62. package/dist/electron/electron/agent/llm/types.js +66 -1
  63. package/dist/electron/electron/agent/llm/xai-provider.js +33 -0
  64. package/dist/electron/electron/agent/search/provider-factory.js +38 -2
  65. package/dist/electron/electron/agent/tools/box-tools.js +231 -0
  66. package/dist/electron/electron/agent/tools/builtin-settings.js +28 -0
  67. package/dist/electron/electron/agent/tools/dropbox-tools.js +237 -0
  68. package/dist/electron/electron/agent/tools/file-tools.js +66 -3
  69. package/dist/electron/electron/agent/tools/google-drive-tools.js +227 -0
  70. package/dist/electron/electron/agent/tools/grep-tools.js +90 -10
  71. package/dist/electron/electron/agent/tools/image-tools.js +11 -1
  72. package/dist/electron/electron/agent/tools/notion-tools.js +312 -0
  73. package/dist/electron/electron/agent/tools/onedrive-tools.js +217 -0
  74. package/dist/electron/electron/agent/tools/registry.js +548 -10
  75. package/dist/electron/electron/agent/tools/search-tools.js +28 -10
  76. package/dist/electron/electron/agent/tools/sharepoint-tools.js +243 -0
  77. package/dist/electron/electron/agent/tools/shell-tools.js +12 -3
  78. package/dist/electron/electron/agent/tools/x-tools.js +1 -1
  79. package/dist/electron/electron/agents/agent-dispatch.js +63 -0
  80. package/dist/electron/electron/database/repositories.js +19 -5
  81. package/dist/electron/electron/database/schema.js +8 -0
  82. package/dist/electron/electron/gateway/channels/whatsapp.js +55 -0
  83. package/dist/electron/electron/gateway/index.js +75 -1
  84. package/dist/electron/electron/gateway/router.js +209 -154
  85. package/dist/electron/electron/ipc/canvas-handlers.js +5 -0
  86. package/dist/electron/electron/ipc/handlers.js +763 -267
  87. package/dist/electron/electron/main.js +63 -0
  88. package/dist/electron/electron/mcp/oauth/connector-oauth.js +333 -0
  89. package/dist/electron/electron/mcp/registry/MCPRegistryManager.js +503 -154
  90. package/dist/electron/electron/memory/MemoryService.js +2 -1
  91. package/dist/electron/electron/preload.js +78 -1
  92. package/dist/electron/electron/settings/appearance-manager.js +18 -1
  93. package/dist/electron/electron/settings/box-manager.js +54 -0
  94. package/dist/electron/electron/settings/dropbox-manager.js +54 -0
  95. package/dist/electron/electron/settings/google-drive-manager.js +54 -0
  96. package/dist/electron/electron/settings/notion-manager.js +56 -0
  97. package/dist/electron/electron/settings/onedrive-manager.js +54 -0
  98. package/dist/electron/electron/settings/sharepoint-manager.js +54 -0
  99. package/dist/electron/electron/utils/box-api.js +153 -0
  100. package/dist/electron/electron/utils/dropbox-api.js +144 -0
  101. package/dist/electron/electron/utils/env-migration.js +19 -0
  102. package/dist/electron/electron/utils/google-drive-api.js +152 -0
  103. package/dist/electron/electron/utils/notion-api.js +103 -0
  104. package/dist/electron/electron/utils/onedrive-api.js +113 -0
  105. package/dist/electron/electron/utils/sharepoint-api.js +109 -0
  106. package/dist/electron/electron/utils/validation.js +98 -3
  107. package/dist/electron/electron/utils/x-cli.js +1 -1
  108. package/dist/electron/shared/channelMessages.js +284 -3
  109. package/dist/electron/shared/llm-provider-catalog.js +198 -0
  110. package/dist/electron/shared/types.js +90 -1
  111. package/package.json +14 -3
  112. package/resources/skills/nano-banana-pro.json +4 -4
  113. package/resources/skills/openai-image-gen.json +3 -3
  114. package/resources/skills/scripts/gen.py +163 -0
  115. package/resources/skills/scripts/generate_image.py +91 -0
  116. package/src/electron/agent/custom-skill-loader.ts +34 -1
  117. package/src/electron/agent/daemon.ts +210 -14
  118. package/src/electron/agent/executor.ts +1124 -85
  119. package/src/electron/agent/llm/anthropic-compatible-provider.ts +214 -0
  120. package/src/electron/agent/llm/azure-openai-provider.ts +388 -0
  121. package/src/electron/agent/llm/bedrock-provider.ts +62 -9
  122. package/src/electron/agent/llm/github-copilot-provider.ts +117 -0
  123. package/src/electron/agent/llm/groq-provider.ts +39 -0
  124. package/src/electron/agent/llm/index.ts +6 -0
  125. package/src/electron/agent/llm/kimi-provider.ts +39 -0
  126. package/src/electron/agent/llm/openai-compatible-provider.ts +153 -0
  127. package/src/electron/agent/llm/openai-compatible.ts +133 -0
  128. package/src/electron/agent/llm/openai-oauth.ts +2 -1
  129. package/src/electron/agent/llm/openrouter-provider.ts +2 -1
  130. package/src/electron/agent/llm/provider-factory.ts +459 -6
  131. package/src/electron/agent/llm/types.ts +95 -1
  132. package/src/electron/agent/llm/xai-provider.ts +39 -0
  133. package/src/electron/agent/search/provider-factory.ts +43 -2
  134. package/src/electron/agent/tools/box-tools.ts +239 -0
  135. package/src/electron/agent/tools/builtin-settings.ts +36 -0
  136. package/src/electron/agent/tools/dropbox-tools.ts +237 -0
  137. package/src/electron/agent/tools/file-tools.ts +66 -3
  138. package/src/electron/agent/tools/gmail-tools.ts +240 -0
  139. package/src/electron/agent/tools/google-calendar-tools.ts +258 -0
  140. package/src/electron/agent/tools/google-drive-tools.ts +228 -0
  141. package/src/electron/agent/tools/grep-tools.ts +97 -12
  142. package/src/electron/agent/tools/image-tools.ts +11 -1
  143. package/src/electron/agent/tools/notion-tools.ts +330 -0
  144. package/src/electron/agent/tools/onedrive-tools.ts +217 -0
  145. package/src/electron/agent/tools/registry.ts +794 -10
  146. package/src/electron/agent/tools/search-tools.ts +29 -11
  147. package/src/electron/agent/tools/sharepoint-tools.ts +247 -0
  148. package/src/electron/agent/tools/shell-tools.ts +11 -3
  149. package/src/electron/agent/tools/x-tools.ts +1 -1
  150. package/src/electron/agents/agent-dispatch.ts +79 -0
  151. package/src/electron/database/SecureSettingsRepository.ts +7 -1
  152. package/src/electron/database/repositories.ts +58 -6
  153. package/src/electron/database/schema.ts +8 -0
  154. package/src/electron/gateway/channels/discord.ts +4 -0
  155. package/src/electron/gateway/channels/google-chat.ts +3 -0
  156. package/src/electron/gateway/channels/line.ts +3 -0
  157. package/src/electron/gateway/channels/matrix-client.ts +15 -0
  158. package/src/electron/gateway/channels/matrix.ts +31 -0
  159. package/src/electron/gateway/channels/mattermost.ts +3 -0
  160. package/src/electron/gateway/channels/signal.ts +3 -0
  161. package/src/electron/gateway/channels/slack.ts +9 -4
  162. package/src/electron/gateway/channels/teams.ts +4 -0
  163. package/src/electron/gateway/channels/telegram.ts +2 -0
  164. package/src/electron/gateway/channels/twitch.ts +2 -0
  165. package/src/electron/gateway/channels/types.ts +8 -0
  166. package/src/electron/gateway/channels/whatsapp.ts +66 -0
  167. package/src/electron/gateway/index.ts +95 -2
  168. package/src/electron/gateway/router.ts +231 -161
  169. package/src/electron/gateway/security.ts +21 -9
  170. package/src/electron/ipc/canvas-handlers.ts +10 -0
  171. package/src/electron/ipc/handlers.ts +848 -292
  172. package/src/electron/main.ts +35 -0
  173. package/src/electron/mcp/oauth/connector-oauth.ts +448 -0
  174. package/src/electron/mcp/registry/MCPRegistryManager.ts +343 -12
  175. package/src/electron/memory/MemoryService.ts +7 -1
  176. package/src/electron/preload.ts +200 -5
  177. package/src/electron/settings/appearance-manager.ts +20 -2
  178. package/src/electron/settings/box-manager.ts +58 -0
  179. package/src/electron/settings/dropbox-manager.ts +58 -0
  180. package/src/electron/settings/google-workspace-manager.ts +59 -0
  181. package/src/electron/settings/notion-manager.ts +60 -0
  182. package/src/electron/settings/onedrive-manager.ts +58 -0
  183. package/src/electron/settings/sharepoint-manager.ts +58 -0
  184. package/src/electron/utils/box-api.ts +184 -0
  185. package/src/electron/utils/dropbox-api.ts +171 -0
  186. package/src/electron/utils/env-migration.ts +22 -0
  187. package/src/electron/utils/gmail-api.ts +121 -0
  188. package/src/electron/utils/google-calendar-api.ts +115 -0
  189. package/src/electron/utils/google-workspace-api.ts +228 -0
  190. package/src/electron/utils/google-workspace-auth.ts +109 -0
  191. package/src/electron/utils/google-workspace-oauth.ts +232 -0
  192. package/src/electron/utils/notion-api.ts +126 -0
  193. package/src/electron/utils/onedrive-api.ts +137 -0
  194. package/src/electron/utils/sharepoint-api.ts +132 -0
  195. package/src/electron/utils/validation.ts +128 -1
  196. package/src/electron/utils/x-cli.ts +1 -1
  197. package/src/renderer/App.tsx +119 -8
  198. package/src/renderer/components/ActivityFeedItem.tsx +34 -17
  199. package/src/renderer/components/AgentWorkingStatePanel.tsx +7 -5
  200. package/src/renderer/components/AppearanceSettings.tsx +37 -2
  201. package/src/renderer/components/BlueBubblesSettings.tsx +18 -7
  202. package/src/renderer/components/BoxSettings.tsx +203 -0
  203. package/src/renderer/components/BrowserView.tsx +101 -0
  204. package/src/renderer/components/BuiltinToolsSettings.tsx +105 -0
  205. package/src/renderer/components/CanvasPreview.tsx +68 -1
  206. package/src/renderer/components/ConnectorEnvModal.tsx +116 -0
  207. package/src/renderer/components/ConnectorSetupModal.tsx +566 -0
  208. package/src/renderer/components/ConnectorsSettings.tsx +397 -0
  209. package/src/renderer/components/ControlPlaneSettings.tsx +2 -0
  210. package/src/renderer/components/DiscordSettings.tsx +18 -7
  211. package/src/renderer/components/DropboxSettings.tsx +202 -0
  212. package/src/renderer/components/EmailSettings.tsx +18 -7
  213. package/src/renderer/components/FileViewer.tsx +21 -13
  214. package/src/renderer/components/GoogleChatSettings.tsx +17 -7
  215. package/src/renderer/components/GoogleWorkspaceSettings.tsx +332 -0
  216. package/src/renderer/components/ImessageSettings.tsx +22 -11
  217. package/src/renderer/components/LineIcons.tsx +376 -0
  218. package/src/renderer/components/LineSettings.tsx +18 -7
  219. package/src/renderer/components/MCPSettings.tsx +56 -0
  220. package/src/renderer/components/MainContent.tsx +740 -76
  221. package/src/renderer/components/MatrixSettings.tsx +18 -7
  222. package/src/renderer/components/MattermostSettings.tsx +18 -7
  223. package/src/renderer/components/NodesSettings.tsx +58 -99
  224. package/src/renderer/components/NotificationPanel.tsx +25 -11
  225. package/src/renderer/components/NotionSettings.tsx +231 -0
  226. package/src/renderer/components/Onboarding/Onboarding.tsx +13 -1
  227. package/src/renderer/components/OnboardingModal.tsx +70 -1
  228. package/src/renderer/components/OneDriveSettings.tsx +212 -0
  229. package/src/renderer/components/RightPanel.tsx +141 -28
  230. package/src/renderer/components/ScheduledTasksSettings.tsx +10 -62
  231. package/src/renderer/components/SearchSettings.tsx +118 -114
  232. package/src/renderer/components/Settings.tsx +1425 -651
  233. package/src/renderer/components/SharePointSettings.tsx +224 -0
  234. package/src/renderer/components/Sidebar.tsx +94 -19
  235. package/src/renderer/components/SignalSettings.tsx +18 -7
  236. package/src/renderer/components/SkillHubBrowser.tsx +144 -185
  237. package/src/renderer/components/SlackSettings.tsx +18 -7
  238. package/src/renderer/components/TaskQuickActions.tsx +11 -6
  239. package/src/renderer/components/TaskTimeline.tsx +58 -26
  240. package/src/renderer/components/TeamsSettings.tsx +18 -7
  241. package/src/renderer/components/TelegramSettings.tsx +18 -7
  242. package/src/renderer/components/ThemeIcon.tsx +16 -0
  243. package/src/renderer/components/TwitchSettings.tsx +18 -7
  244. package/src/renderer/components/VoiceSettings.tsx +30 -74
  245. package/src/renderer/components/WhatsAppSettings.tsx +48 -37
  246. package/src/renderer/components/WorkingStateHistory.tsx +7 -5
  247. package/src/renderer/components/WorkspaceSelector.tsx +42 -13
  248. package/src/renderer/hooks/useOnboardingFlow.ts +21 -0
  249. package/src/renderer/styles/index.css +2333 -209
  250. package/src/shared/channelMessages.ts +367 -4
  251. package/src/shared/llm-provider-catalog.ts +217 -0
  252. package/src/shared/types.ts +251 -2
@@ -34,6 +34,7 @@ var __importStar = (this && this.__importStar) || (function () {
34
34
  })();
35
35
  Object.defineProperty(exports, "__esModule", { value: true });
36
36
  exports.TaskExecutor = void 0;
37
+ const types_1 = require("../../shared/types");
37
38
  const fs = __importStar(require("fs"));
38
39
  const path = __importStar(require("path"));
39
40
  const registry_1 = require("./tools/registry");
@@ -46,6 +47,7 @@ const pricing_1 = require("./llm/pricing");
46
47
  const custom_skill_loader_1 = require("./custom-skill-loader");
47
48
  const MemoryService_1 = require("../memory/MemoryService");
48
49
  const security_1 = require("./security");
50
+ const builtin_settings_1 = require("./tools/builtin-settings");
49
51
  class AwaitingUserInputError extends Error {
50
52
  constructor(message) {
51
53
  super(message);
@@ -56,7 +58,7 @@ class AwaitingUserInputError extends Error {
56
58
  const LLM_TIMEOUT_MS = 2 * 60 * 1000;
57
59
  // Per-step timeout (5 minutes max per step)
58
60
  const STEP_TIMEOUT_MS = 5 * 60 * 1000;
59
- // Per-tool execution timeout (45 seconds - balance responsiveness with heavier tools)
61
+ // Default per-tool execution timeout (overrideable per tool)
60
62
  const TOOL_TIMEOUT_MS = 30 * 1000;
61
63
  // Maximum consecutive failures for the same tool before giving up
62
64
  const MAX_TOOL_FAILURES = 2;
@@ -103,6 +105,22 @@ const INPUT_DEPENDENT_ERROR_PATTERNS = [
103
105
  /applescript execution failed/i, // AppleScript errors are input-related
104
106
  /user denied/i, // User denied an approval request
105
107
  ];
// Lower-case substrings looked for in a step description to decide whether the
// step is about an image (see stepRequiresImageVerification).
const IMAGE_VERIFICATION_KEYWORDS = [
    'image', 'photo', 'photograph', 'picture', 'render', 'illustration',
    'png', 'jpg', 'jpeg', 'webp',
];
// Case-insensitive test for a path that ends in a supported image extension.
const IMAGE_FILE_EXTENSION_REGEX = /\.(png|jpe?g|webp|gif|bmp)$/i;
// Tolerance (ms) subtracted from the reference time when comparing file
// timestamps, to absorb timestamp granularity and clock skew.
const IMAGE_VERIFICATION_TIME_SKEW_MS = 1000;
106
124
  /**
107
125
  * Check if an error is non-retryable (quota/rate limit related)
108
126
  * These errors indicate a systemic problem with the tool/API
@@ -159,27 +177,6 @@ function isAskingQuestion(text) {
159
177
  const trimmed = text.trim();
160
178
  if (!trimmed)
161
179
  return false;
162
- // Keep this lightweight and conservative: only pause on questions that
163
- // clearly request input/decisions needed to proceed.
164
- const blockingQuestionPatterns = [
165
- // Direct requests for info or confirmation
166
- /(?:^|\n)\s*(?:please\s+)?(?:provide|share|send|upload|enter|paste|specify|clarify|confirm|choose|pick|select)\b/i,
167
- /(?:can|could|would)\s+you\s+(?:please\s+)?(?:provide|share|send|upload|enter|paste|specify|clarify|confirm|choose|pick|select)\b/i,
168
- // Decision/approval questions
169
- /would\s+you\s+like\s+me\s+to\b/i,
170
- /would\s+you\s+prefer\b/i,
171
- /should\s+i\b/i,
172
- /do\s+you\s+want\s+me\s+to\b/i,
173
- /do\s+you\s+prefer\b/i,
174
- /is\s+it\s+(?:ok|okay|alright)\s+if\s+i\b/i,
175
- // Clarifying questions about specifics
176
- /\bwhat\s+(?:is|are|was|were|should|would|can|could|do|does|did)\s+(?:the|your|this|that)\b/i,
177
- /\bwhat\s+should\s+i\b/i,
178
- /\bwhich\s+(?:one|option|approach|method|file|version|environment|format|branch|repo|path)\b/i,
179
- /\bwhere\s+(?:is|are|should|can|could)\b/i,
180
- /\bwhen\s+(?:is|are|should|can|could)\b/i,
181
- /\bhow\s+should\s+i\b/i,
182
- ];
183
180
  const nonBlockingQuestionPatterns = [
184
181
  // Conversational/offboarding prompts that shouldn't pause execution
185
182
  /\bwhat\s+(?:else\s+)?can\s+i\s+help\b/i,
@@ -191,21 +188,68 @@ function isAskingQuestion(text) {
191
188
  /\bcan\s+i\s+help\s+with\s+anything\s+else\b/i,
192
189
  /\bdoes\s+that\s+(?:help|make\s+sense)\b/i,
193
190
  ];
194
- const isShort = trimmed.length < 1000;
195
- if (!isShort)
196
- return false;
197
- // If we see explicit blocking cues, pause.
198
- if (blockingQuestionPatterns.some(pattern => pattern.test(trimmed))) {
191
+ const maxLengthForAnalysis = 4000;
192
+ const sample = trimmed.slice(0, maxLengthForAnalysis);
193
+ const blockingCuePatterns = [
194
+ /(?:need|required)\s+(?:your|a|the)\b/i,
195
+ /before\s+i\s+can\s+(?:proceed|continue)\b/i,
196
+ /to\s+(?:proceed|continue|move\s+forward)\b/i,
197
+ /i\s+can(?:not|'t)\s+(?:proceed|continue)\b/i,
198
+ /\bawaiting\s+your\b/i,
199
+ ];
200
+ const explicitProceedPatterns = [
201
+ /\bi\s+(?:will|\'ll)\s+(?:proceed|continue|go\s+ahead|move\s+forward)\b/i,
202
+ /\bi\s+can\s+(?:proceed|continue|move\s+forward)\b/i,
203
+ /\bi\s+(?:will|\'ll)\s+assume\b/i,
204
+ /\bif\s+you\s+do\s+not\s+(?:respond|answer|reply)\b/i,
205
+ /\bif\s+you\s+don\'t\s+(?:respond|answer|reply)\b/i,
206
+ ];
207
+ const questionWordPatterns = [
208
+ /^(?:who|what|where|when|why|how|which)\b/i,
209
+ ];
210
+ const imperativePatterns = [
211
+ /^(?:please\s+)?(?:provide|share|send|upload|enter|paste|specify|clarify|confirm|choose|pick|select|list|tell|give)\b/i,
212
+ ];
213
+ const decisionPatterns = [
214
+ /^(?:do\s+you\s+want|do\s+you\s+prefer|would\s+you\s+like|would\s+you\s+prefer|should\s+i|is\s+it\s+(?:ok|okay|alright)\s+if\s+i)\b/i,
215
+ ];
216
+ const hasBlockingCue = blockingCuePatterns.some(pattern => pattern.test(sample));
217
+ const hasExplicitProceed = explicitProceedPatterns.some(pattern => pattern.test(sample));
218
+ if (hasBlockingCue)
199
219
  return true;
200
- }
201
- // If it's a non-blocking conversational prompt, don't pause.
202
- const lastLine = trimmed.split('\n').filter(Boolean).pop() ?? trimmed;
220
+ const lines = sample.split('\n').map(l => l.trim()).filter(Boolean);
221
+ if (lines.length === 0)
222
+ return false;
223
+ const lastLine = lines[lines.length - 1] ?? sample;
203
224
  const sentenceMatch = lastLine.match(/[^.!?]+[.!?]*$/);
204
225
  const lastSentence = sentenceMatch ? sentenceMatch[0].trim() : lastLine;
205
- if (nonBlockingQuestionPatterns.some(pattern => pattern.test(lastSentence))) {
206
- return false;
226
+ const hasNonBlockingTail = nonBlockingQuestionPatterns.some(pattern => pattern.test(lastSentence));
227
+ const tailLines = lines.slice(-2);
228
+ let tailQuestion = false;
229
+ let tailImperative = false;
230
+ for (const line of tailLines) {
231
+ const normalized = line.replace(/^[\-\*]?\s*\d*[\).]?\s*/, '').trim();
232
+ if (!normalized)
233
+ continue;
234
+ if (nonBlockingQuestionPatterns.some(pattern => pattern.test(normalized))) {
235
+ continue;
236
+ }
237
+ if (imperativePatterns.some(pattern => pattern.test(normalized)) || decisionPatterns.some(pattern => pattern.test(normalized))) {
238
+ tailImperative = true;
239
+ }
240
+ if (normalized.endsWith('?') || questionWordPatterns.some(pattern => pattern.test(normalized))) {
241
+ tailQuestion = true;
242
+ }
243
+ }
244
+ if (tailImperative)
245
+ return true;
246
+ if (tailQuestion) {
247
+ if (hasNonBlockingTail)
248
+ return false;
249
+ if (hasExplicitProceed)
250
+ return false;
251
+ return true;
207
252
  }
208
- // Default to not pausing on generic questions.
209
253
  return false;
210
254
  }
211
255
  /**
@@ -900,6 +944,11 @@ class TaskExecutor {
900
944
  this.waitingForUserInput = false;
901
945
  this.conversationHistory = [];
902
946
  this.systemPrompt = '';
947
+ this.toolResultMemory = [];
948
+ this.lastAssistantOutput = null;
949
+ this.lastNonVerificationOutput = null;
950
+ this.toolResultMemoryLimit = 8;
951
+ this.dispatchedMentionedAgents = false;
903
952
  // Plan revision tracking to prevent infinite revision loops
904
953
  this.planRevisionCount = 0;
905
954
  this.maxPlanRevisions = 5;
@@ -915,7 +964,10 @@ class TaskExecutor {
915
964
  // Global turn tracking (across all steps) - similar to Claude Agent SDK's maxTurns
916
965
  this.globalTurnCount = 0;
917
966
  this.maxGlobalTurns = 100; // Configurable global limit
967
+ this.lastUserMessage = task.prompt;
918
968
  this.requiresTestRun = this.detectTestRequirement(`${task.title}\n${task.prompt}`);
969
+ // Only main tasks should pause for user input. Sub/parallel tasks should complete and report back.
970
+ this.shouldPauseForQuestions = !task.parentTaskId && (task.agentType ?? 'main') === 'main';
919
971
  // Get base settings
920
972
  const settings = llm_1.LLMProviderFactory.loadSettings();
921
973
  // Check if task has a model override (for sub-agents)
@@ -927,7 +979,8 @@ class TaskExecutor {
927
979
  // Use task's model key if specified, otherwise use global settings
928
980
  const effectiveModelKey = taskModelKey || settings.modelKey;
929
981
  // Get the model ID
930
- this.modelId = llm_1.LLMProviderFactory.getModelId(effectiveModelKey, settings.providerType, settings.ollama?.model, settings.gemini?.model, settings.openrouter?.model, settings.openai?.model);
982
+ const azureDeployment = settings.azure?.deployment || settings.azure?.deployments?.[0];
983
+ this.modelId = llm_1.LLMProviderFactory.getModelId(effectiveModelKey, settings.providerType, settings.ollama?.model, settings.gemini?.model, settings.openrouter?.model, settings.openai?.model, azureDeployment, settings.groq?.model, settings.xai?.model, settings.kimi?.model, settings.customProviders);
931
984
  this.modelKey = effectiveModelKey;
932
985
  // Initialize context manager for handling long conversations
933
986
  this.contextManager = new context_manager_1.ContextManager(effectiveModelKey);
@@ -994,6 +1047,9 @@ class TaskExecutor {
994
1047
  error.message?.includes('rate limit') ||
995
1048
  error.message?.includes('ECONNRESET') ||
996
1049
  error.message?.includes('ETIMEDOUT') ||
1050
+ error.message?.includes('ENOTFOUND') ||
1051
+ error.message?.includes('EAI_AGAIN') ||
1052
+ error.message?.includes('ECONNREFUSED') ||
997
1053
  error.message?.includes('network') ||
998
1054
  error.status === 429 ||
999
1055
  error.status === 503 ||
@@ -1046,6 +1102,20 @@ class TaskExecutor {
1046
1102
  this.iterationCount++;
1047
1103
  this.globalTurnCount++; // Track global turns across all steps
1048
1104
  }
1105
+ getToolTimeoutMs(toolName, input) {
1106
+ const settingsTimeout = builtin_settings_1.BuiltinToolsSettingsManager.getToolTimeoutMs(toolName);
1107
+ const normalizedSettingsTimeout = settingsTimeout && settingsTimeout > 0 ? settingsTimeout : null;
1108
+ if (toolName === 'run_command') {
1109
+ const inputTimeout = typeof input?.timeout === 'number'
1110
+ ? input.timeout
1111
+ : undefined;
1112
+ if (typeof inputTimeout === 'number' && Number.isFinite(inputTimeout) && inputTimeout > 0) {
1113
+ return Math.round(inputTimeout);
1114
+ }
1115
+ return normalizedSettingsTimeout ?? TOOL_TIMEOUT_MS;
1116
+ }
1117
+ return normalizedSettingsTimeout ?? TOOL_TIMEOUT_MS;
1118
+ }
1049
1119
  /**
1050
1120
  * Check if a file operation should be blocked (redundant read or duplicate creation)
1051
1121
  * @returns Object with blocked flag, reason, and suggestion if blocked, plus optional cached result
@@ -1188,6 +1258,31 @@ class TaskExecutor {
1188
1258
  this.testRunObserved = true;
1189
1259
  }
1190
1260
  }
1261
+ stepRequiresImageVerification(step) {
1262
+ const description = (step.description || '').toLowerCase();
1263
+ if (!description.includes('verify'))
1264
+ return false;
1265
+ return IMAGE_VERIFICATION_KEYWORDS.some((keyword) => description.includes(keyword));
1266
+ }
1267
+ hasNewImageFromGlobResult(result, since) {
1268
+ const matches = result?.matches;
1269
+ if (!Array.isArray(matches))
1270
+ return false;
1271
+ const threshold = Math.max(0, since - IMAGE_VERIFICATION_TIME_SKEW_MS);
1272
+ for (const match of matches) {
1273
+ const path = typeof match === 'string' ? match : match?.path;
1274
+ if (!path || !IMAGE_FILE_EXTENSION_REGEX.test(path))
1275
+ continue;
1276
+ const modified = typeof match === 'object' ? match?.modified : undefined;
1277
+ if (!modified)
1278
+ continue;
1279
+ const modifiedTime = Date.parse(modified);
1280
+ if (!Number.isNaN(modifiedTime) && modifiedTime >= threshold) {
1281
+ return true;
1282
+ }
1283
+ }
1284
+ return false;
1285
+ }
1191
1286
  /**
1192
1287
  * Infer missing parameters for tool calls (helps weaker models)
1193
1288
  * This auto-fills parameters when the LLM fails to provide them but context is available
@@ -1264,8 +1359,62 @@ class TaskExecutor {
1264
1359
  }
1265
1360
  return { input, modified, inference: modified ? inference : undefined };
1266
1361
  }
1362
+ // Handle web_search - normalize region/country inputs
1363
+ if (toolName === 'web_search') {
1364
+ let modified = false;
1365
+ let inference = '';
1366
+ if (!input?.region && input?.country && typeof input.country === 'string') {
1367
+ input.region = input.country;
1368
+ modified = true;
1369
+ inference = 'Normalized country -> region';
1370
+ }
1371
+ if (input?.region && typeof input.region === 'string') {
1372
+ const raw = input.region.trim();
1373
+ const upper = raw.toUpperCase();
1374
+ let normalized = upper;
1375
+ if (upper === 'UK')
1376
+ normalized = 'GB';
1377
+ if (upper === 'USA')
1378
+ normalized = 'US';
1379
+ if (normalized !== raw) {
1380
+ input.region = normalized;
1381
+ modified = true;
1382
+ inference = `${inference ? `${inference}; ` : ''}Normalized region "${raw}" -> "${normalized}"`;
1383
+ }
1384
+ }
1385
+ if (modified) {
1386
+ return { input, modified, inference };
1387
+ }
1388
+ }
1267
1389
  return { input, modified: false };
1268
1390
  }
1391
+ async handleCanvasPushFallback(content, assistantText) {
1392
+ if (content.name !== 'canvas_push') {
1393
+ return;
1394
+ }
1395
+ const inputContent = content.input?.content;
1396
+ const hasContent = typeof inputContent === 'string' && inputContent.trim().length > 0;
1397
+ const filename = content.input?.filename;
1398
+ const isHtmlTarget = !filename || filename === 'index.html';
1399
+ if (hasContent || !isHtmlTarget) {
1400
+ return;
1401
+ }
1402
+ const extracted = this.extractHtmlFromText(assistantText);
1403
+ const generated = extracted || await this.generateCanvasHtml(this.lastUserMessage || this.task.prompt);
1404
+ if (!generated) {
1405
+ return;
1406
+ }
1407
+ content.input = {
1408
+ ...(content.input || {}),
1409
+ content: generated,
1410
+ };
1411
+ this.daemon.logEvent(this.task.id, 'parameter_inference', {
1412
+ tool: content.name,
1413
+ inference: extracted
1414
+ ? 'Recovered HTML from assistant text'
1415
+ : 'Auto-generated HTML from latest user request',
1416
+ });
1417
+ }
1269
1418
  /**
1270
1419
  * Get available tools, filtering out disabled ones
1271
1420
  * This prevents the LLM from trying to use tools that have been disabled by the circuit breaker
@@ -1374,6 +1523,7 @@ class TaskExecutor {
1374
1523
  this.systemPrompt = `You are an AI assistant helping with tasks. Use the available tools to complete the work.
1375
1524
  Current time: ${getCurrentDateTimeContext()}
1376
1525
  Workspace: ${this.workspace.path}
1526
+ Workspace is temporary: ${this.workspace.isTemp ? 'true' : 'false'}
1377
1527
  Always ask for approval before deleting files or making destructive changes.
1378
1528
  Be concise in your responses. When reading files, only read what you need.
1379
1529
 
@@ -1650,6 +1800,9 @@ You are continuing a previous conversation. The context from the previous conver
1650
1800
  }
1651
1801
  // Reset tool failure tracker (tools might work on retry)
1652
1802
  this.toolFailureTracker = new ToolFailureTracker();
1803
+ this.toolResultMemory = [];
1804
+ this.lastAssistantOutput = null;
1805
+ this.lastNonVerificationOutput = null;
1653
1806
  // Add context for LLM about retry
1654
1807
  this.conversationHistory.push({
1655
1808
  role: 'user',
@@ -1863,6 +2016,368 @@ You are continuing a previous conversation. The context from the previous conver
1863
2016
  }
1864
2017
  return { additionalContext: additionalContext || undefined, taskType };
1865
2018
  }
2019
+ classifyWorkspaceNeed(prompt) {
2020
+ const text = prompt.toLowerCase();
2021
+ const newProjectPatterns = [
2022
+ /from\s+scratch/i,
2023
+ /\bnew\s+project\b/i,
2024
+ /\bcreate\s+(?:a|an)\s+new\b/i,
2025
+ /\bstart\s+(?:a|an)\s+new\b/i,
2026
+ /\bscaffold\b/i,
2027
+ /\bbootstrap\b/i,
2028
+ /\binitialize\b/i,
2029
+ /\binit\b/i,
2030
+ /\bgreenfield\b/i,
2031
+ ];
2032
+ const existingProjectPatterns = [
2033
+ /\bexisting\b/i,
2034
+ /\bcurrent\b/i,
2035
+ /\balready\b/i,
2036
+ /\bin\s+(?:this|the)\s+(?:repo|repository|project|codebase)\b/i,
2037
+ /\bfix\b/i,
2038
+ /\bbug\b/i,
2039
+ /\bdebug\b/i,
2040
+ /\brefactor\b/i,
2041
+ /\bupdate\b/i,
2042
+ /\bmodify\b/i,
2043
+ // Note: 'add' is intentionally omitted - it's ambiguous (could be new or existing)
2044
+ /\bextend\b/i,
2045
+ /\bmigrate\b/i,
2046
+ /\bpatch\b/i,
2047
+ ];
2048
+ const pathOrFilePatterns = [
2049
+ /(?:^|[\s/\\])[\w.\-\/\\]+?\.(ts|tsx|js|jsx|py|rs|go|java|kt|swift|json|yml|yaml|toml|md|sol|c|cpp|h|hpp)\b/i,
2050
+ /\b(?:src|app|apps|packages|programs|frontend|backend|server|client|contracts|lib|services)\//i,
2051
+ ];
2052
+ const codeTaskPatterns = [
2053
+ /\bapp\b/i,
2054
+ /\bdapp\b/i,
2055
+ /\bweb\b/i,
2056
+ /\bfrontend\b/i,
2057
+ /\bbackend\b/i,
2058
+ /\bapi\b/i,
2059
+ /\bservice\b/i,
2060
+ /\bprogram\b/i,
2061
+ /\bsmart\s+contract\b/i,
2062
+ /\bcontract\b/i,
2063
+ /\bblockchain\b/i,
2064
+ /\bsolana\b/i,
2065
+ /\breact\b/i,
2066
+ /\bnode\b/i,
2067
+ /\btypescript\b/i,
2068
+ /\bjavascript\b/i,
2069
+ /\bpython\b/i,
2070
+ /\brust\b/i,
2071
+ /\bgo\b/i,
2072
+ /\bjava\b/i,
2073
+ /\bkotlin\b/i,
2074
+ /\bswift\b/i,
2075
+ /\bdatabase\b/i,
2076
+ /\bschema\b/i,
2077
+ /\bmigration\b/i,
2078
+ /\brepo\b/i,
2079
+ /\brepository\b/i,
2080
+ /\bcodebase\b/i,
2081
+ ];
2082
+ const mentionsNew = newProjectPatterns.some(pattern => pattern.test(text));
2083
+ const isCodeTask = codeTaskPatterns.some(pattern => pattern.test(text));
2084
+ const mentionsExisting = pathOrFilePatterns.some(pattern => pattern.test(text)) ||
2085
+ (existingProjectPatterns.some(pattern => pattern.test(text)) && isCodeTask);
2086
+ if (mentionsExisting)
2087
+ return 'needs_existing';
2088
+ if (mentionsNew)
2089
+ return 'new_ok';
2090
+ if (isCodeTask)
2091
+ return 'ambiguous';
2092
+ return 'none';
2093
+ }
2094
+ getWorkspaceSignals() {
2095
+ const projectMarkers = new Set([
2096
+ 'package.json',
2097
+ 'pnpm-lock.yaml',
2098
+ 'yarn.lock',
2099
+ 'package-lock.json',
2100
+ 'Cargo.toml',
2101
+ 'Anchor.toml',
2102
+ 'pyproject.toml',
2103
+ 'requirements.txt',
2104
+ 'go.mod',
2105
+ 'pom.xml',
2106
+ 'build.gradle',
2107
+ 'settings.gradle',
2108
+ 'Gemfile',
2109
+ 'composer.json',
2110
+ 'mix.exs',
2111
+ 'Makefile',
2112
+ 'CMakeLists.txt',
2113
+ ]);
2114
+ const codeExtensions = new Set([
2115
+ '.ts', '.tsx', '.js', '.jsx', '.py', '.rs', '.go', '.java', '.kt', '.swift',
2116
+ '.cs', '.cpp', '.c', '.h', '.hpp', '.sol',
2117
+ ]);
2118
+ const appDirs = new Set([
2119
+ 'src', 'app', 'apps', 'packages', 'programs', 'frontend', 'backend',
2120
+ 'server', 'client', 'contracts', 'lib', 'services', 'web', 'api',
2121
+ ]);
2122
+ try {
2123
+ const entries = fs.readdirSync(this.workspace.path, { withFileTypes: true });
2124
+ let hasProjectMarkers = false;
2125
+ let hasCodeFiles = false;
2126
+ let hasAppDirs = false;
2127
+ for (const entry of entries) {
2128
+ if (entry.isFile()) {
2129
+ if (projectMarkers.has(entry.name)) {
2130
+ hasProjectMarkers = true;
2131
+ }
2132
+ const ext = path.extname(entry.name).toLowerCase();
2133
+ if (codeExtensions.has(ext)) {
2134
+ hasCodeFiles = true;
2135
+ }
2136
+ }
2137
+ else if (entry.isDirectory()) {
2138
+ if (appDirs.has(entry.name)) {
2139
+ hasAppDirs = true;
2140
+ }
2141
+ }
2142
+ if (hasProjectMarkers && hasCodeFiles && hasAppDirs)
2143
+ break;
2144
+ }
2145
+ return { hasProjectMarkers, hasCodeFiles, hasAppDirs };
2146
+ }
2147
+ catch {
2148
+ return { hasProjectMarkers: false, hasCodeFiles: false, hasAppDirs: false };
2149
+ }
2150
+ }
2151
+ pauseForUserInput(message, reason) {
2152
+ this.waitingForUserInput = true;
2153
+ this.daemon.updateTaskStatus(this.task.id, 'paused');
2154
+ this.daemon.logEvent(this.task.id, 'assistant_message', { message });
2155
+ this.daemon.logEvent(this.task.id, 'task_paused', { message, reason });
2156
+ this.daemon.logEvent(this.task.id, 'progress_update', {
2157
+ phase: 'execution',
2158
+ completedSteps: this.plan?.steps.filter(s => s.status === 'completed').length ?? 0,
2159
+ totalSteps: this.plan?.steps.length ?? 0,
2160
+ progress: 0,
2161
+ message: 'Paused - awaiting user input',
2162
+ });
2163
+ if (this.conversationHistory.length === 0) {
2164
+ this.conversationHistory.push({
2165
+ role: 'user',
2166
+ content: this.task.prompt,
2167
+ });
2168
+ }
2169
+ this.conversationHistory.push({
2170
+ role: 'assistant',
2171
+ content: [{ type: 'text', text: message }],
2172
+ });
2173
+ this.saveConversationSnapshot();
2174
+ }
2175
+ preflightWorkspaceCheck() {
2176
+ const workspaceNeed = this.classifyWorkspaceNeed(this.task.prompt);
2177
+ if (workspaceNeed === 'none')
2178
+ return false;
2179
+ const signals = this.getWorkspaceSignals();
2180
+ const looksLikeProject = signals.hasProjectMarkers || signals.hasCodeFiles || signals.hasAppDirs;
2181
+ const isTemp = this.workspace.isTemp || this.workspace.id === types_1.TEMP_WORKSPACE_ID;
2182
+ if (isTemp && !looksLikeProject) {
2183
+ if (workspaceNeed === 'needs_existing') {
2184
+ this.pauseForUserInput('I am in the temporary workspace, but this task looks like it targets an existing project. ' +
2185
+ 'Please select the project folder or provide its path so I can switch to it. ' +
2186
+ 'If you want a new project created here instead, say so.', 'workspace_required');
2187
+ return true;
2188
+ }
2189
+ if (workspaceNeed === 'ambiguous') {
2190
+ this.pauseForUserInput('I am in the temporary workspace and this task could be a new project or changes to an existing one. ' +
2191
+ 'Choose one:\n' +
2192
+ '1. Create a new project in the temporary workspace\n' +
2193
+ '2. Switch to an existing project folder (share the path or select a workspace)', 'workspace_selection');
2194
+ return true;
2195
+ }
2196
+ }
2197
+ if (!isTemp && workspaceNeed === 'needs_existing' && !looksLikeProject) {
2198
+ this.pauseForUserInput('I am in the selected workspace, but I do not see typical project files here. ' +
2199
+ 'If this task targets an existing project, please confirm the correct folder or provide its path. ' +
2200
+ 'If this is a new project, tell me to scaffold it here.', 'workspace_mismatch');
2201
+ return true;
2202
+ }
2203
+ return false;
2204
+ }
2205
+ summarizeToolResult(toolName, result) {
2206
+ if (!result)
2207
+ return null;
2208
+ if (toolName === 'web_search') {
2209
+ const query = typeof result.query === 'string' ? result.query : '';
2210
+ const items = Array.isArray(result.results) ? result.results : [];
2211
+ if (items.length === 0) {
2212
+ return query ? `query "${query}": no results` : 'no results';
2213
+ }
2214
+ const formatted = items.slice(0, 5).map((item) => {
2215
+ const title = item?.title ? String(item.title).trim() : 'Untitled';
2216
+ const url = item?.url ? String(item.url) : '';
2217
+ let host = '';
2218
+ if (url) {
2219
+ try {
2220
+ host = new URL(url).hostname.replace(/^www\./, '');
2221
+ }
2222
+ catch {
2223
+ host = '';
2224
+ }
2225
+ }
2226
+ return host ? `${title} (${host})` : title;
2227
+ });
2228
+ const prefix = query ? `query "${query}": ` : '';
2229
+ return `${prefix}${formatted.join(' | ')}`;
2230
+ }
2231
+ if (toolName === 'web_fetch') {
2232
+ const url = typeof result.url === 'string' ? result.url : '';
2233
+ const content = typeof result.content === 'string' ? result.content : '';
2234
+ const snippet = content
2235
+ ? content.replace(/\s+/g, ' ').slice(0, 300)
2236
+ : '';
2237
+ if (url && snippet)
2238
+ return `${url} — ${snippet}`;
2239
+ if (url)
2240
+ return url;
2241
+ if (snippet)
2242
+ return snippet;
2243
+ return null;
2244
+ }
2245
+ if (toolName === 'search_files') {
2246
+ const totalFound = typeof result.totalFound === 'number' ? result.totalFound : undefined;
2247
+ if (totalFound !== undefined)
2248
+ return `matches found: ${totalFound}`;
2249
+ }
2250
+ if (toolName === 'glob') {
2251
+ const totalMatches = typeof result.totalMatches === 'number' ? result.totalMatches : undefined;
2252
+ const pattern = typeof result.pattern === 'string' ? result.pattern : '';
2253
+ if (totalMatches !== undefined) {
2254
+ return pattern ? `pattern "${pattern}" matched ${totalMatches} item(s)` : `matched ${totalMatches} item(s)`;
2255
+ }
2256
+ }
2257
+ return null;
2258
+ }
2259
+ recordToolResult(toolName, result) {
2260
+ const summary = this.summarizeToolResult(toolName, result);
2261
+ if (!summary)
2262
+ return;
2263
+ this.toolResultMemory.push({ tool: toolName, summary, timestamp: Date.now() });
2264
+ if (this.toolResultMemory.length > this.toolResultMemoryLimit) {
2265
+ this.toolResultMemory.splice(0, this.toolResultMemory.length - this.toolResultMemoryLimit);
2266
+ }
2267
+ }
2268
+ getRecentToolResultSummary(maxEntries = 6) {
2269
+ if (this.toolResultMemory.length === 0)
2270
+ return '';
2271
+ const entries = this.toolResultMemory.slice(-maxEntries);
2272
+ return entries.map(entry => `- ${entry.tool}: ${entry.summary}`).join('\n');
2273
+ }
2274
+ isVerificationStep(step) {
2275
+ const desc = step.description.toLowerCase().trim();
2276
+ if (desc.startsWith('verify'))
2277
+ return true;
2278
+ if (desc.startsWith('review'))
2279
+ return true;
2280
+ return desc.includes('verify:') || desc.includes('verification') || desc.includes('verify ');
2281
+ }
2282
+ isSummaryStep(step) {
2283
+ const desc = step.description.toLowerCase();
2284
+ return desc.includes('summary') || desc.includes('summarize') || desc.includes('compile') || desc.includes('report');
2285
+ }
2286
+ isLastPlanStep(step) {
2287
+ if (!this.plan || this.plan.steps.length === 0)
2288
+ return false;
2289
+ const last = this.plan.steps[this.plan.steps.length - 1];
2290
+ return last?.id === step.id;
2291
+ }
2292
+ taskLikelyNeedsWebEvidence() {
2293
+ const prompt = `${this.task.title}\n${this.task.prompt}`.toLowerCase();
2294
+ const signals = [
2295
+ 'news',
2296
+ 'latest',
2297
+ 'today',
2298
+ 'trending',
2299
+ 'breaking',
2300
+ 'reddit',
2301
+ 'search',
2302
+ 'headline',
2303
+ 'current events',
2304
+ ];
2305
+ return signals.some(signal => prompt.includes(signal));
2306
+ }
2307
+ taskRequiresTodayContext() {
2308
+ const prompt = `${this.task.title}\n${this.task.prompt}`.toLowerCase();
2309
+ return prompt.includes('today');
2310
+ }
2311
+ hasWebEvidence() {
2312
+ return this.toolResultMemory.some(entry => entry.tool === 'web_search' || entry.tool === 'web_fetch');
2313
+ }
2314
+ normalizeToolName(name) {
2315
+ if (!name)
2316
+ return { name, modified: false, original: name };
2317
+ if (!name.includes('.'))
2318
+ return { name, modified: false, original: name };
2319
+ const [prefix, ...rest] = name.split('.');
2320
+ if (rest.length === 0)
2321
+ return { name, modified: false, original: name };
2322
+ if (['functions', 'tool', 'tools'].includes(prefix)) {
2323
+ const normalized = rest.join('.');
2324
+ return { name: normalized, modified: normalized !== name, original: name };
2325
+ }
2326
+ return { name, modified: false, original: name };
2327
+ }
2328
+ recordAssistantOutput(messages, step) {
2329
+ if (!messages || messages.length === 0)
2330
+ return;
2331
+ const lastAssistant = [...messages].reverse().find(m => m.role === 'assistant');
2332
+ if (!lastAssistant || !lastAssistant.content)
2333
+ return;
2334
+ const text = (Array.isArray(lastAssistant.content) ? lastAssistant.content : [])
2335
+ .filter((item) => item.type === 'text' && item.text)
2336
+ .map((item) => String(item.text))
2337
+ .join('\n')
2338
+ .trim();
2339
+ if (!text)
2340
+ return;
2341
+ const truncated = text.length > 1500 ? `${text.slice(0, 1500)}…` : text;
2342
+ if (!this.isVerificationStep(step)) {
2343
+ this.lastAssistantOutput = truncated;
2344
+ this.lastNonVerificationOutput = truncated;
2345
+ }
2346
+ else {
2347
+ if (!this.lastAssistantOutput) {
2348
+ this.lastAssistantOutput = truncated;
2349
+ }
2350
+ // Preserve lastNonVerificationOutput for future steps/follow-ups.
2351
+ }
2352
+ }
2353
+ isTransientProviderError(error) {
2354
+ if (!error)
2355
+ return false;
2356
+ const message = String(error.message || '').toLowerCase();
2357
+ const code = error.cause?.code || error.code;
2358
+ const retryableCodes = new Set(['ECONNRESET', 'ETIMEDOUT', 'ENOTFOUND', 'EAI_AGAIN', 'ECONNREFUSED']);
2359
+ if (code && retryableCodes.has(code))
2360
+ return true;
2361
+ return (message.includes('fetch failed') ||
2362
+ message.includes('network') ||
2363
+ message.includes('timeout') ||
2364
+ message.includes('socket hang up'));
2365
+ }
2366
+ async dispatchMentionedAgentsAfterPlanning() {
2367
+ if (this.dispatchedMentionedAgents)
2368
+ return;
2369
+ if (!this.shouldPauseForQuestions)
2370
+ return;
2371
+ if (!this.plan)
2372
+ return;
2373
+ try {
2374
+ await this.daemon.dispatchMentionedAgents(this.task.id, this.plan);
2375
+ this.dispatchedMentionedAgents = true;
2376
+ }
2377
+ catch (error) {
2378
+ console.warn('[TaskExecutor] Failed to dispatch mentioned agents:', error);
2379
+ }
2380
+ }
1866
2381
  /**
1867
2382
  * Main execution loop
1868
2383
  */
@@ -1895,6 +2410,7 @@ You are continuing a previous conversation. The context from the previous conver
1895
2410
  // Phase 1: Planning
1896
2411
  this.daemon.updateTaskStatus(this.task.id, 'planning');
1897
2412
  await this.createPlan();
2413
+ await this.dispatchMentionedAgentsAfterPlanning();
1898
2414
  if (this.cancelled)
1899
2415
  return;
1900
2416
  // Phase 2: Execution with Goal Mode retry loop
@@ -1965,6 +2481,12 @@ You are continuing a previous conversation. The context from the previous conver
1965
2481
  // Status will be updated by the daemon's cancelTask method
1966
2482
  return;
1967
2483
  }
2484
+ if (this.isTransientProviderError(error)) {
2485
+ const scheduled = this.daemon.handleTransientTaskFailure(this.task.id, error.message || 'Transient LLM error');
2486
+ if (scheduled) {
2487
+ return;
2488
+ }
2489
+ }
1968
2490
  console.error(`Task execution failed:`, error);
1969
2491
  // Save conversation snapshot even on failure for potential recovery
1970
2492
  this.saveConversationSnapshot();
@@ -1998,6 +2520,7 @@ You are continuing a previous conversation. The context from the previous conver
1998
2520
 
1999
2521
  Current time: ${getCurrentDateTimeContext()}
2000
2522
  You have access to a workspace folder at: ${this.workspace.path}
2523
+ Workspace is temporary: ${this.workspace.isTemp ? 'true' : 'false'}
2001
2524
  Workspace permissions: ${JSON.stringify(this.workspace.permissions)}
2002
2525
 
2003
2526
  Available tools:
@@ -2010,6 +2533,12 @@ PLANNING RULES:
2010
2533
  - DO NOT plan to create multiple versions of files - pick ONE target file.
2011
2534
  - DO NOT plan to read the same file multiple times in different steps.
2012
2535
 
2536
+ WORKSPACE MODE (CRITICAL):
2537
+ - There are two modes: temporary workspace (no user-selected folder) and user-selected workspace.
2538
+ - If the workspace is temporary and the task likely targets an existing project, your FIRST step must be to ask for the correct folder or to switch workspaces.
2539
+ - If the task could be new or existing, ask the user to choose between scaffolding here or switching to an existing folder.
2540
+ - Do NOT assume a repo exists in the temporary workspace unless you find it.
2541
+
2013
2542
  PATH DISCOVERY (CRITICAL):
2014
2543
  - When users mention a folder or path (e.g., "electron/agent folder"), they may give a PARTIAL path, not the full path.
2015
2544
  - NEVER assume a path doesn't exist just because it's not in your workspace root.
@@ -2049,6 +2578,11 @@ WEB RESEARCH & CONTENT EXTRACTION (IMPORTANT):
2049
2578
  - NEVER create a plan that says "cannot be done" if alternative tools are available.
2050
2579
  - NEVER plan to ask the user for content you can extract yourself.
2051
2580
 
2581
+ REDDIT POSTS (WHEN UPVOTE COUNTS REQUIRED):
2582
+ - Prefer web_fetch against Reddit's JSON endpoints to get reliable titles and upvote counts.
2583
+ - Example: https://www.reddit.com/r/<sub>/top/.json?t=day&limit=5
2584
+ - Use web_search only to discover the right subreddit if needed, not for score counts.
2585
+
2052
2586
  TOOL SELECTION GUIDE (web tools):
2053
2587
  - web_search: Best for research, news, finding information, exploring topics (PREFERRED for most research)
2054
2588
  - web_fetch: Best for reading a specific known URL without interaction
@@ -2249,27 +2783,32 @@ Format your plan as a JSON object with this structure:
2249
2783
  if (!this.plan) {
2250
2784
  throw new Error('No plan available');
2251
2785
  }
2252
- const totalSteps = this.plan.steps.length;
2253
- let completedSteps = 0;
2786
+ if (this.preflightWorkspaceCheck()) {
2787
+ return;
2788
+ }
2254
2789
  // Emit initial progress event
2255
2790
  this.daemon.logEvent(this.task.id, 'progress_update', {
2256
2791
  phase: 'execution',
2257
- completedSteps,
2258
- totalSteps,
2792
+ completedSteps: this.plan.steps.filter(s => s.status === 'completed').length,
2793
+ totalSteps: this.plan.steps.length,
2259
2794
  progress: 0,
2260
- message: `Starting execution of ${totalSteps} steps`,
2795
+ message: `Starting execution of ${this.plan.steps.length} steps`,
2261
2796
  });
2262
- for (const step of this.plan.steps) {
2797
+ let index = 0;
2798
+ while (index < this.plan.steps.length) {
2799
+ const step = this.plan.steps[index];
2263
2800
  if (this.cancelled)
2264
2801
  break;
2265
2802
  if (step.status === 'completed') {
2266
- completedSteps++;
2803
+ index++;
2267
2804
  continue;
2268
2805
  }
2269
2806
  // Wait if paused
2270
2807
  while (this.paused && !this.cancelled) {
2271
2808
  await new Promise(resolve => setTimeout(resolve, 100));
2272
2809
  }
2810
+ const completedSteps = this.plan.steps.filter(s => s.status === 'completed').length;
2811
+ const totalSteps = this.plan.steps.length;
2273
2812
  // Emit step starting progress
2274
2813
  this.daemon.logEvent(this.task.id, 'progress_update', {
2275
2814
  phase: 'execution',
@@ -2324,20 +2863,34 @@ Format your plan as a JSON object with this structure:
2324
2863
  message: `Step timed out after ${STEP_TIMEOUT_MS / 1000}s`,
2325
2864
  });
2326
2865
  // Continue with next step instead of failing entire task
2327
- completedSteps++;
2866
+ const updatedIndex = this.plan.steps.findIndex(s => s.id === step.id);
2867
+ if (updatedIndex === -1) {
2868
+ index = Math.min(index + 1, this.plan.steps.length);
2869
+ }
2870
+ else {
2871
+ index = updatedIndex + 1;
2872
+ }
2328
2873
  continue;
2329
2874
  }
2330
2875
  throw error;
2331
2876
  }
2332
- completedSteps++;
2877
+ const updatedIndex = this.plan.steps.findIndex(s => s.id === step.id);
2878
+ if (updatedIndex === -1) {
2879
+ index = Math.min(index + 1, this.plan.steps.length);
2880
+ }
2881
+ else {
2882
+ index = updatedIndex + 1;
2883
+ }
2884
+ const completedAfterStep = this.plan.steps.filter(s => s.status === 'completed').length;
2885
+ const totalAfterStep = this.plan.steps.length;
2333
2886
  // Emit step completed progress
2334
2887
  this.daemon.logEvent(this.task.id, 'progress_update', {
2335
2888
  phase: 'execution',
2336
2889
  currentStep: step.id,
2337
- completedSteps,
2338
- totalSteps,
2339
- progress: Math.round((completedSteps / totalSteps) * 100),
2340
- message: `Completed step ${completedSteps}/${totalSteps}`,
2890
+ completedSteps: completedAfterStep,
2891
+ totalSteps: totalAfterStep,
2892
+ progress: totalAfterStep > 0 ? Math.round((completedAfterStep / totalAfterStep) * 100) : 100,
2893
+ message: `Completed step ${step.id}: ${step.description}`,
2341
2894
  });
2342
2895
  }
2343
2896
  // Check if any steps failed
@@ -2350,11 +2903,13 @@ Format your plan as a JSON object with this structure:
2350
2903
  // If critical steps failed (not just verification), this should be marked
2351
2904
  const criticalFailures = failedSteps.filter(s => !s.description.toLowerCase().includes('verify'));
2352
2905
  if (criticalFailures.length > 0) {
2906
+ const totalSteps = this.plan.steps.length;
2907
+ const progress = totalSteps > 0 ? Math.round((successfulSteps.length / totalSteps) * 100) : 0;
2353
2908
  this.daemon.logEvent(this.task.id, 'progress_update', {
2354
2909
  phase: 'execution',
2355
2910
  completedSteps: successfulSteps.length,
2356
2911
  totalSteps,
2357
- progress: Math.round((successfulSteps.length / totalSteps) * 100),
2912
+ progress,
2358
2913
  message: `Completed with ${criticalFailures.length} failed step(s)`,
2359
2914
  hasFailures: true,
2360
2915
  });
@@ -2365,8 +2920,8 @@ Format your plan as a JSON object with this structure:
2365
2920
  // Emit completion progress (only if no critical failures)
2366
2921
  this.daemon.logEvent(this.task.id, 'progress_update', {
2367
2922
  phase: 'execution',
2368
- completedSteps,
2369
- totalSteps,
2923
+ completedSteps: successfulSteps.length,
2924
+ totalSteps: this.plan.steps.length,
2370
2925
  progress: 100,
2371
2926
  message: 'All steps completed',
2372
2927
  });
@@ -2429,6 +2984,11 @@ IMPORTANT INSTRUCTIONS:
2429
2984
  - The delete_file tool has a built-in approval mechanism that will prompt the user. Just call the tool directly.
2430
2985
  - Do NOT ask "Should I proceed?" or wait for permission in text - the tools handle approvals automatically.
2431
2986
 
2987
+ USER INPUT GATE (CRITICAL):
2988
+ - If you ask the user for required information or a decision, STOP and wait.
2989
+ - Do NOT continue executing steps or call tools after asking such questions.
2990
+ - If safe defaults exist, state the assumption and proceed without asking.
2991
+
2432
2992
  PATH DISCOVERY (CRITICAL):
2433
2993
  - When a task mentions a folder or path (e.g., "electron/agent folder"), users often give PARTIAL paths.
2434
2994
  - NEVER conclude a path doesn't exist without SEARCHING for it first.
@@ -2518,6 +3078,11 @@ RESEARCH WORKFLOW:
2518
3078
  - Only fall back to browser_navigate if web_fetch fails (e.g., JavaScript-required content)
2519
3079
  - Many sites (X/Twitter, Reddit logged-in content, LinkedIn) require authentication - web_search can still find public discussions
2520
3080
 
3081
+ REDDIT POSTS (WHEN UPVOTE COUNTS REQUIRED):
3082
+ - Prefer web_fetch against Reddit's JSON endpoints to get reliable titles and upvote counts.
3083
+ - Example: https://www.reddit.com/r/<sub>/top/.json?t=day&limit=5
3084
+ - Use web_search only to discover the right subreddit if needed, not for score counts.
3085
+
2521
3086
  BROWSER TOOLS (when needed):
2522
3087
  - Treat browser_navigate + browser_get_content as ONE ATOMIC OPERATION
2523
3088
  - For dynamic content, use browser_wait then browser_get_content
@@ -2572,11 +3137,43 @@ SCHEDULING & REMINDERS:
2572
3137
  stepContext += `\n\nPrevious steps already completed:\n${completedSteps.map(s => `- ${s.description}`).join('\n')}`;
2573
3138
  stepContext += `\n\nDo NOT repeat work from previous steps. Focus only on: ${step.description}`;
2574
3139
  }
3140
+ const isVerifyStep = this.isVerificationStep(step);
3141
+ const isSummaryStep = this.isSummaryStep(step);
3142
+ const isLastStep = this.isLastPlanStep(step);
2575
3143
  // Add accumulated knowledge from previous steps (discovered files, directories, etc.)
2576
3144
  const knowledgeSummary = this.fileOperationTracker.getKnowledgeSummary();
2577
3145
  if (knowledgeSummary) {
2578
3146
  stepContext += `\n\nKNOWLEDGE FROM PREVIOUS STEPS (use this instead of re-reading/re-listing):\n${knowledgeSummary}`;
2579
3147
  }
3148
+ const toolResultSummary = this.getRecentToolResultSummary();
3149
+ if (toolResultSummary) {
3150
+ stepContext += `\n\nRECENT TOOL RESULTS (from previous steps; do not look in the filesystem for these):\n${toolResultSummary}`;
3151
+ }
3152
+ const shouldIncludePreviousOutput = !isVerifyStep || !this.lastNonVerificationOutput;
3153
+ if (this.lastAssistantOutput && shouldIncludePreviousOutput) {
3154
+ stepContext += `\n\nPREVIOUS STEP OUTPUT:\n${this.lastAssistantOutput}`;
3155
+ }
3156
+ if (isVerifyStep) {
3157
+ stepContext += `\n\nVERIFICATION MODE:\n- This is a verification step. Keep the response brief (1-3 sentences).\n- Do NOT output a checklist. Do NOT restate the full deliverable.\n- If the deliverable has NOT been provided earlier, provide it now, then add a one-sentence verification note.\n`;
3158
+ if (isLastStep) {
3159
+ stepContext += `- This is the FINAL step. Include a very short recap (2-4 sentences) of the deliverable before the verification note so the last message still answers the user.\n`;
3160
+ }
3161
+ if (this.lastNonVerificationOutput) {
3162
+ stepContext += `\n\nMOST RECENT DELIVERABLE (use this for verification):\n${this.lastNonVerificationOutput}`;
3163
+ }
3164
+ else if (this.lastAssistantOutput) {
3165
+ stepContext += `\n\nMOST RECENT DELIVERABLE (use this for verification):\n${this.lastAssistantOutput}`;
3166
+ }
3167
+ }
3168
+ if (isSummaryStep) {
3169
+ stepContext += `\n\nDELIVERABLE RULES:\n- If you write a file, you MUST also provide the key summary in your response.\n- Do not defer the answer to a verification step.\n`;
3170
+ if (this.taskLikelyNeedsWebEvidence() && !this.hasWebEvidence()) {
3171
+ stepContext += `\n\nEVIDENCE REQUIRED:\n- No web evidence has been gathered yet. Use web_search/web_fetch now before summarizing.\n- If you find no results, say so explicitly instead of guessing.\n`;
3172
+ }
3173
+ if (this.taskRequiresTodayContext()) {
3174
+ stepContext += `\n\nDATE REQUIREMENT:\n- This task explicitly asks for “today.” Only present items as “today” if you can confirm the date from sources.\n- If you cannot confirm any items from today, state that clearly, then optionally list the most recent items as “recent (not today)”.\n`;
3175
+ }
3176
+ }
2580
3177
  // Start fresh messages for this step
2581
3178
  let messages = [
2582
3179
  {
@@ -2591,8 +3188,17 @@ SCHEDULING & REMINDERS:
2591
3188
  let lastFailureReason = ''; // Track the reason for failure
2592
3189
  let hadToolError = false;
2593
3190
  let hadToolSuccessAfterError = false;
3191
+ let hadAnyToolSuccess = false;
3192
+ const toolErrors = new Set();
2594
3193
  let lastToolErrorReason = '';
2595
3194
  let awaitingUserInput = false;
3195
+ let hadRunCommandFailure = false;
3196
+ let hadToolSuccessAfterRunCommandFailure = false;
3197
+ const expectsImageVerification = this.stepRequiresImageVerification(step);
3198
+ const imageVerificationSince = typeof this.task.createdAt === 'number'
3199
+ ? this.task.createdAt
3200
+ : (step.startedAt ?? Date.now());
3201
+ let foundNewImage = false;
2596
3202
  const maxIterations = 5; // Reduced from 10 to prevent excessive iterations per step
2597
3203
  const maxEmptyResponses = 3;
2598
3204
  while (continueLoop && iterationCount < maxIterations) {
@@ -2610,12 +3216,13 @@ SCHEDULING & REMINDERS:
2610
3216
  this.checkBudgets();
2611
3217
  // Compact messages if context is getting too large
2612
3218
  messages = this.contextManager.compactMessages(messages, systemPromptTokens);
3219
+ const availableTools = this.getAvailableTools();
2613
3220
  // Use retry wrapper for resilient API calls
2614
3221
  const response = await this.callLLMWithRetry(() => withTimeout(this.provider.createMessage({
2615
3222
  model: this.modelId,
2616
3223
  maxTokens: 4096,
2617
3224
  system: this.systemPrompt,
2618
- tools: this.getAvailableTools(),
3225
+ tools: availableTools,
2619
3226
  messages,
2620
3227
  signal: this.abortController.signal,
2621
3228
  }), LLM_TIMEOUT_MS, 'LLM execution step'), `Step execution (iteration ${iterationCount})`);
@@ -2630,6 +3237,10 @@ SCHEDULING & REMINDERS:
2630
3237
  }
2631
3238
  // Log any text responses from the assistant and check if asking a question
2632
3239
  let assistantAskedQuestion = false;
3240
+ const assistantText = (response.content || [])
3241
+ .filter((item) => item.type === 'text' && item.text)
3242
+ .map((item) => item.text)
3243
+ .join('\n');
2633
3244
  if (response.content) {
2634
3245
  for (const content of response.content) {
2635
3246
  if (content.type === 'text' && content.text) {
@@ -2674,8 +3285,19 @@ SCHEDULING & REMINDERS:
2674
3285
  const toolResults = [];
2675
3286
  let hasDisabledToolAttempt = false;
2676
3287
  let hasDuplicateToolAttempt = false;
3288
+ let hasUnavailableToolAttempt = false;
3289
+ const availableToolNames = new Set(availableTools.map(tool => tool.name));
2677
3290
  for (const content of response.content || []) {
2678
3291
  if (content.type === 'tool_use') {
3292
+ // Normalize tool names like "functions.web_fetch" -> "web_fetch"
3293
+ const normalizedTool = this.normalizeToolName(content.name);
3294
+ if (normalizedTool.modified) {
3295
+ this.daemon.logEvent(this.task.id, 'parameter_inference', {
3296
+ tool: content.name,
3297
+ inference: `Normalized tool name "${normalizedTool.original}" -> "${normalizedTool.name}"`,
3298
+ });
3299
+ content.name = normalizedTool.name;
3300
+ }
2679
3301
  // Check if this tool is disabled (circuit breaker tripped)
2680
3302
  if (this.toolFailureTracker.isDisabled(content.name)) {
2681
3303
  const lastError = this.toolFailureTracker.getLastError(content.name);
@@ -2697,6 +3319,37 @@ SCHEDULING & REMINDERS:
2697
3319
  hasDisabledToolAttempt = true;
2698
3320
  continue;
2699
3321
  }
3322
+ // Validate tool availability before attempting any inference
3323
+ if (!availableToolNames.has(content.name)) {
3324
+ console.log(`[TaskExecutor] Tool not available in this context: ${content.name}`);
3325
+ this.daemon.logEvent(this.task.id, 'tool_error', {
3326
+ tool: content.name,
3327
+ error: 'Tool not available in current context or permissions',
3328
+ blocked: true,
3329
+ });
3330
+ toolResults.push({
3331
+ type: 'tool_result',
3332
+ tool_use_id: content.id,
3333
+ content: JSON.stringify({
3334
+ error: `Tool "${content.name}" is not available in this context. Please choose a different tool or check permissions/integrations.`,
3335
+ unavailable: true,
3336
+ }),
3337
+ is_error: true,
3338
+ });
3339
+ hasUnavailableToolAttempt = true;
3340
+ continue;
3341
+ }
3342
+ // Infer missing parameters for weaker models (normalize inputs before deduplication)
3343
+ const inference = this.inferMissingParameters(content.name, content.input);
3344
+ if (inference.modified) {
3345
+ content.input = inference.input;
3346
+ this.daemon.logEvent(this.task.id, 'parameter_inference', {
3347
+ tool: content.name,
3348
+ inference: inference.inference,
3349
+ });
3350
+ }
3351
+ // If canvas_push is missing content, try extracting HTML from assistant text or auto-generate
3352
+ await this.handleCanvasPushFallback(content, assistantText);
2700
3353
  // Check for duplicate tool calls (prevents stuck loops)
2701
3354
  const duplicateCheck = this.toolCallDeduplicator.checkDuplicate(content.name, content.input);
2702
3355
  if (duplicateCheck.isDuplicate) {
@@ -2767,22 +3420,36 @@ SCHEDULING & REMINDERS:
2767
3420
  }
2768
3421
  continue;
2769
3422
  }
2770
- // Infer missing parameters for weaker models
2771
- const inference = this.inferMissingParameters(content.name, content.input);
2772
- if (inference.modified) {
2773
- content.input = inference.input;
2774
- this.daemon.logEvent(this.task.id, 'parameter_inference', {
2775
- tool: content.name,
2776
- inference: inference.inference,
2777
- });
2778
- }
2779
3423
  this.daemon.logEvent(this.task.id, 'tool_call', {
2780
3424
  tool: content.name,
2781
3425
  input: content.input,
2782
3426
  });
2783
3427
  try {
2784
3428
  // Execute tool with timeout to prevent hanging
2785
- const result = await withTimeout(this.toolRegistry.executeTool(content.name, content.input), TOOL_TIMEOUT_MS, `Tool ${content.name}`);
3429
+ const toolTimeoutMs = this.getToolTimeoutMs(content.name, content.input);
3430
+ let result = await withTimeout(this.toolRegistry.executeTool(content.name, content.input), toolTimeoutMs, `Tool ${content.name}`);
3431
+ // Fallback: retry grep without glob if the glob produced an invalid regex
3432
+ if (content.name === 'grep' && result && result.success === false && content.input?.glob) {
3433
+ const errorText = String(result.error || '');
3434
+ if (/invalid regex pattern|nothing to repeat/i.test(errorText)) {
3435
+ this.daemon.logEvent(this.task.id, 'tool_fallback', {
3436
+ tool: 'grep',
3437
+ reason: 'invalid_glob_regex',
3438
+ originalGlob: content.input.glob,
3439
+ });
3440
+ const fallbackInput = { ...content.input };
3441
+ delete fallbackInput.glob;
3442
+ try {
3443
+ const fallbackResult = await withTimeout(this.toolRegistry.executeTool('grep', fallbackInput), toolTimeoutMs, 'Tool grep (fallback)');
3444
+ if (fallbackResult && fallbackResult.success !== false) {
3445
+ result = fallbackResult;
3446
+ }
3447
+ }
3448
+ catch {
3449
+ // Keep original error if fallback fails
3450
+ }
3451
+ }
3452
+ }
2786
3453
  // Tool succeeded - reset failure counter
2787
3454
  this.toolFailureTracker.recordSuccess(content.name);
2788
3455
  // Record this call for deduplication
@@ -2791,7 +3458,22 @@ SCHEDULING & REMINDERS:
2791
3458
  // Record file operation for tracking
2792
3459
  this.recordFileOperation(content.name, content.input, result);
2793
3460
  this.recordCommandExecution(content.name, content.input, result);
2794
- this.recordCommandExecution(content.name, content.input, result);
3461
+ const toolSucceeded = !(result && result.success === false);
3462
+ if (toolSucceeded) {
3463
+ hadAnyToolSuccess = true;
3464
+ this.recordToolResult(content.name, result);
3465
+ }
3466
+ if (content.name === 'run_command' && !toolSucceeded) {
3467
+ hadRunCommandFailure = true;
3468
+ }
3469
+ else if (hadRunCommandFailure && toolSucceeded) {
3470
+ hadToolSuccessAfterRunCommandFailure = true;
3471
+ }
3472
+ if (expectsImageVerification && content.name === 'glob' && !foundNewImage) {
3473
+ if (this.hasNewImageFromGlobResult(result, imageVerificationSince)) {
3474
+ foundNewImage = true;
3475
+ }
3476
+ }
2795
3477
  // Check if the result indicates an error (some tools return error in result)
2796
3478
  if (result && result.success === false) {
2797
3479
  const reason = result.error
@@ -2799,6 +3481,7 @@ SCHEDULING & REMINDERS:
2799
3481
  || (typeof result.exitCode === 'number' ? `exit code ${result.exitCode}` : undefined)
2800
3482
  || 'unknown error';
2801
3483
  hadToolError = true;
3484
+ toolErrors.add(content.name);
2802
3485
  lastToolErrorReason = `Tool ${content.name} failed: ${reason}`;
2803
3486
  // Check if this is a non-retryable error
2804
3487
  const shouldDisable = this.toolFailureTracker.recordFailure(content.name, result.error || reason);
@@ -2854,7 +3537,11 @@ SCHEDULING & REMINDERS:
2854
3537
  catch (error) {
2855
3538
  console.error(`Tool execution failed:`, error);
2856
3539
  hadToolError = true;
3540
+ toolErrors.add(content.name);
2857
3541
  lastToolErrorReason = `Tool ${content.name} failed: ${error.message}`;
3542
+ if (content.name === 'run_command') {
3543
+ hadRunCommandFailure = true;
3544
+ }
2858
3545
  // Track the failure
2859
3546
  const shouldDisable = this.toolFailureTracker.recordFailure(content.name, error.message);
2860
3547
  this.daemon.logEvent(this.task.id, 'tool_error', {
@@ -2882,7 +3569,7 @@ SCHEDULING & REMINDERS:
2882
3569
  // If all tool attempts were for disabled or duplicate tools, don't continue looping
2883
3570
  // This prevents infinite retry loops
2884
3571
  const allToolsFailed = toolResults.every(r => r.is_error);
2885
- if ((hasDisabledToolAttempt || hasDuplicateToolAttempt) && allToolsFailed) {
3572
+ if ((hasDisabledToolAttempt || hasDuplicateToolAttempt || hasUnavailableToolAttempt) && allToolsFailed) {
2886
3573
  console.log('[TaskExecutor] All tool calls failed, were disabled, or duplicates - stopping iteration');
2887
3574
  if (hasDuplicateToolAttempt) {
2888
3575
  // Duplicate detection triggered - step is likely complete
@@ -2899,20 +3586,37 @@ SCHEDULING & REMINDERS:
2899
3586
  continueLoop = true;
2900
3587
  }
2901
3588
  }
2902
- // If assistant asked a question and there are no tool calls, stop and wait for user
2903
- if (assistantAskedQuestion && toolResults.length === 0) {
3589
+ // If assistant asked a blocking question, stop and wait for user
3590
+ if (assistantAskedQuestion && this.shouldPauseForQuestions) {
2904
3591
  console.log('[TaskExecutor] Assistant asked a question, pausing for user input');
2905
3592
  awaitingUserInput = true;
2906
3593
  continueLoop = false;
2907
3594
  }
2908
3595
  }
2909
3596
  if (hadToolError && !hadToolSuccessAfterError) {
3597
+ const nonCriticalErrorTools = new Set(['web_search', 'web_fetch']);
3598
+ const onlyNonCriticalErrors = toolErrors.size > 0 && Array.from(toolErrors).every(t => nonCriticalErrorTools.has(t));
3599
+ if (!(hadAnyToolSuccess && onlyNonCriticalErrors)) {
3600
+ stepFailed = true;
3601
+ if (!lastFailureReason) {
3602
+ lastFailureReason = lastToolErrorReason || 'One or more tools failed without recovery.';
3603
+ }
3604
+ }
3605
+ }
3606
+ if (hadRunCommandFailure && !hadToolSuccessAfterRunCommandFailure) {
3607
+ stepFailed = true;
3608
+ if (!lastFailureReason) {
3609
+ lastFailureReason = 'run_command failed and no subsequent tool succeeded.';
3610
+ }
3611
+ }
3612
+ if (expectsImageVerification && !foundNewImage) {
2910
3613
  stepFailed = true;
2911
3614
  if (!lastFailureReason) {
2912
- lastFailureReason = lastToolErrorReason || 'One or more tools failed without recovery.';
3615
+ lastFailureReason = 'Verification failed: no newly generated image was found.';
2913
3616
  }
2914
3617
  }
2915
3618
  // Step completed or failed
3619
+ this.recordAssistantOutput(messages, step);
2916
3620
  // Save conversation history for follow-up messages
2917
3621
  this.conversationHistory = messages;
2918
3622
  if (awaitingUserInput) {
@@ -2993,6 +3697,57 @@ SCHEDULING & REMINDERS:
2993
3697
  });
2994
3698
  }
2995
3699
  }
3700
+ extractHtmlFromText(text) {
3701
+ if (!text)
3702
+ return null;
3703
+ const fenceMatch = text.match(/```html([\s\S]*?)```/i);
3704
+ const raw = fenceMatch ? fenceMatch[1].trim() : text;
3705
+ const doctypeIndex = raw.indexOf('<!DOCTYPE html');
3706
+ if (doctypeIndex >= 0) {
3707
+ const endIndex = raw.lastIndexOf('</html>');
3708
+ if (endIndex > doctypeIndex) {
3709
+ return raw.slice(doctypeIndex, endIndex + '</html>'.length).trim();
3710
+ }
3711
+ }
3712
+ const htmlIndex = raw.indexOf('<html');
3713
+ if (htmlIndex >= 0) {
3714
+ const endIndex = raw.lastIndexOf('</html>');
3715
+ if (endIndex > htmlIndex) {
3716
+ return raw.slice(htmlIndex, endIndex + '</html>'.length).trim();
3717
+ }
3718
+ }
3719
+ return null;
3720
+ }
3721
+ async generateCanvasHtml(prompt) {
3722
+ const system = [
3723
+ 'You generate a single self-contained HTML document for an in-app canvas.',
3724
+ 'Output ONLY the HTML document (no markdown, no commentary).',
3725
+ 'Use inline CSS and JS. Do not reference external assets or remote URLs.',
3726
+ 'Keep it reasonably compact and interactive where appropriate.',
3727
+ ].join(' ');
3728
+ try {
3729
+ const response = await this.provider.createMessage({
3730
+ model: this.modelId,
3731
+ maxTokens: 1800,
3732
+ system,
3733
+ messages: [
3734
+ {
3735
+ role: 'user',
3736
+ content: `Build an interactive HTML demo for this request:\n${prompt}`,
3737
+ },
3738
+ ],
3739
+ });
3740
+ const text = (response.content || [])
3741
+ .filter((c) => c.type === 'text')
3742
+ .map((c) => c.text)
3743
+ .join('\n');
3744
+ return this.extractHtmlFromText(text);
3745
+ }
3746
+ catch (error) {
3747
+ console.error('[TaskExecutor] Failed to auto-generate canvas HTML:', error);
3748
+ return null;
3749
+ }
3750
+ }
2996
3751
  /**
2997
3752
  * Send a follow-up message to continue the conversation
2998
3753
  */
@@ -3001,8 +3756,13 @@ SCHEDULING & REMINDERS:
3001
3756
  const shouldResumeAfterFollowup = previousStatus === 'paused' || this.waitingForUserInput;
3002
3757
  const shouldStartNewCanvasSession = ['completed', 'failed', 'cancelled'].includes(previousStatus);
3003
3758
  let resumeAttempted = false;
3759
+ let pausedForUserInput = false;
3004
3760
  this.waitingForUserInput = false;
3005
3761
  this.paused = false;
3762
+ this.lastUserMessage = message;
3763
+ if (shouldResumeAfterFollowup) {
3764
+ this.task.prompt = `${this.task.prompt}\n\nUSER UPDATE:\n${message}`;
3765
+ }
3006
3766
  this.toolRegistry.setCanvasSessionCutoff(shouldStartNewCanvasSession ? Date.now() : null);
3007
3767
  this.daemon.updateTaskStatus(this.task.id, 'executing');
3008
3768
  this.daemon.logEvent(this.task.id, 'executing', { message: 'Processing follow-up message' });
@@ -3051,6 +3811,11 @@ IMPORTANT INSTRUCTIONS:
3051
3811
  - The delete_file tool has a built-in approval mechanism that will prompt the user. Just call the tool directly.
3052
3812
  - Do NOT ask "Should I proceed?" or wait for permission in text - the tools handle approvals automatically.
3053
3813
 
3814
+ USER INPUT GATE (CRITICAL):
3815
+ - If you ask the user for required information or a decision, STOP and wait.
3816
+ - Do NOT continue executing steps or call tools after asking such questions.
3817
+ - If safe defaults exist, state the assumption and proceed without asking.
3818
+
3054
3819
  PATH DISCOVERY (CRITICAL):
3055
3820
  - When a task mentions a folder or path (e.g., "electron/agent folder"), users often give PARTIAL paths.
3056
3821
  - NEVER conclude a path doesn't exist without SEARCHING for it first.
@@ -3198,12 +3963,14 @@ SCHEDULING & REMINDERS:
3198
3963
  this.checkBudgets();
3199
3964
  // Compact messages if context is getting too large
3200
3965
  messages = this.contextManager.compactMessages(messages, systemPromptTokens);
3966
+ const availableTools = this.getAvailableTools();
3967
+ const availableToolNames = new Set(availableTools.map(tool => tool.name));
3201
3968
  // Use retry wrapper for resilient API calls
3202
3969
  const response = await this.callLLMWithRetry(() => withTimeout(this.provider.createMessage({
3203
3970
  model: this.modelId,
3204
3971
  maxTokens: 4096,
3205
3972
  system: this.systemPrompt,
3206
- tools: this.getAvailableTools(),
3973
+ tools: availableTools,
3207
3974
  messages,
3208
3975
  signal: this.abortController.signal,
3209
3976
  }), LLM_TIMEOUT_MS, 'LLM message processing'), `Message processing (iteration ${iterationCount})`);
@@ -3216,6 +3983,10 @@ SCHEDULING & REMINDERS:
3216
3983
  // Log any text responses from the assistant and check if asking a question
3217
3984
  let assistantAskedQuestion = false;
3218
3985
  let hasTextInThisResponse = false;
3986
+ const assistantText = (response.content || [])
3987
+ .filter((item) => item.type === 'text' && item.text)
3988
+ .map((item) => item.text)
3989
+ .join('\n');
3219
3990
  if (response.content) {
3220
3991
  for (const content of response.content) {
3221
3992
  if (content.type === 'text' && content.text && content.text.trim().length > 0) {
@@ -3262,8 +4033,18 @@ SCHEDULING & REMINDERS:
3262
4033
  const toolResults = [];
3263
4034
  let hasDisabledToolAttempt = false;
3264
4035
  let hasDuplicateToolAttempt = false;
4036
+ let hasUnavailableToolAttempt = false;
3265
4037
  for (const content of response.content || []) {
3266
4038
  if (content.type === 'tool_use') {
4039
+ // Normalize tool names like "functions.web_fetch" -> "web_fetch"
4040
+ const normalizedTool = this.normalizeToolName(content.name);
4041
+ if (normalizedTool.modified) {
4042
+ this.daemon.logEvent(this.task.id, 'parameter_inference', {
4043
+ tool: content.name,
4044
+ inference: `Normalized tool name "${normalizedTool.original}" -> "${normalizedTool.name}"`,
4045
+ });
4046
+ content.name = normalizedTool.name;
4047
+ }
3267
4048
  // Check if this tool is disabled (circuit breaker tripped)
3268
4049
  if (this.toolFailureTracker.isDisabled(content.name)) {
3269
4050
  const lastError = this.toolFailureTracker.getLastError(content.name);
@@ -3285,6 +4066,37 @@ SCHEDULING & REMINDERS:
3285
4066
  hasDisabledToolAttempt = true;
3286
4067
  continue;
3287
4068
  }
4069
+ // Validate tool availability before attempting any inference
4070
+ if (!availableToolNames.has(content.name)) {
4071
+ console.log(`[TaskExecutor] Tool not available in this context: ${content.name}`);
4072
+ this.daemon.logEvent(this.task.id, 'tool_error', {
4073
+ tool: content.name,
4074
+ error: 'Tool not available in current context or permissions',
4075
+ blocked: true,
4076
+ });
4077
+ toolResults.push({
4078
+ type: 'tool_result',
4079
+ tool_use_id: content.id,
4080
+ content: JSON.stringify({
4081
+ error: `Tool "${content.name}" is not available in this context. Please choose a different tool or check permissions/integrations.`,
4082
+ unavailable: true,
4083
+ }),
4084
+ is_error: true,
4085
+ });
4086
+ hasUnavailableToolAttempt = true;
4087
+ continue;
4088
+ }
4089
+ // Infer missing parameters for weaker models (normalize inputs before deduplication)
4090
+ const inference = this.inferMissingParameters(content.name, content.input);
4091
+ if (inference.modified) {
4092
+ content.input = inference.input;
4093
+ this.daemon.logEvent(this.task.id, 'parameter_inference', {
4094
+ tool: content.name,
4095
+ inference: inference.inference,
4096
+ });
4097
+ }
4098
+ // If canvas_push is missing content, try extracting HTML from assistant text or auto-generate
4099
+ await this.handleCanvasPushFallback(content, assistantText);
3288
4100
  // Check for duplicate tool calls (prevents stuck loops)
3289
4101
  const duplicateCheck = this.toolCallDeduplicator.checkDuplicate(content.name, content.input);
3290
4102
  if (duplicateCheck.isDuplicate) {
@@ -3353,22 +4165,14 @@ SCHEDULING & REMINDERS:
3353
4165
  }
3354
4166
  continue;
3355
4167
  }
3356
- // Infer missing parameters for weaker models
3357
- const inference = this.inferMissingParameters(content.name, content.input);
3358
- if (inference.modified) {
3359
- content.input = inference.input;
3360
- this.daemon.logEvent(this.task.id, 'parameter_inference', {
3361
- tool: content.name,
3362
- inference: inference.inference,
3363
- });
3364
- }
3365
4168
  this.daemon.logEvent(this.task.id, 'tool_call', {
3366
4169
  tool: content.name,
3367
4170
  input: content.input,
3368
4171
  });
3369
4172
  try {
3370
4173
  // Execute tool with timeout to prevent hanging
3371
- const result = await withTimeout(this.toolRegistry.executeTool(content.name, content.input), TOOL_TIMEOUT_MS, `Tool ${content.name}`);
4174
+ const toolTimeoutMs = this.getToolTimeoutMs(content.name, content.input);
4175
+ const result = await withTimeout(this.toolRegistry.executeTool(content.name, content.input), toolTimeoutMs, `Tool ${content.name}`);
3372
4176
  // Tool succeeded - reset failure counter
3373
4177
  this.toolFailureTracker.recordSuccess(content.name);
3374
4178
  // Record this call for deduplication
@@ -3430,7 +4234,7 @@ SCHEDULING & REMINDERS:
3430
4234
  });
3431
4235
  // If all tool attempts were for disabled or duplicate tools, don't continue looping
3432
4236
  const allToolsFailed = toolResults.every(r => r.is_error);
3433
- if ((hasDisabledToolAttempt || hasDuplicateToolAttempt) && allToolsFailed) {
4237
+ if ((hasDisabledToolAttempt || hasDuplicateToolAttempt || hasUnavailableToolAttempt) && allToolsFailed) {
3434
4238
  console.log('[TaskExecutor] All tool calls failed, were disabled, or duplicates - stopping iteration');
3435
4239
  continueLoop = false;
3436
4240
  }
@@ -3438,6 +4242,12 @@ SCHEDULING & REMINDERS:
3438
4242
  continueLoop = true;
3439
4243
  }
3440
4244
  }
4245
+ if (assistantAskedQuestion && shouldResumeAfterFollowup && this.shouldPauseForQuestions) {
4246
+ console.log('[TaskExecutor] Assistant asked a question during follow-up, pausing for user input');
4247
+ this.waitingForUserInput = true;
4248
+ pausedForUserInput = true;
4249
+ continueLoop = false;
4250
+ }
3441
4251
  // Check if agent wants to end but hasn't provided a text response yet
3442
4252
  // If tools were called but no summary was given, request one
3443
4253
  if (wantsToEnd && !hasTextInThisResponse && hadToolCalls && !hasProvidedTextResponse) {
@@ -3465,6 +4275,13 @@ SCHEDULING & REMINDERS:
3465
4275
  this.daemon.logEvent(this.task.id, 'follow_up_completed', {
3466
4276
  message: 'Follow-up message processed',
3467
4277
  });
4278
+ if (pausedForUserInput) {
4279
+ this.daemon.updateTaskStatus(this.task.id, 'paused');
4280
+ this.daemon.logEvent(this.task.id, 'task_paused', {
4281
+ message: 'Paused - awaiting user input',
4282
+ });
4283
+ return;
4284
+ }
3468
4285
  if (shouldResumeAfterFollowup && this.plan) {
3469
4286
  resumeAttempted = true;
3470
4287
  await this.resumeAfterPause();