screenhand 0.2.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212)
  1. package/README.md +165 -446
  2. package/bin/darwin-arm64/macos-bridge +0 -0
  3. package/dist/mcp-desktop.js +3615 -400
  4. package/dist/scripts/export-help-center.js +112 -0
  5. package/dist/scripts/marketing-loop.js +117 -0
  6. package/dist/scripts/observer-daemon.js +288 -0
  7. package/dist/scripts/orchestrator-daemon.js +399 -0
  8. package/dist/scripts/threads-campaign.js +208 -0
  9. package/dist/src/community/fetcher.js +109 -0
  10. package/dist/src/community/index.js +6 -0
  11. package/dist/src/community/publisher.js +191 -0
  12. package/dist/src/community/remote-api.js +121 -0
  13. package/dist/src/community/types.js +3 -0
  14. package/dist/src/community/validator.js +95 -0
  15. package/dist/src/context-tracker.js +489 -0
  16. package/dist/src/ingestion/coverage-auditor.js +233 -0
  17. package/dist/src/ingestion/doc-parser.js +164 -0
  18. package/dist/src/ingestion/index.js +8 -0
  19. package/dist/src/ingestion/menu-scanner.js +152 -0
  20. package/dist/src/ingestion/reference-merger.js +186 -0
  21. package/dist/src/ingestion/shortcut-extractor.js +180 -0
  22. package/dist/src/ingestion/tutorial-extractor.js +170 -0
  23. package/dist/src/ingestion/types.js +3 -0
  24. package/dist/src/jobs/manager.js +82 -14
  25. package/dist/src/jobs/runner.js +138 -15
  26. package/dist/src/learning/engine.js +356 -0
  27. package/dist/src/learning/index.js +9 -0
  28. package/dist/src/learning/locator-policy.js +120 -0
  29. package/dist/src/learning/pattern-policy.js +89 -0
  30. package/dist/src/learning/recovery-policy.js +116 -0
  31. package/dist/src/learning/sensor-policy.js +115 -0
  32. package/dist/src/learning/timing-model.js +204 -0
  33. package/dist/src/learning/topology-policy.js +90 -0
  34. package/dist/src/learning/types.js +9 -0
  35. package/dist/src/logging/timeline-logger.js +4 -1
  36. package/dist/src/memory/playbook-seeds.js +200 -0
  37. package/dist/src/memory/recall.js +60 -8
  38. package/dist/src/memory/service.js +30 -5
  39. package/dist/src/memory/store.js +34 -5
  40. package/dist/src/native/bridge-client.js +253 -31
  41. package/dist/src/observer/state.js +199 -0
  42. package/dist/src/observer/types.js +43 -0
  43. package/dist/src/orchestrator/state.js +68 -0
  44. package/dist/src/orchestrator/types.js +22 -0
  45. package/dist/src/perception/ax-source.js +162 -0
  46. package/dist/src/perception/cdp-source.js +162 -0
  47. package/dist/src/perception/coordinator.js +771 -0
  48. package/dist/src/perception/frame-differ.js +287 -0
  49. package/dist/src/perception/index.js +22 -0
  50. package/dist/src/perception/manager.js +199 -0
  51. package/dist/src/perception/types.js +47 -0
  52. package/dist/src/perception/vision-source.js +399 -0
  53. package/dist/src/planner/deterministic.js +298 -0
  54. package/dist/src/planner/executor.js +870 -0
  55. package/dist/src/planner/goal-store.js +92 -0
  56. package/dist/src/planner/index.js +21 -0
  57. package/dist/src/planner/planner.js +520 -0
  58. package/dist/src/planner/tool-registry.js +71 -0
  59. package/dist/src/planner/types.js +22 -0
  60. package/dist/src/platform/explorer.js +213 -0
  61. package/dist/src/platform/help-center-markdown.js +527 -0
  62. package/dist/src/platform/learner.js +257 -0
  63. package/dist/src/playbook/engine.js +296 -11
  64. package/dist/src/playbook/mcp-recorder.js +204 -0
  65. package/dist/src/playbook/recorder.js +3 -2
  66. package/dist/src/playbook/runner.js +1 -1
  67. package/dist/src/playbook/store.js +139 -10
  68. package/dist/src/recovery/detectors.js +156 -0
  69. package/dist/src/recovery/engine.js +327 -0
  70. package/dist/src/recovery/index.js +20 -0
  71. package/dist/src/recovery/strategies.js +274 -0
  72. package/dist/src/recovery/types.js +20 -0
  73. package/dist/src/runtime/accessibility-adapter.js +55 -18
  74. package/dist/src/runtime/applescript-adapter.js +8 -2
  75. package/dist/src/runtime/cdp-chrome-adapter.js +1 -1
  76. package/dist/src/runtime/executor.js +23 -3
  77. package/dist/src/runtime/locator-cache.js +24 -2
  78. package/dist/src/runtime/service.js +59 -15
  79. package/dist/src/runtime/session-manager.js +4 -1
  80. package/dist/src/runtime/vision-adapter.js +2 -1
  81. package/dist/src/state/app-map-types.js +72 -0
  82. package/dist/src/state/app-map.js +1974 -0
  83. package/dist/src/state/entity-tracker.js +108 -0
  84. package/dist/src/state/fusion.js +96 -0
  85. package/dist/src/state/index.js +21 -0
  86. package/dist/src/state/ladder-generator.js +236 -0
  87. package/dist/src/state/persistence.js +156 -0
  88. package/dist/src/state/types.js +17 -0
  89. package/dist/src/state/world-model.js +1456 -0
  90. package/dist/src/util/atomic-write.js +19 -4
  91. package/dist/src/util/sanitize.js +146 -0
  92. package/dist-app-maps/com.figma.Desktop.json +959 -0
  93. package/dist-app-maps/com.hnc.Discord.json +1146 -0
  94. package/dist-app-maps/notion.id.json +2831 -0
  95. package/dist-playbooks/canva-screenhand-carousel.json +445 -0
  96. package/dist-playbooks/codex-desktop.json +76 -0
  97. package/dist-playbooks/competitor-research-stack.json +122 -0
  98. package/dist-playbooks/davinci-color-grade.json +153 -0
  99. package/dist-playbooks/davinci-edit-timeline.json +162 -0
  100. package/dist-playbooks/davinci-render.json +114 -0
  101. package/dist-playbooks/devto.json +52 -0
  102. package/dist-playbooks/discord.json +41 -0
  103. package/dist-playbooks/google-flow-create-project.json +59 -0
  104. package/dist-playbooks/google-flow-edit-image.json +90 -0
  105. package/dist-playbooks/google-flow-edit-video.json +90 -0
  106. package/dist-playbooks/google-flow-generate-image.json +68 -0
  107. package/dist-playbooks/google-flow-generate-video.json +191 -0
  108. package/dist-playbooks/google-flow-open-project.json +48 -0
  109. package/dist-playbooks/google-flow-open-scenebuilder.json +64 -0
  110. package/dist-playbooks/google-flow-search-assets.json +64 -0
  111. package/dist-playbooks/instagram.json +57 -0
  112. package/dist-playbooks/linkedin.json +52 -0
  113. package/dist-playbooks/n8n.json +43 -0
  114. package/dist-playbooks/reddit.json +52 -0
  115. package/dist-playbooks/threads.json +59 -0
  116. package/dist-playbooks/x-twitter.json +59 -0
  117. package/dist-playbooks/youtube.json +59 -0
  118. package/dist-references/canva.json +646 -0
  119. package/dist-references/codex-desktop.json +305 -0
  120. package/dist-references/davinci-resolve-keyboard.json +594 -0
  121. package/dist-references/davinci-resolve-menu-map.json +1139 -0
  122. package/dist-references/davinci-resolve-menus-batch1.json +116 -0
  123. package/dist-references/davinci-resolve-menus-batch2.json +372 -0
  124. package/dist-references/davinci-resolve-menus-batch3.json +330 -0
  125. package/dist-references/davinci-resolve-menus-batch4.json +297 -0
  126. package/dist-references/davinci-resolve-shortcuts.json +333 -0
  127. package/dist-references/devpost.json +186 -0
  128. package/dist-references/devto.json +317 -0
  129. package/dist-references/discord.json +549 -0
  130. package/dist-references/figma.json +1186 -0
  131. package/dist-references/finder.json +146 -0
  132. package/dist-references/google-ads-transparency.json +95 -0
  133. package/dist-references/google-flow.json +649 -0
  134. package/dist-references/instagram.json +341 -0
  135. package/dist-references/linkedin.json +324 -0
  136. package/dist-references/meta-ad-library.json +86 -0
  137. package/dist-references/n8n.json +387 -0
  138. package/dist-references/notes.json +27 -0
  139. package/dist-references/notion.json +163 -0
  140. package/dist-references/reddit.json +341 -0
  141. package/dist-references/threads.json +337 -0
  142. package/dist-references/x-twitter.json +403 -0
  143. package/dist-references/youtube.json +373 -0
  144. package/native/macos-bridge/Package.swift +22 -0
  145. package/native/macos-bridge/Sources/AccessibilityBridge.swift +482 -0
  146. package/native/macos-bridge/Sources/AppManagement.swift +339 -0
  147. package/native/macos-bridge/Sources/CoreGraphicsBridge.swift +537 -0
  148. package/native/macos-bridge/Sources/ObserverBridge.swift +120 -0
  149. package/native/macos-bridge/Sources/StreamCapture.swift +136 -0
  150. package/native/macos-bridge/Sources/VisionBridge.swift +238 -0
  151. package/native/macos-bridge/Sources/main.swift +498 -0
  152. package/native/windows-bridge/AppManagement.cs +234 -0
  153. package/native/windows-bridge/InputBridge.cs +436 -0
  154. package/native/windows-bridge/Program.cs +270 -0
  155. package/native/windows-bridge/ScreenCapture.cs +453 -0
  156. package/native/windows-bridge/UIAutomationBridge.cs +571 -0
  157. package/native/windows-bridge/WindowsBridge.csproj +17 -0
  158. package/package.json +12 -1
  159. package/scripts/postinstall.cjs +127 -0
  160. package/dist/.audit-log.jsonl +0 -55
  161. package/dist/.screenhand/memory/.lock +0 -1
  162. package/dist/.screenhand/memory/actions.jsonl +0 -85
  163. package/dist/.screenhand/memory/errors.jsonl +0 -5
  164. package/dist/.screenhand/memory/errors.jsonl.bak +0 -4
  165. package/dist/.screenhand/memory/state.json +0 -35
  166. package/dist/.screenhand/memory/state.json.bak +0 -35
  167. package/dist/.screenhand/memory/strategies.jsonl +0 -12
  168. package/dist/agent/cli.js +0 -73
  169. package/dist/agent/loop.js +0 -258
  170. package/dist/config.js +0 -9
  171. package/dist/index.js +0 -56
  172. package/dist/logging/timeline-logger.js +0 -29
  173. package/dist/mcp/mcp-stdio-server.js +0 -448
  174. package/dist/mcp/server.js +0 -347
  175. package/dist/mcp-entry.js +0 -59
  176. package/dist/memory/recall.js +0 -160
  177. package/dist/memory/research.js +0 -98
  178. package/dist/memory/seeds.js +0 -89
  179. package/dist/memory/session.js +0 -161
  180. package/dist/memory/store.js +0 -391
  181. package/dist/memory/types.js +0 -4
  182. package/dist/monitor/codex-monitor.js +0 -377
  183. package/dist/monitor/task-queue.js +0 -84
  184. package/dist/monitor/types.js +0 -49
  185. package/dist/native/bridge-client.js +0 -174
  186. package/dist/native/macos-bridge-client.js +0 -5
  187. package/dist/npm-publish-helper.js +0 -117
  188. package/dist/npm-token-cdp.js +0 -113
  189. package/dist/npm-token-create.js +0 -135
  190. package/dist/npm-token-finish.js +0 -126
  191. package/dist/playbook/engine.js +0 -193
  192. package/dist/playbook/index.js +0 -4
  193. package/dist/playbook/recorder.js +0 -519
  194. package/dist/playbook/runner.js +0 -392
  195. package/dist/playbook/store.js +0 -166
  196. package/dist/playbook/types.js +0 -4
  197. package/dist/runtime/accessibility-adapter.js +0 -377
  198. package/dist/runtime/app-adapter.js +0 -48
  199. package/dist/runtime/applescript-adapter.js +0 -283
  200. package/dist/runtime/ax-role-map.js +0 -80
  201. package/dist/runtime/browser-adapter.js +0 -36
  202. package/dist/runtime/cdp-chrome-adapter.js +0 -505
  203. package/dist/runtime/composite-adapter.js +0 -205
  204. package/dist/runtime/executor.js +0 -250
  205. package/dist/runtime/locator-cache.js +0 -12
  206. package/dist/runtime/planning-loop.js +0 -47
  207. package/dist/runtime/service.js +0 -372
  208. package/dist/runtime/session-manager.js +0 -28
  209. package/dist/runtime/state-observer.js +0 -105
  210. package/dist/runtime/vision-adapter.js +0 -208
  211. package/dist/test-mcp-protocol.js +0 -138
  212. package/dist/types.js +0 -1
@@ -0,0 +1,870 @@
1
+ // Copyright (C) 2025 Clazro Technology Private Limited
2
+ // SPDX-License-Identifier: AGPL-3.0-only
3
+ //
4
+ // This file is part of ScreenHand.
5
+ //
6
+ // ScreenHand is free software: you can redistribute it and/or modify
7
+ // it under the terms of the GNU Affero General Public License as
8
+ // published by the Free Software Foundation, version 3.
9
+ //
10
+ // ScreenHand is distributed in the hope that it will be useful,
11
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ // GNU Affero General Public License for more details.
14
+ //
15
+ // You should have received a copy of the GNU Affero General Public License
16
+ // along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
17
+ import { DEFAULT_RECOVERY_BUDGET } from "../recovery/types.js";
18
+ import { DEFAULT_PLANNER_CONFIG } from "./types.js";
19
+ /**
20
+ * PlanExecutor — runs ActionPlans step by step, verifying postconditions
21
+ * against the world model after each step.
22
+ *
23
+ * On failure, delegates to the Planner for replanning.
24
+ * On LLM steps, pauses and returns control to the client.
25
+ */
26
+ export class PlanExecutor {
27
+ worldModel;
28
+ planner;
29
+ executeTool;
30
+ recovery;
31
+ learningEngine;
32
+ config;
33
+ /** Accumulated execution trace for current goal — reset on each executeGoal() call */
34
+ log = [];
35
+ constructor(worldModel, planner, executeTool, config, recovery, learningEngine) { // Stores the collaborators on the instance and builds the effective config.
36
+ this.worldModel = worldModel; // read through getState() and getConsistentSnapshot() by the methods of this class
37
+ this.planner = planner; // supplies planGoal, planSubgoal, replan and evaluateGoal
38
+ this.executeTool = executeTool;
39
+ this.recovery = recovery; // may be absent: executePlan checks this.recovery before calling attemptRecovery
40
+ this.learningEngine = learningEngine; // may be absent: executeStepInternal checks it before asking for an adaptive budget
41
+ this.config = { ...DEFAULT_PLANNER_CONFIG, ...config }; // keys present in the supplied config override the defaults
42
+ }
43
+ dbg(msg) { // Prefixes msg with a time stamp, appends it to this.log and writes it to stderr.
44
+ const line = `[${new Date().toISOString().substring(11, 23)}] ${msg}`; // characters 11..22 of the ISO string are the HH:MM:SS.mmm part
45
+ this.log.push(line); // this.log is later copied into the executionLog field of returned summaries
46
+ process.stderr.write(line + "\n");
47
+ }
48
+ /**
49
+ * Execute a full goal: iterate subgoals, execute plans, replan on failure.
50
+ * Pauses at LLM steps and returns an ExecutionPause for the client to resolve.
51
+ */
52
+ async executeGoal(goal) { // Runs the remaining subgoals of goal in order; returns a summary object, or a pause object when a step needs client resolution.
53
+ const start = Date.now();
54
+ let stepsExecuted = 0;
55
+ let replans = 0;
56
+ this.log = []; // reset log for this goal, so executionLog only covers this call
57
+ // Capture the expected app at goal start; it is passed to diagnoseFailure below when a plan fails
58
+ const expectedBundleId = this.worldModel.getState().focusedApp?.bundleId ?? null;
59
+ this.dbg(`═══ GOAL START: "${goal.description}" ═══`);
60
+ this.dbg(`Focused app: ${expectedBundleId ?? "none"} | Windows: ${this.worldModel.getState().windows.size} | Controls: ${[...this.worldModel.getState().windows.values()].reduce((n, w) => n + w.controls.size, 0)}`);
61
+ // Recovery budget for the entire goal lifetime: one object shared by every executePlan call made below
62
+ const recoveryBudget = {
63
+ ...DEFAULT_RECOVERY_BUDGET,
64
+ usedStrategyIds: new Set(), // fresh Set per goal so it is not shared through the spread defaults
65
+ };
66
+ // Plan any unplanned subgoals
67
+ await this.planner.planGoal(goal);
68
+ // Resume from pausedAt if set; only the subgoal index is used here, the step position comes from plan.currentStepIndex inside executePlan
69
+ const startSubgoalIdx = goal.pausedAt?.subgoalIndex ?? 0;
70
+ delete goal.pausedAt;
71
+ for (let sgIdx = startSubgoalIdx; sgIdx < goal.subgoals.length; sgIdx++) {
72
+ const subgoal = goal.subgoals[sgIdx];
73
+ if (subgoal.status === "completed" || subgoal.status === "skipped")
74
+ continue;
75
+ this.dbg(`── Subgoal ${sgIdx + 1}/${goal.subgoals.length}: "${subgoal.description}"`);
76
+ subgoal.status = "active";
77
+ while (subgoal.status === "active" &&
78
+ subgoal.attempts < subgoal.maxAttempts) { // NOTE(review): attempts is never incremented in this method; presumably planner.replan does it - confirm
79
+ if (!subgoal.plan) {
80
+ this.dbg(` ✗ No plan available`);
81
+ subgoal.status = "failed";
82
+ subgoal.lastError = "No plan available";
83
+ break;
84
+ }
85
+ this.dbg(` Plan: ${subgoal.plan.source} | ${subgoal.plan.steps.length} steps | attempt ${subgoal.attempts + 1}/${subgoal.maxAttempts}`);
86
+ const result = await this.executePlan(subgoal.plan, recoveryBudget);
87
+ // Check if we hit an LLM pause: only the pause object returned by executePlan carries a paused key
88
+ if ("paused" in result) {
89
+ this.dbg(` ⏸ Paused at step ${result.stepIndex}: ${result.stepDescription}`);
90
+ // Save resume point on the goal
91
+ goal.pausedAt = {
92
+ subgoalIndex: sgIdx,
93
+ stepIndex: result.stepIndex,
94
+ };
95
+ goal.status = "active";
96
+ return {
97
+ ...result,
98
+ subgoalIndex: sgIdx, // replaces the placeholder 0 that executePlan puts in its pause object
99
+ };
100
+ }
101
+ stepsExecuted += result.stepsExecuted;
102
+ if (result.success) {
103
+ this.dbg(` ✓ Subgoal completed`);
104
+ subgoal.status = "completed";
105
+ break;
106
+ }
107
+ this.dbg(` ✗ Plan failed: ${result.error}`);
108
+ // Plan failed — try replanning
109
+ replans++;
110
+ const reason = this.diagnoseFailure(result, expectedBundleId);
111
+ this.dbg(` → Replan #${replans}, reason: ${reason}`);
112
+ const newPlan = await this.planner.replan(subgoal, reason, result.error ?? undefined); // a null error is passed on as undefined
113
+ if (!newPlan) {
114
+ this.dbg(` ✗ No replan available — giving up`);
115
+ break; // NOTE(review): subgoal.status stays active and lastError is not set on this path; the final goal status depends on planner.evaluateGoal - confirm
116
+ }
117
+ this.dbg(` → New plan: ${newPlan.source} | ${newPlan.steps.length} steps`);
118
+ subgoal.plan = newPlan;
119
+ subgoal.status = "active";
120
+ }
121
+ }
122
+ this.planner.evaluateGoal(goal); // goal.status is read right after this call to build the summary
123
+ const finalError = goal.status === "failed"
124
+ ? goal.subgoals.find((sg) => sg.status === "failed")?.lastError ?? "Unknown error"
125
+ : null;
126
+ this.dbg(`═══ GOAL ${goal.status.toUpperCase()} in ${Date.now() - start}ms | steps=${stepsExecuted} replans=${replans}${finalError ? ` error="${finalError}"` : ""} ═══`);
127
+ return {
128
+ goalId: goal.id,
129
+ success: goal.status === "completed",
130
+ subgoalsCompleted: goal.subgoals.filter((sg) => sg.status === "completed").length,
131
+ totalSubgoals: goal.subgoals.length,
132
+ stepsExecuted,
133
+ replans,
134
+ durationMs: Date.now() - start,
135
+ error: finalError, // lastError of the first failed subgoal, or null when the goal did not fail
136
+ executionLog: [...this.log], // copy, so later dbg calls do not change the returned array
137
+ };
138
+ }
139
+ /**
140
+ * Execute the next single step of a goal. Returns the step result,
141
+ * or an ExecutionPause if the next step requires LLM interpretation.
142
+ */
143
+ async executeNextStep(goal) { // Executes one step of the first unfinished subgoal; returns that step result, a pause object, or a goal summary when nothing is left to run.
144
+ // Find the current active subgoal and step (subgoals already completed, skipped or failed are passed over)
145
+ for (let sgIdx = 0; sgIdx < goal.subgoals.length; sgIdx++) {
146
+ const subgoal = goal.subgoals[sgIdx];
147
+ if (subgoal.status === "completed" || subgoal.status === "skipped" || subgoal.status === "failed")
148
+ continue;
149
+ if (!subgoal.plan) {
150
+ subgoal.plan = await this.planner.planSubgoal(subgoal); // plan on demand; NOTE(review): a missing return value from planSubgoal is not handled before plan.currentStepIndex is read - confirm it always returns a plan
151
+ }
152
+ subgoal.status = "active";
153
+ const plan = subgoal.plan;
154
+ if (plan.currentStepIndex >= plan.steps.length) { // no steps left in this plan: mark the subgoal done and move to the next one
155
+ subgoal.status = "completed";
156
+ continue;
157
+ }
158
+ const step = plan.steps[plan.currentStepIndex];
159
+ // If step requires LLM and has no tool assigned, pause. NOTE(review): goal.pausedAt is not written on this path, while resolveStep reads it and falls back to index 0 - confirm callers set it
160
+ if (step.requiresLLM && !step.tool) {
161
+ return {
162
+ paused: true,
163
+ reason: "requires_llm",
164
+ stepIndex: plan.currentStepIndex,
165
+ stepDescription: step.description,
166
+ subgoalIndex: sgIdx,
167
+ completedSteps: plan.currentStepIndex,
168
+ totalSteps: plan.steps.length,
169
+ };
170
+ }
171
+ const nextStep = findNextMeaningfulStep(plan.steps, plan.currentStepIndex); // helper defined outside this view; its result is forwarded to executeStepInternal
172
+ const result = await this.executeStepInternal(step, nextStep);
173
+ if (result.success) {
174
+ step.status = "completed";
175
+ step.resolvedBy = "auto";
176
+ plan.currentStepIndex++;
177
+ // Check if subgoal is complete
178
+ if (plan.currentStepIndex >= plan.steps.length) {
179
+ subgoal.status = "completed";
180
+ this.planner.evaluateGoal(goal);
181
+ }
182
+ }
183
+ else {
184
+ step.status = "failed"; // currentStepIndex is not advanced, so the next call targets the same step again
185
+ }
186
+ return result; // at most one step is executed per call
187
+ }
188
+ // All subgoals done: reached only when the loop found no subgoal with a step left to run
189
+ this.planner.evaluateGoal(goal);
190
+ return {
191
+ goalId: goal.id,
192
+ success: goal.status === "completed",
193
+ subgoalsCompleted: goal.subgoals.filter((sg) => sg.status === "completed").length,
194
+ totalSubgoals: goal.subgoals.length,
195
+ stepsExecuted: 0, // this call ran no step; the counters below are fixed at zero on this path
196
+ replans: 0,
197
+ durationMs: 0,
198
+ error: goal.status === "failed"
199
+ ? goal.subgoals.find((sg) => sg.status === "failed")?.lastError ?? "Unknown error"
200
+ : null,
201
+ executionLog: [...this.log], // this.log is not reset in this method, so it holds whatever was logged since the last executeGoal reset
202
+ };
203
+ }
204
+ /**
205
+ * Resolve an LLM step: the client provides the tool + params to use.
206
+ * Executes the tool, advances the plan, and returns the result.
207
+ */
208
+ async resolveStep(goal, tool, params) { // Assigns the client-chosen tool and params to the paused step, executes it, and returns the step result.
209
+ // Find the paused step; both indices fall back to 0 when goal.pausedAt is not set
210
+ const sgIdx = goal.pausedAt?.subgoalIndex ?? 0;
211
+ const stepIdx = goal.pausedAt?.stepIndex ?? 0;
212
+ const subgoal = goal.subgoals[sgIdx];
213
+ if (!subgoal?.plan) { // missing subgoal or missing plan: return a synthetic failed result instead of throwing
214
+ return {
215
+ step: { tool: "", params: {}, expectedPostcondition: null, timeout: 0, fallbackTool: null, requiresLLM: true, status: "failed", description: "No plan" },
216
+ success: false,
217
+ durationMs: 0,
218
+ postconditionMet: false,
219
+ error: "No active plan to resolve",
220
+ usedFallback: false,
221
+ };
222
+ }
223
+ const plan = subgoal.plan;
224
+ const step = plan.steps[stepIdx];
225
+ if (!step) { // index outside the plan: same synthetic failed result shape as above
226
+ return {
227
+ step: { tool: "", params: {}, expectedPostcondition: null, timeout: 0, fallbackTool: null, requiresLLM: true, status: "failed", description: "No step" },
228
+ success: false,
229
+ durationMs: 0,
230
+ postconditionMet: false,
231
+ error: "Step not found at pause index",
232
+ usedFallback: false,
233
+ };
234
+ }
235
+ // Resolve the LLM step with client-provided tool+params (the step object in the plan is mutated in place)
236
+ step.tool = tool;
237
+ step.params = params;
238
+ step.resolvedBy = "client";
239
+ const result = await this.executeStepInternal(step); // no nextStep argument is passed on this path
240
+ if (result.success) {
241
+ step.status = "completed";
242
+ plan.currentStepIndex = stepIdx + 1;
243
+ delete goal.pausedAt; // cleared only on success; after a failure the pause marker is left in place
244
+ if (plan.currentStepIndex >= plan.steps.length) {
245
+ subgoal.status = "completed";
246
+ this.planner.evaluateGoal(goal);
247
+ }
248
+ }
249
+ else {
250
+ step.status = "failed"; // the tool and params assigned above stay on the step
251
+ }
252
+ return result;
253
+ }
254
+ /**
255
+ * Execute a single ActionPlan's steps sequentially.
256
+ * Pauses at LLM steps instead of failing.
257
+ */
258
+ async executePlan(plan, recoveryBudget) { // Runs plan.steps from plan.currentStepIndex onward; returns a success or failure object with stepResults, or a pause object at a step that needs the client.
259
+ const stepResults = [];
260
+ for (let i = plan.currentStepIndex; i < plan.steps.length; i++) {
261
+ const step = plan.steps[i];
262
+ plan.currentStepIndex = i; // kept in sync with the loop so a later call starts at this step; it is never set past the last index here
263
+ // Pause at LLM-required steps for client resolution
264
+ if (step.requiresLLM && !step.tool) {
265
+ return {
266
+ paused: true,
267
+ reason: "requires_llm",
268
+ stepIndex: i,
269
+ stepDescription: step.description,
270
+ subgoalIndex: 0, // placeholder: executeGoal overwrites this field with the real subgoal index
271
+ completedSteps: stepResults.length,
272
+ totalSteps: plan.steps.length,
273
+ };
274
+ }
275
+ // Find the next meaningful step (skip screenshots/OCR — they have no target)
276
+ const nextStep = findNextMeaningfulStep(plan.steps, i);
277
+ const result = await this.executeStepInternal(step, nextStep);
278
+ stepResults.push(result);
279
+ if (!result.success) {
280
+ step.status = "failed";
281
+ // Attempt recovery before reporting failure (only when both a recovery engine and a budget were supplied)
282
+ if (this.recovery && recoveryBudget) {
283
+ const expectedBundleId = this.worldModel.getState().focusedApp?.bundleId ?? null; // read from the world model at failure time, not at goal start
284
+ const recoveryOutcome = await this.recovery.attemptRecovery(result.error ?? "unknown failure", expectedBundleId, recoveryBudget);
285
+ if (recoveryOutcome.recovered) {
286
+ // Retry the failed step once after recovery
287
+ step.status = "pending";
288
+ const retryResult = await this.executeStepInternal(step); // the retry is called without nextStep
289
+ stepResults.push(retryResult); // the retry is recorded as its own entry, so it also counts toward stepsExecuted
290
+ if (retryResult.success) {
291
+ step.status = "completed";
292
+ continue;
293
+ }
294
+ step.status = "failed";
295
+ }
296
+ }
297
+ return {
298
+ success: false,
299
+ stepsExecuted: stepResults.length,
300
+ error: result.error, // error of the first attempt, even when a retry was made and failed with a different error
301
+ stepResults,
302
+ };
303
+ }
304
+ step.status = "completed";
305
+ }
306
+ return {
307
+ success: true,
308
+ stepsExecuted: stepResults.length,
309
+ error: null,
310
+ stepResults,
311
+ };
312
+ }
313
+ /**
314
+ * Execute a single PlanStep and verify its postcondition.
315
+ */
316
+ /**
317
+ * Execute a single PlanStep with world-model-aware LOOK → ACT → VERIFY loop.
318
+ * Uses the world model (0ms reads) for awareness — NOT screenshots.
319
+ */
320
+ async executeStepInternal(step, nextStep) {
321
+ const start = Date.now();
322
+ step.status = "executing";
323
+ let usedFallback = false;
324
+ // ── LOOK: Pre-step awareness from world model (0ms, consistent snapshot) ──
325
+ const preState = this.worldModel.getConsistentSnapshot();
326
+ const preControls = [...preState.windows.values()].reduce((n, w) => n + w.controls.size, 0);
327
+ const paramStr = Object.entries(step.params)
328
+ .filter(([k]) => !k.startsWith("_"))
329
+ .map(([k, v]) => `${k}=${JSON.stringify(v)}`)
330
+ .join(", ");
331
+ this.dbg(` ▶ ${step.tool}(${paramStr})`);
332
+ this.dbg(` PRE | app=${preState.focusedApp?.bundleId ?? "none"} | win=${preState.focusedWindowId ?? "none"} | controls=${preControls} | dialogs=${preState.activeDialogs.length}`);
333
+ if (nextStep) {
334
+ const npStr = Object.entries(nextStep.params).filter(([k]) => !k.startsWith("_")).map(([k, v]) => `${k}=${JSON.stringify(v)}`).join(", ");
335
+ this.dbg(` NEXT | ${nextStep.tool}(${npStr})`);
336
+ }
337
+ // 1. Dialog check: if a dialog is blocking, fail fast so recovery can handle it
338
+ if (preState.activeDialogs.length > 0) {
339
+ const dialog = preState.activeDialogs[0];
340
+ const err = `Dialog blocking: ${dialog.type} "${dialog.title ?? dialog.message ?? "unknown"}" [buttons: ${dialog.buttons.join(", ")}]`;
341
+ this.dbg(` BLOCK| ${err}`);
342
+ return {
343
+ step,
344
+ success: false,
345
+ durationMs: Date.now() - start,
346
+ postconditionMet: false,
347
+ error: err,
348
+ usedFallback: false,
349
+ };
350
+ }
351
+ // 2. Target validation: for ANY tool that targets a UI element, verify it exists
352
+ if (INTERACTION_TOOLS.has(step.tool)) {
353
+ const target = (step.params.title ?? step.params.text ?? step.params.name ?? step.params.selector);
354
+ if (typeof target === "string") {
355
+ const focusedWinId = preState.focusedWindowId;
356
+ if (focusedWinId !== null) {
357
+ const win = preState.windows.get(focusedWinId);
358
+ if (win && win.controls.size > 5) {
359
+ const targetLower = target.toLowerCase();
360
+ const found = [...win.controls.values()].some((c) => c.label.value?.toLowerCase().includes(targetLower));
361
+ if (!found) {
362
+ const available = [...win.controls.values()]
363
+ .slice(0, 15)
364
+ .map((c) => `${c.role}:"${c.label.value ?? ""}"`)
365
+ .filter((s) => s.length > 3)
366
+ .join(", ");
367
+ const err = `Pre-check: "${target}" not found in world model (${win.controls.size} controls tracked). Available: ${available}`;
368
+ this.dbg(` MISS | ${err}`);
369
+ return {
370
+ step,
371
+ success: false,
372
+ durationMs: Date.now() - start,
373
+ postconditionMet: false,
374
+ error: err,
375
+ usedFallback: false,
376
+ };
377
+ }
378
+ this.dbg(` FOUND| "${target}" in world model ✓`);
379
+ }
380
+ }
381
+ }
382
+ }
383
+ // 3. Focus validation: for type_text, verify a text field is focused
384
+ if (step.tool === "type_text") {
385
+ const focusedWinId = preState.focusedWindowId;
386
+ if (focusedWinId !== null) {
387
+ const win = preState.windows.get(focusedWinId);
388
+ if (win?.focusedElement) {
389
+ const role = win.focusedElement.role.toLowerCase();
390
+ const isTextInput = role.includes("text") || role.includes("search") ||
391
+ role.includes("combobox") || role.includes("field") || role.includes("area");
392
+ if (!isTextInput && win.controls.size > 5) {
393
+ // Focused element isn't a text field — warn but don't block
394
+ // (some apps use non-standard roles)
395
+ step.description = `${step.description} [⚠ focused: ${win.focusedElement.role}:"${win.focusedElement.label.value ?? ""}"]`;
396
+ }
397
+ }
398
+ }
399
+ }
400
+ // ── ACT: Execute the tool ──
401
+ // Auto-upgrade click_text → ui_press when world model already has the target via AX.
402
+ // click_text uses cg.captureWindow (crashes on GPU-heavy pages) + OCR (slow, sometimes wrong tab).
403
+ // ui_press uses AX directly — 10x faster, no screenshots, no crash risk.
404
+ if (step.tool === "click_text" && preState.focusedApp) {
405
+ const clickTarget = step.params.text;
406
+ if (clickTarget) {
407
+ for (const win of preState.windows.values()) {
408
+ const match = [...win.controls.values()].find((c) => c.label.value?.toLowerCase().includes(clickTarget.toLowerCase()));
409
+ if (match) {
410
+ // Get real pid: window.pid (from AX scan) > focusedApp.pid (often 0 from feedWorldModel)
411
+ const pid = win.pid || preState.focusedApp.pid;
412
+ if (pid) {
413
+ this.dbg(` SWAP | click_text → ui_press (found "${match.label.value}" as ${match.role} in AX, pid=${pid})`);
414
+ step.tool = "ui_press";
415
+ step.params = { pid, title: clickTarget };
416
+ }
417
+ else {
418
+ this.dbg(` SWAP | pid=0, cannot use ui_press — failing to avoid bridge crash`);
419
+ return {
420
+ step, success: false, durationMs: Date.now() - start, postconditionMet: false,
421
+ error: `Cannot click "${clickTarget}": element found in AX but pid unknown. Use focus first.`,
422
+ usedFallback: false,
423
+ };
424
+ }
425
+ break;
426
+ }
427
+ }
428
+ }
429
+ }
430
+ // Skip screenshot steps for browser apps when world model is populated.
431
+ // CGWindowListCreateImage crashes on GPU-heavy pages (WebGL/canvas).
432
+ // The world model already has full UI visibility — screenshot adds nothing.
433
+ if (SCREENSHOT_TOOLS.has(step.tool) && preState.focusedApp) {
434
+ const appDomain = preState.appDomains?.get(preState.focusedApp.bundleId);
435
+ const family = appDomain?.family;
436
+ const hasControls = preState.windows.size > 0 &&
437
+ [...preState.windows.values()].some((w) => w.controls.size > 10);
438
+ if (family === "browser" && hasControls) {
439
+ this.dbg(` SKIP | ${step.tool} — browser+world model active (${preControls} controls)`);
440
+ step.description = `${step.description} [skipped — browser+world model active]`;
441
+ return {
442
+ step,
443
+ success: true,
444
+ durationMs: Date.now() - start,
445
+ postconditionMet: true,
446
+ error: null,
447
+ usedFallback: false,
448
+ };
449
+ }
450
+ }
451
+ const params = { ...step.params };
452
+ // Auto-inject windowId for click_text/screenshot_file/ocr when not provided by plan.
453
+ // These tools require windowId but strategies often omit it — use focused window.
454
+ if (WINDOW_ID_TOOLS.has(step.tool) && !params.windowId) {
455
+ const winId = preState.focusedWindowId ?? [...preState.windows.keys()][0];
456
+ if (winId != null) {
457
+ params.windowId = winId;
458
+ this.dbg(` INJ | windowId=${winId} injected for ${step.tool}`);
459
+ }
460
+ }
461
+ if (this.learningEngine && !params._budget) {
462
+ const bundleId = preState.focusedApp?.bundleId;
463
+ if (bundleId) {
464
+ params._budget = this.learningEngine.getAdaptiveBudget(bundleId);
465
+ }
466
+ }
467
+ const stepTimeout = Math.max(step.timeout || 0, this.config.defaultStepTimeout);
468
+ this.dbg(` ACT | calling ${step.tool} (timeout=${stepTimeout}ms)`);
469
+ let result = await this.tryToolWithTimeout(step.tool, params, stepTimeout);
470
+ this.dbg(` ACT | ok=${result.ok}${result.ok ? "" : ` error="${result.error}"`}`);
471
+ if (!result.ok && step.fallbackTool) {
472
+ this.dbg(` ACT | trying fallback: ${step.fallbackTool}`);
473
+ result = await this.tryToolWithTimeout(step.fallbackTool, params, stepTimeout);
474
+ this.dbg(` ACT | fallback ok=${result.ok}${result.ok ? "" : ` error="${result.error}"`}`);
475
+ usedFallback = true;
476
+ }
477
+ if (!result.ok) {
478
+ const durationMs = Date.now() - start;
479
+ this.dbg(` FAIL | ${step.tool} failed in ${durationMs}ms: ${result.error}`);
480
+ this.recordLearningOutcomes(usedFallback ? (step.fallbackTool ?? step.tool) : step.tool, params, false, durationMs);
481
+ return {
482
+ step,
483
+ success: false,
484
+ durationMs,
485
+ postconditionMet: false,
486
+ error: result.error ?? "Tool execution failed",
487
+ usedFallback,
488
+ };
489
+ }
490
+ this.feedWorldModel(usedFallback ? step.fallbackTool : step.tool, params, result);
491
+ this.recordLearningOutcomes(usedFallback ? step.fallbackTool : step.tool, params, true, Date.now() - start);
492
+ // ── VERIFY: Post-step awareness from world model ──
493
+ // For state-changing tools, wait briefly for perception to update the world model
494
+ if (STATE_CHANGING_TOOLS.has(step.tool)) {
495
+ await sleep(150); // AX refreshes in ~50ms, 150ms gives margin
496
+ }
497
+ const postState = this.worldModel.getConsistentSnapshot();
498
+ const postControls = [...postState.windows.values()].reduce((n, w) => n + w.controls.size, 0);
499
+ this.dbg(` POST | app=${postState.focusedApp?.bundleId ?? "none"} | win=${postState.focusedWindowId ?? "none"} | controls=${postControls} | dialogs=${postState.activeDialogs.length}`);
500
+ // 1. Check if a dialog appeared after the action
501
+ if (postState.activeDialogs.length > 0 && preState.activeDialogs.length === 0) {
502
+ const dialog = postState.activeDialogs[0];
503
+ const err = `Dialog appeared after ${step.tool}: ${dialog.type} "${dialog.title ?? dialog.message ?? "unknown"}" [buttons: ${dialog.buttons.join(", ")}]`;
504
+ this.dbg(` FAIL | ${err}`);
505
+ return {
506
+ step,
507
+ success: false,
508
+ durationMs: Date.now() - start,
509
+ postconditionMet: false,
510
+ error: err,
511
+ usedFallback,
512
+ };
513
+ }
514
+ // 2. Check if focus was lost (app switched unexpectedly)
515
+ if (preState.focusedApp?.bundleId && postState.focusedApp?.bundleId &&
516
+ preState.focusedApp.bundleId !== postState.focusedApp.bundleId &&
517
+ !FOCUS_TOOLS.has(step.tool)) {
518
+ const err = `Focus lost: was ${preState.focusedApp.bundleId}, now ${postState.focusedApp.bundleId}`;
519
+ this.dbg(` FAIL | ${err}`);
520
+ return {
521
+ step,
522
+ success: false,
523
+ durationMs: Date.now() - start,
524
+ postconditionMet: false,
525
+ error: err,
526
+ usedFallback,
527
+ };
528
+ }
529
+ // 3. For navigation tools, wait for world model to reflect new page
530
+ const isNavigation = NAVIGATION_TOOLS.has(step.tool) ||
531
+ (step.tool === "key" && isEnterKey(step.params));
532
+ if (isNavigation) {
533
+ const nextTarget = nextStep
534
+ ? (nextStep.params.text ?? nextStep.params.title ?? nextStep.params.name)
535
+ : undefined;
536
+ const maxWait = nextTarget ? 8000 : 3000;
537
+ const pollMs = 200;
538
+ this.dbg(` NAV | waiting for ${nextTarget ? `"${nextTarget}"` : "any state change"} (max ${maxWait}ms)`);
539
+ const waited = await this.waitForWorldModelChange(preState.focusedWindowId, preState, maxWait, pollMs, nextTarget);
540
+ this.dbg(` NAV | waited=${waited}`);
541
+ if (!waited && nextTarget) {
542
+ const err = `Navigation failed: "${nextTarget}" not found after ${maxWait}ms. Page may not have loaded or URL was wrong.`;
543
+ this.dbg(` FAIL | ${err}`);
544
+ return {
545
+ step,
546
+ success: false,
547
+ durationMs: Date.now() - start,
548
+ postconditionMet: false,
549
+ error: err,
550
+ usedFallback,
551
+ };
552
+ }
553
+ if (!waited) {
554
+ step.description = `${step.description} [⚠ no state change detected after ${maxWait}ms]`;
555
+ }
556
+ }
557
+ // 4. Verify postcondition
558
+ let postconditionMet = true;
559
+ let postconditionActual = null;
560
+ let postconditionSoft = false;
561
+ const assertion = step.expectedPostcondition ?? this.inferPostcondition(step, nextStep);
562
+ if (!step.expectedPostcondition && assertion) {
563
+ postconditionSoft = true;
564
+ }
565
+ if (assertion) {
566
+ this.dbg(` PC | checking ${assertion.type}="${assertion.target}" (${postconditionSoft ? "soft" : "hard"})`);
567
+ await sleep(Math.min(this.config.postconditionWaitMs, 500));
568
+ if (this.worldModel.getState().windows.size > 0) {
569
+ const pcResult = this.worldModel.assertStateDetailed(assertion);
570
+ postconditionMet = pcResult.matched;
571
+ postconditionActual = pcResult.actual;
572
+ this.dbg(` PC | matched=${postconditionMet} actual="${postconditionActual ?? "nothing"}"`);
573
+ }
574
+ else {
575
+ this.dbg(` PC | skipped — no windows in world model`);
576
+ }
577
+ }
578
+ if (!postconditionMet && postconditionSoft) {
579
+ this.dbg(` WARN | soft postcondition miss: "${assertion.target}" not visible (continuing)`);
580
+ step.description = `${step.description} [⚠ soft postcondition: "${assertion.target}" not yet visible]`;
581
+ return {
582
+ step,
583
+ success: true,
584
+ durationMs: Date.now() - start,
585
+ postconditionMet: false,
586
+ error: null,
587
+ usedFallback,
588
+ };
589
+ }
590
+ const finalDuration = Date.now() - start;
591
+ if (postconditionMet) {
592
+ this.dbg(` ✓ OK in ${finalDuration}ms`);
593
+ }
594
+ else {
595
+ this.dbg(` FAIL | postcondition not met: expected ${assertion?.type}="${assertion?.target}", got "${postconditionActual ?? "nothing"}"`);
596
+ }
597
+ return {
598
+ step,
599
+ success: postconditionMet,
600
+ durationMs: finalDuration,
601
+ postconditionMet,
602
+ error: postconditionMet ? null : `Postcondition not met: expected ${assertion?.type}="${assertion?.target}", got ${postconditionActual ?? "nothing"}`,
603
+ usedFallback,
604
+ };
605
+ }
606
+ /**
607
+ * Wait for the world model to reflect a state change (e.g. after navigation).
608
+ * Polls the world model (0ms reads) — NOT screenshots.
609
+ * Returns true if a change was detected, false if timed out.
610
+ */
611
+ async waitForWorldModelChange(windowId, preState, maxWaitMs, pollMs, waitForTarget) {
612
+ if (windowId === null)
613
+ return true;
614
+ const preWin = preState.windows.get(windowId);
615
+ const preTitle = preWin?.title.value ?? "";
616
+ const preControlCount = preWin?.controls.size ?? 0;
617
+ const targetLower = waitForTarget?.toLowerCase();
618
+ const deadline = Date.now() + maxWaitMs;
619
+ let genericChangeDetected = false;
620
+ while (Date.now() < deadline) {
621
+ await sleep(pollMs);
622
+ const current = this.worldModel.getState();
623
+ const curWin = current.windows.get(windowId);
624
+ if (!curWin)
625
+ continue;
626
+ // If waiting for specific content, check ALL windows (page may load in different window)
627
+ if (targetLower) {
628
+ for (const [, win] of current.windows) {
629
+ const found = [...win.controls.values()].some((c) => c.label.value?.toLowerCase().includes(targetLower));
630
+ if (found)
631
+ return true; // Target content appeared — page is ready
632
+ }
633
+ }
634
+ // Generic change detection (title, control count, dialogs)
635
+ if (!genericChangeDetected) {
636
+ if (curWin.title.value !== preTitle && curWin.title.value)
637
+ genericChangeDetected = true;
638
+ const countDelta = Math.abs(curWin.controls.size - preControlCount);
639
+ if (countDelta > 10)
640
+ genericChangeDetected = true;
641
+ if (current.activeDialogs.length > preState.activeDialogs.length)
642
+ return true;
643
+ }
644
+ // If no specific target requested, return on generic change
645
+ if (!targetLower && genericChangeDetected)
646
+ return true;
647
+ }
648
+ // If we detected a generic change but target never appeared, still return true
649
+ return genericChangeDetected;
650
+ }
651
+ /**
652
+ * Auto-infer a soft postcondition from the current and next step.
653
+ * Returns null if no useful postcondition can be inferred.
654
+ *
655
+ * Inference rules:
656
+ * - Navigation → next step's target text should become visible
657
+ * - focus/launch → target app should be focused
658
+ * - click/press on element → if next step targets different text, that text should appear
659
+ */
660
+ inferPostcondition(step, nextStep) {
661
+ // focus/launch → app should be focused
662
+ if (step.tool === "focus" || step.tool === "launch") {
663
+ const bundleId = (step.params.bundleId ?? step.params.appName);
664
+ if (bundleId) {
665
+ return { type: "app_focused", target: bundleId };
666
+ }
667
+ }
668
+ // Navigation or state-changing click → next step's target should be visible
669
+ if (nextStep && STATE_CHANGING_TOOLS.has(step.tool)) {
670
+ const nextTarget = (nextStep.params.text ?? nextStep.params.title ?? nextStep.params.name);
671
+ if (nextTarget && typeof nextTarget === "string" && nextTarget.length >= 3) {
672
+ // Don't infer if current step targets the same text (redundant)
673
+ const currentTarget = (step.params.text ?? step.params.title ?? step.params.name);
674
+ if (!currentTarget || currentTarget.toLowerCase() !== nextTarget.toLowerCase()) {
675
+ return { type: "text_visible", target: nextTarget };
676
+ }
677
+ }
678
+ }
679
+ return null;
680
+ }
681
+ /**
682
+ * Feed tool execution results into the world model to keep state fresh
683
+ * between perception cycles. Best-effort — parse failures are silently ignored.
684
+ */
685
+ feedWorldModel(tool, params, result) {
686
+ if (!result.ok || !result.result)
687
+ return;
688
+ try {
689
+ if (FOCUS_TOOLS.has(tool)) {
690
+ const bundleId = params.bundleId ?? params.appName;
691
+ if (bundleId) {
692
+ this.worldModel.updateFocusedApp({
693
+ bundleId,
694
+ appName: params.appName ?? bundleId,
695
+ pid: 0,
696
+ windowTitle: "",
697
+ });
698
+ }
699
+ }
700
+ else if (BROWSER_TOOLS.has(tool)) {
701
+ // Extract URL and title from result for CDP snapshot
702
+ let parsed = null;
703
+ try {
704
+ parsed = JSON.parse(result.result);
705
+ }
706
+ catch { /* not JSON */ }
707
+ const url = parsed?.url ?? params.url ?? "";
708
+ const title = parsed?.title ?? "";
709
+ const bundleId = this.worldModel.getState().focusedApp?.bundleId;
710
+ if (bundleId && url) {
711
+ this.worldModel.ingestCDPSnapshot(bundleId, url, title);
712
+ }
713
+ }
714
+ else if (tool === "ocr") {
715
+ // OCR results may contain text regions
716
+ let parsed = null;
717
+ try {
718
+ parsed = JSON.parse(result.result);
719
+ }
720
+ catch { /* not JSON */ }
721
+ if (parsed?.regions && Array.isArray(parsed.regions)) {
722
+ const windowId = params.windowId ??
723
+ this.worldModel.getState().focusedWindowId ?? 0;
724
+ const regions = parsed.regions;
725
+ if (regions.length > 0 && windowId) {
726
+ this.worldModel.ingestOCRRegions(windowId, regions);
727
+ }
728
+ }
729
+ }
730
+ }
731
+ catch {
732
+ // Best-effort: don't let world model feeding break execution
733
+ }
734
+ }
735
+ /**
736
+ * Record tool timing and locator outcomes to the learning engine.
737
+ * Best-effort — errors are silently ignored.
738
+ */
739
+ recordLearningOutcomes(tool, params, success, durationMs) {
740
+ if (!this.learningEngine)
741
+ return;
742
+ try {
743
+ const bundleId = this.worldModel.getState().focusedApp?.bundleId;
744
+ if (!bundleId)
745
+ return;
746
+ // Record tool timing for adaptive budget learning
747
+ this.learningEngine.recordToolTiming({
748
+ tool,
749
+ bundleId,
750
+ durationMs,
751
+ success,
752
+ });
753
+ // Record locator outcome when a target/selector was used
754
+ const target = (params.target ?? params.selector);
755
+ if (target && LOCATOR_TOOLS.has(tool)) {
756
+ const method = tool.startsWith("browser_") ? "cdp" :
757
+ tool === "ocr" ? "ocr" : "ax";
758
+ this.learningEngine.recordLocatorOutcome({
759
+ bundleId,
760
+ actionKey: tool,
761
+ locator: target,
762
+ method,
763
+ success,
764
+ });
765
+ }
766
+ }
767
+ catch {
768
+ // Best-effort
769
+ }
770
+ }
771
+ async tryToolWithTimeout(tool, params, timeoutMs) {
772
+ return Promise.race([
773
+ this.tryTool(tool, params),
774
+ new Promise((resolve) => setTimeout(() => resolve({ ok: false, error: `Step timeout after ${timeoutMs}ms` }), timeoutMs)),
775
+ ]);
776
+ }
777
+ async tryTool(tool, params) {
778
+ try {
779
+ return await this.executeTool(tool, params);
780
+ }
781
+ catch (err) {
782
+ return {
783
+ ok: false,
784
+ error: err instanceof Error ? err.message : String(err),
785
+ };
786
+ }
787
+ }
788
+ diagnoseFailure(planResult, expectedBundleId) {
789
+ // Check if the focused app changed (app_switched)
790
+ if (expectedBundleId) {
791
+ const currentBundleId = this.worldModel.getState().focusedApp?.bundleId;
792
+ if (currentBundleId && currentBundleId !== expectedBundleId) {
793
+ return "app_switched";
794
+ }
795
+ }
796
+ const lastFailed = [...planResult.stepResults].reverse().find((r) => !r.success);
797
+ if (!lastFailed)
798
+ return "postcondition_mismatch";
799
+ const error = lastFailed.error ?? "";
800
+ if (error.includes("dialog") || error.includes("Dialog"))
801
+ return "unexpected_dialog";
802
+ if (error.includes("not found") || error.includes("LOCATE_FAILED"))
803
+ return "element_not_found";
804
+ if (error.includes("timeout") || error.includes("TIMEOUT"))
805
+ return "timeout";
806
+ if (error.includes("Postcondition"))
807
+ return "postcondition_mismatch";
808
+ return "postcondition_mismatch";
809
+ }
810
+ }
811
/**
 * Tool categories for world-model-aware execution.
 * Each set groups tool names that the executor treats alike.
 */
/** Tools that change which app is focused */
const FOCUS_TOOLS = new Set([
    "focus",
    "launch",
]);
/** Tools that cause page/state transitions (need settle time) */
const STATE_CHANGING_TOOLS = new Set([
    "focus",
    "launch",
    "key",
    "click",
    "click_text",
    "click_with_fallback",
    "ui_press",
    "browser_click",
    "browser_navigate",
    "browser_open",
    "menu_click",
]);
/**
 * Tools that navigate (URL change, page load — need title/control change verification).
 * Pressing Enter/Return via the "key" tool is also treated as navigation, but
 * that case is detected separately (see isEnterKey) rather than listed here.
 */
const NAVIGATION_TOOLS = new Set([
    "browser_navigate",
    "browser_open",
]);
/** Tools that interact with specific UI elements (need target validation) */
const INTERACTION_TOOLS = new Set([
    "click",
    "click_text",
    "click_with_fallback",
    "ui_press",
    "ui_set_value",
    "ui_find",
    "browser_click",
    "browser_type",
    "type_with_fallback",
    "select_with_fallback",
    "read_with_fallback",
    "locate_with_fallback",
]);
/** Tools that call CGWindowListCreateImage — crash on GPU-heavy browser windows */
const SCREENSHOT_TOOLS = new Set([
    "screenshot",
    "screenshot_file",
    "ocr",
]);
/** Tools that require a windowId param — auto-injected from world model if missing */
const WINDOW_ID_TOOLS = new Set([
    "click_text",
    "screenshot_file",
    "ocr",
    "observer_ocr_roi",
]);
/** Browser tools whose results are fed back into the world model */
const BROWSER_TOOLS = new Set([
    "browser_navigate",
    "browser_open",
    "browser_dom",
    "browser_page_info",
]);
/** Tools whose target/selector outcomes are recorded for locator learning */
const LOCATOR_TOOLS = new Set([
    "click",
    "click_text",
    "click_with_fallback",
    "type_text",
    "type_with_fallback",
    "ui_press",
    "ui_set_value",
    "ui_find",
    "browser_click",
    "browser_type",
    "select_with_fallback",
    "read_with_fallback",
    "locate_with_fallback",
]);
849
/**
 * Return the first step after `currentIndex` whose tool is not a
 * screenshot/OCR tool (those carry no target params).
 * Used to extract content-based readiness targets for navigation waits.
 *
 * Note: the returned step is not checked for actually having a
 * text/title/name target; only SCREENSHOT_TOOLS are skipped.
 *
 * @param steps         Ordered list of plan steps.
 * @param currentIndex  Index of the step currently being executed.
 * @returns The next non-screenshot step, or null if there is none.
 */
function findNextMeaningfulStep(steps, currentIndex) {
    let cursor = currentIndex;
    while (++cursor < steps.length) {
        const candidate = steps[cursor];
        if (!SCREENSHOT_TOOLS.has(candidate.tool)) {
            return candidate;
        }
    }
    return null;
}
863
/**
 * Check if a key step is pressing Enter/Return (likely navigation after URL typing).
 *
 * Reads `params.key`, falling back to `params.combo`, case-insensitively.
 * Matches the bare key name as well as any combo that ends in it
 * (e.g. "cmd+enter").
 *
 * @param params  Parameters of a "key" step.
 * @returns true when the key or combo ends in Enter/Return.
 */
function isEnterKey(params) {
    const pressed = (params.key ?? params.combo ?? "").toLowerCase();
    const submitKeys = ["enter", "return"];
    return submitKeys.some((name) => pressed === name || pressed.endsWith(`+${name}`));
}
868
/**
 * Pause for the given number of milliseconds.
 *
 * @param ms  Delay passed to setTimeout.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
function sleep(ms) {
    return new Promise((done) => {
        setTimeout(done, ms);
    });
}