screenhand 0.2.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212) hide show
  1. package/README.md +165 -446
  2. package/bin/darwin-arm64/macos-bridge +0 -0
  3. package/dist/mcp-desktop.js +3615 -400
  4. package/dist/scripts/export-help-center.js +112 -0
  5. package/dist/scripts/marketing-loop.js +117 -0
  6. package/dist/scripts/observer-daemon.js +288 -0
  7. package/dist/scripts/orchestrator-daemon.js +399 -0
  8. package/dist/scripts/threads-campaign.js +208 -0
  9. package/dist/src/community/fetcher.js +109 -0
  10. package/dist/src/community/index.js +6 -0
  11. package/dist/src/community/publisher.js +191 -0
  12. package/dist/src/community/remote-api.js +121 -0
  13. package/dist/src/community/types.js +3 -0
  14. package/dist/src/community/validator.js +95 -0
  15. package/dist/src/context-tracker.js +489 -0
  16. package/dist/src/ingestion/coverage-auditor.js +233 -0
  17. package/dist/src/ingestion/doc-parser.js +164 -0
  18. package/dist/src/ingestion/index.js +8 -0
  19. package/dist/src/ingestion/menu-scanner.js +152 -0
  20. package/dist/src/ingestion/reference-merger.js +186 -0
  21. package/dist/src/ingestion/shortcut-extractor.js +180 -0
  22. package/dist/src/ingestion/tutorial-extractor.js +170 -0
  23. package/dist/src/ingestion/types.js +3 -0
  24. package/dist/src/jobs/manager.js +82 -14
  25. package/dist/src/jobs/runner.js +138 -15
  26. package/dist/src/learning/engine.js +356 -0
  27. package/dist/src/learning/index.js +9 -0
  28. package/dist/src/learning/locator-policy.js +120 -0
  29. package/dist/src/learning/pattern-policy.js +89 -0
  30. package/dist/src/learning/recovery-policy.js +116 -0
  31. package/dist/src/learning/sensor-policy.js +115 -0
  32. package/dist/src/learning/timing-model.js +204 -0
  33. package/dist/src/learning/topology-policy.js +90 -0
  34. package/dist/src/learning/types.js +9 -0
  35. package/dist/src/logging/timeline-logger.js +4 -1
  36. package/dist/src/memory/playbook-seeds.js +200 -0
  37. package/dist/src/memory/recall.js +60 -8
  38. package/dist/src/memory/service.js +30 -5
  39. package/dist/src/memory/store.js +34 -5
  40. package/dist/src/native/bridge-client.js +253 -31
  41. package/dist/src/observer/state.js +199 -0
  42. package/dist/src/observer/types.js +43 -0
  43. package/dist/src/orchestrator/state.js +68 -0
  44. package/dist/src/orchestrator/types.js +22 -0
  45. package/dist/src/perception/ax-source.js +162 -0
  46. package/dist/src/perception/cdp-source.js +162 -0
  47. package/dist/src/perception/coordinator.js +771 -0
  48. package/dist/src/perception/frame-differ.js +287 -0
  49. package/dist/src/perception/index.js +22 -0
  50. package/dist/src/perception/manager.js +199 -0
  51. package/dist/src/perception/types.js +47 -0
  52. package/dist/src/perception/vision-source.js +399 -0
  53. package/dist/src/planner/deterministic.js +298 -0
  54. package/dist/src/planner/executor.js +870 -0
  55. package/dist/src/planner/goal-store.js +92 -0
  56. package/dist/src/planner/index.js +21 -0
  57. package/dist/src/planner/planner.js +520 -0
  58. package/dist/src/planner/tool-registry.js +71 -0
  59. package/dist/src/planner/types.js +22 -0
  60. package/dist/src/platform/explorer.js +213 -0
  61. package/dist/src/platform/help-center-markdown.js +527 -0
  62. package/dist/src/platform/learner.js +257 -0
  63. package/dist/src/playbook/engine.js +296 -11
  64. package/dist/src/playbook/mcp-recorder.js +204 -0
  65. package/dist/src/playbook/recorder.js +3 -2
  66. package/dist/src/playbook/runner.js +1 -1
  67. package/dist/src/playbook/store.js +139 -10
  68. package/dist/src/recovery/detectors.js +156 -0
  69. package/dist/src/recovery/engine.js +327 -0
  70. package/dist/src/recovery/index.js +20 -0
  71. package/dist/src/recovery/strategies.js +274 -0
  72. package/dist/src/recovery/types.js +20 -0
  73. package/dist/src/runtime/accessibility-adapter.js +55 -18
  74. package/dist/src/runtime/applescript-adapter.js +8 -2
  75. package/dist/src/runtime/cdp-chrome-adapter.js +1 -1
  76. package/dist/src/runtime/executor.js +23 -3
  77. package/dist/src/runtime/locator-cache.js +24 -2
  78. package/dist/src/runtime/service.js +59 -15
  79. package/dist/src/runtime/session-manager.js +4 -1
  80. package/dist/src/runtime/vision-adapter.js +2 -1
  81. package/dist/src/state/app-map-types.js +72 -0
  82. package/dist/src/state/app-map.js +1974 -0
  83. package/dist/src/state/entity-tracker.js +108 -0
  84. package/dist/src/state/fusion.js +96 -0
  85. package/dist/src/state/index.js +21 -0
  86. package/dist/src/state/ladder-generator.js +236 -0
  87. package/dist/src/state/persistence.js +156 -0
  88. package/dist/src/state/types.js +17 -0
  89. package/dist/src/state/world-model.js +1456 -0
  90. package/dist/src/util/atomic-write.js +19 -4
  91. package/dist/src/util/sanitize.js +146 -0
  92. package/dist-app-maps/com.figma.Desktop.json +959 -0
  93. package/dist-app-maps/com.hnc.Discord.json +1146 -0
  94. package/dist-app-maps/notion.id.json +2831 -0
  95. package/dist-playbooks/canva-screenhand-carousel.json +445 -0
  96. package/dist-playbooks/codex-desktop.json +76 -0
  97. package/dist-playbooks/competitor-research-stack.json +122 -0
  98. package/dist-playbooks/davinci-color-grade.json +153 -0
  99. package/dist-playbooks/davinci-edit-timeline.json +162 -0
  100. package/dist-playbooks/davinci-render.json +114 -0
  101. package/dist-playbooks/devto.json +52 -0
  102. package/dist-playbooks/discord.json +41 -0
  103. package/dist-playbooks/google-flow-create-project.json +59 -0
  104. package/dist-playbooks/google-flow-edit-image.json +90 -0
  105. package/dist-playbooks/google-flow-edit-video.json +90 -0
  106. package/dist-playbooks/google-flow-generate-image.json +68 -0
  107. package/dist-playbooks/google-flow-generate-video.json +191 -0
  108. package/dist-playbooks/google-flow-open-project.json +48 -0
  109. package/dist-playbooks/google-flow-open-scenebuilder.json +64 -0
  110. package/dist-playbooks/google-flow-search-assets.json +64 -0
  111. package/dist-playbooks/instagram.json +57 -0
  112. package/dist-playbooks/linkedin.json +52 -0
  113. package/dist-playbooks/n8n.json +43 -0
  114. package/dist-playbooks/reddit.json +52 -0
  115. package/dist-playbooks/threads.json +59 -0
  116. package/dist-playbooks/x-twitter.json +59 -0
  117. package/dist-playbooks/youtube.json +59 -0
  118. package/dist-references/canva.json +646 -0
  119. package/dist-references/codex-desktop.json +305 -0
  120. package/dist-references/davinci-resolve-keyboard.json +594 -0
  121. package/dist-references/davinci-resolve-menu-map.json +1139 -0
  122. package/dist-references/davinci-resolve-menus-batch1.json +116 -0
  123. package/dist-references/davinci-resolve-menus-batch2.json +372 -0
  124. package/dist-references/davinci-resolve-menus-batch3.json +330 -0
  125. package/dist-references/davinci-resolve-menus-batch4.json +297 -0
  126. package/dist-references/davinci-resolve-shortcuts.json +333 -0
  127. package/dist-references/devpost.json +186 -0
  128. package/dist-references/devto.json +317 -0
  129. package/dist-references/discord.json +549 -0
  130. package/dist-references/figma.json +1186 -0
  131. package/dist-references/finder.json +146 -0
  132. package/dist-references/google-ads-transparency.json +95 -0
  133. package/dist-references/google-flow.json +649 -0
  134. package/dist-references/instagram.json +341 -0
  135. package/dist-references/linkedin.json +324 -0
  136. package/dist-references/meta-ad-library.json +86 -0
  137. package/dist-references/n8n.json +387 -0
  138. package/dist-references/notes.json +27 -0
  139. package/dist-references/notion.json +163 -0
  140. package/dist-references/reddit.json +341 -0
  141. package/dist-references/threads.json +337 -0
  142. package/dist-references/x-twitter.json +403 -0
  143. package/dist-references/youtube.json +373 -0
  144. package/native/macos-bridge/Package.swift +22 -0
  145. package/native/macos-bridge/Sources/AccessibilityBridge.swift +482 -0
  146. package/native/macos-bridge/Sources/AppManagement.swift +339 -0
  147. package/native/macos-bridge/Sources/CoreGraphicsBridge.swift +537 -0
  148. package/native/macos-bridge/Sources/ObserverBridge.swift +120 -0
  149. package/native/macos-bridge/Sources/StreamCapture.swift +136 -0
  150. package/native/macos-bridge/Sources/VisionBridge.swift +238 -0
  151. package/native/macos-bridge/Sources/main.swift +498 -0
  152. package/native/windows-bridge/AppManagement.cs +234 -0
  153. package/native/windows-bridge/InputBridge.cs +436 -0
  154. package/native/windows-bridge/Program.cs +270 -0
  155. package/native/windows-bridge/ScreenCapture.cs +453 -0
  156. package/native/windows-bridge/UIAutomationBridge.cs +571 -0
  157. package/native/windows-bridge/WindowsBridge.csproj +17 -0
  158. package/package.json +12 -1
  159. package/scripts/postinstall.cjs +127 -0
  160. package/dist/.audit-log.jsonl +0 -55
  161. package/dist/.screenhand/memory/.lock +0 -1
  162. package/dist/.screenhand/memory/actions.jsonl +0 -85
  163. package/dist/.screenhand/memory/errors.jsonl +0 -5
  164. package/dist/.screenhand/memory/errors.jsonl.bak +0 -4
  165. package/dist/.screenhand/memory/state.json +0 -35
  166. package/dist/.screenhand/memory/state.json.bak +0 -35
  167. package/dist/.screenhand/memory/strategies.jsonl +0 -12
  168. package/dist/agent/cli.js +0 -73
  169. package/dist/agent/loop.js +0 -258
  170. package/dist/config.js +0 -9
  171. package/dist/index.js +0 -56
  172. package/dist/logging/timeline-logger.js +0 -29
  173. package/dist/mcp/mcp-stdio-server.js +0 -448
  174. package/dist/mcp/server.js +0 -347
  175. package/dist/mcp-entry.js +0 -59
  176. package/dist/memory/recall.js +0 -160
  177. package/dist/memory/research.js +0 -98
  178. package/dist/memory/seeds.js +0 -89
  179. package/dist/memory/session.js +0 -161
  180. package/dist/memory/store.js +0 -391
  181. package/dist/memory/types.js +0 -4
  182. package/dist/monitor/codex-monitor.js +0 -377
  183. package/dist/monitor/task-queue.js +0 -84
  184. package/dist/monitor/types.js +0 -49
  185. package/dist/native/bridge-client.js +0 -174
  186. package/dist/native/macos-bridge-client.js +0 -5
  187. package/dist/npm-publish-helper.js +0 -117
  188. package/dist/npm-token-cdp.js +0 -113
  189. package/dist/npm-token-create.js +0 -135
  190. package/dist/npm-token-finish.js +0 -126
  191. package/dist/playbook/engine.js +0 -193
  192. package/dist/playbook/index.js +0 -4
  193. package/dist/playbook/recorder.js +0 -519
  194. package/dist/playbook/runner.js +0 -392
  195. package/dist/playbook/store.js +0 -166
  196. package/dist/playbook/types.js +0 -4
  197. package/dist/runtime/accessibility-adapter.js +0 -377
  198. package/dist/runtime/app-adapter.js +0 -48
  199. package/dist/runtime/applescript-adapter.js +0 -283
  200. package/dist/runtime/ax-role-map.js +0 -80
  201. package/dist/runtime/browser-adapter.js +0 -36
  202. package/dist/runtime/cdp-chrome-adapter.js +0 -505
  203. package/dist/runtime/composite-adapter.js +0 -205
  204. package/dist/runtime/executor.js +0 -250
  205. package/dist/runtime/locator-cache.js +0 -12
  206. package/dist/runtime/planning-loop.js +0 -47
  207. package/dist/runtime/service.js +0 -372
  208. package/dist/runtime/session-manager.js +0 -28
  209. package/dist/runtime/state-observer.js +0 -105
  210. package/dist/runtime/vision-adapter.js +0 -208
  211. package/dist/test-mcp-protocol.js +0 -138
  212. package/dist/types.js +0 -1
@@ -0,0 +1,1456 @@
1
+ // Copyright (C) 2025 Clazro Technology Private Limited
2
+ // SPDX-License-Identifier: AGPL-3.0-only
3
+ //
4
+ // This file is part of ScreenHand.
5
+ //
6
+ // ScreenHand is free software: you can redistribute it and/or modify
7
+ // it under the terms of the GNU Affero General Public License as
8
+ // published by the Free Software Foundation, version 3.
9
+ //
10
+ // ScreenHand is distributed in the hope that it will be useful,
11
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ // GNU Affero General Public License for more details.
14
+ //
15
+ // You should have received a copy of the GNU Affero General Public License
16
+ // along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
17
+ import crypto from "node:crypto";
18
+ import fs from "node:fs";
19
+ import path from "node:path";
20
+ import { EntityTracker } from "./entity-tracker.js";
21
+ import { loadWorldState, saveWorldState, DebouncedPersister } from "./persistence.js";
22
/**
 * Check whether `value` matches a schema type name.
 *
 * Recognised names are "string", "number", "boolean", "array" and "object"
 * (where "object" means a non-null, non-array object). Any other type name
 * is not enforced and is reported as valid.
 *
 * @param {unknown} value - Value to check.
 * @param {string} type - Schema type name.
 * @returns {boolean} True when the value matches, or the type is unknown.
 */
function validateSchemaType(value, type) {
    if (type === "array") {
        return Array.isArray(value);
    }
    if (type === "object") {
        // typeof null is "object" and arrays are objects too, so exclude both
        return value !== null && typeof value === "object" && !Array.isArray(value);
    }
    if (type === "string" || type === "number" || type === "boolean") {
        return typeof value === type;
    }
    return true;
}
32
// Baseline settings; the WorldModel constructor spreads caller config over these.
const DEFAULT_CONFIG = {
    confidenceDecayRate: 0.05,
    staleThresholdMs: 300_000, // 5 minutes
    maxControlsPerWindow: 500,
    persistDebounceMs: 500,
};
// Normalized roles that are recorded as dialogs rather than window controls.
const DIALOG_ROLES = new Set([
    "sheet",
    "dialog",
    "alert",
    "popover",
    "modal",
]);
// Upper bound (UTF-16 code units) for any sanitized string.
const MAX_STRING_LENGTH = 1000;
// Deepest tree level the AX walker will descend to.
const MAX_WALK_DEPTH = 50;
// URL schemes accepted by sanitizeUrl; every other scheme is blocked.
const ALLOWED_URL_PROTOCOLS = new Set([
    "http:",
    "https:",
    "about:",
    "chrome:",
    "chrome-extension:",
]);
42
/**
 * Sanitize untrusted strings from AX/OCR/CDP sources:
 * 1. Strip control characters (\x00-\x1F except \t \n \r) and DEL (\x7F)
 * 2. Truncate the stripped text to MAX_STRING_LENGTH UTF-16 code units,
 *    never cutting a surrogate pair in half
 *
 * Non-string input is converted with String() instead of throwing a
 * TypeError; null and undefined become the empty string.
 *
 * @param {unknown} s - Raw text (or other value) to sanitize.
 * @returns {string} The cleaned, length-limited string.
 */
function sanitizeString(s) {
    let text;
    if (typeof s === "string") {
        text = s;
    }
    else {
        text = s == null ? "" : String(s);
    }
    // eslint-disable-next-line no-control-regex
    const stripped = text.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, "");
    if (stripped.length <= MAX_STRING_LENGTH) {
        return stripped;
    }
    let end = MAX_STRING_LENGTH;
    const lastUnit = stripped.charCodeAt(end - 1);
    // A high surrogate at the cut point would be orphaned from its low
    // surrogate, so drop it rather than emit a malformed character.
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
        end -= 1;
    }
    return stripped.slice(0, end);
}
54
/**
 * Query parameter names (compared case-insensitively) whose values are
 * replaced with "[REDACTED]" by sanitizeUrl.
 */
const SENSITIVE_URL_PARAMS = new Set([
    "code", "token", "access_token", "refresh_token", "id_token",
    "secret", "password", "key", "api_key", "apikey", "auth",
    "session", "session_id", "sessionid", "state", "nonce",
]);
/**
 * Validate a URL and scrub secrets from it.
 *
 * - Malformed URLs and URLs whose protocol is not in ALLOWED_URL_PROTOCOLS
 *   yield "about:blocked".
 * - A password embedded in the userinfo part (`scheme://user:pw@host`) is
 *   replaced with "[REDACTED]".
 * - Values of query parameters named in SENSITIVE_URL_PARAMS are replaced
 *   with "[REDACTED]" (repeated occurrences of such a name collapse into one).
 * - When nothing needed redaction the input is returned unchanged (not
 *   re-serialized).
 *
 * @param {string} url - Untrusted URL.
 * @returns {string} The original URL, a redacted serialization, or "about:blocked".
 */
function sanitizeUrl(url) {
    let parsed;
    try {
        parsed = new URL(url);
    }
    catch {
        // Malformed URL — block it
        return "about:blocked";
    }
    if (!ALLOWED_URL_PROTOCOLS.has(parsed.protocol)) {
        return "about:blocked";
    }
    let redacted = false;
    // Credentials carried in the authority part are as sensitive as query tokens
    if (parsed.password !== "") {
        parsed.password = "[REDACTED]";
        redacted = true;
    }
    // Snapshot the names first: set() rewrites the very list that the live
    // keys() iterator walks, so never mutate while iterating it.
    const paramNames = new Set(parsed.searchParams.keys());
    for (const paramName of paramNames) {
        if (SENSITIVE_URL_PARAMS.has(paramName.toLowerCase())) {
            parsed.searchParams.set(paramName, "[REDACTED]");
            redacted = true;
        }
    }
    return redacted ? parsed.toString() : url;
}
84
// "<email>:<non-whitespace run>", e.g. "user@example.com:P@ssw0rd! - Chrome"
const EMAIL_PASSWORD_PATTERN = /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+:[^\s]+/g;
// "Bearer <token>" as it appears in HTTP Authorization values
const BEARER_TOKEN_PATTERN = /Bearer\s+[A-Za-z0-9\-._~+/]+=*/g;
/**
 * [pattern, replacement] pairs used to redact sensitive text from
 * labels/titles before they are stored in the world model.
 * Covers email:password combos and Bearer tokens; generic long tokens are
 * handled separately in redactSensitiveLabel.
 */
const SENSITIVE_LABEL_PATTERNS = [
    [EMAIL_PASSWORD_PATTERN, "[CREDENTIALS_REDACTED]"],
    [BEARER_TOKEN_PATTERN, "[BEARER_REDACTED]"],
];
94
/**
 * Heuristic: does this string look like a token/key?
 *
 * A candidate must be at least 32 characters long and draw from at least
 * three of four character classes (uppercase, lowercase, digit, one of
 * `- . _ ~ + /`). This rejects plain words and simple repeated characters.
 *
 * @param {string} s - Candidate text.
 * @returns {boolean} True when the text is token-like.
 */
function looksLikeToken(s) {
    if (s.length < 32) {
        return false;
    }
    const characterClasses = [/[A-Z]/, /[a-z]/, /[0-9]/, /[\-._~+/]/];
    let present = 0;
    for (const characterClass of characterClasses) {
        if (characterClass.test(s)) {
            present += 1;
        }
    }
    return present >= 3;
}
109
/**
 * Redact sensitive content from a label or title.
 *
 * First applies every [pattern, replacement] pair in
 * SENSITIVE_LABEL_PATTERNS in order, then replaces any run of 32 or more
 * token characters (optionally followed by up to two "=") with
 * "[TOKEN_REDACTED]" when looksLikeToken accepts it.
 *
 * @param {string} label - Text to scrub.
 * @returns {string} The scrubbed text.
 */
function redactSensitiveLabel(label) {
    const afterPatterns = SENSITIVE_LABEL_PATTERNS.reduce((text, [pattern, replacement]) => text.replace(pattern, replacement), label);
    return afterPatterns.replace(/[A-Za-z0-9\-._~+/]{32,}={0,2}/g, (candidate) => {
        if (looksLikeToken(candidate)) {
            return "[TOKEN_REDACTED]";
        }
        return candidate;
    });
}
118
// Ordered [bundle-id pattern, app family] pairs; detectFamily returns the
// family of the first pattern that matches.
const BUNDLE_FAMILY_MAP = [
    // Video editors
    [/^com\.blackmagic-design\.DaVinciResolve/, "video_editor"],
    [/^com\.adobe\.Premiere/, "video_editor"],
    [/^com\.apple\.FinalCut/, "video_editor"],

    // Image editors
    [/^com\.adobe\.Photoshop/, "image_editor"],
    [/^com\.adobe\.Illustrator/, "image_editor"],

    // Design tools
    [/^com\.figma\.Desktop$/, "design_tool"],

    // Browsers
    [/^com\.apple\.Safari$/, "browser"],
    [/^com\.google\.Chrome/, "browser"],
    [/^org\.mozilla\.firefox$/, "browser"],
    [/^com\.microsoft\.edgemac$/, "browser"],
];
130
/**
 * Normalize an accessibility role name by removing a leading "AX" and
 * lowercasing the character that follows it.
 * Examples: "AXRadioButton" → "radioButton", "AXWindow" → "window".
 * Names without the prefix (and the bare string "AX") are returned as-is.
 *
 * @param {string} raw - Role name as reported by the source.
 * @returns {string} The normalized role name.
 */
function normalizeRole(raw) {
    const hasAxPrefix = raw.length > 2 && raw.startsWith("AX");
    if (!hasAxPrefix) {
        return raw;
    }
    const bare = raw.slice(2);
    return bare.charAt(0).toLowerCase() + bare.slice(1);
}
140
/**
 * Derive a short identifier for a control from its role, label and
 * position. Coordinates are snapped down to a 50-unit grid (NaN becomes 0),
 * so small movements within a grid cell keep the same id.
 *
 * @param {string} role - Control role.
 * @param {string} label - Control label.
 * @param {number} x - Horizontal position.
 * @param {number} y - Vertical position.
 * @returns {string} First 12 hex characters of the SHA-256 of "role|label|qx,qy".
 */
function computeStableId(role, label, x, y) {
    const snapToGrid = (coordinate) => (isNaN(coordinate) ? 0 : Math.floor(coordinate / 50) * 50);
    const fingerprint = `${role}|${label}|${snapToGrid(x)},${snapToGrid(y)}`;
    const digest = crypto.createHash("sha256").update(fingerprint).digest("hex");
    return digest.slice(0, 12);
}
146
/**
 * Current time as an ISO-8601 string.
 *
 * @returns {string} Timestamp such as "2025-01-01T00:00:00.000Z".
 */
function now() {
    const current = new Date();
    return current.toISOString();
}
149
/**
 * Wrap a value in a tracked record with confidence 1.0, stamped with the
 * current time. The `stableId` property is present only when one is supplied.
 *
 * @param {unknown} value - Value to track.
 * @param {string} [stableId] - Optional owner identifier.
 * @returns {{value: unknown, confidence: number, updatedAt: string, stableId?: string}}
 */
function tracked(value, stableId) {
    const record = { value, confidence: 1.0, updatedAt: now() };
    return stableId === undefined ? record : { ...record, stableId };
}
155
/**
 * Return a copy of a tracked record with its confidence decayed
 * exponentially by the time elapsed since `updatedAt`:
 * `confidence * exp(-decayRate * elapsedMinutes)`.
 *
 * Edge cases:
 * - A timestamp in the future (e.g. clock skew) counts as zero elapsed
 *   time, so confidence is never inflated above its stored value.
 * - An unparseable timestamp yields confidence 0 instead of NaN: the age of
 *   the data is unknown, so it is treated as untrusted.
 *
 * The input record is not mutated.
 *
 * @param {{confidence: number, updatedAt: string}} t - Tracked record.
 * @param {number} decayRate - Decay constant, applied per minute.
 * @returns {object} Copy of `t` with the decayed confidence.
 */
function applyDecay(t, decayRate) {
    const elapsedMs = Date.now() - new Date(t.updatedAt).getTime();
    if (!Number.isFinite(elapsedMs)) {
        return { ...t, confidence: 0 };
    }
    // Clamp so a future timestamp cannot produce a growth factor > 1
    const elapsedMinutes = Math.max(0, elapsedMs) / 60_000;
    const decayed = t.confidence * Math.exp(-decayRate * elapsedMinutes);
    return { ...t, confidence: decayed };
}
161
/**
 * Classify an app by bundle id using BUNDLE_FAMILY_MAP.
 *
 * @param {string} bundleId - Application bundle identifier.
 * @returns {string} Family of the first matching pattern, or "generic".
 */
function detectFamily(bundleId) {
    const match = BUNDLE_FAMILY_MAP.find(([pattern]) => pattern.test(bundleId));
    return match ? match[1] : "generic";
}
168
/**
 * Build the initial domain state object for an app family.
 *
 * Fields with no sensible starting value are null; the rest are wrapped
 * with tracked(). Families other than the five handled here produce
 * `undefined`.
 *
 * @param {string} family - "video_editor", "image_editor", "design_tool", "browser" or "generic".
 * @returns {object|undefined} Fresh domain state for the family.
 */
function createDefaultDomainState(family) {
    if (family === "video_editor") {
        return {
            family: "video_editor",
            timeline: null,
            activeTrack: null,
            playbackState: tracked("stopped"),
            playheadPosition: null,
            selectedClips: tracked([]),
            activeSequence: null,
            activePage: null,
            activeTool: null,
            renderStatus: null,
            mediaOffline: tracked(false),
        };
    }
    if (family === "image_editor") {
        return {
            family: "image_editor",
            canvasSize: null,
            activeTool: null,
            activeLayer: null,
            zoom: tracked(1.0),
            layerCount: tracked(0),
            selectedLayers: tracked([]),
            documentSize: null,
        };
    }
    if (family === "design_tool") {
        return {
            family: "design_tool",
            activePage: null,
            selectedElements: tracked([]),
            zoom: tracked(1.0),
            activeTool: null,
            sidebarPanel: null,
            canvasSize: null,
        };
    }
    if (family === "browser") {
        return { family: "browser", url: null, title: null };
    }
    if (family === "generic") {
        return { family: "generic" };
    }
    return undefined;
}
215
/**
 * Create a blank world state for a session: no windows, dialogs, domains,
 * transitions or tracked entities, nothing focused, and confidence 1.0.
 *
 * @param {string} sessionId - Session the state belongs to.
 * @returns {object} Newly allocated state object.
 */
function createEmptyState(sessionId) {
    const emptyState = {
        // Windows and focus
        windows: new Map(),
        focusedWindowId: null,
        focusedApp: null,
        activeDialogs: [],
        // Per-app domain state
        appDomains: new Map(),
        // Bookkeeping
        lastFullScan: now(),
        sessionId,
        expectedPostcondition: null,
        updatedAt: now(),
        confidence: 1.0,
        // Goals, history and entities
        pendingGoal: null,
        recentTransitions: [],
        trackedEntities: new Map(),
    };
    return emptyState;
}
232
+ export class WorldModel {
233
+ state;
234
+ config;
235
+ persister;
236
+ domainSchemaCache = new Map();
237
+ decayTimer = null;
238
+ entityTracker = new EntityTracker();
239
+ constructor(config) {
240
+ this.config = { ...DEFAULT_CONFIG, ...config };
241
+ const stateDir = this.config.stateDir;
242
+ this.persister = new DebouncedPersister(this.config.persistDebounceMs, stateDir ? (s) => saveWorldState(s, stateDir) : undefined);
243
+ this.state = createEmptyState("");
244
+ }
245
+ init(sessionId) {
246
+ // Full reset: clear all in-memory state before loading to prevent cross-session bleed
247
+ this.state.windows.clear();
248
+ this.state.focusedApp = null;
249
+ this.state.focusedWindowId = null;
250
+ this.state.activeDialogs = [];
251
+ this.state.appDomains.clear();
252
+ this.state.pendingGoal = null;
253
+ this.state.expectedPostcondition = null;
254
+ this.state.recentTransitions = [];
255
+ this.state.trackedEntities = new Map();
256
+ this.entityTracker.clear();
257
+ const loaded = loadWorldState(sessionId, this.config.stateDir);
258
+ this.state = loaded ?? createEmptyState(sessionId);
259
+ // Rehydrate entity tracker from persisted state
260
+ if (this.state.trackedEntities.size > 0) {
261
+ this.entityTracker.rehydrate(this.state.trackedEntities);
262
+ }
263
+ }
264
+ /**
265
+ * Merge an incoming control with an existing one using source confidence.
266
+ * Higher-confidence sources win unless the existing data is very recent (<5s).
267
+ */
268
+ mergeControl(existing, incoming) {
269
+ if (!existing)
270
+ return incoming;
271
+ const existingConf = existing.sourceConfidence ?? 0;
272
+ const incomingConf = incoming.sourceConfidence ?? 0;
273
+ const existingAge = existing.lastSeenAt
274
+ ? Date.now() - new Date(existing.lastSeenAt).getTime()
275
+ : Infinity;
276
+ // Keep existing if it has higher confidence AND is recent (<5s)
277
+ if (existingConf > incomingConf && existingAge < 5000) {
278
+ return existing;
279
+ }
280
+ return incoming;
281
+ }
282
+ ingestAXTree(windowId, tree, appContext, sourceConfidence = 0.9) {
283
+ const snap = this.takeSnapshot();
284
+ const controls = new Map();
285
+ let count = 0;
286
+ const max = this.config.maxControlsPerWindow;
287
+ const existing = this.state.windows.get(windowId);
288
+ const existingControls = existing?.controls ?? new Map();
289
+ const walk = (node, depth = 0) => {
290
+ if (count >= max)
291
+ return;
292
+ if (depth > MAX_WALK_DEPTH)
293
+ return;
294
+ if (!node.role) {
295
+ // Skip decorative nodes but walk children
296
+ if (node.children) {
297
+ for (const child of node.children)
298
+ walk(child, depth + 1);
299
+ }
300
+ return;
301
+ }
302
+ // Normalize AX roles: "AXButton" → "button", "AXRadioButton" → "radioButton"
303
+ const role = normalizeRole(node.role);
304
+ const rawLabel = node.title ?? node.description ?? "";
305
+ const label = sanitizeString(rawLabel);
306
+ const x = node.position?.x ?? 0;
307
+ const y = node.position?.y ?? 0;
308
+ // Skip off-screen/hidden menu items with zero size — they pollute the world model
309
+ // with meaningless coordinates and inflate control counts
310
+ const w = node.size?.width ?? 0;
311
+ const h = node.size?.height ?? 0;
312
+ if (role === "menuItem" && w === 0 && h === 0) {
313
+ // Still walk children (submenus may have real geometry)
314
+ if (node.children) {
315
+ for (const child of node.children)
316
+ walk(child, depth + 1);
317
+ }
318
+ return;
319
+ }
320
+ const sid = computeStableId(role, label, x, y);
321
+ const prev = existingControls.get(sid);
322
+ const control = {
323
+ stableId: sid,
324
+ role,
325
+ label: prev?.label && prev.label.value === label
326
+ ? prev.label
327
+ : tracked(label, sid),
328
+ value: tracked(node.value != null ? sanitizeString(node.value) : null, sid),
329
+ enabled: tracked(node.enabled ?? true, sid),
330
+ focused: node.focused ?? false,
331
+ position: { x, y },
332
+ size: {
333
+ width: node.size?.width ?? 0,
334
+ height: node.size?.height ?? 0,
335
+ },
336
+ source: "ax",
337
+ sourceConfidence,
338
+ lastSeenAt: now(),
339
+ };
340
+ // Detect dialogs — do NOT add dialog root to window controls
341
+ if (DIALOG_ROLES.has(role)) {
342
+ const dialogType = (role === "modal" || role === "dialog" ? "modal" : role);
343
+ const dialogControls = new Map();
344
+ // Flatten dialog children into its controls
345
+ if (node.children) {
346
+ for (const child of node.children) {
347
+ if (!child.role)
348
+ continue;
349
+ const childRole = normalizeRole(child.role);
350
+ const cl = sanitizeString(child.title ?? child.description ?? "");
351
+ const cx = child.position?.x ?? 0;
352
+ const cy = child.position?.y ?? 0;
353
+ const csid = computeStableId(childRole, cl, cx, cy);
354
+ dialogControls.set(csid, {
355
+ stableId: csid,
356
+ role: childRole,
357
+ label: tracked(cl, csid),
358
+ value: tracked(child.value != null ? sanitizeString(child.value) : null, csid),
359
+ enabled: tracked(child.enabled ?? true, csid),
360
+ focused: child.focused ?? false,
361
+ position: { x: cx, y: cy },
362
+ size: {
363
+ width: child.size?.width ?? 0,
364
+ height: child.size?.height ?? 0,
365
+ },
366
+ });
367
+ }
368
+ }
369
+ // Extract button labels and message from dialog children
370
+ const buttons = [];
371
+ let message = null;
372
+ for (const ctrl of dialogControls.values()) {
373
+ if (ctrl.role === "button" && ctrl.label.value) {
374
+ buttons.push(ctrl.label.value);
375
+ }
376
+ if ((ctrl.role === "staticText" || ctrl.role === "text") && ctrl.label.value && ctrl.label.value.length > 10) {
377
+ message = ctrl.label.value;
378
+ }
379
+ }
380
+ // Detect special dialog types from title/message (only for generic modal/alert)
381
+ let detectedType = dialogType;
382
+ if (dialogType === "modal" || dialogType === "alert") {
383
+ const lowerLabel = label.toLowerCase();
384
+ if (lowerLabel.includes("save") || lowerLabel.includes("unsaved")) {
385
+ detectedType = "save";
386
+ }
387
+ else if (lowerLabel.includes("permission") || lowerLabel.includes("allow") || lowerLabel.includes("access")) {
388
+ detectedType = "permission";
389
+ }
390
+ }
391
+ this.state.activeDialogs.push({
392
+ type: detectedType,
393
+ title: label,
394
+ windowId,
395
+ controls: dialogControls,
396
+ detectedAt: now(),
397
+ message,
398
+ buttons,
399
+ source: "ax",
400
+ });
401
+ // Don't add dialog root or its children as regular window controls
402
+ return;
403
+ }
404
+ controls.set(sid, control);
405
+ count++;
406
+ if (node.children) {
407
+ for (const child of node.children)
408
+ walk(child, depth + 1);
409
+ }
410
+ };
411
+ // Clear existing dialogs for this window before re-ingesting
412
+ this.state.activeDialogs = this.state.activeDialogs.filter((d) => d.windowId !== windowId);
413
+ walk(tree);
414
+ // Entity tracking: match/create entities for panels, toolbars, tabs
415
+ const ENTITY_ROLES = new Set(["toolbar", "tabGroup", "group", "splitGroup"]);
416
+ for (const ctrl of controls.values()) {
417
+ if (ENTITY_ROLES.has(ctrl.role) && ctrl.label.value) {
418
+ const entityType = ctrl.role === "tabGroup" ? "tab" : "panel";
419
+ this.entityTracker.matchOrCreate(entityType, redactSensitiveLabel(ctrl.label.value), ctrl.position, windowId);
420
+ }
421
+ }
422
+ this.entityTracker.pruneStale(60_000);
423
+ this.state.trackedEntities = this.entityTracker.getAll();
424
+ // Find focused element and interactive controls
425
+ let focusedElement = null;
426
+ const visibleControls = [];
427
+ const INTERACTIVE_ROLES = new Set(["button", "checkbox", "radioButton", "textField", "slider", "popUpButton", "menuItem", "link", "tab", "incrementor", "comboBox"]);
428
+ for (const ctrl of controls.values()) {
429
+ if (ctrl.focused)
430
+ focusedElement = ctrl;
431
+ if (INTERACTIVE_ROLES.has(ctrl.role))
432
+ visibleControls.push(ctrl);
433
+ }
434
+ // Collect dialogs for this window from activeDialogs
435
+ const dialogStack = this.state.activeDialogs.filter((d) => d.windowId === windowId);
436
+ // Derive window title: prefer appContext, fall back to AX tree root node title
437
+ let windowTitle = appContext.windowTitle;
438
+ if (!windowTitle && tree.role) {
439
+ const rootRole = normalizeRole(tree.role);
440
+ if (rootRole === "window" || rootRole === "application") {
441
+ windowTitle = tree.title ?? "";
442
+ }
443
+ // Also check first window child if root is application
444
+ if (!windowTitle && tree.children) {
445
+ for (const child of tree.children) {
446
+ if (child.role && normalizeRole(child.role) === "window" && child.title) {
447
+ windowTitle = child.title;
448
+ break;
449
+ }
450
+ }
451
+ }
452
+ }
453
+ const winState = {
454
+ windowId,
455
+ title: tracked(redactSensitiveLabel(sanitizeString(windowTitle || existing?.title.value || ""))),
456
+ bundleId: appContext.bundleId,
457
+ pid: appContext.pid,
458
+ bounds: existing?.bounds ?? tracked({ x: 0, y: 0, width: 0, height: 0 }),
459
+ controls,
460
+ isOnScreen: true,
461
+ focusedElement,
462
+ visibleControls,
463
+ dialogStack,
464
+ scrollPosition: existing?.scrollPosition ?? null,
465
+ lastAXScanAt: now(),
466
+ lastCDPScanAt: existing?.lastCDPScanAt ?? null,
467
+ lastOCRAt: existing?.lastOCRAt ?? null,
468
+ lastScreenshotHash: existing?.lastScreenshotHash ?? null,
469
+ };
470
+ this.state.windows.set(windowId, winState);
471
+ this.state.lastFullScan = now();
472
+ this.state.updatedAt = now();
473
+ // Auto-set focusedWindowId if unset or if this window belongs to the focused app
474
+ if (this.state.focusedWindowId === null ||
475
+ (this.state.focusedApp && this.state.focusedApp.bundleId === appContext.bundleId)) {
476
+ this.state.focusedWindowId = windowId;
477
+ }
478
+ // Update focusedApp.pid if it was 0 (set by feedWorldModel) but we now have the real pid
479
+ if (this.state.focusedApp && this.state.focusedApp.bundleId === appContext.bundleId &&
480
+ this.state.focusedApp.pid === 0 && appContext.pid > 0) {
481
+ this.state.focusedApp.pid = appContext.pid;
482
+ }
483
+ // Ensure app domain state exists
484
+ if (!this.state.appDomains.has(appContext.bundleId)) {
485
+ const family = detectFamily(appContext.bundleId);
486
+ this.state.appDomains.set(appContext.bundleId, createDefaultDomainState(family));
487
+ }
488
+ this.recordTransitions(snap, "ax");
489
+ this.schedulePersist();
490
+ }
491
+ ingestUIEvents(events) {
492
+ const snap = this.takeSnapshot();
493
+ for (const event of events) {
494
+ switch (event.type) {
495
+ case "value_changed": {
496
+ if (event.elementRole && event.elementLabel) {
497
+ const control = this.findControlByRoleLabel(event.elementRole, event.elementLabel);
498
+ if (control) {
499
+ control.value = tracked(event.newValue ?? null, control.stableId);
500
+ }
501
+ }
502
+ break;
503
+ }
504
+ case "focus_changed": {
505
+ if (event.elementRole && event.elementLabel) {
506
+ const control = this.findControlByRoleLabel(event.elementRole, event.elementLabel);
507
+ if (control) {
508
+ control.focused = true;
509
+ }
510
+ }
511
+ break;
512
+ }
513
+ case "dialog_appeared": {
514
+ const dialogTitle = event.windowTitle ?? "";
515
+ // Dedup: skip if a dialog with the same title already exists
516
+ const alreadyExists = this.state.activeDialogs.some((d) => d.title === dialogTitle);
517
+ if (!alreadyExists) {
518
+ this.state.activeDialogs.push({
519
+ type: "modal",
520
+ title: dialogTitle,
521
+ windowId: 0,
522
+ controls: new Map(),
523
+ detectedAt: now(),
524
+ message: null,
525
+ buttons: [],
526
+ source: "observer",
527
+ });
528
+ }
529
+ break;
530
+ }
531
+ case "window_closed": {
532
+ // Collect IDs first to avoid Map mutation during iteration
533
+ const toDelete = [];
534
+ for (const [id, win] of this.state.windows) {
535
+ if (win.pid === event.pid) {
536
+ toDelete.push(id);
537
+ }
538
+ }
539
+ for (const id of toDelete) {
540
+ this.state.windows.delete(id);
541
+ }
542
+ // Purge orphaned dialogs for deleted windows
543
+ if (toDelete.length > 0) {
544
+ const deletedIds = new Set(toDelete);
545
+ this.state.activeDialogs = this.state.activeDialogs.filter((d) => !deletedIds.has(d.windowId));
546
+ }
547
+ break;
548
+ }
549
+ case "title_changed": {
550
+ // Update window title for any window matching this pid
551
+ for (const win of this.state.windows.values()) {
552
+ if (win.pid === event.pid && event.newValue) {
553
+ win.title = tracked(event.newValue, `win_${win.windowId}`);
554
+ }
555
+ }
556
+ break;
557
+ }
558
+ case "app_activated": {
559
+ // Update focused app from observer event
560
+ if (event.bundleId && event.pid) {
561
+ this.state.focusedApp = {
562
+ bundleId: event.bundleId,
563
+ appName: event.bundleId,
564
+ pid: event.pid,
565
+ };
566
+ // Set focusedWindowId to first window matching this pid
567
+ for (const [id, win] of this.state.windows) {
568
+ if (win.pid === event.pid) {
569
+ this.state.focusedWindowId = id;
570
+ break;
571
+ }
572
+ }
573
+ }
574
+ break;
575
+ }
576
+ case "window_created": {
577
+ // Mark that a new window appeared — will be populated on next AX scan
578
+ // For now just ensure focusedWindowId is set if it was null
579
+ if (this.state.focusedWindowId === null && event.pid) {
580
+ for (const [id, win] of this.state.windows) {
581
+ if (win.pid === event.pid) {
582
+ this.state.focusedWindowId = id;
583
+ break;
584
+ }
585
+ }
586
+ }
587
+ break;
588
+ }
589
+ case "app_deactivated": {
590
+ if (event.bundleId && this.state.focusedApp?.bundleId === event.bundleId) {
591
+ this.state.focusedApp = null;
592
+ this.state.focusedWindowId = null;
593
+ }
594
+ break;
595
+ }
596
+ case "layout_changed":
597
+ case "menu_opened":
598
+ // These don't need world model updates — they signal
599
+ // that a fresh AX scan would be useful (handled by perception)
600
+ break;
601
+ }
602
+ }
603
+ this.state.updatedAt = now();
604
+ this.recordTransitions(snap, "ui_event");
605
+ this.schedulePersist();
606
+ }
607
+ updateFocusedApp(appContext) {
608
+ const prevBundleId = this.state.focusedApp?.bundleId;
609
+ this.state.focusedApp = {
610
+ bundleId: appContext.bundleId,
611
+ appName: appContext.appName,
612
+ pid: appContext.pid,
613
+ };
614
+ this.state.focusedWindowId = appContext.windowId ?? null;
615
+ // Prune windows from the previous app to prevent stale accumulation
616
+ // across app switches. Keep windows from the new focused app and any
617
+ // windows seen in the last 30 seconds (to handle multi-window workflows).
618
+ if (prevBundleId && prevBundleId !== appContext.bundleId) {
619
+ const STALE_WINDOW_MS = 30_000;
620
+ const cutoff = Date.now() - STALE_WINDOW_MS;
621
+ const toDelete = [];
622
+ for (const [id, win] of this.state.windows) {
623
+ if (win.bundleId === appContext.bundleId)
624
+ continue; // keep new app's windows
625
+ const lastScan = win.lastAXScanAt ? new Date(win.lastAXScanAt).getTime() : 0;
626
+ if (lastScan < cutoff) {
627
+ toDelete.push(id);
628
+ }
629
+ }
630
+ for (const id of toDelete) {
631
+ this.state.windows.delete(id);
632
+ }
633
+ }
634
+ // Ensure app domain
635
+ if (!this.state.appDomains.has(appContext.bundleId)) {
636
+ const family = detectFamily(appContext.bundleId);
637
+ this.state.appDomains.set(appContext.bundleId, createDefaultDomainState(family));
638
+ }
639
+ this.state.updatedAt = now();
640
+ this.schedulePersist();
641
+ }
642
+ /** Set/clear the pending goal (used by planner). */
643
+ setPendingGoal(goal) {
644
+ this.state.pendingGoal = goal;
645
+ this.state.updatedAt = now();
646
+ }
647
+ getWindowState(windowId) {
648
+ const win = this.state.windows.get(windowId);
649
+ if (!win)
650
+ return null;
651
+ return {
652
+ ...win,
653
+ title: applyDecay(win.title, this.config.confidenceDecayRate),
654
+ bounds: applyDecay(win.bounds, this.config.confidenceDecayRate),
655
+ };
656
+ }
657
+ getFocusedWindow() {
658
+ if (this.state.focusedWindowId === null)
659
+ return null;
660
+ return this.getWindowState(this.state.focusedWindowId);
661
+ }
662
+ getControl(stableId) {
663
+ for (const win of this.state.windows.values()) {
664
+ const control = win.controls.get(stableId);
665
+ if (control) {
666
+ return {
667
+ ...control,
668
+ label: applyDecay(control.label, this.config.confidenceDecayRate),
669
+ value: applyDecay(control.value, this.config.confidenceDecayRate),
670
+ enabled: applyDecay(control.enabled, this.config.confidenceDecayRate),
671
+ };
672
+ }
673
+ }
674
+ return null;
675
+ }
676
+ getActiveDialogs() {
677
+ return this.state.activeDialogs;
678
+ }
679
+ getAppDomain(bundleId) {
680
+ return this.state.appDomains.get(bundleId) ?? null;
681
+ }
682
+ /**
683
+ * Load domain schema from a reference file matching the given bundleId.
684
+ * Scans references/ directory for JSON files with matching bundleId,
685
+ * extracts `domainSchema` key if present, and caches it.
686
+ */
687
+ loadDomainSchema(bundleId) {
688
+ if (this.domainSchemaCache.has(bundleId)) {
689
+ return this.domainSchemaCache.get(bundleId) ?? null;
690
+ }
691
+ const refsDir = this.config.referencesDir ?? path.join(process.cwd(), "references");
692
+ let schema = null;
693
+ try {
694
+ const files = fs.readdirSync(refsDir);
695
+ for (const file of files) {
696
+ if (!file.endsWith(".json"))
697
+ continue;
698
+ try {
699
+ const raw = fs.readFileSync(path.join(refsDir, file), "utf-8");
700
+ const ref = JSON.parse(raw);
701
+ if (ref.bundleId === bundleId && ref.domainSchema) {
702
+ schema = ref.domainSchema;
703
+ break;
704
+ }
705
+ }
706
+ catch { /* skip malformed */ }
707
+ }
708
+ }
709
+ catch { /* dir doesn't exist */ }
710
+ this.domainSchemaCache.set(bundleId, schema);
711
+ return schema;
712
+ }
713
+ /**
714
+ * Update a domain state with partial data, optionally validating against
715
+ * a loaded domain schema. Creates the domain entry if it doesn't exist.
716
+ */
717
+ updateDomainState(bundleId, partial) {
718
+ let domain = this.state.appDomains.get(bundleId);
719
+ if (!domain) {
720
+ const family = detectFamily(bundleId);
721
+ domain = createDefaultDomainState(family);
722
+ this.state.appDomains.set(bundleId, domain);
723
+ }
724
+ // Apply schema validation if a schema is loaded
725
+ const schema = this.domainSchemaCache.get(bundleId);
726
+ const domainRecord = domain;
727
+ for (const [key, value] of Object.entries(partial)) {
728
+ if (key === "family")
729
+ continue; // never override family
730
+ if (schema?.fields) {
731
+ const fieldDef = schema.fields[key];
732
+ if (fieldDef) {
733
+ // Validate type if schema specifies one
734
+ if (fieldDef.type && !validateSchemaType(value, fieldDef.type)) {
735
+ continue; // skip invalid value
736
+ }
737
+ }
738
+ // If schema has fields defined but this key isn't in it, skip
739
+ if (schema.strict && !(key in schema.fields))
740
+ continue;
741
+ }
742
+ domainRecord[key] = tracked(value);
743
+ }
744
+ this.state.updatedAt = now();
745
+ this.schedulePersist();
746
+ }
747
+ /**
748
+ * Get the focused element from the active window.
749
+ */
750
+ getFocusedElement() {
751
+ const win = this.getFocusedWindow();
752
+ return win?.focusedElement ?? null;
753
+ }
754
+ /**
755
+ * Get the dialog stack (alias for getActiveDialogs for API symmetry).
756
+ */
757
+ getDialogStack() {
758
+ return this.state.activeDialogs;
759
+ }
760
+ /**
761
+ * Get the domain state for the currently focused app.
762
+ */
763
+ getDomainState() {
764
+ const bundleId = this.state.focusedApp?.bundleId;
765
+ if (!bundleId)
766
+ return null;
767
+ return this.state.appDomains.get(bundleId) ?? null;
768
+ }
769
+ /**
770
+ * Get a specific field from the focused app's domain state.
771
+ */
772
+ getDomainField(key) {
773
+ const domain = this.getDomainState();
774
+ if (!domain)
775
+ return undefined;
776
+ return domain[key];
777
+ }
778
+ /**
779
+ * Get the app family for the currently focused app.
780
+ */
781
+ getAppFamily() {
782
+ const domain = this.getDomainState();
783
+ return domain?.family ?? null;
784
+ }
785
+ /**
786
+ * Read confidence at a dotted path (e.g. "focusedWindow.title", "control.<stableId>.value").
787
+ */
788
+ getConfidence(path) {
789
+ const parts = path.split(".");
790
+ if (parts[0] === "focusedWindow") {
791
+ const win = this.getFocusedWindow();
792
+ if (!win)
793
+ return 0;
794
+ const field = parts[1];
795
+ if (field === "title")
796
+ return win.title.confidence;
797
+ if (field === "bounds")
798
+ return win.bounds.confidence;
799
+ return 0;
800
+ }
801
+ if (parts[0] === "control" && parts.length >= 3) {
802
+ const ctrl = this.getControl(parts[1]);
803
+ if (!ctrl)
804
+ return 0;
805
+ const field = parts[2];
806
+ if (field === "label")
807
+ return ctrl.label.confidence;
808
+ if (field === "value")
809
+ return ctrl.value.confidence;
810
+ if (field === "enabled")
811
+ return ctrl.enabled.confidence;
812
+ return 0;
813
+ }
814
+ if (parts[0] === "state") {
815
+ return this.state.confidence;
816
+ }
817
+ return 0;
818
+ }
819
+ assertState(assertion) {
820
+ return this.assertStateDetailed(assertion).matched;
821
+ }
822
+ /**
823
+ * Assert state with detailed result including actual value and confidence.
824
+ */
825
+ assertStateDetailed(assertion) {
826
+ switch (assertion.type) {
827
+ case "control_exists": {
828
+ const ctrl = this.getControl(assertion.target);
829
+ return {
830
+ matched: ctrl !== null,
831
+ actual: ctrl ? `${ctrl.role} "${ctrl.label.value}"` : null,
832
+ confidence: ctrl ? ctrl.label.confidence : 0,
833
+ };
834
+ }
835
+ case "control_absent": {
836
+ const ctrl = this.getControl(assertion.target);
837
+ return {
838
+ matched: ctrl === null,
839
+ actual: ctrl ? `${ctrl.role} "${ctrl.label.value}"` : null,
840
+ confidence: ctrl === null ? 1.0 : ctrl.label.confidence,
841
+ };
842
+ }
843
+ case "value_equals": {
844
+ const ctrl = this.getControl(assertion.target);
845
+ if (!ctrl)
846
+ return { matched: false, actual: null, confidence: 0 };
847
+ const actual = ctrl.value.value;
848
+ return {
849
+ matched: actual === assertion.expected,
850
+ actual: actual !== null ? String(actual) : null,
851
+ confidence: ctrl.value.confidence,
852
+ };
853
+ }
854
+ case "control_enabled": {
855
+ const ctrl = this.getControl(assertion.target);
856
+ if (!ctrl)
857
+ return { matched: false, actual: null, confidence: 0 };
858
+ const expectedEnabled = assertion.expected !== false;
859
+ return {
860
+ matched: ctrl.enabled.value === expectedEnabled,
861
+ actual: String(ctrl.enabled.value),
862
+ confidence: ctrl.enabled.confidence,
863
+ };
864
+ }
865
+ case "window_focused": {
866
+ const targetWindowId = Number(assertion.target);
867
+ if (!Number.isFinite(targetWindowId)) {
868
+ return { matched: false, actual: null, confidence: 0 };
869
+ }
870
+ const matched = this.state.focusedWindowId === targetWindowId;
871
+ return {
872
+ matched,
873
+ actual: this.state.focusedWindowId !== null ? String(this.state.focusedWindowId) : null,
874
+ confidence: 1.0,
875
+ };
876
+ }
877
+ case "app_focused": {
878
+ const matched = this.state.focusedApp?.bundleId === assertion.target;
879
+ return {
880
+ matched,
881
+ actual: this.state.focusedApp?.bundleId ?? null,
882
+ confidence: 1.0,
883
+ };
884
+ }
885
+ case "dialog_absent": {
886
+ const found = this.state.activeDialogs.find((d) => d.title === assertion.target);
887
+ return {
888
+ matched: !found,
889
+ actual: found ? `dialog: "${found.title}"` : null,
890
+ confidence: 1.0,
891
+ };
892
+ }
893
+ case "dialog_present": {
894
+ const found = this.state.activeDialogs.find((d) => d.title === assertion.target);
895
+ return {
896
+ matched: !!found,
897
+ actual: found ? `dialog: "${found.title}"` : null,
898
+ confidence: 1.0,
899
+ };
900
+ }
901
+ case "url_equals": {
902
+ // Check browser domain state for URL match
903
+ for (const domain of this.state.appDomains.values()) {
904
+ if (domain.family === "browser" && domain.url) {
905
+ const urlTracked = domain.url;
906
+ const matched = urlTracked.value === assertion.target ||
907
+ urlTracked.value.startsWith(assertion.target);
908
+ return {
909
+ matched,
910
+ actual: urlTracked.value,
911
+ confidence: urlTracked.confidence,
912
+ };
913
+ }
914
+ }
915
+ return { matched: false, actual: null, confidence: 0 };
916
+ }
917
+ case "text_visible": {
918
+ // Fuzzy text search across all controls in all windows
919
+ if (!assertion.target) {
920
+ return { matched: false, actual: null, confidence: 0 };
921
+ }
922
+ const targetLower = assertion.target.toLowerCase();
923
+ for (const win of this.state.windows.values()) {
924
+ for (const ctrl of win.controls.values()) {
925
+ if (ctrl.label.value?.toLowerCase().includes(targetLower)) {
926
+ return {
927
+ matched: true,
928
+ actual: `${ctrl.role} "${ctrl.label.value}"`,
929
+ confidence: ctrl.label.confidence,
930
+ };
931
+ }
932
+ }
933
+ }
934
+ return { matched: false, actual: null, confidence: 0 };
935
+ }
936
+ }
937
+ }
938
+ /**
939
+ * Set an expected postcondition BEFORE executing an action.
940
+ * Call verifyPostcondition() AFTER the action completes.
941
+ */
942
+ setExpectedPostcondition(assertion) {
943
+ this.state.expectedPostcondition = assertion;
944
+ }
945
+ /**
946
+ * Verify the previously set postcondition against current state.
947
+ * Returns detailed result with match status, actual value, and confidence.
948
+ * Clears the stored postcondition after verification.
949
+ */
950
+ verifyPostcondition() {
951
+ const assertion = this.state.expectedPostcondition;
952
+ if (!assertion) {
953
+ return { matched: true, actual: null, confidence: 1.0 };
954
+ }
955
+ this.state.expectedPostcondition = null;
956
+ return this.assertStateDetailed(assertion);
957
+ }
958
+ getStaleControls(thresholdMs) {
959
+ const threshold = thresholdMs ?? this.config.staleThresholdMs;
960
+ const cutoff = Date.now() - threshold;
961
+ const stale = [];
962
+ for (const win of this.state.windows.values()) {
963
+ for (const control of win.controls.values()) {
964
+ if (new Date(control.value.updatedAt).getTime() < cutoff) {
965
+ stale.push(control);
966
+ }
967
+ }
968
+ }
969
+ return stale;
970
+ }
971
+ toSummary() {
972
+ const winCount = this.state.windows.size;
973
+ let controlCount = 0;
974
+ for (const win of this.state.windows.values()) {
975
+ controlCount += win.controls.size;
976
+ }
977
+ const dialogCount = this.state.activeDialogs.length;
978
+ const focused = this.state.focusedApp;
979
+ const parts = [];
980
+ parts.push(`${winCount} window(s), ${controlCount} control(s) tracked`);
981
+ if (focused) {
982
+ parts.push(`Focused: ${focused.appName} (${focused.bundleId})`);
983
+ }
984
+ if (dialogCount > 0) {
985
+ parts.push(`${dialogCount} active dialog(s): ${this.state.activeDialogs.map((d) => sanitizeString(d.title || d.type)).join(", ")}`);
986
+ }
987
+ if (this.state.lastFullScan) {
988
+ const scanAge = Date.now() - new Date(this.state.lastFullScan).getTime();
989
+ const scanAgeSec = Math.round(scanAge / 1000);
990
+ // Show "never" for unreasonable ages (> 1 hour likely means epoch default)
991
+ if (scanAgeSec < 3600) {
992
+ parts.push(`Last scan: ${scanAgeSec}s ago`);
993
+ }
994
+ else {
995
+ parts.push("Last scan: never (no perception data received)");
996
+ }
997
+ }
998
+ else {
999
+ parts.push("Last scan: never");
1000
+ }
1001
+ return parts.join("\n");
1002
+ }
1003
+ /**
1004
+ * Update browser domain state from a CDP snapshot (url, title).
1005
+ */
1006
+ ingestCDPSnapshot(bundleId, url, title, windowId) {
1007
+ const snap = this.takeSnapshot();
1008
+ const safeUrl = sanitizeUrl(url);
1009
+ const safeTitle = sanitizeString(title);
1010
+ let domain = this.state.appDomains.get(bundleId);
1011
+ if (!domain) {
1012
+ domain = { family: "browser", url: null, title: null };
1013
+ this.state.appDomains.set(bundleId, domain);
1014
+ }
1015
+ if (domain.family === "browser") {
1016
+ domain.url = tracked(safeUrl);
1017
+ domain.title = tracked(safeTitle);
1018
+ }
1019
+ // Mark lastCDPScanAt on the window if we know which one
1020
+ if (windowId !== undefined) {
1021
+ const win = this.state.windows.get(windowId);
1022
+ if (win)
1023
+ win.lastCDPScanAt = now();
1024
+ }
1025
+ else {
1026
+ // Best effort: mark the focused window
1027
+ for (const win of this.state.windows.values()) {
1028
+ if (win.bundleId === bundleId) {
1029
+ win.lastCDPScanAt = now();
1030
+ break;
1031
+ }
1032
+ }
1033
+ }
1034
+ this.state.updatedAt = now();
1035
+ this.recordTransitions(snap, "cdp");
1036
+ this.schedulePersist();
1037
+ }
1038
+ /**
1039
+ * Ingest Safari browser state from AppleScript (URL, title, tabs).
1040
+ * This is the non-CDP path for Safari browser enrichment.
1041
+ */
1042
+ ingestSafariBrowserState(url, title, tabs) {
1043
+ const bundleId = "com.apple.Safari";
1044
+ let domain = this.state.appDomains.get(bundleId);
1045
+ if (!domain) {
1046
+ domain = { family: "browser", url: null, title: null };
1047
+ this.state.appDomains.set(bundleId, domain);
1048
+ }
1049
+ if (domain.family === "browser") {
1050
+ const bs = domain;
1051
+ bs.url = tracked(sanitizeUrl(url));
1052
+ bs.title = tracked(sanitizeString(title));
1053
+ if (tabs) {
1054
+ bs.tabs = tabs.map(t => ({
1055
+ ...t,
1056
+ url: sanitizeUrl(t.url),
1057
+ title: sanitizeString(t.title),
1058
+ }));
1059
+ }
1060
+ }
1061
+ this.state.updatedAt = now();
1062
+ this.schedulePersist();
1063
+ }
1064
+ /**
1065
+ * Ingest CDP DOM mutations into the world model.
1066
+ * Called from perception coordinator's fast cycle when mutations are drained.
1067
+ */
1068
+ ingestCDPMutations(bundleId, mutations) {
1069
+ // Find browser window for this bundleId
1070
+ let targetWin = null;
1071
+ for (const win of this.state.windows.values()) {
1072
+ if (win.bundleId === bundleId) {
1073
+ targetWin = win;
1074
+ break;
1075
+ }
1076
+ }
1077
+ // Fallback: use focused app's window if bundleId matches, but only pick
1078
+ // a window that actually belongs to the same app (matching bundleId or pid)
1079
+ if (!targetWin && this.state.focusedApp?.bundleId === bundleId) {
1080
+ const focusedPid = this.state.focusedApp.pid;
1081
+ for (const win of this.state.windows.values()) {
1082
+ if (win.bundleId === bundleId || (focusedPid != null && win.pid === focusedPid)) {
1083
+ targetWin = win;
1084
+ break;
1085
+ }
1086
+ }
1087
+ }
1088
+ if (!targetWin)
1089
+ return;
1090
+ for (const mut of mutations) {
1091
+ if (mut.addedNodes && mut.addedNodes > 0) {
1092
+ const controlId = `cdp_${mut.selector}`;
1093
+ if (targetWin.controls.size < this.config.maxControlsPerWindow) {
1094
+ const incoming = {
1095
+ stableId: controlId,
1096
+ role: "AXWebArea",
1097
+ label: tracked(mut.selector, controlId),
1098
+ value: tracked(null, controlId),
1099
+ enabled: tracked(true, controlId),
1100
+ focused: false,
1101
+ position: { x: 0, y: 0 },
1102
+ size: { width: 0, height: 0 },
1103
+ source: "cdp",
1104
+ sourceConfidence: 0.85,
1105
+ lastSeenAt: now(),
1106
+ };
1107
+ const existing = targetWin.controls.get(controlId);
1108
+ targetWin.controls.set(controlId, this.mergeControl(existing, incoming));
1109
+ }
1110
+ }
1111
+ if (mut.attribute && mut.newValue) {
1112
+ for (const [id, ctrl] of targetWin.controls) {
1113
+ if (id.includes(mut.selector) || ctrl.label.value === mut.selector) {
1114
+ ctrl.label = tracked(mut.newValue, ctrl.stableId);
1115
+ break;
1116
+ }
1117
+ }
1118
+ }
1119
+ }
1120
+ this.state.updatedAt = now();
1121
+ this.schedulePersist();
1122
+ }
1123
+ /**
1124
+ * Update controls from OCR text regions (vision source).
1125
+ * Creates synthetic controls for text regions found by OCR.
1126
+ */
1127
+ ingestOCRRegions(windowId, regions, sourceConfidence = 0.7) {
1128
+ const snap = this.takeSnapshot();
1129
+ const win = this.state.windows.get(windowId);
1130
+ if (!win)
1131
+ return;
1132
+ for (const region of regions) {
1133
+ // Sanitize OCR text: replace newlines with spaces, then apply standard sanitization
1134
+ const cleanText = sanitizeString(region.text.replace(/[\r\n]+/g, " "));
1135
+ const sid = computeStableId("staticText", cleanText, region.bounds.x, region.bounds.y);
1136
+ const incoming = {
1137
+ stableId: sid,
1138
+ role: "staticText",
1139
+ label: tracked(cleanText, sid),
1140
+ value: tracked(cleanText, sid),
1141
+ enabled: tracked(true, sid),
1142
+ focused: false,
1143
+ position: { x: region.bounds.x, y: region.bounds.y },
1144
+ size: { width: region.bounds.width, height: region.bounds.height },
1145
+ source: "ocr",
1146
+ sourceConfidence,
1147
+ lastSeenAt: now(),
1148
+ };
1149
+ const existing = win.controls.get(sid);
1150
+ const merged = this.mergeControl(existing, incoming);
1151
+ if (merged === incoming && !existing && win.controls.size >= this.config.maxControlsPerWindow) {
1152
+ continue; // at capacity, skip new controls
1153
+ }
1154
+ win.controls.set(sid, merged);
1155
+ }
1156
+ win.lastOCRAt = now();
1157
+ this.state.updatedAt = now();
1158
+ this.recordTransitions(snap, "ocr");
1159
+ this.schedulePersist();
1160
+ }
1161
+ /**
1162
+ * Get recent state transitions (max 50, newest last).
1163
+ */
1164
+ getRecentTransitions() {
1165
+ return this.state.recentTransitions;
1166
+ }
1167
+ /**
1168
+ * Diff two WorldState objects and return the state transitions between them.
1169
+ * Useful for external callers that need to compare snapshots without mutating internal state.
1170
+ */
1171
+ static diffStates(before, after) {
1172
+ const ts = now();
1173
+ const transitions = [];
1174
+ // Focus change
1175
+ const beforeBundleId = before.focusedApp?.bundleId ?? null;
1176
+ const afterBundleId = after.focusedApp?.bundleId ?? null;
1177
+ if (beforeBundleId !== afterBundleId) {
1178
+ transitions.push({
1179
+ from: beforeBundleId ?? "(none)",
1180
+ to: afterBundleId ?? "(none)",
1181
+ trigger: "diff:focus_changed",
1182
+ timestamp: ts,
1183
+ });
1184
+ }
1185
+ // Window added/removed
1186
+ const beforeWindowIds = new Set(before.windows.keys());
1187
+ const afterWindowIds = new Set(after.windows.keys());
1188
+ for (const id of afterWindowIds) {
1189
+ if (!beforeWindowIds.has(id)) {
1190
+ const win = after.windows.get(id);
1191
+ transitions.push({
1192
+ from: "(none)",
1193
+ to: win?.title.value ?? String(id),
1194
+ trigger: "diff:window_added",
1195
+ timestamp: ts,
1196
+ });
1197
+ }
1198
+ }
1199
+ for (const id of beforeWindowIds) {
1200
+ if (!afterWindowIds.has(id)) {
1201
+ const win = before.windows.get(id);
1202
+ transitions.push({
1203
+ from: win?.title.value ?? String(id),
1204
+ to: "(none)",
1205
+ trigger: "diff:window_removed",
1206
+ timestamp: ts,
1207
+ });
1208
+ }
1209
+ }
1210
+ // Window title changes
1211
+ for (const [id, beforeWin] of before.windows) {
1212
+ const afterWin = after.windows.get(id);
1213
+ if (afterWin && afterWin.title.value !== beforeWin.title.value) {
1214
+ transitions.push({
1215
+ from: beforeWin.title.value,
1216
+ to: afterWin.title.value,
1217
+ trigger: "diff:title_changed",
1218
+ timestamp: ts,
1219
+ });
1220
+ }
1221
+ }
1222
+ // Dialog count changes
1223
+ if (before.activeDialogs.length !== after.activeDialogs.length) {
1224
+ transitions.push({
1225
+ from: String(before.activeDialogs.length),
1226
+ to: String(after.activeDialogs.length),
1227
+ trigger: "diff:dialog_count_changed",
1228
+ timestamp: ts,
1229
+ });
1230
+ }
1231
+ return transitions;
1232
+ }
1233
+ flush() {
1234
+ this.persister.flush();
1235
+ }
1236
+ /**
1237
+ * Update the screenshot hash for a specific window.
1238
+ * Used by perception coordinator to record vision diffs without
1239
+ * directly mutating world model state.
1240
+ */
1241
+ updateWindowScreenshotHash(windowId, hash) {
1242
+ const win = this.state.windows.get(windowId);
1243
+ if (win) {
1244
+ win.lastScreenshotHash = hash;
1245
+ this.state.updatedAt = now();
1246
+ }
1247
+ }
1248
+ getState() {
1249
+ return this.state;
1250
+ }
1251
+ getStateCopy() {
1252
+ return {
1253
+ ...this.state,
1254
+ windows: new Map(this.state.windows),
1255
+ activeDialogs: [...this.state.activeDialogs],
1256
+ appDomains: new Map(this.state.appDomains),
1257
+ recentTransitions: [...this.state.recentTransitions],
1258
+ trackedEntities: new Map(this.state.trackedEntities),
1259
+ };
1260
+ }
1261
+ /**
1262
+ * Get a deep-frozen consistent snapshot of the world state.
1263
+ * Safe to read during concurrent ingestion — no shared references.
1264
+ */
1265
+ getConsistentSnapshot() {
1266
+ const windowsCopy = new Map();
1267
+ for (const [id, win] of this.state.windows) {
1268
+ // Deep-clone controls to prevent shared references
1269
+ const controlsCopy = new Map();
1270
+ for (const [cid, ctrl] of win.controls) {
1271
+ controlsCopy.set(cid, {
1272
+ ...ctrl,
1273
+ position: { ...ctrl.position },
1274
+ size: { ...ctrl.size },
1275
+ });
1276
+ }
1277
+ windowsCopy.set(id, {
1278
+ ...win,
1279
+ controls: controlsCopy,
1280
+ dialogStack: [...win.dialogStack],
1281
+ visibleControls: win.visibleControls.map((c) => ({ ...c, position: { ...c.position }, size: { ...c.size } })),
1282
+ });
1283
+ }
1284
+ // Deep-clone tracked entities
1285
+ const entitiesCopy = new Map();
1286
+ for (const [eid, entity] of this.state.trackedEntities) {
1287
+ entitiesCopy.set(eid, {
1288
+ ...entity,
1289
+ stableIds: [...entity.stableIds],
1290
+ positions: entity.positions.map((p) => ({ ...p })),
1291
+ properties: { ...entity.properties },
1292
+ });
1293
+ }
1294
+ return {
1295
+ ...this.state,
1296
+ windows: windowsCopy,
1297
+ activeDialogs: this.state.activeDialogs.map((d) => ({ ...d, controls: new Map(d.controls) })),
1298
+ appDomains: new Map(this.state.appDomains),
1299
+ recentTransitions: [...this.state.recentTransitions],
1300
+ trackedEntities: entitiesCopy,
1301
+ };
1302
+ }
1303
+ /**
1304
+ * Get all tracked entities (cross-frame persistent identities).
1305
+ */
1306
+ getTrackedEntities() {
1307
+ return this.state.trackedEntities;
1308
+ }
1309
+ /**
1310
+ * Capture a lightweight snapshot of key state for diffing.
1311
+ */
1312
+ takeSnapshot() {
1313
+ const windowTitles = new Map();
1314
+ const controlCounts = new Map();
1315
+ for (const [id, win] of this.state.windows) {
1316
+ windowTitles.set(id, win.title.value);
1317
+ controlCounts.set(id, win.controls.size);
1318
+ }
1319
+ return {
1320
+ focusedBundleId: this.state.focusedApp?.bundleId ?? null,
1321
+ focusedWindowId: this.state.focusedWindowId,
1322
+ windowIds: new Set(this.state.windows.keys()),
1323
+ windowTitles,
1324
+ controlCounts,
1325
+ dialogCount: this.state.activeDialogs.length,
1326
+ dialogTitles: this.state.activeDialogs.map((d) => d.title),
1327
+ };
1328
+ }
1329
+ /**
1330
+ * Diff a before/after snapshot and record transitions.
1331
+ */
1332
+ recordTransitions(before, trigger) {
1333
+ const ts = now();
1334
+ const transitions = [];
1335
+ // Focus change
1336
+ if (before.focusedBundleId !== (this.state.focusedApp?.bundleId ?? null)) {
1337
+ transitions.push({
1338
+ from: before.focusedBundleId ?? "(none)",
1339
+ to: this.state.focusedApp?.bundleId ?? "(none)",
1340
+ trigger: `${trigger}:focus_changed`,
1341
+ timestamp: ts,
1342
+ });
1343
+ }
1344
+ // Window added/removed
1345
+ const afterWindowIds = new Set(this.state.windows.keys());
1346
+ for (const id of afterWindowIds) {
1347
+ if (!before.windowIds.has(id)) {
1348
+ const win = this.state.windows.get(id);
1349
+ transitions.push({
1350
+ from: "(none)",
1351
+ to: win?.title.value ?? String(id),
1352
+ trigger: `${trigger}:window_added`,
1353
+ timestamp: ts,
1354
+ });
1355
+ }
1356
+ }
1357
+ for (const id of before.windowIds) {
1358
+ if (!afterWindowIds.has(id)) {
1359
+ transitions.push({
1360
+ from: before.windowTitles.get(id) ?? String(id),
1361
+ to: "(none)",
1362
+ trigger: `${trigger}:window_removed`,
1363
+ timestamp: ts,
1364
+ });
1365
+ }
1366
+ }
1367
+ // Window title changed
1368
+ for (const [id, oldTitle] of before.windowTitles) {
1369
+ const win = this.state.windows.get(id);
1370
+ if (win && win.title.value !== oldTitle) {
1371
+ transitions.push({
1372
+ from: oldTitle,
1373
+ to: win.title.value,
1374
+ trigger: `${trigger}:title_changed`,
1375
+ timestamp: ts,
1376
+ });
1377
+ }
1378
+ }
1379
+ // Dialog count changed
1380
+ if (before.dialogCount !== this.state.activeDialogs.length) {
1381
+ transitions.push({
1382
+ from: String(before.dialogCount),
1383
+ to: String(this.state.activeDialogs.length),
1384
+ trigger: `${trigger}:dialog_count_changed`,
1385
+ timestamp: ts,
1386
+ });
1387
+ }
1388
+ // Control count changed per window
1389
+ for (const [id, oldCount] of before.controlCounts) {
1390
+ const win = this.state.windows.get(id);
1391
+ if (win && win.controls.size !== oldCount) {
1392
+ transitions.push({
1393
+ from: String(oldCount),
1394
+ to: String(win.controls.size),
1395
+ trigger: `${trigger}:controls_changed`,
1396
+ timestamp: ts,
1397
+ });
1398
+ }
1399
+ }
1400
+ if (transitions.length > 0) {
1401
+ this.state.recentTransitions.push(...transitions);
1402
+ // Cap at 50
1403
+ if (this.state.recentTransitions.length > 50) {
1404
+ this.state.recentTransitions = this.state.recentTransitions.slice(-50);
1405
+ }
1406
+ }
1407
+ }
1408
+ findControlByRoleLabel(role, label) {
1409
+ for (const win of this.state.windows.values()) {
1410
+ for (const control of win.controls.values()) {
1411
+ if (control.role === role && control.label.value === label) {
1412
+ return control;
1413
+ }
1414
+ }
1415
+ }
1416
+ return undefined;
1417
+ }
1418
+ /** Start a periodic timer that proactively decays all tracked field confidences. */
1419
+ startDecayTimer(intervalMs = 10_000) {
1420
+ this.stopDecayTimer();
1421
+ this.decayTimer = setInterval(() => {
1422
+ this.decayAll();
1423
+ }, intervalMs);
1424
+ if (this.decayTimer && typeof this.decayTimer === "object" && "unref" in this.decayTimer) {
1425
+ this.decayTimer.unref(); // Don't prevent process exit
1426
+ }
1427
+ }
1428
+ /** Stop the decay timer. */
1429
+ stopDecayTimer() {
1430
+ if (this.decayTimer !== null) {
1431
+ clearInterval(this.decayTimer);
1432
+ this.decayTimer = null;
1433
+ }
1434
+ }
1435
+ /** Walk all tracked fields and apply exponential decay in-place. */
1436
+ decayAll() {
1437
+ const rate = this.config.confidenceDecayRate;
1438
+ for (const win of this.state.windows.values()) {
1439
+ win.title = applyDecay(win.title, rate);
1440
+ win.bounds = applyDecay(win.bounds, rate);
1441
+ for (const [id, control] of win.controls) {
1442
+ win.controls.set(id, {
1443
+ ...control,
1444
+ label: applyDecay(control.label, rate),
1445
+ value: applyDecay(control.value, rate),
1446
+ enabled: applyDecay(control.enabled, rate),
1447
+ });
1448
+ }
1449
+ }
1450
+ this.state.updatedAt = now();
1451
+ this.schedulePersist();
1452
+ }
1453
+ schedulePersist() {
1454
+ this.persister.schedule(this.state);
1455
+ }
1456
+ }