screenhand 0.1.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (241) hide show
  1. package/README.md +193 -109
  2. package/bin/darwin-arm64/macos-bridge +0 -0
  3. package/dist/mcp-desktop.js +5876 -0
  4. package/dist/scripts/codex-monitor-daemon.js +335 -0
  5. package/dist/scripts/export-help-center.js +112 -0
  6. package/dist/scripts/marketing-loop.js +117 -0
  7. package/dist/scripts/observer-daemon.js +288 -0
  8. package/dist/scripts/orchestrator-daemon.js +399 -0
  9. package/dist/scripts/supervisor-daemon.js +272 -0
  10. package/dist/scripts/threads-campaign.js +208 -0
  11. package/dist/scripts/worker-daemon.js +228 -0
  12. package/dist/src/agent/cli.js +82 -0
  13. package/dist/src/agent/loop.js +274 -0
  14. package/dist/src/community/fetcher.js +109 -0
  15. package/dist/src/community/index.js +6 -0
  16. package/dist/src/community/publisher.js +191 -0
  17. package/dist/src/community/remote-api.js +121 -0
  18. package/dist/src/community/types.js +3 -0
  19. package/dist/src/community/validator.js +95 -0
  20. package/{src/config.ts → dist/src/config.js} +5 -10
  21. package/dist/src/context-tracker.js +489 -0
  22. package/{src/index.ts → dist/src/index.js} +32 -52
  23. package/dist/src/ingestion/coverage-auditor.js +233 -0
  24. package/dist/src/ingestion/doc-parser.js +164 -0
  25. package/dist/src/ingestion/index.js +8 -0
  26. package/dist/src/ingestion/menu-scanner.js +152 -0
  27. package/dist/src/ingestion/reference-merger.js +186 -0
  28. package/dist/src/ingestion/shortcut-extractor.js +180 -0
  29. package/dist/src/ingestion/tutorial-extractor.js +170 -0
  30. package/dist/src/ingestion/types.js +3 -0
  31. package/dist/src/jobs/manager.js +305 -0
  32. package/dist/src/jobs/runner.js +806 -0
  33. package/dist/src/jobs/store.js +102 -0
  34. package/dist/src/jobs/types.js +30 -0
  35. package/dist/src/jobs/worker.js +97 -0
  36. package/dist/src/learning/engine.js +356 -0
  37. package/dist/src/learning/index.js +9 -0
  38. package/dist/src/learning/locator-policy.js +120 -0
  39. package/dist/src/learning/pattern-policy.js +89 -0
  40. package/dist/src/learning/recovery-policy.js +116 -0
  41. package/dist/src/learning/sensor-policy.js +115 -0
  42. package/dist/src/learning/timing-model.js +204 -0
  43. package/dist/src/learning/topology-policy.js +90 -0
  44. package/dist/src/learning/types.js +9 -0
  45. package/dist/src/logging/timeline-logger.js +48 -0
  46. package/dist/src/mcp/mcp-stdio-server.js +464 -0
  47. package/dist/src/mcp/server.js +363 -0
  48. package/dist/src/mcp-entry.js +60 -0
  49. package/dist/src/memory/playbook-seeds.js +200 -0
  50. package/dist/src/memory/recall.js +222 -0
  51. package/dist/src/memory/research.js +104 -0
  52. package/dist/src/memory/seeds.js +101 -0
  53. package/dist/src/memory/service.js +446 -0
  54. package/dist/src/memory/session.js +169 -0
  55. package/dist/src/memory/store.js +451 -0
  56. package/{src/runtime/locator-cache.ts → dist/src/memory/types.js} +1 -17
  57. package/dist/src/monitor/codex-monitor.js +382 -0
  58. package/dist/src/monitor/task-queue.js +97 -0
  59. package/dist/src/monitor/types.js +62 -0
  60. package/dist/src/native/bridge-client.js +412 -0
  61. package/{src/native/macos-bridge-client.ts → dist/src/native/macos-bridge-client.js} +0 -1
  62. package/dist/src/observer/state.js +199 -0
  63. package/dist/src/observer/types.js +43 -0
  64. package/dist/src/orchestrator/state.js +68 -0
  65. package/dist/src/orchestrator/types.js +22 -0
  66. package/dist/src/perception/ax-source.js +162 -0
  67. package/dist/src/perception/cdp-source.js +162 -0
  68. package/dist/src/perception/coordinator.js +771 -0
  69. package/dist/src/perception/frame-differ.js +287 -0
  70. package/dist/src/perception/index.js +22 -0
  71. package/dist/src/perception/manager.js +199 -0
  72. package/dist/src/perception/types.js +47 -0
  73. package/dist/src/perception/vision-source.js +399 -0
  74. package/dist/src/planner/deterministic.js +298 -0
  75. package/dist/src/planner/executor.js +870 -0
  76. package/dist/src/planner/goal-store.js +92 -0
  77. package/dist/src/planner/index.js +21 -0
  78. package/dist/src/planner/planner.js +520 -0
  79. package/dist/src/planner/tool-registry.js +71 -0
  80. package/dist/src/planner/types.js +22 -0
  81. package/dist/src/platform/explorer.js +213 -0
  82. package/dist/src/platform/help-center-markdown.js +527 -0
  83. package/dist/src/platform/learner.js +257 -0
  84. package/dist/src/playbook/engine.js +486 -0
  85. package/dist/src/playbook/index.js +20 -0
  86. package/dist/src/playbook/mcp-recorder.js +204 -0
  87. package/dist/src/playbook/recorder.js +536 -0
  88. package/dist/src/playbook/runner.js +408 -0
  89. package/dist/src/playbook/store.js +312 -0
  90. package/dist/src/playbook/types.js +17 -0
  91. package/dist/src/recovery/detectors.js +156 -0
  92. package/dist/src/recovery/engine.js +327 -0
  93. package/dist/src/recovery/index.js +20 -0
  94. package/dist/src/recovery/strategies.js +274 -0
  95. package/dist/src/recovery/types.js +20 -0
  96. package/dist/src/runtime/accessibility-adapter.js +430 -0
  97. package/dist/src/runtime/app-adapter.js +64 -0
  98. package/dist/src/runtime/applescript-adapter.js +305 -0
  99. package/dist/src/runtime/ax-role-map.js +96 -0
  100. package/dist/src/runtime/browser-adapter.js +52 -0
  101. package/dist/src/runtime/cdp-chrome-adapter.js +521 -0
  102. package/dist/src/runtime/composite-adapter.js +221 -0
  103. package/dist/src/runtime/execution-contract.js +159 -0
  104. package/dist/src/runtime/executor.js +286 -0
  105. package/dist/src/runtime/locator-cache.js +50 -0
  106. package/dist/src/runtime/planning-loop.js +63 -0
  107. package/dist/src/runtime/service.js +432 -0
  108. package/dist/src/runtime/session-manager.js +63 -0
  109. package/dist/src/runtime/state-observer.js +121 -0
  110. package/dist/src/runtime/vision-adapter.js +225 -0
  111. package/dist/src/state/app-map-types.js +72 -0
  112. package/dist/src/state/app-map.js +1974 -0
  113. package/dist/src/state/entity-tracker.js +108 -0
  114. package/dist/src/state/fusion.js +96 -0
  115. package/dist/src/state/index.js +21 -0
  116. package/dist/src/state/ladder-generator.js +236 -0
  117. package/dist/src/state/persistence.js +156 -0
  118. package/dist/src/state/types.js +17 -0
  119. package/dist/src/state/world-model.js +1456 -0
  120. package/dist/src/supervisor/locks.js +186 -0
  121. package/dist/src/supervisor/supervisor.js +403 -0
  122. package/dist/src/supervisor/types.js +30 -0
  123. package/dist/src/test-mcp-protocol.js +154 -0
  124. package/dist/src/types.js +17 -0
  125. package/dist/src/util/atomic-write.js +133 -0
  126. package/dist/src/util/sanitize.js +146 -0
  127. package/dist-app-maps/com.figma.Desktop.json +959 -0
  128. package/dist-app-maps/com.hnc.Discord.json +1146 -0
  129. package/dist-app-maps/notion.id.json +2831 -0
  130. package/dist-playbooks/canva-screenhand-carousel.json +445 -0
  131. package/dist-playbooks/codex-desktop.json +76 -0
  132. package/dist-playbooks/competitor-research-stack.json +122 -0
  133. package/dist-playbooks/davinci-color-grade.json +153 -0
  134. package/dist-playbooks/davinci-edit-timeline.json +162 -0
  135. package/dist-playbooks/davinci-render.json +114 -0
  136. package/dist-playbooks/devto.json +52 -0
  137. package/dist-playbooks/discord.json +41 -0
  138. package/dist-playbooks/google-flow-create-project.json +59 -0
  139. package/dist-playbooks/google-flow-edit-image.json +90 -0
  140. package/dist-playbooks/google-flow-edit-video.json +90 -0
  141. package/dist-playbooks/google-flow-generate-image.json +68 -0
  142. package/dist-playbooks/google-flow-generate-video.json +191 -0
  143. package/dist-playbooks/google-flow-open-project.json +48 -0
  144. package/dist-playbooks/google-flow-open-scenebuilder.json +64 -0
  145. package/dist-playbooks/google-flow-search-assets.json +64 -0
  146. package/dist-playbooks/instagram.json +57 -0
  147. package/dist-playbooks/linkedin.json +52 -0
  148. package/dist-playbooks/n8n.json +43 -0
  149. package/dist-playbooks/reddit.json +52 -0
  150. package/dist-playbooks/threads.json +59 -0
  151. package/dist-playbooks/x-twitter.json +59 -0
  152. package/dist-playbooks/youtube.json +59 -0
  153. package/dist-references/canva.json +646 -0
  154. package/dist-references/codex-desktop.json +305 -0
  155. package/dist-references/davinci-resolve-keyboard.json +594 -0
  156. package/dist-references/davinci-resolve-menu-map.json +1139 -0
  157. package/dist-references/davinci-resolve-menus-batch1.json +116 -0
  158. package/dist-references/davinci-resolve-menus-batch2.json +372 -0
  159. package/dist-references/davinci-resolve-menus-batch3.json +330 -0
  160. package/dist-references/davinci-resolve-menus-batch4.json +297 -0
  161. package/dist-references/davinci-resolve-shortcuts.json +333 -0
  162. package/dist-references/devto.json +317 -0
  163. package/dist-references/discord.json +549 -0
  164. package/dist-references/figma.json +1186 -0
  165. package/dist-references/finder.json +146 -0
  166. package/dist-references/google-ads-transparency.json +95 -0
  167. package/dist-references/google-flow.json +649 -0
  168. package/dist-references/instagram.json +341 -0
  169. package/dist-references/linkedin.json +324 -0
  170. package/dist-references/meta-ad-library.json +86 -0
  171. package/dist-references/n8n.json +387 -0
  172. package/dist-references/notes.json +27 -0
  173. package/dist-references/notion.json +163 -0
  174. package/dist-references/reddit.json +341 -0
  175. package/dist-references/threads.json +337 -0
  176. package/dist-references/x-twitter.json +403 -0
  177. package/dist-references/youtube.json +373 -0
  178. package/native/macos-bridge/Package.swift +1 -0
  179. package/native/macos-bridge/Sources/AccessibilityBridge.swift +257 -36
  180. package/native/macos-bridge/Sources/AppManagement.swift +212 -2
  181. package/native/macos-bridge/Sources/CoreGraphicsBridge.swift +348 -53
  182. package/native/macos-bridge/Sources/StreamCapture.swift +136 -0
  183. package/native/macos-bridge/Sources/VisionBridge.swift +165 -7
  184. package/native/macos-bridge/Sources/main.swift +169 -16
  185. package/native/windows-bridge/Program.cs +5 -0
  186. package/native/windows-bridge/ScreenCapture.cs +124 -0
  187. package/package.json +29 -4
  188. package/scripts/postinstall.cjs +127 -0
  189. package/.claude/commands/automate.md +0 -28
  190. package/.claude/commands/debug-ui.md +0 -19
  191. package/.claude/commands/screenshot.md +0 -15
  192. package/.github/FUNDING.yml +0 -1
  193. package/.github/ISSUE_TEMPLATE/bug_report.md +0 -27
  194. package/.github/ISSUE_TEMPLATE/feature_request.md +0 -20
  195. package/.mcp.json +0 -8
  196. package/DESKTOP_MCP_GUIDE.md +0 -92
  197. package/SECURITY.md +0 -44
  198. package/docs/architecture.md +0 -47
  199. package/install-skills.sh +0 -19
  200. package/mcp-bridge.ts +0 -271
  201. package/mcp-desktop.ts +0 -1221
  202. package/playbooks/instagram.json +0 -41
  203. package/playbooks/instagram_v2.json +0 -201
  204. package/playbooks/x_v1.json +0 -211
  205. package/scripts/devpost-live-loop.mjs +0 -421
  206. package/src/logging/timeline-logger.ts +0 -55
  207. package/src/mcp/server.ts +0 -449
  208. package/src/memory/recall.ts +0 -191
  209. package/src/memory/research.ts +0 -146
  210. package/src/memory/seeds.ts +0 -123
  211. package/src/memory/session.ts +0 -201
  212. package/src/memory/store.ts +0 -434
  213. package/src/memory/types.ts +0 -69
  214. package/src/native/bridge-client.ts +0 -239
  215. package/src/runtime/accessibility-adapter.ts +0 -487
  216. package/src/runtime/app-adapter.ts +0 -169
  217. package/src/runtime/applescript-adapter.ts +0 -376
  218. package/src/runtime/ax-role-map.ts +0 -102
  219. package/src/runtime/browser-adapter.ts +0 -129
  220. package/src/runtime/cdp-chrome-adapter.ts +0 -676
  221. package/src/runtime/composite-adapter.ts +0 -274
  222. package/src/runtime/executor.ts +0 -396
  223. package/src/runtime/planning-loop.ts +0 -81
  224. package/src/runtime/service.ts +0 -448
  225. package/src/runtime/session-manager.ts +0 -50
  226. package/src/runtime/state-observer.ts +0 -136
  227. package/src/runtime/vision-adapter.ts +0 -297
  228. package/src/types.ts +0 -297
  229. package/tests/bridge-client.test.ts +0 -176
  230. package/tests/browser-stealth.test.ts +0 -210
  231. package/tests/composite-adapter.test.ts +0 -64
  232. package/tests/mcp-server.test.ts +0 -151
  233. package/tests/memory-recall.test.ts +0 -339
  234. package/tests/memory-research.test.ts +0 -159
  235. package/tests/memory-seeds.test.ts +0 -120
  236. package/tests/memory-store.test.ts +0 -392
  237. package/tests/types.test.ts +0 -92
  238. package/tsconfig.check.json +0 -17
  239. package/tsconfig.json +0 -19
  240. package/vitest.config.ts +0 -8
  241. /package/{playbooks → dist-references}/devpost.json +0 -0
@@ -0,0 +1,771 @@
1
+ // Copyright (C) 2025 Clazro Technology Private Limited
2
+ // SPDX-License-Identifier: AGPL-3.0-only
3
+ //
4
+ // This file is part of ScreenHand.
5
+ //
6
+ // ScreenHand is free software: you can redistribute it and/or modify
7
+ // it under the terms of the GNU Affero General Public License as
8
+ // published by the Free Software Foundation, version 3.
9
+ //
10
+ // ScreenHand is distributed in the hope that it will be useful,
11
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ // GNU Affero General Public License for more details.
14
+ //
15
+ // You should have received a copy of the GNU Affero General Public License
16
+ // along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
17
+ import { EventEmitter } from "node:events";
18
+ import { VisionSource } from "./vision-source.js";
19
+ import { DEFAULT_PERCEPTION_CONFIG, createEmptyStats } from "./types.js";
20
+ import { acquireCaptureLock, releaseCaptureLock } from "../observer/state.js";
21
+ import { FusionPipeline } from "../state/fusion.js";
22
/**
 * Race a promise against a timeout.
 *
 * @param {Promise<*>|*} promise - Work to wait for. Non-thenable values are treated as already resolved.
 * @param {number} ms - Maximum time to wait, in milliseconds.
 * @param {string} label - Operation name, used in the timeout error message.
 * @returns {Promise<*>} Settles like `promise`, or rejects with
 *   `Error("<label> timed out after <ms>ms")` when `promise` has not settled within `ms`.
 *   The underlying work is not cancelled when the timeout fires.
 */
function withTimeout(promise, ms, label) {
    return new Promise((resolve, reject) => {
        const timer = setTimeout(() => reject(new Error(`${label} timed out after ${ms}ms`)), ms);
        // Promise.resolve() lets plain values through; calling `.then` on them directly would
        // throw inside the executor and leave the timer armed for the full `ms`.
        Promise.resolve(promise).then(
            (value) => { clearTimeout(timer); resolve(value); },
            (error) => { clearTimeout(timer); reject(error); },
        );
    });
}
29
+ /**
30
+ * PerceptionCoordinator — manages multi-rate perception sources and feeds
31
+ * results into the world model.
32
+ *
33
+ * Runs three interval loops at different rates:
34
+ * - FAST (100ms): AX push events + CDP mutations (event-driven, cheap)
35
+ * - MEDIUM (500ms): AX tree poll + CDP DOM snapshot (structured, moderate)
36
+ * - SLOW (2000ms): Screenshot diff + ROI OCR (visual, expensive)
37
+ *
38
+ * The coordinator runs in the MCP server process. Heavy work (capture/OCR)
39
+ * is delegated to the native bridge (separate process) or observer daemon.
40
+ */
41
+ export class PerceptionCoordinator extends EventEmitter {
42
// Collaborators and settings assigned in the constructor.
+ worldModel;
43
+ axSource;
44
+ cdpSource;
45
+ visionSource;
46
+ config;
47
+ stats;
48
// setInterval handles for the fast/medium/slow loops; null while no loop is armed.
+ fastTimer = null;
49
+ mediumTimer = null;
50
+ slowTimer = null;
51
// Context of the app/window currently being observed; set by start(), cleared by stop().
+ activePid = null;
52
+ activeWindowId = null;
53
+ activeAppContext = null;
54
+ cdpClient = null;
55
+ /** CDP connection factory — called to create/reconnect persistent clients */
56
+ cdpConnectFn = null;
57
// True between start() and stop(); the cycle methods return immediately when false.
+ running = false;
58
// Optional collaborators injected via setLearningEngine / setAppMap / setBrowserEnricher.
+ learningEngine = null;
59
+ appMap = null;
60
+ browserEnricher = null;
61
+ fusionPipeline = new FusionPipeline();
62
+ // In-flight guards to prevent timer pileup when async cycles exceed their interval
63
+ fastInFlight = false;
64
+ mediumInFlight = false;
65
+ slowInFlight = false;
66
+ // Debounce timer for switchContext to coalesce rapid app switches
67
+ switchDebounceTimer = null;
68
+ // Resolve callback for the previous debounced switchContext promise
69
+ switchDebounceResolve = null;
70
+ // Idle gating: pause perception when no tool calls for IDLE_THRESHOLD_MS
71
+ static IDLE_THRESHOLD_MS = 3_000;
72
// Epoch milliseconds of the most recent notifyToolCall() (or of start()).
+ lastToolCallAt = Date.now();
73
// Latched idle state; flipped by isIdle() and notifyToolCall().
+ idle = false;
74
+ constructor(worldModel, axSource, cdpSource, visionSource, config) {
75
+ super();
76
+ this.worldModel = worldModel;
77
+ this.axSource = axSource;
78
+ this.cdpSource = cdpSource;
79
+ this.visionSource = visionSource;
80
+ this.config = { ...DEFAULT_PERCEPTION_CONFIG, ...config };
81
+ this.stats = createEmptyStats();
82
+ }
83
+ /**
84
+ * Inject the learning engine for recording sensor outcomes.
85
+ */
86
+ setLearningEngine(engine) {
87
+ this.learningEngine = engine;
88
+ this.fusionPipeline.setLearningEngine(engine);
89
+ }
90
+ /**
91
+ * Inject the app mastery map for validating spatial knowledge during slow cycle.
92
+ */
93
+ setAppMap(map) {
94
+ this.appMap = map;
95
+ }
96
+ /**
97
+ * Set a browser enricher callback for non-CDP browsers (Safari).
98
+ * Called during medium cycle to fetch URL/title/tabs via AppleScript.
99
+ * Pass null to clear the enricher (e.g. on app switch away from Safari).
100
+ */
101
+ setBrowserEnricher(fn) {
102
+ this.browserEnricher = fn;
103
+ }
104
+ /**
105
+ * Notify that a tool call is happening — resets idle timer and starts stream if needed.
106
+ * Call this from the intelligence wrapper PRE-CALL.
107
+ */
108
+ notifyToolCall() {
109
+ this.lastToolCallAt = Date.now();
110
+ if (this.idle) {
111
+ this.idle = false;
112
+ this.emit("wake");
113
+ // Start stream capture on wake for fast perception (only if running)
114
+ if (this.running && this.visionSource && this.activeWindowId && !this.visionSource.isStreaming) {
115
+ void this.visionSource.startStream(this.activeWindowId).catch(() => { });
116
+ }
117
+ }
118
+ }
119
+ /**
120
+ * Check if perception should be idle (no tool calls for 3s).
121
+ * Stops stream capture when entering idle.
122
+ */
123
+ isIdle() {
124
+ const elapsed = Date.now() - this.lastToolCallAt;
125
+ const shouldIdle = elapsed > PerceptionCoordinator.IDLE_THRESHOLD_MS;
126
+ if (shouldIdle && !this.idle) {
127
+ this.idle = true;
128
+ this.emit("idle");
129
+ // Stop stream capture to save battery at idle
130
+ if (this.visionSource?.isStreaming) {
131
+ void this.visionSource.stopStream().catch(() => { });
132
+ }
133
+ }
134
+ return shouldIdle;
135
+ }
136
+ /**
137
+ * Start continuous perception loops.
138
+ */
139
+ async start(appContext, cdpClient) {
140
+ if (this.running)
141
+ return;
142
+ this.activePid = appContext.pid;
143
+ this.activeWindowId = appContext.windowId ?? null;
144
+ this.activeAppContext = appContext;
145
+ this.cdpClient = cdpClient ?? null;
146
+ this.running = true;
147
+ this.stats = createEmptyStats();
148
+ this.stats.started = true;
149
+ this.stats.startedAt = new Date().toISOString();
150
+ this.cdpConsecutiveFailures = 0;
151
+ this.axConsecutiveFailures = 0;
152
+ this.fastInFlight = false;
153
+ this.mediumInFlight = false;
154
+ this.slowInFlight = false;
155
+ this.lastToolCallAt = Date.now();
156
+ this.idle = false;
157
+ // Enable safe CLI capture for browser apps to avoid CGWindowListCreateImage SIGSEGV
158
+ if (this.visionSource && typeof this.visionSource.setSafeCLI === "function") {
159
+ const family = this.worldModel.getAppFamily();
160
+ this.visionSource.setSafeCLI(family === "browser");
161
+ }
162
+ // Start continuous stream capture for fast perception (non-blocking, best-effort)
163
+ if (this.config.enableVision && this.visionSource && this.activeWindowId) {
164
+ void this.visionSource.startStream(this.activeWindowId).catch(() => { });
165
+ }
166
+ // Start AX observation
167
+ if (this.config.enableAX && this.axSource && this.activePid) {
168
+ try {
169
+ await this.axSource.startObserving(this.activePid);
170
+ }
171
+ catch {
172
+ // AX not available
173
+ }
174
+ }
175
+ // Install CDP mutation observer
176
+ if (this.config.enableCDP && this.cdpSource && this.cdpClient) {
177
+ try {
178
+ await this.cdpSource.installMutationObserver(this.cdpClient);
179
+ }
180
+ catch {
181
+ // CDP not available
182
+ }
183
+ }
184
+ // Start interval loops — in-flight guards prevent pileup when async cycle
185
+ // takes longer than the interval (e.g. bridge latency spike).
186
+ this.fastTimer = setInterval(() => {
187
+ if (this.fastInFlight)
188
+ return;
189
+ this.fastInFlight = true;
190
+ void this.fastCycle().catch(() => { }).finally(() => { this.fastInFlight = false; });
191
+ }, this.config.fastIntervalMs);
192
+ this.mediumTimer = setInterval(() => {
193
+ if (this.mediumInFlight)
194
+ return;
195
+ this.mediumInFlight = true;
196
+ void this.mediumCycle().catch(() => { }).finally(() => { this.mediumInFlight = false; });
197
+ }, this.config.mediumIntervalMs);
198
+ if (this.config.enableVision) {
199
+ this.slowTimer = setInterval(() => {
200
+ if (this.slowInFlight)
201
+ return;
202
+ this.slowInFlight = true;
203
+ void this.slowCycle().catch(() => { }).finally(() => { this.slowInFlight = false; });
204
+ }, this.config.slowIntervalMs);
205
+ }
206
+ this.emit("started", appContext);
207
+ }
208
+ /**
209
+ * Stop all perception loops.
210
+ */
211
+ async stop() {
212
+ if (!this.running)
213
+ return;
214
+ this.running = false;
215
+ if (this.switchDebounceTimer !== null) {
216
+ clearTimeout(this.switchDebounceTimer);
217
+ this.switchDebounceTimer = null;
218
+ if (this.switchDebounceResolve !== null) {
219
+ this.switchDebounceResolve();
220
+ this.switchDebounceResolve = null;
221
+ }
222
+ }
223
+ // Stop stream capture
224
+ if (this.visionSource?.isStreaming) {
225
+ void this.visionSource.stopStream().catch(() => { });
226
+ }
227
+ if (this.fastTimer) {
228
+ clearInterval(this.fastTimer);
229
+ this.fastTimer = null;
230
+ }
231
+ if (this.mediumTimer) {
232
+ clearInterval(this.mediumTimer);
233
+ this.mediumTimer = null;
234
+ }
235
+ if (this.slowTimer) {
236
+ clearInterval(this.slowTimer);
237
+ this.slowTimer = null;
238
+ }
239
+ if (this.axSource && this.activePid) {
240
+ try {
241
+ await this.axSource.stopObserving(this.activePid);
242
+ }
243
+ catch {
244
+ // ignore
245
+ }
246
+ }
247
+ this.activePid = null;
248
+ this.activeWindowId = null;
249
+ this.activeAppContext = null;
250
+ this.cdpClient = null;
251
+ this.browserEnricher = null;
252
+ this.stats.started = false;
253
+ this.fastInFlight = false;
254
+ this.mediumInFlight = false;
255
+ this.slowInFlight = false;
256
+ this.emit("stopped");
257
+ }
258
+ /**
259
+ * Switch perception to a new app/window context.
260
+ * Debounced by 150ms — rapid successive calls coalesce to the last context.
261
+ */
262
+ switchContext(appContext, cdpClient) {
263
+ if (this.switchDebounceTimer !== null) {
264
+ clearTimeout(this.switchDebounceTimer);
265
+ this.switchDebounceTimer = null;
266
+ // Resolve the previous caller's promise — their switch was superseded, not failed
267
+ if (this.switchDebounceResolve !== null) {
268
+ this.switchDebounceResolve();
269
+ this.switchDebounceResolve = null;
270
+ }
271
+ }
272
+ return new Promise((resolve) => {
273
+ this.switchDebounceResolve = resolve;
274
+ this.switchDebounceTimer = setTimeout(() => {
275
+ this.switchDebounceTimer = null;
276
+ this.switchDebounceResolve = null;
277
+ void this.doSwitchContext(appContext, cdpClient).then(resolve).catch((err) => {
278
+ console.error(`[Perception] switchContext failed: ${err?.message ?? err}`);
279
+ resolve(); // Resolve anyway so callers don't hang, but error is logged
280
+ });
281
+ }, 150);
282
+ });
283
+ }
284
+ /**
285
+ * Internal: perform the actual context switch (stop + reset + start).
286
+ */
287
+ async doSwitchContext(appContext, cdpClient) {
288
+ await this.stop();
289
+ this.visionSource?.reset();
290
+ this.cdpSource?.reset();
291
+ await this.start(appContext, cdpClient);
292
+ }
293
+ /**
294
+ * Get current perception statistics.
295
+ */
296
+ getStats() {
297
+ return { ...this.stats };
298
+ }
299
+ /**
300
+ * Get a perception freshness summary for intelligence wrapper hints.
301
+ */
302
+ getFreshnessSummary() {
303
+ if (!this.stats.started)
304
+ return "Perception: not active";
305
+ const now = Date.now();
306
+ const STALE_THRESHOLD_MS = 5_000;
307
+ const sources = [];
308
+ const warnings = [];
309
+ // Per-source detail
310
+ if (this.config.enableAX) {
311
+ if (this.stats.lastAXAt) {
312
+ const ageMs = now - new Date(this.stats.lastAXAt).getTime();
313
+ sources.push(`AX: ${ageMs}ms ago`);
314
+ if (ageMs > STALE_THRESHOLD_MS)
315
+ warnings.push("AX");
316
+ }
317
+ else {
318
+ sources.push("AX: no data yet");
319
+ }
320
+ }
321
+ else {
322
+ sources.push("AX: DISABLED");
323
+ }
324
+ if (this.config.enableCDP) {
325
+ if (this.stats.lastCDPAt) {
326
+ const ageMs = now - new Date(this.stats.lastCDPAt).getTime();
327
+ sources.push(`CDP: ${ageMs}ms ago`);
328
+ if (ageMs > STALE_THRESHOLD_MS)
329
+ warnings.push("CDP");
330
+ }
331
+ else {
332
+ sources.push("CDP: no data yet");
333
+ }
334
+ }
335
+ else {
336
+ sources.push("CDP: DISABLED");
337
+ }
338
+ if (this.config.enableVision) {
339
+ if (this.stats.lastVisionAt) {
340
+ const ageMs = now - new Date(this.stats.lastVisionAt).getTime();
341
+ sources.push(`Vision: ${ageMs}ms ago`);
342
+ if (ageMs > STALE_THRESHOLD_MS)
343
+ warnings.push("Vision");
344
+ }
345
+ else {
346
+ sources.push("Vision: no data yet");
347
+ }
348
+ }
349
+ else {
350
+ sources.push("Vision: DISABLED");
351
+ }
352
+ let summary = `Perception: ${sources.join(", ")}`;
353
+ if (warnings.length > 0) {
354
+ summary += ` ⚠ STALE: ${warnings.join(", ")} (>5s)`;
355
+ }
356
+ return summary;
357
+ }
358
+ get isRunning() {
359
+ return this.running;
360
+ }
361
+ getConfig() {
362
+ return { ...this.config };
363
+ }
364
+ // ── Loop implementations ──
365
+ async fastCycle() {
366
+ if (!this.running || this.isIdle())
367
+ return;
368
+ const timestamp = new Date().toISOString();
369
+ try {
370
+ // Drain AX events
371
+ if (this.config.enableAX && this.axSource) {
372
+ try {
373
+ const axEvent = this.axSource.drainEvents();
374
+ if (axEvent && axEvent.data.type === "ax_events") {
375
+ this.stats.axEventsProcessed += axEvent.data.events.length;
376
+ this.worldModel.ingestUIEvents(axEvent.data.events);
377
+ this.emit("perception", axEvent);
378
+ }
379
+ }
380
+ catch (err) {
381
+ console.error(`[Perception] fastCycle AX drain error: ${err?.message ?? err}`);
382
+ }
383
+ }
384
+ // Drain CDP mutations
385
+ if (this.config.enableCDP && this.cdpSource) {
386
+ try {
387
+ const cdpEvent = this.cdpSource.drainMutations();
388
+ if (cdpEvent && cdpEvent.data.type === "cdp_mutations") {
389
+ this.stats.cdpMutationsProcessed += cdpEvent.data.mutations.length;
390
+ // Ingest mutations into world model
391
+ if (this.activeAppContext) {
392
+ this.worldModel.ingestCDPMutations(this.activeAppContext.bundleId, cdpEvent.data.mutations);
393
+ }
394
+ this.emit("perception", cdpEvent);
395
+ }
396
+ }
397
+ catch (err) {
398
+ console.error(`[Perception] fastCycle CDP drain error: ${err?.message ?? err}`);
399
+ }
400
+ }
401
+ }
402
+ finally {
403
+ this.stats.fastCycles++;
404
+ this.stats.lastFastAt = timestamp;
405
+ }
406
+ }
407
+ async mediumCycle() {
408
+ if (!this.running || this.isIdle())
409
+ return;
410
+ const timestamp = new Date().toISOString();
411
+ // Determine sensor polling order — use learning engine ranking if available
412
+ const sensorOrder = this.getMediumCycleSensorOrder();
413
+ const MEDIUM_CYCLE_TIMEOUT_MS = 15_000;
414
+ for (const sensor of sensorOrder) {
415
+ try {
416
+ if (sensor === "ax") {
417
+ await withTimeout(this.pollAX(), MEDIUM_CYCLE_TIMEOUT_MS, "pollAX");
418
+ }
419
+ else if (sensor === "cdp") {
420
+ await withTimeout(this.pollCDP(), MEDIUM_CYCLE_TIMEOUT_MS, "pollCDP");
421
+ }
422
+ }
423
+ catch (e) {
424
+ console.error(`[Perception] ${sensor} medium cycle error: ${e?.message ?? e}`);
425
+ }
426
+ }
427
+ // Enrich browser state for non-CDP browsers (Safari)
428
+ if (this.browserEnricher) {
429
+ try {
430
+ await this.browserEnricher();
431
+ }
432
+ catch { /* best-effort */ }
433
+ }
434
+ this.stats.mediumCycles++;
435
+ this.stats.lastMediumAt = timestamp;
436
+ }
437
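/*
 * NOTE: misplaced doc comment — it describes getMediumCycleSensorOrder(), which is
 * defined further below, not the method that immediately follows.
 *
 * Determine the order to poll sensors in the medium cycle.
 * If the learning engine has ranked data for the current app, use that order.
 * Otherwise, fall back to the default: AX → CDP.
 */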
442
+ /**
443
+ * Inject or update the CDP client after perception has started.
444
+ * Called when a browser CDP connection is established.
445
+ */
446
+ /**
447
+ * Inject or update the CDP client after perception has started.
448
+ * Accepts either a live client or a connect function (preferred — enables reconnection).
449
+ */
450
+ activateCDP(cdpClient, connectFn) {
451
+ console.error(`[Perception] activateCDP called, client=${!!cdpClient}, connectFn=${!!connectFn}, enableCDP=${this.config.enableCDP}, cdpSource=${!!this.cdpSource}`);
452
+ this.cdpClient = cdpClient;
453
+ if (connectFn)
454
+ this.cdpConnectFn = connectFn;
455
+ this.cdpConsecutiveFailures = 0;
456
+ if (this.config.enableCDP && this.cdpSource && cdpClient) {
457
+ void this.cdpSource.installMutationObserver(cdpClient).catch((e) => {
458
+ console.error(`[Perception] installMutationObserver failed: ${e?.message ?? e}`);
459
+ });
460
+ }
461
+ }
462
+ /**
463
+ * Update the active window ID after perception has started.
464
+ * Called when window resolution succeeds late (after start).
465
+ */
466
+ setActiveWindowId(windowId) {
467
+ this.activeWindowId = windowId;
468
+ }
469
+ getMediumCycleSensorOrder() {
470
+ const defaultOrder = [];
471
+ // Allow AX polling even without windowId — pollAX uses windowId ?? 0 (full app tree)
472
+ if (this.config.enableAX && this.axSource && this.activePid) {
473
+ defaultOrder.push("ax");
474
+ }
475
+ if (this.config.enableCDP && this.cdpSource && this.cdpClient) {
476
+ defaultOrder.push("cdp");
477
+ }
478
+ if (!this.learningEngine || !this.activeAppContext || defaultOrder.length <= 1) {
479
+ return defaultOrder;
480
+ }
481
+ const ranked = this.learningEngine.rankSensors(this.activeAppContext.bundleId);
482
+ if (ranked.length === 0)
483
+ return defaultOrder;
484
+ // Build ordered list from ranking, only including sensors that are available
485
+ const available = new Set(defaultOrder);
486
+ const ordered = [];
487
+ for (const { sourceType } of ranked) {
488
+ const s = sourceType;
489
+ if (available.has(s)) {
490
+ ordered.push(s);
491
+ available.delete(s);
492
+ }
493
+ }
494
+ // Append any remaining sensors not covered by ranking
495
+ for (const s of defaultOrder) {
496
+ if (available.has(s)) {
497
+ ordered.push(s);
498
+ }
499
+ }
500
+ return ordered;
501
+ }
502
+ axConsecutiveFailures = 0;
503
+ async pollAX() {
504
+ if (!this.config.enableAX ||
505
+ !this.axSource ||
506
+ !this.activePid ||
507
+ !this.activeAppContext)
508
+ return;
509
+ // If AX has failed many times, the app PID is likely dead — skip polling
510
+ // and emit a stale warning so the caller knows to restart perception
511
+ if (this.axConsecutiveFailures > 5) {
512
+ return;
513
+ }
514
+ // Adaptive skip: if recent AX polls are extremely slow, skip this cycle
515
+ if (this.axSource.shouldSkipPoll()) {
516
+ return;
517
+ }
518
+ // Derive windowId if not set — use first tracked window for this pid
519
+ if (this.activeWindowId === null) {
520
+ for (const [id, win] of this.worldModel.getState().windows) {
521
+ if (win.pid === this.activePid) {
522
+ this.activeWindowId = id;
523
+ break;
524
+ }
525
+ }
526
+ }
527
+ try {
528
+ const { event: treeEvent, latencyMs: axLatency, nodeCount } = await this.axSource.pollAXTree(this.activePid, this.activeWindowId ?? 0, this.activeAppContext);
529
+ const axSuccess = !!(treeEvent && treeEvent.data.type === "ax_tree");
530
+ if (treeEvent && treeEvent.data.type === "ax_tree") {
531
+ this.stats.axTreePolls++;
532
+ this.stats.lastAXAt = new Date().toISOString();
533
+ this.axConsecutiveFailures = 0;
534
+ this.fusionPipeline.enqueue({
535
+ source: "ax",
536
+ timestamp: new Date().toISOString(),
537
+ confidence: 0.9,
538
+ windowId: treeEvent.data.windowId,
539
+ axTree: treeEvent.data.tree,
540
+ appContext: treeEvent.data.appContext,
541
+ });
542
+ this.fusionPipeline.flush(this.worldModel);
543
+ this.emit("perception", treeEvent);
544
+ }
545
+ else {
546
+ this.axConsecutiveFailures++;
547
+ }
548
+ if (this.learningEngine && this.activeAppContext) {
549
+ this.learningEngine.recordSensorOutcome({
550
+ bundleId: this.activeAppContext.bundleId,
551
+ sourceType: "ax",
552
+ success: axSuccess,
553
+ latencyMs: axLatency,
554
+ nodeCount,
555
+ });
556
+ }
557
+ }
558
+ catch (err) {
559
+ this.axConsecutiveFailures++;
560
+ console.error(`[Perception] pollAX error #${this.axConsecutiveFailures}: ${err?.message ?? err}`);
561
+ }
562
+ }
563
+ cdpConsecutiveFailures = 0;
564
+ async pollCDP() {
565
+ if (!this.config.enableCDP || !this.cdpSource || !this.cdpClient) {
566
+ if (this.stats.cdpSnapshots === 0 && this.stats.mediumCycles > 0 && this.stats.mediumCycles % 20 === 0) {
567
+ console.error(`[Perception] pollCDP skipped: enableCDP=${this.config.enableCDP} cdpSource=${!!this.cdpSource} cdpClient=${!!this.cdpClient}`);
568
+ }
569
+ return;
570
+ }
571
+ // If CDP has failed too many times, periodically retry reconnection (every 10th cycle)
572
+ // instead of giving up forever — the target app may have restarted
573
+ if (this.cdpConsecutiveFailures > 10) {
574
+ if (this.cdpConsecutiveFailures % 10 === 0 && this.cdpConnectFn) {
575
+ try {
576
+ this.cdpClient = await this.cdpConnectFn();
577
+ const failureCount = this.cdpConsecutiveFailures;
578
+ this.cdpConsecutiveFailures = 0;
579
+ console.error(`[Perception] CDP reconnected after ${failureCount} failures`);
580
+ }
581
+ catch {
582
+ this.cdpConsecutiveFailures++;
583
+ }
584
+ }
585
+ else {
586
+ this.cdpConsecutiveFailures++;
587
+ }
588
+ return;
589
+ }
590
+ const cdpStart = Date.now();
591
+ try {
592
+ const snapEvent = await this.cdpSource.pollSnapshot(this.cdpClient);
593
+ const cdpLatency = Date.now() - cdpStart;
594
+ const cdpSuccess = !!(snapEvent && snapEvent.data.type === "cdp_snapshot");
595
+ if (this.stats.cdpSnapshots === 0) {
596
+ console.error(`[Perception] pollCDP result: success=${cdpSuccess} latency=${cdpLatency}ms event_type=${snapEvent?.data?.type ?? "null"}`);
597
+ }
598
+ if (snapEvent && snapEvent.data.type === "cdp_snapshot") {
599
+ this.stats.cdpSnapshots++;
600
+ this.stats.lastCDPAt = new Date().toISOString();
601
+ this.cdpConsecutiveFailures = 0;
602
+ if (this.activeAppContext) {
603
+ this.fusionPipeline.enqueue({
604
+ source: "cdp",
605
+ timestamp: new Date().toISOString(),
606
+ confidence: 0.85,
607
+ windowId: this.activeWindowId ?? 0,
608
+ cdpSnapshot: {
609
+ bundleId: this.activeAppContext.bundleId,
610
+ url: snapEvent.data.url,
611
+ title: snapEvent.data.title,
612
+ },
613
+ });
614
+ this.fusionPipeline.flush(this.worldModel);
615
+ }
616
+ this.emit("perception", snapEvent);
617
+ }
618
+ if (this.learningEngine && this.activeAppContext) {
619
+ this.learningEngine.recordSensorOutcome({
620
+ bundleId: this.activeAppContext.bundleId,
621
+ sourceType: "cdp",
622
+ success: cdpSuccess,
623
+ latencyMs: cdpLatency,
624
+ });
625
+ }
626
+ }
627
+ catch (err) {
628
+ this.cdpConsecutiveFailures++;
629
+ console.error(`[Perception] pollCDP error #${this.cdpConsecutiveFailures}: ${err?.message ?? err}`);
630
+ // Try to reconnect using the connect factory if available
631
+ if (this.cdpConsecutiveFailures <= 3 && this.cdpConnectFn) {
632
+ try {
633
+ this.cdpClient = await this.cdpConnectFn();
634
+ this.cdpConsecutiveFailures = 0;
635
+ }
636
+ catch {
637
+ // reconnect failed — will retry next cycle
638
+ }
639
+ }
640
+ if (this.learningEngine && this.activeAppContext) {
641
+ this.learningEngine.recordSensorOutcome({
642
+ bundleId: this.activeAppContext.bundleId,
643
+ sourceType: "cdp",
644
+ success: false,
645
+ latencyMs: Date.now() - cdpStart,
646
+ });
647
+ }
648
+ }
649
+ }
650
+ async slowCycle() {
651
+ if (!this.running || !this.visionSource || this.isIdle())
652
+ return;
653
+ // For browsers, use safe CLI capture mode (screencapture) instead of
654
+ // CGWindowListCreateImage which crashes on GPU-heavy pages (WebGL, canvas).
655
+ // Safe CLI mode is already enabled via setSafeCLI() in start().
656
+ // This allows vision/OCR for canvas-heavy apps like Canva in Chrome.
657
+ // Skip vision if learning engine shows it consistently fails for this app,
658
+ // but retry every 20th cycle to re-evaluate (apps may gain windows later)
659
+ if (this.learningEngine && this.activeAppContext) {
660
+ const ranked = this.learningEngine.rankSensors(this.activeAppContext.bundleId);
661
+ const visionRank = ranked.find(r => r.sourceType === "vision");
662
+ if (visionRank && visionRank.score < 0.1 && ranked.length >= 2 && this.stats.slowCycles % 20 !== 0) {
663
+ this.stats.slowCycles++;
664
+ return; // Vision consistently fails for this app — skip (retry every 20th cycle)
665
+ }
666
+ }
667
+ const timestamp = new Date().toISOString();
668
+ // Acquire capture lock to prevent concurrent captures with observer daemon
669
+ if (!this.config.skipCaptureLock && !acquireCaptureLock()) {
670
+ this.stats.slowCycles++;
671
+ return; // Observer daemon is capturing — skip this cycle
672
+ }
673
+ try {
674
+ // Screenshot diff — optimized single-capture pipeline
675
+ const windowId = this.activeWindowId ?? 0;
676
+ if (windowId === 0)
677
+ return; // Vision needs a real window ID for screenshot
678
+ const SLOW_CYCLE_TIMEOUT_MS = 25_000;
679
+ const { diffEvent, ocrEvent, yoloElements } = await withTimeout(this.visionSource.captureAndDiffOptimized(windowId, this.config.maxROIsPerCycle), SLOW_CYCLE_TIMEOUT_MS, "captureAndDiffOptimized");
680
+ if (diffEvent) {
681
+ this.stats.visionDiffs++;
682
+ this.stats.lastVisionAt = new Date().toISOString();
683
+ // Store screenshot hash in world model for change detection
684
+ if (diffEvent.data.type === "vision_diff" && diffEvent.data.hash && this.activeWindowId !== null) {
685
+ this.worldModel.updateWindowScreenshotHash(this.activeWindowId, diffEvent.data.hash);
686
+ }
687
+ this.emit("perception", diffEvent);
688
+ }
689
+ if (ocrEvent) {
690
+ this.stats.visionOCRs++;
691
+ // Merge OCR regions into world model via fusion pipeline
692
+ if (ocrEvent.data.type === "vision_ocr" && ocrEvent.data.regions.length > 0) {
693
+ this.fusionPipeline.enqueue({
694
+ source: "ocr",
695
+ timestamp: new Date().toISOString(),
696
+ confidence: 0.7,
697
+ windowId,
698
+ ocrRegions: ocrEvent.data.regions,
699
+ });
700
+ this.fusionPipeline.flush(this.worldModel);
701
+ }
702
+ // Touch lastValidated on app map when OCR confirms screen content
703
+ if (this.appMap && this.activeAppContext) {
704
+ const mapData = this.appMap.load(this.activeAppContext.bundleId);
705
+ if (mapData) {
706
+ mapData.lastValidated = new Date().toISOString();
707
+ this.appMap.save(mapData);
708
+ }
709
+ }
710
+ this.emit("perception", ocrEvent);
711
+ }
712
+ // Fuse YOLO element detections with OCR text regions
713
+ if (yoloElements && yoloElements.length > 0) {
714
+ const ocrRegions = (ocrEvent?.data.type === "vision_ocr" && ocrEvent.data.regions)
715
+ ? ocrEvent.data.regions
716
+ : [];
717
+ const fused = VisionSource.fuseOcrAndYolo(ocrRegions, yoloElements);
718
+ if (fused.length > 0) {
719
+ this.fusionPipeline.enqueue({
720
+ source: "ocr",
721
+ timestamp: new Date().toISOString(),
722
+ confidence: 0.8,
723
+ windowId,
724
+ ocrRegions: fused.map((f) => ({
725
+ text: f.text || `[${f.class}]`,
726
+ bounds: f.bounds,
727
+ })),
728
+ });
729
+ this.fusionPipeline.flush(this.worldModel);
730
+ }
731
+ this.emit("perception", {
732
+ source: "vision_yolo",
733
+ rate: "slow",
734
+ timestamp: new Date().toISOString(),
735
+ data: {
736
+ type: "vision_yolo",
737
+ elements: fused,
738
+ count: fused.length,
739
+ },
740
+ });
741
+ }
742
+ // Record vision sensor outcome
743
+ if (this.learningEngine && this.activeAppContext) {
744
+ this.learningEngine.recordSensorOutcome({
745
+ bundleId: this.activeAppContext.bundleId,
746
+ sourceType: "vision",
747
+ success: !!diffEvent,
748
+ latencyMs: Date.now() - new Date(timestamp).getTime(),
749
+ });
750
+ }
751
+ }
752
+ catch {
753
+ // Vision source failed (bridge crash, timeout, etc.) — continue running
754
+ if (this.learningEngine && this.activeAppContext) {
755
+ this.learningEngine.recordSensorOutcome({
756
+ bundleId: this.activeAppContext.bundleId,
757
+ sourceType: "vision",
758
+ success: false,
759
+ latencyMs: Date.now() - new Date(timestamp).getTime(),
760
+ });
761
+ }
762
+ }
763
+ finally {
764
+ // Always increment stats, even on early return (windowId=0) or error
765
+ this.stats.slowCycles++;
766
+ this.stats.lastSlowAt = timestamp;
767
+ if (!this.config.skipCaptureLock)
768
+ releaseCaptureLock();
769
+ }
770
+ }
771
+ }