screenhand 0.2.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212) hide show
  1. package/README.md +165 -446
  2. package/bin/darwin-arm64/macos-bridge +0 -0
  3. package/dist/mcp-desktop.js +3615 -400
  4. package/dist/scripts/export-help-center.js +112 -0
  5. package/dist/scripts/marketing-loop.js +117 -0
  6. package/dist/scripts/observer-daemon.js +288 -0
  7. package/dist/scripts/orchestrator-daemon.js +399 -0
  8. package/dist/scripts/threads-campaign.js +208 -0
  9. package/dist/src/community/fetcher.js +109 -0
  10. package/dist/src/community/index.js +6 -0
  11. package/dist/src/community/publisher.js +191 -0
  12. package/dist/src/community/remote-api.js +121 -0
  13. package/dist/src/community/types.js +3 -0
  14. package/dist/src/community/validator.js +95 -0
  15. package/dist/src/context-tracker.js +489 -0
  16. package/dist/src/ingestion/coverage-auditor.js +233 -0
  17. package/dist/src/ingestion/doc-parser.js +164 -0
  18. package/dist/src/ingestion/index.js +8 -0
  19. package/dist/src/ingestion/menu-scanner.js +152 -0
  20. package/dist/src/ingestion/reference-merger.js +186 -0
  21. package/dist/src/ingestion/shortcut-extractor.js +180 -0
  22. package/dist/src/ingestion/tutorial-extractor.js +170 -0
  23. package/dist/src/ingestion/types.js +3 -0
  24. package/dist/src/jobs/manager.js +82 -14
  25. package/dist/src/jobs/runner.js +138 -15
  26. package/dist/src/learning/engine.js +356 -0
  27. package/dist/src/learning/index.js +9 -0
  28. package/dist/src/learning/locator-policy.js +120 -0
  29. package/dist/src/learning/pattern-policy.js +89 -0
  30. package/dist/src/learning/recovery-policy.js +116 -0
  31. package/dist/src/learning/sensor-policy.js +115 -0
  32. package/dist/src/learning/timing-model.js +204 -0
  33. package/dist/src/learning/topology-policy.js +90 -0
  34. package/dist/src/learning/types.js +9 -0
  35. package/dist/src/logging/timeline-logger.js +4 -1
  36. package/dist/src/memory/playbook-seeds.js +200 -0
  37. package/dist/src/memory/recall.js +60 -8
  38. package/dist/src/memory/service.js +30 -5
  39. package/dist/src/memory/store.js +34 -5
  40. package/dist/src/native/bridge-client.js +253 -31
  41. package/dist/src/observer/state.js +199 -0
  42. package/dist/src/observer/types.js +43 -0
  43. package/dist/src/orchestrator/state.js +68 -0
  44. package/dist/src/orchestrator/types.js +22 -0
  45. package/dist/src/perception/ax-source.js +162 -0
  46. package/dist/src/perception/cdp-source.js +162 -0
  47. package/dist/src/perception/coordinator.js +771 -0
  48. package/dist/src/perception/frame-differ.js +287 -0
  49. package/dist/src/perception/index.js +22 -0
  50. package/dist/src/perception/manager.js +199 -0
  51. package/dist/src/perception/types.js +47 -0
  52. package/dist/src/perception/vision-source.js +399 -0
  53. package/dist/src/planner/deterministic.js +298 -0
  54. package/dist/src/planner/executor.js +870 -0
  55. package/dist/src/planner/goal-store.js +92 -0
  56. package/dist/src/planner/index.js +21 -0
  57. package/dist/src/planner/planner.js +520 -0
  58. package/dist/src/planner/tool-registry.js +71 -0
  59. package/dist/src/planner/types.js +22 -0
  60. package/dist/src/platform/explorer.js +213 -0
  61. package/dist/src/platform/help-center-markdown.js +527 -0
  62. package/dist/src/platform/learner.js +257 -0
  63. package/dist/src/playbook/engine.js +296 -11
  64. package/dist/src/playbook/mcp-recorder.js +204 -0
  65. package/dist/src/playbook/recorder.js +3 -2
  66. package/dist/src/playbook/runner.js +1 -1
  67. package/dist/src/playbook/store.js +139 -10
  68. package/dist/src/recovery/detectors.js +156 -0
  69. package/dist/src/recovery/engine.js +327 -0
  70. package/dist/src/recovery/index.js +20 -0
  71. package/dist/src/recovery/strategies.js +274 -0
  72. package/dist/src/recovery/types.js +20 -0
  73. package/dist/src/runtime/accessibility-adapter.js +55 -18
  74. package/dist/src/runtime/applescript-adapter.js +8 -2
  75. package/dist/src/runtime/cdp-chrome-adapter.js +1 -1
  76. package/dist/src/runtime/executor.js +23 -3
  77. package/dist/src/runtime/locator-cache.js +24 -2
  78. package/dist/src/runtime/service.js +59 -15
  79. package/dist/src/runtime/session-manager.js +4 -1
  80. package/dist/src/runtime/vision-adapter.js +2 -1
  81. package/dist/src/state/app-map-types.js +72 -0
  82. package/dist/src/state/app-map.js +1974 -0
  83. package/dist/src/state/entity-tracker.js +108 -0
  84. package/dist/src/state/fusion.js +96 -0
  85. package/dist/src/state/index.js +21 -0
  86. package/dist/src/state/ladder-generator.js +236 -0
  87. package/dist/src/state/persistence.js +156 -0
  88. package/dist/src/state/types.js +17 -0
  89. package/dist/src/state/world-model.js +1456 -0
  90. package/dist/src/util/atomic-write.js +19 -4
  91. package/dist/src/util/sanitize.js +146 -0
  92. package/dist-app-maps/com.figma.Desktop.json +959 -0
  93. package/dist-app-maps/com.hnc.Discord.json +1146 -0
  94. package/dist-app-maps/notion.id.json +2831 -0
  95. package/dist-playbooks/canva-screenhand-carousel.json +445 -0
  96. package/dist-playbooks/codex-desktop.json +76 -0
  97. package/dist-playbooks/competitor-research-stack.json +122 -0
  98. package/dist-playbooks/davinci-color-grade.json +153 -0
  99. package/dist-playbooks/davinci-edit-timeline.json +162 -0
  100. package/dist-playbooks/davinci-render.json +114 -0
  101. package/dist-playbooks/devto.json +52 -0
  102. package/dist-playbooks/discord.json +41 -0
  103. package/dist-playbooks/google-flow-create-project.json +59 -0
  104. package/dist-playbooks/google-flow-edit-image.json +90 -0
  105. package/dist-playbooks/google-flow-edit-video.json +90 -0
  106. package/dist-playbooks/google-flow-generate-image.json +68 -0
  107. package/dist-playbooks/google-flow-generate-video.json +191 -0
  108. package/dist-playbooks/google-flow-open-project.json +48 -0
  109. package/dist-playbooks/google-flow-open-scenebuilder.json +64 -0
  110. package/dist-playbooks/google-flow-search-assets.json +64 -0
  111. package/dist-playbooks/instagram.json +57 -0
  112. package/dist-playbooks/linkedin.json +52 -0
  113. package/dist-playbooks/n8n.json +43 -0
  114. package/dist-playbooks/reddit.json +52 -0
  115. package/dist-playbooks/threads.json +59 -0
  116. package/dist-playbooks/x-twitter.json +59 -0
  117. package/dist-playbooks/youtube.json +59 -0
  118. package/dist-references/canva.json +646 -0
  119. package/dist-references/codex-desktop.json +305 -0
  120. package/dist-references/davinci-resolve-keyboard.json +594 -0
  121. package/dist-references/davinci-resolve-menu-map.json +1139 -0
  122. package/dist-references/davinci-resolve-menus-batch1.json +116 -0
  123. package/dist-references/davinci-resolve-menus-batch2.json +372 -0
  124. package/dist-references/davinci-resolve-menus-batch3.json +330 -0
  125. package/dist-references/davinci-resolve-menus-batch4.json +297 -0
  126. package/dist-references/davinci-resolve-shortcuts.json +333 -0
  127. package/dist-references/devpost.json +186 -0
  128. package/dist-references/devto.json +317 -0
  129. package/dist-references/discord.json +549 -0
  130. package/dist-references/figma.json +1186 -0
  131. package/dist-references/finder.json +146 -0
  132. package/dist-references/google-ads-transparency.json +95 -0
  133. package/dist-references/google-flow.json +649 -0
  134. package/dist-references/instagram.json +341 -0
  135. package/dist-references/linkedin.json +324 -0
  136. package/dist-references/meta-ad-library.json +86 -0
  137. package/dist-references/n8n.json +387 -0
  138. package/dist-references/notes.json +27 -0
  139. package/dist-references/notion.json +163 -0
  140. package/dist-references/reddit.json +341 -0
  141. package/dist-references/threads.json +337 -0
  142. package/dist-references/x-twitter.json +403 -0
  143. package/dist-references/youtube.json +373 -0
  144. package/native/macos-bridge/Package.swift +22 -0
  145. package/native/macos-bridge/Sources/AccessibilityBridge.swift +482 -0
  146. package/native/macos-bridge/Sources/AppManagement.swift +339 -0
  147. package/native/macos-bridge/Sources/CoreGraphicsBridge.swift +537 -0
  148. package/native/macos-bridge/Sources/ObserverBridge.swift +120 -0
  149. package/native/macos-bridge/Sources/StreamCapture.swift +136 -0
  150. package/native/macos-bridge/Sources/VisionBridge.swift +238 -0
  151. package/native/macos-bridge/Sources/main.swift +498 -0
  152. package/native/windows-bridge/AppManagement.cs +234 -0
  153. package/native/windows-bridge/InputBridge.cs +436 -0
  154. package/native/windows-bridge/Program.cs +270 -0
  155. package/native/windows-bridge/ScreenCapture.cs +453 -0
  156. package/native/windows-bridge/UIAutomationBridge.cs +571 -0
  157. package/native/windows-bridge/WindowsBridge.csproj +17 -0
  158. package/package.json +12 -1
  159. package/scripts/postinstall.cjs +127 -0
  160. package/dist/.audit-log.jsonl +0 -55
  161. package/dist/.screenhand/memory/.lock +0 -1
  162. package/dist/.screenhand/memory/actions.jsonl +0 -85
  163. package/dist/.screenhand/memory/errors.jsonl +0 -5
  164. package/dist/.screenhand/memory/errors.jsonl.bak +0 -4
  165. package/dist/.screenhand/memory/state.json +0 -35
  166. package/dist/.screenhand/memory/state.json.bak +0 -35
  167. package/dist/.screenhand/memory/strategies.jsonl +0 -12
  168. package/dist/agent/cli.js +0 -73
  169. package/dist/agent/loop.js +0 -258
  170. package/dist/config.js +0 -9
  171. package/dist/index.js +0 -56
  172. package/dist/logging/timeline-logger.js +0 -29
  173. package/dist/mcp/mcp-stdio-server.js +0 -448
  174. package/dist/mcp/server.js +0 -347
  175. package/dist/mcp-entry.js +0 -59
  176. package/dist/memory/recall.js +0 -160
  177. package/dist/memory/research.js +0 -98
  178. package/dist/memory/seeds.js +0 -89
  179. package/dist/memory/session.js +0 -161
  180. package/dist/memory/store.js +0 -391
  181. package/dist/memory/types.js +0 -4
  182. package/dist/monitor/codex-monitor.js +0 -377
  183. package/dist/monitor/task-queue.js +0 -84
  184. package/dist/monitor/types.js +0 -49
  185. package/dist/native/bridge-client.js +0 -174
  186. package/dist/native/macos-bridge-client.js +0 -5
  187. package/dist/npm-publish-helper.js +0 -117
  188. package/dist/npm-token-cdp.js +0 -113
  189. package/dist/npm-token-create.js +0 -135
  190. package/dist/npm-token-finish.js +0 -126
  191. package/dist/playbook/engine.js +0 -193
  192. package/dist/playbook/index.js +0 -4
  193. package/dist/playbook/recorder.js +0 -519
  194. package/dist/playbook/runner.js +0 -392
  195. package/dist/playbook/store.js +0 -166
  196. package/dist/playbook/types.js +0 -4
  197. package/dist/runtime/accessibility-adapter.js +0 -377
  198. package/dist/runtime/app-adapter.js +0 -48
  199. package/dist/runtime/applescript-adapter.js +0 -283
  200. package/dist/runtime/ax-role-map.js +0 -80
  201. package/dist/runtime/browser-adapter.js +0 -36
  202. package/dist/runtime/cdp-chrome-adapter.js +0 -505
  203. package/dist/runtime/composite-adapter.js +0 -205
  204. package/dist/runtime/executor.js +0 -250
  205. package/dist/runtime/locator-cache.js +0 -12
  206. package/dist/runtime/planning-loop.js +0 -47
  207. package/dist/runtime/service.js +0 -372
  208. package/dist/runtime/session-manager.js +0 -28
  209. package/dist/runtime/state-observer.js +0 -105
  210. package/dist/runtime/vision-adapter.js +0 -208
  211. package/dist/test-mcp-protocol.js +0 -138
  212. package/dist/types.js +0 -1
@@ -0,0 +1,287 @@
1
+ // Copyright (C) 2025 Clazro Technology Private Limited
2
+ // SPDX-License-Identifier: AGPL-3.0-only
3
+ //
4
+ // This file is part of ScreenHand.
5
+ //
6
+ // ScreenHand is free software: you can redistribute it and/or modify
7
+ // it under the terms of the GNU Affero General Public License as
8
+ // published by the Free Software Foundation, version 3.
9
+ //
10
+ // ScreenHand is distributed in the hope that it will be useful,
11
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ // GNU Affero General Public License for more details.
14
+ //
15
+ // You should have received a copy of the GNU Affero General Public License
16
+ // along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
17
+ import crypto from "node:crypto";
18
+ import fs from "node:fs";
19
/**
 * Fast frame differencing using content hashing.
 *
 * Compares frame buffers held in memory and reports which cells of a
 * `cellSize` x `cellSize` grid differ between consecutive frames. Two
 * independent pipelines are kept, each with its own "last frame" state:
 *
 *  - `diff()` / `diffFile()` operate on encoded bytes (e.g. PNG) and yield
 *    approximate regions;
 *  - `diffRaw()` operates on uncompressed RGBA pixels and yields
 *    pixel-accurate regions.
 */
export class FrameDiffer {
    /** MD5 hex digest of the last frame seen by diff()/quickChangedFile(), or null. */
    lastFrameHash = null;
    /** Buffer passed to the most recent diff() call, or null. */
    lastFrameBuffer = null;
    /** Map of "col,row" -> MD5 hex digest from the most recent diff() call, or null. */
    lastGridHashes = null;
    /** Grid cell size for region detection (pixels). */
    cellSize;
    /** MD5 hex digest of the last frame seen by diffRaw(), or null. */
    lastRawHash = null;
    /** Map of "col,row" -> MD5 hex digest from the most recent diffRaw() call, or null. */
    lastRawGridHashes = null;

    /**
     * @param {number} [cellSize=128] Edge length of one grid cell, in pixels.
     */
    constructor(cellSize = 128) {
        this.cellSize = cellSize;
    }

    /**
     * Hash a frame buffer. Uses MD5 for speed (not security).
     *
     * @param {Buffer|Uint8Array} buffer Bytes to hash.
     * @returns {string} Hex digest.
     */
    hashBuffer(buffer) {
        return crypto.createHash("md5").update(buffer).digest("hex");
    }

    /**
     * Compare a new frame against the last one passed to `diff()`.
     * Returns whether anything changed and which regions differ.
     *
     * A whole-frame hash answers "anything changed?"; grid hashing then
     * extracts regions. The grid is computed once per call and reused both
     * for the comparison and as the stored state for the next call.
     *
     * IMPORTANT: PNG is a compressed format, so byte-offset slicing does NOT map
     * to pixel coordinates. The grid-based region detection is an approximation
     * that detects *which chunk of the compressed stream* changed, not the exact
     * pixel region. The returned ROI coordinates are estimates — use them as hints
     * for OCR, not as precise bounding boxes. For exact pixel-level regions, use
     * the native bridge's `cg.captureWindowBuffer` (raw RGBA) + `vision.ocrRegion`.
     *
     * @param {Buffer} buffer Encoded frame bytes.
     * @param {number} frameWidth Frame width in pixels.
     * @param {number} frameHeight Frame height in pixels.
     * @returns {{changed: boolean, hash: string, changedRegions: object[]}}
     *   `changed` is always false for the first frame after construction/reset.
     */
    diff(buffer, frameWidth, frameHeight) {
        const hash = this.hashBuffer(buffer);
        const changed = this.lastFrameHash !== null && hash !== this.lastFrameHash;
        // Hash the grid exactly once; it serves both the comparison below and
        // the state kept for the next call.
        const grid = this.computeGridHashes(buffer, frameWidth, frameHeight);
        const changedRegions = changed && this.lastGridHashes !== null
            ? this.detectChangedRegions(buffer, frameWidth, frameHeight, grid)
            : [];
        this.lastGridHashes = grid;
        this.lastFrameHash = hash;
        this.lastFrameBuffer = buffer;
        return { changed, hash, changedRegions };
    }

    /**
     * Quick check: does this buffer's hash differ from the last recorded frame?
     * Does not update any stored state.
     *
     * @param {Buffer} buffer Encoded frame bytes.
     * @returns {boolean} False when no previous frame has been recorded.
     */
    quickChanged(buffer) {
        const hash = this.hashBuffer(buffer);
        return this.lastFrameHash !== null && hash !== this.lastFrameHash;
    }

    /**
     * Hash a file on disk directly (skips base64 round-trip).
     * Reads the file synchronously.
     *
     * @param {string} filePath Path of the file to hash.
     * @returns {string} Hex digest of the file contents.
     */
    hashFile(filePath) {
        return this.hashBuffer(fs.readFileSync(filePath));
    }

    /**
     * Quick change detection from a file path. Hashes the file on disk and
     * compares against the last frame hash. Skips grid hashing entirely, so
     * only `lastFrameHash` is updated (the stored grid is left as it was).
     *
     * @param {string} filePath Path of the frame file.
     * @returns {{changed: boolean, hash: string}}
     */
    quickChangedFile(filePath) {
        const hash = this.hashFile(filePath);
        const changed = this.lastFrameHash !== null && hash !== this.lastFrameHash;
        this.lastFrameHash = hash;
        return { changed, hash };
    }

    /**
     * Full diff from a file path — reads the file synchronously and delegates
     * to `diff()`, returning ROIs that can be used for region-based OCR.
     *
     * @param {string} filePath Path of the frame file.
     * @param {number} frameWidth Frame width in pixels.
     * @param {number} frameHeight Frame height in pixels.
     * @returns {{changed: boolean, hash: string, changedRegions: object[]}}
     */
    diffFile(filePath, frameWidth, frameHeight) {
        const buffer = fs.readFileSync(filePath);
        return this.diff(buffer, frameWidth, frameHeight);
    }

    /**
     * Merge adjacent ROI cells into larger rectangles and pad with extra pixels
     * to ensure OCR captures text at region boundaries. Returns at most
     * maxRegions merged ROIs, sorted by area (largest first).
     *
     * The cap is applied uniformly, including when only one region is passed in.
     *
     * @param {object[]} regions ROIs with `x`, `y`, `width`, `height`, `reason`.
     * @param {number} maxRegions Upper bound on the number of ROIs returned.
     * @param {number} padding Pixels added on every side (clamped to the frame).
     * @param {number} frameWidth Frame width in pixels.
     * @param {number} frameHeight Frame height in pixels.
     * @param {number} [cellSize=128] Gap tolerance used to treat regions as adjacent.
     * @returns {object[]} New ROI objects; the input array is not mutated.
     */
    static mergeRegions(regions, maxRegions, padding, frameWidth, frameHeight, cellSize = 128) {
        if (regions.length === 0) {
            return [];
        }
        // Sort by position (top-left to bottom-right) for the merge pass.
        const sorted = [...regions].sort((a, b) => a.y - b.y || a.x - b.x);
        // Greedy merge: each region is only compared with the rectangle
        // currently being grown.
        const merged = [];
        let current = { ...sorted[0] };
        for (let i = 1; i < sorted.length; i++) {
            const next = sorted[i];
            const currentRight = current.x + current.width;
            const currentBottom = current.y + current.height;
            const nextRight = next.x + next.width;
            const nextBottom = next.y + next.height;
            // One cell-sized gap still counts as adjacent.
            const horizontalOverlap = next.x <= currentRight + cellSize && nextRight >= current.x;
            const verticalOverlap = next.y <= currentBottom + cellSize && nextBottom >= current.y;
            if (horizontalOverlap && verticalOverlap) {
                // Expand current to the bounding box of both rectangles.
                const newX = Math.min(current.x, next.x);
                const newY = Math.min(current.y, next.y);
                current = {
                    x: newX,
                    y: newY,
                    width: Math.max(currentRight, nextRight) - newX,
                    height: Math.max(currentBottom, nextBottom) - newY,
                    reason: "changed_pixels",
                };
            }
            else {
                merged.push(current);
                current = { ...next };
            }
        }
        merged.push(current);
        // Pad, order by area (largest first), then cap at maxRegions.
        return merged
            .map((r) => FrameDiffer.padRegion(r, padding, frameWidth, frameHeight))
            .sort((a, b) => b.width * b.height - a.width * a.height)
            .slice(0, maxRegions);
    }

    /**
     * Grow a region by `padding` pixels on every side, clamped to the frame.
     *
     * @param {object} roi Region with `x`, `y`, `width`, `height`, `reason`.
     * @param {number} padding Pixels to add on each side.
     * @param {number} frameWidth Frame width in pixels.
     * @param {number} frameHeight Frame height in pixels.
     * @returns {object} A new region object carrying the original `reason`.
     */
    static padRegion(roi, padding, frameWidth, frameHeight) {
        const x = Math.max(0, roi.x - padding);
        const y = Math.max(0, roi.y - padding);
        return {
            x,
            y,
            width: Math.min(roi.x + roi.width + padding, frameWidth) - x,
            height: Math.min(roi.y + roi.height + padding, frameHeight) - y,
            reason: roi.reason,
        };
    }

    /** Reset the `diff()` pipeline state (e.g., on context switch). The raw (`diffRaw`) state is not touched. */
    reset() {
        this.lastFrameHash = null;
        this.lastFrameBuffer = null;
        this.lastGridHashes = null;
    }

    /** Get last frame hash (for external state tracking). */
    getLastHash() {
        return this.lastFrameHash;
    }

    /**
     * Hash the encoded buffer in grid-shaped byte slices.
     *
     * The buffer is treated as if it were `height` equal-length rows; each
     * cell hashes the byte span starting at its estimated offset. Cells whose
     * start offset lies beyond the buffer are omitted from the result.
     *
     * @param {Buffer} buffer Encoded frame bytes.
     * @param {number} width Frame width in pixels.
     * @param {number} height Frame height in pixels.
     * @returns {Map<string, string>} "col,row" -> MD5 hex digest.
     */
    computeGridHashes(buffer, width, height) {
        const hashes = new Map();
        if (width <= 0 || height <= 0 || buffer.length === 0) {
            return hashes;
        }
        const cols = Math.ceil(width / this.cellSize);
        const rows = Math.ceil(height / this.cellSize);
        const bytesPerRow = Math.ceil(buffer.length / height) || 1;
        for (let row = 0; row < rows; row++) {
            for (let col = 0; col < cols; col++) {
                const startByte = row * this.cellSize * bytesPerRow + col * this.cellSize;
                if (startByte >= buffer.length) {
                    continue;
                }
                const endByte = Math.min(startByte + this.cellSize * bytesPerRow, buffer.length);
                hashes.set(`${col},${row}`, this.hashBuffer(buffer.subarray(startByte, endByte)));
            }
        }
        return hashes;
    }

    /**
     * List the grid cells whose hash differs from the stored `diff()` grid.
     * Cells that have no previous hash are not reported.
     *
     * @param {Buffer} buffer Encoded frame bytes.
     * @param {number} width Frame width in pixels.
     * @param {number} height Frame height in pixels.
     * @param {Map<string, string>} [currentGrid] Precomputed grid for `buffer`;
     *   computed on demand when omitted.
     * @returns {object[]} One region per changed cell.
     */
    detectChangedRegions(buffer, width, height, currentGrid = this.computeGridHashes(buffer, width, height)) {
        return this.#changedCells(currentGrid, this.lastGridHashes, width, height);
    }

    // ── Raw RGBA pixel-accurate diffing ──

    /**
     * Diff raw RGBA pixel data for accurate ROI detection.
     *
     * Unlike `diff()` which operates on compressed PNG bytes (approximate ROIs),
     * this method works with uncompressed RGBA buffers where byte offsets map
     * directly to pixel coordinates. Use with the native bridge's
     * `cg.captureWindowBuffer` which returns raw RGBA data.
     *
     * @param rgba Raw RGBA pixel buffer (4 bytes per pixel, row-major)
     * @param width Frame width in pixels
     * @param height Frame height in pixels
     * @returns {{changed: boolean, hash: string, changedRegions: object[]}}
     */
    diffRaw(rgba, width, height) {
        const hash = this.hashBuffer(rgba);
        const changed = this.lastRawHash !== null && hash !== this.lastRawHash;
        // Single grid computation, reused for comparison and stored state.
        const grid = this.computeRawGridHashes(rgba, width, height);
        const changedRegions = changed && this.lastRawGridHashes !== null
            ? this.detectRawChangedRegions(rgba, width, height, grid)
            : [];
        this.lastRawGridHashes = grid;
        this.lastRawHash = hash;
        return { changed, hash, changedRegions };
    }

    /**
     * Compute grid hashes from raw RGBA data using pixel-accurate slicing.
     * Each cell is hashed using its actual pixel rows, not byte-offset estimates.
     * Pixel rows that would run past the end of `rgba` are left out of the hash.
     *
     * @param rgba Raw RGBA pixel buffer (4 bytes per pixel, row-major)
     * @param width Frame width in pixels
     * @param height Frame height in pixels
     * @returns {Map<string, string>} "col,row" -> MD5 hex digest.
     */
    computeRawGridHashes(rgba, width, height) {
        const hashes = new Map();
        const cols = Math.ceil(width / this.cellSize);
        const rows = Math.ceil(height / this.cellSize);
        const bytesPerPixel = 4; // RGBA
        const stride = width * bytesPerPixel;
        for (let row = 0; row < rows; row++) {
            for (let col = 0; col < cols; col++) {
                const cellX = col * this.cellSize;
                const cellY = row * this.cellSize;
                // Edge cells are clipped to the frame.
                const cellW = Math.min(this.cellSize, width - cellX);
                const cellH = Math.min(this.cellSize, height - cellY);
                const hasher = crypto.createHash("md5");
                for (let y = cellY; y < cellY + cellH; y++) {
                    const rowStart = y * stride + cellX * bytesPerPixel;
                    const rowEnd = rowStart + cellW * bytesPerPixel;
                    if (rowEnd <= rgba.length) {
                        hasher.update(rgba.subarray(rowStart, rowEnd));
                    }
                }
                hashes.set(`${col},${row}`, hasher.digest("hex"));
            }
        }
        return hashes;
    }

    /**
     * List the grid cells whose hash differs from the stored `diffRaw()` grid.
     * Cells that have no previous hash are not reported.
     *
     * @param rgba Raw RGBA pixel buffer (4 bytes per pixel, row-major)
     * @param width Frame width in pixels
     * @param height Frame height in pixels
     * @param {Map<string, string>} [currentGrid] Precomputed grid for `rgba`;
     *   computed on demand when omitted.
     * @returns {object[]} One region per changed cell.
     */
    detectRawChangedRegions(rgba, width, height, currentGrid = this.computeRawGridHashes(rgba, width, height)) {
        return this.#changedCells(currentGrid, this.lastRawGridHashes, width, height);
    }

    /**
     * Shared cell comparison for both pipelines: turn every key whose hash
     * differs between the two grids into a frame-clipped cell rectangle.
     */
    #changedCells(currentGrid, previousGrid, width, height) {
        const regions = [];
        if (!previousGrid) {
            return regions;
        }
        for (const [key, hash] of currentGrid) {
            const prevHash = previousGrid.get(key);
            if (prevHash === undefined || prevHash === hash) {
                continue;
            }
            const [col, row] = key.split(",").map(Number);
            const x = col * this.cellSize;
            const y = row * this.cellSize;
            regions.push({
                x,
                y,
                width: Math.min(this.cellSize, width - x),
                height: Math.min(this.cellSize, height - y),
                reason: "changed_pixels",
            });
        }
        return regions;
    }
}
@@ -0,0 +1,22 @@
1
+ // Copyright (C) 2025 Clazro Technology Private Limited
2
+ // SPDX-License-Identifier: AGPL-3.0-only
3
+ //
4
+ // This file is part of ScreenHand.
5
+ //
6
+ // ScreenHand is free software: you can redistribute it and/or modify
7
+ // it under the terms of the GNU Affero General Public License as
8
+ // published by the Free Software Foundation, version 3.
9
+ //
10
+ // ScreenHand is distributed in the hope that it will be useful,
11
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ // GNU Affero General Public License for more details.
14
+ //
15
+ // You should have received a copy of the GNU Affero General Public License
16
+ // along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
17
+ export { PerceptionCoordinator } from "./coordinator.js";
18
+ export { PerceptionManager } from "./manager.js";
19
+ export { AXSource } from "./ax-source.js";
20
+ export { CDPSource } from "./cdp-source.js";
21
+ export { VisionSource } from "./vision-source.js";
22
+ export { FrameDiffer } from "./frame-differ.js";
@@ -0,0 +1,199 @@
1
+ // Copyright (C) 2025 Clazro Technology Private Limited
2
+ // SPDX-License-Identifier: AGPL-3.0-only
3
+ //
4
+ // This file is part of ScreenHand.
5
+ //
6
+ // ScreenHand is free software: you can redistribute it and/or modify
7
+ // it under the terms of the GNU Affero General Public License as
8
+ // published by the Free Software Foundation, version 3.
9
+ //
10
+ // ScreenHand is distributed in the hope that it will be useful,
11
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ // GNU Affero General Public License for more details.
14
+ //
15
+ // You should have received a copy of the GNU Affero General Public License
16
+ // along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
17
+ import { EventEmitter } from "node:events";
18
+ import { StateObserver } from "../runtime/state-observer.js";
19
+ import { AXSource } from "./ax-source.js";
20
+ import { CDPSource } from "./cdp-source.js";
21
+ import { VisionSource } from "./vision-source.js";
22
+ import { PerceptionCoordinator } from "./coordinator.js";
23
+ import { createEmptyStats } from "./types.js";
24
/**
 * PerceptionManager — creates sources lazily when the bridge is ready,
 * auto-starts perception on first app context, manages context switches,
 * and emits reactive events (dialog_detected, app_switched).
 *
 * Until `createSources()` has succeeded there is no coordinator; in that
 * state the start/stop/notify methods are no-ops and the getters return
 * neutral fallbacks.
 */
export class PerceptionManager extends EventEmitter {
    worldModel;
    config;
    /** The PerceptionCoordinator, or null until createSources() succeeds. */
    coordinator = null;
    /** True once createSources() has completed successfully. */
    sourcesCreated = false;
    /** App context most recently handed to the coordinator, or null. */
    currentContext = null;
    currentPid = null;
    currentBundleId = null;
    /** Last client passed to activateCDP(); used as the default in ensureStarted(). */
    lastCdpClient = null;
    pendingLearningEngine = null;
    pendingAppMap = null;

    /**
     * @param worldModel Passed through to the coordinator on creation.
     * @param config Coordinator config overrides; spread over `{ enableVision: true }`.
     */
    constructor(worldModel, config) {
        super();
        this.worldModel = worldModel;
        this.config = config;
    }

    /**
     * Inject the learning engine. If coordinator already exists, wires immediately.
     * Otherwise, defers until createSources() is called.
     */
    setLearningEngine(engine) {
        this.pendingLearningEngine = engine;
        this.coordinator?.setLearningEngine(engine);
    }

    /**
     * Inject the app mastery map. If coordinator already exists, wires immediately.
     * Otherwise, defers until createSources() is called.
     */
    setAppMap(map) {
        this.pendingAppMap = map;
        this.coordinator?.setAppMap(map);
    }

    /**
     * Create perception sources from the bridge. Called once after ensureBridge().
     *
     * Subsequent calls are no-ops once creation has succeeded. The manager is
     * only marked as initialized after every source and the coordinator have
     * been constructed and wired, so a constructor that throws leaves the
     * manager untouched and a later call can retry.
     *
     * @param bridge Native bridge handed to the state observer and sources.
     */
    createSources(bridge) {
        if (this.sourcesCreated) {
            return;
        }
        const observer = new StateObserver(bridge);
        const axSource = new AXSource(observer, bridge);
        const cdpSource = new CDPSource();
        const visionSource = new VisionSource(bridge);
        const coordinator = new PerceptionCoordinator(this.worldModel, axSource, cdpSource, visionSource, { enableVision: true, ...this.config });
        // Apply anything injected before the coordinator existed.
        if (this.pendingLearningEngine) {
            coordinator.setLearningEngine(this.pendingLearningEngine);
        }
        if (this.pendingAppMap) {
            coordinator.setAppMap(this.pendingAppMap);
        }
        coordinator.on("perception", (event) => {
            this.handleReactiveEvent(event);
        });
        // Commit only after everything above succeeded.
        this.coordinator = coordinator;
        this.sourcesCreated = true;
    }

    /**
     * Ensure perception is started for the given app context.
     * Idempotent — starts if not running, switches context if app changed.
     *
     * The tracked context/pid/bundleId are recorded before the coordinator
     * call is awaited. Does nothing when no coordinator exists yet.
     *
     * @param appContext Context with at least `pid` and `bundleId`; `windowId` optional.
     * @param [cdpClient] CDP client to use; defaults to the last one given to activateCDP().
     */
    async ensureStarted(appContext, cdpClient) {
        if (!this.coordinator) {
            return;
        }
        const client = cdpClient ?? this.lastCdpClient;
        if (!this.coordinator.isRunning) {
            this.currentContext = appContext;
            this.currentPid = appContext.pid;
            this.currentBundleId = appContext.bundleId;
            await this.coordinator.start(appContext, client);
            return;
        }
        // Switch context when PID changes or when windowId is now available but wasn't before.
        const pidChanged = this.currentPid !== appContext.pid;
        const windowChanged = appContext.windowId != null && this.currentContext?.windowId !== appContext.windowId;
        if (pidChanged || windowChanged) {
            this.currentContext = appContext;
            this.currentPid = appContext.pid;
            this.currentBundleId = appContext.bundleId;
            await this.coordinator.switchContext(appContext, client);
        }
    }

    /**
     * Activate CDP source with a new client.
     * Uses hot-inject on the running coordinator instead of stop+restart
     * to preserve AX polling state and avoid resetting counters.
     * The client is remembered even when the coordinator is not running.
     */
    activateCDP(cdpClient) {
        this.lastCdpClient = cdpClient;
        if (this.coordinator?.isRunning) {
            this.coordinator.activateCDP(cdpClient);
        }
    }

    /**
     * Best-effort auto-start: if perception isn't running and a focused app
     * is known, resolve its windowId and start perception silently.
     * Failures are swallowed — both the window lookup and the start itself —
     * so this never rejects because perception could not be started.
     *
     * @param focusedApp Object with `pid` and `bundleId` of the focused app.
     * @param bridge Native bridge; `window.list` is called on it.
     */
    async tryAutoStart(focusedApp, bridge) {
        if (!this.coordinator || this.coordinator.isRunning) {
            return;
        }
        if (!focusedApp.pid) {
            return;
        }
        let windowId;
        try {
            const wins = await bridge.call("window.list", {});
            const matching = wins?.filter((w) => w.pid === focusedApp.pid);
            if (matching && matching.length > 0) {
                // Prefer a window flagged as focused/frontmost/main; otherwise take the first match.
                const frontmost = matching.find((w) => w.focused || w.frontmost || w.isMain);
                windowId = (frontmost ?? matching[0])?.windowId;
            }
        }
        catch { /* best-effort: start without a windowId */ }
        const ctx = {
            bundleId: focusedApp.bundleId,
            // No display name is available here, so the bundle id doubles as the app name.
            appName: focusedApp.bundleId,
            pid: focusedApp.pid,
            windowTitle: "",
            ...(windowId != null ? { windowId } : {}),
        };
        try {
            await this.ensureStarted(ctx);
        }
        catch {
            // Deliberately swallowed: auto-start is opportunistic and a later
            // explicit ensureStarted() call will surface real start errors.
        }
    }

    /** Stop the coordinator if it is running and clear the tracked app context. */
    async stop() {
        if (this.coordinator?.isRunning) {
            await this.coordinator.stop();
        }
        this.currentContext = null;
        this.currentPid = null;
        this.currentBundleId = null;
    }

    /** Whether the coordinator exists and reports itself as running. */
    get isRunning() {
        return this.coordinator?.isRunning ?? false;
    }

    /** Coordinator stats, or a fresh empty stats object when no coordinator exists. */
    getStats() {
        return this.coordinator?.getStats() ?? createEmptyStats();
    }

    /** Coordinator freshness summary, or a fixed placeholder when no coordinator exists. */
    getFreshnessSummary() {
        return this.coordinator?.getFreshnessSummary() ?? "Perception: not initialized";
    }

    /** Coordinator config, or null when no coordinator exists. */
    getConfig() {
        return this.coordinator?.getConfig() ?? null;
    }

    /** The coordinator instance, or null before createSources() succeeds. */
    getCoordinator() {
        return this.coordinator;
    }

    /**
     * Notify perception that a tool call is happening — resets idle timer.
     */
    notifyToolCall() {
        this.coordinator?.notifyToolCall();
    }

    /**
     * Translate coordinator "perception" events into manager-level events.
     * Only `ax_events` payloads are inspected:
     *  - `dialog_appeared` -> emits "dialog_detected" `{ title, pid }`;
     *  - `app_activated` for a bundle other than the tracked one ->
     *    emits "app_switched" `{ bundleId, pid }`.
     */
    handleReactiveEvent(event) {
        if (event.data?.type !== "ax_events" || !Array.isArray(event.data.events)) {
            return;
        }
        for (const uiEvent of event.data.events) {
            if (uiEvent.type === "dialog_appeared") {
                this.emit("dialog_detected", {
                    title: uiEvent.windowTitle ?? "",
                    pid: uiEvent.pid,
                });
            }
            if (uiEvent.type === "app_activated" &&
                uiEvent.bundleId &&
                uiEvent.bundleId !== this.currentBundleId) {
                this.emit("app_switched", {
                    bundleId: uiEvent.bundleId,
                    pid: uiEvent.pid,
                });
            }
        }
    }
}
@@ -0,0 +1,47 @@
1
+ // Copyright (C) 2025 Clazro Technology Private Limited
2
+ // SPDX-License-Identifier: AGPL-3.0-only
3
+ //
4
+ // This file is part of ScreenHand.
5
+ //
6
+ // ScreenHand is free software: you can redistribute it and/or modify
7
+ // it under the terms of the GNU Affero General Public License as
8
+ // published by the Free Software Foundation, version 3.
9
+ //
10
+ // ScreenHand is distributed in the hope that it will be useful,
11
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ // GNU Affero General Public License for more details.
14
+ //
15
+ // You should have received a copy of the GNU Affero General Public License
16
+ // along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
17
+ export const DEFAULT_PERCEPTION_CONFIG = { // Exported default perception settings; how they are consumed is not visible in this file.
18
+ fastIntervalMs: 100, // NOTE(review): presumably the period of the fast perception cycle, in ms — confirm in coordinator.js
19
+ mediumIntervalMs: 300, // NOTE(review): presumably the period of the medium perception cycle, in ms — confirm
20
+ slowIntervalMs: 1000, // NOTE(review): presumably the period of the slow perception cycle, in ms — confirm
21
+ enableAX: true, // NOTE(review): looks like the on/off switch for the accessibility (AX) source — confirm
22
+ enableCDP: true, // NOTE(review): looks like the on/off switch for the CDP source — confirm
23
+ enableVision: true, // NOTE(review): looks like the on/off switch for the vision source — confirm
24
+ maxROIsPerCycle: 3, // NOTE(review): presumably caps regions of interest handled per cycle — confirm against the vision source
25
+ skipCaptureLock: false, // NOTE(review): semantics not visible here — TODO document once the consumer is confirmed
26
+ };
27
/**
 * Build a fresh, zeroed perception-stats record.
 *
 * Every call returns a new object: `started` is false, every counter is 0
 * and every timestamp slot is null. Key order is: flags, counters, timestamps.
 */
export function createEmptyStats() {
    const counterFields = [
        "fastCycles",
        "mediumCycles",
        "slowCycles",
        "axEventsProcessed",
        "axTreePolls",
        "cdpMutationsProcessed",
        "cdpSnapshots",
        "visionDiffs",
        "visionOCRs",
    ];
    const timestampFields = [
        "lastFastAt",
        "lastMediumAt",
        "lastSlowAt",
        "lastAXAt",
        "lastCDPAt",
        "lastVisionAt",
    ];
    const stats = { started: false, startedAt: null };
    for (const field of counterFields) {
        stats[field] = 0;
    }
    for (const field of timestampFields) {
        stats[field] = null;
    }
    return stats;
}