screenhand 0.2.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +165 -446
- package/bin/darwin-arm64/macos-bridge +0 -0
- package/dist/mcp-desktop.js +3615 -400
- package/dist/scripts/export-help-center.js +112 -0
- package/dist/scripts/marketing-loop.js +117 -0
- package/dist/scripts/observer-daemon.js +288 -0
- package/dist/scripts/orchestrator-daemon.js +399 -0
- package/dist/scripts/threads-campaign.js +208 -0
- package/dist/src/community/fetcher.js +109 -0
- package/dist/src/community/index.js +6 -0
- package/dist/src/community/publisher.js +191 -0
- package/dist/src/community/remote-api.js +121 -0
- package/dist/src/community/types.js +3 -0
- package/dist/src/community/validator.js +95 -0
- package/dist/src/context-tracker.js +489 -0
- package/dist/src/ingestion/coverage-auditor.js +233 -0
- package/dist/src/ingestion/doc-parser.js +164 -0
- package/dist/src/ingestion/index.js +8 -0
- package/dist/src/ingestion/menu-scanner.js +152 -0
- package/dist/src/ingestion/reference-merger.js +186 -0
- package/dist/src/ingestion/shortcut-extractor.js +180 -0
- package/dist/src/ingestion/tutorial-extractor.js +170 -0
- package/dist/src/ingestion/types.js +3 -0
- package/dist/src/jobs/manager.js +82 -14
- package/dist/src/jobs/runner.js +138 -15
- package/dist/src/learning/engine.js +356 -0
- package/dist/src/learning/index.js +9 -0
- package/dist/src/learning/locator-policy.js +120 -0
- package/dist/src/learning/pattern-policy.js +89 -0
- package/dist/src/learning/recovery-policy.js +116 -0
- package/dist/src/learning/sensor-policy.js +115 -0
- package/dist/src/learning/timing-model.js +204 -0
- package/dist/src/learning/topology-policy.js +90 -0
- package/dist/src/learning/types.js +9 -0
- package/dist/src/logging/timeline-logger.js +4 -1
- package/dist/src/memory/playbook-seeds.js +200 -0
- package/dist/src/memory/recall.js +60 -8
- package/dist/src/memory/service.js +30 -5
- package/dist/src/memory/store.js +34 -5
- package/dist/src/native/bridge-client.js +253 -31
- package/dist/src/observer/state.js +199 -0
- package/dist/src/observer/types.js +43 -0
- package/dist/src/orchestrator/state.js +68 -0
- package/dist/src/orchestrator/types.js +22 -0
- package/dist/src/perception/ax-source.js +162 -0
- package/dist/src/perception/cdp-source.js +162 -0
- package/dist/src/perception/coordinator.js +771 -0
- package/dist/src/perception/frame-differ.js +287 -0
- package/dist/src/perception/index.js +22 -0
- package/dist/src/perception/manager.js +199 -0
- package/dist/src/perception/types.js +47 -0
- package/dist/src/perception/vision-source.js +399 -0
- package/dist/src/planner/deterministic.js +298 -0
- package/dist/src/planner/executor.js +870 -0
- package/dist/src/planner/goal-store.js +92 -0
- package/dist/src/planner/index.js +21 -0
- package/dist/src/planner/planner.js +520 -0
- package/dist/src/planner/tool-registry.js +71 -0
- package/dist/src/planner/types.js +22 -0
- package/dist/src/platform/explorer.js +213 -0
- package/dist/src/platform/help-center-markdown.js +527 -0
- package/dist/src/platform/learner.js +257 -0
- package/dist/src/playbook/engine.js +296 -11
- package/dist/src/playbook/mcp-recorder.js +204 -0
- package/dist/src/playbook/recorder.js +3 -2
- package/dist/src/playbook/runner.js +1 -1
- package/dist/src/playbook/store.js +139 -10
- package/dist/src/recovery/detectors.js +156 -0
- package/dist/src/recovery/engine.js +327 -0
- package/dist/src/recovery/index.js +20 -0
- package/dist/src/recovery/strategies.js +274 -0
- package/dist/src/recovery/types.js +20 -0
- package/dist/src/runtime/accessibility-adapter.js +55 -18
- package/dist/src/runtime/applescript-adapter.js +8 -2
- package/dist/src/runtime/cdp-chrome-adapter.js +1 -1
- package/dist/src/runtime/executor.js +23 -3
- package/dist/src/runtime/locator-cache.js +24 -2
- package/dist/src/runtime/service.js +59 -15
- package/dist/src/runtime/session-manager.js +4 -1
- package/dist/src/runtime/vision-adapter.js +2 -1
- package/dist/src/state/app-map-types.js +72 -0
- package/dist/src/state/app-map.js +1974 -0
- package/dist/src/state/entity-tracker.js +108 -0
- package/dist/src/state/fusion.js +96 -0
- package/dist/src/state/index.js +21 -0
- package/dist/src/state/ladder-generator.js +236 -0
- package/dist/src/state/persistence.js +156 -0
- package/dist/src/state/types.js +17 -0
- package/dist/src/state/world-model.js +1456 -0
- package/dist/src/util/atomic-write.js +19 -4
- package/dist/src/util/sanitize.js +146 -0
- package/dist-app-maps/com.figma.Desktop.json +959 -0
- package/dist-app-maps/com.hnc.Discord.json +1146 -0
- package/dist-app-maps/notion.id.json +2831 -0
- package/dist-playbooks/canva-screenhand-carousel.json +445 -0
- package/dist-playbooks/codex-desktop.json +76 -0
- package/dist-playbooks/competitor-research-stack.json +122 -0
- package/dist-playbooks/davinci-color-grade.json +153 -0
- package/dist-playbooks/davinci-edit-timeline.json +162 -0
- package/dist-playbooks/davinci-render.json +114 -0
- package/dist-playbooks/devto.json +52 -0
- package/dist-playbooks/discord.json +41 -0
- package/dist-playbooks/google-flow-create-project.json +59 -0
- package/dist-playbooks/google-flow-edit-image.json +90 -0
- package/dist-playbooks/google-flow-edit-video.json +90 -0
- package/dist-playbooks/google-flow-generate-image.json +68 -0
- package/dist-playbooks/google-flow-generate-video.json +191 -0
- package/dist-playbooks/google-flow-open-project.json +48 -0
- package/dist-playbooks/google-flow-open-scenebuilder.json +64 -0
- package/dist-playbooks/google-flow-search-assets.json +64 -0
- package/dist-playbooks/instagram.json +57 -0
- package/dist-playbooks/linkedin.json +52 -0
- package/dist-playbooks/n8n.json +43 -0
- package/dist-playbooks/reddit.json +52 -0
- package/dist-playbooks/threads.json +59 -0
- package/dist-playbooks/x-twitter.json +59 -0
- package/dist-playbooks/youtube.json +59 -0
- package/dist-references/canva.json +646 -0
- package/dist-references/codex-desktop.json +305 -0
- package/dist-references/davinci-resolve-keyboard.json +594 -0
- package/dist-references/davinci-resolve-menu-map.json +1139 -0
- package/dist-references/davinci-resolve-menus-batch1.json +116 -0
- package/dist-references/davinci-resolve-menus-batch2.json +372 -0
- package/dist-references/davinci-resolve-menus-batch3.json +330 -0
- package/dist-references/davinci-resolve-menus-batch4.json +297 -0
- package/dist-references/davinci-resolve-shortcuts.json +333 -0
- package/dist-references/devpost.json +186 -0
- package/dist-references/devto.json +317 -0
- package/dist-references/discord.json +549 -0
- package/dist-references/figma.json +1186 -0
- package/dist-references/finder.json +146 -0
- package/dist-references/google-ads-transparency.json +95 -0
- package/dist-references/google-flow.json +649 -0
- package/dist-references/instagram.json +341 -0
- package/dist-references/linkedin.json +324 -0
- package/dist-references/meta-ad-library.json +86 -0
- package/dist-references/n8n.json +387 -0
- package/dist-references/notes.json +27 -0
- package/dist-references/notion.json +163 -0
- package/dist-references/reddit.json +341 -0
- package/dist-references/threads.json +337 -0
- package/dist-references/x-twitter.json +403 -0
- package/dist-references/youtube.json +373 -0
- package/native/macos-bridge/Package.swift +22 -0
- package/native/macos-bridge/Sources/AccessibilityBridge.swift +482 -0
- package/native/macos-bridge/Sources/AppManagement.swift +339 -0
- package/native/macos-bridge/Sources/CoreGraphicsBridge.swift +537 -0
- package/native/macos-bridge/Sources/ObserverBridge.swift +120 -0
- package/native/macos-bridge/Sources/StreamCapture.swift +136 -0
- package/native/macos-bridge/Sources/VisionBridge.swift +238 -0
- package/native/macos-bridge/Sources/main.swift +498 -0
- package/native/windows-bridge/AppManagement.cs +234 -0
- package/native/windows-bridge/InputBridge.cs +436 -0
- package/native/windows-bridge/Program.cs +270 -0
- package/native/windows-bridge/ScreenCapture.cs +453 -0
- package/native/windows-bridge/UIAutomationBridge.cs +571 -0
- package/native/windows-bridge/WindowsBridge.csproj +17 -0
- package/package.json +12 -1
- package/scripts/postinstall.cjs +127 -0
- package/dist/.audit-log.jsonl +0 -55
- package/dist/.screenhand/memory/.lock +0 -1
- package/dist/.screenhand/memory/actions.jsonl +0 -85
- package/dist/.screenhand/memory/errors.jsonl +0 -5
- package/dist/.screenhand/memory/errors.jsonl.bak +0 -4
- package/dist/.screenhand/memory/state.json +0 -35
- package/dist/.screenhand/memory/state.json.bak +0 -35
- package/dist/.screenhand/memory/strategies.jsonl +0 -12
- package/dist/agent/cli.js +0 -73
- package/dist/agent/loop.js +0 -258
- package/dist/config.js +0 -9
- package/dist/index.js +0 -56
- package/dist/logging/timeline-logger.js +0 -29
- package/dist/mcp/mcp-stdio-server.js +0 -448
- package/dist/mcp/server.js +0 -347
- package/dist/mcp-entry.js +0 -59
- package/dist/memory/recall.js +0 -160
- package/dist/memory/research.js +0 -98
- package/dist/memory/seeds.js +0 -89
- package/dist/memory/session.js +0 -161
- package/dist/memory/store.js +0 -391
- package/dist/memory/types.js +0 -4
- package/dist/monitor/codex-monitor.js +0 -377
- package/dist/monitor/task-queue.js +0 -84
- package/dist/monitor/types.js +0 -49
- package/dist/native/bridge-client.js +0 -174
- package/dist/native/macos-bridge-client.js +0 -5
- package/dist/npm-publish-helper.js +0 -117
- package/dist/npm-token-cdp.js +0 -113
- package/dist/npm-token-create.js +0 -135
- package/dist/npm-token-finish.js +0 -126
- package/dist/playbook/engine.js +0 -193
- package/dist/playbook/index.js +0 -4
- package/dist/playbook/recorder.js +0 -519
- package/dist/playbook/runner.js +0 -392
- package/dist/playbook/store.js +0 -166
- package/dist/playbook/types.js +0 -4
- package/dist/runtime/accessibility-adapter.js +0 -377
- package/dist/runtime/app-adapter.js +0 -48
- package/dist/runtime/applescript-adapter.js +0 -283
- package/dist/runtime/ax-role-map.js +0 -80
- package/dist/runtime/browser-adapter.js +0 -36
- package/dist/runtime/cdp-chrome-adapter.js +0 -505
- package/dist/runtime/composite-adapter.js +0 -205
- package/dist/runtime/executor.js +0 -250
- package/dist/runtime/locator-cache.js +0 -12
- package/dist/runtime/planning-loop.js +0 -47
- package/dist/runtime/service.js +0 -372
- package/dist/runtime/session-manager.js +0 -28
- package/dist/runtime/state-observer.js +0 -105
- package/dist/runtime/vision-adapter.js +0 -208
- package/dist/test-mcp-protocol.js +0 -138
- package/dist/types.js +0 -1
|
@@ -0,0 +1,287 @@
|
|
|
1
|
+
// Copyright (C) 2025 Clazro Technology Private Limited
|
|
2
|
+
// SPDX-License-Identifier: AGPL-3.0-only
|
|
3
|
+
//
|
|
4
|
+
// This file is part of ScreenHand.
|
|
5
|
+
//
|
|
6
|
+
// ScreenHand is free software: you can redistribute it and/or modify
|
|
7
|
+
// it under the terms of the GNU Affero General Public License as
|
|
8
|
+
// published by the Free Software Foundation, version 3.
|
|
9
|
+
//
|
|
10
|
+
// ScreenHand is distributed in the hope that it will be useful,
|
|
11
|
+
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
12
|
+
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
13
|
+
// GNU Affero General Public License for more details.
|
|
14
|
+
//
|
|
15
|
+
// You should have received a copy of the GNU Affero General Public License
|
|
16
|
+
// along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
|
|
17
|
+
import crypto from "node:crypto";
|
|
18
|
+
import fs from "node:fs";
|
|
19
|
+
/**
|
|
20
|
+
* Fast frame differencing using content hashing.
|
|
21
|
+
* Compares PNG buffers (in-memory, no disk I/O) and extracts changed regions
|
|
22
|
+
* by dividing the frame into a grid and hashing each cell.
|
|
23
|
+
*/
|
|
24
|
+
export class FrameDiffer {
    /** MD5 hex digest recorded by the last diff() or quickChangedFile() call, or null. */
    lastFrameHash = null;
    /** Buffer passed to the last diff() call. Stored only; this class never reads it back. */
    lastFrameBuffer = null;
    /** Map of "col,row" -> MD5 hex digest built by the last diff() call, or null. */
    lastGridHashes = null;
    /** Grid cell size for region detection (pixels). */
    cellSize;
    /** MD5 hex digest recorded by the last diffRaw() call, or null. */
    lastRawHash = null;
    /** Map of "col,row" -> MD5 hex digest built by the last diffRaw() call, or null. */
    lastRawGridHashes = null;
    /**
     * @param cellSize Edge length of one grid cell, in pixels (default 128).
     */
    constructor(cellSize = 128) {
        this.cellSize = cellSize;
    }
    /**
     * Hash a frame buffer. Uses MD5 for speed (not security).
     *
     * @param buffer Bytes to hash.
     * @returns Lower-case hex digest.
     */
    hashBuffer(buffer) {
        return crypto.createHash("md5").update(buffer).digest("hex");
    }
    /**
     * Compare a new frame against the last one seen by diff().
     * Returns whether anything changed and which regions differ.
     *
     * A whole-frame hash answers "did anything change?"; grid-based hashing
     * then estimates which regions changed.
     *
     * IMPORTANT: PNG is a compressed format, so byte-offset slicing does NOT map
     * to pixel coordinates. The grid-based region detection is an approximation
     * that detects *which chunk of the compressed stream* changed, not the exact
     * pixel region. Treat the returned ROI coordinates as hints for OCR, not as
     * precise bounding boxes. For pixel-accurate regions use diffRaw() with an
     * uncompressed RGBA buffer.
     *
     * @param buffer Encoded frame bytes.
     * @param frameWidth Frame width in pixels.
     * @param frameHeight Frame height in pixels.
     * @returns `{ changed, hash, changedRegions }`; `changed` is false for the
     *          first frame after construction or reset().
     */
    diff(buffer, frameWidth, frameHeight) {
        const hash = this.hashBuffer(buffer);
        const changed = this.lastFrameHash !== null && hash !== this.lastFrameHash;
        // Hash the grid once and reuse it both for the comparison and as the
        // baseline for the next call.
        const currentGrid = this.computeGridHashes(buffer, frameWidth, frameHeight);
        const changedRegions = changed && this.lastGridHashes !== null
            ? this.collectChangedCells(currentGrid, this.lastGridHashes, frameWidth, frameHeight)
            : [];
        this.lastGridHashes = currentGrid;
        this.lastFrameHash = hash;
        this.lastFrameBuffer = buffer;
        return { changed, hash, changedRegions };
    }
    /**
     * Quick check: does this buffer's hash differ from the last recorded one?
     * Does not update any state.
     */
    quickChanged(buffer) {
        const hash = this.hashBuffer(buffer);
        return this.lastFrameHash !== null && hash !== this.lastFrameHash;
    }
    /**
     * Hash a file on disk directly (skips base64 round-trip).
     * Reads the file synchronously.
     */
    hashFile(filePath) {
        return this.hashBuffer(fs.readFileSync(filePath));
    }
    /**
     * Quick change detection from a file path. Hashes the file on disk and
     * compares against the last frame hash. Skips grid hashing entirely, so
     * only `lastFrameHash` is updated; the grid baseline is left untouched.
     *
     * @returns `{ changed, hash }`
     */
    quickChangedFile(filePath) {
        const hash = this.hashFile(filePath);
        const changed = this.lastFrameHash !== null && hash !== this.lastFrameHash;
        this.lastFrameHash = hash;
        return { changed, hash };
    }
    /**
     * Full diff from a file path. Reads the file synchronously and delegates
     * to diff(), so it also returns ROIs usable for region-based OCR.
     */
    diffFile(filePath, frameWidth, frameHeight) {
        const buffer = fs.readFileSync(filePath);
        return this.diff(buffer, frameWidth, frameHeight);
    }
    /**
     * Merge adjacent ROI cells into larger rectangles and pad with extra pixels
     * to ensure OCR captures text at region boundaries. Returns at most
     * maxRegions merged ROIs, sorted by area (largest first).
     *
     * @param regions ROIs with `x`, `y`, `width`, `height`, `reason`.
     * @param maxRegions Upper bound on the number of ROIs returned.
     * @param padding Pixels added on every side, clamped to the frame.
     * @param frameWidth Frame width in pixels.
     * @param frameHeight Frame height in pixels.
     * @param cellSize Gap tolerance when deciding adjacency (default 128).
     */
    static mergeRegions(regions, maxRegions, padding, frameWidth, frameHeight, cellSize = 128) {
        if (regions.length === 0)
            return [];
        // A single region takes the same path as many, so the maxRegions cap
        // is applied uniformly.
        // Sort by position (top-left to bottom-right) for the merge pass.
        const sorted = [...regions].sort((a, b) => a.y - b.y || a.x - b.x);
        // One cell-sized gap tolerance for adjacency.
        const GAP = cellSize;
        // Greedy merge: each region is compared only with the rectangle
        // currently being grown.
        const merged = [];
        let current = { ...sorted[0] };
        for (let i = 1; i < sorted.length; i++) {
            const next = sorted[i];
            const currentRight = current.x + current.width;
            const currentBottom = current.y + current.height;
            const nextRight = next.x + next.width;
            const nextBottom = next.y + next.height;
            const horizontalOverlap = next.x <= currentRight + GAP && nextRight >= current.x;
            const verticalOverlap = next.y <= currentBottom + GAP && nextBottom >= current.y;
            if (horizontalOverlap && verticalOverlap) {
                // Merge: expand current to encompass next.
                const newX = Math.min(current.x, next.x);
                const newY = Math.min(current.y, next.y);
                current = {
                    x: newX,
                    y: newY,
                    width: Math.max(currentRight, nextRight) - newX,
                    height: Math.max(currentBottom, nextBottom) - newY,
                    reason: "changed_pixels",
                };
            }
            else {
                merged.push(current);
                current = { ...next };
            }
        }
        merged.push(current);
        // Pad, sort by area (largest first), cap at maxRegions.
        return merged
            .map((r) => FrameDiffer.padRegion(r, padding, frameWidth, frameHeight))
            .sort((a, b) => b.width * b.height - a.width * a.height)
            .slice(0, maxRegions);
    }
    /**
     * Grow a region by `padding` pixels on each side. The top-left corner is
     * clamped at 0 and the bottom-right corner at the frame dimensions.
     */
    static padRegion(roi, padding, frameWidth, frameHeight) {
        const x = Math.max(0, roi.x - padding);
        const y = Math.max(0, roi.y - padding);
        return {
            x,
            y,
            width: Math.min(roi.x + roi.width + padding, frameWidth) - x,
            height: Math.min(roi.y + roi.height + padding, frameHeight) - y,
            reason: roi.reason,
        };
    }
    /**
     * Reset state (e.g., on context switch). Clears both the encoded-frame
     * baseline used by diff() and the raw RGBA baseline used by diffRaw(), so
     * the next frame on either path is treated as a first frame.
     */
    reset() {
        this.lastFrameHash = null;
        this.lastFrameBuffer = null;
        this.lastGridHashes = null;
        this.lastRawHash = null;
        this.lastRawGridHashes = null;
    }
    /** Get last frame hash (for external state tracking). */
    getLastHash() {
        return this.lastFrameHash;
    }
    /**
     * Build approximate per-cell hashes for an encoded buffer by treating it
     * as if it were `height` equal-length rows of bytes. Cells whose start
     * offset falls beyond the buffer are omitted from the map.
     *
     * @returns Map of "col,row" -> MD5 hex digest (empty for non-positive
     *          dimensions or an empty buffer).
     */
    computeGridHashes(buffer, width, height) {
        const hashes = new Map();
        if (width <= 0 || height <= 0 || buffer.length === 0)
            return hashes;
        const cols = Math.ceil(width / this.cellSize);
        const rows = Math.ceil(height / this.cellSize);
        // Pseudo row length: the buffer is split evenly across `height` rows.
        const bytesPerRow = Math.ceil(buffer.length / height) || 1;
        for (let row = 0; row < rows; row++) {
            for (let col = 0; col < cols; col++) {
                const key = `${col},${row}`;
                const startByte = row * this.cellSize * bytesPerRow + col * this.cellSize;
                const endByte = Math.min(startByte + this.cellSize * bytesPerRow, buffer.length);
                if (startByte >= buffer.length)
                    continue;
                const slice = buffer.subarray(startByte, endByte);
                hashes.set(key, crypto.createHash("md5").update(slice).digest("hex"));
            }
        }
        return hashes;
    }
    /**
     * Compare the grid of `buffer` against the baseline stored by the last
     * diff() call and return one ROI per cell whose hash differs.
     */
    detectChangedRegions(buffer, width, height) {
        const currentGrid = this.computeGridHashes(buffer, width, height);
        return this.collectChangedCells(currentGrid, this.lastGridHashes, width, height);
    }
    /**
     * Turn the difference between two grid-hash maps into ROIs.
     * A cell is reported only when it exists in both maps with different
     * hashes; cells missing from `previousGrid` are not reported.
     *
     * @param currentGrid Map of "col,row" -> hash for the new frame.
     * @param previousGrid Baseline map, or null/undefined for no baseline.
     * @param width Frame width in pixels (clamps the last column's width).
     * @param height Frame height in pixels (clamps the last row's height).
     */
    collectChangedCells(currentGrid, previousGrid, width, height) {
        const regions = [];
        for (const [key, hash] of currentGrid) {
            const prevHash = previousGrid?.get(key);
            if (prevHash && prevHash !== hash) {
                const [colStr, rowStr] = key.split(",");
                const col = Number(colStr);
                const row = Number(rowStr);
                regions.push({
                    x: col * this.cellSize,
                    y: row * this.cellSize,
                    width: Math.min(this.cellSize, width - col * this.cellSize),
                    height: Math.min(this.cellSize, height - row * this.cellSize),
                    reason: "changed_pixels",
                });
            }
        }
        return regions;
    }
    // ── Raw RGBA pixel-accurate diffing ──
    /**
     * Diff raw RGBA pixel data for accurate ROI detection.
     *
     * Unlike `diff()` which operates on compressed PNG bytes (approximate ROIs),
     * this method works with uncompressed RGBA buffers where byte offsets map
     * directly to pixel coordinates. It keeps its own baseline, separate from
     * the one used by diff().
     *
     * @param rgba Raw RGBA pixel buffer (4 bytes per pixel, row-major)
     * @param width Frame width in pixels
     * @param height Frame height in pixels
     * @returns `{ changed, hash, changedRegions }`
     */
    diffRaw(rgba, width, height) {
        const hash = this.hashBuffer(rgba);
        const changed = this.lastRawHash !== null && hash !== this.lastRawHash;
        // Hash the grid once; it serves as both comparison input and next baseline.
        const currentGrid = this.computeRawGridHashes(rgba, width, height);
        const changedRegions = changed && this.lastRawGridHashes !== null
            ? this.collectChangedCells(currentGrid, this.lastRawGridHashes, width, height)
            : [];
        this.lastRawGridHashes = currentGrid;
        this.lastRawHash = hash;
        return { changed, hash, changedRegions };
    }
    /**
     * Compute grid hashes from raw RGBA data using pixel-accurate slicing.
     * Each cell is hashed from its actual pixel rows; a row that would run
     * past the end of the buffer is skipped.
     *
     * @returns Map of "col,row" -> MD5 hex digest.
     */
    computeRawGridHashes(rgba, width, height) {
        const hashes = new Map();
        const cols = Math.ceil(width / this.cellSize);
        const rows = Math.ceil(height / this.cellSize);
        const bytesPerPixel = 4; // RGBA
        const stride = width * bytesPerPixel;
        for (let row = 0; row < rows; row++) {
            for (let col = 0; col < cols; col++) {
                const key = `${col},${row}`;
                const cellX = col * this.cellSize;
                const cellY = row * this.cellSize;
                const cellW = Math.min(this.cellSize, width - cellX);
                const cellH = Math.min(this.cellSize, height - cellY);
                // Hash the actual pixel data for this cell, one scanline at a time.
                const hasher = crypto.createHash("md5");
                for (let y = cellY; y < cellY + cellH; y++) {
                    const rowStart = y * stride + cellX * bytesPerPixel;
                    const rowEnd = rowStart + cellW * bytesPerPixel;
                    if (rowEnd <= rgba.length) {
                        hasher.update(rgba.subarray(rowStart, rowEnd));
                    }
                }
                hashes.set(key, hasher.digest("hex"));
            }
        }
        return hashes;
    }
    /**
     * Compare the raw grid of `rgba` against the baseline stored by the last
     * diffRaw() call and return one ROI per cell whose hash differs.
     */
    detectRawChangedRegions(rgba, width, height) {
        const currentGrid = this.computeRawGridHashes(rgba, width, height);
        return this.collectChangedCells(currentGrid, this.lastRawGridHashes, width, height);
    }
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
// Copyright (C) 2025 Clazro Technology Private Limited
|
|
2
|
+
// SPDX-License-Identifier: AGPL-3.0-only
|
|
3
|
+
//
|
|
4
|
+
// This file is part of ScreenHand.
|
|
5
|
+
//
|
|
6
|
+
// ScreenHand is free software: you can redistribute it and/or modify
|
|
7
|
+
// it under the terms of the GNU Affero General Public License as
|
|
8
|
+
// published by the Free Software Foundation, version 3.
|
|
9
|
+
//
|
|
10
|
+
// ScreenHand is distributed in the hope that it will be useful,
|
|
11
|
+
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
12
|
+
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
13
|
+
// GNU Affero General Public License for more details.
|
|
14
|
+
//
|
|
15
|
+
// You should have received a copy of the GNU Affero General Public License
|
|
16
|
+
// along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
|
|
17
|
+
export { PerceptionCoordinator } from "./coordinator.js";
|
|
18
|
+
export { PerceptionManager } from "./manager.js";
|
|
19
|
+
export { AXSource } from "./ax-source.js";
|
|
20
|
+
export { CDPSource } from "./cdp-source.js";
|
|
21
|
+
export { VisionSource } from "./vision-source.js";
|
|
22
|
+
export { FrameDiffer } from "./frame-differ.js";
|
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
// Copyright (C) 2025 Clazro Technology Private Limited
|
|
2
|
+
// SPDX-License-Identifier: AGPL-3.0-only
|
|
3
|
+
//
|
|
4
|
+
// This file is part of ScreenHand.
|
|
5
|
+
//
|
|
6
|
+
// ScreenHand is free software: you can redistribute it and/or modify
|
|
7
|
+
// it under the terms of the GNU Affero General Public License as
|
|
8
|
+
// published by the Free Software Foundation, version 3.
|
|
9
|
+
//
|
|
10
|
+
// ScreenHand is distributed in the hope that it will be useful,
|
|
11
|
+
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
12
|
+
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
13
|
+
// GNU Affero General Public License for more details.
|
|
14
|
+
//
|
|
15
|
+
// You should have received a copy of the GNU Affero General Public License
|
|
16
|
+
// along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
|
|
17
|
+
import { EventEmitter } from "node:events";
|
|
18
|
+
import { StateObserver } from "../runtime/state-observer.js";
|
|
19
|
+
import { AXSource } from "./ax-source.js";
|
|
20
|
+
import { CDPSource } from "./cdp-source.js";
|
|
21
|
+
import { VisionSource } from "./vision-source.js";
|
|
22
|
+
import { PerceptionCoordinator } from "./coordinator.js";
|
|
23
|
+
import { createEmptyStats } from "./types.js";
|
|
24
|
+
/**
|
|
25
|
+
* PerceptionManager — creates sources lazily when the bridge is ready,
|
|
26
|
+
* auto-starts perception on first app context, manages context switches,
|
|
27
|
+
* and emits reactive events (dialog_detected, app_switched).
|
|
28
|
+
*/
|
|
29
|
+
export class PerceptionManager extends EventEmitter {
    /** World model handed to the coordinator when sources are created. */
    worldModel;
    /** Caller-supplied config; spread over `{ enableVision: true }` in createSources(). */
    config;
    /** PerceptionCoordinator instance, or null until createSources() has run. */
    coordinator = null;
    /** True once createSources() has been entered; makes it a one-shot. */
    sourcesCreated = false;
    /** App context most recently passed to start/switchContext, or null. */
    currentContext = null;
    /** `pid` of currentContext, or null. */
    currentPid = null;
    /** `bundleId` of currentContext, or null. */
    currentBundleId = null;
    /** CDP client remembered by activateCDP(); fallback client for ensureStarted(). */
    lastCdpClient = null;
    /** Learning engine waiting to be wired into the coordinator. */
    pendingLearningEngine = null;
    /** App map waiting to be wired into the coordinator. */
    pendingAppMap = null;
    /**
     * @param worldModel Passed through to the coordinator.
     * @param config Coordinator config overrides.
     */
    constructor(worldModel, config) {
        super();
        this.worldModel = worldModel;
        this.config = config;
    }
    /**
     * Inject the learning engine. If coordinator already exists, wires immediately.
     * Otherwise, defers until createSources() is called.
     */
    setLearningEngine(engine) {
        this.pendingLearningEngine = engine;
        if (this.coordinator) {
            this.coordinator.setLearningEngine(engine);
        }
    }
    /**
     * Inject the app mastery map. If coordinator already exists, wires immediately.
     * Otherwise, defers until createSources() is called.
     */
    setAppMap(map) {
        this.pendingAppMap = map;
        if (this.coordinator) {
            this.coordinator.setAppMap(map);
        }
    }
    /**
     * Create perception sources from the bridge. Called once after ensureBridge();
     * subsequent calls are no-ops.
     *
     * Builds the AX, CDP and vision sources, constructs the coordinator, wires
     * any pending learning engine / app map, and subscribes to the
     * coordinator's "perception" events.
     */
    createSources(bridge) {
        if (this.sourcesCreated)
            return;
        this.sourcesCreated = true;
        const observer = new StateObserver(bridge);
        const axSource = new AXSource(observer, bridge);
        const cdpSource = new CDPSource();
        const visionSource = new VisionSource(bridge);
        this.coordinator = new PerceptionCoordinator(this.worldModel, axSource, cdpSource, visionSource, { enableVision: true, ...this.config });
        if (this.pendingLearningEngine) {
            this.coordinator.setLearningEngine(this.pendingLearningEngine);
        }
        if (this.pendingAppMap) {
            this.coordinator.setAppMap(this.pendingAppMap);
        }
        this.coordinator.on("perception", (event) => {
            this.handleReactiveEvent(event);
        });
    }
    /**
     * Ensure perception is started for the given app context.
     * Idempotent — starts if not running, switches context if the PID changed
     * or a windowId is supplied that differs from the current one.
     *
     * No-op when sources have not been created. If the coordinator's
     * start/switchContext rejects, the recorded context is rolled back so a
     * later call with the same context retries instead of being skipped; the
     * error is rethrown.
     *
     * @param appContext Object with at least `pid` and `bundleId`; optional `windowId`.
     * @param cdpClient Optional CDP client; falls back to the one from activateCDP().
     */
    async ensureStarted(appContext, cdpClient) {
        if (!this.coordinator)
            return;
        const client = cdpClient ?? this.lastCdpClient;
        const running = this.coordinator.isRunning;
        if (running) {
            const pidChanged = this.currentPid !== appContext.pid;
            const windowChanged = appContext.windowId != null &&
                this.currentContext?.windowId !== appContext.windowId;
            if (!pidChanged && !windowChanged)
                return;
        }
        const previous = {
            context: this.currentContext,
            pid: this.currentPid,
            bundleId: this.currentBundleId,
        };
        // Record the new context before awaiting so events that arrive while
        // the coordinator is starting are compared against the new bundle.
        this.currentContext = appContext;
        this.currentPid = appContext.pid;
        this.currentBundleId = appContext.bundleId;
        try {
            if (running) {
                await this.coordinator.switchContext(appContext, client);
            }
            else {
                await this.coordinator.start(appContext, client);
            }
        }
        catch (err) {
            // Roll back only if no other call replaced the context meanwhile.
            if (this.currentContext === appContext) {
                this.currentContext = previous.context;
                this.currentPid = previous.pid;
                this.currentBundleId = previous.bundleId;
            }
            throw err;
        }
    }
    /**
     * Activate CDP source with a new client.
     * Uses hot-inject on the running coordinator instead of stop+restart
     * to preserve AX polling state and avoid resetting counters.
     * The client is remembered even when the coordinator is not running.
     */
    activateCDP(cdpClient) {
        this.lastCdpClient = cdpClient;
        if (this.coordinator?.isRunning) {
            this.coordinator.activateCDP(cdpClient);
        }
    }
    /**
     * Best-effort auto-start: if perception isn't running and a focused app
     * is known, resolve its windowId and start perception silently.
     * Failures are swallowed — neither a failed `window.list` call nor a
     * failed start makes the returned promise reject.
     *
     * @param focusedApp Object with `pid` and `bundleId`; ignored when missing or without a pid.
     * @param bridge Object exposing `call(method, params)`; used for "window.list".
     */
    async tryAutoStart(focusedApp, bridge) {
        if (!this.coordinator || this.coordinator.isRunning)
            return;
        if (!focusedApp?.pid)
            return;
        let windowId;
        try {
            const wins = await bridge.call("window.list", {});
            const matching = wins?.filter((w) => w.pid === focusedApp.pid);
            if (matching && matching.length > 0) {
                // Prefer a window flagged as focused/frontmost/main; else take the first.
                const frontmost = matching.find((w) => w.focused || w.frontmost || w.isMain);
                windowId = (frontmost ?? matching[0])?.windowId;
            }
        }
        catch { /* best-effort: start without a windowId */ }
        const ctx = {
            bundleId: focusedApp.bundleId,
            // The bundle id doubles as the app name here.
            appName: focusedApp.bundleId,
            pid: focusedApp.pid,
            windowTitle: "",
            ...(windowId != null ? { windowId } : {}),
        };
        try {
            await this.ensureStarted(ctx);
        }
        catch { /* best-effort: auto-start failures must not surface to the caller */ }
    }
    /** Stop the coordinator if it is running and forget the current context. */
    async stop() {
        if (this.coordinator?.isRunning) {
            await this.coordinator.stop();
        }
        this.currentContext = null;
        this.currentPid = null;
        this.currentBundleId = null;
    }
    /** Whether the coordinator exists and reports itself as running. */
    get isRunning() {
        return this.coordinator?.isRunning ?? false;
    }
    /** Coordinator stats, or a fresh empty stats object when there is no coordinator. */
    getStats() {
        return this.coordinator?.getStats() ?? createEmptyStats();
    }
    /** Coordinator freshness summary, or a placeholder string when there is no coordinator. */
    getFreshnessSummary() {
        return this.coordinator?.getFreshnessSummary() ?? "Perception: not initialized";
    }
    /** Coordinator config, or null when there is no coordinator. */
    getConfig() {
        return this.coordinator?.getConfig() ?? null;
    }
    /** The coordinator instance, or null before createSources(). */
    getCoordinator() {
        return this.coordinator;
    }
    /**
     * Notify perception that a tool call is happening — resets idle timer.
     */
    notifyToolCall() {
        this.coordinator?.notifyToolCall();
    }
    /**
     * Translate coordinator "perception" events into manager-level events.
     * Only payloads of type "ax_events" with an `events` array are inspected:
     *  - "dialog_appeared" -> emits "dialog_detected" `{ title, pid }`
     *  - "app_activated" for a bundle other than the current one ->
     *    emits "app_switched" `{ bundleId, pid }`
     */
    handleReactiveEvent(event) {
        if (event.data?.type !== "ax_events" || !Array.isArray(event.data.events))
            return;
        for (const uiEvent of event.data.events) {
            if (uiEvent.type === "dialog_appeared") {
                this.emit("dialog_detected", {
                    title: uiEvent.windowTitle ?? "",
                    pid: uiEvent.pid,
                });
            }
            if (uiEvent.type === "app_activated" &&
                uiEvent.bundleId &&
                uiEvent.bundleId !== this.currentBundleId) {
                this.emit("app_switched", {
                    bundleId: uiEvent.bundleId,
                    pid: uiEvent.pid,
                });
            }
        }
    }
}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
// Copyright (C) 2025 Clazro Technology Private Limited
|
|
2
|
+
// SPDX-License-Identifier: AGPL-3.0-only
|
|
3
|
+
//
|
|
4
|
+
// This file is part of ScreenHand.
|
|
5
|
+
//
|
|
6
|
+
// ScreenHand is free software: you can redistribute it and/or modify
|
|
7
|
+
// it under the terms of the GNU Affero General Public License as
|
|
8
|
+
// published by the Free Software Foundation, version 3.
|
|
9
|
+
//
|
|
10
|
+
// ScreenHand is distributed in the hope that it will be useful,
|
|
11
|
+
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
12
|
+
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
13
|
+
// GNU Affero General Public License for more details.
|
|
14
|
+
//
|
|
15
|
+
// You should have received a copy of the GNU Affero General Public License
|
|
16
|
+
// along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
|
|
17
|
+
/**
 * Default perception configuration values.
 * NOTE(review): the consumer of this object is not visible in this file, so the
 * per-field notes below are inferred from field names — confirm against the
 * coordinator before relying on them.
 */
export const DEFAULT_PERCEPTION_CONFIG = {
    // Interval settings; the `Ms` suffix suggests milliseconds.
    fastIntervalMs: 100,
    mediumIntervalMs: 300,
    slowIntervalMs: 1000,

    // Per-source enable flags (presumably accessibility, CDP and vision).
    enableAX: true,
    enableCDP: true,
    enableVision: true,

    // Presumably caps the regions of interest handled per cycle.
    maxROIsPerCycle: 3,

    // Presumably bypasses a capture lock when true.
    skipCaptureLock: false,
};
|
|
27
|
+
/**
 * Build a fresh stats record in its initial state: `started` is false, every
 * counter is 0 and every timestamp field is null. A new object is returned on
 * each call.
 */
export function createEmptyStats() {
    const counterKeys = [
        "fastCycles",
        "mediumCycles",
        "slowCycles",
        "axEventsProcessed",
        "axTreePolls",
        "cdpMutationsProcessed",
        "cdpSnapshots",
        "visionDiffs",
        "visionOCRs",
    ];
    const timestampKeys = [
        "lastFastAt",
        "lastMediumAt",
        "lastSlowAt",
        "lastAXAt",
        "lastCDPAt",
        "lastVisionAt",
    ];
    // Keys are inserted in the same order as the original literal.
    const stats = { started: false, startedAt: null };
    for (const key of counterKeys) {
        stats[key] = 0;
    }
    for (const key of timestampKeys) {
        stats[key] = null;
    }
    return stats;
}
|