screenhand 0.2.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +165 -446
- package/bin/darwin-arm64/macos-bridge +0 -0
- package/dist/mcp-desktop.js +3615 -400
- package/dist/scripts/export-help-center.js +112 -0
- package/dist/scripts/marketing-loop.js +117 -0
- package/dist/scripts/observer-daemon.js +288 -0
- package/dist/scripts/orchestrator-daemon.js +399 -0
- package/dist/scripts/threads-campaign.js +208 -0
- package/dist/src/community/fetcher.js +109 -0
- package/dist/src/community/index.js +6 -0
- package/dist/src/community/publisher.js +191 -0
- package/dist/src/community/remote-api.js +121 -0
- package/dist/src/community/types.js +3 -0
- package/dist/src/community/validator.js +95 -0
- package/dist/src/context-tracker.js +489 -0
- package/dist/src/ingestion/coverage-auditor.js +233 -0
- package/dist/src/ingestion/doc-parser.js +164 -0
- package/dist/src/ingestion/index.js +8 -0
- package/dist/src/ingestion/menu-scanner.js +152 -0
- package/dist/src/ingestion/reference-merger.js +186 -0
- package/dist/src/ingestion/shortcut-extractor.js +180 -0
- package/dist/src/ingestion/tutorial-extractor.js +170 -0
- package/dist/src/ingestion/types.js +3 -0
- package/dist/src/jobs/manager.js +82 -14
- package/dist/src/jobs/runner.js +138 -15
- package/dist/src/learning/engine.js +356 -0
- package/dist/src/learning/index.js +9 -0
- package/dist/src/learning/locator-policy.js +120 -0
- package/dist/src/learning/pattern-policy.js +89 -0
- package/dist/src/learning/recovery-policy.js +116 -0
- package/dist/src/learning/sensor-policy.js +115 -0
- package/dist/src/learning/timing-model.js +204 -0
- package/dist/src/learning/topology-policy.js +90 -0
- package/dist/src/learning/types.js +9 -0
- package/dist/src/logging/timeline-logger.js +4 -1
- package/dist/src/memory/playbook-seeds.js +200 -0
- package/dist/src/memory/recall.js +60 -8
- package/dist/src/memory/service.js +30 -5
- package/dist/src/memory/store.js +34 -5
- package/dist/src/native/bridge-client.js +253 -31
- package/dist/src/observer/state.js +199 -0
- package/dist/src/observer/types.js +43 -0
- package/dist/src/orchestrator/state.js +68 -0
- package/dist/src/orchestrator/types.js +22 -0
- package/dist/src/perception/ax-source.js +162 -0
- package/dist/src/perception/cdp-source.js +162 -0
- package/dist/src/perception/coordinator.js +771 -0
- package/dist/src/perception/frame-differ.js +287 -0
- package/dist/src/perception/index.js +22 -0
- package/dist/src/perception/manager.js +199 -0
- package/dist/src/perception/types.js +47 -0
- package/dist/src/perception/vision-source.js +399 -0
- package/dist/src/planner/deterministic.js +298 -0
- package/dist/src/planner/executor.js +870 -0
- package/dist/src/planner/goal-store.js +92 -0
- package/dist/src/planner/index.js +21 -0
- package/dist/src/planner/planner.js +520 -0
- package/dist/src/planner/tool-registry.js +71 -0
- package/dist/src/planner/types.js +22 -0
- package/dist/src/platform/explorer.js +213 -0
- package/dist/src/platform/help-center-markdown.js +527 -0
- package/dist/src/platform/learner.js +257 -0
- package/dist/src/playbook/engine.js +296 -11
- package/dist/src/playbook/mcp-recorder.js +204 -0
- package/dist/src/playbook/recorder.js +3 -2
- package/dist/src/playbook/runner.js +1 -1
- package/dist/src/playbook/store.js +139 -10
- package/dist/src/recovery/detectors.js +156 -0
- package/dist/src/recovery/engine.js +327 -0
- package/dist/src/recovery/index.js +20 -0
- package/dist/src/recovery/strategies.js +274 -0
- package/dist/src/recovery/types.js +20 -0
- package/dist/src/runtime/accessibility-adapter.js +55 -18
- package/dist/src/runtime/applescript-adapter.js +8 -2
- package/dist/src/runtime/cdp-chrome-adapter.js +1 -1
- package/dist/src/runtime/executor.js +23 -3
- package/dist/src/runtime/locator-cache.js +24 -2
- package/dist/src/runtime/service.js +59 -15
- package/dist/src/runtime/session-manager.js +4 -1
- package/dist/src/runtime/vision-adapter.js +2 -1
- package/dist/src/state/app-map-types.js +72 -0
- package/dist/src/state/app-map.js +1974 -0
- package/dist/src/state/entity-tracker.js +108 -0
- package/dist/src/state/fusion.js +96 -0
- package/dist/src/state/index.js +21 -0
- package/dist/src/state/ladder-generator.js +236 -0
- package/dist/src/state/persistence.js +156 -0
- package/dist/src/state/types.js +17 -0
- package/dist/src/state/world-model.js +1456 -0
- package/dist/src/util/atomic-write.js +19 -4
- package/dist/src/util/sanitize.js +146 -0
- package/dist-app-maps/com.figma.Desktop.json +959 -0
- package/dist-app-maps/com.hnc.Discord.json +1146 -0
- package/dist-app-maps/notion.id.json +2831 -0
- package/dist-playbooks/canva-screenhand-carousel.json +445 -0
- package/dist-playbooks/codex-desktop.json +76 -0
- package/dist-playbooks/competitor-research-stack.json +122 -0
- package/dist-playbooks/davinci-color-grade.json +153 -0
- package/dist-playbooks/davinci-edit-timeline.json +162 -0
- package/dist-playbooks/davinci-render.json +114 -0
- package/dist-playbooks/devto.json +52 -0
- package/dist-playbooks/discord.json +41 -0
- package/dist-playbooks/google-flow-create-project.json +59 -0
- package/dist-playbooks/google-flow-edit-image.json +90 -0
- package/dist-playbooks/google-flow-edit-video.json +90 -0
- package/dist-playbooks/google-flow-generate-image.json +68 -0
- package/dist-playbooks/google-flow-generate-video.json +191 -0
- package/dist-playbooks/google-flow-open-project.json +48 -0
- package/dist-playbooks/google-flow-open-scenebuilder.json +64 -0
- package/dist-playbooks/google-flow-search-assets.json +64 -0
- package/dist-playbooks/instagram.json +57 -0
- package/dist-playbooks/linkedin.json +52 -0
- package/dist-playbooks/n8n.json +43 -0
- package/dist-playbooks/reddit.json +52 -0
- package/dist-playbooks/threads.json +59 -0
- package/dist-playbooks/x-twitter.json +59 -0
- package/dist-playbooks/youtube.json +59 -0
- package/dist-references/canva.json +646 -0
- package/dist-references/codex-desktop.json +305 -0
- package/dist-references/davinci-resolve-keyboard.json +594 -0
- package/dist-references/davinci-resolve-menu-map.json +1139 -0
- package/dist-references/davinci-resolve-menus-batch1.json +116 -0
- package/dist-references/davinci-resolve-menus-batch2.json +372 -0
- package/dist-references/davinci-resolve-menus-batch3.json +330 -0
- package/dist-references/davinci-resolve-menus-batch4.json +297 -0
- package/dist-references/davinci-resolve-shortcuts.json +333 -0
- package/dist-references/devpost.json +186 -0
- package/dist-references/devto.json +317 -0
- package/dist-references/discord.json +549 -0
- package/dist-references/figma.json +1186 -0
- package/dist-references/finder.json +146 -0
- package/dist-references/google-ads-transparency.json +95 -0
- package/dist-references/google-flow.json +649 -0
- package/dist-references/instagram.json +341 -0
- package/dist-references/linkedin.json +324 -0
- package/dist-references/meta-ad-library.json +86 -0
- package/dist-references/n8n.json +387 -0
- package/dist-references/notes.json +27 -0
- package/dist-references/notion.json +163 -0
- package/dist-references/reddit.json +341 -0
- package/dist-references/threads.json +337 -0
- package/dist-references/x-twitter.json +403 -0
- package/dist-references/youtube.json +373 -0
- package/native/macos-bridge/Package.swift +22 -0
- package/native/macos-bridge/Sources/AccessibilityBridge.swift +482 -0
- package/native/macos-bridge/Sources/AppManagement.swift +339 -0
- package/native/macos-bridge/Sources/CoreGraphicsBridge.swift +537 -0
- package/native/macos-bridge/Sources/ObserverBridge.swift +120 -0
- package/native/macos-bridge/Sources/StreamCapture.swift +136 -0
- package/native/macos-bridge/Sources/VisionBridge.swift +238 -0
- package/native/macos-bridge/Sources/main.swift +498 -0
- package/native/windows-bridge/AppManagement.cs +234 -0
- package/native/windows-bridge/InputBridge.cs +436 -0
- package/native/windows-bridge/Program.cs +270 -0
- package/native/windows-bridge/ScreenCapture.cs +453 -0
- package/native/windows-bridge/UIAutomationBridge.cs +571 -0
- package/native/windows-bridge/WindowsBridge.csproj +17 -0
- package/package.json +12 -1
- package/scripts/postinstall.cjs +127 -0
- package/dist/.audit-log.jsonl +0 -55
- package/dist/.screenhand/memory/.lock +0 -1
- package/dist/.screenhand/memory/actions.jsonl +0 -85
- package/dist/.screenhand/memory/errors.jsonl +0 -5
- package/dist/.screenhand/memory/errors.jsonl.bak +0 -4
- package/dist/.screenhand/memory/state.json +0 -35
- package/dist/.screenhand/memory/state.json.bak +0 -35
- package/dist/.screenhand/memory/strategies.jsonl +0 -12
- package/dist/agent/cli.js +0 -73
- package/dist/agent/loop.js +0 -258
- package/dist/config.js +0 -9
- package/dist/index.js +0 -56
- package/dist/logging/timeline-logger.js +0 -29
- package/dist/mcp/mcp-stdio-server.js +0 -448
- package/dist/mcp/server.js +0 -347
- package/dist/mcp-entry.js +0 -59
- package/dist/memory/recall.js +0 -160
- package/dist/memory/research.js +0 -98
- package/dist/memory/seeds.js +0 -89
- package/dist/memory/session.js +0 -161
- package/dist/memory/store.js +0 -391
- package/dist/memory/types.js +0 -4
- package/dist/monitor/codex-monitor.js +0 -377
- package/dist/monitor/task-queue.js +0 -84
- package/dist/monitor/types.js +0 -49
- package/dist/native/bridge-client.js +0 -174
- package/dist/native/macos-bridge-client.js +0 -5
- package/dist/npm-publish-helper.js +0 -117
- package/dist/npm-token-cdp.js +0 -113
- package/dist/npm-token-create.js +0 -135
- package/dist/npm-token-finish.js +0 -126
- package/dist/playbook/engine.js +0 -193
- package/dist/playbook/index.js +0 -4
- package/dist/playbook/recorder.js +0 -519
- package/dist/playbook/runner.js +0 -392
- package/dist/playbook/store.js +0 -166
- package/dist/playbook/types.js +0 -4
- package/dist/runtime/accessibility-adapter.js +0 -377
- package/dist/runtime/app-adapter.js +0 -48
- package/dist/runtime/applescript-adapter.js +0 -283
- package/dist/runtime/ax-role-map.js +0 -80
- package/dist/runtime/browser-adapter.js +0 -36
- package/dist/runtime/cdp-chrome-adapter.js +0 -505
- package/dist/runtime/composite-adapter.js +0 -205
- package/dist/runtime/executor.js +0 -250
- package/dist/runtime/locator-cache.js +0 -12
- package/dist/runtime/planning-loop.js +0 -47
- package/dist/runtime/service.js +0 -372
- package/dist/runtime/session-manager.js +0 -28
- package/dist/runtime/state-observer.js +0 -105
- package/dist/runtime/vision-adapter.js +0 -208
- package/dist/test-mcp-protocol.js +0 -138
- package/dist/types.js +0 -1
|
@@ -0,0 +1,771 @@
|
|
|
1
|
+
// Copyright (C) 2025 Clazro Technology Private Limited
|
|
2
|
+
// SPDX-License-Identifier: AGPL-3.0-only
|
|
3
|
+
//
|
|
4
|
+
// This file is part of ScreenHand.
|
|
5
|
+
//
|
|
6
|
+
// ScreenHand is free software: you can redistribute it and/or modify
|
|
7
|
+
// it under the terms of the GNU Affero General Public License as
|
|
8
|
+
// published by the Free Software Foundation, version 3.
|
|
9
|
+
//
|
|
10
|
+
// ScreenHand is distributed in the hope that it will be useful,
|
|
11
|
+
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
12
|
+
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
13
|
+
// GNU Affero General Public License for more details.
|
|
14
|
+
//
|
|
15
|
+
// You should have received a copy of the GNU Affero General Public License
|
|
16
|
+
// along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
|
|
17
|
+
import { EventEmitter } from "node:events";
|
|
18
|
+
import { VisionSource } from "./vision-source.js";
|
|
19
|
+
import { DEFAULT_PERCEPTION_CONFIG, createEmptyStats } from "./types.js";
|
|
20
|
+
import { acquireCaptureLock, releaseCaptureLock } from "../observer/state.js";
|
|
21
|
+
import { FusionPipeline } from "../state/fusion.js";
|
|
22
|
+
/**
 * Race a promise (or plain value) against a timeout.
 *
 * @param promise Promise, thenable or plain value to wait for. Plain values
 *   are wrapped with Promise.resolve so they resolve immediately.
 * @param ms Timeout in milliseconds.
 * @param label Name used in the timeout error message.
 * @returns A promise that settles with the input's outcome, or rejects with
 *   an Error whose message is `${label} timed out after ${ms}ms` when the
 *   input has not settled within `ms`.
 */
function withTimeout(promise, ms, label) {
    let timer;
    const deadline = new Promise((_resolve, reject) => {
        timer = setTimeout(() => reject(new Error(`${label} timed out after ${ms}ms`)), ms);
    });
    // Promise.race attaches handlers to both inputs, so a late rejection of the
    // losing promise never surfaces as an unhandled rejection.
    return Promise.race([Promise.resolve(promise), deadline])
        // Always release the timer so it cannot keep the event loop alive.
        .finally(() => clearTimeout(timer));
}
|
|
29
|
+
/**
|
|
30
|
+
* PerceptionCoordinator — manages multi-rate perception sources and feeds
|
|
31
|
+
* results into the world model.
|
|
32
|
+
*
|
|
33
|
+
* Runs three interval loops at different rates:
|
|
34
|
+
* - FAST (100ms): AX push events + CDP mutations (event-driven, cheap)
|
|
35
|
+
* - MEDIUM (500ms): AX tree poll + CDP DOM snapshot (structured, moderate)
|
|
36
|
+
* - SLOW (2000ms): Screenshot diff + ROI OCR (visual, expensive)
|
|
37
|
+
*
|
|
38
|
+
* The coordinator runs in the MCP server process. Heavy work (capture/OCR)
|
|
39
|
+
* is delegated to the native bridge (separate process) or observer daemon.
|
|
40
|
+
*/
|
|
41
|
+
export class PerceptionCoordinator extends EventEmitter {
|
|
42
|
+
// ── Collaborators supplied through the constructor ──
// World model that receives UI events, CDP mutations and fused snapshots.
worldModel;
// Accessibility source: observed per pid, drained in the fast cycle, polled in the medium cycle.
axSource;
// CDP source: mutation observer installation and mutation draining.
cdpSource;
// Vision source: stream capture start/stop and reset on context switch.
visionSource;
// DEFAULT_PERCEPTION_CONFIG overlaid with the caller's overrides.
config;
// Counters and timestamps; recreated on every start().
stats;
// ── Interval handles for the three loops (null while not scheduled) ──
fastTimer = null;
mediumTimer = null;
slowTimer = null;
// ── Currently observed target; all reset to null by stop() ──
activePid = null;
activeWindowId = null;
activeAppContext = null;
cdpClient = null;
/** CDP connection factory — called to create/reconnect persistent clients */
cdpConnectFn = null;
// True between start() and stop(); every cycle bails out when false.
running = false;
// Optional collaborators injected via setLearningEngine / setAppMap / setBrowserEnricher.
learningEngine = null;
appMap = null;
browserEnricher = null;
// Pipeline that AX tree snapshots are enqueued into and flushed to the world model.
fusionPipeline = new FusionPipeline();
// In-flight guards to prevent timer pileup when async cycles exceed their interval
fastInFlight = false;
mediumInFlight = false;
slowInFlight = false;
// Debounce timer for switchContext to coalesce rapid app switches
switchDebounceTimer = null;
// Resolve callback for the previous debounced switchContext promise
switchDebounceResolve = null;
// Idle gating: pause perception when no tool calls for IDLE_THRESHOLD_MS
static IDLE_THRESHOLD_MS = 3_000;
// Epoch milliseconds of the most recent notifyToolCall() (or start()).
lastToolCallAt = Date.now();
// Latched idle state; flipped by isIdle() and cleared by notifyToolCall()/start().
idle = false;
|
|
74
|
+
constructor(worldModel, axSource, cdpSource, visionSource, config) {
|
|
75
|
+
super();
|
|
76
|
+
this.worldModel = worldModel;
|
|
77
|
+
this.axSource = axSource;
|
|
78
|
+
this.cdpSource = cdpSource;
|
|
79
|
+
this.visionSource = visionSource;
|
|
80
|
+
this.config = { ...DEFAULT_PERCEPTION_CONFIG, ...config };
|
|
81
|
+
this.stats = createEmptyStats();
|
|
82
|
+
}
|
|
83
|
+
/**
|
|
84
|
+
* Inject the learning engine for recording sensor outcomes.
|
|
85
|
+
*/
|
|
86
|
+
setLearningEngine(engine) {
|
|
87
|
+
this.learningEngine = engine;
|
|
88
|
+
this.fusionPipeline.setLearningEngine(engine);
|
|
89
|
+
}
|
|
90
|
+
/**
 * Inject the app mastery map for validating spatial knowledge during slow cycle.
 *
 * @param map App map instance; stored as-is on `this.appMap`.
 */
setAppMap(map) {
    this.appMap = map;
}
|
|
96
|
+
/**
 * Set a browser enricher callback for non-CDP browsers (Safari).
 * Called during medium cycle to fetch URL/title/tabs via AppleScript.
 * Pass null to clear the enricher (e.g. on app switch away from Safari).
 * Note that stop() also clears the enricher, so it must be set again after
 * a restart or context switch.
 *
 * @param fn Callback awaited once per medium cycle, or null.
 */
setBrowserEnricher(fn) {
    this.browserEnricher = fn;
}
|
|
104
|
+
/**
|
|
105
|
+
* Notify that a tool call is happening — resets idle timer and starts stream if needed.
|
|
106
|
+
* Call this from the intelligence wrapper PRE-CALL.
|
|
107
|
+
*/
|
|
108
|
+
notifyToolCall() {
|
|
109
|
+
this.lastToolCallAt = Date.now();
|
|
110
|
+
if (this.idle) {
|
|
111
|
+
this.idle = false;
|
|
112
|
+
this.emit("wake");
|
|
113
|
+
// Start stream capture on wake for fast perception (only if running)
|
|
114
|
+
if (this.running && this.visionSource && this.activeWindowId && !this.visionSource.isStreaming) {
|
|
115
|
+
void this.visionSource.startStream(this.activeWindowId).catch(() => { });
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
/**
|
|
120
|
+
* Check if perception should be idle (no tool calls for 3s).
|
|
121
|
+
* Stops stream capture when entering idle.
|
|
122
|
+
*/
|
|
123
|
+
isIdle() {
|
|
124
|
+
const elapsed = Date.now() - this.lastToolCallAt;
|
|
125
|
+
const shouldIdle = elapsed > PerceptionCoordinator.IDLE_THRESHOLD_MS;
|
|
126
|
+
if (shouldIdle && !this.idle) {
|
|
127
|
+
this.idle = true;
|
|
128
|
+
this.emit("idle");
|
|
129
|
+
// Stop stream capture to save battery at idle
|
|
130
|
+
if (this.visionSource?.isStreaming) {
|
|
131
|
+
void this.visionSource.stopStream().catch(() => { });
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
return shouldIdle;
|
|
135
|
+
}
|
|
136
|
+
/**
|
|
137
|
+
* Start continuous perception loops.
|
|
138
|
+
*/
|
|
139
|
+
async start(appContext, cdpClient) {
|
|
140
|
+
if (this.running)
|
|
141
|
+
return;
|
|
142
|
+
this.activePid = appContext.pid;
|
|
143
|
+
this.activeWindowId = appContext.windowId ?? null;
|
|
144
|
+
this.activeAppContext = appContext;
|
|
145
|
+
this.cdpClient = cdpClient ?? null;
|
|
146
|
+
this.running = true;
|
|
147
|
+
this.stats = createEmptyStats();
|
|
148
|
+
this.stats.started = true;
|
|
149
|
+
this.stats.startedAt = new Date().toISOString();
|
|
150
|
+
this.cdpConsecutiveFailures = 0;
|
|
151
|
+
this.axConsecutiveFailures = 0;
|
|
152
|
+
this.fastInFlight = false;
|
|
153
|
+
this.mediumInFlight = false;
|
|
154
|
+
this.slowInFlight = false;
|
|
155
|
+
this.lastToolCallAt = Date.now();
|
|
156
|
+
this.idle = false;
|
|
157
|
+
// Enable safe CLI capture for browser apps to avoid CGWindowListCreateImage SIGSEGV
|
|
158
|
+
if (this.visionSource && typeof this.visionSource.setSafeCLI === "function") {
|
|
159
|
+
const family = this.worldModel.getAppFamily();
|
|
160
|
+
this.visionSource.setSafeCLI(family === "browser");
|
|
161
|
+
}
|
|
162
|
+
// Start continuous stream capture for fast perception (non-blocking, best-effort)
|
|
163
|
+
if (this.config.enableVision && this.visionSource && this.activeWindowId) {
|
|
164
|
+
void this.visionSource.startStream(this.activeWindowId).catch(() => { });
|
|
165
|
+
}
|
|
166
|
+
// Start AX observation
|
|
167
|
+
if (this.config.enableAX && this.axSource && this.activePid) {
|
|
168
|
+
try {
|
|
169
|
+
await this.axSource.startObserving(this.activePid);
|
|
170
|
+
}
|
|
171
|
+
catch {
|
|
172
|
+
// AX not available
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
// Install CDP mutation observer
|
|
176
|
+
if (this.config.enableCDP && this.cdpSource && this.cdpClient) {
|
|
177
|
+
try {
|
|
178
|
+
await this.cdpSource.installMutationObserver(this.cdpClient);
|
|
179
|
+
}
|
|
180
|
+
catch {
|
|
181
|
+
// CDP not available
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
// Start interval loops — in-flight guards prevent pileup when async cycle
|
|
185
|
+
// takes longer than the interval (e.g. bridge latency spike).
|
|
186
|
+
this.fastTimer = setInterval(() => {
|
|
187
|
+
if (this.fastInFlight)
|
|
188
|
+
return;
|
|
189
|
+
this.fastInFlight = true;
|
|
190
|
+
void this.fastCycle().catch(() => { }).finally(() => { this.fastInFlight = false; });
|
|
191
|
+
}, this.config.fastIntervalMs);
|
|
192
|
+
this.mediumTimer = setInterval(() => {
|
|
193
|
+
if (this.mediumInFlight)
|
|
194
|
+
return;
|
|
195
|
+
this.mediumInFlight = true;
|
|
196
|
+
void this.mediumCycle().catch(() => { }).finally(() => { this.mediumInFlight = false; });
|
|
197
|
+
}, this.config.mediumIntervalMs);
|
|
198
|
+
if (this.config.enableVision) {
|
|
199
|
+
this.slowTimer = setInterval(() => {
|
|
200
|
+
if (this.slowInFlight)
|
|
201
|
+
return;
|
|
202
|
+
this.slowInFlight = true;
|
|
203
|
+
void this.slowCycle().catch(() => { }).finally(() => { this.slowInFlight = false; });
|
|
204
|
+
}, this.config.slowIntervalMs);
|
|
205
|
+
}
|
|
206
|
+
this.emit("started", appContext);
|
|
207
|
+
}
|
|
208
|
+
/**
|
|
209
|
+
* Stop all perception loops.
|
|
210
|
+
*/
|
|
211
|
+
async stop() {
|
|
212
|
+
if (!this.running)
|
|
213
|
+
return;
|
|
214
|
+
this.running = false;
|
|
215
|
+
if (this.switchDebounceTimer !== null) {
|
|
216
|
+
clearTimeout(this.switchDebounceTimer);
|
|
217
|
+
this.switchDebounceTimer = null;
|
|
218
|
+
if (this.switchDebounceResolve !== null) {
|
|
219
|
+
this.switchDebounceResolve();
|
|
220
|
+
this.switchDebounceResolve = null;
|
|
221
|
+
}
|
|
222
|
+
}
|
|
223
|
+
// Stop stream capture
|
|
224
|
+
if (this.visionSource?.isStreaming) {
|
|
225
|
+
void this.visionSource.stopStream().catch(() => { });
|
|
226
|
+
}
|
|
227
|
+
if (this.fastTimer) {
|
|
228
|
+
clearInterval(this.fastTimer);
|
|
229
|
+
this.fastTimer = null;
|
|
230
|
+
}
|
|
231
|
+
if (this.mediumTimer) {
|
|
232
|
+
clearInterval(this.mediumTimer);
|
|
233
|
+
this.mediumTimer = null;
|
|
234
|
+
}
|
|
235
|
+
if (this.slowTimer) {
|
|
236
|
+
clearInterval(this.slowTimer);
|
|
237
|
+
this.slowTimer = null;
|
|
238
|
+
}
|
|
239
|
+
if (this.axSource && this.activePid) {
|
|
240
|
+
try {
|
|
241
|
+
await this.axSource.stopObserving(this.activePid);
|
|
242
|
+
}
|
|
243
|
+
catch {
|
|
244
|
+
// ignore
|
|
245
|
+
}
|
|
246
|
+
}
|
|
247
|
+
this.activePid = null;
|
|
248
|
+
this.activeWindowId = null;
|
|
249
|
+
this.activeAppContext = null;
|
|
250
|
+
this.cdpClient = null;
|
|
251
|
+
this.browserEnricher = null;
|
|
252
|
+
this.stats.started = false;
|
|
253
|
+
this.fastInFlight = false;
|
|
254
|
+
this.mediumInFlight = false;
|
|
255
|
+
this.slowInFlight = false;
|
|
256
|
+
this.emit("stopped");
|
|
257
|
+
}
|
|
258
|
+
/**
 * Switch perception to a new app/window context.
 * Debounced by 150ms — rapid successive calls coalesce to the last context.
 *
 * The returned promise never rejects. It resolves when the switch completes,
 * when the switch fails (the error is logged), or immediately when a later
 * call supersedes this one before its debounce timer fires.
 *
 * @param appContext Context passed on to start() once the debounce elapses.
 * @param cdpClient Optional CDP client passed on to start().
 * @returns Promise resolving with no value.
 */
switchContext(appContext, cdpClient) {
    if (this.switchDebounceTimer !== null) {
        clearTimeout(this.switchDebounceTimer);
        this.switchDebounceTimer = null;
        // Resolve the previous caller's promise — their switch was superseded, not failed
        if (this.switchDebounceResolve !== null) {
            this.switchDebounceResolve();
            this.switchDebounceResolve = null;
        }
    }
    return new Promise((resolve) => {
        // Remember the resolver so a superseding call (or stop()) can release this caller.
        this.switchDebounceResolve = resolve;
        this.switchDebounceTimer = setTimeout(() => {
            // The timer has fired: from here on this switch can no longer be superseded.
            this.switchDebounceTimer = null;
            this.switchDebounceResolve = null;
            void this.doSwitchContext(appContext, cdpClient).then(resolve).catch((err) => {
                console.error(`[Perception] switchContext failed: ${err?.message ?? err}`);
                resolve(); // Resolve anyway so callers don't hang, but error is logged
            });
        }, 150);
    });
}
|
|
284
|
+
/**
 * Internal: perform the actual context switch (stop + reset + start).
 *
 * The vision and CDP sources are reset between stop() and start(); the AX
 * source is not reset here.
 *
 * @param appContext Context to start perception on.
 * @param cdpClient Optional CDP client for the new context.
 */
async doSwitchContext(appContext, cdpClient) {
    await this.stop();
    this.visionSource?.reset();
    this.cdpSource?.reset();
    await this.start(appContext, cdpClient);
}
|
|
293
|
+
/**
|
|
294
|
+
* Get current perception statistics.
|
|
295
|
+
*/
|
|
296
|
+
getStats() {
|
|
297
|
+
return { ...this.stats };
|
|
298
|
+
}
|
|
299
|
+
/**
|
|
300
|
+
* Get a perception freshness summary for intelligence wrapper hints.
|
|
301
|
+
*/
|
|
302
|
+
getFreshnessSummary() {
|
|
303
|
+
if (!this.stats.started)
|
|
304
|
+
return "Perception: not active";
|
|
305
|
+
const now = Date.now();
|
|
306
|
+
const STALE_THRESHOLD_MS = 5_000;
|
|
307
|
+
const sources = [];
|
|
308
|
+
const warnings = [];
|
|
309
|
+
// Per-source detail
|
|
310
|
+
if (this.config.enableAX) {
|
|
311
|
+
if (this.stats.lastAXAt) {
|
|
312
|
+
const ageMs = now - new Date(this.stats.lastAXAt).getTime();
|
|
313
|
+
sources.push(`AX: ${ageMs}ms ago`);
|
|
314
|
+
if (ageMs > STALE_THRESHOLD_MS)
|
|
315
|
+
warnings.push("AX");
|
|
316
|
+
}
|
|
317
|
+
else {
|
|
318
|
+
sources.push("AX: no data yet");
|
|
319
|
+
}
|
|
320
|
+
}
|
|
321
|
+
else {
|
|
322
|
+
sources.push("AX: DISABLED");
|
|
323
|
+
}
|
|
324
|
+
if (this.config.enableCDP) {
|
|
325
|
+
if (this.stats.lastCDPAt) {
|
|
326
|
+
const ageMs = now - new Date(this.stats.lastCDPAt).getTime();
|
|
327
|
+
sources.push(`CDP: ${ageMs}ms ago`);
|
|
328
|
+
if (ageMs > STALE_THRESHOLD_MS)
|
|
329
|
+
warnings.push("CDP");
|
|
330
|
+
}
|
|
331
|
+
else {
|
|
332
|
+
sources.push("CDP: no data yet");
|
|
333
|
+
}
|
|
334
|
+
}
|
|
335
|
+
else {
|
|
336
|
+
sources.push("CDP: DISABLED");
|
|
337
|
+
}
|
|
338
|
+
if (this.config.enableVision) {
|
|
339
|
+
if (this.stats.lastVisionAt) {
|
|
340
|
+
const ageMs = now - new Date(this.stats.lastVisionAt).getTime();
|
|
341
|
+
sources.push(`Vision: ${ageMs}ms ago`);
|
|
342
|
+
if (ageMs > STALE_THRESHOLD_MS)
|
|
343
|
+
warnings.push("Vision");
|
|
344
|
+
}
|
|
345
|
+
else {
|
|
346
|
+
sources.push("Vision: no data yet");
|
|
347
|
+
}
|
|
348
|
+
}
|
|
349
|
+
else {
|
|
350
|
+
sources.push("Vision: DISABLED");
|
|
351
|
+
}
|
|
352
|
+
let summary = `Perception: ${sources.join(", ")}`;
|
|
353
|
+
if (warnings.length > 0) {
|
|
354
|
+
summary += ` ⚠ STALE: ${warnings.join(", ")} (>5s)`;
|
|
355
|
+
}
|
|
356
|
+
return summary;
|
|
357
|
+
}
|
|
358
|
+
/** True between a successful start() and the next stop(). */
get isRunning() {
    return this.running;
}
|
|
361
|
+
getConfig() {
|
|
362
|
+
return { ...this.config };
|
|
363
|
+
}
|
|
364
|
+
// ── Loop implementations ──
|
|
365
|
+
/**
 * Fast loop body: drain queued AX events and CDP mutations into the world model.
 *
 * Skipped entirely while stopped or idle. Each source is drained inside its
 * own try/catch so a failure in one does not prevent the other from running.
 * The cycle counter and `lastFastAt` are updated in `finally`, so they
 * advance even when a drain throws.
 */
async fastCycle() {
    if (!this.running || this.isIdle())
        return;
    // Captured up front so lastFastAt reflects when the cycle began.
    const timestamp = new Date().toISOString();
    try {
        // Drain AX events
        if (this.config.enableAX && this.axSource) {
            try {
                const axEvent = this.axSource.drainEvents();
                if (axEvent && axEvent.data.type === "ax_events") {
                    this.stats.axEventsProcessed += axEvent.data.events.length;
                    this.worldModel.ingestUIEvents(axEvent.data.events);
                    this.emit("perception", axEvent);
                }
            }
            catch (err) {
                console.error(`[Perception] fastCycle AX drain error: ${err?.message ?? err}`);
            }
        }
        // Drain CDP mutations
        if (this.config.enableCDP && this.cdpSource) {
            try {
                const cdpEvent = this.cdpSource.drainMutations();
                if (cdpEvent && cdpEvent.data.type === "cdp_mutations") {
                    this.stats.cdpMutationsProcessed += cdpEvent.data.mutations.length;
                    // Ingest mutations into world model
                    if (this.activeAppContext) {
                        this.worldModel.ingestCDPMutations(this.activeAppContext.bundleId, cdpEvent.data.mutations);
                    }
                    // Emitted even when there is no active app context to ingest into.
                    this.emit("perception", cdpEvent);
                }
            }
            catch (err) {
                console.error(`[Perception] fastCycle CDP drain error: ${err?.message ?? err}`);
            }
        }
    }
    finally {
        this.stats.fastCycles++;
        this.stats.lastFastAt = timestamp;
    }
}
|
|
407
|
+
async mediumCycle() {
|
|
408
|
+
if (!this.running || this.isIdle())
|
|
409
|
+
return;
|
|
410
|
+
const timestamp = new Date().toISOString();
|
|
411
|
+
// Determine sensor polling order — use learning engine ranking if available
|
|
412
|
+
const sensorOrder = this.getMediumCycleSensorOrder();
|
|
413
|
+
const MEDIUM_CYCLE_TIMEOUT_MS = 15_000;
|
|
414
|
+
for (const sensor of sensorOrder) {
|
|
415
|
+
try {
|
|
416
|
+
if (sensor === "ax") {
|
|
417
|
+
await withTimeout(this.pollAX(), MEDIUM_CYCLE_TIMEOUT_MS, "pollAX");
|
|
418
|
+
}
|
|
419
|
+
else if (sensor === "cdp") {
|
|
420
|
+
await withTimeout(this.pollCDP(), MEDIUM_CYCLE_TIMEOUT_MS, "pollCDP");
|
|
421
|
+
}
|
|
422
|
+
}
|
|
423
|
+
catch (e) {
|
|
424
|
+
console.error(`[Perception] ${sensor} medium cycle error: ${e?.message ?? e}`);
|
|
425
|
+
}
|
|
426
|
+
}
|
|
427
|
+
// Enrich browser state for non-CDP browsers (Safari)
|
|
428
|
+
if (this.browserEnricher) {
|
|
429
|
+
try {
|
|
430
|
+
await this.browserEnricher();
|
|
431
|
+
}
|
|
432
|
+
catch { /* best-effort */ }
|
|
433
|
+
}
|
|
434
|
+
this.stats.mediumCycles++;
|
|
435
|
+
this.stats.lastMediumAt = timestamp;
|
|
436
|
+
}
|
|
437
|
+
/**
|
|
438
|
+
* Determine the order to poll sensors in the medium cycle.
|
|
439
|
+
* If the learning engine has ranked data for the current app, use that order.
|
|
440
|
+
* Otherwise, fall back to the default: AX → CDP.
|
|
441
|
+
*/
|
|
442
|
+
/**
 * Inject or update the CDP client after perception has started.
 * Called when a browser CDP connection is established.
 * Accepts either a live client or a connect function (preferred — enables reconnection).
 *
 * Resets the CDP consecutive-failure counter. When CDP is enabled and both
 * a CDP source and a client are present, installs the mutation observer in
 * the background; installation failures are logged, not thrown.
 *
 * @param cdpClient Live CDP client (stored as given, may be falsy).
 * @param connectFn Optional connection factory; the stored factory is only
 *   replaced when this is truthy.
 */
activateCDP(cdpClient, connectFn) {
    console.error(`[Perception] activateCDP called, client=${!!cdpClient}, connectFn=${!!connectFn}, enableCDP=${this.config.enableCDP}, cdpSource=${!!this.cdpSource}`);
    this.cdpClient = cdpClient;
    if (connectFn)
        this.cdpConnectFn = connectFn;
    this.cdpConsecutiveFailures = 0;
    if (this.config.enableCDP && this.cdpSource && cdpClient) {
        void this.cdpSource.installMutationObserver(cdpClient).catch((e) => {
            console.error(`[Perception] installMutationObserver failed: ${e?.message ?? e}`);
        });
    }
}
|
|
462
|
+
/**
 * Update the active window ID after perception has started.
 * Called when window resolution succeeds late (after start).
 * Only the stored id changes; stream capture is not started here.
 *
 * @param windowId Window id to use for subsequent polls.
 */
setActiveWindowId(windowId) {
    this.activeWindowId = windowId;
}
|
|
469
|
+
getMediumCycleSensorOrder() {
|
|
470
|
+
const defaultOrder = [];
|
|
471
|
+
// Allow AX polling even without windowId — pollAX uses windowId ?? 0 (full app tree)
|
|
472
|
+
if (this.config.enableAX && this.axSource && this.activePid) {
|
|
473
|
+
defaultOrder.push("ax");
|
|
474
|
+
}
|
|
475
|
+
if (this.config.enableCDP && this.cdpSource && this.cdpClient) {
|
|
476
|
+
defaultOrder.push("cdp");
|
|
477
|
+
}
|
|
478
|
+
if (!this.learningEngine || !this.activeAppContext || defaultOrder.length <= 1) {
|
|
479
|
+
return defaultOrder;
|
|
480
|
+
}
|
|
481
|
+
const ranked = this.learningEngine.rankSensors(this.activeAppContext.bundleId);
|
|
482
|
+
if (ranked.length === 0)
|
|
483
|
+
return defaultOrder;
|
|
484
|
+
// Build ordered list from ranking, only including sensors that are available
|
|
485
|
+
const available = new Set(defaultOrder);
|
|
486
|
+
const ordered = [];
|
|
487
|
+
for (const { sourceType } of ranked) {
|
|
488
|
+
const s = sourceType;
|
|
489
|
+
if (available.has(s)) {
|
|
490
|
+
ordered.push(s);
|
|
491
|
+
available.delete(s);
|
|
492
|
+
}
|
|
493
|
+
}
|
|
494
|
+
// Append any remaining sensors not covered by ranking
|
|
495
|
+
for (const s of defaultOrder) {
|
|
496
|
+
if (available.has(s)) {
|
|
497
|
+
ordered.push(s);
|
|
498
|
+
}
|
|
499
|
+
}
|
|
500
|
+
return ordered;
|
|
501
|
+
}
|
|
502
|
+
// Consecutive AX tree polls that did not yield an "ax_tree" event. Reset on
// success and in start(); pollAX stops polling once this exceeds 5.
axConsecutiveFailures = 0;
|
|
503
|
+
async pollAX() {
|
|
504
|
+
if (!this.config.enableAX ||
|
|
505
|
+
!this.axSource ||
|
|
506
|
+
!this.activePid ||
|
|
507
|
+
!this.activeAppContext)
|
|
508
|
+
return;
|
|
509
|
+
// If AX has failed many times, the app PID is likely dead — skip polling
|
|
510
|
+
// and emit a stale warning so the caller knows to restart perception
|
|
511
|
+
if (this.axConsecutiveFailures > 5) {
|
|
512
|
+
return;
|
|
513
|
+
}
|
|
514
|
+
// Adaptive skip: if recent AX polls are extremely slow, skip this cycle
|
|
515
|
+
if (this.axSource.shouldSkipPoll()) {
|
|
516
|
+
return;
|
|
517
|
+
}
|
|
518
|
+
// Derive windowId if not set — use first tracked window for this pid
|
|
519
|
+
if (this.activeWindowId === null) {
|
|
520
|
+
for (const [id, win] of this.worldModel.getState().windows) {
|
|
521
|
+
if (win.pid === this.activePid) {
|
|
522
|
+
this.activeWindowId = id;
|
|
523
|
+
break;
|
|
524
|
+
}
|
|
525
|
+
}
|
|
526
|
+
}
|
|
527
|
+
try {
|
|
528
|
+
const { event: treeEvent, latencyMs: axLatency, nodeCount } = await this.axSource.pollAXTree(this.activePid, this.activeWindowId ?? 0, this.activeAppContext);
|
|
529
|
+
const axSuccess = !!(treeEvent && treeEvent.data.type === "ax_tree");
|
|
530
|
+
if (treeEvent && treeEvent.data.type === "ax_tree") {
|
|
531
|
+
this.stats.axTreePolls++;
|
|
532
|
+
this.stats.lastAXAt = new Date().toISOString();
|
|
533
|
+
this.axConsecutiveFailures = 0;
|
|
534
|
+
this.fusionPipeline.enqueue({
|
|
535
|
+
source: "ax",
|
|
536
|
+
timestamp: new Date().toISOString(),
|
|
537
|
+
confidence: 0.9,
|
|
538
|
+
windowId: treeEvent.data.windowId,
|
|
539
|
+
axTree: treeEvent.data.tree,
|
|
540
|
+
appContext: treeEvent.data.appContext,
|
|
541
|
+
});
|
|
542
|
+
this.fusionPipeline.flush(this.worldModel);
|
|
543
|
+
this.emit("perception", treeEvent);
|
|
544
|
+
}
|
|
545
|
+
else {
|
|
546
|
+
this.axConsecutiveFailures++;
|
|
547
|
+
}
|
|
548
|
+
if (this.learningEngine && this.activeAppContext) {
|
|
549
|
+
this.learningEngine.recordSensorOutcome({
|
|
550
|
+
bundleId: this.activeAppContext.bundleId,
|
|
551
|
+
sourceType: "ax",
|
|
552
|
+
success: axSuccess,
|
|
553
|
+
latencyMs: axLatency,
|
|
554
|
+
nodeCount,
|
|
555
|
+
});
|
|
556
|
+
}
|
|
557
|
+
}
|
|
558
|
+
catch (err) {
|
|
559
|
+
this.axConsecutiveFailures++;
|
|
560
|
+
console.error(`[Perception] pollAX error #${this.axConsecutiveFailures}: ${err?.message ?? err}`);
|
|
561
|
+
}
|
|
562
|
+
}
|
|
563
|
+
cdpConsecutiveFailures = 0;
|
|
564
|
+
async pollCDP() {
|
|
565
|
+
if (!this.config.enableCDP || !this.cdpSource || !this.cdpClient) {
|
|
566
|
+
if (this.stats.cdpSnapshots === 0 && this.stats.mediumCycles > 0 && this.stats.mediumCycles % 20 === 0) {
|
|
567
|
+
console.error(`[Perception] pollCDP skipped: enableCDP=${this.config.enableCDP} cdpSource=${!!this.cdpSource} cdpClient=${!!this.cdpClient}`);
|
|
568
|
+
}
|
|
569
|
+
return;
|
|
570
|
+
}
|
|
571
|
+
// If CDP has failed too many times, periodically retry reconnection (every 10th cycle)
|
|
572
|
+
// instead of giving up forever — the target app may have restarted
|
|
573
|
+
if (this.cdpConsecutiveFailures > 10) {
|
|
574
|
+
if (this.cdpConsecutiveFailures % 10 === 0 && this.cdpConnectFn) {
|
|
575
|
+
try {
|
|
576
|
+
this.cdpClient = await this.cdpConnectFn();
|
|
577
|
+
const failureCount = this.cdpConsecutiveFailures;
|
|
578
|
+
this.cdpConsecutiveFailures = 0;
|
|
579
|
+
console.error(`[Perception] CDP reconnected after ${failureCount} failures`);
|
|
580
|
+
}
|
|
581
|
+
catch {
|
|
582
|
+
this.cdpConsecutiveFailures++;
|
|
583
|
+
}
|
|
584
|
+
}
|
|
585
|
+
else {
|
|
586
|
+
this.cdpConsecutiveFailures++;
|
|
587
|
+
}
|
|
588
|
+
return;
|
|
589
|
+
}
|
|
590
|
+
const cdpStart = Date.now();
|
|
591
|
+
try {
|
|
592
|
+
const snapEvent = await this.cdpSource.pollSnapshot(this.cdpClient);
|
|
593
|
+
const cdpLatency = Date.now() - cdpStart;
|
|
594
|
+
const cdpSuccess = !!(snapEvent && snapEvent.data.type === "cdp_snapshot");
|
|
595
|
+
if (this.stats.cdpSnapshots === 0) {
|
|
596
|
+
console.error(`[Perception] pollCDP result: success=${cdpSuccess} latency=${cdpLatency}ms event_type=${snapEvent?.data?.type ?? "null"}`);
|
|
597
|
+
}
|
|
598
|
+
if (snapEvent && snapEvent.data.type === "cdp_snapshot") {
|
|
599
|
+
this.stats.cdpSnapshots++;
|
|
600
|
+
this.stats.lastCDPAt = new Date().toISOString();
|
|
601
|
+
this.cdpConsecutiveFailures = 0;
|
|
602
|
+
if (this.activeAppContext) {
|
|
603
|
+
this.fusionPipeline.enqueue({
|
|
604
|
+
source: "cdp",
|
|
605
|
+
timestamp: new Date().toISOString(),
|
|
606
|
+
confidence: 0.85,
|
|
607
|
+
windowId: this.activeWindowId ?? 0,
|
|
608
|
+
cdpSnapshot: {
|
|
609
|
+
bundleId: this.activeAppContext.bundleId,
|
|
610
|
+
url: snapEvent.data.url,
|
|
611
|
+
title: snapEvent.data.title,
|
|
612
|
+
},
|
|
613
|
+
});
|
|
614
|
+
this.fusionPipeline.flush(this.worldModel);
|
|
615
|
+
}
|
|
616
|
+
this.emit("perception", snapEvent);
|
|
617
|
+
}
|
|
618
|
+
if (this.learningEngine && this.activeAppContext) {
|
|
619
|
+
this.learningEngine.recordSensorOutcome({
|
|
620
|
+
bundleId: this.activeAppContext.bundleId,
|
|
621
|
+
sourceType: "cdp",
|
|
622
|
+
success: cdpSuccess,
|
|
623
|
+
latencyMs: cdpLatency,
|
|
624
|
+
});
|
|
625
|
+
}
|
|
626
|
+
}
|
|
627
|
+
catch (err) {
|
|
628
|
+
this.cdpConsecutiveFailures++;
|
|
629
|
+
console.error(`[Perception] pollCDP error #${this.cdpConsecutiveFailures}: ${err?.message ?? err}`);
|
|
630
|
+
// Try to reconnect using the connect factory if available
|
|
631
|
+
if (this.cdpConsecutiveFailures <= 3 && this.cdpConnectFn) {
|
|
632
|
+
try {
|
|
633
|
+
this.cdpClient = await this.cdpConnectFn();
|
|
634
|
+
this.cdpConsecutiveFailures = 0;
|
|
635
|
+
}
|
|
636
|
+
catch {
|
|
637
|
+
// reconnect failed — will retry next cycle
|
|
638
|
+
}
|
|
639
|
+
}
|
|
640
|
+
if (this.learningEngine && this.activeAppContext) {
|
|
641
|
+
this.learningEngine.recordSensorOutcome({
|
|
642
|
+
bundleId: this.activeAppContext.bundleId,
|
|
643
|
+
sourceType: "cdp",
|
|
644
|
+
success: false,
|
|
645
|
+
latencyMs: Date.now() - cdpStart,
|
|
646
|
+
});
|
|
647
|
+
}
|
|
648
|
+
}
|
|
649
|
+
}
|
|
650
|
+
async slowCycle() {
|
|
651
|
+
if (!this.running || !this.visionSource || this.isIdle())
|
|
652
|
+
return;
|
|
653
|
+
// For browsers, use safe CLI capture mode (screencapture) instead of
|
|
654
|
+
// CGWindowListCreateImage which crashes on GPU-heavy pages (WebGL, canvas).
|
|
655
|
+
// Safe CLI mode is already enabled via setSafeCLI() in start().
|
|
656
|
+
// This allows vision/OCR for canvas-heavy apps like Canva in Chrome.
|
|
657
|
+
// Skip vision if learning engine shows it consistently fails for this app,
|
|
658
|
+
// but retry every 20th cycle to re-evaluate (apps may gain windows later)
|
|
659
|
+
if (this.learningEngine && this.activeAppContext) {
|
|
660
|
+
const ranked = this.learningEngine.rankSensors(this.activeAppContext.bundleId);
|
|
661
|
+
const visionRank = ranked.find(r => r.sourceType === "vision");
|
|
662
|
+
if (visionRank && visionRank.score < 0.1 && ranked.length >= 2 && this.stats.slowCycles % 20 !== 0) {
|
|
663
|
+
this.stats.slowCycles++;
|
|
664
|
+
return; // Vision consistently fails for this app — skip (retry every 20th cycle)
|
|
665
|
+
}
|
|
666
|
+
}
|
|
667
|
+
const timestamp = new Date().toISOString();
|
|
668
|
+
// Acquire capture lock to prevent concurrent captures with observer daemon
|
|
669
|
+
if (!this.config.skipCaptureLock && !acquireCaptureLock()) {
|
|
670
|
+
this.stats.slowCycles++;
|
|
671
|
+
return; // Observer daemon is capturing — skip this cycle
|
|
672
|
+
}
|
|
673
|
+
try {
|
|
674
|
+
// Screenshot diff — optimized single-capture pipeline
|
|
675
|
+
const windowId = this.activeWindowId ?? 0;
|
|
676
|
+
if (windowId === 0)
|
|
677
|
+
return; // Vision needs a real window ID for screenshot
|
|
678
|
+
const SLOW_CYCLE_TIMEOUT_MS = 25_000;
|
|
679
|
+
const { diffEvent, ocrEvent, yoloElements } = await withTimeout(this.visionSource.captureAndDiffOptimized(windowId, this.config.maxROIsPerCycle), SLOW_CYCLE_TIMEOUT_MS, "captureAndDiffOptimized");
|
|
680
|
+
if (diffEvent) {
|
|
681
|
+
this.stats.visionDiffs++;
|
|
682
|
+
this.stats.lastVisionAt = new Date().toISOString();
|
|
683
|
+
// Store screenshot hash in world model for change detection
|
|
684
|
+
if (diffEvent.data.type === "vision_diff" && diffEvent.data.hash && this.activeWindowId !== null) {
|
|
685
|
+
this.worldModel.updateWindowScreenshotHash(this.activeWindowId, diffEvent.data.hash);
|
|
686
|
+
}
|
|
687
|
+
this.emit("perception", diffEvent);
|
|
688
|
+
}
|
|
689
|
+
if (ocrEvent) {
|
|
690
|
+
this.stats.visionOCRs++;
|
|
691
|
+
// Merge OCR regions into world model via fusion pipeline
|
|
692
|
+
if (ocrEvent.data.type === "vision_ocr" && ocrEvent.data.regions.length > 0) {
|
|
693
|
+
this.fusionPipeline.enqueue({
|
|
694
|
+
source: "ocr",
|
|
695
|
+
timestamp: new Date().toISOString(),
|
|
696
|
+
confidence: 0.7,
|
|
697
|
+
windowId,
|
|
698
|
+
ocrRegions: ocrEvent.data.regions,
|
|
699
|
+
});
|
|
700
|
+
this.fusionPipeline.flush(this.worldModel);
|
|
701
|
+
}
|
|
702
|
+
// Touch lastValidated on app map when OCR confirms screen content
|
|
703
|
+
if (this.appMap && this.activeAppContext) {
|
|
704
|
+
const mapData = this.appMap.load(this.activeAppContext.bundleId);
|
|
705
|
+
if (mapData) {
|
|
706
|
+
mapData.lastValidated = new Date().toISOString();
|
|
707
|
+
this.appMap.save(mapData);
|
|
708
|
+
}
|
|
709
|
+
}
|
|
710
|
+
this.emit("perception", ocrEvent);
|
|
711
|
+
}
|
|
712
|
+
// Fuse YOLO element detections with OCR text regions
|
|
713
|
+
if (yoloElements && yoloElements.length > 0) {
|
|
714
|
+
const ocrRegions = (ocrEvent?.data.type === "vision_ocr" && ocrEvent.data.regions)
|
|
715
|
+
? ocrEvent.data.regions
|
|
716
|
+
: [];
|
|
717
|
+
const fused = VisionSource.fuseOcrAndYolo(ocrRegions, yoloElements);
|
|
718
|
+
if (fused.length > 0) {
|
|
719
|
+
this.fusionPipeline.enqueue({
|
|
720
|
+
source: "ocr",
|
|
721
|
+
timestamp: new Date().toISOString(),
|
|
722
|
+
confidence: 0.8,
|
|
723
|
+
windowId,
|
|
724
|
+
ocrRegions: fused.map((f) => ({
|
|
725
|
+
text: f.text || `[${f.class}]`,
|
|
726
|
+
bounds: f.bounds,
|
|
727
|
+
})),
|
|
728
|
+
});
|
|
729
|
+
this.fusionPipeline.flush(this.worldModel);
|
|
730
|
+
}
|
|
731
|
+
this.emit("perception", {
|
|
732
|
+
source: "vision_yolo",
|
|
733
|
+
rate: "slow",
|
|
734
|
+
timestamp: new Date().toISOString(),
|
|
735
|
+
data: {
|
|
736
|
+
type: "vision_yolo",
|
|
737
|
+
elements: fused,
|
|
738
|
+
count: fused.length,
|
|
739
|
+
},
|
|
740
|
+
});
|
|
741
|
+
}
|
|
742
|
+
// Record vision sensor outcome
|
|
743
|
+
if (this.learningEngine && this.activeAppContext) {
|
|
744
|
+
this.learningEngine.recordSensorOutcome({
|
|
745
|
+
bundleId: this.activeAppContext.bundleId,
|
|
746
|
+
sourceType: "vision",
|
|
747
|
+
success: !!diffEvent,
|
|
748
|
+
latencyMs: Date.now() - new Date(timestamp).getTime(),
|
|
749
|
+
});
|
|
750
|
+
}
|
|
751
|
+
}
|
|
752
|
+
catch {
|
|
753
|
+
// Vision source failed (bridge crash, timeout, etc.) — continue running
|
|
754
|
+
if (this.learningEngine && this.activeAppContext) {
|
|
755
|
+
this.learningEngine.recordSensorOutcome({
|
|
756
|
+
bundleId: this.activeAppContext.bundleId,
|
|
757
|
+
sourceType: "vision",
|
|
758
|
+
success: false,
|
|
759
|
+
latencyMs: Date.now() - new Date(timestamp).getTime(),
|
|
760
|
+
});
|
|
761
|
+
}
|
|
762
|
+
}
|
|
763
|
+
finally {
|
|
764
|
+
// Always increment stats, even on early return (windowId=0) or error
|
|
765
|
+
this.stats.slowCycles++;
|
|
766
|
+
this.stats.lastSlowAt = timestamp;
|
|
767
|
+
if (!this.config.skipCaptureLock)
|
|
768
|
+
releaseCaptureLock();
|
|
769
|
+
}
|
|
770
|
+
}
|
|
771
|
+
}
|