screenhand 0.1.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +193 -109
- package/bin/darwin-arm64/macos-bridge +0 -0
- package/dist/mcp-desktop.js +5876 -0
- package/dist/scripts/codex-monitor-daemon.js +335 -0
- package/dist/scripts/export-help-center.js +112 -0
- package/dist/scripts/marketing-loop.js +117 -0
- package/dist/scripts/observer-daemon.js +288 -0
- package/dist/scripts/orchestrator-daemon.js +399 -0
- package/dist/scripts/supervisor-daemon.js +272 -0
- package/dist/scripts/threads-campaign.js +208 -0
- package/dist/scripts/worker-daemon.js +228 -0
- package/dist/src/agent/cli.js +82 -0
- package/dist/src/agent/loop.js +274 -0
- package/dist/src/community/fetcher.js +109 -0
- package/dist/src/community/index.js +6 -0
- package/dist/src/community/publisher.js +191 -0
- package/dist/src/community/remote-api.js +121 -0
- package/dist/src/community/types.js +3 -0
- package/dist/src/community/validator.js +95 -0
- package/{src/config.ts → dist/src/config.js} +5 -10
- package/dist/src/context-tracker.js +489 -0
- package/{src/index.ts → dist/src/index.js} +32 -52
- package/dist/src/ingestion/coverage-auditor.js +233 -0
- package/dist/src/ingestion/doc-parser.js +164 -0
- package/dist/src/ingestion/index.js +8 -0
- package/dist/src/ingestion/menu-scanner.js +152 -0
- package/dist/src/ingestion/reference-merger.js +186 -0
- package/dist/src/ingestion/shortcut-extractor.js +180 -0
- package/dist/src/ingestion/tutorial-extractor.js +170 -0
- package/dist/src/ingestion/types.js +3 -0
- package/dist/src/jobs/manager.js +305 -0
- package/dist/src/jobs/runner.js +806 -0
- package/dist/src/jobs/store.js +102 -0
- package/dist/src/jobs/types.js +30 -0
- package/dist/src/jobs/worker.js +97 -0
- package/dist/src/learning/engine.js +356 -0
- package/dist/src/learning/index.js +9 -0
- package/dist/src/learning/locator-policy.js +120 -0
- package/dist/src/learning/pattern-policy.js +89 -0
- package/dist/src/learning/recovery-policy.js +116 -0
- package/dist/src/learning/sensor-policy.js +115 -0
- package/dist/src/learning/timing-model.js +204 -0
- package/dist/src/learning/topology-policy.js +90 -0
- package/dist/src/learning/types.js +9 -0
- package/dist/src/logging/timeline-logger.js +48 -0
- package/dist/src/mcp/mcp-stdio-server.js +464 -0
- package/dist/src/mcp/server.js +363 -0
- package/dist/src/mcp-entry.js +60 -0
- package/dist/src/memory/playbook-seeds.js +200 -0
- package/dist/src/memory/recall.js +222 -0
- package/dist/src/memory/research.js +104 -0
- package/dist/src/memory/seeds.js +101 -0
- package/dist/src/memory/service.js +446 -0
- package/dist/src/memory/session.js +169 -0
- package/dist/src/memory/store.js +451 -0
- package/{src/runtime/locator-cache.ts → dist/src/memory/types.js} +1 -17
- package/dist/src/monitor/codex-monitor.js +382 -0
- package/dist/src/monitor/task-queue.js +97 -0
- package/dist/src/monitor/types.js +62 -0
- package/dist/src/native/bridge-client.js +412 -0
- package/{src/native/macos-bridge-client.ts → dist/src/native/macos-bridge-client.js} +0 -1
- package/dist/src/observer/state.js +199 -0
- package/dist/src/observer/types.js +43 -0
- package/dist/src/orchestrator/state.js +68 -0
- package/dist/src/orchestrator/types.js +22 -0
- package/dist/src/perception/ax-source.js +162 -0
- package/dist/src/perception/cdp-source.js +162 -0
- package/dist/src/perception/coordinator.js +771 -0
- package/dist/src/perception/frame-differ.js +287 -0
- package/dist/src/perception/index.js +22 -0
- package/dist/src/perception/manager.js +199 -0
- package/dist/src/perception/types.js +47 -0
- package/dist/src/perception/vision-source.js +399 -0
- package/dist/src/planner/deterministic.js +298 -0
- package/dist/src/planner/executor.js +870 -0
- package/dist/src/planner/goal-store.js +92 -0
- package/dist/src/planner/index.js +21 -0
- package/dist/src/planner/planner.js +520 -0
- package/dist/src/planner/tool-registry.js +71 -0
- package/dist/src/planner/types.js +22 -0
- package/dist/src/platform/explorer.js +213 -0
- package/dist/src/platform/help-center-markdown.js +527 -0
- package/dist/src/platform/learner.js +257 -0
- package/dist/src/playbook/engine.js +486 -0
- package/dist/src/playbook/index.js +20 -0
- package/dist/src/playbook/mcp-recorder.js +204 -0
- package/dist/src/playbook/recorder.js +536 -0
- package/dist/src/playbook/runner.js +408 -0
- package/dist/src/playbook/store.js +312 -0
- package/dist/src/playbook/types.js +17 -0
- package/dist/src/recovery/detectors.js +156 -0
- package/dist/src/recovery/engine.js +327 -0
- package/dist/src/recovery/index.js +20 -0
- package/dist/src/recovery/strategies.js +274 -0
- package/dist/src/recovery/types.js +20 -0
- package/dist/src/runtime/accessibility-adapter.js +430 -0
- package/dist/src/runtime/app-adapter.js +64 -0
- package/dist/src/runtime/applescript-adapter.js +305 -0
- package/dist/src/runtime/ax-role-map.js +96 -0
- package/dist/src/runtime/browser-adapter.js +52 -0
- package/dist/src/runtime/cdp-chrome-adapter.js +521 -0
- package/dist/src/runtime/composite-adapter.js +221 -0
- package/dist/src/runtime/execution-contract.js +159 -0
- package/dist/src/runtime/executor.js +286 -0
- package/dist/src/runtime/locator-cache.js +50 -0
- package/dist/src/runtime/planning-loop.js +63 -0
- package/dist/src/runtime/service.js +432 -0
- package/dist/src/runtime/session-manager.js +63 -0
- package/dist/src/runtime/state-observer.js +121 -0
- package/dist/src/runtime/vision-adapter.js +225 -0
- package/dist/src/state/app-map-types.js +72 -0
- package/dist/src/state/app-map.js +1974 -0
- package/dist/src/state/entity-tracker.js +108 -0
- package/dist/src/state/fusion.js +96 -0
- package/dist/src/state/index.js +21 -0
- package/dist/src/state/ladder-generator.js +236 -0
- package/dist/src/state/persistence.js +156 -0
- package/dist/src/state/types.js +17 -0
- package/dist/src/state/world-model.js +1456 -0
- package/dist/src/supervisor/locks.js +186 -0
- package/dist/src/supervisor/supervisor.js +403 -0
- package/dist/src/supervisor/types.js +30 -0
- package/dist/src/test-mcp-protocol.js +154 -0
- package/dist/src/types.js +17 -0
- package/dist/src/util/atomic-write.js +133 -0
- package/dist/src/util/sanitize.js +146 -0
- package/dist-app-maps/com.figma.Desktop.json +959 -0
- package/dist-app-maps/com.hnc.Discord.json +1146 -0
- package/dist-app-maps/notion.id.json +2831 -0
- package/dist-playbooks/canva-screenhand-carousel.json +445 -0
- package/dist-playbooks/codex-desktop.json +76 -0
- package/dist-playbooks/competitor-research-stack.json +122 -0
- package/dist-playbooks/davinci-color-grade.json +153 -0
- package/dist-playbooks/davinci-edit-timeline.json +162 -0
- package/dist-playbooks/davinci-render.json +114 -0
- package/dist-playbooks/devto.json +52 -0
- package/dist-playbooks/discord.json +41 -0
- package/dist-playbooks/google-flow-create-project.json +59 -0
- package/dist-playbooks/google-flow-edit-image.json +90 -0
- package/dist-playbooks/google-flow-edit-video.json +90 -0
- package/dist-playbooks/google-flow-generate-image.json +68 -0
- package/dist-playbooks/google-flow-generate-video.json +191 -0
- package/dist-playbooks/google-flow-open-project.json +48 -0
- package/dist-playbooks/google-flow-open-scenebuilder.json +64 -0
- package/dist-playbooks/google-flow-search-assets.json +64 -0
- package/dist-playbooks/instagram.json +57 -0
- package/dist-playbooks/linkedin.json +52 -0
- package/dist-playbooks/n8n.json +43 -0
- package/dist-playbooks/reddit.json +52 -0
- package/dist-playbooks/threads.json +59 -0
- package/dist-playbooks/x-twitter.json +59 -0
- package/dist-playbooks/youtube.json +59 -0
- package/dist-references/canva.json +646 -0
- package/dist-references/codex-desktop.json +305 -0
- package/dist-references/davinci-resolve-keyboard.json +594 -0
- package/dist-references/davinci-resolve-menu-map.json +1139 -0
- package/dist-references/davinci-resolve-menus-batch1.json +116 -0
- package/dist-references/davinci-resolve-menus-batch2.json +372 -0
- package/dist-references/davinci-resolve-menus-batch3.json +330 -0
- package/dist-references/davinci-resolve-menus-batch4.json +297 -0
- package/dist-references/davinci-resolve-shortcuts.json +333 -0
- package/dist-references/devto.json +317 -0
- package/dist-references/discord.json +549 -0
- package/dist-references/figma.json +1186 -0
- package/dist-references/finder.json +146 -0
- package/dist-references/google-ads-transparency.json +95 -0
- package/dist-references/google-flow.json +649 -0
- package/dist-references/instagram.json +341 -0
- package/dist-references/linkedin.json +324 -0
- package/dist-references/meta-ad-library.json +86 -0
- package/dist-references/n8n.json +387 -0
- package/dist-references/notes.json +27 -0
- package/dist-references/notion.json +163 -0
- package/dist-references/reddit.json +341 -0
- package/dist-references/threads.json +337 -0
- package/dist-references/x-twitter.json +403 -0
- package/dist-references/youtube.json +373 -0
- package/native/macos-bridge/Package.swift +1 -0
- package/native/macos-bridge/Sources/AccessibilityBridge.swift +257 -36
- package/native/macos-bridge/Sources/AppManagement.swift +212 -2
- package/native/macos-bridge/Sources/CoreGraphicsBridge.swift +348 -53
- package/native/macos-bridge/Sources/StreamCapture.swift +136 -0
- package/native/macos-bridge/Sources/VisionBridge.swift +165 -7
- package/native/macos-bridge/Sources/main.swift +169 -16
- package/native/windows-bridge/Program.cs +5 -0
- package/native/windows-bridge/ScreenCapture.cs +124 -0
- package/package.json +29 -4
- package/scripts/postinstall.cjs +127 -0
- package/.claude/commands/automate.md +0 -28
- package/.claude/commands/debug-ui.md +0 -19
- package/.claude/commands/screenshot.md +0 -15
- package/.github/FUNDING.yml +0 -1
- package/.github/ISSUE_TEMPLATE/bug_report.md +0 -27
- package/.github/ISSUE_TEMPLATE/feature_request.md +0 -20
- package/.mcp.json +0 -8
- package/DESKTOP_MCP_GUIDE.md +0 -92
- package/SECURITY.md +0 -44
- package/docs/architecture.md +0 -47
- package/install-skills.sh +0 -19
- package/mcp-bridge.ts +0 -271
- package/mcp-desktop.ts +0 -1221
- package/playbooks/instagram.json +0 -41
- package/playbooks/instagram_v2.json +0 -201
- package/playbooks/x_v1.json +0 -211
- package/scripts/devpost-live-loop.mjs +0 -421
- package/src/logging/timeline-logger.ts +0 -55
- package/src/mcp/server.ts +0 -449
- package/src/memory/recall.ts +0 -191
- package/src/memory/research.ts +0 -146
- package/src/memory/seeds.ts +0 -123
- package/src/memory/session.ts +0 -201
- package/src/memory/store.ts +0 -434
- package/src/memory/types.ts +0 -69
- package/src/native/bridge-client.ts +0 -239
- package/src/runtime/accessibility-adapter.ts +0 -487
- package/src/runtime/app-adapter.ts +0 -169
- package/src/runtime/applescript-adapter.ts +0 -376
- package/src/runtime/ax-role-map.ts +0 -102
- package/src/runtime/browser-adapter.ts +0 -129
- package/src/runtime/cdp-chrome-adapter.ts +0 -676
- package/src/runtime/composite-adapter.ts +0 -274
- package/src/runtime/executor.ts +0 -396
- package/src/runtime/planning-loop.ts +0 -81
- package/src/runtime/service.ts +0 -448
- package/src/runtime/session-manager.ts +0 -50
- package/src/runtime/state-observer.ts +0 -136
- package/src/runtime/vision-adapter.ts +0 -297
- package/src/types.ts +0 -297
- package/tests/bridge-client.test.ts +0 -176
- package/tests/browser-stealth.test.ts +0 -210
- package/tests/composite-adapter.test.ts +0 -64
- package/tests/mcp-server.test.ts +0 -151
- package/tests/memory-recall.test.ts +0 -339
- package/tests/memory-research.test.ts +0 -159
- package/tests/memory-seeds.test.ts +0 -120
- package/tests/memory-store.test.ts +0 -392
- package/tests/types.test.ts +0 -92
- package/tsconfig.check.json +0 -17
- package/tsconfig.json +0 -19
- package/vitest.config.ts +0 -8
- /package/{playbooks → dist-references}/devpost.json +0 -0
|
@@ -0,0 +1,1456 @@
|
|
|
1
|
+
// Copyright (C) 2025 Clazro Technology Private Limited
|
|
2
|
+
// SPDX-License-Identifier: AGPL-3.0-only
|
|
3
|
+
//
|
|
4
|
+
// This file is part of ScreenHand.
|
|
5
|
+
//
|
|
6
|
+
// ScreenHand is free software: you can redistribute it and/or modify
|
|
7
|
+
// it under the terms of the GNU Affero General Public License as
|
|
8
|
+
// published by the Free Software Foundation, version 3.
|
|
9
|
+
//
|
|
10
|
+
// ScreenHand is distributed in the hope that it will be useful,
|
|
11
|
+
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
12
|
+
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
13
|
+
// GNU Affero General Public License for more details.
|
|
14
|
+
//
|
|
15
|
+
// You should have received a copy of the GNU Affero General Public License
|
|
16
|
+
// along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
|
|
17
|
+
import crypto from "node:crypto";
|
|
18
|
+
import fs from "node:fs";
|
|
19
|
+
import path from "node:path";
|
|
20
|
+
import { EntityTracker } from "./entity-tracker.js";
|
|
21
|
+
import { loadWorldState, saveWorldState, DebouncedPersister } from "./persistence.js";
|
|
22
|
+
/**
 * Report whether `value` conforms to a schema type name.
 *
 * Recognized names are "string", "number", "boolean", "array" and "object"
 * (a non-null object that is not an array). Any other name imposes no
 * constraint and always validates.
 *
 * @param {*} value - Candidate value.
 * @param {string} type - Schema type name.
 * @returns {boolean} True when the value matches, or when the type name is unrecognized.
 */
function validateSchemaType(value, type) {
    if (type === "array") {
        return Array.isArray(value);
    }
    if (type === "object") {
        return value !== null && typeof value === "object" && !Array.isArray(value);
    }
    if (type === "string" || type === "number" || type === "boolean") {
        // For the primitive names the schema type equals the typeof result.
        return typeof value === type;
    }
    // Unrecognized type names are deliberately permissive.
    return true;
}
|
|
32
|
+
// Baseline WorldModel settings; the constructor spreads caller config over these.
const DEFAULT_CONFIG = {
    // NOTE(review): presumably the per-minute exponential rate handed to applyDecay — confirm against callers.
    confidenceDecayRate: 0.05,
    // Five minutes, expressed in milliseconds.
    staleThresholdMs: 300_000,
    // Upper bound on controls collected per window by ingestAXTree.
    maxControlsPerWindow: 500,
    // Delay handed to DebouncedPersister.
    persistDebounceMs: 500,
};
// Normalized roles that ingestAXTree records as dialogs rather than window controls.
const DIALOG_ROLES = new Set([
    "sheet",
    "dialog",
    "alert",
    "popover",
    "modal",
]);
// Longest string (in UTF-16 code units) that sanitizeString lets through.
const MAX_STRING_LENGTH = 1000;
// Deepest AX-tree level the ingestAXTree walk will descend to.
const MAX_WALK_DEPTH = 50;
// URL schemes that sanitizeUrl accepts; everything else becomes "about:blocked".
const ALLOWED_URL_PROTOCOLS = new Set([
    "http:",
    "https:",
    "about:",
    "chrome:",
    "chrome-extension:",
]);
|
|
42
|
+
/**
 * Sanitize an untrusted string from AX/OCR/CDP sources.
 *
 * 1. Strip control characters (\x00-\x1F except \t \n \r) and DEL (\x7F).
 * 2. Truncate the stripped text to at most MAX_STRING_LENGTH UTF-16 code
 *    units, without cutting a surrogate pair in half.
 *
 * Non-string input is coerced with String() (null/undefined become "") so a
 * numeric or boolean value cannot crash ingestion with a TypeError.
 *
 * @param {*} s - Raw text (normally a string).
 * @returns {string} The cleaned, length-bounded string.
 */
function sanitizeString(s) {
    const text = typeof s === "string" ? s : String(s ?? "");
    // eslint-disable-next-line no-control-regex
    const stripped = text.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, "");
    if (stripped.length <= MAX_STRING_LENGTH) {
        return stripped;
    }
    let end = MAX_STRING_LENGTH;
    const lastUnit = stripped.charCodeAt(end - 1);
    // A high surrogate at the cut point would be left without its partner,
    // producing an ill-formed string; drop it instead.
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
        end -= 1;
    }
    return stripped.slice(0, end);
}
|
|
54
|
+
/** Lowercased parameter names whose values are redacted by sanitizeUrl. */
const SENSITIVE_URL_PARAMS = new Set([
    "code", "token", "access_token", "refresh_token", "id_token",
    "secret", "password", "key", "api_key", "apikey", "auth",
    "session", "session_id", "sessionid", "state", "nonce",
]);
/**
 * Validate a URL's protocol and redact secrets it carries.
 *
 * - Malformed URLs and URLs whose protocol is not in ALLOWED_URL_PROTOCOLS
 *   yield "about:blocked".
 * - Values of sensitive query parameters (matched case-insensitively against
 *   SENSITIVE_URL_PARAMS) are replaced with "[REDACTED]".
 * - The same redaction is applied to form-encoded fragments such as
 *   "#access_token=...&state=...", which would otherwise be stored verbatim.
 * - A password in the userinfo part ("https://user:pass@host") is replaced
 *   with "REDACTED".
 *
 * When nothing needed redacting the input is returned unchanged (not
 * re-serialized).
 *
 * @param {string} url - Untrusted URL text.
 * @returns {string} The original URL, a redacted serialization, or "about:blocked".
 */
function sanitizeUrl(url) {
    let parsed;
    try {
        parsed = new URL(url);
    }
    catch {
        // Malformed URL — block it
        return "about:blocked";
    }
    if (!ALLOWED_URL_PROTOCOLS.has(parsed.protocol)) {
        return "about:blocked";
    }
    // Redacts sensitive values in a URLSearchParams; returns true if any changed.
    const redactParams = (params) => {
        let touched = false;
        // Snapshot the names first: set() rewrites the list being iterated.
        for (const paramName of new Set(params.keys())) {
            if (SENSITIVE_URL_PARAMS.has(paramName.toLowerCase())) {
                params.set(paramName, "[REDACTED]");
                touched = true;
            }
        }
        return touched;
    };
    let redacted = redactParams(parsed.searchParams);
    // Only fragments that look like key=value lists are inspected; routing
    // fragments such as "#/inbox" are left untouched.
    if (parsed.hash.includes("=")) {
        const fragmentParams = new URLSearchParams(parsed.hash.slice(1));
        if (redactParams(fragmentParams)) {
            parsed.hash = fragmentParams.toString();
            redacted = true;
        }
    }
    if (parsed.password !== "") {
        parsed.password = "REDACTED";
        redacted = true;
    }
    return redacted ? parsed.toString() : url;
}
|
|
84
|
+
/**
 * [pattern, replacement] pairs that redactSensitiveLabel applies, in order,
 * to labels/titles before they are stored in the world model.
 * Covers email:password combos and Bearer tokens; long token-like strings
 * are handled separately inside redactSensitiveLabel.
 */
const SENSITIVE_LABEL_PATTERNS = [
    // "email:password" as seen in window titles,
    // e.g. "user@example.com:P@ssw0rd! - Chrome"
    [
        /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+:[^\s]+/g,
        "[CREDENTIALS_REDACTED]",
    ],
    // "Bearer <token>" authorization values
    [
        /Bearer\s+[A-Za-z0-9\-._~+/]+=*/g,
        "[BEARER_REDACTED]",
    ],
];
|
|
94
|
+
/**
 * Heuristic: does `s` look like a token or key?
 *
 * The string must be at least 32 characters long and contain at least three
 * of four character classes: uppercase, lowercase, digit, and one of
 * "-._~+/". This filters out repeated characters and plain words.
 *
 * @param {string} s - Candidate text.
 * @returns {boolean} True when the string looks token-like.
 */
function looksLikeToken(s) {
    if (s.length < 32) {
        return false;
    }
    const characterClasses = [/[A-Z]/, /[a-z]/, /[0-9]/, /[\-._~+/]/];
    let present = 0;
    for (const classPattern of characterClasses) {
        if (classPattern.test(s)) {
            present += 1;
        }
    }
    return present >= 3;
}
|
|
109
|
+
/**
 * Remove secrets from a label or title.
 *
 * First applies every SENSITIVE_LABEL_PATTERNS replacement in order, then
 * replaces any run of 32+ token characters (optionally followed by up to two
 * "=") with "[TOKEN_REDACTED]" when looksLikeToken accepts it.
 *
 * @param {string} label - Text to scrub.
 * @returns {string} The scrubbed text.
 */
function redactSensitiveLabel(label) {
    const scrubbed = SENSITIVE_LABEL_PATTERNS.reduce((text, [pattern, replacement]) => text.replace(pattern, replacement), label);
    // Redact long token-like strings (mixed case + digits + special, 32+ chars)
    return scrubbed.replace(/[A-Za-z0-9\-._~+/]{32,}={0,2}/g, (candidate) => {
        if (looksLikeToken(candidate)) {
            return "[TOKEN_REDACTED]";
        }
        return candidate;
    });
}
|
|
118
|
+
// Ordered [bundle-id pattern, app family] pairs; detectFamily returns the
// family of the first pattern that matches. Patterns without a trailing "$"
// also match longer bundle ids sharing the prefix.
const BUNDLE_FAMILY_MAP = [
    // Video editors
    [/^com\.blackmagic-design\.DaVinciResolve/, "video_editor"],
    [/^com\.adobe\.Premiere/, "video_editor"],
    [/^com\.apple\.FinalCut/, "video_editor"],
    // Image editors
    [/^com\.adobe\.Photoshop/, "image_editor"],
    [/^com\.adobe\.Illustrator/, "image_editor"],
    // Design tools
    [/^com\.figma\.Desktop$/, "design_tool"],
    // Browsers
    [/^com\.apple\.Safari$/, "browser"],
    [/^com\.google\.Chrome/, "browser"],
    [/^org\.mozilla\.firefox$/, "browser"],
    [/^com\.microsoft\.edgemac$/, "browser"],
];
|
|
130
|
+
/**
 * Normalize an accessibility role name.
 *
 * A leading "AX" is removed and the first remaining character is lowercased:
 * "AXRadioButton" → "radioButton", "AXWindow" → "window". Names without the
 * prefix (and the bare string "AX") are returned unchanged.
 *
 * @param {string} raw - Role name as reported by the source.
 * @returns {string} The normalized role.
 */
function normalizeRole(raw) {
    const isPrefixed = raw.startsWith("AX") && raw.length > 2;
    if (!isPrefixed) {
        return raw;
    }
    const bare = raw.slice(2);
    return bare.charAt(0).toLowerCase() + bare.slice(1);
}
|
|
140
|
+
/**
 * Derive a short identifier for a control from its role, label and position.
 *
 * Coordinates are snapped down to a 50-unit grid so small movements keep the
 * same id; a coordinate that coerces to NaN (including undefined) counts as 0.
 * The id is the first 12 hex characters of the SHA-256 of
 * "role|label|x,y".
 *
 * @param {string} role - Normalized role.
 * @param {string} label - Sanitized label.
 * @param {number} x - Horizontal position.
 * @param {number} y - Vertical position.
 * @returns {string} 12-character lowercase hex id.
 */
function computeStableId(role, label, x, y) {
    const GRID = 50;
    // The coercing global isNaN is intentional: undefined must map to 0.
    const snapToGrid = (coord) => (isNaN(coord) ? 0 : Math.floor(coord / GRID) * GRID);
    const hasher = crypto.createHash("sha256");
    hasher.update(`${role}|${label}|${snapToGrid(x)},${snapToGrid(y)}`);
    return hasher.digest("hex").slice(0, 12);
}
|
|
146
|
+
/**
 * Current time as an ISO-8601 UTC string (Date#toISOString format).
 *
 * @returns {string} Timestamp such as "2025-01-01T12:00:00.000Z".
 */
function now() {
    const current = new Date();
    return current.toISOString();
}
|
|
149
|
+
/**
 * Wrap a value with tracking metadata: full confidence (1.0) and the current
 * timestamp. A `stableId` property is attached only when the argument is not
 * undefined.
 *
 * @param {*} value - Value to wrap.
 * @param {string} [stableId] - Optional id of the owning control.
 * @returns {{value: *, confidence: number, updatedAt: string, stableId?: string}}
 */
function tracked(value, stableId) {
    const base = { value, confidence: 1.0, updatedAt: now() };
    return stableId === undefined ? base : { ...base, stableId };
}
|
|
155
|
+
/**
 * Return a copy of a tracked value with its confidence decayed by age.
 *
 * Confidence is multiplied by exp(-decayRate * elapsedMinutes), where the
 * elapsed time is measured from `t.updatedAt` to now. Elapsed time is clamped
 * at zero so a timestamp ahead of the local clock can never increase
 * confidence. The input object is not modified.
 *
 * An unparseable `updatedAt` still yields a NaN confidence, as before.
 *
 * @param {{confidence: number, updatedAt: string}} t - Tracked value.
 * @param {number} decayRate - Exponential decay rate per minute.
 * @returns {object} Shallow copy of `t` with the decayed confidence.
 */
function applyDecay(t, decayRate) {
    const elapsedMs = Date.now() - new Date(t.updatedAt).getTime();
    // Negative elapsed time (future timestamp) would turn decay into growth.
    const elapsedMinutes = Math.max(0, elapsedMs) / 60_000;
    const decayed = t.confidence * Math.exp(-decayRate * elapsedMinutes);
    return { ...t, confidence: decayed };
}
|
|
161
|
+
/**
 * Map a bundle id to an app family using BUNDLE_FAMILY_MAP.
 *
 * @param {string} bundleId - Application bundle identifier.
 * @returns {string} Family of the first matching pattern, or "generic".
 */
function detectFamily(bundleId) {
    const match = BUNDLE_FAMILY_MAP.find(([pattern]) => pattern.test(bundleId));
    return match ? match[1] : "generic";
}
|
|
168
|
+
/**
 * Build the initial domain-specific state object for an app family.
 *
 * Supported families: "video_editor", "image_editor", "design_tool",
 * "browser" and "generic". Any other value returns undefined.
 *
 * @param {string} family - App family name (see detectFamily).
 * @returns {object|undefined} Fresh state object tagged with its family.
 */
function createDefaultDomainState(family) {
    if (family === "video_editor") {
        return {
            family: "video_editor",
            timeline: null,
            activeTrack: null,
            playbackState: tracked("stopped"),
            playheadPosition: null,
            selectedClips: tracked([]),
            activeSequence: null,
            activePage: null,
            activeTool: null,
            renderStatus: null,
            mediaOffline: tracked(false),
        };
    }
    if (family === "image_editor") {
        return {
            family: "image_editor",
            canvasSize: null,
            activeTool: null,
            activeLayer: null,
            zoom: tracked(1.0),
            layerCount: tracked(0),
            selectedLayers: tracked([]),
            documentSize: null,
        };
    }
    if (family === "design_tool") {
        return {
            family: "design_tool",
            activePage: null,
            selectedElements: tracked([]),
            zoom: tracked(1.0),
            activeTool: null,
            sidebarPanel: null,
            canvasSize: null,
        };
    }
    if (family === "browser") {
        return {
            family: "browser",
            url: null,
            title: null,
        };
    }
    if (family === "generic") {
        return { family: "generic" };
    }
    // Unrecognized families have no default state.
    return undefined;
}
|
|
215
|
+
/**
 * Create a blank world state for a session: no windows, no focus, no dialogs,
 * empty domain/entity maps, full confidence, and both timestamps set to the
 * current time.
 *
 * @param {string} sessionId - Identifier stored on the state.
 * @returns {object} A new state object with fresh collections.
 */
function createEmptyState(sessionId) {
    const blank = {
        // Window and focus bookkeeping
        windows: new Map(),
        focusedWindowId: null,
        focusedApp: null,
        activeDialogs: [],
        appDomains: new Map(),
        lastFullScan: now(),
        // Session-level metadata
        sessionId,
        expectedPostcondition: null,
        updatedAt: now(),
        confidence: 1.0,
        pendingGoal: null,
        recentTransitions: [],
        trackedEntities: new Map(),
    };
    return blank;
}
|
|
232
|
+
export class WorldModel {
|
|
233
|
+
state;
|
|
234
|
+
config;
|
|
235
|
+
persister;
|
|
236
|
+
domainSchemaCache = new Map();
|
|
237
|
+
decayTimer = null;
|
|
238
|
+
entityTracker = new EntityTracker();
|
|
239
|
+
constructor(config) {
|
|
240
|
+
this.config = { ...DEFAULT_CONFIG, ...config };
|
|
241
|
+
const stateDir = this.config.stateDir;
|
|
242
|
+
this.persister = new DebouncedPersister(this.config.persistDebounceMs, stateDir ? (s) => saveWorldState(s, stateDir) : undefined);
|
|
243
|
+
this.state = createEmptyState("");
|
|
244
|
+
}
|
|
245
|
+
init(sessionId) {
|
|
246
|
+
// Full reset: clear all in-memory state before loading to prevent cross-session bleed
|
|
247
|
+
this.state.windows.clear();
|
|
248
|
+
this.state.focusedApp = null;
|
|
249
|
+
this.state.focusedWindowId = null;
|
|
250
|
+
this.state.activeDialogs = [];
|
|
251
|
+
this.state.appDomains.clear();
|
|
252
|
+
this.state.pendingGoal = null;
|
|
253
|
+
this.state.expectedPostcondition = null;
|
|
254
|
+
this.state.recentTransitions = [];
|
|
255
|
+
this.state.trackedEntities = new Map();
|
|
256
|
+
this.entityTracker.clear();
|
|
257
|
+
const loaded = loadWorldState(sessionId, this.config.stateDir);
|
|
258
|
+
this.state = loaded ?? createEmptyState(sessionId);
|
|
259
|
+
// Rehydrate entity tracker from persisted state
|
|
260
|
+
if (this.state.trackedEntities.size > 0) {
|
|
261
|
+
this.entityTracker.rehydrate(this.state.trackedEntities);
|
|
262
|
+
}
|
|
263
|
+
}
|
|
264
|
+
/**
|
|
265
|
+
* Merge an incoming control with an existing one using source confidence.
|
|
266
|
+
* Higher-confidence sources win unless the existing data is very recent (<5s).
|
|
267
|
+
*/
|
|
268
|
+
mergeControl(existing, incoming) {
|
|
269
|
+
if (!existing)
|
|
270
|
+
return incoming;
|
|
271
|
+
const existingConf = existing.sourceConfidence ?? 0;
|
|
272
|
+
const incomingConf = incoming.sourceConfidence ?? 0;
|
|
273
|
+
const existingAge = existing.lastSeenAt
|
|
274
|
+
? Date.now() - new Date(existing.lastSeenAt).getTime()
|
|
275
|
+
: Infinity;
|
|
276
|
+
// Keep existing if it has higher confidence AND is recent (<5s)
|
|
277
|
+
if (existingConf > incomingConf && existingAge < 5000) {
|
|
278
|
+
return existing;
|
|
279
|
+
}
|
|
280
|
+
return incoming;
|
|
281
|
+
}
|
|
282
|
+
ingestAXTree(windowId, tree, appContext, sourceConfidence = 0.9) {
|
|
283
|
+
const snap = this.takeSnapshot();
|
|
284
|
+
const controls = new Map();
|
|
285
|
+
let count = 0;
|
|
286
|
+
const max = this.config.maxControlsPerWindow;
|
|
287
|
+
const existing = this.state.windows.get(windowId);
|
|
288
|
+
const existingControls = existing?.controls ?? new Map();
|
|
289
|
+
const walk = (node, depth = 0) => {
|
|
290
|
+
if (count >= max)
|
|
291
|
+
return;
|
|
292
|
+
if (depth > MAX_WALK_DEPTH)
|
|
293
|
+
return;
|
|
294
|
+
if (!node.role) {
|
|
295
|
+
// Skip decorative nodes but walk children
|
|
296
|
+
if (node.children) {
|
|
297
|
+
for (const child of node.children)
|
|
298
|
+
walk(child, depth + 1);
|
|
299
|
+
}
|
|
300
|
+
return;
|
|
301
|
+
}
|
|
302
|
+
// Normalize AX roles: "AXButton" → "button", "AXRadioButton" → "radioButton"
|
|
303
|
+
const role = normalizeRole(node.role);
|
|
304
|
+
const rawLabel = node.title ?? node.description ?? "";
|
|
305
|
+
const label = sanitizeString(rawLabel);
|
|
306
|
+
const x = node.position?.x ?? 0;
|
|
307
|
+
const y = node.position?.y ?? 0;
|
|
308
|
+
// Skip off-screen/hidden menu items with zero size — they pollute the world model
|
|
309
|
+
// with meaningless coordinates and inflate control counts
|
|
310
|
+
const w = node.size?.width ?? 0;
|
|
311
|
+
const h = node.size?.height ?? 0;
|
|
312
|
+
if (role === "menuItem" && w === 0 && h === 0) {
|
|
313
|
+
// Still walk children (submenus may have real geometry)
|
|
314
|
+
if (node.children) {
|
|
315
|
+
for (const child of node.children)
|
|
316
|
+
walk(child, depth + 1);
|
|
317
|
+
}
|
|
318
|
+
return;
|
|
319
|
+
}
|
|
320
|
+
const sid = computeStableId(role, label, x, y);
|
|
321
|
+
const prev = existingControls.get(sid);
|
|
322
|
+
const control = {
|
|
323
|
+
stableId: sid,
|
|
324
|
+
role,
|
|
325
|
+
label: prev?.label && prev.label.value === label
|
|
326
|
+
? prev.label
|
|
327
|
+
: tracked(label, sid),
|
|
328
|
+
value: tracked(node.value != null ? sanitizeString(node.value) : null, sid),
|
|
329
|
+
enabled: tracked(node.enabled ?? true, sid),
|
|
330
|
+
focused: node.focused ?? false,
|
|
331
|
+
position: { x, y },
|
|
332
|
+
size: {
|
|
333
|
+
width: node.size?.width ?? 0,
|
|
334
|
+
height: node.size?.height ?? 0,
|
|
335
|
+
},
|
|
336
|
+
source: "ax",
|
|
337
|
+
sourceConfidence,
|
|
338
|
+
lastSeenAt: now(),
|
|
339
|
+
};
|
|
340
|
+
// Detect dialogs — do NOT add dialog root to window controls
|
|
341
|
+
if (DIALOG_ROLES.has(role)) {
|
|
342
|
+
const dialogType = (role === "modal" || role === "dialog" ? "modal" : role);
|
|
343
|
+
const dialogControls = new Map();
|
|
344
|
+
// Flatten dialog children into its controls
|
|
345
|
+
if (node.children) {
|
|
346
|
+
for (const child of node.children) {
|
|
347
|
+
if (!child.role)
|
|
348
|
+
continue;
|
|
349
|
+
const childRole = normalizeRole(child.role);
|
|
350
|
+
const cl = sanitizeString(child.title ?? child.description ?? "");
|
|
351
|
+
const cx = child.position?.x ?? 0;
|
|
352
|
+
const cy = child.position?.y ?? 0;
|
|
353
|
+
const csid = computeStableId(childRole, cl, cx, cy);
|
|
354
|
+
dialogControls.set(csid, {
|
|
355
|
+
stableId: csid,
|
|
356
|
+
role: childRole,
|
|
357
|
+
label: tracked(cl, csid),
|
|
358
|
+
value: tracked(child.value != null ? sanitizeString(child.value) : null, csid),
|
|
359
|
+
enabled: tracked(child.enabled ?? true, csid),
|
|
360
|
+
focused: child.focused ?? false,
|
|
361
|
+
position: { x: cx, y: cy },
|
|
362
|
+
size: {
|
|
363
|
+
width: child.size?.width ?? 0,
|
|
364
|
+
height: child.size?.height ?? 0,
|
|
365
|
+
},
|
|
366
|
+
});
|
|
367
|
+
}
|
|
368
|
+
}
|
|
369
|
+
// Extract button labels and message from dialog children
|
|
370
|
+
const buttons = [];
|
|
371
|
+
let message = null;
|
|
372
|
+
for (const ctrl of dialogControls.values()) {
|
|
373
|
+
if (ctrl.role === "button" && ctrl.label.value) {
|
|
374
|
+
buttons.push(ctrl.label.value);
|
|
375
|
+
}
|
|
376
|
+
if ((ctrl.role === "staticText" || ctrl.role === "text") && ctrl.label.value && ctrl.label.value.length > 10) {
|
|
377
|
+
message = ctrl.label.value;
|
|
378
|
+
}
|
|
379
|
+
}
|
|
380
|
+
// Detect special dialog types from title/message (only for generic modal/alert)
|
|
381
|
+
let detectedType = dialogType;
|
|
382
|
+
if (dialogType === "modal" || dialogType === "alert") {
|
|
383
|
+
const lowerLabel = label.toLowerCase();
|
|
384
|
+
if (lowerLabel.includes("save") || lowerLabel.includes("unsaved")) {
|
|
385
|
+
detectedType = "save";
|
|
386
|
+
}
|
|
387
|
+
else if (lowerLabel.includes("permission") || lowerLabel.includes("allow") || lowerLabel.includes("access")) {
|
|
388
|
+
detectedType = "permission";
|
|
389
|
+
}
|
|
390
|
+
}
|
|
391
|
+
this.state.activeDialogs.push({
|
|
392
|
+
type: detectedType,
|
|
393
|
+
title: label,
|
|
394
|
+
windowId,
|
|
395
|
+
controls: dialogControls,
|
|
396
|
+
detectedAt: now(),
|
|
397
|
+
message,
|
|
398
|
+
buttons,
|
|
399
|
+
source: "ax",
|
|
400
|
+
});
|
|
401
|
+
// Don't add dialog root or its children as regular window controls
|
|
402
|
+
return;
|
|
403
|
+
}
|
|
404
|
+
controls.set(sid, control);
|
|
405
|
+
count++;
|
|
406
|
+
if (node.children) {
|
|
407
|
+
for (const child of node.children)
|
|
408
|
+
walk(child, depth + 1);
|
|
409
|
+
}
|
|
410
|
+
};
|
|
411
|
+
// Clear existing dialogs for this window before re-ingesting
|
|
412
|
+
this.state.activeDialogs = this.state.activeDialogs.filter((d) => d.windowId !== windowId);
|
|
413
|
+
walk(tree);
|
|
414
|
+
// Entity tracking: match/create entities for panels, toolbars, tabs
|
|
415
|
+
const ENTITY_ROLES = new Set(["toolbar", "tabGroup", "group", "splitGroup"]);
|
|
416
|
+
for (const ctrl of controls.values()) {
|
|
417
|
+
if (ENTITY_ROLES.has(ctrl.role) && ctrl.label.value) {
|
|
418
|
+
const entityType = ctrl.role === "tabGroup" ? "tab" : "panel";
|
|
419
|
+
this.entityTracker.matchOrCreate(entityType, redactSensitiveLabel(ctrl.label.value), ctrl.position, windowId);
|
|
420
|
+
}
|
|
421
|
+
}
|
|
422
|
+
this.entityTracker.pruneStale(60_000);
|
|
423
|
+
this.state.trackedEntities = this.entityTracker.getAll();
|
|
424
|
+
// Find focused element and interactive controls
|
|
425
|
+
let focusedElement = null;
|
|
426
|
+
const visibleControls = [];
|
|
427
|
+
const INTERACTIVE_ROLES = new Set(["button", "checkbox", "radioButton", "textField", "slider", "popUpButton", "menuItem", "link", "tab", "incrementor", "comboBox"]);
|
|
428
|
+
for (const ctrl of controls.values()) {
|
|
429
|
+
if (ctrl.focused)
|
|
430
|
+
focusedElement = ctrl;
|
|
431
|
+
if (INTERACTIVE_ROLES.has(ctrl.role))
|
|
432
|
+
visibleControls.push(ctrl);
|
|
433
|
+
}
|
|
434
|
+
// Collect dialogs for this window from activeDialogs
|
|
435
|
+
const dialogStack = this.state.activeDialogs.filter((d) => d.windowId === windowId);
|
|
436
|
+
// Derive window title: prefer appContext, fall back to AX tree root node title
|
|
437
|
+
let windowTitle = appContext.windowTitle;
|
|
438
|
+
if (!windowTitle && tree.role) {
|
|
439
|
+
const rootRole = normalizeRole(tree.role);
|
|
440
|
+
if (rootRole === "window" || rootRole === "application") {
|
|
441
|
+
windowTitle = tree.title ?? "";
|
|
442
|
+
}
|
|
443
|
+
// Also check first window child if root is application
|
|
444
|
+
if (!windowTitle && tree.children) {
|
|
445
|
+
for (const child of tree.children) {
|
|
446
|
+
if (child.role && normalizeRole(child.role) === "window" && child.title) {
|
|
447
|
+
windowTitle = child.title;
|
|
448
|
+
break;
|
|
449
|
+
}
|
|
450
|
+
}
|
|
451
|
+
}
|
|
452
|
+
}
|
|
453
|
+
const winState = {
|
|
454
|
+
windowId,
|
|
455
|
+
title: tracked(redactSensitiveLabel(sanitizeString(windowTitle || existing?.title.value || ""))),
|
|
456
|
+
bundleId: appContext.bundleId,
|
|
457
|
+
pid: appContext.pid,
|
|
458
|
+
bounds: existing?.bounds ?? tracked({ x: 0, y: 0, width: 0, height: 0 }),
|
|
459
|
+
controls,
|
|
460
|
+
isOnScreen: true,
|
|
461
|
+
focusedElement,
|
|
462
|
+
visibleControls,
|
|
463
|
+
dialogStack,
|
|
464
|
+
scrollPosition: existing?.scrollPosition ?? null,
|
|
465
|
+
lastAXScanAt: now(),
|
|
466
|
+
lastCDPScanAt: existing?.lastCDPScanAt ?? null,
|
|
467
|
+
lastOCRAt: existing?.lastOCRAt ?? null,
|
|
468
|
+
lastScreenshotHash: existing?.lastScreenshotHash ?? null,
|
|
469
|
+
};
|
|
470
|
+
this.state.windows.set(windowId, winState);
|
|
471
|
+
this.state.lastFullScan = now();
|
|
472
|
+
this.state.updatedAt = now();
|
|
473
|
+
// Auto-set focusedWindowId if unset or if this window belongs to the focused app
|
|
474
|
+
if (this.state.focusedWindowId === null ||
|
|
475
|
+
(this.state.focusedApp && this.state.focusedApp.bundleId === appContext.bundleId)) {
|
|
476
|
+
this.state.focusedWindowId = windowId;
|
|
477
|
+
}
|
|
478
|
+
// Update focusedApp.pid if it was 0 (set by feedWorldModel) but we now have the real pid
|
|
479
|
+
if (this.state.focusedApp && this.state.focusedApp.bundleId === appContext.bundleId &&
|
|
480
|
+
this.state.focusedApp.pid === 0 && appContext.pid > 0) {
|
|
481
|
+
this.state.focusedApp.pid = appContext.pid;
|
|
482
|
+
}
|
|
483
|
+
// Ensure app domain state exists
|
|
484
|
+
if (!this.state.appDomains.has(appContext.bundleId)) {
|
|
485
|
+
const family = detectFamily(appContext.bundleId);
|
|
486
|
+
this.state.appDomains.set(appContext.bundleId, createDefaultDomainState(family));
|
|
487
|
+
}
|
|
488
|
+
this.recordTransitions(snap, "ax");
|
|
489
|
+
this.schedulePersist();
|
|
490
|
+
}
|
|
491
|
+
ingestUIEvents(events) {
|
|
492
|
+
const snap = this.takeSnapshot();
|
|
493
|
+
for (const event of events) {
|
|
494
|
+
switch (event.type) {
|
|
495
|
+
case "value_changed": {
|
|
496
|
+
if (event.elementRole && event.elementLabel) {
|
|
497
|
+
const control = this.findControlByRoleLabel(event.elementRole, event.elementLabel);
|
|
498
|
+
if (control) {
|
|
499
|
+
control.value = tracked(event.newValue ?? null, control.stableId);
|
|
500
|
+
}
|
|
501
|
+
}
|
|
502
|
+
break;
|
|
503
|
+
}
|
|
504
|
+
case "focus_changed": {
|
|
505
|
+
if (event.elementRole && event.elementLabel) {
|
|
506
|
+
const control = this.findControlByRoleLabel(event.elementRole, event.elementLabel);
|
|
507
|
+
if (control) {
|
|
508
|
+
control.focused = true;
|
|
509
|
+
}
|
|
510
|
+
}
|
|
511
|
+
break;
|
|
512
|
+
}
|
|
513
|
+
case "dialog_appeared": {
|
|
514
|
+
const dialogTitle = event.windowTitle ?? "";
|
|
515
|
+
// Dedup: skip if a dialog with the same title already exists
|
|
516
|
+
const alreadyExists = this.state.activeDialogs.some((d) => d.title === dialogTitle);
|
|
517
|
+
if (!alreadyExists) {
|
|
518
|
+
this.state.activeDialogs.push({
|
|
519
|
+
type: "modal",
|
|
520
|
+
title: dialogTitle,
|
|
521
|
+
windowId: 0,
|
|
522
|
+
controls: new Map(),
|
|
523
|
+
detectedAt: now(),
|
|
524
|
+
message: null,
|
|
525
|
+
buttons: [],
|
|
526
|
+
source: "observer",
|
|
527
|
+
});
|
|
528
|
+
}
|
|
529
|
+
break;
|
|
530
|
+
}
|
|
531
|
+
case "window_closed": {
|
|
532
|
+
// Collect IDs first to avoid Map mutation during iteration
|
|
533
|
+
const toDelete = [];
|
|
534
|
+
for (const [id, win] of this.state.windows) {
|
|
535
|
+
if (win.pid === event.pid) {
|
|
536
|
+
toDelete.push(id);
|
|
537
|
+
}
|
|
538
|
+
}
|
|
539
|
+
for (const id of toDelete) {
|
|
540
|
+
this.state.windows.delete(id);
|
|
541
|
+
}
|
|
542
|
+
// Purge orphaned dialogs for deleted windows
|
|
543
|
+
if (toDelete.length > 0) {
|
|
544
|
+
const deletedIds = new Set(toDelete);
|
|
545
|
+
this.state.activeDialogs = this.state.activeDialogs.filter((d) => !deletedIds.has(d.windowId));
|
|
546
|
+
}
|
|
547
|
+
break;
|
|
548
|
+
}
|
|
549
|
+
case "title_changed": {
|
|
550
|
+
// Update window title for any window matching this pid
|
|
551
|
+
for (const win of this.state.windows.values()) {
|
|
552
|
+
if (win.pid === event.pid && event.newValue) {
|
|
553
|
+
win.title = tracked(event.newValue, `win_${win.windowId}`);
|
|
554
|
+
}
|
|
555
|
+
}
|
|
556
|
+
break;
|
|
557
|
+
}
|
|
558
|
+
case "app_activated": {
|
|
559
|
+
// Update focused app from observer event
|
|
560
|
+
if (event.bundleId && event.pid) {
|
|
561
|
+
this.state.focusedApp = {
|
|
562
|
+
bundleId: event.bundleId,
|
|
563
|
+
appName: event.bundleId,
|
|
564
|
+
pid: event.pid,
|
|
565
|
+
};
|
|
566
|
+
// Set focusedWindowId to first window matching this pid
|
|
567
|
+
for (const [id, win] of this.state.windows) {
|
|
568
|
+
if (win.pid === event.pid) {
|
|
569
|
+
this.state.focusedWindowId = id;
|
|
570
|
+
break;
|
|
571
|
+
}
|
|
572
|
+
}
|
|
573
|
+
}
|
|
574
|
+
break;
|
|
575
|
+
}
|
|
576
|
+
case "window_created": {
|
|
577
|
+
// Mark that a new window appeared — will be populated on next AX scan
|
|
578
|
+
// For now just ensure focusedWindowId is set if it was null
|
|
579
|
+
if (this.state.focusedWindowId === null && event.pid) {
|
|
580
|
+
for (const [id, win] of this.state.windows) {
|
|
581
|
+
if (win.pid === event.pid) {
|
|
582
|
+
this.state.focusedWindowId = id;
|
|
583
|
+
break;
|
|
584
|
+
}
|
|
585
|
+
}
|
|
586
|
+
}
|
|
587
|
+
break;
|
|
588
|
+
}
|
|
589
|
+
case "app_deactivated": {
|
|
590
|
+
if (event.bundleId && this.state.focusedApp?.bundleId === event.bundleId) {
|
|
591
|
+
this.state.focusedApp = null;
|
|
592
|
+
this.state.focusedWindowId = null;
|
|
593
|
+
}
|
|
594
|
+
break;
|
|
595
|
+
}
|
|
596
|
+
case "layout_changed":
|
|
597
|
+
case "menu_opened":
|
|
598
|
+
// These don't need world model updates — they signal
|
|
599
|
+
// that a fresh AX scan would be useful (handled by perception)
|
|
600
|
+
break;
|
|
601
|
+
}
|
|
602
|
+
}
|
|
603
|
+
this.state.updatedAt = now();
|
|
604
|
+
this.recordTransitions(snap, "ui_event");
|
|
605
|
+
this.schedulePersist();
|
|
606
|
+
}
|
|
607
|
+
updateFocusedApp(appContext) {
|
|
608
|
+
const prevBundleId = this.state.focusedApp?.bundleId;
|
|
609
|
+
this.state.focusedApp = {
|
|
610
|
+
bundleId: appContext.bundleId,
|
|
611
|
+
appName: appContext.appName,
|
|
612
|
+
pid: appContext.pid,
|
|
613
|
+
};
|
|
614
|
+
this.state.focusedWindowId = appContext.windowId ?? null;
|
|
615
|
+
// Prune windows from the previous app to prevent stale accumulation
|
|
616
|
+
// across app switches. Keep windows from the new focused app and any
|
|
617
|
+
// windows seen in the last 30 seconds (to handle multi-window workflows).
|
|
618
|
+
if (prevBundleId && prevBundleId !== appContext.bundleId) {
|
|
619
|
+
const STALE_WINDOW_MS = 30_000;
|
|
620
|
+
const cutoff = Date.now() - STALE_WINDOW_MS;
|
|
621
|
+
const toDelete = [];
|
|
622
|
+
for (const [id, win] of this.state.windows) {
|
|
623
|
+
if (win.bundleId === appContext.bundleId)
|
|
624
|
+
continue; // keep new app's windows
|
|
625
|
+
const lastScan = win.lastAXScanAt ? new Date(win.lastAXScanAt).getTime() : 0;
|
|
626
|
+
if (lastScan < cutoff) {
|
|
627
|
+
toDelete.push(id);
|
|
628
|
+
}
|
|
629
|
+
}
|
|
630
|
+
for (const id of toDelete) {
|
|
631
|
+
this.state.windows.delete(id);
|
|
632
|
+
}
|
|
633
|
+
}
|
|
634
|
+
// Ensure app domain
|
|
635
|
+
if (!this.state.appDomains.has(appContext.bundleId)) {
|
|
636
|
+
const family = detectFamily(appContext.bundleId);
|
|
637
|
+
this.state.appDomains.set(appContext.bundleId, createDefaultDomainState(family));
|
|
638
|
+
}
|
|
639
|
+
this.state.updatedAt = now();
|
|
640
|
+
this.schedulePersist();
|
|
641
|
+
}
|
|
642
|
+
/** Set/clear the pending goal (used by planner). */
|
|
643
|
+
setPendingGoal(goal) {
|
|
644
|
+
this.state.pendingGoal = goal;
|
|
645
|
+
this.state.updatedAt = now();
|
|
646
|
+
}
|
|
647
|
+
getWindowState(windowId) {
|
|
648
|
+
const win = this.state.windows.get(windowId);
|
|
649
|
+
if (!win)
|
|
650
|
+
return null;
|
|
651
|
+
return {
|
|
652
|
+
...win,
|
|
653
|
+
title: applyDecay(win.title, this.config.confidenceDecayRate),
|
|
654
|
+
bounds: applyDecay(win.bounds, this.config.confidenceDecayRate),
|
|
655
|
+
};
|
|
656
|
+
}
|
|
657
|
+
getFocusedWindow() {
|
|
658
|
+
if (this.state.focusedWindowId === null)
|
|
659
|
+
return null;
|
|
660
|
+
return this.getWindowState(this.state.focusedWindowId);
|
|
661
|
+
}
|
|
662
|
+
getControl(stableId) {
|
|
663
|
+
for (const win of this.state.windows.values()) {
|
|
664
|
+
const control = win.controls.get(stableId);
|
|
665
|
+
if (control) {
|
|
666
|
+
return {
|
|
667
|
+
...control,
|
|
668
|
+
label: applyDecay(control.label, this.config.confidenceDecayRate),
|
|
669
|
+
value: applyDecay(control.value, this.config.confidenceDecayRate),
|
|
670
|
+
enabled: applyDecay(control.enabled, this.config.confidenceDecayRate),
|
|
671
|
+
};
|
|
672
|
+
}
|
|
673
|
+
}
|
|
674
|
+
return null;
|
|
675
|
+
}
|
|
676
|
+
getActiveDialogs() {
|
|
677
|
+
return this.state.activeDialogs;
|
|
678
|
+
}
|
|
679
|
+
getAppDomain(bundleId) {
|
|
680
|
+
return this.state.appDomains.get(bundleId) ?? null;
|
|
681
|
+
}
|
|
682
|
+
/**
|
|
683
|
+
* Load domain schema from a reference file matching the given bundleId.
|
|
684
|
+
* Scans references/ directory for JSON files with matching bundleId,
|
|
685
|
+
* extracts `domainSchema` key if present, and caches it.
|
|
686
|
+
*/
|
|
687
|
+
loadDomainSchema(bundleId) {
|
|
688
|
+
if (this.domainSchemaCache.has(bundleId)) {
|
|
689
|
+
return this.domainSchemaCache.get(bundleId) ?? null;
|
|
690
|
+
}
|
|
691
|
+
const refsDir = this.config.referencesDir ?? path.join(process.cwd(), "references");
|
|
692
|
+
let schema = null;
|
|
693
|
+
try {
|
|
694
|
+
const files = fs.readdirSync(refsDir);
|
|
695
|
+
for (const file of files) {
|
|
696
|
+
if (!file.endsWith(".json"))
|
|
697
|
+
continue;
|
|
698
|
+
try {
|
|
699
|
+
const raw = fs.readFileSync(path.join(refsDir, file), "utf-8");
|
|
700
|
+
const ref = JSON.parse(raw);
|
|
701
|
+
if (ref.bundleId === bundleId && ref.domainSchema) {
|
|
702
|
+
schema = ref.domainSchema;
|
|
703
|
+
break;
|
|
704
|
+
}
|
|
705
|
+
}
|
|
706
|
+
catch { /* skip malformed */ }
|
|
707
|
+
}
|
|
708
|
+
}
|
|
709
|
+
catch { /* dir doesn't exist */ }
|
|
710
|
+
this.domainSchemaCache.set(bundleId, schema);
|
|
711
|
+
return schema;
|
|
712
|
+
}
|
|
713
|
+
/**
|
|
714
|
+
* Update a domain state with partial data, optionally validating against
|
|
715
|
+
* a loaded domain schema. Creates the domain entry if it doesn't exist.
|
|
716
|
+
*/
|
|
717
|
+
updateDomainState(bundleId, partial) {
|
|
718
|
+
let domain = this.state.appDomains.get(bundleId);
|
|
719
|
+
if (!domain) {
|
|
720
|
+
const family = detectFamily(bundleId);
|
|
721
|
+
domain = createDefaultDomainState(family);
|
|
722
|
+
this.state.appDomains.set(bundleId, domain);
|
|
723
|
+
}
|
|
724
|
+
// Apply schema validation if a schema is loaded
|
|
725
|
+
const schema = this.domainSchemaCache.get(bundleId);
|
|
726
|
+
const domainRecord = domain;
|
|
727
|
+
for (const [key, value] of Object.entries(partial)) {
|
|
728
|
+
if (key === "family")
|
|
729
|
+
continue; // never override family
|
|
730
|
+
if (schema?.fields) {
|
|
731
|
+
const fieldDef = schema.fields[key];
|
|
732
|
+
if (fieldDef) {
|
|
733
|
+
// Validate type if schema specifies one
|
|
734
|
+
if (fieldDef.type && !validateSchemaType(value, fieldDef.type)) {
|
|
735
|
+
continue; // skip invalid value
|
|
736
|
+
}
|
|
737
|
+
}
|
|
738
|
+
// If schema has fields defined but this key isn't in it, skip
|
|
739
|
+
if (schema.strict && !(key in schema.fields))
|
|
740
|
+
continue;
|
|
741
|
+
}
|
|
742
|
+
domainRecord[key] = tracked(value);
|
|
743
|
+
}
|
|
744
|
+
this.state.updatedAt = now();
|
|
745
|
+
this.schedulePersist();
|
|
746
|
+
}
|
|
747
|
+
/**
|
|
748
|
+
* Get the focused element from the active window.
|
|
749
|
+
*/
|
|
750
|
+
getFocusedElement() {
|
|
751
|
+
const win = this.getFocusedWindow();
|
|
752
|
+
return win?.focusedElement ?? null;
|
|
753
|
+
}
|
|
754
|
+
/**
|
|
755
|
+
* Get the dialog stack (alias for getActiveDialogs for API symmetry).
|
|
756
|
+
*/
|
|
757
|
+
getDialogStack() {
|
|
758
|
+
return this.state.activeDialogs;
|
|
759
|
+
}
|
|
760
|
+
/**
|
|
761
|
+
* Get the domain state for the currently focused app.
|
|
762
|
+
*/
|
|
763
|
+
getDomainState() {
|
|
764
|
+
const bundleId = this.state.focusedApp?.bundleId;
|
|
765
|
+
if (!bundleId)
|
|
766
|
+
return null;
|
|
767
|
+
return this.state.appDomains.get(bundleId) ?? null;
|
|
768
|
+
}
|
|
769
|
+
/**
|
|
770
|
+
* Get a specific field from the focused app's domain state.
|
|
771
|
+
*/
|
|
772
|
+
getDomainField(key) {
|
|
773
|
+
const domain = this.getDomainState();
|
|
774
|
+
if (!domain)
|
|
775
|
+
return undefined;
|
|
776
|
+
return domain[key];
|
|
777
|
+
}
|
|
778
|
+
/**
|
|
779
|
+
* Get the app family for the currently focused app.
|
|
780
|
+
*/
|
|
781
|
+
getAppFamily() {
|
|
782
|
+
const domain = this.getDomainState();
|
|
783
|
+
return domain?.family ?? null;
|
|
784
|
+
}
|
|
785
|
+
/**
|
|
786
|
+
* Read confidence at a dotted path (e.g. "focusedWindow.title", "control.<stableId>.value").
|
|
787
|
+
*/
|
|
788
|
+
getConfidence(path) {
|
|
789
|
+
const parts = path.split(".");
|
|
790
|
+
if (parts[0] === "focusedWindow") {
|
|
791
|
+
const win = this.getFocusedWindow();
|
|
792
|
+
if (!win)
|
|
793
|
+
return 0;
|
|
794
|
+
const field = parts[1];
|
|
795
|
+
if (field === "title")
|
|
796
|
+
return win.title.confidence;
|
|
797
|
+
if (field === "bounds")
|
|
798
|
+
return win.bounds.confidence;
|
|
799
|
+
return 0;
|
|
800
|
+
}
|
|
801
|
+
if (parts[0] === "control" && parts.length >= 3) {
|
|
802
|
+
const ctrl = this.getControl(parts[1]);
|
|
803
|
+
if (!ctrl)
|
|
804
|
+
return 0;
|
|
805
|
+
const field = parts[2];
|
|
806
|
+
if (field === "label")
|
|
807
|
+
return ctrl.label.confidence;
|
|
808
|
+
if (field === "value")
|
|
809
|
+
return ctrl.value.confidence;
|
|
810
|
+
if (field === "enabled")
|
|
811
|
+
return ctrl.enabled.confidence;
|
|
812
|
+
return 0;
|
|
813
|
+
}
|
|
814
|
+
if (parts[0] === "state") {
|
|
815
|
+
return this.state.confidence;
|
|
816
|
+
}
|
|
817
|
+
return 0;
|
|
818
|
+
}
|
|
819
|
+
assertState(assertion) {
|
|
820
|
+
return this.assertStateDetailed(assertion).matched;
|
|
821
|
+
}
|
|
822
|
+
/**
|
|
823
|
+
* Assert state with detailed result including actual value and confidence.
|
|
824
|
+
*/
|
|
825
|
+
assertStateDetailed(assertion) {
|
|
826
|
+
switch (assertion.type) {
|
|
827
|
+
case "control_exists": {
|
|
828
|
+
const ctrl = this.getControl(assertion.target);
|
|
829
|
+
return {
|
|
830
|
+
matched: ctrl !== null,
|
|
831
|
+
actual: ctrl ? `${ctrl.role} "${ctrl.label.value}"` : null,
|
|
832
|
+
confidence: ctrl ? ctrl.label.confidence : 0,
|
|
833
|
+
};
|
|
834
|
+
}
|
|
835
|
+
case "control_absent": {
|
|
836
|
+
const ctrl = this.getControl(assertion.target);
|
|
837
|
+
return {
|
|
838
|
+
matched: ctrl === null,
|
|
839
|
+
actual: ctrl ? `${ctrl.role} "${ctrl.label.value}"` : null,
|
|
840
|
+
confidence: ctrl === null ? 1.0 : ctrl.label.confidence,
|
|
841
|
+
};
|
|
842
|
+
}
|
|
843
|
+
case "value_equals": {
|
|
844
|
+
const ctrl = this.getControl(assertion.target);
|
|
845
|
+
if (!ctrl)
|
|
846
|
+
return { matched: false, actual: null, confidence: 0 };
|
|
847
|
+
const actual = ctrl.value.value;
|
|
848
|
+
return {
|
|
849
|
+
matched: actual === assertion.expected,
|
|
850
|
+
actual: actual !== null ? String(actual) : null,
|
|
851
|
+
confidence: ctrl.value.confidence,
|
|
852
|
+
};
|
|
853
|
+
}
|
|
854
|
+
case "control_enabled": {
|
|
855
|
+
const ctrl = this.getControl(assertion.target);
|
|
856
|
+
if (!ctrl)
|
|
857
|
+
return { matched: false, actual: null, confidence: 0 };
|
|
858
|
+
const expectedEnabled = assertion.expected !== false;
|
|
859
|
+
return {
|
|
860
|
+
matched: ctrl.enabled.value === expectedEnabled,
|
|
861
|
+
actual: String(ctrl.enabled.value),
|
|
862
|
+
confidence: ctrl.enabled.confidence,
|
|
863
|
+
};
|
|
864
|
+
}
|
|
865
|
+
case "window_focused": {
|
|
866
|
+
const targetWindowId = Number(assertion.target);
|
|
867
|
+
if (!Number.isFinite(targetWindowId)) {
|
|
868
|
+
return { matched: false, actual: null, confidence: 0 };
|
|
869
|
+
}
|
|
870
|
+
const matched = this.state.focusedWindowId === targetWindowId;
|
|
871
|
+
return {
|
|
872
|
+
matched,
|
|
873
|
+
actual: this.state.focusedWindowId !== null ? String(this.state.focusedWindowId) : null,
|
|
874
|
+
confidence: 1.0,
|
|
875
|
+
};
|
|
876
|
+
}
|
|
877
|
+
case "app_focused": {
|
|
878
|
+
const matched = this.state.focusedApp?.bundleId === assertion.target;
|
|
879
|
+
return {
|
|
880
|
+
matched,
|
|
881
|
+
actual: this.state.focusedApp?.bundleId ?? null,
|
|
882
|
+
confidence: 1.0,
|
|
883
|
+
};
|
|
884
|
+
}
|
|
885
|
+
case "dialog_absent": {
|
|
886
|
+
const found = this.state.activeDialogs.find((d) => d.title === assertion.target);
|
|
887
|
+
return {
|
|
888
|
+
matched: !found,
|
|
889
|
+
actual: found ? `dialog: "${found.title}"` : null,
|
|
890
|
+
confidence: 1.0,
|
|
891
|
+
};
|
|
892
|
+
}
|
|
893
|
+
case "dialog_present": {
|
|
894
|
+
const found = this.state.activeDialogs.find((d) => d.title === assertion.target);
|
|
895
|
+
return {
|
|
896
|
+
matched: !!found,
|
|
897
|
+
actual: found ? `dialog: "${found.title}"` : null,
|
|
898
|
+
confidence: 1.0,
|
|
899
|
+
};
|
|
900
|
+
}
|
|
901
|
+
case "url_equals": {
|
|
902
|
+
// Check browser domain state for URL match
|
|
903
|
+
for (const domain of this.state.appDomains.values()) {
|
|
904
|
+
if (domain.family === "browser" && domain.url) {
|
|
905
|
+
const urlTracked = domain.url;
|
|
906
|
+
const matched = urlTracked.value === assertion.target ||
|
|
907
|
+
urlTracked.value.startsWith(assertion.target);
|
|
908
|
+
return {
|
|
909
|
+
matched,
|
|
910
|
+
actual: urlTracked.value,
|
|
911
|
+
confidence: urlTracked.confidence,
|
|
912
|
+
};
|
|
913
|
+
}
|
|
914
|
+
}
|
|
915
|
+
return { matched: false, actual: null, confidence: 0 };
|
|
916
|
+
}
|
|
917
|
+
case "text_visible": {
|
|
918
|
+
// Fuzzy text search across all controls in all windows
|
|
919
|
+
if (!assertion.target) {
|
|
920
|
+
return { matched: false, actual: null, confidence: 0 };
|
|
921
|
+
}
|
|
922
|
+
const targetLower = assertion.target.toLowerCase();
|
|
923
|
+
for (const win of this.state.windows.values()) {
|
|
924
|
+
for (const ctrl of win.controls.values()) {
|
|
925
|
+
if (ctrl.label.value?.toLowerCase().includes(targetLower)) {
|
|
926
|
+
return {
|
|
927
|
+
matched: true,
|
|
928
|
+
actual: `${ctrl.role} "${ctrl.label.value}"`,
|
|
929
|
+
confidence: ctrl.label.confidence,
|
|
930
|
+
};
|
|
931
|
+
}
|
|
932
|
+
}
|
|
933
|
+
}
|
|
934
|
+
return { matched: false, actual: null, confidence: 0 };
|
|
935
|
+
}
|
|
936
|
+
}
|
|
937
|
+
}
|
|
938
|
+
/**
|
|
939
|
+
* Set an expected postcondition BEFORE executing an action.
|
|
940
|
+
* Call verifyPostcondition() AFTER the action completes.
|
|
941
|
+
*/
|
|
942
|
+
setExpectedPostcondition(assertion) {
|
|
943
|
+
this.state.expectedPostcondition = assertion;
|
|
944
|
+
}
|
|
945
|
+
/**
|
|
946
|
+
* Verify the previously set postcondition against current state.
|
|
947
|
+
* Returns detailed result with match status, actual value, and confidence.
|
|
948
|
+
* Clears the stored postcondition after verification.
|
|
949
|
+
*/
|
|
950
|
+
verifyPostcondition() {
|
|
951
|
+
const assertion = this.state.expectedPostcondition;
|
|
952
|
+
if (!assertion) {
|
|
953
|
+
return { matched: true, actual: null, confidence: 1.0 };
|
|
954
|
+
}
|
|
955
|
+
this.state.expectedPostcondition = null;
|
|
956
|
+
return this.assertStateDetailed(assertion);
|
|
957
|
+
}
|
|
958
|
+
getStaleControls(thresholdMs) {
|
|
959
|
+
const threshold = thresholdMs ?? this.config.staleThresholdMs;
|
|
960
|
+
const cutoff = Date.now() - threshold;
|
|
961
|
+
const stale = [];
|
|
962
|
+
for (const win of this.state.windows.values()) {
|
|
963
|
+
for (const control of win.controls.values()) {
|
|
964
|
+
if (new Date(control.value.updatedAt).getTime() < cutoff) {
|
|
965
|
+
stale.push(control);
|
|
966
|
+
}
|
|
967
|
+
}
|
|
968
|
+
}
|
|
969
|
+
return stale;
|
|
970
|
+
}
|
|
971
|
+
toSummary() {
|
|
972
|
+
const winCount = this.state.windows.size;
|
|
973
|
+
let controlCount = 0;
|
|
974
|
+
for (const win of this.state.windows.values()) {
|
|
975
|
+
controlCount += win.controls.size;
|
|
976
|
+
}
|
|
977
|
+
const dialogCount = this.state.activeDialogs.length;
|
|
978
|
+
const focused = this.state.focusedApp;
|
|
979
|
+
const parts = [];
|
|
980
|
+
parts.push(`${winCount} window(s), ${controlCount} control(s) tracked`);
|
|
981
|
+
if (focused) {
|
|
982
|
+
parts.push(`Focused: ${focused.appName} (${focused.bundleId})`);
|
|
983
|
+
}
|
|
984
|
+
if (dialogCount > 0) {
|
|
985
|
+
parts.push(`${dialogCount} active dialog(s): ${this.state.activeDialogs.map((d) => sanitizeString(d.title || d.type)).join(", ")}`);
|
|
986
|
+
}
|
|
987
|
+
if (this.state.lastFullScan) {
|
|
988
|
+
const scanAge = Date.now() - new Date(this.state.lastFullScan).getTime();
|
|
989
|
+
const scanAgeSec = Math.round(scanAge / 1000);
|
|
990
|
+
// Show "never" for unreasonable ages (> 1 hour likely means epoch default)
|
|
991
|
+
if (scanAgeSec < 3600) {
|
|
992
|
+
parts.push(`Last scan: ${scanAgeSec}s ago`);
|
|
993
|
+
}
|
|
994
|
+
else {
|
|
995
|
+
parts.push("Last scan: never (no perception data received)");
|
|
996
|
+
}
|
|
997
|
+
}
|
|
998
|
+
else {
|
|
999
|
+
parts.push("Last scan: never");
|
|
1000
|
+
}
|
|
1001
|
+
return parts.join("\n");
|
|
1002
|
+
}
|
|
1003
|
+
/**
|
|
1004
|
+
* Update browser domain state from a CDP snapshot (url, title).
|
|
1005
|
+
*/
|
|
1006
|
+
ingestCDPSnapshot(bundleId, url, title, windowId) {
|
|
1007
|
+
const snap = this.takeSnapshot();
|
|
1008
|
+
const safeUrl = sanitizeUrl(url);
|
|
1009
|
+
const safeTitle = sanitizeString(title);
|
|
1010
|
+
let domain = this.state.appDomains.get(bundleId);
|
|
1011
|
+
if (!domain) {
|
|
1012
|
+
domain = { family: "browser", url: null, title: null };
|
|
1013
|
+
this.state.appDomains.set(bundleId, domain);
|
|
1014
|
+
}
|
|
1015
|
+
if (domain.family === "browser") {
|
|
1016
|
+
domain.url = tracked(safeUrl);
|
|
1017
|
+
domain.title = tracked(safeTitle);
|
|
1018
|
+
}
|
|
1019
|
+
// Mark lastCDPScanAt on the window if we know which one
|
|
1020
|
+
if (windowId !== undefined) {
|
|
1021
|
+
const win = this.state.windows.get(windowId);
|
|
1022
|
+
if (win)
|
|
1023
|
+
win.lastCDPScanAt = now();
|
|
1024
|
+
}
|
|
1025
|
+
else {
|
|
1026
|
+
// Best effort: mark the focused window
|
|
1027
|
+
for (const win of this.state.windows.values()) {
|
|
1028
|
+
if (win.bundleId === bundleId) {
|
|
1029
|
+
win.lastCDPScanAt = now();
|
|
1030
|
+
break;
|
|
1031
|
+
}
|
|
1032
|
+
}
|
|
1033
|
+
}
|
|
1034
|
+
this.state.updatedAt = now();
|
|
1035
|
+
this.recordTransitions(snap, "cdp");
|
|
1036
|
+
this.schedulePersist();
|
|
1037
|
+
}
|
|
1038
|
+
/**
|
|
1039
|
+
* Ingest Safari browser state from AppleScript (URL, title, tabs).
|
|
1040
|
+
* This is the non-CDP path for Safari browser enrichment.
|
|
1041
|
+
*/
|
|
1042
|
+
ingestSafariBrowserState(url, title, tabs) {
|
|
1043
|
+
const bundleId = "com.apple.Safari";
|
|
1044
|
+
let domain = this.state.appDomains.get(bundleId);
|
|
1045
|
+
if (!domain) {
|
|
1046
|
+
domain = { family: "browser", url: null, title: null };
|
|
1047
|
+
this.state.appDomains.set(bundleId, domain);
|
|
1048
|
+
}
|
|
1049
|
+
if (domain.family === "browser") {
|
|
1050
|
+
const bs = domain;
|
|
1051
|
+
bs.url = tracked(sanitizeUrl(url));
|
|
1052
|
+
bs.title = tracked(sanitizeString(title));
|
|
1053
|
+
if (tabs) {
|
|
1054
|
+
bs.tabs = tabs.map(t => ({
|
|
1055
|
+
...t,
|
|
1056
|
+
url: sanitizeUrl(t.url),
|
|
1057
|
+
title: sanitizeString(t.title),
|
|
1058
|
+
}));
|
|
1059
|
+
}
|
|
1060
|
+
}
|
|
1061
|
+
this.state.updatedAt = now();
|
|
1062
|
+
this.schedulePersist();
|
|
1063
|
+
}
|
|
1064
|
+
/**
|
|
1065
|
+
* Ingest CDP DOM mutations into the world model.
|
|
1066
|
+
* Called from perception coordinator's fast cycle when mutations are drained.
|
|
1067
|
+
*/
|
|
1068
|
+
ingestCDPMutations(bundleId, mutations) {
|
|
1069
|
+
// Find browser window for this bundleId
|
|
1070
|
+
let targetWin = null;
|
|
1071
|
+
for (const win of this.state.windows.values()) {
|
|
1072
|
+
if (win.bundleId === bundleId) {
|
|
1073
|
+
targetWin = win;
|
|
1074
|
+
break;
|
|
1075
|
+
}
|
|
1076
|
+
}
|
|
1077
|
+
// Fallback: use focused app's window if bundleId matches, but only pick
|
|
1078
|
+
// a window that actually belongs to the same app (matching bundleId or pid)
|
|
1079
|
+
if (!targetWin && this.state.focusedApp?.bundleId === bundleId) {
|
|
1080
|
+
const focusedPid = this.state.focusedApp.pid;
|
|
1081
|
+
for (const win of this.state.windows.values()) {
|
|
1082
|
+
if (win.bundleId === bundleId || (focusedPid != null && win.pid === focusedPid)) {
|
|
1083
|
+
targetWin = win;
|
|
1084
|
+
break;
|
|
1085
|
+
}
|
|
1086
|
+
}
|
|
1087
|
+
}
|
|
1088
|
+
if (!targetWin)
|
|
1089
|
+
return;
|
|
1090
|
+
for (const mut of mutations) {
|
|
1091
|
+
if (mut.addedNodes && mut.addedNodes > 0) {
|
|
1092
|
+
const controlId = `cdp_${mut.selector}`;
|
|
1093
|
+
if (targetWin.controls.size < this.config.maxControlsPerWindow) {
|
|
1094
|
+
const incoming = {
|
|
1095
|
+
stableId: controlId,
|
|
1096
|
+
role: "AXWebArea",
|
|
1097
|
+
label: tracked(mut.selector, controlId),
|
|
1098
|
+
value: tracked(null, controlId),
|
|
1099
|
+
enabled: tracked(true, controlId),
|
|
1100
|
+
focused: false,
|
|
1101
|
+
position: { x: 0, y: 0 },
|
|
1102
|
+
size: { width: 0, height: 0 },
|
|
1103
|
+
source: "cdp",
|
|
1104
|
+
sourceConfidence: 0.85,
|
|
1105
|
+
lastSeenAt: now(),
|
|
1106
|
+
};
|
|
1107
|
+
const existing = targetWin.controls.get(controlId);
|
|
1108
|
+
targetWin.controls.set(controlId, this.mergeControl(existing, incoming));
|
|
1109
|
+
}
|
|
1110
|
+
}
|
|
1111
|
+
if (mut.attribute && mut.newValue) {
|
|
1112
|
+
for (const [id, ctrl] of targetWin.controls) {
|
|
1113
|
+
if (id.includes(mut.selector) || ctrl.label.value === mut.selector) {
|
|
1114
|
+
ctrl.label = tracked(mut.newValue, ctrl.stableId);
|
|
1115
|
+
break;
|
|
1116
|
+
}
|
|
1117
|
+
}
|
|
1118
|
+
}
|
|
1119
|
+
}
|
|
1120
|
+
this.state.updatedAt = now();
|
|
1121
|
+
this.schedulePersist();
|
|
1122
|
+
}
|
|
1123
|
+
/**
|
|
1124
|
+
* Update controls from OCR text regions (vision source).
|
|
1125
|
+
* Creates synthetic controls for text regions found by OCR.
|
|
1126
|
+
*/
|
|
1127
|
+
ingestOCRRegions(windowId, regions, sourceConfidence = 0.7) {
|
|
1128
|
+
const snap = this.takeSnapshot();
|
|
1129
|
+
const win = this.state.windows.get(windowId);
|
|
1130
|
+
if (!win)
|
|
1131
|
+
return;
|
|
1132
|
+
for (const region of regions) {
|
|
1133
|
+
// Sanitize OCR text: replace newlines with spaces, then apply standard sanitization
|
|
1134
|
+
const cleanText = sanitizeString(region.text.replace(/[\r\n]+/g, " "));
|
|
1135
|
+
const sid = computeStableId("staticText", cleanText, region.bounds.x, region.bounds.y);
|
|
1136
|
+
const incoming = {
|
|
1137
|
+
stableId: sid,
|
|
1138
|
+
role: "staticText",
|
|
1139
|
+
label: tracked(cleanText, sid),
|
|
1140
|
+
value: tracked(cleanText, sid),
|
|
1141
|
+
enabled: tracked(true, sid),
|
|
1142
|
+
focused: false,
|
|
1143
|
+
position: { x: region.bounds.x, y: region.bounds.y },
|
|
1144
|
+
size: { width: region.bounds.width, height: region.bounds.height },
|
|
1145
|
+
source: "ocr",
|
|
1146
|
+
sourceConfidence,
|
|
1147
|
+
lastSeenAt: now(),
|
|
1148
|
+
};
|
|
1149
|
+
const existing = win.controls.get(sid);
|
|
1150
|
+
const merged = this.mergeControl(existing, incoming);
|
|
1151
|
+
if (merged === incoming && !existing && win.controls.size >= this.config.maxControlsPerWindow) {
|
|
1152
|
+
continue; // at capacity, skip new controls
|
|
1153
|
+
}
|
|
1154
|
+
win.controls.set(sid, merged);
|
|
1155
|
+
}
|
|
1156
|
+
win.lastOCRAt = now();
|
|
1157
|
+
this.state.updatedAt = now();
|
|
1158
|
+
this.recordTransitions(snap, "ocr");
|
|
1159
|
+
this.schedulePersist();
|
|
1160
|
+
}
|
|
1161
|
+
/**
|
|
1162
|
+
* Get recent state transitions (max 50, newest last).
|
|
1163
|
+
*/
|
|
1164
|
+
getRecentTransitions() {
|
|
1165
|
+
return this.state.recentTransitions;
|
|
1166
|
+
}
|
|
1167
|
+
/**
|
|
1168
|
+
* Diff two WorldState objects and return the state transitions between them.
|
|
1169
|
+
* Useful for external callers that need to compare snapshots without mutating internal state.
|
|
1170
|
+
*/
|
|
1171
|
+
static diffStates(before, after) {
|
|
1172
|
+
const ts = now();
|
|
1173
|
+
const transitions = [];
|
|
1174
|
+
// Focus change
|
|
1175
|
+
const beforeBundleId = before.focusedApp?.bundleId ?? null;
|
|
1176
|
+
const afterBundleId = after.focusedApp?.bundleId ?? null;
|
|
1177
|
+
if (beforeBundleId !== afterBundleId) {
|
|
1178
|
+
transitions.push({
|
|
1179
|
+
from: beforeBundleId ?? "(none)",
|
|
1180
|
+
to: afterBundleId ?? "(none)",
|
|
1181
|
+
trigger: "diff:focus_changed",
|
|
1182
|
+
timestamp: ts,
|
|
1183
|
+
});
|
|
1184
|
+
}
|
|
1185
|
+
// Window added/removed
|
|
1186
|
+
const beforeWindowIds = new Set(before.windows.keys());
|
|
1187
|
+
const afterWindowIds = new Set(after.windows.keys());
|
|
1188
|
+
for (const id of afterWindowIds) {
|
|
1189
|
+
if (!beforeWindowIds.has(id)) {
|
|
1190
|
+
const win = after.windows.get(id);
|
|
1191
|
+
transitions.push({
|
|
1192
|
+
from: "(none)",
|
|
1193
|
+
to: win?.title.value ?? String(id),
|
|
1194
|
+
trigger: "diff:window_added",
|
|
1195
|
+
timestamp: ts,
|
|
1196
|
+
});
|
|
1197
|
+
}
|
|
1198
|
+
}
|
|
1199
|
+
for (const id of beforeWindowIds) {
|
|
1200
|
+
if (!afterWindowIds.has(id)) {
|
|
1201
|
+
const win = before.windows.get(id);
|
|
1202
|
+
transitions.push({
|
|
1203
|
+
from: win?.title.value ?? String(id),
|
|
1204
|
+
to: "(none)",
|
|
1205
|
+
trigger: "diff:window_removed",
|
|
1206
|
+
timestamp: ts,
|
|
1207
|
+
});
|
|
1208
|
+
}
|
|
1209
|
+
}
|
|
1210
|
+
// Window title changes
|
|
1211
|
+
for (const [id, beforeWin] of before.windows) {
|
|
1212
|
+
const afterWin = after.windows.get(id);
|
|
1213
|
+
if (afterWin && afterWin.title.value !== beforeWin.title.value) {
|
|
1214
|
+
transitions.push({
|
|
1215
|
+
from: beforeWin.title.value,
|
|
1216
|
+
to: afterWin.title.value,
|
|
1217
|
+
trigger: "diff:title_changed",
|
|
1218
|
+
timestamp: ts,
|
|
1219
|
+
});
|
|
1220
|
+
}
|
|
1221
|
+
}
|
|
1222
|
+
// Dialog count changes
|
|
1223
|
+
if (before.activeDialogs.length !== after.activeDialogs.length) {
|
|
1224
|
+
transitions.push({
|
|
1225
|
+
from: String(before.activeDialogs.length),
|
|
1226
|
+
to: String(after.activeDialogs.length),
|
|
1227
|
+
trigger: "diff:dialog_count_changed",
|
|
1228
|
+
timestamp: ts,
|
|
1229
|
+
});
|
|
1230
|
+
}
|
|
1231
|
+
return transitions;
|
|
1232
|
+
}
|
|
1233
|
+
flush() {
|
|
1234
|
+
this.persister.flush();
|
|
1235
|
+
}
|
|
1236
|
+
/**
|
|
1237
|
+
* Update the screenshot hash for a specific window.
|
|
1238
|
+
* Used by perception coordinator to record vision diffs without
|
|
1239
|
+
* directly mutating world model state.
|
|
1240
|
+
*/
|
|
1241
|
+
updateWindowScreenshotHash(windowId, hash) {
|
|
1242
|
+
const win = this.state.windows.get(windowId);
|
|
1243
|
+
if (win) {
|
|
1244
|
+
win.lastScreenshotHash = hash;
|
|
1245
|
+
this.state.updatedAt = now();
|
|
1246
|
+
}
|
|
1247
|
+
}
|
|
1248
|
+
getState() {
|
|
1249
|
+
return this.state;
|
|
1250
|
+
}
|
|
1251
|
+
getStateCopy() {
|
|
1252
|
+
return {
|
|
1253
|
+
...this.state,
|
|
1254
|
+
windows: new Map(this.state.windows),
|
|
1255
|
+
activeDialogs: [...this.state.activeDialogs],
|
|
1256
|
+
appDomains: new Map(this.state.appDomains),
|
|
1257
|
+
recentTransitions: [...this.state.recentTransitions],
|
|
1258
|
+
trackedEntities: new Map(this.state.trackedEntities),
|
|
1259
|
+
};
|
|
1260
|
+
}
|
|
1261
|
+
/**
|
|
1262
|
+
* Get a deep-frozen consistent snapshot of the world state.
|
|
1263
|
+
* Safe to read during concurrent ingestion — no shared references.
|
|
1264
|
+
*/
|
|
1265
|
+
getConsistentSnapshot() {
|
|
1266
|
+
const windowsCopy = new Map();
|
|
1267
|
+
for (const [id, win] of this.state.windows) {
|
|
1268
|
+
// Deep-clone controls to prevent shared references
|
|
1269
|
+
const controlsCopy = new Map();
|
|
1270
|
+
for (const [cid, ctrl] of win.controls) {
|
|
1271
|
+
controlsCopy.set(cid, {
|
|
1272
|
+
...ctrl,
|
|
1273
|
+
position: { ...ctrl.position },
|
|
1274
|
+
size: { ...ctrl.size },
|
|
1275
|
+
});
|
|
1276
|
+
}
|
|
1277
|
+
windowsCopy.set(id, {
|
|
1278
|
+
...win,
|
|
1279
|
+
controls: controlsCopy,
|
|
1280
|
+
dialogStack: [...win.dialogStack],
|
|
1281
|
+
visibleControls: win.visibleControls.map((c) => ({ ...c, position: { ...c.position }, size: { ...c.size } })),
|
|
1282
|
+
});
|
|
1283
|
+
}
|
|
1284
|
+
// Deep-clone tracked entities
|
|
1285
|
+
const entitiesCopy = new Map();
|
|
1286
|
+
for (const [eid, entity] of this.state.trackedEntities) {
|
|
1287
|
+
entitiesCopy.set(eid, {
|
|
1288
|
+
...entity,
|
|
1289
|
+
stableIds: [...entity.stableIds],
|
|
1290
|
+
positions: entity.positions.map((p) => ({ ...p })),
|
|
1291
|
+
properties: { ...entity.properties },
|
|
1292
|
+
});
|
|
1293
|
+
}
|
|
1294
|
+
return {
|
|
1295
|
+
...this.state,
|
|
1296
|
+
windows: windowsCopy,
|
|
1297
|
+
activeDialogs: this.state.activeDialogs.map((d) => ({ ...d, controls: new Map(d.controls) })),
|
|
1298
|
+
appDomains: new Map(this.state.appDomains),
|
|
1299
|
+
recentTransitions: [...this.state.recentTransitions],
|
|
1300
|
+
trackedEntities: entitiesCopy,
|
|
1301
|
+
};
|
|
1302
|
+
}
|
|
1303
|
+
/**
|
|
1304
|
+
* Get all tracked entities (cross-frame persistent identities).
|
|
1305
|
+
*/
|
|
1306
|
+
getTrackedEntities() {
|
|
1307
|
+
return this.state.trackedEntities;
|
|
1308
|
+
}
|
|
1309
|
+
/**
|
|
1310
|
+
* Capture a lightweight snapshot of key state for diffing.
|
|
1311
|
+
*/
|
|
1312
|
+
takeSnapshot() {
|
|
1313
|
+
const windowTitles = new Map();
|
|
1314
|
+
const controlCounts = new Map();
|
|
1315
|
+
for (const [id, win] of this.state.windows) {
|
|
1316
|
+
windowTitles.set(id, win.title.value);
|
|
1317
|
+
controlCounts.set(id, win.controls.size);
|
|
1318
|
+
}
|
|
1319
|
+
return {
|
|
1320
|
+
focusedBundleId: this.state.focusedApp?.bundleId ?? null,
|
|
1321
|
+
focusedWindowId: this.state.focusedWindowId,
|
|
1322
|
+
windowIds: new Set(this.state.windows.keys()),
|
|
1323
|
+
windowTitles,
|
|
1324
|
+
controlCounts,
|
|
1325
|
+
dialogCount: this.state.activeDialogs.length,
|
|
1326
|
+
dialogTitles: this.state.activeDialogs.map((d) => d.title),
|
|
1327
|
+
};
|
|
1328
|
+
}
|
|
1329
|
+
/**
|
|
1330
|
+
* Diff a before/after snapshot and record transitions.
|
|
1331
|
+
*/
|
|
1332
|
+
recordTransitions(before, trigger) {
|
|
1333
|
+
const ts = now();
|
|
1334
|
+
const transitions = [];
|
|
1335
|
+
// Focus change
|
|
1336
|
+
if (before.focusedBundleId !== (this.state.focusedApp?.bundleId ?? null)) {
|
|
1337
|
+
transitions.push({
|
|
1338
|
+
from: before.focusedBundleId ?? "(none)",
|
|
1339
|
+
to: this.state.focusedApp?.bundleId ?? "(none)",
|
|
1340
|
+
trigger: `${trigger}:focus_changed`,
|
|
1341
|
+
timestamp: ts,
|
|
1342
|
+
});
|
|
1343
|
+
}
|
|
1344
|
+
// Window added/removed
|
|
1345
|
+
const afterWindowIds = new Set(this.state.windows.keys());
|
|
1346
|
+
for (const id of afterWindowIds) {
|
|
1347
|
+
if (!before.windowIds.has(id)) {
|
|
1348
|
+
const win = this.state.windows.get(id);
|
|
1349
|
+
transitions.push({
|
|
1350
|
+
from: "(none)",
|
|
1351
|
+
to: win?.title.value ?? String(id),
|
|
1352
|
+
trigger: `${trigger}:window_added`,
|
|
1353
|
+
timestamp: ts,
|
|
1354
|
+
});
|
|
1355
|
+
}
|
|
1356
|
+
}
|
|
1357
|
+
for (const id of before.windowIds) {
|
|
1358
|
+
if (!afterWindowIds.has(id)) {
|
|
1359
|
+
transitions.push({
|
|
1360
|
+
from: before.windowTitles.get(id) ?? String(id),
|
|
1361
|
+
to: "(none)",
|
|
1362
|
+
trigger: `${trigger}:window_removed`,
|
|
1363
|
+
timestamp: ts,
|
|
1364
|
+
});
|
|
1365
|
+
}
|
|
1366
|
+
}
|
|
1367
|
+
// Window title changed
|
|
1368
|
+
for (const [id, oldTitle] of before.windowTitles) {
|
|
1369
|
+
const win = this.state.windows.get(id);
|
|
1370
|
+
if (win && win.title.value !== oldTitle) {
|
|
1371
|
+
transitions.push({
|
|
1372
|
+
from: oldTitle,
|
|
1373
|
+
to: win.title.value,
|
|
1374
|
+
trigger: `${trigger}:title_changed`,
|
|
1375
|
+
timestamp: ts,
|
|
1376
|
+
});
|
|
1377
|
+
}
|
|
1378
|
+
}
|
|
1379
|
+
// Dialog count changed
|
|
1380
|
+
if (before.dialogCount !== this.state.activeDialogs.length) {
|
|
1381
|
+
transitions.push({
|
|
1382
|
+
from: String(before.dialogCount),
|
|
1383
|
+
to: String(this.state.activeDialogs.length),
|
|
1384
|
+
trigger: `${trigger}:dialog_count_changed`,
|
|
1385
|
+
timestamp: ts,
|
|
1386
|
+
});
|
|
1387
|
+
}
|
|
1388
|
+
// Control count changed per window
|
|
1389
|
+
for (const [id, oldCount] of before.controlCounts) {
|
|
1390
|
+
const win = this.state.windows.get(id);
|
|
1391
|
+
if (win && win.controls.size !== oldCount) {
|
|
1392
|
+
transitions.push({
|
|
1393
|
+
from: String(oldCount),
|
|
1394
|
+
to: String(win.controls.size),
|
|
1395
|
+
trigger: `${trigger}:controls_changed`,
|
|
1396
|
+
timestamp: ts,
|
|
1397
|
+
});
|
|
1398
|
+
}
|
|
1399
|
+
}
|
|
1400
|
+
if (transitions.length > 0) {
|
|
1401
|
+
this.state.recentTransitions.push(...transitions);
|
|
1402
|
+
// Cap at 50
|
|
1403
|
+
if (this.state.recentTransitions.length > 50) {
|
|
1404
|
+
this.state.recentTransitions = this.state.recentTransitions.slice(-50);
|
|
1405
|
+
}
|
|
1406
|
+
}
|
|
1407
|
+
}
|
|
1408
|
+
findControlByRoleLabel(role, label) {
|
|
1409
|
+
for (const win of this.state.windows.values()) {
|
|
1410
|
+
for (const control of win.controls.values()) {
|
|
1411
|
+
if (control.role === role && control.label.value === label) {
|
|
1412
|
+
return control;
|
|
1413
|
+
}
|
|
1414
|
+
}
|
|
1415
|
+
}
|
|
1416
|
+
return undefined;
|
|
1417
|
+
}
|
|
1418
|
+
/** Start a periodic timer that proactively decays all tracked field confidences. */
|
|
1419
|
+
startDecayTimer(intervalMs = 10_000) {
|
|
1420
|
+
this.stopDecayTimer();
|
|
1421
|
+
this.decayTimer = setInterval(() => {
|
|
1422
|
+
this.decayAll();
|
|
1423
|
+
}, intervalMs);
|
|
1424
|
+
if (this.decayTimer && typeof this.decayTimer === "object" && "unref" in this.decayTimer) {
|
|
1425
|
+
this.decayTimer.unref(); // Don't prevent process exit
|
|
1426
|
+
}
|
|
1427
|
+
}
|
|
1428
|
+
/** Stop the decay timer. */
|
|
1429
|
+
stopDecayTimer() {
|
|
1430
|
+
if (this.decayTimer !== null) {
|
|
1431
|
+
clearInterval(this.decayTimer);
|
|
1432
|
+
this.decayTimer = null;
|
|
1433
|
+
}
|
|
1434
|
+
}
|
|
1435
|
+
/** Walk all tracked fields and apply exponential decay in-place. */
|
|
1436
|
+
decayAll() {
|
|
1437
|
+
const rate = this.config.confidenceDecayRate;
|
|
1438
|
+
for (const win of this.state.windows.values()) {
|
|
1439
|
+
win.title = applyDecay(win.title, rate);
|
|
1440
|
+
win.bounds = applyDecay(win.bounds, rate);
|
|
1441
|
+
for (const [id, control] of win.controls) {
|
|
1442
|
+
win.controls.set(id, {
|
|
1443
|
+
...control,
|
|
1444
|
+
label: applyDecay(control.label, rate),
|
|
1445
|
+
value: applyDecay(control.value, rate),
|
|
1446
|
+
enabled: applyDecay(control.enabled, rate),
|
|
1447
|
+
});
|
|
1448
|
+
}
|
|
1449
|
+
}
|
|
1450
|
+
this.state.updatedAt = now();
|
|
1451
|
+
this.schedulePersist();
|
|
1452
|
+
}
|
|
1453
|
+
schedulePersist() {
|
|
1454
|
+
this.persister.schedule(this.state);
|
|
1455
|
+
}
|
|
1456
|
+
}
|