screenhand 0.1.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +193 -109
- package/bin/darwin-arm64/macos-bridge +0 -0
- package/dist/mcp-desktop.js +5876 -0
- package/dist/scripts/codex-monitor-daemon.js +335 -0
- package/dist/scripts/export-help-center.js +112 -0
- package/dist/scripts/marketing-loop.js +117 -0
- package/dist/scripts/observer-daemon.js +288 -0
- package/dist/scripts/orchestrator-daemon.js +399 -0
- package/dist/scripts/supervisor-daemon.js +272 -0
- package/dist/scripts/threads-campaign.js +208 -0
- package/dist/scripts/worker-daemon.js +228 -0
- package/dist/src/agent/cli.js +82 -0
- package/dist/src/agent/loop.js +274 -0
- package/dist/src/community/fetcher.js +109 -0
- package/dist/src/community/index.js +6 -0
- package/dist/src/community/publisher.js +191 -0
- package/dist/src/community/remote-api.js +121 -0
- package/dist/src/community/types.js +3 -0
- package/dist/src/community/validator.js +95 -0
- package/{src/config.ts → dist/src/config.js} +5 -10
- package/dist/src/context-tracker.js +489 -0
- package/{src/index.ts → dist/src/index.js} +32 -52
- package/dist/src/ingestion/coverage-auditor.js +233 -0
- package/dist/src/ingestion/doc-parser.js +164 -0
- package/dist/src/ingestion/index.js +8 -0
- package/dist/src/ingestion/menu-scanner.js +152 -0
- package/dist/src/ingestion/reference-merger.js +186 -0
- package/dist/src/ingestion/shortcut-extractor.js +180 -0
- package/dist/src/ingestion/tutorial-extractor.js +170 -0
- package/dist/src/ingestion/types.js +3 -0
- package/dist/src/jobs/manager.js +305 -0
- package/dist/src/jobs/runner.js +806 -0
- package/dist/src/jobs/store.js +102 -0
- package/dist/src/jobs/types.js +30 -0
- package/dist/src/jobs/worker.js +97 -0
- package/dist/src/learning/engine.js +356 -0
- package/dist/src/learning/index.js +9 -0
- package/dist/src/learning/locator-policy.js +120 -0
- package/dist/src/learning/pattern-policy.js +89 -0
- package/dist/src/learning/recovery-policy.js +116 -0
- package/dist/src/learning/sensor-policy.js +115 -0
- package/dist/src/learning/timing-model.js +204 -0
- package/dist/src/learning/topology-policy.js +90 -0
- package/dist/src/learning/types.js +9 -0
- package/dist/src/logging/timeline-logger.js +48 -0
- package/dist/src/mcp/mcp-stdio-server.js +464 -0
- package/dist/src/mcp/server.js +363 -0
- package/dist/src/mcp-entry.js +60 -0
- package/dist/src/memory/playbook-seeds.js +200 -0
- package/dist/src/memory/recall.js +222 -0
- package/dist/src/memory/research.js +104 -0
- package/dist/src/memory/seeds.js +101 -0
- package/dist/src/memory/service.js +446 -0
- package/dist/src/memory/session.js +169 -0
- package/dist/src/memory/store.js +451 -0
- package/{src/runtime/locator-cache.ts → dist/src/memory/types.js} +1 -17
- package/dist/src/monitor/codex-monitor.js +382 -0
- package/dist/src/monitor/task-queue.js +97 -0
- package/dist/src/monitor/types.js +62 -0
- package/dist/src/native/bridge-client.js +412 -0
- package/{src/native/macos-bridge-client.ts → dist/src/native/macos-bridge-client.js} +0 -1
- package/dist/src/observer/state.js +199 -0
- package/dist/src/observer/types.js +43 -0
- package/dist/src/orchestrator/state.js +68 -0
- package/dist/src/orchestrator/types.js +22 -0
- package/dist/src/perception/ax-source.js +162 -0
- package/dist/src/perception/cdp-source.js +162 -0
- package/dist/src/perception/coordinator.js +771 -0
- package/dist/src/perception/frame-differ.js +287 -0
- package/dist/src/perception/index.js +22 -0
- package/dist/src/perception/manager.js +199 -0
- package/dist/src/perception/types.js +47 -0
- package/dist/src/perception/vision-source.js +399 -0
- package/dist/src/planner/deterministic.js +298 -0
- package/dist/src/planner/executor.js +870 -0
- package/dist/src/planner/goal-store.js +92 -0
- package/dist/src/planner/index.js +21 -0
- package/dist/src/planner/planner.js +520 -0
- package/dist/src/planner/tool-registry.js +71 -0
- package/dist/src/planner/types.js +22 -0
- package/dist/src/platform/explorer.js +213 -0
- package/dist/src/platform/help-center-markdown.js +527 -0
- package/dist/src/platform/learner.js +257 -0
- package/dist/src/playbook/engine.js +486 -0
- package/dist/src/playbook/index.js +20 -0
- package/dist/src/playbook/mcp-recorder.js +204 -0
- package/dist/src/playbook/recorder.js +536 -0
- package/dist/src/playbook/runner.js +408 -0
- package/dist/src/playbook/store.js +312 -0
- package/dist/src/playbook/types.js +17 -0
- package/dist/src/recovery/detectors.js +156 -0
- package/dist/src/recovery/engine.js +327 -0
- package/dist/src/recovery/index.js +20 -0
- package/dist/src/recovery/strategies.js +274 -0
- package/dist/src/recovery/types.js +20 -0
- package/dist/src/runtime/accessibility-adapter.js +430 -0
- package/dist/src/runtime/app-adapter.js +64 -0
- package/dist/src/runtime/applescript-adapter.js +305 -0
- package/dist/src/runtime/ax-role-map.js +96 -0
- package/dist/src/runtime/browser-adapter.js +52 -0
- package/dist/src/runtime/cdp-chrome-adapter.js +521 -0
- package/dist/src/runtime/composite-adapter.js +221 -0
- package/dist/src/runtime/execution-contract.js +159 -0
- package/dist/src/runtime/executor.js +286 -0
- package/dist/src/runtime/locator-cache.js +50 -0
- package/dist/src/runtime/planning-loop.js +63 -0
- package/dist/src/runtime/service.js +432 -0
- package/dist/src/runtime/session-manager.js +63 -0
- package/dist/src/runtime/state-observer.js +121 -0
- package/dist/src/runtime/vision-adapter.js +225 -0
- package/dist/src/state/app-map-types.js +72 -0
- package/dist/src/state/app-map.js +1974 -0
- package/dist/src/state/entity-tracker.js +108 -0
- package/dist/src/state/fusion.js +96 -0
- package/dist/src/state/index.js +21 -0
- package/dist/src/state/ladder-generator.js +236 -0
- package/dist/src/state/persistence.js +156 -0
- package/dist/src/state/types.js +17 -0
- package/dist/src/state/world-model.js +1456 -0
- package/dist/src/supervisor/locks.js +186 -0
- package/dist/src/supervisor/supervisor.js +403 -0
- package/dist/src/supervisor/types.js +30 -0
- package/dist/src/test-mcp-protocol.js +154 -0
- package/dist/src/types.js +17 -0
- package/dist/src/util/atomic-write.js +133 -0
- package/dist/src/util/sanitize.js +146 -0
- package/dist-app-maps/com.figma.Desktop.json +959 -0
- package/dist-app-maps/com.hnc.Discord.json +1146 -0
- package/dist-app-maps/notion.id.json +2831 -0
- package/dist-playbooks/canva-screenhand-carousel.json +445 -0
- package/dist-playbooks/codex-desktop.json +76 -0
- package/dist-playbooks/competitor-research-stack.json +122 -0
- package/dist-playbooks/davinci-color-grade.json +153 -0
- package/dist-playbooks/davinci-edit-timeline.json +162 -0
- package/dist-playbooks/davinci-render.json +114 -0
- package/dist-playbooks/devto.json +52 -0
- package/dist-playbooks/discord.json +41 -0
- package/dist-playbooks/google-flow-create-project.json +59 -0
- package/dist-playbooks/google-flow-edit-image.json +90 -0
- package/dist-playbooks/google-flow-edit-video.json +90 -0
- package/dist-playbooks/google-flow-generate-image.json +68 -0
- package/dist-playbooks/google-flow-generate-video.json +191 -0
- package/dist-playbooks/google-flow-open-project.json +48 -0
- package/dist-playbooks/google-flow-open-scenebuilder.json +64 -0
- package/dist-playbooks/google-flow-search-assets.json +64 -0
- package/dist-playbooks/instagram.json +57 -0
- package/dist-playbooks/linkedin.json +52 -0
- package/dist-playbooks/n8n.json +43 -0
- package/dist-playbooks/reddit.json +52 -0
- package/dist-playbooks/threads.json +59 -0
- package/dist-playbooks/x-twitter.json +59 -0
- package/dist-playbooks/youtube.json +59 -0
- package/dist-references/canva.json +646 -0
- package/dist-references/codex-desktop.json +305 -0
- package/dist-references/davinci-resolve-keyboard.json +594 -0
- package/dist-references/davinci-resolve-menu-map.json +1139 -0
- package/dist-references/davinci-resolve-menus-batch1.json +116 -0
- package/dist-references/davinci-resolve-menus-batch2.json +372 -0
- package/dist-references/davinci-resolve-menus-batch3.json +330 -0
- package/dist-references/davinci-resolve-menus-batch4.json +297 -0
- package/dist-references/davinci-resolve-shortcuts.json +333 -0
- package/dist-references/devto.json +317 -0
- package/dist-references/discord.json +549 -0
- package/dist-references/figma.json +1186 -0
- package/dist-references/finder.json +146 -0
- package/dist-references/google-ads-transparency.json +95 -0
- package/dist-references/google-flow.json +649 -0
- package/dist-references/instagram.json +341 -0
- package/dist-references/linkedin.json +324 -0
- package/dist-references/meta-ad-library.json +86 -0
- package/dist-references/n8n.json +387 -0
- package/dist-references/notes.json +27 -0
- package/dist-references/notion.json +163 -0
- package/dist-references/reddit.json +341 -0
- package/dist-references/threads.json +337 -0
- package/dist-references/x-twitter.json +403 -0
- package/dist-references/youtube.json +373 -0
- package/native/macos-bridge/Package.swift +1 -0
- package/native/macos-bridge/Sources/AccessibilityBridge.swift +257 -36
- package/native/macos-bridge/Sources/AppManagement.swift +212 -2
- package/native/macos-bridge/Sources/CoreGraphicsBridge.swift +348 -53
- package/native/macos-bridge/Sources/StreamCapture.swift +136 -0
- package/native/macos-bridge/Sources/VisionBridge.swift +165 -7
- package/native/macos-bridge/Sources/main.swift +169 -16
- package/native/windows-bridge/Program.cs +5 -0
- package/native/windows-bridge/ScreenCapture.cs +124 -0
- package/package.json +29 -4
- package/scripts/postinstall.cjs +127 -0
- package/.claude/commands/automate.md +0 -28
- package/.claude/commands/debug-ui.md +0 -19
- package/.claude/commands/screenshot.md +0 -15
- package/.github/FUNDING.yml +0 -1
- package/.github/ISSUE_TEMPLATE/bug_report.md +0 -27
- package/.github/ISSUE_TEMPLATE/feature_request.md +0 -20
- package/.mcp.json +0 -8
- package/DESKTOP_MCP_GUIDE.md +0 -92
- package/SECURITY.md +0 -44
- package/docs/architecture.md +0 -47
- package/install-skills.sh +0 -19
- package/mcp-bridge.ts +0 -271
- package/mcp-desktop.ts +0 -1221
- package/playbooks/instagram.json +0 -41
- package/playbooks/instagram_v2.json +0 -201
- package/playbooks/x_v1.json +0 -211
- package/scripts/devpost-live-loop.mjs +0 -421
- package/src/logging/timeline-logger.ts +0 -55
- package/src/mcp/server.ts +0 -449
- package/src/memory/recall.ts +0 -191
- package/src/memory/research.ts +0 -146
- package/src/memory/seeds.ts +0 -123
- package/src/memory/session.ts +0 -201
- package/src/memory/store.ts +0 -434
- package/src/memory/types.ts +0 -69
- package/src/native/bridge-client.ts +0 -239
- package/src/runtime/accessibility-adapter.ts +0 -487
- package/src/runtime/app-adapter.ts +0 -169
- package/src/runtime/applescript-adapter.ts +0 -376
- package/src/runtime/ax-role-map.ts +0 -102
- package/src/runtime/browser-adapter.ts +0 -129
- package/src/runtime/cdp-chrome-adapter.ts +0 -676
- package/src/runtime/composite-adapter.ts +0 -274
- package/src/runtime/executor.ts +0 -396
- package/src/runtime/planning-loop.ts +0 -81
- package/src/runtime/service.ts +0 -448
- package/src/runtime/session-manager.ts +0 -50
- package/src/runtime/state-observer.ts +0 -136
- package/src/runtime/vision-adapter.ts +0 -297
- package/src/types.ts +0 -297
- package/tests/bridge-client.test.ts +0 -176
- package/tests/browser-stealth.test.ts +0 -210
- package/tests/composite-adapter.test.ts +0 -64
- package/tests/mcp-server.test.ts +0 -151
- package/tests/memory-recall.test.ts +0 -339
- package/tests/memory-research.test.ts +0 -159
- package/tests/memory-seeds.test.ts +0 -120
- package/tests/memory-store.test.ts +0 -392
- package/tests/types.test.ts +0 -92
- package/tsconfig.check.json +0 -17
- package/tsconfig.json +0 -19
- package/vitest.config.ts +0 -8
- /package/{playbooks → dist-references}/devpost.json +0 -0
|
@@ -0,0 +1,536 @@
|
|
|
1
|
+
// Copyright (C) 2025 Clazro Technology Private Limited
|
|
2
|
+
// SPDX-License-Identifier: AGPL-3.0-only
|
|
3
|
+
//
|
|
4
|
+
// This file is part of ScreenHand.
|
|
5
|
+
//
|
|
6
|
+
// ScreenHand is free software: you can redistribute it and/or modify
|
|
7
|
+
// it under the terms of the GNU Affero General Public License as
|
|
8
|
+
// published by the Free Software Foundation, version 3.
|
|
9
|
+
//
|
|
10
|
+
// ScreenHand is distributed in the hope that it will be useful,
|
|
11
|
+
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
12
|
+
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
13
|
+
// GNU Affero General Public License for more details.
|
|
14
|
+
//
|
|
15
|
+
// You should have received a copy of the GNU Affero General Public License
|
|
16
|
+
// along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
|
|
17
|
+
/**
|
|
18
|
+
* Playbook Recorder v2 — event-driven + screenshot-based
|
|
19
|
+
*
|
|
20
|
+
* Two capture modes running in parallel:
|
|
21
|
+
*
|
|
22
|
+
* 1. AX Event Stream (state polled every 500ms and diffed into events)
|
|
23
|
+
* - Listens to macOS accessibility notifications via the native bridge
|
|
24
|
+
* - Captures: focus changes, value changes, window creates, app switches
|
|
25
|
+
* - This is how we know WHAT the user clicked/typed
|
|
26
|
+
*
|
|
27
|
+
* 2. Periodic Screenshots (every 2.5s)
|
|
28
|
+
* - Captures visual state of the screen
|
|
29
|
+
* - At stop time, AI analyzes the screenshot sequence + AX events
|
|
30
|
+
* - This is how we handle things AX events miss (Chrome DOM, visual changes)
|
|
31
|
+
*
|
|
32
|
+
* On stop:
|
|
33
|
+
* - All AX events + the first and last screenshots sent to AI
|
|
34
|
+
* - AI produces clean PlaybookStep[] from the combined data
|
|
35
|
+
* - Saved to disk as a replayable playbook
|
|
36
|
+
*/
|
|
37
|
+
import Anthropic from "@anthropic-ai/sdk";
|
|
38
|
+
import fs from "node:fs";
|
|
39
|
+
import { PlaybookStore } from "./store.js";
|
|
40
|
+
// Delay between periodic screenshots while a recording is active, in milliseconds.
const SCREENSHOT_INTERVAL_MS = 2500;
|
|
41
|
+
// Delay between accessibility-state polls while a recording is active, in milliseconds.
const AX_POLL_INTERVAL_MS = 500;
|
|
42
|
+
/**
 * Records a user session and converts it into a replayable playbook.
 *
 * While recording, one timer polls the runtime for the active app and the
 * accessibility tree and diffs the result against the previously seen state
 * to synthesize events (app switch, focus change, text change, title change).
 * A second timer takes periodic screenshots unless `options.screenshots` is
 * `false`. On stop(), the events plus the first and last screenshots are sent
 * to the Anthropic Messages API to produce playbook steps; if that call fails
 * or returns no JSON array, a mechanical event-to-step fallback is used.
 */
export class PlaybookRecorder {
    runtime;
    options;
    recording = false;
    events = [];
    screenshots = [];
    screenshotTimer = null;
    axPollTimer = null;
    sessionId = "";
    // Track previous AX state for diff detection
    prevFocused = "";
    prevActiveApp = "";
    prevWindowTitle = "";
    prevUrl = "";
    prevTextFields = new Map();
    // Re-entrancy guards: a timer tick is skipped while the previous one is
    // still awaiting the runtime, so slow polls cannot interleave and emit
    // duplicate events.
    axPollInFlight = false;
    screenshotInFlight = false;
    store;
    ai;
    model;
    log;
    onEvent;
    captureScreenshots;
    /**
     * @param {object} runtime - Provides `appList(sessionId)`,
     *   `elementTree({ sessionId, maxDepth })` and `screenshot({ sessionId })`,
     *   each resolving to an `{ ok, data }` result.
     * @param {string} playbookDir - Passed to `PlaybookStore`; the store is loaded immediately.
     * @param {object} [options]
     * @param {string} [options.model] - Model id used for step generation.
     * @param {(msg: string) => void} [options.onLog] - Log sink; defaults to `console.error` with a `[Recorder]` prefix.
     * @param {(event: object) => void} [options.onEvent] - Called for every recorded event.
     * @param {boolean} [options.screenshots] - Pass `false` to disable screenshot capture.
     */
    constructor(runtime, playbookDir, options = {}) {
        this.runtime = runtime;
        this.options = options;
        this.store = new PlaybookStore(playbookDir);
        this.store.load();
        this.ai = new Anthropic();
        this.model = options.model ?? "claude-sonnet-4-20250514";
        this.log = options.onLog ?? ((msg) => console.error(`[Recorder] ${msg}`));
        this.onEvent = options.onEvent;
        this.captureScreenshots = options.screenshots !== false;
    }
    /**
     * Start recording user actions.
     *
     * Resets all per-session state, captures an initial snapshot, then starts
     * the polling and screenshot timers. Does nothing (besides logging) when
     * a recording is already active.
     *
     * @param {string} sessionId - Forwarded to every runtime call.
     * @returns {Promise<void>}
     */
    async start(sessionId) {
        if (this.recording) {
            this.log("Already recording");
            return;
        }
        this.recording = true;
        this.sessionId = sessionId;
        this.events = [];
        this.screenshots = [];
        this.prevFocused = "";
        this.prevActiveApp = "";
        this.prevWindowTitle = "";
        this.prevUrl = "";
        this.prevTextFields.clear();
        // Take initial state snapshot
        await this.captureState("initial");
        // stop()/cancel() may have run while the snapshot was awaited; installing
        // timers now would leave intervals that nothing ever clears.
        if (!this.recording) {
            return;
        }
        this.log("Recording started — watching for AX events + taking screenshots");
        // Start AX event polling (fast — every 500ms)
        this.axPollTimer = setInterval(async () => {
            if (!this.recording || this.axPollInFlight) {
                return;
            }
            this.axPollInFlight = true;
            try {
                await this.pollAXState();
            }
            catch { /* non-fatal */ }
            finally {
                this.axPollInFlight = false;
            }
        }, AX_POLL_INTERVAL_MS);
        // Start screenshot capture (slower — every 2.5s)
        if (this.captureScreenshots) {
            this.screenshotTimer = setInterval(async () => {
                if (!this.recording || this.screenshotInFlight) {
                    return;
                }
                this.screenshotInFlight = true;
                try {
                    await this.takeScreenshot();
                }
                catch { /* non-fatal */ }
                finally {
                    this.screenshotInFlight = false;
                }
            }, SCREENSHOT_INTERVAL_MS);
        }
    }
    /**
     * Stop recording and generate a playbook.
     *
     * Takes a final screenshot (when enabled), converts the recorded events
     * into steps and saves the resulting playbook to the store.
     *
     * @param {string} name - Playbook name; its words of 3+ characters become tags.
     * @param {string} description - Stored on the playbook as-is.
     * @param {string} platform - Stored on the playbook and used in its id and tags.
     * @returns {Promise<object>} The saved playbook.
     */
    async stop(name, description, platform) {
        this.recording = false;
        this.clearTimers();
        // Take final screenshot
        if (this.captureScreenshots) {
            try {
                await this.takeScreenshot();
            }
            catch { /* ignore */ }
        }
        this.log(`Recording stopped. ${this.events.length} events, ${this.screenshots.length} screenshots captured.`);
        // Convert raw events + screenshots to playbook steps via AI
        const steps = await this.eventsToSteps(this.events, this.screenshots, name, platform);
        // Save as playbook
        const id = `rec_${platform}_${Date.now()}`;
        const playbook = {
            id,
            name,
            description,
            platform,
            steps,
            version: "1.0.0",
            tags: [
                platform,
                ...name.toLowerCase().split(/\W+/).filter((w) => w.length >= 3),
            ],
            successCount: 0,
            failCount: 0,
            lastRun: new Date().toISOString(),
        };
        this.store.save(playbook);
        this.log(`Playbook saved: ${id} (${steps.length} steps)`);
        return playbook;
    }
    /**
     * Cancel recording without saving.
     */
    cancel() {
        this.recording = false;
        this.clearTimers();
        this.events = [];
        this.screenshots = [];
        this.log("Recording cancelled");
    }
    /** Whether a recording is currently active. */
    get isRecording() {
        return this.recording;
    }
    /** Number of events recorded so far. */
    get eventCount() {
        return this.events.length;
    }
    /** Shallow copy of the recorded events. */
    getEvents() {
        return [...this.events];
    }
    // ── AX State Polling (fast, event-driven feel) ──
    /**
     * Poll the runtime once and emit an event for every difference from the
     * previously observed state. Failures of either runtime call are ignored
     * so that one bad poll does not end the recording.
     *
     * @returns {Promise<void>}
     */
    async pollAXState() {
        // 1. Check which app is active
        try {
            const apps = await this.runtime.appList(this.sessionId);
            if (apps.ok) {
                const active = apps.data.find((a) => a.isActive);
                if (active && active.bundleId !== this.prevActiveApp) {
                    // No event for the very first observation: there is nothing to switch from.
                    if (this.prevActiveApp) {
                        this.addEvent({
                            type: "app_activated",
                            details: {
                                from: this.prevActiveApp,
                                to: active.bundleId,
                                appName: active.name,
                            },
                        });
                    }
                    this.prevActiveApp = active.bundleId;
                }
            }
        }
        catch { /* ignore */ }
        // 2. Get accessibility tree — find focused element and text field values
        try {
            const tree = await this.runtime.elementTree({ sessionId: this.sessionId, maxDepth: 4 });
            if (!tree.ok) {
                return;
            }
            // Detect focus change
            const focused = findFocused(tree.data);
            if (focused && focused !== this.prevFocused) {
                this.addEvent({
                    type: "focus_changed",
                    details: {
                        from: this.prevFocused,
                        to: focused,
                        element: describeFocused(tree.data),
                    },
                });
                this.prevFocused = focused;
            }
            // Detect text field value changes (typing detection)
            const currentFields = collectTextFields(tree.data);
            for (const [fieldId, value] of currentFields) {
                const prev = this.prevTextFields.get(fieldId);
                if (prev !== undefined && prev !== value) {
                    // AX values are not guaranteed to be strings; coerce before slicing
                    // so one odd value cannot abort the rest of this poll.
                    const before = prev == null ? "" : String(prev);
                    const after = value == null ? "" : String(value);
                    this.addEvent({
                        type: "value_changed",
                        details: {
                            field: fieldId,
                            from: before.slice(-50),
                            to: after.slice(-50),
                            typed: diffTypedText(before, after),
                        },
                    });
                }
            }
            this.prevTextFields = currentFields;
            // Detect window title change (navigation in browser)
            const title = tree.data.title ?? "";
            if (title && title !== this.prevWindowTitle) {
                if (this.prevWindowTitle) {
                    this.addEvent({
                        type: "title_changed",
                        details: { from: this.prevWindowTitle, to: title },
                    });
                }
                this.prevWindowTitle = title;
            }
        }
        catch { /* ignore */ }
    }
    // ── Screenshot Capture ──
    /**
     * Ask the runtime for a screenshot and, on success, append a record of
     * its path, timestamp and index. Failures are ignored.
     *
     * @returns {Promise<void>}
     */
    async takeScreenshot() {
        try {
            const result = await this.runtime.screenshot({ sessionId: this.sessionId });
            if (result.ok) {
                const record = {
                    path: result.data.path,
                    timestamp: new Date().toISOString(),
                    index: this.screenshots.length,
                };
                this.screenshots.push(record);
            }
        }
        catch { /* non-fatal */ }
    }
    // ── State Capture ──
    /**
     * Seed the "previous state" fields from the current app and tree, record
     * an `app_activated` event tagged with `label`, and take a screenshot
     * when enabled.
     *
     * @param {string} label - Stored in the event details (start() passes "initial").
     * @returns {Promise<void>}
     */
    async captureState(label) {
        // Capture initial app state
        try {
            const apps = await this.runtime.appList(this.sessionId);
            if (apps.ok) {
                const active = apps.data.find((a) => a.isActive);
                if (active) {
                    this.prevActiveApp = active.bundleId;
                    this.addEvent({
                        type: "app_activated",
                        details: { to: active.bundleId, appName: active.name, label },
                    });
                }
            }
        }
        catch { /* ignore */ }
        // Capture initial tree state
        try {
            const tree = await this.runtime.elementTree({ sessionId: this.sessionId, maxDepth: 4 });
            if (tree.ok) {
                this.prevFocused = findFocused(tree.data);
                this.prevWindowTitle = tree.data.title ?? "";
                this.prevTextFields = collectTextFields(tree.data);
            }
        }
        catch { /* ignore */ }
        // Take initial screenshot
        if (this.captureScreenshots) {
            await this.takeScreenshot();
        }
    }
    // ── Event Management ──
    /**
     * Timestamp an event, store it, log it and notify the `onEvent` listener.
     *
     * @param {{ type: string, details: object }} partial - Event without a timestamp.
     */
    addEvent(partial) {
        const event = {
            ...partial,
            timestamp: new Date().toISOString(),
        };
        this.events.push(event);
        this.log(`Event: ${event.type} — ${JSON.stringify(event.details).slice(0, 120)}`);
        if (this.onEvent) {
            // A throwing listener must not abort the poll that produced the event:
            // the poll would then skip its state update and re-emit the same event
            // on every tick.
            try {
                this.onEvent(event);
            }
            catch (err) {
                this.log(`onEvent listener failed: ${err instanceof Error ? err.message : String(err)}`);
            }
        }
    }
    /** Stop both interval timers if they are running. */
    clearTimers() {
        if (this.axPollTimer) {
            clearInterval(this.axPollTimer);
            this.axPollTimer = null;
        }
        if (this.screenshotTimer) {
            clearInterval(this.screenshotTimer);
            this.screenshotTimer = null;
        }
    }
    // ── AI Conversion ──
    /**
     * Convert raw events + screenshots into clean playbook steps.
     * Sends first + last screenshot as images so AI can see what happened visually.
     *
     * @param {object[]} events - Recorded events in chronological order.
     * @param {object[]} screenshots - Screenshot records (`path`, `timestamp`, `index`).
     * @param {string} taskName - Inserted into the prompt as the task.
     * @param {string} platform - Inserted into the prompt as the platform.
     * @returns {Promise<object[]>} Steps parsed from the model response, or the
     *   mechanical fallback when the call fails or no JSON array is found.
     *   Empty when there are no events.
     */
    async eventsToSteps(events, screenshots, taskName, platform) {
        if (events.length === 0) {
            return [];
        }
        // Build the content array — text + optional images
        const content = [];
        // Add text prompt
        content.push({
            type: "text",
            text: `Convert these recorded user events into a clean, replayable automation playbook.

Task: ${taskName}
Platform: ${platform}

Raw events recorded (in chronological order):
${events.map((e, i) => `${i + 1}. [${e.timestamp}] ${e.type}: ${JSON.stringify(e.details)}`).join("\n")}

${screenshots.length > 0 ? `\n${screenshots.length} screenshots were taken during recording. The first and last are attached below for visual context.\n` : ""}
Convert these into a JSON array of playbook steps. Each step:
{
  "action": "navigate" | "press" | "type_into" | "key" | "key_combo" | "menu_click" | "scroll" | "wait" | "screenshot",
  "target": "CSS selector, text label, or {\"selector\": \"...\"}",
  "url": "for navigate",
  "text": "for type_into",
  "keys": ["for", "key or key_combo"],
  "menuPath": ["for", "menu_click"],
  "ms": 1000,
  "description": "human-readable description of what this step does",
  "verify": "optional CSS selector or text to verify success",
  "optional": false
}

Rules:
- Infer the user's INTENT from events, not just mirror them mechanically
- focus_changed events usually mean a click — convert to "press" with the element label
- value_changed events mean typing — convert to "type_into" with the field and text
- title_changed often means navigation — add appropriate navigate or wait steps
- app_activated means switching apps — use app_focus or app_launch
- Use stable selectors: data-testid, aria-label, role+name over fragile CSS
- Merge rapid consecutive events into single meaningful steps
- Add wait steps (500-2000ms) after navigation/page loads
- Add verify conditions for critical steps (modal opened, page loaded, etc.)
- Skip noise (duplicate events, layout thrash, irrelevant focus changes)

Respond with ONLY a valid JSON array, no markdown fences, no explanation.`,
        });
        // Attach first and last screenshots as images (if available)
        if (screenshots.length > 0) {
            const toAttach = [screenshots[0]];
            if (screenshots.length > 1) {
                toAttach.push(screenshots[screenshots.length - 1]);
            }
            for (const shot of toAttach) {
                try {
                    const imageData = fs.readFileSync(shot.path);
                    const base64 = imageData.toString("base64");
                    content.push({
                        type: "image",
                        source: {
                            type: "base64",
                            // NOTE(review): assumes the runtime writes PNG files — confirm.
                            media_type: "image/png",
                            data: base64,
                        },
                    });
                    content.push({
                        type: "text",
                        text: `Screenshot ${shot.index + 1} taken at ${shot.timestamp}`,
                    });
                }
                catch {
                    // Skip unreadable screenshots
                }
            }
        }
        try {
            const resp = await this.ai.messages.create({
                model: this.model,
                max_tokens: 4096,
                messages: [{ role: "user", content }],
            });
            const text = resp.content[0]?.type === "text" ? resp.content[0].text : "";
            // Tolerate prose or fences around the array by extracting the outermost [...] span.
            const jsonMatch = text.match(/\[[\s\S]*\]/);
            if (jsonMatch) {
                const steps = JSON.parse(jsonMatch[0]);
                this.log(`AI generated ${steps.length} playbook steps`);
                return steps;
            }
        }
        catch (err) {
            this.log(`AI conversion failed: ${err instanceof Error ? err.message : String(err)}`);
        }
        // Fallback without AI
        return this.eventsToStepsFallback(events);
    }
    /**
     * Mechanically map events to playbook steps, one event at a time.
     *
     * @param {object[]} events - Recorded events in chronological order.
     * @returns {object[]} Playbook steps.
     */
    eventsToStepsFallback(events) {
        const steps = [];
        // Target of the most recent "press" step. Duplicates are detected against
        // this rather than this.prevFocused, which at stop time always equals the
        // last focus target and would therefore drop the final click.
        let lastPressed = "";
        for (const event of events) {
            switch (event.type) {
                case "app_activated":
                    // The initial snapshot is not a user action.
                    if (event.details.label === "initial") {
                        break;
                    }
                    steps.push({
                        action: "wait",
                        ms: 500,
                        description: `Switched to ${event.details.appName ?? event.details.to}`,
                    });
                    break;
                case "focus_changed": {
                    const target = String(event.details.to ?? "");
                    if (!target || target === lastPressed) {
                        break;
                    }
                    lastPressed = target;
                    steps.push({
                        action: "press",
                        target,
                        description: `Click on ${event.details.element ?? target}`,
                    });
                    break;
                }
                case "value_changed": {
                    const typed = String(event.details.typed ?? "");
                    const field = String(event.details.field ?? "");
                    if (typed) {
                        steps.push({
                            action: "type_into",
                            target: field,
                            text: typed,
                            description: `Type "${typed.slice(0, 30)}" into ${field}`,
                        });
                    }
                    break;
                }
                case "title_changed":
                    steps.push({
                        action: "wait",
                        ms: 1500,
                        description: `Page changed to: ${event.details.to}`,
                    });
                    break;
                case "url_changed":
                    steps.push({
                        action: "navigate",
                        url: event.details.to,
                        description: `Navigate to ${event.details.to}`,
                    });
                    break;
                case "menu_opened":
                case "dialog_appeared":
                    steps.push({
                        action: "wait",
                        ms: 1000,
                        description: `${event.type}: ${JSON.stringify(event.details).slice(0, 50)}`,
                    });
                    break;
            }
        }
        return steps;
    }
}
/**
 * Derive the text a user entered from two successive values of a field.
 *
 * @param {string} prev - Value seen on the previous poll.
 * @param {string} value - Value seen on the current poll.
 * @returns {string} The appended suffix when `value` extends `prev`; an empty
 *   string when `value` is `prev` with trailing characters removed; otherwise
 *   the whole new value, because the individual keystrokes of an in-place
 *   edit or replacement cannot be recovered from two snapshots.
 */
function diffTypedText(prev, value) {
    if (value.startsWith(prev)) {
        return value.slice(prev.length);
    }
    if (prev.startsWith(value)) {
        return "";
    }
    return value;
}
|
|
473
|
+
// ── AX Tree Helpers ──
|
|
474
|
+
/**
 * Find the focused element and return a stable identifier.
 *
 * Walks the tree depth-first and stops at the first node whose `focused`
 * property is strictly `true`.
 *
 * @param {object} node - Accessibility tree node (`role`, `title`,
 *   `description`, `identifier`, `focused`, `children`).
 * @param {number} [depth=0] - Current recursion depth; nodes deeper than 6 are not searched.
 * @returns {string} `"<role without AX>:<label>"` for the focused node, or `""` when none is found.
 */
function findFocused(node, depth = 0) {
    if (!node || depth > 6) {
        return "";
    }
    if (node.focused === true) {
        const role = node.role?.replace("AX", "") ?? "";
        // Use || rather than ??: an empty-string title must not hide a usable
        // description/identifier, or differently described elements of the same
        // role would all collapse to "Role:".
        const label = node.title || node.description || node.identifier || "";
        return `${role}:${label}`;
    }
    if (node.children) {
        for (const child of node.children) {
            const found = findFocused(child, depth + 1);
            if (found) {
                return found;
            }
        }
    }
    return "";
}
|
|
492
|
+
/**
 * Get a human-readable description of the focused element + context.
 *
 * Walks the tree depth-first to the first node whose `focused` property is
 * strictly `true` and joins its role, title, description, value (first 30
 * characters) and rounded position with spaces, omitting missing parts.
 *
 * @param {object} node - Accessibility tree node.
 * @param {number} [depth=0] - Current recursion depth; nodes deeper than 6 are not searched.
 * @returns {string} The description, or `""` when no focused node is found.
 */
function describeFocused(node, depth = 0) {
    if (!node || depth > 6) {
        return "";
    }
    if (node.focused === true) {
        const parts = [node.role?.replace("AX", "")];
        if (node.title) {
            parts.push(`"${node.title}"`);
        }
        if (node.description) {
            parts.push(`desc="${node.description}"`);
        }
        if (node.value) {
            // Values are not always strings (e.g. a numeric slider value); coerce
            // before slicing so describing the element cannot throw.
            parts.push(`val="${String(node.value).slice(0, 30)}"`);
        }
        if (node.position) {
            parts.push(`@${Math.round(node.position.x)},${Math.round(node.position.y)}`);
        }
        return parts.filter(Boolean).join(" ");
    }
    if (node.children) {
        for (const child of node.children) {
            const found = describeFocused(child, depth + 1);
            if (found) {
                return found;
            }
        }
    }
    return "";
}
|
|
517
|
+
/**
 * Collect all text field values from the tree for typing detection.
 *
 * A node counts as a text field when its role (with "AX" removed,
 * lower-cased) is `textfield`, `textarea`, `combobox` or `searchfield` and
 * it has a `value` property.
 *
 * @param {object} node - Accessibility tree node.
 * @param {number} [depth=0] - Current recursion depth; nodes deeper than 5 are not collected.
 * @returns {Map<string, string>} Field id → current value as a string. The id
 *   is the first non-empty of `identifier`, `title`, `description`, else
 *   `field_<depth>`. Fields that resolve to the same id overwrite each
 *   other, so unnamed fields at the same depth share one entry.
 */
function collectTextFields(node, depth = 0) {
    const fields = new Map();
    if (!node || depth > 5) {
        return fields;
    }
    const role = node.role?.replace("AX", "").toLowerCase() ?? "";
    const isTextField = role === "textfield" || role === "textarea" || role === "combobox" || role === "searchfield";
    if (isTextField && node.value !== undefined) {
        // Use || rather than ??: an empty-string identifier/title must not win
        // over a usable label, or unrelated fields would collide on key "".
        const id = node.identifier || node.title || node.description || `field_${depth}`;
        // Always store a string (null becomes "") so consumers can slice and
        // compare values without type checks.
        fields.set(id, node.value === null ? "" : String(node.value));
    }
    if (node.children) {
        for (const child of node.children) {
            for (const [k, v] of collectTextFields(child, depth + 1)) {
                fields.set(k, v);
            }
        }
    }
    return fields;
}
|