screenhand 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +458 -93
- package/dist/.audit-log.jsonl +55 -0
- package/dist/.screenhand/memory/.lock +1 -0
- package/dist/.screenhand/memory/actions.jsonl +85 -0
- package/dist/.screenhand/memory/errors.jsonl +5 -0
- package/dist/.screenhand/memory/errors.jsonl.bak +4 -0
- package/dist/.screenhand/memory/state.json +35 -0
- package/dist/.screenhand/memory/state.json.bak +35 -0
- package/dist/.screenhand/memory/strategies.jsonl +12 -0
- package/dist/agent/cli.js +73 -0
- package/dist/agent/loop.js +258 -0
- package/dist/config.js +9 -0
- package/dist/index.js +56 -0
- package/dist/logging/timeline-logger.js +29 -0
- package/dist/mcp/mcp-stdio-server.js +448 -0
- package/dist/mcp/server.js +347 -0
- package/dist/mcp-desktop.js +2731 -0
- package/dist/mcp-entry.js +59 -0
- package/dist/memory/recall.js +160 -0
- package/dist/memory/research.js +98 -0
- package/dist/memory/seeds.js +89 -0
- package/dist/memory/session.js +161 -0
- package/dist/memory/store.js +391 -0
- package/dist/memory/types.js +4 -0
- package/dist/monitor/codex-monitor.js +377 -0
- package/dist/monitor/task-queue.js +84 -0
- package/dist/monitor/types.js +49 -0
- package/dist/native/bridge-client.js +174 -0
- package/dist/native/macos-bridge-client.js +5 -0
- package/dist/npm-publish-helper.js +117 -0
- package/dist/npm-token-cdp.js +113 -0
- package/dist/npm-token-create.js +135 -0
- package/dist/npm-token-finish.js +126 -0
- package/dist/playbook/engine.js +193 -0
- package/dist/playbook/index.js +4 -0
- package/dist/playbook/recorder.js +519 -0
- package/dist/playbook/runner.js +392 -0
- package/dist/playbook/store.js +166 -0
- package/dist/playbook/types.js +4 -0
- package/dist/runtime/accessibility-adapter.js +377 -0
- package/dist/runtime/app-adapter.js +48 -0
- package/dist/runtime/applescript-adapter.js +283 -0
- package/dist/runtime/ax-role-map.js +80 -0
- package/dist/runtime/browser-adapter.js +36 -0
- package/dist/runtime/cdp-chrome-adapter.js +505 -0
- package/dist/runtime/composite-adapter.js +205 -0
- package/dist/runtime/executor.js +250 -0
- package/dist/runtime/locator-cache.js +12 -0
- package/dist/runtime/planning-loop.js +47 -0
- package/dist/runtime/service.js +372 -0
- package/dist/runtime/session-manager.js +28 -0
- package/dist/runtime/state-observer.js +105 -0
- package/dist/runtime/vision-adapter.js +208 -0
- package/dist/scripts/codex-monitor-daemon.js +335 -0
- package/dist/scripts/supervisor-daemon.js +272 -0
- package/dist/scripts/worker-daemon.js +228 -0
- package/dist/src/agent/cli.js +82 -0
- package/dist/src/agent/loop.js +274 -0
- package/{src/config.ts → dist/src/config.js} +5 -10
- package/{src/index.ts → dist/src/index.js} +32 -52
- package/dist/src/jobs/manager.js +237 -0
- package/dist/src/jobs/runner.js +683 -0
- package/dist/src/jobs/store.js +102 -0
- package/dist/src/jobs/types.js +30 -0
- package/dist/src/jobs/worker.js +97 -0
- package/dist/src/logging/timeline-logger.js +45 -0
- package/dist/src/mcp/mcp-stdio-server.js +464 -0
- package/dist/src/mcp/server.js +363 -0
- package/dist/src/mcp-entry.js +60 -0
- package/dist/src/memory/recall.js +170 -0
- package/dist/src/memory/research.js +104 -0
- package/dist/src/memory/seeds.js +101 -0
- package/dist/src/memory/service.js +421 -0
- package/dist/src/memory/session.js +169 -0
- package/dist/src/memory/store.js +422 -0
- package/dist/src/memory/types.js +17 -0
- package/dist/src/monitor/codex-monitor.js +382 -0
- package/dist/src/monitor/task-queue.js +97 -0
- package/dist/src/monitor/types.js +62 -0
- package/dist/src/native/bridge-client.js +190 -0
- package/{src/native/macos-bridge-client.ts → dist/src/native/macos-bridge-client.js} +0 -1
- package/dist/src/playbook/engine.js +201 -0
- package/dist/src/playbook/index.js +20 -0
- package/dist/src/playbook/recorder.js +535 -0
- package/dist/src/playbook/runner.js +408 -0
- package/dist/src/playbook/store.js +183 -0
- package/dist/src/playbook/types.js +17 -0
- package/dist/src/runtime/accessibility-adapter.js +393 -0
- package/dist/src/runtime/app-adapter.js +64 -0
- package/dist/src/runtime/applescript-adapter.js +299 -0
- package/dist/src/runtime/ax-role-map.js +96 -0
- package/dist/src/runtime/browser-adapter.js +52 -0
- package/dist/src/runtime/cdp-chrome-adapter.js +521 -0
- package/dist/src/runtime/composite-adapter.js +221 -0
- package/dist/src/runtime/execution-contract.js +159 -0
- package/dist/src/runtime/executor.js +266 -0
- package/{src/runtime/locator-cache.ts → dist/src/runtime/locator-cache.js} +10 -15
- package/dist/src/runtime/planning-loop.js +63 -0
- package/dist/src/runtime/service.js +388 -0
- package/dist/src/runtime/session-manager.js +60 -0
- package/dist/src/runtime/state-observer.js +121 -0
- package/dist/src/runtime/vision-adapter.js +224 -0
- package/dist/src/supervisor/locks.js +186 -0
- package/dist/src/supervisor/supervisor.js +403 -0
- package/dist/src/supervisor/types.js +30 -0
- package/dist/src/test-mcp-protocol.js +154 -0
- package/dist/src/types.js +17 -0
- package/dist/src/util/atomic-write.js +118 -0
- package/dist/test-mcp-protocol.js +138 -0
- package/dist/types.js +1 -0
- package/package.json +18 -4
- package/.claude/commands/automate.md +0 -28
- package/.claude/commands/debug-ui.md +0 -19
- package/.claude/commands/screenshot.md +0 -15
- package/.github/FUNDING.yml +0 -1
- package/.github/ISSUE_TEMPLATE/bug_report.md +0 -27
- package/.github/ISSUE_TEMPLATE/feature_request.md +0 -20
- package/.mcp.json +0 -8
- package/DESKTOP_MCP_GUIDE.md +0 -92
- package/SECURITY.md +0 -44
- package/docs/architecture.md +0 -47
- package/install-skills.sh +0 -19
- package/mcp-bridge.ts +0 -271
- package/mcp-desktop.ts +0 -1221
- package/native/macos-bridge/Package.swift +0 -21
- package/native/macos-bridge/Sources/AccessibilityBridge.swift +0 -261
- package/native/macos-bridge/Sources/AppManagement.swift +0 -129
- package/native/macos-bridge/Sources/CoreGraphicsBridge.swift +0 -242
- package/native/macos-bridge/Sources/ObserverBridge.swift +0 -120
- package/native/macos-bridge/Sources/VisionBridge.swift +0 -80
- package/native/macos-bridge/Sources/main.swift +0 -345
- package/native/windows-bridge/AppManagement.cs +0 -234
- package/native/windows-bridge/InputBridge.cs +0 -436
- package/native/windows-bridge/Program.cs +0 -265
- package/native/windows-bridge/ScreenCapture.cs +0 -329
- package/native/windows-bridge/UIAutomationBridge.cs +0 -571
- package/native/windows-bridge/WindowsBridge.csproj +0 -17
- package/playbooks/devpost.json +0 -186
- package/playbooks/instagram.json +0 -41
- package/playbooks/instagram_v2.json +0 -201
- package/playbooks/x_v1.json +0 -211
- package/scripts/devpost-live-loop.mjs +0 -421
- package/src/logging/timeline-logger.ts +0 -55
- package/src/mcp/server.ts +0 -449
- package/src/memory/recall.ts +0 -191
- package/src/memory/research.ts +0 -146
- package/src/memory/seeds.ts +0 -123
- package/src/memory/session.ts +0 -201
- package/src/memory/store.ts +0 -434
- package/src/memory/types.ts +0 -69
- package/src/native/bridge-client.ts +0 -239
- package/src/runtime/accessibility-adapter.ts +0 -487
- package/src/runtime/app-adapter.ts +0 -169
- package/src/runtime/applescript-adapter.ts +0 -376
- package/src/runtime/ax-role-map.ts +0 -102
- package/src/runtime/browser-adapter.ts +0 -129
- package/src/runtime/cdp-chrome-adapter.ts +0 -676
- package/src/runtime/composite-adapter.ts +0 -274
- package/src/runtime/executor.ts +0 -396
- package/src/runtime/planning-loop.ts +0 -81
- package/src/runtime/service.ts +0 -448
- package/src/runtime/session-manager.ts +0 -50
- package/src/runtime/state-observer.ts +0 -136
- package/src/runtime/vision-adapter.ts +0 -297
- package/src/types.ts +0 -297
- package/tests/bridge-client.test.ts +0 -176
- package/tests/browser-stealth.test.ts +0 -210
- package/tests/composite-adapter.test.ts +0 -64
- package/tests/mcp-server.test.ts +0 -151
- package/tests/memory-recall.test.ts +0 -339
- package/tests/memory-research.test.ts +0 -159
- package/tests/memory-seeds.test.ts +0 -120
- package/tests/memory-store.test.ts +0 -392
- package/tests/types.test.ts +0 -92
- package/tsconfig.check.json +0 -17
- package/tsconfig.json +0 -19
- package/vitest.config.ts +0 -8
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
const POLL_INTERVAL_MS = 200;
/**
 * Vision-based adapter for apps with poor/no accessibility support.
 * Uses screenshots + OCR to locate elements and CG events to interact.
 *
 * Every native operation goes through `bridge.call(method, params)`; the
 * adapter itself only tracks sessions and shapes the results.
 */
export class VisionAdapter {
    /** Native bridge client; this class uses its `start()` and `call(method, params)`. */
    bridge;
    /** Session state objects keyed by session id. */
    sessions = new Map();
    /** The same state objects keyed by profile, so attach() can reuse a session. */
    sessionsByProfile = new Map();
    /**
     * @param bridge Bridge client used for all screenshots, OCR and input events.
     */
    constructor(bridge) {
        this.bridge = bridge;
    }
    /**
     * Return the session for `profile`, creating it on first use.
     *
     * A new session starts the bridge and records the app that is frontmost
     * at that moment (pid, bundle id, name).
     *
     * @param profile Profile name used as the session cache key.
     * @returns The session info object ({ sessionId, profile, createdAt, adapterType }).
     */
    async attach(profile) {
        const existing = this.sessionsByProfile.get(profile);
        if (existing) {
            return existing.info;
        }
        await this.bridge.start();
        const frontmost = await this.bridge.call("app.frontmost");
        const info = {
            sessionId: `vision_session_${profile}_${Date.now()}`,
            profile,
            createdAt: new Date().toISOString(),
            adapterType: "vision",
        };
        const state = {
            info,
            pid: frontmost.pid,
            bundleId: frontmost.bundleId,
            appName: frontmost.name,
        };
        this.sessions.set(info.sessionId, state);
        this.sessionsByProfile.set(profile, state);
        return info;
    }
    /**
     * Describe the app bound to a session.
     *
     * @throws {Error} If the session id is unknown.
     */
    async getAppContext(sessionId) {
        const state = this.requireSession(sessionId);
        return {
            bundleId: state.bundleId,
            appName: state.appName,
            pid: state.pid,
            // No window title is tracked here, so the app name is reported instead.
            windowTitle: state.appName,
        };
    }
    /**
     * Browser-style page metadata for the session's app: an `app://<bundleId>`
     * URL and the app name as the title.
     *
     * @throws {Error} If the session id is unknown.
     */
    async getPageMeta(sessionId) {
        const ctx = await this.getAppContext(sessionId);
        return {
            url: `app://${ctx.bundleId}`,
            title: ctx.appName,
        };
    }
    /**
     * Launch the app named by an `app://<bundleId>` URL and rebind the session
     * to it. Any other URL leaves the session untouched.
     *
     * The session is validated before anything is launched, so an unknown
     * session id no longer starts an app as a side effect.
     *
     * @param sessionId Session to rebind.
     * @param url Target URL; only the `app://` scheme triggers a launch.
     * @param _timeoutMs Unused.
     * @returns Page metadata for the (possibly updated) session.
     * @throws {Error} If the session id is unknown.
     */
    async navigate(sessionId, url, _timeoutMs) {
        const state = this.requireSession(sessionId);
        const appScheme = "app://";
        if (url.startsWith(appScheme)) {
            const bundleId = url.slice(appScheme.length);
            const result = await this.bridge.call("app.launch", { bundleId });
            state.pid = result.pid;
            state.bundleId = result.bundleId;
            state.appName = result.appName;
        }
        return this.getPageMeta(sessionId);
    }
    /**
     * Find a target on screen.
     *
     * Coordinate targets are returned directly. Targets that carry text are
     * searched for by capturing the screen and running `vision.findText`,
     * retrying every POLL_INTERVAL_MS until `timeoutMs` has elapsed. At least
     * one probe is always made, even when `timeoutMs` is 0 or missing.
     * Targets with neither coordinates nor text resolve to null.
     *
     * @param sessionId Session whose state records the latest screenshot path.
     * @param target Locator object with a `type` field.
     * @param timeoutMs Time budget for the OCR retries, in milliseconds.
     * @returns An element handle ({ handleId, locatorUsed, coordinates, label? }) or null.
     * @throws {Error} If the session id is unknown.
     */
    async locate(sessionId, target, timeoutMs) {
        const state = this.requireSession(sessionId);
        // The search text depends only on the target, so resolve it once up front.
        const searchText = this.getSearchText(target);
        if (!searchText) {
            // Nothing to OCR for: skip the screen capture entirely.
            if (target.type === "coordinates") {
                return {
                    handleId: `vision_coords_${target.x}_${target.y}`,
                    locatorUsed: "vision:coordinates",
                    coordinates: { x: target.x, y: target.y, width: 1, height: 1 },
                };
            }
            return null;
        }
        const deadline = Date.now() + timeoutMs;
        do {
            const screenshotResult = await this.bridge.call("cg.captureScreen", {});
            state.lastScreenshotPath = screenshotResult.path;
            const matches = await this.bridge.call("vision.findText", {
                imagePath: screenshotResult.path,
                searchText,
            });
            if (matches.length > 0) {
                // Several regions may match; keep the one OCR is most confident about.
                const best = matches.reduce((a, b) => (a.confidence > b.confidence ? a : b));
                return {
                    handleId: `vision_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`,
                    locatorUsed: `vision:text:${searchText}`,
                    label: best.text,
                    coordinates: best.bounds,
                };
            }
            // Written as a negation so a NaN deadline (missing timeout) also stops here.
            if (!(Date.now() < deadline)) {
                break;
            }
            await sleep(POLL_INTERVAL_MS);
        } while (Date.now() < deadline);
        return null;
    }
    /**
     * Click the centre of an element's bounding box.
     *
     * @param _sessionId Unused.
     * @param element Element handle; must carry `coordinates`.
     * @throws {Error} If the element has no coordinates.
     */
    async click(_sessionId, element) {
        if (!element.coordinates) {
            throw new Error("Vision adapter requires coordinates to click");
        }
        const cx = element.coordinates.x + element.coordinates.width / 2;
        const cy = element.coordinates.y + element.coordinates.height / 2;
        await this.bridge.call("cg.mouseClick", { x: cx, y: cy });
    }
    /**
     * Type text into an element: click it, optionally send cmd+a first so the
     * typed text replaces the selection, then type.
     *
     * @param _sessionId Passed through to click().
     * @param element Element handle with coordinates.
     * @param text Text to type.
     * @param clear When truthy, send cmd+a before typing.
     */
    async setValue(_sessionId, element, text, clear) {
        // Click to focus
        await this.click(_sessionId, element);
        await sleep(100);
        if (clear) {
            await this.bridge.call("cg.keyCombo", { keys: ["cmd", "a"] });
            await sleep(50);
        }
        await this.bridge.call("cg.typeText", { text });
    }
    /**
     * Vision can't reliably read values; return the element's label if available,
     * otherwise an empty string.
     */
    async getValue(_sessionId, element) {
        return element.label ?? "";
    }
    /**
     * Poll until a condition holds or `timeoutMs` elapses.
     *
     * Supported condition types: `text_appears`, `element_exists` and
     * `element_gone`. Each probe gives locate() a 200 ms budget. Any other
     * condition type returns false immediately.
     *
     * @returns true if the condition was met in time, false otherwise.
     */
    async waitFor(sessionId, condition, timeoutMs) {
        const deadline = Date.now() + timeoutMs;
        while (Date.now() < deadline) {
            if (condition.type === "text_appears") {
                const found = await this.locate(sessionId, { type: "text", value: condition.text }, 200);
                if (found) {
                    return true;
                }
            }
            else if (condition.type === "element_exists") {
                const found = await this.locate(sessionId, condition.target, 200);
                if (found) {
                    return true;
                }
            }
            else if (condition.type === "element_gone") {
                const found = await this.locate(sessionId, condition.target, 200);
                if (!found) {
                    return true;
                }
            }
            else {
                // Unsupported condition types
                return false;
            }
            await sleep(POLL_INTERVAL_MS);
        }
        return false;
    }
    /**
     * OCR a fresh full-screen capture.
     *
     * @param sessionId Session whose state records the screenshot path.
     * @param _target Unused; the whole screen is always captured.
     * @param format "text" returns the OCR text, "json" the raw OCR result, and
     *   anything else a table of { headers, rows } with one row per OCR region.
     * @throws {Error} If the session id is unknown.
     */
    async extract(sessionId, _target, format) {
        const state = this.requireSession(sessionId);
        const screenshotResult = await this.bridge.call("cg.captureScreen", {});
        state.lastScreenshotPath = screenshotResult.path;
        const ocrResult = await this.bridge.call("vision.ocr", { imagePath: screenshotResult.path });
        if (format === "text") {
            return ocrResult.text;
        }
        if (format === "json") {
            return ocrResult;
        }
        // table format
        return {
            headers: ["text", "confidence", "x", "y", "width", "height"],
            rows: ocrResult.regions.map((r) => [
                r.text,
                r.confidence,
                r.bounds.x,
                r.bounds.y,
                r.bounds.width,
                r.bounds.height,
            ]),
        };
    }
    /**
     * Capture the screen, or just `region` when one is given.
     *
     * @returns The `path` reported by the bridge for the capture.
     */
    async screenshot(_sessionId, region) {
        const result = await this.bridge.call("cg.captureScreen", region ? { region } : {});
        return result.path;
    }
    /** Send a key combination (array of key names) through the bridge. */
    async keyCombo(_sessionId, keys) {
        await this.bridge.call("cg.keyCombo", { keys });
    }
    /**
     * Not supported by this adapter.
     *
     * @throws {Error} Always.
     */
    async elementTree(_sessionId, _maxDepth, _root) {
        throw new Error("Vision adapter does not support elementTree — use accessibility adapter");
    }
    // ── Private ──
    /**
     * Look up session state by id.
     *
     * @throws {Error} If no session with that id exists.
     */
    requireSession(sessionId) {
        const state = this.sessions.get(sessionId);
        if (!state) {
            throw new Error(`Session not found: ${sessionId}`);
        }
        return state;
    }
    /**
     * Text to search for via OCR for a given target, or null when the target
     * type carries no text (image, coordinates, ax_path, or an unknown type).
     */
    getSearchText(target) {
        switch (target.type) {
            case "text":
            case "selector":
            case "ax_attribute":
                return target.value;
            case "role":
                return target.name;
            case "image":
            case "coordinates":
            case "ax_path":
            default:
                return null;
        }
    }
}
|
|
206
|
+
/**
 * Pause for a while.
 *
 * @param ms Delay in milliseconds, handed to setTimeout.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
function sleep(ms) {
    return new Promise((wake) => {
        setTimeout(wake, ms);
    });
}
|
|
@@ -0,0 +1,335 @@
|
|
|
1
|
+
#!/usr/bin/env npx tsx
|
|
2
|
+
/**
|
|
3
|
+
* Codex Monitor Daemon — runs as a standalone background process.
|
|
4
|
+
*
|
|
5
|
+
* Survives Claude Code restarts. Writes state to ~/.screenhand/monitor/
|
|
6
|
+
* so MCP tools can read status and enqueue tasks via the filesystem.
|
|
7
|
+
*
|
|
8
|
+
* Usage:
|
|
9
|
+
* npx tsx scripts/codex-monitor-daemon.ts --pid 56966
|
|
10
|
+
* npx tsx scripts/codex-monitor-daemon.ts --pid 56966 --window 5015 --poll 5000
|
|
11
|
+
*
|
|
12
|
+
* State files:
|
|
13
|
+
* ~/.screenhand/monitor/state.json — terminal states + status
|
|
14
|
+
* ~/.screenhand/monitor/tasks.json — task queue
|
|
15
|
+
* ~/.screenhand/monitor/daemon.pid — PID of this process
|
|
16
|
+
* ~/.screenhand/monitor/daemon.log — log output
|
|
17
|
+
*/
|
|
18
|
+
import fs from "node:fs";
import os from "node:os";
import path from "node:path";
import { fileURLToPath } from "node:url";
import { BridgeClient } from "../src/native/bridge-client.js";
|
|
22
|
+
// ── Config from CLI args ──
|
|
23
|
+
// Command-line tokens after the node binary and the script path.
const args = process.argv.slice(2);
/**
 * Read the value that follows `--<name>` on the command line.
 *
 * @param name Option name without the leading dashes.
 * @param fallback Returned when the option is absent or is the last token.
 * @returns The token right after `--<name>`, or `fallback`.
 */
function getArg(name, fallback) {
    const flagIndex = args.indexOf(`--${name}`);
    return flagIndex >= 0 ? (args[flagIndex + 1] ?? fallback) : fallback;
}
|
|
30
|
+
// Process id of the VS Code instance to watch. Required: 0 or NaN means "not given".
const VSCODE_PID = Number(getArg("pid", "0"));
// Optional window id. Anything that is not a finite number means "auto-detect",
// so a typo such as `--window abc` no longer leaks NaN into the terminal state.
const windowArg = getArg("window");
const WINDOW_ID = windowArg && Number.isFinite(Number(windowArg)) ? Number(windowArg) : undefined;
// Poll interval. setTimeout treats NaN, 0 and negative delays as "fire almost
// immediately", which would turn the poll loop into a busy loop, so invalid
// values fall back to the default.
const DEFAULT_POLL_MS = 3000;
const requestedPollMs = Number(getArg("poll", String(DEFAULT_POLL_MS)));
const POLL_MS = Number.isFinite(requestedPollMs) && requestedPollMs > 0 ? requestedPollMs : DEFAULT_POLL_MS;
if (POLL_MS !== requestedPollMs) {
    console.error(`Ignoring invalid --poll value; using ${DEFAULT_POLL_MS}ms`);
}
const LABEL = getArg("label", "Terminal") ?? "Terminal";
// `--no-auto-assign` is a bare switch. Looking it up with getArg() only worked
// when another token happened to follow it, so test for its presence instead.
const AUTO_ASSIGN = !args.includes("--no-auto-assign");
if (!VSCODE_PID) {
    console.error("Usage: codex-monitor-daemon.ts --pid <vscode_pid> [--window <id>] [--poll <ms>]");
    process.exit(1);
}
|
|
39
|
+
// ── State directory ──
|
|
40
|
+
// Everything the daemon persists lives in ~/.screenhand/monitor/.
const STATE_DIR = path.join(os.homedir(), ".screenhand", "monitor");
// state.json, tasks.json, daemon.pid and daemon.log, all inside STATE_DIR.
const [STATE_FILE, TASKS_FILE, PID_FILE, LOG_FILE] = [
    "state.json",
    "tasks.json",
    "daemon.pid",
    "daemon.log",
].map((fileName) => path.join(STATE_DIR, fileName));
// Create the directory (and any missing parents) before anything writes to it.
fs.mkdirSync(STATE_DIR, { recursive: true });
|
|
46
|
+
// ── Logging ──
|
|
47
|
+
// Append-mode stream for daemon.log.
const logStream = fs.createWriteStream(LOG_FILE, { flags: "a" });
/**
 * Write one timestamped line to the log file and, while the process is not
 * marked as daemonized, to stderr as well.
 *
 * @param msg Message text; an ISO timestamp in brackets is prepended.
 */
function log(msg) {
    const entry = `[${new Date().toISOString()}] ${msg}\n`;
    logStream.write(entry);
    if (daemonized) {
        return;
    }
    // Also write to stderr for initial debugging
    process.stderr.write(entry);
}
// Controls the stderr echo in log(). Nothing in the visible part of this file sets it to true.
let daemonized = false;
|
|
56
|
+
// ── Status detection patterns ──
|
|
57
|
+
// Substrings that mark the terminal as busy (matched against lower-cased output).
const RUNNING_PATTERNS = [
    "thinking",
    "working",
    "generating",
    "analyzing",
    "reading",
    "writing",
    "searching",
    "running",
    "executing",
    "...",
    "in progress",
];
// Substrings that mark the last line as a prompt or a completion message.
const IDLE_PATTERNS = [
    "codex>",
    "> ",
    "$ ",
    "done",
    "complete",
    "finished",
    "task completed",
    "all done",
    "ready",
    "waiting for input",
    "what would you like",
    "how can i help",
];
// Substrings that mark recent output as a failure.
const ERROR_PATTERNS = [
    "error:",
    "failed",
    "exception",
    "traceback",
    "panic:",
    "fatal",
    "cannot",
    "could not",
];
|
|
71
|
+
// ── Bridge setup ──
|
|
72
|
+
// Directory containing this script. `import.meta.dirname` is missing on older
// Node releases; there the module URL is converted with fileURLToPath, which
// (unlike URL.pathname) decodes percent-escapes and yields a valid Windows path
// instead of "/C:/...".
const scriptDir = import.meta.dirname ?? path.dirname(fileURLToPath(import.meta.url));
const projectRoot = path.resolve(scriptDir, "..");
// Platform-specific native helper binary, resolved relative to the project root.
const bridgePath = process.platform === "win32"
    ? path.resolve(projectRoot, "native/windows-bridge/bin/Release/net8.0-windows/windows-bridge.exe")
    : path.resolve(projectRoot, "native/macos-bridge/.build/release/macos-bridge");
const bridge = new BridgeClient(bridgePath);
|
|
78
|
+
// ── State ──
|
|
79
|
+
// Window to capture: starts as the --window value and is filled in by
// auto-detection in readTerminalContent() when that is unset.
let resolvedWindowId = WINDOW_ID;
// In-memory record of the single watched terminal; writeState() serializes it.
const terminal = {
    id: `term_${VSCODE_PID}_${Date.now().toString(36)}`,
    vscodePid: VSCODE_PID,
    // Only emit the key when a window id was supplied on the command line.
    ...(WINDOW_ID == null ? {} : { windowId: WINDOW_ID }),
    label: LABEL,
    status: "unknown",
    lastOutput: "",
    lastTask: null,
    lastPollAt: new Date().toISOString(),
    tasksCompleted: 0,
    taskHistory: [],
};
// Set by shutdown(); ends the poll loop in main().
let stopped = false;
|
|
93
|
+
// ── Filesystem I/O ──
|
|
94
|
+
// Captured once at module load. Previously `startedAt` was regenerated on every
// write, so state.json reported the last write time rather than the start time.
const DAEMON_STARTED_AT = new Date().toISOString();
/**
 * Persist the daemon status (pid, start time, terminal record, running flag)
 * to STATE_FILE.
 *
 * The JSON is written to a temporary sibling file and renamed into place so
 * another process reading state.json never sees a half-written document.
 */
function writeState() {
    const state = {
        pid: process.pid,
        startedAt: DAEMON_STARTED_AT,
        terminals: [terminal],
        running: !stopped,
    };
    const tmpFile = `${STATE_FILE}.${process.pid}.tmp`;
    fs.writeFileSync(tmpFile, JSON.stringify(state, null, 2));
    fs.renameSync(tmpFile, STATE_FILE);
}
|
|
103
|
+
/**
 * Load the task queue from TASKS_FILE.
 *
 * Best effort by design: a missing file, unreadable file or invalid JSON all
 * yield an empty queue. JSON that parses to something other than an array is
 * also treated as empty, because callers use array methods on the result.
 *
 * @returns The array of task objects, or [] when none can be read.
 */
function readTasks() {
    try {
        // Reading directly covers the "file missing" case through the catch
        // below, so no separate existence check is needed.
        const parsed = JSON.parse(fs.readFileSync(TASKS_FILE, "utf-8"));
        return Array.isArray(parsed) ? parsed : [];
    }
    catch {
        return [];
    }
}
|
|
113
|
+
/**
 * Persist the task queue to TASKS_FILE as pretty-printed JSON.
 *
 * tasks.json is shared with other processes, so the data is written to a
 * temporary sibling file and renamed into place; a concurrent reader sees
 * either the old queue or the new one, never a truncated file.
 *
 * @param tasks Array of task objects to store.
 */
function writeTasks(tasks) {
    const tmpFile = `${TASKS_FILE}.${process.pid}.tmp`;
    fs.writeFileSync(tmpFile, JSON.stringify(tasks, null, 2));
    fs.renameSync(tmpFile, TASKS_FILE);
}
|
|
116
|
+
// ── Detection logic ──
|
|
117
|
+
/**
 * Classify terminal output as "error", "idle", "running" or "unknown".
 *
 * Checks run in this order and the first hit wins:
 *   1. any error pattern within the last five non-blank lines,
 *   2. any idle pattern on the last non-blank line (contained in it, or the
 *      line ends with the trimmed pattern),
 *   3. any running pattern anywhere in the text.
 * All matching is case-insensitive.
 *
 * @param text Terminal output to inspect.
 * @returns One of "error", "idle", "running", "unknown".
 */
function detectStatus(text) {
    const haystack = text.toLowerCase();
    const nonBlankRows = text.split("\n").filter((row) => row.trim().length > 0);
    const finalRow = (nonBlankRows[nonBlankRows.length - 1] ?? "").trim().toLowerCase();
    const recentRows = nonBlankRows.slice(-5).join("\n").toLowerCase();
    if (ERROR_PATTERNS.some((needle) => recentRows.includes(needle))) {
        return "error";
    }
    const atPrompt = IDLE_PATTERNS.some((needle) => finalRow.includes(needle) || finalRow.endsWith(needle.trim()));
    if (atPrompt) {
        return "idle";
    }
    return RUNNING_PATTERNS.some((needle) => haystack.includes(needle)) ? "running" : "unknown";
}
|
|
139
|
+
// ── Terminal reading ──
|
|
140
|
+
/**
 * Read the current terminal text.
 *
 * Primary path: capture the VS Code window and OCR the image. When no window
 * id is known yet, the largest window belonging to VSCODE_PID (or carrying the
 * VS Code bundle id) is picked first. If anything on that path fails, the
 * accessibility tree of VSCODE_PID is searched for a terminal element instead.
 *
 * @returns The terminal text, or null when no VS Code window exists or both
 *   strategies fail.
 */
async function readTerminalContent() {
    try {
        // Find window if not set
        if (!resolvedWindowId) {
            const wins = await bridge.call("app.windows");
            const vscodeWins = wins.filter((w) => w.pid === VSCODE_PID || w.bundleId === "com.microsoft.VSCode");
            if (vscodeWins.length === 0) {
                log("VS Code window not found");
                return null;
            }
            // Pick the largest window (skip title bar / small panels).
            const area = (w) => (w.bounds?.width ?? 0) * (w.bounds?.height ?? 0);
            const largest = vscodeWins.reduce((a, b) => (area(a) >= area(b) ? a : b));
            if (largest.windowId == null) {
                // Nothing to capture; throwing routes us to the AX fallback below
                // instead of calling captureWindow with an undefined id.
                throw new Error("VS Code window has no window id");
            }
            resolvedWindowId = largest.windowId;
            terminal.windowId = resolvedWindowId;
            log(`Auto-detected window ${resolvedWindowId} (${largest.bounds?.width}x${largest.bounds?.height})`);
        }
        // Screenshot + OCR
        const shot = await bridge.call("cg.captureWindow", {
            windowId: resolvedWindowId,
        });
        const ocr = await bridge.call("vision.ocr", {
            imagePath: shot.path,
        });
        return ocr.text;
    }
    catch (err) {
        log(`OCR failed: ${err instanceof Error ? err.message : String(err)}`);
        // An auto-detected id may be stale (for example the window was closed).
        // Fall back to the command-line value so the next poll detects again;
        // an explicitly supplied window id is kept as is.
        resolvedWindowId = WINDOW_ID;
        // Fallback: AX tree
        try {
            const tree = await bridge.call("ax.getElementTree", {
                pid: VSCODE_PID,
                maxDepth: 6,
            });
            return extractTerminalText(tree);
        }
        catch (axErr) {
            log(`AX fallback failed: ${axErr instanceof Error ? axErr.message : String(axErr)}`);
            return null;
        }
    }
}
|
|
186
|
+
/**
 * Depth-first search of an accessibility tree for terminal text.
 *
 * A node qualifies when its role or title contains "terminal"
 * (case-insensitive) and it has a truthy `value`. Children are visited in
 * order and the first hit is returned. Nodes deeper than level 8 are ignored.
 *
 * @param node Tree node; reads `role`, `title`, `value` and `children`.
 * @param depth Current depth, 0 at the root.
 * @returns The first matching node's value, or null when nothing matches.
 */
function extractTerminalText(node, depth = 0) {
    if (depth > 8) {
        return null;
    }
    const roleText = (node.role || "").toLowerCase();
    const titleText = (node.title || "").toLowerCase();
    const looksLikeTerminal = roleText.includes("terminal") || titleText.includes("terminal");
    if (looksLikeTerminal && node.value) {
        return node.value;
    }
    if (!node.children) {
        return null;
    }
    for (const childNode of node.children) {
        const text = extractTerminalText(childNode, depth + 1);
        if (text) {
            return text;
        }
    }
    return null;
}
|
|
204
|
+
// ── Task assignment ──
|
|
205
|
+
/**
 * Focus VS Code, type `text` and press enter.
 *
 * @param text Text to type through the bridge.
 * @returns true when every bridge call succeeded; false (after logging the
 *   error) when any of them threw.
 */
async function typeIntoTerminal(text) {
    // Bridge calls in order, each with the pause that follows it (0 = no pause).
    const steps = [
        ["app.focus", { bundleId: "com.microsoft.VSCode" }, 300],
        ["cg.typeText", { text }, 100],
        ["cg.keyCombo", { keys: ["enter"] }, 0],
    ];
    try {
        for (const [method, params, pauseMs] of steps) {
            await bridge.call(method, params);
            if (pauseMs > 0) {
                await sleep(pauseMs);
            }
        }
        return true;
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        log(`Failed to type: ${reason}`);
        return false;
    }
}
|
|
219
|
+
// True while an assignment is being typed, so overlapping calls cannot hand
// two tasks to the same terminal.
let assignmentInFlight = false;
/**
 * Hand the next queued task to the terminal, if it is idle.
 *
 * Picks the first queued task that is unassigned or already bound to this
 * terminal, marks it "assigned", types its prompt, then marks it "running"
 * or "failed" depending on whether typing succeeded.
 *
 * Does nothing when auto-assignment is disabled, the terminal is not idle,
 * another assignment is in progress, or no task qualifies.
 */
async function tryAssignTask() {
    if (!AUTO_ASSIGN || terminal.status !== "idle" || assignmentInFlight) {
        return;
    }
    const tasks = readTasks();
    // `== null` also accepts tasks that were enqueued without a terminalId field.
    const next = tasks.find((t) => t.status === "queued" && (t.terminalId == null || t.terminalId === terminal.id));
    if (!next) {
        return;
    }
    assignmentInFlight = true;
    try {
        log(`Assigning task: "${next.prompt.slice(0, 60)}"`);
        next.status = "assigned";
        next.terminalId = terminal.id;
        next.assignedAt = new Date().toISOString();
        writeTasks(tasks);
        terminal.lastTask = next.prompt;
        const success = await typeIntoTerminal(next.prompt);
        // Typing is asynchronous, and tasks.json may have been changed on disk
        // meanwhile. Re-read it and update only this task so that writing the
        // earlier snapshot back does not discard newly enqueued tasks.
        const hasId = next.id != null;
        const latest = hasId ? readTasks() : tasks;
        const current = hasId ? latest.find((t) => t.id === next.id) : next;
        if (success) {
            terminal.status = "running";
            log(`Task running: ${next.id}`);
        }
        else {
            // Nothing was handed over, so the next idle transition must not
            // count this prompt as a completed task.
            terminal.lastTask = null;
            log(`Task failed to assign: ${next.id}`);
        }
        if (!current) {
            log(`Task ${next.id} is no longer in the queue; status not recorded`);
            return;
        }
        if (success) {
            current.status = "running";
        }
        else {
            current.status = "failed";
            current.result = "Failed to type into terminal";
        }
        writeTasks(latest);
    }
    finally {
        assignmentInFlight = false;
    }
}
|
|
245
|
+
/**
 * React to the terminal becoming idle.
 *
 * If a task was in flight, record it in the terminal's history, mark the
 * matching "running" queue entry as completed (storing the last 20 lines of
 * output as its result) and clear `terminal.lastTask`. In every case, schedule
 * an attempt to assign the next task two seconds later.
 */
function handleIdleTransition() {
    // Complete current task
    if (terminal.lastTask) {
        terminal.taskHistory.push({
            task: terminal.lastTask,
            completedAt: new Date().toISOString(),
        });
        terminal.tasksCompleted++;
        // Update task in queue
        const tasks = readTasks();
        const running = tasks.find((t) => t.status === "running" && t.terminalId === terminal.id);
        if (running) {
            running.status = "completed";
            running.completedAt = new Date().toISOString();
            running.result = terminal.lastOutput.split("\n").slice(-20).join("\n");
            writeTasks(tasks);
        }
        terminal.lastTask = null;
        log(`Task completed (${terminal.tasksCompleted} total)`);
    }
    // Delay then try assign
    setTimeout(() => {
        // The daemon may have been asked to stop during the delay.
        if (stopped) {
            return;
        }
        // tryAssignTask is async; without a handler a rejection here would be
        // an unhandled rejection and take the whole daemon down.
        tryAssignTask().catch((err) => {
            log(`Task assignment failed: ${err instanceof Error ? err.message : String(err)}`);
        });
    }, 2000);
}
|
|
268
|
+
// ── Main loop ──
|
|
269
|
+
/**
 * One monitoring cycle: read the terminal, classify its status from the last
 * 15 lines, react to the result and persist the state.
 *
 * Returns early, without touching the state, when the terminal could not be
 * read.
 */
async function poll() {
    const output = await readTerminalContent();
    if (output === null) {
        return;
    }
    const oldStatus = terminal.status;
    terminal.lastOutput = output;
    terminal.lastPollAt = new Date().toISOString();
    const lastLines = output.split("\n").slice(-15).join("\n");
    terminal.status = detectStatus(lastLines);
    if (oldStatus !== terminal.status) {
        log(`Status: ${oldStatus} -> ${terminal.status}`);
        if (terminal.status === "idle" && (oldStatus === "running" || oldStatus === "unknown")) {
            handleIdleTransition();
        }
    }
    else if (terminal.status === "idle" && terminal.lastTask === null) {
        // Assignment used to be attempted only on a transition into idle, so a
        // task enqueued while the terminal was already idle was never picked
        // up. tryAssignTask sets terminal.lastTask before it starts typing, so
        // the null check skips this while an assignment is already under way.
        await tryAssignTask();
    }
    writeState();
}
|
|
286
|
+
/**
 * Daemon entry point: record the PID, make sure the task file exists, start
 * the bridge, then poll every POLL_MS until `stopped` is set.
 *
 * Errors thrown by a single poll are logged and do not end the loop.
 */
async function main() {
    // Write PID file
    fs.writeFileSync(PID_FILE, `${process.pid}`);
    // Initialize tasks file if missing
    const hasTaskFile = fs.existsSync(TASKS_FILE);
    if (!hasTaskFile) {
        writeTasks([]);
    }
    log(`Daemon started (pid=${process.pid})`);
    log(`Watching VS Code pid=${VSCODE_PID} window=${WINDOW_ID ?? "auto"} poll=${POLL_MS}ms`);
    await bridge.start();
    log("Bridge started");
    writeState();
    // One guarded poll: a failure is logged rather than propagated.
    const pollOnce = async () => {
        try {
            await poll();
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : String(err);
            log(`Poll error: ${reason}`);
        }
    };
    // Poll loop
    while (!stopped) {
        await pollOnce();
        await sleep(POLL_MS);
    }
}
|
|
309
|
+
/**
 * Pause for a while.
 *
 * @param ms Delay in milliseconds, handed to setTimeout.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
function sleep(ms) {
    return new Promise((wake) => {
        setTimeout(wake, ms);
    });
}
|
|
312
|
+
// ── Graceful shutdown ──
|
|
313
|
+
process.on("SIGINT", shutdown);
process.on("SIGTERM", shutdown);
/**
 * Stop the daemon: mark it stopped, write the final state, remove the PID
 * file, stop the bridge, flush the log and exit with code 0.
 *
 * Safe to call more than once; only the first call does anything. Every
 * cleanup step is best effort so that one failure cannot block the rest.
 */
async function shutdown() {
    if (stopped) {
        return;
    }
    stopped = true;
    log("Shutting down...");
    try {
        writeState();
    }
    catch (err) {
        log(`Failed to write final state: ${err instanceof Error ? err.message : String(err)}`);
    }
    try {
        fs.unlinkSync(PID_FILE);
    }
    catch { /* the PID file may already be gone */ }
    try {
        await bridge.stop();
    }
    catch { /* the bridge may already be stopped */ }
    // process.exit() does not wait for pending stream writes, so exit from the
    // end() callback, once the log has been flushed.
    logStream.end(() => process.exit(0));
    // Safety net in case the stream never reports completion.
    setTimeout(() => process.exit(0), 1000).unref();
}
|
|
332
|
+
// Start the daemon. A fatal error is logged, the on-disk state is cleaned up
// so other processes do not see a stale "running" daemon, and the process
// exits with code 1.
main().catch((err) => {
    log(`Fatal: ${err instanceof Error ? err.message : String(err)}`);
    stopped = true;
    try {
        writeState();
    }
    catch { /* best effort: the state directory may be unusable */ }
    try {
        fs.unlinkSync(PID_FILE);
    }
    catch { /* the PID file may never have been written */ }
    process.exitCode = 1;
    // Exit only after the log stream has flushed, so the fatal message is kept.
    logStream.end(() => process.exit(1));
    // Safety net in case the stream never reports completion.
    setTimeout(() => process.exit(1), 1000).unref();
});
|