screenhand 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,62 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * ScreenHand MCP Server — stdio entry point.
4
+ *
5
+ * Usage in claude_desktop_config.json or openclaw.json:
6
+ *
7
+ * {
8
+ * "mcpServers": {
9
+ * "screenhand": {
10
+ * "command": "npx",
11
+ * "args": ["tsx", "/path/to/screenhand/src/mcp-entry.ts"]
12
+ * }
13
+ * }
14
+ * }
15
+ *
16
+ * Environment variables:
17
+ * SCREENHAND_ADAPTER - "accessibility" (default), "composite", "cdp", "placeholder"
18
+ * SCREENHAND_HEADLESS - "1" to run browser in headless mode
19
+ */
20
+ import { PlaceholderAppAdapter } from "./runtime/app-adapter.js";
21
+ import { CdpChromeAdapter } from "./runtime/cdp-chrome-adapter.js";
22
+ import { TimelineLogger } from "./logging/timeline-logger.js";
23
+ import { AutomationRuntimeService } from "./runtime/service.js";
24
+ import { startMcpStdioServer } from "./mcp/mcp-stdio-server.js";
25
/**
 * Build the automation adapter selected through the environment.
 *
 * The adapter name is read from SCREENHAND_ADAPTER, then AUTOMATOR_ADAPTER,
 * and defaults to "accessibility". Headless mode is enabled when either
 * SCREENHAND_HEADLESS or AUTOMATOR_HEADLESS equals "1"; it is only passed to
 * the "cdp" and "composite" adapters.
 *
 * An unrecognised adapter name still falls back to the accessibility adapter,
 * but a warning is written to stderr so a typo does not go unnoticed.
 *
 * @returns {Promise<object>} The constructed adapter instance.
 */
async function createAdapter() {
    const adapterType = process.env.SCREENHAND_ADAPTER ?? process.env.AUTOMATOR_ADAPTER ?? "accessibility";
    // Computed once so the "cdp" and "composite" branches cannot drift apart.
    const headless = process.env.SCREENHAND_HEADLESS === "1" || process.env.AUTOMATOR_HEADLESS === "1";
    switch (adapterType) {
        case "placeholder":
            return new PlaceholderAppAdapter();
        case "cdp":
            return new CdpChromeAdapter({ headless });
        case "composite": {
            // Loaded on demand so the other adapters do not import the native bridge.
            const { BridgeClient } = await import("./native/bridge-client.js");
            const { CompositeAdapter } = await import("./runtime/composite-adapter.js");
            return new CompositeAdapter(new BridgeClient(), { headless });
        }
        default: {
            if (adapterType !== "accessibility") {
                // Diagnostics go to stderr, like the other startup messages of this entry point.
                process.stderr.write(`ScreenHand: unknown adapter "${adapterType}", falling back to "accessibility".\n`);
            }
            const { BridgeClient } = await import("./native/bridge-client.js");
            const { AccessibilityAdapter } = await import("./runtime/accessibility-adapter.js");
            return new AccessibilityAdapter(new BridgeClient());
        }
    }
}
51
// Startup sequence: build the adapter, wire it and a timeline logger into the
// runtime service, then hand the service to the MCP stdio server.
// Progress and failures are reported on stderr; a failure exits with code 1.
try {
    const selectedAdapter = await createAdapter();
    const timeline = new TimelineLogger();
    const service = new AutomationRuntimeService(selectedAdapter, timeline);
    process.stderr.write("ScreenHand MCP server starting...\n");
    await startMcpStdioServer(service);
    process.stderr.write("ScreenHand MCP server connected.\n");
}
catch (failure) {
    // Non-Error throwables are stringified so the message line is always printable.
    let reason;
    if (failure instanceof Error) {
        reason = failure.message;
    }
    else {
        reason = String(failure);
    }
    process.stderr.write(`ScreenHand startup error: ${reason}\n`);
    process.exit(1);
}
@@ -0,0 +1,160 @@
1
+ /**
2
+ * Learning Memory — Recall engine (in-memory)
3
+ *
4
+ * All searches run against cached data — no disk IO.
5
+ * Provides fast methods for the interceptor to call on every tool invocation.
6
+ */
7
+ import { MemoryStore } from "./store.js";
8
/**
 * Read-only query layer over a memory store.
 *
 * Every method works on whatever `store.readStrategies()`,
 * `store.readErrors()` and `store.lookupByFingerprint()` return; this class
 * performs no IO of its own.
 */
export class RecallEngine {
    store;
    /**
     * @param store Store exposing readStrategies(), readErrors() and lookupByFingerprint().
     */
    constructor(store) {
        this.store = store;
    }
    /**
     * Rank stored strategies against a free-text task description.
     *
     * score = relevance × recency × successBoost × reliability, where
     *  - relevance   is the fraction of query tokens that overlap (substring
     *                either way) a token from the task, tags, tool names or
     *                string parameter values;
     *  - recency     decays linearly over a year and is floored at 0.5;
     *  - successBoost grows with log2 of the success count;
     *  - reliability is successCount / (successCount + failCount).
     *
     * @param {string} query Task description to match.
     * @param {number} [limit=5] Maximum number of results.
     * @returns {object[]} Copies of matching strategies with a `score` field, best first.
     */
    recallStrategies(query, limit = 5) {
        const strategies = this.store.readStrategies();
        if (strategies.length === 0)
            return [];
        const queryTokens = tokenize(query);
        if (queryTokens.length === 0)
            return [];
        const now = Date.now();
        const msPerDay = 1000 * 60 * 60 * 24;
        const scored = strategies.map((s) => {
            const targetTokens = new Set([
                ...tokenize(s.task),
                ...(s.tags ?? []),
                ...s.steps.map((step) => step.tool),
                // A step without params contributes no parameter tokens instead of throwing.
                ...s.steps.flatMap((step) => Object.values(step.params ?? {})
                    .filter((v) => typeof v === "string")
                    .flatMap((v) => tokenize(v))),
            ]);
            let matches = 0;
            for (const qt of queryTokens) {
                for (const tt of targetTokens) {
                    if (tt.includes(qt) || qt.includes(tt)) {
                        matches++;
                        break;
                    }
                }
            }
            const relevance = matches / queryTokens.length;
            // An unparsable lastUsed is treated as "very old" (recency floor) rather than
            // producing a NaN score that would silently drop the strategy. Future
            // timestamps (clock skew) are clamped so recency never exceeds 1.
            const lastUsedMs = new Date(s.lastUsed).getTime();
            const ageDays = Number.isFinite(lastUsedMs)
                ? Math.max(0, (now - lastUsedMs) / msPerDay)
                : Number.POSITIVE_INFINITY;
            const recency = Math.max(0.5, 1.0 - ageDays / 365);
            const successBoost = 1 + Math.log2(Math.max(1, s.successCount)) * 0.1;
            // Penalty for strategies that have failed — reduces score proportionally.
            const failCount = s.failCount ?? 0;
            const totalAttempts = s.successCount + failCount;
            const reliabilityPenalty = totalAttempts > 0
                ? s.successCount / totalAttempts
                : 1;
            const score = relevance * recency * successBoost * reliabilityPenalty;
            return { ...s, score };
        });
        return scored
            .filter((s) => s.score > 0)
            .sort((a, b) => b.score - a.score)
            .slice(0, limit);
    }
    /**
     * Exact match by tool-sequence fingerprint.
     *
     * @param {string[]} tools Ordered tool names.
     * @returns {object|null} The stored strategy, or null when none exists or
     *   it has failed more often than it has succeeded.
     */
    recallByFingerprint(tools) {
        const fp = MemoryStore.makeFingerprint(tools);
        const strategy = this.store.lookupByFingerprint(fp);
        if (!strategy)
            return null;
        // Skip strategies that fail more than they succeed.
        const failCount = strategy.failCount ?? 0;
        if (failCount > strategy.successCount)
            return null;
        return strategy;
    }
    /**
     * Find the most frequently seen error pattern for a tool that has a resolution.
     *
     * @param {string} tool Tool name.
     * @returns {object|null} The matching error with the highest `occurrences`, or null.
     */
    quickErrorCheck(tool) {
        const errors = this.store.readErrors();
        let best = null;
        for (const e of errors) {
            if (e.tool === tool && e.resolution) {
                if (!best || e.occurrences > best.occurrences)
                    best = e;
            }
        }
        return best;
    }
    /**
     * Suggest the next step for the tool sequence executed so far.
     *
     * Scans the stored strategies in order and returns the first one that is
     * longer than `recentTools`, starts with exactly those tools, and is not
     * unreliable (failCount > successCount).
     *
     * @param {string[]} recentTools Tool names executed so far, oldest first.
     * @returns {{strategy: object, nextStep: object, fingerprint: string}|null}
     */
    quickStrategyHint(recentTools) {
        if (recentTools.length === 0)
            return null;
        const strategies = this.store.readStrategies();
        for (const s of strategies) {
            // A strategy no longer than the prefix has no "next step" to offer.
            if (s.steps.length <= recentTools.length)
                continue;
            // Skip unreliable strategies.
            const failCount = s.failCount ?? 0;
            if (failCount > s.successCount)
                continue;
            const strategyToolPrefix = s.steps.slice(0, recentTools.length).map((st) => st.tool);
            const matches = recentTools.every((t, i) => t === strategyToolPrefix[i]);
            if (matches) {
                return {
                    strategy: s,
                    nextStep: s.steps[recentTools.length],
                    fingerprint: s.fingerprint ?? MemoryStore.makeFingerprint(s.steps.map((st) => st.tool)),
                };
            }
        }
        return null;
    }
    /**
     * List error patterns, optionally restricted to one tool.
     *
     * @param {string} [tool] Tool name; when omitted every error is returned.
     * @param {object} [params] When given (and more than one error matches),
     *   results are ordered by similarity of their serialized params to these.
     * @returns {object[]} A new array; the store's own array is never returned or reordered.
     */
    recallErrors(tool, params) {
        const errors = this.store.readErrors();
        if (!tool)
            return [...errors];
        const filtered = errors.filter((e) => e.tool === tool);
        if (params && filtered.length > 1) {
            const paramStr = JSON.stringify(params).toLowerCase();
            // Score each candidate once instead of re-serialising inside the comparator.
            // JSON.stringify(undefined) is undefined, so missing params are scored as "{}".
            const similarity = new Map(filtered.map((e) => [
                e,
                stringSimilarity(paramStr, JSON.stringify(e.params ?? {}).toLowerCase()),
            ]));
            filtered.sort((a, b) => similarity.get(b) - similarity.get(a));
        }
        return filtered;
    }
}
136
/**
 * Tokenize a string into lowercase keywords of three or more characters.
 *
 * Any run of characters that is not a Unicode letter, combining mark or digit
 * separates tokens, so underscores, punctuation and whitespace all split.
 * For ASCII-only input this is the same as splitting on `[\W_]+`, but
 * non-ASCII words such as "café" stay intact instead of being cut at the
 * accented letter.
 *
 * @param {string} text Text to tokenize; a non-string yields no tokens.
 * @returns {string[]} Tokens in order of appearance (duplicates kept).
 */
function tokenize(text) {
    if (typeof text !== "string")
        return [];
    return text
        .toLowerCase()
        .split(/[^\p{L}\p{M}\p{N}]+/u)
        .filter((w) => w.length >= 3);
}
143
/**
 * Bigram overlap of `b` against `a`.
 *
 * Returns the share of `b`'s two-character windows (duplicates counted) that
 * also occur somewhere in `a`. The measure is not symmetric: swapping the
 * arguments can change the result. Identical inputs score 1; if either input
 * is shorter than two characters the score is 0.
 *
 * @param {string} a Reference string whose bigrams form the lookup set.
 * @param {string} b String whose bigrams are counted.
 * @returns {number} A value between 0 and 1.
 */
function stringSimilarity(a, b) {
    if (a === b) {
        return 1;
    }
    if (a.length < 2 || b.length < 2) {
        return 0;
    }
    const referencePairs = new Set(Array.from({ length: a.length - 1 }, (_, start) => a.slice(start, start + 2)));
    const windowCount = b.length - 1;
    let shared = 0;
    for (let start = 0; start < windowCount; start += 1) {
        if (referencePairs.has(b.slice(start, start + 2))) {
            shared += 1;
        }
    }
    return windowCount > 0 ? shared / windowCount : 0;
}
@@ -0,0 +1,98 @@
1
+ /**
2
+ * Background web research — fire-and-forget resolution lookup.
3
+ *
4
+ * When a tool fails and no resolution exists, this searches for a fix
5
+ * in the background (non-blocking) and saves it for next time.
6
+ *
7
+ * Resolution paths:
8
+ * 1. Claude API (haiku) — if ANTHROPIC_API_KEY is set
9
+ * 2. DuckDuckGo instant answer — free fallback, no auth
10
+ */
11
/**
 * Start a resolution lookup for a failed tool call without waiting for it.
 *
 * Returns immediately. Any rejection from the lookup is discarded on purpose,
 * so this call never throws and never produces an unhandled rejection.
 */
export function backgroundResearch(store, tool, params, errorMessage) {
    const discardFailure = () => { };
    // Intentionally not awaited: the caller must not be blocked by the lookup.
    void doResearch(store, tool, params, errorMessage).catch(discardFailure);
}
15
/**
 * Look up a fix for a failed tool call and persist it.
 *
 * Tries tryClaudeAPI first when ANTHROPIC_API_KEY is set, then tryDuckDuckGo.
 * When a resolution is found it is stored twice: as an error entry carrying
 * the resolution, and as a one-step strategy tagged "research"/"fix".
 * Nothing is stored when neither source yields a resolution.
 *
 * @param store Store exposing appendError() and appendStrategy().
 * @param {string} tool Name of the tool that failed.
 * @param {object} params Parameters of the failed call.
 * @param {string} errorMessage Error text; non-strings are stringified, null/undefined become "".
 * @returns {Promise<void>}
 */
async function doResearch(store, tool, params, errorMessage) {
    // Normalise first so a missing or non-string error cannot throw on .slice().
    const message = typeof errorMessage === "string" ? errorMessage : String(errorMessage ?? "");
    const query = `macOS automation: "${tool}" failed with "${message.slice(0, 200)}"`;
    let resolution = null;
    const apiKey = process.env["ANTHROPIC_API_KEY"];
    if (apiKey) {
        resolution = await tryClaudeAPI(apiKey, query);
    }
    if (!resolution) {
        resolution = await tryDuckDuckGo(query);
    }
    if (!resolution)
        return;
    // Timestamp plus a short random suffix: two lookups finishing in the same
    // millisecond would otherwise share an id.
    const idSuffix = Date.now().toString(36) + Math.random().toString(36).slice(2, 6);
    const timestamp = new Date().toISOString();
    // Save resolution to error cache.
    store.appendError({
        id: "err_research_" + idSuffix,
        tool,
        params,
        error: message,
        resolution,
        occurrences: 1,
        lastSeen: timestamp,
    });
    // Save as a reusable strategy.
    // NOTE(review): the recorded step is the call that failed, stored with
    // successCount 1 — confirm this is the intended content for recall.
    store.appendStrategy({
        id: "str_research_" + idSuffix,
        task: `Fix: ${tool} — ${message.slice(0, 100)}`,
        steps: [{ tool, params }],
        totalDurationMs: 0,
        successCount: 1,
        failCount: 0,
        lastUsed: timestamp,
        tags: [tool, "research", "fix"],
        // Joining a single tool name yields the name itself, so this matches the
        // join-based fingerprints of the seed strategies. An empty string is not
        // nullish, so `fingerprint ?? …` fallbacks would never replace it.
        // NOTE(review): confirm MemoryStore.makeFingerprint uses the same scheme.
        fingerprint: tool,
    });
}
50
/**
 * Ask the Anthropic Messages API for a short fix suggestion.
 *
 * The whole exchange — request and body read — is bounded by a 10 second
 * abort timer, and the timer is always cleared afterwards, including when
 * fetch rejects.
 *
 * @param {string} apiKey Value sent in the `x-api-key` header.
 * @param {string} query Error description embedded in the prompt.
 * @returns {Promise<string|null>} The trimmed answer when it is longer than
 *   10 characters; null on any failure, non-OK status or unusable answer.
 */
async function tryClaudeAPI(apiKey, query) {
    const controller = new AbortController();
    const timeout = setTimeout(() => controller.abort(), 10000);
    try {
        const response = await fetch("https://api.anthropic.com/v1/messages", {
            method: "POST",
            headers: {
                "Content-Type": "application/json",
                "x-api-key": apiKey,
                "anthropic-version": "2023-06-01",
            },
            body: JSON.stringify({
                model: "claude-haiku-4-5-20251001",
                max_tokens: 200,
                messages: [{
                        role: "user",
                        content: `You are a macOS automation expert. Give a brief fix (1-2 sentences) for this error:\n\n${query}`,
                    }],
            }),
            signal: controller.signal,
        });
        if (!response.ok)
            return null;
        const data = await response.json();
        const text = data?.content?.[0]?.text;
        if (typeof text !== "string")
            return null;
        // Measure after trimming so whitespace padding cannot satisfy the length check.
        const answer = text.trim();
        return answer.length > 10 ? answer : null;
    }
    catch {
        // Best-effort lookup: network errors, aborts and malformed JSON all mean "no answer".
        return null;
    }
    finally {
        // Runs on every path so a failed request does not leave a pending timer behind.
        clearTimeout(timeout);
    }
}
82
/**
 * Query the DuckDuckGo instant-answer endpoint for a fix suggestion.
 *
 * The whole exchange — request and body read — is bounded by a 5 second
 * abort timer, and the timer is always cleared afterwards, including when
 * fetch rejects.
 *
 * @param {string} query Search text; URL-encoded into the `q` parameter.
 * @returns {Promise<string|null>} The first non-empty of AbstractText,
 *   Abstract or Answer, trimmed, when longer than 10 characters; otherwise null.
 */
async function tryDuckDuckGo(query) {
    const controller = new AbortController();
    const timeout = setTimeout(() => controller.abort(), 5000);
    try {
        const url = `https://api.duckduckgo.com/?q=${encodeURIComponent(query)}&format=json&no_html=1`;
        const response = await fetch(url, { signal: controller.signal });
        if (!response.ok)
            return null;
        const data = await response.json();
        // `||` is deliberate: an empty string means "field absent" and must fall through.
        const text = data.AbstractText || data.Abstract || data.Answer;
        if (typeof text !== "string")
            return null;
        // Measure after trimming so whitespace padding cannot satisfy the length check.
        const answer = text.trim();
        return answer.length > 10 ? answer : null;
    }
    catch {
        // Best-effort lookup: network errors, aborts and malformed JSON all mean "no answer".
        return null;
    }
    finally {
        // Runs on every path so a failed request does not leave a pending timer behind.
        clearTimeout(timeout);
    }
}
@@ -0,0 +1,89 @@
1
+ /**
2
+ * Predefined seed strategies — common macOS desktop workflows.
3
+ * Loaded on first boot so the memory system has knowledge from day one.
4
+ */
5
// Number of seeds built so far; the next seed's id is derived from it.
let seedCounter = 0;
/** Join an ordered list of tool names into a single "→"-separated key. */
function makeFingerprint(tools) {
    const separator = "→";
    return tools.join(separator);
}
/**
 * Build one seed strategy record.
 *
 * Each call increments the module counter and uses it for a zero-padded id
 * ("seed_001", "seed_002", …). Seeds start with 10 recorded successes, no
 * failures, a zero duration and the current time as `lastUsed`.
 *
 * @param {string} task Human-readable task description.
 * @param {{tool: string, params: object}[]} steps Ordered tool calls.
 * @param {string[]} tags Search tags.
 * @returns {object} The strategy record, including a fingerprint of the tool sequence.
 */
function seed(task, steps, tags) {
    seedCounter += 1;
    const id = `seed_${String(seedCounter).padStart(3, "0")}`;
    const toolSequence = steps.map(({ tool }) => tool);
    return {
        id,
        task,
        steps,
        totalDurationMs: 0,
        successCount: 10,
        failCount: 0,
        lastUsed: new Date().toISOString(),
        tags,
        fingerprint: makeFingerprint(toolSequence),
    };
}
23
/**
 * Built-in strategies for common macOS desktop workflows.
 *
 * Each entry is passed through seed() in array order, which also determines
 * the ids (seed_001 … seed_012). Empty-string and zero parameter values are
 * left unfilled in the seed data itself.
 */
export const SEED_STRATEGIES = [
    {
        task: "Take a photo with Photo Booth",
        tags: ["photo", "booth", "camera"],
        steps: [
            { tool: "launch", params: { bundleId: "com.apple.PhotoBooth" } },
            { tool: "ui_press", params: { title: "Take Photo" } },
        ],
    },
    {
        task: "Open a URL in Chrome",
        tags: ["chrome", "browse", "url"],
        steps: [
            { tool: "launch", params: { bundleId: "com.google.Chrome" } },
            { tool: "browser_navigate", params: { url: "" } },
        ],
    },
    {
        task: "Save current document",
        tags: ["save", "document"],
        steps: [
            { tool: "focus", params: { bundleId: "" } },
            { tool: "key", params: { combo: "cmd+s" } },
        ],
    },
    {
        task: "Copy from one app and paste into another",
        tags: ["copy", "paste"],
        steps: [
            { tool: "focus", params: { bundleId: "" } },
            { tool: "key", params: { combo: "cmd+c" } },
            { tool: "focus", params: { bundleId: "" } },
            { tool: "key", params: { combo: "cmd+v" } },
        ],
    },
    {
        task: "Navigate to a folder in Finder",
        tags: ["finder", "folder", "navigate"],
        steps: [
            { tool: "focus", params: { bundleId: "com.apple.finder" } },
            { tool: "key", params: { combo: "cmd+shift+g" } },
            { tool: "type_text", params: { text: "" } },
        ],
    },
    {
        task: "Create a new folder in Finder",
        tags: ["finder", "folder", "create"],
        steps: [
            { tool: "focus", params: { bundleId: "com.apple.finder" } },
            { tool: "key", params: { combo: "cmd+shift+n" } },
            { tool: "type_text", params: { text: "" } },
        ],
    },
    {
        task: "Close the current window",
        tags: ["close", "window"],
        steps: [
            { tool: "focus", params: { bundleId: "" } },
            { tool: "key", params: { combo: "cmd+w" } },
        ],
    },
    {
        task: "Select all content and copy",
        tags: ["select", "all", "copy"],
        steps: [
            { tool: "focus", params: { bundleId: "" } },
            { tool: "key", params: { combo: "cmd+a" } },
            { tool: "key", params: { combo: "cmd+c" } },
        ],
    },
    {
        task: "List all running applications",
        tags: ["apps", "list", "running"],
        steps: [
            { tool: "apps", params: {} },
        ],
    },
    {
        task: "Inspect an app's UI element tree",
        tags: ["inspect", "tree", "accessibility"],
        steps: [
            { tool: "focus", params: { bundleId: "" } },
            { tool: "ui_tree", params: { pid: 0 } },
        ],
    },
    {
        task: "Open a new Chrome tab and navigate to URL",
        tags: ["chrome", "tab", "new"],
        steps: [
            { tool: "focus", params: { bundleId: "com.google.Chrome" } },
            { tool: "key", params: { combo: "cmd+t" } },
            { tool: "browser_navigate", params: { url: "" } },
        ],
    },
    {
        task: "Export document as PDF",
        tags: ["export", "pdf"],
        steps: [
            { tool: "focus", params: { bundleId: "" } },
            { tool: "menu_click", params: { menuPath: "File/Export as PDF" } },
        ],
    },
].map(({ task, steps, tags }) => seed(task, steps, tags));
@@ -0,0 +1,161 @@
1
+ /**
2
+ * Learning Memory — Session tracking with auto-save
3
+ *
4
+ * Tracks a rolling buffer of actions within a "task session".
5
+ * Auto-saves strategies when a successful sequence is detected:
6
+ * - 3+ consecutive successes followed by a gap (>60s) or session end
7
+ * - Or explicit endSession() call
8
+ */
9
+ import { MemoryStore } from "./store.js";
10
/** Idle time in milliseconds (60 seconds) after which the next call starts a new session. */
const SESSION_GAP_MS = 60 * 1000;
/** Upper bound on buffered actions; the oldest entry is dropped beyond this. */
const MAX_BUFFER_SIZE = 100;
/** Minimum number of trailing successful steps required before auto-saving. */
const MIN_AUTO_SAVE_STEPS = 3;
13
/**
 * Tracks the actions of the current task session and turns successful
 * sequences into stored strategies.
 *
 * A session goes stale when more than SESSION_GAP_MS passes after the last
 * recorded action; the next getSessionId()/recordAction() call then auto-saves
 * the old sequence (if it qualifies) and starts a fresh session.
 */
export class SessionTracker {
    store;
    sessionId;
    taskDescription = null;
    buffer = [];
    // Timestamp (ms) of the last recorded action; 0 means "nothing recorded in this session yet".
    lastActionTime = 0;
    /**
     * @param store Store exposing appendStrategy().
     */
    constructor(store) {
        this.store = store;
        this.sessionId = SessionTracker.generateId();
    }
    /** Build a session id from the current time plus a short random suffix. */
    static generateId() {
        return "s_" + Date.now().toString(36) + Math.random().toString(36).slice(2, 6);
    }
    /**
     * Start (or restart) a named task session.
     * The previous buffer is auto-saved first when it qualifies.
     *
     * @param {string} [taskDescription] Optional description of the new task.
     * @returns {string} The new session id.
     */
    startSession(taskDescription) {
        this.tryAutoSave();
        this.sessionId = SessionTracker.generateId();
        this.taskDescription = taskDescription ?? null;
        this.buffer = [];
        this.lastActionTime = Date.now();
        return this.sessionId;
    }
    /**
     * Get the current session id, rotating to a new session first if the
     * current one is stale. Repeated calls return the same id until the
     * session goes stale again.
     *
     * @returns {string}
     */
    getSessionId() {
        this.rotateIfStale(Date.now());
        return this.sessionId;
    }
    /**
     * Record an action into the current session buffer, rotating to a new
     * session first if the current one is stale. The buffer keeps at most
     * MAX_BUFFER_SIZE entries, dropping the oldest.
     *
     * @param {object} entry Action record (tool, params, success, durationMs).
     */
    recordAction(entry) {
        const now = Date.now();
        this.rotateIfStale(now);
        this.lastActionTime = now;
        this.buffer.push(entry);
        if (this.buffer.length > MAX_BUFFER_SIZE) {
            this.buffer.shift();
        }
    }
    /**
     * End the session and save the whole buffer as a strategy when the task succeeded.
     *
     * NOTE(review): on success every buffered action is saved, including ones
     * whose own `success` flag is false — confirm that is intended.
     *
     * @param {boolean} success Whether the task as a whole succeeded.
     * @param {string} [taskDescription] Overrides the description given to startSession().
     * @returns {object|null} The saved strategy, or null when nothing was saved
     *   (failure, no description, or empty buffer). The buffer is cleared either way.
     */
    endSession(success, taskDescription) {
        const task = taskDescription ?? this.taskDescription;
        if (!success || !task || this.buffer.length === 0) {
            this.buffer = [];
            return null;
        }
        const strategy = this.buildStrategy(task, this.buffer);
        this.store.appendStrategy(strategy);
        this.buffer = [];
        return strategy;
    }
    /** Get a copy of the current session's action buffer. */
    getBuffer() {
        return [...this.buffer];
    }
    /** Get the tool names of the most recent `limit` actions, oldest first. */
    getRecentToolNames(limit = 10) {
        return this.buffer.slice(-limit).map((a) => a.tool);
    }
    /** Get the current task description, or null when none was given. */
    getTaskDescription() {
        return this.taskDescription;
    }
    // ── session rotation ───────────────────────────
    /**
     * Rotate to a fresh session when the last recorded action is older than
     * SESSION_GAP_MS.
     *
     * lastActionTime is reset to 0 on rotation. Without that reset the stale
     * timestamp would make every later getSessionId() call mint yet another id,
     * and the next recordAction() would rotate again, so the id handed out
     * would never be the session the action lands in.
     *
     * @param {number} now Current time in milliseconds.
     * @returns {boolean} True when a rotation happened.
     */
    rotateIfStale(now) {
        const isStale = this.lastActionTime > 0 && now - this.lastActionTime > SESSION_GAP_MS;
        if (!isStale)
            return false;
        this.tryAutoSave();
        this.sessionId = SessionTracker.generateId();
        this.buffer = [];
        this.taskDescription = null;
        this.lastActionTime = 0;
        return true;
    }
    // ── auto-save logic ────────────────────────────
    /**
     * Try to auto-save the current buffer as a strategy.
     * Only the trailing run of successful actions is considered, and it must
     * contain at least MIN_AUTO_SAVE_STEPS entries. When no task description
     * was given, one is inferred from the tool sequence.
     */
    tryAutoSave() {
        if (this.buffer.length < MIN_AUTO_SAVE_STEPS)
            return;
        // Walk back from the end to the first action of the trailing success run.
        let streakStart = this.buffer.length;
        while (streakStart > 0 && this.buffer[streakStart - 1].success) {
            streakStart--;
        }
        const successStreak = this.buffer.slice(streakStart);
        if (successStreak.length < MIN_AUTO_SAVE_STEPS)
            return;
        const task = this.taskDescription ?? this.inferTaskDescription(successStreak);
        const strategy = this.buildStrategy(task, successStreak);
        this.store.appendStrategy(strategy);
    }
    /**
     * Infer a task description from a sequence of actions: the distinct tool
     * names joined with " → ", followed by up to three short string values
     * taken from well-known parameter keys.
     */
    inferTaskDescription(actions) {
        const tools = [...new Set(actions.map((a) => a.tool))];
        const hintKeys = ["bundleId", "title", "url", "text", "script", "selector", "menuPath"];
        const keyParams = [];
        for (const a of actions) {
            // An action recorded without params contributes no hint instead of throwing.
            for (const [key, val] of Object.entries(a.params ?? {})) {
                if (typeof val === "string" && val.length > 2 && val.length < 60) {
                    if (hintKeys.includes(key)) {
                        keyParams.push(val);
                    }
                }
            }
        }
        const paramHint = keyParams.length > 0 ? ` (${keyParams.slice(0, 3).join(", ")})` : "";
        return `${tools.join(" → ")}${paramHint}`;
    }
    /**
     * Build a strategy record from a task description and its actions.
     * Duration is the sum of the actions' durationMs; the record starts with
     * one success and zero failures.
     */
    buildStrategy(task, actions) {
        const steps = actions.map((a) => ({
            tool: a.tool,
            params: a.params,
        }));
        const totalDurationMs = actions.reduce((sum, a) => sum + a.durationMs, 0);
        const tags = extractTags(task, steps);
        const fingerprint = MemoryStore.makeFingerprint(steps.map((s) => s.tool));
        return {
            id: "str_" + Date.now().toString(36) + Math.random().toString(36).slice(2, 6),
            task,
            steps,
            totalDurationMs,
            successCount: 1,
            failCount: 0,
            lastUsed: new Date().toISOString(),
            tags,
            fingerprint,
        };
    }
}
152
/**
 * Extract search tags from a task description and its steps.
 *
 * Tags are the lowercase words of `task` that are at least three characters
 * long, followed by each step's tool name, de-duplicated in first-seen order.
 * A word is a run of Unicode letters, combining marks, digits or underscores.
 * For ASCII text this is the same as splitting on `\W+`, but non-ASCII words
 * such as "café" are kept whole instead of being cut at the accented letter.
 *
 * @param {string} task Task description.
 * @param {{tool: string}[]} steps Strategy steps.
 * @returns {string[]} Unique tags.
 */
function extractTags(task, steps) {
    const tags = new Set();
    const words = task
        .toLowerCase()
        .split(/[^\p{L}\p{M}\p{N}_]+/u)
        .filter((w) => w.length >= 3);
    for (const w of words) {
        tags.add(w);
    }
    for (const s of steps) {
        tags.add(s.tool);
    }
    return [...tags];
}