screenhand 0.2.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212) hide show
  1. package/README.md +165 -446
  2. package/bin/darwin-arm64/macos-bridge +0 -0
  3. package/dist/mcp-desktop.js +3615 -400
  4. package/dist/scripts/export-help-center.js +112 -0
  5. package/dist/scripts/marketing-loop.js +117 -0
  6. package/dist/scripts/observer-daemon.js +288 -0
  7. package/dist/scripts/orchestrator-daemon.js +399 -0
  8. package/dist/scripts/threads-campaign.js +208 -0
  9. package/dist/src/community/fetcher.js +109 -0
  10. package/dist/src/community/index.js +6 -0
  11. package/dist/src/community/publisher.js +191 -0
  12. package/dist/src/community/remote-api.js +121 -0
  13. package/dist/src/community/types.js +3 -0
  14. package/dist/src/community/validator.js +95 -0
  15. package/dist/src/context-tracker.js +489 -0
  16. package/dist/src/ingestion/coverage-auditor.js +233 -0
  17. package/dist/src/ingestion/doc-parser.js +164 -0
  18. package/dist/src/ingestion/index.js +8 -0
  19. package/dist/src/ingestion/menu-scanner.js +152 -0
  20. package/dist/src/ingestion/reference-merger.js +186 -0
  21. package/dist/src/ingestion/shortcut-extractor.js +180 -0
  22. package/dist/src/ingestion/tutorial-extractor.js +170 -0
  23. package/dist/src/ingestion/types.js +3 -0
  24. package/dist/src/jobs/manager.js +82 -14
  25. package/dist/src/jobs/runner.js +138 -15
  26. package/dist/src/learning/engine.js +356 -0
  27. package/dist/src/learning/index.js +9 -0
  28. package/dist/src/learning/locator-policy.js +120 -0
  29. package/dist/src/learning/pattern-policy.js +89 -0
  30. package/dist/src/learning/recovery-policy.js +116 -0
  31. package/dist/src/learning/sensor-policy.js +115 -0
  32. package/dist/src/learning/timing-model.js +204 -0
  33. package/dist/src/learning/topology-policy.js +90 -0
  34. package/dist/src/learning/types.js +9 -0
  35. package/dist/src/logging/timeline-logger.js +4 -1
  36. package/dist/src/memory/playbook-seeds.js +200 -0
  37. package/dist/src/memory/recall.js +60 -8
  38. package/dist/src/memory/service.js +30 -5
  39. package/dist/src/memory/store.js +34 -5
  40. package/dist/src/native/bridge-client.js +253 -31
  41. package/dist/src/observer/state.js +199 -0
  42. package/dist/src/observer/types.js +43 -0
  43. package/dist/src/orchestrator/state.js +68 -0
  44. package/dist/src/orchestrator/types.js +22 -0
  45. package/dist/src/perception/ax-source.js +162 -0
  46. package/dist/src/perception/cdp-source.js +162 -0
  47. package/dist/src/perception/coordinator.js +771 -0
  48. package/dist/src/perception/frame-differ.js +287 -0
  49. package/dist/src/perception/index.js +22 -0
  50. package/dist/src/perception/manager.js +199 -0
  51. package/dist/src/perception/types.js +47 -0
  52. package/dist/src/perception/vision-source.js +399 -0
  53. package/dist/src/planner/deterministic.js +298 -0
  54. package/dist/src/planner/executor.js +870 -0
  55. package/dist/src/planner/goal-store.js +92 -0
  56. package/dist/src/planner/index.js +21 -0
  57. package/dist/src/planner/planner.js +520 -0
  58. package/dist/src/planner/tool-registry.js +71 -0
  59. package/dist/src/planner/types.js +22 -0
  60. package/dist/src/platform/explorer.js +213 -0
  61. package/dist/src/platform/help-center-markdown.js +527 -0
  62. package/dist/src/platform/learner.js +257 -0
  63. package/dist/src/playbook/engine.js +296 -11
  64. package/dist/src/playbook/mcp-recorder.js +204 -0
  65. package/dist/src/playbook/recorder.js +3 -2
  66. package/dist/src/playbook/runner.js +1 -1
  67. package/dist/src/playbook/store.js +139 -10
  68. package/dist/src/recovery/detectors.js +156 -0
  69. package/dist/src/recovery/engine.js +327 -0
  70. package/dist/src/recovery/index.js +20 -0
  71. package/dist/src/recovery/strategies.js +274 -0
  72. package/dist/src/recovery/types.js +20 -0
  73. package/dist/src/runtime/accessibility-adapter.js +55 -18
  74. package/dist/src/runtime/applescript-adapter.js +8 -2
  75. package/dist/src/runtime/cdp-chrome-adapter.js +1 -1
  76. package/dist/src/runtime/executor.js +23 -3
  77. package/dist/src/runtime/locator-cache.js +24 -2
  78. package/dist/src/runtime/service.js +59 -15
  79. package/dist/src/runtime/session-manager.js +4 -1
  80. package/dist/src/runtime/vision-adapter.js +2 -1
  81. package/dist/src/state/app-map-types.js +72 -0
  82. package/dist/src/state/app-map.js +1974 -0
  83. package/dist/src/state/entity-tracker.js +108 -0
  84. package/dist/src/state/fusion.js +96 -0
  85. package/dist/src/state/index.js +21 -0
  86. package/dist/src/state/ladder-generator.js +236 -0
  87. package/dist/src/state/persistence.js +156 -0
  88. package/dist/src/state/types.js +17 -0
  89. package/dist/src/state/world-model.js +1456 -0
  90. package/dist/src/util/atomic-write.js +19 -4
  91. package/dist/src/util/sanitize.js +146 -0
  92. package/dist-app-maps/com.figma.Desktop.json +959 -0
  93. package/dist-app-maps/com.hnc.Discord.json +1146 -0
  94. package/dist-app-maps/notion.id.json +2831 -0
  95. package/dist-playbooks/canva-screenhand-carousel.json +445 -0
  96. package/dist-playbooks/codex-desktop.json +76 -0
  97. package/dist-playbooks/competitor-research-stack.json +122 -0
  98. package/dist-playbooks/davinci-color-grade.json +153 -0
  99. package/dist-playbooks/davinci-edit-timeline.json +162 -0
  100. package/dist-playbooks/davinci-render.json +114 -0
  101. package/dist-playbooks/devto.json +52 -0
  102. package/dist-playbooks/discord.json +41 -0
  103. package/dist-playbooks/google-flow-create-project.json +59 -0
  104. package/dist-playbooks/google-flow-edit-image.json +90 -0
  105. package/dist-playbooks/google-flow-edit-video.json +90 -0
  106. package/dist-playbooks/google-flow-generate-image.json +68 -0
  107. package/dist-playbooks/google-flow-generate-video.json +191 -0
  108. package/dist-playbooks/google-flow-open-project.json +48 -0
  109. package/dist-playbooks/google-flow-open-scenebuilder.json +64 -0
  110. package/dist-playbooks/google-flow-search-assets.json +64 -0
  111. package/dist-playbooks/instagram.json +57 -0
  112. package/dist-playbooks/linkedin.json +52 -0
  113. package/dist-playbooks/n8n.json +43 -0
  114. package/dist-playbooks/reddit.json +52 -0
  115. package/dist-playbooks/threads.json +59 -0
  116. package/dist-playbooks/x-twitter.json +59 -0
  117. package/dist-playbooks/youtube.json +59 -0
  118. package/dist-references/canva.json +646 -0
  119. package/dist-references/codex-desktop.json +305 -0
  120. package/dist-references/davinci-resolve-keyboard.json +594 -0
  121. package/dist-references/davinci-resolve-menu-map.json +1139 -0
  122. package/dist-references/davinci-resolve-menus-batch1.json +116 -0
  123. package/dist-references/davinci-resolve-menus-batch2.json +372 -0
  124. package/dist-references/davinci-resolve-menus-batch3.json +330 -0
  125. package/dist-references/davinci-resolve-menus-batch4.json +297 -0
  126. package/dist-references/davinci-resolve-shortcuts.json +333 -0
  127. package/dist-references/devpost.json +186 -0
  128. package/dist-references/devto.json +317 -0
  129. package/dist-references/discord.json +549 -0
  130. package/dist-references/figma.json +1186 -0
  131. package/dist-references/finder.json +146 -0
  132. package/dist-references/google-ads-transparency.json +95 -0
  133. package/dist-references/google-flow.json +649 -0
  134. package/dist-references/instagram.json +341 -0
  135. package/dist-references/linkedin.json +324 -0
  136. package/dist-references/meta-ad-library.json +86 -0
  137. package/dist-references/n8n.json +387 -0
  138. package/dist-references/notes.json +27 -0
  139. package/dist-references/notion.json +163 -0
  140. package/dist-references/reddit.json +341 -0
  141. package/dist-references/threads.json +337 -0
  142. package/dist-references/x-twitter.json +403 -0
  143. package/dist-references/youtube.json +373 -0
  144. package/native/macos-bridge/Package.swift +22 -0
  145. package/native/macos-bridge/Sources/AccessibilityBridge.swift +482 -0
  146. package/native/macos-bridge/Sources/AppManagement.swift +339 -0
  147. package/native/macos-bridge/Sources/CoreGraphicsBridge.swift +537 -0
  148. package/native/macos-bridge/Sources/ObserverBridge.swift +120 -0
  149. package/native/macos-bridge/Sources/StreamCapture.swift +136 -0
  150. package/native/macos-bridge/Sources/VisionBridge.swift +238 -0
  151. package/native/macos-bridge/Sources/main.swift +498 -0
  152. package/native/windows-bridge/AppManagement.cs +234 -0
  153. package/native/windows-bridge/InputBridge.cs +436 -0
  154. package/native/windows-bridge/Program.cs +270 -0
  155. package/native/windows-bridge/ScreenCapture.cs +453 -0
  156. package/native/windows-bridge/UIAutomationBridge.cs +571 -0
  157. package/native/windows-bridge/WindowsBridge.csproj +17 -0
  158. package/package.json +12 -1
  159. package/scripts/postinstall.cjs +127 -0
  160. package/dist/.audit-log.jsonl +0 -55
  161. package/dist/.screenhand/memory/.lock +0 -1
  162. package/dist/.screenhand/memory/actions.jsonl +0 -85
  163. package/dist/.screenhand/memory/errors.jsonl +0 -5
  164. package/dist/.screenhand/memory/errors.jsonl.bak +0 -4
  165. package/dist/.screenhand/memory/state.json +0 -35
  166. package/dist/.screenhand/memory/state.json.bak +0 -35
  167. package/dist/.screenhand/memory/strategies.jsonl +0 -12
  168. package/dist/agent/cli.js +0 -73
  169. package/dist/agent/loop.js +0 -258
  170. package/dist/config.js +0 -9
  171. package/dist/index.js +0 -56
  172. package/dist/logging/timeline-logger.js +0 -29
  173. package/dist/mcp/mcp-stdio-server.js +0 -448
  174. package/dist/mcp/server.js +0 -347
  175. package/dist/mcp-entry.js +0 -59
  176. package/dist/memory/recall.js +0 -160
  177. package/dist/memory/research.js +0 -98
  178. package/dist/memory/seeds.js +0 -89
  179. package/dist/memory/session.js +0 -161
  180. package/dist/memory/store.js +0 -391
  181. package/dist/memory/types.js +0 -4
  182. package/dist/monitor/codex-monitor.js +0 -377
  183. package/dist/monitor/task-queue.js +0 -84
  184. package/dist/monitor/types.js +0 -49
  185. package/dist/native/bridge-client.js +0 -174
  186. package/dist/native/macos-bridge-client.js +0 -5
  187. package/dist/npm-publish-helper.js +0 -117
  188. package/dist/npm-token-cdp.js +0 -113
  189. package/dist/npm-token-create.js +0 -135
  190. package/dist/npm-token-finish.js +0 -126
  191. package/dist/playbook/engine.js +0 -193
  192. package/dist/playbook/index.js +0 -4
  193. package/dist/playbook/recorder.js +0 -519
  194. package/dist/playbook/runner.js +0 -392
  195. package/dist/playbook/store.js +0 -166
  196. package/dist/playbook/types.js +0 -4
  197. package/dist/runtime/accessibility-adapter.js +0 -377
  198. package/dist/runtime/app-adapter.js +0 -48
  199. package/dist/runtime/applescript-adapter.js +0 -283
  200. package/dist/runtime/ax-role-map.js +0 -80
  201. package/dist/runtime/browser-adapter.js +0 -36
  202. package/dist/runtime/cdp-chrome-adapter.js +0 -505
  203. package/dist/runtime/composite-adapter.js +0 -205
  204. package/dist/runtime/executor.js +0 -250
  205. package/dist/runtime/locator-cache.js +0 -12
  206. package/dist/runtime/planning-loop.js +0 -47
  207. package/dist/runtime/service.js +0 -372
  208. package/dist/runtime/session-manager.js +0 -28
  209. package/dist/runtime/state-observer.js +0 -105
  210. package/dist/runtime/vision-adapter.js +0 -208
  211. package/dist/test-mcp-protocol.js +0 -138
  212. package/dist/types.js +0 -1
@@ -0,0 +1,112 @@
1
+ #!/usr/bin/env npx tsx
2
+ import { exportHelpCenterToMarkdown } from "../src/platform/help-center-markdown.js";
3
/**
 * CLI entry point for the help-center export.
 *
 * Parses the command-line flags, prints usage and exits when --help was given
 * (exit code 0) or no URL was supplied (exit code 1), then runs
 * exportHelpCenterToMarkdown and prints a short summary of the result.
 */
+ async function main() {
4
+ const options = parseArgs(process.argv.slice(2));
5
+ if (options.help || !options.url) {
6
+ printUsage();
7
+ process.exit(options.help ? 0 : 1);
8
+ }
9
+ const result = await exportHelpCenterToMarkdown({
10
+ startUrl: options.url,
11
+ outputPath: options.output,
12
+ maxPages: options.maxPages,
13
+ headless: options.headless,
14
+ waitAfterLoadMs: options.waitMs,
15
+ onProgress: (message) => console.log(message),
// scopePrefix is only passed through when --scope was given on the command line.
16
+ ...(options.scope ? { scopePrefix: options.scope } : {}),
17
+ });
18
+ console.log("");
19
+ console.log(`Wrote ${result.pageCount} page(s) to ${result.outputPath}`);
20
+ console.log(`Scope prefix: ${result.scopePrefix}`);
21
+ }
22
/**
 * Parse the command-line arguments of the help-center export script.
 *
 * Boolean flags: --help / -h, --headless, --headed.
 * Value flags (accepted as `--flag value` or `--flag=value`):
 * --url, --output, --scope, --max-pages, --wait-ms.
 *
 * @param {string[]} args - Arguments after the script name.
 * @returns {{output: string, maxPages: number, headless: boolean, waitMs: number,
 *   help: boolean, url?: string, scope?: string}} Parsed options with defaults applied.
 * @throws {Error} On an unknown argument, on a value flag given without a value,
 *   or when --max-pages / --wait-ms is not a positive integer.
 */
function parseArgs(args) {
  const options = {
    output: "help-center-export.md",
    maxPages: 25,
    headless: false,
    waitMs: 1200,
    help: false,
  };
  // One setter per value-taking flag; a Map avoids accidental hits on Object.prototype keys.
  const valueFlags = new Map([
    ["--url", (value) => { options.url = value; }],
    ["--output", (value) => { options.output = value; }],
    ["--scope", (value) => { options.scope = value; }],
    ["--max-pages", (value) => { options.maxPages = parsePositiveInt(value, "--max-pages"); }],
    ["--wait-ms", (value) => { options.waitMs = parsePositiveInt(value, "--wait-ms"); }],
  ]);
  for (let index = 0; index < args.length; index++) {
    const arg = args[index];
    if (arg === "--help" || arg === "-h") {
      options.help = true;
      continue;
    }
    if (arg === "--headless") {
      options.headless = true;
      continue;
    }
    if (arg === "--headed") {
      options.headless = false;
      continue;
    }
    // Split only on the first "=" so values such as URLs with query strings stay intact.
    const equalsAt = arg.indexOf("=");
    const flag = equalsAt === -1 ? arg : arg.slice(0, equalsAt);
    const assign = valueFlags.get(flag);
    if (!assign) {
      throw new Error(`Unknown argument: ${arg}`);
    }
    if (equalsAt !== -1) {
      assign(arg.slice(equalsAt + 1));
      continue;
    }
    const next = args[index + 1];
    if (!next) {
      // Previously this fell through to a misleading "Unknown argument" error.
      throw new Error(`${flag} requires a value`);
    }
    assign(next);
    index++;
  }
  return options;
}
94
/**
 * Parse a CLI flag value as a strictly positive integer.
 *
 * Unlike a bare Number.parseInt, the whole value must be decimal digits
 * (optionally preceded by "+", surrounding whitespace ignored), so inputs such
 * as "12abc", "1.5" or "1e3" are rejected instead of being silently truncated.
 *
 * @param {string} value - Raw flag value.
 * @param {string} flagName - Flag name used in the error message.
 * @returns {number} The parsed integer (> 0).
 * @throws {Error} When the value is not a positive integer.
 */
function parsePositiveInt(value, flagName) {
  const text = String(value).trim();
  const parsed = /^\+?\d+$/.test(text) ? Number(text) : Number.NaN;
  if (!Number.isSafeInteger(parsed) || parsed <= 0) {
    throw new Error(`${flagName} must be a positive integer`);
  }
  return parsed;
}
101
/**
 * Print the command-line usage to stdout.
 *
 * The synopsis lists every flag parseArgs accepts, including --wait-ms,
 * --headed and --help, which the earlier text left out.
 */
function printUsage() {
  const lines = [
    "Usage:",
    " npm run export:help-md -- --url <help-root-url> [--output file.md] [--scope /path/] [--max-pages 25] [--wait-ms 1200] [--headless | --headed] [--help]",
    "",
    "Examples:",
    " npm run export:help-md -- --url https://www.canva.com/en_in/help/topics/ --output docs/canva-help.md --max-pages 40",
    " npm run export:help-md -- --url https://www.canva.com/en_in/help/topics/ --scope /en_in/help/ --headless",
  ];
  for (const line of lines) {
    console.log(line);
  }
}
109
// Run the CLI. Any rejection from main() — including errors thrown by parseArgs,
// which main() calls — is reported as a one-line message on stderr and the
// process exits with code 1.
+ main().catch((error) => {
110
+ console.error(error instanceof Error ? error.message : String(error));
111
+ process.exit(1);
112
+ });
@@ -0,0 +1,117 @@
1
+ #!/usr/bin/env npx tsx
2
+ /**
3
+ * ScreenHand Marketing Automation Loop
4
+ *
5
+ * Architecture:
6
+ * Claude Code (this script via /loop)
7
+ * → ScreenHand CDP → Codex (content generation)
8
+ * → ScreenHand browser → Social platforms (execution)
9
+ *
10
+ * Platforms: X/Twitter, Threads, LinkedIn, Reddit
11
+ * Goal: Increase GitHub stars for ScreenHand (github.com/manushi4/Screenhand)
12
+ */
13
+ // Marketing task types with weights for rotation
14
// Each entry: `type` is the task name returned by getNextTask, `weight` is its
// relative selection weight for pickWeightedRandom (the five weights sum to 100),
// and `platforms` lists the platforms getNextTask may choose from for that task.
+ const TASK_TYPES = [
15
+ { type: 'search_engage', weight: 30, platforms: ['x', 'threads', 'linkedin'] },
16
+ { type: 'create_post', weight: 20, platforms: ['x', 'threads', 'linkedin'] },
17
+ { type: 'reply_contextual', weight: 25, platforms: ['x', 'threads', 'reddit'] },
18
+ { type: 'like_repost', weight: 15, platforms: ['x', 'threads'] },
19
+ { type: 'dm_outreach', weight: 10, platforms: ['x', 'linkedin'] },
20
+ ];
21
+ // Search queries to find relevant conversations
22
// getNextTask attaches one of these, chosen uniformly at random, as the `query`
// field of every non-wait task.
+ const SEARCH_QUERIES = [
23
+ 'claude code',
24
+ 'desktop automation AI',
25
+ 'MCP server',
26
+ 'browser automation agent',
27
+ 'AI agent desktop control',
28
+ 'anthropic claude tools',
29
+ 'cursor automation',
30
+ 'computer use API',
31
+ 'accessibility automation mac',
32
+ 'screenhand',
33
+ 'AI coding assistant',
34
+ 'claude MCP',
35
+ 'openai codex desktop',
36
+ 'AI agent framework',
37
+ 'playwright alternative AI',
38
+ ];
39
+ // Content angles for posts
40
// getNextTask attaches one of these, chosen uniformly at random, as the `angle`
// field of every non-wait task.
+ const CONTENT_ANGLES = [
41
+ 'Show how ScreenHand gives AI agents native desktop control — 82 MCP tools',
42
+ 'Demo: Claude Code using ScreenHand to automate a real workflow',
43
+ 'ScreenHand vs browser-only automation — why desktop control matters',
44
+ 'Open source AI desktop automation — ScreenHand on npm',
45
+ 'How ScreenHand handles the focus-stealing problem in Electron apps',
46
+ 'Building multi-agent systems with ScreenHand supervisor + job system',
47
+ 'ScreenHand memory system — AI agents that learn from mistakes',
48
+ 'Cross-platform desktop automation: macOS Swift + Windows C# native bridges',
49
+ 'The playbook system — reusable automation recipes for any platform',
50
+ 'Why we built ScreenHand: the gap between AI coding and AI doing',
51
+ ];
52
+ // Rate limits per platform (actions per hour)
53
// Keyed by platform id. canActOnPlatform falls back to a limit of 5 for any
// platform that has no entry here.
+ const RATE_LIMITS = {
54
+ x: 8,
55
+ threads: 10,
56
+ linkedin: 6,
57
+ reddit: 4,
58
+ };
59
// Mutable run state, exported for the loop controller.
// Within this file only startTime, actionsPerformed, errors and currentCycle are
// read or written (canActOnPlatform, getNextTask, formatStatus); the remaining
// fields are presumably maintained by the importing controller — TODO confirm.
+ const state = {
60
+ startTime: Date.now(),
61
+ actionsPerformed: { x: 0, threads: 0, linkedin: 0, reddit: 0 },
62
+ lastActionTime: { x: 0, threads: 0, linkedin: 0, reddit: 0 },
63
+ postsCreated: [],
64
+ repliesSent: [],
65
+ searchesPerformed: [],
66
+ errors: [],
67
+ currentCycle: 0,
68
+ };
69
/**
 * Pick one item at random, with probability proportional to its `weight`.
 *
 * Draws a number in [0, total weight) and walks the list subtracting each
 * weight until the remainder is used up. Falls back to the first item if the
 * walk finishes without a pick; for an empty list that fallback is undefined.
 *
 * @param {Array<{weight: number}>} items - Candidates carrying a numeric weight.
 * @returns {*} The selected item.
 */
+ function pickWeightedRandom(items) {
70
+ const total = items.reduce((s, i) => s + i.weight, 0);
71
+ let r = Math.random() * total;
72
+ for (const item of items) {
73
+ r -= item.weight;
74
+ if (r <= 0)
75
+ return item;
76
+ }
77
+ return items[0];
78
+ }
79
/**
 * Pick one element of `arr` uniformly at random (undefined for an empty array).
 *
 * @param {Array} arr - Candidates.
 * @returns {*} A randomly chosen element.
 */
+ function pickRandom(arr) {
80
+ return arr[Math.floor(Math.random() * arr.length)];
81
+ }
82
/**
 * Report whether another action is currently allowed on `platform`.
 *
 * Returns true while the platform's action count divided by the hours elapsed
 * since state.startTime (with the elapsed time treated as at least one hour)
 * is below its RATE_LIMITS entry, or below 5 for an unlisted platform.
 *
 * NOTE(review): this is an average over the whole run, not a sliding one-hour
 * window, so after a long idle period more than `limit` actions can pass within
 * a single hour. state.lastActionTime is not consulted here — confirm whether
 * that is intended.
 *
 * @param {string} platform - Platform id, e.g. 'x' or 'reddit'.
 * @returns {boolean} True when the platform is under its limit.
 */
+ function canActOnPlatform(platform) {
83
+ const hourMs = 60 * 60 * 1000;
84
+ const now = Date.now();
85
+ const limit = RATE_LIMITS[platform] ?? 5;
86
+ const count = state.actionsPerformed[platform] ?? 0;
87
+ const elapsed = now - state.startTime;
88
+ const hoursElapsed = Math.max(1, elapsed / hourMs);
89
+ return count / hoursElapsed < limit;
90
+ }
91
/**
 * Choose the next marketing task.
 *
 * Increments state.currentCycle on every call, picks a task type by weight,
 * then a random platform among that task's platforms that canActOnPlatform
 * still allows. When none is allowed it returns a wait task instead.
 *
 * @returns {{type: string, platform: string, query?: string, angle?: string}}
 *   Either { type: 'wait', platform: 'none' } or a task carrying a random
 *   search query and content angle.
 */
+ function getNextTask() {
92
+ state.currentCycle++;
93
+ // Pick task type
94
+ const task = pickWeightedRandom(TASK_TYPES);
95
+ // Pick platform that hasn't hit rate limit
96
+ const availablePlatforms = task.platforms.filter(p => canActOnPlatform(p));
97
+ if (availablePlatforms.length === 0) {
98
+ return { type: 'wait', platform: 'none' };
99
+ }
100
+ const platform = pickRandom(availablePlatforms);
101
+ return {
102
+ type: task.type,
103
+ platform,
104
+ query: pickRandom(SEARCH_QUERIES),
105
+ angle: pickRandom(CONTENT_ANGLES),
106
+ };
107
+ }
108
/**
 * Build a one-line status summary from `state`: the current cycle, whole minutes
 * elapsed since startTime, the total action count, the per-platform counts
 * (X / Threads / LinkedIn / Reddit) and the number of recorded errors.
 *
 * @returns {string} The bracketed status line.
 */
+ function formatStatus() {
109
+ const elapsed = Math.round((Date.now() - state.startTime) / 60000);
110
+ const total = Object.values(state.actionsPerformed).reduce((s, n) => s + n, 0);
111
+ return `[Cycle ${state.currentCycle} | ${elapsed}m elapsed | ${total} actions | ` +
112
+ `X:${state.actionsPerformed.x} T:${state.actionsPerformed.threads} ` +
113
+ `L:${state.actionsPerformed.linkedin} R:${state.actionsPerformed.reddit} | ` +
114
+ `${state.errors.length} errors]`;
115
+ }
116
+ // Export for use by the loop controller
117
+ export { getNextTask, formatStatus, state, SEARCH_QUERIES, CONTENT_ANGLES, canActOnPlatform };
@@ -0,0 +1,288 @@
1
+ #!/usr/bin/env npx tsx
2
+ /**
3
+ * Observer Daemon — background app-level visual monitor.
4
+ *
5
+ * Captures a single app window via cg.captureWindow (CGWindowListCreateImage).
6
+ * Uses pixel-hash frame diff to skip OCR when nothing changed.
7
+ * Persists state to ~/.screenhand/observer/state.json for the engine to read.
8
+ *
9
+ * Zero overhead on the main execution path — engine reads a JSON file, daemon
10
+ * does the heavy lifting in a separate process.
11
+ *
12
+ * Usage:
13
+ * npx tsx scripts/observer-daemon.ts --bundleId com.blackmagic-design.DaVinciResolve --windowId 1234
14
+ * npx tsx scripts/observer-daemon.ts --bundleId com.blackmagic-design.DaVinciResolve --windowId 1234 --interval 2000
15
+ *
16
+ * State files:
17
+ * ~/.screenhand/observer/state.json — observer state (latest OCR, popup detection)
18
+ * ~/.screenhand/observer/observer.pid — PID of this process
19
+ * ~/.screenhand/observer/observer.log — log output
20
+ */
21
+ import path from "node:path";
22
+ import fs from "node:fs";
23
+ import crypto from "node:crypto";
24
+ import { BridgeClient } from "../src/native/bridge-client.js";
25
+ import { writeObserverState, readObserverCommands, writeObserverCommands, acquireCaptureLock, releaseCaptureLock } from "../src/observer/state.js";
26
+ import { detectPopup } from "../src/observer/state.js";
27
+ import { OBSERVER_DIR, OBSERVER_PID_FILE, OBSERVER_LOG_FILE } from "../src/observer/types.js";
28
+ // ── Config from CLI args ──
29
// Raw CLI arguments after the script path.
+ const args = process.argv.slice(2);
30
/**
 * Look up the value that follows `--<name>` on the command line.
 *
 * Returns `fallback` when the flag is absent or is the last argument. The
 * argument after the flag is returned as-is, even if it is itself another flag.
 *
 * @param {string} name - Flag name without the leading dashes.
 * @param {string} [fallback] - Value used when the flag has no value.
 * @returns {string|undefined} The flag value or the fallback.
 */
+ function getArg(name, fallback) {
31
+ const idx = args.indexOf("--" + name);
32
+ if (idx === -1)
33
+ return fallback;
34
+ return args[idx + 1] ?? fallback;
35
+ }
36
// Target app and window come from the CLI; the polling interval defaults to 2000 ms.
const BUNDLE_ID = getArg("bundleId");
const WINDOW_ID = Number(getArg("windowId", "0"));
const INTERVAL_MS = Number(getArg("interval", "2000"));
// Refuse to start on a missing bundle id, a window id that is not a positive
// integer, or an interval that is not a positive finite number. setTimeout
// coerces NaN, zero and negative delays to roughly 1 ms, so an unvalidated
// --interval would turn the capture loop into a busy loop.
if (!BUNDLE_ID ||
    !Number.isInteger(WINDOW_ID) || WINDOW_ID <= 0 ||
    !Number.isFinite(INTERVAL_MS) || INTERVAL_MS <= 0) {
  process.stderr.write("Usage: observer-daemon.ts --bundleId <id> --windowId <id> [--interval <ms>]\n");
  process.exit(1);
}
43
+ // ── Logging ──
44
// Make sure the observer directory exists before the append-mode log stream is opened.
+ fs.mkdirSync(OBSERVER_DIR, { recursive: true });
45
+ const logStream = fs.createWriteStream(OBSERVER_LOG_FILE, { flags: "a" });
46
// Set to true by main() once the PID file has been written; from then on log()
// stops echoing to stderr.
+ let daemonized = false;
47
/**
 * Append a timestamped line to the observer log file and, until `daemonized`
 * is set, echo the same line to stderr.
 *
 * @param {string} msg - Message text.
 */
+ function log(msg) {
48
+ const line = `[${new Date().toISOString()}] ${msg}`;
49
+ logStream.write(line + "\n");
50
+ if (!daemonized)
51
+ process.stderr.write(line + "\n");
52
+ }
53
+ // ── Bridge setup ──
54
// Locate the native bridge binary relative to this script. When the script path
// contains "/dist/" the project root is taken to be two levels up, otherwise one.
// NOTE(review): the "/dist/" test and the URL-pathname fallback both assume
// POSIX-style paths; on win32 (which the branch below targets) scriptDir uses
// backslashes and URL.pathname yields "/C:/..." — confirm, and consider
// url.fileURLToPath plus a path.sep-aware check.
+ const scriptDir = import.meta.dirname ?? path.dirname(new URL(import.meta.url).pathname);
55
+ const projectRoot = scriptDir.includes("/dist/")
56
+ ? path.resolve(scriptDir, "../..")
57
+ : path.resolve(scriptDir, "..");
58
+ const bridgePath = process.platform === "win32"
59
+ ? path.resolve(projectRoot, "native/windows-bridge/bin/Release/net8.0-windows/windows-bridge.exe")
60
+ : path.resolve(projectRoot, "native/macos-bridge/.build/release/macos-bridge");
61
+ const bridge = new BridgeClient(bridgePath);
62
+ let bridgeReady = false;
63
/**
 * Start the native bridge on first use. `bridgeReady` is only set after
 * bridge.start() resolves, so a failed start is retried on the next call.
 */
+ async function ensureBridge() {
64
+ if (!bridgeReady) {
65
+ await bridge.start();
66
+ bridgeReady = true;
67
+ }
68
+ }
69
+ // ── Frame diff via file hash ──
70
// Hash of the most recently captured frame; null until the first capture.
+ let lastFrameHash = null;
71
/**
 * Return the hex MD5 digest of a file's bytes. captureFrame compares
 * consecutive digests to decide whether a frame changed, so the hash serves as
 * a change detector, not as a security measure.
 *
 * @param {string} filePath - Path of the file to hash (read synchronously).
 * @returns {string} Hex-encoded MD5 digest.
 */
+ function hashFile(filePath) {
72
+ const data = fs.readFileSync(filePath);
73
+ return crypto.createHash("md5").update(data).digest("hex");
74
+ }
75
+ // ── State ──
76
// Module-level daemon state. `stopped` ends the main loop; the counters and the
// last frame / popup / error values are updated by captureFrame and
// processCommands and published through buildState().
+ let stopped = false;
77
+ let framesCaptured = 0;
78
+ let framesChanged = 0;
79
+ let ocrRuns = 0;
80
+ let lastFrame = null;
81
+ let lastPopup = null;
82
+ let lastError = null;
83
+ const startedAt = new Date().toISOString();
84
/**
 * Snapshot the daemon's current state as a plain object for persistence.
 *
 * @returns {object} PID, running flag, start time, watch target, interval,
 *   counters, last frame, last popup and last error.
 */
+ function buildState() {
85
+ return {
86
+ pid: process.pid,
87
+ running: !stopped,
88
+ startedAt,
89
+ bundleId: BUNDLE_ID,
90
+ windowId: WINDOW_ID,
91
+ intervalMs: INTERVAL_MS,
92
+ framesCaptured,
93
+ framesChanged,
94
+ ocrRuns,
95
+ lastFrame,
96
+ popup: lastPopup,
97
+ lastError,
98
+ };
99
+ }
100
/**
 * Write the current state snapshot via writeObserverState. Any failure is
 * deliberately swallowed so that a persistence problem never stops the daemon.
 */
+ function persistState() {
101
+ try {
102
+ writeObserverState(buildState());
103
+ }
104
+ catch {
105
+ // Non-fatal
106
+ }
107
+ }
108
+ // ── Capture loop ──
109
/**
 * Capture one frame of the watched window and refresh the in-memory observer state.
 *
 * Steps: take the capture lock (skip this cycle if it is held), capture the
 * window through the bridge, hash the image file, and only when the hash
 * differs from the previous frame run OCR and popup detection.
 *
 * The temporary screenshot is removed on every path after a successful
 * capture. Previously it was removed only when the frame had changed, so each
 * unchanged frame (and any hashing failure) left a file behind.
 *
 * Capture and OCR failures are recorded in `lastError` rather than thrown;
 * other errors (for example from ensureBridge or hashFile) propagate to the
 * caller after the lock is released.
 *
 * @returns {Promise<void>}
 */
async function captureFrame() {
  // Acquire capture lock to prevent concurrent captures with perception coordinator
  if (!acquireCaptureLock()) {
    log("Skipping capture — lock held by perception coordinator");
    return;
  }
  try {
    await ensureBridge();
    // 1. Capture window (app-level, not full screen)
    let shot;
    try {
      shot = await bridge.call("cg.captureWindow", { windowId: WINDOW_ID });
    } catch (err) {
      lastError = `Capture failed: ${err instanceof Error ? err.message : String(err)}`;
      return;
    }
    framesCaptured++;
    try {
      // 2. Frame diff — hash the image file, skip OCR if identical
      const currentHash = hashFile(shot.path);
      const pixelsChanged = currentHash !== lastFrameHash;
      lastFrameHash = currentHash;
      if (!pixelsChanged) {
        // Frame identical — update timestamp only, skip expensive OCR
        if (lastFrame) {
          lastFrame.capturedAt = new Date().toISOString();
          lastFrame.changed = false;
        }
        return;
      }
      framesChanged++;
      // 3. OCR only on changed frames
      let ocrText = "";
      try {
        const ocr = await bridge.call("vision.ocr", {
          imagePath: shot.path,
        });
        ocrText = ocr.text;
        ocrRuns++;
      } catch (err) {
        lastError = `OCR failed: ${err instanceof Error ? err.message : String(err)}`;
        // Keep the previous text so readers do not see the OCR result vanish.
        ocrText = lastFrame?.ocrText ?? "";
      }
      // 4. Update frame
      lastFrame = {
        capturedAt: new Date().toISOString(),
        ocrText,
        changed: true,
      };
      // 5. Popup detection on the new OCR text
      lastPopup = detectPopup(ocrText);
      if (lastPopup) {
        log(`Popup detected: "${lastPopup.pattern}" → ${lastPopup.dismissAction}`);
      }
      lastError = null;
    } finally {
      // Clean up the temp screenshot on every exit path, including the
      // unchanged-frame early return above.
      try {
        fs.unlinkSync(shot.path);
      } catch { /* ignore */ }
    }
  } finally {
    releaseCaptureLock();
  }
}
175
+ // ── Command processing ──
176
/**
 * Execute pending observer commands and write their outcome back.
 *
 * Reads the command list, runs every command whose status is "pending" (only
 * the "ocr_roi" type is supported; any other type is marked as an error), and
 * rewrites the whole list once if anything changed. A read failure is treated
 * as "nothing to do".
 *
 * NOTE(review): the list is read once and rewritten in full after the awaited
 * OCR calls, so a command added to the file in between would presumably be
 * overwritten — verify against writeObserverCommands and the command producers.
 * The intermediate "running" status is only set in memory; it is never written
 * before the final status replaces it.
 */
+ async function processCommands() {
177
+ let commands;
178
+ try {
179
+ commands = readObserverCommands();
180
+ }
181
+ catch {
182
+ return; // No commands file or corrupt — skip
183
+ }
184
+ const pending = commands.filter((c) => c.status === "pending");
185
+ if (pending.length === 0)
186
+ return;
187
+ let changed = false;
188
+ for (const cmd of pending) {
189
+ if (cmd.type !== "ocr_roi") {
190
+ cmd.status = "error";
191
+ cmd.error = `Unknown command type: ${cmd.type}`;
192
+ changed = true;
193
+ continue;
194
+ }
195
+ cmd.status = "running";
196
+ changed = true;
197
+ try {
198
+ await ensureBridge();
// A command may target its own window; otherwise the daemon's watched window is used.
199
+ const targetWindowId = cmd.windowId ?? WINDOW_ID;
200
+ // Use vision.ocrRegion for targeted ROI OCR
201
+ const result = await bridge.call("vision.ocrRegion", {
202
+ windowId: targetWindowId,
203
+ region: cmd.roi,
204
+ });
205
+ cmd.status = "done";
206
+ cmd.result = {
207
+ text: result.text ?? "",
208
+ regions: result.regions ?? [],
209
+ completedAt: new Date().toISOString(),
210
+ };
211
+ ocrRuns++;
212
+ log(`Command ${cmd.id}: OCR ROI completed (${cmd.result.regions.length} regions)`);
213
+ }
214
+ catch (err) {
215
+ cmd.status = "error";
216
+ cmd.error = err instanceof Error ? err.message : String(err);
217
+ log(`Command ${cmd.id}: failed — ${cmd.error}`);
218
+ }
219
+ }
220
+ if (changed) {
221
+ writeObserverCommands(commands);
222
+ }
223
+ }
224
+ // ── Main loop ──
225
/**
 * Daemon entry point: enforce a single running instance, record our PID, then
 * capture frames and process commands every INTERVAL_MS until `stopped` is set.
 *
 * Single-instance check: the PID file only counts when it holds a positive
 * integer other than our own PID. An empty or corrupt file used to parse to 0
 * (or a negative number), and `process.kill(0, 0)` probes the caller's own
 * process group and therefore succeeds, which made the daemon refuse to start.
 * A probe that fails with EPERM means the process exists but belongs to
 * another user, so it is treated as alive; every other failure means the PID
 * is stale.
 *
 * State is persisted after every cycle, including failed ones, so readers of
 * the state file can see `lastError`.
 *
 * @returns {Promise<void>} Resolves once the loop has been stopped.
 */
async function main() {
  // Enforce single daemon
  let recordedPid = Number.NaN;
  try {
    recordedPid = Number(fs.readFileSync(OBSERVER_PID_FILE, "utf-8").trim());
  } catch {
    // No PID file — first run
  }
  if (Number.isInteger(recordedPid) && recordedPid > 0 && recordedPid !== process.pid) {
    let alive;
    try {
      process.kill(recordedPid, 0); // Signal 0: existence check only
      alive = true;
    } catch (err) {
      // EPERM: the process exists but we may not signal it. Anything else: stale PID.
      alive = err?.code === "EPERM";
    }
    if (alive) {
      log(`Another observer daemon already running (pid=${recordedPid}). Aborting.`);
      process.exit(1);
    }
  }
  fs.writeFileSync(OBSERVER_PID_FILE, String(process.pid));
  daemonized = true;
  log(`Observer daemon started (pid=${process.pid})`);
  log(`Watching: bundleId=${BUNDLE_ID} windowId=${WINDOW_ID} interval=${INTERVAL_MS}ms`);
  persistState();
  while (!stopped) {
    try {
      await captureFrame();
      await processCommands();
    } catch (err) {
      lastError = `Frame error: ${err instanceof Error ? err.message : String(err)}`;
      log(lastError);
    }
    // Persist after every cycle so a failed cycle's lastError reaches the state file.
    persistState();
    await sleep(INTERVAL_MS);
  }
}
262
/**
 * Return a promise that resolves after `ms` milliseconds.
 *
 * @param {number} ms - Delay handed to setTimeout.
 * @returns {Promise<void>}
 */
+ function sleep(ms) {
263
+ return new Promise((resolve) => setTimeout(resolve, ms));
264
+ }
265
+ // ── Graceful shutdown ──
266
process.on("SIGINT", shutdown);
process.on("SIGTERM", shutdown);
/**
 * Stop the daemon: mark it stopped, persist the final state, remove the PID
 * file, stop the bridge, flush the log and exit with code 0.
 *
 * Idempotent — a second signal while shutdown is in progress is ignored.
 *
 * The process exits from the log stream's end callback so that buffered lines
 * (including "Shutting down...") reach the file. Calling process.exit straight
 * after end() could drop them. An unref'd fallback timer still forces the exit
 * if the stream never reports completion.
 *
 * @returns {Promise<void>}
 */
async function shutdown() {
  if (stopped) {
    return;
  }
  stopped = true;
  log("Shutting down...");
  persistState();
  try {
    fs.unlinkSync(OBSERVER_PID_FILE);
  } catch { /* ignore */ }
  try {
    await bridge.stop();
  } catch { /* ignore */ }
  // Safety net: never hang on shutdown if the log stream cannot finish.
  setTimeout(() => process.exit(0), 1000).unref();
  logStream.end(() => process.exit(0));
}
285
// Start the daemon. A rejection escaping main() is logged as fatal and the
// process exits with code 1.
// NOTE(review): process.exit runs right after log(), so the final line may not
// be flushed to the log file — confirm whether that matters once stderr echo
// has been switched off.
+ main().catch((err) => {
286
+ log(`Fatal: ${err instanceof Error ? err.message : String(err)}`);
287
+ process.exit(1);
288
+ });