screenhand 0.2.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212) hide show
  1. package/README.md +165 -446
  2. package/bin/darwin-arm64/macos-bridge +0 -0
  3. package/dist/mcp-desktop.js +3615 -400
  4. package/dist/scripts/export-help-center.js +112 -0
  5. package/dist/scripts/marketing-loop.js +117 -0
  6. package/dist/scripts/observer-daemon.js +288 -0
  7. package/dist/scripts/orchestrator-daemon.js +399 -0
  8. package/dist/scripts/threads-campaign.js +208 -0
  9. package/dist/src/community/fetcher.js +109 -0
  10. package/dist/src/community/index.js +6 -0
  11. package/dist/src/community/publisher.js +191 -0
  12. package/dist/src/community/remote-api.js +121 -0
  13. package/dist/src/community/types.js +3 -0
  14. package/dist/src/community/validator.js +95 -0
  15. package/dist/src/context-tracker.js +489 -0
  16. package/dist/src/ingestion/coverage-auditor.js +233 -0
  17. package/dist/src/ingestion/doc-parser.js +164 -0
  18. package/dist/src/ingestion/index.js +8 -0
  19. package/dist/src/ingestion/menu-scanner.js +152 -0
  20. package/dist/src/ingestion/reference-merger.js +186 -0
  21. package/dist/src/ingestion/shortcut-extractor.js +180 -0
  22. package/dist/src/ingestion/tutorial-extractor.js +170 -0
  23. package/dist/src/ingestion/types.js +3 -0
  24. package/dist/src/jobs/manager.js +82 -14
  25. package/dist/src/jobs/runner.js +138 -15
  26. package/dist/src/learning/engine.js +356 -0
  27. package/dist/src/learning/index.js +9 -0
  28. package/dist/src/learning/locator-policy.js +120 -0
  29. package/dist/src/learning/pattern-policy.js +89 -0
  30. package/dist/src/learning/recovery-policy.js +116 -0
  31. package/dist/src/learning/sensor-policy.js +115 -0
  32. package/dist/src/learning/timing-model.js +204 -0
  33. package/dist/src/learning/topology-policy.js +90 -0
  34. package/dist/src/learning/types.js +9 -0
  35. package/dist/src/logging/timeline-logger.js +4 -1
  36. package/dist/src/memory/playbook-seeds.js +200 -0
  37. package/dist/src/memory/recall.js +60 -8
  38. package/dist/src/memory/service.js +30 -5
  39. package/dist/src/memory/store.js +34 -5
  40. package/dist/src/native/bridge-client.js +253 -31
  41. package/dist/src/observer/state.js +199 -0
  42. package/dist/src/observer/types.js +43 -0
  43. package/dist/src/orchestrator/state.js +68 -0
  44. package/dist/src/orchestrator/types.js +22 -0
  45. package/dist/src/perception/ax-source.js +162 -0
  46. package/dist/src/perception/cdp-source.js +162 -0
  47. package/dist/src/perception/coordinator.js +771 -0
  48. package/dist/src/perception/frame-differ.js +287 -0
  49. package/dist/src/perception/index.js +22 -0
  50. package/dist/src/perception/manager.js +199 -0
  51. package/dist/src/perception/types.js +47 -0
  52. package/dist/src/perception/vision-source.js +399 -0
  53. package/dist/src/planner/deterministic.js +298 -0
  54. package/dist/src/planner/executor.js +870 -0
  55. package/dist/src/planner/goal-store.js +92 -0
  56. package/dist/src/planner/index.js +21 -0
  57. package/dist/src/planner/planner.js +520 -0
  58. package/dist/src/planner/tool-registry.js +71 -0
  59. package/dist/src/planner/types.js +22 -0
  60. package/dist/src/platform/explorer.js +213 -0
  61. package/dist/src/platform/help-center-markdown.js +527 -0
  62. package/dist/src/platform/learner.js +257 -0
  63. package/dist/src/playbook/engine.js +296 -11
  64. package/dist/src/playbook/mcp-recorder.js +204 -0
  65. package/dist/src/playbook/recorder.js +3 -2
  66. package/dist/src/playbook/runner.js +1 -1
  67. package/dist/src/playbook/store.js +139 -10
  68. package/dist/src/recovery/detectors.js +156 -0
  69. package/dist/src/recovery/engine.js +327 -0
  70. package/dist/src/recovery/index.js +20 -0
  71. package/dist/src/recovery/strategies.js +274 -0
  72. package/dist/src/recovery/types.js +20 -0
  73. package/dist/src/runtime/accessibility-adapter.js +55 -18
  74. package/dist/src/runtime/applescript-adapter.js +8 -2
  75. package/dist/src/runtime/cdp-chrome-adapter.js +1 -1
  76. package/dist/src/runtime/executor.js +23 -3
  77. package/dist/src/runtime/locator-cache.js +24 -2
  78. package/dist/src/runtime/service.js +59 -15
  79. package/dist/src/runtime/session-manager.js +4 -1
  80. package/dist/src/runtime/vision-adapter.js +2 -1
  81. package/dist/src/state/app-map-types.js +72 -0
  82. package/dist/src/state/app-map.js +1974 -0
  83. package/dist/src/state/entity-tracker.js +108 -0
  84. package/dist/src/state/fusion.js +96 -0
  85. package/dist/src/state/index.js +21 -0
  86. package/dist/src/state/ladder-generator.js +236 -0
  87. package/dist/src/state/persistence.js +156 -0
  88. package/dist/src/state/types.js +17 -0
  89. package/dist/src/state/world-model.js +1456 -0
  90. package/dist/src/util/atomic-write.js +19 -4
  91. package/dist/src/util/sanitize.js +146 -0
  92. package/dist-app-maps/com.figma.Desktop.json +959 -0
  93. package/dist-app-maps/com.hnc.Discord.json +1146 -0
  94. package/dist-app-maps/notion.id.json +2831 -0
  95. package/dist-playbooks/canva-screenhand-carousel.json +445 -0
  96. package/dist-playbooks/codex-desktop.json +76 -0
  97. package/dist-playbooks/competitor-research-stack.json +122 -0
  98. package/dist-playbooks/davinci-color-grade.json +153 -0
  99. package/dist-playbooks/davinci-edit-timeline.json +162 -0
  100. package/dist-playbooks/davinci-render.json +114 -0
  101. package/dist-playbooks/devto.json +52 -0
  102. package/dist-playbooks/discord.json +41 -0
  103. package/dist-playbooks/google-flow-create-project.json +59 -0
  104. package/dist-playbooks/google-flow-edit-image.json +90 -0
  105. package/dist-playbooks/google-flow-edit-video.json +90 -0
  106. package/dist-playbooks/google-flow-generate-image.json +68 -0
  107. package/dist-playbooks/google-flow-generate-video.json +191 -0
  108. package/dist-playbooks/google-flow-open-project.json +48 -0
  109. package/dist-playbooks/google-flow-open-scenebuilder.json +64 -0
  110. package/dist-playbooks/google-flow-search-assets.json +64 -0
  111. package/dist-playbooks/instagram.json +57 -0
  112. package/dist-playbooks/linkedin.json +52 -0
  113. package/dist-playbooks/n8n.json +43 -0
  114. package/dist-playbooks/reddit.json +52 -0
  115. package/dist-playbooks/threads.json +59 -0
  116. package/dist-playbooks/x-twitter.json +59 -0
  117. package/dist-playbooks/youtube.json +59 -0
  118. package/dist-references/canva.json +646 -0
  119. package/dist-references/codex-desktop.json +305 -0
  120. package/dist-references/davinci-resolve-keyboard.json +594 -0
  121. package/dist-references/davinci-resolve-menu-map.json +1139 -0
  122. package/dist-references/davinci-resolve-menus-batch1.json +116 -0
  123. package/dist-references/davinci-resolve-menus-batch2.json +372 -0
  124. package/dist-references/davinci-resolve-menus-batch3.json +330 -0
  125. package/dist-references/davinci-resolve-menus-batch4.json +297 -0
  126. package/dist-references/davinci-resolve-shortcuts.json +333 -0
  127. package/dist-references/devpost.json +186 -0
  128. package/dist-references/devto.json +317 -0
  129. package/dist-references/discord.json +549 -0
  130. package/dist-references/figma.json +1186 -0
  131. package/dist-references/finder.json +146 -0
  132. package/dist-references/google-ads-transparency.json +95 -0
  133. package/dist-references/google-flow.json +649 -0
  134. package/dist-references/instagram.json +341 -0
  135. package/dist-references/linkedin.json +324 -0
  136. package/dist-references/meta-ad-library.json +86 -0
  137. package/dist-references/n8n.json +387 -0
  138. package/dist-references/notes.json +27 -0
  139. package/dist-references/notion.json +163 -0
  140. package/dist-references/reddit.json +341 -0
  141. package/dist-references/threads.json +337 -0
  142. package/dist-references/x-twitter.json +403 -0
  143. package/dist-references/youtube.json +373 -0
  144. package/native/macos-bridge/Package.swift +22 -0
  145. package/native/macos-bridge/Sources/AccessibilityBridge.swift +482 -0
  146. package/native/macos-bridge/Sources/AppManagement.swift +339 -0
  147. package/native/macos-bridge/Sources/CoreGraphicsBridge.swift +537 -0
  148. package/native/macos-bridge/Sources/ObserverBridge.swift +120 -0
  149. package/native/macos-bridge/Sources/StreamCapture.swift +136 -0
  150. package/native/macos-bridge/Sources/VisionBridge.swift +238 -0
  151. package/native/macos-bridge/Sources/main.swift +498 -0
  152. package/native/windows-bridge/AppManagement.cs +234 -0
  153. package/native/windows-bridge/InputBridge.cs +436 -0
  154. package/native/windows-bridge/Program.cs +270 -0
  155. package/native/windows-bridge/ScreenCapture.cs +453 -0
  156. package/native/windows-bridge/UIAutomationBridge.cs +571 -0
  157. package/native/windows-bridge/WindowsBridge.csproj +17 -0
  158. package/package.json +12 -1
  159. package/scripts/postinstall.cjs +127 -0
  160. package/dist/.audit-log.jsonl +0 -55
  161. package/dist/.screenhand/memory/.lock +0 -1
  162. package/dist/.screenhand/memory/actions.jsonl +0 -85
  163. package/dist/.screenhand/memory/errors.jsonl +0 -5
  164. package/dist/.screenhand/memory/errors.jsonl.bak +0 -4
  165. package/dist/.screenhand/memory/state.json +0 -35
  166. package/dist/.screenhand/memory/state.json.bak +0 -35
  167. package/dist/.screenhand/memory/strategies.jsonl +0 -12
  168. package/dist/agent/cli.js +0 -73
  169. package/dist/agent/loop.js +0 -258
  170. package/dist/config.js +0 -9
  171. package/dist/index.js +0 -56
  172. package/dist/logging/timeline-logger.js +0 -29
  173. package/dist/mcp/mcp-stdio-server.js +0 -448
  174. package/dist/mcp/server.js +0 -347
  175. package/dist/mcp-entry.js +0 -59
  176. package/dist/memory/recall.js +0 -160
  177. package/dist/memory/research.js +0 -98
  178. package/dist/memory/seeds.js +0 -89
  179. package/dist/memory/session.js +0 -161
  180. package/dist/memory/store.js +0 -391
  181. package/dist/memory/types.js +0 -4
  182. package/dist/monitor/codex-monitor.js +0 -377
  183. package/dist/monitor/task-queue.js +0 -84
  184. package/dist/monitor/types.js +0 -49
  185. package/dist/native/bridge-client.js +0 -174
  186. package/dist/native/macos-bridge-client.js +0 -5
  187. package/dist/npm-publish-helper.js +0 -117
  188. package/dist/npm-token-cdp.js +0 -113
  189. package/dist/npm-token-create.js +0 -135
  190. package/dist/npm-token-finish.js +0 -126
  191. package/dist/playbook/engine.js +0 -193
  192. package/dist/playbook/index.js +0 -4
  193. package/dist/playbook/recorder.js +0 -519
  194. package/dist/playbook/runner.js +0 -392
  195. package/dist/playbook/store.js +0 -166
  196. package/dist/playbook/types.js +0 -4
  197. package/dist/runtime/accessibility-adapter.js +0 -377
  198. package/dist/runtime/app-adapter.js +0 -48
  199. package/dist/runtime/applescript-adapter.js +0 -283
  200. package/dist/runtime/ax-role-map.js +0 -80
  201. package/dist/runtime/browser-adapter.js +0 -36
  202. package/dist/runtime/cdp-chrome-adapter.js +0 -505
  203. package/dist/runtime/composite-adapter.js +0 -205
  204. package/dist/runtime/executor.js +0 -250
  205. package/dist/runtime/locator-cache.js +0 -12
  206. package/dist/runtime/planning-loop.js +0 -47
  207. package/dist/runtime/service.js +0 -372
  208. package/dist/runtime/session-manager.js +0 -28
  209. package/dist/runtime/state-observer.js +0 -105
  210. package/dist/runtime/vision-adapter.js +0 -208
  211. package/dist/test-mcp-protocol.js +0 -138
  212. package/dist/types.js +0 -1
@@ -0,0 +1,571 @@
1
+ using System.Windows.Automation;
2
+ using System.Runtime.InteropServices;
3
+
4
+ namespace WindowsBridge;
5
+
6
+ /// <summary>
7
+ /// UI Automation wrapper — equivalent to macOS AccessibilityBridge.swift.
8
+ /// Uses the Windows UI Automation framework to inspect and interact with UI elements.
9
+ /// </summary>
10
+ class UIAutomationBridge
11
+ {
12
/// <summary>
/// Builds a serializable snapshot of the UI element tree owned by a process.
/// </summary>
/// <param name="pid">Process id whose top-level element becomes the tree root.</param>
/// <param name="maxDepth">Depth limit handed to the recursive builder; the root is depth 0.</param>
/// <returns>The root node dictionary produced by <c>BuildTree</c>.</returns>
public Dictionary<string, object?> GetElementTree(int pid, int maxDepth)
    => BuildTree(GetRootElementForProcess(pid), 0, maxDepth, new List<int>());
20
+
21
/// <summary>
/// Locates a single element inside the window of a process and describes it.
/// </summary>
/// <remarks>
/// With an exact title (or no title at all) the lookup is one <c>FindFirst</c> over all
/// descendants, filtered by the mapped role, the exact title and the automation id.
/// With a non-exact title the tree is walked by <c>FindElementByPartialName</c>
/// (depth limit 10), which only looks at name and role, so <paramref name="identifier"/>
/// is not applied on that path. <paramref name="value"/> is never used for matching;
/// it only appears in the "not found" message.
/// </remarks>
/// <param name="pid">Process id whose window is searched.</param>
/// <param name="role">Optional role name; roles unknown to <c>MapRoleToControlType</c> add no filter.</param>
/// <param name="title">Optional element name to match.</param>
/// <param name="value">Only echoed in the error message.</param>
/// <param name="identifier">Optional automation id (exact match).</param>
/// <param name="exact">True to match <paramref name="title"/> exactly, false for a substring match.</param>
/// <returns>
/// A dictionary with <c>role</c>, <c>title</c> and <c>elementPath</c>, plus <c>bounds</c>
/// and <c>value</c> when they can be read.
/// </returns>
/// <exception cref="BridgeException">No element matched.</exception>
public Dictionary<string, object?> FindElement(int pid, string? role, string? title,
    string? value, string? identifier, bool exact)
{
    var windowRoot = GetRootElementForProcess(pid);
    bool hasTitle = !string.IsNullOrEmpty(title);

    // Property filters used by the FindFirst lookup.
    var filters = new List<Condition>();

    if (!string.IsNullOrEmpty(role) && MapRoleToControlType(role!) is { } mappedType)
        filters.Add(new PropertyCondition(AutomationElement.ControlTypeProperty, mappedType));

    // A non-exact title is handled by the manual walk below, not by a property condition.
    if (hasTitle && exact)
        filters.Add(new PropertyCondition(AutomationElement.NameProperty, title));

    if (!string.IsNullOrEmpty(identifier))
        filters.Add(new PropertyCondition(AutomationElement.AutomationIdProperty, identifier));

    Condition combined = filters.Count switch
    {
        0 => Condition.TrueCondition,
        1 => filters[0],
        _ => new AndCondition(filters.ToArray()),
    };

    AutomationElement? match = hasTitle && !exact
        ? FindElementByPartialName(windowRoot, title!, role, 10)
        : windowRoot.FindFirst(TreeScope.Descendants, combined);

    if (match == null)
        throw new BridgeException($"Element not found: role={role}, title={title}, value={value}");

    // Index path from the window root, so later calls can address the same element.
    var pathFromRoot = GetElementPath(windowRoot, match);

    var info = new Dictionary<string, object?>
    {
        ["role"] = MapControlTypeToRole(match.Current.ControlType),
        ["title"] = match.Current.Name,
        ["elementPath"] = pathFromRoot,
    };

    // Bounds are optional: skipped when empty or when reading them fails.
    try
    {
        var rect = match.Current.BoundingRectangle;
        if (!rect.IsEmpty)
        {
            var box = new Dictionary<string, object>();
            box["x"] = rect.X;
            box["y"] = rect.Y;
            box["width"] = rect.Width;
            box["height"] = rect.Height;
            info["bounds"] = box;
        }
    }
    catch { }

    // The value is optional too: only reported when the element exposes ValuePattern.
    try
    {
        if (match.TryGetCurrentPattern(ValuePattern.Pattern, out object? raw))
            info["value"] = ((ValuePattern)raw).Current.Value;
    }
    catch { }

    return info;
}
112
+
113
/// <summary>
/// Performs a named action on the element addressed by <paramref name="elementPath"/>.
/// macOS AX action names are mapped onto Windows UI Automation patterns.
/// </summary>
/// <param name="pid">Process id whose window contains the element.</param>
/// <param name="elementPath">Child-index path from the window root to the element.</param>
/// <param name="action">
/// One of <c>AXPress</c>/<c>press</c>/<c>click</c>, <c>AXShowMenu</c>/<c>showMenu</c>,
/// or <c>AXScrollToVisible</c>/<c>scrollToVisible</c>.
/// </param>
/// <returns>A dictionary containing <c>ok = true</c>.</returns>
/// <exception cref="BridgeException">
/// The action name is unknown, the element can neither be activated through a pattern
/// nor clicked (empty bounds), or a menu was requested on an element without
/// ExpandCollapsePattern.
/// </exception>
public Dictionary<string, object> PerformAction(int pid, int[] elementPath, string action)
{
    var rootElement = GetRootElementForProcess(pid);
    var element = NavigateToElement(rootElement, elementPath);

    switch (action)
    {
        case "AXPress":
        case "press":
        case "click":
            if (!TryActivateViaPattern(element))
            {
                // Fallback: synthesize a left click at the element's center.
                var bounds = element.Current.BoundingRectangle;
                if (bounds.IsEmpty)
                    throw new BridgeException("Element does not support any click pattern and has no bounds");

                var x = bounds.X + bounds.Width / 2;
                var y = bounds.Y + bounds.Height / 2;
                new InputBridge().MouseClick(x, y, "left", 1);
            }
            break;

        case "AXShowMenu":
        case "showMenu":
            // Report failure instead of claiming success when nothing can be expanded,
            // consistent with the click branch above.
            if (!element.TryGetCurrentPattern(ExpandCollapsePattern.Pattern, out object? expandPattern))
                throw new BridgeException("Element does not support showing a menu (no ExpandCollapse pattern)");
            ((ExpandCollapsePattern)expandPattern).Expand();
            break;

        case "AXScrollToVisible":
        case "scrollToVisible":
            // Best effort: elements without ScrollItemPattern are left where they are.
            if (element.TryGetCurrentPattern(ScrollItemPattern.Pattern, out object? scrollPattern))
            {
                ((ScrollItemPattern)scrollPattern).ScrollIntoView();
            }
            break;

        default:
            throw new BridgeException($"Unsupported action: {action}");
    }

    return new Dictionary<string, object> { ["ok"] = true };
}

// Tries Invoke, Toggle, SelectionItem and ExpandCollapse in that order; returns false when none applies.
private static bool TryActivateViaPattern(AutomationElement element)
{
    if (element.TryGetCurrentPattern(InvokePattern.Pattern, out object? invoke))
    {
        ((InvokePattern)invoke).Invoke();
        return true;
    }

    if (element.TryGetCurrentPattern(TogglePattern.Pattern, out object? toggle))
    {
        ((TogglePattern)toggle).Toggle();
        return true;
    }

    if (element.TryGetCurrentPattern(SelectionItemPattern.Pattern, out object? selection))
    {
        ((SelectionItemPattern)selection).Select();
        return true;
    }

    if (element.TryGetCurrentPattern(ExpandCollapsePattern.Pattern, out object? expandCollapse))
    {
        var pattern = (ExpandCollapsePattern)expandCollapse;
        switch (pattern.Current.ExpandCollapseState)
        {
            case ExpandCollapseState.Collapsed:
                pattern.Expand();
                return true;
            case ExpandCollapseState.LeafNode:
                // Expand/Collapse throw InvalidOperationException on leaf nodes;
                // let the caller fall back to a mouse click instead.
                return false;
            default:
                pattern.Collapse();
                return true;
        }
    }

    return false;
}
186
+
187
/// <summary>
/// Writes a value into a text field or similar element.
/// </summary>
/// <remarks>
/// Uses ValuePattern when the element exposes it. Otherwise the element is focused
/// (best effort), its content is selected with Ctrl+A and the text is typed through
/// <c>InputBridge</c>.
/// </remarks>
/// <param name="pid">Process id whose window contains the element.</param>
/// <param name="elementPath">Child-index path from the window root to the element.</param>
/// <param name="value">Text to set.</param>
/// <returns>A dictionary containing <c>ok = true</c>.</returns>
public Dictionary<string, object> SetElementValue(int pid, int[] elementPath, string value)
{
    var windowRoot = GetRootElementForProcess(pid);
    var target = NavigateToElement(windowRoot, elementPath);

    if (target.TryGetCurrentPattern(ValuePattern.Pattern, out object? valuePattern))
    {
        ((ValuePattern)valuePattern).SetValue(value);
        return new Dictionary<string, object> { ["ok"] = true };
    }

    // No ValuePattern: emulate the user. Failing to take focus is tolerated.
    try
    {
        target.SetFocus();
    }
    catch { }

    System.Threading.Thread.Sleep(50);
    // Select the existing content so the typed text replaces it.
    new InputBridge().KeyCombo(new[] { "ctrl", "a" });
    System.Threading.Thread.Sleep(50);
    new InputBridge().TypeText(value);

    return new Dictionary<string, object> { ["ok"] = true };
}
212
+
213
/// <summary>
/// Reads the value of the element addressed by <paramref name="elementPath"/>.
/// </summary>
/// <param name="pid">Process id whose window contains the element.</param>
/// <param name="elementPath">Child-index path from the window root to the element.</param>
/// <returns>
/// A dictionary whose <c>value</c> entry is the ValuePattern value when the element
/// exposes that pattern, and the element's name otherwise.
/// </returns>
public Dictionary<string, object?> GetElementValue(int pid, int[] elementPath)
{
    var target = NavigateToElement(GetRootElementForProcess(pid), elementPath);

    string? text = target.TryGetCurrentPattern(ValuePattern.Pattern, out object? valuePattern)
        ? ((ValuePattern)valuePattern).Current.Value
        : target.Current.Name;

    var response = new Dictionary<string, object?>();
    response["value"] = text;
    return response;
}
233
+
234
/// <summary>
/// Clicks a menu item addressed by a path of menu names (e.g., ["File", "New"]).
/// </summary>
/// <remarks>
/// The menu bar is looked up among the root element's children, then among the children
/// of its first child window. Each path segment is matched by exact name first and by a
/// case-insensitive substring as a fallback. Intermediate items are expanded (or invoked)
/// with a 100 ms pause so the submenu can appear; the last item is invoked.
/// </remarks>
/// <param name="pid">Process id whose window hosts the menu bar.</param>
/// <param name="menuPath">Menu names from the menu bar down to the item to activate.</param>
/// <returns>A dictionary containing <c>ok = true</c> once the final item was activated.</returns>
/// <exception cref="BridgeException">
/// The path is null or empty, the menu bar or a path segment cannot be found, or the
/// final item exposes neither InvokePattern nor ExpandCollapsePattern.
/// </exception>
public Dictionary<string, object> MenuClick(int pid, string[] menuPath)
{
    // An empty path would otherwise "succeed" without clicking anything.
    if (menuPath == null || menuPath.Length == 0)
        throw new BridgeException("Menu path must contain at least one item");

    var rootElement = GetRootElementForProcess(pid);

    // Find the menu bar
    var menuBarCondition = new PropertyCondition(AutomationElement.ControlTypeProperty, ControlType.MenuBar);
    var menuBar = rootElement.FindFirst(TreeScope.Children, menuBarCondition);

    if (menuBar == null)
    {
        // Try looking in the window's children
        var window = rootElement.FindFirst(TreeScope.Children,
            new PropertyCondition(AutomationElement.ControlTypeProperty, ControlType.Window));
        menuBar = window?.FindFirst(TreeScope.Children, menuBarCondition);
    }

    if (menuBar == null)
        throw new BridgeException("Menu bar not found");

    AutomationElement current = menuBar;

    for (int i = 0; i < menuPath.Length; i++)
    {
        var menuName = menuPath[i];

        // Exact name match first
        var menuItem = current.FindFirst(TreeScope.Children,
            new PropertyCondition(AutomationElement.NameProperty, menuName));

        if (menuItem == null)
        {
            // Fall back to a case-insensitive substring match
            var children = current.FindAll(TreeScope.Children, Condition.TrueCondition);
            foreach (AutomationElement child in children)
            {
                if (child.Current.Name.Contains(menuName, StringComparison.OrdinalIgnoreCase))
                {
                    menuItem = child;
                    break;
                }
            }
        }

        if (menuItem == null)
            throw new BridgeException($"Menu item not found: {menuName}");

        if (i < menuPath.Length - 1)
        {
            // Open the submenu and give it a moment to appear
            if (menuItem.TryGetCurrentPattern(ExpandCollapsePattern.Pattern, out object? ecPattern))
            {
                ((ExpandCollapsePattern)ecPattern).Expand();
                System.Threading.Thread.Sleep(100);
            }
            else if (menuItem.TryGetCurrentPattern(InvokePattern.Pattern, out object? invPattern))
            {
                ((InvokePattern)invPattern).Invoke();
                System.Threading.Thread.Sleep(100);
            }

            // After expanding, the submenu items should be children or in a popup
            current = menuItem;
        }
        else
        {
            // Activate the final menu item
            if (menuItem.TryGetCurrentPattern(InvokePattern.Pattern, out object? invPattern))
            {
                ((InvokePattern)invPattern).Invoke();
            }
            else if (menuItem.TryGetCurrentPattern(ExpandCollapsePattern.Pattern, out object? ecPattern))
            {
                ((ExpandCollapsePattern)ecPattern).Expand();
            }
            else
            {
                // Do not report success when nothing was actually clicked.
                throw new BridgeException($"Menu item cannot be activated: {menuName}");
            }
        }
    }

    return new Dictionary<string, object> { ["ok"] = true };
}
320
+
321
+ // ── Helpers ──
322
+
323
/// <summary>
/// Returns the first top-level UI Automation element that belongs to a process.
/// </summary>
/// <param name="pid">Process id to look for among the children of the desktop root.</param>
/// <returns>The matching top-level element.</returns>
/// <exception cref="BridgeException">No top-level element reports this process id.</exception>
private AutomationElement GetRootElementForProcess(int pid)
{
    var desktop = AutomationElement.RootElement;

    // Preferred route: let UI Automation filter by process id.
    var match = desktop.FindFirst(
        TreeScope.Children,
        new PropertyCondition(AutomationElement.ProcessIdProperty, pid));

    if (match == null)
    {
        // Second attempt: enumerate every top-level element and compare ids manually,
        // skipping elements whose properties cannot be read.
        var topLevel = desktop.FindAll(TreeScope.Children, Condition.TrueCondition);
        for (int i = 0; i < topLevel.Count && match == null; i++)
        {
            var candidate = topLevel[i];
            try
            {
                if (candidate.Current.ProcessId == pid)
                    match = candidate;
            }
            catch { }
        }
    }

    return match ?? throw new BridgeException($"No window found for PID {pid}");
}
352
+
353
/// <summary>
/// Recursively converts an element and its children into nested dictionaries.
/// </summary>
/// <remarks>
/// Every node carries <c>role</c> and <c>path</c>; <c>title</c>, <c>value</c>,
/// <c>bounds</c> and <c>children</c> are added only when non-empty and readable.
/// At most the first 100 children of each element are visited, and children that
/// throw while being converted are skipped.
/// </remarks>
/// <param name="element">Element to convert.</param>
/// <param name="depth">Depth of <paramref name="element"/>; the root is 0.</param>
/// <param name="maxDepth">Children are expanded only while <paramref name="depth"/> is below this.</param>
/// <param name="path">Child-index path from the root to <paramref name="element"/>.</param>
/// <returns>The node dictionary for <paramref name="element"/>.</returns>
private Dictionary<string, object?> BuildTree(AutomationElement element, int depth, int maxDepth, List<int> path)
{
    var node = new Dictionary<string, object?>
    {
        ["role"] = MapControlTypeToRole(element.Current.ControlType),
    };

    if (element.Current.Name is { Length: > 0 } title)
        node["title"] = title;

    // Value, when the element exposes ValuePattern and the value is non-empty.
    try
    {
        if (element.TryGetCurrentPattern(ValuePattern.Pattern, out object? valuePattern)
            && ((ValuePattern)valuePattern).Current.Value is { Length: > 0 } text)
        {
            node["value"] = text;
        }
    }
    catch { }

    // Bounding rectangle, when there is one.
    try
    {
        var rect = element.Current.BoundingRectangle;
        if (!rect.IsEmpty)
        {
            var box = new Dictionary<string, object>();
            box["x"] = rect.X;
            box["y"] = rect.Y;
            box["width"] = rect.Width;
            box["height"] = rect.Height;
            node["bounds"] = box;
        }
    }
    catch { }

    node["path"] = path.ToArray();

    if (depth >= maxDepth)
        return node;

    // Descend into the children; a failure to enumerate them leaves the node childless.
    try
    {
        var kids = element.FindAll(TreeScope.Children, Condition.TrueCondition);
        int limit = Math.Min(kids.Count, 100); // Cap at 100 children
        var childNodes = new List<Dictionary<string, object?>>();

        for (int index = 0; index < limit; index++)
        {
            var childPath = new List<int>(path);
            childPath.Add(index);
            try
            {
                childNodes.Add(BuildTree(kids[index], depth + 1, maxDepth, childPath));
            }
            catch
            {
                // Skip inaccessible children
            }
        }

        if (childNodes.Count > 0)
            node["children"] = childNodes;
    }
    catch { }

    return node;
}
425
+
426
/// <summary>
/// Follows a child-index path from <paramref name="root"/> down to an element.
/// </summary>
/// <param name="root">Element the path starts from.</param>
/// <param name="path">Child indices, one per level; an empty path yields <paramref name="root"/>.</param>
/// <returns>The element reached after applying every index.</returns>
/// <exception cref="BridgeException">An index is negative or not smaller than the child count at its level.</exception>
private AutomationElement NavigateToElement(AutomationElement root, int[] path)
{
    var current = root;
    foreach (var index in path)
    {
        var children = current.FindAll(TreeScope.Children, Condition.TrueCondition);
        // Reject negative indices too, so callers always see a BridgeException
        // rather than a raw indexer exception.
        if (index < 0 || index >= children.Count)
            throw new BridgeException($"Element path index {index} out of range (count={children.Count})");
        current = children[index];
    }
    return current;
}
438
+
439
/// <summary>
/// Computes the child-index path from <paramref name="root"/> to <paramref name="target"/>
/// with a breadth-first search.
/// </summary>
/// <remarks>
/// All children of every element are searched. A per-element cap would make elements
/// at child index 100 or higher unreachable and silently give them the empty fallback
/// path, which addresses <paramref name="root"/> itself.
/// </remarks>
/// <param name="root">Element the path is relative to.</param>
/// <param name="target">Element to locate.</param>
/// <returns>
/// The index path to <paramref name="target"/>, or an empty array when it is the root
/// or cannot be found.
/// </returns>
private int[] GetElementPath(AutomationElement root, AutomationElement target)
{
    // BFS to find the path from root to target
    var queue = new Queue<(AutomationElement element, List<int> path)>();
    queue.Enqueue((root, new List<int>()));

    while (queue.Count > 0)
    {
        var (current, path) = queue.Dequeue();

        if (Automation.Compare(current, target))
            return path.ToArray();

        try
        {
            var children = current.FindAll(TreeScope.Children, Condition.TrueCondition);
            for (int i = 0; i < children.Count; i++)
            {
                var childPath = new List<int>(path) { i };
                queue.Enqueue((children[i], childPath));
            }
        }
        catch { } // Elements whose children cannot be enumerated are treated as leaves.
    }

    // Fallback: return empty path
    return Array.Empty<int>();
}
467
+
468
/// <summary>
/// Depth-first search for the first element whose name contains
/// <paramref name="partialName"/> (case-insensitive) and, when a role is given,
/// whose role matches it.
/// </summary>
/// <param name="root">Element to test first and then descend from.</param>
/// <param name="partialName">Substring that must occur in the element name.</param>
/// <param name="role">
/// Optional role filter. Null or empty means "any role". The role is compared
/// case-insensitively with and without the "AX" prefix, so "button" and "AXButton"
/// are equivalent.
/// </param>
/// <param name="maxDepth">Number of levels that may still be examined, including <paramref name="root"/>.</param>
/// <returns>The first matching element, or null when none is found within the depth limit.</returns>
private AutomationElement? FindElementByPartialName(AutomationElement root, string partialName,
    string? role, int maxDepth)
{
    if (maxDepth <= 0) return null;

    try
    {
        var name = root.Current.Name;
        if (!string.IsNullOrEmpty(name) &&
            name.Contains(partialName, StringComparison.OrdinalIgnoreCase))
        {
            // An empty role is "no filter", exactly like a null one.
            if (string.IsNullOrEmpty(role) || RoleNameMatches(root.Current.ControlType, role!))
            {
                return root;
            }
        }
    }
    catch { } // Unreadable elements are skipped; their children are still tried below.

    try
    {
        var children = root.FindAll(TreeScope.Children, Condition.TrueCondition);
        foreach (AutomationElement child in children)
        {
            var found = FindElementByPartialName(child, partialName, role, maxDepth - 1);
            if (found != null) return found;
        }
    }
    catch { }

    return null;
}

// True when the AX-style role of the control type equals the requested role, with or without the "AX" prefix.
private static bool RoleNameMatches(ControlType controlType, string role)
{
    var elementRole = MapControlTypeToRole(controlType);
    return elementRole.Equals(role, StringComparison.OrdinalIgnoreCase)
        || elementRole.Equals("AX" + role, StringComparison.OrdinalIgnoreCase);
}
501
+
502
/// <summary>
/// Maps a macOS AX role name to the corresponding Windows UIA ControlType.
/// </summary>
/// <param name="role">Role name, with or without the "AX" prefix; case-insensitive.</param>
/// <returns>The matching ControlType, or null when the role has no mapping.</returns>
private static ControlType? MapRoleToControlType(string role)
{
    return role.ToLowerInvariant() switch
    {
        "button" or "axbutton" => ControlType.Button,
        "checkbox" or "axcheckbox" => ControlType.CheckBox,
        "combobox" or "axcombobox" => ControlType.ComboBox,
        "textfield" or "axtextfield" or "textarea" or "axtextarea" => ControlType.Edit,
        "group" or "axgroup" => ControlType.Group,
        "image" or "aximage" => ControlType.Image,
        "link" or "axlink" => ControlType.Hyperlink,
        "list" or "axlist" => ControlType.List,
        "menu" or "axmenu" => ControlType.Menu,
        "menuitem" or "axmenuitem" => ControlType.MenuItem,
        "menubar" or "axmenubar" => ControlType.MenuBar,
        "radiobutton" or "axradiobutton" => ControlType.RadioButton,
        "scrollbar" or "axscrollbar" => ControlType.ScrollBar,
        "slider" or "axslider" => ControlType.Slider,
        "statictext" or "axstatictext" => ControlType.Text,
        // Keep the round trip with MapControlTypeToRole intact:
        // TabItem is reported as "AXTab", Tab as "AXTabGroup".
        "tab" or "axtab" => ControlType.TabItem,
        "tabgroup" or "axtabgroup" => ControlType.Tab,
        "table" or "axtable" => ControlType.Table,
        "toolbar" or "axtoolbar" => ControlType.ToolBar,
        "tree" or "axtree" or "outline" or "axoutline" => ControlType.Tree,
        "window" or "axwindow" => ControlType.Window,
        _ => null,
    };
}
530
+
531
// Lookup table from Windows UIA ControlTypes to macOS-style role strings, scanned in order.
private static readonly (ControlType Type, string Role)[] ControlTypeRoles =
{
    (ControlType.Button, "AXButton"),
    (ControlType.CheckBox, "AXCheckBox"),
    (ControlType.ComboBox, "AXComboBox"),
    (ControlType.Edit, "AXTextField"),
    (ControlType.Group, "AXGroup"),
    (ControlType.Image, "AXImage"),
    (ControlType.Hyperlink, "AXLink"),
    (ControlType.List, "AXList"),
    (ControlType.ListItem, "AXCell"),
    (ControlType.Menu, "AXMenu"),
    (ControlType.MenuItem, "AXMenuItem"),
    (ControlType.MenuBar, "AXMenuBar"),
    (ControlType.Pane, "AXGroup"),
    (ControlType.RadioButton, "AXRadioButton"),
    (ControlType.ScrollBar, "AXScrollBar"),
    (ControlType.Slider, "AXSlider"),
    (ControlType.StatusBar, "AXStaticText"),
    (ControlType.Tab, "AXTabGroup"),
    (ControlType.TabItem, "AXTab"),
    (ControlType.Table, "AXTable"),
    (ControlType.Text, "AXStaticText"),
    (ControlType.ToolBar, "AXToolbar"),
    (ControlType.ToolTip, "AXStaticText"),
    (ControlType.Tree, "AXOutline"),
    (ControlType.TreeItem, "AXRow"),
    (ControlType.Window, "AXWindow"),
    (ControlType.Document, "AXWebArea"),
    (ControlType.Header, "AXGroup"),
    (ControlType.DataGrid, "AXTable"),
    (ControlType.DataItem, "AXCell"),
    (ControlType.SplitButton, "AXButton"),
    (ControlType.Spinner, "AXIncrementor"),
    (ControlType.Thumb, "AXHandle"),
    (ControlType.TitleBar, "AXStaticText"),
    (ControlType.Custom, "AXGroup"),
};

/// <summary>
/// Translates a Windows UIA ControlType into a macOS-style AX role string.
/// </summary>
/// <param name="ct">Control type to translate.</param>
/// <returns>The role from the lookup table, or "AXGroup" when the type is not listed.</returns>
private static string MapControlTypeToRole(ControlType ct)
{
    foreach (var (type, role) in ControlTypeRoles)
    {
        if (ct == type)
            return role;
    }

    return "AXGroup"; // Default fallback
}
571
+ }
@@ -0,0 +1,17 @@
1
+ <Project Sdk="Microsoft.NET.Sdk">
2
+
3
+ <PropertyGroup>
4
+ <OutputType>Exe</OutputType>
5
+ <TargetFramework>net8.0-windows</TargetFramework>
6
+ <AssemblyName>windows-bridge</AssemblyName>
7
+ <RootNamespace>WindowsBridge</RootNamespace>
8
+ <Nullable>enable</Nullable>
9
+ <ImplicitUsings>enable</ImplicitUsings>
10
+ <UseWindowsForms>true</UseWindowsForms>
11
+ </PropertyGroup>
12
+
13
+ <ItemGroup>
14
+ <PackageReference Include="System.Text.Json" Version="8.0.5" />
15
+ </ItemGroup>
16
+
17
+ </Project>
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "screenhand",
3
- "version": "0.2.0",
3
+ "version": "0.3.1",
4
4
  "mcpName": "io.github.manushi4/screenhand",
5
5
  "description": "Give AI eyes and hands on your desktop. ScreenHand is an open-source MCP server that lets Claude and other AI agents see your screen, click buttons, type text, and control any app on macOS and Windows.",
6
6
  "homepage": "https://screenhand.com",
@@ -11,6 +11,14 @@
11
11
  },
12
12
  "files": [
13
13
  "dist",
14
+ "bin",
15
+ "dist-references",
16
+ "dist-playbooks",
17
+ "dist-app-maps",
18
+ "native/macos-bridge/Package.swift",
19
+ "native/macos-bridge/Sources",
20
+ "native/windows-bridge",
21
+ "scripts/postinstall.cjs",
14
22
  "README.md",
15
23
  "LICENSE"
16
24
  ],
@@ -19,12 +27,15 @@
19
27
  "dev:modular": "tsx src/mcp-entry.ts",
20
28
  "build": "tsc -p tsconfig.json",
21
29
  "check": "tsc --noEmit -p tsconfig.check.json",
30
+ "prepublishOnly": "node scripts/prepublish.cjs",
22
31
  "start": "node dist/mcp-desktop.js",
23
32
  "agent": "tsx src/agent/cli.ts",
33
+ "postinstall": "node scripts/postinstall.cjs",
24
34
  "build:native": "cd native/macos-bridge && swift build -c release",
25
35
  "build:native:windows": "cd native/windows-bridge && dotnet build -c Release",
26
36
  "test": "vitest run",
27
37
  "test:watch": "vitest",
38
+ "export:help-md": "tsx scripts/export-help-center.ts",
28
39
  "codex:monitor": "tsx scripts/codex-monitor-daemon.ts",
29
40
  "codex:watch": "node scripts/vscode-codex-watch.mjs"
30
41
  },