screenhand 0.3.4 → 0.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/mcp-desktop.js +36 -2
- package/package.json +1 -1
package/dist/mcp-desktop.js
CHANGED
|
@@ -359,8 +359,42 @@ When any tool fails, ScreenHand automatically tries alternative strategies (AX
|
|
|
359
359
|
4. **screenshot + ocr** — visual capture, ~600ms (only for canvas apps)
|
|
360
360
|
5. **applescript** — macOS scripting (Finder, Mail, Safari, etc.)
|
|
361
361
|
|
|
362
|
+
## Decision Flow (run BEFORE step 1 of Golden Sequence)
|
|
363
|
+
|
|
364
|
+
Before starting any automation, ask three questions to pick your strategy:
|
|
365
|
+
|
|
366
|
+
### "Should I learn first or just go?" → coverage_report(bundleId)
|
|
367
|
+
- 0 shortcuts, 0 selectors, 0 flows → LEARN FIRST: scan_menu_bar() + platform_explore() before acting
|
|
368
|
+
- Has selectors + flows but 0 playbooks → CAN ACT, but start playbook_record() to save for next time
|
|
369
|
+
- Has everything + high stability → GO FAST: use direct tools (ui_press, key, type_text)
|
|
370
|
+
- Has error patterns for your tool → BE CAREFUL: use *_with_fallback tools
|
|
371
|
+
|
|
372
|
+
### "Should I use fast or safe tools?" → learning_status(bundleId)
|
|
373
|
+
- 100+ timing samples → FAST: app is well-known, use direct tools (ui_press, key, type_text ~50ms)
|
|
374
|
+
- 1-99 timing samples → SAFE: use *_with_fallback tools (~100-500ms)
|
|
375
|
+
- 0 timing samples → LEARN: platform_explore() first, then *_with_fallback
|
|
376
|
+
- AX score > 0.9 → use ui_tree + ui_press (native accessibility, fastest)
|
|
377
|
+
- AX low, CDP high → it's a web app, use browser_* tools
|
|
378
|
+
- Both low, Vision high → canvas app, use screenshot + ocr + click_text
|
|
379
|
+
|
|
380
|
+
### "Do I need perception?"
|
|
381
|
+
- Single action (click a button) → NO, just ui_find + ui_press
|
|
382
|
+
- Multi-step workflow (5+ steps) → YES, perception_start()
|
|
383
|
+
- Visual app (Figma, DaVinci) → YES, with vision (default)
|
|
384
|
+
- Text-heavy app (Notes, Terminal) → AX-only is enough
|
|
385
|
+
|
|
386
|
+
### Decision Tree Summary
|
|
387
|
+
1. coverage_report(bundleId) → do we know this app?
|
|
388
|
+
- YES (has references) → use known selectors/flows directly
|
|
389
|
+
- NO (empty) → scan_menu_bar + platform_explore FIRST
|
|
390
|
+
2. learning_status(bundleId) → how well do we know it?
|
|
391
|
+
- 100+ samples → direct tools (fast)
|
|
392
|
+
- 1-99 samples → *_with_fallback (safe)
|
|
393
|
+
- 0 samples → learn first, then fallback
|
|
394
|
+
3. Multi-step workflow (5+ steps)? → YES: perception_start() · NO: skip perception
|
|
395
|
+
|
|
362
396
|
## Key Rule
|
|
363
|
-
Never click blind. Always: KNOW → SEE → NAVIGATE → ACT → VERIFY.
|
|
397
|
+
Never click blind. Always: coverage_report → learning_status → KNOW → SEE → NAVIGATE → ACT → VERIFY → STOP.
|
|
364
398
|
`,
|
|
365
399
|
});
|
|
366
400
|
// ═══════════════════════════════════════════════
|
|
@@ -6248,7 +6282,7 @@ server.tool("ingest_tutorial", "Extract structured playbook steps from a video t
|
|
|
6248
6282
|
}],
|
|
6249
6283
|
};
|
|
6250
6284
|
});
|
|
6251
|
-
server.tool("coverage_report", "
|
|
6285
|
+
server.tool("coverage_report", "CALL THIS FIRST before automating any app. Shows what ScreenHand knows: shortcuts, selectors, flows, playbooks, error patterns, and stability %. Use the result to decide your strategy: learn first (if empty), go fast (if high coverage), or use fallback tools (if error patterns exist). See Decision Flow in server instructions.", {
|
|
6252
6286
|
bundleId: z.string().describe("macOS bundle ID (e.g. com.blackmagic-design.DaVinciResolveLite)"),
|
|
6253
6287
|
appName: z.string().describe("Human-readable app name"),
|
|
6254
6288
|
includeLiveMenuScan: z.boolean().optional().describe("Also scan the live menu bar for comparison (requires app to be running, needs pid)"),
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "screenhand",
|
|
3
|
-
"version": "0.3.4",
|
|
3
|
+
"version": "0.3.5",
|
|
4
4
|
"mcpName": "io.github.manushi4/screenhand",
|
|
5
5
|
"description": "Give AI eyes and hands on your desktop. ScreenHand is an open-source MCP server that lets Claude and other AI agents see your screen, click buttons, type text, and control any app on macOS and Windows.",
|
|
6
6
|
"homepage": "https://screenhand.com",
|