computeruseprotocol 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cup/__main__.py ADDED
@@ -0,0 +1,222 @@
1
+ """CLI for CUP tree capture: python -m cup"""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import json
7
+ import os
8
+ import time
9
+
10
+ from cup._router import detect_platform, get_adapter
11
+ from cup.format import build_envelope, prune_tree, serialize_compact, serialize_overview
12
+
13
+
14
def _build_parser() -> argparse.ArgumentParser:
    """Create the argument parser for ``python -m cup``."""
    parser = argparse.ArgumentParser(
        description="CUP: Capture accessibility tree in Computer Use Protocol format"
    )
    parser.add_argument("--depth", type=int, default=0, help="Max tree depth (0 = unlimited)")
    parser.add_argument(
        "--scope",
        type=str,
        default=None,
        choices=["overview", "foreground", "desktop", "full"],
        help="Capture scope (default: foreground)",
    )
    parser.add_argument(
        "--app", type=str, default=None, help="Filter to window/app title containing this string"
    )
    parser.add_argument("--json-out", type=str, default=None, help="Write pruned CUP JSON to file")
    parser.add_argument(
        "--full-json-out", type=str, default=None, help="Write full (unpruned) CUP JSON to file"
    )
    parser.add_argument("--compact-out", type=str, default=None, help="Write compact text to file")
    parser.add_argument(
        "--verbose",
        action="store_true",
        help="Print diagnostics (timing, role distribution, sizes)",
    )
    parser.add_argument(
        "--platform",
        type=str,
        default=None,
        choices=["windows", "macos", "linux", "web"],
        help="Force platform (default: auto-detect)",
    )
    parser.add_argument(
        "--cdp-port", type=int, default=None, help="CDP port for web platform (default: 9222)"
    )
    parser.add_argument(
        "--cdp-host", type=str, default=None, help="CDP host for web platform (default: localhost)"
    )
    return parser


def _emit_overview(
    window_list: list[dict],
    *,
    platform: str,
    screen_w: int,
    screen_h: int,
    compact_out: str | None,
    verbose: bool,
) -> None:
    """Print the window-list overview and, if requested, save it to ``compact_out``."""
    overview_str = serialize_overview(
        window_list,
        platform=platform,
        screen_w=screen_w,
        screen_h=screen_h,
    )
    print(overview_str)

    if compact_out:
        with open(compact_out, "w", encoding="utf-8") as f:
            f.write(overview_str)
        if verbose:
            print(f"Overview written to {compact_out}")


def _json_kb(payload: object) -> float:
    """Return the length of ``payload`` serialized as JSON text, divided by 1024."""
    return len(json.dumps(payload, ensure_ascii=False)) / 1024


def main() -> None:
    """Run the ``python -m cup`` command-line interface.

    Parses the command line, obtains a platform adapter and then, depending
    on ``--scope``:

    * ``overview``: prints the serialized window list and returns without
      walking any tree.
    * ``foreground`` (default): captures the foreground window.
    * ``desktop``: captures the desktop window; when the adapter reports none,
      falls back to the overview output.
    * ``full``: captures all windows, optionally narrowed by ``--app``
      (case-insensitive substring match on the window title). ``--app`` is
      only consulted in this scope.

    The compact serialization is always printed to stdout. ``--json-out``,
    ``--full-json-out`` and ``--compact-out`` additionally write the pruned
    envelope, the unpruned envelope and the compact text to files.
    ``--verbose`` prints timing, size and role diagnostics to stdout as well.
    """
    args = _build_parser().parse_args()

    scope = args.scope or "foreground"
    verbose = args.verbose

    # 0 (and any non-positive value) means "unlimited"; 999 is the adapters' cap.
    max_depth = args.depth if args.depth > 0 else 999
    platform = args.platform or detect_platform()

    # Pass CDP connection args via env vars for the web adapter. This must
    # happen before get_adapter() creates and initializes the adapter.
    if platform == "web":
        if args.cdp_port:
            os.environ["CUP_CDP_PORT"] = str(args.cdp_port)
        if args.cdp_host:
            os.environ["CUP_CDP_HOST"] = args.cdp_host

    if verbose:
        print(f"=== CUP Tree Capture ({platform}) ===")

    adapter = get_adapter(platform)
    sw, sh, scale = adapter.get_screen_info()

    if verbose:
        scale_str = f" @{scale}x" if scale != 1.0 else ""
        print(f"Screen: {sw}x{sh}{scale_str}")

    # -- Overview scope: window list only, no tree walking --
    if scope == "overview":
        t0 = time.perf_counter()
        window_list = adapter.get_window_list()
        t_enum = (time.perf_counter() - t0) * 1000

        if verbose:
            print(f"Scope: overview ({len(window_list)} windows, {t_enum:.1f} ms)")

        _emit_overview(
            window_list,
            platform=platform,
            screen_w=sw,
            screen_h=sh,
            compact_out=args.compact_out,
            verbose=verbose,
        )
        return

    # -- Window enumeration --
    t0 = time.perf_counter()
    window_list = None

    if scope == "foreground":
        windows = [adapter.get_foreground_window()]
        # The window list is handed to serialize_compact() further down.
        window_list = adapter.get_window_list()
        if verbose:
            print(f'Scope: foreground ("{windows[0]["title"]}")')
    elif scope == "desktop":
        desktop_win = adapter.get_desktop_window()
        if desktop_win is None:
            if verbose:
                print("No desktop window found on this platform. Falling back to overview.")
            # Same output path as the explicit overview scope, so that
            # --compact-out is honoured in the fallback too.
            _emit_overview(
                adapter.get_window_list(),
                platform=platform,
                screen_w=sw,
                screen_h=sh,
                compact_out=args.compact_out,
                verbose=verbose,
            )
            return
        windows = [desktop_win]
        if verbose:
            print("Scope: desktop")
    else:  # "full"
        windows = adapter.get_all_windows()
        if args.app:
            needle = args.app.lower()
            windows = [w for w in windows if needle in w["title"].lower()]
            if not windows:
                print(f"No window found matching '{args.app}'")
                return
        if verbose:
            print(f"Scope: full ({len(windows)} window(s))")
    t_enum = (time.perf_counter() - t0) * 1000

    # -- Tree capture --
    t0 = time.perf_counter()
    tree, stats, _refs = adapter.capture_tree(windows, max_depth=max_depth)
    t_walk = (time.perf_counter() - t0) * 1000

    if verbose:
        print(f"Captured {stats['nodes']} nodes in {t_walk:.1f} ms (enum: {t_enum:.1f} ms)")
        print(f"Max depth: {stats['max_depth']}")

    # -- Envelope --
    # App identity is only meaningful when exactly one window was captured.
    single = windows[0] if len(windows) == 1 else None
    app_name = single["title"] if single is not None else None
    app_pid = single["pid"] if single is not None else None
    app_bundle_id = single.get("bundle_id") if single is not None else None

    # Collect WebMCP tools when available (web platform)
    tools = None
    if hasattr(adapter, "get_last_tools"):
        tools = adapter.get_last_tools() or None

    envelope = build_envelope(
        tree,
        platform=platform,
        scope=scope,
        screen_w=sw,
        screen_h=sh,
        screen_scale=scale,
        app_name=app_name,
        app_pid=app_pid,
        app_bundle_id=app_bundle_id,
        tools=tools,
    )

    # -- Compact text to stdout (default) --
    compact_str = serialize_compact(envelope, window_list=window_list)
    print(compact_str)

    # -- Verbose diagnostics --
    if verbose:
        json_kb = _json_kb(envelope)
        compact_kb = len(compact_str) / 1024
        print(f"JSON size: {json_kb:.1f} KB | Compact size: {compact_kb:.1f} KB")

        print("\nRole distribution (top 15):")
        # The adapter contract only guarantees "nodes" and "max_depth" in
        # stats, so a missing "roles" entry must not crash diagnostics.
        roles = stats.get("roles", {})
        for role, count in sorted(roles.items(), key=lambda kv: -kv[1])[:15]:
            print(f" {role:45s} {count:6d}")

        if tools:
            print(f"\nWebMCP tools ({len(tools)}):")
            for tool in tools:
                desc = tool.get("description", "")
                desc_str = f" - {desc}" if desc else ""
                print(f" {tool['name']}{desc_str}")

    # -- File output options --
    if args.json_out:
        pruned_tree = prune_tree(envelope["tree"])
        pruned_envelope = {**envelope, "tree": pruned_tree}
        with open(args.json_out, "w", encoding="utf-8") as f:
            json.dump(pruned_envelope, f, indent=2, ensure_ascii=False)
        if verbose:
            pruned_kb = _json_kb(pruned_envelope)
            print(f"\nPruned JSON written to {args.json_out} ({pruned_kb:.1f} KB)")

    if args.full_json_out:
        with open(args.full_json_out, "w", encoding="utf-8") as f:
            json.dump(envelope, f, indent=2, ensure_ascii=False)
        if verbose:
            json_kb = _json_kb(envelope)
            print(f"Full JSON written to {args.full_json_out} ({json_kb:.1f} KB)")

    if args.compact_out:
        with open(args.compact_out, "w", encoding="utf-8") as f:
            f.write(compact_str)
        if verbose:
            json_kb = _json_kb(envelope)
            compact_kb = len(compact_str) / 1024
            ratio = (1 - compact_kb / json_kb) * 100 if json_kb > 0 else 0
            print(
                f"Compact written to {args.compact_out} ({compact_kb:.1f} KB, {ratio:.0f}% smaller)"
            )


if __name__ == "__main__":
    main()
cup/_base.py ADDED
@@ -0,0 +1,123 @@
1
+ """Abstract base for platform adapters."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from abc import ABC, abstractmethod
6
+ from typing import Any
7
+
8
+
9
class PlatformAdapter(ABC):
    """Contract implemented by every platform tree-capture backend.

    A concrete adapter owns everything platform-specific: initialization,
    window enumeration, walking the tree and building CUP nodes. The router
    is meant to call nothing beyond the methods declared on this class.
    """

    # ---- identity --------------------------------------------------------

    @property
    @abstractmethod
    def platform_name(self) -> str:
        """Platform identifier that is written into CUP envelopes.

        Expected to be one of: 'windows', 'macos', 'linux', 'web',
        'android', 'ios'.
        """

    # ---- lifecycle -------------------------------------------------------

    @abstractmethod
    def initialize(self) -> None:
        """Run one-time setup such as COM init or the pyobjc bootstrap.

        Invoked once before the first capture. Implementations should be
        idempotent, i.e. calling this more than once must be harmless.
        """

    # ---- screen ----------------------------------------------------------

    @abstractmethod
    def get_screen_info(self) -> tuple[int, int, float]:
        """Describe the primary display as (width, height, scale_factor)."""

    # ---- window enumeration ----------------------------------------------

    @abstractmethod
    def get_foreground_window(self) -> dict[str, Any]:
        """Describe the foreground/focused window.

        The returned dict must contain at least these keys::

            {
                "handle": <platform-specific window handle/ref>,
                "title": str,
                "pid": int | None,
                "bundle_id": str | None,
            }
        """

    @abstractmethod
    def get_all_windows(self) -> list[dict[str, Any]]:
        """Describe every visible top-level window.

        Each entry has the same shape as the dict returned by
        get_foreground_window().
        """

    # ---- window overview -------------------------------------------------

    @abstractmethod
    def get_window_list(self) -> list[dict[str, Any]]:
        """List lightweight metadata for all visible windows.

        Implementations must not walk any tree here; the call is required
        to be near-instant.

        Shape of each entry::

            {
                "title": str,
                "pid": int | None,
                "bundle_id": str | None,
                "foreground": bool,
                "bounds": {"x": int, "y": int, "w": int, "h": int} | None,
            }
        """

    @abstractmethod
    def get_desktop_window(self) -> dict[str, Any] | None:
        """Describe the desktop surface window, if the platform has one.

        Returns:
            A window metadata dict (same shape as get_foreground_window)
            that points at the desktop surface (icons, widgets), or None
            when the platform has no desktop concept (e.g., web).
        """

    # ---- tree capture ----------------------------------------------------

    @abstractmethod
    def capture_tree(
        self,
        windows: list[dict[str, Any]],
        *,
        max_depth: int = 999,
    ) -> tuple[list[dict], dict, dict[str, Any]]:
        """Walk the accessibility tree of each given window.

        Args:
            windows: Window metadata dicts, as produced by
                get_foreground_window or get_all_windows.
            max_depth: Deepest tree level that is walked.

        Returns:
            A ``(tree_roots, stats, refs)`` tuple:

            * tree_roots: list of CUP node dicts (the "tree" field of the
              envelope).
            * stats: dict holding at least {"nodes": int, "max_depth": int}.
            * refs: dict mapping element IDs (e.g. "e14") to native
              platform element references, consumed by the action
              execution layer.
        """
cup/_router.py ADDED
@@ -0,0 +1,63 @@
1
+ """Platform auto-detection and adapter dispatch."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+ from typing import TYPE_CHECKING
7
+
8
+ if TYPE_CHECKING:
9
+ from cup._base import PlatformAdapter
10
+
11
+
12
def detect_platform() -> str:
    """Translate ``sys.platform`` into the CUP platform identifier.

    Returns:
        "windows" for win32, "macos" for darwin, and "linux" for any
        ``sys.platform`` value that starts with "linux".

    Raises:
        RuntimeError: If the interpreter runs on any other platform.
    """
    exact_matches = {"win32": "windows", "darwin": "macos"}
    host = sys.platform

    if host in exact_matches:
        return exact_matches[host]
    if host.startswith("linux"):
        return "linux"
    raise RuntimeError(f"Unsupported platform: {sys.platform}")
22
+
23
+
24
def get_adapter(platform: str | None = None) -> PlatformAdapter:
    """Build and initialize a new adapter for the requested platform.

    Every call constructs a new adapter object and calls ``initialize()``
    on it; nothing is cached here, so callers (e.g., Session) should keep
    the returned instance if they want to reuse it.

    Args:
        platform: Platform to force: 'windows', 'macos', 'linux' or 'web'.
            When None, the platform is auto-detected via detect_platform().

    Returns:
        The initialized adapter instance.

    Raises:
        RuntimeError: If no adapter exists for ``platform``. Missing
            platform dependencies are also documented as a RuntimeError
            cause; that conversion is not visible in this function and
            presumably lives in the adapter modules. TODO confirm.
    """
    target = detect_platform() if platform is None else platform

    # Each backend module is imported inside its own branch, so only the
    # selected platform's module is ever loaded.
    if target == "windows":
        from cup.platforms.windows import WindowsAdapter as adapter_cls
    elif target == "macos":
        from cup.platforms.macos import MacosAdapter as adapter_cls
    elif target == "linux":
        from cup.platforms.linux import LinuxAdapter as adapter_cls
    elif target == "web":
        from cup.platforms.web import WebAdapter as adapter_cls
    else:
        raise RuntimeError(
            f"No adapter available for platform '{target}'. "
            f"Currently supported: windows, macos, linux, web."
        )

    instance = adapter_cls()
    instance.initialize()
    return instance
@@ -0,0 +1,9 @@
1
+ """CUP action execution layer.
2
+
3
+ Provides cross-platform action dispatch using element references
4
+ captured during tree walks.
5
+ """
6
+
7
+ from cup.actions.executor import ActionExecutor, ActionResult
8
+
9
+ __all__ = ["ActionExecutor", "ActionResult"]
@@ -0,0 +1,62 @@
1
+ """Abstract base for platform-specific action handlers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from abc import ABC, abstractmethod
6
+ from typing import Any
7
+
8
+ from cup.actions.executor import ActionResult
9
+
10
+
11
class ActionHandler(ABC):
    """Contract for executing CUP actions on one platform.

    A platform implementation translates the CUP canonical actions
    (click, type, toggle, etc.) into calls to its native APIs.
    """

    @abstractmethod
    def action(
        self,
        native_ref: Any,
        action: str,
        params: dict[str, Any],
    ) -> ActionResult:
        """Perform a CUP action on the element behind ``native_ref``.

        Args:
            native_ref: Platform-specific element reference taken from
                ref_map.
            action: Canonical CUP action name (click, type, toggle, etc.).
            params: Parameters of the action (e.g., value for type,
                direction for scroll).

        Returns:
            ActionResult carrying the success status and a message.
        """

    @abstractmethod
    def press(self, combo: str) -> ActionResult:
        """Send a key combination to the focused window.

        Args:
            combo: Key combination string (e.g., "ctrl+s", "enter",
                "alt+f4").

        Returns:
            ActionResult carrying the success status and a message.
        """

    @abstractmethod
    def open_app(self, name: str) -> ActionResult:
        """Launch an application identified by name.

        Implementations are expected to discover installed apps,
        fuzzy-match ``name`` against them, launch the best match and
        confirm that its window appeared.

        Args:
            name: Name of the application to open (fuzzy matched).

        Returns:
            ActionResult carrying the success status and a message.
        """
cup/actions/_keys.py ADDED
@@ -0,0 +1,56 @@
1
+ """Shared key combo parsing and normalization."""
2
+
3
+ from __future__ import annotations
4
+
5
# Modifier key names accepted in a combo string. "win", "cmd" and "super"
# are rewritten to "meta" by _ALIASES before the membership test below.
MODIFIERS = frozenset({"ctrl", "alt", "shift", "win", "cmd", "meta", "super"})

# Alias normalization: alternate spelling -> canonical key name.
_ALIASES: dict[str, str] = {
    "return": "enter",
    "esc": "escape",
    "del": "delete",
    "bs": "backspace",
    "cmd": "meta",
    "super": "meta",
    "win": "meta",
    "pgup": "pageup",
    "pgdn": "pagedown",
    "pgdown": "pagedown",
}


def parse_combo(combo: str) -> tuple[list[str], list[str]]:
    """Parse a key combo string into (modifiers, keys).

    Parts are separated by "+", stripped, lower-cased and passed through the
    alias table; empty parts are ignored. Because "+" is also the separator,
    a literal plus key is recognised only at the end of the combo.

    Examples::

        >>> parse_combo("ctrl+s")
        (['ctrl'], ['s'])
        >>> parse_combo("ctrl+shift+p")
        (['ctrl', 'shift'], ['p'])
        >>> parse_combo("enter")
        ([], ['enter'])
        >>> parse_combo("a")
        ([], ['a'])
        >>> parse_combo("ctrl++")
        (['ctrl'], ['+'])

    Args:
        combo: Key combination string, parts joined with "+".

    Returns:
        (modifiers, keys) where modifiers are normalized modifier names
        ("ctrl", "alt", "shift", "meta") and keys are the non-modifier key
        names, both in the order they appear in ``combo``.
    """
    tokens = [piece.strip().lower() for piece in combo.split("+")]

    # Splitting "ctrl++" yields ["ctrl", "", ""] and "+" yields ["", ""]:
    # two empty trailing tokens mean the final key is a literal "+". A single
    # trailing separator ("ctrl+") yields only one empty token and is ignored.
    has_plus_key = len(tokens) >= 2 and not tokens[-1] and not tokens[-2]

    modifiers: list[str] = []
    keys: list[str] = []

    for token in tokens:
        if not token:
            continue
        name = _ALIASES.get(token, token)
        if name in MODIFIERS:
            modifiers.append(name)
        else:
            keys.append(name)

    if has_plus_key:
        keys.append("+")

    return modifiers, keys