klyk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
klyk/__init__.py ADDED
@@ -0,0 +1,15 @@
1
+ """Klyk — OS-level macOS app testing via MCP.
2
+
3
+ Portfolio project. Showcases product thinking and shipped tooling.
4
+ Bug reports won't be actively triaged. Well-scoped PRs are welcome —
5
+ but expect a slow review cadence.
6
+
7
+ Primary interface: the MCP server (`python -m klyk.mcp_server`).
8
+
9
+ Module-level access for Python library users:
10
+ from klyk import computer, capture, matcher, ocr, session, launcher
11
+
12
+ A higher-level Python library API may follow if there's demand.
13
+ """
14
+
15
+ __version__ = "0.1.0"
klyk/activity.py ADDED
@@ -0,0 +1,286 @@
1
+ """
2
+ Activity recorder — single source of truth for what klyk is doing right now.
3
+
4
+ MCP tool dispatchers call record() before each user-visible action; the
5
+ menu-bar status item reads this state to display activity to the user.
6
+ Decoupled from the UI surfaces: when nobody is watching, record() is a
7
+ sub-microsecond append into a bounded deque; when the menubar dropdown
8
+ is open, it reads from this state via get_summary() / get_recent()
9
+ without coupling to any call site.
10
+
11
+ Design considerations:
12
+ - 4. Token / payload bloat: this module returns nothing to the agent —
13
+ it feeds local UI surfaces only. No risk of bloating tool responses.
14
+ - 9. Hidden state across calls: the recorder IS hidden state. It is
15
+ bounded (200 entries/app, 64 apps), evicted oldest-first, and surfaced
16
+ through the menu-bar dropdown so the user sees what's accumulating.
17
+ - 5. Failure coupling: record() catches every exception an observer raises.
18
+ UI surfaces are observers; an observer that raises does not affect
19
+ other observers or the calling tool.
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ import threading
25
+ import time
26
+ from collections import deque
27
+ from typing import Callable
28
+
29
+ # Bounded retention. Per-app 200 entries — a typical 30-min seamless run
30
+ # fires ~50-150 actions, so a single session never hits the cap; a long
31
+ # autonomous run drops the oldest first. The 64-app outer cap is defensive
32
+ # only; klyk hardly ever runs against more than 4 apps concurrently.
33
+ _MAX_PER_APP = 200
34
+ _MAX_APPS = 64
35
+
36
+
37
class ActivityRecorder:
    """Bounded, thread-safe log of recent tool activity, grouped by app.

    Writers call record(); readers call get_summary(), get_recent() and
    get_last(). All shared state is guarded by one lock, and observers are
    invoked outside that lock to avoid re-entrancy.

    Args:
        max_per_app: Entries kept per app before the oldest one is dropped.
            ``None`` (the default) uses the module-level ``_MAX_PER_APP``.
        max_apps: Apps tracked before the one with the oldest last action
            is evicted. ``None`` (the default) uses the module-level
            ``_MAX_APPS``.

    Raises:
        ValueError: If either cap is smaller than 1.
    """

    def __init__(
        self,
        max_per_app: int | None = None,
        max_apps: int | None = None,
    ) -> None:
        self._max_per_app = _MAX_PER_APP if max_per_app is None else max_per_app
        self._max_apps = _MAX_APPS if max_apps is None else max_apps
        # A cap below 1 would make eviction pick a victim from an empty
        # mapping, so reject it up front instead of failing inside record().
        if self._max_per_app < 1 or self._max_apps < 1:
            raise ValueError("max_per_app and max_apps must both be >= 1")
        self._lock = threading.Lock()
        # {app_name: deque[entry]}. Insertion order preserved by dict in 3.7+.
        self._per_app: dict[str, deque[dict]] = {}
        # {app_name: last_entry} so the menu-bar can show "current" without
        # peeking inside the deque.
        self._last: dict[str, dict] = {}
        # {app_name: {"mode": str, "escalation_count": int, "pid": int}} --
        # snapshot of session state at the last record(). Kept here so the
        # menu-bar can render without holding a session lock.
        self._session_state: dict[str, dict] = {}
        self._observers: list[Callable[[dict], None]] = []

    # -- writers --

    def record(
        self,
        app: str,
        tool: str,
        *,
        x: int | float | None = None,
        y: int | float | None = None,
        x2: int | float | None = None,
        y2: int | float | None = None,
        detail: str | None = None,
        via: str | None = None,
        session_mode: str | None = None,
        escalation_count: int | None = None,
        pid: int | None = None,
        win_x: int | None = None,
        win_y: int | None = None,
    ) -> dict:
        """Append an entry for *app* and notify observers.

        Coordinates are window-relative (the screenshot coord space klyk
        uses everywhere) and are truncated to ``int``. win_x/win_y are
        stored as given so an observer that wants screen-space coords does
        not have to re-read session state.

        Returns:
            The stored entry dict, or ``{}`` when *app* or *tool* is empty
            (nothing is recorded and no observer is called in that case).
        """
        if not app or not tool:
            return {}
        entry = {
            "app": app,
            "tool": tool,
            "ts": time.time(),
            "x": int(x) if x is not None else None,
            "y": int(y) if y is not None else None,
            "x2": int(x2) if x2 is not None else None,
            "y2": int(y2) if y2 is not None else None,
            "detail": detail,
            "via": via,
            "win_x": win_x,
            "win_y": win_y,
        }
        # Only the session fields the caller actually supplied are merged,
        # so a partial update never erases a previously recorded value.
        state_updates = {
            key: value
            for key, value in (
                ("mode", session_mode),
                ("escalation_count", escalation_count),
                ("pid", pid),
            )
            if value is not None
        }
        with self._lock:
            dq = self._per_app.get(app)
            if dq is None:
                # First time we see this app. Defend the outer cap by
                # dropping the app whose last action is the oldest.
                if len(self._per_app) >= self._max_apps:
                    victim = min(
                        self._per_app,
                        key=lambda a: (self._last.get(a) or {}).get("ts", 0.0),
                    )
                    self._per_app.pop(victim, None)
                    self._last.pop(victim, None)
                    self._session_state.pop(victim, None)
                dq = deque(maxlen=self._max_per_app)
                self._per_app[app] = dq
            dq.append(entry)
            self._last[app] = entry
            if state_updates:
                self._session_state.setdefault(app, {}).update(state_updates)
            observers_snapshot = list(self._observers)
        # Fire observers outside the lock. A buggy observer must not
        # block other observers or block record() callers.
        for cb in observers_snapshot:
            try:
                cb(entry)
            except Exception:
                # Deliberate best-effort: observers are UI surfaces and
                # their failures must never reach the calling tool.
                pass
        return entry

    def remove_app(self, app: str) -> None:
        """Drop everything we have for *app*. Called when a session closes."""
        with self._lock:
            self._per_app.pop(app, None)
            self._last.pop(app, None)
            self._session_state.pop(app, None)

    # -- readers --

    def get_summary(self) -> list[dict]:
        """Return one summary dict per tracked app, freshest activity first.

        Each dict has the keys: app, mode, escalation_count, pid,
        last_action, last_action_via, last_action_age_s, recent_count.
        """
        now = time.time()
        out: list[dict] = []
        with self._lock:
            for app, dq in self._per_app.items():
                last = self._last.get(app)
                state = self._session_state.get(app, {})
                out.append({
                    "app": app,
                    "mode": state.get("mode"),
                    "escalation_count": state.get("escalation_count", 0),
                    "pid": state.get("pid"),
                    "last_action": last["tool"] if last else None,
                    "last_action_via": last.get("via") if last else None,
                    "last_action_age_s": (now - last["ts"]) if last else None,
                    "recent_count": len(dq),
                })
        # Smallest age first; apps without a last action sort to the end.
        out.sort(
            key=lambda r: (r["last_action_age_s"] is None, r["last_action_age_s"] or 0.0),
        )
        return out

    def get_recent(self, app: str, n: int = 20) -> list[dict]:
        """Return up to *n* of the newest entries for *app*, oldest first.

        Returns an empty list for an unknown app or when *n* is not
        positive. The list is new, but the entry dicts are the stored
        objects themselves, not copies.
        """
        # Guard first: list(dq)[-0:] is the whole list, so n == 0 would
        # otherwise return everything instead of nothing.
        if n <= 0:
            return []
        with self._lock:
            dq = self._per_app.get(app)
            if not dq:
                return []
            return list(dq)[-n:]

    def get_last(self, app: str) -> dict | None:
        """Return the most recent entry for *app*, or None if there is none.

        The returned dict is the stored entry itself (the same object that
        sits in the per-app deque), not a copy.
        """
        with self._lock:
            return self._last.get(app)

    # -- observers --

    def subscribe(self, cb: Callable[[dict], None]) -> None:
        """Register *cb* to be called with each new entry; duplicates are ignored."""
        with self._lock:
            if cb not in self._observers:
                self._observers.append(cb)

    def unsubscribe(self, cb: Callable[[dict], None]) -> None:
        """Remove *cb* from the observers; a callback that was never added is ignored."""
        with self._lock:
            try:
                self._observers.remove(cb)
            except ValueError:
                pass
194
+
195
+
196
+ # Module-level singleton. Importers receive the same recorder so the
197
+ # menu-bar (subscriber) and the mcp_server tool handlers (writer) share
198
+ # state without a passed-around handle.
199
+ recorder = ActivityRecorder()
200
+
201
+
202
+ # Convenience entry points so mcp_server doesn't repeat boilerplate.
203
+ # These pick the canonical coord pair from a tool's args dict and translate
204
+ # window-relative -> recorder format. Failures swallowed — instrumentation
205
+ # must never break a tool call.
206
+
207
+ # Tools whose dispatch represents a user-visible "action" the UI should
208
+ # surface. Read-only / introspection tools (list_sessions, screen_info,
209
+ # get_pixel, etc.) are excluded so the menubar shows actual activity
210
+ # rather than just call counts.
211
# Tool names that record_from_args() forwards to the recorder; any other
# tool name is ignored. One name per line, sorted, so a future addition or
# removal is a one-line diff.
ACTION_TOOLS = frozenset(
    (
        "ax_action",
        "click",
        "click_element",
        "click_menu",
        "double_click",
        "drag",
        "fill_field",
        "find_template",
        "focus_window",
        "handle_system_dialog",
        "inspect",
        "long_press",
        "press_key",
        "press_system_key",
        "run",
        "screenshot",
        "scroll",
        "select_option",
        "set_clipboard",
        "set_window_bounds",
        "type_text",
        "wait_for",
        "wait_for_visual",
    )
)
219
+
220
+
221
def record_from_args(session, tool_name: str, args: dict, via: str | None = None) -> None:
    """Record one dispatched tool call on the shared recorder, best-effort.

    Tools that are not in ACTION_TOOLS are ignored. For the rest, the
    coordinates and a short human-readable hint are taken from *args*, the
    mode / escalation count / pid / window origin are read from *session*,
    and everything is handed to recorder.record(). Any exception raised
    while doing so is swallowed: instrumentation must never break a tool
    call.
    """
    if tool_name not in ACTION_TOOLS:
        return
    try:
        # Start point is x/y; when either is missing, fall back to the
        # x1/y1 spelling (drag uses x1/y1 -> x2/y2). Tools without coords
        # simply record None for them.
        start_x = args.get("x")
        start_y = args.get("y")
        end_x = args.get("x2")
        end_y = args.get("y2")
        if start_x is None and "x1" in args:
            start_x = args["x1"]
        if start_y is None and "y1" in args:
            start_y = args["y1"]

        # Short hint for the menubar log. Typed text is summarised by its
        # length rather than stored.
        hint = None
        if tool_name in ("type_text", "fill_field"):
            typed = args.get("text", "") or ""
            # type_text leaves the hint empty for empty text; fill_field
            # always reports a length, even "0 chars".
            if typed or tool_name == "fill_field":
                hint = f"{len(typed)} chars"
        elif tool_name == "press_key":
            chord = args.get("key") or args.get("keys")
            if isinstance(chord, list):
                hint = "+".join(str(part) for part in chord)
            elif chord:
                hint = str(chord)[:32]
        elif tool_name == "click_element":
            label = args.get("label") or ""
            hint = label[:48]
        elif tool_name == "scroll":
            direction = args.get("direction", "?")
            amount = args.get("amount", 3)
            hint = f"{direction} x{amount}"

        app_name = session.app
        mode = getattr(session, "mode", None)
        escalations = getattr(session, "escalation_log", []) or []
        recorder.record(
            app=app_name,
            tool=tool_name,
            x=start_x,
            y=start_y,
            x2=end_x,
            y2=end_y,
            detail=hint,
            via=via,
            session_mode=mode,
            escalation_count=len(escalations),
            pid=getattr(session, "pid", None),
            win_x=getattr(session, "win_x", None),
            win_y=getattr(session, "win_y", None),
        )
    except Exception:
        # Silent on purpose: this is instrumentation, not a contract surface.
        return
273
+
274
+
275
def record_via(app: str, tool: str, via: str) -> None:
    """Stamp the delivery mode onto the most recent entry for *app*.

    Used by the seamless paths once a dispatch resolves ('skylight',
    'cursor_warp', etc.) so the menubar's last-action row shows how the
    action was actually delivered. The stamp is applied only when the last
    entry belongs to *tool* and has no via value yet; every failure is
    swallowed.
    """
    try:
        entry = recorder.get_last(app)
        if not entry:
            return
        if entry.get("tool") != tool:
            return
        if entry.get("via") is not None:
            return
        # In-place update: the deque and the last-entry map hold this
        # same dict, so both see the new value.
        entry["via"] = via
    except Exception:
        return
klyk/ax_roles.py ADDED
@@ -0,0 +1,42 @@
1
+ """
2
+ AX role catalogs — single source of truth for "what counts as interactive UI."
3
+
4
+ Two related sets live here so they can't drift independently:
5
+
6
+ INTERACTIVE_ROLES — the broad set surfaced in ax_snapshot. Includes
7
+ structural elements (tables, headings, static
8
+ text) so callers can inspect layout context.
9
+ BROWSER_INTERACTIVE_ROLES — the narrower set used when filtering a browser
10
+ AX tree (which is dominated by structural noise).
11
+ Derived from INTERACTIVE_ROLES minus the
12
+ structural roles so adding a new interactive
13
+ role automatically reaches both call sites.
14
+
15
+ Resolves Consideration #8 (two sources of truth) — previously these were two
16
+ separate frozen sets in computer.py and mcp_server.py.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ # All AX roles the snapshot collector considers "interesting." Used by
22
+ # computer.py's element walker to decide which elements to emit.
23
INTERACTIVE_ROLES: frozenset[str] = frozenset(
    (
        "AXBrowser",
        "AXButton",
        "AXCell",
        "AXCheckBox",
        "AXComboBox",
        "AXDisclosureTriangle",
        "AXHeading",
        "AXLink",
        "AXMenuBarItem",
        "AXMenuButton",
        "AXMenuItem",
        "AXOutline",
        "AXPopUpButton",
        "AXRadioButton",
        "AXRow",
        "AXSearchField",
        "AXSegmentedControl",
        "AXSlider",
        "AXSplitter",
        "AXStaticText",
        "AXTab",
        "AXTabGroup",
        "AXTable",
        "AXTextArea",
        "AXTextField",
        "AXToolbar",
    )
)

# Roles that are part of INTERACTIVE_ROLES but are treated as structure
# rather than click/type targets. They are removed when filtering browser
# AX trees, where they dominate the output and bury the real targets.
_STRUCTURAL_ROLES: frozenset[str] = frozenset(
    (
        "AXBrowser",
        "AXCell",
        "AXDisclosureTriangle",
        "AXHeading",
        "AXOutline",
        "AXRow",
        "AXSplitter",
        "AXStaticText",
        "AXTabGroup",
        "AXTable",
    )
)

# The browser filter keeps every interactive role that is not structural,
# so a role added to INTERACTIVE_ROLES reaches this set automatically.
BROWSER_INTERACTIVE_ROLES: frozenset[str] = INTERACTIVE_ROLES.difference(
    _STRUCTURAL_ROLES
)