klyk 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- klyk-0.1.0/.gitignore +31 -0
- klyk-0.1.0/LICENSE +21 -0
- klyk-0.1.0/PKG-INFO +210 -0
- klyk-0.1.0/README.md +178 -0
- klyk-0.1.0/SECURITY.md +61 -0
- klyk-0.1.0/klyk/__init__.py +15 -0
- klyk-0.1.0/klyk/activity.py +286 -0
- klyk-0.1.0/klyk/ax_roles.py +42 -0
- klyk-0.1.0/klyk/capture.py +1335 -0
- klyk-0.1.0/klyk/cli.py +476 -0
- klyk-0.1.0/klyk/client.py +355 -0
- klyk-0.1.0/klyk/clients.py +322 -0
- klyk-0.1.0/klyk/computer.py +3120 -0
- klyk-0.1.0/klyk/doctor.py +514 -0
- klyk-0.1.0/klyk/grader.py +74 -0
- klyk-0.1.0/klyk/keycodes.py +458 -0
- klyk-0.1.0/klyk/launcher.py +357 -0
- klyk-0.1.0/klyk/logs.py +157 -0
- klyk-0.1.0/klyk/matcher.py +277 -0
- klyk-0.1.0/klyk/mcp_server.py +6160 -0
- klyk-0.1.0/klyk/menubar.py +364 -0
- klyk-0.1.0/klyk/ocr.py +178 -0
- klyk-0.1.0/klyk/ownership.py +181 -0
- klyk-0.1.0/klyk/reporter.py +57 -0
- klyk-0.1.0/klyk/session.py +501 -0
- klyk-0.1.0/klyk/skylight.py +713 -0
- klyk-0.1.0/klyk/ui_thread.py +217 -0
- klyk-0.1.0/klyk/visibility.py +48 -0
- klyk-0.1.0/pyproject.toml +63 -0
klyk-0.1.0/.gitignore
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# Python bytecode and caches
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.pyc
|
|
4
|
+
*.pyo
|
|
5
|
+
*.pyd
|
|
6
|
+
|
|
7
|
+
# Environment / secrets
|
|
8
|
+
.env
|
|
9
|
+
|
|
10
|
+
# macOS
|
|
11
|
+
.DS_Store
|
|
12
|
+
|
|
13
|
+
# Tooling caches
|
|
14
|
+
.playwright-mcp/
|
|
15
|
+
|
|
16
|
+
# Editors
|
|
17
|
+
.vscode/
|
|
18
|
+
.idea/
|
|
19
|
+
|
|
20
|
+
# Claude Code session state (local-only — scheduled tasks lock, project settings)
|
|
21
|
+
.claude/
|
|
22
|
+
|
|
23
|
+
# Build / dist (forward-looking, for PyPI packaging)
|
|
24
|
+
build/
|
|
25
|
+
dist/
|
|
26
|
+
*.egg-info/
|
|
27
|
+
|
|
28
|
+
# Private/internal docs — not part of the public repo
|
|
29
|
+
ROADMAP.md
|
|
30
|
+
CLAUDE.md
|
|
31
|
+
PUBLISH.md
|
klyk-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Bent Eisheuer
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
klyk-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: klyk
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: OS-level macOS app testing via MCP — native apps, Electron apps, browsers, web pages, system dialogs
|
|
5
|
+
Project-URL: Homepage, https://github.com/legetdev/klyk
|
|
6
|
+
Project-URL: Repository, https://github.com/legetdev/klyk
|
|
7
|
+
Project-URL: Issues, https://github.com/legetdev/klyk/issues
|
|
8
|
+
Author-email: Bent Eisheuer <bentnicolaus@gmail.com>
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: accessibility,ai-agent,automation,macos,mcp,ocr,testing
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Environment :: Console
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Operating System :: MacOS :: MacOS X
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
21
|
+
Classifier: Topic :: Software Development :: Testing
|
|
22
|
+
Classifier: Topic :: Software Development :: Testing :: Acceptance
|
|
23
|
+
Requires-Python: >=3.11
|
|
24
|
+
Requires-Dist: jsonschema>=4.0
|
|
25
|
+
Requires-Dist: mcp>=1.0.0
|
|
26
|
+
Requires-Dist: numpy>=1.24
|
|
27
|
+
Requires-Dist: pyobjc-framework-cocoa>=10.0; sys_platform == 'darwin'
|
|
28
|
+
Requires-Dist: pyobjc-framework-quartz>=10.0; sys_platform == 'darwin'
|
|
29
|
+
Requires-Dist: pyobjc-framework-vision>=10.0; sys_platform == 'darwin'
|
|
30
|
+
Requires-Dist: python-dotenv>=1.0.0
|
|
31
|
+
Description-Content-Type: text/markdown
|
|
32
|
+
|
|
33
|
+
# Klyk
|
|
34
|
+
|
|
35
|
+
> ## ⚠️ What klyk is — and what it isn't
|
|
36
|
+
>
|
|
37
|
+
> **Read this before you install.**
|
|
38
|
+
>
|
|
39
|
+
> klyk gives an AI agent real, OS-level control of your Mac — the same input a human has. It moves the cursor, fires keystrokes, and clicks what's on screen. In its default mode it does this **invisibly and autonomously**: there is no cursor movement you can watch and no confirmation prompt — klyk acts on its own as soon as the agent decides to.
|
|
40
|
+
>
|
|
41
|
+
> **This is powerful, and it is dangerous.** Be clear-eyed about what that means:
|
|
42
|
+
>
|
|
43
|
+
> - **It can take real, irreversible actions.** A click is a click. klyk can press *Buy*, *Send*, *Confirm Transfer*, *Delete*, or *Sign* just as you could. The only guardrails are a window-bounds check and a text instruction asking the agent to confirm first — an instruction the agent *can ignore*. There is no sandbox and no spending limit.
|
|
44
|
+
> - **It runs with your full user privileges.** Anything you can do on your Mac, klyk can do. It does not isolate itself or drop privileges.
|
|
45
|
+
> - **It is a prompt-injection target.** If the agent driving klyk also reads untrusted content — a web page, an email, a document — a malicious instruction hidden there can become real clicks and keystrokes on your machine. "Reads the web" + "controls the Mac" is the high-risk combination. Run klyk only with an agent and a workflow you trust.
|
|
46
|
+
> - **It relies on an undocumented Apple API.** Invisible input uses Apple's private SkyLight framework. Apple does not support or guarantee it; a macOS update can change or break it without notice (klyk falls back to a visible cursor when it can).
|
|
47
|
+
>
|
|
48
|
+
> **What klyk is, honestly:** an early, experimental, open-source tool built by a solo author — a business student, not a professional developer — working with AI, in good faith. It has **not** had an independent professional security audit. Treat it as unproven: it works in the cases its author has tested, and has not been verified end-to-end on every path.
|
|
49
|
+
>
|
|
50
|
+
> **No warranty. Use at your own risk.** klyk is provided "as is" under the MIT license, with no warranty of any kind. You are responsible for what the agent does on your machine. Don't point it at anything — money, accounts, irreplaceable files — you aren't willing to have an autonomous agent touch.
|
|
51
|
+
|
|
52
|
+
**OS-level macOS app testing for AI agents.** Click real buttons, type real keys, see what actually rendered. Native apps, Electron apps, browsers, web pages, system dialogs — anything visible on the screen.
|
|
53
|
+
|
|
54
|
+
> **Status:** Portfolio project. Showcases product thinking and shipped tooling. Bug reports won't be actively triaged. Well-scoped PRs are welcome — but expect a slow review cadence.
|
|
55
|
+
|
|
56
|
+
---
|
|
57
|
+
|
|
58
|
+
## The problem
|
|
59
|
+
|
|
60
|
+
AI assistants are increasingly asked to test, validate, or operate desktop apps end-to-end. Today they can't. Existing automation tools either require deep app instrumentation (XCUITest, Appium) or simulate user input at a layer too brittle to be trusted (pixel-only click frameworks, headless DOM scrapers). The result: agents that can write apps faster than ever, but can't verify they actually work.
|
|
61
|
+
|
|
62
|
+
Klyk closes that gap. It gives an AI agent the same input channel a human has — real cursor moves via Apple's CoreGraphics API, real keystrokes posted to the HID event tap, real composited screenshots — and a clean MCP interface to drive it. The agent observes, decides, acts, verifies. The way a human would.
|
|
63
|
+
|
|
64
|
+
## What it does
|
|
65
|
+
|
|
66
|
+
```
|
|
67
|
+
> screenshot the app, then click "Sign in"
|
|
68
|
+
[ Klyk takes a real screenshot via CoreGraphics, returns it + the AX tree ]
|
|
69
|
+
[ click_element finds "Sign in" via accessibility, falls back to OCR, then to template match ]
|
|
70
|
+
[ Real click fires through the HID event tap — indistinguishable from a human pressing the button ]
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
Klyk exposes a flat, MECE (mutually exclusive, collectively exhaustive) tool surface across observation, interaction, evaluation, session management, and system operations. Three-tier click targeting (accessibility/AX → on-device OCR → pixel template) keeps a label reachable regardless of how the app exposes it. Cross-app drag, right-click-then-select, and multilingual OCR are all first-class. Per-call latency and reasoning-gap metrics let the agent self-pace. Best-effort AX data is folded into screenshots so most tasks finish in one round-trip.
|
|
74
|
+
|
|
75
|
+
## Install
|
|
76
|
+
|
|
77
|
+
> Not yet on PyPI. The block below is the target experience. The code is written but **not yet verified end-to-end** on the current build (recent changes include removing the HTTP bridge). Treat klyk as experimental and unproven — see the warning above and the Disclaimer.
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
pipx install klyk # isolated install — recommended
|
|
81
|
+
klyk install
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
> **Use `pipx` (or `uv tool install klyk`), not bare `pip`.** Klyk pulls a modern NumPy; installing it into your global Python can clash with other packages pinned to older versions. `pipx`/`uv` give klyk its own environment while still putting `klyk` and `klyk-call` on your PATH — same commands, zero blast radius. Plain `pip install klyk` works only if you want it in the current environment and accept that risk.
|
|
85
|
+
|
|
86
|
+
`klyk install` is a turnkey first-run flow:
|
|
87
|
+
|
|
88
|
+
1. Adds Klyk to `~/.claude.json` (so it appears in every Claude Code session).
|
|
89
|
+
2. Walks you through granting the two macOS permissions Klyk needs — opens the exact System Settings panes for **Accessibility** and **Screen Recording**, waits for you to add your terminal app, then **verifies the grant actually came through** before continuing.
|
|
90
|
+
3. Runs a final `klyk doctor` pass to confirm every piece is green.
|
|
91
|
+
4. Detects every *other* AI client on your Mac and offers to wire them all in one confirmation — permissions carry over, so it's a single prompt, not a setup pass per client.
|
|
92
|
+
|
|
93
|
+
For clients that read a natural-language context file (Gemini CLI's `GEMINI.md`), `install` also *offers* — opt-in, defaults to no — to add a short, clearly marked klyk note there, so the agent can fall back to the `klyk-call` shell command if its own MCP ever fails to surface klyk. It never edits that file without an explicit yes, it merges around your existing content, and `uninstall` removes the note.
|
|
94
|
+
|
|
95
|
+
Restart Claude Code (or whichever MCP client you use) and klyk is live. Try `inspect Finder` to see it in action. To wire every detected client up front in one shot: `klyk install --all`.
|
|
96
|
+
|
|
97
|
+
**Troubleshooting: `klyk doctor`.** Run it any time something's off. It reports every dependency, permission, and config grant klyk needs as ✓ / ⚠ / ✗, with the exact next step for anything that's not green. The `--json` flag gives a structured payload for tooling.
|
|
98
|
+
|
|
99
|
+
**One klyk per Mac.** Klyk claims an exclusive `fcntl.flock` on `~/.klyk/server.lock` at startup. If you configure klyk in two MCP clients (e.g. Claude Code *and* Cursor) and both are running, the second to launch exits cleanly with an "another klyk is already running" message — preventing duplicate menu-bar items and interleaved input collisions. Close one client to free the lock.
|
|
100
|
+
|
|
101
|
+
### Use with other MCP clients
|
|
102
|
+
|
|
103
|
+
`klyk install <client>` auto-configures any supported client — same turnkey flow as Claude (writes the config, grants permissions, runs a health check):
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
klyk install cursor # or: windsurf · continue · cline · codex · gemini · antigravity (agy)
|
|
107
|
+
klyk install --list # show every supported client and its config path
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
| Client | Config file |
|
|
111
|
+
|---|---|
|
|
112
|
+
| Claude Code | `~/.claude.json` (the default: `klyk install`) |
|
|
113
|
+
| Cursor | `~/.cursor/mcp.json` |
|
|
114
|
+
| Windsurf | `~/.codeium/windsurf/mcp_config.json` |
|
|
115
|
+
| Continue | `~/.continue/config.json` |
|
|
116
|
+
| Cline | VS Code globalStorage `cline_mcp_settings.json` |
|
|
117
|
+
| OpenAI Codex CLI | `~/.codex/config.toml` |
|
|
118
|
+
| Gemini CLI | `~/.gemini/settings.json` |
|
|
119
|
+
| Antigravity CLI (`agy`) | `~/.gemini/antigravity-cli/mcp_config.json` |
|
|
120
|
+
|
|
121
|
+
Any other MCP client works too — klyk speaks MCP natively. Add this entry to its config wherever it lives — use the **full path to the Python klyk is installed in** as `command` (run `python -c "import sys;print(sys.executable)"` in that env; `klyk install` fills this in automatically). A bare `python3` only works if klyk is in your global Python:
|
|
122
|
+
|
|
123
|
+
```json
|
|
124
|
+
{ "mcpServers": { "klyk": { "command": "/path/to/python", "args": ["-m", "klyk.mcp_server"] } } }
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
Permissions, the singleton lock, and `klyk doctor` work identically regardless of which client launches klyk.
|
|
128
|
+
|
|
129
|
+
### Drive klyk from any AI (no MCP integration required)
|
|
130
|
+
|
|
131
|
+
MCP support varies a lot between agent harnesses — some gate it, some implement it incompletely, some don't have it. So klyk ships an extra front door that works even when a client's own MCP plumbing doesn't. It reaches the **same** persistent klyk session.
|
|
132
|
+
|
|
133
|
+
**`klyk-call` — one shell command, any tool.** For any agent that can run a shell command (great for smaller models — no handshake to reason about):
|
|
134
|
+
|
|
135
|
+
```bash
|
|
136
|
+
klyk-call --list # all tools + their parameter names
|
|
137
|
+
klyk-call --schema inspect # full JSON schema for one tool
|
|
138
|
+
klyk-call --tool inspect --app Finder # call any tool
|
|
139
|
+
klyk-call --tool screenshot --app Finder # screenshot → saved to disk, path returned
|
|
140
|
+
echo '{"tool":"screen_info","args":{}}' | klyk-call --batch # many calls, one session
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
**Vision over the shell.** When a tool returns a screenshot (`screenshot`, `inspect`, `verdict`, image-producing `run` steps), `klyk-call` writes the PNG to `~/.klyk/captures/` and returns its `saved_path` instead of dumping base64 — so the agent *views* the capture with its own image reader (Claude Code's file read, Gemini's `@path`, etc.) and keeps the same observe→act→verify loop the native MCP transport has, with no context-flooding payload. The cache keeps the most recent 20 captures.
|
|
144
|
+
|
|
145
|
+
## Quick example
|
|
146
|
+
|
|
147
|
+
```
|
|
148
|
+
screenshot(app="Google Chrome")
|
|
149
|
+
# → returns the image plus the AX element list
|
|
150
|
+
|
|
151
|
+
run(app="Google Chrome", actions=[
|
|
152
|
+
{"tool": "click", "x": 580, "y": 389},
|
|
153
|
+
{"tool": "fill_field", "x": 580, "y": 389, "text": "This video is incredible!"},
|
|
154
|
+
{"tool": "screenshot"}
|
|
155
|
+
])
|
|
156
|
+
# → executes the full sequence at OS speed, returns one batched response
|
|
157
|
+
|
|
158
|
+
verdict(app="Google Chrome", test_description="Posted a comment on a YouTube video")
|
|
159
|
+
# → returns final screenshot + logs + grading criteria for the agent to synthesize PASS/FAIL
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
## Why these specific trade-offs
|
|
163
|
+
|
|
164
|
+
The interesting product decisions weren't about which tools to build, but which ones *not* to build:
|
|
165
|
+
|
|
166
|
+
- **No third-party computer-use libraries.** Everything runs through Apple's CoreGraphics, Vision, and Accessibility frameworks via Python's `ctypes`. Keeps the dependency footprint tiny and the failure surface predictable.
|
|
167
|
+
- **No visual grounding models.** UI-TARS and OmniParser would have closed the "no AX, no text" gap — at the cost of a multi-GB model download and 1–2s per call. Rejected. Template matching covers the same case at 30ms with zero ML dependencies.
|
|
168
|
+
- **No Chrome DevTools Protocol.** It would have helped only Chromium browsers and broken the "like a human" model. Skipped in favor of forcing the renderer-accessibility flag, which gets the entire web AX tree for free.
|
|
169
|
+
- **macOS only.** Cross-platform compromises every primitive. The honest framing: ship one OS well rather than three OSes badly.
|
|
170
|
+
|
|
171
|
+
Every tool is designed against the same set of failure modes — ambiguity, accidental retries, token bloat, lost reactivity from batching, and so on. The agent-facing contract for each tool lives in its `description` field in `klyk/mcp_server.py`.
|
|
172
|
+
|
|
173
|
+
## What's inside
|
|
174
|
+
|
|
175
|
+
| Module | Purpose |
|
|
176
|
+
|---|---|
|
|
177
|
+
| `mcp_server.py` | MCP server, tool definitions, dispatch |
|
|
178
|
+
| `session.py` | Per-app session registry, auto-launch, template cache |
|
|
179
|
+
| `computer.py` | CoreGraphics input synthesis (click, drag, keyboard, scroll, AX) |
|
|
180
|
+
| `capture.py` | CoreGraphics screenshot capture (in-memory primary, screencapture fallback) |
|
|
181
|
+
| `launcher.py` | App launch with browser-aware AX flag injection |
|
|
182
|
+
| `ocr.py` | Apple Vision OCR (two-pass: fast then accurate) |
|
|
183
|
+
| `matcher.py` | Pure-NumPy template matching (FFT + integral-image NCC) with template cache support |
|
|
184
|
+
| `grader.py`, `reporter.py` | Verdict + UI grading helpers |
|
|
185
|
+
| `keycodes.py`, `logs.py` | Low-level support |
|
|
186
|
+
|
|
187
|
+
For the full tool reference and behavior contracts, see the tool `description` fields in `klyk/mcp_server.py`. For how the internals are shaped and why, see [`ARCHITECTURE.md`](./ARCHITECTURE.md).
|
|
188
|
+
|
|
189
|
+
## Security model & trust scope
|
|
190
|
+
|
|
191
|
+
Klyk is a thin pipe between the agent and the OS. Its trust model is straightforward:
|
|
192
|
+
|
|
193
|
+
- **Local only.** **As of this release, klyk makes no network calls** — your screen contents and inputs stay on your machine. Screenshots, OCR results, AX labels, and tool responses never leave your machine via klyk.
|
|
194
|
+
- **macOS permissions are the consent surface.** Accessibility and Screen Recording must be granted explicitly via System Settings; `klyk doctor` shows the current state.
|
|
195
|
+
- **The agent controls every action.** Klyk doesn't decide what to click or type — it executes what the agent asks. Run klyk only with agents you trust to act on your behalf.
|
|
196
|
+
- **Stderr from launched apps is captured for the `verdict` payload.** klyk attempts to scrub common credential patterns (passwords, API keys, JWTs, AWS keys, bearer tokens) on a **best-effort** basis — it cannot catch every format, so do not rely on it as your only safeguard.
|
|
197
|
+
- **Private framework usage.** Klyk uses Apple's private SkyLight framework for invisible input delivery (the "autonomous" mode). This is fine for CLI / PyPI distribution but is the reason klyk can't ship via the Mac App Store.
|
|
198
|
+
- **Reporting a vulnerability.** See [`SECURITY.md`](./SECURITY.md).
|
|
199
|
+
|
|
200
|
+
## License
|
|
201
|
+
|
|
202
|
+
MIT — see [`LICENSE`](./LICENSE).
|
|
203
|
+
|
|
204
|
+
## Disclaimer — No Warranty
|
|
205
|
+
|
|
206
|
+
klyk is provided **"AS IS", without warranty of any kind**, express or implied, including merchantability, fitness for a particular purpose, and non-infringement (see [`LICENSE`](./LICENSE)). To the maximum extent permitted by law, the author is **not liable** for any damage, data loss, financial loss, account action, privacy exposure, or other harm arising from the use, misuse, or malfunction of klyk — whether caused by the software, the AI agent driving it, or a third-party dependency or macOS framework it relies on. Its safety measures (credential scrubbing, bounds checks, the emergency-stop chord, confirm-destructive flags) are **best-effort and agent-cooperative only** — not a guarantee, and not to be relied upon as your sole safeguard. **You run klyk at your own risk and are solely responsible for what you connect it to and what it does.**
|
|
207
|
+
|
|
208
|
+
---
|
|
209
|
+
|
|
210
|
+
*Designed and shipped using AI as implementation partner. The product decisions, scope choices, and trade-offs are mine.*
|
klyk-0.1.0/README.md
ADDED
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
# Klyk
|
|
2
|
+
|
|
3
|
+
> ## ⚠️ What klyk is — and what it isn't
|
|
4
|
+
>
|
|
5
|
+
> **Read this before you install.**
|
|
6
|
+
>
|
|
7
|
+
> klyk gives an AI agent real, OS-level control of your Mac — the same input a human has. It moves the cursor, fires keystrokes, and clicks what's on screen. In its default mode it does this **invisibly and autonomously**: there is no cursor movement you can watch and no confirmation prompt — klyk acts on its own as soon as the agent decides to.
|
|
8
|
+
>
|
|
9
|
+
> **This is powerful, and it is dangerous.** Be clear-eyed about what that means:
|
|
10
|
+
>
|
|
11
|
+
> - **It can take real, irreversible actions.** A click is a click. klyk can press *Buy*, *Send*, *Confirm Transfer*, *Delete*, or *Sign* just as you could. The only guardrails are a window-bounds check and a text instruction asking the agent to confirm first — an instruction the agent *can ignore*. There is no sandbox and no spending limit.
|
|
12
|
+
> - **It runs with your full user privileges.** Anything you can do on your Mac, klyk can do. It does not isolate itself or drop privileges.
|
|
13
|
+
> - **It is a prompt-injection target.** If the agent driving klyk also reads untrusted content — a web page, an email, a document — a malicious instruction hidden there can become real clicks and keystrokes on your machine. "Reads the web" + "controls the Mac" is the high-risk combination. Run klyk only with an agent and a workflow you trust.
|
|
14
|
+
> - **It relies on an undocumented Apple API.** Invisible input uses Apple's private SkyLight framework. Apple does not support or guarantee it; a macOS update can change or break it without notice (klyk falls back to a visible cursor when it can).
|
|
15
|
+
>
|
|
16
|
+
> **What klyk is, honestly:** an early, experimental, open-source tool built by a solo author — a business student, not a professional developer — working with AI, in good faith. It has **not** had an independent professional security audit. Treat it as unproven: it works in the cases its author has tested, and has not been verified end-to-end on every path.
|
|
17
|
+
>
|
|
18
|
+
> **No warranty. Use at your own risk.** klyk is provided "as is" under the MIT license, with no warranty of any kind. You are responsible for what the agent does on your machine. Don't point it at anything — money, accounts, irreplaceable files — you aren't willing to have an autonomous agent touch.
|
|
19
|
+
|
|
20
|
+
**OS-level macOS app testing for AI agents.** Click real buttons, type real keys, see what actually rendered. Native apps, Electron apps, browsers, web pages, system dialogs — anything visible on the screen.
|
|
21
|
+
|
|
22
|
+
> **Status:** Portfolio project. Showcases product thinking and shipped tooling. Bug reports won't be actively triaged. Well-scoped PRs are welcome — but expect a slow review cadence.
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## The problem
|
|
27
|
+
|
|
28
|
+
AI assistants are increasingly asked to test, validate, or operate desktop apps end-to-end. Today they can't. Existing automation tools either require deep app instrumentation (XCUITest, Appium) or simulate user input at a layer too brittle to be trusted (pixel-only click frameworks, headless DOM scrapers). The result: agents that can write apps faster than ever, but can't verify they actually work.
|
|
29
|
+
|
|
30
|
+
Klyk closes that gap. It gives an AI agent the same input channel a human has — real cursor moves via Apple's CoreGraphics API, real keystrokes posted to the HID event tap, real composited screenshots — and a clean MCP interface to drive it. The agent observes, decides, acts, verifies. The way a human would.
|
|
31
|
+
|
|
32
|
+
## What it does
|
|
33
|
+
|
|
34
|
+
```
|
|
35
|
+
> screenshot the app, then click "Sign in"
|
|
36
|
+
[ Klyk takes a real screenshot via CoreGraphics, returns it + the AX tree ]
|
|
37
|
+
[ click_element finds "Sign in" via accessibility, falls back to OCR, then to template match ]
|
|
38
|
+
[ Real click fires through the HID event tap — indistinguishable from a human pressing the button ]
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
Klyk exposes a flat, MECE (mutually exclusive, collectively exhaustive) tool surface across observation, interaction, evaluation, session management, and system operations. Three-tier click targeting (accessibility/AX → on-device OCR → pixel template) keeps a label reachable regardless of how the app exposes it. Cross-app drag, right-click-then-select, and multilingual OCR are all first-class. Per-call latency and reasoning-gap metrics let the agent self-pace. Best-effort AX data is folded into screenshots so most tasks finish in one round-trip.
|
|
42
|
+
|
|
43
|
+
## Install
|
|
44
|
+
|
|
45
|
+
> Not yet on PyPI. The block below is the target experience. The code is written but **not yet verified end-to-end** on the current build (recent changes include removing the HTTP bridge). Treat klyk as experimental and unproven — see the warning above and the Disclaimer.
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
pipx install klyk # isolated install — recommended
|
|
49
|
+
klyk install
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
> **Use `pipx` (or `uv tool install klyk`), not bare `pip`.** Klyk pulls a modern NumPy; installing it into your global Python can clash with other packages pinned to older versions. `pipx`/`uv` give klyk its own environment while still putting `klyk` and `klyk-call` on your PATH — same commands, zero blast radius. Plain `pip install klyk` works only if you want it in the current environment and accept that risk.
|
|
53
|
+
|
|
54
|
+
`klyk install` is a turnkey first-run flow:
|
|
55
|
+
|
|
56
|
+
1. Adds Klyk to `~/.claude.json` (so it appears in every Claude Code session).
|
|
57
|
+
2. Walks you through granting the two macOS permissions Klyk needs — opens the exact System Settings panes for **Accessibility** and **Screen Recording**, waits for you to add your terminal app, then **verifies the grant actually came through** before continuing.
|
|
58
|
+
3. Runs a final `klyk doctor` pass to confirm every piece is green.
|
|
59
|
+
4. Detects every *other* AI client on your Mac and offers to wire them all in one confirmation — permissions carry over, so it's a single prompt, not a setup pass per client.
|
|
60
|
+
|
|
61
|
+
For clients that read a natural-language context file (Gemini CLI's `GEMINI.md`), `install` also *offers* — opt-in, defaults to no — to add a short, clearly marked klyk note there, so the agent can fall back to the `klyk-call` shell command if its own MCP ever fails to surface klyk. It never edits that file without an explicit yes, it merges around your existing content, and `uninstall` removes the note.
|
|
62
|
+
|
|
63
|
+
Restart Claude Code (or whichever MCP client you use) and klyk is live. Try `inspect Finder` to see it in action. To wire every detected client up front in one shot: `klyk install --all`.
|
|
64
|
+
|
|
65
|
+
**Troubleshooting: `klyk doctor`.** Run it any time something's off. It reports every dependency, permission, and config grant klyk needs as ✓ / ⚠ / ✗, with the exact next step for anything that's not green. The `--json` flag gives a structured payload for tooling.
|
|
66
|
+
|
|
67
|
+
**One klyk per Mac.** Klyk claims an exclusive `fcntl.flock` on `~/.klyk/server.lock` at startup. If you configure klyk in two MCP clients (e.g. Claude Code *and* Cursor) and both are running, the second to launch exits cleanly with an "another klyk is already running" message — preventing duplicate menu-bar items and interleaved input collisions. Close one client to free the lock.
|
|
68
|
+
|
|
69
|
+
### Use with other MCP clients
|
|
70
|
+
|
|
71
|
+
`klyk install <client>` auto-configures any supported client — same turnkey flow as Claude (writes the config, grants permissions, runs a health check):
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
klyk install cursor # or: windsurf · continue · cline · codex · gemini · antigravity (agy)
|
|
75
|
+
klyk install --list # show every supported client and its config path
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
| Client | Config file |
|
|
79
|
+
|---|---|
|
|
80
|
+
| Claude Code | `~/.claude.json` (the default: `klyk install`) |
|
|
81
|
+
| Cursor | `~/.cursor/mcp.json` |
|
|
82
|
+
| Windsurf | `~/.codeium/windsurf/mcp_config.json` |
|
|
83
|
+
| Continue | `~/.continue/config.json` |
|
|
84
|
+
| Cline | VS Code globalStorage `cline_mcp_settings.json` |
|
|
85
|
+
| OpenAI Codex CLI | `~/.codex/config.toml` |
|
|
86
|
+
| Gemini CLI | `~/.gemini/settings.json` |
|
|
87
|
+
| Antigravity CLI (`agy`) | `~/.gemini/antigravity-cli/mcp_config.json` |
|
|
88
|
+
|
|
89
|
+
Any other MCP client works too — klyk speaks MCP natively. Add this entry to its config wherever it lives — use the **full path to the Python klyk is installed in** as `command` (run `python -c "import sys;print(sys.executable)"` in that env; `klyk install` fills this in automatically). A bare `python3` only works if klyk is in your global Python:
|
|
90
|
+
|
|
91
|
+
```json
|
|
92
|
+
{ "mcpServers": { "klyk": { "command": "/path/to/python", "args": ["-m", "klyk.mcp_server"] } } }
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
Permissions, the singleton lock, and `klyk doctor` work identically regardless of which client launches klyk.
|
|
96
|
+
|
|
97
|
+
### Drive klyk from any AI (no MCP integration required)
|
|
98
|
+
|
|
99
|
+
MCP support varies a lot between agent harnesses — some gate it, some implement it incompletely, some don't have it. So klyk ships an extra front door that works even when a client's own MCP plumbing doesn't. It reaches the **same** persistent klyk session.
|
|
100
|
+
|
|
101
|
+
**`klyk-call` — one shell command, any tool.** For any agent that can run a shell command (great for smaller models — no handshake to reason about):
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
klyk-call --list # all tools + their parameter names
|
|
105
|
+
klyk-call --schema inspect # full JSON schema for one tool
|
|
106
|
+
klyk-call --tool inspect --app Finder # call any tool
|
|
107
|
+
klyk-call --tool screenshot --app Finder # screenshot → saved to disk, path returned
|
|
108
|
+
echo '{"tool":"screen_info","args":{}}' | klyk-call --batch # many calls, one session
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
**Vision over the shell.** When a tool returns a screenshot (`screenshot`, `inspect`, `verdict`, image-producing `run` steps), `klyk-call` writes the PNG to `~/.klyk/captures/` and returns its `saved_path` instead of dumping base64 — so the agent *views* the capture with its own image reader (Claude Code's file read, Gemini's `@path`, etc.) and keeps the same observe→act→verify loop the native MCP transport has, with no context-flooding payload. The cache keeps the most recent 20 captures.
|
|
112
|
+
|
|
113
|
+
## Quick example
|
|
114
|
+
|
|
115
|
+
```
|
|
116
|
+
screenshot(app="Google Chrome")
|
|
117
|
+
# → returns the image plus the AX element list
|
|
118
|
+
|
|
119
|
+
run(app="Google Chrome", actions=[
|
|
120
|
+
{"tool": "click", "x": 580, "y": 389},
|
|
121
|
+
{"tool": "fill_field", "x": 580, "y": 389, "text": "This video is incredible!"},
|
|
122
|
+
{"tool": "screenshot"}
|
|
123
|
+
])
|
|
124
|
+
# → executes the full sequence at OS speed, returns one batched response
|
|
125
|
+
|
|
126
|
+
verdict(app="Google Chrome", test_description="Posted a comment on a YouTube video")
|
|
127
|
+
# → returns final screenshot + logs + grading criteria for the agent to synthesize PASS/FAIL
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
## Why these specific trade-offs
|
|
131
|
+
|
|
132
|
+
The interesting product decisions weren't about which tools to build, but which tools *not* to build:
|
|
133
|
+
|
|
134
|
+
- **No third-party computer-use libraries.** Everything runs through Apple's CoreGraphics, Vision, and Accessibility frameworks via Python's `ctypes`. Keeps the dependency footprint tiny and the failure surface predictable.
|
|
135
|
+
- **No visual grounding models.** UI-TARS and OmniParser would have closed the "no AX, no text" gap — at the cost of a multi-GB model download and 1–2s per call. Rejected. Template matching covers the same case at 30ms with zero ML dependencies.
|
|
136
|
+
- **No Chrome DevTools Protocol.** It would have helped only Chromium browsers and broken the "like a human" model. Skipped in favor of forcing the renderer-accessibility flag, which gets the entire web AX tree for free.
|
|
137
|
+
- **macOS only.** Cross-platform support compromises every primitive. The honest framing: support one OS well rather than three OSes badly.
|
|
138
|
+
|
|
139
|
+
Every tool is designed against the same set of failure modes — ambiguity, accidental retries, token bloat, lost reactivity from batching, and so on. The agent-facing contract for each tool lives in its `description` field in `klyk/mcp_server.py`.
|
|
140
|
+
|
|
141
|
+
## What's inside
|
|
142
|
+
|
|
143
|
+
| Module | Purpose |
|
|
144
|
+
|---|---|
|
|
145
|
+
| `mcp_server.py` | MCP server, tool definitions, dispatch |
|
|
146
|
+
| `session.py` | Per-app session registry, auto-launch, template cache |
|
|
147
|
+
| `computer.py` | CoreGraphics input synthesis (click, drag, keyboard, scroll, AX) |
|
|
148
|
+
| `capture.py` | CoreGraphics screenshot capture (in-memory primary, screencapture fallback) |
|
|
149
|
+
| `launcher.py` | App launch with browser-aware AX flag injection |
|
|
150
|
+
| `ocr.py` | Apple Vision OCR (two-pass: fast then accurate) |
|
|
151
|
+
| `matcher.py` | Pure-NumPy template matching (FFT + integral-image NCC) with template cache support |
|
|
152
|
+
| `grader.py`, `reporter.py` | Verdict + UI grading helpers |
|
|
153
|
+
| `keycodes.py`, `logs.py` | Low-level support |
|
|
154
|
+
|
|
155
|
+
For the full tool reference and behavior contracts, see the tool `description` fields in `klyk/mcp_server.py`. For how the internals are shaped and why, see [`ARCHITECTURE.md`](./ARCHITECTURE.md).
|
|
156
|
+
|
|
157
|
+
## Security model & trust scope
|
|
158
|
+
|
|
159
|
+
Klyk is a thin pipe between the agent and the OS. Its trust model is straightforward:
|
|
160
|
+
|
|
161
|
+
- **Local only.** **As of this release, klyk makes no network calls** — your screen contents and inputs stay on your machine. Screenshots, OCR results, AX labels, and tool responses never leave your machine via klyk.
|
|
162
|
+
- **macOS permissions are the consent surface.** Accessibility and Screen Recording must be granted explicitly via System Settings; `klyk doctor` shows the current state.
|
|
163
|
+
- **The agent controls every action.** Klyk doesn't decide what to click or type — it executes what the agent asks. Run klyk only with agents you trust to act on your behalf.
|
|
164
|
+
- **Stderr from launched apps is captured for the `verdict` payload.** klyk attempts to scrub common credential patterns (passwords, API keys, JWTs, AWS keys, bearer tokens) on a **best-effort** basis — it cannot catch every format, so do not rely on it as your only safeguard.
|
|
165
|
+
- **Private framework usage.** Klyk uses Apple's private SkyLight framework for invisible input delivery (the "autonomous" mode). This is fine for CLI / PyPI distribution but is the reason klyk can't ship via the Mac App Store.
|
|
166
|
+
- **Reporting a vulnerability.** See [`SECURITY.md`](./SECURITY.md).
|
|
167
|
+
|
|
168
|
+
## License
|
|
169
|
+
|
|
170
|
+
MIT — see [`LICENSE`](./LICENSE).
|
|
171
|
+
|
|
172
|
+
## Disclaimer — No Warranty
|
|
173
|
+
|
|
174
|
+
klyk is provided **"AS IS", without warranty of any kind**, express or implied, including merchantability, fitness for a particular purpose, and non-infringement (see [`LICENSE`](./LICENSE)). To the maximum extent permitted by law, the author is **not liable** for any damage, data loss, financial loss, account action, privacy exposure, or other harm arising from the use, misuse, or malfunction of klyk — whether caused by the software, the AI agent driving it, or a third-party dependency or macOS framework it relies on. Its safety measures (credential scrubbing, bounds checks, the emergency-stop chord, confirm-destructive flags) are **best-effort and agent-cooperative only** — not a guarantee, and not to be relied upon as your sole safeguard. **You run klyk at your own risk and are solely responsible for what you connect it to and what it does.**
|
|
175
|
+
|
|
176
|
+
---
|
|
177
|
+
|
|
178
|
+
*Designed and shipped using AI as implementation partner. The product decisions, scope choices, and trade-offs are mine.*
|
klyk-0.1.0/SECURITY.md
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# Security Policy
|
|
2
|
+
|
|
3
|
+
## Supported versions
|
|
4
|
+
|
|
5
|
+
Klyk is a portfolio project with a slow review cadence. Security reports are read and triaged, but fixes ship on a best-effort timeline. The current `main` branch is the only supported version.
|
|
6
|
+
|
|
7
|
+
## Reporting a vulnerability
|
|
8
|
+
|
|
9
|
+
If you find a security issue, please **do not** open a public GitHub issue. Instead:
|
|
10
|
+
|
|
11
|
+
1. Email the maintainer at the address in the GitHub profile, or
|
|
12
|
+
2. Open a private GitHub Security Advisory at <https://github.com/legetdev/klyk/security/advisories>.
|
|
13
|
+
|
|
14
|
+
Include:
|
|
15
|
+
- A clear description of the issue and its impact
|
|
16
|
+
- The version / commit you found it on
|
|
17
|
+
- A minimal reproducer if possible
|
|
18
|
+
|
|
19
|
+
You should expect an acknowledgement within ~7 days. A fix or written disposition follows on a best-effort basis. Coordinated disclosure is preferred — please give the project a reasonable window to fix the issue before publishing details.
|
|
20
|
+
|
|
21
|
+
## Trust model
|
|
22
|
+
|
|
23
|
+
Klyk runs entirely on the user's local Mac, with permissions granted by the user via macOS System Settings. The trust boundaries are:
|
|
24
|
+
|
|
25
|
+
- **User → klyk:** the user grants Accessibility and Screen Recording via macOS Settings. `klyk doctor` reports the current state.
|
|
26
|
+
- **Agent → klyk:** the agent (Claude / Cursor / Cline / etc.) drives klyk via the MCP protocol over stdio. Whatever the agent asks klyk to do, klyk does — subject only to the bounds check, safety guards, and confirm-destructive flags. Run klyk only with agents you trust to act on your behalf.
|
|
27
|
+
- **Klyk → outside world:** none. Klyk makes zero network calls. Captured screen content, OCR results, and tool responses never leave the local machine via klyk.
|
|
28
|
+
|
|
29
|
+
## Prompt injection & the confused-deputy risk
|
|
30
|
+
|
|
31
|
+
klyk executes whatever the connected agent tells it to. If that agent also ingests untrusted content — a web page, an email, a PDF, a chat message — a malicious instruction hidden in that content can be turned into real clicks and keystrokes on your Mac. This is the classic *confused-deputy* problem, and the "agent that reads the web **and** drives the machine" configuration is the highest-risk way to run klyk.
|
|
32
|
+
|
|
33
|
+
klyk's safeguards are **agent-cooperative, not enforced**: the money/destructive guidance in tool descriptions is text the agent can ignore, and the only guard enforced in code is the geometric window-bounds check (a click must land inside the target app's window — it does **not** stop a destructive click *inside* it). The `Cmd+Shift+Esc` emergency stop is a hard latch: it blocks **all** input until the user presses the chord again to clear it — the agent **cannot** clear it itself (the `resume` tool only reports status).
|
|
34
|
+
|
|
35
|
+
Mitigations are operational, not technical: run klyk only with agents and workflows you trust, keep untrusted-content reading and machine control in separate sessions where you can, and supervise anything consequential.
|
|
36
|
+
|
|
37
|
+
## What's scrubbed
|
|
38
|
+
|
|
39
|
+
Stderr from apps launched by klyk is run through credential scrubbers at capture time before being stored in the in-session log buffer. The scrubbed patterns:
|
|
40
|
+
|
|
41
|
+
- `password=…`, `secret=…`, `token=…`, `api_key=…`, `access_key=…`, `auth=…`, `bearer=…` (key visible, value replaced with `***`)
|
|
42
|
+
- `Authorization: Bearer …` HTTP headers
|
|
43
|
+
- AWS access key IDs (`AKIA*`, `ASIA*`, …)
|
|
44
|
+
- JWTs (`eyJ….….…`)
|
|
45
|
+
|
|
46
|
+
This is defense-in-depth — agents shouldn't be trusted to filter credentials downstream, and a misbehaving app that prints secrets to stderr shouldn't infect the rest of the trust chain. Scrubbing is **best-effort, not a guarantee: it cannot catch every credential format.** Do not rely on it as your only safeguard.
|
|
47
|
+
|
|
48
|
+
## What's deliberately not scrubbed
|
|
49
|
+
|
|
50
|
+
- Screenshots and OCR text returned to the agent: the agent asked for the pixels, so it gets them. Don't run klyk on screens with content you can't show the agent.
|
|
51
|
+
- AX labels and values: same rationale — the agent asked.
|
|
52
|
+
|
|
53
|
+
## Out of scope
|
|
54
|
+
|
|
55
|
+
- Vulnerabilities that require the user to already be running malware or to have granted system-wide screen-control to a hostile process. Klyk is downstream of those exploits.
|
|
56
|
+
- Issues in third-party MCP clients (Claude Code, Cursor, etc.). Report those upstream.
|
|
57
|
+
- Bugs in macOS frameworks. Report those to Apple.
|
|
58
|
+
|
|
59
|
+
## Acknowledgements
|
|
60
|
+
|
|
61
|
+
Reporters who follow coordinated disclosure get a credit in the release notes if they want one.
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""Klyk — OS-level macOS app testing via MCP.
|
|
2
|
+
|
|
3
|
+
Portfolio project. Showcases product thinking and shipped tooling.
|
|
4
|
+
Bug reports won't be actively triaged. Well-scoped PRs are welcome —
|
|
5
|
+
but expect a slow review cadence.
|
|
6
|
+
|
|
7
|
+
Primary interface: the MCP server (`python -m klyk.mcp_server`).
|
|
8
|
+
|
|
9
|
+
Module-level access for Python library users:
|
|
10
|
+
from klyk import computer, capture, matcher, ocr, session, launcher
|
|
11
|
+
|
|
12
|
+
A higher-level Python library API may follow if there's demand.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
__version__ = "0.1.0"
|