puppet-agent 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. puppet_agent-0.1.0/.gitignore +21 -0
  2. puppet_agent-0.1.0/CHANGELOG.md +19 -0
  3. puppet_agent-0.1.0/LICENSE +21 -0
  4. puppet_agent-0.1.0/PKG-INFO +409 -0
  5. puppet_agent-0.1.0/README.md +370 -0
  6. puppet_agent-0.1.0/benchmarks/expected/.gitkeep +0 -0
  7. puppet_agent-0.1.0/benchmarks/run_benchmark.py +71 -0
  8. puppet_agent-0.1.0/benchmarks/screenshots/.gitkeep +0 -0
  9. puppet_agent-0.1.0/examples/browser_automation.py +39 -0
  10. puppet_agent-0.1.0/examples/desktop_control.py +38 -0
  11. puppet_agent-0.1.0/examples/read_screen.py +28 -0
  12. puppet_agent-0.1.0/integrations/antigravity/README.md +50 -0
  13. puppet_agent-0.1.0/integrations/claude-code/README.md +44 -0
  14. puppet_agent-0.1.0/integrations/cline/README.md +42 -0
  15. puppet_agent-0.1.0/integrations/codex/README.md +47 -0
  16. puppet_agent-0.1.0/integrations/cursor/README.md +52 -0
  17. puppet_agent-0.1.0/integrations/gemini/README.md +46 -0
  18. puppet_agent-0.1.0/integrations/openclaw/README.md +49 -0
  19. puppet_agent-0.1.0/integrations/openclaw/SOUL_VISION.md +39 -0
  20. puppet_agent-0.1.0/integrations/windsurf/README.md +50 -0
  21. puppet_agent-0.1.0/integrations/zed/README.md +46 -0
  22. puppet_agent-0.1.0/puppet-ai.yaml.example +13 -0
  23. puppet_agent-0.1.0/pyproject.toml +51 -0
  24. puppet_agent-0.1.0/src/puppet_ai/__init__.py +4 -0
  25. puppet_agent-0.1.0/src/puppet_ai/cli.py +189 -0
  26. puppet_agent-0.1.0/src/puppet_ai/config/__init__.py +0 -0
  27. puppet_agent-0.1.0/src/puppet_ai/config/presets.py +30 -0
  28. puppet_agent-0.1.0/src/puppet_ai/config/schema.py +31 -0
  29. puppet_agent-0.1.0/src/puppet_ai/core/__init__.py +0 -0
  30. puppet_agent-0.1.0/src/puppet_ai/core/accessibility.py +119 -0
  31. puppet_agent-0.1.0/src/puppet_ai/core/actions.py +81 -0
  32. puppet_agent-0.1.0/src/puppet_ai/core/capture.py +128 -0
  33. puppet_agent-0.1.0/src/puppet_ai/core/detector.py +84 -0
  34. puppet_agent-0.1.0/src/puppet_ai/core/foveal.py +23 -0
  35. puppet_agent-0.1.0/src/puppet_ai/core/ocr.py +136 -0
  36. puppet_agent-0.1.0/src/puppet_ai/core/ocr_cache.py +40 -0
  37. puppet_agent-0.1.0/src/puppet_ai/core/peripheral.py +24 -0
  38. puppet_agent-0.1.0/src/puppet_ai/core/permissions.py +43 -0
  39. puppet_agent-0.1.0/src/puppet_ai/core/pii_filter.py +97 -0
  40. puppet_agent-0.1.0/src/puppet_ai/core/wait.py +50 -0
  41. puppet_agent-0.1.0/src/puppet_ai/core/world_model.py +69 -0
  42. puppet_agent-0.1.0/src/puppet_ai/instructions.py +165 -0
  43. puppet_agent-0.1.0/src/puppet_ai/providers/__init__.py +48 -0
  44. puppet_agent-0.1.0/src/puppet_ai/providers/anthropic.py +156 -0
  45. puppet_agent-0.1.0/src/puppet_ai/providers/base.py +9 -0
  46. puppet_agent-0.1.0/src/puppet_ai/providers/gemini.py +40 -0
  47. puppet_agent-0.1.0/src/puppet_ai/providers/ollama.py +108 -0
  48. puppet_agent-0.1.0/src/puppet_ai/providers/openai.py +42 -0
  49. puppet_agent-0.1.0/src/puppet_ai/providers/sampling.py +45 -0
  50. puppet_agent-0.1.0/src/puppet_ai/server/__init__.py +0 -0
  51. puppet_agent-0.1.0/src/puppet_ai/server/mcp.py +399 -0
  52. puppet_agent-0.1.0/src/puppet_ai/types.py +61 -0
  53. puppet_agent-0.1.0/tests/conftest.py +60 -0
  54. puppet_agent-0.1.0/tests/test_accessibility.py +51 -0
  55. puppet_agent-0.1.0/tests/test_actions.py +97 -0
  56. puppet_agent-0.1.0/tests/test_cli.py +37 -0
  57. puppet_agent-0.1.0/tests/test_config.py +66 -0
  58. puppet_agent-0.1.0/tests/test_detector.py +64 -0
  59. puppet_agent-0.1.0/tests/test_foveal.py +65 -0
  60. puppet_agent-0.1.0/tests/test_instructions.py +22 -0
  61. puppet_agent-0.1.0/tests/test_mcp_v2.py +116 -0
  62. puppet_agent-0.1.0/tests/test_ocr_bounds.py +54 -0
  63. puppet_agent-0.1.0/tests/test_ocr_cache.py +37 -0
  64. puppet_agent-0.1.0/tests/test_peripheral.py +49 -0
  65. puppet_agent-0.1.0/tests/test_permissions.py +12 -0
  66. puppet_agent-0.1.0/tests/test_pii_filter.py +88 -0
  67. puppet_agent-0.1.0/tests/test_providers/__init__.py +0 -0
  68. puppet_agent-0.1.0/tests/test_providers/test_registry.py +48 -0
  69. puppet_agent-0.1.0/tests/test_types.py +75 -0
  70. puppet_agent-0.1.0/tests/test_wait.py +37 -0
  71. puppet_agent-0.1.0/tests/test_world_model.py +84 -0
@@ -0,0 +1,21 @@
1
+ __pycache__/
2
+ *.pyc
3
+ *.egg-info/
4
+ dist/
5
+ build/
6
+ .venv/
7
+ .pytest_cache/
8
+ .ruff_cache/
9
+ .superpowers/
10
+ .DS_Store
11
+ *.DS_Store
12
+
13
+ # Test artifacts
14
+ TEST_RESULTS.md
15
+ TEST_EXECUTION_SUMMARY.txt
16
+ COMPREHENSIVE_TEST_REPORT.md
17
+ 00_TEST_INDEX.md
18
+ test_results.log
19
+ test_comprehensive.py
20
+ twitter_news_*.py
21
+ TWITTER_NEWS_REPORT.md
@@ -0,0 +1,19 @@
1
+ # Changelog
2
+
3
+ ## 0.1.0 (2026-04-03)
4
+
5
+ Initial release.
6
+
7
+ ### Features
8
+
9
+ - **27 MCP tools** for full computer control — vision, actions, system
10
+ - **Native macOS OCR** via Apple Vision Framework (Russian + English)
11
+ - **Per-window capture** via CGWindowList — no app switching needed
12
+ - **Accessibility API** — detect buttons, links, checkboxes, text fields
13
+ - **Computer control** — click, type, scroll, drag, hotkeys, clipboard
14
+ - **Smart tools** — `action_click_text`, `action_click_and_wait`, `action_open_url`
15
+ - **PII protection** — auto-mask API keys, passwords, credit cards in OCR output
16
+ - **OCR cache** — 7x speedup on repeated reads, auto-invalidation after actions
17
+ - **Retina display** support — automatic coordinate scaling
18
+ - **Agent-agnostic** — works with Claude Code, OpenClaw, any MCP client
19
+ - **System prompt** — built-in instructions teach agents how to use the tools
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 transoff
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,409 @@
1
+ Metadata-Version: 2.4
2
+ Name: puppet-agent
3
+ Version: 0.1.0
4
+ Summary: Full computer control for AI agents — see, click, type, scroll via MCP
5
+ Project-URL: Homepage, https://github.com/transoff/puppet-ai
6
+ Project-URL: Repository, https://github.com/transoff/puppet-ai
7
+ Project-URL: Issues, https://github.com/transoff/puppet-ai/issues
8
+ Author: Daniel Starkov
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Requires-Python: >=3.11
12
+ Requires-Dist: click>=8.0
13
+ Requires-Dist: mcp>=1.0.0
14
+ Requires-Dist: numpy>=1.24
15
+ Requires-Dist: pillow>=10.0
16
+ Requires-Dist: pyautogui>=0.9
17
+ Requires-Dist: pydantic>=2.0
18
+ Requires-Dist: pyperclip>=1.8
19
+ Requires-Dist: pyyaml>=6.0
20
+ Provides-Extra: all
21
+ Requires-Dist: puppet-agent[anthropic,apple-vision,gemini,mlx,openai]; extra == 'all'
22
+ Provides-Extra: anthropic
23
+ Requires-Dist: anthropic>=0.40; extra == 'anthropic'
24
+ Provides-Extra: apple-vision
25
+ Requires-Dist: pyobjc-framework-screencapturekit>=10.0; extra == 'apple-vision'
26
+ Requires-Dist: pyobjc-framework-vision>=10.0; extra == 'apple-vision'
27
+ Provides-Extra: dev
28
+ Requires-Dist: pytest-asyncio>=0.24; extra == 'dev'
29
+ Requires-Dist: pytest>=8.0; extra == 'dev'
30
+ Requires-Dist: ruff>=0.5; extra == 'dev'
31
+ Provides-Extra: gemini
32
+ Requires-Dist: google-genai>=1.0; extra == 'gemini'
33
+ Provides-Extra: mlx
34
+ Requires-Dist: mlx-vlm>=0.1; extra == 'mlx'
35
+ Requires-Dist: mlx>=0.20; extra == 'mlx'
36
+ Provides-Extra: openai
37
+ Requires-Dist: openai>=1.50; extra == 'openai'
38
+ Description-Content-Type: text/markdown
39
+
40
+ # puppet-ai
41
+
42
+ Full computer control for AI agents — see, click, type, scroll via MCP.
43
+
44
+ Give any AI agent eyes and hands. puppet-ai captures the screen, reads text via native OCR, detects UI elements, and controls mouse + keyboard. Works with any app on macOS — browsers, desktop apps, games, terminals.
45
+
46
+ ## Why puppet-ai?
47
+
48
+ - **Fast** — native macOS OCR in ~0.5s, 7x faster with caching
49
+ - **Universal** — works with ANY app, not just browsers
50
+ - **Agent-agnostic** — MCP standard, plug into any AI agent
51
+ - **Secure** — auto-masks API keys, passwords, credit cards, emails in OCR output
52
+ - **Complete** — 27 tools: vision + actions + system
53
+ - **Private** — all processing on-device, no data leaves your Mac
54
+
55
+ ## Quick Start
56
+
57
+ ### 1. Install
58
+
59
+ ```bash
60
+ pip install puppet-agent
61
+ ```
62
+
63
+ ### 2. Enable Accessibility
64
+
65
+ System Settings → Privacy & Security → Accessibility → enable your terminal/IDE app.
66
+
67
+ ### 3. Connect to your AI agent
68
+
69
+ puppet-ai is an MCP server. Connect it to any MCP-compatible agent below, then ask the agent to interact with your computer.
70
+
71
+ ---
72
+
73
+ ## Integrations
74
+
75
+ ### Claude Code
76
+
77
+ Add to `~/.claude.json` (user scope) or a project-level `.mcp.json`:
78
+
79
+ ```json
80
+ {
81
+ "mcpServers": {
82
+ "puppet-ai": {
83
+ "command": "puppet-ai",
84
+ "args": ["serve"]
85
+ }
86
+ }
87
+ }
88
+ ```
89
+
90
+ ### OpenAI Codex CLI
91
+
92
+ Add to `~/.codex/config.toml`:
93
+
94
+ ```toml
95
+ [mcp_servers.puppet-ai]
96
+ command = "puppet-ai"
97
+ args = ["serve"]
98
+ ```
99
+
100
+ Or via CLI:
101
+
102
+ ```bash
103
+ codex mcp add puppet-ai -- puppet-ai serve
104
+ ```
105
+
106
+ ### Google Gemini CLI
107
+
108
+ Add to `~/.gemini/settings.json`:
109
+
110
+ ```json
111
+ {
112
+ "mcpServers": {
113
+ "puppet-ai": {
114
+ "command": "puppet-ai",
115
+ "args": ["serve"]
116
+ }
117
+ }
118
+ }
119
+ ```
120
+
121
+ Verify: launch `gemini` and run `/mcp` to check connection.
122
+
123
+ ### Google Antigravity
124
+
125
+ Via MCP settings in Antigravity, or add to your project's `.antigravity/settings.json`:
126
+
127
+ ```json
128
+ {
129
+ "mcpServers": {
130
+ "puppet-ai": {
131
+ "command": "puppet-ai",
132
+ "args": ["serve"]
133
+ }
134
+ }
135
+ }
136
+ ```
137
+
138
+ ### Cursor
139
+
140
+ Add to `~/.cursor/mcp.json`:
141
+
142
+ ```json
143
+ {
144
+ "mcpServers": {
145
+ "puppet-ai": {
146
+ "command": "puppet-ai",
147
+ "args": ["serve"]
148
+ }
149
+ }
150
+ }
151
+ ```
152
+
153
+ Or: Cursor Settings → Tools & MCP → Add Server.
154
+
155
+ ### Windsurf
156
+
157
+ Add to `~/.codeium/windsurf/mcp_config.json`:
158
+
159
+ ```json
160
+ {
161
+ "mcpServers": {
162
+ "puppet-ai": {
163
+ "command": "puppet-ai",
164
+ "args": ["serve"]
165
+ }
166
+ }
167
+ }
168
+ ```
169
+
170
+ ### Cline (VS Code)
171
+
172
+ In VS Code, open Cline settings → MCP Servers → Add:
173
+
174
+ ```json
175
+ {
176
+ "puppet-ai": {
177
+ "command": "puppet-ai",
178
+ "args": ["serve"]
179
+ }
180
+ }
181
+ ```
182
+
183
+ ### Zed
184
+
185
+ Add to Zed settings (`~/.config/zed/settings.json`):
186
+
187
+ ```json
188
+ {
189
+ "context_servers": {
190
+ "puppet-ai": {
191
+ "command": {
192
+ "path": "puppet-ai",
193
+ "args": ["serve"]
194
+ }
195
+ }
196
+ }
197
+ }
198
+ ```
199
+
200
+ ### OpenClaw
201
+
202
+ Add to your agent's MCP config:
203
+
204
+ ```yaml
205
+ mcp_servers:
206
+ puppet-ai:
207
+ command: puppet-ai
208
+ args: [serve]
209
+ ```
210
+
211
+ ### Any MCP Client
212
+
213
+ puppet-ai speaks MCP over stdio. Spawn it as a subprocess:
214
+
215
+ ```python
216
+ import subprocess
217
+ proc = subprocess.Popen(
218
+ ["puppet-ai", "serve"],
219
+ stdin=subprocess.PIPE,
220
+ stdout=subprocess.PIPE,
221
+ )
222
+ # Communicate via MCP JSON-RPC over stdin/stdout
223
+ ```
224
+
225
+ Works with any agent that supports the [Model Context Protocol](https://modelcontextprotocol.io).
226
+
227
+ Detailed setup guides: [`integrations/`](integrations/)
228
+
229
+ ---
230
+
231
+ ## Tools
232
+
233
+ ### Vision (see the screen)
234
+
235
+ | Tool | Description |
236
+ |------|-------------|
237
+ | `vision_list_windows` | List all open windows (app, title, size) |
238
+ | `vision_read_window(app)` | Read text via OCR with bounding boxes for clicking |
239
+ | `vision_screenshot(app)` | Capture screenshot as base64 JPEG |
240
+ | `vision_get_state` | Full screen state: all windows + active window text |
241
+ | `vision_ui_elements(app)` | Get UI elements via Accessibility API (buttons, links, checkboxes) |
242
+
243
+ ### Actions (control the computer)
244
+
245
+ | Tool | Description |
246
+ |------|-------------|
247
+ | `action_click(x, y)` | Click at coordinates |
248
+ | `action_click_text(text, app)` | Find text on screen and click it — no coordinates needed |
249
+ | `action_click_and_wait(text, app)` | Click text, wait for screen to stabilize, return new state |
250
+ | `action_type_safe(text)` | Type text via clipboard paste (works with any keyboard layout) |
251
+ | `action_open_url(url)` | Open URL in browser (http/https only) |
252
+ | `action_scroll(amount, app)` | Scroll up/down in an app |
253
+ | `action_hotkey(keys)` | Keyboard shortcut (e.g. `["cmd", "c"]`) |
254
+ | `action_press(key)` | Press a key (enter, tab, escape, etc.) |
255
+ | `action_drag(...)` | Drag and drop |
256
+ | `action_activate_window(app)` | Bring app to front |
257
+ | `action_clipboard_copy(text)` | Copy to clipboard |
258
+ | `action_clipboard_paste()` | Paste from clipboard |
259
+
260
+ ### System
261
+
262
+ | Tool | Description |
263
+ |------|-------------|
264
+ | `system_check_permissions` | Check accessibility access |
265
+ | `system_get_screen_size` | Screen dimensions |
266
+ | `system_get_mouse_position` | Current cursor position |
267
+ | `system_unmask(reason)` | Temporarily disable PII masking |
268
+ | `system_mask()` | Re-enable PII masking |
269
+
270
+ ## How It Works
271
+
272
+ ```
273
+ AI Agent (Claude, Codex, Gemini, Cursor, Windsurf, ...)
274
+ ↕ MCP protocol (stdio)
275
+ puppet-ai server
276
+ ├── Vision: Apple Vision OCR + CGWindowList capture
277
+ ├── Accessibility: AXUIElement tree (buttons, links, fields)
278
+ ├── Actions: pyautogui (mouse, keyboard, scroll)
279
+ └── Security: PII regex filter (API keys, cards, passwords, emails)
280
+ ```
281
+
282
+ **The loop:**
283
+ 1. **Look** — `vision_read_window("Safari")` → text + coordinates
284
+ 2. **Decide** — agent plans next action
285
+ 3. **Act** — `action_click_text("Sign In")` → clicks center of text
286
+ 4. **Verify** — `vision_read_window` again → confirm it worked
287
+ 5. **Repeat**
288
+
289
+ ## Features
290
+
291
+ ### Native macOS OCR
292
+
293
+ Uses Apple Vision Framework — no external API, no GPU needed, works offline. Supports Russian and English.
294
+
295
+ ### Per-Window Capture
296
+
297
+ Captures specific windows via CGWindowList without switching apps or stealing focus.
298
+
299
+ ### Smart Coordinates
300
+
301
+ OCR returns absolute screen coordinates with Retina scaling handled automatically.
302
+
303
+ ### OCR Cache
304
+
305
+ Repeated reads of unchanged windows are 7x faster. Cache auto-invalidates after any action.
306
+
307
+ ### PII Protection
308
+
309
+ Sensitive data is automatically masked in OCR output:
310
+ - API keys: `sk-1***ef`
311
+ - Credit cards: `4111***1111`
312
+ - Emails: `user***com`
313
+ - Passwords in forms
314
+ - Crypto keys
315
+
316
+ ### Accessibility API
317
+
318
+ Detect interactive UI elements — buttons, checkboxes, links, text fields — with exact clickable coordinates.
319
+
320
+ ### Built-in Agent Instructions
321
+
322
+ The MCP server includes a system prompt that teaches agents how to use all 27 tools, macOS keyboard shortcuts, and the look-decide-act-verify loop.
323
+
324
+ ## Security
325
+
326
+ - **All data stays on your Mac** — no telemetry, no analytics, no external calls
327
+ - **PII auto-masking** — API keys, credit cards, emails, passwords masked before reaching the agent
328
+ - **URL validation** — only `http://` and `https://` allowed, `file://` blocked
329
+ - **Input sanitization** — app names validated to prevent injection
330
+ - **Browser allowlist** — only known browsers accepted (Chrome, Safari, Firefox, Arc, etc.)
331
+ - **Failsafe** — pyautogui failsafe enabled by default (move mouse to corner to abort)
332
+
333
+ ## Examples
334
+
335
+ ```python
336
+ import asyncio
337
+ from puppet_ai.core.capture import ScreenCapture
338
+ from puppet_ai.core.actions import DesktopActions
339
+ from puppet_ai.server.mcp import VisionPipeContext, create_all_tools
340
+
341
+ async def main():
342
+ ctx = VisionPipeContext(
343
+ capture=ScreenCapture(),
344
+ actions=DesktopActions(failsafe=True),
345
+ )
346
+ tools = create_all_tools(ctx)
347
+
348
+ # See what's on screen
349
+ windows = await tools["vision_list_windows"]()
350
+ for w in windows:
351
+ print(f"{w['app']:20s} — {w['title'][:50]}")
352
+
353
+ # Read a window
354
+ page = await tools["vision_read_window"](app="Safari")
355
+ print(page["text"][:500])
356
+
357
+ # Click text on screen
358
+ await tools["action_click_text"](text="Sign In", app="Safari")
359
+
360
+ # Open a URL
361
+ await tools["action_open_url"](url="https://example.com", browser="Safari")
362
+
363
+ asyncio.run(main())
364
+ ```
365
+
366
+ More examples in [`examples/`](examples/).
367
+
368
+ ## Configuration
369
+
370
+ ```yaml
371
+ # puppet-ai.yaml
372
+ ocr:
373
+ languages: ["en", "ru"]
374
+ mode: accurate # or "fast"
375
+
376
+ pii:
377
+ enabled: true
378
+ categories: [api_keys, credit_cards, crypto_keys, emails, passwords]
379
+
380
+ capture:
381
+ max_width: 800
382
+ format: jpeg
383
+ quality: 75
384
+ ```
385
+
386
+ Presets:
387
+
388
+ ```bash
389
+ puppet-ai serve --preset fast # speed over accuracy
390
+ puppet-ai serve --preset balanced # default
391
+ puppet-ai serve --preset quality # max accuracy
392
+ ```
393
+
394
+ ## Requirements
395
+
396
+ - macOS 13+ (Ventura or later)
397
+ - Python 3.11+
398
+ - Accessibility permissions enabled
399
+
400
+ ## Author
401
+
402
+ **Daniel Starkov**
403
+
404
+ - Twitter: [@retardTransoff](https://x.com/retardTransoff)
405
+ - LinkedIn: [Daniel Starkov](https://www.linkedin.com/in/daniel-starkov-568baa39b/)
406
+
407
+ ## License
408
+
409
+ MIT