clementine-agent 1.6.2 → 1.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clementine-agent",
3
- "version": "1.6.2",
3
+ "version": "1.6.3",
4
4
  "description": "Clementine — Personal AI Assistant (TypeScript)",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -2,13 +2,12 @@
2
2
 
3
3
  Stdio MCP server that wraps [browser-use/browser-harness](https://github.com/browser-use/browser-harness) so Clementine can drive your real Chrome via CDP.
4
4
 
5
- **Status:** Phase 1 plumbing — tools return `[stub]` placeholders until the harness primitives are wired in.
6
-
7
5
  ## Setup
8
6
 
9
7
  ```bash
10
8
  clementine browser install # clone harness + install Python deps
11
9
  clementine browser enable # register MCP server in ~/.clementine/mcp-servers.json
10
+ clementine browser connect # quit Chrome and relaunch with --remote-debugging-port=9222
12
11
  clementine restart
13
12
  ```
14
13
 
@@ -18,10 +17,16 @@ To remove: `clementine browser disable` (the venv and harness clone are kept; de
18
17
 
19
18
  | Tool | Tier | Description |
20
19
  |------|------|-------------|
21
- | `browser_status` | 1 | Diagnostic: install state + CDP URL |
22
- | `browser_screenshot` | 1 | Capture active tab |
23
- | `browser_inspect` | 1 | Read page HTML or selector |
24
- | `browser_navigate` | 2 | Open a URL in connected Chrome |
25
- | `browser_run_python` | 3 | Execute Python in `agent-workspace/` (approval required) |
20
+ | `browser_status` | 1 | Diagnostic: install state, daemon liveness, current page |
21
+ | `browser_screenshot` | 1 | Capture active tab as base64 PNG |
22
+ | `browser_page_info` | 1 | Current URL, title, viewport |
23
+ | `browser_list_tabs` | 1 | Enumerate open tabs |
24
+ | `browser_eval_js` | 2 | Run a JavaScript expression and return result |
25
+ | `browser_navigate` | 2 | Open a URL (new tab by default) |
26
+ | `browser_click_xy` | 3 | Click at viewport coordinates |
27
+ | `browser_type_text` | 3 | Type into focused input |
28
+ | `browser_press_key` | 3 | Press Enter/Tab/Escape/etc. |
29
+ | `browser_scroll` | 3 | Scroll the page |
30
+ | `browser_run_python` | 3 | Execute Python in harness context (full helpers in scope) |
26
31
 
27
32
  Tier policies are enforced by Clementine's `src/agent/hooks.ts`. Tier 3 actions require explicit approval and run only with a per-domain allowlist (Phase 2).
@@ -1,11 +1,14 @@
1
1
  #!/usr/bin/env python3
2
2
  """
3
- Clementine ↔ browser-harness MCP bridge.
3
+ Clementine ↔ browser-harness MCP bridge (Phase 1.5).
4
4
 
5
5
  Stdio MCP server that exposes browser-harness primitives to the Claude Agent
6
- SDK. Fails gracefully: if browser-harness or its deps aren't installed, the
7
- server still starts and every tool returns a clear "not installed" message
8
- so the rest of Clementine keeps working.
6
+ SDK. Wraps `browser_harness.helpers` (CDP control) and `browser_harness.admin`
7
+ (daemon lifecycle).
8
+
9
+ Fails gracefully: if browser-harness or its deps aren't installed, the server
10
+ still starts and every tool returns a clear "not installed" message so the
11
+ rest of Clementine keeps working.
9
12
 
10
13
  Wire-up:
11
14
  mcpServers in ~/.clementine/mcp-servers.json:
@@ -25,9 +28,15 @@ Run `clementine browser install` and `clementine browser enable` to set up.
25
28
  """
26
29
  from __future__ import annotations
27
30
 
31
+ import base64
32
+ import io
28
33
  import os
29
34
  import sys
35
+ import textwrap
36
+ import traceback
37
+ from contextlib import redirect_stderr, redirect_stdout
30
38
  from pathlib import Path
39
+ from typing import Any
31
40
 
32
41
  # Best-effort: load browser-harness from the user's data home.
33
42
  HARNESS_HOME = Path(
@@ -41,12 +50,14 @@ CDP_URL = os.environ.get("BROWSER_CDP_URL", "ws://localhost:9222")
41
50
 
42
51
  _HARNESS_AVAILABLE = False
43
52
  _HARNESS_ERROR: str | None = None
53
+ _HELPERS: Any = None
54
+ _ADMIN: Any = None
44
55
 
45
56
  try:
46
57
  if (HARNESS_HOME / "src").is_dir():
47
58
  sys.path.insert(0, str(HARNESS_HOME / "src"))
48
- # The actual harness module — import is lazy to keep startup cheap.
49
- import browser_harness # type: ignore # noqa: F401
59
+ from browser_harness import helpers as _HELPERS # type: ignore
60
+ from browser_harness import admin as _ADMIN # type: ignore
50
61
  _HARNESS_AVAILABLE = True
51
62
  except Exception as e: # noqa: BLE001
52
63
  _HARNESS_ERROR = f"{type(e).__name__}: {e}"
@@ -63,69 +74,302 @@ except Exception as e: # noqa: BLE001
63
74
 
64
75
 
65
76
  server = FastMCP("browser-harness")
77
+ _DAEMON_READY = False
66
78
 
67
79
 
68
- def _not_ready_message() -> str:
69
- if _HARNESS_AVAILABLE:
70
- return ""
71
- return (
72
- "browser-harness is not installed. Run `clementine browser install` "
73
- "to clone the harness into ~/.clementine/browser-harness and install "
74
- f"Python dependencies. (Underlying: {_HARNESS_ERROR})"
75
- )
80
+ def _ensure_ready() -> str | None:
81
+ """Returns an error string if the harness isn't usable, None when ready.
82
+
83
+ Calls ensure_daemon() lazily so the daemon doesn't spin up unless a tool
84
+ is actually invoked (and so Chrome doesn't get prodded just from MCP
85
+ handshake).
86
+ """
87
+ global _DAEMON_READY
88
+ if not _HARNESS_AVAILABLE:
89
+ return (
90
+ "browser-harness is not installed. Run `clementine browser install` "
91
+ "to clone the harness into ~/.clementine/browser-harness and install "
92
+ f"Python dependencies. (Underlying: {_HARNESS_ERROR})"
93
+ )
94
+ if _DAEMON_READY:
95
+ return None
96
+ try:
97
+ # ensure_daemon is idempotent and self-heals stale daemons / cold Chrome
98
+ _ADMIN.ensure_daemon(_open_inspect=False)
99
+ _DAEMON_READY = True
100
+ return None
101
+ except Exception as e: # noqa: BLE001
102
+ return (
103
+ f"Could not connect to Chrome via CDP at {CDP_URL}.\n"
104
+ f" Reason: {type(e).__name__}: {e}\n"
105
+ f" Fix: run `clementine browser connect` to relaunch Chrome with "
106
+ f"--remote-debugging-port=9222."
107
+ )
108
+
109
+
110
+ def _format_result(value: Any) -> str:
111
+ """Turn helper return values into human/agent-readable text."""
112
+ if value is None:
113
+ return "ok"
114
+ if isinstance(value, (str, int, float, bool)):
115
+ return str(value)
116
+ if isinstance(value, (dict, list, tuple)):
117
+ try:
118
+ import json
119
+ return json.dumps(value, default=str, indent=2)
120
+ except Exception: # noqa: BLE001
121
+ return repr(value)
122
+ return repr(value)
76
123
 
77
124
 
78
125
  @server.tool()
79
126
  def browser_status() -> str:
80
- """Report whether browser-harness is installed and the CDP target it's pointed at."""
127
+ """Diagnostic: install state, CDP target, daemon liveness, current page."""
81
128
  parts = [
82
129
  f"harness_installed: {_HARNESS_AVAILABLE}",
83
130
  f"harness_home: {HARNESS_HOME}",
84
131
  f"cdp_url: {CDP_URL}",
85
132
  ]
86
- if not _HARNESS_AVAILABLE and _HARNESS_ERROR:
87
- parts.append(f"error: {_HARNESS_ERROR}")
133
+ if not _HARNESS_AVAILABLE:
134
+ if _HARNESS_ERROR:
135
+ parts.append(f"error: {_HARNESS_ERROR}")
136
+ return "\n".join(parts)
137
+
138
+ try:
139
+ alive = _ADMIN.daemon_alive()
140
+ parts.append(f"daemon_alive: {alive}")
141
+ except Exception as e: # noqa: BLE001
142
+ parts.append(f"daemon_check_error: {type(e).__name__}: {e}")
143
+
144
+ err = _ensure_ready()
145
+ if err:
146
+ parts.append(f"daemon_ready: false")
147
+ parts.append(f"reason: {err}")
148
+ return "\n".join(parts)
149
+
150
+ try:
151
+ info = _HELPERS.page_info()
152
+ parts.append(f"daemon_ready: true")
153
+ parts.append(f"current_page: {_format_result(info)}")
154
+ except Exception as e: # noqa: BLE001
155
+ parts.append(f"page_info_error: {type(e).__name__}: {e}")
156
+
88
157
  return "\n".join(parts)
89
158
 
90
159
 
91
160
  @server.tool()
92
- def browser_navigate(url: str) -> str:
93
- """Open a URL in the connected Chrome via CDP. Tier 2 (logged)."""
94
- msg = _not_ready_message()
95
- if msg:
96
- return msg
97
- # TODO: implement via browser_harness CDP helpers
98
- return f"[stub] would navigate to {url} via {CDP_URL}"
161
+ def browser_navigate(url: str, new_tab: bool = True) -> str:
162
+ """Navigate to a URL.
163
+
164
+ By default opens a new tab so the user's current tab isn't clobbered. Set
165
+ new_tab=False to navigate the active tab in place.
166
+ """
167
+ err = _ensure_ready()
168
+ if err:
169
+ return err
170
+ try:
171
+ if new_tab:
172
+ _HELPERS.new_tab(url)
173
+ else:
174
+ _HELPERS.goto_url(url)
175
+ _HELPERS.wait_for_load(timeout=15.0)
176
+ info = _HELPERS.page_info()
177
+ return f"Navigated to {url}\n{_format_result(info)}"
178
+ except Exception as e: # noqa: BLE001
179
+ return f"Navigation failed: {type(e).__name__}: {e}"
180
+
181
+
182
+ @server.tool()
183
+ def browser_screenshot(full_page: bool = False, max_dim: int | None = 1600) -> str:
184
+ """Capture a screenshot of the active tab and return it as a base64 PNG.
185
+
186
+ full_page=True captures beyond the viewport. max_dim downscales the longest
187
+ edge to keep the response small (default 1600px).
188
+ """
189
+ err = _ensure_ready()
190
+ if err:
191
+ return err
192
+ try:
193
+ # capture_screenshot returns bytes when path=None
194
+ png_bytes = _HELPERS.capture_screenshot(path=None, full=full_page, max_dim=max_dim)
195
+ if isinstance(png_bytes, bytes):
196
+ b64 = base64.b64encode(png_bytes).decode("ascii")
197
+ return f"data:image/png;base64,{b64}"
198
+ # Fallback if helper returned a path
199
+ return f"screenshot saved to: {png_bytes}"
200
+ except Exception as e: # noqa: BLE001
201
+ return f"Screenshot failed: {type(e).__name__}: {e}"
202
+
203
+
204
+ @server.tool()
205
+ def browser_page_info() -> str:
206
+ """Return current URL, title, and viewport info for the active tab."""
207
+ err = _ensure_ready()
208
+ if err:
209
+ return err
210
+ try:
211
+ return _format_result(_HELPERS.page_info())
212
+ except Exception as e: # noqa: BLE001
213
+ return f"page_info failed: {type(e).__name__}: {e}"
99
214
 
100
215
 
101
216
  @server.tool()
102
- def browser_screenshot() -> str:
103
- """Capture a screenshot of the active tab and return its file path. Tier 1."""
104
- msg = _not_ready_message()
105
- if msg:
106
- return msg
107
- # TODO: implement via browser_harness CDP helpers
108
- return f"[stub] would screenshot active tab via {CDP_URL}"
217
+ def browser_list_tabs() -> str:
218
+ """List all open browser tabs."""
219
+ err = _ensure_ready()
220
+ if err:
221
+ return err
222
+ try:
223
+ return _format_result(_HELPERS.list_tabs(include_chrome=False))
224
+ except Exception as e: # noqa: BLE001
225
+ return f"list_tabs failed: {type(e).__name__}: {e}"
109
226
 
110
227
 
111
228
  @server.tool()
112
- def browser_inspect(selector: str = "body") -> str:
113
- """Read the current page HTML or a specific selector. Tier 1 (read-only)."""
114
- msg = _not_ready_message()
115
- if msg:
116
- return msg
117
- # TODO: implement via browser_harness CDP helpers
118
- return f"[stub] would inspect '{selector}' via {CDP_URL}"
229
+ def browser_eval_js(expression: str) -> str:
230
+ """Run a JavaScript expression in the active tab and return the result.
231
+
232
+ Use for reading page state — e.g. document.querySelector('h1').textContent,
233
+ or document.querySelectorAll('.item').length. Tier 2 (logged) — does not
234
+ write to the page or click anything by itself, but the agent could read
235
+ sensitive content.
236
+ """
237
+ err = _ensure_ready()
238
+ if err:
239
+ return err
240
+ try:
241
+ return _format_result(_HELPERS.js(expression))
242
+ except Exception as e: # noqa: BLE001
243
+ return f"js eval failed: {type(e).__name__}: {e}"
244
+
245
+
246
+ @server.tool()
247
+ def browser_click_xy(x: int, y: int, button: str = "left", clicks: int = 1) -> str:
248
+ """Click at viewport coordinates (x, y). Tier 3 (autonomous-blocked).
249
+
250
+ To click a specific element, use browser_eval_js to find its bounding box,
251
+ then call this. Example JS to get center coords:
252
+ const r = el.getBoundingClientRect();
253
+ [r.left + r.width/2, r.top + r.height/2]
254
+ """
255
+ err = _ensure_ready()
256
+ if err:
257
+ return err
258
+ try:
259
+ _HELPERS.click_at_xy(x, y, button=button, clicks=clicks)
260
+ return f"clicked ({x}, {y}) {button} x{clicks}"
261
+ except Exception as e: # noqa: BLE001
262
+ return f"click failed: {type(e).__name__}: {e}"
263
+
264
+
265
+ @server.tool()
266
+ def browser_type_text(text: str) -> str:
267
+ """Type text into the focused input. Tier 3 (autonomous-blocked).
268
+
269
+ Combine with browser_click_xy to click a field first. Use browser_press_key
270
+ for special keys like Enter / Tab / Escape.
271
+ """
272
+ err = _ensure_ready()
273
+ if err:
274
+ return err
275
+ try:
276
+ _HELPERS.type_text(text)
277
+ return f"typed {len(text)} chars"
278
+ except Exception as e: # noqa: BLE001
279
+ return f"type failed: {type(e).__name__}: {e}"
280
+
281
+
282
+ @server.tool()
283
+ def browser_press_key(key: str) -> str:
284
+ """Press a single key in the focused element. Tier 3.
285
+
286
+ Examples: 'Enter', 'Tab', 'Escape', 'ArrowDown', 'a', 'A'.
287
+ """
288
+ err = _ensure_ready()
289
+ if err:
290
+ return err
291
+ try:
292
+ _HELPERS.press_key(key)
293
+ return f"pressed {key}"
294
+ except Exception as e: # noqa: BLE001
295
+ return f"press_key failed: {type(e).__name__}: {e}"
296
+
297
+
298
+ @server.tool()
299
+ def browser_scroll(dy: int = -300, dx: int = 0, x: int | None = None, y: int | None = None) -> str:
300
+ """Scroll the page. Negative dy scrolls down (counter-intuitive: dy is the
301
+ delta the *content* moves, so dy=-300 moves the content up = scrolls down).
302
+
303
+ x/y default to the viewport center.
304
+ """
305
+ err = _ensure_ready()
306
+ if err:
307
+ return err
308
+ try:
309
+ if x is None or y is None:
310
+ info = _HELPERS.page_info() or {}
311
+ vw = info.get("viewport", {}).get("width", 1280)
312
+ vh = info.get("viewport", {}).get("height", 800)
313
+ x = x if x is not None else int(vw // 2)
314
+ y = y if y is not None else int(vh // 2)
315
+ _HELPERS.scroll(x, y, dy=dy, dx=dx)
316
+ return f"scrolled dy={dy} dx={dx} at ({x}, {y})"
317
+ except Exception as e: # noqa: BLE001
318
+ return f"scroll failed: {type(e).__name__}: {e}"
119
319
 
120
320
 
121
321
  @server.tool()
122
322
  def browser_run_python(code: str) -> str:
123
- """Run Python in the harness workspace. Tier 3 (autonomous-blocked, requires approval)."""
124
- msg = _not_ready_message()
125
- if msg:
126
- return msg
127
- # TODO: thread through agent-workspace/agent_helpers.py — see SKILL.md
128
- return f"[stub] would run python ({len(code)} bytes) in {HARNESS_HOME}/agent-workspace"
323
+ """Run Python in the harness context with helpers pre-imported. Tier 3.
324
+
325
+ Escape hatch for anything the typed tools above don't cover. All helpers
326
+ from browser_harness.helpers are in scope: goto_url, new_tab, page_info,
327
+ list_tabs, current_tab, switch_tab, click_at_xy, type_text, press_key,
328
+ scroll, capture_screenshot, js, wait, wait_for_load, dispatch_key,
329
+ upload_file, etc.
330
+
331
+ Captures stdout. Last expression value is returned if there's no print.
332
+ """
333
+ err = _ensure_ready()
334
+ if err:
335
+ return err
336
+
337
+ # Dedent so the agent doesn't have to worry about leading whitespace
338
+ code = textwrap.dedent(code)
339
+
340
+ namespace: dict[str, Any] = {"__name__": "__harness_inline__"}
341
+ # Pre-import every helper into the namespace
342
+ for name in dir(_HELPERS):
343
+ if not name.startswith("_"):
344
+ namespace[name] = getattr(_HELPERS, name)
345
+
346
+ out = io.StringIO()
347
+ err_buf = io.StringIO()
348
+ try:
349
+ with redirect_stdout(out), redirect_stderr(err_buf):
350
+ # Compile as 'exec' so multi-line statements work; if it ends with a
351
+ # bare expression, evaluate that and append the value to output.
352
+ try:
353
+ tree = compile(code, "<harness-inline>", "exec")
354
+ exec(tree, namespace) # noqa: S102
355
+ except SyntaxError:
356
+ # Maybe it's a single expression
357
+ exec(compile(code, "<harness-inline>", "single"), namespace) # noqa: S102
358
+ captured = out.getvalue()
359
+ captured_err = err_buf.getvalue()
360
+ result_parts = []
361
+ if captured:
362
+ result_parts.append(captured.rstrip())
363
+ if captured_err:
364
+ result_parts.append(f"[stderr]\n{captured_err.rstrip()}")
365
+ if not result_parts:
366
+ result_parts.append("(no output)")
367
+ return "\n".join(result_parts)
368
+ except Exception: # noqa: BLE001
369
+ tb = traceback.format_exc()
370
+ captured = out.getvalue()
371
+ prefix = (captured.rstrip() + "\n") if captured else ""
372
+ return f"{prefix}[exception]\n{tb}"
129
373
 
130
374
 
131
375
  if __name__ == "__main__":