hypha-debugger 0.1.8 → 0.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10517,12 +10517,56 @@
10517
10517
  /**
10518
10518
  * Arbitrary JavaScript execution service.
10519
10519
  */
/**
 * Attempt to auto-return the last expression in a code block.
 * If the code doesn't contain an explicit `return`, we try to
 * add one to the last expression statement so the result is captured.
 *
 * Only the final statement is rewritten; everything before it is kept
 * byte-for-byte, so multi-line template literals and expressions that
 * continue across lines keep their original meaning.
 *
 * Examples:
 *   "document.title"         → "return (document.title);"
 *   "const x = 1; x + 2"     → "const x = 1; return (x + 2);"
 *   "const x = 1\nx + 2"     → "const x = 1\nreturn (x + 2);"
 *   "for(...) {}"            → unchanged (control flow)
 *   "return 42"              → unchanged (explicit return)
 *
 * The rewrite is heuristic (no parser is involved), so the result may not
 * be valid JavaScript; callers should fall back to the original code when
 * the rewritten code fails to compile.
 *
 * @param {string} code - JavaScript source to run as a function body.
 * @returns {string} The trimmed code, with the last statement wrapped in
 *   `return (...)` when that looks safe; otherwise the trimmed code as-is.
 */
function autoReturn(code) {
  const trimmed = code.trim();
  // Already has a return statement? Leave it alone.
  if (/\breturn\b/.test(trimmed)) {
    return trimmed;
  }
  // Ignore trailing semicolons/whitespace so the tail we inspect is the
  // real last statement rather than an empty one.
  let end = trimmed.length;
  while (end > 0 && /[;\s]/.test(trimmed[end - 1])) {
    end -= 1;
  }
  const body = trimmed.slice(0, end);
  if (body === "") {
    return trimmed;
  }
  // The last statement starts after the last newline; single-line code
  // like "const x = 1; x + 2" is split at its last semicolon instead.
  let cut = body.lastIndexOf("\n");
  if (cut === -1) {
    cut = body.lastIndexOf(";");
  }
  const head = body.slice(0, cut + 1);
  const rest = body.slice(cut + 1);
  const lastStatement = rest.trimStart();
  // Whitespace between the preceding code and the last statement is kept.
  const gap = rest.slice(0, rest.length - lastStatement.length);
  // Don't add return to control flow, declarations, or comments. The
  // lookahead keeps identifiers that merely start with a keyword
  // (forms, iframe, format, classList, ...) eligible for auto-return, and
  // `import(`/`import.meta` expressions are still returned.
  const isStatementKeyword = /^(?:(?:if|for|while|do|switch|try|throw|class|function|const|let|var)(?![\w$])|(?:import|export)\s)/.test(lastStatement);
  if (isStatementKeyword || lastStatement.startsWith("//")) {
    return trimmed;
  }
  // Replace only the last statement with a return of its value.
  return head + gap + "return (" + lastStatement + ");";
}
10520
10555
  async function executeScript(code, timeout_ms) {
10521
10556
  const timeoutMs = timeout_ms ?? 10000;
10522
10557
  try {
10558
+ // Try with auto-return first, fall back to original code if syntax error
10559
+ let execCode = autoReturn(code);
10560
+ let fn;
10561
+ try {
10562
+ fn = new Function("return (async () => {" + execCode + "})()");
10563
+ }
10564
+ catch {
10565
+ // Auto-return broke the syntax — use original code
10566
+ fn = new Function("return (async () => {" + code + "})()");
10567
+ }
10523
10568
  const result = await Promise.race([
10524
- // Use async Function to allow top-level await in the code
10525
- new Function("return (async () => {" + code + "})()")(),
10569
+ fn(),
10526
10570
  new Promise((_, reject) => setTimeout(() => reject(new Error("Execution timed out")), timeoutMs)),
10527
10571
  ]);
10528
10572
  // Serialize the result safely
@@ -10537,6 +10581,7 @@
10537
10581
  serialized = {
10538
10582
  tag: result.tagName.toLowerCase(),
10539
10583
  id: result.id,
10584
+ className: result.className,
10540
10585
  text: (result.textContent ?? "").trim().slice(0, 500),
10541
10586
  };
10542
10587
  type = "HTMLElement";
@@ -10566,13 +10611,13 @@
10566
10611
  }
10567
10612
  executeScript.__schema__ = {
10568
10613
  name: "executeScript",
10569
- description: "Execute arbitrary JavaScript code in the page context. Supports async/await. Returns the result of the last expression.",
10614
+ description: 'Execute arbitrary JavaScript code in the page context. Supports async/await. The last expression is auto-returned (no need for explicit "return"). Examples: "document.title", "document.querySelectorAll(\'a\').length", "await fetch(\'/api/data\').then(r => r.json())".',
10570
10615
  parameters: {
10571
10616
  type: "object",
10572
10617
  properties: {
10573
10618
  code: {
10574
10619
  type: "string",
10575
- description: 'JavaScript code to execute. The result of the last expression is returned. Example: "return document.title"',
10620
+ description: 'JavaScript code to execute. The last expression is automatically returned. Examples: "document.title", "document.querySelector(\'h1\').textContent".',
10576
10621
  },
10577
10622
  timeout_ms: {
10578
10623
  type: "number",
@@ -11095,70 +11140,78 @@
11095
11140
  "# Web Debugger Skill",
11096
11141
  "",
11097
11142
  "This skill allows you to remotely debug and interact with a web page through HTTP API endpoints.",
11143
+ "Pick the approach that fits your task — they can be combined freely.",
11144
+ "",
11145
+ "## Approaches",
11098
11146
  "",
11099
- "## Recommended Workflow (Index-Based Interaction)",
11147
+ "### execute_script — Run Arbitrary JavaScript",
11100
11148
  "",
11101
- "The most reliable way to interact with a page is using the smart DOM analysis:",
11149
+ "The most versatile function. Use it to read/modify page state, call APIs, query the DOM,",
11150
+ "or do anything JavaScript can do. The last expression is auto-returned (no need for `return`).",
11102
11151
  "",
11103
- "### Step 1: Observe the page",
11104
11152
  "```bash",
11105
- `curl '{SERVICE_URL}/get_browser_state'`,
11106
- "```",
11107
- "This returns all interactive elements indexed as `[0]`, `[1]`, `[2]`, etc.",
11108
- "Elements are detected via smart heuristics: CSS cursor, ARIA roles, event listeners, tag names.",
11109
- "Visual highlight labels are overlaid on the page for each detected element.",
11153
+ `# Read page state`,
11154
+ `curl -X POST '{SERVICE_URL}/execute_script' \\`,
11155
+ ` -H 'Content-Type: application/json' -d '{"code": "document.title"}'`,
11110
11156
  "",
11111
- "Example output:",
11112
- "```",
11113
- "[0]<a aria-label=Home>Home />",
11114
- "[1]<input placeholder=Search... />",
11115
- "[2]<button>Sign In />",
11116
- "[3]<select name=language>English />",
11117
- "[4]<div data-scrollable=\"top=200, bottom=1500\">Content area />",
11157
+ `# Query DOM`,
11158
+ `curl -X POST '{SERVICE_URL}/execute_script' \\`,
11159
+ ` -H 'Content-Type: application/json' -d '{"code": "document.querySelector(\\\"h1\\\").textContent"}'`,
11160
+ "",
11161
+ `# Call an API`,
11162
+ `curl -X POST '{SERVICE_URL}/execute_script' \\`,
11163
+ ` -H 'Content-Type: application/json' -d '{"code": "await fetch(\\\"/api/data\\\").then(r => r.json())"}'`,
11164
+ "",
11165
+ `# Modify the page`,
11166
+ `curl -X POST '{SERVICE_URL}/execute_script' \\`,
11167
+ ` -H 'Content-Type: application/json' -d '{"code": "document.getElementById(\\\"name\\\").value = \\\"Alice\\\""}'`,
11118
11168
  "```",
11119
11169
  "",
11120
- "### Step 2: Act on elements by index",
11170
+ "### get_browser_state + Index-Based Interaction",
11171
+ "",
11172
+ "Best for UI interaction as a user would — clicking buttons, filling forms, selecting options.",
11173
+ "All interactive elements are detected and indexed as `[0]`, `[1]`, `[2]`, etc.",
11174
+ "",
11121
11175
  "```bash",
11122
- "# Click a button (e.g. [2] Sign In):",
11176
+ `# Step 1: See all interactive elements`,
11177
+ `curl '{SERVICE_URL}/get_browser_state'`,
11178
+ "",
11179
+ `# Step 2: Act by index`,
11123
11180
  `curl -X POST '{SERVICE_URL}/click_element_by_index' \\`,
11124
11181
  ` -H 'Content-Type: application/json' -d '{"index": 2}'`,
11125
11182
  "",
11126
- "# Type into an input (e.g. [1] Search):",
11127
11183
  `curl -X POST '{SERVICE_URL}/input_text' \\`,
11128
11184
  ` -H 'Content-Type: application/json' -d '{"index": 1, "text": "hello world"}'`,
11129
11185
  "",
11130
- "# Select a dropdown option (e.g. [3] Language):",
11131
11186
  `curl -X POST '{SERVICE_URL}/select_option' \\`,
11132
11187
  ` -H 'Content-Type: application/json' -d '{"index": 3, "option_text": "French"}'`,
11133
11188
  "",
11134
- "# Scroll down:",
11135
11189
  `curl -X POST '{SERVICE_URL}/scroll' \\`,
11136
11190
  ` -H 'Content-Type: application/json' -d '{"direction": "down"}'`,
11137
11191
  "",
11138
- "# Scroll a specific container (e.g. [4]):",
11139
- `curl -X POST '{SERVICE_URL}/scroll' \\`,
11140
- ` -H 'Content-Type: application/json' -d '{"direction": "down", "index": 4}'`,
11141
- "```",
11142
- "",
11143
- "### Step 3: Verify",
11144
- "```bash",
11192
+ `# Step 3: Verify visually`,
11145
11193
  `curl '{SERVICE_URL}/take_screenshot'`,
11146
11194
  "```",
11147
11195
  "",
11148
- "### Remove visual highlights (optional, for clean screenshots)",
11196
+ "### get_react_tree — Inspect React Components",
11197
+ "",
11198
+ "If the page uses React, inspect component names, props, state, and hooks:",
11149
11199
  "```bash",
11150
- `curl '{SERVICE_URL}/remove_highlights'`,
11200
+ `curl '{SERVICE_URL}/get_react_tree'`,
11151
11201
  "```",
11152
11202
  "",
11153
- "## CSS Selector-Based Functions (Alternative)",
11203
+ "### CSS Selector-Based Functions",
11154
11204
  "",
11155
- "You can also use CSS selectors directly for precise targeting:",
11205
+ "Use CSS selectors directly when you know the element:",
11156
11206
  "```bash",
11157
11207
  `curl -X POST '{SERVICE_URL}/click_element' \\`,
11158
11208
  ` -H 'Content-Type: application/json' -d '{"selector": "button.submit"}'`,
11159
11209
  "",
11160
11210
  `curl -X POST '{SERVICE_URL}/fill_input' \\`,
11161
11211
  ` -H 'Content-Type: application/json' -d '{"selector": "#email", "value": "user@example.com"}'`,
11212
+ "",
11213
+ `curl -X POST '{SERVICE_URL}/query_dom' \\`,
11214
+ ` -H 'Content-Type: application/json' -d '{"selector": ".product-card"}'`,
11162
11215
  "```",
11163
11216
  "",
11164
11217
  "## How to call functions",
@@ -11234,14 +11287,14 @@
11234
11287
  const tips = [
11235
11288
  "## Tips",
11236
11289
  "",
11237
- "- **Start with `get_browser_state`** — it's the best way to understand what's on the page and what you can interact with.",
11238
- "- **Prefer index-based interaction** (`click_element_by_index`, `input_text`, `select_option`) over CSS selectorsindices are more reliable across dynamic pages.",
11290
+ "- **`execute_script` is the most versatile** — use it for reading state, calling APIs, DOM queries, or anything not covered by other functions. The last expression is auto-returned.",
11291
+ "- **`get_browser_state` is the best way to see what's on the page** — it detects all interactive elements and shows them as indexed items.",
11239
11292
  "- **After each action, call `get_browser_state` again** — element indices change when the DOM updates.",
11240
11293
  "- **Use `take_screenshot`** to visually verify the page state. Call `remove_highlights` first for a clean view.",
11241
- "- **Use `execute_script`** for anything not covered by the built-in functions — it runs arbitrary JavaScript.",
11242
11294
  "- **Use `scroll`** with an element index to scroll inside a specific container (e.g. a chat window, sidebar).",
11243
11295
  "- **Use `get_page_info` with `include_logs=true`** to check for JavaScript errors or debug output.",
11244
- "- **Use `get_react_tree`** if the page uses React — it gives you component names, props, and state.",
11296
+ "- **Use `get_react_tree`** if the page uses React — it gives you component names, props, and state without needing DevTools.",
11297
+ "- **Use `navigate`** to go to other pages — same-origin navigation auto-reconnects the debugger.",
11245
11298
  "- All POST endpoints accept JSON body with the parameter names as keys.",
11246
11299
  "",
11247
11300
  ].join("\n");
@@ -11275,14 +11328,24 @@
11275
11328
  // Create a wrapper that:
11276
11329
  // 1. Has correct, unminified parameter names (for hypha-rpc getParamNames)
11277
11330
  // 2. Detects when kwargs are passed as a single object and destructures them
11331
+ //
11332
+ // hypha-rpc HTTP handler passes kwargs as a single plain object, e.g.:
11333
+ // execute_script({code: "..."}) instead of execute_script("...")
11334
+ // get_react_tree({}) instead of get_react_tree()
11335
+ // We detect this and destructure, or discard empty objects.
11278
11336
  const paramList = paramNames.join(", ");
11337
+ const firstParam = paramNames[0];
11279
11338
  const wrapper = new Function("fn", "paramNames", `return async function(${paramList}) {
11280
11339
  // Detect kwargs-as-object: single argument that is a plain object
11281
- // whose keys match schema parameter names
11282
- if (arguments.length === 1 && ${paramList} != null && typeof ${paramList} === "object" && !Array.isArray(${paramList}) && !(${paramList} instanceof Date)) {
11283
- var _kw = ${paramList};
11284
- var _firstKey = Object.keys(_kw)[0];
11285
- if (_firstKey && paramNames.indexOf(_firstKey) !== -1) {
11340
+ if (arguments.length === 1 && ${firstParam} != null && typeof ${firstParam} === "object" && !Array.isArray(${firstParam}) && !(${firstParam} instanceof Date) && ${firstParam}.constructor === Object) {
11341
+ var _kw = ${firstParam};
11342
+ var _keys = Object.keys(_kw);
11343
+ // Empty object {} → call with no args (all defaults)
11344
+ if (_keys.length === 0) {
11345
+ return fn();
11346
+ }
11347
+ // Keys match schema params → destructure
11348
+ if (paramNames.indexOf(_keys[0]) !== -1) {
11286
11349
  var _args = paramNames.map(function(n) { return _kw[n]; });
11287
11350
  return fn.apply(null, _args);
11288
11351
  }
@@ -14078,6 +14141,13 @@
14078
14141
  this.cursor = new AICursor();
14079
14142
  }
14080
14143
  try {
14144
+ // Polyfill Promise.prototype.finally if missing (needed by hypha-rpc
14145
+ // in some older environments / polyfilled Promise implementations)
14146
+ if (typeof Promise.prototype.finally !== "function") {
14147
+ Promise.prototype.finally = function (cb) {
14148
+ return this.then((value) => Promise.resolve(cb()).then(() => value), (reason) => Promise.resolve(cb()).then(() => { throw reason; }));
14149
+ };
14150
+ }
14081
14151
  // Get the connectToServer function
14082
14152
  const connect = this.getConnectToServer();
14083
14153
  // Connect to Hypha server
@@ -14365,22 +14435,21 @@
14365
14435
  `# A debugger is attached to a live web page.`,
14366
14436
  `# You can remotely inspect, interact with, and control this page via the HTTP API below.`,
14367
14437
  `#`,
14368
- `# RECOMMENDED WORKFLOW (index-based, most reliable):`,
14369
- `# 1. get_browser_state see all interactive elements as [0], [1], [2], ...`,
14370
- `# 2. click_element_by_index / input_text / select_option / scroll act by index`,
14371
- `# 3. take_screenshot verify the result visually`,
14372
- `#`,
14373
- `# Also available: get_page_info, get_html, query_dom, click_element, fill_input,`,
14374
- `# scroll_to, execute_script, navigate, get_react_tree, remove_highlights`,
14438
+ `# APPROACHES (pick what fits your task):`,
14439
+ `# - execute_script: Run arbitrary JS — fastest for reading/modifying page state, DOM queries, API calls`,
14440
+ `# - get_browser_state + click/input/select by index: Visual interaction with the page as a user would`,
14441
+ `# - get_html / query_dom: Read DOM structure with CSS selectors`,
14442
+ `# - get_react_tree: Inspect React component props, state, and hooks`,
14443
+ `# - take_screenshot: Visual verification of page state`,
14375
14444
  `#`,
14376
- `# POST endpoints accept JSON body with parameter names as keys.`,
14445
+ `# All POST endpoints accept JSON body with parameter names as keys.`,
14377
14446
  ``,
14378
14447
  `SERVICE_URL="${serviceUrl}"`,
14379
14448
  ];
14380
14449
  if (token) {
14381
14450
  lines.push(`TOKEN="${token}"`);
14382
14451
  }
14383
- lines.push(``, `# 1. Get interactive elements (smart DOM analysis with indexed elements):`, `curl "$SERVICE_URL/get_browser_state"${auth}`, ``, `# 2. Click element by index (e.g. click [3]):`, `curl -X POST "$SERVICE_URL/click_element_by_index"${auth} -H "Content-Type: application/json" -d '{"index": 3}'`, ``, `# 3. Type into an input by index:`, `curl -X POST "$SERVICE_URL/input_text"${auth} -H "Content-Type: application/json" -d '{"index": 5, "text": "hello"}'`, ``, `# Take a screenshot:`, `curl "$SERVICE_URL/take_screenshot"${auth}`, ``, `# Execute JavaScript remotely:`, `curl -X POST "$SERVICE_URL/execute_script"${auth} -H "Content-Type: application/json" -d '{"code": "document.title"}'`, ``, `# Full API docs:`, `curl "$SERVICE_URL/get_skill_md"${auth}`);
14452
+ lines.push(``, `# Execute JavaScript (most versatile — read state, call APIs, modify DOM):`, `curl -X POST "$SERVICE_URL/execute_script"${auth} -H "Content-Type: application/json" -d '{"code": "document.title"}'`, ``, `# Smart DOM analysis (indexed interactive elements for click/type/select):`, `curl "$SERVICE_URL/get_browser_state"${auth}`, ``, `# Interact by element index:`, `curl -X POST "$SERVICE_URL/click_element_by_index"${auth} -H "Content-Type: application/json" -d '{"index": 3}'`, `curl -X POST "$SERVICE_URL/input_text"${auth} -H "Content-Type: application/json" -d '{"index": 5, "text": "hello"}'`, ``, `# Screenshot + React inspection:`, `curl "$SERVICE_URL/take_screenshot"${auth}`, `curl "$SERVICE_URL/get_react_tree"${auth}`, ``, `# Navigate (auto-reconnects for same-origin):`, `curl -X POST "$SERVICE_URL/navigate"${auth} -H "Content-Type: application/json" -d '{"url": "/other-page"}'`, ``, `# Full API docs:`, `curl "$SERVICE_URL/get_skill_md"${auth}`);
14384
14453
  return lines.join("\n");
14385
14454
  }
14386
14455
  /**