hypha-debugger 0.1.8 → 0.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2060,12 +2060,56 @@ takeScreenshot.__schema__ = {
2060
2060
  /**
2061
2061
  * Arbitrary JavaScript execution service.
2062
2062
  */
2063
/**
 * Rewrite a code snippet so that the value of its final expression is
 * returned when the snippet is used as a function body.
 *
 * Everything before the final statement is kept verbatim (including
 * newlines and indentation); only the final statement is wrapped in
 * `return (...)`. Keeping the prefix untouched matters: inserting `;`
 * after every line would turn `while (c)\n  step()` into `while (c);`.
 *
 * Statement detection is purely textual (no parser):
 *   - multi-line code: the final statement is the last line;
 *   - single-line code: the final statement is the text after the last `;`.
 * The result may therefore still be syntactically invalid for unusual
 * input (e.g. a `;` inside a string literal), so callers should be ready
 * to fall back to the original code.
 *
 * Examples:
 *   "document.title"       → "return (document.title);"
 *   "const x = 1; x + 2"   → "const x = 1; return (x + 2);"
 *   "const x = 1\nx + 2"   → "const x = 1\nreturn (x + 2);"
 *   "for(...) {}"          → unchanged (control flow)
 *   "return 42"            → unchanged (explicit return)
 *
 * @param {string} code - JavaScript source intended for a function body.
 * @returns {string} The trimmed code, with `return (...)` wrapped around the
 *   final expression statement when one could be identified.
 */
function autoReturn(code) {
  const trimmed = code.trim();

  // An explicit `return` anywhere means the author controls the result.
  // NOTE: this is a textual check, so it also matches `return` inside a
  // nested function or a string literal; such snippets are left unchanged.
  if (trimmed === "" || /\breturn\b/.test(trimmed)) {
    return trimmed;
  }

  // Locate where the final statement starts.
  const lastNewline = trimmed.lastIndexOf("\n");
  let start;
  if (lastNewline !== -1) {
    start = lastNewline + 1;
  } else {
    // Single line: ignore trailing semicolons, then cut after the last one.
    // lastIndexOf yields -1 when there is none, which makes start 0.
    start = trimmed.replace(/[;\s]+$/, "").lastIndexOf(";") + 1;
  }

  const prefix = trimmed.slice(0, start);
  const tail = trimmed.slice(start);
  const indent = tail.match(/^\s*/)[0];
  const statement = tail.trim().replace(/[;\s]+$/, "");

  // Nothing but semicolons/whitespace: there is no expression to return.
  if (statement === "") {
    return trimmed;
  }

  // Leave control flow, declarations and module syntax alone. Keywords must
  // end at an identifier boundary so that `formatDate()`, `iframe.src` or
  // `classList` are still recognised as ordinary expressions. A statement
  // starting with a closing bracket is the tail of a multi-line construct
  // and cannot be wrapped on its own.
  const nonExpression =
    /^(?:(?:if|for|while|do|switch|try|class|function|const|let|var|throw|break|continue)(?![\w$])|(?:import|export)\s|[)\]}])/;
  if (nonExpression.test(statement)) {
    return trimmed;
  }

  return `${prefix}${indent}return (${statement});`;
}
2063
2098
  async function executeScript(code, timeout_ms) {
2064
2099
  const timeoutMs = timeout_ms ?? 10000;
2065
2100
  try {
2101
+ // Try with auto-return first, fall back to original code if syntax error
2102
+ let execCode = autoReturn(code);
2103
+ let fn;
2104
+ try {
2105
+ fn = new Function("return (async () => {" + execCode + "})()");
2106
+ }
2107
+ catch {
2108
+ // Auto-return broke the syntax — use original code
2109
+ fn = new Function("return (async () => {" + code + "})()");
2110
+ }
2066
2111
  const result = await Promise.race([
2067
- // Use async Function to allow top-level await in the code
2068
- new Function("return (async () => {" + code + "})()")(),
2112
+ fn(),
2069
2113
  new Promise((_, reject) => setTimeout(() => reject(new Error("Execution timed out")), timeoutMs)),
2070
2114
  ]);
2071
2115
  // Serialize the result safely
@@ -2080,6 +2124,7 @@ async function executeScript(code, timeout_ms) {
2080
2124
  serialized = {
2081
2125
  tag: result.tagName.toLowerCase(),
2082
2126
  id: result.id,
2127
+ className: result.className,
2083
2128
  text: (result.textContent ?? "").trim().slice(0, 500),
2084
2129
  };
2085
2130
  type = "HTMLElement";
@@ -2109,13 +2154,13 @@ async function executeScript(code, timeout_ms) {
2109
2154
  }
2110
2155
  executeScript.__schema__ = {
2111
2156
  name: "executeScript",
2112
- description: "Execute arbitrary JavaScript code in the page context. Supports async/await. Returns the result of the last expression.",
2157
+ description: 'Execute arbitrary JavaScript code in the page context. Supports async/await. The last expression is auto-returned (no need for explicit "return"). Examples: "document.title", "document.querySelectorAll(\'a\').length", "await fetch(\'/api/data\').then(r => r.json())".',
2113
2158
  parameters: {
2114
2159
  type: "object",
2115
2160
  properties: {
2116
2161
  code: {
2117
2162
  type: "string",
2118
- description: 'JavaScript code to execute. The result of the last expression is returned. Example: "return document.title"',
2163
+ description: 'JavaScript code to execute. The last expression is automatically returned. Examples: "document.title", "document.querySelector(\'h1\').textContent".',
2119
2164
  },
2120
2165
  timeout_ms: {
2121
2166
  type: "number",
@@ -2638,70 +2683,78 @@ function generateSkillMd(serviceFunctions, serviceUrl) {
2638
2683
  "# Web Debugger Skill",
2639
2684
  "",
2640
2685
  "This skill allows you to remotely debug and interact with a web page through HTTP API endpoints.",
2686
+ "Pick the approach that fits your task — they can be combined freely.",
2687
+ "",
2688
+ "## Approaches",
2641
2689
  "",
2642
- "## Recommended Workflow (Index-Based Interaction)",
2690
+ "### execute_script — Run Arbitrary JavaScript",
2643
2691
  "",
2644
- "The most reliable way to interact with a page is using the smart DOM analysis:",
2692
+ "The most versatile function. Use it to read/modify page state, call APIs, query the DOM,",
2693
+ "or do anything JavaScript can do. The last expression is auto-returned (no need for `return`).",
2645
2694
  "",
2646
- "### Step 1: Observe the page",
2647
2695
  "```bash",
2648
- `curl '{SERVICE_URL}/get_browser_state'`,
2649
- "```",
2650
- "This returns all interactive elements indexed as `[0]`, `[1]`, `[2]`, etc.",
2651
- "Elements are detected via smart heuristics: CSS cursor, ARIA roles, event listeners, tag names.",
2652
- "Visual highlight labels are overlaid on the page for each detected element.",
2696
+ `# Read page state`,
2697
+ `curl -X POST '{SERVICE_URL}/execute_script' \\`,
2698
+ ` -H 'Content-Type: application/json' -d '{"code": "document.title"}'`,
2653
2699
  "",
2654
- "Example output:",
2655
- "```",
2656
- "[0]<a aria-label=Home>Home />",
2657
- "[1]<input placeholder=Search... />",
2658
- "[2]<button>Sign In />",
2659
- "[3]<select name=language>English />",
2660
- "[4]<div data-scrollable=\"top=200, bottom=1500\">Content area />",
2700
+ `# Query DOM`,
2701
+ `curl -X POST '{SERVICE_URL}/execute_script' \\`,
2702
+ ` -H 'Content-Type: application/json' -d '{"code": "document.querySelector(\\\"h1\\\").textContent"}'`,
2703
+ "",
2704
+ `# Call an API`,
2705
+ `curl -X POST '{SERVICE_URL}/execute_script' \\`,
2706
+ ` -H 'Content-Type: application/json' -d '{"code": "await fetch(\\\"/api/data\\\").then(r => r.json())"}'`,
2707
+ "",
2708
+ `# Modify the page`,
2709
+ `curl -X POST '{SERVICE_URL}/execute_script' \\`,
2710
+ ` -H 'Content-Type: application/json' -d '{"code": "document.getElementById(\\\"name\\\").value = \\\"Alice\\\""}'`,
2661
2711
  "```",
2662
2712
  "",
2663
- "### Step 2: Act on elements by index",
2713
+ "### get_browser_state + Index-Based Interaction",
2714
+ "",
2715
+ "Best for UI interaction as a user would — clicking buttons, filling forms, selecting options.",
2716
+ "All interactive elements are detected and indexed as `[0]`, `[1]`, `[2]`, etc.",
2717
+ "",
2664
2718
  "```bash",
2665
- "# Click a button (e.g. [2] Sign In):",
2719
+ `# Step 1: See all interactive elements`,
2720
+ `curl '{SERVICE_URL}/get_browser_state'`,
2721
+ "",
2722
+ `# Step 2: Act by index`,
2666
2723
  `curl -X POST '{SERVICE_URL}/click_element_by_index' \\`,
2667
2724
  ` -H 'Content-Type: application/json' -d '{"index": 2}'`,
2668
2725
  "",
2669
- "# Type into an input (e.g. [1] Search):",
2670
2726
  `curl -X POST '{SERVICE_URL}/input_text' \\`,
2671
2727
  ` -H 'Content-Type: application/json' -d '{"index": 1, "text": "hello world"}'`,
2672
2728
  "",
2673
- "# Select a dropdown option (e.g. [3] Language):",
2674
2729
  `curl -X POST '{SERVICE_URL}/select_option' \\`,
2675
2730
  ` -H 'Content-Type: application/json' -d '{"index": 3, "option_text": "French"}'`,
2676
2731
  "",
2677
- "# Scroll down:",
2678
2732
  `curl -X POST '{SERVICE_URL}/scroll' \\`,
2679
2733
  ` -H 'Content-Type: application/json' -d '{"direction": "down"}'`,
2680
2734
  "",
2681
- "# Scroll a specific container (e.g. [4]):",
2682
- `curl -X POST '{SERVICE_URL}/scroll' \\`,
2683
- ` -H 'Content-Type: application/json' -d '{"direction": "down", "index": 4}'`,
2684
- "```",
2685
- "",
2686
- "### Step 3: Verify",
2687
- "```bash",
2735
+ `# Step 3: Verify visually`,
2688
2736
  `curl '{SERVICE_URL}/take_screenshot'`,
2689
2737
  "```",
2690
2738
  "",
2691
- "### Remove visual highlights (optional, for clean screenshots)",
2739
+ "### get_react_tree — Inspect React Components",
2740
+ "",
2741
+ "If the page uses React, inspect component names, props, state, and hooks:",
2692
2742
  "```bash",
2693
- `curl '{SERVICE_URL}/remove_highlights'`,
2743
+ `curl '{SERVICE_URL}/get_react_tree'`,
2694
2744
  "```",
2695
2745
  "",
2696
- "## CSS Selector-Based Functions (Alternative)",
2746
+ "### CSS Selector-Based Functions",
2697
2747
  "",
2698
- "You can also use CSS selectors directly for precise targeting:",
2748
+ "Use CSS selectors directly when you know the element:",
2699
2749
  "```bash",
2700
2750
  `curl -X POST '{SERVICE_URL}/click_element' \\`,
2701
2751
  ` -H 'Content-Type: application/json' -d '{"selector": "button.submit"}'`,
2702
2752
  "",
2703
2753
  `curl -X POST '{SERVICE_URL}/fill_input' \\`,
2704
2754
  ` -H 'Content-Type: application/json' -d '{"selector": "#email", "value": "user@example.com"}'`,
2755
+ "",
2756
+ `curl -X POST '{SERVICE_URL}/query_dom' \\`,
2757
+ ` -H 'Content-Type: application/json' -d '{"selector": ".product-card"}'`,
2705
2758
  "```",
2706
2759
  "",
2707
2760
  "## How to call functions",
@@ -2777,14 +2830,14 @@ function generateSkillMd(serviceFunctions, serviceUrl) {
2777
2830
  const tips = [
2778
2831
  "## Tips",
2779
2832
  "",
2780
- "- **Start with `get_browser_state`** — it's the best way to understand what's on the page and what you can interact with.",
2781
- "- **Prefer index-based interaction** (`click_element_by_index`, `input_text`, `select_option`) over CSS selectors — indices are more reliable across dynamic pages.",
2833
+ "- **`execute_script` is the most versatile** — use it for reading state, calling APIs, DOM queries, or anything not covered by other functions. The last expression is auto-returned.",
2834
+ "- **`get_browser_state` is the best way to see what's on the page** — it detects all interactive elements and shows them as indexed items.",
2782
2835
  "- **After each action, call `get_browser_state` again** — element indices change when the DOM updates.",
2783
2836
  "- **Use `take_screenshot`** to visually verify the page state. Call `remove_highlights` first for a clean view.",
2784
- "- **Use `execute_script`** for anything not covered by the built-in functions — it runs arbitrary JavaScript.",
2785
2837
  "- **Use `scroll`** with an element index to scroll inside a specific container (e.g. a chat window, sidebar).",
2786
2838
  "- **Use `get_page_info` with `include_logs=true`** to check for JavaScript errors or debug output.",
2787
- "- **Use `get_react_tree`** if the page uses React — it gives you component names, props, and state.",
2839
+ "- **Use `get_react_tree`** if the page uses React — it gives you component names, props, and state without needing DevTools.",
2840
+ "- **Use `navigate`** to go to other pages — same-origin navigation auto-reconnects the debugger.",
2788
2841
  "- All POST endpoints accept JSON body with the parameter names as keys.",
2789
2842
  "",
2790
2843
  ].join("\n");
@@ -2818,14 +2871,24 @@ function wrapFn(fn) {
2818
2871
  // Create a wrapper that:
2819
2872
  // 1. Has correct, unminified parameter names (for hypha-rpc getParamNames)
2820
2873
  // 2. Detects when kwargs are passed as a single object and destructures them
2874
+ //
2875
+ // hypha-rpc HTTP handler passes kwargs as a single plain object, e.g.:
2876
+ // execute_script({code: "..."}) instead of execute_script("...")
2877
+ // get_react_tree({}) instead of get_react_tree()
2878
+ // We detect this and destructure, or discard empty objects.
2821
2879
  const paramList = paramNames.join(", ");
2880
+ const firstParam = paramNames[0];
2822
2881
  const wrapper = new Function("fn", "paramNames", `return async function(${paramList}) {
2823
2882
  // Detect kwargs-as-object: single argument that is a plain object
2824
- // whose keys match schema parameter names
2825
- if (arguments.length === 1 && ${paramList} != null && typeof ${paramList} === "object" && !Array.isArray(${paramList}) && !(${paramList} instanceof Date)) {
2826
- var _kw = ${paramList};
2827
- var _firstKey = Object.keys(_kw)[0];
2828
- if (_firstKey && paramNames.indexOf(_firstKey) !== -1) {
2883
+ if (arguments.length === 1 && ${firstParam} != null && typeof ${firstParam} === "object" && !Array.isArray(${firstParam}) && !(${firstParam} instanceof Date) && ${firstParam}.constructor === Object) {
2884
+ var _kw = ${firstParam};
2885
+ var _keys = Object.keys(_kw);
2886
+ // Empty object {} → call with no args (all defaults)
2887
+ if (_keys.length === 0) {
2888
+ return fn();
2889
+ }
2890
+ // Keys match schema params → destructure
2891
+ if (paramNames.indexOf(_keys[0]) !== -1) {
2829
2892
  var _args = paramNames.map(function(n) { return _kw[n]; });
2830
2893
  return fn.apply(null, _args);
2831
2894
  }
@@ -5621,6 +5684,13 @@ class HyphaDebugger {
5621
5684
  this.cursor = new AICursor();
5622
5685
  }
5623
5686
  try {
5687
+ // Polyfill Promise.prototype.finally if missing (needed by hypha-rpc
5688
+ // in some older environments / polyfilled Promise implementations)
5689
+ if (typeof Promise.prototype.finally !== "function") {
5690
+ Promise.prototype.finally = function (cb) {
5691
+ return this.then((value) => Promise.resolve(cb()).then(() => value), (reason) => Promise.resolve(cb()).then(() => { throw reason; }));
5692
+ };
5693
+ }
5624
5694
  // Get the connectToServer function
5625
5695
  const connect = this.getConnectToServer();
5626
5696
  // Connect to Hypha server
@@ -5908,22 +5978,21 @@ class HyphaDebugger {
5908
5978
  `# A debugger is attached to a live web page.`,
5909
5979
  `# You can remotely inspect, interact with, and control this page via the HTTP API below.`,
5910
5980
  `#`,
5911
- `# RECOMMENDED WORKFLOW (index-based, most reliable):`,
5912
- `# 1. get_browser_state see all interactive elements as [0], [1], [2], ...`,
5913
- `# 2. click_element_by_index / input_text / select_option / scroll act by index`,
5914
- `# 3. take_screenshot verify the result visually`,
5915
- `#`,
5916
- `# Also available: get_page_info, get_html, query_dom, click_element, fill_input,`,
5917
- `# scroll_to, execute_script, navigate, get_react_tree, remove_highlights`,
5981
+ `# APPROACHES (pick what fits your task):`,
5982
+ `# - execute_script: Run arbitrary JS fastest for reading/modifying page state, DOM queries, API calls`,
5983
+ `# - get_browser_state + click/input/select by index: Visual interaction with the page as a user would`,
5984
+ `# - get_html / query_dom: Read DOM structure with CSS selectors`,
5985
+ `# - get_react_tree: Inspect React component props, state, and hooks`,
5986
+ `# - take_screenshot: Visual verification of page state`,
5918
5987
  `#`,
5919
- `# POST endpoints accept JSON body with parameter names as keys.`,
5988
+ `# All POST endpoints accept JSON body with parameter names as keys.`,
5920
5989
  ``,
5921
5990
  `SERVICE_URL="${serviceUrl}"`,
5922
5991
  ];
5923
5992
  if (token) {
5924
5993
  lines.push(`TOKEN="${token}"`);
5925
5994
  }
5926
- lines.push(``, `# 1. Get interactive elements (smart DOM analysis with indexed elements):`, `curl "$SERVICE_URL/get_browser_state"${auth}`, ``, `# 2. Click element by index (e.g. click [3]):`, `curl -X POST "$SERVICE_URL/click_element_by_index"${auth} -H "Content-Type: application/json" -d '{"index": 3}'`, ``, `# 3. Type into an input by index:`, `curl -X POST "$SERVICE_URL/input_text"${auth} -H "Content-Type: application/json" -d '{"index": 5, "text": "hello"}'`, ``, `# Take a screenshot:`, `curl "$SERVICE_URL/take_screenshot"${auth}`, ``, `# Execute JavaScript remotely:`, `curl -X POST "$SERVICE_URL/execute_script"${auth} -H "Content-Type: application/json" -d '{"code": "document.title"}'`, ``, `# Full API docs:`, `curl "$SERVICE_URL/get_skill_md"${auth}`);
5995
+ lines.push(``, `# Execute JavaScript (most versatile read state, call APIs, modify DOM):`, `curl -X POST "$SERVICE_URL/execute_script"${auth} -H "Content-Type: application/json" -d '{"code": "document.title"}'`, ``, `# Smart DOM analysis (indexed interactive elements for click/type/select):`, `curl "$SERVICE_URL/get_browser_state"${auth}`, ``, `# Interact by element index:`, `curl -X POST "$SERVICE_URL/click_element_by_index"${auth} -H "Content-Type: application/json" -d '{"index": 3}'`, `curl -X POST "$SERVICE_URL/input_text"${auth} -H "Content-Type: application/json" -d '{"index": 5, "text": "hello"}'`, ``, `# Screenshot + React inspection:`, `curl "$SERVICE_URL/take_screenshot"${auth}`, `curl "$SERVICE_URL/get_react_tree"${auth}`, ``, `# Navigate (auto-reconnects for same-origin):`, `curl -X POST "$SERVICE_URL/navigate"${auth} -H "Content-Type: application/json" -d '{"url": "/other-page"}'`, ``, `# Full API docs:`, `curl "$SERVICE_URL/get_skill_md"${auth}`);
5927
5996
  return lines.join("\n");
5928
5997
  }
5929
5998
  /**