npm - hypha-debugger - Versions diffs - 0.2.8 → 0.2.9 - Mend

hypha-debugger 0.2.8 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

package/dist/hypha-debugger.js +78 -55
package/dist/hypha-debugger.min.js +2 -2
package/dist/hypha-debugger.mjs +78 -55
package/dist/hypha-debugger.mjs.map +1 -1
package/dist/services/screenshot.d.ts +3 -1
package/package.json +1 -1

package/dist/hypha-debugger.mjs CHANGED

@@ -1969,8 +1969,14 @@ async function toJpeg(node, options = {}) {
 /**
  * Screenshot capture service using html-to-image.
  *
- * Images are downscaled before being returned so agents don't receive
- * multi-megabyte base64 payloads that can crash their context window.
+ * Returns image data in a format directly usable by AI agents:
+ *   - `base64`: raw base64 (no data: prefix) — what Claude/GPT image
+ *     content fields expect.
+ *   - `media_type`: e.g. "image/jpeg" — the MIME type to pair with base64.
+ *   - `data_url`: full `data:image/jpeg;base64,...` URL for HTML/preview use.
+ *
+ * Images are aggressively downscaled by default (max 800px, JPEG q=0.6)
+ * because most agent context windows can't tolerate multi-MB payloads.
  */
 /** Extract a useful string from an unknown error value. */
 function errorMessage(err) {
@@ -1989,10 +1995,24 @@ function errorMessage(err) {
         return String(err);
     }
 }
+/** Split a `data:<mime>;base64,<...>` URL into its parts. Throws on malformed. */
+function splitDataUrl(dataUrl) {
+    const m = /^data:([^;,]+)(?:;[^,]*)?,(.*)$/.exec(dataUrl);
+    if (!m)
+        throw new Error("Output is not a valid data: URL");
+    const mediaType = m[1];
+    let payload = m[2];
+    // If charset=utf-8 (no base64), html-to-image returned an SVG fallback —
+    // which is unusable for agent vision. Reject so the caller knows.
+    if (!/;base64/i.test(dataUrl)) {
+        throw new Error(`Output is not base64-encoded (got ${mediaType}). Capture probably failed silently.`);
+    }
+    return { mediaType, base64: payload };
+}
 /**
  * Resize an image data URL via a canvas. Returns a new data URL at the
- * requested format/quality. Maintains aspect ratio: fits within
- * (maxWidth × maxHeight) without distortion.
+ * requested format/quality, fitting within (maxWidth × maxHeight) without
+ * distortion.
  */
 async function resizeDataUrl(dataUrl, maxWidth, maxHeight, format, quality) {
     return new Promise((resolve, reject) => {
@@ -2001,6 +2021,10 @@ async function resizeDataUrl(dataUrl, maxWidth, maxHeight, format, quality) {
             try {
                 const srcW = img.naturalWidth;
                 const srcH = img.naturalHeight;
+                if (!srcW || !srcH) {
+                    reject(new Error("Captured image has zero dimensions"));
+                    return;
+                }
                 const scale = Math.min(maxWidth / srcW, maxHeight / srcH, 1);
                 const dstW = Math.max(1, Math.round(srcW * scale));
                 const dstH = Math.max(1, Math.round(srcH * scale));
@@ -2030,17 +2054,13 @@ async function resizeDataUrl(dataUrl, maxWidth, maxHeight, format, quality) {
     });
 }
 async function takeScreenshot(selector, format, quality, max_width, max_height, full_page) {
-    // Agent-friendly defaults: JPEG, moderate quality, capped at 1024px,
-    // viewport-only (not the entire scrollable page).
+    // Agent-friendly defaults: JPEG at q=0.6, capped at 800px.
+    // These are smaller than before because larger images crash some agents.
     const fmt = format ?? "jpeg";
-    const qual = quality ?? 0.75;
-    const maxW = max_width ?? 1024;
-    const maxH = max_height ?? 1024;
+    const qual = quality ?? 0.6;
+    const maxW = max_width ?? 800;
+    const maxH = max_height ?? 800;
     const capturePage = full_page ?? false;
-    // Pick target:
-    //   - explicit selector → that element
-    //   - full_page=true → document.documentElement (the entire scrollable page)
-    //   - default → viewport-sized region (clipped to window size)
     let target;
     if (selector) {
         target = document.querySelector(selector);
@@ -2056,29 +2076,23 @@ async function takeScreenshot(selector, format, quality, max_width, max_height,
     }
     try {
         const node = target;
-        // For viewport-only captures, limit html-to-image's output size
-        // to the viewport dimensions.
         const viewportW = window.innerWidth;
         const viewportH = window.innerHeight;
-        // 1x1 transparent PNG — used as placeholder for images that fail
-        // to load (CORS-blocked, 404, etc.) so html-to-image doesn't reject.
         const TRANSPARENT_PIXEL = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkAAIAAAoAAv/lxKUAAAAASUVORK5CYII=";
         const captureOptions = {
             quality: qual,
-            pixelRatio: 1, // always capture at 1x — we'll resize after
+            pixelRatio: 1,
             cacheBust: true,
             skipAutoScale: true,
-            skipFonts: true, // CORS-blocked stylesheets can hang font inlining
-            imagePlaceholder: TRANSPARENT_PIXEL, // fallback for broken images
+            skipFonts: true,
+            imagePlaceholder: TRANSPARENT_PIXEL,
             filter: (el) => {
-                // Exclude the debugger overlay and cursor from screenshots
                 return (el.id !== "hypha-debugger-host" &&
                     el.id !== "hypha-debugger-cursor" &&
                     el.id !== "playwright-highlight-container");
             },
         };
         if (!selector && !capturePage) {
-            // Viewport-only capture: constrain canvas to window size
             captureOptions.width = viewportW;
             captureOptions.height = viewportH;
         }
@@ -2094,8 +2108,7 @@ async function takeScreenshot(selector, format, quality, max_width, max_height,
             dataUrl = await runCapture(captureOptions);
         }
         catch (captureErr) {
-            // Fallback: retry without images (filter them out). Some pages have
-            // images that html-to-image can't inline even with imagePlaceholder.
+            // Fallback: retry without images
             try {
                 const noImagesOpts = {
                     ...captureOptions,
@@ -2114,33 +2127,42 @@ async function takeScreenshot(selector, format, quality, max_width, max_height,
                 };
             }
         }
-        // Resize down to fit within (maxW × maxH) and re-encode. If resize
-        // fails (e.g. data URL too large to load back into an Image), fall
-        // back to returning the original capture so the caller still gets
-        // something useful.
+        // Resize + re-encode through canvas. This both downsizes and ensures
+        // a clean base64 PNG/JPEG (rather than a possibly-broken html-to-image
+        // SVG-via-data-URL that some agent runtimes reject).
+        let resized;
         try {
-            const resized = await resizeDataUrl(dataUrl, maxW, maxH, fmt, qual);
-            const sizeKb = Math.round((resized.dataUrl.length * 0.75) / 1024);
+            resized = await resizeDataUrl(dataUrl, maxW, maxH, fmt, qual);
+        }
+        catch (resizeErr) {
             return {
-                data: resized.dataUrl,
-                format: fmt,
-                width: resized.width,
-                height: resized.height,
-                size_kb: sizeKb,
+                error: `Resize failed: ${errorMessage(resizeErr)} (this usually means the captured image was malformed; try lowering max_width or use full_page:false)`,
             };
         }
-        catch (resizeErr) {
-            const rect = node.getBoundingClientRect();
-            const sizeKb = Math.round((dataUrl.length * 0.75) / 1024);
+        // Validate the final data URL — should be data:image/jpeg;base64,...
+        let parts;
+        try {
+            parts = splitDataUrl(resized.dataUrl);
+        }
+        catch (validateErr) {
+            return { error: `Output validation failed: ${errorMessage(validateErr)}` };
+        }
+        // Sanity-check: a valid JPEG/PNG is at least a few hundred bytes.
+        if (parts.base64.length < 200) {
             return {
-                data: dataUrl,
-                format: fmt,
-                width: Math.round(rect.width),
-                height: Math.round(rect.height),
-                size_kb: sizeKb,
-                warning: `Resize failed, returning original: ${errorMessage(resizeErr)}`,
+                error: `Output too small (${parts.base64.length} chars base64) — capture likely failed`,
             };
         }
+        const sizeKb = Math.round((parts.base64.length * 0.75) / 1024);
+        return {
+            base64: parts.base64,
+            media_type: parts.mediaType,
+            data_url: resized.dataUrl,
+            format: fmt,
+            width: resized.width,
+            height: resized.height,
+            size_kb: sizeKb,
+        };
     }
     catch (err) {
         return { error: `Screenshot failed: ${errorMessage(err)}` };
@@ -2149,11 +2171,12 @@ async function takeScreenshot(selector, format, quality, max_width, max_height,
 takeScreenshot.__schema__ = {
     name: "takeScreenshot",
     description: "Capture a screenshot of the current viewport, a specific element, or the full page. " +
-        "Downscaled to fit within max_width × max_height (default 1024px) to keep the payload " +
-        "small enough for AI agents. Defaults to JPEG at 0.75 quality. " +
-        "Returns: { data: 'data:image/jpeg;base64,...', format, width, height, size_kb }. " +
-        "Note: the image is in the `data` field as a full data: URL — strip the `data:...;base64,` " +
-        "prefix before base64-decoding.",
+        "Downscaled to fit within max_width × max_height (default 800px) and JPEG-encoded at " +
+        "quality 0.6 by default for agent-friendly payload sizes. " +
+        "Returns: { base64, media_type, data_url, format, width, height, size_kb }. " +
+        "Use `base64` (raw base64, no prefix) directly with Claude/GPT image content fields. " +
+        "Use `data_url` for HTML <img src=...> previews. " +
+        "On failure returns { error }.",
     parameters: {
         type: "object",
         properties: {
@@ -2164,19 +2187,19 @@ takeScreenshot.__schema__ = {
             format: {
                 type: "string",
                 enum: ["png", "jpeg"],
-                description: 'Image format. Default: "jpeg" (much smaller than PNG). Use "png" for sharp text.',
+                description: 'Image format. Default: "jpeg" (much smaller than PNG). Use "png" only when sharp text really matters.',
             },
             quality: {
                 type: "number",
-                description: "JPEG quality (0–1). Default: 0.75. Ignored for PNG. Lower = smaller payload.",
+                description: "JPEG quality (0–1). Default: 0.6. Ignored for PNG. Lower = smaller payload.",
             },
             max_width: {
                 type: "number",
-                description: "Maximum output width in pixels. Default: 1024. Image is scaled down preserving aspect ratio.",
+                description: "Maximum output width in pixels. Default: 800. Image scaled down preserving aspect ratio.",
             },
             max_height: {
                 type: "number",
-                description: "Maximum output height in pixels. Default: 1024. Image is scaled down preserving aspect ratio.",
+                description: "Maximum output height in pixels. Default: 800. Image scaled down preserving aspect ratio.",
             },
             full_page: {
                 type: "boolean",
@@ -2942,7 +2965,7 @@ function generateSkillMd(serviceFunctions, serviceUrl) {
         "",
         "**1. Data-returning functions** (e.g. `take_screenshot`, `get_page_info`, `execute_script`, `get_browser_state`, `get_html`, `get_react_tree`) return function-specific keys:",
         "",
-        "- `take_screenshot` → `{data, format, width, height, size_kb}` where `data` is a `data:image/jpeg;base64,...` URL (note: field is `data`, not `screenshot` or `image`)",
+        "- `take_screenshot` → `{base64, media_type, data_url, format, width, height, size_kb}`. Use `base64` (raw, no prefix) for Claude/GPT image content fields. Use `data_url` for HTML `<img src=...>` previews.",
         "- `execute_script` → `{result, type}` (or `{error}` on exception)",
         "- `get_browser_state` → `{url, title, header, content, footer, element_count}`",
         "- `get_page_info` → `{url, title, viewport_width, viewport_height, ...}`",
@@ -3030,7 +3053,7 @@ function generateSkillMd(serviceFunctions, serviceUrl) {
         "- **`execute_script` is the most versatile** — use it for reading state, calling APIs, DOM queries, or anything not covered by other functions. The last expression is auto-returned. Returns `{result, type}`.",
         "- **`get_browser_state` is the best way to see what's on the page** — it detects all interactive elements and shows them as indexed items.",
         "- **After each action, call `get_browser_state` again** — element indices change when the DOM updates.",
-        "- **Use `take_screenshot`** to visually verify the page state. The image is returned in the `data` field as a `data:image/jpeg;base64,...` URL — strip the `data:...;base64,` prefix before decoding.",
+        "- **Use `take_screenshot`** to visually verify the page state. The response includes `base64` (raw, ready for Claude/GPT image fields) and `data_url` (for HTML previews). Default size is 800px JPEG q=0.6 — bump `max_width` if you need more detail.",
         "- **Use `remove_highlights`** before a screenshot for a clean view.",
         "- **Use `scroll`** with an element index to scroll inside a specific container (e.g. a chat window, sidebar).",
         "- **Use `get_page_info` with `include_logs=true`** to check for JavaScript errors or debug output.",