hypha-debugger 0.2.8 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10314,8 +10314,14 @@
10314
10314
  /**
10315
10315
  * Screenshot capture service using html-to-image.
10316
10316
  *
10317
- * Images are downscaled before being returned so agents don't receive
10318
- * multi-megabyte base64 payloads that can crash their context window.
10317
+ * Returns image data in a format directly usable by AI agents:
10318
+ * - `base64`: raw base64 (no data: prefix) — what Claude/GPT image
10319
+ * content fields expect.
10320
+ * - `media_type`: e.g. "image/jpeg" — the MIME type to pair with base64.
10321
+ * - `data_url`: full `data:image/jpeg;base64,...` URL for HTML/preview use.
10322
+ *
10323
+ * Images are aggressively downscaled by default (max 800px, JPEG q=0.6)
10324
+ * because most agent context windows can't tolerate multi-MB payloads.
10319
10325
  */
10320
10326
  /** Extract a useful string from an unknown error value. */
10321
10327
  function errorMessage(err) {
@@ -10334,10 +10340,24 @@
10334
10340
  return String(err);
10335
10341
  }
10336
10342
  }
10343
/**
 * Split a `data:<mime>[;param...];base64,<payload>` URL into its parts.
 *
 * @param {string} dataUrl - The data: URL to split.
 * @returns {{mediaType: string, base64: string}} The MIME type and the raw
 *   base64 payload (everything after the first comma, without the prefix).
 * @throws {Error} If the input is not a data: URL, or if its header does not
 *   declare base64 encoding.
 */
function splitDataUrl(dataUrl) {
    // Groups: 1 = media type, 2 = header parameters (";charset=...;base64"
    // or empty), 3 = payload after the first comma.
    const m = /^data:([^;,]+)((?:;[^,]*)?),(.*)$/.exec(dataUrl);
    if (!m)
        throw new Error("Output is not a valid data: URL");
    const [, mediaType, params, payload] = m;
    // If charset=utf-8 (no base64), html-to-image returned an SVG fallback —
    // which is unusable for agent vision. Reject so the caller knows.
    // Only the header parameters are inspected: a non-base64 payload may
    // itself contain the text ";base64" (e.g. an embedded image reference)
    // and must not be mistaken for a base64-encoded URL.
    if (!/;base64(?:;|$)/i.test(params)) {
        throw new Error(`Output is not base64-encoded (got ${mediaType}). Capture probably failed silently.`);
    }
    return { mediaType, base64: payload };
}
10337
10357
  /**
10338
10358
  * Resize an image data URL via a canvas. Returns a new data URL at the
10339
- * requested format/quality. Maintains aspect ratio: fits within
10340
- * (maxWidth × maxHeight) without distortion.
10359
+ * requested format/quality, fitting within (maxWidth × maxHeight) without
10360
+ * distortion.
10341
10361
  */
10342
10362
  async function resizeDataUrl(dataUrl, maxWidth, maxHeight, format, quality) {
10343
10363
  return new Promise((resolve, reject) => {
@@ -10346,6 +10366,10 @@
10346
10366
  try {
10347
10367
  const srcW = img.naturalWidth;
10348
10368
  const srcH = img.naturalHeight;
10369
+ if (!srcW || !srcH) {
10370
+ reject(new Error("Captured image has zero dimensions"));
10371
+ return;
10372
+ }
10349
10373
  const scale = Math.min(maxWidth / srcW, maxHeight / srcH, 1);
10350
10374
  const dstW = Math.max(1, Math.round(srcW * scale));
10351
10375
  const dstH = Math.max(1, Math.round(srcH * scale));
@@ -10375,17 +10399,13 @@
10375
10399
  });
10376
10400
  }
10377
10401
  async function takeScreenshot(selector, format, quality, max_width, max_height, full_page) {
10378
- // Agent-friendly defaults: JPEG, moderate quality, capped at 1024px,
10379
- // viewport-only (not the entire scrollable page).
10402
+ // Agent-friendly defaults: JPEG at q=0.6, capped at 800px.
10403
+ // These are smaller than before because larger images crash some agents.
10380
10404
  const fmt = format ?? "jpeg";
10381
- const qual = quality ?? 0.75;
10382
- const maxW = max_width ?? 1024;
10383
- const maxH = max_height ?? 1024;
10405
+ const qual = quality ?? 0.6;
10406
+ const maxW = max_width ?? 800;
10407
+ const maxH = max_height ?? 800;
10384
10408
  const capturePage = full_page ?? false;
10385
- // Pick target:
10386
- // - explicit selector → that element
10387
- // - full_page=true → document.documentElement (the entire scrollable page)
10388
- // - default → viewport-sized region (clipped to window size)
10389
10409
  let target;
10390
10410
  if (selector) {
10391
10411
  target = document.querySelector(selector);
@@ -10401,29 +10421,23 @@
10401
10421
  }
10402
10422
  try {
10403
10423
  const node = target;
10404
- // For viewport-only captures, limit html-to-image's output size
10405
- // to the viewport dimensions.
10406
10424
  const viewportW = window.innerWidth;
10407
10425
  const viewportH = window.innerHeight;
10408
- // 1x1 transparent PNG — used as placeholder for images that fail
10409
- // to load (CORS-blocked, 404, etc.) so html-to-image doesn't reject.
10410
10426
  const TRANSPARENT_PIXEL = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkAAIAAAoAAv/lxKUAAAAASUVORK5CYII=";
10411
10427
  const captureOptions = {
10412
10428
  quality: qual,
10413
- pixelRatio: 1, // always capture at 1x — we'll resize after
10429
+ pixelRatio: 1,
10414
10430
  cacheBust: true,
10415
10431
  skipAutoScale: true,
10416
- skipFonts: true, // CORS-blocked stylesheets can hang font inlining
10417
- imagePlaceholder: TRANSPARENT_PIXEL, // fallback for broken images
10432
+ skipFonts: true,
10433
+ imagePlaceholder: TRANSPARENT_PIXEL,
10418
10434
  filter: (el) => {
10419
- // Exclude the debugger overlay and cursor from screenshots
10420
10435
  return (el.id !== "hypha-debugger-host" &&
10421
10436
  el.id !== "hypha-debugger-cursor" &&
10422
10437
  el.id !== "playwright-highlight-container");
10423
10438
  },
10424
10439
  };
10425
10440
  if (!selector && !capturePage) {
10426
- // Viewport-only capture: constrain canvas to window size
10427
10441
  captureOptions.width = viewportW;
10428
10442
  captureOptions.height = viewportH;
10429
10443
  }
@@ -10439,8 +10453,7 @@
10439
10453
  dataUrl = await runCapture(captureOptions);
10440
10454
  }
10441
10455
  catch (captureErr) {
10442
- // Fallback: retry without images (filter them out). Some pages have
10443
- // images that html-to-image can't inline even with imagePlaceholder.
10456
+ // Fallback: retry without images
10444
10457
  try {
10445
10458
  const noImagesOpts = {
10446
10459
  ...captureOptions,
@@ -10459,33 +10472,42 @@
10459
10472
  };
10460
10473
  }
10461
10474
  }
10462
- // Resize down to fit within (maxW × maxH) and re-encode. If resize
10463
- // fails (e.g. data URL too large to load back into an Image), fall
10464
- // back to returning the original capture so the caller still gets
10465
- // something useful.
10475
+ // Resize + re-encode through canvas. This both downsizes and ensures
10476
+ // a clean base64 PNG/JPEG (rather than a possibly-broken html-to-image
10477
+ // SVG-via-data-URL that some agent runtimes reject).
10478
+ let resized;
10466
10479
  try {
10467
- const resized = await resizeDataUrl(dataUrl, maxW, maxH, fmt, qual);
10468
- const sizeKb = Math.round((resized.dataUrl.length * 0.75) / 1024);
10480
+ resized = await resizeDataUrl(dataUrl, maxW, maxH, fmt, qual);
10481
+ }
10482
+ catch (resizeErr) {
10469
10483
  return {
10470
- data: resized.dataUrl,
10471
- format: fmt,
10472
- width: resized.width,
10473
- height: resized.height,
10474
- size_kb: sizeKb,
10484
+ error: `Resize failed: ${errorMessage(resizeErr)} (this usually means the captured image was malformed; try lowering max_width or use full_page:false)`,
10475
10485
  };
10476
10486
  }
10477
- catch (resizeErr) {
10478
- const rect = node.getBoundingClientRect();
10479
- const sizeKb = Math.round((dataUrl.length * 0.75) / 1024);
10487
+ // Validate the final data URL — should be data:image/jpeg;base64,...
10488
+ let parts;
10489
+ try {
10490
+ parts = splitDataUrl(resized.dataUrl);
10491
+ }
10492
+ catch (validateErr) {
10493
+ return { error: `Output validation failed: ${errorMessage(validateErr)}` };
10494
+ }
10495
+ // Sanity-check: a valid JPEG/PNG is at least a few hundred bytes.
10496
+ if (parts.base64.length < 200) {
10480
10497
  return {
10481
- data: dataUrl,
10482
- format: fmt,
10483
- width: Math.round(rect.width),
10484
- height: Math.round(rect.height),
10485
- size_kb: sizeKb,
10486
- warning: `Resize failed, returning original: ${errorMessage(resizeErr)}`,
10498
+ error: `Output too small (${parts.base64.length} chars base64) — capture likely failed`,
10487
10499
  };
10488
10500
  }
10501
+ const sizeKb = Math.round((parts.base64.length * 0.75) / 1024);
10502
+ return {
10503
+ base64: parts.base64,
10504
+ media_type: parts.mediaType,
10505
+ data_url: resized.dataUrl,
10506
+ format: fmt,
10507
+ width: resized.width,
10508
+ height: resized.height,
10509
+ size_kb: sizeKb,
10510
+ };
10489
10511
  }
10490
10512
  catch (err) {
10491
10513
  return { error: `Screenshot failed: ${errorMessage(err)}` };
@@ -10494,11 +10516,12 @@
10494
10516
  takeScreenshot.__schema__ = {
10495
10517
  name: "takeScreenshot",
10496
10518
  description: "Capture a screenshot of the current viewport, a specific element, or the full page. " +
10497
- "Downscaled to fit within max_width × max_height (default 1024px) to keep the payload " +
10498
- "small enough for AI agents. Defaults to JPEG at 0.75 quality. " +
10499
- "Returns: { data: 'data:image/jpeg;base64,...', format, width, height, size_kb }. " +
10500
- "Note: the image is in the `data` field as a full data: URL strip the `data:...;base64,` " +
10501
- "prefix before base64-decoding.",
10519
+ "Downscaled to fit within max_width × max_height (default 800px) and JPEG-encoded at " +
10520
+ "quality 0.6 by default for agent-friendly payload sizes. " +
10521
+ "Returns: { base64, media_type, data_url, format, width, height, size_kb }. " +
10522
+ "Use `base64` (raw base64, no prefix) directly with Claude/GPT image content fields. " +
10523
+ "Use `data_url` for HTML <img src=...> previews. " +
10524
+ "On failure returns { error }.",
10502
10525
  parameters: {
10503
10526
  type: "object",
10504
10527
  properties: {
@@ -10509,19 +10532,19 @@
10509
10532
  format: {
10510
10533
  type: "string",
10511
10534
  enum: ["png", "jpeg"],
10512
- description: 'Image format. Default: "jpeg" (much smaller than PNG). Use "png" for sharp text.',
10535
+ description: 'Image format. Default: "jpeg" (much smaller than PNG). Use "png" only when sharp text really matters.',
10513
10536
  },
10514
10537
  quality: {
10515
10538
  type: "number",
10516
- description: "JPEG quality (0–1). Default: 0.75. Ignored for PNG. Lower = smaller payload.",
10539
+ description: "JPEG quality (0–1). Default: 0.6. Ignored for PNG. Lower = smaller payload.",
10517
10540
  },
10518
10541
  max_width: {
10519
10542
  type: "number",
10520
- description: "Maximum output width in pixels. Default: 1024. Image is scaled down preserving aspect ratio.",
10543
+ description: "Maximum output width in pixels. Default: 800. Image scaled down preserving aspect ratio.",
10521
10544
  },
10522
10545
  max_height: {
10523
10546
  type: "number",
10524
- description: "Maximum output height in pixels. Default: 1024. Image is scaled down preserving aspect ratio.",
10547
+ description: "Maximum output height in pixels. Default: 800. Image scaled down preserving aspect ratio.",
10525
10548
  },
10526
10549
  full_page: {
10527
10550
  type: "boolean",
@@ -11287,7 +11310,7 @@
11287
11310
  "",
11288
11311
  "**1. Data-returning functions** (e.g. `take_screenshot`, `get_page_info`, `execute_script`, `get_browser_state`, `get_html`, `get_react_tree`) return function-specific keys:",
11289
11312
  "",
11290
- "- `take_screenshot` → `{data, format, width, height, size_kb}` where `data` is a `data:image/jpeg;base64,...` URL (note: field is `data`, not `screenshot` or `image`)",
11313
+ "- `take_screenshot` → `{base64, media_type, data_url, format, width, height, size_kb}`. Use `base64` (raw, no prefix) for Claude/GPT image content fields. Use `data_url` for HTML `<img src=...>` previews.",
11291
11314
  "- `execute_script` → `{result, type}` (or `{error}` on exception)",
11292
11315
  "- `get_browser_state` → `{url, title, header, content, footer, element_count}`",
11293
11316
  "- `get_page_info` → `{url, title, viewport_width, viewport_height, ...}`",
@@ -11375,7 +11398,7 @@
11375
11398
  "- **`execute_script` is the most versatile** — use it for reading state, calling APIs, DOM queries, or anything not covered by other functions. The last expression is auto-returned. Returns `{result, type}`.",
11376
11399
  "- **`get_browser_state` is the best way to see what's on the page** — it detects all interactive elements and shows them as indexed items.",
11377
11400
  "- **After each action, call `get_browser_state` again** — element indices change when the DOM updates.",
11378
- "- **Use `take_screenshot`** to visually verify the page state. The image is returned in the `data` field as a `data:image/jpeg;base64,...` URLstrip the `data:...;base64,` prefix before decoding.",
11401
+ "- **Use `take_screenshot`** to visually verify the page state. The response includes `base64` (raw, ready for Claude/GPT image fields) and `data_url` (for HTML previews). Default size is 800px JPEG q=0.6 bump `max_width` if you need more detail.",
11379
11402
  "- **Use `remove_highlights`** before a screenshot for a clean view.",
11380
11403
  "- **Use `scroll`** with an element index to scroll inside a specific container (e.g. a chat window, sidebar).",
11381
11404
  "- **Use `get_page_info` with `include_logs=true`** to check for JavaScript errors or debug output.",