assistme 0.9.0 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -318,6 +318,7 @@ var CDP_SETTLE_MS = 300;
318
318
  var PAGE_TRANSITION_MS = 500;
319
319
  var MAX_WAIT_MS = 5e3;
320
320
  var SCREENSHOT_QUALITY = 80;
321
+ var SCREENSHOT_MAX_DIM_PX = 1568;
321
322
  function delay(ms) {
322
323
  return new Promise((r) => {
323
324
  setTimeout(r, ms);
@@ -809,6 +810,56 @@ async function tryDismissOverlay(conn) {
809
810
  }
810
811
  }
811
812
 
813
+ // src/browser/screenshot.ts
814
/**
 * Ask the page for its viewport size (in CSS pixels) and devicePixelRatio.
 *
 * Sends one `Runtime.evaluate` over `conn` and parses the JSON string the
 * page returns. This is best-effort: any failure (send rejects, missing or
 * malformed value, non-positive or non-finite size) yields
 * `{ width: 0, height: 0, dpr: 1 }`, which callers treat as "metrics unknown".
 *
 * @param conn Object exposing `send(method, params)` that resolves with the CDP response.
 * @returns `{ width, height, dpr }`; width/height are both > 0 or both 0, dpr is always finite and > 0.
 */
async function getViewportMetrics(conn) {
  const isPositiveFinite = (n) => Number.isFinite(n) && n > 0;
  try {
    const response = await conn.send("Runtime.evaluate", {
      expression: `JSON.stringify({ width: window.innerWidth, height: window.innerHeight, dpr: window.devicePixelRatio || 1 })`,
      returnByValue: true
    });
    const raw = response.result?.value;
    if (typeof raw !== "string" || raw === "") {
      return { width: 0, height: 0, dpr: 1 };
    }
    const parsed = JSON.parse(raw);
    const width = Number(parsed.width);
    const height = Number(parsed.height);
    const dpr = Number(parsed.dpr);
    // Reject NaN, Infinity, zero and negatives: any of them would later turn
    // into an invalid clip rectangle or scale factor.
    if (!isPositiveFinite(width) || !isPositiveFinite(height)) {
      return { width: 0, height: 0, dpr: 1 };
    }
    return { width, height, dpr: isPositiveFinite(dpr) ? dpr : 1 };
  } catch {
    // Deliberate best-effort: a failed viewport query must not block the
    // screenshot itself, so report "unknown" instead of throwing.
    return { width: 0, height: 0, dpr: 1 };
  }
}
834
/**
 * Capture a PNG screenshot of the current viewport, downscaled so its longer
 * side is at most `maxDim` output pixels. Resolves with the base64 payload,
 * or "" when the response carries no `data`.
 *
 * When the viewport metrics are unknown (width or height is 0) or the natural
 * capture already fits, a plain unclipped capture is requested.
 *
 * @param conn Object exposing `send(method, params)` that resolves with the CDP response.
 * @param maxDim Upper bound for the longer side, in output pixels.
 * @returns Base64-encoded image data, or "".
 */
async function captureBoundedScreenshot(conn, maxDim = SCREENSHOT_MAX_DIM_PX) {
  const { width, height, dpr } = await getViewportMetrics(conn);
  const longestCssSide = Math.max(width, height);
  const metricsUnknown = width === 0 || height === 0;

  let clip;
  // Natural output size is CSS px × devicePixelRatio; only clip when it overflows.
  if (!metricsUnknown && longestCssSide * dpr > maxDim) {
    // Chromium renders a clipped capture at clip size × clip.scale × device
    // scale factor (a clip with scale 1 on a 2× display is still a 2× image),
    // so the DPR has to be divided out of the scale. Flooring the target CSS
    // length keeps the rounded output at or below maxDim.
    // NOTE(review): confirm against the Chrome versions this package targets.
    const targetCssSide = Math.max(1, Math.floor(maxDim / dpr));
    const scale = Math.min(1, targetCssSide / longestCssSide);
    clip = { x: 0, y: 0, width, height, scale };
  }

  const response = await conn.send("Page.captureScreenshot", {
    format: "png",
    // NOTE(review): CDP documents `quality` as applying to lossy formats only,
    // so it likely has no effect for PNG; kept to match the existing request.
    quality: SCREENSHOT_QUALITY,
    captureBeyondViewport: false,
    ...(clip ? { clip } : {})
  });
  return response.data || "";
}
862
+
812
863
  // src/browser/actions.ts
813
864
  var Actions = class {
814
865
  constructor(conn, snapshotEngine) {
@@ -1004,12 +1055,7 @@ var Actions = class {
1004
1055
  let screenshot;
1005
1056
  if (takeScreenshot) {
1006
1057
  await delay(CDP_SETTLE_MS);
1007
- const screenshotResult = await this.conn.send("Page.captureScreenshot", {
1008
- format: "png",
1009
- quality: SCREENSHOT_QUALITY,
1010
- captureBeyondViewport: false
1011
- });
1012
- screenshot = screenshotResult.data || "";
1058
+ screenshot = await captureBoundedScreenshot(this.conn);
1013
1059
  }
1014
1060
  return { results, screenshot };
1015
1061
  }
@@ -1378,12 +1424,7 @@ URL: ${info.url}`;
1378
1424
  }
1379
1425
  async screenshot() {
1380
1426
  this.conn.ensureConnected();
1381
- const result = await this.conn.send("Page.captureScreenshot", {
1382
- format: "png",
1383
- quality: SCREENSHOT_QUALITY,
1384
- captureBeyondViewport: false
1385
- });
1386
- return result.data || "";
1427
+ return captureBoundedScreenshot(this.conn);
1387
1428
  }
1388
1429
  async detectLoginPage() {
1389
1430
  try {
@@ -1833,12 +1874,7 @@ var SnapshotEngine = class {
1833
1874
  expression: buildAnnotationOverlayJS(refsJson)
1834
1875
  });
1835
1876
  }
1836
- const screenshotResult = await this.conn.send("Page.captureScreenshot", {
1837
- format: "png",
1838
- quality: SCREENSHOT_QUALITY,
1839
- captureBeyondViewport: false
1840
- });
1841
- const image = screenshotResult.data || "";
1877
+ const image = await captureBoundedScreenshot(this.conn);
1842
1878
  if (annotate) {
1843
1879
  await this.conn.send("Runtime.evaluate", {
1844
1880
  expression: `(function() { var el = document.getElementById('__assistme_refs__'); if (el) el.remove(); })()`
package/dist/index.js CHANGED
@@ -34,7 +34,7 @@ import {
34
34
  setSessionBusy,
35
35
  toggleScheduledTask,
36
36
  updateHeartbeat
37
- } from "./chunk-E27P57PM.js";
37
+ } from "./chunk-O5K33FBW.js";
38
38
  import {
39
39
  JobRunner,
40
40
  callMcpHandler
@@ -2,7 +2,7 @@
2
2
  import {
3
3
  TaskProcessor,
4
4
  getBrowser
5
- } from "../chunk-E27P57PM.js";
5
+ } from "../chunk-O5K33FBW.js";
6
6
  import "../chunk-RRMI6RDG.js";
7
7
  import {
8
8
  setLogTransport
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "assistme",
3
- "version": "0.9.0",
3
+ "version": "0.9.1",
4
4
  "description": "AssistMe CLI Agent - AI-powered agentic assistant for code, browser, and automation",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -25,9 +25,9 @@ import {
25
25
  KEY_PRESS_DELAY_MS,
26
26
  MAX_WAIT_MS,
27
27
  PAGE_TRANSITION_MS,
28
- SCREENSHOT_QUALITY,
29
28
  SCROLL_SETTLE_MS,
30
29
  } from "./delays.js";
30
+ import { captureBoundedScreenshot } from "./screenshot.js";
31
31
  import { clearFieldScript } from "./scripts.js";
32
32
  import type { SnapshotEngine } from "./snapshot.js";
33
33
  import type { ActionResult, ActionSpec, CDPEvalResult, RefActionResult } from "./types.js";
@@ -268,12 +268,7 @@ export class Actions {
268
268
  let screenshot: string | undefined;
269
269
  if (takeScreenshot) {
270
270
  await delay(CDP_SETTLE_MS);
271
- const screenshotResult = await this.conn.send("Page.captureScreenshot", {
272
- format: "png",
273
- quality: SCREENSHOT_QUALITY,
274
- captureBeyondViewport: false,
275
- });
276
- screenshot = (screenshotResult as { data?: string }).data || "";
271
+ screenshot = await captureBoundedScreenshot(this.conn);
277
272
  }
278
273
 
279
274
  return { results, screenshot };
@@ -29,6 +29,16 @@ export const MAX_WAIT_MS = 5000;
29
29
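  /** Compression quality (0-100) sent with CDP screenshots. CDP documents it as applying to lossy formats such as JPEG; it does not affect captures requested with format "png". */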
30
30
  export const SCREENSHOT_QUALITY = 80;
31
31
 
32
+ /**
33
+ * Maximum pixels on either side of a captured screenshot.
34
+ *
35
+ * Anthropic's Messages API rejects requests where any image exceeds 2000px
36
+ * per side once a conversation contains many images. Browser-agent sessions
37
+ * accumulate screenshots quickly, so we cap at a safe value below that limit.
38
+ * 1568 is also Anthropic's recommended maximum for best model performance.
39
+ */
40
+ export const SCREENSHOT_MAX_DIM_PX = 1568;
41
+
32
42
  /** Promise-based delay helper. */
33
43
  export function delay(ms: number): Promise<void> {
34
44
  return new Promise((r) => { setTimeout(r, ms); });
@@ -3,7 +3,7 @@
3
3
  */
4
4
 
5
5
  import type { CDPConnection } from "./connection.js";
6
- import { SCREENSHOT_QUALITY } from "./delays.js";
6
+ import { captureBoundedScreenshot } from "./screenshot.js";
7
7
  import type { CDPEvalResult } from "./types.js";
8
8
 
9
9
  export class Navigation {
@@ -100,12 +100,7 @@ export class Navigation {
100
100
 
101
101
  async screenshot(): Promise<string> {
102
102
  this.conn.ensureConnected();
103
- const result = await this.conn.send("Page.captureScreenshot", {
104
- format: "png",
105
- quality: SCREENSHOT_QUALITY,
106
- captureBeyondViewport: false,
107
- });
108
- return (result as { data?: string }).data || "";
103
+ return captureBoundedScreenshot(this.conn);
109
104
  }
110
105
 
111
106
  async detectLoginPage(): Promise<{ isLoginPage: boolean; reason: string }> {
@@ -0,0 +1,94 @@
1
+ /**
2
+ * Screenshot helper — captures a page screenshot via CDP while ensuring
3
+ * output dimensions stay within Anthropic's per-image size limit.
4
+ *
5
+ * Without this cap, high-DPI displays (Retina, 2× / 3×) produce PNGs that
6
+ * exceed 2000px per side. Once a conversation accumulates many images the
7
+ * API rejects the request with:
8
+ * "An image in the conversation exceeds the dimension limit for
9
+ * many-image requests (2000px)."
10
+ *
11
+ * Implementation uses CDP's native `clip.scale` so no image library is
12
+ * needed. We read the viewport size and devicePixelRatio, then downscale
13
+ * only when the natural capture would exceed the cap.
14
+ */
15
+
16
+ import type { CDPConnection } from "./connection.js";
17
+ import { SCREENSHOT_MAX_DIM_PX, SCREENSHOT_QUALITY } from "./delays.js";
18
+ import type { CDPEvalResult } from "./types.js";
19
+
20
/** Viewport size in CSS pixels plus the page's devicePixelRatio. */
interface ViewportMetrics {
  width: number;
  height: number;
  dpr: number;
}

/**
 * Ask the page for its viewport size (in CSS pixels) and devicePixelRatio.
 *
 * Sends one `Runtime.evaluate` over `conn` and parses the JSON string the
 * page returns. This is best-effort: any failure (send rejects, missing or
 * malformed value, non-positive or non-finite size) yields
 * `{ width: 0, height: 0, dpr: 1 }`, which callers treat as "metrics unknown".
 *
 * @param conn CDP connection used to evaluate the expression in the page.
 * @returns Metrics where width/height are both > 0 or both 0, and dpr is always finite and > 0.
 */
async function getViewportMetrics(conn: CDPConnection): Promise<ViewportMetrics> {
  const isPositiveFinite = (n: number): boolean => Number.isFinite(n) && n > 0;
  try {
    const response = await conn.send("Runtime.evaluate", {
      expression: `JSON.stringify({ width: window.innerWidth, height: window.innerHeight, dpr: window.devicePixelRatio || 1 })`,
      returnByValue: true,
    });
    const raw: unknown = (response as CDPEvalResult).result?.value;
    if (typeof raw !== "string" || raw === "") {
      return { width: 0, height: 0, dpr: 1 };
    }
    const parsed = JSON.parse(raw) as Partial<Record<keyof ViewportMetrics, unknown>>;
    const width = Number(parsed.width);
    const height = Number(parsed.height);
    const dpr = Number(parsed.dpr);
    // Reject NaN, Infinity, zero and negatives: any of them would later turn
    // into an invalid clip rectangle or scale factor.
    if (!isPositiveFinite(width) || !isPositiveFinite(height)) {
      return { width: 0, height: 0, dpr: 1 };
    }
    return { width, height, dpr: isPositiveFinite(dpr) ? dpr : 1 };
  } catch {
    // Deliberate best-effort: a failed viewport query must not block the
    // screenshot itself, so report "unknown" instead of throwing.
    return { width: 0, height: 0, dpr: 1 };
  }
}
48
+
49
/**
 * Capture a PNG screenshot of the current viewport, downscaled so its longer
 * side is at most `maxDim` output pixels.
 *
 * When the viewport metrics are unknown (width or height is 0) or the natural
 * capture already fits, a plain unclipped capture is requested.
 *
 * @param conn CDP connection used for the viewport query and the capture.
 * @param maxDim Upper bound for the longer side, in output pixels.
 * @returns Base64-encoded image data, or "" when the response carries no `data`.
 */
export async function captureBoundedScreenshot(
  conn: CDPConnection,
  maxDim: number = SCREENSHOT_MAX_DIM_PX
): Promise<string> {
  const { width, height, dpr } = await getViewportMetrics(conn);
  const longestCssSide = Math.max(width, height);
  const metricsUnknown = width === 0 || height === 0;

  let clip: { x: number; y: number; width: number; height: number; scale: number } | undefined;
  // Natural output size is CSS px × devicePixelRatio; only clip when it overflows.
  if (!metricsUnknown && longestCssSide * dpr > maxDim) {
    // Chromium renders a clipped capture at clip size × clip.scale × device
    // scale factor (a clip with scale 1 on a 2× display is still a 2× image),
    // so the DPR has to be divided out of the scale. Flooring the target CSS
    // length keeps the rounded output at or below maxDim.
    // NOTE(review): confirm against the Chrome versions this package targets.
    const targetCssSide = Math.max(1, Math.floor(maxDim / dpr));
    const scale = Math.min(1, targetCssSide / longestCssSide);
    clip = { x: 0, y: 0, width, height, scale };
  }

  const response = await conn.send("Page.captureScreenshot", {
    format: "png",
    // NOTE(review): CDP documents `quality` as applying to lossy formats only,
    // so it likely has no effect for PNG; kept to match the existing request.
    quality: SCREENSHOT_QUALITY,
    captureBeyondViewport: false,
    ...(clip ? { clip } : {}),
  });
  return (response as { data?: string }).data || "";
}
@@ -6,7 +6,7 @@ import { SNAPSHOT_LOAD_TIMEOUT_MS } from "../utils/constants.js";
6
6
  import { safeJsonParse } from "../utils/errors.js";
7
7
  import { log } from "../utils/logger.js";
8
8
  import type { CDPConnection } from "./connection.js";
9
- import { SCREENSHOT_QUALITY } from "./delays.js";
9
+ import { captureBoundedScreenshot } from "./screenshot.js";
10
10
  import {
11
11
  buildAnnotationOverlayJS,
12
12
  buildFrameDiscoverJS,
@@ -83,13 +83,8 @@ export class SnapshotEngine {
83
83
  });
84
84
  }
85
85
 
86
- // 3. Take screenshot
87
- const screenshotResult = await this.conn.send("Page.captureScreenshot", {
88
- format: "png",
89
- quality: SCREENSHOT_QUALITY,
90
- captureBeyondViewport: false,
91
- });
92
- const image = (screenshotResult as { data?: string }).data || "";
86
+ // 3. Take screenshot (bounded to stay within Anthropic's many-image dim limit)
87
+ const image = await captureBoundedScreenshot(this.conn);
93
88
 
94
89
  // 4. Remove overlay if injected
95
90
  if (annotate) {
@@ -12,6 +12,7 @@ vi.mock("../../src/utils/logger.js", () => ({
12
12
 
13
13
  vi.mock("../../src/browser/delays.js", () => ({
14
14
  SCREENSHOT_QUALITY: 80,
15
+ SCREENSHOT_MAX_DIM_PX: 1568,
15
16
  }));
16
17
 
17
18
  // Create mock connection