assistme 0.9.0 → 0.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-E27P57PM.js → chunk-S7PYDTKE.js} +117 -26
- package/dist/index.js +1 -1
- package/dist/workers/entry.js +1 -1
- package/package.json +1 -1
- package/src/browser/actions.ts +2 -7
- package/src/browser/delays.ts +13 -1
- package/src/browser/navigation.ts +2 -7
- package/src/browser/screenshot.ts +180 -0
- package/src/browser/snapshot.ts +25 -17
- package/tests/browser/snapshot.test.ts +1 -0
|
@@ -318,6 +318,7 @@ var CDP_SETTLE_MS = 300;
|
|
|
318
318
|
var PAGE_TRANSITION_MS = 500;
|
|
319
319
|
var MAX_WAIT_MS = 5e3;
|
|
320
320
|
var SCREENSHOT_QUALITY = 80;
|
|
321
|
+
var SCREENSHOT_MAX_DIM_PX = 1568;
|
|
321
322
|
function delay(ms) {
|
|
322
323
|
return new Promise((r) => {
|
|
323
324
|
setTimeout(r, ms);
|
|
@@ -809,6 +810,109 @@ async function tryDismissOverlay(conn) {
|
|
|
809
810
|
}
|
|
810
811
|
}
|
|
811
812
|
|
|
813
|
+
// src/browser/screenshot.ts
|
|
814
|
+
// Length in bytes of the fixed signature that starts every PNG file.
var PNG_SIGNATURE_LEN = 8;
// Offset of the IHDR width field: signature (8) + IHDR chunk length (4) + "IHDR" tag (4).
var PNG_IHDR_WIDTH_OFFSET = PNG_SIGNATURE_LEN + 8;
// Bytes needed from the start of the file to cover width (4) and height (4).
var PNG_IHDR_HEADER_BYTES = PNG_IHDR_WIDTH_OFFSET + 8;
|
|
817
|
+
/**
 * Read the page's viewport size and device pixel ratio through CDP.
 *
 * Evaluates `window.innerWidth`, `window.innerHeight` and
 * `window.devicePixelRatio` in the page and returns them as numbers.
 * This is best-effort: if the evaluation fails, returns nothing, or yields a
 * non-positive width/height, the sentinel `{ width: 0, height: 0, dpr: 1 }`
 * is returned so the caller can fall back to an unclipped capture.
 *
 * @param {{ send: Function }} conn - CDP connection exposing `send(method, params)`.
 * @returns {Promise<{ width: number, height: number, dpr: number }>}
 */
async function getViewportMetrics(conn) {
  try {
    const result = await conn.send("Runtime.evaluate", {
      expression: `JSON.stringify({ width: window.innerWidth, height: window.innerHeight, dpr: window.devicePixelRatio || 1 })`,
      returnByValue: true
    });
    const value = result.result?.value;
    if (value) {
      const parsed = JSON.parse(value);
      const width = Number(parsed.width) || 0;
      const height = Number(parsed.height) || 0;
      const dpr = Number(parsed.dpr) || 1;
      if (width > 0 && height > 0) {
        return { width, height, dpr };
      }
    }
  } catch (err) {
    // Still best-effort, but leave a trace instead of swallowing the failure
    // silently so an unexpected unclipped capture can be diagnosed.
    log.debug(
      `Viewport metrics unavailable: ${err instanceof Error ? err.message : err}`
    );
  }
  return { width: 0, height: 0, dpr: 1 };
}
|
|
837
|
+
/**
 * Decode the pixel dimensions of a base64-encoded PNG from its header.
 *
 * Only the leading bytes are decoded: the 8-byte signature, the IHDR chunk
 * length and type, and the big-endian width/height fields.
 *
 * @param {string} base64 - Base64 image data (no data-URL prefix).
 * @returns {{ width: number, height: number } | null} The dimensions, or
 *   `null` when the input is not a string, is too short, or does not start
 *   with a PNG signature followed by an IHDR chunk.
 */
function pngDimensions(base64) {
  if (typeof base64 !== "string") return null;
  // Every 4 base64 characters decode to 3 bytes; this is the shortest prefix
  // that covers the header, so small but valid PNGs are not rejected.
  const charsNeeded = Math.ceil(PNG_IHDR_HEADER_BYTES / 3) * 4;
  if (base64.length < charsNeeded) return null;
  // Buffer.from does not throw on malformed base64 strings; bad input simply
  // decodes to fewer bytes, which the length check below catches.
  const buf = Buffer.from(base64.slice(0, charsNeeded), "base64");
  if (buf.length < PNG_IHDR_HEADER_BYTES) return null;
  // Full PNG signature: 89 50 4E 47 0D 0A 1A 0A.
  const signature = [137, 80, 78, 71, 13, 10, 26, 10];
  if (!signature.every((byte, i) => buf[i] === byte)) return null;
  // The first chunk must be IHDR, otherwise the next 8 bytes are not width/height.
  const chunkType = buf.toString("latin1", PNG_IHDR_WIDTH_OFFSET - 4, PNG_IHDR_WIDTH_OFFSET);
  if (chunkType !== "IHDR") return null;
  return {
    width: buf.readUInt32BE(PNG_IHDR_WIDTH_OFFSET),
    height: buf.readUInt32BE(PNG_IHDR_WIDTH_OFFSET + 4)
  };
}
|
|
854
|
+
/**
 * Send a single `Page.captureScreenshot` request and return its base64 data.
 *
 * The request always asks for PNG, passes `SCREENSHOT_QUALITY`, and disables
 * `captureBeyondViewport`. A `clip` is attached only when one is supplied.
 * NOTE(review): CDP documents `quality` as JPEG-only, so it is presumably
 * ignored for PNG — confirm before relying on it.
 *
 * @param {{ send: Function }} conn - CDP connection exposing `send(method, params)`.
 * @param {{ x: number, y: number, width: number, height: number, scale: number }} [clip]
 *   Optional capture region and scale.
 * @returns {Promise<string>} The `data` field of the response, or `""` when it is missing/empty.
 */
async function capture(conn, clip) {
  const request = {
    format: "png",
    quality: SCREENSHOT_QUALITY,
    captureBeyondViewport: false
  };
  if (clip) {
    request.clip = clip;
  }
  const response = await conn.send("Page.captureScreenshot", request);
  return response.data ? response.data : "";
}
|
|
864
|
+
/**
 * Capture a PNG screenshot whose longer side does not exceed `maxDim` pixels.
 *
 * Strategy:
 *  1. Read viewport metrics and pick an initial `clip.scale` that is safe even
 *     if the scale compounds with the device pixel ratio.
 *  2. Capture, read the real dimensions from the PNG header, and if the image
 *     is still over the cap shrink the scale from the measured size (with a 5%
 *     margin) and retry, up to three attempts.
 *  3. As a last resort capture once more at a scale of at most 0.25; that
 *     final image is returned without verification.
 *
 * When viewport metrics are unavailable the first capture is unclipped and the
 * measured PNG size is used as the clip size for later attempts.
 * NOTE(review): that measured size is in output pixels while the clip is
 * presumably in CSS pixels, so on high-DPR screens the clip may be larger than
 * the viewport — confirm against CDP behaviour.
 *
 * @param {{ send: Function }} conn - CDP connection, forwarded to the capture helpers.
 * @param {number} [maxDim=SCREENSHOT_MAX_DIM_PX] - Maximum pixels per side. Values that
 *   are not greater than zero (0, negative, NaN) fall back to the default cap.
 * @returns {Promise<string>} Base64 PNG data; `""` if the capture returned no data.
 */
async function captureBoundedScreenshot(conn, maxDim = SCREENSHOT_MAX_DIM_PX) {
  // A non-positive or NaN cap would produce a zero/negative/NaN clip.scale.
  const cap = maxDim > 0 ? maxDim : SCREENSHOT_MAX_DIM_PX;
  const { width, height, dpr } = await getViewportMetrics(conn);
  let clipWidth = width;
  let clipHeight = height;
  let scale = null;
  if (width > 0 && height > 0) {
    // Worst case is output = css × scale × dpr; never request an upscale.
    scale = Math.min(1, cap / (Math.max(width, height) * dpr));
  }
  const clipAt = (s) => ({ x: 0, y: 0, width: clipWidth, height: clipHeight, scale: s });

  for (let attempt = 0; attempt < 3; attempt++) {
    const data = scale !== null && clipWidth > 0 && clipHeight > 0
      ? await capture(conn, clipAt(scale))
      : await capture(conn);
    if (!data) return data;

    const dims = pngDimensions(data);
    // Unverifiable output is returned as-is rather than blocking the caller.
    if (!dims) return data;

    const longest = Math.max(dims.width, dims.height);
    if (longest <= cap) return data;

    // Over the cap: derive the next scale from what was actually produced,
    // with a small margin so rounding cannot push the result back over.
    const shrinkFactor = cap / longest * 0.95;
    if (scale === null) {
      clipWidth = dims.width;
      clipHeight = dims.height;
      scale = shrinkFactor;
    } else {
      scale = scale * shrinkFactor;
    }
    log.debug(
      `Screenshot over cap (${dims.width}\xD7${dims.height}, cap ${cap}); retrying at scale ${scale.toFixed(3)}`
    );
    // Sanity guard: a non-positive scale cannot be retried.
    if (scale <= 0) return data;
  }

  if (clipWidth > 0 && clipHeight > 0) {
    return capture(conn, clipAt(Math.min(scale ?? 0.25, 0.25)));
  }
  return capture(conn);
}
|
|
915
|
+
|
|
812
916
|
// src/browser/actions.ts
|
|
813
917
|
var Actions = class {
|
|
814
918
|
constructor(conn, snapshotEngine) {
|
|
@@ -1004,12 +1108,7 @@ var Actions = class {
|
|
|
1004
1108
|
let screenshot;
|
|
1005
1109
|
if (takeScreenshot) {
|
|
1006
1110
|
await delay(CDP_SETTLE_MS);
|
|
1007
|
-
|
|
1008
|
-
format: "png",
|
|
1009
|
-
quality: SCREENSHOT_QUALITY,
|
|
1010
|
-
captureBeyondViewport: false
|
|
1011
|
-
});
|
|
1012
|
-
screenshot = screenshotResult.data || "";
|
|
1111
|
+
screenshot = await captureBoundedScreenshot(this.conn);
|
|
1013
1112
|
}
|
|
1014
1113
|
return { results, screenshot };
|
|
1015
1114
|
}
|
|
@@ -1378,12 +1477,7 @@ URL: ${info.url}`;
|
|
|
1378
1477
|
}
|
|
1379
1478
|
async screenshot() {
|
|
1380
1479
|
this.conn.ensureConnected();
|
|
1381
|
-
|
|
1382
|
-
format: "png",
|
|
1383
|
-
quality: SCREENSHOT_QUALITY,
|
|
1384
|
-
captureBeyondViewport: false
|
|
1385
|
-
});
|
|
1386
|
-
return result.data || "";
|
|
1480
|
+
return captureBoundedScreenshot(this.conn);
|
|
1387
1481
|
}
|
|
1388
1482
|
async detectLoginPage() {
|
|
1389
1483
|
try {
|
|
@@ -1833,12 +1927,7 @@ var SnapshotEngine = class {
|
|
|
1833
1927
|
expression: buildAnnotationOverlayJS(refsJson)
|
|
1834
1928
|
});
|
|
1835
1929
|
}
|
|
1836
|
-
const
|
|
1837
|
-
format: "png",
|
|
1838
|
-
quality: SCREENSHOT_QUALITY,
|
|
1839
|
-
captureBeyondViewport: false
|
|
1840
|
-
});
|
|
1841
|
-
const image = screenshotResult.data || "";
|
|
1930
|
+
const image = await captureBoundedScreenshot(this.conn);
|
|
1842
1931
|
if (annotate) {
|
|
1843
1932
|
await this.conn.send("Runtime.evaluate", {
|
|
1844
1933
|
expression: `(function() { var el = document.getElementById('__assistme_refs__'); if (el) el.remove(); })()`
|
|
@@ -1925,7 +2014,9 @@ Refs:
|
|
|
1925
2014
|
this.frameContexts.set(r.id, contextId);
|
|
1926
2015
|
}
|
|
1927
2016
|
} catch (err) {
|
|
1928
|
-
log.debug(
|
|
2017
|
+
log.debug(
|
|
2018
|
+
`Frame evaluation failed for frame ${frameId}: ${err instanceof Error ? err.message : err}`
|
|
2019
|
+
);
|
|
1929
2020
|
}
|
|
1930
2021
|
}
|
|
1931
2022
|
} catch (err) {
|
|
@@ -1950,7 +2041,9 @@ Refs:
|
|
|
1950
2041
|
contexts.set(child.frame.id, world.executionContextId);
|
|
1951
2042
|
}
|
|
1952
2043
|
} catch (err) {
|
|
1953
|
-
log.debug(
|
|
2044
|
+
log.debug(
|
|
2045
|
+
`Frame ${child.frame.id} does not support isolated worlds: ${err instanceof Error ? err.message : err}`
|
|
2046
|
+
);
|
|
1954
2047
|
}
|
|
1955
2048
|
}
|
|
1956
2049
|
} catch (err) {
|
|
@@ -2027,11 +2120,7 @@ Refs:
|
|
|
2027
2120
|
});
|
|
2028
2121
|
const value = frameResult.result?.value;
|
|
2029
2122
|
if (!value || value === "null") return null;
|
|
2030
|
-
const parsed = safeJsonParse(
|
|
2031
|
-
value,
|
|
2032
|
-
null,
|
|
2033
|
-
"snapshot.resolveRefInFrame"
|
|
2034
|
-
);
|
|
2123
|
+
const parsed = safeJsonParse(value, null, "snapshot.resolveRefInFrame");
|
|
2035
2124
|
if (!parsed) return null;
|
|
2036
2125
|
if (parsed.error) return parsed;
|
|
2037
2126
|
return {
|
|
@@ -2041,7 +2130,9 @@ Refs:
|
|
|
2041
2130
|
height: parsed.height
|
|
2042
2131
|
};
|
|
2043
2132
|
} catch (err) {
|
|
2044
|
-
log.debug(
|
|
2133
|
+
log.debug(
|
|
2134
|
+
`resolveRefInFrame failed for ref ${refId}: ${err instanceof Error ? err.message : err}`
|
|
2135
|
+
);
|
|
2045
2136
|
return null;
|
|
2046
2137
|
}
|
|
2047
2138
|
}
|
package/dist/index.js
CHANGED
package/dist/workers/entry.js
CHANGED
package/package.json
CHANGED
package/src/browser/actions.ts
CHANGED
|
@@ -25,9 +25,9 @@ import {
|
|
|
25
25
|
KEY_PRESS_DELAY_MS,
|
|
26
26
|
MAX_WAIT_MS,
|
|
27
27
|
PAGE_TRANSITION_MS,
|
|
28
|
-
SCREENSHOT_QUALITY,
|
|
29
28
|
SCROLL_SETTLE_MS,
|
|
30
29
|
} from "./delays.js";
|
|
30
|
+
import { captureBoundedScreenshot } from "./screenshot.js";
|
|
31
31
|
import { clearFieldScript } from "./scripts.js";
|
|
32
32
|
import type { SnapshotEngine } from "./snapshot.js";
|
|
33
33
|
import type { ActionResult, ActionSpec, CDPEvalResult, RefActionResult } from "./types.js";
|
|
@@ -268,12 +268,7 @@ export class Actions {
|
|
|
268
268
|
let screenshot: string | undefined;
|
|
269
269
|
if (takeScreenshot) {
|
|
270
270
|
await delay(CDP_SETTLE_MS);
|
|
271
|
-
|
|
272
|
-
format: "png",
|
|
273
|
-
quality: SCREENSHOT_QUALITY,
|
|
274
|
-
captureBeyondViewport: false,
|
|
275
|
-
});
|
|
276
|
-
screenshot = (screenshotResult as { data?: string }).data || "";
|
|
271
|
+
screenshot = await captureBoundedScreenshot(this.conn);
|
|
277
272
|
}
|
|
278
273
|
|
|
279
274
|
return { results, screenshot };
|
package/src/browser/delays.ts
CHANGED
|
@@ -29,7 +29,19 @@ export const MAX_WAIT_MS = 5000;
|
|
|
29
29
|
/** JPEG quality for CDP screenshots (0-100). */
|
|
30
30
|
export const SCREENSHOT_QUALITY = 80;
|
|
31
31
|
|
|
32
|
+
/**
|
|
33
|
+
* Maximum pixels on either side of a captured screenshot.
|
|
34
|
+
*
|
|
35
|
+
* Anthropic's Messages API rejects requests where any image exceeds 2000px
|
|
36
|
+
* per side once a conversation contains many images. Browser-agent sessions
|
|
37
|
+
* accumulate screenshots quickly, so we cap at a safe value below that limit.
|
|
38
|
+
* 1568 is also Anthropic's recommended maximum for best model performance.
|
|
39
|
+
*/
|
|
40
|
+
export const SCREENSHOT_MAX_DIM_PX = 1568;
|
|
41
|
+
|
|
32
42
|
/** Promise-based delay helper. */
|
|
33
43
|
export function delay(ms: number): Promise<void> {
|
|
34
|
-
return new Promise((r) => {
|
|
44
|
+
return new Promise((r) => {
|
|
45
|
+
setTimeout(r, ms);
|
|
46
|
+
});
|
|
35
47
|
}
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
5
|
import type { CDPConnection } from "./connection.js";
|
|
6
|
-
import {
|
|
6
|
+
import { captureBoundedScreenshot } from "./screenshot.js";
|
|
7
7
|
import type { CDPEvalResult } from "./types.js";
|
|
8
8
|
|
|
9
9
|
export class Navigation {
|
|
@@ -100,12 +100,7 @@ export class Navigation {
|
|
|
100
100
|
|
|
101
101
|
async screenshot(): Promise<string> {
|
|
102
102
|
this.conn.ensureConnected();
|
|
103
|
-
|
|
104
|
-
format: "png",
|
|
105
|
-
quality: SCREENSHOT_QUALITY,
|
|
106
|
-
captureBeyondViewport: false,
|
|
107
|
-
});
|
|
108
|
-
return (result as { data?: string }).data || "";
|
|
103
|
+
return captureBoundedScreenshot(this.conn);
|
|
109
104
|
}
|
|
110
105
|
|
|
111
106
|
async detectLoginPage(): Promise<{ isLoginPage: boolean; reason: string }> {
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Screenshot helper — captures a page screenshot via CDP while ensuring
|
|
3
|
+
* output dimensions stay within Anthropic's per-image size limit.
|
|
4
|
+
*
|
|
5
|
+
* Without this cap, high-DPI displays (Retina, 2× / 3×) produce PNGs that
|
|
6
|
+
* exceed 2000px per side. Once a conversation accumulates many images the
|
|
7
|
+
* API rejects the request with:
|
|
8
|
+
* "An image in the conversation exceeds the dimension limit for
|
|
9
|
+
* many-image requests (2000px)."
|
|
10
|
+
*
|
|
11
|
+
* Implementation uses CDP's native `clip.scale` so no image library is
|
|
12
|
+
* needed. We read the viewport size and devicePixelRatio to pick an initial
|
|
13
|
+
* scale, then decode the actual PNG dimensions from its header and re-
|
|
14
|
+
* capture with a smaller scale if the output is still over the cap (guards
|
|
15
|
+
* against any DPR/scale compounding differences across Chrome builds).
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
import { log } from "../utils/logger.js";
|
|
19
|
+
import type { CDPConnection } from "./connection.js";
|
|
20
|
+
import { SCREENSHOT_MAX_DIM_PX, SCREENSHOT_QUALITY } from "./delays.js";
|
|
21
|
+
import type { CDPEvalResult } from "./types.js";
|
|
22
|
+
|
|
23
|
+
/** Viewport measurements read from the page by getViewportMetrics. */
interface ViewportMetrics {
  /** `window.innerWidth` of the page; 0 when it could not be read. */
  width: number;
  /** `window.innerHeight` of the page; 0 when it could not be read. */
  height: number;
  /** `window.devicePixelRatio` of the page; 1 when it could not be read. */
  dpr: number;
}
|
|
28
|
+
|
|
29
|
+
/**
 * PNG signature + IHDR header positions, used to read the image size from the
 * first bytes of the file. See https://www.w3.org/TR/png/#5PNG-file-signature
 */
const PNG_SIGNATURE_LEN = 8;
const PNG_IHDR_WIDTH_OFFSET = PNG_SIGNATURE_LEN + 8; // 8 (sig) + 4 (IHDR length) + 4 ("IHDR")
const PNG_IHDR_HEADER_BYTES = PNG_IHDR_WIDTH_OFFSET + 8; // width (4) + height (4)
|
|
33
|
+
|
|
34
|
+
/**
 * Read the page's viewport size and device pixel ratio through CDP.
 *
 * Evaluates `window.innerWidth`, `window.innerHeight` and
 * `window.devicePixelRatio` in the page. This is best-effort: if the
 * evaluation fails, returns nothing, or yields a non-positive width/height,
 * the sentinel `{ width: 0, height: 0, dpr: 1 }` is returned so the caller can
 * fall back to an unclipped capture.
 *
 * @param conn CDP connection used to evaluate the expression.
 * @returns The measured metrics, or the zero-size sentinel.
 */
async function getViewportMetrics(conn: CDPConnection): Promise<ViewportMetrics> {
  try {
    const result = await conn.send("Runtime.evaluate", {
      expression: `JSON.stringify({ width: window.innerWidth, height: window.innerHeight, dpr: window.devicePixelRatio || 1 })`,
      returnByValue: true,
    });
    const value = (result as CDPEvalResult).result?.value as string | undefined;
    if (value) {
      const parsed = JSON.parse(value) as Partial<ViewportMetrics>;
      const width = Number(parsed.width) || 0;
      const height = Number(parsed.height) || 0;
      const dpr = Number(parsed.dpr) || 1;
      if (width > 0 && height > 0) {
        return { width, height, dpr };
      }
    }
  } catch (err) {
    // Still best-effort, but leave a trace instead of swallowing the failure
    // silently so an unexpected unclipped capture can be diagnosed.
    log.debug(
      `Viewport metrics unavailable: ${err instanceof Error ? err.message : err}`
    );
  }
  return { width: 0, height: 0, dpr: 1 };
}
|
|
55
|
+
|
|
56
|
+
/**
 * Decode PNG dimensions from base64 data by reading only its header.
 *
 * Decodes just the signature, the IHDR chunk length/type and the big-endian
 * width/height fields.
 *
 * @param base64 Base64 image data (no data-URL prefix).
 * @returns The dimensions, or `null` when the input is not a string, is too
 *   short, or does not start with a PNG signature followed by an IHDR chunk.
 */
function pngDimensions(base64: string): { width: number; height: number } | null {
  if (typeof base64 !== "string") return null;
  // Base64 → binary: every 4 chars decode to 3 bytes. This is the shortest
  // prefix that covers the header, so small but valid PNGs are not rejected.
  const charsNeeded = Math.ceil(PNG_IHDR_HEADER_BYTES / 3) * 4;
  if (base64.length < charsNeeded) return null;
  // Buffer.from does not throw on malformed base64 strings; bad input simply
  // decodes to fewer bytes, which the length check below catches.
  const buf = Buffer.from(base64.slice(0, charsNeeded), "base64");
  if (buf.length < PNG_IHDR_HEADER_BYTES) return null;
  // Full PNG signature: 89 50 4E 47 0D 0A 1A 0A
  const signature = [0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a];
  if (!signature.every((byte, i) => buf[i] === byte)) return null;
  // The first chunk must be IHDR, otherwise the next 8 bytes are not width/height.
  const chunkType = buf.toString("latin1", PNG_IHDR_WIDTH_OFFSET - 4, PNG_IHDR_WIDTH_OFFSET);
  if (chunkType !== "IHDR") return null;
  return {
    width: buf.readUInt32BE(PNG_IHDR_WIDTH_OFFSET),
    height: buf.readUInt32BE(PNG_IHDR_WIDTH_OFFSET + 4),
  };
}
|
|
77
|
+
|
|
78
|
+
/**
 * Send a single `Page.captureScreenshot` request and return its base64 data.
 *
 * The request always asks for PNG, passes `SCREENSHOT_QUALITY`, and disables
 * `captureBeyondViewport`. A `clip` is attached only when one is supplied.
 * NOTE(review): CDP documents `quality` as JPEG-only, so it is presumably
 * ignored for PNG — confirm before relying on it.
 *
 * @param conn CDP connection used to issue the command.
 * @param clip Optional capture region and scale.
 * @returns The `data` field of the response, or `""` when it is missing/empty.
 */
async function capture(
  conn: CDPConnection,
  clip?: { x: number; y: number; width: number; height: number; scale: number }
): Promise<string> {
  const request: Record<string, unknown> = {
    format: "png",
    quality: SCREENSHOT_QUALITY,
    captureBeyondViewport: false,
  };
  if (clip) {
    request.clip = clip;
  }
  const response = (await conn.send("Page.captureScreenshot", request)) as { data?: string };
  return response.data ? response.data : "";
}
|
|
91
|
+
|
|
92
|
+
/**
 * Capture a PNG screenshot of the current page, downsampled so the longer
 * side does not exceed `maxDim` (default {@link SCREENSHOT_MAX_DIM_PX}).
 *
 * Strategy:
 *  1. Read viewport metrics and pick an initial `clip.scale` that is safe even
 *     if the scale compounds with the device pixel ratio.
 *  2. Capture, read the real dimensions from the PNG header, and if the image
 *     is still over the cap shrink the scale from the measured size (with a 5%
 *     margin) and retry, up to three attempts.
 *  3. As a last resort capture once more at a scale of at most 0.25; that
 *     final image is returned without verification.
 *
 * When viewport metrics are unavailable the first capture is unclipped and the
 * measured PNG size is used as the clip size for later attempts.
 * NOTE(review): that measured size is in output pixels while the clip is
 * presumably in CSS pixels, so on high-DPR screens the clip may be larger than
 * the viewport — confirm against CDP behaviour.
 *
 * @param conn CDP connection, forwarded to the capture helpers.
 * @param maxDim Maximum pixels per side. Values that are not greater than zero
 *   (0, negative, NaN) fall back to the default cap.
 * @returns Base64 PNG data; `""` if the capture returned no data.
 */
export async function captureBoundedScreenshot(
  conn: CDPConnection,
  maxDim: number = SCREENSHOT_MAX_DIM_PX
): Promise<string> {
  // A non-positive or NaN cap would produce a zero/negative/NaN clip.scale.
  const cap = maxDim > 0 ? maxDim : SCREENSHOT_MAX_DIM_PX;
  const { width, height, dpr } = await getViewportMetrics(conn);

  // Without viewport metrics we can't pre-scale — capture, verify, retry if needed.
  let clipWidth = width;
  let clipHeight = height;
  let scale: number | null = null;

  if (width > 0 && height > 0) {
    // Safe in the worst case where clip.scale compounds with the device pixel
    // ratio (output = css × scale × dpr). If scale is instead a pure CSS
    // multiplier the result is smaller than necessary, but the verification
    // below keeps it within the cap either way. Never request an upscale.
    scale = Math.min(1, cap / (Math.max(width, height) * dpr));
  }

  const clipAt = (s: number) => ({
    x: 0,
    y: 0,
    width: clipWidth,
    height: clipHeight,
    scale: s,
  });

  // Capture + verify loop. Each retry shrinks the scale based on the actual
  // measured output.
  for (let attempt = 0; attempt < 3; attempt++) {
    const data =
      scale !== null && clipWidth > 0 && clipHeight > 0
        ? await capture(conn, clipAt(scale))
        : await capture(conn);

    if (!data) return data;

    const dims = pngDimensions(data);
    // Can't verify — return as-is rather than blocking the caller.
    if (!dims) return data;

    const longest = Math.max(dims.width, dims.height);
    if (longest <= cap) return data;

    // Output too large — derive a new scale from the actual output size, with
    // a small safety margin (0.95) so rounding doesn't push us back over.
    const shrinkFactor = (cap / longest) * 0.95;
    if (scale === null) {
      // No viewport metrics on the first pass: treat the observed output size
      // as the clip size at scale 1 and scale it to fit.
      clipWidth = dims.width;
      clipHeight = dims.height;
      scale = shrinkFactor;
    } else {
      scale = scale * shrinkFactor;
    }
    log.debug(
      `Screenshot over cap (${dims.width}×${dims.height}, cap ${cap}); retrying at scale ${scale.toFixed(3)}`
    );

    // Sanity guard — a non-positive scale cannot be retried.
    if (scale <= 0) return data;
  }

  // Last resort — capture with a very small scale so we don't block the task.
  if (clipWidth > 0 && clipHeight > 0) {
    return capture(conn, clipAt(Math.min(scale ?? 0.25, 0.25)));
  }
  return capture(conn);
}
|
package/src/browser/snapshot.ts
CHANGED
|
@@ -6,7 +6,7 @@ import { SNAPSHOT_LOAD_TIMEOUT_MS } from "../utils/constants.js";
|
|
|
6
6
|
import { safeJsonParse } from "../utils/errors.js";
|
|
7
7
|
import { log } from "../utils/logger.js";
|
|
8
8
|
import type { CDPConnection } from "./connection.js";
|
|
9
|
-
import {
|
|
9
|
+
import { captureBoundedScreenshot } from "./screenshot.js";
|
|
10
10
|
import {
|
|
11
11
|
buildAnnotationOverlayJS,
|
|
12
12
|
buildFrameDiscoverJS,
|
|
@@ -62,7 +62,9 @@ export class SnapshotEngine {
|
|
|
62
62
|
});
|
|
63
63
|
|
|
64
64
|
const refs: RefEntry[] = safeJsonParse<RawRefData[]>(
|
|
65
|
-
((findResult as CDPEvalResult).result?.value as string) || "[]",
|
|
65
|
+
((findResult as CDPEvalResult).result?.value as string) || "[]",
|
|
66
|
+
[],
|
|
67
|
+
"snapshot.findElements"
|
|
66
68
|
).map((r) => ({
|
|
67
69
|
id: r.id,
|
|
68
70
|
role: r.role,
|
|
@@ -83,13 +85,8 @@ export class SnapshotEngine {
|
|
|
83
85
|
});
|
|
84
86
|
}
|
|
85
87
|
|
|
86
|
-
// 3. Take screenshot
|
|
87
|
-
const
|
|
88
|
-
format: "png",
|
|
89
|
-
quality: SCREENSHOT_QUALITY,
|
|
90
|
-
captureBeyondViewport: false,
|
|
91
|
-
});
|
|
92
|
-
const image = (screenshotResult as { data?: string }).data || "";
|
|
88
|
+
// 3. Take screenshot (bounded to stay within Anthropic's many-image dim limit)
|
|
89
|
+
const image = await captureBoundedScreenshot(this.conn);
|
|
93
90
|
|
|
94
91
|
// 4. Remove overlay if injected
|
|
95
92
|
if (annotate) {
|
|
@@ -173,7 +170,9 @@ export class SnapshotEngine {
|
|
|
173
170
|
});
|
|
174
171
|
|
|
175
172
|
const frameRefs = safeJsonParse<RawRefData[]>(
|
|
176
|
-
((frameResult as CDPEvalResult).result?.value as string) || "[]",
|
|
173
|
+
((frameResult as CDPEvalResult).result?.value as string) || "[]",
|
|
174
|
+
[],
|
|
175
|
+
"snapshot.frameRefs"
|
|
177
176
|
);
|
|
178
177
|
|
|
179
178
|
for (const r of frameRefs) {
|
|
@@ -193,7 +192,9 @@ export class SnapshotEngine {
|
|
|
193
192
|
this.frameContexts.set(r.id, contextId);
|
|
194
193
|
}
|
|
195
194
|
} catch (err) {
|
|
196
|
-
log.debug(
|
|
195
|
+
log.debug(
|
|
196
|
+
`Frame evaluation failed for frame ${frameId}: ${err instanceof Error ? err.message : err}`
|
|
197
|
+
);
|
|
197
198
|
}
|
|
198
199
|
}
|
|
199
200
|
} catch (err) {
|
|
@@ -228,7 +229,9 @@ export class SnapshotEngine {
|
|
|
228
229
|
contexts.set(child.frame.id, world.executionContextId);
|
|
229
230
|
}
|
|
230
231
|
} catch (err) {
|
|
231
|
-
log.debug(
|
|
232
|
+
log.debug(
|
|
233
|
+
`Frame ${child.frame.id} does not support isolated worlds: ${err instanceof Error ? err.message : err}`
|
|
234
|
+
);
|
|
232
235
|
}
|
|
233
236
|
}
|
|
234
237
|
} catch (err) {
|
|
@@ -325,9 +328,13 @@ export class SnapshotEngine {
|
|
|
325
328
|
const value = (frameResult as CDPEvalResult).result?.value as string;
|
|
326
329
|
if (!value || value === "null") return null;
|
|
327
330
|
|
|
328
|
-
const parsed = safeJsonParse<{
|
|
329
|
-
|
|
330
|
-
|
|
331
|
+
const parsed = safeJsonParse<{
|
|
332
|
+
x: number;
|
|
333
|
+
y: number;
|
|
334
|
+
width: number;
|
|
335
|
+
height: number;
|
|
336
|
+
error?: string;
|
|
337
|
+
} | null>(value, null, "snapshot.resolveRefInFrame");
|
|
331
338
|
if (!parsed) return null;
|
|
332
339
|
if (parsed.error) return parsed;
|
|
333
340
|
|
|
@@ -338,9 +345,10 @@ export class SnapshotEngine {
|
|
|
338
345
|
height: parsed.height,
|
|
339
346
|
};
|
|
340
347
|
} catch (err) {
|
|
341
|
-
log.debug(
|
|
348
|
+
log.debug(
|
|
349
|
+
`resolveRefInFrame failed for ref ${refId}: ${err instanceof Error ? err.message : err}`
|
|
350
|
+
);
|
|
342
351
|
return null;
|
|
343
352
|
}
|
|
344
353
|
}
|
|
345
354
|
}
|
|
346
|
-
|