assistme 0.9.1 → 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -811,6 +811,9 @@ async function tryDismissOverlay(conn) {
811
811
  }
812
812
 
813
813
  // src/browser/screenshot.ts
814
+ var PNG_SIGNATURE_LEN = 8;
815
+ var PNG_IHDR_WIDTH_OFFSET = PNG_SIGNATURE_LEN + 8;
816
+ var PNG_IHDR_HEADER_BYTES = PNG_IHDR_WIDTH_OFFSET + 8;
814
817
  async function getViewportMetrics(conn) {
815
818
  try {
816
819
  const result = await conn.send("Runtime.evaluate", {
@@ -831,34 +834,84 @@ async function getViewportMetrics(conn) {
831
834
  }
832
835
  return { width: 0, height: 0, dpr: 1 };
833
836
  }
834
- async function captureBoundedScreenshot(conn, maxDim = SCREENSHOT_MAX_DIM_PX) {
835
- const { width, height, dpr } = await getViewportMetrics(conn);
836
- if (width === 0 || height === 0) {
837
- const result2 = await conn.send("Page.captureScreenshot", {
838
- format: "png",
839
- quality: SCREENSHOT_QUALITY,
840
- captureBeyondViewport: false
841
- });
842
- return result2.data || "";
843
- }
844
- const naturalMax = Math.max(width, height) * dpr;
845
- if (naturalMax <= maxDim) {
846
- const result2 = await conn.send("Page.captureScreenshot", {
847
- format: "png",
848
- quality: SCREENSHOT_QUALITY,
849
- captureBeyondViewport: false
850
- });
851
- return result2.data || "";
837
+ function pngDimensions(base64) {
838
+ if (!base64 || base64.length < 100) return null;
839
+ const b64Slice = base64.slice(0, Math.ceil(PNG_IHDR_HEADER_BYTES / 3) * 4);
840
+ let buf;
841
+ try {
842
+ buf = Buffer.from(b64Slice, "base64");
843
+ } catch {
844
+ return null;
852
845
  }
853
- const scale = Math.min(dpr, maxDim / Math.max(width, height));
854
- const result = await conn.send("Page.captureScreenshot", {
846
+ if (buf.length < PNG_IHDR_HEADER_BYTES) return null;
847
+ if (buf[0] !== 137 || buf[1] !== 80 || buf[2] !== 78 || buf[3] !== 71) {
848
+ return null;
849
+ }
850
+ const width = buf.readUInt32BE(PNG_IHDR_WIDTH_OFFSET);
851
+ const height = buf.readUInt32BE(PNG_IHDR_WIDTH_OFFSET + 4);
852
+ return { width, height };
853
+ }
854
+ async function capture(conn, clip) {
855
+ const params = {
855
856
  format: "png",
856
857
  quality: SCREENSHOT_QUALITY,
857
- captureBeyondViewport: false,
858
- clip: { x: 0, y: 0, width, height, scale }
859
- });
858
+ captureBeyondViewport: false
859
+ };
860
+ if (clip) params.clip = clip;
861
+ const result = await conn.send("Page.captureScreenshot", params);
860
862
  return result.data || "";
861
863
  }
864
+ async function captureBoundedScreenshot(conn, maxDim = SCREENSHOT_MAX_DIM_PX) {
865
+ const { width, height, dpr } = await getViewportMetrics(conn);
866
+ let currentWidth = width;
867
+ let currentHeight = height;
868
+ let currentScale = null;
869
+ if (width > 0 && height > 0) {
870
+ currentScale = Math.min(1, maxDim / (Math.max(width, height) * dpr));
871
+ }
872
+ for (let attempt = 0; attempt < 3; attempt++) {
873
+ const data = currentScale !== null && currentWidth > 0 && currentHeight > 0 ? await capture(conn, {
874
+ x: 0,
875
+ y: 0,
876
+ width: currentWidth,
877
+ height: currentHeight,
878
+ scale: currentScale
879
+ }) : await capture(conn);
880
+ if (!data) return data;
881
+ const dims = pngDimensions(data);
882
+ if (!dims) {
883
+ return data;
884
+ }
885
+ const longest = Math.max(dims.width, dims.height);
886
+ if (longest <= maxDim) {
887
+ return data;
888
+ }
889
+ const shrinkFactor = maxDim / longest * 0.95;
890
+ if (currentScale === null) {
891
+ currentWidth = dims.width;
892
+ currentHeight = dims.height;
893
+ currentScale = shrinkFactor;
894
+ } else {
895
+ currentScale = currentScale * shrinkFactor;
896
+ }
897
+ log.debug(
898
+ `Screenshot over cap (${dims.width}\xD7${dims.height}, cap ${maxDim}); retrying at scale ${currentScale.toFixed(3)}`
899
+ );
900
+ if (currentScale <= 0) {
901
+ return data;
902
+ }
903
+ }
904
+ if (currentWidth > 0 && currentHeight > 0) {
905
+ return capture(conn, {
906
+ x: 0,
907
+ y: 0,
908
+ width: currentWidth,
909
+ height: currentHeight,
910
+ scale: Math.min(currentScale ?? 0.25, 0.25)
911
+ });
912
+ }
913
+ return capture(conn);
914
+ }
862
915
 
863
916
  // src/browser/actions.ts
864
917
  var Actions = class {
@@ -1961,7 +2014,9 @@ Refs:
1961
2014
  this.frameContexts.set(r.id, contextId);
1962
2015
  }
1963
2016
  } catch (err) {
1964
- log.debug(`Frame evaluation failed for frame ${frameId}: ${err instanceof Error ? err.message : err}`);
2017
+ log.debug(
2018
+ `Frame evaluation failed for frame ${frameId}: ${err instanceof Error ? err.message : err}`
2019
+ );
1965
2020
  }
1966
2021
  }
1967
2022
  } catch (err) {
@@ -1986,7 +2041,9 @@ Refs:
1986
2041
  contexts.set(child.frame.id, world.executionContextId);
1987
2042
  }
1988
2043
  } catch (err) {
1989
- log.debug(`Frame ${child.frame.id} does not support isolated worlds: ${err instanceof Error ? err.message : err}`);
2044
+ log.debug(
2045
+ `Frame ${child.frame.id} does not support isolated worlds: ${err instanceof Error ? err.message : err}`
2046
+ );
1990
2047
  }
1991
2048
  }
1992
2049
  } catch (err) {
@@ -2063,11 +2120,7 @@ Refs:
2063
2120
  });
2064
2121
  const value = frameResult.result?.value;
2065
2122
  if (!value || value === "null") return null;
2066
- const parsed = safeJsonParse(
2067
- value,
2068
- null,
2069
- "snapshot.resolveRefInFrame"
2070
- );
2123
+ const parsed = safeJsonParse(value, null, "snapshot.resolveRefInFrame");
2071
2124
  if (!parsed) return null;
2072
2125
  if (parsed.error) return parsed;
2073
2126
  return {
@@ -2077,7 +2130,9 @@ Refs:
2077
2130
  height: parsed.height
2078
2131
  };
2079
2132
  } catch (err) {
2080
- log.debug(`resolveRefInFrame failed for ref ${refId}: ${err instanceof Error ? err.message : err}`);
2133
+ log.debug(
2134
+ `resolveRefInFrame failed for ref ${refId}: ${err instanceof Error ? err.message : err}`
2135
+ );
2081
2136
  return null;
2082
2137
  }
2083
2138
  }
package/dist/index.js CHANGED
@@ -34,7 +34,7 @@ import {
34
34
  setSessionBusy,
35
35
  toggleScheduledTask,
36
36
  updateHeartbeat
37
- } from "./chunk-O5K33FBW.js";
37
+ } from "./chunk-S7PYDTKE.js";
38
38
  import {
39
39
  JobRunner,
40
40
  callMcpHandler
@@ -2,7 +2,7 @@
2
2
  import {
3
3
  TaskProcessor,
4
4
  getBrowser
5
- } from "../chunk-O5K33FBW.js";
5
+ } from "../chunk-S7PYDTKE.js";
6
6
  import "../chunk-RRMI6RDG.js";
7
7
  import {
8
8
  setLogTransport
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "assistme",
3
- "version": "0.9.1",
3
+ "version": "0.9.2",
4
4
  "description": "AssistMe CLI Agent - AI-powered agentic assistant for code, browser, and automation",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -41,5 +41,7 @@ export const SCREENSHOT_MAX_DIM_PX = 1568;
41
41
 
42
42
  /** Promise-based delay helper. */
43
43
  export function delay(ms: number): Promise<void> {
44
- return new Promise((r) => { setTimeout(r, ms); });
44
+ return new Promise((r) => {
45
+ setTimeout(r, ms);
46
+ });
45
47
  }
@@ -9,10 +9,13 @@
9
9
  * many-image requests (2000px)."
10
10
  *
11
11
  * Implementation uses CDP's native `clip.scale` so no image library is
12
- * needed. We read the viewport size and devicePixelRatio, then downscale
13
- * only when the natural capture would exceed the cap.
12
+ * needed. We read the viewport size and devicePixelRatio to pick an initial
13
+ * scale, then decode the actual PNG dimensions from its header and re-
14
+ * capture with a smaller scale if the output is still over the cap (guards
15
+ * against any DPR/scale compounding differences across Chrome builds).
14
16
  */
15
17
 
18
+ import { log } from "../utils/logger.js";
16
19
  import type { CDPConnection } from "./connection.js";
17
20
  import { SCREENSHOT_MAX_DIM_PX, SCREENSHOT_QUALITY } from "./delays.js";
18
21
  import type { CDPEvalResult } from "./types.js";
@@ -23,6 +26,11 @@ interface ViewportMetrics {
23
26
  dpr: number;
24
27
  }
25
28
 
29
+ /** PNG signature + IHDR header positions. See https://www.w3.org/TR/png/#5PNG-file-signature */
30
+ const PNG_SIGNATURE_LEN = 8;
31
+ const PNG_IHDR_WIDTH_OFFSET = PNG_SIGNATURE_LEN + 8; // 8 (sig) + 4 (IHDR length) + 4 ("IHDR")
32
+ const PNG_IHDR_HEADER_BYTES = PNG_IHDR_WIDTH_OFFSET + 8; // width (4) + height (4)
33
+
26
34
  async function getViewportMetrics(conn: CDPConnection): Promise<ViewportMetrics> {
27
35
  try {
28
36
  const result = await conn.send("Runtime.evaluate", {
@@ -42,10 +50,45 @@ async function getViewportMetrics(conn: CDPConnection): Promise<ViewportMetrics>
42
50
  } catch {
43
51
  // Fall through to defaults
44
52
  }
45
- // Safe defaults if viewport query fails — no clip, CDP uses natural capture
46
53
  return { width: 0, height: 0, dpr: 1 };
47
54
  }
48
55
 
56
+ /** Decode PNG dimensions from base64 data, or null if not a PNG. */
57
+ function pngDimensions(base64: string): { width: number; height: number } | null {
58
+ if (!base64 || base64.length < 100) return null;
59
+ // Decode just enough bytes to read IHDR.
60
+ // Base64 → binary: every 4 chars decode to 3 bytes. Need PNG_IHDR_HEADER_BYTES bytes.
61
+ const b64Slice = base64.slice(0, Math.ceil(PNG_IHDR_HEADER_BYTES / 3) * 4);
62
+ let buf: Buffer;
63
+ try {
64
+ buf = Buffer.from(b64Slice, "base64");
65
+ } catch {
66
+ return null;
67
+ }
68
+ if (buf.length < PNG_IHDR_HEADER_BYTES) return null;
69
+ // PNG signature: 89 50 4E 47 0D 0A 1A 0A
70
+ if (buf[0] !== 0x89 || buf[1] !== 0x50 || buf[2] !== 0x4e || buf[3] !== 0x47) {
71
+ return null;
72
+ }
73
+ const width = buf.readUInt32BE(PNG_IHDR_WIDTH_OFFSET);
74
+ const height = buf.readUInt32BE(PNG_IHDR_WIDTH_OFFSET + 4);
75
+ return { width, height };
76
+ }
77
+
78
+ async function capture(
79
+ conn: CDPConnection,
80
+ clip?: { x: number; y: number; width: number; height: number; scale: number }
81
+ ): Promise<string> {
82
+ const params: Record<string, unknown> = {
83
+ format: "png",
84
+ quality: SCREENSHOT_QUALITY,
85
+ captureBeyondViewport: false,
86
+ };
87
+ if (clip) params.clip = clip;
88
+ const result = await conn.send("Page.captureScreenshot", params);
89
+ return (result as { data?: string }).data || "";
90
+ }
91
+
49
92
  /**
50
93
  * Capture a PNG screenshot of the current page, downsampled so the longer
51
94
  * side does not exceed {@link SCREENSHOT_MAX_DIM_PX}. Returns base64 data.
@@ -56,39 +99,82 @@ export async function captureBoundedScreenshot(
56
99
  ): Promise<string> {
57
100
  const { width, height, dpr } = await getViewportMetrics(conn);
58
101
 
59
- // If we couldn't determine metrics, fall back to unclipped capture.
60
- if (width === 0 || height === 0) {
61
- const result = await conn.send("Page.captureScreenshot", {
62
- format: "png",
63
- quality: SCREENSHOT_QUALITY,
64
- captureBeyondViewport: false,
65
- });
66
- return (result as { data?: string }).data || "";
102
+ // Without viewport metrics we can't pre-scale — capture, verify, retry if needed.
103
+ let currentWidth = width;
104
+ let currentHeight = height;
105
+ let currentScale: number | null = null;
106
+
107
+ if (width > 0 && height > 0) {
108
+ // Pick an initial scale that is safe in the worst case where CDP's
109
+ // clip.scale compounds with the device pixel ratio (so output pixels
110
+ // = css × scale × dpr). If CDP instead treats scale as a pure CSS
111
+ // multiplier (output = css × scale), we end up smaller than necessary,
112
+ // but the post-capture verification below guarantees correctness either
113
+ // way. Cap at 1 so we never request an upscale.
114
+ currentScale = Math.min(1, maxDim / (Math.max(width, height) * dpr));
67
115
  }
68
116
 
69
- // Natural output dimensions (what we'd get without clip): viewport CSS px × DPR.
70
- const naturalMax = Math.max(width, height) * dpr;
117
+ // Capture + verify loop. Each retry shrinks scale based on the actual
118
+ // measured output, so we converge in at most a couple iterations even if
119
+ // CDP's scale semantics compound with DPR on this Chrome build.
120
+ for (let attempt = 0; attempt < 3; attempt++) {
121
+ const data =
122
+ currentScale !== null && currentWidth > 0 && currentHeight > 0
123
+ ? await capture(conn, {
124
+ x: 0,
125
+ y: 0,
126
+ width: currentWidth,
127
+ height: currentHeight,
128
+ scale: currentScale,
129
+ })
130
+ : await capture(conn);
71
131
 
72
- // If the natural capture already fits under the cap, no scaling needed.
73
- if (naturalMax <= maxDim) {
74
- const result = await conn.send("Page.captureScreenshot", {
75
- format: "png",
76
- quality: SCREENSHOT_QUALITY,
77
- captureBeyondViewport: false,
78
- });
79
- return (result as { data?: string }).data || "";
80
- }
132
+ if (!data) return data;
81
133
 
82
- // CDP clip.scale applies to CSS pixels: output = cssPx × scale.
83
- // Pick the largest scale that keeps max(output) ≤ maxDim, capped at dpr
84
- // so we never upscale beyond the native render.
85
- const scale = Math.min(dpr, maxDim / Math.max(width, height));
134
+ const dims = pngDimensions(data);
135
+ if (!dims) {
136
+ // Can't verify — return as-is (shouldn't happen with PNG format).
137
+ return data;
138
+ }
86
139
 
87
- const result = await conn.send("Page.captureScreenshot", {
88
- format: "png",
89
- quality: SCREENSHOT_QUALITY,
90
- captureBeyondViewport: false,
91
- clip: { x: 0, y: 0, width, height, scale },
92
- });
93
- return (result as { data?: string }).data || "";
140
+ const longest = Math.max(dims.width, dims.height);
141
+ if (longest <= maxDim) {
142
+ return data;
143
+ }
144
+
145
+ // Output too large — shrink. Derive a new scale from actual output size.
146
+ // Leave a small safety margin (0.95) so rounding doesn't push us back over.
147
+ const shrinkFactor = (maxDim / longest) * 0.95;
148
+ if (currentScale === null) {
149
+ // We had no viewport metrics on the first pass. Approximate a clip scale
150
+ // from the observed output: assume natural capture gave dims.width px
151
+ // at scale 1, so scale to fit.
152
+ currentWidth = dims.width;
153
+ currentHeight = dims.height;
154
+ currentScale = shrinkFactor;
155
+ } else {
156
+ currentScale = currentScale * shrinkFactor;
157
+ }
158
+ log.debug(
159
+ `Screenshot over cap (${dims.width}×${dims.height}, cap ${maxDim}); retrying at scale ${currentScale.toFixed(3)}`
160
+ );
161
+
162
+ // Retry with the new scale on the next iteration.
163
+ if (currentScale <= 0) {
164
+ // Sanity guard — return current data rather than infinite shrink.
165
+ return data;
166
+ }
167
+ }
168
+
169
+ // Last resort — capture with a very small scale so we don't block the task.
170
+ if (currentWidth > 0 && currentHeight > 0) {
171
+ return capture(conn, {
172
+ x: 0,
173
+ y: 0,
174
+ width: currentWidth,
175
+ height: currentHeight,
176
+ scale: Math.min(currentScale ?? 0.25, 0.25),
177
+ });
178
+ }
179
+ return capture(conn);
94
180
  }
@@ -62,7 +62,9 @@ export class SnapshotEngine {
62
62
  });
63
63
 
64
64
  const refs: RefEntry[] = safeJsonParse<RawRefData[]>(
65
- ((findResult as CDPEvalResult).result?.value as string) || "[]", [], "snapshot.findElements"
65
+ ((findResult as CDPEvalResult).result?.value as string) || "[]",
66
+ [],
67
+ "snapshot.findElements"
66
68
  ).map((r) => ({
67
69
  id: r.id,
68
70
  role: r.role,
@@ -168,7 +170,9 @@ export class SnapshotEngine {
168
170
  });
169
171
 
170
172
  const frameRefs = safeJsonParse<RawRefData[]>(
171
- ((frameResult as CDPEvalResult).result?.value as string) || "[]", [], "snapshot.frameRefs"
173
+ ((frameResult as CDPEvalResult).result?.value as string) || "[]",
174
+ [],
175
+ "snapshot.frameRefs"
172
176
  );
173
177
 
174
178
  for (const r of frameRefs) {
@@ -188,7 +192,9 @@ export class SnapshotEngine {
188
192
  this.frameContexts.set(r.id, contextId);
189
193
  }
190
194
  } catch (err) {
191
- log.debug(`Frame evaluation failed for frame ${frameId}: ${err instanceof Error ? err.message : err}`);
195
+ log.debug(
196
+ `Frame evaluation failed for frame ${frameId}: ${err instanceof Error ? err.message : err}`
197
+ );
192
198
  }
193
199
  }
194
200
  } catch (err) {
@@ -223,7 +229,9 @@ export class SnapshotEngine {
223
229
  contexts.set(child.frame.id, world.executionContextId);
224
230
  }
225
231
  } catch (err) {
226
- log.debug(`Frame ${child.frame.id} does not support isolated worlds: ${err instanceof Error ? err.message : err}`);
232
+ log.debug(
233
+ `Frame ${child.frame.id} does not support isolated worlds: ${err instanceof Error ? err.message : err}`
234
+ );
227
235
  }
228
236
  }
229
237
  } catch (err) {
@@ -320,9 +328,13 @@ export class SnapshotEngine {
320
328
  const value = (frameResult as CDPEvalResult).result?.value as string;
321
329
  if (!value || value === "null") return null;
322
330
 
323
- const parsed = safeJsonParse<{ x: number; y: number; width: number; height: number; error?: string } | null>(
324
- value, null, "snapshot.resolveRefInFrame"
325
- );
331
+ const parsed = safeJsonParse<{
332
+ x: number;
333
+ y: number;
334
+ width: number;
335
+ height: number;
336
+ error?: string;
337
+ } | null>(value, null, "snapshot.resolveRefInFrame");
326
338
  if (!parsed) return null;
327
339
  if (parsed.error) return parsed;
328
340
 
@@ -333,9 +345,10 @@ export class SnapshotEngine {
333
345
  height: parsed.height,
334
346
  };
335
347
  } catch (err) {
336
- log.debug(`resolveRefInFrame failed for ref ${refId}: ${err instanceof Error ? err.message : err}`);
348
+ log.debug(
349
+ `resolveRefInFrame failed for ref ${refId}: ${err instanceof Error ? err.message : err}`
350
+ );
337
351
  return null;
338
352
  }
339
353
  }
340
354
  }
341
-