@reconcrap/boss-recommend-mcp 2.0.46 → 2.0.47
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/boss-recommend-mcp.js +4 -4
- package/config/screening-config.example.json +27 -27
- package/package.json +1 -1
- package/scripts/postinstall.cjs +44 -44
- package/skills/boss-chat/README.md +39 -39
- package/skills/boss-chat/SKILL.md +93 -93
- package/skills/boss-recommend-pipeline/README.md +12 -12
- package/skills/boss-recommend-pipeline/SKILL.md +180 -180
- package/skills/boss-recruit-pipeline/README.md +17 -17
- package/skills/boss-recruit-pipeline/SKILL.md +58 -58
- package/src/chat-mcp.js +1780 -1780
- package/src/chat-runtime-config.js +749 -749
- package/src/cli.js +3054 -3054
- package/src/core/boss-cards/index.js +199 -199
- package/src/core/browser/index.js +1453 -1453
- package/src/core/capture/index.js +1201 -1201
- package/src/core/cv-acquisition/index.js +238 -238
- package/src/core/cv-capture-target/index.js +299 -299
- package/src/core/greet-quota/index.js +54 -54
- package/src/core/infinite-list/index.js +1326 -1326
- package/src/core/reporting/legacy-csv.js +341 -341
- package/src/core/run/timing.js +33 -33
- package/src/core/screening/index.js +50 -3
- package/src/core/self-heal/index.js +973 -973
- package/src/core/self-heal/viewport.js +564 -564
- package/src/domains/chat/cards.js +137 -137
- package/src/domains/chat/constants.js +221 -221
- package/src/domains/chat/detail.js +1668 -1668
- package/src/domains/chat/index.js +7 -7
- package/src/domains/chat/jobs.js +592 -592
- package/src/domains/chat/page-guard.js +98 -98
- package/src/domains/chat/roots.js +56 -56
- package/src/domains/chat/run-service.js +1977 -1977
- package/src/domains/recommend/actions.js +457 -457
- package/src/domains/recommend/cards.js +243 -243
- package/src/domains/recommend/constants.js +165 -165
- package/src/domains/recommend/detail.js +25 -18
- package/src/domains/recommend/filters.js +610 -610
- package/src/domains/recommend/index.js +10 -10
- package/src/domains/recommend/jobs.js +316 -316
- package/src/domains/recommend/refresh.js +472 -472
- package/src/domains/recommend/roots.js +80 -80
- package/src/domains/recommend/run-service.js +27 -20
- package/src/domains/recommend/scopes.js +246 -246
- package/src/domains/recruit/actions.js +277 -277
- package/src/domains/recruit/cards.js +74 -74
- package/src/domains/recruit/constants.js +167 -167
- package/src/domains/recruit/detail.js +461 -461
- package/src/domains/recruit/index.js +9 -9
- package/src/domains/recruit/instruction-parser.js +451 -451
- package/src/domains/recruit/refresh.js +44 -44
- package/src/domains/recruit/roots.js +68 -68
- package/src/domains/recruit/run-service.js +1207 -1207
- package/src/domains/recruit/search.js +1202 -1202
- package/src/recommend-mcp.js +22 -22
- package/src/recruit-mcp.js +1338 -1338
|
@@ -1,1201 +1,1201 @@
|
|
|
1
|
-
import fs from "node:fs";
|
|
2
|
-
import crypto from "node:crypto";
|
|
3
|
-
import path from "node:path";
|
|
4
|
-
import sharp from "sharp";
|
|
5
|
-
import {
|
|
6
|
-
getAttributesMap,
|
|
7
|
-
getNodeBox,
|
|
8
|
-
getOuterHTML,
|
|
9
|
-
querySelectorAll,
|
|
10
|
-
sleep
|
|
11
|
-
} from "../browser/index.js";
|
|
12
|
-
import {
|
|
13
|
-
htmlToText,
|
|
14
|
-
normalizeText
|
|
15
|
-
} from "../screening/index.js";
|
|
16
|
-
|
|
17
|
-
/**
 * Returns the current time as an ISO-8601 string (UTC, millisecond precision).
 *
 * @returns {string} Timestamp produced by `Date.prototype.toISOString`.
 */
function nowIso() {
  const current = new Date();
  return current.toISOString();
}
|
|
20
|
-
|
|
21
|
-
/**
 * Resolves an output file path to an absolute path and makes sure its parent
 * directory exists.
 *
 * @param {string} [filePath] - Target file path; any falsy value means "no file output".
 * @returns {string|null} Absolute path, or `null` when no path was supplied.
 */
function resolveOutputPath(filePath) {
  if (filePath) {
    const absolutePath = path.resolve(filePath);
    const parentDirectory = path.dirname(absolutePath);
    // Create the whole directory chain so a later write cannot fail on a missing folder.
    fs.mkdirSync(parentDirectory, { recursive: true });
    return absolutePath;
  }
  return null;
}
|
|
27
|
-
|
|
28
|
-
/**
 * Expands a rectangle by `padding` on every side, clamping the origin to the
 * non-negative quadrant, and returns it as a screenshot clip (`scale: 1`).
 *
 * The far edges are always `rect.x + rect.width + padding` and
 * `rect.y + rect.height + padding`; only the near edges are clamped at 0.
 * The previous formula subtracted the applied near-side padding instead of
 * adding it, which removed the right/bottom padding for unclamped rectangles
 * and doubled it for rectangles touching the origin.
 *
 * @param {{x: number, y: number, width: number, height: number}} rect - Rectangle to pad.
 * @param {number} [padding=0] - Padding in the rect's units; negative or non-numeric values count as 0.
 * @returns {{x: number, y: number, width: number, height: number, scale: number}} Padded clip; width and height are at least 1.
 */
function withPadding(rect, padding = 0) {
  const safePadding = Math.max(0, Number(padding) || 0);
  const x = Math.max(0, rect.x - safePadding);
  const y = Math.max(0, rect.y - safePadding);
  // (rect.x - x) is the padding actually gained on the near side after clamping;
  // the far side always receives the full padding.
  const width = rect.width + safePadding + (rect.x - x);
  const height = rect.height + safePadding + (rect.y - y);
  return {
    x,
    y,
    width: Math.max(1, width),
    height: Math.max(1, height),
    scale: 1
  };
}
|
|
40
|
-
|
|
41
|
-
/**
 * Returns a usable random source.
 *
 * @param {*} random - Candidate random function.
 * @returns {Function} `random` itself when it is a function, otherwise `Math.random`.
 */
function normalizeRandom(random) {
  if (typeof random !== "function") {
    return Math.random;
  }
  return random;
}
|
|
44
|
-
|
|
45
|
-
/**
 * Draws a number from `[min, max)` using the supplied random source.
 *
 * @param {Function} [random] - Random source returning values in [0, 1); non-functions fall back to `Math.random`.
 * @param {number} min - Lower bound; non-numeric or zero values count as 0.
 * @param {number} max - Upper bound; non-numeric or zero values collapse to the lower bound.
 * @returns {number} The lower bound when the range is empty or inverted, otherwise a scaled draw.
 */
function randomBetween(random, min, max) {
  const floorValue = Number(min) || 0;
  const ceilingValue = Number(max) || floorValue;
  // Empty or inverted range: nothing to sample, and the random source is not consulted.
  if (ceilingValue <= floorValue) {
    return floorValue;
  }
  const draw = normalizeRandom(random);
  const span = ceilingValue - floorValue;
  return floorValue + draw() * span;
}
|
|
51
|
-
|
|
52
|
-
/**
 * Parses a ratio-like value and clamps it into `[min, max]`.
 *
 * `null`, `undefined` and blank strings are treated as "not provided" and use
 * `fallback`. Previously `Number(null)` and `Number("")` evaluated to 0, so an
 * unset value skipped the fallback and was clamped to `min` instead.
 *
 * @param {*} raw - Candidate value (number or numeric string).
 * @param {number} fallback - Value used when `raw` is missing or not a finite number.
 * @param {{min?: number, max?: number}} [bounds] - Inclusive clamp bounds (default 0..1).
 * @returns {number} The parsed (or fallback) value clamped into the bounds.
 */
function normalizeRatio(raw, fallback, { min = 0, max = 1 } = {}) {
  const isBlank = raw == null || (typeof raw === "string" && raw.trim() === "");
  const parsed = isBlank ? Number.NaN : Number(raw);
  const value = Number.isFinite(parsed) ? parsed : fallback;
  return Math.min(max, Math.max(min, value));
}
|
|
57
|
-
|
|
58
|
-
/**
 * Normalizes scroll-delta jitter options into a snake_case settings object.
 *
 * @param {object} [options]
 * @param {boolean} [options.enabled=false] - Only the literal `true` enables jitter.
 * @param {number} [options.minRatio=0.65] - Lower jitter ratio, clamped to [0.1, 1].
 * @param {number} [options.maxRatio=0.9] - Upper jitter ratio, clamped to [min_ratio, 1].
 * @param {number} [options.minOverlapRatio=0.2] - Overlap ratio, clamped to [0, 0.8].
 * @param {boolean} [options.preserveCoverage=true] - Anything except the literal `false` counts as true.
 * @param {Function} [options.random=Math.random] - Random source; non-functions fall back to `Math.random`.
 * @returns {{enabled: boolean, min_ratio: number, max_ratio: number, min_overlap_ratio: number, preserve_coverage: boolean, random: Function}}
 */
function normalizeScrollDeltaJitter({
  enabled = false,
  minRatio = 0.65,
  maxRatio = 0.9,
  minOverlapRatio = 0.2,
  preserveCoverage = true,
  random = Math.random
} = {}) {
  const lowerRatio = normalizeRatio(minRatio, 0.65, { min: 0.1, max: 1 });
  const clampedUpperRatio = normalizeRatio(maxRatio, 0.9, { min: lowerRatio, max: 1 });
  // The upper ratio may never drop below the lower ratio.
  const upperRatio = Math.max(lowerRatio, clampedUpperRatio);
  const overlapRatio = normalizeRatio(minOverlapRatio, 0.2, { min: 0, max: 0.8 });
  const randomSource = normalizeRandom(random);

  return {
    enabled: enabled === true,
    min_ratio: lowerRatio,
    max_ratio: upperRatio,
    min_overlap_ratio: overlapRatio,
    preserve_coverage: preserveCoverage !== false,
    random: randomSource
  };
}
|
|
77
|
-
|
|
78
|
-
/**
 * Picks the vertical scroll distance for the next step, optionally jittered.
 *
 * With jitter disabled the base delta is returned unchanged. With jitter
 * enabled a value is drawn between `base * min_ratio` and `base * max_ratio`,
 * where the upper bound is additionally capped so that at least
 * `min_overlap_ratio` of the clip height overlaps between steps.
 *
 * NOTE(review): `jitter.preserve_coverage` is only echoed in the result; the
 * overlap cap is applied regardless of its value — confirm this is intended.
 *
 * @param {object} [options]
 * @param {number} [options.baseDelta] - Requested delta; invalid or zero values fall back to 650.
 * @param {number} [options.clipHeight] - Height of the captured clip; invalid values count as 1.
 * @param {object} [options.jitter] - Settings in the shape produced by `normalizeScrollDeltaJitter`.
 * @returns {object} `{ deltaY, jittered, base_delta_y }` plus diagnostic fields when jittered.
 */
function resolveCoverageSafeScrollDelta({
  baseDelta,
  clipHeight,
  jitter
} = {}) {
  const baseDeltaY = Math.max(1, Number(baseDelta) || 650);
  if (!jitter?.enabled) {
    return { deltaY: baseDeltaY, jittered: false, base_delta_y: baseDeltaY };
  }

  const {
    min_ratio: minRatio,
    max_ratio: maxRatio,
    min_overlap_ratio: minOverlapRatio
  } = jitter;
  const clipHeightPx = Math.max(1, Number(clipHeight) || 1);
  // Largest step that still leaves the required overlap between consecutive clips.
  const overlapCap = Math.max(1, Math.floor(clipHeightPx * (1 - minOverlapRatio)));
  const jitterCeiling = Math.round(baseDeltaY * maxRatio);
  const maxDeltaY = Math.max(1, Math.min(jitterCeiling, overlapCap));
  const jitterFloor = Math.max(1, Math.round(baseDeltaY * minRatio));
  const minDeltaY = Math.min(maxDeltaY, jitterFloor);
  const drawn = randomBetween(jitter.random, minDeltaY, maxDeltaY);

  return {
    deltaY: Math.max(1, Math.round(drawn)),
    jittered: true,
    base_delta_y: baseDeltaY,
    min_delta_y: minDeltaY,
    max_delta_y: maxDeltaY,
    min_ratio: minRatio,
    max_ratio: maxRatio,
    min_overlap_ratio: minOverlapRatio,
    clip_height: clipHeightPx,
    max_delta_for_overlap: overlapCap,
    preserve_coverage: jitter.preserve_coverage
  };
}
|
|
110
|
-
|
|
111
|
-
/**
 * Captures a DOM node's attributes, outer HTML and derived text into a
 * schema-versioned record.
 *
 * @param {object} client - CDP client handed through to the browser helpers.
 * @param {number} nodeId - DOM node id to capture.
 * @param {object} [options]
 * @param {string} [options.domain="unknown"] - Domain label; normalized, empty values become "unknown".
 * @param {string} [options.source="dom"] - Source label copied to the record as-is.
 * @param {object} [options.metadata={}] - Caller metadata copied to the record as-is.
 * @returns {Promise<object>} Record with attributes, HTML, text and their lengths.
 */
export async function captureNodeHtml(client, nodeId, {
  domain = "unknown",
  source = "dom",
  metadata = {}
} = {}) {
  // The two lookups are independent, so they run concurrently.
  const pendingAttributes = getAttributesMap(client, nodeId);
  const pendingHtml = getOuterHTML(client, nodeId);
  const [attributes, outerHTML] = await Promise.all([pendingAttributes, pendingHtml]);

  const text = htmlToText(outerHTML);
  const domainLabel = normalizeText(domain) || "unknown";
  const capturedAt = nowIso();

  return {
    schema_version: 1,
    domain: domainLabel,
    source,
    captured_at: capturedAt,
    node_id: nodeId,
    attributes,
    outer_html_length: outerHTML.length,
    text_length: text.length,
    text,
    outer_html: outerHTML,
    metadata
  };
}
|
|
135
|
-
|
|
136
|
-
/**
 * Captures a screenshot clipped to a DOM node's box (plus optional padding)
 * via `Page.captureScreenshot`, optionally writing it to disk.
 *
 * `mime_type` now reports `image/webp` for `format: "webp"`; it previously
 * reported `image/png` for every format other than "jpeg".
 *
 * @param {object} client - CDP client exposing `Page.captureScreenshot`.
 * @param {number} nodeId - DOM node id whose box defines the clip.
 * @param {object} [options]
 * @param {string} [options.filePath] - Output file; omitted means the image is not written to disk.
 * @param {string} [options.format="png"] - Image format forwarded to CDP.
 * @param {number} [options.quality] - Compression quality; forwarded only when not null/undefined.
 * @param {number} [options.padding=0] - Padding added around the node rect.
 * @param {boolean} [options.captureBeyondViewport=true] - Forwarded to CDP.
 * @param {boolean} [options.fromSurface=true] - Forwarded to CDP.
 * @param {object} [options.metadata={}] - Caller metadata copied to the record as-is.
 * @returns {Promise<object>} Record describing the capture (format, MIME type, byte length, path, clip, node rect).
 */
export async function captureNodeScreenshot(client, nodeId, {
  filePath,
  format = "png",
  quality,
  padding = 0,
  captureBeyondViewport = true,
  fromSurface = true,
  metadata = {}
} = {}) {
  const box = await getNodeBox(client, nodeId);
  const clip = withPadding(box.rect, padding);
  const captureOptions = {
    format,
    fromSurface,
    captureBeyondViewport,
    clip
  };
  if (quality != null) {
    captureOptions.quality = quality;
  }
  const screenshot = await client.Page.captureScreenshot(captureOptions);
  // A missing payload becomes an empty buffer instead of throwing.
  const buffer = Buffer.from(screenshot.data || "", "base64");
  const resolvedPath = resolveOutputPath(filePath);
  if (resolvedPath) {
    fs.writeFileSync(resolvedPath, buffer);
  }
  // Anything other than jpeg/webp is reported as PNG, matching the default format.
  const mimeSubtype = format === "jpeg" || format === "webp" ? format : "png";
  return {
    schema_version: 1,
    source: "image",
    captured_at: nowIso(),
    node_id: nodeId,
    format,
    mime_type: `image/${mimeSubtype}`,
    byte_length: buffer.length,
    file_path: resolvedPath,
    clip,
    node_rect: box.rect,
    metadata
  };
}
|
|
176
|
-
|
|
177
|
-
/**
 * Captures a screenshot of the page without a node clip via
 * `Page.captureScreenshot`, optionally writing it to disk.
 *
 * `mime_type` now reports `image/webp` for `format: "webp"`; it previously
 * reported `image/png` for every format other than "jpeg".
 *
 * @param {object} client - CDP client exposing `Page.captureScreenshot`.
 * @param {object} [options]
 * @param {string} [options.filePath] - Output file; omitted means the image is not written to disk.
 * @param {string} [options.format="png"] - Image format forwarded to CDP.
 * @param {number} [options.quality] - Compression quality; forwarded only when not null/undefined.
 * @param {boolean} [options.captureBeyondViewport=false] - Forwarded to CDP and echoed in the record.
 * @param {boolean} [options.fromSurface=true] - Forwarded to CDP.
 * @param {object} [options.metadata={}] - Caller metadata copied to the record as-is.
 * @returns {Promise<object>} Record describing the capture (format, MIME type, byte length, path).
 */
export async function captureViewportScreenshot(client, {
  filePath,
  format = "png",
  quality,
  captureBeyondViewport = false,
  fromSurface = true,
  metadata = {}
} = {}) {
  const captureOptions = {
    format,
    fromSurface,
    captureBeyondViewport
  };
  if (quality != null) {
    captureOptions.quality = quality;
  }
  const screenshot = await client.Page.captureScreenshot(captureOptions);
  // A missing payload becomes an empty buffer instead of throwing.
  const buffer = Buffer.from(screenshot.data || "", "base64");
  const resolvedPath = resolveOutputPath(filePath);
  if (resolvedPath) {
    fs.writeFileSync(resolvedPath, buffer);
  }
  // Anything other than jpeg/webp is reported as PNG, matching the default format.
  const mimeSubtype = format === "jpeg" || format === "webp" ? format : "png";
  return {
    schema_version: 1,
    source: "viewport-image",
    captured_at: nowIso(),
    format,
    mime_type: `image/${mimeSubtype}`,
    byte_length: buffer.length,
    file_path: resolvedPath,
    capture_beyond_viewport: Boolean(captureBeyondViewport),
    metadata
  };
}
|
|
211
|
-
|
|
212
|
-
/**
 * Builds the file path for one page of a screenshot sequence:
 * `<dir>/<name>-page-NN<ext>`, where NN is the 1-based, zero-padded page number.
 *
 * @param {string} basePath - Base output path; falsy yields `null`.
 * @param {number} index - Zero-based page index.
 * @param {string} extension - Extension (without dot) used when `basePath` has none.
 * @returns {string|null} Page file path, or `null` when there is no base path.
 */
function filePathForSequence(basePath, index, extension) {
  const absoluteBase = resolveOutputPath(basePath);
  if (!absoluteBase) {
    return null;
  }
  const { dir, name, ext } = path.parse(absoluteBase);
  const pageNumber = String(index + 1).padStart(2, "0");
  const suffix = ext || `.${extension}`;
  return path.join(dir, `${name}-page-${pageNumber}${suffix}`);
}
|
|
219
|
-
|
|
220
|
-
/**
 * Builds the file path for one composed LLM image:
 * `<dir>/<name>-llm-NN.jpg`, where NN is the 1-based, zero-padded image number.
 * The base path's own extension is discarded.
 *
 * @param {string} basePath - Base output path; falsy yields `null`.
 * @param {number} index - Zero-based image index.
 * @returns {string|null} Image file path, or `null` when there is no base path.
 */
function filePathForLlmSequence(basePath, index) {
  const absoluteBase = resolveOutputPath(basePath);
  if (!absoluteBase) {
    return null;
  }
  const { dir, name } = path.parse(absoluteBase);
  const imageNumber = String(index + 1).padStart(2, "0");
  return path.join(dir, `${name}-llm-${imageNumber}.jpg`);
}
|
|
227
|
-
|
|
228
|
-
/**
 * Computes the SHA-256 digest of a screenshot buffer.
 *
 * @param {Buffer|string} buffer - Data to hash.
 * @returns {string} Lower-case hexadecimal digest.
 */
function screenshotHash(buffer) {
  const hasher = crypto.createHash("sha256");
  hasher.update(buffer);
  return hasher.digest("hex");
}
|
|
231
|
-
|
|
232
|
-
/**
 * Creates the error used when an image-capture step exceeds its time budget.
 *
 * @param {string} label - Name of the step that timed out.
 * @param {number} timeoutMs - Budget that was exceeded, in milliseconds.
 * @returns {Error} Error with `code` "IMAGE_CAPTURE_TIMEOUT", `capture_step` and `timeout_ms`.
 */
function createCaptureTimeoutError(label, timeoutMs) {
  const message = `Image fallback capture timed out during ${label} after ${timeoutMs}ms`;
  return Object.assign(new Error(message), {
    code: "IMAGE_CAPTURE_TIMEOUT",
    capture_step: label,
    timeout_ms: timeoutMs
  });
}
|
|
239
|
-
|
|
240
|
-
/**
 * Awaits `promise`, rejecting with a capture-timeout error if it does not
 * settle within `timeoutMs`.
 *
 * A zero, negative or non-numeric timeout disables the limit. On timeout only
 * the wait is abandoned; the underlying operation is not cancelled.
 *
 * @param {Promise<*>} promise - Operation to wait for.
 * @param {object} [options]
 * @param {string} [options.label="capture_step"] - Step name recorded on the timeout error.
 * @param {number} [options.timeoutMs=0] - Time limit in milliseconds; 0 means no limit.
 * @returns {Promise<*>} The settled value of `promise`.
 */
async function withCaptureTimeout(promise, {
  label = "capture_step",
  timeoutMs = 0
} = {}) {
  const budgetMs = Math.max(0, Number(timeoutMs) || 0);
  if (budgetMs === 0) {
    return promise;
  }

  let timerHandle = null;
  const deadline = new Promise((_resolve, reject) => {
    timerHandle = setTimeout(() => {
      reject(createCaptureTimeoutError(label, budgetMs));
    }, budgetMs);
  });

  try {
    return await Promise.race([promise, deadline]);
  } finally {
    // Always release the timer so a fast result does not leave it pending.
    if (timerHandle) {
      clearTimeout(timerHandle);
    }
  }
}
|
|
258
|
-
|
|
259
|
-
/**
 * Throws when the overall capture sequence has run longer than its budget.
 *
 * @param {number} started - Epoch milliseconds at which the sequence began.
 * @param {number} totalTimeoutMs - Total budget in milliseconds; 0 or invalid disables the check.
 * @param {string} label - Step name recorded on the error.
 * @returns {void}
 * @throws {Error} With `code` "IMAGE_CAPTURE_TOTAL_TIMEOUT" and `elapsed_ms` once the budget is exceeded.
 */
function assertCaptureTotalBudget(started, totalTimeoutMs, label) {
  const budgetMs = Math.max(0, Number(totalTimeoutMs) || 0);
  if (budgetMs === 0) {
    return;
  }
  const elapsedMs = Date.now() - started;
  if (elapsedMs <= budgetMs) {
    return;
  }
  // Reuse the step-timeout error shape, then mark it as a total-budget failure.
  throw Object.assign(createCaptureTimeoutError(label, budgetMs), {
    elapsed_ms: elapsedMs,
    code: "IMAGE_CAPTURE_TOTAL_TIMEOUT"
  });
}
|
|
269
|
-
|
|
270
|
-
// Default CSS selector list used to discover scroll anchors: plain structural
// tags first, then elements whose class contains a resume-related fragment,
// and finally any <div>.
const DEFAULT_SCROLL_ANCHOR_SELECTOR = [
  ..."h1 h2 h3 h4 h5 p li section article table tr dl dt dd".split(" "),
  ...["resume", "work", "project", "education", "experience", "item"].map(
    (fragment) => `[class*='${fragment}']`
  ),
  "div"
].join(",");
|
|
293
|
-
|
|
294
|
-
/**
 * Maps a user-supplied scroll method name onto one of the three canonical
 * values: "dom-anchor", "dom-anchor-fallback-input" or "input".
 *
 * @param {string} [value="dom-anchor-fallback-input"] - Method name; compared case-insensitively after normalization.
 * @returns {string} Canonical method; any unrecognized name yields "input".
 */
function normalizeScrollMethod(value = "dom-anchor-fallback-input") {
  const key = normalizeText(value).toLowerCase();
  switch (key) {
    case "dom":
    case "dom-anchor":
    case "dom_anchor":
    case "anchor":
      return "dom-anchor";
    case "dom-anchor-fallback-input":
    case "dom_anchor_fallback_input":
    case "dom-fallback-input":
      return "dom-anchor-fallback-input";
    default:
      return "input";
  }
}
|
|
302
|
-
|
|
303
|
-
/**
 * Converts values to numbers and returns the distinct non-zero ones in
 * first-seen order. Values that are not numeric (or are zero) are dropped.
 *
 * @param {Array<*>} [values=[]] - Values to convert.
 * @returns {number[]} Distinct non-zero numbers.
 */
function uniqueNumbers(values = []) {
  const seen = new Set();
  for (const value of values) {
    const numeric = Number(value) || 0;
    if (numeric) {
      seen.add(numeric);
    }
  }
  return [...seen];
}
|
|
306
|
-
|
|
307
|
-
/**
 * Selects up to `limit` items spread evenly across `items` (first and last
 * included when `limit` > 1), de-duplicated by `node_id`.
 *
 * @param {Array<{node_id: *}>} [items=[]] - Candidate items.
 * @param {number} [limit=1] - Maximum number of picks; invalid or < 1 counts as 1.
 * @returns {Array<object>} `items` itself when it already fits the limit, otherwise the picked subset.
 */
function pickEvenly(items = [], limit = 1) {
  const quota = Math.max(1, Number(limit) || 1);
  if (items.length <= quota) {
    return items;
  }
  const lastIndex = items.length - 1;
  // Guard the single-pick case against a division by zero.
  const divisor = Math.max(1, quota - 1);
  const byNodeId = new Map();
  for (let slot = 0; slot < quota; slot += 1) {
    const chosen = items[Math.round((slot * lastIndex) / divisor)];
    byNodeId.set(chosen.node_id, chosen);
  }
  return [...byNodeId.values()];
}
|
|
318
|
-
|
|
319
|
-
/**
 * Produces a printable label for a stop-boundary pattern.
 *
 * @param {RegExp|string} pattern - Pattern to describe.
 * @returns {string} The regex source for a RegExp, otherwise the normalized text.
 */
function patternLabel(pattern) {
  return pattern instanceof RegExp ? pattern.source : normalizeText(pattern);
}
|
|
323
|
-
|
|
324
|
-
/**
 * Converts stop-boundary patterns (RegExp or text) into matcher records.
 *
 * Each matcher is called once per probed node, so it must be stateless. A
 * RegExp carrying the `g` or `y` flag advances `lastIndex` on every `test()`
 * call, which previously made later probes start mid-string and miss matches;
 * `lastIndex` is now reset before each test. Text matchers now always return a
 * boolean.
 *
 * @param {Array<RegExp|string>|RegExp|string} [patterns=[]] - One pattern or a list; falsy entries are ignored.
 * @returns {Array<{raw: RegExp|string, label: string, matches: function(string): boolean}>} Matcher records.
 */
function stopBoundaryPatterns(patterns = []) {
  const candidates = Array.isArray(patterns) ? patterns : [patterns];
  return candidates
    .filter(Boolean)
    .map((pattern) => {
      if (pattern instanceof RegExp) {
        return {
          raw: pattern,
          label: pattern.source,
          matches: (text) => {
            // Global/sticky regexes remember where the last match ended.
            pattern.lastIndex = 0;
            return pattern.test(text);
          }
        };
      }
      const normalized = normalizeText(pattern);
      return {
        raw: pattern,
        label: normalized,
        // A pattern that normalizes to an empty string never matches.
        matches: (text) => Boolean(normalized) && text.includes(normalized)
      };
    });
}
|
|
343
|
-
|
|
344
|
-
/**
 * Finds the DOM nodes that mark where a scrolled capture should stop.
 *
 * Nodes are discovered with `selector` (or the default anchor selector when
 * only text patterns are configured). When text patterns are present, a node
 * qualifies only if its normalized text is non-empty, at most `maxTextLength`
 * characters long and matches one of the patterns. Nodes whose HTML cannot be
 * read (error or per-node timeout) are skipped silently.
 *
 * @param {object} client - CDP client handed through to the browser helpers.
 * @param {number} rootNodeId - Node id under which to search.
 * @param {object} [options]
 * @param {string} [options.selector=""] - CSS selector for candidate nodes.
 * @param {Array<RegExp|string>} [options.textPatterns=[]] - Text patterns a candidate must match.
 * @param {number} [options.maxProbeNodes=180] - Maximum number of candidates inspected.
 * @param {number} [options.maxTextLength=700] - Longest node text still considered (minimum 40).
 * @param {number} [options.stepTimeoutMs=45000] - Step budget; the per-node timeout is 1/40 of it, kept within 200..1000 ms.
 * @returns {Promise<object>} Plan with `enabled`, `ok`, `reason`, `nodes` and, after a successful query, discovery statistics.
 */
async function collectStopBoundaryNodes(client, rootNodeId, {
  selector = "",
  textPatterns = [],
  maxProbeNodes = 180,
  maxTextLength = 700,
  stepTimeoutMs = 45000
} = {}) {
  const patterns = stopBoundaryPatterns(textPatterns);
  const normalizedSelector = normalizeText(selector);
  if (!normalizedSelector && !patterns.length) {
    return { enabled: false, ok: false, reason: "not_configured", nodes: [] };
  }

  const effectiveSelector = normalizedSelector || DEFAULT_SCROLL_ANCHOR_SELECTOR;
  const started = Date.now();
  let nodeIds = [];
  try {
    const rawNodeIds = await querySelectorAll(client, rootNodeId, effectiveSelector);
    nodeIds = uniqueNumbers(rawNodeIds);
  } catch (error) {
    return {
      enabled: true,
      ok: false,
      reason: "query_selector_all_failed",
      selector: effectiveSelector,
      error: error?.message || String(error),
      nodes: []
    };
  }

  const probeLimit = Math.max(1, Number(maxProbeNodes) || 180);
  const maxStopTextLength = Math.max(40, Number(maxTextLength) || 700);
  const perNodeTimeoutMs = Math.min(1000, Math.max(200, Math.floor((Number(stepTimeoutMs) || 45000) / 40)));

  // Resolves to a boundary entry for the node, or null when it does not qualify.
  const probeNode = async (nodeId) => {
    if (!patterns.length) {
      // Selector-only mode: every discovered node is a boundary.
      return { node_id: nodeId, text_preview: "", matched_pattern: null };
    }
    const outerHTML = await withCaptureTimeout(getOuterHTML(client, nodeId), {
      label: `stop_boundary_html_${nodeId}`,
      timeoutMs: perNodeTimeoutMs
    });
    const text = normalizeText(htmlToText(outerHTML));
    if (!text || text.length > maxStopTextLength) {
      return null;
    }
    const hit = patterns.find((pattern) => pattern.matches(text));
    if (!hit) {
      return null;
    }
    return {
      node_id: nodeId,
      text_preview: text.slice(0, 120),
      matched_pattern: patternLabel(hit.raw)
    };
  };

  const nodes = [];
  for (const nodeId of nodeIds.slice(0, probeLimit)) {
    try {
      const entry = await probeNode(nodeId);
      if (entry) {
        nodes.push(entry);
      }
    } catch {
      // Best effort: a node that cannot be read is simply not a boundary.
    }
  }

  return {
    enabled: true,
    ok: nodes.length > 0,
    reason: nodes.length ? null : "no_matching_stop_boundary_nodes",
    selector: effectiveSelector,
    elapsed_ms: Date.now() - started,
    discovered_node_count: nodeIds.length,
    probed_node_count: Math.min(nodeIds.length, probeLimit),
    match_count: nodes.length,
    pattern_labels: patterns.map((pattern) => pattern.label),
    nodes
  };
}
|
|
419
|
-
|
|
420
|
-
/**
 * Decides how the current capture clip relates to the stop-boundary nodes.
 *
 * Nodes are measured in plan order. The first sufficiently large node that
 * lies entirely above the clip ends the capture immediately. Otherwise the
 * topmost node intersecting the clip is used: if too little room remains
 * above it the capture stops, else the clip is shortened to end just above it.
 * Nodes that cannot be measured (error or per-node timeout) are skipped.
 *
 * @param {object} client - CDP client handed through to `getNodeBox`.
 * @param {{nodes: Array<object>}} stopBoundaryPlan - Plan produced by the stop-boundary discovery step.
 * @param {{x: number, y: number, width: number, height: number}} clip - Clip about to be captured.
 * @param {object} [options]
 * @param {number} [options.topPadding=8] - Gap kept between the clip bottom and the boundary.
 * @param {number} [options.minCaptureHeight=180] - Minimum clip height worth capturing.
 * @param {number} [options.stepTimeoutMs=45000] - Step budget; the per-node timeout is 1/50 of it, kept within 180..900 ms.
 * @returns {Promise<object|null>} `null` when no boundary affects the clip, otherwise an action record
 *   with `action` "stop_before_capture" or "capture_then_stop".
 */
async function resolveVisibleStopBoundary(client, stopBoundaryPlan, clip, {
  topPadding = 8,
  minCaptureHeight = 180,
  stepTimeoutMs = 45000
} = {}) {
  if (!stopBoundaryPlan?.nodes?.length || !clip) return null;

  const clipTop = Number(clip.y) || 0;
  const clipBottom = clipTop + (Number(clip.height) || 0);
  const safePadding = Math.max(0, Number(topPadding) || 0);
  const safeMinHeight = Math.max(1, Number(minCaptureHeight) || 180);
  const perNodeTimeoutMs = Math.min(900, Math.max(180, Math.floor((Number(stepTimeoutMs) || 45000) / 50)));

  // Topmost boundary intersecting the clip; ties keep the earliest node in plan order.
  let boundary = null;
  for (const node of stopBoundaryPlan.nodes) {
    try {
      const box = await withCaptureTimeout(getNodeBox(client, node.node_id), {
        label: `stop_boundary_box_${node.node_id}`,
        timeoutMs: perNodeTimeoutMs
      });
      const rect = box?.rect || {};
      const height = Number(rect.height) || 0;
      const tooSmall = (Number(rect.width) || 0) < 40 || height < 6;
      if (tooSmall) continue;

      const top = Number(rect.y) || 0;
      const bottom = top + height;
      if (bottom <= clipTop + 1) {
        // The boundary has already scrolled past the clip: nothing left to capture.
        return {
          action: "stop_before_capture",
          reason: "stop_boundary_above_clip",
          node_id: node.node_id,
          matched_pattern: node.matched_pattern,
          text_preview: node.text_preview,
          rect,
          clip
        };
      }
      const intersectsClip = top < clipBottom && bottom > clipTop;
      if (intersectsClip && (boundary === null || top < boundary.top)) {
        boundary = { ...node, rect, top, bottom };
      }
    } catch {
      // Best effort: a node that cannot be measured is ignored.
    }
  }
  if (boundary === null) return null;

  const boundaryY = Math.max(clipTop, boundary.top - safePadding);
  const adjustedHeight = Math.max(0, boundaryY - clipTop);
  const boundarySummary = {
    node_id: boundary.node_id,
    matched_pattern: boundary.matched_pattern,
    text_preview: boundary.text_preview,
    rect: boundary.rect,
    clip
  };

  if (adjustedHeight < safeMinHeight) {
    return {
      action: "stop_before_capture",
      reason: "stop_boundary_near_clip_top",
      ...boundarySummary,
      adjusted_height: adjustedHeight,
      min_capture_height: safeMinHeight
    };
  }
  return {
    action: "capture_then_stop",
    reason: "stop_boundary_visible",
    ...boundarySummary,
    adjusted_clip: { ...clip, height: adjustedHeight },
    adjusted_height: adjustedHeight,
    min_capture_height: safeMinHeight
  };
}
|
|
502
|
-
|
|
503
|
-
/**
 * Discovers DOM nodes that can serve as scroll targets for a paged capture.
 *
 * Candidates are measured; those at least 80 wide and 8 tall are sorted by
 * vertical position and thinned so consecutive anchors are at least
 * `minAnchorGap` apart. If fewer than two measured anchors remain, the probed
 * nodes are used in document order without positions. The result is finally
 * reduced to at most `maxScreenshots` evenly spread anchors.
 *
 * @param {object} client - CDP client handed through to the browser helpers.
 * @param {number} rootNodeId - Node id under which to search.
 * @param {object} [options]
 * @param {string} [options.selector] - CSS selector for candidates (defaults to the module's anchor selector).
 * @param {number} [options.maxScreenshots=6] - Maximum number of anchors returned.
 * @param {number} [options.maxProbeNodes=260] - Maximum number of candidates measured.
 * @param {number} [options.minAnchorGap=180] - Minimum vertical distance between anchors (floor of 40).
 * @param {number} [options.stepTimeoutMs=45000] - Step budget; the per-node timeout is 1/30 of it, kept within 250..1200 ms.
 * @returns {Promise<object>} `{ ok: false, method, reason, ... }` on failure, otherwise anchors plus statistics.
 */
async function collectDomScrollAnchors(client, rootNodeId, {
  selector = DEFAULT_SCROLL_ANCHOR_SELECTOR,
  maxScreenshots = 6,
  maxProbeNodes = 260,
  minAnchorGap = 180,
  stepTimeoutMs = 45000
} = {}) {
  const started = Date.now();
  const failure = (reason, extra = {}) => ({ ok: false, method: "dom-anchor", reason, ...extra });

  let nodeIds = [];
  try {
    nodeIds = uniqueNumbers(await querySelectorAll(client, rootNodeId, selector));
  } catch (error) {
    return failure("query_selector_all_failed", { error: error?.message || String(error) });
  }
  if (nodeIds.length === 0) {
    return failure("no_anchor_nodes");
  }

  const probeLimit = Math.max(1, Number(maxProbeNodes) || 260);
  const perNodeTimeoutMs = Math.min(1200, Math.max(250, Math.floor((Number(stepTimeoutMs) || 45000) / 30)));
  const candidates = nodeIds.slice(0, probeLimit);

  const measured = [];
  for (const nodeId of candidates) {
    try {
      const box = await withCaptureTimeout(getNodeBox(client, nodeId), {
        label: `anchor_box_${nodeId}`,
        timeoutMs: perNodeTimeoutMs
      });
      const rect = box?.rect || {};
      const wideEnough = (Number(rect.width) || 0) >= 80;
      const tallEnough = (Number(rect.height) || 0) >= 8;
      if (wideEnough && tallEnough) {
        measured.push({
          node_id: nodeId,
          y: Math.round(Number(rect.y) || 0),
          height: Math.round(Number(rect.height) || 0)
        });
      }
    } catch {
      // Best effort: a node that cannot be measured is ignored.
    }
  }

  // Walk top to bottom, keeping an anchor only when it is far enough from the previous one.
  const requiredGap = Math.max(40, Number(minAnchorGap) || 180);
  measured.sort((a, b) => a.y - b.y);
  let anchors = [];
  for (const item of measured) {
    const previous = anchors[anchors.length - 1];
    if (!previous || Math.abs(item.y - previous.y) >= requiredGap) {
      anchors.push(item);
    }
  }

  if (anchors.length < 2) {
    // Too few measurable anchors: fall back to the probed nodes in document order.
    anchors = candidates.map((nodeId, index) => ({
      node_id: nodeId,
      y: null,
      height: null,
      document_order: index
    }));
  }

  anchors = pickEvenly(anchors, Math.max(1, Number(maxScreenshots) || 1));
  return {
    ok: anchors.length > 0,
    method: "dom-anchor",
    elapsed_ms: Date.now() - started,
    selector,
    discovered_node_count: nodeIds.length,
    measured_node_count: measured.length,
    anchor_count: anchors.length,
    anchors
  };
}
|
|
581
|
-
|
|
582
|
-
/**
 * Scrolls a DOM node into view through `DOM.scrollIntoViewIfNeeded`, bounded
 * by a timeout.
 *
 * Uses the typed `client.DOM` method when it exists, otherwise the generic
 * `client.send` transport.
 *
 * @param {object} client - CDP client.
 * @param {number} nodeId - Node to scroll into view.
 * @param {object} [options]
 * @param {number} [options.timeoutMs=10000] - Time limit for the command.
 * @param {string} [options.label="dom_scroll_anchor"] - Step name recorded on a timeout error.
 * @returns {Promise<*>} Result of the CDP command.
 * @throws {Error} When the client exposes neither way of issuing the command.
 */
async function scrollDomAnchorIntoView(client, nodeId, {
  timeoutMs = 10000,
  label = "dom_scroll_anchor"
} = {}) {
  const domApi = client.DOM;
  const hasTypedMethod = Boolean(domApi) && typeof domApi.scrollIntoViewIfNeeded === "function";
  if (hasTypedMethod) {
    const pending = domApi.scrollIntoViewIfNeeded({ nodeId });
    return withCaptureTimeout(pending, { label, timeoutMs });
  }
  if (typeof client.send === "function") {
    const pending = client.send("DOM.scrollIntoViewIfNeeded", { nodeId });
    return withCaptureTimeout(pending, { label, timeoutMs });
  }
  throw new Error("CDP client does not expose DOM.scrollIntoViewIfNeeded");
}
|
|
594
|
-
|
|
595
|
-
/**
 * Optionally downsizes and re-encodes a screenshot buffer with sharp.
 *
 * Nothing happens unless `enabled` is truthy or `resizeMaxWidth` is set. Any
 * failure is reported through `optimization_error` and the original buffer is
 * returned, so this function does not reject because of image processing.
 *
 * @param {Buffer} buffer - Encoded image.
 * @param {object} [options]
 * @param {boolean} [options.enabled=false] - Re-encode the image.
 * @param {string} [options.format="png"] - Target format; "jpg" is treated as "jpeg", anything
 *   other than jpeg/webp is encoded as PNG.
 * @param {number} [options.quality] - JPEG/WebP quality, clamped to 35..95 (defaults 72 / 76).
 * @param {number} [options.resizeMaxWidth=0] - Maximum width; wider images are scaled down.
 * @returns {Promise<{buffer: Buffer, optimized: boolean, original_byte_length?: number, optimization_error: string|null}>}
 */
async function optimizeScreenshotBuffer(buffer, {
  enabled = false,
  format = "png",
  quality,
  resizeMaxWidth = 0
} = {}) {
  const nothingRequested = !enabled && !resizeMaxWidth;
  if (nothingRequested) {
    return { buffer, optimized: false, optimization_error: null };
  }

  // Clamp a caller-supplied quality into the supported band, or use the format default.
  const resolveQuality = (defaultQuality) => (
    quality == null
      ? defaultQuality
      : Math.max(35, Math.min(95, Number(quality) || defaultQuality))
  );

  try {
    const targetFormat = format === "jpg" ? "jpeg" : format;
    let pipeline = sharp(buffer, { failOn: "none" });
    const { width: rawWidth } = await pipeline.metadata();
    const sourceWidth = Number(rawWidth) || 0;
    const widthCap = Math.max(0, Number(resizeMaxWidth) || 0);
    if (widthCap > 0 && sourceWidth > widthCap) {
      pipeline = pipeline.resize({ width: widthCap, withoutEnlargement: true });
    }

    switch (targetFormat) {
      case "jpeg":
        pipeline = pipeline.jpeg({ quality: resolveQuality(72), mozjpeg: true });
        break;
      case "webp":
        pipeline = pipeline.webp({ quality: resolveQuality(76) });
        break;
      default:
        pipeline = pipeline.png({ compressionLevel: 9, adaptiveFiltering: true });
        break;
    }

    const optimizedBuffer = await pipeline.toBuffer();
    return {
      buffer: optimizedBuffer,
      optimized: true,
      original_byte_length: buffer.length,
      optimization_error: null
    };
  } catch (error) {
    // Fall back to the untouched image and surface the reason to the caller.
    return {
      buffer,
      optimized: false,
      original_byte_length: buffer.length,
      optimization_error: error?.message || String(error)
    };
  }
}
|
|
651
|
-
|
|
652
|
-
/**
 * Stacks page screenshots vertically into fewer JPEG images for LLM input.
 *
 * Screenshots without a `file_path` are ignored. With no `basePath`, or with
 * at most one usable screenshot, the original file paths are returned
 * unchanged. Otherwise pages are grouped (`pagesPerImage` per image), each
 * page is downsized and JPEG-encoded, and each group is composited top to
 * bottom on a white canvas and written next to `basePath`. Any failure aborts
 * composition and falls back to the original file paths, reporting the error
 * message in `llm_composition_error`.
 *
 * @param {Array<{file_path?: string, byte_length?: number}>} [screenshots=[]] - Captured pages.
 * @param {object} [options]
 * @param {string} [options.basePath] - Base path from which output file names are derived.
 * @param {number} [options.pagesPerImage=3] - Pages per composed image, clamped to 1..5.
 * @param {number} [options.resizeMaxWidth=1100] - Page width limit, clamped to 700..1400.
 * @param {number} [options.quality=72] - JPEG quality, clamped to 45..90.
 * @returns {Promise<object>} `llm_file_paths`, `llm_screenshots`, byte totals and `llm_composition_error`.
 */
async function composeScreenshotsForLlm(screenshots = [], {
  basePath,
  pagesPerImage = 3,
  resizeMaxWidth = 1100,
  quality = 72
} = {}) {
  const fileScreenshots = screenshots.filter((item) => item?.file_path);
  const originalPaths = () => fileScreenshots.map((item) => item.file_path);
  const sumByteLengths = (list) => list.reduce((sum, item) => sum + (Number(item.byte_length) || 0), 0);

  if (!basePath || fileScreenshots.length <= 1) {
    return {
      llm_file_paths: originalPaths(),
      llm_screenshots: [],
      llm_total_byte_length: 0,
      llm_original_total_byte_length: 0,
      llm_composition_error: null
    };
  }

  const groupSize = Math.max(1, Math.min(5, Number(pagesPerImage) || 3));
  const pageWidthLimit = Math.max(700, Math.min(1400, Number(resizeMaxWidth) || 1100));
  const jpegQuality = Math.max(45, Math.min(90, Number(quality) || 72));
  const jpegOptions = { quality: jpegQuality, mozjpeg: true };
  const llmScreenshots = [];

  try {
    for (let start = 0; start < fileScreenshots.length; start += groupSize) {
      const group = fileScreenshots.slice(start, start + groupSize);

      // Downsize and re-encode every page of the group before stacking.
      const prepared = [];
      for (const { file_path: sourcePath } of group) {
        const sourceBuffer = fs.readFileSync(sourcePath);
        const { data, info } = await sharp(sourceBuffer, { failOn: "none" })
          .resize({ width: pageWidthLimit, withoutEnlargement: true })
          .jpeg(jpegOptions)
          .toBuffer({ resolveWithObject: true });
        prepared.push({
          input: data,
          width: info.width,
          height: info.height,
          source_file_path: sourcePath
        });
      }

      // The canvas is as wide as the widest page and as tall as all pages together.
      const width = Math.max(...prepared.map((page) => page.width), 1);
      let height = 0;
      const composites = [];
      for (const page of prepared) {
        composites.push({ input: page.input, left: 0, top: height });
        height += page.height;
      }

      const canvas = sharp({
        create: { width, height, channels: 3, background: "#ffffff" }
      });
      const outputBuffer = await canvas.composite(composites).jpeg(jpegOptions).toBuffer();
      const outputPath = filePathForLlmSequence(basePath, llmScreenshots.length);
      fs.writeFileSync(outputPath, outputBuffer);
      llmScreenshots.push({
        index: llmScreenshots.length,
        file_path: outputPath,
        byte_length: outputBuffer.length,
        source_file_paths: prepared.map((page) => page.source_file_path),
        source_page_count: prepared.length,
        width,
        height,
        format: "jpeg",
        mime_type: "image/jpeg"
      });
    }
  } catch (error) {
    // Composition is optional: hand back the original pages and report why it failed.
    return {
      llm_file_paths: originalPaths(),
      llm_screenshots: [],
      llm_total_byte_length: 0,
      llm_original_total_byte_length: sumByteLengths(fileScreenshots),
      llm_composition_error: error?.message || String(error)
    };
  }

  return {
    llm_file_paths: llmScreenshots.map((item) => item.file_path),
    llm_screenshots: llmScreenshots,
    llm_total_byte_length: sumByteLengths(llmScreenshots),
    llm_original_total_byte_length: sumByteLengths(fileScreenshots),
    llm_composition_error: null
  };
}
|
|
756
|
-
|
|
757
|
-
/**
 * Captures a sequence of screenshots of one DOM node, scrolling between
 * captures, and returns a summary object describing every kept screenshot.
 *
 * Each iteration re-measures the node box, optionally checks for a stop
 * boundary, captures (node clip or whole viewport), optionally optimizes the
 * image, hashes it for duplicate detection, then scrolls either to the next
 * DOM anchor or with a synthetic mouse wheel event.
 *
 * @param {object} client - CDP-style client; `Page.captureScreenshot` and
 *   `Input.dispatchMouseEvent` are called on it directly.
 * @param {number} nodeId - Node to capture; required.
 * @param {object} [options] - Capture, scrolling, jitter, stop-boundary and
 *   LLM-composition settings (see the destructured defaults below).
 * @returns {Promise<object>} Summary with `screenshots`, `file_paths`,
 *   counters, the LLM composition result and the effective options.
 * @throws {Error} When `nodeId` is missing, when a step or the total budget
 *   times out, or when a DOM-anchor scroll fails in strict "dom-anchor" mode.
 */
export async function captureScrolledNodeScreenshots(client, nodeId, {
  filePath,
  format = "png",
  quality,
  padding = 0,
  captureBeyondViewport = true,
  fromSurface = true,
  captureViewport = false,
  maxScreenshots = 6,
  wheelDeltaY = 650,
  settleMs = 900,
  duplicateStopCount = 2,
  skipDuplicateScreenshots = false,
  optimize = false,
  resizeMaxWidth = 0,
  composeForLlm = false,
  llmPagesPerImage = 3,
  llmResizeMaxWidth = 1100,
  llmQuality = 72,
  stepTimeoutMs = 45000,
  totalTimeoutMs = 90000,
  scrollMethod = "dom-anchor-fallback-input",
  scrollAnchorSelector = DEFAULT_SCROLL_ANCHOR_SELECTOR,
  scrollAnchorMaxProbeNodes = 260,
  scrollAnchorMinGap = 180,
  scrollDeltaJitterEnabled = false,
  scrollDeltaJitterMinRatio = 0.65,
  scrollDeltaJitterMaxRatio = 0.9,
  scrollDeltaJitterMinOverlapRatio = 0.2,
  scrollDeltaJitterPreserveCoverage = true,
  scrollDeltaJitterRandom = Math.random,
  stopBoundarySelector = "",
  stopBoundaryTextPatterns = [],
  stopBoundaryMaxProbeNodes = 180,
  stopBoundaryMaxTextLength = 700,
  stopBoundaryTopPadding = 8,
  stopBoundaryMinCaptureHeight = 180,
  metadata = {}
} = {}) {
  if (!nodeId) throw new Error("captureScrolledNodeScreenshots requires nodeId");
  const sequenceStarted = Date.now();
  const normalizedScrollMethod = normalizeScrollMethod(scrollMethod);
  const maxScreenshotCount = Math.max(1, Number(maxScreenshots) || 1);
  // Scroll dispatches use the step timeout clamped into the 3s..10s range.
  const scrollStepTimeoutMs = Math.min(Math.max(3000, Number(stepTimeoutMs) || 45000), 10000);
  // Page.captureScreenshot accepts jpeg, png and webp; anything else is labelled png.
  const mimeType = format === "jpeg" || format === "webp" ? `image/${format}` : "image/png";
  const scrollDeltaJitter = normalizeScrollDeltaJitter({
    enabled: scrollDeltaJitterEnabled,
    minRatio: scrollDeltaJitterMinRatio,
    maxRatio: scrollDeltaJitterMaxRatio,
    minOverlapRatio: scrollDeltaJitterMinOverlapRatio,
    preserveCoverage: scrollDeltaJitterPreserveCoverage,
    random: scrollDeltaJitterRandom
  });
  // Jittered scrolls are shorter than the base delta, so allow extra
  // iterations to cover the same distance when coverage must be preserved.
  const maxCaptureIterations = scrollDeltaJitter.enabled && scrollDeltaJitter.preserve_coverage
    ? Math.max(maxScreenshotCount, Math.ceil(maxScreenshotCount / scrollDeltaJitter.min_ratio))
    : maxScreenshotCount;
  const anchorPlan = normalizedScrollMethod !== "input"
    ? await collectDomScrollAnchors(client, nodeId, {
      selector: scrollAnchorSelector,
      maxScreenshots: maxCaptureIterations,
      maxProbeNodes: scrollAnchorMaxProbeNodes,
      minAnchorGap: scrollAnchorMinGap,
      stepTimeoutMs
    })
    : null;
  const stopBoundaryEnabled = Boolean(
    normalizeText(stopBoundarySelector)
    || (Array.isArray(stopBoundaryTextPatterns)
      ? stopBoundaryTextPatterns.length
      : stopBoundaryTextPatterns)
  );
  let stopBoundaryPlan = {
    enabled: false,
    ok: false,
    reason: "not_configured",
    nodes: []
  };
  const stopBoundaryChecks = [];
  const screenshots = [];
  let consecutiveDuplicates = 0;
  let previousHash = "";
  let captureCount = 0;
  let droppedDuplicateCount = 0;
  let forceInputScrollAfterDuplicate = false;
  let stopBoundaryResult = null;
  let currentScrollMetadata = {
    before_capture: "initial",
    method: normalizedScrollMethod,
    anchor_plan: anchorPlan
      ? {
        ok: Boolean(anchorPlan.ok),
        reason: anchorPlan.reason || null,
        discovered_node_count: anchorPlan.discovered_node_count || 0,
        measured_node_count: anchorPlan.measured_node_count || 0,
        anchor_count: anchorPlan.anchor_count || 0,
        elapsed_ms: anchorPlan.elapsed_ms || 0
      }
      : null
  };

  // Bring the first anchor into view before the first capture.
  if (anchorPlan?.anchors?.[0]?.node_id && normalizedScrollMethod !== "input") {
    try {
      await scrollDomAnchorIntoView(client, anchorPlan.anchors[0].node_id, {
        label: "scroll_dom_anchor_initial",
        timeoutMs: scrollStepTimeoutMs
      });
      currentScrollMetadata = {
        before_capture: "dom_anchor_initial",
        method: "DOM.scrollIntoViewIfNeeded",
        anchor_node_id: anchorPlan.anchors[0].node_id,
        anchor_y: anchorPlan.anchors[0].y,
        anchor_height: anchorPlan.anchors[0].height,
        anchor_plan: currentScrollMetadata.anchor_plan
      };
    } catch (error) {
      // Strict DOM-anchor mode has no fallback, so the failure is fatal.
      if (normalizedScrollMethod === "dom-anchor") {
        throw error;
      }
      currentScrollMetadata = {
        before_capture: "dom_anchor_initial_failed",
        method: "DOM.scrollIntoViewIfNeeded",
        anchor_node_id: anchorPlan.anchors[0].node_id,
        error: error?.message || String(error),
        anchor_plan: currentScrollMetadata.anchor_plan
      };
    }
  }

  for (let index = 0; index < maxCaptureIterations; index += 1) {
    assertCaptureTotalBudget(sequenceStarted, totalTimeoutMs, `capture_page_${index + 1}`);
    const captureStarted = Date.now();
    const box = await withCaptureTimeout(getNodeBox(client, nodeId), {
      label: `get_box_${index + 1}`,
      timeoutMs: stepTimeoutMs
    });
    const clip = withPadding(box.rect, padding);
    let visibleStopBoundary = null;
    if (stopBoundaryEnabled) {
      stopBoundaryPlan = await collectStopBoundaryNodes(client, nodeId, {
        selector: stopBoundarySelector,
        textPatterns: stopBoundaryTextPatterns,
        maxProbeNodes: stopBoundaryMaxProbeNodes,
        maxTextLength: stopBoundaryMaxTextLength,
        stepTimeoutMs
      });
      stopBoundaryChecks.push({
        capture_index: index,
        ok: Boolean(stopBoundaryPlan.ok),
        reason: stopBoundaryPlan.reason || null,
        discovered_node_count: stopBoundaryPlan.discovered_node_count || 0,
        probed_node_count: stopBoundaryPlan.probed_node_count || 0,
        match_count: stopBoundaryPlan.match_count || 0,
        elapsed_ms: stopBoundaryPlan.elapsed_ms || 0
      });
      visibleStopBoundary = await resolveVisibleStopBoundary(client, stopBoundaryPlan, clip, {
        topPadding: stopBoundaryTopPadding,
        minCaptureHeight: stopBoundaryMinCaptureHeight,
        stepTimeoutMs
      });
    }
    if (visibleStopBoundary?.action === "stop_before_capture") {
      // No screenshot is taken on this iteration, so it is not counted.
      stopBoundaryResult = visibleStopBoundary;
      break;
    }
    const effectiveClip = visibleStopBoundary?.adjusted_clip || clip;
    // An adjusted clip overrides whole-viewport capture.
    const effectiveCaptureViewport = Boolean(captureViewport && !visibleStopBoundary?.adjusted_clip);
    const captureOptions = effectiveCaptureViewport ? {
      format,
      fromSurface,
      captureBeyondViewport: false
    } : {
      format,
      fromSurface,
      captureBeyondViewport,
      clip: effectiveClip
    };
    if (quality != null) {
      captureOptions.quality = quality;
    }
    const screenshot = await withCaptureTimeout(client.Page.captureScreenshot(captureOptions), {
      label: `capture_screenshot_${index + 1}`,
      timeoutMs: stepTimeoutMs
    });
    // Count only captures that actually happened.
    captureCount += 1;
    const originalBuffer = Buffer.from(screenshot.data || "", "base64");
    const optimized = await withCaptureTimeout(optimizeScreenshotBuffer(originalBuffer, {
      enabled: optimize,
      format,
      quality,
      resizeMaxWidth
    }), {
      label: `optimize_screenshot_${index + 1}`,
      timeoutMs: stepTimeoutMs
    });
    const buffer = optimized.buffer;
    const hash = screenshotHash(buffer);
    const duplicateOfPrevious = previousHash && previousHash === hash;
    if (duplicateOfPrevious) {
      consecutiveDuplicates += 1;
    } else {
      consecutiveDuplicates = 0;
    }

    let outputPath = null;
    if (duplicateOfPrevious && skipDuplicateScreenshots) {
      droppedDuplicateCount += 1;
    } else {
      outputPath = filePath ? filePathForSequence(filePath, screenshots.length, format) : null;
      if (outputPath) {
        fs.writeFileSync(outputPath, buffer);
      }

      screenshots.push({
        index: screenshots.length,
        capture_index: index,
        source: "image",
        captured_at: nowIso(),
        node_id: nodeId,
        format,
        mime_type: mimeType,
        byte_length: buffer.length,
        original_byte_length: optimized.original_byte_length || originalBuffer.length,
        optimized: Boolean(optimized.optimized),
        optimization_error: optimized.optimization_error || null,
        elapsed_ms: Date.now() - captureStarted,
        file_path: outputPath,
        sha256: hash,
        duplicate_of_previous: Boolean(duplicateOfPrevious),
        clip: effectiveClip,
        capture_viewport: effectiveCaptureViewport,
        node_rect: box.rect,
        scroll: currentScrollMetadata,
        stop_boundary: visibleStopBoundary || null,
        metadata
      });
    }

    if (visibleStopBoundary?.action === "capture_then_stop") {
      stopBoundaryResult = visibleStopBoundary;
      break;
    }

    previousHash = hash;
    // In fallback mode, a duplicate after a DOM-anchor scroll forces one
    // wheel scroll before the duplicate stop rule may end the sequence.
    forceInputScrollAfterDuplicate = Boolean(
      duplicateOfPrevious
      && normalizedScrollMethod === "dom-anchor-fallback-input"
      && currentScrollMetadata?.method === "DOM.scrollIntoViewIfNeeded"
    );
    if (
      consecutiveDuplicates >= Math.max(1, Number(duplicateStopCount) || 1)
      && !forceInputScrollAfterDuplicate
    ) {
      break;
    }

    if (index < maxCaptureIterations - 1) {
      assertCaptureTotalBudget(sequenceStarted, totalTimeoutMs, `scroll_after_page_${index + 1}`);
      let scrolledByDomAnchor = false;
      const nextAnchor = anchorPlan?.anchors?.[index + 1] || null;
      if (nextAnchor?.node_id && normalizedScrollMethod !== "input" && !forceInputScrollAfterDuplicate) {
        try {
          await scrollDomAnchorIntoView(client, nextAnchor.node_id, {
            label: `scroll_dom_anchor_${index + 1}`,
            timeoutMs: scrollStepTimeoutMs
          });
          scrolledByDomAnchor = true;
          currentScrollMetadata = {
            before_capture: `dom_anchor_${index + 1}`,
            method: "DOM.scrollIntoViewIfNeeded",
            anchor_node_id: nextAnchor.node_id,
            anchor_y: nextAnchor.y,
            anchor_height: nextAnchor.height
          };
        } catch (error) {
          if (normalizedScrollMethod === "dom-anchor") {
            throw error;
          }
          currentScrollMetadata = {
            before_capture: `dom_anchor_${index + 1}_failed`,
            method: "DOM.scrollIntoViewIfNeeded",
            anchor_node_id: nextAnchor.node_id,
            error: error?.message || String(error)
          };
        }
      } else if (normalizedScrollMethod === "dom-anchor") {
        // Strict DOM-anchor mode stops once no further anchor is usable.
        break;
      }

      if (!scrolledByDomAnchor && normalizedScrollMethod !== "dom-anchor") {
        const x = box.center.x;
        const y = box.center.y;
        const scrollDelta = resolveCoverageSafeScrollDelta({
          baseDelta: wheelDeltaY,
          clipHeight: effectiveClip.height,
          jitter: scrollDeltaJitter
        });
        await withCaptureTimeout(client.Input.dispatchMouseEvent({ type: "mouseMoved", x, y, button: "none" }), {
          label: `scroll_mouse_move_${index + 1}`,
          timeoutMs: scrollStepTimeoutMs
        });
        await withCaptureTimeout(client.Input.dispatchMouseEvent({
          type: "mouseWheel",
          x,
          y,
          deltaX: 0,
          deltaY: scrollDelta.deltaY
        }), {
          label: `scroll_wheel_${index + 1}`,
          timeoutMs: scrollStepTimeoutMs
        });
        currentScrollMetadata = {
          before_capture: `wheel_down_${index + 1}`,
          method: "Input.dispatchMouseEvent",
          fallback_from_dom_anchor: Boolean(anchorPlan && normalizedScrollMethod === "dom-anchor-fallback-input"),
          wheel_delta_y: scrollDelta.deltaY,
          wheel_delta_base_y: scrollDelta.base_delta_y,
          wheel_delta_jitter: scrollDelta.jittered ? scrollDelta : null
        };
      }
      if (settleMs > 0) await sleep(settleMs);
    }
  }

  const llmComposition = composeForLlm
    ? await withCaptureTimeout(composeScreenshotsForLlm(screenshots, {
      basePath: filePath,
      pagesPerImage: llmPagesPerImage,
      resizeMaxWidth: llmResizeMaxWidth,
      quality: llmQuality
    }), {
      label: "compose_llm_screenshots",
      timeoutMs: stepTimeoutMs
    })
    : {
      llm_file_paths: screenshots.map((item) => item.file_path).filter(Boolean),
      llm_screenshots: [],
      llm_total_byte_length: 0,
      llm_original_total_byte_length: 0,
      llm_composition_error: null
    };

  const uniqueScreenshotCount = new Set(screenshots.map((item) => item.sha256)).size;

  return {
    schema_version: 1,
    ok: true,
    source: "image-scroll-sequence",
    captured_at: nowIso(),
    node_id: nodeId,
    elapsed_ms: Date.now() - sequenceStarted,
    capture_count: captureCount,
    screenshot_count: screenshots.length,
    unique_screenshot_count: uniqueScreenshotCount,
    duplicate_screenshot_count: captureCount - uniqueScreenshotCount,
    dropped_duplicate_count: droppedDuplicateCount,
    total_byte_length: screenshots.reduce((sum, item) => sum + (Number(item.byte_length) || 0), 0),
    original_total_byte_length: screenshots.reduce((sum, item) => sum + (Number(item.original_byte_length) || 0), 0),
    llm_file_paths: llmComposition.llm_file_paths,
    llm_screenshot_count: llmComposition.llm_file_paths.length,
    llm_total_byte_length: llmComposition.llm_total_byte_length,
    llm_original_total_byte_length: llmComposition.llm_original_total_byte_length,
    llm_composition_error: llmComposition.llm_composition_error,
    llm_screenshots: llmComposition.llm_screenshots,
    optimization: {
      enabled: Boolean(optimize),
      resize_max_width: Math.max(0, Number(resizeMaxWidth) || 0),
      capture_viewport: Boolean(captureViewport),
      format,
      quality: quality ?? null,
      llm_compose_enabled: Boolean(composeForLlm),
      llm_pages_per_image: Math.max(1, Math.min(5, Number(llmPagesPerImage) || 3)),
      llm_resize_max_width: Math.max(0, Number(llmResizeMaxWidth) || 0),
      llm_quality: llmQuality ?? null,
      step_timeout_ms: Math.max(0, Number(stepTimeoutMs) || 0),
      total_timeout_ms: Math.max(0, Number(totalTimeoutMs) || 0),
      scroll_method: normalizedScrollMethod,
      requested_max_screenshots: maxScreenshotCount,
      effective_max_screenshots: maxCaptureIterations,
      scroll_anchor_selector: scrollAnchorSelector,
      scroll_anchor_max_probe_nodes: Math.max(1, Number(scrollAnchorMaxProbeNodes) || 260),
      scroll_anchor_min_gap: Math.max(0, Number(scrollAnchorMinGap) || 0),
      scroll_delta_jitter: {
        enabled: scrollDeltaJitter.enabled,
        min_ratio: scrollDeltaJitter.min_ratio,
        max_ratio: scrollDeltaJitter.max_ratio,
        min_overlap_ratio: scrollDeltaJitter.min_overlap_ratio,
        preserve_coverage: scrollDeltaJitter.preserve_coverage
      }
    },
    scroll_anchor_plan: anchorPlan,
    stop_boundary_plan: stopBoundaryPlan,
    stop_boundary_checks: stopBoundaryChecks,
    stop_boundary_result: stopBoundaryResult,
    file_paths: screenshots.map((item) => item.file_path).filter(Boolean),
    screenshots,
    metadata
  };
}
|
|
1151
|
-
|
|
1152
|
-
/**
 * Collects evidence for one candidate node: optionally its HTML snapshot and
 * optionally an image (a single clip or a scrolled sequence).
 *
 * @param {object} client - Client handed through to the capture helpers.
 * @param {object} [options]
 * @param {number} options.nodeId - Node to capture; required.
 * @param {string} [options.domain="unknown"] - Normalized into the result.
 * @param {string} [options.source="dom"] - Stored verbatim on the result.
 * @param {string} [options.screenshotPath] - Passed on as `filePath`.
 * @param {boolean} [options.includeHtml=true]
 * @param {boolean} [options.includeScreenshot=false]
 * @param {string} [options.screenshotMode="scroll"] - "single" takes one
 *   clip; any other value takes a scrolled sequence.
 * @param {object} [options.screenshotOptions={}] - Spread into the capture call.
 * @param {object} [options.metadata={}]
 * @returns {Promise<object>} Evidence record with `html` and `image` fields.
 * @throws {Error} When `nodeId` is missing.
 */
export async function captureCandidateEvidence(client, {
  nodeId,
  domain = "unknown",
  source = "dom",
  screenshotPath,
  includeHtml = true,
  includeScreenshot = false,
  screenshotMode = "scroll",
  screenshotOptions = {},
  metadata = {}
} = {}) {
  if (!nodeId) {
    throw new Error("captureCandidateEvidence requires nodeId");
  }

  const record = {
    schema_version: 1,
    domain: normalizeText(domain) || "unknown",
    source,
    captured_at: nowIso(),
    node_id: nodeId,
    html: null,
    image: null,
    metadata
  };

  if (includeHtml) {
    record.html = await captureNodeHtml(client, nodeId, { domain, source: "dom", metadata });
  }

  if (!includeScreenshot) {
    return record;
  }

  const singleShot = screenshotMode === "single";
  const captureImage = singleShot ? captureNodeScreenshot : captureScrolledNodeScreenshots;
  const captureMode = singleShot ? "single_visible_clip" : "scroll_sequence";
  record.image = await captureImage(client, nodeId, {
    ...screenshotOptions,
    filePath: screenshotPath,
    metadata: {
      ...metadata,
      capture_mode: captureMode
    }
  });
  return record;
}
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import crypto from "node:crypto";
|
|
3
|
+
import path from "node:path";
|
|
4
|
+
import sharp from "sharp";
|
|
5
|
+
import {
|
|
6
|
+
getAttributesMap,
|
|
7
|
+
getNodeBox,
|
|
8
|
+
getOuterHTML,
|
|
9
|
+
querySelectorAll,
|
|
10
|
+
sleep
|
|
11
|
+
} from "../browser/index.js";
|
|
12
|
+
import {
|
|
13
|
+
htmlToText,
|
|
14
|
+
normalizeText
|
|
15
|
+
} from "../screening/index.js";
|
|
16
|
+
|
|
17
|
+
/** Returns the current time formatted as an ISO-8601 UTC string. */
function nowIso() {
  const current = new Date();
  return current.toISOString();
}
|
|
20
|
+
|
|
21
|
+
/**
 * Turns an output path into an absolute path and makes sure its parent
 * directory exists.
 *
 * @param {string} [filePath] - Target path; a falsy value means no output.
 * @returns {string|null} Absolute path, or null when no path was given.
 */
function resolveOutputPath(filePath) {
  if (filePath) {
    const absolutePath = path.resolve(filePath);
    const parentDir = path.dirname(absolutePath);
    fs.mkdirSync(parentDir, { recursive: true });
    return absolutePath;
  }
  return null;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Expands a rectangle by `padding` on every side and returns it as a
 * screenshot clip (`scale` is always 1).
 *
 * The origin is clamped at 0. The size adds the trailing padding plus
 * whatever leading padding survived the clamp, so the far edge always sits
 * `padding` beyond the rectangle's own far edge. The previous formula
 * subtracted the applied leading padding instead, which left unclamped
 * rectangles with no trailing padding.
 *
 * @param {{x: number, y: number, width: number, height: number}} rect
 * @param {number} [padding=0] - Negative or non-numeric values count as 0.
 * @returns {{x: number, y: number, width: number, height: number, scale: number}}
 *   Clip with width and height of at least 1.
 */
function withPadding(rect, padding = 0) {
  const safePadding = Math.max(0, Number(padding) || 0);
  const x = Math.max(0, rect.x - safePadding);
  const y = Math.max(0, rect.y - safePadding);
  // rect.x - x is the leading padding actually applied after clamping
  // (negative when the rectangle itself starts before 0).
  return {
    x,
    y,
    width: Math.max(1, rect.width + safePadding + (rect.x - x)),
    height: Math.max(1, rect.height + safePadding + (rect.y - y)),
    scale: 1
  };
}
|
|
40
|
+
|
|
41
|
+
/**
 * Returns the supplied random source when it is callable, else Math.random.
 *
 * @param {*} random - Candidate random-number function.
 * @returns {Function} A function to call for random numbers.
 */
function normalizeRandom(random) {
  if (typeof random !== "function") {
    return Math.random;
  }
  return random;
}
|
|
44
|
+
|
|
45
|
+
/**
 * Draws a number between `min` and `max` using the given random source.
 *
 * @param {Function} random - Random source; Math.random is used when this is
 *   not a function.
 * @param {number} min - Lower bound; non-numeric values count as 0.
 * @param {number} max - Upper bound; a non-numeric or zero value collapses
 *   to the lower bound.
 * @returns {number} The lower bound when the range is empty, otherwise
 *   `lower + random() * (upper - lower)`.
 */
function randomBetween(random, min, max) {
  const floor = Number(min) || 0;
  const ceiling = Number(max) || floor;
  if (ceiling <= floor) {
    return floor;
  }
  const draw = typeof random === "function" ? random : Math.random;
  return floor + draw() * (ceiling - floor);
}
|
|
51
|
+
|
|
52
|
+
/**
 * Coerces a value to a number and clamps it into `[min, max]`.
 *
 * @param {*} raw - Value to coerce with Number().
 * @param {number} fallback - Used when the coerced value is not finite.
 * @param {{min?: number, max?: number}} [bounds] - Defaults to 0..1.
 * @returns {number} The clamped ratio.
 */
function normalizeRatio(raw, fallback, { min = 0, max = 1 } = {}) {
  const numeric = Number(raw);
  let candidate = fallback;
  if (Number.isFinite(numeric)) {
    candidate = numeric;
  }
  const raisedToMin = Math.max(min, candidate);
  return Math.min(max, raisedToMin);
}
|
|
57
|
+
|
|
58
|
+
/**
 * Normalizes the scroll-jitter options into a snake_case settings object.
 *
 * @param {object} [options]
 * @param {boolean} [options.enabled=false] - Only literal `true` enables jitter.
 * @param {number} [options.minRatio=0.65] - Clamped into 0.1..1.
 * @param {number} [options.maxRatio=0.9] - Clamped into minRatio..1.
 * @param {number} [options.minOverlapRatio=0.2] - Clamped into 0..0.8.
 * @param {boolean} [options.preserveCoverage=true] - Only literal `false`
 *   disables it.
 * @param {Function} [options.random=Math.random] - Random source.
 * @returns {{enabled: boolean, min_ratio: number, max_ratio: number,
 *   min_overlap_ratio: number, preserve_coverage: boolean, random: Function}}
 */
function normalizeScrollDeltaJitter({
  enabled = false,
  minRatio = 0.65,
  maxRatio = 0.9,
  minOverlapRatio = 0.2,
  preserveCoverage = true,
  random = Math.random
} = {}) {
  const lowerRatio = normalizeRatio(minRatio, 0.65, { min: 0.1, max: 1 });
  const clampedUpper = normalizeRatio(maxRatio, 0.9, { min: lowerRatio, max: 1 });
  const upperRatio = Math.max(lowerRatio, clampedUpper);
  const overlapRatio = normalizeRatio(minOverlapRatio, 0.2, { min: 0, max: 0.8 });
  const randomSource = normalizeRandom(random);
  return {
    enabled: enabled === true,
    min_ratio: lowerRatio,
    max_ratio: upperRatio,
    min_overlap_ratio: overlapRatio,
    preserve_coverage: preserveCoverage !== false,
    random: randomSource
  };
}
|
|
77
|
+
|
|
78
|
+
/**
 * Works out the wheel delta for the next scroll step.
 *
 * With jitter disabled the base delta is returned unchanged. With jitter
 * enabled a random delta is drawn between the min and max ratios of the base
 * delta, with the upper bound also capped so that consecutive captures
 * overlap by at least `min_overlap_ratio` of the clip height.
 *
 * @param {object} [options]
 * @param {number} options.baseDelta - Base wheel delta; falls back to 650
 *   and is floored at 1.
 * @param {number} options.clipHeight - Height of the captured clip.
 * @param {object} options.jitter - Normalized jitter settings.
 * @returns {object} `deltaY`, `jittered`, `base_delta_y`, plus the jitter
 *   bounds when jitter was applied.
 */
function resolveCoverageSafeScrollDelta({
  baseDelta,
  clipHeight,
  jitter
} = {}) {
  const baseDeltaY = Math.max(1, Number(baseDelta) || 650);
  if (jitter?.enabled) {
    const clipHeightPx = Math.max(1, Number(clipHeight) || 1);
    const overlapCap = Math.max(1, Math.floor(clipHeightPx * (1 - jitter.min_overlap_ratio)));
    const scaledMax = Math.round(baseDeltaY * jitter.max_ratio);
    const maxDeltaY = Math.max(1, Math.min(scaledMax, overlapCap));
    const scaledMin = Math.round(baseDeltaY * jitter.min_ratio);
    const minDeltaY = Math.min(maxDeltaY, Math.max(1, scaledMin));
    const drawn = randomBetween(jitter.random, minDeltaY, maxDeltaY);
    return {
      deltaY: Math.max(1, Math.round(drawn)),
      jittered: true,
      base_delta_y: baseDeltaY,
      min_delta_y: minDeltaY,
      max_delta_y: maxDeltaY,
      min_ratio: jitter.min_ratio,
      max_ratio: jitter.max_ratio,
      min_overlap_ratio: jitter.min_overlap_ratio,
      clip_height: clipHeightPx,
      max_delta_for_overlap: overlapCap,
      preserve_coverage: jitter.preserve_coverage
    };
  }
  return {
    deltaY: baseDeltaY,
    jittered: false,
    base_delta_y: baseDeltaY
  };
}
|
|
110
|
+
|
|
111
|
+
/**
 * Captures a node's attributes, outer HTML and derived text.
 *
 * @param {object} client - Client passed to the browser helpers.
 * @param {number} nodeId - Node to read.
 * @param {object} [options]
 * @param {string} [options.domain="unknown"] - Normalized into the result.
 * @param {string} [options.source="dom"] - Stored verbatim on the result.
 * @param {object} [options.metadata={}] - Stored verbatim on the result.
 * @returns {Promise<object>} Snapshot with `attributes`, `outer_html`,
 *   `text` and their lengths.
 */
export async function captureNodeHtml(client, nodeId, {
  domain = "unknown",
  source = "dom",
  metadata = {}
} = {}) {
  // Both lookups are started before either is awaited.
  const pendingAttributes = getAttributesMap(client, nodeId);
  const pendingOuterHtml = getOuterHTML(client, nodeId);
  const [attributes, outerHTML] = await Promise.all([pendingAttributes, pendingOuterHtml]);
  const text = htmlToText(outerHTML);
  const normalizedDomain = normalizeText(domain) || "unknown";
  return {
    schema_version: 1,
    domain: normalizedDomain,
    source,
    captured_at: nowIso(),
    node_id: nodeId,
    attributes,
    outer_html_length: outerHTML.length,
    text_length: text.length,
    text,
    outer_html: outerHTML,
    metadata
  };
}
|
|
135
|
+
|
|
136
|
+
/**
 * Captures one screenshot clipped to a node's (optionally padded) box and
 * writes it to disk when a file path is given.
 *
 * @param {object} client - CDP-style client; `Page.captureScreenshot` is
 *   called on it.
 * @param {number} nodeId - Node whose box defines the clip.
 * @param {object} [options]
 * @param {string} [options.filePath] - Output file; nothing is written when falsy.
 * @param {string} [options.format="png"] - Passed to the capture call.
 * @param {number} [options.quality] - Forwarded only when not null/undefined.
 * @param {number} [options.padding=0] - Padding around the node box.
 * @param {boolean} [options.captureBeyondViewport=true]
 * @param {boolean} [options.fromSurface=true]
 * @param {object} [options.metadata={}] - Stored verbatim on the result.
 * @returns {Promise<object>} Image record with `mime_type`, `byte_length`,
 *   `file_path`, `clip` and `node_rect`.
 */
export async function captureNodeScreenshot(client, nodeId, {
  filePath,
  format = "png",
  quality,
  padding = 0,
  captureBeyondViewport = true,
  fromSurface = true,
  metadata = {}
} = {}) {
  const box = await getNodeBox(client, nodeId);
  const clip = withPadding(box.rect, padding);
  const captureOptions = {
    format,
    fromSurface,
    captureBeyondViewport,
    clip
  };
  if (quality != null) {
    captureOptions.quality = quality;
  }
  const screenshot = await client.Page.captureScreenshot(captureOptions);
  const buffer = Buffer.from(screenshot.data || "", "base64");
  const resolvedPath = resolveOutputPath(filePath);
  if (resolvedPath) {
    fs.writeFileSync(resolvedPath, buffer);
  }
  // The capture call accepts jpeg, png and webp; webp used to be reported
  // as image/png. Unknown formats keep the png label.
  const mimeType = format === "jpeg" || format === "webp" ? `image/${format}` : "image/png";
  return {
    schema_version: 1,
    source: "image",
    captured_at: nowIso(),
    node_id: nodeId,
    format,
    mime_type: mimeType,
    byte_length: buffer.length,
    file_path: resolvedPath,
    clip,
    node_rect: box.rect,
    metadata
  };
}
|
|
176
|
+
|
|
177
|
+
/**
 * Captures a screenshot without a clip and writes it to disk when a file
 * path is given.
 *
 * @param {object} client - CDP-style client; `Page.captureScreenshot` is
 *   called on it.
 * @param {object} [options]
 * @param {string} [options.filePath] - Output file; nothing is written when falsy.
 * @param {string} [options.format="png"] - Passed to the capture call.
 * @param {number} [options.quality] - Forwarded only when not null/undefined.
 * @param {boolean} [options.captureBeyondViewport=false]
 * @param {boolean} [options.fromSurface=true]
 * @param {object} [options.metadata={}] - Stored verbatim on the result.
 * @returns {Promise<object>} Image record with `mime_type`, `byte_length`,
 *   `file_path` and `capture_beyond_viewport`.
 */
export async function captureViewportScreenshot(client, {
  filePath,
  format = "png",
  quality,
  captureBeyondViewport = false,
  fromSurface = true,
  metadata = {}
} = {}) {
  const captureOptions = {
    format,
    fromSurface,
    captureBeyondViewport
  };
  if (quality != null) {
    captureOptions.quality = quality;
  }
  const screenshot = await client.Page.captureScreenshot(captureOptions);
  const buffer = Buffer.from(screenshot.data || "", "base64");
  const resolvedPath = resolveOutputPath(filePath);
  if (resolvedPath) {
    fs.writeFileSync(resolvedPath, buffer);
  }
  // The capture call accepts jpeg, png and webp; webp used to be reported
  // as image/png. Unknown formats keep the png label.
  const mimeType = format === "jpeg" || format === "webp" ? `image/${format}` : "image/png";
  return {
    schema_version: 1,
    source: "viewport-image",
    captured_at: nowIso(),
    format,
    mime_type: mimeType,
    byte_length: buffer.length,
    file_path: resolvedPath,
    capture_beyond_viewport: Boolean(captureBeyondViewport),
    metadata
  };
}
|
|
211
|
+
|
|
212
|
+
/**
 * Builds the output path for one page of a screenshot sequence:
 * `<dir>/<name>-page-NN<ext>`, where NN is the 1-based, zero-padded page.
 *
 * @param {string} basePath - Base output path; falsy means no output.
 * @param {number} index - Zero-based page index.
 * @param {string} extension - Used (with a leading dot) only when the base
 *   path has no extension of its own.
 * @returns {string|null} Page path, or null when there is no base path.
 */
function filePathForSequence(basePath, index, extension) {
  const absoluteBase = resolveOutputPath(basePath);
  if (!absoluteBase) {
    return null;
  }
  const { dir, name, ext } = path.parse(absoluteBase);
  const pageNumber = String(index + 1).padStart(2, "0");
  const suffix = ext || `.${extension}`;
  return path.join(dir, `${name}-page-${pageNumber}${suffix}`);
}
|
|
219
|
+
|
|
220
|
+
/**
 * Builds the output path for one composed LLM image:
 * `<dir>/<name>-llm-NN.jpg`, where NN is the 1-based, zero-padded index.
 *
 * @param {string} basePath - Base output path; falsy means no output.
 * @param {number} index - Zero-based image index.
 * @returns {string|null} Image path, or null when there is no base path.
 */
function filePathForLlmSequence(basePath, index) {
  const absoluteBase = resolveOutputPath(basePath);
  if (!absoluteBase) {
    return null;
  }
  const { dir, name } = path.parse(absoluteBase);
  const pageNumber = String(index + 1).padStart(2, "0");
  return path.join(dir, `${name}-llm-${pageNumber}.jpg`);
}
|
|
227
|
+
|
|
228
|
+
/**
 * Computes the SHA-256 digest of an image buffer.
 *
 * @param {Buffer|string} buffer - Data to hash.
 * @returns {string} Lowercase hex digest.
 */
function screenshotHash(buffer) {
  const hasher = crypto.createHash("sha256");
  hasher.update(buffer);
  return hasher.digest("hex");
}
|
|
231
|
+
|
|
232
|
+
/**
 * Builds the error raised when a capture step exceeds its time budget.
 *
 * @param {string} label - Name of the step that timed out.
 * @param {number} timeoutMs - Budget that was exceeded.
 * @returns {Error} Error with `code`, `capture_step` and `timeout_ms` set.
 */
function createCaptureTimeoutError(label, timeoutMs) {
  const message = `Image fallback capture timed out during ${label} after ${timeoutMs}ms`;
  return Object.assign(new Error(message), {
    code: "IMAGE_CAPTURE_TIMEOUT",
    capture_step: label,
    timeout_ms: timeoutMs
  });
}
|
|
239
|
+
|
|
240
|
+
/**
 * Awaits a promise, rejecting with a capture-timeout error if it does not
 * settle within the budget.
 *
 * @param {Promise<*>} promise - Work to await.
 * @param {object} [options]
 * @param {string} [options.label="capture_step"] - Step name for the error.
 * @param {number} [options.timeoutMs=0] - Budget in ms; 0, negative or
 *   non-numeric disables the timeout.
 * @returns {Promise<*>} The settled value of `promise`.
 */
async function withCaptureTimeout(promise, {
  label = "capture_step",
  timeoutMs = 0
} = {}) {
  const budgetMs = Math.max(0, Number(timeoutMs) || 0);
  if (budgetMs === 0) {
    return promise;
  }
  let timeoutHandle = null;
  const deadline = new Promise((_resolve, reject) => {
    timeoutHandle = setTimeout(() => {
      reject(createCaptureTimeoutError(label, budgetMs));
    }, budgetMs);
  });
  try {
    return await Promise.race([promise, deadline]);
  } finally {
    // Always release the timer so a finished step does not keep it pending.
    if (timeoutHandle) {
      clearTimeout(timeoutHandle);
    }
  }
}
|
|
258
|
+
|
|
259
|
+
/**
 * Throws when the whole capture sequence has run past its total budget.
 *
 * @param {number} started - Start time in epoch milliseconds.
 * @param {number} totalTimeoutMs - Total budget in ms; 0, negative or
 *   non-numeric disables the check.
 * @param {string} label - Step name recorded on the error.
 * @throws {Error} With code "IMAGE_CAPTURE_TOTAL_TIMEOUT" and `elapsed_ms`.
 */
function assertCaptureTotalBudget(started, totalTimeoutMs, label) {
  const budgetMs = Math.max(0, Number(totalTimeoutMs) || 0);
  if (budgetMs === 0) {
    return;
  }
  const elapsedMs = Date.now() - started;
  const withinBudget = elapsedMs <= budgetMs;
  if (!withinBudget) {
    throw Object.assign(createCaptureTimeoutError(label, budgetMs), {
      elapsed_ms: elapsedMs,
      code: "IMAGE_CAPTURE_TOTAL_TIMEOUT"
    });
  }
}
|
|
269
|
+
|
|
270
|
+
// Default comma-separated CSS selector list: plain element names first, then
// class-substring matches, then the catch-all "div".
const DEFAULT_SCROLL_ANCHOR_SELECTOR = [
  ...["h1", "h2", "h3", "h4", "h5"],
  ...["p", "li", "section", "article"],
  ...["table", "tr", "dl", "dt", "dd"],
  ...["resume", "work", "project", "education", "experience", "item"]
    .map((keyword) => `[class*='${keyword}']`),
  "div"
].join(",");
|
|
293
|
+
|
|
294
|
+
/**
 * Maps a user-supplied scroll method name onto one of the three canonical
 * values: "dom-anchor", "dom-anchor-fallback-input" or "input".
 *
 * @param {string} [value="dom-anchor-fallback-input"] - Requested method;
 *   matched case-insensitively after text normalization.
 * @returns {string} Canonical method; unknown names fall back to "input".
 */
function normalizeScrollMethod(value = "dom-anchor-fallback-input") {
  const requested = normalizeText(value).toLowerCase();
  switch (requested) {
    case "dom":
    case "dom-anchor":
    case "dom_anchor":
    case "anchor":
      return "dom-anchor";
    case "dom-anchor-fallback-input":
    case "dom_anchor_fallback_input":
    case "dom-fallback-input":
      return "dom-anchor-fallback-input";
    default:
      return "input";
  }
}
|
|
302
|
+
|
|
303
|
+
/**
 * Coerces every entry to a number, drops zero and non-numeric entries, and
 * removes duplicates while keeping first-seen order.
 *
 * @param {Array<*>} [values=[]] - Raw values.
 * @returns {number[]} Distinct non-zero numbers.
 */
function uniqueNumbers(values = []) {
  const distinct = values.reduce((seen, value) => {
    const numeric = Number(value) || 0;
    if (numeric) {
      seen.add(numeric);
    }
    return seen;
  }, new Set());
  return [...distinct];
}
|
|
306
|
+
|
|
307
|
+
/**
 * Picks up to `limit` items spread evenly across the list, always including
 * the first item, and de-duplicates the picks by `node_id`.
 *
 * @param {Array<{node_id: *}>} [items=[]] - Candidates in order.
 * @param {number} [limit=1] - Maximum picks; values below 1 count as 1.
 * @returns {Array<object>} The original array when it already fits the
 *   limit, otherwise a new array of picks.
 */
function pickEvenly(items = [], limit = 1) {
  const quota = Math.max(1, Number(limit) || 1);
  if (items.length <= quota) {
    return items;
  }
  const lastIndex = items.length - 1;
  const divisor = Math.max(1, quota - 1);
  const chosen = [];
  for (let slot = 0; slot < quota; slot += 1) {
    chosen.push(items[Math.round((slot * lastIndex) / divisor)]);
  }
  // A Map keeps first-seen order and the last item for a repeated node_id.
  const byNodeId = new Map();
  for (const item of chosen) {
    byNodeId.set(item.node_id, item);
  }
  return [...byNodeId.values()];
}
|
|
318
|
+
|
|
319
|
+
/**
 * Returns a printable label for a stop-boundary pattern: the regex source
 * for RegExp patterns, otherwise the normalized text.
 *
 * @param {RegExp|string} pattern - Pattern to label.
 * @returns {string} Label text.
 */
function patternLabel(pattern) {
  const isRegex = pattern instanceof RegExp;
  return isRegex ? pattern.source : normalizeText(pattern);
}
|
|
323
|
+
|
|
324
|
+
/**
 * Converts raw stop-boundary patterns into matcher objects.
 *
 * A single non-array value is treated as a one-element list and falsy
 * entries are dropped. RegExp entries are tested as-is; every other entry
 * is normalized to text and matched by substring.
 *
 * @param {Array<RegExp|string>|RegExp|string} [patterns=[]] - Raw patterns.
 * @returns {Array<{raw: *, label: string, matches: function(string): boolean}>}
 *   One matcher per usable pattern.
 */
function stopBoundaryPatterns(patterns = []) {
  const list = Array.isArray(patterns) ? patterns : [patterns];
  return list
    .filter(Boolean)
    .map((pattern) => {
      if (pattern instanceof RegExp) {
        return {
          raw: pattern,
          label: pattern.source,
          matches: (text) => {
            // Global/sticky regexes remember lastIndex between test() calls;
            // reset it so each text is matched from the start.
            pattern.lastIndex = 0;
            return pattern.test(text);
          }
        };
      }
      const normalized = normalizeText(pattern);
      return {
        raw: pattern,
        label: normalized,
        // A pattern that normalizes to empty text never matches.
        matches: (text) => Boolean(normalized) && text.includes(normalized)
      };
    });
}
|
|
343
|
+
|
|
344
|
+
/**
 * Find DOM nodes under `rootNodeId` that mark where capturing should stop.
 *
 * Candidates come from `querySelectorAll` using the given selector, or
 * `DEFAULT_SCROLL_ANCHOR_SELECTOR` when only text patterns are configured.
 * When text patterns are present, each candidate's outer HTML is converted to
 * text and kept only if the text is non-empty, no longer than the text limit,
 * and matched by at least one pattern. Without patterns every candidate is kept.
 *
 * @param {object} client CDP client handed to the DOM helpers.
 * @param {number} rootNodeId Node to search under.
 * @param {object} [options]
 * @param {string} [options.selector=""] CSS selector for candidates.
 * @param {Array|RegExp|string} [options.textPatterns=[]] Patterns for `stopBoundaryPatterns`.
 * @param {number} [options.maxProbeNodes=180] Maximum candidates inspected.
 * @param {number} [options.maxTextLength=700] Longest text accepted (minimum 40).
 * @param {number} [options.stepTimeoutMs=45000] Basis for the per-node timeout
 *   (one fortieth, clamped to 200..1000 ms).
 * @returns {Promise<object>} Plan with `enabled`, `ok`, `reason`, `nodes`, and,
 *   once a query was attempted, selector and counters.
 */
async function collectStopBoundaryNodes(client, rootNodeId, {
  selector = "",
  textPatterns = [],
  maxProbeNodes = 180,
  maxTextLength = 700,
  stepTimeoutMs = 45000
} = {}) {
  const matchers = stopBoundaryPatterns(textPatterns);
  const requestedSelector = normalizeText(selector);
  const hasMatchers = matchers.length > 0;
  if (!requestedSelector && !hasMatchers) {
    return {
      enabled: false,
      ok: false,
      reason: "not_configured",
      nodes: []
    };
  }

  const startedAt = Date.now();
  const effectiveSelector = requestedSelector || DEFAULT_SCROLL_ANCHOR_SELECTOR;
  let candidateIds = [];
  try {
    const rawIds = await querySelectorAll(client, rootNodeId, effectiveSelector);
    candidateIds = uniqueNumbers(rawIds);
  } catch (error) {
    return {
      enabled: true,
      ok: false,
      reason: "query_selector_all_failed",
      selector: effectiveSelector,
      error: error?.message || String(error),
      nodes: []
    };
  }

  const probeCap = Math.max(1, Number(maxProbeNodes) || 180);
  const textCap = Math.max(40, Number(maxTextLength) || 700);
  const stepBudgetMs = Number(stepTimeoutMs) || 45000;
  const probeTimeoutMs = Math.min(1000, Math.max(200, Math.floor(stepBudgetMs / 40)));
  const matchedNodes = [];

  for (const candidateId of candidateIds.slice(0, probeCap)) {
    try {
      let candidateText = "";
      let hit = null;
      if (hasMatchers) {
        const html = await withCaptureTimeout(getOuterHTML(client, candidateId), {
          label: `stop_boundary_html_${candidateId}`,
          timeoutMs: probeTimeoutMs
        });
        candidateText = normalizeText(htmlToText(html));
        const usableText = Boolean(candidateText) && candidateText.length <= textCap;
        if (!usableText) continue;
        hit = matchers.find((matcher) => matcher.matches(candidateText));
        if (!hit) continue;
      }
      matchedNodes.push({
        node_id: candidateId,
        text_preview: candidateText.slice(0, 120),
        matched_pattern: hit ? patternLabel(hit.raw) : null
      });
    } catch {
      // Best effort: a node that cannot be probed is simply not a boundary.
    }
  }

  const foundAny = matchedNodes.length > 0;
  return {
    enabled: true,
    ok: foundAny,
    reason: foundAny ? null : "no_matching_stop_boundary_nodes",
    selector: effectiveSelector,
    elapsed_ms: Date.now() - startedAt,
    discovered_node_count: candidateIds.length,
    probed_node_count: Math.min(candidateIds.length, probeCap),
    match_count: matchedNodes.length,
    pattern_labels: matchers.map((matcher) => matcher.label),
    nodes: matchedNodes
  };
}
|
|
419
|
+
|
|
420
|
+
/**
 * Decide how a stop boundary affects the capture of the current clip.
 *
 * Each planned boundary node is measured in plan order. Nodes narrower than
 * 40 or shorter than 6 are ignored, as are nodes whose box cannot be read.
 * - A node lying entirely above the clip ends the search immediately with
 *   `stop_before_capture` / `stop_boundary_above_clip`.
 * - Otherwise the top-most node overlapping the clip is the boundary. If
 *   the space between the clip top and the padded boundary is below the
 *   minimum capture height the result is `stop_before_capture` /
 *   `stop_boundary_near_clip_top`; else it is `capture_then_stop` with an
 *   `adjusted_clip` cut off at the boundary.
 *
 * @param {object} client CDP client handed to `getNodeBox`.
 * @param {{nodes: Array<object>}|null} stopBoundaryPlan Plan from `collectStopBoundaryNodes`.
 * @param {{y: number, height: number}|null} clip Clip rectangle about to be captured.
 * @param {object} [options]
 * @param {number} [options.topPadding=8] Gap kept above the boundary.
 * @param {number} [options.minCaptureHeight=180] Smallest clip height worth capturing.
 * @param {number} [options.stepTimeoutMs=45000] Basis for the per-node timeout
 *   (one fiftieth, clamped to 180..900 ms).
 * @returns {Promise<object|null>} Decision object, or null when no boundary applies.
 */
async function resolveVisibleStopBoundary(client, stopBoundaryPlan, clip, {
  topPadding = 8,
  minCaptureHeight = 180,
  stepTimeoutMs = 45000
} = {}) {
  const plannedNodes = stopBoundaryPlan?.nodes;
  if (!plannedNodes?.length || !clip) return null;

  const clipTop = Number(clip.y) || 0;
  const clipBottom = clipTop + (Number(clip.height) || 0);
  const padding = Math.max(0, Number(topPadding) || 0);
  const minHeight = Math.max(1, Number(minCaptureHeight) || 180);
  const stepBudgetMs = Number(stepTimeoutMs) || 45000;
  const boxTimeoutMs = Math.min(900, Math.max(180, Math.floor(stepBudgetMs / 50)));

  // Top-most overlapping node seen so far; ties keep the earlier node.
  let nearest = null;
  for (const candidate of plannedNodes) {
    try {
      const measured = await withCaptureTimeout(getNodeBox(client, candidate.node_id), {
        label: `stop_boundary_box_${candidate.node_id}`,
        timeoutMs: boxTimeoutMs
      });
      const rect = measured?.rect || {};
      const rectWidth = Number(rect.width) || 0;
      const rectHeight = Number(rect.height) || 0;
      const largeEnough = rectWidth >= 40 && rectHeight >= 6;
      if (!largeEnough) continue;

      const rectTop = Number(rect.y) || 0;
      const rectBottom = rectTop + rectHeight;
      if (rectBottom <= clipTop + 1) {
        return {
          action: "stop_before_capture",
          reason: "stop_boundary_above_clip",
          node_id: candidate.node_id,
          matched_pattern: candidate.matched_pattern,
          text_preview: candidate.text_preview,
          rect,
          clip
        };
      }
      const overlapsClip = rectTop < clipBottom && rectBottom > clipTop;
      if (overlapsClip && (nearest === null || rectTop < nearest.top)) {
        nearest = { node: candidate, rect, top: rectTop };
      }
    } catch {
      // Best effort: nodes that cannot be measured are skipped.
    }
  }
  if (nearest === null) return null;

  const cutY = Math.max(clipTop, nearest.top - padding);
  const adjustedHeight = Math.max(0, cutY - clipTop);
  const tooShort = adjustedHeight < minHeight;

  const decision = {
    action: tooShort ? "stop_before_capture" : "capture_then_stop",
    reason: tooShort ? "stop_boundary_near_clip_top" : "stop_boundary_visible",
    node_id: nearest.node.node_id,
    matched_pattern: nearest.node.matched_pattern,
    text_preview: nearest.node.text_preview,
    rect: nearest.rect,
    clip
  };
  if (!tooShort) {
    decision.adjusted_clip = {
      ...clip,
      height: adjustedHeight
    };
  }
  decision.adjusted_height = adjustedHeight;
  decision.min_capture_height = minHeight;
  return decision;
}
|
|
502
|
+
|
|
503
|
+
/**
 * Build a list of DOM nodes to scroll to, one per planned screenshot.
 *
 * Nodes matching `selector` under `rootNodeId` are measured (up to the probe
 * limit); boxes narrower than 80 or shorter than 8 and nodes that cannot be
 * measured are ignored. Measured nodes are ordered by rounded `y` and thinned
 * so consecutive anchors are at least the minimum gap apart. If fewer than two
 * anchors survive, the probed nodes are used in document order instead, with
 * `y` and `height` set to null. The final list is reduced with `pickEvenly`.
 *
 * @param {object} client CDP client handed to the DOM helpers.
 * @param {number} rootNodeId Node to search under.
 * @param {object} [options]
 * @param {string} [options.selector=DEFAULT_SCROLL_ANCHOR_SELECTOR] CSS selector for anchors.
 * @param {number} [options.maxScreenshots=6] Maximum anchors returned (minimum 1).
 * @param {number} [options.maxProbeNodes=260] Maximum nodes measured.
 * @param {number} [options.minAnchorGap=180] Minimum vertical distance between anchors (floor 40).
 * @param {number} [options.stepTimeoutMs=45000] Basis for the per-node timeout
 *   (one thirtieth, clamped to 250..1200 ms).
 * @returns {Promise<object>} Anchor plan; `ok` is false with a `reason` when the
 *   query fails or finds nothing.
 */
async function collectDomScrollAnchors(client, rootNodeId, {
  selector = DEFAULT_SCROLL_ANCHOR_SELECTOR,
  maxScreenshots = 6,
  maxProbeNodes = 260,
  minAnchorGap = 180,
  stepTimeoutMs = 45000
} = {}) {
  const startedAt = Date.now();
  const method = "dom-anchor";

  let candidateIds = [];
  try {
    const rawIds = await querySelectorAll(client, rootNodeId, selector);
    candidateIds = uniqueNumbers(rawIds);
  } catch (error) {
    return {
      ok: false,
      method,
      reason: "query_selector_all_failed",
      error: error?.message || String(error)
    };
  }
  if (candidateIds.length === 0) {
    return {
      ok: false,
      method,
      reason: "no_anchor_nodes"
    };
  }

  const probeCap = Math.max(1, Number(maxProbeNodes) || 260);
  const stepBudgetMs = Number(stepTimeoutMs) || 45000;
  const boxTimeoutMs = Math.min(1200, Math.max(250, Math.floor(stepBudgetMs / 30)));
  const requiredGap = Math.max(40, Number(minAnchorGap) || 180);
  const probeTargets = candidateIds.slice(0, probeCap);

  const measured = [];
  for (const candidateId of probeTargets) {
    try {
      const measurement = await withCaptureTimeout(getNodeBox(client, candidateId), {
        label: `anchor_box_${candidateId}`,
        timeoutMs: boxTimeoutMs
      });
      const rect = measurement?.rect || {};
      const wideEnough = (Number(rect.width) || 0) >= 80;
      const tallEnough = (Number(rect.height) || 0) >= 8;
      if (wideEnough && tallEnough) {
        measured.push({
          node_id: candidateId,
          y: Math.round(Number(rect.y) || 0),
          height: Math.round(Number(rect.height) || 0)
        });
      }
    } catch {
      // Best effort: nodes that cannot be measured are not anchor candidates.
    }
  }

  // Walk top to bottom, keeping a node only when it is far enough below the
  // previously kept one.
  measured.sort((first, second) => first.y - second.y);
  let anchors = [];
  for (const entry of measured) {
    const previous = anchors[anchors.length - 1];
    if (!previous || Math.abs(entry.y - previous.y) >= requiredGap) {
      anchors.push(entry);
    }
  }

  if (anchors.length < 2) {
    // Too few usable measurements: fall back to plain document order.
    anchors = probeTargets.map((candidateId, position) => ({
      node_id: candidateId,
      y: null,
      height: null,
      document_order: position
    }));
  }

  anchors = pickEvenly(anchors, Math.max(1, Number(maxScreenshots) || 1));
  return {
    ok: anchors.length > 0,
    method,
    elapsed_ms: Date.now() - startedAt,
    selector,
    discovered_node_count: candidateIds.length,
    measured_node_count: measured.length,
    anchor_count: anchors.length,
    anchors
  };
}
|
|
581
|
+
|
|
582
|
+
/**
 * Scroll a DOM node into view through CDP `DOM.scrollIntoViewIfNeeded`.
 *
 * Uses `client.DOM.scrollIntoViewIfNeeded` when the client exposes it as a
 * function, otherwise the generic `client.send`; the call is wrapped in
 * `withCaptureTimeout`.
 *
 * @param {object} client CDP client.
 * @param {number} nodeId Node to bring into view.
 * @param {object} [options]
 * @param {number} [options.timeoutMs=10000] Timeout handed to `withCaptureTimeout`.
 * @param {string} [options.label="dom_scroll_anchor"] Label handed to `withCaptureTimeout`.
 * @returns {Promise<*>} Whatever the wrapped CDP call resolves to.
 * @throws {Error} When the client offers neither way to issue the command.
 */
async function scrollDomAnchorIntoView(client, nodeId, {
  timeoutMs = 10000,
  label = "dom_scroll_anchor"
} = {}) {
  const hasDomDomain = Boolean(client.DOM) && typeof client.DOM.scrollIntoViewIfNeeded === "function";
  const hasRawSend = typeof client.send === "function";
  if (!hasDomDomain && !hasRawSend) {
    throw new Error("CDP client does not expose DOM.scrollIntoViewIfNeeded");
  }

  const pending = hasDomDomain
    ? client.DOM.scrollIntoViewIfNeeded({ nodeId })
    : client.send("DOM.scrollIntoViewIfNeeded", { nodeId });
  return withCaptureTimeout(pending, { label, timeoutMs });
}
|
|
594
|
+
|
|
595
|
+
/**
 * Optionally downscale and re-encode a screenshot buffer with sharp.
 *
 * Nothing happens unless `enabled` is set or a `resizeMaxWidth` is given.
 * Otherwise the image is resized when wider than `resizeMaxWidth` and encoded
 * as JPEG ("jpeg" or "jpg"), WebP ("webp") or PNG (anything else). A supplied
 * quality is clamped to 35..95; the defaults are 72 for JPEG and 76 for WebP.
 * Any failure is reported through `optimization_error` and the original
 * buffer is returned unchanged.
 *
 * @param {Buffer} buffer Encoded source image.
 * @param {object} [options]
 * @param {boolean} [options.enabled=false] Force re-encoding.
 * @param {string} [options.format="png"] Target format.
 * @param {number} [options.quality] Encoder quality for JPEG/WebP.
 * @param {number} [options.resizeMaxWidth=0] Maximum width; 0 disables resizing.
 * @returns {Promise<{buffer: Buffer, optimized: boolean, optimization_error: (string|null), original_byte_length: (number|undefined)}>}
 *   `original_byte_length` is only present when optimization was attempted.
 */
async function optimizeScreenshotBuffer(buffer, {
  enabled = false,
  format = "png",
  quality,
  resizeMaxWidth = 0
} = {}) {
  const nothingRequested = !enabled && !resizeMaxWidth;
  if (nothingRequested) {
    return {
      buffer,
      optimized: false,
      optimization_error: null
    };
  }

  // Clamp an explicit quality to 35..95, falling back to the codec default.
  const resolveQuality = (fallback) => {
    if (quality == null) return fallback;
    return Math.max(35, Math.min(95, Number(quality) || fallback));
  };

  try {
    const targetFormat = format === "jpg" ? "jpeg" : format;
    let image = sharp(buffer, { failOn: "none" });
    const details = await image.metadata();
    const sourceWidth = Number(details.width) || 0;
    const widthLimit = Math.max(0, Number(resizeMaxWidth) || 0);
    if (widthLimit > 0 && sourceWidth > widthLimit) {
      image = image.resize({
        width: widthLimit,
        withoutEnlargement: true
      });
    }

    switch (targetFormat) {
      case "jpeg":
        image = image.jpeg({
          quality: resolveQuality(72),
          mozjpeg: true
        });
        break;
      case "webp":
        image = image.webp({
          quality: resolveQuality(76)
        });
        break;
      default:
        image = image.png({
          compressionLevel: 9,
          adaptiveFiltering: true
        });
        break;
    }

    const encoded = await image.toBuffer();
    return {
      buffer: encoded,
      optimized: true,
      original_byte_length: buffer.length,
      optimization_error: null
    };
  } catch (error) {
    return {
      buffer,
      optimized: false,
      original_byte_length: buffer.length,
      optimization_error: error?.message || String(error)
    };
  }
}
|
|
651
|
+
|
|
652
|
+
/**
 * Stack consecutive screenshot files vertically into fewer JPEG images.
 *
 * Only entries with a `file_path` are considered. With no `basePath`, or with
 * at most one such entry, nothing is composed and the original paths are
 * returned. Otherwise the files are processed in groups of `pagesPerImage`
 * (clamped to 1..5): each page is resized to at most the target width
 * (clamped to 700..1400) and encoded as JPEG (quality clamped to 45..90), the
 * pages are placed one below another on a white canvas, and the result is
 * written to the path given by `filePathForLlmSequence`. If any step throws,
 * the original paths are returned together with the error message.
 *
 * @param {Array<{file_path: string, byte_length: number}>} [screenshots=[]] Captured pages.
 * @param {object} [options]
 * @param {string} [options.basePath] Base path for the composed files.
 * @param {number} [options.pagesPerImage=3] Pages stacked per output image.
 * @param {number} [options.resizeMaxWidth=1100] Maximum page width.
 * @param {number} [options.quality=72] JPEG quality.
 * @returns {Promise<object>} `llm_file_paths`, `llm_screenshots`, byte totals
 *   and `llm_composition_error`.
 */
async function composeScreenshotsForLlm(screenshots = [], {
  basePath,
  pagesPerImage = 3,
  resizeMaxWidth = 1100,
  quality = 72
} = {}) {
  const pages = screenshots.filter((entry) => entry?.file_path);
  const pagePaths = () => pages.map((entry) => entry.file_path);
  const totalBytes = (entries) => entries.reduce(
    (running, entry) => running + (Number(entry.byte_length) || 0),
    0
  );

  if (!basePath || pages.length <= 1) {
    return {
      llm_file_paths: pagePaths(),
      llm_screenshots: [],
      llm_total_byte_length: 0,
      llm_original_total_byte_length: 0,
      llm_composition_error: null
    };
  }

  const groupSize = Math.max(1, Math.min(5, Number(pagesPerImage) || 3));
  const targetWidth = Math.max(700, Math.min(1400, Number(resizeMaxWidth) || 1100));
  const jpegQuality = Math.max(45, Math.min(90, Number(quality) || 72));
  const jpegOptions = () => ({
    quality: jpegQuality,
    mozjpeg: true
  });
  const composed = [];

  try {
    for (let start = 0; start < pages.length; start += groupSize) {
      const layers = [];
      const sourcePaths = [];
      let canvasWidth = 1;
      let canvasHeight = 0;

      for (const page of pages.slice(start, start + groupSize)) {
        const raw = fs.readFileSync(page.file_path);
        const { data, info } = await sharp(raw, { failOn: "none" })
          .resize({
            width: targetWidth,
            withoutEnlargement: true
          })
          .jpeg(jpegOptions())
          .toBuffer({ resolveWithObject: true });
        // Each page sits directly below the pages stacked so far.
        layers.push({
          input: data,
          left: 0,
          top: canvasHeight
        });
        sourcePaths.push(page.file_path);
        canvasWidth = Math.max(canvasWidth, info.width);
        canvasHeight += info.height;
      }

      const canvas = sharp({
        create: {
          width: canvasWidth,
          height: canvasHeight,
          channels: 3,
          background: "#ffffff"
        }
      });
      const encoded = await canvas
        .composite(layers)
        .jpeg(jpegOptions())
        .toBuffer();

      const position = composed.length;
      const targetPath = filePathForLlmSequence(basePath, position);
      fs.writeFileSync(targetPath, encoded);
      composed.push({
        index: position,
        file_path: targetPath,
        byte_length: encoded.length,
        source_file_paths: sourcePaths,
        source_page_count: layers.length,
        width: canvasWidth,
        height: canvasHeight,
        format: "jpeg",
        mime_type: "image/jpeg"
      });
    }
  } catch (error) {
    // Composition is optional: hand back the uncomposed pages instead.
    return {
      llm_file_paths: pagePaths(),
      llm_screenshots: [],
      llm_total_byte_length: 0,
      llm_original_total_byte_length: totalBytes(pages),
      llm_composition_error: error?.message || String(error)
    };
  }

  return {
    llm_file_paths: composed.map((entry) => entry.file_path),
    llm_screenshots: composed,
    llm_total_byte_length: totalBytes(composed),
    llm_original_total_byte_length: totalBytes(pages),
    llm_composition_error: null
  };
}
|
|
756
|
+
|
|
757
|
+
/**
 * Capture a sequence of screenshots of a node while scrolling through it.
 *
 * Per iteration: measure the node, optionally look for a stop boundary,
 * capture (clipped to the node, or the plain viewport when `captureViewport`
 * is set and no boundary shortened the clip), optionally optimize the image,
 * hash it to detect duplicates, store it, then scroll. Scrolling uses
 * `DOM.scrollIntoViewIfNeeded` on pre-computed anchors and/or a mouse wheel
 * event, depending on the normalized `scrollMethod`.
 *
 * The sequence ends when the iteration limit is reached, when enough
 * consecutive identical screenshots were seen, when a stop boundary says so,
 * or (method "dom-anchor") when no further anchor is available.
 *
 * `capture_count` counts screenshots actually taken: an iteration that stops
 * at a boundary before capturing is not counted. `mime_type` reflects the
 * requested format ("jpeg"/"jpg", "webp", otherwise PNG).
 *
 * @param {object} client CDP client exposing `Page.captureScreenshot` and,
 *   for wheel scrolling, `Input.dispatchMouseEvent`.
 * @param {number} nodeId Node to capture.
 * @param {object} [options] Capture, scroll, optimization, LLM-composition and
 *   stop-boundary settings; see the parameter defaults below. `filePath`
 *   enables writing each screenshot to disk; `metadata` is echoed back.
 * @returns {Promise<object>} Sequence summary containing `screenshots`,
 *   `file_paths`, counters, the effective options under `optimization`, the
 *   anchor plan and the stop-boundary diagnostics.
 * @throws {Error} When `nodeId` is missing. Errors from the timeout/budget
 *   helpers and from CDP calls propagate, except anchor scroll failures in
 *   "dom-anchor-fallback-input" mode, which fall back to wheel scrolling.
 */
export async function captureScrolledNodeScreenshots(client, nodeId, {
  filePath,
  format = "png",
  quality,
  padding = 0,
  captureBeyondViewport = true,
  fromSurface = true,
  captureViewport = false,
  maxScreenshots = 6,
  wheelDeltaY = 650,
  settleMs = 900,
  duplicateStopCount = 2,
  skipDuplicateScreenshots = false,
  optimize = false,
  resizeMaxWidth = 0,
  composeForLlm = false,
  llmPagesPerImage = 3,
  llmResizeMaxWidth = 1100,
  llmQuality = 72,
  stepTimeoutMs = 45000,
  totalTimeoutMs = 90000,
  scrollMethod = "dom-anchor-fallback-input",
  scrollAnchorSelector = DEFAULT_SCROLL_ANCHOR_SELECTOR,
  scrollAnchorMaxProbeNodes = 260,
  scrollAnchorMinGap = 180,
  scrollDeltaJitterEnabled = false,
  scrollDeltaJitterMinRatio = 0.65,
  scrollDeltaJitterMaxRatio = 0.9,
  scrollDeltaJitterMinOverlapRatio = 0.2,
  scrollDeltaJitterPreserveCoverage = true,
  scrollDeltaJitterRandom = Math.random,
  stopBoundarySelector = "",
  stopBoundaryTextPatterns = [],
  stopBoundaryMaxProbeNodes = 180,
  stopBoundaryMaxTextLength = 700,
  stopBoundaryTopPadding = 8,
  stopBoundaryMinCaptureHeight = 180,
  metadata = {}
} = {}) {
  if (!nodeId) throw new Error("captureScrolledNodeScreenshots requires nodeId");
  const sequenceStarted = Date.now();
  const normalizedScrollMethod = normalizeScrollMethod(scrollMethod);
  const maxScreenshotCount = Math.max(1, Number(maxScreenshots) || 1);
  // Scroll commands get their own short timeout (3..10 s) derived from the step budget.
  const scrollTimeoutMs = Math.min(Math.max(3000, Number(stepTimeoutMs) || 45000), 10000);
  const mimeTypesByFormat = new Map([
    ["jpeg", "image/jpeg"],
    ["jpg", "image/jpeg"],
    ["webp", "image/webp"]
  ]);
  const mimeType = mimeTypesByFormat.get(format) || "image/png";

  const scrollDeltaJitter = normalizeScrollDeltaJitter({
    enabled: scrollDeltaJitterEnabled,
    minRatio: scrollDeltaJitterMinRatio,
    maxRatio: scrollDeltaJitterMaxRatio,
    minOverlapRatio: scrollDeltaJitterMinOverlapRatio,
    preserveCoverage: scrollDeltaJitterPreserveCoverage,
    random: scrollDeltaJitterRandom
  });
  // Jittered (shorter) scroll steps need more iterations to cover the same distance.
  const maxCaptureIterations = scrollDeltaJitter.enabled && scrollDeltaJitter.preserve_coverage
    ? Math.max(maxScreenshotCount, Math.ceil(maxScreenshotCount / scrollDeltaJitter.min_ratio))
    : maxScreenshotCount;

  const anchorPlan = normalizedScrollMethod !== "input"
    ? await collectDomScrollAnchors(client, nodeId, {
      selector: scrollAnchorSelector,
      maxScreenshots: maxCaptureIterations,
      maxProbeNodes: scrollAnchorMaxProbeNodes,
      minAnchorGap: scrollAnchorMinGap,
      stepTimeoutMs
    })
    : null;

  const stopBoundaryEnabled = Boolean(
    normalizeText(stopBoundarySelector)
    || (Array.isArray(stopBoundaryTextPatterns)
      ? stopBoundaryTextPatterns.length
      : stopBoundaryTextPatterns)
  );
  let stopBoundaryPlan = {
    enabled: false,
    ok: false,
    reason: "not_configured",
    nodes: []
  };
  const stopBoundaryChecks = [];
  const screenshots = [];
  let consecutiveDuplicates = 0;
  let previousHash = "";
  let captureCount = 0;
  let droppedDuplicateCount = 0;
  let forceInputScrollAfterDuplicate = false;
  let stopBoundaryResult = null;

  const anchorPlanSummary = anchorPlan
    ? {
      ok: Boolean(anchorPlan.ok),
      reason: anchorPlan.reason || null,
      discovered_node_count: anchorPlan.discovered_node_count || 0,
      measured_node_count: anchorPlan.measured_node_count || 0,
      anchor_count: anchorPlan.anchor_count || 0,
      elapsed_ms: anchorPlan.elapsed_ms || 0
    }
    : null;
  let currentScrollMetadata = {
    before_capture: "initial",
    method: normalizedScrollMethod,
    anchor_plan: anchorPlanSummary
  };

  // Start at the first anchor so the sequence begins from a known position.
  if (anchorPlan?.anchors?.[0]?.node_id && normalizedScrollMethod !== "input") {
    const firstAnchor = anchorPlan.anchors[0];
    try {
      await scrollDomAnchorIntoView(client, firstAnchor.node_id, {
        label: "scroll_dom_anchor_initial",
        timeoutMs: scrollTimeoutMs
      });
      currentScrollMetadata = {
        before_capture: "dom_anchor_initial",
        method: "DOM.scrollIntoViewIfNeeded",
        anchor_node_id: firstAnchor.node_id,
        anchor_y: firstAnchor.y,
        anchor_height: firstAnchor.height,
        anchor_plan: anchorPlanSummary
      };
    } catch (error) {
      if (normalizedScrollMethod === "dom-anchor") {
        throw error;
      }
      currentScrollMetadata = {
        before_capture: "dom_anchor_initial_failed",
        method: "DOM.scrollIntoViewIfNeeded",
        anchor_node_id: firstAnchor.node_id,
        error: error?.message || String(error),
        anchor_plan: anchorPlanSummary
      };
    }
  }

  for (let index = 0; index < maxCaptureIterations; index += 1) {
    assertCaptureTotalBudget(sequenceStarted, totalTimeoutMs, `capture_page_${index + 1}`);
    const captureStarted = Date.now();
    const box = await withCaptureTimeout(getNodeBox(client, nodeId), {
      label: `get_box_${index + 1}`,
      timeoutMs: stepTimeoutMs
    });
    const clip = withPadding(box.rect, padding);

    let visibleStopBoundary = null;
    if (stopBoundaryEnabled) {
      // Boundaries are re-collected every iteration because scrolling may
      // change which nodes exist and where they are.
      stopBoundaryPlan = await collectStopBoundaryNodes(client, nodeId, {
        selector: stopBoundarySelector,
        textPatterns: stopBoundaryTextPatterns,
        maxProbeNodes: stopBoundaryMaxProbeNodes,
        maxTextLength: stopBoundaryMaxTextLength,
        stepTimeoutMs
      });
      stopBoundaryChecks.push({
        capture_index: index,
        ok: Boolean(stopBoundaryPlan.ok),
        reason: stopBoundaryPlan.reason || null,
        discovered_node_count: stopBoundaryPlan.discovered_node_count || 0,
        probed_node_count: stopBoundaryPlan.probed_node_count || 0,
        match_count: stopBoundaryPlan.match_count || 0,
        elapsed_ms: stopBoundaryPlan.elapsed_ms || 0
      });
      visibleStopBoundary = await resolveVisibleStopBoundary(client, stopBoundaryPlan, clip, {
        topPadding: stopBoundaryTopPadding,
        minCaptureHeight: stopBoundaryMinCaptureHeight,
        stepTimeoutMs
      });
    }
    if (visibleStopBoundary?.action === "stop_before_capture") {
      stopBoundaryResult = visibleStopBoundary;
      break;
    }

    const effectiveClip = visibleStopBoundary?.adjusted_clip || clip;
    // A shortened clip must be honoured, so viewport capture is disabled for it.
    const effectiveCaptureViewport = Boolean(captureViewport && !visibleStopBoundary?.adjusted_clip);
    const captureOptions = effectiveCaptureViewport ? {
      format,
      fromSurface,
      captureBeyondViewport: false
    } : {
      format,
      fromSurface,
      captureBeyondViewport,
      clip: effectiveClip
    };
    if (quality != null) {
      captureOptions.quality = quality;
    }
    const screenshot = await withCaptureTimeout(client.Page.captureScreenshot(captureOptions), {
      label: `capture_screenshot_${index + 1}`,
      timeoutMs: stepTimeoutMs
    });
    // Count only screenshots that were really taken.
    captureCount += 1;

    const originalBuffer = Buffer.from(screenshot.data || "", "base64");
    const optimized = await withCaptureTimeout(optimizeScreenshotBuffer(originalBuffer, {
      enabled: optimize,
      format,
      quality,
      resizeMaxWidth
    }), {
      label: `optimize_screenshot_${index + 1}`,
      timeoutMs: stepTimeoutMs
    });
    const buffer = optimized.buffer;
    const hash = screenshotHash(buffer);
    const duplicateOfPrevious = previousHash && previousHash === hash;
    if (duplicateOfPrevious) {
      consecutiveDuplicates += 1;
    } else {
      consecutiveDuplicates = 0;
    }

    let outputPath = null;
    if (duplicateOfPrevious && skipDuplicateScreenshots) {
      droppedDuplicateCount += 1;
    } else {
      outputPath = filePath ? filePathForSequence(filePath, screenshots.length, format) : null;
      if (outputPath) {
        fs.writeFileSync(outputPath, buffer);
      }

      screenshots.push({
        index: screenshots.length,
        capture_index: index,
        source: "image",
        captured_at: nowIso(),
        node_id: nodeId,
        format,
        mime_type: mimeType,
        byte_length: buffer.length,
        original_byte_length: optimized.original_byte_length || originalBuffer.length,
        optimized: Boolean(optimized.optimized),
        optimization_error: optimized.optimization_error || null,
        elapsed_ms: Date.now() - captureStarted,
        file_path: outputPath,
        sha256: hash,
        duplicate_of_previous: Boolean(duplicateOfPrevious),
        clip: effectiveClip,
        capture_viewport: effectiveCaptureViewport,
        node_rect: box.rect,
        scroll: currentScrollMetadata,
        stop_boundary: visibleStopBoundary || null,
        metadata
      });
    }

    if (visibleStopBoundary?.action === "capture_then_stop") {
      stopBoundaryResult = visibleStopBoundary;
      break;
    }

    previousHash = hash;
    // An anchor scroll that changed nothing gets one retry with the wheel
    // before the duplicate counts towards stopping.
    forceInputScrollAfterDuplicate = Boolean(
      duplicateOfPrevious
      && normalizedScrollMethod === "dom-anchor-fallback-input"
      && currentScrollMetadata?.method === "DOM.scrollIntoViewIfNeeded"
    );
    if (
      consecutiveDuplicates >= Math.max(1, Number(duplicateStopCount) || 1)
      && !forceInputScrollAfterDuplicate
    ) {
      break;
    }

    if (index < maxCaptureIterations - 1) {
      assertCaptureTotalBudget(sequenceStarted, totalTimeoutMs, `scroll_after_page_${index + 1}`);
      let scrolledByDomAnchor = false;
      const nextAnchor = anchorPlan?.anchors?.[index + 1] || null;
      if (nextAnchor?.node_id && normalizedScrollMethod !== "input" && !forceInputScrollAfterDuplicate) {
        try {
          await scrollDomAnchorIntoView(client, nextAnchor.node_id, {
            label: `scroll_dom_anchor_${index + 1}`,
            timeoutMs: scrollTimeoutMs
          });
          scrolledByDomAnchor = true;
          currentScrollMetadata = {
            before_capture: `dom_anchor_${index + 1}`,
            method: "DOM.scrollIntoViewIfNeeded",
            anchor_node_id: nextAnchor.node_id,
            anchor_y: nextAnchor.y,
            anchor_height: nextAnchor.height
          };
        } catch (error) {
          if (normalizedScrollMethod === "dom-anchor") {
            throw error;
          }
          currentScrollMetadata = {
            before_capture: `dom_anchor_${index + 1}_failed`,
            method: "DOM.scrollIntoViewIfNeeded",
            anchor_node_id: nextAnchor.node_id,
            error: error?.message || String(error)
          };
        }
      } else if (normalizedScrollMethod === "dom-anchor") {
        // Pure anchor mode has no fallback: out of anchors means done.
        break;
      }

      if (!scrolledByDomAnchor && normalizedScrollMethod !== "dom-anchor") {
        const x = box.center.x;
        const y = box.center.y;
        const scrollDelta = resolveCoverageSafeScrollDelta({
          baseDelta: wheelDeltaY,
          clipHeight: effectiveClip.height,
          jitter: scrollDeltaJitter
        });
        await withCaptureTimeout(client.Input.dispatchMouseEvent({ type: "mouseMoved", x, y, button: "none" }), {
          label: `scroll_mouse_move_${index + 1}`,
          timeoutMs: scrollTimeoutMs
        });
        await withCaptureTimeout(client.Input.dispatchMouseEvent({
          type: "mouseWheel",
          x,
          y,
          deltaX: 0,
          deltaY: scrollDelta.deltaY
        }), {
          label: `scroll_wheel_${index + 1}`,
          timeoutMs: scrollTimeoutMs
        });
        currentScrollMetadata = {
          before_capture: `wheel_down_${index + 1}`,
          method: "Input.dispatchMouseEvent",
          fallback_from_dom_anchor: Boolean(anchorPlan && normalizedScrollMethod === "dom-anchor-fallback-input"),
          wheel_delta_y: scrollDelta.deltaY,
          wheel_delta_base_y: scrollDelta.base_delta_y,
          wheel_delta_jitter: scrollDelta.jittered ? scrollDelta : null
        };
      }
      if (settleMs > 0) await sleep(settleMs);
    }
  }

  const llmComposition = composeForLlm
    ? await withCaptureTimeout(composeScreenshotsForLlm(screenshots, {
      basePath: filePath,
      pagesPerImage: llmPagesPerImage,
      resizeMaxWidth: llmResizeMaxWidth,
      quality: llmQuality
    }), {
      label: "compose_llm_screenshots",
      timeoutMs: stepTimeoutMs
    })
    : {
      llm_file_paths: screenshots.map((item) => item.file_path).filter(Boolean),
      llm_screenshots: [],
      llm_total_byte_length: 0,
      llm_original_total_byte_length: 0,
      llm_composition_error: null
    };

  const uniqueScreenshotCount = new Set(screenshots.map((item) => item.sha256)).size;
  return {
    schema_version: 1,
    ok: true,
    source: "image-scroll-sequence",
    captured_at: nowIso(),
    node_id: nodeId,
    elapsed_ms: Date.now() - sequenceStarted,
    capture_count: captureCount,
    screenshot_count: screenshots.length,
    unique_screenshot_count: uniqueScreenshotCount,
    duplicate_screenshot_count: captureCount - uniqueScreenshotCount,
    dropped_duplicate_count: droppedDuplicateCount,
    total_byte_length: screenshots.reduce((sum, item) => sum + (Number(item.byte_length) || 0), 0),
    original_total_byte_length: screenshots.reduce((sum, item) => sum + (Number(item.original_byte_length) || 0), 0),
    llm_file_paths: llmComposition.llm_file_paths,
    llm_screenshot_count: llmComposition.llm_file_paths.length,
    llm_total_byte_length: llmComposition.llm_total_byte_length,
    llm_original_total_byte_length: llmComposition.llm_original_total_byte_length,
    llm_composition_error: llmComposition.llm_composition_error,
    llm_screenshots: llmComposition.llm_screenshots,
    optimization: {
      enabled: Boolean(optimize),
      resize_max_width: Math.max(0, Number(resizeMaxWidth) || 0),
      capture_viewport: Boolean(captureViewport),
      format,
      quality: quality ?? null,
      llm_compose_enabled: Boolean(composeForLlm),
      llm_pages_per_image: Math.max(1, Math.min(5, Number(llmPagesPerImage) || 3)),
      llm_resize_max_width: Math.max(0, Number(llmResizeMaxWidth) || 0),
      llm_quality: llmQuality ?? null,
      step_timeout_ms: Math.max(0, Number(stepTimeoutMs) || 0),
      total_timeout_ms: Math.max(0, Number(totalTimeoutMs) || 0),
      scroll_method: normalizedScrollMethod,
      requested_max_screenshots: maxScreenshotCount,
      effective_max_screenshots: maxCaptureIterations,
      scroll_anchor_selector: scrollAnchorSelector,
      scroll_anchor_max_probe_nodes: Math.max(1, Number(scrollAnchorMaxProbeNodes) || 260),
      scroll_anchor_min_gap: Math.max(0, Number(scrollAnchorMinGap) || 0),
      scroll_delta_jitter: {
        enabled: scrollDeltaJitter.enabled,
        min_ratio: scrollDeltaJitter.min_ratio,
        max_ratio: scrollDeltaJitter.max_ratio,
        min_overlap_ratio: scrollDeltaJitter.min_overlap_ratio,
        preserve_coverage: scrollDeltaJitter.preserve_coverage
      }
    },
    scroll_anchor_plan: anchorPlan,
    stop_boundary_plan: stopBoundaryPlan,
    stop_boundary_checks: stopBoundaryChecks,
    stop_boundary_result: stopBoundaryResult,
    file_paths: screenshots.map((item) => item.file_path).filter(Boolean),
    screenshots,
    metadata
  };
}
|
|
1151
|
+
|
|
1152
|
+
/**
 * Collect evidence for one candidate node: its HTML and/or screenshots.
 *
 * @param {object} client CDP client handed to the capture helpers.
 * @param {object} [options]
 * @param {number} options.nodeId Node to capture (required).
 * @param {string} [options.domain="unknown"] Domain label; stored after `normalizeText`.
 * @param {string} [options.source="dom"] Source label stored on the evidence.
 * @param {string} [options.screenshotPath] File path handed to the screenshot helper.
 * @param {boolean} [options.includeHtml=true] Capture the node HTML.
 * @param {boolean} [options.includeScreenshot=false] Capture screenshots.
 * @param {string} [options.screenshotMode="scroll"] "single" takes one screenshot;
 *   any other value captures a scroll sequence.
 * @param {object} [options.screenshotOptions={}] Extra options for the screenshot
 *   helper; `filePath` and `metadata` are always overridden.
 * @param {object} [options.metadata={}] Metadata attached to the evidence and its parts.
 * @returns {Promise<object>} Evidence record with `html` and `image` set to the
 *   helper results, or null for the parts not requested.
 * @throws {Error} When `nodeId` is missing.
 */
export async function captureCandidateEvidence(client, {
  nodeId,
  domain = "unknown",
  source = "dom",
  screenshotPath,
  includeHtml = true,
  includeScreenshot = false,
  screenshotMode = "scroll",
  screenshotOptions = {},
  metadata = {}
} = {}) {
  if (!nodeId) throw new Error("captureCandidateEvidence requires nodeId");

  const evidence = {
    schema_version: 1,
    domain: normalizeText(domain) || "unknown",
    source,
    captured_at: nowIso(),
    node_id: nodeId,
    html: null,
    image: null,
    metadata
  };

  if (includeHtml) {
    const htmlOptions = { domain, source: "dom", metadata };
    evidence.html = await captureNodeHtml(client, nodeId, htmlOptions);
  }

  if (includeScreenshot) {
    const singleShot = screenshotMode === "single";
    const takeScreenshot = singleShot ? captureNodeScreenshot : captureScrolledNodeScreenshots;
    // Caller-supplied options come first so the path and metadata below win.
    evidence.image = await takeScreenshot(client, nodeId, {
      ...screenshotOptions,
      filePath: screenshotPath,
      metadata: {
        ...metadata,
        capture_mode: singleShot ? "single_visible_clip" : "scroll_sequence"
      }
    });
  }

  return evidence;
}
|