@reconcrap/boss-recommend-mcp 2.0.47 → 2.0.48

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/bin/boss-recommend-mcp.js +4 -4
  2. package/config/screening-config.example.json +27 -27
  3. package/package.json +1 -1
  4. package/scripts/postinstall.cjs +44 -44
  5. package/skills/boss-chat/README.md +39 -39
  6. package/skills/boss-chat/SKILL.md +93 -93
  7. package/skills/boss-recommend-pipeline/README.md +12 -12
  8. package/skills/boss-recommend-pipeline/SKILL.md +180 -180
  9. package/skills/boss-recruit-pipeline/README.md +17 -17
  10. package/skills/boss-recruit-pipeline/SKILL.md +58 -58
  11. package/src/chat-mcp.js +1780 -1780
  12. package/src/chat-runtime-config.js +749 -749
  13. package/src/cli.js +3054 -3054
  14. package/src/core/boss-cards/index.js +199 -199
  15. package/src/core/browser/index.js +1586 -1453
  16. package/src/core/capture/index.js +1201 -1201
  17. package/src/core/cv-acquisition/index.js +238 -238
  18. package/src/core/cv-capture-target/index.js +299 -299
  19. package/src/core/greet-quota/index.js +54 -54
  20. package/src/core/infinite-list/index.js +1326 -1326
  21. package/src/core/reporting/legacy-csv.js +341 -341
  22. package/src/core/run/timing.js +33 -33
  23. package/src/core/self-heal/index.js +973 -973
  24. package/src/core/self-heal/viewport.js +564 -564
  25. package/src/domains/chat/cards.js +137 -137
  26. package/src/domains/chat/constants.js +221 -221
  27. package/src/domains/chat/detail.js +1668 -1668
  28. package/src/domains/chat/index.js +7 -7
  29. package/src/domains/chat/jobs.js +592 -592
  30. package/src/domains/chat/page-guard.js +98 -98
  31. package/src/domains/chat/roots.js +56 -56
  32. package/src/domains/chat/run-service.js +1977 -1977
  33. package/src/domains/recommend/actions.js +457 -457
  34. package/src/domains/recommend/cards.js +243 -243
  35. package/src/domains/recommend/constants.js +165 -165
  36. package/src/domains/recommend/filters.js +610 -610
  37. package/src/domains/recommend/index.js +10 -10
  38. package/src/domains/recommend/jobs.js +316 -316
  39. package/src/domains/recommend/refresh.js +472 -472
  40. package/src/domains/recommend/roots.js +80 -80
  41. package/src/domains/recommend/scopes.js +246 -246
  42. package/src/domains/recruit/actions.js +277 -277
  43. package/src/domains/recruit/cards.js +74 -74
  44. package/src/domains/recruit/constants.js +167 -167
  45. package/src/domains/recruit/detail.js +461 -461
  46. package/src/domains/recruit/index.js +9 -9
  47. package/src/domains/recruit/instruction-parser.js +451 -451
  48. package/src/domains/recruit/refresh.js +44 -44
  49. package/src/domains/recruit/roots.js +68 -68
  50. package/src/domains/recruit/run-service.js +1207 -1207
  51. package/src/domains/recruit/search.js +1202 -1202
  52. package/src/recommend-mcp.js +22 -22
  53. package/src/recruit-mcp.js +1338 -1338
@@ -1,1201 +1,1201 @@
1
- import fs from "node:fs";
2
- import crypto from "node:crypto";
3
- import path from "node:path";
4
- import sharp from "sharp";
5
- import {
6
- getAttributesMap,
7
- getNodeBox,
8
- getOuterHTML,
9
- querySelectorAll,
10
- sleep
11
- } from "../browser/index.js";
12
- import {
13
- htmlToText,
14
- normalizeText
15
- } from "../screening/index.js";
16
-
17
/**
 * Produce the current wall-clock time as an ISO-8601 string.
 *
 * @returns {string} Result of `Date.prototype.toISOString()` for "now".
 */
function nowIso() {
  const current = new Date();
  return current.toISOString();
}
20
-
21
/**
 * Turn an optional output path into an absolute path whose parent directory exists.
 *
 * @param {string} [filePath] - Target file path; a falsy value means "no file output".
 * @returns {string|null} Absolute path, or null when no path was supplied.
 */
function resolveOutputPath(filePath) {
  if (filePath) {
    const absolutePath = path.resolve(filePath);
    const parentDir = path.dirname(absolutePath);
    // Create the full directory chain so a later write cannot fail on a missing folder.
    fs.mkdirSync(parentDir, { recursive: true });
    return absolutePath;
  }
  return null;
}
27
-
28
/**
 * Expand a rectangle by `padding` on every side and return it as a screenshot clip.
 *
 * The left/top edges are clamped at 0. The right/bottom edges are always placed
 * `padding` beyond the original rectangle, so the padding is symmetric whenever
 * no clamping occurs and only the clamped-off portion is lost otherwise.
 * (The previous formula subtracted the applied left/top shift instead of the
 * clamped-off amount, which dropped the right/bottom padding entirely.)
 *
 * @param {{x: number, y: number, width: number, height: number}} rect - Source rectangle.
 * @param {number} [padding=0] - Extra margin; negative or non-numeric values are treated as 0.
 * @returns {{x: number, y: number, width: number, height: number, scale: number}}
 *   Clip with `scale` fixed at 1 and width/height of at least 1.
 */
function withPadding(rect, padding = 0) {
  const safePadding = Math.max(0, Number(padding) || 0);
  const x = Math.max(0, rect.x - safePadding);
  const y = Math.max(0, rect.y - safePadding);
  // Far edges of the padded rectangle, independent of any clamping at the origin.
  const right = rect.x + rect.width + safePadding;
  const bottom = rect.y + rect.height + safePadding;
  return {
    x,
    y,
    width: Math.max(1, right - x),
    height: Math.max(1, bottom - y),
    scale: 1
  };
}
40
-
41
/**
 * Pick the random-number source to use.
 *
 * @param {*} random - Candidate generator.
 * @returns {Function} `random` itself when it is a function, otherwise `Math.random`.
 */
function normalizeRandom(random) {
  if (typeof random !== "function") {
    return Math.random;
  }
  return random;
}
44
-
45
/**
 * Draw a number in the half-open range [min, max) using the supplied generator.
 *
 * `min` falls back to 0 when it is not a usable number. `max` falls back to
 * `min` only when it is missing (null/undefined/empty string) or not numeric;
 * a real `max` of 0 is honoured, so negative-to-zero ranges work.
 *
 * @param {Function} [random] - Generator returning a value in [0, 1); any
 *   non-function falls back to `Math.random`.
 * @param {number} min - Lower bound.
 * @param {number} max - Upper bound.
 * @returns {number} `min` when the range is empty or inverted, otherwise a value
 *   scaled from the generator output.
 */
function randomBetween(random, min, max) {
  const lower = Number(min) || 0;
  const parsedMax = max == null || max === "" ? Number.NaN : Number(max);
  const upper = Number.isNaN(parsedMax) ? lower : parsedMax;
  if (upper <= lower) return lower;
  // Same fallback rule as the module's generator normalisation: non-functions use Math.random.
  const generator = typeof random === "function" ? random : Math.random;
  return lower + generator() * (upper - lower);
}
51
-
52
/**
 * Coerce a raw value to a number and clamp it into [min, max].
 *
 * @param {*} raw - Value to coerce with `Number()`.
 * @param {number} fallback - Used when `raw` does not coerce to a finite number;
 *   the fallback is clamped as well.
 * @param {{min?: number, max?: number}} [bounds] - Clamp limits (default 0..1).
 * @returns {number} The clamped ratio.
 */
function normalizeRatio(raw, fallback, { min = 0, max = 1 } = {}) {
  const numeric = Number(raw);
  let ratio = fallback;
  if (Number.isFinite(numeric)) {
    ratio = numeric;
  }
  const floored = Math.max(min, ratio);
  return Math.min(max, floored);
}
57
-
58
/**
 * Validate scroll-jitter options and return them in the snake_case shape used
 * by the scroll-delta resolver.
 *
 * @param {object} [options]
 * @param {boolean} [options.enabled=false] - Only the literal `true` enables jitter.
 * @param {number} [options.minRatio=0.65] - Clamped to [0.1, 1].
 * @param {number} [options.maxRatio=0.9] - Clamped to [min_ratio, 1].
 * @param {number} [options.minOverlapRatio=0.2] - Clamped to [0, 0.8].
 * @param {boolean} [options.preserveCoverage=true] - Anything except `false` keeps it on.
 * @param {Function} [options.random=Math.random] - Random source; non-functions fall back.
 * @returns {{enabled: boolean, min_ratio: number, max_ratio: number,
 *   min_overlap_ratio: number, preserve_coverage: boolean, random: Function}}
 */
function normalizeScrollDeltaJitter({
  enabled = false,
  minRatio = 0.65,
  maxRatio = 0.9,
  minOverlapRatio = 0.2,
  preserveCoverage = true,
  random = Math.random
} = {}) {
  const lowRatio = normalizeRatio(minRatio, 0.65, { min: 0.1, max: 1 });
  // The upper ratio may never drop below the lower one.
  const highCandidate = normalizeRatio(maxRatio, 0.9, { min: lowRatio, max: 1 });
  const highRatio = Math.max(lowRatio, highCandidate);
  const overlapRatio = normalizeRatio(minOverlapRatio, 0.2, { min: 0, max: 0.8 });
  const randomSource = normalizeRandom(random);

  return {
    enabled: enabled === true,
    min_ratio: lowRatio,
    max_ratio: highRatio,
    min_overlap_ratio: overlapRatio,
    preserve_coverage: preserveCoverage !== false,
    random: randomSource
  };
}
77
-
78
/**
 * Decide how far to scroll before the next capture.
 *
 * Without jitter the base delta is returned unchanged. With jitter, a delta is
 * drawn between `base * min_ratio` and `base * max_ratio`, with the upper bound
 * additionally capped so that at least `min_overlap_ratio` of the clip height
 * overlaps the previous capture.
 *
 * @param {object} [options]
 * @param {number} [options.baseDelta] - Nominal scroll distance (defaults to 650, minimum 1).
 * @param {number} [options.clipHeight] - Height of the captured clip (minimum 1).
 * @param {object} [options.jitter] - Normalised jitter settings (snake_case fields
 *   plus a `random` function).
 * @returns {object} `deltaY`, a `jittered` flag and `base_delta_y`; when jitter is
 *   active the computed bounds and the jitter settings are included too.
 */
function resolveCoverageSafeScrollDelta({
  baseDelta,
  clipHeight,
  jitter
} = {}) {
  const baseDeltaY = Math.max(1, Number(baseDelta) || 650);
  const jitterActive = Boolean(jitter?.enabled);
  if (!jitterActive) {
    return { deltaY: baseDeltaY, jittered: false, base_delta_y: baseDeltaY };
  }

  const viewHeight = Math.max(1, Number(clipHeight) || 1);
  // Largest step that still leaves the required overlap between two captures.
  const overlapCap = Math.max(1, Math.floor(viewHeight * (1 - jitter.min_overlap_ratio)));
  const scaledMax = Math.round(baseDeltaY * jitter.max_ratio);
  const ceiling = Math.max(1, Math.min(scaledMax, overlapCap));
  const scaledMin = Math.round(baseDeltaY * jitter.min_ratio);
  const floor = Math.min(ceiling, Math.max(1, scaledMin));
  const drawn = randomBetween(jitter.random, floor, ceiling);

  return {
    deltaY: Math.max(1, Math.round(drawn)),
    jittered: true,
    base_delta_y: baseDeltaY,
    min_delta_y: floor,
    max_delta_y: ceiling,
    min_ratio: jitter.min_ratio,
    max_ratio: jitter.max_ratio,
    min_overlap_ratio: jitter.min_overlap_ratio,
    clip_height: viewHeight,
    max_delta_for_overlap: overlapCap,
    preserve_coverage: jitter.preserve_coverage
  };
}
110
-
111
/**
 * Capture a DOM node's attributes, outer HTML and derived text as one record.
 *
 * Attributes and outer HTML are requested concurrently; the text is produced by
 * running `htmlToText` over the outer HTML.
 *
 * @param {object} client - Client handle passed through to the browser helpers.
 * @param {number} nodeId - Node to capture.
 * @param {object} [options]
 * @param {string} [options.domain="unknown"] - Normalised; empty results become "unknown".
 * @param {string} [options.source="dom"] - Copied into the record as-is.
 * @param {object} [options.metadata={}] - Copied into the record as-is.
 * @returns {Promise<object>} Record with schema version, timestamps, lengths, text and HTML.
 */
export async function captureNodeHtml(client, nodeId, {
  domain = "unknown",
  source = "dom",
  metadata = {}
} = {}) {
  const attributesRequest = getAttributesMap(client, nodeId);
  const htmlRequest = getOuterHTML(client, nodeId);
  const [attributes, outerHTML] = await Promise.all([attributesRequest, htmlRequest]);
  const text = htmlToText(outerHTML);
  const normalizedDomain = normalizeText(domain) || "unknown";

  return {
    schema_version: 1,
    domain: normalizedDomain,
    source,
    captured_at: nowIso(),
    node_id: nodeId,
    attributes,
    outer_html_length: outerHTML.length,
    text_length: text.length,
    text,
    outer_html: outerHTML,
    metadata
  };
}
135
-
136
/**
 * Capture a screenshot clipped to a single DOM node.
 *
 * The node's box is read with `getNodeBox`, expanded by `padding`, and passed as
 * the `clip` of `Page.captureScreenshot`. The base64 payload is decoded and, when
 * `filePath` is given, written to disk.
 *
 * @param {object} client - CDP client exposing `Page.captureScreenshot`.
 * @param {number} nodeId - Node to capture.
 * @param {object} [options]
 * @param {string} [options.filePath] - Optional output file; parent directories are created.
 * @param {string} [options.format="png"] - Image format forwarded to CDP.
 * @param {number} [options.quality] - Forwarded to CDP only when not null/undefined.
 * @param {number} [options.padding=0] - Margin added around the node rectangle.
 * @param {boolean} [options.captureBeyondViewport=true] - Forwarded to CDP.
 * @param {boolean} [options.fromSurface=true] - Forwarded to CDP.
 * @param {object} [options.metadata={}] - Copied into the result as-is.
 * @returns {Promise<object>} Capture record (format, MIME type, byte length, file
 *   path, clip and node rectangle).
 */
export async function captureNodeScreenshot(client, nodeId, {
  filePath,
  format = "png",
  quality,
  padding = 0,
  captureBeyondViewport = true,
  fromSurface = true,
  metadata = {}
} = {}) {
  const box = await getNodeBox(client, nodeId);
  const clip = withPadding(box.rect, padding);
  const captureOptions = {
    format,
    fromSurface,
    captureBeyondViewport,
    clip
  };
  if (quality != null) {
    captureOptions.quality = quality;
  }
  const screenshot = await client.Page.captureScreenshot(captureOptions);
  const buffer = Buffer.from(screenshot.data || "", "base64");
  const resolvedPath = resolveOutputPath(filePath);
  if (resolvedPath) {
    fs.writeFileSync(resolvedPath, buffer);
  }

  // CDP accepts jpeg, png and webp; report webp truthfully instead of labelling it png.
  let mimeType = "image/png";
  if (format === "jpeg") {
    mimeType = "image/jpeg";
  } else if (format === "webp") {
    mimeType = "image/webp";
  }

  return {
    schema_version: 1,
    source: "image",
    captured_at: nowIso(),
    node_id: nodeId,
    format,
    mime_type: mimeType,
    byte_length: buffer.length,
    file_path: resolvedPath,
    clip,
    node_rect: box.rect,
    metadata
  };
}
176
-
177
/**
 * Capture a screenshot of the page without a clip rectangle.
 *
 * The base64 payload returned by `Page.captureScreenshot` is decoded and, when
 * `filePath` is given, written to disk.
 *
 * @param {object} client - CDP client exposing `Page.captureScreenshot`.
 * @param {object} [options]
 * @param {string} [options.filePath] - Optional output file; parent directories are created.
 * @param {string} [options.format="png"] - Image format forwarded to CDP.
 * @param {number} [options.quality] - Forwarded to CDP only when not null/undefined.
 * @param {boolean} [options.captureBeyondViewport=false] - Forwarded to CDP.
 * @param {boolean} [options.fromSurface=true] - Forwarded to CDP.
 * @param {object} [options.metadata={}] - Copied into the result as-is.
 * @returns {Promise<object>} Capture record (format, MIME type, byte length, file path).
 */
export async function captureViewportScreenshot(client, {
  filePath,
  format = "png",
  quality,
  captureBeyondViewport = false,
  fromSurface = true,
  metadata = {}
} = {}) {
  const captureOptions = {
    format,
    fromSurface,
    captureBeyondViewport
  };
  if (quality != null) {
    captureOptions.quality = quality;
  }
  const screenshot = await client.Page.captureScreenshot(captureOptions);
  const buffer = Buffer.from(screenshot.data || "", "base64");
  const resolvedPath = resolveOutputPath(filePath);
  if (resolvedPath) {
    fs.writeFileSync(resolvedPath, buffer);
  }

  // CDP accepts jpeg, png and webp; report webp truthfully instead of labelling it png.
  let mimeType = "image/png";
  if (format === "jpeg") {
    mimeType = "image/jpeg";
  } else if (format === "webp") {
    mimeType = "image/webp";
  }

  return {
    schema_version: 1,
    source: "viewport-image",
    captured_at: nowIso(),
    format,
    mime_type: mimeType,
    byte_length: buffer.length,
    file_path: resolvedPath,
    capture_beyond_viewport: Boolean(captureBeyondViewport),
    metadata
  };
}
211
-
212
/**
 * Build the file path for one page of a multi-screenshot sequence.
 *
 * The result is `<dir>/<name>-page-NN<ext>`, where NN is the 1-based page number
 * padded to two digits and `<ext>` is the base path's own extension or, when it
 * has none, `.` + `extension`.
 *
 * @param {string} basePath - Base output path; a falsy value yields null.
 * @param {number} index - Zero-based page index.
 * @param {string} extension - Fallback extension without the leading dot.
 * @returns {string|null} Path for the page, or null when no base path was given.
 */
function filePathForSequence(basePath, index, extension) {
  const absolute = resolveOutputPath(basePath);
  if (!absolute) return null;
  const { dir, name, ext } = path.parse(absolute);
  const suffix = ext || `.${extension}`;
  const pageNumber = String(index + 1).padStart(2, "0");
  return path.join(dir, `${name}-page-${pageNumber}${suffix}`);
}
219
-
220
/**
 * Build the file path for one composed LLM image of a sequence.
 *
 * The result is `<dir>/<name>-llm-NN.jpg`, where NN is the 1-based index padded
 * to two digits; the base path's own extension is discarded.
 *
 * @param {string} basePath - Base output path; a falsy value yields null.
 * @param {number} index - Zero-based image index.
 * @returns {string|null} Path for the image, or null when no base path was given.
 */
function filePathForLlmSequence(basePath, index) {
  const absolute = resolveOutputPath(basePath);
  if (!absolute) return null;
  const { dir, name } = path.parse(absolute);
  const imageNumber = String(index + 1).padStart(2, "0");
  return path.join(dir, `${name}-llm-${imageNumber}.jpg`);
}
227
-
228
/**
 * Compute the SHA-256 digest of image bytes.
 *
 * @param {Buffer|string} buffer - Data fed to the hash.
 * @returns {string} Lower-case hexadecimal digest.
 */
function screenshotHash(buffer) {
  const hasher = crypto.createHash("sha256");
  hasher.update(buffer);
  return hasher.digest("hex");
}
231
-
232
/**
 * Build the error used when a capture step exceeds its time budget.
 *
 * @param {string} label - Name of the step that timed out.
 * @param {number} timeoutMs - Budget that was exceeded, in milliseconds.
 * @returns {Error} Error tagged with `code`, `capture_step` and `timeout_ms`.
 */
function createCaptureTimeoutError(label, timeoutMs) {
  const message = `Image fallback capture timed out during ${label} after ${timeoutMs}ms`;
  return Object.assign(new Error(message), {
    code: "IMAGE_CAPTURE_TIMEOUT",
    capture_step: label,
    timeout_ms: timeoutMs
  });
}
239
-
240
/**
 * Await a promise, rejecting with a capture-timeout error if it takes too long.
 *
 * A timeout that normalises to 0 disables the guard and the promise is returned
 * directly. The timer is always cleared once the race settles.
 *
 * @param {Promise<*>} promise - Work to await.
 * @param {object} [options]
 * @param {string} [options.label="capture_step"] - Step name recorded on the timeout error.
 * @param {number} [options.timeoutMs=0] - Budget in milliseconds; 0 or invalid disables it.
 * @returns {Promise<*>} The settled value of `promise`.
 */
async function withCaptureTimeout(promise, {
  label = "capture_step",
  timeoutMs = 0
} = {}) {
  const budgetMs = Math.max(0, Number(timeoutMs) || 0);
  if (!budgetMs) return promise;

  let timerHandle = null;
  const deadline = new Promise((_resolve, reject) => {
    timerHandle = setTimeout(() => {
      reject(createCaptureTimeoutError(label, budgetMs));
    }, budgetMs);
  });

  try {
    return await Promise.race([promise, deadline]);
  } finally {
    // The executor above runs synchronously, so the handle is always set here.
    clearTimeout(timerHandle);
  }
}
258
-
259
/**
 * Throw when the whole capture sequence has run longer than its total budget.
 *
 * @param {number} started - `Date.now()` value taken when the sequence began.
 * @param {number} totalTimeoutMs - Total budget in milliseconds; 0 or invalid disables the check.
 * @param {string} label - Step name recorded on the error.
 * @returns {void}
 * @throws {Error} Timeout error with `code` "IMAGE_CAPTURE_TOTAL_TIMEOUT" and `elapsed_ms`.
 */
function assertCaptureTotalBudget(started, totalTimeoutMs, label) {
  const budgetMs = Math.max(0, Number(totalTimeoutMs) || 0);
  if (budgetMs === 0) return;

  const elapsedMs = Date.now() - started;
  const withinBudget = elapsedMs <= budgetMs;
  if (withinBudget) return;

  const timeoutError = createCaptureTimeoutError(label, budgetMs);
  timeoutError.elapsed_ms = elapsedMs;
  // Distinguish the overall budget from a single-step timeout.
  timeoutError.code = "IMAGE_CAPTURE_TOTAL_TIMEOUT";
  throw timeoutError;
}
269
-
270
// Comma-separated CSS selector list used when no explicit anchor selector is
// supplied: headings, common text/list/table containers, elements whose class
// contains a resume-related keyword, and finally any div.
const DEFAULT_SCROLL_ANCHOR_SELECTOR = [
  "h1", "h2", "h3", "h4", "h5",
  "p", "li", "section", "article",
  "table", "tr", "dl", "dt", "dd",
  ...["resume", "work", "project", "education", "experience", "item"]
    .map((keyword) => `[class*='${keyword}']`),
  "div"
].join(",");
293
-
294
/**
 * Map a user-supplied scroll method name onto one of the three supported modes.
 *
 * @param {string} [value="dom-anchor-fallback-input"] - Requested method or alias.
 * @returns {"dom-anchor"|"dom-anchor-fallback-input"|"input"} Canonical method;
 *   anything unrecognised becomes "input".
 */
function normalizeScrollMethod(value = "dom-anchor-fallback-input") {
  const key = normalizeText(value).toLowerCase();
  switch (key) {
    case "dom":
    case "dom-anchor":
    case "dom_anchor":
    case "anchor":
      return "dom-anchor";
    case "dom-anchor-fallback-input":
    case "dom_anchor_fallback_input":
    case "dom-fallback-input":
      return "dom-anchor-fallback-input";
    default:
      return "input";
  }
}
302
-
303
/**
 * Coerce values to numbers and return the distinct non-zero ones in first-seen order.
 *
 * @param {Array<*>} [values=[]] - Raw values; anything that coerces to 0 or NaN is dropped.
 * @returns {number[]} Unique truthy numbers.
 */
function uniqueNumbers(values = []) {
  const seen = new Set();
  values.forEach((candidate) => {
    const numeric = Number(candidate) || 0;
    if (numeric) {
      seen.add(numeric);
    }
  });
  return [...seen];
}
306
-
307
/**
 * Select up to `limit` items spread evenly across the list, first and last included.
 *
 * When the list already fits within the limit it is returned unchanged (same
 * array instance). Selected items are de-duplicated by `node_id`: a repeated id
 * keeps its first position but the later item replaces the earlier one.
 *
 * @param {Array<{node_id: *}>} [items=[]] - Candidates in their existing order.
 * @param {number} [limit=1] - Maximum number of picks (minimum 1).
 * @returns {Array<{node_id: *}>} The evenly spaced selection.
 */
function pickEvenly(items = [], limit = 1) {
  const quota = Math.max(1, Number(limit) || 1);
  if (items.length <= quota) return items;

  const lastIndex = items.length - 1;
  const divisor = Math.max(1, quota - 1);
  const byNodeId = new Map();
  for (let slot = 0; slot < quota; slot += 1) {
    const chosen = items[Math.round((slot * lastIndex) / divisor)];
    byNodeId.set(chosen.node_id, chosen);
  }
  return [...byNodeId.values()];
}
318
-
319
/**
 * Produce a printable label for a stop-boundary pattern.
 *
 * @param {RegExp|string} pattern - Pattern to describe.
 * @returns {string} The regex source for RegExp input, otherwise the normalised text.
 */
function patternLabel(pattern) {
  return pattern instanceof RegExp ? pattern.source : normalizeText(pattern);
}
323
-
324
/**
 * Turn stop-boundary patterns (regexes and/or strings) into uniform matcher objects.
 *
 * Each matcher carries the original pattern (`raw`), a printable `label` and a
 * `matches(text)` predicate that always returns a boolean. Falsy entries are
 * dropped; a single non-array pattern is treated as a one-element list.
 *
 * Regex matchers reset `lastIndex` before every test: the same matcher is applied
 * to many texts, and a caller-supplied `/g` or `/y` regex would otherwise resume
 * from the previous match position and miss matches intermittently.
 *
 * @param {Array<RegExp|string>|RegExp|string} [patterns=[]] - Patterns to wrap.
 * @returns {Array<{raw: RegExp|string, label: string, matches: function(string): boolean}>}
 */
function stopBoundaryPatterns(patterns = []) {
  const list = Array.isArray(patterns) ? patterns : [patterns];
  return list
    .filter(Boolean)
    .map((pattern) => {
      if (pattern instanceof RegExp) {
        return {
          raw: pattern,
          label: pattern.source,
          matches: (text) => {
            // Make global/sticky regexes stateless across successive texts.
            pattern.lastIndex = 0;
            return pattern.test(text);
          }
        };
      }
      const normalized = normalizeText(pattern);
      return {
        raw: pattern,
        label: normalized,
        // An empty normalised pattern never matches.
        matches: (text) => Boolean(normalized) && text.includes(normalized)
      };
    });
}
343
-
344
/**
 * Find descendant nodes that mark where a scrolling capture should stop.
 *
 * Candidates come from `querySelectorAll` using the configured selector, or the
 * default anchor selector when only text patterns are configured. When text
 * patterns exist, each candidate's outer HTML is converted to text and kept only
 * if it is non-empty, no longer than the text limit, and matches a pattern. With
 * a selector but no patterns, every probed candidate is kept. Failures on an
 * individual node are ignored so one bad node cannot abort the scan.
 *
 * @param {object} client - Client handle passed through to the browser helpers.
 * @param {number} rootNodeId - Node whose descendants are searched.
 * @param {object} [options]
 * @param {string} [options.selector=""] - CSS selector for boundary candidates.
 * @param {Array<RegExp|string>} [options.textPatterns=[]] - Text patterns identifying a boundary.
 * @param {number} [options.maxProbeNodes=180] - Maximum candidates inspected (minimum 1).
 * @param {number} [options.maxTextLength=700] - Longest text still considered (minimum 40).
 * @param {number} [options.stepTimeoutMs=45000] - Step budget; the per-node timeout is
 *   1/40 of it, kept within 200..1000 ms.
 * @returns {Promise<object>} Plan with `enabled`, `ok`, `reason`, `nodes` and,
 *   depending on the outcome, selector, counts, timing and pattern labels.
 */
async function collectStopBoundaryNodes(client, rootNodeId, {
  selector = "",
  textPatterns = [],
  maxProbeNodes = 180,
  maxTextLength = 700,
  stepTimeoutMs = 45000
} = {}) {
  const matchers = stopBoundaryPatterns(textPatterns);
  const trimmedSelector = normalizeText(selector);
  const hasMatchers = matchers.length > 0;
  if (!trimmedSelector && !hasMatchers) {
    return { enabled: false, ok: false, reason: "not_configured", nodes: [] };
  }

  const startedAt = Date.now();
  const effectiveSelector = trimmedSelector || DEFAULT_SCROLL_ANCHOR_SELECTOR;
  let candidateIds = [];
  try {
    const rawIds = await querySelectorAll(client, rootNodeId, effectiveSelector);
    candidateIds = uniqueNumbers(rawIds);
  } catch (error) {
    return {
      enabled: true,
      ok: false,
      reason: "query_selector_all_failed",
      selector: effectiveSelector,
      error: error?.message || String(error),
      nodes: []
    };
  }

  const probeCap = Math.max(1, Number(maxProbeNodes) || 180);
  const textCap = Math.max(40, Number(maxTextLength) || 700);
  const stepBudgetMs = Number(stepTimeoutMs) || 45000;
  const htmlTimeoutMs = Math.min(1000, Math.max(200, Math.floor(stepBudgetMs / 40)));
  const matched = [];

  for (const candidateId of candidateIds.slice(0, probeCap)) {
    try {
      let previewText = "";
      let hit = null;
      if (hasMatchers) {
        const html = await withCaptureTimeout(getOuterHTML(client, candidateId), {
          label: `stop_boundary_html_${candidateId}`,
          timeoutMs: htmlTimeoutMs
        });
        previewText = normalizeText(htmlToText(html));
        if (!previewText || previewText.length > textCap) continue;
        hit = matchers.find((matcher) => matcher.matches(previewText));
        if (!hit) continue;
      }
      matched.push({
        node_id: candidateId,
        text_preview: previewText.slice(0, 120),
        matched_pattern: hit ? patternLabel(hit.raw) : null
      });
    } catch {
      // Best effort: a node that cannot be read or times out is simply skipped.
    }
  }

  const foundAny = matched.length > 0;
  return {
    enabled: true,
    ok: foundAny,
    reason: foundAny ? null : "no_matching_stop_boundary_nodes",
    selector: effectiveSelector,
    elapsed_ms: Date.now() - startedAt,
    discovered_node_count: candidateIds.length,
    probed_node_count: Math.min(candidateIds.length, probeCap),
    match_count: matched.length,
    pattern_labels: matchers.map((matcher) => matcher.label),
    nodes: matched
  };
}
419
-
420
/**
 * Decide whether a stop boundary affects the capture of the current clip.
 *
 * Each planned boundary node is measured in plan order. Nodes narrower than 40
 * or shorter than 6 are ignored. The first node found entirely above the clip
 * ends the scan with "stop_before_capture". Otherwise the top-most boundary that
 * overlaps the clip is used: when too little height would remain above it the
 * result is again "stop_before_capture", else "capture_then_stop" together with
 * a clip shortened to end above the boundary. Measurement failures on a single
 * node are ignored.
 *
 * @param {object} client - Client handle passed through to `getNodeBox`.
 * @param {{nodes?: Array<object>}} stopBoundaryPlan - Plan listing boundary nodes.
 * @param {{x?: number, y: number, height: number}} clip - Clip about to be captured.
 * @param {object} [options]
 * @param {number} [options.topPadding=8] - Gap kept between the capture and the boundary.
 * @param {number} [options.minCaptureHeight=180] - Smallest shortened capture still worth taking.
 * @param {number} [options.stepTimeoutMs=45000] - Step budget; the per-node timeout is
 *   1/50 of it, kept within 180..900 ms.
 * @returns {Promise<object|null>} Decision record, or null when no boundary applies.
 */
async function resolveVisibleStopBoundary(client, stopBoundaryPlan, clip, {
  topPadding = 8,
  minCaptureHeight = 180,
  stepTimeoutMs = 45000
} = {}) {
  const plannedNodes = stopBoundaryPlan?.nodes;
  if (!plannedNodes?.length || !clip) return null;

  const clipTop = Number(clip.y) || 0;
  const clipBottom = clipTop + (Number(clip.height) || 0);
  const padAbove = Math.max(0, Number(topPadding) || 0);
  const minHeight = Math.max(1, Number(minCaptureHeight) || 180);
  const stepBudgetMs = Number(stepTimeoutMs) || 45000;
  const boxTimeoutMs = Math.min(900, Math.max(180, Math.floor(stepBudgetMs / 50)));
  // Fields shared by every decision record, in their reporting order.
  const describe = (node, rect) => ({
    node_id: node.node_id,
    matched_pattern: node.matched_pattern,
    text_preview: node.text_preview,
    rect,
    clip
  });

  // Top-most boundary overlapping the clip; ties keep the earlier node.
  let topMost = null;
  for (const node of plannedNodes) {
    try {
      const box = await withCaptureTimeout(getNodeBox(client, node.node_id), {
        label: `stop_boundary_box_${node.node_id}`,
        timeoutMs: boxTimeoutMs
      });
      const rect = box?.rect || {};
      const width = Number(rect.width) || 0;
      const height = Number(rect.height) || 0;
      if (width < 40 || height < 6) continue;

      const top = Number(rect.y) || 0;
      const bottom = top + height;
      if (bottom <= clipTop + 1) {
        return {
          action: "stop_before_capture",
          reason: "stop_boundary_above_clip",
          ...describe(node, rect)
        };
      }
      const overlapsClip = top < clipBottom && bottom > clipTop;
      if (overlapsClip && (topMost === null || top < topMost.top)) {
        topMost = { ...node, rect, top, bottom };
      }
    } catch {
      // Best effort: a node that cannot be measured is skipped.
    }
  }
  if (topMost === null) return null;

  const boundaryY = Math.max(clipTop, topMost.top - padAbove);
  const adjustedHeight = Math.max(0, boundaryY - clipTop);
  const shared = describe(topMost, topMost.rect);

  if (adjustedHeight < minHeight) {
    return {
      action: "stop_before_capture",
      reason: "stop_boundary_near_clip_top",
      ...shared,
      adjusted_height: adjustedHeight,
      min_capture_height: minHeight
    };
  }

  return {
    action: "capture_then_stop",
    reason: "stop_boundary_visible",
    ...shared,
    adjusted_clip: {
      ...clip,
      height: adjustedHeight
    },
    adjusted_height: adjustedHeight,
    min_capture_height: minHeight
  };
}
502
-
503
/**
 * Build a list of DOM nodes to scroll to, one per planned screenshot.
 *
 * Candidate nodes are found with `querySelectorAll`, measured with `getNodeBox`
 * (nodes narrower than 80 or shorter than 8 are dropped), sorted by vertical
 * position and thinned so consecutive anchors are at least the minimum gap
 * apart. If fewer than two anchors survive, the probed nodes are used in their
 * query order without positions. The list is finally reduced to an evenly spaced
 * selection of at most `maxScreenshots` entries. Measurement failures on a single
 * node are ignored.
 *
 * @param {object} client - Client handle passed through to the browser helpers.
 * @param {number} rootNodeId - Node whose descendants are searched.
 * @param {object} [options]
 * @param {string} [options.selector] - CSS selector for candidates (defaults to the
 *   module's default anchor selector).
 * @param {number} [options.maxScreenshots=6] - Maximum anchors returned (minimum 1).
 * @param {number} [options.maxProbeNodes=260] - Maximum candidates measured (minimum 1).
 * @param {number} [options.minAnchorGap=180] - Minimum vertical distance between anchors (at least 40).
 * @param {number} [options.stepTimeoutMs=45000] - Step budget; the per-node timeout is
 *   1/30 of it, kept within 250..1200 ms.
 * @returns {Promise<object>} Plan with `ok`, `method` and either a failure `reason`
 *   or counts, timing and `anchors`.
 */
async function collectDomScrollAnchors(client, rootNodeId, {
  selector = DEFAULT_SCROLL_ANCHOR_SELECTOR,
  maxScreenshots = 6,
  maxProbeNodes = 260,
  minAnchorGap = 180,
  stepTimeoutMs = 45000
} = {}) {
  const startedAt = Date.now();
  const failure = (reason, extra = {}) => ({
    ok: false,
    method: "dom-anchor",
    reason,
    ...extra
  });

  let candidateIds = [];
  try {
    candidateIds = uniqueNumbers(await querySelectorAll(client, rootNodeId, selector));
  } catch (error) {
    return failure("query_selector_all_failed", { error: error?.message || String(error) });
  }
  if (candidateIds.length === 0) {
    return failure("no_anchor_nodes");
  }

  const probeCap = Math.max(1, Number(maxProbeNodes) || 260);
  const stepBudgetMs = Number(stepTimeoutMs) || 45000;
  const boxTimeoutMs = Math.min(1200, Math.max(250, Math.floor(stepBudgetMs / 30)));
  const probedIds = candidateIds.slice(0, probeCap);

  const measured = [];
  for (const candidateId of probedIds) {
    try {
      const box = await withCaptureTimeout(getNodeBox(client, candidateId), {
        label: `anchor_box_${candidateId}`,
        timeoutMs: boxTimeoutMs
      });
      const rect = box?.rect || {};
      const rectWidth = Number(rect.width) || 0;
      const rectHeight = Number(rect.height) || 0;
      if (rectWidth >= 80 && rectHeight >= 8) {
        measured.push({
          node_id: candidateId,
          y: Math.round(Number(rect.y) || 0),
          height: Math.round(rectHeight)
        });
      }
    } catch {
      // Best effort: a node that cannot be measured is skipped.
    }
  }

  // Walk top to bottom, keeping only anchors far enough below the previous one.
  measured.sort((first, second) => first.y - second.y);
  const minGap = Math.max(40, Number(minAnchorGap) || 180);
  let anchors = [];
  for (const candidate of measured) {
    const previous = anchors[anchors.length - 1];
    if (!previous || Math.abs(candidate.y - previous.y) >= minGap) {
      anchors.push(candidate);
    }
  }

  if (anchors.length < 2) {
    // Not enough measured anchors: fall back to the probed nodes in query order.
    anchors = probedIds.map((candidateId, position) => ({
      node_id: candidateId,
      y: null,
      height: null,
      document_order: position
    }));
  }

  anchors = pickEvenly(anchors, Math.max(1, Number(maxScreenshots) || 1));
  return {
    ok: anchors.length > 0,
    method: "dom-anchor",
    elapsed_ms: Date.now() - startedAt,
    selector,
    discovered_node_count: candidateIds.length,
    measured_node_count: measured.length,
    anchor_count: anchors.length,
    anchors
  };
}
581
-
582
/**
 * Scroll a node into view through the CDP `DOM.scrollIntoViewIfNeeded` command.
 *
 * Uses `client.DOM.scrollIntoViewIfNeeded` when the client exposes it, otherwise
 * a generic `client.send` call; either way the call is guarded by a timeout.
 *
 * @param {object} client - CDP client.
 * @param {number} nodeId - Node to scroll to.
 * @param {object} [options]
 * @param {number} [options.timeoutMs=10000] - Timeout for the scroll command.
 * @param {string} [options.label="dom_scroll_anchor"] - Step name recorded on a timeout error.
 * @returns {Promise<*>} Result of the CDP call.
 * @throws {Error} When the client offers neither way to issue the command.
 */
async function scrollDomAnchorIntoView(client, nodeId, {
  timeoutMs = 10000,
  label = "dom_scroll_anchor"
} = {}) {
  const hasDomMethod = Boolean(client.DOM) && typeof client.DOM.scrollIntoViewIfNeeded === "function";
  if (hasDomMethod) {
    const pending = client.DOM.scrollIntoViewIfNeeded({ nodeId });
    return withCaptureTimeout(pending, { label, timeoutMs });
  }
  if (typeof client.send === "function") {
    const pending = client.send("DOM.scrollIntoViewIfNeeded", { nodeId });
    return withCaptureTimeout(pending, { label, timeoutMs });
  }
  throw new Error("CDP client does not expose DOM.scrollIntoViewIfNeeded");
}
594
-
595
/**
 * Optionally downscale and re-encode a screenshot buffer with sharp.
 *
 * Nothing happens unless `enabled` is truthy or a resize width is given. The
 * image is resized only when it is wider than the limit, then re-encoded as
 * JPEG ("jpg" is treated as "jpeg"), WebP, or PNG for any other format. Any
 * failure is reported in `optimization_error` and the original buffer is returned.
 *
 * @param {Buffer} buffer - Encoded source image.
 * @param {object} [options]
 * @param {boolean} [options.enabled=false] - Request re-encoding.
 * @param {string} [options.format="png"] - Target format.
 * @param {number} [options.quality] - JPEG/WebP quality, clamped to 35..95
 *   (defaults: 72 for JPEG, 76 for WebP).
 * @param {number} [options.resizeMaxWidth=0] - Maximum width; 0 disables resizing.
 * @returns {Promise<{buffer: Buffer, optimized: boolean, optimization_error: (string|null),
 *   original_byte_length?: number}>}
 */
async function optimizeScreenshotBuffer(buffer, {
  enabled = false,
  format = "png",
  quality,
  resizeMaxWidth = 0
} = {}) {
  const nothingRequested = !enabled && !resizeMaxWidth;
  if (nothingRequested) {
    return { buffer, optimized: false, optimization_error: null };
  }

  // Quality is clamped to 35..95; the per-format default covers missing or invalid values.
  const resolveQuality = (fallback) => (
    quality == null ? fallback : Math.max(35, Math.min(95, Number(quality) || fallback))
  );

  try {
    const targetFormat = format === "jpg" ? "jpeg" : format;
    let image = sharp(buffer, { failOn: "none" });
    const details = await image.metadata();
    const sourceWidth = Number(details.width) || 0;
    const widthCap = Math.max(0, Number(resizeMaxWidth) || 0);
    if (widthCap > 0 && sourceWidth > widthCap) {
      image = image.resize({ width: widthCap, withoutEnlargement: true });
    }

    switch (targetFormat) {
      case "jpeg":
        image = image.jpeg({ quality: resolveQuality(72), mozjpeg: true });
        break;
      case "webp":
        image = image.webp({ quality: resolveQuality(76) });
        break;
      default:
        image = image.png({ compressionLevel: 9, adaptiveFiltering: true });
        break;
    }

    const encoded = await image.toBuffer();
    return {
      buffer: encoded,
      optimized: true,
      original_byte_length: buffer.length,
      optimization_error: null
    };
  } catch (error) {
    return {
      buffer,
      optimized: false,
      original_byte_length: buffer.length,
      optimization_error: error?.message || String(error)
    };
  }
}
651
-
652
/**
 * Stack saved screenshots vertically into a smaller number of JPEG images.
 *
 * Screenshots without a `file_path` are ignored. When there is no base path, or
 * at most one usable screenshot, nothing is composed and the original file paths
 * are returned. Otherwise the files are processed in groups: each page is resized
 * (never enlarged) and JPEG-encoded, then the group is composited top to bottom
 * on a white canvas and written next to `basePath`. If anything fails, the
 * original file paths are returned together with the error message.
 *
 * @param {Array<{file_path?: string, byte_length?: number}>} [screenshots=[]] - Captured pages.
 * @param {object} [options]
 * @param {string} [options.basePath] - Base path used to derive the output file names.
 * @param {number} [options.pagesPerImage=3] - Pages per composed image, clamped to 1..5.
 * @param {number} [options.resizeMaxWidth=1100] - Page width limit, clamped to 700..1400.
 * @param {number} [options.quality=72] - JPEG quality, clamped to 45..90.
 * @returns {Promise<object>} `llm_file_paths`, `llm_screenshots`, byte totals and
 *   `llm_composition_error`.
 */
async function composeScreenshotsForLlm(screenshots = [], {
  basePath,
  pagesPerImage = 3,
  resizeMaxWidth = 1100,
  quality = 72
} = {}) {
  const withFiles = screenshots.filter((shot) => shot?.file_path);
  const originalPaths = () => withFiles.map((shot) => shot.file_path);
  const sumBytes = (entries) => entries.reduce(
    (total, entry) => total + (Number(entry.byte_length) || 0),
    0
  );

  if (!basePath || withFiles.length <= 1) {
    return {
      llm_file_paths: originalPaths(),
      llm_screenshots: [],
      llm_total_byte_length: 0,
      llm_original_total_byte_length: 0,
      llm_composition_error: null
    };
  }

  const groupSize = Math.max(1, Math.min(5, Number(pagesPerImage) || 3));
  const pageWidthCap = Math.max(700, Math.min(1400, Number(resizeMaxWidth) || 1100));
  const jpegQuality = Math.max(45, Math.min(90, Number(quality) || 72));
  const composed = [];

  try {
    for (let offset = 0; offset < withFiles.length; offset += groupSize) {
      const batch = withFiles.slice(offset, offset + groupSize);

      // Normalise every page of the batch to a width-limited JPEG.
      const pages = [];
      for (const shot of batch) {
        const sourceBytes = fs.readFileSync(shot.file_path);
        const { data, info } = await sharp(sourceBytes, { failOn: "none" })
          .resize({ width: pageWidthCap, withoutEnlargement: true })
          .jpeg({ quality: jpegQuality, mozjpeg: true })
          .toBuffer({ resolveWithObject: true });
        pages.push({
          input: data,
          width: info.width,
          height: info.height,
          source_file_path: shot.file_path
        });
      }

      // Lay the pages out top to bottom; the canvas is as wide as the widest page.
      const canvasWidth = Math.max(...pages.map((page) => page.width), 1);
      const overlays = [];
      let canvasHeight = 0;
      for (const page of pages) {
        overlays.push({ input: page.input, left: 0, top: canvasHeight });
        canvasHeight += page.height;
      }

      const canvas = sharp({
        create: {
          width: canvasWidth,
          height: canvasHeight,
          channels: 3,
          background: "#ffffff"
        }
      });
      const outputBytes = await canvas
        .composite(overlays)
        .jpeg({ quality: jpegQuality, mozjpeg: true })
        .toBuffer();

      const position = composed.length;
      const outputPath = filePathForLlmSequence(basePath, position);
      fs.writeFileSync(outputPath, outputBytes);
      composed.push({
        index: position,
        file_path: outputPath,
        byte_length: outputBytes.length,
        source_file_paths: pages.map((page) => page.source_file_path),
        source_page_count: pages.length,
        width: canvasWidth,
        height: canvasHeight,
        format: "jpeg",
        mime_type: "image/jpeg"
      });
    }
  } catch (error) {
    return {
      llm_file_paths: originalPaths(),
      llm_screenshots: [],
      llm_total_byte_length: 0,
      llm_original_total_byte_length: sumBytes(withFiles),
      llm_composition_error: error?.message || String(error)
    };
  }

  return {
    llm_file_paths: composed.map((entry) => entry.file_path),
    llm_screenshots: composed,
    llm_total_byte_length: sumBytes(composed),
    llm_original_total_byte_length: sumBytes(withFiles),
    llm_composition_error: null
  };
}
756
-
757
- export async function captureScrolledNodeScreenshots(client, nodeId, {
758
- filePath,
759
- format = "png",
760
- quality,
761
- padding = 0,
762
- captureBeyondViewport = true,
763
- fromSurface = true,
764
- captureViewport = false,
765
- maxScreenshots = 6,
766
- wheelDeltaY = 650,
767
- settleMs = 900,
768
- duplicateStopCount = 2,
769
- skipDuplicateScreenshots = false,
770
- optimize = false,
771
- resizeMaxWidth = 0,
772
- composeForLlm = false,
773
- llmPagesPerImage = 3,
774
- llmResizeMaxWidth = 1100,
775
- llmQuality = 72,
776
- stepTimeoutMs = 45000,
777
- totalTimeoutMs = 90000,
778
- scrollMethod = "dom-anchor-fallback-input",
779
- scrollAnchorSelector = DEFAULT_SCROLL_ANCHOR_SELECTOR,
780
- scrollAnchorMaxProbeNodes = 260,
781
- scrollAnchorMinGap = 180,
782
- scrollDeltaJitterEnabled = false,
783
- scrollDeltaJitterMinRatio = 0.65,
784
- scrollDeltaJitterMaxRatio = 0.9,
785
- scrollDeltaJitterMinOverlapRatio = 0.2,
786
- scrollDeltaJitterPreserveCoverage = true,
787
- scrollDeltaJitterRandom = Math.random,
788
- stopBoundarySelector = "",
789
- stopBoundaryTextPatterns = [],
790
- stopBoundaryMaxProbeNodes = 180,
791
- stopBoundaryMaxTextLength = 700,
792
- stopBoundaryTopPadding = 8,
793
- stopBoundaryMinCaptureHeight = 180,
794
- metadata = {}
795
- } = {}) {
796
- if (!nodeId) throw new Error("captureScrolledNodeScreenshots requires nodeId");
797
- const sequenceStarted = Date.now();
798
- const normalizedScrollMethod = normalizeScrollMethod(scrollMethod);
799
- const maxScreenshotCount = Math.max(1, Number(maxScreenshots) || 1);
800
- const scrollDeltaJitter = normalizeScrollDeltaJitter({
801
- enabled: scrollDeltaJitterEnabled,
802
- minRatio: scrollDeltaJitterMinRatio,
803
- maxRatio: scrollDeltaJitterMaxRatio,
804
- minOverlapRatio: scrollDeltaJitterMinOverlapRatio,
805
- preserveCoverage: scrollDeltaJitterPreserveCoverage,
806
- random: scrollDeltaJitterRandom
807
- });
808
- const maxCaptureIterations = scrollDeltaJitter.enabled && scrollDeltaJitter.preserve_coverage
809
- ? Math.max(maxScreenshotCount, Math.ceil(maxScreenshotCount / scrollDeltaJitter.min_ratio))
810
- : maxScreenshotCount;
811
- const anchorPlan = normalizedScrollMethod !== "input"
812
- ? await collectDomScrollAnchors(client, nodeId, {
813
- selector: scrollAnchorSelector,
814
- maxScreenshots: maxCaptureIterations,
815
- maxProbeNodes: scrollAnchorMaxProbeNodes,
816
- minAnchorGap: scrollAnchorMinGap,
817
- stepTimeoutMs
818
- })
819
- : null;
820
- const stopBoundaryEnabled = Boolean(
821
- normalizeText(stopBoundarySelector)
822
- || (Array.isArray(stopBoundaryTextPatterns)
823
- ? stopBoundaryTextPatterns.length
824
- : stopBoundaryTextPatterns)
825
- );
826
- let stopBoundaryPlan = {
827
- enabled: false,
828
- ok: false,
829
- reason: "not_configured",
830
- nodes: []
831
- };
832
- const stopBoundaryChecks = [];
833
- const screenshots = [];
834
- let consecutiveDuplicates = 0;
835
- let previousHash = "";
836
- let captureCount = 0;
837
- let droppedDuplicateCount = 0;
838
- let forceInputScrollAfterDuplicate = false;
839
- let stopBoundaryResult = null;
840
- let currentScrollMetadata = {
841
- before_capture: "initial",
842
- method: normalizedScrollMethod,
843
- anchor_plan: anchorPlan
844
- ? {
845
- ok: Boolean(anchorPlan.ok),
846
- reason: anchorPlan.reason || null,
847
- discovered_node_count: anchorPlan.discovered_node_count || 0,
848
- measured_node_count: anchorPlan.measured_node_count || 0,
849
- anchor_count: anchorPlan.anchor_count || 0,
850
- elapsed_ms: anchorPlan.elapsed_ms || 0
851
- }
852
- : null
853
- };
854
-
855
- if (anchorPlan?.anchors?.[0]?.node_id && normalizedScrollMethod !== "input") {
856
- try {
857
- await scrollDomAnchorIntoView(client, anchorPlan.anchors[0].node_id, {
858
- label: "scroll_dom_anchor_initial",
859
- timeoutMs: Math.min(Math.max(3000, Number(stepTimeoutMs) || 45000), 10000)
860
- });
861
- currentScrollMetadata = {
862
- before_capture: "dom_anchor_initial",
863
- method: "DOM.scrollIntoViewIfNeeded",
864
- anchor_node_id: anchorPlan.anchors[0].node_id,
865
- anchor_y: anchorPlan.anchors[0].y,
866
- anchor_height: anchorPlan.anchors[0].height,
867
- anchor_plan: currentScrollMetadata.anchor_plan
868
- };
869
- } catch (error) {
870
- if (normalizedScrollMethod === "dom-anchor") {
871
- throw error;
872
- }
873
- currentScrollMetadata = {
874
- before_capture: "dom_anchor_initial_failed",
875
- method: "DOM.scrollIntoViewIfNeeded",
876
- anchor_node_id: anchorPlan.anchors[0].node_id,
877
- error: error?.message || String(error),
878
- anchor_plan: currentScrollMetadata.anchor_plan
879
- };
880
- }
881
- }
882
-
883
- for (let index = 0; index < maxCaptureIterations; index += 1) {
884
- assertCaptureTotalBudget(sequenceStarted, totalTimeoutMs, `capture_page_${index + 1}`);
885
- captureCount += 1;
886
- const captureStarted = Date.now();
887
- const box = await withCaptureTimeout(getNodeBox(client, nodeId), {
888
- label: `get_box_${index + 1}`,
889
- timeoutMs: stepTimeoutMs
890
- });
891
- const clip = withPadding(box.rect, padding);
892
- let visibleStopBoundary = null;
893
- if (stopBoundaryEnabled) {
894
- stopBoundaryPlan = await collectStopBoundaryNodes(client, nodeId, {
895
- selector: stopBoundarySelector,
896
- textPatterns: stopBoundaryTextPatterns,
897
- maxProbeNodes: stopBoundaryMaxProbeNodes,
898
- maxTextLength: stopBoundaryMaxTextLength,
899
- stepTimeoutMs
900
- });
901
- stopBoundaryChecks.push({
902
- capture_index: index,
903
- ok: Boolean(stopBoundaryPlan.ok),
904
- reason: stopBoundaryPlan.reason || null,
905
- discovered_node_count: stopBoundaryPlan.discovered_node_count || 0,
906
- probed_node_count: stopBoundaryPlan.probed_node_count || 0,
907
- match_count: stopBoundaryPlan.match_count || 0,
908
- elapsed_ms: stopBoundaryPlan.elapsed_ms || 0
909
- });
910
- visibleStopBoundary = await resolveVisibleStopBoundary(client, stopBoundaryPlan, clip, {
911
- topPadding: stopBoundaryTopPadding,
912
- minCaptureHeight: stopBoundaryMinCaptureHeight,
913
- stepTimeoutMs
914
- });
915
- }
916
- if (visibleStopBoundary?.action === "stop_before_capture") {
917
- stopBoundaryResult = visibleStopBoundary;
918
- break;
919
- }
920
- const effectiveClip = visibleStopBoundary?.adjusted_clip || clip;
921
- const effectiveCaptureViewport = Boolean(captureViewport && !visibleStopBoundary?.adjusted_clip);
922
- const captureOptions = effectiveCaptureViewport ? {
923
- format,
924
- fromSurface,
925
- captureBeyondViewport: false
926
- } : {
927
- format,
928
- fromSurface,
929
- captureBeyondViewport,
930
- clip: effectiveClip
931
- };
932
- if (quality != null) {
933
- captureOptions.quality = quality;
934
- }
935
- const screenshot = await withCaptureTimeout(client.Page.captureScreenshot(captureOptions), {
936
- label: `capture_screenshot_${index + 1}`,
937
- timeoutMs: stepTimeoutMs
938
- });
939
- const originalBuffer = Buffer.from(screenshot.data || "", "base64");
940
- const optimized = await withCaptureTimeout(optimizeScreenshotBuffer(originalBuffer, {
941
- enabled: optimize,
942
- format,
943
- quality,
944
- resizeMaxWidth
945
- }), {
946
- label: `optimize_screenshot_${index + 1}`,
947
- timeoutMs: stepTimeoutMs
948
- });
949
- const buffer = optimized.buffer;
950
- const hash = screenshotHash(buffer);
951
- const duplicateOfPrevious = previousHash && previousHash === hash;
952
- if (duplicateOfPrevious) {
953
- consecutiveDuplicates += 1;
954
- } else {
955
- consecutiveDuplicates = 0;
956
- }
957
-
958
- let outputPath = null;
959
- if (duplicateOfPrevious && skipDuplicateScreenshots) {
960
- droppedDuplicateCount += 1;
961
- } else {
962
- outputPath = filePath ? filePathForSequence(filePath, screenshots.length, format) : null;
963
- if (outputPath) {
964
- fs.writeFileSync(outputPath, buffer);
965
- }
966
-
967
- screenshots.push({
968
- index: screenshots.length,
969
- capture_index: index,
970
- source: "image",
971
- captured_at: nowIso(),
972
- node_id: nodeId,
973
- format,
974
- mime_type: `image/${format === "jpeg" ? "jpeg" : "png"}`,
975
- byte_length: buffer.length,
976
- original_byte_length: optimized.original_byte_length || originalBuffer.length,
977
- optimized: Boolean(optimized.optimized),
978
- optimization_error: optimized.optimization_error || null,
979
- elapsed_ms: Date.now() - captureStarted,
980
- file_path: outputPath,
981
- sha256: hash,
982
- duplicate_of_previous: Boolean(duplicateOfPrevious),
983
- clip: effectiveClip,
984
- capture_viewport: effectiveCaptureViewport,
985
- node_rect: box.rect,
986
- scroll: currentScrollMetadata,
987
- stop_boundary: visibleStopBoundary || null,
988
- metadata
989
- });
990
- }
991
-
992
- if (visibleStopBoundary?.action === "capture_then_stop") {
993
- stopBoundaryResult = visibleStopBoundary;
994
- break;
995
- }
996
-
997
- previousHash = hash;
998
- forceInputScrollAfterDuplicate = Boolean(
999
- duplicateOfPrevious
1000
- && normalizedScrollMethod === "dom-anchor-fallback-input"
1001
- && currentScrollMetadata?.method === "DOM.scrollIntoViewIfNeeded"
1002
- );
1003
- if (
1004
- consecutiveDuplicates >= Math.max(1, Number(duplicateStopCount) || 1)
1005
- && !forceInputScrollAfterDuplicate
1006
- ) {
1007
- break;
1008
- }
1009
-
1010
- if (index < maxCaptureIterations - 1) {
1011
- assertCaptureTotalBudget(sequenceStarted, totalTimeoutMs, `scroll_after_page_${index + 1}`);
1012
- let scrolledByDomAnchor = false;
1013
- const nextAnchor = anchorPlan?.anchors?.[index + 1] || null;
1014
- if (nextAnchor?.node_id && normalizedScrollMethod !== "input" && !forceInputScrollAfterDuplicate) {
1015
- try {
1016
- await scrollDomAnchorIntoView(client, nextAnchor.node_id, {
1017
- label: `scroll_dom_anchor_${index + 1}`,
1018
- timeoutMs: Math.min(Math.max(3000, Number(stepTimeoutMs) || 45000), 10000)
1019
- });
1020
- scrolledByDomAnchor = true;
1021
- currentScrollMetadata = {
1022
- before_capture: `dom_anchor_${index + 1}`,
1023
- method: "DOM.scrollIntoViewIfNeeded",
1024
- anchor_node_id: nextAnchor.node_id,
1025
- anchor_y: nextAnchor.y,
1026
- anchor_height: nextAnchor.height
1027
- };
1028
- } catch (error) {
1029
- if (normalizedScrollMethod === "dom-anchor") {
1030
- throw error;
1031
- }
1032
- currentScrollMetadata = {
1033
- before_capture: `dom_anchor_${index + 1}_failed`,
1034
- method: "DOM.scrollIntoViewIfNeeded",
1035
- anchor_node_id: nextAnchor.node_id,
1036
- error: error?.message || String(error)
1037
- };
1038
- }
1039
- } else if (normalizedScrollMethod === "dom-anchor") {
1040
- break;
1041
- }
1042
-
1043
- if (!scrolledByDomAnchor && normalizedScrollMethod !== "dom-anchor") {
1044
- const x = box.center.x;
1045
- const y = box.center.y;
1046
- const scrollDelta = resolveCoverageSafeScrollDelta({
1047
- baseDelta: wheelDeltaY,
1048
- clipHeight: effectiveClip.height,
1049
- jitter: scrollDeltaJitter
1050
- });
1051
- await withCaptureTimeout(client.Input.dispatchMouseEvent({ type: "mouseMoved", x, y, button: "none" }), {
1052
- label: `scroll_mouse_move_${index + 1}`,
1053
- timeoutMs: Math.min(Math.max(3000, Number(stepTimeoutMs) || 45000), 10000)
1054
- });
1055
- await withCaptureTimeout(client.Input.dispatchMouseEvent({
1056
- type: "mouseWheel",
1057
- x,
1058
- y,
1059
- deltaX: 0,
1060
- deltaY: scrollDelta.deltaY
1061
- }), {
1062
- label: `scroll_wheel_${index + 1}`,
1063
- timeoutMs: Math.min(Math.max(3000, Number(stepTimeoutMs) || 45000), 10000)
1064
- });
1065
- currentScrollMetadata = {
1066
- before_capture: `wheel_down_${index + 1}`,
1067
- method: "Input.dispatchMouseEvent",
1068
- fallback_from_dom_anchor: Boolean(anchorPlan && normalizedScrollMethod === "dom-anchor-fallback-input"),
1069
- wheel_delta_y: scrollDelta.deltaY,
1070
- wheel_delta_base_y: scrollDelta.base_delta_y,
1071
- wheel_delta_jitter: scrollDelta.jittered ? scrollDelta : null
1072
- };
1073
- }
1074
- if (settleMs > 0) await sleep(settleMs);
1075
- }
1076
- }
1077
-
1078
- const llmComposition = composeForLlm
1079
- ? await withCaptureTimeout(composeScreenshotsForLlm(screenshots, {
1080
- basePath: filePath,
1081
- pagesPerImage: llmPagesPerImage,
1082
- resizeMaxWidth: llmResizeMaxWidth,
1083
- quality: llmQuality
1084
- }), {
1085
- label: "compose_llm_screenshots",
1086
- timeoutMs: stepTimeoutMs
1087
- })
1088
- : {
1089
- llm_file_paths: screenshots.map((item) => item.file_path).filter(Boolean),
1090
- llm_screenshots: [],
1091
- llm_total_byte_length: 0,
1092
- llm_original_total_byte_length: 0,
1093
- llm_composition_error: null
1094
- };
1095
-
1096
- return {
1097
- schema_version: 1,
1098
- ok: true,
1099
- source: "image-scroll-sequence",
1100
- captured_at: nowIso(),
1101
- node_id: nodeId,
1102
- elapsed_ms: Date.now() - sequenceStarted,
1103
- capture_count: captureCount,
1104
- screenshot_count: screenshots.length,
1105
- unique_screenshot_count: new Set(screenshots.map((item) => item.sha256)).size,
1106
- duplicate_screenshot_count: captureCount - new Set(screenshots.map((item) => item.sha256)).size,
1107
- dropped_duplicate_count: droppedDuplicateCount,
1108
- total_byte_length: screenshots.reduce((sum, item) => sum + (Number(item.byte_length) || 0), 0),
1109
- original_total_byte_length: screenshots.reduce((sum, item) => sum + (Number(item.original_byte_length) || 0), 0),
1110
- llm_file_paths: llmComposition.llm_file_paths,
1111
- llm_screenshot_count: llmComposition.llm_file_paths.length,
1112
- llm_total_byte_length: llmComposition.llm_total_byte_length,
1113
- llm_original_total_byte_length: llmComposition.llm_original_total_byte_length,
1114
- llm_composition_error: llmComposition.llm_composition_error,
1115
- llm_screenshots: llmComposition.llm_screenshots,
1116
- optimization: {
1117
- enabled: Boolean(optimize),
1118
- resize_max_width: Math.max(0, Number(resizeMaxWidth) || 0),
1119
- capture_viewport: Boolean(captureViewport),
1120
- format,
1121
- quality: quality ?? null,
1122
- llm_compose_enabled: Boolean(composeForLlm),
1123
- llm_pages_per_image: Math.max(1, Math.min(5, Number(llmPagesPerImage) || 3)),
1124
- llm_resize_max_width: Math.max(0, Number(llmResizeMaxWidth) || 0),
1125
- llm_quality: llmQuality ?? null,
1126
- step_timeout_ms: Math.max(0, Number(stepTimeoutMs) || 0),
1127
- total_timeout_ms: Math.max(0, Number(totalTimeoutMs) || 0),
1128
- scroll_method: normalizedScrollMethod,
1129
- requested_max_screenshots: maxScreenshotCount,
1130
- effective_max_screenshots: maxCaptureIterations,
1131
- scroll_anchor_selector: scrollAnchorSelector,
1132
- scroll_anchor_max_probe_nodes: Math.max(1, Number(scrollAnchorMaxProbeNodes) || 260),
1133
- scroll_anchor_min_gap: Math.max(0, Number(scrollAnchorMinGap) || 0),
1134
- scroll_delta_jitter: {
1135
- enabled: scrollDeltaJitter.enabled,
1136
- min_ratio: scrollDeltaJitter.min_ratio,
1137
- max_ratio: scrollDeltaJitter.max_ratio,
1138
- min_overlap_ratio: scrollDeltaJitter.min_overlap_ratio,
1139
- preserve_coverage: scrollDeltaJitter.preserve_coverage
1140
- }
1141
- },
1142
- scroll_anchor_plan: anchorPlan,
1143
- stop_boundary_plan: stopBoundaryPlan,
1144
- stop_boundary_checks: stopBoundaryChecks,
1145
- stop_boundary_result: stopBoundaryResult,
1146
- file_paths: screenshots.map((item) => item.file_path).filter(Boolean),
1147
- screenshots,
1148
- metadata
1149
- };
1150
- }
1151
-
1152
/**
 * Collects evidence for one DOM node: optionally its HTML snapshot and
 * optionally a screenshot (single clip or scroll sequence).
 *
 * @param {object} client - CDP client handed through to the capture helpers.
 * @param {object} [options]
 * @param {number} options.nodeId - Node to capture; required.
 * @param {string} [options.domain="unknown"]
 * @param {string} [options.source="dom"]
 * @param {string} [options.screenshotPath] - Forwarded as `filePath` to the screenshot helper.
 * @param {boolean} [options.includeHtml=true]
 * @param {boolean} [options.includeScreenshot=false]
 * @param {string} [options.screenshotMode="scroll"] - `"single"` selects the single-clip capture; anything else uses the scroll sequence.
 * @param {object} [options.screenshotOptions={}] - Extra options spread into the screenshot helper call.
 * @param {object} [options.metadata={}]
 * @returns {Promise<object>} Evidence record with `html` and `image` set to null when not requested.
 * @throws {Error} When `nodeId` is missing.
 */
export async function captureCandidateEvidence(client, {
  nodeId,
  domain = "unknown",
  source = "dom",
  screenshotPath,
  includeHtml = true,
  includeScreenshot = false,
  screenshotMode = "scroll",
  screenshotOptions = {},
  metadata = {}
} = {}) {
  if (!nodeId) {
    throw new Error("captureCandidateEvidence requires nodeId");
  }

  const evidence = {
    schema_version: 1,
    domain: normalizeText(domain) || "unknown",
    source,
    captured_at: nowIso(),
    node_id: nodeId,
    html: null,
    image: null,
    metadata
  };

  if (includeHtml) {
    const htmlOptions = { domain, source: "dom", metadata };
    evidence.html = await captureNodeHtml(client, nodeId, htmlOptions);
  }

  if (includeScreenshot) {
    if (screenshotMode === "single") {
      evidence.image = await captureNodeScreenshot(client, nodeId, {
        ...screenshotOptions,
        filePath: screenshotPath,
        metadata: { ...metadata, capture_mode: "single_visible_clip" }
      });
    } else {
      evidence.image = await captureScrolledNodeScreenshots(client, nodeId, {
        ...screenshotOptions,
        filePath: screenshotPath,
        metadata: { ...metadata, capture_mode: "scroll_sequence" }
      });
    }
  }

  return evidence;
}
1
+ import fs from "node:fs";
2
+ import crypto from "node:crypto";
3
+ import path from "node:path";
4
+ import sharp from "sharp";
5
+ import {
6
+ getAttributesMap,
7
+ getNodeBox,
8
+ getOuterHTML,
9
+ querySelectorAll,
10
+ sleep
11
+ } from "../browser/index.js";
12
+ import {
13
+ htmlToText,
14
+ normalizeText
15
+ } from "../screening/index.js";
16
+
17
/**
 * Current time formatted with `Date.prototype.toISOString`.
 * @returns {string}
 */
function nowIso() {
  const stamp = new Date();
  return stamp.toISOString();
}
20
+
21
/**
 * Resolves `filePath` to an absolute path and makes sure its parent
 * directory exists (created recursively).
 *
 * @param {string} [filePath]
 * @returns {string|null} Absolute path, or null when `filePath` is falsy.
 */
function resolveOutputPath(filePath) {
  if (filePath) {
    const absolutePath = path.resolve(filePath);
    const parentDir = path.dirname(absolutePath);
    fs.mkdirSync(parentDir, { recursive: true });
    return absolutePath;
  }
  return null;
}
27
+
28
/**
 * Expands a rectangle by `padding` on every side and clamps the origin to
 * non-negative coordinates, returning a screenshot clip with `scale: 1`.
 *
 * The padded right/bottom edges are computed first and the size is derived
 * from the clamped origin. The previous formula subtracted the applied left/top
 * padding instead of the clamped-off part, which dropped the right/bottom
 * padding in the common case and over-sized the clip for negative origins.
 *
 * @param {{x: number, y: number, width: number, height: number}} rect
 * @param {number} [padding=0] - Non-numeric or negative values are treated as 0.
 * @returns {{x: number, y: number, width: number, height: number, scale: number}}
 *   Width and height are at least 1.
 */
function withPadding(rect, padding = 0) {
  const safePadding = Math.max(0, Number(padding) || 0);
  const x = Math.max(0, rect.x - safePadding);
  const y = Math.max(0, rect.y - safePadding);
  // Far edges of the padded rectangle, independent of origin clamping.
  const right = rect.x + rect.width + safePadding;
  const bottom = rect.y + rect.height + safePadding;
  return {
    x,
    y,
    width: Math.max(1, right - x),
    height: Math.max(1, bottom - y),
    scale: 1
  };
}
40
+
41
/**
 * Returns `random` when it is a function, otherwise `Math.random`.
 * @param {*} random
 * @returns {Function}
 */
function normalizeRandom(random) {
  if (typeof random !== "function") {
    return Math.random;
  }
  return random;
}
44
+
45
/**
 * Draws a number in `[min, max)` using the supplied generator.
 *
 * `min` falls back to 0 and `max` falls back to `min` when they are not
 * usable numbers (or are 0). When the range is empty or inverted the lower
 * bound is returned without calling the generator.
 *
 * @param {Function} [random] - Generator; normalised through `normalizeRandom`.
 * @param {number} min
 * @param {number} max
 * @returns {number}
 */
function randomBetween(random, min, max) {
  const floor = Number(min) || 0;
  const ceiling = Number(max) || floor;
  if (ceiling <= floor) {
    return floor;
  }
  const generator = normalizeRandom(random);
  return floor + generator() * (ceiling - floor);
}
51
+
52
/**
 * Coerces `raw` to a number, substitutes `fallback` when the result is not
 * finite, and clamps the value into `[min, max]`.
 *
 * @param {*} raw
 * @param {number} fallback
 * @param {{min?: number, max?: number}} [bounds]
 * @returns {number}
 */
function normalizeRatio(raw, fallback, { min = 0, max = 1 } = {}) {
  const numeric = Number(raw);
  const candidate = Number.isFinite(numeric) ? numeric : fallback;
  const raised = Math.max(min, candidate);
  return Math.min(max, raised);
}
57
+
58
/**
 * Normalises scroll-delta jitter options into the snake_case settings object
 * used by the capture loop.
 *
 * @param {object} [options]
 * @param {boolean} [options.enabled=false] - Only the literal `true` enables jitter.
 * @param {number} [options.minRatio=0.65] - Clamped to [0.1, 1].
 * @param {number} [options.maxRatio=0.9] - Clamped to [min_ratio, 1].
 * @param {number} [options.minOverlapRatio=0.2] - Clamped to [0, 0.8].
 * @param {boolean} [options.preserveCoverage=true] - Anything except `false` counts as true.
 * @param {Function} [options.random=Math.random]
 * @returns {{enabled: boolean, min_ratio: number, max_ratio: number,
 *   min_overlap_ratio: number, preserve_coverage: boolean, random: Function}}
 */
function normalizeScrollDeltaJitter({
  enabled = false,
  minRatio = 0.65,
  maxRatio = 0.9,
  minOverlapRatio = 0.2,
  preserveCoverage = true,
  random = Math.random
} = {}) {
  const lowerRatio = normalizeRatio(minRatio, 0.65, { min: 0.1, max: 1 });
  const upperCandidate = normalizeRatio(maxRatio, 0.9, { min: lowerRatio, max: 1 });
  const upperRatio = Math.max(lowerRatio, upperCandidate);
  const overlapRatio = normalizeRatio(minOverlapRatio, 0.2, { min: 0, max: 0.8 });

  return {
    enabled: enabled === true,
    min_ratio: lowerRatio,
    max_ratio: upperRatio,
    min_overlap_ratio: overlapRatio,
    preserve_coverage: preserveCoverage !== false,
    random: normalizeRandom(random)
  };
}
77
+
78
/**
 * Chooses the wheel delta for the next scroll step.
 *
 * Without jitter the base delta (default 650, minimum 1) is returned as-is.
 * With jitter enabled, a rounded random delta is drawn between
 * `base * min_ratio` and `base * max_ratio`, with the upper bound additionally
 * capped at `clipHeight * (1 - min_overlap_ratio)`.
 *
 * @param {object} [options]
 * @param {number} [options.baseDelta]
 * @param {number} [options.clipHeight]
 * @param {object} [options.jitter] - Settings shaped like the output of `normalizeScrollDeltaJitter`.
 * @returns {object} Always contains `deltaY`, `jittered` and `base_delta_y`;
 *   the jittered form also reports the bounds and ratios that were applied.
 */
function resolveCoverageSafeScrollDelta({
  baseDelta,
  clipHeight,
  jitter
} = {}) {
  const baseDeltaY = Math.max(1, Number(baseDelta) || 650);

  if (jitter?.enabled) {
    const visibleHeight = Math.max(1, Number(clipHeight) || 1);
    const overlapCap = Math.max(1, Math.floor(visibleHeight * (1 - jitter.min_overlap_ratio)));
    const ratioCeiling = Math.round(baseDeltaY * jitter.max_ratio);
    const maxDelta = Math.max(1, Math.min(ratioCeiling, overlapCap));
    const ratioFloor = Math.max(1, Math.round(baseDeltaY * jitter.min_ratio));
    const minDelta = Math.min(maxDelta, ratioFloor);
    const drawn = randomBetween(jitter.random, minDelta, maxDelta);
    return {
      deltaY: Math.max(1, Math.round(drawn)),
      jittered: true,
      base_delta_y: baseDeltaY,
      min_delta_y: minDelta,
      max_delta_y: maxDelta,
      min_ratio: jitter.min_ratio,
      max_ratio: jitter.max_ratio,
      min_overlap_ratio: jitter.min_overlap_ratio,
      clip_height: visibleHeight,
      max_delta_for_overlap: overlapCap,
      preserve_coverage: jitter.preserve_coverage
    };
  }

  return {
    deltaY: baseDeltaY,
    jittered: false,
    base_delta_y: baseDeltaY
  };
}
110
+
111
/**
 * Snapshots a DOM node's attributes and outer HTML, plus the text obtained by
 * passing that HTML through `htmlToText`.
 *
 * The attribute and HTML lookups are started together and awaited jointly.
 *
 * @param {object} client - CDP client forwarded to the browser helpers.
 * @param {number} nodeId
 * @param {object} [options]
 * @param {string} [options.domain="unknown"]
 * @param {string} [options.source="dom"]
 * @param {object} [options.metadata={}]
 * @returns {Promise<object>} HTML evidence record (schema_version 1).
 */
export async function captureNodeHtml(client, nodeId, {
  domain = "unknown",
  source = "dom",
  metadata = {}
} = {}) {
  const attributesTask = getAttributesMap(client, nodeId);
  const markupTask = getOuterHTML(client, nodeId);
  const [attributes, outerHTML] = await Promise.all([attributesTask, markupTask]);
  const text = htmlToText(outerHTML);
  const normalizedDomain = normalizeText(domain) || "unknown";

  return {
    schema_version: 1,
    domain: normalizedDomain,
    source,
    captured_at: nowIso(),
    node_id: nodeId,
    attributes,
    outer_html_length: outerHTML.length,
    text_length: text.length,
    text,
    outer_html: outerHTML,
    metadata
  };
}
135
+
136
/**
 * Captures a single screenshot clipped to a node's (optionally padded) box.
 *
 * @param {object} client - CDP client exposing `Page.captureScreenshot`.
 * @param {number} nodeId
 * @param {object} [options]
 * @param {string} [options.filePath] - When set, the image is written to this path.
 * @param {string} [options.format="png"] - Passed to `Page.captureScreenshot`.
 * @param {number} [options.quality] - Only forwarded when not null/undefined.
 * @param {number} [options.padding=0]
 * @param {boolean} [options.captureBeyondViewport=true]
 * @param {boolean} [options.fromSurface=true]
 * @param {object} [options.metadata={}]
 * @returns {Promise<object>} Image evidence record (schema_version 1).
 */
export async function captureNodeScreenshot(client, nodeId, {
  filePath,
  format = "png",
  quality,
  padding = 0,
  captureBeyondViewport = true,
  fromSurface = true,
  metadata = {}
} = {}) {
  const box = await getNodeBox(client, nodeId);
  const clip = withPadding(box.rect, padding);
  const captureOptions = {
    format,
    fromSurface,
    captureBeyondViewport,
    clip
  };
  if (quality != null) {
    captureOptions.quality = quality;
  }
  const screenshot = await client.Page.captureScreenshot(captureOptions);
  const buffer = Buffer.from(screenshot.data || "", "base64");
  const resolvedPath = resolveOutputPath(filePath);
  if (resolvedPath) {
    fs.writeFileSync(resolvedPath, buffer);
  }
  // "webp" is a valid capture format; it used to be mislabelled as image/png.
  // Any other unrecognised format still falls back to png, as before.
  const mimeSubtype = format === "jpeg" || format === "webp" ? format : "png";
  return {
    schema_version: 1,
    source: "image",
    captured_at: nowIso(),
    node_id: nodeId,
    format,
    mime_type: `image/${mimeSubtype}`,
    byte_length: buffer.length,
    file_path: resolvedPath,
    clip,
    node_rect: box.rect,
    metadata
  };
}
176
+
177
/**
 * Captures a screenshot without a clip rectangle (the whole viewport, or
 * beyond it when `captureBeyondViewport` is set).
 *
 * @param {object} client - CDP client exposing `Page.captureScreenshot`.
 * @param {object} [options]
 * @param {string} [options.filePath] - When set, the image is written to this path.
 * @param {string} [options.format="png"] - Passed to `Page.captureScreenshot`.
 * @param {number} [options.quality] - Only forwarded when not null/undefined.
 * @param {boolean} [options.captureBeyondViewport=false]
 * @param {boolean} [options.fromSurface=true]
 * @param {object} [options.metadata={}]
 * @returns {Promise<object>} Viewport image record (schema_version 1).
 */
export async function captureViewportScreenshot(client, {
  filePath,
  format = "png",
  quality,
  captureBeyondViewport = false,
  fromSurface = true,
  metadata = {}
} = {}) {
  const captureOptions = {
    format,
    fromSurface,
    captureBeyondViewport
  };
  if (quality != null) {
    captureOptions.quality = quality;
  }
  const screenshot = await client.Page.captureScreenshot(captureOptions);
  const buffer = Buffer.from(screenshot.data || "", "base64");
  const resolvedPath = resolveOutputPath(filePath);
  if (resolvedPath) {
    fs.writeFileSync(resolvedPath, buffer);
  }
  // "webp" is a valid capture format; it used to be mislabelled as image/png.
  // Any other unrecognised format still falls back to png, as before.
  const mimeSubtype = format === "jpeg" || format === "webp" ? format : "png";
  return {
    schema_version: 1,
    source: "viewport-image",
    captured_at: nowIso(),
    format,
    mime_type: `image/${mimeSubtype}`,
    byte_length: buffer.length,
    file_path: resolvedPath,
    capture_beyond_viewport: Boolean(captureBeyondViewport),
    metadata
  };
}
211
+
212
/**
 * Builds the output path for page `index` of a screenshot sequence:
 * `<dir>/<name>-page-NN<ext>`, where NN is the 1-based, zero-padded page
 * number and `<ext>` is the base path's extension or `.<extension>` when the
 * base path has none.
 *
 * @param {string} basePath
 * @param {number} index - Zero-based page index.
 * @param {string} extension - Used only when `basePath` has no extension.
 * @returns {string|null} Null when `basePath` is falsy.
 */
function filePathForSequence(basePath, index, extension) {
  const absoluteBase = resolveOutputPath(basePath);
  if (!absoluteBase) {
    return null;
  }
  const { dir, name, ext } = path.parse(absoluteBase);
  const pageLabel = String(index + 1).padStart(2, "0");
  const suffix = ext || `.${extension}`;
  return path.join(dir, `${name}-page-${pageLabel}${suffix}`);
}
219
+
220
/**
 * Builds the output path for composed LLM image `index`:
 * `<dir>/<name>-llm-NN.jpg`, with NN the 1-based, zero-padded number.
 *
 * @param {string} basePath
 * @param {number} index - Zero-based image index.
 * @returns {string|null} Null when `basePath` is falsy.
 */
function filePathForLlmSequence(basePath, index) {
  const absoluteBase = resolveOutputPath(basePath);
  if (!absoluteBase) {
    return null;
  }
  const { dir, name } = path.parse(absoluteBase);
  const pageLabel = String(index + 1).padStart(2, "0");
  return path.join(dir, `${name}-llm-${pageLabel}.jpg`);
}
227
+
228
/**
 * SHA-256 digest of `buffer`, hex encoded.
 * @param {Buffer|string} buffer
 * @returns {string}
 */
function screenshotHash(buffer) {
  const hasher = crypto.createHash("sha256");
  hasher.update(buffer);
  return hasher.digest("hex");
}
231
+
232
/**
 * Builds the Error used when a capture step exceeds its time limit.
 *
 * @param {string} label - Name of the step that timed out.
 * @param {number} timeoutMs
 * @returns {Error} Error with `code` "IMAGE_CAPTURE_TIMEOUT", `capture_step`
 *   and `timeout_ms` attached.
 */
function createCaptureTimeoutError(label, timeoutMs) {
  const message = `Image fallback capture timed out during ${label} after ${timeoutMs}ms`;
  return Object.assign(new Error(message), {
    code: "IMAGE_CAPTURE_TIMEOUT",
    capture_step: label,
    timeout_ms: timeoutMs
  });
}
239
+
240
/**
 * Races `promise` against a timer. If the timer fires first, the returned
 * promise rejects with the error from `createCaptureTimeoutError`.
 *
 * A missing, zero, negative or non-numeric `timeoutMs` disables the timer and
 * the input promise is returned unchanged. The timer is always cleared once
 * the race settles.
 *
 * @param {Promise<*>} promise
 * @param {object} [options]
 * @param {string} [options.label="capture_step"] - Step name reported on timeout.
 * @param {number} [options.timeoutMs=0]
 * @returns {Promise<*>}
 */
async function withCaptureTimeout(promise, {
  label = "capture_step",
  timeoutMs = 0
} = {}) {
  const budgetMs = Math.max(0, Number(timeoutMs) || 0);
  if (budgetMs === 0) {
    return promise;
  }

  let timerHandle = null;
  try {
    const deadline = new Promise((_resolve, reject) => {
      timerHandle = setTimeout(() => {
        reject(createCaptureTimeoutError(label, budgetMs));
      }, budgetMs);
    });
    return await Promise.race([promise, deadline]);
  } finally {
    if (timerHandle) {
      clearTimeout(timerHandle);
    }
  }
}
258
+
259
/**
 * Throws when the time elapsed since `started` exceeds `totalTimeoutMs`.
 *
 * A missing, zero, negative or non-numeric budget disables the check.
 *
 * @param {number} started - Epoch milliseconds at which the sequence began.
 * @param {number} totalTimeoutMs
 * @param {string} label - Step name recorded on the error.
 * @returns {void}
 * @throws {Error} Timeout error with `code` "IMAGE_CAPTURE_TOTAL_TIMEOUT" and
 *   `elapsed_ms` set.
 */
function assertCaptureTotalBudget(started, totalTimeoutMs, label) {
  const budgetMs = Math.max(0, Number(totalTimeoutMs) || 0);
  if (!budgetMs) {
    return;
  }
  const spentMs = Date.now() - started;
  if (spentMs <= budgetMs) {
    return;
  }
  throw Object.assign(createCaptureTimeoutError(label, budgetMs), {
    elapsed_ms: spentMs,
    code: "IMAGE_CAPTURE_TOTAL_TIMEOUT"
  });
}
269
+
270
// Comma-joined CSS selector list used when no explicit anchor selector is
// supplied: headings and common block elements first, then class-substring
// matches, with a catch-all `div` last.
const DEFAULT_SCROLL_ANCHOR_SELECTOR = [
  ...["h1", "h2", "h3", "h4", "h5"],
  ...["p", "li", "section", "article"],
  ...["table", "tr", "dl", "dt", "dd"],
  ...["resume", "work", "project", "education", "experience", "item"]
    .map((fragment) => `[class*='${fragment}']`),
  "div"
].join(",");
293
+
294
/**
 * Maps a user-supplied scroll method alias onto one of the three canonical
 * values: "dom-anchor", "dom-anchor-fallback-input" or "input".
 *
 * The value is passed through `normalizeText` and lower-cased before
 * matching; anything unrecognised becomes "input".
 *
 * @param {string} [value="dom-anchor-fallback-input"]
 * @returns {string}
 */
function normalizeScrollMethod(value = "dom-anchor-fallback-input") {
  switch (normalizeText(value).toLowerCase()) {
    case "dom":
    case "dom-anchor":
    case "dom_anchor":
    case "anchor":
      return "dom-anchor";
    case "dom-anchor-fallback-input":
    case "dom_anchor_fallback_input":
    case "dom-fallback-input":
      return "dom-anchor-fallback-input";
    default:
      return "input";
  }
}
302
+
303
/**
 * Converts every entry to a number, drops entries that coerce to 0 or NaN,
 * and removes duplicates while keeping first-seen order.
 *
 * @param {Array<*>} [values=[]]
 * @returns {number[]}
 */
function uniqueNumbers(values = []) {
  const seen = new Set();
  values.forEach((value) => {
    const numeric = Number(value) || 0;
    if (numeric) {
      seen.add(numeric);
    }
  });
  return [...seen];
}
306
+
307
/**
 * Selects up to `limit` items spread evenly across `items`, always including
 * the first item and (for limit > 1) the last.
 *
 * When `items` already fits within the limit the same array is returned.
 * Picks that share a `node_id` collapse to one entry, so fewer than `limit`
 * items may come back.
 *
 * @param {Array<{node_id: *}>} [items=[]]
 * @param {number} [limit=1]
 * @returns {Array<{node_id: *}>}
 */
function pickEvenly(items = [], limit = 1) {
  const quota = Math.max(1, Number(limit) || 1);
  if (items.length <= quota) {
    return items;
  }

  const lastIndex = items.length - 1;
  const divisor = Math.max(1, quota - 1);
  const byNodeId = new Map();
  for (let slot = 0; slot < quota; slot += 1) {
    const chosen = items[Math.round((slot * lastIndex) / divisor)];
    byNodeId.set(chosen.node_id, chosen);
  }
  return [...byNodeId.values()];
}
318
+
319
/**
 * Human-readable label for a stop-boundary pattern: the regex source for a
 * RegExp, otherwise the value passed through `normalizeText`.
 *
 * @param {RegExp|string} pattern
 * @returns {string}
 */
function patternLabel(pattern) {
  return pattern instanceof RegExp
    ? pattern.source
    : normalizeText(pattern);
}
323
+
324
/**
 * Normalises stop-boundary patterns into matcher descriptors.
 *
 * Accepts a single pattern or an array; falsy entries are dropped. RegExp
 * patterns match via `test`, strings match by substring after `normalizeText`.
 *
 * @param {RegExp|string|Array<RegExp|string>} [patterns=[]]
 * @returns {Array<{raw: RegExp|string, label: string, matches: Function}>}
 */
function stopBoundaryPatterns(patterns = []) {
  return (Array.isArray(patterns) ? patterns : [patterns])
    .filter(Boolean)
    .map((pattern) => {
      if (pattern instanceof RegExp) {
        return {
          raw: pattern,
          label: pattern.source,
          matches: (text) => {
            // A regex with the g or y flag keeps state in lastIndex between
            // test() calls. The matcher is reused across many node texts, so
            // rewind first or every other probe can miss a genuine match.
            pattern.lastIndex = 0;
            return pattern.test(text);
          }
        };
      }
      const normalized = normalizeText(pattern);
      return {
        raw: pattern,
        label: normalized,
        matches: (text) => normalized && text.includes(normalized)
      };
    });
}
343
+
344
/**
 * Finds descendant nodes of `rootNodeId` that mark where capture should stop.
 *
 * Candidates come from `selector` (or the default anchor selector when only
 * text patterns are given). When text patterns are configured, each candidate's
 * text must be non-empty, no longer than the text cap, and match a pattern.
 * Without patterns every probed candidate is accepted. Nodes are probed one at
 * a time, and a node whose probe throws or times out is skipped.
 *
 * @param {object} client - CDP client forwarded to the browser helpers.
 * @param {number} rootNodeId
 * @param {object} [options]
 * @param {string} [options.selector=""]
 * @param {RegExp|string|Array<RegExp|string>} [options.textPatterns=[]]
 * @param {number} [options.maxProbeNodes=180] - Upper bound on candidates examined.
 * @param {number} [options.maxTextLength=700] - Floor of 40 is applied.
 * @param {number} [options.stepTimeoutMs=45000] - Per-node timeout is 1/40 of this, clamped to [200, 1000] ms.
 * @returns {Promise<object>} Plan with `enabled`, `ok`, `reason` and `nodes`;
 *   successful queries also report counts, selector, pattern labels and timing.
 */
async function collectStopBoundaryNodes(client, rootNodeId, {
  selector = "",
  textPatterns = [],
  maxProbeNodes = 180,
  maxTextLength = 700,
  stepTimeoutMs = 45000
} = {}) {
  const matchers = stopBoundaryPatterns(textPatterns);
  const requestedSelector = normalizeText(selector);
  const hasMatchers = matchers.length > 0;
  if (!requestedSelector && !hasMatchers) {
    return {
      enabled: false,
      ok: false,
      reason: "not_configured",
      nodes: []
    };
  }

  const startedAt = Date.now();
  const effectiveSelector = requestedSelector || DEFAULT_SCROLL_ANCHOR_SELECTOR;
  let candidateIds = [];
  try {
    const found = await querySelectorAll(client, rootNodeId, effectiveSelector);
    candidateIds = uniqueNumbers(found);
  } catch (error) {
    return {
      enabled: true,
      ok: false,
      reason: "query_selector_all_failed",
      selector: effectiveSelector,
      error: error?.message || String(error),
      nodes: []
    };
  }

  const probeCap = Math.max(1, Number(maxProbeNodes) || 180);
  const textCap = Math.max(40, Number(maxTextLength) || 700);
  const probeTimeoutMs = Math.min(1000, Math.max(200, Math.floor((Number(stepTimeoutMs) || 45000) / 40)));

  // Resolves to a node entry, or null when the candidate is not a boundary.
  const probe = async (candidateId) => {
    if (!hasMatchers) {
      return { node_id: candidateId, text_preview: "", matched_pattern: null };
    }
    const markup = await withCaptureTimeout(getOuterHTML(client, candidateId), {
      label: `stop_boundary_html_${candidateId}`,
      timeoutMs: probeTimeoutMs
    });
    const text = normalizeText(htmlToText(markup));
    if (!text || text.length > textCap) {
      return null;
    }
    const hit = matchers.find((matcher) => matcher.matches(text));
    if (!hit) {
      return null;
    }
    return {
      node_id: candidateId,
      text_preview: text.slice(0, 120),
      matched_pattern: patternLabel(hit.raw)
    };
  };

  const nodes = [];
  for (const candidateId of candidateIds.slice(0, probeCap)) {
    try {
      const entry = await probe(candidateId);
      if (entry) {
        nodes.push(entry);
      }
    } catch {
      // Best effort: a node that fails or times out is simply not a boundary.
    }
  }

  const matchCount = nodes.length;
  return {
    enabled: true,
    ok: matchCount > 0,
    reason: matchCount ? null : "no_matching_stop_boundary_nodes",
    selector: effectiveSelector,
    elapsed_ms: Date.now() - startedAt,
    discovered_node_count: candidateIds.length,
    probed_node_count: Math.min(candidateIds.length, probeCap),
    match_count: matchCount,
    pattern_labels: matchers.map((matcher) => matcher.label),
    nodes
  };
}
419
+
420
/**
 * Decides how the current capture clip relates to the planned stop-boundary
 * nodes.
 *
 * - A boundary node lying entirely above the clip ends the scan immediately
 *   with `stop_before_capture`.
 * - Otherwise the topmost boundary intersecting the clip is used: if the space
 *   above it (less `topPadding`) is shorter than `minCaptureHeight` the result
 *   is `stop_before_capture`; else `capture_then_stop` with a clip shortened to
 *   end above the boundary.
 * - Null when there is no plan, no clip, or no relevant boundary.
 *
 * Nodes narrower than 40 or shorter than 6 are ignored, as are nodes whose box
 * lookup throws or times out.
 *
 * @param {object} client - CDP client forwarded to `getNodeBox`.
 * @param {{nodes?: Array<object>}} stopBoundaryPlan
 * @param {{x: number, y: number, width: number, height: number}} clip
 * @param {object} [options]
 * @param {number} [options.topPadding=8]
 * @param {number} [options.minCaptureHeight=180]
 * @param {number} [options.stepTimeoutMs=45000] - Per-node timeout is 1/50 of this, clamped to [180, 900] ms.
 * @returns {Promise<object|null>}
 */
async function resolveVisibleStopBoundary(client, stopBoundaryPlan, clip, {
  topPadding = 8,
  minCaptureHeight = 180,
  stepTimeoutMs = 45000
} = {}) {
  const planned = stopBoundaryPlan?.nodes;
  if (!planned?.length || !clip) {
    return null;
  }

  const clipTop = Number(clip.y) || 0;
  const clipBottom = clipTop + (Number(clip.height) || 0);
  const padAbove = Math.max(0, Number(topPadding) || 0);
  const minHeight = Math.max(1, Number(minCaptureHeight) || 180);
  const boxTimeoutMs = Math.min(900, Math.max(180, Math.floor((Number(stepTimeoutMs) || 45000) / 50)));

  // Fields every result copies from the boundary node.
  const identify = (node) => ({
    node_id: node.node_id,
    matched_pattern: node.matched_pattern,
    text_preview: node.text_preview
  });

  const intersecting = [];
  for (const node of planned) {
    try {
      const box = await withCaptureTimeout(getNodeBox(client, node.node_id), {
        label: `stop_boundary_box_${node.node_id}`,
        timeoutMs: boxTimeoutMs
      });
      const rect = box?.rect || {};
      const rectWidth = Number(rect.width) || 0;
      const rectHeight = Number(rect.height) || 0;
      if (rectWidth < 40 || rectHeight < 6) {
        continue;
      }
      const top = Number(rect.y) || 0;
      const bottom = top + rectHeight;
      if (bottom <= clipTop + 1) {
        return {
          action: "stop_before_capture",
          reason: "stop_boundary_above_clip",
          ...identify(node),
          rect,
          clip
        };
      }
      const overlapsClip = top < clipBottom && bottom > clipTop;
      if (overlapsClip) {
        intersecting.push({ ...node, rect, top, bottom });
      }
    } catch {
      // Best effort: a node whose box cannot be measured is ignored.
    }
  }

  if (intersecting.length === 0) {
    return null;
  }

  // Topmost intersecting boundary; on ties the earliest-probed node wins.
  const boundary = intersecting.reduce((best, candidate) => (candidate.top < best.top ? candidate : best));
  const boundaryY = Math.max(clipTop, boundary.top - padAbove);
  const adjustedHeight = Math.max(0, boundaryY - clipTop);

  if (adjustedHeight < minHeight) {
    return {
      action: "stop_before_capture",
      reason: "stop_boundary_near_clip_top",
      ...identify(boundary),
      rect: boundary.rect,
      clip,
      adjusted_height: adjustedHeight,
      min_capture_height: minHeight
    };
  }

  return {
    action: "capture_then_stop",
    reason: "stop_boundary_visible",
    ...identify(boundary),
    rect: boundary.rect,
    clip,
    adjusted_clip: { ...clip, height: adjustedHeight },
    adjusted_height: adjustedHeight,
    min_capture_height: minHeight
  };
}
502
+
503
/**
 * Plans the DOM nodes to scroll to, one per intended screenshot.
 *
 * Candidates matching `selector` are measured (up to `maxProbeNodes`) and
 * those at least 80 wide and 8 tall are sorted by vertical position. Anchors
 * are kept only when they sit at least the minimum gap below the previous one.
 * If fewer than two anchors result, the probed nodes are used in document
 * order instead. The list is then thinned evenly to `maxScreenshots`.
 *
 * @param {object} client - CDP client forwarded to the browser helpers.
 * @param {number} rootNodeId
 * @param {object} [options]
 * @param {string} [options.selector=DEFAULT_SCROLL_ANCHOR_SELECTOR]
 * @param {number} [options.maxScreenshots=6]
 * @param {number} [options.maxProbeNodes=260]
 * @param {number} [options.minAnchorGap=180] - Floor of 40 is applied.
 * @param {number} [options.stepTimeoutMs=45000] - Per-node timeout is 1/30 of this, clamped to [250, 1200] ms.
 * @returns {Promise<object>} Plan with `ok`, `method` and either `reason`
 *   (on failure) or `anchors` plus counts and timing.
 */
async function collectDomScrollAnchors(client, rootNodeId, {
  selector = DEFAULT_SCROLL_ANCHOR_SELECTOR,
  maxScreenshots = 6,
  maxProbeNodes = 260,
  minAnchorGap = 180,
  stepTimeoutMs = 45000
} = {}) {
  const startedAt = Date.now();
  const failure = (reason, extra = {}) => ({
    ok: false,
    method: "dom-anchor",
    reason,
    ...extra
  });

  let candidateIds = [];
  try {
    candidateIds = uniqueNumbers(await querySelectorAll(client, rootNodeId, selector));
  } catch (error) {
    return failure("query_selector_all_failed", { error: error?.message || String(error) });
  }
  if (candidateIds.length === 0) {
    return failure("no_anchor_nodes");
  }

  const probeCap = Math.max(1, Number(maxProbeNodes) || 260);
  const boxTimeoutMs = Math.min(1200, Math.max(250, Math.floor((Number(stepTimeoutMs) || 45000) / 30)));
  const probedIds = candidateIds.slice(0, probeCap);

  const measured = [];
  for (const candidateId of probedIds) {
    try {
      const box = await withCaptureTimeout(getNodeBox(client, candidateId), {
        label: `anchor_box_${candidateId}`,
        timeoutMs: boxTimeoutMs
      });
      const rect = box?.rect || {};
      const wideEnough = (Number(rect.width) || 0) >= 80;
      const tallEnough = (Number(rect.height) || 0) >= 8;
      if (wideEnough && tallEnough) {
        measured.push({
          node_id: candidateId,
          y: Math.round(Number(rect.y) || 0),
          height: Math.round(Number(rect.height) || 0)
        });
      }
    } catch {
      // Best effort: a node that cannot be measured is not an anchor candidate.
    }
  }

  // Walk top-to-bottom, keeping anchors separated by at least the minimum gap.
  measured.sort((a, b) => a.y - b.y);
  const requiredGap = Math.max(40, Number(minAnchorGap) || 180);
  let anchors = [];
  for (const item of measured) {
    const previous = anchors[anchors.length - 1];
    const farEnough = !previous || Math.abs(item.y - previous.y) >= requiredGap;
    if (farEnough) {
      anchors.push(item);
    }
  }

  // Too few measured anchors: fall back to probed nodes in document order.
  if (anchors.length < 2) {
    anchors = probedIds.map((candidateId, order) => ({
      node_id: candidateId,
      y: null,
      height: null,
      document_order: order
    }));
  }

  anchors = pickEvenly(anchors, Math.max(1, Number(maxScreenshots) || 1));
  return {
    ok: anchors.length > 0,
    method: "dom-anchor",
    elapsed_ms: Date.now() - startedAt,
    selector,
    discovered_node_count: candidateIds.length,
    measured_node_count: measured.length,
    anchor_count: anchors.length,
    anchors
  };
}
581
+
582
/**
 * Scrolls a node into view via the CDP `DOM.scrollIntoViewIfNeeded` command,
 * guarded by a capture timeout.
 *
 * Uses `client.DOM.scrollIntoViewIfNeeded` when available, otherwise falls
 * back to `client.send`.
 *
 * @param {object} client
 * @param {number} nodeId
 * @param {object} [options]
 * @param {number} [options.timeoutMs=10000]
 * @param {string} [options.label="dom_scroll_anchor"] - Step name reported on timeout.
 * @returns {Promise<*>} Result of the CDP call.
 * @throws {Error} When the client exposes neither call style.
 */
async function scrollDomAnchorIntoView(client, nodeId, {
  timeoutMs = 10000,
  label = "dom_scroll_anchor"
} = {}) {
  const guard = { label, timeoutMs };
  const hasDomainMethod = client.DOM && typeof client.DOM.scrollIntoViewIfNeeded === "function";
  if (hasDomainMethod) {
    return withCaptureTimeout(client.DOM.scrollIntoViewIfNeeded({ nodeId }), guard);
  }
  if (typeof client.send !== "function") {
    throw new Error("CDP client does not expose DOM.scrollIntoViewIfNeeded");
  }
  return withCaptureTimeout(client.send("DOM.scrollIntoViewIfNeeded", { nodeId }), guard);
}
594
+
595
/**
 * Optionally downsizes and re-encodes a screenshot buffer with sharp.
 *
 * When optimisation is disabled and no resize width is given, the input is
 * returned untouched. Otherwise the image is resized if it is wider than
 * `resizeMaxWidth`, then encoded as JPEG, WebP or PNG according to `format`
 * ("jpg" is treated as "jpeg"). Any failure is reported in
 * `optimization_error` and the original buffer is returned.
 *
 * @param {Buffer} buffer
 * @param {object} [options]
 * @param {boolean} [options.enabled=false]
 * @param {string} [options.format="png"]
 * @param {number} [options.quality] - Clamped to [35, 95]; defaults to 72 for JPEG and 76 for WebP.
 * @param {number} [options.resizeMaxWidth=0]
 * @returns {Promise<{buffer: Buffer, optimized: boolean,
 *   original_byte_length?: number, optimization_error: string|null}>}
 */
async function optimizeScreenshotBuffer(buffer, {
  enabled = false,
  format = "png",
  quality,
  resizeMaxWidth = 0
} = {}) {
  const needsProcessing = enabled || resizeMaxWidth;
  if (!needsProcessing) {
    return {
      buffer,
      optimized: false,
      optimization_error: null
    };
  }

  // Caller-supplied quality clamped to [35, 95], or the encoder's default.
  const pickQuality = (fallback) => {
    if (quality == null) {
      return fallback;
    }
    return Math.max(35, Math.min(95, Number(quality) || fallback));
  };

  try {
    const targetFormat = format === "jpg" ? "jpeg" : format;
    let pipeline = sharp(buffer, { failOn: "none" });
    const imageInfo = await pipeline.metadata();
    const sourceWidth = Number(imageInfo.width) || 0;
    const widthCap = Math.max(0, Number(resizeMaxWidth) || 0);
    if (widthCap > 0 && sourceWidth > widthCap) {
      pipeline = pipeline.resize({
        width: widthCap,
        withoutEnlargement: true
      });
    }

    switch (targetFormat) {
      case "jpeg":
        pipeline = pipeline.jpeg({ quality: pickQuality(72), mozjpeg: true });
        break;
      case "webp":
        pipeline = pipeline.webp({ quality: pickQuality(76) });
        break;
      default:
        pipeline = pipeline.png({ compressionLevel: 9, adaptiveFiltering: true });
    }

    const encoded = await pipeline.toBuffer();
    return {
      buffer: encoded,
      optimized: true,
      original_byte_length: buffer.length,
      optimization_error: null
    };
  } catch (error) {
    return {
      buffer,
      optimized: false,
      original_byte_length: buffer.length,
      optimization_error: error?.message || String(error)
    };
  }
}
651
+
652
/**
 * Stacks per-page screenshot files vertically into a smaller number of JPEG
 * images and writes them next to `basePath`.
 *
 * Only entries with a `file_path` take part. With no `basePath`, or with at
 * most one such entry, nothing is composed and the original paths are returned.
 * Any error during composition is reported through `llm_composition_error`,
 * again together with the original paths.
 *
 * @param {Array<object>} [screenshots=[]] - Entries carrying `file_path` and `byte_length`.
 * @param {object} [options]
 * @param {string} [options.basePath] - Passed to `filePathForLlmSequence` to name outputs.
 * @param {number} [options.pagesPerImage=3] - Pages per composite, clamped to 1..5.
 * @param {number} [options.resizeMaxWidth=1100] - Page width limit, clamped to 700..1400.
 * @param {number} [options.quality=72] - JPEG quality, clamped to 45..90.
 * @returns {Promise<{llm_file_paths: string[], llm_screenshots: object[],
 *   llm_total_byte_length: number, llm_original_total_byte_length: number,
 *   llm_composition_error: (string|null)}>}
 */
async function composeScreenshotsForLlm(screenshots = [], {
  basePath,
  pagesPerImage = 3,
  resizeMaxWidth = 1100,
  quality = 72
} = {}) {
  const pagesOnDisk = screenshots.filter((entry) => entry?.file_path);
  const originalPaths = () => pagesOnDisk.map((entry) => entry.file_path);
  const sumByteLengths = (entries) => entries.reduce(
    (total, entry) => total + (Number(entry.byte_length) || 0),
    0
  );

  if (!basePath || pagesOnDisk.length <= 1) {
    return {
      llm_file_paths: originalPaths(),
      llm_screenshots: [],
      llm_total_byte_length: 0,
      llm_original_total_byte_length: 0,
      llm_composition_error: null
    };
  }

  const clampNumber = (value, low, high, fallback) => (
    Math.max(low, Math.min(high, Number(value) || fallback))
  );
  const groupSize = clampNumber(pagesPerImage, 1, 5, 3);
  const targetWidth = clampNumber(resizeMaxWidth, 700, 1400, 1100);
  const jpegQuality = clampNumber(quality, 45, 90, 72);
  const composed = [];

  try {
    let start = 0;
    while (start < pagesOnDisk.length) {
      // Shrink and JPEG-encode every page belonging to this composite.
      const pages = [];
      for (const entry of pagesOnDisk.slice(start, start + groupSize)) {
        const sourceBytes = fs.readFileSync(entry.file_path);
        const resized = await sharp(sourceBytes, { failOn: "none" })
          .resize({ width: targetWidth, withoutEnlargement: true })
          .jpeg({ quality: jpegQuality, mozjpeg: true })
          .toBuffer({ resolveWithObject: true });
        pages.push({
          input: resized.data,
          width: resized.info.width,
          height: resized.info.height,
          source_file_path: entry.file_path
        });
      }

      // Lay the pages out top to bottom on a white canvas as wide as the widest page.
      const canvasWidth = Math.max(...pages.map((page) => page.width), 1);
      const layers = [];
      let canvasHeight = 0;
      for (const page of pages) {
        layers.push({ input: page.input, left: 0, top: canvasHeight });
        canvasHeight += page.height;
      }

      const canvas = sharp({
        create: {
          width: canvasWidth,
          height: canvasHeight,
          channels: 3,
          background: "#ffffff"
        }
      });
      const compositeBytes = await canvas
        .composite(layers)
        .jpeg({ quality: jpegQuality, mozjpeg: true })
        .toBuffer();

      const sequenceIndex = composed.length;
      const compositePath = filePathForLlmSequence(basePath, sequenceIndex);
      fs.writeFileSync(compositePath, compositeBytes);
      composed.push({
        index: sequenceIndex,
        file_path: compositePath,
        byte_length: compositeBytes.length,
        source_file_paths: pages.map((page) => page.source_file_path),
        source_page_count: pages.length,
        width: canvasWidth,
        height: canvasHeight,
        format: "jpeg",
        mime_type: "image/jpeg"
      });

      start += groupSize;
    }
  } catch (failure) {
    return {
      llm_file_paths: originalPaths(),
      llm_screenshots: [],
      llm_total_byte_length: 0,
      llm_original_total_byte_length: sumByteLengths(pagesOnDisk),
      llm_composition_error: failure?.message || String(failure)
    };
  }

  return {
    llm_file_paths: composed.map((entry) => entry.file_path),
    llm_screenshots: composed,
    llm_total_byte_length: sumByteLengths(composed),
    llm_original_total_byte_length: sumByteLengths(pagesOnDisk),
    llm_composition_error: null
  };
}
756
+
757
/**
 * Captures a node as a sequence of screenshots, scrolling between captures.
 *
 * Every iteration measures the node with `getNodeBox`, optionally probes for a
 * stop boundary, calls `Page.captureScreenshot`, post-processes the image with
 * `optimizeScreenshotBuffer`, records it, and then scrolls. Scrolling uses
 * `DOM.scrollIntoViewIfNeeded` on a planned anchor and/or a synthetic mouse
 * wheel event, depending on `scrollMethod`.
 *
 * The loop ends when one of the following happens:
 *  - the iteration budget derived from `maxScreenshots` is used up,
 *  - the stop-boundary check answers "stop_before_capture" or "capture_then_stop",
 *  - `duplicateStopCount` consecutive byte-identical captures were seen (unless
 *    a wheel-scroll retry is still pending in "dom-anchor-fallback-input" mode),
 *  - in "dom-anchor" mode, no further anchor is available.
 *
 * @param {object} client - CDP client exposing `Page.captureScreenshot` and
 *   `Input.dispatchMouseEvent` (plus whatever the DOM helpers need).
 * @param {number} nodeId - Node to capture.
 * @param {object} [options]
 * @param {string} [options.filePath] - Base path; when set, every kept screenshot is
 *   written to `filePathForSequence(filePath, index, format)`.
 * @param {string} [options.format="png"] - Passed to `Page.captureScreenshot`.
 * @param {number} [options.quality] - Forwarded to CDP and the optimizer when not null.
 * @param {number} [options.padding=0] - Passed to `withPadding` to build the clip.
 * @param {boolean} [options.captureBeyondViewport=true]
 * @param {boolean} [options.fromSurface=true]
 * @param {boolean} [options.captureViewport=false] - Capture the viewport instead of
 *   a clip (ignored for a capture whose clip was adjusted by a stop boundary).
 * @param {number} [options.maxScreenshots=6]
 * @param {number} [options.wheelDeltaY=650] - Base wheel delta for input scrolling.
 * @param {number} [options.settleMs=900] - Pause after each scroll.
 * @param {number} [options.duplicateStopCount=2]
 * @param {boolean} [options.skipDuplicateScreenshots=false] - Drop captures identical
 *   to the previous one instead of recording them.
 * @param {boolean} [options.optimize=false]
 * @param {number} [options.resizeMaxWidth=0]
 * @param {boolean} [options.composeForLlm=false] - Also build stacked images through
 *   `composeScreenshotsForLlm` (`llmPagesPerImage`, `llmResizeMaxWidth`, `llmQuality`).
 * @param {number} [options.stepTimeoutMs=45000] - Per-step timeout.
 * @param {number} [options.totalTimeoutMs=90000] - Budget checked by `assertCaptureTotalBudget`.
 * @param {string} [options.scrollMethod="dom-anchor-fallback-input"] - Normalized by
 *   `normalizeScrollMethod`; "input", "dom-anchor" and "dom-anchor-fallback-input"
 *   are the values this function branches on.
 * @param {object} [options.metadata={}] - Echoed on every screenshot and on the result.
 *   The remaining `scrollAnchor*`, `scrollDeltaJitter*` and `stopBoundary*` options are
 *   forwarded to the corresponding helpers unchanged.
 * @returns {Promise<object>} Summary with `screenshots`, `file_paths`, the `llm_*`
 *   fields, counters, the effective `optimization` settings and the anchor /
 *   stop-boundary plans.
 * @throws {Error} When `nodeId` is missing, when a step rejects (including timeouts
 *   raised by the helpers), or when a DOM-anchor scroll fails in "dom-anchor" mode.
 */
export async function captureScrolledNodeScreenshots(client, nodeId, {
  filePath,
  format = "png",
  quality,
  padding = 0,
  captureBeyondViewport = true,
  fromSurface = true,
  captureViewport = false,
  maxScreenshots = 6,
  wheelDeltaY = 650,
  settleMs = 900,
  duplicateStopCount = 2,
  skipDuplicateScreenshots = false,
  optimize = false,
  resizeMaxWidth = 0,
  composeForLlm = false,
  llmPagesPerImage = 3,
  llmResizeMaxWidth = 1100,
  llmQuality = 72,
  stepTimeoutMs = 45000,
  totalTimeoutMs = 90000,
  scrollMethod = "dom-anchor-fallback-input",
  scrollAnchorSelector = DEFAULT_SCROLL_ANCHOR_SELECTOR,
  scrollAnchorMaxProbeNodes = 260,
  scrollAnchorMinGap = 180,
  scrollDeltaJitterEnabled = false,
  scrollDeltaJitterMinRatio = 0.65,
  scrollDeltaJitterMaxRatio = 0.9,
  scrollDeltaJitterMinOverlapRatio = 0.2,
  scrollDeltaJitterPreserveCoverage = true,
  scrollDeltaJitterRandom = Math.random,
  stopBoundarySelector = "",
  stopBoundaryTextPatterns = [],
  stopBoundaryMaxProbeNodes = 180,
  stopBoundaryMaxTextLength = 700,
  stopBoundaryTopPadding = 8,
  stopBoundaryMinCaptureHeight = 180,
  metadata = {}
} = {}) {
  if (!nodeId) throw new Error("captureScrolledNodeScreenshots requires nodeId");
  const sequenceStarted = Date.now();
  const normalizedScrollMethod = normalizeScrollMethod(scrollMethod);
  const maxScreenshotCount = Math.max(1, Number(maxScreenshots) || 1);
  // Scroll commands use their own, tighter timeout window (3s..10s).
  const scrollStepTimeoutMs = Math.min(Math.max(3000, Number(stepTimeoutMs) || 45000), 10000);
  const duplicateStopThreshold = Math.max(1, Number(duplicateStopCount) || 1);
  // Report the MIME type of the format actually requested; "jpg" is treated as
  // an alias of "jpeg", and anything unknown is reported as PNG.
  const mimeSubtype = format === "jpg" ? "jpeg" : format;
  const mimeType = `image/${mimeSubtype === "jpeg" || mimeSubtype === "webp" ? mimeSubtype : "png"}`;

  const scrollDeltaJitter = normalizeScrollDeltaJitter({
    enabled: scrollDeltaJitterEnabled,
    minRatio: scrollDeltaJitterMinRatio,
    maxRatio: scrollDeltaJitterMaxRatio,
    minOverlapRatio: scrollDeltaJitterMinOverlapRatio,
    preserveCoverage: scrollDeltaJitterPreserveCoverage,
    random: scrollDeltaJitterRandom
  });
  // Jittered steps are scaled by a ratio; allow extra iterations so the same
  // distance can still be covered when coverage must be preserved.
  const maxCaptureIterations = scrollDeltaJitter.enabled && scrollDeltaJitter.preserve_coverage
    ? Math.max(maxScreenshotCount, Math.ceil(maxScreenshotCount / scrollDeltaJitter.min_ratio))
    : maxScreenshotCount;
  const anchorPlan = normalizedScrollMethod !== "input"
    ? await collectDomScrollAnchors(client, nodeId, {
      selector: scrollAnchorSelector,
      maxScreenshots: maxCaptureIterations,
      maxProbeNodes: scrollAnchorMaxProbeNodes,
      minAnchorGap: scrollAnchorMinGap,
      stepTimeoutMs
    })
    : null;
  const stopBoundaryEnabled = Boolean(
    normalizeText(stopBoundarySelector)
    || (Array.isArray(stopBoundaryTextPatterns)
      ? stopBoundaryTextPatterns.length
      : stopBoundaryTextPatterns)
  );
  let stopBoundaryPlan = {
    enabled: false,
    ok: false,
    reason: "not_configured",
    nodes: []
  };
  const stopBoundaryChecks = [];
  const screenshots = [];
  let consecutiveDuplicates = 0;
  let previousHash = "";
  let captureCount = 0;
  let droppedDuplicateCount = 0;
  let forceInputScrollAfterDuplicate = false;
  let stopBoundaryResult = null;
  let currentScrollMetadata = {
    before_capture: "initial",
    method: normalizedScrollMethod,
    anchor_plan: anchorPlan
      ? {
        ok: Boolean(anchorPlan.ok),
        reason: anchorPlan.reason || null,
        discovered_node_count: anchorPlan.discovered_node_count || 0,
        measured_node_count: anchorPlan.measured_node_count || 0,
        anchor_count: anchorPlan.anchor_count || 0,
        elapsed_ms: anchorPlan.elapsed_ms || 0
      }
      : null
  };

  // Bring the first planned anchor into view before the first capture.
  if (anchorPlan?.anchors?.[0]?.node_id && normalizedScrollMethod !== "input") {
    try {
      await scrollDomAnchorIntoView(client, anchorPlan.anchors[0].node_id, {
        label: "scroll_dom_anchor_initial",
        timeoutMs: scrollStepTimeoutMs
      });
      currentScrollMetadata = {
        before_capture: "dom_anchor_initial",
        method: "DOM.scrollIntoViewIfNeeded",
        anchor_node_id: anchorPlan.anchors[0].node_id,
        anchor_y: anchorPlan.anchors[0].y,
        anchor_height: anchorPlan.anchors[0].height,
        anchor_plan: currentScrollMetadata.anchor_plan
      };
    } catch (error) {
      // Only the strict "dom-anchor" mode treats a failed anchor scroll as fatal.
      if (normalizedScrollMethod === "dom-anchor") {
        throw error;
      }
      currentScrollMetadata = {
        before_capture: "dom_anchor_initial_failed",
        method: "DOM.scrollIntoViewIfNeeded",
        anchor_node_id: anchorPlan.anchors[0].node_id,
        error: error?.message || String(error),
        anchor_plan: currentScrollMetadata.anchor_plan
      };
    }
  }

  for (let index = 0; index < maxCaptureIterations; index += 1) {
    assertCaptureTotalBudget(sequenceStarted, totalTimeoutMs, `capture_page_${index + 1}`);
    const captureStarted = Date.now();
    const box = await withCaptureTimeout(getNodeBox(client, nodeId), {
      label: `get_box_${index + 1}`,
      timeoutMs: stepTimeoutMs
    });
    const clip = withPadding(box.rect, padding);
    let visibleStopBoundary = null;
    if (stopBoundaryEnabled) {
      stopBoundaryPlan = await collectStopBoundaryNodes(client, nodeId, {
        selector: stopBoundarySelector,
        textPatterns: stopBoundaryTextPatterns,
        maxProbeNodes: stopBoundaryMaxProbeNodes,
        maxTextLength: stopBoundaryMaxTextLength,
        stepTimeoutMs
      });
      stopBoundaryChecks.push({
        capture_index: index,
        ok: Boolean(stopBoundaryPlan.ok),
        reason: stopBoundaryPlan.reason || null,
        discovered_node_count: stopBoundaryPlan.discovered_node_count || 0,
        probed_node_count: stopBoundaryPlan.probed_node_count || 0,
        match_count: stopBoundaryPlan.match_count || 0,
        elapsed_ms: stopBoundaryPlan.elapsed_ms || 0
      });
      visibleStopBoundary = await resolveVisibleStopBoundary(client, stopBoundaryPlan, clip, {
        topPadding: stopBoundaryTopPadding,
        minCaptureHeight: stopBoundaryMinCaptureHeight,
        stepTimeoutMs
      });
    }
    if (visibleStopBoundary?.action === "stop_before_capture") {
      stopBoundaryResult = visibleStopBoundary;
      break;
    }
    const effectiveClip = visibleStopBoundary?.adjusted_clip || clip;
    const effectiveCaptureViewport = Boolean(captureViewport && !visibleStopBoundary?.adjusted_clip);
    const captureOptions = effectiveCaptureViewport ? {
      format,
      fromSurface,
      captureBeyondViewport: false
    } : {
      format,
      fromSurface,
      captureBeyondViewport,
      clip: effectiveClip
    };
    if (quality != null) {
      captureOptions.quality = quality;
    }
    const screenshot = await withCaptureTimeout(client.Page.captureScreenshot(captureOptions), {
      label: `capture_screenshot_${index + 1}`,
      timeoutMs: stepTimeoutMs
    });
    // Count only captures that really happened, so that leaving the loop through
    // "stop_before_capture" does not show up as a duplicate in the summary.
    captureCount += 1;
    const originalBuffer = Buffer.from(screenshot.data || "", "base64");
    const optimized = await withCaptureTimeout(optimizeScreenshotBuffer(originalBuffer, {
      enabled: optimize,
      format,
      quality,
      resizeMaxWidth
    }), {
      label: `optimize_screenshot_${index + 1}`,
      timeoutMs: stepTimeoutMs
    });
    const buffer = optimized.buffer;
    const hash = screenshotHash(buffer);
    const duplicateOfPrevious = Boolean(previousHash) && previousHash === hash;
    if (duplicateOfPrevious) {
      consecutiveDuplicates += 1;
    } else {
      consecutiveDuplicates = 0;
    }

    let outputPath = null;
    if (duplicateOfPrevious && skipDuplicateScreenshots) {
      droppedDuplicateCount += 1;
    } else {
      outputPath = filePath ? filePathForSequence(filePath, screenshots.length, format) : null;
      if (outputPath) {
        fs.writeFileSync(outputPath, buffer);
      }

      screenshots.push({
        index: screenshots.length,
        capture_index: index,
        source: "image",
        captured_at: nowIso(),
        node_id: nodeId,
        format,
        mime_type: mimeType,
        byte_length: buffer.length,
        original_byte_length: optimized.original_byte_length || originalBuffer.length,
        optimized: Boolean(optimized.optimized),
        optimization_error: optimized.optimization_error || null,
        elapsed_ms: Date.now() - captureStarted,
        file_path: outputPath,
        sha256: hash,
        duplicate_of_previous: duplicateOfPrevious,
        clip: effectiveClip,
        capture_viewport: effectiveCaptureViewport,
        node_rect: box.rect,
        scroll: currentScrollMetadata,
        stop_boundary: visibleStopBoundary || null,
        metadata
      });
    }

    if (visibleStopBoundary?.action === "capture_then_stop") {
      stopBoundaryResult = visibleStopBoundary;
      break;
    }

    previousHash = hash;
    // A duplicate right after a DOM-anchor scroll suggests that the anchor scroll
    // did not move the content; in fallback mode retry with a wheel event before
    // treating the duplicate as the end of the content.
    forceInputScrollAfterDuplicate = Boolean(
      duplicateOfPrevious
      && normalizedScrollMethod === "dom-anchor-fallback-input"
      && currentScrollMetadata?.method === "DOM.scrollIntoViewIfNeeded"
    );
    if (consecutiveDuplicates >= duplicateStopThreshold && !forceInputScrollAfterDuplicate) {
      break;
    }

    if (index < maxCaptureIterations - 1) {
      assertCaptureTotalBudget(sequenceStarted, totalTimeoutMs, `scroll_after_page_${index + 1}`);
      let scrolledByDomAnchor = false;
      const nextAnchor = anchorPlan?.anchors?.[index + 1] || null;
      if (nextAnchor?.node_id && normalizedScrollMethod !== "input" && !forceInputScrollAfterDuplicate) {
        try {
          await scrollDomAnchorIntoView(client, nextAnchor.node_id, {
            label: `scroll_dom_anchor_${index + 1}`,
            timeoutMs: scrollStepTimeoutMs
          });
          scrolledByDomAnchor = true;
          currentScrollMetadata = {
            before_capture: `dom_anchor_${index + 1}`,
            method: "DOM.scrollIntoViewIfNeeded",
            anchor_node_id: nextAnchor.node_id,
            anchor_y: nextAnchor.y,
            anchor_height: nextAnchor.height
          };
        } catch (error) {
          if (normalizedScrollMethod === "dom-anchor") {
            throw error;
          }
          currentScrollMetadata = {
            before_capture: `dom_anchor_${index + 1}_failed`,
            method: "DOM.scrollIntoViewIfNeeded",
            anchor_node_id: nextAnchor.node_id,
            error: error?.message || String(error)
          };
        }
      } else if (normalizedScrollMethod === "dom-anchor") {
        // Strict anchor mode with no anchor left: nothing more to scroll to.
        break;
      }

      if (!scrolledByDomAnchor && normalizedScrollMethod !== "dom-anchor") {
        const x = box.center.x;
        const y = box.center.y;
        const scrollDelta = resolveCoverageSafeScrollDelta({
          baseDelta: wheelDeltaY,
          clipHeight: effectiveClip.height,
          jitter: scrollDeltaJitter
        });
        await withCaptureTimeout(client.Input.dispatchMouseEvent({ type: "mouseMoved", x, y, button: "none" }), {
          label: `scroll_mouse_move_${index + 1}`,
          timeoutMs: scrollStepTimeoutMs
        });
        await withCaptureTimeout(client.Input.dispatchMouseEvent({
          type: "mouseWheel",
          x,
          y,
          deltaX: 0,
          deltaY: scrollDelta.deltaY
        }), {
          label: `scroll_wheel_${index + 1}`,
          timeoutMs: scrollStepTimeoutMs
        });
        currentScrollMetadata = {
          before_capture: `wheel_down_${index + 1}`,
          method: "Input.dispatchMouseEvent",
          fallback_from_dom_anchor: Boolean(anchorPlan && normalizedScrollMethod === "dom-anchor-fallback-input"),
          wheel_delta_y: scrollDelta.deltaY,
          wheel_delta_base_y: scrollDelta.base_delta_y,
          wheel_delta_jitter: scrollDelta.jittered ? scrollDelta : null
        };
      }
      if (settleMs > 0) await sleep(settleMs);
    }
  }

  const llmComposition = composeForLlm
    ? await withCaptureTimeout(composeScreenshotsForLlm(screenshots, {
      basePath: filePath,
      pagesPerImage: llmPagesPerImage,
      resizeMaxWidth: llmResizeMaxWidth,
      quality: llmQuality
    }), {
      label: "compose_llm_screenshots",
      timeoutMs: stepTimeoutMs
    })
    : {
      llm_file_paths: screenshots.map((item) => item.file_path).filter(Boolean),
      llm_screenshots: [],
      llm_total_byte_length: 0,
      llm_original_total_byte_length: 0,
      llm_composition_error: null
    };

  const uniqueScreenshotCount = new Set(screenshots.map((item) => item.sha256)).size;

  return {
    schema_version: 1,
    ok: true,
    source: "image-scroll-sequence",
    captured_at: nowIso(),
    node_id: nodeId,
    elapsed_ms: Date.now() - sequenceStarted,
    capture_count: captureCount,
    screenshot_count: screenshots.length,
    unique_screenshot_count: uniqueScreenshotCount,
    duplicate_screenshot_count: captureCount - uniqueScreenshotCount,
    dropped_duplicate_count: droppedDuplicateCount,
    total_byte_length: screenshots.reduce((sum, item) => sum + (Number(item.byte_length) || 0), 0),
    original_total_byte_length: screenshots.reduce((sum, item) => sum + (Number(item.original_byte_length) || 0), 0),
    llm_file_paths: llmComposition.llm_file_paths,
    llm_screenshot_count: llmComposition.llm_file_paths.length,
    llm_total_byte_length: llmComposition.llm_total_byte_length,
    llm_original_total_byte_length: llmComposition.llm_original_total_byte_length,
    llm_composition_error: llmComposition.llm_composition_error,
    llm_screenshots: llmComposition.llm_screenshots,
    optimization: {
      enabled: Boolean(optimize),
      resize_max_width: Math.max(0, Number(resizeMaxWidth) || 0),
      capture_viewport: Boolean(captureViewport),
      format,
      quality: quality ?? null,
      llm_compose_enabled: Boolean(composeForLlm),
      llm_pages_per_image: Math.max(1, Math.min(5, Number(llmPagesPerImage) || 3)),
      llm_resize_max_width: Math.max(0, Number(llmResizeMaxWidth) || 0),
      llm_quality: llmQuality ?? null,
      step_timeout_ms: Math.max(0, Number(stepTimeoutMs) || 0),
      total_timeout_ms: Math.max(0, Number(totalTimeoutMs) || 0),
      scroll_method: normalizedScrollMethod,
      requested_max_screenshots: maxScreenshotCount,
      effective_max_screenshots: maxCaptureIterations,
      scroll_anchor_selector: scrollAnchorSelector,
      scroll_anchor_max_probe_nodes: Math.max(1, Number(scrollAnchorMaxProbeNodes) || 260),
      scroll_anchor_min_gap: Math.max(0, Number(scrollAnchorMinGap) || 0),
      scroll_delta_jitter: {
        enabled: scrollDeltaJitter.enabled,
        min_ratio: scrollDeltaJitter.min_ratio,
        max_ratio: scrollDeltaJitter.max_ratio,
        min_overlap_ratio: scrollDeltaJitter.min_overlap_ratio,
        preserve_coverage: scrollDeltaJitter.preserve_coverage
      }
    },
    scroll_anchor_plan: anchorPlan,
    stop_boundary_plan: stopBoundaryPlan,
    stop_boundary_checks: stopBoundaryChecks,
    stop_boundary_result: stopBoundaryResult,
    file_paths: screenshots.map((item) => item.file_path).filter(Boolean),
    screenshots,
    metadata
  };
}
1151
+
1152
/**
 * Builds one evidence record for a candidate node: an optional HTML capture
 * plus an optional image capture.
 *
 * @param {object} client - CDP client handed through to the capture helpers.
 * @param {object} [options]
 * @param {number} options.nodeId - Node to capture (required).
 * @param {string} [options.domain="unknown"] - Stored after `normalizeText`; an empty
 *   result is stored as "unknown". The raw value is what `captureNodeHtml` receives.
 * @param {string} [options.source="dom"] - Stored on the record as given.
 * @param {string} [options.screenshotPath] - Passed to the image helper as `filePath`,
 *   overriding any `filePath` inside `screenshotOptions`.
 * @param {boolean} [options.includeHtml=true]
 * @param {boolean} [options.includeScreenshot=false]
 * @param {string} [options.screenshotMode="scroll"] - "single" uses
 *   `captureNodeScreenshot`; every other value uses `captureScrolledNodeScreenshots`.
 * @param {object} [options.screenshotOptions={}] - Extra options for the image helper.
 * @param {object} [options.metadata={}] - Stored on the record and forwarded to helpers.
 * @returns {Promise<object>} Record with `html` and `image` set to the helper results,
 *   or `null` when the respective capture was not requested.
 * @throws {Error} When `nodeId` is missing.
 */
export async function captureCandidateEvidence(client, {
  nodeId,
  domain = "unknown",
  source = "dom",
  screenshotPath,
  includeHtml = true,
  includeScreenshot = false,
  screenshotMode = "scroll",
  screenshotOptions = {},
  metadata = {}
} = {}) {
  if (!nodeId) {
    throw new Error("captureCandidateEvidence requires nodeId");
  }

  const normalizedDomain = normalizeText(domain);
  const evidence = {
    schema_version: 1,
    domain: normalizedDomain || "unknown",
    source,
    captured_at: nowIso(),
    node_id: nodeId,
    html: null,
    image: null,
    metadata
  };

  if (includeHtml) {
    const htmlOptions = { domain, source: "dom", metadata };
    evidence.html = await captureNodeHtml(client, nodeId, htmlOptions);
  }

  if (!includeScreenshot) {
    return evidence;
  }

  // Both image helpers take the same option shape; only the helper and the
  // `capture_mode` tag differ between the two modes.
  const singleShot = screenshotMode === "single";
  const captureImage = singleShot ? captureNodeScreenshot : captureScrolledNodeScreenshots;
  const captureMode = singleShot ? "single_visible_clip" : "scroll_sequence";
  evidence.image = await captureImage(client, nodeId, {
    ...screenshotOptions,
    filePath: screenshotPath,
    metadata: {
      ...metadata,
      capture_mode: captureMode
    }
  });
  return evidence;
}