@d-zero/puppeteer-scroll 4.0.2 → 4.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/scroll-all-over.js +48 -5
- package/package.json +2 -2
package/dist/scroll-all-over.js
CHANGED
|
@@ -9,6 +9,49 @@ import { resolveValue } from './resolve-value.js';
|
|
|
9
9
|
* enough to confirm that scrolling is genuinely blocked.
|
|
10
10
|
*/
|
|
11
11
|
const MAX_STUCK_ITERATIONS = 3;
|
|
12
|
+
/**
|
|
13
|
+
* Max attempts for each `page.evaluate` call when it fails with a transient
|
|
14
|
+
* frame error (Chrome may briefly swap or re-attach the main frame during a
|
|
15
|
+
* long scroll, even when the target site is not doing anything observable).
|
|
16
|
+
 * Three attempts with a 200 ms gap absorb the typical re-attach window
|
|
17
|
+
* without masking a genuinely broken page.
|
|
18
|
+
*/
|
|
19
|
+
const MAX_EVALUATE_RETRIES = 3;
|
|
20
|
+
const DETACHED_RETRY_DELAY_MS = 200;
|
|
21
|
+
/**
 * Decides whether a failure caught around `page.evaluate` is one of the known
 * transient frame/session errors, i.e. one whose message mentions a detached
 * frame, a closed session, or a destroyed execution context. Callers use the
 * answer to choose between retrying after a short pause and failing fast.
 * @param error - Value caught from `page.evaluate`; may be of any type.
 * @returns `true` only when `error` is an `Error` instance whose message
 *   matches one of the known transient patterns (case-insensitive);
 *   `false` for everything else, including non-`Error` values.
 */
function isTransientFrameError(error) {
    // The pattern has no `g`/`y` flag, so `.test()` carries no `lastIndex` state.
    const transientMessagePattern = /Attempted to use detached Frame|Session closed|Execution context was destroyed/i;
    return error instanceof Error ? transientMessagePattern.test(error.message) : false;
}
|
|
33
|
+
/**
 * Runs `evaluator` (typically a thunk wrapping a single `page.evaluate` call)
 * and retries it when it rejects with a transient frame error, as judged by
 * `isTransientFrameError`.
 *
 * At most `MAX_EVALUATE_RETRIES` attempts are made in total, with a pause of
 * `DETACHED_RETRY_DELAY_MS` between consecutive attempts. Non-transient
 * errors are re-thrown immediately without any retry. Once the final attempt
 * has failed, the last transient error is thrown straight away; there is no
 * trailing pause, because nothing is left to wait for.
 * @template T - Evaluator return type.
 * @param evaluator - Thunk that performs a single `page.evaluate` call. It is
 *   invoked again from scratch on every retry.
 * @returns Whatever `evaluator` resolves to on the first successful attempt.
 * @throws The original error when it is not transient, or the last transient
 *   error once all attempts are exhausted.
 */
async function evaluateWithFrameRetry(evaluator) {
    let lastError;
    for (let attempt = 1; attempt <= MAX_EVALUATE_RETRIES; attempt++) {
        try {
            return await evaluator();
        }
        catch (error) {
            if (!isTransientFrameError(error))
                throw error;
            lastError = error;
            // Out of attempts: surface the failure now instead of sleeping
            // for a retry that will never happen.
            if (attempt >= MAX_EVALUATE_RETRIES)
                break;
            await new Promise((resolve) => setTimeout(resolve, DETACHED_RETRY_DELAY_MS));
        }
    }
    throw lastError;
}
|
|
12
55
|
/**
|
|
13
56
|
* Default interval range (ms) used when `options.interval` is omitted.
|
|
14
57
|
* Randomized to mimic human-like reading pauses while staying close to the
|
|
@@ -43,14 +86,14 @@ export async function scrollAllOver(page, options) {
|
|
|
43
86
|
const interval = options?.interval ?? DEFAULT_INTERVAL;
|
|
44
87
|
const distance = options?.distance;
|
|
45
88
|
let currentScrollY = 0;
|
|
46
|
-
let scrollHeight = await page.evaluate(() => document.body.scrollHeight);
|
|
89
|
+
let scrollHeight = await evaluateWithFrameRetry(() => page.evaluate(() => document.body.scrollHeight));
|
|
47
90
|
let prevScrollY = -1;
|
|
48
91
|
let stuckCount = 0;
|
|
49
92
|
while (Math.ceil(currentScrollY) < Math.ceil(scrollHeight)) {
|
|
50
93
|
// Force a minimum of 1 px so a user-supplied 0/negative distance
|
|
51
94
|
// cannot stall the loop into the stuck-detection bail out.
|
|
52
95
|
const stepDistance = distance === undefined ? null : Math.max(1, resolveValue(distance));
|
|
53
|
-
[currentScrollY, scrollHeight] = await page.evaluate((step, ratioMin, ratioMax) => {
|
|
96
|
+
[currentScrollY, scrollHeight] = await evaluateWithFrameRetry(() => page.evaluate((step, ratioMin, ratioMax) => {
|
|
54
97
|
// When step is null, sample a random fraction of the viewport
|
|
55
98
|
// height so each scroll feels less mechanical.
|
|
56
99
|
const actualStep = step ??
|
|
@@ -61,7 +104,7 @@ export async function scrollAllOver(page, options) {
|
|
|
61
104
|
Math.ceil(globalThis.scrollY + globalThis.innerHeight),
|
|
62
105
|
Math.ceil(document.body.scrollHeight),
|
|
63
106
|
];
|
|
64
|
-
}, stepDistance, DEFAULT_DISTANCE_RATIO_MIN, DEFAULT_DISTANCE_RATIO_MAX);
|
|
107
|
+
}, stepDistance, DEFAULT_DISTANCE_RATIO_MIN, DEFAULT_DISTANCE_RATIO_MAX));
|
|
65
108
|
options?.logger?.(currentScrollY, scrollHeight, 'Scrolling');
|
|
66
109
|
if (currentScrollY === prevScrollY) {
|
|
67
110
|
stuckCount++;
|
|
@@ -77,10 +120,10 @@ export async function scrollAllOver(page, options) {
|
|
|
77
120
|
await delay(interval);
|
|
78
121
|
}
|
|
79
122
|
options?.logger?.(currentScrollY, scrollHeight, 'End of page');
|
|
80
|
-
await page.evaluate(() => {
|
|
123
|
+
await evaluateWithFrameRetry(() => page.evaluate(() => {
|
|
81
124
|
// Move the scroll position to the top of the page.
|
|
82
125
|
globalThis.scrollTo(0, 0);
|
|
83
|
-
});
|
|
126
|
+
}));
|
|
84
127
|
await delay(400);
|
|
85
128
|
options?.logger?.(currentScrollY, scrollHeight, 'End of page');
|
|
86
129
|
await delay(400);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@d-zero/puppeteer-scroll",
|
|
3
|
-
"version": "4.0.2",
|
|
3
|
+
"version": "4.0.3",
|
|
4
4
|
"description": "Scroll function for puppeteer",
|
|
5
5
|
"author": "D-ZERO",
|
|
6
6
|
"license": "MIT",
|
|
@@ -36,5 +36,5 @@
|
|
|
36
36
|
"url": "https://github.com/d-zero-dev/tools.git",
|
|
37
37
|
"directory": "packages/@d-zero/puppeteer-scroll"
|
|
38
38
|
},
|
|
39
|
-
"gitHead": "
|
|
39
|
+
"gitHead": "d876ace142711051c337f7922931776526047cb0"
|
|
40
40
|
}
|