@d-zero/puppeteer-scroll 4.0.2 → 4.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Max attempts for each `page.evaluate` call when it fails with a transient
|
|
3
|
+
* frame error. Chrome may briefly swap or re-attach the main frame during a
|
|
4
|
+
* long scroll or immediately after navigation, even when the target site is
|
|
5
|
+
* not doing anything observable. Three attempts with a 200 ms gap absorb
|
|
6
|
+
* the typical re-attach window without masking a genuinely broken page.
|
|
7
|
+
*/
|
|
8
|
+
export declare const MAX_EVALUATE_RETRIES = 3;
|
|
9
|
+
/** Pause, in milliseconds, that `evaluateWithFrameRetry` waits after a transient frame error before retrying. */
export declare const DETACHED_RETRY_DELAY_MS = 200;
|
|
10
|
+
/**
|
|
11
|
+
* Transient errors that occur when `page.evaluate` lands inside Puppeteer's
|
|
12
|
+
* own frame-swap or session-teardown window. Retrying after a short delay
|
|
13
|
+
* usually succeeds because the new execution context is then in place.
|
|
14
|
+
* @param error - Error caught from `page.evaluate`.
|
|
15
|
+
* @returns `true` when the error is a known transient frame/session error.
|
|
16
|
+
* @example
|
|
17
|
+
* ```ts
|
|
18
|
+
* try {
|
|
19
|
+
* await page.evaluate(...);
|
|
20
|
+
* } catch (error) {
|
|
21
|
+
* if (isTransientFrameError(error)) {
|
|
22
|
+
* // retry after a short delay
|
|
23
|
+
* } else {
|
|
24
|
+
* throw error;
|
|
25
|
+
* }
|
|
26
|
+
* }
|
|
27
|
+
* ```
|
|
28
|
+
*/
|
|
29
|
+
export declare function isTransientFrameError(error: unknown): boolean;
|
|
30
|
+
/**
|
|
31
|
+
* Retries `evaluator` (typically a `page.evaluate` call) when it fails with
|
|
32
|
+
* a transient frame error. Non-transient errors are re-thrown immediately.
|
|
33
|
+
*
|
|
34
|
+
* Used both inside long-running scroll loops and around the single
|
|
35
|
+
* `page.evaluate` calls that bracket them, so that Chrome's brief
|
|
36
|
+
* post-navigation main-frame swap does not surface as an unrecoverable
|
|
37
|
+
* "Attempted to use detached Frame" error in the caller.
|
|
38
|
+
* @template T - Evaluator return type.
|
|
39
|
+
* @param evaluator - Thunk that performs a single `page.evaluate` call.
|
|
40
|
+
* @returns Whatever `evaluator` returns on success.
|
|
41
|
+
* @example
|
|
42
|
+
* ```ts
|
|
43
|
+
* const scrollHeight = await evaluateWithFrameRetry(() =>
|
|
44
|
+
* page.evaluate(() => document.body.scrollHeight),
|
|
45
|
+
* );
|
|
46
|
+
* ```
|
|
47
|
+
*/
|
|
48
|
+
export declare function evaluateWithFrameRetry<T>(evaluator: () => Promise<T>): Promise<T>;
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Max attempts for each `page.evaluate` call when it fails with a transient
|
|
3
|
+
* frame error. Chrome may briefly swap or re-attach the main frame during a
|
|
4
|
+
* long scroll or immediately after navigation, even when the target site is
|
|
5
|
+
* not doing anything observable. Three attempts with a 200 ms gap absorb
|
|
6
|
+
* the typical re-attach window without masking a genuinely broken page.
|
|
7
|
+
*/
|
|
8
|
+
export const MAX_EVALUATE_RETRIES = 3;
|
|
9
|
+
export const DETACHED_RETRY_DELAY_MS = 200;
|
|
10
|
+
/**
 * Transient errors that occur when `page.evaluate` lands inside Puppeteer's
 * own frame-swap or session-teardown window. Retrying after a short delay
 * usually succeeds because the new execution context is then in place.
 *
 * The check is made on the error's `message` text (case-insensitive) and
 * accepts any object carrying a string `message`, so an Error created in a
 * different realm is still recognized. Thrown primitives (strings, numbers,
 * `null`, `undefined`) are never treated as transient.
 * @param error - Error caught from `page.evaluate`.
 * @returns `true` when the error is a known transient frame/session error.
 * @example
 * ```ts
 * try {
 *   await page.evaluate(...);
 * } catch (error) {
 *   if (isTransientFrameError(error)) {
 *     // retry after a short delay
 *   } else {
 *     throw error;
 *   }
 * }
 * ```
 */
export function isTransientFrameError(error) {
    // Duck-type on `message` instead of `instanceof Error`: an Error built in
    // another realm is not an instance of this realm's `Error` constructor,
    // yet it carries exactly the same message text.
    if (typeof error !== 'object' || error === null)
        return false;
    const { message } = error;
    if (typeof message !== 'string')
        return false;
    return /Attempted to use detached Frame|Session closed|Execution context was destroyed/i.test(message);
}
|
|
34
|
+
/**
|
|
35
|
+
* Retries `evaluator` (typically a `page.evaluate` call) when it fails with
|
|
36
|
+
* a transient frame error. Non-transient errors are re-thrown immediately.
|
|
37
|
+
*
|
|
38
|
+
* Used both inside long-running scroll loops and around the single
|
|
39
|
+
* `page.evaluate` calls that bracket them, so that Chrome's brief
|
|
40
|
+
* post-navigation main-frame swap does not surface as an unrecoverable
|
|
41
|
+
* "Attempted to use detached Frame" error in the caller.
|
|
42
|
+
* @template T - Evaluator return type.
|
|
43
|
+
* @param evaluator - Thunk that performs a single `page.evaluate` call.
|
|
44
|
+
* @returns Whatever `evaluator` returns on success.
|
|
45
|
+
* @example
|
|
46
|
+
* ```ts
|
|
47
|
+
* const scrollHeight = await evaluateWithFrameRetry(() =>
|
|
48
|
+
* page.evaluate(() => document.body.scrollHeight),
|
|
49
|
+
* );
|
|
50
|
+
* ```
|
|
51
|
+
*/
|
|
52
|
+
export async function evaluateWithFrameRetry(evaluator) {
|
|
53
|
+
let lastError;
|
|
54
|
+
for (let attempt = 0; attempt < MAX_EVALUATE_RETRIES; attempt++) {
|
|
55
|
+
try {
|
|
56
|
+
return await evaluator();
|
|
57
|
+
}
|
|
58
|
+
catch (error) {
|
|
59
|
+
lastError = error;
|
|
60
|
+
if (!isTransientFrameError(error))
|
|
61
|
+
throw error;
|
|
62
|
+
await new Promise((resolve) => setTimeout(resolve, DETACHED_RETRY_DELAY_MS));
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
throw lastError;
|
|
66
|
+
}
|
package/dist/index.d.ts
CHANGED
package/dist/index.js
CHANGED
package/dist/scroll-all-over.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { delay } from '@d-zero/shared/delay';
|
|
2
|
+
import { evaluateWithFrameRetry } from './evaluate-with-frame-retry.js';
|
|
2
3
|
import { resolveValue } from './resolve-value.js';
|
|
3
4
|
/**
|
|
4
5
|
* Number of consecutive iterations without scroll progress before bailing out.
|
|
@@ -43,14 +44,14 @@ export async function scrollAllOver(page, options) {
|
|
|
43
44
|
const interval = options?.interval ?? DEFAULT_INTERVAL;
|
|
44
45
|
const distance = options?.distance;
|
|
45
46
|
let currentScrollY = 0;
|
|
46
|
-
let scrollHeight = await page.evaluate(() => document.body.scrollHeight);
|
|
47
|
+
let scrollHeight = await evaluateWithFrameRetry(() => page.evaluate(() => document.body.scrollHeight));
|
|
47
48
|
let prevScrollY = -1;
|
|
48
49
|
let stuckCount = 0;
|
|
49
50
|
while (Math.ceil(currentScrollY) < Math.ceil(scrollHeight)) {
|
|
50
51
|
// Force a minimum of 1 px so a user-supplied 0/negative distance
|
|
51
52
|
// cannot stall the loop into the stuck-detection bail out.
|
|
52
53
|
const stepDistance = distance === undefined ? null : Math.max(1, resolveValue(distance));
|
|
53
|
-
[currentScrollY, scrollHeight] = await page.evaluate((step, ratioMin, ratioMax) => {
|
|
54
|
+
[currentScrollY, scrollHeight] = await evaluateWithFrameRetry(() => page.evaluate((step, ratioMin, ratioMax) => {
|
|
54
55
|
// When step is null, sample a random fraction of the viewport
|
|
55
56
|
// height so each scroll feels less mechanical.
|
|
56
57
|
const actualStep = step ??
|
|
@@ -61,7 +62,7 @@ export async function scrollAllOver(page, options) {
|
|
|
61
62
|
Math.ceil(globalThis.scrollY + globalThis.innerHeight),
|
|
62
63
|
Math.ceil(document.body.scrollHeight),
|
|
63
64
|
];
|
|
64
|
-
}, stepDistance, DEFAULT_DISTANCE_RATIO_MIN, DEFAULT_DISTANCE_RATIO_MAX);
|
|
65
|
+
}, stepDistance, DEFAULT_DISTANCE_RATIO_MIN, DEFAULT_DISTANCE_RATIO_MAX));
|
|
65
66
|
options?.logger?.(currentScrollY, scrollHeight, 'Scrolling');
|
|
66
67
|
if (currentScrollY === prevScrollY) {
|
|
67
68
|
stuckCount++;
|
|
@@ -77,10 +78,10 @@ export async function scrollAllOver(page, options) {
|
|
|
77
78
|
await delay(interval);
|
|
78
79
|
}
|
|
79
80
|
options?.logger?.(currentScrollY, scrollHeight, 'End of page');
|
|
80
|
-
await page.evaluate(() => {
|
|
81
|
+
await evaluateWithFrameRetry(() => page.evaluate(() => {
|
|
81
82
|
// Move the scroll position to the top of the page.
|
|
82
83
|
globalThis.scrollTo(0, 0);
|
|
83
|
-
});
|
|
84
|
+
}));
|
|
84
85
|
await delay(400);
|
|
85
86
|
options?.logger?.(currentScrollY, scrollHeight, 'End of page');
|
|
86
87
|
await delay(400);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@d-zero/puppeteer-scroll",
|
|
3
|
-
"version": "4.0.
|
|
3
|
+
"version": "4.0.4",
|
|
4
4
|
"description": "Scroll function for puppeteer",
|
|
5
5
|
"author": "D-ZERO",
|
|
6
6
|
"license": "MIT",
|
|
@@ -36,5 +36,5 @@
|
|
|
36
36
|
"url": "https://github.com/d-zero-dev/tools.git",
|
|
37
37
|
"directory": "packages/@d-zero/puppeteer-scroll"
|
|
38
38
|
},
|
|
39
|
-
"gitHead": "
|
|
39
|
+
"gitHead": "9a26e6d8c1e996684691055ffc070547344b21e9"
|
|
40
40
|
}
|