@d-zero/puppeteer-page-scan 4.5.2 → 4.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/before-page-scan.d.ts +19 -1
- package/dist/before-page-scan.js +27 -2
- package/dist/index.d.ts +1 -0
- package/package.json +3 -3
|
@@ -10,12 +10,30 @@ type Options = {
|
|
|
10
10
|
openDisclosures?: boolean;
|
|
11
11
|
scrollInterval?: number | DelayOptions;
|
|
12
12
|
scrollDistance?: number | DelayOptions;
|
|
13
|
+
/**
|
|
14
|
+
* Maximum `document.body.scrollHeight` (px) tolerated before `scrollAllOver`
|
|
15
|
+
* is skipped. Pages whose post-load scrollHeight exceeds this threshold
|
|
16
|
+
* return `{ scrolled: false, scrollHeight }` without scrolling, so callers
|
|
17
|
+
* can decide to abandon the device preset rather than letting the scroll
|
|
18
|
+
* run unbounded. Omit to disable the check (legacy behavior).
|
|
19
|
+
*/
|
|
20
|
+
maxScrollHeight?: number;
|
|
13
21
|
} & Size;
|
|
22
|
+
export type BeforePageScanResult = {
|
|
23
|
+
/**
|
|
24
|
+
* `true` when `scrollAllOver` ran to completion (or to a stuck bail-out).
|
|
25
|
+
* `false` when the scroll was skipped because `scrollHeight` exceeded
|
|
26
|
+
* `maxScrollHeight`.
|
|
27
|
+
*/
|
|
28
|
+
scrolled: boolean;
|
|
29
|
+
/** `document.body.scrollHeight` measured immediately before scroll. */
|
|
30
|
+
scrollHeight: number;
|
|
31
|
+
};
|
|
14
32
|
/**
|
|
15
33
|
*
|
|
16
34
|
* @param page
|
|
17
35
|
* @param url
|
|
18
36
|
* @param options
|
|
19
37
|
*/
|
|
20
|
-
export declare function beforePageScan(page: Page, url: string, options?: Options): Promise<void>;
|
|
38
|
+
export declare function beforePageScan(page: Page, url: string, options?: Options): Promise<BeforePageScanResult>;
|
|
21
39
|
export {};
|
package/dist/before-page-scan.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { scrollAllOver } from '@d-zero/puppeteer-scroll';
|
|
1
|
+
import { evaluateWithFrameRetry, scrollAllOver } from '@d-zero/puppeteer-scroll';
|
|
2
2
|
/**
|
|
3
3
|
* Open all disclosure elements on the page
|
|
4
4
|
* This function loops until all disclosure elements are expanded,
|
|
@@ -61,6 +61,7 @@ export async function beforePageScan(page, url, options) {
|
|
|
61
61
|
const width = options?.width ?? 1400;
|
|
62
62
|
const resolution = options?.resolution;
|
|
63
63
|
const timeout = options?.timeout || 5000;
|
|
64
|
+
const maxScrollHeight = options?.maxScrollHeight;
|
|
64
65
|
const countDownId = `${name}${url}_timeout`;
|
|
65
66
|
listener?.('setViewport', { name, width, resolution });
|
|
66
67
|
await page.setViewport({
|
|
@@ -94,6 +95,29 @@ export async function beforePageScan(page, url, options) {
|
|
|
94
95
|
message: `Opened ${result.details} <details> elements and clicked ${result.buttons} [aria-expanded="false"] buttons`,
|
|
95
96
|
});
|
|
96
97
|
}
|
|
98
|
+
// WHY measure before scrollAllOver: pathological pages can have a
|
|
99
|
+
// post-load scrollHeight of millions of pixels (e.g. responsive data
|
|
100
|
+
// tables that expand to ~321k px at 320px viewport, and worse cases exist).
|
|
101
|
+
// `scrollAllOver` has no upper bound, so without this guard it can run
|
|
102
|
+
// for tens of minutes — long enough to exceed any reasonable retry
|
|
103
|
+
// timeout, leaving the scroll's page.evaluate calls executing in the
|
|
104
|
+
// background while the next retry attempts to use the same page.
|
|
105
|
+
//
|
|
106
|
+
// WHY retry on detached-Frame: this evaluation runs immediately after
|
|
107
|
+
// `page.reload()` resolves, which is exactly when Chrome may still be
|
|
108
|
+
// finishing an internal main-frame swap. A single read landing in that
|
|
109
|
+
// window throws even though the page itself is doing nothing observable,
|
|
110
|
+
// and the throw escapes `beforePageScan` before `scrollAllOver`'s own
|
|
111
|
+
// retry layer can absorb anything. Reuse the same retry helper as
|
|
112
|
+
// `scrollAllOver` to keep the swap-window absorption consistent.
|
|
113
|
+
const scrollHeight = await evaluateWithFrameRetry(() => page.evaluate(() => document.body.scrollHeight));
|
|
114
|
+
if (maxScrollHeight !== undefined && scrollHeight > maxScrollHeight) {
|
|
115
|
+
listener?.('hook', {
|
|
116
|
+
name,
|
|
117
|
+
message: `Skipped scroll: scrollHeight ${scrollHeight} exceeds limit ${maxScrollHeight}`,
|
|
118
|
+
});
|
|
119
|
+
return { scrolled: false, scrollHeight };
|
|
120
|
+
}
|
|
97
121
|
listener?.('scroll', {
|
|
98
122
|
name,
|
|
99
123
|
scrollY: 0,
|
|
@@ -103,8 +127,9 @@ export async function beforePageScan(page, url, options) {
|
|
|
103
127
|
await scrollAllOver(page, {
|
|
104
128
|
interval: options?.scrollInterval,
|
|
105
129
|
distance: options?.scrollDistance,
|
|
106
|
-
logger: (scrollY, scrollHeight, message) => listener?.('scroll', { name, scrollY, scrollHeight, message }),
|
|
130
|
+
logger: (scrollY, scrollHeightCurrent, message) => listener?.('scroll', { name, scrollY, scrollHeight: scrollHeightCurrent, message }),
|
|
107
131
|
});
|
|
132
|
+
return { scrolled: true, scrollHeight };
|
|
108
133
|
}
|
|
109
134
|
/**
|
|
110
135
|
* Navigate with fallback from networkidle0 to networkidle2 on timeout
|
package/dist/index.d.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
export { beforePageScan } from './before-page-scan.js';
|
|
2
|
+
export type { BeforePageScanResult } from './before-page-scan.js';
|
|
2
3
|
export { defaultSizes, devicePresets, createSizesFromDevices, parseDevicesOption, } from './default-sizes.js';
|
|
3
4
|
export { readPageHooks } from './read-page-hooks.js';
|
|
4
5
|
export { pageScanListener, pageScanLoggers } from './page-scan-listener.js';
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@d-zero/puppeteer-page-scan",
|
|
3
|
-
"version": "4.5.2",
|
|
3
|
+
"version": "4.6.1",
|
|
4
4
|
"description": "Scanning page function for puppeteer",
|
|
5
5
|
"author": "D-ZERO",
|
|
6
6
|
"license": "MIT",
|
|
@@ -24,7 +24,7 @@
|
|
|
24
24
|
},
|
|
25
25
|
"dependencies": {
|
|
26
26
|
"@d-zero/puppeteer-general-actions": "1.2.5",
|
|
27
|
-
"@d-zero/puppeteer-scroll": "4.0.
|
|
27
|
+
"@d-zero/puppeteer-scroll": "4.0.4",
|
|
28
28
|
"@d-zero/shared": "0.22.0"
|
|
29
29
|
},
|
|
30
30
|
"devDependencies": {
|
|
@@ -38,5 +38,5 @@
|
|
|
38
38
|
"url": "https://github.com/d-zero-dev/tools.git",
|
|
39
39
|
"directory": "packages/@d-zero/puppeteer-page-scan"
|
|
40
40
|
},
|
|
41
|
-
"gitHead": "
|
|
41
|
+
"gitHead": "9a26e6d8c1e996684691055ffc070547344b21e9"
|
|
42
42
|
}
|