@d-zero/puppeteer-page-scan 4.5.2 → 4.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/before-page-scan.d.ts +19 -1
- package/dist/before-page-scan.js +18 -1
- package/dist/index.d.ts +1 -0
- package/package.json +3 -3
|
@@ -10,12 +10,30 @@ type Options = {
|
|
|
10
10
|
openDisclosures?: boolean;
|
|
11
11
|
scrollInterval?: number | DelayOptions;
|
|
12
12
|
scrollDistance?: number | DelayOptions;
|
|
13
|
+
/**
|
|
14
|
+
* Maximum `document.body.scrollHeight` (px) tolerated before `scrollAllOver`
|
|
15
|
+
* is skipped. Pages whose post-load scrollHeight exceeds this threshold
|
|
16
|
+
* return `{ scrolled: false, scrollHeight }` without scrolling, so callers
|
|
17
|
+
* can decide to abandon the device preset rather than letting the scroll
|
|
18
|
+
* run unbounded. Omit to disable the check (legacy behavior).
|
|
19
|
+
*/
|
|
20
|
+
maxScrollHeight?: number;
|
|
13
21
|
} & Size;
|
|
22
|
+
export type BeforePageScanResult = {
|
|
23
|
+
/**
|
|
24
|
+
* `true` when `scrollAllOver` ran to completion (or to a stuck bail-out).
|
|
25
|
+
* `false` when the scroll was skipped because `scrollHeight` exceeded
|
|
26
|
+
* `maxScrollHeight`.
|
|
27
|
+
*/
|
|
28
|
+
scrolled: boolean;
|
|
29
|
+
/** `document.body.scrollHeight` measured immediately before scroll. */
|
|
30
|
+
scrollHeight: number;
|
|
31
|
+
};
|
|
14
32
|
/**
|
|
15
33
|
*
|
|
16
34
|
* @param page
|
|
17
35
|
* @param url
|
|
18
36
|
* @param options
|
|
19
37
|
*/
|
|
20
|
-
export declare function beforePageScan(page: Page, url: string, options?: Options): Promise<void>;
|
|
38
|
+
export declare function beforePageScan(page: Page, url: string, options?: Options): Promise<BeforePageScanResult>;
|
|
21
39
|
export {};
|
package/dist/before-page-scan.js
CHANGED
|
@@ -61,6 +61,7 @@ export async function beforePageScan(page, url, options) {
|
|
|
61
61
|
const width = options?.width ?? 1400;
|
|
62
62
|
const resolution = options?.resolution;
|
|
63
63
|
const timeout = options?.timeout || 5000;
|
|
64
|
+
const maxScrollHeight = options?.maxScrollHeight;
|
|
64
65
|
const countDownId = `${name}${url}_timeout`;
|
|
65
66
|
listener?.('setViewport', { name, width, resolution });
|
|
66
67
|
await page.setViewport({
|
|
@@ -94,6 +95,21 @@ export async function beforePageScan(page, url, options) {
|
|
|
94
95
|
message: `Opened ${result.details} <details> elements and clicked ${result.buttons} [aria-expanded="false"] buttons`,
|
|
95
96
|
});
|
|
96
97
|
}
|
|
98
|
+
// WHY measure before scrollAllOver: pathological pages can have a
|
|
99
|
+
// post-load scrollHeight of millions of pixels (e.g. responsive data
|
|
100
|
+
// tables that expand to ~321k px at 320px viewport, and worse cases exist).
|
|
101
|
+
// `scrollAllOver` has no upper bound, so without this guard it can run
|
|
102
|
+
// for tens of minutes — long enough to exceed any reasonable retry
|
|
103
|
+
// timeout, leaving the scroll's page.evaluate calls executing in the
|
|
104
|
+
// background while the next retry attempts to use the same page.
|
|
105
|
+
const scrollHeight = await page.evaluate(() => document.body.scrollHeight);
|
|
106
|
+
if (maxScrollHeight !== undefined && scrollHeight > maxScrollHeight) {
|
|
107
|
+
listener?.('hook', {
|
|
108
|
+
name,
|
|
109
|
+
message: `Skipped scroll: scrollHeight ${scrollHeight} exceeds limit ${maxScrollHeight}`,
|
|
110
|
+
});
|
|
111
|
+
return { scrolled: false, scrollHeight };
|
|
112
|
+
}
|
|
97
113
|
listener?.('scroll', {
|
|
98
114
|
name,
|
|
99
115
|
scrollY: 0,
|
|
@@ -103,8 +119,9 @@ export async function beforePageScan(page, url, options) {
|
|
|
103
119
|
await scrollAllOver(page, {
|
|
104
120
|
interval: options?.scrollInterval,
|
|
105
121
|
distance: options?.scrollDistance,
|
|
106
|
-
logger: (scrollY,
|
|
122
|
+
logger: (scrollY, scrollHeightCurrent, message) => listener?.('scroll', { name, scrollY, scrollHeight: scrollHeightCurrent, message }),
|
|
107
123
|
});
|
|
124
|
+
return { scrolled: true, scrollHeight };
|
|
108
125
|
}
|
|
109
126
|
/**
|
|
110
127
|
* Navigate with fallback from networkidle0 to networkidle2 on timeout
|
package/dist/index.d.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
export { beforePageScan } from './before-page-scan.js';
|
|
2
|
+
export type { BeforePageScanResult } from './before-page-scan.js';
|
|
2
3
|
export { defaultSizes, devicePresets, createSizesFromDevices, parseDevicesOption, } from './default-sizes.js';
|
|
3
4
|
export { readPageHooks } from './read-page-hooks.js';
|
|
4
5
|
export { pageScanListener, pageScanLoggers } from './page-scan-listener.js';
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@d-zero/puppeteer-page-scan",
|
|
3
|
-
"version": "4.5.2",
|
|
3
|
+
"version": "4.6.0",
|
|
4
4
|
"description": "Scanning page function for puppeteer",
|
|
5
5
|
"author": "D-ZERO",
|
|
6
6
|
"license": "MIT",
|
|
@@ -24,7 +24,7 @@
|
|
|
24
24
|
},
|
|
25
25
|
"dependencies": {
|
|
26
26
|
"@d-zero/puppeteer-general-actions": "1.2.5",
|
|
27
|
-
"@d-zero/puppeteer-scroll": "4.0.
|
|
27
|
+
"@d-zero/puppeteer-scroll": "4.0.3",
|
|
28
28
|
"@d-zero/shared": "0.22.0"
|
|
29
29
|
},
|
|
30
30
|
"devDependencies": {
|
|
@@ -38,5 +38,5 @@
|
|
|
38
38
|
"url": "https://github.com/d-zero-dev/tools.git",
|
|
39
39
|
"directory": "packages/@d-zero/puppeteer-page-scan"
|
|
40
40
|
},
|
|
41
|
-
"gitHead": "
|
|
41
|
+
"gitHead": "d876ace142711051c337f7922931776526047cb0"
|
|
42
42
|
}
|