@d-zero/puppeteer-page-scan 4.5.2 → 4.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,12 +10,30 @@ type Options = {
10
10
  openDisclosures?: boolean;
11
11
  scrollInterval?: number | DelayOptions;
12
12
  scrollDistance?: number | DelayOptions;
13
+ /**
14
+ * Maximum `document.body.scrollHeight` (px) tolerated before `scrollAllOver`
15
+ * is skipped. For pages whose post-load scrollHeight exceeds this threshold,
16
+ * `beforePageScan` returns `{ scrolled: false, scrollHeight }` without scrolling, so callers
17
+ * can decide to abandon the device preset rather than letting the scroll
18
+ * run unbounded. Omit to disable the check (legacy behavior).
19
+ */
20
+ maxScrollHeight?: number;
13
21
  } & Size;
22
+ export type BeforePageScanResult = {
23
+ /**
24
+ * `true` when `scrollAllOver` ran to completion (or to a stuck bail-out).
25
+ * `false` when the scroll was skipped because `scrollHeight` exceeded
26
+ * `maxScrollHeight`.
27
+ */
28
+ scrolled: boolean;
29
+ /** `document.body.scrollHeight` measured immediately before scroll. */
30
+ scrollHeight: number;
31
+ };
14
32
  /**
15
33
  *
16
34
  * @param page
17
35
  * @param url
18
36
  * @param options
19
37
  */
20
- export declare function beforePageScan(page: Page, url: string, options?: Options): Promise<void>;
38
+ export declare function beforePageScan(page: Page, url: string, options?: Options): Promise<BeforePageScanResult>;
21
39
  export {};
@@ -1,4 +1,4 @@
1
- import { scrollAllOver } from '@d-zero/puppeteer-scroll';
1
+ import { evaluateWithFrameRetry, scrollAllOver } from '@d-zero/puppeteer-scroll';
2
2
  /**
3
3
  * Open all disclosure elements on the page
4
4
  * This function loops until all disclosure elements are expanded,
@@ -61,6 +61,7 @@ export async function beforePageScan(page, url, options) {
61
61
  const width = options?.width ?? 1400;
62
62
  const resolution = options?.resolution;
63
63
  const timeout = options?.timeout || 5000;
64
+ const maxScrollHeight = options?.maxScrollHeight;
64
65
  const countDownId = `${name}${url}_timeout`;
65
66
  listener?.('setViewport', { name, width, resolution });
66
67
  await page.setViewport({
@@ -94,6 +95,29 @@ export async function beforePageScan(page, url, options) {
94
95
  message: `Opened ${result.details} <details> elements and clicked ${result.buttons} [aria-expanded="false"] buttons`,
95
96
  });
96
97
  }
98
+ // WHY measure before scrollAllOver: pathological pages can have a
99
+ // post-load scrollHeight of millions of pixels (e.g. responsive data
100
+ // tables that expand to ~321k px at 320px viewport, and worse cases exist).
101
+ // `scrollAllOver` has no upper bound, so without this guard it can run
102
+ // for tens of minutes — long enough to exceed any reasonable retry
103
+ // timeout, leaving the scroll's page.evaluate calls executing in the
104
+ // background while the next retry attempts to use the same page.
105
+ //
106
+ // WHY retry on detached-Frame: this evaluation runs immediately after
107
+ // `page.reload()` resolves, which is exactly when Chrome may still be
108
+ // finishing an internal main-frame swap. A single read landing in that
109
+ // window throws even though the page itself is doing nothing observable,
110
+ // and the throw escapes `beforePageScan` before `scrollAllOver`'s own
111
+ // retry layer can absorb anything. Reuse the same retry helper as
112
+ // `scrollAllOver` to keep the swap-window absorption consistent.
113
+ const scrollHeight = await evaluateWithFrameRetry(() => page.evaluate(() => document.body.scrollHeight));
114
+ if (maxScrollHeight !== undefined && scrollHeight > maxScrollHeight) {
115
+ listener?.('hook', {
116
+ name,
117
+ message: `Skipped scroll: scrollHeight ${scrollHeight} exceeds limit ${maxScrollHeight}`,
118
+ });
119
+ return { scrolled: false, scrollHeight };
120
+ }
97
121
  listener?.('scroll', {
98
122
  name,
99
123
  scrollY: 0,
@@ -103,8 +127,9 @@ export async function beforePageScan(page, url, options) {
103
127
  await scrollAllOver(page, {
104
128
  interval: options?.scrollInterval,
105
129
  distance: options?.scrollDistance,
106
- logger: (scrollY, scrollHeight, message) => listener?.('scroll', { name, scrollY, scrollHeight, message }),
130
+ logger: (scrollY, scrollHeightCurrent, message) => listener?.('scroll', { name, scrollY, scrollHeight: scrollHeightCurrent, message }),
107
131
  });
132
+ return { scrolled: true, scrollHeight };
108
133
  }
109
134
  /**
110
135
  * Navigate with fallback from networkidle0 to networkidle2 on timeout
package/dist/index.d.ts CHANGED
@@ -1,4 +1,5 @@
1
1
  export { beforePageScan } from './before-page-scan.js';
2
+ export type { BeforePageScanResult } from './before-page-scan.js';
2
3
  export { defaultSizes, devicePresets, createSizesFromDevices, parseDevicesOption, } from './default-sizes.js';
3
4
  export { readPageHooks } from './read-page-hooks.js';
4
5
  export { pageScanListener, pageScanLoggers } from './page-scan-listener.js';
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@d-zero/puppeteer-page-scan",
3
- "version": "4.5.2",
3
+ "version": "4.6.1",
4
4
  "description": "Scanning page function for puppeteer",
5
5
  "author": "D-ZERO",
6
6
  "license": "MIT",
@@ -24,7 +24,7 @@
24
24
  },
25
25
  "dependencies": {
26
26
  "@d-zero/puppeteer-general-actions": "1.2.5",
27
- "@d-zero/puppeteer-scroll": "4.0.2",
27
+ "@d-zero/puppeteer-scroll": "4.0.4",
28
28
  "@d-zero/shared": "0.22.0"
29
29
  },
30
30
  "devDependencies": {
@@ -38,5 +38,5 @@
38
38
  "url": "https://github.com/d-zero-dev/tools.git",
39
39
  "directory": "packages/@d-zero/puppeteer-page-scan"
40
40
  },
41
- "gitHead": "16c831105a12bb635d49130e7f5add25b6643c40"
41
+ "gitHead": "9a26e6d8c1e996684691055ffc070547344b21e9"
42
42
  }