@d-zero/beholder 2.1.5 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,521 @@
1
+ import type { ElementHandle, Page } from 'puppeteer';
2
+
3
+ import { readFileSync } from 'node:fs';
4
+ import { createRequire } from 'node:module';
5
+
6
+ import { afterEach, describe, expect, it, vi } from 'vitest';
7
+
8
+ import {
9
+ DEFAULT_DOM_EVALUATION_TIMEOUT,
10
+ getAnchorList,
11
+ getImageList,
12
+ getMeta,
13
+ getProp,
14
+ } from './dom-evaluation.js';
15
+ import { emptyMeta } from './meta/classify.js';
16
+
17
// Several tests below switch to fake timers; always restore the real ones so
// one test's clock cannot leak into the next.
afterEach(function restoreRealTimers() {
	vi.useRealTimers();
});
20
+
21
/**
 * Creates a stub `Page` that implements only `evaluate`.
 *
 * Every call to `evaluate` ignores its arguments and resolves with `value`.
 * @param value - The result each `evaluate()` call resolves with.
 * @returns A plain object cast to `Page`; no other `Page` member exists on it.
 */
function mockPageEvaluate(value: unknown): Page {
	const evaluate = (): Promise<unknown> => Promise.resolve(value);
	const stub = { evaluate };
	return stub as unknown as Page;
}
30
+
31
/**
 * Creates a stub `ElementHandle` that implements only `getProperty`.
 *
 * Whatever property name is requested, `getProperty` resolves with a fresh
 * handle-like object whose `jsonValue()` resolves with `value`.
 * @param value - The value reported for every property read.
 * @returns A plain object cast to `ElementHandle<Element>`.
 */
function mockElementHandle(value: unknown): ElementHandle<Element> {
	const createPropertyHandle = () => ({
		jsonValue: () => Promise.resolve(value),
	});
	const getProperty = () => Promise.resolve(createPropertyHandle());
	return { getProperty } as unknown as ElementHandle<Element>;
}
43
+
44
describe('getMeta', () => {
	const pageUrl = 'https://example.com/';

	/** Returns a promise that never settles, standing in for a blocked main thread. */
	const hangForever = () => new Promise(() => {});

	it('returns emptyMeta() when page.evaluate rejects', async () => {
		const rejectingPage = {
			evaluate: () => Promise.reject(new Error('execution context destroyed')),
			content: () => Promise.resolve('<html></html>'),
		} as unknown as Page;

		const result = await getMeta(rejectingPage, { url: pageUrl });

		expect(result).toEqual(emptyMeta());
	});

	it('returns emptyMeta() when the main thread is unresponsive (timeout)', async () => {
		vi.useFakeTimers();
		// Neither call ever settles, so only the timeout can end the operation.
		const frozenPage = {
			evaluate: () => hangForever(),
			content: () => hangForever(),
		} as unknown as Page;

		const pending = getMeta(frozenPage, { url: pageUrl }, 5000);
		await vi.advanceTimersByTimeAsync(5000);
		const result = await pending;

		expect(result).toEqual(emptyMeta());
		// No timer may be left pending once the fallback has been returned.
		expect(vi.getTimerCount()).toBe(0);
	});
});
72
+
73
describe('getImageList', () => {
	const viewportWidth = 375;

	it('maps raw images, deriving isLazy and recording the viewport width', async () => {
		// Fields that appear unchanged in both the raw input and the expected output.
		const lazyImage = {
			src: 'https://example.com/a.png',
			currentSrc: 'https://example.com/a.png',
			alt: 'A',
			width: 100,
			height: 50,
			naturalWidth: 200,
			naturalHeight: 100,
			sourceCode: '<img>',
		};
		const eagerImage = {
			src: 'https://example.com/b.png',
			currentSrc: 'https://example.com/b.png',
			alt: 'B',
			width: 0,
			height: 0,
			naturalWidth: 0,
			naturalHeight: 0,
			sourceCode: '<img>',
		};
		// The upper-case `LAZY` checks that the loading attribute is matched case-insensitively.
		const page = mockPageEvaluate([
			{ ...lazyImage, loading: 'LAZY' },
			{ ...eagerImage, loading: 'eager' },
		]);

		const result = await getImageList(page, viewportWidth);

		expect(result).toStrictEqual([
			{ ...lazyImage, isLazy: true, viewportWidth },
			{ ...eagerImage, isLazy: false, viewportWidth },
		]);
	});

	it('returns an empty array when extraction rejects', async () => {
		const rejectingPage = {
			evaluate: () => Promise.reject(new Error('execution context destroyed')),
		} as unknown as Page;

		const result = await getImageList(rejectingPage, viewportWidth);

		expect(result).toStrictEqual([]);
	});

	it('returns an empty array (not a failure fallback) for a page with no images', async () => {
		const emptyPage = mockPageEvaluate([]);

		const result = await getImageList(emptyPage, viewportWidth);

		expect(result).toStrictEqual([]);
	});

	it('returns an empty array when extraction times out', async () => {
		vi.useFakeTimers();
		// `evaluate` never settles, so only the timeout can end the operation.
		const frozenPage = {
			evaluate: () => new Promise(() => {}),
		} as unknown as Page;

		const pending = getImageList(frozenPage, viewportWidth, 5000);
		await vi.advanceTimersByTimeAsync(5000);
		const result = await pending;

		expect(result).toStrictEqual([]);
		expect(vi.getTimerCount()).toBe(0);
	});
});
162
+
163
describe('getProp', () => {
	const propName = 'textContent';

	it('returns the property value and clears the loser-side timer', async () => {
		vi.useFakeTimers();
		const handle = mockElementHandle('hello');

		const value = await getProp({ $el: handle, propName, fallback: '' });

		expect(value).toBe('hello');
		// The timeout that lost the race must be cleared; a leftover timer would
		// keep the event loop alive.
		expect(vi.getTimerCount()).toBe(0);
	});

	it('returns the fallback when property retrieval throws', async () => {
		const detachedHandle = {
			getProperty: () => Promise.reject(new Error('detached')),
		} as unknown as ElementHandle<Element>;

		const value = await getProp({ $el: detachedHandle, propName, fallback: 'fb' });

		expect(value).toBe('fb');
	});

	it('returns the fallback when retrieval hangs past the timeout', async () => {
		vi.useFakeTimers();
		// `getProperty` never settles, so only the timeout can end the operation.
		const hangingHandle = {
			getProperty: () => new Promise(() => {}),
		} as unknown as ElementHandle<Element>;

		const pending = getProp({ $el: hangingHandle, propName, fallback: 'fb' }, 5000);
		await vi.advanceTimersByTimeAsync(5000);
		const value = await pending;

		expect(value).toBe('fb');
		expect(vi.getTimerCount()).toBe(0);
	});
});
199
+
200
/**
 * Creates a stub anchor `ElementHandle` exposing `remoteObject()` and
 * `getProperty()`.
 *
 * `remoteObject()` reports the given `objectId`, which the tests use to tie the
 * handle to a `DOM.describeNode` answer. `getProperty(name)` resolves with a
 * handle-like object whose `jsonValue()` yields `props[name]`, or `''` when
 * that entry is `null`/`undefined`.
 * @param objectId - Remote object id reported by `remoteObject()`.
 * @param props - Property values keyed by property name.
 * @returns A plain object cast to `ElementHandle<Element>`.
 */
function mockAnchorHandle(
	objectId: string,
	props: Record<string, unknown>,
): ElementHandle<Element> {
	const remoteObject = () => ({ objectId });
	const getProperty = (propName: string) => {
		const propertyHandle = {
			// Looked up lazily, at read time, exactly like a real property read.
			jsonValue: () => Promise.resolve(props[propName] ?? ''),
		};
		return Promise.resolve(propertyHandle);
	};
	return { remoteObject, getProperty } as unknown as ElementHandle<Element>;
}
218
+
219
/**
 * Creates a stub `Page` for exercising `getAnchorList`.
 *
 * The stub exposes `$$()`, which resolves with `anchors`, and `_client()`,
 * which returns a fake CDP session. The session's `send(method, params)`
 * answers two methods and rejects everything else:
 * - `Accessibility.getFullAXTree` resolves with `{ nodes: axNodes }`, unless
 *   `getFullAXTree` is supplied, in which case its result is returned.
 * - `DOM.describeNode` resolves with `{ node: { backendNodeId } }`, looked up in
 *   `describeNodes` by `params.objectId`, unless `describeNode` is supplied.
 * @param args - Mock configuration.
 * @param args.anchors - Anchor element handles returned by `page.$$()`.
 * @param args.axNodes - Raw AX nodes for `Accessibility.getFullAXTree` (default `[]`).
 * @param args.describeNodes - Map from `objectId` to `backendNodeId` (default `{}`).
 * @param args.getFullAXTree - Optional override for `Accessibility.getFullAXTree` (e.g., simulate rejection).
 * @param args.describeNode - Optional override for `DOM.describeNode` (e.g., simulate rejection).
 * @returns A plain object cast to `Page`.
 */
function mockPageForAnchors(args: {
	anchors: ElementHandle<Element>[];
	axNodes?: Array<{
		backendDOMNodeId?: number;
		ignored?: boolean;
		name?: { value?: unknown };
	}>;
	describeNodes?: Record<string, number | undefined>;
	getFullAXTree?: () => Promise<unknown>;
	describeNode?: (params: { objectId: string }) => Promise<unknown>;
}): Page {
	const { anchors, axNodes = [], describeNodes = {}, getFullAXTree, describeNode } = args;

	// Answers `Accessibility.getFullAXTree`.
	const fetchAXTree = (): Promise<unknown> => {
		if (getFullAXTree) {
			return getFullAXTree();
		}
		return Promise.resolve({ nodes: axNodes });
	};

	// Answers `DOM.describeNode` for the given remote object id.
	const describeByObjectId = (objectId: string | undefined): Promise<unknown> => {
		if (describeNode) {
			return describeNode({ objectId: objectId ?? '' });
		}
		const backendNodeId = objectId == null ? undefined : describeNodes[objectId];
		return Promise.resolve({ node: { backendNodeId } });
	};

	const client = {
		send(method: string, params?: { objectId?: string }): Promise<unknown> {
			switch (method) {
				case 'Accessibility.getFullAXTree': {
					return fetchAXTree();
				}
				case 'DOM.describeNode': {
					return describeByObjectId(params?.objectId);
				}
				default: {
					// Any other CDP call is a test bug; fail loudly.
					return Promise.reject(new Error(`unexpected CDP method: ${method}`));
				}
			}
		},
	};

	const page = {
		$$: () => Promise.resolve(anchors),
		_client: () => client,
	};
	return page as unknown as Page;
}
261
+
262
describe('getAnchorList', () => {
	const pageHref = 'https://example.com/page';

	/**
	 * Anchor handle `obj-1` that points at `pageHref` and carries the given DOM text.
	 * @param textContent - Value reported for the `textContent` property.
	 */
	const anchorWithText = (textContent: string): ElementHandle<Element> =>
		mockAnchorHandle('obj-1', { href: pageHref, textContent });

	it('resolves the href and uses the accessible name from the AX tree', async () => {
		const page = mockPageForAnchors({
			anchors: [mockAnchorHandle('obj-1', { href: pageHref })],
			axNodes: [{ backendDOMNodeId: 42, name: { value: 'Accessible Name' } }],
			describeNodes: { 'obj-1': 42 },
		});

		const result = await getAnchorList(page);

		expect(result).toHaveLength(1);
		expect(result[0]?.textContent).toBe('Accessible Name');
		expect(result[0]?.href.href).toBe(pageHref);
	});

	it('uses an empty AX name as-is without falling back to textContent', async () => {
		// The anchor IS present in the AX tree, only its computed name is empty.
		// That matches the former `axNode.name || ''` behavior: the empty string is
		// kept and textContent must never be read.
		const nonHrefRead = vi.fn();
		const $anchor = {
			remoteObject: () => ({ objectId: 'obj-1' }),
			getProperty: (propName: string) => {
				if (propName !== 'href') {
					nonHrefRead();
					return Promise.resolve({ jsonValue: () => Promise.resolve('text fallback') });
				}
				return Promise.resolve({
					jsonValue: () => Promise.resolve('https://example.com/page'),
				});
			},
		} as unknown as ElementHandle<Element>;
		const page = mockPageForAnchors({
			anchors: [$anchor],
			axNodes: [{ backendDOMNodeId: 42, name: { value: '' } }],
			describeNodes: { 'obj-1': 42 },
		});

		const result = await getAnchorList(page);

		expect(result).toHaveLength(1);
		expect(result[0]?.textContent).toBe('');
		expect(nonHrefRead).not.toHaveBeenCalled();
	});

	it('falls back to textContent for ignored AX nodes (aria-hidden / display:none anchors)', async () => {
		// puppeteer's high-level snapshot({root}) with interestingOnly:true yields
		// null for ignored nodes, and the previous code then used textContent.
		const page = mockPageForAnchors({
			anchors: [anchorWithText('Visible text')],
			axNodes: [{ backendDOMNodeId: 42, ignored: true, name: { value: '' } }],
			describeNodes: { 'obj-1': 42 },
		});

		const result = await getAnchorList(page);

		expect(result).toHaveLength(1);
		expect(result[0]?.textContent).toBe('Visible text');
	});

	it('drops a single anchor whose handle throws (detached) without rejecting the whole list', async () => {
		const $detached = {
			remoteObject(): never {
				throw new Error('Handle is detached');
			},
		} as unknown as ElementHandle<Element>;
		const $healthy = mockAnchorHandle('obj-1', { href: pageHref });
		const page = mockPageForAnchors({
			anchors: [$detached, $healthy],
			axNodes: [{ backendDOMNodeId: 42, name: { value: 'Name' } }],
			describeNodes: { 'obj-1': 42 },
		});

		const result = await getAnchorList(page);

		expect(result).toHaveLength(1);
		expect(result[0]?.href.href).toBe(pageHref);
	});

	it('falls back to trimmed textContent when the anchor is not represented in the AX tree', async () => {
		const page = mockPageForAnchors({
			anchors: [anchorWithText(' Link text ')],
			// Empty tree: backendNodeId 99 has no matching AX node.
			axNodes: [],
			describeNodes: { 'obj-1': 99 },
		});

		const result = await getAnchorList(page);

		expect(result).toHaveLength(1);
		expect(result[0]?.textContent).toBe('Link text');
	});

	it('falls back to textContent when the AX tree response is malformed (no `nodes` field)', async () => {
		// Defensive case: an unexpected CDP response shape must neither throw nor
		// pollute the name map.
		const page = mockPageForAnchors({
			anchors: [anchorWithText('Plain text')],
			getFullAXTree: () => Promise.resolve({}),
			describeNodes: { 'obj-1': 1 },
		});

		const result = await getAnchorList(page);

		expect(result).toHaveLength(1);
		expect(result[0]?.textContent).toBe('Plain text');
	});

	it('falls back to textContent when DOM.describeNode response is malformed (no `node` field)', async () => {
		// Defensive case: an unexpected CDP response shape must not throw inside
		// Promise.all.
		const page = mockPageForAnchors({
			anchors: [anchorWithText('Plain text')],
			axNodes: [{ backendDOMNodeId: 1, name: { value: 'AX Name' } }],
			describeNode: () => Promise.resolve({}),
		});

		const result = await getAnchorList(page);

		expect(result).toHaveLength(1);
		expect(result[0]?.textContent).toBe('Plain text');
	});

	it('falls back to textContent for every anchor when the AX tree fetch rejects', async () => {
		const page = mockPageForAnchors({
			anchors: [anchorWithText('Plain text')],
			getFullAXTree: () => Promise.reject(new Error('CDP unavailable')),
			describeNodes: { 'obj-1': 1 },
		});

		const result = await getAnchorList(page);

		expect(result).toHaveLength(1);
		expect(result[0]?.textContent).toBe('Plain text');
	});

	it('falls back to textContent when DOM.describeNode rejects for an anchor', async () => {
		const page = mockPageForAnchors({
			anchors: [anchorWithText('Plain text')],
			axNodes: [{ backendDOMNodeId: 1, name: { value: 'AX Name' } }],
			describeNode: () => Promise.reject(new Error('detached')),
		});

		const result = await getAnchorList(page);

		expect(result).toHaveLength(1);
		expect(result[0]?.textContent).toBe('Plain text');
	});

	it('returns partial results when the overall operation exceeds the timeout', async () => {
		vi.useFakeTimers();
		const $fast = mockAnchorHandle('obj-fast', { href: 'https://example.com/fast' });
		const $slow = {
			remoteObject: () => ({ objectId: 'obj-slow' }),
			// Property reads on this handle never settle.
			getProperty: () => new Promise(() => {}),
		} as unknown as ElementHandle<Element>;
		const page = mockPageForAnchors({
			anchors: [$fast, $slow],
			axNodes: [{ backendDOMNodeId: 1, name: { value: 'Fast' } }],
			describeNodes: { 'obj-fast': 1, 'obj-slow': 2 },
		});

		const pending = getAnchorList(page, undefined, 5000);
		await vi.advanceTimersByTimeAsync(5000);
		const result = await pending;

		// Only the fast anchor finished before the overall timeout fired; the
		// hanging one was given up on.
		expect(result).toHaveLength(1);
		expect(result[0]?.href.href).toBe('https://example.com/fast');
	});

	it('skips non-HTTP links', async () => {
		const page = mockPageForAnchors({
			anchors: [mockAnchorHandle('obj-1', { href: 'javascript:void(0)' })],
			axNodes: [{ backendDOMNodeId: 1, name: { value: 'JS link' } }],
			describeNodes: { 'obj-1': 1 },
		});

		const result = await getAnchorList(page);

		expect(result).toStrictEqual([]);
	});

	it("falls back to textContent for every anchor when puppeteer's internal CDP session is unavailable", async () => {
		const $anchor = anchorWithText(' Plain text ');
		// This page stub has no `_client()`, like a puppeteer wrapper that hides the
		// internal session. Anchor data must still be produced, just without AX
		// names.
		const page = {
			$$: () => Promise.resolve([$anchor]),
		} as unknown as Page;

		const result = await getAnchorList(page);

		expect(result).toHaveLength(1);
		expect(result[0]?.textContent).toBe('Plain text');
	});

	it('returns an empty array when the page has no anchors', async () => {
		const page = mockPageForAnchors({ anchors: [] });

		const result = await getAnchorList(page);

		expect(result).toStrictEqual([]);
	});
});
495
+
496
describe('DEFAULT_DOM_EVALUATION_TIMEOUT', () => {
	it('defaults to 180 seconds', () => {
		// The constant is expressed in milliseconds.
		const expectedMs = 180_000;

		expect(DEFAULT_DOM_EVALUATION_TIMEOUT).toBe(expectedMs);
	});
});
501
+
502
/**
 * Tripwire for a private puppeteer API.
 *
 * `getAnchorList` calls `(page as any)._client()` to reuse puppeteer's internal
 * CDP session. The unit tests in this file mock that method themselves, so
 * they would keep passing if a future puppeteer release removed or renamed
 * it; production would then quietly degrade to textContent-only mode.
 *
 * This test reads the installed puppeteer-core source and asserts that a
 * `_client()` method definition is still present. When it fails, a maintainer
 * has to update `getInternalCDPClient` rather than ship the silent
 * degradation.
 */
describe('puppeteer internal API tripwire', () => {
	it('puppeteer-core CDP Page still defines _client()', () => {
		const nodeRequire = createRequire(import.meta.url);
		const resolvedPath = nodeRequire.resolve('puppeteer-core/lib/cjs/puppeteer/cdp/Page.js');
		const pageSource = readFileSync(resolvedPath, 'utf8');
		// Matches a method definition such as `_client() {`, tolerating whitespace.
		const clientMethodPattern = /_client\s*\(\s*\)\s*\{/;

		expect(pageSource).toMatch(clientMethodPattern);
	});
});