@ulpi/browse 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,497 @@
1
+ /**
2
+ * Snapshot command — accessibility tree with ref-based element selection
3
+ *
4
+ * Architecture (Locator map — no DOM mutation):
5
+ * 1. page.locator(scope).ariaSnapshot() → YAML-like accessibility tree
6
+ * 2. Parse tree, assign refs @e1, @e2, ...
7
+ * 3. Build Playwright Locator for each ref (getByRole + nth)
8
+ * 4. Store Map<string, Locator> on BrowserManager
9
+ * 5. Return compact text output with refs prepended
10
+ *
11
+ * Cursor-interactive detection (-C flag):
12
+ * After the normal ARIA snapshot, scans the DOM for elements that are
13
+ * clickable but invisible to the accessibility tree — divs with
14
+ * cursor:pointer, inline event handlers (e.g. onclick), tabindex, or data-action-style attributes.
15
+ * These get refs and locators just like ARIA elements, so "click @e15"
16
+ * works seamlessly.
17
+ *
18
+ * Later: "click @e3" → look up Locator → locator.click()
19
+ */
20
+
21
+ import type { Page, Locator } from 'playwright';
22
+ import type { BrowserManager } from './browser-manager';
23
+
24
/**
 * ARIA roles treated as "interactive".
 *
 * Used by the -i flag to keep only interactive nodes, and by the
 * cursor-interactive scan to skip elements that carry one of these roles.
 */
const INTERACTIVE_ROLES = new Set([
  'button',
  'link',
  'textbox',
  'checkbox',
  'radio',
  'combobox',
  'listbox',
  'menuitem',
  'menuitemcheckbox',
  'menuitemradio',
  'option',
  'searchbox',
  'slider',
  'spinbutton',
  'switch',
  'tab',
  'treeitem',
]);
31
+
32
/** Flags accepted by the snapshot command. */
interface SnapshotOptions {
  /** -i: only interactive elements */
  interactive?: boolean;
  /** -c: remove empty structural elements */
  compact?: boolean;
  /** -d N: limit tree depth */
  depth?: number;
  /** -s SEL: scope to CSS selector */
  selector?: string;
  /** -C: detect cursor-interactive elements (divs with cursor:pointer, onclick, tabindex) */
  cursor?: boolean;
}
39
+
40
/** One parsed line of ariaSnapshot output. */
interface ParsedNode {
  /** Number of leading whitespace characters on the line. */
  indent: number;
  /** Role token that follows the "- " marker. */
  role: string;
  /** Quoted name, or null when the line has none. */
  name: string | null;
  /** Bracketed attributes, e.g., "[level=1]"; empty string when absent. */
  props: string;
  /** Inline text content after ":"; empty string when absent. */
  children: string;
  /** The line exactly as it was received. */
  rawLine: string;
}
48
+
49
/** Description of one element found by the in-page cursor-interactive DOM scan. */
interface CursorElement {
  /** Lower-cased tag name. */
  tag: string;
  /** Element id, or empty string when it has none. */
  id: string;
  /** Trimmed class attribute, or empty string. */
  className: string;
  /** Short preview of the element's text content. */
  text: string;
  /**
   * Why the element was flagged: "cursor:pointer", "onclick", "tabindex",
   * or the name of the matching data-* attribute (e.g. "data-action").
   */
  reason: string;
  /** Best-effort unique CSS selector for building Locator. */
  cssSelector: string;
  /** Element's index among all matches of cssSelector in the DOM. */
  selectorIndex: number;
}
59
+
60
/**
 * Parse CLI args into SnapshotOptions.
 *
 * Recognised flags:
 *   -i / --interactive        only interactive elements
 *   -c / --compact            drop empty structural elements
 *   -C / --cursor             also scan for cursor-interactive elements
 *   -d / --depth <number>     limit tree depth (non-negative integer)
 *   -s / --selector <sel>     scope the snapshot to a selector
 *
 * @param args - Raw argument tokens following the command name.
 * @returns Options object containing only the flags that were supplied.
 * @throws Error when a flag is unknown, when -d is missing its value or the
 *   value is not a non-negative integer, or when -s is missing its value.
 */
export function parseSnapshotArgs(args: string[]): SnapshotOptions {
  const opts: SnapshotOptions = {};
  for (let i = 0; i < args.length; i++) {
    const flag = args[i];
    switch (flag) {
      case '-i':
      case '--interactive':
        opts.interactive = true;
        break;
      case '-c':
      case '--compact':
        opts.compact = true;
        break;
      case '-C':
      case '--cursor':
        opts.cursor = true;
        break;
      case '-d':
      case '--depth': {
        const raw = args[++i];
        // parseInt alone would accept "-1" (which filters out every node) and
        // "3abc" / "3.7" (silently ignoring the tail), so require plain digits.
        if (raw === undefined || !/^\d+$/.test(raw.trim())) {
          throw new Error('Usage: snapshot -d <number>');
        }
        opts.depth = parseInt(raw, 10);
        break;
      }
      case '-s':
      case '--selector': {
        const selector = args[++i];
        if (!selector) throw new Error('Usage: snapshot -s <selector>');
        opts.selector = selector;
        break;
      }
      default:
        throw new Error(`Unknown snapshot flag: ${flag}`);
    }
  }
  return opts;
}
95
+
96
/**
 * Parse one line of ariaSnapshot output.
 *
 * Format examples:
 *   - heading "Test" [level=1]
 *   - link "Link A":
 *     - /url: /a
 *   - textbox "Name"
 *   - paragraph: Some text
 *   - combobox "Role":
 *   - button "Say \"hi\""          (escaped quotes inside the name)
 *   - 'link "Note: read me"':      (whole key wrapped in YAML single quotes)
 *
 * NOTE(review): the last two shapes are what Playwright appears to emit for
 * names containing quotes or ": " — confirm against the Playwright version in
 * use. Lines of those shapes were previously rejected, so the element was
 * silently missing from the snapshot.
 *
 * @param line - A single line of the snapshot text.
 * @returns The parsed node, or null for lines that are not role entries
 *   (e.g. metadata such as "- /url: /a", or blank lines).
 */
function parseLine(line: string): ParsedNode | null {
  // Unwrap a YAML single-quoted key: - 'KEY': rest  →  - KEY: rest
  // Inside single quotes a literal quote is written as two quotes ('').
  let candidate = line;
  const quotedKey = /^(\s*-\s+)'((?:[^']|'')*)'(\s*(?::.*)?)$/.exec(line);
  if (quotedKey) {
    candidate = quotedKey[1] + quotedKey[2].replace(/''/g, "'") + quotedKey[3];
  }

  // Match: (indent)(- )(role)( "name")?( [props])?(: inline)?
  // The name may contain backslash escapes such as \" and \\.
  const match =
    /^(\s*)-\s+(\w+)(?:\s+"((?:[^"\\]|\\.)*)")?(?:\s+(\[.*?\]))?\s*(?::\s*(.*))?$/.exec(candidate);
  if (!match) {
    // Skip metadata lines like "- /url: /a"
    return null;
  }

  let name: string | null = match[3] ?? null;
  if (name !== null && name.includes('\\')) {
    // Decode escapes so the name equals the real accessible name. If the text
    // is not a valid JSON string body, keep it verbatim rather than failing.
    try {
      const decoded: unknown = JSON.parse(`"${name}"`);
      if (typeof decoded === 'string') name = decoded;
    } catch {
      // keep the raw captured text
    }
  }

  return {
    indent: match[1].length,
    role: match[2],
    name,
    props: match[4] || '',
    children: match[5]?.trim() || '',
    rawLine: line,
  };
}
123
+
124
/**
 * Tag names the cursor-interactive scan skips outright. These are native
 * interactive elements, which the accessibility tree is expected to capture
 * already; skipping them avoids duplicate entries.
 */
const NATIVE_INTERACTIVE_TAGS = new Set([
  'a',
  'button',
  'input',
  'select',
  'textarea',
  'option',
  'details',
  'summary',
]);
131
+
132
/**
 * Scan the DOM for elements that look clickable/interactive but are not
 * expected to show up as interactive nodes in the accessibility tree.
 * The scan itself runs inside page.evaluate().
 *
 * Detection heuristics (first match wins, checked in this order):
 *   1. onclick / onmousedown / onmouseup / ontouchstart attribute → "onclick"
 *   2. tabindex attribute present (any value, including "-1")     → "tabindex"
 *   3. data-action, data-click, data-handler, data-toggle, data-dismiss,
 *      data-target, data-bs-toggle or data-bs-dismiss attribute   → attribute name
 *   4. computed style cursor: pointer                             → "cursor:pointer"
 *
 * Exclusions:
 *   - Tags listed in NATIVE_INTERACTIVE_TAGS
 *   - Elements whose offsetWidth and offsetHeight are both 0, or whose
 *     computed display is "none" / visibility is "hidden"
 *   - Elements whose role attribute is one of INTERACTIVE_ROLES
 *
 * @param page - Page whose DOM is scanned.
 * @param scopeSelector - Optional CSS selector; when it matches, only
 *   descendants of the first match are scanned, otherwise document.body is used.
 * @returns One entry per flagged element, in querySelectorAll('*') order.
 */
async function findCursorInteractiveElements(
  page: Page,
  scopeSelector?: string,
): Promise<CursorElement[]> {
  const interactiveRolesList = [...INTERACTIVE_ROLES];
  const nativeTagsList = [...NATIVE_INTERACTIVE_TAGS];

  // The callback executes in the page, so everything it needs is handed over
  // through the argument object instead of being captured from this module.
  return await page.evaluate(
    ({ scopeSel, interactiveRoles, nativeTags }) => {
      const root = scopeSel
        ? document.querySelector(scopeSel) || document.body
        : document.body;

      const nativeSet = new Set(nativeTags);
      const interactiveSet = new Set(interactiveRoles);
      const eventAttrs = ['onclick', 'onmousedown', 'onmouseup', 'ontouchstart'];
      const dataAttrs = new Set([
        'data-action', 'data-click', 'data-handler', 'data-toggle',
        'data-dismiss', 'data-target', 'data-bs-toggle', 'data-bs-dismiss',
      ]);
      const results: CursorElement[] = [];

      const allElements = root.querySelectorAll('*');

      for (let i = 0; i < allElements.length; i++) {
        const el = allElements[i] as HTMLElement;
        const tag = el.tagName.toLowerCase();

        // Skip native interactive elements — ARIA already captures these
        if (nativeSet.has(tag)) continue;

        // Skip hidden or zero-size elements
        if (el.offsetWidth === 0 && el.offsetHeight === 0) continue;
        const style = getComputedStyle(el);
        if (style.display === 'none' || style.visibility === 'hidden') continue;

        // Elements with an explicit interactive ARIA role are expected to be
        // in the ARIA snapshot already; skip them to avoid duplicates.
        const roleAttr = el.getAttribute('role');
        if (roleAttr && interactiveSet.has(roleAttr)) continue;

        // Detect why this element might be interactive (first match wins).
        let reason = '';

        // Inline event-handler attributes
        if (eventAttrs.some((attr) => el.hasAttribute(attr))) {
          reason = 'onclick';
        }

        // tabindex explicitly set on the element itself. tabindex="-1" is
        // programmatic focus only, but is still worth flagging.
        if (!reason && el.hasAttribute('tabindex')) {
          reason = 'tabindex';
        }

        // data-action, data-click, or similar interaction attributes
        if (!reason) {
          for (const attr of el.attributes) {
            if (dataAttrs.has(attr.name)) {
              reason = attr.name;
              break;
            }
          }
        }

        // cursor: pointer computed style (most common signal).
        // NOTE(review): cursor is an inherited CSS property, so descendants of
        // a pointer-cursor element match here too — confirm that is intended.
        if (!reason && style.cursor === 'pointer') {
          reason = 'cursor:pointer';
        }

        if (!reason) continue;

        // Text preview: collapse whitespace FIRST, then keep 60 characters, so
        // markup indentation and newlines do not consume the budget.
        const text = (el.textContent || '').replace(/\s+/g, ' ').trim().slice(0, 60);

        // Build a best-effort CSS selector for locator construction.
        // Strategy: find nearest ancestor with an ID and anchor from there.
        let cssSelector = '';
        if (el.id) {
          cssSelector = `#${CSS.escape(el.id)}`;
        } else {
          // Build the element's own selector: tag.class1.class2:nth-of-type(N)
          let sel = tag;
          if (el.className && typeof el.className === 'string') {
            const classes = el.className.trim().split(/\s+/).slice(0, 3);
            for (const cls of classes) {
              if (cls) sel += `.${CSS.escape(cls)}`;
            }
          }
          // Add nth-of-type to disambiguate among siblings
          const parent = el.parentElement;
          if (parent) {
            const siblings = parent.querySelectorAll(`:scope > ${tag}`);
            if (siblings.length > 1) {
              let nth = 1;
              for (let s = 0; s < siblings.length; s++) {
                if (siblings[s] === el) { nth = s + 1; break; }
              }
              sel += `:nth-of-type(${nth})`;
            }
          }

          // Walk up to find nearest ancestor with an ID (max 5 levels).
          // When scoped, skip ancestors outside the scope root to avoid
          // generating selectors that reference IDs the scoped locator can't reach.
          let ancestor: HTMLElement | null = el.parentElement;
          let anchor = '';
          let depth = 0;
          while (ancestor && ancestor !== document.body && depth < 5) {
            if (ancestor.id) {
              // If scoped, only use this anchor if it's inside the scope root
              if (!scopeSel || root.contains(ancestor)) {
                anchor = `#${CSS.escape(ancestor.id)}`;
              }
              break;
            }
            ancestor = ancestor.parentElement;
            depth++;
          }

          // Anchor from ID'd ancestor for uniqueness, or fall back to element selector alone
          cssSelector = anchor ? `${anchor} ${sel}` : sel;
        }

        // Compute the element's actual index among all DOM matches of cssSelector.
        // When scoped, query against the scope root so nth() aligns with
        // Playwright's page.locator(scope).locator(cssSelector).
        let selectorIndex = 0;
        try {
          const queryRoot = scopeSel ? root : document.body;
          const allMatches = queryRoot.querySelectorAll(cssSelector);
          for (let m = 0; m < allMatches.length; m++) {
            if (allMatches[m] === el) { selectorIndex = m; break; }
          }
        } catch {
          // Deliberately best-effort: if the generated selector is rejected by
          // querySelectorAll, keep index 0 rather than aborting the whole scan.
        }

        results.push({
          tag,
          id: el.id || '',
          className: typeof el.className === 'string' ? el.className.trim() : '',
          text,
          reason,
          cssSelector,
          selectorIndex,
        });
      }

      return results;
    },
    {
      scopeSel: scopeSelector || null,
      interactiveRoles: interactiveRolesList,
      nativeTags: nativeTagsList,
    },
  );
}
316
+
317
/**
 * Take an accessibility snapshot and build the ref map.
 *
 * Steps:
 *   1. Resolve the root locator (the -s selector, or body).
 *   2. Read its ariaSnapshot() text and parse every line once.
 *   3. For each node that survives the -d / -i / -c filters, assign the next
 *      ref (e1, e2, ...) and build a getByRole locator for it.
 *   4. With -C, append cursor-interactive elements found by a DOM scan.
 *   5. Store the ref map and the rendered text on the BrowserManager.
 *
 * Locator disambiguation assumes the snapshot lists nodes in the same order
 * in which getByRole yields its matches.
 *
 * @param args - Raw snapshot flags (see parseSnapshotArgs).
 * @param bm - Browser manager supplying the page and receiving the ref map
 *   and last-snapshot text.
 * @returns The rendered snapshot, or a placeholder message when nothing was found.
 * @throws Error when the -s selector matches no element, or when the flags are invalid.
 */
export async function handleSnapshot(
  args: string[],
  bm: BrowserManager
): Promise<string> {
  const opts = parseSnapshotArgs(args);
  const page = bm.getPage();

  // Get accessibility tree via ariaSnapshot
  let rootLocator: Locator;
  if (opts.selector) {
    rootLocator = page.locator(opts.selector);
    const count = await rootLocator.count();
    if (count === 0) throw new Error(`Selector not found: ${opts.selector}`);
  } else {
    rootLocator = page.locator('body');
  }

  const ariaText = await rootLocator.ariaSnapshot();
  if (!ariaText || ariaText.trim().length === 0) {
    bm.setRefMap(new Map());
    // If -C is active, still scan for cursor-interactive even with empty ARIA
    if (opts.cursor) {
      const result = await appendCursorElements(page, opts, [], new Map(), 1, bm);
      bm.setLastSnapshot(result, args);
      return result;
    }
    bm.setLastSnapshot('(no accessible elements found)', args);
    return '(no accessible elements found)';
  }

  // Parse each line exactly once; non-node lines (metadata, blanks) drop out.
  const nodes = ariaText.split('\n').flatMap((line) => {
    const parsed = parseLine(line);
    return parsed ? [parsed] : [];
  });

  const refMap = new Map<string, Locator>();
  const output: string[] = [];
  let refCounter = 1;

  // Occurrence totals used for nth() disambiguation:
  //  - named nodes are looked up by role + exact name, so they are counted
  //    per role+name pair;
  //  - unnamed nodes are looked up by role alone, which matches EVERY element
  //    of that role (named or not), so they are counted per role.
  const nameCounts = new Map<string, number>();
  const roleCounts = new Map<string, number>();
  for (const node of nodes) {
    const nameKey = `${node.role}:${node.name || ''}`;
    nameCounts.set(nameKey, (nameCounts.get(nameKey) ?? 0) + 1);
    roleCounts.set(node.role, (roleCounts.get(node.role) ?? 0) + 1);
  }

  const nameSeen = new Map<string, number>();
  const roleSeen = new Map<string, number>();
  const scope = opts.selector ? page.locator(opts.selector) : page;
  type RoleArg = Parameters<Locator['getByRole']>[0];

  for (const node of nodes) {
    const depth = Math.floor(node.indent / 2);
    const isInteractive = INTERACTIVE_ROLES.has(node.role);

    // Always advance the seen counters for every parsed node, regardless of
    // filtering. nth() indices must match the full (unfiltered) tree so that
    // locators point to the correct element even when siblings are filtered out.
    const nameKey = `${node.role}:${node.name || ''}`;
    const nameIndex = nameSeen.get(nameKey) ?? 0;
    nameSeen.set(nameKey, nameIndex + 1);
    const roleIndex = roleSeen.get(node.role) ?? 0;
    roleSeen.set(node.role, roleIndex + 1);

    // Depth filter
    if (opts.depth !== undefined && depth > opts.depth) continue;

    // Interactive filter
    if (opts.interactive && !isInteractive) continue;

    // Compact filter: skip elements with no name and no inline content that aren't interactive
    if (opts.compact && !isInteractive && !node.name && !node.children) continue;

    // Assign ref
    const ref = `e${refCounter++}`;
    const indent = ' '.repeat(depth);

    // The role comes from parsed text, so it has to be narrowed by assertion.
    const role = node.role as RoleArg;
    let locator: Locator;
    if (node.name) {
      // exact: true — without it the name is matched as a case-insensitive
      // substring, so "Save" would also match "Save as" and the nth() index
      // (computed from exact role+name pairs) would point at the wrong element.
      locator = scope.getByRole(role, { name: node.name, exact: true });
      if ((nameCounts.get(nameKey) ?? 1) > 1) {
        locator = locator.nth(nameIndex);
      }
    } else {
      // No name filter: the locator matches all elements of this role, so the
      // index must be the node's position among all nodes of the role.
      locator = scope.getByRole(role);
      if ((roleCounts.get(node.role) ?? 1) > 1) {
        locator = locator.nth(roleIndex);
      }
    }

    refMap.set(ref, locator);

    // Format output line
    let outputLine = `${indent}@${ref} [${node.role}]`;
    if (node.name) outputLine += ` "${node.name}"`;
    if (node.props) outputLine += ` ${node.props}`;
    if (node.children) outputLine += `: ${node.children}`;

    output.push(outputLine);
  }

  // Cursor-interactive detection: supplement ARIA tree with DOM-level scan.
  // appendCursorElements stores the ref map itself.
  if (opts.cursor) {
    const result = await appendCursorElements(page, opts, output, refMap, refCounter, bm);
    bm.setLastSnapshot(result, args);
    return result;
  }

  // Store ref map and rendered snapshot on BrowserManager
  bm.setRefMap(refMap);

  if (output.length === 0) {
    bm.setLastSnapshot('(no interactive elements found)', args);
    return '(no interactive elements found)';
  }

  const rendered = output.join('\n');
  bm.setLastSnapshot(rendered, args);
  return rendered;
}
443
+
444
/**
 * Run the cursor-interactive DOM scan, give each hit the next ref, register
 * its locator, and append a "[cursor-interactive]" section to the output.
 * Used when the -C flag is active.
 *
 * @param page - Page to scan and to build locators from.
 * @param opts - Snapshot options; only `selector` is read here.
 * @param output - Lines rendered so far; extended in place.
 * @param refMap - Ref → locator map; extended in place and stored on `bm`.
 * @param refCounter - Number to use for the first new ref.
 * @param bm - Browser manager that receives the final ref map.
 * @returns The joined output, or a placeholder when there are no lines at all.
 */
async function appendCursorElements(
  page: Page,
  opts: SnapshotOptions,
  output: string[],
  refMap: Map<string, Locator>,
  refCounter: number,
  bm: BrowserManager,
): Promise<string> {
  const found = await findCursorInteractiveElements(page, opts.selector);

  if (found.length > 0) {
    // Blank separator line, then the section header.
    output.push('', '[cursor-interactive]');

    let nextRef = refCounter;
    found.forEach((item) => {
      const ref = `e${nextRef++}`;

      // Locate via the CSS selector, picking the element by its real position
      // among all DOM matches (selectorIndex) rather than by a running
      // counter, which could drift when non-cursor elements share the selector.
      const matches = opts.selector
        ? page.locator(opts.selector).locator(item.cssSelector)
        : page.locator(item.cssSelector);
      refMap.set(ref, matches.nth(item.selectorIndex));

      // Rendered as: @e15 [div.cursor] "Add to cart" (cursor:pointer)
      const firstClass = item.className ? '.' + item.className.split(/\s+/)[0] : '';
      const parts = [`@${ref} [${item.tag + firstClass}]`];
      if (item.text) parts.push(`"${item.text}"`);
      parts.push(`(${item.reason})`);

      output.push(parts.join(' '));
    });
  }

  // Store ref map on BrowserManager
  bm.setRefMap(refMap);

  return output.length === 0
    ? '(no interactive elements found)'
    : output.join('\n');
}
package/src/types.ts ADDED
@@ -0,0 +1,12 @@
1
+ import type { BrowserManager } from './browser-manager';
2
+
3
/** Input bundle describing a single command invocation. */
export interface CommandContext {
  /** BrowserManager instance made available to the command. */
  manager: BrowserManager;
  /** Command identifier — presumably the command name as typed; confirm with callers. */
  command: string;
  /** Argument tokens supplied with the command. */
  args: string[];
}
8
+
9
/** What a command produces. */
export interface CommandResult {
  /** Text output of the command. */
  output: string;
  /** Optional hint accompanying the output; how it is presented is not visible here. */
  hint?: string;
}