@specsage/cli 0.1.13 → 0.1.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/browser.js CHANGED
@@ -128,229 +128,454 @@ async function captureState() {
128
128
  };
129
129
  }
130
130
 
131
- async function enumerateElements() {
132
- const elements = [];
133
-
134
- // Native interactive elements
135
- const nativeSelectors = [
136
- 'button, [role="button"]',
137
- 'a, [role="link"]',
138
- 'input, textarea',
139
- 'select, [role="combobox"]',
140
- '[role="option"]',
141
- '[role="menuitem"]',
142
- '[role="tab"]',
143
- '[role="checkbox"]',
144
- '[role="radio"]',
145
- ];
131
+ // ===== ARIA Snapshot: Constants & Helpers =====
132
+
133
// ARIA roles that collectInteractiveNodes() treats as interactive when walking
// the parsed accessibility snapshot.
const INTERACTIVE_ROLES = new Set([
  'button',
  'link',
  'textbox',
  'checkbox',
  'radio',
  'combobox',
  'option',
  'menuitem',
  'tab',
  'switch',
  'spinbutton',
  'searchbox',
]);

// ARIA role -> element `type` reported by enumerateElements(). A role without
// an entry here is reported under its own role name.
const ROLE_TO_TYPE = {
  // Click targets
  button: 'button',
  link: 'link',

  // Text-entry widgets all collapse to "input"
  textbox: 'input',
  spinbutton: 'input',
  searchbox: 'input',

  // Toggles
  checkbox: 'checkbox',
  switch: 'checkbox',
  radio: 'radio',

  // Choice widgets
  combobox: 'select',
  option: 'option',
  menuitem: 'option',
  tab: 'tab',
};
146
153
 
147
- // Scripted interactive elements
148
- const scriptedSelectors = [
149
- { selector: '[onclick]', source: 'onclick' },
150
- { selector: '[data-action]', source: 'data-action' },
151
- { selector: '[data-testid]', source: 'data-testid' },
152
- { selector: '[tabindex]', source: 'tabindex' },
153
- ];
154
/**
 * Parse the YAML-like text of an ARIA snapshot into a tree.
 *
 * Only list items (lines whose trimmed form starts with "- ") are considered.
 * Each item is read left to right as: role, optional double-quoted name,
 * optional `[key]` / `[key=value]` attributes. Anything after that (a trailing
 * ":" announcing children, or ": inline text") is ignored. Reading the name as
 * a quoted token means colons inside it (e.g. `link "Note: read this"`) no
 * longer truncate the line.
 *
 * @param {string|null|undefined} yaml - Snapshot text; falsy input yields [].
 * @returns {Array<{role: string, name: string, attributes: Object, children: Array, depth: number}>}
 *   Top-level nodes; nesting follows indentation (2 spaces = 1 level).
 */
function parseAriaSnapshot(yaml) {
  if (!yaml) return [];

  // Names appear to be emitted JSON-escaped (\" and \\) — NOTE(review): confirm
  // against the Playwright version in use. Fall back to the raw text if the
  // escape sequence is not valid JSON.
  const unescapeName = (raw) => {
    if (!raw.includes('\\')) return raw;
    try {
      return JSON.parse(`"${raw}"`);
    } catch {
      return raw;
    }
  };

  const root = { role: 'root', name: '', attributes: {}, children: [], depth: -1 };
  const stack = [root];

  // Accept CRLF as well as LF so a stray "\r" never ends up inside a role.
  for (const line of yaml.split(/\r?\n/)) {
    const trimmed = line.trimStart();
    if (!trimmed.startsWith('- ')) continue;

    // Indentation depth: count leading whitespace, each 2 = 1 level
    const depth = Math.floor((line.length - trimmed.length) / 2);

    // Content after "- "
    let rest = trimmed.slice(2);

    // A key that needs YAML quoting (for example because its name contains
    // ": ") is presumably wrapped in single quotes with '' as the escape —
    // TODO confirm. Unwrap it; whatever follows the key is not part of it.
    const quotedKey = rest.match(/^'((?:[^']|'')*)'/);
    if (quotedKey) {
      rest = quotedKey[1].replace(/''/g, "'");
    }

    // Role: leading run of characters up to whitespace, '"' or '['.
    const token = rest.match(/^[^\s"[]*/)[0];
    rest = rest.slice(token.length);
    let role = token;
    if (token.endsWith(':') && (rest === '' || /^\s/.test(rest))) {
      // "role:" or "role: inline text" — no name or attributes can follow.
      role = token.slice(0, -1);
      rest = '';
    }
    rest = rest.trim();

    // Name: one double-quoted token, honouring backslash escapes.
    let name = '';
    const nameMatch = rest.match(/^"((?:[^"\\]|\\.)*)"/);
    if (nameMatch) {
      name = unescapeName(nameMatch[1]).trim();
      rest = rest.slice(nameMatch[0].length);
    }

    // Attributes: consecutive [key=value] or [key] groups directly after the
    // role/name. Brackets inside trailing inline text are deliberately ignored.
    const attributes = {};
    let attrMatch;
    while ((attrMatch = rest.match(/^\s*\[([^\]]+)\]/)) !== null) {
      const attr = attrMatch[1];
      const eqIdx = attr.indexOf('=');
      if (eqIdx !== -1) {
        attributes[attr.slice(0, eqIdx).trim()] = attr.slice(eqIdx + 1).trim();
      } else {
        attributes[attr.trim()] = true;
      }
      rest = rest.slice(attrMatch[0].length);
    }

    const node = { role, name, attributes, children: [], depth };

    // Pop stack until the top is a shallower node: that is the parent.
    while (stack.length > 1 && stack[stack.length - 1].depth >= depth) {
      stack.pop();
    }

    stack[stack.length - 1].children.push(node);
    stack.push(node);
  }

  return root.children;
}
236
+
237
/**
 * Depth-first walk over the parsed ARIA tree that gathers every node whose
 * role is in INTERACTIVE_ROLES, in document (pre-order) sequence.
 *
 * Each collected entry carries its own copy of the ancestor chain
 * ({ role, name } pairs, outermost first) so a scoped locator such as
 * main > navigation > link can be rebuilt later.
 *
 * @param {Array} nodes - Sibling nodes as produced by parseAriaSnapshot().
 * @param {Array<{role: string, name: string}>} [parentChain=[]] - Ancestors of `nodes`.
 * @returns {Array<{role: string, name: string, attributes: Object, parentChain: Array}>}
 */
function collectInteractiveNodes(nodes, parentChain = []) {
  const collected = [];

  const walk = (siblings, ancestors) => {
    for (const { role, name, attributes, children } of siblings) {
      if (INTERACTIVE_ROLES.has(role)) {
        // Copy the chain so entries never share (or mutate) one array.
        collected.push({ role, name, attributes, parentChain: ancestors.slice() });
      }

      if (children.length > 0) {
        // Descendants see this node as their innermost ancestor.
        walk(children, [...ancestors, { role, name }]);
      }
    }
  };

  walk(nodes, parentChain);
  return collected;
}
263
+
264
/**
 * Annotate each node, in place, with `occurrenceIndex` (0-based position among
 * nodes sharing the same parentChain + role + name) and `totalOccurrences`
 * (size of that group), so duplicates can be disambiguated with .nth(i).
 *
 * The grouping key is serialized once per node and reused for the second pass
 * instead of being recomputed.
 *
 * @param {Array<{role: string, name: string, parentChain: Array}>} nodes - Mutated in place.
 * @returns {void}
 */
function assignOccurrenceIndices(nodes) {
  const counts = new Map();
  const keyed = [];

  // Pass 1: running count per key gives each node its index within the group.
  for (const node of nodes) {
    const key = JSON.stringify([node.parentChain, node.role, node.name]);
    const seenSoFar = counts.get(key) || 0;
    node.occurrenceIndex = seenSoFar;
    counts.set(key, seenSoFar + 1);
    keyed.push([node, key]);
  }

  // Pass 2: final counts are known only after every node has been visited.
  for (const [node, key] of keyed) {
    node.totalOccurrences = counts.get(key);
  }
}
281
+
282
/**
 * Turn an interactive node plus its ancestor chain into a chained role locator,
 * e.g. page.getByRole('main').getByRole('navigation').getByRole('link', { name: 'Home' }).
 *
 * Named ancestors are matched exactly by name; unnamed ancestors by role only.
 * When the node shares its chain/role/name with others (totalOccurrences > 1),
 * the result is narrowed with .nth(occurrenceIndex).
 *
 * @param {object} pg - Page (or locator) exposing getByRole().
 * @param {{role: string, name: string, parentChain: Array, totalOccurrences: number, occurrenceIndex: number}} node
 * @returns {object} The locator for the node.
 */
function buildScopedLocator(pg, node) {
  // Narrow the scope one ancestor at a time, outermost first.
  const scope = node.parentChain.reduce(
    (current, { role, name }) => current.getByRole(role, name ? { name, exact: true } : {}),
    pg
  );

  const targetOptions = node.name ? { exact: true, name: node.name } : { exact: true };
  const target = scope.getByRole(node.role, targetOptions);

  return node.totalOccurrences > 1 ? target.nth(node.occurrenceIndex) : target;
}
304
+
305
/**
 * Report whether two boxes overlap "significantly": the intersection covers at
 * least 50% of the smaller box's area. Boxes are { x, y, w, h }. If the smaller
 * area is not positive the answer is always false.
 *
 * Used by the scripted-element fallback to avoid listing an element twice.
 *
 * @param {{x: number, y: number, w: number, h: number}} a
 * @param {{x: number, y: number, w: number, h: number}} b
 * @returns {boolean}
 */
function bboxOverlaps(a, b) {
  // Length of the shared segment of two 1-D intervals (0 when disjoint).
  const sharedLength = (startA, lenA, startB, lenB) =>
    Math.max(0, Math.min(startA + lenA, startB + lenB) - Math.max(startA, startB));

  const intersection = sharedLength(a.x, a.w, b.x, b.w) * sharedLength(a.y, a.h, b.y, b.h);
  const smaller = Math.min(a.w * a.h, b.w * b.h);

  if (!(smaller > 0)) return false;
  return intersection / smaller >= 0.5;
}
316
+
317
/**
 * Extract descriptive details from a DOM element. Passed to locator.evaluate(),
 * so it runs in the browser context and must stay self-contained (no references
 * to helpers defined outside this function).
 *
 * @param {Element} el - The element to describe.
 * @returns {null | {
 *   tagName: string, ariaLabel: ?string, name: ?string, placeholder: ?string,
 *   type: ?string, href: ?string, disabled: boolean, checked: boolean,
 *   visibleText: string, accessibleName: string, backgroundColor: ?string,
 *   domPath: string
 * }} null when the element's computed opacity is "0"; otherwise the details.
 *   `domPath` is a CSS selector path ("a > b > c") anchored at the nearest
 *   ancestor with an id, or at the document element when none has one.
 */
function extractElementDetails(el) {
  const style = getComputedStyle(el);
  if (style.opacity === '0') return null;

  const isPainted = (color) =>
    Boolean(color) && color !== 'rgba(0, 0, 0, 0)' && color !== 'transparent';

  // Ids such as ":r1:", "a.b" or "1abc" are not valid in a "#id" selector
  // unless escaped; an unescaped one makes page.locator(domPath) throw.
  const escapeId = (id) =>
    (typeof CSS !== 'undefined' && typeof CSS.escape === 'function' ? CSS.escape(id) : id);

  const tagName = el.tagName.toLowerCase();
  const ariaLabel = el.getAttribute('aria-label');
  const name = el.getAttribute('name');
  const placeholder = el.getAttribute('placeholder');
  const type = el.getAttribute('type');
  const href = el.getAttribute('href');
  const visibleText = (el.textContent || '').trim().substring(0, 100);
  const disabled = el.disabled === true || el.getAttribute('aria-disabled') === 'true';
  const checked = el.checked === true || el.getAttribute('aria-checked') === 'true';

  // Prefer the element's own background; otherwise take the first direct child
  // whose background is neither transparent nor plain white.
  let backgroundColor = isPainted(style.backgroundColor) ? style.backgroundColor : null;
  if (!backgroundColor) {
    for (const child of el.children) {
      const childBg = getComputedStyle(child).backgroundColor;
      if (isPainted(childBg) && childBg !== 'rgb(255, 255, 255)') {
        backgroundColor = childBg;
        break;
      }
    }
  }

  // Generate domPath for scroll-into-view fallback: walk up until an element
  // with an id is found (treated as a unique anchor) or the tree ends.
  const parts = [];
  let node = el;
  while (node && node.nodeType === Node.ELEMENT_NODE) {
    let seg = node.tagName.toLowerCase();
    if (node.id) {
      seg += `#${escapeId(node.id)}`;
      parts.unshift(seg);
      break;
    }

    const siblings = node.parentNode
      ? Array.from(node.parentNode.children).filter((c) => c.tagName === node.tagName)
      : [];
    if (siblings.length > 1) {
      // :nth-of-type is 1-based and only needed when same-tag siblings exist.
      seg += `:nth-of-type(${siblings.indexOf(node) + 1})`;
    }
    parts.unshift(seg);
    node = node.parentNode;
  }

  return {
    tagName, ariaLabel, name, placeholder, type, href,
    disabled, checked, visibleText,
    accessibleName: ariaLabel || visibleText || name || placeholder || '',
    backgroundColor,
    domPath: parts.join(' > ')
  };
}
154
381
 
155
- // Process native elements
156
- for (const selector of nativeSelectors) {
157
- const locators = page.locator(selector);
158
- const count = await locators.count();
382
+ async function enumerateElements() {
383
+ // ---- Step 1: Get the Aria Snapshot ----
384
+ let snapshot;
385
+ try {
386
+ snapshot = await page.locator('body').ariaSnapshot();
387
+ } catch {
388
+ snapshot = null;
389
+ }
159
390
 
160
- for (let i = 0; i < count; i++) {
161
- const el = locators.nth(i);
391
+ // ---- Step 2: Parse into tree, extract interactive nodes ----
392
+ const tree = parseAriaSnapshot(snapshot);
393
+ const interactiveNodes = collectInteractiveNodes(tree);
394
+ assignOccurrenceIndices(interactiveNodes);
162
395
 
396
+ // ---- Step 3: Resolve each to DOM via scoped locators ----
397
+ const ariaElements = (await Promise.all(
398
+ interactiveNodes.map(async (node) => {
163
399
  try {
164
- const isVisible = await el.isVisible();
165
- if (!isVisible) continue;
166
-
167
- const box = await el.boundingBox();
168
- if (!box) continue;
169
-
170
- const tagName = await el.evaluate(e => e.tagName.toLowerCase());
171
- const role = await el.getAttribute('role');
172
- const ariaLabel = await el.getAttribute('aria-label');
173
- const name = await el.getAttribute('name');
174
- const placeholder = await el.getAttribute('placeholder');
175
- const type = await el.getAttribute('type');
176
- const href = await el.getAttribute('href');
177
- const disabled = await el.isDisabled();
178
-
179
- // Get DOM path for stable identification
180
- const domPath = await el.evaluate(e => {
181
- const parts = [];
182
- let node = e;
183
- while (node && node.nodeType === Node.ELEMENT_NODE) {
184
- let selector = node.tagName.toLowerCase();
185
- if (node.id) {
186
- selector += `#${node.id}`;
187
- parts.unshift(selector);
188
- break; // ID is unique, stop here
189
- } else {
190
- const siblings = node.parentNode ? Array.from(node.parentNode.children).filter(c => c.tagName === node.tagName) : [];
191
- if (siblings.length > 1) {
192
- const index = siblings.indexOf(node) + 1;
193
- selector += `:nth-of-type(${index})`;
194
- }
195
- parts.unshift(selector);
196
- }
197
- node = node.parentNode;
198
- }
199
- return parts.join(' > ');
200
- });
400
+ const locator = buildScopedLocator(page, node);
201
401
 
202
- let accessibleName = ariaLabel;
203
- if (!accessibleName) {
204
- accessibleName = await el.evaluate(e => e.textContent?.trim().substring(0, 100));
205
- }
206
- if (!accessibleName) {
207
- accessibleName = name || placeholder || '';
208
- }
402
+ const [box, details] = await Promise.all([
403
+ locator.boundingBox({ timeout: 3000 }).catch(() => null),
404
+ locator.evaluate(extractElementDetails).catch(() => null)
405
+ ]);
209
406
 
210
- const visibleText = await el.evaluate(e => e.textContent?.trim().substring(0, 100) || '');
211
-
212
- let elementType;
213
- const effectiveRole = role || tagName;
214
- if (tagName === 'button' || role === 'button') {
215
- elementType = 'button';
216
- } else if (tagName === 'a' || role === 'link') {
217
- elementType = 'link';
218
- } else if (tagName === 'input' || tagName === 'textarea') {
219
- elementType = 'input';
220
- } else if (tagName === 'select' || role === 'combobox') {
221
- elementType = 'select';
222
- } else if (role === 'option' || role === 'menuitem') {
223
- elementType = 'option';
224
- } else if (role === 'checkbox') {
225
- elementType = 'checkbox';
226
- } else if (role === 'radio') {
227
- elementType = 'radio';
228
- } else if (role === 'tab') {
229
- elementType = 'tab';
230
- } else {
231
- elementType = tagName;
232
- }
407
+ if (!details || !box || box.width === 0 || box.height === 0) return null;
408
+
409
+ const elementType = ROLE_TO_TYPE[node.role] || node.role;
233
410
 
234
- // Generate stable key for this element
235
411
  const stableKey = generateStableKey({
236
- tagName, role, name, type, placeholder, ariaLabel, href, domPath
412
+ tagName: details.tagName,
413
+ role: node.role,
414
+ name: details.name,
415
+ type: details.type,
416
+ placeholder: details.placeholder,
417
+ ariaLabel: details.ariaLabel,
418
+ href: details.href,
419
+ domPath: details.domPath
237
420
  });
238
421
 
239
- elements.push({
240
- id: null, // Will be assigned after deduplication
422
+ return {
423
+ id: null,
241
424
  stable_key: stableKey,
242
425
  type: elementType,
243
- role: effectiveRole,
244
- accessible_name: accessibleName || '',
245
- visible_text: visibleText,
246
- disabled: disabled,
247
- input_type: type || null,
426
+ role: node.role,
427
+ accessible_name: node.name || details.accessibleName || '',
428
+ visible_text: details.visibleText,
429
+ disabled: details.disabled || !!node.attributes.disabled,
430
+ checked: details.checked || !!node.attributes.checked,
431
+ input_type: details.type || null,
432
+ background_color: details.backgroundColor || null,
248
433
  bounding_box: {
249
434
  x: Math.round(box.x),
250
435
  y: Math.round(box.y),
251
436
  w: Math.round(box.width),
252
437
  h: Math.round(box.height)
253
438
  },
254
- locator: el,
439
+ domPath: details.domPath,
255
440
  mechanism: 'native',
256
441
  source: 'native'
257
- });
258
- } catch (err) {
259
- continue;
442
+ };
443
+ } catch {
444
+ return null;
260
445
  }
446
+ })
447
+ )).filter(Boolean);
448
+
449
+ // ---- Step 5: Scripted element fallback (anti-double-vision) ----
450
+ const scriptedRaw = await page.evaluate(() => {
451
+ const results = [];
452
+ const scriptedSelectors = [
453
+ { selector: '[onclick]', source: 'onclick' },
454
+ { selector: '[data-action]', source: 'data-action' },
455
+ { selector: '[data-testid]', source: 'data-testid' },
456
+ { selector: '[tabindex]', source: 'tabindex' },
457
+ ];
458
+
459
+ function isElementVisible(el) {
460
+ const style = getComputedStyle(el);
461
+ if (style.display === 'none' || style.visibility === 'hidden' || style.opacity === '0') return false;
462
+ const rect = el.getBoundingClientRect();
463
+ return rect.width > 0 && rect.height > 0;
261
464
  }
262
- }
263
465
 
264
- // Process scripted elements
265
- for (const { selector, source } of scriptedSelectors) {
266
- const locators = page.locator(selector);
267
- const count = await locators.count();
268
-
269
- for (let i = 0; i < count; i++) {
270
- const el = locators.nth(i);
271
-
272
- try {
273
- const isVisible = await el.isVisible();
274
- if (!isVisible) continue;
275
-
276
- const box = await el.boundingBox();
277
- if (!box) continue;
278
-
279
- const tagName = await el.evaluate(e => e.tagName.toLowerCase());
280
- const role = await el.getAttribute('role');
281
- const ariaLabel = await el.getAttribute('aria-label');
282
- const name = await el.getAttribute('name');
283
- const placeholder = await el.getAttribute('placeholder');
284
- const type = await el.getAttribute('type');
285
- const href = await el.getAttribute('href');
286
- const disabled = await el.isDisabled();
287
-
288
- // Get DOM path for stable identification
289
- const domPath = await el.evaluate(e => {
290
- const parts = [];
291
- let node = e;
292
- while (node && node.nodeType === Node.ELEMENT_NODE) {
293
- let selector = node.tagName.toLowerCase();
294
- if (node.id) {
295
- selector += `#${node.id}`;
296
- parts.unshift(selector);
297
- break; // ID is unique, stop here
298
- } else {
299
- const siblings = node.parentNode ? Array.from(node.parentNode.children).filter(c => c.tagName === node.tagName) : [];
300
- if (siblings.length > 1) {
301
- const index = siblings.indexOf(node) + 1;
302
- selector += `:nth-of-type(${index})`;
303
- }
304
- parts.unshift(selector);
305
- }
306
- node = node.parentNode;
466
+ function getDomPath(el) {
467
+ const parts = [];
468
+ let node = el;
469
+ while (node && node.nodeType === Node.ELEMENT_NODE) {
470
+ let seg = node.tagName.toLowerCase();
471
+ if (node.id) {
472
+ seg += `#${node.id}`;
473
+ parts.unshift(seg);
474
+ break;
475
+ } else {
476
+ const siblings = node.parentNode ? Array.from(node.parentNode.children).filter(c => c.tagName === node.tagName) : [];
477
+ if (siblings.length > 1) {
478
+ const index = siblings.indexOf(node) + 1;
479
+ seg += `:nth-of-type(${index})`;
307
480
  }
308
- return parts.join(' > ');
309
- });
481
+ parts.unshift(seg);
482
+ }
483
+ node = node.parentNode;
484
+ }
485
+ return parts.join(' > ');
486
+ }
310
487
 
311
- let accessibleName = ariaLabel;
312
- if (!accessibleName) {
313
- accessibleName = await el.evaluate(e => e.textContent?.trim().substring(0, 100));
488
+ for (const { selector, source } of scriptedSelectors) {
489
+ for (const el of document.querySelectorAll(selector)) {
490
+ if (!isElementVisible(el)) continue;
491
+
492
+ const rect = el.getBoundingClientRect();
493
+ if (rect.width === 0 || rect.height === 0) continue;
494
+
495
+ const tagName = el.tagName.toLowerCase();
496
+ const role = el.getAttribute('role');
497
+ const ariaLabel = el.getAttribute('aria-label');
498
+ const name = el.getAttribute('name');
499
+ const placeholder = el.getAttribute('placeholder');
500
+ const type = el.getAttribute('type');
501
+ const href = el.getAttribute('href');
502
+ const disabled = el.disabled === true || el.getAttribute('aria-disabled') === 'true';
503
+ const checked = el.checked === true || el.getAttribute('aria-checked') === 'true';
504
+ const visibleText = (el.textContent || '').trim().substring(0, 100);
505
+ const accessibleName = ariaLabel || visibleText || name || placeholder || '';
506
+
507
+ let backgroundColor = null;
508
+ const style = getComputedStyle(el);
509
+ const bg = style.backgroundColor;
510
+ if (bg && bg !== 'rgba(0, 0, 0, 0)' && bg !== 'transparent') {
511
+ backgroundColor = bg;
314
512
  }
315
- if (!accessibleName) {
316
- accessibleName = name || placeholder || '';
513
+ if (!backgroundColor) {
514
+ for (const child of el.children) {
515
+ const childBg = getComputedStyle(child).backgroundColor;
516
+ if (childBg && childBg !== 'rgba(0, 0, 0, 0)' && childBg !== 'transparent'
517
+ && childBg !== 'rgb(255, 255, 255)') {
518
+ backgroundColor = childBg;
519
+ break;
520
+ }
521
+ }
317
522
  }
318
523
 
319
- const visibleText = await el.evaluate(e => e.textContent?.trim().substring(0, 100) || '');
320
-
321
- const effectiveRole = role || tagName;
322
-
323
- // Generate stable key for this element
324
- const stableKey = generateStableKey({
325
- tagName, role, name, type, placeholder, ariaLabel, href, domPath
326
- });
327
-
328
- elements.push({
329
- id: null, // Will be assigned after deduplication
330
- stable_key: stableKey,
331
- type: tagName,
332
- role: effectiveRole,
333
- accessible_name: accessibleName || '',
334
- visible_text: visibleText,
335
- disabled: disabled,
336
- input_type: type || null,
524
+ results.push({
525
+ tagName, role, ariaLabel, name, placeholder, type, href,
526
+ disabled, checked, visibleText, accessibleName, backgroundColor,
527
+ domPath: getDomPath(el),
337
528
  bounding_box: {
338
- x: Math.round(box.x),
339
- y: Math.round(box.y),
340
- w: Math.round(box.width),
341
- h: Math.round(box.height)
529
+ x: Math.round(rect.x),
530
+ y: Math.round(rect.y),
531
+ w: Math.round(rect.width),
532
+ h: Math.round(rect.height)
342
533
  },
343
- locator: el,
344
- mechanism: 'scripted',
345
- source: source
534
+ source
346
535
  });
347
- } catch (err) {
348
- continue;
349
536
  }
350
537
  }
351
- }
538
+ return results;
539
+ });
352
540
 
353
- // Deduplicate by bounding box (some elements match multiple selectors)
541
+ // Only add scripted elements whose bbox does NOT overlap with any ARIA element
542
+ const scriptedElements = scriptedRaw
543
+ .filter(raw => !ariaElements.some(aria => bboxOverlaps(aria.bounding_box, raw.bounding_box)))
544
+ .map(raw => {
545
+ const effectiveRole = raw.role || raw.tagName;
546
+ const stableKey = generateStableKey({
547
+ tagName: raw.tagName,
548
+ role: raw.role,
549
+ name: raw.name,
550
+ type: raw.type,
551
+ placeholder: raw.placeholder,
552
+ ariaLabel: raw.ariaLabel,
553
+ href: raw.href,
554
+ domPath: raw.domPath
555
+ });
556
+
557
+ return {
558
+ id: null,
559
+ stable_key: stableKey,
560
+ type: raw.tagName,
561
+ role: effectiveRole,
562
+ accessible_name: raw.accessibleName || '',
563
+ visible_text: raw.visibleText,
564
+ disabled: raw.disabled,
565
+ checked: raw.checked,
566
+ input_type: raw.type || null,
567
+ background_color: raw.backgroundColor || null,
568
+ bounding_box: raw.bounding_box,
569
+ domPath: raw.domPath,
570
+ mechanism: 'scripted',
571
+ source: raw.source
572
+ };
573
+ });
574
+
575
+ // ---- Post-processing: dedup, stable keys, display IDs (unchanged logic) ----
576
+ const elements = [...ariaElements, ...scriptedElements];
577
+
578
+ // Deduplicate by bounding box
354
579
  const seen = new Set();
355
580
  const uniqueElements = [];
356
581
  for (const el of elements) {
@@ -362,7 +587,6 @@ async function enumerateElements() {
362
587
  }
363
588
 
364
589
  // Disambiguate stable_keys by appending occurrence index for collisions
365
- // Group elements by their base stable_key
366
590
  const keyGroups = new Map();
367
591
  for (const el of uniqueElements) {
368
592
  const baseKey = el.stable_key;
@@ -372,13 +596,11 @@ async function enumerateElements() {
372
596
  keyGroups.get(baseKey).push(el);
373
597
  }
374
598
 
375
- // For each group with collisions, sort by position (top-to-bottom, left-to-right)
376
- // then assign deterministic indices
377
599
  for (const [baseKey, group] of keyGroups) {
378
600
  if (group.length > 1) {
379
601
  group.sort((a, b) => {
380
602
  const yDiff = a.bounding_box.y - b.bounding_box.y;
381
- if (Math.abs(yDiff) > 5) return yDiff; // 5px threshold for "same row"
603
+ if (Math.abs(yDiff) > 5) return yDiff;
382
604
  return a.bounding_box.x - b.bounding_box.x;
383
605
  });
384
606
  }
@@ -388,11 +610,9 @@ async function enumerateElements() {
388
610
  }
389
611
 
390
612
  // Assign stable display IDs based on stable_key
391
- // Elements that existed before keep their display ID, new elements get the next available ID
392
613
  const usedDisplayIds = new Set();
393
614
  const newElementKeyToDisplayId = new Map();
394
615
 
395
- // First pass: assign existing display IDs to elements we've seen before
396
616
  for (const el of uniqueElements) {
397
617
  if (elementKeyToDisplayId.has(el.stable_key)) {
398
618
  const existingId = elementKeyToDisplayId.get(el.stable_key);
@@ -402,11 +622,9 @@ async function enumerateElements() {
402
622
  }
403
623
  }
404
624
 
405
- // Second pass: assign new display IDs to new elements
406
625
  let nextId = 1;
407
626
  for (const el of uniqueElements) {
408
627
  if (el.id === null) {
409
- // Find next available ID
410
628
  while (usedDisplayIds.has(`E${nextId}`)) {
411
629
  nextId++;
412
630
  }
@@ -432,8 +650,7 @@ async function enumerateElements() {
432
650
  }
433
651
 
434
652
  lastElements = uniqueElements;
435
- // Return elements without the locator property (not JSON-serializable)
436
- return lastElements.map(({ locator, ...rest }) => rest);
653
+ return lastElements.map(({ domPath, ...rest }) => rest);
437
654
  }
438
655
 
439
656
  async function debugOverlay(x, y) {
@@ -461,7 +678,7 @@ async function debugOverlay(x, y) {
461
678
  }
462
679
 
463
680
  // Commands that should be blocked when a dialog is pending
464
- const DIALOG_BLOCKING_COMMANDS = ['navigate', 'click_element', 'select_option', 'keypress', 'scroll', 'wait', 'capture'];
681
+ const DIALOG_BLOCKING_COMMANDS = ['navigate', 'click_element', 'select_option', 'type', 'hotkey', 'scroll', 'wait', 'capture'];
465
682
 
466
683
  async function handleCommand(msg) {
467
684
  const { request_id, command, params } = msg;
@@ -503,23 +720,32 @@ async function handleCommand(msg) {
503
720
 
504
721
  const element = displayIdToElement.get(element_id);
505
722
  if (!element) throw new Error(`Element not found: ${element_id}`);
506
- lastClickedElement = element; // Store for keypress context
723
+ lastClickedElement = element; // Store for type command context
507
724
 
508
725
  let { x, y, w, h } = element.bounding_box;
509
726
 
510
727
  // Auto-scroll element into view if its center is outside the viewport
511
728
  const viewportSize = page.viewportSize();
512
729
  let centerY = y + h / 2;
513
- if (element.locator && (centerY < 0 || centerY >= viewportSize.height)) {
514
- await element.locator.scrollIntoViewIfNeeded({ timeout: 3000 });
515
- await new Promise(r => setTimeout(r, 200));
516
- const newBox = await element.locator.boundingBox();
517
- if (newBox) {
518
- x = Math.round(newBox.x);
519
- y = Math.round(newBox.y);
520
- w = Math.round(newBox.width);
521
- h = Math.round(newBox.height);
522
- element.bounding_box = { x, y, w, h };
730
+ if (element.domPath && (centerY < 0 || centerY >= viewportSize.height)) {
731
+ try {
732
+ const locator = page.locator(element.domPath);
733
+ await locator.scrollIntoViewIfNeeded({ timeout: 3000 });
734
+ await new Promise(r => setTimeout(r, 200));
735
+ const newBox = await locator.boundingBox();
736
+ if (newBox) {
737
+ x = Math.round(newBox.x);
738
+ y = Math.round(newBox.y);
739
+ w = Math.round(newBox.width);
740
+ h = Math.round(newBox.height);
741
+ element.bounding_box = { x, y, w, h };
742
+ }
743
+ } catch (scrollErr) {
744
+ // Strict mode violation or other locator error — fall back to
745
+ // coordinate-based scroll so the click can still proceed.
746
+ const targetY = y + h / 2 - viewportSize.height / 2;
747
+ await page.evaluate((scrollY) => window.scrollBy(0, scrollY), targetY);
748
+ await new Promise(r => setTimeout(r, 200));
523
749
  }
524
750
  }
525
751
 
@@ -587,16 +813,25 @@ async function handleCommand(msg) {
587
813
  // Auto-scroll element into view if its center is outside the viewport
588
814
  const viewportSize = page.viewportSize();
589
815
  let centerY = y + h / 2;
590
- if (element.locator && (centerY < 0 || centerY >= viewportSize.height)) {
591
- await element.locator.scrollIntoViewIfNeeded({ timeout: 3000 });
592
- await new Promise(r => setTimeout(r, 200));
593
- const newBox = await element.locator.boundingBox();
594
- if (newBox) {
595
- x = Math.round(newBox.x);
596
- y = Math.round(newBox.y);
597
- w = Math.round(newBox.width);
598
- h = Math.round(newBox.height);
599
- element.bounding_box = { x, y, w, h };
816
+ if (element.domPath && (centerY < 0 || centerY >= viewportSize.height)) {
817
+ try {
818
+ const locator = page.locator(element.domPath);
819
+ await locator.scrollIntoViewIfNeeded({ timeout: 3000 });
820
+ await new Promise(r => setTimeout(r, 200));
821
+ const newBox = await locator.boundingBox();
822
+ if (newBox) {
823
+ x = Math.round(newBox.x);
824
+ y = Math.round(newBox.y);
825
+ w = Math.round(newBox.width);
826
+ h = Math.round(newBox.height);
827
+ element.bounding_box = { x, y, w, h };
828
+ }
829
+ } catch (scrollErr) {
830
+ // Strict mode violation or other locator error — fall back to
831
+ // coordinate-based scroll so the select can still proceed.
832
+ const targetY = y + h / 2 - viewportSize.height / 2;
833
+ await page.evaluate((scrollY) => window.scrollBy(0, scrollY), targetY);
834
+ await new Promise(r => setTimeout(r, 200));
600
835
  }
601
836
  }
602
837
 
@@ -627,18 +862,14 @@ async function handleCommand(msg) {
627
862
  break;
628
863
  }
629
864
 
630
- case "keypress": {
631
- if (!params?.keys) throw new Error("keypress requires keys");
865
+ case "type": {
866
+ if (!params?.keys) throw new Error("type requires keys");
632
867
 
633
- // Handle Ctrl+A (select all) as a special case
634
- // Use Meta+a on macOS (Command key) and Control+a on other platforms
635
- if (params.keys === 'ctrl+a' || params.keys === 'Ctrl+A') {
636
- const modifier = process.platform === 'darwin' ? 'Meta' : 'Control';
637
- await page.keyboard.press(`${modifier}+a`);
638
- await new Promise(r => setTimeout(r, 100));
639
- result.screenshot_base64 = await screenshotBase64();
640
- result.elements = await enumerateElements();
641
- break;
868
+ // Reject modifier combos those belong in the "hotkey" command
869
+ if (/^(ctrl|alt|meta|shift)\+/i.test(params.keys)) {
870
+ throw new Error(
871
+ `"type" is for typing text only. Use "hotkey" for keyboard shortcuts like "${params.keys}".`
872
+ );
642
873
  }
643
874
 
644
875
  // Map special characters to Playwright key names
@@ -666,10 +897,22 @@ async function handleCommand(msg) {
666
897
  }
667
898
  }
668
899
 
669
- // Type each character, mapping special chars to Playwright key names
900
+ // Split into runs of normal text (use keyboard.type) and special chars (use keyboard.press)
901
+ let textBuffer = '';
670
902
  for (const char of params.keys) {
671
- const key = specialKeyMap[char] || char;
672
- await page.keyboard.press(key);
903
+ const special = specialKeyMap[char];
904
+ if (special) {
905
+ if (textBuffer) {
906
+ await page.keyboard.type(textBuffer, { delay: 20 });
907
+ textBuffer = '';
908
+ }
909
+ await page.keyboard.press(special);
910
+ } else {
911
+ textBuffer += char;
912
+ }
913
+ }
914
+ if (textBuffer) {
915
+ await page.keyboard.type(textBuffer, { delay: 20 });
673
916
  }
674
917
 
675
918
  // Wait for search/autocomplete to settle
@@ -679,8 +922,40 @@ async function handleCommand(msg) {
679
922
  break;
680
923
  }
681
924
 
925
+ case "hotkey": {
926
+ if (!params?.keys) throw new Error("hotkey requires keys");
927
+
928
+ // Map common shorthand to Playwright key names
929
+ // Supports: "ctrl+a", "Enter", "Backspace", "Tab", "Escape", "ctrl+c", etc.
930
+ const hotkeyMap = {
931
+ 'ctrl': process.platform === 'darwin' ? 'Meta' : 'Control',
932
+ 'alt': 'Alt',
933
+ 'shift': 'Shift',
934
+ 'meta': 'Meta',
935
+ };
936
+
937
+ const combo = params.keys;
938
+
939
+ if (/\+/.test(combo)) {
940
+ // Modifier combo like "ctrl+a", "ctrl+shift+z"
941
+ const parts = combo.split('+');
942
+ const key = parts.pop(); // Last part is the actual key
943
+ const modifiers = parts.map(m => hotkeyMap[m.toLowerCase()] || m);
944
+ const playwrightCombo = [...modifiers, key].join('+');
945
+ await page.keyboard.press(playwrightCombo);
946
+ } else {
947
+ // Single special key like "Enter", "Backspace", "Tab", "Escape"
948
+ await page.keyboard.press(combo);
949
+ }
950
+
951
+ await new Promise(r => setTimeout(r, 100));
952
+ result.screenshot_base64 = await screenshotBase64();
953
+ result.elements = await enumerateElements();
954
+ break;
955
+ }
956
+
682
957
  case "wait": {
683
- if (!params?.ms) throw new Error("wait requires ms");
958
+ if (params?.ms == null) throw new Error("wait requires ms");
684
959
  await new Promise(r => setTimeout(r, params.ms));
685
960
  result.screenshot_base64 = await screenshotBase64();
686
961
  result.elements = await enumerateElements();
package/lib/dialogs.js CHANGED
@@ -1,3 +1,4 @@
1
+ // test cli sync check
1
2
  /**
2
3
  * Dialog handling module for SpecSage browser automation.
3
4
  * Captures JavaScript dialogs (alert, confirm, prompt) and exposes them
package/lib/runner.rb CHANGED
@@ -18,7 +18,7 @@ class Runner
18
18
  NODE_IO_TIMEOUT_SECONDS = 30
19
19
  NODE_SHUTDOWN_TIMEOUT_SECONDS = 45
20
20
 
21
- BROWSER_ACTIONS = %w[navigate click select keypress wait scroll accept_dialog dismiss_dialog].freeze
21
+ BROWSER_ACTIONS = %w[navigate click select type hotkey wait scroll accept_dialog dismiss_dialog].freeze
22
22
 
23
23
  # Pattern for safe path segment: alphanumeric, underscore, hyphen only
24
24
  # Prevents directory traversal, special chars, and filesystem issues
@@ -315,8 +315,8 @@ class Runner
315
315
  break if line.nil? || line.empty?
316
316
  line.each_line { |l| log "Node: #{l.strip}" unless l.strip.empty? }
317
317
  end
318
- rescue IO::WaitReadable, EOFError
319
- # No more data available
318
+ rescue IO::WaitReadable, EOFError, IOError
319
+ # No more data available (IOError covers closed stream on second stop_node_process call)
320
320
  end
321
321
 
322
322
  def stop_node_process
@@ -381,10 +381,13 @@ class Runner
381
381
  # Read any remaining stderr output from Node process for debugging
382
382
  drain_node_stderr
383
383
 
384
- # Close IO streams
384
+ # Close IO streams and nil references to prevent IOError on subsequent calls
385
385
  @node_stdin&.close rescue nil
386
386
  @node_stdout&.close rescue nil
387
387
  @node_stderr&.close rescue nil
388
+ @node_stdin = nil
389
+ @node_stdout = nil
390
+ @node_stderr = nil
388
391
 
389
392
  # Force kill the process if still running
390
393
  if pid
@@ -403,6 +406,14 @@ class Runner
403
406
  end
404
407
  end
405
408
 
409
+ # If we couldn't get video via quit (channel poisoned, BROWSER_ERROR,
410
+ # or any other failure), try to recover from temp directory.
411
+ # Playwright streams video data to disk during recording, so a
412
+ # partial file likely exists even without a graceful context.close().
413
+ if !@video_data && @temp_dir && Dir.exist?(@temp_dir)
414
+ recover_video_from_temp_dir
415
+ end
416
+
406
417
  @node_wait_thread = nil
407
418
  end
408
419
 
@@ -503,18 +514,24 @@ class Runner
503
514
  elements = response.dig('result', 'elements') || []
504
515
  { result: "Selected '#{display_value}' in element #{element_id}", screenshot_base64: screenshot_base64, elements: elements }
505
516
 
506
- when 'keypress'
517
+ when 'type'
507
518
  # Substitute credential placeholders at the last moment before browser execution
508
519
  # Supports inline placeholders: <<USERNAME>>@example.com, <<USER>>:<<PASS>>, etc.
509
520
  keys = action['keys']
510
521
  display_keys = keys # For logging (shows placeholders, not actual values)
511
- # Skip credential substitution for special key combos like ctrl+a
512
- keys = substitute_credentials(keys) if contains_credential_placeholder?(keys) && !special_key_combo?(keys)
522
+ keys = substitute_credentials(keys) if contains_credential_placeholder?(keys)
523
+
524
+ response = send_to_node('type', { keys: keys })
525
+ screenshot_base64 = response.dig('result', 'screenshot_base64')
526
+ elements = response.dig('result', 'elements') || []
527
+ { result: "Typed: #{display_keys}", screenshot_base64: screenshot_base64, elements: elements }
513
528
 
514
- response = send_to_node('keypress', { keys: keys })
529
+ when 'hotkey'
530
+ keys = action['keys']
531
+ response = send_to_node('hotkey', { keys: keys })
515
532
  screenshot_base64 = response.dig('result', 'screenshot_base64')
516
533
  elements = response.dig('result', 'elements') || []
517
- { result: "Pressed keys: #{display_keys}", screenshot_base64: screenshot_base64, elements: elements }
534
+ { result: "Hotkey: #{keys}", screenshot_base64: screenshot_base64, elements: elements }
518
535
 
519
536
  when 'wait'
520
537
  response = send_to_node('wait', { ms: action['ms'] })
@@ -592,8 +609,8 @@ class Runner
592
609
  log "Uploading video (#{@video_data.bytesize} bytes)..."
593
610
  @step_client.upload_video(scenario_id: @scenario_id, video_data: @video_data)
594
611
  log "Video uploaded successfully."
595
- rescue StepClientError => e
596
- log "Warning: Failed to upload video: #{e.message}"
612
+ rescue StandardError => e
613
+ log "Warning: Failed to upload video: #{e.class}: #{e.message}"
597
614
  end
598
615
 
599
616
  def cleanup_temp_dir
@@ -616,6 +633,50 @@ class Runner
616
633
  # Ignore cleanup errors
617
634
  end
618
635
 
636
+ # Recover video file from temp directory when the Node channel was poisoned
637
+ # and we couldn't send the quit command to get the video path.
638
+ # Playwright streams video data to disk during recording, so a partial
639
+ # .webm file likely exists even without a graceful context.close().
640
+ #
641
+ # Safety: @temp_dir is scoped to tmp/<server_run_id>/<scenario_id>.
642
+ # server_run_id has a unique index (see Document::Test::Run), so each run
643
+ # gets an isolated directory. Stale .webm files from other runs cannot
644
+ # appear here. On a permanent filesystem (Sidekiq workers), leftover files
645
+ # would only exist from a previous crash of this exact run ID, which cannot
646
+ # be re-enqueued with the same ID.
647
def recover_video_from_temp_dir
  # Best-effort fallback that loads the largest *.webm found in @temp_dir
  # into @video_data.
  #
  # Side effects: sets @video_data on success, logs the outcome, and deletes
  # the file(s) it consumed. The return value is not meaningful. Any
  # StandardError is logged and swallowed so recovery never aborts the caller.

  # The largest file is taken as the most complete recording candidate.
  video_path = Dir.glob(File.join(@temp_dir, '*.webm')).max_by { |f| File.size(f) }
  return if video_path.nil?

  if File.size(video_path).zero?
    log "Warning: Video file in temp dir is empty: #{video_path}"
    return
  end

  if ffmpeg_available?
    remuxed_path = video_path.sub(/\.webm\z/, '_remuxed.webm')
    # -nostdin: ffmpeg must not read the stdin it inherits from this process.
    # Global options are given first, before the input/output arguments.
    remuxed = system('ffmpeg', '-nostdin', '-y', '-loglevel', 'error',
                     '-i', video_path, '-c', 'copy', remuxed_path)

    # Only trust the remux when ffmpeg succeeded AND produced a non-empty
    # file; otherwise keep the original so the only copy is never discarded.
    if remuxed && File.size?(remuxed_path)
      log "Recovered video remuxed: #{remuxed_path}"
      File.delete(video_path) rescue nil
      video_path = remuxed_path
    else
      log "Warning: ffmpeg remux failed on recovered video, using original"
      # ffmpeg may have created a partial output before failing; remove it.
      File.delete(remuxed_path) rescue nil
    end
  end

  @video_data = File.binread(video_path)
  log "Recovered video from temp dir: #{video_path} (#{@video_data.bytesize} bytes)"
  File.delete(video_path) rescue nil
rescue StandardError => e
  log "Warning: Failed to recover video from temp dir: #{e.message}"
end
675
+
676
def ffmpeg_available?
  # Probe for ffmpeg by running `ffmpeg -version` with stdout and stderr
  # discarded. Returns the raw result of `system`: true on exit status 0,
  # false on a non-zero exit, nil when the command could not be executed.
  system('ffmpeg', '-version', %i[out err] => File::NULL)
end
679
+
619
680
  # --- Safe path handling ---
620
681
 
621
682
  # Build a safe temp directory path, validating that IDs are safe path segments
@@ -686,14 +747,6 @@ class Runner
686
747
 
687
748
  CREDENTIAL_PLACEHOLDER_PATTERN = /<<([A-Z][A-Z0-9_]*)>>/
688
749
 
689
- # Special key combinations that should not be treated as credential placeholders
690
- SPECIAL_KEY_COMBOS = %w[ctrl+a Ctrl+A].freeze
691
-
692
- # Check if the value is a special key combo (e.g., ctrl+a)
693
- def special_key_combo?(value)
694
- SPECIAL_KEY_COMBOS.include?(value)
695
- end
696
-
697
750
  # Check if the value contains any credential placeholders
698
751
  def contains_credential_placeholder?(value)
699
752
  return false unless value.is_a?(String)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@specsage/cli",
3
- "version": "0.1.13",
3
+ "version": "0.1.15",
4
4
  "description": "SpecSage CLI - AI-powered end-to-end testing automation (Node wrapper for Ruby CLI)",
5
5
  "type": "module",
6
6
  "bin": {