@specsage/cli 0.1.12 → 0.1.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/browser.js CHANGED
@@ -128,229 +128,454 @@ async function captureState() {
128
128
  };
129
129
  }
130
130
 
131
- async function enumerateElements() {
132
- const elements = [];
133
-
134
- // Native interactive elements
135
- const nativeSelectors = [
136
- 'button, [role="button"]',
137
- 'a, [role="link"]',
138
- 'input, textarea',
139
- 'select, [role="combobox"]',
140
- '[role="option"]',
141
- '[role="menuitem"]',
142
- '[role="tab"]',
143
- '[role="checkbox"]',
144
- '[role="radio"]',
145
- ];
131
+ // ===== ARIA Snapshot: Constants & Helpers =====
132
+
133
+ const INTERACTIVE_ROLES = new Set([
134
+ 'button', 'link', 'textbox', 'checkbox', 'radio',
135
+ 'combobox', 'option', 'menuitem', 'tab',
136
+ 'switch', 'spinbutton', 'searchbox'
137
+ ]);
138
+
139
+ const ROLE_TO_TYPE = {
140
+ button: 'button',
141
+ link: 'link',
142
+ textbox: 'input',
143
+ spinbutton: 'input',
144
+ searchbox: 'input',
145
+ checkbox: 'checkbox',
146
+ switch: 'checkbox',
147
+ radio: 'radio',
148
+ combobox: 'select',
149
+ option: 'option',
150
+ menuitem: 'option',
151
+ tab: 'tab',
152
+ };
146
153
 
147
- // Scripted interactive elements
148
- const scriptedSelectors = [
149
- { selector: '[onclick]', source: 'onclick' },
150
- { selector: '[data-action]', source: 'data-action' },
151
- { selector: '[data-testid]', source: 'data-testid' },
152
- { selector: '[tabindex]', source: 'tabindex' },
153
- ];
154
+ /**
155
+ * Parse ariaSnapshot YAML into a tree using an indentation-aware loop.
156
+ * Each node: { role, name, attributes, children, depth }
157
+ */
158
+ function parseAriaSnapshot(yaml) {
159
+ if (!yaml) return [];
160
+
161
+ const lines = yaml.split('\n');
162
+ const root = { role: 'root', name: '', attributes: {}, children: [], depth: -1 };
163
+ const stack = [root];
164
+
165
+ for (const line of lines) {
166
+ const trimmed = line.trimStart();
167
+ if (!trimmed || !trimmed.startsWith('- ')) continue;
168
+
169
+ // Indentation depth: count leading spaces, each 2 = 1 level
170
+ const indent = line.length - line.trimStart().length;
171
+ const depth = Math.floor(indent / 2);
172
+
173
+ // Content after "- "
174
+ let content = trimmed.slice(2);
175
+
176
+ // Strip trailing colon (signals children) or inline text after ": "
177
+ const trailingColonIdx = content.lastIndexOf(':');
178
+ if (trailingColonIdx !== -1) {
179
+ const afterColon = content.slice(trailingColonIdx + 1);
180
+ if (afterColon === '' || afterColon.startsWith(' ')) {
181
+ content = content.slice(0, trailingColonIdx).trim();
182
+ }
183
+ }
184
+
185
+ // Extract role (first word)
186
+ let role = '';
187
+ const spaceIdx = content.indexOf(' ');
188
+ const quoteIdx = content.indexOf('"');
189
+ const bracketIdx = content.indexOf('[');
190
+
191
+ if (spaceIdx === -1 && quoteIdx === -1 && bracketIdx === -1) {
192
+ role = content;
193
+ content = '';
194
+ } else {
195
+ const endCandidates = [spaceIdx, quoteIdx, bracketIdx].filter(i => i !== -1);
196
+ const firstEnd = Math.min(...endCandidates);
197
+ role = content.slice(0, firstEnd);
198
+ content = content.slice(firstEnd).trim();
199
+ }
200
+
201
+ // Extract name (quoted string)
202
+ let name = '';
203
+ const nameMatch = content.match(/^"([^"]*)"/);
204
+ if (nameMatch) {
205
+ name = nameMatch[1].trim();
206
+ content = content.slice(nameMatch[0].length).trim();
207
+ }
208
+
209
+ // Extract attributes [key=value] or [key]
210
+ const attributes = {};
211
+ const attrRegex = /\[([^\]]+)\]/g;
212
+ let attrMatch;
213
+ while ((attrMatch = attrRegex.exec(content)) !== null) {
214
+ const attr = attrMatch[1];
215
+ const eqIdx = attr.indexOf('=');
216
+ if (eqIdx !== -1) {
217
+ attributes[attr.slice(0, eqIdx).trim()] = attr.slice(eqIdx + 1).trim();
218
+ } else {
219
+ attributes[attr.trim()] = true;
220
+ }
221
+ }
222
+
223
+ const node = { role, name, attributes, children: [], depth };
224
+
225
+ // Pop stack to find parent at lower depth
226
+ while (stack.length > 1 && stack[stack.length - 1].depth >= depth) {
227
+ stack.pop();
228
+ }
229
+
230
+ stack[stack.length - 1].children.push(node);
231
+ stack.push(node);
232
+ }
233
+
234
+ return root.children;
235
+ }
236
+
237
+ /**
238
+ * Walk the ARIA tree, collecting interactive nodes with their ancestor chain.
239
+ * parentChain enables scoped locator construction (e.g. main > navigation > link).
240
+ */
241
+ function collectInteractiveNodes(nodes, parentChain = []) {
242
+ const result = [];
243
+
244
+ for (const node of nodes) {
245
+ if (INTERACTIVE_ROLES.has(node.role)) {
246
+ result.push({
247
+ role: node.role,
248
+ name: node.name,
249
+ attributes: node.attributes,
250
+ parentChain: [...parentChain]
251
+ });
252
+ }
253
+
254
+ // Recurse into children with this node added to the chain
255
+ if (node.children.length > 0) {
256
+ const newChain = [...parentChain, { role: node.role, name: node.name }];
257
+ result.push(...collectInteractiveNodes(node.children, newChain));
258
+ }
259
+ }
260
+
261
+ return result;
262
+ }
263
+
264
+ /**
265
+ * Assign occurrence indices so duplicate (parentChain + role + name) combos
266
+ * can be disambiguated with .nth(i).
267
+ */
268
+ function assignOccurrenceIndices(nodes) {
269
+ const counts = new Map();
270
+ for (const node of nodes) {
271
+ const key = JSON.stringify([node.parentChain, node.role, node.name]);
272
+ const count = counts.get(key) || 0;
273
+ node.occurrenceIndex = count;
274
+ counts.set(key, count + 1);
275
+ }
276
+ for (const node of nodes) {
277
+ const key = JSON.stringify([node.parentChain, node.role, node.name]);
278
+ node.totalOccurrences = counts.get(key);
279
+ }
280
+ }
281
+
282
+ /**
283
+ * Build a scoped Playwright locator from parentChain + interactive node.
284
+ * e.g. page.getByRole('main').getByRole('navigation').getByRole('link', { name: 'Home' })
285
+ */
286
+ function buildScopedLocator(pg, node) {
287
+ let locator = pg;
288
+
289
+ for (const ancestor of node.parentChain) {
290
+ const opts = ancestor.name ? { name: ancestor.name, exact: true } : {};
291
+ locator = locator.getByRole(ancestor.role, opts);
292
+ }
293
+
294
+ const opts = { exact: true };
295
+ if (node.name) opts.name = node.name;
296
+ locator = locator.getByRole(node.role, opts);
297
+
298
+ if (node.totalOccurrences > 1) {
299
+ locator = locator.nth(node.occurrenceIndex);
300
+ }
301
+
302
+ return locator;
303
+ }
304
+
305
+ /**
306
+ * Check if two bounding boxes overlap significantly (>=50% of smaller area).
307
+ * Used to prevent "double vision" in the scripted fallback.
308
+ */
309
+ function bboxOverlaps(a, b) {
310
+ const overlapX = Math.max(0, Math.min(a.x + a.w, b.x + b.w) - Math.max(a.x, b.x));
311
+ const overlapY = Math.max(0, Math.min(a.y + a.h, b.y + b.h) - Math.max(a.y, b.y));
312
+ const overlapArea = overlapX * overlapY;
313
+ const smallerArea = Math.min(a.w * a.h, b.w * b.h);
314
+ return smallerArea > 0 && (overlapArea / smallerArea) >= 0.5;
315
+ }
316
+
317
+ /**
318
+ * Extract element details from a DOM element (runs in browser context via locator.evaluate).
319
+ * Returns null if the element is invisible (opacity:0).
320
+ */
321
+ function extractElementDetails(el) {
322
+ const style = getComputedStyle(el);
323
+ if (style.opacity === '0') return null;
324
+
325
+ const tagName = el.tagName.toLowerCase();
326
+ const ariaLabel = el.getAttribute('aria-label');
327
+ const name = el.getAttribute('name');
328
+ const placeholder = el.getAttribute('placeholder');
329
+ const type = el.getAttribute('type');
330
+ const href = el.getAttribute('href');
331
+ const visibleText = (el.textContent || '').trim().substring(0, 100);
332
+ const disabled = el.disabled === true || el.getAttribute('aria-disabled') === 'true';
333
+ const checked = el.checked === true || el.getAttribute('aria-checked') === 'true';
334
+
335
+ let backgroundColor = null;
336
+ const bg = style.backgroundColor;
337
+ if (bg && bg !== 'rgba(0, 0, 0, 0)' && bg !== 'transparent') {
338
+ backgroundColor = bg;
339
+ }
340
+ if (!backgroundColor) {
341
+ for (const child of el.children) {
342
+ const childBg = getComputedStyle(child).backgroundColor;
343
+ if (childBg && childBg !== 'rgba(0, 0, 0, 0)' && childBg !== 'transparent'
344
+ && childBg !== 'rgb(255, 255, 255)') {
345
+ backgroundColor = childBg;
346
+ break;
347
+ }
348
+ }
349
+ }
350
+
351
+ // Generate domPath for scroll-into-view fallback
352
+ const parts = [];
353
+ let node = el;
354
+ while (node && node.nodeType === Node.ELEMENT_NODE) {
355
+ let seg = node.tagName.toLowerCase();
356
+ if (node.id) {
357
+ seg += `#${node.id}`;
358
+ parts.unshift(seg);
359
+ break;
360
+ } else {
361
+ const siblings = node.parentNode
362
+ ? Array.from(node.parentNode.children).filter(c => c.tagName === node.tagName)
363
+ : [];
364
+ if (siblings.length > 1) {
365
+ const index = siblings.indexOf(node) + 1;
366
+ seg += `:nth-of-type(${index})`;
367
+ }
368
+ parts.unshift(seg);
369
+ }
370
+ node = node.parentNode;
371
+ }
372
+
373
+ return {
374
+ tagName, ariaLabel, name, placeholder, type, href,
375
+ disabled, checked, visibleText,
376
+ accessibleName: ariaLabel || visibleText || name || placeholder || '',
377
+ backgroundColor,
378
+ domPath: parts.join(' > ')
379
+ };
380
+ }
154
381
 
155
- // Process native elements
156
- for (const selector of nativeSelectors) {
157
- const locators = page.locator(selector);
158
- const count = await locators.count();
382
+ async function enumerateElements() {
383
+ // ---- Step 1: Get the Aria Snapshot ----
384
+ let snapshot;
385
+ try {
386
+ snapshot = await page.locator('body').ariaSnapshot();
387
+ } catch {
388
+ snapshot = null;
389
+ }
159
390
 
160
- for (let i = 0; i < count; i++) {
161
- const el = locators.nth(i);
391
+ // ---- Step 2: Parse into tree, extract interactive nodes ----
392
+ const tree = parseAriaSnapshot(snapshot);
393
+ const interactiveNodes = collectInteractiveNodes(tree);
394
+ assignOccurrenceIndices(interactiveNodes);
162
395
 
396
+ // ---- Step 3: Resolve each to DOM via scoped locators ----
397
+ const ariaElements = (await Promise.all(
398
+ interactiveNodes.map(async (node) => {
163
399
  try {
164
- const isVisible = await el.isVisible();
165
- if (!isVisible) continue;
166
-
167
- const box = await el.boundingBox();
168
- if (!box) continue;
169
-
170
- const tagName = await el.evaluate(e => e.tagName.toLowerCase());
171
- const role = await el.getAttribute('role');
172
- const ariaLabel = await el.getAttribute('aria-label');
173
- const name = await el.getAttribute('name');
174
- const placeholder = await el.getAttribute('placeholder');
175
- const type = await el.getAttribute('type');
176
- const href = await el.getAttribute('href');
177
- const disabled = await el.isDisabled();
178
-
179
- // Get DOM path for stable identification
180
- const domPath = await el.evaluate(e => {
181
- const parts = [];
182
- let node = e;
183
- while (node && node.nodeType === Node.ELEMENT_NODE) {
184
- let selector = node.tagName.toLowerCase();
185
- if (node.id) {
186
- selector += `#${node.id}`;
187
- parts.unshift(selector);
188
- break; // ID is unique, stop here
189
- } else {
190
- const siblings = node.parentNode ? Array.from(node.parentNode.children).filter(c => c.tagName === node.tagName) : [];
191
- if (siblings.length > 1) {
192
- const index = siblings.indexOf(node) + 1;
193
- selector += `:nth-of-type(${index})`;
194
- }
195
- parts.unshift(selector);
196
- }
197
- node = node.parentNode;
198
- }
199
- return parts.join(' > ');
200
- });
400
+ const locator = buildScopedLocator(page, node);
201
401
 
202
- let accessibleName = ariaLabel;
203
- if (!accessibleName) {
204
- accessibleName = await el.evaluate(e => e.textContent?.trim().substring(0, 100));
205
- }
206
- if (!accessibleName) {
207
- accessibleName = name || placeholder || '';
208
- }
402
+ const [box, details] = await Promise.all([
403
+ locator.boundingBox({ timeout: 3000 }).catch(() => null),
404
+ locator.evaluate(extractElementDetails).catch(() => null)
405
+ ]);
209
406
 
210
- const visibleText = await el.evaluate(e => e.textContent?.trim().substring(0, 100) || '');
211
-
212
- let elementType;
213
- const effectiveRole = role || tagName;
214
- if (tagName === 'button' || role === 'button') {
215
- elementType = 'button';
216
- } else if (tagName === 'a' || role === 'link') {
217
- elementType = 'link';
218
- } else if (tagName === 'input' || tagName === 'textarea') {
219
- elementType = 'input';
220
- } else if (tagName === 'select' || role === 'combobox') {
221
- elementType = 'select';
222
- } else if (role === 'option' || role === 'menuitem') {
223
- elementType = 'option';
224
- } else if (role === 'checkbox') {
225
- elementType = 'checkbox';
226
- } else if (role === 'radio') {
227
- elementType = 'radio';
228
- } else if (role === 'tab') {
229
- elementType = 'tab';
230
- } else {
231
- elementType = tagName;
232
- }
407
+ if (!details || !box || box.width === 0 || box.height === 0) return null;
408
+
409
+ const elementType = ROLE_TO_TYPE[node.role] || node.role;
233
410
 
234
- // Generate stable key for this element
235
411
  const stableKey = generateStableKey({
236
- tagName, role, name, type, placeholder, ariaLabel, href, domPath
412
+ tagName: details.tagName,
413
+ role: node.role,
414
+ name: details.name,
415
+ type: details.type,
416
+ placeholder: details.placeholder,
417
+ ariaLabel: details.ariaLabel,
418
+ href: details.href,
419
+ domPath: details.domPath
237
420
  });
238
421
 
239
- elements.push({
240
- id: null, // Will be assigned after deduplication
422
+ return {
423
+ id: null,
241
424
  stable_key: stableKey,
242
425
  type: elementType,
243
- role: effectiveRole,
244
- accessible_name: accessibleName || '',
245
- visible_text: visibleText,
246
- disabled: disabled,
247
- input_type: type || null,
426
+ role: node.role,
427
+ accessible_name: node.name || details.accessibleName || '',
428
+ visible_text: details.visibleText,
429
+ disabled: details.disabled || !!node.attributes.disabled,
430
+ checked: details.checked || !!node.attributes.checked,
431
+ input_type: details.type || null,
432
+ background_color: details.backgroundColor || null,
248
433
  bounding_box: {
249
434
  x: Math.round(box.x),
250
435
  y: Math.round(box.y),
251
436
  w: Math.round(box.width),
252
437
  h: Math.round(box.height)
253
438
  },
254
- locator: el,
439
+ domPath: details.domPath,
255
440
  mechanism: 'native',
256
441
  source: 'native'
257
- });
258
- } catch (err) {
259
- continue;
442
+ };
443
+ } catch {
444
+ return null;
260
445
  }
446
+ })
447
+ )).filter(Boolean);
448
+
449
+ // ---- Step 4: Scripted element fallback (anti-double-vision) ----
450
+ const scriptedRaw = await page.evaluate(() => {
451
+ const results = [];
452
+ const scriptedSelectors = [
453
+ { selector: '[onclick]', source: 'onclick' },
454
+ { selector: '[data-action]', source: 'data-action' },
455
+ { selector: '[data-testid]', source: 'data-testid' },
456
+ { selector: '[tabindex]', source: 'tabindex' },
457
+ ];
458
+
459
+ function isElementVisible(el) {
460
+ const style = getComputedStyle(el);
461
+ if (style.display === 'none' || style.visibility === 'hidden' || style.opacity === '0') return false;
462
+ const rect = el.getBoundingClientRect();
463
+ return rect.width > 0 && rect.height > 0;
261
464
  }
262
- }
263
465
 
264
- // Process scripted elements
265
- for (const { selector, source } of scriptedSelectors) {
266
- const locators = page.locator(selector);
267
- const count = await locators.count();
268
-
269
- for (let i = 0; i < count; i++) {
270
- const el = locators.nth(i);
271
-
272
- try {
273
- const isVisible = await el.isVisible();
274
- if (!isVisible) continue;
275
-
276
- const box = await el.boundingBox();
277
- if (!box) continue;
278
-
279
- const tagName = await el.evaluate(e => e.tagName.toLowerCase());
280
- const role = await el.getAttribute('role');
281
- const ariaLabel = await el.getAttribute('aria-label');
282
- const name = await el.getAttribute('name');
283
- const placeholder = await el.getAttribute('placeholder');
284
- const type = await el.getAttribute('type');
285
- const href = await el.getAttribute('href');
286
- const disabled = await el.isDisabled();
287
-
288
- // Get DOM path for stable identification
289
- const domPath = await el.evaluate(e => {
290
- const parts = [];
291
- let node = e;
292
- while (node && node.nodeType === Node.ELEMENT_NODE) {
293
- let selector = node.tagName.toLowerCase();
294
- if (node.id) {
295
- selector += `#${node.id}`;
296
- parts.unshift(selector);
297
- break; // ID is unique, stop here
298
- } else {
299
- const siblings = node.parentNode ? Array.from(node.parentNode.children).filter(c => c.tagName === node.tagName) : [];
300
- if (siblings.length > 1) {
301
- const index = siblings.indexOf(node) + 1;
302
- selector += `:nth-of-type(${index})`;
303
- }
304
- parts.unshift(selector);
305
- }
306
- node = node.parentNode;
466
+ function getDomPath(el) {
467
+ const parts = [];
468
+ let node = el;
469
+ while (node && node.nodeType === Node.ELEMENT_NODE) {
470
+ let seg = node.tagName.toLowerCase();
471
+ if (node.id) {
472
+ seg += `#${node.id}`;
473
+ parts.unshift(seg);
474
+ break;
475
+ } else {
476
+ const siblings = node.parentNode ? Array.from(node.parentNode.children).filter(c => c.tagName === node.tagName) : [];
477
+ if (siblings.length > 1) {
478
+ const index = siblings.indexOf(node) + 1;
479
+ seg += `:nth-of-type(${index})`;
307
480
  }
308
- return parts.join(' > ');
309
- });
481
+ parts.unshift(seg);
482
+ }
483
+ node = node.parentNode;
484
+ }
485
+ return parts.join(' > ');
486
+ }
310
487
 
311
- let accessibleName = ariaLabel;
312
- if (!accessibleName) {
313
- accessibleName = await el.evaluate(e => e.textContent?.trim().substring(0, 100));
488
+ for (const { selector, source } of scriptedSelectors) {
489
+ for (const el of document.querySelectorAll(selector)) {
490
+ if (!isElementVisible(el)) continue;
491
+
492
+ const rect = el.getBoundingClientRect();
493
+ if (rect.width === 0 || rect.height === 0) continue;
494
+
495
+ const tagName = el.tagName.toLowerCase();
496
+ const role = el.getAttribute('role');
497
+ const ariaLabel = el.getAttribute('aria-label');
498
+ const name = el.getAttribute('name');
499
+ const placeholder = el.getAttribute('placeholder');
500
+ const type = el.getAttribute('type');
501
+ const href = el.getAttribute('href');
502
+ const disabled = el.disabled === true || el.getAttribute('aria-disabled') === 'true';
503
+ const checked = el.checked === true || el.getAttribute('aria-checked') === 'true';
504
+ const visibleText = (el.textContent || '').trim().substring(0, 100);
505
+ const accessibleName = ariaLabel || visibleText || name || placeholder || '';
506
+
507
+ let backgroundColor = null;
508
+ const style = getComputedStyle(el);
509
+ const bg = style.backgroundColor;
510
+ if (bg && bg !== 'rgba(0, 0, 0, 0)' && bg !== 'transparent') {
511
+ backgroundColor = bg;
314
512
  }
315
- if (!accessibleName) {
316
- accessibleName = name || placeholder || '';
513
+ if (!backgroundColor) {
514
+ for (const child of el.children) {
515
+ const childBg = getComputedStyle(child).backgroundColor;
516
+ if (childBg && childBg !== 'rgba(0, 0, 0, 0)' && childBg !== 'transparent'
517
+ && childBg !== 'rgb(255, 255, 255)') {
518
+ backgroundColor = childBg;
519
+ break;
520
+ }
521
+ }
317
522
  }
318
523
 
319
- const visibleText = await el.evaluate(e => e.textContent?.trim().substring(0, 100) || '');
320
-
321
- const effectiveRole = role || tagName;
322
-
323
- // Generate stable key for this element
324
- const stableKey = generateStableKey({
325
- tagName, role, name, type, placeholder, ariaLabel, href, domPath
326
- });
327
-
328
- elements.push({
329
- id: null, // Will be assigned after deduplication
330
- stable_key: stableKey,
331
- type: tagName,
332
- role: effectiveRole,
333
- accessible_name: accessibleName || '',
334
- visible_text: visibleText,
335
- disabled: disabled,
336
- input_type: type || null,
524
+ results.push({
525
+ tagName, role, ariaLabel, name, placeholder, type, href,
526
+ disabled, checked, visibleText, accessibleName, backgroundColor,
527
+ domPath: getDomPath(el),
337
528
  bounding_box: {
338
- x: Math.round(box.x),
339
- y: Math.round(box.y),
340
- w: Math.round(box.width),
341
- h: Math.round(box.height)
529
+ x: Math.round(rect.x),
530
+ y: Math.round(rect.y),
531
+ w: Math.round(rect.width),
532
+ h: Math.round(rect.height)
342
533
  },
343
- locator: el,
344
- mechanism: 'scripted',
345
- source: source
534
+ source
346
535
  });
347
- } catch (err) {
348
- continue;
349
536
  }
350
537
  }
351
- }
538
+ return results;
539
+ });
352
540
 
353
- // Deduplicate by bounding box (some elements match multiple selectors)
541
+ // Only add scripted elements whose bbox does NOT overlap with any ARIA element
542
+ const scriptedElements = scriptedRaw
543
+ .filter(raw => !ariaElements.some(aria => bboxOverlaps(aria.bounding_box, raw.bounding_box)))
544
+ .map(raw => {
545
+ const effectiveRole = raw.role || raw.tagName;
546
+ const stableKey = generateStableKey({
547
+ tagName: raw.tagName,
548
+ role: raw.role,
549
+ name: raw.name,
550
+ type: raw.type,
551
+ placeholder: raw.placeholder,
552
+ ariaLabel: raw.ariaLabel,
553
+ href: raw.href,
554
+ domPath: raw.domPath
555
+ });
556
+
557
+ return {
558
+ id: null,
559
+ stable_key: stableKey,
560
+ type: raw.tagName,
561
+ role: effectiveRole,
562
+ accessible_name: raw.accessibleName || '',
563
+ visible_text: raw.visibleText,
564
+ disabled: raw.disabled,
565
+ checked: raw.checked,
566
+ input_type: raw.type || null,
567
+ background_color: raw.backgroundColor || null,
568
+ bounding_box: raw.bounding_box,
569
+ domPath: raw.domPath,
570
+ mechanism: 'scripted',
571
+ source: raw.source
572
+ };
573
+ });
574
+
575
+ // ---- Post-processing: dedup, stable keys, display IDs (unchanged logic) ----
576
+ const elements = [...ariaElements, ...scriptedElements];
577
+
578
+ // Deduplicate by bounding box
354
579
  const seen = new Set();
355
580
  const uniqueElements = [];
356
581
  for (const el of elements) {
@@ -362,7 +587,6 @@ async function enumerateElements() {
362
587
  }
363
588
 
364
589
  // Disambiguate stable_keys by appending occurrence index for collisions
365
- // Group elements by their base stable_key
366
590
  const keyGroups = new Map();
367
591
  for (const el of uniqueElements) {
368
592
  const baseKey = el.stable_key;
@@ -372,13 +596,11 @@ async function enumerateElements() {
372
596
  keyGroups.get(baseKey).push(el);
373
597
  }
374
598
 
375
- // For each group with collisions, sort by position (top-to-bottom, left-to-right)
376
- // then assign deterministic indices
377
599
  for (const [baseKey, group] of keyGroups) {
378
600
  if (group.length > 1) {
379
601
  group.sort((a, b) => {
380
602
  const yDiff = a.bounding_box.y - b.bounding_box.y;
381
- if (Math.abs(yDiff) > 5) return yDiff; // 5px threshold for "same row"
603
+ if (Math.abs(yDiff) > 5) return yDiff;
382
604
  return a.bounding_box.x - b.bounding_box.x;
383
605
  });
384
606
  }
@@ -388,11 +610,9 @@ async function enumerateElements() {
388
610
  }
389
611
 
390
612
  // Assign stable display IDs based on stable_key
391
- // Elements that existed before keep their display ID, new elements get the next available ID
392
613
  const usedDisplayIds = new Set();
393
614
  const newElementKeyToDisplayId = new Map();
394
615
 
395
- // First pass: assign existing display IDs to elements we've seen before
396
616
  for (const el of uniqueElements) {
397
617
  if (elementKeyToDisplayId.has(el.stable_key)) {
398
618
  const existingId = elementKeyToDisplayId.get(el.stable_key);
@@ -402,11 +622,9 @@ async function enumerateElements() {
402
622
  }
403
623
  }
404
624
 
405
- // Second pass: assign new display IDs to new elements
406
625
  let nextId = 1;
407
626
  for (const el of uniqueElements) {
408
627
  if (el.id === null) {
409
- // Find next available ID
410
628
  while (usedDisplayIds.has(`E${nextId}`)) {
411
629
  nextId++;
412
630
  }
@@ -432,8 +650,7 @@ async function enumerateElements() {
432
650
  }
433
651
 
434
652
  lastElements = uniqueElements;
435
- // Return elements without the locator property (not JSON-serializable)
436
- return lastElements.map(({ locator, ...rest }) => rest);
653
+ return lastElements.map(({ domPath, ...rest }) => rest);
437
654
  }
438
655
 
439
656
  async function debugOverlay(x, y) {
@@ -461,7 +678,7 @@ async function debugOverlay(x, y) {
461
678
  }
462
679
 
463
680
  // Commands that should be blocked when a dialog is pending
464
- const DIALOG_BLOCKING_COMMANDS = ['navigate', 'click_element', 'select_option', 'keypress', 'scroll', 'wait', 'capture'];
681
+ const DIALOG_BLOCKING_COMMANDS = ['navigate', 'click_element', 'select_option', 'type', 'hotkey', 'scroll', 'wait', 'capture'];
465
682
 
466
683
  async function handleCommand(msg) {
467
684
  const { request_id, command, params } = msg;
@@ -503,23 +720,32 @@ async function handleCommand(msg) {
503
720
 
504
721
  const element = displayIdToElement.get(element_id);
505
722
  if (!element) throw new Error(`Element not found: ${element_id}`);
506
- lastClickedElement = element; // Store for keypress context
723
+ lastClickedElement = element; // Store for type command context
507
724
 
508
725
  let { x, y, w, h } = element.bounding_box;
509
726
 
510
727
  // Auto-scroll element into view if its center is outside the viewport
511
728
  const viewportSize = page.viewportSize();
512
729
  let centerY = y + h / 2;
513
- if (element.locator && (centerY < 0 || centerY >= viewportSize.height)) {
514
- await element.locator.scrollIntoViewIfNeeded({ timeout: 3000 });
515
- await new Promise(r => setTimeout(r, 200));
516
- const newBox = await element.locator.boundingBox();
517
- if (newBox) {
518
- x = Math.round(newBox.x);
519
- y = Math.round(newBox.y);
520
- w = Math.round(newBox.width);
521
- h = Math.round(newBox.height);
522
- element.bounding_box = { x, y, w, h };
730
+ if (element.domPath && (centerY < 0 || centerY >= viewportSize.height)) {
731
+ try {
732
+ const locator = page.locator(element.domPath);
733
+ await locator.scrollIntoViewIfNeeded({ timeout: 3000 });
734
+ await new Promise(r => setTimeout(r, 200));
735
+ const newBox = await locator.boundingBox();
736
+ if (newBox) {
737
+ x = Math.round(newBox.x);
738
+ y = Math.round(newBox.y);
739
+ w = Math.round(newBox.width);
740
+ h = Math.round(newBox.height);
741
+ element.bounding_box = { x, y, w, h };
742
+ }
743
+ } catch (scrollErr) {
744
+ // Strict mode violation or other locator error — fall back to
745
+ // coordinate-based scroll so the click can still proceed.
746
+ const targetY = y + h / 2 - viewportSize.height / 2;
747
+ await page.evaluate((scrollY) => window.scrollBy(0, scrollY), targetY);
748
+ await new Promise(r => setTimeout(r, 200));
523
749
  }
524
750
  }
525
751
 
@@ -587,16 +813,25 @@ async function handleCommand(msg) {
587
813
  // Auto-scroll element into view if its center is outside the viewport
588
814
  const viewportSize = page.viewportSize();
589
815
  let centerY = y + h / 2;
590
- if (element.locator && (centerY < 0 || centerY >= viewportSize.height)) {
591
- await element.locator.scrollIntoViewIfNeeded({ timeout: 3000 });
592
- await new Promise(r => setTimeout(r, 200));
593
- const newBox = await element.locator.boundingBox();
594
- if (newBox) {
595
- x = Math.round(newBox.x);
596
- y = Math.round(newBox.y);
597
- w = Math.round(newBox.width);
598
- h = Math.round(newBox.height);
599
- element.bounding_box = { x, y, w, h };
816
+ if (element.domPath && (centerY < 0 || centerY >= viewportSize.height)) {
817
+ try {
818
+ const locator = page.locator(element.domPath);
819
+ await locator.scrollIntoViewIfNeeded({ timeout: 3000 });
820
+ await new Promise(r => setTimeout(r, 200));
821
+ const newBox = await locator.boundingBox();
822
+ if (newBox) {
823
+ x = Math.round(newBox.x);
824
+ y = Math.round(newBox.y);
825
+ w = Math.round(newBox.width);
826
+ h = Math.round(newBox.height);
827
+ element.bounding_box = { x, y, w, h };
828
+ }
829
+ } catch (scrollErr) {
830
+ // Strict mode violation or other locator error — fall back to
831
+ // coordinate-based scroll so the select can still proceed.
832
+ const targetY = y + h / 2 - viewportSize.height / 2;
833
+ await page.evaluate((scrollY) => window.scrollBy(0, scrollY), targetY);
834
+ await new Promise(r => setTimeout(r, 200));
600
835
  }
601
836
  }
602
837
 
@@ -627,18 +862,14 @@ async function handleCommand(msg) {
627
862
  break;
628
863
  }
629
864
 
630
- case "keypress": {
631
- if (!params?.keys) throw new Error("keypress requires keys");
865
+ case "type": {
866
+ if (!params?.keys) throw new Error("type requires keys");
632
867
 
633
- // Handle Ctrl+A (select all) as a special case
634
- // Use Meta+a on macOS (Command key) and Control+a on other platforms
635
- if (params.keys === 'ctrl+a' || params.keys === 'Ctrl+A') {
636
- const modifier = process.platform === 'darwin' ? 'Meta' : 'Control';
637
- await page.keyboard.press(`${modifier}+a`);
638
- await new Promise(r => setTimeout(r, 100));
639
- result.screenshot_base64 = await screenshotBase64();
640
- result.elements = await enumerateElements();
641
- break;
868
+ // Reject modifier combos — those belong in the "hotkey" command
869
+ if (/^(ctrl|alt|meta|shift)\+/i.test(params.keys)) {
870
+ throw new Error(
871
+ `"type" is for typing text only. Use "hotkey" for keyboard shortcuts like "${params.keys}".`
872
+ );
642
873
  }
643
874
 
644
875
  // Map special characters to Playwright key names
@@ -666,10 +897,22 @@ async function handleCommand(msg) {
666
897
  }
667
898
  }
668
899
 
669
- // Type each character, mapping special chars to Playwright key names
900
+ // Split into runs of normal text (use keyboard.type) and special chars (use keyboard.press)
901
+ let textBuffer = '';
670
902
  for (const char of params.keys) {
671
- const key = specialKeyMap[char] || char;
672
- await page.keyboard.press(key);
903
+ const special = specialKeyMap[char];
904
+ if (special) {
905
+ if (textBuffer) {
906
+ await page.keyboard.type(textBuffer, { delay: 20 });
907
+ textBuffer = '';
908
+ }
909
+ await page.keyboard.press(special);
910
+ } else {
911
+ textBuffer += char;
912
+ }
913
+ }
914
+ if (textBuffer) {
915
+ await page.keyboard.type(textBuffer, { delay: 20 });
673
916
  }
674
917
 
675
918
  // Wait for search/autocomplete to settle
@@ -679,8 +922,40 @@ async function handleCommand(msg) {
679
922
  break;
680
923
  }
681
924
 
925
+ case "hotkey": {
926
+ if (!params?.keys) throw new Error("hotkey requires keys");
927
+
928
+ // Map common shorthand to Playwright key names
929
+ // Supports: "ctrl+a", "Enter", "Backspace", "Tab", "Escape", "ctrl+c", etc.
930
+ const hotkeyMap = {
931
+ 'ctrl': process.platform === 'darwin' ? 'Meta' : 'Control',
932
+ 'alt': 'Alt',
933
+ 'shift': 'Shift',
934
+ 'meta': 'Meta',
935
+ };
936
+
937
+ const combo = params.keys;
938
+
939
+ if (/\+/.test(combo)) {
940
+ // Modifier combo like "ctrl+a", "ctrl+shift+z"
941
+ const parts = combo.split('+');
942
+ const key = parts.pop(); // Last part is the actual key
943
+ const modifiers = parts.map(m => hotkeyMap[m.toLowerCase()] || m);
944
+ const playwrightCombo = [...modifiers, key].join('+');
945
+ await page.keyboard.press(playwrightCombo);
946
+ } else {
947
+ // Single special key like "Enter", "Backspace", "Tab", "Escape"
948
+ await page.keyboard.press(combo);
949
+ }
950
+
951
+ await new Promise(r => setTimeout(r, 100));
952
+ result.screenshot_base64 = await screenshotBase64();
953
+ result.elements = await enumerateElements();
954
+ break;
955
+ }
956
+
682
957
  case "wait": {
683
- if (!params?.ms) throw new Error("wait requires ms");
958
+ if (params?.ms == null) throw new Error("wait requires ms");
684
959
  await new Promise(r => setTimeout(r, params.ms));
685
960
  result.screenshot_base64 = await screenshotBase64();
686
961
  result.elements = await enumerateElements();
@@ -0,0 +1,6 @@
1
# frozen_string_literal: true

# Common ancestor for the errors raised by step clients
# (StepClient, DirectStepClient).
#
# Rescuing StepClientError lets Runner handle a failure from either client
# with a single clause. Each client still declares its own StepError subclass
# of this type so that existing references to those constants remain stable.
class StepClientError < StandardError
end
package/lib/runner.rb CHANGED
@@ -11,13 +11,14 @@ require 'fileutils'
11
11
  # Determine SpecSage home directory for locating resources
12
12
  SPECSAGE_HOME ||= File.expand_path('..', __dir__)
13
13
 
14
+ require_relative 'client_errors'
14
15
  require_relative 'step_client'
15
16
 
16
17
  class Runner
17
18
  NODE_IO_TIMEOUT_SECONDS = 30
18
19
  NODE_SHUTDOWN_TIMEOUT_SECONDS = 45
19
20
 
20
- BROWSER_ACTIONS = %w[navigate click select keypress wait scroll accept_dialog dismiss_dialog].freeze
21
+ BROWSER_ACTIONS = %w[navigate click select type hotkey wait scroll accept_dialog dismiss_dialog].freeze
21
22
 
22
23
  # Pattern for safe path segment: alphanumeric, underscore, hyphen only
23
24
  # Prevents directory traversal, special chars, and filesystem issues
@@ -25,7 +26,8 @@ class Runner
25
26
 
26
27
  # Initialize runner with scenario data from server
27
28
  # @param all_scenarios [Hash] optional map of scenario_id => scenario_data for pre-scenario lookup
28
- def initialize(scenario_data, visible: false, record: false, publisher: nil, server_run_id: nil, all_scenarios: nil)
29
+ # @param step_client [StepClient, DirectStepClient] optional pre-configured client for step processing
30
+ def initialize(scenario_data, visible: false, record: false, publisher: nil, server_run_id: nil, all_scenarios: nil, step_client: nil)
29
31
  @scenario = normalize_scenario_data(scenario_data)
30
32
  @scenario_id = @scenario['id']
31
33
  @scenario_name = @scenario['name'] || @scenario['id'] || 'unnamed'
@@ -38,7 +40,7 @@ class Runner
38
40
  @next_request_id = 1
39
41
  @node_channel_poisoned = false
40
42
  @publisher = publisher
41
- @step_client = nil
43
+ @step_client = step_client # Accept pre-configured client (nil = create StepClient in run())
42
44
  @server_run_id = server_run_id
43
45
  @credentials = {} # Credentials received from server { "NAME" => "value" }
44
46
  @max_steps = nil # Max browser actions allowed, received from server on first step
@@ -51,7 +53,10 @@ class Runner
51
53
 
52
54
  raise ArgumentError, 'server_run_id is required' unless @server_run_id
53
55
 
54
- @step_client = StepClient.new(
56
+ # Only create StepClient if not injected (CLI path uses HTTP, Sidekiq injects DirectStepClient)
57
+ # TODO: Validate publisher presence here when step_client is nil. Currently crashes with
58
+ # NoMethodError on @publisher.base_url if caller omits both step_client and publisher.
59
+ @step_client ||= StepClient.new(
55
60
  base_url: @publisher.base_url,
56
61
  server_run_id: @server_run_id,
57
62
  api_key: @publisher.api_key
@@ -124,7 +129,7 @@ class Runner
124
129
  cleanup_temp_dir
125
130
 
126
131
  result[:verdict]
127
- rescue StepClient::StepError => e
132
+ rescue StepClientError => e
128
133
  send_client_verdict_if_needed('ERROR', "Server error: #{e.message}")
129
134
  stop_node_process
130
135
  upload_video
@@ -245,7 +250,7 @@ class Runner
245
250
  status: status,
246
251
  reason: reason
247
252
  )
248
- rescue StepClient::StepError => e
253
+ rescue StepClientError => e
249
254
  log "Warning: Failed to send main scenario verdict: #{e.message}"
250
255
  end
251
256
 
@@ -310,8 +315,8 @@ class Runner
310
315
  break if line.nil? || line.empty?
311
316
  line.each_line { |l| log "Node: #{l.strip}" unless l.strip.empty? }
312
317
  end
313
- rescue IO::WaitReadable, EOFError
314
- # No more data available
318
+ rescue IO::WaitReadable, EOFError, IOError
319
+ # No more data available (IOError covers closed stream on second stop_node_process call)
315
320
  end
316
321
 
317
322
  def stop_node_process
@@ -376,10 +381,13 @@ class Runner
376
381
  # Read any remaining stderr output from Node process for debugging
377
382
  drain_node_stderr
378
383
 
379
- # Close IO streams
384
+ # Close IO streams and nil references to prevent IOError on subsequent calls
380
385
  @node_stdin&.close rescue nil
381
386
  @node_stdout&.close rescue nil
382
387
  @node_stderr&.close rescue nil
388
+ @node_stdin = nil
389
+ @node_stdout = nil
390
+ @node_stderr = nil
383
391
 
384
392
  # Force kill the process if still running
385
393
  if pid
@@ -398,6 +406,14 @@ class Runner
398
406
  end
399
407
  end
400
408
 
409
+ # If we couldn't get video via quit (channel poisoned, BROWSER_ERROR,
410
+ # or any other failure), try to recover from temp directory.
411
+ # Playwright streams video data to disk during recording, so a
412
+ # partial file likely exists even without a graceful context.close().
413
+ if !@video_data && @temp_dir && Dir.exist?(@temp_dir)
414
+ recover_video_from_temp_dir
415
+ end
416
+
401
417
  @node_wait_thread = nil
402
418
  end
403
419
 
@@ -498,18 +514,24 @@ class Runner
498
514
  elements = response.dig('result', 'elements') || []
499
515
  { result: "Selected '#{display_value}' in element #{element_id}", screenshot_base64: screenshot_base64, elements: elements }
500
516
 
501
- when 'keypress'
517
+ when 'type'
502
518
  # Substitute credential placeholders at the last moment before browser execution
503
519
  # Supports inline placeholders: <<USERNAME>>@example.com, <<USER>>:<<PASS>>, etc.
504
520
  keys = action['keys']
505
521
  display_keys = keys # For logging (shows placeholders, not actual values)
506
- # Skip credential substitution for special key combos like ctrl+a
507
- keys = substitute_credentials(keys) if contains_credential_placeholder?(keys) && !special_key_combo?(keys)
522
+ keys = substitute_credentials(keys) if contains_credential_placeholder?(keys)
508
523
 
509
- response = send_to_node('keypress', { keys: keys })
524
+ response = send_to_node('type', { keys: keys })
510
525
  screenshot_base64 = response.dig('result', 'screenshot_base64')
511
526
  elements = response.dig('result', 'elements') || []
512
- { result: "Pressed keys: #{display_keys}", screenshot_base64: screenshot_base64, elements: elements }
527
+ { result: "Typed: #{display_keys}", screenshot_base64: screenshot_base64, elements: elements }
528
+
529
+ when 'hotkey'
530
+ keys = action['keys']
531
+ response = send_to_node('hotkey', { keys: keys })
532
+ screenshot_base64 = response.dig('result', 'screenshot_base64')
533
+ elements = response.dig('result', 'elements') || []
534
+ { result: "Hotkey: #{keys}", screenshot_base64: screenshot_base64, elements: elements }
513
535
 
514
536
  when 'wait'
515
537
  response = send_to_node('wait', { ms: action['ms'] })
@@ -587,8 +609,8 @@ class Runner
587
609
  log "Uploading video (#{@video_data.bytesize} bytes)..."
588
610
  @step_client.upload_video(scenario_id: @scenario_id, video_data: @video_data)
589
611
  log "Video uploaded successfully."
590
- rescue StepClient::StepError => e
591
- log "Warning: Failed to upload video: #{e.message}"
612
+ rescue StandardError => e
613
+ log "Warning: Failed to upload video: #{e.class}: #{e.message}"
592
614
  end
593
615
 
594
616
  def cleanup_temp_dir
@@ -611,6 +633,50 @@ class Runner
611
633
  # Ignore cleanup errors
612
634
  end
613
635
 
636
# Recover the recorded video from @temp_dir when the quit command could not
# hand back a video path (for example because the Node channel was poisoned).
# Playwright streams video data to disk during recording, so a partial
# .webm file likely exists even without a graceful context.close().
#
# The largest *.webm in @temp_dir is selected. When ffmpeg is available the
# file is first remuxed with stream copy; if the remux fails, the original
# file is used unchanged. On success @video_data holds the file's bytes and
# the file on disk is deleted. An empty file is reported and left in place.
# Any StandardError raised along the way is logged as a warning and swallowed.
#
# Safety: @temp_dir is scoped to tmp/<server_run_id>/<scenario_id>.
# server_run_id has a unique index (see Document::Test::Run), so each run
# gets an isolated directory. Stale .webm files from other runs cannot
# appear here. On a permanent filesystem (Sidekiq workers), leftover files
# would only exist from a previous crash of this exact run ID, which cannot
# be re-enqueued with the same ID.
def recover_video_from_temp_dir
  video_path = Dir.glob(File.join(@temp_dir, '*.webm')).max_by { |f| File.size(f) }
  return if video_path.nil?

  unless File.size(video_path) > 0
    log "Warning: Video file in temp dir is empty: #{video_path}"
    return
  end

  if ffmpeg_available?
    # Build the output name from the basename so it can never equal the input
    # path, even if the extension does not match ".webm" exactly.
    remuxed_path = File.join(
      File.dirname(video_path),
      "#{File.basename(video_path, '.*')}_remuxed.webm"
    )

    # Global options go first. -nostdin stops ffmpeg from reading the
    # runner's inherited stdin; -y overwrites a leftover output file.
    remuxed = system(
      'ffmpeg', '-nostdin', '-y', '-loglevel', 'error',
      '-i', video_path, '-c', 'copy', remuxed_path
    )

    if remuxed
      log "Recovered video remuxed: #{remuxed_path}"
      File.delete(video_path) rescue nil
      video_path = remuxed_path
    else
      log "Warning: ffmpeg remux failed on recovered video, using original"
      # Remove any partial output so it cannot be mistaken for a recording.
      File.delete(remuxed_path) rescue nil
    end
  end

  @video_data = File.binread(video_path)
  log "Recovered video from temp dir: #{video_path} (#{@video_data.bytesize} bytes)"
  File.delete(video_path) rescue nil
rescue StandardError => e
  log "Warning: Failed to recover video from temp dir: #{e.message}"
end
+ end
675
+
676
# Probe for a usable ffmpeg by running `ffmpeg -version` with stdout and
# stderr discarded. The result is truthy only when the command could be
# started and exited successfully.
def ffmpeg_available?
  probe = %w[ffmpeg -version]
  system(*probe, %i[out err] => File::NULL)
end
679
+
614
680
  # --- Safe path handling ---
615
681
 
616
682
  # Build a safe temp directory path, validating that IDs are safe path segments
@@ -641,7 +707,7 @@ class Runner
641
707
  status: status,
642
708
  reason: reason
643
709
  )
644
- rescue StepClient::StepError => e
710
+ rescue StepClientError => e
645
711
  log "Warning: Failed to send verdict to server: #{e.message}"
646
712
  end
647
713
 
@@ -681,14 +747,6 @@ class Runner
681
747
 
682
748
  CREDENTIAL_PLACEHOLDER_PATTERN = /<<([A-Z][A-Z0-9_]*)>>/
683
749
 
684
- # Special key combinations that should not be treated as credential placeholders
685
- SPECIAL_KEY_COMBOS = %w[ctrl+a Ctrl+A].freeze
686
-
687
- # Check if the value is a special key combo (e.g., ctrl+a)
688
- def special_key_combo?(value)
689
- SPECIAL_KEY_COMBOS.include?(value)
690
- end
691
-
692
750
  # Check if the value contains any credential placeholders
693
751
  def contains_credential_placeholder?(value)
694
752
  return false unless value.is_a?(String)
@@ -7,9 +7,12 @@ require "net/http"
7
7
  require "uri"
8
8
  require "json"
9
9
  require "base64"
10
+ require_relative "client_errors"
10
11
 
11
12
  class StepClient
12
- class StepError < StandardError; end
13
+ # Subclass shared error for API stability (supports subclassing, .class checks, .name)
14
+ # Runner can rescue either StepClient::StepError or the base StepClientError
15
+ class StepError < StepClientError; end
13
16
 
14
17
  attr_reader :server_run_id
15
18
 
@@ -59,6 +62,10 @@ class StepClient
59
62
 
60
63
  response = post("/api/runs/#{@server_run_id}/step", body)
61
64
 
65
+ # TODO: Credentials defaulted to {} may change semantics. Server returns nil when no
66
+ # credentials are present (after first step). Returning {} instead of nil may cause
67
+ # downstream code to branch incorrectly (`if credentials` is always truthy). Consider
68
+ # returning nil when server returns nil, or document this as intentional contract.
62
69
  {
63
70
  action: response["action"],
64
71
  step_number: response["step_number"],
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@specsage/cli",
3
- "version": "0.1.12",
3
+ "version": "0.1.14",
4
4
  "description": "SpecSage CLI - AI-powered end-to-end testing automation (Node wrapper for Ruby CLI)",
5
5
  "type": "module",
6
6
  "bin": {