@specsage/cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/browser.js ADDED
@@ -0,0 +1,809 @@
1
+ #!/usr/bin/env node
2
+
3
+ import { chromium } from "playwright";
4
+ import fs from "fs";
5
+ import path from "path";
6
+ import { fileURLToPath } from "url";
7
+ import {
8
+ setupDialogHandler,
9
+ hasDialog,
10
+ getDialogInfo,
11
+ getDialogPseudoElement,
12
+ acceptDialog,
13
+ dismissDialog
14
+ } from "./dialogs.js";
15
+
// ---------------------------------------------------------------------------
// Module-level state shared by the command handlers in this file.
// ---------------------------------------------------------------------------

// Playwright handles, populated by init().
let browser = null;
let context = null;
let page = null;

// Result of the most recent element enumeration.
let lastElements = [];

// stable_key -> display ID currently assigned to that element
let elementKeyToDisplayId = new Map();

// display ID -> element data
let displayIdToElement = new Map();

// Element most recently clicked; gives keypress its focus context.
let lastClickedElement = null;

// Click that is still blocked behind an open dialog, if any.
let pendingClickPromise = null;

// Directory containing this script. fileURLToPath decodes URL-encoded
// characters (e.g. %20 -> space), so paths with spaces resolve correctly.
const scriptDir = path.dirname(fileURLToPath(import.meta.url));

// `--temp-dir <dir>` selects an isolated directory for video recordings;
// when the flag or its value is missing, fall back to <scriptDir>/../tmp.
const tempDirArgIndex = process.argv.indexOf('--temp-dir');
const tempDirArgValue = tempDirArgIndex === -1
  ? undefined
  : process.argv[tempDirArgIndex + 1];
const tmpDir = tempDirArgValue || path.join(scriptDir, '..', 'tmp');
34
+
/**
 * Launch Chromium and create the browser context and page used by every
 * command, storing them in the module-level `browser`, `context` and `page`.
 *
 * Command-line flags read from process.argv:
 *   --visible  launch with a visible window (headless is used otherwise)
 *   --record   record video into `tmpDir`
 */
async function init() {
  const cliArgs = process.argv;
  const visible = cliArgs.includes('--visible');
  const record = cliArgs.includes('--record');

  browser = await chromium.launch({
    headless: !visible,
    args: ['--disable-gpu', '--disable-software-rasterizer'],
  });

  let contextOptions = { viewport: { width: 1280, height: 800 } };

  if (record) {
    // The recording directory has to exist before the context is created.
    if (!fs.existsSync(tmpDir)) {
      fs.mkdirSync(tmpDir, { recursive: true });
    }
    console.error(`[browser.js] Video recording enabled, temp dir: ${tmpDir}`);
    contextOptions = {
      ...contextOptions,
      recordVideo: {
        dir: tmpDir,
        size: { width: 1280, height: 800 },
      },
    };
  }

  context = await browser.newContext(contextOptions);
  page = await context.newPage();

  if (record) {
    // Report whether the page actually exposes a video object.
    console.error(
      page.video()
        ? '[browser.js] Video recording initialized successfully'
        : '[browser.js] WARNING: Video recording was requested but page.video() returned null'
    );
  }

  // Capture dialogs so the AI can see and respond to them.
  setupDialogHandler(page);
}
77
+
/**
 * Write one protocol message to stdout as a single newline-terminated JSON line.
 *
 * @param {object} response - Message to serialize.
 */
function send(response) {
  const line = `${JSON.stringify(response)}\n`;
  process.stdout.write(line);
}
81
+
/**
 * Build a stable identification key for an element from its intrinsic
 * properties, joined with "|" in a fixed order:
 * tagName, role, name, type, placeholder, ariaLabel, href, domPath.
 *
 * Every property except tagName is replaced by an empty string when falsy.
 *
 * @param {object} elementData - Object carrying the properties listed above.
 * @returns {string} The pipe-separated key.
 */
function generateStableKey(elementData) {
  const optionalFields = ['role', 'name', 'type', 'placeholder', 'ariaLabel', 'href', 'domPath'];

  const segments = [elementData.tagName];
  for (const field of optionalFields) {
    segments.push(elementData[field] || '');
  }
  return segments.join('|');
}
96
+
/**
 * Take a screenshot of the current page.
 *
 * @returns {Promise<string>} The screenshot bytes encoded as base64.
 */
async function screenshotBase64() {
  // Brief pause so any pending renders/paints can complete first.
  await new Promise((resolve) => {
    setTimeout(resolve, 50);
  });

  const imageBuffer = await page.screenshot();
  return imageBuffer.toString("base64");
}
103
+
/**
 * Capture the screenshot and element list, accounting for a blocking dialog.
 *
 * While a dialog is open the page cannot be interacted with, so the result
 * then carries no screenshot and only the dialog pseudo-element (if any).
 *
 * @returns {Promise<{screenshot_base64: (string|null), elements: Array, dialog_blocking: boolean}>}
 */
async function captureState() {
  if (!hasDialog()) {
    const screenshot = await screenshotBase64();
    const elements = await enumerateElements();
    return {
      screenshot_base64: screenshot,
      elements,
      dialog_blocking: false,
    };
  }

  // A dialog is blocking: report it so the AI knows it must handle it first.
  const dialogElement = getDialogPseudoElement();
  const elements = dialogElement ? [dialogElement] : [];
  return {
    screenshot_base64: null, // Can't screenshot while dialog is blocking
    elements,
    dialog_blocking: true,
  };
}
127
+
/**
 * Map a natively interactive element to the coarse `type` reported to callers.
 * Falls back to the tag name when no rule matches.
 */
function classifyNativeElement(tagName, role) {
  if (tagName === 'button' || role === 'button') return 'button';
  if (tagName === 'a' || role === 'link') return 'link';
  if (tagName === 'input' || tagName === 'textarea') return 'input';
  if (tagName === 'select' || role === 'combobox') return 'select';
  if (role === 'option' || role === 'menuitem') return 'option';
  if (role === 'checkbox' || role === 'radio' || role === 'tab') return role;
  return tagName;
}

/**
 * Build the element record for one locator, or return null when the element
 * is not visible or has no bounding box.
 *
 * @param {object} el - Playwright locator resolving to a single element.
 * @param {'native'|'scripted'} mechanism - How the element was discovered.
 * @param {string} source - Value stored in the record's `source` field.
 */
async function describeElement(el, mechanism, source) {
  if (!(await el.isVisible())) return null;

  const box = await el.boundingBox();
  if (!box) return null;

  const tagName = await el.evaluate(e => e.tagName.toLowerCase());
  const role = await el.getAttribute('role');
  const ariaLabel = await el.getAttribute('aria-label');
  const name = await el.getAttribute('name');
  const placeholder = await el.getAttribute('placeholder');
  const type = await el.getAttribute('type');
  const href = await el.getAttribute('href');
  const disabled = await el.isDisabled();

  // DOM path for stable identification: walk up the ancestors, stopping at
  // the first one that has an id.
  const domPath = await el.evaluate(e => {
    const parts = [];
    let node = e;
    while (node && node.nodeType === Node.ELEMENT_NODE) {
      let selector = node.tagName.toLowerCase();
      if (node.id) {
        selector += `#${node.id}`;
        parts.unshift(selector);
        break; // ID is unique, stop here
      } else {
        const siblings = node.parentNode ? Array.from(node.parentNode.children).filter(c => c.tagName === node.tagName) : [];
        if (siblings.length > 1) {
          const index = siblings.indexOf(node) + 1;
          selector += `:nth-of-type(${index})`;
        }
        parts.unshift(selector);
      }
      node = node.parentNode;
    }
    return parts.join(' > ');
  });

  // Trimmed text content (first 100 chars), fetched once and reused for both
  // the visible text and the accessible-name fallback.
  const visibleText = await el.evaluate(e => e.textContent?.trim().substring(0, 100) || '');

  // Accessible-name precedence: aria-label, text content, name, placeholder.
  const accessibleName = ariaLabel || visibleText || name || placeholder || '';

  const stableKey = generateStableKey({
    tagName, role, name, type, placeholder, ariaLabel, href, domPath
  });

  return {
    id: null, // Assigned after deduplication
    stable_key: stableKey,
    type: mechanism === 'native' ? classifyNativeElement(tagName, role) : tagName,
    role: role || tagName,
    accessible_name: accessibleName,
    visible_text: visibleText,
    disabled: disabled,
    input_type: type || null,
    bounding_box: {
      x: Math.round(box.x),
      y: Math.round(box.y),
      w: Math.round(box.width),
      h: Math.round(box.height)
    },
    mechanism,
    source
  };
}

/**
 * Append a record to `out` for every visible element matching `selector`.
 */
async function collectMatchingElements(selector, mechanism, source, out) {
  const locators = page.locator(selector);
  const count = await locators.count();

  for (let i = 0; i < count; i++) {
    try {
      const record = await describeElement(locators.nth(i), mechanism, source);
      if (record) out.push(record);
    } catch {
      // Deliberate best-effort: an element whose inspection throws is skipped.
      // NOTE(review): presumably this covers elements that detach mid-scan.
    }
  }
}

/**
 * Keep only the first element seen for each distinct rounded bounding box
 * (some elements match multiple selectors).
 */
function dedupeByBoundingBox(elements) {
  const seen = new Set();
  const unique = [];
  for (const el of elements) {
    const { x, y, w, h } = el.bounding_box;
    const key = `${x},${y},${w},${h}`;
    if (!seen.has(key)) {
      seen.add(key);
      unique.push(el);
    }
  }
  return unique;
}

/**
 * Append an occurrence index ("#0", "#1", ...) to each element's stable_key.
 * Elements sharing a base key are ordered top-to-bottom, then left-to-right,
 * so the indices are deterministic. Mutates the elements in place.
 */
function disambiguateStableKeys(elements) {
  const keyGroups = new Map();
  for (const el of elements) {
    const group = keyGroups.get(el.stable_key);
    if (group) {
      group.push(el);
    } else {
      keyGroups.set(el.stable_key, [el]);
    }
  }

  for (const [baseKey, group] of keyGroups) {
    if (group.length > 1) {
      group.sort((a, b) => {
        const yDiff = a.bounding_box.y - b.bounding_box.y;
        if (Math.abs(yDiff) > 5) return yDiff; // 5px threshold for "same row"
        return a.bounding_box.x - b.bounding_box.x;
      });
    }
    group.forEach((el, idx) => {
      el.stable_key = `${baseKey}#${idx}`;
    });
  }
}

/**
 * Assign display IDs ("E1", "E2", ...) and refresh the module-level lookup
 * maps. Elements whose stable_key was present in the previous enumeration
 * keep their ID; new elements take the lowest IDs not in use.
 */
function assignDisplayIds(elements) {
  const usedDisplayIds = new Set();
  const newElementKeyToDisplayId = new Map();

  // First pass: carry over IDs for elements we have seen before.
  for (const el of elements) {
    if (elementKeyToDisplayId.has(el.stable_key)) {
      const existingId = elementKeyToDisplayId.get(el.stable_key);
      el.id = existingId;
      usedDisplayIds.add(existingId);
      newElementKeyToDisplayId.set(el.stable_key, existingId);
    }
  }

  // Second pass: hand out fresh IDs to everything still unassigned.
  let nextId = 1;
  for (const el of elements) {
    if (el.id === null) {
      while (usedDisplayIds.has(`E${nextId}`)) {
        nextId++;
      }
      el.id = `E${nextId}`;
      usedDisplayIds.add(el.id);
      newElementKeyToDisplayId.set(el.stable_key, el.id);
      nextId++;
    }
  }

  elementKeyToDisplayId = newElementKeyToDisplayId;
  displayIdToElement = new Map();
  for (const el of elements) {
    displayIdToElement.set(el.id, el);
  }
}

/**
 * Enumerate the visible interactive elements on the current page.
 *
 * Native elements (buttons, links, inputs, ARIA roles) are collected first,
 * then "scripted" ones (onclick / data-action / data-testid / tabindex).
 * The list is deduplicated by bounding box, stable keys are disambiguated,
 * and display IDs are assigned so they persist across calls. A pending
 * dialog's pseudo-element, if any, is placed first in the list.
 *
 * Side effects: replaces `elementKeyToDisplayId`, `displayIdToElement` and
 * `lastElements`.
 *
 * @returns {Promise<Array<object>>} The element records (also stored in `lastElements`).
 */
async function enumerateElements() {
  // Native interactive elements
  const nativeSelectors = [
    'button, [role="button"]',
    'a, [role="link"]',
    'input, textarea',
    'select, [role="combobox"]',
    '[role="option"]',
    '[role="menuitem"]',
    '[role="tab"]',
    '[role="checkbox"]',
    '[role="radio"]',
  ];

  // Scripted interactive elements
  const scriptedSelectors = [
    { selector: '[onclick]', source: 'onclick' },
    { selector: '[data-action]', source: 'data-action' },
    { selector: '[data-testid]', source: 'data-testid' },
    { selector: '[tabindex]', source: 'tabindex' },
  ];

  const elements = [];
  for (const selector of nativeSelectors) {
    await collectMatchingElements(selector, 'native', 'native', elements);
  }
  for (const { selector, source } of scriptedSelectors) {
    await collectMatchingElements(selector, 'scripted', source, elements);
  }

  const uniqueElements = dedupeByBoundingBox(elements);
  disambiguateStableKeys(uniqueElements);
  assignDisplayIds(uniqueElements);

  // Add dialog pseudo-element if one is pending
  const dialogElement = getDialogPseudoElement();
  if (dialogElement) {
    uniqueElements.unshift(dialogElement);
    displayIdToElement.set(dialogElement.id, dialogElement);
  }

  lastElements = uniqueElements;
  return lastElements;
}
432
+
/**
 * Show a small red circular marker centered on (x, y) in the page, creating
 * it on first use and repositioning it afterwards.
 *
 * @param {number} x - Horizontal position, in CSS pixels.
 * @param {number} y - Vertical position, in CSS pixels.
 */
async function debugOverlay(x, y) {
  await page.evaluate(({ x, y }) => {
    const overlayId = "__debug_click_overlay__";
    let marker = document.getElementById(overlayId);

    if (!marker) {
      marker = document.createElement("div");
      marker.id = overlayId;
      Object.assign(marker.style, {
        position: "fixed",
        width: "14px",
        height: "14px",
        border: "2px solid red",
        borderRadius: "50%",
        background: "rgba(255,0,0,0.25)",
        pointerEvents: "none", // the marker must never intercept clicks
        zIndex: "2147483647",
      });
      document.body.appendChild(marker);
    }

    // The marker is 14px wide, so offset by 7px to center it on the point.
    marker.style.left = `${x - 7}px`;
    marker.style.top = `${y - 7}px`;
  }, { x, y });
}
456
+
// Commands that are rejected while a JavaScript dialog is pending.
const DIALOG_BLOCKING_COMMANDS = [
  'navigate',
  'click_element',
  'select_option',
  'keypress',
  'scroll',
  'wait',
  'capture',
];
459
+
/**
 * Optionally wait `settleMs` milliseconds, then store a fresh screenshot and
 * element list on `result`.
 */
async function attachSnapshot(result, settleMs = 0) {
  if (settleMs > 0) {
    await new Promise(resolve => setTimeout(resolve, settleMs));
  }
  result.screenshot_base64 = await screenshotBase64();
  result.elements = await enumerateElements();
}

/**
 * Await the click that was blocked behind a dialog, if there is one.
 * The reference is cleared before awaiting so a rejected click cannot be
 * re-awaited by a later dialog command.
 */
async function settlePendingClick() {
  if (!pendingClickPromise) return;
  const blockedClick = pendingClickPromise;
  pendingClickPromise = null;
  await blockedClick;
}

/**
 * Execute one protocol command and send exactly one response for it.
 *
 * Supported commands: navigate, capture, click_element, select_option,
 * keypress, wait, scroll, accept_dialog, dismiss_dialog, quit.
 *
 * On success sends `{ request_id, ok: true, result, error: null }`. Any
 * error thrown while handling the command is caught and reported as
 * `{ ok: false, error: { code: "BROWSER_ERROR", message } }`, so this
 * function itself does not reject for command failures.
 *
 * @param {{request_id: *, command: string, params?: object}} msg
 */
async function handleCommand(msg) {
  const { request_id, command, params } = msg;

  try {
    const result = {};

    // Block certain commands when a dialog is pending
    if (DIALOG_BLOCKING_COMMANDS.includes(command) && hasDialog()) {
      const info = getDialogInfo();
      throw new Error(
        `Cannot execute ${command}: JavaScript ${info.type} dialog is blocking the page. ` +
        `Message: "${info.message.substring(0, 100)}". ` +
        `Use accept_dialog or dismiss_dialog first.`
      );
    }

    switch (command) {
      case "navigate": {
        if (!params?.url) throw new Error("navigate requires url");
        lastClickedElement = null; // Clear focus context on navigation
        await page.goto(params.url, { waitUntil: "load" });
        // Wait for any post-load JS to execute
        await attachSnapshot(result, 500);
        break;
      }

      case "capture": {
        await attachSnapshot(result);
        break;
      }

      case "click_element": {
        // `params` may be absent; default it so the check below reports the
        // intended error instead of a destructuring TypeError.
        const { element_id } = params ?? {};
        if (!element_id) throw new Error("click_element requires element_id");

        const element = displayIdToElement.get(element_id);
        if (!element) throw new Error(`Element not found: ${element_id}`);
        lastClickedElement = element; // Store for keypress context

        const { x, y, w, h } = element.bounding_box;
        const centerX = x + w / 2;
        const centerY = y + h / 2;

        await debugOverlay(centerX, centerY);

        // Set up dialog listener BEFORE clicking to avoid race conditions.
        // When a dialog appears, Playwright's click() blocks until the dialog
        // is resolved. We use waitForEvent to detect dialog appearance.
        const dialogPromise = page.waitForEvent('dialog', { timeout: 300 })
          .then(() => 'dialog')
          .catch(() => 'no-dialog');

        // Start the click - this may block if a dialog appears
        const clickPromise = page.mouse.click(centerX, centerY);

        // Race: either click completes (no dialog) or dialog appears (click blocked)
        // If dialogPromise wins with 'dialog', the click is blocked on the dialog.
        // If clickPromise wins, no dialog appeared within the timeout window.
        // If dialogPromise wins with 'no-dialog', click is still in flight.
        const raceResult = await Promise.race([
          clickPromise.then(() => 'click-done'),
          dialogPromise
        ]);

        if (raceResult === 'dialog') {
          // Dialog appeared - the click stays blocked until accept_dialog or
          // dismiss_dialog runs, so it is stored rather than awaited here.
          pendingClickPromise = clickPromise;

          const dialogElement = getDialogPseudoElement();
          result.screenshot_base64 = null; // Can't screenshot while dialog blocks
          result.elements = dialogElement ? [dialogElement] : [];
          result.dialog_blocking = true;
        } else {
          if (raceResult !== 'click-done') {
            // dialogPromise timed out ('no-dialog'), click still in flight
            await clickPromise;
          }
          // Click is now complete - wait for page to settle
          await attachSnapshot(result, 500);
        }
        break;
      }

      case "select_option": {
        const { element_id, value } = params ?? {};
        if (!element_id) throw new Error("select_option requires element_id");
        if (!value) throw new Error("select_option requires value");

        const element = displayIdToElement.get(element_id);
        if (!element) throw new Error(`Element not found: ${element_id}`);

        // Locate the <select> under the center of the element's bounding box.
        const { x, y, w, h } = element.bounding_box;
        const handleAtPoint = await page.evaluateHandle(
          ({ x, y }) => document.elementFromPoint(x, y)?.closest('select'),
          { x: x + w / 2, y: y + h / 2 }
        );

        // evaluateHandle always returns a (truthy) handle, even when the page
        // function yields null/undefined; asElement() is null in that case.
        const selectAtPoint = handleAtPoint.asElement();
        if (!selectAtPoint) {
          throw new Error(`No select element found at position for ${element_id}`);
        }
        await selectAtPoint.selectOption({ label: value });

        // Wait for page to settle after selection (form updates, validation)
        await attachSnapshot(result, 500);
        break;
      }

      case "keypress": {
        if (!params?.keys) throw new Error("keypress requires keys");
        const { keys } = params;

        // Handle Ctrl+A (select all) as a special case, in any letter case.
        // Use Meta+a on macOS (Command key) and Control+a on other platforms
        if (typeof keys === 'string' && keys.toLowerCase() === 'ctrl+a') {
          const modifier = process.platform === 'darwin' ? 'Meta' : 'Control';
          await page.keyboard.press(`${modifier}+a`);
          await attachSnapshot(result, 100);
          break;
        }

        // Map special characters to Playwright key names
        const specialKeyMap = {
          '\b': 'Backspace',
          '\n': 'Enter',
          '\r': 'Enter',
          '\t': 'Tab',
          '\x1b': 'Escape',
          ' ': 'Space',
        };

        // Detect typing non-numeric text into a number input
        if (lastClickedElement?.input_type === 'number') {
          // Allow digits, decimal point, minus sign, and special keys
          const nonNumericChars = keys.replace(/[0-9.\-]/g, '');
          const hasNonSpecialText = [...nonNumericChars].some(c => !specialKeyMap[c]);

          if (hasNonSpecialText) {
            throw new Error(
              `Cannot type text "${keys}" into number input (${lastClickedElement.id}). ` +
              `Number inputs only accept numeric values. ` +
              `Consider clicking on a text input field instead, or enter only digits.`
            );
          }
        }

        // Type each character, mapping special chars to Playwright key names
        for (const char of keys) {
          await page.keyboard.press(specialKeyMap[char] || char);
        }

        // Wait for search/autocomplete to settle
        await attachSnapshot(result, 1000);
        break;
      }

      case "wait": {
        const rawMs = params?.ms;
        if (rawMs == null || rawMs === '') throw new Error("wait requires ms");

        // An explicit 0 is a valid (no-op) wait; only reject non-numbers
        // and negative durations.
        const ms = Number(rawMs);
        if (!Number.isFinite(ms) || ms < 0) {
          throw new Error(`wait requires a non-negative number of ms, got: ${rawMs}`);
        }
        await attachSnapshot(result, ms);
        break;
      }

      case "scroll": {
        const { direction } = params ?? {};
        if (!direction) throw new Error("scroll requires direction");

        const scrollAmount = 600; // pixels to scroll
        if (direction === "down") {
          await page.mouse.wheel(0, scrollAmount);
        } else if (direction === "up") {
          await page.mouse.wheel(0, -scrollAmount);
        } else {
          throw new Error(`Invalid scroll direction: ${direction}. Must be "up" or "down"`);
        }

        // Wait for any lazy-loaded content
        await attachSnapshot(result, 300);
        break;
      }

      case "accept_dialog": {
        const { value } = params || {};
        await acceptDialog(value);

        // If there was a click blocked on this dialog, wait for it to complete
        await settlePendingClick();

        // Wait for page to settle after dialog resolution
        await attachSnapshot(result, 500);
        break;
      }

      case "dismiss_dialog": {
        await dismissDialog();

        // If there was a click blocked on this dialog, wait for it to complete
        await settlePendingClick();

        // Wait for page to settle after dialog resolution
        await attachSnapshot(result, 500);
        break;
      }

      case "quit": {
        let videoPath = null;
        if (page) {
          const video = page.video();
          if (video) {
            // Get the path where video will be saved
            videoPath = await video.path();
            console.error(`[browser.js] Video will be saved to: ${videoPath}`);
          } else {
            console.error('[browser.js] No video object - recording may not have been enabled');
          }
        }
        // Close context - this is when the video file is actually written
        await context?.close();
        if (videoPath) {
          // Verify the file was written
          if (fs.existsSync(videoPath)) {
            const stats = fs.statSync(videoPath);
            console.error(`[browser.js] Video file written: ${videoPath} (${stats.size} bytes)`);
          } else {
            console.error(`[browser.js] WARNING: Video file not found after context close: ${videoPath}`);
          }
        }
        // Null-safe like the context close above, so quit still exits
        // cleanly when the browser was never launched.
        await browser?.close();
        send({ request_id, ok: true, result: { video_path: videoPath }, error: null });
        process.exit(0);
        return;
      }

      default:
        throw new Error(`Unknown command: ${command}`);
    }

    send({ request_id, ok: true, result, error: null });

  } catch (err) {
    send({
      request_id,
      ok: false,
      result: {},
      error: {
        code: "BROWSER_ERROR",
        message: err.message
      }
    });
  }
}
750
+
/**
 * Start the browser, announce readiness, then serve newline-delimited JSON
 * commands read from stdin.
 *
 * Each complete line is parsed and dispatched to handleCommand. Lines are
 * processed strictly one at a time in arrival order, even when several
 * chunks arrive while an earlier command is still running. Malformed lines
 * get an INVALID_JSON or INVALID_MESSAGE response, sent in order as well.
 */
async function main() {
  await init();

  // Signal readiness to parent process
  send({ status: "ready" });

  let buffer = "";
  // Tail of the sequential processing chain; every line is appended to it.
  let queue = Promise.resolve();

  const processLine = async (line) => {
    let msg;
    try {
      msg = JSON.parse(line);
    } catch {
      send({
        request_id: null,
        ok: false,
        result: {},
        error: { code: "INVALID_JSON", message: "Invalid JSON" }
      });
      return;
    }

    // Optional chaining also covers valid JSON that is not an object (e.g. null).
    if (!msg?.request_id || !msg?.command) {
      send({
        request_id: msg?.request_id ?? null,
        ok: false,
        result: {},
        error: { code: "INVALID_MESSAGE", message: "Missing request_id or command" }
      });
      return;
    }

    await handleCommand(msg);
  };

  // Decode in the stream so a multi-byte character split across two chunks
  // is not corrupted.
  process.stdin.setEncoding("utf8");

  process.stdin.on("data", chunk => {
    buffer += chunk;

    let newlineIdx = buffer.indexOf("\n");
    while (newlineIdx !== -1) {
      const line = buffer.slice(0, newlineIdx);
      buffer = buffer.slice(newlineIdx + 1);
      newlineIdx = buffer.indexOf("\n");

      if (!line.trim()) continue;

      queue = queue
        .then(() => processLine(line))
        .catch(err => {
          // Keep the queue alive: one failing line must not stall later ones.
          console.error(`[browser.js] Unexpected error while processing input: ${err?.message ?? err}`);
        });
    }
  });
}
796
+
// Only run main when executed directly, not when imported as a module
const __filename = fileURLToPath(import.meta.url);

/**
 * Report whether this file is the script Node was asked to run.
 *
 * Real paths are compared so that launching through a symlink or a relative
 * path still counts as direct execution. If the entry path cannot be
 * resolved on disk, fall back to comparing the absolute path.
 */
function isExecutedDirectly() {
  const entryPath = process.argv[1];
  if (!entryPath) return false;

  try {
    return fs.realpathSync(entryPath) === fs.realpathSync(__filename);
  } catch {
    return path.resolve(entryPath) === __filename;
  }
}

if (isExecutedDirectly()) {
  main().catch(err => {
    send({
      request_id: null,
      ok: false,
      result: {},
      error: { code: "FATAL", message: err.message }
    });
    process.exit(1);
  });
}