agentmb 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/README.md +82 -1
  2. package/dist/browser/actions.d.ts +272 -0
  3. package/dist/browser/actions.d.ts.map +1 -1
  4. package/dist/browser/actions.js +797 -0
  5. package/dist/browser/actions.js.map +1 -1
  6. package/dist/browser/manager.d.ts +88 -0
  7. package/dist/browser/manager.d.ts.map +1 -1
  8. package/dist/browser/manager.js +231 -0
  9. package/dist/browser/manager.js.map +1 -1
  10. package/dist/cli/client.d.ts +1 -0
  11. package/dist/cli/client.d.ts.map +1 -1
  12. package/dist/cli/client.js +21 -0
  13. package/dist/cli/client.js.map +1 -1
  14. package/dist/cli/commands/actions.d.ts.map +1 -1
  15. package/dist/cli/commands/actions.js +762 -10
  16. package/dist/cli/commands/actions.js.map +1 -1
  17. package/dist/cli/index.js +1 -1
  18. package/dist/daemon/routes/actions.d.ts.map +1 -1
  19. package/dist/daemon/routes/actions.js +529 -6
  20. package/dist/daemon/routes/actions.js.map +1 -1
  21. package/dist/daemon/routes/browser_control.d.ts +12 -0
  22. package/dist/daemon/routes/browser_control.d.ts.map +1 -0
  23. package/dist/daemon/routes/browser_control.js +172 -0
  24. package/dist/daemon/routes/browser_control.js.map +1 -0
  25. package/dist/daemon/routes/interaction.d.ts +11 -0
  26. package/dist/daemon/routes/interaction.d.ts.map +1 -0
  27. package/dist/daemon/routes/interaction.js +176 -0
  28. package/dist/daemon/routes/interaction.js.map +1 -0
  29. package/dist/daemon/routes/state.d.ts +11 -0
  30. package/dist/daemon/routes/state.d.ts.map +1 -0
  31. package/dist/daemon/routes/state.js +190 -0
  32. package/dist/daemon/routes/state.js.map +1 -0
  33. package/dist/daemon/server.d.ts.map +1 -1
  34. package/dist/daemon/server.js +7 -1
  35. package/dist/daemon/server.js.map +1 -1
  36. package/package.json +1 -1
@@ -10,6 +10,7 @@ exports.fill = fill;
10
10
  exports.evaluate = evaluate;
11
11
  exports.extract = extract;
12
12
  exports.screenshot = screenshot;
13
+ exports.annotatedScreenshot = annotatedScreenshot;
13
14
  exports.typeText = typeText;
14
15
  exports.press = press;
15
16
  exports.selectOption = selectOption;
@@ -18,7 +19,38 @@ exports.waitForSelector = waitForSelector;
18
19
  exports.waitForUrl = waitForUrl;
19
20
  exports.waitForResponse = waitForResponse;
20
21
  exports.uploadFile = uploadFile;
22
+ exports.elementMap = elementMap;
23
+ exports.getProperty = getProperty;
24
+ exports.assertState = assertState;
25
+ exports.waitPageStable = waitPageStable;
26
+ exports.dblclick = dblclick;
27
+ exports.focus = focus;
28
+ exports.check = check;
29
+ exports.uncheck = uncheck;
30
+ exports.scroll = scroll;
31
+ exports.scrollIntoView = scrollIntoView;
32
+ exports.drag = drag;
33
+ exports.mouseMove = mouseMove;
34
+ exports.mouseDown = mouseDown;
35
+ exports.mouseUp = mouseUp;
36
+ exports.keyDown = keyDown;
37
+ exports.keyUp = keyUp;
38
+ exports.back = back;
39
+ exports.forward = forward;
40
+ exports.reload = reload;
41
+ exports.waitForText = waitForText;
42
+ exports.waitForLoadState = waitForLoadState;
43
+ exports.waitForFunction = waitForFunction;
44
+ exports.scrollUntil = scrollUntil;
45
+ exports.loadMoreUntil = loadMoreUntil;
21
46
  exports.downloadFile = downloadFile;
47
+ exports.clickAt = clickAt;
48
+ exports.wheelAt = wheelAt;
49
+ exports.insertText = insertText;
50
+ exports.getBbox = getBbox;
51
+ exports.setViewport = setViewport;
52
+ exports.clipboardWrite = clipboardWrite;
53
+ exports.clipboardRead = clipboardRead;
22
54
  const crypto_1 = __importDefault(require("crypto"));
23
55
  const fs_1 = __importDefault(require("fs"));
24
56
  class ActionDiagnosticsError extends Error {
@@ -141,6 +173,57 @@ async function screenshot(page, format = 'png', fullPage = false, logger, sessio
141
173
  throw new ActionDiagnosticsError(await collectDiagnostics(page, t0, err));
142
174
  }
143
175
  }
176
/**
 * Capture a screenshot with temporary highlight overlays.
 *
 * A `<style>` element is injected into the page containing an outline/background
 * rule per highlight (plus a `::before` label rule when a label is given), the
 * screenshot is taken, and the style element is removed again. Removal is
 * best-effort and is also attempted when the capture fails.
 *
 * @param page        Page-like object exposing `evaluate`, `screenshot` and `url`.
 * @param highlights  Array of `{ selector, color?, label? }` entries.
 * @param format      Screenshot type forwarded to `page.screenshot` (default `'png'`).
 * @param fullPage    Forwarded to `page.screenshot` (default `false`).
 * @param logger      Optional logger; `write()` receives one action record.
 * @param sessionId   Session identifier copied into the log record.
 * @param purpose     Free-form purpose copied into the log record.
 * @param operator    Operator identifier copied into the log record.
 * @returns `{ status, data, format, highlight_count, duration_ms }` with `data` base64-encoded.
 * @throws ActionDiagnosticsError built from `collectDiagnostics` on any failure.
 */
async function annotatedScreenshot(page, highlights, format = 'png', fullPage = false, logger, sessionId, purpose, operator) {
    const actionUid = actionId();
    const startedAt = Date.now();
    const STYLE_ID = '__agentmb_hl__';
    // Best-effort removal of the injected <style>; a failure here is deliberately ignored.
    const dropHighlightStyle = () => page.evaluate((styleId) => {
        globalThis.document.getElementById(styleId)?.remove();
    }, STYLE_ID).catch(() => { });
    // Build the CSS text for a single highlight entry.
    const toCss = ({ selector, color = 'rgba(255,80,80,0.35)', label }) => {
        // Strip characters that could terminate a CSS value (braces, semicolons, comment markers).
        const safeColor = color.replace(/[{};]|\/\*|\*\//g, '');
        // Escape the label for a single-quoted CSS string: backslash first, then quote,
        // then newline (as a CSS escape); carriage returns are dropped.
        let safeLabel = '';
        if (label) {
            safeLabel = label
                .replace(/\\/g, '\\\\')
                .replace(/'/g, "\\'")
                .replace(/\n/g, '\\A ')
                .replace(/\r/g, '');
        }
        const boxRule = `${selector} { outline: 3px solid ${safeColor} !important; background-color: ${safeColor} !important; position: relative !important; }`;
        const labelRule = safeLabel
            ? `${selector}::before { content: '${safeLabel}'; position: absolute; top: 0; left: 0; background: ${safeColor}; color: #000; font-size: 11px; padding: 1px 3px; z-index: 99999; pointer-events: none; }`
            : '';
        return [boxRule, labelRule].join('\n');
    };
    try {
        const css = highlights.map((entry) => toCss(entry)).join('\n');
        await page.evaluate(({ styleId, css }) => {
            const styleEl = globalThis.document.createElement('style');
            styleEl.id = styleId;
            styleEl.textContent = css;
            globalThis.document.head.appendChild(styleEl);
        }, { styleId: STYLE_ID, css });
        const buffer = await page.screenshot({ type: format, fullPage });
        const duration_ms = Date.now() - startedAt;
        const data = buffer.toString('base64');
        await dropHighlightStyle();
        const outcome = { status: 'ok', data, format, highlight_count: highlights.length, duration_ms };
        logger?.write({
            session_id: sessionId,
            action_id: actionUid,
            type: 'action',
            action: 'annotated_screenshot',
            url: page.url(),
            params: { format, full_page: fullPage, highlights: highlights.length },
            result: { status: 'ok', size_bytes: buffer.length, duration_ms },
            purpose,
            operator,
        });
        return outcome;
    }
    catch (err) {
        // Remove the overlay before diagnostics are collected.
        await dropHighlightStyle();
        const diagnostics = await collectDiagnostics(page, startedAt, err);
        throw new ActionDiagnosticsError(diagnostics);
    }
}
144
227
  // ---------------------------------------------------------------------------
145
228
  // R05 actions
146
229
  // ---------------------------------------------------------------------------
@@ -262,6 +345,564 @@ async function uploadFile(page, selector, fileContent, filename, mimeType = 'app
262
345
  throw new ActionDiagnosticsError(await collectDiagnostics(page, t0, err));
263
346
  }
264
347
  }
348
/**
 * Scan the page for interactive/visible elements, tag each with a
 * `data-agentmb-eid` attribute for stable re-targeting, and return an ordered map.
 * Subsequent actions may use element_id instead of a CSS selector.
 *
 * Ids from earlier scans are cleared across the WHOLE document before tagging,
 * not just inside the scoped root. Otherwise a scan with a different `scope`
 * would leave stale `e1`, `e2`, ... attributes elsewhere on the page and the
 * same id would match more than one element.
 *
 * @param page       Page-like object exposing `evaluate` and `url`.
 * @param opts       `{ scope?, limit? }` — `scope` is a CSS selector for the scan root
 *                   (falls back to `document.body` when absent or unmatched);
 *                   `limit` caps the number of returned elements (default 500).
 * @param logger     Optional logger; `write()` receives one action record.
 * @param sessionId  Session identifier copied into the log record.
 * @param purpose    Free-form purpose copied into the log record.
 * @param operator   Operator identifier copied into the log record.
 * @returns `{ status, url, elements, count, duration_ms }`.
 * @throws ActionDiagnosticsError built from `collectDiagnostics` on any failure.
 */
async function elementMap(page, opts = {}, logger, sessionId, purpose, operator) {
    const id = actionId();
    const t0 = Date.now();
    try {
        const { scope, limit = 500 } = opts;
        /* eslint-disable @typescript-eslint/no-explicit-any */
        const elements = await page.evaluate(([scopeSelector, maxElements]) => {
            const doc = globalThis.document;
            const win = globalThis.window;
            const root = scopeSelector ? (doc.querySelector(scopeSelector) ?? doc.body) : doc.body;
            // Remove previous scan IDs everywhere so ids stay unique per document.
            doc.querySelectorAll('[data-agentmb-eid]').forEach((el) => el.removeAttribute('data-agentmb-eid'));
            const SELECTORS = [
                'a[href]', 'button', 'input:not([type="hidden"])', 'select', 'textarea',
                '[role="button"]', '[role="link"]', '[role="checkbox"]', '[role="radio"]',
                '[role="menuitem"]', '[role="tab"]', '[role="option"]', '[role="combobox"]',
                '[role="switch"]', '[role="spinbutton"]', '[role="slider"]',
                '[tabindex]:not([tabindex="-1"])', 'label[for]',
            ].join(',');
            const candidates = Array.from(root.querySelectorAll(SELECTORS));
            let counter = 0;
            const results = [];
            for (const el of candidates) {
                if (counter >= maxElements) {
                    break;
                }
                // Skip elements hidden via CSS.
                const style = win.getComputedStyle(el);
                if (style.display === 'none' || style.visibility === 'hidden' || parseFloat(style.opacity) === 0) {
                    continue;
                }
                // Skip elements with an empty box.
                const rect = el.getBoundingClientRect();
                if (rect.width === 0 && rect.height === 0) {
                    continue;
                }
                counter++;
                const eid = `e${counter}`;
                el.setAttribute('data-agentmb-eid', eid);
                // Hit-test the centre point: if an unrelated element is on top,
                // report the candidate as blocked by an overlay.
                const cx = rect.left + rect.width / 2;
                const cy = rect.top + rect.height / 2;
                const topEl = doc.elementFromPoint(cx, cy);
                const overlayBlocked = topEl ? (!el.contains(topEl) && !topEl.contains(el) && topEl !== el) : false;
                results.push({
                    element_id: eid,
                    tag: el.tagName.toLowerCase(),
                    role: el.getAttribute('role') ?? el.tagName.toLowerCase(),
                    text: (el.innerText ?? el.textContent ?? '').trim().slice(0, 200),
                    name: el.getAttribute('name') ?? el.getAttribute('aria-label') ?? '',
                    placeholder: el.getAttribute('placeholder') ?? '',
                    href: el.getAttribute('href') ?? '',
                    type: el.getAttribute('type') ?? '',
                    overlay_blocked: overlayBlocked,
                    rect: {
                        x: Math.round(rect.x), y: Math.round(rect.y),
                        width: Math.round(rect.width), height: Math.round(rect.height),
                    },
                });
            }
            return results;
        }, [scope, limit]);
        /* eslint-enable @typescript-eslint/no-explicit-any */
        const duration_ms = Date.now() - t0;
        const result = { status: 'ok', url: page.url(), elements, count: elements.length, duration_ms };
        logger?.write({
            session_id: sessionId, action_id: id, type: 'action', action: 'element_map',
            url: page.url(), params: { scope: scope ?? null, limit },
            result: { status: 'ok', count: elements.length, duration_ms }, purpose, operator,
        });
        return result;
    }
    catch (err) {
        throw new ActionDiagnosticsError(await collectDiagnostics(page, t0, err));
    }
}
423
/**
 * Read one property of the element(s) matching `selector`.
 *
 * Supported `property` values:
 *   - `text`  → `innerText` of the first match
 *   - `html`  → `innerHTML` of the first match
 *   - `value` → `inputValue` of the first match
 *   - `attr`  → attribute `attrName` of the first match (`attrName` required)
 *   - `count` → number of matches
 *   - `box`   → `boundingBox` of the first match
 * Any other value is rejected instead of silently returning an `ok` result
 * with an undefined `value`.
 *
 * @param page       Page-like object exposing `locator` and `url`.
 * @param selector   Selector handed to `page.locator`.
 * @param property   One of the names listed above.
 * @param attrName   Attribute name; only used when `property === 'attr'`.
 * @param logger     Optional logger; `write()` receives one action record.
 * @param sessionId  Session identifier copied into the log record.
 * @param purpose    Free-form purpose copied into the log record.
 * @param operator   Operator identifier copied into the log record.
 * @returns `{ status, selector, property, value, duration_ms }`.
 * @throws ActionDiagnosticsError built from `collectDiagnostics` on any failure,
 *         including a missing `attrName` or an unsupported `property`.
 */
async function getProperty(page, selector, property, attrName, logger, sessionId, purpose, operator) {
    const id = actionId();
    const t0 = Date.now();
    try {
        let value;
        switch (property) {
            case 'text':
                value = await page.locator(selector).first().innerText({ timeout: 5000 });
                break;
            case 'html':
                value = await page.locator(selector).first().innerHTML({ timeout: 5000 });
                break;
            case 'value':
                value = await page.locator(selector).first().inputValue({ timeout: 5000 });
                break;
            case 'attr':
                if (!attrName) {
                    throw new Error('attr_name is required when property=attr');
                }
                value = await page.locator(selector).first().getAttribute(attrName, { timeout: 5000 });
                break;
            case 'count':
                // Counts every match, not just the first one.
                value = await page.locator(selector).count();
                break;
            case 'box':
                value = await page.locator(selector).first().boundingBox({ timeout: 5000 });
                break;
            default:
                throw new Error(`Unsupported property '${property}' (expected one of: text, html, value, attr, count, box)`);
        }
        const duration_ms = Date.now() - t0;
        const result = { status: 'ok', selector, property, value, duration_ms };
        logger?.write({
            session_id: sessionId, action_id: id, type: 'action', action: 'get',
            url: page.url(), selector, params: { property, attr_name: attrName ?? null },
            result: { status: 'ok', duration_ms }, purpose, operator,
        });
        return result;
    }
    catch (err) {
        throw new ActionDiagnosticsError(await collectDiagnostics(page, t0, err));
    }
}
463
/**
 * Compare a boolean state of the first element matching `selector` with an
 * expected value.
 *
 * Supported `property` values are `visible`, `enabled` and `checked`. Any other
 * value is rejected; previously it produced `passed: false` with an undefined
 * `actual`, which looked like a genuine assertion failure.
 *
 * A mismatch is NOT an error: the function resolves with `passed: false`.
 *
 * @param page       Page-like object exposing `locator` and `url`.
 * @param selector   Selector handed to `page.locator`.
 * @param property   `'visible' | 'enabled' | 'checked'`.
 * @param expected   Expected state (default `true`); compared with `===`.
 * @param logger     Optional logger; `write()` receives one action record.
 * @param sessionId  Session identifier copied into the log record.
 * @param purpose    Free-form purpose copied into the log record.
 * @param operator   Operator identifier copied into the log record.
 * @returns `{ status, selector, property, actual, expected, passed, duration_ms }`.
 * @throws ActionDiagnosticsError built from `collectDiagnostics` when the state
 *         cannot be read or `property` is unsupported.
 */
async function assertState(page, selector, property, expected = true, logger, sessionId, purpose, operator) {
    const id = actionId();
    const t0 = Date.now();
    try {
        let actual;
        const loc = page.locator(selector).first();
        switch (property) {
            case 'visible':
                actual = await loc.isVisible({ timeout: 5000 });
                break;
            case 'enabled':
                actual = await loc.isEnabled({ timeout: 5000 });
                break;
            case 'checked':
                actual = await loc.isChecked({ timeout: 5000 });
                break;
            default:
                throw new Error(`Unsupported property '${property}' (expected one of: visible, enabled, checked)`);
        }
        const passed = actual === expected;
        const duration_ms = Date.now() - t0;
        const result = { status: 'ok', selector, property, actual, expected, passed, duration_ms };
        logger?.write({
            session_id: sessionId, action_id: id, type: 'action', action: 'assert',
            url: page.url(), selector, params: { property, expected },
            result: { status: 'ok', passed, duration_ms }, purpose, operator,
        });
        return result;
    }
    catch (err) {
        throw new ActionDiagnosticsError(await collectDiagnostics(page, t0, err));
    }
}
494
+ // ---------------------------------------------------------------------------
495
+ // R07-T07: wait_page_stable — network idle + DOM quiescence + overlay check
496
+ // ---------------------------------------------------------------------------
497
/**
 * Wait until the page looks stable, in three stages:
 *   1. `page.waitForLoadState('networkidle')`;
 *   2. DOM quiescence — an in-page MutationObserver (child-list changes on the
 *      whole subtree, attributes ignored) must stay silent for `dom_stable_ms`;
 *   3. optionally, `overlay_selector` must match no elements.
 *
 * The in-page deadline timer is cleared as soon as the DOM settles, so no
 * pending timer is left behind in the page after a successful wait.
 *
 * @param page       Page-like object exposing `waitForLoadState`, `evaluate`, `locator`, `url`.
 * @param opts       `{ timeout_ms = 10000, dom_stable_ms = 300, overlay_selector? }`.
 * @param logger     Optional logger; `write()` receives one action record.
 * @param sessionId  Session identifier copied into the log record.
 * @param purpose    Free-form purpose copied into the log record.
 * @param operator   Operator identifier copied into the log record.
 * @returns `{ status, url, waited_ms, duration_ms }`.
 * @throws ActionDiagnosticsError built from `collectDiagnostics` when any stage
 *         fails or times out.
 */
async function waitPageStable(page, opts = {}, logger, sessionId, purpose, operator) {
    const id = actionId();
    const t0 = Date.now();
    const { timeout_ms = 10000, dom_stable_ms = 300, overlay_selector } = opts;
    try {
        // 1. Network idle
        await page.waitForLoadState('networkidle', { timeout: timeout_ms });
        // 2. DOM mutation quiescence — resolve after `dom_stable_ms` without mutations.
        //    The DOM stage always gets at least 500ms, even if stage 1 used up the budget.
        /* eslint-disable @typescript-eslint/no-explicit-any */
        await page.evaluate(([stabilityMs, timeoutMs]) => new Promise((resolve, reject) => {
            const doc = globalThis.document;
            let quietTimer;
            let deadlineTimer;
            // (Re)start the quiet-period countdown; invoked on every mutation batch.
            const settle = () => {
                clearTimeout(quietTimer);
                quietTimer = setTimeout(() => {
                    observer.disconnect();
                    // Cancel the deadline so it does not linger in the page.
                    clearTimeout(deadlineTimer);
                    resolve();
                }, stabilityMs);
            };
            const observer = new globalThis.MutationObserver(settle);
            observer.observe(doc.documentElement, { childList: true, subtree: true, attributes: false });
            settle();
            deadlineTimer = setTimeout(() => {
                observer.disconnect();
                clearTimeout(quietTimer);
                reject(new Error('DOM stability timeout'));
            }, Math.max(0, timeoutMs));
        }), [dom_stable_ms, Math.max(500, timeout_ms - (Date.now() - t0))]);
        /* eslint-enable @typescript-eslint/no-explicit-any */
        // 3. Overlay check — poll every 100ms until overlay_selector matches no elements
        if (overlay_selector) {
            const deadline = t0 + timeout_ms;
            while (Date.now() < deadline) {
                const count = await page.locator(overlay_selector).count();
                if (count === 0) {
                    break;
                }
                await new Promise((r) => setTimeout(r, 100));
            }
            // Final authoritative check (also covers the case where the deadline had already passed).
            const remaining = await page.locator(overlay_selector).count();
            if (remaining > 0) {
                throw new Error(`Overlay '${overlay_selector}' still present after ${timeout_ms}ms`);
            }
        }
        const duration_ms = Date.now() - t0;
        const result = { status: 'ok', url: page.url(), waited_ms: duration_ms, duration_ms };
        logger?.write({
            session_id: sessionId, action_id: id, type: 'action', action: 'wait_page_stable',
            url: page.url(), params: { timeout_ms, dom_stable_ms, overlay_selector: overlay_selector ?? null },
            result: { status: 'ok', duration_ms }, purpose, operator,
        });
        return result;
    }
    catch (err) {
        throw new ActionDiagnosticsError(await collectDiagnostics(page, t0, err));
    }
}
553
+ // ---------------------------------------------------------------------------
554
+ // R07-T03: Interaction primitives — dblclick / focus / check / uncheck /
555
+ // scroll / scroll_into_view / drag + low-level mouse/keyboard
556
+ // ---------------------------------------------------------------------------
557
/**
 * Double-click the first element matching `selector`.
 *
 * @param page       Page-like object exposing `locator` (and optionally `url`).
 * @param selector   Selector handed to `page.locator`.
 * @param timeoutMs  Timeout forwarded to the locator's `dblclick` (default 5000).
 * @param logger     Optional logger; `write()` receives one action record.
 * @param sessionId  Session identifier copied into the log record.
 * @param purpose    Free-form purpose copied into the log record.
 * @param operator   Operator identifier copied into the log record.
 * @returns `{ status, selector, duration_ms }`.
 * @throws ActionDiagnosticsError built from `collectDiagnostics` on failure.
 */
async function dblclick(page, selector, timeoutMs = 5000, logger, sessionId, purpose, operator) {
    const actionUid = actionId();
    const startedAt = Date.now();
    try {
        const target = page.locator(selector).first();
        await target.dblclick({ timeout: timeoutMs });
        const outcome = { status: 'ok', selector, duration_ms: Date.now() - startedAt };
        logger?.write({
            session_id: sessionId,
            action_id: actionUid,
            type: 'action',
            action: 'dblclick',
            url: page.url?.(),
            selector,
            params: { timeout_ms: timeoutMs },
            result: outcome,
            purpose,
            operator,
        });
        return outcome;
    }
    catch (err) {
        const diagnostics = await collectDiagnostics(page, startedAt, err);
        throw new ActionDiagnosticsError(diagnostics);
    }
}
570
/**
 * Give keyboard focus to the first element matching `selector`.
 *
 * @param page       Page-like object exposing `locator` (and optionally `url`).
 * @param selector   Selector handed to `page.locator`.
 * @param logger     Optional logger; `write()` receives one action record.
 * @param sessionId  Session identifier copied into the log record.
 * @param purpose    Free-form purpose copied into the log record.
 * @param operator   Operator identifier copied into the log record.
 * @returns `{ status, selector, duration_ms }`.
 * @throws ActionDiagnosticsError built from `collectDiagnostics` on failure.
 */
async function focus(page, selector, logger, sessionId, purpose, operator) {
    const actionUid = actionId();
    const startedAt = Date.now();
    try {
        const target = page.locator(selector).first();
        await target.focus();
        const outcome = { status: 'ok', selector, duration_ms: Date.now() - startedAt };
        logger?.write({
            session_id: sessionId,
            action_id: actionUid,
            type: 'action',
            action: 'focus',
            url: page.url?.(),
            selector,
            params: {},
            result: outcome,
            purpose,
            operator,
        });
        return outcome;
    }
    catch (err) {
        const diagnostics = await collectDiagnostics(page, startedAt, err);
        throw new ActionDiagnosticsError(diagnostics);
    }
}
583
/**
 * Check the first element matching `selector` via the locator's `check` method.
 *
 * @param page       Page-like object exposing `locator` (and optionally `url`).
 * @param selector   Selector handed to `page.locator`.
 * @param timeoutMs  Timeout forwarded to `check` (default 5000).
 * @param logger     Optional logger; `write()` receives one action record.
 * @param sessionId  Session identifier copied into the log record.
 * @param purpose    Free-form purpose copied into the log record.
 * @param operator   Operator identifier copied into the log record.
 * @returns `{ status, selector, duration_ms }`.
 * @throws ActionDiagnosticsError built from `collectDiagnostics` on failure.
 */
async function check(page, selector, timeoutMs = 5000, logger, sessionId, purpose, operator) {
    const actionUid = actionId();
    const startedAt = Date.now();
    try {
        const target = page.locator(selector).first();
        await target.check({ timeout: timeoutMs });
        const outcome = { status: 'ok', selector, duration_ms: Date.now() - startedAt };
        logger?.write({
            session_id: sessionId,
            action_id: actionUid,
            type: 'action',
            action: 'check',
            url: page.url?.(),
            selector,
            params: { timeout_ms: timeoutMs },
            result: outcome,
            purpose,
            operator,
        });
        return outcome;
    }
    catch (err) {
        const diagnostics = await collectDiagnostics(page, startedAt, err);
        throw new ActionDiagnosticsError(diagnostics);
    }
}
596
/**
 * Uncheck the first element matching `selector` via the locator's `uncheck` method.
 *
 * @param page       Page-like object exposing `locator` (and optionally `url`).
 * @param selector   Selector handed to `page.locator`.
 * @param timeoutMs  Timeout forwarded to `uncheck` (default 5000).
 * @param logger     Optional logger; `write()` receives one action record.
 * @param sessionId  Session identifier copied into the log record.
 * @param purpose    Free-form purpose copied into the log record.
 * @param operator   Operator identifier copied into the log record.
 * @returns `{ status, selector, duration_ms }`.
 * @throws ActionDiagnosticsError built from `collectDiagnostics` on failure.
 */
async function uncheck(page, selector, timeoutMs = 5000, logger, sessionId, purpose, operator) {
    const actionUid = actionId();
    const startedAt = Date.now();
    try {
        const target = page.locator(selector).first();
        await target.uncheck({ timeout: timeoutMs });
        const outcome = { status: 'ok', selector, duration_ms: Date.now() - startedAt };
        logger?.write({
            session_id: sessionId,
            action_id: actionUid,
            type: 'action',
            action: 'uncheck',
            url: page.url?.(),
            selector,
            params: { timeout_ms: timeoutMs },
            result: outcome,
            purpose,
            operator,
        });
        return outcome;
    }
    catch (err) {
        const diagnostics = await collectDiagnostics(page, startedAt, err);
        throw new ActionDiagnosticsError(diagnostics);
    }
}
609
/**
 * Scroll at the first element matching `selector`.
 *
 * When the element has a bounding box, the mouse is moved to its centre and a
 * wheel event with the requested deltas is dispatched. When there is no box,
 * the element's own `scrollBy` is called inside the page instead.
 *
 * @param page       Page-like object exposing `locator`, `mouse` (and optionally `url`).
 * @param selector   Selector handed to `page.locator`.
 * @param opts       `{ delta_x = 0, delta_y = 300 }`.
 * @param logger     Optional logger; `write()` receives one action record.
 * @param sessionId  Session identifier copied into the log record.
 * @param purpose    Free-form purpose copied into the log record.
 * @param operator   Operator identifier copied into the log record.
 * @returns `{ status, selector, duration_ms }`.
 * @throws ActionDiagnosticsError built from `collectDiagnostics` on failure.
 */
async function scroll(page, selector, opts = {}, logger, sessionId, purpose, operator) {
    const actionUid = actionId();
    const startedAt = Date.now();
    const { delta_x = 0, delta_y = 300 } = opts;
    try {
        const bounds = await page.locator(selector).first().boundingBox();
        if (!bounds) {
            // No box available: scroll the element itself from inside the page.
            /* eslint-disable @typescript-eslint/no-explicit-any */
            await page.locator(selector).first().evaluate((el, args) => el.scrollBy(args[0], args[1]), [delta_x, delta_y]);
            /* eslint-enable @typescript-eslint/no-explicit-any */
        }
        else {
            // Hover the element's centre, then wheel — covers scroll containers and the page alike.
            const centreX = bounds.x + bounds.width / 2;
            const centreY = bounds.y + bounds.height / 2;
            await page.mouse?.move(centreX, centreY);
            await page.mouse?.wheel(delta_x, delta_y);
        }
        const outcome = { status: 'ok', selector, duration_ms: Date.now() - startedAt };
        logger?.write({
            session_id: sessionId,
            action_id: actionUid,
            type: 'action',
            action: 'scroll',
            url: page.url?.(),
            selector,
            params: { delta_x, delta_y },
            result: outcome,
            purpose,
            operator,
        });
        return outcome;
    }
    catch (err) {
        const diagnostics = await collectDiagnostics(page, startedAt, err);
        throw new ActionDiagnosticsError(diagnostics);
    }
}
636
/**
 * Scroll the first element matching `selector` into view using the locator's
 * `scrollIntoViewIfNeeded`.
 *
 * @param page       Page-like object exposing `locator` (and optionally `url`).
 * @param selector   Selector handed to `page.locator`.
 * @param logger     Optional logger; `write()` receives one action record.
 * @param sessionId  Session identifier copied into the log record.
 * @param purpose    Free-form purpose copied into the log record.
 * @param operator   Operator identifier copied into the log record.
 * @returns `{ status, selector, duration_ms }`.
 * @throws ActionDiagnosticsError built from `collectDiagnostics` on failure.
 */
async function scrollIntoView(page, selector, logger, sessionId, purpose, operator) {
    const actionUid = actionId();
    const startedAt = Date.now();
    try {
        const target = page.locator(selector).first();
        await target.scrollIntoViewIfNeeded();
        const outcome = { status: 'ok', selector, duration_ms: Date.now() - startedAt };
        logger?.write({
            session_id: sessionId,
            action_id: actionUid,
            type: 'action',
            action: 'scroll_into_view',
            url: page.url?.(),
            selector,
            params: {},
            result: outcome,
            purpose,
            operator,
        });
        return outcome;
    }
    catch (err) {
        const diagnostics = await collectDiagnostics(page, startedAt, err);
        throw new ActionDiagnosticsError(diagnostics);
    }
}
649
/**
 * Drag the element matching `sourceSelector` onto the one matching
 * `targetSelector` via `page.dragAndDrop`.
 *
 * @param page            Page-like object exposing `dragAndDrop` and `url`.
 * @param sourceSelector  Selector of the element to drag.
 * @param targetSelector  Selector of the drop target.
 * @param logger          Optional logger; `write()` receives one action record.
 * @param sessionId       Session identifier copied into the log record.
 * @param purpose         Free-form purpose copied into the log record.
 * @param operator        Operator identifier copied into the log record.
 * @returns `{ status, source, target, duration_ms }`.
 * @throws ActionDiagnosticsError built from `collectDiagnostics` on failure.
 */
async function drag(page, sourceSelector, targetSelector, logger, sessionId, purpose, operator) {
    const actionUid = actionId();
    const startedAt = Date.now();
    try {
        await page.dragAndDrop(sourceSelector, targetSelector);
        const outcome = {
            status: 'ok',
            source: sourceSelector,
            target: targetSelector,
            duration_ms: Date.now() - startedAt,
        };
        logger?.write({
            session_id: sessionId,
            action_id: actionUid,
            type: 'action',
            action: 'drag',
            url: page.url(),
            params: { source: sourceSelector, target: targetSelector },
            result: outcome,
            purpose,
            operator,
        });
        return outcome;
    }
    catch (err) {
        const diagnostics = await collectDiagnostics(page, startedAt, err);
        throw new ActionDiagnosticsError(diagnostics);
    }
}
662
/**
 * Move the mouse pointer to `(x, y)` via `page.mouse.move`.
 *
 * @param page       Page-like object exposing `mouse` and `url`.
 * @param x          X coordinate forwarded to `page.mouse.move`.
 * @param y          Y coordinate forwarded to `page.mouse.move`.
 * @param logger     Optional logger; `write()` receives one action record.
 * @param sessionId  Session identifier copied into the log record.
 * @param purpose    Free-form purpose copied into the log record.
 * @param operator   Operator identifier copied into the log record.
 * @returns `{ status, x, y, duration_ms }`.
 * @throws ActionDiagnosticsError built from `collectDiagnostics` on failure.
 */
async function mouseMove(page, x, y, logger, sessionId, purpose, operator) {
    const actionUid = actionId();
    const startedAt = Date.now();
    try {
        await page.mouse.move(x, y);
        const outcome = { status: 'ok', x, y, duration_ms: Date.now() - startedAt };
        logger?.write({
            session_id: sessionId,
            action_id: actionUid,
            type: 'action',
            action: 'mouse_move',
            url: page.url(),
            params: { x, y },
            result: outcome,
            purpose,
            operator,
        });
        return outcome;
    }
    catch (err) {
        const diagnostics = await collectDiagnostics(page, startedAt, err);
        throw new ActionDiagnosticsError(diagnostics);
    }
}
675
/**
 * Press a mouse button, optionally moving the pointer first.
 *
 * The pointer is moved only when BOTH `opts.x` and `opts.y` are defined;
 * otherwise the button is pressed at the current pointer position.
 *
 * @param page       Page-like object exposing `mouse` and `url`.
 * @param opts       `{ x?, y?, button? }` — `button` defaults to `'left'`.
 * @param logger     Optional logger; `write()` receives one action record.
 * @param sessionId  Session identifier copied into the log record.
 * @param purpose    Free-form purpose copied into the log record.
 * @param operator   Operator identifier copied into the log record.
 * @returns `{ status, duration_ms }`.
 * @throws ActionDiagnosticsError built from `collectDiagnostics` on failure.
 */
async function mouseDown(page, opts = {}, logger, sessionId, purpose, operator) {
    const actionUid = actionId();
    const startedAt = Date.now();
    try {
        const hasTarget = opts.x !== undefined && opts.y !== undefined;
        if (hasTarget) {
            await page.mouse.move(opts.x, opts.y);
        }
        const button = opts.button ?? 'left';
        await page.mouse.down({ button });
        const outcome = { status: 'ok', duration_ms: Date.now() - startedAt };
        logger?.write({
            session_id: sessionId,
            action_id: actionUid,
            type: 'action',
            action: 'mouse_down',
            url: page.url(),
            params: opts,
            result: outcome,
            purpose,
            operator,
        });
        return outcome;
    }
    catch (err) {
        const diagnostics = await collectDiagnostics(page, startedAt, err);
        throw new ActionDiagnosticsError(diagnostics);
    }
}
691
/**
 * Release a mouse button via `page.mouse.up`.
 *
 * @param page       Page-like object exposing `mouse` and `url`.
 * @param button     Button to release (default `'left'`).
 * @param logger     Optional logger; `write()` receives one action record.
 * @param sessionId  Session identifier copied into the log record.
 * @param purpose    Free-form purpose copied into the log record.
 * @param operator   Operator identifier copied into the log record.
 * @returns `{ status, duration_ms }`.
 * @throws ActionDiagnosticsError built from `collectDiagnostics` on failure.
 */
async function mouseUp(page, button = 'left', logger, sessionId, purpose, operator) {
    const actionUid = actionId();
    const startedAt = Date.now();
    try {
        await page.mouse.up({ button });
        const outcome = { status: 'ok', duration_ms: Date.now() - startedAt };
        logger?.write({
            session_id: sessionId,
            action_id: actionUid,
            type: 'action',
            action: 'mouse_up',
            url: page.url(),
            params: { button },
            result: outcome,
            purpose,
            operator,
        });
        return outcome;
    }
    catch (err) {
        const diagnostics = await collectDiagnostics(page, startedAt, err);
        throw new ActionDiagnosticsError(diagnostics);
    }
}
704
/**
 * Press and hold `key` via `page.keyboard.down`.
 *
 * @param page       Page-like object exposing `keyboard` and `url`.
 * @param key        Key name forwarded to `page.keyboard.down`.
 * @param logger     Optional logger; `write()` receives one action record.
 * @param sessionId  Session identifier copied into the log record.
 * @param purpose    Free-form purpose copied into the log record.
 * @param operator   Operator identifier copied into the log record.
 * @returns `{ status, key, duration_ms }`.
 * @throws ActionDiagnosticsError built from `collectDiagnostics` on failure.
 */
async function keyDown(page, key, logger, sessionId, purpose, operator) {
    const actionUid = actionId();
    const startedAt = Date.now();
    try {
        await page.keyboard.down(key);
        const outcome = { status: 'ok', key, duration_ms: Date.now() - startedAt };
        logger?.write({
            session_id: sessionId,
            action_id: actionUid,
            type: 'action',
            action: 'key_down',
            url: page.url(),
            params: { key },
            result: outcome,
            purpose,
            operator,
        });
        return outcome;
    }
    catch (err) {
        const diagnostics = await collectDiagnostics(page, startedAt, err);
        throw new ActionDiagnosticsError(diagnostics);
    }
}
717
/**
 * Release `key` via `page.keyboard.up`.
 *
 * @param page       Page-like object exposing `keyboard` and `url`.
 * @param key        Key name forwarded to `page.keyboard.up`.
 * @param logger     Optional logger; `write()` receives one action record.
 * @param sessionId  Session identifier copied into the log record.
 * @param purpose    Free-form purpose copied into the log record.
 * @param operator   Operator identifier copied into the log record.
 * @returns `{ status, key, duration_ms }`.
 * @throws ActionDiagnosticsError built from `collectDiagnostics` on failure.
 */
async function keyUp(page, key, logger, sessionId, purpose, operator) {
    const actionUid = actionId();
    const startedAt = Date.now();
    try {
        await page.keyboard.up(key);
        const outcome = { status: 'ok', key, duration_ms: Date.now() - startedAt };
        logger?.write({
            session_id: sessionId,
            action_id: actionUid,
            type: 'action',
            action: 'key_up',
            url: page.url(),
            params: { key },
            result: outcome,
            purpose,
            operator,
        });
        return outcome;
    }
    catch (err) {
        const diagnostics = await collectDiagnostics(page, startedAt, err);
        throw new ActionDiagnosticsError(diagnostics);
    }
}
730
+ // ---------------------------------------------------------------------------
731
+ // R07-T04: Wait / navigation control — back / forward / reload / wait_text /
732
+ // wait_load_state / wait_function
733
+ // ---------------------------------------------------------------------------
734
/**
 * Navigate one step back in history via `page.goBack`.
 *
 * @param page       Page-like object exposing `goBack` and `url`.
 * @param timeoutMs  Navigation timeout forwarded to `goBack` (default 5000).
 * @param waitUntil  Load event to wait for, forwarded to `goBack` (default `'load'`).
 * @param logger     Optional logger; `write()` receives one action record.
 * @param sessionId  Session identifier copied into the log record.
 * @param purpose    Free-form purpose copied into the log record.
 * @param operator   Operator identifier copied into the log record.
 * @returns `{ status, url, duration_ms }` with `url` read after the navigation.
 * @throws ActionDiagnosticsError built from `collectDiagnostics` on failure.
 */
async function back(page, timeoutMs = 5000, waitUntil = 'load', logger, sessionId, purpose, operator) {
    const actionUid = actionId();
    const startedAt = Date.now();
    try {
        await page.goBack({ timeout: timeoutMs, waitUntil });
        const outcome = { status: 'ok', url: page.url(), duration_ms: Date.now() - startedAt };
        logger?.write({
            session_id: sessionId,
            action_id: actionUid,
            type: 'action',
            action: 'back',
            url: page.url(),
            params: { timeout_ms: timeoutMs, wait_until: waitUntil },
            result: outcome,
            purpose,
            operator,
        });
        return outcome;
    }
    catch (err) {
        const diagnostics = await collectDiagnostics(page, startedAt, err);
        throw new ActionDiagnosticsError(diagnostics);
    }
}
747
/**
 * Navigate one step forward in history via `page.goForward`.
 *
 * @param page       Page-like object exposing `goForward` and `url`.
 * @param timeoutMs  Navigation timeout forwarded to `goForward` (default 5000).
 * @param waitUntil  Load event to wait for, forwarded to `goForward` (default `'load'`).
 * @param logger     Optional logger; `write()` receives one action record.
 * @param sessionId  Session identifier copied into the log record.
 * @param purpose    Free-form purpose copied into the log record.
 * @param operator   Operator identifier copied into the log record.
 * @returns `{ status, url, duration_ms }` with `url` read after the navigation.
 * @throws ActionDiagnosticsError built from `collectDiagnostics` on failure.
 */
async function forward(page, timeoutMs = 5000, waitUntil = 'load', logger, sessionId, purpose, operator) {
    const actionUid = actionId();
    const startedAt = Date.now();
    try {
        await page.goForward({ timeout: timeoutMs, waitUntil });
        const outcome = { status: 'ok', url: page.url(), duration_ms: Date.now() - startedAt };
        logger?.write({
            session_id: sessionId,
            action_id: actionUid,
            type: 'action',
            action: 'forward',
            url: page.url(),
            params: { timeout_ms: timeoutMs, wait_until: waitUntil },
            result: outcome,
            purpose,
            operator,
        });
        return outcome;
    }
    catch (err) {
        const diagnostics = await collectDiagnostics(page, startedAt, err);
        throw new ActionDiagnosticsError(diagnostics);
    }
}
760
/**
 * Reload the current page via `page.reload`.
 *
 * @param page       Page-like object exposing `reload` and `url`.
 * @param timeoutMs  Navigation timeout forwarded to `reload` (default 10000).
 * @param waitUntil  Load event to wait for, forwarded to `reload` (default `'load'`).
 * @param logger     Optional logger; `write()` receives one action record.
 * @param sessionId  Session identifier copied into the log record.
 * @param purpose    Free-form purpose copied into the log record.
 * @param operator   Operator identifier copied into the log record.
 * @returns `{ status, url, duration_ms }` with `url` read after the reload.
 * @throws ActionDiagnosticsError built from `collectDiagnostics` on failure.
 */
async function reload(page, timeoutMs = 10000, waitUntil = 'load', logger, sessionId, purpose, operator) {
    const actionUid = actionId();
    const startedAt = Date.now();
    try {
        await page.reload({ timeout: timeoutMs, waitUntil });
        const outcome = { status: 'ok', url: page.url(), duration_ms: Date.now() - startedAt };
        logger?.write({
            session_id: sessionId,
            action_id: actionUid,
            type: 'action',
            action: 'reload',
            url: page.url(),
            params: { timeout_ms: timeoutMs, wait_until: waitUntil },
            result: outcome,
            purpose,
            operator,
        });
        return outcome;
    }
    catch (err) {
        const diagnostics = await collectDiagnostics(page, startedAt, err);
        throw new ActionDiagnosticsError(diagnostics);
    }
}
773
/**
 * Wait until the first element located by `page.getByText(text)` is visible.
 *
 * @param page       Page-like object exposing `getByText` (and optionally `url`).
 * @param text       Text handed to `page.getByText`.
 * @param timeoutMs  Timeout forwarded to `waitFor` (default 5000).
 * @param logger     Optional logger; `write()` receives one action record.
 * @param sessionId  Session identifier copied into the log record.
 * @param purpose    Free-form purpose copied into the log record.
 * @param operator   Operator identifier copied into the log record.
 * @returns `{ status, text, duration_ms }`.
 * @throws ActionDiagnosticsError built from `collectDiagnostics` on failure or timeout.
 */
async function waitForText(page, text, timeoutMs = 5000, logger, sessionId, purpose, operator) {
    const actionUid = actionId();
    const startedAt = Date.now();
    try {
        const match = page.getByText(text).first();
        await match.waitFor({ state: 'visible', timeout: timeoutMs });
        const outcome = { status: 'ok', text, duration_ms: Date.now() - startedAt };
        logger?.write({
            session_id: sessionId,
            action_id: actionUid,
            type: 'action',
            action: 'wait_text',
            url: page.url?.(),
            params: { text, timeout_ms: timeoutMs },
            result: outcome,
            purpose,
            operator,
        });
        return outcome;
    }
    catch (err) {
        const diagnostics = await collectDiagnostics(page, startedAt, err);
        throw new ActionDiagnosticsError(diagnostics);
    }
}
786
/**
 * Wait for the page to reach a load state via `page.waitForLoadState`.
 *
 * @param page       Page-like object exposing `waitForLoadState` and `url`.
 * @param state      Load state to wait for (default `'load'`).
 * @param timeoutMs  Timeout forwarded to `waitForLoadState` (default 10000).
 * @param logger     Optional logger; `write()` receives one action record.
 * @param sessionId  Session identifier copied into the log record.
 * @param purpose    Free-form purpose copied into the log record.
 * @param operator   Operator identifier copied into the log record.
 * @returns `{ status, state, url, duration_ms }`.
 * @throws ActionDiagnosticsError built from `collectDiagnostics` on failure or timeout.
 */
async function waitForLoadState(page, state = 'load', timeoutMs = 10000, logger, sessionId, purpose, operator) {
    const actionUid = actionId();
    const startedAt = Date.now();
    try {
        await page.waitForLoadState(state, { timeout: timeoutMs });
        const outcome = { status: 'ok', state, url: page.url(), duration_ms: Date.now() - startedAt };
        logger?.write({
            session_id: sessionId,
            action_id: actionUid,
            type: 'action',
            action: 'wait_load_state',
            url: page.url(),
            params: { state, timeout_ms: timeoutMs },
            result: outcome,
            purpose,
            operator,
        });
        return outcome;
    }
    catch (err) {
        const diagnostics = await collectDiagnostics(page, startedAt, err);
        throw new ActionDiagnosticsError(diagnostics);
    }
}
799
/**
 * Wait until `expression` is satisfied in the page, via `page.waitForFunction`
 * (called with no argument and the given timeout).
 *
 * @param page        Page-like object exposing `waitForFunction` and `url`.
 * @param expression  Expression handed to `page.waitForFunction`; also written to the log.
 * @param timeoutMs   Timeout forwarded to `waitForFunction` (default 5000).
 * @param logger      Optional logger; `write()` receives one action record.
 * @param sessionId   Session identifier copied into the log record.
 * @param purpose     Free-form purpose copied into the log record.
 * @param operator    Operator identifier copied into the log record.
 * @returns `{ status, url, duration_ms }`.
 * @throws ActionDiagnosticsError built from `collectDiagnostics` on failure or timeout.
 */
async function waitForFunction(page, expression, timeoutMs = 5000, logger, sessionId, purpose, operator) {
    const actionUid = actionId();
    const startedAt = Date.now();
    try {
        await page.waitForFunction(expression, undefined, { timeout: timeoutMs });
        const outcome = { status: 'ok', url: page.url(), duration_ms: Date.now() - startedAt };
        logger?.write({
            session_id: sessionId,
            action_id: actionUid,
            type: 'action',
            action: 'wait_function',
            url: page.url(),
            params: { expression, timeout_ms: timeoutMs },
            result: outcome,
            purpose,
            operator,
        });
        return outcome;
    }
    catch (err) {
        const diagnostics = await collectDiagnostics(page, startedAt, err);
        throw new ActionDiagnosticsError(diagnostics);
    }
}
812
+ // ---------------------------------------------------------------------------
813
+ // R07-T08: Generic scroll primitives — scroll_until / load_more_until
814
+ // ---------------------------------------------------------------------------
815
/**
 * Scroll with the mouse wheel until a stop condition holds or `max_scrolls`
 * wheel events have been sent.
 *
 * Stop conditions (checked in this order):
 *   - `stop_selector` matches at least one element → `'selector_found'`
 *   - `document.body.innerText` contains `stop_text` → `'text_found'`
 *
 * The conditions are evaluated before every scroll AND once more after the
 * last scroll, so content revealed by the final wheel event is reported as
 * found rather than as `'max_scrolls'`.
 *
 * @param page       Page-like object exposing `locator`, `evaluate`, `mouse`, `url`.
 * @param opts       `{ direction = 'down', scroll_selector?, stop_selector?, stop_text?,
 *                   max_scrolls = 20, scroll_delta = 400, stall_ms = 500 }`.
 *                   `direction` is one of `up | down | left | right`; any other value
 *                   produces a zero wheel delta. When `scroll_selector` has a bounding
 *                   box the pointer is moved to its centre before each wheel event.
 *                   `stall_ms` is the pause after each scroll.
 * @param logger     Optional logger; `write()` receives one action record.
 * @param sessionId  Session identifier copied into the log record.
 * @param purpose    Free-form purpose copied into the log record.
 * @param operator   Operator identifier copied into the log record.
 * @returns `{ status, scrolls_performed, stop_reason, duration_ms }` where
 *          `stop_reason` is `'selector_found' | 'text_found' | 'max_scrolls'`.
 * @throws ActionDiagnosticsError built from `collectDiagnostics` on failure.
 */
async function scrollUntil(page, opts = {}, logger, sessionId, purpose, operator) {
    const id = actionId();
    const t0 = Date.now();
    const { direction = 'down', scroll_selector, stop_selector, stop_text, max_scrolls = 20, scroll_delta = 400, stall_ms = 500, } = opts;
    const dx = direction === 'right' ? scroll_delta : direction === 'left' ? -scroll_delta : 0;
    const dy = direction === 'down' ? scroll_delta : direction === 'up' ? -scroll_delta : 0;
    // Returns the matching stop reason, or null when no stop condition holds yet.
    const detectStop = async () => {
        if (stop_selector) {
            const count = await page.locator(stop_selector).count();
            if (count > 0) {
                return 'selector_found';
            }
        }
        if (stop_text) {
            const found = await page.evaluate((t) => globalThis.document?.body?.innerText?.includes(t) ?? false, stop_text);
            if (found) {
                return 'text_found';
            }
        }
        return null;
    };
    let scrolls = 0;
    let stop_reason = null;
    try {
        for (let i = 0; i < max_scrolls; i++) {
            // Check stop conditions before scrolling
            stop_reason = await detectStop();
            if (stop_reason) {
                break;
            }
            // Perform scroll (hovering the scroll container first, when it has a box)
            if (scroll_selector) {
                const box = await page.locator(scroll_selector).first().boundingBox();
                if (box) {
                    await page.mouse.move(box.x + box.width / 2, box.y + box.height / 2);
                }
            }
            await page.mouse.wheel(dx, dy);
            scrolls++;
            await new Promise((r) => setTimeout(r, stall_ms));
        }
        if (!stop_reason) {
            // Scroll budget exhausted: the last scroll may have revealed the target.
            stop_reason = (await detectStop()) ?? 'max_scrolls';
        }
        const r = { status: 'ok', scrolls_performed: scrolls, stop_reason, duration_ms: Date.now() - t0 };
        logger?.write({ session_id: sessionId, action_id: id, type: 'action', action: 'scroll_until', url: page.url(), params: opts, result: r, purpose, operator });
        return r;
    }
    catch (err) {
        throw new ActionDiagnosticsError(await collectDiagnostics(page, t0, err));
    }
}
859
/**
 * Repeatedly click a "load more" control until a stop condition holds.
 *
 * Before each click, in this order:
 *   - `item_count` reached by the number of `content_selector` matches → `'item_count_reached'`
 *   - `document.body.innerText` contains `stop_text` → `'text_found'`
 *   - the match count did not change since the previous iteration → `'stalled'`
 *   - `load_more_selector` matches nothing → `'load_more_gone'`
 * If none applies after `max_loads` clicks, the reason is `'max_loads'`.
 *
 * @param page       Page-like object exposing `locator`, `evaluate`, `click`, `url`.
 * @param opts       `{ load_more_selector, content_selector, item_count?, stop_text?,
 *                   max_loads = 10, stall_ms = 800 }`; `stall_ms` is the pause after each click.
 * @param logger     Optional logger; `write()` receives one action record.
 * @param sessionId  Session identifier copied into the log record.
 * @param purpose    Free-form purpose copied into the log record.
 * @param operator   Operator identifier copied into the log record.
 * @returns `{ status, loads_performed, final_count, stop_reason, duration_ms }`.
 * @throws ActionDiagnosticsError built from `collectDiagnostics` on failure.
 */
async function loadMoreUntil(page, opts, logger, sessionId, purpose, operator) {
    const actionUid = actionId();
    const startedAt = Date.now();
    const { load_more_selector, content_selector, item_count, stop_text, max_loads = 10, stall_ms = 800 } = opts;
    let clicks = 0;
    let stop_reason = 'max_loads';
    let previousTotal = -1;
    try {
        let round = 0;
        while (round < max_loads) {
            round += 1;
            const total = await page.locator(content_selector).count();
            // Enough items loaded?
            if (item_count !== undefined && total >= item_count) {
                stop_reason = 'item_count_reached';
                break;
            }
            // Sentinel text present?
            if (stop_text) {
                const textSeen = await page.evaluate((t) => globalThis.document?.body?.innerText?.includes(t) ?? false, stop_text);
                if (textSeen) {
                    stop_reason = 'text_found';
                    break;
                }
            }
            // Stall detection: the previous click produced no new items.
            if (total === previousTotal) {
                stop_reason = 'stalled';
                break;
            }
            previousTotal = total;
            // Nothing left to click?
            const buttons = await page.locator(load_more_selector).count();
            if (buttons === 0) {
                stop_reason = 'load_more_gone';
                break;
            }
            await page.click(load_more_selector);
            clicks += 1;
            await new Promise((wake) => setTimeout(wake, stall_ms));
        }
        const final_count = await page.locator(content_selector).count();
        const outcome = {
            status: 'ok',
            loads_performed: clicks,
            final_count,
            stop_reason,
            duration_ms: Date.now() - startedAt,
        };
        logger?.write({
            session_id: sessionId,
            action_id: actionUid,
            type: 'action',
            action: 'load_more_until',
            url: page.url(),
            params: opts,
            result: outcome,
            purpose,
            operator,
        });
        return outcome;
    }
    catch (err) {
        const diagnostics = await collectDiagnostics(page, startedAt, err);
        throw new ActionDiagnosticsError(diagnostics);
    }
}
265
906
  async function downloadFile(page, selector, timeoutMs = 30000, maxBytes = 50 * 1024 * 1024, logger, sessionId, purpose, operator) {
266
907
  const id = actionId();
267
908
  const t0 = Date.now();
@@ -288,4 +929,160 @@ async function downloadFile(page, selector, timeoutMs = 30000, maxBytes = 50 * 1
288
929
  throw new ActionDiagnosticsError(await collectDiagnostics(page, t0, err));
289
930
  }
290
931
  }
932
+ // ---------------------------------------------------------------------------
933
+ // R07-C04: T19 — coordinate-based primitives (click_at / wheel / insert_text)
934
+ // ---------------------------------------------------------------------------
935
/**
 * Click with the mouse at the coordinates (x, y), forwarded unchanged to
 * `page.mouse.click`.
 *
 * Recognised `opts` fields: `button` (default 'left'), `click_count`
 * (default 1) and `delay_ms` (default 0). On success the action is written
 * to `logger` when one is supplied and `{ status, x, y, duration_ms }` is
 * returned. Any failure is rethrown as an ActionDiagnosticsError built from
 * collectDiagnostics().
 */
async function clickAt(page, x, y, opts = {}, logger, sessionId, purpose, operator) {
    const action_id = actionId();
    const startedAt = Date.now();
    try {
        const mouseOptions = {
            button: opts.button ?? 'left',
            clickCount: opts.click_count ?? 1,
            delay: opts.delay_ms ?? 0,
        };
        await page.mouse.click(x, y, mouseOptions);
        const result = { status: 'ok', x, y, duration_ms: Date.now() - startedAt };
        if (logger !== undefined && logger !== null) {
            logger.write({
                session_id: sessionId,
                action_id,
                type: 'action',
                action: 'click_at',
                url: page.url(),
                params: { x, y, ...opts },
                result,
                purpose,
                operator,
            });
        }
        return result;
    }
    catch (failure) {
        const diagnostics = await collectDiagnostics(page, startedAt, failure);
        throw new ActionDiagnosticsError(diagnostics);
    }
}
953
/**
 * Scroll by sending a mouse-wheel event with deltas (dx, dy) through
 * `page.mouse.wheel`.
 *
 * Returns `{ status, dx, dy, duration_ms }` and records the action on
 * `logger` when one is supplied. Any failure is rethrown as an
 * ActionDiagnosticsError built from collectDiagnostics().
 */
async function wheelAt(page, dx, dy, logger, sessionId, purpose, operator) {
    const action_id = actionId();
    const startedAt = Date.now();
    try {
        await page.mouse.wheel(dx, dy);
        const result = { status: 'ok', dx, dy, duration_ms: Date.now() - startedAt };
        if (logger !== undefined && logger !== null) {
            logger.write({
                session_id: sessionId,
                action_id,
                type: 'action',
                action: 'wheel_at',
                url: page.url(),
                params: { dx, dy },
                result,
                purpose,
                operator,
            });
        }
        return result;
    }
    catch (failure) {
        const diagnostics = await collectDiagnostics(page, startedAt, failure);
        throw new ActionDiagnosticsError(diagnostics);
    }
}
967
/**
 * Type `text` into whatever currently has focus using
 * `page.keyboard.insertText`, i.e. without synthesising per-key
 * keydown/keyup events. Handy for emoji, CJK text and other input that key
 * sequences would mangle.
 *
 * Only the length of the text is returned and logged, never the text itself.
 * Any failure is rethrown as an ActionDiagnosticsError built from
 * collectDiagnostics().
 */
async function insertText(page, text, logger, sessionId, purpose, operator) {
    const action_id = actionId();
    const startedAt = Date.now();
    try {
        await page.keyboard.insertText(text);
        const result = { status: 'ok', length: text.length, duration_ms: Date.now() - startedAt };
        if (logger !== undefined && logger !== null) {
            logger.write({
                session_id: sessionId,
                action_id,
                type: 'action',
                action: 'insert_text',
                url: page.url(),
                params: { length: text.length },
                result,
                purpose,
                operator,
            });
        }
        return result;
    }
    catch (failure) {
        const diagnostics = await collectDiagnostics(page, startedAt, failure);
        throw new ActionDiagnosticsError(diagnostics);
    }
}
985
/**
 * Return the bounding box of the first element matching *selector*.
 *
 * The result always has the same shape:
 * `{ status, selector, found, x, y, width, height, center_x, center_y, duration_ms }`.
 * When the element cannot be located within 2 s, or `boundingBox()` yields
 * null, `found` is false and all geometry fields are 0.
 *
 * @throws {ActionDiagnosticsError} for every failure other than the lookup
 *   timing out (for example an error raised while resolving the selector).
 */
async function getBbox(page, selector, logger, sessionId, purpose, operator) {
    const id = actionId();
    const t0 = Date.now();
    try {
        // Use a short timeout so non-existent selectors return found:false quickly
        // instead of waiting for the default 30s Playwright timeout.
        let box = null;
        try {
            box = await page.locator(selector).first().boundingBox({ timeout: 2000 });
        }
        catch (lookupErr) {
            // Only a timeout means "element not found". Anything else is a real
            // failure and must not be disguised as found:false.
            if (lookupErr?.name !== 'TimeoutError') {
                throw lookupErr;
            }
        }
        const duration_ms = Date.now() - t0;
        const r = box
            ? {
                status: 'ok', selector, found: true,
                x: box.x, y: box.y, width: box.width, height: box.height,
                center_x: box.x + box.width / 2, center_y: box.y + box.height / 2,
                duration_ms,
            }
            : { status: 'ok', selector, found: false, x: 0, y: 0, width: 0, height: 0, center_x: 0, center_y: 0, duration_ms };
        logger?.write({ session_id: sessionId, action_id: id, type: 'action', action: 'bbox', url: page.url(), params: { selector }, result: r, purpose, operator });
        return r;
    }
    catch (err) {
        throw new ActionDiagnosticsError(await collectDiagnostics(page, t0, err));
    }
}
1018
+ // ---------------------------------------------------------------------------
1019
+ // R07-C04: T24 — viewport emulation
1020
+ // ---------------------------------------------------------------------------
1021
/**
 * Resize the page viewport to `width` x `height` via `page.setViewportSize`.
 *
 * Returns `{ status, width, height, duration_ms }` and records the action on
 * `logger` when one is supplied. Any failure is rethrown as an
 * ActionDiagnosticsError built from collectDiagnostics().
 */
async function setViewport(page, width, height, logger, sessionId, purpose, operator) {
    const action_id = actionId();
    const startedAt = Date.now();
    try {
        const size = { width, height };
        await page.setViewportSize(size);
        const result = { status: 'ok', width, height, duration_ms: Date.now() - startedAt };
        if (logger !== undefined && logger !== null) {
            logger.write({
                session_id: sessionId,
                action_id,
                type: 'action',
                action: 'set_viewport',
                url: page.url(),
                params: { width, height },
                result,
                purpose,
                operator,
            });
        }
        return result;
    }
    catch (failure) {
        const diagnostics = await collectDiagnostics(page, startedAt, failure);
        throw new ActionDiagnosticsError(diagnostics);
    }
}
1034
+ // ---------------------------------------------------------------------------
1035
+ // R07-C04: T23 — clipboard read/write
1036
+ // ---------------------------------------------------------------------------
1037
/**
 * Write *text* to the clipboard from inside the page.
 *
 * Uses `navigator.clipboard.writeText` when the page exposes it; otherwise
 * falls back to copying from a temporary off-screen <textarea> with
 * `document.execCommand('copy')`.
 *
 * Only the length of the text is returned and logged, never the text itself.
 *
 * @returns {Promise<{status: string, length: number, duration_ms: number}>}
 * @throws {ActionDiagnosticsError} when either copy path fails, including
 *   the fallback reporting (by returning false) that nothing was copied.
 */
async function clipboardWrite(page, text, logger, sessionId, purpose, operator) {
    const id = actionId();
    const t0 = Date.now();
    try {
        // The callback is evaluated in the page, so it must be self-contained.
        await page.evaluate(async (t) => {
            if (navigator.clipboard?.writeText) {
                await navigator.clipboard.writeText(t);
                return;
            }
            // execCommand fallback (deprecated but reliable in Chromium)
            // Use globalThis to avoid TypeScript node-lib "document not found" error
            const doc = globalThis.document;
            const el = doc.createElement('textarea');
            el.value = t;
            el.style.position = 'fixed';
            el.style.opacity = '0';
            doc.body.appendChild(el);
            try {
                el.select();
                // execCommand signals failure through its return value, not by
                // throwing; without this check a failed copy would report 'ok'.
                if (!doc.execCommand('copy')) {
                    throw new Error("document.execCommand('copy') returned false; clipboard was not updated");
                }
            }
            finally {
                // Never leave the helper element behind, even on failure.
                el.remove();
            }
        }, text);
        const r = { status: 'ok', length: text.length, duration_ms: Date.now() - t0 };
        logger?.write({ session_id: sessionId, action_id: id, type: 'action', action: 'clipboard_write', url: page.url(), params: { length: text.length }, result: r, purpose, operator });
        return r;
    }
    catch (err) {
        throw new ActionDiagnosticsError(await collectDiagnostics(page, t0, err));
    }
}
1068
/**
 * Read the clipboard's text by calling `navigator.clipboard.readText()`
 * inside the page.
 *
 * This needs the `clipboard-read` permission and may fail in restricted
 * headless environments; grant it with
 * `context.grantPermissions(['clipboard-read'])`.
 *
 * The returned object carries the text (`{ status, text, duration_ms }`),
 * but the log entry records only its length. Any failure is rethrown as an
 * ActionDiagnosticsError built from collectDiagnostics().
 */
async function clipboardRead(page, logger, sessionId, purpose, operator) {
    const action_id = actionId();
    const startedAt = Date.now();
    try {
        const text = await page.evaluate(async () => navigator.clipboard.readText());
        const result = { status: 'ok', text, duration_ms: Date.now() - startedAt };
        if (logger !== undefined && logger !== null) {
            // Log a redacted copy of the result: length only, not the content.
            logger.write({
                session_id: sessionId,
                action_id,
                type: 'action',
                action: 'clipboard_read',
                url: page.url(),
                params: {},
                result: { status: 'ok', length: text.length, duration_ms: result.duration_ms },
                purpose,
                operator,
            });
        }
        return result;
    }
    catch (failure) {
        const diagnostics = await collectDiagnostics(page, startedAt, failure);
        throw new ActionDiagnosticsError(diagnostics);
    }
}
291
1088
  //# sourceMappingURL=actions.js.map