@web-auto/camo 0.1.25 → 0.1.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -473,6 +473,7 @@ Condition types:
473
473
 
474
474
  ### Environment Variables
475
475
 
476
+ - `CAMO_INPUT_MODE` - Input mode: `playwright` (default) or `cdp`. CDP mode dispatches mouse events with `Input.dispatchMouseEvent` over the Chrome DevTools Protocol, bypassing the OS-level input system, so the browser window does not need to be in the foreground. See [CDP Input Mode](#cdp-input-mode) below.
476
477
  - `CAMO_BROWSER_URL` - Browser service URL (default: `http://127.0.0.1:7704`)
477
478
  - `CAMO_INSTALL_DIR` - `@web-auto/camo` install directory (optional; fallback for first-time installation)
478
479
  - `CAMO_REPO_ROOT` - Camo repository root (optional, dev mode)
@@ -484,6 +485,51 @@ Condition types:
484
485
  - `CAMO_PROGRESS_WS_HOST` / `CAMO_PROGRESS_WS_PORT` - Progress websocket daemon bind address (default: `127.0.0.1:7788`)
485
486
  - `CAMO_DEFAULT_WINDOW_VERTICAL_RESERVE` - Reserved vertical pixels for default headful auto-size
486
487
 
488
+ ### CDP Input Mode
489
+
490
+ By default, Camo uses Playwright's high-level input API (`page.mouse.click`), which goes through the OS input system and requires the browser window to be in the foreground. This can cause hangs (up to 30s timeout) on Windows when the window loses focus.
491
+
492
+ CDP mode sends mouse events directly via the Chrome DevTools Protocol (`Input.dispatchMouseEvent`), which:
493
+
494
+ - **Does not require window foreground** — works with minimized, background, or headless windows
495
+ - **Does not depend on OS input system** — no `bringToFront`, no `ensureInputReady`
496
+ - **Bypasses input pipeline checks** — no 30s timeout risk from `ensureInputReady` hanging
497
+
498
+ #### How to enable
499
+
500
+ ```bash
501
+ # Environment variable (recommended)
502
+ CAMO_INPUT_MODE=cdp camo start xhs-qa-1 --url https://www.xiaohongshu.com
503
+
504
+ # Or set in shell profile
505
+ export CAMO_INPUT_MODE=cdp
506
+ ```
507
+
508
+ #### Behavior differences
509
+
510
+ | Feature | Playwright (default) | CDP mode |
511
+ |---------|---------------------|----------|
512
+ | Window foreground required | Yes | No |
513
+ | OS input system | Yes | No |
514
+ | Auto-scroll to element | Yes (via Playwright) | No (caller must ensure element in viewport) |
515
+ | `ensureInputReady` check | Yes (can hang 30s) | Skipped |
516
+ | `bringToFront` | Yes (default) | Skipped |
517
+ | Nudge/recovery on timeout | Yes | No (fast fail) |
518
+ | Input coordinate system | Viewport-relative | Viewport-relative (same) |
519
+
520
+ #### Limitations
521
+
522
+ - **Element must be in viewport**: CDP clicks at coordinates only. If the target element is scrolled out of view, the click will miss. Callers (like webauto's `clickPoint`) already resolve viewport-relative coordinates via `getBoundingClientRect`.
523
+ - **No auto-scroll**: Unlike Playwright's `page.click(selector)`, CDP mode does not scroll to bring elements into view.
524
+ - **Keyboard operations still use Playwright**: `keyboard:press` and `keyboard:type` are not affected by CDP mode (they already work reliably in the background via Playwright's keyboard API).
525
+
526
+ #### Related environment variables
527
+
528
+ - `CAMO_INPUT_ACTION_TIMEOUT_MS` — Max wait for input action (default: 30000)
529
+ - `CAMO_INPUT_ACTION_MAX_ATTEMPTS` — Retry count on failure (default: 2)
530
+ - `CAMO_INPUT_READY_SETTLE_MS` — Settle time after input ready (default: 80)
531
+ - `CAMO_BRING_TO_FRONT_MODE` — `never` (skip) or `auto` (default, bring window to front)
532
+
487
533
  ## Session Persistence
488
534
 
489
535
  Camo CLI persists session information locally:
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@web-auto/camo",
3
- "version": "0.1.25",
3
+ "version": "0.1.26",
4
4
  "description": "Camoufox Browser CLI - Cross-platform browser automation",
5
5
  "type": "module",
6
6
  "bin": {
@@ -1,4 +1,39 @@
1
1
  import { isTimeoutLikeError } from './utils.js';
2
+ import { resolveInputMode } from './utils.js';
3
+
4
/**
 * Open a Chrome DevTools Protocol session bound to the given page.
 *
 * @param {object} page - Page-like object exposing `context()`; the returned
 *   context must expose `newCDPSession(page)`.
 * @returns {Promise<object>} Whatever `context.newCDPSession(page)` resolves to.
 * @throws {Error} When the session cannot be opened. The underlying failure is
 *   attached as `cause`, and its message is kept inside the new message so
 *   message-based classification of the error still sees the original text.
 */
async function createCDPSession(page) {
  const context = page.context();
  try {
    return await context.newCDPSession(page);
  } catch (err) {
    // Point at the setting that triggered this code path; otherwise the caller
    // only sees a low-level protocol/driver error.
    const detail = String(err?.message || err || 'unknown error');
    throw new Error(`Failed to open CDP session for CAMO_INPUT_MODE=cdp: ${detail}`, { cause: err });
  }
}
8
+
9
/**
 * Dispatch one mouse press/release pair at a point through a CDP session.
 *
 * Sends `Input.dispatchMouseEvent` twice (`mousePressed`, then `mouseReleased`)
 * with coordinates rounded to integers.
 *
 * @param {object} cdp - Session-like object exposing `send(method, params)`.
 * @param {number} x - X coordinate; rounded with `Math.round`.
 * @param {number} y - Y coordinate; rounded with `Math.round`.
 * @param {string} [button='left'] - `left`, `right` or `middle`; any other
 *   value falls back to `left`.
 * @param {number} [delay=50] - Milliseconds to wait between press and release;
 *   no wait when not greater than 0.
 * @param {number} [clickCount=1] - Value sent as `clickCount` on both events
 *   (e.g. 2 for the second click of a double-click). Values that are not
 *   positive integers fall back to 1.
 * @returns {Promise<void>}
 * @throws {TypeError} When either coordinate is not a finite number.
 */
async function cdpMouseClick(cdp, x, y, button = 'left', delay = 50, clickCount = 1) {
  const px = Math.round(Number(x));
  const py = Math.round(Number(y));
  // Fail before talking to the browser: NaN/Infinity cannot be represented in
  // the JSON payload and would only surface as an opaque protocol error.
  if (!Number.isFinite(px) || !Number.isFinite(py)) {
    throw new TypeError(`cdpMouseClick requires finite coordinates, got x=${String(x)} y=${String(y)}`);
  }

  const normalizedButton = ['left', 'right', 'middle'].includes(button) ? button : 'left';
  const count = Number.isInteger(clickCount) && clickCount > 0 ? clickCount : 1;
  const point = { x: px, y: py, button: normalizedButton, clickCount: count };

  await cdp.send('Input.dispatchMouseEvent', { type: 'mousePressed', ...point });
  if (delay > 0) {
    await new Promise((resolve) => setTimeout(resolve, delay));
  }
  await cdp.send('Input.dispatchMouseEvent', { type: 'mouseReleased', ...point });
}
29
+
30
/**
 * Dispatch a single `mouseMoved` event at a point through a CDP session.
 *
 * @param {object} cdp - Session-like object exposing `send(method, params)`.
 * @param {number} x - X coordinate; rounded with `Math.round`.
 * @param {number} y - Y coordinate; rounded with `Math.round`.
 * @returns {Promise<void>}
 * @throws {TypeError} When either coordinate is not a finite number.
 */
async function cdpMouseMove(cdp, x, y) {
  const px = Math.round(Number(x));
  const py = Math.round(Number(y));
  // Reject NaN/Infinity locally instead of sending an unrepresentable payload.
  if (!Number.isFinite(px) || !Number.isFinite(py)) {
    throw new TypeError(`cdpMouseMove requires finite coordinates, got x=${String(x)} y=${String(y)}`);
  }
  await cdp.send('Input.dispatchMouseEvent', { type: 'mouseMoved', x: px, y: py });
}
2
37
 
3
38
  async function readInteractiveViewport(page) {
4
39
  const fallback = page.viewportSize?.() || null;
@@ -39,9 +74,34 @@ export class BrowserSessionInputOps {
39
74
  this.withInputActionLock = withInputActionLock;
40
75
  const envMode = String(process.env.CAMO_SCROLL_INPUT_MODE || '').trim().toLowerCase();
41
76
  this.wheelMode = envMode === 'keyboard' ? 'keyboard' : 'wheel';
77
+ this.inputMode = resolveInputMode();
42
78
  }
43
79
  async mouseClick(opts) {
44
80
  const page = await this.ensurePrimaryPage();
81
+
82
+ if (this.inputMode === 'cdp') {
83
+ const { x, y, button = 'left', clicks = 1, delay = 50 } = opts;
84
+ let cdp = null;
85
+ try {
86
+ cdp = await createCDPSession(page);
87
+ for (let i = 0; i < clicks; i++) {
88
+ if (i > 0) {
89
+ await new Promise(r => setTimeout(r, 100 + Math.random() * 100));
90
+ }
91
+ await this.withInputActionLock(async () => {
92
+ await this.runInputAction(page, 'mouse:click(cdp)', async () => {
93
+ await cdpMouseClick(cdp, x, y, button, delay);
94
+ });
95
+ });
96
+ }
97
+ } finally {
98
+ if (cdp) {
99
+ await cdp.detach().catch(() => {});
100
+ }
101
+ }
102
+ return;
103
+ }
104
+
45
105
  await this.withInputActionLock(async () => {
46
106
  await this.runInputAction(page, 'input:ready', (activePage) => this.ensureInputReady(activePage));
47
107
  const { x, y, button = 'left', clicks = 1, delay = 50, nudgeBefore = false } = opts;
@@ -1,4 +1,4 @@
1
- import { resolveInputActionMaxAttempts, resolveInputActionTimeoutMs, resolveInputRecoveryBringToFrontTimeoutMs, resolveInputRecoveryDelayMs, resolveInputReadySettleMs, shouldSkipBringToFront } from './utils.js';
1
+ import { resolveInputActionMaxAttempts, resolveInputActionTimeoutMs, resolveInputMode, resolveInputRecoveryBringToFrontTimeoutMs, resolveInputRecoveryDelayMs, resolveInputReadySettleMs, shouldSkipBringToFront } from './utils.js';
2
2
  import { ensurePageRuntime } from '../pageRuntime.js';
3
3
  export class BrowserInputPipeline {
4
4
  ensurePrimaryPage;
@@ -9,6 +9,8 @@ export class BrowserInputPipeline {
9
9
  }
10
10
  inputActionTail = Promise.resolve();
11
11
  async ensureInputReady(page) {
12
+ if (resolveInputMode() === 'cdp')
13
+ return;
12
14
  if (this.isHeadless())
13
15
  return;
14
16
  if (shouldSkipBringToFront()) {
@@ -46,6 +46,12 @@ export function isTimeoutLikeError(error) {
46
46
  const message = String(error?.message || error || '').toLowerCase();
47
47
  return message.includes('timed out') || message.includes('timeout');
48
48
  }
49
+
50
/**
 * Resolve the configured input mode from `CAMO_INPUT_MODE`.
 *
 * The value is trimmed and lower-cased; only `cdp` selects CDP mode, and
 * everything else (including unset or empty) yields `playwright`.
 *
 * @param {object} [env=process.env] - Environment map to read from. Defaults to
 *   the process environment, so zero-argument calls behave as before.
 * @returns {'cdp' | 'playwright'} The resolved input mode.
 */
export function resolveInputMode(env = process.env) {
  const raw = String(env?.CAMO_INPUT_MODE ?? '').trim().toLowerCase();
  return raw === 'cdp' ? 'cdp' : 'playwright';
}
54
+
49
55
  export function normalizeUrl(raw) {
50
56
  try {
51
57
  const url = new URL(raw);