cdp-skill 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,130 @@
1
+ # CDP Browser Automation Skill
2
+
3
+ A lightweight, zero-dependency browser automation library using Chrome DevTools Protocol (CDP). Designed for AI agents like Claude Code to control Chrome through simple JSON commands.
4
+
5
+ ## Why CDP Skill?
6
+
7
+ - **Zero dependencies** - Pure Node.js, no Playwright/Puppeteer overhead
8
+ - **AI-agent optimized** - JSON in, JSON out; designed for LLM tool use
9
+ - **Auto-launch Chrome** - Detects and starts Chrome automatically on macOS, Linux, Windows
10
+ - **Accessibility-first** - ARIA snapshots with element refs for resilient automation
11
+ - **Battle-tested** - 600+ unit tests
12
+
13
+ ## Quick Start
14
+
15
+ ```bash
16
+ # Check Chrome status (auto-launches if needed)
17
+ node src/cdp-skill.js '{"steps":[{"chromeStatus":true}]}'
18
+
19
+ # Navigate to a page
20
+ node src/cdp-skill.js '{"steps":[{"goto":"https://google.com"}]}'
21
+ ```
22
+
23
+ ## Features
24
+
25
+ ### Chrome Management
26
+ - **Auto-launch** - Detects Chrome path on macOS/Linux/Windows, launches with remote debugging
27
+ - **Status check** - `chromeStatus` step reports running state, version, and open tabs
28
+ - **Multi-agent safe** - Multiple agents can share Chrome; each manages their own tabs
29
+ - **Headless support** - Run Chrome without UI via `{"chromeStatus":{"headless":true}}`
30
+
31
+ ### Navigation
32
+ - **URL navigation** - `goto`, `back`, `forward`
33
+ - **Wait conditions** - Network idle, DOM ready, element visible, text appears, URL changes
34
+ - **Navigation detection** - Automatic navigation tracking after clicks
35
+
36
+ ### Element Interaction
37
+ - **Click** - CSS selectors, ARIA refs, or x/y coordinates
38
+ - **Fill & Type** - Input filling with React/controlled component support
39
+ - **Keyboard** - Key presses, combos (`Control+a`, `Meta+Shift+Enter`)
40
+ - **Hover** - Mouse over with configurable duration
41
+ - **Drag & Drop** - Source to target with step interpolation
42
+ - **Select** - Text selection within inputs
43
+ - **Scroll** - To element, coordinates, or page top/bottom
44
+
45
+ ### Smart Waiting (Auto-Actionability)
46
+ - **Visible** - Element in DOM with dimensions, not hidden
47
+ - **Enabled** - Not disabled or aria-disabled
48
+ - **Stable** - Position unchanged for 3 animation frames
49
+ - **Unobscured** - Not covered by overlays/modals
50
+ - **Pointer events** - CSS pointer-events not disabled
51
+ - **Auto-force** - Retries with force when actionability times out
52
+
53
+ ### Accessibility & Queries
54
+ - **ARIA snapshots** - Get accessibility tree as YAML with clickable refs
55
+ - **Role queries** - Find elements by ARIA role (`button`, `textbox`, `link`, etc.)
56
+ - **CSS queries** - Traditional selector-based queries
57
+ - **Multi-query** - Batch multiple queries in one step
58
+ - **Page inspection** - Quick overview of page structure
59
+
60
+ ### Frame Support
61
+ - **List frames** - Enumerate all iframes
62
+ - **Switch context** - Execute in iframe by selector, index, or name
63
+ - **Cross-origin detection** - Identifies cross-origin frames in snapshots
64
+
65
+ ### Screenshots & PDF
66
+ - **Viewport capture** - Current view
67
+ - **Full page** - Entire scrollable area
68
+ - **Element capture** - Specific element by selector
69
+ - **PDF generation** - With metadata (page count, dimensions)
70
+ - **Temp directory** - Auto-saves to platform temp dir for relative paths
71
+
72
+ ### Data Extraction
73
+ - **Text/HTML/attributes** - Extract content from elements
74
+ - **Console logs** - Capture browser console output
75
+ - **Cookies** - Get, set, delete with expiration support
76
+ - **JavaScript eval** - Execute code in page context with serialization
77
+
78
+ ### Form Handling
79
+ - **Fill form** - Multiple fields in one step
80
+ - **Validation** - Check HTML5 constraint validation state
81
+ - **Submit** - With validation error reporting
82
+
83
+ ### Assertions
84
+ - **URL checks** - Contains, equals, matches regex
85
+ - **Text presence** - Verify text on page
86
+ - **Element state** - Check element properties
87
+
88
+ ### Device Emulation
89
+ - **Viewport presets** - iPhone, iPad, Pixel, Galaxy, desktop sizes
90
+ - **Custom dimensions** - Width, height, scale factor
91
+ - **Mobile mode** - Touch events, mobile user agent
92
+
93
+ ### Tab Management
94
+ - **List tabs** - See all open tabs with targetId
95
+ - **Close tabs** - Clean up when done
96
+ - **Tab reuse** - Pass targetId to reuse existing tab
97
+
98
+ ### Debug Mode
99
+ - **Before/after screenshots** - Capture state around each action
100
+ - **DOM snapshots** - HTML at each step
101
+ - **Output to temp dir** - Automatic cleanup-friendly location
102
+
103
+ ## Documentation
104
+
105
+ - **[SKILL.md](./SKILL.md)** - Complete step reference and API documentation
106
+ - **[src/](./src/)** - Source code with JSDoc comments
107
+
108
+ ## Architecture
109
+
110
+ ```
111
+ src/
112
+ ├── cdp-skill.js # CLI entry point
113
+ ├── cdp.js # CDP connection, discovery, Chrome launcher
114
+ ├── page.js # Page controller, navigation, cookies
115
+ ├── dom.js # Element location, input emulation, clicks
116
+ ├── aria.js # Accessibility snapshots, role queries
117
+ ├── capture.js # Screenshots, PDF, console, network
118
+ ├── runner.js # Step validation and execution
119
+ ├── utils.js # Errors, key validation, device presets
120
+ └── index.js # Public API exports
121
+ ```
122
+
123
+ ## Requirements
124
+
125
+ - Node.js 22+
126
+ - Chrome or Chromium (auto-detected)
127
+
128
+ ## License
129
+
130
+ MIT
package/SKILL.md CHANGED
@@ -1,13 +1,13 @@
1
1
  ---
2
2
  name: cdp-skill
3
- description: Automate Chrome browser interactions via JSON piped to a Node.js CLI. Use when you need to navigate websites, fill forms, click elements, take screenshots, extract data, or run end-to-end browser tests. Supports accessibility snapshots for resilient element targeting.
3
+ description: Automate Chrome browser interactions via JSON passed to a Node.js CLI. Use when you need to navigate websites, fill forms, click elements, take screenshots, extract data, or run end-to-end browser tests. Supports accessibility snapshots for resilient element targeting.
4
4
  license: MIT
5
- compatibility: Requires Chrome/Chromium running with --remote-debugging-port=9222 and Node.js.
5
+ compatibility: Requires Chrome/Chromium (auto-launched if not running) and Node.js.
6
6
  ---
7
7
 
8
8
  # CDP Browser Automation Skill
9
9
 
10
- Automate Chrome browser interactions via JSON piped to a Node.js CLI. Produce JSON step definitions, not JavaScript code.
10
+ Automate Chrome browser interactions via JSON passed to a Node.js CLI. Produce JSON step definitions, not JavaScript code.
11
11
 
12
12
  ## Purpose
13
13
 
@@ -19,34 +19,49 @@ This skill enables **AI-powered browser automation**. The intended workflow:
19
19
 
20
20
  ## Quick Start
21
21
 
22
- Chrome must be running with remote debugging:
22
+ **Step 1: Check Chrome status (auto-launches if needed)**
23
23
  ```bash
24
- # macOS
25
- /Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome --remote-debugging-port=9222
24
+ node src/cdp-skill.js '{"steps":[{"chromeStatus":true}]}'
25
+ ```
26
26
 
27
- # Linux
28
- google-chrome --remote-debugging-port=9222
27
+ Returns:
28
+ ```json
29
+ {
30
+ "status": "passed",
31
+ "chrome": {
32
+ "running": true,
33
+ "launched": true,
34
+ "version": "Chrome/120.0.6099.109",
35
+ "port": 9222,
36
+ "tabs": [{"targetId": "ABC123", "url": "about:blank", "title": ""}]
37
+ }
38
+ }
39
+ ```
29
40
 
30
- # Windows
31
- chrome.exe --remote-debugging-port=9222
41
+ The skill auto-detects Chrome location on macOS, Linux, and Windows. Set `CHROME_PATH` environment variable for custom installations.
42
+
43
+ **Step 2: Execute automation steps**
44
+ ```bash
45
+ node src/cdp-skill.js '{"steps":[{"goto":"https://google.com"}]}'
32
46
  ```
33
47
 
34
- Execute steps:
48
+ Stdin pipe also works:
35
49
  ```bash
36
- echo '{"steps":[{"goto":"https://example.com"}]}' | node src/cli.js
50
+ echo '{"steps":[{"goto":"https://google.com"}]}' | node src/cdp-skill.js
37
51
  ```
38
52
 
39
53
  ### Tab Reuse (Critical)
40
54
 
41
- The first invocation creates a new tab and returns a `targetId`. **Include this in ALL subsequent calls** to reuse the same tab:
55
+ Use a `targetId` from the `chromeStatus` response or from a previous step's output. **Include `targetId` in ALL subsequent calls** to reuse the same tab:
42
56
 
43
57
  ```bash
44
- # First call - extract targetId from response
45
- RESULT=$(echo '{"steps":[{"goto":"https://example.com"}]}' | node src/cli.js)
46
- TARGET_ID=$(echo "$RESULT" | jq -r '.tab.targetId')
58
+ # Get available tabs from chromeStatus
59
+ RESULT=$(node src/cdp-skill.js '{"steps":[{"chromeStatus":true}]}')
60
+ TARGET_ID=$(echo "$RESULT" | jq -r '.chrome.tabs[0].targetId')
47
61
 
48
- # All subsequent calls - include targetId
49
- echo "{\"config\":{\"targetId\":\"$TARGET_ID\"},\"steps\":[{\"click\":\"#btn\"}]}" | node src/cli.js
62
+ # Use targetId for all subsequent calls
63
+ node src/cdp-skill.js "{\"config\":{\"targetId\":\"$TARGET_ID\"},\"steps\":[{\"goto\":\"https://google.com\"}]}"
64
+ node src/cdp-skill.js "{\"config\":{\"targetId\":\"$TARGET_ID\"},\"steps\":[{\"click\":\"#btn\"}]}"
50
65
  ```
51
66
 
52
67
  Omitting `targetId` creates orphan tabs that accumulate until Chrome restarts.
@@ -130,11 +145,38 @@ Refs work with: `click`, `fill`, `hover`.
130
145
 
131
146
  ## Step Reference
132
147
 
148
+ ### Chrome Management
149
+
150
+ **chromeStatus** - Check if Chrome is running, auto-launch if not
151
+ ```json
152
+ {"chromeStatus": true}
153
+ {"chromeStatus": {"autoLaunch": false}}
154
+ {"chromeStatus": {"headless": true}}
155
+ ```
156
+ Options: `autoLaunch` (default: true), `headless` (default: false)
157
+
158
+ Returns:
159
+ ```json
160
+ {
161
+ "running": true,
162
+ "launched": false,
163
+ "version": "Chrome/120.0.6099.109",
164
+ "port": 9222,
165
+ "tabs": [
166
+ {"targetId": "ABC123...", "url": "https://google.com", "title": "Google"}
167
+ ]
168
+ }
169
+ ```
170
+
171
+ If Chrome cannot be found: `{running: false, launched: false, error: "Chrome not found..."}`
172
+
173
+ **Note:** When `chromeStatus` is the only step in a request, it is lightweight - it is handled without creating a session. Use it as your first call to ensure Chrome is ready, then use a `targetId` from the tabs list for subsequent calls.
174
+
133
175
  ### Navigation
134
176
 
135
177
  **goto** - Navigate to URL
136
178
  ```json
137
- {"goto": "https://example.com"}
179
+ {"goto": "https://google.com"}
138
180
  ```
139
181
 
140
182
  **back** / **forward** - History navigation
@@ -339,9 +381,9 @@ Note: Console logs don't persist across CLI invocations.
339
381
 
340
382
  **screenshot**
341
383
  ```json
342
- {"screenshot": "./result.png"}
343
- {"screenshot": {"path": "./full.png", "fullPage": true}}
344
- {"screenshot": {"path": "./element.png", "selector": "#header"}}
384
+ {"screenshot": "result.png"}
385
+ {"screenshot": {"path": "full.png", "fullPage": true}}
386
+ {"screenshot": {"path": "/absolute/path/element.png", "selector": "#header"}}
345
387
  ```
346
388
  Options: `path`, `fullPage`, `selector`, `format` (png|jpeg|webp), `quality`, `omitBackground`, `clip`
347
389
 
@@ -349,13 +391,15 @@ Returns: `{path, viewport: {width, height}, format, fullPage, selector}`
349
391
 
350
392
  **pdf**
351
393
  ```json
352
- {"pdf": "./report.pdf"}
353
- {"pdf": {"path": "./report.pdf", "landscape": true, "printBackground": true}}
394
+ {"pdf": "report.pdf"}
395
+ {"pdf": {"path": "/absolute/path/report.pdf", "landscape": true, "printBackground": true}}
354
396
  ```
355
397
  Options: `path`, `selector`, `landscape`, `printBackground`, `scale`, `paperWidth`, `paperHeight`, margins, `pageRanges`, `validate`
356
398
 
357
399
  Returns: `{path, fileSize, fileSizeFormatted, pageCount, dimensions, validation?}`
358
400
 
401
+ **Note:** For both `screenshot` and `pdf`, a file given with a relative `path` is saved under the platform temp directory (`$TMPDIR/cdp-skill/` on macOS/Linux, `%TEMP%\cdp-skill\` on Windows). Use an absolute path to save elsewhere.
402
+
359
403
 
360
404
  ### JavaScript Execution
361
405
 
@@ -368,13 +412,13 @@ Options: `expression`, `await`, `timeout`, `serialize`
368
412
 
369
413
  **Shell escaping tip:** For complex expressions with quotes or special characters, use a heredoc or JSON file:
370
414
  ```bash
371
- # Heredoc approach
372
- node src/cli.js <<'EOF'
415
+ # Heredoc approach (Unix)
416
+ node src/cdp-skill.js <<'EOF'
373
417
  {"steps":[{"eval":"document.querySelectorAll('button').length"}]}
374
418
  EOF
375
419
 
376
420
  # Or save to file and pipe
377
- cat steps.json | node src/cli.js
421
+ cat steps.json | node src/cdp-skill.js
378
422
  ```
379
423
 
380
424
  Returns typed results:
@@ -433,7 +477,7 @@ Presets: `iphone-se`, `iphone-14`, `iphone-15-pro`, `ipad`, `ipad-pro-11`, `pixe
433
477
  **cookies** - Get/set/clear cookies
434
478
  ```json
435
479
  {"cookies": {"get": true}}
436
- {"cookies": {"get": ["https://example.com"], "name": "session_id"}}
480
+ {"cookies": {"get": ["https://google.com"], "name": "session_id"}}
437
481
  {"cookies": {"set": [{"name": "token", "value": "abc", "domain": "example.com", "expires": "7d"}]}}
438
482
  {"cookies": {"delete": "session_id"}}
439
483
  {"cookies": {"clear": true}}
@@ -505,12 +549,14 @@ Capture screenshots/DOM before and after each action:
505
549
  {
506
550
  "config": {
507
551
  "debug": true,
508
- "debugOptions": {"outputDir": "./debug", "captureScreenshots": true, "captureDom": true}
552
+ "debugOptions": {"captureScreenshots": true, "captureDom": true}
509
553
  },
510
554
  "steps": [...]
511
555
  }
512
556
  ```
513
557
 
558
+ Debug output goes to the platform temp directory by default. Set `"outputDir": "/path/to/dir"` to override.
559
+
514
560
 
515
561
  ## Not Supported
516
562
 
@@ -526,16 +572,19 @@ Handle via multiple invocations:
526
572
  | Issue | Solution |
527
573
  |-------|----------|
528
574
  | Tabs accumulating | Include `targetId` in config |
529
- | CONNECTION error | Start Chrome with `--remote-debugging-port=9222` |
575
+ | CONNECTION error | Use `chromeStatus` first - it auto-launches Chrome |
576
+ | Chrome not found | Set `CHROME_PATH` environment variable |
530
577
  | Element not found | Add `wait` step first |
531
578
  | Clicks not working | Scroll element into view first |
532
579
 
533
580
  ## Best Practices
534
581
 
535
- 1. **Discover before interacting** - Use `inspect` and `snapshot` to understand page structure
536
- 2. **Use website navigation** - Click links and submit forms; don't guess URLs
537
- 3. **Be persistent** - Try alternative selectors, add waits, scroll first
538
- 4. **Prefer refs** - Use `snapshot` + refs over brittle CSS selectors
582
+ 1. **Start with chromeStatus** - Ensures Chrome is running and gives you available tabs
583
+ 2. **Reuse only your own tabs** - Always pass `targetId` from your previous response; other agents may be using the same browser
584
+ 3. **Discover before interacting** - Use `inspect` and `snapshot` to understand page structure
585
+ 4. **Use website navigation** - Click links and submit forms; don't guess URLs
586
+ 5. **Be persistent** - Try alternative selectors, add waits, scroll first
587
+ 6. **Prefer refs** - Use `snapshot` + refs over brittle CSS selectors
539
588
 
540
589
  ## Feedback
541
590
 
package/package.json CHANGED
@@ -1,11 +1,11 @@
1
1
  {
2
2
  "name": "cdp-skill",
3
- "version": "1.0.0",
4
- "description": "Browser automation skill using Chrome DevTools Protocol for Claude Code and Codex",
3
+ "version": "1.0.1",
4
+ "description": "Browser automation skill using Chrome DevTools Protocol for Claude Code and AI agents",
5
5
  "type": "module",
6
6
  "main": "src/index.js",
7
7
  "bin": {
8
- "cdp-skill": "src/cli.js"
8
+ "cdp-skill": "src/cdp-skill.js"
9
9
  },
10
10
  "exports": {
11
11
  ".": "./src/index.js",
@@ -26,6 +26,14 @@
26
26
  "SKILL.md",
27
27
  "src/"
28
28
  ],
29
+ "repository": {
30
+ "type": "git",
31
+ "url": "git+https://github.com/lotreace/cdp-skill.git"
32
+ },
33
+ "bugs": {
34
+ "url": "https://github.com/lotreace/cdp-skill/issues"
35
+ },
36
+ "homepage": "https://github.com/lotreace/cdp-skill#readme",
29
37
  "keywords": [
30
38
  "cdp",
31
39
  "chrome",
@@ -33,7 +41,11 @@
33
41
  "browser",
34
42
  "automation",
35
43
  "claude-code",
36
- "skill"
44
+ "ai-agent",
45
+ "skill",
46
+ "testing",
47
+ "e2e",
48
+ "web-scraping"
37
49
  ],
38
50
  "skill": {
39
51
  "name": "cdp-skill",
@@ -2,14 +2,15 @@
2
2
  /**
3
3
  * CDP Skill CLI
4
4
  *
5
- * JSON interpreter for browser automation. Reads JSON from stdin,
6
- * executes browser automation steps, and outputs JSON results.
5
+ * JSON interpreter for browser automation. Accepts JSON as argument (preferred)
6
+ * or reads from stdin (fallback).
7
7
  *
8
8
  * Usage:
9
- * echo '{"steps":[{"goto":"https://example.com"}]}' | node src/cli.js
9
+ * node src/cdp-skill.js '{"steps":[{"goto":"https://google.com"}]}'
10
+ * echo '{"steps":[...]}' | node src/cdp-skill.js
10
11
  */
11
12
 
12
- import { createBrowser } from './cdp.js';
13
+ import { createBrowser, getChromeStatus } from './cdp.js';
13
14
  import { createPageController } from './page.js';
14
15
  import { createElementLocator, createInputEmulator } from './dom.js';
15
16
  import { createScreenshotCapture, createConsoleCapture, createPdfCapture } from './capture.js';
@@ -25,15 +26,23 @@ const ErrorType = {
25
26
  };
26
27
 
27
28
  /**
28
- * Reads entire stdin and returns as string
29
+ * Reads entire stdin and returns as string (with timeout for TTY detection)
29
30
  */
30
31
  async function readStdin() {
31
32
  return new Promise((resolve, reject) => {
33
+ // If stdin is a TTY (interactive terminal) with no data, don't wait
34
+ if (process.stdin.isTTY) {
35
+ resolve('');
36
+ return;
37
+ }
38
+
32
39
  const chunks = [];
40
+ let hasData = false;
33
41
 
34
42
  process.stdin.setEncoding('utf8');
35
43
 
36
44
  process.stdin.on('data', chunk => {
45
+ hasData = true;
37
46
  chunks.push(chunk);
38
47
  });
39
48
 
@@ -44,9 +53,36 @@ async function readStdin() {
44
53
  process.stdin.on('error', err => {
45
54
  reject(err);
46
55
  });
56
+
57
+ // Timeout if no data arrives (handles edge cases)
58
+ setTimeout(() => {
59
+ if (!hasData) {
60
+ resolve('');
61
+ }
62
+ }, 100);
47
63
  });
48
64
  }
49
65
 
66
/**
 * Resolve the raw JSON input for this invocation.
 *
 * Command-line arguments take precedence: everything after the script path is
 * joined with single spaces (so JSON that the shell split into several words
 * is stitched back together) and trimmed. Only when that yields an empty
 * string does the function fall back to reading stdin.
 *
 * @returns {Promise<string>} The raw, unparsed input text
 */
async function getInput() {
  // argv[0] is the node binary and argv[1] the script path; the rest is ours
  const [, , ...cliArgs] = process.argv;
  const fromArgs = cliArgs.join(' ').trim();

  return fromArgs !== '' ? fromArgs : readStdin();
}
85
+
50
86
  /**
51
87
  * Parses JSON input and validates basic structure
52
88
  */
@@ -93,6 +129,37 @@ function errorResponse(type, message) {
93
129
  };
94
130
  }
95
131
 
132
/**
 * Check if steps contain only chromeStatus (lightweight query).
 *
 * @param {Array<Object>} steps - Step list taken from the parsed request
 * @returns {boolean} true when there is exactly one step and it carries a
 *   `chromeStatus` key whose value is not undefined; false otherwise
 */
function isChromeStatusOnly(steps) {
  if (!Array.isArray(steps) || steps.length !== 1) {
    return false;
  }

  // Optional chaining: a null step means "not chromeStatus", not a TypeError
  return steps[0]?.chromeStatus !== undefined;
}
138
+
139
/**
 * Run a stand-alone chromeStatus step and wrap the result in the CLI's
 * response envelope. No browser session is opened here; the work is
 * delegated to getChromeStatus.
 *
 * @param {Object} config - Request config; `host` and `port` are read, falling
 *   back to 'localhost' and 9222 when falsy
 * @param {Object} step - The step object; `step.chromeStatus` is either `true`
 *   or an options object with optional `autoLaunch` / `headless`
 * @returns {Promise<Object>} `{status, chrome, steps, outputs, errors}` where
 *   `status` is 'passed' when Chrome is reported running and 'failed' otherwise
 */
async function handleChromeStatus(config, step) {
  const { chromeStatus: requested } = step;

  const status = await getChromeStatus({
    host: config.host || 'localhost',
    port: config.port || 9222,
    // Auto-launch unless the caller explicitly passed autoLaunch: false
    autoLaunch: requested === true || requested?.autoLaunch !== false,
    headless: requested?.headless || false
  });

  const outcome = status.running ? 'passed' : 'failed';

  const errors = [];
  if (status.error) {
    errors.push({ step: 0, action: 'chromeStatus', error: status.error });
  }

  return {
    status: outcome,
    chrome: status,
    steps: [{ action: 'chromeStatus', status: outcome, output: status }],
    outputs: [{ step: 0, action: 'chromeStatus', output: status }],
    errors
  };
}
162
+
96
163
  /**
97
164
  * Main CLI execution
98
165
  */
@@ -101,8 +168,8 @@ async function main() {
101
168
  let pageController = null;
102
169
 
103
170
  try {
104
- // Read and parse input
105
- const input = await readStdin();
171
+ // Read and parse input (argument preferred, stdin fallback)
172
+ const input = await getInput();
106
173
  const json = parseInput(input);
107
174
 
108
175
  // Extract config with defaults
@@ -111,6 +178,13 @@ async function main() {
111
178
  const port = config.port || 9222;
112
179
  const timeout = config.timeout || 30000;
113
180
 
181
+ // Handle chromeStatus specially - no session needed
182
+ if (isChromeStatusOnly(json.steps)) {
183
+ const result = await handleChromeStatus(config, json.steps[0]);
184
+ console.log(JSON.stringify(result));
185
+ process.exit(result.status === 'passed' ? 0 : 1);
186
+ }
187
+
114
188
  // Connect to browser
115
189
  browser = createBrowser({ host, port, connectTimeout: timeout });
116
190
 
package/src/cdp.js CHANGED
@@ -3,7 +3,10 @@
3
3
  * Core CDP connection, discovery, target management, and browser client
4
4
  */
5
5
 
6
- import { timeoutError } from './utils.js';
6
+ import { spawn, execSync } from 'child_process';
7
+ import os from 'os';
8
+ import fs from 'fs';
9
+ import { timeoutError, sleep } from './utils.js';
7
10
 
8
11
  // ============================================================================
9
12
  // Connection
@@ -903,3 +906,204 @@ export function createBrowser(options = {}) {
903
906
  get sessions() { return sessionRegistry; }
904
907
  };
905
908
  }
909
+
910
+ // ============================================================================
911
+ // Chrome Launcher
912
+ // ============================================================================
913
+
914
/**
 * Candidate Chrome/Chromium executables keyed by `os.platform()` value,
 * listed in the order they are probed.
 *
 * Linux entries that do not start with '/' are command names to be resolved
 * through PATH; every other entry is a filesystem path.
 */
const CHROME_PATHS = {
  darwin: [
    '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
    '/Applications/Chromium.app/Contents/MacOS/Chromium',
    '/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary'
  ],
  linux: [
    'google-chrome',
    'google-chrome-stable',
    'chromium-browser',
    'chromium',
    '/usr/bin/google-chrome',
    '/usr/bin/chromium-browser',
    '/usr/bin/chromium',
    '/snap/bin/chromium'
  ],
  // Windows install locations hang off environment variables that are unset
  // on other platforms (and may be unset on Windows too). Drop entries whose
  // base is missing instead of producing bogus "undefined\Google\..." paths.
  win32: [
    [process.env.LOCALAPPDATA, '\\Google\\Chrome\\Application\\chrome.exe'],
    [process.env.PROGRAMFILES, '\\Google\\Chrome\\Application\\chrome.exe'],
    [process.env['PROGRAMFILES(X86)'], '\\Google\\Chrome\\Application\\chrome.exe'],
    [process.env.LOCALAPPDATA, '\\Chromium\\Application\\chrome.exe']
  ]
    .filter(([base]) => Boolean(base))
    .map(([base, suffix]) => `${base}${suffix}`)
};
937
+
938
/**
 * Find Chrome executable on the system.
 *
 * Resolution order:
 *   1. `CHROME_PATH` environment variable, if it names an existing file.
 *      A value that does not exist is ignored and probing continues.
 *   2. The CHROME_PATHS candidates for the current platform, in order.
 *      On Linux, candidates without a leading '/' are resolved via `which`;
 *      all other candidates are checked with `fs.existsSync`.
 *
 * @returns {string|null} Path to Chrome executable or null if not found
 */
export function findChromePath() {
  const override = process.env.CHROME_PATH;
  if (override && fs.existsSync(override)) {
    return override;
  }

  const platform = os.platform();
  const candidates = CHROME_PATHS[platform] || [];

  for (const candidate of candidates) {
    const isBareCommand = platform === 'linux' && !candidate.startsWith('/');

    try {
      if (isBareCommand) {
        // The candidate is interpolated into a shell command, so it must stay
        // a trusted constant from CHROME_PATHS. stderr is discarded so that
        // "not found" diagnostics from `which` do not leak into CLI output.
        const resolved = execSync(`which ${candidate}`, {
          encoding: 'utf8',
          stdio: ['ignore', 'pipe', 'ignore']
        }).trim();
        if (resolved) {
          return resolved;
        }
      } else if (fs.existsSync(candidate)) {
        return candidate;
      }
    } catch {
      // `which` exits non-zero when the command is not on PATH: try the next one
    }
  }

  return null;
}
973
+
974
/**
 * Launch Chrome with remote debugging enabled and wait until its debugging
 * endpoint answers on localhost.
 *
 * The child is spawned detached with stdio ignored and is unref'ed, so it
 * does not keep this Node process alive.
 *
 * @param {Object} [options] - Launch options
 * @param {number} [options.port=9222] - Debugging port
 * @param {string} [options.chromePath] - Custom Chrome path (defaults to findChromePath())
 * @param {boolean} [options.headless=false] - Run in headless mode
 * @param {string} [options.userDataDir] - Custom user data directory
 * @returns {Promise<{process: ChildProcess, port: number}>}
 * @throws {Error} If no Chrome executable is known, if the executable cannot
 *   be spawned, or if the debugging endpoint is not available within 10s
 */
export async function launchChrome(options = {}) {
  const {
    port = 9222,
    chromePath = findChromePath(),
    headless = false,
    userDataDir = null
  } = options;

  if (!chromePath) {
    throw new Error(
      'Chrome not found. Install Google Chrome or set CHROME_PATH environment variable.\n' +
      'Download: https://www.google.com/chrome/'
    );
  }

  const args = [
    `--remote-debugging-port=${port}`,
    '--no-first-run',
    '--no-default-browser-check'
  ];

  if (headless) {
    args.push('--headless=new');
  }

  if (userDataDir) {
    args.push(`--user-data-dir=${userDataDir}`);
  }

  const chromeProcess = spawn(chromePath, args, {
    detached: true,
    stdio: 'ignore'
  });

  // Spawn failures (ENOENT, EACCES, ...) arrive as an asynchronous 'error'
  // event. Without a listener that event is thrown as an uncaught exception
  // and takes the whole CLI down, so record it and surface it below.
  let spawnError = null;
  chromeProcess.on('error', (err) => {
    spawnError = err;
  });

  // Don't let this process keep Node alive
  chromeProcess.unref();

  // Wait for Chrome to be ready
  const discovery = createDiscovery('localhost', port, 1000);
  const maxWait = 10000;
  const startTime = Date.now();

  while (Date.now() - startTime < maxWait) {
    if (spawnError) {
      break; // no point polling for a process that never started
    }
    if (await discovery.isAvailable()) {
      return { process: chromeProcess, port };
    }
    await sleep(100);
  }

  // Kill process if it didn't start properly
  try {
    chromeProcess.kill();
  } catch { /* best effort: the process may already be gone */ }

  if (spawnError) {
    throw new Error(
      `Failed to launch Chrome at ${chromePath}: ${spawnError.message}`,
      { cause: spawnError }
    );
  }

  throw new Error(`Chrome failed to start within ${maxWait}ms`);
}
1039
+
1040
/**
 * Report whether Chrome's debugging endpoint is reachable, optionally
 * launching Chrome first, and list its version and open pages.
 *
 * Auto-launch is attempted only when the endpoint is not reachable,
 * `autoLaunch` is truthy and `host` is exactly the string 'localhost'.
 * Failures are reported through the `error` field of the returned object.
 *
 * @param {Object} [options] - Options
 * @param {string} [options.host='localhost'] - Chrome host
 * @param {number} [options.port=9222] - Chrome debugging port
 * @param {boolean} [options.autoLaunch=true] - Auto-launch if not running
 * @param {boolean} [options.headless=false] - Launch in headless mode
 * @returns {Promise<{running: boolean, launched?: boolean, version?: string, port?: number, tabs?: Array, error?: string}>}
 */
export async function getChromeStatus(options = {}) {
  const { host = 'localhost', port = 9222, autoLaunch = true, headless = false } = options;
  const discovery = createDiscovery(host, port, 2000);

  let launched = false;

  const alreadyRunning = await discovery.isAvailable();
  if (!alreadyRunning) {
    const mayLaunch = autoLaunch && host === 'localhost';
    if (!mayLaunch) {
      return {
        running: false,
        launched: false,
        error: `Chrome not running on ${host}:${port}`
      };
    }

    try {
      await launchChrome({ port, headless });
    } catch (launchFailure) {
      return { running: false, launched: false, error: launchFailure.message };
    }
    launched = true;
  }

  // Chrome is reachable at this point: collect version and open pages
  try {
    const versionInfo = await discovery.getVersion();
    const pages = await discovery.getPages();

    const browserVersion = versionInfo.browser;
    const tabs = pages.map(page => ({
      targetId: page.id,
      url: page.url,
      title: page.title
    }));

    return { running: true, launched, version: browserVersion, port, tabs };
  } catch (queryFailure) {
    return { running: false, launched, error: queryFailure.message };
  }
}
package/src/index.js CHANGED
@@ -15,7 +15,10 @@ export {
15
15
  createTargetManager,
16
16
  createSessionRegistry,
17
17
  createPageSession,
18
- createBrowser
18
+ createBrowser,
19
+ findChromePath,
20
+ launchChrome,
21
+ getChromeStatus
19
22
  } from './cdp.js';
20
23
 
21
24
  // ============================================================================