@wong2kim/wmux 1.1.2 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -2,25 +2,40 @@
2
2
 
3
3
  **AI Agent Terminal for Windows**
4
4
 
5
- Run Claude Code, Codex, Gemini CLI side by side — with built-in browser, smart notifications, and MCP integration.
5
+ Run Claude Code, Codex, Gemini CLI side by side — with built-in browser automation, smart notifications, and MCP integration.
6
6
 
7
7
  Inspired by [cmux](https://github.com/manaflow-ai/cmux) (macOS), wmux brings the same philosophy to Windows: **a primitive, not a solution.** Composable building blocks for multi-agent workflows.
8
8
 
9
9
  ![Windows](https://img.shields.io/badge/Windows-10%2F11-0078D6?logo=windows)
10
10
  ![Electron](https://img.shields.io/badge/Electron-41-47848F?logo=electron)
11
+ ![npm](https://img.shields.io/npm/v/@wong2kim/wmux?color=CB3837&logo=npm)
11
12
  ![License](https://img.shields.io/badge/License-MIT-green)
12
13
 
13
14
  ---
14
15
 
15
16
  ## Install
16
17
 
17
- **Download:** [wmux-1.1.2 Setup.exe](https://github.com/openwong2kim/wmux/releases/latest)
18
+ **Download:** [wmux-2.0.0 Setup.exe](https://github.com/openwong2kim/wmux/releases/latest)
18
19
 
19
20
  Or build from source:
20
21
  ```powershell
21
22
  irm https://raw.githubusercontent.com/openwong2kim/wmux/main/install.ps1 | iex
22
23
  ```
23
24
 
25
+ **npm (CLI + MCP server only):**
26
+ ```bash
27
+ npm install -g @wong2kim/wmux
28
+ ```
29
+
30
+ ---
31
+
32
+ ## What's New in v2.0.0
33
+
34
+ - **Browser automation via CDP** — Click, fill, type, screenshot directly through Chrome DevTools Protocol. Works with React inputs, CJK text, and controlled components.
35
+ - **Security hardening** — Token auth on all pipes, SSRF protection, input sanitization, randomized CDP ports, memory pressure watchdog.
36
+ - **Workspace reset** — One-click reset in Settings to clean all workspaces and start fresh.
37
+ - **Daemon process** — Background session management with suspend/resume, scrollback persistence, and auto-recovery.
38
+
24
39
  ---
25
40
 
26
41
  ## Why wmux?
@@ -28,7 +43,7 @@ irm https://raw.githubusercontent.com/openwong2kim/wmux/main/install.ps1 | iex
28
43
  | Problem | wmux |
29
44
  |---------|------|
30
45
  | Windows has no cmux | Native Windows terminal multiplexer for AI agents |
31
- | Agents can't see the browser | Built-in browser with MCP — Claude clicks, fills, evaluates JS |
46
+ | Agents can't control the browser | Built-in browser with CDP — Claude clicks, fills, types, screenshots |
32
47
  | "Is it done yet?" | Smart activity-based notifications + taskbar flash |
33
48
  | Can't compare agents | Multiview — Ctrl+click workspaces to view side by side |
34
49
  | Hard to describe UI elements to LLM | Inspector — click any element, LLM-friendly context copied |
@@ -51,22 +66,19 @@ irm https://raw.githubusercontent.com/openwong2kim/wmux/main/install.ps1 | iex
51
66
  - Sidebar with drag-and-drop reordering
52
67
  - `Ctrl+1` ~ `Ctrl+9` quick switch
53
68
  - **Multiview** — `Ctrl+click` workspaces to split-view them simultaneously
54
- - `Ctrl+Shift+G` to exit multiview
55
69
  - **Session persistence** — workspace layout, tabs, cwd, and terminal scrollback all restored on restart
70
+ - **One-click reset** — Settings > General > Reset to clean all workspaces
56
71
 
57
- ### Browser
72
+ ### Browser + CDP Automation
58
73
  - Built-in browser panel — `Ctrl+Shift+L`
59
74
  - Navigation bar, DevTools, back/forward
60
- - **Element Inspector** — magnifying glass button to inspect elements
61
- - Hover to highlight, click to copy LLM-friendly context:
62
- ```
63
- [Inspector] Google (https://www.google.com/)
64
- selector: input.gLFyf
65
- <input type="text" name="q" aria-label="Search">
66
- text: ""
67
- parent: div.RNNXgb > siblings: button"Google Search", button"I'm Feeling Lucky"
68
- ```
69
- - Paste directly into Claude — it understands the element immediately
75
+ - **Element Inspector** — hover to highlight, click to copy LLM-friendly context
76
+ - **Full CDP automation via MCP:**
77
+ - Click elements by ref or CSS selector
78
+ - Fill forms with real keyboard input (handles React, CJK)
79
+ - Take screenshots via CDP `Page.captureScreenshot`
80
+ - Evaluate JavaScript with user gesture context
81
+ - Navigate, go back, press keys
70
82
 
71
83
  ### Notifications
72
84
  - **Activity-based detection** — monitors output throughput, no fragile pattern matching
@@ -83,10 +95,16 @@ wmux automatically registers its MCP server when launched. Claude Code can:
83
95
  |------|-------------|
84
96
  | `browser_open` | Open a new browser panel |
85
97
  | `browser_navigate` | Go to URL |
86
- | `browser_snapshot` | Get full page HTML |
87
- | `browser_click` | Click element by CSS selector |
88
- | `browser_fill` | Fill input field |
89
- | `browser_eval` | Execute JavaScript |
98
+ | `browser_screenshot` | Capture page as PNG (CDP) |
99
+ | `browser_snapshot` | Get page structure with interactive element refs |
100
+ | `browser_click` | Click element by ref number |
101
+ | `browser_fill` | Fill form fields by ref |
102
+ | `browser_type` | Type text into element (CDP keyboard input) |
103
+ | `browser_press_key` | Press keyboard key (Enter, Tab, etc.) |
104
+ | `browser_evaluate` | Execute JavaScript in page context |
105
+ | `browser_hover` | Hover over element |
106
+ | `browser_select` | Select dropdown options |
107
+ | `browser_scroll_into_view` | Scroll element into viewport |
90
108
  | `terminal_read` | Read terminal screen |
91
109
  | `terminal_send` | Send text to terminal |
92
110
  | `terminal_send_key` | Send key (enter, ctrl+c, etc.) |
@@ -96,12 +114,26 @@ wmux automatically registers its MCP server when launched. Claude Code can:
96
114
 
97
115
  **Multi-agent:** All browser tools accept `surfaceId` — each Claude Code session controls its own browser independently.
98
116
 
117
+ ### Security
118
+ - **Token authentication** on all IPC pipes (named pipe + session pipes)
119
+ - **SSRF protection** — URL validation blocks private IPs, file://, javascript: schemes
120
+ - **Input sanitization** — PTY command injection prevention
121
+ - **CDP port randomization** — no fixed debug port
122
+ - **Memory pressure watchdog** — auto-reaps dead sessions at 750MB, blocks new sessions at 1GB
123
+ - **Electron Fuses** — RunAsNode disabled, cookie encryption enabled
124
+
99
125
  ### Agent Status Detection
100
126
  Gate-based detection for AI coding agents:
101
127
  - Claude Code, Cursor, Aider, Codex CLI, Gemini CLI, OpenCode, GitHub Copilot CLI
102
- - Detects agent startup activates monitoring
128
+ - Detects agent startup, monitors activity
103
129
  - Critical action warnings (git push --force, rm -rf, DROP TABLE, etc.)
104
130
 
131
+ ### Daemon Process
132
+ - Background session management (survives app restart)
133
+ - Suspend/resume with scrollback buffer dump
134
+ - Auto-recovery of sessions on daemon restart
135
+ - Dead session TTL reaping (24h default)
136
+
105
137
  ### Themes
106
138
  Catppuccin, Tokyo Night, Dracula, Nord, Gruvbox, Solarized, One Dark, and more.
107
139
 
@@ -177,36 +209,43 @@ Electron Main Process
177
209
  ├── AgentDetector (gate-based agent status)
178
210
  ├── SessionManager (atomic save with .bak recovery)
179
211
  ├── ScrollbackPersistence (dump/load terminal buffers)
180
- ├── PipeServer (Named Pipe JSON-RPC)
212
+ ├── PipeServer (Named Pipe JSON-RPC + token auth)
181
213
  ├── McpRegistrar (auto-registers MCP in ~/.claude.json)
214
+ ├── WebviewCdpManager (CDP proxy to <webview> via debugger)
182
215
  ├── DaemonClient (optional daemon mode connector)
183
216
  └── ToastManager (OS notifications + taskbar flash)
184
217
 
185
218
  Renderer Process (React 19 + Zustand)
186
219
  ├── PaneContainer (recursive split layout)
187
220
  ├── Terminal (xterm.js + WebGL + scrollback restore)
188
- ├── BrowserPanel (webview + Inspector)
221
+ ├── BrowserPanel (webview + Inspector + CDP)
189
222
  ├── NotificationPanel
223
+ ├── SettingsPanel (workspace reset)
190
224
  └── Multiview grid
191
225
 
192
226
  Daemon Process (optional, standalone)
193
227
  ├── DaemonSessionManager (ConPTY lifecycle)
194
228
  ├── RingBuffer (circular scrollback buffer)
195
229
  ├── StateWriter (session suspend/resume)
196
- └── DaemonPipeServer (Named Pipe RPC)
230
+ ├── ProcessMonitor (external process watchdog)
231
+ ├── Watchdog (memory pressure escalation)
232
+ └── DaemonPipeServer (Named Pipe RPC + token auth)
197
233
 
198
234
  MCP Server (stdio)
199
- └── Bridges Claude Code ↔ wmux via Named Pipe RPC
235
+ ├── PlaywrightEngine (CDP connection, fast-fail)
236
+ ├── CDP RPC fallback (browser.screenshot, browser.evaluate, etc.)
237
+ └── Bridges Claude Code <-> wmux via Named Pipe RPC
200
238
  ```
201
239
 
202
240
  ---
203
241
 
204
242
  ## Acknowledgments
205
243
 
206
- - [cmux](https://github.com/manaflow-ai/cmux) — The macOS AI agent terminal that inspired wmux. Same philosophy: primitives over prescriptive workflows.
244
+ - [cmux](https://github.com/manaflow-ai/cmux) — The macOS AI agent terminal that inspired wmux
207
245
  - [xterm.js](https://xtermjs.org/) — Terminal rendering
208
246
  - [node-pty](https://github.com/microsoft/node-pty) — Pseudo-terminal
209
247
  - [Electron](https://www.electronjs.org/) — Desktop framework
248
+ - [Playwright](https://playwright.dev/) — Browser automation engine
210
249
 
211
250
  ---
212
251
 
@@ -34,6 +34,12 @@ exports.ALL_RPC_METHODS = [
34
34
  'browser.type.humanlike',
35
35
  'browser.cdp.target',
36
36
  'browser.cdp.info',
37
+ 'browser.cdp.send',
38
+ 'browser.screenshot',
39
+ 'browser.evaluate',
40
+ 'browser.type.cdp',
41
+ 'browser.click.cdp',
42
+ 'browser.press.cdp',
37
43
  'daemon.createSession',
38
44
  'daemon.destroySession',
39
45
  'daemon.attachSession',
@@ -7,19 +7,22 @@ exports.validateMessage = validateMessage;
7
7
  exports.createSurface = createSurface;
8
8
  exports.createLeafPane = createLeafPane;
9
9
  exports.createWorkspace = createWorkspace;
10
+ exports.validateNavigationUrl = validateNavigationUrl;
10
11
  // === Utility: generate unique IDs ===
11
12
  function generateId(prefix) {
12
13
  return `${prefix}-${crypto.randomUUID()}`;
13
14
  }
14
15
  // === Security: sanitize text before PTY write ===
15
16
  /**
16
- * Strips control characters (\r, \n, \x00-\x1f except \t) from text
17
- * that will be written to a PTY, preventing embedded command injection.
17
+ * Strips dangerous control characters from text before writing to a PTY.
18
+ * Removes: NULL byte (\x00) and C1 control characters (\x80-\x9f).
19
+ * Preserves: CR (\r), LF (\n), Tab (\t), ESC sequences (\x1b[...),
20
+ * and other standard terminal control characters needed for normal operation.
18
21
  */
19
22
  function sanitizePtyText(text) {
20
- // Remove all control chars except tab (\x09)
23
+ // Remove NULL byte and C1 control characters (U+0080–U+009F)
21
24
  // eslint-disable-next-line no-control-regex
22
- return text.replace(/[\x00-\x08\x0a-\x1f\x7f\u0080-\u009f]/g, '');
25
+ return text.replace(/[\x00\u0080-\u009f]/g, '');
23
26
  }
24
27
  /**
25
28
  * Validates and clamps a user-supplied name string.
@@ -77,3 +80,104 @@ function createWorkspace(name) {
77
80
  activePaneId: rootPane.id,
78
81
  };
79
82
  }
83
+ // === Security: URL validation for SSRF prevention ===
84
/**
 * Expands a bracket-free IPv6 literal into its eight 16-bit groups.
 *
 * Accepts `::` compression and a trailing dotted-quad (`::ffff:1.2.3.4`).
 *
 * @param {string} addr - Lower-cased IPv6 text without surrounding brackets.
 * @returns {number[] | null} Eight integers (0–0xffff), or null if `addr` is malformed.
 */
function parseIpv6Groups(addr) {
    let text = addr;
    // Fold a trailing dotted-quad into two hex groups so only one syntax remains.
    const dotted = /^(.*:)(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/.exec(text);
    if (dotted) {
        const octets = dotted.slice(2).map((part) => Number.parseInt(part, 10));
        if (octets.some((octet) => octet > 255)) {
            return null;
        }
        const high = ((octets[0] << 8) | octets[1]).toString(16);
        const low = ((octets[2] << 8) | octets[3]).toString(16);
        text = `${dotted[1]}${high}:${low}`;
    }
    const halves = text.split('::');
    if (halves.length > 2) {
        return null;
    }
    const toFields = (part) => (part === '' ? [] : part.split(':'));
    const head = toFields(halves[0]);
    const tail = halves.length === 2 ? toFields(halves[1]) : [];
    const missing = 8 - head.length - tail.length;
    // With `::` at least one group must be elided; without it all eight must be present.
    if (halves.length === 2 ? missing < 1 : missing !== 0) {
        return null;
    }
    const fields = [...head, ...Array.from({ length: missing }, () => '0'), ...tail];
    if (!fields.every((field) => /^[0-9a-f]{1,4}$/.test(field))) {
        return null;
    }
    return fields.map((field) => Number.parseInt(field, 16));
}
/**
 * Validates a URL for safe navigation. Blocks non-http(s) schemes and private,
 * link-local and null network addresses to prevent SSRF from AI agent-driven
 * browsing.
 *
 * Allows localhost/127.0.0.1/[::1] for local development servers.
 *
 * NOTE (v1 limitation): This is string-based validation only. DNS-resolved IPs
 * are not checked, so DNS rebinding attacks are not mitigated. A future version
 * should resolve hostnames and re-validate the resolved IP.
 *
 * @param {string} url - Absolute URL to validate.
 * @returns {{ valid: boolean, reason?: string }} `{ valid: true }` when navigation
 *   is allowed, otherwise `{ valid: false, reason }` describing why it was blocked.
 */
function validateNavigationUrl(url) {
    let parsed;
    try {
        parsed = new URL(url);
    }
    catch {
        return { valid: false, reason: 'Invalid URL' };
    }
    // Only allow http and https schemes
    const scheme = parsed.protocol.toLowerCase();
    if (scheme !== 'http:' && scheme !== 'https:') {
        return { valid: false, reason: `Blocked URL scheme: ${scheme}` };
    }
    // URL.hostname keeps the square brackets around IPv6 literals ("[::1]"),
    // so strip them before comparing or range-checking the address.
    const rawHost = parsed.hostname.toLowerCase();
    const isBracketed = rawHost.startsWith('[') && rawHost.endsWith(']');
    const hostname = isBracketed ? rawHost.slice(1, -1) : rawHost;
    // Allow localhost and IPv4/IPv6 loopback
    if (hostname === 'localhost' || hostname === '127.0.0.1' || hostname === '::1') {
        return { valid: true };
    }
    if (isBracketed || hostname.includes(':')) {
        const groups = parseIpv6Groups(hostname);
        if (groups === null) {
            // Fail closed: an address we cannot classify must not be navigated to.
            return { valid: false, reason: 'Blocked malformed IPv6 address' };
        }
        // Block fc00::/7 (unique local): top 7 bits are 1111 110
        if ((groups[0] & 0xfe00) === 0xfc00) {
            return { valid: false, reason: 'Blocked private IPv6 address (fc00::/7)' };
        }
        // Block fe80::/10 (link-local): top 10 bits are 1111 1110 10
        if ((groups[0] & 0xffc0) === 0xfe80) {
            return { valid: false, reason: 'Blocked link-local IPv6 address (fe80::/10)' };
        }
        // Block null IPv6 address (::) — equivalent to 0.0.0.0
        if (groups.every((group) => group === 0)) {
            return { valid: false, reason: 'Blocked null IPv6 address (equivalent to 0.0.0.0)' };
        }
        // IPv4-mapped (::ffff:a.b.c.d) and IPv4-compatible (::a.b.c.d) addresses reach
        // their embedded IPv4 host. The URL parser serialises them in hex
        // (::ffff:a00:1), so rebuild the dotted quad from the last two groups.
        const hasZeroPrefix = groups.slice(0, 5).every((group) => group === 0);
        if (hasZeroPrefix && (groups[5] === 0xffff || groups[5] === 0)) {
            const embeddedV4 = [
                groups[6] >> 8,
                groups[6] & 0xff,
                groups[7] >> 8,
                groups[7] & 0xff,
            ].join('.');
            // Recursively validate the embedded IPv4 through the same checks
            const embeddedResult = validateNavigationUrl(`http://${embeddedV4}/`);
            if (!embeddedResult.valid) {
                return { valid: false, reason: `Blocked IPv4-mapped/compatible IPv6: embedded ${embeddedV4} — ${embeddedResult.reason}` };
            }
        }
        return { valid: true };
    }
    // Check for IPv4 addresses (the URL parser has already normalised hex,
    // octal and integer spellings into dotted-decimal form)
    const ipv4Match = /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/.exec(hostname);
    if (ipv4Match) {
        const octets = ipv4Match.slice(1).map((part) => Number.parseInt(part, 10));
        // 127.0.0.1 already allowed above; block other 127.x.x.x
        if (octets[0] === 127) {
            return { valid: false, reason: 'Blocked loopback address' };
        }
        // Block 10.0.0.0/8
        if (octets[0] === 10) {
            return { valid: false, reason: 'Blocked private IP address (10.0.0.0/8)' };
        }
        // Block 172.16.0.0/12 (172.16.x.x – 172.31.x.x)
        if (octets[0] === 172 && octets[1] >= 16 && octets[1] <= 31) {
            return { valid: false, reason: 'Blocked private IP address (172.16.0.0/12)' };
        }
        // Block 192.168.0.0/16
        if (octets[0] === 192 && octets[1] === 168) {
            return { valid: false, reason: 'Blocked private IP address (192.168.0.0/16)' };
        }
        // Block 169.254.0.0/16 (link-local, includes cloud metadata 169.254.169.254)
        if (octets[0] === 169 && octets[1] === 254) {
            return { valid: false, reason: 'Blocked link-local/cloud metadata address (169.254.0.0/16)' };
        }
        // Block 0.0.0.0
        if (octets.every((o) => o === 0)) {
            return { valid: false, reason: 'Blocked null address (0.0.0.0)' };
        }
    }
    return { valid: true };
}