@different-ai/opencode-browser 4.3.2 → 4.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -21,9 +21,15 @@ metadata:
21
21
  3. Navigate with `browser_navigate` if needed
22
22
  4. Wait for UI using `browser_query` with `timeoutMs`
23
23
  5. Discover candidates using `browser_query` with `mode=list`
24
- 6. Click or type using `index`
24
+ 6. Click, type, or select using `index`
25
25
  7. Confirm using `browser_query` or `browser_snapshot`
26
26
 
27
+ ## Selecting options
28
+
29
+ - Use `browser_select` for native `<select>` elements
30
+ - Prefer `value` or `label`; use `optionIndex` when needed
31
+ - Example: `browser_select({ selector: "select", value: "plugin" })`
32
+
27
33
  ## Query modes
28
34
 
29
35
  - `text`: read visible text from a matched element
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # OpenCode Browser
2
2
 
3
- Browser automation plugin for [OpenCode](https://github.com/opencode-ai/opencode).
3
+ Browser automation plugin for [OpenCode](https://opencode.ai).
4
4
 
5
5
  Control your real Chromium browser (Chrome/Brave/Arc/Edge) using your existing profile (logins, cookies, bookmarks). No DevTools Protocol, no security prompts.
6
6
 
@@ -34,11 +34,15 @@ The installer will:
34
34
 
35
35
  1. Copy the extension to `~/.opencode-browser/extension/`
36
36
  2. Walk you through loading + pinning it in `chrome://extensions`
37
- 3. Ask for the extension ID and install a **Native Messaging Host manifest**
37
+ 3. Resolve a fixed extension ID (no copy/paste) and install a **Native Messaging Host manifest**
38
38
  4. Update your `opencode.json` or `opencode.jsonc` to load the plugin
39
39
 
40
+ To override the extension ID, pass `--extension-id <id>` or set `OPENCODE_BROWSER_EXTENSION_ID`.
41
+
40
42
  ### Configure OpenCode
41
43
 
44
+ > Note: the installer offers to add this configuration for you. If you answered "yes" at that prompt, you can skip this section.
45
+
42
46
  Your `opencode.json` or `opencode.jsonc` should contain:
43
47
 
44
48
  ```json
@@ -48,6 +52,12 @@ Your `opencode.json` or `opencode.jsonc` should contain:
48
52
  }
49
53
  ```
50
54
 
55
+ ### Update
56
+
57
+ ```bash
58
+ bunx @different-ai/opencode-browser@latest update
59
+ ```
60
+
51
61
  ## How it works
52
62
 
53
63
  ```
@@ -58,26 +68,80 @@ OpenCode Plugin <-> Local Broker (unix socket) <-> Native Host <-> Chrome Extens
58
68
  - The plugin talks to the broker over a local unix socket.
59
69
  - The broker forwards tool requests to the extension and enforces tab ownership.
60
70
 
71
+ ## Agent Browser mode (alpha)
72
+
73
+ Agent Browser mode is an alternate backend powered by `agent-browser` (Playwright). It runs headless and does **not** reuse your existing Chrome profile.
74
+
75
+ ### Enable locally
76
+
77
+ 1. Install `agent-browser` and Chromium:
78
+
79
+ ```bash
80
+ npm install -g agent-browser
81
+ agent-browser install
82
+ ```
83
+
84
+ 2. Set the backend mode:
85
+
86
+ ```bash
87
+ export OPENCODE_BROWSER_BACKEND=agent
88
+ ```
89
+
90
+ Optional overrides:
91
+ - `OPENCODE_BROWSER_AGENT_SESSION` (custom session name)
92
+ - `OPENCODE_BROWSER_AGENT_SOCKET` (unix socket path)
93
+ - `OPENCODE_BROWSER_AGENT_AUTOSTART=0` (disable auto-start)
94
+ - `OPENCODE_BROWSER_AGENT_DAEMON` (explicit daemon path)
95
+
96
+ ### Tailnet/remote host
97
+
98
+ On the host (e.g., `home-server.taild435d7.ts.net`), run the TCP gateway:
99
+
100
+ ```bash
101
+ OPENCODE_BROWSER_AGENT_GATEWAY_PORT=9833 node bin/agent-gateway.cjs
102
+ ```
103
+
104
+ On the client:
105
+
106
+ ```bash
107
+ export OPENCODE_BROWSER_BACKEND=agent
108
+ export OPENCODE_BROWSER_AGENT_HOST=home-server.taild435d7.ts.net
109
+ export OPENCODE_BROWSER_AGENT_PORT=9833
110
+ ```
111
+
61
112
  ## Per-tab ownership
62
113
 
63
114
  - First time a session touches a tab, the broker **auto-claims** it for that session.
64
- - Other sessions attempting to use the same tab will get an error.
65
- - Use `browser_status` to inspect claims if needed.
115
+ - Each session tracks a default tab; tools without `tabId` route to it.
116
+ - `browser_open_tab` always works; if another session owns the active tab, the new tab opens in the background.
117
+ - Claims expire after inactivity (`OPENCODE_BROWSER_CLAIM_TTL_MS`, default 5 minutes).
118
+ - Use `browser_status` or `browser_list_claims` to inspect claims if needed.
66
119
 
67
120
  ## Available tools
68
121
 
69
122
  Core primitives:
70
123
  - `browser_status`
71
124
  - `browser_get_tabs`
125
+ - `browser_list_claims`
126
+ - `browser_claim_tab`
127
+ - `browser_release_tab`
72
128
  - `browser_open_tab`
73
129
  - `browser_navigate`
74
130
  - `browser_query` (modes: `text`, `value`, `list`, `exists`, `page_text`; optional `timeoutMs`/`pollMs`)
75
- - `browser_click`
76
- - `browser_type`
77
- - `browser_select`
78
- - `browser_scroll`
131
+ - `browser_click` (optional `timeoutMs`/`pollMs`)
132
+ - `browser_type` (optional `timeoutMs`/`pollMs`)
133
+ - `browser_select` (optional `timeoutMs`/`pollMs`)
134
+ - `browser_scroll` (optional `timeoutMs`/`pollMs`)
79
135
  - `browser_wait`
80
136
 
137
+ Selector helpers (usable in `selector`):
138
+ - `label:Mailing Address: City`
139
+ - `aria:Principal Address: City`
140
+ - `placeholder:Search`, `name:email`, `role:button`, `text:Submit`
141
+ - `css:label:has(input)` to force CSS
142
+
143
+ Selector-based tools wait up to 2000ms for a match by default; set `timeoutMs: 0` to disable waiting.
144
+
81
145
  Diagnostics:
82
146
  - `browser_snapshot`
83
147
  - `browser_screenshot`
@@ -95,11 +159,11 @@ Diagnostics:
95
159
 
96
160
  **Extension says native host not available**
97
161
  - Re-run `npx @different-ai/opencode-browser install`
98
- - Confirm the extension ID you pasted matches the loaded extension in `chrome://extensions`
162
+ - If you loaded the extension under a custom ID, re-run the installer with `--extension-id <id>`
99
163
 
100
164
  **Tab ownership errors**
101
- - Use `browser_status` to see current claims
102
- - Close the other OpenCode session to release ownership
165
+ - Use `browser_status` or `browser_list_claims` to see current claims
166
+ - Use `browser_release_tab` or close the other OpenCode session to release ownership
103
167
 
104
168
  ## Uninstall
105
169
 
@@ -0,0 +1,129 @@
1
+ #!/usr/bin/env node
2
+ "use strict";
3
+
4
+ const net = require("net");
5
+ const os = require("os");
6
+ const path = require("path");
7
+ const { spawn } = require("child_process");
8
+
9
// Session name: the first non-empty of the two env vars, else "default"; the chosen value is trimmed.
+ const session =
10
+ (process.env.OPENCODE_BROWSER_AGENT_SESSION || process.env.AGENT_BROWSER_SESSION || "default").trim();
11
// Unix socket to proxy to: explicit env override, else a per-session file in the OS temp directory.
+ const socketPath =
12
+ process.env.OPENCODE_BROWSER_AGENT_SOCKET || path.join(os.tmpdir(), `agent-browser-${session}.sock`);
13
+
14
// Derives a TCP port from a session name by hashing its UTF-16 code units.
// The same name always yields the same port, in the range 49152..65534.
+ function getPortForSession(name) {
15
+ let hash = 0;
16
+ for (let i = 0; i < name.length; i++) {
17
// Equivalent to hash * 31 + charCode, written as a shift and a subtract.
+ hash = (hash << 5) - hash + name.charCodeAt(i);
18
// Truncate to a signed 32-bit integer after every step.
+ hash |= 0;
19
+ }
20
// 49152 plus an offset in [0, 16382].
+ return 49152 + (Math.abs(hash) % 16383);
21
+ }
22
+
23
// Listen address: the gateway-specific env var, then the shared host var, else "0.0.0.0".
// NOTE(review): nothing in this file authenticates clients, so the "0.0.0.0" default makes the
// agent socket reachable from every network interface on the machine; confirm this is intended.
+ const host = process.env.OPENCODE_BROWSER_AGENT_GATEWAY_HOST || process.env.OPENCODE_BROWSER_AGENT_HOST || "0.0.0.0";
24
// Listen port: the env value when it converts to a non-zero number, else the session-derived port.
// A missing, non-numeric, or "0" value falls through to getPortForSession.
+ const port =
25
+ Number(process.env.OPENCODE_BROWSER_AGENT_GATEWAY_PORT || process.env.OPENCODE_BROWSER_AGENT_PORT) ||
26
+ getPortForSession(session);
27
+
28
// Locates the agent-browser daemon script.
// Returns the OPENCODE_BROWSER_AGENT_DAEMON value when set (used as-is, not checked for existence),
// otherwise the resolved path of "agent-browser/dist/daemon.js", or null when that cannot be resolved.
+ function resolveDaemonPath() {
29
+ const override = process.env.OPENCODE_BROWSER_AGENT_DAEMON;
30
+ if (override) return override;
31
+ try {
32
+ return require.resolve("agent-browser/dist/daemon.js");
33
+ } catch {
34
// Resolution failed; the caller treats null as "dependency not found".
+ return null;
35
+ }
36
+ }
37
+
38
// Reports whether the daemon may be auto-started.
// Returns false only when OPENCODE_BROWSER_AGENT_AUTOSTART is "0", "false", or "no"
// (compared case-insensitively); unset or any other value returns true.
+ function shouldAutoStart() {
39
+ const autoStart = (process.env.OPENCODE_BROWSER_AGENT_AUTOSTART || "").toLowerCase();
40
+ return !["0", "false", "no"].includes(autoStart);
41
+ }
42
+
43
/**
 * Launches the agent-browser daemon as a detached background process.
 *
 * Does nothing when auto-start is disabled. Logs and returns when the daemon
 * script cannot be located. The child inherits the current environment plus
 * AGENT_BROWSER_SESSION and AGENT_BROWSER_DAEMON, and is unref'd so it does
 * not keep this process alive.
 *
 * Failures are logged, never thrown: this is a best-effort start and the
 * caller keeps retrying the socket connection regardless.
 */
function startDaemon() {
  if (!shouldAutoStart()) return;

  const daemonPath = resolveDaemonPath();
  if (!daemonPath) {
    console.error("[agent-gateway] agent-browser dependency not found.");
    return;
  }

  const reportFailure = (err) => {
    console.error("[agent-gateway] Failed to start daemon:", err?.message || err);
  };

  try {
    const child = spawn(process.execPath, [daemonPath], {
      detached: true,
      stdio: "ignore",
      env: {
        ...process.env,
        AGENT_BROWSER_SESSION: session,
        AGENT_BROWSER_DAEMON: "1",
      },
    });
    // Spawn failures are reported asynchronously through the child's "error"
    // event, which the surrounding try/catch cannot see. Without a listener
    // that event is unhandled and would take the whole gateway down.
    child.on("error", reportFailure);
    child.unref();
  } catch (err) {
    // Synchronous failures, such as invalid spawn arguments.
    reportFailure(err);
  }
}
65
+
66
// Returns a promise that resolves (with no value) after roughly `ms` milliseconds.
+ async function sleep(ms) {
67
+ return await new Promise((resolve) => setTimeout(resolve, ms));
68
+ }
69
+
70
// Makes a single connection attempt to the agent unix socket at `socketPath`.
// Resolves with the connected socket, or rejects with the first socket error.
+ async function connectAgentSocket() {
71
+ return await new Promise((resolve, reject) => {
72
+ const socket = net.createConnection(socketPath);
73
+ socket.once("connect", () => resolve(socket));
74
// This one-shot listener stays attached after a successful connect, so it also receives the
// first post-connect error; reject() is then a no-op because the promise is already settled.
+ socket.once("error", (err) => reject(err));
75
+ });
76
+ }
77
+
78
// Connects to the agent socket, starting the daemon if the first attempt fails.
// After the first failure it calls startDaemon() and retries up to 20 times, 100 ms apart
// (about 2 s of sleeping in total). Throws an Error naming `socketPath` if every attempt fails.
+ async function createAgentConnection() {
79
+ try {
80
+ return await connectAgentSocket();
81
+ } catch {
82
// startDaemon() may return without spawning anything (auto-start disabled or daemon not found);
// the retry loop below runs either way.
+ startDaemon();
83
+ for (let attempt = 0; attempt < 20; attempt++) {
84
+ await sleep(100);
85
+ try {
86
+ return await connectAgentSocket();
87
// Individual retry errors are discarded on purpose; only the final summary error is surfaced.
+ } catch {}
88
+ }
89
+ throw new Error(`Could not connect to agent-browser socket at ${socketPath}`);
90
+ }
91
+ }
92
+
93
// TCP gateway: every inbound client connection is bridged to its own
// connection on the agent unix socket, with bytes piped in both directions.
// When either side errors or closes, both sockets are destroyed.
const server = net.createServer(async (client) => {
  let upstream = null;

  // Tears down both ends of the bridge. destroy() is idempotent, so this can
  // safely run once per event from either socket.
  const close = () => {
    client.destroy();
    if (upstream) upstream.destroy();
  };

  // Attach the client listeners BEFORE awaiting the upstream connection.
  // Connecting can take a couple of seconds (daemon start plus retries); a
  // client reset in that window would otherwise be an unhandled "error" event
  // and crash the gateway.
  client.on("error", close);
  client.on("close", close);

  try {
    upstream = await createAgentConnection();
  } catch (err) {
    if (!client.destroyed) client.end();
    console.error("[agent-gateway] Connection failed:", err?.message || err);
    return;
  }

  upstream.on("error", close);
  upstream.on("close", close);

  // The client may have gone away while we were connecting; its "close" event
  // fired before `upstream` existed, so release the upstream socket here
  // instead of piping into a dead connection.
  if (client.destroyed) {
    upstream.destroy();
    return;
  }

  client.pipe(upstream);
  upstream.pipe(client);
});
120
+
121
// Any error emitted by the listening server is treated as fatal: log it and exit with status 1.
+ server.on("error", (err) => {
122
+ console.error("[agent-gateway] Server error:", err?.message || err);
123
+ process.exit(1);
124
+ });
125
+
126
// Start accepting TCP clients on the resolved host and port, then log both ends of the bridge.
+ server.listen(port, host, () => {
127
+ console.log(`[agent-gateway] Listening on ${host}:${port}`);
128
+ console.log(`[agent-gateway] Proxying to ${socketPath}`);
129
+ });
package/bin/broker.cjs CHANGED
@@ -11,6 +11,20 @@ const SOCKET_PATH = path.join(BASE_DIR, "broker.sock");
11
11
 
12
12
  fs.mkdirSync(BASE_DIR, { recursive: true });
13
13
 
14
// Default lifetime of an idle tab claim: five minutes.
const DEFAULT_LEASE_TTL_MS = 5 * 60 * 1000;

/**
 * Parses the raw OPENCODE_BROWSER_CLAIM_TTL_MS value.
 *
 * @param {string | undefined} raw - The environment value, if any.
 * @returns {number} The TTL in milliseconds. An explicit "0" disables expiry.
 *   Unset, blank, non-numeric, negative, or non-finite values give the default.
 */
function parseLeaseTtlMs(raw) {
  // Number("") and Number("   ") are both 0, which would silently turn lease
  // expiry off; treat a blank value the same as an unset one.
  if (typeof raw !== "string" || raw.trim() === "") return DEFAULT_LEASE_TTL_MS;
  const value = Number(raw);
  return Number.isFinite(value) && value >= 0 ? value : DEFAULT_LEASE_TTL_MS;
}

// Effective claim TTL in milliseconds; 0 means claims never expire.
const LEASE_TTL_MS = parseLeaseTtlMs(process.env.OPENCODE_BROWSER_CLAIM_TTL_MS);

// Sweep interval: half the TTL, clamped to between 10 s and 60 s; 0 when expiry is disabled.
// With a TTL under 10 s, a stale claim can therefore outlive its TTL until the next sweep.
const LEASE_SWEEP_MS =
  LEASE_TTL_MS > 0 ? Math.min(Math.max(10000, Math.floor(LEASE_TTL_MS / 2)), 60000) : 0;
23
+
24
// Current time as epoch milliseconds; used for the lastSeenAt timestamps.
+ function nowMs() {
25
+ return Date.now();
26
+ }
27
+
14
28
  function nowIso() {
15
29
  return new Date().toISOString();
16
30
  }
@@ -39,7 +53,7 @@ function writeJsonLine(socket, msg) {
39
53
  }
40
54
 
41
55
  // Returns true when a tool must be bound to a specific tab before it is forwarded.
  // The listed tools are exempt; this diff adds "open_tab" to that list.
  function wantsTab(toolName) {
42
- return !["get_tabs", "get_active_tab"].includes(toolName);
56
+ return !["get_tabs", "get_active_tab", "open_tab"].includes(toolName);
43
57
  }
44
58
 
45
59
  // --- State ---
@@ -49,22 +63,78 @@ const extPending = new Map(); // extId -> { pluginSocket, pluginRequestId, sessi
49
63
 
50
64
  const clients = new Set();
51
65
 
52
- // Tab ownership: tabId -> { sessionId, claimedAt }
66
+ // Tab ownership: tabId -> { sessionId, claimedAt, lastSeenAt }
53
67
  // In each claim, claimedAt is an ISO string (nowIso) and lastSeenAt is epoch milliseconds (nowMs).
  const claims = new Map();
68
+ // Session state: sessionId -> { defaultTabId, lastSeenAt }
69
// lastSeenAt here is also epoch milliseconds; defaultTabId is null until a tab is assigned.
+ const sessionState = new Map();
54
70
 
55
71
  // Returns a snapshot of all claims as plain objects, sorted by ascending tabId.
  function listClaims() {
56
72
  const out = [];
57
73
  for (const [tabId, info] of claims.entries()) {
58
- out.push({ tabId, ...info });
74
+ out.push({
75
+ tabId,
76
+ sessionId: info.sessionId,
77
+ claimedAt: info.claimedAt,
78
// Stored as epoch milliseconds; converted to an ISO string for reporting.
+ lastSeenAt: new Date(info.lastSeenAt).toISOString(),
79
+ });
59
80
  }
60
81
  out.sort((a, b) => a.tabId - b.tabId);
61
82
  return out;
62
83
  }
63
84
 
85
// Returns true if at least one claim is owned by `sessionId` (linear scan of the claims map).
+ function sessionHasClaims(sessionId) {
86
+ for (const info of claims.values()) {
87
+ if (info.sessionId === sessionId) return true;
88
+ }
89
+ return false;
90
+ }
91
+
92
// Returns the state object for `sessionId`, creating it on first use.
// Returns null for a falsy sessionId. A new entry starts with no default tab
// and lastSeenAt set to the current time.
+ function getSessionState(sessionId) {
93
+ if (!sessionId) return null;
94
+ let state = sessionState.get(sessionId);
95
+ if (!state) {
96
+ state = { defaultTabId: null, lastSeenAt: nowMs() };
97
+ sessionState.set(sessionId, state);
98
+ }
99
+ return state;
100
+ }
101
+
102
// Marks the session as active now, creating its state if needed.
// Returns the state object, or null when sessionId is falsy.
+ function touchSession(sessionId) {
103
+ const state = getSessionState(sessionId);
104
+ if (!state) return null;
105
+ state.lastSeenAt = nowMs();
106
+ return state;
107
+ }
108
+
109
// Records `tabId` as the session's default tab and refreshes its lastSeenAt.
// Does nothing when sessionId is falsy. It does not check who owns the tab.
+ function setDefaultTab(sessionId, tabId) {
110
+ const state = getSessionState(sessionId);
111
+ if (!state) return;
112
+ state.defaultTabId = tabId;
113
+ state.lastSeenAt = nowMs();
114
+ }
115
+
116
// Clears the session's default tab. With `tabId` omitted it clears unconditionally;
// otherwise only when `tabId` is the current default. Never creates session state.
+ function clearDefaultTab(sessionId, tabId) {
117
+ const state = sessionState.get(sessionId);
118
+ if (!state) return;
119
+ if (tabId === undefined || state.defaultTabId === tabId) {
120
+ state.defaultTabId = null;
121
+ }
122
// lastSeenAt is refreshed even when nothing was cleared, so any call here
// (including one made while expiring a claim) counts as session activity.
+ state.lastSeenAt = nowMs();
123
+ }
124
+
125
// Removes the claim on `tabId`, if any, and clears it as the owning session's default tab.
+ function releaseClaim(tabId) {
126
+ const info = claims.get(tabId);
127
+ if (!info) return;
128
+ claims.delete(tabId);
129
+ clearDefaultTab(info.sessionId, tabId);
130
+ }
131
+
64
132
  function releaseClaimsForSession(sessionId) {
65
133
  for (const [tabId, info] of claims.entries()) {
66
134
  if (info.sessionId === sessionId) claims.delete(tabId);
67
135
  }
136
+ clearDefaultTab(sessionId);
137
+ sessionState.delete(sessionId);
68
138
  }
69
139
 
70
140
  function checkClaim(tabId, sessionId) {
@@ -75,7 +145,37 @@ function checkClaim(tabId, sessionId) {
75
145
  }
76
146
 
77
147
  function setClaim(tabId, sessionId) {
78
- claims.set(tabId, { sessionId, claimedAt: nowIso() });
148
+ const existing = claims.get(tabId);
149
+ claims.set(tabId, {
150
+ sessionId,
151
+ claimedAt: existing ? existing.claimedAt : nowIso(),
152
+ lastSeenAt: nowMs(),
153
+ });
154
+ }
155
+
156
// Refreshes or creates the claim on `tabId` for `sessionId`.
// - Owned by a different session: no-op (ownership is never taken over here).
// - Owned by this session: only lastSeenAt is refreshed.
// - Unclaimed: a new claim is created for this session.
+ function touchClaim(tabId, sessionId) {
157
+ const existing = claims.get(tabId);
158
+ if (existing && existing.sessionId !== sessionId) return;
159
+ if (existing) {
160
+ existing.lastSeenAt = nowMs();
161
+ } else {
162
+ setClaim(tabId, sessionId);
163
+ }
164
+ }
165
+
166
// Periodic sweep: releases claims idle for longer than LEASE_TTL_MS, then drops
// session entries that hold no claims and have been idle for longer than LEASE_TTL_MS.
// Does nothing when LEASE_TTL_MS is 0 (expiry disabled).
+ function cleanupStaleClaims() {
167
+ if (!LEASE_TTL_MS) return;
168
+ const now = nowMs();
169
+ for (const [tabId, info] of claims.entries()) {
170
// Strictly greater than: a claim idle for exactly the TTL survives this sweep.
+ if (now - info.lastSeenAt > LEASE_TTL_MS) {
171
+ releaseClaim(tabId);
172
+ }
173
+ }
174
+ for (const [sessionId, state] of sessionState.entries()) {
175
+ if (!sessionHasClaims(sessionId) && now - state.lastSeenAt > LEASE_TTL_MS) {
176
+ sessionState.delete(sessionId);
177
+ }
178
+ }
79
179
  }
80
180
 
81
181
  function ensureHost() {
@@ -117,25 +217,47 @@ async function handleTool(pluginSocket, req) {
117
217
  const { tool, args = {}, sessionId } = req;
118
218
  if (!tool) throw new Error("Missing tool");
119
219
 
220
+ if (sessionId) touchSession(sessionId);
221
+
120
222
  let tabId = args.tabId;
223
+ const toolArgs = { ...args };
224
+
225
+ if (tool === "open_tab" && toolArgs.active !== false) {
226
+ const activeTabId = await resolveActiveTab(sessionId);
227
+ const claimCheck = checkClaim(activeTabId, sessionId);
228
+ if (!claimCheck.ok) {
229
+ toolArgs.active = false;
230
+ }
231
+ }
121
232
 
122
233
  if (wantsTab(tool)) {
123
234
  if (typeof tabId !== "number") {
124
- tabId = await resolveActiveTab(sessionId);
235
+ const state = getSessionState(sessionId);
236
+ const defaultTabId = state && Number.isFinite(state.defaultTabId) ? state.defaultTabId : null;
237
+ if (Number.isFinite(defaultTabId)) {
238
+ tabId = defaultTabId;
239
+ } else {
240
+ const activeTabId = await resolveActiveTab(sessionId);
241
+ const claimCheck = checkClaim(activeTabId, sessionId);
242
+ if (!claimCheck.ok) {
243
+ throw new Error(`${claimCheck.error}. No default tab for session; open a new tab or claim one.`);
244
+ }
245
+ tabId = activeTabId;
246
+ setDefaultTab(sessionId, tabId);
247
+ }
125
248
  }
126
249
 
127
250
  const claimCheck = checkClaim(tabId, sessionId);
128
251
  if (!claimCheck.ok) throw new Error(claimCheck.error);
129
252
  }
130
253
 
131
- const res = await callExtension(tool, { ...args, tabId }, sessionId);
254
+ const res = await callExtension(tool, { ...toolArgs, tabId }, sessionId);
132
255
 
133
256
  const usedTabId =
134
257
  res && typeof res.tabId === "number" ? res.tabId : typeof tabId === "number" ? tabId : undefined;
135
258
  if (typeof usedTabId === "number") {
136
- // Auto-claim on first touch
137
- const existing = claims.get(usedTabId);
138
- if (!existing) setClaim(usedTabId, sessionId);
259
+ touchClaim(usedTabId, sessionId);
260
+ setDefaultTab(sessionId, usedTabId);
139
261
  }
140
262
 
141
263
  return res;
@@ -145,6 +267,7 @@ function handleClientMessage(socket, client, msg) {
145
267
  if (msg && msg.type === "hello") {
146
268
  client.role = msg.role || "unknown";
147
269
  client.sessionId = msg.sessionId;
270
+ if (client.sessionId) touchSession(client.sessionId);
148
271
  if (client.role === "native-host") {
149
272
  host = { socket };
150
273
  // allow host to see current state
@@ -174,6 +297,7 @@ function handleClientMessage(socket, client, msg) {
174
297
  if (msg && msg.type === "request" && typeof msg.id === "number") {
175
298
  const requestId = msg.id;
176
299
  const sessionId = msg.sessionId || client.sessionId;
300
+ if (sessionId) touchSession(sessionId);
177
301
 
178
302
  const replyOk = (data) => writeJsonLine(socket, { type: "response", id: requestId, ok: true, data });
179
303
  const replyErr = (err) =>
@@ -182,7 +306,21 @@ function handleClientMessage(socket, client, msg) {
182
306
  (async () => {
183
307
  try {
184
308
  if (msg.op === "status") {
185
- replyOk({ broker: true, hostConnected: !!host && !!host.socket && !host.socket.destroyed, claims: listClaims() });
309
+ const state = sessionId ? sessionState.get(sessionId) : null;
310
+ const sessionInfo = state
311
+ ? {
312
+ sessionId,
313
+ defaultTabId: state.defaultTabId,
314
+ lastSeenAt: new Date(state.lastSeenAt).toISOString(),
315
+ }
316
+ : null;
317
+ replyOk({
318
+ broker: true,
319
+ hostConnected: !!host && !!host.socket && !host.socket.destroyed,
320
+ claims: listClaims(),
321
+ leaseTtlMs: LEASE_TTL_MS,
322
+ session: sessionInfo,
323
+ });
186
324
  return;
187
325
  }
188
326
 
@@ -199,7 +337,11 @@ function handleClientMessage(socket, client, msg) {
199
337
  if (existing && existing.sessionId !== sessionId && !force) {
200
338
  throw new Error(`Tab ${tabId} is owned by another OpenCode session (${existing.sessionId})`);
201
339
  }
340
+ if (existing && existing.sessionId !== sessionId && force) {
341
+ clearDefaultTab(existing.sessionId, tabId);
342
+ }
202
343
  setClaim(tabId, sessionId);
344
+ setDefaultTab(sessionId, tabId);
203
345
  replyOk({ ok: true, tabId, sessionId });
204
346
  return;
205
347
  }
@@ -215,7 +357,7 @@ function handleClientMessage(socket, client, msg) {
215
357
  if (existing.sessionId !== sessionId) {
216
358
  throw new Error(`Tab ${tabId} is owned by another OpenCode session (${existing.sessionId})`);
217
359
  }
218
- claims.delete(tabId);
360
+ releaseClaim(tabId);
219
361
  replyOk({ ok: true, tabId, released: true });
220
362
  return;
221
363
  }
@@ -287,4 +429,9 @@ function start() {
287
429
  });
288
430
  }
289
431
 
432
// Schedule the stale-claim sweep only when expiry is enabled.
+ if (LEASE_TTL_MS > 0 && LEASE_SWEEP_MS > 0) {
433
+ const timer = setInterval(cleanupStaleClaims, LEASE_SWEEP_MS);
434
// unref() so this interval alone does not keep the broker process alive.
+ if (typeof timer.unref === "function") timer.unref();
435
+ }
436
+
290
437
  start();