@different-ai/opencode-browser 4.2.6 → 4.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,11 +17,12 @@ metadata:
17
17
  ## Best-practice workflow
18
18
 
19
19
  1. Inspect tabs with `browser_get_tabs`
20
- 2. Navigate with `browser_navigate` if needed
21
- 3. Wait for UI using `browser_query` with `timeoutMs`
22
- 4. Discover candidates using `browser_query` with `mode=list`
23
- 5. Click or type using `index`
24
- 6. Confirm using `browser_query` or `browser_snapshot`
20
+ 2. Open new tabs with `browser_open_tab` when needed
21
+ 3. Navigate with `browser_navigate` if needed
22
+ 4. Wait for UI using `browser_query` with `timeoutMs`
23
+ 5. Discover candidates using `browser_query` with `mode=list`
24
+ 6. Click or type using `index`
25
+ 7. Confirm using `browser_query` or `browser_snapshot`
25
26
 
26
27
  ## Query modes
27
28
 
@@ -31,6 +32,11 @@ metadata:
31
32
  - `exists`: check presence and count
32
33
  - `page_text`: extract visible page text
33
34
 
35
+ ## Opening tabs
36
+
37
+ - Use `browser_open_tab` to create a new tab, optionally with `url` and `active`
38
+ - Example: `browser_open_tab({ url: "https://example.com", active: false })`
39
+
34
40
  ## Troubleshooting
35
41
 
36
42
  - If a selector fails, run `browser_query` with `mode=page_text` to confirm the content exists
package/README.md CHANGED
@@ -4,20 +4,32 @@ Browser automation plugin for [OpenCode](https://github.com/opencode-ai/opencode
4
4
 
5
5
  Control your real Chromium browser (Chrome/Brave/Arc/Edge) using your existing profile (logins, cookies, bookmarks). No DevTools Protocol, no security prompts.
6
6
 
7
+
8
+ https://github.com/user-attachments/assets/1496b3b3-419b-436c-b412-8cda2fed83d6
9
+
10
+
7
11
  ## Why this architecture
8
12
 
9
13
  This version is optimized for reliability and predictable multi-session behavior:
10
-
14
+ - **No MCP** → just an OpenCode plugin
11
15
  - **No WebSocket port** → no port conflicts
12
16
  - **Chrome Native Messaging** between extension and a local host process
13
17
  - A local **broker** multiplexes multiple OpenCode plugin sessions and enforces **per-tab ownership**
14
18
 
15
19
  ## Installation
16
20
 
21
+ > Help me improve this!
22
+
17
23
  ```bash
18
- npx @different-ai/opencode-browser install
24
+ bunx @different-ai/opencode-browser@latest install
19
25
  ```
20
26
 
27
+
28
+ https://github.com/user-attachments/assets/d5767362-fbf3-4023-858b-90f06d9f0b25
29
+
30
+
31
+
32
+
21
33
  The installer will:
22
34
 
23
35
  1. Copy the extension to `~/.opencode-browser/extension/`
@@ -57,10 +69,12 @@ OpenCode Plugin <-> Local Broker (unix socket) <-> Native Host <-> Chrome Extens
57
69
  Core primitives:
58
70
  - `browser_status`
59
71
  - `browser_get_tabs`
72
+ - `browser_open_tab`
60
73
  - `browser_navigate`
61
74
  - `browser_query` (modes: `text`, `value`, `list`, `exists`, `page_text`; optional `timeoutMs`/`pollMs`)
62
75
  - `browser_click`
63
76
  - `browser_type`
77
+ - `browser_select`
64
78
  - `browser_scroll`
65
79
  - `browser_wait`
66
80
 
@@ -69,6 +83,14 @@ Diagnostics:
69
83
  - `browser_screenshot`
70
84
  - `browser_version`
71
85
 
86
+ ## Roadmap
87
+
88
+ - [ ] Add tab management tools (`browser_set_active_tab`, `browser_close_tab`)
89
+ - [ ] Add navigation helpers (`browser_back`, `browser_forward`, `browser_reload`)
90
+ - [ ] Add keyboard input tool (`browser_key`)
91
+ - [ ] Add download support (`browser_download`, `browser_list_downloads`)
92
+ - [ ] Add upload support (`browser_set_file_input`)
93
+
72
94
  ## Troubleshooting
73
95
 
74
96
  **Extension says native host not available**
package/dist/plugin.js CHANGED
@@ -12514,6 +12514,17 @@ var plugin = async (ctx) => {
12514
12514
  return toolResultText(data, "ok");
12515
12515
  }
12516
12516
  }),
12517
+ browser_open_tab: tool({
12518
+ description: "Open a new browser tab",
12519
+ args: {
12520
+ url: schema.string().optional(),
12521
+ active: schema.boolean().optional()
12522
+ },
12523
+ async execute({ url: url2, active }, ctx2) {
12524
+ const data = await brokerRequest("tool", { tool: "open_tab", args: { url: url2, active } });
12525
+ return toolResultText(data, "Opened new tab");
12526
+ }
12527
+ }),
12517
12528
  browser_navigate: tool({
12518
12529
  description: "Navigate to a URL in the browser",
12519
12530
  args: {
@@ -12551,6 +12562,25 @@ var plugin = async (ctx) => {
12551
12562
  return toolResultText(data, `Typed "${text}" into ${selector}`);
12552
12563
  }
12553
12564
  }),
12565
+ browser_select: tool({
12566
+ description: "Select an option in a native select element",
12567
+ args: {
12568
+ selector: schema.string(),
12569
+ value: schema.string().optional(),
12570
+ label: schema.string().optional(),
12571
+ optionIndex: schema.number().optional(),
12572
+ index: schema.number().optional(),
12573
+ tabId: schema.number().optional()
12574
+ },
12575
+ async execute({ selector, value, label, optionIndex, index, tabId }, ctx2) {
12576
+ const data = await brokerRequest("tool", {
12577
+ tool: "select",
12578
+ args: { selector, value, label, optionIndex, index, tabId }
12579
+ });
12580
+ const summary = value ?? label ?? (optionIndex != null ? String(optionIndex) : "option");
12581
+ return toolResultText(data, `Selected ${summary} in ${selector}`);
12582
+ }
12583
+ }),
12554
12584
  browser_screenshot: tool({
12555
12585
  description: "Take a screenshot of the current page. Returns base64 image data URL.",
12556
12586
  args: {
@@ -100,9 +100,11 @@ async function executeTool(toolName, args) {
100
100
  const tools = {
101
101
  get_active_tab: toolGetActiveTab,
102
102
  get_tabs: toolGetTabs,
103
+ open_tab: toolOpenTab,
103
104
  navigate: toolNavigate,
104
105
  click: toolClick,
105
106
  type: toolType,
107
+ select: toolSelect,
106
108
  screenshot: toolScreenshot,
107
109
  snapshot: toolSnapshot,
108
110
  query: toolQuery,
@@ -252,6 +254,12 @@ async function pageOps(command, args) {
252
254
  return false
253
255
  }
254
256
 
257
/**
 * Assign `value` to a select element.
 *
 * Prefers the `value` setter declared on `HTMLSelectElement.prototype` and
 * invokes it with `el` as the receiver; falls back to a plain property
 * assignment when the prototype exposes no such setter.
 * NOTE(review): presumably the prototype setter is used to bypass a `value`
 * property overridden on the element instance (e.g. by a UI framework) — confirm.
 *
 * @param {object} el - Element whose value is being set.
 * @param {string} value - Value to assign.
 */
function setSelectValue(el, value) {
  const descriptor = Object.getOwnPropertyDescriptor(window.HTMLSelectElement.prototype, "value")
  const nativeSetter = descriptor?.set
  if (!nativeSetter) {
    el.value = value
    return
  }
  nativeSetter.call(el, value)
}
262
+
255
263
  function getInputValues() {
256
264
  const out = []
257
265
  const nodes = document.querySelectorAll("input, textarea")
@@ -381,6 +389,66 @@ async function pageOps(command, args) {
381
389
  return { ok: false, error: `Element is not typable: ${match.selectorUsed} (${tag.toLowerCase()})` }
382
390
  }
383
391
 
392
+ if (command === "select") {
393
+ const value = typeof options.value === "string" ? options.value : null
394
+ const label = typeof options.label === "string" ? options.label : null
395
+ const optionIndex = Number.isFinite(options.optionIndex) ? options.optionIndex : null
396
+ const match = resolveMatches(selectors, index)
397
+ if (!match.chosen) {
398
+ return { ok: false, error: `Element not found for selectors: ${selectors.join(", ")}` }
399
+ }
400
+
401
+ const tag = match.chosen.tagName
402
+ if (tag !== "SELECT") {
403
+ return { ok: false, error: `Element is not a select: ${match.selectorUsed} (${tag.toLowerCase()})` }
404
+ }
405
+
406
+ if (value === null && label === null && optionIndex === null) {
407
+ return { ok: false, error: "value, label, or optionIndex is required" }
408
+ }
409
+
410
+ const selectEl = match.chosen
411
+ const optionList = Array.from(selectEl.options || [])
412
+ let option = null
413
+
414
+ if (value !== null) {
415
+ option = optionList.find((opt) => opt.value === value)
416
+ }
417
+
418
+ if (!option && label !== null) {
419
+ const target = label.trim()
420
+ option = optionList.find((opt) => (opt.label || opt.textContent || "").trim() === target)
421
+ }
422
+
423
+ if (!option && optionIndex !== null) {
424
+ option = optionList[optionIndex]
425
+ }
426
+
427
+ if (!option) {
428
+ return { ok: false, error: "Option not found" }
429
+ }
430
+
431
+ try {
432
+ selectEl.scrollIntoView({ block: "center", inline: "center" })
433
+ } catch {}
434
+
435
+ try {
436
+ selectEl.focus()
437
+ } catch {}
438
+
439
+ setSelectValue(selectEl, option.value)
440
+ option.selected = true
441
+ selectEl.dispatchEvent(new Event("input", { bubbles: true }))
442
+ selectEl.dispatchEvent(new Event("change", { bubbles: true }))
443
+
444
+ return {
445
+ ok: true,
446
+ selectorUsed: match.selectorUsed,
447
+ value: selectEl.value,
448
+ label: (option.label || option.textContent || "").trim(),
449
+ }
450
+ }
451
+
384
452
  if (command === "scroll") {
385
453
  const scrollX = Number.isFinite(options.x) ? options.x : 0
386
454
  const scrollY = Number.isFinite(options.y) ? options.y : 0
@@ -487,6 +555,15 @@ async function toolGetActiveTab() {
487
555
  return { tabId: tab.id, content: { tabId: tab.id, url: tab.url, title: tab.title } }
488
556
  }
489
557
 
558
/**
 * Open a new browser tab with `chrome.tabs.create`.
 *
 * @param {object} [params] - Tool arguments; may be omitted entirely.
 * @param {string} [params.url] - URL to load. Surrounding whitespace is trimmed;
 *   a missing, non-string, or blank value is not forwarded to Chrome.
 * @param {boolean} [params.active=true] - Forwarded as the `active` create
 *   option; non-boolean values are not forwarded.
 * @returns {Promise<{tabId: number, content: {tabId: number, url: (string|undefined), active: boolean}}>}
 *   The id of the created tab plus a summary of it.
 */
async function toolOpenTab({ url, active = true } = {}) {
  const createOptions = {}
  if (typeof url === "string" && url.trim()) createOptions.url = url.trim()
  if (typeof active === "boolean") createOptions.active = active

  const tab = await chrome.tabs.create(createOptions)

  // create() resolves before the navigation commits, so `tab.url` is usually
  // empty at this point while `tab.pendingUrl` already holds the target.
  // Report the pending URL in that case so callers see where the tab is going.
  const reportedUrl = tab.url || (tab.pendingUrl ?? tab.url)
  return { tabId: tab.id, content: { tabId: tab.id, url: reportedUrl, active: tab.active } }
}
566
+
490
567
  async function toolNavigate({ url, tabId }) {
491
568
  if (!url) throw new Error("URL is required")
492
569
  const tab = await getTabById(tabId)
@@ -530,6 +607,22 @@ async function toolType({ selector, text, tabId, clear = false, index = 0 }) {
530
607
  return { tabId: tab.id, content: `Typed "${text}" into ${used}` }
531
608
  }
532
609
 
610
/**
 * Select an option in a native select element on a tab.
 *
 * Validates the arguments, resolves the tab with `getTabById`, then issues the
 * "select" page command through `runInPage` and turns its result into a
 * human-readable summary.
 *
 * @param {object} params
 * @param {string} params.selector - Selector identifying the select element (required).
 * @param {string} [params.value] - Option value to select.
 * @param {string} [params.label] - Option label to select.
 * @param {number} [params.optionIndex] - Position of the option to select.
 * @param {number} [params.tabId] - Tab to operate on; passed to `getTabById` as is.
 * @param {number} [params.index=0] - Forwarded to the page command
 *   (presumably picks among multiple selector matches — confirm against `pageOps`).
 * @returns {Promise<{tabId: number, content: string}>} Tab id and a summary message.
 * @throws {Error} When `selector` is missing, when none of `value`, `label`,
 *   `optionIndex` is supplied, or when the page command reports failure.
 */
async function toolSelect({ selector, value, label, optionIndex, tabId, index = 0 }) {
  if (!selector) throw new Error("Selector is required")
  // `== null` matches both undefined and null: a caller that sends explicit
  // nulls has supplied nothing, so fail here instead of after resolving the
  // tab and doing a page round-trip that ends in the same error.
  if (value == null && label == null && optionIndex == null) {
    throw new Error("value, label, or optionIndex is required")
  }
  const tab = await getTabById(tabId)

  const result = await runInPage(tab.id, "select", { selector, value, label, optionIndex, index })
  if (!result?.ok) throw new Error(result?.error || "Select failed")

  const used = result.selectorUsed || selector
  const valueText = result.value ? String(result.value) : ""
  const labelText = result.label ? String(result.label) : ""
  // Show "label (value)" only when both exist and differ; otherwise whichever is present.
  const summary = labelText && valueText && labelText !== valueText ? `${labelText} (${valueText})` : labelText || valueText
  return { tabId: tab.id, content: `Selected ${summary || "option"} in ${used}` }
}
625
+
533
626
  async function toolScreenshot({ tabId }) {
534
627
  const tab = await getTabById(tabId)
535
628
  const png = await chrome.tabs.captureVisibleTab(tab.windowId, { format: "png" })
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "manifest_version": 3,
3
3
  "name": "OpenCode Browser Automation",
4
- "version": "4.2.6",
4
+ "version": "4.3.0",
5
5
  "description": "Browser automation for OpenCode",
6
6
  "permissions": [
7
7
  "tabs",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@different-ai/opencode-browser",
3
- "version": "4.2.6",
3
+ "version": "4.3.1",
4
4
  "description": "Browser automation plugin for OpenCode (native messaging + per-tab ownership).",
5
5
  "type": "module",
6
6
  "bin": {