opencode-webfetch-plugin 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,93 @@
1
+ # Opencode Google AI Search Plugin
2
+
3
+ An Opencode plugin that exposes a native tool (`google_ai_search_plus`) for querying Google AI Mode (aka Google SGE). It uses Playwright to load the AI panel directly and converts the full response into markdown with Turndown so the output renders just like the built-in `webfetch` tool.
4
+
5
+ ## Features
6
+
7
+ - Direct navigation to Google AI Mode with stealth browser headers.
8
+ - Waits for the progressive response to stabilise before extraction.
9
+ - Captures headings, lists, tables, and sources from the AI panel.
10
+ - Converts the response to markdown to avoid truncated tool output.
11
+ - Provides rich metadata (response time, source count, table presence) for the assistant model.
12
+
13
+ ## Installation
14
+
15
+ 1. Clone or download this repository.
16
+ 2. Install dependencies and build the plugin:
17
+
18
+ ```bash
19
+ bun install
20
+ bun run build
21
+ ```
22
+
23
+ > Note: If you encounter TypeScript compilation errors, you may need to fix quote escaping issues in the source code.
24
+
25
+ > Playwright is declared as a peer dependency. Install it (and Chromium) in the same project that will host the plugin:
26
+ >
27
+ > ```bash
28
+ > bun install
29
+ > npx playwright install chromium
30
+ > ```
31
+ >
32
+ > Note: Use `npx` instead of `bunx` if bunx is not available.
33
+
34
+ 3. Add the plugin to Opencode. You can either:
35
+
36
+ - Drop the built files into your project: copy the entire folder somewhere in your repo and reference its absolute `file:///` path in `opencode.json`:
37
+
38
+ ```json
39
+ {
40
+ "plugin": [
41
+ "file:///absolute/path/to/google_ai_search/dist/index.js"
42
+ ]
43
+ }
44
+ ```
45
+
46
+ > Important: Use absolute paths starting with `file:///` instead of relative paths to avoid module resolution issues.
47
+
48
+ - Or publish this package to npm (e.g. `npm publish`) and reference it by name:
49
+
50
+ ```json
51
+ {
52
+ "plugin": [
53
+ "opencode-google-ai-search-plugin"
54
+ ]
55
+ }
56
+ ```
57
+
58
+ 4. Restart Opencode. The new tool (`google_ai_search_plus`) will appear in the tool list.
59
+
60
+ ## Usage
61
+
62
+ Once the plugin is loaded, call the tool from any Opencode session:
63
+
64
+ ```text
65
+ google_ai_search_plus "What is the difference between TypeScript and JavaScript?"
66
+ ```
67
+
68
+ Parameters:
69
+
70
+ | Name | Type | Description |
71
+ |----------|---------|-------------------------------------------------------------------|
72
+ | `query` | string | Question or topic to submit to Google AI Mode. |
73
+ | `timeout`| number | Optional timeout in seconds (default 30, max 120). |
74
+ | `followUp` | boolean | Treats the query as part of the same conversation (session reuse). |
75
+
76
+ The tool returns a markdown-formatted answer plus metadata about the response, including source count and whether a comparison table was detected.
77
+
78
+ ## Notes
79
+
80
+ - Google frequently throttles automated traffic. If you see timeout or “blocking” errors, wait a few minutes or reduce query frequency.
81
+ - The plugin stores no state; each call launches (or reuses) an isolated headless Chromium session via Playwright.
82
+ - Formatting mirrors the built-in `webfetch` tool so Opencode renders the full AI answer without summarising it.
83
+ - You can customise the tool ID by editing `src/index.ts` before publishing.
84
+
85
+ ## Development
86
+
87
+ - `bun run build` compiles TypeScript to the `dist/` folder (ESM output with type declarations).
88
+ - `bun run clean` removes the build artefacts.
89
+ - Update the version in `package.json` before publishing to npm.
90
+
91
+ ## License
92
+
93
+ MIT
package/bun.lock ADDED
@@ -0,0 +1,39 @@
1
+ {
2
+ "lockfileVersion": 1,
3
+ "configVersion": 1,
4
+ "workspaces": {
5
+ "": {
6
+ "name": "opencode-google-ai-search-plugin",
7
+ "dependencies": {
8
+ "turndown": "latest",
9
+ },
10
+ "devDependencies": {
11
+ "@opencode-ai/plugin": "latest",
12
+ "typescript": "latest",
13
+ },
14
+ "peerDependencies": {
15
+ "@opencode-ai/plugin": "latest",
16
+ "playwright": "latest",
17
+ },
18
+ },
19
+ },
20
+ "packages": {
21
+ "@mixmark-io/domino": ["@mixmark-io/domino@2.2.0", "", {}, "sha512-Y28PR25bHXUg88kCV7nivXrP2Nj2RueZ3/l/jdx6J9f8J4nsEGcgX0Qe6lt7Pa+J79+kPiJU3LguR6O/6zrLOw=="],
22
+
23
+ "@opencode-ai/plugin": ["@opencode-ai/plugin@1.2.10", "", { "dependencies": { "@opencode-ai/sdk": "1.2.10", "zod": "4.1.8" } }, "sha512-Z1BMqNHnD8AGAEb+kUz0b2SOuiODwdQLdCA4aVGTXqkGzhiD44OVxr85MeoJ5AMTnnea9SnJ3jp9GAQ5riXA5g=="],
24
+
25
+ "@opencode-ai/sdk": ["@opencode-ai/sdk@1.2.10", "", {}, "sha512-SyXcVqry2hitPVvQtvXOhqsWyFhSycG/+LTLYXrcq8AFmd9FR7dyBSDB3f5Ol6IPkYOegk8P2Eg2kKPNSNiKGw=="],
26
+
27
+ "fsevents": ["fsevents@2.3.2", "", { "os": "darwin" }, "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA=="],
28
+
29
+ "playwright": ["playwright@1.58.2", "", { "dependencies": { "playwright-core": "1.58.2" }, "optionalDependencies": { "fsevents": "2.3.2" }, "bin": { "playwright": "cli.js" } }, "sha512-vA30H8Nvkq/cPBnNw4Q8TWz1EJyqgpuinBcHET0YVJVFldr8JDNiU9LaWAE1KqSkRYazuaBhTpB5ZzShOezQ6A=="],
30
+
31
+ "playwright-core": ["playwright-core@1.58.2", "", { "bin": { "playwright-core": "cli.js" } }, "sha512-yZkEtftgwS8CsfYo7nm0KE8jsvm6i/PTgVtB8DL726wNf6H2IMsDuxCpJj59KDaxCtSnrWan2AeDqM7JBaultg=="],
32
+
33
+ "turndown": ["turndown@7.2.2", "", { "dependencies": { "@mixmark-io/domino": "^2.2.0" } }, "sha512-1F7db8BiExOKxjSMU2b7if62D/XOyQyZbPKq/nUwopfgnHlqXHqQ0lvfUTeUIr1lZJzOPFn43dODyMSIfvWRKQ=="],
34
+
35
+ "typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="],
36
+
37
+ "zod": ["zod@4.1.8", "", {}, "sha512-5R1P+WwQqmmMIEACyzSvo4JXHY5WiAFHRMg+zBZKgKS+Q1viRa0C1hmUKtHltoIFKtIdki3pRxkmpP74jnNYHQ=="],
38
+ }
39
+ }
package/omega.zip ADDED
Binary file
package/package.json ADDED
@@ -0,0 +1,34 @@
1
+ {
2
+ "name": "opencode-webfetch-plugin",
3
+ "version": "0.1.0",
4
+ "description": "An opencode plugin that exposes a webfetch tool capable of human-in-the-loop interaction for handling captchas and logins.",
5
+ "type": "module",
6
+ "module": "./src/index.ts",
7
+ "scripts": {},
8
+ "keywords": [
9
+ "opencode",
10
+ "plugin",
11
+ "google",
12
+ "ai",
13
+ "search"
14
+ ],
15
+ "author": "",
16
+ "license": "MIT",
17
+ "dependencies": {
18
+ "@mozilla/readability": "^0.6.0",
19
+ "jsdom": "^28.1.0",
20
+ "turndown": "^7.2.2"
21
+ },
22
+ "peerDependencies": {
23
+ "@opencode-ai/plugin": "latest",
24
+ "playwright": "latest"
25
+ },
26
+ "devDependencies": {
27
+ "@opencode-ai/plugin": "latest",
28
+ "@types/jsdom": "^28.0.0",
29
+ "@types/mozilla-readability": "^0.2.1",
30
+ "@types/node": "^25.3.0",
31
+ "@types/turndown": "^5.0.6",
32
+ "typescript": "latest"
33
+ }
34
+ }
package/plan.md ADDED
@@ -0,0 +1,45 @@
1
+ # 开发计划 (Development Plan)
2
+
3
+ 基于 `research.md` 的架构设计,针对支持人机协同的 `Opencode-Google-AI-Search-Plugin`(现更偏向通用网页读取与交互插件),制定以下分步开发计划:
4
+
5
+ ## 第一阶段:项目初始化与依赖配置 (Project Setup & Dependencies)
6
+ 1. **清理历史代码**:基于原有项目基础,移除特定于“仅限 Google 搜索”的无头代码,为新的单一入口工具做准备。
7
+ 2. **安装必要依赖**:
8
+ - 确保 `playwright` 依赖配置正确。
9
+ - 安装内容提取和格式化工具:`turndown`, `@mozilla/readability`(可能需要使用 `jsdom` 或在 Playwright 浏览器上下文中直接执行 Readability)。
10
+ - 安装对应的 TypeScript 类型定义文件。
11
+
12
+ ## 第二阶段:浏览器上下文与生命周期管理 (Browser Context Management)
13
+ 1. **实现 BrowserManager 类**:
14
+ - 管理单例的浏览器实例。
15
+ - **持久化配置**:使用 `playwright.chromium.launchPersistentContext`,并在项目根目录或指定系统临时目录创建一个 `.userdata` 文件夹,用于存储 Cookies 和 LocalStorage。
16
+ - **有头模式**:默认配置 `headless: false`,确保人类可以随时看到页面内容并进行干预。
17
+ - 处理浏览器的启动、页面标签页的创建以及插件关闭时的清理工作。
18
+
19
+ ## 第三阶段:人机协同挂起/恢复机制 (Human-in-the-Loop Mechanism)
20
+ 1. **终端交互模块**:利用 Node.js 原生的 `readline` 模块,封装一个 `askForHumanHelp(message)` 函数。该函数会阻塞当前 `Promise`,在终端输出提示信息,并等待用户按下回车键。
21
+ 2. **状态监测与拦截逻辑**:
22
+ - 页面加载 `goto` 时设置合理的超时时间。
23
+ - (可选/基础版)当 Agent 请求页面时,先尝试加载。如果加载超时,或者通过简单的 URL/DOM 探测发现类似 Cloudflare 的盾、登录墙等特征,主动触发 `askForHumanHelp`。
24
+ - (进阶版)考虑到启发式探测可能不完善,始终提供一个兜底机制:即使没检测到验证码,只要获取不到核心内容,就可以让用户决定是否介入。
25
+
26
+ ## 第四阶段:页面内容提取与降噪 (Content Extraction & Formatting)
27
+ 1. **核心提取逻辑**:在页面加载完成(或人类接管并确认完成后),注入 `@mozilla/readability` 脚本到页面中执行,或者提取 HTML 到 Node 端处理。Readability 能有效去除广告、侧边栏和导航。
28
+ 2. **Markdown 转换**:将 Readability 提取出的纯净 HTML 片段传递给 `turndown`,转换为高质量的 Markdown 文本,极大降低 LLM Token 消耗。
29
+
30
+ ## 第五阶段:核心工具注册与整合 (Opencode Tool Registration)
31
+ 1. **重构 `src/index.ts`**:
32
+ - 清除原有针对 Google 搜索的多个特定工具。
33
+ - 仅注册一个核心工具:`webfetch`。
34
+ - **工具参数**:`url`(必填,目标网址)和 `query`(可选,如果不提供 URL 则作为搜索引擎的搜索词处理)。
35
+ 2. **串联流程**:
36
+ - Agent 调用 `webfetch` -> 检查/启动 BrowserManager -> 新建 Tab 打开 URL -> 检测是否需要人类协助 -> (如果需要)终端挂起并等待人类回车 -> 获取页面 DOM -> Readability + Turndown 转换 -> 返回 Markdown 结果给 Agent。
37
+
38
+ ## 第六阶段:测试与调优 (Testing & Refinement)
39
+ 1. 测试正常网页抓取(如 Wikipedia、GitHub 项目页)。
40
+ 2. 测试拦截验证网页(手动寻找一个带有 Cloudflare 验证或必须登录的页面,验证挂起机制和回车恢复机制)。
41
+ 3. 测试异常处理和容错机制(如用户强行关闭了浏览器窗口等情况的处理)。
42
+
43
+ ---
44
+
45
+ 请确认以上开发计划是否符合您的预期?如果确认无误,我们将进入第一阶段的编码工作。
package/research.md ADDED
@@ -0,0 +1,76 @@
1
+ # 针对 Opencode 人机协同交互式浏览器插件的技术调研
2
+
3
+ ## 背景与目标
4
+
5
+ 当前参考项目 `Opencode-Google-AI-Search-Plugin` 展示了如何利用 `@opencode-ai/plugin` 机制封装 Playwright 进行自动化 Google 搜索(无头模式 `headless: true`)。但这种方式面临一个严重问题:**当遇到反爬虫机制(如 Google 的 CAPTCHA)、需要登录才能访问的网页时,Agent 会直接失败(抛出异常)。**
6
+
7
+ 本调研旨在设计一个**支持人机协同、可持久化、高度可控**的 Opencode 浏览器控制插件。Agent(如 LLM)可以通过该插件调度浏览器进行信息检索和操作,同时在遇到拦截或需要授权时,允许人类接管浏览器完成验证或登录操作,随后交还控制权给 Agent。
8
+
9
+ ## 1. 核心技术选型
10
+
11
+ ### 1.1 浏览器引擎控制框架:Playwright
12
+ **推荐使用 Playwright 而非 Puppeteer**。
13
+ - **原因**:当前 Opencode 已有通过 `peerDependencies` 使用 Playwright 的基础。Playwright 支持更现代的 Web 特性、多浏览器引擎(Chromium, Firefox, WebKit),且在处理跨域(iframe)和等待机制(Auto-waiting)上比 Puppeteer 更智能。
14
+ - **实现方式**:可以继续采用动态导入 `import("playwright")` 结合 `/tmp/node_modules/playwright` 的后备机制。
15
+
16
+ ### 1.2 浏览器运行模式:Persistent Context (持久化上下文) 与有头模式
17
+ 当前的 Google 搜索插件每次调用启动的是临时会话(无持久化),且是无头模式(`headless: true`)。
18
+ 针对我们的新需求,需要做以下改动:
19
+ - **Headless 模式切换**:默认可以后台运行,但必须支持开启 `headless: false`(有头模式),从而让用户能看到界面进行登录、过验证码等操作。
20
+ - **持久化数据 (userDataDir)**:必须使用 `playwright.chromium.launchPersistentContext(userDataDir, options)` 替代普通的 `launch`。这样用户的登录状态、Cookies、LocalStorage 都会被保存到本地硬盘。下次启动 Agent 时,不再需要重新登录。
21
+
22
+ ## 2. 架构设计与交互流程
23
+
24
+ ### 2.1 基础架构
25
+ 插件应在初始化时维护一个全局单例的 Browser Context(持久化),并在其上暴露一组供 Agent 使用的 Tools (函数集)。
26
+
27
+ ### 2.2 核心暴露工具 (Tools) 规划
28
+ 针对 Opencode 的 Agent,为了最大化保持 Agent 职责单一且减少不可控的自动化失败(如复杂的 DOM 变化、验证码、登录墙),我们**仅对外暴露一个核心 Tool**:
29
+
30
+ 1. `webfetch(url_or_query)`: 获取指定网页的最终渲染内容或执行搜索。
31
+ - **输入**: 一个 URL,或者一个搜索关键词(插件内部可将其转化为特定搜索引擎的 URL)。
32
+ - **输出**: 该网页(或搜索结果页面)的主体内容,通常转换为高质量的 Markdown 格式,去除了广告、导航栏等噪音。
33
+
34
+ **设计理念**:所有复杂的中间过程(如登录、重定向、CAPTCHA 人机验证、甚至翻页寻找特定信息)**均不由 Agent 自动处理,而是由插件内部拦截并转移给人类执行**。
35
+
36
+ ### 2.3 人机验证 (Human-in-the-loop) 机制设计
37
+ 当 Agent 调用 `webfetch` 请求一个页面时,插件内部的执行流程如下:
38
+
39
+ 1. **发起请求**:插件使用 Playwright 打开目标 URL。
40
+ 2. **状态监测**:插件监听页面加载后的状态。如果检测到以下情况(可通过 URL 变化、特定元素出现等启发式规则判断):
41
+ - 被重定向到了登录页面(如 `/login`, `auth0.com` 等)。
42
+ - 出现了典型的反爬虫挑战(如 Cloudflare 的 "Checking your browser...",Google 的 CAPTCHA)。
43
+ - 页面迟迟未加载出预期的主体内容。
44
+ 3. **人类接管**:
45
+ - 插件主动**挂起**当前 `webfetch` 的 Promise。
46
+ - 插件在 Opencode 终端输出高亮提示:“[需要人类协助] 访问 `https://xxx` 遇到障碍(如登录/验证码)。请在弹出的浏览器窗口中完成操作,获取到最终目标页面后,在终端按回车键继续...”。
47
+ - (可选)如果浏览器窗口在后台,插件尝试将其唤起至前台。
48
+ 4. **人类操作**:人类在真实的浏览器窗口中输入账号密码、点选验证码、甚至手动点击搜索结果跳转到目标详情页。
49
+ 5. **恢复与提取**:
50
+ - 人类确认操作完成并在终端按下回车。
51
+ - 插件恢复 Promise 的执行,此时直接提取**当前浏览器所处页面**的 DOM 结构。
52
+ - 将提取的 HTML 通过 `turndown`(结合 Readability 算法)转换为 Markdown,并返回给 Agent。
53
+
54
+ 这种设计将最困难的“导航和越权”交给了人类,而 Agent 只需专注于“提出需求 (`webfetch`)”和“分析结果 (Markdown)”。
55
+
56
+ ## 3. 技术难点与解决方案
57
+
58
+ ### 3.1 动态 Headless 切换的局限
59
+ **痛点**:Playwright 启动后无法动态切换无头和有头模式。一直开着有头模式(弹出窗口)会打扰用户正常的编码工作。
60
+ **解决方案**:
61
+ 1. **方案 A (默认有头并最小化)**:使用 `headless: false`,但通过参数/系统命令将其启动时最小化或放置在后台。
62
+ 2. **方案 B (CDP Attach - 推荐)**:用户自己电脑上开一个开启了 debug 端口的 Chrome (`chrome.exe --remote-debugging-port=9222`)。插件不去 `launch` 浏览器,而是通过 `playwright.chromium.connectOverCDP('http://localhost:9222')` 接入。这样用户平时就在这个 Chrome 里正常上网、保持登录,Agent 在后台操控新建的 Tab。当需要验证时,用户切到该 Tab 即可。
63
+
64
+ ### 3.2 页面内容的 Markdown 提取
65
+ 为了让大模型 (LLM) 高效阅读网页,不能直接返回完整 HTML(Token 消耗极大且噪音多)。
66
+ **解决方案**:
67
+ 参考原项目,使用 `turndown` 库,并结合 Playwright 的 `page.evaluate()` 清理不必要的 script、style、nav、footer 等标签,提取主体内容的文本。可以结合 Mozilla 的 `Readability.js` 来提取核心正文。
68
+
69
+ ### 3.3 状态管理与防卡死
70
+ **痛点**:网络状况差或者死链会导致 Agent 长时间等待。
71
+ **解决方案**:
72
+ 所有的 Tool 调用(`goto`, `click`)必须设置合理的 `timeout`。当发生超时时,捕获异常并告诉 Agent "Timeout occurred",让 Agent 决定是重试、放弃还是请求人类帮助。
73
+
74
+ ## 4. 总结
75
+
76
+ 实现这样一个支持人机协作的 Opencode 浏览器插件在技术上是完全可行的。最核心的转变在于从**"一次性的无头自动化脚本"**转变为**"长期存活的、由人类和 Agent 共享的持久化浏览器上下文"**。通过赋予 Agent 遇到障碍时主动寻求人类帮助的能力,将极大拓宽该插件在复杂网络环境(反爬、强登录态网站)下的应用边界。
@@ -0,0 +1,223 @@
1
+ import type { ToolContext } from "@opencode-ai/plugin";
2
+
3
+ import * as os from 'os';
4
+ import * as path from 'path';
5
+ import * as fs from 'fs';
6
+ import { HumanInteractor } from './HumanInteractor.js';
7
+ import { Extractor } from './Extractor.js';
8
+ import type { BrowserContext, Page } from 'playwright';
9
+
10
+ type PlaywrightModule = typeof import('playwright');
11
+
12
/**
 * Owns a single persistent, headed Chromium context (via Playwright) and one
 * page inside it, and uses that page to fetch web pages as Markdown.
 *
 * The context is launched lazily by `ensureContext()` and torn down by
 * `dispose()`. Profile data is stored under `~/.cache/opencode/user-data`.
 */
export class BrowserManager {
  private context: BrowserContext | null = null;
  private page: Page | null = null;
  private readonly playwright: PlaywrightModule;
  private readonly client: any;

  /**
   * @param playwright The loaded Playwright module used to launch Chromium.
   * @param client Opencode client handle; only forwarded to `HumanInteractor`.
   */
  constructor(playwright: PlaywrightModule, client: any) {
    this.playwright = playwright;
    this.client = client;
  }

  /**
   * Initializes the persistent context if not already done.
   *
   * An existing context is reused as long as its page is still open;
   * otherwise everything is disposed and a fresh context is launched.
   */
  public async ensureContext(): Promise<void> {
    let isConnected = false;
    if (this.context && this.page) {
      try {
        // Simple check to see if the page is still open and connected
        isConnected = !this.page.isClosed();
      } catch (e) {
        isConnected = false;
      }
    }

    if (isConnected) {
      return;
    }

    // Clean up just in case
    await this.dispose();

    const userDataDir = path.resolve(os.homedir(), '.cache/opencode/user-data');
    // Recursive mkdir is a no-op when the directory already exists.
    fs.mkdirSync(userDataDir, { recursive: true });

    // Launch a persistent context with extension support
    const extensionPath = path.resolve(os.homedir(), '.cache/opencode/extensions');
    const extensions: string[] = [];
    if (fs.existsSync(extensionPath)) {
      const candidates = fs.readdirSync(extensionPath).map((d) => path.join(extensionPath, d));
      // `throwIfNoEntry: false` keeps a dangling symlink or an entry removed
      // mid-scan from aborting the whole browser start-up.
      extensions.push(
        ...candidates.filter((d) => Boolean(fs.statSync(d, { throwIfNoEntry: false })?.isDirectory())),
      );
    }

    const launchOptions: Parameters<typeof this.playwright.chromium.launchPersistentContext>[1] = {
      headless: false,
      args: [
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--disable-blink-features=AutomationControlled',
        '--disable-features=VizDisplayCompositor',
        '--window-size=1280,720',
        ...(extensions.length > 0
          ? [
              `--disable-extensions-except=${extensions.join(',')}`,
              `--load-extension=${extensions.join(',')}`,
            ]
          : []),
      ],
      viewport: { width: 1280, height: 720 },
    };

    const context = await this.playwright.chromium.launchPersistentContext(userDataDir, launchOptions);
    this.context = context;

    // Create a new page or use the default one created by launchPersistentContext
    const pages = context.pages();
    const page = pages.length > 0 ? pages[0] : await context.newPage();
    this.page = page;

    // Mask webdriver
    await page.addInitScript(() => {
      Object.defineProperty(navigator, 'webdriver', {
        get: () => false,
      });

      const chrome = (window as any).chrome;
      if (chrome && chrome.runtime && chrome.runtime.onConnect) {
        delete chrome.runtime.onConnect;
      }
    });
  }

  /**
   * Navigates to a URL and tries to extract the content.
   * Asks the human for help (via `HumanInteractor`) if it encounters a
   * captcha or login screen, then re-checks once before extracting.
   *
   * @param url Absolute URL to open.
   * @param timeout Navigation timeout in milliseconds, passed to `page.goto`.
   * @param ctx Tool context; its `abort` signal cancels the operation.
   * @returns The page's main content as Markdown.
   * @throws Error if the page could not be initialised, the call was aborted,
   *         or the browser page was closed before extraction.
   */
  public async fetchWebpage(url: string, timeout: number, ctx: ToolContext): Promise<string> {
    await this.ensureContext();
    // Work on a local reference: `dispose()` may null `this.page` while this
    // method is suspended on an await, and re-reading the field afterwards
    // would fail with an opaque null-dereference TypeError.
    const page = this.page;
    if (!page) throw new Error('Page not initialized');

    console.log(`\nNavigating to: ${url}`);

    // Add a listener to handle abortions
    const onAbort = () => {
      console.log('Operation aborted by user or timeout.');
    };
    ctx.abort.addEventListener('abort', onAbort);

    // Fails fast with a descriptive error once continuing is pointless.
    const assertUsable = () => {
      if (ctx.abort.aborted) {
        throw new Error('Webfetch aborted before the page content could be extracted.');
      }
      if (page.isClosed()) {
        throw new Error('Browser page was closed before the page content could be extracted.');
      }
    };

    // Blocker probe handed to the human-help loop. Reporting "not blocked"
    // after an abort or a closed page lets that loop terminate instead of
    // polling a dead page forever (URL-based checks still match on a closed page).
    const probeBlockers = async (): Promise<{ blocked: boolean; reason?: string }> =>
      ctx.abort.aborted || page.isClosed() ? { blocked: false } : this.detectBlockers(page);

    try {
      // Go to the requested URL. Navigation failures are tolerated on purpose:
      // the page may still be usable, or the blocker check below will flag it.
      await page.goto(url, { waitUntil: 'domcontentloaded', timeout }).catch((e) => {
        console.warn(`Navigation might have timed out or failed partially: ${e.message}`);
      });
      assertUsable();

      // Give the page a moment to settle before probing for blockers.
      await page.waitForTimeout(2000);

      // Basic heuristic to check if human intervention is needed
      const needsHelp = await this.detectBlockers(page);

      if (needsHelp.blocked) {
        await HumanInteractor.askForHumanHelp(`The page appears to be blocked or requires login.\nReason: ${needsHelp.reason}\nURL: ${page.url()}`, ctx, this.client, probeBlockers);
      } else {
        // Wait a little bit for dynamic content if not blocked
        await page.waitForTimeout(2000);
      }
      assertUsable();

      // Allow one more check in case the user didn't fully resolve it, or if it redirected
      const needsHelpAgain = await this.detectBlockers(page);
      if (needsHelpAgain.blocked) {
        await HumanInteractor.askForHumanHelp(`Still detected a blocker.\nReason: ${needsHelpAgain.reason}\nPlease complete the action and try again.`, ctx, this.client, probeBlockers);
      }
      assertUsable();

      // Extract content as Markdown
      console.log('Extracting page content...');
      return await Extractor.extractMarkdown(page, page.url());
    } finally {
      ctx.abort.removeEventListener('abort', onAbort);
    }
  }

  /**
   * Detects if the current page is blocked by Captcha, Cloudflare, or a Login wall.
   *
   * Purely heuristic: inspects the URL, the document title/body text and the
   * `src` of iframes. Any error while probing is logged and treated as
   * "not blocked".
   *
   * @param page Page to inspect; `null` yields `{ blocked: false }`.
   * @returns `blocked` plus a human-readable `reason` when a blocker matched.
   */
  private async detectBlockers(page: Page | null): Promise<{ blocked: boolean; reason?: string }> {
    try {
      if (page == null) return { blocked: false };
      const url = page.url();

      // 0. Check for about:blank (no network connection)
      if (url === 'about:blank') {
        return { blocked: true, reason: 'No network connection. Please check your internet and press Enter to continue.' };
      }

      // 1. Check URL patterns for logins or known captchas
      if (url.includes('/login') || url.includes('/signin') || url.includes('auth0.com')) {
        return { blocked: true, reason: 'Login page detected.' };
      }

      // 2. Check for Cloudflare Turnstile or similar challenge pages
      const isCloudflare = await page.evaluate(() => {
        const title = document.title.toLowerCase();
        // `document.body` is null for documents without a body (e.g. while
        // the page is still being parsed); treat that as empty text.
        const text = (document.body?.innerText ?? '').toLowerCase();

        if (title.includes('just a moment') || title.includes('attention required!')) {
          return true;
        }
        if (text.includes('checking your browser before accessing') || text.includes('enable javascript and cookies to continue')) {
          return true;
        }
        return false;
      });

      if (isCloudflare) {
        return { blocked: true, reason: 'Cloudflare challenge detected.' };
      }

      // 3. Check for typical Captcha iframes (reCAPTCHA, hCaptcha)
      const hasCaptcha = await page.evaluate(() => {
        const iframes = Array.from(document.querySelectorAll('iframe'));
        return iframes.some((f) => {
          const src = f.src.toLowerCase();
          return src.includes('recaptcha') || src.includes('hcaptcha') || src.includes('turnstile');
        });
      });

      if (hasCaptcha) {
        // Sometimes captchas are invisible, but if they are visible, we might be blocked.
        return { blocked: true, reason: 'Captcha iframe detected on page.' };
      }

      // 4. Check for Google "Sorry" page
      if (url.includes('/sorry/')) {
        return { blocked: true, reason: 'Google automated access blocker detected.' };
      }
    } catch (e) {
      console.error('Error detecting blockers:', e);
    }

    return { blocked: false };
  }

  /**
   * Close the browser context safely.
   *
   * Errors from closing are ignored; the cached context and page references
   * are always cleared so the next `ensureContext()` launches a fresh one.
   */
  public async dispose(): Promise<void> {
    try {
      if (this.context) {
        await this.context.close().catch(() => {});
      }
    } catch (e) {
      // Ignore errors on close
    } finally {
      this.context = null;
      this.page = null;
    }
  }
}
@@ -0,0 +1,47 @@
1
+ import { Readability } from '@mozilla/readability';
2
+ import { JSDOM } from 'jsdom';
3
+ import TurndownService from 'turndown';
4
+ import type { Page } from 'playwright';
5
+
6
/**
 * Turns a rendered Playwright page into Markdown: the page HTML is parsed
 * with JSDOM, reduced to its main article by Readability and converted with
 * Turndown.
 */
export class Extractor {
  /**
   * Extracts the main content of a Playwright page and converts it to Markdown.
   * @param page The Playwright Page object.
   * @param url The current URL of the page.
   * @returns The main content formatted as Markdown. When Readability finds no
   *          article, a notice followed by the first 5000 characters of the
   *          page's body text is returned instead.
   */
  static async extractMarkdown(page: Page, url: string): Promise<string> {
    // 1. Get full HTML content from the page
    const htmlContent = await page.content();

    // 2 + 3. Parse the HTML and extract the core content
    const article = Extractor.parseArticle(htmlContent, url);

    if (!article || !article.content) {
      // Fallback if Readability fails. `document.body` can be null for
      // body-less documents, so fall back to an empty string.
      const bodyText = await page.evaluate(() => document.body?.innerText ?? '');
      return `Failed to extract main article content.\n\nRaw Body Text:\n${bodyText.substring(0, 5000)}`;
    }

    // 4. Convert extracted HTML to clean Markdown
    const turndownService = new TurndownService({
      headingStyle: 'atx',
      hr: '---',
      bulletListMarker: '-',
      codeBlockStyle: 'fenced',
      emDelimiter: '*',
    });

    // Remove noisy elements just in case
    turndownService.remove(['script', 'style', 'noscript', 'iframe']);

    const markdown = turndownService.turndown(article.content);

    // Format the output
    return `# ${article.title || 'Extracted Page Content'}\n\n**Source URL:** ${url}\n\n---\n\n${markdown}`;
  }

  /**
   * Runs Readability over `html` inside a throw-away JSDOM window.
   *
   * The window is always closed afterwards so repeated fetches do not keep
   * whole DOM trees alive; the returned article only carries plain strings,
   * so it remains valid after the window is gone.
   */
  private static parseArticle(html: string, url: string) {
    const dom = new JSDOM(html, { url });
    try {
      return new Readability(dom.window.document).parse();
    } finally {
      dom.window.close();
    }
  }
}
@@ -0,0 +1,46 @@
1
+ import type { ToolContext } from '@opencode-ai/plugin'
2
+ import { resolve } from 'dns';
3
+ import { title } from 'process';
4
+
5
+ const sleep = (t: number) => new Promise(resolve => {
6
+ setTimeout(() => {
7
+ resolve(null)
8
+ }, t);
9
+ })
10
+ export class HumanInteractor {
11
+ /**
12
+ * Instructs the LLM to pause and asks the human to resolve the issue in the browser.
13
+ * Pre-fills the TUI prompt so the user can just press Enter to continue.
14
+ */
15
+ static async askForHumanHelp (
16
+ message: string,
17
+ ctx: ToolContext,
18
+ client: any,
19
+ checker: any,
20
+ ): Promise<void> {
21
+ try {
22
+
23
+ while(true) {
24
+ // Show a toast to notify the user immediately
25
+ client?.tui?.showToast({
26
+ body: {
27
+ title: 'Browser Action Required',
28
+ message,
29
+ variant: 'warning'
30
+ }
31
+ // duration: 10000
32
+ })
33
+
34
+ await sleep(5000)
35
+ const res = await checker();
36
+ if (res.blocked) {
37
+ continue
38
+ } else {
39
+ break
40
+ }
41
+ }
42
+ } catch (e) {
43
+ console.error(e)
44
+ }
45
+ }
46
+ }
package/src/index.ts ADDED
@@ -0,0 +1,86 @@
1
+ import { type Plugin, tool } from "@opencode-ai/plugin";
2
+ import { BrowserManager } from "./BrowserManager.js";
3
+
4
+ type PlaywrightModule = typeof import("playwright");
5
+
6
/** Navigation timeout used when the caller supplies none, in milliseconds. */
const DEFAULT_TIMEOUT = 30_000;
/** Upper bound for any navigation timeout, in milliseconds. */
const MAX_TIMEOUT = 120_000;

/** Process-wide browser manager shared by every `webfetch` call. */
let globalManager: BrowserManager | null = null;
/** In-flight creation of `globalManager`, so concurrent first calls share it. */
let globalManagerLoading: Promise<BrowserManager> | null = null;

/**
 * Returns the shared BrowserManager, creating it on first use.
 *
 * Creation is de-duplicated: without this, two tool calls arriving before
 * Playwright finished loading would each build their own manager and later
 * try to launch two persistent contexts on the same profile directory.
 * A failed load is not cached, so the next call retries.
 */
function acquireManager(client: any): Promise<BrowserManager> {
  if (globalManager) {
    return Promise.resolve(globalManager);
  }
  globalManagerLoading ??= loadPlaywright()
    .then((playwright) => {
      globalManager = new BrowserManager(playwright, client);
      return globalManager;
    })
    .finally(() => {
      globalManagerLoading = null;
    });
  return globalManagerLoading;
}

/**
 * Opencode plugin that registers a single `webfetch` tool.
 *
 * The tool opens the given URL (or, for non-URL input, a Google search for
 * it) in the shared browser and returns the page's main content as Markdown.
 */
export const WebfetchPlugin: Plugin = async ({ client }) => {
  const WebfetchTool = tool({
    description: "Fetch a webpage's main content in markdown.",
    args: {
      url: tool.schema.string().describe("The URL to fetch."),
      // A `timeout` argument (seconds, default 30, max 120) is intentionally
      // not exposed yet; `execute` still honours `params.timeout` if present.
    },
    async execute(params: any, ctx: any) {
      const rawTarget = String(params.url ?? "").trim();
      if (rawTarget === "") {
        throw new Error("webfetch requires a non-empty URL or search query.");
      }

      const manager = await acquireManager(client);

      // `params.timeout` is in seconds. Anything that is not a positive finite
      // number falls back to the default; in particular 0 must not reach
      // Playwright, where it means "no timeout" and would bypass MAX_TIMEOUT.
      const requestedSeconds =
        typeof params.timeout === "number" && Number.isFinite(params.timeout) && params.timeout > 0
          ? params.timeout
          : DEFAULT_TIMEOUT / 1000;
      const timeoutMs = Math.min(requestedSeconds * 1000, MAX_TIMEOUT);

      // Aborting tears down the browser so any pending Playwright call fails fast.
      // NOTE(review): the manager (and its single page) is shared, so this also
      // affects other webfetch calls running at the same time — confirm intended.
      const abortHandler = () => {
        manager.dispose().catch(() => undefined);
      };
      ctx.abort.addEventListener("abort", abortHandler, { once: true });

      try {
        // If it's not a valid URL (e.g., just a string query), convert to Google search
        const targetUrl = /^https?:\/\//i.test(rawTarget)
          ? rawTarget
          : `https://www.google.com/search?q=${encodeURIComponent(rawTarget)}`;

        const markdownResult = await manager.fetchWebpage(targetUrl, timeoutMs, ctx);

        ctx.metadata({
          title: `Webfetch: ${targetUrl}`,
          metadata: {
            url: targetUrl,
            length: markdownResult.length,
          },
        });

        return markdownResult;
      } finally {
        ctx.abort.removeEventListener("abort", abortHandler);
      }
    },
  });

  return {
    tool: {
      webfetch: WebfetchTool,
    },
  };
};
69
+
70
/**
 * Loads the Playwright module.
 *
 * Tries the regular `playwright` package first and, if that import fails,
 * the copy at `/tmp/node_modules/playwright`.
 *
 * NOTE(review): the fallback imports code from a path under `/tmp`; on
 * multi-user machines that location may be writable by others — confirm
 * this is acceptable.
 *
 * @returns The imported Playwright module.
 * @throws Error with install instructions when both imports fail; its
 *         `cause` is the error from the first (regular) import.
 */
async function loadPlaywright(): Promise<PlaywrightModule> {
  let primaryFailure: unknown;

  try {
    return await import("playwright");
  } catch (error) {
    // Remember why the normal import failed; it becomes the `cause` below.
    primaryFailure = error;
  }

  try {
    // @ts-ignore
    return await import("/tmp/node_modules/playwright");
  } catch {
    throw new Error(
      "webfetch plugin requires Playwright. Install it with: bun install playwright && bunx playwright install chromium",
      { cause: primaryFailure },
    );
  }
}
85
+
86
+ export default WebfetchPlugin;
package/tsconfig.json ADDED
@@ -0,0 +1,17 @@
1
+ {
2
+ "$schema": "https://json.schemastore.org/tsconfig",
3
+ "compilerOptions": {
4
+ "target": "ES2022",
5
+ "module": "ES2022",
6
+ "moduleResolution": "Bundler",
7
+ "lib": ["ES2022", "DOM"],
8
+ "strict": true,
9
+ "declaration": true,
10
+ "declarationMap": true,
11
+ "sourceMap": true,
12
+ "outDir": "dist",
13
+ "esModuleInterop": true,
14
+ "skipLibCheck": true
15
+ },
16
+ "include": ["src/**/*"]
17
+ }