opencode-crawl4ai 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +144 -0
- package/bin/cli.js +189 -0
- package/dist/plugin.js +12634 -0
- package/package.json +56 -0
- package/python/bridge.py +422 -0
package/README.md
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
# opencode-crawl4ai
|
|
2
|
+
|
|
3
|
+
OpenCode plugin that gives AI agents unrestricted web access via [crawl4ai](https://github.com/unclecode/crawl4ai).
|
|
4
|
+
|
|
5
|
+
Fetch URLs, search the web, extract structured data, take screenshots, deep crawl sites, and discover URLs — all from inside OpenCode.
|
|
6
|
+
|
|
7
|
+
## Features
|
|
8
|
+
|
|
9
|
+
- **Fetch** — Retrieve any URL as clean markdown or raw HTML, with stealth mode and JS execution
|
|
10
|
+
- **Search** — Web search via SearXNG (primary) or DuckDuckGo (fallback, no setup needed)
|
|
11
|
+
- **Extract** — Structured data extraction using CSS selectors
|
|
12
|
+
- **Screenshot** — Capture full-page screenshots as base64
|
|
13
|
+
- **Crawl** — Deep crawl websites with BFS/DFS strategies
|
|
14
|
+
- **Map** — Discover all URLs on a site
|
|
15
|
+
|
|
16
|
+
## Requirements
|
|
17
|
+
|
|
18
|
+
- [OpenCode](https://github.com/sst/opencode)
|
|
19
|
+
- Python 3.10+ with `uvx` (`pip install uv`)
|
|
20
|
+
- Docker (optional, for faster search via SearXNG)
|
|
21
|
+
|
|
22
|
+
## Installation
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
bunx opencode-crawl4ai --install
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
Or with npx:
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
npx opencode-crawl4ai --install
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
Copies the plugin to `~/.config/opencode/plugins/`. Restart OpenCode to activate.
|
|
35
|
+
|
|
36
|
+
To install globally:
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
npm install -g opencode-crawl4ai
|
|
40
|
+
opencode-crawl4ai --install
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
### Optional: faster search with SearXNG
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
opencode-crawl4ai searxng # starts SearXNG on port 8888
|
|
47
|
+
export SEARXNG_URL=http://localhost:8888
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
SearXNG aggregates Google, Bing, DuckDuckGo, and more. Without it, the plugin falls back to DuckDuckGo directly.
|
|
51
|
+
|
|
52
|
+
## CLI Commands
|
|
53
|
+
|
|
54
|
+
```
|
|
55
|
+
opencode-crawl4ai --install Copy plugin to ~/.config/opencode/plugins/
|
|
56
|
+
opencode-crawl4ai --uninstall Remove plugin
|
|
57
|
+
opencode-crawl4ai searxng [port] Start SearXNG Docker container (default: 8888)
|
|
58
|
+
opencode-crawl4ai searxng-stop Stop SearXNG container
|
|
59
|
+
opencode-crawl4ai --help Show help
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## Available Tools
|
|
63
|
+
|
|
64
|
+
Once installed, these tools are available to the AI in OpenCode:
|
|
65
|
+
|
|
66
|
+
### `crawl4ai_fetch`
|
|
67
|
+
|
|
68
|
+
Fetch a URL and return its content as markdown (default) or HTML.
|
|
69
|
+
|
|
70
|
+
```js
|
|
71
|
+
crawl4ai_fetch({ url: "https://docs.example.com" })
|
|
72
|
+
crawl4ai_fetch({ url: "https://example.com", format: "html" })
|
|
73
|
+
crawl4ai_fetch({ url: "https://spa.example.com", wait_for: ".content-loaded" })
|
|
74
|
+
crawl4ai_fetch({ url: "https://example.com", js_code: "document.querySelector('.show-more').click()" })
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
### `crawl4ai_search`
|
|
78
|
+
|
|
79
|
+
Search the web and return results with URL, title, and snippet.
|
|
80
|
+
|
|
81
|
+
```js
|
|
82
|
+
crawl4ai_search({ query: "React hooks tutorial" })
|
|
83
|
+
crawl4ai_search({ query: "Python asyncio", limit: 5 })
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
### `crawl4ai_extract`
|
|
87
|
+
|
|
88
|
+
Extract structured data from a URL using CSS selectors.
|
|
89
|
+
|
|
90
|
+
```js
|
|
91
|
+
crawl4ai_extract({
|
|
92
|
+
url: "https://example.com/product",
|
|
93
|
+
schema: { title: "h1.product-name", price: ".price" }
|
|
94
|
+
})
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
### `crawl4ai_screenshot`
|
|
98
|
+
|
|
99
|
+
Take a screenshot of a web page. Returns base64-encoded image data URL.
|
|
100
|
+
|
|
101
|
+
```js
|
|
102
|
+
crawl4ai_screenshot({ url: "https://example.com" })
|
|
103
|
+
crawl4ai_screenshot({ url: "https://example.com", width: 1920, height: 1080 })
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
### `crawl4ai_crawl`
|
|
107
|
+
|
|
108
|
+
Deep crawl a website starting from a URL, following links up to `max_pages` and `max_depth`.
|
|
109
|
+
|
|
110
|
+
```js
|
|
111
|
+
crawl4ai_crawl({ url: "https://docs.example.com", max_pages: 20 })
|
|
112
|
+
crawl4ai_crawl({ url: "https://example.com", strategy: "bfs", max_depth: 2 })
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
### `crawl4ai_map`
|
|
116
|
+
|
|
117
|
+
Discover all URLs on a website.
|
|
118
|
+
|
|
119
|
+
```js
|
|
120
|
+
crawl4ai_map({ url: "https://example.com" })
|
|
121
|
+
crawl4ai_map({ url: "https://example.com", search: "pricing" })
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
### `crawl4ai_version`
|
|
125
|
+
|
|
126
|
+
Get the installed crawl4ai version.
|
|
127
|
+
|
|
128
|
+
### `crawl4ai_debug`
|
|
129
|
+
|
|
130
|
+
Debug the plugin and bridge connection.
|
|
131
|
+
|
|
132
|
+
## Environment Variables
|
|
133
|
+
|
|
134
|
+
| Variable | Description | Default |
|
|
135
|
+
|----------|-------------|---------|
|
|
136
|
+
| `SEARXNG_URL` | URL of a SearXNG instance | Falls back to DuckDuckGo |
|
|
137
|
+
|
|
138
|
+
## How It Works
|
|
139
|
+
|
|
140
|
+
The plugin's TypeScript layer spawns a Python bridge (`uvx --with crawl4ai --with ddgs python bridge.py`) on each tool call. No persistent Python process is required.
|
|
141
|
+
|
|
142
|
+
## License
|
|
143
|
+
|
|
144
|
+
MIT
|
package/bin/cli.js
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
#!/usr/bin/env node

import { existsSync } from "node:fs";
import { appendFile, copyFile, mkdir, readFile, unlink } from "node:fs/promises";
import { execSync } from "node:child_process";
import { createInterface } from "node:readline";
import { homedir } from "node:os";
import { dirname, join, resolve } from "node:path";
import { fileURLToPath } from "node:url";

// ESM has no built-in __dirname; derive it from this module's URL.
const __dirname = dirname(fileURLToPath(import.meta.url));
// Plugin install targets: global (all projects) vs. local (current project).
const GLOBAL_PLUGINS_DIR = join(homedir(), ".config", "opencode", "plugins");
const LOCAL_PLUGINS_DIR = resolve(".opencode", "plugins");
// Built plugin bundle and package manifest, shipped alongside bin/ in the package.
const source = join(__dirname, "..", "dist", "plugin.js");
const packageJson = join(__dirname, "..", "package.json");

// Name of the optional SearXNG Docker container and its default host port.
const SEARXNG_CONTAINER = "opencode-crawl4ai-searxng";
const SEARXNG_DEFAULT_PORT = 8888;
|
|
19
|
+
|
|
20
|
+
// Read this package's version string from its package.json manifest.
async function getVersion() {
  const manifest = await readFile(packageJson, "utf-8");
  const { version } = JSON.parse(manifest);
  return version;
}
|
|
24
|
+
|
|
25
|
+
// Ask a single question on stdin and resolve with the trimmed answer.
function prompt(question) {
  return new Promise((resolvePromise) => {
    const rl = createInterface({ input: process.stdin, output: process.stdout });
    rl.question(question, (answer) => {
      rl.close();
      resolvePromise(answer.trim());
    });
  });
}
|
|
29
|
+
|
|
30
|
+
// True when the Docker daemon is reachable (`docker info` succeeds).
function isDockerRunning() {
  try {
    execSync("docker info", { stdio: "pipe" });
    return true;
  } catch {
    return false;
  }
}
|
|
33
|
+
|
|
34
|
+
// Check whether the named SearXNG container is currently running.
// NOTE(review): not referenced anywhere else in this file — possibly dead code.
function isSearXNGRunning() {
  try {
    const output = execSync(
      `docker ps --filter name=${SEARXNG_CONTAINER} --format {{.Names}}`,
      { stdio: ["pipe", "pipe", "pipe"] }
    );
    return output.toString().trim() === SEARXNG_CONTAINER;
  } catch {
    return false;
  }
}
|
|
40
|
+
|
|
41
|
+
// (Re)start the SearXNG container, publishing the given host port to 8080.
function startSearXNG(port) {
  // Clear out any stale container first; `|| true` swallows "no such container".
  execSync(`docker rm -f ${SEARXNG_CONTAINER} 2>/dev/null || true`, { stdio: "pipe" });
  const runCmd = [
    "docker run -d",
    `--name ${SEARXNG_CONTAINER}`,
    `-p ${port}:8080`,
    "--restart unless-stopped",
    "searxng/searxng:latest",
  ].join(" ");
  execSync(runCmd, { stdio: "inherit" });
}
|
|
48
|
+
|
|
49
|
+
// Interactive search-backend setup for crawl4ai_search:
//   1) skip (DuckDuckGo fallback), 2) use an existing SearXNG instance,
//   3) start SearXNG via Docker.
// Persists SEARXNG_URL to the user's shell rc when a backend is configured.
async function setupSearXNG() {
  console.log("\nSearch backend (used by crawl4ai_search):");
  console.log(" 1) Skip — use DuckDuckGo fallback (no setup needed)");
  console.log(" 2) I already have a SearXNG instance");
  console.log(" 3) Spin up SearXNG via Docker\n");

  const choice = await prompt("Choice [1/2/3] (default: 1): ");

  if (choice === "2") {
    const url = await prompt("SearXNG URL (e.g. http://localhost:8080): ");
    if (url) {
      await writeEnvVar("SEARXNG_URL", url);
      console.log(`✓ SEARXNG_URL=${url} written to shell rc`);
    }
  } else if (choice === "3") {
    if (!isDockerRunning()) {
      console.log("✗ Docker is not running — skipping. Start Docker and run 'opencode-crawl4ai searxng' later.");
      return;
    }
    const portInput = await prompt(`Port (default: ${SEARXNG_DEFAULT_PORT}): `);
    // Validate the answer: the original parseInt could yield NaN on non-numeric
    // input, which would produce a broken `docker run -p NaN:8080` command.
    const parsed = Number.parseInt(portInput, 10);
    const port =
      Number.isInteger(parsed) && parsed > 0 && parsed <= 65535
        ? parsed
        : SEARXNG_DEFAULT_PORT;
    console.log(`Starting SearXNG on port ${port}...`);
    startSearXNG(port);
    await writeEnvVar("SEARXNG_URL", `http://localhost:${port}`);
    console.log(`✓ SearXNG running at http://localhost:${port}`);
    console.log(`✓ SEARXNG_URL written to shell rc`);
  } else {
    console.log(" Using DuckDuckGo fallback — no setup needed.");
  }
}
|
|
79
|
+
|
|
80
|
+
// Persist an environment variable by appending an export line to the user's
// shell rc file (zsh, fish, or bash — detected from $SHELL).
// Fix: the original appended unconditionally, so repeated installs piled up
// duplicate export lines; now the write is skipped if the line already exists.
async function writeEnvVar(key, value) {
  const shell = process.env.SHELL || "";
  const rc = shell.includes("zsh")
    ? join(homedir(), ".zshrc")
    : shell.includes("fish")
      ? join(homedir(), ".config", "fish", "config.fish")
      : join(homedir(), ".bashrc");
  const line = shell.includes("fish")
    ? `\nset -x ${key} "${value}"\n`
    : `\nexport ${key}="${value}"\n`;
  // Skip the append when the exact line is already present in the rc file.
  if (existsSync(rc)) {
    const current = await readFile(rc, "utf-8");
    if (current.includes(line.trim())) {
      console.log(` (already present in ${rc})`);
      return;
    }
  }
  await appendFile(rc, line);
  console.log(` (added to ${rc} — run 'source ${rc}' or restart your shell)`);
}
|
|
93
|
+
|
|
94
|
+
// Interactive install: copy the built plugin bundle into the chosen OpenCode
// plugins directory (global or project-local), then offer SearXNG setup.
async function install() {
  const version = await getVersion();
  console.log(`\nopencode-crawl4ai v${version}\n`);

  if (!existsSync(source)) {
    console.error(`Error: built plugin not found at ${source}`);
    process.exit(1);
  }

  // Step 1: pick the install scope (global is the default).
  console.log("Install scope:");
  console.log(" 1) Global — ~/.config/opencode/plugins/ (all projects)");
  console.log(" 2) Local — .opencode/plugins/ (this project only)\n");
  const scope = await prompt("Choice [1/2] (default: 1): ");
  const useLocal = scope === "2";

  const pluginDir = useLocal ? LOCAL_PLUGINS_DIR : GLOBAL_PLUGINS_DIR;
  const target = join(pluginDir, "crawl4ai.js");

  await mkdir(pluginDir, { recursive: true });
  await copyFile(source, target);
  console.log(`\n✓ Plugin installed: ${target}`);

  // Step 2: optional search backend configuration.
  await setupSearXNG();

  console.log("\nDone! Restart OpenCode to activate.");
  console.log("Tools: crawl4ai_fetch, crawl4ai_search, crawl4ai_extract, crawl4ai_screenshot, crawl4ai_crawl, crawl4ai_map");
}
|
|
122
|
+
|
|
123
|
+
// Remove the installed plugin file from the user-selected scope.
async function uninstall() {
  console.log("\nWhere is the plugin installed?");
  console.log(" 1) Global — ~/.config/opencode/plugins/");
  console.log(" 2) Local — .opencode/plugins/\n");
  const scope = await prompt("Choice [1/2] (default: 1): ");

  const baseDir = scope === "2" ? LOCAL_PLUGINS_DIR : GLOBAL_PLUGINS_DIR;
  const target = join(baseDir, "crawl4ai.js");

  if (!existsSync(target)) {
    console.log(`Plugin not found at ${target}`);
    return;
  }

  await unlink(target);
  console.log(`✓ Removed: ${target}`);
  console.log("Restart OpenCode to deactivate.");
}
|
|
140
|
+
|
|
141
|
+
// Stop and remove the SearXNG container; tolerate "not found / not running".
function stopSearXNG() {
  console.log("Stopping SearXNG...");
  try {
    for (const verb of ["stop", "rm"]) {
      execSync(`docker ${verb} ${SEARXNG_CONTAINER}`, { stdio: "pipe" });
    }
    console.log("✓ SearXNG stopped.");
  } catch {
    console.log("SearXNG container not found or already stopped.");
  }
}
|
|
149
|
+
|
|
150
|
+
// Print CLI usage, version, and examples.
async function showHelp() {
  const version = await getVersion();
  const helpText = `opencode-crawl4ai v${version}

Usage: opencode-crawl4ai [command]

Commands:
  --install          Install plugin (prompts for scope + SearXNG setup)
  --uninstall        Remove plugin
  searxng [port]     Start SearXNG Docker container (default port: ${SEARXNG_DEFAULT_PORT})
  searxng-stop       Stop SearXNG container
  --help, -h         Show this help

Examples:
  bunx opencode-crawl4ai --install
  npx opencode-crawl4ai --install
  npm i -g opencode-crawl4ai && opencode-crawl4ai --install
`;
  console.log(helpText);
}
|
|
169
|
+
|
|
170
|
+
// CLI entry point: dispatch on the first positional argument.
async function main() {
  const arg = process.argv[2];
  switch (arg) {
    case "--install":
      await install();
      break;
    case "--uninstall":
      await uninstall();
      break;
    case "searxng": {
      if (!isDockerRunning()) {
        console.error("Docker is not running.");
        process.exit(1);
      }
      // Parse and validate the port exactly once. The original passed a parsed
      // int to startSearXNG but interpolated raw argv into the success message,
      // so the two could disagree (and non-numeric input produced NaN).
      const parsed = Number.parseInt(process.argv[3] ?? String(SEARXNG_DEFAULT_PORT), 10);
      const port =
        Number.isInteger(parsed) && parsed > 0 && parsed <= 65535
          ? parsed
          : SEARXNG_DEFAULT_PORT;
      startSearXNG(port);
      console.log(`✓ SearXNG running at http://localhost:${port}`);
      break;
    }
    case "searxng-stop":
      stopSearXNG();
      break;
    case "--help":
    case "-h":
    case undefined:
      await showHelp();
      break;
    default:
      console.error(`Unknown command: ${arg}\n`);
      await showHelp();
      process.exit(1);
  }
}
|
|
188
|
+
|
|
189
|
+
// Top-level error handler: print the failure and exit non-zero.
// Guard `err.message` — a thrown non-Error would have printed "undefined".
main().catch((err) => {
  console.error("Error:", err instanceof Error ? err.message : err);
  process.exit(1);
});
|