pi-web-toolkit 0.1.2 → 0.2.0
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/docs/guide.md +1 -1
- package/docs/tools.md +6 -2
- package/extensions/utils/agent-browser.ts +38 -93
- package/extensions/utils/cli-runner.ts +108 -0
- package/extensions/utils/content-preview.ts +493 -0
- package/extensions/utils/output-sink.ts +67 -0
- package/extensions/utils/render-helpers.ts +77 -0
- package/extensions/utils/scrapling.ts +2 -27
- package/extensions/utils/tool-factory.ts +79 -0
- package/extensions/web_batch_fetch.ts +146 -35
- package/extensions/web_browse.ts +152 -29
- package/extensions/web_fetch.ts +74 -24
- package/extensions/web_search.ts +137 -54
- package/package.json +9 -1
package/README.md
CHANGED
|
@@ -13,9 +13,9 @@ Web research toolkit for [pi](https://pi.dev) agents. Search via SearXNG, fetch
|
|
|
13
13
|
|
|
14
14
|
| Tool | Backend | Purpose | Current Limit |
|
|
15
15
|
|------|---------|---------|---------------|
|
|
16
|
-
| **`web_search`** | [SearXNG](https://github.com/searxng/searxng) | Search the web with scored, ranked results from multiple engines — always the first step in web research |
|
|
16
|
+
| **`web_search`** | [SearXNG](https://github.com/searxng/searxng) | Search the web with scored, ranked results from multiple engines — always the first step in web research | 20 results (max 60, auto-pages up to 3 pages) |
|
|
17
17
|
| **`web_fetch`** | [scrapling](https://github.com/D4Vinci/Scrapling) | Fetch a single static page as clean markdown | — |
|
|
18
|
-
| **`web_batch_fetch`** | [scrapling](https://github.com/D4Vinci/Scrapling) | Fetch 2–
|
|
18
|
+
| **`web_batch_fetch`** | [scrapling](https://github.com/D4Vinci/Scrapling) | Fetch 2–15 pages in parallel for research synthesis | 3 concurrent (max 5) |
|
|
19
19
|
| **`web_browse`** | [agent-browser](https://github.com/vercel-labs/agent-browser) | Interact with a page (click, scroll, fill) then extract content | 25 actions |
|
|
20
20
|
|
|
21
21
|
## Quick Start
|
package/docs/guide.md
CHANGED
|
@@ -32,7 +32,7 @@ User asks about something external / current
|
|
|
32
32
|
|
|
33
33
|
| | `web_fetch` | `web_browse` | `web_batch_fetch` |
|
|
34
34
|
|--|-------------|--------------|-------------------|
|
|
35
|
-
| **Pages** | 1 | 1 | 2–
|
|
35
|
+
| **Pages** | 1 | 1 | 2–15 |
|
|
36
36
|
| **Browser** | Yes (scrapling) | Yes (agent-browser) | Yes (scrapling) |
|
|
37
37
|
| **Interaction** | ❌ No | ✅ Click, fill, scroll, wait | ❌ No |
|
|
38
38
|
| **Selector** | ✅ Per-URL | ✅ Final state | ✅ Applied to all |
|
package/docs/tools.md
CHANGED
|
@@ -2,18 +2,22 @@
|
|
|
2
2
|
|
|
3
3
|
## `web_search`
|
|
4
4
|
|
|
5
|
-
Search the web via SearXNG. Returns ranked results with title, URL, and snippet.
|
|
5
|
+
Search the web via SearXNG. Returns ranked results with title, URL, and snippet. Automatically aggregates up to 3 pages of SearXNG results when more than ~20 are needed.
|
|
6
6
|
|
|
7
7
|
```typescript
|
|
8
8
|
{
|
|
9
9
|
query: string, // Search query
|
|
10
10
|
language?: string, // Language code (en, de, fr...). Default: "auto"
|
|
11
|
-
results?: number, // Max results (1–
|
|
11
|
+
results?: number, // Max results (1–60). Default: 20. Automatically pages through SearXNG (up to 3 pages) if needed.
|
|
12
12
|
}
|
|
13
13
|
```
|
|
14
14
|
|
|
15
15
|
**When to use:** The user asks about current events, facts, or anything requiring up-to-date information. This is always the **first step** of web research.
|
|
16
16
|
|
|
17
|
+
**Empty results behavior:** When no results are found, `web_search` returns a list of **suggestions** — alternative queries that SearXNG believes may yield better results. The agent can use these suggestions to automatically refine and retry the search.
|
|
18
|
+
|
|
19
|
+
**Pagination:** `web_search` automatically fetches up to 3 pages from SearXNG and deduplicates by URL. You do not need to call it multiple times for deeper results.
|
|
20
|
+
|
|
17
21
|
---
|
|
18
22
|
|
|
19
23
|
## `web_fetch`
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* command building, process spawning, JSON parsing, and session cleanup.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
import {
|
|
8
|
+
import { runCLI } from "./cli-runner";
|
|
9
9
|
|
|
10
10
|
export interface BrowseAction {
|
|
11
11
|
type: "click" | "fill" | "type" | "press" | "wait" | "wait_selector" | "scroll";
|
|
@@ -26,7 +26,7 @@ export interface AgentBrowserBatchItem {
|
|
|
26
26
|
}
|
|
27
27
|
|
|
28
28
|
function requireString(action: BrowseAction, field: "selector" | "value" | "key"): string {
|
|
29
|
-
const value = action[field];
|
|
29
|
+
const value = action[field] as string | undefined;
|
|
30
30
|
if (typeof value !== "string" || value.length === 0) {
|
|
31
31
|
throw new Error(`Action "${action.type}" requires non-empty ${field}`);
|
|
32
32
|
}
|
|
@@ -34,11 +34,11 @@ function requireString(action: BrowseAction, field: "selector" | "value" | "key"
|
|
|
34
34
|
}
|
|
35
35
|
|
|
36
36
|
function requireInteger(action: BrowseAction, field: "ms" | "amount"): number {
|
|
37
|
-
const value = action[field];
|
|
38
|
-
if (!Number.isInteger(value) || value < 0) {
|
|
37
|
+
const value = action[field] as number | undefined;
|
|
38
|
+
if (!Number.isInteger(value) || (value as number) < 0) {
|
|
39
39
|
throw new Error(`Action "${action.type}" requires non-negative integer ${field}`);
|
|
40
40
|
}
|
|
41
|
-
return value;
|
|
41
|
+
return value as number;
|
|
42
42
|
}
|
|
43
43
|
|
|
44
44
|
function waitForSelectorScript(selector: string, state: "attached" | "visible" | "hidden"): string {
|
|
@@ -128,7 +128,7 @@ export function buildBatchCommands(
|
|
|
128
128
|
return commands;
|
|
129
129
|
}
|
|
130
130
|
|
|
131
|
-
export function runAgentBrowserBatch(
|
|
131
|
+
export async function runAgentBrowserBatch(
|
|
132
132
|
commands: string[][],
|
|
133
133
|
options: { session: string; headless: boolean; signal?: AbortSignal; timeout?: number },
|
|
134
134
|
): Promise<AgentBrowserBatchItem[]> {
|
|
@@ -136,99 +136,44 @@ export function runAgentBrowserBatch(
|
|
|
136
136
|
if (!options.headless) args.push("--headed");
|
|
137
137
|
args.push("batch", "--bail", "--json");
|
|
138
138
|
|
|
139
|
-
|
|
140
|
-
const
|
|
141
|
-
|
|
142
|
-
|
|
139
|
+
try {
|
|
140
|
+
const result = await runCLI({
|
|
141
|
+
command: "agent-browser",
|
|
142
|
+
args,
|
|
143
|
+
stdin: JSON.stringify(commands),
|
|
144
|
+
timeout: options.timeout,
|
|
145
|
+
signal: options.signal,
|
|
143
146
|
});
|
|
144
147
|
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
let timeoutId: NodeJS.Timeout | undefined;
|
|
148
|
-
let settled = false;
|
|
149
|
-
|
|
150
|
-
const cleanup = () => {
|
|
151
|
-
if (timeoutId) clearTimeout(timeoutId);
|
|
152
|
-
if (options.signal) options.signal.removeEventListener("abort", kill);
|
|
153
|
-
};
|
|
154
|
-
|
|
155
|
-
const settleReject = (err: Error) => {
|
|
156
|
-
if (settled) return;
|
|
157
|
-
settled = true;
|
|
158
|
-
cleanup();
|
|
159
|
-
reject(err);
|
|
160
|
-
};
|
|
161
|
-
|
|
162
|
-
const kill = () => proc.kill("SIGTERM");
|
|
163
|
-
|
|
164
|
-
proc.stdout.on("data", (data: Buffer) => {
|
|
165
|
-
stdout += data.toString();
|
|
166
|
-
});
|
|
167
|
-
|
|
168
|
-
proc.stderr.on("data", (data: Buffer) => {
|
|
169
|
-
stderr += data.toString();
|
|
170
|
-
});
|
|
171
|
-
|
|
172
|
-
if (options.timeout) {
|
|
173
|
-
timeoutId = setTimeout(() => {
|
|
174
|
-
proc.kill("SIGTERM");
|
|
175
|
-
settleReject(new Error(`agent-browser timed out after ${options.timeout}ms`));
|
|
176
|
-
}, options.timeout);
|
|
148
|
+
if (result.exitCode !== 0 && !result.stdout.trim()) {
|
|
149
|
+
throw new Error(`agent-browser failed (exit ${result.exitCode}):\n${result.stderr || "unknown error"}`);
|
|
177
150
|
}
|
|
178
151
|
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
reject(new Error(`agent-browser failed (exit ${code}):\n${stderr || "unknown error"}`));
|
|
186
|
-
return;
|
|
187
|
-
}
|
|
188
|
-
|
|
189
|
-
try {
|
|
190
|
-
const results = JSON.parse(stdout) as AgentBrowserBatchItem[];
|
|
191
|
-
resolve(results);
|
|
192
|
-
} catch (err: any) {
|
|
193
|
-
reject(new Error(
|
|
194
|
-
`Failed to parse agent-browser output: ${err.message}\nstdout: ${stdout}\nstderr: ${stderr}`
|
|
195
|
-
));
|
|
196
|
-
}
|
|
197
|
-
});
|
|
198
|
-
|
|
199
|
-
proc.on("error", (err: any) => {
|
|
200
|
-
if (err.code === "ENOENT") {
|
|
201
|
-
settleReject(new Error(
|
|
202
|
-
"agent-browser is not installed.\n\nInstall it with:\n npm i -g agent-browser && agent-browser install\n\nThen run: agent-browser doctor"
|
|
203
|
-
));
|
|
204
|
-
} else {
|
|
205
|
-
settleReject(err);
|
|
206
|
-
}
|
|
207
|
-
});
|
|
208
|
-
|
|
209
|
-
if (options.signal) {
|
|
210
|
-
if (options.signal.aborted) kill();
|
|
211
|
-
else options.signal.addEventListener("abort", kill, { once: true });
|
|
152
|
+
try {
|
|
153
|
+
return JSON.parse(result.stdout) as AgentBrowserBatchItem[];
|
|
154
|
+
} catch (err: any) {
|
|
155
|
+
throw new Error(
|
|
156
|
+
`Failed to parse agent-browser output: ${err.message}\nstdout: ${result.stdout}\nstderr: ${result.stderr}`
|
|
157
|
+
);
|
|
212
158
|
}
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
159
|
+
} catch (err: any) {
|
|
160
|
+
if (err.message === "agent-browser is not installed") {
|
|
161
|
+
throw new Error(
|
|
162
|
+
"agent-browser is not installed.\n\nInstall it with:\n npm i -g agent-browser && agent-browser install\n\nThen run: agent-browser doctor"
|
|
163
|
+
);
|
|
164
|
+
}
|
|
165
|
+
throw err;
|
|
166
|
+
}
|
|
217
167
|
}
|
|
218
168
|
|
|
219
|
-
export function closeAgentBrowserSession(session: string, signal?: AbortSignal): Promise<void> {
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
169
|
+
export async function closeAgentBrowserSession(session: string, signal?: AbortSignal): Promise<void> {
|
|
170
|
+
try {
|
|
171
|
+
await runCLI({
|
|
172
|
+
command: "agent-browser",
|
|
173
|
+
args: ["--session", session, "close"],
|
|
174
|
+
signal,
|
|
224
175
|
});
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
if (signal) {
|
|
229
|
-
const kill = () => proc.kill("SIGTERM");
|
|
230
|
-
if (signal.aborted) kill();
|
|
231
|
-
else signal.addEventListener("abort", kill, { once: true });
|
|
232
|
-
}
|
|
233
|
-
});
|
|
176
|
+
} catch {
|
|
177
|
+
// Best-effort cleanup — ignore errors
|
|
178
|
+
}
|
|
234
179
|
}
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CLI runner — abstracted process spawning
|
|
3
|
+
*
|
|
4
|
+
* Provides a single interface for running external CLI commands
|
|
5
|
+
* with consistent signal handling, timeout support, and stdout/stderr
|
|
6
|
+
* collection. Enables testability by allowing the runner to be swapped.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { spawn, type ChildProcess } from "node:child_process";
|
|
10
|
+
|
|
11
|
+
export interface CLIRunOptions {
|
|
12
|
+
command: string;
|
|
13
|
+
args: string[];
|
|
14
|
+
/** Data to write to stdin. If omitted, stdin is ignored. */
|
|
15
|
+
stdin?: string;
|
|
16
|
+
/** Timeout in milliseconds. If exceeded, the process is killed. */
|
|
17
|
+
timeout?: number;
|
|
18
|
+
/** AbortSignal for cancellation. */
|
|
19
|
+
signal?: AbortSignal;
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
export interface CLIRunResult {
|
|
23
|
+
stdout: string;
|
|
24
|
+
stderr: string;
|
|
25
|
+
exitCode: number;
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
/**
|
|
29
|
+
* Run an external CLI command and capture its output.
|
|
30
|
+
*
|
|
31
|
+
* Handles:
|
|
32
|
+
* - stdout/stderr collection
|
|
33
|
+
* - optional stdin feeding
|
|
34
|
+
* - optional timeout (SIGTERM)
|
|
35
|
+
* - AbortSignal cancellation (SIGTERM)
|
|
36
|
+
* - process spawn errors (e.g. ENOENT)
|
|
37
|
+
*/
|
|
38
|
+
export function runCLI(options: CLIRunOptions): Promise<CLIRunResult> {
|
|
39
|
+
return new Promise((resolve, reject) => {
|
|
40
|
+
const stdio = options.stdin
|
|
41
|
+
? ["pipe", "pipe", "pipe"]
|
|
42
|
+
: ["ignore", "pipe", "pipe"];
|
|
43
|
+
|
|
44
|
+
const proc = spawn(options.command, options.args, {
|
|
45
|
+
shell: false,
|
|
46
|
+
stdio: stdio as any,
|
|
47
|
+
}) as ChildProcess;
|
|
48
|
+
|
|
49
|
+
let stdout = "";
|
|
50
|
+
let stderr = "";
|
|
51
|
+
let timeoutId: NodeJS.Timeout | undefined;
|
|
52
|
+
let settled = false;
|
|
53
|
+
|
|
54
|
+
const cleanup = () => {
|
|
55
|
+
if (timeoutId) clearTimeout(timeoutId);
|
|
56
|
+
if (options.signal) options.signal.removeEventListener("abort", kill);
|
|
57
|
+
};
|
|
58
|
+
|
|
59
|
+
const settleReject = (err: Error) => {
|
|
60
|
+
if (settled) return;
|
|
61
|
+
settled = true;
|
|
62
|
+
cleanup();
|
|
63
|
+
reject(err);
|
|
64
|
+
};
|
|
65
|
+
|
|
66
|
+
const kill = () => proc.kill("SIGTERM");
|
|
67
|
+
|
|
68
|
+
proc.stdout?.on("data", (data: Buffer) => {
|
|
69
|
+
stdout += data.toString();
|
|
70
|
+
});
|
|
71
|
+
|
|
72
|
+
proc.stderr?.on("data", (data: Buffer) => {
|
|
73
|
+
stderr += data.toString();
|
|
74
|
+
});
|
|
75
|
+
|
|
76
|
+
if (options.timeout) {
|
|
77
|
+
timeoutId = setTimeout(() => {
|
|
78
|
+
proc.kill("SIGTERM");
|
|
79
|
+
settleReject(new Error(`${options.command} timed out after ${options.timeout}ms`));
|
|
80
|
+
}, options.timeout);
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
proc.on("close", (code) => {
|
|
84
|
+
if (settled) return;
|
|
85
|
+
settled = true;
|
|
86
|
+
cleanup();
|
|
87
|
+
resolve({ stdout, stderr, exitCode: code ?? 1 });
|
|
88
|
+
});
|
|
89
|
+
|
|
90
|
+
proc.on("error", (err: any) => {
|
|
91
|
+
if (err.code === "ENOENT") {
|
|
92
|
+
settleReject(new Error(`${options.command} is not installed`));
|
|
93
|
+
} else {
|
|
94
|
+
settleReject(err);
|
|
95
|
+
}
|
|
96
|
+
});
|
|
97
|
+
|
|
98
|
+
if (options.signal) {
|
|
99
|
+
if (options.signal.aborted) kill();
|
|
100
|
+
else options.signal.addEventListener("abort", kill, { once: true });
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
if (options.stdin && proc.stdin) {
|
|
104
|
+
proc.stdin.write(options.stdin);
|
|
105
|
+
proc.stdin.end();
|
|
106
|
+
}
|
|
107
|
+
});
|
|
108
|
+
}
|