pi-web-toolkit 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -13,9 +13,9 @@ Web research toolkit for [pi](https://pi.dev) agents. Search via SearXNG, fetch
13
13
 
14
14
  | Tool | Backend | Purpose | Current Limit |
15
15
  |------|---------|---------|---------------|
16
- | **`web_search`** | [SearXNG](https://github.com/searxng/searxng) | Search the web with scored, ranked results from multiple engines — always the first step in web research | 10 results (max 50) |
16
+ | **`web_search`** | [SearXNG](https://github.com/searxng/searxng) | Search the web with scored, ranked results from multiple engines — always the first step in web research | 20 results (max 60, auto-pages up to 3 pages) |
17
17
  | **`web_fetch`** | [scrapling](https://github.com/D4Vinci/Scrapling) | Fetch a single static page as clean markdown | — |
18
- | **`web_batch_fetch`** | [scrapling](https://github.com/D4Vinci/Scrapling) | Fetch 2–10 pages in parallel for research synthesis | 3 concurrent (max 5) |
18
+ | **`web_batch_fetch`** | [scrapling](https://github.com/D4Vinci/Scrapling) | Fetch 2–15 pages in parallel for research synthesis | 3 concurrent (max 5) |
19
19
  | **`web_browse`** | [agent-browser](https://github.com/vercel-labs/agent-browser) | Interact with a page (click, scroll, fill) then extract content | 25 actions |
20
20
 
21
21
  ## Quick Start
package/docs/guide.md CHANGED
@@ -32,7 +32,7 @@ User asks about something external / current
32
32
 
33
33
  | | `web_fetch` | `web_browse` | `web_batch_fetch` |
34
34
  |--|-------------|--------------|-------------------|
35
- | **Pages** | 1 | 1 | 2–10 |
35
+ | **Pages** | 1 | 1 | 2–15 |
36
36
  | **Browser** | Yes (scrapling) | Yes (agent-browser) | Yes (scrapling) |
37
37
  | **Interaction** | ❌ No | ✅ Click, fill, scroll, wait | ❌ No |
38
38
  | **Selector** | ✅ Per-URL | ✅ Final state | ✅ Applied to all |
package/docs/tools.md CHANGED
@@ -2,18 +2,22 @@
2
2
 
3
3
  ## `web_search`
4
4
 
5
- Search the web via SearXNG. Returns ranked results with title, URL, and snippet.
5
+ Search the web via SearXNG. Returns ranked results with title, URL, and snippet. Automatically aggregates up to 3 pages of SearXNG results when more than ~20 are needed.
6
6
 
7
7
  ```typescript
8
8
  {
9
9
  query: string, // Search query
10
10
  language?: string, // Language code (en, de, fr...). Default: "auto"
11
- results?: number, // Max results (1–50). Default: 10
11
+ results?: number, // Max results (1–60). Default: 20. Automatically pages through SearXNG (up to 3 pages) if needed.
12
12
  }
13
13
  ```
14
14
 
15
15
  **When to use:** The user asks about current events, facts, or anything requiring up-to-date information. This is always the **first step** of web research.
16
16
 
17
+ **Empty results behavior:** When no results are found, `web_search` returns a list of **suggestions** — alternative queries that SearXNG believes may yield better results. The agent can use these suggestions to automatically refine and retry the search.
18
+
19
+ **Pagination:** `web_search` automatically fetches up to 3 pages from SearXNG and deduplicates by URL. You do not need to call it multiple times for deeper results.
20
+
17
21
  ---
18
22
 
19
23
  ## `web_fetch`
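[file header missing from this extract — the hunks below belong to a TypeScript source module that wraps the agent-browser CLI, not to package/docs/tools.md; exact path not shown] CHANGED
@@ -5,7 +5,7 @@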
5
5
  * command building, process spawning, JSON parsing, and session cleanup.
6
6
  */
7
7
 
8
- import { spawn } from "node:child_process";
8
+ import { runCLI } from "./cli-runner";
9
9
 
10
10
  export interface BrowseAction {
11
11
  type: "click" | "fill" | "type" | "press" | "wait" | "wait_selector" | "scroll";
@@ -26,7 +26,7 @@ export interface AgentBrowserBatchItem {
26
26
  }
27
27
 
28
28
  function requireString(action: BrowseAction, field: "selector" | "value" | "key"): string {
29
- const value = action[field];
29
+ const value = action[field] as string | undefined;
30
30
  if (typeof value !== "string" || value.length === 0) {
31
31
  throw new Error(`Action "${action.type}" requires non-empty ${field}`);
32
32
  }
@@ -34,11 +34,11 @@ function requireString(action: BrowseAction, field: "selector" | "value" | "key"
34
34
  }
35
35
 
36
36
  function requireInteger(action: BrowseAction, field: "ms" | "amount"): number {
37
- const value = action[field];
38
- if (!Number.isInteger(value) || value < 0) {
37
+ const value = action[field] as number | undefined;
38
+ if (!Number.isInteger(value) || (value as number) < 0) {
39
39
  throw new Error(`Action "${action.type}" requires non-negative integer ${field}`);
40
40
  }
41
- return value;
41
+ return value as number;
42
42
  }
43
43
 
44
44
  function waitForSelectorScript(selector: string, state: "attached" | "visible" | "hidden"): string {
@@ -128,7 +128,7 @@ export function buildBatchCommands(
128
128
  return commands;
129
129
  }
130
130
 
131
- export function runAgentBrowserBatch(
131
+ export async function runAgentBrowserBatch(
132
132
  commands: string[][],
133
133
  options: { session: string; headless: boolean; signal?: AbortSignal; timeout?: number },
134
134
  ): Promise<AgentBrowserBatchItem[]> {
@@ -136,99 +136,44 @@ export function runAgentBrowserBatch(
136
136
  if (!options.headless) args.push("--headed");
137
137
  args.push("batch", "--bail", "--json");
138
138
 
139
- return new Promise((resolve, reject) => {
140
- const proc = spawn("agent-browser", args, {
141
- shell: false,
142
- stdio: ["pipe", "pipe", "pipe"],
139
+ try {
140
+ const result = await runCLI({
141
+ command: "agent-browser",
142
+ args,
143
+ stdin: JSON.stringify(commands),
144
+ timeout: options.timeout,
145
+ signal: options.signal,
143
146
  });
144
147
 
145
- let stdout = "";
146
- let stderr = "";
147
- let timeoutId: NodeJS.Timeout | undefined;
148
- let settled = false;
149
-
150
- const cleanup = () => {
151
- if (timeoutId) clearTimeout(timeoutId);
152
- if (options.signal) options.signal.removeEventListener("abort", kill);
153
- };
154
-
155
- const settleReject = (err: Error) => {
156
- if (settled) return;
157
- settled = true;
158
- cleanup();
159
- reject(err);
160
- };
161
-
162
- const kill = () => proc.kill("SIGTERM");
163
-
164
- proc.stdout.on("data", (data: Buffer) => {
165
- stdout += data.toString();
166
- });
167
-
168
- proc.stderr.on("data", (data: Buffer) => {
169
- stderr += data.toString();
170
- });
171
-
172
- if (options.timeout) {
173
- timeoutId = setTimeout(() => {
174
- proc.kill("SIGTERM");
175
- settleReject(new Error(`agent-browser timed out after ${options.timeout}ms`));
176
- }, options.timeout);
148
+ if (result.exitCode !== 0 && !result.stdout.trim()) {
149
+ throw new Error(`agent-browser failed (exit ${result.exitCode}):\n${result.stderr || "unknown error"}`);
177
150
  }
178
151
 
179
- proc.on("close", (code) => {
180
- if (settled) return;
181
- settled = true;
182
- cleanup();
183
-
184
- if (code !== 0 && !stdout.trim()) {
185
- reject(new Error(`agent-browser failed (exit ${code}):\n${stderr || "unknown error"}`));
186
- return;
187
- }
188
-
189
- try {
190
- const results = JSON.parse(stdout) as AgentBrowserBatchItem[];
191
- resolve(results);
192
- } catch (err: any) {
193
- reject(new Error(
194
- `Failed to parse agent-browser output: ${err.message}\nstdout: ${stdout}\nstderr: ${stderr}`
195
- ));
196
- }
197
- });
198
-
199
- proc.on("error", (err: any) => {
200
- if (err.code === "ENOENT") {
201
- settleReject(new Error(
202
- "agent-browser is not installed.\n\nInstall it with:\n npm i -g agent-browser && agent-browser install\n\nThen run: agent-browser doctor"
203
- ));
204
- } else {
205
- settleReject(err);
206
- }
207
- });
208
-
209
- if (options.signal) {
210
- if (options.signal.aborted) kill();
211
- else options.signal.addEventListener("abort", kill, { once: true });
152
+ try {
153
+ return JSON.parse(result.stdout) as AgentBrowserBatchItem[];
154
+ } catch (err: any) {
155
+ throw new Error(
156
+ `Failed to parse agent-browser output: ${err.message}\nstdout: ${result.stdout}\nstderr: ${result.stderr}`
157
+ );
212
158
  }
213
-
214
- proc.stdin.write(JSON.stringify(commands));
215
- proc.stdin.end();
216
- });
159
+ } catch (err: any) {
160
+ if (err.message === "agent-browser is not installed") {
161
+ throw new Error(
162
+ "agent-browser is not installed.\n\nInstall it with:\n npm i -g agent-browser && agent-browser install\n\nThen run: agent-browser doctor"
163
+ );
164
+ }
165
+ throw err;
166
+ }
217
167
  }
218
168
 
219
- export function closeAgentBrowserSession(session: string, signal?: AbortSignal): Promise<void> {
220
- return new Promise((resolve) => {
221
- const proc = spawn("agent-browser", ["--session", session, "close"], {
222
- shell: false,
223
- stdio: ["ignore", "ignore", "ignore"],
169
+ export async function closeAgentBrowserSession(session: string, signal?: AbortSignal): Promise<void> {
170
+ try {
171
+ await runCLI({
172
+ command: "agent-browser",
173
+ args: ["--session", session, "close"],
174
+ signal,
224
175
  });
225
- const done = () => resolve();
226
- proc.on("close", done);
227
- proc.on("error", done);
228
- if (signal) {
229
- const kill = () => proc.kill("SIGTERM");
230
- if (signal.aborted) kill();
231
- else signal.addEventListener("abort", kill, { once: true });
232
- }
233
- });
176
+ } catch {
177
+ // Best-effort cleanup — ignore errors
178
+ }
234
179
  }
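[file header missing from this extract — new file: the TypeScript module imported earlier in this diff as "./cli-runner"; exact path not shown]
@@ -0,0 +1,108 @@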
1
+ /**
2
+ * CLI runner — abstracted process spawning
3
+ *
4
+ * Provides a single interface for running external CLI commands
5
+ * with consistent signal handling, timeout support, and stdout/stderr
6
+ * collection. Enables testability by allowing the runner to be swapped.
7
+ */
8
+
9
+ import { spawn, type ChildProcess } from "node:child_process";
10
+
11
+ export interface CLIRunOptions {
12
+ command: string;
13
+ args: string[];
14
+ /** Data to write to stdin. If omitted, stdin is ignored. */
15
+ stdin?: string;
16
+ /** Timeout in milliseconds. If exceeded, the process is killed. */
17
+ timeout?: number;
18
+ /** AbortSignal for cancellation. */
19
+ signal?: AbortSignal;
20
+ }
21
+
22
+ export interface CLIRunResult {
23
+ stdout: string;
24
+ stderr: string;
25
+ exitCode: number;
26
+ }
27
+
28
+ /**
29
+ * Run an external CLI command and capture its output.
30
+ *
31
+ * Handles:
32
+ * - stdout/stderr collection
33
+ * - optional stdin feeding
34
+ * - optional timeout (SIGTERM)
35
+ * - AbortSignal cancellation (SIGTERM)
36
+ * - process spawn errors (e.g. ENOENT)
37
+ */
38
+ export function runCLI(options: CLIRunOptions): Promise<CLIRunResult> {
39
+ return new Promise((resolve, reject) => {
40
+ const stdio = options.stdin
41
+ ? ["pipe", "pipe", "pipe"]
42
+ : ["ignore", "pipe", "pipe"];
43
+
44
+ const proc = spawn(options.command, options.args, {
45
+ shell: false,
46
+ stdio: stdio as any,
47
+ }) as ChildProcess;
48
+
49
+ let stdout = "";
50
+ let stderr = "";
51
+ let timeoutId: NodeJS.Timeout | undefined;
52
+ let settled = false;
53
+
54
+ const cleanup = () => {
55
+ if (timeoutId) clearTimeout(timeoutId);
56
+ if (options.signal) options.signal.removeEventListener("abort", kill);
57
+ };
58
+
59
+ const settleReject = (err: Error) => {
60
+ if (settled) return;
61
+ settled = true;
62
+ cleanup();
63
+ reject(err);
64
+ };
65
+
66
+ const kill = () => proc.kill("SIGTERM");
67
+
68
+ proc.stdout?.on("data", (data: Buffer) => {
69
+ stdout += data.toString();
70
+ });
71
+
72
+ proc.stderr?.on("data", (data: Buffer) => {
73
+ stderr += data.toString();
74
+ });
75
+
76
+ if (options.timeout) {
77
+ timeoutId = setTimeout(() => {
78
+ proc.kill("SIGTERM");
79
+ settleReject(new Error(`${options.command} timed out after ${options.timeout}ms`));
80
+ }, options.timeout);
81
+ }
82
+
83
+ proc.on("close", (code) => {
84
+ if (settled) return;
85
+ settled = true;
86
+ cleanup();
87
+ resolve({ stdout, stderr, exitCode: code ?? 1 });
88
+ });
89
+
90
+ proc.on("error", (err: any) => {
91
+ if (err.code === "ENOENT") {
92
+ settleReject(new Error(`${options.command} is not installed`));
93
+ } else {
94
+ settleReject(err);
95
+ }
96
+ });
97
+
98
+ if (options.signal) {
99
+ if (options.signal.aborted) kill();
100
+ else options.signal.addEventListener("abort", kill, { once: true });
101
+ }
102
+
103
+ if (options.stdin && proc.stdin) {
104
+ proc.stdin.write(options.stdin);
105
+ proc.stdin.end();
106
+ }
107
+ });
108
+ }