pi-web-toolkit 0.1.2 → 0.2.1

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -13,7 +13,6 @@
13
13
  import {
14
14
  defineTool,
15
15
  type ExtensionAPI,
16
- truncateHead,
17
16
  formatSize,
18
17
  DEFAULT_MAX_BYTES,
19
18
  DEFAULT_MAX_LINES,
@@ -24,6 +23,9 @@ import * as fs from "node:fs";
24
23
  import * as os from "node:os";
25
24
  import * as path from "node:path";
26
25
  import { runScraplingWithFallback } from "./utils/scrapling";
26
+ import { extractPreview } from "./utils/content-preview";
27
+ import { writeWithFallback } from "./utils/output-sink";
28
+ import { abbreviateUrl, getDomain, getErrorText, normalizeWhitespace, formatExtraction } from "./utils/render-helpers";
27
29
 
28
30
  export const WebFetchParamsSchema = Type.Object({
29
31
  url: Type.String({ description: "Full URL to fetch (e.g. https://example.com/article)" }),
@@ -45,9 +47,11 @@ const webFetchTool = defineTool({
45
47
  ].join(" "),
46
48
  promptSnippet: "Fetch full page content from a URL as markdown",
47
49
  promptGuidelines: [
48
- "Use web_fetch after web_search to read full articles, docs, or pages found in search results.",
49
- "Always pass the full URL including https://.",
50
+ "Use web_fetch to read a single static page (article, doc, or blog) when given a specific URL.",
51
+ "For a single URL, always use web_fetch instead of web_batch_fetch.",
50
52
  "If the page is dynamic/JavaScript-heavy, the tool automatically uses browser automation.",
53
+ "When reading multiple (2–5) pages at once (e.g., after web_search), prefer web_batch_fetch over repeated web_fetch calls.",
54
+ "Always pass the full URL including https://.",
51
55
  ],
52
56
  parameters: WebFetchParamsSchema,
53
57
 
@@ -71,23 +75,23 @@ const webFetchTool = defineTool({
71
75
  const content = await fs.promises.readFile(tmpFile, "utf-8");
72
76
  const stats = await fs.promises.stat(tmpFile);
73
77
 
78
+ const preview = extractPreview(content, 500);
74
79
  const rawText = `Fetched: ${params.url}\nSize: ${stats.size} bytes\n\n---\n\n${content}`;
75
- const truncation = truncateHead(rawText, {
76
- maxLines: DEFAULT_MAX_LINES,
77
- maxBytes: DEFAULT_MAX_BYTES,
80
+ const sink = await writeWithFallback(rawText, {
81
+ tmpPrefix: "pi-web-fetch-full-",
78
82
  });
79
-
80
- let finalText = truncation.content;
81
- if (truncation.truncated) {
82
- const tmpFullDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), "pi-web-fetch-full-"));
83
- tmpFull = path.join(tmpFullDir, "output.txt");
84
- await fs.promises.writeFile(tmpFull, rawText, "utf-8");
85
- finalText += `\n\n[Output truncated: ${truncation.outputLines} of ${truncation.totalLines} lines (${formatSize(truncation.outputBytes)} of ${formatSize(truncation.totalBytes)}). Full output saved to: ${tmpFull}]`;
86
- }
83
+ tmpFull = sink.fullOutputPath;
87
84
 
88
85
  return {
89
- content: [{ type: "text", text: finalText }],
90
- details: { url: params.url, bytes: stats.size, fullOutputPath: tmpFull },
86
+ content: [{ type: "text", text: sink.text }],
87
+ details: {
88
+ url: params.url,
89
+ bytes: stats.size,
90
+ fullOutputPath: tmpFull,
91
+ preview,
92
+ selector: params.selector,
93
+ stealthy: params.stealthy,
94
+ },
91
95
  };
92
96
  } catch (err: any) {
93
97
  throw new Error(`Error fetching ${params.url}: ${err.message ?? err}`);
@@ -99,27 +103,73 @@ const webFetchTool = defineTool({
99
103
  renderCall(args, theme) {
100
104
  let text = theme.fg("toolTitle", theme.bold("web_fetch "));
101
105
  text += theme.fg("muted", args.url);
106
+ if (args.stealthy) {
107
+ text += theme.fg("dim", " [stealthy]");
108
+ }
102
109
  if (args.selector) {
103
- text += theme.fg("dim", ` selector=${args.selector}`);
110
+ text += theme.fg("dim", ` [selector=${args.selector}]`);
104
111
  }
105
112
  return new Text(text, 0, 0);
106
113
  },
107
114
 
108
- renderResult(result, { expanded, isPartial }, theme) {
115
+ renderResult(result, { expanded, isPartial }, theme, context) {
116
+ const isError = context?.isError ?? false;
117
+
109
118
  if (isPartial) {
110
- return new Text(theme.fg("warning", "Fetching..."), 0, 0);
119
+ const url = (result.details as any)?.url as string | undefined;
120
+ const domain = url ? getDomain(url) : "";
121
+ const label = domain ? `Fetching ${domain}...` : "Fetching...";
122
+ return new Text(theme.fg("warning", label), 0, 0);
123
+ }
124
+ const details = result.details as {
125
+ url?: string;
126
+ bytes?: number;
127
+ fullOutputPath?: string;
128
+ preview?: string;
129
+ selector?: string;
130
+ stealthy?: boolean;
131
+ } | undefined;
132
+
133
+ if (isError) {
134
+ const errText = getErrorText(result);
135
+ let text = theme.fg("error", "✗ Fetch failed");
136
+ if (details?.url) text += ` ${theme.fg("dim", abbreviateUrl(details.url))}`;
137
+ text += `\n\n ${theme.fg("toolOutput", errText)}`;
138
+ return new Text(text, 0, 0);
111
139
  }
112
- const details = result.details as { url?: string; bytes?: number; fullOutputPath?: string } | undefined;
140
+
113
141
  let text = theme.fg("success", "✓ Fetched");
114
- if (details?.bytes) {
115
- text += theme.fg("muted", ` (${formatSize(details.bytes)})`);
142
+ if (details?.url) {
143
+ text += ` ${theme.fg("dim", abbreviateUrl(details.url))}`;
116
144
  }
145
+ if (details?.bytes && details?.preview) {
146
+ text += ` ${theme.fg("muted", formatExtraction(details.bytes, details.preview.length))}`;
147
+ }
148
+
149
+ if (details?.selector) {
150
+ text += `\n ${theme.fg("dim", `[selector=${details.selector}]`)}`;
151
+ }
152
+ if (details?.stealthy) {
153
+ text += `${details?.selector ? "" : "\n "}${theme.fg("dim", "[stealthy]")}`;
154
+ }
155
+
156
+ if (!expanded && details?.preview) {
157
+ const snippet = normalizeWhitespace(details.preview);
158
+ const short = snippet.length > 160
159
+ ? snippet.slice(0, 160).replace(/\s+\S*$/, "") + "..."
160
+ : snippet;
161
+ text += `\n\n ${theme.fg("muted", short)}`;
162
+ }
163
+
117
164
  if (expanded) {
118
- text += `\n${theme.fg("dim", details?.url ?? "")}`;
165
+ if (details?.preview) {
166
+ text += `\n\n ${theme.fg("muted", normalizeWhitespace(details.preview))}`;
167
+ }
119
168
  if (details?.fullOutputPath) {
120
- text += `\n${theme.fg("dim", `Full output: ${details.fullOutputPath}`)}`;
169
+ text += `\n\n${theme.fg("accent", `Full output: ${details.fullOutputPath}`)}`;
121
170
  }
122
171
  }
172
+
123
173
  return new Text(text, 0, 0);
124
174
  },
125
175
  });
@@ -13,16 +13,14 @@
13
13
  import {
14
14
  defineTool,
15
15
  type ExtensionAPI,
16
- truncateHead,
17
16
  formatSize,
18
17
  DEFAULT_MAX_BYTES,
19
18
  DEFAULT_MAX_LINES,
20
19
  } from "@earendil-works/pi-coding-agent";
21
20
  import { Text } from "@earendil-works/pi-tui";
22
21
  import { Type, type Static } from "typebox";
23
- import { mkdtemp, writeFile } from "node:fs/promises";
24
- import * as os from "node:os";
25
- import * as path from "node:path";
22
+ import { writeWithFallback } from "./utils/output-sink";
23
+ import { abbreviateUrl, getDomain, getErrorText, normalizeWhitespace } from "./utils/render-helpers";
26
24
 
27
25
 
28
26
 
@@ -42,8 +40,8 @@ interface SearxResponse {
42
40
 
43
41
  export const WebSearchParamsSchema = Type.Object({
44
42
  query: Type.String({ description: "Search query" }),
45
- language: Type.Optional(Type.String({ description: "Language code (e.g. en, en-US, de). Default: auto", default: "auto" })),
46
- results: Type.Optional(Type.Integer({ description: "Max number of results to return (1-50). Default: 10", minimum: 1, maximum: 50, default: 10 })),
43
+ language: Type.Optional(Type.String({ description: "Language code (e.g. en, en-US, de). Omit to use SearXNG default.", default: "" })),
44
+ results: Type.Optional(Type.Integer({ description: "Max number of results to return (1-60). Default: 20 (one page). Automatically pages through SearXNG (up to 3 pages) if needed.", minimum: 1, maximum: 60, default: 20 })),
47
45
  });
48
46
 
49
47
  export type WebSearchInput = Static<typeof WebSearchParamsSchema>;
@@ -54,6 +52,7 @@ const webSearchTool = defineTool({
54
52
  description: [
55
53
  "Search the web using a SearXNG instance.",
56
54
  "Returns a list of results with title, URL, and snippet.",
55
+ "Automatically aggregates up to 3 pages of SearXNG results when more than ~20 are needed.",
57
56
  "Use web_search when the user asks about current events, facts, or anything",
58
57
  "that requires up-to-date information beyond the model's training data.",
59
58
  `Output is truncated to ${DEFAULT_MAX_LINES} lines or ${formatSize(DEFAULT_MAX_BYTES)}; if truncated, full output is saved to a temp file.`,
@@ -62,54 +61,84 @@ const webSearchTool = defineTool({
62
61
  promptGuidelines: [
63
62
  "Use web_search when the user asks about recent events, current data, or external facts.",
64
63
  "Use web_search to verify claims, find documentation, or discover resources online.",
64
+ "If web_search returns no results but includes suggestions, consider using a suggested query to refine your search.",
65
+ "If web_search returns multiple (2–5) relevant results that all need to be read, prefer web_batch_fetch to fetch them in parallel instead of calling web_fetch repeatedly.",
65
66
  ],
66
67
  parameters: WebSearchParamsSchema,
67
68
 
68
69
  async execute(_toolCallId, params, signal) {
69
70
  const searxngUrl = (process.env.SEARXNG_URL || "http://localhost:8080").replace(/\/$/, "");
70
- const maxResults = Math.floor(Math.min(50, Math.max(1, params.results ?? 10)));
71
- const searchParams = new URLSearchParams({
72
- q: params.query,
73
- format: "json",
74
- language: params.language ?? "auto",
75
- });
76
-
77
- const url = `${searxngUrl}/search?${searchParams.toString()}`;
71
+ const maxResults = Math.floor(Math.min(60, Math.max(1, params.results ?? 20)));
72
+ const language = params.language ?? "";
78
73
 
74
+ const allResults: SearxResult[] = [];
75
+ const seenUrls = new Set<string>();
76
+ let suggestions: string[] | undefined;
77
+ let finalQuery = params.query;
79
78
  let fullOutputPath: string | undefined;
79
+ const MAX_PAGES = 3;
80
80
 
81
81
  try {
82
- const response = await fetch(url, {
83
- method: "GET",
84
- headers: { Accept: "application/json" },
85
- signal,
86
- });
82
+ for (let page = 1; page <= MAX_PAGES; page++) {
83
+ const searchParams = new URLSearchParams({
84
+ q: params.query,
85
+ format: "json",
86
+ pageno: String(page),
87
+ });
88
+ if (language) searchParams.set("language", language);
87
89
 
88
- if (!response.ok) {
89
- const body = await response.text().catch(() => "");
90
- throw new Error(`SearXNG error: ${response.status} ${response.statusText}\n${body}`);
91
- }
90
+ const response = await fetch(`${searxngUrl}/search?${searchParams.toString()}`, {
91
+ method: "GET",
92
+ headers: { Accept: "application/json" },
93
+ signal,
94
+ });
95
+
96
+ if (!response.ok) {
97
+ const body = await response.text().catch(() => "");
98
+ throw new Error(`SearXNG error: ${response.status} ${response.statusText}\n${body}`);
99
+ }
100
+
101
+ const data = (await response.json()) as SearxResponse;
102
+ finalQuery = data.query;
103
+
104
+ if (data.suggestions && data.suggestions.length > 0 && !suggestions) {
105
+ suggestions = data.suggestions;
106
+ }
107
+
108
+ if (!data.results || data.results.length === 0) {
109
+ break;
110
+ }
111
+
112
+ for (const r of data.results) {
113
+ if (!seenUrls.has(r.url)) {
114
+ seenUrls.add(r.url);
115
+ allResults.push(r);
116
+ }
117
+ }
92
118
 
93
- const data = (await response.json()) as SearxResponse;
119
+ if (allResults.length >= maxResults) {
120
+ break;
121
+ }
122
+ }
94
123
 
95
- if (!data.results || data.results.length === 0) {
96
- let text = `No results found for "${data.query}".`;
97
- if (data.suggestions && data.suggestions.length > 0) {
98
- text += `\n\nSuggestions:\n${data.suggestions.map((s) => `- ${s}`).join("\n")}`;
124
+ if (allResults.length === 0) {
125
+ let text = `No results found for "${finalQuery}".`;
126
+ if (suggestions && suggestions.length > 0) {
127
+ text += `\n\nSuggestions:\n${suggestions.map((s) => `- ${s}`).join("\n")}`;
99
128
  }
100
129
  return {
101
130
  content: [{ type: "text", text }],
102
- details: { query: data.query, totalResults: 0, results: [], fullOutputPath: undefined },
131
+ details: { query: finalQuery, totalResults: 0, results: [] as SearxResult[], fullOutputPath: undefined as string | undefined },
103
132
  };
104
133
  }
105
134
 
106
135
  const lines: string[] = [
107
- `Results for "${data.query}":`,
136
+ `Results for "${finalQuery}":`,
108
137
  "",
109
138
  ];
110
139
 
111
- for (let i = 0; i < Math.min(maxResults, data.results.length); i++) {
112
- const r = data.results[i];
140
+ for (let i = 0; i < Math.min(maxResults, allResults.length); i++) {
141
+ const r = allResults[i];
113
142
  lines.push(`${i + 1}. ${r.title}`);
114
143
  lines.push(` URL: ${r.url}`);
115
144
  if (r.content) {
@@ -123,22 +152,15 @@ const webSearchTool = defineTool({
123
152
  }
124
153
 
125
154
  const rawText = lines.join("\n");
126
- const truncation = truncateHead(rawText, {
127
- maxLines: DEFAULT_MAX_LINES,
128
- maxBytes: DEFAULT_MAX_BYTES,
155
+ const sink = await writeWithFallback(rawText, {
156
+ tmpPrefix: "pi-web-search-",
157
+ alwaysWriteFile: true,
129
158
  });
130
-
131
- let finalText = truncation.content;
132
- if (truncation.truncated) {
133
- const tmpDir = await mkdtemp(path.join(os.tmpdir(), "pi-web-search-"));
134
- fullOutputPath = path.join(tmpDir, "output.txt");
135
- await writeFile(fullOutputPath, rawText, "utf-8");
136
- finalText += `\n\n[Output truncated: ${truncation.outputLines} of ${truncation.totalLines} lines (${formatSize(truncation.outputBytes)} of ${formatSize(truncation.totalBytes)}). Full output saved to: ${fullOutputPath}]`;
137
- }
159
+ fullOutputPath = sink.fullOutputPath;
138
160
 
139
161
  return {
140
- content: [{ type: "text", text: finalText }],
141
- details: { query: data.query, totalResults: data.results.length, results: data.results.slice(0, maxResults), fullOutputPath },
162
+ content: [{ type: "text", text: sink.text }],
163
+ details: { query: finalQuery, totalResults: allResults.length, results: allResults.slice(0, maxResults), fullOutputPath },
142
164
  };
143
165
  } catch (err: any) {
144
166
  throw new Error(`Failed to query SearXNG at ${searxngUrl}: ${err.message ?? err}`);
@@ -154,23 +176,82 @@ const webSearchTool = defineTool({
154
176
  return new Text(text, 0, 0);
155
177
  },
156
178
 
157
- renderResult(result, { expanded, isPartial }, theme) {
179
+ renderResult(result, { expanded, isPartial }, theme, context) {
180
+ const isError = context?.isError ?? false;
181
+
158
182
  if (isPartial) {
159
- return new Text(theme.fg("warning", "Searching..."), 0, 0);
183
+ const query = (result.details as any)?.query as string | undefined;
184
+ const label = query ? `Searching "${query}"...` : "Searching...";
185
+ return new Text(theme.fg("warning", label), 0, 0);
186
+ }
187
+
188
+ const details = result.details as {
189
+ query?: string;
190
+ totalResults?: number;
191
+ results?: Array<{ title?: string; url?: string; score?: number; engine?: string; content?: string }>;
192
+ fullOutputPath?: string;
193
+ } | undefined;
194
+
195
+ if (isError) {
196
+ const errText = getErrorText(result);
197
+ const query = details?.query;
198
+ let text = theme.fg("error", "✗ Search failed");
199
+ if (query) text += ` ${theme.fg("dim", query)}`;
200
+ text += `\n\n ${theme.fg("toolOutput", errText)}`;
201
+ return new Text(text, 0, 0);
202
+ }
203
+
204
+ if (!details) {
205
+ return new Text(theme.fg("error", "No result details"), 0, 0);
206
+ }
207
+ const showing = details.results?.length ?? 0;
208
+ const total = details?.totalResults ?? 0;
209
+ let text = theme.fg("success", `✓ ${showing} unique results`);
210
+ if (total > showing) {
211
+ text += theme.fg("dim", ` (${total} total)`);
160
212
  }
161
- const details = result.details as { query?: string; totalResults?: number; results?: Array<{ title?: string; url?: string }>; fullOutputPath?: string } | undefined;
162
- let text = theme.fg("success", `✓ ${details?.totalResults ?? 0} results`);
163
- if (details?.query) {
164
- text += theme.fg("muted", ` for ${details.query}`);
213
+
214
+ if (!expanded && showing > 0) {
215
+ // Default: top 3 compact — [i] Title + domain + snippet
216
+ const top3 = (details.results ?? []).slice(0, 3);
217
+ for (let i = 0; i < top3.length; i++) {
218
+ const r = top3[i];
219
+ const domain = r.url ? theme.fg("dim", ` ${getDomain(r.url)}`) : "";
220
+ text += `\n [${i + 1}] ${theme.fg("toolTitle", r.title ?? "(untitled)")}${domain}`;
221
+ if (r.content) {
222
+ const snippet = normalizeWhitespace(r.content);
223
+ const short = snippet.length > 90 ? snippet.slice(0, 90).replace(/\s+\S*$/, "") + "..." : snippet;
224
+ text += `\n ${theme.fg("muted", short)}`;
225
+ }
226
+ }
227
+ if (showing > 3) {
228
+ text += `\n ${theme.fg("muted", `... and ${showing - 3} more (Ctrl+O for full list)`)}`;
229
+ }
165
230
  }
231
+
166
232
  if (expanded && details?.results?.length) {
167
- for (const r of details.results.slice(0, 10)) {
168
- text += `\n ${theme.fg("dim", `${r.title ?? "(untitled)"} — ${r.url ?? ""}`)}`;
233
+ // Expanded (Ctrl+O): top 10 cards — [i] Title|engine|score, URL, snippet
234
+ const top10 = (details.results ?? []).slice(0, 10);
235
+ for (let i = 0; i < top10.length; i++) {
236
+ const r = top10[i];
237
+ const scoreStr = r.score !== undefined ? r.score.toFixed(2) : "—";
238
+ const metaStr = r.engine ? ` | ${r.engine} | ${scoreStr}` : ` | ${scoreStr}`;
239
+ text += `\n [${i + 1}] ${theme.fg("toolTitle", r.title ?? "(untitled)")}${theme.fg("dim", metaStr)}`;
240
+ text += `\n ${theme.fg("dim", abbreviateUrl(r.url ?? ""))}`;
241
+ if (r.content) {
242
+ text += `\n ${theme.fg("muted", normalizeWhitespace(r.content))}`;
243
+ }
244
+ text += "\n";
245
+ }
246
+ if (details.results.length > 10) {
247
+ text += `\n ${theme.fg("muted", `... and ${details.results.length - 10} more results (see full output file)`)}`;
169
248
  }
170
249
  }
250
+
171
251
  if (expanded && details?.fullOutputPath) {
172
- text += `\n${theme.fg("dim", `Full output: ${details.fullOutputPath}`)}`;
252
+ text += `\n${theme.fg("accent", `Full output: ${details.fullOutputPath}`)}`;
173
253
  }
254
+
174
255
  return new Text(text, 0, 0);
175
256
  },
176
257
  });
@@ -178,3 +259,5 @@ const webSearchTool = defineTool({
178
259
  export default function (pi: ExtensionAPI) {
179
260
  pi.registerTool(webSearchTool);
180
261
  }
262
+
263
+
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-web-toolkit",
3
- "version": "0.1.2",
3
+ "version": "0.2.1",
4
4
  "description": "Web research toolkit for the pi coding agent. Search via SearXNG, fetch static pages with scrapling, browse interactively via agent-browser, and batch-read sources in parallel.",
5
5
  "author": "Wade Huang <fastwade11@gmail.com>",
6
6
  "license": "MIT",
@@ -17,6 +17,15 @@
17
17
  "engines": {
18
18
  "node": ">=22.0.0"
19
19
  },
20
+ "scripts": {
21
+ "typecheck": "tsc --noEmit",
22
+ "test": "npx tsx test/content-preview/test.ts && npx tsx test/agent-browser/test.ts",
23
+ "test:agent-browser": "npx tsx test/agent-browser/test.ts",
24
+ "test:approve": "npx tsx test/content-preview/test.ts --approve"
25
+ },
26
+ "devDependencies": {
27
+ "typescript": "^5.7.0"
28
+ },
20
29
  "peerDependencies": {
21
30
  "@earendil-works/pi-ai": "*",
22
31
  "@earendil-works/pi-coding-agent": "*",