pi-web-toolkit 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +89 -43
- package/docs/agents/domain.md +51 -0
- package/docs/agents/issue-tracker.md +22 -0
- package/docs/agents/triage-labels.md +15 -0
- package/docs/guide.md +1 -1
- package/docs/tools.md +6 -2
- package/extensions/utils/agent-browser.ts +179 -0
- package/extensions/utils/cli-runner.ts +108 -0
- package/extensions/utils/content-preview.ts +493 -0
- package/extensions/utils/output-sink.ts +67 -0
- package/extensions/utils/render-helpers.ts +77 -0
- package/extensions/utils/scrapling.ts +39 -24
- package/extensions/utils/tool-factory.ts +79 -0
- package/extensions/web_batch_fetch.ts +155 -47
- package/extensions/web_browse.ts +158 -256
- package/extensions/web_fetch.ts +83 -42
- package/extensions/web_search.ts +140 -56
- package/package.json +9 -1
package/extensions/web_search.ts
CHANGED
|
@@ -13,18 +13,16 @@
|
|
|
13
13
|
import {
|
|
14
14
|
defineTool,
|
|
15
15
|
type ExtensionAPI,
|
|
16
|
-
truncateHead,
|
|
17
16
|
formatSize,
|
|
18
17
|
DEFAULT_MAX_BYTES,
|
|
19
18
|
DEFAULT_MAX_LINES,
|
|
20
19
|
} from "@earendil-works/pi-coding-agent";
|
|
21
20
|
import { Text } from "@earendil-works/pi-tui";
|
|
22
21
|
import { Type, type Static } from "typebox";
|
|
23
|
-
import {
|
|
24
|
-
import
|
|
25
|
-
|
|
22
|
+
import { writeWithFallback } from "./utils/output-sink";
|
|
23
|
+
import { abbreviateUrl, getDomain, getErrorText, normalizeWhitespace } from "./utils/render-helpers";
|
|
24
|
+
|
|
26
25
|
|
|
27
|
-
const SEARXNG_URL = (process.env.SEARXNG_URL || "http://localhost:8080").replace(/\/$/, "");
|
|
28
26
|
|
|
29
27
|
interface SearxResult {
|
|
30
28
|
title: string;
|
|
@@ -42,8 +40,8 @@ interface SearxResponse {
|
|
|
42
40
|
|
|
43
41
|
export const WebSearchParamsSchema = Type.Object({
|
|
44
42
|
query: Type.String({ description: "Search query" }),
|
|
45
|
-
language: Type.Optional(Type.String({ description: "Language code (e.g. en, en-US, de).
|
|
46
|
-
results: Type.Optional(Type.Integer({ description: "Max number of results to return (1-
|
|
43
|
+
language: Type.Optional(Type.String({ description: "Language code (e.g. en, en-US, de). Omit to use SearXNG default.", default: "" })),
|
|
44
|
+
results: Type.Optional(Type.Integer({ description: "Max number of results to return (1-60). Default: 20 (one page). Automatically pages through SearXNG (up to 3 pages) if needed.", minimum: 1, maximum: 60, default: 20 })),
|
|
47
45
|
});
|
|
48
46
|
|
|
49
47
|
export type WebSearchInput = Static<typeof WebSearchParamsSchema>;
|
|
@@ -54,6 +52,7 @@ const webSearchTool = defineTool({
|
|
|
54
52
|
description: [
|
|
55
53
|
"Search the web using a SearXNG instance.",
|
|
56
54
|
"Returns a list of results with title, URL, and snippet.",
|
|
55
|
+
"Automatically aggregates up to 3 pages of SearXNG results when more than ~20 are needed.",
|
|
57
56
|
"Use web_search when the user asks about current events, facts, or anything",
|
|
58
57
|
"that requires up-to-date information beyond the model's training data.",
|
|
59
58
|
`Output is truncated to ${DEFAULT_MAX_LINES} lines or ${formatSize(DEFAULT_MAX_BYTES)}; if truncated, full output is saved to a temp file.`,
|
|
@@ -62,53 +61,84 @@ const webSearchTool = defineTool({
|
|
|
62
61
|
promptGuidelines: [
|
|
63
62
|
"Use web_search when the user asks about recent events, current data, or external facts.",
|
|
64
63
|
"Use web_search to verify claims, find documentation, or discover resources online.",
|
|
64
|
+
"If web_search returns no results but includes suggestions, consider using a suggested query to refine your search.",
|
|
65
|
+
"If web_search returns multiple (2–5) relevant results that all need to be read, prefer web_batch_fetch to fetch them in parallel instead of calling web_fetch repeatedly.",
|
|
65
66
|
],
|
|
66
67
|
parameters: WebSearchParamsSchema,
|
|
67
68
|
|
|
68
69
|
async execute(_toolCallId, params, signal) {
|
|
69
|
-
const
|
|
70
|
-
const
|
|
71
|
-
|
|
72
|
-
format: "json",
|
|
73
|
-
language: params.language ?? "auto",
|
|
74
|
-
});
|
|
75
|
-
|
|
76
|
-
const url = `${SEARXNG_URL}/search?${searchParams.toString()}`;
|
|
70
|
+
const searxngUrl = (process.env.SEARXNG_URL || "http://localhost:8080").replace(/\/$/, "");
|
|
71
|
+
const maxResults = Math.floor(Math.min(60, Math.max(1, params.results ?? 20)));
|
|
72
|
+
const language = params.language ?? "";
|
|
77
73
|
|
|
74
|
+
const allResults: SearxResult[] = [];
|
|
75
|
+
const seenUrls = new Set<string>();
|
|
76
|
+
let suggestions: string[] | undefined;
|
|
77
|
+
let finalQuery = params.query;
|
|
78
78
|
let fullOutputPath: string | undefined;
|
|
79
|
+
const MAX_PAGES = 3;
|
|
79
80
|
|
|
80
81
|
try {
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
82
|
+
for (let page = 1; page <= MAX_PAGES; page++) {
|
|
83
|
+
const searchParams = new URLSearchParams({
|
|
84
|
+
q: params.query,
|
|
85
|
+
format: "json",
|
|
86
|
+
pageno: String(page),
|
|
87
|
+
});
|
|
88
|
+
if (language) searchParams.set("language", language);
|
|
86
89
|
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
90
|
+
const response = await fetch(`${searxngUrl}/search?${searchParams.toString()}`, {
|
|
91
|
+
method: "GET",
|
|
92
|
+
headers: { Accept: "application/json" },
|
|
93
|
+
signal,
|
|
94
|
+
});
|
|
95
|
+
|
|
96
|
+
if (!response.ok) {
|
|
97
|
+
const body = await response.text().catch(() => "");
|
|
98
|
+
throw new Error(`SearXNG error: ${response.status} ${response.statusText}\n${body}`);
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
const data = (await response.json()) as SearxResponse;
|
|
102
|
+
finalQuery = data.query;
|
|
91
103
|
|
|
92
|
-
|
|
104
|
+
if (data.suggestions && data.suggestions.length > 0 && !suggestions) {
|
|
105
|
+
suggestions = data.suggestions;
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
if (!data.results || data.results.length === 0) {
|
|
109
|
+
break;
|
|
110
|
+
}
|
|
93
111
|
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
112
|
+
for (const r of data.results) {
|
|
113
|
+
if (!seenUrls.has(r.url)) {
|
|
114
|
+
seenUrls.add(r.url);
|
|
115
|
+
allResults.push(r);
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
if (allResults.length >= maxResults) {
|
|
120
|
+
break;
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
if (allResults.length === 0) {
|
|
125
|
+
let text = `No results found for "${finalQuery}".`;
|
|
126
|
+
if (suggestions && suggestions.length > 0) {
|
|
127
|
+
text += `\n\nSuggestions:\n${suggestions.map((s) => `- ${s}`).join("\n")}`;
|
|
98
128
|
}
|
|
99
129
|
return {
|
|
100
130
|
content: [{ type: "text", text }],
|
|
101
|
-
details: { query:
|
|
131
|
+
details: { query: finalQuery, totalResults: 0, results: [] as SearxResult[], fullOutputPath: undefined as string | undefined },
|
|
102
132
|
};
|
|
103
133
|
}
|
|
104
134
|
|
|
105
135
|
const lines: string[] = [
|
|
106
|
-
`Results for "${
|
|
136
|
+
`Results for "${finalQuery}":`,
|
|
107
137
|
"",
|
|
108
138
|
];
|
|
109
139
|
|
|
110
|
-
for (let i = 0; i < Math.min(maxResults,
|
|
111
|
-
const r =
|
|
140
|
+
for (let i = 0; i < Math.min(maxResults, allResults.length); i++) {
|
|
141
|
+
const r = allResults[i];
|
|
112
142
|
lines.push(`${i + 1}. ${r.title}`);
|
|
113
143
|
lines.push(` URL: ${r.url}`);
|
|
114
144
|
if (r.content) {
|
|
@@ -122,25 +152,18 @@ const webSearchTool = defineTool({
|
|
|
122
152
|
}
|
|
123
153
|
|
|
124
154
|
const rawText = lines.join("\n");
|
|
125
|
-
const
|
|
126
|
-
|
|
127
|
-
|
|
155
|
+
const sink = await writeWithFallback(rawText, {
|
|
156
|
+
tmpPrefix: "pi-web-search-",
|
|
157
|
+
alwaysWriteFile: true,
|
|
128
158
|
});
|
|
129
|
-
|
|
130
|
-
let finalText = truncation.content;
|
|
131
|
-
if (truncation.truncated) {
|
|
132
|
-
const tmpDir = await mkdtemp(path.join(os.tmpdir(), "pi-web-search-"));
|
|
133
|
-
fullOutputPath = path.join(tmpDir, "output.txt");
|
|
134
|
-
await writeFile(fullOutputPath, rawText, "utf-8");
|
|
135
|
-
finalText += `\n\n[Output truncated: ${truncation.outputLines} of ${truncation.totalLines} lines (${formatSize(truncation.outputBytes)} of ${formatSize(truncation.totalBytes)}). Full output saved to: ${fullOutputPath}]`;
|
|
136
|
-
}
|
|
159
|
+
fullOutputPath = sink.fullOutputPath;
|
|
137
160
|
|
|
138
161
|
return {
|
|
139
|
-
content: [{ type: "text", text:
|
|
140
|
-
details: { query:
|
|
162
|
+
content: [{ type: "text", text: sink.text }],
|
|
163
|
+
details: { query: finalQuery, totalResults: allResults.length, results: allResults.slice(0, maxResults), fullOutputPath },
|
|
141
164
|
};
|
|
142
165
|
} catch (err: any) {
|
|
143
|
-
throw new Error(`Failed to query SearXNG at ${
|
|
166
|
+
throw new Error(`Failed to query SearXNG at ${searxngUrl}: ${err.message ?? err}`);
|
|
144
167
|
}
|
|
145
168
|
},
|
|
146
169
|
|
|
@@ -153,23 +176,82 @@ const webSearchTool = defineTool({
|
|
|
153
176
|
return new Text(text, 0, 0);
|
|
154
177
|
},
|
|
155
178
|
|
|
156
|
-
renderResult(result, { expanded, isPartial }, theme) {
|
|
179
|
+
renderResult(result, { expanded, isPartial }, theme, context) {
|
|
180
|
+
const isError = context?.isError ?? false;
|
|
181
|
+
|
|
157
182
|
if (isPartial) {
|
|
158
|
-
|
|
183
|
+
const query = (result.details as any)?.query as string | undefined;
|
|
184
|
+
const label = query ? `Searching "${query}"...` : "Searching...";
|
|
185
|
+
return new Text(theme.fg("warning", label), 0, 0);
|
|
159
186
|
}
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
187
|
+
|
|
188
|
+
const details = result.details as {
|
|
189
|
+
query?: string;
|
|
190
|
+
totalResults?: number;
|
|
191
|
+
results?: Array<{ title?: string; url?: string; score?: number; engine?: string; content?: string }>;
|
|
192
|
+
fullOutputPath?: string;
|
|
193
|
+
} | undefined;
|
|
194
|
+
|
|
195
|
+
if (isError) {
|
|
196
|
+
const errText = getErrorText(result);
|
|
197
|
+
const query = details?.query;
|
|
198
|
+
let text = theme.fg("error", "✗ Search failed");
|
|
199
|
+
if (query) text += ` ${theme.fg("dim", query)}`;
|
|
200
|
+
text += `\n\n ${theme.fg("toolOutput", errText)}`;
|
|
201
|
+
return new Text(text, 0, 0);
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
if (!details) {
|
|
205
|
+
return new Text(theme.fg("error", "No result details"), 0, 0);
|
|
206
|
+
}
|
|
207
|
+
const showing = details.results?.length ?? 0;
|
|
208
|
+
const total = details?.totalResults ?? 0;
|
|
209
|
+
let text = theme.fg("success", `✓ ${showing} unique results`);
|
|
210
|
+
if (total > showing) {
|
|
211
|
+
text += theme.fg("dim", ` (${total} total)`);
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
if (!expanded && showing > 0) {
|
|
215
|
+
// Default: top 3 compact — [i] Title + domain + snippet
|
|
216
|
+
const top3 = (details.results ?? []).slice(0, 3);
|
|
217
|
+
for (let i = 0; i < top3.length; i++) {
|
|
218
|
+
const r = top3[i];
|
|
219
|
+
const domain = r.url ? theme.fg("dim", ` ${getDomain(r.url)}`) : "";
|
|
220
|
+
text += `\n [${i + 1}] ${theme.fg("toolTitle", r.title ?? "(untitled)")}${domain}`;
|
|
221
|
+
if (r.content) {
|
|
222
|
+
const snippet = normalizeWhitespace(r.content);
|
|
223
|
+
const short = snippet.length > 90 ? snippet.slice(0, 90).replace(/\s+\S*$/, "") + "..." : snippet;
|
|
224
|
+
text += `\n ${theme.fg("muted", short)}`;
|
|
225
|
+
}
|
|
226
|
+
}
|
|
227
|
+
if (showing > 3) {
|
|
228
|
+
text += `\n ${theme.fg("muted", `... and ${showing - 3} more (Ctrl+O for full list)`)}`;
|
|
229
|
+
}
|
|
164
230
|
}
|
|
231
|
+
|
|
165
232
|
if (expanded && details?.results?.length) {
|
|
166
|
-
|
|
167
|
-
|
|
233
|
+
// Expanded (Ctrl+O): top 10 cards — [i] Title|engine|score, URL, snippet
|
|
234
|
+
const top10 = (details.results ?? []).slice(0, 10);
|
|
235
|
+
for (let i = 0; i < top10.length; i++) {
|
|
236
|
+
const r = top10[i];
|
|
237
|
+
const scoreStr = r.score !== undefined ? r.score.toFixed(2) : "—";
|
|
238
|
+
const metaStr = r.engine ? ` | ${r.engine} | ${scoreStr}` : ` | ${scoreStr}`;
|
|
239
|
+
text += `\n [${i + 1}] ${theme.fg("toolTitle", r.title ?? "(untitled)")}${theme.fg("dim", metaStr)}`;
|
|
240
|
+
text += `\n ${theme.fg("dim", abbreviateUrl(r.url ?? ""))}`;
|
|
241
|
+
if (r.content) {
|
|
242
|
+
text += `\n ${theme.fg("muted", normalizeWhitespace(r.content))}`;
|
|
243
|
+
}
|
|
244
|
+
text += "\n";
|
|
245
|
+
}
|
|
246
|
+
if (details.results.length > 10) {
|
|
247
|
+
text += `\n ${theme.fg("muted", `... and ${details.results.length - 10} more results (see full output file)`)}`;
|
|
168
248
|
}
|
|
169
249
|
}
|
|
250
|
+
|
|
170
251
|
if (expanded && details?.fullOutputPath) {
|
|
171
|
-
text += `\n${theme.fg("
|
|
252
|
+
text += `\n${theme.fg("accent", `Full output: ${details.fullOutputPath}`)}`;
|
|
172
253
|
}
|
|
254
|
+
|
|
173
255
|
return new Text(text, 0, 0);
|
|
174
256
|
},
|
|
175
257
|
});
|
|
@@ -177,3 +259,5 @@ const webSearchTool = defineTool({
|
|
|
177
259
|
export default function (pi: ExtensionAPI) {
|
|
178
260
|
pi.registerTool(webSearchTool);
|
|
179
261
|
}
|
|
262
|
+
|
|
263
|
+
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pi-web-toolkit",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.2.0",
|
|
4
4
|
"description": "Web research toolkit for the pi coding agent. Search via SearXNG, fetch static pages with scrapling, browse interactively via agent-browser, and batch-read sources in parallel.",
|
|
5
5
|
"author": "Wade Huang <fastwade11@gmail.com>",
|
|
6
6
|
"license": "MIT",
|
|
@@ -17,6 +17,14 @@
|
|
|
17
17
|
"engines": {
|
|
18
18
|
"node": ">=22.0.0"
|
|
19
19
|
},
|
|
20
|
+
"scripts": {
|
|
21
|
+
"typecheck": "tsc --noEmit",
|
|
22
|
+
"test": "npx tsx test/content-preview/test.ts",
|
|
23
|
+
"test:approve": "npx tsx test/content-preview/test.ts --approve"
|
|
24
|
+
},
|
|
25
|
+
"devDependencies": {
|
|
26
|
+
"typescript": "^5.7.0"
|
|
27
|
+
},
|
|
20
28
|
"peerDependencies": {
|
|
21
29
|
"@earendil-works/pi-ai": "*",
|
|
22
30
|
"@earendil-works/pi-coding-agent": "*",
|