pi-smart-web-search 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (5) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +84 -0
  3. package/debug.ts +49 -0
  4. package/index.ts +497 -0
  5. package/package.json +93 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Joe Matthews
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,84 @@
1
+ # pi-smart-web-search
2
+
3
+ A [pi](https://pi.dev) extension that adds one tool — **`web_search`** — for batch web discovery.
4
+
5
+ ![web_search in pi — search, fetch, answer](https://raw.githubusercontent.com/joematthews/pi-smart-web-search/main/demo.png)
6
+
7
+ It takes an **array of queries** (batch-only, like `batch_web_fetch`), turns each into a search URL,
8
+ and runs it through the same fetch→extract pipeline as [pi-smart-fetch](https://pi.dev/packages/pi-smart-fetch) (`wreq-js` → `linkedom` →
9
+ `Defuddle`). It returns the extracted search-results pages (titles, links, snippets as markdown),
10
+ prefaced with a `# Next step` header telling the model to open the best result URLs.
11
+
12
+ So the model **curates** which links to open (no SEO-trash auto-pulled into context), and the
13
+ follow-up nudge sits right next to the links.
14
+
15
+ ## Install
16
+
17
+ ```sh
18
+ pi install npm:pi-smart-web-search
19
+ pi install npm:pi-smart-fetch # companion (see below)
20
+ ```
21
+
22
+ Then restart pi.
23
+
24
+ ### Pairs with pi-smart-fetch
25
+
26
+ `web_search` finds sources; it hands off to **`batch_web_fetch`** (from
27
+ [`pi-smart-fetch`](https://pi.dev/packages/pi-smart-fetch)) to read the chosen pages. Install
28
+ it alongside, or the model has nothing to follow up with.
29
+
30
+ ## Usage
31
+
32
+ Once installed, start up pi and just ask a question — `web_search` kicks in automatically when an
33
+ answer needs current or external info. Try:
34
+
35
+ ```
36
+ What's the latest version of Node.js, and what's new in it?
37
+ ```
38
+
39
+ ```
40
+ Compare Bun and Deno for a new TypeScript API in 2026.
41
+ ```
42
+
43
+ pi searches, opens the best results, and answers from what it read. No flags, no setup — just ask.
44
+
45
+ ## Tool
46
+
47
+ ```
48
+ web_search(searches: string[])
49
+ ```
50
+
51
+ Pass several queries at once to cover a topic from multiple angles in one call.
52
+
53
+ ## Notes
54
+
55
+ - **Search engine must be no-JS / server-rendered** to extract well. The default endpoint renders
56
+ without JavaScript; `google.com` and other JS-heavy SERPs extract poorly (the pipeline does not run
57
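+ JavaScript). To use a different engine, set `smartWebSearch.searchUrl` in `settings.json` to a URL containing a `{query}` placeholder.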
58
+ - Built on the same primitives as pi-smart-fetch (`wreq-js` browser-grade TLS, `Defuddle`
59
+ extraction); it does not import pi-smart-fetch's code (factory-only export), only the shared libs.
60
+
61
+ ## Development
62
+
63
+ Run from a local clone instead of the registry:
64
+
65
+ ```sh
66
+ git clone https://github.com/joematthews/pi-smart-web-search
67
+ cd pi-smart-web-search
68
+ npm install
69
+ pi install .
70
+ ```
71
+
72
+ Then restart pi.
73
+
74
+ `npm run check` runs typecheck, lint, format, spell, and tests.
75
+ `npx tsx debug.ts "your query"` prints what the model would receive for a search.
76
+
77
+ ## Credits
78
+
79
+ Heavily inspired by [pi-smart-fetch](https://pi.dev/packages/pi-smart-fetch) by
80
+ [Thinkscape](https://github.com/Thinkscape) (MIT). It shares the same pipeline (`wreq-js` → `linkedom` → `Defuddle`), and the `web_search` result card mirrors `batch_web_fetch`'s look. Thanks to that project for the pattern.
81
+
82
+ ## License
83
+
84
+ [MIT](LICENSE) © Joe Matthews
package/debug.ts ADDED
@@ -0,0 +1,49 @@
1
+ /**
2
+ * Debug helper: run one search through the real pipeline and print the markdown.
3
+ *
4
+ * npx tsx debug.ts "your search query"
5
+ *
6
+ * Uses the same functions the extension uses (imported from index.ts), so what you
7
+ * see here is exactly what the model would get — including the redirect-link cleanup.
8
+ */
9
+
10
+ import {
11
+ DEFAULT_SEARCH_URL_TEMPLATE,
12
+ buildSearchUrl,
13
+ cleanSearchResultLinks,
14
+ fetchReadablePage,
15
+ } from "./index.ts";
16
+
// Everything after the script name is treated as one query, joined with spaces.
const query = process.argv.slice(2).join(" ").trim();
if (!query) {
  console.error('Usage: npx tsx debug.ts "your search query"');
  process.exit(1);
}

const url = buildSearchUrl(DEFAULT_SEARCH_URL_TEMPLATE, query);
console.error(`→ fetching: ${url}\n`);

const page = await fetchReadablePage(url);
if (!page.ok) {
  console.error(`✗ fetch failed: ${page.error}`);
  process.exit(1);
}

// Run the same per-engine dispatcher the extension uses. This script always passes the
// default (DDG) template, so the DDG redirect links get unwrapped here.
const markdown = cleanSearchResultLinks(page.readableText, DEFAULT_SEARCH_URL_TEMPLATE);

// Flag any DDG redirect links that slipped through the cleanup.
const leftovers =
  markdown.match(/(?:https?:)?\/\/(?:[a-z0-9-]+\.)?duckduckgo\.com\/l\/\?[^)\s"'<>]*\buddg=/gi) ??
  [];

const summary = `\n${"─".repeat(60)}\n${markdown.length} chars · ${
  leftovers.length === 0
    ? "✓ no redirect links"
    : `✗ ${leftovers.length} redirect link(s) survived`
}`;

// Exit only from the write callback: calling process.exit() right after writing can drop
// stdout data that has not been flushed yet (e.g. when the output is piped).
process.stdout.write(`${markdown}\n`, () => {
  console.error(summary);
  process.exit(leftovers.length === 0 ? 0 : 1);
});
package/index.ts ADDED
@@ -0,0 +1,497 @@
1
+ /**
2
+ * pi-smart-web-search — a pi extension that adds one tool: `web_search`.
3
+ *
4
+ * What it does, in plain terms:
5
+ * 1. The model hands us one or more search queries.
6
+ * 2. For each query, we build a DDG search URL, fetch that results page,
7
+ * and extract it into clean, readable text (the same fetch + extract pipeline
8
+ * pi-smart-fetch uses: wreq-js to fetch, linkedom + Defuddle to extract).
9
+ * 3. We hand the model the extracted results, led by a short "# Next step"
10
+ * instruction telling it to open the best links with `batch_web_fetch`.
11
+ *
12
+ * So the model decides which links are worth reading (no junk auto-pulled into its
13
+ * context), and the "go read them" nudge sits right next to the links.
14
+ *
15
+ * Local install (no npm registry):
16
+ * 1. `cd` into this folder and run `npm install` (pulls wreq-js, defuddle, linkedom).
17
+ * 2. Add this folder's absolute path to the "packages" list in ~/.pi/agent/settings.json.
18
+ * 3. Restart pi. (Install pi-smart-fetch too — web_search hands off to its batch_web_fetch.)
19
+ */
20
+
21
+ import { Type, type Static } from "typebox";
22
+ import { Text } from "@earendil-works/pi-tui";
23
+ import { getAgentDir } from "@earendil-works/pi-coding-agent";
24
+ import { fetch } from "wreq-js";
25
+ import { parseHTML } from "linkedom";
26
+ import { Defuddle } from "defuddle/node";
27
+ import { readFileSync } from "node:fs";
28
+ import { join } from "node:path";
29
+
30
+ // =============================================================================
31
+ // 1. Fetching and extracting a page
32
+ // Fetch a URL like a real browser, then pull out the readable text as markdown.
33
+ // =============================================================================
34
+
35
+ /** How we fetch: impersonate a current Chrome on Windows, with a sane timeout. */
36
+ const BROWSER_FETCH_OPTIONS = {
37
+ browser: "chrome_140" as const,
38
+ os: "windows" as const,
39
+ timeoutMs: 12_000,
40
+ acceptHeader: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
41
+ acceptLanguageHeader: "en-US,en;q=0.9",
42
+ };
43
+
44
+ /** The outcome of fetching one URL: either readable text, or a reason it failed. */
45
+ export type PageFetchResult =
46
+ | { ok: true; requestedUrl: string; finalUrl: string; title: string; readableText: string }
47
+ | { ok: false; requestedUrl: string; error: string };
48
+
/**
 * Download `url` using the browser-impersonation options above and convert the HTML into
 * readable markdown.
 *
 * Never rejects: when `response.ok` is false, or when anything throws along the way, the
 * promise resolves with `{ ok: false, requestedUrl, error }` instead.
 *
 * @param url The address to fetch.
 * @returns On success, the final (post-redirect) URL, the extracted title and the trimmed
 *   extracted content; otherwise the failure reason.
 */
export async function fetchReadablePage(url: string): Promise<PageFetchResult> {
  const { browser, os, timeoutMs, acceptHeader, acceptLanguageHeader } = BROWSER_FETCH_OPTIONS;
  const failure = (error: string): PageFetchResult => ({ ok: false, requestedUrl: url, error });

  try {
    const response = await fetch(url, {
      browser,
      os,
      headers: { Accept: acceptHeader, "Accept-Language": acceptLanguageHeader },
      redirect: "follow",
      timeout: timeoutMs,
    });
    if (!response.ok) {
      return failure(`HTTP ${response.status} ${response.statusText}`);
    }

    // Redirects may have changed the address; extraction is given the final one as context.
    const finalUrl = response.url;
    const html = await response.text();
    const { document } = parseHTML(html);
    const extraction = await Defuddle(document, finalUrl, { markdown: true, removeImages: true });

    return {
      ok: true,
      requestedUrl: url,
      finalUrl,
      title: extraction.title,
      readableText: extraction.content.trim(),
    };
  } catch (caught) {
    return failure(caught instanceof Error ? caught.message : String(caught));
  }
}
91
+
92
+ // =============================================================================
93
+ // 2. Settings
94
+ // Read from a `smartWebSearch` object in settings.json (global, then per-project
95
+ // which overrides). Both keys are optional — the defaults below are used otherwise.
96
+ //
97
+ // "smartWebSearch": {
98
+ // "searchUrl": "https://html.duckduckgo.com/html/?q={query}",
99
+ // "maxChars": 10000
100
+ // }
101
+ // =============================================================================
102
+
103
+ /** Default search engine: DDG's no-JavaScript HTML endpoint (`{query}` is filled in per search). */
104
+ export const DEFAULT_SEARCH_URL_TEMPLATE = "https://html.duckduckgo.com/html/?q={query}";
105
+
106
+ /**
107
+ * Safety cap on how much extracted text we return per query. A DDG results page
108
+ * through this pipeline measures ~6,400–7,900 characters, so 10,000 (the ~7,900 max plus
109
+ * ~25% headroom) never truncates DDG — it only protects against a different,
110
+ * larger engine when someone swaps `searchUrl`.
111
+ */
112
+ const DEFAULT_MAX_CHARS_PER_QUERY = 10_000;
113
+
114
+ interface Settings {
115
+ searchUrlTemplate: string;
116
+ maxCharsPerQuery: number;
117
+ }
118
+
/**
 * Load settings, reading the global settings file first and the per-project file second so
 * that per-project values win. Files that are missing or not valid JSON are skipped.
 *
 * @param projectDir Directory whose `.pi/settings.json` supplies the per-project overrides.
 * @returns The effective settings; any key without a valid override keeps its default.
 */
function loadSettings(projectDir: string): Settings {
  const settings: Settings = {
    searchUrlTemplate: DEFAULT_SEARCH_URL_TEMPLATE,
    maxCharsPerQuery: DEFAULT_MAX_CHARS_PER_QUERY,
  };

  const settingsFiles = [
    join(getAgentDir(), "settings.json"), // global: ~/.pi/agent/settings.json
    join(projectDir, ".pi", "settings.json"), // per-project: ./.pi/settings.json (wins)
  ];

  for (const file of settingsFiles) {
    try {
      const parsed: unknown = JSON.parse(readFileSync(file, "utf-8"));
      const section = (parsed as { smartWebSearch?: unknown }).smartWebSearch;
      if (typeof section === "object" && section !== null) {
        const { searchUrl, maxChars } = section as { searchUrl?: unknown; maxChars?: unknown };
        // A search URL is only accepted if it has the {query} placeholder to fill in.
        if (typeof searchUrl === "string" && searchUrl.includes("{query}")) {
          settings.searchUrlTemplate = searchUrl;
        }
        if (typeof maxChars === "number") {
          // Floor BEFORE validating: a fractional value such as 0.5 is > 0 but floors to 0,
          // and a cap of 0 would truncate every result to nothing.
          const wholeChars = Math.floor(maxChars);
          if (wholeChars >= 1) {
            settings.maxCharsPerQuery = wholeChars;
          }
        }
      }
    } catch {
      // File missing, not valid JSON, or not an object → keep whatever we have so far.
    }
  }

  return settings;
}
152
+
/**
 * Turn a query into a full search URL by filling the `{query}` placeholder.
 *
 * Every occurrence of `{query}` is filled (some engines repeat the query in more than one
 * parameter). The query is percent-encoded with `encodeURIComponent`. A template without
 * the placeholder is returned unchanged.
 *
 * @param template URL template containing `{query}`.
 * @param query Raw, unencoded search text.
 * @returns The template with each placeholder replaced by the encoded query.
 */
export function buildSearchUrl(template: string, query: string): string {
  // split/join substitutes all occurrences and never interprets `$` sequences.
  return template.split("{query}").join(encodeURIComponent(query));
}
157
+
158
+ // =============================================================================
159
+ // 3. The tool: parameters, progress tracking, and the text we return to the model
160
+ // =============================================================================
161
+
162
+ /** The tool takes a list of queries — plural on purpose, to encourage covering a topic from several angles. */
163
+ const searchParametersSchema = Type.Object({
164
+ searches: Type.Array(Type.String(), {
165
+ minItems: 1,
166
+ description:
167
+ "One or more search queries to run together. Pass several queries at once to cover a topic from multiple angles in a single call.",
168
+ }),
169
+ });
170
+ type SearchParameters = Static<typeof searchParametersSchema>;
171
+
172
+ /** Where each query is in its lifecycle, plus its result once it finishes. Drives the live progress card. */
173
+ interface QueryProgress {
174
+ query: string;
175
+ status: "queued" | "loading" | "done" | "error";
176
+ result: PageFetchResult | undefined;
177
+ }
178
+
179
+ /**
180
+ * The instruction block we put at the top of every result, telling the model these are
181
+ * search results (links + snippets) and what to do next: open the best ones, then answer.
182
+ */
183
+ const FOLLOW_UP_INSTRUCTIONS = [
184
+ "# Next step: evaluate the results",
185
+ "",
186
+ "These are previews — brief, and sometimes out of date. If they don't fully answer your question, read the full pages:",
187
+ "1. Choose the most relevant URLs below.",
188
+ "2. Use the `batch_web_fetch` tool to fetch those pages.",
189
+ "3. Answer from what you read.",
190
+ "",
191
+ ].join("\n");
192
+
/**
 * Tidy the result links in extracted markdown according to the engine that produced it.
 *
 * Link mangling is engine-specific, so cleanup is delegated to a per-engine parser chosen
 * from the search URL template. Only DDG has a parser: a template containing
 * `duckduckgo.com` goes through `parseDdgLinks`. Any other template gets the markdown back
 * untouched.
 *
 * @param markdown Extracted results page.
 * @param searchUrlTemplate The template the search URL was built from.
 * @returns The markdown with engine-specific link cleanup applied, if any.
 */
export function cleanSearchResultLinks(markdown: string, searchUrlTemplate: string): string {
  const producedByDdg = searchUrlTemplate.includes("duckduckgo.com");
  return producedByDdg ? parseDdgLinks(markdown) : markdown;
}
206
+
/**
 * Replace DDG redirect links with the real destination they wrap.
 *
 * DDG result links look like `https://duckduckgo.com/l/?uddg=<real-url>&rut=…`, with the
 * destination percent-encoded in the `uddg` parameter. Each such link — absolute or
 * protocol-relative — is swapped for the decoded destination. A link whose `uddg` value
 * cannot be decoded is left exactly as it was.
 *
 * @param markdown Extracted results page.
 * @returns The markdown with every decodable redirect link unwrapped.
 */
export function parseDdgLinks(markdown: string): string {
  // Whole redirect URL: optional scheme, optional subdomain, the `uddg` value (captured),
  // then any trailing parameters so that nothing is left dangling after the replacement.
  const ddgRedirect =
    /(?:https?:)?\/\/(?:[a-z0-9-]+\.)?duckduckgo\.com\/l\/\?[^)\s"'<>]*?\buddg=([^&)\s"'<>]+)[^)\s"'<>]*/gi;

  const unwrap = (redirectUrl: string, encodedTarget: string): string => {
    let target: string;
    try {
      target = decodeURIComponent(encodedTarget);
    } catch {
      // Malformed percent-encoding: keep the original link.
      return redirectUrl;
    }
    return target;
  };

  return markdown.replace(ddgRedirect, unwrap);
}
227
+
/**
 * Assemble the text returned to the model: the follow-up instructions first, then one
 * `## Query: "…"` section per query.
 *
 * A query with no result, or a failed one, gets a `_search failed: …_` line. A successful
 * one gets its link-cleaned text, cut to `maxCharsPerQuery` characters with a truncation
 * marker when longer, or a `_no content extracted_` placeholder when empty.
 *
 * @param progressByQuery Per-query state including each finished result.
 * @param maxCharsPerQuery Cap on the characters of extracted text shown per query.
 * @param searchUrlTemplate Template used for the searches; selects the link cleanup.
 * @returns All sections joined with newlines.
 */
function formatResultsForModel(
  progressByQuery: QueryProgress[],
  maxCharsPerQuery: number,
  searchUrlTemplate: string,
): string {
  const renderBody = ({ result }: QueryProgress): string => {
    if (result === undefined) {
      return "_search failed: unknown_\n";
    }
    if (!result.ok) {
      return `_search failed: ${result.error}_\n`;
    }

    const cleaned = cleanSearchResultLinks(result.readableText || "", searchUrlTemplate);
    const shown =
      cleaned.length > maxCharsPerQuery
        ? cleaned.slice(0, maxCharsPerQuery) + "\n…(truncated)"
        : cleaned;
    return `${shown || "_no content extracted_"}\n`;
  };

  const querySections = progressByQuery.flatMap((entry) => [
    `## Query: "${entry.query}"`,
    renderBody(entry),
  ]);

  return [FOLLOW_UP_INSTRUCTIONS, ...querySections].join("\n");
}
255
+
256
+ // =============================================================================
257
+ // 4. The progress card (what shows in pi's terminal UI while searches run)
258
+ // One row per query: a status glyph, the query text, and a right-aligned
259
+ // [ status ] badge — matching batch_web_fetch's look.
260
+ // =============================================================================
261
+
/** Width, in characters, of the area inside a badge's brackets in which the label is centered. */
const STATUS_BADGE_INNER_WIDTH = 9;

/** The badge label is the status word itself: "queued" | "loading" | "done" | "error". */
function labelForStatus(status: string): string {
  return status;
}

/** Theme color name for a status; anything other than done/error/loading is "muted". */
function colorForStatus(status: string): string {
  switch (status) {
    case "done":
      return "success";
    case "error":
      return "error";
    case "loading":
      return "accent";
    default:
      return "muted";
  }
}

/** One-character marker shown at the start of a row: ✓ for done, ✗ for error, · otherwise. */
function glyphForStatus(status: string): string {
  if (status === "done") return "✓";
  if (status === "error") return "✗";
  return "·";
}

/**
 * Render a status badge such as `[   done    ]`.
 *
 * The label is centered within `STATUS_BADGE_INNER_WIDTH` characters (any odd leftover
 * space goes to the right), plus one extra space on each side. Labels longer than the
 * inner width are not cut; they just get the single space on each side.
 */
function formatStatusBadge(status: string): string {
  const label = labelForStatus(status);
  const slack = Math.max(0, STATUS_BADGE_INNER_WIDTH - label.length);
  const leftSpaces = Math.floor(slack / 2) + 1;
  const rightSpaces = slack - Math.floor(slack / 2) + 1;

  const withLeft = label.padStart(label.length + leftSpaces);
  const withBoth = withLeft.padEnd(withLeft.length + rightSpaces);
  return `[${withBoth}]`;
}
285
+
/**
 * Produce the progress card for a terminal of the given width: a header line with counts,
 * then one row per query (glyph, query text, right-aligned status badge).
 *
 * All spacing is computed from the plain-text lengths; theme colors are applied afterwards
 * so their escape codes cannot disturb the alignment. The width is at least 24 columns and
 * falls back to 80 when `terminalWidth` is falsy.
 *
 * @param progressByQuery Per-query state to display.
 * @param theme Supplies `fg` (color) and `bold` styling.
 * @param terminalWidth Available columns.
 * @returns The card as newline-joined lines.
 */
function renderProgressCard(
  progressByQuery: QueryProgress[],
  theme: Theme,
  terminalWidth: number,
): string {
  const GLYPH_COLUMNS = 2; // the status glyph plus the space after it
  const cardWidth = Math.max(24, terminalWidth || 80);

  let succeeded = 0;
  let failed = 0;
  for (const { status } of progressByQuery) {
    if (status === "done") succeeded += 1;
    else if (status === "error") failed += 1;
  }
  const finished = succeeded + failed;
  const total = progressByQuery.length;

  // e.g. "web_search 2/3 done · ok 2 · err 0"
  const header =
    theme.fg("toolTitle", theme.bold("web_search ")) +
    theme.fg("muted", `${finished}/${total} done · ok ${succeeded} · err ${failed}`);

  const rows = progressByQuery.map(({ query, status }) => {
    const badge = formatStatusBadge(status);

    // Shorten the query with an ellipsis when the row would not fit the width.
    const room = Math.max(1, cardWidth - GLYPH_COLUMNS - badge.length - 1);
    const shownQuery = query.length > room ? query.slice(0, Math.max(1, room - 1)) + "…" : query;

    // Filler that pushes the badge flush against the right edge (always at least one space).
    const filler = " ".repeat(
      Math.max(1, cardWidth - GLYPH_COLUMNS - shownQuery.length - badge.length),
    );

    const tone = colorForStatus(status);
    const glyph = theme.fg(tone, glyphForStatus(status));
    const coloredBadge = theme.fg(tone, badge);
    return `${glyph} ${theme.fg("accent", shownQuery)}${filler}${coloredBadge}`;
  });

  return [header, ...rows].join("\n");
}
332
+
333
+ // =============================================================================
334
+ // 5. Concurrency helper
335
+ // Run an async function over a list, but only so many at a time.
336
+ // =============================================================================
337
+
/**
 * Run `run` over every item, with at most `maxInFlight` calls in progress at once.
 *
 * @param items Inputs; each one is processed exactly once, including `undefined` values.
 * @param maxInFlight Desired parallelism. Values below 1 (or NaN) are treated as 1 so the
 *   work still runs instead of being silently skipped.
 * @param run Async function applied to each item together with its index.
 * @returns Results in the same order as `items`. Rejects if any `run` call rejects.
 */
async function runWithConcurrencyLimit<Item, Result>(
  items: Item[],
  maxInFlight: number,
  run: (item: Item, index: number) => Promise<Result>,
): Promise<Result[]> {
  const results: Result[] = new Array<Result>(items.length);

  // One iterator shared by all workers: each `next()` hands out the next unclaimed
  // [index, item] pair, so no item is processed twice and none is skipped.
  const queue = items.entries();
  const worker = async (): Promise<void> => {
    for (const [index, item] of queue) {
      results[index] = await run(item, index);
    }
  };

  // Clamp to at least one worker; `|| 1` also covers NaN and 0.
  const requested = Math.max(1, Math.floor(maxInFlight) || 1);
  const workerCount = Math.min(requested, items.length);
  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return results;
}
360
+
361
+ /** Run at most this many query fetches at once. */
362
+ const MAX_CONCURRENT_SEARCHES = 1;
363
+
364
+ // =============================================================================
365
+ // 6. Tool registration
366
+ // =============================================================================
367
+
368
+ /**
369
+ * Minimal local typings for the pi extension surface this tool touches. pi supplies the
370
+ * `api` and `theme` objects at runtime and does not export their types, so we declare just
371
+ * the members we use — enough to keep the boundary type-safe without depending on internals.
372
+ */
373
+ interface Theme {
374
+ fg(color: string, text: string): string;
375
+ bold(text: string): string;
376
+ }
377
+
378
+ interface RenderedComponent {
379
+ render(width: number): string[];
380
+ invalidate(): void;
381
+ }
382
+
383
+ interface ToolUpdate {
384
+ content: unknown[];
385
+ details: { progressByQuery: QueryProgress[] };
386
+ }
387
+
388
+ interface ToolResultPayload {
389
+ content: { type: "text"; text: string }[];
390
+ details: { progressByQuery: QueryProgress[] };
391
+ }
392
+
393
+ interface ToolDefinition {
394
+ name: string;
395
+ label: string;
396
+ description: string;
397
+ promptSnippet?: string;
398
+ promptGuidelines?: string[];
399
+ parameters: unknown;
400
+ renderCall?: (args: SearchParameters, theme: Theme) => Text;
401
+ execute: (
402
+ toolCallId: string,
403
+ params: SearchParameters,
404
+ signal: AbortSignal | undefined,
405
+ onUpdate?: (update: ToolUpdate) => void,
406
+ ctx?: { cwd?: string },
407
+ ) => Promise<ToolResultPayload>;
408
+ renderResult?: (result: ToolResultPayload, opts: unknown, theme: Theme) => RenderedComponent;
409
+ }
410
+
411
+ interface PiToolApi {
412
+ registerTool(definition: ToolDefinition): void;
413
+ }
414
+
/**
 * Extension entry point: registers the `web_search` tool on the pi API object.
 *
 * The tool runs every query through the fetch + extract pipeline (at most
 * `MAX_CONCURRENT_SEARCHES` at a time), reports live progress through `onUpdate`, and
 * returns the formatted results text plus the per-query progress details.
 *
 * @param api The pi extension API; only `registerTool` is used.
 */
export default function piSmartWebSearch(api: PiToolApi): void {
  api.registerTool({
    name: "web_search",
    label: "web_search",
    description:
      "Search the web. Call this whenever current or external information would change your answer — " +
      "latest versions, APIs, prices, dates, events, or anything you can't verify from " +
      "memory. Returns ranked result pages to follow up on.",
    promptSnippet: "web_search(searches: string[]): batch web search; returns ranked result pages",
    promptGuidelines: [
      "Use web_search to find sources — pass several optimized queries at once to cover a topic from multiple angles.",
    ],
    parameters: searchParametersSchema,

    // The one-line "web_search N queries" shown the instant the call starts.
    renderCall(args, theme) {
      const queryCount = args.searches.length;
      return new Text(
        theme.fg("toolTitle", theme.bold("web_search ")) +
          theme.fg("muted", `${queryCount} ${queryCount === 1 ? "query" : "queries"}`),
        0,
        0,
      );
    },

    async execute(_toolCallId, params, signal, onUpdate, ctx) {
      const { searchUrlTemplate, maxCharsPerQuery } = loadSettings(ctx?.cwd ?? process.cwd());

      // Start every query as "queued"; we update each one as it runs.
      const progressByQuery: QueryProgress[] = params.searches.map((query) => ({
        query,
        status: "queued",
        result: undefined,
      }));

      // Push the current progress to pi's UI so the card animates live.
      const reportProgress = () => onUpdate?.({ content: [], details: { progressByQuery } });
      reportProgress();

      await runWithConcurrencyLimit(
        params.searches,
        MAX_CONCURRENT_SEARCHES,
        async (query, index) => {
          const entry = progressByQuery[index];
          if (entry === undefined) return;

          const searchUrl = buildSearchUrl(searchUrlTemplate, query);

          // Once the call has been aborted, do not start fetches for the remaining queries;
          // record them as failed so the card and the returned text stay consistent.
          if (signal?.aborted) {
            entry.result = { ok: false, requestedUrl: searchUrl, error: "aborted" };
            entry.status = "error";
            reportProgress();
            return;
          }

          entry.status = "loading";
          reportProgress();

          entry.result = await fetchReadablePage(searchUrl);
          entry.status = entry.result.ok ? "done" : "error";
          reportProgress();
        },
      );

      return {
        content: [
          {
            type: "text",
            text: formatResultsForModel(progressByQuery, maxCharsPerQuery, searchUrlTemplate),
          },
        ],
        details: { progressByQuery },
      };
    },

    // Width-aware (returns a `render(width)` component) so the [ status ] badge can right-align,
    // the same approach batch_web_fetch uses.
    renderResult(result, _opts, theme) {
      const progressByQuery = result.details.progressByQuery;
      const text = new Text("", 0, 0);
      return {
        render(width) {
          text.setText(renderProgressCard(progressByQuery, theme, width));
          return text.render(width);
        },
        invalidate() {
          text.invalidate();
        },
      };
    },
  });
}
package/package.json ADDED
@@ -0,0 +1,93 @@
1
+ {
2
+ "name": "pi-smart-web-search",
3
+ "version": "0.1.0",
4
+ "description": "Batch web search for the Pi coding agent — ranked, readable results ready to fetch. Pairs with pi-smart-fetch.",
5
+ "keywords": [
6
+ "pi-package",
7
+ "pi",
8
+ "pi-extension",
9
+ "web-search",
10
+ "defuddle",
11
+ "wreq-js"
12
+ ],
13
+ "license": "MIT",
14
+ "author": "Joe Matthews",
15
+ "repository": {
16
+ "type": "git",
17
+ "url": "git+https://github.com/joematthews/pi-smart-web-search.git"
18
+ },
19
+ "homepage": "https://github.com/joematthews/pi-smart-web-search#readme",
20
+ "bugs": {
21
+ "url": "https://github.com/joematthews/pi-smart-web-search/issues"
22
+ },
23
+ "type": "module",
24
+ "main": "./index.ts",
25
+ "files": [
26
+ "index.ts",
27
+ "debug.ts",
28
+ "README.md",
29
+ "LICENSE"
30
+ ],
31
+ "engines": {
32
+ "node": ">=20"
33
+ },
34
+ "scripts": {
35
+ "typecheck": "tsc --noEmit",
36
+ "lint": "eslint .",
37
+ "lint:fix": "eslint . --fix",
38
+ "format": "prettier --write .",
39
+ "format:check": "prettier --check .",
40
+ "spell": "cspell --no-progress \"**/*\"",
41
+ "test": "vitest run",
42
+ "test:watch": "vitest",
43
+ "check": "npm run typecheck && npm run lint && npm run format:check && npm run spell && npm run test",
44
+ "prepare": "husky"
45
+ },
46
+ "lint-staged": {
47
+ "*.{ts,mjs,cjs,js}": [
48
+ "eslint --fix",
49
+ "prettier --write"
50
+ ],
51
+ "*.{json,jsonc,md,yml,yaml}": [
52
+ "prettier --write"
53
+ ]
54
+ },
55
+ "dependencies": {
56
+ "defuddle": "^0.19.0",
57
+ "linkedom": "^0.18.12",
58
+ "wreq-js": "^2.3.1"
59
+ },
60
+ "devDependencies": {
61
+ "@cspell/dict-bash": "^4.2.3",
62
+ "@cspell/dict-en_us": "^4.4.35",
63
+ "@cspell/dict-html": "^4.0.15",
64
+ "@cspell/dict-markdown": "^2.0.17",
65
+ "@cspell/dict-node": "^5.0.9",
66
+ "@cspell/dict-npm": "^5.2.41",
67
+ "@cspell/dict-software-terms": "^5.2.2",
68
+ "@cspell/dict-typescript": "^3.2.3",
69
+ "@eslint/js": "^10.0.1",
70
+ "cspell": "^10.0.1",
71
+ "eslint": "^10.5.0",
72
+ "eslint-config-prettier": "^10.1.8",
73
+ "husky": "^9.1.7",
74
+ "lint-staged": "^17.0.7",
75
+ "prettier": "^3.8.4",
76
+ "tsx": "^4.22.4",
77
+ "typescript": "^6.0.3",
78
+ "typescript-eslint": "^8.61.1",
79
+ "vitest": "^4.1.9"
80
+ },
81
+ "peerDependencies": {
82
+ "@earendil-works/pi-coding-agent": "*",
83
+ "@earendil-works/pi-tui": "*",
84
+ "pi-smart-fetch": "*",
85
+ "typebox": "*"
86
+ },
87
+ "pi": {
88
+ "extensions": [
89
+ "./index.ts"
90
+ ],
91
+ "image": "https://raw.githubusercontent.com/joematthews/pi-smart-web-search/main/demo.png"
92
+ }
93
+ }