pi-web-toolkit 0.3.1 → 0.3.3

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -19,12 +19,13 @@ import {
19
19
  } from "@earendil-works/pi-coding-agent";
20
20
  import { Text } from "@earendil-works/pi-tui";
21
21
  import { Type, type Static } from "typebox";
22
+ import { getSearxngUrl } from "./utils/config";
22
23
  import { writeWithFallback } from "./utils/output-sink";
23
- import { searchKeyless, shouldFallbackSearch } from "./utils/firecrawl";
24
+ import { searchKeyless } from "./utils/firecrawl";
25
+ import { runWebSearchCore } from "./utils/web-search-core";
24
26
  import { abbreviateUrl, getDomain, getErrorText, normalizeWhitespace } from "./utils/render-helpers";
25
27
 
26
28
 
27
-
28
29
  interface SearxResult {
29
30
  title: string;
30
31
  url: string;
@@ -33,12 +34,6 @@ interface SearxResult {
33
34
  score?: number;
34
35
  }
35
36
 
36
- interface SearxResponse {
37
- query: string;
38
- results: SearxResult[];
39
- suggestions?: string[];
40
- }
41
-
42
37
  export const WebSearchParamsSchema = Type.Object({
43
38
  query: Type.String({ description: "Search query" }),
44
39
  language: Type.Optional(Type.String({ description: "Language code (e.g. en, en-US, de). Omit to use SearXNG default.", default: "" })),
@@ -51,139 +46,69 @@ const webSearchTool = defineTool({
51
46
  name: "web_search",
52
47
  label: "Web Search",
53
48
  description: [
54
- "Search the web using a SearXNG instance.",
49
+ "Primary local-first tool for web discovery via a SearXNG instance.",
55
50
  "Returns a list of results with title, URL, and snippet.",
56
51
  "Automatically aggregates up to 3 pages of SearXNG results when more than ~20 are needed.",
52
+ "Use web_search as the first attempt for web search; it automatically tries Firecrawl keyless only if SearXNG fails or returns nothing.",
57
53
  "Use web_search when the user asks about current events, facts, or anything",
58
54
  "that requires up-to-date information beyond the model's training data.",
59
55
  `Output is truncated to ${DEFAULT_MAX_LINES} lines or ${formatSize(DEFAULT_MAX_BYTES)}; if truncated, full output is saved to a temp file.`,
60
56
  ].join(" "),
61
- promptSnippet: "Search the web for current information",
57
+ promptSnippet: "Local web search via SearXNG",
62
58
  promptGuidelines: [
63
- "Use web_search when the user asks about recent events, current data, or external facts.",
64
- "Use web_search to verify claims, find documentation, or discover resources online.",
65
- "If web_search returns no results but includes suggestions, consider using a suggested query to refine your search.",
66
- "If web_search returns multiple (2–5) relevant results that all need to be read, prefer web_batch_fetch to fetch them in parallel instead of calling web_fetch repeatedly.",
59
+ "Use web_search for current/external facts, verification, docs, and discovery.",
60
+ "If 2–5 results need reading, use web_batch_fetch; retry suggested queries when results are empty.",
67
61
  ],
68
62
  parameters: WebSearchParamsSchema,
69
63
 
70
64
  async execute(_toolCallId, params, signal) {
71
- const searxngUrl = (process.env.SEARXNG_URL || "http://localhost:8080").replace(/\/$/, "");
72
- const maxResults = Math.floor(Math.min(60, Math.max(1, params.results ?? 20)));
73
- const language = params.language ?? "";
74
-
75
- const allResults: SearxResult[] = [];
76
- const seenUrls = new Set<string>();
77
- let suggestions: string[] | undefined;
78
- let finalQuery = params.query;
79
- let fullOutputPath: string | undefined;
80
- const MAX_PAGES = 3;
81
-
82
- let localOk = true;
83
- let localError: string | undefined;
84
-
85
- try {
86
- for (let page = 1; page <= MAX_PAGES; page++) {
87
- const searchParams = new URLSearchParams({
88
- q: params.query,
89
- format: "json",
90
- pageno: String(page),
91
- });
92
- if (language) searchParams.set("language", language);
93
-
94
- const response = await fetch(`${searxngUrl}/search?${searchParams.toString()}`, {
95
- method: "GET",
96
- headers: { Accept: "application/json" },
97
- signal,
98
- });
99
-
100
- if (!response.ok) {
101
- const body = await response.text().catch(() => "");
102
- throw new Error(`SearXNG error: ${response.status} ${response.statusText}\n${body}`);
103
- }
104
-
105
- const data = (await response.json()) as SearxResponse;
106
- finalQuery = data.query;
107
-
108
- if (data.suggestions && data.suggestions.length > 0 && !suggestions) {
109
- suggestions = data.suggestions;
110
- }
111
-
112
- if (!data.results || data.results.length === 0) {
113
- break;
114
- }
115
-
116
- for (const r of data.results) {
117
- if (!seenUrls.has(r.url)) {
118
- seenUrls.add(r.url);
119
- allResults.push(r);
120
- }
121
- }
122
-
123
- if (allResults.length >= maxResults) {
124
- break;
125
- }
126
- }
127
- } catch (err: any) {
128
- localOk = false;
129
- localError = err.message ?? String(err);
130
- }
65
+ const result = await runWebSearchCore(params, {
66
+ searxngUrl: getSearxngUrl(),
67
+ fetchImpl: fetch,
68
+ firecrawlSearch: searchKeyless,
69
+ signal,
70
+ });
131
71
 
132
- // Firecrawl keyless fallback: when SearXNG errored OR returned nothing.
133
- if (shouldFallbackSearch(localOk, allResults.length)) {
134
- const fb = await searchKeyless(params.query, { limit: Math.min(maxResults, 10) }, signal);
135
- if (fb.ok && fb.results.length > 0) {
136
- const fbResults: SearxResult[] = fb.results.slice(0, maxResults).map((r) => ({
137
- title: r.title ?? "(untitled)",
138
- url: r.url,
139
- content: r.description,
140
- engine: "firecrawl",
141
- }));
142
- const creditTag = fb.creditsUsed !== undefined ? `, ${fb.creditsUsed} credits` : "";
143
- const lines: string[] = [`Results for "${params.query}" (via Firecrawl keyless${creditTag}):`, ""];
144
- for (let i = 0; i < fbResults.length; i++) {
145
- const r = fbResults[i];
146
- lines.push(`${i + 1}. ${r.title}`);
147
- lines.push(` URL: ${r.url}`);
148
- if (r.content) lines.push(` ${r.content.replace(/\s+/g, " ").trim()}`);
149
- if (r.engine) lines.push(` [engine: ${r.engine}]`);
150
- lines.push("");
151
- }
152
- const rawText = lines.join("\n");
153
- const sink = await writeWithFallback(rawText, {
154
- tmpPrefix: "pi-web-search-firecrawl-",
155
- alwaysWriteFile: true,
156
- });
157
- return {
158
- content: [{ type: "text", text: sink.text }],
159
- details: { query: params.query, totalResults: fbResults.length, results: fbResults, fullOutputPath: sink.fullOutputPath, viaFirecrawl: true, creditsUsed: fb.creditsUsed },
160
- };
72
+ if (result.viaFirecrawl) {
73
+ const creditTag = result.creditsUsed !== undefined ? `, ${result.creditsUsed} credits` : "";
74
+ const lines: string[] = [`Results for "${params.query}" (via Firecrawl keyless${creditTag}):`, ""];
75
+ for (let i = 0; i < result.results.length; i++) {
76
+ const r = result.results[i];
77
+ lines.push(`${i + 1}. ${r.title}`);
78
+ lines.push(` URL: ${r.url}`);
79
+ if (r.content) lines.push(` ${r.content.replace(/\s+/g, " ").trim()}`);
80
+ if (r.engine) lines.push(` [engine: ${r.engine}]`);
81
+ lines.push("");
161
82
  }
162
- // Graceful skip or empty Firecrawl: fall through to local handling.
163
- }
164
-
165
- if (!localOk) {
166
- throw new Error(`Failed to query SearXNG at ${searxngUrl}: ${localError}`);
83
+ const rawText = lines.join("\n");
84
+ const sink = await writeWithFallback(rawText, {
85
+ tmpPrefix: "pi-web-search-firecrawl-",
86
+ alwaysWriteFile: true,
87
+ });
88
+ return {
89
+ content: [{ type: "text", text: sink.text }],
90
+ details: { query: params.query, totalResults: result.totalResults, results: result.results, fullOutputPath: sink.fullOutputPath, viaFirecrawl: true, creditsUsed: result.creditsUsed },
91
+ };
167
92
  }
168
93
 
169
- if (allResults.length === 0) {
170
- let text = `No results found for "${finalQuery}".`;
171
- if (suggestions && suggestions.length > 0) {
172
- text += `\n\nSuggestions:\n${suggestions.map((s) => `- ${s}`).join("\n")}`;
94
+ if (result.results.length === 0) {
95
+ let text = `No results found for "${result.query}".`;
96
+ if (result.suggestions && result.suggestions.length > 0) {
97
+ text += `\n\nSuggestions:\n${result.suggestions.map((s) => `- ${s}`).join("\n")}`;
173
98
  }
174
99
  return {
175
100
  content: [{ type: "text", text }],
176
- details: { query: finalQuery, totalResults: 0, results: [] as SearxResult[], fullOutputPath: undefined as string | undefined, viaFirecrawl: false, creditsUsed: undefined },
101
+ details: { query: result.query, totalResults: 0, results: [] as SearxResult[], fullOutputPath: undefined as string | undefined, viaFirecrawl: false, creditsUsed: undefined },
177
102
  };
178
103
  }
179
104
 
180
105
  const lines: string[] = [
181
- `Results for "${finalQuery}":`,
106
+ `Results for "${result.query}":`,
182
107
  "",
183
108
  ];
184
109
 
185
- for (let i = 0; i < Math.min(maxResults, allResults.length); i++) {
186
- const r = allResults[i];
110
+ for (let i = 0; i < result.results.length; i++) {
111
+ const r = result.results[i];
187
112
  lines.push(`${i + 1}. ${r.title}`);
188
113
  lines.push(` URL: ${r.url}`);
189
114
  if (r.content) {
@@ -201,11 +126,10 @@ const webSearchTool = defineTool({
201
126
  tmpPrefix: "pi-web-search-",
202
127
  alwaysWriteFile: true,
203
128
  });
204
- fullOutputPath = sink.fullOutputPath;
205
129
 
206
130
  return {
207
131
  content: [{ type: "text", text: sink.text }],
208
- details: { query: finalQuery, totalResults: allResults.length, results: allResults.slice(0, maxResults), fullOutputPath, viaFirecrawl: false, creditsUsed: undefined },
132
+ details: { query: result.query, totalResults: result.totalResults, results: result.results, fullOutputPath: sink.fullOutputPath, viaFirecrawl: false, creditsUsed: undefined },
209
133
  };
210
134
  },
211
135