pi-web-toolkit 0.3.1 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +33 -1
- package/README.md +151 -119
- package/docs/adr/0001-firecrawl-keyless-cloud-fallback.md +1 -1
- package/docs/adr/0002-toolkit-config-for-installer-selections.md +3 -0
- package/docs/adr/0003-conservative-installer-prerequisites.md +3 -0
- package/docs/adr/0004-searxng-endpoint-discovery.md +3 -0
- package/docs/guide.md +20 -4
- package/docs/tools.md +25 -7
- package/extensions/firecrawl_interact.ts +13 -14
- package/extensions/firecrawl_scrape.ts +13 -14
- package/extensions/firecrawl_search.ts +6 -6
- package/extensions/index.ts +25 -0
- package/extensions/utils/agent-browser.ts +4 -3
- package/extensions/utils/config.ts +170 -0
- package/extensions/utils/firecrawl.ts +27 -3
- package/extensions/utils/scrapling.ts +2 -1
- package/extensions/utils/web-search-core.ts +146 -0
- package/extensions/web_batch_fetch.ts +3 -7
- package/extensions/web_browse.ts +5 -9
- package/extensions/web_fetch.ts +5 -9
- package/extensions/web_search.ts +42 -118
- package/install.sh +801 -0
- package/package.json +6 -3
package/extensions/web_search.ts
CHANGED
|
@@ -19,12 +19,13 @@ import {
|
|
|
19
19
|
} from "@earendil-works/pi-coding-agent";
|
|
20
20
|
import { Text } from "@earendil-works/pi-tui";
|
|
21
21
|
import { Type, type Static } from "typebox";
|
|
22
|
+
import { getSearxngUrl } from "./utils/config";
|
|
22
23
|
import { writeWithFallback } from "./utils/output-sink";
|
|
23
|
-
import { searchKeyless
|
|
24
|
+
import { searchKeyless } from "./utils/firecrawl";
|
|
25
|
+
import { runWebSearchCore } from "./utils/web-search-core";
|
|
24
26
|
import { abbreviateUrl, getDomain, getErrorText, normalizeWhitespace } from "./utils/render-helpers";
|
|
25
27
|
|
|
26
28
|
|
|
27
|
-
|
|
28
29
|
interface SearxResult {
|
|
29
30
|
title: string;
|
|
30
31
|
url: string;
|
|
@@ -33,12 +34,6 @@ interface SearxResult {
|
|
|
33
34
|
score?: number;
|
|
34
35
|
}
|
|
35
36
|
|
|
36
|
-
interface SearxResponse {
|
|
37
|
-
query: string;
|
|
38
|
-
results: SearxResult[];
|
|
39
|
-
suggestions?: string[];
|
|
40
|
-
}
|
|
41
|
-
|
|
42
37
|
export const WebSearchParamsSchema = Type.Object({
|
|
43
38
|
query: Type.String({ description: "Search query" }),
|
|
44
39
|
language: Type.Optional(Type.String({ description: "Language code (e.g. en, en-US, de). Omit to use SearXNG default.", default: "" })),
|
|
@@ -51,139 +46,69 @@ const webSearchTool = defineTool({
|
|
|
51
46
|
name: "web_search",
|
|
52
47
|
label: "Web Search",
|
|
53
48
|
description: [
|
|
54
|
-
"
|
|
49
|
+
"Primary local-first tool for web discovery via a SearXNG instance.",
|
|
55
50
|
"Returns a list of results with title, URL, and snippet.",
|
|
56
51
|
"Automatically aggregates up to 3 pages of SearXNG results when more than ~20 are needed.",
|
|
52
|
+
"Use web_search as the first attempt for web search; it automatically tries Firecrawl keyless only if SearXNG fails or returns nothing.",
|
|
57
53
|
"Use web_search when the user asks about current events, facts, or anything",
|
|
58
54
|
"that requires up-to-date information beyond the model's training data.",
|
|
59
55
|
`Output is truncated to ${DEFAULT_MAX_LINES} lines or ${formatSize(DEFAULT_MAX_BYTES)}; if truncated, full output is saved to a temp file.`,
|
|
60
56
|
].join(" "),
|
|
61
|
-
promptSnippet: "
|
|
57
|
+
promptSnippet: "Local web search via SearXNG",
|
|
62
58
|
promptGuidelines: [
|
|
63
|
-
"Use web_search
|
|
64
|
-
"
|
|
65
|
-
"If web_search returns no results but includes suggestions, consider using a suggested query to refine your search.",
|
|
66
|
-
"If web_search returns multiple (2–5) relevant results that all need to be read, prefer web_batch_fetch to fetch them in parallel instead of calling web_fetch repeatedly.",
|
|
59
|
+
"Use web_search for current/external facts, verification, docs, and discovery.",
|
|
60
|
+
"If 2–5 results need reading, use web_batch_fetch; retry suggested queries when results are empty.",
|
|
67
61
|
],
|
|
68
62
|
parameters: WebSearchParamsSchema,
|
|
69
63
|
|
|
70
64
|
async execute(_toolCallId, params, signal) {
|
|
71
|
-
const
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
let suggestions: string[] | undefined;
|
|
78
|
-
let finalQuery = params.query;
|
|
79
|
-
let fullOutputPath: string | undefined;
|
|
80
|
-
const MAX_PAGES = 3;
|
|
81
|
-
|
|
82
|
-
let localOk = true;
|
|
83
|
-
let localError: string | undefined;
|
|
84
|
-
|
|
85
|
-
try {
|
|
86
|
-
for (let page = 1; page <= MAX_PAGES; page++) {
|
|
87
|
-
const searchParams = new URLSearchParams({
|
|
88
|
-
q: params.query,
|
|
89
|
-
format: "json",
|
|
90
|
-
pageno: String(page),
|
|
91
|
-
});
|
|
92
|
-
if (language) searchParams.set("language", language);
|
|
93
|
-
|
|
94
|
-
const response = await fetch(`${searxngUrl}/search?${searchParams.toString()}`, {
|
|
95
|
-
method: "GET",
|
|
96
|
-
headers: { Accept: "application/json" },
|
|
97
|
-
signal,
|
|
98
|
-
});
|
|
99
|
-
|
|
100
|
-
if (!response.ok) {
|
|
101
|
-
const body = await response.text().catch(() => "");
|
|
102
|
-
throw new Error(`SearXNG error: ${response.status} ${response.statusText}\n${body}`);
|
|
103
|
-
}
|
|
104
|
-
|
|
105
|
-
const data = (await response.json()) as SearxResponse;
|
|
106
|
-
finalQuery = data.query;
|
|
107
|
-
|
|
108
|
-
if (data.suggestions && data.suggestions.length > 0 && !suggestions) {
|
|
109
|
-
suggestions = data.suggestions;
|
|
110
|
-
}
|
|
111
|
-
|
|
112
|
-
if (!data.results || data.results.length === 0) {
|
|
113
|
-
break;
|
|
114
|
-
}
|
|
115
|
-
|
|
116
|
-
for (const r of data.results) {
|
|
117
|
-
if (!seenUrls.has(r.url)) {
|
|
118
|
-
seenUrls.add(r.url);
|
|
119
|
-
allResults.push(r);
|
|
120
|
-
}
|
|
121
|
-
}
|
|
122
|
-
|
|
123
|
-
if (allResults.length >= maxResults) {
|
|
124
|
-
break;
|
|
125
|
-
}
|
|
126
|
-
}
|
|
127
|
-
} catch (err: any) {
|
|
128
|
-
localOk = false;
|
|
129
|
-
localError = err.message ?? String(err);
|
|
130
|
-
}
|
|
65
|
+
const result = await runWebSearchCore(params, {
|
|
66
|
+
searxngUrl: getSearxngUrl(),
|
|
67
|
+
fetchImpl: fetch,
|
|
68
|
+
firecrawlSearch: searchKeyless,
|
|
69
|
+
signal,
|
|
70
|
+
});
|
|
131
71
|
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
const
|
|
135
|
-
|
|
136
|
-
const
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
const creditTag = fb.creditsUsed !== undefined ? `, ${fb.creditsUsed} credits` : "";
|
|
143
|
-
const lines: string[] = [`Results for "${params.query}" (via Firecrawl keyless${creditTag}):`, ""];
|
|
144
|
-
for (let i = 0; i < fbResults.length; i++) {
|
|
145
|
-
const r = fbResults[i];
|
|
146
|
-
lines.push(`${i + 1}. ${r.title}`);
|
|
147
|
-
lines.push(` URL: ${r.url}`);
|
|
148
|
-
if (r.content) lines.push(` ${r.content.replace(/\s+/g, " ").trim()}`);
|
|
149
|
-
if (r.engine) lines.push(` [engine: ${r.engine}]`);
|
|
150
|
-
lines.push("");
|
|
151
|
-
}
|
|
152
|
-
const rawText = lines.join("\n");
|
|
153
|
-
const sink = await writeWithFallback(rawText, {
|
|
154
|
-
tmpPrefix: "pi-web-search-firecrawl-",
|
|
155
|
-
alwaysWriteFile: true,
|
|
156
|
-
});
|
|
157
|
-
return {
|
|
158
|
-
content: [{ type: "text", text: sink.text }],
|
|
159
|
-
details: { query: params.query, totalResults: fbResults.length, results: fbResults, fullOutputPath: sink.fullOutputPath, viaFirecrawl: true, creditsUsed: fb.creditsUsed },
|
|
160
|
-
};
|
|
72
|
+
if (result.viaFirecrawl) {
|
|
73
|
+
const creditTag = result.creditsUsed !== undefined ? `, ${result.creditsUsed} credits` : "";
|
|
74
|
+
const lines: string[] = [`Results for "${params.query}" (via Firecrawl keyless${creditTag}):`, ""];
|
|
75
|
+
for (let i = 0; i < result.results.length; i++) {
|
|
76
|
+
const r = result.results[i];
|
|
77
|
+
lines.push(`${i + 1}. ${r.title}`);
|
|
78
|
+
lines.push(` URL: ${r.url}`);
|
|
79
|
+
if (r.content) lines.push(` ${r.content.replace(/\s+/g, " ").trim()}`);
|
|
80
|
+
if (r.engine) lines.push(` [engine: ${r.engine}]`);
|
|
81
|
+
lines.push("");
|
|
161
82
|
}
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
83
|
+
const rawText = lines.join("\n");
|
|
84
|
+
const sink = await writeWithFallback(rawText, {
|
|
85
|
+
tmpPrefix: "pi-web-search-firecrawl-",
|
|
86
|
+
alwaysWriteFile: true,
|
|
87
|
+
});
|
|
88
|
+
return {
|
|
89
|
+
content: [{ type: "text", text: sink.text }],
|
|
90
|
+
details: { query: params.query, totalResults: result.totalResults, results: result.results, fullOutputPath: sink.fullOutputPath, viaFirecrawl: true, creditsUsed: result.creditsUsed },
|
|
91
|
+
};
|
|
167
92
|
}
|
|
168
93
|
|
|
169
|
-
if (
|
|
170
|
-
let text = `No results found for "${
|
|
171
|
-
if (suggestions && suggestions.length > 0) {
|
|
172
|
-
text += `\n\nSuggestions:\n${suggestions.map((s) => `- ${s}`).join("\n")}`;
|
|
94
|
+
if (result.results.length === 0) {
|
|
95
|
+
let text = `No results found for "${result.query}".`;
|
|
96
|
+
if (result.suggestions && result.suggestions.length > 0) {
|
|
97
|
+
text += `\n\nSuggestions:\n${result.suggestions.map((s) => `- ${s}`).join("\n")}`;
|
|
173
98
|
}
|
|
174
99
|
return {
|
|
175
100
|
content: [{ type: "text", text }],
|
|
176
|
-
details: { query:
|
|
101
|
+
details: { query: result.query, totalResults: 0, results: [] as SearxResult[], fullOutputPath: undefined as string | undefined, viaFirecrawl: false, creditsUsed: undefined },
|
|
177
102
|
};
|
|
178
103
|
}
|
|
179
104
|
|
|
180
105
|
const lines: string[] = [
|
|
181
|
-
`Results for "${
|
|
106
|
+
`Results for "${result.query}":`,
|
|
182
107
|
"",
|
|
183
108
|
];
|
|
184
109
|
|
|
185
|
-
for (let i = 0; i <
|
|
186
|
-
const r =
|
|
110
|
+
for (let i = 0; i < result.results.length; i++) {
|
|
111
|
+
const r = result.results[i];
|
|
187
112
|
lines.push(`${i + 1}. ${r.title}`);
|
|
188
113
|
lines.push(` URL: ${r.url}`);
|
|
189
114
|
if (r.content) {
|
|
@@ -201,11 +126,10 @@ const webSearchTool = defineTool({
|
|
|
201
126
|
tmpPrefix: "pi-web-search-",
|
|
202
127
|
alwaysWriteFile: true,
|
|
203
128
|
});
|
|
204
|
-
fullOutputPath = sink.fullOutputPath;
|
|
205
129
|
|
|
206
130
|
return {
|
|
207
131
|
content: [{ type: "text", text: sink.text }],
|
|
208
|
-
details: { query:
|
|
132
|
+
details: { query: result.query, totalResults: result.totalResults, results: result.results, fullOutputPath: sink.fullOutputPath, viaFirecrawl: false, creditsUsed: undefined },
|
|
209
133
|
};
|
|
210
134
|
},
|
|
211
135
|
|