pi-web-toolkit 0.2.2 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +29 -0
- package/README.md +39 -9
- package/docs/adr/0001-firecrawl-keyless-cloud-fallback.md +5 -0
- package/docs/guide.md +23 -0
- package/docs/tools.md +62 -0
- package/extensions/firecrawl_interact.ts +148 -0
- package/extensions/firecrawl_scrape.ts +156 -0
- package/extensions/firecrawl_search.ts +165 -0
- package/extensions/index.ts +6 -0
- package/extensions/utils/cli-runner.ts +3 -0
- package/extensions/utils/firecrawl.ts +484 -0
- package/extensions/web_browse.ts +60 -0
- package/extensions/web_fetch.ts +29 -7
- package/extensions/web_search.ts +85 -35
- package/package.json +5 -4
package/extensions/web_browse.ts
CHANGED
|
@@ -28,6 +28,7 @@ import {
|
|
|
28
28
|
closeAgentBrowserSession,
|
|
29
29
|
} from "./utils/agent-browser";
|
|
30
30
|
import { writeWithFallback } from "./utils/output-sink";
|
|
31
|
+
import { interactKeyless, shouldFallbackBrowse, isFirecrawlEnabled } from "./utils/firecrawl";
|
|
31
32
|
import { abbreviateUrl, getErrorText, normalizeWhitespace } from "./utils/render-helpers";
|
|
32
33
|
|
|
33
34
|
export const WebBrowseActionSchema = Type.Object({
|
|
@@ -82,6 +83,25 @@ function formatBrowseStep(action: BrowseAction): string {
|
|
|
82
83
|
}
|
|
83
84
|
}
|
|
84
85
|
|
|
86
|
+
/**
 * Translates a structured browse request into a single natural-language
 * prompt describing the same interaction.
 *
 * Each supported action (`click`, `fill`/`type`, `press`, `scroll`, `wait`,
 * `wait_selector`) becomes one phrase; phrases are joined with "; " in the
 * order given. Action types not listed above contribute nothing to the
 * prompt. The prompt always ends with an extraction instruction: the text of
 * `params.selector` when one is provided, otherwise the page's main content.
 *
 * Caller-supplied selectors and typed values are JSON-quoted, so embedded
 * double quotes, backslashes, or newlines (e.g. the CSS selector
 * `input[name="q"]`) stay inside their quoted span instead of terminating it
 * early and leaking into the surrounding instruction text. For plain strings
 * the output is identical to simple double-quoting.
 *
 * @param params.url      Target page URL. Not referenced by the prompt text.
 * @param params.actions  Ordered list of browse actions to describe.
 * @param params.selector Optional selector whose text should be returned.
 * @returns The synthesized prompt string.
 */
function synthesizeBrowsePrompt(params: { url: string; actions: BrowseAction[]; selector?: string }): string {
  // Missing values render as "" (same as the previous `?? ""` default).
  const quote = (raw: unknown): string => JSON.stringify(String(raw ?? ""));

  const parts: string[] = [];
  for (const a of params.actions) {
    switch (a.type) {
      case "click":
        parts.push(`click the element ${quote(a.selector)}`);
        break;
      case "fill":
      case "type":
        parts.push(`type ${quote(a.value)} into ${quote(a.selector)}`);
        break;
      case "press":
        parts.push(`press ${a.key ?? ""}`);
        break;
      case "scroll":
        parts.push(`scroll ${a.direction ?? "down"}`);
        break;
      case "wait":
        parts.push("wait briefly");
        break;
      case "wait_selector":
        parts.push(`wait for ${quote(a.selector)} to appear`);
        break;
      // No default: any other action type is intentionally left out of the prompt.
    }
  }

  const actionText = parts.length ? `Perform these actions in order: ${parts.join("; ")}. ` : "";
  const extract = params.selector
    ? `Then return the text content of the element matching ${quote(params.selector)}.`
    : "Then return the main textual content of the page.";
  return `${actionText}${extract}`;
}
|
|
104
|
+
|
|
85
105
|
const webBrowseTool = defineTool({
|
|
86
106
|
name: "web_browse",
|
|
87
107
|
label: "Web Browse",
|
|
@@ -195,6 +215,38 @@ const webBrowseTool = defineTool({
|
|
|
195
215
|
},
|
|
196
216
|
};
|
|
197
217
|
} catch (err: any) {
|
|
218
|
+
// Firecrawl keyless fallback: only on runtime failures (CLI missing /
|
|
219
|
+
// batch failure), never on local validation errors (bad caller actions).
|
|
220
|
+
if (isFirecrawlEnabled() && !signal?.aborted && shouldFallbackBrowse(err as Error)) {
|
|
221
|
+
const fb = await interactKeyless(
|
|
222
|
+
params.url,
|
|
223
|
+
{ prompt: synthesizeBrowsePrompt({ url: params.url, actions: params.actions as BrowseAction[], selector: params.selector }), timeout: 60 },
|
|
224
|
+
signal,
|
|
225
|
+
);
|
|
226
|
+
if (fb.ok) {
|
|
227
|
+
const preview = (fb.output || "").replace(/\s+/g, " ").trim().slice(0, 500);
|
|
228
|
+
const creditTag = fb.creditsUsed !== undefined ? `, ${fb.creditsUsed} credits` : "";
|
|
229
|
+
const rawText = `URL: ${params.url}\n(via Firecrawl keyless interact fallback${creditTag})\n\n---\n\n${fb.output || "(no content extracted)"}`;
|
|
230
|
+
const sink = await writeWithFallback(rawText, { tmpPrefix: "pi-web-browse-firecrawl-" });
|
|
231
|
+
return {
|
|
232
|
+
content: [{ type: "text", text: sink.text }],
|
|
233
|
+
details: {
|
|
234
|
+
title: "",
|
|
235
|
+
url: params.url,
|
|
236
|
+
fullOutputPath: sink.fullOutputPath,
|
|
237
|
+
preview,
|
|
238
|
+
selector: params.selector,
|
|
239
|
+
headless: params.headless ?? true,
|
|
240
|
+
actionCount,
|
|
241
|
+
steps,
|
|
242
|
+
viaFirecrawl: true,
|
|
243
|
+
creditsUsed: fb.creditsUsed,
|
|
244
|
+
},
|
|
245
|
+
};
|
|
246
|
+
}
|
|
247
|
+
// Graceful skip (CLI absent / IP flagged / rate-limited / disabled):
|
|
248
|
+
// fall through to the original local error.
|
|
249
|
+
}
|
|
198
250
|
throw new Error(`Error browsing ${params.url}: ${err.message ?? err}`);
|
|
199
251
|
} finally {
|
|
200
252
|
await closeAgentBrowserSession(session, signal);
|
|
@@ -249,6 +301,8 @@ const webBrowseTool = defineTool({
|
|
|
249
301
|
headless?: boolean;
|
|
250
302
|
actionCount?: number;
|
|
251
303
|
steps?: string[];
|
|
304
|
+
viaFirecrawl?: boolean;
|
|
305
|
+
creditsUsed?: number;
|
|
252
306
|
} | undefined;
|
|
253
307
|
|
|
254
308
|
if (isError) {
|
|
@@ -266,6 +320,12 @@ const webBrowseTool = defineTool({
|
|
|
266
320
|
}
|
|
267
321
|
|
|
268
322
|
let text = theme.fg("success", "✓ Browsed");
|
|
323
|
+
if (details?.viaFirecrawl) {
|
|
324
|
+
text += theme.fg("accent", " [Firecrawl keyless]");
|
|
325
|
+
}
|
|
326
|
+
if (details?.creditsUsed !== undefined) {
|
|
327
|
+
text += theme.fg("muted", ` ${details.creditsUsed} credits`);
|
|
328
|
+
}
|
|
269
329
|
if (details?.title) {
|
|
270
330
|
text += ` ${theme.fg("toolTitle", details.title)}`;
|
|
271
331
|
}
|
package/extensions/web_fetch.ts
CHANGED
|
@@ -25,6 +25,7 @@ import * as path from "node:path";
|
|
|
25
25
|
import { runScraplingWithFallback } from "./utils/scrapling";
|
|
26
26
|
import { extractPreview } from "./utils/content-preview";
|
|
27
27
|
import { writeWithFallback } from "./utils/output-sink";
|
|
28
|
+
import { scrapeKeyless } from "./utils/firecrawl";
|
|
28
29
|
import { abbreviateUrl, getDomain, getErrorText, normalizeWhitespace, formatExtraction } from "./utils/render-helpers";
|
|
29
30
|
|
|
30
31
|
export const WebFetchParamsSchema = Type.Object({
|
|
@@ -68,15 +69,31 @@ const webFetchTool = defineTool({
|
|
|
68
69
|
signal,
|
|
69
70
|
);
|
|
70
71
|
|
|
71
|
-
|
|
72
|
-
|
|
72
|
+
let content: string;
|
|
73
|
+
let bytes: number;
|
|
74
|
+
let viaFirecrawl = false;
|
|
75
|
+
|
|
76
|
+
if (ok) {
|
|
77
|
+
content = await fs.promises.readFile(tmpFile, "utf-8");
|
|
78
|
+
bytes = (await fs.promises.stat(tmpFile)).size;
|
|
79
|
+
} else {
|
|
80
|
+
// Local scrapling failed — try the Firecrawl keyless fallback.
|
|
81
|
+
const localError = `Failed to fetch ${params.url}\n\nscrapling error:\n${stderr}`;
|
|
82
|
+
const fb = await scrapeKeyless(params.url, {}, signal);
|
|
83
|
+
if (fb.ok) {
|
|
84
|
+
content = fb.content;
|
|
85
|
+
bytes = fb.bytes;
|
|
86
|
+
viaFirecrawl = true;
|
|
87
|
+
} else {
|
|
88
|
+
// Graceful skip (CLI absent / IP flagged / rate-limited / disabled):
|
|
89
|
+
// never leave the user worse off — surface the original local error.
|
|
90
|
+
throw new Error(localError);
|
|
91
|
+
}
|
|
73
92
|
}
|
|
74
93
|
|
|
75
|
-
const content = await fs.promises.readFile(tmpFile, "utf-8");
|
|
76
|
-
const stats = await fs.promises.stat(tmpFile);
|
|
77
|
-
|
|
78
94
|
const preview = extractPreview(content, 500);
|
|
79
|
-
const
|
|
95
|
+
const viaTag = viaFirecrawl ? "\n(via Firecrawl keyless fallback)" : "";
|
|
96
|
+
const rawText = `Fetched: ${params.url}${viaTag}\nSize: ${bytes} bytes\n\n---\n\n${content}`;
|
|
80
97
|
const sink = await writeWithFallback(rawText, {
|
|
81
98
|
tmpPrefix: "pi-web-fetch-full-",
|
|
82
99
|
});
|
|
@@ -86,11 +103,12 @@ const webFetchTool = defineTool({
|
|
|
86
103
|
content: [{ type: "text", text: sink.text }],
|
|
87
104
|
details: {
|
|
88
105
|
url: params.url,
|
|
89
|
-
bytes
|
|
106
|
+
bytes,
|
|
90
107
|
fullOutputPath: tmpFull,
|
|
91
108
|
preview,
|
|
92
109
|
selector: params.selector,
|
|
93
110
|
stealthy: params.stealthy,
|
|
111
|
+
viaFirecrawl,
|
|
94
112
|
},
|
|
95
113
|
};
|
|
96
114
|
} catch (err: any) {
|
|
@@ -128,6 +146,7 @@ const webFetchTool = defineTool({
|
|
|
128
146
|
preview?: string;
|
|
129
147
|
selector?: string;
|
|
130
148
|
stealthy?: boolean;
|
|
149
|
+
viaFirecrawl?: boolean;
|
|
131
150
|
} | undefined;
|
|
132
151
|
|
|
133
152
|
if (isError) {
|
|
@@ -139,6 +158,9 @@ const webFetchTool = defineTool({
|
|
|
139
158
|
}
|
|
140
159
|
|
|
141
160
|
let text = theme.fg("success", "✓ Fetched");
|
|
161
|
+
if (details?.viaFirecrawl) {
|
|
162
|
+
text += theme.fg("accent", " [Firecrawl keyless]");
|
|
163
|
+
}
|
|
142
164
|
if (details?.url) {
|
|
143
165
|
text += ` ${theme.fg("dim", abbreviateUrl(details.url))}`;
|
|
144
166
|
}
|
package/extensions/web_search.ts
CHANGED
|
@@ -20,6 +20,7 @@ import {
|
|
|
20
20
|
import { Text } from "@earendil-works/pi-tui";
|
|
21
21
|
import { Type, type Static } from "typebox";
|
|
22
22
|
import { writeWithFallback } from "./utils/output-sink";
|
|
23
|
+
import { searchKeyless, shouldFallbackSearch } from "./utils/firecrawl";
|
|
23
24
|
import { abbreviateUrl, getDomain, getErrorText, normalizeWhitespace } from "./utils/render-helpers";
|
|
24
25
|
|
|
25
26
|
|
|
@@ -78,6 +79,9 @@ const webSearchTool = defineTool({
|
|
|
78
79
|
let fullOutputPath: string | undefined;
|
|
79
80
|
const MAX_PAGES = 3;
|
|
80
81
|
|
|
82
|
+
let localOk = true;
|
|
83
|
+
let localError: string | undefined;
|
|
84
|
+
|
|
81
85
|
try {
|
|
82
86
|
for (let page = 1; page <= MAX_PAGES; page++) {
|
|
83
87
|
const searchParams = new URLSearchParams({
|
|
@@ -120,51 +124,89 @@ const webSearchTool = defineTool({
|
|
|
120
124
|
break;
|
|
121
125
|
}
|
|
122
126
|
}
|
|
127
|
+
} catch (err: any) {
|
|
128
|
+
localOk = false;
|
|
129
|
+
localError = err.message ?? String(err);
|
|
130
|
+
}
|
|
123
131
|
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
132
|
+
// Firecrawl keyless fallback: when SearXNG errored OR returned nothing.
|
|
133
|
+
if (shouldFallbackSearch(localOk, allResults.length)) {
|
|
134
|
+
const fb = await searchKeyless(params.query, { limit: Math.min(maxResults, 10) }, signal);
|
|
135
|
+
if (fb.ok && fb.results.length > 0) {
|
|
136
|
+
const fbResults: SearxResult[] = fb.results.slice(0, maxResults).map((r) => ({
|
|
137
|
+
title: r.title ?? "(untitled)",
|
|
138
|
+
url: r.url,
|
|
139
|
+
content: r.description,
|
|
140
|
+
engine: "firecrawl",
|
|
141
|
+
}));
|
|
142
|
+
const creditTag = fb.creditsUsed !== undefined ? `, ${fb.creditsUsed} credits` : "";
|
|
143
|
+
const lines: string[] = [`Results for "${params.query}" (via Firecrawl keyless${creditTag}):`, ""];
|
|
144
|
+
for (let i = 0; i < fbResults.length; i++) {
|
|
145
|
+
const r = fbResults[i];
|
|
146
|
+
lines.push(`${i + 1}. ${r.title}`);
|
|
147
|
+
lines.push(` URL: ${r.url}`);
|
|
148
|
+
if (r.content) lines.push(` ${r.content.replace(/\s+/g, " ").trim()}`);
|
|
149
|
+
if (r.engine) lines.push(` [engine: ${r.engine}]`);
|
|
150
|
+
lines.push("");
|
|
128
151
|
}
|
|
152
|
+
const rawText = lines.join("\n");
|
|
153
|
+
const sink = await writeWithFallback(rawText, {
|
|
154
|
+
tmpPrefix: "pi-web-search-firecrawl-",
|
|
155
|
+
alwaysWriteFile: true,
|
|
156
|
+
});
|
|
129
157
|
return {
|
|
130
|
-
content: [{ type: "text", text }],
|
|
131
|
-
details: { query:
|
|
158
|
+
content: [{ type: "text", text: sink.text }],
|
|
159
|
+
details: { query: params.query, totalResults: fbResults.length, results: fbResults, fullOutputPath: sink.fullOutputPath, viaFirecrawl: true, creditsUsed: fb.creditsUsed },
|
|
132
160
|
};
|
|
133
161
|
}
|
|
162
|
+
// Graceful skip or empty Firecrawl: fall through to local handling.
|
|
163
|
+
}
|
|
134
164
|
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
];
|
|
165
|
+
if (!localOk) {
|
|
166
|
+
throw new Error(`Failed to query SearXNG at ${searxngUrl}: ${localError}`);
|
|
167
|
+
}
|
|
139
168
|
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
if (r.content) {
|
|
145
|
-
const snippet = r.content.replace(/\s+/g, " ").trim();
|
|
146
|
-
lines.push(` ${snippet}`);
|
|
147
|
-
}
|
|
148
|
-
if (r.engine) {
|
|
149
|
-
lines.push(` [engine: ${r.engine}]`);
|
|
150
|
-
}
|
|
151
|
-
lines.push("");
|
|
169
|
+
if (allResults.length === 0) {
|
|
170
|
+
let text = `No results found for "${finalQuery}".`;
|
|
171
|
+
if (suggestions && suggestions.length > 0) {
|
|
172
|
+
text += `\n\nSuggestions:\n${suggestions.map((s) => `- ${s}`).join("\n")}`;
|
|
152
173
|
}
|
|
153
|
-
|
|
154
|
-
const rawText = lines.join("\n");
|
|
155
|
-
const sink = await writeWithFallback(rawText, {
|
|
156
|
-
tmpPrefix: "pi-web-search-",
|
|
157
|
-
alwaysWriteFile: true,
|
|
158
|
-
});
|
|
159
|
-
fullOutputPath = sink.fullOutputPath;
|
|
160
|
-
|
|
161
174
|
return {
|
|
162
|
-
content: [{ type: "text", text
|
|
163
|
-
details: { query: finalQuery, totalResults:
|
|
175
|
+
content: [{ type: "text", text }],
|
|
176
|
+
details: { query: finalQuery, totalResults: 0, results: [] as SearxResult[], fullOutputPath: undefined as string | undefined, viaFirecrawl: false, creditsUsed: undefined },
|
|
164
177
|
};
|
|
165
|
-
} catch (err: any) {
|
|
166
|
-
throw new Error(`Failed to query SearXNG at ${searxngUrl}: ${err.message ?? err}`);
|
|
167
178
|
}
|
|
179
|
+
|
|
180
|
+
const lines: string[] = [
|
|
181
|
+
`Results for "${finalQuery}":`,
|
|
182
|
+
"",
|
|
183
|
+
];
|
|
184
|
+
|
|
185
|
+
for (let i = 0; i < Math.min(maxResults, allResults.length); i++) {
|
|
186
|
+
const r = allResults[i];
|
|
187
|
+
lines.push(`${i + 1}. ${r.title}`);
|
|
188
|
+
lines.push(` URL: ${r.url}`);
|
|
189
|
+
if (r.content) {
|
|
190
|
+
const snippet = r.content.replace(/\s+/g, " ").trim();
|
|
191
|
+
lines.push(` ${snippet}`);
|
|
192
|
+
}
|
|
193
|
+
if (r.engine) {
|
|
194
|
+
lines.push(` [engine: ${r.engine}]`);
|
|
195
|
+
}
|
|
196
|
+
lines.push("");
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
const rawText = lines.join("\n");
|
|
200
|
+
const sink = await writeWithFallback(rawText, {
|
|
201
|
+
tmpPrefix: "pi-web-search-",
|
|
202
|
+
alwaysWriteFile: true,
|
|
203
|
+
});
|
|
204
|
+
fullOutputPath = sink.fullOutputPath;
|
|
205
|
+
|
|
206
|
+
return {
|
|
207
|
+
content: [{ type: "text", text: sink.text }],
|
|
208
|
+
details: { query: finalQuery, totalResults: allResults.length, results: allResults.slice(0, maxResults), fullOutputPath, viaFirecrawl: false, creditsUsed: undefined },
|
|
209
|
+
};
|
|
168
210
|
},
|
|
169
211
|
|
|
170
212
|
renderCall(args, theme) {
|
|
@@ -190,6 +232,8 @@ const webSearchTool = defineTool({
|
|
|
190
232
|
totalResults?: number;
|
|
191
233
|
results?: Array<{ title?: string; url?: string; score?: number; engine?: string; content?: string }>;
|
|
192
234
|
fullOutputPath?: string;
|
|
235
|
+
viaFirecrawl?: boolean;
|
|
236
|
+
creditsUsed?: number;
|
|
193
237
|
} | undefined;
|
|
194
238
|
|
|
195
239
|
if (isError) {
|
|
@@ -207,7 +251,13 @@ const webSearchTool = defineTool({
|
|
|
207
251
|
const showing = details.results?.length ?? 0;
|
|
208
252
|
const total = details?.totalResults ?? 0;
|
|
209
253
|
let text = theme.fg("success", `✓ ${showing} unique results`);
|
|
210
|
-
if (
|
|
254
|
+
if (details?.viaFirecrawl) {
|
|
255
|
+
text += theme.fg("accent", " [Firecrawl keyless]");
|
|
256
|
+
}
|
|
257
|
+
if (details?.creditsUsed !== undefined) {
|
|
258
|
+
text += theme.fg("muted", ` ${details.creditsUsed} credits`);
|
|
259
|
+
}
|
|
260
|
+
if (!details?.viaFirecrawl && total > showing) {
|
|
211
261
|
text += theme.fg("dim", ` (${total} total)`);
|
|
212
262
|
}
|
|
213
263
|
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pi-web-toolkit",
|
|
3
|
-
"version": "0.
|
|
4
|
-
"description": "Web research toolkit for the pi coding agent. Search via SearXNG, fetch pages with scrapling, browse interactively via agent-browser,
|
|
3
|
+
"version": "0.3.1",
|
|
4
|
+
"description": "Web research toolkit for the pi coding agent. Search via SearXNG, fetch pages with scrapling, browse interactively via agent-browser, batch-read sources in parallel, and optionally fall back to Firecrawl Keyless (no API key) when a local backend fails.",
|
|
5
5
|
"author": "Wade Huang <fastwade11@gmail.com>",
|
|
6
6
|
"license": "MIT",
|
|
7
7
|
"repository": {
|
|
@@ -12,15 +12,16 @@
|
|
|
12
12
|
"url": "https://github.com/Wade11s/pi-web-toolkit/issues"
|
|
13
13
|
},
|
|
14
14
|
"homepage": "https://github.com/Wade11s/pi-web-toolkit#readme",
|
|
15
|
-
"keywords": ["pi-package", "pi-extension", "web-search", "scrapling", "agent-browser"],
|
|
15
|
+
"keywords": ["pi-package", "pi-extension", "web-search", "scrapling", "agent-browser", "firecrawl"],
|
|
16
16
|
"files": ["extensions", "docs", "README.md", "CHANGELOG.md", "package.json", "LICENSE"],
|
|
17
17
|
"engines": {
|
|
18
18
|
"node": ">=22.0.0"
|
|
19
19
|
},
|
|
20
20
|
"scripts": {
|
|
21
21
|
"typecheck": "tsc --noEmit",
|
|
22
|
-
"test": "tsx test/content-preview/test.ts && tsx test/agent-browser/test.ts",
|
|
22
|
+
"test": "tsx test/content-preview/test.ts && tsx test/agent-browser/test.ts && tsx test/firecrawl/test.ts",
|
|
23
23
|
"test:agent-browser": "tsx test/agent-browser/test.ts",
|
|
24
|
+
"test:firecrawl": "tsx test/firecrawl/test.ts",
|
|
24
25
|
"test:approve": "tsx test/content-preview/test.ts --approve"
|
|
25
26
|
},
|
|
26
27
|
"devDependencies": {
|