pi-web-toolkit 0.2.2 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,6 +28,7 @@ import {
28
28
  closeAgentBrowserSession,
29
29
  } from "./utils/agent-browser";
30
30
  import { writeWithFallback } from "./utils/output-sink";
31
+ import { interactKeyless, shouldFallbackBrowse, isFirecrawlEnabled } from "./utils/firecrawl";
31
32
  import { abbreviateUrl, getErrorText, normalizeWhitespace } from "./utils/render-helpers";
32
33
 
33
34
  export const WebBrowseActionSchema = Type.Object({
@@ -82,6 +83,25 @@ function formatBrowseStep(action: BrowseAction): string {
82
83
  }
83
84
  }
84
85
 
86
/**
 * Builds the natural-language prompt passed to the Firecrawl keyless
 * interact fallback from a structured list of browse actions.
 *
 * Every recognised action is rendered as one short imperative phrase. The
 * phrases are joined with "; " behind a "Perform these actions in order:"
 * lead-in, followed by an extraction instruction. Action types that are not
 * handled below contribute nothing to the prompt.
 *
 * @param params.url Not read by this function.
 * @param params.actions Actions to describe, in execution order.
 * @param params.selector When truthy, the prompt asks for the text of the
 *   element matching it; otherwise it asks for the page's main text.
 * @returns The assembled prompt string.
 */
function synthesizeBrowsePrompt(params: { url: string; actions: BrowseAction[]; selector?: string }): string {
  // Render a single action as a phrase, or undefined when the type is not handled.
  const describeAction = (action: BrowseAction): string | undefined => {
    switch (action.type) {
      case "click":
        return `click the element "${action.selector ?? ""}"`;
      case "fill":
      case "type":
        return `type "${action.value ?? ""}" into "${action.selector ?? ""}"`;
      case "press":
        return `press ${action.key ?? ""}`;
      case "scroll":
        return `scroll ${action.direction ?? "down"}`;
      case "wait":
        return "wait briefly";
      case "wait_selector":
        return `wait for "${action.selector ?? ""}" to appear`;
      default:
        return undefined;
    }
  };

  const phrases = params.actions
    .map(describeAction)
    .filter((phrase): phrase is string => phrase !== undefined);

  // The lead-in is omitted entirely when no action produced a phrase.
  let prompt = "";
  if (phrases.length > 0) {
    prompt += `Perform these actions in order: ${phrases.join("; ")}. `;
  }

  if (params.selector) {
    prompt += `Then return the text content of the element matching "${params.selector}".`;
  } else {
    prompt += "Then return the main textual content of the page.";
  }
  return prompt;
}
104
+
85
105
  const webBrowseTool = defineTool({
86
106
  name: "web_browse",
87
107
  label: "Web Browse",
@@ -195,6 +215,38 @@ const webBrowseTool = defineTool({
195
215
  },
196
216
  };
197
217
  } catch (err: any) {
218
+ // Firecrawl keyless fallback: only on runtime failures (CLI missing /
219
+ // batch failure), never on local validation errors (bad caller actions).
220
+ if (isFirecrawlEnabled() && !signal?.aborted && shouldFallbackBrowse(err as Error)) {
221
+ const fb = await interactKeyless(
222
+ params.url,
223
+ { prompt: synthesizeBrowsePrompt({ url: params.url, actions: params.actions as BrowseAction[], selector: params.selector }), timeout: 60 },
224
+ signal,
225
+ );
226
+ if (fb.ok) {
227
+ const preview = (fb.output || "").replace(/\s+/g, " ").trim().slice(0, 500);
228
+ const creditTag = fb.creditsUsed !== undefined ? `, ${fb.creditsUsed} credits` : "";
229
+ const rawText = `URL: ${params.url}\n(via Firecrawl keyless interact fallback${creditTag})\n\n---\n\n${fb.output || "(no content extracted)"}`;
230
+ const sink = await writeWithFallback(rawText, { tmpPrefix: "pi-web-browse-firecrawl-" });
231
+ return {
232
+ content: [{ type: "text", text: sink.text }],
233
+ details: {
234
+ title: "",
235
+ url: params.url,
236
+ fullOutputPath: sink.fullOutputPath,
237
+ preview,
238
+ selector: params.selector,
239
+ headless: params.headless ?? true,
240
+ actionCount,
241
+ steps,
242
+ viaFirecrawl: true,
243
+ creditsUsed: fb.creditsUsed,
244
+ },
245
+ };
246
+ }
247
+ // Graceful skip (CLI absent / IP flagged / rate-limited / disabled):
248
+ // fall through to the original local error.
249
+ }
198
250
  throw new Error(`Error browsing ${params.url}: ${err.message ?? err}`);
199
251
  } finally {
200
252
  await closeAgentBrowserSession(session, signal);
@@ -249,6 +301,8 @@ const webBrowseTool = defineTool({
249
301
  headless?: boolean;
250
302
  actionCount?: number;
251
303
  steps?: string[];
304
+ viaFirecrawl?: boolean;
305
+ creditsUsed?: number;
252
306
  } | undefined;
253
307
 
254
308
  if (isError) {
@@ -266,6 +320,12 @@ const webBrowseTool = defineTool({
266
320
  }
267
321
 
268
322
  let text = theme.fg("success", "✓ Browsed");
323
+ if (details?.viaFirecrawl) {
324
+ text += theme.fg("accent", " [Firecrawl keyless]");
325
+ }
326
+ if (details?.creditsUsed !== undefined) {
327
+ text += theme.fg("muted", ` ${details.creditsUsed} credits`);
328
+ }
269
329
  if (details?.title) {
270
330
  text += ` ${theme.fg("toolTitle", details.title)}`;
271
331
  }
@@ -25,6 +25,7 @@ import * as path from "node:path";
25
25
  import { runScraplingWithFallback } from "./utils/scrapling";
26
26
  import { extractPreview } from "./utils/content-preview";
27
27
  import { writeWithFallback } from "./utils/output-sink";
28
+ import { scrapeKeyless } from "./utils/firecrawl";
28
29
  import { abbreviateUrl, getDomain, getErrorText, normalizeWhitespace, formatExtraction } from "./utils/render-helpers";
29
30
 
30
31
  export const WebFetchParamsSchema = Type.Object({
@@ -68,15 +69,31 @@ const webFetchTool = defineTool({
68
69
  signal,
69
70
  );
70
71
 
71
- if (!ok) {
72
- throw new Error(`Failed to fetch ${params.url}\n\nscrapling error:\n${stderr}`);
72
+ let content: string;
73
+ let bytes: number;
74
+ let viaFirecrawl = false;
75
+
76
+ if (ok) {
77
+ content = await fs.promises.readFile(tmpFile, "utf-8");
78
+ bytes = (await fs.promises.stat(tmpFile)).size;
79
+ } else {
80
+ // Local scrapling failed — try the Firecrawl keyless fallback.
81
+ const localError = `Failed to fetch ${params.url}\n\nscrapling error:\n${stderr}`;
82
+ const fb = await scrapeKeyless(params.url, {}, signal);
83
+ if (fb.ok) {
84
+ content = fb.content;
85
+ bytes = fb.bytes;
86
+ viaFirecrawl = true;
87
+ } else {
88
+ // Graceful skip (CLI absent / IP flagged / rate-limited / disabled):
89
+ // never leave the user worse off — surface the original local error.
90
+ throw new Error(localError);
91
+ }
73
92
  }
74
93
 
75
- const content = await fs.promises.readFile(tmpFile, "utf-8");
76
- const stats = await fs.promises.stat(tmpFile);
77
-
78
94
  const preview = extractPreview(content, 500);
79
- const rawText = `Fetched: ${params.url}\nSize: ${stats.size} bytes\n\n---\n\n${content}`;
95
+ const viaTag = viaFirecrawl ? "\n(via Firecrawl keyless fallback)" : "";
96
+ const rawText = `Fetched: ${params.url}${viaTag}\nSize: ${bytes} bytes\n\n---\n\n${content}`;
80
97
  const sink = await writeWithFallback(rawText, {
81
98
  tmpPrefix: "pi-web-fetch-full-",
82
99
  });
@@ -86,11 +103,12 @@ const webFetchTool = defineTool({
86
103
  content: [{ type: "text", text: sink.text }],
87
104
  details: {
88
105
  url: params.url,
89
- bytes: stats.size,
106
+ bytes,
90
107
  fullOutputPath: tmpFull,
91
108
  preview,
92
109
  selector: params.selector,
93
110
  stealthy: params.stealthy,
111
+ viaFirecrawl,
94
112
  },
95
113
  };
96
114
  } catch (err: any) {
@@ -128,6 +146,7 @@ const webFetchTool = defineTool({
128
146
  preview?: string;
129
147
  selector?: string;
130
148
  stealthy?: boolean;
149
+ viaFirecrawl?: boolean;
131
150
  } | undefined;
132
151
 
133
152
  if (isError) {
@@ -139,6 +158,9 @@ const webFetchTool = defineTool({
139
158
  }
140
159
 
141
160
  let text = theme.fg("success", "✓ Fetched");
161
+ if (details?.viaFirecrawl) {
162
+ text += theme.fg("accent", " [Firecrawl keyless]");
163
+ }
142
164
  if (details?.url) {
143
165
  text += ` ${theme.fg("dim", abbreviateUrl(details.url))}`;
144
166
  }
@@ -20,6 +20,7 @@ import {
20
20
  import { Text } from "@earendil-works/pi-tui";
21
21
  import { Type, type Static } from "typebox";
22
22
  import { writeWithFallback } from "./utils/output-sink";
23
+ import { searchKeyless, shouldFallbackSearch } from "./utils/firecrawl";
23
24
  import { abbreviateUrl, getDomain, getErrorText, normalizeWhitespace } from "./utils/render-helpers";
24
25
 
25
26
 
@@ -78,6 +79,9 @@ const webSearchTool = defineTool({
78
79
  let fullOutputPath: string | undefined;
79
80
  const MAX_PAGES = 3;
80
81
 
82
+ let localOk = true;
83
+ let localError: string | undefined;
84
+
81
85
  try {
82
86
  for (let page = 1; page <= MAX_PAGES; page++) {
83
87
  const searchParams = new URLSearchParams({
@@ -120,51 +124,89 @@ const webSearchTool = defineTool({
120
124
  break;
121
125
  }
122
126
  }
127
+ } catch (err: any) {
128
+ localOk = false;
129
+ localError = err.message ?? String(err);
130
+ }
123
131
 
124
- if (allResults.length === 0) {
125
- let text = `No results found for "${finalQuery}".`;
126
- if (suggestions && suggestions.length > 0) {
127
- text += `\n\nSuggestions:\n${suggestions.map((s) => `- ${s}`).join("\n")}`;
132
+ // Firecrawl keyless fallback: when SearXNG errored OR returned nothing.
133
+ if (shouldFallbackSearch(localOk, allResults.length)) {
134
+ const fb = await searchKeyless(params.query, { limit: Math.min(maxResults, 10) }, signal);
135
+ if (fb.ok && fb.results.length > 0) {
136
+ const fbResults: SearxResult[] = fb.results.slice(0, maxResults).map((r) => ({
137
+ title: r.title ?? "(untitled)",
138
+ url: r.url,
139
+ content: r.description,
140
+ engine: "firecrawl",
141
+ }));
142
+ const creditTag = fb.creditsUsed !== undefined ? `, ${fb.creditsUsed} credits` : "";
143
+ const lines: string[] = [`Results for "${params.query}" (via Firecrawl keyless${creditTag}):`, ""];
144
+ for (let i = 0; i < fbResults.length; i++) {
145
+ const r = fbResults[i];
146
+ lines.push(`${i + 1}. ${r.title}`);
147
+ lines.push(` URL: ${r.url}`);
148
+ if (r.content) lines.push(` ${r.content.replace(/\s+/g, " ").trim()}`);
149
+ if (r.engine) lines.push(` [engine: ${r.engine}]`);
150
+ lines.push("");
128
151
  }
152
+ const rawText = lines.join("\n");
153
+ const sink = await writeWithFallback(rawText, {
154
+ tmpPrefix: "pi-web-search-firecrawl-",
155
+ alwaysWriteFile: true,
156
+ });
129
157
  return {
130
- content: [{ type: "text", text }],
131
- details: { query: finalQuery, totalResults: 0, results: [] as SearxResult[], fullOutputPath: undefined as string | undefined },
158
+ content: [{ type: "text", text: sink.text }],
159
+ details: { query: params.query, totalResults: fbResults.length, results: fbResults, fullOutputPath: sink.fullOutputPath, viaFirecrawl: true, creditsUsed: fb.creditsUsed },
132
160
  };
133
161
  }
162
+ // Graceful skip or empty Firecrawl: fall through to local handling.
163
+ }
134
164
 
135
- const lines: string[] = [
136
- `Results for "${finalQuery}":`,
137
- "",
138
- ];
165
+ if (!localOk) {
166
+ throw new Error(`Failed to query SearXNG at ${searxngUrl}: ${localError}`);
167
+ }
139
168
 
140
- for (let i = 0; i < Math.min(maxResults, allResults.length); i++) {
141
- const r = allResults[i];
142
- lines.push(`${i + 1}. ${r.title}`);
143
- lines.push(` URL: ${r.url}`);
144
- if (r.content) {
145
- const snippet = r.content.replace(/\s+/g, " ").trim();
146
- lines.push(` ${snippet}`);
147
- }
148
- if (r.engine) {
149
- lines.push(` [engine: ${r.engine}]`);
150
- }
151
- lines.push("");
169
+ if (allResults.length === 0) {
170
+ let text = `No results found for "${finalQuery}".`;
171
+ if (suggestions && suggestions.length > 0) {
172
+ text += `\n\nSuggestions:\n${suggestions.map((s) => `- ${s}`).join("\n")}`;
152
173
  }
153
-
154
- const rawText = lines.join("\n");
155
- const sink = await writeWithFallback(rawText, {
156
- tmpPrefix: "pi-web-search-",
157
- alwaysWriteFile: true,
158
- });
159
- fullOutputPath = sink.fullOutputPath;
160
-
161
174
  return {
162
- content: [{ type: "text", text: sink.text }],
163
- details: { query: finalQuery, totalResults: allResults.length, results: allResults.slice(0, maxResults), fullOutputPath },
175
+ content: [{ type: "text", text }],
176
+ details: { query: finalQuery, totalResults: 0, results: [] as SearxResult[], fullOutputPath: undefined as string | undefined, viaFirecrawl: false, creditsUsed: undefined },
164
177
  };
165
- } catch (err: any) {
166
- throw new Error(`Failed to query SearXNG at ${searxngUrl}: ${err.message ?? err}`);
167
178
  }
179
+
180
+ const lines: string[] = [
181
+ `Results for "${finalQuery}":`,
182
+ "",
183
+ ];
184
+
185
+ for (let i = 0; i < Math.min(maxResults, allResults.length); i++) {
186
+ const r = allResults[i];
187
+ lines.push(`${i + 1}. ${r.title}`);
188
+ lines.push(` URL: ${r.url}`);
189
+ if (r.content) {
190
+ const snippet = r.content.replace(/\s+/g, " ").trim();
191
+ lines.push(` ${snippet}`);
192
+ }
193
+ if (r.engine) {
194
+ lines.push(` [engine: ${r.engine}]`);
195
+ }
196
+ lines.push("");
197
+ }
198
+
199
+ const rawText = lines.join("\n");
200
+ const sink = await writeWithFallback(rawText, {
201
+ tmpPrefix: "pi-web-search-",
202
+ alwaysWriteFile: true,
203
+ });
204
+ fullOutputPath = sink.fullOutputPath;
205
+
206
+ return {
207
+ content: [{ type: "text", text: sink.text }],
208
+ details: { query: finalQuery, totalResults: allResults.length, results: allResults.slice(0, maxResults), fullOutputPath, viaFirecrawl: false, creditsUsed: undefined },
209
+ };
168
210
  },
169
211
 
170
212
  renderCall(args, theme) {
@@ -190,6 +232,8 @@ const webSearchTool = defineTool({
190
232
  totalResults?: number;
191
233
  results?: Array<{ title?: string; url?: string; score?: number; engine?: string; content?: string }>;
192
234
  fullOutputPath?: string;
235
+ viaFirecrawl?: boolean;
236
+ creditsUsed?: number;
193
237
  } | undefined;
194
238
 
195
239
  if (isError) {
@@ -207,7 +251,13 @@ const webSearchTool = defineTool({
207
251
  const showing = details.results?.length ?? 0;
208
252
  const total = details?.totalResults ?? 0;
209
253
  let text = theme.fg("success", `✓ ${showing} unique results`);
210
- if (total > showing) {
254
+ if (details?.viaFirecrawl) {
255
+ text += theme.fg("accent", " [Firecrawl keyless]");
256
+ }
257
+ if (details?.creditsUsed !== undefined) {
258
+ text += theme.fg("muted", ` ${details.creditsUsed} credits`);
259
+ }
260
+ if (!details?.viaFirecrawl && total > showing) {
211
261
  text += theme.fg("dim", ` (${total} total)`);
212
262
  }
213
263
 
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "pi-web-toolkit",
3
- "version": "0.2.2",
4
- "description": "Web research toolkit for the pi coding agent. Search via SearXNG, fetch pages with scrapling, browse interactively via agent-browser, and batch-read sources in parallel.",
3
+ "version": "0.3.1",
4
+ "description": "Web research toolkit for the pi coding agent. Search via SearXNG, fetch pages with scrapling, browse interactively via agent-browser, batch-read sources in parallel, and optionally fall back to Firecrawl Keyless (no API key) when a local backend fails.",
5
5
  "author": "Wade Huang <fastwade11@gmail.com>",
6
6
  "license": "MIT",
7
7
  "repository": {
@@ -12,15 +12,16 @@
12
12
  "url": "https://github.com/Wade11s/pi-web-toolkit/issues"
13
13
  },
14
14
  "homepage": "https://github.com/Wade11s/pi-web-toolkit#readme",
15
- "keywords": ["pi-package", "pi-extension", "web-search", "scrapling", "agent-browser"],
15
+ "keywords": ["pi-package", "pi-extension", "web-search", "scrapling", "agent-browser", "firecrawl"],
16
16
  "files": ["extensions", "docs", "README.md", "CHANGELOG.md", "package.json", "LICENSE"],
17
17
  "engines": {
18
18
  "node": ">=22.0.0"
19
19
  },
20
20
  "scripts": {
21
21
  "typecheck": "tsc --noEmit",
22
- "test": "tsx test/content-preview/test.ts && tsx test/agent-browser/test.ts",
22
+ "test": "tsx test/content-preview/test.ts && tsx test/agent-browser/test.ts && tsx test/firecrawl/test.ts",
23
23
  "test:agent-browser": "tsx test/agent-browser/test.ts",
24
+ "test:firecrawl": "tsx test/firecrawl/test.ts",
24
25
  "test:approve": "tsx test/content-preview/test.ts --approve"
25
26
  },
26
27
  "devDependencies": {