pi-web-toolkit 0.3.1 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -41,17 +41,17 @@ const firecrawlScrapeTool = defineTool({
41
41
  name: "firecrawl_scrape",
42
42
  label: "Firecrawl Scrape",
43
43
  description: [
44
- "Fetch a single page as clean markdown via Firecrawl (keyless — no API key, no signup).",
45
- "Use firecrawl_scrape when the local web_fetch fails on a hard target (anti-bot,",
46
- "JavaScript-heavy pages, PDFs) or when you need Firecrawl's cloud rendering directly.",
44
+ "Fallback-only cloud fetch via Firecrawl (keyless — no API key, no signup).",
45
+ "Do not use firecrawl_scrape as the first attempt for ordinary URL reading; use web_fetch first.",
46
+ "Use firecrawl_scrape only when web_fetch already failed on a hard target (anti-bot,",
47
+ "JavaScript-heavy pages, PDFs), or when the user explicitly asks for Firecrawl/cloud rendering.",
47
48
  "Privacy: the URL and page content are sent to Firecrawl's cloud.",
48
49
  `Output is truncated to ${DEFAULT_MAX_LINES} lines or ${formatSize(DEFAULT_MAX_BYTES)}; if truncated, full output is saved to a temp file.`,
49
50
  ].join(" "),
50
- promptSnippet: "Fetch a single page via Firecrawl keyless (anti-bot / JS / PDF fallback)",
51
+ promptSnippet: "Fallback-only Firecrawl scrape",
51
52
  promptGuidelines: [
52
- "Prefer web_fetch first; reach for firecrawl_scrape when web_fetch fails or you need cloud rendering.",
53
- "firecrawl_scrape handles anti-bot protection, JS-heavy SPAs, and PDFs that scrapling may miss.",
54
- "Always pass the full URL including https://.",
53
+ "Use firecrawl_scrape only after web_fetch fails or explicit cloud scraping/rendering.",
54
+ "Use firecrawl_scrape for anti-bot pages, heavy JS, and PDFs.",
55
55
  ],
56
56
  parameters: FirecrawlScrapeParamsSchema,
57
57
 
@@ -97,13 +97,6 @@ const firecrawlScrapeTool = defineTool({
97
97
 
98
98
  renderResult(result, { expanded, isPartial }, theme, context) {
99
99
  const isError = context?.isError ?? false;
100
-
101
- if (isPartial) {
102
- const domain = details?.url ? getDomain(details.url) : "";
103
- const label = domain ? `Scraping ${domain} via Firecrawl...` : "Scraping via Firecrawl...";
104
- return new Text(theme.fg("warning", label), 0, 0);
105
- }
106
-
107
100
  const details = result.details as {
108
101
  url?: string;
109
102
  bytes?: number;
@@ -113,6 +106,12 @@ const firecrawlScrapeTool = defineTool({
113
106
  creditsUsed?: number;
114
107
  } | undefined;
115
108
 
109
+ if (isPartial) {
110
+ const domain = details?.url ? getDomain(details.url) : "";
111
+ const label = domain ? `Scraping ${domain} via Firecrawl...` : "Scraping via Firecrawl...";
112
+ return new Text(theme.fg("warning", label), 0, 0);
113
+ }
114
+
116
115
  if (isError) {
117
116
  const errText = getErrorText(result);
118
117
  let text = theme.fg("error", "✗ Firecrawl scrape failed");
@@ -42,17 +42,17 @@ const firecrawlSearchTool = defineTool({
42
42
  name: "firecrawl_search",
43
43
  label: "Firecrawl Search",
44
44
  description: [
45
- "Search the web via Firecrawl (keyless — no API key, no signup).",
45
+ "Fallback-only cloud search via Firecrawl (keyless — no API key, no signup).",
46
+ "Do not use firecrawl_search as the first attempt for ordinary web discovery; use web_search first.",
46
47
  "Supports sources (web/images/news) and categories (github/research/pdf) that",
47
- "SearXNG does not. Use as an escape hatch or when web_search returns nothing.",
48
+ "SearXNG does not. Use only as an escape hatch when web_search fails/returns nothing, or when the user explicitly asks for Firecrawl/cloud search.",
48
49
  "Privacy: the query is sent to Firecrawl's cloud.",
49
50
  `Output is truncated to ${DEFAULT_MAX_LINES} lines or ${formatSize(DEFAULT_MAX_BYTES)}; if truncated, full output is saved to a temp file.`,
50
51
  ].join(" "),
51
- promptSnippet: "Search the web via Firecrawl keyless (categories, sources, domain filters)",
52
+ promptSnippet: "Fallback-only Firecrawl search",
52
53
  promptGuidelines: [
53
- "Prefer web_search first; reach for firecrawl_search when web_search fails or returns nothing.",
54
- "Use categories=[\"github\"], [\"research\"], or [\"pdf\"] for source-type-specific discovery.",
55
- "Use includeDomains/excludeDomains to scope results to specific sites.",
54
+ "Use firecrawl_search only after web_search fails/returns nothing, for Firecrawl-only categories, or explicit cloud search.",
55
+ "Use categories=[\"github\"|\"research\"|\"pdf\"] and includeDomains/excludeDomains when needed.",
56
56
  ],
57
57
  parameters: FirecrawlSearchParamsSchema,
58
58
 
@@ -17,6 +17,26 @@ import registerFirecrawlScrape from "./firecrawl_scrape";
17
17
  import registerFirecrawlSearch from "./firecrawl_search";
18
18
  import registerFirecrawlInteract from "./firecrawl_interact";
19
19
 
20
/**
 * Routing guidance for the toolkit's web tools: three rules, one per line,
 * separated by newlines.
 */
const WEB_TOOL_ROUTING_POLICY =
  "Web tools are local-first: web_search=discover, web_fetch=one static URL, web_batch_fetch=2–5 static URLs, web_browse=interaction.\n" +
  "Use firecrawl_* only after the matching local tool failed in this conversation, or when the user explicitly asks for Firecrawl/cloud.\n" +
  "web_search/web_fetch/web_browse already auto-fallback to Firecrawl; pass full URLs with scheme and selectors when useful.";
25
+
26
/** Names of the tools whose selection makes the routing policy relevant. */
const WEB_TOOL_NAMES = new Set<string>([
  "web_search",
  "web_fetch",
  "web_browse",
  "web_batch_fetch",
  "firecrawl_search",
  "firecrawl_scrape",
  "firecrawl_interact",
]);

/**
 * Decide whether the web-tool routing policy should be injected.
 *
 * @param selectedTools Tool names selected for the agent run, if known.
 * @returns `true` when at least one selected tool is a toolkit web tool;
 *   `false` when the list is missing, empty, or contains none of them.
 */
function shouldInjectWebToolRoutingPolicy(selectedTools: readonly string[] | undefined): boolean {
  if (!selectedTools) return false;

  for (const toolName of selectedTools) {
    if (WEB_TOOL_NAMES.has(toolName)) return true;
  }
  return false;
}
39
+
20
40
  export default function (pi: ExtensionAPI) {
21
41
  registerWebSearch(pi);
22
42
  registerWebFetch(pi);
@@ -25,4 +45,9 @@ export default function (pi: ExtensionAPI) {
25
45
  registerFirecrawlScrape(pi);
26
46
  registerFirecrawlSearch(pi);
27
47
  registerFirecrawlInteract(pi);
48
+
49
+ pi.on("before_agent_start", (event) => {
50
+ if (!shouldInjectWebToolRoutingPolicy(event.systemPromptOptions.selectedTools)) return;
51
+ return { systemPrompt: `${event.systemPrompt}\n\n${WEB_TOOL_ROUTING_POLICY}` };
52
+ });
28
53
  }
@@ -6,6 +6,7 @@
6
6
  */
7
7
 
8
8
  import { runCLI } from "./cli-runner";
9
+ import { getToolkitCommand } from "./config";
9
10
 
10
11
  export interface BrowseAction {
11
12
  type: "click" | "fill" | "type" | "press" | "wait" | "wait_selector" | "scroll";
@@ -180,7 +181,7 @@ export async function runAgentBrowserBatch(
180
181
 
181
182
  try {
182
183
  const result = await runCLI({
183
- command: "agent-browser",
184
+ command: getToolkitCommand("agentBrowser"),
184
185
  args,
185
186
  stdin: JSON.stringify(commands),
186
187
  timeout: options.timeout,
@@ -199,7 +200,7 @@ export async function runAgentBrowserBatch(
199
200
  );
200
201
  }
201
202
  } catch (err: any) {
202
- if (err.message === "agent-browser is not installed") {
203
+ if (typeof err.message === "string" && err.message.includes("is not installed")) {
203
204
  throw new Error(
204
205
  "agent-browser is not installed.\n\nInstall it with:\n npm i -g agent-browser && agent-browser install\n\nThen run: agent-browser doctor"
205
206
  );
@@ -211,7 +212,7 @@ export async function runAgentBrowserBatch(
211
212
  export async function closeAgentBrowserSession(session: string, signal?: AbortSignal): Promise<void> {
212
213
  try {
213
214
  await runCLI({
214
- command: "agent-browser",
215
+ command: getToolkitCommand("agentBrowser"),
215
216
  args: ["--session", session, "close"],
216
217
  signal,
217
218
  });
@@ -0,0 +1,170 @@
1
+ /**
2
+ * pi-web-toolkit runtime configuration
3
+ *
4
+ * Reads user-level toolkit configuration without requiring users to modify
5
+ * shell profiles. Environment variables remain the highest-priority override.
6
+ */
7
+
8
+ import { existsSync, readFileSync } from "node:fs";
9
+ import * as os from "node:os";
10
+ import * as path from "node:path";
11
+
12
/** SearXNG base URL used when neither SEARXNG_URL nor the config file supplies one. */
export const DEFAULT_SEARXNG_URL = "http://localhost:8080";

/** Optional executable overrides, one per external CLI the toolkit invokes. */
export interface ToolkitCommandsConfig {
  scrapling?: string;
  agentBrowser?: string;
  firecrawl?: string;
}

/** Accepted values for the Firecrawl runner setting. */
export type FirecrawlRunner = "installed" | "npx" | "bunx";

/** Shape of the user-level toolkit config file; every field is optional. */
export interface ToolkitConfig {
  /** SearXNG base URL. */
  searxngUrl?: string;
  /** Whether the Firecrawl fallback is enabled. */
  firecrawlFallback?: boolean;
  /** Which runner launches the Firecrawl CLI. */
  firecrawlRunner?: FirecrawlRunner;
  /** Per-tool executable overrides. */
  commands?: ToolkitCommandsConfig;
}

/** Key identifying one overridable command: "scrapling" | "agentBrowser" | "firecrawl". */
export type ToolkitCommandName = keyof ToolkitCommandsConfig;

/** Executable name used for each command when no override is present. */
const COMMAND_DEFAULTS: Record<ToolkitCommandName, string> = {
  scrapling: "scrapling",
  agentBrowser: "agent-browser",
  firecrawl: "firecrawl",
};

/** Name of the environment variable that overrides each command. */
const COMMAND_ENV_VARS: Record<ToolkitCommandName, string> = {
  scrapling: "SCRAPLING_BIN",
  agentBrowser: "AGENT_BROWSER_BIN",
  firecrawl: "FIRECRAWL_BIN",
};
42
+
43
/** Every accepted runner value, kept as a runtime list for validation. */
const FIRECRAWL_RUNNERS = ["installed", "npx", "bunx"] as const;

/**
 * Type guard: report whether `value` is exactly one of the accepted runner
 * names. The comparison is case-sensitive.
 */
function isFirecrawlRunner(value: string): value is FirecrawlRunner {
  return FIRECRAWL_RUNNERS.some((runner) => runner === value);
}
48
+
49
/**
 * Compute the default config file location:
 * `<config home>/pi-web-toolkit/config.json`, where the config home is
 * `XDG_CONFIG_HOME` when it is set to a non-blank value and `~/.config`
 * otherwise.
 */
export function getDefaultToolkitConfigPath(): string {
  const xdgHome = process.env.XDG_CONFIG_HOME?.trim();
  const baseDir = xdgHome ? xdgHome : path.join(os.homedir(), ".config");
  return path.join(baseDir, "pi-web-toolkit", "config.json");
}
53
+
54
/**
 * Resolve which config file to read: the trimmed `PI_WEB_TOOLKIT_CONFIG`
 * value when it is non-blank, otherwise the default location.
 */
export function getToolkitConfigPath(): string {
  const explicitPath = process.env.PI_WEB_TOOLKIT_CONFIG?.trim();
  if (explicitPath) return explicitPath;
  return getDefaultToolkitConfigPath();
}
58
+
59
/**
 * Load and validate the toolkit config stored at `filePath`.
 *
 * @param filePath Location of the JSON config file.
 * @param required When true, a missing file is an error; when false, a
 *   missing file yields an empty config.
 * @returns The parsed config object.
 * @throws Error when a required file is missing, the file cannot be read, the
 *   content is not valid JSON, the top-level value is not a plain object, or
 *   validation rejects a field.
 */
function parseConfigFile(filePath: string, required: boolean): ToolkitConfig {
  // Same message extraction for both failure stages below.
  const describe = (err: any): string => err.message ?? String(err);

  if (!existsSync(filePath)) {
    if (!required) return {};
    throw new Error(`Toolkit config file not found: ${filePath}`);
  }

  let text: string;
  try {
    text = readFileSync(filePath, "utf8");
  } catch (err: any) {
    throw new Error(`Unable to read toolkit config at ${filePath}: ${describe(err)}`);
  }

  try {
    const candidate: unknown = JSON.parse(text);
    const isPlainObject =
      candidate !== null && typeof candidate === "object" && !Array.isArray(candidate);
    if (!isPlainObject) {
      throw new Error("expected a JSON object");
    }
    validateToolkitConfig(candidate as Record<string, unknown>);
    return candidate as ToolkitConfig;
  } catch (err: any) {
    // Syntax errors, shape errors and field validation errors all get the same prefix.
    throw new Error(`Invalid toolkit config at ${filePath}: ${describe(err)}`);
  }
}
85
+
86
/**
 * Accept `undefined` or a string; reject anything else.
 *
 * @param value Candidate field value.
 * @param key Field name used in the error message.
 * @throws Error `"<key> must be a string"` for any other value, including null.
 */
function validateOptionalString(value: unknown, key: string): void {
  if (value === undefined || typeof value === "string") return;
  throw new Error(`${key} must be a string`);
}
91
+
92
/**
 * Check the known fields of a parsed config object, throwing on the first
 * invalid one. Fields are checked in the order searxngUrl, firecrawlFallback,
 * firecrawlRunner, commands; keys other than these are not inspected.
 *
 * @throws Error naming the offending field and the expected type or values.
 */
function validateToolkitConfig(value: Record<string, unknown>): void {
  const { searxngUrl, firecrawlFallback, firecrawlRunner, commands } = value;

  validateOptionalString(searxngUrl, "searxngUrl");

  const fallbackOk = firecrawlFallback === undefined || typeof firecrawlFallback === "boolean";
  if (!fallbackOk) {
    throw new Error("firecrawlFallback must be a boolean");
  }

  const runnerOk =
    firecrawlRunner === undefined ||
    (typeof firecrawlRunner === "string" && isFirecrawlRunner(firecrawlRunner));
  if (!runnerOk) {
    throw new Error("firecrawlRunner must be one of: installed, npx, bunx");
  }

  if (commands === undefined) return;

  if (commands === null || typeof commands !== "object" || Array.isArray(commands)) {
    throw new Error("commands must be an object");
  }
  const overrides = commands as Record<string, unknown>;
  for (const name of ["scrapling", "agentBrowser", "firecrawl"]) {
    validateOptionalString(overrides[name], `commands.${name}`);
  }
}
115
+
116
/**
 * Read the toolkit config from its resolved path.
 *
 * The file is treated as required only when `PI_WEB_TOOLKIT_CONFIG` is set to
 * a non-blank value.
 */
export function readToolkitConfig(): ToolkitConfig {
  const explicitlyConfigured = (process.env.PI_WEB_TOOLKIT_CONFIG?.trim() ?? "") !== "";
  return parseConfigFile(getToolkitConfigPath(), explicitlyConfigured);
}
121
+
122
/** Remove every trailing "/" from `url`; all other characters are left untouched. */
function normalizeUrl(url: string): string {
  let end = url.length;
  while (end > 0 && url[end - 1] === "/") {
    end -= 1;
  }
  return url.slice(0, end);
}
125
+
126
/**
 * Resolve the SearXNG base URL without trailing slashes.
 *
 * Precedence: non-blank `SEARXNG_URL`, then the config file's `searxngUrl`,
 * then `DEFAULT_SEARXNG_URL`. The config file is only read when the
 * environment variable is blank or unset.
 */
export function getSearxngUrl(): string {
  const fromEnv = process.env.SEARXNG_URL?.trim();
  if (fromEnv) return normalizeUrl(fromEnv);

  const fromFile = readToolkitConfig().searxngUrl?.trim();
  return fromFile ? normalizeUrl(fromFile) : DEFAULT_SEARXNG_URL;
}
135
+
136
/**
 * Resolve the executable to run for one of the toolkit's external CLIs.
 *
 * Precedence: the command's environment variable (when non-blank), then the
 * config file's `commands.<name>` entry (when non-blank), then the built-in
 * default name.
 */
export function getToolkitCommand(name: ToolkitCommandName): string {
  const fromEnv = process.env[COMMAND_ENV_VARS[name]]?.trim();
  if (fromEnv) return fromEnv;

  const fromFile = readToolkitConfig().commands?.[name]?.trim();
  return fromFile || COMMAND_DEFAULTS[name];
}
146
+
147
/**
 * Report whether the Firecrawl cloud fallback is enabled.
 *
 * Precedence: a non-blank `PI_WEB_FIRECRAWL_FALLBACK` environment variable,
 * then the config file's `firecrawlFallback`, then enabled by default. For the
 * environment variable, `0`, `false`, `no` and `off` (case-insensitive,
 * surrounding whitespace ignored) disable the fallback; any other non-blank
 * value enables it.
 *
 * A variable that is set but blank is treated as unset, matching the other
 * getters in this module. Previously a blank value counted as an override and
 * re-enabled the fallback even when the config file had opted out of it.
 *
 * @throws Error when the config file has to be consulted and is invalid.
 */
export function isFirecrawlFallbackEnabled(): boolean {
  const envValue = process.env.PI_WEB_FIRECRAWL_FALLBACK?.trim().toLowerCase();
  if (envValue) {
    return !["0", "false", "no", "off"].includes(envValue);
  }

  return readToolkitConfig().firecrawlFallback ?? true;
}
159
+
160
/**
 * Resolve which runner launches the Firecrawl CLI.
 *
 * Precedence: a non-blank `PI_WEB_FIRECRAWL_RUNNER` (trimmed, lower-cased),
 * then the config file's `firecrawlRunner`, then `"installed"`.
 *
 * @throws Error when the environment variable holds an unsupported value.
 */
export function getFirecrawlRunner(): FirecrawlRunner {
  const requested = process.env.PI_WEB_FIRECRAWL_RUNNER?.trim().toLowerCase();
  if (!requested) {
    return readToolkitConfig().firecrawlRunner ?? "installed";
  }

  if (isFirecrawlRunner(requested)) return requested;
  throw new Error("PI_WEB_FIRECRAWL_RUNNER must be one of: installed, npx, bunx");
}
@@ -19,6 +19,7 @@ import { mkdtemp, rm } from "node:fs/promises";
19
19
  import * as os from "node:os";
20
20
  import * as path from "node:path";
21
21
  import { runCLI } from "./cli-runner";
22
+ import { getFirecrawlRunner, getToolkitCommand, isFirecrawlFallbackEnabled, type FirecrawlRunner } from "./config";
22
23
 
23
24
  // ---------------------------------------------------------------------------
24
25
  // Shared types
@@ -33,8 +34,7 @@ export type FirecrawlFailureKind = "graceful-skip" | "hard-error";
33
34
  * the single opt-out for a strict local-only / no-cloud-egress policy.
34
35
  */
35
36
export function isFirecrawlEnabled(): boolean {
  // Resolution of the setting is delegated to the config module.
  const enabled = isFirecrawlFallbackEnabled();
  return enabled;
}
39
39
 
40
40
  export interface FirecrawlFailure {
@@ -159,6 +159,29 @@ export interface FirecrawlCliResult {
159
159
  exitCode: number;
160
160
  }
161
161
 
162
/** Executable plus argument list for one Firecrawl CLI run. */
export interface FirecrawlCliInvocation {
  command: string;
  args: string[];
}

/**
 * Work out how to launch the official Firecrawl CLI for the given arguments.
 *
 * The `npx` and `bunx` runners are opt-in because they may run or download
 * packages at fallback time; both launch the `firecrawl-cli` package and do
 * not use the configured firecrawl command. Any other runner uses the
 * configured firecrawl command with `args` passed through unchanged.
 *
 * @param args Arguments for the Firecrawl CLI itself.
 * @param runner Runner to use; resolved from the environment/config when omitted.
 */
export function buildFirecrawlCliInvocation(
  args: string[],
  runner: FirecrawlRunner = getFirecrawlRunner(),
): FirecrawlCliInvocation {
  switch (runner) {
    case "npx":
      return { command: "npx", args: ["-y", "firecrawl-cli"].concat(args) };
    case "bunx":
      return { command: "bunx", args: ["firecrawl-cli"].concat(args) };
    default:
      return { command: getToolkitCommand("firecrawl"), args };
  }
}
184
+
162
185
  /**
163
186
  * Run the firecrawl CLI under an isolated temporary HOME with no key env, so
164
187
  * it can only ever operate in keyless mode (no stored credentials, no
@@ -178,7 +201,8 @@ export async function runFirecrawlCli(
178
201
  delete env.FIRECRAWL_OAUTH_TOKEN;
179
202
  env.HOME = home;
180
203
  env.XDG_CONFIG_HOME = path.join(home, ".config");
181
- return await runCLI({ command: "firecrawl", args, env, signal, timeout });
204
+ const invocation = buildFirecrawlCliInvocation(args);
205
+ return await runCLI({ command: invocation.command, args: invocation.args, env, signal, timeout });
182
206
  } finally {
183
207
  await rm(home, { recursive: true, force: true }).catch(() => { /* best-effort */ });
184
208
  }
@@ -1,4 +1,5 @@
1
1
  import { runCLI } from "./cli-runner";
2
+ import { getToolkitCommand } from "./config";
2
3
 
3
4
  /**
4
5
  * Run a scrapling CLI command with optional abort signal.
@@ -7,7 +8,7 @@ export function runScrapling(
7
8
  args: string[],
8
9
  signal?: AbortSignal,
9
10
  ): Promise<{ stdout: string; stderr: string; exitCode: number }> {
10
- return runCLI({ command: "scrapling", args, signal });
11
+ return runCLI({ command: getToolkitCommand("scrapling"), args, signal });
11
12
  }
12
13
 
13
14
  /**
@@ -0,0 +1,146 @@
1
+ /**
2
+ * web_search execution core
3
+ *
4
+ * Keeps SearXNG-first search behavior behind a testable boundary. Firecrawl
5
+ * remains fallback-only and missing fallback runners never replace the primary
6
+ * SearXNG failure/no-result UX.
7
+ */
8
+
9
+ import type { FirecrawlSearchOutput } from "./firecrawl";
10
+ import { shouldFallbackSearch } from "./firecrawl";
11
+
12
/** Caller-supplied search request. */
export interface WebSearchCoreInput {
  /** Search terms sent to SearXNG and, on fallback, to Firecrawl. */
  query: string;
  /** SearXNG `language` parameter; not sent when missing or empty. */
  language?: string;
  /** Desired number of results; defaults to 20 and is clamped to 1–60. */
  results?: number;
}

/** One search hit. */
export interface WebSearchResultItem {
  title: string;
  url: string;
  content?: string;
  /** Set to "firecrawl" for hits produced by the Firecrawl fallback. */
  engine?: string;
  score?: number;
}

/** Fields read from a SearXNG JSON response. */
interface SearxResponse {
  query: string;
  results: WebSearchResultItem[];
  suggestions?: string[];
}

/** Outcome of a search, whichever backend produced it. */
export interface WebSearchCoreResult {
  query: string;
  totalResults: number;
  results: WebSearchResultItem[];
  suggestions?: string[];
  /** True when the results came from the Firecrawl fallback. */
  viaFirecrawl: boolean;
  /** Credits reported by Firecrawl; only set on fallback results. */
  creditsUsed?: number;
}

/** Collaborators injected into the search core. */
export interface WebSearchCoreDeps {
  /** SearXNG base URL; trailing slashes are stripped before use. */
  searxngUrl: string;
  /** `fetch` implementation used for the SearXNG requests. */
  fetchImpl: typeof fetch;
  /** Firecrawl search used as the fallback. */
  firecrawlSearch: (query: string, options: { limit: number }, signal?: AbortSignal) => Promise<FirecrawlSearchOutput>;
  /** Passed to both the SearXNG requests and the Firecrawl fallback. */
  signal?: AbortSignal;
}
47
+
48
/** Drop every trailing "/" so the base URL can be joined with "/search". */
function normalizeSearxngUrl(url: string): string {
  let end = url.length;
  while (end > 0 && url[end - 1] === "/") {
    end -= 1;
  }
  return url.slice(0, end);
}
51
+
52
/**
 * Run a SearXNG-first web search with a Firecrawl fallback.
 *
 * Up to three SearXNG result pages are fetched and de-duplicated by URL until
 * the requested number of results is reached or a page comes back empty. When
 * `shouldFallbackSearch` asks for it, the query is retried through
 * `deps.firecrawlSearch` (at most 10 results). A fallback that yields nothing
 * never hides the SearXNG outcome: the local error is thrown, or the local
 * (possibly empty) result is returned.
 *
 * @param input Query, optional language, and desired result count
 *   (default 20, clamped to 1–60; NaN is treated as the default).
 * @param deps SearXNG URL, fetch implementation, Firecrawl search and an
 *   optional abort signal.
 * @returns Results plus the backend that produced them. On the SearXNG path
 *   `totalResults` counts every unique hit collected, which can exceed the
 *   length of the (capped) `results` array.
 * @throws Error `Failed to query SearXNG at <url>: <reason>` when SearXNG
 *   failed and the fallback did not produce results.
 */
export async function runWebSearchCore(
  input: WebSearchCoreInput,
  deps: WebSearchCoreDeps,
): Promise<WebSearchCoreResult> {
  const DEFAULT_RESULTS = 20;
  const MAX_RESULTS = 60;
  const MAX_PAGES = 3;
  const FIRECRAWL_LIMIT = 10;

  const searxngUrl = normalizeSearxngUrl(deps.searxngUrl);
  // NaN survives Math.min/Math.max and would turn every later comparison and
  // slice into a no-op, so it is replaced by the default up front.
  const requested = input.results ?? DEFAULT_RESULTS;
  const maxResults = Math.floor(
    Math.min(MAX_RESULTS, Math.max(1, Number.isNaN(requested) ? DEFAULT_RESULTS : requested)),
  );
  const language = input.language ?? "";

  const allResults: WebSearchResultItem[] = [];
  const seenUrls = new Set<string>();
  let suggestions: string[] | undefined;
  let finalQuery = input.query;

  let localOk = true;
  let localError: string | undefined;

  try {
    for (let page = 1; page <= MAX_PAGES; page++) {
      const searchParams = new URLSearchParams({
        q: input.query,
        format: "json",
        pageno: String(page),
      });
      if (language) searchParams.set("language", language);

      const response = await deps.fetchImpl(`${searxngUrl}/search?${searchParams.toString()}`, {
        method: "GET",
        headers: { Accept: "application/json" },
        signal: deps.signal,
      });

      if (!response.ok) {
        const body = await response.text().catch(() => "");
        throw new Error(`SearXNG error: ${response.status} ${response.statusText}\n${body}`);
      }

      const data = (await response.json()) as SearxResponse;
      // Keep the caller's query when the payload does not echo one back.
      if (typeof data.query === "string") {
        finalQuery = data.query;
      }

      // Only the first page that offers suggestions is used.
      if (data.suggestions && data.suggestions.length > 0 && !suggestions) {
        suggestions = data.suggestions;
      }

      if (!data.results || data.results.length === 0) {
        break;
      }

      for (const item of data.results) {
        if (!seenUrls.has(item.url)) {
          seenUrls.add(item.url);
          allResults.push(item);
        }
      }

      if (allResults.length >= maxResults) {
        break;
      }
    }
  } catch (err: unknown) {
    localOk = false;
    localError = err instanceof Error ? err.message : String(err);
  }

  // Once the caller has cancelled, the query must not be handed to the cloud
  // fallback; the local outcome below is reported instead.
  const cancelled = deps.signal?.aborted === true;

  if (!cancelled && shouldFallbackSearch(localOk, allResults.length)) {
    const fb = await deps.firecrawlSearch(
      input.query,
      { limit: Math.min(maxResults, FIRECRAWL_LIMIT) },
      deps.signal,
    );
    if (fb.ok && fb.results.length > 0) {
      const fbResults: WebSearchResultItem[] = fb.results.slice(0, maxResults).map((r) => ({
        title: r.title ?? "(untitled)",
        url: r.url,
        content: r.description,
        engine: "firecrawl",
      }));
      return {
        query: input.query,
        totalResults: fbResults.length,
        results: fbResults,
        viaFirecrawl: true,
        creditsUsed: fb.creditsUsed,
      };
    }
  }

  if (!localOk) {
    throw new Error(`Failed to query SearXNG at ${searxngUrl}: ${localError}`);
  }

  return {
    query: finalQuery,
    totalResults: allResults.length,
    results: allResults.slice(0, maxResults),
    suggestions,
    viaFirecrawl: false,
  };
}
@@ -113,14 +113,10 @@ const webBatchFetchTool = defineTool({
113
113
  "For a single page, use web_fetch instead.",
114
114
  `Output is truncated to ${DEFAULT_MAX_LINES} lines or ${formatSize(DEFAULT_MAX_BYTES)}; if truncated, full output is saved to a temp file.`,
115
115
  ].join(" "),
116
- promptSnippet: "Fetch multiple URLs in parallel for research",
116
+ promptSnippet: "Parallel fetch for 2–5 URLs",
117
117
  promptGuidelines: [
118
- "Use web_batch_fetch when web_search returns multiple (2–5) relevant pages and the agent needs to read them all at once.",
119
- "Prefer web_batch_fetch over repeated web_fetch calls when reading multiple pages for comparison or synthesis.",
120
- "Use web_batch_fetch for cross-referencing sources, comparing implementations, or synthesizing research from multiple sites.",
121
- "For a single URL, always use web_fetch — it supports per-URL selectors and stealthy mode.",
122
- "If a page in the batch fails, the tool reports the error but continues with the others.",
123
- "Keep batch sizes reasonable (≤8) to avoid overwhelming the browser and token budget.",
118
+ "Use web_batch_fetch for 2–5 pages to compare/cross-reference/synthesize; single URL web_fetch.",
119
+ "Keep batches small (≤8; schema max 15); failed pages are reported without stopping the batch.",
124
120
  ],
125
121
  parameters: WebBatchFetchParamsSchema,
126
122
 
@@ -106,22 +106,18 @@ const webBrowseTool = defineTool({
106
106
  name: "web_browse",
107
107
  label: "Web Browse",
108
108
  description: [
109
- "Interact with a web page through a browser: navigate, click, fill forms, scroll,",
109
+ "Primary local-first tool for interactive web pages: navigate, click, fill forms, scroll,",
110
110
  "wait for content, and then extract text.",
111
- "Uses the agent-browser CLI with batched JSON commands.",
111
+ "Uses the agent-browser CLI with batched JSON commands, then automatically tries Firecrawl keyless only if local browser automation fails.",
112
112
  "Use web_browse when the target content requires interaction (clicking buttons,",
113
113
  "scrolling, filling search boxes, waiting for JS to load) before it becomes available.",
114
114
  "For pages that need no interaction, use web_fetch instead.",
115
115
  `Output is truncated to ${DEFAULT_MAX_LINES} lines or ${formatSize(DEFAULT_MAX_BYTES)}; if truncated, full output is saved to a temp file.`,
116
116
  ].join(" "),
117
- promptSnippet: "Interact with a web page (click, scroll, fill) and extract content",
117
+ promptSnippet: "Local browser interaction and extraction",
118
118
  promptGuidelines: [
119
- "Use web_browse when a page requires clicking, scrolling, or form submission before showing target content.",
120
- "Use web_browse for SPAs, pagination (click 'Load more'), search forms, tab switching, and modal dialogs.",
121
- "For static articles, docs, or blogs that load everything on first request, prefer web_fetch.",
122
- "After web_search returns results, prefer web_fetch for reading individual articles.",
123
- "Use web_browse directly when interaction is required; otherwise try web_fetch first.",
124
- "Always provide a selector to extract only the relevant content area — avoid dumping full page text.",
119
+ "Use web_browse only when clicks/forms/scroll/wait are needed; otherwise use web_fetch.",
120
+ "Provide a selector to narrow extracted content when possible.",
125
121
  ],
126
122
  parameters: WebBrowseParamsSchema,
127
123
 
@@ -40,19 +40,15 @@ const webFetchTool = defineTool({
40
40
  name: "web_fetch",
41
41
  label: "Web Fetch",
42
42
  description: [
43
- "Fetch and extract readable content from a web page URL.",
44
- "Uses scrapling to download the page and convert it to clean markdown.",
45
- "Use web_fetch to read the full content of a specific result or user-provided URL.",
43
+ "Primary local-first tool for reading a single web page URL.",
44
+ "Fetches and extracts readable content via scrapling, then automatically tries Firecrawl keyless only if the local fetcher fails.",
45
+ "Use web_fetch as the first attempt to read the full content of a specific result or user-provided URL.",
46
46
  "Callers remain responsible for robots.txt and site terms; Scrapling extract commands do not enforce them automatically.",
47
47
  `Output is truncated to ${DEFAULT_MAX_LINES} lines or ${formatSize(DEFAULT_MAX_BYTES)}; if truncated, full output is saved to a temp file.`,
48
48
  ].join(" "),
49
- promptSnippet: "Fetch full page content from a URL as markdown",
49
+ promptSnippet: "Local-first fetch of one URL as markdown",
50
50
  promptGuidelines: [
51
- "Use web_fetch to read a single page (article, doc, or blog) that needs no interaction.",
52
- "For a single URL, always use web_fetch instead of web_batch_fetch.",
53
- "If the page is dynamic/JavaScript-heavy, the tool automatically uses browser automation.",
54
- "When reading multiple (2–5) pages at once (e.g., after web_search), prefer web_batch_fetch over repeated web_fetch calls.",
55
- "Always pass the full URL including https://.",
51
+ "Use web_fetch for one non-interactive URL; use web_batch_fetch for 2–5 URLs.",
56
52
  ],
57
53
  parameters: WebFetchParamsSchema,
58
54