pi-web-toolkit 0.3.2 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -1
- package/README.md +150 -118
- package/docs/adr/0001-firecrawl-keyless-cloud-fallback.md +1 -1
- package/docs/adr/0002-toolkit-config-for-installer-selections.md +3 -0
- package/docs/adr/0003-conservative-installer-prerequisites.md +3 -0
- package/docs/adr/0004-searxng-endpoint-discovery.md +3 -0
- package/docs/guide.md +19 -3
- package/docs/tools.md +16 -1
- package/extensions/utils/agent-browser.ts +4 -3
- package/extensions/utils/config.ts +170 -0
- package/extensions/utils/firecrawl.ts +27 -3
- package/extensions/utils/scrapling.ts +2 -1
- package/extensions/utils/web-search-core.ts +146 -0
- package/extensions/web_search.ts +37 -112
- package/install.sh +801 -0
- package/package.json +6 -3
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* pi-web-toolkit runtime configuration
|
|
3
|
+
*
|
|
4
|
+
* Reads user-level toolkit configuration without requiring users to modify
|
|
5
|
+
* shell profiles. Environment variables remain the highest-priority override.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { existsSync, readFileSync } from "node:fs";
|
|
9
|
+
import * as os from "node:os";
|
|
10
|
+
import * as path from "node:path";
|
|
11
|
+
|
|
12
|
+
export const DEFAULT_SEARXNG_URL = "http://localhost:8080";
|
|
13
|
+
|
|
14
|
+
/**
 * Optional per-tool command overrides stored in the toolkit config file.
 *
 * Each value is the command string used to launch the corresponding CLI.
 * Every key is optional; a missing key means "no override from the config".
 */
export interface ToolkitCommandsConfig {
  /** Command override for the scrapling CLI. */
  scrapling?: string;
  /** Command override for the agent-browser CLI. */
  agentBrowser?: string;
  /** Command override for the firecrawl CLI. */
  firecrawl?: string;
}
|
|
19
|
+
|
|
20
|
+
export type FirecrawlRunner = "installed" | "npx" | "bunx";
|
|
21
|
+
|
|
22
|
+
/**
 * Shape of the user-level toolkit config file (a single JSON object).
 *
 * All fields are optional. The getters in this module consult the matching
 * environment variable first and only then fall back to these values.
 */
export interface ToolkitConfig {
  /** Base URL of the SearXNG instance. */
  searxngUrl?: string;
  /** Whether the Firecrawl fallback is enabled. */
  firecrawlFallback?: boolean;
  /** How the Firecrawl CLI is launched: "installed", "npx" or "bunx". */
  firecrawlRunner?: FirecrawlRunner;
  /** Per-tool command overrides. */
  commands?: ToolkitCommandsConfig;
}
|
|
28
|
+
|
|
29
|
+
export type ToolkitCommandName = "scrapling" | "agentBrowser" | "firecrawl";
|
|
30
|
+
|
|
31
|
+
/**
 * Command used for each tool when neither the environment nor the config
 * file supplies an override.
 */
const COMMAND_DEFAULTS: Record<ToolkitCommandName, string> = {
  scrapling: "scrapling",
  agentBrowser: "agent-browser",
  firecrawl: "firecrawl",
};
|
|
36
|
+
|
|
37
|
+
/**
 * Name of the environment variable that overrides each tool's command.
 * A non-blank value in that variable wins over the config file.
 */
const COMMAND_ENV_VARS: Record<ToolkitCommandName, string> = {
  scrapling: "SCRAPLING_BIN",
  agentBrowser: "AGENT_BROWSER_BIN",
  firecrawl: "FIRECRAWL_BIN",
};
|
|
42
|
+
|
|
43
|
+
const FIRECRAWL_RUNNERS = ["installed", "npx", "bunx"] as const;
|
|
44
|
+
|
|
45
|
+
/**
 * Type guard for Firecrawl runner names.
 *
 * @param value Candidate string (compared exactly; callers lower-case first if needed).
 * @returns `true` when `value` is one of the entries in `FIRECRAWL_RUNNERS`.
 */
function isFirecrawlRunner(value: string): value is FirecrawlRunner {
  // Widen the literal tuple so it can be compared against an arbitrary string.
  const knownRunners: readonly string[] = FIRECRAWL_RUNNERS;
  return knownRunners.some((runner) => runner === value);
}
|
|
48
|
+
|
|
49
|
+
/**
 * Compute the default location of the toolkit config file.
 *
 * Uses `$XDG_CONFIG_HOME` when it is set to a non-blank value, otherwise
 * `<home>/.config`, and appends `pi-web-toolkit/config.json`.
 *
 * @returns The default config file path (the file is not required to exist).
 */
export function getDefaultToolkitConfigPath(): string {
  const xdgHome = process.env.XDG_CONFIG_HOME?.trim();
  // Only resolve the home directory when XDG_CONFIG_HOME is absent or blank.
  const baseDir = xdgHome ? xdgHome : path.join(os.homedir(), ".config");
  return path.join(baseDir, "pi-web-toolkit", "config.json");
}
|
|
53
|
+
|
|
54
|
+
/**
 * Resolve which config file path to use.
 *
 * @returns The trimmed value of `PI_WEB_TOOLKIT_CONFIG` when it is non-blank,
 *          otherwise the default path from `getDefaultToolkitConfigPath()`.
 */
export function getToolkitConfigPath(): string {
  const explicitPath = process.env.PI_WEB_TOOLKIT_CONFIG?.trim();
  if (explicitPath) {
    return explicitPath;
  }
  return getDefaultToolkitConfigPath();
}
|
|
58
|
+
|
|
59
|
+
/**
 * Load, parse and validate a toolkit config file.
 *
 * @param filePath Path of the JSON config file.
 * @param required When `true`, a missing file is an error; when `false`, a
 *                 missing file yields an empty config.
 * @returns The validated config object (`{}` when the file is absent and not required).
 * @throws Error if a required file is missing, the file cannot be read, the
 *         content is not valid JSON, the top-level value is not a plain
 *         object, or a field fails validation.
 */
function parseConfigFile(filePath: string, required: boolean): ToolkitConfig {
  // Render any thrown value as text without assuming it is an Error
  // (dereferencing `.message` on null/undefined would itself throw).
  const describe = (err: unknown): string => (err instanceof Error ? err.message : String(err));

  if (!existsSync(filePath)) {
    if (required) {
      throw new Error(`Toolkit config file not found: ${filePath}`);
    }
    return {};
  }

  let raw: string;
  try {
    raw = readFileSync(filePath, "utf8");
  } catch (err: unknown) {
    throw new Error(`Unable to read toolkit config at ${filePath}: ${describe(err)}`);
  }

  // Decoding as "utf8" keeps a leading byte-order mark, which JSON.parse
  // rejects. Drop it so configs saved by BOM-writing editors still load.
  if (raw.charCodeAt(0) === 0xfeff) {
    raw = raw.slice(1);
  }

  try {
    const parsed: unknown = JSON.parse(raw);
    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
      throw new Error("expected a JSON object");
    }
    validateToolkitConfig(parsed as Record<string, unknown>);
    return parsed as ToolkitConfig;
  } catch (err: unknown) {
    // Syntax errors and validation errors share the same file-scoped prefix.
    throw new Error(`Invalid toolkit config at ${filePath}: ${describe(err)}`);
  }
}
|
|
85
|
+
|
|
86
|
+
/**
 * Assert that an optional config field is either absent or a string.
 *
 * @param value Field value taken from the parsed config.
 * @param key   Field name used in the error message.
 * @throws Error `"<key> must be a string"` for any other value, including `null`.
 */
function validateOptionalString(value: unknown, key: string): void {
  if (value === undefined || typeof value === "string") {
    return;
  }
  throw new Error(`${key} must be a string`);
}
|
|
91
|
+
|
|
92
|
+
/**
 * Validate the known fields of a parsed config object.
 *
 * Checks run in a fixed order (searxngUrl, firecrawlFallback,
 * firecrawlRunner, commands) and the first failure is thrown. Unknown keys
 * are not inspected.
 *
 * @param value Parsed top-level JSON object.
 * @throws Error describing the first field with an unexpected type or value.
 */
function validateToolkitConfig(value: Record<string, unknown>): void {
  const { searxngUrl, firecrawlFallback, firecrawlRunner, commands } = value;

  validateOptionalString(searxngUrl, "searxngUrl");

  const fallbackValid = firecrawlFallback === undefined || typeof firecrawlFallback === "boolean";
  if (!fallbackValid) {
    throw new Error("firecrawlFallback must be a boolean");
  }

  if (firecrawlRunner !== undefined) {
    const runnerValid = typeof firecrawlRunner === "string" && isFirecrawlRunner(firecrawlRunner);
    if (!runnerValid) {
      throw new Error("firecrawlRunner must be one of: installed, npx, bunx");
    }
  }

  if (commands === undefined) {
    return;
  }
  if (typeof commands !== "object" || commands === null || Array.isArray(commands)) {
    throw new Error("commands must be an object");
  }

  const overrides = commands as Record<string, unknown>;
  for (const tool of ["scrapling", "agentBrowser", "firecrawl"]) {
    validateOptionalString(overrides[tool], `commands.${tool}`);
  }
}
|
|
115
|
+
|
|
116
|
+
/**
 * Read the toolkit config from disk.
 *
 * The file is mandatory only when `PI_WEB_TOOLKIT_CONFIG` names it
 * explicitly; a missing default-location file yields an empty config.
 *
 * @returns The validated config object.
 * @throws Error propagated from `parseConfigFile`.
 */
export function readToolkitConfig(): ToolkitConfig {
  const explicitPath = process.env.PI_WEB_TOOLKIT_CONFIG?.trim();
  const isExplicit = explicitPath !== undefined && explicitPath.length > 0;
  return parseConfigFile(getToolkitConfigPath(), isExplicit);
}
|
|
121
|
+
|
|
122
|
+
/**
 * Strip every trailing `/` from a URL string.
 *
 * @param url URL text to normalize.
 * @returns `url` without trailing slashes (may be empty if it was only slashes).
 */
function normalizeUrl(url: string): string {
  const SLASH = 47; // char code of "/"
  let end = url.length;
  while (end > 0 && url.charCodeAt(end - 1) === SLASH) {
    end -= 1;
  }
  return url.slice(0, end);
}
|
|
125
|
+
|
|
126
|
+
/**
 * Resolve the SearXNG base URL.
 *
 * Precedence: `SEARXNG_URL` environment variable, then `searxngUrl` from the
 * config file, then `DEFAULT_SEARXNG_URL`. Trailing slashes are removed.
 *
 * A candidate that is blank, or that is empty after trailing slashes are
 * removed (for example "/"), is treated as not provided, so an empty base
 * URL is never returned.
 *
 * @returns A base URL without trailing slashes.
 * @throws Error propagated from `readToolkitConfig` when the config is consulted and invalid.
 */
export function getSearxngUrl(): string {
  const fromEnv = normalizeUrl(process.env.SEARXNG_URL?.trim() ?? "");
  if (fromEnv) return fromEnv;

  // The config file is only read when the environment gave nothing usable.
  const fromConfig = normalizeUrl(readToolkitConfig().searxngUrl?.trim() ?? "");
  if (fromConfig) return fromConfig;

  return DEFAULT_SEARXNG_URL;
}
|
|
135
|
+
|
|
136
|
+
/**
 * Resolve the command used to launch one of the toolkit's CLIs.
 *
 * Precedence: the tool's environment variable (see `COMMAND_ENV_VARS`), then
 * `commands.<name>` from the config file, then the built-in default. Blank
 * values are ignored at each level.
 *
 * @param name Which tool to resolve.
 * @returns The trimmed command string.
 * @throws Error propagated from `readToolkitConfig` when the config is consulted and invalid.
 */
export function getToolkitCommand(name: ToolkitCommandName): string {
  const fromEnv = process.env[COMMAND_ENV_VARS[name]]?.trim();
  if (fromEnv) {
    return fromEnv;
  }

  const fromConfig = readToolkitConfig().commands?.[name]?.trim();
  return fromConfig ? fromConfig : COMMAND_DEFAULTS[name];
}
|
|
146
|
+
|
|
147
|
+
/**
 * Decide whether the Firecrawl fallback may be used.
 *
 * Precedence: `PI_WEB_FIRECRAWL_FALLBACK` environment variable, then
 * `firecrawlFallback` from the config file, then enabled by default.
 *
 * The environment value disables the fallback when it is (case-insensitively)
 * "0", "false", "no" or "off"; any other non-blank value enables it. A blank
 * or whitespace-only variable is treated as unset, matching the other getters
 * in this module, so it cannot silently override `firecrawlFallback: false`
 * from the config file.
 *
 * @returns `true` when the fallback is enabled.
 * @throws Error propagated from `readToolkitConfig` when the config is consulted and invalid.
 */
export function isFirecrawlFallbackEnabled(): boolean {
  const envValue = process.env.PI_WEB_FIRECRAWL_FALLBACK?.trim().toLowerCase();
  if (envValue) {
    return !(envValue === "0" || envValue === "false" || envValue === "no" || envValue === "off");
  }

  return readToolkitConfig().firecrawlFallback ?? true;
}
|
|
159
|
+
|
|
160
|
+
/**
 * Resolve how the Firecrawl CLI should be launched.
 *
 * Precedence: `PI_WEB_FIRECRAWL_RUNNER` (trimmed, lower-cased), then
 * `firecrawlRunner` from the config file, then "installed".
 *
 * @returns The selected runner.
 * @throws Error if the environment variable is non-blank but not a known
 *         runner, or propagated from `readToolkitConfig`.
 */
export function getFirecrawlRunner(): FirecrawlRunner {
  const requested = process.env.PI_WEB_FIRECRAWL_RUNNER?.trim().toLowerCase();
  if (!requested) {
    return readToolkitConfig().firecrawlRunner ?? "installed";
  }
  if (isFirecrawlRunner(requested)) {
    return requested;
  }
  throw new Error("PI_WEB_FIRECRAWL_RUNNER must be one of: installed, npx, bunx");
}
|
|
@@ -19,6 +19,7 @@ import { mkdtemp, rm } from "node:fs/promises";
|
|
|
19
19
|
import * as os from "node:os";
|
|
20
20
|
import * as path from "node:path";
|
|
21
21
|
import { runCLI } from "./cli-runner";
|
|
22
|
+
import { getFirecrawlRunner, getToolkitCommand, isFirecrawlFallbackEnabled, type FirecrawlRunner } from "./config";
|
|
22
23
|
|
|
23
24
|
// ---------------------------------------------------------------------------
|
|
24
25
|
// Shared types
|
|
@@ -33,8 +34,7 @@ export type FirecrawlFailureKind = "graceful-skip" | "hard-error";
|
|
|
33
34
|
* the single opt-out for a strict local-only / no-cloud-egress policy.
|
|
34
35
|
*/
|
|
35
36
|
export function isFirecrawlEnabled(): boolean {
|
|
36
|
-
|
|
37
|
-
return !(v === "0" || v === "false" || v === "no" || v === "off");
|
|
37
|
+
return isFirecrawlFallbackEnabled();
|
|
38
38
|
}
|
|
39
39
|
|
|
40
40
|
export interface FirecrawlFailure {
|
|
@@ -159,6 +159,29 @@ export interface FirecrawlCliResult {
|
|
|
159
159
|
exitCode: number;
|
|
160
160
|
}
|
|
161
161
|
|
|
162
|
+
/** A resolved executable and the argument list to pass to it. */
export interface FirecrawlCliInvocation {
  /** Executable to spawn. */
  command: string;
  /** Arguments passed to `command`, in order. */
  args: string[];
}
|
|
166
|
+
|
|
167
|
+
/**
 * Work out which executable and arguments launch the Firecrawl CLI.
 *
 * The `npx` and `bunx` runners are opt-in because they may run or download
 * packages at fallback time. Any other runner uses the configured
 * `firecrawl` command and passes `args` through unchanged.
 *
 * @param args   Arguments intended for the Firecrawl CLI itself.
 * @param runner Launch strategy; defaults to the configured runner.
 * @returns The command to spawn together with its full argument list.
 */
export function buildFirecrawlCliInvocation(
  args: string[],
  runner: FirecrawlRunner = getFirecrawlRunner(),
): FirecrawlCliInvocation {
  switch (runner) {
    case "npx":
      // "-y" is passed to npx ahead of the package name.
      return { command: "npx", args: ["-y", "firecrawl-cli", ...args] };
    case "bunx":
      return { command: "bunx", args: ["firecrawl-cli", ...args] };
    default:
      return { command: getToolkitCommand("firecrawl"), args };
  }
}
|
|
184
|
+
|
|
162
185
|
/**
|
|
163
186
|
* Run the firecrawl CLI under an isolated temporary HOME with no key env, so
|
|
164
187
|
* it can only ever operate in keyless mode (no stored credentials, no
|
|
@@ -178,7 +201,8 @@ export async function runFirecrawlCli(
|
|
|
178
201
|
delete env.FIRECRAWL_OAUTH_TOKEN;
|
|
179
202
|
env.HOME = home;
|
|
180
203
|
env.XDG_CONFIG_HOME = path.join(home, ".config");
|
|
181
|
-
|
|
204
|
+
const invocation = buildFirecrawlCliInvocation(args);
|
|
205
|
+
return await runCLI({ command: invocation.command, args: invocation.args, env, signal, timeout });
|
|
182
206
|
} finally {
|
|
183
207
|
await rm(home, { recursive: true, force: true }).catch(() => { /* best-effort */ });
|
|
184
208
|
}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { runCLI } from "./cli-runner";
|
|
2
|
+
import { getToolkitCommand } from "./config";
|
|
2
3
|
|
|
3
4
|
/**
|
|
4
5
|
* Run a scrapling CLI command with optional abort signal.
|
|
@@ -7,7 +8,7 @@ export function runScrapling(
|
|
|
7
8
|
args: string[],
|
|
8
9
|
signal?: AbortSignal,
|
|
9
10
|
): Promise<{ stdout: string; stderr: string; exitCode: number }> {
|
|
10
|
-
return runCLI({ command: "scrapling", args, signal });
|
|
11
|
+
return runCLI({ command: getToolkitCommand("scrapling"), args, signal });
|
|
11
12
|
}
|
|
12
13
|
|
|
13
14
|
/**
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* web_search execution core
|
|
3
|
+
*
|
|
4
|
+
* Keeps SearXNG-first search behavior behind a testable boundary. Firecrawl
|
|
5
|
+
* remains fallback-only and missing fallback runners never replace the primary
|
|
6
|
+
* SearXNG failure/no-result UX.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import type { FirecrawlSearchOutput } from "./firecrawl";
|
|
10
|
+
import { shouldFallbackSearch } from "./firecrawl";
|
|
11
|
+
|
|
12
|
+
/** Caller-supplied parameters for a single web search. */
export interface WebSearchCoreInput {
  /** Search query text. */
  query: string;
  /** Language code sent to SearXNG; omitted from the request when empty or absent. */
  language?: string;
  /** Desired number of results; defaults to 20 and is clamped to the range 1..60. */
  results?: number;
}
|
|
17
|
+
|
|
18
|
+
/** One search hit, from SearXNG or mapped from a Firecrawl result. */
export interface WebSearchResultItem {
  /** Result title. */
  title: string;
  /** Result URL; also the key used to drop duplicates across pages. */
  url: string;
  /** Snippet or description text, when available. */
  content?: string;
  /** Name of the engine that produced the hit ("firecrawl" for fallback results). */
  engine?: string;
  /** Score as reported in the SearXNG response, when present. */
  score?: number;
}
|
|
25
|
+
|
|
26
|
+
/** The subset of the SearXNG JSON response body that this module reads. */
interface SearxResponse {
  /** Query string as reported in the response. */
  query: string;
  /** Hits for the requested page. */
  results: WebSearchResultItem[];
  /** Suggested alternative queries, when provided. */
  suggestions?: string[];
}
|
|
31
|
+
|
|
32
|
+
/** Outcome of `runWebSearchCore`. */
export interface WebSearchCoreResult {
  /** Query the results belong to. */
  query: string;
  /** Number of unique hits collected (may exceed `results.length`, which is capped). */
  totalResults: number;
  /** Hits returned to the caller, capped at the requested maximum. */
  results: WebSearchResultItem[];
  /** Suggestions from SearXNG, if any were reported. */
  suggestions?: string[];
  /** `true` when the results came from the Firecrawl fallback rather than SearXNG. */
  viaFirecrawl: boolean;
  /** Credits reported by the Firecrawl fallback, when it was used. */
  creditsUsed?: number;
}
|
|
40
|
+
|
|
41
|
+
/** Injected collaborators for `runWebSearchCore`, so it can be tested without network or CLI access. */
export interface WebSearchCoreDeps {
  /** Base URL of the SearXNG instance; trailing slashes are removed before use. */
  searxngUrl: string;
  /** `fetch`-compatible function used for the SearXNG HTTP requests. */
  fetchImpl: typeof fetch;
  /** Fallback search, called only when the local search fails or finds nothing and fallback is allowed. */
  firecrawlSearch: (query: string, options: { limit: number }, signal?: AbortSignal) => Promise<FirecrawlSearchOutput>;
  /** Abort signal forwarded to both the SearXNG requests and the fallback search. */
  signal?: AbortSignal;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Remove all trailing `/` characters so `/search` can be appended safely.
 *
 * @param url SearXNG base URL as configured.
 * @returns The URL without trailing slashes.
 */
function normalizeSearxngUrl(url: string): string {
  let cut = url.length;
  while (cut > 0 && url.charAt(cut - 1) === "/") {
    cut--;
  }
  return url.substring(0, cut);
}
|
|
51
|
+
|
|
52
|
+
/**
 * Run a SearXNG-first web search with an optional Firecrawl fallback.
 *
 * Fetches up to three result pages from `<searxngUrl>/search`, dropping
 * duplicate URLs, until the requested number of results is reached or a page
 * comes back empty. If `shouldFallbackSearch` allows it, and the request was
 * not aborted, the Firecrawl search is tried; its results are returned only
 * when it succeeds with at least one hit. Otherwise the SearXNG outcome
 * stands: results (possibly empty) on success, or an error on failure.
 *
 * @param input Query, optional language, and desired result count (default 20, clamped to 1..60).
 * @param deps  Injected SearXNG URL, fetch implementation, fallback search, and abort signal.
 * @returns The collected results and whether they came from Firecrawl.
 * @throws Error `Failed to query SearXNG at <url>: <reason>` when SearXNG
 *         failed and the fallback did not produce results.
 */
export async function runWebSearchCore(
  input: WebSearchCoreInput,
  deps: WebSearchCoreDeps,
): Promise<WebSearchCoreResult> {
  const searxngUrl = normalizeSearxngUrl(deps.searxngUrl);

  // NaN would survive Math.max/Math.min and make `slice(0, NaN)` return an
  // empty list even when hits were found; treat it like "not provided".
  const requested = input.results ?? 20;
  const maxResults = Number.isNaN(requested) ? 20 : Math.floor(Math.min(60, Math.max(1, requested)));
  const language = input.language ?? "";

  const allResults: WebSearchResultItem[] = [];
  const seenUrls = new Set<string>();
  let suggestions: string[] | undefined;
  let finalQuery = input.query;
  const MAX_PAGES = 3;

  let localOk = true;
  let localError: string | undefined;

  try {
    for (let page = 1; page <= MAX_PAGES; page++) {
      const searchParams = new URLSearchParams({
        q: input.query,
        format: "json",
        pageno: String(page),
      });
      if (language) searchParams.set("language", language);

      const response = await deps.fetchImpl(`${searxngUrl}/search?${searchParams.toString()}`, {
        method: "GET",
        headers: { Accept: "application/json" },
        signal: deps.signal,
      });

      if (!response.ok) {
        const body = await response.text().catch(() => "");
        throw new Error(`SearXNG error: ${response.status} ${response.statusText}\n${body}`);
      }

      const data = (await response.json()) as SearxResponse;
      // Keep the caller's query if the response omits `query`, so the
      // reported query never becomes undefined.
      if (typeof data.query === "string") {
        finalQuery = data.query;
      }

      // Only the first non-empty suggestion list is kept.
      if (data.suggestions && data.suggestions.length > 0 && !suggestions) {
        suggestions = data.suggestions;
      }

      if (!data.results || data.results.length === 0) {
        break;
      }

      for (const r of data.results) {
        if (!seenUrls.has(r.url)) {
          seenUrls.add(r.url);
          allResults.push(r);
        }
      }

      if (allResults.length >= maxResults) {
        break;
      }
    }
  } catch (err: unknown) {
    localOk = false;
    localError = err instanceof Error ? err.message : String(err);
  }

  // A cancelled request is not a SearXNG outage, so do not start the
  // fallback search for work the caller already abandoned.
  const aborted = deps.signal?.aborted === true;

  if (!aborted && shouldFallbackSearch(localOk, allResults.length)) {
    const fb = await deps.firecrawlSearch(input.query, { limit: Math.min(maxResults, 10) }, deps.signal);
    if (fb.ok && fb.results.length > 0) {
      const fbResults: WebSearchResultItem[] = fb.results.slice(0, maxResults).map((r) => ({
        title: r.title ?? "(untitled)",
        url: r.url,
        content: r.description,
        engine: "firecrawl",
      }));
      return {
        query: input.query,
        totalResults: fbResults.length,
        results: fbResults,
        viaFirecrawl: true,
        creditsUsed: fb.creditsUsed,
      };
    }
  }

  // The fallback did not help: surface the original SearXNG failure.
  if (!localOk) {
    throw new Error(`Failed to query SearXNG at ${searxngUrl}: ${localError}`);
  }

  return {
    query: finalQuery,
    totalResults: allResults.length,
    results: allResults.slice(0, maxResults),
    suggestions,
    viaFirecrawl: false,
  };
}
|
package/extensions/web_search.ts
CHANGED
|
@@ -19,12 +19,13 @@ import {
|
|
|
19
19
|
} from "@earendil-works/pi-coding-agent";
|
|
20
20
|
import { Text } from "@earendil-works/pi-tui";
|
|
21
21
|
import { Type, type Static } from "typebox";
|
|
22
|
+
import { getSearxngUrl } from "./utils/config";
|
|
22
23
|
import { writeWithFallback } from "./utils/output-sink";
|
|
23
|
-
import { searchKeyless
|
|
24
|
+
import { searchKeyless } from "./utils/firecrawl";
|
|
25
|
+
import { runWebSearchCore } from "./utils/web-search-core";
|
|
24
26
|
import { abbreviateUrl, getDomain, getErrorText, normalizeWhitespace } from "./utils/render-helpers";
|
|
25
27
|
|
|
26
28
|
|
|
27
|
-
|
|
28
29
|
interface SearxResult {
|
|
29
30
|
title: string;
|
|
30
31
|
url: string;
|
|
@@ -33,12 +34,6 @@ interface SearxResult {
|
|
|
33
34
|
score?: number;
|
|
34
35
|
}
|
|
35
36
|
|
|
36
|
-
interface SearxResponse {
|
|
37
|
-
query: string;
|
|
38
|
-
results: SearxResult[];
|
|
39
|
-
suggestions?: string[];
|
|
40
|
-
}
|
|
41
|
-
|
|
42
37
|
export const WebSearchParamsSchema = Type.Object({
|
|
43
38
|
query: Type.String({ description: "Search query" }),
|
|
44
39
|
language: Type.Optional(Type.String({ description: "Language code (e.g. en, en-US, de). Omit to use SearXNG default.", default: "" })),
|
|
@@ -67,122 +62,53 @@ const webSearchTool = defineTool({
|
|
|
67
62
|
parameters: WebSearchParamsSchema,
|
|
68
63
|
|
|
69
64
|
async execute(_toolCallId, params, signal) {
|
|
70
|
-
const
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
let suggestions: string[] | undefined;
|
|
77
|
-
let finalQuery = params.query;
|
|
78
|
-
let fullOutputPath: string | undefined;
|
|
79
|
-
const MAX_PAGES = 3;
|
|
80
|
-
|
|
81
|
-
let localOk = true;
|
|
82
|
-
let localError: string | undefined;
|
|
83
|
-
|
|
84
|
-
try {
|
|
85
|
-
for (let page = 1; page <= MAX_PAGES; page++) {
|
|
86
|
-
const searchParams = new URLSearchParams({
|
|
87
|
-
q: params.query,
|
|
88
|
-
format: "json",
|
|
89
|
-
pageno: String(page),
|
|
90
|
-
});
|
|
91
|
-
if (language) searchParams.set("language", language);
|
|
92
|
-
|
|
93
|
-
const response = await fetch(`${searxngUrl}/search?${searchParams.toString()}`, {
|
|
94
|
-
method: "GET",
|
|
95
|
-
headers: { Accept: "application/json" },
|
|
96
|
-
signal,
|
|
97
|
-
});
|
|
98
|
-
|
|
99
|
-
if (!response.ok) {
|
|
100
|
-
const body = await response.text().catch(() => "");
|
|
101
|
-
throw new Error(`SearXNG error: ${response.status} ${response.statusText}\n${body}`);
|
|
102
|
-
}
|
|
103
|
-
|
|
104
|
-
const data = (await response.json()) as SearxResponse;
|
|
105
|
-
finalQuery = data.query;
|
|
106
|
-
|
|
107
|
-
if (data.suggestions && data.suggestions.length > 0 && !suggestions) {
|
|
108
|
-
suggestions = data.suggestions;
|
|
109
|
-
}
|
|
110
|
-
|
|
111
|
-
if (!data.results || data.results.length === 0) {
|
|
112
|
-
break;
|
|
113
|
-
}
|
|
114
|
-
|
|
115
|
-
for (const r of data.results) {
|
|
116
|
-
if (!seenUrls.has(r.url)) {
|
|
117
|
-
seenUrls.add(r.url);
|
|
118
|
-
allResults.push(r);
|
|
119
|
-
}
|
|
120
|
-
}
|
|
121
|
-
|
|
122
|
-
if (allResults.length >= maxResults) {
|
|
123
|
-
break;
|
|
124
|
-
}
|
|
125
|
-
}
|
|
126
|
-
} catch (err: any) {
|
|
127
|
-
localOk = false;
|
|
128
|
-
localError = err.message ?? String(err);
|
|
129
|
-
}
|
|
65
|
+
const result = await runWebSearchCore(params, {
|
|
66
|
+
searxngUrl: getSearxngUrl(),
|
|
67
|
+
fetchImpl: fetch,
|
|
68
|
+
firecrawlSearch: searchKeyless,
|
|
69
|
+
signal,
|
|
70
|
+
});
|
|
130
71
|
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
const
|
|
134
|
-
|
|
135
|
-
const
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
const creditTag = fb.creditsUsed !== undefined ? `, ${fb.creditsUsed} credits` : "";
|
|
142
|
-
const lines: string[] = [`Results for "${params.query}" (via Firecrawl keyless${creditTag}):`, ""];
|
|
143
|
-
for (let i = 0; i < fbResults.length; i++) {
|
|
144
|
-
const r = fbResults[i];
|
|
145
|
-
lines.push(`${i + 1}. ${r.title}`);
|
|
146
|
-
lines.push(` URL: ${r.url}`);
|
|
147
|
-
if (r.content) lines.push(` ${r.content.replace(/\s+/g, " ").trim()}`);
|
|
148
|
-
if (r.engine) lines.push(` [engine: ${r.engine}]`);
|
|
149
|
-
lines.push("");
|
|
150
|
-
}
|
|
151
|
-
const rawText = lines.join("\n");
|
|
152
|
-
const sink = await writeWithFallback(rawText, {
|
|
153
|
-
tmpPrefix: "pi-web-search-firecrawl-",
|
|
154
|
-
alwaysWriteFile: true,
|
|
155
|
-
});
|
|
156
|
-
return {
|
|
157
|
-
content: [{ type: "text", text: sink.text }],
|
|
158
|
-
details: { query: params.query, totalResults: fbResults.length, results: fbResults, fullOutputPath: sink.fullOutputPath, viaFirecrawl: true, creditsUsed: fb.creditsUsed },
|
|
159
|
-
};
|
|
72
|
+
if (result.viaFirecrawl) {
|
|
73
|
+
const creditTag = result.creditsUsed !== undefined ? `, ${result.creditsUsed} credits` : "";
|
|
74
|
+
const lines: string[] = [`Results for "${params.query}" (via Firecrawl keyless${creditTag}):`, ""];
|
|
75
|
+
for (let i = 0; i < result.results.length; i++) {
|
|
76
|
+
const r = result.results[i];
|
|
77
|
+
lines.push(`${i + 1}. ${r.title}`);
|
|
78
|
+
lines.push(` URL: ${r.url}`);
|
|
79
|
+
if (r.content) lines.push(` ${r.content.replace(/\s+/g, " ").trim()}`);
|
|
80
|
+
if (r.engine) lines.push(` [engine: ${r.engine}]`);
|
|
81
|
+
lines.push("");
|
|
160
82
|
}
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
83
|
+
const rawText = lines.join("\n");
|
|
84
|
+
const sink = await writeWithFallback(rawText, {
|
|
85
|
+
tmpPrefix: "pi-web-search-firecrawl-",
|
|
86
|
+
alwaysWriteFile: true,
|
|
87
|
+
});
|
|
88
|
+
return {
|
|
89
|
+
content: [{ type: "text", text: sink.text }],
|
|
90
|
+
details: { query: params.query, totalResults: result.totalResults, results: result.results, fullOutputPath: sink.fullOutputPath, viaFirecrawl: true, creditsUsed: result.creditsUsed },
|
|
91
|
+
};
|
|
166
92
|
}
|
|
167
93
|
|
|
168
|
-
if (
|
|
169
|
-
let text = `No results found for "${
|
|
170
|
-
if (suggestions && suggestions.length > 0) {
|
|
171
|
-
text += `\n\nSuggestions:\n${suggestions.map((s) => `- ${s}`).join("\n")}`;
|
|
94
|
+
if (result.results.length === 0) {
|
|
95
|
+
let text = `No results found for "${result.query}".`;
|
|
96
|
+
if (result.suggestions && result.suggestions.length > 0) {
|
|
97
|
+
text += `\n\nSuggestions:\n${result.suggestions.map((s) => `- ${s}`).join("\n")}`;
|
|
172
98
|
}
|
|
173
99
|
return {
|
|
174
100
|
content: [{ type: "text", text }],
|
|
175
|
-
details: { query:
|
|
101
|
+
details: { query: result.query, totalResults: 0, results: [] as SearxResult[], fullOutputPath: undefined as string | undefined, viaFirecrawl: false, creditsUsed: undefined },
|
|
176
102
|
};
|
|
177
103
|
}
|
|
178
104
|
|
|
179
105
|
const lines: string[] = [
|
|
180
|
-
`Results for "${
|
|
106
|
+
`Results for "${result.query}":`,
|
|
181
107
|
"",
|
|
182
108
|
];
|
|
183
109
|
|
|
184
|
-
for (let i = 0; i <
|
|
185
|
-
const r =
|
|
110
|
+
for (let i = 0; i < result.results.length; i++) {
|
|
111
|
+
const r = result.results[i];
|
|
186
112
|
lines.push(`${i + 1}. ${r.title}`);
|
|
187
113
|
lines.push(` URL: ${r.url}`);
|
|
188
114
|
if (r.content) {
|
|
@@ -200,11 +126,10 @@ const webSearchTool = defineTool({
|
|
|
200
126
|
tmpPrefix: "pi-web-search-",
|
|
201
127
|
alwaysWriteFile: true,
|
|
202
128
|
});
|
|
203
|
-
fullOutputPath = sink.fullOutputPath;
|
|
204
129
|
|
|
205
130
|
return {
|
|
206
131
|
content: [{ type: "text", text: sink.text }],
|
|
207
|
-
details: { query:
|
|
132
|
+
details: { query: result.query, totalResults: result.totalResults, results: result.results, fullOutputPath: sink.fullOutputPath, viaFirecrawl: false, creditsUsed: undefined },
|
|
208
133
|
};
|
|
209
134
|
},
|
|
210
135
|
|