@mammothb/pi-websearch 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,24 @@
1
# Read the documentation before using the `docker-compose.yml` file:
# https://docs.searxng.org/admin/installation-docker.html
name: searxng

services:
  # SearXNG itself.
  core:
    container_name: searxng-core
    image: docker.io/searxng/searxng:${SEARXNG_VERSION:-latest}
    restart: always
    ports:
      # Prefixed with SEARXNG_HOST only when that variable is set and
      # non-empty; the port falls back to 8080 on both sides of the mapping.
      - "${SEARXNG_HOST:+${SEARXNG_HOST}:}${SEARXNG_PORT:-8080}:${SEARXNG_PORT:-8080}"
    env_file: ./.env
    volumes:
      - ./core-config/:/etc/searxng/:Z
      - core-data:/var/cache/searxng/

  # Valkey companion container.
  valkey:
    container_name: searxng-valkey
    image: docker.io/valkey/valkey:9-alpine
    command: valkey-server --save 30 1 --loglevel warning
    restart: always
    volumes:
      - valkey-data:/data/

# Named volumes referenced by the services above.
volumes:
  core-data:
  valkey-data:
package/src/config.ts ADDED
@@ -0,0 +1,128 @@
1
+ import { existsSync, readFileSync } from "node:fs";
2
+ import { join } from "node:path";
3
+ import { getAgentDir } from "@earendil-works/pi-coding-agent";
4
+
5
/**
 * Full, resolved configuration of the websearch extension.
 *
 * Config files may supply any subset of these fields; `loadConfig` fills the
 * rest from `DEFAULT_CONFIG`.
 */
export interface WebsearchConfig {
  /** Name of the search backend to use. */
  provider: "exa-mcp" | "searxng";

  /** Settings that apply when `provider` is `"exa-mcp"`. */
  exaMcp: {
    /** URL of the MCP server. */
    url: string;
    /** Name of the MCP tool to invoke. */
    tool: string;
  };

  /** Settings that apply when `provider` is `"searxng"`. */
  searxng: {
    /** URL of the SearXNG instance. */
    url: string;
    /** SafeSearch level: 0 (off), 1 (moderate), 2 (strict). */
    safesearch: 0 | 1 | 2;
    /**
     * Path to a custom management script (optional).
     *
     * The script must understand the "up" and "down" commands, i.e. expose the
     * same interface as the default script. When present it replaces the
     * built-in `bin/searxng` script.
     */
    script?: string;
  };

  /** Per-request timeout, in milliseconds. */
  timeoutMs: number;

  /** Values used for search parameters the caller leaves out. */
  defaults: {
    numResults: number;
    type: "auto" | "fast" | "deep";
    livecrawl: "fallback" | "preferred";
    contextMaxCharacters: number;
  };
}
38
+
39
/**
 * Built-in configuration. `loadConfig` layers the global and project config
 * files on top of these values.
 */
export const DEFAULT_CONFIG: WebsearchConfig = {
  provider: "exa-mcp",
  exaMcp: { url: "https://mcp.exa.ai/mcp", tool: "web_search_exa" },
  searxng: {
    url: "http://localhost:8080",
    safesearch: 0,
    // No custom management script by default.
    script: undefined,
  },
  // 25 seconds.
  timeoutMs: 25000,
  defaults: {
    numResults: 8,
    type: "auto",
    livecrawl: "fallback",
    contextMaxCharacters: 10000,
  },
};
58
+
59
/**
 * Merge one nested config section.
 *
 * Always returns a fresh object, so the result never shares the section with
 * `base`. Keys whose override value is `undefined` are ignored.
 */
function mergeSection<T extends object>(base: T, override?: Partial<T>): T {
  const defined = Object.fromEntries(
    Object.entries(override ?? {}).filter(([, value]) => value !== undefined),
  );
  return { ...base, ...defined };
}

/**
 * Merge `override` on top of `base` and return a new config.
 *
 * Top-level primitives (`provider`, `timeoutMs`) are replaced when the
 * override defines them. The nested sections (`exaMcp`, `searxng`,
 * `defaults`) are merged key by key. In both cases an `undefined` override
 * value means "not set" and leaves the base value in place.
 *
 * Neither argument is mutated, and the returned config owns its nested
 * objects: mutating the result cannot leak into `base` (for example into
 * `DEFAULT_CONFIG`).
 *
 * @param base Complete configuration to start from.
 * @param override Partial configuration whose defined values win.
 * @returns A new, complete configuration.
 */
function mergeConfigs(
  base: WebsearchConfig,
  override: Partial<WebsearchConfig>,
): WebsearchConfig {
  const merged: WebsearchConfig = {
    ...base,
    exaMcp: mergeSection(base.exaMcp, override.exaMcp),
    searxng: mergeSection(base.searxng, override.searxng),
    defaults: mergeSection(base.defaults, override.defaults),
  };

  if (override.provider !== undefined) {
    merged.provider = override.provider;
  }
  if (override.timeoutMs !== undefined) {
    merged.timeoutMs = override.timeoutMs;
  }

  return merged;
}
87
+
88
/**
 * Read a single JSON config file.
 *
 * Returns `undefined` when the file does not exist, cannot be read or parsed,
 * or does not contain a JSON object. Failures are logged, never thrown.
 */
function readConfigFile(
  path: string,
  label: "global" | "project",
): Partial<WebsearchConfig> | undefined {
  if (!existsSync(path)) {
    return undefined;
  }
  try {
    const parsed: unknown = JSON.parse(readFileSync(path, "utf-8"));
    // Only a plain JSON object can carry config keys; reject arrays,
    // primitives and null instead of silently merging nothing.
    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
      console.error(
        `Failed to load ${label} config from ${path}: expected a JSON object`,
      );
      return undefined;
    }
    return parsed as Partial<WebsearchConfig>;
  } catch (err) {
    console.error(`Failed to load ${label} config from ${path}: ${err}`);
    return undefined;
  }
}

/**
 * Load config from JSON files. Project config (`.pi/pi-websearch.json`)
 * overrides global config (`pi-websearch.json` inside the agent directory
 * returned by `getAgentDir()`).
 *
 * The returned object is always a private copy: when no config file exists it
 * has the default values, but mutating it does not affect `DEFAULT_CONFIG`.
 *
 * @param cwd Project directory that may contain `.pi/pi-websearch.json`.
 * @returns The defaults, overlaid by the global and then the project config.
 */
export function loadConfig(cwd: string): WebsearchConfig {
  const globalPath = join(getAgentDir(), "pi-websearch.json");
  const projectPath = join(cwd, ".pi", "pi-websearch.json");

  // Lowest to highest precedence.
  const layers = [
    readConfigFile(globalPath, "global"),
    readConfigFile(projectPath, "project"),
  ];

  // Start from a copy so callers never hold references into DEFAULT_CONFIG.
  let config: WebsearchConfig = {
    ...DEFAULT_CONFIG,
    exaMcp: { ...DEFAULT_CONFIG.exaMcp },
    searxng: { ...DEFAULT_CONFIG.searxng },
    defaults: { ...DEFAULT_CONFIG.defaults },
  };
  for (const layer of layers) {
    if (layer) {
      config = mergeConfigs(config, layer);
    }
  }

  return config;
}
@@ -0,0 +1,42 @@
1
+ import { Value } from "typebox/value";
2
+ import { McpResultPayload } from "./types";
3
+
4
/**
 * Extract the first non-empty `text` entry from a JSON-encoded MCP result.
 *
 * The input is trimmed and must start with `{`. It is then JSON-parsed and
 * passed through `Value.Parse` with the `McpResultPayload` schema. Any error
 * raised along the way is swallowed.
 *
 * @returns The text, or `undefined` when the payload is not usable.
 */
function tryParsePayload(payload: string): string | undefined {
  const candidate = payload.trim();
  if (candidate.startsWith("{")) {
    try {
      const parsed = Value.Parse(McpResultPayload, JSON.parse(candidate));
      for (const entry of parsed.result.content) {
        if (entry.text) {
          return entry.text;
        }
      }
    } catch {
      // Not a valid payload; fall through to `undefined`.
    }
  }
  return undefined;
}
17
+
18
/**
 * Parse an MCP response body, handling both plain JSON and SSE streams.
 *
 * The whole body is first tried as a single JSON payload. If that yields
 * nothing, the body is scanned line by line as a server-sent-event stream and
 * the first `data` field that carries a usable payload wins.
 *
 * SSE lines may be terminated by LF, CR or CRLF, and the space after `data:`
 * is optional, so both `data: {...}` and `data:{...}` are accepted.
 *
 * @param body Raw response text.
 * @returns The first text content found, or `undefined` if there is none.
 */
export function parseResponse(body: string): string | undefined {
  // Try direct JSON parse first (tryParsePayload trims and rejects non-JSON).
  const direct = tryParsePayload(body);
  if (direct) {
    return direct;
  }

  // Try SSE lines: "data:{...}" / "data: {...}"
  for (const line of body.split(/\r\n|\r|\n/)) {
    if (!line.startsWith("data:")) {
      continue;
    }
    // Whitespace after the colon is removed by tryParsePayload's trim.
    const data = tryParsePayload(line.slice("data:".length));
    if (data) {
      return data;
    }
  }

  return undefined;
}
@@ -0,0 +1,84 @@
1
+ import { parseResponse } from "../parsers";
2
+ import type { SearchArgs, SearchProvider } from "../types";
3
+
4
/**
 * Settings consumed by `createExaMcpProvider`.
 */
export interface ExaMcpConfig {
  /** Endpoint that receives the JSON-RPC POST request. */
  url: string;
  /** Tool name sent as `params.name` of the `tools/call` request. */
  tool: string;
  /** Milliseconds after which an in-flight request is aborted. */
  timeoutMs: number;
}
12
+
13
/**
 * Build the JSON-RPC 2.0 `tools/call` request body for one search.
 *
 * Arguments whose value is `undefined` are left out of the request.
 */
function buildMcpRequest(toolName: string, args: SearchArgs) {
  const definedEntries = Object.entries(args).filter(
    (entry) => entry[1] !== undefined,
  );
  const toolArguments = Object.fromEntries(definedEntries);

  return {
    jsonrpc: "2.0" as const,
    id: 1,
    method: "tools/call" as const,
    params: { name: toolName, arguments: toolArguments },
  };
}
24
+
25
/**
 * Create an Exa MCP search provider.
 *
 * Each search POSTs a JSON-RPC `tools/call` request to `config.url` and
 * extracts the result text from the response with `parseResponse`.
 *
 * A search rejects with:
 * - `Error("Request aborted")` when `signal` is already aborted on entry;
 * - `Error("Request timed out")` when `config.timeoutMs` elapses first;
 * - an `Error` naming the HTTP status when the response is not OK;
 * - whatever `fetch` rejects with otherwise (including an abort triggered by
 *   `signal` while the request is in flight).
 *
 * @param config Server URL, tool name and timeout.
 * @returns A provider named `"exa-mcp"`.
 */
export function createExaMcpProvider(config: ExaMcpConfig): SearchProvider {
  const { url, tool, timeoutMs } = config;

  return {
    name: "exa-mcp",

    async search(
      args: SearchArgs,
      signal?: AbortSignal,
    ): Promise<string | undefined> {
      // Check before arming the timer: throwing here happens outside the
      // try/finally below, so a timer created earlier would never be cleared.
      if (signal?.aborted) {
        throw new Error("Request aborted");
      }

      const controller = new AbortController();
      const onAbort = () => controller.abort();
      const timeoutId = setTimeout(onAbort, timeoutMs);

      // Forward external signal
      signal?.addEventListener("abort", onAbort, { once: true });

      try {
        const response = await fetch(url, {
          method: "POST",
          headers: {
            "Content-Type": "application/json",
            Accept: "application/json, text/event-stream",
          },
          body: JSON.stringify(buildMcpRequest(tool, args)),
          signal: controller.signal,
        });

        if (!response.ok) {
          throw new Error(
            `Exa MCP returned HTTP ${response.status}: ${response.statusText}`,
          );
        }

        return parseResponse(await response.text());
      } catch (error) {
        // Our controller fired but the caller's signal did not: the timer won.
        if (controller.signal.aborted && !signal?.aborted) {
          throw new Error("Request timed out");
        }
        throw error;
      } finally {
        clearTimeout(timeoutId);
        signal?.removeEventListener("abort", onAbort);
      }
    },
  };
}
@@ -0,0 +1,29 @@
1
+ import type { WebsearchConfig } from "../../config";
2
+ import type { SearchProvider } from "../types";
3
+ import { createExaMcpProvider } from "./exa-mcp";
4
+ import { createSearxngProvider } from "./searxng";
5
+
6
/**
 * Instantiate the search provider selected by `config.provider`.
 *
 * Each provider receives its own section of the config plus the shared
 * `timeoutMs`.
 *
 * @throws Error when `config.provider` is not a known provider name.
 */
export function createProvider(config: WebsearchConfig): SearchProvider {
  if (config.provider === "exa-mcp") {
    const { url, tool } = config.exaMcp;
    return createExaMcpProvider({ url, tool, timeoutMs: config.timeoutMs });
  }

  if (config.provider === "searxng") {
    const { url, safesearch } = config.searxng;
    return createSearxngProvider({
      url,
      safesearch,
      timeoutMs: config.timeoutMs,
    });
  }

  // Reachable only when the config bypassed the type system (e.g. raw JSON).
  throw new Error(`Unknown provider: ${config.provider}`);
}
@@ -0,0 +1,100 @@
1
+ import type { SearchArgs, SearchProvider } from "../types";
2
+
3
/**
 * Settings consumed by `createSearxngProvider`.
 */
export interface SearxngConfig {
  /** Base URL of the SearXNG instance, e.g. "http://localhost:8080". */
  url: string;
  /** SafeSearch level sent with every query: 0 (off), 1 (moderate), 2 (strict). */
  safesearch: 0 | 1 | 2;
  /** Milliseconds after which an in-flight request is aborted. */
  timeoutMs: number;
}
14
+
15
/**
 * One entry of the `results` array in a SearXNG JSON response.
 *
 * Every field is declared optional and nullable; only `url`, `title` and
 * `content` are read by the provider in this file.
 */
interface SearxngRawResult {
  url?: string | null;
  title?: string | null;
  content?: string | null;
  engine?: string | null;
}
21
+
22
/** The part of a SearXNG `format=json` response that the provider reads. */
interface SearxngResponse {
  /** Result entries, in the order the instance returned them. */
  results: SearxngRawResult[];
}
25
+
26
/**
 * Create a SearXNG search provider.
 *
 * Each search calls the instance's `search` endpoint with `format=json` and
 * formats the results that have a URL as a text block (title, URL, content),
 * separated by horizontal rules. At most `args.numResults` results are kept;
 * all of them when it is not set.
 *
 * The endpoint is resolved relative to `config.url`, so an instance served
 * from a sub-path (e.g. `https://host/searxng/`) is queried at
 * `https://host/searxng/search`. Any query string or fragment on `config.url`
 * is dropped.
 *
 * A search rejects with:
 * - `Error("Request aborted")` when `signal` is already aborted on entry;
 * - `Error("Request timed out")` when `config.timeoutMs` elapses first;
 * - an `Error` naming the HTTP status when the response is not OK;
 * - whatever `fetch`, `new URL` or JSON decoding throws otherwise.
 *
 * @param config Instance URL, SafeSearch level and timeout.
 * @returns A provider named `"searxng"`; `search` resolves to `undefined`
 *   when no result has a URL.
 */
export function createSearxngProvider(config: SearxngConfig): SearchProvider {
  const { url, safesearch, timeoutMs } = config;

  return {
    name: "searxng",

    async search(
      args: SearchArgs,
      signal?: AbortSignal,
    ): Promise<string | undefined> {
      // Check before arming the timer: throwing here happens outside the
      // try/finally below, so a timer created earlier would never be cleared.
      if (signal?.aborted) {
        throw new Error("Request aborted");
      }

      const controller = new AbortController();
      const onAbort = () => controller.abort();
      const timeoutId = setTimeout(onAbort, timeoutMs);

      // Forward external signal
      signal?.addEventListener("abort", onAbort, { once: true });

      try {
        // Append "/search" to the instance path instead of replacing it, so a
        // sub-path deployment keeps its prefix.
        const searchUrl = new URL(url);
        searchUrl.pathname = `${searchUrl.pathname.replace(/\/+$/, "")}/search`;
        searchUrl.search = "";
        searchUrl.hash = "";
        searchUrl.searchParams.set("q", args.query);
        searchUrl.searchParams.set("format", "json");
        searchUrl.searchParams.set("safesearch", String(safesearch));

        const response = await fetch(searchUrl.toString(), {
          signal: controller.signal,
          headers: { Accept: "application/json" },
        });

        if (!response.ok) {
          throw new Error(
            `SearXNG returned HTTP ${response.status}: ${response.statusText}`,
          );
        }

        const data: SearxngResponse = await response.json();
        const results = (data.results ?? [])
          .filter((r): r is SearxngRawResult & { url: string } => Boolean(r.url))
          .slice(0, args.numResults)
          .map((r, i) => {
            const title = r.title ?? "Untitled";
            const content = r.content ?? "";
            return `## **${i + 1}.** ${title}\n**URL:** ${r.url}\n${content}`;
          });

        if (results.length === 0) {
          return undefined;
        }

        return results.join("\n\n---\n\n");
      } catch (error) {
        // Our controller fired but the caller's signal did not: the timer won.
        if (controller.signal.aborted && !signal?.aborted) {
          throw new Error("Request timed out");
        }
        throw error;
      } finally {
        clearTimeout(timeoutId);
        signal?.removeEventListener("abort", onAbort);
      }
    },
  };
}
@@ -0,0 +1,161 @@
1
import { spawn } from "node:child_process";
import {
  existsSync,
  mkdirSync,
  readdirSync,
  rmSync,
  writeFileSync,
} from "node:fs";
import { homedir } from "node:os";
import { join } from "node:path";
import { fileURLToPath } from "node:url";
import { getAgentDir } from "@earendil-works/pi-coding-agent";
12
+
13
// Directory that contains this module. `fileURLToPath` is used instead of
// `URL.pathname` because the latter stays percent-encoded (a space becomes
// "%20") and is not a valid filesystem path on Windows.
const __dirname = fileURLToPath(new URL(".", import.meta.url));
14
+
15
/**
 * Path of the built-in management script, `bin/searxng`, located two
 * directory levels above this module.
 */
function getScriptPath(): string {
  const packageRoot = join(__dirname, "..", "..");
  return join(packageRoot, "bin", "searxng");
}
19
+
20
/**
 * Directory holding the per-process lock files: `searxng-instances` inside
 * the directory returned by `getAgentDir()`.
 */
function getInstancesDir(): string {
  const agentDir = getAgentDir();
  return join(agentDir, "searxng-instances");
}
23
+
24
/**
 * Check whether a PID is alive by sending signal 0.
 *
 * Signal 0 performs the existence and permission checks without delivering
 * anything. An `EPERM` failure therefore still means the process exists (it
 * just cannot be signalled by us) and is reported as alive.
 *
 * Non-positive or non-integer PIDs are never considered alive: passing 0 or a
 * negative value to `process.kill` would address process groups rather than a
 * single process.
 *
 * @param pid Process id to probe.
 * @returns `true` if a process with that id exists.
 */
export function isProcessAlive(pid: number): boolean {
  if (!Number.isInteger(pid) || pid <= 0) {
    return false;
  }
  try {
    process.kill(pid, 0);
    return true;
  } catch (err) {
    return (
      typeof err === "object" &&
      err !== null &&
      (err as { code?: unknown }).code === "EPERM"
    );
  }
}
35
+
36
/**
 * Remove lock files belonging to dead PIDs.
 *
 * Only entries named exactly `<digits>.lock` are considered; anything else in
 * the directory is left untouched. Does nothing when `dir` does not exist.
 *
 * @param dir Directory that holds the `<pid>.lock` files.
 */
export function cleanStaleLocks(dir: string): void {
  if (!existsSync(dir)) return;
  for (const entry of readdirSync(dir)) {
    // Strict match: a lenient parseInt would also accept names such as
    // "123.txt" or "42abc.lock" and delete files that are not ours.
    const match = /^(\d+)\.lock$/.exec(entry);
    if (!match) continue;
    if (!isProcessAlive(Number(match[1]))) {
      rmSync(join(dir, entry), { force: true });
    }
  }
}
48
+
49
/**
 * Replace a leading `~` with the current user's home directory.
 *
 * Only `~` on its own and paths starting with `~/` are expanded; every other
 * input (including `~user/...`) is returned unchanged.
 */
export function expandTilde(filepath: string): string {
  const refersToHome = filepath === "~" || filepath.startsWith("~/");
  return refersToHome ? join(homedir(), filepath.slice(1)) : filepath;
}
59
+
60
/**
 * Run the searxng management script with the given command.
 *
 * Executes `bash <script> <command>`, where the script is `scriptPath` (with a
 * leading `~` expanded) or, when none is given, the built-in `bin/searxng`.
 *
 * @param command Command passed to the script.
 * @param scriptPath Optional path to a custom management script.
 * @returns Resolves once the script exits with code 0; non-empty stdout is
 *   logged. Rejects if the process cannot be spawned or exits with any other
 *   code, in which case the error message carries stderr (or stdout).
 */
export async function runScript(command: "up" | "down", scriptPath?: string): Promise<void> {
  const script = scriptPath ? expandTilde(scriptPath) : getScriptPath();

  return new Promise((resolve, reject) => {
    const child = spawn("bash", [script, command], { stdio: "pipe" });
    const captured = { stdout: "", stderr: "" };

    child.stdout?.on("data", (chunk) => {
      captured.stdout += chunk.toString();
    });
    child.stderr?.on("data", (chunk) => {
      captured.stderr += chunk.toString();
    });

    child.on("close", (code) => {
      if (code !== 0) {
        const details = captured.stderr || captured.stdout;
        reject(new Error(`searxng ${command} failed (exit ${code}): ${details}`));
        return;
      }
      const output = captured.stdout.trim();
      if (output) console.log(output);
      resolve();
    });

    child.on("error", reject);
  });
}
93
+
94
/**
 * Record this process as a SearXNG user and make sure SearXNG is up.
 *
 * Steps, in order: create the instances directory if needed, delete lock
 * files left by dead processes, write (or overwrite) `<pid>.lock` for this
 * process, then run the management script with "up". A failure of the script
 * is logged and does not reject.
 *
 * @param scriptPath Optional path to a custom management script.
 *   Must accept "up" and "down" commands. When set, used instead of the
 *   built-in `bin/searxng` script.
 */
export async function registerInstance(scriptPath?: string): Promise<void> {
  const instancesDir = getInstancesDir();
  mkdirSync(instancesDir, { recursive: true });
  cleanStaleLocks(instancesDir);

  // The lock file is named after our PID and also contains it.
  const ownLock = join(instancesDir, `${process.pid}.lock`);
  writeFileSync(ownLock, String(process.pid));

  try {
    await runScript("up", scriptPath);
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    console.error(`pi-websearch: failed to start SearXNG: ${reason}`);
  }
}
124
+
125
/**
 * Drop this process's registration and stop SearXNG if nobody else uses it.
 *
 * Deletes our `<pid>.lock` file (best effort). If any remaining entry in the
 * instances directory names a live PID, returns without touching SearXNG.
 * Otherwise the management script is run with "down"; a failure of the script
 * is logged and does not reject.
 *
 * @param scriptPath Optional path to a custom management script.
 *   Must match the path passed to registerInstance.
 */
export async function unregisterInstance(scriptPath?: string): Promise<void> {
  const instancesDir = getInstancesDir();
  const ownLock = join(instancesDir, `${process.pid}.lock`);

  try {
    rmSync(ownLock, { force: true });
  } catch {
    // Best-effort cleanup; a leftover lock is treated as stale later on.
  }

  if (existsSync(instancesDir)) {
    const otherInstanceAlive = readdirSync(instancesDir).some((entry) => {
      const pid = parseInt(entry.replace(/\.lock$/, ""), 10);
      return !isNaN(pid) && isProcessAlive(pid);
    });
    if (otherInstanceAlive) {
      return;
    }
  }

  // We were the last live instance.
  try {
    await runScript("down", scriptPath);
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    console.error(`pi-websearch: failed to stop SearXNG: ${reason}`);
  }
}
@@ -0,0 +1,60 @@
1
+ import { StringEnum } from "@earendil-works/pi-ai";
2
+ import type { Static } from "typebox";
3
+ import Type from "typebox";
4
+
5
/**
 * Contract every search backend implements.
 *
 * A provider hides how a particular backend (MCP server, REST API, etc.) is
 * called and hands the results back as plain text.
 */
export interface SearchProvider {
  /** Human-readable provider name (e.g. "exa-mcp", "brave", "tavily"). */
  readonly name: string;

  /**
   * Run one search.
   *
   * @param args Search parameters.
   * @param signal Optional signal the caller can use to cancel the search.
   * @returns The result text, or `undefined` when nothing was found.
   */
  search(args: SearchArgs, signal?: AbortSignal): Promise<string | undefined>;
}
21
+
22
// Schemas of the optional search parameters, composed into
// `WebsearchParameters` below.
const numResultsSchema = Type.Number({
  description: "Number of search results to return (default: 8)",
});

const livecrawlSchema = StringEnum(["fallback", "preferred"] as const, {
  description:
    "Live crawl mode - 'fallback': use live crawling as backup if cached content unavailable, 'preferred': prioritize live crawling (default: 'fallback')",
});

const searchTypeSchema = StringEnum(["auto", "fast", "deep"] as const, {
  description:
    "Search type - 'auto': balanced search, 'fast': quick results, 'deep': comprehensive search (default: 'auto')",
});

const contextMaxCharactersSchema = Type.Number({
  description:
    "Maximum characters for context string optimized for LLMs (default: 10000)",
});

/**
 * Schema of the arguments accepted by a web search: a required `query` plus
 * optional tuning parameters.
 */
export const WebsearchParameters = Type.Object({
  query: Type.String({ description: "Web search query" }),
  numResults: Type.Optional(numResultsSchema),
  livecrawl: Type.Optional(livecrawlSchema),
  type: Type.Optional(searchTypeSchema),
  contextMaxCharacters: Type.Optional(contextMaxCharactersSchema),
});

/** Static TypeScript type derived from `WebsearchParameters`. */
export type SearchArgs = Static<typeof WebsearchParameters>;
50
+
51
// A single entry of `result.content`: a type tag plus its text.
const mcpContentItem = Type.Object({
  type: Type.String(),
  text: Type.String(),
});

/**
 * Schema of the MCP response payload this package reads: an object whose
 * `result.content` is an array of `{ type, text }` entries.
 */
export const McpResultPayload = Type.Object({
  result: Type.Object({
    content: Type.Array(mcpContentItem),
  }),
});