npm - pi-web-toolkit - Versions diffs - 0.1.2 → 0.2.1 - Mend

pi-web-toolkit 0.1.2 → 0.2.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (22)

package/README.md +36 -4
package/docs/assets/screenshots/tools-workflow-preview.png +0 -0
package/docs/assets/screenshots/web-batch-fetch-progress.png +0 -0
package/docs/assets/screenshots/web-batch-fetch-results.png +0 -0
package/docs/assets/screenshots/web-browse-headless.png +0 -0
package/docs/assets/screenshots/web-fetch-summary.png +0 -0
package/docs/assets/screenshots/web-research-workflow.png +0 -0
package/docs/assets/screenshots/web-search-results-expanded.png +0 -0
package/docs/guide.md +1 -1
package/docs/tools.md +6 -2
package/extensions/utils/agent-browser.ts +80 -93
package/extensions/utils/cli-runner.ts +108 -0
package/extensions/utils/content-preview.ts +493 -0
package/extensions/utils/output-sink.ts +67 -0
package/extensions/utils/render-helpers.ts +77 -0
package/extensions/utils/scrapling.ts +2 -27
package/extensions/utils/tool-factory.ts +79 -0
package/extensions/web_batch_fetch.ts +146 -35
package/extensions/web_browse.ts +152 -29
package/extensions/web_fetch.ts +74 -24
package/extensions/web_search.ts +137 -54
package/package.json +10 -1

package/README.md CHANGED Viewed

@@ -13,11 +13,33 @@ Web research toolkit for [pi](https://pi.dev) agents. Search via SearXNG, fetch
 | Tool | Backend | Purpose | Current Limit |
 |------|---------|---------|---------------|
-| **`web_search`** | [SearXNG](https://github.com/searxng/searxng) | Search the web with scored, ranked results from multiple engines — always the first step in web research | 10 results (max 50) |
+| **`web_search`** | [SearXNG](https://github.com/searxng/searxng) | Search the web with scored, ranked results from multiple engines — always the first step in web research | 20 results (max 60, auto-pages up to 3 pages) |
 | **`web_fetch`** | [scrapling](https://github.com/D4Vinci/Scrapling) | Fetch a single static page as clean markdown | — |
-| **`web_batch_fetch`** | [scrapling](https://github.com/D4Vinci/Scrapling) | Fetch 2–10 pages in parallel for research synthesis | 3 concurrent (max 5) |
+| **`web_batch_fetch`** | [scrapling](https://github.com/D4Vinci/Scrapling) | Fetch 2–15 pages in parallel for research synthesis | 3 concurrent (max 5) |
 | **`web_browse`** | [agent-browser](https://github.com/vercel-labs/agent-browser) | Interact with a page (click, scroll, fill) then extract content | 25 actions |
+## Tools Preview
+A quick look at how pi renders toolkit calls while an agent searches, fetches, batches, and browses the web.
+<table>
+  <tr>
+    <td width="50%"><strong>Multi-tool research flow</strong><br><img src="docs/assets/screenshots/tools-workflow-preview.png" alt="pi-web-toolkit multi-tool research preview"></td>
+    <td width="50%"><strong><code>web_search</code> expanded results</strong><br><img src="docs/assets/screenshots/web-search-results-expanded.png" alt="web_search expanded results"></td>
+  </tr>
+  <tr>
+    <td width="50%"><strong><code>web_batch_fetch</code> progress</strong><br><img src="docs/assets/screenshots/web-batch-fetch-progress.png" alt="web_batch_fetch progress"></td>
+    <td width="50%"><strong><code>web_batch_fetch</code> results</strong><br><img src="docs/assets/screenshots/web-batch-fetch-results.png" alt="web_batch_fetch results"></td>
+  </tr>
+  <tr>
+    <td width="50%"><strong><code>web_fetch</code> result preview</strong><br><img src="docs/assets/screenshots/web-fetch-summary.png" alt="web_fetch result preview"></td>
+    <td width="50%"><strong><code>web_browse</code> headless browser flow</strong><br><img src="docs/assets/screenshots/web-browse-headless.png" alt="web_browse headless browser flow"></td>
+  </tr>
+  <tr>
+    <td colspan="2"><strong>End-to-end research summary</strong><br><img src="docs/assets/screenshots/web-research-workflow.png" alt="end-to-end web research workflow"></td>
+  </tr>
+</table>
 ## Quick Start
 ### 1. Install external dependencies
@@ -78,12 +100,19 @@ pi-web-toolkit/
 ├── extensions/
 │   ├── index.ts              # Unified entry point — registers all 4 tools
 │   ├── utils/
+│   │   ├── cli-runner.ts     # Unified CLI process spawning with timeout/AbortSignal
+│   │   ├── content-preview.ts # Intelligent content extraction from scraped pages
+│   │   ├── output-sink.ts    # Truncation + temp-file fallback
+│   │   ├── render-helpers.ts # URL abbreviations, text normalization, error formatting for TUI
 │   │   ├── scrapling.ts      # Reusable scrapling CLI wrapper (shared by fetch + batch)
+│   │   ├── tool-factory.ts   # Common tool registration patterns
 │   │   └── agent-browser.ts  # agent-browser CLI wrapper (shared by web_browse)
 │   ├── web_search.ts         # SearXNG search tool
 │   ├── web_fetch.ts          # Single-page scrapling fetcher
 │   ├── web_batch_fetch.ts    # Parallel scrapling fetcher
 │   └── web_browse.ts         # Interactive browser automation (agent-browser)
+├── test/
+│   └── content-preview/      # Automated test suite with fixtures & snapshots
 ├── docs/
 │   ├── tools.md              # Full parameter specs
 │   └── guide.md              # Decision tree & tool comparison
@@ -95,7 +124,7 @@ pi-web-toolkit/
 **Design principles:**
 - **Unified registration** — `index.ts` is the single source of truth for what pi loads.
-- **Shared utilities** — `utils/scrapling.ts` and `utils/agent-browser.ts` encapsulate the CLI wrappers and fallback logic; tool files import only from `utils/`, never from each other.
+- **Shared utilities** — `utils/` modules encapsulate CLI spawning, content extraction, output truncation, TUI formatting, and common registration patterns; tool files import only from `utils/`, never from each other.
 - **Per-tool isolation** — each tool owns its own schema, execute logic, and TUI renderer; no cross-imports except via `utils/`.
 - **Runtime config** — environment variables are read at execute time, not build time.
@@ -112,7 +141,10 @@ pi-web-toolkit/
 pi install ./
 # Type-check (no build step; pi loads TypeScript directly)
-npx tsc --noEmit
+npm run typecheck
+# Run tests
+npm run test
 # Verify external CLI dependencies
 scrapling --help

package/docs/assets/screenshots/tools-workflow-preview.png ADDED Viewed

Binary file

package/docs/assets/screenshots/web-batch-fetch-progress.png ADDED Viewed

Binary file

package/docs/assets/screenshots/web-batch-fetch-results.png ADDED Viewed

Binary file

package/docs/assets/screenshots/web-browse-headless.png ADDED Viewed

Binary file

package/docs/assets/screenshots/web-fetch-summary.png ADDED Viewed

Binary file

package/docs/assets/screenshots/web-research-workflow.png ADDED Viewed

Binary file

package/docs/assets/screenshots/web-search-results-expanded.png ADDED Viewed

Binary file

package/docs/guide.md CHANGED Viewed

@@ -32,7 +32,7 @@ User asks about something external / current
 | | `web_fetch` | `web_browse` | `web_batch_fetch` |
 |--|-------------|--------------|-------------------|
-| **Pages** | 1 | 1 | 2–10 |
+| **Pages** | 1 | 1 | 2–15 |
 | **Browser** | Yes (scrapling) | Yes (agent-browser) | Yes (scrapling) |
 | **Interaction** | ❌ No | ✅ Click, fill, scroll, wait | ❌ No |
 | **Selector** | ✅ Per-URL | ✅ Final state | ✅ Applied to all |

package/docs/tools.md CHANGED Viewed

@@ -2,18 +2,22 @@
 ## `web_search`
-Search the web via SearXNG. Returns ranked results with title, URL, and snippet.
+Search the web via SearXNG. Returns ranked results with title, URL, and snippet. Automatically aggregates up to 3 pages of SearXNG results when more than ~20 are needed.
 ```typescript
 {
   query: string,           // Search query
   language?: string,       // Language code (en, de, fr...). Default: "auto"
-  results?: number,        // Max results (1–50). Default: 10
+  results?: number,        // Max results (1–60). Default: 20. Automatically pages through SearXNG (up to 3 pages) if needed.
 }
 ```
 **When to use:** The user asks about current events, facts, or anything requiring up-to-date information. This is always the **first step** of web research.
+**Empty results behavior:** When no results are found, `web_search` returns a list of **suggestions** — alternative queries that SearXNG believes may yield better results. The agent can use these suggestions to automatically refine and retry the search.
+**Pagination:** `web_search` automatically fetches up to 3 pages from SearXNG and deduplicates by URL. You do not need to call it multiple times for deeper results.
 ---
 ## `web_fetch`

package/extensions/utils/agent-browser.ts CHANGED Viewed

@@ -5,7 +5,7 @@
  * command building, process spawning, JSON parsing, and session cleanup.
  */
-import { spawn } from "node:child_process";
+import { runCLI } from "./cli-runner";
 export interface BrowseAction {
   type: "click" | "fill" | "type" | "press" | "wait" | "wait_selector" | "scroll";
@@ -25,8 +25,50 @@ export interface AgentBrowserBatchItem {
   error?: string | null;
 }
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return typeof value === "object" && value !== null && !Array.isArray(value);
+}
+function isBatchItem(value: unknown): value is AgentBrowserBatchItem {
+  return isRecord(value)
+    && typeof value.success === "boolean"
+    && Array.isArray(value.command)
+    && value.command.every((part) => typeof part === "string");
+}
+function describeBatchOutput(value: unknown): string {
+  if (Array.isArray(value)) return `array with ${value.length} item(s)`;
+  if (isRecord(value)) return `object with keys: ${Object.keys(value).join(", ") || "(none)"}`;
+  return typeof value;
+}
+export function parseAgentBrowserBatchOutput(stdout: string): AgentBrowserBatchItem[] {
+  const parsed = JSON.parse(stdout) as unknown;
+  if (Array.isArray(parsed)) {
+    if (parsed.every(isBatchItem)) return parsed;
+    throw new Error(`Expected every batch result item to contain { success, command }; got ${describeBatchOutput(parsed)}`);
+  }
+  if (isBatchItem(parsed)) {
+    return [parsed];
+  }
+  if (isRecord(parsed)) {
+    for (const key of ["results", "items", "data", "commands"]) {
+      const candidate = parsed[key];
+      if (Array.isArray(candidate)) {
+        if (candidate.every(isBatchItem)) return candidate;
+        throw new Error(`Expected ${key} to contain batch result items; got ${describeBatchOutput(candidate)}`);
+      }
+    }
+  }
+  throw new Error(`Expected JSON array of batch results; got ${describeBatchOutput(parsed)}`);
+}
 function requireString(action: BrowseAction, field: "selector" | "value" | "key"): string {
-  const value = action[field];
+  const value = action[field] as string | undefined;
   if (typeof value !== "string" || value.length === 0) {
     throw new Error(`Action "${action.type}" requires non-empty ${field}`);
   }
@@ -34,11 +76,11 @@ function requireString(action: BrowseAction, field: "selector" | "value" | "key"
 }
 function requireInteger(action: BrowseAction, field: "ms" | "amount"): number {
-  const value = action[field];
-  if (!Number.isInteger(value) || value < 0) {
+  const value = action[field] as number | undefined;
+  if (!Number.isInteger(value) || (value as number) < 0) {
     throw new Error(`Action "${action.type}" requires non-negative integer ${field}`);
   }
-  return value;
+  return value as number;
 }
 function waitForSelectorScript(selector: string, state: "attached" | "visible" | "hidden"): string {
@@ -128,7 +170,7 @@ export function buildBatchCommands(
   return commands;
 }
-export function runAgentBrowserBatch(
+export async function runAgentBrowserBatch(
   commands: string[][],
   options: { session: string; headless: boolean; signal?: AbortSignal; timeout?: number },
 ): Promise<AgentBrowserBatchItem[]> {
@@ -136,99 +178,44 @@ export function runAgentBrowserBatch(
   if (!options.headless) args.push("--headed");
   args.push("batch", "--bail", "--json");
-  return new Promise((resolve, reject) => {
-    const proc = spawn("agent-browser", args, {
-      shell: false,
-      stdio: ["pipe", "pipe", "pipe"],
-    });
-    let stdout = "";
-    let stderr = "";
-    let timeoutId: NodeJS.Timeout | undefined;
-    let settled = false;
-    const cleanup = () => {
-      if (timeoutId) clearTimeout(timeoutId);
-      if (options.signal) options.signal.removeEventListener("abort", kill);
-    };
-    const settleReject = (err: Error) => {
-      if (settled) return;
-      settled = true;
-      cleanup();
-      reject(err);
-    };
-    const kill = () => proc.kill("SIGTERM");
-    proc.stdout.on("data", (data: Buffer) => {
-      stdout += data.toString();
+  try {
+    const result = await runCLI({
+      command: "agent-browser",
+      args,
+      stdin: JSON.stringify(commands),
+      timeout: options.timeout,
+      signal: options.signal,
     });
-    proc.stderr.on("data", (data: Buffer) => {
-      stderr += data.toString();
-    });
-    if (options.timeout) {
-      timeoutId = setTimeout(() => {
-        proc.kill("SIGTERM");
-        settleReject(new Error(`agent-browser timed out after ${options.timeout}ms`));
-      }, options.timeout);
+    if (result.exitCode !== 0 && !result.stdout.trim()) {
+      throw new Error(`agent-browser failed (exit ${result.exitCode}):\n${result.stderr || "unknown error"}`);
     }
-    proc.on("close", (code) => {
-      if (settled) return;
-      settled = true;
-      cleanup();
-      if (code !== 0 && !stdout.trim()) {
-        reject(new Error(`agent-browser failed (exit ${code}):\n${stderr || "unknown error"}`));
-        return;
-      }
-      try {
-        const results = JSON.parse(stdout) as AgentBrowserBatchItem[];
-        resolve(results);
-      } catch (err: any) {
-        reject(new Error(
-          `Failed to parse agent-browser output: ${err.message}\nstdout: ${stdout}\nstderr: ${stderr}`
-        ));
-      }
-    });
-    proc.on("error", (err: any) => {
-      if (err.code === "ENOENT") {
-        settleReject(new Error(
-          "agent-browser is not installed.\n\nInstall it with:\n  npm i -g agent-browser && agent-browser install\n\nThen run: agent-browser doctor"
-        ));
-      } else {
-        settleReject(err);
-      }
-    });
-    if (options.signal) {
-      if (options.signal.aborted) kill();
-      else options.signal.addEventListener("abort", kill, { once: true });
+    try {
+      return parseAgentBrowserBatchOutput(result.stdout);
+    } catch (err: any) {
+      throw new Error(
+        `Failed to parse agent-browser output: ${err.message}\nstdout: ${result.stdout}\nstderr: ${result.stderr}`
+      );
     }
-    proc.stdin.write(JSON.stringify(commands));
-    proc.stdin.end();
-  });
+  } catch (err: any) {
+    if (err.message === "agent-browser is not installed") {
+      throw new Error(
+        "agent-browser is not installed.\n\nInstall it with:\n  npm i -g agent-browser && agent-browser install\n\nThen run: agent-browser doctor"
+      );
+    }
+    throw err;
+  }
 }
-export function closeAgentBrowserSession(session: string, signal?: AbortSignal): Promise<void> {
-  return new Promise((resolve) => {
-    const proc = spawn("agent-browser", ["--session", session, "close"], {
-      shell: false,
-      stdio: ["ignore", "ignore", "ignore"],
+export async function closeAgentBrowserSession(session: string, signal?: AbortSignal): Promise<void> {
+  try {
+    await runCLI({
+      command: "agent-browser",
+      args: ["--session", session, "close"],
+      signal,
     });
-    const done = () => resolve();
-    proc.on("close", done);
-    proc.on("error", done);
-    if (signal) {
-      const kill = () => proc.kill("SIGTERM");
-      if (signal.aborted) kill();
-      else signal.addEventListener("abort", kill, { once: true });
-    }
-  });
+  } catch {
+    // Best-effort cleanup — ignore errors
+  }
 }

package/extensions/utils/cli-runner.ts ADDED Viewed

@@ -0,0 +1,108 @@
+/**
+ * CLI runner — abstracted process spawning
+ *
+ * Provides a single interface for running external CLI commands
+ * with consistent signal handling, timeout support, and stdout/stderr
+ * collection. Enables testability by allowing the runner to be swapped.
+ */
+import { spawn, type ChildProcess } from "node:child_process";
+export interface CLIRunOptions {
+  command: string;
+  args: string[];
+  /** Data to write to stdin. If omitted, stdin is ignored. */
+  stdin?: string;
+  /** Timeout in milliseconds. If exceeded, the process is killed. */
+  timeout?: number;
+  /** AbortSignal for cancellation. */
+  signal?: AbortSignal;
+}
+export interface CLIRunResult {
+  stdout: string;
+  stderr: string;
+  exitCode: number;
+}
+/**
+ * Run an external CLI command and capture its output.
+ *
+ * Handles:
+ * - stdout/stderr collection
+ * - optional stdin feeding
+ * - optional timeout (SIGTERM)
+ * - AbortSignal cancellation (SIGTERM)
+ * - process spawn errors (e.g. ENOENT)
+ */
+export function runCLI(options: CLIRunOptions): Promise<CLIRunResult> {
+  return new Promise((resolve, reject) => {
+    const stdio = options.stdin
+      ? ["pipe", "pipe", "pipe"]
+      : ["ignore", "pipe", "pipe"];
+    const proc = spawn(options.command, options.args, {
+      shell: false,
+      stdio: stdio as any,
+    }) as ChildProcess;
+    let stdout = "";
+    let stderr = "";
+    let timeoutId: NodeJS.Timeout | undefined;
+    let settled = false;
+    const cleanup = () => {
+      if (timeoutId) clearTimeout(timeoutId);
+      if (options.signal) options.signal.removeEventListener("abort", kill);
+    };
+    const settleReject = (err: Error) => {
+      if (settled) return;
+      settled = true;
+      cleanup();
+      reject(err);
+    };
+    const kill = () => proc.kill("SIGTERM");
+    proc.stdout?.on("data", (data: Buffer) => {
+      stdout += data.toString();
+    });
+    proc.stderr?.on("data", (data: Buffer) => {
+      stderr += data.toString();
+    });
+    if (options.timeout) {
+      timeoutId = setTimeout(() => {
+        proc.kill("SIGTERM");
+        settleReject(new Error(`${options.command} timed out after ${options.timeout}ms`));
+      }, options.timeout);
+    }
+    proc.on("close", (code) => {
+      if (settled) return;
+      settled = true;
+      cleanup();
+      resolve({ stdout, stderr, exitCode: code ?? 1 });
+    });
+    proc.on("error", (err: any) => {
+      if (err.code === "ENOENT") {
+        settleReject(new Error(`${options.command} is not installed`));
+      } else {
+        settleReject(err);
+      }
+    });
+    if (options.signal) {
+      if (options.signal.aborted) kill();
+      else options.signal.addEventListener("abort", kill, { once: true });
+    }
+    if (options.stdin && proc.stdin) {
+      proc.stdin.write(options.stdin);
+      proc.stdin.end();
+    }
+  });
+}