pi-web-toolkit 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- import { spawn } from "node:child_process";
1
+ import { runCLI } from "./cli-runner";
2
2
 
3
3
  /**
4
4
  * Run a scrapling CLI command with optional abort signal.
@@ -7,30 +7,45 @@ export function runScrapling(
7
7
  args: string[],
8
8
  signal?: AbortSignal,
9
9
  ): Promise<{ stdout: string; stderr: string; exitCode: number }> {
10
- return new Promise((resolve) => {
11
- const proc = spawn("scrapling", args, { shell: false, stdio: ["ignore", "pipe", "pipe"] });
12
- let stdout = "";
13
- let stderr = "";
10
+ return runCLI({ command: "scrapling", args, signal });
11
+ }
12
+
13
+ /**
14
+ * Run scrapling fetch with automatic fallback to HTTP GET on failure.
15
+ *
16
+ * @param url Target URL
17
+ * @param tmpFile Output markdown file path
18
+ * @param options { selector?: string; stealthy?: boolean; noGetFallback?: boolean }
19
+ * @param signal Optional AbortSignal
20
+ * @returns { ok: true } or { ok: false, stderr: string }
21
+ */
22
+ export async function runScraplingWithFallback(
23
+ url: string,
24
+ tmpFile: string,
25
+ options: { selector?: string; stealthy?: boolean; noGetFallback?: boolean },
26
+ signal?: AbortSignal,
27
+ ): Promise<{ ok: boolean; stderr?: string }> {
28
+ const cmd = options.stealthy ? "stealthy-fetch" : "fetch";
29
+ const args = ["extract", cmd, url, tmpFile, "--ai-targeted"];
30
+ if (options.selector) {
31
+ args.push("--css-selector", options.selector);
32
+ }
14
33
 
15
- proc.stdout.on("data", (data) => {
16
- stdout += data.toString();
17
- });
18
- proc.stderr.on("data", (data) => {
19
- stderr += data.toString();
20
- });
21
- proc.on("close", (code, closeSignal) => {
22
- const exitCode = code ?? 1;
23
- const signalMessage = closeSignal ? `Process terminated by ${closeSignal}` : "";
24
- resolve({ stdout, stderr: stderr || signalMessage, exitCode });
25
- });
26
- proc.on("error", (err) => resolve({ stdout, stderr: err.message, exitCode: 1 }));
34
+ const result = await runScrapling(args, signal);
35
+ if (result.exitCode === 0) {
36
+ return { ok: true };
37
+ }
27
38
 
28
- if (signal) {
29
- const kill = () => {
30
- proc.kill("SIGTERM");
31
- };
32
- if (signal.aborted) kill();
33
- else signal.addEventListener("abort", kill, { once: true });
39
+ if (!options.noGetFallback) {
40
+ const fallback = await runScrapling(
41
+ ["extract", "get", url, tmpFile, "--ai-targeted"],
42
+ signal,
43
+ );
44
+ if (fallback.exitCode === 0) {
45
+ return { ok: true };
34
46
  }
35
- });
47
+ return { ok: false, stderr: result.stderr || fallback.stderr };
48
+ }
49
+
50
+ return { ok: false, stderr: result.stderr };
36
51
  }
@@ -0,0 +1,79 @@
1
+ /**
2
+ * Tool factory — separates execution from TUI rendering
3
+ *
4
+ * Provides a defineWebTool helper that wraps tool definitions with
5
+ * consistent base behaviour, while letting each tool supply its own
6
+ * execution logic and optional custom renderers.
7
+ */
8
+
9
+ import { defineTool, formatSize } from "@earendil-works/pi-coding-agent";
10
+ import { Text } from "@earendil-works/pi-tui";
11
+
12
+ /**
13
+ * Shared render utilities for custom renderResult implementations.
14
+ */
15
+ export const RenderUtils = {
16
+ /** Truncate preview text to maxLen, adding ellipsis. */
17
+ truncatePreview(text: string, maxLen: number): string {
18
+ if (text.length <= maxLen) return text;
19
+ return text.slice(0, maxLen).replace(/\s+\S*$/, "") + "...";
20
+ },
21
+
22
+ /** Render the "Full output: path" line. */
23
+ fullOutputLine(path: string | undefined, theme: any): string {
24
+ return path ? `\n${theme.fg("accent", `Full output: ${path}`)}` : "";
25
+ },
26
+
27
+ /** Format a byte count using the shared formatter. */
28
+ formatBytes(bytes: number): string {
29
+ return formatSize(bytes);
30
+ },
31
+ };
32
+
33
+ /**
34
+ * Default renderCall implementation: shows tool name and first string argument.
35
+ */
36
+ export function defaultRenderCall(name: string, args: Record<string, unknown>, theme: any): Text {
37
+ let text = theme.fg("toolTitle", theme.bold(`${name} `));
38
+ const firstString = Object.values(args).find((v) => typeof v === "string");
39
+ if (firstString) {
40
+ text += theme.fg("muted", firstString as string);
41
+ }
42
+ return new Text(text, 0, 0);
43
+ }
44
+
45
+ /**
46
+ * Default renderResult implementation: shows success and full output path.
47
+ */
48
+ export function defaultRenderResult(
49
+ result: { content: Array<{ type: "text"; text: string }>; details?: unknown },
50
+ state: { expanded: boolean; isPartial: boolean },
51
+ theme: any,
52
+ ): Text {
53
+ if (state.isPartial) {
54
+ return new Text(theme.fg("warning", "Running..."), 0, 0);
55
+ }
56
+ const details = result.details as { fullOutputPath?: string } | undefined;
57
+ let text = theme.fg("success", "✓ Done");
58
+ if (state.expanded && details?.fullOutputPath) {
59
+ text += `\n${theme.fg("accent", `Full output: ${details.fullOutputPath}`)}`;
60
+ }
61
+ return new Text(text, 0, 0);
62
+ }
63
+
64
+ /**
65
+ * Register a web tool with consistent base behaviour.
66
+ *
67
+ * This is a thin wrapper around defineTool that applies default
68
+ * renderCall/renderResult when the tool does not supply its own.
69
+ *
70
+ * NOTE: The pi framework's TypeBox types make strict typing here difficult.
71
+ * Callers should rely on type inference at the call site.
72
+ */
73
+ export function defineWebTool(def: any) {
74
+ return defineTool({
75
+ ...def,
76
+ renderCall: def.renderCall ?? ((args: any, theme: any) => defaultRenderCall(def.name, args, theme)),
77
+ renderResult: def.renderResult ?? ((result: any, state: any, theme: any) => defaultRenderResult(result, state, theme)),
78
+ });
79
+ }
@@ -14,7 +14,6 @@
14
14
  import {
15
15
  defineTool,
16
16
  type ExtensionAPI,
17
- truncateHead,
18
17
  formatSize,
19
18
  DEFAULT_MAX_BYTES,
20
19
  DEFAULT_MAX_LINES,
@@ -24,7 +23,10 @@ import { Type, type Static } from "typebox";
24
23
  import * as fs from "node:fs";
25
24
  import * as os from "node:os";
26
25
  import * as path from "node:path";
27
- import { runScrapling } from "./utils/scrapling";
26
+ import { runScraplingWithFallback } from "./utils/scrapling";
27
+ import { extractPreview } from "./utils/content-preview";
28
+ import { writeWithFallback } from "./utils/output-sink";
29
+ import { abbreviateUrl, getErrorText, normalizeWhitespace } from "./utils/render-helpers";
28
30
 
29
31
  interface FetchTask {
30
32
  url: string;
@@ -37,18 +39,15 @@ async function fetchOne(
37
39
  stealthy: boolean,
38
40
  signal?: AbortSignal,
39
41
  ): Promise<{ url: string; content: string; size: number; ok: boolean; error?: string }> {
40
- const cmd = stealthy ? "stealthy-fetch" : "fetch";
41
- const args = ["extract", cmd, task.url, task.tmpFile, "--ai-targeted"];
42
- if (selector) args.push("--css-selector", selector);
42
+ const { ok: fetchOk, stderr } = await runScraplingWithFallback(
43
+ task.url,
44
+ task.tmpFile,
45
+ { selector, stealthy },
46
+ signal,
47
+ );
43
48
 
44
- const { stderr, exitCode } = await runScrapling(args, signal);
45
-
46
- if (exitCode !== 0) {
47
- // Fallback to GET
48
- const fallback = await runScrapling(["extract", "get", task.url, task.tmpFile, "--ai-targeted"], signal);
49
- if (fallback.exitCode !== 0) {
50
- return { url: task.url, content: "", size: 0, ok: false, error: stderr || fallback.stderr };
51
- }
49
+ if (!fetchOk) {
50
+ return { url: task.url, content: "", size: 0, ok: false, error: stderr };
52
51
  }
53
52
 
54
53
  try {
@@ -84,9 +83,9 @@ async function mapWithConcurrencyLimit<TIn, TOut>(
84
83
 
85
84
  export const WebBatchFetchParamsSchema = Type.Object({
86
85
  urls: Type.Array(Type.String(), {
87
- description: "List of URLs to fetch (2–5 recommended)",
86
+ description: "List of URLs to fetch (2–5 recommended, max 15)",
88
87
  minItems: 1,
89
- maxItems: 10,
88
+ maxItems: 15,
90
89
  }),
91
90
  selector: Type.Optional(Type.String({
92
91
  description: "CSS selector applied to ALL pages to extract only relevant content",
@@ -117,11 +116,12 @@ const webBatchFetchTool = defineTool({
117
116
  ].join(" "),
118
117
  promptSnippet: "Fetch multiple URLs in parallel for research",
119
118
  promptGuidelines: [
120
- "Use web_batch_fetch when web_search returns multiple (2–5) relevant pages and the agent needs to read them all.",
119
+ "Use web_batch_fetch when web_search returns multiple (2–5) relevant pages and the agent needs to read them all at once.",
120
+ "Prefer web_batch_fetch over repeated web_fetch calls when reading multiple pages for comparison or synthesis.",
121
121
  "Use web_batch_fetch for cross-referencing sources, comparing implementations, or synthesizing research from multiple sites.",
122
122
  "For a single URL, always use web_fetch — it supports per-URL selectors and stealthy mode.",
123
123
  "If a page in the batch fails, the tool reports the error but continues with the others.",
124
- "Keep batch sizes small (≤5) to avoid overwhelming the browser and token budget.",
124
+ "Keep batch sizes reasonable (≤8) to avoid overwhelming the browser and token budget.",
125
125
  ],
126
126
  parameters: WebBatchFetchParamsSchema,
127
127
 
@@ -132,17 +132,48 @@ const webBatchFetchTool = defineTool({
132
132
  tmpFile: path.join(tmpDir, `page-${i}.md`),
133
133
  }));
134
134
  let fullOutputPath: string | undefined;
135
+ const concurrency = Math.floor(Math.min(5, Math.max(1, params.max_concurrency ?? 3)));
135
136
 
136
- try {
137
- const concurrency = Math.floor(Math.min(5, Math.max(1, params.max_concurrency ?? 3)));
138
- onUpdate?.({ content: [{ type: "text", text: `Fetching ${tasks.length} pages with concurrency ${concurrency}...` }], details: {} });
137
+ // Progress tracking for live UI updates
138
+ const progressItems = tasks.map((t) => ({
139
+ url: t.url,
140
+ status: "fetching" as "fetching" | "done" | "error",
141
+ size: 0,
142
+ error: "",
143
+ }));
144
+
145
+ const sendProgress = () => {
146
+ const completed = progressItems.filter((p) => p.status !== "fetching").length;
147
+ const succeeded = progressItems.filter((p) => p.status === "done").length;
148
+ const failed = progressItems.filter((p) => p.status === "error").length;
149
+ onUpdate?.({
150
+ content: [{ type: "text", text: `Fetching ${tasks.length} pages (${completed}/${tasks.length})...` }],
151
+ details: {
152
+ progress: {
153
+ total: tasks.length,
154
+ completed,
155
+ succeeded,
156
+ failed,
157
+ items: progressItems.map((p) => ({ ...p })),
158
+ },
159
+ },
160
+ });
161
+ };
139
162
 
163
+ sendProgress();
164
+
165
+ try {
140
166
  const results = await mapWithConcurrencyLimit(
141
167
  tasks,
142
168
  concurrency,
143
169
  (task, index) => {
144
- onUpdate?.({ content: [{ type: "text", text: `Fetching ${task.url} (${index + 1}/${tasks.length})...` }], details: {} });
145
- return fetchOne(task, params.selector, params.stealthy ?? false, signal);
170
+ return fetchOne(task, params.selector, params.stealthy ?? false, signal).then((res) => {
171
+ progressItems[index].status = res.ok ? "done" : "error";
172
+ progressItems[index].size = res.size;
173
+ progressItems[index].error = res.error || "";
174
+ sendProgress();
175
+ return res;
176
+ });
146
177
  },
147
178
  );
148
179
 
@@ -166,27 +197,24 @@ const webBatchFetchTool = defineTool({
166
197
  }
167
198
 
168
199
  const rawText = lines.join("\n");
169
- const truncation = truncateHead(rawText, {
170
- maxLines: DEFAULT_MAX_LINES,
171
- maxBytes: DEFAULT_MAX_BYTES,
200
+ const sink = await writeWithFallback(rawText, {
201
+ tmpPrefix: "pi-web-batch-",
172
202
  });
203
+ fullOutputPath = sink.fullOutputPath;
173
204
 
174
- let finalText = truncation.content;
175
- if (truncation.truncated) {
176
- const fullOutputDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), "pi-web-batch-"));
177
- fullOutputPath = path.join(fullOutputDir, "output.txt");
178
- await fs.promises.writeFile(fullOutputPath, rawText, "utf-8");
179
- finalText += `\n\n[Output truncated: ${truncation.outputLines} of ${truncation.totalLines} lines (${formatSize(truncation.outputBytes)} of ${formatSize(truncation.totalBytes)}). Full output saved to: ${fullOutputPath}]`;
180
- }
181
-
182
- onUpdate?.({ content: [{ type: "text", text: `Batch complete: ${successCount}/${results.length} succeeded` }], details: {} });
183
205
  return {
184
- content: [{ type: "text", text: finalText }],
206
+ content: [{ type: "text", text: sink.text }],
185
207
  details: {
186
208
  urls: params.urls,
187
209
  succeeded: successCount,
188
210
  failed: results.length - successCount,
189
- results: results.map((r) => ({ url: r.url, ok: r.ok, size: r.size })),
211
+ results: results.map((r) => ({
212
+ url: r.url,
213
+ ok: r.ok,
214
+ size: r.size,
215
+ preview: r.ok ? extractPreview(r.content, 200) : undefined,
216
+ error: r.error,
217
+ })),
190
218
  fullOutputPath,
191
219
  },
192
220
  };
@@ -206,40 +234,120 @@ const webBatchFetchTool = defineTool({
206
234
  renderCall(args, theme) {
207
235
  let text = theme.fg("toolTitle", theme.bold("web_batch_fetch "));
208
236
  text += theme.fg("muted", `${args.urls?.length ?? 0} URLs`);
237
+ if (args.max_concurrency) {
238
+ text += theme.fg("dim", ` concurrency=${args.max_concurrency}`);
239
+ }
209
240
  if (args.selector) {
210
241
  text += theme.fg("dim", ` selector=${args.selector}`);
211
242
  }
212
243
  return new Text(text, 0, 0);
213
244
  },
214
245
 
215
- renderResult(result, { expanded, isPartial }, theme) {
246
+ renderResult(result, { expanded, isPartial }, theme, context) {
247
+ const isError = context?.isError ?? false;
248
+
216
249
  if (isPartial) {
250
+ const progress = (result.details as any)?.progress;
251
+ if (progress) {
252
+ const { total, completed, succeeded, failed, items } = progress;
253
+ const barWidth = 15;
254
+ const filled = Math.round((completed / total) * barWidth);
255
+ const bar = "█".repeat(filled) + "░".repeat(barWidth - filled);
256
+ let text = `${theme.fg("warning", "Batch fetching")} [${theme.fg("accent", bar.slice(0, filled))}${theme.fg("dim", bar.slice(filled))}] ${theme.fg("muted", `${completed}/${total}`)}`;
257
+ if (failed > 0) {
258
+ text += ` ${theme.fg("error", `(${failed} failed)`)}`;
259
+ }
260
+ for (const item of items) {
261
+ const icon = item.status === "done"
262
+ ? theme.fg("success", "✓")
263
+ : item.status === "error"
264
+ ? theme.fg("error", "✗")
265
+ : theme.fg("warning", "⏳");
266
+ let line = `\n ${icon} ${theme.fg("dim", abbreviateUrl(item.url, 50))}`;
267
+ if (item.status === "done" && item.size > 0) {
268
+ line += theme.fg("muted", ` ${formatSize(item.size)}`);
269
+ } else if (item.status === "error" && item.error) {
270
+ const err = item.error.slice(0, 80);
271
+ line += theme.fg("dim", ` ${err}${item.error.length > 80 ? "..." : ""}`);
272
+ } else if (item.status === "fetching") {
273
+ line += theme.fg("muted", " fetching...");
274
+ }
275
+ text += line;
276
+ }
277
+ return new Text(text, 0, 0);
278
+ }
217
279
  return new Text(theme.fg("warning", "Batch fetching..."), 0, 0);
218
280
  }
281
+
219
282
  const details = result.details as {
220
283
  succeeded?: number;
221
284
  failed?: number;
222
285
  urls?: string[];
223
- results?: Array<{ url: string; ok: boolean; size?: number }>;
286
+ results?: Array<{ url: string; ok: boolean; size?: number; preview?: string; error?: string }>;
224
287
  fullOutputPath?: string;
225
288
  } | undefined;
289
+
290
+ if (isError) {
291
+ const errText = getErrorText(result);
292
+ let text = theme.fg("error", "✗ Batch failed");
293
+ if (details?.urls) {
294
+ text += ` ${theme.fg("dim", `${details.urls.length} URLs`)}`;
295
+ }
296
+ text += `\n\n ${theme.fg("toolOutput", errText)}`;
297
+ return new Text(text, 0, 0);
298
+ }
299
+
226
300
  const total = details?.urls?.length ?? 0;
227
301
  const ok = details?.succeeded ?? 0;
302
+ const failed = details?.failed ?? 0;
303
+
228
304
  let text = theme.fg("success", `✓ ${ok}/${total} fetched`);
229
- if (details?.failed) {
230
- text += theme.fg("error", ` (${details.failed} failed)`);
305
+ if (failed > 0) {
306
+ text += theme.fg("error", ` (${failed} failed)`);
231
307
  }
232
- if (expanded && details?.results) {
233
- for (const r of details.results) {
234
- text += `\n ${r.ok ? theme.fg("success", "✓") : theme.fg("error", "✗")} ${theme.fg("dim", r.url)}`;
235
- if (r.size) {
236
- text += theme.fg("muted", ` ${formatSize(r.size)}`);
308
+
309
+ if (!expanded) {
310
+ const successes = (details?.results ?? []).filter((r) => r.ok);
311
+ const top3 = successes.slice(0, 3);
312
+ for (let i = 0; i < top3.length; i++) {
313
+ const r = top3[i];
314
+ text += `\n [${i + 1}] ${theme.fg("toolTitle", abbreviateUrl(r.url, 40))} ${theme.fg("muted", `(${formatSize(r.size ?? 0)})`)}`;
315
+ if (r.preview) {
316
+ const snippet = normalizeWhitespace(r.preview);
317
+ const short = snippet.length > 80 ? snippet.slice(0, 80).replace(/\s+\S*$/, "") + "..." : snippet;
318
+ text += `\n ${theme.fg("muted", short)}`;
237
319
  }
238
320
  }
321
+ if (successes.length > 3) {
322
+ text += `\n ${theme.fg("muted", `... and ${successes.length - 3} more (Ctrl+O for full list)`)}`;
323
+ }
239
324
  }
240
- if (expanded && details?.fullOutputPath) {
241
- text += `\n${theme.fg("dim", `Full output: ${details.fullOutputPath}`)}`;
325
+
326
+ if (expanded && details?.results) {
327
+ const successes = details.results.filter((r) => r.ok);
328
+ const failures = details.results.filter((r) => !r.ok);
329
+
330
+ for (let i = 0; i < successes.length; i++) {
331
+ const r = successes[i];
332
+ text += `\n[${i + 1}] ${theme.fg("toolTitle", abbreviateUrl(r.url))} ${theme.fg("muted", `| ${formatSize(r.size ?? 0)}`)}`;
333
+ if (r.preview) {
334
+ text += `\n ${theme.fg("muted", normalizeWhitespace(r.preview))}`;
335
+ }
336
+ text += "\n";
337
+ }
338
+
339
+ if (failures.length > 0) {
340
+ text += `\n${theme.fg("error", "Failed:")}`;
341
+ for (const r of failures) {
342
+ text += `\n ${theme.fg("error", "✗")} ${theme.fg("dim", r.url)} ${theme.fg("dim", r.error ?? "")}`;
343
+ }
344
+ }
345
+
346
+ if (details?.fullOutputPath) {
347
+ text += `\n\n${theme.fg("accent", `Full output: ${details.fullOutputPath}`)}`;
348
+ }
242
349
  }
350
+
243
351
  return new Text(text, 0, 0);
244
352
  },
245
353
  });