pi-web-toolkit 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +89 -43
- package/docs/agents/domain.md +51 -0
- package/docs/agents/issue-tracker.md +22 -0
- package/docs/agents/triage-labels.md +15 -0
- package/docs/guide.md +1 -1
- package/docs/tools.md +6 -2
- package/extensions/utils/agent-browser.ts +179 -0
- package/extensions/utils/cli-runner.ts +108 -0
- package/extensions/utils/content-preview.ts +493 -0
- package/extensions/utils/output-sink.ts +67 -0
- package/extensions/utils/render-helpers.ts +77 -0
- package/extensions/utils/scrapling.ts +39 -24
- package/extensions/utils/tool-factory.ts +79 -0
- package/extensions/web_batch_fetch.ts +155 -47
- package/extensions/web_browse.ts +158 -256
- package/extensions/web_fetch.ts +83 -42
- package/extensions/web_search.ts +140 -56
- package/package.json +9 -1
package/extensions/web_browse.ts
CHANGED
|
@@ -14,7 +14,6 @@
|
|
|
14
14
|
import {
|
|
15
15
|
defineTool,
|
|
16
16
|
type ExtensionAPI,
|
|
17
|
-
truncateHead,
|
|
18
17
|
formatSize,
|
|
19
18
|
DEFAULT_MAX_BYTES,
|
|
20
19
|
DEFAULT_MAX_LINES,
|
|
@@ -22,236 +21,14 @@ import {
|
|
|
22
21
|
import { StringEnum } from "@earendil-works/pi-ai";
|
|
23
22
|
import { Text } from "@earendil-works/pi-tui";
|
|
24
23
|
import { Type, type Static } from "typebox";
|
|
25
|
-
import {
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
value?: string;
|
|
34
|
-
key?: string;
|
|
35
|
-
ms?: number;
|
|
36
|
-
direction?: "down" | "up" | "bottom" | "top";
|
|
37
|
-
amount?: number;
|
|
38
|
-
state?: "attached" | "visible" | "hidden";
|
|
39
|
-
}
|
|
40
|
-
|
|
41
|
-
interface AgentBrowserBatchItem {
|
|
42
|
-
success: boolean;
|
|
43
|
-
command: string[];
|
|
44
|
-
result?: any;
|
|
45
|
-
error?: string | null;
|
|
46
|
-
}
|
|
47
|
-
|
|
48
|
-
function requireString(action: BrowseAction, field: "selector" | "value" | "key"): string {
|
|
49
|
-
const value = action[field];
|
|
50
|
-
if (typeof value !== "string" || value.length === 0) {
|
|
51
|
-
throw new Error(`Action "${action.type}" requires non-empty ${field}`);
|
|
52
|
-
}
|
|
53
|
-
return value;
|
|
54
|
-
}
|
|
55
|
-
|
|
56
|
-
function requireInteger(action: BrowseAction, field: "ms" | "amount"): number {
|
|
57
|
-
const value = action[field];
|
|
58
|
-
if (!Number.isInteger(value) || value < 0) {
|
|
59
|
-
throw new Error(`Action "${action.type}" requires non-negative integer ${field}`);
|
|
60
|
-
}
|
|
61
|
-
return value;
|
|
62
|
-
}
|
|
63
|
-
|
|
64
|
-
function waitForSelectorScript(selector: string, state: "attached" | "visible" | "hidden"): string {
|
|
65
|
-
const selectorLiteral = JSON.stringify(selector);
|
|
66
|
-
const stateLiteral = JSON.stringify(state);
|
|
67
|
-
return `await new Promise((resolve, reject) => {
|
|
68
|
-
const selector = ${selectorLiteral};
|
|
69
|
-
const state = ${stateLiteral};
|
|
70
|
-
const deadline = Date.now() + 30000;
|
|
71
|
-
const isVisible = (el) => !!(el && (el.offsetWidth || el.offsetHeight || el.getClientRects().length));
|
|
72
|
-
const check = () => {
|
|
73
|
-
const el = document.querySelector(selector);
|
|
74
|
-
const ok = state === "attached" ? !!el : state === "hidden" ? !isVisible(el) : isVisible(el);
|
|
75
|
-
if (ok) return resolve(true);
|
|
76
|
-
if (Date.now() > deadline) return reject(new Error(\`Timed out waiting for ${state} selector: ${selector}\`));
|
|
77
|
-
setTimeout(check, 100);
|
|
78
|
-
};
|
|
79
|
-
check();
|
|
80
|
-
})`;
|
|
81
|
-
}
|
|
82
|
-
|
|
83
|
-
function buildBatchCommands(
|
|
84
|
-
url: string,
|
|
85
|
-
actions: BrowseAction[],
|
|
86
|
-
selector?: string,
|
|
87
|
-
): string[][] {
|
|
88
|
-
const commands: string[][] = [["open", url]];
|
|
89
|
-
|
|
90
|
-
for (const action of actions) {
|
|
91
|
-
switch (action.type) {
|
|
92
|
-
case "click":
|
|
93
|
-
commands.push(["click", requireString(action, "selector")]);
|
|
94
|
-
break;
|
|
95
|
-
case "fill":
|
|
96
|
-
commands.push(["fill", requireString(action, "selector"), requireString(action, "value")]);
|
|
97
|
-
break;
|
|
98
|
-
case "type":
|
|
99
|
-
commands.push(["type", requireString(action, "selector"), requireString(action, "value")]);
|
|
100
|
-
break;
|
|
101
|
-
case "press": {
|
|
102
|
-
if (action.selector) {
|
|
103
|
-
commands.push(["focus", action.selector]);
|
|
104
|
-
}
|
|
105
|
-
commands.push(["press", requireString(action, "key")]);
|
|
106
|
-
break;
|
|
107
|
-
}
|
|
108
|
-
case "wait":
|
|
109
|
-
commands.push(["wait", String(requireInteger(action, "ms"))]);
|
|
110
|
-
break;
|
|
111
|
-
case "wait_selector": {
|
|
112
|
-
const state = action.state ?? "visible";
|
|
113
|
-
const waitSelector = requireString(action, "selector");
|
|
114
|
-
if (state === "visible") {
|
|
115
|
-
commands.push(["wait", waitSelector]);
|
|
116
|
-
} else {
|
|
117
|
-
commands.push(["eval", waitForSelectorScript(waitSelector, state)]);
|
|
118
|
-
}
|
|
119
|
-
break;
|
|
120
|
-
}
|
|
121
|
-
case "scroll": {
|
|
122
|
-
const dir = action.direction ?? "down";
|
|
123
|
-
if (dir === "top") {
|
|
124
|
-
commands.push(["eval", "window.scrollTo(0, 0)"]);
|
|
125
|
-
} else if (dir === "bottom") {
|
|
126
|
-
commands.push(["eval", "window.scrollTo(0, document.body.scrollHeight)"]);
|
|
127
|
-
} else {
|
|
128
|
-
commands.push(["scroll", dir, String(action.amount ?? 500)]);
|
|
129
|
-
}
|
|
130
|
-
break;
|
|
131
|
-
}
|
|
132
|
-
default:
|
|
133
|
-
throw new Error(`Unsupported browser action: ${(action as BrowseAction).type}`);
|
|
134
|
-
}
|
|
135
|
-
}
|
|
136
|
-
|
|
137
|
-
// Extract content
|
|
138
|
-
if (selector) {
|
|
139
|
-
commands.push(["get", "text", selector, "--json"]);
|
|
140
|
-
} else {
|
|
141
|
-
commands.push(["snapshot", "-i", "--json"]);
|
|
142
|
-
}
|
|
143
|
-
|
|
144
|
-
// Metadata
|
|
145
|
-
commands.push(["get", "title", "--json"]);
|
|
146
|
-
commands.push(["get", "url", "--json"]);
|
|
147
|
-
|
|
148
|
-
return commands;
|
|
149
|
-
}
|
|
150
|
-
|
|
151
|
-
function runAgentBrowserBatch(
|
|
152
|
-
commands: string[][],
|
|
153
|
-
options: { session: string; headless: boolean; signal?: AbortSignal; timeout?: number },
|
|
154
|
-
): Promise<AgentBrowserBatchItem[]> {
|
|
155
|
-
const args = ["--session", options.session];
|
|
156
|
-
if (!options.headless) args.push("--headed");
|
|
157
|
-
args.push("batch", "--bail", "--json");
|
|
158
|
-
|
|
159
|
-
return new Promise((resolve, reject) => {
|
|
160
|
-
const proc = spawn("agent-browser", args, {
|
|
161
|
-
shell: false,
|
|
162
|
-
stdio: ["pipe", "pipe", "pipe"],
|
|
163
|
-
});
|
|
164
|
-
|
|
165
|
-
let stdout = "";
|
|
166
|
-
let stderr = "";
|
|
167
|
-
let timeoutId: NodeJS.Timeout | undefined;
|
|
168
|
-
let settled = false;
|
|
169
|
-
|
|
170
|
-
const cleanup = () => {
|
|
171
|
-
if (timeoutId) clearTimeout(timeoutId);
|
|
172
|
-
if (options.signal) options.signal.removeEventListener("abort", kill);
|
|
173
|
-
};
|
|
174
|
-
|
|
175
|
-
const settleReject = (err: Error) => {
|
|
176
|
-
if (settled) return;
|
|
177
|
-
settled = true;
|
|
178
|
-
cleanup();
|
|
179
|
-
reject(err);
|
|
180
|
-
};
|
|
181
|
-
|
|
182
|
-
const kill = () => proc.kill("SIGTERM");
|
|
183
|
-
|
|
184
|
-
proc.stdout.on("data", (data: Buffer) => {
|
|
185
|
-
stdout += data.toString();
|
|
186
|
-
});
|
|
187
|
-
|
|
188
|
-
proc.stderr.on("data", (data: Buffer) => {
|
|
189
|
-
stderr += data.toString();
|
|
190
|
-
});
|
|
191
|
-
|
|
192
|
-
if (options.timeout) {
|
|
193
|
-
timeoutId = setTimeout(() => {
|
|
194
|
-
proc.kill("SIGTERM");
|
|
195
|
-
settleReject(new Error(`agent-browser timed out after ${options.timeout}ms`));
|
|
196
|
-
}, options.timeout);
|
|
197
|
-
}
|
|
198
|
-
|
|
199
|
-
proc.on("close", (code) => {
|
|
200
|
-
if (settled) return;
|
|
201
|
-
settled = true;
|
|
202
|
-
cleanup();
|
|
203
|
-
|
|
204
|
-
if (code !== 0 && !stdout.trim()) {
|
|
205
|
-
reject(new Error(`agent-browser failed (exit ${code}):\n${stderr || "unknown error"}`));
|
|
206
|
-
return;
|
|
207
|
-
}
|
|
208
|
-
|
|
209
|
-
try {
|
|
210
|
-
const results = JSON.parse(stdout) as AgentBrowserBatchItem[];
|
|
211
|
-
resolve(results);
|
|
212
|
-
} catch (err: any) {
|
|
213
|
-
reject(new Error(
|
|
214
|
-
`Failed to parse agent-browser output: ${err.message}\nstdout: ${stdout}\nstderr: ${stderr}`
|
|
215
|
-
));
|
|
216
|
-
}
|
|
217
|
-
});
|
|
218
|
-
|
|
219
|
-
proc.on("error", (err: any) => {
|
|
220
|
-
if (err.code === "ENOENT") {
|
|
221
|
-
settleReject(new Error(
|
|
222
|
-
"agent-browser is not installed.\n\nInstall it with:\n npm i -g agent-browser && agent-browser install\n\nThen run: agent-browser doctor"
|
|
223
|
-
));
|
|
224
|
-
} else {
|
|
225
|
-
settleReject(err);
|
|
226
|
-
}
|
|
227
|
-
});
|
|
228
|
-
|
|
229
|
-
if (options.signal) {
|
|
230
|
-
if (options.signal.aborted) kill();
|
|
231
|
-
else options.signal.addEventListener("abort", kill, { once: true });
|
|
232
|
-
}
|
|
233
|
-
|
|
234
|
-
proc.stdin.write(JSON.stringify(commands));
|
|
235
|
-
proc.stdin.end();
|
|
236
|
-
});
|
|
237
|
-
}
|
|
238
|
-
|
|
239
|
-
function closeAgentBrowserSession(session: string, signal?: AbortSignal): Promise<void> {
|
|
240
|
-
return new Promise((resolve) => {
|
|
241
|
-
const proc = spawn("agent-browser", ["--session", session, "close"], {
|
|
242
|
-
shell: false,
|
|
243
|
-
stdio: ["ignore", "ignore", "ignore"],
|
|
244
|
-
});
|
|
245
|
-
const done = () => resolve();
|
|
246
|
-
proc.on("close", done);
|
|
247
|
-
proc.on("error", done);
|
|
248
|
-
if (signal) {
|
|
249
|
-
const kill = () => proc.kill("SIGTERM");
|
|
250
|
-
if (signal.aborted) kill();
|
|
251
|
-
else signal.addEventListener("abort", kill, { once: true });
|
|
252
|
-
}
|
|
253
|
-
});
|
|
254
|
-
}
|
|
24
|
+
import {
|
|
25
|
+
type BrowseAction,
|
|
26
|
+
buildBatchCommands,
|
|
27
|
+
runAgentBrowserBatch,
|
|
28
|
+
closeAgentBrowserSession,
|
|
29
|
+
} from "./utils/agent-browser";
|
|
30
|
+
import { writeWithFallback } from "./utils/output-sink";
|
|
31
|
+
import { abbreviateUrl, getErrorText, normalizeWhitespace } from "./utils/render-helpers";
|
|
255
32
|
|
|
256
33
|
export const WebBrowseActionSchema = Type.Object({
|
|
257
34
|
type: StringEnum(["click", "fill", "type", "press", "wait", "wait_selector", "scroll"] as const),
|
|
@@ -277,6 +54,34 @@ export const WebBrowseParamsSchema = Type.Object({
|
|
|
277
54
|
|
|
278
55
|
export type WebBrowseInput = Static<typeof WebBrowseParamsSchema>;
|
|
279
56
|
|
|
57
|
+
function formatBrowseStep(action: BrowseAction): string {
|
|
58
|
+
switch (action.type) {
|
|
59
|
+
case "click":
|
|
60
|
+
return `click ${action.selector ?? ""}`;
|
|
61
|
+
case "fill":
|
|
62
|
+
return `fill ${action.selector ?? ""} "${action.value ?? ""}"`;
|
|
63
|
+
case "type":
|
|
64
|
+
return `type ${action.selector ?? ""} "${action.value ?? ""}"`;
|
|
65
|
+
case "press":
|
|
66
|
+
return action.selector
|
|
67
|
+
? `focus ${action.selector} + press ${action.key ?? ""}`
|
|
68
|
+
: `press ${action.key ?? ""}`;
|
|
69
|
+
case "wait":
|
|
70
|
+
return action.selector
|
|
71
|
+
? `wait for ${action.selector}`
|
|
72
|
+
: `wait ${action.ms ?? 0}ms`;
|
|
73
|
+
case "wait_selector":
|
|
74
|
+
return `wait for ${action.selector ?? ""} (${action.state ?? "visible"})`;
|
|
75
|
+
case "scroll": {
|
|
76
|
+
const dir = action.direction ?? "down";
|
|
77
|
+
if (dir === "top" || dir === "bottom") return `scroll to ${dir}`;
|
|
78
|
+
return `scroll ${dir}${action.amount ? ` ${action.amount}px` : ""}`;
|
|
79
|
+
}
|
|
80
|
+
default:
|
|
81
|
+
return String((action as any).type);
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
|
|
280
85
|
const webBrowseTool = defineTool({
|
|
281
86
|
name: "web_browse",
|
|
282
87
|
label: "Web Browse",
|
|
@@ -303,10 +108,22 @@ const webBrowseTool = defineTool({
|
|
|
303
108
|
async execute(toolCallId, params, signal, onUpdate) {
|
|
304
109
|
let fullOutputPath: string | undefined;
|
|
305
110
|
const session = `pi-web-browse-${toolCallId}`;
|
|
111
|
+
const actionCount = params.actions.length;
|
|
112
|
+
const steps = [
|
|
113
|
+
`open ${params.url}`,
|
|
114
|
+
...(params.actions as BrowseAction[]).map(formatBrowseStep),
|
|
115
|
+
params.selector ? `get text ${params.selector}` : "snapshot",
|
|
116
|
+
"get title",
|
|
117
|
+
"get url",
|
|
118
|
+
];
|
|
119
|
+
|
|
120
|
+
// Stream planned steps for isPartial rendering
|
|
121
|
+
onUpdate?.({
|
|
122
|
+
content: [{ type: "text", text: `Browsing ${params.url} (${actionCount} actions)...` }],
|
|
123
|
+
details: { url: params.url, steps, actionCount, selector: params.selector, headless: params.headless ?? true },
|
|
124
|
+
});
|
|
306
125
|
|
|
307
126
|
try {
|
|
308
|
-
onUpdate?.({ content: [{ type: "text", text: `Browsing ${params.url}...` }], details: {} });
|
|
309
|
-
|
|
310
127
|
const commands = buildBatchCommands(
|
|
311
128
|
params.url,
|
|
312
129
|
params.actions as BrowseAction[],
|
|
@@ -347,6 +164,7 @@ const webBrowseTool = defineTool({
|
|
|
347
164
|
|
|
348
165
|
const title = titleResult?.result?.title ?? "";
|
|
349
166
|
const finalUrl = urlResult?.result?.url ?? params.url;
|
|
167
|
+
const preview = content.replace(/\s+/g, " ").trim().slice(0, 500);
|
|
350
168
|
|
|
351
169
|
const lines: string[] = [
|
|
352
170
|
`Title: ${title || "(no title)"}`,
|
|
@@ -358,24 +176,23 @@ const webBrowseTool = defineTool({
|
|
|
358
176
|
];
|
|
359
177
|
|
|
360
178
|
const rawText = lines.join("\n");
|
|
361
|
-
const
|
|
362
|
-
|
|
363
|
-
maxBytes: DEFAULT_MAX_BYTES,
|
|
179
|
+
const sink = await writeWithFallback(rawText, {
|
|
180
|
+
tmpPrefix: "pi-web-browse-",
|
|
364
181
|
});
|
|
365
|
-
|
|
366
|
-
let finalText = truncation.content;
|
|
367
|
-
if (truncation.truncated) {
|
|
368
|
-
const fullOutputDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), "pi-web-browse-"));
|
|
369
|
-
fullOutputPath = path.join(fullOutputDir, "output.txt");
|
|
370
|
-
await fs.promises.writeFile(fullOutputPath, rawText, "utf-8");
|
|
371
|
-
finalText += `\n\n[Output truncated: ${truncation.outputLines} of ${truncation.totalLines} lines (${formatSize(truncation.outputBytes)} of ${formatSize(truncation.totalBytes)}). Full output saved to: ${fullOutputPath}]`;
|
|
372
|
-
}
|
|
373
|
-
|
|
374
|
-
onUpdate?.({ content: [{ type: "text", text: `Extracted from ${finalUrl}` }], details: {} });
|
|
182
|
+
fullOutputPath = sink.fullOutputPath;
|
|
375
183
|
|
|
376
184
|
return {
|
|
377
|
-
content: [{ type: "text", text:
|
|
378
|
-
details: {
|
|
185
|
+
content: [{ type: "text", text: sink.text }],
|
|
186
|
+
details: {
|
|
187
|
+
title,
|
|
188
|
+
url: finalUrl,
|
|
189
|
+
fullOutputPath,
|
|
190
|
+
preview,
|
|
191
|
+
selector: params.selector,
|
|
192
|
+
headless: params.headless ?? true,
|
|
193
|
+
actionCount,
|
|
194
|
+
steps,
|
|
195
|
+
},
|
|
379
196
|
};
|
|
380
197
|
} catch (err: any) {
|
|
381
198
|
throw new Error(`Error browsing ${params.url}: ${err.message ?? err}`);
|
|
@@ -388,24 +205,109 @@ const webBrowseTool = defineTool({
|
|
|
388
205
|
let text = theme.fg("toolTitle", theme.bold("web_browse "));
|
|
389
206
|
text += theme.fg("muted", args.url);
|
|
390
207
|
text += theme.fg("dim", ` (${args.actions?.length ?? 0} actions)`);
|
|
208
|
+
if (args.selector) {
|
|
209
|
+
text += theme.fg("dim", ` [selector=${args.selector}]`);
|
|
210
|
+
}
|
|
211
|
+
if (args.headless === false) {
|
|
212
|
+
text += theme.fg("dim", " [headed]");
|
|
213
|
+
}
|
|
391
214
|
return new Text(text, 0, 0);
|
|
392
215
|
},
|
|
393
216
|
|
|
394
|
-
renderResult(result, { expanded, isPartial }, theme) {
|
|
217
|
+
renderResult(result, { expanded, isPartial }, theme, context) {
|
|
218
|
+
const isError = context?.isError ?? false;
|
|
219
|
+
|
|
395
220
|
if (isPartial) {
|
|
396
|
-
|
|
221
|
+
const progress = (result.details as any);
|
|
222
|
+
const steps = progress?.steps as string[] | undefined;
|
|
223
|
+
const url = progress?.url as string | undefined;
|
|
224
|
+
const actionCount = progress?.actionCount ?? steps?.length ?? 0;
|
|
225
|
+
let text = theme.fg("warning", "Browsing");
|
|
226
|
+
if (url) {
|
|
227
|
+
text += ` ${theme.fg("dim", abbreviateUrl(url))}`;
|
|
228
|
+
}
|
|
229
|
+
text += theme.fg("dim", ` (${actionCount} steps)`);
|
|
230
|
+
if (steps && steps.length > 0) {
|
|
231
|
+
// Limit to first 5 steps to avoid blowing up vertical space
|
|
232
|
+
const maxPreviewSteps = 5;
|
|
233
|
+
for (let i = 0; i < Math.min(steps.length, maxPreviewSteps); i++) {
|
|
234
|
+
text += `\n ${theme.fg("dim", `[${i + 1}] ${steps[i]}`)}`;
|
|
235
|
+
}
|
|
236
|
+
if (steps.length > maxPreviewSteps) {
|
|
237
|
+
text += `\n ${theme.fg("muted", `... and ${steps.length - maxPreviewSteps} more`)}`;
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
return new Text(text, 0, 0);
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
const details = result.details as {
|
|
244
|
+
title?: string;
|
|
245
|
+
url?: string;
|
|
246
|
+
fullOutputPath?: string;
|
|
247
|
+
preview?: string;
|
|
248
|
+
selector?: string;
|
|
249
|
+
headless?: boolean;
|
|
250
|
+
actionCount?: number;
|
|
251
|
+
steps?: string[];
|
|
252
|
+
} | undefined;
|
|
253
|
+
|
|
254
|
+
if (isError) {
|
|
255
|
+
const errText = getErrorText(result);
|
|
256
|
+
let text = theme.fg("error", "✗ Browse failed");
|
|
257
|
+
if (details?.url) text += ` ${theme.fg("dim", abbreviateUrl(details.url))}`;
|
|
258
|
+
text += `\n\n ${theme.fg("toolOutput", errText)}`;
|
|
259
|
+
if (details?.steps && details.steps.length > 0) {
|
|
260
|
+
text += `\n\n${theme.fg("dim", "Steps attempted:")}`;
|
|
261
|
+
for (let i = 0; i < details.steps.length; i++) {
|
|
262
|
+
text += `\n ${theme.fg("dim", `[${i + 1}] ${details.steps[i]}`)}`;
|
|
263
|
+
}
|
|
264
|
+
}
|
|
265
|
+
return new Text(text, 0, 0);
|
|
397
266
|
}
|
|
398
|
-
|
|
267
|
+
|
|
399
268
|
let text = theme.fg("success", "✓ Browsed");
|
|
400
269
|
if (details?.title) {
|
|
401
|
-
text += theme.fg("
|
|
270
|
+
text += ` ${theme.fg("toolTitle", details.title)}`;
|
|
402
271
|
}
|
|
403
|
-
if (
|
|
404
|
-
text += `\n${theme.fg("dim", details.url)}`;
|
|
272
|
+
if (details?.url) {
|
|
273
|
+
text += `\n ${theme.fg("dim", abbreviateUrl(details.url))}`;
|
|
274
|
+
}
|
|
275
|
+
if (details?.actionCount) {
|
|
276
|
+
text += theme.fg("muted", ` (${details.actionCount} actions)`);
|
|
277
|
+
}
|
|
278
|
+
|
|
279
|
+
if (details?.selector) {
|
|
280
|
+
text += `\n ${theme.fg("dim", `[selector=${details.selector}]`)}`;
|
|
405
281
|
}
|
|
406
|
-
if (
|
|
407
|
-
text +=
|
|
282
|
+
if (details?.headless === false) {
|
|
283
|
+
text += `${details?.selector ? "" : "\n "}${theme.fg("dim", "[headed]")}`;
|
|
408
284
|
}
|
|
285
|
+
|
|
286
|
+
if (!expanded && details?.preview) {
|
|
287
|
+
const snippet = normalizeWhitespace(details.preview);
|
|
288
|
+
const short = snippet.length > 160
|
|
289
|
+
? snippet.slice(0, 160).replace(/\s+\S*$/, "") + "..."
|
|
290
|
+
: snippet;
|
|
291
|
+
text += `\n\n ${theme.fg("muted", short)}`;
|
|
292
|
+
}
|
|
293
|
+
|
|
294
|
+
if (expanded) {
|
|
295
|
+
if (details?.steps && details.steps.length > 0) {
|
|
296
|
+
text += `\n\n${theme.fg("dim", "Steps:")}`;
|
|
297
|
+
for (let i = 0; i < details.steps.length; i++) {
|
|
298
|
+
text += `\n ${theme.fg("dim", `[${i + 1}] ${details.steps[i]}`)}`;
|
|
299
|
+
}
|
|
300
|
+
}
|
|
301
|
+
|
|
302
|
+
if (details?.preview) {
|
|
303
|
+
text += `\n\n ${theme.fg("muted", normalizeWhitespace(details.preview))}`;
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
if (details?.fullOutputPath) {
|
|
307
|
+
text += `\n\n${theme.fg("accent", `Full output: ${details.fullOutputPath}`)}`;
|
|
308
|
+
}
|
|
309
|
+
}
|
|
310
|
+
|
|
409
311
|
return new Text(text, 0, 0);
|
|
410
312
|
},
|
|
411
313
|
});
|
package/extensions/web_fetch.ts
CHANGED
|
@@ -13,7 +13,6 @@
|
|
|
13
13
|
import {
|
|
14
14
|
defineTool,
|
|
15
15
|
type ExtensionAPI,
|
|
16
|
-
truncateHead,
|
|
17
16
|
formatSize,
|
|
18
17
|
DEFAULT_MAX_BYTES,
|
|
19
18
|
DEFAULT_MAX_LINES,
|
|
@@ -23,7 +22,10 @@ import { Type, type Static } from "typebox";
|
|
|
23
22
|
import * as fs from "node:fs";
|
|
24
23
|
import * as os from "node:os";
|
|
25
24
|
import * as path from "node:path";
|
|
26
|
-
import {
|
|
25
|
+
import { runScraplingWithFallback } from "./utils/scrapling";
|
|
26
|
+
import { extractPreview } from "./utils/content-preview";
|
|
27
|
+
import { writeWithFallback } from "./utils/output-sink";
|
|
28
|
+
import { abbreviateUrl, getDomain, getErrorText, normalizeWhitespace, formatExtraction } from "./utils/render-helpers";
|
|
27
29
|
|
|
28
30
|
export const WebFetchParamsSchema = Type.Object({
|
|
29
31
|
url: Type.String({ description: "Full URL to fetch (e.g. https://example.com/article)" }),
|
|
@@ -45,9 +47,11 @@ const webFetchTool = defineTool({
|
|
|
45
47
|
].join(" "),
|
|
46
48
|
promptSnippet: "Fetch full page content from a URL as markdown",
|
|
47
49
|
promptGuidelines: [
|
|
48
|
-
"Use web_fetch
|
|
49
|
-
"
|
|
50
|
+
"Use web_fetch to read a single static page (article, doc, or blog) when given a specific URL.",
|
|
51
|
+
"For a single URL, always use web_fetch instead of web_batch_fetch.",
|
|
50
52
|
"If the page is dynamic/JavaScript-heavy, the tool automatically uses browser automation.",
|
|
53
|
+
"When reading multiple (2–5) pages at once (e.g., after web_search), prefer web_batch_fetch over repeated web_fetch calls.",
|
|
54
|
+
"Always pass the full URL including https://.",
|
|
51
55
|
],
|
|
52
56
|
parameters: WebFetchParamsSchema,
|
|
53
57
|
|
|
@@ -57,46 +61,37 @@ const webFetchTool = defineTool({
|
|
|
57
61
|
let tmpFull: string | undefined;
|
|
58
62
|
|
|
59
63
|
try {
|
|
60
|
-
const
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
64
|
+
const { ok, stderr } = await runScraplingWithFallback(
|
|
65
|
+
params.url,
|
|
66
|
+
tmpFile,
|
|
67
|
+
{ selector: params.selector, stealthy: params.stealthy, noGetFallback: params.stealthy },
|
|
68
|
+
signal,
|
|
69
|
+
);
|
|
65
70
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
if (exitCode !== 0) {
|
|
69
|
-
// Try fallback to simple HTTP GET if fetch/stealthy-fetch failed
|
|
70
|
-
if (!params.stealthy) {
|
|
71
|
-
const fallback = await runScrapling(["extract", "get", params.url, tmpFile, "--ai-targeted"], signal);
|
|
72
|
-
if (fallback.exitCode !== 0) {
|
|
73
|
-
throw new Error(`Failed to fetch ${params.url}\n\nscrapling error:\n${stderr || fallback.stderr}`);
|
|
74
|
-
}
|
|
75
|
-
} else {
|
|
76
|
-
throw new Error(`Failed to fetch ${params.url}\n\nscrapling error:\n${stderr}`);
|
|
77
|
-
}
|
|
71
|
+
if (!ok) {
|
|
72
|
+
throw new Error(`Failed to fetch ${params.url}\n\nscrapling error:\n${stderr}`);
|
|
78
73
|
}
|
|
79
74
|
|
|
80
75
|
const content = await fs.promises.readFile(tmpFile, "utf-8");
|
|
81
76
|
const stats = await fs.promises.stat(tmpFile);
|
|
82
77
|
|
|
78
|
+
const preview = extractPreview(content, 500);
|
|
83
79
|
const rawText = `Fetched: ${params.url}\nSize: ${stats.size} bytes\n\n---\n\n${content}`;
|
|
84
|
-
const
|
|
85
|
-
|
|
86
|
-
maxBytes: DEFAULT_MAX_BYTES,
|
|
80
|
+
const sink = await writeWithFallback(rawText, {
|
|
81
|
+
tmpPrefix: "pi-web-fetch-full-",
|
|
87
82
|
});
|
|
88
|
-
|
|
89
|
-
let finalText = truncation.content;
|
|
90
|
-
if (truncation.truncated) {
|
|
91
|
-
const tmpFullDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), "pi-web-fetch-full-"));
|
|
92
|
-
tmpFull = path.join(tmpFullDir, "output.txt");
|
|
93
|
-
await fs.promises.writeFile(tmpFull, rawText, "utf-8");
|
|
94
|
-
finalText += `\n\n[Output truncated: ${truncation.outputLines} of ${truncation.totalLines} lines (${formatSize(truncation.outputBytes)} of ${formatSize(truncation.totalBytes)}). Full output saved to: ${tmpFull}]`;
|
|
95
|
-
}
|
|
83
|
+
tmpFull = sink.fullOutputPath;
|
|
96
84
|
|
|
97
85
|
return {
|
|
98
|
-
content: [{ type: "text", text:
|
|
99
|
-
details: {
|
|
86
|
+
content: [{ type: "text", text: sink.text }],
|
|
87
|
+
details: {
|
|
88
|
+
url: params.url,
|
|
89
|
+
bytes: stats.size,
|
|
90
|
+
fullOutputPath: tmpFull,
|
|
91
|
+
preview,
|
|
92
|
+
selector: params.selector,
|
|
93
|
+
stealthy: params.stealthy,
|
|
94
|
+
},
|
|
100
95
|
};
|
|
101
96
|
} catch (err: any) {
|
|
102
97
|
throw new Error(`Error fetching ${params.url}: ${err.message ?? err}`);
|
|
@@ -108,27 +103,73 @@ const webFetchTool = defineTool({
|
|
|
108
103
|
renderCall(args, theme) {
|
|
109
104
|
let text = theme.fg("toolTitle", theme.bold("web_fetch "));
|
|
110
105
|
text += theme.fg("muted", args.url);
|
|
106
|
+
if (args.stealthy) {
|
|
107
|
+
text += theme.fg("dim", " [stealthy]");
|
|
108
|
+
}
|
|
111
109
|
if (args.selector) {
|
|
112
|
-
text += theme.fg("dim", ` selector=${args.selector}`);
|
|
110
|
+
text += theme.fg("dim", ` [selector=${args.selector}]`);
|
|
113
111
|
}
|
|
114
112
|
return new Text(text, 0, 0);
|
|
115
113
|
},
|
|
116
114
|
|
|
117
|
-
renderResult(result, { expanded, isPartial }, theme) {
|
|
115
|
+
renderResult(result, { expanded, isPartial }, theme, context) {
|
|
116
|
+
const isError = context?.isError ?? false;
|
|
117
|
+
|
|
118
118
|
if (isPartial) {
|
|
119
|
-
|
|
119
|
+
const url = (result.details as any)?.url as string | undefined;
|
|
120
|
+
const domain = url ? getDomain(url) : "";
|
|
121
|
+
const label = domain ? `Fetching ${domain}...` : "Fetching...";
|
|
122
|
+
return new Text(theme.fg("warning", label), 0, 0);
|
|
120
123
|
}
|
|
121
|
-
const details = result.details as {
|
|
124
|
+
const details = result.details as {
|
|
125
|
+
url?: string;
|
|
126
|
+
bytes?: number;
|
|
127
|
+
fullOutputPath?: string;
|
|
128
|
+
preview?: string;
|
|
129
|
+
selector?: string;
|
|
130
|
+
stealthy?: boolean;
|
|
131
|
+
} | undefined;
|
|
132
|
+
|
|
133
|
+
if (isError) {
|
|
134
|
+
const errText = getErrorText(result);
|
|
135
|
+
let text = theme.fg("error", "✗ Fetch failed");
|
|
136
|
+
if (details?.url) text += ` ${theme.fg("dim", abbreviateUrl(details.url))}`;
|
|
137
|
+
text += `\n\n ${theme.fg("toolOutput", errText)}`;
|
|
138
|
+
return new Text(text, 0, 0);
|
|
139
|
+
}
|
|
140
|
+
|
|
122
141
|
let text = theme.fg("success", "✓ Fetched");
|
|
123
|
-
if (details?.
|
|
124
|
-
text += theme.fg("
|
|
142
|
+
if (details?.url) {
|
|
143
|
+
text += ` ${theme.fg("dim", abbreviateUrl(details.url))}`;
|
|
125
144
|
}
|
|
145
|
+
if (details?.bytes && details?.preview) {
|
|
146
|
+
text += ` ${theme.fg("muted", formatExtraction(details.bytes, details.preview.length))}`;
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
if (details?.selector) {
|
|
150
|
+
text += `\n ${theme.fg("dim", `[selector=${details.selector}]`)}`;
|
|
151
|
+
}
|
|
152
|
+
if (details?.stealthy) {
|
|
153
|
+
text += `${details?.selector ? "" : "\n "}${theme.fg("dim", "[stealthy]")}`;
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
if (!expanded && details?.preview) {
|
|
157
|
+
const snippet = normalizeWhitespace(details.preview);
|
|
158
|
+
const short = snippet.length > 160
|
|
159
|
+
? snippet.slice(0, 160).replace(/\s+\S*$/, "") + "..."
|
|
160
|
+
: snippet;
|
|
161
|
+
text += `\n\n ${theme.fg("muted", short)}`;
|
|
162
|
+
}
|
|
163
|
+
|
|
126
164
|
if (expanded) {
|
|
127
|
-
|
|
165
|
+
if (details?.preview) {
|
|
166
|
+
text += `\n\n ${theme.fg("muted", normalizeWhitespace(details.preview))}`;
|
|
167
|
+
}
|
|
128
168
|
if (details?.fullOutputPath) {
|
|
129
|
-
text += `\n${theme.fg("
|
|
169
|
+
text += `\n\n${theme.fg("accent", `Full output: ${details.fullOutputPath}`)}`;
|
|
130
170
|
}
|
|
131
171
|
}
|
|
172
|
+
|
|
132
173
|
return new Text(text, 0, 0);
|
|
133
174
|
},
|
|
134
175
|
});
|