@mammothb/pi-webfetch 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.ts CHANGED
@@ -1,35 +1,6 @@
1
1
  import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
2
- import { Type } from "typebox";
2
+ import { createWebfetchTool } from "./src/webfetch.js";
3
3
 
4
4
  export default function (pi: ExtensionAPI) {
5
- pi.registerTool({
6
- name: "webfetch",
7
- label: "Web Fetch",
8
- description:
9
- "Fetches content from a specified URL and converts it to the requested format (markdown by default).",
10
- parameters: Type.Object({
11
- url: Type.String({ description: "The URL to fetch content from" }),
12
- format: Type.Optional(
13
- Type.String({
14
- description:
15
- "The format to return the content in - text, markdown, or html (default: 'markdown')",
16
- }),
17
- ),
18
- }),
19
- async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
20
- const url = params.url;
21
- const format = params.format ?? "markdown";
22
-
23
- // TODO: implement actual fetch + conversion
24
- return {
25
- content: [
26
- {
27
- type: "text",
28
- text: `Fetched ${url} as ${format} (placeholder).`,
29
- },
30
- ],
31
- details: { url, format },
32
- };
33
- },
34
- });
5
+ pi.registerTool(createWebfetchTool());
35
6
  }
package/package.json CHANGED
@@ -1,19 +1,38 @@
1
1
  {
2
2
  "name": "@mammothb/pi-webfetch",
3
- "version": "0.1.0",
3
+ "version": "0.2.0",
4
4
  "description": "A pi extension that adds a webfetch tool for fetching and converting web content",
5
5
  "keywords": [
6
6
  "pi-package"
7
7
  ],
8
8
  "license": "MIT",
9
+ "files": [
10
+ "index.ts",
11
+ "src"
12
+ ],
9
13
  "pi": {
10
14
  "extensions": [
11
15
  "./index.ts"
12
16
  ]
13
17
  },
18
+ "dependencies": {
19
+ "htmlparser2": "12.0.0",
20
+ "turndown": "7.2.4"
21
+ },
22
+ "scripts": {
23
+ "test": "vitest run",
24
+ "test:coverage": "vitest run --coverage"
25
+ },
26
+ "devDependencies": {
27
+ "@types/turndown": "5.0.6",
28
+ "@vitest/coverage-v8": "4.1.7",
29
+ "typescript": "6.0.3",
30
+ "vitest": "4.1.7"
31
+ },
14
32
  "peerDependencies": {
15
- "@earendil-works/pi-coding-agent": "*",
16
33
  "@earendil-works/pi-ai": "*",
34
+ "@earendil-works/pi-coding-agent": "*",
35
+ "@earendil-works/pi-tui": "*",
17
36
  "typebox": "*"
18
37
  }
19
38
  }
@@ -0,0 +1,28 @@
1
+ import type { Format, Header } from "./types";
2
+
3
/**
 * Builds the request headers sent with a fetch.
 *
 * The `Accept` header is weighted towards the requested output format; any
 * value outside the known formats falls back to a browser-style `Accept`
 * header. `User-Agent` and `Accept-Language` are fixed.
 *
 * @param format - The output format the caller asked for.
 * @returns The `User-Agent`, `Accept` and `Accept-Language` headers.
 */
export function buildHeaders(format: Format): Header {
  // A Map (rather than a plain object) so that only the listed keys can match.
  const acceptByFormat = new Map<string, string>([
    [
      "markdown",
      "text/markdown;q=1.0, text/x-markdown;q=0.9, text/plain;q=0.8, text/html;q=0.7, */*;q=0.1",
    ],
    [
      "text",
      "text/plain;q=1.0, text/markdown;q=0.9, text/html;q=0.8, */*;q=0.1",
    ],
    [
      "html",
      "text/html;q=1.0, application/xhtml+xml;q=0.9, text/plain;q=0.8, text/markdown;q=0.7, */*;q=0.1",
    ],
  ]);
  const browserAccept =
    "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8";

  return {
    "User-Agent":
      "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36",
    Accept: acceptByFormat.get(format) ?? browserAccept,
    "Accept-Language": "en-US,en;q=0.9",
  };
}
@@ -0,0 +1,57 @@
1
+ import { Parser } from "htmlparser2";
2
+ import TurndownService from "turndown";
3
+
4
/**
 * Converts a fetched body to Markdown.
 *
 * Only HTML-like bodies (`text/html`, `application/xhtml+xml`) are converted;
 * any other content type is returned unchanged.
 *
 * @param contentType - The raw `Content-Type` header value of the response.
 * @param html - The decoded response body.
 * @returns Markdown for HTML bodies, otherwise `html` as-is.
 */
export function toMarkdown(contentType: string, html: string): string {
  // Media types are case-insensitive, so normalise before matching.
  const mime = contentType.toLowerCase();
  if (!mime.includes("text/html") && !mime.includes("application/xhtml+xml")) {
    return html;
  }
  const turndownService = new TurndownService({
    headingStyle: "atx",
    hr: "---",
    bulletListMarker: "-",
    codeBlockStyle: "fenced",
    emDelimiter: "*",
  });
  // Drop non-content elements so they do not end up in the Markdown output.
  turndownService.remove(["link", "meta", "script", "style"]);
  return turndownService.turndown(html);
}
18
+
19
/**
 * Extracts the visible text from a fetched body.
 *
 * Only HTML-like bodies (`text/html`, `application/xhtml+xml`) are parsed;
 * any other content type is returned unchanged. For HTML, text nodes are
 * concatenated in document order, everything inside non-content elements
 * (scripts, styles, embeds, ...) is dropped, and the result is trimmed.
 *
 * @param contentType - The raw `Content-Type` header value of the response.
 * @param html - The decoded response body.
 * @returns Plain text for HTML bodies, otherwise `html` as-is.
 */
export function toText(contentType: string, html: string): string {
  // Media types are case-insensitive, so normalise before matching.
  const mime = contentType.toLowerCase();
  if (!mime.includes("text/html") && !mime.includes("application/xhtml+xml")) {
    return html;
  }

  const tagsToSkip = new Set([
    "script",
    "style",
    "noscript",
    "iframe",
    "object",
    "embed",
  ]);
  let text = "";
  // Nesting depth inside a skipped element; 0 means text is being collected.
  let skipDepth = 0;

  const parser = new Parser({
    onopentag(name, _attribs, _isImplied) {
      // Once inside a skipped element, count every nested tag so the matching
      // close tags unwind the depth correctly.
      if (skipDepth > 0 || tagsToSkip.has(name)) {
        skipDepth++;
      }
    },
    ontext(data) {
      if (skipDepth === 0) {
        text += data;
      }
    },
    onclosetag(_name, _isImplied) {
      if (skipDepth > 0) {
        skipDepth--;
      }
    },
  });

  parser.write(html);
  parser.end();

  return text.trim();
}
@@ -0,0 +1,13 @@
1
+ import { StringEnum } from "@earendil-works/pi-ai";
2
+ import type { Static } from "typebox";
3
+
4
+ export const FormatSchema = StringEnum(["text", "markdown", "html"] as const, {
5
+ description:
6
+ "The format to return the content in - text, markdown, or html (default: 'markdown')",
7
+ });
8
+ export type Format = Static<typeof FormatSchema>;
9
+
10
+ export type Header = Record<
11
+ "User-Agent" | "Accept" | "Accept-Language",
12
+ string
13
+ >;
@@ -0,0 +1,389 @@
1
+ import type { ImageContent, TextContent } from "@earendil-works/pi-ai";
2
+ import {
3
+ getMarkdownTheme,
4
+ keyText,
5
+ type Theme,
6
+ type ToolDefinition,
7
+ } from "@earendil-works/pi-coding-agent";
8
+ import { Container, Markdown, Spacer, Text } from "@earendil-works/pi-tui";
9
+ import Type from "typebox";
10
+ import { buildHeaders } from "./lib/headers.js";
11
+ import { toMarkdown, toText } from "./lib/processors.js";
12
+ import { FormatSchema, type Header } from "./lib/types.js";
13
+
14
+ const COLLAPSED_PREVIEW_LINES = 7;
15
+ const DEFAULT_TIMEOUT = 30; // 30 seconds
16
+ const MAX_TIMEOUT = 120; // 2 minutes
17
+ const MAX_RESPONSE_SIZE = 5 * 1024 * 1024; // 5 MB
18
+ const USER_AGENT = "opencode";
19
+
20
+ interface WebfetchDetails {
21
+ url: string;
22
+ contentType: string;
23
+ format: string;
24
+ displayTitle: string;
25
+ size?: number;
26
+ isImage?: boolean;
27
+ imageDataUrl?: string;
28
+ error?: boolean;
29
+ errorSummary?: string;
30
+ }
31
+
32
/**
 * Input schema for the webfetch tool: a required http(s) URL, an optional
 * output format, and an optional timeout in seconds capped at MAX_TIMEOUT.
 */
const Parameters = Type.Object({
  url: Type.String({
    description: "The URL to fetch content from",
    pattern: "^https?://.*",
  }),
  format: Type.Optional(FormatSchema),
  timeout: Type.Optional(
    Type.Number({
      // Derived from MAX_TIMEOUT so the schema, its description and the
      // runtime clamp cannot drift apart.
      description: `Optional timeout in seconds (max ${MAX_TIMEOUT})`,
      exclusiveMinimum: 0,
      maximum: MAX_TIMEOUT,
    }),
  ),
});
46
+
47
/**
 * Fetches `url` with a hard timeout, retrying once with the tool's own
 * User-Agent when the first attempt is rejected by a Cloudflare challenge.
 *
 * The body is read incrementally so the size limit is enforced while
 * downloading, even when the server sends no (or a wrong) Content-Length.
 *
 * @param url - The URL to request.
 * @param headers - Request headers; `User-Agent` is used for the first attempt.
 * @param signal - Optional external abort signal; aborting it cancels the request.
 * @param timeoutMs - Overall time budget in milliseconds, shared by both attempts.
 * @returns The raw body and the response Content-Type (defaults to "text/html").
 * @throws Error "Request aborted" if `signal` is already aborted,
 *   "Request timed out" when the time budget is exceeded,
 *   "HTTP <status>: <text>" for non-2xx responses, and
 *   "Response too large ..." when the body exceeds MAX_RESPONSE_SIZE.
 */
async function fetchWithRetry(
  url: string,
  headers: Header,
  signal: AbortSignal | undefined,
  timeoutMs: number,
): Promise<{ body: ArrayBuffer; contentType: string }> {
  // Check before arming the timer so an already-aborted call leaves no
  // pending timeout behind.
  if (signal?.aborted) {
    throw new Error("Request aborted");
  }

  const controller = new AbortController();
  const onAbort = () => controller.abort();
  const timeoutId = setTimeout(onAbort, timeoutMs);
  // Forward external cancellation to the internal controller.
  signal?.addEventListener("abort", onAbort, { once: true });

  const tooLarge = () =>
    new Error(
      `Response too large (exceeds ${formatSize(MAX_RESPONSE_SIZE)} limit)`,
    );

  try {
    const doFetch = (userAgent: string) =>
      fetch(url, {
        method: "GET",
        headers: { ...headers, "User-Agent": userAgent },
        signal: controller.signal,
        redirect: "follow",
      });

    let response = await doFetch(headers["User-Agent"]);
    // Retry with honest UA if blocked by Cloudflare bot detection
    if (isBlockedByCloudflare(response)) {
      try {
        // Release the challenge response so its connection is not left
        // open while the retry runs.
        await response.body?.cancel();
      } catch {
        // Best effort: failing to discard the old body must not block the retry.
      }
      response = await doFetch(USER_AGENT);
    }

    if (!response.ok) {
      throw new Error(`HTTP ${response.status}: ${response.statusText}`);
    }

    // Cheap early rejection when the server declares the size up front.
    const contentLength = response.headers.get("content-length");
    if (contentLength && parseInt(contentLength, 10) > MAX_RESPONSE_SIZE) {
      throw tooLarge();
    }

    const contentType = response.headers.get("content-type") || "text/html";

    if (!response.body) {
      // No stream to read (e.g. an empty response); fall back to the
      // buffered API.
      const buffered = await response.arrayBuffer();
      if (buffered.byteLength > MAX_RESPONSE_SIZE) {
        throw tooLarge();
      }
      return { body: buffered, contentType };
    }

    // Stream the body and stop as soon as the limit is crossed instead of
    // buffering an arbitrarily large payload first.
    const reader = response.body.getReader();
    const chunks: Uint8Array[] = [];
    let received = 0;
    for (;;) {
      const { done, value } = await reader.read();
      if (done) {
        break;
      }
      received += value.byteLength;
      if (received > MAX_RESPONSE_SIZE) {
        throw tooLarge();
      }
      chunks.push(value);
    }

    const body = new ArrayBuffer(received);
    const view = new Uint8Array(body);
    let offset = 0;
    for (const chunk of chunks) {
      view.set(chunk, offset);
      offset += chunk.byteLength;
    }

    return { body, contentType };
  } catch (error) {
    // The internal controller fired but the caller did not cancel: that can
    // only have been the timeout.
    if (controller.signal.aborted && !signal?.aborted) {
      throw new Error("Request timed out");
    }
    throw error;
  } finally {
    clearTimeout(timeoutId);
    signal?.removeEventListener("abort", onAbort);
    // Tear down anything still in flight (unread error or oversized bodies).
    // This is a no-op once the body has been fully consumed.
    controller.abort();
  }
}
115
+
116
/**
 * Picks the title to show for a result: the display title when present,
 * otherwise the URL, otherwise the literal "Unknown URL".
 */
function formatTitle(details: WebfetchDetails): string {
  const { displayTitle, url } = details;
  if (displayTitle !== undefined && displayTitle !== null) {
    return displayTitle;
  }
  if (url !== undefined && url !== null) {
    return url;
  }
  return "Unknown URL";
}
119
+
120
/**
 * Formats a byte count for display as "N B", "N.N KB" or "N.N MB"
 * (1024-based), or "unknown size" when no count is available.
 */
function formatSize(bytes: number | undefined): string {
  if (bytes === undefined) {
    return "unknown size";
  }
  const kib = 1024;
  const mib = 1024 * 1024;
  return bytes < kib
    ? `${bytes} B`
    : bytes < mib
      ? `${(bytes / kib).toFixed(1)} KB`
      : `${(bytes / mib).toFixed(1)} MB`;
}
128
+
129
/**
 * Reports whether a response is a Cloudflare challenge rejection: a 403
 * whose `cf-mitigated` header equals "challenge".
 */
function isBlockedByCloudflare(response: Response): boolean {
  if (response.status !== 403) {
    return false;
  }
  const mitigation = response.headers.get("cf-mitigated");
  return mitigation === "challenge";
}
135
+
136
/**
 * Reports whether a MIME type should be returned as an image attachment:
 * any `image/*` type except `image/svg+xml` and `image/vnd.fastbidsheet`.
 *
 * @param mime - The MIME type to classify; compared exactly as given.
 */
function isImageAttachment(mime: string): boolean {
  if (!mime.startsWith("image/")) {
    return false;
  }
  const excluded = ["image/svg+xml", "image/vnd.fastbidsheet"];
  return !excluded.includes(mime);
}
143
+
144
/** Type guard that narrows a content part to a text part via its `type` tag. */
function isTextContent(c: TextContent | ImageContent): c is TextContent {
  const { type } = c;
  return type === "text";
}
147
+
148
/**
 * Builds the TUI view for a successful text fetch.
 *
 * Layout: a `url:` line (title plus format tag), an optional `size:` line,
 * then the content. Expanded, the whole content is rendered (markdown
 * directly, other formats inside a fenced code block). Collapsed, only the
 * first COLLAPSED_PREVIEW_LINES lines are shown, followed by a hint with the
 * number of hidden lines and the expand key.
 *
 * @param details - Metadata of the fetch result.
 * @param textContent - The text returned by the tool; empty renders header only.
 * @param expanded - Whether the full content should be shown.
 * @param theme - Theme used to colour the header and hint lines.
 * @returns A container holding the rendered components.
 */
function renderWebfetchResult(
  details: WebfetchDetails,
  textContent: string,
  expanded: boolean,
  theme: Theme,
): Container {
  const container = new Container();

  const title = formatTitle(details);
  const formatLabel = details.format ? ` [${details.format}]` : "";
  container.addChild(
    new Text(
      theme.fg("syntaxKeyword", "url: ") +
        theme.fg("syntaxString", title + formatLabel),
    ),
  );

  if (details.size !== undefined) {
    container.addChild(
      new Text(
        theme.fg("syntaxKeyword", "size: ") +
          theme.fg("syntaxString", formatSize(details.size)),
      ),
    );
  }

  if (!textContent) {
    return container;
  }

  container.addChild(new Spacer(1));

  const contentFormat = details.format ?? "markdown";
  const isMarkdown = contentFormat === "markdown";

  if (expanded) {
    if (isMarkdown) {
      container.addChild(new Markdown(textContent, 0, 0, getMarkdownTheme()));
    } else {
      // The fence must be longer than any backtick run in the content,
      // otherwise a "```" inside the body would close the block early.
      let longestRun = 0;
      for (const run of textContent.match(/`+/g) ?? []) {
        longestRun = Math.max(longestRun, run.length);
      }
      const fence = "`".repeat(Math.max(3, longestRun + 1));
      const highlighted = `${fence}${contentFormat}\n${textContent}\n${fence}`;
      container.addChild(new Markdown(highlighted, 0, 0, getMarkdownTheme()));
    }
    return container;
  }

  // Drop a single trailing empty line (from a final newline) so it is not
  // counted as hidden content; all other lines are kept.
  const lines = textContent
    .split("\n")
    .filter(
      (line, index, arr) =>
        line.length > 0 || index === 0 || index < arr.length - 1,
    );
  const previewLines = lines.slice(0, COLLAPSED_PREVIEW_LINES);
  const remaining = Math.max(0, lines.length - previewLines.length);

  const preview = previewLines.join("\n");
  if (isMarkdown) {
    container.addChild(new Markdown(preview, 0, 0, getMarkdownTheme()));
  } else {
    container.addChild(new Text(preview));
  }

  if (remaining > 0) {
    container.addChild(new Spacer(1));
    const expandKey = keyText("app.tools.expand") || "Ctrl+O";
    container.addChild(
      new Text(
        theme.fg("muted", `... (${remaining} more lines, `) +
          theme.fg("dim", expandKey) +
          theme.fg("muted", " to expand)"),
      ),
    );
  }
  return container;
}
220
+
221
/**
 * Decodes a fetched body to a string, honouring the `charset` parameter of
 * the Content-Type header when it names an encoding TextDecoder supports.
 * Falls back to UTF-8 when no charset is given or the label is unknown.
 */
function decodeBody(body: ArrayBuffer, contentType: string): string {
  const charset = /charset\s*=\s*"?([^";\s]+)/i.exec(contentType)?.[1];
  if (charset) {
    try {
      return new TextDecoder(charset).decode(body);
    } catch {
      // Unsupported encoding label: fall through to the UTF-8 default.
    }
  }
  return new TextDecoder().decode(body);
}

/**
 * Creates the `webfetch` tool definition.
 *
 * `execute` fetches the URL (with timeout and cancellation support) and
 * returns either an image attachment or the body converted to the requested
 * format. Fetch failures are not caught here; they propagate as thrown
 * errors. `renderResult` renders a compact or expanded view of the result.
 *
 * @returns The tool definition to pass to `registerTool`.
 */
export function createWebfetchTool(): ToolDefinition<
  typeof Parameters,
  WebfetchDetails
> {
  return {
    name: "webfetch",
    label: "Web Fetch",
    description: `- Fetches content from a specified URL
- Takes a URL and optional format as input
- Fetches the URL content, converts to requested format (markdown by default)
- Returns the content in the specified format
- Use this tool when you need to retrieve and analyze web content

Usage notes:
- IMPORTANT: if another tool is present that offers better web fetching capabilities, is more targeted to the task, or has fewer restrictions, prefer using that tool instead of this one.
- The URL must be a fully-formed valid URL
- Only http:// and https:// URLs are supported
- Format options: "markdown" (default), "text", or "html"
- This tool is read-only and does not modify any files
- Results may be summarized if the content is very large`,
    promptSnippet: "Fetch content from a URL",
    parameters: Parameters,
    async execute(_toolCallId, params, signal, _onUpdate, _ctx) {
      const url = params.url;
      const format = params.format ?? "markdown";

      // Report cancellation as a result rather than starting a request.
      if (signal?.aborted) {
        return {
          content: [{ type: "text", text: "Cancelled" }],
          details: {
            url,
            contentType: "",
            format,
            displayTitle: url,
            error: true,
            errorSummary: "Request cancelled",
          },
        };
      }

      const timeoutMs =
        Math.min(params.timeout ?? DEFAULT_TIMEOUT, MAX_TIMEOUT) * 1000;

      const headers = buildHeaders(format);
      const { body, contentType } = await fetchWithRetry(
        url,
        headers,
        signal,
        timeoutMs,
      );

      // Bare media type without parameters, normalised for comparisons.
      const mime = contentType.split(";")[0]?.trim().toLowerCase() || "";
      const displayTitle = `${url} (${contentType})`;

      // Handle image
      if (isImageAttachment(mime)) {
        const base64Content = Buffer.from(body).toString("base64");
        return {
          content: [
            {
              type: "text",
              text: `Image fetched successfully: ${url}\nMIME type: ${mime}\nSize: ${body.byteLength} bytes`,
            },
            {
              type: "image",
              data: base64Content,
              mimeType: mime,
            },
          ],
          details: {
            url,
            contentType: mime,
            format,
            displayTitle,
            size: body.byteLength,
            isImage: true,
            imageDataUrl: `data:${mime};base64,${base64Content}`,
          },
        };
      }

      // Handle text: decode with the declared charset, then convert.
      const decoded = decodeBody(body, contentType);
      let text: string;
      switch (format) {
        case "markdown":
          text = toMarkdown(contentType, decoded);
          break;
        case "text":
          text = toText(contentType, decoded);
          break;
        default:
          // "html" (and any unexpected value) returns the body untouched.
          text = decoded;
      }

      return {
        content: [{ type: "text", text }],
        details: {
          url,
          contentType,
          format,
          displayTitle,
          size: body.byteLength,
        },
      };
    },
    renderResult(result, options, theme, _context) {
      const details = result.details;

      if (options.isPartial && !details.url) {
        return new Text(theme.fg("muted", "Fetching..."));
      }

      if (details.isImage) {
        return new Text(
          theme.fg(
            "muted",
            `Image: ${formatTitle(details)} (${formatSize(details.size)})`,
          ),
        );
      }

      if (details.error) {
        return new Text(
          theme.fg("error", details.errorSummary ?? "Request failed"),
        );
      }

      const textContent = result.content
        .filter(isTextContent)
        .map((c) => c.text)
        .join("\n");
      return renderWebfetchResult(
        details,
        textContent,
        options.expanded,
        theme,
      );
    },
  };
}
package/biome.json DELETED
@@ -1,35 +0,0 @@
1
- {
2
- "$schema": "https://biomejs.dev/schemas/2.4.14/schema.json",
3
- "formatter": {
4
- "enabled": true,
5
- "indentStyle": "space"
6
- },
7
- "linter": {
8
- "enabled": true,
9
- "rules": {
10
- "recommended": true
11
- }
12
- },
13
- "files": {
14
- "includes": [
15
- "**",
16
- "!dist",
17
- "!coverage",
18
- "!packages/*/dist",
19
- "!packages/*/coverage",
20
- "!node_modules",
21
- "!forks/wreq-js"
22
- ]
23
- },
24
- "javascript": {
25
- "formatter": {
26
- "quoteStyle": "double",
27
- "semicolons": "always"
28
- }
29
- },
30
- "json": {
31
- "formatter": {
32
- "indentStyle": "space"
33
- }
34
- }
35
- }