pi-smart-fetch 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Thinkscape
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,50 @@
1
+ # pi-smart-fetch
2
+
3
+ pi.dev extension package for browser-fingerprinted fetching via `wreq-js` plus readable extraction via Defuddle.
4
+
5
+ ## What it registers
6
+
7
+ - `web_fetch`
8
+
9
+ ## Install
10
+
11
+ From npm:
12
+
13
+ ```bash
14
+ pi install npm:pi-smart-fetch
15
+ ```
16
+
17
+ From a local checkout:
18
+
19
+ ```bash
20
+ pi install /absolute/path/to/agent-smart-fetch/packages/pi-smart-fetch
21
+ ```
22
+
23
+ ## Tool parameters
24
+
25
+ Supported request parameters:
26
+ - `url`
27
+ - `browser`
28
+ - `os`
29
+ - `headers`
30
+ - `maxChars`
31
+ - `format`
32
+ - `removeImages`
33
+ - `includeReplies`
34
+ - `proxy`
35
+ - `verbose`
36
+
37
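+ By default the response is compact: the extracted content, preceded by whichever of URL, title, author, and published date are non-empty. Set `verbose: true` to include the full metadata header, which adds site, language, word count, and the browser/OS profile used.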
38
+
39
+ ## pi settings
40
+
41
+ Optional custom settings in `~/.pi/agent/settings.json` or `.pi/settings.json`:
42
+
43
+ ```json
44
+ {
45
+ "webFetchVerboseByDefault": false,
46
+ "webFetchDefaultMaxChars": 12000
47
+ }
48
+ ```
49
+
50
+ Project settings (`.pi/settings.json`) override global settings (`~/.pi/agent/settings.json`).
@@ -0,0 +1,5 @@
1
+ import { ExtensionAPI } from '@mariozechner/pi-coding-agent';
2
+
3
+ declare function piSmartFetchExtension(pi: ExtensionAPI): void;
4
+
5
+ export { piSmartFetchExtension as default };
package/dist/index.js ADDED
@@ -0,0 +1,322 @@
1
+ import { getAgentDir } from '@mariozechner/pi-coding-agent';
2
+ import { Type } from '@sinclair/typebox';
3
+ import { Defuddle } from 'defuddle/node';
4
+ import { getProfiles, fetch } from 'wreq-js';
5
+ import { parseHTML } from 'linkedom';
6
+ import { readFile } from 'fs/promises';
7
+ import { join } from 'path';
8
+
9
+ // src/index.ts
10
+
11
+ // ../core/src/constants.ts
// Browser fingerprint profile used when the caller does not choose one.
var DEFAULT_BROWSER = "chrome_145";

// OS fingerprint profile used when the caller does not choose one.
var DEFAULT_OS = "windows";

// Cap on the number of characters of extracted content returned by default.
var DEFAULT_MAX_CHARS = 50000;

// Default value for the fetch `timeout` option (named as milliseconds).
var DEFAULT_TIMEOUT_MS = 15000;

// Default reply/comment mode handed to the extractor.
var DEFAULT_INCLUDE_REPLIES = "extractors";

// Request headers sent unless the caller supplies their own values.
var DEFAULT_ACCEPT_HEADER = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
var DEFAULT_ACCEPT_LANGUAGE_HEADER = "en-US,en;q=0.9";
/**
 * Production collaborators for the fetch pipeline: the `fetch` and
 * `getProfiles` functions imported from wreq-js and the `Defuddle` extractor.
 * `createDefuddleFetch` uses this object when no dependencies are injected.
 */
var runtimeDependencies = {
  fetch,
  defuddle: Defuddle,
  getProfiles: getProfiles
};
/**
 * Parse an HTML string with linkedom and patch the document with the
 * members the extractor expects but linkedom may not provide.
 *
 * @param html Markup to parse.
 * @param url Optional URL; when truthy it is assigned to `document.URL`.
 * @returns The parsed (and patched) document.
 */
function parseLinkedomHTML(html, url) {
  const parsedDocument = parseHTML(html).document;
  const view = parsedDocument.defaultView;

  // Provide an empty stylesheet list when the document has none.
  if (!parsedDocument.styleSheets) {
    parsedDocument.styleSheets = [];
  }

  // Stub getComputedStyle so style lookups return an empty `display`.
  if (view && !view.getComputedStyle) {
    view.getComputedStyle = () => ({ display: "" });
  }

  if (url) {
    parsedDocument.URL = url;
  }

  return parsedDocument;
}
41
+
42
+ // ../core/src/format.ts
/**
 * Render `[label, value]` pairs as blockquote-style lines (`> Label: value`),
 * skipping pairs whose value is `undefined` or the empty string.
 *
 * @param parts Array of `[label, value]` tuples.
 * @returns The kept lines joined with newlines ("" when nothing is kept).
 */
function buildHeader(parts) {
  const lines = [];
  for (const [label, value] of parts) {
    if (value === void 0 || value === "") continue;
    lines.push(`> ${label}: ${value}`);
  }
  return lines.join("\n");
}
/**
 * Strip common Markdown syntax to produce plain text.
 *
 * The order of the replacements matters:
 *  - images are removed before links are unwrapped, otherwise the link rule
 *    rewrites `![alt](src)` to `!alt` and the image rule never matches;
 *  - blockquote markers and list bullets are handled before emphasis, so a
 *    list written with `*` bullets is not mistaken for an italic span that
 *    runs from one bullet to the next.
 *
 * @param markdown Markdown source.
 * @returns The text with headings, images, links, blockquote markers, list
 *   bullets (converted to "•"), bold/italic markers and inline-code
 *   backticks removed.
 */
function markdownToText(markdown) {
  return markdown
    .replace(/^#{1,6}\s+/gm, "")
    .replace(/!\[[^\]]*\]\([^)]+\)/g, "")
    .replace(/\[([^\]]+)\]\([^)]+\)/g, "$1")
    .replace(/^>\s+/gm, "")
    .replace(/^[-*+]\s+/gm, "\u2022 ")
    .replace(/\*\*([^*]+)\*\*/g, "$1")
    .replace(/\*([^*]+)\*/g, "$1")
    .replace(/`([^`]+)`/g, "$1");
}
/**
 * Limit `content` to at most `maxChars` UTF-16 code units, appending a
 * "[... truncated]" marker when anything was cut.
 *
 * @param content Text to limit.
 * @param maxChars Maximum number of characters to keep. Negative or NaN
 *   values keep nothing (rather than slicing from the end of the string).
 * @returns `content` unchanged when it already fits, otherwise the kept
 *   prefix followed by a blank line and the truncation marker.
 */
function truncateContent(content, maxChars) {
  if (content.length <= maxChars) return content;

  // Clamp so a negative limit cannot turn into "drop the last N characters".
  let end = Math.max(0, Math.trunc(maxChars) || 0);

  // Do not cut between the two halves of a surrogate pair.
  if (end > 0 && end < content.length) {
    const before = content.charCodeAt(end - 1);
    const after = content.charCodeAt(end);
    const splitsPair = before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
    if (splitsPair) end -= 1;
  }

  return `${content.slice(0, end)}

[... truncated]`;
}
/**
 * Build the short metadata header: URL, title, author and published date.
 * Empty fields are dropped by `buildHeader`.
 *
 * @param result Successful fetch result.
 * @returns Header text, or "" when every field is empty.
 */
function buildCompactMetadataHeader(result) {
  const { finalUrl, title, author, published } = result;
  const rows = [
    ["URL", finalUrl],
    ["Title", title],
    ["Author", author],
    ["Published", published]
  ];
  return buildHeader(rows);
}
/**
 * Build the full metadata header: the compact fields plus site, language,
 * word count and the browser/OS profile used for the request.
 *
 * @param result Successful fetch result.
 * @returns Header text with one `> Label: value` line per non-empty field.
 */
function buildMetadataHeader(result) {
  const fingerprint = `${result.browser}/${result.os}`;
  const rows = [
    ["URL", result.finalUrl],
    ["Title", result.title],
    ["Author", result.author],
    ["Published", result.published],
    ["Site", result.site],
    ["Language", result.language],
    ["Words", result.wordCount],
    ["Browser", fingerprint]
  ];
  return buildHeader(rows);
}
/**
 * Combine the metadata header and the extracted content into the text
 * returned to the caller.
 *
 * @param result Successful fetch result.
 * @param options `verbose: true` selects the full header; otherwise the
 *   compact header is used.
 * @returns Header, blank line, content; or just the content when the header
 *   is empty.
 */
function buildFetchResponseText(result, options = {}) {
  const renderHeader = options.verbose ? buildMetadataHeader : buildCompactMetadataHeader;
  const header = renderHeader(result);
  if (!header) {
    return result.content;
  }
  return `${header}

${result.content}`;
}
81
+
82
+ // ../core/src/extract.ts
// Response content types the pipeline passes on to the extractor. Compared
// against the lower-cased Content-Type header, so entries must be lower case.
var HTML_CONTENT_TYPES = [
  "text/html",
  "application/xhtml+xml",
  "text/plain",
  "text/markdown"
];

/** Render any thrown value as a short message for an `{ error }` result. */
function describeFetchFailure(cause) {
  return cause instanceof Error ? cause.message : String(cause);
}

/**
 * Create the fetch-and-extract function.
 *
 * The returned function validates the URL, performs the request with the
 * chosen browser/OS profile, checks the response status and content type,
 * parses the body, runs the extractor and returns either a result object or
 * an `{ error }` object. Failures are always reported through the `{ error }`
 * shape: exceptions thrown by the fetch call, by reading the body, or by the
 * extractor are caught and converted, so callers only need `isError`.
 *
 * @param dependencies Object providing `fetch` and `defuddle`; defaults to
 *   the production implementations.
 * @returns An async function `(opts) => result | { error }`.
 */
function createDefuddleFetch(dependencies = runtimeDependencies) {
  return async function defuddleFetch2(opts) {
    const browser = opts.browser ?? DEFAULT_BROWSER;
    const os = opts.os ?? DEFAULT_OS;
    const format = opts.format ?? "markdown";
    const maxChars = opts.maxChars ?? DEFAULT_MAX_CHARS;
    const removeImages = opts.removeImages ?? false;
    const includeReplies = opts.includeReplies ?? DEFAULT_INCLUDE_REPLIES;
    const timeoutMs = opts.timeoutMs ?? DEFAULT_TIMEOUT_MS;

    let parsed;
    try {
      parsed = new URL(opts.url);
    } catch {
      return { error: `Invalid URL: ${opts.url}` };
    }
    if (!["http:", "https:"].includes(parsed.protocol)) {
      return {
        error: `Only http/https URLs supported, got ${parsed.protocol}`
      };
    }

    const fetchOptions = {
      browser,
      os,
      headers: {
        Accept: DEFAULT_ACCEPT_HEADER,
        "Accept-Language": DEFAULT_ACCEPT_LANGUAGE_HEADER,
        // Caller-supplied headers win over the defaults above.
        ...opts.headers
      },
      redirect: "follow",
      timeout: timeoutMs,
      // Only include `proxy` when one was requested.
      ...(opts.proxy ? { proxy: opts.proxy } : {})
    };

    let response;
    try {
      response = await dependencies.fetch(opts.url, fetchOptions);
    } catch (cause) {
      // Network errors and timeouts surface here as exceptions.
      return {
        error: `Request failed for ${opts.url}: ${describeFetchFailure(cause)}`
      };
    }

    if (!response.ok) {
      return {
        error: `HTTP ${response.status} ${response.statusText} for ${opts.url}`
      };
    }

    // Fall back to the requested URL when the response reports none
    // (missing or empty).
    const finalUrl = response.url || opts.url;
    const contentType = response.headers.get("content-type") ?? "";
    // Media types are case-insensitive, so compare in lower case.
    const normalizedContentType = contentType.toLowerCase();
    if (!HTML_CONTENT_TYPES.some((value) => normalizedContentType.includes(value))) {
      return { error: `Not an HTML page (content-type: ${contentType})` };
    }

    let extracted;
    try {
      const html = await response.text();
      const document = parseLinkedomHTML(html, finalUrl);
      extracted = await dependencies.defuddle(document, finalUrl, {
        // The extractor produces Markdown for both "markdown" and "text";
        // "text" is derived from the Markdown below.
        markdown: format !== "html",
        removeImages,
        includeReplies
      });
    } catch (cause) {
      return {
        error: `Failed to extract content from ${opts.url}: ${describeFetchFailure(cause)}`
      };
    }

    if (!extracted.content || extracted.wordCount === 0) {
      return {
        error: `No content extracted from ${opts.url}. May need JS rendering or is blocked.`
      };
    }

    const normalizedContent = format === "text" ? markdownToText(extracted.content) : extracted.content;
    return {
      url: opts.url,
      finalUrl,
      title: extracted.title ?? "",
      author: extracted.author ?? "",
      published: extracted.published ?? "",
      site: extracted.site ?? "",
      language: extracted.language ?? "",
      wordCount: extracted.wordCount,
      content: truncateContent(normalizedContent, maxChars),
      browser,
      os
    };
  };
}

// Ready-to-use instance wired to the production dependencies.
var defuddleFetch = createDefuddleFetch();
/**
 * Tell an error result apart from a successful one.
 *
 * @param result Value returned by the fetch pipeline.
 * @returns `true` when `result` has an `error` property (own or inherited).
 */
function isError(result) {
  return Reflect.has(result, "error");
}
/**
 * Fill in every tool default that `config` leaves unset (null/undefined).
 *
 * @param config Partial overrides; may be omitted.
 * @returns An object with `maxChars`, `timeoutMs`, `browser`, `os`,
 *   `removeImages` and `includeReplies` all populated.
 */
function resolveFetchToolDefaults(config = {}) {
  const { maxChars, timeoutMs, browser, os, removeImages, includeReplies } = config;
  return {
    maxChars: maxChars ?? DEFAULT_MAX_CHARS,
    timeoutMs: timeoutMs ?? DEFAULT_TIMEOUT_MS,
    browser: browser ?? DEFAULT_BROWSER,
    os: os ?? DEFAULT_OS,
    removeImages: removeImages ?? false,
    includeReplies: includeReplies ?? DEFAULT_INCLUDE_REPLIES
  };
}
/**
 * Build the TypeBox property schemas shared by the fetch tool.
 *
 * Only `url` is required; every other property is optional. The resolved
 * defaults are interpolated into the descriptions of `browser`, `os` and
 * `maxChars`.
 *
 * @param defaults Resolved tool defaults (reads `browser`, `os`, `maxChars`).
 * @returns A map from parameter name to its schema.
 */
function createBaseFetchToolParameterProperties(defaults) {
  const url = Type.String({ description: "URL to fetch (http/https only)" });

  const browser = Type.Optional(
    Type.String({
      description: `Browser profile for TLS fingerprinting. Default: "${defaults.browser}". Examples: chrome_145, firefox_147, safari_26, edge_145, opera_127`
    })
  );

  const os = Type.Optional(
    Type.String({
      description: `OS profile for fingerprinting. Default: "${defaults.os}". Options: windows, macos, linux, android, ios`
    })
  );

  const headers = Type.Optional(
    Type.Record(Type.String(), Type.String(), {
      description: "Custom HTTP headers to send. By default, Accept and Accept-Language are set automatically."
    })
  );

  const maxChars = Type.Optional(
    Type.Number({
      description: `Maximum characters to return. Default: ${defaults.maxChars}`
    })
  );

  const formatChoices = [Type.Literal("markdown"), Type.Literal("html"), Type.Literal("text")];
  const format = Type.Optional(
    Type.Union(formatChoices, {
      description: 'Output format. "markdown" (default), "html" (cleaned HTML), or "text" (plain text, no formatting)'
    })
  );

  const removeImages = Type.Optional(
    Type.Boolean({
      description: "Strip image references from output. Default: false"
    })
  );

  const includeReplies = Type.Optional(
    Type.Union([Type.Boolean(), Type.Literal("extractors")], {
      description: "Include replies/comments: 'extractors' for site-specific only (default), true for all, false for none"
    })
  );

  const proxy = Type.Optional(
    Type.String({
      description: "Proxy URL (http://user:pass@host:port or socks5://host:port)"
    })
  );

  return { url, browser, os, headers, maxChars, format, removeImages, includeReplies, proxy };
}
/**
 * Run one fetch for a tool call, letting explicit parameters take precedence
 * over the resolved defaults.
 *
 * `headers` and `proxy` are passed through as given, `format` falls back to
 * "markdown", and the timeout always comes from `defaults`.
 *
 * @param params Raw tool-call parameters.
 * @param defaults Resolved tool defaults.
 * @returns Whatever `defuddleFetch` resolves to (a result or an `{ error }`).
 */
async function executeFetchToolCall(params, defaults) {
  const { url, browser, os, headers, maxChars, format, removeImages, includeReplies, proxy } = params;
  const request = {
    url,
    browser: browser ?? defaults.browser,
    os: os ?? defaults.os,
    headers,
    maxChars: maxChars ?? defaults.maxChars,
    format: format ?? "markdown",
    removeImages: removeImages ?? defaults.removeImages,
    includeReplies: includeReplies ?? defaults.includeReplies,
    proxy,
    timeoutMs: defaults.timeoutMs
  };
  return defuddleFetch(request);
}
/**
 * Pick the recognised web-fetch settings out of an arbitrary parsed value.
 *
 * `webFetchVerboseByDefault` is kept only when it is a boolean, and
 * `webFetchDefaultMaxChars` only when it is a finite number greater than 0.
 * Everything else is ignored.
 *
 * @param input Parsed settings content of unknown shape.
 * @returns An object holding only the valid, recognised keys (possibly empty).
 */
function normalizePiWebFetchSettings(input) {
  if (!input || typeof input !== "object") return {};

  const { webFetchVerboseByDefault: verbose, webFetchDefaultMaxChars: maxChars } = input;
  const hasVerbose = typeof verbose === "boolean";
  const hasMaxChars = typeof maxChars === "number" && Number.isFinite(maxChars) && maxChars > 0;

  return {
    ...(hasVerbose ? { webFetchVerboseByDefault: verbose } : {}),
    ...(hasMaxChars ? { webFetchDefaultMaxChars: maxChars } : {})
  };
}
/**
 * Merge global and project settings, with project values taking precedence.
 *
 * @param globalSettings Parsed global settings (any shape).
 * @param projectSettings Parsed project settings (any shape).
 * @returns `verboseByDefault` (false when neither layer sets it) and
 *   `defaultMaxChars` (undefined when neither layer sets it).
 */
function resolvePiWebFetchSettings(globalSettings, projectSettings) {
  const fromGlobal = normalizePiWebFetchSettings(globalSettings);
  const fromProject = normalizePiWebFetchSettings(projectSettings);

  const verboseByDefault = fromProject.webFetchVerboseByDefault ?? fromGlobal.webFetchVerboseByDefault ?? false;
  const defaultMaxChars = fromProject.webFetchDefaultMaxChars ?? fromGlobal.webFetchDefaultMaxChars;

  return { verboseByDefault, defaultMaxChars };
}
/**
 * Read and parse a JSON settings file on a best-effort basis.
 *
 * @param path File to read.
 * @returns The parsed JSON value, or `{}` when the file cannot be read or
 *   does not contain valid JSON.
 */
async function readSettingsFile(path) {
  let parsed = {};
  try {
    const raw = await readFile(path, "utf-8");
    parsed = JSON.parse(raw);
  } catch {
    // Missing or malformed settings fall back to the empty object.
  }
  return parsed;
}
/**
 * Load and merge the global and project web-fetch settings.
 *
 * Reads `<agentDir>/settings.json` and `<cwd>/.pi/settings.json`. The two
 * reads are independent, so they run concurrently; `readSettingsFile`
 * resolves to `{}` on any failure, so neither read can reject the pair.
 *
 * @param cwd Project directory containing the optional `.pi/settings.json`.
 * @param agentDir Directory holding the global `settings.json`; defaults to
 *   `getAgentDir()`.
 * @returns The resolved settings, with project values overriding global ones.
 */
async function loadPiWebFetchSettings(cwd, agentDir = getAgentDir()) {
  const [globalSettings, projectSettings] = await Promise.all([
    readSettingsFile(join(agentDir, "settings.json")),
    readSettingsFile(join(cwd, ".pi", "settings.json"))
  ]);
  return resolvePiWebFetchSettings(globalSettings, projectSettings);
}
274
+
275
+ // src/index.ts
// Description shown for the `web_fetch` tool: four sentences separated by
// single spaces.
var toolDescription =
  "Fetch a URL with browser-grade TLS fingerprinting and extract clean, readable content." +
  " " +
  "Uses wreq-js for browser-like TLS/HTTP2 impersonation and Defuddle for article extraction." +
  " " +
  "Supports the same fetch parameters as the OpenClaw tool, plus an optional verbose flag." +
  " " +
  "Does NOT execute JavaScript \u2014 use a browser automation tool for JS-heavy pages.";
/**
 * Extension entry point: registers the `web_fetch` tool on the given pi API.
 *
 * The parameter schema is built once from the built-in defaults. On every
 * call the tool re-reads the pi settings, so `webFetchVerboseByDefault` and
 * `webFetchDefaultMaxChars` apply whenever the caller omits `verbose` or
 * `maxChars`.
 *
 * @param pi Extension API object exposing `registerTool`.
 */
function piSmartFetchExtension(pi) {
  const schemaDefaults = resolveFetchToolDefaults();
  const baseProperties = createBaseFetchToolParameterProperties(schemaDefaults);
  const verboseProperty = Type.Optional(
    Type.Boolean({
      description: "Include the full metadata header (site, language, word count, browser fingerprint info). Default: false, or webFetchVerboseByDefault from pi settings."
    })
  );

  // Wrap a piece of text and its details in the tool result shape.
  const textReply = (text, details) => ({
    content: [{ type: "text", text }],
    details
  });

  pi.registerTool({
    name: "web_fetch",
    label: "web_fetch",
    description: toolDescription,
    promptSnippet: "web_fetch(url, browser?, os?, headers?, maxChars?, format?, removeImages?, includeReplies?, proxy?, verbose?): fetch browser-fingerprinted readable web content",
    parameters: Type.Object({ ...baseProperties, verbose: verboseProperty }),
    async execute(_toolCallId, params, _signal, _onUpdate, ctx) {
      // Settings are loaded per call so edits take effect without a restart.
      const settings = await loadPiWebFetchSettings(ctx.cwd, getAgentDir());
      const runtimeDefaults = resolveFetchToolDefaults({
        maxChars: settings.defaultMaxChars
      });
      const verbose = params.verbose ?? settings.verboseByDefault;
      const result = await executeFetchToolCall(params, runtimeDefaults);

      if (isError(result)) {
        return textReply(`Error: ${result.error}`, { error: true, verbose });
      }
      return textReply(buildFetchResponseText(result, { verbose }), {
        verbose,
        maxChars: runtimeDefaults.maxChars
      });
    }
  });
}
319
+
320
+ export { piSmartFetchExtension as default };
321
+ //# sourceMappingURL=index.js.map
322
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../core/src/constants.ts","../../core/src/dependencies.ts","../../core/src/dom.ts","../../core/src/format.ts","../../core/src/extract.ts","../../core/src/tool.ts","../src/settings.ts","../src/index.ts"],"names":["wreqFetch","defuddleFetch","Type","getAgentDir"],"mappings":";;;;;;;;;;;AAEO,IAAM,eAAA,GAAkB,YAAA;AACxB,IAAM,UAAA,GAA4B,SAAA;AAClC,IAAM,iBAAA,GAAoB,GAAA;AAC1B,IAAM,kBAAA,GAAqB,IAAA;AAC3B,IAAM,uBAAA,GAA0B,YAAA;AAChC,IAAM,qBAAA,GACX,iEAAA;AACK,IAAM,8BAAA,GAAiC,gBAAA;ACLvC,IAAM,mBAAA,GAAyC;AAAA,EACpD,KAAA,EAAOA,KAAA;AAAA,EACP,QAAA,EAAU,QAAA;AAAA,EACV;AACF,CAAA;ACLO,SAAS,iBAAA,CAAkB,MAAc,GAAA,EAAwB;AACtE,EAAA,MAAM,EAAE,QAAA,EAAS,GAAI,SAAA,CAAU,IAAI,CAAA;AACnC,EAAA,MAAM,GAAA,GAAM,QAAA;AACZ,EAAA,MAAM,cAAc,GAAA,CAAI,WAAA;AASxB,EAAA,IAAI,CAAE,IAAkC,WAAA,EAAa;AACnD,IAAC,GAAA,CAAkC,cACjC,EAAC;AAAA,EACL;AAEA,EAAA,IAAI,WAAA,IAAe,CAAC,WAAA,CAAY,gBAAA,EAAkB;AAChD,IAAA,WAAA,CAAY,oBAAoB,OAAO;AAAA,MACrC,OAAA,EAAS;AAAA,KACX,CAAA,CAAA;AAAA,EACF;AAEA,EAAA,IAAI,GAAA,EAAK;AACP,IAAC,IAAyB,GAAA,GAAM,GAAA;AAAA,EAClC;AAEA,EAAA,OAAO,QAAA;AACT;;;AC7BA,SAAS,YACP,KAAA,EACA;AACA,EAAA,OAAO,KAAA,CACJ,MAAA,CAAO,CAAC,GAAG,KAAK,CAAA,KAAM,KAAA,KAAU,MAAA,IAAa,KAAA,KAAU,EAAE,CAAA,CACzD,IAAI,CAAC,CAAC,KAAA,EAAO,KAAK,CAAA,KAAM,CAAA,EAAA,EAAK,KAAK,CAAA,EAAA,EAAK,KAAK,CAAA,CAAE,CAAA,CAC9C,IAAA,CAAK,IAAI,CAAA;AACd;AAEO,SAAS,eAAe,QAAA,EAA0B;AACvD,EAAA,OAAO,QAAA,CACJ,OAAA,CAAQ,cAAA,EAAgB,EAAE,CAAA,CAC1B,OAAA,CAAQ,kBAAA,EAAoB,IAAI,CAAA,CAChC,OAAA,CAAQ,cAAA,EAAgB,IAAI,EAC5B,OAAA,CAAQ,wBAAA,EAA0B,IAAI,CAAA,CACtC,OAAA,CAAQ,uBAAA,EAAyB,EAAE,CAAA,CACnC,QAAQ,SAAA,EAAW,EAAE,CAAA,CACrB,OAAA,CAAQ,aAAA,EAAe,SAAI,CAAA,CAC3B,OAAA,CAAQ,cAAc,IAAI,CAAA;AAC/B;AAEO,SAAS,eAAA,CAAgB,SAAiB,QAAA,EAA0B;AACzE,EAAA,IAAI,OAAA,CAAQ,MAAA,IAAU,QAAA,EAAU,OAAO,OAAA;AACvC,EAAA,OAAO,CAAA,EAAG,OAAA,CAAQ,KAAA,CAAM,CAAA,EAAG,QAAQ,CAAC;;AAAA,eAAA,CAAA;AACtC;AAEO,SAAS,2BAA2B,MAAA,EAA6B;AACtE,EAAA,OAAO,WAAA,CAAY;AAAA,IACjB,CAAC,KAAA,EAAO,MAAA,CAAO,QAAQ,CAAA;AAAA,IACvB,CAAC,OAAA,EAAS,MAAA,CAAO,KAAK,CAAA;AAAA,IAC
tB,CAAC,QAAA,EAAU,MAAA,CAAO,MAAM,CAAA;AAAA,IACxB,CAAC,WAAA,EAAa,MAAA,CAAO,SAAS;AAAA,GAC/B,CAAA;AACH;AAEO,SAAS,oBAAoB,MAAA,EAA6B;AAC/D,EAAA,OAAO,WAAA,CAAY;AAAA,IACjB,CAAC,KAAA,EAAO,MAAA,CAAO,QAAQ,CAAA;AAAA,IACvB,CAAC,OAAA,EAAS,MAAA,CAAO,KAAK,CAAA;AAAA,IACtB,CAAC,QAAA,EAAU,MAAA,CAAO,MAAM,CAAA;AAAA,IACxB,CAAC,WAAA,EAAa,MAAA,CAAO,SAAS,CAAA;AAAA,IAC9B,CAAC,MAAA,EAAQ,MAAA,CAAO,IAAI,CAAA;AAAA,IACpB,CAAC,UAAA,EAAY,MAAA,CAAO,QAAQ,CAAA;AAAA,IAC5B,CAAC,OAAA,EAAS,MAAA,CAAO,SAAS,CAAA;AAAA,IAC1B,CAAC,WAAW,CAAA,EAAG,MAAA,CAAO,OAAO,CAAA,CAAA,EAAI,MAAA,CAAO,EAAE,CAAA,CAAE;AAAA,GAC7C,CAAA;AACH;AAEO,SAAS,sBAAA,CACd,MAAA,EACA,OAAA,GAAiC,EAAC,EAC1B;AACR,EAAA,MAAM,SAAS,OAAA,CAAQ,OAAA,GACnB,oBAAoB,MAAM,CAAA,GAC1B,2BAA2B,MAAM,CAAA;AAErC,EAAA,OAAO,MAAA,GAAS,GAAG,MAAM;;AAAA,EAAO,MAAA,CAAO,OAAO,CAAA,CAAA,GAAK,MAAA,CAAO,OAAA;AAC5D;;;ACnBA,IAAM,kBAAA,GAAqB;AAAA,EACzB,WAAA;AAAA,EACA,uBAAA;AAAA,EACA,YAAA;AAAA,EACA;AACF,CAAA;AAMO,SAAS,mBAAA,CACd,eAAkC,mBAAA,EAClC;AACA,EAAA,OAAO,eAAeC,eACpB,IAAA,EACmC;AACnC,IAAA,MAAM,OAAA,GAAU,KAAK,OAAA,IAAW,eAAA;AAChC,IAAA,MAAM,EAAA,GAAK,KAAK,EAAA,IAAM,UAAA;AACtB,IAAA,MAAM,MAAA,GAAuB,KAAK,MAAA,IAAU,UAAA;AAC5C,IAAA,MAAM,QAAA,GAAW,KAAK,QAAA,IAAY,iBAAA;AAClC,IAAA,MAAM,YAAA,GAAe,KAAK,YAAA,IAAgB,KAAA;AAC1C,IAAA,MAAM,cAAA,GAAiB,KAAK,cAAA,IAAkB,uBAAA;AAC9C,IAAA,MAAM,SAAA,GAAY,KAAK,SAAA,IAAa,kBAAA;AAEpC,IAAA,IAAI,MAAA;AACJ,IAAA,IAAI;AACF,MAAA,MAAA,GAAS,IAAI,GAAA,CAAI,IAAA,CAAK,GAAG,CAAA;AAAA,IAC3B,CAAA,CAAA,MAAQ;AACN,MAAA,OAAO,EAAE,KAAA,EAAO,CAAA,aAAA,EAAgB,IAAA,CAAK,GAAG,CAAA,CAAA,EAAG;AAAA,IAC7C;AAEA,IAAA,IAAI,CAAC,CAAC,OAAA,EAAS,QAAQ,EAAE,QAAA,CAAS,MAAA,CAAO,QAAQ,CAAA,EAAG;AAClD,MAAA,OAAO;AAAA,QACL,KAAA,EAAO,CAAA,oCAAA,EAAuC,MAAA,CAAO,QAAQ,CAAA;AAAA,OAC/D;AAAA,IACF;AAEA,IAAA,MAAM,YAAA,GAAwC;AAAA,MAC5C,OAAA;AAAA,MACA,EAAA;AAAA,MACA,OAAA,EAAS;AAAA,QACP,MAAA,EAAQ,qBAAA;AAAA,QACR,iBAAA,EAAmB,8BAAA;AAAA,QACnB,GAAG,IAAA,CAAK;AAAA,OACV;AAAA,MACA,QAAA,EAAU,QAAA;AAAA,MACV,OAAA,EAAS;AAAA,KACX;AAEA,IAAA,IAAI,KAAK,KAAA,EAAO;AACd,MAAA,YAAA,CAAa,QAAQ,IAAA,CAAK,KAAA;AAAA,IAC5B;AAEA,I
AAA,MAAM,WAAW,MAAM,YAAA,CAAa,KAAA,CAAM,IAAA,CAAK,KAAK,YAAY,CAAA;AAEhE,IAAA,IAAI,CAAC,SAAS,EAAA,EAAI;AAChB,MAAA,OAAO;AAAA,QACL,KAAA,EAAO,QAAQ,QAAA,CAAS,MAAM,IAAI,QAAA,CAAS,UAAU,CAAA,KAAA,EAAQ,IAAA,CAAK,GAAG,CAAA;AAAA,OACvE;AAAA,IACF;AAEA,IAAA,MAAM,QAAA,GAAW,QAAA,CAAS,GAAA,IAAO,IAAA,CAAK,GAAA;AACtC,IAAA,MAAM,WAAA,GAAc,QAAA,CAAS,OAAA,CAAQ,GAAA,CAAI,cAAc,CAAA,IAAK,EAAA;AAE5D,IAAA,IAAI,CAAC,mBAAmB,IAAA,CAAK,CAAC,UAAU,WAAA,CAAY,QAAA,CAAS,KAAK,CAAC,CAAA,EAAG;AACpE,MAAA,OAAO,EAAE,KAAA,EAAO,CAAA,gCAAA,EAAmC,WAAW,CAAA,CAAA,CAAA,EAAI;AAAA,IACpE;AAEA,IAAA,MAAM,IAAA,GAAO,MAAM,QAAA,CAAS,IAAA,EAAK;AACjC,IAAA,MAAM,QAAA,GAAW,iBAAA,CAAkB,IAAA,EAAM,QAAQ,CAAA;AACjD,IAAA,MAAM,SAAA,GAAY,MAAM,YAAA,CAAa,QAAA,CAAS,UAAU,QAAA,EAAU;AAAA,MAChE,UAAU,MAAA,KAAW,MAAA;AAAA,MACrB,YAAA;AAAA,MACA;AAAA,KACD,CAAA;AAED,IAAA,IAAI,CAAC,SAAA,CAAU,OAAA,IAAW,SAAA,CAAU,cAAc,CAAA,EAAG;AACnD,MAAA,OAAO;AAAA,QACL,KAAA,EAAO,CAAA,0BAAA,EAA6B,IAAA,CAAK,GAAG,CAAA,sCAAA;AAAA,OAC9C;AAAA,IACF;AAEA,IAAA,MAAM,oBACJ,MAAA,KAAW,MAAA,GAAS,eAAe,SAAA,CAAU,OAAO,IAAI,SAAA,CAAU,OAAA;AAEpE,IAAA,OAAO;AAAA,MACL,KAAK,IAAA,CAAK,GAAA;AAAA,MACV,QAAA;AAAA,MACA,KAAA,EAAO,UAAU,KAAA,IAAS,EAAA;AAAA,MAC1B,MAAA,EAAQ,UAAU,MAAA,IAAU,EAAA;AAAA,MAC5B,SAAA,EAAW,UAAU,SAAA,IAAa,EAAA;AAAA,MAClC,IAAA,EAAM,UAAU,IAAA,IAAQ,EAAA;AAAA,MACxB,QAAA,EAAU,UAAU,QAAA,IAAY,EAAA;AAAA,MAChC,WAAW,SAAA,CAAU,SAAA;AAAA,MACrB,OAAA,EAAS,eAAA,CAAgB,iBAAA,EAAmB,QAAQ,CAAA;AAAA,MACpD,OAAA;AAAA,MACA;AAAA,KACF;AAAA,EACF,CAAA;AACF;AAEO,IAAM,gBAAgB,mBAAA,EAAoB;AAG1C,SAAS,QACd,MAAA,EACsB;AACtB,EAAA,OAAO,OAAA,IAAW,MAAA;AACpB;ACrIO,SAAS,wBAAA,CACd,MAAA,GAA0B,EAAC,EACR;AACnB,EAAA,OAAO;AAAA,IACL,QAAA,EAAU,OAAO,QAAA,IAAY,iBAAA;AAAA,IAC7B,SAAA,EAAW,OAAO,SAAA,IAAa,kBAAA;AAAA,IAC/B,OAAA,EAAS,OAAO,OAAA,IAAW,eAAA;AAAA,IAC3B,EAAA,EAAI,OAAO,EAAA,IAAM,UAAA;AAAA,IACjB,YAAA,EAAc,OAAO,YAAA,IAAgB,KAAA;AAAA,IACrC,cAAA,EAAgB,OAAO,cAAA,IAAkB;AAAA,GAC3C;AACF;AAEO,SAAS,uCACd,QAAA,EACyB;AACzB,EAAA,OAAO;AAAA,IACL,KAAK,IAAA,CAAK,MAAA,CAAO,EAAE,WAAA,EAAa,kCAAkC,CAAA;AAAA,IAClE,SAAS,IAAA,CAAK,QAAA;AAAA,MACZ,KAA
K,MAAA,CAAO;AAAA,QACV,WAAA,EAAa,CAAA,kDAAA,EAAqD,QAAA,CAAS,OAAO,CAAA,oEAAA;AAAA,OACnF;AAAA,KACH;AAAA,IACA,IAAI,IAAA,CAAK,QAAA;AAAA,MACP,KAAK,MAAA,CAAO;AAAA,QACV,WAAA,EAAa,CAAA,yCAAA,EAA4C,QAAA,CAAS,EAAE,CAAA,+CAAA;AAAA,OACrE;AAAA,KACH;AAAA,IACA,SAAS,IAAA,CAAK,QAAA;AAAA,MACZ,KAAK,MAAA,CAAO,IAAA,CAAK,QAAO,EAAG,IAAA,CAAK,QAAO,EAAG;AAAA,QACxC,WAAA,EACE;AAAA,OACH;AAAA,KACH;AAAA,IACA,UAAU,IAAA,CAAK,QAAA;AAAA,MACb,KAAK,MAAA,CAAO;AAAA,QACV,WAAA,EAAa,CAAA,uCAAA,EAA0C,QAAA,CAAS,QAAQ,CAAA;AAAA,OACzE;AAAA,KACH;AAAA,IACA,QAAQ,IAAA,CAAK,QAAA;AAAA,MACX,IAAA,CAAK,KAAA;AAAA,QACH,CAAC,IAAA,CAAK,OAAA,CAAQ,UAAU,CAAA,EAAG,IAAA,CAAK,OAAA,CAAQ,MAAM,CAAA,EAAG,IAAA,CAAK,OAAA,CAAQ,MAAM,CAAC,CAAA;AAAA,QACrE;AAAA,UACE,WAAA,EACE;AAAA;AACJ;AACF,KACF;AAAA,IACA,cAAc,IAAA,CAAK,QAAA;AAAA,MACjB,KAAK,OAAA,CAAQ;AAAA,QACX,WAAA,EAAa;AAAA,OACd;AAAA,KACH;AAAA,IACA,gBAAgB,IAAA,CAAK,QAAA;AAAA,MACnB,IAAA,CAAK,KAAA,CAAM,CAAC,IAAA,CAAK,OAAA,IAAW,IAAA,CAAK,OAAA,CAAQ,YAAY,CAAC,CAAA,EAAG;AAAA,QACvD,WAAA,EACE;AAAA,OACH;AAAA,KACH;AAAA,IACA,OAAO,IAAA,CAAK,QAAA;AAAA,MACV,KAAK,MAAA,CAAO;AAAA,QACV,WAAA,EACE;AAAA,OACH;AAAA;AACH,GACF;AACF;AAEA,eAAsB,oBAAA,CACpB,QACA,QAAA,EACmC;AACnC,EAAA,OAAO,aAAA,CAAc;AAAA,IACnB,KAAK,MAAA,CAAO,GAAA;AAAA,IACZ,OAAA,EAAU,MAAA,CAAO,OAAA,IAAsB,QAAA,CAAS,OAAA;AAAA,IAChD,EAAA,EAAK,MAAA,CAAO,EAAA,IAAiB,QAAA,CAAS,EAAA;AAAA,IACtC,SAAS,MAAA,CAAO,OAAA;AAAA,IAChB,QAAA,EAAW,MAAA,CAAO,QAAA,IAAuB,QAAA,CAAS,QAAA;AAAA,IAClD,MAAA,EAAS,OAAO,MAAA,IAA2C,UAAA;AAAA,IAC3D,YAAA,EAAe,MAAA,CAAO,YAAA,IAA4B,QAAA,CAAS,YAAA;AAAA,IAC3D,cAAA,EACG,MAAA,CAAO,cAAA,IACR,QAAA,CAAS,cAAA;AAAA,IACX,OAAO,MAAA,CAAO,KAAA;AAAA,IACd,WAAW,QAAA,CAAS;AAAA,GACrB,CAAA;AACH;ACxFA,SAAS,4BAA4B,KAAA,EAAoC;AACvE,EAAA,IAAI,CAAC,KAAA,IAAS,OAAO,KAAA,KAAU,QAAA,SAAiB,EAAC;AAEjD,EAAA,MAAM,MAAA,GAAS,KAAA;AACf,EAAA,MAAM,WAA+B,EAAC;AAEtC,EAAA,IAAI,OAAO,MAAA,CAAO,wBAAA,KAA6B,SAAA,EAAW;AACxD,IAAA,QAAA,CAAS,2BAA2B,MAAA,CAAO,wBAAA;AAAA,EAC7C;AAEA,EAAA,IACE,OAAO,MAAA,CAAO,uBAAA,KAA4B,QAAA,IAC1C,MAAA,CAAO,QAAA,CAAS,MAAA,CAAO,uBAAuB,CAAA,IAC9C,MAAA,CAAO,uBAAA
,GAA0B,CAAA,EACjC;AACA,IAAA,QAAA,CAAS,0BAA0B,MAAA,CAAO,uBAAA;AAAA,EAC5C;AAEA,EAAA,OAAO,QAAA;AACT;AAEO,SAAS,yBAAA,CACd,gBACA,eAAA,EAC4B;AAC5B,EAAA,MAAM,MAAA,GAAS,4BAA4B,cAAc,CAAA;AACzD,EAAA,MAAM,OAAA,GAAU,4BAA4B,eAAe,CAAA;AAE3D,EAAA,OAAO;AAAA,IACL,gBAAA,EACE,OAAA,CAAQ,wBAAA,IACR,MAAA,CAAO,wBAAA,IACP,KAAA;AAAA,IACF,eAAA,EACE,OAAA,CAAQ,uBAAA,IAA2B,MAAA,CAAO;AAAA,GAC9C;AACF;AAEA,eAAe,iBAAiB,IAAA,EAAgC;AAC9D,EAAA,IAAI;AACF,IAAA,OAAO,KAAK,KAAA,CAAM,MAAM,QAAA,CAAS,IAAA,EAAM,OAAO,CAAC,CAAA;AAAA,EACjD,CAAA,CAAA,MAAQ;AACN,IAAA,OAAO,EAAC;AAAA,EACV;AACF;AAEA,eAAsB,sBAAA,CACpB,GAAA,EACA,QAAA,GAAW,WAAA,EAAY,EACc;AACrC,EAAA,MAAM,iBAAiB,MAAM,gBAAA;AAAA,IAC3B,IAAA,CAAK,UAAU,eAAe;AAAA,GAChC;AACA,EAAA,MAAM,kBAAkB,MAAM,gBAAA;AAAA,IAC5B,IAAA,CAAK,GAAA,EAAK,KAAA,EAAO,eAAe;AAAA,GAClC;AAEA,EAAA,OAAO,yBAAA,CAA0B,gBAAgB,eAAe,CAAA;AAClE;;;AC7DA,IAAM,eAAA,GAAkB;AAAA,EACtB,wFAAA;AAAA,EACA,4FAAA;AAAA,EACA,yFAAA;AAAA,EACA;AACF,CAAA,CAAE,KAAK,GAAG,CAAA;AAEK,SAAR,sBAAuC,EAAA,EAAkB;AAC9D,EAAA,MAAM,WAAW,wBAAA,EAAyB;AAE1C,EAAA,EAAA,CAAG,YAAA,CAAa;AAAA,IACd,IAAA,EAAM,WAAA;AAAA,IACN,KAAA,EAAO,WAAA;AAAA,IACP,WAAA,EAAa,eAAA;AAAA,IACb,aAAA,EACE,iKAAA;AAAA,IACF,UAAA,EAAYC,KAAK,MAAA,CAAO;AAAA,MACtB,GAAG,uCAAuC,QAAQ,CAAA;AAAA,MAClD,SAASA,IAAAA,CAAK,QAAA;AAAA,QACZA,KAAK,OAAA,CAAQ;AAAA,UACX,WAAA,EACE;AAAA,SACH;AAAA;AACH,KACD,CAAA;AAAA,IAED,MAAM,OAAA,CAAQ,WAAA,EAAa,MAAA,EAAQ,OAAA,EAAS,WAAW,GAAA,EAAK;AAC1D,MAAA,MAAM,WAAW,MAAM,sBAAA,CAAuB,GAAA,CAAI,GAAA,EAAKC,aAAa,CAAA;AACpE,MAAA,MAAM,kBAAkB,wBAAA,CAAyB;AAAA,QAC/C,UAAU,QAAA,CAAS;AAAA,OACpB,CAAA;AACD,MAAA,MAAM,OAAA,GACH,MAAA,CAAO,OAAA,IAAmC,QAAA,CAAS,gBAAA;AACtD,MAAA,MAAM,MAAA,GAAS,MAAM,oBAAA,CAAqB,MAAA,EAAQ,eAAe,CAAA;AAEjE,MAAA,IAAI,OAAA,CAAQ,MAAM,CAAA,EAAG;AACnB,QAAA,OAAO;AAAA,UACL,OAAA,EAAS,CAAC,EAAE,IAAA,EAAM,MAAA,EAAQ,MAAM,CAAA,OAAA,EAAU,MAAA,CAAO,KAAK,CAAA,CAAA,EAAI,CAAA;AAAA,UAC1D,OAAA,EAAS,EAAE,KAAA,EAAO,IAAA,EAAM,OAAA;AAAQ,SAClC;AAAA,MACF;AAEA,MAAA,OAAO;AAAA,QACL,OAAA,EAAS;AAAA,UACP,EAAE,MAAM,MAAA,EAAQ,IAAA,EAAM,uBAAuB,MAAA,EAAQ,EAAE,OAAA,EAAS,CAAA;AAAE,SA
CpE;AAAA,QACA,OAAA,EAAS,EAAE,OAAA,EAAS,QAAA,EAAU,gBAAgB,QAAA;AAAS,OACzD;AAAA,IACF;AAAA,GACD,CAAA;AACH","file":"index.js","sourcesContent":["import type { FingerprintOs } from \"./types\";\n\nexport const DEFAULT_BROWSER = \"chrome_145\";\nexport const DEFAULT_OS: FingerprintOs = \"windows\";\nexport const DEFAULT_MAX_CHARS = 50_000;\nexport const DEFAULT_TIMEOUT_MS = 15_000;\nexport const DEFAULT_INCLUDE_REPLIES = \"extractors\" as const;\nexport const DEFAULT_ACCEPT_HEADER =\n \"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\";\nexport const DEFAULT_ACCEPT_LANGUAGE_HEADER = \"en-US,en;q=0.9\";\n","import { Defuddle } from \"defuddle/node\";\nimport { getProfiles, fetch as wreqFetch } from \"wreq-js\";\nimport type { FetchDependencies } from \"./types\";\n\nexport const runtimeDependencies: FetchDependencies = {\n fetch: wreqFetch,\n defuddle: Defuddle,\n getProfiles,\n};\n","import { parseHTML } from \"linkedom\";\n\n/** Apply linkedom polyfills that Defuddle expects (getComputedStyle, styleSheets). 
*/\nexport function parseLinkedomHTML(html: string, url?: string): Document {\n const { document } = parseHTML(html);\n const doc = document as Document & Record<string, unknown>;\n const defaultView = doc.defaultView as\n | (Window & {\n getComputedStyle?: (\n elt: Element,\n pseudoElt?: string | null,\n ) => CSSStyleDeclaration;\n })\n | undefined;\n\n if (!(doc as { styleSheets?: unknown }).styleSheets) {\n (doc as { styleSheets?: unknown }).styleSheets =\n [] as unknown as StyleSheetList;\n }\n\n if (defaultView && !defaultView.getComputedStyle) {\n defaultView.getComputedStyle = (() => ({\n display: \"\",\n })) as unknown as typeof defaultView.getComputedStyle;\n }\n\n if (url) {\n (doc as { URL?: string }).URL = url;\n }\n\n return document;\n}\n","import type { FetchResult } from \"./types\";\n\nfunction buildHeader(\n parts: Array<[label: string, value: string | number | undefined]>,\n) {\n return parts\n .filter(([, value]) => value !== undefined && value !== \"\")\n .map(([label, value]) => `> ${label}: ${value}`)\n .join(\"\\n\");\n}\n\nexport function markdownToText(markdown: string): string {\n return markdown\n .replace(/^#{1,6}\\s+/gm, \"\")\n .replace(/\\*\\*([^*]+)\\*\\*/g, \"$1\")\n .replace(/\\*([^*]+)\\*/g, \"$1\")\n .replace(/\\[([^\\]]+)\\]\\([^)]+\\)/g, \"$1\")\n .replace(/!\\[[^\\]]*\\]\\([^)]+\\)/g, \"\")\n .replace(/^>\\s+/gm, \"\")\n .replace(/^[-*+]\\s+/gm, \"• \")\n .replace(/`([^`]+)`/g, \"$1\");\n}\n\nexport function truncateContent(content: string, maxChars: number): string {\n if (content.length <= maxChars) return content;\n return `${content.slice(0, maxChars)}\\n\\n[... 
truncated]`;\n}\n\nexport function buildCompactMetadataHeader(result: FetchResult): string {\n return buildHeader([\n [\"URL\", result.finalUrl],\n [\"Title\", result.title],\n [\"Author\", result.author],\n [\"Published\", result.published],\n ]);\n}\n\nexport function buildMetadataHeader(result: FetchResult): string {\n return buildHeader([\n [\"URL\", result.finalUrl],\n [\"Title\", result.title],\n [\"Author\", result.author],\n [\"Published\", result.published],\n [\"Site\", result.site],\n [\"Language\", result.language],\n [\"Words\", result.wordCount],\n [\"Browser\", `${result.browser}/${result.os}`],\n ]);\n}\n\nexport function buildFetchResponseText(\n result: FetchResult,\n options: { verbose?: boolean } = {},\n): string {\n const header = options.verbose\n ? buildMetadataHeader(result)\n : buildCompactMetadataHeader(result);\n\n return header ? `${header}\\n\\n${result.content}` : result.content;\n}\n","/**\n * Core extraction pipeline: fetch with TLS fingerprinting → parse → Defuddle extract.\n * Separated from the plugin entry so it can be tested independently.\n */\n\nimport {\n DEFAULT_ACCEPT_HEADER,\n DEFAULT_ACCEPT_LANGUAGE_HEADER,\n DEFAULT_BROWSER,\n DEFAULT_INCLUDE_REPLIES,\n DEFAULT_MAX_CHARS,\n DEFAULT_OS,\n DEFAULT_TIMEOUT_MS,\n} from \"./constants\";\nimport { runtimeDependencies } from \"./dependencies\";\nimport { parseLinkedomHTML } from \"./dom\";\nimport { markdownToText, truncateContent } from \"./format\";\nimport { getLatestChromeProfile as getLatestChromeProfileFrom } from \"./profiles\";\nimport type {\n FetchDependencies,\n FetchError,\n FetchOptions,\n FetchResult,\n OutputFormat,\n} from \"./types\";\n\nexport {\n DEFAULT_BROWSER,\n DEFAULT_INCLUDE_REPLIES,\n DEFAULT_MAX_CHARS,\n DEFAULT_OS,\n DEFAULT_TIMEOUT_MS,\n} from \"./constants\";\nexport type {\n FetchError,\n FetchOptions,\n FetchResult,\n OutputFormat,\n} from \"./types\";\n\nconst HTML_CONTENT_TYPES = [\n \"text/html\",\n \"application/xhtml+xml\",\n 
\"text/plain\",\n \"text/markdown\",\n];\n\nexport function getLatestChromeProfile(): string {\n return getLatestChromeProfileFrom(runtimeDependencies.getProfiles);\n}\n\nexport function createDefuddleFetch(\n dependencies: FetchDependencies = runtimeDependencies,\n) {\n return async function defuddleFetch(\n opts: FetchOptions,\n ): Promise<FetchResult | FetchError> {\n const browser = opts.browser ?? DEFAULT_BROWSER;\n const os = opts.os ?? DEFAULT_OS;\n const format: OutputFormat = opts.format ?? \"markdown\";\n const maxChars = opts.maxChars ?? DEFAULT_MAX_CHARS;\n const removeImages = opts.removeImages ?? false;\n const includeReplies = opts.includeReplies ?? DEFAULT_INCLUDE_REPLIES;\n const timeoutMs = opts.timeoutMs ?? DEFAULT_TIMEOUT_MS;\n\n let parsed: URL;\n try {\n parsed = new URL(opts.url);\n } catch {\n return { error: `Invalid URL: ${opts.url}` };\n }\n\n if (![\"http:\", \"https:\"].includes(parsed.protocol)) {\n return {\n error: `Only http/https URLs supported, got ${parsed.protocol}`,\n };\n }\n\n const fetchOptions: Record<string, unknown> = {\n browser,\n os,\n headers: {\n Accept: DEFAULT_ACCEPT_HEADER,\n \"Accept-Language\": DEFAULT_ACCEPT_LANGUAGE_HEADER,\n ...opts.headers,\n },\n redirect: \"follow\",\n timeout: timeoutMs,\n };\n\n if (opts.proxy) {\n fetchOptions.proxy = opts.proxy;\n }\n\n const response = await dependencies.fetch(opts.url, fetchOptions);\n\n if (!response.ok) {\n return {\n error: `HTTP ${response.status} ${response.statusText} for ${opts.url}`,\n };\n }\n\n const finalUrl = response.url ?? opts.url;\n const contentType = response.headers.get(\"content-type\") ?? 
\"\";\n\n if (!HTML_CONTENT_TYPES.some((value) => contentType.includes(value))) {\n return { error: `Not an HTML page (content-type: ${contentType})` };\n }\n\n const html = await response.text();\n const document = parseLinkedomHTML(html, finalUrl);\n const extracted = await dependencies.defuddle(document, finalUrl, {\n markdown: format !== \"html\",\n removeImages,\n includeReplies,\n });\n\n if (!extracted.content || extracted.wordCount === 0) {\n return {\n error: `No content extracted from ${opts.url}. May need JS rendering or is blocked.`,\n };\n }\n\n const normalizedContent =\n format === \"text\" ? markdownToText(extracted.content) : extracted.content;\n\n return {\n url: opts.url,\n finalUrl,\n title: extracted.title ?? \"\",\n author: extracted.author ?? \"\",\n published: extracted.published ?? \"\",\n site: extracted.site ?? \"\",\n language: extracted.language ?? \"\",\n wordCount: extracted.wordCount,\n content: truncateContent(normalizedContent, maxChars),\n browser,\n os,\n };\n };\n}\n\nexport const defuddleFetch = createDefuddleFetch();\n\n/** Type guard: check if result is an error. */\nexport function isError(\n result: FetchResult | FetchError,\n): result is FetchError {\n return \"error\" in result;\n}\n","import { type TSchema, Type } from \"@sinclair/typebox\";\nimport {\n DEFAULT_BROWSER,\n DEFAULT_INCLUDE_REPLIES,\n DEFAULT_MAX_CHARS,\n DEFAULT_OS,\n DEFAULT_TIMEOUT_MS,\n} from \"./constants\";\nimport { defuddleFetch } from \"./extract\";\nimport type {\n FetchError,\n FetchResult,\n FetchToolConfig,\n FetchToolDefaults,\n} from \"./types\";\n\nexport function resolveFetchToolDefaults(\n config: FetchToolConfig = {},\n): FetchToolDefaults {\n return {\n maxChars: config.maxChars ?? DEFAULT_MAX_CHARS,\n timeoutMs: config.timeoutMs ?? DEFAULT_TIMEOUT_MS,\n browser: config.browser ?? DEFAULT_BROWSER,\n os: config.os ?? DEFAULT_OS,\n removeImages: config.removeImages ?? false,\n includeReplies: config.includeReplies ?? 
DEFAULT_INCLUDE_REPLIES,\n };\n}\n\nexport function createBaseFetchToolParameterProperties(\n defaults: FetchToolDefaults,\n): Record<string, TSchema> {\n return {\n url: Type.String({ description: \"URL to fetch (http/https only)\" }),\n browser: Type.Optional(\n Type.String({\n description: `Browser profile for TLS fingerprinting. Default: \"${defaults.browser}\". Examples: chrome_145, firefox_147, safari_26, edge_145, opera_127`,\n }),\n ),\n os: Type.Optional(\n Type.String({\n description: `OS profile for fingerprinting. Default: \"${defaults.os}\". Options: windows, macos, linux, android, ios`,\n }),\n ),\n headers: Type.Optional(\n Type.Record(Type.String(), Type.String(), {\n description:\n \"Custom HTTP headers to send. By default, Accept and Accept-Language are set automatically.\",\n }),\n ),\n maxChars: Type.Optional(\n Type.Number({\n description: `Maximum characters to return. Default: ${defaults.maxChars}`,\n }),\n ),\n format: Type.Optional(\n Type.Union(\n [Type.Literal(\"markdown\"), Type.Literal(\"html\"), Type.Literal(\"text\")],\n {\n description:\n 'Output format. \"markdown\" (default), \"html\" (cleaned HTML), or \"text\" (plain text, no formatting)',\n },\n ),\n ),\n removeImages: Type.Optional(\n Type.Boolean({\n description: \"Strip image references from output. Default: false\",\n }),\n ),\n includeReplies: Type.Optional(\n Type.Union([Type.Boolean(), Type.Literal(\"extractors\")], {\n description:\n \"Include replies/comments: 'extractors' for site-specific only (default), true for all, false for none\",\n }),\n ),\n proxy: Type.Optional(\n Type.String({\n description:\n \"Proxy URL (http://user:pass@host:port or socks5://host:port)\",\n }),\n ),\n };\n}\n\nexport async function executeFetchToolCall(\n params: Record<string, unknown>,\n defaults: FetchToolDefaults,\n): Promise<FetchResult | FetchError> {\n return defuddleFetch({\n url: params.url as string,\n browser: (params.browser as string) ?? 
defaults.browser,\n os: (params.os as string) ?? defaults.os,\n headers: params.headers as Record<string, string> | undefined,\n maxChars: (params.maxChars as number) ?? defaults.maxChars,\n format: (params.format as \"markdown\" | \"html\" | \"text\") ?? \"markdown\",\n removeImages: (params.removeImages as boolean) ?? defaults.removeImages,\n includeReplies:\n (params.includeReplies as boolean | \"extractors\") ??\n defaults.includeReplies,\n proxy: params.proxy as string | undefined,\n timeoutMs: defaults.timeoutMs,\n });\n}\n","import { readFile } from \"node:fs/promises\";\nimport { join } from \"node:path\";\nimport { getAgentDir } from \"@mariozechner/pi-coding-agent\";\n\ninterface PiWebFetchSettings {\n webFetchVerboseByDefault?: boolean;\n webFetchDefaultMaxChars?: number;\n}\n\nexport interface ResolvedPiWebFetchSettings {\n verboseByDefault: boolean;\n defaultMaxChars?: number;\n}\n\nfunction normalizePiWebFetchSettings(input: unknown): PiWebFetchSettings {\n if (!input || typeof input !== \"object\") return {};\n\n const source = input as Record<string, unknown>;\n const settings: PiWebFetchSettings = {};\n\n if (typeof source.webFetchVerboseByDefault === \"boolean\") {\n settings.webFetchVerboseByDefault = source.webFetchVerboseByDefault;\n }\n\n if (\n typeof source.webFetchDefaultMaxChars === \"number\" &&\n Number.isFinite(source.webFetchDefaultMaxChars) &&\n source.webFetchDefaultMaxChars > 0\n ) {\n settings.webFetchDefaultMaxChars = source.webFetchDefaultMaxChars;\n }\n\n return settings;\n}\n\nexport function resolvePiWebFetchSettings(\n globalSettings: unknown,\n projectSettings: unknown,\n): ResolvedPiWebFetchSettings {\n const global = normalizePiWebFetchSettings(globalSettings);\n const project = normalizePiWebFetchSettings(projectSettings);\n\n return {\n verboseByDefault:\n project.webFetchVerboseByDefault ??\n global.webFetchVerboseByDefault ??\n false,\n defaultMaxChars:\n project.webFetchDefaultMaxChars ?? 
global.webFetchDefaultMaxChars,\n };\n}\n\nasync function readSettingsFile(path: string): Promise<unknown> {\n try {\n return JSON.parse(await readFile(path, \"utf-8\"));\n } catch {\n return {};\n }\n}\n\nexport async function loadPiWebFetchSettings(\n cwd: string,\n agentDir = getAgentDir(),\n): Promise<ResolvedPiWebFetchSettings> {\n const globalSettings = await readSettingsFile(\n join(agentDir, \"settings.json\"),\n );\n const projectSettings = await readSettingsFile(\n join(cwd, \".pi\", \"settings.json\"),\n );\n\n return resolvePiWebFetchSettings(globalSettings, projectSettings);\n}\n","import { type ExtensionAPI, getAgentDir } from \"@mariozechner/pi-coding-agent\";\nimport { Type } from \"@sinclair/typebox\";\nimport {\n buildFetchResponseText,\n createBaseFetchToolParameterProperties,\n executeFetchToolCall,\n isError,\n resolveFetchToolDefaults,\n} from \"smart-fetch-core\";\nimport { loadPiWebFetchSettings } from \"./settings\";\n\nconst toolDescription = [\n \"Fetch a URL with browser-grade TLS fingerprinting and extract clean, readable content.\",\n \"Uses wreq-js for browser-like TLS/HTTP2 impersonation and Defuddle for article extraction.\",\n \"Supports the same fetch parameters as the OpenClaw tool, plus an optional verbose flag.\",\n \"Does NOT execute JavaScript — use a browser automation tool for JS-heavy pages.\",\n].join(\" \");\n\nexport default function piSmartFetchExtension(pi: ExtensionAPI) {\n const defaults = resolveFetchToolDefaults();\n\n pi.registerTool({\n name: \"web_fetch\",\n label: \"web_fetch\",\n description: toolDescription,\n promptSnippet:\n \"web_fetch(url, browser?, os?, headers?, maxChars?, format?, removeImages?, includeReplies?, proxy?, verbose?): fetch browser-fingerprinted readable web content\",\n parameters: Type.Object({\n ...createBaseFetchToolParameterProperties(defaults),\n verbose: Type.Optional(\n Type.Boolean({\n description:\n \"Include the full metadata header (site, language, word count, browser 
fingerprint info). Default: false, or webFetchVerboseByDefault from pi settings.\",\n }),\n ),\n }),\n\n async execute(_toolCallId, params, _signal, _onUpdate, ctx) {\n const settings = await loadPiWebFetchSettings(ctx.cwd, getAgentDir());\n const runtimeDefaults = resolveFetchToolDefaults({\n maxChars: settings.defaultMaxChars,\n });\n const verbose =\n (params.verbose as boolean | undefined) ?? settings.verboseByDefault;\n const result = await executeFetchToolCall(params, runtimeDefaults);\n\n if (isError(result)) {\n return {\n content: [{ type: \"text\", text: `Error: ${result.error}` }],\n details: { error: true, verbose },\n };\n }\n\n return {\n content: [\n { type: \"text\", text: buildFetchResponseText(result, { verbose }) },\n ],\n details: { verbose, maxChars: runtimeDefaults.maxChars },\n };\n },\n });\n}\n"]}
package/package.json ADDED
@@ -0,0 +1,68 @@
1
+ {
2
+ "name": "pi-smart-fetch",
3
+ "version": "0.1.3",
4
+ "type": "module",
5
+ "description": "pi.dev smart fetch extension with browser-grade TLS fingerprinting and Defuddle extraction.",
6
+ "license": "MIT",
7
+ "author": "Thinkscape",
8
+ "repository": {
9
+ "type": "git",
10
+ "url": "git+https://github.com/Thinkscape/agent-smart-fetch.git"
11
+ },
12
+ "homepage": "https://github.com/Thinkscape/agent-smart-fetch#readme",
13
+ "bugs": {
14
+ "url": "https://github.com/Thinkscape/agent-smart-fetch/issues"
15
+ },
16
+ "keywords": [
17
+ "pi-package",
18
+ "pi",
19
+ "smart-fetch",
20
+ "defuddle",
21
+ "wreq-js",
22
+ "content-extraction",
23
+ "web-fetch",
24
+ "tls-fingerprinting"
25
+ ],
26
+ "main": "./dist/index.js",
27
+ "types": "./dist/index.d.ts",
28
+ "exports": {
29
+ ".": {
30
+ "types": "./dist/index.d.ts",
31
+ "import": "./dist/index.js"
32
+ }
33
+ },
34
+ "files": [
35
+ "dist/",
36
+ "README.md",
37
+ "LICENSE"
38
+ ],
39
+ "engines": {
40
+ "bun": ">=1.3.0",
41
+ "node": ">=22"
42
+ },
43
+ "publishConfig": {
44
+ "access": "public"
45
+ },
46
+ "pi": {
47
+ "extensions": [
48
+ "./dist/index.js"
49
+ ]
50
+ },
51
+ "scripts": {
52
+ "clean": "rm -rf dist",
53
+ "build": "bun run clean && bunx tsup --config tsup.config.ts",
54
+ "test": "bun test test/unit",
55
+ "typecheck": "bunx tsc -p tsconfig.json",
56
+ "check": "bun run test && bun run build && bun run typecheck",
57
+ "pack:dry-run": "bun run build && npm pack --dry-run"
58
+ },
59
+ "dependencies": {
60
+ "@sinclair/typebox": "^0.34.49",
61
+ "defuddle": "^0.14.0",
62
+ "linkedom": "^0.18.12",
63
+ "wreq-js": "^2.2.2"
64
+ },
65
+ "peerDependencies": {
66
+ "@mariozechner/pi-coding-agent": "*"
67
+ }
68
+ }