@0xkobold/pi-web 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/search.js ADDED
@@ -0,0 +1,322 @@
1
+ /**
2
+ * pi-web - Search and Content Extraction
3
+ *
4
+ * Internal search and fetch utilities. No framework dependencies.
5
+ */
6
+ // ═════════════════════════════════════════════════════════════════════════════
7
+ // Content Extraction (Cascade)
8
+ // ═════════════════════════════════════════════════════════════════════════════
9
+ /**
10
+ * Fast fetch for simple HTML sites
11
+ */
12
+ async function fastFetch(url, maxLength) {
13
+ try {
14
+ const controller = new AbortController();
15
+ const timeout = setTimeout(() => controller.abort(), 10000);
16
+ const response = await fetch(url, {
17
+ headers: { 'User-Agent': 'Mozilla/5.0 (compatible; Bot/1.0)' },
18
+ signal: controller.signal
19
+ });
20
+ clearTimeout(timeout);
21
+ if (!response.ok)
22
+ return null;
23
+ const html = await response.text();
24
+ const title = html.match(/<title[^>]*>(.*?)<\/title>/i)?.[1]?.trim() || "Untitled";
25
+ const content = html
26
+ .replace(/<script[^>]*>.*?<\/script>/gi, '')
27
+ .replace(/<style[^>]*>.*?<\/style>/gi, '')
28
+ .replace(/<[^>]*>/g, ' ')
29
+ .replace(/\s+/g, ' ')
30
+ .trim()
31
+ .slice(0, maxLength);
32
+ if (content.length < 200)
33
+ return null;
34
+ return { content, title, method: 'fast', url };
35
+ }
36
+ catch {
37
+ return null;
38
+ }
39
+ }
40
+ /**
41
+ * Readability-style extraction using regex
42
+ */
43
+ async function readabilityFetch(url, maxLength) {
44
+ try {
45
+ const response = await fetch(url, {
46
+ headers: { 'User-Agent': 'Mozilla/5.0 (compatible; Bot/1.0)' }
47
+ });
48
+ if (!response.ok)
49
+ return null;
50
+ const html = await response.text();
51
+ const articleMatch = html.match(/<article[^>]*>([\s\S]*?)<\/article>/i);
52
+ const mainMatch = html.match(/<main[^>]*>([\s\S]*?)<\/main>/i);
53
+ const contentDiv = html.match(/<div[^>]*class="[^"]*(?:content|article|post)[^"]*"[^>]*>([\s\S]*?)<\/div>/i);
54
+ const rawContent = articleMatch?.[1] || mainMatch?.[1] || contentDiv?.[1];
55
+ if (!rawContent)
56
+ return null;
57
+ const content = rawContent
58
+ .replace(/<script[^>]*>.*?<\/script>/gi, '')
59
+ .replace(/<style[^>]*>.*?<\/style>/gi, '')
60
+ .replace(/<[^>]*>/g, ' ')
61
+ .replace(/\s+/g, ' ')
62
+ .trim()
63
+ .slice(0, maxLength);
64
+ const title = html.match(/<title[^>]*>(.*?)<\/title>/i)?.[1]?.trim() || "Untitled";
65
+ if (content.length < 200)
66
+ return null;
67
+ return { content, title, method: 'readability', url };
68
+ }
69
+ catch {
70
+ return null;
71
+ }
72
+ }
73
+ // ═════════════════════════════════════════════════════════════════════════════
74
+ // Playwright Browser Manager
75
+ // ═════════════════════════════════════════════════════════════════════════════
76
+ class BrowserManager {
77
+ browser = null;
78
+ context = null;
79
+ lastUsed = 0;
80
+ POOL_TTL_MS = 120000;
81
+ async getBrowser() {
82
+ const { chromium } = await import('playwright');
83
+ if (this.browser && Date.now() - this.lastUsed < this.POOL_TTL_MS) {
84
+ try {
85
+ await this.browser.contexts();
86
+ this.lastUsed = Date.now();
87
+ return { browser: this.browser, context: this.context, newBrowser: false };
88
+ }
89
+ catch {
90
+ await this.close();
91
+ }
92
+ }
93
+ this.browser = await chromium.launch({
94
+ headless: true,
95
+ args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage', '--disable-gpu', '--no-first-run']
96
+ });
97
+ this.context = await this.browser.newContext({
98
+ viewport: { width: 1280, height: 720 },
99
+ userAgent: 'Mozilla/5.0 (compatible; 0xKobold/0.1)'
100
+ });
101
+ this.lastUsed = Date.now();
102
+ return { browser: this.browser, context: this.context, newBrowser: true };
103
+ }
104
+ async close() {
105
+ if (this.browser) {
106
+ try {
107
+ await this.browser.close();
108
+ }
109
+ catch { /* ignore */ }
110
+ this.browser = null;
111
+ this.context = null;
112
+ }
113
+ }
114
+ }
115
// Single browser pool shared by every Playwright request in this module.
const browserManager = new BrowserManager();
// Pending Playwright jobs: { url, maxLength, timeoutMs, resolve }.
const requestQueue = [];
// True while a processQueue() loop is draining the queue.
let isProcessing = false;
// Number of queued jobs run in parallel per batch.
const MAX_CONCURRENT = 2;
// Total attempts per job before it resolves with null.
const MAX_RETRIES = 3;
/**
 * Drains `requestQueue` in batches of MAX_CONCURRENT, pausing 500 ms between
 * batches. Calls made while a drain is already running return immediately;
 * the running loop picks up newly queued jobs.
 */
async function processQueue() {
    if (isProcessing)
        return;
    isProcessing = true;
    try {
        while (requestQueue.length > 0) {
            const batch = requestQueue.splice(0, MAX_CONCURRENT);
            // A job that unexpectedly rejects must still settle its caller and
            // must not abort the rest of the batch.
            await Promise.all(batch.map(req => processRequest(req).catch(() => req.resolve(null))));
            if (requestQueue.length > 0) {
                await new Promise(r => setTimeout(r, 500));
            }
        }
    }
    finally {
        // Always release the flag, otherwise the queue would never drain again.
        isProcessing = false;
    }
}
133
/**
 * Runs one queued Playwright job and settles it through `req.resolve`.
 *
 * A failed attempt is retried after an exponential back-off (2 s, 4 s, ...
 * capped at 30 s) until MAX_RETRIES attempts have been made; after that the
 * job resolves with `null`. The returned promise itself carries no value.
 *
 * @param req - Queue entry with `url`, `maxLength`, `timeoutMs` and `resolve`.
 * @param attempt - 1-based number of the attempt to start with.
 */
async function processRequest(req, attempt = 1) {
    for (let current = attempt;; current += 1) {
        try {
            const page = await playwrightFetchWithTimeout(req.url, req.maxLength, req.timeoutMs);
            req.resolve(page);
            return;
        }
        catch {
            const retriesLeft = current < MAX_RETRIES;
            if (!retriesLeft) {
                req.resolve(null);
                return;
            }
            const backoffMs = Math.min(2000 * Math.pow(2, current - 1), 30000);
            await new Promise(wake => setTimeout(wake, backoffMs));
        }
    }
}
149
/**
 * Renders `url` in a pooled Playwright page and extracts its visible text.
 *
 * Navigation waits for the 'commit' event, then 500 ms for scripts to run.
 * Text is taken from the first `main, article, .content, [role="main"]`
 * element when it holds more than 100 characters, otherwise from `<body>`.
 * Individual Playwright operations time out after min(timeoutMs, 10 s); a
 * hard deadline at timeoutMs + 2 s force-closes the page.
 *
 * @param {string} url - Page to render.
 * @param {number} maxLength - Maximum number of characters of text to return.
 * @param {number} [timeoutMs=15000] - Overall time budget in milliseconds.
 * @returns {Promise<{content: string, title: string, url: string, method: string}>}
 *   `method` is 'playwright-new' or 'playwright-pooled'.
 * @throws when the browser cannot be obtained, navigation fails or yields no
 *   response, or fewer than 50 characters are extracted.
 */
async function playwrightFetchWithTimeout(url, maxLength, timeoutMs = 15000) {
    const stepTimeout = Math.min(timeoutMs, 10000);
    let page = null;
    // Hard deadline: closing the page makes any pending Playwright call reject.
    const timeoutId = setTimeout(() => {
        if (page)
            page.close().catch(() => { });
    }, timeoutMs + 2000);
    try {
        const { context, newBrowser } = await browserManager.getBrowser();
        page = await context.newPage();
        page.setDefaultTimeout(stepTimeout);
        page.setDefaultNavigationTimeout(stepTimeout);
        const response = await page.goto(url, {
            waitUntil: 'commit',
            timeout: stepTimeout
        });
        if (!response)
            throw new Error("No response");
        await page.waitForTimeout(500);
        // Runs inside the page, so it may only use what is passed in (maxLen).
        const extracted = await page.evaluate((maxLen) => {
            const doc = globalThis.document;
            const main = doc.querySelector('main, article, .content, [role="main"]');
            if (main?.innerText?.trim().length > 100) {
                return main.innerText.slice(0, maxLen);
            }
            return doc.body?.innerText?.slice(0, maxLen) || '';
        }, maxLength);
        const title = await page.title().catch(() => 'Untitled');
        if (!extracted || extracted.length < 50)
            throw new Error("Insufficient content");
        return { content: extracted, title, url, method: newBrowser ? 'playwright-new' : 'playwright-pooled' };
    }
    finally {
        // Runs on every path, including failures before the page exists.
        clearTimeout(timeoutId);
        if (page) {
            // A failing close must not replace the real result or error.
            await page.close().catch(() => { });
        }
    }
}
190
/**
 * Queue-backed Playwright fetch.
 *
 * Appends a job to the shared request queue, kicks the queue processor and
 * returns a promise that settles with whatever the processor passes to the
 * job's `resolve` callback.
 */
export async function playwrightFetch(url, maxLength, timeoutMs = 15000) {
    const enqueue = (resolve) => {
        const job = { url, maxLength, timeoutMs, resolve };
        requestQueue.push(job);
        processQueue();
    };
    return new Promise(enqueue);
}
199
+ // ═════════════════════════════════════════════════════════════════════════════
200
+ // Cascade Fetch
201
+ // ═════════════════════════════════════════════════════════════════════════════
202
/**
 * CASCADE: try the extraction methods in order of speed → quality.
 *
 *   1. fastFetch        – accepted when it yields more than 1000 characters,
 *                         or fills `maxLength` when that is smaller.
 *   2. readabilityFetch – accepted whenever it returns a result.
 *   3. playwrightFetch  – JavaScript rendering.
 *
 * Levels 1 and 2 are skipped when `usePlaywright` is true. If every later
 * level fails, a short level-1 result is returned rather than nothing.
 *
 * @param {string} url - Page to fetch.
 * @param {number} [maxLength=5000] - Maximum number of characters of text.
 * @param {boolean} [usePlaywright=false] - Go straight to Playwright.
 * @param {number} [timeoutMs=15000] - Time budget passed to Playwright.
 * @returns {Promise<object | null>} The first acceptable result, or `null`.
 */
export async function cascadeFetch(url, maxLength = 5000, usePlaywright = false, timeoutMs = 15000) {
    let shortFast = null;
    if (!usePlaywright) {
        // Level 1: Fast HTML fetch
        const fast = await fastFetch(url, maxLength);
        // Content is already capped at maxLength, so a fixed 1000-char bar could
        // never be cleared for maxLength <= 1000; scale the bar with the cap.
        const enough = Math.min(1001, maxLength);
        if (fast && fast.content.length >= enough)
            return fast;
        shortFast = fast;
        // Level 2: Readability extraction
        const readability = await readabilityFetch(url, maxLength);
        if (readability)
            return readability;
    }
    // Level 3: JavaScript rendering with Playwright
    const pw = await playwrightFetch(url, maxLength, timeoutMs);
    // Prefer a short static result over reporting total failure.
    return pw ?? shortFast;
}
224
+ // ═════════════════════════════════════════════════════════════════════════════
225
+ // Search
226
+ // ═════════════════════════════════════════════════════════════════════════════
227
+ export async function searchDuckDuckGo(query, limit) {
228
+ const results = [];
229
+ try {
230
+ const liteUrl = `https://lite.duckduckgo.com/lite/?q=${encodeURIComponent(query)}`;
231
+ const response = await fetch(liteUrl, {
232
+ headers: {
233
+ 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
234
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
235
+ 'Accept-Language': 'en-US,en;q=0.5',
236
+ },
237
+ signal: AbortSignal.timeout(10000),
238
+ });
239
+ if (!response.ok)
240
+ return results;
241
+ const html = await response.text();
242
+ const linkRegex = /href="\/\/duckduckgo\.com\/l\/\?uddg=([^"&]+)/gi;
243
+ const urls = [];
244
+ let match;
245
+ while ((match = linkRegex.exec(html)) && urls.length < limit * 2) {
246
+ try {
247
+ const decoded = decodeURIComponent(match[1]);
248
+ const cleanUrl = decoded.split('&')[0].split('?rut=')[0];
249
+ if (cleanUrl.startsWith('http') && !urls.includes(cleanUrl)) {
250
+ urls.push(cleanUrl);
251
+ }
252
+ }
253
+ catch { /* skip */ }
254
+ }
255
+ const anchorRegex = /<a[^>]*href="[^"]*uddg=[^"]*"[^>]*>([^<]+)<\/a>/gi;
256
+ const titles = [];
257
+ while ((match = anchorRegex.exec(html)) && titles.length < urls.length) {
258
+ const title = match[1].replace(/<[^>]*>/g, '').trim();
259
+ if (title && title.length > 2 && title.length < 200) {
260
+ titles.push(title);
261
+ }
262
+ }
263
+ for (let i = 0; i < Math.min(urls.length, titles.length, limit); i++) {
264
+ results.push({ title: titles[i] || new URL(urls[i]).hostname, url: urls[i], snippet: '' });
265
+ }
266
+ for (let i = results.length; i < Math.min(urls.length, limit); i++) {
267
+ try {
268
+ results.push({ title: new URL(urls[i]).hostname, url: urls[i], snippet: '' });
269
+ }
270
+ catch { /* skip */ }
271
+ }
272
+ }
273
+ catch { /* skip */ }
274
+ return results;
275
+ }
276
+ export async function searchSearX(query, limit, instance) {
277
+ const results = [];
278
+ const searxInstances = instance ? [instance] : [
279
+ "https://search.bus-hit.me",
280
+ "https://search.projectsegfau.ltd",
281
+ "https://searx.foss.family",
282
+ ];
283
+ for (const baseUrl of searxInstances) {
284
+ try {
285
+ const searchUrl = `${baseUrl}/search?q=${encodeURIComponent(query)}&format=json`;
286
+ const response = await fetch(searchUrl, {
287
+ headers: { 'User-Agent': 'Mozilla/5.0', 'Accept': 'application/json' },
288
+ signal: AbortSignal.timeout(8000)
289
+ });
290
+ if (!response.ok)
291
+ continue;
292
+ const data = await response.json();
293
+ if (data.results && data.results.length > 0) {
294
+ for (const r of data.results.slice(0, limit)) {
295
+ results.push({
296
+ title: r.title || "Untitled",
297
+ url: r.url,
298
+ snippet: r.content || r.snippet || ""
299
+ });
300
+ }
301
+ if (results.length >= limit)
302
+ break;
303
+ }
304
+ }
305
+ catch {
306
+ continue;
307
+ }
308
+ }
309
+ return results;
310
+ }
311
/**
 * Combined search across multiple engines.
 *
 * DuckDuckGo is queried first; SearX is consulted only when DuckDuckGo
 * returned fewer than `limit` hits. Each engine is asked for at most 10
 * results. The merged list keeps DuckDuckGo's hits first and drops URLs that
 * both engines returned.
 *
 * @param {string} query - Search terms.
 * @param {number} [limit=5] - Maximum number of results to return.
 * @returns {Promise<Array<{title: string, url: string, snippet: string}>>}
 */
export async function webSearch(query, limit = 5) {
    const perEngine = Math.min(limit, 10);
    const primary = await searchDuckDuckGo(query, perEngine);
    if (primary.length >= limit)
        return primary;
    const secondary = await searchSearX(query, perEngine);
    const seen = new Set();
    const merged = [];
    for (const hit of [...primary, ...secondary]) {
        if (merged.length >= limit)
            break;
        // The same page reported by both engines should appear only once.
        if (seen.has(hit.url))
            continue;
        seen.add(hit.url);
        merged.push(hit);
    }
    return merged;
}
322
+ //# sourceMappingURL=search.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"search.js","sourceRoot":"","sources":["../src/search.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAmBH,gFAAgF;AAChF,+BAA+B;AAC/B,gFAAgF;AAEhF;;GAEG;AACH,KAAK,UAAU,SAAS,CAAC,GAAW,EAAE,SAAiB;IACrD,IAAI,CAAC;QACH,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;QACzC,MAAM,OAAO,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,KAAK,CAAC,CAAC;QAE5D,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;YAChC,OAAO,EAAE,EAAE,YAAY,EAAE,mCAAmC,EAAE;YAC9D,MAAM,EAAE,UAAU,CAAC,MAAM;SAC1B,CAAC,CAAC;QAEH,YAAY,CAAC,OAAO,CAAC,CAAC;QACtB,IAAI,CAAC,QAAQ,CAAC,EAAE;YAAE,OAAO,IAAI,CAAC;QAE9B,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QACnC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,6BAA6B,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,UAAU,CAAC;QAEnF,MAAM,OAAO,GAAG,IAAI;aACjB,OAAO,CAAC,8BAA8B,EAAE,EAAE,CAAC;aAC3C,OAAO,CAAC,4BAA4B,EAAE,EAAE,CAAC;aACzC,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC;aACxB,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;aACpB,IAAI,EAAE;aACN,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;QAEvB,IAAI,OAAO,CAAC,MAAM,GAAG,GAAG;YAAE,OAAO,IAAI,CAAC;QACtC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC;IACjD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,gBAAgB,CAAC,GAAW,EAAE,SAAiB;IAC5D,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;YAChC,OAAO,EAAE,EAAE,YAAY,EAAE,mCAAmC,EAAE;SAC/D,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE;YAAE,OAAO,IAAI,CAAC;QAC9B,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QAEnC,MAAM,YAAY,GAAG,IAAI,CAAC,KAAK,CAAC,sCAAsC,CAAC,CAAC;QACxE,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,gCAAgC,CAAC,CAAC;QAC/D,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,6EAA6E,CAAC,CAAC;QAE7G,MAAM,UAAU,GAAG,YAAY,EAAE,CAAC,CAAC,CAAC,IAAI,SAAS,EAAE,CAAC,CAAC,CAAC,IAAI,UAAU,EAAE,CAAC,CAAC,CAAC,CAAC;QAC1E,IAAI,CAAC,UAAU;YAAE,OAAO,IAAI,CAAC;QAE7B,MAAM,OAAO,GAAG,UAAU;aACvB,OAAO,CAAC,8BAA8B,EAAE,EAAE,CAAC;aAC3C,OAAO,CAAC,4BAA4B,EAAE,EAAE,CAAC;aACzC,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC;aACxB,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;aACpB,IAAI,EAAE;aACN,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;QAEvB,MAAM,KAAK,GAAG,IAA
I,CAAC,KAAK,CAAC,6BAA6B,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,UAAU,CAAC;QACnF,IAAI,OAAO,CAAC,MAAM,GAAG,GAAG;YAAE,OAAO,IAAI,CAAC;QAEtC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,aAAa,EAAE,GAAG,EAAE,CAAC;IACxD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,gFAAgF;AAChF,6BAA6B;AAC7B,gFAAgF;AAEhF,MAAM,cAAc;IACV,OAAO,GAAQ,IAAI,CAAC;IACpB,OAAO,GAAQ,IAAI,CAAC;IACpB,QAAQ,GAAW,CAAC,CAAC;IACZ,WAAW,GAAG,MAAM,CAAC;IAEtC,KAAK,CAAC,UAAU;QACd,MAAM,EAAE,QAAQ,EAAE,GAAG,MAAM,MAAM,CAAC,YAAY,CAAC,CAAC;QAEhD,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;YAClE,IAAI,CAAC;gBACH,MAAM,IAAI,CAAC,OAAO,CAAC,QAAQ,EAAE,CAAC;gBAC9B,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;gBAC3B,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC,OAAO,EAAE,UAAU,EAAE,KAAK,EAAE,CAAC;YAC7E,CAAC;YAAC,MAAM,CAAC;gBACP,MAAM,IAAI,CAAC,KAAK,EAAE,CAAC;YACrB,CAAC;QACH,CAAC;QAED,IAAI,CAAC,OAAO,GAAG,MAAM,QAAQ,CAAC,MAAM,CAAC;YACnC,QAAQ,EAAE,IAAI;YACd,IAAI,EAAE,CAAC,cAAc,EAAE,0BAA0B,EAAE,yBAAyB,EAAE,eAAe,EAAE,gBAAgB,CAAC;SACjH,CAAC,CAAC;QAEH,IAAI,CAAC,OAAO,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC;YAC3C,QAAQ,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE;YACtC,SAAS,EAAE,wCAAwC;SACpD,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC3B,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC,OAAO,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC;IAC5E,CAAC;IAED,KAAK,CAAC,KAAK;QACT,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,IAAI,CAAC;gBAAC,MAAM,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;YAAC,CAAC;YAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC;YAC1D,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;YACpB,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;QACtB,CAAC;IACH,CAAC;CACF;AAED,MAAM,cAAc,GAAG,IAAI,cAAc,EAAE,CAAC;AAU5C,MAAM,YAAY,GAAoB,EAAE,CAAC;AACzC,IAAI,YAAY,GAAG,KAAK,CAAC;AACzB,MAAM,cAAc,GAAG,CAAC,CAAC;AACzB,MAAM,WAAW,GAAG,CAAC,CAAC;AAEtB,KAAK,UAAU,YAAY;IACzB,IAAI,YAAY;QAAE,OAAO;IACzB,YAAY,GAAG,IAAI,CAAC;IAEpB,OAAO,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC/B,MAAM,KAAK,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC,EAAE,cAAc,CAAC,C
AAC;QACrD,MAAM,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QACzD,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC5B,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC;QAC7C,CAAC;IACH,CAAC;IAED,YAAY,GAAG,KAAK,CAAC;AACvB,CAAC;AAED,KAAK,UAAU,cAAc,CAAC,GAAkB,EAAE,OAAO,GAAG,CAAC;IAC3D,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,0BAA0B,CAAC,GAAG,CAAC,GAAG,EAAE,GAAG,CAAC,SAAS,EAAE,GAAG,CAAC,SAAS,CAAC,CAAC;QACvF,GAAG,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;IACtB,CAAC;IAAC,MAAM,CAAC;QACP,IAAI,OAAO,GAAG,WAAW,EAAE,CAAC;YAC1B,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,GAAG,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;YAC/D,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,CAAC;YAC7C,MAAM,cAAc,CAAC,GAAG,EAAE,OAAO,GAAG,CAAC,CAAC,CAAC;QACzC,CAAC;aAAM,CAAC;YACN,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QACpB,CAAC;IACH,CAAC;AACH,CAAC;AAED,KAAK,UAAU,0BAA0B,CACvC,GAAW,EACX,SAAiB,EACjB,YAAoB,KAAK;IAEzB,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;IACzC,MAAM,SAAS,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,SAAS,GAAG,IAAI,CAAC,CAAC;IAEzE,IAAI,CAAC;QACH,MAAM,EAAE,OAAO,EAAE,UAAU,EAAE,GAAG,MAAM,cAAc,CAAC,UAAU,EAAE,CAAC;QAClE,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;QAErC,IAAI,CAAC,iBAAiB,CAAC,IAAI,CAAC,GAAG,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC,CAAC;QACnD,IAAI,CAAC,2BAA2B,CAAC,IAAI,CAAC,GAAG,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC,CAAC;QAE7D,UAAU,CAAC,MAAM,CAAC,gBAAgB,CAAC,OAAO,EAAE,GAAG,EAAE;YAC/C,IAAI,CAAC,KAAK,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;QAC/B,CAAC,CAAC,CAAC;QAEH,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE;gBACpC,SAAS,EAAE,QAAQ;gBACnB,OAAO,EAAE,IAAI,CAAC,GAAG,CAAC,SAAS,EAAE,KAAK,CAAC;aACpC,CAAC,CAAC;YAEH,IAAI,CAAC,QAAQ;gBAAE,MAAM,IAAI,KAAK,CAAC,aAAa,CAAC,CAAC;YAC9C,MAAM,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC;YAE/B,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,CAAC,MAAc,EAAE,EAAE;gBACvD,MAAM,GAAG,GAAI,UAAkB,CAAC,QAAQ,CAAC;gBACzC,MAAM,IAAI,GAAG,GAAG,CAAC,aAAa,CAAC,wCAAwC,CAAC
,CAAC;gBACzE,IAAI,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;oBACzC,OAAO,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC;gBACzC,CAAC;gBACD,OAAO,GAAG,CAAC,IAAI,EAAE,SAAS,EAAE,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,IAAI,EAAE,CAAC;YACrD,CAAC,EAAE,SAAS,CAAC,CAAC;YAEd,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,KAAK,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,CAAC;YAEzD,IAAI,CAAC,SAAS,IAAI,SAAS,CAAC,MAAM,GAAG,EAAE;gBAAE,MAAM,IAAI,KAAK,CAAC,sBAAsB,CAAC,CAAC;YAEjF,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC,CAAC,gBAAgB,CAAC,CAAC,CAAC,mBAAmB,EAAE,CAAC;QACzG,CAAC;gBAAS,CAAC;YACT,MAAM,IAAI,CAAC,KAAK,EAAE,CAAC;YACnB,YAAY,CAAC,SAAS,CAAC,CAAC;QAC1B,CAAC;IACH,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,eAAe,CACnC,GAAW,EACX,SAAiB,EACjB,YAAoB,KAAK;IAEzB,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE;QAC7B,YAAY,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,SAAS,EAAE,SAAS,EAAE,OAAO,EAAE,CAAC,CAAC;QAC1D,YAAY,EAAE,CAAC;IACjB,CAAC,CAAC,CAAC;AACL,CAAC;AAED,gFAAgF;AAChF,gBAAgB;AAChB,gFAAgF;AAEhF;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,YAAY,CAChC,GAAW,EACX,YAAoB,IAAI,EACxB,gBAAyB,KAAK,EAC9B,YAAoB,KAAK;IAEzB,2BAA2B;IAC3B,IAAI,CAAC,aAAa,EAAE,CAAC;QACnB,MAAM,IAAI,GAAG,MAAM,SAAS,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC;QAC7C,IAAI,IAAI,IAAI,IAAI,CAAC,OAAO,CAAC,MAAM,GAAG,IAAI;YAAE,OAAO,IAAI,CAAC;IACtD,CAAC;IAED,kCAAkC;IAClC,IAAI,CAAC,aAAa,EAAE,CAAC;QACnB,MAAM,WAAW,GAAG,MAAM,gBAAgB,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC;QAC3D,IAAI,WAAW;YAAE,OAAO,WAAW,CAAC;IACtC,CAAC;IAED,gDAAgD;IAChD,MAAM,EAAE,GAAG,MAAM,eAAe,CAAC,GAAG,EAAE,SAAS,EAAE,SAAS,CAAC,CAAC;IAC5D,IAAI,EAAE;QAAE,OAAO,EAAE,CAAC;IAElB,OAAO,IAAI,CAAC;AACd,CAAC;AAED,gFAAgF;AAChF,SAAS;AACT,gFAAgF;AAEhF,MAAM,CAAC,KAAK,UAAU,gBAAgB,CAAC,KAAa,EAAE,KAAa;IACjE,MAAM,OAAO,GAAsB,EAAE,CAAC;IAEtC,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,uCAAuC,kBAAkB,CAAC,KAAK,CAAC,EAAE,CAAC;QACnF,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,OAAO,EAAE;YACpC,OAAO,EAAE;gBACP,YAAY,EAAE,uGAAuG;gBACrH,QAAQ,EAAE,iEAAiE;gBAC3E,iBAAiB,EAAE,gBAAgB;aACpC;YACD,MAAM,E
AAE,WAAW,CAAC,OAAO,CAAC,KAAK,CAAC;SACnC,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE;YAAE,OAAO,OAAO,CAAC;QAEjC,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QAEnC,MAAM,SAAS,GAAG,iDAAiD,CAAC;QACpE,MAAM,IAAI,GAAa,EAAE,CAAC;QAC1B,IAAI,KAAK,CAAC;QAEV,OAAO,CAAC,KAAK,GAAG,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,IAAI,CAAC,MAAM,GAAG,KAAK,GAAG,CAAC,EAAE,CAAC;YACjE,IAAI,CAAC;gBACH,MAAM,OAAO,GAAG,kBAAkB,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;gBAC7C,MAAM,QAAQ,GAAG,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC;gBACzD,IAAI,QAAQ,CAAC,UAAU,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;oBAC5D,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;gBACtB,CAAC;YACH,CAAC;YAAC,MAAM,CAAC,CAAC,UAAU,CAAC,CAAC;QACxB,CAAC;QAED,MAAM,WAAW,GAAG,mDAAmD,CAAC;QACxE,MAAM,MAAM,GAAa,EAAE,CAAC;QAC5B,OAAO,CAAC,KAAK,GAAG,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;YACvE,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;YACtD,IAAI,KAAK,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,KAAK,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;gBACpD,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACrB,CAAC;QACH,CAAC;QAED,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,KAAK,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YACrE,OAAO,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC,IAAI,IAAI,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,EAAE,GAAG,EAAE,IAAI,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,EAAE,EAAE,CAAC,CAAC;QAC7F,CAAC;QAED,KAAK,IAAI,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,KAAK,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YACnE,IAAI,CAAC;gBACH,OAAO,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,IAAI,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,EAAE,GAAG,EAAE,IAAI,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,EAAE,EAAE,CAAC,CAAC;YAChF,CAAC;YAAC,MAAM,CAAC,CAAC,UAAU,CAAC,CAAC;QACxB,CAAC;IACH,CAAC;IAAC,MAAM,CAAC,CAAC,UAAU,CAAC,CAAC;IAEtB,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,KAAa,EAAE,KAAa,EAAE,QAAiB;IAC
/E,MAAM,OAAO,GAAsB,EAAE,CAAC;IAEtC,MAAM,cAAc,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;QAC7C,2BAA2B;QAC3B,kCAAkC;QAClC,2BAA2B;KAC5B,CAAC;IAEF,KAAK,MAAM,OAAO,IAAI,cAAc,EAAE,CAAC;QACrC,IAAI,CAAC;YACH,MAAM,SAAS,GAAG,GAAG,OAAO,aAAa,kBAAkB,CAAC,KAAK,CAAC,cAAc,CAAC;YACjF,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,SAAS,EAAE;gBACtC,OAAO,EAAE,EAAE,YAAY,EAAE,aAAa,EAAE,QAAQ,EAAE,kBAAkB,EAAE;gBACtE,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,IAAI,CAAC;aAClC,CAAC,CAAC;YAEH,IAAI,CAAC,QAAQ,CAAC,EAAE;gBAAE,SAAS;YAE3B,MAAM,IAAI,GAAQ,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACxC,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC5C,KAAK,MAAM,CAAC,IAAI,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,EAAE,CAAC;oBAC7C,OAAO,CAAC,IAAI,CAAC;wBACX,KAAK,EAAE,CAAC,CAAC,KAAK,IAAI,UAAU;wBAC5B,GAAG,EAAE,CAAC,CAAC,GAAG;wBACV,OAAO,EAAE,CAAC,CAAC,OAAO,IAAI,CAAC,CAAC,OAAO,IAAI,EAAE;qBACtC,CAAC,CAAC;gBACL,CAAC;gBACD,IAAI,OAAO,CAAC,MAAM,IAAI,KAAK;oBAAE,MAAM;YACrC,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,SAAS;QACX,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,KAAa,EAAE,QAAgB,CAAC;IAC9D,IAAI,OAAO,GAAG,MAAM,gBAAgB,CAAC,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,CAAC;IACjE,IAAI,OAAO,CAAC,MAAM,GAAG,KAAK,EAAE,CAAC;QAC3B,MAAM,YAAY,GAAG,MAAM,WAAW,CAAC,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,CAAC;QACnE,OAAO,GAAG,CAAC,GAAG,OAAO,EAAE,GAAG,YAAY,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;IAC1D,CAAC;IACD,OAAO,OAAO,CAAC;AACjB,CAAC"}
package/package.json ADDED
@@ -0,0 +1,47 @@
1
+ {
2
+ "name": "@0xkobold/pi-web",
3
+ "version": "0.1.0",
4
+ "description": "Web search and content extraction for pi agents — DuckDuckGo/SearX search, cascade fetching (fast → readability → Playwright), deep research",
5
+ "author": "0xKobold",
6
+ "license": "MIT",
7
+ "keywords": ["pi-package", "pi-extension", "web", "search", "fetch", "scraping", "playwright"],
8
+ "type": "module",
9
+ "main": "dist/index.js",
10
+ "types": "dist/index.d.ts",
11
+ "exports": {
12
+ ".": {
13
+ "import": "./dist/index.js",
14
+ "types": "./dist/index.d.ts"
15
+ }
16
+ },
17
+ "files": [
18
+ "dist/",
19
+ "src/",
20
+ "README.md"
21
+ ],
22
+ "scripts": {
23
+ "build": "tsc",
24
+ "dev": "tsc --watch",
25
+ "test": "bun test",
26
+ "prepublishOnly": "rm -rf dist && tsc"
27
+ },
28
+ "dependencies": {},
29
+ "peerDependencies": {
30
+ "@mariozechner/pi-coding-agent": ">=0.65.0",
31
+ "@sinclair/typebox": ">=0.32.0"
32
+ },
33
+ "optionalDependencies": {
34
+ "playwright": "^1.40.0"
35
+ },
36
+ "devDependencies": {
37
+ "@types/node": "^20.19.37",
38
+ "typescript": "^6.0.2"
39
+ },
40
+ "pi": {
41
+ "extensions": ["./dist/index.js"]
42
+ },
43
+ "repository": {
44
+ "type": "git",
45
+ "url": "https://github.com/0xKobold/pi-web.git"
46
+ }
47
+ }
package/src/index.ts ADDED
@@ -0,0 +1,270 @@
1
+ /**
2
+ * pi-web - Web Search and Content Extraction for Pi Agents
3
+ *
4
+ * Provides web search + advanced content extraction using:
5
+ * 1. Standard fetch() for simple sites
6
+ * 2. Readability-style extraction for articles
7
+ * 3. Playwright for JavaScript-rendered content
8
+ * 4. Cascade strategy: fast → detailed
9
+ *
10
+ * Search backends: DuckDuckGo (default), SearXNG (fallback)
11
+ *
12
+ * Tools:
13
+ * web_fetch - Fetch and extract content from a URL
14
+ * web_search - Search web, optionally fetch content from results
15
+ * web_research - Deep research: search + fetch + synthesize from multiple sources
16
+ *
17
+ * Commands:
18
+ * /deep-fetch <url> - Fetch JS-rendered content
19
+ * /web-search-deep <query> - Search + fetch from top results
20
+ */
21
+
22
+ import type { ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-agent";
23
+ import { Type } from "@sinclair/typebox";
24
+
25
+ import {
26
+ cascadeFetch,
27
+ playwrightFetch,
28
+ webSearch,
29
+ type ScrapingResult,
30
+ type WebSearchResult,
31
+ } from "./search.js";
32
+
33
+ export default async function (pi: ExtensionAPI): Promise<void> {
34
+
35
+ // ═══════════════════════════════════════════════════════════════════════════
36
+ // Commands
37
+ // ═══════════════════════════════════════════════════════════════════════════
38
+
39
+ pi.registerCommand("deep-fetch", {
40
+ description: "Fetch JavaScript-rendered content from a URL using Playwright",
41
+ handler: async (args: string, ctx) => {
42
+ const parts = args.split(/\s+/).filter(Boolean);
43
+ const url = parts[0];
44
+ const max = parseInt(parts[1]) || 8000;
45
+
46
+ if (!url?.startsWith("http")) {
47
+ ctx.ui?.notify?.("❌ URL must start with http:// or https://", "error");
48
+ return;
49
+ }
50
+
51
+ ctx.ui?.notify?.(`🔍 Deep fetching: ${url}`, "info");
52
+
53
+ const result = await cascadeFetch(url, max, true);
54
+
55
+ if (!result) {
56
+ ctx.ui?.notify?.("❌ Failed to fetch content", "error");
57
+ return;
58
+ }
59
+
60
+ ctx.ui?.notify?.(
61
+ `📄 ${result.title}\nMethod: ${result.method} | Source: ${result.url}\n─────────────────────────────────────────\n\n${result.content.slice(0, max)}${result.content.length > max ? `\n... (${result.content.length - max} more chars)` : ""}`,
62
+ "info"
63
+ );
64
+ },
65
+ });
66
+
67
+ pi.registerCommand("web-search-deep", {
68
+ description: "Search web + fetch content from top results",
69
+ handler: async (args: string, ctx) => {
70
+ const numResults = 3;
71
+
72
+ ctx.ui?.notify?.(`🔍 Searching + fetching: "${args}"`, "info");
73
+
74
+ const results = await webSearch(args, numResults * 2);
75
+
76
+ if (results.length === 0) {
77
+ ctx.ui?.notify?.("❌ No search results found", "error");
78
+ return;
79
+ }
80
+
81
+ const fetched: ScrapingResult[] = [];
82
+ for (let i = 0; i < Math.min(numResults, results.length); i++) {
83
+ const r = results[i];
84
+ ctx.ui?.notify?.(` Fetching ${i + 1}/${numResults}: ${r.title.slice(0, 50)}...`, "info");
85
+ const content = await cascadeFetch(r.url, 3000);
86
+ if (content) fetched.push(content);
87
+ }
88
+
89
+ const lines = [
90
+ `🔍 Research Results: "${args}"`,
91
+ `Sources: ${fetched.length} / ${results.length} found`,
92
+ "═══════════════════════════════════════════",
93
+ "",
94
+ ];
95
+
96
+ for (let i = 0; i < fetched.length; i++) {
97
+ const f = fetched[i];
98
+ lines.push(`## ${i + 1}. ${f.title}`);
99
+ lines.push(`Source: ${f.url} | Method: ${f.method}`);
100
+ lines.push("");
101
+ lines.push(f.content.slice(0, 2500));
102
+ lines.push("");
103
+ lines.push("─────────────────────────────────────────");
104
+ lines.push("");
105
+ }
106
+
107
+ ctx.ui?.notify?.(lines.join("\n"), "info");
108
+ },
109
+ });
110
+
111
+ // ═══════════════════════════════════════════════════════════════════════════
112
+ // Tools
113
+ // ═══════════════════════════════════════════════════════════════════════════
114
+
115
+ pi.registerTool({
116
+ name: "web_fetch",
117
+ label: "Web Fetch",
118
+ description: "Fetch and extract content from a web page. Uses cascade: fast HTML → readability → Playwright for JS sites.",
119
+ parameters: Type.Object({
120
+ url: Type.String({ description: "Full URL to fetch (must include http:// or https://)" }),
121
+ max_length: Type.Optional(Type.Number({ description: "Maximum characters to retrieve (default: 5000)", default: 5000 })),
122
+ use_playwright: Type.Optional(Type.Boolean({ description: "Force Playwright for JavaScript-rendered content (default: false)", default: false })),
123
+ timeout_ms: Type.Optional(Type.Number({ description: "Timeout in milliseconds (default: 15000, max: 60000)", default: 15000 })),
124
+ }),
125
+ async execute(_toolCallId: string, params: any, _signal: AbortSignal, _onUpdate: any, _ctx: any) {
126
+ const { url, max_length = 5000, use_playwright = false, timeout_ms = 15000 } = params;
127
+
128
+ if (!url?.startsWith("http")) {
129
+ return {
130
+ content: [{ type: "text", text: "URL must start with http:// or https://" }],
131
+ details: { error: "fetch_failed" } as const,
132
+ };
133
+ }
134
+
135
+ const cappedTimeout = Math.min(timeout_ms, 60000);
136
+ let result: ScrapingResult | null;
137
+
138
+ if (use_playwright) {
139
+ result = await playwrightFetch(url, max_length, cappedTimeout);
140
+ } else {
141
+ result = await cascadeFetch(url, max_length, use_playwright, cappedTimeout);
142
+ }
143
+
144
+ if (!result) {
145
+ return {
146
+ content: [{ type: "text", text: `Failed to fetch content from ${url}` }],
147
+ details: { error: "fetch_failed", url } as any,
148
+ };
149
+ }
150
+
151
+ return {
152
+ content: [{
153
+ type: "text",
154
+ text: `# ${result.title}\n\n${result.content}\n\n[Source: ${result.url} | Method: ${result.method}]`,
155
+ }],
156
+ details: { url, title: result.title, method: result.method, length: result.content.length },
157
+ };
158
+ },
159
+ });
160
+
161
+ pi.registerTool({
162
+ name: "web_search",
163
+ label: "Web Search",
164
+ description: "Search the web using DuckDuckGo and SearX. Optionally fetch content from top results.",
165
+ parameters: Type.Object({
166
+ query: Type.String({ description: "Search query — be specific" }),
167
+ limit: Type.Optional(Type.Number({ description: "Number of results (1-10, default: 5)", default: 5 })),
168
+ fetch_content: Type.Optional(Type.Boolean({ description: "Fetch full content from top results (default: false)", default: false })),
169
+ fetch_sources: Type.Optional(Type.Number({ description: "How many sources to fetch content from if fetch_content is true (default: 3)", default: 3 })),
170
+ }),
171
+ async execute(_toolCallId: string, params: any, _signal: AbortSignal, _onUpdate: any, _ctx: any) {
172
+ const { query, limit = 5, fetch_content = false, fetch_sources = 3 } = params;
173
+
174
+ if (!query) {
175
+ return {
176
+ content: [{ type: "text", text: "Invalid search query" }],
177
+ details: { error: "fetch_failed" } as const,
178
+ };
179
+ }
180
+
181
+ const results = await webSearch(query, Math.min(limit, 10));
182
+
183
+ if (results.length === 0) {
184
+ return {
185
+ content: [{ type: "text", text: "No search results found" }],
186
+ details: { query, error: "no_results" } as any,
187
+ };
188
+ }
189
+
190
+ const basicFormatted = results
191
+ .map((r, i) => `${i + 1}. ${r.title}\n ${r.url}${r.snippet ? "\n " + r.snippet : ""}`)
192
+ .join("\n\n");
193
+
194
+ if (fetch_content) {
195
+ const fetchedContent: string[] = [];
196
+ for (let i = 0; i < Math.min(fetch_sources, results.length); i++) {
197
+ const result = await cascadeFetch(results[i].url, 3000);
198
+ if (result) {
199
+ fetchedContent.push(`## ${result.title}\n${result.content.slice(0, 2500)}...\n[Source: ${result.url}]`);
200
+ }
201
+ }
202
+
203
+ return {
204
+ content: [{
205
+ type: "text",
206
+ text: `Search results for "${query}":\n\n${basicFormatted}\n\nDetailed content from ${fetchedContent.length} sources:\n\n${fetchedContent.join("\n\n---\n\n")}`,
207
+ }],
208
+ details: { query, results: results.length, fetched: fetchedContent.length, urls: results.map(r => r.url) },
209
+ };
210
+ }
211
+
212
+ return {
213
+ content: [{ type: "text", text: `Search results for "${query}":\n\n${basicFormatted}` }],
214
+ details: { query, results: results.length, urls: results.map(r => r.url) },
215
+ };
216
+ },
217
+ });
218
+
219
+ pi.registerTool({
220
+ name: "web_research",
221
+ label: "Web Research",
222
+ description: "Deep research: search + fetch content from multiple sources with synthesis. Best for comprehensive answers.",
223
+ parameters: Type.Object({
224
+ question: Type.String({ description: "The research question" }),
225
+ sources: Type.Optional(Type.Number({ description: "Number of sources to analyze (1-10, default: 5)", default: 5 })),
226
+ }),
227
+ async execute(_toolCallId: string, params: any, _signal: AbortSignal, _onUpdate: any, _ctx: any) {
228
+ const { question, sources = 5 } = params;
229
+
230
+ if (!question) {
231
+ return {
232
+ content: [{ type: "text", text: "Invalid question provided" }],
233
+ details: { error: "fetch_failed" } as const,
234
+ };
235
+ }
236
+
237
+ const searchResults = await webSearch(question, Math.min(sources, 10) * 2);
238
+
239
+ if (searchResults.length === 0) {
240
+ return {
241
+ content: [{ type: "text", text: `Could not find information about: "${question}"` }],
242
+ details: { question, error: "no_results" } as any,
243
+ };
244
+ }
245
+
246
+ const fetched: ScrapingResult[] = [];
247
+ for (let i = 0; i < Math.min(sources, searchResults.length); i++) {
248
+ const result = await cascadeFetch(searchResults[i].url, 3000);
249
+ if (result) fetched.push(result);
250
+ }
251
+
252
+ const summary = fetched.length > 0
253
+ ? `Research on: "${question}"\n\nFound ${fetched.length} relevant sources:\n\n` +
254
+ fetched.map((c, i) => `## ${i + 1}. ${c.title}\n${c.content.slice(0, 2000)}...\n(Source: ${c.url} | Method: ${c.method})`).join("\n\n---\n\n")
255
+ : `Found ${searchResults.length} search results but could not fetch detailed content:\n\n` +
256
+ searchResults.slice(0, sources).map(r => `- ${r.title}: ${r.url}`).join("\n");
257
+
258
+ return {
259
+ content: [{ type: "text", text: summary }],
260
+ details: {
261
+ question,
262
+ sources_found: searchResults.length,
263
+ sources_fetched: fetched.length,
264
+ },
265
+ };
266
+ },
267
+ });
268
+
269
+ console.log("[pi-web] Extension loaded — tools: web_fetch, web_search, web_research");
270
+ }