skyloom 1.14.6 → 1.14.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/publish.yml +27 -0
- package/README.md +523 -220
- package/dist/tools/builtin.d.ts.map +1 -1
- package/dist/tools/builtin.js +172 -24
- package/dist/tools/builtin.js.map +1 -1
- package/package.json +68 -68
- package/src/tools/builtin.ts +169 -20
package/src/tools/builtin.ts
CHANGED
|
@@ -88,6 +88,145 @@ export function fenceCheck(resolvedPath: string): string | null {
|
|
|
88
88
|
return `Error: 路径越界 — 工作区围栏已启用 (SKYLOOM_WORKSPACE_FENCE=1),'${resolvedPath}' 在根目录 '${root}' 之外。`;
|
|
89
89
|
}
|
|
90
90
|
|
|
91
|
+
/* ── Web search helpers ───────────────────────────────────────────────────
|
|
92
|
+
Multi-engine fallback. DuckDuckGo's Instant Answer JSON API only returns
|
|
93
|
+
"abstracts" and is blank for ~90% of real queries; HTML scraping is what
|
|
94
|
+
actually works. In CN networks, DDG/Bing may be unreachable — Baidu/Sogou
|
|
95
|
+
serve as fallbacks. Each parser is intentionally tolerant: HTML changes
|
|
96
|
+
over time, so we extract loosely and let the engine list provide redundancy.
|
|
97
|
+
────────────────────────────────────────────────────────────────────────── */
|
|
98
|
+
/** One normalized hit produced by any of the search-engine scrapers. */
interface SearchResult {
  /** Plain-text title of the result. */
  title: string;
  /** Target URL of the result as extracted from the result page. */
  url: string;
  /** Plain-text excerpt; an empty string when no snippet could be extracted. */
  snippet: string;
}
|
|
99
|
+
|
|
100
|
+
// Desktop-Chrome-style User-Agent string sent by fetchHtml with every search request.
const SEARCH_UA = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36';
|
|
101
|
+
|
|
102
|
+
/**
 * Fetch a URL with browser-like headers and return the response body as text.
 *
 * A single timer bounds the whole operation (request plus body read). When it
 * fires the request is aborted and the failure is reported as a timeout error
 * instead of the generic abort error, so fallback logs show why it failed.
 *
 * @param url Absolute URL to request.
 * @param timeoutMs Time budget in milliseconds (default 12000).
 * @returns The response body decoded as text.
 * @throws Error `HTTP <status>` for non-2xx responses, `timeout after <n>ms`
 *   when the budget is exceeded; other fetch failures are rethrown unchanged.
 */
async function fetchHtml(url: string, timeoutMs = 12000): Promise<string> {
  const controller = new AbortController();
  // Remember that *we* triggered the abort so it can be reported as a timeout.
  let timedOut = false;
  const timer = setTimeout(() => {
    timedOut = true;
    controller.abort();
  }, timeoutMs);
  try {
    const res = await fetch(url, {
      headers: {
        'User-Agent': SEARCH_UA,
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
      },
      signal: controller.signal,
    });
    if (!res.ok) throw new Error(`HTTP ${res.status}`);
    return await res.text();
  } catch (err: unknown) {
    if (timedOut) throw new Error(`timeout after ${timeoutMs}ms`);
    throw err;
  } finally {
    // Always release the timer, whether the request succeeded or failed.
    clearTimeout(timer);
  }
}
|
|
120
|
+
|
|
121
|
+
/**
 * Decode the HTML character references commonly found in search-result markup.
 *
 * Handles the named references `&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;` and
 * `&nbsp;` (decoded to a plain space), plus decimal (`&#39;`) and hexadecimal
 * (`&#x27;`) numeric references. Decoding happens in a single pass, so text
 * that was escaped twice (`&amp;lt;`) is unescaped exactly once (`&lt;`).
 * Unknown names and numeric values outside the Unicode range are left as-is.
 *
 * @param s Text that may contain HTML character references.
 * @returns The text with recognized references replaced by their characters.
 */
function decodeHtmlEntities(s: string): string {
  return s.replace(
    /&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|(amp|lt|gt|quot|apos|nbsp));/g,
    (entity: string, dec?: string, hex?: string, name?: string): string => {
      if (name !== undefined) {
        switch (name) {
          case 'amp': return '&';
          case 'lt': return '<';
          case 'gt': return '>';
          case 'quot': return '"';
          case 'apos': return "'";
          case 'nbsp': return ' ';
          default: return entity;
        }
      }
      const code = dec !== undefined ? Number.parseInt(dec, 10) : Number.parseInt(hex ?? '', 16);
      // fromCodePoint handles astral characters (emoji, rare CJK) and throws
      // above U+10FFFF, so out-of-range references are kept verbatim instead.
      return code <= 0x10ffff ? String.fromCodePoint(code) : entity;
    },
  );
}
|
|
128
|
+
|
|
129
|
+
/**
 * Convert an HTML fragment to single-spaced plain text.
 *
 * Tags are removed first and entities decoded afterwards, so escaped markup
 * such as `&lt;b&gt;` survives as literal text rather than being stripped.
 *
 * @param s HTML fragment.
 * @returns Text with tags removed, entities decoded, whitespace runs collapsed
 *   to one space, and leading/trailing whitespace trimmed.
 */
function stripTags(s: string): string {
  const withoutMarkup = s.replace(/<[^>]+>/g, '');
  const decoded = decodeHtmlEntities(withoutMarkup);
  const singleSpaced = decoded.replace(/\s+/g, ' ');
  return singleSpaced.trim();
}
|
|
132
|
+
|
|
133
|
+
/**
 * Recover the real target from a DuckDuckGo HTML result link.
 *
 * DuckDuckGo wraps results as `/l/?uddg=<encoded-url>`. When a `uddg`
 * parameter is present and decodes cleanly, the decoded value is returned.
 * Otherwise protocol-relative links (`//host/...`) get an `https:` prefix and
 * anything else is returned unchanged.
 *
 * @param href Link taken from the result markup.
 * @returns The unwrapped or normalized URL.
 */
function unwrapDdgRedirect(href: string): string {
  const wrapped = /[?&]uddg=([^&]+)/.exec(href);
  if (wrapped !== null) {
    try {
      return decodeURIComponent(wrapped[1]);
    } catch {
      /* fall through */
    }
  }
  return href.startsWith('//') ? 'https:' + href : href;
}
|
|
140
|
+
|
|
141
|
+
/**
 * Placeholder unwrapping step for Baidu result links.
 *
 * Baidu serves opaque `/link?url=...` redirects that cannot be resolved
 * without issuing another request, so the link is handed back untouched;
 * whoever consumes it can still follow the redirect.
 *
 * @param href Link taken from the result markup.
 * @returns The same link, unchanged.
 */
function unwrapBaiduRedirect(href: string): string {
  return href;
}
|
|
146
|
+
|
|
147
|
+
/**
 * Scrape DuckDuckGo's HTML endpoint for results.
 *
 * Each hit is a `result__a` title anchor followed by a `result__snippet`
 * anchor. The href is an HTML attribute value, so it is entity-decoded before
 * the DuckDuckGo redirect wrapper is removed; without that, links that are not
 * wrapped would keep a literal `&amp;` in their query string.
 *
 * @param query Search terms (URL-encoded by this function).
 * @param max Maximum number of results to collect.
 * @returns Up to `max` results in page order.
 * @throws Whatever `fetchHtml` throws (HTTP or network failure).
 */
async function searchDuckDuckGo(query: string, max: number): Promise<SearchResult[]> {
  const html = await fetchHtml(`https://html.duckduckgo.com/html/?q=${encodeURIComponent(query)}`);
  const results: SearchResult[] = [];
  const re = /<a[^>]+class="[^"]*result__a[^"]*"[^>]+href="([^"]+)"[^>]*>([\s\S]*?)<\/a>[\s\S]*?<a[^>]+class="[^"]*result__snippet[^"]*"[^>]*>([\s\S]*?)<\/a>/gi;
  let m: RegExpExecArray | null;
  while ((m = re.exec(html)) !== null && results.length < max) {
    results.push({
      url: unwrapDdgRedirect(decodeHtmlEntities(m[1])),
      title: stripTags(m[2]),
      snippet: stripTags(m[3]),
    });
  }
  return results;
}
|
|
157
|
+
|
|
158
|
+
/**
 * Scrape Bing's result page.
 *
 * Results are `<li class="b_algo">` items; the title link is the first anchor
 * inside the item's `<h2>`, and the snippet is taken from the first matching
 * paragraph pattern (line-clamp paragraph, caption paragraph, any paragraph).
 * The href is an HTML attribute value, so it is entity-decoded; otherwise
 * query strings would come out as `?a=1&amp;b=2`.
 *
 * @param query Search terms (URL-encoded by this function).
 * @param max Maximum number of results to collect.
 * @returns Up to `max` results in page order; items without a title link are skipped.
 * @throws Whatever `fetchHtml` throws (HTTP or network failure).
 */
async function searchBing(query: string, max: number): Promise<SearchResult[]> {
  const html = await fetchHtml(`https://www.bing.com/search?q=${encodeURIComponent(query)}&setlang=zh-cn`);
  const results: SearchResult[] = [];
  const liRe = /<li class="b_algo"[\s\S]*?<\/li>/gi;
  const items = html.match(liRe) || [];
  for (const item of items) {
    if (results.length >= max) break;
    const a = item.match(/<h2[^>]*>\s*<a[^>]+href="([^"]+)"[^>]*>([\s\S]*?)<\/a>/i);
    if (!a) continue;
    // Try the most specific snippet container first, then progressively looser ones.
    const snipMatch =
      item.match(/<p class="b_lineclamp[^"]*"[^>]*>([\s\S]*?)<\/p>/i) ||
      item.match(/<div class="b_caption"[\s\S]*?<p[^>]*>([\s\S]*?)<\/p>/i) ||
      item.match(/<p[^>]*>([\s\S]*?)<\/p>/i);
    results.push({
      url: decodeHtmlEntities(a[1]),
      title: stripTags(a[2]),
      snippet: snipMatch ? stripTags(snipMatch[1]) : '',
    });
  }
  return results;
}
|
|
175
|
+
|
|
176
|
+
/**
 * Scrape Baidu's result page.
 *
 * Baidu nests divs aggressively, so each hit is anchored on an `<h3>` followed
 * (within 500 characters) by a link, and the snippet is searched for in the
 * 4000 characters after that link. The href is an HTML attribute value, so it
 * is entity-decoded before use; otherwise multi-parameter links would keep a
 * literal `&amp;`.
 *
 * @param query Search terms (URL-encoded by this function).
 * @param max Maximum number of results to collect.
 * @returns Up to `max` results in page order; entries with an empty title or a
 *   non-http(s) link are skipped.
 * @throws Whatever `fetchHtml` throws (HTTP or network failure).
 */
async function searchBaidu(query: string, max: number): Promise<SearchResult[]> {
  const html = await fetchHtml(`https://www.baidu.com/s?wd=${encodeURIComponent(query)}`);
  const results: SearchResult[] = [];
  const re = /<h3[^>]*>[\s\S]{0,500}?<a[^>]+href="([^"]+)"[^>]*>([\s\S]*?)<\/a>/gi;
  let m: RegExpExecArray | null;
  while ((m = re.exec(html)) !== null && results.length < max) {
    const url = unwrapBaiduRedirect(decodeHtmlEntities(m[1]));
    const title = stripTags(m[2]);
    if (!title || !/^https?:\/\//.test(url)) continue;
    // Look for the nearest abstract block after the title link.
    const after = html.slice(re.lastIndex, re.lastIndex + 4000);
    const snipMatch =
      after.match(/<span class="content-right[^"]*"[^>]*>([\s\S]*?)<\/span>/i) ||
      after.match(/<div class="c-abstract[^"]*"[^>]*>([\s\S]*?)<\/div>/i) ||
      after.match(/<span[^>]*content[^"]*"[^>]*>([\s\S]{20,400}?)<\/span>/i) ||
      after.match(/<p[^>]*>([\s\S]{20,400}?)<\/p>/i);
    results.push({ url, title, snippet: snipMatch ? stripTags(snipMatch[1]) : '' });
  }
  return results;
}
|
|
197
|
+
|
|
198
|
+
/**
 * Scrape Sogou's result page.
 *
 * The page is cut into `vrwrap` blocks; within each, the title link is the
 * first anchor after an `<h3>`. The href is an HTML attribute value, so it is
 * entity-decoded first; site-relative `/link?...` redirects are then made
 * absolute against `https://www.sogou.com`.
 *
 * @param query Search terms (URL-encoded by this function).
 * @param max Maximum number of results to collect.
 * @returns Up to `max` results in page order; blocks without a title link are skipped.
 * @throws Whatever `fetchHtml` throws (HTTP or network failure).
 */
async function searchSogou(query: string, max: number): Promise<SearchResult[]> {
  const html = await fetchHtml(`https://www.sogou.com/web?query=${encodeURIComponent(query)}`);
  const results: SearchResult[] = [];
  // Each block runs from one vrwrap opener up to the next one (or end of input).
  const divRe = /<div[^>]+class="vrwrap"[\s\S]*?(?=<div[^>]+class="vrwrap"|$)/gi;
  const items = html.match(divRe) || [];
  for (const item of items) {
    if (results.length >= max) break;
    const a = item.match(/<h3[^>]*>[\s\S]*?<a[^>]+href="([^"]+)"[^>]*>([\s\S]*?)<\/a>/i);
    if (!a) continue;
    let url = decodeHtmlEntities(a[1]);
    if (url.startsWith('/link?')) url = 'https://www.sogou.com' + url;
    const snipMatch =
      item.match(/<div[^>]+class="(?:str_info|fz-mid|space-txt)[^"]*"[^>]*>([\s\S]*?)<\/div>/i) ||
      item.match(/<p[^>]*>([\s\S]{20,400}?)<\/p>/i);
    results.push({ url, title: stripTags(a[2]), snippet: snipMatch ? stripTags(snipMatch[1]) : '' });
  }
  return results;
}
|
|
216
|
+
|
|
217
|
+
/**
 * Run one named search backend and clean up its output.
 *
 * Accepted names are `duckduckgo` (alias `ddg`), `bing`, `baidu` and `sogou`.
 * Entries with an empty title or a URL that is not http(s) — placeholder or
 * JS-anchor links from inline answer cards — are removed.
 *
 * @param engine Backend name.
 * @param query Search terms passed through to the backend.
 * @param max Maximum number of results requested from the backend.
 * @returns The backend's results after filtering.
 * @throws Error `unknown search engine: <name>` for an unrecognized name, plus
 *   anything the selected backend throws.
 */
async function runSearchEngine(engine: string, query: string, max: number): Promise<SearchResult[]> {
  type Backend = (q: string, limit: number) => Promise<SearchResult[]>;
  const backends = new Map<string, Backend>([
    ['duckduckgo', searchDuckDuckGo],
    ['ddg', searchDuckDuckGo],
    ['bing', searchBing],
    ['baidu', searchBaidu],
    ['sogou', searchSogou],
  ]);

  const backend = backends.get(engine);
  if (backend === undefined) {
    throw new Error(`unknown search engine: ${engine}`);
  }

  const hits = await backend(query, max);
  // Drop placeholder/JS-anchor entries from inline answer cards.
  return hits.filter((hit) => hit.title && /^https?:\/\//i.test(hit.url));
}
|
|
229
|
+
|
|
91
230
|
/**
|
|
92
231
|
* Register all built-in tools into the given registry.
|
|
93
232
|
*/
|
|
@@ -295,33 +434,43 @@ export function registerBuiltinTools(registry: ToolRegistry): void {
|
|
|
295
434
|
|
|
296
435
|
registry.register({
|
|
297
436
|
name: 'web_search',
|
|
298
|
-
description: 'Search the web for information. Returns search results with titles and snippets.',
|
|
437
|
+
description: 'Search the web for information. Returns search results with titles, URLs and snippets.',
|
|
299
438
|
parameters: [
|
|
300
439
|
{ name: 'query', type: 'string', description: 'Search query', required: true },
|
|
440
|
+
{ name: 'engine', type: 'string', description: 'Optional engine: duckduckgo|bing|baidu|sogou. Default: auto (tries each until one returns results)', required: false },
|
|
441
|
+
{ name: 'max_results', type: 'number', description: 'Max results to return (default 8, capped at 20)', required: false },
|
|
301
442
|
],
|
|
302
443
|
handler: async (params) => {
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
444
|
+
const query = String(params.query || '').trim();
|
|
445
|
+
if (!query) return 'Error: query is required';
|
|
446
|
+
const max = Math.max(1, Math.min(20, Math.floor(Number(params.max_results) || 8)));
|
|
447
|
+
const explicit = String(params.engine || '').trim().toLowerCase();
|
|
448
|
+
const envEngine = String(process.env.SKYLOOM_SEARCH_ENGINE || '').trim().toLowerCase();
|
|
449
|
+
const order = explicit
|
|
450
|
+
? [explicit]
|
|
451
|
+
: envEngine
|
|
452
|
+
? [envEngine, 'duckduckgo', 'bing', 'baidu', 'sogou']
|
|
453
|
+
: ['duckduckgo', 'bing', 'baidu', 'sogou'];
|
|
454
|
+
const seen = new Set<string>();
|
|
455
|
+
const tried: string[] = [];
|
|
456
|
+
for (const eng of order) {
|
|
457
|
+
if (seen.has(eng)) continue;
|
|
458
|
+
seen.add(eng);
|
|
459
|
+
tried.push(eng);
|
|
460
|
+
try {
|
|
461
|
+
const results = await runSearchEngine(eng, query, max);
|
|
462
|
+
if (results && results.length > 0) {
|
|
463
|
+
const head = `Search results (${eng}, ${results.length}):`;
|
|
464
|
+
const body = results
|
|
465
|
+
.map((r, i) => `${i + 1}. ${r.title}\n ${r.url}${r.snippet ? `\n ${r.snippet}` : ''}`)
|
|
466
|
+
.join('\n');
|
|
467
|
+
return `${head}\n${body}`;
|
|
319
468
|
}
|
|
469
|
+
} catch (e: any) {
|
|
470
|
+
log.warn('web_search_engine_failed', { engine: eng, error: String(e?.message || e) });
|
|
320
471
|
}
|
|
321
|
-
return results.length > 0 ? results.join('\n') : 'No search results found.';
|
|
322
|
-
} catch (e) {
|
|
323
|
-
return `Search error: ${e}`;
|
|
324
472
|
}
|
|
473
|
+
return `No search results found (tried: ${tried.join(', ')}). Set SKYLOOM_SEARCH_ENGINE to pin an engine, or try a different query.`;
|
|
325
474
|
},
|
|
326
475
|
});
|
|
327
476
|
|