skyloom 1.15.5 → 1.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/command_args.d.ts +74 -0
- package/dist/cli/command_args.d.ts.map +1 -0
- package/dist/cli/command_args.js +129 -0
- package/dist/cli/command_args.js.map +1 -0
- package/dist/cli/loom.d.ts +20 -0
- package/dist/cli/loom.d.ts.map +1 -1
- package/dist/cli/loom.js +202 -24
- package/dist/cli/loom.js.map +1 -1
- package/dist/cli/loom_chat.d.ts.map +1 -1
- package/dist/cli/loom_chat.js +39 -0
- package/dist/cli/loom_chat.js.map +1 -1
- package/dist/core/agent.js +2 -2
- package/dist/core/agent.js.map +1 -1
- package/dist/core/security.d.ts.map +1 -1
- package/dist/core/security.js +1 -0
- package/dist/core/security.js.map +1 -1
- package/dist/core/tool_router.d.ts.map +1 -1
- package/dist/core/tool_router.js +11 -3
- package/dist/core/tool_router.js.map +1 -1
- package/dist/tools/builtin.d.ts.map +1 -1
- package/dist/tools/builtin.js +38 -192
- package/dist/tools/builtin.js.map +1 -1
- package/dist/tools/websearch.d.ts +92 -0
- package/dist/tools/websearch.d.ts.map +1 -0
- package/dist/tools/websearch.js +343 -0
- package/dist/tools/websearch.js.map +1 -0
- package/package.json +1 -1
- package/src/cli/command_args.ts +159 -0
- package/src/cli/loom.ts +155 -17
- package/src/cli/loom_chat.ts +33 -0
- package/src/core/agent.ts +2 -2
- package/src/core/security.ts +1 -0
- package/src/core/tool_router.ts +11 -3
- package/src/tools/builtin.ts +38 -190
- package/src/tools/websearch.ts +368 -0
- package/tests/command_args.test.ts +115 -0
- package/tests/loom.test.ts +74 -0
- package/tests/tool_router.test.ts +15 -0
- package/tests/websearch.test.ts +190 -0
|
@@ -0,0 +1,368 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 联网搜索 · Web search with a provider waterfall.
|
|
3
|
+
*
|
|
4
|
+
* Why this module exists: the old web_search scraped DuckDuckGo/Bing/Baidu/Sogou
|
|
5
|
+
* HTML. Scraping breaks constantly — engines change markup, block bot
|
|
6
|
+
* user-agents, throw CAPTCHAs, and rate-limit — so "search doesn't work" was the
|
|
7
|
+
* norm. This replaces it with a waterfall that prefers reliable JSON APIs and
|
|
8
|
+
* only falls back to scraping as a last resort:
|
|
9
|
+
*
|
|
10
|
+
* 1. Tavily (TAVILY_API_KEY) — purpose-built for LLM agents, returns an answer
|
|
11
|
+
* 2. Brave (BRAVE_API_KEY) — independent index, clean JSON
|
|
12
|
+
* 3. Serper (SERPER_API_KEY) — Google results as JSON
|
|
13
|
+
* 4. SearXNG (SEARXNG_URL) — self-hosted metasearch JSON
|
|
14
|
+
* 5. Jina (keyless) — s.jina.ai, free, LLM-optimized — works with NO setup
|
|
15
|
+
* 6. Scrape (last resort) — the legacy HTML scrapers
|
|
16
|
+
*
|
|
17
|
+
* The headline win: even with zero configuration, Jina's keyless endpoint gives
|
|
18
|
+
* results that actually return — no API key, no scraping fragility. Set any of
|
|
19
|
+
* the API keys above for enterprise-grade reliability and higher rate limits.
|
|
20
|
+
*
|
|
21
|
+
* The HTTP layer is injectable so the orchestration and every parser are
|
|
22
|
+
* unit-testable without a network.
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
import axios from 'axios';
|
|
26
|
+
|
|
27
|
+
/** One normalized search hit; every provider payload in this module is mapped onto this shape. */
export interface SearchResult {
  /** Title of the hit as reported by the provider. */
  title: string;
  /** Address of the hit. */
  url: string;
  /** Short excerpt or description of the hit. */
  snippet: string;
}
|
|
32
|
+
|
|
33
|
+
/** Outcome of running a single provider. */
export interface SearchResponse {
  /** Identifier of the provider that produced these results. */
  provider: string;
  /** Normalized hits returned by that provider. */
  results: SearchResult[];
  /** Direct answer / summary, present only when the provider offers one. */
  answer?: string;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Minimal HTTP surface used by the providers and the page reader.
 * It is injectable so tests can substitute a fake transport.
 * Every method accepts optional extra headers and a per-request timeout in milliseconds.
 */
export interface WebHttp {
  /** GET `url` and resolve with the response body. */
  getJson(
    url: string,
    opts?: { headers?: Record<string, string>; timeoutMs?: number },
  ): Promise<any>;

  /** POST `body` to `url` and resolve with the response body. */
  postJson(
    url: string,
    body: any,
    opts?: { headers?: Record<string, string>; timeoutMs?: number },
  ): Promise<any>;

  /** GET `url` and resolve with the response body as text. */
  getText(
    url: string,
    opts?: { headers?: Record<string, string>; timeoutMs?: number },
  ): Promise<string>;
}
|
|
45
|
+
|
|
46
|
+
/** Browser-like user agent sent with every request made by the default client. */
const UA = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36';
/** Timeout in milliseconds applied when the caller does not pass `timeoutMs`. */
const DEFAULT_TIMEOUT = 15000;

/** Per-call options accepted by every `WebHttp` method. */
type HttpCallOptions = { headers?: Record<string, string>; timeoutMs?: number };

/** Only 2xx responses count as success; anything else makes axios reject. */
function isSuccessStatus(status: number): boolean {
  return status >= 200 && status < 300;
}

/**
 * Build the axios settings shared by all three methods.
 * Header precedence is: user agent, then the method's own defaults, then caller overrides.
 */
function requestConfig(methodHeaders: Record<string, string>, opts?: HttpCallOptions) {
  return {
    headers: { 'User-Agent': UA, ...methodHeaders, ...(opts?.headers || {}) },
    timeout: opts?.timeoutMs ?? DEFAULT_TIMEOUT,
    maxRedirects: 5,
    validateStatus: isSuccessStatus,
  };
}

/** Default HTTP client backed by axios. */
export const defaultHttp: WebHttp = {
  async getJson(url, opts) {
    const response = await axios.get(url, requestConfig({ Accept: 'application/json' }, opts));
    return response.data;
  },

  async postJson(url, body, opts) {
    const config = requestConfig({ Accept: 'application/json', 'Content-Type': 'application/json' }, opts);
    const response = await axios.post(url, body, config);
    return response.data;
  },

  async getText(url, opts) {
    const htmlHeaders = {
      Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
      'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
    };
    const response = await axios.get(url, {
      ...requestConfig(htmlHeaders, opts),
      responseType: 'text',
      // Identity transform: hand back the body exactly as received.
      transformResponse: [(d) => d],
    });
    return response.data as string;
  },
};
|
|
86
|
+
|
|
87
|
+
/* ── HTML helpers (shared by the scrape provider) ── */
|
|
88
|
+
/**
 * Decode the HTML character references that show up in scraped search results:
 * `&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;`, `&nbsp;` (as a plain space), plus
 * decimal (`&#39;`) and hexadecimal (`&#x27;`) numeric references.
 *
 * Decoding happens in a single pass, so text produced by one replacement is never
 * decoded again (`&amp;lt;` becomes `&lt;`, not `<`). Numeric references beyond the
 * Unicode range, and unknown named references, are left untouched.
 *
 * @param s Text that may contain character references.
 * @returns The text with the supported references replaced by their characters.
 */
export function decodeHtmlEntities(s: string): string {
  const named: Record<string, string> = {
    amp: '&',
    lt: '<',
    gt: '>',
    quot: '"',
    apos: "'",
    nbsp: ' ',
  };
  return s.replace(
    /&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|(amp|lt|gt|quot|apos|nbsp));/g,
    (whole: string, dec?: string, hex?: string, name?: string) => {
      if (name !== undefined) return named[name];
      const code = dec !== undefined ? parseInt(dec, 10) : parseInt(hex as string, 16);
      // fromCodePoint throws a RangeError above U+10FFFF; keep such references verbatim.
      if (!(code <= 0x10ffff)) return whole;
      // fromCodePoint (unlike fromCharCode) handles astral code points such as emoji.
      return String.fromCodePoint(code);
    },
  );
}
|
|
95
|
+
/**
 * Reduce an HTML fragment (or a whole page) to plain text.
 *
 * Steps, in order:
 *  1. drop HTML comments and `<script>` / `<style>` elements together with their
 *     bodies, so JavaScript and CSS source never leaks into the text;
 *  2. remove the remaining tags;
 *  3. decode character references;
 *  4. collapse whitespace runs to a single space and trim.
 *
 * @param s HTML markup.
 * @returns The visible text on a single line.
 */
export function stripTags(s: string): string {
  const withoutHiddenContent = s
    .replace(/<!--[\s\S]*?-->/g, '')
    .replace(/<(script|style)\b[^>]*>[\s\S]*?<\/\1\s*>/gi, '');
  const textOnly = withoutHiddenContent.replace(/<[^>]+>/g, '');
  return decodeHtmlEntities(textOnly).replace(/\s+/g, ' ').trim();
}
|
|
98
|
+
/**
 * Turn a DuckDuckGo result link into the destination URL.
 *
 * When the link carries a `uddg` query parameter, its percent-decoded value is
 * returned. Otherwise (or when that value is malformed) the link itself is
 * returned, with `https:` prepended if it is protocol-relative (`//host/...`).
 */
function unwrapDdgRedirect(href: string): string {
  const target = /[?&]uddg=([^&]+)/.exec(href);
  if (target !== null) {
    try {
      return decodeURIComponent(target[1]);
    } catch {
      // Malformed percent-encoding: fall back to the link itself.
    }
  }
  return href.startsWith('//') ? 'https:' + href : href;
}
|
|
104
|
+
|
|
105
|
+
/**
 * Normalize a provider's raw hits: trim every field, drop unusable entries,
 * remove duplicate URLs (first occurrence wins) and stop once `max` hits are kept.
 *
 * An entry is dropped when it is missing, when its title is empty after trimming,
 * or when its trimmed URL does not start with `http://` or `https://`. Validation
 * and de-duplication run on the trimmed values, so padded duplicates collapse and
 * a whitespace-only title is never emitted. Non-string fields are treated as
 * empty instead of throwing.
 *
 * @param results Raw hits in provider order.
 * @param max Maximum number of hits to keep.
 * @returns The cleaned hits, in their original order.
 */
function clean(results: SearchResult[], max: number): SearchResult[] {
  const asTrimmed = (value: unknown): string => (typeof value === 'string' ? value.trim() : '');
  const seen = new Set<string>();
  const out: SearchResult[] = [];
  for (const r of results) {
    if (!r) continue;
    const title = asTrimmed(r.title);
    const url = asTrimmed(r.url);
    if (!title || !/^https?:\/\//i.test(url) || seen.has(url)) continue;
    seen.add(url);
    out.push({ title, url, snippet: asTrimmed(r.snippet) });
    if (out.length >= max) break;
  }
  return out;
}
|
|
117
|
+
|
|
118
|
+
/* ════════════════════════════════════════════════════════════
|
|
119
|
+
API providers (preferred — reliable JSON)
|
|
120
|
+
════════════════════════════════════════════════════════════ */
|
|
121
|
+
|
|
122
|
+
/**
 * Query the Tavily search API.
 *
 * Sends a `basic`-depth search with `include_answer` enabled, authenticating with a
 * bearer token, and maps each entry of `results` onto a `SearchResult` (the entry's
 * `content` becomes the snippet). The API's `answer`, when truthy, is passed through.
 */
async function tavily(http: WebHttp, key: string, query: string, max: number): Promise<SearchResponse> {
  const payload = { query, max_results: max, search_depth: 'basic', include_answer: true };
  const auth = { headers: { Authorization: `Bearer ${key}` } };
  const data = await http.postJson('https://api.tavily.com/search', payload, auth);

  const rows: any[] = data?.results || [];
  const hits = rows.map((row: any) => ({
    title: row.title || '',
    url: row.url || '',
    snippet: row.content || '',
  }));
  return { provider: 'tavily', results: clean(hits, max), answer: data?.answer || undefined };
}
|
|
131
|
+
|
|
132
|
+
/**
 * Query the Brave web-search API.
 *
 * The key travels in the `X-Subscription-Token` header. Hits are read from
 * `web.results`, with each entry's `description` used as the snippet.
 */
async function brave(http: WebHttp, key: string, query: string, max: number): Promise<SearchResponse> {
  const endpoint = `https://api.search.brave.com/res/v1/web/search?q=${encodeURIComponent(query)}&count=${max}`;
  const data = await http.getJson(endpoint, {
    headers: { 'X-Subscription-Token': key, Accept: 'application/json' },
  });

  const rows: any[] = data?.web?.results || [];
  const hits = rows.map((row: any) => ({
    title: row.title || '',
    url: row.url || '',
    snippet: row.description || '',
  }));
  return { provider: 'brave', results: clean(hits, max) };
}
|
|
142
|
+
|
|
143
|
+
/**
 * Query the Serper search API.
 *
 * The key travels in the `X-API-KEY` header. Hits are read from `organic` (each
 * entry's `link` is the URL). The answer is the first truthy value among the
 * answer box's `answer`, the answer box's `snippet`, and the knowledge graph's
 * `description`.
 */
async function serper(http: WebHttp, key: string, query: string, max: number): Promise<SearchResponse> {
  const data = await http.postJson(
    'https://google.serper.dev/search',
    { q: query, num: max },
    { headers: { 'X-API-KEY': key } },
  );

  const rows: any[] = data?.organic || [];
  const hits = rows.map((row: any) => ({
    title: row.title || '',
    url: row.link || '',
    snippet: row.snippet || '',
  }));

  const box = data?.answerBox;
  const answer = box?.answer || box?.snippet || data?.knowledgeGraph?.description || undefined;
  return { provider: 'serper', results: clean(hits, max), answer };
}
|
|
153
|
+
|
|
154
|
+
/**
 * Query a SearXNG instance through its JSON output format.
 *
 * Trailing slashes on `baseUrl` are removed before `/search` is appended. The
 * request asks for `format=json` and `language=zh-CN`. Each entry's `content`
 * becomes the snippet.
 */
async function searxng(http: WebHttp, baseUrl: string, query: string, max: number): Promise<SearchResponse> {
  const root = baseUrl.replace(/\/+$/, '');
  const endpoint = `${root}/search?q=${encodeURIComponent(query)}&format=json&language=zh-CN`;
  const data = await http.getJson(endpoint);

  const rows: any[] = data?.results || [];
  const hits = rows.map((row: any) => ({
    title: row.title || '',
    url: row.url || '',
    snippet: row.content || '',
  }));
  return { provider: 'searxng', results: clean(hits, max) };
}
|
|
164
|
+
|
|
165
|
+
/**
 * Query Jina's search endpoint (s.jina.ai).
 *
 * `X-Respond-With: no-content` asks for the result listing only, without fetching
 * each page body (faster, fewer tokens). The endpoint works without a key (shared
 * rate pool); when `key` is given it is sent as a bearer token to raise the limit.
 * Rows are taken from `data.data` when that is an array, else from the payload
 * itself when it is an array. The snippet is the first truthy value among
 * `description`, `content` and `snippet`.
 */
async function jina(http: WebHttp, key: string | undefined, query: string, max: number): Promise<SearchResponse> {
  const headers: Record<string, string> = { Accept: 'application/json', 'X-Respond-With': 'no-content' };
  if (key) {
    headers.Authorization = `Bearer ${key}`;
  }
  const data = await http.getJson(`https://s.jina.ai/?q=${encodeURIComponent(query)}`, { headers });

  let rows: any[] = [];
  if (Array.isArray(data?.data)) {
    rows = data.data;
  } else if (Array.isArray(data)) {
    rows = data;
  }

  const hits = rows.map((row: any) => ({
    title: row.title || '',
    url: row.url || '',
    snippet: row.description || row.content || row.snippet || '',
  }));
  return { provider: 'jina', results: clean(hits, max) };
}
|
|
178
|
+
|
|
179
|
+
/* ════════════════════════════════════════════════════════════
|
|
180
|
+
Scrape provider (last resort — fragile HTML parsing)
|
|
181
|
+
════════════════════════════════════════════════════════════ */
|
|
182
|
+
|
|
183
|
+
/**
 * Scrape DuckDuckGo's HTML endpoint.
 *
 * Each match pairs a `result__a` anchor (link + title) with the following
 * `result__snippet` anchor. The `href` is taken from raw HTML, where `&` is
 * written as `&amp;`, so it is entity-decoded before the redirect wrapper is
 * removed. Without that the returned URL would still contain `&amp;`.
 *
 * @returns Up to `max` raw hits (not yet validated or de-duplicated).
 */
async function scrapeDuckDuckGo(http: WebHttp, query: string, max: number): Promise<SearchResult[]> {
  const html = await http.getText(`https://html.duckduckgo.com/html/?q=${encodeURIComponent(query)}`);
  const out: SearchResult[] = [];
  const re = /<a[^>]+class="[^"]*result__a[^"]*"[^>]+href="([^"]+)"[^>]*>([\s\S]*?)<\/a>[\s\S]*?<a[^>]+class="[^"]*result__snippet[^"]*"[^>]*>([\s\S]*?)<\/a>/gi;
  let m: RegExpExecArray | null;
  while ((m = re.exec(html)) && out.length < max) {
    const href = decodeHtmlEntities(m[1]);
    out.push({ url: unwrapDdgRedirect(href), title: stripTags(m[2]), snippet: stripTags(m[3]) });
  }
  return out;
}
|
|
193
|
+
/**
 * Scrape Bing's result page (requested with `setlang=zh-cn`).
 *
 * Every `<li class="b_algo">` item contributes the first `<h2><a>` link as URL and
 * title. The snippet comes from the `b_lineclamp` paragraph when present, else
 * the first paragraph, else it is empty. The `href` is taken from raw HTML, so
 * character references such as `&amp;` are decoded to yield a usable URL.
 *
 * @returns Up to `max` raw hits (not yet validated or de-duplicated).
 */
async function scrapeBing(http: WebHttp, query: string, max: number): Promise<SearchResult[]> {
  const html = await http.getText(`https://www.bing.com/search?q=${encodeURIComponent(query)}&setlang=zh-cn`);
  const out: SearchResult[] = [];
  for (const item of html.match(/<li class="b_algo"[\s\S]*?<\/li>/gi) || []) {
    if (out.length >= max) break;
    const a = item.match(/<h2[^>]*>\s*<a[^>]+href="([^"]+)"[^>]*>([\s\S]*?)<\/a>/i);
    if (!a) continue;
    const snip = item.match(/<p class="b_lineclamp[^"]*"[^>]*>([\s\S]*?)<\/p>/i) || item.match(/<p[^>]*>([\s\S]*?)<\/p>/i);
    out.push({
      url: decodeHtmlEntities(a[1]),
      title: stripTags(a[2]),
      snippet: snip ? stripTags(snip[1]) : '',
    });
  }
  return out;
}
|
|
205
|
+
/**
 * Scrape Baidu's result page.
 *
 * A hit is the first anchor found within 500 characters after an `<h3>`. Matches
 * whose title is empty or whose link is not absolute http(s) are skipped. The
 * snippet is searched in the 4000 characters following the match: a
 * `content-right` span, else a `c-abstract` div, else a 20–400 character
 * paragraph. The `href` is taken from raw HTML, so character references such as
 * `&amp;` are decoded first.
 *
 * @returns Up to `max` raw hits (not yet de-duplicated).
 */
async function scrapeBaidu(http: WebHttp, query: string, max: number): Promise<SearchResult[]> {
  const html = await http.getText(`https://www.baidu.com/s?wd=${encodeURIComponent(query)}`);
  const out: SearchResult[] = [];
  const re = /<h3[^>]*>[\s\S]{0,500}?<a[^>]+href="([^"]+)"[^>]*>([\s\S]*?)<\/a>/gi;
  let m: RegExpExecArray | null;
  while ((m = re.exec(html)) && out.length < max) {
    const url = decodeHtmlEntities(m[1]);
    const title = stripTags(m[2]);
    if (!title || !/^https?:\/\//.test(url)) continue;
    // re.lastIndex sits just past the matched anchor; look for the snippet from there.
    const after = html.slice(re.lastIndex, re.lastIndex + 4000);
    const snip = after.match(/<span class="content-right[^"]*"[^>]*>([\s\S]*?)<\/span>/i)
      || after.match(/<div class="c-abstract[^"]*"[^>]*>([\s\S]*?)<\/div>/i)
      || after.match(/<p[^>]*>([\s\S]{20,400}?)<\/p>/i);
    out.push({ url, title, snippet: snip ? stripTags(snip[1]) : '' });
  }
  return out;
}
|
|
221
|
+
/**
 * Scrape Sogou's result page.
 *
 * The page is split into `vrwrap` containers; each contributes the first anchor
 * inside an `<h3>`. Site-relative `/link?` redirects are made absolute against
 * `https://www.sogou.com`. The snippet comes from a `str_info` / `fz-mid` /
 * `space-txt` div, else a 20–400 character paragraph. The `href` is taken from raw
 * HTML, so character references such as `&amp;` are decoded first.
 *
 * @returns Up to `max` raw hits (not yet validated or de-duplicated).
 */
async function scrapeSogou(http: WebHttp, query: string, max: number): Promise<SearchResult[]> {
  const html = await http.getText(`https://www.sogou.com/web?query=${encodeURIComponent(query)}`);
  const out: SearchResult[] = [];
  for (const item of html.match(/<div[^>]+class="vrwrap"[\s\S]*?(?=<div[^>]+class="vrwrap"|$)/gi) || []) {
    if (out.length >= max) break;
    const a = item.match(/<h3[^>]*>[\s\S]*?<a[^>]+href="([^"]+)"[^>]*>([\s\S]*?)<\/a>/i);
    if (!a) continue;
    let url = decodeHtmlEntities(a[1]);
    if (url.startsWith('/link?')) url = 'https://www.sogou.com' + url;
    const snip = item.match(/<div[^>]+class="(?:str_info|fz-mid|space-txt)[^"]*"[^>]*>([\s\S]*?)<\/div>/i) || item.match(/<p[^>]*>([\s\S]{20,400}?)<\/p>/i);
    out.push({ url, title: stripTags(a[2]), snippet: snip ? stripTags(snip[1]) : '' });
  }
  return out;
}
|
|
234
|
+
|
|
235
|
+
/** Scrape engines in the order the waterfall tries them. */
const SCRAPE_ENGINES = ['duckduckgo', 'bing', 'baidu', 'sogou'] as const;
/** Identifier of one scrape engine. */
type ScrapeEngine = typeof SCRAPE_ENGINES[number];

/**
 * Run the HTML scraper for `engine` and wrap its cleaned hits in a `SearchResponse`
 * whose `provider` is the engine id. Any engine other than bing, baidu or sogou
 * uses the DuckDuckGo scraper.
 */
async function scrape(http: WebHttp, engine: ScrapeEngine, query: string, max: number): Promise<SearchResponse> {
  let scraper = scrapeDuckDuckGo;
  switch (engine) {
    case 'bing':
      scraper = scrapeBing;
      break;
    case 'baidu':
      scraper = scrapeBaidu;
      break;
    case 'sogou':
      scraper = scrapeSogou;
      break;
  }
  const hits = await scraper(http, query, max);
  return { provider: engine, results: clean(hits, max) };
}
|
|
242
|
+
|
|
243
|
+
/* ════════════════════════════════════════════════════════════
|
|
244
|
+
Orchestration
|
|
245
|
+
════════════════════════════════════════════════════════════ */
|
|
246
|
+
|
|
247
|
+
/** Environment-variable lookup table; unset variables read as `undefined`. */
export type EnvMap = Record<string, string | undefined>;

/** One entry of the search waterfall. */
interface Provider {
  /** Identifier reported in `tried` and passed to `onProviderError`. */
  id: string;
  /** Run the provider; throws on failure so the waterfall can move on. */
  run(http: WebHttp, env: EnvMap, query: string, max: number): Promise<SearchResponse>;
}
|
|
254
|
+
|
|
255
|
+
/**
 * Resolve the ordered provider list for a given env and optional pinned engine.
 *
 * With a recognized pin (case-insensitive, surrounding whitespace ignored) the
 * list holds only that provider. It is empty when the pin names a keyed provider
 * whose key or URL is not set in `env`. `ddg` is accepted as an alias for
 * `duckduckgo`. An unknown pin is ignored and the automatic order applies:
 * configured keyed providers (tavily, brave, serper, searxng), then jina, then
 * every scrape engine.
 */
export function resolveProviders(env: EnvMap, pinned?: string): Provider[] {
  const pin = (pinned || '').trim().toLowerCase();
  const braveKey = env.BRAVE_API_KEY || env.BRAVE_SEARCH_API_KEY;

  // Keyed providers exist only when their credential (or base URL) is configured.
  const keyed: Record<'tavily' | 'brave' | 'serper' | 'searxng', Provider | null> = {
    tavily: env.TAVILY_API_KEY
      ? { id: 'tavily', run: (h, e, q, m) => tavily(h, e.TAVILY_API_KEY!, q, m) }
      : null,
    brave: braveKey
      ? { id: 'brave', run: (h, _e, q, m) => brave(h, braveKey!, q, m) }
      : null,
    serper: env.SERPER_API_KEY
      ? { id: 'serper', run: (h, e, q, m) => serper(h, e.SERPER_API_KEY!, q, m) }
      : null,
    searxng: env.SEARXNG_URL
      ? { id: 'searxng', run: (h, e, q, m) => searxng(h, e.SEARXNG_URL!, q, m) }
      : null,
  };
  const keyless: Provider = { id: 'jina', run: (h, e, q, m) => jina(h, e.JINA_API_KEY, q, m) };
  const scraperFor = (eng: ScrapeEngine): Provider => ({
    id: eng,
    run: (h, _e, q, m) => scrape(h, eng, q, m),
  });

  // Explicit pin (tool arg or SKYLOOM_SEARCH_ENGINE) — use only that provider.
  switch (pin) {
    case '':
      break;
    case 'tavily':
    case 'brave':
    case 'serper':
    case 'searxng': {
      const chosen = keyed[pin];
      return chosen ? [chosen] : [];
    }
    case 'jina':
      return [keyless];
    case 'ddg':
    case 'duckduckgo':
      return [scraperFor('duckduckgo')];
    default:
      if ((SCRAPE_ENGINES as readonly string[]).includes(pin)) return [scraperFor(pin as ScrapeEngine)];
    // Unknown pin → fall through to auto.
  }

  // Auto waterfall: keyed providers first (best), then keyless Jina, then scrape.
  const order: Provider[] = [];
  for (const candidate of [keyed.tavily, keyed.brave, keyed.serper, keyed.searxng]) {
    if (candidate) order.push(candidate);
  }
  order.push(keyless);
  for (const eng of SCRAPE_ENGINES) {
    order.push(scraperFor(eng));
  }
  return order;
}
|
|
290
|
+
|
|
291
|
+
/** Optional settings for `webSearch`. */
export interface WebSearchOptions {
  /** Requested number of results. */
  max?: number;
  /** Explicit provider pin from the tool argument. */
  engine?: string;
  /** Environment to read keys from; defaults to `process.env`. */
  env?: EnvMap;
  /** HTTP transport; defaults to the axios-backed client. */
  http?: WebHttp;
  /** Called with the provider id and an error message whenever a provider throws. */
  onProviderError?: (provider: string, error: string) => void;
}
|
|
298
|
+
|
|
299
|
+
/**
 * Run a web search through the provider waterfall.
 *
 * Providers are tried in the order given by `resolveProviders`. The first one
 * that yields at least one result, or an answer, wins. A provider that throws is
 * reported through `opts.onProviderError` and skipped.
 *
 * @param query Search text; surrounding whitespace is ignored.
 * @param opts  Optional settings. `max` is floored and clamped to 1–20; it
 *              defaults to 8 when omitted or not a number, so NaN is never
 *              forwarded to a provider. `engine` falls back to the
 *              `SKYLOOM_SEARCH_ENGINE` environment variable.
 * @returns The winning provider's response plus `tried`, the ids attempted so
 *          far. When nothing produced results, `provider` is `'none'` and
 *          `results` is empty.
 * @throws Error `query is required` when the query is empty after trimming.
 */
export async function webSearch(query: string, opts: WebSearchOptions = {}): Promise<SearchResponse & { tried: string[] }> {
  const q = (query || '').trim();
  if (!q) throw new Error('query is required');

  // Tool arguments may arrive as non-numeric values; NaN would otherwise slip
  // through Math.max/Math.min unchanged and reach the provider requests.
  const requested = Number(opts.max ?? 8);
  const max = Number.isNaN(requested) ? 8 : Math.max(1, Math.min(20, Math.floor(requested)));

  const env = opts.env ?? (process.env as EnvMap);
  const http = opts.http ?? defaultHttp;
  const pinned = (opts.engine || env.SKYLOOM_SEARCH_ENGINE || '').trim();

  const providers = resolveProviders(env, pinned);
  const tried: string[] = [];
  for (const provider of providers) {
    tried.push(provider.id);
    try {
      const res = await provider.run(http, env, q, max);
      if (res.results.length > 0 || res.answer) return { ...res, tried };
    } catch (e: unknown) {
      // Best effort by design: report the failure and let the next provider try.
      const detail = (e as { message?: unknown } | null | undefined)?.message || e;
      opts.onProviderError?.(provider.id, String(detail));
    }
  }
  return { provider: 'none', results: [], tried };
}
|
|
325
|
+
|
|
326
|
+
/**
 * Format a SearchResponse as compact text for an LLM tool result.
 *
 * With neither results nor an answer, the text is a "no results" notice that lists
 * the providers tried (when known) and suggests setting an API key. Otherwise it
 * is an optional `Answer:` line, a header naming the provider and hit count, and
 * one numbered entry per hit (title, URL, then the snippet when non-empty).
 */
export function formatSearchResults(res: SearchResponse & { tried?: string[] }): string {
  const hitCount = res.results.length;

  if (hitCount === 0 && !res.answer) {
    const tried = res.tried?.length ? ` (tried: ${res.tried.join(', ')})` : '';
    return `No search results found${tried}. Try a simpler query, or set a search API key (TAVILY_API_KEY / BRAVE_API_KEY / SERPER_API_KEY) for more reliable results.`;
  }

  const lines: string[] = [];
  if (res.answer) {
    lines.push(`Answer: ${res.answer}\n`);
  }
  lines.push(`Search results (${res.provider}, ${hitCount}):`);

  const entries = res.results.map((hit, index) => {
    let entry = `${index + 1}. ${hit.title}\n ${hit.url}`;
    if (hit.snippet) {
      entry += `\n ${hit.snippet}`;
    }
    return entry;
  });
  lines.push(entries.join('\n'));

  return lines.join('\n');
}
|
|
338
|
+
|
|
339
|
+
/* ════════════════════════════════════════════════════════════
|
|
340
|
+
Page reader — clean, LLM-ready content from a URL
|
|
341
|
+
════════════════════════════════════════════════════════════ */
|
|
342
|
+
|
|
343
|
+
/**
 * Fetch a URL as clean, readable text.
 *
 * The page is first requested through Jina's r.jina.ai reader (authenticated with
 * `JINA_API_KEY` when set, 20 s timeout). If that call fails or returns only
 * whitespace, the URL is fetched directly (15 s timeout) and its tags are
 * stripped. Either way the text is clipped to `maxChars` (default 12000).
 *
 * NOTE(review): the fallback requests the caller-supplied URL as-is; confirm that
 * callers restrict which hosts may be targeted.
 *
 * @throws Error `url must be http(s)` when `url` does not start with http:// or https://.
 */
export async function readPage(url: string, opts: { env?: EnvMap; http?: WebHttp; maxChars?: number } = {}): Promise<string> {
  const env = opts.env ?? (process.env as EnvMap);
  const http = opts.http ?? defaultHttp;
  const maxChars = opts.maxChars ?? 12000;

  const isHttpUrl = /^https?:\/\//i.test(url);
  if (!isHttpUrl) throw new Error('url must be http(s)');

  const readerHeaders: Record<string, string> = { Accept: 'text/plain' };
  if (env.JINA_API_KEY) {
    readerHeaders.Authorization = `Bearer ${env.JINA_API_KEY}`;
  }

  try {
    const rendered = await http.getText(`https://r.jina.ai/${url}`, { headers: readerHeaders, timeoutMs: 20000 });
    const hasContent = Boolean(rendered) && rendered.trim().length > 0;
    if (hasContent) return clip(rendered, maxChars);
  } catch {
    // Reader unavailable: fall through to the raw fetch below.
  }

  const rawHtml = await http.getText(url, { timeoutMs: 15000 });
  return clip(stripTags(rawHtml), maxChars);
}
|
|
365
|
+
|
|
366
|
+
/**
 * Truncate `s` to at most `max` UTF-16 code units, appending a note that states
 * how many were dropped. Text that already fits is returned unchanged.
 *
 * The cut never lands in the middle of a surrogate pair, so the output cannot end
 * in half an emoji or other astral character. A fractional `max` is floored and a
 * negative one is treated as 0, which keeps the reported count equal to what was
 * actually removed.
 */
function clip(s: string, max: number): string {
  if (!(s.length > max)) return s;

  let cut = Math.max(0, Math.floor(max));
  if (cut > 0) {
    const last = s.charCodeAt(cut - 1);
    // A high surrogate at the boundary means its low half would be cut off.
    if (last >= 0xd800 && last <= 0xdbff) cut -= 1;
  }
  return s.slice(0, cut) + `\n...[truncated, ${s.length - cut} more chars]`;
}
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
import { describe, it, expect } from "vitest";
|
|
2
|
+
import {
|
|
3
|
+
hasWizard, nextWizardStep, buildCommandLine, filterChoices,
|
|
4
|
+
type WizardContext, type ArgChoice,
|
|
5
|
+
} from "../src/cli/command_args";
|
|
6
|
+
|
|
7
|
+
// Shared wizard fixture: three providers (two configured, one not), two models and two saved sessions.
const CTX: WizardContext = {
  providers: [
    { id: "openai", label: "OpenAI", configured: true, envVar: "OPENAI_API_KEY" },
    { id: "deepseek", label: "DeepSeek", configured: false, envVar: "DEEPSEEK_API_KEY" },
    // Ollama carries no envVar in this fixture.
    { id: "ollama", label: "Ollama", configured: true },
  ],
  models: [
    { id: "gpt-4o", provider: "openai", label: "gpt-4o", hint: "$2.5/$10" },
    { id: "deepseek-chat", provider: "deepseek", label: "deepseek-chat", hint: "$0.3/$1.1" },
  ],
  sessions: [
    { id: "abcdef123456", label: "重构搜索模块" },
    { id: "ff00aa221133", label: "写测试" },
  ],
};
|
|
22
|
+
|
|
23
|
+
describe("hasWizard", () => {
  // Commands are checked both with and without the leading slash.
  it("recognizes wizard commands with or without slash", () => {
    const wizardCommands = ["model", "/model", "apikey", "/apikey", "connect", "resume"];
    wizardCommands.forEach((command) => {
      expect(hasWizard(command), command).toBe(true);
    });
  });

  it("rejects non-wizard commands", () => {
    const plainCommands = ["help", "/status", "fog", "/clear", "verify"];
    plainCommands.forEach((command) => {
      expect(hasWizard(command), command).toBe(false);
    });
  });
});
|
|
35
|
+
|
|
36
|
+
describe("nextWizardStep · /apikey (the key-config flow)", () => {
  it("step 0 lists providers with configured badges", () => {
    const providerStep = nextWizardStep("/apikey", [], CTX)!;
    expect(providerStep.kind).toBe("choice");

    const offered = providerStep.choices.map((choice) => choice.value);
    expect(offered).toEqual(["openai", "deepseek", "ollama"]);

    // openai is configured in the fixture; deepseek is not, so its hint names the env var.
    expect(providerStep.choices[0].hint).toContain("已配置");
    expect(providerStep.choices[1].hint).toContain("DEEPSEEK_API_KEY");
    expect(providerStep.allowFreeform).toBe(true);
  });

  it("step 1 prompts for the key, masked", () => {
    const keyStep = nextWizardStep("/apikey", ["deepseek"], CTX)!;
    expect(keyStep.kind).toBe("freeform");
    expect(keyStep.secret).toBe(true);
    expect(keyStep.title).toContain("deepseek");
  });

  it("is complete after provider + key", () => {
    const afterBoth = nextWizardStep("/apikey", ["deepseek", "sk-xxx"], CTX);
    expect(afterBoth).toBeNull();
  });

  it("builds the correct command line", () => {
    const line = buildCommandLine("/apikey", ["deepseek", "sk-xxx"]);
    expect(line).toBe("/apikey set deepseek sk-xxx");
  });
});
|
|
58
|
+
|
|
59
|
+
describe("nextWizardStep · /model", () => {
  it("offers reset + every model, completes after one pick", () => {
    const modelStep = nextWizardStep("/model", [], CTX)!;
    expect(modelStep.choices[0].value).toBe("reset");

    const offered = modelStep.choices.map((choice) => choice.value);
    expect(offered).toContain("gpt-4o");

    // Each model choice is grouped under its provider.
    const gpt4o = modelStep.choices.find((choice) => choice.value === "gpt-4o")!;
    expect(gpt4o.group).toBe("openai");

    expect(nextWizardStep("/model", ["gpt-4o"], CTX)).toBeNull();
  });

  it("builds /model <id> and /model reset", () => {
    const pickLine = buildCommandLine("/model", ["gpt-4o"]);
    expect(pickLine).toBe("/model gpt-4o");
    const resetLine = buildCommandLine("/model", ["reset"]);
    expect(resetLine).toBe("/model reset");
  });
});
|
|
72
|
+
|
|
73
|
+
describe("nextWizardStep · /connect and /resume", () => {
  it("connect picks a provider", () => {
    const providerStep = nextWizardStep("/connect", [], CTX)!;
    const offered = providerStep.choices.map((choice) => choice.value);
    expect(offered).toEqual(["openai", "deepseek", "ollama"]);
    expect(nextWizardStep("/connect", ["openai"], CTX)).toBeNull();
    expect(buildCommandLine("/connect", ["openai"])).toBe("/connect openai");
  });

  it("resume lists sessions by index, builds /resume <n>", () => {
    const sessionStep = nextWizardStep("/resume", [], CTX)!;
    // Sessions are addressed by their 1-based position, not by id.
    const offered = sessionStep.choices.map((choice) => choice.value);
    expect(offered).toEqual(["1", "2"]);
    expect(sessionStep.choices[0].label).toContain("重构搜索模块");
    expect(buildCommandLine("/resume", ["1"])).toBe("/resume 1");
  });

  it("resume with no sessions still returns a (empty) step, not a crash", () => {
    const withoutSessions = { ...CTX, sessions: [] };
    const sessionStep = nextWizardStep("/resume", [], withoutSessions)!;
    expect(sessionStep.choices).toHaveLength(0);
    expect(sessionStep.allowFreeform).toBe(true);
  });
});
|
|
92
|
+
|
|
93
|
+
describe("filterChoices", () => {
  // Two openai models sharing a prefix, plus one deepseek model.
  const choices: ArgChoice[] = [
    { value: "gpt-4o", label: "gpt-4o", group: "openai" },
    { value: "deepseek-chat", label: "deepseek-chat", group: "deepseek" },
    { value: "gpt-4o-mini", label: "gpt-4o-mini", group: "openai" },
  ];
  const valuesOf = (list: ArgChoice[]) => list.map((choice) => choice.value);

  it("returns all on empty query", () => {
    const everything = filterChoices(choices, "");
    expect(everything).toHaveLength(3);
  });

  it("matches on value substring", () => {
    const matched = filterChoices(choices, "deepseek");
    expect(valuesOf(matched)).toEqual(["deepseek-chat"]);
  });

  it("matches on group", () => {
    const matched = filterChoices(choices, "openai");
    expect(valuesOf(matched).sort()).toEqual(["gpt-4o", "gpt-4o-mini"]);
  });

  it("ranks exact/prefix matches before substring matches", () => {
    const ranked = filterChoices(choices, "gpt-4o");
    // The exact match comes first.
    expect(ranked[0].value).toBe("gpt-4o");
  });

  it("returns empty when nothing matches", () => {
    const matched = filterChoices(choices, "zzz");
    expect(matched).toHaveLength(0);
  });
});
|
package/tests/loom.test.ts
CHANGED
|
@@ -247,6 +247,80 @@ describe("palette ↑↓ navigation + Enter execution", () => {
|
|
|
247
247
|
});
|
|
248
248
|
});
|
|
249
249
|
|
|
250
|
+
describe("argument wizard (cascading ↑↓ selection)", () => {
  // Send one named key event to the UI; `opts.str` supplies the typed string if any.
  const press = (ui: any, name: string, opts: Record<string, any> = {}): void => {
    ui.onKey(opts.str ?? "", { name, ...opts });
  };
  // Type text one character at a time.
  const typeText = (ui: any, text: string): void => {
    for (const ch of text) ui.onKey(ch, { name: ch });
  };

  // A two-level /apikey wizard: pick a provider, then paste a key.
  const wireApikeyWizard = (ui: any): void => {
    ui.wizardStep = (command: string, prior: string[]) => {
      if (!/apikey/.test(command)) return null;
      switch (prior.length) {
        case 0:
          return {
            kind: "choice", title: "选择 Provider", allowFreeform: true,
            choices: [
              { value: "deepseek", label: "DeepSeek", hint: "未配置" },
              { value: "openai", label: "OpenAI", hint: "✓ 已配置" },
            ],
          };
        case 1:
          return { kind: "freeform", title: `粘贴 ${prior[0]} key`, choices: [], allowFreeform: true, secret: true };
        default:
          return null;
      }
    };
  };

  it("selecting a wizard command opens the wizard and cascades to submit", async () => {
    const ui = makeUI() as any;
    wireApikeyWizard(ui);
    const submitted = ui.readInput();

    typeText(ui, "/apikey");
    press(ui, "return"); // palette → opens the wizard (input cleared, not submitted)
    expect(ui.inputGlyphs.length).toBe(0);

    press(ui, "down"); // deepseek → openai
    press(ui, "return"); // pick provider → advance to the key step
    typeText(ui, "sk-secret");
    press(ui, "return"); // submit

    expect(await submitted).toBe("/apikey set openai sk-secret");
  });

  it("typing filters the choice list; Enter picks the filtered match", async () => {
    const ui = makeUI() as any;
    wireApikeyWizard(ui);
    const submitted = ui.readInput();

    typeText(ui, "/apikey");
    press(ui, "return");
    typeText(ui, "deep"); // filters to deepseek
    press(ui, "return"); // pick it
    typeText(ui, "k1");
    press(ui, "return");

    expect(await submitted).toBe("/apikey set deepseek k1");
  });

  it("backspace at an empty filter steps back a level; Esc cancels", async () => {
    const ui = makeUI() as any;
    wireApikeyWizard(ui);
    ui.readInput();

    typeText(ui, "/apikey");
    press(ui, "return"); // wizard open at provider step
    press(ui, "return"); // pick deepseek → key step
    press(ui, "backspace"); // empty typed → back to provider step
    // still in the wizard, not submitted; Esc closes it entirely
    press(ui, "escape");
    expect(ui.inputGlyphs.length).toBe(0);

    // a frame still renders at full width after cancelling
    for (const row of ui.paint()) {
      expect(visualWidth(row)).toBe(80);
    }
  });

  it("a non-wizard command is unaffected (submits directly)", async () => {
    const ui = makeUI() as any;
    wireApikeyWizard(ui); // only /apikey has a wizard
    const submitted = ui.readInput();

    typeText(ui, "/status");
    press(ui, "return");

    expect(await submitted).toBe("/status");
  });
});
|
|
323
|
+
|
|
250
324
|
describe("mouse wheel scrolling", () => {
|
|
251
325
|
// Replay an SGR mouse sequence the way Node's keypress parser fragments it:
|
|
252
326
|
// ESC[< as one event, then every remaining char separately.
|
|
@@ -53,6 +53,21 @@ describe('selectRelevantTools', () => {
|
|
|
53
53
|
expect(selected).toContain('read_file');
|
|
54
54
|
});
|
|
55
55
|
|
|
56
|
+
it('surfaces web_search for current-events queries among many tools', () => {
  // Regression: "今日热点新闻" used to score 0 for web_search, so with a large
  // tool catalog it never made the shortlist and the LLM couldn't use it.
  const fillerTools = Array.from(
    { length: 30 },
    (_, i): [string, string] => [`tool_${i}`, `unrelated capability ${i}`],
  );
  const registry = makeRegistry([
    ['web_search', 'search the live web'],
    ['read_url', 'read a web page as text'],
    ...fillerTools,
  ]);
  const names = registry.listNames();

  const currentEventQueries = ['今日热点新闻', '最新的事件', "today's latest news", '查一下现在的天气'];
  currentEventQueries.forEach((q) => {
    const selected = selectRelevantTools(registry, names, q, { topK: 8 });
    expect(selected, `query: ${q}`).toContain('web_search');
  });
});
|
|
70
|
+
|
|
56
71
|
it('mustInclude always present', () => {
|
|
57
72
|
const r = makeRegistry(Array.from({ length: 20 }, (_, i) => [`random_${i}`, `unrelated tool ${i}`]));
|
|
58
73
|
r.register({
|