viruagent 1.3.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +38 -4
- package/config/agent-prompt.md +2 -1
- package/config/prompt-config.json +6 -0
- package/config/system-prompt.md +8 -0
- package/package.json +3 -2
- package/src/agent.js +71 -9
- package/src/cli-post.js +25 -1
- package/src/lib/ai.js +275 -1
- package/src/lib/pattern-store.js +138 -0
- package/src/lib/structure-policy.js +104 -0
- package/src/lib/title-policy.js +58 -0
- package/src/lib/websearch.js +409 -0
|
@@ -0,0 +1,409 @@
|
|
|
1
|
+
const { createLogger } = require('./logger');
|
|
2
|
+
|
|
3
|
+
// Logger for this module, created with the 'websearch' label.
const webLog = createLogger('websearch');
|
|
4
|
+
|
|
5
|
+
// Per-request timeout in milliseconds, used when options.timeoutMs is absent or converts to a falsy number.
const DEFAULT_TIMEOUT_MS = 8000;
|
|
6
|
+
// Number of results requested when options.maxResults is absent or converts to a falsy number.
const DEFAULT_MAX_RESULTS = 5;
|
|
7
|
+
// Upper bound that the requested maxResults is clamped to.
const MAX_RESULTS_LIMIT = 8;
|
|
8
|
+
|
|
9
|
+
/**
 * Hostnames treated as first-party sources.
 * scoreResult adds its largest bonus to results served from one of these hosts.
 */
const OFFICIAL_DOMAINS = new Set(
  ['openai.com', 'chatgpt.com', 'help.openai.com', 'status.openai.com', 'platform.openai.com'],
);
|
|
16
|
+
|
|
17
|
+
/**
 * Hostnames of news outlets that receive a moderate score bonus in scoreResult.
 * Entries are matched against the exact hostname of a result URL.
 */
const TRUSTED_NEWS_DOMAINS = new Set(
  [
    'reuters.com', 'www.reuters.com',
    'www.digitaltrends.com', 'www.techradar.com', 'www.zdnet.com',
    'www.nytimes.com', 'www.bloomberg.com', 'www.theverge.com',
    'techcrunch.com', 'www.wired.com', 'www.forbes.com',
  ],
);
|
|
30
|
+
|
|
31
|
+
/**
 * Hostnames whose results are penalised in scoreResult.
 * Entries are matched against the exact hostname of a result URL.
 */
const LOW_TRUST_DOMAINS = new Set(
  [
    'reddit.com', 'www.reddit.com',
    'zhihu.com', 'www.zhihu.com',
    'github.com',
    'quora.com', 'www.quora.com',
    'pinterest.com', 'm.pinterest.com',
  ],
);
|
|
42
|
+
|
|
43
|
+
/**
 * Limit a number to the range between min and max.
 *
 * @param {number} value - Number to limit.
 * @param {number} min - Lower bound.
 * @param {number} max - Upper bound.
 * @returns {number} value capped at max, then raised to at least min.
 */
const clamp = (value, min, max) => {
  const capped = Math.min(max, value);
  return Math.max(min, capped);
};
|
|
44
|
+
/**
 * Wait for the given number of milliseconds.
 *
 * @param {number} ms - Delay handed to setTimeout.
 * @returns {Promise<void>} Promise that resolves once the timer fires.
 */
const sleep = (ms) =>
  new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
|
|
45
|
+
|
|
46
|
+
/**
 * Decode the HTML character references that show up in search-result markup.
 *
 * Handles decimal (`&#39;`) and hexadecimal (`&#x27;`) numeric references plus
 * the named references `&quot;`, `&apos;`, `&amp;`, `&lt;`, `&gt;` and `&nbsp;`.
 * Everything is decoded in a single pass, so already-decoded output is never
 * decoded a second time (`&amp;lt;` becomes `&lt;`, not `<`).
 *
 * @param {string} [text=''] - Text that may contain character references.
 * @returns {string} Text with the supported references replaced.
 */
const decodeHtmlEntities = (text = '') => {
  const namedEntities = { quot: '"', apos: "'", amp: '&', lt: '<', gt: '>', nbsp: ' ' };

  // String.fromCodePoint throws a RangeError outside the Unicode range, so
  // out-of-range references are left untouched instead.
  const fromCodePoint = (codePoint, original) =>
    Number.isInteger(codePoint) && codePoint >= 0 && codePoint <= 0x10ffff
      ? String.fromCodePoint(codePoint)
      : original;

  return text.replace(
    /&(?:#(\d+)|#x([0-9a-fA-F]+)|(quot|apos|amp|lt|gt|nbsp));/g,
    (whole, decimal, hex, name) => {
      if (decimal !== undefined) return fromCodePoint(Number(decimal), whole);
      if (hex !== undefined) return fromCodePoint(parseInt(hex, 16), whole);
      return namedEntities[name];
    },
  );
};
|
|
56
|
+
|
|
57
|
+
/**
 * Turn an HTML fragment into plain text.
 *
 * Each tag is replaced by a space, character references are decoded with
 * decodeHtmlEntities, and runs of whitespace are collapsed to a single space.
 *
 * @param {string} [html=''] - HTML fragment.
 * @returns {string} Trimmed plain text.
 */
const stripTags = (html = '') => {
  const withoutMarkup = html.replace(/<[^>]*>/g, ' ');
  const decoded = decodeHtmlEntities(withoutMarkup);
  return decoded.replace(/\s+/g, ' ').trim();
};
|
|
61
|
+
|
|
62
|
+
/**
 * Extract the lower-cased hostname from a URL string.
 *
 * @param {string} [url=''] - Absolute URL.
 * @returns {string} Hostname, or an empty string when the URL cannot be parsed.
 */
const getHostname = (url = '') => {
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    return '';
  }
  return parsed.hostname.toLowerCase();
};
|
|
69
|
+
|
|
70
|
+
/**
 * Build a de-duplication key for a result URL.
 *
 * The key is `<hostname><path>` with the query string, fragment and trailing
 * slashes ignored. For chatgpt.com and openai.com a leading locale-looking
 * path segment (for example `ko-KR`) is removed when more segments follow, so
 * localized copies of one page share a key.
 *
 * @param {string} [url=''] - Result URL.
 * @returns {string} Canonical key, or the input unchanged when it is not a parseable URL.
 */
const getCanonicalUrlKey = (url = '') => {
  let target;
  try {
    target = new URL(url);
  } catch {
    return url;
  }

  const hostname = target.hostname.toLowerCase();
  const segments = target.pathname.split('/').filter((segment) => segment.length > 0);
  const isOpenAiSite = hostname === 'chatgpt.com' || hostname === 'openai.com';

  if (isOpenAiSite && segments.length > 1 && /^[a-z]{2}(?:-[a-z0-9]{2,})?$/i.test(segments[0])) {
    target.pathname = `/${segments.slice(1).join('/')}`;
  }

  const trimmedPath = target.pathname.replace(/\/+$/, '');
  return `${hostname}${trimmedPath || '/'}`;
};
|
|
90
|
+
|
|
91
|
+
/**
 * Resolve a link taken from a DuckDuckGo result page to the real target URL.
 *
 * Protocol-relative (`//host/...`) and site-relative (`/l/?...`) hrefs are
 * made absolute first. When the link points at duckduckgo.com (or one of its
 * subdomains) and carries a `uddg` query parameter, that parameter's value is
 * returned as the target.
 *
 * @param {string} [href=''] - Raw href attribute value (may contain HTML entities).
 * @returns {string} Target URL; the normalized href when it cannot be parsed;
 *   an empty string for empty input.
 */
const unwrapDuckDuckGoUrl = (href = '') => {
  if (!href) return '';

  let normalized = decodeHtmlEntities(href.trim());
  if (normalized.startsWith('//')) {
    normalized = `https:${normalized}`;
  } else if (normalized.startsWith('/')) {
    normalized = `https://duckduckgo.com${normalized}`;
  }

  let parsed;
  try {
    parsed = new URL(normalized);
  } catch {
    return normalized;
  }

  // Match the host exactly (or as a parent domain) so look-alike hosts such
  // as "duckduckgo.com.example" are not treated as DuckDuckGo redirects.
  const host = parsed.hostname.toLowerCase();
  const isDuckDuckGo = host === 'duckduckgo.com' || host.endsWith('.duckduckgo.com');

  if (isDuckDuckGo) {
    // searchParams.get() already percent-decodes the value. Decoding it again
    // would corrupt targets containing encoded characters (%2520 -> space) and
    // throw on a literal "%".
    const uddg = parsed.searchParams.get('uddg');
    if (uddg) return uddg;
  }

  return parsed.toString();
};
|
|
109
|
+
|
|
110
|
+
/**
 * Split text into distinct, lower-cased search tokens.
 *
 * A token is a run of at least three ASCII letters/digits or at least two
 * Hangul syllables. A small list of English and Korean stop words is dropped
 * and duplicates are removed, keeping first-occurrence order.
 *
 * @param {string} [text=''] - Text to tokenize.
 * @returns {string[]} Unique tokens in order of first appearance.
 */
const getQueryTokens = (text = '') => {
  const stopWords = new Set(['with', 'from', 'that', 'this', '그리고', '대한', '관련', '차이', '비교']);
  const found = String(text).toLowerCase().match(/[a-z0-9]{3,}|[가-힣]{2,}/g) || [];
  const meaningful = found.filter((word) => !stopWords.has(word));
  // A Set keeps insertion order, so this removes repeats without reordering.
  return [...new Set(meaningful)];
};
|
|
124
|
+
|
|
125
|
+
/**
 * Score how many query tokens appear in a result's title or snippet.
 *
 * @param {string} topic - Query text, tokenized with getQueryTokens.
 * @param {string} title - Result title.
 * @param {string} snippet - Result snippet.
 * @returns {number} Two points per token found (case-insensitive substring match); 0 when the query has no tokens.
 */
const overlapScore = (topic, title, snippet) => {
  const tokens = getQueryTokens(topic);
  if (tokens.length === 0) return 0;

  const haystack = `${title} ${snippet}`.toLowerCase();
  const hits = tokens.filter((token) => haystack.includes(token)).length;
  return hits * 2;
};
|
|
136
|
+
|
|
137
|
+
/**
 * Compute a ranking score for a search result.
 *
 * Starts from the query/title/snippet token overlap, then applies domain
 * adjustments: +30 for official hosts, +10 for trusted news hosts, -18 for
 * low-trust hosts, and +8 when a ChatGPT/OpenAI query lands on a host whose
 * name mentions the same product.
 *
 * The domain sets list some hosts only in their bare form and others only
 * with a `www.` prefix, so each lookup tries both forms of the hostname.
 *
 * @param {{title: string, url: string, snippet: string}} result - Result to score.
 * @param {string} originalQuery - The user's original query.
 * @returns {number} Score; higher is better.
 */
const scoreResult = (result, originalQuery) => {
  const host = getHostname(result.url);
  const alternateHost = host.startsWith('www.') ? host.slice(4) : `www.${host}`;
  const isListedIn = (domains) => domains.has(host) || domains.has(alternateHost);

  let score = overlapScore(originalQuery, result.title, result.snippet);

  if (isListedIn(OFFICIAL_DOMAINS)) score += 30;
  if (isListedIn(TRUSTED_NEWS_DOMAINS)) score += 10;
  if (isListedIn(LOW_TRUST_DOMAINS)) score -= 18;

  if (/chatgpt/i.test(originalQuery) && /chatgpt|openai/i.test(host)) score += 8;
  if (/openai/i.test(originalQuery) && /openai/i.test(host)) score += 8;

  return score;
};
|
|
150
|
+
|
|
151
|
+
/**
 * Decide whether a result fits the intent detected in the query.
 *
 * Two intents are recognised:
 *  - ChatGPT Plus vs Pro comparison: the result must mention ChatGPT and
 *    either both plans, or Pro together with a pricing/plan cue.
 *  - ChatGPT/OpenAI status or outage: the result must mention a
 *    status/incident/outage/error cue in its title, snippet or URL.
 * Any other query accepts every result.
 *
 * @param {{title: string, url: string, snippet: string}} result - Candidate result.
 * @param {string} query - The user's original query.
 * @returns {boolean} True when the result should be kept.
 */
const isIntentMatch = (result, query) => {
  const q = String(query).toLowerCase();
  const text = `${result.title} ${result.snippet} ${result.url}`.toLowerCase();

  // "pro" must be a standalone word in the query; a bare /pro/ would also fire
  // on "prompt" or "problem" and wrongly demand plan-related results.
  if (/chatgpt/.test(q) && /plus/.test(q) && /\bpro\b/.test(q)) {
    const hasChatgpt = /chatgpt/.test(text);
    const hasPlus = /plus/.test(text);
    const hasPro = /\bpro\b|\/pro\b/.test(text);
    const hasPlanCue = /pricing|plan|plans|요금/.test(text);
    return hasChatgpt && ((hasPlus && hasPro) || (hasPro && hasPlanCue));
  }

  if (/chatgpt|openai/.test(q) && /(status|outage|issue|error|장애|이슈)/.test(q)) {
    // "status" also covers status.openai.com URLs, so no separate host pattern is needed.
    return /status|incident|outage|error/.test(text);
  }

  return true;
};
|
|
169
|
+
|
|
170
|
+
/**
 * Expand a query into up to four search variants.
 *
 * Variants, in order: the whitespace-normalized query, an ASCII-only version
 * (when the query contains non-ASCII text), two pricing-oriented queries for
 * ChatGPT Plus/Pro comparisons, and two status-oriented queries for
 * ChatGPT/OpenAI outage questions. Duplicates are skipped.
 *
 * @param {string} query - The user's query.
 * @returns {string[]} At most four distinct query strings.
 */
const buildQueryVariants = (query) => {
  const raw = String(query);
  const variants = [];

  const add = (candidate) => {
    const normalized = String(candidate || '').replace(/\s+/g, ' ').trim();
    if (normalized && !variants.includes(normalized)) variants.push(normalized);
  };

  add(query);

  const ascii = raw.replace(/[^\x00-\x7F]+/g, ' ').replace(/\s+/g, ' ').trim();
  if (ascii && ascii.toLowerCase() !== raw.toLowerCase()) add(ascii);

  const lower = raw.toLowerCase();

  // "pro" must be a standalone word; a bare /pro/ also matches "prompt" or
  // "problem" and would add pricing queries to unrelated searches.
  if (/chatgpt/.test(lower) && /plus/.test(lower) && /\bpro\b/.test(lower)) {
    add('ChatGPT plans pricing plus pro official');
    add('chatgpt.com pricing plus pro');
  }

  if (/chatgpt|openai/.test(lower) && /(issue|issues|outage|status|error|장애|이슈|오류|다운)/.test(lower)) {
    add('OpenAI status ChatGPT incidents');
    add('ChatGPT release notes OpenAI help');
  }

  return variants.slice(0, 4);
};
|
|
197
|
+
|
|
198
|
+
/**
 * Parse a DuckDuckGo (lite or html endpoint) result page into result items.
 *
 * Result links are anchors whose class contains `result__a` or `result-link`.
 * The snippet for a result is looked up in the markup between that link and
 * the NEXT RESULT link (or the following 2200 characters for the last one).
 * Bounding the window by the next result link, not by the next anchor of any
 * kind, matters because the snippet itself can be an
 * `<a class="result__snippet">` element.
 *
 * @param {string} html - Raw page markup.
 * @param {number} maxResults - Stop after collecting this many items.
 * @returns {Array<{title: string, url: string, snippet: string}>} Items in page
 *   order, unique by URL, with snippets cut to 300 characters.
 */
const extractResultItems = (html, maxResults) => {
  const items = [];
  const seen = new Set();

  const anchorRegex = /<a\b([^>]*)>([\s\S]*?)<\/a>/gi;
  const resultClassRegex = /class\s*=\s*["'][^"']*(result__a|result-link)[^"']*["']/i;
  const hrefRegex = /href\s*=\s*["']([^"']+)["']/i;
  const snippetRegex =
    /<(?:a|div|td)[^>]*class\s*=\s*["'][^"']*(result__snippet|result-snippet)[^"']*["'][^>]*>([\s\S]*?)<\/(?:a|div|td)>/i;

  // Keep only the result-title anchors so that helper anchors (URL line,
  // snippet link) never cut the snippet window short.
  const resultAnchors = [...html.matchAll(anchorRegex)].filter((match) =>
    resultClassRegex.test(match[1] || ''),
  );

  for (let i = 0; i < resultAnchors.length; i++) {
    const match = resultAnchors[i];

    const hrefMatch = (match[1] || '').match(hrefRegex);
    if (!hrefMatch) continue;

    const title = stripTags(match[2] || '');
    const url = unwrapDuckDuckGoUrl(hrefMatch[1]);
    if (!title || !url || seen.has(url)) continue;
    seen.add(url);

    const nextResult = resultAnchors[i + 1];
    const windowEnd = nextResult ? nextResult.index : match.index + 2200;
    const snippetMatch = html.slice(match.index, windowEnd).match(snippetRegex);
    const snippet = stripTags(snippetMatch ? snippetMatch[2] : '');

    items.push({
      title,
      url,
      snippet: snippet.slice(0, 300),
    });

    if (items.length >= maxResults) break;
  }

  return items;
};
|
|
242
|
+
|
|
243
|
+
/**
 * Parse a Bing RSS search response into result items.
 *
 * Every `<item>` contributes its `<title>`, `<link>` and `<description>`
 * (each passed through stripTags). Items without a title or link, and items
 * whose link was already seen, are skipped.
 *
 * @param {string} xml - RSS document text.
 * @param {number} maxResults - Stop after collecting this many items.
 * @returns {Array<{title: string, url: string, snippet: string}>} Items in feed
 *   order with snippets cut to 300 characters.
 */
const extractBingRssItems = (xml, maxResults) => {
  const readTag = (block, tag) => {
    const pattern = new RegExp(`<${tag}>([\\s\\S]*?)<\\/${tag}>`, 'i');
    const found = pattern.exec(block);
    return found === null ? '' : stripTags(found[1]);
  };

  const collected = [];
  const seenUrls = new Set();

  for (const [, block] of xml.matchAll(/<item>([\s\S]*?)<\/item>/gi)) {
    const title = readTag(block, 'title');
    const url = readTag(block, 'link');
    const snippet = readTag(block, 'description').slice(0, 300);

    const usable = Boolean(title) && Boolean(url) && !seenUrls.has(url);
    if (usable) {
      seenUrls.add(url);
      collected.push({ title, url, snippet });
      if (collected.length >= maxResults) break;
    }
  }

  return collected;
};
|
|
266
|
+
|
|
267
|
+
/**
 * Run one query against the search back ends and return parsed results.
 *
 * The DuckDuckGo lite and html endpoints are tried in order; the first one
 * that yields at least one parsed result wins. If neither does, the Bing RSS
 * endpoint is used as a fallback.
 *
 * @param {string} query - Query text (URL-encoded internally).
 * @param {{maxResults?: number, timeoutMs?: number}} [options={}] - maxResults is
 *   clamped to 1..MAX_RESULTS_LIMIT; timeoutMs applies to each request.
 * @returns {Promise<Array<{title: string, url: string, snippet: string}>>} Parsed results (possibly empty).
 * @throws {Error} When the Bing fallback fails: the last DuckDuckGo error if
 *   there was one, otherwise an error describing the Bing failure.
 */
const searchSingleQuery = async (query, options = {}) => {
  const maxResults = clamp(Number(options.maxResults) || DEFAULT_MAX_RESULTS, 1, MAX_RESULTS_LIMIT);
  const timeoutMs = Number(options.timeoutMs) || DEFAULT_TIMEOUT_MS;
  const encoded = encodeURIComponent(query);

  // GET a URL and return its body text; the request is aborted after timeoutMs
  // and a non-OK status is turned into the error built by makeStatusError.
  const fetchBody = async (url, headers, makeStatusError) => {
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), timeoutMs);
    try {
      const res = await fetch(url, { method: 'GET', headers, signal: controller.signal });
      if (!res.ok) throw makeStatusError(res.status);
      return await res.text();
    } finally {
      clearTimeout(timer);
    }
  };

  const duckDuckGoHeaders = {
    'User-Agent':
      'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
    Accept: 'text/html,application/xhtml+xml',
  };
  const endpoints = [
    `https://lite.duckduckgo.com/lite/?q=${encoded}&kl=us-en`,
    `https://html.duckduckgo.com/html/?q=${encoded}&kl=us-en`,
  ];

  let lastError = null;

  for (const [position, endpoint] of endpoints.entries()) {
    try {
      const html = await fetchBody(
        endpoint,
        duckDuckGoHeaders,
        (status) => new Error(`DuckDuckGo 응답 오류: ${status}`),
      );
      const found = extractResultItems(html, maxResults);
      if (found.length > 0) return found;

      // Short pause before trying the next endpoint after an empty page.
      if (position < endpoints.length - 1) await sleep(200);
    } catch (e) {
      lastError = e.name === 'AbortError' ? new Error(`웹검색 타임아웃 (${timeoutMs}ms)`) : e;
      webLog.warn('웹검색 시도 실패', { query, endpoint, error: lastError.message });
    }
  }

  const fallbackUrl = `https://www.bing.com/search?format=rss&setlang=en-US&cc=US&mkt=en-US&q=${encoded}`;
  const bingHeaders = {
    'User-Agent': 'Mozilla/5.0',
    Accept: 'application/rss+xml,application/xml,text/xml',
  };

  try {
    const xml = await fetchBody(fallbackUrl, bingHeaders, (status) => new Error(`Bing RSS 응답 오류: ${status}`));
    return extractBingRssItems(xml, maxResults);
  } catch (e) {
    const msg = e.name === 'AbortError' ? `Bing RSS 타임아웃 (${timeoutMs}ms)` : e.message;
    // An earlier DuckDuckGo failure takes precedence over the Bing failure.
    if (lastError) throw lastError;
    throw new Error(msg);
  }
};
|
|
339
|
+
|
|
340
|
+
/**
 * Search the web for a query and return a ranked, de-duplicated result list.
 *
 * The query is expanded with buildQueryVariants, each variant is searched in
 * turn, and the combined results are de-duplicated by canonical URL, scored,
 * sorted by descending score, and filtered by intent and by a minimum score.
 * When a filter would remove everything, the less filtered list is used.
 *
 * @param {string} query - Search text; must not be empty or whitespace only.
 * @param {{maxResults?: number, timeoutMs?: number}} [options={}] - maxResults is
 *   clamped to 1..MAX_RESULTS_LIMIT; timeoutMs is forwarded to every request.
 * @returns {Promise<{query: string, fetchedAt: string, results: Array<{title: string, url: string, snippet: string}>}>}
 *   Trimmed query, ISO timestamp, and at most maxResults results.
 * @throws {Error} When the query is empty, or when no variant produced any
 *   result and at least one variant failed (the last failure is rethrown).
 */
const searchWeb = async (query, options = {}) => {
  if (!query || !String(query).trim()) {
    throw new Error('검색어가 비어 있습니다.');
  }

  const maxResults = clamp(Number(options.maxResults) || DEFAULT_MAX_RESULTS, 1, MAX_RESULTS_LIMIT);
  const timeoutMs = Number(options.timeoutMs) || DEFAULT_TIMEOUT_MS;
  const variants = buildQueryVariants(query);

  webLog.info('웹검색 시작', { query, maxResults, variants });

  const aggregate = [];
  let lastError = null;

  for (const variant of variants) {
    try {
      // Ask for at least six per variant so filtering still has candidates left.
      const perVariantOptions = { maxResults: Math.max(maxResults, 6), timeoutMs };
      const results = await searchSingleQuery(variant, perVariantOptions);
      for (const item of results) {
        aggregate.push({ ...item, sourceQuery: variant });
      }
      await sleep(120);
    } catch (e) {
      lastError = e;
      webLog.warn('쿼리 변형 검색 실패', { query: variant, error: e.message });
    }
  }

  if (aggregate.length === 0 && lastError) {
    webLog.warn('웹검색 실패', { query, error: lastError.message });
    throw lastError;
  }

  // First occurrence of each canonical URL wins; Map keeps insertion order.
  const firstByKey = new Map();
  for (const item of aggregate) {
    const dedupeKey = getCanonicalUrlKey(item.url);
    if (!firstByKey.has(dedupeKey)) firstByKey.set(dedupeKey, item);
  }

  const scored = [...firstByKey.values()]
    .map((item) => ({ ...item, score: scoreResult(item, query) }))
    .sort((a, b) => b.score - a.score);

  const intentFiltered = scored.filter((item) => isIntentMatch(item, query));
  const qualityBase = intentFiltered.length ? intentFiltered : scored;
  const qualityFiltered = qualityBase.filter((item) => item.score > -4);

  let pool = scored;
  if (qualityFiltered.length) {
    pool = qualityFiltered;
  } else if (intentFiltered.length) {
    pool = intentFiltered;
  }

  const selected = pool
    .slice(0, maxResults)
    .map(({ title, url, snippet }) => ({ title, url, snippet }));

  webLog.info('웹검색 완료', {
    query,
    count: selected.length,
    domains: selected.slice(0, 4).map((r) => getHostname(r.url) || 'unknown'),
  });

  return {
    query: String(query).trim(),
    fetchedAt: new Date().toISOString(),
    results: selected,
  };
};
|
|
408
|
+
|
|
409
|
+
// Only searchWeb is exported; the other definitions in this file stay module-private.
module.exports = { searchWeb };
|