nothumanallowed 9.7.2 → 9.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/commands/ask.mjs +206 -18
- package/src/commands/chat.mjs +482 -64
- package/src/commands/ui.mjs +843 -89
- package/src/constants.mjs +1 -1
- package/src/services/browser-engine.mjs +1240 -0
- package/src/services/conversations.mjs +277 -0
- package/src/services/tool-executor.mjs +384 -59
- package/src/services/web-tools.mjs +430 -0
- package/src/services/web-ui.mjs +422 -175
|
@@ -0,0 +1,430 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Web search + URL fetch tools for NHA CLI.
|
|
3
|
+
*
|
|
4
|
+
* - web_search: DuckDuckGo HTML scraping (zero API key, zero dependencies)
|
|
5
|
+
* - fetch_url: SSRF-protected HTML→text extraction
|
|
6
|
+
*
|
|
7
|
+
* Enterprise-grade security:
|
|
8
|
+
* - SSRF protection (private IP blocking, protocol validation, DNS pre-resolution)
|
|
9
|
+
 *   - Content-type allowlist (text/*, JSON, and XML only)
|
|
10
|
+
* - Size limits (100KB download, 8KB output)
|
|
11
|
+
* - Timeout protection (10s)
|
|
12
|
+
* - No binary/PDF/script content
|
|
13
|
+
*
|
|
14
|
+
* Zero npm dependencies — pure Node.js 22.
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import { URL } from 'url';
|
|
18
|
+
import dns from 'dns/promises';
|
|
19
|
+
import net from 'net';
|
|
20
|
+
|
|
21
|
+
// ── Constants ────────────────────────────────────────────────────────────────
|
|
22
|
+
|
|
23
|
+
// Ceiling on response bytes read from the network: 100 KiB.
const MAX_DOWNLOAD_BYTES = 102400;

// Ceiling on characters of extracted text handed back to the caller (~2K tokens).
const MAX_OUTPUT_CHARS = 8_000;

// Abort timer for outbound requests, in milliseconds (10 s).
const FETCH_TIMEOUT_MS = 10_000;

// Maximum number of redirect hops.
const MAX_REDIRECTS = 5;

// Default number of search results returned by a search.
const MAX_RESULTS = 8;

// User-Agent header value sent with outbound requests.
const USER_AGENT = 'NHA-CLI/9.0 (NotHumanAllowed; +https://nothumanallowed.com)';
|
|
30
|
+
|
|
31
|
+
// ── SSRF Protection ──────────────────────────────────────────────────────────
|
|
32
|
+
|
|
33
|
+
/**
 * IPv4 ranges that MUST be blocked to prevent SSRF.
 *
 * Each entry is an inclusive `{ start, end }` pair in dotted-quad form.
 * Besides the RFC 1918 private blocks this covers every other range that
 * is not a public unicast destination, because any of them can point at
 * infrastructure the caller should not be able to reach.
 */
const PRIVATE_RANGES = [
  // RFC 1918 private networks
  { start: '10.0.0.0', end: '10.255.255.255' },
  { start: '172.16.0.0', end: '172.31.255.255' },
  { start: '192.168.0.0', end: '192.168.255.255' },
  // Loopback
  { start: '127.0.0.0', end: '127.255.255.255' },
  // Link-local (includes the 169.254.169.254 cloud metadata address)
  { start: '169.254.0.0', end: '169.254.255.255' },
  // "This network"
  { start: '0.0.0.0', end: '0.255.255.255' },
  // Carrier-grade NAT shared address space (100.64.0.0/10)
  { start: '100.64.0.0', end: '100.127.255.255' },
  // IETF protocol assignments (192.0.0.0/24)
  { start: '192.0.0.0', end: '192.0.0.255' },
  // Network benchmarking (198.18.0.0/15)
  { start: '198.18.0.0', end: '198.19.255.255' },
  // Multicast, reserved, and limited broadcast (224.0.0.0 and above)
  { start: '224.0.0.0', end: '255.255.255.255' },
];
|
|
45
|
+
|
|
46
|
+
/**
 * Pack a dotted-quad IPv4 string into an unsigned 32-bit integer.
 *
 * @param {string} ip - Address such as "192.168.0.1".
 * @returns {number} Unsigned 32-bit value of the first four octets.
 */
function ipToLong(ip) {
  const [first, second, third, fourth] = ip.split('.').map(Number);
  const packed = (first << 24) | (second << 16) | (third << 8) | fourth;
  // `>>> 0` reinterprets the signed 32-bit result as unsigned.
  return packed >>> 0;
}
|
|
50
|
+
|
|
51
|
+
/**
 * Report whether an address must be treated as private/internal.
 *
 * Only IPv4 addresses are range-checked against PRIVATE_RANGES. Anything
 * that is not a valid IPv4 string (IPv6, garbage) is reported as private
 * so that callers fail closed instead of letting an unchecked address
 * through.
 *
 * @param {string} ip - Candidate IP address.
 * @returns {boolean} true when the address is blocked.
 */
function isPrivateIp(ip) {
  // Fail closed: this function cannot classify non-IPv4 input.
  if (!net.isIPv4(ip)) return true;

  const value = ipToLong(ip);
  return PRIVATE_RANGES.some(
    ({ start, end }) => value >= ipToLong(start) && value <= ipToLong(end),
  );
}
|
|
59
|
+
|
|
60
|
+
/**
 * IPv6 destinations that must never be fetched: unspecified, loopback,
 * IPv4-mapped (which could smuggle a private IPv4 address), unique-local,
 * link-local, and multicast.
 */
const BLOCKED_IPV6 = new net.BlockList();
BLOCKED_IPV6.addAddress('::', 'ipv6');
BLOCKED_IPV6.addAddress('::1', 'ipv6');
BLOCKED_IPV6.addSubnet('::ffff:0:0', 96, 'ipv6');
BLOCKED_IPV6.addSubnet('fc00::', 7, 'ipv6');
BLOCKED_IPV6.addSubnet('fe80::', 10, 'ipv6');
BLOCKED_IPV6.addSubnet('ff00::', 8, 'ipv6');

/**
 * Report whether an IPv6 address is in a blocked range. Fails closed on
 * anything that cannot be parsed.
 */
function isBlockedIpv6(address) {
  // Resolver output may carry a zone id ("fe80::1%eth0"); drop it first.
  const bare = String(address).split('%')[0];
  if (!net.isIPv6(bare)) return true;
  try {
    return BLOCKED_IPV6.check(bare, 'ipv6');
  } catch {
    return true;
  }
}

/**
 * Validate URL for SSRF safety.
 *
 * Checks, in order: the URL parses, the scheme is http(s), the host is not
 * an obvious localhost spelling, and every address the OS resolver returns
 * for the host (IPv4 and IPv6) is outside the blocked ranges.
 *
 * Resolution uses `dns.lookup` (getaddrinfo), the same resolver path the
 * HTTP client uses, so hosts-file entries and AAAA records are covered and
 * IP-literal hosts are checked directly.
 *
 * NOTE(review): this is a pre-flight check only. It does not pin the
 * resolved address for the subsequent request, so a DNS answer that changes
 * between validation and connection is not caught here.
 *
 * @param {string} urlStr - URL to validate.
 * @returns {Promise<{ safe: true, hostname: string } | { safe: false, reason: string }>}
 */
async function validateUrl(urlStr) {
  let parsed;
  try {
    parsed = new URL(urlStr);
  } catch {
    return { safe: false, reason: 'Invalid URL' };
  }

  // Protocol check
  if (parsed.protocol !== 'https:' && parsed.protocol !== 'http:') {
    return { safe: false, reason: `Blocked protocol: ${parsed.protocol}` };
  }

  const hostname = parsed.hostname.toLowerCase();
  // Name handed to the resolver: no IPv6 brackets, no trailing root dot.
  const lookupName = hostname.replace(/^\[|\]$/g, '').replace(/\.$/, '');
  if (lookupName === '') {
    return { safe: false, reason: 'Invalid URL' };
  }

  // Localhost detection (various encodings)
  if (
    lookupName === 'localhost' ||
    lookupName.endsWith('.localhost') ||
    lookupName === '0.0.0.0' ||
    lookupName === '::1' ||
    /^0x[0-9a-f]+$/i.test(hostname) || // hex-encoded
    /^\d+$/.test(hostname) // decimal-encoded
  ) {
    return { safe: false, reason: 'Blocked: localhost' };
  }

  // Pre-resolve through the OS resolver to catch internal hostnames.
  let records;
  try {
    records = await dns.lookup(lookupName, { all: true });
  } catch {
    return { safe: false, reason: `DNS resolution failed for ${hostname}` };
  }
  if (!Array.isArray(records) || records.length === 0) {
    return { safe: false, reason: `DNS resolution failed for ${hostname}` };
  }

  // Every candidate address must be public: the client may connect to any.
  for (const { address, family } of records) {
    const blocked = family === 4 ? isPrivateIp(address) : isBlockedIpv6(address);
    if (blocked) {
      return { safe: false, reason: `Blocked: ${hostname} resolves to private IP ${address}` };
    }
  }

  return { safe: true, hostname };
}
|
|
105
|
+
|
|
106
|
+
// ── HTML → Text Extraction ───────────────────────────────────────────────────
|
|
107
|
+
|
|
108
|
+
/** Named entities decoded by htmlToText, mapped to their plain-text form. */
const HTML_NAMED_ENTITIES = new Map([
  ['amp', '&'],
  ['lt', '<'],
  ['gt', '>'],
  ['quot', '"'],
  ['apos', "'"],
  ['nbsp', ' '],
  ['mdash', '—'],
  ['ndash', '–'],
  ['hellip', '...'],
  ['rsquo', "'"],
  ['lsquo', "'"],
  ['rdquo', '"'],
  ['ldquo', '"'],
]);

/**
 * Decode one entity match. Numeric references outside the valid Unicode
 * scalar range become U+FFFD instead of throwing; unknown names are left
 * untouched.
 */
function decodeHtmlEntity(match, body) {
  if (body[0] === '#') {
    const isHex = body[1] === 'x' || body[1] === 'X';
    const codePoint = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
    const isValid =
      Number.isInteger(codePoint) &&
      codePoint > 0 &&
      codePoint <= 0x10ffff &&
      !(codePoint >= 0xd800 && codePoint <= 0xdfff);
    return isValid ? String.fromCodePoint(codePoint) : '\uFFFD';
  }
  return HTML_NAMED_ENTITIES.get(body) ?? match;
}

/**
 * Extract readable text from HTML.
 *
 * Drops script/style/svg/noscript/nav/header/footer/aside/iframe elements
 * with their content, strips all remaining tags, decodes numeric and common
 * named entities, and collapses whitespace.
 *
 * Entities are decoded in a single pass so already-decoded output is never
 * decoded again ("&amp;lt;" yields the text "&lt;", not "<").
 *
 * @param {string} html - Markup to convert; null/undefined is treated as ''.
 * @returns {string} Plain text with single spaces between words.
 */
function htmlToText(html) {
  let text = String(html ?? '');

  // Remove non-content elements together with everything inside them.
  text = text.replace(/<(script|style|svg|noscript|nav|header|footer|aside|iframe)[^>]*>[\s\S]*?<\/\1>/gi, ' ');

  // Remove all remaining HTML tags.
  text = text.replace(/<[^>]+>/g, ' ');

  // Decode HTML entities (numeric and named) in one pass.
  text = text.replace(/&(#[xX][0-9a-fA-F]+|#\d+|[a-zA-Z]+);/g, decodeHtmlEntity);

  // Collapse whitespace.
  return text.replace(/\s+/g, ' ').trim();
}
|
|
144
|
+
|
|
145
|
+
/**
 * Pull the document title out of an HTML string.
 *
 * @param {string} html - Raw markup.
 * @returns {string} Text of the first <title> element, cleaned by
 *   htmlToText and capped at 200 characters; '' when there is no title.
 */
function extractTitle(html) {
  const [, inner] = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i) ?? [];
  if (inner === undefined) {
    return '';
  }
  const cleaned = htmlToText(inner);
  return cleaned.slice(0, 200);
}
|
|
153
|
+
|
|
154
|
+
// ── Fetch with protection ────────────────────────────────────────────────────
|
|
155
|
+
|
|
156
|
+
/**
 * Fetch a URL with SSRF protection, size limits, and timeout.
 *
 * Redirects are followed manually so that every hop is validated BEFORE a
 * request is sent to it; the hop count is capped at MAX_REDIRECTS. A single
 * timer of FETCH_TIMEOUT_MS covers the whole operation, including reading
 * the body. At most MAX_DOWNLOAD_BYTES are read and at most
 * MAX_OUTPUT_CHARS of extracted text are returned.
 *
 * @param {string} urlStr - Absolute http(s) URL to fetch.
 * @returns {Promise<object>} On success
 *   `{ error: false, status, contentType, body, title, excerpt, truncated, url }`;
 *   on any failure `{ error: true, message }`. Failures are reported in the
 *   result object rather than thrown.
 */
export async function fetchUrl(urlStr) {
  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);

  try {
    let currentUrl = urlStr;
    let res;

    for (let hop = 0; ; hop++) {
      // Validate the target of every hop before contacting it.
      const validation = await validateUrl(currentUrl);
      if (!validation.safe) {
        return {
          error: true,
          message: hop === 0 ? validation.reason : `Redirect blocked: ${validation.reason}`,
        };
      }

      res = await fetch(currentUrl, {
        headers: {
          'User-Agent': USER_AGENT,
          'Accept': 'text/html, text/plain, application/json, text/xml',
        },
        signal: controller.signal,
        // Never let the client follow a redirect to an unvalidated host.
        redirect: 'manual',
      });

      const isRedirect = [301, 302, 303, 307, 308].includes(res.status);
      const location = isRedirect ? res.headers.get('location') : null;
      if (!location) break;

      // Discard the redirect body so the connection can be released.
      await res.body?.cancel().catch(() => {});

      if (hop >= MAX_REDIRECTS) {
        return { error: true, message: `Too many redirects (limit ${MAX_REDIRECTS})` };
      }
      try {
        currentUrl = new URL(location, currentUrl).href;
      } catch {
        return { error: true, message: 'Redirect blocked: Invalid URL' };
      }
    }

    const contentType = (res.headers.get('content-type') || '').toLowerCase();

    // Content-type allowlist
    if (!contentType.startsWith('text/') && !contentType.includes('json') && !contentType.includes('xml')) {
      await res.body?.cancel().catch(() => {});
      return {
        error: true,
        message: `Blocked content-type: ${contentType}. Only text, JSON, and XML are allowed.`,
      };
    }

    // Read body with size limit
    const chunks = [];
    let totalBytes = 0;
    let truncated = false;

    if (res.body) {
      const reader = res.body.getReader();
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;

        const remaining = MAX_DOWNLOAD_BYTES - totalBytes;
        if (value.length > remaining) {
          // Keep only the part that fits, then stop the download.
          chunks.push(value.subarray(0, remaining));
          truncated = true;
          await reader.cancel().catch(() => {});
          break;
        }
        chunks.push(value);
        totalBytes += value.length;
      }
    }

    const decoder = new TextDecoder('utf-8', { fatal: false });
    const rawBody = decoder.decode(Buffer.concat(chunks));

    // Extract useful content
    let body;
    let title = '';

    if (contentType.includes('html')) {
      title = extractTitle(rawBody);
      body = htmlToText(rawBody);
    } else if (contentType.includes('json')) {
      try {
        body = JSON.stringify(JSON.parse(rawBody), null, 2);
      } catch {
        // Not valid JSON (possibly cut by the size limit): return as-is.
        body = rawBody;
      }
    } else {
      body = rawBody;
    }

    // Enforce output limit
    if (body.length > MAX_OUTPUT_CHARS) {
      body = `${body.slice(0, MAX_OUTPUT_CHARS)}\n\n[... content truncated at ${MAX_OUTPUT_CHARS} chars]`;
      truncated = true;
    }

    const excerpt = body.slice(0, 200).replace(/\s+/g, ' ').trim();

    return {
      error: false,
      status: res.status,
      contentType,
      body,
      title,
      excerpt,
      truncated,
      url: res.url || currentUrl,
    };
  } catch (err) {
    if (err.name === 'AbortError') {
      return { error: true, message: 'Request timed out (10s limit)' };
    }
    return { error: true, message: `Fetch failed: ${err.message}` };
  } finally {
    clearTimeout(timeout);
  }
}
|
|
269
|
+
|
|
270
|
+
// ── Web Search (DuckDuckGo HTML) ─────────────────────────────────────────────
|
|
271
|
+
|
|
272
|
+
/**
 * Search the web using DuckDuckGo HTML (no API key needed).
 * Parses the HTML results page to extract links, titles, and snippets.
 *
 * The FETCH_TIMEOUT_MS timer covers both the request and reading the
 * response body.
 *
 * @param {string} query - Search query; must be a string of at least two
 *   non-whitespace-trimmed characters.
 * @param {number} maxResults - Max results to return (default MAX_RESULTS)
 * @returns {Promise<object>} On success
 *   `{ error: false, query, resultCount, results }` where each result is
 *   `{ title, url, snippet }`; on failure `{ error: true, message }`.
 */
export async function webSearch(query, maxResults = MAX_RESULTS) {
  // Non-string input is rejected here instead of throwing on `.trim()`.
  if (typeof query !== 'string' || query.trim().length < 2) {
    return { error: true, message: 'Query too short' };
  }

  const trimmedQuery = query.trim();
  const searchUrl = `https://html.duckduckgo.com/html/?q=${encodeURIComponent(trimmedQuery)}`;

  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);

  try {
    const res = await fetch(searchUrl, {
      headers: {
        'User-Agent': USER_AGENT,
        'Accept': 'text/html',
        'Accept-Language': 'en-US,en;q=0.9',
      },
      signal: controller.signal,
    });

    if (!res.ok) {
      // Discard the unread body so the connection can be released.
      await res.body?.cancel().catch(() => {});
      return { error: true, message: `DuckDuckGo returned ${res.status}` };
    }

    const html = await res.text();
    const results = parseDuckDuckGoResults(html, maxResults);

    return {
      error: false,
      query: trimmedQuery,
      resultCount: results.length,
      results,
    };
  } catch (err) {
    if (err.name === 'AbortError') {
      return { error: true, message: 'Search timed out (10s limit)' };
    }
    return { error: true, message: `Search failed: ${err.message}` };
  } finally {
    // Cleared only once the body has been read (or the attempt has failed).
    clearTimeout(timeout);
  }
}
|
|
324
|
+
|
|
325
|
+
/**
 * Turn the raw `href` of a DuckDuckGo result link into the destination URL.
 *
 * The value comes from an HTML attribute, so `&amp;` is decoded first.
 * DuckDuckGo redirect links (`//duckduckgo.com/l/?uddg=ENCODED_URL&...`)
 * are unwrapped to the encoded destination; protocol-relative URLs get an
 * `https:` scheme.
 */
function resolveDuckDuckGoHref(rawHref) {
  let url = rawHref.replace(/&amp;/g, '&');

  const wrapped = url.match(/uddg=([^&]+)/);
  if (wrapped) {
    try {
      url = decodeURIComponent(wrapped[1]);
    } catch {
      // Malformed percent-encoding: fall back to the raw parameter value.
      url = wrapped[1];
    }
  }

  return url.startsWith('//') ? `https:${url}` : url;
}

/**
 * Parse DuckDuckGo HTML results page.
 * Extracts title, URL, and snippet from result items.
 *
 * The page is split on `class="result ` markers; within each piece the
 * `result__a` link supplies the URL and title and the `result__snippet`
 * element supplies the snippet. Items lacking a URL or a title are skipped.
 *
 * @param {string} html - Results page markup.
 * @param {number} maxResults - Stop after this many results.
 * @returns {Array<{ title: string, url: string, snippet: string }>}
 *   Snippets are capped at 300 characters.
 */
function parseDuckDuckGoResults(html, maxResults) {
  const results = [];
  const resultBlocks = html.split(/class="result\s/);

  // Index 0 is everything before the first result, so start at 1.
  for (let i = 1; i < resultBlocks.length && results.length < maxResults; i++) {
    const block = resultBlocks[i];

    const urlMatch = block.match(/class="result__a"\s+href="([^"]+)"/);
    const url = urlMatch ? resolveDuckDuckGoHref(urlMatch[1]) : '';

    const titleMatch = block.match(/class="result__a"[^>]*>([\s\S]*?)<\/a>/);
    const title = titleMatch ? htmlToText(titleMatch[1]).trim() : '';

    // Snippets are normally an <a>; fall back to any element that closes.
    const snippetMatch =
      block.match(/class="result__snippet"[^>]*>([\s\S]*?)<\/a>/) ??
      block.match(/class="result__snippet"[^>]*>([\s\S]*?)<\//);
    const snippet = snippetMatch ? htmlToText(snippetMatch[1]).trim() : '';

    if (url && title) {
      results.push({ title, url, snippet: snippet.slice(0, 300) });
    }
  }

  return results;
}
|
|
384
|
+
|
|
385
|
+
/**
 * Deep search: run a web search, then download the top hits for full text.
 *
 * @param {string} query - Search query, passed straight to webSearch.
 * @param {number} fetchCount - Number of leading results to download (default 3).
 * @returns {Promise<object>} The webSearch error object when the search
 *   fails; otherwise `{ error: false, query, resultCount, results,
 *   deepFetched, deepResults }`, where `deepResults` holds one entry per
 *   successfully fetched page.
 */
export async function webSearchDeep(query, fetchCount = 3) {
  const search = await webSearch(query);
  if (search.error) {
    return search;
  }

  // Download one hit; yields null when the page cannot be fetched.
  const enrich = async (hit) => {
    try {
      const page = await fetchUrl(hit.url);
      if (page.error) {
        return null;
      }
      return {
        title: page.title || hit.title,
        url: hit.url,
        snippet: hit.snippet,
        content: page.body,
        excerpt: page.excerpt,
      };
    } catch {
      // Best effort: a failed page is simply left out of deepResults.
      return null;
    }
  };

  const outcomes = await Promise.allSettled(search.results.slice(0, fetchCount).map(enrich));
  const deepResults = outcomes
    .filter((outcome) => outcome.status === 'fulfilled' && outcome.value)
    .map((outcome) => outcome.value);

  return {
    error: false,
    query: query.trim(),
    resultCount: search.results.length,
    results: search.results,
    deepFetched: deepResults.length,
    deepResults,
  };
}
|