morpheus-cli 0.8.9 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/dist/channels/discord.js +133 -6
  2. package/dist/channels/telegram.js +23 -17
  3. package/dist/http/api.js +2 -3
  4. package/dist/runtime/__tests__/keymaker.test.js +5 -2
  5. package/dist/runtime/apoc.js +7 -7
  6. package/dist/{devkit/registry.js → runtime/devkit-instrument.js} +5 -29
  7. package/dist/runtime/keymaker.js +6 -5
  8. package/dist/runtime/memory/sati/service.js +1 -1
  9. package/dist/runtime/memory/sqlite.js +31 -124
  10. package/dist/runtime/neo.js +1 -1
  11. package/dist/runtime/oracle.js +30 -52
  12. package/dist/runtime/smiths/delegator.js +2 -2
  13. package/dist/runtime/trinity.js +1 -1
  14. package/dist/ui/assets/{AuditDashboard-5sA8Sd8S.js → AuditDashboard-nVV9KKFp.js} +1 -1
  15. package/dist/ui/assets/Chat-ChsmnZzq.js +41 -0
  16. package/dist/ui/assets/{Chronos-BAjeLobF.js → Chronos-kgO7IkEj.js} +1 -1
  17. package/dist/ui/assets/{ConfirmationModal-fvgnOWTY.js → ConfirmationModal-D1BYPXJ4.js} +1 -1
  18. package/dist/ui/assets/{Dashboard-Ca5mSefz.js → Dashboard-DWB5NwQn.js} +1 -1
  19. package/dist/ui/assets/{DeleteConfirmationModal-A8EmnHoa.js → DeleteConfirmationModal-CgIMbyB7.js} +1 -1
  20. package/dist/ui/assets/{Logs-CYu7se7R.js → Logs-DGdRnEFi.js} +1 -1
  21. package/dist/ui/assets/{MCPManager-DsDA_ZVT.js → MCPManager-BDjWMRRX.js} +1 -1
  22. package/dist/ui/assets/{ModelPricing-DnSm_Nh-.js → ModelPricing-DAk1sS7D.js} +1 -1
  23. package/dist/ui/assets/{Notifications-CiljQzvM.js → Notifications-DMEq6EZR.js} +1 -1
  24. package/dist/ui/assets/{SatiMemories-rnO2b0LG.js → SatiMemories-BxicQE35.js} +1 -1
  25. package/dist/ui/assets/{SessionAudit-Dfvhge3Z.js → SessionAudit-CKJQf9LU.js} +1 -1
  26. package/dist/ui/assets/{Settings-OQlHAJoy.js → Settings-CulMd4Qr.js} +1 -1
  27. package/dist/ui/assets/{Skills-Crsybug0.js → Skills-DPoqYa8Y.js} +1 -1
  28. package/dist/ui/assets/{Smiths-wm90jRDT.js → Smiths-Clamjlph.js} +1 -1
  29. package/dist/ui/assets/{Tasks-C5FMu_Yu.js → Tasks-BfTkhB1J.js} +1 -1
  30. package/dist/ui/assets/{TrinityDatabases-BzYfecKI.js → TrinityDatabases-BmM1S9aQ.js} +1 -1
  31. package/dist/ui/assets/{UsageStats-CBo2vW2n.js → UsageStats-aAu2DFlb.js} +1 -1
  32. package/dist/ui/assets/{WebhookManager-0tDFkfHd.js → WebhookManager-DdnRSWl9.js} +1 -1
  33. package/dist/ui/assets/{audit-B-F8XPLi.js → audit-CqszEkOd.js} +1 -1
  34. package/dist/ui/assets/{chronos-BvMxfBQH.js → chronos-CPwFWid9.js} +1 -1
  35. package/dist/ui/assets/{config-DteVgNGR.js → config-D0DePxKu.js} +1 -1
  36. package/dist/ui/assets/{index-Cwqr-n0Y.js → index-BxVeRyTh.js} +2 -2
  37. package/dist/ui/assets/index-OLhpm8I7.css +1 -0
  38. package/dist/ui/assets/{mcp-DxzodOdH.js → mcp-Gjc3IZpO.js} +1 -1
  39. package/dist/ui/assets/{skills--hAyQnmG.js → skills-B5DnmnHW.js} +1 -1
  40. package/dist/ui/assets/{stats-Cibaisqd.js → stats-BAse7jj0.js} +1 -1
  41. package/dist/ui/index.html +2 -2
  42. package/dist/ui/sw.js +1 -1
  43. package/package.json +6 -4
  44. package/dist/devkit/adapters/shell.js +0 -80
  45. package/dist/devkit/index.js +0 -11
  46. package/dist/devkit/tools/browser.js +0 -825
  47. package/dist/devkit/tools/filesystem.js +0 -235
  48. package/dist/devkit/tools/git.js +0 -226
  49. package/dist/devkit/tools/network.js +0 -165
  50. package/dist/devkit/tools/packages.js +0 -73
  51. package/dist/devkit/tools/processes.js +0 -130
  52. package/dist/devkit/tools/shell.js +0 -106
  53. package/dist/devkit/tools/system.js +0 -132
  54. package/dist/devkit/types.js +0 -1
  55. package/dist/devkit/utils.js +0 -45
  56. package/dist/ui/assets/Chat-CjxeAQmd.js +0 -41
  57. package/dist/ui/assets/index-DcfyUdLI.css +0 -1
@@ -1,825 +0,0 @@
1
- import { tool } from '@langchain/core/tools';
2
- import { z } from 'zod';
3
- import os from 'os';
4
- import path from 'path';
5
- import { truncateOutput } from '../utils.js';
6
- import { registerToolFactory } from '../registry.js';
7
- import { Readability } from '@mozilla/readability';
8
- import { JSDOM } from 'jsdom';
9
- // ─── Local path resolution (standalone Smith, no Morpheus PATHS) ────────────
10
- const SMITH_HOME = process.env.SMITH_HOME ?? path.join(os.homedir(), '.smith');
11
- const BROWSER_CACHE = path.join(SMITH_HOME, 'cache', 'browser');
12
- // ─── Module-level browser singleton ────────────────────────────────────────
13
- let browserInstance = null;
14
- let pageInstance = null;
15
- let idleTimer = null;
16
- let installPromise = null;
17
- const IDLE_TIMEOUT_MS = 5 * 60 * 1000; // 5 minutes
18
- // ─── Common User Agents (rotated to avoid detection) ───────────────────────
19
- const USER_AGENTS = [
20
- 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
21
- 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
22
- 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
23
- ];
24
- function getRandomUserAgent() {
25
- return USER_AGENTS[Math.floor(Math.random() * USER_AGENTS.length)];
26
- }
27
- // ─── Retry helper with exponential backoff ──────────────────────────────────
28
- async function withRetry(fn, maxRetries = 3, baseDelayMs = 1000) {
29
- let lastError;
30
- for (let attempt = 0; attempt < maxRetries; attempt++) {
31
- try {
32
- return await fn();
33
- }
34
- catch (err) {
35
- lastError = err;
36
- if (attempt < maxRetries - 1) {
37
- const delay = baseDelayMs * Math.pow(2, attempt);
38
- await new Promise(r => setTimeout(r, delay));
39
- }
40
- }
41
- }
42
- throw lastError;
43
- }
44
- /**
45
- * Ensures Chromium is downloaded to ~/.smith/cache/browser/.
46
- * Downloads only once; subsequent calls return the cached executablePath.
47
- */
48
- async function ensureChromium() {
49
- const { install, resolveBuildId, detectBrowserPlatform, computeExecutablePath, Browser: PBrowser, } = await import('@puppeteer/browsers');
50
- const platform = detectBrowserPlatform();
51
- const buildId = await resolveBuildId(PBrowser.CHROME, platform, 'stable');
52
- // Check if already installed
53
- const execPath = computeExecutablePath({
54
- browser: PBrowser.CHROME,
55
- buildId,
56
- cacheDir: BROWSER_CACHE,
57
- });
58
- const { default: fs } = await import('fs-extra');
59
- if (await fs.pathExists(execPath)) {
60
- return execPath;
61
- }
62
- // Download with progress indicator
63
- process.stdout.write('[Smith] Installing Chromium for browser tools (first run, ~150MB)...\n');
64
- const installed = await install({
65
- browser: PBrowser.CHROME,
66
- buildId,
67
- cacheDir: BROWSER_CACHE,
68
- downloadProgressCallback: (downloaded, total) => {
69
- const pct = total > 0 ? Math.round((downloaded / total) * 100) : 0;
70
- process.stdout.write(`\r[Smith] Downloading Chromium: ${pct}% `);
71
- },
72
- });
73
- process.stdout.write('\n[Smith] Chromium installed successfully.\n');
74
- return installed.executablePath;
75
- }
76
- /**
77
- * Returns (or creates) the browser singleton, resetting the idle timer.
78
- * Handles Chromium lazy-install with a lock to prevent concurrent downloads.
79
- */
80
- async function acquireBrowser() {
81
- const { launch } = await import('puppeteer-core');
82
- const needsLaunch = !browserInstance || !browserInstance.connected;
83
- if (needsLaunch) {
84
- if (!installPromise) {
85
- installPromise = ensureChromium().finally(() => {
86
- installPromise = null;
87
- });
88
- }
89
- const executablePath = await installPromise;
90
- // Re-check after awaiting (another caller may have launched already)
91
- if (!browserInstance || !browserInstance.connected) {
92
- browserInstance = await launch({
93
- executablePath,
94
- headless: true,
95
- args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-gpu'],
96
- });
97
- pageInstance = await browserInstance.newPage();
98
- }
99
- }
100
- else if (!pageInstance || pageInstance.isClosed()) {
101
- pageInstance = await browserInstance.newPage();
102
- }
103
- // Reset idle timeout
104
- if (idleTimer)
105
- clearTimeout(idleTimer);
106
- idleTimer = setTimeout(async () => {
107
- try {
108
- await pageInstance?.close();
109
- }
110
- catch { /* ignore */ }
111
- try {
112
- await browserInstance?.close();
113
- }
114
- catch { /* ignore */ }
115
- pageInstance = null;
116
- browserInstance = null;
117
- idleTimer = null;
118
- }, IDLE_TIMEOUT_MS);
119
- return { browser: browserInstance, page: pageInstance };
120
- }
121
- // Best-effort cleanup on process exit
122
- process.on('exit', () => {
123
- try {
124
- browserInstance?.process()?.kill();
125
- }
126
- catch { /* ignore */ }
127
- });
128
- // ─── Tool Definitions ───────────────────────────────────────────────────────
129
- const browserNavigateTool = tool(async ({ url, wait_until, timeout_ms, return_html, wait_for_selector, extract_readable }) => {
130
- try {
131
- const { page } = await acquireBrowser();
132
- // Set a realistic user agent
133
- await page.setUserAgent(getRandomUserAgent());
134
- // Set extra headers to appear more like a real browser
135
- await page.setExtraHTTPHeaders({
136
- 'Accept-Language': 'pt-BR,pt;q=0.9,en-US;q=0.8,en;q=0.7',
137
- 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
138
- });
139
- await withRetry(async () => {
140
- await page.goto(url, {
141
- waitUntil: (wait_until ?? 'domcontentloaded'),
142
- timeout: timeout_ms ?? 30_000,
143
- });
144
- }, 2);
145
- // Wait for specific selector if requested
146
- if (wait_for_selector) {
147
- await page.waitForSelector(wait_for_selector, { timeout: timeout_ms ?? 30_000 });
148
- }
149
- const title = await page.title();
150
- const htmlContent = await page.content();
151
- let text;
152
- let articleTitle = null;
153
- let articleByline = null;
154
- let articleExcerpt = null;
155
- // Use Readability for cleaner content extraction
156
- if (extract_readable !== false) {
157
- try {
158
- const dom = new JSDOM(htmlContent, { url });
159
- const reader = new Readability(dom.window.document);
160
- const article = reader.parse();
161
- if (article) {
162
- articleTitle = article.title || null;
163
- articleByline = article.byline || null;
164
- articleExcerpt = article.excerpt || null;
165
- text = article.textContent || '';
166
- }
167
- else {
168
- text = await page.evaluate(() => document.body.innerText);
169
- }
170
- }
171
- catch {
172
- text = await page.evaluate(() => document.body.innerText);
173
- }
174
- }
175
- else {
176
- text = await page.evaluate(() => document.body.innerText);
177
- }
178
- const result = {
179
- success: true,
180
- url,
181
- current_url: page.url(),
182
- title: articleTitle || title,
183
- byline: articleByline,
184
- excerpt: articleExcerpt,
185
- text: truncateOutput(text),
186
- };
187
- if (return_html) {
188
- result.html = truncateOutput(htmlContent);
189
- }
190
- return JSON.stringify(result);
191
- }
192
- catch (err) {
193
- return JSON.stringify({ success: false, url, error: err.message });
194
- }
195
- }, {
196
- name: 'browser_navigate',
197
- description: 'Navigate to a URL in a real browser (executes JavaScript). Use for SPAs, JS-heavy pages, or sites requiring interaction. ' +
198
- 'Automatically extracts clean readable content using Mozilla Readability. Returns page title, byline, excerpt, and text content.',
199
- schema: z.object({
200
- url: z.string().describe('Full URL to navigate to (must include https://)'),
201
- wait_until: z
202
- .enum(['load', 'domcontentloaded', 'networkidle0', 'networkidle2'])
203
- .optional()
204
- .describe('Wait condition. Default: domcontentloaded. Use networkidle0 for SPAs.'),
205
- timeout_ms: z.number().optional().describe('Navigation timeout in ms. Default: 30000'),
206
- return_html: z
207
- .boolean()
208
- .optional()
209
- .describe('Also return raw HTML in response. Default: false'),
210
- wait_for_selector: z
211
- .string()
212
- .optional()
213
- .describe('CSS selector to wait for before extracting content (useful for dynamic content)'),
214
- extract_readable: z
215
- .boolean()
216
- .optional()
217
- .describe('Use Readability to extract clean article content. Default: true'),
218
- }),
219
- });
220
- const browserGetDomTool = tool(async ({ selector, include_attributes }) => {
221
- try {
222
- const { page } = await acquireBrowser();
223
- const includeAttrs = include_attributes ?? true;
224
- const dom = await page.evaluate(({ sel, attrs }) => {
225
- const root = sel
226
- ? document.querySelector(sel)
227
- : document.body;
228
- if (!root)
229
- return null;
230
- const RELEVANT_ATTRS = [
231
- 'href', 'src', 'type', 'name', 'value',
232
- 'placeholder', 'action', 'id', 'role', 'aria-label',
233
- ];
234
- function serialize(el, depth) {
235
- const hasChildren = el.children.length > 0;
236
- const node = {
237
- tag: el.tagName.toLowerCase(),
238
- };
239
- if (el.id)
240
- node.id = el.id;
241
- if (el.className)
242
- node.class = el.className;
243
- if (!hasChildren) {
244
- const txt = el.textContent?.trim();
245
- if (txt)
246
- node.text = txt.slice(0, 120);
247
- }
248
- if (attrs && el.attributes.length > 0) {
249
- const attrMap = {};
250
- for (const attr of el.attributes) {
251
- if (RELEVANT_ATTRS.includes(attr.name)) {
252
- attrMap[attr.name] = attr.value;
253
- }
254
- }
255
- if (Object.keys(attrMap).length)
256
- node.attrs = attrMap;
257
- }
258
- if (depth < 6 && hasChildren) {
259
- node.children = Array.from(el.children)
260
- .slice(0, 40)
261
- .map((c) => serialize(c, depth + 1));
262
- }
263
- return node;
264
- }
265
- return serialize(root, 0);
266
- }, { sel: selector ?? null, attrs: includeAttrs });
267
- if (!dom) {
268
- return JSON.stringify({ success: false, error: `Element not found: ${selector}` });
269
- }
270
- return JSON.stringify({ success: true, current_url: page.url(), dom: truncateOutput(JSON.stringify(dom, null, 2)) });
271
- }
272
- catch (err) {
273
- return JSON.stringify({ success: false, error: err.message });
274
- }
275
- }, {
276
- name: 'browser_get_dom',
277
- description: 'Get a simplified DOM tree of the current page or a specific element. ' +
278
- 'ALWAYS call this BEFORE browser_click or browser_fill to inspect page structure and identify the correct CSS selectors. ' +
279
- 'Never guess selectors — analyze the DOM first.',
280
- schema: z.object({
281
- selector: z
282
- .string()
283
- .optional()
284
- .describe('CSS selector to scope the DOM tree to. Omit to get the full body.'),
285
- include_attributes: z
286
- .boolean()
287
- .optional()
288
- .describe('Include relevant attributes (href, src, type, name, value, placeholder, role, aria-label). Default: true'),
289
- }),
290
- });
291
- const browserClickTool = tool(async ({ selector, text, timeout_ms, wait_after_ms }) => {
292
- try {
293
- const { page } = await acquireBrowser();
294
- if (!selector && !text) {
295
- return JSON.stringify({ success: false, error: 'Provide either selector or text' });
296
- }
297
- const clickTimeout = timeout_ms ?? 10_000;
298
- if (text) {
299
- await page.locator(`::-p-text(${text})`).setTimeout(clickTimeout).click();
300
- }
301
- else {
302
- await page.locator(selector).setTimeout(clickTimeout).click();
303
- }
304
- if (wait_after_ms) {
305
- await new Promise((r) => setTimeout(r, wait_after_ms));
306
- }
307
- return JSON.stringify({
308
- success: true,
309
- current_url: page.url(),
310
- title: await page.title(),
311
- });
312
- }
313
- catch (err) {
314
- return JSON.stringify({ success: false, error: err.message });
315
- }
316
- }, {
317
- name: 'browser_click',
318
- description: 'Click an element on the current browser page by CSS selector or visible text. ' +
319
- 'The page must already be loaded via browser_navigate. ' +
320
- 'Always inspect the DOM with browser_get_dom first to find the correct selector.',
321
- schema: z.object({
322
- selector: z
323
- .string()
324
- .optional()
325
- .describe('CSS selector of the element to click (e.g. "button#submit", ".btn-login")'),
326
- text: z
327
- .string()
328
- .optional()
329
- .describe('Click element containing this visible text (alternative to selector)'),
330
- timeout_ms: z
331
- .number()
332
- .optional()
333
- .describe('Timeout to wait for the element in ms. Default: 10000'),
334
- wait_after_ms: z
335
- .number()
336
- .optional()
337
- .describe('Wait this many ms after clicking (for page transitions/animations). Default: 0'),
338
- }),
339
- });
340
- const browserFillTool = tool(async ({ selector, value, press_enter, timeout_ms }) => {
341
- try {
342
- const { page } = await acquireBrowser();
343
- await page.locator(selector).setTimeout(timeout_ms ?? 10_000).fill(value);
344
- if (press_enter) {
345
- await page.keyboard.press('Enter');
346
- }
347
- return JSON.stringify({ success: true, selector, filled: true });
348
- }
349
- catch (err) {
350
- return JSON.stringify({ success: false, selector, error: err.message });
351
- }
352
- }, {
353
- name: 'browser_fill',
354
- description: 'Fill a form input or textarea field with a value. Clears any existing content first. ' +
355
- 'Always inspect the DOM with browser_get_dom first to identify the correct CSS selector.',
356
- schema: z.object({
357
- selector: z.string().describe('CSS selector of the input/textarea element'),
358
- value: z.string().describe('Value to type into the field'),
359
- press_enter: z
360
- .boolean()
361
- .optional()
362
- .describe('Press Enter after filling (triggers form submit in many cases). Default: false'),
363
- timeout_ms: z
364
- .number()
365
- .optional()
366
- .describe('Timeout to find the element in ms. Default: 10000'),
367
- }),
368
- });
369
- /**
370
- * Search via DuckDuckGo Lite (plain HTML, no JS, no bot detection).
371
- * Enhanced with better parsing, intent detection, and fallbacks.
372
- */
373
- const browserSearchTool = tool(async ({ query, num_results, language, search_type }) => {
374
- try {
375
- const max = Math.min(num_results ?? 10, 20);
376
- const year = new Date().getFullYear().toString();
377
- const lang = language ?? "pt";
378
- const qLower = query.toLowerCase();
379
- let intent = "general";
380
- // News patterns (PT/EN)
381
- if (/(hoje|ontem|último|resultado|placar|próximos|futebol|eleição|202\d|today|yesterday|latest|breaking|election)/i.test(qLower)) {
382
- intent = "news";
383
- }
384
- // Official/Government patterns
385
- else if (/(site oficial|gov\.|receita federal|ministério|official site|government)/i.test(qLower)) {
386
- intent = "official";
387
- }
388
- // Documentation patterns
389
- else if (/(api|sdk|npm|pypi|docs|documentação|documentation|reference|tutorial|example)/i.test(qLower)) {
390
- intent = "documentation";
391
- }
392
- // Price patterns
393
- else if (/(preço|valor|quanto custa|price|cost|pricing|buy)/i.test(qLower)) {
394
- intent = "price";
395
- }
396
- // Academic patterns
397
- else if (/(research|paper|study|journal|artigo|pesquisa|científico|scientific)/i.test(qLower)) {
398
- intent = "academic";
399
- }
400
- // How-to patterns
401
- else if (/(como|how to|tutorial|guia|guide|passo a passo|step by step)/i.test(qLower)) {
402
- intent = "how-to";
403
- }
404
- // ─── Smart Query Refinement ──────────────────────────────────────────
405
- let refinedQuery = query;
406
- const refinements = [];
407
- switch (intent) {
408
- case "news":
409
- refinements.push(year);
410
- break;
411
- case "official":
412
- // Don't modify - let user's query stand
413
- break;
414
- case "documentation":
415
- // Only add if not already present
416
- if (!/docs|documentation|github/i.test(qLower)) {
417
- refinements.push("documentation");
418
- }
419
- break;
420
- case "price":
421
- refinements.push(year);
422
- if (lang === "pt" || lang === "br")
423
- refinements.push("Brasil");
424
- break;
425
- case "academic":
426
- refinements.push("site:scholar.google.com OR site:arxiv.org OR site:researchgate.net");
427
- break;
428
- case "how-to":
429
- // Don't add noise, how-to queries are usually specific enough
430
- break;
431
- }
432
- if (refinements.length > 0) {
433
- refinedQuery = `${query} ${refinements.join(" ")}`;
434
- }
435
- // ─── Region Mapping ──────────────────────────────────────────────────
436
- const regionMap = {
437
- pt: "br-pt",
438
- br: "br-pt",
439
- en: "us-en",
440
- us: "us-en",
441
- uk: "uk-en",
442
- es: "es-es",
443
- fr: "fr-fr",
444
- de: "de-de",
445
- };
446
- const kl = regionMap[lang] ?? lang;
447
- // ─── Execute Search with Retry ───────────────────────────────────────
448
- const searchResult = await withRetry(async () => {
449
- const body = new URLSearchParams({ q: refinedQuery, kl }).toString();
450
- const res = await fetch("https://lite.duckduckgo.com/lite/", {
451
- method: "POST",
452
- headers: {
453
- "Content-Type": "application/x-www-form-urlencoded",
454
- "User-Agent": getRandomUserAgent(),
455
- "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
456
- "Accept-Language": lang === "pt" ? "pt-BR,pt;q=0.9,en;q=0.8" : "en-US,en;q=0.9",
457
- },
458
- body,
459
- signal: AbortSignal.timeout(20000),
460
- });
461
- if (!res.ok) {
462
- throw new Error(`HTTP ${res.status}`);
463
- }
464
- return res.text();
465
- }, 3);
466
- const html = searchResult;
467
- // ─── Improved Parsing (handles both quote styles) ────────────────────
468
- // Match links with either single or double quotes
469
- const linkPattern = /href=["'](https?:\/\/[^"']+)["'][^>]*class=["']result-link["'][^>]*>([^<]+)<\/a>/gi;
470
- const snippetPattern = /class=["']result-snippet["'][^>]*>([\s\S]*?)<\/td>/gi;
471
- const links = [...html.matchAll(linkPattern)];
472
- const snippets = [...html.matchAll(snippetPattern)];
473
- if (!links.length) {
474
- // Try alternative pattern (DuckDuckGo sometimes changes format)
475
- const altLinkPattern = /<a[^>]+class=["']result-link["'][^>]+href=["'](https?:\/\/[^"']+)["'][^>]*>([^<]+)<\/a>/gi;
476
- const altLinks = [...html.matchAll(altLinkPattern)];
477
- if (!altLinks.length) {
478
- return JSON.stringify({
479
- success: false,
480
- query: refinedQuery,
481
- error: "No results found. Try a different search term.",
482
- hint: intent !== "general" ? `Detected intent: ${intent}. Try a more specific query.` : undefined,
483
- });
484
- }
485
- links.push(...altLinks);
486
- }
487
- // ─── Helper Functions ────────────────────────────────────────────────
488
- function normalizeUrl(url) {
489
- try {
490
- const u = new URL(url);
491
- // Remove tracking parameters
492
- ['utm_source', 'utm_medium', 'utm_campaign', 'ref', 'fbclid', 'gclid'].forEach(p => u.searchParams.delete(p));
493
- return u.toString();
494
- }
495
- catch {
496
- return url;
497
- }
498
- }
499
- function getDomain(url) {
500
- try {
501
- return new URL(url).hostname.replace(/^www\./, "");
502
- }
503
- catch {
504
- return "";
505
- }
506
- }
507
- // ─── Enhanced Domain Scoring ─────────────────────────────────────────
508
- const domainScores = {
509
- // High authority
510
- "github.com": 8,
511
- "stackoverflow.com": 8,
512
- "wikipedia.org": 7,
513
- "docs.python.org": 8,
514
- "developer.mozilla.org": 8,
515
- "npmjs.com": 7,
516
- "pypi.org": 7,
517
- // News
518
- "bbc.com": 6,
519
- "reuters.com": 6,
520
- "cnn.com": 5,
521
- "globo.com": 5,
522
- "uol.com.br": 4,
523
- "g1.globo.com": 6,
524
- // Brazilian official
525
- "gov.br": 7,
526
- // Tech blogs
527
- "medium.com": 3,
528
- "dev.to": 4,
529
- "hashnode.dev": 3,
530
- // Academic
531
- "arxiv.org": 7,
532
- "scholar.google.com": 7,
533
- "researchgate.net": 6,
534
- };
535
- const penalizedPatterns = [
536
- /login|signin|signup/i,
537
- /assine|subscribe|paywall/i,
538
- /compre|buy now|add to cart/i,
539
- /pinterest\.com/i,
540
- /facebook\.com/i,
541
- /instagram\.com/i,
542
- ];
543
- function scoreResult(result) {
544
- let score = 0;
545
- const domain = getDomain(result.url);
546
- // Domain-based scoring
547
- for (const [d, s] of Object.entries(domainScores)) {
548
- if (domain.includes(d) || domain.endsWith(d)) {
549
- score += s;
550
- break;
551
- }
552
- }
553
- // Intent-based bonuses
554
- if (intent === "documentation") {
555
- if (/github|docs|reference|api/i.test(domain))
556
- score += 4;
557
- if (/example|tutorial|guide/i.test(result.title))
558
- score += 2;
559
- }
560
- if (intent === "news") {
561
- if (/(globo|uol|cnn|bbc|reuters|g1)/i.test(domain))
562
- score += 4;
563
- if (new RegExp(year).test(result.snippet))
564
- score += 2;
565
- }
566
- if (intent === "official" && /gov\.|\.gov|official/i.test(domain)) {
567
- score += 5;
568
- }
569
- if (intent === "academic" && /arxiv|scholar|research/i.test(domain)) {
570
- score += 5;
571
- }
572
- if (intent === "how-to" && /tutorial|guide|how/i.test(result.title)) {
573
- score += 3;
574
- }
575
- // Title relevance
576
- const queryWords = query.toLowerCase().split(/\s+/).filter(w => w.length > 2);
577
- const titleLower = result.title.toLowerCase();
578
- const matchedWords = queryWords.filter(w => titleLower.includes(w));
579
- score += Math.min(matchedWords.length * 1.5, 5);
580
- // Snippet quality
581
- if (result.snippet.length > 100)
582
- score += 1;
583
- if (result.snippet.length > 200)
584
- score += 1;
585
- // Penalties
586
- for (const pattern of penalizedPatterns) {
587
- if (pattern.test(result.url) || pattern.test(result.snippet)) {
588
- score -= 4;
589
- }
590
- }
591
- return Math.max(0, score);
592
- }
593
- // ─── Process Results ─────────────────────────────────────────────────
594
- const domainSeen = new Set();
595
- const results = [];
596
- for (let i = 0; i < links.length; i++) {
597
- const rawUrl = links[i][1];
598
- if (rawUrl.includes("duckduckgo.com"))
599
- continue;
600
- const url = normalizeUrl(rawUrl);
601
- const domain = getDomain(url);
602
- // Skip if we already have this domain (dedupe)
603
- if (domainSeen.has(domain))
604
- continue;
605
- domainSeen.add(domain);
606
- const title = links[i][2].trim().replace(/\s+/g, " ");
607
- const snippet = snippets[i]
608
- ? snippets[i][1].replace(/<[^>]+>/g, "").replace(/\s+/g, " ").trim()
609
- : "";
610
- const result = { title, url, snippet };
611
- const score = scoreResult(result);
612
- results.push({ ...result, domain, score });
613
- }
614
- if (!results.length) {
615
- return JSON.stringify({
616
- success: false,
617
- query: refinedQuery,
618
- error: "No valid results after filtering",
619
- });
620
- }
621
- // Sort by score and take top results
622
- results.sort((a, b) => b.score - a.score);
623
- const topResults = results.slice(0, max);
624
- // Calculate confidence
625
- const avgScore = topResults.reduce((acc, r) => acc + r.score, 0) / topResults.length;
626
- const confidence = avgScore >= 6 ? "high" : avgScore >= 3 ? "medium" : "low";
627
- return JSON.stringify({
628
- success: true,
629
- original_query: query,
630
- refined_query: refinedQuery !== query ? refinedQuery : undefined,
631
- intent,
632
- confidence,
633
- result_count: topResults.length,
634
- results: topResults.map((r) => ({
635
- title: r.title,
636
- url: r.url,
637
- snippet: r.snippet,
638
- domain: r.domain,
639
- score: r.score,
640
- })),
641
- });
642
- }
643
- catch (err) {
644
- return JSON.stringify({
645
- success: false,
646
- error: err.message,
647
- hint: "Search failed. Try simplifying your query or check your internet connection."
648
- });
649
- }
650
- }, {
651
- name: "browser_search",
652
- description: "Intelligent web search with automatic intent detection (news, documentation, how-to, academic, etc.), " +
653
- "smart query refinement, domain authority scoring, and confidence levels. Uses DuckDuckGo Lite for privacy. " +
654
- "Returns ranked results with relevance scores.",
655
- schema: z.object({
656
- query: z.string().describe("Search query. Be specific for better results."),
657
- num_results: z.number().int().min(1).max(20).optional().describe("Max results to return. Default: 10"),
658
- language: z.enum(["pt", "br", "en", "us", "uk", "es", "fr", "de"]).optional().describe("Search region/language. Default: pt"),
659
- search_type: z.enum(["web", "news"]).optional().describe("Type of search. Default: web (news not yet implemented)"),
660
- }),
661
- });
662
- /**
663
- * Lightweight content fetcher - uses fetch + Readability instead of Puppeteer.
664
- * Much faster for static pages, articles, documentation, etc.
665
- */
666
- const browserFetchContentTool = tool(async ({ url, timeout_ms, include_links }) => {
667
- try {
668
- const result = await withRetry(async () => {
669
- const controller = new AbortController();
670
- const timer = setTimeout(() => controller.abort(), timeout_ms ?? 30_000);
671
- try {
672
- const response = await fetch(url, {
673
- signal: controller.signal,
674
- headers: {
675
- 'User-Agent': getRandomUserAgent(),
676
- 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
677
- 'Accept-Language': 'pt-BR,pt;q=0.9,en-US;q=0.8,en;q=0.7',
678
- 'Accept-Encoding': 'gzip, deflate, br',
679
- 'Cache-Control': 'no-cache',
680
- },
681
- });
682
- if (!response.ok) {
683
- throw new Error(`HTTP ${response.status}: ${response.statusText}`);
684
- }
685
- const contentType = response.headers.get('content-type') || '';
686
- // Handle JSON responses directly
687
- if (contentType.includes('application/json')) {
688
- const json = await response.json();
689
- return {
690
- success: true,
691
- url,
692
- content_type: 'json',
693
- data: json,
694
- };
695
- }
696
- const html = await response.text();
697
- return { html, response };
698
- }
699
- finally {
700
- clearTimeout(timer);
701
- }
702
- }, 3);
703
- // If it was JSON, return early
704
- if ('content_type' in result && result.content_type === 'json') {
705
- return JSON.stringify(result);
706
- }
707
- const { html } = result;
708
- // Parse with JSDOM and extract with Readability
709
- const dom = new JSDOM(html, { url });
710
- const document = dom.window.document;
711
- // Extract metadata
712
- const title = document.querySelector('title')?.textContent?.trim() || '';
713
- const description = document.querySelector('meta[name="description"]')?.getAttribute('content') ||
714
- document.querySelector('meta[property="og:description"]')?.getAttribute('content') || '';
715
- const author = document.querySelector('meta[name="author"]')?.getAttribute('content') || '';
716
- // Use Readability for main content
717
- const reader = new Readability(document.cloneNode(true));
718
- const article = reader.parse();
719
- // Extract links if requested
720
- let links = [];
721
- if (include_links) {
722
- const anchors = document.querySelectorAll('a[href]');
723
- const seen = new Set();
724
- anchors.forEach((a) => {
725
- const href = a.getAttribute('href');
726
- const text = a.textContent?.trim();
727
- if (href && text && !seen.has(href) && href.startsWith('http')) {
728
- seen.add(href);
729
- links.push({ text: text.slice(0, 100), href });
730
- }
731
- });
732
- links = links.slice(0, 50); // Limit to 50 links
733
- }
734
- const output = {
735
- success: true,
736
- url,
737
- title: article?.title || title,
738
- description,
739
- author: article?.byline || author,
740
- excerpt: article?.excerpt || description,
741
- content: truncateOutput(article?.textContent || document.body?.textContent || ''),
742
- word_count: article?.textContent?.split(/\s+/).length || 0,
743
- };
744
- if (include_links && links.length > 0) {
745
- output.links = links;
746
- }
747
- return JSON.stringify(output);
748
- }
749
- catch (err) {
750
- return JSON.stringify({
751
- success: false,
752
- url,
753
- error: err.message,
754
- hint: 'If this is a JavaScript-heavy site, try browser_navigate instead.'
755
- });
756
- }
757
- }, {
758
- name: 'browser_fetch_content',
759
- description: 'Fast, lightweight content fetcher for static pages, articles, documentation, and APIs. ' +
760
- 'Uses HTTP fetch + Readability (no browser needed). Much faster than browser_navigate. ' +
761
- 'Use this for: documentation pages, blog posts, news articles, API endpoints. ' +
762
- 'For JavaScript-heavy SPAs, use browser_navigate instead.',
763
- schema: z.object({
764
- url: z.string().describe('Full URL to fetch (must include https://)'),
765
- timeout_ms: z.number().optional().describe('Timeout in ms. Default: 30000'),
766
- include_links: z.boolean().optional().describe('Extract and return all links from the page. Default: false'),
767
- }),
768
- });
769
- /**
770
- * Screenshot tool - useful for visual verification and debugging
771
- */
772
- const browserScreenshotTool = tool(async ({ selector, full_page }) => {
773
- try {
774
- const { page } = await acquireBrowser();
775
- let screenshot;
776
- if (selector) {
777
- const element = await page.$(selector);
778
- if (!element) {
779
- return JSON.stringify({ success: false, error: `Element not found: ${selector}` });
780
- }
781
- screenshot = await element.screenshot({ encoding: 'binary' });
782
- }
783
- else {
784
- screenshot = await page.screenshot({
785
- fullPage: full_page ?? false,
786
- encoding: 'binary'
787
- });
788
- }
789
- const base64 = screenshot.toString('base64');
790
- return JSON.stringify({
791
- success: true,
792
- current_url: page.url(),
793
- title: await page.title(),
794
- screenshot_base64: base64,
795
- size_bytes: screenshot.length,
796
- });
797
- }
798
- catch (err) {
799
- return JSON.stringify({ success: false, error: err.message });
800
- }
801
- }, {
802
- name: 'browser_screenshot',
803
- description: 'Take a screenshot of the current page or a specific element. ' +
804
- 'Useful for visual verification and debugging. Returns base64-encoded PNG.',
805
- schema: z.object({
806
- selector: z.string().optional().describe('CSS selector of element to screenshot. Omit for full viewport.'),
807
- full_page: z.boolean().optional().describe('Capture full scrollable page. Default: false (viewport only)'),
808
- }),
809
- });
810
- // ─── Factory ────────────────────────────────────────────────────────────────
811
- export function createBrowserTools(_ctx) {
812
- if (process.env.SMITH_BROWSER_ENABLED === 'false') {
813
- return [];
814
- }
815
- return [
816
- browserNavigateTool,
817
- browserGetDomTool,
818
- browserClickTool,
819
- browserFillTool,
820
- browserSearchTool,
821
- browserFetchContentTool,
822
- browserScreenshotTool,
823
- ];
824
- }
825
- registerToolFactory(createBrowserTools, 'browser');