morpheus-cli 0.8.7 → 0.8.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,16 +1,48 @@
1
1
  import { tool } from '@langchain/core/tools';
2
2
  import { z } from 'zod';
3
+ import os from 'os';
4
+ import path from 'path';
3
5
  import { truncateOutput } from '../utils.js';
4
6
  import { registerToolFactory } from '../registry.js';
5
- import { PATHS } from '../../config/paths.js';
7
+ import { Readability } from '@mozilla/readability';
8
+ import { JSDOM } from 'jsdom';
9
+ // ─── Local path resolution (standalone Smith, no Morpheus PATHS) ────────────
10
+ const SMITH_HOME = process.env.SMITH_HOME ?? path.join(os.homedir(), '.smith');
11
+ const BROWSER_CACHE = path.join(SMITH_HOME, 'cache', 'browser');
6
12
  // ─── Module-level browser singleton ────────────────────────────────────────
7
13
  let browserInstance = null;
8
14
  let pageInstance = null;
9
15
  let idleTimer = null;
10
16
  let installPromise = null;
11
17
  const IDLE_TIMEOUT_MS = 5 * 60 * 1000; // 5 minutes
18
+ // ─── Common User Agents (rotated to avoid detection) ───────────────────────
19
+ const USER_AGENTS = [
20
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
21
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
22
+ 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
23
+ ];
24
+ function getRandomUserAgent() {
25
+ return USER_AGENTS[Math.floor(Math.random() * USER_AGENTS.length)];
26
+ }
27
+ // ─── Retry helper with exponential backoff ──────────────────────────────────
28
+ async function withRetry(fn, maxRetries = 3, baseDelayMs = 1000) {
29
+ let lastError;
30
+ for (let attempt = 0; attempt < maxRetries; attempt++) {
31
+ try {
32
+ return await fn();
33
+ }
34
+ catch (err) {
35
+ lastError = err;
36
+ if (attempt < maxRetries - 1) {
37
+ const delay = baseDelayMs * Math.pow(2, attempt);
38
+ await new Promise(r => setTimeout(r, delay));
39
+ }
40
+ }
41
+ }
42
+ throw lastError;
43
+ }
12
44
  /**
13
- * Ensures Chromium is downloaded to ~/.morpheus/cache/browser/.
45
+ * Ensures Chromium is downloaded to ~/.smith/cache/browser/.
14
46
  * Downloads only once; subsequent calls return the cached executablePath.
15
47
  */
16
48
  async function ensureChromium() {
@@ -21,24 +53,24 @@ async function ensureChromium() {
21
53
  const execPath = computeExecutablePath({
22
54
  browser: PBrowser.CHROME,
23
55
  buildId,
24
- cacheDir: PATHS.browser,
56
+ cacheDir: BROWSER_CACHE,
25
57
  });
26
58
  const { default: fs } = await import('fs-extra');
27
59
  if (await fs.pathExists(execPath)) {
28
60
  return execPath;
29
61
  }
30
62
  // Download with progress indicator
31
- process.stdout.write('[Morpheus] Installing Chromium for browser tools (first run, ~150MB)...\n');
63
+ process.stdout.write('[Smith] Installing Chromium for browser tools (first run, ~150MB)...\n');
32
64
  const installed = await install({
33
65
  browser: PBrowser.CHROME,
34
66
  buildId,
35
- cacheDir: PATHS.browser,
67
+ cacheDir: BROWSER_CACHE,
36
68
  downloadProgressCallback: (downloaded, total) => {
37
69
  const pct = total > 0 ? Math.round((downloaded / total) * 100) : 0;
38
- process.stdout.write(`\r[Morpheus] Downloading Chromium: ${pct}% `);
70
+ process.stdout.write(`\r[Smith] Downloading Chromium: ${pct}% `);
39
71
  },
40
72
  });
41
- process.stdout.write('\n[Morpheus] Chromium installed successfully.\n');
73
+ process.stdout.write('\n[Smith] Chromium installed successfully.\n');
42
74
  return installed.executablePath;
43
75
  }
44
76
  /**
@@ -94,24 +126,66 @@ process.on('exit', () => {
94
126
  catch { /* ignore */ }
95
127
  });
96
128
  // ─── Tool Definitions ───────────────────────────────────────────────────────
97
- const browserNavigateTool = tool(async ({ url, wait_until, timeout_ms, return_html }) => {
129
+ const browserNavigateTool = tool(async ({ url, wait_until, timeout_ms, return_html, wait_for_selector, extract_readable }) => {
98
130
  try {
99
131
  const { page } = await acquireBrowser();
100
- await page.goto(url, {
101
- waitUntil: (wait_until ?? 'domcontentloaded'),
102
- timeout: timeout_ms ?? 30_000,
132
+ // Set a realistic user agent
133
+ await page.setUserAgent(getRandomUserAgent());
134
+ // Set extra headers to appear more like a real browser
135
+ await page.setExtraHTTPHeaders({
136
+ 'Accept-Language': 'pt-BR,pt;q=0.9,en-US;q=0.8,en;q=0.7',
137
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
103
138
  });
139
+ await withRetry(async () => {
140
+ await page.goto(url, {
141
+ waitUntil: (wait_until ?? 'domcontentloaded'),
142
+ timeout: timeout_ms ?? 30_000,
143
+ });
144
+ }, 2);
145
+ // Wait for specific selector if requested
146
+ if (wait_for_selector) {
147
+ await page.waitForSelector(wait_for_selector, { timeout: timeout_ms ?? 30_000 });
148
+ }
104
149
  const title = await page.title();
105
- const text = await page.evaluate(() => document.body.innerText);
150
+ const htmlContent = await page.content();
151
+ let text;
152
+ let articleTitle = null;
153
+ let articleByline = null;
154
+ let articleExcerpt = null;
155
+ // Use Readability for cleaner content extraction
156
+ if (extract_readable !== false) {
157
+ try {
158
+ const dom = new JSDOM(htmlContent, { url });
159
+ const reader = new Readability(dom.window.document);
160
+ const article = reader.parse();
161
+ if (article) {
162
+ articleTitle = article.title || null;
163
+ articleByline = article.byline || null;
164
+ articleExcerpt = article.excerpt || null;
165
+ text = article.textContent || '';
166
+ }
167
+ else {
168
+ text = await page.evaluate(() => document.body.innerText);
169
+ }
170
+ }
171
+ catch {
172
+ text = await page.evaluate(() => document.body.innerText);
173
+ }
174
+ }
175
+ else {
176
+ text = await page.evaluate(() => document.body.innerText);
177
+ }
106
178
  const result = {
107
179
  success: true,
108
180
  url,
109
181
  current_url: page.url(),
110
- title,
182
+ title: articleTitle || title,
183
+ byline: articleByline,
184
+ excerpt: articleExcerpt,
111
185
  text: truncateOutput(text),
112
186
  };
113
187
  if (return_html) {
114
- result.html = truncateOutput(await page.content());
188
+ result.html = truncateOutput(htmlContent);
115
189
  }
116
190
  return JSON.stringify(result);
117
191
  }
@@ -120,7 +194,8 @@ const browserNavigateTool = tool(async ({ url, wait_until, timeout_ms, return_ht
120
194
  }
121
195
  }, {
122
196
  name: 'browser_navigate',
123
- description: 'Navigate to a URL in a real browser (executes JavaScript). Use instead of http_request for SPAs, JS-heavy pages, or sites requiring interaction. Returns page title and text content.',
197
+ description: 'Navigate to a URL in a real browser (executes JavaScript). Use for SPAs, JS-heavy pages, or sites requiring interaction. ' +
198
+ 'Automatically extracts clean readable content using Mozilla Readability. Returns page title, byline, excerpt, and text content.',
124
199
  schema: z.object({
125
200
  url: z.string().describe('Full URL to navigate to (must include https://)'),
126
201
  wait_until: z
@@ -132,6 +207,14 @@ const browserNavigateTool = tool(async ({ url, wait_until, timeout_ms, return_ht
132
207
  .boolean()
133
208
  .optional()
134
209
  .describe('Also return raw HTML in response. Default: false'),
210
+ wait_for_selector: z
211
+ .string()
212
+ .optional()
213
+ .describe('CSS selector to wait for before extracting content (useful for dynamic content)'),
214
+ extract_readable: z
215
+ .boolean()
216
+ .optional()
217
+ .describe('Use Readability to extract clean article content. Default: true'),
135
218
  }),
136
219
  });
137
220
  const browserGetDomTool = tool(async ({ selector, include_attributes }) => {
@@ -213,7 +296,6 @@ const browserClickTool = tool(async ({ selector, text, timeout_ms, wait_after_ms
213
296
  }
214
297
  const clickTimeout = timeout_ms ?? 10_000;
215
298
  if (text) {
216
- // Use Puppeteer pseudo-selector to find element by visible text
217
299
  await page.locator(`::-p-text(${text})`).setTimeout(clickTimeout).click();
218
300
  }
219
301
  else {
@@ -286,89 +368,128 @@ const browserFillTool = tool(async ({ selector, value, press_enter, timeout_ms }
286
368
  });
287
369
  /**
288
370
  * Search via DuckDuckGo Lite (plain HTML, no JS, no bot detection).
289
- * Uses a simple POST fetch — no browser required, much faster and more reliable
290
- * than headless browser scraping of Google.
291
- *
292
- * DDG Lite returns results as: href="URL" class='result-link'>TITLE</a>
293
- * and <td class='result-snippet'>SNIPPET</td>, paired by index.
294
- * Sponsored links have URLs starting with "https://duckduckgo.com/y.js" and are filtered out.
371
+ * Enhanced with better parsing, intent detection, and fallbacks.
295
372
  */
296
- const browserSearchTool = tool(async ({ query, num_results, language }) => {
373
+ const browserSearchTool = tool(async ({ query, num_results, language, search_type }) => {
297
374
  try {
298
375
  const max = Math.min(num_results ?? 10, 20);
299
376
  const year = new Date().getFullYear().toString();
300
377
  const lang = language ?? "pt";
301
- // ─────────────────────────────────────────────
302
- // 1️⃣ Intent Classification (heurístico leve)
303
- // ─────────────────────────────────────────────
304
378
  const qLower = query.toLowerCase();
305
379
  let intent = "general";
306
- if (/(hoje|último|resultado|placar|próximos|futebol|202\d)/.test(qLower))
380
+ // News patterns (PT/EN)
381
+ if (/(hoje|ontem|último|resultado|placar|próximos|futebol|eleição|202\d|today|yesterday|latest|breaking|election)/i.test(qLower)) {
307
382
  intent = "news";
308
- if (/(site oficial|gov|receita federal|ministério)/.test(qLower))
383
+ }
384
+ // Official/Government patterns
385
+ else if (/(site oficial|gov\.|receita federal|ministério|official site|government)/i.test(qLower)) {
309
386
  intent = "official";
310
- if (/(api|sdk|npm|docs|documentação)/.test(qLower))
387
+ }
388
+ // Documentation patterns
389
+ else if (/(api|sdk|npm|pypi|docs|documentação|documentation|reference|tutorial|example)/i.test(qLower)) {
311
390
  intent = "documentation";
312
- if (/(preço|valor|quanto custa)/.test(qLower))
391
+ }
392
+ // Price patterns
393
+ else if (/(preço|valor|quanto custa|price|cost|pricing|buy)/i.test(qLower)) {
313
394
  intent = "price";
314
- // ─────────────────────────────────────────────
315
- // 2️⃣ Query Refinement
316
- // ─────────────────────────────────────────────
317
- let refinedQuery = query;
318
- if (intent === "news") {
319
- refinedQuery = `${query} ${year}`;
320
395
  }
321
- if (intent === "official") {
322
- refinedQuery = `${query} site:gov.br OR site:org`;
396
+ // Academic patterns
397
+ else if (/(research|paper|study|journal|artigo|pesquisa|científico|scientific)/i.test(qLower)) {
398
+ intent = "academic";
323
399
  }
324
- if (intent === "documentation") {
325
- refinedQuery = `${query} documentation OR docs OR github`;
400
+ // How-to patterns
401
+ else if (/(como|how to|tutorial|guia|guide|passo a passo|step by step)/i.test(qLower)) {
402
+ intent = "how-to";
326
403
  }
327
- if (intent === "price") {
328
- refinedQuery = `${query} preço ${year} Brasil`;
404
+ // ─── Smart Query Refinement ──────────────────────────────────────────
405
+ let refinedQuery = query;
406
+ const refinements = [];
407
+ switch (intent) {
408
+ case "news":
409
+ refinements.push(year);
410
+ break;
411
+ case "official":
412
+ // Don't modify - let user's query stand
413
+ break;
414
+ case "documentation":
415
+ // Only add if not already present
416
+ if (!/docs|documentation|github/i.test(qLower)) {
417
+ refinements.push("documentation");
418
+ }
419
+ break;
420
+ case "price":
421
+ refinements.push(year);
422
+ if (lang === "pt" || lang === "br")
423
+ refinements.push("Brasil");
424
+ break;
425
+ case "academic":
426
+ refinements.push("site:scholar.google.com OR site:arxiv.org OR site:researchgate.net");
427
+ break;
428
+ case "how-to":
429
+ // Don't add noise, how-to queries are usually specific enough
430
+ break;
329
431
  }
330
- // ─────────────────────────────────────────────
331
- // 3️⃣ DuckDuckGo Lite Fetch
332
- // ─────────────────────────────────────────────
432
+ if (refinements.length > 0) {
433
+ refinedQuery = `${query} ${refinements.join(" ")}`;
434
+ }
435
+ // ─── Region Mapping ──────────────────────────────────────────────────
333
436
  const regionMap = {
334
437
  pt: "br-pt",
335
438
  br: "br-pt",
336
439
  en: "us-en",
337
440
  us: "us-en",
441
+ uk: "uk-en",
442
+ es: "es-es",
443
+ fr: "fr-fr",
444
+ de: "de-de",
338
445
  };
339
446
  const kl = regionMap[lang] ?? lang;
340
- const body = new URLSearchParams({ q: refinedQuery, kl }).toString();
341
- const res = await fetch("https://lite.duckduckgo.com/lite/", {
342
- method: "POST",
343
- headers: {
344
- "Content-Type": "application/x-www-form-urlencoded",
345
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
346
- },
347
- body,
348
- signal: AbortSignal.timeout(20000),
349
- });
350
- if (!res.ok) {
351
- return JSON.stringify({ success: false, error: `HTTP ${res.status}` });
352
- }
353
- const html = await res.text();
354
- const linkPattern = /href="(https?:\/\/[^"]+)"[^>]*class='result-link'>([^<]+)<\/a>/g;
355
- const snippetPattern = /class='result-snippet'>([\s\S]*?)<\/td>/g;
447
+ // ─── Execute Search with Retry ───────────────────────────────────────
448
+ const searchResult = await withRetry(async () => {
449
+ const body = new URLSearchParams({ q: refinedQuery, kl }).toString();
450
+ const res = await fetch("https://lite.duckduckgo.com/lite/", {
451
+ method: "POST",
452
+ headers: {
453
+ "Content-Type": "application/x-www-form-urlencoded",
454
+ "User-Agent": getRandomUserAgent(),
455
+ "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
456
+ "Accept-Language": lang === "pt" ? "pt-BR,pt;q=0.9,en;q=0.8" : "en-US,en;q=0.9",
457
+ },
458
+ body,
459
+ signal: AbortSignal.timeout(20000),
460
+ });
461
+ if (!res.ok) {
462
+ throw new Error(`HTTP ${res.status}`);
463
+ }
464
+ return res.text();
465
+ }, 3);
466
+ const html = searchResult;
467
+ // ─── Improved Parsing (handles both quote styles) ────────────────────
468
+ // Match links with either single or double quotes
469
+ const linkPattern = /href=["'](https?:\/\/[^"']+)["'][^>]*class=["']result-link["'][^>]*>([^<]+)<\/a>/gi;
470
+ const snippetPattern = /class=["']result-snippet["'][^>]*>([\s\S]*?)<\/td>/gi;
356
471
  const links = [...html.matchAll(linkPattern)];
357
472
  const snippets = [...html.matchAll(snippetPattern)];
358
473
  if (!links.length) {
359
- return JSON.stringify({
360
- success: false,
361
- query: refinedQuery,
362
- error: "No results found",
363
- });
474
+ // Try alternative pattern (DuckDuckGo sometimes changes format)
475
+ const altLinkPattern = /<a[^>]+class=["']result-link["'][^>]+href=["'](https?:\/\/[^"']+)["'][^>]*>([^<]+)<\/a>/gi;
476
+ const altLinks = [...html.matchAll(altLinkPattern)];
477
+ if (!altLinks.length) {
478
+ return JSON.stringify({
479
+ success: false,
480
+ query: refinedQuery,
481
+ error: "No results found. Try a different search term.",
482
+ hint: intent !== "general" ? `Detected intent: ${intent}. Try a more specific query.` : undefined,
483
+ });
484
+ }
485
+ links.push(...altLinks);
364
486
  }
365
- // ─────────────────────────────────────────────
366
- // 4️⃣ Helpers
367
- // ─────────────────────────────────────────────
487
+ // ─── Helper Functions ────────────────────────────────────────────────
368
488
  function normalizeUrl(url) {
369
489
  try {
370
490
  const u = new URL(url);
371
- u.search = ""; // remove tracking params
491
+ // Remove tracking parameters
492
+ ['utm_source', 'utm_medium', 'utm_campaign', 'ref', 'fbclid', 'gclid'].forEach(p => u.searchParams.delete(p));
372
493
  return u.toString();
373
494
  }
374
495
  catch {
@@ -377,59 +498,114 @@ const browserSearchTool = tool(async ({ query, num_results, language }) => {
377
498
  }
378
499
  function getDomain(url) {
379
500
  try {
380
- return new URL(url).hostname.replace("www.", "");
501
+ return new URL(url).hostname.replace(/^www\./, "");
381
502
  }
382
503
  catch {
383
504
  return "";
384
505
  }
385
506
  }
386
- const trustedDomains = [
387
- "gov.br",
388
- "bbc.com",
389
- "reuters.com",
390
- "globo.com",
391
- "uol.com",
392
- "cnn.com",
393
- "github.com",
394
- "npmjs.com",
395
- "com.br"
507
+ // ─── Enhanced Domain Scoring ─────────────────────────────────────────
508
+ const domainScores = {
509
+ // High authority
510
+ "github.com": 8,
511
+ "stackoverflow.com": 8,
512
+ "wikipedia.org": 7,
513
+ "docs.python.org": 8,
514
+ "developer.mozilla.org": 8,
515
+ "npmjs.com": 7,
516
+ "pypi.org": 7,
517
+ // News
518
+ "bbc.com": 6,
519
+ "reuters.com": 6,
520
+ "cnn.com": 5,
521
+ "globo.com": 5,
522
+ "uol.com.br": 4,
523
+ "g1.globo.com": 6,
524
+ // Brazilian official
525
+ "gov.br": 7,
526
+ // Tech blogs
527
+ "medium.com": 3,
528
+ "dev.to": 4,
529
+ "hashnode.dev": 3,
530
+ // Academic
531
+ "arxiv.org": 7,
532
+ "scholar.google.com": 7,
533
+ "researchgate.net": 6,
534
+ };
535
+ const penalizedPatterns = [
536
+ /login|signin|signup/i,
537
+ /assine|subscribe|paywall/i,
538
+ /compre|buy now|add to cart/i,
539
+ /pinterest\.com/i,
540
+ /facebook\.com/i,
541
+ /instagram\.com/i,
396
542
  ];
397
543
  function scoreResult(result) {
398
544
  let score = 0;
399
545
  const domain = getDomain(result.url);
400
- if (trustedDomains.some((d) => domain.includes(d)))
546
+ // Domain-based scoring
547
+ for (const [d, s] of Object.entries(domainScores)) {
548
+ if (domain.includes(d) || domain.endsWith(d)) {
549
+ score += s;
550
+ break;
551
+ }
552
+ }
553
+ // Intent-based bonuses
554
+ if (intent === "documentation") {
555
+ if (/github|docs|reference|api/i.test(domain))
556
+ score += 4;
557
+ if (/example|tutorial|guide/i.test(result.title))
558
+ score += 2;
559
+ }
560
+ if (intent === "news") {
561
+ if (/(globo|uol|cnn|bbc|reuters|g1)/i.test(domain))
562
+ score += 4;
563
+ if (new RegExp(year).test(result.snippet))
564
+ score += 2;
565
+ }
566
+ if (intent === "official" && /gov\.|\.gov|official/i.test(domain)) {
401
567
  score += 5;
402
- if (intent === "official" && domain.includes("gov"))
568
+ }
569
+ if (intent === "academic" && /arxiv|scholar|research/i.test(domain)) {
403
570
  score += 5;
404
- if (intent === "documentation" && domain.includes("github"))
405
- score += 4;
406
- if (intent === "news" && /(globo|uol|cnn|bbc)/.test(domain))
571
+ }
572
+ if (intent === "how-to" && /tutorial|guide|how/i.test(result.title)) {
407
573
  score += 3;
408
- if (result.title.toLowerCase().includes(query.toLowerCase()))
409
- score += 2;
410
- if (result.snippet.length > 120)
574
+ }
575
+ // Title relevance
576
+ const queryWords = query.toLowerCase().split(/\s+/).filter(w => w.length > 2);
577
+ const titleLower = result.title.toLowerCase();
578
+ const matchedWords = queryWords.filter(w => titleLower.includes(w));
579
+ score += Math.min(matchedWords.length * 1.5, 5);
580
+ // Snippet quality
581
+ if (result.snippet.length > 100)
411
582
  score += 1;
412
- if (/login|assine|subscribe|paywall/i.test(result.snippet))
413
- score -= 3;
414
- return score;
583
+ if (result.snippet.length > 200)
584
+ score += 1;
585
+ // Penalties
586
+ for (const pattern of penalizedPatterns) {
587
+ if (pattern.test(result.url) || pattern.test(result.snippet)) {
588
+ score -= 4;
589
+ }
590
+ }
591
+ return Math.max(0, score);
415
592
  }
416
- // ─────────────────────────────────────────────
417
- // 5️⃣ Build Results + Deduplicate Domain
418
- // ─────────────────────────────────────────────
593
+ // ─── Process Results ─────────────────────────────────────────────────
419
594
  const domainSeen = new Set();
420
595
  const results = [];
421
596
  for (let i = 0; i < links.length; i++) {
422
597
  const rawUrl = links[i][1];
423
- if (rawUrl.startsWith("https://duckduckgo.com/"))
598
+ if (rawUrl.includes("duckduckgo.com"))
424
599
  continue;
425
600
  const url = normalizeUrl(rawUrl);
426
601
  const domain = getDomain(url);
602
+ // Skip if we already have this domain (dedupe)
427
603
  if (domainSeen.has(domain))
428
604
  continue;
429
605
  domainSeen.add(domain);
430
- const title = links[i][2].trim();
606
+ const title = links[i][2].trim().replace(/\s+/g, " ");
431
607
  const snippet = snippets[i]
432
- ? snippets[i][1].replace(/<[^>]+>/g, "").trim()
608
+ ? snippets[i][1].replace(/<[^>]+>/g, "").replace(/\s+/g, " ").trim()
433
609
  : "";
434
610
  const result = { title, url, snippet };
435
611
  const score = scoreResult(result);
@@ -442,33 +618,24 @@ const browserSearchTool = tool(async ({ query, num_results, language }) => {
442
618
  error: "No valid results after filtering",
443
619
  });
444
620
  }
445
- // ─────────────────────────────────────────────
446
- // 6️⃣ Ranking
447
- // ─────────────────────────────────────────────
621
+ // Sort by score and take top results
448
622
  results.sort((a, b) => b.score - a.score);
449
623
  const topResults = results.slice(0, max);
450
- const avgScore = topResults.reduce((acc, r) => acc + r.score, 0) /
451
- topResults.length;
452
- // ─────────────────────────────────────────────
453
- // 7️⃣ Low-Confidence Auto Retry
454
- // ─────────────────────────────────────────────
455
- if (avgScore < 2 && intent !== "general") {
456
- return JSON.stringify({
457
- success: false,
458
- query: refinedQuery,
459
- warning: "Low confidence results. Consider refining query further.",
460
- results: topResults,
461
- });
462
- }
624
+ // Calculate confidence
625
+ const avgScore = topResults.reduce((acc, r) => acc + r.score, 0) / topResults.length;
626
+ const confidence = avgScore >= 6 ? "high" : avgScore >= 3 ? "medium" : "low";
463
627
  return JSON.stringify({
464
628
  success: true,
465
629
  original_query: query,
466
- refined_query: refinedQuery,
630
+ refined_query: refinedQuery !== query ? refinedQuery : undefined,
467
631
  intent,
632
+ confidence,
633
+ result_count: topResults.length,
468
634
  results: topResults.map((r) => ({
469
635
  title: r.title,
470
636
  url: r.url,
471
637
  snippet: r.snippet,
638
+ domain: r.domain,
472
639
  score: r.score,
473
640
  })),
474
641
  });
@@ -477,20 +644,172 @@ const browserSearchTool = tool(async ({ query, num_results, language }) => {
477
644
  return JSON.stringify({
478
645
  success: false,
479
646
  error: err.message,
647
+ hint: "Search failed. Try simplifying your query or check your internet connection."
480
648
  });
481
649
  }
482
650
  }, {
483
651
  name: "browser_search",
484
- description: "Enhanced internet search with query refinement, ranking, deduplication, and confidence scoring. Uses DuckDuckGo Lite.",
652
+ description: "Intelligent web search with automatic intent detection (news, documentation, how-to, academic, etc.), " +
653
+ "smart query refinement, domain authority scoring, and confidence levels. Uses DuckDuckGo Lite for privacy. " +
654
+ "Returns ranked results with relevance scores.",
655
+ schema: z.object({
656
+ query: z.string().describe("Search query. Be specific for better results."),
657
+ num_results: z.number().int().min(1).max(20).optional().describe("Max results to return. Default: 10"),
658
+ language: z.enum(["pt", "br", "en", "us", "uk", "es", "fr", "de"]).optional().describe("Search region/language. Default: pt"),
659
+ search_type: z.enum(["web", "news"]).optional().describe("Type of search. Default: web (news not yet implemented)"),
660
+ }),
661
+ });
662
+ /**
663
+ * Lightweight content fetcher - uses fetch + Readability instead of Puppeteer.
664
+ * Much faster for static pages, articles, documentation, etc.
665
+ */
666
+ const browserFetchContentTool = tool(async ({ url, timeout_ms, include_links }) => {
667
+ try {
668
+ const result = await withRetry(async () => {
669
+ const controller = new AbortController();
670
+ const timer = setTimeout(() => controller.abort(), timeout_ms ?? 30_000);
671
+ try {
672
+ const response = await fetch(url, {
673
+ signal: controller.signal,
674
+ headers: {
675
+ 'User-Agent': getRandomUserAgent(),
676
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
677
+ 'Accept-Language': 'pt-BR,pt;q=0.9,en-US;q=0.8,en;q=0.7',
678
+ 'Accept-Encoding': 'gzip, deflate, br',
679
+ 'Cache-Control': 'no-cache',
680
+ },
681
+ });
682
+ if (!response.ok) {
683
+ throw new Error(`HTTP ${response.status}: ${response.statusText}`);
684
+ }
685
+ const contentType = response.headers.get('content-type') || '';
686
+ // Handle JSON responses directly
687
+ if (contentType.includes('application/json')) {
688
+ const json = await response.json();
689
+ return {
690
+ success: true,
691
+ url,
692
+ content_type: 'json',
693
+ data: json,
694
+ };
695
+ }
696
+ const html = await response.text();
697
+ return { html, response };
698
+ }
699
+ finally {
700
+ clearTimeout(timer);
701
+ }
702
+ }, 3);
703
+ // If it was JSON, return early
704
+ if ('content_type' in result && result.content_type === 'json') {
705
+ return JSON.stringify(result);
706
+ }
707
+ const { html } = result;
708
+ // Parse with JSDOM and extract with Readability
709
+ const dom = new JSDOM(html, { url });
710
+ const document = dom.window.document;
711
+ // Extract metadata
712
+ const title = document.querySelector('title')?.textContent?.trim() || '';
713
+ const description = document.querySelector('meta[name="description"]')?.getAttribute('content') ||
714
+ document.querySelector('meta[property="og:description"]')?.getAttribute('content') || '';
715
+ const author = document.querySelector('meta[name="author"]')?.getAttribute('content') || '';
716
+ // Use Readability for main content
717
+ const reader = new Readability(document.cloneNode(true));
718
+ const article = reader.parse();
719
+ // Extract links if requested
720
+ let links = [];
721
+ if (include_links) {
722
+ const anchors = document.querySelectorAll('a[href]');
723
+ const seen = new Set();
724
+ anchors.forEach((a) => {
725
+ const href = a.getAttribute('href');
726
+ const text = a.textContent?.trim();
727
+ if (href && text && !seen.has(href) && href.startsWith('http')) {
728
+ seen.add(href);
729
+ links.push({ text: text.slice(0, 100), href });
730
+ }
731
+ });
732
+ links = links.slice(0, 50); // Limit to 50 links
733
+ }
734
+ const output = {
735
+ success: true,
736
+ url,
737
+ title: article?.title || title,
738
+ description,
739
+ author: article?.byline || author,
740
+ excerpt: article?.excerpt || description,
741
+ content: truncateOutput(article?.textContent || document.body?.textContent || ''),
742
+ word_count: article?.textContent?.split(/\s+/).length || 0,
743
+ };
744
+ if (include_links && links.length > 0) {
745
+ output.links = links;
746
+ }
747
+ return JSON.stringify(output);
748
+ }
749
+ catch (err) {
750
+ return JSON.stringify({
751
+ success: false,
752
+ url,
753
+ error: err.message,
754
+ hint: 'If this is a JavaScript-heavy site, try browser_navigate instead.'
755
+ });
756
+ }
757
+ }, {
758
+ name: 'browser_fetch_content',
759
+ description: 'Fast, lightweight content fetcher for static pages, articles, documentation, and APIs. ' +
760
+ 'Uses HTTP fetch + Readability (no browser needed). Much faster than browser_navigate. ' +
761
+ 'Use this for: documentation pages, blog posts, news articles, API endpoints. ' +
762
+ 'For JavaScript-heavy SPAs, use browser_navigate instead.',
763
+ schema: z.object({
764
+ url: z.string().describe('Full URL to fetch (must include https://)'),
765
+ timeout_ms: z.number().optional().describe('Timeout in ms. Default: 30000'),
766
+ include_links: z.boolean().optional().describe('Extract and return all links from the page. Default: false'),
767
+ }),
768
+ });
769
+ /**
770
+ * Screenshot tool - useful for visual verification and debugging
771
+ */
772
+ const browserScreenshotTool = tool(async ({ selector, full_page }) => {
773
+ try {
774
+ const { page } = await acquireBrowser();
775
+ let screenshot;
776
+ if (selector) {
777
+ const element = await page.$(selector);
778
+ if (!element) {
779
+ return JSON.stringify({ success: false, error: `Element not found: ${selector}` });
780
+ }
781
+ screenshot = await element.screenshot({ encoding: 'binary' });
782
+ }
783
+ else {
784
+ screenshot = await page.screenshot({
785
+ fullPage: full_page ?? false,
786
+ encoding: 'binary'
787
+ });
788
+ }
789
+ const base64 = screenshot.toString('base64');
790
+ return JSON.stringify({
791
+ success: true,
792
+ current_url: page.url(),
793
+ title: await page.title(),
794
+ screenshot_base64: base64,
795
+ size_bytes: screenshot.length,
796
+ });
797
+ }
798
+ catch (err) {
799
+ return JSON.stringify({ success: false, error: err.message });
800
+ }
801
+ }, {
802
+ name: 'browser_screenshot',
803
+ description: 'Take a screenshot of the current page or a specific element. ' +
804
+ 'Useful for visual verification and debugging. Returns base64-encoded PNG.',
485
805
  schema: z.object({
486
- query: z.string(),
487
- num_results: z.number().int().min(1).max(20).optional(),
488
- language: z.string().optional(),
806
+ selector: z.string().optional().describe('CSS selector of element to screenshot. Omit for full viewport.'),
807
+ full_page: z.boolean().optional().describe('Capture full scrollable page. Default: false (viewport only)'),
489
808
  }),
490
809
  });
491
810
  // ─── Factory ────────────────────────────────────────────────────────────────
492
811
  export function createBrowserTools(_ctx) {
493
- if (process.env.MORPHEUS_BROWSER_ENABLED === 'false') {
812
+ if (process.env.SMITH_BROWSER_ENABLED === 'false') {
494
813
  return [];
495
814
  }
496
815
  return [
@@ -499,6 +818,8 @@ export function createBrowserTools(_ctx) {
499
818
  browserClickTool,
500
819
  browserFillTool,
501
820
  browserSearchTool,
821
+ browserFetchContentTool,
822
+ browserScreenshotTool,
502
823
  ];
503
824
  }
504
825
  registerToolFactory(createBrowserTools, 'browser');
@@ -259,7 +259,7 @@ ${context ? `CONTEXT FROM ORACLE:\n${context}` : ""}
259
259
  try {
260
260
  const inputCount = messages.length;
261
261
  const startMs = Date.now();
262
- const response = await this.agent.invoke({ messages });
262
+ const response = await this.agent.invoke({ messages }, { recursionLimit: 100 });
263
263
  const durationMs = Date.now() - startMs;
264
264
  const apocConfig = this.config.apoc || this.config.llm;
265
265
  const lastMessage = response.messages[response.messages.length - 1];
@@ -116,7 +116,7 @@ CRITICAL — NEVER FABRICATE DATA:
116
116
  origin_user_id: taskContext?.origin_user_id,
117
117
  };
118
118
  const startMs = Date.now();
119
- const response = await TaskRequestContext.run(invokeContext, () => this.agent.invoke({ messages }));
119
+ const response = await TaskRequestContext.run(invokeContext, () => this.agent.invoke({ messages }, { recursionLimit: 100 }));
120
120
  const durationMs = Date.now() - startMs;
121
121
  const lastMessage = response.messages[response.messages.length - 1];
122
122
  const content = typeof lastMessage.content === "string"
@@ -97,7 +97,7 @@ export class SatiService {
97
97
  console.warn('[SatiService] Failed to persist input log:', e);
98
98
  }
99
99
  const satiStartMs = Date.now();
100
- const response = await agent.invoke({ messages });
100
+ const response = await agent.invoke({ messages }, { recursionLimit: 100 });
101
101
  const satiDurationMs = Date.now() - satiStartMs;
102
102
  const lastMessage = response.messages[response.messages.length - 1];
103
103
  let content = lastMessage.content.toString();
@@ -137,7 +137,7 @@ ${context ? `Context:\n${context}` : ""}
137
137
  };
138
138
  const inputCount = messages.length;
139
139
  const startMs = Date.now();
140
- const response = await TaskRequestContext.run(invokeContext, () => this.agent.invoke({ messages }));
140
+ const response = await TaskRequestContext.run(invokeContext, () => this.agent.invoke({ messages }, { recursionLimit: 100 }));
141
141
  const durationMs = Date.now() - startMs;
142
142
  const lastMessage = response.messages[response.messages.length - 1];
143
143
  const content = typeof lastMessage.content === "string"
@@ -395,7 +395,7 @@ Use it to inform your response and tool selection (if needed), but do not assume
395
395
  let syncDelegationCount = 0;
396
396
  const oracleStartMs = Date.now();
397
397
  const response = await TaskRequestContext.run(invokeContext, async () => {
398
- const agentResponse = await this.provider.invoke({ messages });
398
+ const agentResponse = await this.provider.invoke({ messages }, { recursionLimit: 100 });
399
399
  contextDelegationAcks = TaskRequestContext.getDelegationAcks();
400
400
  syncDelegationCount = TaskRequestContext.getSyncDelegationCount();
401
401
  return agentResponse;
@@ -7,6 +7,8 @@ import { ConfigManager } from '../../config/manager.js';
7
7
  import { ProviderFactory } from '../providers/factory.js';
8
8
  import { buildDevKit } from '../../devkit/index.js';
9
9
  import { SQLiteChatMessageHistory } from '../memory/sqlite.js';
10
+ import { AuditRepository } from '../audit/repository.js';
11
+ import { TaskRequestContext } from '../tasks/context.js';
10
12
  /**
11
13
  * SmithDelegator — delegates natural-language tasks to a specific Smith.
12
14
  *
@@ -70,6 +72,25 @@ export class SmithDelegator {
70
72
  connection.onMessage(progressHandler);
71
73
  try {
72
74
  const result = await connection.sendTask(taskId, localTool.name, args);
75
+ // Audit the remote tool execution — data already available from Smith's response
76
+ const sessionId = TaskRequestContext.get()?.session_id ?? 'smith';
77
+ const resultStr = result.data !== undefined
78
+ ? (typeof result.data === 'string' ? result.data : JSON.stringify(result.data))
79
+ : result.error;
80
+ const meta = { smith: smithName };
81
+ if (args && Object.keys(args).length > 0)
82
+ meta.args = args;
83
+ if (resultStr)
84
+ meta.result = resultStr.length > 500 ? resultStr.slice(0, 500) + '…' : resultStr;
85
+ AuditRepository.getInstance().insert({
86
+ session_id: sessionId,
87
+ event_type: 'tool_call',
88
+ agent: 'smith',
89
+ tool_name: `${smithName}/${localTool.name}`,
90
+ duration_ms: result.duration_ms,
91
+ status: result.success ? 'success' : 'error',
92
+ metadata: meta,
93
+ });
73
94
  if (result.success) {
74
95
  return typeof result.data === 'string'
75
96
  ? result.data
@@ -135,7 +156,7 @@ Respond in the same language as the task.`);
135
156
  : task;
136
157
  const messages = [systemMessage, new HumanMessage(userContent)];
137
158
  const startMs = Date.now();
138
- const response = await agent.invoke({ messages });
159
+ const response = await agent.invoke({ messages }, { recursionLimit: 100 });
139
160
  const durationMs = Date.now() - startMs;
140
161
  // Extract final response
141
162
  const lastMessage = response.messages[response.messages.length - 1];
@@ -238,7 +238,7 @@ ${context ? `CONTEXT FROM ORACLE:\n${context}` : ''}
238
238
  const messages = [systemMessage, userMessage];
239
239
  try {
240
240
  const startMs = Date.now();
241
- const response = await this.agent.invoke({ messages });
241
+ const response = await this.agent.invoke({ messages }, { recursionLimit: 100 });
242
242
  const durationMs = Date.now() - startMs;
243
243
  const lastMessage = response.messages[response.messages.length - 1];
244
244
  const content = typeof lastMessage.content === 'string'
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "morpheus-cli",
3
- "version": "0.8.7",
3
+ "version": "0.8.9",
4
4
  "description": "Morpheus is a local AI agent for developers, running as a CLI daemon that connects to LLMs, local tools, and MCPs, enabling interaction via Terminal, Telegram, and Discord. Inspired by the character Morpheus from *The Matrix*, the project acts as an intelligent orchestrator, bridging the gap between the developer and complex systems.",
5
5
  "bin": {
6
6
  "morpheus": "./bin/morpheus.js"
@@ -34,8 +34,10 @@
34
34
  "@langchain/mcp-adapters": "^1.1.2",
35
35
  "@langchain/ollama": "^1.2.1",
36
36
  "@langchain/openai": "^1.2.3",
37
+ "@mozilla/readability": "^0.6.0",
37
38
  "@openrouter/sdk": "^0.8.0",
38
39
  "@types/better-sqlite3": "^7.6.13",
40
+ "@types/jsdom": "^28.0.0",
39
41
  "@types/pg": "^8.16.0",
40
42
  "@xenova/transformers": "^2.17.2",
41
43
  "better-sqlite3": "^12.6.2",
@@ -54,6 +56,7 @@
54
56
  "figlet": "^1.10.0",
55
57
  "fs-extra": "^11.3.3",
56
58
  "js-yaml": "^4.1.1",
59
+ "jsdom": "^28.1.0",
57
60
  "langchain": "^1.2.16",
58
61
  "mcp-remote": "^0.1.38",
59
62
  "mongodb": "^6.21.0",