morpheus-cli 0.8.8 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,504 +0,0 @@
1
- import { tool } from '@langchain/core/tools';
2
- import { z } from 'zod';
3
- import { truncateOutput } from '../utils.js';
4
- import { registerToolFactory } from '../registry.js';
5
- import { PATHS } from '../../config/paths.js';
6
// ─── Shared browser state (one browser per process) ────────────────────────
/** Idle period after which the shared browser is shut down: 5 minutes. */
const IDLE_TIMEOUT_MS = 300_000;
/** Browser handle returned by Puppeteer's launch(), or null when none is running. */
let browserInstance = null;
/** The page reused by the browser tools, or null when none is open. */
let pageInstance = null;
/** Handle of the pending idle-shutdown timeout, or null when none is scheduled. */
let idleTimer = null;
/** In-flight Chromium install, shared so that only one download runs at a time. */
let installPromise = null;
12
/**
 * Resolves the path of a usable Chrome executable, downloading it on first use.
 *
 * The current `stable` build id is resolved for the detected platform and the
 * matching executable is looked up under `PATHS.browser`. If it already exists
 * on disk its path is returned immediately; otherwise the build is installed
 * into that directory while progress is written to stdout.
 *
 * @returns {Promise<string>} Path of the Chrome executable.
 */
async function ensureChromium() {
    const puppeteerBrowsers = await import('@puppeteer/browsers');
    const { install, resolveBuildId, detectBrowserPlatform, computeExecutablePath } = puppeteerBrowsers;
    const chrome = puppeteerBrowsers.Browser.CHROME;
    const currentPlatform = detectBrowserPlatform();
    const buildId = await resolveBuildId(chrome, currentPlatform, 'stable');
    // The same browser/build/cache triple identifies both the lookup and the install.
    const target = { browser: chrome, buildId, cacheDir: PATHS.browser };
    const expectedPath = computeExecutablePath(target);
    const fsExtra = (await import('fs-extra')).default;
    const alreadyInstalled = await fsExtra.pathExists(expectedPath);
    if (alreadyInstalled) {
        return expectedPath;
    }
    // Nothing cached for this build yet: download it and report progress.
    process.stdout.write('[Morpheus] Installing Chromium for browser tools (first run, ~150MB)...\n');
    const reportProgress = (downloaded, total) => {
        let pct = 0;
        if (total > 0) {
            pct = Math.round((downloaded / total) * 100);
        }
        // '\r' rewrites the same terminal line instead of scrolling.
        process.stdout.write(`\r[Morpheus] Downloading Chromium: ${pct}% `);
    };
    const installed = await install({ ...target, downloadProgressCallback: reportProgress });
    process.stdout.write('\n[Morpheus] Chromium installed successfully.\n');
    return installed.executablePath;
}
44
/**
 * In-flight browser launch shared by concurrent acquireBrowser() callers,
 * or null when no launch is running.
 */
let launchPromise = null;
/**
 * Returns the shared browser and page, creating them when needed, and restarts
 * the idle-shutdown timer.
 *
 * Launching is serialized through `launchPromise`: callers that arrive while a
 * launch is running wait for that launch instead of starting a second Chromium.
 * (Re-checking `browserInstance` after the install alone is not enough, because
 * it stays unset for the whole duration of `launch()`.) The Chromium install is
 * likewise shared through `installPromise` so only one download runs at a time.
 *
 * @returns {Promise<{browser: object, page: object}>} The shared Puppeteer
 *   browser and page.
 * @throws Whatever the Chromium install, `launch()` or `newPage()` rejects with.
 */
async function acquireBrowser() {
    const { launch } = await import('puppeteer-core');
    const needsLaunch = !browserInstance || !browserInstance.connected;
    if (needsLaunch && !launchPromise) {
        launchPromise = (async () => {
            if (!installPromise) {
                installPromise = ensureChromium().finally(() => {
                    installPromise = null;
                });
            }
            const executablePath = await installPromise;
            const browser = await launch({
                executablePath,
                headless: true,
                args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-gpu'],
            });
            let page;
            try {
                page = await browser.newPage();
            }
            catch (err) {
                // Do not leave a page-less Chromium running behind a failed launch.
                await browser.close().catch(() => { /* ignore */ });
                throw err;
            }
            // Publish both handles together so no caller sees a browser without a page.
            browserInstance = browser;
            pageInstance = page;
        })().finally(() => {
            launchPromise = null;
        });
    }
    if (launchPromise) {
        await launchPromise;
    }
    else if (!pageInstance || pageInstance.isClosed()) {
        pageInstance = await browserInstance.newPage();
    }
    // Reset idle timeout
    if (idleTimer) {
        clearTimeout(idleTimer);
    }
    idleTimer = setTimeout(async () => {
        idleTimer = null;
        // Detach the handles before awaiting the closes: a caller arriving meanwhile
        // then launches a fresh browser, and this callback cannot wipe that new one.
        const idleBrowser = browserInstance;
        const idlePage = pageInstance;
        browserInstance = null;
        pageInstance = null;
        try {
            await idlePage?.close();
        }
        catch { /* ignore */ }
        try {
            await idleBrowser?.close();
        }
        catch { /* ignore */ }
    }, IDLE_TIMEOUT_MS);
    return { browser: browserInstance, page: pageInstance };
}
89
// Last-chance teardown when the process exits: kill the browser's child
// process directly, without awaiting a graceful close.
process.on('exit', () => {
    try {
        const child = browserInstance?.process();
        child?.kill();
    }
    catch {
        // Best effort only; nothing more can be done at this point.
    }
});
96
- // ─── Tool Definitions ───────────────────────────────────────────────────────
97
/**
 * `browser_navigate` tool: loads a URL in the shared browser page and reports
 * what was rendered.
 *
 * Resolves to a JSON string. On success it contains `success: true`, the
 * requested `url`, the final `current_url`, the page `title`, the truncated
 * visible `text` and, when `return_html` is set, the truncated `html`.
 * Failures do not throw; they resolve to `{ success: false, url, error }`.
 */
const browserNavigateTool = tool(async ({ url, wait_until, timeout_ms, return_html }) => {
    try {
        const { page } = await acquireBrowser();
        await page.goto(url, {
            waitUntil: (wait_until ?? 'domcontentloaded'),
            timeout: timeout_ms ?? 30_000,
        });
        const title = await page.title();
        // Documents without a <body> (e.g. XML or SVG) have `document.body === null`;
        // fall back to the root element's text instead of failing the navigation.
        const text = await page.evaluate(() => document.body?.innerText ?? document.documentElement?.textContent ?? '');
        const result = {
            success: true,
            url,
            current_url: page.url(),
            title,
            text: truncateOutput(text),
        };
        if (return_html) {
            result.html = truncateOutput(await page.content());
        }
        return JSON.stringify(result);
    }
    catch (err) {
        // Non-Error throwables have no `.message`; stringify them so the error is not lost.
        return JSON.stringify({ success: false, url, error: err?.message ?? String(err) });
    }
}, {
    name: 'browser_navigate',
    description: 'Navigate to a URL in a real browser (executes JavaScript). Use instead of http_request for SPAs, JS-heavy pages, or sites requiring interaction. Returns page title and text content.',
    schema: z.object({
        url: z.string().describe('Full URL to navigate to (must include https://)'),
        wait_until: z
            .enum(['load', 'domcontentloaded', 'networkidle0', 'networkidle2'])
            .optional()
            .describe('Wait condition. Default: domcontentloaded. Use networkidle0 for SPAs.'),
        timeout_ms: z.number().optional().describe('Navigation timeout in ms. Default: 30000'),
        return_html: z
            .boolean()
            .optional()
            .describe('Also return raw HTML in response. Default: false'),
    }),
});
137
/**
 * `browser_get_dom` tool: returns a simplified JSON tree of the current page,
 * or of the first element matching `selector`.
 *
 * Each node carries `tag`, optional `id` and `class`, the first 120 characters
 * of its text when it has no child elements, and (unless `include_attributes`
 * is false) a whitelist of attributes. The tree stops recursing below depth 6
 * and keeps at most 40 children per element.
 *
 * Resolves to a JSON string: `{ success: true, current_url, dom }` where `dom`
 * is the pretty-printed, truncated tree, or `{ success: false, error }`.
 */
const browserGetDomTool = tool(async ({ selector, include_attributes }) => {
    try {
        const { page } = await acquireBrowser();
        const includeAttrs = include_attributes ?? true;
        // The callback below runs inside the page, so it can only use what is
        // passed in through the argument object.
        const dom = await page.evaluate(({ sel, attrs }) => {
            const root = sel
                ? document.querySelector(sel)
                : document.body;
            if (!root)
                return null;
            const RELEVANT_ATTRS = [
                'href', 'src', 'type', 'name', 'value',
                'placeholder', 'action', 'id', 'role', 'aria-label',
            ];
            function serialize(el, depth) {
                const hasChildren = el.children.length > 0;
                const node = {
                    tag: el.tagName.toLowerCase(),
                };
                if (el.id)
                    node.id = el.id;
                // Read the attribute rather than `el.className`: on SVG elements
                // `className` is an SVGAnimatedString object, not a string.
                const classAttr = el.getAttribute('class');
                if (classAttr)
                    node.class = classAttr;
                if (!hasChildren) {
                    const txt = el.textContent?.trim();
                    if (txt)
                        node.text = txt.slice(0, 120);
                }
                if (attrs && el.attributes.length > 0) {
                    const attrMap = {};
                    for (const attr of el.attributes) {
                        if (RELEVANT_ATTRS.includes(attr.name)) {
                            attrMap[attr.name] = attr.value;
                        }
                    }
                    if (Object.keys(attrMap).length)
                        node.attrs = attrMap;
                }
                // Bound the output: recurse only down to depth 6, 40 children per element.
                if (depth < 6 && hasChildren) {
                    node.children = Array.from(el.children)
                        .slice(0, 40)
                        .map((c) => serialize(c, depth + 1));
                }
                return node;
            }
            return serialize(root, 0);
        }, { sel: selector ?? null, attrs: includeAttrs });
        if (!dom) {
            return JSON.stringify({ success: false, error: `Element not found: ${selector}` });
        }
        return JSON.stringify({ success: true, current_url: page.url(), dom: truncateOutput(JSON.stringify(dom, null, 2)) });
    }
    catch (err) {
        return JSON.stringify({ success: false, error: err?.message ?? String(err) });
    }
}, {
    name: 'browser_get_dom',
    description: 'Get a simplified DOM tree of the current page or a specific element. ' +
        'ALWAYS call this BEFORE browser_click or browser_fill to inspect page structure and identify the correct CSS selectors. ' +
        'Never guess selectors — analyze the DOM first.',
    schema: z.object({
        selector: z
            .string()
            .optional()
            .describe('CSS selector to scope the DOM tree to. Omit to get the full body.'),
        include_attributes: z
            .boolean()
            .optional()
            .describe('Include relevant attributes (href, src, type, name, value, placeholder, role, aria-label). Default: true'),
    }),
});
208
/**
 * `browser_click` tool: clicks an element on the current page, located either
 * by visible `text` (takes precedence when both are given) or by CSS `selector`.
 *
 * Resolves to a JSON string: `{ success: true, current_url, title }` after the
 * click (and the optional `wait_after_ms` pause), or `{ success: false, error }`.
 */
const browserClickTool = tool(async ({ selector, text, timeout_ms, wait_after_ms }) => {
    try {
        // Validate before touching the browser so a malformed call does not
        // launch (or download) Chromium just to report an argument error.
        if (!selector && !text) {
            return JSON.stringify({ success: false, error: 'Provide either selector or text' });
        }
        const { page } = await acquireBrowser();
        const clickTimeout = timeout_ms ?? 10_000;
        if (text) {
            // Use Puppeteer pseudo-selector to find element by visible text.
            // The text is passed as a quoted string with `"` and `\` escaped, so
            // quotes or parentheses in it cannot break the selector syntax.
            const quotedText = `"${text.replace(/["\\]/g, '\\$&')}"`;
            await page.locator(`::-p-text(${quotedText})`).setTimeout(clickTimeout).click();
        }
        else {
            await page.locator(selector).setTimeout(clickTimeout).click();
        }
        if (wait_after_ms) {
            await new Promise((r) => setTimeout(r, wait_after_ms));
        }
        return JSON.stringify({
            success: true,
            current_url: page.url(),
            title: await page.title(),
        });
    }
    catch (err) {
        return JSON.stringify({ success: false, error: err?.message ?? String(err) });
    }
}, {
    name: 'browser_click',
    description: 'Click an element on the current browser page by CSS selector or visible text. ' +
        'The page must already be loaded via browser_navigate. ' +
        'Always inspect the DOM with browser_get_dom first to find the correct selector.',
    schema: z.object({
        selector: z
            .string()
            .optional()
            .describe('CSS selector of the element to click (e.g. "button#submit", ".btn-login")'),
        text: z
            .string()
            .optional()
            .describe('Click element containing this visible text (alternative to selector)'),
        timeout_ms: z
            .number()
            .optional()
            .describe('Timeout to wait for the element in ms. Default: 10000'),
        wait_after_ms: z
            .number()
            .optional()
            .describe('Wait this many ms after clicking (for page transitions/animations). Default: 0'),
    }),
});
258
/**
 * `browser_fill` tool: fills the element matching `selector` with `value` on
 * the shared page and optionally presses Enter afterwards.
 *
 * Resolves to a JSON string: `{ success: true, selector, filled: true }` or
 * `{ success: false, selector, error }`.
 */
const browserFillTool = tool(async ({ selector, value, press_enter, timeout_ms }) => {
    const locateTimeout = timeout_ms ?? 10_000;
    try {
        const { page } = await acquireBrowser();
        const field = page.locator(selector).setTimeout(locateTimeout);
        await field.fill(value);
        if (press_enter) {
            await page.keyboard.press('Enter');
        }
    }
    catch (err) {
        return JSON.stringify({ success: false, selector, error: err.message });
    }
    return JSON.stringify({ success: true, selector, filled: true });
}, {
    name: 'browser_fill',
    description: 'Fill a form input or textarea field with a value. Clears any existing content first. ' +
        'Always inspect the DOM with browser_get_dom first to identify the correct CSS selector.',
    schema: z.object({
        selector: z.string().describe('CSS selector of the input/textarea element'),
        value: z.string().describe('Value to type into the field'),
        press_enter: z
            .boolean()
            .optional()
            .describe('Press Enter after filling (triggers form submit in many cases). Default: false'),
        timeout_ms: z
            .number()
            .optional()
            .describe('Timeout to find the element in ms. Default: 10000'),
    }),
});
287
/** DuckDuckGo `kl` region codes for the language shorthands the tool accepts. */
const SEARCH_REGION_BY_LANGUAGE = new Map([
    ["pt", "br-pt"],
    ["br", "br-pt"],
    ["en", "us-en"],
    ["us", "us-en"],
]);
/** Query-string parameters that exist only for click/campaign tracking. */
const TRACKING_PARAM_NAME = /^(?:utm_\w+|fbclid|gclid|gclsrc|dclid|msclkid|mc_cid|mc_eid|igshid|yclid|ref_src)$/i;
/** Domain suffixes whose results receive a ranking bonus. */
const TRUSTED_SEARCH_DOMAINS = [
    "gov.br",
    "bbc.com",
    "reuters.com",
    "globo.com",
    "uol.com",
    "cnn.com",
    "github.com",
    "npmjs.com",
    "com.br",
];
/** Named HTML entities decoded in result hrefs, titles and snippets. */
const SEARCH_HTML_ENTITIES = new Map([
    ["amp", "&"],
    ["lt", "<"],
    ["gt", ">"],
    ["quot", '"'],
    ["apos", "'"],
    ["nbsp", " "],
]);
/** Decodes numeric and the common named HTML entities; unknown ones are left untouched. */
function decodeSearchHtmlEntities(text) {
    return text.replace(/&(#x[0-9a-f]+|#\d+|[a-z]+);/gi, (entity, body) => {
        if (body[0] === "#") {
            const isHex = body[1] === "x" || body[1] === "X";
            const code = isHex
                ? Number.parseInt(body.slice(2), 16)
                : Number.parseInt(body.slice(1), 10);
            return code > 0 && code <= 0x10ffff ? String.fromCodePoint(code) : entity;
        }
        return SEARCH_HTML_ENTITIES.get(body.toLowerCase()) ?? entity;
    });
}
/**
 * Picks a coarse intent for the query. When several patterns match, the later
 * category wins: price > documentation > official > news > general.
 */
function classifySearchIntent(query) {
    const qLower = query.toLowerCase();
    if (/(preço|valor|quanto custa)/.test(qLower))
        return "price";
    // Short tokens need word boundaries, otherwise "capital" or "terapia"
    // (both contain "api") would be treated as documentation queries.
    if (/\b(?:apis?|sdks?|npm|docs)\b|documentação/.test(qLower))
        return "documentation";
    if (/(site oficial|gov|receita federal|ministério)/.test(qLower))
        return "official";
    if (/(hoje|último|resultado|placar|próximos|futebol|202\d)/.test(qLower))
        return "news";
    return "general";
}
/** Appends intent-specific search terms to the query; "general" is returned as-is. */
function refineSearchQuery(query, intent, year) {
    switch (intent) {
        case "news":
            return `${query} ${year}`;
        case "official":
            return `${query} site:gov.br OR site:org`;
        case "documentation":
            return `${query} documentation OR docs OR github`;
        case "price":
            return `${query} preço ${year} Brasil`;
        default:
            return query;
    }
}
/**
 * Removes tracking parameters from a URL while keeping every other query
 * parameter (pages such as `watch?v=…` or `?id=…` depend on them).
 * Unparsable input is returned unchanged.
 */
function stripSearchTrackingParams(url) {
    try {
        const parsed = new URL(url);
        // Copy the keys first: deleting while iterating the live view skips entries.
        for (const name of [...parsed.searchParams.keys()]) {
            if (TRACKING_PARAM_NAME.test(name))
                parsed.searchParams.delete(name);
        }
        return parsed.toString();
    }
    catch {
        return url;
    }
}
/** Hostname of `url` without a leading "www.", or "" when the URL cannot be parsed. */
function getSearchResultDomain(url) {
    try {
        return new URL(url).hostname.replace(/^www\./, "");
    }
    catch {
        return "";
    }
}
/**
 * Heuristic relevance score for one result.
 * Trusted domains are matched as suffixes on a label boundary, so
 * "g1.globo.com" counts as trusted but "globo.com.evil.example" does not.
 */
function scoreSearchResult(result, intent, queryLower) {
    let score = 0;
    const domain = getSearchResultDomain(result.url);
    if (TRUSTED_SEARCH_DOMAINS.some((d) => domain === d || domain.endsWith(`.${d}`)))
        score += 5;
    if (intent === "official" && domain.includes("gov"))
        score += 5;
    if (intent === "documentation" && domain.includes("github"))
        score += 4;
    if (intent === "news" && /(globo|uol|cnn|bbc)/.test(domain))
        score += 3;
    if (result.title.toLowerCase().includes(queryLower))
        score += 2;
    if (result.snippet.length > 120)
        score += 1;
    if (/login|assine|subscribe|paywall/i.test(result.snippet))
        score -= 3;
    return score;
}
/**
 * `browser_search` tool: web search through DuckDuckGo Lite using a plain POST
 * request (no browser is launched).
 *
 * The response HTML is scraped with two patterns:
 * href="URL" class='result-link'>TITLE</a> for links and
 * <td class='result-snippet'>SNIPPET</td> for snippets, paired by index.
 * Links pointing at https://duckduckgo.com/ (sponsored redirects) are dropped,
 * at most one result per domain is kept, and results are ranked by a heuristic score.
 *
 * Resolves to a JSON string:
 * - `{ success: true, original_query, refined_query, intent, results }`, or
 * - `{ success: false, query, warning, results }` when a non-general intent
 *   yields an average score below 2, or
 * - `{ success: false, error, ... }` on HTTP errors, empty results or exceptions.
 */
const browserSearchTool = tool(async ({ query, num_results, language }) => {
    try {
        const max = Math.min(num_results ?? 10, 20);
        const year = new Date().getFullYear().toString();
        const lang = language ?? "pt";
        // ─────────────────────────────────────────────
        // 1️⃣ Intent classification + 2️⃣ query refinement
        // ─────────────────────────────────────────────
        const intent = classifySearchIntent(query);
        const refinedQuery = refineSearchQuery(query, intent, year);
        // ─────────────────────────────────────────────
        // 3️⃣ DuckDuckGo Lite Fetch
        // ─────────────────────────────────────────────
        // Known shorthands map to a region code; anything else is passed through as-is.
        const kl = SEARCH_REGION_BY_LANGUAGE.get(lang.toLowerCase()) ?? lang;
        const body = new URLSearchParams({ q: refinedQuery, kl }).toString();
        const res = await fetch("https://lite.duckduckgo.com/lite/", {
            method: "POST",
            headers: {
                "Content-Type": "application/x-www-form-urlencoded",
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
            },
            body,
            signal: AbortSignal.timeout(20000),
        });
        if (!res.ok) {
            return JSON.stringify({ success: false, error: `HTTP ${res.status}` });
        }
        const html = await res.text();
        const linkPattern = /href="(https?:\/\/[^"]+)"[^>]*class='result-link'>([^<]+)<\/a>/g;
        const snippetPattern = /class='result-snippet'>([\s\S]*?)<\/td>/g;
        const links = [...html.matchAll(linkPattern)];
        const snippets = [...html.matchAll(snippetPattern)];
        if (!links.length) {
            return JSON.stringify({
                success: false,
                query: refinedQuery,
                error: "No results found",
            });
        }
        // ─────────────────────────────────────────────
        // 4️⃣ Build results, one per domain
        // ─────────────────────────────────────────────
        const queryLower = query.toLowerCase();
        const domainSeen = new Set();
        const results = [];
        for (const [i, link] of links.entries()) {
            // Attribute values are entity-encoded in HTML ("&amp;" between parameters).
            const rawUrl = decodeSearchHtmlEntities(link[1]);
            if (rawUrl.startsWith("https://duckduckgo.com/"))
                continue;
            const url = stripSearchTrackingParams(rawUrl);
            const domain = getSearchResultDomain(url);
            if (domainSeen.has(domain))
                continue;
            domainSeen.add(domain);
            const title = decodeSearchHtmlEntities(link[2]).trim();
            const snippet = snippets[i]
                ? decodeSearchHtmlEntities(snippets[i][1].replace(/<[^>]+>/g, "")).trim()
                : "";
            const result = { title, url, snippet };
            const score = scoreSearchResult(result, intent, queryLower);
            results.push({ ...result, domain, score });
        }
        if (!results.length) {
            return JSON.stringify({
                success: false,
                query: refinedQuery,
                error: "No valid results after filtering",
            });
        }
        // ─────────────────────────────────────────────
        // 5️⃣ Ranking
        // ─────────────────────────────────────────────
        results.sort((a, b) => b.score - a.score);
        const topResults = results.slice(0, max);
        const avgScore = topResults.reduce((acc, r) => acc + r.score, 0) /
            topResults.length;
        // ─────────────────────────────────────────────
        // 6️⃣ Low-confidence guard
        // ─────────────────────────────────────────────
        // No retry is performed here: the results are returned flagged as
        // low-confidence so the caller can decide to refine the query.
        if (avgScore < 2 && intent !== "general") {
            return JSON.stringify({
                success: false,
                query: refinedQuery,
                warning: "Low confidence results. Consider refining query further.",
                results: topResults,
            });
        }
        return JSON.stringify({
            success: true,
            original_query: query,
            refined_query: refinedQuery,
            intent,
            results: topResults.map((r) => ({
                title: r.title,
                url: r.url,
                snippet: r.snippet,
                score: r.score,
            })),
        });
    }
    catch (err) {
        return JSON.stringify({
            success: false,
            error: err?.message ?? String(err),
        });
    }
}, {
    name: "browser_search",
    description: "Enhanced internet search with query refinement, ranking, deduplication, and confidence scoring. Uses DuckDuckGo Lite.",
    schema: z.object({
        query: z.string(),
        num_results: z.number().int().min(1).max(20).optional(),
        language: z.string().optional(),
    }),
});
491
- // ─── Factory ────────────────────────────────────────────────────────────────
492
/**
 * Tool factory registered under the 'browser' category.
 *
 * @param {unknown} _ctx Factory context; not used by this factory.
 * @returns {Array} The five browser tools, or an empty array when the
 *   MORPHEUS_BROWSER_ENABLED environment variable is exactly 'false'.
 */
export function createBrowserTools(_ctx) {
    const isDisabled = process.env.MORPHEUS_BROWSER_ENABLED === 'false';
    return isDisabled
        ? []
        : [
            browserNavigateTool,
            browserGetDomTool,
            browserClickTool,
            browserFillTool,
            browserSearchTool,
        ];
}
// Make the factory discoverable through the shared tool registry.
registerToolFactory(createBrowserTools, 'browser');