0agent 1.0.9 → 1.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/daemon.mjs +125 -0
  2. package/package.json +1 -1
package/dist/daemon.mjs CHANGED
@@ -1736,6 +1736,32 @@ var AGENT_TOOLS = [
1736
1736
  path: { type: "string", description: 'Directory path relative to working directory (default: ".")' }
1737
1737
  }
1738
1738
  }
1739
+ },
1740
+ {
1741
+ name: "web_search",
1742
+ description: "Search the web and return titles, URLs, and snippets. No API key needed. Use this first to find relevant pages, then scrape_url for full content.",
1743
+ input_schema: {
1744
+ type: "object",
1745
+ properties: {
1746
+ query: { type: "string", description: "Search query" },
1747
+ num_results: { type: "number", description: "Number of results (default 5, max 10)" }
1748
+ },
1749
+ required: ["query"]
1750
+ }
1751
+ },
1752
+ {
1753
+ name: "scrape_url",
1754
+ description: "Scrape a URL and return clean structured content. Handles JavaScript-rendered pages, auto-adapts to page structure, returns text/links/metadata. Better than shell curl for web pages.",
1755
+ input_schema: {
1756
+ type: "object",
1757
+ properties: {
1758
+ url: { type: "string", description: "URL to scrape" },
1759
+ mode: { type: "string", description: 'What to extract: "text" (default), "links", "tables", "full", "markdown"' },
1760
+ selector: { type: "string", description: "Optional CSS selector to target specific element" },
1761
+ wait_ms: { type: "number", description: "Wait N ms after page load (for JS-heavy pages, default 0)" }
1762
+ },
1763
+ required: ["url"]
1764
+ }
1739
1765
  }
1740
1766
  ];
1741
1767
  var LLMExecutor = class {
@@ -2135,6 +2161,18 @@ var AgentExecutor = class {
2135
2161
  return this.readFile(String(input.path ?? ""));
2136
2162
  case "list_dir":
2137
2163
  return this.listDir(input.path ? String(input.path) : void 0);
2164
+ case "web_search":
2165
+ return this.webSearch(
2166
+ String(input.query ?? ""),
2167
+ Math.min(10, Number(input.num_results ?? 5))
2168
+ );
2169
+ case "scrape_url":
2170
+ return this.scrapeUrl(
2171
+ String(input.url ?? ""),
2172
+ String(input.mode ?? "text"),
2173
+ input.selector ? String(input.selector) : void 0,
2174
+ Number(input.wait_ms ?? 0)
2175
+ );
2138
2176
  default:
2139
2177
  return `Unknown tool: ${name}`;
2140
2178
  }
@@ -2174,6 +2212,90 @@ var AgentExecutor = class {
2174
2212
  return content.length > 8e3 ? content.slice(0, 8e3) + `
2175
2213
  \u2026[truncated, ${content.length} total bytes]` : content;
2176
2214
  }
2215
+ async webSearch(query, numResults) {
2216
+ const url = `https://html.duckduckgo.com/html/?q=${encodeURIComponent(query)}&kl=us-en`;
2217
+ let html = "";
2218
+ try {
2219
+ const res = await fetch(url, {
2220
+ headers: {
2221
+ "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0 Safari/537.36",
2222
+ "Accept": "text/html,application/xhtml+xml",
2223
+ "Accept-Language": "en-US,en;q=0.9"
2224
+ },
2225
+ signal: AbortSignal.timeout(12e3)
2226
+ });
2227
+ html = await res.text();
2228
+ } catch (err) {
2229
+ return `Search request failed: ${err instanceof Error ? err.message : String(err)}`;
2230
+ }
2231
+ const results = [];
2232
+ const titleRe = /<a[^>]+class="result__a"[^>]+href="([^"]*)"[^>]*>([\s\S]*?)<\/a>/g;
2233
+ const snippetRe = /<a[^>]+class="result__snippet"[^>]*>([\s\S]*?)<\/a>/g;
2234
+ const titles = [];
2235
+ const snippets = [];
2236
+ let m;
2237
+ while ((m = titleRe.exec(html)) !== null) {
2238
+ let href = m[1];
2239
+ const title = m[2].replace(/<[^>]+>/g, "").replace(/&amp;/g, "&").replace(/&lt;/g, "<").replace(/&gt;/g, ">").trim();
2240
+ const uddg = href.match(/[?&]uddg=([^&]+)/);
2241
+ if (uddg) href = decodeURIComponent(uddg[1]);
2242
+ if (href.startsWith("http") && title && titles.length < numResults) {
2243
+ titles.push({ url: href, title });
2244
+ }
2245
+ }
2246
+ while ((m = snippetRe.exec(html)) !== null && snippets.length < numResults) {
2247
+ snippets.push(m[1].replace(/<[^>]+>/g, "").replace(/\s+/g, " ").trim());
2248
+ }
2249
+ if (titles.length === 0) {
2250
+ const plainText = html.replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").slice(0, 1500);
2251
+ return `No results parsed. Raw content:
2252
+ ${plainText}`;
2253
+ }
2254
+ return titles.map(
2255
+ (t, i) => `${i + 1}. ${t.title}
2256
+ URL: ${t.url}${snippets[i] ? `
2257
+ ${snippets[i]}` : ""}`
2258
+ ).join("\n\n");
2259
+ }
2260
+ async scrapeUrl(url, mode, selector, waitMs) {
2261
+ if (!url.startsWith("http")) return "Error: URL must start with http:// or https://";
2262
+ const selectorLine = selector ? `element = page.find('${selector}')
2263
+ content = element.text if element else page.get_all_text()` : `content = page.get_all_text()`;
2264
+ const modeLine = mode === "links" ? `result = [a.attrib.get('href','') for a in page.find_all('a') if a.attrib.get('href','').startswith('http')]` : mode === "tables" ? `result = [str(t) for t in page.find_all('table')]` : mode === "markdown" ? `result = page.get_all_text()` : `result = page.get_all_text()`;
2265
+ const script = [
2266
+ `import sys`,
2267
+ `try:`,
2268
+ ` from scrapling import Fetcher`,
2269
+ `except ImportError:`,
2270
+ ` import subprocess, sys`,
2271
+ ` subprocess.run([sys.executable, '-m', 'pip', 'install', 'scrapling', '-q'], check=True)`,
2272
+ ` from scrapling import Fetcher`,
2273
+ `try:`,
2274
+ ` fetcher = Fetcher(auto_match=False)`,
2275
+ ` page = fetcher.get('${url}', timeout=20)`,
2276
+ ` ${modeLine}`,
2277
+ ` if isinstance(result, list):`,
2278
+ ` print('\\n'.join(str(r) for r in result[:50]))`,
2279
+ ` else:`,
2280
+ ` text = str(result).strip()`,
2281
+ ` print(text[:6000] + ('...[truncated]' if len(text)>6000 else ''))`,
2282
+ `except Exception as e:`,
2283
+ ` # Fallback to simple fetch if scrapling fails`,
2284
+ ` import urllib.request`,
2285
+ ` try:`,
2286
+ ` req = urllib.request.Request('${url}', headers={'User-Agent': 'Mozilla/5.0'})`,
2287
+ ` with urllib.request.urlopen(req, timeout=15) as resp:`,
2288
+ ` body = resp.read().decode('utf-8', errors='ignore')`,
2289
+ ` # Strip tags simply`,
2290
+ ` import re`,
2291
+ ` text = re.sub(r'<[^>]+>', ' ', body)`,
2292
+ ` text = re.sub(r'\\s+', ' ', text).strip()`,
2293
+ ` print(text[:5000])`,
2294
+ ` except Exception as e2:`,
2295
+ ` print(f'Scrape failed: {e} / {e2}', file=sys.stderr)`
2296
+ ].join("\n");
2297
+ return this.shellExec(`python3 -c "${script.replace(/"/g, '\\"').replace(/\n/g, ";")}"`, 3e4);
2298
+ }
2177
2299
  listDir(dirPath) {
2178
2300
  const safe = this.safePath(dirPath ?? ".");
2179
2301
  if (!safe) return "Error: path outside working directory";
@@ -2201,6 +2323,7 @@ var AgentExecutor = class {
2201
2323
  `- For npm/node projects: check package.json first with read_file or list_dir`,
2202
2324
  `- After write_file, verify with read_file if needed`,
2203
2325
  `- After shell_exec, check output for errors and retry if needed`,
2326
+ `- For research tasks: use web_search first, then scrape_url for full page content`,
2204
2327
  `- Use relative paths from the working directory`,
2205
2328
  `- Be concise in your final response: state what was done and where to find it`
2206
2329
  ];
@@ -2212,6 +2335,8 @@ var AgentExecutor = class {
2212
2335
  if (toolName === "write_file") return `"${input.path}"`;
2213
2336
  if (toolName === "read_file") return `"${input.path}"`;
2214
2337
  if (toolName === "list_dir") return `"${input.path ?? "."}"`;
2338
+ if (toolName === "web_search") return `"${String(input.query ?? "").slice(0, 60)}"`;
2339
+ if (toolName === "scrape_url") return `"${String(input.url ?? "").slice(0, 60)}" mode=${input.mode ?? "text"}`;
2215
2340
  return JSON.stringify(input).slice(0, 60);
2216
2341
  }
2217
2342
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "0agent",
3
- "version": "1.0.9",
3
+ "version": "1.0.11",
4
4
  "description": "A persistent, learning AI agent that runs on your machine. An agent that learns.",
5
5
  "private": false,
6
6
  "license": "Apache-2.0",