npm - 0agent - Versions diffs - 1.0.9 → 1.0.10 - Mend

0agent 1.0.9 → 1.0.10

This diff shows the changes between two publicly available versions of this package, exactly as they were released to one of the supported registries. It is provided for informational purposes only.

Files changed (2) hide show

package/dist/daemon.mjs +61 -0
package/package.json +1 -1

package/dist/daemon.mjs CHANGED Viewed

@@ -1736,6 +1736,20 @@ var AGENT_TOOLS = [
         path: { type: "string", description: 'Directory path relative to working directory (default: ".")' }
       }
     }
+  },
+  {
+    name: "scrape_url",
+    description: "Scrape a URL and return clean structured content. Handles JavaScript-rendered pages, auto-adapts to page structure, returns text/links/metadata. Better than shell curl for web pages.",
+    input_schema: {
+      type: "object",
+      properties: {
+        url: { type: "string", description: "URL to scrape" },
+        mode: { type: "string", description: 'What to extract: "text" (default), "links", "tables", "full", "markdown"' },
+        selector: { type: "string", description: "Optional CSS selector to target specific element" },
+        wait_ms: { type: "number", description: "Wait N ms after page load (for JS-heavy pages, default 0)" }
+      },
+      required: ["url"]
+    }
   }
 ];
 var LLMExecutor = class {
@@ -2135,6 +2149,13 @@ var AgentExecutor = class {
         return this.readFile(String(input.path ?? ""));
       case "list_dir":
         return this.listDir(input.path ? String(input.path) : void 0);
+      case "scrape_url":
+        return this.scrapeUrl(
+          String(input.url ?? ""),
+          String(input.mode ?? "text"),
+          input.selector ? String(input.selector) : void 0,
+          Number(input.wait_ms ?? 0)
+        );
       default:
         return `Unknown tool: ${name}`;
     }
@@ -2174,6 +2195,45 @@ var AgentExecutor = class {
     return content.length > 8e3 ? content.slice(0, 8e3) + `
 \u2026[truncated, ${content.length} total bytes]` : content;
   }
+  async scrapeUrl(url, mode, selector, waitMs) {
+    if (!url.startsWith("http")) return "Error: URL must start with http:// or https://";
+    const selectorLine = selector ? `element = page.find('${selector}')
+content = element.text if element else page.get_all_text()` : `content = page.get_all_text()`;
+    const modeLine = mode === "links" ? `result = [a.attrib.get('href','') for a in page.find_all('a') if a.attrib.get('href','').startswith('http')]` : mode === "tables" ? `result = [str(t) for t in page.find_all('table')]` : mode === "markdown" ? `result = page.get_all_text()` : `result = page.get_all_text()`;
+    const script = [
+      `import sys`,
+      `try:`,
+      `    from scrapling import Fetcher`,
+      `except ImportError:`,
+      `    import subprocess, sys`,
+      `    subprocess.run([sys.executable, '-m', 'pip', 'install', 'scrapling', '-q'], check=True)`,
+      `    from scrapling import Fetcher`,
+      `try:`,
+      `    fetcher = Fetcher(auto_match=False)`,
+      `    page = fetcher.get('${url}', timeout=20)`,
+      `    ${modeLine}`,
+      `    if isinstance(result, list):`,
+      `        print('\\n'.join(str(r) for r in result[:50]))`,
+      `    else:`,
+      `        text = str(result).strip()`,
+      `        print(text[:6000] + ('...[truncated]' if len(text)>6000 else ''))`,
+      `except Exception as e:`,
+      `    # Fallback to simple fetch if scrapling fails`,
+      `    import urllib.request`,
+      `    try:`,
+      `        req = urllib.request.Request('${url}', headers={'User-Agent': 'Mozilla/5.0'})`,
+      `        with urllib.request.urlopen(req, timeout=15) as resp:`,
+      `            body = resp.read().decode('utf-8', errors='ignore')`,
+      `            # Strip tags simply`,
+      `            import re`,
+      `            text = re.sub(r'<[^>]+>', ' ', body)`,
+      `            text = re.sub(r'\\s+', ' ', text).strip()`,
+      `            print(text[:5000])`,
+      `    except Exception as e2:`,
+      `        print(f'Scrape failed: {e} / {e2}', file=sys.stderr)`
+    ].join("\n");
+    return this.shellExec(`python3 -c "${script.replace(/"/g, '\\"').replace(/\n/g, ";")}"`, 3e4);
+  }
   listDir(dirPath) {
     const safe = this.safePath(dirPath ?? ".");
     if (!safe) return "Error: path outside working directory";
@@ -2212,6 +2272,7 @@ var AgentExecutor = class {
     if (toolName === "write_file") return `"${input.path}"`;
     if (toolName === "read_file") return `"${input.path}"`;
     if (toolName === "list_dir") return `"${input.path ?? "."}"`;
+    if (toolName === "scrape_url") return `"${String(input.url ?? "").slice(0, 60)}" mode=${input.mode ?? "text"}`;
     return JSON.stringify(input).slice(0, 60);
   }
 };

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "0agent",
-  "version": "1.0.9",
+  "version": "1.0.10",
   "description": "A persistent, learning AI agent that runs on your machine. An agent that learns.",
   "private": false,
   "license": "Apache-2.0",