0agent 1.0.9 → 1.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/daemon.mjs +61 -0
  2. package/package.json +1 -1
package/dist/daemon.mjs CHANGED
@@ -1736,6 +1736,20 @@ var AGENT_TOOLS = [
1736
1736
  path: { type: "string", description: 'Directory path relative to working directory (default: ".")' }
1737
1737
  }
1738
1738
  }
1739
+ },
1740
+ {
1741
+ name: "scrape_url",
1742
+ description: "Scrape a URL and return clean structured content. Handles JavaScript-rendered pages, auto-adapts to page structure, returns text/links/metadata. Better than shell curl for web pages.",
1743
+ input_schema: {
1744
+ type: "object",
1745
+ properties: {
1746
+ url: { type: "string", description: "URL to scrape" },
1747
+ mode: { type: "string", description: 'What to extract: "text" (default), "links", "tables", "full", "markdown"' },
1748
+ selector: { type: "string", description: "Optional CSS selector to target specific element" },
1749
+ wait_ms: { type: "number", description: "Wait N ms after page load (for JS-heavy pages, default 0)" }
1750
+ },
1751
+ required: ["url"]
1752
+ }
1739
1753
  }
1740
1754
  ];
1741
1755
  var LLMExecutor = class {
@@ -2135,6 +2149,13 @@ var AgentExecutor = class {
2135
2149
  return this.readFile(String(input.path ?? ""));
2136
2150
  case "list_dir":
2137
2151
  return this.listDir(input.path ? String(input.path) : void 0);
2152
+ case "scrape_url":
2153
+ return this.scrapeUrl(
2154
+ String(input.url ?? ""),
2155
+ String(input.mode ?? "text"),
2156
+ input.selector ? String(input.selector) : void 0,
2157
+ Number(input.wait_ms ?? 0)
2158
+ );
2138
2159
  default:
2139
2160
  return `Unknown tool: ${name}`;
2140
2161
  }
@@ -2174,6 +2195,45 @@ var AgentExecutor = class {
2174
2195
  return content.length > 8e3 ? content.slice(0, 8e3) + `
2175
2196
  \u2026[truncated, ${content.length} total bytes]` : content;
2176
2197
  }
2198
+ async scrapeUrl(url, mode, selector, waitMs) {
2199
+ if (!url.startsWith("http")) return "Error: URL must start with http:// or https://";
2200
+ const selectorLine = selector ? `element = page.find('${selector}')
2201
+ content = element.text if element else page.get_all_text()` : `content = page.get_all_text()`;
2202
+ const modeLine = mode === "links" ? `result = [a.attrib.get('href','') for a in page.find_all('a') if a.attrib.get('href','').startswith('http')]` : mode === "tables" ? `result = [str(t) for t in page.find_all('table')]` : mode === "markdown" ? `result = page.get_all_text()` : `result = page.get_all_text()`;
2203
+ const script = [
2204
+ `import sys`,
2205
+ `try:`,
2206
+ ` from scrapling import Fetcher`,
2207
+ `except ImportError:`,
2208
+ ` import subprocess, sys`,
2209
+ ` subprocess.run([sys.executable, '-m', 'pip', 'install', 'scrapling', '-q'], check=True)`,
2210
+ ` from scrapling import Fetcher`,
2211
+ `try:`,
2212
+ ` fetcher = Fetcher(auto_match=False)`,
2213
+ ` page = fetcher.get('${url}', timeout=20)`,
2214
+ ` ${modeLine}`,
2215
+ ` if isinstance(result, list):`,
2216
+ ` print('\\n'.join(str(r) for r in result[:50]))`,
2217
+ ` else:`,
2218
+ ` text = str(result).strip()`,
2219
+ ` print(text[:6000] + ('...[truncated]' if len(text)>6000 else ''))`,
2220
+ `except Exception as e:`,
2221
+ ` # Fallback to simple fetch if scrapling fails`,
2222
+ ` import urllib.request`,
2223
+ ` try:`,
2224
+ ` req = urllib.request.Request('${url}', headers={'User-Agent': 'Mozilla/5.0'})`,
2225
+ ` with urllib.request.urlopen(req, timeout=15) as resp:`,
2226
+ ` body = resp.read().decode('utf-8', errors='ignore')`,
2227
+ ` # Strip tags simply`,
2228
+ ` import re`,
2229
+ ` text = re.sub(r'<[^>]+>', ' ', body)`,
2230
+ ` text = re.sub(r'\\s+', ' ', text).strip()`,
2231
+ ` print(text[:5000])`,
2232
+ ` except Exception as e2:`,
2233
+ ` print(f'Scrape failed: {e} / {e2}', file=sys.stderr)`
2234
+ ].join("\n");
2235
+ return this.shellExec(`python3 -c "${script.replace(/"/g, '\\"').replace(/\n/g, ";")}"`, 3e4);
2236
+ }
2177
2237
  listDir(dirPath) {
2178
2238
  const safe = this.safePath(dirPath ?? ".");
2179
2239
  if (!safe) return "Error: path outside working directory";
@@ -2212,6 +2272,7 @@ var AgentExecutor = class {
2212
2272
  if (toolName === "write_file") return `"${input.path}"`;
2213
2273
  if (toolName === "read_file") return `"${input.path}"`;
2214
2274
  if (toolName === "list_dir") return `"${input.path ?? "."}"`;
2275
+ if (toolName === "scrape_url") return `"${String(input.url ?? "").slice(0, 60)}" mode=${input.mode ?? "text"}`;
2215
2276
  return JSON.stringify(input).slice(0, 60);
2216
2277
  }
2217
2278
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "0agent",
3
- "version": "1.0.9",
3
+ "version": "1.0.10",
4
4
  "description": "A persistent, learning AI agent that runs on your machine. An agent that learns.",
5
5
  "private": false,
6
6
  "license": "Apache-2.0",