@robot-resources/scraper 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/bin/setup.js +81 -91
  2. package/package.json +1 -2
package/bin/setup.js CHANGED
@@ -1,15 +1,36 @@
1
1
  #!/usr/bin/env node
2
2
 
3
3
  /**
4
- * robot-resources-scraper — Setup wizard for @robot-resources/scraper.
4
+ * robot-resources-scraper — CLI for @robot-resources/scraper.
5
5
  *
6
- * Triggered via `npx @robot-resources/scraper`.
7
- * Offers optional GitHub login and shows usage instructions.
6
+ * Usage:
7
+ * npx @robot-resources/scraper <url> → compressed markdown to stdout
8
+ * npx @robot-resources/scraper <url> --json → full result as JSON
9
+ * npx @robot-resources/scraper <url> --mode fast → specific fetch mode
10
+ * npx @robot-resources/scraper → show usage & setup info
8
11
  */
9
12
 
10
- import { readFileSync, writeFileSync, copyFileSync, mkdirSync, existsSync } from "node:fs";
13
+ import { readFileSync } from "node:fs";
11
14
  import { homedir } from "node:os";
12
- import { join } from "node:path";
15
+ import { join, dirname } from "node:path";
16
+ import { fileURLToPath } from "node:url";
17
+
18
+ // ─── Arg parsing (zero deps) ────────────────────────────────────────────────
19
+
20
+ const args = process.argv.slice(2);
21
+ let url = null;
22
+ let mode = "auto";
23
+ let timeout = undefined;
24
+ let json = false;
25
+ let help = false;
26
+
27
+ for (let i = 0; i < args.length; i++) {
28
+ if (args[i] === "--mode" && args[i + 1]) { mode = args[++i]; continue; }
29
+ if (args[i] === "--timeout" && args[i + 1]) { timeout = Number(args[++i]); continue; }
30
+ if (args[i] === "--json") { json = true; continue; }
31
+ if (args[i] === "--help" || args[i] === "-h") { help = true; continue; }
32
+ if (!url && /^https?:\/\//.test(args[i])) { url = args[i]; continue; }
33
+ }
13
34
 
14
35
  // ─── ANSI helpers ────────────────────────────────────────────────────────────
15
36
 
@@ -23,113 +44,82 @@ const c = {
23
44
  blue: "\x1b[34m",
24
45
  };
25
46
 
26
- function success(msg) { console.log(` ${c.green}✓${c.reset} ${msg}`); }
27
- function step(msg) { console.log(` ${c.cyan}→${c.reset} ${msg}`); }
28
- function info(msg) { console.log(` ${c.dim}${msg}${c.reset}`); }
29
- function warn(msg) { console.log(` ${c.yellow}!${c.reset} ${msg}`); }
30
-
31
- // ─── Config helpers (inline — no external deps for this bin) ─────────────────
47
+ // ─── Config helpers ──────────────────────────────────────────────────────────
32
48
 
33
- const CONFIG_DIR = join(homedir(), ".robot-resources");
34
- const CONFIG_FILE = join(CONFIG_DIR, "config.json");
49
+ const CONFIG_FILE = join(homedir(), ".robot-resources", "config.json");
35
50
 
36
51
  function readConfig() {
37
52
  try { return JSON.parse(readFileSync(CONFIG_FILE, "utf-8")); }
38
53
  catch { return {}; }
39
54
  }
40
55
 
41
- // ─── MCP auto-config ─────────────────────────────────────────────────────────
42
-
43
- const MCP_KEY = "robot-resources-scraper";
44
- const MCP_ENTRY = { command: "npx", args: ["-y", "@robot-resources/scraper-mcp"] };
45
-
46
- function detectAgents() {
47
- const home = homedir();
48
- const agents = [
49
- {
50
- name: "Claude Desktop",
51
- configPath: process.platform === "darwin"
52
- ? join(home, "Library", "Application Support", "Claude", "claude_desktop_config.json")
53
- : join(home, ".config", "Claude", "claude_desktop_config.json"),
54
- },
55
- { name: "Cursor", configPath: join(home, ".cursor", "mcp.json") },
56
- ];
57
- return agents.filter((a) => existsSync(a.configPath) || existsSync(join(a.configPath, "..")));
58
- }
56
+ // ─── Scrape mode: URL provided ──────────────────────────────────────────────
59
57
 
60
- function configureAgentMCP() {
61
- const agents = detectAgents();
62
- const results = [];
63
-
64
- for (const agent of agents) {
65
- try {
66
- let config;
67
- try { config = JSON.parse(readFileSync(agent.configPath, "utf-8")); }
68
- catch { config = {}; }
69
-
70
- config.mcpServers = config.mcpServers || {};
71
- if (config.mcpServers[MCP_KEY]) {
72
- results.push({ name: agent.name, action: "exists" });
73
- continue;
74
- }
75
-
76
- // Backup before modifying
77
- if (existsSync(agent.configPath)) {
78
- copyFileSync(agent.configPath, `${agent.configPath}.bak`);
79
- }
80
-
81
- config.mcpServers[MCP_KEY] = MCP_ENTRY;
82
- mkdirSync(join(agent.configPath, ".."), { recursive: true });
83
- writeFileSync(agent.configPath, JSON.stringify(config, null, 2) + "\n");
84
- results.push({ name: agent.name, action: "added" });
85
- } catch (err) {
86
- results.push({ name: agent.name, action: "error", reason: err.message });
87
- }
58
+ async function runScrape(targetUrl, opts) {
59
+ const { scrape } = await import("@robot-resources/scraper");
60
+ const result = await scrape(targetUrl, {
61
+ mode: opts.mode,
62
+ timeout: opts.timeout,
63
+ });
64
+
65
+ if (opts.json) {
66
+ process.stdout.write(JSON.stringify(result) + "\n");
67
+ } else {
68
+ process.stdout.write(result.markdown + "\n");
88
69
  }
89
- return results;
90
70
  }
91
71
 
92
- // ─── Main ────────────────────────────────────────────────────────────────────
72
+ // ─── Usage mode: no URL ─────────────────────────────────────────────────────
73
+
74
+ function showUsage() {
75
+ const __dirname = dirname(fileURLToPath(import.meta.url));
76
+ const pkg = JSON.parse(readFileSync(join(__dirname, "..", "package.json"), "utf-8"));
93
77
 
94
- async function main() {
95
- console.log(`\n ${c.blue}${c.bold}██ Robot Resources — Scraper Setup${c.reset}\n`);
78
+ console.log(`\n ${c.blue}${c.bold}██ Robot Resources — Scraper v${pkg.version}${c.reset}\n`);
96
79
 
97
- // Step 1: Auth status
80
+ // Auth status
98
81
  const config = readConfig();
99
82
  if (config.api_key) {
100
- success(`Logged in as ${config.user_name || config.user_email || "unknown"}`);
83
+ console.log(` ${c.green}✓${c.reset} Logged in as ${config.user_name || config.user_email || "unknown"}`);
101
84
  } else {
102
- info("Not logged in. Scraper works without login.");
103
- info(`To enable telemetry, run: ${c.cyan}npx robot-resources${c.reset}`);
85
+ console.log(` ${c.dim}Not logged in. Scraper works without login.${c.reset}`);
86
+ console.log(` ${c.dim}To enable telemetry, run: ${c.cyan}npx robot-resources${c.reset}`);
104
87
  }
105
88
 
106
- // Step 2: MCP auto-config
107
- console.log("");
108
- step("Configuring MCP in detected agents...");
89
+ // CLI usage
90
+ console.log(`\n ${c.blue}${c.bold}── Command Line ──${c.reset}\n`);
91
+ console.log(` ${c.cyan}npx @robot-resources/scraper ${c.dim}<url>${c.reset} Compressed markdown`);
92
+ console.log(` ${c.cyan}npx @robot-resources/scraper ${c.dim}<url>${c.cyan} --json${c.reset} Full result as JSON`);
93
+ console.log(` ${c.cyan}npx @robot-resources/scraper ${c.dim}<url>${c.cyan} --mode stealth${c.reset} Stealth fetch mode`);
94
+ console.log(` ${c.cyan}npx @robot-resources/scraper ${c.dim}<url>${c.cyan} --timeout 15000${c.reset} Custom timeout (ms)`);
109
95
 
110
- const mcpResults = configureAgentMCP();
111
- if (mcpResults.length === 0) {
112
- info("No supported agents detected (Claude Desktop, Cursor)");
113
- } else {
114
- for (const r of mcpResults) {
115
- if (r.action === "added") success(`${r.name}: scraper MCP configured`);
116
- else if (r.action === "exists") success(`${r.name}: already configured`);
117
- else warn(`${r.name}: ${r.reason || r.action}`);
118
- }
119
- }
120
-
121
- // Step 3: Usage
122
- console.log(`\n ${c.blue}${c.bold}── Ready ──${c.reset}\n`);
123
- console.log(" Use as a library:");
96
+ // Library usage
97
+ console.log(`\n ${c.blue}${c.bold}── As a Library ──${c.reset}\n`);
124
98
  console.log(` ${c.dim}import { scrape } from '@robot-resources/scraper';${c.reset}`);
125
99
  console.log(` ${c.dim}const result = await scrape('https://example.com');${c.reset}`);
126
- console.log("");
127
- console.log(" Use via MCP (already configured above):");
128
- console.log(` ${c.dim}Your agent can call scraper_compress_url(url)${c.reset}`);
100
+
101
+ // MCP usage (generic, not agent-specific)
102
+ console.log(`\n ${c.blue}${c.bold}── Via MCP ──${c.reset}\n`);
103
+ console.log(` ${c.dim}Add to your agent's MCP config:${c.reset}`);
104
+ console.log(` ${c.dim}{${c.reset}`);
105
+ console.log(` ${c.dim} "mcpServers": {${c.reset}`);
106
+ console.log(` ${c.dim} "scraper": {${c.reset}`);
107
+ console.log(` ${c.dim} "command": "npx",${c.reset}`);
108
+ console.log(` ${c.dim} "args": ["-y", "@robot-resources/scraper-mcp"]${c.reset}`);
109
+ console.log(` ${c.dim} }${c.reset}`);
110
+ console.log(` ${c.dim} }${c.reset}`);
111
+ console.log(` ${c.dim}}${c.reset}`);
112
+
129
113
  console.log("");
130
114
  }
131
115
 
132
- main().catch((err) => {
133
- console.error(`\n Setup failed: ${err.message}\n`);
134
- process.exit(1);
135
- });
116
+ // ─── Main ────────────────────────────────────────────────────────────────────
117
+
118
+ if (url && !help) {
119
+ runScrape(url, { mode, timeout, json }).catch((err) => {
120
+ process.stderr.write(`Error: ${err.message}\n`);
121
+ process.exit(1);
122
+ });
123
+ } else {
124
+ showUsage();
125
+ }
package/package.json CHANGED
@@ -1,9 +1,8 @@
1
1
  {
2
2
  "name": "@robot-resources/scraper",
3
- "version": "0.1.1",
3
+ "version": "0.1.2",
4
4
  "description": "Context compression for AI agents. Fetch -> Extract -> Convert pipeline without LLM dependency.",
5
5
  "author": "Robot Resources",
6
- "publishConfig": { "access": "public" },
7
6
  "license": "MIT",
8
7
  "type": "module",
9
8
  "bin": {