@robot-resources/scraper 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/setup.js +81 -91
- package/package.json +1 -2
package/bin/setup.js
CHANGED
|
@@ -1,15 +1,36 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
|
|
3
3
|
/**
|
|
4
|
-
* robot-resources-scraper —
|
|
4
|
+
* robot-resources-scraper — CLI for @robot-resources/scraper.
|
|
5
5
|
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
6
|
+
* Usage:
|
|
7
|
+
* npx @robot-resources/scraper <url> → compressed markdown to stdout
|
|
8
|
+
* npx @robot-resources/scraper <url> --json → full result as JSON
|
|
9
|
+
* npx @robot-resources/scraper <url> --mode fast → specific fetch mode
|
|
10
|
+
* npx @robot-resources/scraper → show usage & setup info
|
|
8
11
|
*/
|
|
9
12
|
|
|
10
|
-
import { readFileSync, writeFileSync, existsSync, mkdirSync, copyFileSync } from "node:fs";
|
|
13
|
+
import { readFileSync } from "node:fs";
|
|
11
14
|
import { homedir } from "node:os";
|
|
12
|
-
import { join } from "node:path";
|
|
15
|
+
import { join, dirname } from "node:path";
|
|
16
|
+
import { fileURLToPath } from "node:url";
|
|
17
|
+
|
|
18
|
+
// ─── Arg parsing (zero deps) ────────────────────────────────────────────────
|
|
19
|
+
|
|
20
|
+
const args = process.argv.slice(2);
|
|
21
|
+
let url = null;
|
|
22
|
+
let mode = "auto";
|
|
23
|
+
let timeout = undefined;
|
|
24
|
+
let json = false;
|
|
25
|
+
let help = false;
|
|
26
|
+
|
|
27
|
+
for (let i = 0; i < args.length; i++) {
|
|
28
|
+
if (args[i] === "--mode" && args[i + 1]) { mode = args[++i]; continue; }
|
|
29
|
+
if (args[i] === "--timeout" && args[i + 1]) { timeout = Number(args[++i]); continue; }
|
|
30
|
+
if (args[i] === "--json") { json = true; continue; }
|
|
31
|
+
if (args[i] === "--help" || args[i] === "-h") { help = true; continue; }
|
|
32
|
+
if (!url && /^https?:\/\//.test(args[i])) { url = args[i]; continue; }
|
|
33
|
+
}
|
|
13
34
|
|
|
14
35
|
// ─── ANSI helpers ────────────────────────────────────────────────────────────
|
|
15
36
|
|
|
@@ -23,113 +44,82 @@ const c = {
|
|
|
23
44
|
blue: "\x1b[34m",
|
|
24
45
|
};
|
|
25
46
|
|
|
26
|
-
|
|
27
|
-
function step(msg) { console.log(` ${c.cyan}→${c.reset} ${msg}`); }
|
|
28
|
-
function info(msg) { console.log(` ${c.dim}${msg}${c.reset}`); }
|
|
29
|
-
function warn(msg) { console.log(` ${c.yellow}!${c.reset} ${msg}`); }
|
|
30
|
-
|
|
31
|
-
// ─── Config helpers (inline — no external deps for this bin) ─────────────────
|
|
47
|
+
// ─── Config helpers ──────────────────────────────────────────────────────────
|
|
32
48
|
|
|
33
|
-
const CONFIG_DIR = join(homedir(), ".robot-resources");
|
|
34
|
-
const CONFIG_FILE = join(CONFIG_DIR, "config.json");
|
|
49
|
+
const CONFIG_FILE = join(homedir(), ".robot-resources", "config.json");
|
|
35
50
|
|
|
36
51
|
function readConfig() {
|
|
37
52
|
try { return JSON.parse(readFileSync(CONFIG_FILE, "utf-8")); }
|
|
38
53
|
catch { return {}; }
|
|
39
54
|
}
|
|
40
55
|
|
|
41
|
-
// ───
|
|
42
|
-
|
|
43
|
-
const MCP_KEY = "robot-resources-scraper";
|
|
44
|
-
const MCP_ENTRY = { command: "npx", args: ["-y", "@robot-resources/scraper-mcp"] };
|
|
45
|
-
|
|
46
|
-
function detectAgents() {
|
|
47
|
-
const home = homedir();
|
|
48
|
-
const agents = [
|
|
49
|
-
{
|
|
50
|
-
name: "Claude Desktop",
|
|
51
|
-
configPath: process.platform === "darwin"
|
|
52
|
-
? join(home, "Library", "Application Support", "Claude", "claude_desktop_config.json")
|
|
53
|
-
: join(home, ".config", "Claude", "claude_desktop_config.json"),
|
|
54
|
-
},
|
|
55
|
-
{ name: "Cursor", configPath: join(home, ".cursor", "mcp.json") },
|
|
56
|
-
];
|
|
57
|
-
return agents.filter((a) => existsSync(a.configPath) || existsSync(join(a.configPath, "..")));
|
|
58
|
-
}
|
|
56
|
+
// ─── Scrape mode: URL provided ──────────────────────────────────────────────
|
|
59
57
|
|
|
60
|
-
function
|
|
61
|
-
const
|
|
62
|
-
const
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
if (config.mcpServers[MCP_KEY]) {
|
|
72
|
-
results.push({ name: agent.name, action: "exists" });
|
|
73
|
-
continue;
|
|
74
|
-
}
|
|
75
|
-
|
|
76
|
-
// Backup before modifying
|
|
77
|
-
if (existsSync(agent.configPath)) {
|
|
78
|
-
copyFileSync(agent.configPath, `${agent.configPath}.bak`);
|
|
79
|
-
}
|
|
80
|
-
|
|
81
|
-
config.mcpServers[MCP_KEY] = MCP_ENTRY;
|
|
82
|
-
mkdirSync(join(agent.configPath, ".."), { recursive: true });
|
|
83
|
-
writeFileSync(agent.configPath, JSON.stringify(config, null, 2) + "\n");
|
|
84
|
-
results.push({ name: agent.name, action: "added" });
|
|
85
|
-
} catch (err) {
|
|
86
|
-
results.push({ name: agent.name, action: "error", reason: err.message });
|
|
87
|
-
}
|
|
58
|
+
async function runScrape(targetUrl, opts) {
|
|
59
|
+
const { scrape } = await import("@robot-resources/scraper");
|
|
60
|
+
const result = await scrape(targetUrl, {
|
|
61
|
+
mode: opts.mode,
|
|
62
|
+
timeout: opts.timeout,
|
|
63
|
+
});
|
|
64
|
+
|
|
65
|
+
if (opts.json) {
|
|
66
|
+
process.stdout.write(JSON.stringify(result) + "\n");
|
|
67
|
+
} else {
|
|
68
|
+
process.stdout.write(result.markdown + "\n");
|
|
88
69
|
}
|
|
89
|
-
return results;
|
|
90
70
|
}
|
|
91
71
|
|
|
92
|
-
// ───
|
|
72
|
+
// ─── Usage mode: no URL ─────────────────────────────────────────────────────
|
|
73
|
+
|
|
74
|
+
function showUsage() {
|
|
75
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
76
|
+
const pkg = JSON.parse(readFileSync(join(__dirname, "..", "package.json"), "utf-8"));
|
|
93
77
|
|
|
94
|
-
|
|
95
|
-
console.log(`\n ${c.blue}${c.bold}██ Robot Resources — Scraper Setup${c.reset}\n`);
|
|
78
|
+
console.log(`\n ${c.blue}${c.bold}██ Robot Resources — Scraper v${pkg.version}${c.reset}\n`);
|
|
96
79
|
|
|
97
|
-
//
|
|
80
|
+
// Auth status
|
|
98
81
|
const config = readConfig();
|
|
99
82
|
if (config.api_key) {
|
|
100
|
-
|
|
83
|
+
console.log(` ${c.green}✓${c.reset} Logged in as ${config.user_name || config.user_email || "unknown"}`);
|
|
101
84
|
} else {
|
|
102
|
-
|
|
103
|
-
|
|
85
|
+
console.log(` ${c.dim}Not logged in. Scraper works without login.${c.reset}`);
|
|
86
|
+
console.log(` ${c.dim}To enable telemetry, run: ${c.cyan}npx robot-resources${c.reset}`);
|
|
104
87
|
}
|
|
105
88
|
|
|
106
|
-
//
|
|
107
|
-
console.log(
|
|
108
|
-
|
|
89
|
+
// CLI usage
|
|
90
|
+
console.log(`\n ${c.blue}${c.bold}── Command Line ──${c.reset}\n`);
|
|
91
|
+
console.log(` ${c.cyan}npx @robot-resources/scraper ${c.dim}<url>${c.reset} Compressed markdown`);
|
|
92
|
+
console.log(` ${c.cyan}npx @robot-resources/scraper ${c.dim}<url>${c.cyan} --json${c.reset} Full result as JSON`);
|
|
93
|
+
console.log(` ${c.cyan}npx @robot-resources/scraper ${c.dim}<url>${c.cyan} --mode stealth${c.reset} Stealth fetch mode`);
|
|
94
|
+
console.log(` ${c.cyan}npx @robot-resources/scraper ${c.dim}<url>${c.cyan} --timeout 15000${c.reset} Custom timeout (ms)`);
|
|
109
95
|
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
info("No supported agents detected (Claude Desktop, Cursor)");
|
|
113
|
-
} else {
|
|
114
|
-
for (const r of mcpResults) {
|
|
115
|
-
if (r.action === "added") success(`${r.name}: scraper MCP configured`);
|
|
116
|
-
else if (r.action === "exists") success(`${r.name}: already configured`);
|
|
117
|
-
else warn(`${r.name}: ${r.reason || r.action}`);
|
|
118
|
-
}
|
|
119
|
-
}
|
|
120
|
-
|
|
121
|
-
// Step 3: Usage
|
|
122
|
-
console.log(`\n ${c.blue}${c.bold}── Ready ──${c.reset}\n`);
|
|
123
|
-
console.log(" Use as a library:");
|
|
96
|
+
// Library usage
|
|
97
|
+
console.log(`\n ${c.blue}${c.bold}── As a Library ──${c.reset}\n`);
|
|
124
98
|
console.log(` ${c.dim}import { scrape } from '@robot-resources/scraper';${c.reset}`);
|
|
125
99
|
console.log(` ${c.dim}const result = await scrape('https://example.com');${c.reset}`);
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
console.log(
|
|
100
|
+
|
|
101
|
+
// MCP usage (generic, not agent-specific)
|
|
102
|
+
console.log(`\n ${c.blue}${c.bold}── Via MCP ──${c.reset}\n`);
|
|
103
|
+
console.log(` ${c.dim}Add to your agent's MCP config:${c.reset}`);
|
|
104
|
+
console.log(` ${c.dim}{${c.reset}`);
|
|
105
|
+
console.log(` ${c.dim} "mcpServers": {${c.reset}`);
|
|
106
|
+
console.log(` ${c.dim} "scraper": {${c.reset}`);
|
|
107
|
+
console.log(` ${c.dim} "command": "npx",${c.reset}`);
|
|
108
|
+
console.log(` ${c.dim} "args": ["-y", "@robot-resources/scraper-mcp"]${c.reset}`);
|
|
109
|
+
console.log(` ${c.dim} }${c.reset}`);
|
|
110
|
+
console.log(` ${c.dim} }${c.reset}`);
|
|
111
|
+
console.log(` ${c.dim}}${c.reset}`);
|
|
112
|
+
|
|
129
113
|
console.log("");
|
|
130
114
|
}
|
|
131
115
|
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
})
|
|
116
|
+
// ─── Main ────────────────────────────────────────────────────────────────────
|
|
117
|
+
|
|
118
|
+
if (url && !help) {
|
|
119
|
+
runScrape(url, { mode, timeout, json }).catch((err) => {
|
|
120
|
+
process.stderr.write(`Error: ${err.message}\n`);
|
|
121
|
+
process.exit(1);
|
|
122
|
+
});
|
|
123
|
+
} else {
|
|
124
|
+
showUsage();
|
|
125
|
+
}
|
package/package.json
CHANGED
|
@@ -1,9 +1,8 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@robot-resources/scraper",
|
|
3
|
-
"version": "0.1.1",
|
|
3
|
+
"version": "0.1.2",
|
|
4
4
|
"description": "Context compression for AI agents. Fetch -> Extract -> Convert pipeline without LLM dependency.",
|
|
5
5
|
"author": "Robot Resources",
|
|
6
|
-
"publishConfig": { "access": "public" },
|
|
7
6
|
"license": "MIT",
|
|
8
7
|
"type": "module",
|
|
9
8
|
"bin": {
|