@vakra-dev/reader-cli 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +178 -24
- package/package.json +3 -2
package/dist/index.js
CHANGED
|
@@ -49,6 +49,15 @@ function redactKey(key) {
|
|
|
49
49
|
|
|
50
50
|
// src/utils/output.ts
|
|
51
51
|
import { writeFileSync as writeFileSync2 } from "fs";
|
|
52
|
+
import yoctoSpinner from "yocto-spinner";
|
|
53
|
+
import {
|
|
54
|
+
ReaderApiError,
|
|
55
|
+
InsufficientCreditsError,
|
|
56
|
+
RateLimitedError,
|
|
57
|
+
UrlBlockedError,
|
|
58
|
+
UnauthenticatedError,
|
|
59
|
+
ScrapeTimeoutError
|
|
60
|
+
} from "@vakra-dev/reader-js";
|
|
52
61
|
/**
 * Print an informational message for the user.
 *
 * The message is written through `console.error`, not `console.log`.
 *
 * @param {*} msg - Value to print.
 */
function info(msg) {
  console.error(msg);
}
|
|
@@ -67,24 +76,143 @@ function saveScreenshot(base64, outputPath) {
|
|
|
67
76
|
writeFileSync2(path, buffer);
|
|
68
77
|
return path;
|
|
69
78
|
}
|
|
79
|
+
// Status lines handed to rotatingSpinner() by scrapeSpinner().
var SCRAPE_MESSAGES = [
  "Reading the page...",
  "Fetching content...",
  "Scraping the web...",
  "Extracting data..."
];

// Status lines appended after the "Crawling <domain>..." line by crawlSpinner().
var CRAWL_MESSAGES = [
  "Discovering pages...",
  "Following links...",
  "Mapping the site...",
  "Crawling deeper..."
];
|
|
91
|
+
/**
 * Start a spinner whose text cycles through `messages`.
 *
 * The first message is picked at random; afterwards the text advances to the
 * next entry (wrapping around) every `intervalMs` milliseconds. The spinner's
 * `success`, `error` and `stop` methods are wrapped so that finishing the
 * spinner by any of them also cancels the rotation timer.
 *
 * @param {string[]} messages - Texts to rotate through.
 * @param {number} [intervalMs=3000] - Delay between text changes.
 * @returns {object} The started spinner returned by `yoctoSpinner(...).start()`.
 */
function rotatingSpinner(messages, intervalMs = 3e3) {
  let position = Math.floor(Math.random() * messages.length);
  const spinner = yoctoSpinner({ text: messages[position] }).start();

  const rotation = setInterval(() => {
    position = (position + 1) % messages.length;
    spinner.text = messages[position];
  }, intervalMs);

  // Every way of ending the spinner must also end the rotation.
  for (const method of ["success", "error", "stop"]) {
    const original = spinner[method].bind(spinner);
    spinner[method] = (text) => {
      clearInterval(rotation);
      return original(text);
    };
  }

  return spinner;
}
|
|
115
|
+
/**
 * Start the spinner used by the `scrape` command.
 *
 * @returns {object} Whatever `rotatingSpinner(SCRAPE_MESSAGES)` returns.
 */
function scrapeSpinner() {
  const spinner = rotatingSpinner(SCRAPE_MESSAGES);
  return spinner;
}
|
|
118
|
+
/**
 * Start the spinner used by the `crawl` command.
 *
 * The rotation list is a "Crawling <domain>..." line followed by the shared
 * CRAWL_MESSAGES entries.
 *
 * @param {string} domain - Label interpolated into the first message.
 * @returns {object} Whatever `rotatingSpinner(...)` returns.
 */
function crawlSpinner(domain) {
  const messages = [`Crawling ${domain}...`];
  for (const line of CRAWL_MESSAGES) {
    messages.push(line);
  }
  return rotatingSpinner(messages);
}
|
|
124
|
+
/**
 * Start the fixed-text spinner shown while the `status` command runs its
 * API check.
 *
 * @returns {object} The value returned by the spinner's `start()`.
 */
function statusSpinner() {
  const spinner = yoctoSpinner({ text: "Pinging the API..." });
  return spinner.start();
}
|
|
127
|
+
/**
 * Start the fixed-text spinner shown while the `credits` command fetches the
 * balance.
 *
 * @returns {object} The value returned by the spinner's `start()`.
 */
function creditsSpinner() {
  const spinner = yoctoSpinner({ text: "Fetching balance..." });
  return spinner.start();
}
|
|
130
|
+
/**
 * Turn any thrown value into a human-readable (possibly multi-line) message.
 *
 * - `ReaderApiError`: "<message> (<code>, HTTP <status>)" followed by one
 *   detail line per field that is present for the specific subclass, then the
 *   docs URL and request id when available.
 * - Any other `Error`: the message, plus a hint line for connection-refused,
 *   DNS and generic network failures.
 * - Anything else: `String(err)`.
 *
 * @param {unknown} err - The caught value.
 * @returns {string} Text suitable for printing to the user.
 */
function formatError(err) {
  if (err instanceof ReaderApiError) {
    const lines = [`${err.message} (${err.code}, HTTP ${err.httpStatus})`];
    if (err instanceof InsufficientCreditsError) {
      // `!== void 0` rather than truthiness: 0 credits is worth showing.
      if (err.required !== void 0) lines.push(` Required: ${err.required} credits`);
      if (err.available !== void 0) lines.push(` Available: ${err.available} credits`);
      if (err.resetAt) lines.push(` Resets: ${err.resetAt}`);
    } else if (err instanceof RateLimitedError) {
      if (err.retryAfterSeconds) lines.push(` Retry after: ${err.retryAfterSeconds} seconds`);
    } else if (err instanceof UrlBlockedError) {
      if (err.url) lines.push(` URL: ${err.url}`);
      if (err.reason) lines.push(` Reason: ${err.reason}`);
    } else if (err instanceof UnauthenticatedError) {
      lines.push(" Check your API key: reader config show");
    } else if (err instanceof ScrapeTimeoutError) {
      if (err.timeoutMs) lines.push(` Timeout: ${err.timeoutMs}ms`);
    }
    if (err.docsUrl) lines.push(` Docs: ${err.docsUrl}`);
    if (err.requestId) lines.push(` Request: ${err.requestId}`);
    return lines.join("\n");
  }

  if (err instanceof Error) {
    const msg = err.message;
    // Node's built-in fetch rejects with a generic "fetch failed" TypeError and
    // puts the real reason (e.g. code "ECONNREFUSED" / "ENOTFOUND") on
    // `err.cause`, so the cause has to be searched as well as the message.
    const cause = err.cause;
    const causeCode = typeof cause?.code === "string" ? cause.code : "";
    const causeMessage = typeof cause?.message === "string" ? cause.message : "";
    const haystack = `${msg} ${causeCode} ${causeMessage}`;
    const withHint = (hint) => `${msg}\n${hint}`;

    if (haystack.includes("ECONNREFUSED")) {
      return withHint("Could not connect to the API. Check your API URL: reader config show");
    }
    if (haystack.includes("ENOTFOUND")) {
      return withHint("DNS lookup failed. Check your API URL: reader config show");
    }
    if (haystack.includes("fetch failed") || haystack.includes("network")) {
      return withHint("Network error. Check your internet connection.");
    }
    return msg;
  }

  return String(err);
}
|
|
170
|
+
|
|
171
|
+
// src/utils/validate.ts
|
|
172
|
+
// Output formats accepted by validateFormat(); also listed in its error message.
var VALID_FORMATS = [
  "markdown",
  "html",
  "screenshot"
];
|
|
173
|
+
/**
 * Make sure a user-supplied URL has a scheme and is parseable.
 *
 * Input without a leading "<scheme>://" gets "https://" prepended (and the
 * rewrite is reported through `info`). If the result cannot be parsed by
 * `new URL`, an error is printed and the process exits with status 1.
 *
 * @param {string} url - URL as typed by the user.
 * @returns {string} The URL, with "https://" prepended when it had no scheme.
 */
function normalizeUrl(url) {
  // Only a scheme at the very start counts. A plain `includes("://")` is also
  // true for scheme-less input that carries a URL inside its path or query
  // (e.g. "example.com/r?to=https://x.y"), which then skipped the prefix and
  // was rejected as invalid.
  const hasScheme = /^[a-z][a-z\d+.-]*:\/\//i.test(url);
  let normalized = url;
  if (!hasScheme) {
    normalized = `https://${url}`;
    info(`Normalized URL: ${url} -> ${normalized}`);
  }
  try {
    new URL(normalized);
  } catch {
    error(`Invalid URL: "${url}". Expected format: https://example.com`);
    process.exit(1);
  }
  return normalized;
}
|
|
187
|
+
/**
 * Parse a CLI option value that must be a positive integer.
 *
 * The whole value has to be decimal digits (surrounding whitespace and a
 * leading "+" are tolerated). Anything else prints an error and exits the
 * process with status 1.
 *
 * @param {string} value - Raw option value from the command line.
 * @param {string} name - Option name used in the error message (e.g. "--timeout").
 * @returns {number} The parsed integer, always > 0.
 */
function parsePositiveInt(value, name) {
  const text = String(value).trim();
  // parseInt alone stops at the first non-digit, so "12abc", "1.5" and "1e3"
  // would be accepted as 12, 1 and 1. Require the entire string to be digits.
  const n = /^\+?\d+$/.test(text) ? Number.parseInt(text, 10) : Number.NaN;
  // isSafeInteger also rejects NaN and digit strings too large to represent exactly.
  if (!Number.isSafeInteger(n) || n <= 0) {
    error(`Invalid value for ${name}: "${value}" is not a positive integer`);
    process.exit(1);
  }
  return n;
}
|
|
195
|
+
/**
 * Check that `format` is one of VALID_FORMATS.
 *
 * An unknown format prints an error listing the allowed values and exits the
 * process with status 1.
 *
 * @param {string} format - Format name supplied by the user.
 * @returns {string} The same `format` value.
 */
function validateFormat(format) {
  const isKnown = VALID_FORMATS.includes(format);
  if (isKnown) {
    return format;
  }
  error(`Invalid format: "${format}". Allowed: ${VALID_FORMATS.join(", ")}`);
  process.exit(1);
  return format;
}
|
|
70
202
|
|
|
71
203
|
// src/commands/scrape.ts
|
|
72
204
|
function registerScrapeCommand(program2) {
|
|
73
|
-
program2.command("scrape <url>").description("Scrape a URL and output content").option("-f, --format <format>", "Output format: markdown (default), html, screenshot", "markdown").option("--json", "Output full JSON response").option("-o, --output <file>", "Write output to file").option("--no-main-content", "Include full page (nav, header, footer)").option("--include-tags <selectors>", "CSS selectors to include (comma-separated)").option("--exclude-tags <selectors>", "CSS selectors to exclude (comma-separated)").option("--wait-for <selector>", "Wait for CSS selector before scraping").option("--timeout <ms>", "Timeout in milliseconds", "30000").option("--proxy-mode <mode>", "Proxy mode: standard, stealth, auto").action(async (
|
|
205
|
+
program2.command("scrape <url>").description("Scrape a URL and output content").option("-f, --format <format>", "Output format: markdown (default), html, screenshot", "markdown").option("--json", "Output full JSON response").option("-o, --output <file>", "Write output to file").option("--no-main-content", "Include full page (nav, header, footer)").option("--include-tags <selectors>", "CSS selectors to include (comma-separated)").option("--exclude-tags <selectors>", "CSS selectors to exclude (comma-separated)").option("--wait-for <selector>", "Wait for CSS selector before scraping").option("--timeout <ms>", "Timeout in milliseconds", "30000").option("--proxy-mode <mode>", "Proxy mode: standard, stealth, auto").action(async (rawUrl, opts) => {
|
|
206
|
+
const url = normalizeUrl(rawUrl);
|
|
74
207
|
const apiKey = getApiKey();
|
|
75
208
|
const client = new ReaderClient({ apiKey, baseUrl: getApiUrl() });
|
|
76
|
-
const
|
|
77
|
-
const
|
|
78
|
-
if (requestedFormat === "screenshot") {
|
|
79
|
-
formats.push("screenshot");
|
|
80
|
-
} else if (requestedFormat === "html") {
|
|
81
|
-
formats.push("html");
|
|
82
|
-
} else {
|
|
83
|
-
formats.push("markdown");
|
|
84
|
-
}
|
|
209
|
+
const requestedFormat = validateFormat(opts.format);
|
|
210
|
+
const formats = [requestedFormat];
|
|
85
211
|
if (requestedFormat !== "screenshot" && opts.output?.endsWith(".png")) {
|
|
86
212
|
formats.push("screenshot");
|
|
87
213
|
}
|
|
214
|
+
const timeout = parsePositiveInt(opts.timeout, "--timeout");
|
|
215
|
+
const spinner = scrapeSpinner();
|
|
88
216
|
try {
|
|
89
217
|
const result = await client.read({
|
|
90
218
|
url,
|
|
@@ -93,11 +221,12 @@ function registerScrapeCommand(program2) {
|
|
|
93
221
|
includeTags: opts.includeTags?.split(",").map((s) => s.trim()),
|
|
94
222
|
excludeTags: opts.excludeTags?.split(",").map((s) => s.trim()),
|
|
95
223
|
waitForSelector: opts.waitFor,
|
|
96
|
-
timeoutMs:
|
|
224
|
+
timeoutMs: timeout,
|
|
97
225
|
proxyMode: opts.proxyMode
|
|
98
226
|
});
|
|
99
227
|
if (result.kind === "scrape") {
|
|
100
228
|
const data = result.data;
|
|
229
|
+
spinner.success("Done -- content ready");
|
|
101
230
|
if (opts.json) {
|
|
102
231
|
outputJson(data);
|
|
103
232
|
return;
|
|
@@ -117,6 +246,7 @@ function registerScrapeCommand(program2) {
|
|
|
117
246
|
}
|
|
118
247
|
} else {
|
|
119
248
|
const job = result.data;
|
|
249
|
+
spinner.success(`Done -- ${job.results.length} pages`);
|
|
120
250
|
if (opts.json) {
|
|
121
251
|
outputJson(job);
|
|
122
252
|
} else {
|
|
@@ -126,8 +256,8 @@ function registerScrapeCommand(program2) {
|
|
|
126
256
|
}
|
|
127
257
|
}
|
|
128
258
|
} catch (err) {
|
|
129
|
-
|
|
130
|
-
error(
|
|
259
|
+
spinner.error("Failed");
|
|
260
|
+
error(formatError(err));
|
|
131
261
|
process.exit(1);
|
|
132
262
|
}
|
|
133
263
|
});
|
|
@@ -136,22 +266,33 @@ function registerScrapeCommand(program2) {
|
|
|
136
266
|
// src/commands/crawl.ts
|
|
137
267
|
import { ReaderClient as ReaderClient2 } from "@vakra-dev/reader-js";
|
|
138
268
|
function registerCrawlCommand(program2) {
|
|
139
|
-
program2.command("crawl <url>").description("Crawl a website and output discovered pages").option("--max-depth <n>", "Maximum crawl depth", "2").option("--max-pages <n>", "Maximum pages to crawl", "20").option("--urls-only", "Only output discovered URLs, don't scrape content").option("--json", "Output full JSON response").option("-o, --output-dir <dir>", "Write each page to a separate file").action(async (
|
|
269
|
+
program2.command("crawl <url>").description("Crawl a website and output discovered pages").option("--max-depth <n>", "Maximum crawl depth", "2").option("--max-pages <n>", "Maximum pages to crawl", "20").option("--urls-only", "Only output discovered URLs, don't scrape content").option("--json", "Output full JSON response").option("-o, --output-dir <dir>", "Write each page to a separate file").action(async (rawUrl, opts) => {
|
|
270
|
+
const url = normalizeUrl(rawUrl);
|
|
140
271
|
const apiKey = getApiKey();
|
|
141
272
|
const client = new ReaderClient2({ apiKey, baseUrl: getApiUrl() });
|
|
142
|
-
|
|
273
|
+
const maxDepth = parsePositiveInt(opts.maxDepth, "--max-depth");
|
|
274
|
+
const maxPages = parsePositiveInt(opts.maxPages, "--max-pages");
|
|
275
|
+
let domain;
|
|
276
|
+
try {
|
|
277
|
+
domain = new URL(url).hostname;
|
|
278
|
+
} catch {
|
|
279
|
+
domain = url;
|
|
280
|
+
}
|
|
281
|
+
const spinner = crawlSpinner(domain);
|
|
143
282
|
try {
|
|
144
283
|
const result = await client.read({
|
|
145
284
|
url,
|
|
146
|
-
maxDepth
|
|
147
|
-
maxPages
|
|
285
|
+
maxDepth,
|
|
286
|
+
maxPages,
|
|
148
287
|
formats: opts.urlsOnly ? [] : ["markdown"]
|
|
149
288
|
});
|
|
150
289
|
if (result.kind !== "job") {
|
|
151
|
-
error("Unexpected response
|
|
290
|
+
spinner.error("Unexpected response");
|
|
291
|
+
error("Expected a crawl job but got a scrape result");
|
|
152
292
|
process.exit(1);
|
|
153
293
|
}
|
|
154
294
|
const job = result.data;
|
|
295
|
+
spinner.success(`Done -- ${job.results.length} pages crawled`);
|
|
155
296
|
if (opts.json) {
|
|
156
297
|
outputJson(job);
|
|
157
298
|
return;
|
|
@@ -186,8 +327,8 @@ ${job.results.length} URLs discovered`);
|
|
|
186
327
|
${job.results.length} pages crawled`);
|
|
187
328
|
}
|
|
188
329
|
} catch (err) {
|
|
189
|
-
|
|
190
|
-
error(
|
|
330
|
+
spinner.error("Failed");
|
|
331
|
+
error(formatError(err));
|
|
191
332
|
process.exit(1);
|
|
192
333
|
}
|
|
193
334
|
});
|
|
@@ -208,13 +349,15 @@ function registerStatusCommand(program2) {
|
|
|
208
349
|
console.log(`Reader CLI v${version}`);
|
|
209
350
|
console.log(`API: ${apiUrl}`);
|
|
210
351
|
console.log(`Key: ${redactKey(apiKey)}`);
|
|
352
|
+
const spinner = statusSpinner();
|
|
211
353
|
try {
|
|
212
354
|
const credits = await client.getCredits();
|
|
355
|
+
spinner.success("Connected");
|
|
213
356
|
console.log(`Credits: ${credits.balance} / ${credits.limit} (${credits.tier} tier)`);
|
|
214
357
|
console.log(`Resets: ${credits.resetAt}`);
|
|
215
358
|
} catch (err) {
|
|
216
|
-
|
|
217
|
-
|
|
359
|
+
spinner.error("Connection failed");
|
|
360
|
+
error(formatError(err));
|
|
218
361
|
process.exit(1);
|
|
219
362
|
}
|
|
220
363
|
});
|
|
@@ -226,8 +369,10 @@ function registerCreditsCommand(program2) {
|
|
|
226
369
|
program2.command("credits").description("Check credit balance and usage").option("--json", "Output full JSON response").action(async (opts) => {
|
|
227
370
|
const apiKey = getApiKey();
|
|
228
371
|
const client = new ReaderClient4({ apiKey, baseUrl: getApiUrl() });
|
|
372
|
+
const spinner = creditsSpinner();
|
|
229
373
|
try {
|
|
230
374
|
const credits = await client.getCredits();
|
|
375
|
+
spinner.stop();
|
|
231
376
|
if (opts.json) {
|
|
232
377
|
outputJson(credits);
|
|
233
378
|
return;
|
|
@@ -237,8 +382,8 @@ function registerCreditsCommand(program2) {
|
|
|
237
382
|
console.log(`Tier: ${credits.tier}`);
|
|
238
383
|
console.log(`Resets: ${credits.resetAt}`);
|
|
239
384
|
} catch (err) {
|
|
240
|
-
|
|
241
|
-
error(
|
|
385
|
+
spinner.error("Failed");
|
|
386
|
+
error(formatError(err));
|
|
242
387
|
process.exit(1);
|
|
243
388
|
}
|
|
244
389
|
});
|
|
@@ -250,10 +395,19 @@ function registerConfigCommand(program2) {
|
|
|
250
395
|
config.command("set <key> <value>").description("Set a config value (api-key, api-url)").action((key, value) => {
|
|
251
396
|
const current = loadConfig();
|
|
252
397
|
if (key === "api-key") {
|
|
398
|
+
if (!value.startsWith("rdr_")) {
|
|
399
|
+
console.error(`Warning: API key doesn't start with "rdr_". This may not be a valid Reader API key.`);
|
|
400
|
+
}
|
|
253
401
|
current.apiKey = value;
|
|
254
402
|
saveConfig(current);
|
|
255
403
|
console.error(`API key saved: ${redactKey(value)}`);
|
|
256
404
|
} else if (key === "api-url") {
|
|
405
|
+
try {
|
|
406
|
+
new URL(value);
|
|
407
|
+
} catch {
|
|
408
|
+
console.error(`Error: Invalid URL: "${value}"`);
|
|
409
|
+
process.exit(1);
|
|
410
|
+
}
|
|
257
411
|
current.apiUrl = value;
|
|
258
412
|
saveConfig(current);
|
|
259
413
|
console.error(`API URL saved: ${value}`);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@vakra-dev/reader-cli",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.2.0",
|
|
4
4
|
"description": "CLI for the Reader API - read the web for your AI agents",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -17,7 +17,8 @@
|
|
|
17
17
|
},
|
|
18
18
|
"dependencies": {
|
|
19
19
|
"@vakra-dev/reader-js": "^0.2.0",
|
|
20
|
-
"commander": "^12.0.0"
|
|
20
|
+
"commander": "^12.0.0",
|
|
21
|
+
"yocto-spinner": "^1.2.0"
|
|
21
22
|
},
|
|
22
23
|
"devDependencies": {
|
|
23
24
|
"tsup": "^8.3.6",
|