@vakra-dev/reader-js 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +107 -0
- package/dist/README.md +107 -0
- package/dist/index.cjs +500 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +403 -0
- package/dist/index.d.ts +403 -0
- package/dist/index.js +460 -0
- package/dist/index.js.map +1 -0
- package/package.json +42 -0
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,403 @@
|
|
|
1
|
+
/**
 * Reader SDK types. Shapes mirror the reader-api envelope contract.
 */

/**
 * Options accepted by the `ReaderClient` constructor.
 *
 * Only `apiKey` is required; every other field is optional and its default
 * is the one stated on the field.
 */
interface ReaderClientConfig {
  /** API key (required) */
  apiKey: string;
  /** API base URL (default: https://api.reader.dev) */
  baseUrl?: string;
  /** Request timeout in ms (default: 60000) */
  timeout?: number;
  /** Max retries on transient failures (default: 2) */
  maxRetries?: number;
  /** Extra headers to include in every request (e.g. x-request-id for tracing) */
  headers?: Record<string, string>;
}
/** Public proxy mode. `auto` picks standard first and escalates to stealth on block. */
type ProxyMode = "standard" | "stealth" | "auto";
/**
 * Parameters for `ReaderClient.read`.
 *
 * Every field is optional at the type level, including both `url` and `urls`.
 * NOTE(review): the type does not enforce that at least one of `url` / `urls`
 * is supplied — confirm how the API treats a request with neither.
 */
interface ReadParams {
  /** Single URL to scrape */
  url?: string;
  /** Multiple URLs for batch scraping */
  urls?: string[];
  /** Output formats (default: ["markdown"]) */
  formats?: Array<"markdown" | "html">;
  /** Extract main content only (default: true) */
  onlyMainContent?: boolean;
  /** CSS selectors to include */
  includeTags?: string[];
  /** CSS selectors to exclude */
  excludeTags?: string[];
  /** Wait for CSS selector before scraping */
  waitForSelector?: string;
  /** Per-URL timeout in ms (default: 30000) */
  timeoutMs?: number;
  /** Proxy mode: standard, stealth, or auto (default: auto) */
  proxyMode?: ProxyMode;
  /** Max crawl depth (triggers crawl mode) */
  maxDepth?: number;
  /** Max pages to crawl (triggers crawl mode) */
  maxPages?: number;
  /** Use cache (default: true) */
  cache?: boolean;
  /** Webhook for async job notifications */
  webhook?: {
    url: string;
    events?: string[];
    secret?: string;
  };
  /** Batch concurrency override */
  batchConcurrency?: number;
}
/** Metadata attached to a scraped page (see `ScrapeResult.metadata` and `Page.metadata`). */
interface ScrapeMetadata {
  title?: string | null;
  description?: string | null;
  statusCode?: number;
  /** NOTE(review): unit is not stated in this file; presumably milliseconds — confirm. */
  duration: number;
  /** NOTE(review): presumably true when the result was served from cache — confirm. */
  cached: boolean;
  /** Resolved proxy mode — `"standard"` or `"stealth"`. Omitted on cache hits. */
  proxyMode?: "standard" | "stealth";
  /** True if `auto` escalated from standard to stealth for this page. */
  proxyEscalated?: boolean;
  /** NOTE(review): timestamp as a string; the exact format is not shown here — confirm. */
  scrapedAt: string;
}
/**
 * One page entry as carried in `Job.results` and in `page` stream events.
 *
 * Only `url` is required; all other fields are optional.
 */
interface Page {
  url: string;
  markdown?: string;
  html?: string;
  statusCode?: number;
  proxyMode?: "standard" | "stealth";
  proxyEscalated?: boolean;
  credits?: number;
  /** Either the typed `ScrapeMetadata` shape or an arbitrary key/value record. */
  metadata?: ScrapeMetadata | Record<string, unknown>;
  /** NOTE(review): presumably set when this page failed to scrape — confirm. */
  error?: string;
}
/** Result of a synchronous scrape — single URL, returned immediately. */
interface ScrapeResult {
  url: string;
  /** Final URL after redirects (only present if different from `url`) */
  finalUrl?: string;
  markdown?: string;
  html?: string;
  metadata: ScrapeMetadata;
}
/** Lifecycle state of a job, as reported on `Job.status` and on progress/done stream events. */
type JobStatus = "queued" | "processing" | "completed" | "failed" | "cancelled";
/** Kind of work a job performs, as reported on `Job.mode`. */
type JobMode = "scrape" | "batch" | "crawl";
/** Job as returned from GET /v1/jobs/:id (data portion of envelope). */
interface Job {
  id: string;
  status: JobStatus;
  mode: JobMode;
  completed: number;
  total: number;
  creditsUsed: number;
  error: string | null;
  /** Paginated page results. `waitForJob` collects the results from every pagination page. */
  results: Page[];
  startedAt: string | null;
  completedAt: string | null;
  createdAt: string;
}
/** Pagination block carried by `PaginatedEnvelope`. */
interface Pagination {
  total: number;
  skip: number;
  limit: number;
  hasMore: boolean;
  /** NOTE(review): presumably a URL or cursor for the following page — confirm. */
  next?: string;
}
/** Return type of `client.read(...)`. Discriminated by `kind`. */
type ReadResult =
  | {
      kind: "scrape";
      data: ScrapeResult;
    }
  | {
      kind: "job";
      data: Job;
    };
/** Credit balance information returned by `ReaderClient.getCredits`. */
interface Credits {
  balance: number;
  limit: number;
  used: number;
  /**
   * Plan tier. The trailing `| string` makes the whole union equivalent to
   * `string` for the type checker, so the listed literals are documentation
   * only and any string value is accepted.
   */
  tier: "free" | "pro" | "business" | "enterprise" | string;
  /** NOTE(review): timestamp as a string; the exact format is not shown here — confirm. */
  resetAt: string;
}
/**
 * A single usage record.
 *
 * NOTE(review): no other declaration in this file references this type;
 * presumably it describes one row of a usage-history listing — confirm.
 */
interface UsageEntry {
  id: string;
  url: string;
  /** NOTE(review): unit is not stated in this file; presumably milliseconds — confirm. */
  duration: number;
  status: "success" | "error";
  cached: boolean;
  /** Unlike `Page.proxyMode`, this field is always present and uses `null` when absent. */
  proxyMode: "standard" | "stealth" | null;
  credits: number;
  error: string | null;
  createdAt: string;
}
/**
 * Event yielded by `ReaderClient.stream`. Discriminated by `type`:
 * `progress` and `done` carry counters and the job status, `page` carries a
 * `Page`, and `error` carries the failing URL and an error message.
 */
type StreamEvent =
  | {
      type: "progress";
      completed: number;
      total: number;
      status: JobStatus;
    }
  | {
      type: "page";
      data: Page;
    }
  | {
      type: "error";
      url: string;
      error: string;
    }
  | {
      type: "done";
      completed: number;
      total: number;
      status: JobStatus;
    };
/** Envelope of a successful response: `success` is the literal `true` and `data` holds the payload. */
interface SuccessEnvelope<T> {
  success: true;
  data: T;
}
/** Envelope of a successful list response: an array payload plus its `pagination` block. */
interface PaginatedEnvelope<T> {
  success: true;
  data: T[];
  pagination: Pagination;
}
/** Envelope of a failed response: `success` is the literal `false` and `error` describes the failure. */
interface ErrorEnvelope {
  success: false;
  error: {
    code: string;
    message: string;
    details?: Record<string, unknown>;
    docsUrl?: string;
  };
}
/** Any non-paginated API response: narrow on `success` to reach either `data` or `error`. */
type ApiEnvelope<T> = SuccessEnvelope<T> | ErrorEnvelope;
/** State of a browser session, as reported on `SessionInfo.status`. */
type SessionStatus = "active" | "stopped" | "expired";
/** Browser session details returned by `SessionsAPI.create`, `get`, and `list`. */
interface SessionInfo {
  sessionId: string;
  /** WebSocket endpoint; the `ReaderClient.sessions` example passes it to `chromium.connectOverCDP`. */
  wsEndpoint: string;
  /** NOTE(review): purpose of this token is not shown in this file — confirm how it is used. */
  token: string;
  status: SessionStatus;
  createdAt: string;
  expiresAt: string;
}
/** Optional parameters for `SessionsAPI.create`. */
interface CreateSessionParams {
  /** Max session lifetime in ms (default: 3600000 = 60 min) */
  maxDurationMs?: number;
}
/** Result of `SessionsAPI.stop`; `status` is always the literal `"stopped"`. */
interface StopSessionResult {
  sessionId: string;
  status: "stopped";
  durationMs: number;
  creditsCharged: number;
}

/**
 * Reader SDK Client
 *
 * @example
 * import { ReaderClient } from "@vakra-dev/reader-js";
 *
 * const client = new ReaderClient({ apiKey: "rdr_your_key" });
 *
 * // Synchronous scrape (single URL)
 * const result = await client.read({ url: "https://example.com" });
 * if (result.kind === "scrape") {
 *   console.log(result.data.markdown);
 * }
 *
 * // Batch (returns a completed Job with all results collected)
 * const batch = await client.read({ urls: ["url1", "url2"] });
 * if (batch.kind === "job") {
 *   for (const page of batch.data.results) {
 *     console.log(page.url, page.markdown?.length);
 *   }
 * }
 */

declare class ReaderClient {
  private apiKey;
  private baseUrl;
  private timeout;
  private maxRetries;
  private extraHeaders;
  private _sessions;
  /** Create a client from the given configuration; `config.apiKey` is required. */
  constructor(config: ReaderClientConfig);
  /**
   * Browser sessions API.
   *
   * @example
   * ```typescript
   * const session = await client.sessions.create();
   * const browser = await chromium.connectOverCDP(session.wsEndpoint);
   * // ... use Playwright ...
   * await client.sessions.stop(session.sessionId);
   * ```
   */
  get sessions(): SessionsAPI;
  /**
   * Read (scrape, batch, or crawl) one or more URLs.
   *
   * - Single URL → sync scrape, returns immediately with `{ kind: "scrape", data }`
   * - Multiple URLs or URL + maxDepth/maxPages → async job; this method polls
   *   until the job terminates and returns `{ kind: "job", data }`.
   */
  read(params: ReadParams): Promise<ReadResult>;
  /**
   * Get job status and a single page of results.
   *
   * @param jobId - Identifier of the job to fetch.
   * @param opts - Optional `skip` / `limit` selecting which page of results to return.
   * @returns The job, plus `hasMore` and an optional `next` value for pagination.
   */
  getJob(jobId: string, opts?: {
    skip?: number;
    limit?: number;
  }): Promise<{
    job: Job;
    hasMore: boolean;
    next?: string;
  }>;
  /**
   * Fetch all job result pages by following pagination.
   */
  getAllJobResults(jobId: string): Promise<Page[]>;
  /**
   * Cancel a job. Throws `ConflictError` if the job is already terminal.
   */
  cancelJob(jobId: string): Promise<void>;
  /**
   * Retry the failed URLs in a job. Throws `InvalidRequestError` if no
   * failed URLs exist.
   */
  retryJob(jobId: string): Promise<{
    id: string;
    status: string;
    retrying: number;
  }>;
  /**
   * Poll a job until it completes, fails, or is cancelled. Collects all
   * paginated results when complete.
   *
   * NOTE(review): units and defaults of `pollInterval` / `timeout` are not
   * stated in this declaration; presumably milliseconds — confirm.
   */
  waitForJob(jobId: string, options?: {
    pollInterval?: number;
    timeout?: number;
  }): Promise<Job>;
  /**
   * Stream job results as they arrive via polling.
   *
   * @example
   * for await (const event of client.stream(jobId)) {
   *   if (event.type === "page") console.log(event.data.url);
   *   if (event.type === "done") break;
   * }
   */
  stream(jobId: string, options?: {
    pollInterval?: number;
    timeout?: number;
  }): AsyncGenerator<StreamEvent>;
  /**
   * Get the current credit balance for this workspace.
   */
  getCredits(): Promise<Credits>;
  private request;
}
/**
 * Signature of the request function handed to `SessionsAPI`: takes an HTTP
 * method, a path, and an optional body, and resolves to a caller-chosen `T`.
 */
type RequestFn = <T>(method: string, path: string, body?: unknown) => Promise<T>;
/** Browser-session operations, exposed on the client as `ReaderClient.sessions`. */
declare class SessionsAPI {
  private request;
  /** @param request - Function used to issue the underlying API requests. */
  constructor(request: RequestFn);
  /**
   * Create a browser session. Returns a CDP WebSocket URL for
   * Playwright/Puppeteer connection.
   */
  create(params?: CreateSessionParams): Promise<SessionInfo>;
  /**
   * Get session status.
   */
  get(sessionId: string): Promise<SessionInfo>;
  /**
   * Stop a browser session.
   */
  stop(sessionId: string): Promise<StopSessionResult>;
  /**
   * List active sessions.
   */
  list(): Promise<SessionInfo[]>;
}

/**
 * Typed error classes mirroring the reader-api error code catalog.
 *
 * The API returns a stable `code` field on every error response. The SDK
 * branches on that code and throws a specific subclass, so callers can
 * write:
 *
 *   try {
 *     await client.read({ url });
 *   } catch (err) {
 *     if (err instanceof InsufficientCreditsError) {
 *       // err.required, err.available, err.resetAt
 *     }
 *   }
 *
 * There is one subclass per code in the catalog. Unknown codes fall through
 * to the base `ReaderApiError`.
 */

/** The error codes known to this SDK version. */
type ReaderErrorCode =
  | "invalid_request"
  | "unauthenticated"
  | "insufficient_credits"
  | "url_blocked"
  | "not_found"
  | "conflict"
  | "rate_limited"
  | "concurrency_limited"
  | "internal_error"
  | "upstream_unavailable"
  | "scrape_timeout";
/** Error payload passed to the error-class constructors and to `toReaderApiError`. */
interface ApiErrorBody {
  /**
   * Error code. The `| string` member makes this equivalent to `string` for
   * the type checker, so codes outside `ReaderErrorCode` are accepted.
   */
  code: ReaderErrorCode | string;
  message: string;
  details?: Record<string, unknown>;
  docsUrl?: string;
}
/**
 * Base class of every typed API error in this file. Exposes the error
 * `code`, the HTTP status, and the optional `details`, `docsUrl`, and
 * `requestId` as read-only properties.
 */
declare class ReaderApiError extends Error {
  readonly code: string;
  readonly httpStatus: number;
  readonly details?: Record<string, unknown>;
  readonly docsUrl?: string;
  readonly requestId?: string;
  /**
   * @param body - Error payload from the API response.
   * @param httpStatus - HTTP status code of the response.
   * @param requestId - Optional request identifier.
   */
  constructor(body: ApiErrorBody, httpStatus: number, requestId?: string);
}
/** `ReaderApiError` subclass; going by its name, it corresponds to the `invalid_request` code. */
declare class InvalidRequestError extends ReaderApiError {
  constructor(body: ApiErrorBody, status: number, requestId?: string);
}
/** `ReaderApiError` subclass; going by its name, it corresponds to the `unauthenticated` code. */
declare class UnauthenticatedError extends ReaderApiError {
  constructor(body: ApiErrorBody, status: number, requestId?: string);
}
/**
 * `ReaderApiError` subclass; going by its name, it corresponds to the
 * `insufficient_credits` code.
 *
 * NOTE(review): `required`, `available`, and `resetAt` are all optional;
 * presumably they are copied from the error `details` — confirm.
 */
declare class InsufficientCreditsError extends ReaderApiError {
  readonly required?: number;
  readonly available?: number;
  readonly resetAt?: string;
  constructor(body: ApiErrorBody, status: number, requestId?: string);
}
/**
 * `ReaderApiError` subclass; going by its name, it corresponds to the
 * `url_blocked` code. Carries the optional `url` and `reason`.
 */
declare class UrlBlockedError extends ReaderApiError {
  readonly url?: string;
  readonly reason?: string;
  constructor(body: ApiErrorBody, status: number, requestId?: string);
}
/** `ReaderApiError` subclass; going by its name, it corresponds to the `not_found` code. */
declare class NotFoundError extends ReaderApiError {
  constructor(body: ApiErrorBody, status: number, requestId?: string);
}
/**
 * `ReaderApiError` subclass; going by its name, it corresponds to the
 * `conflict` code. `ReaderClient.cancelJob` documents throwing it when the
 * job is already terminal.
 */
declare class ConflictError extends ReaderApiError {
  constructor(body: ApiErrorBody, status: number, requestId?: string);
}
/**
 * `ReaderApiError` subclass; going by its name, it corresponds to the
 * `rate_limited` code. Carries the optional `retryAfterSeconds`, `limit`,
 * and `windowSeconds`.
 */
declare class RateLimitedError extends ReaderApiError {
  readonly retryAfterSeconds?: number;
  readonly limit?: number;
  readonly windowSeconds?: number;
  constructor(body: ApiErrorBody, status: number, requestId?: string);
}
/**
 * `ReaderApiError` subclass; going by its name, it corresponds to the
 * `concurrency_limited` code. Carries the optional `active` and `max` counts.
 */
declare class ConcurrencyLimitedError extends ReaderApiError {
  readonly active?: number;
  readonly max?: number;
  constructor(body: ApiErrorBody, status: number, requestId?: string);
}
/** `ReaderApiError` subclass; presumably corresponds to the `internal_error` code — confirm. */
declare class InternalServerError extends ReaderApiError {
  constructor(body: ApiErrorBody, status: number, requestId?: string);
}
/** `ReaderApiError` subclass; going by its name, it corresponds to the `upstream_unavailable` code. */
declare class UpstreamUnavailableError extends ReaderApiError {
  constructor(body: ApiErrorBody, status: number, requestId?: string);
}
/**
 * `ReaderApiError` subclass; going by its name, it corresponds to the
 * `scrape_timeout` code. Carries the optional `timeoutMs`.
 */
declare class ScrapeTimeoutError extends ReaderApiError {
  readonly timeoutMs?: number;
  constructor(body: ApiErrorBody, status: number, requestId?: string);
}
/**
 * Construct the right error subclass from an error response body.
 * Unknown codes fall through to the base class.
 *
 * @param body - Error payload from the API response; its `code` selects the subclass.
 * @param httpStatus - HTTP status code of the response.
 * @param requestId - Optional request identifier.
 * @returns A `ReaderApiError` or one of its subclasses.
 */
declare function toReaderApiError(body: ApiErrorBody, httpStatus: number, requestId?: string): ReaderApiError;

export { type ApiEnvelope, type ApiErrorBody, ConcurrencyLimitedError, ConflictError, type CreateSessionParams, type Credits, type ErrorEnvelope, InsufficientCreditsError, InternalServerError, InvalidRequestError, type Job, type JobMode, type JobStatus, NotFoundError, type Page, type PaginatedEnvelope, type Pagination, type ProxyMode, RateLimitedError, type ReadParams, type ReadResult, ReaderApiError, ReaderClient, type ReaderClientConfig, type ReaderErrorCode, type ScrapeMetadata, type ScrapeResult, ScrapeTimeoutError, type SessionInfo, type SessionStatus, type StopSessionResult, type StreamEvent, type SuccessEnvelope, UnauthenticatedError, UpstreamUnavailableError, UrlBlockedError, type UsageEntry, toReaderApiError };
|