@vakra-dev/reader-js 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +107 -0
- package/dist/README.md +107 -0
- package/dist/index.cjs +500 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +403 -0
- package/dist/index.d.ts +403 -0
- package/dist/index.js +460 -0
- package/dist/index.js.map +1 -0
- package/package.json +42 -0
package/README.md
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
# @vakra-dev/reader-js
|
|
2
|
+
|
|
3
|
+
TypeScript/JavaScript SDK for the [Reader API](https://reader.dev) — content extraction for LLMs. Wraps `POST /v1/read`, parses the standard envelope, throws typed errors, and auto-polls async jobs to completion.
|
|
4
|
+
|
|
5
|
+
**Version:** 0.2.0 · **Runtime:** Node 18+, Deno, Bun, Cloudflare Workers, modern browsers
|
|
6
|
+
|
|
7
|
+
## Install
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
npm install @vakra-dev/reader-js
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Quick start
|
|
14
|
+
|
|
15
|
+
```ts
|
|
16
|
+
import { ReaderClient } from "@vakra-dev/reader-js";
|
|
17
|
+
|
|
18
|
+
const reader = new ReaderClient({ apiKey: process.env.READER_KEY! });
|
|
19
|
+
|
|
20
|
+
const result = await reader.read({ url: "https://example.com" });
|
|
21
|
+
if (result.kind === "scrape") {
|
|
22
|
+
console.log(result.data.markdown);
|
|
23
|
+
}
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
`reader.read(...)` returns a discriminated union:
|
|
27
|
+
|
|
28
|
+
- `{ kind: "scrape", data: ScrapeResult }` — single-URL requests, returned immediately
|
|
29
|
+
- `{ kind: "job", data: Job }` — batch and crawl requests, auto-polled to completion
|
|
30
|
+
|
|
31
|
+
## Features
|
|
32
|
+
|
|
33
|
+
- **One method for every read operation.** `reader.read({ url })` for sync scrape, `{ urls: [...] }` for batch, `{ url, maxPages }` for crawl.
|
|
34
|
+
- **Typed errors for all 11 Reader error codes.** `InsufficientCreditsError`, `RateLimitedError`, `UrlBlockedError`, `ScrapeTimeoutError`, and more. Each subclass surfaces the relevant fields (e.g. `err.required`, `err.retryAfterSeconds`).
|
|
35
|
+
- **Automatic retries with exponential backoff** for transient codes (`rate_limited`, `upstream_unavailable`, `scrape_timeout`, …). Honors the `Retry-After` header on 429.
|
|
36
|
+
- **Pagination-aware job collection.** `waitForJob()` returns the full job with every page result collected across pagination boundaries.
|
|
37
|
+
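- **Job event streaming.** `for await (const event of reader.stream(jobId))` yields `progress` / `page` / `error` / `done` events as results become available. The SDK polls the job endpoint under the hood, so events arrive at the poll interval rather than over a server-sent-events connection.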
|
|
38
|
+
- **Request ID tracing.** Every error carries the `x-request-id` header value on `err.requestId` for support tickets.
|
|
39
|
+
|
|
40
|
+
## Browser Sessions
|
|
41
|
+
|
|
42
|
+
Launch a stealthed Chrome and connect Playwright or Puppeteer:
|
|
43
|
+
|
|
44
|
+
```ts
|
|
45
|
+
import { chromium } from "playwright-core";
|
|
46
|
+
|
|
47
|
+
const session = await reader.sessions.create();
|
|
48
|
+
const browser = await chromium.connectOverCDP(session.wsEndpoint);
|
|
49
|
+
const page = await (await browser.newContext()).newPage();
|
|
50
|
+
|
|
51
|
+
await page.goto("https://example.com");
|
|
52
|
+
console.log(await page.title());
|
|
53
|
+
|
|
54
|
+
await browser.close();
|
|
55
|
+
await reader.sessions.stop(session.sessionId);
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Methods: `reader.sessions.create()`, `.get(id)`, `.stop(id)`, `.list()`
|
|
59
|
+
|
|
60
|
+
## Browser usage
|
|
61
|
+
|
|
62
|
+
The SDK works in modern browsers via native `fetch`, but **do not ship your API key in browser code** — anyone can read and reuse it. Proxy requests through your own backend.
|
|
63
|
+
|
|
64
|
+
## Errors
|
|
65
|
+
|
|
66
|
+
```ts
|
|
67
|
+
import {
|
|
68
|
+
ReaderApiError,
|
|
69
|
+
InsufficientCreditsError,
|
|
70
|
+
RateLimitedError,
|
|
71
|
+
UrlBlockedError,
|
|
72
|
+
} from "@vakra-dev/reader-js";
|
|
73
|
+
|
|
74
|
+
try {
|
|
75
|
+
await reader.read({ url });
|
|
76
|
+
} catch (err) {
|
|
77
|
+
if (err instanceof InsufficientCreditsError) {
|
|
78
|
+
console.error(`Need ${err.required}, have ${err.available}`);
|
|
79
|
+
} else if (err instanceof RateLimitedError) {
|
|
80
|
+
console.error(`Retry after ${err.retryAfterSeconds}s`);
|
|
81
|
+
} else if (err instanceof UrlBlockedError) {
|
|
82
|
+
console.error(`Blocked: ${err.reason}`);
|
|
83
|
+
} else if (err instanceof ReaderApiError) {
|
|
84
|
+
console.error(`[${err.code}] ${err.message} — see ${err.docsUrl}`);
|
|
85
|
+
} else {
|
|
86
|
+
throw err;
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
Full catalog of error codes: https://reader.dev/docs/home/concepts/errors
|
|
92
|
+
|
|
93
|
+
## Links
|
|
94
|
+
|
|
95
|
+
- **Docs:** https://reader.dev/docs
|
|
96
|
+
- **SDK reference:** https://reader.dev/docs/sdk/javascript
|
|
97
|
+
- **API reference:** https://reader.dev/docs/api-reference/read
|
|
98
|
+
- **Discord:** https://discord.gg/6tjkq7J5WV
|
|
99
|
+
|
|
100
|
+
## Development
|
|
101
|
+
|
|
102
|
+
```bash
|
|
103
|
+
npm install
|
|
104
|
+
npm run typecheck
|
|
105
|
+
npm run build # builds to dist/
|
|
106
|
+
npm test # vitest
|
|
107
|
+
```
|
package/dist/README.md
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
# @vakra-dev/reader-js
|
|
2
|
+
|
|
3
|
+
TypeScript/JavaScript SDK for the [Reader API](https://reader.dev) — content extraction for LLMs. Wraps `POST /v1/read`, parses the standard envelope, throws typed errors, and auto-polls async jobs to completion.
|
|
4
|
+
|
|
5
|
+
**Version:** 0.2.0 · **Runtime:** Node 18+, Deno, Bun, Cloudflare Workers, modern browsers
|
|
6
|
+
|
|
7
|
+
## Install
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
npm install @vakra-dev/reader-js
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Quick start
|
|
14
|
+
|
|
15
|
+
```ts
|
|
16
|
+
import { ReaderClient } from "@vakra-dev/reader-js";
|
|
17
|
+
|
|
18
|
+
const reader = new ReaderClient({ apiKey: process.env.READER_KEY! });
|
|
19
|
+
|
|
20
|
+
const result = await reader.read({ url: "https://example.com" });
|
|
21
|
+
if (result.kind === "scrape") {
|
|
22
|
+
console.log(result.data.markdown);
|
|
23
|
+
}
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
`reader.read(...)` returns a discriminated union:
|
|
27
|
+
|
|
28
|
+
- `{ kind: "scrape", data: ScrapeResult }` — single-URL requests, returned immediately
|
|
29
|
+
- `{ kind: "job", data: Job }` — batch and crawl requests, auto-polled to completion
|
|
30
|
+
|
|
31
|
+
## Features
|
|
32
|
+
|
|
33
|
+
- **One method for every read operation.** `reader.read({ url })` for sync scrape, `{ urls: [...] }` for batch, `{ url, maxPages }` for crawl.
|
|
34
|
+
- **Typed errors for all 11 Reader error codes.** `InsufficientCreditsError`, `RateLimitedError`, `UrlBlockedError`, `ScrapeTimeoutError`, and more. Each subclass surfaces the relevant fields (e.g. `err.required`, `err.retryAfterSeconds`).
|
|
35
|
+
- **Automatic retries with exponential backoff** for transient codes (`rate_limited`, `upstream_unavailable`, `scrape_timeout`, …). Honors the `Retry-After` header on 429.
|
|
36
|
+
- **Pagination-aware job collection.** `waitForJob()` returns the full job with every page result collected across pagination boundaries.
|
|
37
|
+
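- **Job event streaming.** `for await (const event of reader.stream(jobId))` yields `progress` / `page` / `error` / `done` events as results become available. The SDK polls the job endpoint under the hood, so events arrive at the poll interval rather than over a server-sent-events connection.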
|
|
38
|
+
- **Request ID tracing.** Every error carries the `x-request-id` header value on `err.requestId` for support tickets.
|
|
39
|
+
|
|
40
|
+
## Browser Sessions
|
|
41
|
+
|
|
42
|
+
Launch a stealthed Chrome and connect Playwright or Puppeteer:
|
|
43
|
+
|
|
44
|
+
```ts
|
|
45
|
+
import { chromium } from "playwright-core";
|
|
46
|
+
|
|
47
|
+
const session = await reader.sessions.create();
|
|
48
|
+
const browser = await chromium.connectOverCDP(session.wsEndpoint);
|
|
49
|
+
const page = await (await browser.newContext()).newPage();
|
|
50
|
+
|
|
51
|
+
await page.goto("https://example.com");
|
|
52
|
+
console.log(await page.title());
|
|
53
|
+
|
|
54
|
+
await browser.close();
|
|
55
|
+
await reader.sessions.stop(session.sessionId);
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Methods: `reader.sessions.create()`, `.get(id)`, `.stop(id)`, `.list()`
|
|
59
|
+
|
|
60
|
+
## Browser usage
|
|
61
|
+
|
|
62
|
+
The SDK works in modern browsers via native `fetch`, but **do not ship your API key in browser code** — anyone can read and reuse it. Proxy requests through your own backend.
|
|
63
|
+
|
|
64
|
+
## Errors
|
|
65
|
+
|
|
66
|
+
```ts
|
|
67
|
+
import {
|
|
68
|
+
ReaderApiError,
|
|
69
|
+
InsufficientCreditsError,
|
|
70
|
+
RateLimitedError,
|
|
71
|
+
UrlBlockedError,
|
|
72
|
+
} from "@vakra-dev/reader-js";
|
|
73
|
+
|
|
74
|
+
try {
|
|
75
|
+
await reader.read({ url });
|
|
76
|
+
} catch (err) {
|
|
77
|
+
if (err instanceof InsufficientCreditsError) {
|
|
78
|
+
console.error(`Need ${err.required}, have ${err.available}`);
|
|
79
|
+
} else if (err instanceof RateLimitedError) {
|
|
80
|
+
console.error(`Retry after ${err.retryAfterSeconds}s`);
|
|
81
|
+
} else if (err instanceof UrlBlockedError) {
|
|
82
|
+
console.error(`Blocked: ${err.reason}`);
|
|
83
|
+
} else if (err instanceof ReaderApiError) {
|
|
84
|
+
console.error(`[${err.code}] ${err.message} — see ${err.docsUrl}`);
|
|
85
|
+
} else {
|
|
86
|
+
throw err;
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
Full catalog of error codes: https://reader.dev/docs/home/concepts/errors
|
|
92
|
+
|
|
93
|
+
## Links
|
|
94
|
+
|
|
95
|
+
- **Docs:** https://reader.dev/docs
|
|
96
|
+
- **SDK reference:** https://reader.dev/docs/sdk/javascript
|
|
97
|
+
- **API reference:** https://reader.dev/docs/api-reference/read
|
|
98
|
+
- **Discord:** https://discord.gg/6tjkq7J5WV
|
|
99
|
+
|
|
100
|
+
## Development
|
|
101
|
+
|
|
102
|
+
```bash
|
|
103
|
+
npm install
|
|
104
|
+
npm run typecheck
|
|
105
|
+
npm run build # builds to dist/
|
|
106
|
+
npm test # vitest
|
|
107
|
+
```
|
package/dist/index.cjs
ADDED
|
@@ -0,0 +1,500 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Short aliases for the Object reflection primitives used by the helpers below.
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;

/**
 * Install every entry of `all` on `target` as an enumerable getter.
 * `all` maps an export name to a zero-argument function returning its value.
 */
var __export = (target, all) => {
  for (var name in all) {
    __defProp(target, name, { get: all[name], enumerable: true });
  }
};

/**
 * Mirror the own properties of `from` onto `to` as live getters.
 * Keys already present on `to`, and the single key named by `except`, are
 * left alone. Enumerability follows the source property's descriptor.
 * Returns `to`; a `from` that is neither an object nor a function is ignored.
 */
var __copyProps = (to, from, except, desc) => {
  const isCopyable = (from && typeof from === "object") || typeof from === "function";
  if (!isCopyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, {
      get: () => from[key],
      enumerable: !desc || desc.enumerable
    });
  }
  return to;
};

/**
 * Build a CommonJS exports object for `mod`: a fresh object flagged with a
 * non-enumerable `__esModule: true` that forwards every property of `mod`.
 */
var __toCommonJS = (mod) => {
  const exportsObject = __defProp({}, "__esModule", { value: true });
  return __copyProps(exportsObject, mod);
};
|
|
19
|
+
|
|
20
|
+
// src/index.ts
|
|
21
|
+
var index_exports = {};
|
|
22
|
+
__export(index_exports, {
|
|
23
|
+
ConcurrencyLimitedError: () => ConcurrencyLimitedError,
|
|
24
|
+
ConflictError: () => ConflictError,
|
|
25
|
+
InsufficientCreditsError: () => InsufficientCreditsError,
|
|
26
|
+
InternalServerError: () => InternalServerError,
|
|
27
|
+
InvalidRequestError: () => InvalidRequestError,
|
|
28
|
+
NotFoundError: () => NotFoundError,
|
|
29
|
+
RateLimitedError: () => RateLimitedError,
|
|
30
|
+
ReaderApiError: () => ReaderApiError,
|
|
31
|
+
ReaderClient: () => ReaderClient,
|
|
32
|
+
ScrapeTimeoutError: () => ScrapeTimeoutError,
|
|
33
|
+
UnauthenticatedError: () => UnauthenticatedError,
|
|
34
|
+
UpstreamUnavailableError: () => UpstreamUnavailableError,
|
|
35
|
+
UrlBlockedError: () => UrlBlockedError,
|
|
36
|
+
toReaderApiError: () => toReaderApiError
|
|
37
|
+
});
|
|
38
|
+
module.exports = __toCommonJS(index_exports);
|
|
39
|
+
|
|
40
|
+
// src/errors.ts
|
|
41
|
+
/**
 * Base class for every error raised from a Reader API error body.
 *
 * @param body       Error body; `message`, `code`, `details` and `docsUrl` are read from it.
 * @param httpStatus HTTP status associated with the error.
 * @param requestId  Request identifier, when one is available.
 */
var ReaderApiError = class extends Error {
  code;
  httpStatus;
  details;
  docsUrl;
  requestId;
  name = "ReaderApiError";
  constructor(body, httpStatus, requestId) {
    const { message, code, details, docsUrl } = body;
    super(message);
    Object.assign(this, { code, httpStatus, details, docsUrl, requestId });
  }
};
|
|
57
|
+
/** Error class that `toReaderApiError` selects for the `invalid_request` code. */
var InvalidRequestError = class extends ReaderApiError {
  name = "InvalidRequestError";
  constructor(body, httpStatus, requestId) {
    super(body, httpStatus, requestId);
  }
};
|
|
63
|
+
/** Error class that `toReaderApiError` selects for the `unauthenticated` code. */
var UnauthenticatedError = class extends ReaderApiError {
  name = "UnauthenticatedError";
  constructor(body, httpStatus, requestId) {
    super(body, httpStatus, requestId);
  }
};
|
|
69
|
+
/**
 * Error class that `toReaderApiError` selects for the `insufficient_credits` code.
 * Copies `required`, `available` and `resetAt` out of `body.details`
 * (each is `undefined` when `details` is absent).
 */
var InsufficientCreditsError = class extends ReaderApiError {
  name = "InsufficientCreditsError";
  required;
  available;
  resetAt;
  constructor(body, httpStatus, requestId) {
    super(body, httpStatus, requestId);
    const { required, available, resetAt } = body.details ?? {};
    this.required = required;
    this.available = available;
    this.resetAt = resetAt;
  }
};
|
|
81
|
+
/**
 * Error class that `toReaderApiError` selects for the `url_blocked` code.
 * Copies `url` and `reason` out of `body.details` (each is `undefined`
 * when `details` is absent).
 */
var UrlBlockedError = class extends ReaderApiError {
  name = "UrlBlockedError";
  url;
  reason;
  constructor(body, httpStatus, requestId) {
    super(body, httpStatus, requestId);
    const { url, reason } = body.details ?? {};
    this.url = url;
    this.reason = reason;
  }
};
|
|
91
|
+
/** Error class that `toReaderApiError` selects for the `not_found` code. */
var NotFoundError = class extends ReaderApiError {
  name = "NotFoundError";
  constructor(body, httpStatus, requestId) {
    super(body, httpStatus, requestId);
  }
};
|
|
97
|
+
/** Error class that `toReaderApiError` selects for the `conflict` code. */
var ConflictError = class extends ReaderApiError {
  name = "ConflictError";
  constructor(body, httpStatus, requestId) {
    super(body, httpStatus, requestId);
  }
};
|
|
103
|
+
/**
 * Error class that `toReaderApiError` selects for the `rate_limited` code.
 * Copies `retryAfterSeconds`, `limit` and `windowSeconds` out of
 * `body.details` (each is `undefined` when `details` is absent).
 */
var RateLimitedError = class extends ReaderApiError {
  name = "RateLimitedError";
  retryAfterSeconds;
  limit;
  windowSeconds;
  constructor(body, httpStatus, requestId) {
    super(body, httpStatus, requestId);
    const { retryAfterSeconds, limit, windowSeconds } = body.details ?? {};
    this.retryAfterSeconds = retryAfterSeconds;
    this.limit = limit;
    this.windowSeconds = windowSeconds;
  }
};
|
|
115
|
+
/**
 * Error class that `toReaderApiError` selects for the `concurrency_limited` code.
 * Copies `active` and `max` out of `body.details` (each is `undefined`
 * when `details` is absent).
 */
var ConcurrencyLimitedError = class extends ReaderApiError {
  name = "ConcurrencyLimitedError";
  active;
  max;
  constructor(body, httpStatus, requestId) {
    super(body, httpStatus, requestId);
    const { active, max } = body.details ?? {};
    this.active = active;
    this.max = max;
  }
};
|
|
125
|
+
/** Error class that `toReaderApiError` selects for the `internal_error` code. */
var InternalServerError = class extends ReaderApiError {
  name = "InternalServerError";
  constructor(body, httpStatus, requestId) {
    super(body, httpStatus, requestId);
  }
};
|
|
131
|
+
/** Error class that `toReaderApiError` selects for the `upstream_unavailable` code. */
var UpstreamUnavailableError = class extends ReaderApiError {
  name = "UpstreamUnavailableError";
  constructor(body, httpStatus, requestId) {
    super(body, httpStatus, requestId);
  }
};
|
|
137
|
+
/**
 * Error class that `toReaderApiError` selects for the `scrape_timeout` code.
 * Copies `timeoutMs` out of `body.details` (`undefined` when `details`
 * is absent).
 */
var ScrapeTimeoutError = class extends ReaderApiError {
  name = "ScrapeTimeoutError";
  timeoutMs;
  constructor(body, httpStatus, requestId) {
    super(body, httpStatus, requestId);
    const { timeoutMs } = body.details ?? {};
    this.timeoutMs = timeoutMs;
  }
};
|
|
145
|
+
/**
 * Build the typed error that corresponds to an API error body.
 *
 * @param body       Error body; `body.code` selects the error class.
 * @param httpStatus HTTP status to record on the error.
 * @param requestId  Request identifier to record on the error, if any.
 * @returns An instance of the subclass registered for `body.code`, or a
 *          plain `ReaderApiError` when the code is not one of the known values.
 */
function toReaderApiError(body, httpStatus, requestId) {
  // A Map keeps the exact-match semantics of a switch: only the literal
  // string codes below select a subclass.
  const errorClassByCode = new Map([
    ["invalid_request", InvalidRequestError],
    ["unauthenticated", UnauthenticatedError],
    ["insufficient_credits", InsufficientCreditsError],
    ["url_blocked", UrlBlockedError],
    ["not_found", NotFoundError],
    ["conflict", ConflictError],
    ["rate_limited", RateLimitedError],
    ["concurrency_limited", ConcurrencyLimitedError],
    ["internal_error", InternalServerError],
    ["upstream_unavailable", UpstreamUnavailableError],
    ["scrape_timeout", ScrapeTimeoutError]
  ]);
  const ErrorClass = errorClassByCode.get(body.code) ?? ReaderApiError;
  return new ErrorClass(body, httpStatus, requestId);
}
|
|
173
|
+
|
|
174
|
+
// src/client.ts
|
|
175
|
+
// Fallbacks used by ReaderClient when the caller does not supply a value.
// Base URL that request paths are appended to.
var DEFAULT_BASE_URL = "https://api.reader.dev";
// Milliseconds before a single HTTP attempt is aborted (60 seconds).
var DEFAULT_TIMEOUT = 60 * 1000;
// Number of retries after the first attempt.
var DEFAULT_MAX_RETRIES = 2;
// Milliseconds between job status polls (2 seconds).
var DEFAULT_POLL_INTERVAL = 2 * 1000;
// Milliseconds after which job polling gives up (5 minutes).
var DEFAULT_POLL_TIMEOUT = 5 * 60 * 1000;
|
|
180
|
+
// Job statuses after which the polling loops stop waiting.
var TERMINAL_JOB_STATUSES = new Set(["completed", "failed", "cancelled"]);

/**
 * Convert a `Retry-After` header value into a number of seconds.
 * Accepts either delta-seconds ("120") or an HTTP date; returns `undefined`
 * when the header is missing or unparseable. Never returns a negative number.
 */
function parseRetryAfterSeconds(value) {
  if (value == null || String(value).trim() === "") return void 0;
  const seconds = Number(value);
  if (Number.isFinite(seconds)) return Math.max(0, seconds);
  const at = Date.parse(value);
  if (Number.isNaN(at)) return void 0;
  return Math.max(0, (at - Date.now()) / 1e3);
}

/**
 * HTTP client for the Reader API.
 *
 * `config.apiKey` is required. `config.baseUrl`, `config.timeout`
 * (milliseconds per HTTP attempt), `config.maxRetries` and `config.headers`
 * (extra headers merged into every request) are optional and fall back to
 * the module defaults.
 */
var ReaderClient = class {
  apiKey;
  baseUrl;
  timeout;
  maxRetries;
  extraHeaders;
  _sessions = null;
  constructor(config) {
    if (!config?.apiKey) {
      throw new Error("API key is required");
    }
    this.apiKey = config.apiKey;
    // Drop one trailing slash so `${baseUrl}${path}` never contains "//".
    this.baseUrl = (config.baseUrl || DEFAULT_BASE_URL).replace(/\/$/, "");
    this.timeout = config.timeout || DEFAULT_TIMEOUT;
    this.maxRetries = config.maxRetries ?? DEFAULT_MAX_RETRIES;
    this.extraHeaders = config.headers || {};
  }
  /**
   * Browser sessions API (created lazily on first access).
   *
   * @example
   * ```typescript
   * const session = await client.sessions.create();
   * const browser = await chromium.connectOverCDP(session.wsEndpoint);
   * // ... use Playwright ...
   * await client.sessions.stop(session.sessionId);
   * ```
   */
  get sessions() {
    if (!this._sessions) {
      this._sessions = new SessionsAPI(this.request.bind(this));
    }
    return this._sessions;
  }
  /**
   * Read (scrape, batch, or crawl) one or more URLs.
   *
   * - Single URL → sync scrape, returns immediately with `{ kind: "scrape", data }`
   * - Multiple URLs or URL + maxDepth/maxPages → async job; this method polls
   *   until the job terminates and returns `{ kind: "job", data }`.
   */
  async read(params) {
    const envelope = await this.request(
      "POST",
      "/v1/read",
      params
    );
    const data = envelope.data;
    // A job payload carries `status` and `mode` but none of the scrape fields.
    const looksLikeJob = data && typeof data === "object" && "status" in data && "mode" in data && !("markdown" in data) && !("metadata" in data);
    if (looksLikeJob) {
      const jobId = String(data.id);
      const job = await this.waitForJob(jobId);
      return { kind: "job", data: job };
    }
    return { kind: "scrape", data };
  }
  /**
   * Get job status and a single page of results.
   *
   * @param jobId Job identifier (URL-encoded before being placed in the path).
   * @param opts  Optional `{ skip, limit }` forwarded as query parameters.
   * @returns `{ job, hasMore, next }`; `hasMore` is `false` when the response
   *          carries no pagination information.
   */
  async getJob(jobId, opts) {
    const query = new URLSearchParams();
    if (opts?.skip !== void 0) query.set("skip", String(opts.skip));
    if (opts?.limit !== void 0) query.set("limit", String(opts.limit));
    const qs = query.toString();
    const envelope = await this.request(
      "GET",
      `/v1/jobs/${encodeURIComponent(jobId)}${qs ? `?${qs}` : ""}`
    );
    return {
      job: envelope.data,
      hasMore: envelope.pagination?.hasMore ?? false,
      next: envelope.pagination?.next
    };
  }
  /**
   * Fetch all job result pages by following pagination.
   */
  async getAllJobResults(jobId) {
    const pages = [];
    let skip = 0;
    const limit = 100;
    while (true) {
      const { job, hasMore } = await this.getJob(jobId, { skip, limit });
      const batch = job.results ?? [];
      pages.push(...batch);
      // An empty page cannot advance the cursor; stop instead of looping forever.
      if (!hasMore || batch.length === 0) break;
      // Advance by what was actually received so a short page never skips results.
      skip += batch.length;
    }
    return pages;
  }
  /**
   * Cancel a job. Throws `ConflictError` if the job is already terminal.
   */
  async cancelJob(jobId) {
    await this.request("DELETE", `/v1/jobs/${encodeURIComponent(jobId)}`);
  }
  /**
   * Retry the failed URLs in a job. Throws `InvalidRequestError` if no
   * failed URLs exist.
   */
  async retryJob(jobId) {
    const envelope = await this.request("POST", `/v1/jobs/${encodeURIComponent(jobId)}/retry`);
    return envelope.data;
  }
  /**
   * Poll a job until it completes, fails, or is cancelled. Collects all
   * paginated results when complete; for `failed` / `cancelled` jobs the
   * job is returned as fetched by the status poll (at most one result).
   *
   * @param options Optional `{ pollInterval, timeout }` in milliseconds.
   * @throws ScrapeTimeoutError when `timeout` elapses before a terminal status.
   */
  async waitForJob(jobId, options) {
    const interval = options?.pollInterval ?? DEFAULT_POLL_INTERVAL;
    const timeout = options?.timeout ?? DEFAULT_POLL_TIMEOUT;
    const start = Date.now();
    while (Date.now() - start < timeout) {
      const { job } = await this.getJob(jobId, { limit: 1 });
      if (TERMINAL_JOB_STATUSES.has(job.status)) {
        if (job.status === "completed") {
          job.results = await this.getAllJobResults(jobId);
        }
        return job;
      }
      await sleep(interval);
    }
    throw new ScrapeTimeoutError(
      {
        code: "scrape_timeout",
        message: `Job ${jobId} polling timed out after ${timeout}ms`,
        details: { timeoutMs: timeout }
      },
      504
    );
  }
  /**
   * Stream job results as they arrive via polling.
   *
   * Yields a `progress` event per poll, then a `page` or `error` event for
   * each new result, and a final `done` event once the job is terminal and
   * every result page has been delivered.
   *
   * @param options Optional `{ pollInterval, timeout }` in milliseconds.
   * @throws ScrapeTimeoutError when `timeout` elapses first.
   *
   * @example
   * for await (const event of client.stream(jobId)) {
   *   if (event.type === "page") console.log(event.data.url);
   *   if (event.type === "done") break;
   * }
   */
  async *stream(jobId, options) {
    const interval = options?.pollInterval ?? DEFAULT_POLL_INTERVAL;
    const timeout = options?.timeout ?? DEFAULT_POLL_TIMEOUT;
    const start = Date.now();
    let lastCompleted = 0;
    while (Date.now() - start < timeout) {
      const { job, hasMore } = await this.getJob(jobId, { skip: lastCompleted, limit: 100 });
      yield {
        type: "progress",
        completed: job.completed,
        total: job.total,
        status: job.status
      };
      const batch = job.results ?? [];
      for (const page of batch) {
        if (page.error) {
          yield { type: "error", url: page.url, error: page.error };
        } else {
          yield { type: "page", data: page };
        }
        lastCompleted += 1;
      }
      // More results are already available: fetch the next page right away.
      // Without this, a terminal job with more than one page of results would
      // emit "done" after the first page and drop the rest.
      if (hasMore && batch.length > 0) continue;
      if (TERMINAL_JOB_STATUSES.has(job.status)) {
        yield {
          type: "done",
          completed: job.completed,
          total: job.total,
          status: job.status
        };
        return;
      }
      await sleep(interval);
    }
    throw new ScrapeTimeoutError(
      {
        code: "scrape_timeout",
        message: `Job ${jobId} stream timed out`,
        details: { timeoutMs: timeout }
      },
      504
    );
  }
  /**
   * Get the current credit balance for this workspace.
   */
  async getCredits() {
    const envelope = await this.request("GET", "/v1/usage/credits");
    return envelope.data;
  }
  // --- Internal ---
  /**
   * Perform one API call with timeout and retry handling.
   *
   * @param method HTTP method.
   * @param path   Absolute URL (anything starting with "http") or a path
   *               appended to `baseUrl`.
   * @param body   Optional JSON-serialisable request body.
   * @returns The parsed JSON response, or `null` when a successful response
   *          has no JSON body.
   * @throws ReaderApiError (or a subclass) for API errors; 4xx responses other
   *         than 429 are thrown immediately, everything else is retried up to
   *         `maxRetries` times with exponential backoff. Non-API failures
   *         (e.g. network errors) are rethrown after the last attempt.
   *
   * NOTE(review): retries apply to every method, including POST; confirm the
   * API treats a repeated POST as safe.
   */
  async request(method, path, body) {
    const url = path.startsWith("http") ? path : `${this.baseUrl}${path}`;
    let lastError = null;
    for (let attempt = 0; attempt <= this.maxRetries; attempt++) {
      // Exponential backoff: 1s, 2s, 4s, … A 429 may raise this to the
      // server-provided delay below.
      let retryDelayMs = Math.pow(2, attempt) * 1e3;
      try {
        const controller = new AbortController();
        const timeoutId = setTimeout(() => controller.abort(), this.timeout);
        let res;
        try {
          res = await fetch(url, {
            method,
            headers: {
              "Content-Type": "application/json",
              "x-api-key": this.apiKey,
              ...this.extraHeaders
            },
            body: body ? JSON.stringify(body) : void 0,
            signal: controller.signal
          });
        } finally {
          // Always release the timer, including when fetch rejects, so a
          // failed attempt does not leave a pending timeout behind.
          clearTimeout(timeoutId);
        }
        const requestId = res.headers.get("x-request-id") ?? void 0;
        const parsed = await res.json().catch(() => null);
        if (res.ok) {
          return parsed;
        }
        // Only objects can carry an `error` member; a primitive JSON body
        // falls through to the generic error below.
        const apiError = parsed !== null && typeof parsed === "object" ? parsed.error : void 0;
        if (apiError) {
          const err = toReaderApiError(apiError, res.status, requestId);
          if (res.status < 500 && res.status !== 429) throw err;
          if (err instanceof RateLimitedError) {
            // Prefer the delay from the error body, then the Retry-After header.
            const hintedSeconds = err.retryAfterSeconds ?? parseRetryAfterSeconds(res.headers.get("retry-after"));
            if (hintedSeconds !== void 0) {
              err.retryAfterSeconds ??= hintedSeconds;
              retryDelayMs = Math.max(retryDelayMs, hintedSeconds * 1e3);
            }
          }
          lastError = err;
        } else {
          const genericErr = new ReaderApiError(
            {
              code: "internal_error",
              message: `Request failed with status ${res.status}`
            },
            res.status,
            requestId
          );
          if (res.status < 500 && res.status !== 429) throw genericErr;
          lastError = genericErr;
        }
      } catch (err) {
        if (err instanceof ReaderApiError) {
          if (err.httpStatus < 500 && err.httpStatus !== 429) throw err;
          lastError = err;
        } else if (err instanceof Error && err.name === "AbortError") {
          // Our own timer aborted the request: report it with the same typed
          // error used for every other `scrape_timeout`.
          lastError = new ScrapeTimeoutError(
            {
              code: "scrape_timeout",
              message: "Request timed out",
              details: { timeoutMs: this.timeout }
            },
            504
          );
        } else {
          lastError = err instanceof Error ? err : new Error(String(err));
        }
      }
      // Wait only when another attempt will actually follow.
      if (attempt < this.maxRetries) {
        await sleep(retryDelayMs);
      }
    }
    throw lastError ?? new ReaderApiError({ code: "internal_error", message: "Request failed" }, 500);
  }
};
|
|
432
|
+
/**
 * Browser sessions endpoints (`/v1/sessions`).
 *
 * @param request Function `(method, path, body?) => Promise<envelope>` used
 *                for every call; each method returns the envelope's `data`.
 */
var SessionsAPI = class {
  request;
  constructor(request) {
    this.request = request;
  }
  /**
   * Create a browser session. Returns a CDP WebSocket URL for
   * Playwright/Puppeteer connection.
   *
   * @param params Optional creation parameters; an empty object is sent when omitted.
   */
  async create(params) {
    const envelope = await this.request(
      "POST",
      "/v1/sessions",
      params ?? {}
    );
    return envelope.data;
  }
  /**
   * Get session status.
   *
   * @param sessionId Session identifier (URL-encoded before being placed in the path).
   */
  async get(sessionId) {
    const envelope = await this.request(
      "GET",
      `/v1/sessions/${encodeURIComponent(sessionId)}`
    );
    return envelope.data;
  }
  /**
   * Stop a browser session.
   *
   * @param sessionId Session identifier (URL-encoded before being placed in the path).
   * @returns The envelope's `data`, or `undefined` when the request function
   *          resolves with no envelope (e.g. a successful DELETE without a
   *          JSON body).
   */
  async stop(sessionId) {
    const envelope = await this.request(
      "DELETE",
      `/v1/sessions/${encodeURIComponent(sessionId)}`
    );
    // A bodiless success must not turn into a TypeError after the session
    // has already been stopped.
    return envelope?.data;
  }
  /**
   * List active sessions.
   */
  async list() {
    const envelope = await this.request(
      "GET",
      "/v1/sessions"
    );
    return envelope.data;
  }
};
|
|
480
|
+
/**
 * Return a promise that resolves (with no value) after `ms` milliseconds.
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
|
|
483
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
484
|
+
0 && (module.exports = {
|
|
485
|
+
ConcurrencyLimitedError,
|
|
486
|
+
ConflictError,
|
|
487
|
+
InsufficientCreditsError,
|
|
488
|
+
InternalServerError,
|
|
489
|
+
InvalidRequestError,
|
|
490
|
+
NotFoundError,
|
|
491
|
+
RateLimitedError,
|
|
492
|
+
ReaderApiError,
|
|
493
|
+
ReaderClient,
|
|
494
|
+
ScrapeTimeoutError,
|
|
495
|
+
UnauthenticatedError,
|
|
496
|
+
UpstreamUnavailableError,
|
|
497
|
+
UrlBlockedError,
|
|
498
|
+
toReaderApiError
|
|
499
|
+
});
|
|
500
|
+
//# sourceMappingURL=index.cjs.map
|