wellmarked 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +233 -0
- package/dist/cjs/client.cjs +332 -0
- package/dist/cjs/client.d.ts +173 -0
- package/dist/cjs/errors.cjs +143 -0
- package/dist/cjs/errors.d.ts +66 -0
- package/dist/cjs/index.cjs +31 -0
- package/dist/cjs/index.d.ts +15 -0
- package/dist/cjs/models.cjs +166 -0
- package/dist/cjs/models.d.ts +141 -0
- package/dist/cjs/package.json +3 -0
- package/dist/cjs/version.cjs +4 -0
- package/dist/cjs/version.d.ts +1 -0
- package/dist/esm/client.d.ts +173 -0
- package/dist/esm/client.js +330 -0
- package/dist/esm/errors.d.ts +66 -0
- package/dist/esm/errors.js +130 -0
- package/dist/esm/index.d.ts +15 -0
- package/dist/esm/index.js +15 -0
- package/dist/esm/models.d.ts +141 -0
- package/dist/esm/models.js +154 -0
- package/dist/esm/version.d.ts +1 -0
- package/dist/esm/version.js +1 -0
- package/dist/types/client.d.ts +173 -0
- package/dist/types/errors.d.ts +66 -0
- package/dist/types/index.d.ts +15 -0
- package/dist/types/models.d.ts +141 -0
- package/dist/types/version.d.ts +1 -0
- package/package.json +73 -0
- package/src/client.ts +463 -0
- package/src/errors.ts +162 -0
- package/src/index.ts +45 -0
- package/src/models.ts +311 -0
- package/src/version.ts +1 -0
package/src/client.ts
ADDED
|
@@ -0,0 +1,463 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* WellMarked client.
|
|
3
|
+
*
|
|
4
|
+
* The client is a thin, typed wrapper around the HTTP API. All endpoint
|
|
5
|
+
* methods are async — there is no separate sync/async split as in the
|
|
6
|
+
* Python SDK because JavaScript I/O is async by default.
|
|
7
|
+
*
|
|
8
|
+
* import { WellMarked } from "wellmarked";
|
|
9
|
+
*
|
|
10
|
+
* const wm = new WellMarked({ apiKey: "wm_..." });
|
|
11
|
+
* const result = await wm.extract("https://example.com/article");
|
|
12
|
+
* console.log(result.markdown);
|
|
13
|
+
*
|
|
14
|
+
* The API key can also be passed via the `WELLMARKED_API_KEY` environment
|
|
15
|
+
* variable (Node.js), in which case `new WellMarked()` is enough.
|
|
16
|
+
*/
|
|
17
|
+
import {
|
|
18
|
+
APIConnectionError,
|
|
19
|
+
APIStatusError,
|
|
20
|
+
WellMarkedError,
|
|
21
|
+
fromResponse,
|
|
22
|
+
} from "./errors.js";
|
|
23
|
+
import {
|
|
24
|
+
type BulkJob,
|
|
25
|
+
type CrawlJob,
|
|
26
|
+
type ExtractResult,
|
|
27
|
+
type RotatedKey,
|
|
28
|
+
type Usage,
|
|
29
|
+
bulkJobFromResponse,
|
|
30
|
+
crawlJobFromResponse,
|
|
31
|
+
extractResultFromResponse,
|
|
32
|
+
rotatedKeyFromResponse,
|
|
33
|
+
usageFromResponse,
|
|
34
|
+
} from "./models.js";
|
|
35
|
+
import { VERSION } from "./version.js";
|
|
36
|
+
|
|
37
|
+
/** API origin used when the caller does not supply `baseUrl`. */
const DEFAULT_BASE_URL = "https://api.wellmarked.io";

/** Per-request timeout used when the caller does not supply `timeoutMs` (30 seconds). */
const DEFAULT_TIMEOUT_MS = 30 * 1_000;

/**
 * Lower-cased names of the headers the SDK sets itself. Caller-supplied
 * values for these names are dropped instead of overriding the SDK's own.
 */
const RESERVED_HEADERS = new Set<string>(["authorization", "content-type", "accept"]);
|
|
45
|
+
|
|
46
|
+
/** Construction options for the `WellMarked` client. Every field is optional. */
export interface WellMarkedOptions {
  /**
   * WellMarked API key (`wm_...`). When omitted, the client falls back to
   * the `WELLMARKED_API_KEY` environment variable (Node.js only).
   */
  apiKey?: string;

  /** Base URL of the API. Override it to point the client at a test server. */
  baseUrl?: string;

  /** Timeout applied to each request, in milliseconds. Defaults to 30000 (30s). */
  timeoutMs?: number;

  /**
   * Custom `fetch` implementation; the global `fetch` is used when this is
   * not provided. Handy for polyfills, custom agents/proxies, and mocking
   * in tests.
   */
  fetch?: typeof fetch;

  /**
   * Additional headers attached to every request, e.g. an internal
   * correlation id or a custom user agent suffix.
   *
   * Authorization / Content-Type / Accept are managed by the SDK; entries
   * using those names are silently ignored.
   */
  headers?: Record<string, string>;
}
|
|
70
|
+
|
|
71
|
+
/** Per-call options for `WellMarked.extract()`. */
export interface ExtractOptions {
  /**
   * Render the page with Playwright before extracting, for JS-heavy sites.
   * Needs a Pro/Enterprise plan AND `ENABLE_JS_RENDERING=true` on the API
   * instance.
   */
  renderJs?: boolean;
}
|
|
78
|
+
|
|
79
|
+
/** Per-call options for `WellMarked.bulk()`. */
export interface BulkOptions {
  /** Sent to the API as `render_js`; only an explicit `true` enables it. */
  renderJs?: boolean;
}
|
|
82
|
+
|
|
83
|
+
/** Per-call options for `WellMarked.crawl()`. */
export interface CrawlOptions {
  /** Maximum BFS depth measured from the root URL. Must be >= 0; defaults to 1. */
  depth?: number;

  /** Sent to the API as `render_js`; only an explicit `true` enables it. */
  renderJs?: boolean;
}
|
|
88
|
+
|
|
89
|
+
/** Polling controls for `WellMarked.waitForJob()`. */
export interface WaitForJobOptions {
  /** Pause between two consecutive polls, in milliseconds. Defaults to 2000. */
  pollIntervalMs?: number;

  /**
   * Overall budget, in milliseconds, before the wait gives up. Pass `null`
   * to wait forever. Defaults to 300000 (5 min).
   */
  timeoutMs?: number | null;
}
|
|
95
|
+
|
|
96
|
+
/**
 * Pick the API key the client should use.
 *
 * An explicit, non-empty `apiKey` wins. Otherwise the `WELLMARKED_API_KEY`
 * environment variable is consulted, but only in runtimes that expose
 * `process.env`.
 *
 * @param apiKey - Key passed by the caller, if any.
 * @returns The resolved key.
 * @throws Error when neither source yields a non-empty key.
 */
function resolveApiKey(apiKey: string | undefined): string {
  if (apiKey) {
    return apiKey;
  }

  // `process` does not exist in browsers, so probe for it before reading.
  let fromEnv: string | undefined;
  if (typeof process !== "undefined" && process.env) {
    fromEnv = process.env.WELLMARKED_API_KEY;
  }
  if (fromEnv) {
    return fromEnv;
  }

  const message = [
    "No API key provided. Pass apiKey: ... to the client or set the ",
    "WELLMARKED_API_KEY environment variable. Generate a key at ",
    "https://wellmarked.io.",
  ].join("");
  throw new Error(message);
}
|
|
109
|
+
|
|
110
|
+
/**
 * Build the headers the SDK attaches to every request: bearer
 * authorization, JSON content negotiation, and a versioned user agent.
 *
 * @param apiKey - Key placed in the `Authorization: Bearer ...` header.
 * @returns A fresh header map that the caller may mutate.
 */
function defaultHeaders(apiKey: string): Record<string, string> {
  const headers: Record<string, string> = {};
  headers["Authorization"] = `Bearer ${apiKey}`;
  headers["Content-Type"] = "application/json";
  headers["Accept"] = "application/json";
  headers["User-Agent"] = `wellmarked-js/${VERSION}`;
  return headers;
}
|
|
118
|
+
|
|
119
|
+
/**
 * Combine the SDK's default headers with caller-supplied extras.
 *
 * Reserved names (Authorization / Content-Type / Accept) in `extra` are
 * skipped. Every other entry is applied on top of the defaults. HTTP header
 * names are case-insensitive, so an extra whose name differs from an
 * existing entry only by letter case REPLACES that entry rather than being
 * stored next to it. Otherwise `user-agent` and `User-Agent` would both be
 * sent and joined into one comma-separated value by fetch.
 *
 * @param apiKey - Key used for the default `Authorization` header.
 * @param extra - Caller headers, or `undefined` for none.
 * @returns A fresh header map.
 */
function mergeHeaders(
  apiKey: string,
  extra: Record<string, string> | undefined,
): Record<string, string> {
  const merged = defaultHeaders(apiKey);
  if (!extra) return merged;

  // lower-cased name -> the exact spelling currently stored in `merged`.
  const spelling = new Map<string, string>();
  for (const key of Object.keys(merged)) {
    spelling.set(key.toLowerCase(), key);
  }

  for (const [name, value] of Object.entries(extra)) {
    const lower = name.toLowerCase();
    if (RESERVED_HEADERS.has(lower)) continue;

    const existing = spelling.get(lower);
    if (existing !== undefined && existing !== name) {
      // Same header under a different casing: drop the old spelling.
      delete merged[existing];
    }
    merged[name] = value;
    spelling.set(lower, name);
  }
  return merged;
}
|
|
131
|
+
|
|
132
|
+
/**
 * Promise-based delay.
 *
 * @param ms - Delay handed to `setTimeout`, in milliseconds.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
|
135
|
+
|
|
136
|
+
/**
 * The subset of fetch's init object that the client actually fills in.
 * `body` and `signal` are optional because they are only set when a JSON
 * payload or a timeout is in play.
 */
interface RequestInitWithSignal {
  /** HTTP verb, e.g. "GET" or "POST". */
  method: string;
  /** Fully merged request headers. */
  headers: Record<string, string>;
  /** Serialized JSON payload, when the request has one. */
  body?: string;
  /** Abort signal driving the per-request timeout. */
  signal?: AbortSignal;
}
|
|
142
|
+
|
|
143
|
+
/**
 * Typed client for the WellMarked HTTP API.
 *
 * Every endpoint method is async and rejects with a `WellMarkedError`
 * subclass on transport or HTTP failures. Argument validation failures are
 * reported as plain `Error`s before any request is sent.
 */
export class WellMarked {
  /** Current bearer token; replaced by `rotateKey()`. */
  private apiKey: string;
  /** API origin with trailing slashes removed. */
  private readonly baseUrl: string;
  /** Per-request timeout in ms; values <= 0 disable the timeout. */
  private readonly timeoutMs: number;
  private readonly fetchImpl: typeof fetch;
  /** Caller-managed headers (never contains a reserved name). */
  private readonly extraHeaders: Record<string, string>;

  /**
   * @param options - See `WellMarkedOptions`; all fields are optional.
   * @throws Error when no API key can be resolved or no `fetch`
   *   implementation is available.
   */
  constructor(options: WellMarkedOptions = {}) {
    this.apiKey = resolveApiKey(options.apiKey);
    this.baseUrl = (options.baseUrl ?? DEFAULT_BASE_URL).replace(/\/+$/, "");
    this.timeoutMs = options.timeoutMs ?? DEFAULT_TIMEOUT_MS;
    const f = options.fetch ?? (typeof fetch !== "undefined" ? fetch : undefined);
    if (!f) {
      throw new Error(
        "No fetch implementation available. Pass `fetch:` to the client " +
          "(undici, node-fetch, etc.) or upgrade to Node 18+.",
      );
    }
    // Bind so `this` isn't lost when calling globalThis.fetch.
    this.fetchImpl = f.bind(globalThis) as typeof fetch;
    this.extraHeaders = {};
    if (options.headers) {
      for (const [name, value] of Object.entries(options.headers)) {
        this.putHeader(name, value);
      }
    }
  }

  // ── Endpoints ──────────────────────────────────────────────────────────────

  /**
   * Extract clean Markdown from a single URL.
   *
   * Throws:
   *   - `RateLimitError` — monthly plan limit reached.
   *   - `UnprocessableEntityError` — `no_content`, `target_timeout`, or
   *     `js_rendering_disabled`.
   *   - `AuthenticationError` — missing or invalid API key.
   */
  async extract(url: string, options: ExtractOptions = {}): Promise<ExtractResult> {
    const body = await this.request("POST", "/extract", {
      url,
      render_js: options.renderJs === true,
    });
    return extractResultFromResponse(body as Record<string, unknown>);
  }

  /**
   * Submit a batch of URLs for concurrent extraction.
   *
   * Returns immediately with `status="queued"`. Poll with `getJob` or
   * block with `waitForJob` to collect results.
   *
   * A bare string is treated as a one-URL batch. A string is itself an
   * `Iterable<string>`, so without this guard it would be split into
   * single characters.
   *
   * Throws:
   *   - `Error` — the batch is empty (no request is sent).
   *   - `PermissionDeniedError` — `plan_not_supported` (Free tier).
   *   - `UnprocessableEntityError` — `bulk_cap_exceeded` (50 on Pro).
   *   - `RateLimitError` — would exceed remaining monthly quota.
   */
  async bulk(urls: Iterable<string>, options: BulkOptions = {}): Promise<BulkJob> {
    const urlList = typeof urls === "string" ? [urls] : Array.from(urls);
    if (urlList.length === 0) {
      throw new Error("bulk() requires at least one URL.");
    }
    const body = await this.request("POST", "/bulk", {
      urls: urlList,
      render_js: options.renderJs === true,
    });
    return bulkJobFromResponse(body as Record<string, unknown>);
  }

  /**
   * Polymorphic job lookup — works for both bulk and crawl jobs.
   *
   * Calls `GET /bulk/{jobId}` first, then inspects the response's `kind`
   * discriminator field. If the job is actually a crawl, a second request
   * to `GET /crawl/{jobId}` fetches the full crawl shape (with per-item
   * depth and the truncated flags). Returns `BulkJob` or `CrawlJob`
   * accordingly.
   *
   * Use `isCrawlJob(job)` (or check `job.kind === "crawl"`) to branch on
   * crawl-specific behavior. The shared interface (`status`, `completed`,
   * `total`, `results`, `done`) works on either type.
   *
   * Jobs are retained for 6 hours after completion.
   */
  async getJob(jobId: string): Promise<BulkJob | CrawlJob> {
    // Encode the id so characters such as "/" or "?" cannot change the route.
    const id = encodeURIComponent(jobId);
    const body = (await this.request("GET", `/bulk/${id}`)) as Record<string, unknown>;
    // /bulk/{id} answers for any jobId today (the endpoint just serializes
    // results in the bulk shape regardless of stored job_type). The `kind`
    // field tells us whether we got a bulk-shaped response of a crawl
    // job; if so, re-fetch via /crawl/{id} for the proper shape.
    if (body.kind === "crawl") {
      const crawlBody = (await this.request("GET", `/crawl/${id}`)) as Record<
        string,
        unknown
      >;
      return crawlJobFromResponse(crawlBody);
    }
    return bulkJobFromResponse(body);
  }

  /**
   * Block until a job reaches `status="done"` (or timeout). Works for both
   * bulk and crawl jobs.
   *
   * The first call uses the polymorphic `getJob` to discover the job's
   * kind. Subsequent polls go directly to the typed endpoint, so a crawl
   * job only pays the dispatch round-trip once.
   *
   * The pause before each poll is capped at the time left until the
   * deadline, so the wait does not overshoot `timeoutMs` by up to a full
   * poll interval.
   *
   * Throws:
   *   - `Error` with message "did not finish within ..." — the job didn't
   *     finish before `timeoutMs` elapsed.
   */
  async waitForJob(
    jobId: string,
    options: WaitForJobOptions = {},
  ): Promise<BulkJob | CrawlJob> {
    const pollIntervalMs = options.pollIntervalMs ?? 2_000;
    const timeoutMs = options.timeoutMs === undefined ? 300_000 : options.timeoutMs;
    const deadline = timeoutMs === null ? null : Date.now() + timeoutMs;

    let job: BulkJob | CrawlJob = await this.getJob(jobId);
    const isCrawl = job.kind === "crawl";
    const path = `/${isCrawl ? "crawl" : "bulk"}/${encodeURIComponent(jobId)}`;

    while (!job.done) {
      let delay = pollIntervalMs;
      if (deadline !== null) {
        const remaining = deadline - Date.now();
        if (remaining <= 0) {
          throw new Error(
            `Job ${jobId} did not finish within ${timeoutMs}ms ` +
              `(last status: ${job.status}, ${job.completed}/${job.total})`,
          );
        }
        delay = Math.min(pollIntervalMs, remaining);
      }
      await sleep(delay);
      const body = (await this.request("GET", path)) as Record<string, unknown>;
      job = isCrawl ? crawlJobFromResponse(body) : bulkJobFromResponse(body);
    }
    return job;
  }

  /**
   * Crawl a site starting from `url`, BFS to `depth`.
   *
   * Returns immediately with `status="queued"`. Use `getJob` to poll, or
   * `waitForJob` to block until done — both handle crawl and bulk jobIds
   * transparently.
   *
   * Plan caps:
   *   - Free       → `PermissionDeniedError` (`plan_not_supported`)
   *   - Pro        → max depth 5, up to 1,000 pages per crawl
   *   - Enterprise → unlimited depth and pages
   *
   * Throws:
   *   - `Error` — `depth` is negative, fractional, `NaN` or infinite
   *     (no request is sent).
   *   - `PermissionDeniedError` — `plan_not_supported` (Free tier).
   *   - `UnprocessableEntityError` — `crawl_depth_exceeded`.
   */
  async crawl(url: string, options: CrawlOptions = {}): Promise<CrawlJob> {
    const depth = options.depth ?? 1;
    if (depth < 0) {
      throw new Error("depth must be >= 0.");
    }
    // NaN/Infinity would be serialized as `null` by JSON.stringify, and a
    // fractional depth has no meaning for a BFS level count.
    if (!Number.isInteger(depth)) {
      throw new Error("depth must be a non-negative integer.");
    }
    const body = await this.request("POST", "/crawl", {
      url,
      depth,
      render_js: options.renderJs === true,
    });
    return crawlJobFromResponse(body as Record<string, unknown>);
  }

  // ── Custom headers ─────────────────────────────────────────────────────────

  /**
   * Add or replace a per-request header for the rest of this client's life.
   *
   * Header names are matched case-insensitively: setting `x-trace` replaces
   * a previously stored `X-Trace`.
   *
   * Authorization / Content-Type / Accept are reserved — calls that try
   * to set those are silently ignored. To rotate the bearer token, use
   * `rotateKey()`.
   */
  setHeader(name: string, value: string): void {
    this.putHeader(name, value);
  }

  /**
   * Remove a header previously added via `headers:` or `setHeader()`.
   * The name is matched case-insensitively.
   */
  removeHeader(name: string): void {
    this.dropHeader(name.toLowerCase());
  }

  /**
   * Return your usage for the current billing period.
   *
   * Does not count toward your monthly quota.
   */
  async getUsage(): Promise<Usage> {
    const body = await this.request("GET", "/usage");
    return usageFromResponse(body as Record<string, unknown>);
  }

  /**
   * Mint a new API key. The current key is invalidated immediately.
   *
   * The new raw key is in the returned `apiKey` field — store it before
   * discarding the result. There is no recovery flow.
   *
   * The client auto-swaps to the new key for subsequent requests.
   *
   * Does not count toward your monthly quota.
   */
  async rotateKey(): Promise<RotatedKey> {
    const body = await this.request("POST", "/keys/rotate");
    const rotated = rotatedKeyFromResponse(body as Record<string, unknown>);
    if (rotated.apiKey) {
      this.apiKey = rotated.apiKey;
    }
    return rotated;
  }

  /**
   * Internal: read the current API key. Exposed for tests.
   * Not part of the public, semver-stable surface.
   */
  _getApiKey(): string {
    return this.apiKey;
  }

  // ── Internals ──────────────────────────────────────────────────────────────

  /** Store one caller header, skipping reserved names and replacing case variants. */
  private putHeader(name: string, value: string): void {
    const lower = name.toLowerCase();
    if (RESERVED_HEADERS.has(lower)) return;
    this.dropHeader(lower);
    this.extraHeaders[name] = value;
  }

  /** Delete every stored header whose lower-cased name equals `lowerName`. */
  private dropHeader(lowerName: string): void {
    for (const key of Object.keys(this.extraHeaders)) {
      if (key.toLowerCase() === lowerName) {
        delete this.extraHeaders[key];
      }
    }
  }

  // ── Transport ──────────────────────────────────────────────────────────────

  /**
   * Send one request and return the decoded JSON body of a 2xx response.
   *
   * The timeout timer stays armed until the response body has been fully
   * read, so a server that sends headers and then stalls is aborted too.
   *
   * @param method - HTTP verb.
   * @param path - Path appended to the base URL (must start with "/").
   * @param json - Optional payload, serialized with `JSON.stringify`.
   * @throws APIConnectionError when the request or the body read fails,
   *   including timeouts.
   * @throws WellMarkedError (via `parseResponse`) for non-2xx statuses or
   *   a 2xx response without a JSON body.
   */
  private async request(
    method: string,
    path: string,
    json?: unknown,
  ): Promise<unknown> {
    const url = `${this.baseUrl}${path}`;
    const init: RequestInitWithSignal = {
      method,
      headers: mergeHeaders(this.apiKey, this.extraHeaders),
    };
    if (json !== undefined) {
      init.body = JSON.stringify(json);
    }

    let timer: ReturnType<typeof setTimeout> | null = null;
    if (this.timeoutMs > 0 && typeof AbortController !== "undefined") {
      const controller = new AbortController();
      init.signal = controller.signal;
      timer = setTimeout(() => controller.abort(), this.timeoutMs);
    }

    let response: Response;
    let bodyText: string;
    try {
      try {
        response = await this.fetchImpl(url, init as RequestInit);
      } catch (err) {
        throw new APIConnectionError(
          `Could not reach the WellMarked API: ${stringifyError(err)}`,
          { cause: err },
        );
      }
      try {
        bodyText = await response.text();
      } catch (err) {
        throw new APIConnectionError(
          `Could not read API response body: ${stringifyError(err)}`,
          { cause: err },
        );
      }
    } finally {
      // Disarm only after the body is consumed (or the attempt failed).
      if (timer !== null) clearTimeout(timer);
    }

    // An empty or non-JSON body is represented as `null`; parseResponse
    // decides whether that is acceptable for the given status.
    let body: unknown = null;
    if (bodyText.length > 0) {
      try {
        body = JSON.parse(bodyText);
      } catch {
        body = null;
      }
    }

    return parseResponse(response.status, body);
  }
}
|
|
428
|
+
|
|
429
|
+
/**
 * Turn a status code plus decoded body into either the body (2xx) or a
 * thrown SDK error.
 *
 * @param statusCode - HTTP status of the response.
 * @param body - Decoded JSON body, or `null` when there was none.
 * @returns `body` unchanged for a 2xx status.
 * @throws WellMarkedError for a 2xx status whose body is `null`.
 * @throws The error built by `fromResponse` for any non-2xx status; a
 *   string `request_id` found on the body is passed along to it.
 */
function parseResponse(statusCode: number, body: unknown): unknown {
  let requestId: string | undefined;
  if (typeof body === "object" && body !== null) {
    const candidate = (body as { request_id?: unknown }).request_id;
    requestId = typeof candidate === "string" ? candidate : undefined;
  }

  const succeeded = statusCode >= 200 && statusCode < 300;
  if (!succeeded) {
    throw fromResponse(statusCode, body, requestId);
  }

  if (body !== null) {
    return body;
  }

  // Documented endpoints always answer 2xx with JSON. Reaching this point
  // means the server (or something in between) dropped the body, so raise
  // a clear error here instead of letting a later `body.foo` access on
  // null blow up.
  throw new WellMarkedError(
    `API returned HTTP ${statusCode} with no JSON body. ` +
      "This is a contract violation — please report it.",
    { statusCode },
  );
}
|
|
453
|
+
|
|
454
|
+
/**
 * Render any thrown value as a short, human-readable string.
 *
 * - `Error` instances become `"<name>: <message>"`.
 * - Other objects carrying a non-empty string `message` use it, prefixed
 *   with `name` when that is a non-empty string too. This covers
 *   error-like values that fail `instanceof Error`.
 * - Remaining objects are JSON-encoded, instead of collapsing to the
 *   uninformative "[object Object]" that `String()` produces.
 * - Primitives go through `String()`.
 *
 * @param err - The caught value.
 * @returns A description suitable for embedding in an error message.
 */
function stringifyError(err: unknown): string {
  if (err instanceof Error) {
    return `${err.name}: ${err.message}`;
  }

  if (typeof err === "object" && err !== null) {
    const { name, message } = err as { name?: unknown; message?: unknown };
    if (typeof message === "string" && message.length > 0) {
      return typeof name === "string" && name.length > 0 ? `${name}: ${message}` : message;
    }
    try {
      const encoded: unknown = JSON.stringify(err);
      if (typeof encoded === "string") return encoded;
    } catch {
      // Not serializable (e.g. a circular structure): use the generic tag below.
    }
    // Never throws, unlike String() on an object without a prototype.
    return Object.prototype.toString.call(err);
  }

  return String(err);
}
|
|
460
|
+
|
|
461
|
+
// Re-export the APIStatusError type so consumers can narrow without
|
|
462
|
+
// pulling from "./errors" directly.
|
|
463
|
+
export { APIStatusError };
|
package/src/errors.ts
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Exception hierarchy for the WellMarked SDK.
|
|
3
|
+
*
|
|
4
|
+
* Every HTTP error returned by the API is translated into a typed error
|
|
5
|
+
* whose class corresponds to the HTTP status and whose `code` matches the
|
|
6
|
+
* `error.code` field in the response body. Catch `WellMarkedError` for
|
|
7
|
+
* anything raised by the SDK; catch a more specific subclass when you want
|
|
8
|
+
* to handle one failure mode.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
/** Optional metadata accepted by every SDK error constructor. */
export interface WellMarkedErrorOptions {
  /** Machine-readable `error.code` from the response body. */
  code?: string | undefined;
  /** HTTP status of the response, when there was one. */
  statusCode?: number | undefined;
  /** Value of `error.retry_after` from the response body. */
  retryAfter?: number | undefined;
  /** `request_id` reported by the API. */
  requestId?: string | undefined;
  /** Underlying error, exposed as `cause` on the instance. */
  cause?: unknown;
}

/**
 * Base class of everything the SDK throws for transport and HTTP failures.
 *
 * Each constructor in this hierarchy restores the prototype from
 * `new.target` rather than from its own class. That keeps `instanceof`
 * correct when transpiled to ES5 and also for classes that extend these
 * errors further.
 */
export class WellMarkedError extends Error {
  readonly code: string | undefined;
  readonly statusCode: number | undefined;
  readonly retryAfter: number | undefined;
  readonly requestId: string | undefined;

  constructor(message: string, options: WellMarkedErrorOptions = {}) {
    super(message);
    this.name = "WellMarkedError";
    this.code = options.code;
    this.statusCode = options.statusCode;
    this.retryAfter = options.retryAfter;
    this.requestId = options.requestId;
    if (options.cause !== undefined) {
      (this as { cause?: unknown }).cause = options.cause;
    }
    // Maintain proper prototype chain when transpiled to ES5.
    Object.setPrototypeOf(this, new.target.prototype);
  }
}

/** Raised when the SDK couldn't reach the API (DNS, TCP, TLS, timeout). */
export class APIConnectionError extends WellMarkedError {
  constructor(message: string, options: WellMarkedErrorOptions = {}) {
    super(message, options);
    this.name = "APIConnectionError";
    Object.setPrototypeOf(this, new.target.prototype);
  }
}

/** Raised for any non-2xx response from the API. */
export class APIStatusError extends WellMarkedError {
  constructor(message: string, options: WellMarkedErrorOptions = {}) {
    super(message, options);
    this.name = "APIStatusError";
    Object.setPrototypeOf(this, new.target.prototype);
  }
}

/** 401 — missing or invalid API key. */
export class AuthenticationError extends APIStatusError {
  constructor(message: string, options: WellMarkedErrorOptions = {}) {
    super(message, options);
    this.name = "AuthenticationError";
    Object.setPrototypeOf(this, new.target.prototype);
  }
}

/** 403 — account inactive, plan does not allow this operation, or job belongs to another user. */
export class PermissionDeniedError extends APIStatusError {
  constructor(message: string, options: WellMarkedErrorOptions = {}) {
    super(message, options);
    this.name = "PermissionDeniedError";
    Object.setPrototypeOf(this, new.target.prototype);
  }
}

/** 404 — job not found or expired past the 6-hour retention window. */
export class NotFoundError extends APIStatusError {
  constructor(message: string, options: WellMarkedErrorOptions = {}) {
    super(message, options);
    this.name = "NotFoundError";
    Object.setPrototypeOf(this, new.target.prototype);
  }
}

/**
 * 422 — request was syntactically valid but couldn't be fulfilled.
 *
 * Common `code` values:
 *   - `no_content`             — could not identify main content on the page
 *   - `target_timeout`         — the target URL timed out
 *   - `js_rendering_disabled`  — `renderJs=true` but the server has it off
 *   - `bulk_cap_exceeded`      — more URLs than the plan allows per request
 *   - `crawl_depth_exceeded`   — requested depth above the plan cap
 */
export class UnprocessableEntityError extends APIStatusError {
  constructor(message: string, options: WellMarkedErrorOptions = {}) {
    super(message, options);
    this.name = "UnprocessableEntityError";
    Object.setPrototypeOf(this, new.target.prototype);
  }
}

/** 429 — monthly plan limit reached. `retryAfter` is the number of seconds until reset. */
export class RateLimitError extends APIStatusError {
  constructor(message: string, options: WellMarkedErrorOptions = {}) {
    super(message, options);
    this.name = "RateLimitError";
    Object.setPrototypeOf(this, new.target.prototype);
  }
}

/** 5xx — something went wrong on the API side. */
export class InternalServerError extends APIStatusError {
  constructor(message: string, options: WellMarkedErrorOptions = {}) {
    super(message, options);
    this.name = "InternalServerError";
    Object.setPrototypeOf(this, new.target.prototype);
  }
}

/** Constructor signature shared by `APIStatusError` and its subclasses. */
type APIStatusErrorCtor = new (
  message: string,
  options?: WellMarkedErrorOptions,
) => APIStatusError;

/** Dedicated error class per 4xx status; anything unlisted uses `APIStatusError`. */
const STATUS_TO_EXC: Record<number, APIStatusErrorCtor> = {
  401: AuthenticationError,
  403: PermissionDeniedError,
  404: NotFoundError,
  422: UnprocessableEntityError,
  429: RateLimitError,
};

/**
 * Build the right error subclass for a given HTTP status + JSON body.
 *
 * `code`, `message` and `retry_after` are read from `body.error` when it is
 * an object and the fields have the expected primitive types. Otherwise the
 * message falls back to `"HTTP <status>"` and the other fields stay
 * undefined.
 *
 * @param statusCode - HTTP status of the response.
 * @param body - Decoded response body (any shape, may be `null`).
 * @param requestId - Request id to attach to the error, if known.
 * @returns `InternalServerError` for status >= 500, the mapped class for
 *   401/403/404/422/429, and a plain `APIStatusError` otherwise.
 */
export function fromResponse(
  statusCode: number,
  body: unknown,
  requestId?: string,
): APIStatusError {
  let code: string | undefined;
  let message = `HTTP ${statusCode}`;
  let retryAfter: number | undefined;

  if (body && typeof body === "object" && "error" in body) {
    const err = (body as { error?: unknown }).error;
    if (err && typeof err === "object") {
      const e = err as Record<string, unknown>;
      if (typeof e.code === "string") code = e.code;
      if (typeof e.message === "string") message = e.message;
      if (typeof e.retry_after === "number") retryAfter = e.retry_after;
    }
  }

  let Ctor: APIStatusErrorCtor;
  if (statusCode >= 500) {
    Ctor = InternalServerError;
  } else {
    Ctor = STATUS_TO_EXC[statusCode] ?? APIStatusError;
  }

  return new Ctor(message, { code, statusCode, retryAfter, requestId });
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Official JavaScript/TypeScript SDK for the WellMarked API.
|
|
3
|
+
*
|
|
4
|
+
* import { WellMarked } from "wellmarked";
|
|
5
|
+
*
|
|
6
|
+
* const wm = new WellMarked({ apiKey: "wm_..." });
|
|
7
|
+
* const result = await wm.extract("https://example.com/article");
|
|
8
|
+
* console.log(result.markdown);
|
|
9
|
+
*
|
|
10
|
+
* See https://wellmarked.io/docs for the full API reference.
|
|
11
|
+
*/
|
|
12
|
+
export { VERSION } from "./version.js";
|
|
13
|
+
export {
|
|
14
|
+
WellMarked,
|
|
15
|
+
type WellMarkedOptions,
|
|
16
|
+
type ExtractOptions,
|
|
17
|
+
type BulkOptions,
|
|
18
|
+
type CrawlOptions,
|
|
19
|
+
type WaitForJobOptions,
|
|
20
|
+
} from "./client.js";
|
|
21
|
+
export {
|
|
22
|
+
type BulkItem,
|
|
23
|
+
type BulkJob,
|
|
24
|
+
type CrawlItem,
|
|
25
|
+
type CrawlJob,
|
|
26
|
+
type ExtractionMeta,
|
|
27
|
+
type ExtractResult,
|
|
28
|
+
type JobStatus,
|
|
29
|
+
type RotatedKey,
|
|
30
|
+
type TruncatedReason,
|
|
31
|
+
type Usage,
|
|
32
|
+
isBulkJob,
|
|
33
|
+
isCrawlJob,
|
|
34
|
+
} from "./models.js";
|
|
35
|
+
export {
|
|
36
|
+
APIConnectionError,
|
|
37
|
+
APIStatusError,
|
|
38
|
+
AuthenticationError,
|
|
39
|
+
InternalServerError,
|
|
40
|
+
NotFoundError,
|
|
41
|
+
PermissionDeniedError,
|
|
42
|
+
RateLimitError,
|
|
43
|
+
UnprocessableEntityError,
|
|
44
|
+
WellMarkedError,
|
|
45
|
+
} from "./errors.js";
|