wellmarked 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +233 -0
- package/dist/cjs/client.cjs +332 -0
- package/dist/cjs/client.d.ts +173 -0
- package/dist/cjs/errors.cjs +143 -0
- package/dist/cjs/errors.d.ts +66 -0
- package/dist/cjs/index.cjs +31 -0
- package/dist/cjs/index.d.ts +15 -0
- package/dist/cjs/models.cjs +166 -0
- package/dist/cjs/models.d.ts +141 -0
- package/dist/cjs/package.json +3 -0
- package/dist/cjs/version.cjs +4 -0
- package/dist/cjs/version.d.ts +1 -0
- package/dist/esm/client.d.ts +173 -0
- package/dist/esm/client.js +330 -0
- package/dist/esm/errors.d.ts +66 -0
- package/dist/esm/errors.js +130 -0
- package/dist/esm/index.d.ts +15 -0
- package/dist/esm/index.js +15 -0
- package/dist/esm/models.d.ts +141 -0
- package/dist/esm/models.js +154 -0
- package/dist/esm/version.d.ts +1 -0
- package/dist/esm/version.js +1 -0
- package/dist/types/client.d.ts +173 -0
- package/dist/types/errors.d.ts +66 -0
- package/dist/types/index.d.ts +15 -0
- package/dist/types/models.d.ts +141 -0
- package/dist/types/version.d.ts +1 -0
- package/package.json +73 -0
- package/src/client.ts +463 -0
- package/src/errors.ts +162 -0
- package/src/index.ts +45 -0
- package/src/models.ts +311 -0
- package/src/version.ts +1 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 WellMarked
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
# wellmarked
|
|
2
|
+
|
|
3
|
+
[![npm version](https://img.shields.io/npm/v/wellmarked.svg)](https://www.npmjs.com/package/wellmarked)
|
|
4
|
+
[![npm downloads](https://img.shields.io/npm/dm/wellmarked.svg)](https://www.npmjs.com/package/wellmarked)
|
|
5
|
+
|
|
6
|
+
Official JavaScript/TypeScript SDK for the **[WellMarked](https://wellmarked.io)** API — convert any URL to clean Markdown.
|
|
7
|
+
|
|
8
|
+
```bash
|
|
9
|
+
npm install wellmarked
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
Requires Node.js 18+ (uses the built-in `fetch`). Works in any modern runtime with a global `fetch` — Node 18+, Deno, Bun, Cloudflare Workers, Vercel Edge, browsers.
|
|
13
|
+
|
|
14
|
+
## Quick start
|
|
15
|
+
|
|
16
|
+
```typescript
|
|
17
|
+
import { WellMarked } from "wellmarked";
|
|
18
|
+
|
|
19
|
+
const wm = new WellMarked({ apiKey: "wm_..." });
|
|
20
|
+
|
|
21
|
+
const result = await wm.extract("https://example.com/article");
|
|
22
|
+
console.log(result.markdown);
|
|
23
|
+
console.log(result.metadata.title, "by", result.metadata.author);
|
|
24
|
+
console.log("retrieved at", result.metadata.retrievedAt);
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
`result.metadata.retrievedAt` is a `Date` (UTC) recording when WellMarked actually fetched the page — distinct from `result.metadata.date` (the article's published date, often `null`). Useful for cache-freshness checks on the caller's side.
|
|
28
|
+
|
|
29
|
+
The API key can also be picked up from the `WELLMARKED_API_KEY` environment variable, in which case `new WellMarked()` is enough.
|
|
30
|
+
|
|
31
|
+
Get a key at [wellmarked.io](https://wellmarked.io).
|
|
32
|
+
|
|
33
|
+
## Pricing
|
|
34
|
+
|
|
35
|
+
| | Free | Pro | Enterprise |
|
|
36
|
+
|-----------------------|-----------|----------------------|---------------|
|
|
37
|
+
| **Monthly Price** | $0 | $29/mo | $199/mo |
|
|
38
|
+
| **Annual Price** | — | $299/yr | $1,999/yr |
|
|
39
|
+
| **Included Requests** | 500/mo | 7,500/mo | 150,000/mo |
|
|
40
|
+
| **Bulk Requests** | ❌ | ✅ (up to 50/request) | ✅ (Unlimited) |
|
|
41
|
+
| **Crawl** | ❌ | ✅ (depth 5, 1k pages)| ✅ (Unlimited) |
|
|
42
|
+
| **Overage Rate** | — | $0.004/req | $0.002/req |
|
|
43
|
+
| **JS Rendering** | ❌ | ✅ | ✅ |
|
|
44
|
+
| **Priority Queue** | Standard | High | Highest |
|
|
45
|
+
|
|
46
|
+
See additional pricing information at [wellmarked.io/#pricing](https://wellmarked.io/#pricing).
|
|
47
|
+
|
|
48
|
+
## CommonJS
|
|
49
|
+
|
|
50
|
+
Both ESM and CommonJS are published. The package's `exports` field routes consumers automatically:
|
|
51
|
+
|
|
52
|
+
```javascript
|
|
53
|
+
const { WellMarked } = require("wellmarked");
|
|
54
|
+
|
|
55
|
+
const wm = new WellMarked({ apiKey: "wm_..." });
|
|
56
|
+
wm.extract("https://example.com/article").then((result) => console.log(result.markdown));
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## Bulk extraction
|
|
60
|
+
|
|
61
|
+
Submit many URLs at once (Pro: up to 50; Enterprise: unlimited). The call returns immediately with a `jobId`. Poll with `getJob` or block until done with `waitForJob`.
|
|
62
|
+
|
|
63
|
+
```typescript
|
|
64
|
+
let job = await wm.bulk([
|
|
65
|
+
"https://example.com/article-1",
|
|
66
|
+
"https://example.com/article-2",
|
|
67
|
+
]);
|
|
68
|
+
job = await wm.waitForJob(job.jobId); // resolves when status === "done"
|
|
69
|
+
|
|
70
|
+
for (const item of job.results) {
|
|
71
|
+
if (item.ok) {
|
|
72
|
+
console.log(item.metadata!.title);
|
|
73
|
+
} else {
|
|
74
|
+
console.log(`${item.url} failed: ${item.error}`);
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
`getJob` and `waitForJob` are **polymorphic** — they work for both bulk and crawl `jobId`s. The SDK reads a `kind` discriminator from the API response and returns either a `BulkJob` or a `CrawlJob`. Use the `isCrawlJob(job)` type guard (or check `job.kind === "crawl"`) before reading crawl-specific fields like `job.truncated` or `item.depth`.
|
|
80
|
+
|
|
81
|
+
```typescript
|
|
82
|
+
import { isCrawlJob } from "wellmarked";
|
|
83
|
+
|
|
84
|
+
const job = await wm.waitForJob(someJobId);
|
|
85
|
+
if (isCrawlJob(job)) {
|
|
86
|
+
console.log("truncated:", job.truncated, job.truncatedReason);
|
|
87
|
+
}
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
## Crawl
|
|
91
|
+
|
|
92
|
+
Crawl a site BFS-style from a root URL — same-site links only, with per-plan depth and page caps (Pro: depth 5, up to 1,000 pages; Enterprise: unlimited). Like `bulk`, this returns a queued job; poll with `getJob` or block until done with `waitForJob` — the same two methods work on both kinds.
|
|
93
|
+
|
|
94
|
+
```typescript
|
|
95
|
+
let job = await wm.crawl("https://docs.example.com", { depth: 2 });
|
|
96
|
+
job = await wm.waitForJob(job.jobId); // works for crawl AND bulk jobIds
|
|
97
|
+
|
|
98
|
+
for (const page of job.results) {
|
|
99
|
+
if (page.ok) {
|
|
100
|
+
console.log(`depth=${page.depth} ${page.metadata!.title}`);
|
|
101
|
+
} else {
|
|
102
|
+
console.log(`${page.url} failed: ${page.error}`);
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
if (job.kind === "crawl" && job.truncated) {
|
|
107
|
+
console.log(`crawl stopped early: ${job.truncatedReason}`);
|
|
108
|
+
}
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
Each successful page consumes one request from your monthly quota — failed pages (timeouts, robots-disallowed, no-content) are not billed. If you run out of quota mid-crawl the job finishes with `truncated: true`, `truncatedReason: "quota_exhausted"`.
|
|
112
|
+
|
|
113
|
+
## Custom headers
|
|
114
|
+
|
|
115
|
+
Pass extra HTTP headers on every request — useful for correlation IDs, multi-tenant identifiers, or a custom user-agent suffix:
|
|
116
|
+
|
|
117
|
+
```typescript
|
|
118
|
+
const wm = new WellMarked({
|
|
119
|
+
apiKey: "wm_...",
|
|
120
|
+
headers: { "X-Trace-Id": "req-abc-123", "X-Tenant": "acme" },
|
|
121
|
+
});
|
|
122
|
+
await wm.extract("https://example.com");
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
Headers can also be added or removed at runtime:
|
|
126
|
+
|
|
127
|
+
```typescript
|
|
128
|
+
wm.setHeader("X-Run-Id", "run-99");
|
|
129
|
+
await wm.extract(/* ... */); // carries X-Run-Id
|
|
130
|
+
wm.removeHeader("X-Run-Id");
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
`Authorization`, `Content-Type`, and `Accept` are reserved — the SDK manages them itself, and entries passed in `headers:` for those keys are silently ignored. To rotate the bearer token, use `rotateKey()`.
|
|
134
|
+
|
|
135
|
+
## Usage & rate limits
|
|
136
|
+
|
|
137
|
+
`getUsage()` is the source of truth for your current-period quota. Quota state is tracked on the account and is not cached by the client, so call `getUsage()` whenever you need the current numbers:
|
|
138
|
+
|
|
139
|
+
```typescript
|
|
140
|
+
const usage = await wm.getUsage();
|
|
141
|
+
console.log(
|
|
142
|
+
`${usage.used} / ${usage.limit} used this period (${usage.plan}) — ${usage.remaining} left`,
|
|
143
|
+
);
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
`GET /usage` itself does not count toward your quota.
|
|
147
|
+
|
|
148
|
+
## Key rotation
|
|
149
|
+
|
|
150
|
+
```typescript
|
|
151
|
+
const rotated = await wm.rotateKey();
|
|
152
|
+
console.log("New key:", rotated.apiKey); // shown once — store it before the program exits
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
After `rotateKey()` the client automatically switches to the new key for subsequent calls; you still need to persist `rotated.apiKey` somewhere durable, because the previous key stops working immediately and there is no recovery flow.
|
|
156
|
+
|
|
157
|
+
## Errors
|
|
158
|
+
|
|
159
|
+
Every non-2xx response is translated into a typed error class. Catch the base class to handle anything, or the specific subclass to handle one failure mode:
|
|
160
|
+
|
|
161
|
+
```typescript
|
|
162
|
+
import {
|
|
163
|
+
WellMarked,
|
|
164
|
+
AuthenticationError,
|
|
165
|
+
PermissionDeniedError,
|
|
166
|
+
NotFoundError,
|
|
167
|
+
UnprocessableEntityError,
|
|
168
|
+
RateLimitError,
|
|
169
|
+
APIConnectionError,
|
|
170
|
+
} from "wellmarked";
|
|
171
|
+
|
|
172
|
+
const wm = new WellMarked();
|
|
173
|
+
try {
|
|
174
|
+
await wm.extract("https://example.com/paywalled");
|
|
175
|
+
} catch (err) {
|
|
176
|
+
if (err instanceof RateLimitError) {
|
|
177
|
+
console.log(`Quota hit. Resets in ${err.retryAfter}s.`);
|
|
178
|
+
} else if (err instanceof UnprocessableEntityError) {
|
|
179
|
+
// err.code is one of: no_content, target_timeout, js_rendering_disabled, ...
|
|
180
|
+
console.log(`Extraction failed (${err.code}): ${err.message}`);
|
|
181
|
+
} else {
|
|
182
|
+
throw err;
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
| Error class | HTTP | Typical `code` values |
|
|
188
|
+
|----------------------------|------|------------------------------------------------------------------------------------------------------|
|
|
189
|
+
| `AuthenticationError` | 401 | `missing_api_key`, `invalid_api_key` |
|
|
190
|
+
| `PermissionDeniedError` | 403 | `account_inactive`, `plan_not_supported`, `forbidden` |
|
|
191
|
+
| `NotFoundError` | 404 | `job_not_found` |
|
|
192
|
+
| `UnprocessableEntityError` | 422 | `no_content`, `target_timeout`, `js_rendering_disabled`, `bulk_cap_exceeded`, `crawl_depth_exceeded` |
|
|
193
|
+
| `RateLimitError` | 429 | `rate_limit_exceeded` *(carries `retryAfter` in seconds)* |
|
|
194
|
+
| `InternalServerError` | 5xx | — |
|
|
195
|
+
| `APIConnectionError` | — | DNS / TCP / TLS / timeout failures, or a response body that could not be read — no usable HTTP response was received |
|
|
196
|
+
|
|
197
|
+
All inherit from `WellMarkedError`.
|
|
198
|
+
|
|
199
|
+
## Configuration
|
|
200
|
+
|
|
201
|
+
```typescript
|
|
202
|
+
new WellMarked({
|
|
203
|
+
apiKey: "wm_...", // or set WELLMARKED_API_KEY
|
|
204
|
+
baseUrl: "https://api.wellmarked.io",
|
|
205
|
+
timeoutMs: 30_000, // per request, default 30s
|
|
206
|
+
fetch: customFetch, // optional: bring your own fetch
|
|
207
|
+
headers: { "X-Trace-Id": "..." }, // optional: extra headers on every request
|
|
208
|
+
});
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
Passing your own `fetch` is useful for custom proxies, polyfills (e.g. `undici` with a custom dispatcher), or test mocking. Any function with the standard `fetch` signature works.
|
|
212
|
+
|
|
213
|
+
## TypeScript
|
|
214
|
+
|
|
215
|
+
Types are bundled. Every public model and error class is exported. The polymorphic `getJob` / `waitForJob` return `BulkJob | CrawlJob` — use the `kind` discriminator or the `isBulkJob`/`isCrawlJob` type guards to narrow.
|
|
216
|
+
|
|
217
|
+
```typescript
|
|
218
|
+
import type {
|
|
219
|
+
BulkJob,
|
|
220
|
+
CrawlJob,
|
|
221
|
+
ExtractResult,
|
|
222
|
+
ExtractionMeta,
|
|
223
|
+
Usage,
|
|
224
|
+
} from "wellmarked";
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
## For Agents
|
|
228
|
+
|
|
229
|
+
If you are an agent, feel free to find additional context [here](https://wellmarked.io/llms.txt)!
|
|
230
|
+
|
|
231
|
+
## License
|
|
232
|
+
|
|
233
|
+
MIT
|
|
@@ -0,0 +1,332 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.APIStatusError = exports.WellMarked = void 0;
|
|
4
|
+
/**
|
|
5
|
+
* WellMarked client.
|
|
6
|
+
*
|
|
7
|
+
* The client is a thin, typed wrapper around the HTTP API. All endpoint
|
|
8
|
+
* methods are async — there is no separate sync/async split as in the
|
|
9
|
+
* Python SDK because JavaScript I/O is async by default.
|
|
10
|
+
*
|
|
11
|
+
* import { WellMarked } from "wellmarked";
|
|
12
|
+
*
|
|
13
|
+
* const wm = new WellMarked({ apiKey: "wm_..." });
|
|
14
|
+
* const result = await wm.extract("https://example.com/article");
|
|
15
|
+
* console.log(result.markdown);
|
|
16
|
+
*
|
|
17
|
+
* The API key can also be passed via the `WELLMARKED_API_KEY` environment
|
|
18
|
+
* variable (Node.js), in which case `new WellMarked()` is enough.
|
|
19
|
+
*/
|
|
20
|
+
const errors_js_1 = require("./errors.cjs");
|
|
21
|
+
Object.defineProperty(exports, "APIStatusError", { enumerable: true, get: function () { return errors_js_1.APIStatusError; } });
|
|
22
|
+
const models_js_1 = require("./models.cjs");
|
|
23
|
+
const version_js_1 = require("./version.cjs");
|
|
24
|
+
// API origin used when the caller does not pass `baseUrl`.
const DEFAULT_BASE_URL = "https://api.wellmarked.io";
// Per-request timeout (milliseconds) used when the caller does not pass `timeoutMs`.
const DEFAULT_TIMEOUT_MS = 30 * 1000;
// Lower-cased names of the headers the SDK sets itself; caller-supplied
// values for these names are dropped wherever extra headers are accepted.
const RESERVED_HEADERS = new Set(["authorization", "content-type", "accept"]);
|
|
31
|
+
/**
 * Pick the API key the client should authenticate with.
 *
 * A non-empty `apiKey` argument wins. Otherwise the `WELLMARKED_API_KEY`
 * environment variable is consulted, but only when a `process.env` object
 * exists in the current runtime.
 *
 * @param {string} [apiKey] - Key supplied by the caller, if any.
 * @returns {string} The key to use.
 * @throws {Error} When neither source yields a non-empty key.
 */
function resolveApiKey(apiKey) {
    if (!apiKey) {
        let fromEnv;
        // `process` does not exist in browsers / edge runtimes, so probe first.
        if (typeof process !== "undefined" && process.env) {
            fromEnv = process.env.WELLMARKED_API_KEY;
        }
        if (!fromEnv) {
            throw new Error("No API key provided. Pass apiKey: ... to the client or set the " +
                "WELLMARKED_API_KEY environment variable. Generate a key at " +
                "https://wellmarked.io.");
        }
        return fromEnv;
    }
    return apiKey;
}
|
|
43
|
+
/**
 * Build the headers the SDK itself sends on every request.
 *
 * @param {string} apiKey - Key placed in the `Authorization: Bearer` header.
 * @returns {Record<string, string>} A fresh header object (safe to mutate).
 */
function defaultHeaders(apiKey) {
    const headers = {};
    headers.Authorization = `Bearer ${apiKey}`;
    headers["Content-Type"] = "application/json";
    headers.Accept = "application/json";
    // Identifies the SDK and its version to the API.
    headers["User-Agent"] = `wellmarked-js/${version_js_1.VERSION}`;
    return headers;
}
|
|
51
|
+
/**
 * Combine the SDK's default headers with caller-supplied extras.
 *
 * Extras whose lower-cased name is in `RESERVED_HEADERS` are skipped, so
 * they can never override the SDK-managed values.
 *
 * @param {string} apiKey - Key used for the default `Authorization` header.
 * @param {Record<string, string>} [extra] - Additional headers, if any.
 * @returns {Record<string, string>} A fresh header object.
 */
function mergeHeaders(apiKey, extra) {
    const merged = defaultHeaders(apiKey);
    const pairs = extra ? Object.entries(extra) : [];
    pairs.forEach(([name, value]) => {
        if (!RESERVED_HEADERS.has(name.toLowerCase())) {
            merged[name] = value;
        }
    });
    return merged;
}
|
|
62
|
+
/**
 * Resolve (with no value) after roughly `ms` milliseconds.
 *
 * @param {number} ms - Delay handed to `setTimeout`.
 * @returns {Promise<void>}
 */
function sleep(ms) {
    return new Promise(function wake(resolve) {
        setTimeout(() => resolve(), ms);
    });
}
|
|
65
|
+
/**
 * Typed client for the WellMarked HTTP API.
 *
 * Every endpoint method is async and funnels through `request()`, which
 * attaches the bearer token plus any extra headers, applies the per-request
 * timeout, and turns non-2xx responses into typed errors.
 */
class WellMarked {
    /**
     * @param {object} [options]
     * @param {string} [options.apiKey] - API key; falls back to the
     *   `WELLMARKED_API_KEY` environment variable.
     * @param {string} [options.baseUrl] - API origin; trailing slashes are stripped.
     * @param {number} [options.timeoutMs] - Per-request timeout in milliseconds.
     *   A value <= 0 disables the timeout.
     * @param {Function} [options.fetch] - Custom `fetch` implementation.
     * @param {Record<string, string>} [options.headers] - Extra headers sent on
     *   every request. Reserved names are ignored.
     * @throws {Error} When no API key or no `fetch` implementation is available.
     */
    constructor(options = {}) {
        this.apiKey = resolveApiKey(options.apiKey);
        this.baseUrl = (options.baseUrl ?? DEFAULT_BASE_URL).replace(/\/+$/, "");
        this.timeoutMs = options.timeoutMs ?? DEFAULT_TIMEOUT_MS;
        const f = options.fetch ?? (typeof fetch !== "undefined" ? fetch : undefined);
        if (!f) {
            throw new Error("No fetch implementation available. Pass `fetch:` to the client " +
                "(undici, node-fetch, etc.) or upgrade to Node 18+.");
        }
        // Bind so `this` isn't lost when calling globalThis.fetch.
        this.fetchImpl = f.bind(globalThis);
        this.extraHeaders = {};
        if (options.headers) {
            // Route through setHeader() so constructor-supplied headers get the
            // same reserved-name filtering and case-insensitive de-duplication.
            for (const [k, v] of Object.entries(options.headers)) {
                this.setHeader(k, v);
            }
        }
    }
    // ── Endpoints ──────────────────────────────────────────────────────────────
    /**
     * Extract clean Markdown from a single URL.
     *
     * Throws:
     *   - `RateLimitError` — monthly plan limit reached.
     *   - `UnprocessableEntityError` — `no_content`, `target_timeout`, or
     *     `js_rendering_disabled`.
     *   - `AuthenticationError` — missing or invalid API key.
     */
    async extract(url, options = {}) {
        const body = await this.request("POST", "/extract", {
            url,
            render_js: options.renderJs === true,
        });
        return (0, models_js_1.extractResultFromResponse)(body);
    }
    /**
     * Submit a batch of URLs for concurrent extraction.
     *
     * Returns immediately with `status="queued"`. Poll with `getJob` or
     * block with `waitForJob` to collect results.
     *
     * Throws:
     *   - `Error` — `urls` is empty (checked before any request is made).
     *   - `PermissionDeniedError` — `plan_not_supported` (Free tier).
     *   - `UnprocessableEntityError` — `bulk_cap_exceeded` (50 on Pro).
     *   - `RateLimitError` — would exceed remaining monthly quota.
     */
    async bulk(urls, options = {}) {
        // Accept any iterable, not just arrays.
        const urlList = Array.from(urls);
        if (urlList.length === 0) {
            throw new Error("bulk() requires at least one URL.");
        }
        const body = await this.request("POST", "/bulk", {
            urls: urlList,
            render_js: options.renderJs === true,
        });
        return (0, models_js_1.bulkJobFromResponse)(body);
    }
    /**
     * Polymorphic job lookup — works for both bulk and crawl jobs.
     *
     * Calls `GET /bulk/{jobId}` first, then inspects the response's `kind`
     * discriminator field. If the job is actually a crawl, a second request
     * to `GET /crawl/{jobId}` fetches the full crawl shape. Returns the
     * result of `bulkJobFromResponse` or `crawlJobFromResponse` accordingly.
     *
     * `jobId` is percent-encoded before being placed in the path so that
     * characters such as `/` or `?` cannot redirect the request elsewhere.
     */
    async getJob(jobId) {
        const id = encodeURIComponent(jobId);
        const body = await this.request("GET", `/bulk/${id}`);
        // /bulk/{id} answers for any jobId today; the `kind` field tells us
        // whether we got a bulk-shaped response of a crawl job. If so,
        // re-fetch via /crawl/{id} for the proper shape.
        if (body.kind === "crawl") {
            const crawlBody = await this.request("GET", `/crawl/${id}`);
            return (0, models_js_1.crawlJobFromResponse)(crawlBody);
        }
        return (0, models_js_1.bulkJobFromResponse)(body);
    }
    /**
     * Block until a job reports `done` (or the timeout elapses). Works for
     * both bulk and crawl jobs.
     *
     * The first call uses the polymorphic `getJob` to discover the job's
     * kind. Subsequent polls go directly to the typed endpoint.
     *
     * Options:
     *   - `pollIntervalMs` — delay between polls (default 2000).
     *   - `timeoutMs` — overall budget (default 300000); `null` waits forever.
     *
     * Throws:
     *   - `Error` with message "did not finish within ..." — the job didn't
     *     finish before `timeoutMs` elapsed.
     */
    async waitForJob(jobId, options = {}) {
        const pollIntervalMs = options.pollIntervalMs ?? 2000;
        const timeoutMs = options.timeoutMs === undefined ? 300000 : options.timeoutMs;
        const deadline = timeoutMs === null ? null : Date.now() + timeoutMs;
        let job = await this.getJob(jobId);
        const isCrawl = job.kind === "crawl";
        const path = `${isCrawl ? "/crawl" : "/bulk"}/${encodeURIComponent(jobId)}`;
        while (!job.done) {
            const remainingMs = deadline === null ? Infinity : deadline - Date.now();
            if (remainingMs <= 0) {
                throw new Error(`Job ${jobId} did not finish within ${timeoutMs}ms ` +
                    `(last status: ${job.status}, ${job.completed}/${job.total})`);
            }
            // Never sleep past the deadline: a long poll interval must not
            // stretch the caller's timeout by up to a full interval.
            await sleep(Math.min(pollIntervalMs, remainingMs));
            const body = await this.request("GET", path);
            job = isCrawl
                ? (0, models_js_1.crawlJobFromResponse)(body)
                : (0, models_js_1.bulkJobFromResponse)(body);
        }
        return job;
    }
    /**
     * Crawl a site starting from `url`, BFS to `depth` (default 1).
     *
     * Returns immediately with `status="queued"`. Use `getJob` to poll, or
     * `waitForJob` to block until done — both handle crawl and bulk jobIds
     * transparently.
     *
     * Plan caps:
     *   - Free       → `PermissionDeniedError` (`plan_not_supported`)
     *   - Pro        → max depth 5, up to 1,000 pages per crawl
     *   - Enterprise → unlimited depth and pages
     *
     * Throws:
     *   - `Error` — `depth` is negative or NaN (checked before any request).
     *   - `PermissionDeniedError` — `plan_not_supported` (Free tier).
     *   - `UnprocessableEntityError` — `crawl_depth_exceeded`.
     */
    async crawl(url, options = {}) {
        const depth = options.depth ?? 1;
        // NaN would pass a bare `< 0` check and then serialize as `null`.
        if (Number.isNaN(depth) || depth < 0) {
            throw new Error("depth must be >= 0.");
        }
        const body = await this.request("POST", "/crawl", {
            url,
            depth,
            render_js: options.renderJs === true,
        });
        return (0, models_js_1.crawlJobFromResponse)(body);
    }
    // ── Custom headers ─────────────────────────────────────────────────────────
    /**
     * Add or replace a per-request header for the rest of this client's life.
     *
     * Header names are matched case-insensitively: setting `x-run-id` replaces
     * an earlier `X-Run-Id` instead of sending both.
     *
     * Authorization / Content-Type / Accept are reserved — calls that try
     * to set those are silently ignored. To rotate the bearer token, use
     * `rotateKey()`.
     */
    setHeader(name, value) {
        if (RESERVED_HEADERS.has(name.toLowerCase())) {
            return;
        }
        // Drop any differently-cased spelling first so the header is sent once.
        this.removeHeader(name);
        this.extraHeaders[name] = value;
    }
    /**
     * Remove a header previously added via `headers:` or `setHeader()`.
     * The name is matched case-insensitively.
     */
    removeHeader(name) {
        const wanted = name.toLowerCase();
        for (const key of Object.keys(this.extraHeaders)) {
            if (key.toLowerCase() === wanted) {
                delete this.extraHeaders[key];
            }
        }
    }
    /**
     * Return your usage for the current billing period.
     *
     * Does not count toward your monthly quota.
     */
    async getUsage() {
        const body = await this.request("GET", "/usage");
        return (0, models_js_1.usageFromResponse)(body);
    }
    /**
     * Mint a new API key. The current key is invalidated immediately.
     *
     * The new raw key is in the returned `apiKey` field — store it before
     * discarding the result. There is no recovery flow.
     *
     * The client auto-swaps to the new key for subsequent requests.
     *
     * Does not count toward your monthly quota.
     */
    async rotateKey() {
        const body = await this.request("POST", "/keys/rotate");
        const rotated = (0, models_js_1.rotatedKeyFromResponse)(body);
        // Only swap when the response actually carried a key.
        if (rotated.apiKey) {
            this.apiKey = rotated.apiKey;
        }
        return rotated;
    }
    /**
     * Internal: read the current API key. Exposed for tests.
     * Not part of the public, semver-stable surface.
     */
    _getApiKey() {
        return this.apiKey;
    }
    // ── Transport ──────────────────────────────────────────────────────────────
    /**
     * Send one HTTP request and return the parsed JSON body.
     *
     * @param {string} method - HTTP method.
     * @param {string} path - Path appended to `baseUrl` (must start with `/`).
     * @param {unknown} [json] - Request payload; serialized with
     *   `JSON.stringify` when not `undefined`.
     * @returns {Promise<unknown>} Whatever `parseResponse` returns for a 2xx.
     * @throws `APIConnectionError` when the request cannot be sent or the
     *   response body cannot be read (including timeout aborts); otherwise
     *   whatever `parseResponse` throws for the status/body.
     */
    async request(method, path, json) {
        const url = `${this.baseUrl}${path}`;
        const headers = mergeHeaders(this.apiKey, this.extraHeaders);
        const init = { method, headers };
        if (json !== undefined) {
            init.body = JSON.stringify(json);
        }
        let timer = null;
        if (this.timeoutMs > 0 && typeof AbortController !== "undefined") {
            const controller = new AbortController();
            init.signal = controller.signal;
            timer = setTimeout(() => controller.abort(), this.timeoutMs);
        }
        let response;
        let bodyText = "";
        try {
            try {
                response = await this.fetchImpl(url, init);
            }
            catch (err) {
                throw new errors_js_1.APIConnectionError(`Could not reach the WellMarked API: ${stringifyError(err)}`, { cause: err });
            }
            // The timer stays armed while the body streams in: fetch() resolves
            // as soon as headers arrive, so clearing it earlier would let a
            // stalled body hang the call indefinitely.
            try {
                bodyText = await response.text();
            }
            catch (err) {
                throw new errors_js_1.APIConnectionError(`Could not read API response body: ${stringifyError(err)}`, { cause: err });
            }
        }
        finally {
            if (timer !== null) {
                clearTimeout(timer);
            }
        }
        let body = null;
        if (bodyText.length > 0) {
            try {
                body = JSON.parse(bodyText);
            }
            catch {
                // Non-JSON payload: leave `body` null and let parseResponse
                // decide based on the status code.
                body = null;
            }
        }
        return parseResponse(response.status, body);
    }
}
|
|
306
|
+
exports.WellMarked = WellMarked;
|
|
307
|
+
/**
 * Turn an HTTP status plus already-decoded JSON body into a return value
 * or a thrown error.
 *
 * @param {number} statusCode - HTTP status of the response.
 * @param {unknown} body - Parsed JSON body, or `null` when there was none
 *   (or it was not valid JSON).
 * @returns {unknown} `body`, for a 2xx status with a non-null body.
 * @throws `WellMarkedError` for a 2xx status with a `null` body; otherwise
 *   whatever `fromResponse` builds for the status/body/request id.
 */
function parseResponse(statusCode, body) {
    const succeeded = statusCode >= 200 && statusCode < 300;
    if (succeeded && body !== null) {
        return body;
    }
    if (succeeded) {
        // The API contract says every documented endpoint returns a JSON
        // body on 2xx, so a null body is reported loudly instead of letting
        // callers trip over property access on null.
        throw new errors_js_1.WellMarkedError(`API returned HTTP ${statusCode} with no JSON body. ` +
            "This is a contract violation — please report it.", { statusCode });
    }
    // Only a string `request_id` on an object body is forwarded.
    const hasRequestId = Boolean(body) && typeof body === "object" && typeof body.request_id === "string";
    const requestId = hasRequestId ? body.request_id : undefined;
    throw (0, errors_js_1.fromResponse)(statusCode, body, requestId);
}
|
|
327
|
+
/**
 * Render a caught value for inclusion in an error message.
 *
 * @param {unknown} err - Anything that was thrown.
 * @returns {string} `"<name>: <message>"` for `Error` instances, otherwise
 *   `String(err)`.
 */
function stringifyError(err) {
    return err instanceof Error ? `${err.name}: ${err.message}` : String(err);
}
|