wellmarked 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,173 @@
1
+ /**
2
+ * WellMarked client.
3
+ *
4
+ * The client is a thin, typed wrapper around the HTTP API. All endpoint
5
+ * methods are async — there is no separate sync/async split as in the
6
+ * Python SDK because JavaScript I/O is async by default.
7
+ *
8
+ * import { WellMarked } from "wellmarked";
9
+ *
10
+ * const wm = new WellMarked({ apiKey: "wm_..." });
11
+ * const result = await wm.extract("https://example.com/article");
12
+ * console.log(result.markdown);
13
+ *
14
+ * The API key can also be passed via the `WELLMARKED_API_KEY` environment
15
+ * variable (Node.js), in which case `new WellMarked()` is enough.
16
+ */
17
+ import { APIStatusError } from "./errors.js";
18
+ import { type BulkJob, type CrawlJob, type ExtractResult, type RotatedKey, type Usage } from "./models.js";
19
+ /** Constructor options for the `WellMarked` client. Every field is optional. */ export interface WellMarkedOptions {
20
+ /**
21
+ * Your WellMarked API key (`wm_...`). Falls back to the
22
+ * `WELLMARKED_API_KEY` environment variable (Node.js only).
23
+ */
24
+ apiKey?: string;
25
+ /** API base URL. Override for testing. */
26
+ baseUrl?: string;
27
+ /** Per-request timeout in milliseconds. Defaults to 30000 (30s). Distinct from `WaitForJobOptions.timeoutMs`, which bounds a whole `waitForJob` call. */
28
+ timeoutMs?: number;
29
+ /**
30
+ * Bring your own `fetch`. Defaults to the global `fetch`. Useful for
31
+ * polyfills, custom agents/proxies, or test mocking.
32
+ */
33
+ fetch?: typeof fetch;
34
+ /**
35
+ * Extra headers sent on every request — useful for adding an internal
36
+ * correlation id, a custom user agent suffix, etc. Can be changed later via `setHeader()` / `removeHeader()`.
37
+ *
38
+ * Authorization / Content-Type / Accept are reserved and silently
39
+ * ignored if passed (the SDK manages those itself).
40
+ */
41
+ headers?: Record<string, string>;
42
+ }
43
+ /** Per-call options for `WellMarked.extract`. */ export interface ExtractOptions {
44
+ /**
45
+ * Use Playwright to render JS-heavy pages. Requires a Pro/Enterprise
46
+ * plan AND `ENABLE_JS_RENDERING=true` on the API instance; when the server has it off the error code is `js_rendering_disabled`.
47
+ */
48
+ renderJs?: boolean;
49
+ }
50
+ /** Per-call options for `WellMarked.bulk`. */ export interface BulkOptions {
51
+ /** Same flag as `ExtractOptions.renderJs`; presumably applied to every URL in the batch — TODO confirm against the API reference. */ renderJs?: boolean;
52
+ }
53
+ /** Per-call options for `WellMarked.crawl`. */ export interface CrawlOptions {
54
+ /** Max BFS depth from the root. Defaults to 1. Must be >= 0. A depth above the plan cap (5 on Pro) is rejected with `crawl_depth_exceeded`. */
55
+ depth?: number;
56
+ /** Same flag as `ExtractOptions.renderJs`; presumably applied to every crawled page — TODO confirm against the API reference. */ renderJs?: boolean;
57
+ }
58
+ /** Polling options for `WellMarked.waitForJob`. */ export interface WaitForJobOptions {
59
+ /** Milliseconds to sleep between polls. Defaults to 2000. */
60
+ pollIntervalMs?: number;
61
+ /** Total ms to wait before timing out. `null` waits forever. Defaults to 300000 (5 min). This is the overall wait budget, not the per-request `WellMarkedOptions.timeoutMs`. */
62
+ timeoutMs?: number | null;
63
+ }
64
+ /** Typed client for the WellMarked HTTP API; every endpoint method returns a Promise. */ export declare class WellMarked {
65
+ private apiKey; // not readonly: `rotateKey()` swaps in the newly minted key
66
+ private readonly baseUrl;
67
+ private readonly timeoutMs;
68
+ private readonly fetchImpl;
69
+ private readonly extraHeaders;
70
+ /** Create a client. `options` may be omitted entirely when the key comes from `WELLMARKED_API_KEY` (Node.js). */ constructor(options?: WellMarkedOptions);
71
+ /**
72
+ * Extract clean Markdown from a single URL.
73
+ *
74
+ * Throws:
75
+ * - `RateLimitError` — monthly plan limit reached.
76
+ * - `UnprocessableEntityError` — `no_content`, `target_timeout`, or
77
+ * `js_rendering_disabled`.
78
+ * - `AuthenticationError` — missing or invalid API key.
79
+ */
80
+ extract(url: string, options?: ExtractOptions): Promise<ExtractResult>;
81
+ /**
82
+ * Submit a batch of URLs for concurrent extraction.
83
+ *
84
+ * Returns immediately with `status="queued"`. Poll with `getJob` or
85
+ * block with `waitForJob` to collect results.
86
+ *
87
+ * Throws:
88
+ * - `PermissionDeniedError` — `plan_not_supported` (Free tier).
89
+ * - `UnprocessableEntityError` — `bulk_cap_exceeded` (50 on Pro).
90
+ * - `RateLimitError` — would exceed remaining monthly quota.
91
+ */
92
+ bulk(urls: Iterable<string>, options?: BulkOptions): Promise<BulkJob>;
93
+ /**
94
+ * Polymorphic job lookup — works for both bulk and crawl jobs.
95
+ *
96
+ * Calls `GET /bulk/{jobId}` first, then inspects the response's `kind`
97
+ * discriminator field. If the job is actually a crawl, a second request
98
+ * to `GET /crawl/{jobId}` fetches the full crawl shape (with per-item
99
+ * `depth` and the `truncated` / `truncatedReason` fields). Returns `BulkJob` or `CrawlJob`
100
+ * accordingly.
101
+ *
102
+ * Use `isCrawlJob(job)` (or check `job.kind === "crawl"`) to branch on
103
+ * crawl-specific behavior. The shared interface (`status`, `completed`,
104
+ * `total`, `results`, `done`) works on either type.
105
+ *
106
+ * Jobs are retained for 6 hours after completion; `NotFoundError` (404) covers jobs that are missing or past that window.
107
+ */
108
+ getJob(jobId: string): Promise<BulkJob | CrawlJob>;
109
+ /**
110
+ * Wait (asynchronously) until a job reaches `status="done"` (or timeout). Works for both
111
+ * bulk and crawl jobs.
112
+ *
113
+ * The first call uses the polymorphic `getJob` to discover the job's
114
+ * kind. Subsequent polls go directly to the typed endpoint, so a crawl
115
+ * job only pays the dispatch round-trip once.
116
+ *
117
+ * Throws:
118
+ * - `Error` with message "did not finish within ..." — the job didn't
119
+ * finish before `timeoutMs` elapsed. NOTE(review): documented as a plain `Error`, so an `instanceof WellMarkedError` check presumably will not match it — confirm.
120
+ */
121
+ waitForJob(jobId: string, options?: WaitForJobOptions): Promise<BulkJob | CrawlJob>;
122
+ /**
123
+ * Crawl a site starting from `url`, BFS to `depth`.
124
+ *
125
+ * Returns immediately with `status="queued"`. Use `getJob` to poll, or
126
+ * `waitForJob` to block until done — both handle crawl and bulk jobIds
127
+ * transparently.
128
+ *
129
+ * Plan caps:
130
+ * - Free → `PermissionDeniedError` (`plan_not_supported`)
131
+ * - Pro → max depth 5, up to 1,000 pages per crawl
132
+ * - Enterprise → unlimited depth and pages
133
+ *
134
+ * Throws:
135
+ * - `PermissionDeniedError` — `plan_not_supported` (Free tier).
136
+ * - `UnprocessableEntityError` — `crawl_depth_exceeded`.
137
+ */
138
+ crawl(url: string, options?: CrawlOptions): Promise<CrawlJob>;
139
+ /**
140
+ * Add or replace a per-request header for the rest of this client's life.
141
+ *
142
+ * Authorization / Content-Type / Accept are reserved — calls that try
143
+ * to set those are silently ignored. To rotate the bearer token, use
144
+ * `rotateKey()`.
145
+ */
146
+ setHeader(name: string, value: string): void;
147
+ /** Remove a header previously added via `headers:` or `setHeader()`. */
148
+ removeHeader(name: string): void;
149
+ /**
150
+ * Return your usage for the current billing period.
151
+ *
152
+ * Does not count toward your monthly quota.
153
+ */
154
+ getUsage(): Promise<Usage>;
155
+ /**
156
+ * Mint a new API key. The current key is invalidated immediately.
157
+ *
158
+ * The new raw key is in the returned `apiKey` field — store it before
159
+ * discarding the result. There is no recovery flow.
160
+ *
161
+ * The client auto-swaps to the new key for subsequent requests.
162
+ *
163
+ * Does not count toward your monthly quota.
164
+ */
165
+ rotateKey(): Promise<RotatedKey>;
166
+ /**
167
+ * Internal: read the current API key. Exposed for tests.
168
+ * Not part of the public, semver-stable surface.
169
+ */
170
+ _getApiKey(): string;
171
+ private request; // signature is hidden in the declaration output; presumably the shared HTTP helper behind the endpoint methods — confirm in the implementation
172
+ }
173
+ export { APIStatusError };
@@ -0,0 +1,66 @@
1
+ /**
2
+ * Exception hierarchy for the WellMarked SDK.
3
+ *
4
+ * Every HTTP error returned by the API is translated into a typed error
5
+ * whose class corresponds to the HTTP status and whose `code` matches the
6
+ * `error.code` field in the response body. Catch `WellMarkedError` for
7
+ * anything raised by the SDK; catch a more specific subclass when you want
8
+ * to handle one failure mode.
9
+ */
10
+ /** Optional details accepted by every SDK error constructor. */ export interface WellMarkedErrorOptions {
11
+ /** Matches the `error.code` field of the API response body. */ code?: string | undefined;
12
+ /** HTTP status of the response; presumably left unset for connection failures — TODO confirm. */ statusCode?: number | undefined;
13
+ /** Seconds until the limit resets (documented on `RateLimitError`). */ retryAfter?: number | undefined;
14
+ requestId?: string | undefined;
15
+ cause?: unknown;
16
+ }
17
+ /** Base class for the SDK's typed errors; catch it to handle anything the SDK raises. */ export declare class WellMarkedError extends Error {
18
+ readonly code: string | undefined;
19
+ readonly statusCode: number | undefined;
20
+ readonly retryAfter: number | undefined;
21
+ readonly requestId: string | undefined;
22
+ constructor(message: string, options?: WellMarkedErrorOptions);
23
+ }
24
+ /** Raised when the SDK couldn't reach the API (DNS, TCP, TLS, timeout). Extends `WellMarkedError` directly, so it is not an `APIStatusError`. */
25
+ export declare class APIConnectionError extends WellMarkedError {
26
+ constructor(message: string, options?: WellMarkedErrorOptions);
27
+ }
28
+ /** Raised for any non-2xx response from the API. Base class of the status-specific errors (`AuthenticationError`, `RateLimitError`, ...). */
29
+ export declare class APIStatusError extends WellMarkedError {
30
+ constructor(message: string, options?: WellMarkedErrorOptions);
31
+ }
32
+ /** 401 — missing or invalid API key (the key supplied via `apiKey` or `WELLMARKED_API_KEY`). */
33
+ export declare class AuthenticationError extends APIStatusError {
34
+ constructor(message: string, options?: WellMarkedErrorOptions);
35
+ }
36
+ /** 403 — account inactive, plan does not allow this operation (`code` is `plan_not_supported`, e.g. bulk/crawl on the Free tier), or job belongs to another user. */
37
+ export declare class PermissionDeniedError extends APIStatusError {
38
+ constructor(message: string, options?: WellMarkedErrorOptions);
39
+ }
40
+ /** 404 — job not found or expired past the 6-hour retention window (counted from job completion). */
41
+ export declare class NotFoundError extends APIStatusError {
42
+ constructor(message: string, options?: WellMarkedErrorOptions);
43
+ }
44
+ /**
45
+ * 422 — request was syntactically valid but couldn't be fulfilled.
46
+ *
47
+ * Common `code` values (read them from the inherited `code` property):
48
+ * - `no_content` — could not identify main content on the page
49
+ * - `target_timeout` — the target URL timed out
50
+ * - `js_rendering_disabled` — `renderJs=true` but the server has it off
51
+ * - `bulk_cap_exceeded` — more URLs than the plan allows per request
52
+ * - `crawl_depth_exceeded` — requested depth above the plan cap
53
+ */
54
+ export declare class UnprocessableEntityError extends APIStatusError {
55
+ constructor(message: string, options?: WellMarkedErrorOptions);
56
+ }
57
+ /** 429 — monthly plan limit reached. `retryAfter` (inherited from `WellMarkedError`, may be `undefined`) is the number of seconds until reset. */
58
+ export declare class RateLimitError extends APIStatusError {
59
+ constructor(message: string, options?: WellMarkedErrorOptions);
60
+ }
61
+ /** 5xx — something went wrong on the API side. Still an `APIStatusError`, so a generic status-error handler catches it. */
62
+ export declare class InternalServerError extends APIStatusError {
63
+ constructor(message: string, options?: WellMarkedErrorOptions);
64
+ }
65
+ /** Build the right error subclass for a given HTTP status + JSON body. `body` is typed `unknown` and `requestId` is optional. NOTE(review): not re-exported from the package index, so presumably internal — confirm. */
66
+ export declare function fromResponse(statusCode: number, body: unknown, requestId?: string): APIStatusError;
@@ -0,0 +1,15 @@
1
+ /**
2
+ * Official JavaScript/TypeScript SDK for the WellMarked API.
3
+ *
4
+ * import { WellMarked } from "wellmarked";
5
+ *
6
+ * const wm = new WellMarked({ apiKey: "wm_..." });
7
+ * const result = await wm.extract("https://example.com/article");
8
+ * console.log(result.markdown);
9
+ *
10
+ * See https://wellmarked.io/docs for the full API reference.
11
+ */
12
+ export { VERSION } from "./version.js";
13
+ export { WellMarked, type WellMarkedOptions, type ExtractOptions, type BulkOptions, type CrawlOptions, type WaitForJobOptions, } from "./client.js";
14
+ export { type BulkItem, type BulkJob, type CrawlItem, type CrawlJob, type ExtractionMeta, type ExtractResult, type JobStatus, type RotatedKey, type TruncatedReason, type Usage, isBulkJob, isCrawlJob, } from "./models.js";
15
+ export { APIConnectionError, APIStatusError, AuthenticationError, InternalServerError, NotFoundError, PermissionDeniedError, RateLimitError, UnprocessableEntityError, WellMarkedError, } from "./errors.js";
@@ -0,0 +1,141 @@
1
+ /**
2
+ * Typed response objects returned by the WellMarked SDK.
3
+ *
4
+ * These mirror the JSON shapes documented at https://api.wellmarked.io/docs.
5
+ *
6
+ * Response objects carry only the body fields documented in the API
7
+ * reference — they do not surface HTTP headers. Quota state lives on the
8
+ * account, so use `WellMarked.getUsage()` to read it.
9
+ */
10
+ /**
11
+ * Per-article metadata returned with each extraction.
12
+ *
13
+ * `date` is the article's published date as a string (often `null` — not
14
+ * every page publishes one). `retrievedAt` is the timestamp at which
15
+ * WellMarked actually fetched the page, populated on every successful
16
+ * extraction (the type still allows `null`, so guard before use). The two fields are independent.
17
+ */
18
+ export interface ExtractionMeta {
19
+ url: string;
20
+ title: string | null;
21
+ author: string | null;
22
+ date: string | null; // kept as a string, unlike `retrievedAt`
23
+ retrievedAt: Date | null;
24
+ }
25
+ /** Build an `ExtractionMeta` from a raw key/value record. NOTE(review): not re-exported from the package index, so presumably an internal helper — confirm. */ export declare function extractionMetaFromDict(data: Record<string, unknown>): ExtractionMeta;
26
+ /** Result of `POST /extract`, i.e. the value `WellMarked.extract` resolves to. */
27
+ export interface ExtractResult {
28
+ markdown: string;
29
+ metadata: ExtractionMeta;
30
+ requestId: string;
31
+ }
32
+ /** Build an `ExtractResult` from a response body record. NOTE(review): not re-exported from the package index, so presumably an internal helper — confirm. */ export declare function extractResultFromResponse(body: Record<string, unknown>): ExtractResult;
33
+ /**
34
+ * One entry in a bulk job's `results` list.
35
+ *
36
+ * On success, `markdown` and `metadata` are populated and `error` is null.
37
+ * On a per-URL failure, `markdown`/`metadata` are null and `error` carries
38
+ * an API error code (e.g. `target_timeout`), so inspect `ok` / `error` on each item.
39
+ */
40
+ export interface BulkItem {
41
+ url: string;
42
+ markdown: string | null;
43
+ metadata: ExtractionMeta | null;
44
+ error: string | null;
45
+ /** True when the item completed successfully (no error + has markdown). */
46
+ readonly ok: boolean;
47
+ }
48
+ /** Build a `BulkItem` from a raw key/value record. NOTE(review): not re-exported from the package index, so presumably an internal helper — confirm. */ export declare function bulkItemFromDict(data: Record<string, unknown>): BulkItem;
49
+ /** Lifecycle state shared by bulk and crawl jobs. Newly submitted jobs report "queued"; "done" is the state `waitForJob` waits for. */
50
+ export type JobStatus = "queued" | "processing" | "done";
51
+ /**
52
+ * Status of a bulk extraction job.
53
+ *
54
+ * Returned from both `POST /bulk` and `GET /bulk/{jobId}`. Also one of the
55
+ * two possible return types from the polymorphic `WellMarked.getJob` and
56
+ * `WellMarked.waitForJob` — use the `kind` discriminator (or a type guard such as `isBulkJob` / `isCrawlJob`)
57
+ * to distinguish from `CrawlJob` if you need to read crawl-specific fields.
58
+ */
59
+ export interface BulkJob {
60
+ readonly kind: "bulk";
61
+ jobId: string; // pass this to `WellMarked.getJob` / `WellMarked.waitForJob`
62
+ status: JobStatus;
63
+ total: number;
64
+ completed: number;
65
+ results: BulkItem[];
66
+ createdAt: Date | null;
67
+ finishedAt: Date | null;
68
+ /** True when `status === "done"`. */
69
+ readonly done: boolean;
70
+ }
71
+ /** Build a `BulkJob` from a response body record. NOTE(review): not re-exported from the package index, so presumably an internal helper — confirm. */ export declare function bulkJobFromResponse(body: Record<string, unknown>): BulkJob;
72
+ /**
73
+ * One page in a crawl job's `results` list.
74
+ *
75
+ * Shape mirrors `BulkItem` with an added `depth` field showing how far
76
+ * from the root URL this page sits in the BFS. As with `BulkItem`, check `ok` / `error` per page.
77
+ */
78
+ export interface CrawlItem {
79
+ url: string;
80
+ depth: number;
81
+ markdown: string | null;
82
+ metadata: ExtractionMeta | null;
83
+ error: string | null;
84
+ /** True when the page completed successfully (no error + has markdown). */
85
+ readonly ok: boolean;
86
+ }
87
+ /** Build a `CrawlItem` from a raw key/value record. NOTE(review): not re-exported from the package index, so presumably an internal helper — confirm. */ export declare function crawlItemFromDict(data: Record<string, unknown>): CrawlItem;
88
+ /** Why a crawl stopped early; the non-null values of `CrawlJob.truncatedReason`. */ export type TruncatedReason = "page_cap_reached" | "quota_exhausted";
89
+ /**
90
+ * Status of a crawl job. Returned from `POST /crawl` and `GET /crawl/{jobId}`.
91
+ *
92
+ * Two crawl-only fields:
93
+ * - `truncated` — true when the crawl stopped before exhausting
94
+ * the frontier (depth/page cap or quota). NOTE(review): `TruncatedReason` has no depth-related value — confirm how a depth stop is reported.
95
+ * - `truncatedReason` — `"page_cap_reached"` | `"quota_exhausted"` |
96
+ * `null`.
97
+ */
98
+ export interface CrawlJob {
99
+ readonly kind: "crawl";
100
+ jobId: string; // pass this to `WellMarked.getJob` / `WellMarked.waitForJob`
101
+ status: JobStatus;
102
+ total: number;
103
+ completed: number;
104
+ results: CrawlItem[];
105
+ truncated: boolean;
106
+ truncatedReason: TruncatedReason | null;
107
+ createdAt: Date | null;
108
+ finishedAt: Date | null;
109
+ /** True when `status === "done"`. */
110
+ readonly done: boolean;
111
+ }
112
+ /** Build a `CrawlJob` from a response body record. NOTE(review): not re-exported from the package index, so presumably an internal helper — confirm. */ export declare function crawlJobFromResponse(body: Record<string, unknown>): CrawlJob;
113
+ /**
114
+ * Result of `GET /usage` — current-period quota state.
115
+ *
116
+ * This is the source of truth for rate-limit / quota information. The SDK
117
+ * does not surface `X-RateLimit-*` headers on extract/bulk responses;
118
+ * call `WellMarked.getUsage()` instead (that call does not count toward the monthly quota).
119
+ */
120
+ export interface Usage {
121
+ plan: string;
122
+ period: string;
123
+ used: number;
124
+ limit: number;
125
+ remaining: number; // presumably `limit - used` — TODO confirm against the API reference
126
+ }
127
+ /** Build a `Usage` from a response body record. NOTE(review): not re-exported from the package index, so presumably an internal helper — confirm. */ export declare function usageFromResponse(body: Record<string, unknown>): Usage;
128
+ /**
129
+ * Result of `POST /keys/rotate`, i.e. the value `WellMarked.rotateKey` resolves to.
130
+ *
131
+ * `apiKey` is the new raw key — store it before discarding this object,
132
+ * there is no recovery flow. The previous key is invalidated the moment
133
+ * the rotation call returns 200.
134
+ */
135
+ export interface RotatedKey {
136
+ apiKey: string;
137
+ rotatedAt: Date | null;
138
+ }
139
+ /** Build a `RotatedKey` from a response body record. NOTE(review): not re-exported from the package index, so presumably an internal helper — confirm. */ export declare function rotatedKeyFromResponse(body: Record<string, unknown>): RotatedKey;
140
+ /** Type guard that narrows `BulkJob | CrawlJob` to `BulkJob`; presumably equivalent to checking `job.kind === "bulk"` (implementation not visible here). */ export declare function isBulkJob(job: BulkJob | CrawlJob): job is BulkJob;
141
+ /** Type guard that narrows `BulkJob | CrawlJob` to `CrawlJob`; the `getJob` docs offer `job.kind === "crawl"` as the equivalent manual check. */ export declare function isCrawlJob(job: BulkJob | CrawlJob): job is CrawlJob;
@@ -0,0 +1 @@
1
+ /** SDK version string. Must match the "version" field in package.json (1.1.0 for this release); it was left at the previous release's value. */ export declare const VERSION = "1.1.0";
package/package.json ADDED
@@ -0,0 +1,73 @@
1
+ {
2
+ "name": "wellmarked",
3
+ "version": "1.1.0",
4
+ "description": "Official JavaScript/TypeScript SDK for the WellMarked API — convert any URL to clean Markdown.",
5
+ "keywords": [
6
+ "wellmarked",
7
+ "markdown",
8
+ "scraping",
9
+ "extraction",
10
+ "html-to-markdown",
11
+ "rag",
12
+ "llm",
13
+ "pipeline"
14
+ ],
15
+ "homepage": "https://wellmarked.io",
16
+ "bugs": {
17
+ "url": "https://github.com/WellMarkedAPI/WellMarked/issues"
18
+ },
19
+ "repository": {
20
+ "type": "git",
21
+ "url": "git+https://github.com/WellMarkedAPI/WellMarked.git",
22
+ "directory": "js-sdk"
23
+ },
24
+ "license": "MIT",
25
+ "author": {
26
+ "name": "WellMarked",
27
+ "email": "support@wellmarked.io",
28
+ "url": "https://wellmarked.io"
29
+ },
30
+ "type": "module",
31
+ "main": "./dist/cjs/index.cjs",
32
+ "module": "./dist/esm/index.js",
33
+ "types": "./dist/types/index.d.ts",
34
+ "exports": {
35
+ ".": {
36
+ "types": "./dist/types/index.d.ts",
37
+ "import": "./dist/esm/index.js",
38
+ "require": "./dist/cjs/index.cjs"
39
+ },
40
+ "./package.json": "./package.json"
41
+ },
42
+ "files": [
43
+ "dist",
44
+ "src",
45
+ "README.md",
46
+ "LICENSE"
47
+ ],
48
+ "sideEffects": false,
49
+ "engines": {
50
+ "node": ">=18"
51
+ },
52
+ "scripts": {
53
+ "build": "npm run build:clean && npm run build:esm && npm run build:cjs && npm run build:types",
54
+ "build:clean": "rimraf dist",
55
+ "build:esm": "tsc -p tsconfig.build.json --module esnext --moduleResolution bundler --outDir dist/esm",
56
+ "build:cjs": "tsc -p tsconfig.build.json --module commonjs --moduleResolution node --outDir dist/cjs && node scripts/rename-cjs.mjs",
57
+ "build:types": "tsc -p tsconfig.build.json --emitDeclarationOnly --declaration --outDir dist/types",
58
+ "test": "vitest run",
59
+ "test:watch": "vitest",
60
+ "typecheck": "tsc -p tsconfig.json --noEmit",
61
+ "prepublishOnly": "npm run build && npm test"
62
+ },
63
+ "devDependencies": {
64
+ "@types/node": "^20.11.0",
65
+ "rimraf": "^5.0.5",
66
+ "typescript": "^5.4.0",
67
+ "vitest": "^1.6.0"
68
+ },
69
+ "publishConfig": {
70
+ "access": "public",
71
+ "registry": "https://registry.npmjs.org/"
72
+ }
73
+ }