@vakra-dev/reader-js 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +107 -0
- package/dist/README.md +107 -0
- package/dist/index.cjs +500 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +403 -0
- package/dist/index.d.ts +403 -0
- package/dist/index.js +460 -0
- package/dist/index.js.map +1 -0
- package/package.json +42 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/index.ts","../src/errors.ts","../src/client.ts"],"sourcesContent":["export { ReaderClient } from \"./client.js\";\nexport {\n ReaderApiError,\n InvalidRequestError,\n UnauthenticatedError,\n InsufficientCreditsError,\n UrlBlockedError,\n NotFoundError,\n ConflictError,\n RateLimitedError,\n ConcurrencyLimitedError,\n InternalServerError,\n UpstreamUnavailableError,\n ScrapeTimeoutError,\n toReaderApiError,\n} from \"./errors.js\";\nexport type { ReaderErrorCode, ApiErrorBody } from \"./errors.js\";\nexport type {\n ReaderClientConfig,\n ReadParams,\n ReadResult,\n ScrapeResult,\n ScrapeMetadata,\n Page,\n Job,\n JobStatus,\n JobMode,\n ProxyMode,\n Pagination,\n Credits,\n UsageEntry,\n StreamEvent,\n SuccessEnvelope,\n PaginatedEnvelope,\n ErrorEnvelope,\n ApiEnvelope,\n SessionInfo,\n CreateSessionParams,\n StopSessionResult,\n SessionStatus,\n} from \"./types.js\";\n","/**\n * Typed error classes mirroring the reader-api error code catalog.\n *\n * The API returns a stable `code` field on every error response. The SDK\n * branches on that code and throws a specific subclass, so callers can\n * write:\n *\n * try {\n * await client.read({ url });\n * } catch (err) {\n * if (err instanceof InsufficientCreditsError) {\n * // err.required, err.available, err.resetAt\n * }\n * }\n *\n * There is one subclass per code in the catalog. 
Unknown codes fall through\n * to the base `ReaderApiError`.\n */\n\nexport type ReaderErrorCode =\n | \"invalid_request\"\n | \"unauthenticated\"\n | \"insufficient_credits\"\n | \"url_blocked\"\n | \"not_found\"\n | \"conflict\"\n | \"rate_limited\"\n | \"concurrency_limited\"\n | \"internal_error\"\n | \"upstream_unavailable\"\n | \"scrape_timeout\";\n\nexport interface ApiErrorBody {\n code: ReaderErrorCode | string;\n message: string;\n details?: Record<string, unknown>;\n docsUrl?: string;\n}\n\nexport class ReaderApiError extends Error {\n readonly code: string;\n readonly httpStatus: number;\n readonly details?: Record<string, unknown>;\n readonly docsUrl?: string;\n readonly requestId?: string;\n\n constructor(body: ApiErrorBody, httpStatus: number, requestId?: string) {\n super(body.message);\n this.name = \"ReaderApiError\";\n this.code = body.code;\n this.httpStatus = httpStatus;\n this.details = body.details;\n this.docsUrl = body.docsUrl;\n this.requestId = requestId;\n }\n}\n\nexport class InvalidRequestError extends ReaderApiError {\n constructor(body: ApiErrorBody, status: number, requestId?: string) {\n super(body, status, requestId);\n this.name = \"InvalidRequestError\";\n }\n}\n\nexport class UnauthenticatedError extends ReaderApiError {\n constructor(body: ApiErrorBody, status: number, requestId?: string) {\n super(body, status, requestId);\n this.name = \"UnauthenticatedError\";\n }\n}\n\nexport class InsufficientCreditsError extends ReaderApiError {\n readonly required?: number;\n readonly available?: number;\n readonly resetAt?: string;\n\n constructor(body: ApiErrorBody, status: number, requestId?: string) {\n super(body, status, requestId);\n this.name = \"InsufficientCreditsError\";\n this.required = body.details?.required as number | undefined;\n this.available = body.details?.available as number | undefined;\n this.resetAt = body.details?.resetAt as string | undefined;\n }\n}\n\nexport class UrlBlockedError extends ReaderApiError {\n 
readonly url?: string;\n readonly reason?: string;\n\n constructor(body: ApiErrorBody, status: number, requestId?: string) {\n super(body, status, requestId);\n this.name = \"UrlBlockedError\";\n this.url = body.details?.url as string | undefined;\n this.reason = body.details?.reason as string | undefined;\n }\n}\n\nexport class NotFoundError extends ReaderApiError {\n constructor(body: ApiErrorBody, status: number, requestId?: string) {\n super(body, status, requestId);\n this.name = \"NotFoundError\";\n }\n}\n\nexport class ConflictError extends ReaderApiError {\n constructor(body: ApiErrorBody, status: number, requestId?: string) {\n super(body, status, requestId);\n this.name = \"ConflictError\";\n }\n}\n\nexport class RateLimitedError extends ReaderApiError {\n readonly retryAfterSeconds?: number;\n readonly limit?: number;\n readonly windowSeconds?: number;\n\n constructor(body: ApiErrorBody, status: number, requestId?: string) {\n super(body, status, requestId);\n this.name = \"RateLimitedError\";\n this.retryAfterSeconds = body.details?.retryAfterSeconds as number | undefined;\n this.limit = body.details?.limit as number | undefined;\n this.windowSeconds = body.details?.windowSeconds as number | undefined;\n }\n}\n\nexport class ConcurrencyLimitedError extends ReaderApiError {\n readonly active?: number;\n readonly max?: number;\n\n constructor(body: ApiErrorBody, status: number, requestId?: string) {\n super(body, status, requestId);\n this.name = \"ConcurrencyLimitedError\";\n this.active = body.details?.active as number | undefined;\n this.max = body.details?.max as number | undefined;\n }\n}\n\nexport class InternalServerError extends ReaderApiError {\n constructor(body: ApiErrorBody, status: number, requestId?: string) {\n super(body, status, requestId);\n this.name = \"InternalServerError\";\n }\n}\n\nexport class UpstreamUnavailableError extends ReaderApiError {\n constructor(body: ApiErrorBody, status: number, requestId?: string) {\n super(body, 
status, requestId);\n this.name = \"UpstreamUnavailableError\";\n }\n}\n\nexport class ScrapeTimeoutError extends ReaderApiError {\n readonly timeoutMs?: number;\n\n constructor(body: ApiErrorBody, status: number, requestId?: string) {\n super(body, status, requestId);\n this.name = \"ScrapeTimeoutError\";\n this.timeoutMs = body.details?.timeoutMs as number | undefined;\n }\n}\n\n/**\n * Construct the right error subclass from an error response body.\n * Unknown codes fall through to the base class.\n */\nexport function toReaderApiError(\n body: ApiErrorBody,\n httpStatus: number,\n requestId?: string,\n): ReaderApiError {\n switch (body.code) {\n case \"invalid_request\":\n return new InvalidRequestError(body, httpStatus, requestId);\n case \"unauthenticated\":\n return new UnauthenticatedError(body, httpStatus, requestId);\n case \"insufficient_credits\":\n return new InsufficientCreditsError(body, httpStatus, requestId);\n case \"url_blocked\":\n return new UrlBlockedError(body, httpStatus, requestId);\n case \"not_found\":\n return new NotFoundError(body, httpStatus, requestId);\n case \"conflict\":\n return new ConflictError(body, httpStatus, requestId);\n case \"rate_limited\":\n return new RateLimitedError(body, httpStatus, requestId);\n case \"concurrency_limited\":\n return new ConcurrencyLimitedError(body, httpStatus, requestId);\n case \"internal_error\":\n return new InternalServerError(body, httpStatus, requestId);\n case \"upstream_unavailable\":\n return new UpstreamUnavailableError(body, httpStatus, requestId);\n case \"scrape_timeout\":\n return new ScrapeTimeoutError(body, httpStatus, requestId);\n default:\n return new ReaderApiError(body, httpStatus, requestId);\n }\n}\n","/**\n * Reader SDK Client\n *\n * @example\n * import { ReaderClient } from \"@vakra-dev/reader-js\";\n *\n * const client = new ReaderClient({ apiKey: \"rdr_your_key\" });\n *\n * // Synchronous scrape (single URL)\n * const result = await client.read({ url: 
\"https://example.com\" });\n * if (result.kind === \"scrape\") {\n * console.log(result.data.markdown);\n * }\n *\n * // Batch (returns a completed Job with all results collected)\n * const batch = await client.read({ urls: [\"url1\", \"url2\"] });\n * if (batch.kind === \"job\") {\n * for (const page of batch.data.results) {\n * console.log(page.url, page.markdown?.length);\n * }\n * }\n */\n\nimport type {\n ReaderClientConfig,\n ReadParams,\n ReadResult,\n ScrapeResult,\n Job,\n Credits,\n Page,\n StreamEvent,\n SuccessEnvelope,\n PaginatedEnvelope,\n ErrorEnvelope,\n SessionInfo,\n CreateSessionParams,\n StopSessionResult,\n} from \"./types.js\";\nimport {\n toReaderApiError,\n ReaderApiError,\n ScrapeTimeoutError,\n RateLimitedError,\n} from \"./errors.js\";\n\nconst DEFAULT_BASE_URL = \"https://api.reader.dev\";\nconst DEFAULT_TIMEOUT = 60_000;\nconst DEFAULT_MAX_RETRIES = 2;\nconst DEFAULT_POLL_INTERVAL = 2_000;\nconst DEFAULT_POLL_TIMEOUT = 300_000; // 5 minutes\n\ninterface JobWithPagination {\n data: Job;\n pagination: { total: number; skip: number; limit: number; hasMore: boolean; next?: string };\n}\n\nexport class ReaderClient {\n private apiKey: string;\n private baseUrl: string;\n private timeout: number;\n private maxRetries: number;\n private extraHeaders: Record<string, string>;\n private _sessions: SessionsAPI | null = null;\n\n constructor(config: ReaderClientConfig) {\n if (!config.apiKey) {\n throw new Error(\"API key is required\");\n }\n this.apiKey = config.apiKey;\n this.baseUrl = (config.baseUrl || DEFAULT_BASE_URL).replace(/\\/$/, \"\");\n this.timeout = config.timeout || DEFAULT_TIMEOUT;\n this.maxRetries = config.maxRetries ?? DEFAULT_MAX_RETRIES;\n this.extraHeaders = config.headers || {};\n }\n\n /**\n * Browser sessions API.\n *\n * @example\n * ```typescript\n * const session = await client.sessions.create();\n * const browser = await chromium.connectOverCDP(session.wsEndpoint);\n * // ... 
use Playwright ...\n * await client.sessions.stop(session.sessionId);\n * ```\n */\n get sessions(): SessionsAPI {\n if (!this._sessions) {\n this._sessions = new SessionsAPI(this.request.bind(this));\n }\n return this._sessions;\n }\n\n /**\n * Read (scrape, batch, or crawl) one or more URLs.\n *\n * - Single URL → sync scrape, returns immediately with `{ kind: \"scrape\", data }`\n * - Multiple URLs or URL + maxDepth/maxPages → async job; this method polls\n * until the job terminates and returns `{ kind: \"job\", data }`.\n */\n async read(params: ReadParams): Promise<ReadResult> {\n const envelope = await this.request<SuccessEnvelope<unknown>>(\n \"POST\",\n \"/v1/read\",\n params,\n );\n\n const data = envelope.data as Record<string, unknown>;\n\n // Async job response: data.id + data.status present, no markdown/html/metadata\n if (\n data &&\n typeof data === \"object\" &&\n \"status\" in data &&\n \"mode\" in data &&\n !(\"markdown\" in data) &&\n !(\"metadata\" in data)\n ) {\n const jobId = String((data as { id: unknown }).id);\n const job = await this.waitForJob(jobId);\n return { kind: \"job\", data: job };\n }\n\n // Synchronous scrape: data has markdown/html/metadata\n return { kind: \"scrape\", data: data as unknown as ScrapeResult };\n }\n\n /**\n * Get job status and a single page of results.\n */\n async getJob(\n jobId: string,\n opts?: { skip?: number; limit?: number },\n ): Promise<{ job: Job; hasMore: boolean; next?: string }> {\n const query = new URLSearchParams();\n if (opts?.skip !== undefined) query.set(\"skip\", String(opts.skip));\n if (opts?.limit !== undefined) query.set(\"limit\", String(opts.limit));\n const qs = query.toString();\n\n const envelope = await this.request<JobWithPagination>(\n \"GET\",\n `/v1/jobs/${jobId}${qs ? 
`?${qs}` : \"\"}`,\n );\n\n return {\n job: envelope.data,\n hasMore: envelope.pagination.hasMore,\n next: envelope.pagination.next,\n };\n }\n\n /**\n * Fetch all job result pages by following pagination.\n */\n async getAllJobResults(jobId: string): Promise<Page[]> {\n const pages: Page[] = [];\n let skip = 0;\n const limit = 100;\n\n while (true) {\n const { job, hasMore } = await this.getJob(jobId, { skip, limit });\n pages.push(...(job.results ?? []));\n if (!hasMore) break;\n skip += limit;\n }\n\n return pages;\n }\n\n /**\n * Cancel a job. Throws `ConflictError` if the job is already terminal.\n */\n async cancelJob(jobId: string): Promise<void> {\n await this.request(\"DELETE\", `/v1/jobs/${jobId}`);\n }\n\n /**\n * Retry the failed URLs in a job. Throws `InvalidRequestError` if no\n * failed URLs exist.\n */\n async retryJob(jobId: string): Promise<{ id: string; status: string; retrying: number }> {\n const envelope = await this.request<\n SuccessEnvelope<{ id: string; status: string; retrying: number }>\n >(\"POST\", `/v1/jobs/${jobId}/retry`);\n return envelope.data;\n }\n\n /**\n * Poll a job until it completes, fails, or is cancelled. Collects all\n * paginated results when complete.\n */\n async waitForJob(\n jobId: string,\n options?: { pollInterval?: number; timeout?: number },\n ): Promise<Job> {\n const interval = options?.pollInterval ?? DEFAULT_POLL_INTERVAL;\n const timeout = options?.timeout ?? 
DEFAULT_POLL_TIMEOUT;\n const start = Date.now();\n\n while (Date.now() - start < timeout) {\n const { job } = await this.getJob(jobId, { limit: 1 });\n\n if (\n job.status === \"completed\" ||\n job.status === \"failed\" ||\n job.status === \"cancelled\"\n ) {\n if (job.status === \"completed\") {\n job.results = await this.getAllJobResults(jobId);\n }\n return job;\n }\n\n await sleep(interval);\n }\n\n throw new ScrapeTimeoutError(\n {\n code: \"scrape_timeout\",\n message: `Job ${jobId} polling timed out after ${timeout}ms`,\n details: { timeoutMs: timeout },\n },\n 504,\n );\n }\n\n /**\n * Stream job results as they arrive via polling.\n *\n * @example\n * for await (const event of client.stream(jobId)) {\n * if (event.type === \"page\") console.log(event.data.url);\n * if (event.type === \"done\") break;\n * }\n */\n async *stream(\n jobId: string,\n options?: { pollInterval?: number; timeout?: number },\n ): AsyncGenerator<StreamEvent> {\n const interval = options?.pollInterval ?? DEFAULT_POLL_INTERVAL;\n const timeout = options?.timeout ?? DEFAULT_POLL_TIMEOUT;\n const start = Date.now();\n let lastCompleted = 0;\n\n while (Date.now() - start < timeout) {\n const { job } = await this.getJob(jobId, { skip: lastCompleted, limit: 100 });\n\n yield {\n type: \"progress\",\n completed: job.completed,\n total: job.total,\n status: job.status,\n };\n\n for (const page of job.results ?? 
[]) {\n if (page.error) {\n yield { type: \"error\", url: page.url, error: page.error };\n } else {\n yield { type: \"page\", data: page };\n }\n lastCompleted += 1;\n }\n\n if (\n job.status === \"completed\" ||\n job.status === \"failed\" ||\n job.status === \"cancelled\"\n ) {\n yield {\n type: \"done\",\n completed: job.completed,\n total: job.total,\n status: job.status,\n };\n return;\n }\n\n await sleep(interval);\n }\n\n throw new ScrapeTimeoutError(\n {\n code: \"scrape_timeout\",\n message: `Job ${jobId} stream timed out`,\n details: { timeoutMs: timeout },\n },\n 504,\n );\n }\n\n /**\n * Get the current credit balance for this workspace.\n */\n async getCredits(): Promise<Credits> {\n const envelope = await this.request<SuccessEnvelope<Credits>>(\"GET\", \"/v1/usage/credits\");\n return envelope.data;\n }\n\n // --- Internal ---\n\n private async request<T>(method: string, path: string, body?: unknown): Promise<T> {\n const url = path.startsWith(\"http\") ? path : `${this.baseUrl}${path}`;\n let lastError: Error | null = null;\n\n for (let attempt = 0; attempt <= this.maxRetries; attempt++) {\n try {\n const controller = new AbortController();\n const timeoutId = setTimeout(() => controller.abort(), this.timeout);\n\n const res = await fetch(url, {\n method,\n headers: {\n \"Content-Type\": \"application/json\",\n \"x-api-key\": this.apiKey,\n ...this.extraHeaders,\n },\n body: body ? JSON.stringify(body) : undefined,\n signal: controller.signal,\n });\n\n clearTimeout(timeoutId);\n\n const requestId = res.headers.get(\"x-request-id\") ?? 
undefined;\n const parsed = (await res.json().catch(() => null)) as\n | SuccessEnvelope<unknown>\n | PaginatedEnvelope<unknown>\n | ErrorEnvelope\n | null;\n\n if (!res.ok) {\n if (parsed && \"error\" in parsed && parsed.error) {\n const err = toReaderApiError(parsed.error, res.status, requestId);\n\n // Don't retry client errors except 429\n if (res.status < 500 && res.status !== 429) throw err;\n\n // Honor Retry-After from the rate-limited response\n if (err instanceof RateLimitedError && err.retryAfterSeconds) {\n await sleep(err.retryAfterSeconds * 1000);\n }\n\n lastError = err;\n } else {\n const genericErr = new ReaderApiError(\n {\n code: \"internal_error\",\n message: `Request failed with status ${res.status}`,\n },\n res.status,\n requestId,\n );\n if (res.status < 500) throw genericErr;\n lastError = genericErr;\n }\n } else {\n return parsed as unknown as T;\n }\n } catch (err) {\n if (err instanceof ReaderApiError) {\n if (err.httpStatus < 500 && err.httpStatus !== 429) throw err;\n lastError = err;\n } else if (err instanceof Error) {\n if (err.name === \"AbortError\") {\n lastError = new ReaderApiError(\n { code: \"scrape_timeout\", message: \"Request timed out\" },\n 504,\n );\n } else {\n lastError = err;\n }\n }\n }\n\n // Exponential backoff before retry\n if (attempt < this.maxRetries) {\n await sleep(Math.pow(2, attempt) * 1000);\n }\n }\n\n throw (\n lastError ??\n new ReaderApiError({ code: \"internal_error\", message: \"Request failed\" }, 500)\n );\n }\n}\n\n// ─── Sessions API ────────────────────────────────────────────────────\n\ntype RequestFn = <T>(method: string, path: string, body?: unknown) => Promise<T>;\n\nclass SessionsAPI {\n constructor(private request: RequestFn) {}\n\n /**\n * Create a browser session. 
Returns a CDP WebSocket URL for\n * Playwright/Puppeteer connection.\n */\n async create(params?: CreateSessionParams): Promise<SessionInfo> {\n const envelope = await this.request<SuccessEnvelope<SessionInfo>>(\n \"POST\",\n \"/v1/sessions\",\n params ?? {},\n );\n return envelope.data;\n }\n\n /**\n * Get session status.\n */\n async get(sessionId: string): Promise<SessionInfo> {\n const envelope = await this.request<SuccessEnvelope<SessionInfo>>(\n \"GET\",\n `/v1/sessions/${sessionId}`,\n );\n return envelope.data;\n }\n\n /**\n * Stop a browser session.\n */\n async stop(sessionId: string): Promise<StopSessionResult> {\n const envelope = await this.request<SuccessEnvelope<StopSessionResult>>(\n \"DELETE\",\n `/v1/sessions/${sessionId}`,\n );\n return envelope.data;\n }\n\n /**\n * List active sessions.\n */\n async list(): Promise<SessionInfo[]> {\n const envelope = await this.request<SuccessEnvelope<SessionInfo[]>>(\n \"GET\",\n \"/v1/sessions\",\n );\n return envelope.data;\n }\n}\n\nfunction sleep(ms: number): Promise<void> {\n return new Promise((resolve) => setTimeout(resolve, 
ms));\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACuCO,IAAM,iBAAN,cAA6B,MAAM;AAAA,EAC/B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAET,YAAY,MAAoB,YAAoB,WAAoB;AACtE,UAAM,KAAK,OAAO;AAClB,SAAK,OAAO;AACZ,SAAK,OAAO,KAAK;AACjB,SAAK,aAAa;AAClB,SAAK,UAAU,KAAK;AACpB,SAAK,UAAU,KAAK;AACpB,SAAK,YAAY;AAAA,EACnB;AACF;AAEO,IAAM,sBAAN,cAAkC,eAAe;AAAA,EACtD,YAAY,MAAoB,QAAgB,WAAoB;AAClE,UAAM,MAAM,QAAQ,SAAS;AAC7B,SAAK,OAAO;AAAA,EACd;AACF;AAEO,IAAM,uBAAN,cAAmC,eAAe;AAAA,EACvD,YAAY,MAAoB,QAAgB,WAAoB;AAClE,UAAM,MAAM,QAAQ,SAAS;AAC7B,SAAK,OAAO;AAAA,EACd;AACF;AAEO,IAAM,2BAAN,cAAuC,eAAe;AAAA,EAClD;AAAA,EACA;AAAA,EACA;AAAA,EAET,YAAY,MAAoB,QAAgB,WAAoB;AAClE,UAAM,MAAM,QAAQ,SAAS;AAC7B,SAAK,OAAO;AACZ,SAAK,WAAW,KAAK,SAAS;AAC9B,SAAK,YAAY,KAAK,SAAS;AAC/B,SAAK,UAAU,KAAK,SAAS;AAAA,EAC/B;AACF;AAEO,IAAM,kBAAN,cAA8B,eAAe;AAAA,EACzC;AAAA,EACA;AAAA,EAET,YAAY,MAAoB,QAAgB,WAAoB;AAClE,UAAM,MAAM,QAAQ,SAAS;AAC7B,SAAK,OAAO;AACZ,SAAK,MAAM,KAAK,SAAS;AACzB,SAAK,SAAS,KAAK,SAAS;AAAA,EAC9B;AACF;AAEO,IAAM,gBAAN,cAA4B,eAAe;AAAA,EAChD,YAAY,MAAoB,QAAgB,WAAoB;AAClE,UAAM,MAAM,QAAQ,SAAS;AAC7B,SAAK,OAAO;AAAA,EACd;AACF;AAEO,IAAM,gBAAN,cAA4B,eAAe;AAAA,EAChD,YAAY,MAAoB,QAAgB,WAAoB;AAClE,UAAM,MAAM,QAAQ,SAAS;AAC7B,SAAK,OAAO;AAAA,EACd;AACF;AAEO,IAAM,mBAAN,cAA+B,eAAe;AAAA,EAC1C;AAAA,EACA;AAAA,EACA;AAAA,EAET,YAAY,MAAoB,QAAgB,WAAoB;AAClE,UAAM,MAAM,QAAQ,SAAS;AAC7B,SAAK,OAAO;AACZ,SAAK,oBAAoB,KAAK,SAAS;AACvC,SAAK,QAAQ,KAAK,SAAS;AAC3B,SAAK,gBAAgB,KAAK,SAAS;AAAA,EACrC;AACF;AAEO,IAAM,0BAAN,cAAsC,eAAe;AAAA,EACjD;AAAA,EACA;AAAA,EAET,YAAY,MAAoB,QAAgB,WAAoB;AAClE,UAAM,MAAM,QAAQ,SAAS;AAC7B,SAAK,OAAO;AACZ,SAAK,SAAS,KAAK,SAAS;AAC5B,SAAK,MAAM,KAAK,SAAS;AAAA,EAC3B;AACF;AAEO,IAAM,sBAAN,cAAkC,eAAe;AAAA,EACtD,YAAY,MAAoB,QAAgB,WAAoB;AAClE,UAAM,MAAM,QAAQ,SAAS;AAC7B,SAAK,OAAO;AAAA,EACd;AACF;AAEO,IAAM,2BAAN,cAAuC,eAAe;AAAA,EAC3D,YAAY,MAAoB,QAAgB,WAAoB;AAClE,UAAM,MAAM,QAAQ,SAAS;AAC7B,SAAK,OAAO;AAAA,EACd;AACF;AAEO,IAAM,qBAAN,cAAiC,eAAe;AAAA,EAC5C;AAAA,EAET,YAAY,MAAoB,Q
AAgB,WAAoB;AAClE,UAAM,MAAM,QAAQ,SAAS;AAC7B,SAAK,OAAO;AACZ,SAAK,YAAY,KAAK,SAAS;AAAA,EACjC;AACF;AAMO,SAAS,iBACd,MACA,YACA,WACgB;AAChB,UAAQ,KAAK,MAAM;AAAA,IACjB,KAAK;AACH,aAAO,IAAI,oBAAoB,MAAM,YAAY,SAAS;AAAA,IAC5D,KAAK;AACH,aAAO,IAAI,qBAAqB,MAAM,YAAY,SAAS;AAAA,IAC7D,KAAK;AACH,aAAO,IAAI,yBAAyB,MAAM,YAAY,SAAS;AAAA,IACjE,KAAK;AACH,aAAO,IAAI,gBAAgB,MAAM,YAAY,SAAS;AAAA,IACxD,KAAK;AACH,aAAO,IAAI,cAAc,MAAM,YAAY,SAAS;AAAA,IACtD,KAAK;AACH,aAAO,IAAI,cAAc,MAAM,YAAY,SAAS;AAAA,IACtD,KAAK;AACH,aAAO,IAAI,iBAAiB,MAAM,YAAY,SAAS;AAAA,IACzD,KAAK;AACH,aAAO,IAAI,wBAAwB,MAAM,YAAY,SAAS;AAAA,IAChE,KAAK;AACH,aAAO,IAAI,oBAAoB,MAAM,YAAY,SAAS;AAAA,IAC5D,KAAK;AACH,aAAO,IAAI,yBAAyB,MAAM,YAAY,SAAS;AAAA,IACjE,KAAK;AACH,aAAO,IAAI,mBAAmB,MAAM,YAAY,SAAS;AAAA,IAC3D;AACE,aAAO,IAAI,eAAe,MAAM,YAAY,SAAS;AAAA,EACzD;AACF;;;ACtJA,IAAM,mBAAmB;AACzB,IAAM,kBAAkB;AACxB,IAAM,sBAAsB;AAC5B,IAAM,wBAAwB;AAC9B,IAAM,uBAAuB;AAOtB,IAAM,eAAN,MAAmB;AAAA,EAChB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,YAAgC;AAAA,EAExC,YAAY,QAA4B;AACtC,QAAI,CAAC,OAAO,QAAQ;AAClB,YAAM,IAAI,MAAM,qBAAqB;AAAA,IACvC;AACA,SAAK,SAAS,OAAO;AACrB,SAAK,WAAW,OAAO,WAAW,kBAAkB,QAAQ,OAAO,EAAE;AACrE,SAAK,UAAU,OAAO,WAAW;AACjC,SAAK,aAAa,OAAO,cAAc;AACvC,SAAK,eAAe,OAAO,WAAW,CAAC;AAAA,EACzC;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAaA,IAAI,WAAwB;AAC1B,QAAI,CAAC,KAAK,WAAW;AACnB,WAAK,YAAY,IAAI,YAAY,KAAK,QAAQ,KAAK,IAAI,CAAC;AAAA,IAC1D;AACA,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,KAAK,QAAyC;AAClD,UAAM,WAAW,MAAM,KAAK;AAAA,MAC1B;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAEA,UAAM,OAAO,SAAS;AAGtB,QACE,QACA,OAAO,SAAS,YAChB,YAAY,QACZ,UAAU,QACV,EAAE,cAAc,SAChB,EAAE,cAAc,OAChB;AACA,YAAM,QAAQ,OAAQ,KAAyB,EAAE;AACjD,YAAM,MAAM,MAAM,KAAK,WAAW,KAAK;AACvC,aAAO,EAAE,MAAM,OAAO,MAAM,IAAI;AAAA,IAClC;AAGA,WAAO,EAAE,MAAM,UAAU,KAAsC;AAAA,EACjE;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,OACJ,OACA,MACwD;AACxD,UAAM,QAAQ,IAAI,gBAAgB;AAClC,QAAI,MAAM,SAAS,OAAW,OAAM,IAAI,QAAQ,OAAO,KAAK,IAAI,CAAC;AACjE,QAAI,MAAM,UAAU,OAAW,OAAM,IAAI,SAAS,OAAO,KAAK,KAAK,CAAC;AACpE,UAA
M,KAAK,MAAM,SAAS;AAE1B,UAAM,WAAW,MAAM,KAAK;AAAA,MAC1B;AAAA,MACA,YAAY,KAAK,GAAG,KAAK,IAAI,EAAE,KAAK,EAAE;AAAA,IACxC;AAEA,WAAO;AAAA,MACL,KAAK,SAAS;AAAA,MACd,SAAS,SAAS,WAAW;AAAA,MAC7B,MAAM,SAAS,WAAW;AAAA,IAC5B;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,iBAAiB,OAAgC;AACrD,UAAM,QAAgB,CAAC;AACvB,QAAI,OAAO;AACX,UAAM,QAAQ;AAEd,WAAO,MAAM;AACX,YAAM,EAAE,KAAK,QAAQ,IAAI,MAAM,KAAK,OAAO,OAAO,EAAE,MAAM,MAAM,CAAC;AACjE,YAAM,KAAK,GAAI,IAAI,WAAW,CAAC,CAAE;AACjC,UAAI,CAAC,QAAS;AACd,cAAQ;AAAA,IACV;AAEA,WAAO;AAAA,EACT;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,UAAU,OAA8B;AAC5C,UAAM,KAAK,QAAQ,UAAU,YAAY,KAAK,EAAE;AAAA,EAClD;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,MAAM,SAAS,OAA0E;AACvF,UAAM,WAAW,MAAM,KAAK,QAE1B,QAAQ,YAAY,KAAK,QAAQ;AACnC,WAAO,SAAS;AAAA,EAClB;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,MAAM,WACJ,OACA,SACc;AACd,UAAM,WAAW,SAAS,gBAAgB;AAC1C,UAAM,UAAU,SAAS,WAAW;AACpC,UAAM,QAAQ,KAAK,IAAI;AAEvB,WAAO,KAAK,IAAI,IAAI,QAAQ,SAAS;AACnC,YAAM,EAAE,IAAI,IAAI,MAAM,KAAK,OAAO,OAAO,EAAE,OAAO,EAAE,CAAC;AAErD,UACE,IAAI,WAAW,eACf,IAAI,WAAW,YACf,IAAI,WAAW,aACf;AACA,YAAI,IAAI,WAAW,aAAa;AAC9B,cAAI,UAAU,MAAM,KAAK,iBAAiB,KAAK;AAAA,QACjD;AACA,eAAO;AAAA,MACT;AAEA,YAAM,MAAM,QAAQ;AAAA,IACtB;AAEA,UAAM,IAAI;AAAA,MACR;AAAA,QACE,MAAM;AAAA,QACN,SAAS,OAAO,KAAK,4BAA4B,OAAO;AAAA,QACxD,SAAS,EAAE,WAAW,QAAQ;AAAA,MAChC;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAWA,OAAO,OACL,OACA,SAC6B;AAC7B,UAAM,WAAW,SAAS,gBAAgB;AAC1C,UAAM,UAAU,SAAS,WAAW;AACpC,UAAM,QAAQ,KAAK,IAAI;AACvB,QAAI,gBAAgB;AAEpB,WAAO,KAAK,IAAI,IAAI,QAAQ,SAAS;AACnC,YAAM,EAAE,IAAI,IAAI,MAAM,KAAK,OAAO,OAAO,EAAE,MAAM,eAAe,OAAO,IAAI,CAAC;AAE5E,YAAM;AAAA,QACJ,MAAM;AAAA,QACN,WAAW,IAAI;AAAA,QACf,OAAO,IAAI;AAAA,QACX,QAAQ,IAAI;AAAA,MACd;AAEA,iBAAW,QAAQ,IAAI,WAAW,CAAC,GAAG;AACpC,YAAI,KAAK,OAAO;AACd,gBAAM,EAAE,MAAM,SAAS,KAAK,KAAK,KAAK,OAAO,KAAK,MAAM;AAAA,QAC1D,OAAO;AACL,gBAAM,EAAE,MAAM,QAAQ,MAAM,KAAK;AAAA,QACnC;AACA,yBAAiB;AAAA,MACnB;AAEA,UACE,IAAI,WAAW,eACf,IAAI,WAAW,YACf,IAAI,WAAW,aACf;AACA,cAAM;AAAA,UACJ,MAAM;AAAA,UACN,WAAW,IAAI;AAAA,UACf,OAAO,IAAI;AAAA,UACX,QAAQ,IAAI;
AAAA,QACd;AACA;AAAA,MACF;AAEA,YAAM,MAAM,QAAQ;AAAA,IACtB;AAEA,UAAM,IAAI;AAAA,MACR;AAAA,QACE,MAAM;AAAA,QACN,SAAS,OAAO,KAAK;AAAA,QACrB,SAAS,EAAE,WAAW,QAAQ;AAAA,MAChC;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,aAA+B;AACnC,UAAM,WAAW,MAAM,KAAK,QAAkC,OAAO,mBAAmB;AACxF,WAAO,SAAS;AAAA,EAClB;AAAA;AAAA,EAIA,MAAc,QAAW,QAAgB,MAAc,MAA4B;AACjF,UAAM,MAAM,KAAK,WAAW,MAAM,IAAI,OAAO,GAAG,KAAK,OAAO,GAAG,IAAI;AACnE,QAAI,YAA0B;AAE9B,aAAS,UAAU,GAAG,WAAW,KAAK,YAAY,WAAW;AAC3D,UAAI;AACF,cAAM,aAAa,IAAI,gBAAgB;AACvC,cAAM,YAAY,WAAW,MAAM,WAAW,MAAM,GAAG,KAAK,OAAO;AAEnE,cAAM,MAAM,MAAM,MAAM,KAAK;AAAA,UAC3B;AAAA,UACA,SAAS;AAAA,YACP,gBAAgB;AAAA,YAChB,aAAa,KAAK;AAAA,YAClB,GAAG,KAAK;AAAA,UACV;AAAA,UACA,MAAM,OAAO,KAAK,UAAU,IAAI,IAAI;AAAA,UACpC,QAAQ,WAAW;AAAA,QACrB,CAAC;AAED,qBAAa,SAAS;AAEtB,cAAM,YAAY,IAAI,QAAQ,IAAI,cAAc,KAAK;AACrD,cAAM,SAAU,MAAM,IAAI,KAAK,EAAE,MAAM,MAAM,IAAI;AAMjD,YAAI,CAAC,IAAI,IAAI;AACX,cAAI,UAAU,WAAW,UAAU,OAAO,OAAO;AAC/C,kBAAM,MAAM,iBAAiB,OAAO,OAAO,IAAI,QAAQ,SAAS;AAGhE,gBAAI,IAAI,SAAS,OAAO,IAAI,WAAW,IAAK,OAAM;AAGlD,gBAAI,eAAe,oBAAoB,IAAI,mBAAmB;AAC5D,oBAAM,MAAM,IAAI,oBAAoB,GAAI;AAAA,YAC1C;AAEA,wBAAY;AAAA,UACd,OAAO;AACL,kBAAM,aAAa,IAAI;AAAA,cACrB;AAAA,gBACE,MAAM;AAAA,gBACN,SAAS,8BAA8B,IAAI,MAAM;AAAA,cACnD;AAAA,cACA,IAAI;AAAA,cACJ;AAAA,YACF;AACA,gBAAI,IAAI,SAAS,IAAK,OAAM;AAC5B,wBAAY;AAAA,UACd;AAAA,QACF,OAAO;AACL,iBAAO;AAAA,QACT;AAAA,MACF,SAAS,KAAK;AACZ,YAAI,eAAe,gBAAgB;AACjC,cAAI,IAAI,aAAa,OAAO,IAAI,eAAe,IAAK,OAAM;AAC1D,sBAAY;AAAA,QACd,WAAW,eAAe,OAAO;AAC/B,cAAI,IAAI,SAAS,cAAc;AAC7B,wBAAY,IAAI;AAAA,cACd,EAAE,MAAM,kBAAkB,SAAS,oBAAoB;AAAA,cACvD;AAAA,YACF;AAAA,UACF,OAAO;AACL,wBAAY;AAAA,UACd;AAAA,QACF;AAAA,MACF;AAGA,UAAI,UAAU,KAAK,YAAY;AAC7B,cAAM,MAAM,KAAK,IAAI,GAAG,OAAO,IAAI,GAAI;AAAA,MACzC;AAAA,IACF;AAEA,UACE,aACA,IAAI,eAAe,EAAE,MAAM,kBAAkB,SAAS,iBAAiB,GAAG,GAAG;AAAA,EAEjF;AACF;AAMA,IAAM,cAAN,MAAkB;AAAA,EAChB,YAAoB,SAAoB;AAApB;AAAA,EAAqB;AAAA,EAArB;AAAA;AAAA;AAAA;AAAA;AAAA,EAMpB,MAAM,OAAO,QAAoD;AAC/D,UAAM,WAAW,MAAM,KAAK;AAAA,MAC1B;AAAA,MACA;AAAA,MACA,UAAU,CAAC;AAAA,IACb;AA
CA,WAAO,SAAS;AAAA,EAClB;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,IAAI,WAAyC;AACjD,UAAM,WAAW,MAAM,KAAK;AAAA,MAC1B;AAAA,MACA,gBAAgB,SAAS;AAAA,IAC3B;AACA,WAAO,SAAS;AAAA,EAClB;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,KAAK,WAA+C;AACxD,UAAM,WAAW,MAAM,KAAK;AAAA,MAC1B;AAAA,MACA,gBAAgB,SAAS;AAAA,IAC3B;AACA,WAAO,SAAS;AAAA,EAClB;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,OAA+B;AACnC,UAAM,WAAW,MAAM,KAAK;AAAA,MAC1B;AAAA,MACA;AAAA,IACF;AACA,WAAO,SAAS;AAAA,EAClB;AACF;AAEA,SAAS,MAAM,IAA2B;AACxC,SAAO,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,EAAE,CAAC;AACzD;","names":[]}
|
package/dist/index.d.cts
ADDED
|
@@ -0,0 +1,403 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Reader SDK types. Shapes mirror the reader-api envelope contract.
|
|
3
|
+
*/
|
|
4
|
+
/**
 * Options accepted by the `ReaderClient` constructor.
 */
interface ReaderClientConfig {
    /**
     * API key (required). The constructor throws when this is empty, and the
     * client sends it as the `x-api-key` header on every request.
     */
    apiKey: string;
    /**
     * API base URL (default: https://api.reader.dev). A single trailing slash
     * is stripped before request paths are appended.
     */
    baseUrl?: string;
    /**
     * Request timeout in ms (default: 60000). Applied to each attempt
     * separately; the client falls back to the default when this is `0`.
     */
    timeout?: number;
    /**
     * Max retries on transient failures (default: 2). `0` disables retries,
     * so a request is attempted at most `maxRetries + 1` times.
     */
    maxRetries?: number;
    /**
     * Extra headers to include in every request (e.g. x-request-id for
     * tracing). They are merged after the client's own `Content-Type` and
     * `x-api-key` headers.
     */
    headers?: Record<string, string>;
}
|
|
16
|
+
/**
 * Public proxy mode. `auto` picks standard first and escalates to stealth on
 * block.
 */
type ProxyMode = "standard" | "stealth" | "auto";
|
|
18
|
+
/**
 * Request body for `client.read(...)`; the client serializes it as JSON and
 * posts it to `/v1/read`.
 *
 * A single `url` is scraped synchronously. Multiple `urls`, or a `url`
 * combined with `maxDepth`/`maxPages`, start an async job that `read` polls
 * until it terminates.
 */
interface ReadParams {
    /** Single URL to scrape */
    url?: string;
    /** Multiple URLs for batch scraping */
    urls?: string[];
    /** Output formats (default: ["markdown"]) */
    formats?: Array<"markdown" | "html">;
    /** Extract main content only (default: true) */
    onlyMainContent?: boolean;
    /** CSS selectors to include */
    includeTags?: string[];
    /** CSS selectors to exclude */
    excludeTags?: string[];
    /** Wait for CSS selector before scraping */
    waitForSelector?: string;
    /** Per-URL timeout in ms (default: 30000) */
    timeoutMs?: number;
    /** Proxy mode: standard, stealth, or auto (default: auto) */
    proxyMode?: ProxyMode;
    /** Max crawl depth (triggers crawl mode) */
    maxDepth?: number;
    /** Max pages to crawl (triggers crawl mode) */
    maxPages?: number;
    /** Use cache (default: true) */
    cache?: boolean;
    /** Webhook for async job notifications */
    webhook?: {
        url: string;
        events?: string[];
        secret?: string;
    };
    /** Batch concurrency override */
    batchConcurrency?: number;
}
|
|
52
|
+
/**
 * Metadata attached to a scraped page.
 */
interface ScrapeMetadata {
    title?: string | null;
    description?: string | null;
    statusCode?: number;
    // NOTE(review): unit of `duration` is not stated in this file; presumably
    // milliseconds like the other timing fields — confirm against the API.
    duration: number;
    cached: boolean;
    /** Resolved proxy mode — `"standard"` or `"stealth"`. Omitted on cache hits. */
    proxyMode?: "standard" | "stealth";
    /** True if `auto` escalated from standard to stealth for this page. */
    proxyEscalated?: boolean;
    scrapedAt: string;
}
|
|
64
|
+
/**
 * One entry of a job's `results`.
 *
 * `client.stream(...)` reports an entry with `error` set as an `"error"`
 * event and every other entry as a `"page"` event.
 */
interface Page {
    url: string;
    markdown?: string;
    html?: string;
    statusCode?: number;
    proxyMode?: "standard" | "stealth";
    proxyEscalated?: boolean;
    credits?: number;
    metadata?: ScrapeMetadata | Record<string, unknown>;
    /** Failure message for this URL; absent when the page was scraped successfully. */
    error?: string;
}
|
|
75
|
+
/**
 * Result of a synchronous scrape — single URL, returned immediately.
 *
 * This is the `data` of a `client.read(...)` result whose `kind` is
 * `"scrape"`.
 */
interface ScrapeResult {
    url: string;
    /** Final URL after redirects (only present if different from `url`) */
    finalUrl?: string;
    markdown?: string;
    html?: string;
    metadata: ScrapeMetadata;
}
|
|
84
|
+
/**
 * Lifecycle state of a job. The client's polling helpers stop on
 * `"completed"`, `"failed"` and `"cancelled"`.
 */
type JobStatus =
    | "queued"
    | "processing"
    | "completed"
    | "failed"
    | "cancelled";
|
|
85
|
+
/** Mode reported on a job's `mode` field. */
type JobMode =
    | "scrape"
    | "batch"
    | "crawl";
|
|
86
|
+
/**
 * Job as returned from GET /v1/jobs/:id (data portion of envelope).
 */
interface Job {
    id: string;
    status: JobStatus;
    mode: JobMode;
    completed: number;
    total: number;
    creditsUsed: number;
    error: string | null;
    /**
     * One page of results, as selected by the request's `skip`/`limit`.
     * When a job finishes with status `"completed"`, `waitForJob` replaces
     * this with the results collected from every page.
     */
    results: Page[];
    startedAt: string | null;
    completedAt: string | null;
    createdAt: string;
}
|
|
101
|
+
/**
 * Paging information returned alongside a paginated response.
 *
 * The client requests further pages by increasing `skip` by `limit` for as
 * long as `hasMore` is true.
 */
interface Pagination {
    total: number;
    skip: number;
    limit: number;
    hasMore: boolean;
    // NOTE(review): `next` is passed through by `getJob` unchanged; presumably
    // a link or cursor for the following page — confirm against the API.
    next?: string;
}
|
|
108
|
+
/**
 * Return type of `client.read(...)`. Discriminated by `kind`.
 *
 * - `"scrape"`: the request was answered synchronously; `data` is the page.
 * - `"job"`: the request started an async job; `data` is the job after
 *   `read` has polled it to a terminal status.
 */
type ReadResult =
    | {
        kind: "scrape";
        data: ScrapeResult;
    }
    | {
        kind: "job";
        data: Job;
    };
|
|
116
|
+
interface Credits {
|
|
117
|
+
balance: number;
|
|
118
|
+
limit: number;
|
|
119
|
+
used: number;
|
|
120
|
+
tier: "free" | "pro" | "business" | "enterprise" | string;
|
|
121
|
+
resetAt: string;
|
|
122
|
+
}
|
|
123
|
+
interface UsageEntry {
|
|
124
|
+
id: string;
|
|
125
|
+
url: string;
|
|
126
|
+
duration: number;
|
|
127
|
+
status: "success" | "error";
|
|
128
|
+
cached: boolean;
|
|
129
|
+
proxyMode: "standard" | "stealth" | null;
|
|
130
|
+
credits: number;
|
|
131
|
+
error: string | null;
|
|
132
|
+
createdAt: string;
|
|
133
|
+
}
|
|
134
|
+
type StreamEvent = {
|
|
135
|
+
type: "progress";
|
|
136
|
+
completed: number;
|
|
137
|
+
total: number;
|
|
138
|
+
status: JobStatus;
|
|
139
|
+
} | {
|
|
140
|
+
type: "page";
|
|
141
|
+
data: Page;
|
|
142
|
+
} | {
|
|
143
|
+
type: "error";
|
|
144
|
+
url: string;
|
|
145
|
+
error: string;
|
|
146
|
+
} | {
|
|
147
|
+
type: "done";
|
|
148
|
+
completed: number;
|
|
149
|
+
total: number;
|
|
150
|
+
status: JobStatus;
|
|
151
|
+
};
|
|
152
|
+
interface SuccessEnvelope<T> {
|
|
153
|
+
success: true;
|
|
154
|
+
data: T;
|
|
155
|
+
}
|
|
156
|
+
interface PaginatedEnvelope<T> {
|
|
157
|
+
success: true;
|
|
158
|
+
data: T[];
|
|
159
|
+
pagination: Pagination;
|
|
160
|
+
}
|
|
161
|
+
interface ErrorEnvelope {
|
|
162
|
+
success: false;
|
|
163
|
+
error: {
|
|
164
|
+
code: string;
|
|
165
|
+
message: string;
|
|
166
|
+
details?: Record<string, unknown>;
|
|
167
|
+
docsUrl?: string;
|
|
168
|
+
};
|
|
169
|
+
}
|
|
170
|
+
type ApiEnvelope<T> = SuccessEnvelope<T> | ErrorEnvelope;
|
|
171
|
+
type SessionStatus = "active" | "stopped" | "expired";
|
|
172
|
+
interface SessionInfo {
|
|
173
|
+
sessionId: string;
|
|
174
|
+
wsEndpoint: string;
|
|
175
|
+
token: string;
|
|
176
|
+
status: SessionStatus;
|
|
177
|
+
createdAt: string;
|
|
178
|
+
expiresAt: string;
|
|
179
|
+
}
|
|
180
|
+
interface CreateSessionParams {
|
|
181
|
+
/** Max session lifetime in ms (default: 3600000 = 60 min) */
|
|
182
|
+
maxDurationMs?: number;
|
|
183
|
+
}
|
|
184
|
+
interface StopSessionResult {
|
|
185
|
+
sessionId: string;
|
|
186
|
+
status: "stopped";
|
|
187
|
+
durationMs: number;
|
|
188
|
+
creditsCharged: number;
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
/**
|
|
192
|
+
* Reader SDK Client
|
|
193
|
+
*
|
|
194
|
+
* @example
|
|
195
|
+
* import { ReaderClient } from "@vakra-dev/reader-js";
|
|
196
|
+
*
|
|
197
|
+
* const client = new ReaderClient({ apiKey: "rdr_your_key" });
|
|
198
|
+
*
|
|
199
|
+
* // Synchronous scrape (single URL)
|
|
200
|
+
* const result = await client.read({ url: "https://example.com" });
|
|
201
|
+
* if (result.kind === "scrape") {
|
|
202
|
+
* console.log(result.data.markdown);
|
|
203
|
+
* }
|
|
204
|
+
*
|
|
205
|
+
* // Batch (waits for the job to reach a terminal state; returns the Job with all results collected)
|
|
206
|
+
* const batch = await client.read({ urls: ["url1", "url2"] });
|
|
207
|
+
* if (batch.kind === "job") {
|
|
208
|
+
* for (const page of batch.data.results) {
|
|
209
|
+
* console.log(page.url, page.markdown?.length);
|
|
210
|
+
* }
|
|
211
|
+
* }
|
|
212
|
+
*/
|
|
213
|
+
|
|
214
|
+
declare class ReaderClient {
|
|
215
|
+
private apiKey;
|
|
216
|
+
private baseUrl;
|
|
217
|
+
private timeout;
|
|
218
|
+
private maxRetries;
|
|
219
|
+
private extraHeaders;
|
|
220
|
+
private _sessions;
|
|
221
|
+
constructor(config: ReaderClientConfig);
|
|
222
|
+
/**
|
|
223
|
+
* Browser sessions API.
|
|
224
|
+
*
|
|
225
|
+
* @example
|
|
226
|
+
* ```typescript
|
|
227
|
+
* const session = await client.sessions.create();
|
|
228
|
+
* const browser = await chromium.connectOverCDP(session.wsEndpoint);
|
|
229
|
+
* // ... use Playwright ...
|
|
230
|
+
* await client.sessions.stop(session.sessionId);
|
|
231
|
+
* ```
|
|
232
|
+
*/
|
|
233
|
+
get sessions(): SessionsAPI;
|
|
234
|
+
/**
|
|
235
|
+
* Read (scrape, batch, or crawl) one or more URLs.
|
|
236
|
+
*
|
|
237
|
+
* - Single URL → sync scrape, returns immediately with `{ kind: "scrape", data }`
|
|
238
|
+
* - Multiple URLs or URL + maxDepth/maxPages → async job; this method polls
|
|
239
|
+
* until the job terminates and returns `{ kind: "job", data }`.
|
|
240
|
+
*/
|
|
241
|
+
read(params: ReadParams): Promise<ReadResult>;
|
|
242
|
+
/**
|
|
243
|
+
* Get job status and a single page of results.
|
|
244
|
+
*/
|
|
245
|
+
getJob(jobId: string, opts?: {
|
|
246
|
+
skip?: number;
|
|
247
|
+
limit?: number;
|
|
248
|
+
}): Promise<{
|
|
249
|
+
job: Job;
|
|
250
|
+
hasMore: boolean;
|
|
251
|
+
next?: string;
|
|
252
|
+
}>;
|
|
253
|
+
/**
|
|
254
|
+
* Fetch all job result pages by following pagination.
|
|
255
|
+
*/
|
|
256
|
+
getAllJobResults(jobId: string): Promise<Page[]>;
|
|
257
|
+
/**
|
|
258
|
+
* Cancel a job. Throws `ConflictError` if the job is already terminal.
|
|
259
|
+
*/
|
|
260
|
+
cancelJob(jobId: string): Promise<void>;
|
|
261
|
+
/**
|
|
262
|
+
* Retry the failed URLs in a job. Throws `InvalidRequestError` if no
|
|
263
|
+
* failed URLs exist.
|
|
264
|
+
*/
|
|
265
|
+
retryJob(jobId: string): Promise<{
|
|
266
|
+
id: string;
|
|
267
|
+
status: string;
|
|
268
|
+
retrying: number;
|
|
269
|
+
}>;
|
|
270
|
+
/**
|
|
271
|
+
* Poll a job until it completes, fails, or is cancelled. Collects all
|
|
272
|
+
* paginated results when complete.
|
|
273
|
+
*/
|
|
274
|
+
waitForJob(jobId: string, options?: {
|
|
275
|
+
pollInterval?: number;
|
|
276
|
+
timeout?: number;
|
|
277
|
+
}): Promise<Job>;
|
|
278
|
+
/**
|
|
279
|
+
* Stream job events (progress, page, error, done) as they are detected via polling.
|
|
280
|
+
*
|
|
281
|
+
* @example
|
|
282
|
+
* for await (const event of client.stream(jobId)) {
|
|
283
|
+
* if (event.type === "page") console.log(event.data.url);
|
|
284
|
+
* if (event.type === "done") break;
|
|
285
|
+
* }
|
|
286
|
+
*/
|
|
287
|
+
stream(jobId: string, options?: {
|
|
288
|
+
pollInterval?: number;
|
|
289
|
+
timeout?: number;
|
|
290
|
+
}): AsyncGenerator<StreamEvent>;
|
|
291
|
+
/**
|
|
292
|
+
* Get the current credit balance for this workspace.
|
|
293
|
+
*/
|
|
294
|
+
getCredits(): Promise<Credits>;
|
|
295
|
+
private request;
|
|
296
|
+
}
|
|
297
|
+
type RequestFn = <T>(method: string, path: string, body?: unknown) => Promise<T>;
|
|
298
|
+
declare class SessionsAPI {
|
|
299
|
+
private request;
|
|
300
|
+
constructor(request: RequestFn);
|
|
301
|
+
/**
|
|
302
|
+
* Create a browser session. The returned session's `wsEndpoint` is a CDP WebSocket URL for
|
|
303
|
+
* Playwright/Puppeteer connection.
|
|
304
|
+
*/
|
|
305
|
+
create(params?: CreateSessionParams): Promise<SessionInfo>;
|
|
306
|
+
/**
|
|
307
|
+
* Get session status.
|
|
308
|
+
*/
|
|
309
|
+
get(sessionId: string): Promise<SessionInfo>;
|
|
310
|
+
/**
|
|
311
|
+
* Stop a browser session.
|
|
312
|
+
*/
|
|
313
|
+
stop(sessionId: string): Promise<StopSessionResult>;
|
|
314
|
+
/**
|
|
315
|
+
* List active sessions.
|
|
316
|
+
*/
|
|
317
|
+
list(): Promise<SessionInfo[]>;
|
|
318
|
+
}
|
|
319
|
+
|
|
320
|
+
/**
|
|
321
|
+
* Typed error classes mirroring the reader-api error code catalog.
|
|
322
|
+
*
|
|
323
|
+
* The API returns a stable `code` field on every error response. The SDK
|
|
324
|
+
* branches on that code and throws a specific subclass, so callers can
|
|
325
|
+
* write:
|
|
326
|
+
*
|
|
327
|
+
* try {
|
|
328
|
+
* await client.read({ url });
|
|
329
|
+
* } catch (err) {
|
|
330
|
+
* if (err instanceof InsufficientCreditsError) {
|
|
331
|
+
* // err.required, err.available, err.resetAt
|
|
332
|
+
* }
|
|
333
|
+
* }
|
|
334
|
+
*
|
|
335
|
+
* There is one subclass per code in the catalog. Unknown codes fall through
|
|
336
|
+
* to the base `ReaderApiError`.
|
|
337
|
+
*/
|
|
338
|
+
type ReaderErrorCode = "invalid_request" | "unauthenticated" | "insufficient_credits" | "url_blocked" | "not_found" | "conflict" | "rate_limited" | "concurrency_limited" | "internal_error" | "upstream_unavailable" | "scrape_timeout";
|
|
339
|
+
interface ApiErrorBody {
|
|
340
|
+
code: ReaderErrorCode | string;
|
|
341
|
+
message: string;
|
|
342
|
+
details?: Record<string, unknown>;
|
|
343
|
+
docsUrl?: string;
|
|
344
|
+
}
|
|
345
|
+
declare class ReaderApiError extends Error {
|
|
346
|
+
readonly code: string;
|
|
347
|
+
readonly httpStatus: number;
|
|
348
|
+
readonly details?: Record<string, unknown>;
|
|
349
|
+
readonly docsUrl?: string;
|
|
350
|
+
readonly requestId?: string;
|
|
351
|
+
constructor(body: ApiErrorBody, httpStatus: number, requestId?: string);
|
|
352
|
+
}
|
|
353
|
+
declare class InvalidRequestError extends ReaderApiError {
|
|
354
|
+
constructor(body: ApiErrorBody, status: number, requestId?: string);
|
|
355
|
+
}
|
|
356
|
+
declare class UnauthenticatedError extends ReaderApiError {
|
|
357
|
+
constructor(body: ApiErrorBody, status: number, requestId?: string);
|
|
358
|
+
}
|
|
359
|
+
declare class InsufficientCreditsError extends ReaderApiError {
|
|
360
|
+
readonly required?: number;
|
|
361
|
+
readonly available?: number;
|
|
362
|
+
readonly resetAt?: string;
|
|
363
|
+
constructor(body: ApiErrorBody, status: number, requestId?: string);
|
|
364
|
+
}
|
|
365
|
+
declare class UrlBlockedError extends ReaderApiError {
|
|
366
|
+
readonly url?: string;
|
|
367
|
+
readonly reason?: string;
|
|
368
|
+
constructor(body: ApiErrorBody, status: number, requestId?: string);
|
|
369
|
+
}
|
|
370
|
+
declare class NotFoundError extends ReaderApiError {
|
|
371
|
+
constructor(body: ApiErrorBody, status: number, requestId?: string);
|
|
372
|
+
}
|
|
373
|
+
declare class ConflictError extends ReaderApiError {
|
|
374
|
+
constructor(body: ApiErrorBody, status: number, requestId?: string);
|
|
375
|
+
}
|
|
376
|
+
declare class RateLimitedError extends ReaderApiError {
|
|
377
|
+
readonly retryAfterSeconds?: number;
|
|
378
|
+
readonly limit?: number;
|
|
379
|
+
readonly windowSeconds?: number;
|
|
380
|
+
constructor(body: ApiErrorBody, status: number, requestId?: string);
|
|
381
|
+
}
|
|
382
|
+
declare class ConcurrencyLimitedError extends ReaderApiError {
|
|
383
|
+
readonly active?: number;
|
|
384
|
+
readonly max?: number;
|
|
385
|
+
constructor(body: ApiErrorBody, status: number, requestId?: string);
|
|
386
|
+
}
|
|
387
|
+
declare class InternalServerError extends ReaderApiError {
|
|
388
|
+
constructor(body: ApiErrorBody, status: number, requestId?: string);
|
|
389
|
+
}
|
|
390
|
+
declare class UpstreamUnavailableError extends ReaderApiError {
|
|
391
|
+
constructor(body: ApiErrorBody, status: number, requestId?: string);
|
|
392
|
+
}
|
|
393
|
+
declare class ScrapeTimeoutError extends ReaderApiError {
|
|
394
|
+
readonly timeoutMs?: number;
|
|
395
|
+
constructor(body: ApiErrorBody, status: number, requestId?: string);
|
|
396
|
+
}
|
|
397
|
+
/**
|
|
398
|
+
* Construct the right error subclass from an error response body.
|
|
399
|
+
* Unknown codes fall through to the base class.
|
|
400
|
+
*/
|
|
401
|
+
declare function toReaderApiError(body: ApiErrorBody, httpStatus: number, requestId?: string): ReaderApiError;
|
|
402
|
+
|
|
403
|
+
export { type ApiEnvelope, type ApiErrorBody, ConcurrencyLimitedError, ConflictError, type CreateSessionParams, type Credits, type ErrorEnvelope, InsufficientCreditsError, InternalServerError, InvalidRequestError, type Job, type JobMode, type JobStatus, NotFoundError, type Page, type PaginatedEnvelope, type Pagination, type ProxyMode, RateLimitedError, type ReadParams, type ReadResult, ReaderApiError, ReaderClient, type ReaderClientConfig, type ReaderErrorCode, type ScrapeMetadata, type ScrapeResult, ScrapeTimeoutError, type SessionInfo, type SessionStatus, type StopSessionResult, type StreamEvent, type SuccessEnvelope, UnauthenticatedError, UpstreamUnavailableError, UrlBlockedError, type UsageEntry, toReaderApiError };
|