@duyquangnvx/webnovel-downloader 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,862 @@
1
+ import { z, ZodType } from 'zod';
2
+ import { CheerioAPI } from 'cheerio';
3
+
4
+ declare const UrlSchema: z.ZodBranded<z.ZodString, "Url">;
5
+ /**
6
+ * An absolute URL string, branded after validation so an unchecked string
7
+ * cannot be passed where a real URL is required. Library output is already
8
+ * branded; to brand untrusted input, use `parseUrl` from `data-model/parse.ts`.
9
+ */
10
+ type Url = z.infer<typeof UrlSchema>;
11
+ declare const ChapterIndexSchema: z.ZodBranded<z.ZodNumber, "ChapterIndex">;
12
+ /**
13
+ * A chapter's position in the table of contents, branded as a non-negative
14
+ * integer. Indices are **0-based**: chapter 1 is index 0. Build one with
15
+ * {@link chapterIndex}.
16
+ */
17
+ type ChapterIndex = z.infer<typeof ChapterIndexSchema>;
18
+ /**
19
+ * Build a validated ChapterIndex from a plain number. Throws if `input` is not
20
+ * a non-negative integer. This is the public way to construct the bounds for
21
+ * `DownloadOptions.chapterRange` (indices are 0-based: chapter 1 is index 0).
22
+ */
23
+ declare function chapterIndex(input: number): ChapterIndex;
24
+ declare const AdapterIdSchema: z.ZodBranded<z.ZodString, "AdapterId">;
25
+ /** Stable identifier for a site adapter (e.g. `"truyenfull"`), branded as a non-empty string. */
26
+ type AdapterId = z.infer<typeof AdapterIdSchema>;
27
+ declare const IsoDateSchema: z.ZodBranded<z.ZodString, "IsoDate">;
28
+ /** An ISO-8601 datetime string (e.g. `"2026-06-17T09:30:00.000Z"`), branded after validation. */
29
+ type IsoDate = z.infer<typeof IsoDateSchema>;
30
+ declare const ResumeTokenSchema: z.ZodBranded<z.ZodString, "ResumeToken">;
31
+ /**
32
+ * Opaque token returned on a partial download (see {@link DownloadPartial}).
33
+ * Pass it back as `DownloadOptions.resume: { token }` to continue where the run left off.
34
+ * Branded as a non-empty string.
35
+ */
36
+ type ResumeToken = z.infer<typeof ResumeTokenSchema>;
37
+
38
+ /** Zod enum of the four publication states; {@link NovelStatus} is inferred from it. */ declare const NovelStatusSchema: z.ZodEnum<["ongoing", "completed", "hiatus", "unknown"]>;
39
+ /** Zod schema from which {@link NovelMetadata} is inferred: a readonly object using Zod's `"strip"` unknown-key policy. */ declare const NovelMetadataSchema: z.ZodReadonly<z.ZodObject<{
40
+ sourceUrl: z.ZodBranded<z.ZodString, "Url">;
41
+ sourceSite: z.ZodBranded<z.ZodString, "AdapterId">;
42
+ title: z.ZodString;
43
+ author: z.ZodString;
44
+ description: z.ZodString;
45
+ coverUrl: z.ZodOptional<z.ZodBranded<z.ZodString, "Url">>;
46
+ genres: z.ZodReadonly<z.ZodArray<z.ZodString, "many">>;
47
+ status: z.ZodEnum<["ongoing", "completed", "hiatus", "unknown"]>;
48
+ totalChapters: z.ZodOptional<z.ZodNumber>;
49
+ fetchedAt: z.ZodDate;
50
+ }, "strip", z.ZodTypeAny, { // parsed (output) type follows: `sourceUrl`, `sourceSite`, `coverUrl` carry brands
51
+ status: "ongoing" | "completed" | "hiatus" | "unknown";
52
+ sourceUrl: string & z.BRAND<"Url">;
53
+ sourceSite: string & z.BRAND<"AdapterId">;
54
+ title: string;
55
+ author: string;
56
+ description: string;
57
+ genres: readonly string[];
58
+ fetchedAt: Date;
59
+ coverUrl?: (string & z.BRAND<"Url">) | undefined;
60
+ totalChapters?: number | undefined;
61
+ }, { // accepted (input) type follows: the same fields as plain, unbranded strings
62
+ status: "ongoing" | "completed" | "hiatus" | "unknown";
63
+ sourceUrl: string;
64
+ sourceSite: string;
65
+ title: string;
66
+ author: string;
67
+ description: string;
68
+ genres: readonly string[];
69
+ fetchedAt: Date;
70
+ coverUrl?: string | undefined;
71
+ totalChapters?: number | undefined;
72
+ }>>;
73
+ /** Zod schema from which {@link ChapterRef} is inferred: a readonly object using Zod's `"strip"` unknown-key policy. */ declare const ChapterRefSchema: z.ZodReadonly<z.ZodObject<{
74
+ index: z.ZodBranded<z.ZodNumber, "ChapterIndex">;
75
+ title: z.ZodString;
76
+ url: z.ZodBranded<z.ZodString, "Url">;
77
+ volume: z.ZodOptional<z.ZodString>;
78
+ }, "strip", z.ZodTypeAny, { // parsed (output) type follows: `index` and `url` carry brands
79
+ title: string;
80
+ index: number & z.BRAND<"ChapterIndex">;
81
+ url: string & z.BRAND<"Url">;
82
+ volume?: string | undefined;
83
+ }, { // accepted (input) type follows: plain number / string
84
+ title: string;
85
+ index: number;
86
+ url: string;
87
+ volume?: string | undefined;
88
+ }>>;
89
+ /** Zod schema from which {@link Chapter} is inferred: the {@link ChapterRef} fields plus `content`, `fetchedAt`, and `wordCount`. */ declare const ChapterSchema: z.ZodReadonly<z.ZodObject<{
90
+ index: z.ZodBranded<z.ZodNumber, "ChapterIndex">;
91
+ title: z.ZodString;
92
+ url: z.ZodBranded<z.ZodString, "Url">;
93
+ volume: z.ZodOptional<z.ZodString>;
94
+ content: z.ZodString;
95
+ fetchedAt: z.ZodDate;
96
+ wordCount: z.ZodNumber;
97
+ }, "strip", z.ZodTypeAny, { // parsed (output) type follows: `index` and `url` carry brands
98
+ title: string;
99
+ fetchedAt: Date;
100
+ index: number & z.BRAND<"ChapterIndex">;
101
+ url: string & z.BRAND<"Url">;
102
+ content: string;
103
+ wordCount: number;
104
+ volume?: string | undefined;
105
+ }, { // accepted (input) type follows: plain number / string
106
+ title: string;
107
+ fetchedAt: Date;
108
+ index: number;
109
+ url: string;
110
+ content: string;
111
+ wordCount: number;
112
+ volume?: string | undefined;
113
+ }>>;
114
+ /** Zod schema from which {@link NovelData} is inferred; `metadata` and `chapters` repeat inline the same field shapes declared by `NovelMetadataSchema` and `ChapterSchema`. */ declare const NovelDataSchema: z.ZodReadonly<z.ZodObject<{
115
+ metadata: z.ZodReadonly<z.ZodObject<{
116
+ sourceUrl: z.ZodBranded<z.ZodString, "Url">;
117
+ sourceSite: z.ZodBranded<z.ZodString, "AdapterId">;
118
+ title: z.ZodString;
119
+ author: z.ZodString;
120
+ description: z.ZodString;
121
+ coverUrl: z.ZodOptional<z.ZodBranded<z.ZodString, "Url">>;
122
+ genres: z.ZodReadonly<z.ZodArray<z.ZodString, "many">>;
123
+ status: z.ZodEnum<["ongoing", "completed", "hiatus", "unknown"]>;
124
+ totalChapters: z.ZodOptional<z.ZodNumber>;
125
+ fetchedAt: z.ZodDate;
126
+ }, "strip", z.ZodTypeAny, {
127
+ status: "ongoing" | "completed" | "hiatus" | "unknown";
128
+ sourceUrl: string & z.BRAND<"Url">;
129
+ sourceSite: string & z.BRAND<"AdapterId">;
130
+ title: string;
131
+ author: string;
132
+ description: string;
133
+ genres: readonly string[];
134
+ fetchedAt: Date;
135
+ coverUrl?: (string & z.BRAND<"Url">) | undefined;
136
+ totalChapters?: number | undefined;
137
+ }, {
138
+ status: "ongoing" | "completed" | "hiatus" | "unknown";
139
+ sourceUrl: string;
140
+ sourceSite: string;
141
+ title: string;
142
+ author: string;
143
+ description: string;
144
+ genres: readonly string[];
145
+ fetchedAt: Date;
146
+ coverUrl?: string | undefined;
147
+ totalChapters?: number | undefined;
148
+ }>>;
149
+ chapters: z.ZodReadonly<z.ZodArray<z.ZodReadonly<z.ZodObject<{
150
+ index: z.ZodBranded<z.ZodNumber, "ChapterIndex">;
151
+ title: z.ZodString;
152
+ url: z.ZodBranded<z.ZodString, "Url">;
153
+ volume: z.ZodOptional<z.ZodString>;
154
+ content: z.ZodString;
155
+ fetchedAt: z.ZodDate;
156
+ wordCount: z.ZodNumber;
157
+ }, "strip", z.ZodTypeAny, {
158
+ title: string;
159
+ fetchedAt: Date;
160
+ index: number & z.BRAND<"ChapterIndex">;
161
+ url: string & z.BRAND<"Url">;
162
+ content: string;
163
+ wordCount: number;
164
+ volume?: string | undefined;
165
+ }, {
166
+ title: string;
167
+ fetchedAt: Date;
168
+ index: number;
169
+ url: string;
170
+ content: string;
171
+ wordCount: number;
172
+ volume?: string | undefined;
173
+ }>>, "many">>;
174
+ }, "strip", z.ZodTypeAny, { // parsed (output) type of the whole novel follows: branded fields
175
+ metadata: Readonly<{
176
+ status: "ongoing" | "completed" | "hiatus" | "unknown";
177
+ sourceUrl: string & z.BRAND<"Url">;
178
+ sourceSite: string & z.BRAND<"AdapterId">;
179
+ title: string;
180
+ author: string;
181
+ description: string;
182
+ genres: readonly string[];
183
+ fetchedAt: Date;
184
+ coverUrl?: (string & z.BRAND<"Url">) | undefined;
185
+ totalChapters?: number | undefined;
186
+ }>;
187
+ chapters: readonly Readonly<{
188
+ title: string;
189
+ fetchedAt: Date;
190
+ index: number & z.BRAND<"ChapterIndex">;
191
+ url: string & z.BRAND<"Url">;
192
+ content: string;
193
+ wordCount: number;
194
+ volume?: string | undefined;
195
+ }>[];
196
+ }, { // accepted (input) type of the whole novel follows: plain strings / numbers
197
+ metadata: Readonly<{
198
+ status: "ongoing" | "completed" | "hiatus" | "unknown";
199
+ sourceUrl: string;
200
+ sourceSite: string;
201
+ title: string;
202
+ author: string;
203
+ description: string;
204
+ genres: readonly string[];
205
+ fetchedAt: Date;
206
+ coverUrl?: string | undefined;
207
+ totalChapters?: number | undefined;
208
+ }>;
209
+ chapters: readonly Readonly<{
210
+ title: string;
211
+ fetchedAt: Date;
212
+ index: number;
213
+ url: string;
214
+ content: string;
215
+ wordCount: number;
216
+ volume?: string | undefined;
217
+ }>[];
218
+ }>>;
219
+
220
+ /** Publication state of a novel: `"ongoing" | "completed" | "hiatus" | "unknown"`. */
221
+ type NovelStatus = z.infer<typeof NovelStatusSchema>;
222
+ /**
223
+ * Novel-level metadata (title, author, cover, genres, status, …) without any
224
+ * chapter bodies. `totalChapters` is the source's reported count and may be
225
+ * absent if the site doesn't expose one — never inferred from fetched chapters.
226
+ */
227
+ type NovelMetadata = z.infer<typeof NovelMetadataSchema>;
228
+ /**
229
+ * A lightweight pointer to a chapter (index, title, URL, optional volume)
230
+ * produced while walking the table of contents — before the body is fetched.
231
+ * See {@link Chapter} for the fetched form.
232
+ */
233
+ type ChapterRef = z.infer<typeof ChapterRefSchema>;
234
+ /**
235
+ * A fully fetched chapter: a {@link ChapterRef} plus normalized `content`
236
+ * (plain text, paragraphs separated by blank lines) and `wordCount`.
237
+ */
238
+ type Chapter = z.infer<typeof ChapterSchema>;
239
+ /**
240
+ * The complete result of a download: {@link NovelMetadata} plus the fetched
241
+ * {@link Chapter}s, sorted by source index. On a partial download the array
242
+ * has gaps at the indices that permanently failed (indices are never
243
+ * renumbered).
244
+ */
245
+ type NovelData = z.infer<typeof NovelDataSchema>;
246
+
247
+ /**
248
+ * Base class for every error this library raises or returns. Each subclass
249
+ * carries a stable, machine-readable `code` (e.g. `"ADAPTER_NOT_FOUND"`) so
250
+ * callers can branch on `err.code` instead of `instanceof`.
251
+ */
252
+ declare abstract class DownloadError extends Error {
253
+ abstract readonly code: string;
254
+ readonly cause?: unknown;
255
+ constructor(message: string, options?: {
256
+ cause?: unknown;
257
+ });
258
+ }
259
+ /**
260
+ * No registered adapter can handle the given URL (`code: "ADAPTER_NOT_FOUND"`).
261
+ * `download()` returns this in its `{ status: "error" }` envelope;
262
+ * `fetchMetadata()` throws it.
263
+ */
264
+ declare class AdapterNotFoundError extends DownloadError {
265
+ readonly url: string;
266
+ readonly code: "ADAPTER_NOT_FOUND";
267
+ constructor(url: string, options?: {
268
+ cause?: unknown;
269
+ });
270
+ }
271
+ /** A non-retryable HTTP error response (`code: "HTTP_ERROR"`); carries `status` and `url`. */
272
+ declare class HttpError extends DownloadError {
273
+ readonly status: number;
274
+ readonly url: Url;
275
+ readonly code: "HTTP_ERROR";
276
+ constructor(status: number, url: Url, message?: string, options?: {
277
+ cause?: unknown;
278
+ });
279
+ }
280
+ /** The host signalled rate limiting, e.g. HTTP 429 (`code: "RATE_LIMITED"`); `retryAfterMs` if known. */
281
+ declare class RateLimitedError extends DownloadError {
282
+ readonly url: Url;
283
+ readonly retryAfterMs?: number | undefined;
284
+ readonly code: "RATE_LIMITED";
285
+ constructor(url: Url, retryAfterMs?: number | undefined, options?: {
286
+ cause?: unknown;
287
+ });
288
+ }
289
+ /**
290
+ * Malformed input or an unparseable/invalid response (`code: "PARSE_ERROR"`) —
291
+ * e.g. a bad URL, an out-of-range `chapterRange` bound, or adapter output that
292
+ * fails schema validation. Carries optional `url` and `snippet` for debugging.
293
+ */
294
+ declare class ParseError extends DownloadError {
295
+ readonly code: "PARSE_ERROR";
296
+ readonly url: Url | undefined;
297
+ readonly snippet: string | undefined;
298
+ constructor(message: string, options?: {
299
+ cause?: unknown;
300
+ url?: Url;
301
+ snippet?: string;
302
+ });
303
+ }
304
+ /**
305
+ * A single chapter could not be fetched (`code: "CHAPTER_FETCH_FAILED"`);
306
+ * carries the failing `ref`. Surfaces in {@link DownloadPartial}'s `failures`.
307
+ */
308
+ declare class ChapterFetchError extends DownloadError {
309
+ readonly ref: ChapterRef;
310
+ readonly code: "CHAPTER_FETCH_FAILED";
311
+ constructor(ref: ChapterRef, options?: {
312
+ cause?: unknown;
313
+ });
314
+ }
315
+ /** A request exceeded its timeout (`code: "TIMEOUT"`). */
316
+ declare class TimeoutError extends DownloadError {
317
+ readonly url: Url;
318
+ readonly code: "TIMEOUT";
319
+ constructor(url: Url, options?: {
320
+ cause?: unknown;
321
+ });
322
+ }
323
+ /**
324
+ * The operation was aborted via an `AbortSignal` (`code: "CANCELLED"`).
325
+ * `download()` **throws** this rather than returning an error envelope.
326
+ */
327
+ declare class CancelledError extends DownloadError {
328
+ readonly code: "CANCELLED";
329
+ constructor(options?: {
330
+ cause?: unknown;
331
+ });
332
+ }
333
+ /**
334
+ * A browser-tier request was required but neither `patchright` nor `playwright`
335
+ * is installed (`code: "BROWSER_MODULE_NOT_INSTALLED"`). The message lists the
336
+ * install commands and the `http-only` opt-out.
337
+ */
338
+ declare class BrowserModuleNotInstalledError extends DownloadError {
339
+ readonly code: "BROWSER_MODULE_NOT_INSTALLED";
340
+ constructor(options?: {
341
+ cause?: unknown;
342
+ });
343
+ }
344
+ /**
345
+ * Suggested remedy on a {@link ChallengeUnresolvedError}:
346
+ * `"manual-solve"` (launch a headed browser and solve once) or
347
+ * `"transport-config"` (raise the navigation timeout / use a stronger transport).
348
+ */
349
+ type ChallengeUnresolvedHint = "manual-solve" | "transport-config";
350
+ /**
351
+ * A Cloudflare challenge could not be cleared within the navigation timeout
352
+ * (`code: "CHALLENGE_UNRESOLVED"`). `hint` indicates how to recover; the message
353
+ * spells out the concrete fix.
354
+ */
355
+ declare class ChallengeUnresolvedError extends DownloadError {
356
+ readonly url: Url;
357
+ readonly hint: ChallengeUnresolvedHint;
358
+ readonly code: "CHALLENGE_UNRESOLVED";
359
+ constructor(url: Url, hint: ChallengeUnresolvedHint, options?: {
360
+ cause?: unknown;
361
+ });
362
+ }
363
+
364
+ /** One chapter that could not be fetched, with the error and the attempt count. */
365
+ interface ChapterFailure {
366
+ readonly ref: ChapterRef;
367
+ readonly error: DownloadError;
368
+ readonly attempts: number;
369
+ }
370
+ /** Every chapter in range was fetched. */
371
+ interface DownloadSuccess {
372
+ readonly status: "success";
373
+ readonly data: NovelData;
374
+ }
375
+ /**
376
+ * The novel was fetched but some chapters permanently failed. `data` holds the
377
+ * chapters that succeeded (with gaps at the failed indices); `failures` lists
378
+ * the rest.
379
+ */
380
+ interface DownloadPartial {
381
+ readonly status: "partial";
382
+ readonly data: NovelData;
383
+ readonly failures: readonly ChapterFailure[];
384
+ /**
385
+ * Present only when the caller passed `options.resume`. Without resume
386
+ * enabled there is no state file to point at, so no token can be issued.
387
+ * Pass it back as `options.resume: { token }` to retry just the failed chapters.
388
+ */
389
+ readonly resumeToken?: ResumeToken;
390
+ }
391
+ /**
392
+ * The download could not produce a novel (bad/unknown URL, metadata or TOC
393
+ * failure). Note: an aborted download throws `CancelledError` rather than
394
+ * returning this.
395
+ */
396
+ interface DownloadFailed {
397
+ readonly status: "error";
398
+ readonly error: DownloadError;
399
+ }
400
+ /**
401
+ * The outcome of `Downloader.download()`. Discriminate on `status`:
402
+ * `"success"`, `"partial"`, or `"error"`.
403
+ */
404
+ type DownloadResult = DownloadSuccess | DownloadPartial | DownloadFailed;
405
+
406
+ /**
407
+ * Lifecycle events delivered to `DownloadOptions.onEvent`. Discriminate on
408
+ * `type`. `progress` is throttled to ~1/sec (plus a guaranteed final emit) and
409
+ * `toc:progress` is deduped on `discovered`; the rest fire as they occur.
410
+ * `rate-limit:wait` only fires for the built-in transport (not a custom
411
+ * `HttpClient`), and carries the `host` so callers can attribute the wait.
412
+ */
413
+ type DownloadEvent = {
414
+ type: "metadata:fetched";
415
+ metadata: NovelMetadata;
416
+ } | {
417
+ type: "toc:progress";
418
+ discovered: number;
419
+ } | {
420
+ type: "toc:complete";
421
+ total: number;
422
+ } | {
423
+ type: "chapter:start";
424
+ ref: ChapterRef;
425
+ } | {
426
+ type: "chapter:success";
427
+ chapter: Chapter;
428
+ } | {
429
+ type: "chapter:failed";
430
+ ref: ChapterRef;
431
+ error: DownloadError;
432
+ } | {
433
+ type: "progress";
434
+ completed: number;
435
+ total: number;
436
+ } | {
437
+ type: "cache:hit";
438
+ url: Url;
439
+ } | {
440
+ type: "rate-limit:wait";
441
+ host: string;
442
+ waitMs: number;
443
+ };
444
+
445
+ /** Per-request options passed to {@link HttpClient.get}. */
446
+ interface HttpRequestOptions {
447
+ readonly headers?: Readonly<Record<string, string>>;
448
+ readonly signal?: AbortSignal;
449
+ /** Request timeout in ms. */
450
+ readonly timeoutMs?: number;
451
+ }
452
+ /** A fetched response: status, headers, the decoded `body` string, and the final `url` (post-redirect). */
453
+ interface HttpResponse {
454
+ readonly status: number;
455
+ readonly headers: Readonly<Record<string, string>>;
456
+ readonly body: string;
457
+ readonly url: Url;
458
+ }
459
+ /**
460
+ * The transport contract. Implement this to plug a custom transport (e.g. a
461
+ * SaaS render service) into a `Downloader` via its `http` option, bypassing the
462
+ * built-in tiered client — you then own retries, caching, and challenge
463
+ * handling. See `examples/custom-transport.ts`.
464
+ */
465
+ interface HttpClient {
466
+ get(url: Url, opts?: HttpRequestOptions): Promise<HttpResponse>;
467
+ }
468
+ /** Pino-style log levels (this union does not include `"fatal"`), plus `"silent"` to disable logging entirely. */
469
+ type LogLevel = "trace" | "debug" | "info" | "warn" | "error" | "silent";
470
+ /**
471
+ * Minimal structured-logging interface (a pino-compatible subset). Pass your
472
+ * own to `createDownloader({ logger })`, or build one with {@link createLogger}.
473
+ */
474
+ interface Logger {
475
+ trace(obj: object, msg?: string): void;
476
+ trace(msg: string): void;
477
+ debug(obj: object, msg?: string): void;
478
+ debug(msg: string): void;
479
+ info(obj: object, msg?: string): void;
480
+ info(msg: string): void;
481
+ warn(obj: object, msg?: string): void;
482
+ warn(msg: string): void;
483
+ error(obj: object, msg?: string): void;
484
+ error(msg: string): void;
485
+ child(bindings: object): Logger;
486
+ }
487
+
488
+ /** Base adapter context without a browser client: HTTP client, logger, abort signal, and optional novel metadata. `AdapterContext` extends it with the optional `browser` field. */ interface AdapterContext$1 {
489
+ readonly http: HttpClient;
490
+ readonly logger: Logger;
491
+ readonly signal: AbortSignal;
492
+ readonly novelMetadata?: NovelMetadata;
493
+ }
494
+ /** Adapter contract typed against `AdapterContext$1`. Declares the same members as `SiteAdapter` and is the type accepted by `DownloadOptions.adapter`. */ interface SiteAdapter$1 {
495
+ readonly id: AdapterId;
496
+ readonly displayName: string;
497
+ readonly hostnames: readonly string[];
498
+ readonly preferredTransport?: "http" | "browser";
499
+ canHandle(url: string): boolean;
500
+ fetchMetadata(url: Url, ctx: AdapterContext$1): Promise<NovelMetadata>;
501
+ fetchChapterList(url: Url, ctx: AdapterContext$1): AsyncIterable<ChapterRef>;
502
+ fetchChapter(ref: ChapterRef, ctx: AdapterContext$1): Promise<Chapter>;
503
+ }
504
+
505
+ /** Per-host request throttling, applied across every `download()` on a `Downloader`. */
506
+ interface RateLimitOptions {
507
+ /** Sustained requests per second per host. Default: 2. NOTE(review): the field is required by this type, so the default presumably applies only when the whole `rateLimit` option is omitted — confirm. */
508
+ readonly requestsPerSecond: number;
509
+ /** Token-bucket capacity (max burst before throttling). Default: `requestsPerSecond`. */
510
+ readonly burst?: number;
511
+ }
512
+ /** Automatic retry policy for transient HTTP failures. */
513
+ interface RetryOptions {
514
+ /** Max retry attempts after the first try. Default: 3. NOTE(review): required by this type, so the default presumably applies only when the whole `retry` option is omitted — confirm. */
515
+ readonly retries: number;
516
+ /** Delay growth between attempts. Default: `"exponential"`. NOTE(review): also a required field; same caveat as `retries` — confirm. */
517
+ readonly backoff: "exponential" | "linear" | "fixed";
518
+ /** Floor for the backoff delay, in ms. Default: 500. */
519
+ readonly minDelayMs?: number;
520
+ /** Ceiling for the backoff delay, in ms. Default: 30000. */
521
+ readonly maxDelayMs?: number;
522
+ /** HTTP status codes that trigger a retry. Default: `[408, 429, 500, 502, 503, 504]`. */
523
+ readonly retryOn?: readonly number[];
524
+ }
525
+ /** On-disk HTTP response cache (ETag/304 revalidation). */
526
+ interface CacheOptions {
527
+ /** Directory for cached responses. */
528
+ readonly dir: string;
529
+ /** Max age before a cached entry is considered stale, in ms. Default: 7 days. */
530
+ readonly maxAgeMs?: number;
531
+ }
532
+ /**
533
+ * How to resume an interrupted download:
534
+ * - `true` — use a default state file keyed by URL under the user cache dir.
535
+ * - `{ stateFile }` — use an explicit state file path.
536
+ * - `{ token }` — resume from a {@link ResumeToken} issued by a prior partial run.
537
+ */
538
+ type ResumeOptions = true | {
539
+ readonly stateFile: string;
540
+ } | {
541
+ readonly token: ResumeToken;
542
+ };
543
+ /** Per-call options for `Downloader.download()` / `fetchMetadata()`. */
544
+ interface DownloadOptions {
545
+ /** Concurrent chapter fetches. Default: 4. */
546
+ readonly concurrency?: number;
547
+ /** Enable the on-disk HTTP response cache. */
548
+ readonly cache?: CacheOptions;
549
+ /** Enable resumable state so an interrupted run can continue. */
550
+ readonly resume?: ResumeOptions;
551
+ /** Chapter body format. Only `"text"` is supported. */
552
+ readonly contentFormat?: "text";
553
+ /**
554
+ * Limit the download to a contiguous slice. Bounds are **0-based and
555
+ * inclusive** (chapter 1 is index 0), so `{ from: 0, to: 9 }` is the first 10
556
+ * chapters. A non-integer or negative bound resolves to `{ status: "error" }`.
557
+ */
558
+ readonly chapterRange?: {
559
+ from?: number;
560
+ to?: number;
561
+ };
562
+ /** Abort the download. When it fires, `download()` rejects with `CancelledError`. */
563
+ readonly signal?: AbortSignal;
564
+ /** Progress/lifecycle callback; receives every {@link DownloadEvent}. */
565
+ readonly onEvent?: (event: DownloadEvent) => void;
566
+ /** Force a specific adapter instead of resolving one from the URL. */
567
+ readonly adapter?: SiteAdapter$1;
568
+ }
569
+ /**
570
+ * Transport selection:
571
+ * - `"auto"` — undici first, escalate to a real browser on a Cloudflare challenge.
572
+ * - `"http-only"` — never launch a browser; browser-only adapters throw.
573
+ * - `"browser-required"` — route every request through the browser.
574
+ */
575
+ type TransportMode = "auto" | "http-only" | "browser-required";
576
+ /** Tuning for the headless-browser transport tier. */
577
+ interface TransportBrowserOptions {
578
+ /** Launch a visible window (e.g. to solve a Cloudflare challenge by hand). Default: false. */
579
+ readonly headed?: boolean;
580
+ /** Idle time before the shared browser is closed, in ms. Default: 60000. */
581
+ readonly idleTimeoutMs?: number;
582
+ /** Path to a specific Chromium binary instead of the bundled one. */
583
+ readonly executablePath?: string;
584
+ /** Per-navigation timeout for browser page loads, in ms. */
585
+ readonly navigationTimeoutMs?: number;
586
+ }
587
+ /** Full transport configuration: a {@link TransportMode} plus optional browser tuning. */
588
+ interface TransportConfig {
589
+ readonly mode: TransportMode;
590
+ readonly browserOptions?: TransportBrowserOptions;
591
+ }
592
+
593
+ /**
594
+ * Build a pino-backed {@link Logger} at the given level (default `"silent"`).
595
+ * Pass it to `createDownloader({ logger })`, or use the `logLevel` shortcut on
596
+ * `createDownloader` to skip constructing one.
597
+ */
598
+ declare function createLogger(opts?: {
599
+ level?: LogLevel;
600
+ }): Logger;
601
+
602
+ /** Launcher surface of a {@link BrowserModule}'s `chromium` property: a single `launch()` resolving to a {@link RuntimeBrowser}. */ interface ChromiumNamespace {
603
+ launch(opts: BrowserLaunchOptions): Promise<RuntimeBrowser>;
604
+ }
605
+ /** Options accepted by `ChromiumNamespace.launch()`: an optional `headless` flag and an optional browser `executablePath`. */ interface BrowserLaunchOptions {
606
+ readonly headless?: boolean;
607
+ readonly executablePath?: string;
608
+ }
609
+ /** Browser handle resolved by `ChromiumNamespace.launch()`: creates contexts (optionally with a `userAgent`), closes, and reports connection state. */ interface RuntimeBrowser {
610
+ newContext(opts?: {
611
+ userAgent?: string;
612
+ }): Promise<RuntimeContext>;
613
+ close(): Promise<void>;
614
+ isConnected(): boolean;
615
+ }
616
+ /** Browser context resolved by `RuntimeBrowser.newContext()`: opens pages and lists cookies (optionally filtered by `urls`). */ interface RuntimeContext {
617
+ newPage(): Promise<RuntimePage>;
618
+ cookies(urls?: readonly string[]): Promise<readonly RuntimeCookie[]>;
619
+ close(): Promise<void>;
620
+ }
621
+ /** Page handle resolved by `RuntimeContext.newPage()`. `goto()` may resolve `null` instead of a {@link RuntimeResponse}; only the `"response"` event can be subscribed to via `on`/`off`. */ interface RuntimePage {
622
+ goto(url: string, opts?: {
623
+ timeout?: number;
624
+ waitUntil?: "domcontentloaded" | "load";
625
+ signal?: AbortSignal;
626
+ }): Promise<RuntimeResponse | null>;
627
+ content(): Promise<string>;
628
+ on(event: "response", handler: (res: RuntimeResponse) => void): void;
629
+ off(event: "response", handler: (res: RuntimeResponse) => void): void;
630
+ title(): Promise<string>;
631
+ evaluate(script: string): Promise<unknown>;
632
+ /**
633
+ * Register a script that runs in the page's main world before any other
634
+ * scripts on every navigation. Required to bridge into the main world from
635
+ * patchright's isolated `evaluate()` context — adapters install a relay
636
+ * that listens for DOM events from isolated-world `evaluate()` and invokes
637
+ * page-defined functions (e.g. `loadBookIndex`).
638
+ */
639
+ addInitScript(script: string): Promise<void>;
640
+ close(): Promise<void>;
641
+ }
642
+ /** Network response as delivered to `RuntimePage` `"response"` handlers and resolved by `RuntimePage.goto()`; the body is read asynchronously via `text()` or `json()`. */ interface RuntimeResponse {
643
+ url(): string;
644
+ status(): number;
645
+ headers(): Record<string, string>;
646
+ text(): Promise<string>;
647
+ json(): Promise<unknown>;
648
+ }
649
+ /** One cookie entry as returned by `RuntimeContext.cookies()`; only `name` and `value` are guaranteed by the type. */ interface RuntimeCookie {
650
+ readonly name: string;
651
+ readonly value: string;
652
+ readonly domain?: string;
653
+ readonly path?: string;
654
+ }
655
+ /** A loaded browser-automation module, identified as `"patchright"` or `"playwright"`, exposing its {@link ChromiumNamespace} launcher. */ interface BrowserModule {
656
+ readonly name: "patchright" | "playwright";
657
+ readonly chromium: ChromiumNamespace;
658
+ }
659
+ /** Adapter-facing interface exposed via AdapterContext.browser. */
660
+ interface BrowserClient {
661
+ navigate(url: Url, signal: AbortSignal, opts?: NavigateOptions): Promise<BrowserPage>;
662
+ }
663
+ /** Optional third argument to `BrowserClient.navigate(url, signal, opts)`. */ interface NavigateOptions {
664
+ /**
665
+ * A JS expression installed via `Page.addInitScript` before the first
666
+ * navigation. Runs in the page's main world (where the page's globals are
667
+ * defined), giving adapters a way to bridge from patchright's isolated
668
+ * `evaluate()` context into main-world functions.
669
+ */
670
+ readonly initScript?: string;
671
+ }
672
+ /** Options for the `BrowserPage.waitForResponse` overload that resolves `body` as a string; selected by `parse: "text"`. */ interface WaitForResponseTextOpts {
673
+ readonly timeoutMs?: number;
674
+ readonly signal?: AbortSignal;
675
+ readonly parse: "text";
676
+ /**
677
+ * Ignore already-buffered responses and wait strictly for a new one. Set
678
+ * when the adapter is about to trigger a fresh XHR and must not match the
679
+ * previous identical request still cached in the buffer.
680
+ */
681
+ readonly skipBuffer?: boolean;
682
+ }
683
+ /** Options for the `BrowserPage.waitForResponse<T>` overload that resolves `body` as `T`; selected by `parse: "json"`. */ interface WaitForResponseJsonOpts<T> {
684
+ readonly timeoutMs?: number;
685
+ readonly signal?: AbortSignal;
686
+ readonly parse: "json";
687
+ /** Zod type describing `T`; presumably used to validate the parsed JSON body — TODO confirm. */ readonly schema: ZodType<T>;
688
+ /** Presumably the same meaning as `WaitForResponseTextOpts.skipBuffer` (ignore buffered responses, wait for a new one) — TODO confirm. */ readonly skipBuffer?: boolean;
689
+ }
690
+ /** Adapter-facing page handle resolved by `BrowserClient.navigate()`. `waitForResponse` is overloaded: text options yield a string `body`, JSON options yield `body: T`. */ interface BrowserPage {
691
+ waitForResponse(pattern: RegExp, opts: WaitForResponseTextOpts): Promise<{
692
+ url: Url;
693
+ status: number;
694
+ body: string;
695
+ }>;
696
+ waitForResponse<T>(pattern: RegExp, opts: WaitForResponseJsonOpts<T>): Promise<{
697
+ url: Url;
698
+ status: number;
699
+ body: T;
700
+ }>;
701
+ content(): Promise<string>;
702
+ /**
703
+ * Evaluate a JS expression in the page context. With patchright the script
704
+ * runs in an isolated world: DOM is shared but the page's window globals
705
+ * are not visible. To trigger main-world code, install a relay via
706
+ * `BrowserClient.navigate(url, signal, { initScript })` and dispatch a DOM event from
707
+ * here — DOM events bridge across the world boundary.
708
+ */
709
+ evaluate(script: string): Promise<unknown>;
710
+ close(): Promise<void>;
711
+ }
712
+
713
+ /**
714
+ * Runtime services handed to every adapter method: the {@link HttpClient}, a
715
+ * logger, the abort signal, an optional browser client (present only for
716
+ * browser-tier adapters), and — during chapter fetches — the already-fetched
717
+ * novel metadata.
718
+ */
719
+ interface AdapterContext extends AdapterContext$1 {
720
+ readonly browser?: BrowserClient;
721
+ }
722
+ /**
723
+ * The contract every site adapter implements. Register custom adapters via
724
+ * `createDownloader({ adapters })` or the `Downloader` constructor. Extending
725
+ * {@link BaseAdapter} covers `canHandle` and the HTML/JSON helpers for you.
726
+ * See `docs/adapter-spec.md`.
727
+ */
728
+ interface SiteAdapter {
729
+ /** Stable adapter id, also stamped onto `NovelMetadata.sourceSite`. */
730
+ readonly id: AdapterId;
731
+ /** Human-readable site name. */
732
+ readonly displayName: string;
733
+ /** Hostnames this adapter claims (used by the default `canHandle`). */
734
+ readonly hostnames: readonly string[];
735
+ /**
736
+ * Optional transport hint. Adapters that require a real browser (e.g. for
737
+ * XHR-rendered TOCs or Cloudflare-protected pages) set this to `"browser"`.
738
+ * Omitting the field (or setting it to `"http"`) selects the default HTTP
739
+ * transport.
740
+ */
741
+ readonly preferredTransport?: "http" | "browser";
742
+ /** Whether this adapter handles `url` (typically a hostname match). */
743
+ canHandle(url: string): boolean;
744
+ /** Fetch novel-level metadata. Throw a {@link DownloadError} subclass on failure. */
745
+ fetchMetadata(url: Url, ctx: AdapterContext): Promise<NovelMetadata>;
746
+ /**
747
+ * Yield chapter references for the whole table of contents. Refs MUST be
748
+ * yielded in ascending `index` order — the pipeline short-circuits a
749
+ * `chapterRange` once it passes the upper bound and relies on this ordering.
750
+ */
751
+ fetchChapterList(url: Url, ctx: AdapterContext): AsyncIterable<ChapterRef>;
752
+ /** Fetch and normalize one chapter body. */
753
+ fetchChapter(ref: ChapterRef, ctx: AdapterContext): Promise<Chapter>;
754
+ }
755
+ /**
756
+ * Convenience base for adapters: implements `canHandle` (hostname match) and
757
+ * provides protected `loadHtml` / `fetchJson` helpers. Extend it and implement
758
+ * the three `fetch*` methods.
759
+ */
760
+ declare abstract class BaseAdapter implements SiteAdapter {
761
+ abstract readonly id: AdapterId;
762
+ abstract readonly displayName: string;
763
+ abstract readonly hostnames: readonly string[];
764
+ readonly preferredTransport?: "http" | "browser";
765
+ abstract fetchMetadata(url: Url, ctx: AdapterContext): Promise<NovelMetadata>;
766
+ abstract fetchChapterList(url: Url, ctx: AdapterContext): AsyncIterable<ChapterRef>;
767
+ abstract fetchChapter(ref: ChapterRef, ctx: AdapterContext): Promise<Chapter>;
768
+ canHandle(url: string): boolean;
769
+ protected loadHtml(url: Url, ctx: AdapterContext): Promise<CheerioAPI>;
770
+ protected fetchJson(url: Url, ctx: AdapterContext): Promise<unknown>;
771
+ }
772
+
773
+ /**
774
+ * Orchestrates a download: resolves a site adapter from the URL, fetches
775
+ * metadata + the table of contents, then fetches chapters concurrently with
776
+ * shared rate-limiting, retries, optional caching, and resumable state.
777
+ *
778
+ * Most callers use the shared `downloader` singleton or `createDownloader()`
779
+ * rather than constructing this directly. An instance owns a per-host transport
780
+ * (and may hold a browser pool), so call {@link Downloader.dispose} when done.
781
+ */
782
+ declare class Downloader {
783
+ #private;
784
+ /**
785
+ * Build a downloader from a set of adapters. `rateLimit`, `retry`, and
786
+ * `transport` configure the shared per-host transport (set once here, not
787
+ * per call). Provide `http` to replace the entire transport stack with a
788
+ * custom {@link HttpClient}. Prefer `createDownloader()` for the built-in
789
+ * adapters; use this constructor for a custom adapter set or transport.
790
+ */
791
+ constructor(opts: {
792
+ adapters: readonly SiteAdapter[];
793
+ http?: HttpClient;
794
+ logger?: Logger;
795
+ transport?: TransportConfig | TransportMode;
796
+ rateLimit?: RateLimitOptions;
797
+ retry?: RetryOptions;
798
+ /** @internal — test seam for browser module loading */
799
+ browserModuleLoader?: () => Promise<BrowserModule>;
800
+ /** @internal — test seam: replaces the real UndiciHttpClient leaf */
801
+ undiciOverride?: HttpClient;
802
+ logLevel?: LogLevel;
803
+ });
804
+ /** Release held resources (closes the browser pool, if any). Call once when finished with the instance. */
805
+ dispose(): Promise<void>;
806
+ /** True when a built-in/registered adapter can handle this URL. */
807
+ canHandle(url: string): boolean;
808
+ /** Plain, serializable list of the sites this instance can download. */
809
+ supportedSites(): readonly {
810
+ id: AdapterId;
811
+ displayName: string;
812
+ hostnames: readonly string[];
813
+ }[];
814
+ /** @internal — for tests only. Resolves to a ready adapter context (`ctx`) plus an async `release` function. */
815
+ _ctxFor(adapter: SiteAdapter, signal: AbortSignal): Promise<{
816
+ ctx: AdapterContext;
817
+ release: () => Promise<void>;
818
+ }>;
819
+ /**
820
+ * Low-level single-stage metadata fetch. Unlike `download()`, this returns a
821
+ * bare value and therefore **throws** on failure (`AdapterNotFoundError`,
822
+ * `ParseError`, or any `DownloadError`). Use `download()` if you want the
823
+ * `{ status: "error" }` envelope instead of exceptions.
824
+ */
825
+ fetchMetadata(url: string, options?: DownloadOptions): Promise<NovelMetadata>;
826
+ /**
827
+ * Download a novel end to end. Returns a {@link DownloadResult} envelope —
828
+ * `"success"`, `"partial"` (some chapters failed; carries a `resumeToken`
829
+ * when `options.resume` is set), or `"error"` (including an unsupported or
830
+ * malformed URL). The only thrown error is `CancelledError`, when
831
+ * `options.signal` aborts. Use `options.onEvent` for progress.
832
+ */
833
+ download(url: string, options?: DownloadOptions): Promise<DownloadResult>;
834
+ }
835
+
836
+ /** Returns fresh instances of every built-in site adapter; the result's type matches the `adapters` option of `Downloader` and `createDownloader`. */
837
+ declare function builtinAdapters(): SiteAdapter[];
838
+ /** Options for {@link createDownloader}. All fields are optional; `logger`, `rateLimit`, and `retry` have the same types as the same-named `Downloader` constructor options. */
839
+ interface CreateDownloaderOptions {
840
+ /** Transport mode/config. Default: `{ mode: "auto" }` (headless). */
841
+ readonly transport?: TransportConfig | TransportMode;
842
+ /** Replace the entire transport stack with a custom {@link HttpClient}. */
843
+ readonly http?: HttpClient;
844
+ readonly logger?: Logger;
845
+ /** Convenience: build a pino logger at this level when no `logger` is given. */
846
+ readonly logLevel?: LogLevel;
847
+ /** Adapters to register. Defaults to every built-in adapter. */
848
+ readonly adapters?: readonly SiteAdapter[];
849
+ readonly rateLimit?: RateLimitOptions;
850
+ readonly retry?: RetryOptions;
851
+ }
852
+ /**
853
+ * Build a Downloader wired with the built-in site adapters (or with `opts.adapters`
854
+ * when given). Unlike the shared `downloader` singleton, this lets you set the transport
855
+ * (e.g. a headed browser for a manual Cloudflare solve), a custom HttpClient, a logger,
856
+ * or rate-limit/retry options. Call `dispose()` when done — the instance may hold a browser pool.
857
+ */
858
+ declare function createDownloader(opts?: CreateDownloaderOptions): Downloader;
859
+ /** Shared singleton: every built-in adapter, default (auto) transport. Use `createDownloader()` for a custom transport, HttpClient, or logger. */
860
+ declare const downloader: Downloader;
861
+
862
+ export { type AdapterContext, type AdapterId, AdapterNotFoundError, BaseAdapter, BrowserModuleNotInstalledError, type CacheOptions, CancelledError, ChallengeUnresolvedError, type ChallengeUnresolvedHint, type Chapter, type ChapterFailure, ChapterFetchError, type ChapterIndex, type ChapterRef, type CreateDownloaderOptions, DownloadError, type DownloadEvent, type DownloadFailed, type DownloadOptions, type DownloadPartial, type DownloadResult, type DownloadSuccess, Downloader, type HttpClient, HttpError, type IsoDate, type LogLevel, type Logger, type NovelData, type NovelMetadata, type NovelStatus, ParseError, type RateLimitOptions, RateLimitedError, type ResumeOptions, type ResumeToken, type RetryOptions, type SiteAdapter, TimeoutError, type TransportBrowserOptions, type TransportConfig, type TransportMode, type Url, builtinAdapters, chapterIndex, createDownloader, createLogger, downloader };