@duyquangnvx/webnovel-downloader 0.4.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +306 -241
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.js +302 -237
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -551,7 +551,10 @@ var EventBus = class {
|
|
|
551
551
|
// src/http/client.ts
|
|
552
552
|
var import_undici = require("undici");
|
|
553
553
|
init_errors();
|
|
554
|
+
init_primitives();
|
|
554
555
|
var DEFAULT_TIMEOUT_MS = 3e4;
|
|
556
|
+
var MAX_REDIRECTS = 5;
|
|
557
|
+
var REDIRECT_STATUSES = /* @__PURE__ */ new Set([301, 302, 303, 307, 308]);
|
|
555
558
|
var UndiciHttpClient = class {
|
|
556
559
|
#logger;
|
|
557
560
|
#dispatcher;
|
|
@@ -566,23 +569,28 @@ var UndiciHttpClient = class {
|
|
|
566
569
|
const half = Math.max(1, Math.floor(timeoutMs / 2));
|
|
567
570
|
let res;
|
|
568
571
|
try {
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
572
|
+
let currentUrl = url;
|
|
573
|
+
for (let hop = 0; ; hop++) {
|
|
574
|
+
res = await (0, import_undici.request)(currentUrl, {
|
|
575
|
+
method: "GET",
|
|
576
|
+
dispatcher: this.#dispatcher,
|
|
577
|
+
...opts?.headers !== void 0 ? { headers: opts.headers } : {},
|
|
578
|
+
...opts?.signal !== void 0 ? { signal: opts.signal } : {},
|
|
579
|
+
headersTimeout: half,
|
|
580
|
+
bodyTimeout: half
|
|
581
|
+
});
|
|
582
|
+
const next = hop < MAX_REDIRECTS ? redirectTarget(res.statusCode, res.headers, currentUrl) : void 0;
|
|
583
|
+
if (next === void 0) break;
|
|
584
|
+
await res.body.dump();
|
|
585
|
+
currentUrl = next;
|
|
586
|
+
}
|
|
578
587
|
const body = await res.body.text();
|
|
579
588
|
const headers = normalizeHeaders(res.headers);
|
|
580
|
-
const finalUrl = url;
|
|
581
589
|
return {
|
|
582
590
|
status: res.statusCode,
|
|
583
591
|
headers,
|
|
584
592
|
body,
|
|
585
|
-
url:
|
|
593
|
+
url: currentUrl
|
|
586
594
|
};
|
|
587
595
|
} catch (cause) {
|
|
588
596
|
if (res !== void 0) await res.body.dump().catch(() => {
|
|
@@ -603,6 +611,17 @@ var UndiciHttpClient = class {
|
|
|
603
611
|
if (this.#ownsDispatcher) await this.#dispatcher.close();
|
|
604
612
|
}
|
|
605
613
|
};
|
|
614
|
+
/**
 * Works out where a redirect response points.
 *
 * @param status - HTTP status code of the response just received.
 * @param headers - Response headers; `location` may be a string or an array of strings.
 * @param currentUrl - URL that produced the response; relative `Location` values resolve against it.
 * @returns The absolute, branded target URL, or `undefined` when the status is not in
 *   REDIRECT_STATUSES, the `Location` header is missing or empty, or it cannot be parsed.
 */
function redirectTarget(status, headers, currentUrl) {
  if (!REDIRECT_STATUSES.has(status)) {
    return undefined;
  }
  const header = headers.location;
  // A repeated header arrives as an array; only the first value is considered.
  const target = Array.isArray(header) ? header[0] : header;
  if (target === undefined || target === "") {
    return undefined;
  }
  try {
    const absolute = new URL(target, currentUrl);
    return unsafeBrandUrl(absolute.href);
  } catch {
    // An unparseable Location is treated as "nothing to follow".
    return undefined;
  }
}
|
|
606
625
|
function normalizeHeaders(h) {
|
|
607
626
|
const out = {};
|
|
608
627
|
for (const [k, v] of Object.entries(h)) {
|
|
@@ -899,31 +918,6 @@ function clamp(n, lo, hi) {
|
|
|
899
918
|
// src/http/index.ts
|
|
900
919
|
init_cookie_jar();
|
|
901
920
|
|
|
902
|
-
// src/http/stack-builder.ts
|
|
903
|
-
var HttpStack = class _HttpStack {
|
|
904
|
-
#client;
|
|
905
|
-
constructor(client) {
|
|
906
|
-
this.#client = client;
|
|
907
|
-
}
|
|
908
|
-
static from(leaf) {
|
|
909
|
-
return new _HttpStack(leaf);
|
|
910
|
-
}
|
|
911
|
-
rotateUserAgent(uas, defaultHeaders, cookieJar) {
|
|
912
|
-
return new _HttpStack(
|
|
913
|
-
new UserAgentRotatingHttpClient(this.#client, uas, defaultHeaders, cookieJar)
|
|
914
|
-
);
|
|
915
|
-
}
|
|
916
|
-
rateLimit(limiter, onEvent) {
|
|
917
|
-
return new _HttpStack(new RateLimitedHttpClient(this.#client, limiter, onEvent));
|
|
918
|
-
}
|
|
919
|
-
retry(opts, logger, limiter, onEvent) {
|
|
920
|
-
return new _HttpStack(new RetryingHttpClient(this.#client, opts, logger, limiter, onEvent));
|
|
921
|
-
}
|
|
922
|
-
build() {
|
|
923
|
-
return this.#client;
|
|
924
|
-
}
|
|
925
|
-
};
|
|
926
|
-
|
|
927
921
|
// src/http/tiered.ts
|
|
928
922
|
init_cf_challenge();
|
|
929
923
|
var FALLBACK_UA = DEFAULT_UA_POOL[0] ?? "Mozilla/5.0";
|
|
@@ -989,7 +983,20 @@ function buildHttpClient(opts = {}) {
|
|
|
989
983
|
...opts.dispatcher !== void 0 ? { dispatcher: opts.dispatcher } : {}
|
|
990
984
|
});
|
|
991
985
|
const limiter = opts.rateLimiter ?? new RateLimiter(opts.rateLimit ?? DEFAULT_RATE_LIMIT);
|
|
992
|
-
const
|
|
986
|
+
const withUserAgent = new UserAgentRotatingHttpClient(
|
|
987
|
+
leaf,
|
|
988
|
+
opts.userAgents ?? new UserAgents(),
|
|
989
|
+
opts.defaultHeaders,
|
|
990
|
+
cookieJar
|
|
991
|
+
);
|
|
992
|
+
const withRateLimit = new RateLimitedHttpClient(withUserAgent, limiter, opts.onEvent);
|
|
993
|
+
const httpBranch = new RetryingHttpClient(
|
|
994
|
+
withRateLimit,
|
|
995
|
+
normalizeRetry(opts.retry),
|
|
996
|
+
logger,
|
|
997
|
+
limiter,
|
|
998
|
+
opts.onEvent
|
|
999
|
+
);
|
|
993
1000
|
const { mode } = normalizeTransport(opts.transport);
|
|
994
1001
|
if (mode === "http-only") {
|
|
995
1002
|
return httpBranch;
|
|
@@ -1501,8 +1508,78 @@ var Debouncer = class {
|
|
|
1501
1508
|
}
|
|
1502
1509
|
};
|
|
1503
1510
|
|
|
1504
|
-
// src/core/
|
|
1505
|
-
|
|
1511
|
+
// src/core/run-ledger.ts
|
|
1512
|
+
/**
 * In-memory bookkeeping for a single download run: which chapters were fetched,
 * which failed, and which indices have already been queued. The persisted
 * `completed` / `failed` lists are derived from it via `applyTo`.
 */
var RunLedger = class _RunLedger {
  /** Fetched chapters: prior-run chapters first, then this run's in completion order. */
  #successes = [];
  /** Failures recorded during this run as `{ ref, error, attempts }`. */
  #failures = [];
  /** Membership index over `#completedList`. */
  #completedSet = /* @__PURE__ */ new Set();
  /** Completed chapter indices in first-seen order, without duplicates. */
  #completedList = [];
  /** Latest failure record per chapter index, in the persisted shape. */
  #failedMap = /* @__PURE__ */ new Map();
  /** Indices already handed to the queue during this run. */
  #enqueuedSet = /* @__PURE__ */ new Set();

  /**
   * Builds a ledger from persisted resume state and chapters already stored.
   *
   * @param state - Persisted state, or null/undefined for a fresh run.
   * @param priorChapters - Chapters loaded from storage; each counts as a success.
   * @returns A ledger whose failed records exclude every index known to be completed.
   */
  static seed(state, priorChapters) {
    const ledger = new _RunLedger();
    for (const idx of state?.completed ?? []) ledger.#markCompleted(idx);
    for (const chapter of priorChapters) {
      // Pushed one at a time: `push(...priorChapters)` passes every chapter as a
      // call argument and throws RangeError once the array is large enough.
      ledger.#successes.push(chapter);
      ledger.#markCompleted(chapter.index);
    }
    for (const f of state?.failed ?? []) {
      if (!ledger.#completedSet.has(f.index)) ledger.#failedMap.set(f.index, f);
    }
    return ledger;
  }

  /** Adds `index` to the completed set and list exactly once. */
  #markCompleted(index) {
    if (this.#completedSet.has(index)) return;
    this.#completedSet.add(index);
    this.#completedList.push(index);
  }

  /** Check-and-set enqueue dedup: false if the index is completed or already enqueued. */
  markEnqueued(index) {
    if (this.#completedSet.has(index) || this.#enqueuedSet.has(index)) return false;
    this.#enqueuedSet.add(index);
    return true;
  }

  /** Records a fetched chapter and clears any failed record for its index. */
  recordSuccess(chapter) {
    this.#successes.push(chapter);
    this.#markCompleted(chapter.index);
    this.#failedMap.delete(chapter.index);
  }

  /**
   * Records a failed fetch, both as a run-level failure entry and as the
   * latest failed record for the chapter index.
   *
   * @param ref - Chapter reference that failed; `ref.index` keys the failed record.
   * @param error - Error whose `code` and `message` are copied into the record.
   */
  recordFailure(ref, error) {
    this.#failures.push({ ref, error, attempts: 1 });
    this.#failedMap.set(ref.index, {
      index: ref.index,
      lastError: { code: error.code, message: error.message }
    });
  }

  /** Removes the failed record for `index` without touching the run-level failures. */
  dropOrphanFailure(index) {
    this.#failedMap.delete(index);
  }

  /** The ONLY projection to the persisted shape; nothing else writes state.completed/failed. */
  applyTo(state) {
    return {
      ...state,
      // Sorted on a copy so the ledger's own insertion order is left untouched.
      completed: [...this.#completedList].sort((a, b) => Number(a) - Number(b)),
      failed: [...this.#failedMap.values()]
    };
  }

  /** Returns the live list of fetched chapters (not a copy). */
  successes() {
    return this.#successes;
  }

  /** Returns the live list of failures recorded during this run (not a copy). */
  failures() {
    return this.#failures;
  }

  /** Snapshot — safe to mutate the ledger while iterating the result. */
  failedRecords() {
    return [...this.#failedMap.values()];
  }

  /** Fetched-chapter count (including prior-run chapters) — drives the progress event. */
  get completedCount() {
    return this.#successes.length;
  }

  /** Number of distinct indices accepted by `markEnqueued`. */
  get enqueuedCount() {
    return this.#enqueuedSet.size;
  }
};
|
|
1506
1583
|
|
|
1507
1584
|
// src/pipeline/fetch-toc.ts
|
|
1508
1585
|
init_errors();
|
|
@@ -1574,15 +1651,51 @@ function assembleNovelData(metadata, results) {
|
|
|
1574
1651
|
// src/core/download-run.ts
|
|
1575
1652
|
init_errors();
|
|
1576
1653
|
|
|
1654
|
+
// src/storage/resume-state.ts
|
|
1655
|
+
var import_zod4 = require("zod");
|
|
1656
|
+
init_primitives();
|
|
1657
|
+
var ResumeFailureRecordSchema = import_zod4.z.object({
|
|
1658
|
+
index: ChapterIndexSchema,
|
|
1659
|
+
lastError: import_zod4.z.object({ code: import_zod4.z.string(), message: import_zod4.z.string() }).readonly()
|
|
1660
|
+
}).readonly();
|
|
1661
|
+
var ResumeStateSchema = import_zod4.z.object({
|
|
1662
|
+
schemaVersion: import_zod4.z.literal(1),
|
|
1663
|
+
url: UrlSchema,
|
|
1664
|
+
adapterId: AdapterIdSchema,
|
|
1665
|
+
metadata: NovelMetadataSchema.nullable(),
|
|
1666
|
+
toc: import_zod4.z.array(ChapterRefSchema).readonly().nullable(),
|
|
1667
|
+
completed: import_zod4.z.array(ChapterIndexSchema).readonly(),
|
|
1668
|
+
failed: import_zod4.z.array(ResumeFailureRecordSchema).readonly(),
|
|
1669
|
+
startedAt: IsoDateSchema,
|
|
1670
|
+
updatedAt: IsoDateSchema
|
|
1671
|
+
}).readonly();
|
|
1672
|
+
/** Returns the current instant as a branded ISO-8601 string. */
function nowIso() {
  const timestamp = /* @__PURE__ */ new Date();
  return unsafeBrandIsoDate(timestamp.toISOString());
}
|
|
1675
|
+
/**
 * Creates the initial resume state for a run that has no persisted state yet.
 *
 * @param url - Novel URL the run targets.
 * @param adapterId - Identifier of the adapter handling the run.
 * @returns A schema-version-1 state with no metadata, no TOC, nothing completed
 *   or failed, and identical `startedAt` / `updatedAt` timestamps.
 */
function freshState(url, adapterId) {
  // A single timestamp is shared so both fields agree exactly.
  const createdAt = nowIso();
  const state = {
    schemaVersion: 1,
    url,
    adapterId,
    metadata: null,
    toc: null,
    completed: [],
    failed: [],
    startedAt: createdAt,
    updatedAt: createdAt
  };
  return state;
}
|
|
1689
|
+
|
|
1577
1690
|
// src/storage/resume-token.ts
|
|
1578
1691
|
var import_node_path3 = require("path");
|
|
1579
|
-
var
|
|
1692
|
+
var import_zod5 = require("zod");
|
|
1580
1693
|
init_errors();
|
|
1581
1694
|
init_primitives();
|
|
1582
|
-
var TokenPayloadSchema =
|
|
1583
|
-
v:
|
|
1584
|
-
stateFile:
|
|
1585
|
-
urlHash:
|
|
1695
|
+
var TokenPayloadSchema = import_zod5.z.object({
|
|
1696
|
+
v: import_zod5.z.literal(1),
|
|
1697
|
+
stateFile: import_zod5.z.string().min(1),
|
|
1698
|
+
urlHash: import_zod5.z.string().min(1)
|
|
1586
1699
|
});
|
|
1587
1700
|
function encodeResumeToken(stateFile, url) {
|
|
1588
1701
|
const payload = { v: 1, stateFile, urlHash: sha1(url) };
|
|
@@ -1621,23 +1734,6 @@ function verifyResumeToken(token, url) {
|
|
|
1621
1734
|
// src/core/download-run.ts
|
|
1622
1735
|
var QUEUE_BACKPRESSURE_RATIO = 4;
|
|
1623
1736
|
var STATE_PERSIST_DEBOUNCE_MS = 500;
|
|
1624
|
-
function freshState(url, adapterId) {
|
|
1625
|
-
const now = nowIso();
|
|
1626
|
-
return {
|
|
1627
|
-
schemaVersion: 1,
|
|
1628
|
-
url,
|
|
1629
|
-
adapterId,
|
|
1630
|
-
metadata: null,
|
|
1631
|
-
toc: null,
|
|
1632
|
-
completed: [],
|
|
1633
|
-
failed: [],
|
|
1634
|
-
startedAt: now,
|
|
1635
|
-
updatedAt: now
|
|
1636
|
-
};
|
|
1637
|
-
}
|
|
1638
|
-
function nowIso() {
|
|
1639
|
-
return unsafeBrandIsoDate((/* @__PURE__ */ new Date()).toISOString());
|
|
1640
|
-
}
|
|
1641
1737
|
var DownloadRun = class {
|
|
1642
1738
|
#adapter;
|
|
1643
1739
|
#validated;
|
|
@@ -1649,13 +1745,7 @@ var DownloadRun = class {
|
|
|
1649
1745
|
#bus;
|
|
1650
1746
|
#logger;
|
|
1651
1747
|
#state = null;
|
|
1652
|
-
#
|
|
1653
|
-
#failures = [];
|
|
1654
|
-
#completedSet = /* @__PURE__ */ new Set();
|
|
1655
|
-
#completedList = [];
|
|
1656
|
-
#failedMap = /* @__PURE__ */ new Map();
|
|
1657
|
-
#enqueuedSet = /* @__PURE__ */ new Set();
|
|
1658
|
-
#aborted = false;
|
|
1748
|
+
#ledger = RunLedger.seed(null, []);
|
|
1659
1749
|
#queue;
|
|
1660
1750
|
#stateDebouncer;
|
|
1661
1751
|
constructor(args) {
|
|
@@ -1681,9 +1771,8 @@ var DownloadRun = class {
|
|
|
1681
1771
|
const resolved = await this.#resolveMetadata();
|
|
1682
1772
|
if ("error" in resolved) return resolved.error;
|
|
1683
1773
|
const meta = resolved.meta;
|
|
1684
|
-
const ctxWithMeta = this.#
|
|
1774
|
+
const ctxWithMeta = { ...this.#ctx, novelMetadata: meta };
|
|
1685
1775
|
const onAbort = () => {
|
|
1686
|
-
this.#aborted = true;
|
|
1687
1776
|
this.#queue.clear();
|
|
1688
1777
|
};
|
|
1689
1778
|
this.#signal.addEventListener("abort", onAbort, { once: true });
|
|
@@ -1698,40 +1787,23 @@ var DownloadRun = class {
|
|
|
1698
1787
|
}
|
|
1699
1788
|
async #initResume() {
|
|
1700
1789
|
const resumeCfg = this.#resumeStorage;
|
|
1701
|
-
if (resumeCfg)
|
|
1702
|
-
|
|
1703
|
-
|
|
1704
|
-
|
|
1705
|
-
|
|
1706
|
-
|
|
1707
|
-
|
|
1708
|
-
if (state.adapterId !== this.#adapter.id) {
|
|
1709
|
-
throw new ParseError("Stale resume state: adapter mismatch");
|
|
1710
|
-
}
|
|
1711
|
-
} else {
|
|
1712
|
-
state = freshState(this.#validated, this.#adapter.id);
|
|
1713
|
-
}
|
|
1714
|
-
this.#state = state;
|
|
1715
|
-
} catch (err) {
|
|
1716
|
-
return { status: "error", error: normalizeToEnvelopeError(err) };
|
|
1717
|
-
}
|
|
1718
|
-
}
|
|
1719
|
-
if (resumeCfg && this.#state) {
|
|
1720
|
-
const prior = await resumeCfg.chapters.loadAll();
|
|
1721
|
-
this.#successes.push(...prior);
|
|
1722
|
-
const merged = new Set(this.#state.completed);
|
|
1723
|
-
for (const c of prior) merged.add(c.index);
|
|
1724
|
-
this.#state = {
|
|
1725
|
-
...this.#state,
|
|
1726
|
-
completed: [...merged].sort((a, b) => Number(a) - Number(b))
|
|
1727
|
-
};
|
|
1790
|
+
if (!resumeCfg) return null;
|
|
1791
|
+
try {
|
|
1792
|
+
const { state, priorChapters } = await resumeCfg.loadFor(this.#validated, this.#adapter.id);
|
|
1793
|
+
this.#state = state;
|
|
1794
|
+
this.#ledger = RunLedger.seed(state, priorChapters);
|
|
1795
|
+
} catch (err) {
|
|
1796
|
+
return { status: "error", error: normalizeToEnvelopeError(err) };
|
|
1728
1797
|
}
|
|
1729
1798
|
return null;
|
|
1730
1799
|
}
|
|
1800
|
+
#schedulePersist() {
|
|
1801
|
+
if (this.#state) this.#stateDebouncer.schedule();
|
|
1802
|
+
}
|
|
1731
1803
|
async #persistState() {
|
|
1732
1804
|
const resumeCfg = this.#resumeStorage;
|
|
1733
1805
|
if (!resumeCfg || !this.#state) return;
|
|
1734
|
-
const next = { ...this.#state, updatedAt: nowIso() };
|
|
1806
|
+
const next = { ...this.#ledger.applyTo(this.#state), updatedAt: nowIso() };
|
|
1735
1807
|
this.#state = next;
|
|
1736
1808
|
await resumeCfg.resume.save(resumeCfg.stateFile, next);
|
|
1737
1809
|
}
|
|
@@ -1752,69 +1824,35 @@ var DownloadRun = class {
|
|
|
1752
1824
|
return { error: { status: "error", error: normalizeToEnvelopeError(err) } };
|
|
1753
1825
|
}
|
|
1754
1826
|
}
|
|
1755
|
-
#startQueue(meta) {
|
|
1756
|
-
const state = this.#state;
|
|
1757
|
-
const completedSet = this.#completedSet;
|
|
1758
|
-
const completedList = this.#completedList;
|
|
1759
|
-
const failedMap = this.#failedMap;
|
|
1760
|
-
for (const idx of state?.completed ?? []) completedSet.add(idx);
|
|
1761
|
-
completedList.push(...state ? state.completed : []);
|
|
1762
|
-
if (state) {
|
|
1763
|
-
for (const f of state.failed) {
|
|
1764
|
-
if (!completedSet.has(f.index)) failedMap.set(f.index, f);
|
|
1765
|
-
}
|
|
1766
|
-
}
|
|
1767
|
-
return { ...this.#ctx, novelMetadata: meta };
|
|
1768
|
-
}
|
|
1769
1827
|
async #enqueueRef(ref, ctxWithMeta) {
|
|
1770
|
-
if (this.#
|
|
1771
|
-
if (this.#
|
|
1772
|
-
if (this.#enqueuedSet.has(ref.index)) return;
|
|
1773
|
-
this.#enqueuedSet.add(ref.index);
|
|
1828
|
+
if (this.#signal.aborted) return;
|
|
1829
|
+
if (!this.#ledger.markEnqueued(ref.index)) return;
|
|
1774
1830
|
const backpressureLimit = this.#concurrency * QUEUE_BACKPRESSURE_RATIO;
|
|
1775
1831
|
while (this.#queue.size >= backpressureLimit) {
|
|
1776
1832
|
await this.#queue.onSizeLessThan(backpressureLimit);
|
|
1777
|
-
if (this.#
|
|
1833
|
+
if (this.#signal.aborted) throw new CancelledError({ cause: this.#signal.reason });
|
|
1778
1834
|
}
|
|
1779
|
-
if (this.#aborted) throw new CancelledError({ cause: this.#signal.reason });
|
|
1780
1835
|
void this.#queue.add(() => this.#fetchChapter(ref, ctxWithMeta));
|
|
1781
1836
|
}
|
|
1782
1837
|
async #fetchChapter(ref, ctxWithMeta) {
|
|
1783
|
-
if (this.#
|
|
1838
|
+
if (this.#signal.aborted) return;
|
|
1784
1839
|
try {
|
|
1785
1840
|
const chapter = await fetchChapterTask(this.#adapter, ref, ctxWithMeta, this.#bus);
|
|
1786
|
-
if (this.#aborted) return;
|
|
1841
|
+
if (this.#signal.aborted) return;
|
|
1787
1842
|
if (this.#resumeStorage) await this.#resumeStorage.chapters.save(chapter);
|
|
1788
|
-
this.#
|
|
1789
|
-
this.#
|
|
1790
|
-
this.#completedSet.add(ref.index);
|
|
1791
|
-
this.#failedMap.delete(ref.index);
|
|
1792
|
-
if (this.#state) {
|
|
1793
|
-
this.#state = {
|
|
1794
|
-
...this.#state,
|
|
1795
|
-
completed: [...this.#completedList],
|
|
1796
|
-
failed: [...this.#failedMap.values()]
|
|
1797
|
-
};
|
|
1798
|
-
this.#stateDebouncer.schedule();
|
|
1799
|
-
}
|
|
1843
|
+
this.#ledger.recordSuccess(chapter);
|
|
1844
|
+
this.#schedulePersist();
|
|
1800
1845
|
this.#bus.emit({ type: "chapter:success", chapter });
|
|
1801
1846
|
this.#bus.emit({
|
|
1802
1847
|
type: "progress",
|
|
1803
|
-
completed: this.#
|
|
1804
|
-
total: ctxWithMeta.novelMetadata?.totalChapters ?? this.#
|
|
1848
|
+
completed: this.#ledger.completedCount,
|
|
1849
|
+
total: ctxWithMeta.novelMetadata?.totalChapters ?? this.#ledger.enqueuedCount
|
|
1805
1850
|
});
|
|
1806
1851
|
} catch (cause) {
|
|
1807
1852
|
if (cause instanceof CancelledError) return;
|
|
1808
1853
|
const wrapped = cause instanceof ChapterFetchError ? cause : new ChapterFetchError(ref, { cause });
|
|
1809
|
-
this.#
|
|
1810
|
-
this.#
|
|
1811
|
-
index: ref.index,
|
|
1812
|
-
lastError: { code: wrapped.code, message: wrapped.message }
|
|
1813
|
-
});
|
|
1814
|
-
if (this.#state) {
|
|
1815
|
-
this.#state = { ...this.#state, failed: [...this.#failedMap.values()] };
|
|
1816
|
-
this.#stateDebouncer.schedule();
|
|
1817
|
-
}
|
|
1854
|
+
this.#ledger.recordFailure(ref, wrapped);
|
|
1855
|
+
this.#schedulePersist();
|
|
1818
1856
|
this.#bus.emit({ type: "chapter:failed", ref, error: wrapped });
|
|
1819
1857
|
}
|
|
1820
1858
|
}
|
|
@@ -1866,7 +1904,7 @@ var DownloadRun = class {
|
|
|
1866
1904
|
const tocByIndex = new Map(
|
|
1867
1905
|
(this.#state.toc ?? []).map((r) => [r.index, r])
|
|
1868
1906
|
);
|
|
1869
|
-
for (const f of this.#
|
|
1907
|
+
for (const f of this.#ledger.failedRecords()) {
|
|
1870
1908
|
const ref = tocByIndex.get(f.index);
|
|
1871
1909
|
if (ref) {
|
|
1872
1910
|
await this.#enqueueRef(ref, ctxWithMeta);
|
|
@@ -1874,11 +1912,8 @@ var DownloadRun = class {
|
|
|
1874
1912
|
this.#logger.warn(
|
|
1875
1913
|
`Resume: dropping orphan failed record for index ${String(f.index)} (no matching TOC ref)`
|
|
1876
1914
|
);
|
|
1877
|
-
this.#
|
|
1878
|
-
|
|
1879
|
-
this.#state = { ...this.#state, failed: [...this.#failedMap.values()] };
|
|
1880
|
-
this.#stateDebouncer.schedule();
|
|
1881
|
-
}
|
|
1915
|
+
this.#ledger.dropOrphanFailure(f.index);
|
|
1916
|
+
this.#schedulePersist();
|
|
1882
1917
|
}
|
|
1883
1918
|
}
|
|
1884
1919
|
}
|
|
@@ -1910,19 +1945,20 @@ var DownloadRun = class {
|
|
|
1910
1945
|
if (stage2Error) return { status: "error", error: stage2Error };
|
|
1911
1946
|
let data;
|
|
1912
1947
|
try {
|
|
1913
|
-
data = assembleNovelData(metadata, this.#successes);
|
|
1948
|
+
data = assembleNovelData(metadata, this.#ledger.successes());
|
|
1914
1949
|
} catch (err) {
|
|
1915
1950
|
return {
|
|
1916
1951
|
status: "error",
|
|
1917
1952
|
error: normalizeToEnvelopeError(err)
|
|
1918
1953
|
};
|
|
1919
1954
|
}
|
|
1920
|
-
|
|
1955
|
+
const failures = this.#ledger.failures();
|
|
1956
|
+
if (failures.length > 0) {
|
|
1921
1957
|
if (this.#resumeStorage) {
|
|
1922
1958
|
const token = encodeResumeToken(this.#resumeStorage.stateFile, this.#validated);
|
|
1923
|
-
return { status: "partial", resumable: true, data, failures
|
|
1959
|
+
return { status: "partial", resumable: true, data, failures, resumeToken: token };
|
|
1924
1960
|
}
|
|
1925
|
-
return { status: "partial", resumable: false, data, failures
|
|
1961
|
+
return { status: "partial", resumable: false, data, failures };
|
|
1926
1962
|
}
|
|
1927
1963
|
return { status: "success", data };
|
|
1928
1964
|
}
|
|
@@ -1937,32 +1973,12 @@ function normalizeChapterRange(range) {
|
|
|
1937
1973
|
|
|
1938
1974
|
// src/storage/resume-storage.ts
|
|
1939
1975
|
var import_node_path6 = require("path");
|
|
1976
|
+
init_errors();
|
|
1940
1977
|
|
|
1941
1978
|
// src/storage/resume-store.ts
|
|
1942
1979
|
var import_promises3 = require("fs/promises");
|
|
1943
1980
|
var import_node_path4 = require("path");
|
|
1944
1981
|
init_errors();
|
|
1945
|
-
|
|
1946
|
-
// src/storage/resume-state.ts
|
|
1947
|
-
var import_zod5 = require("zod");
|
|
1948
|
-
init_primitives();
|
|
1949
|
-
var ResumeFailureRecordSchema = import_zod5.z.object({
|
|
1950
|
-
index: ChapterIndexSchema,
|
|
1951
|
-
lastError: import_zod5.z.object({ code: import_zod5.z.string(), message: import_zod5.z.string() }).readonly()
|
|
1952
|
-
}).readonly();
|
|
1953
|
-
var ResumeStateSchema = import_zod5.z.object({
|
|
1954
|
-
schemaVersion: import_zod5.z.literal(1),
|
|
1955
|
-
url: UrlSchema,
|
|
1956
|
-
adapterId: AdapterIdSchema,
|
|
1957
|
-
metadata: NovelMetadataSchema.nullable(),
|
|
1958
|
-
toc: import_zod5.z.array(ChapterRefSchema).readonly().nullable(),
|
|
1959
|
-
completed: import_zod5.z.array(ChapterIndexSchema).readonly(),
|
|
1960
|
-
failed: import_zod5.z.array(ResumeFailureRecordSchema).readonly(),
|
|
1961
|
-
startedAt: IsoDateSchema,
|
|
1962
|
-
updatedAt: IsoDateSchema
|
|
1963
|
-
}).readonly();
|
|
1964
|
-
|
|
1965
|
-
// src/storage/resume-store.ts
|
|
1966
1982
|
var MemoryResumeStore = class {
|
|
1967
1983
|
#map = /* @__PURE__ */ new Map();
|
|
1968
1984
|
async load(path) {
|
|
@@ -2105,6 +2121,32 @@ var ResumeStorage = class _ResumeStorage {
|
|
|
2105
2121
|
stateFile
|
|
2106
2122
|
);
|
|
2107
2123
|
}
|
|
2124
|
+
/**
|
|
2125
|
+
* Loads state for a run against (url, adapterId) and applies the reconcile
|
|
2126
|
+
* invariant: a chapter file on disk proves the fetch completed, so its index
|
|
2127
|
+
* wins over a stale `state.completed` (they diverge when run 1's in-flight
|
|
2128
|
+
* tasks write chapters after abort but before the debounced state save).
|
|
2129
|
+
*/
|
|
2130
|
+
async loadFor(url, adapterId) {
|
|
2131
|
+
let state = await this.resume.load(this.stateFile);
|
|
2132
|
+
if (state) {
|
|
2133
|
+
if (state.url !== url) {
|
|
2134
|
+
throw new ParseError("Stale resume state: URL mismatch", { url, path: this.stateFile });
|
|
2135
|
+
}
|
|
2136
|
+
if (state.adapterId !== adapterId) {
|
|
2137
|
+
throw new ParseError("Stale resume state: adapter mismatch", { url, path: this.stateFile });
|
|
2138
|
+
}
|
|
2139
|
+
} else {
|
|
2140
|
+
state = freshState(url, adapterId);
|
|
2141
|
+
}
|
|
2142
|
+
const priorChapters = await this.chapters.loadAll();
|
|
2143
|
+
if (priorChapters.length > 0) {
|
|
2144
|
+
const merged = new Set(state.completed);
|
|
2145
|
+
for (const c of priorChapters) merged.add(c.index);
|
|
2146
|
+
state = { ...state, completed: [...merged].sort((a, b) => Number(a) - Number(b)) };
|
|
2147
|
+
}
|
|
2148
|
+
return { state, priorChapters };
|
|
2149
|
+
}
|
|
2108
2150
|
};
|
|
2109
2151
|
function resolveResumeStorage(opt, url, logger) {
|
|
2110
2152
|
if (opt === void 0) return null;
|
|
@@ -2437,7 +2479,6 @@ var BaseAdapter = class {
|
|
|
2437
2479
|
|
|
2438
2480
|
// src/adapters/truyenfull/index.ts
|
|
2439
2481
|
init_primitives();
|
|
2440
|
-
init_errors();
|
|
2441
2482
|
|
|
2442
2483
|
// src/adapters/truyenfull/parser.ts
|
|
2443
2484
|
init_primitives();
|
|
@@ -2506,6 +2547,20 @@ function parseNovelHeader($, selectors, sourceUrl) {
|
|
|
2506
2547
|
...coverUrl !== void 0 ? { coverUrl } : {}
|
|
2507
2548
|
};
|
|
2508
2549
|
}
|
|
2550
|
+
/**
 * Assembles a novel-metadata record from parsed page fields and the
 * adapter-supplied envelope, stamping it with the current time.
 *
 * @param fields - Parsed values: `title`, `author`, `description`, `genres`, optional `coverUrl`.
 * @param envelope - Source details: `sourceUrl`, `sourceSite`, `status`, optional `totalChapters`.
 * @returns A new object holding only the fields above plus `fetchedAt`; `coverUrl`
 *   and `totalChapters` are present only when they are not `undefined`.
 */
function buildNovelMetadata(fields, envelope) {
  const { title, author, description, genres } = fields;
  const { sourceUrl, sourceSite, status } = envelope;
  const metadata = {
    title,
    author,
    description,
    genres,
    sourceUrl,
    sourceSite,
    status,
    fetchedAt: /* @__PURE__ */ new Date()
  };
  // Optional keys are omitted entirely rather than set to undefined.
  if (fields.coverUrl !== undefined) {
    metadata.coverUrl = fields.coverUrl;
  }
  if (envelope.totalChapters !== undefined) {
    metadata.totalChapters = envelope.totalChapters;
  }
  return metadata;
}
|
|
2509
2564
|
|
|
2510
2565
|
// src/adapters/shared/content.ts
|
|
2511
2566
|
init_errors();
|
|
@@ -2551,13 +2606,11 @@ function parseNovelPage($, sourceUrl) {
|
|
|
2551
2606
|
let status = "unknown";
|
|
2552
2607
|
if ($(TRUYENFULL_SELECTORS.metadata.statusFull).length > 0) status = "completed";
|
|
2553
2608
|
else if ($(TRUYENFULL_SELECTORS.metadata.statusOngoing).length > 0) status = "ongoing";
|
|
2554
|
-
return {
|
|
2555
|
-
...header,
|
|
2609
|
+
return buildNovelMetadata(header, {
|
|
2556
2610
|
sourceUrl,
|
|
2557
2611
|
sourceSite: unsafeBrandAdapterId("truyenfull"),
|
|
2558
|
-
status
|
|
2559
|
-
|
|
2560
|
-
};
|
|
2612
|
+
status
|
|
2613
|
+
});
|
|
2561
2614
|
}
|
|
2562
2615
|
function parseTocPage($, startIndex, baseUrl) {
|
|
2563
2616
|
const refs = refsFromAnchors($, { selector: TRUYENFULL_SELECTORS.toc.listItem, startIndex, baseUrl });
|
|
@@ -2603,6 +2656,21 @@ function parseChapterPage($, ref) {
|
|
|
2603
2656
|
};
|
|
2604
2657
|
}
|
|
2605
2658
|
|
|
2659
|
+
// src/adapters/shared/paginate.ts
|
|
2660
|
+
init_primitives();
|
|
2661
|
+
init_errors();
|
|
2662
|
+
/**
 * Walks a paginated table of contents, yielding chapter refs page by page.
 *
 * @param opts - `ctx` (its `signal` is checked before each page) and
 *   `fetchPage(page, startIndex)`, which resolves to `{ refs, isLast }` or `null`.
 * @yields Each ref of each fetched page, in order.
 * @throws CancelledError when `opts.ctx.signal` is aborted before a page is fetched.
 *
 * The walk ends when `fetchPage` resolves to `null`, returns no refs, or marks
 * the page as last. Pages are numbered from 1, and the start index passed to
 * `fetchPage` grows by the number of refs on each page.
 */
async function* paginateToc(opts) {
  let nextIndex = unsafeBrandChapterIndex(0);
  let pageNumber = 1;
  while (true) {
    if (opts.ctx.signal.aborted) {
      throw new CancelledError({ cause: opts.ctx.signal.reason });
    }
    const pageResult = await opts.fetchPage(pageNumber, nextIndex);
    if (pageResult === null || pageResult.refs.length === 0) {
      return;
    }
    for (const ref of pageResult.refs) {
      yield ref;
    }
    nextIndex = unsafeBrandChapterIndex(nextIndex + pageResult.refs.length);
    if (pageResult.isLast) {
      return;
    }
    pageNumber++;
  }
}
|
|
2673
|
+
|
|
2606
2674
|
// src/adapters/truyenfull/index.ts
|
|
2607
2675
|
var TruyenFullAdapter = class extends BaseAdapter {
|
|
2608
2676
|
id = unsafeBrandAdapterId("truyenfull");
|
|
@@ -2641,22 +2709,20 @@ var TruyenFullAdapter = class extends BaseAdapter {
|
|
|
2641
2709
|
}
|
|
2642
2710
|
async *fetchChapterList(url, ctx) {
|
|
2643
2711
|
const base = stripTrailingSlash(this.#normalize(url));
|
|
2644
|
-
let page = 1;
|
|
2645
|
-
let index = unsafeBrandChapterIndex(0);
|
|
2646
2712
|
let knownTotalPages;
|
|
2647
|
-
|
|
2648
|
-
|
|
2649
|
-
|
|
2650
|
-
|
|
2651
|
-
|
|
2652
|
-
|
|
2653
|
-
|
|
2654
|
-
|
|
2655
|
-
|
|
2656
|
-
|
|
2657
|
-
|
|
2658
|
-
|
|
2659
|
-
}
|
|
2713
|
+
yield* paginateToc({
|
|
2714
|
+
ctx,
|
|
2715
|
+
fetchPage: async (page, startIndex) => {
|
|
2716
|
+
const pageUrl = page === 1 ? unsafeBrandUrl(`${base}/`) : unsafeBrandUrl(`${base}/trang-${page}/`);
|
|
2717
|
+
const $ = await this.loadHtml(pageUrl, ctx);
|
|
2718
|
+
const { refs, isLastPage, totalPages } = parseTocPage($, startIndex, pageUrl);
|
|
2719
|
+
if (page === 1 && totalPages !== void 0) knownTotalPages = totalPages;
|
|
2720
|
+
return {
|
|
2721
|
+
refs,
|
|
2722
|
+
isLast: isLastPage || knownTotalPages !== void 0 && page >= knownTotalPages
|
|
2723
|
+
};
|
|
2724
|
+
}
|
|
2725
|
+
});
|
|
2660
2726
|
}
|
|
2661
2727
|
async fetchChapter(ref, ctx) {
|
|
2662
2728
|
const target = this.#normalize(ref.url);
|
|
@@ -2726,15 +2792,12 @@ function parseNovelPage2($, sourceUrl) {
|
|
|
2726
2792
|
let status = "unknown";
|
|
2727
2793
|
if ($(METRUYENCHU_SELECTORS.metadata.statusFull).length > 0) status = "completed";
|
|
2728
2794
|
else if (METRUYENCHU_SELECTORS.metadata.statusOngoing.some((sel) => $(sel).length > 0)) status = "ongoing";
|
|
2729
|
-
|
|
2730
|
-
return {
|
|
2731
|
-
...header,
|
|
2795
|
+
return buildNovelMetadata(header, {
|
|
2732
2796
|
sourceUrl,
|
|
2733
2797
|
sourceSite: SOURCE_SITE,
|
|
2734
2798
|
status,
|
|
2735
|
-
|
|
2736
|
-
|
|
2737
|
-
};
|
|
2799
|
+
totalChapters: extractTotalChapters($)
|
|
2800
|
+
});
|
|
2738
2801
|
}
|
|
2739
2802
|
var TOTAL_CHAPTERS_LABEL_REGEX = /^\s*Số\s*chương\s*:?\s*$/i;
|
|
2740
2803
|
var TOTAL_CHAPTERS_VALUE_REGEX = /(\d[\d.,]*)/;
|
|
@@ -2831,34 +2894,34 @@ var MeTruyenChuComVnAdapter = class extends BaseAdapter {
|
|
|
2831
2894
|
);
|
|
2832
2895
|
}
|
|
2833
2896
|
const apiOrigin = new URL(url).origin;
|
|
2834
|
-
|
|
2835
|
-
|
|
2836
|
-
|
|
2837
|
-
|
|
2838
|
-
|
|
2839
|
-
|
|
2840
|
-
|
|
2841
|
-
|
|
2842
|
-
|
|
2843
|
-
|
|
2844
|
-
|
|
2845
|
-
|
|
2846
|
-
|
|
2847
|
-
|
|
2848
|
-
|
|
2849
|
-
|
|
2850
|
-
|
|
2851
|
-
|
|
2852
|
-
|
|
2853
|
-
|
|
2854
|
-
|
|
2855
|
-
|
|
2856
|
-
|
|
2857
|
-
|
|
2897
|
+
yield* paginateToc({
|
|
2898
|
+
ctx,
|
|
2899
|
+
fetchPage: async (page, startIndex) => {
|
|
2900
|
+
if (page === 1) {
|
|
2901
|
+
return { refs: parseTocPage1($, startIndex, url), isLast: lastPage <= 1 };
|
|
2902
|
+
}
|
|
2903
|
+
const apiUrl = unsafeBrandUrl(`${apiOrigin}/get/listchap/${bid}?page=${page}`);
|
|
2904
|
+
const raw = await this.fetchJson(apiUrl, ctx);
|
|
2905
|
+
const parsed = tocJsonEnvelopeSchema.safeParse(raw);
|
|
2906
|
+
if (!parsed.success) {
|
|
2907
|
+
throw new ParseError(`Invalid TOC JSON envelope at ${apiUrl}`, {
|
|
2908
|
+
cause: parsed.error,
|
|
2909
|
+
url: apiUrl
|
|
2910
|
+
});
|
|
2911
|
+
}
|
|
2912
|
+
const fragment = parsed.data.data;
|
|
2913
|
+
if (!fragment) {
|
|
2914
|
+
ctx.logger.warn({ apiUrl, page }, "Empty TOC JSON data; ending walk early");
|
|
2915
|
+
return null;
|
|
2916
|
+
}
|
|
2917
|
+
const refs = parseTocJsonPage(fragment, startIndex, url);
|
|
2918
|
+
if (refs.length === 0) {
|
|
2919
|
+
ctx.logger.warn({ apiUrl, page }, "Zero anchors in TOC JSON page; ending walk early");
|
|
2920
|
+
return null;
|
|
2921
|
+
}
|
|
2922
|
+
return { refs, isLast: page >= lastPage };
|
|
2858
2923
|
}
|
|
2859
|
-
|
|
2860
|
-
index = unsafeBrandChapterIndex(index + refs.length);
|
|
2861
|
-
}
|
|
2924
|
+
});
|
|
2862
2925
|
}
|
|
2863
2926
|
async fetchChapter(ref, ctx) {
|
|
2864
2927
|
const $ = await this.loadHtml(ref.url, ctx);
|
|
@@ -2915,19 +2978,21 @@ function parseNovelPage3($, sourceUrl) {
|
|
|
2915
2978
|
const genres = parseGenres(descBlock);
|
|
2916
2979
|
const statusText = $(WIKICV_SELECTORS.metadata.statusAnchor).first().text().trim();
|
|
2917
2980
|
const status = /^Hoàn\s*thành/i.test(statusText) ? "completed" : /^Đang\s*ra/i.test(statusText) ? "ongoing" : "unknown";
|
|
2918
|
-
|
|
2919
|
-
|
|
2920
|
-
|
|
2921
|
-
|
|
2922
|
-
|
|
2923
|
-
|
|
2924
|
-
|
|
2925
|
-
|
|
2926
|
-
|
|
2927
|
-
|
|
2928
|
-
|
|
2929
|
-
|
|
2930
|
-
|
|
2981
|
+
return buildNovelMetadata(
|
|
2982
|
+
{
|
|
2983
|
+
title,
|
|
2984
|
+
author,
|
|
2985
|
+
description,
|
|
2986
|
+
genres,
|
|
2987
|
+
...coverUrl !== void 0 ? { coverUrl } : {}
|
|
2988
|
+
},
|
|
2989
|
+
{
|
|
2990
|
+
sourceUrl,
|
|
2991
|
+
sourceSite: SOURCE_SITE2,
|
|
2992
|
+
status,
|
|
2993
|
+
totalChapters: extractTotalChapters2($)
|
|
2994
|
+
}
|
|
2995
|
+
);
|
|
2931
2996
|
}
|
|
2932
2997
|
var CHUONG_SLUG_REGEX = /\/chuong-(\d+)-/i;
|
|
2933
2998
|
var CHUONG_TEXT_REGEX = /Chương\s*(\d+)/i;
|