@duyquangnvx/webnovel-downloader 0.4.2 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +276 -230
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.js +273 -226
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -918,31 +918,6 @@ function clamp(n, lo, hi) {
|
|
|
918
918
|
// src/http/index.ts
|
|
919
919
|
init_cookie_jar();
|
|
920
920
|
|
|
921
|
-
// src/http/stack-builder.ts
|
|
922
|
-
var HttpStack = class _HttpStack {
|
|
923
|
-
#client;
|
|
924
|
-
constructor(client) {
|
|
925
|
-
this.#client = client;
|
|
926
|
-
}
|
|
927
|
-
static from(leaf) {
|
|
928
|
-
return new _HttpStack(leaf);
|
|
929
|
-
}
|
|
930
|
-
rotateUserAgent(uas, defaultHeaders, cookieJar) {
|
|
931
|
-
return new _HttpStack(
|
|
932
|
-
new UserAgentRotatingHttpClient(this.#client, uas, defaultHeaders, cookieJar)
|
|
933
|
-
);
|
|
934
|
-
}
|
|
935
|
-
rateLimit(limiter, onEvent) {
|
|
936
|
-
return new _HttpStack(new RateLimitedHttpClient(this.#client, limiter, onEvent));
|
|
937
|
-
}
|
|
938
|
-
retry(opts, logger, limiter, onEvent) {
|
|
939
|
-
return new _HttpStack(new RetryingHttpClient(this.#client, opts, logger, limiter, onEvent));
|
|
940
|
-
}
|
|
941
|
-
build() {
|
|
942
|
-
return this.#client;
|
|
943
|
-
}
|
|
944
|
-
};
|
|
945
|
-
|
|
946
921
|
// src/http/tiered.ts
|
|
947
922
|
init_cf_challenge();
|
|
948
923
|
var FALLBACK_UA = DEFAULT_UA_POOL[0] ?? "Mozilla/5.0";
|
|
@@ -1008,7 +983,20 @@ function buildHttpClient(opts = {}) {
|
|
|
1008
983
|
...opts.dispatcher !== void 0 ? { dispatcher: opts.dispatcher } : {}
|
|
1009
984
|
});
|
|
1010
985
|
const limiter = opts.rateLimiter ?? new RateLimiter(opts.rateLimit ?? DEFAULT_RATE_LIMIT);
|
|
1011
|
-
const
|
|
986
|
+
const withUserAgent = new UserAgentRotatingHttpClient(
|
|
987
|
+
leaf,
|
|
988
|
+
opts.userAgents ?? new UserAgents(),
|
|
989
|
+
opts.defaultHeaders,
|
|
990
|
+
cookieJar
|
|
991
|
+
);
|
|
992
|
+
const withRateLimit = new RateLimitedHttpClient(withUserAgent, limiter, opts.onEvent);
|
|
993
|
+
const httpBranch = new RetryingHttpClient(
|
|
994
|
+
withRateLimit,
|
|
995
|
+
normalizeRetry(opts.retry),
|
|
996
|
+
logger,
|
|
997
|
+
limiter,
|
|
998
|
+
opts.onEvent
|
|
999
|
+
);
|
|
1012
1000
|
const { mode } = normalizeTransport(opts.transport);
|
|
1013
1001
|
if (mode === "http-only") {
|
|
1014
1002
|
return httpBranch;
|
|
@@ -1520,8 +1508,78 @@ var Debouncer = class {
|
|
|
1520
1508
|
}
|
|
1521
1509
|
};
|
|
1522
1510
|
|
|
1523
|
-
// src/core/
|
|
1524
|
-
|
|
1511
|
+
// src/core/run-ledger.ts
|
|
1512
|
+
/**
 * In-memory bookkeeping for one download run: which chapter indexes are
 * completed, which have failed, and which have already been enqueued.
 *
 * Instances are created through `RunLedger.seed(...)`. `applyTo(state)` projects
 * the ledger onto the persisted `completed` / `failed` fields of a state object.
 */
var RunLedger = class _RunLedger {
  /** Chapters recorded as successful, including those passed to `seed` as prior chapters. */
  #successes = [];
  /** `{ ref, error, attempts }` entries appended by `recordFailure`. */
  #failures = [];
  /** Membership index for completed chapter indexes. */
  #completedSet = /* @__PURE__ */ new Set();
  /** Completed chapter indexes in first-seen order (sorted only in `applyTo`). */
  #completedList = [];
  /** Chapter index -> `{ index, lastError }` record for chapters currently considered failed. */
  #failedMap = /* @__PURE__ */ new Map();
  /** Chapter indexes accepted by `markEnqueued`. */
  #enqueuedSet = /* @__PURE__ */ new Set();

  /**
   * Builds a ledger from a (possibly null) persisted state and the chapters
   * already available from a prior run.
   *
   * @param state Persisted state, or null/undefined for a fresh ledger; only
   *   `state.completed` and `state.failed` are read.
   * @param priorChapters Iterable of chapter objects; each `chapter.index` is marked completed.
   * @returns The seeded ledger.
   */
  static seed(state, priorChapters) {
    const ledger = new _RunLedger();
    for (const idx of state?.completed ?? []) ledger.#markCompleted(idx);
    // Append one at a time instead of `push(...priorChapters)`: spreading a very
    // large array into call arguments can exceed the engine's argument limit.
    for (const chapter of priorChapters) {
      ledger.#successes.push(chapter);
      ledger.#markCompleted(chapter.index);
    }
    // A failure record is only kept when the index is not already completed.
    for (const f of state?.failed ?? []) {
      if (!ledger.#completedSet.has(f.index)) ledger.#failedMap.set(f.index, f);
    }
    return ledger;
  }

  /** Adds `index` to the completed set/list once; repeated calls are no-ops. */
  #markCompleted(index) {
    if (this.#completedSet.has(index)) return;
    this.#completedSet.add(index);
    this.#completedList.push(index);
  }

  /** Check-and-set enqueue dedup: false if the index is completed or already enqueued. */
  markEnqueued(index) {
    if (this.#completedSet.has(index) || this.#enqueuedSet.has(index)) return false;
    this.#enqueuedSet.add(index);
    return true;
  }

  /** Records a fetched chapter: marks its index completed and clears any failure record for it. */
  recordSuccess(chapter) {
    this.#successes.push(chapter);
    this.#markCompleted(chapter.index);
    this.#failedMap.delete(chapter.index);
  }

  /**
   * Records a failed fetch for `ref`. Appends to the failures list and
   * overwrites the failure record stored for `ref.index`.
   */
  recordFailure(ref, error) {
    this.#failures.push({ ref, error, attempts: 1 });
    this.#failedMap.set(ref.index, {
      index: ref.index,
      lastError: { code: error.code, message: error.message }
    });
  }

  /** Removes the failure record for `index`, if any. */
  dropOrphanFailure(index) {
    this.#failedMap.delete(index);
  }

  /** The ONLY projection to the persisted shape; nothing else writes state.completed/failed. */
  applyTo(state) {
    return {
      ...state,
      // Sort a copy so the ledger's own insertion order is left untouched.
      completed: [...this.#completedList].sort((a, b) => Number(a) - Number(b)),
      failed: [...this.#failedMap.values()]
    };
  }

  /** Returns the internal successes array (not a copy). */
  successes() {
    return this.#successes;
  }

  /** Returns the internal failures array (not a copy). */
  failures() {
    return this.#failures;
  }

  /** Snapshot — safe to mutate the ledger while iterating the result. */
  failedRecords() {
    return [...this.#failedMap.values()];
  }

  /** Fetched-chapter count (including prior-run chapters) — drives the progress event. */
  get completedCount() {
    return this.#successes.length;
  }

  /** Number of distinct indexes accepted by `markEnqueued`. */
  get enqueuedCount() {
    return this.#enqueuedSet.size;
  }
};
|
|
1525
1583
|
|
|
1526
1584
|
// src/pipeline/fetch-toc.ts
|
|
1527
1585
|
init_errors();
|
|
@@ -1593,15 +1651,51 @@ function assembleNovelData(metadata, results) {
|
|
|
1593
1651
|
// src/core/download-run.ts
|
|
1594
1652
|
init_errors();
|
|
1595
1653
|
|
|
1654
|
+
// src/storage/resume-state.ts
|
|
1655
|
+
var import_zod4 = require("zod");
|
|
1656
|
+
init_primitives();
|
|
1657
|
+
var ResumeFailureRecordSchema = import_zod4.z.object({
|
|
1658
|
+
index: ChapterIndexSchema,
|
|
1659
|
+
lastError: import_zod4.z.object({ code: import_zod4.z.string(), message: import_zod4.z.string() }).readonly()
|
|
1660
|
+
}).readonly();
|
|
1661
|
+
var ResumeStateSchema = import_zod4.z.object({
|
|
1662
|
+
schemaVersion: import_zod4.z.literal(1),
|
|
1663
|
+
url: UrlSchema,
|
|
1664
|
+
adapterId: AdapterIdSchema,
|
|
1665
|
+
metadata: NovelMetadataSchema.nullable(),
|
|
1666
|
+
toc: import_zod4.z.array(ChapterRefSchema).readonly().nullable(),
|
|
1667
|
+
completed: import_zod4.z.array(ChapterIndexSchema).readonly(),
|
|
1668
|
+
failed: import_zod4.z.array(ResumeFailureRecordSchema).readonly(),
|
|
1669
|
+
startedAt: IsoDateSchema,
|
|
1670
|
+
updatedAt: IsoDateSchema
|
|
1671
|
+
}).readonly();
|
|
1672
|
+
/**
 * Returns the current time as an ISO-8601 string (`Date#toISOString`),
 * passed through `unsafeBrandIsoDate`.
 */
function nowIso() {
  return unsafeBrandIsoDate((/* @__PURE__ */ new Date()).toISOString());
}
|
|
1675
|
+
/**
 * Creates an empty resume state for a new run.
 *
 * @param url URL stored as `state.url`.
 * @param adapterId Adapter id stored as `state.adapterId`.
 * @returns A schema-version-1 state with no metadata, no TOC, empty
 *   `completed` / `failed` lists, and `startedAt` equal to `updatedAt`.
 */
function freshState(url, adapterId) {
  // Take one timestamp so startedAt and updatedAt are identical on creation.
  const now = nowIso();
  return {
    schemaVersion: 1,
    url,
    adapterId,
    metadata: null,
    toc: null,
    completed: [],
    failed: [],
    startedAt: now,
    updatedAt: now
  };
}
|
|
1689
|
+
|
|
1596
1690
|
// src/storage/resume-token.ts
|
|
1597
1691
|
var import_node_path3 = require("path");
|
|
1598
|
-
var
|
|
1692
|
+
var import_zod5 = require("zod");
|
|
1599
1693
|
init_errors();
|
|
1600
1694
|
init_primitives();
|
|
1601
|
-
var TokenPayloadSchema =
|
|
1602
|
-
v:
|
|
1603
|
-
stateFile:
|
|
1604
|
-
urlHash:
|
|
1695
|
+
var TokenPayloadSchema = import_zod5.z.object({
|
|
1696
|
+
v: import_zod5.z.literal(1),
|
|
1697
|
+
stateFile: import_zod5.z.string().min(1),
|
|
1698
|
+
urlHash: import_zod5.z.string().min(1)
|
|
1605
1699
|
});
|
|
1606
1700
|
function encodeResumeToken(stateFile, url) {
|
|
1607
1701
|
const payload = { v: 1, stateFile, urlHash: sha1(url) };
|
|
@@ -1640,23 +1734,6 @@ function verifyResumeToken(token, url) {
|
|
|
1640
1734
|
// src/core/download-run.ts
|
|
1641
1735
|
var QUEUE_BACKPRESSURE_RATIO = 4;
|
|
1642
1736
|
var STATE_PERSIST_DEBOUNCE_MS = 500;
|
|
1643
|
-
function freshState(url, adapterId) {
|
|
1644
|
-
const now = nowIso();
|
|
1645
|
-
return {
|
|
1646
|
-
schemaVersion: 1,
|
|
1647
|
-
url,
|
|
1648
|
-
adapterId,
|
|
1649
|
-
metadata: null,
|
|
1650
|
-
toc: null,
|
|
1651
|
-
completed: [],
|
|
1652
|
-
failed: [],
|
|
1653
|
-
startedAt: now,
|
|
1654
|
-
updatedAt: now
|
|
1655
|
-
};
|
|
1656
|
-
}
|
|
1657
|
-
function nowIso() {
|
|
1658
|
-
return unsafeBrandIsoDate((/* @__PURE__ */ new Date()).toISOString());
|
|
1659
|
-
}
|
|
1660
1737
|
var DownloadRun = class {
|
|
1661
1738
|
#adapter;
|
|
1662
1739
|
#validated;
|
|
@@ -1668,13 +1745,7 @@ var DownloadRun = class {
|
|
|
1668
1745
|
#bus;
|
|
1669
1746
|
#logger;
|
|
1670
1747
|
#state = null;
|
|
1671
|
-
#
|
|
1672
|
-
#failures = [];
|
|
1673
|
-
#completedSet = /* @__PURE__ */ new Set();
|
|
1674
|
-
#completedList = [];
|
|
1675
|
-
#failedMap = /* @__PURE__ */ new Map();
|
|
1676
|
-
#enqueuedSet = /* @__PURE__ */ new Set();
|
|
1677
|
-
#aborted = false;
|
|
1748
|
+
#ledger = RunLedger.seed(null, []);
|
|
1678
1749
|
#queue;
|
|
1679
1750
|
#stateDebouncer;
|
|
1680
1751
|
constructor(args) {
|
|
@@ -1700,9 +1771,8 @@ var DownloadRun = class {
|
|
|
1700
1771
|
const resolved = await this.#resolveMetadata();
|
|
1701
1772
|
if ("error" in resolved) return resolved.error;
|
|
1702
1773
|
const meta = resolved.meta;
|
|
1703
|
-
const ctxWithMeta = this.#
|
|
1774
|
+
const ctxWithMeta = { ...this.#ctx, novelMetadata: meta };
|
|
1704
1775
|
const onAbort = () => {
|
|
1705
|
-
this.#aborted = true;
|
|
1706
1776
|
this.#queue.clear();
|
|
1707
1777
|
};
|
|
1708
1778
|
this.#signal.addEventListener("abort", onAbort, { once: true });
|
|
@@ -1717,40 +1787,23 @@ var DownloadRun = class {
|
|
|
1717
1787
|
}
|
|
1718
1788
|
async #initResume() {
|
|
1719
1789
|
const resumeCfg = this.#resumeStorage;
|
|
1720
|
-
if (resumeCfg)
|
|
1721
|
-
|
|
1722
|
-
|
|
1723
|
-
|
|
1724
|
-
|
|
1725
|
-
|
|
1726
|
-
|
|
1727
|
-
if (state.adapterId !== this.#adapter.id) {
|
|
1728
|
-
throw new ParseError("Stale resume state: adapter mismatch");
|
|
1729
|
-
}
|
|
1730
|
-
} else {
|
|
1731
|
-
state = freshState(this.#validated, this.#adapter.id);
|
|
1732
|
-
}
|
|
1733
|
-
this.#state = state;
|
|
1734
|
-
} catch (err) {
|
|
1735
|
-
return { status: "error", error: normalizeToEnvelopeError(err) };
|
|
1736
|
-
}
|
|
1737
|
-
}
|
|
1738
|
-
if (resumeCfg && this.#state) {
|
|
1739
|
-
const prior = await resumeCfg.chapters.loadAll();
|
|
1740
|
-
this.#successes.push(...prior);
|
|
1741
|
-
const merged = new Set(this.#state.completed);
|
|
1742
|
-
for (const c of prior) merged.add(c.index);
|
|
1743
|
-
this.#state = {
|
|
1744
|
-
...this.#state,
|
|
1745
|
-
completed: [...merged].sort((a, b) => Number(a) - Number(b))
|
|
1746
|
-
};
|
|
1790
|
+
if (!resumeCfg) return null;
|
|
1791
|
+
try {
|
|
1792
|
+
const { state, priorChapters } = await resumeCfg.loadFor(this.#validated, this.#adapter.id);
|
|
1793
|
+
this.#state = state;
|
|
1794
|
+
this.#ledger = RunLedger.seed(state, priorChapters);
|
|
1795
|
+
} catch (err) {
|
|
1796
|
+
return { status: "error", error: normalizeToEnvelopeError(err) };
|
|
1747
1797
|
}
|
|
1748
1798
|
return null;
|
|
1749
1799
|
}
|
|
1800
|
+
#schedulePersist() {
|
|
1801
|
+
if (this.#state) this.#stateDebouncer.schedule();
|
|
1802
|
+
}
|
|
1750
1803
|
async #persistState() {
|
|
1751
1804
|
const resumeCfg = this.#resumeStorage;
|
|
1752
1805
|
if (!resumeCfg || !this.#state) return;
|
|
1753
|
-
const next = { ...this.#state, updatedAt: nowIso() };
|
|
1806
|
+
const next = { ...this.#ledger.applyTo(this.#state), updatedAt: nowIso() };
|
|
1754
1807
|
this.#state = next;
|
|
1755
1808
|
await resumeCfg.resume.save(resumeCfg.stateFile, next);
|
|
1756
1809
|
}
|
|
@@ -1771,69 +1824,35 @@ var DownloadRun = class {
|
|
|
1771
1824
|
return { error: { status: "error", error: normalizeToEnvelopeError(err) } };
|
|
1772
1825
|
}
|
|
1773
1826
|
}
|
|
1774
|
-
#startQueue(meta) {
|
|
1775
|
-
const state = this.#state;
|
|
1776
|
-
const completedSet = this.#completedSet;
|
|
1777
|
-
const completedList = this.#completedList;
|
|
1778
|
-
const failedMap = this.#failedMap;
|
|
1779
|
-
for (const idx of state?.completed ?? []) completedSet.add(idx);
|
|
1780
|
-
completedList.push(...state ? state.completed : []);
|
|
1781
|
-
if (state) {
|
|
1782
|
-
for (const f of state.failed) {
|
|
1783
|
-
if (!completedSet.has(f.index)) failedMap.set(f.index, f);
|
|
1784
|
-
}
|
|
1785
|
-
}
|
|
1786
|
-
return { ...this.#ctx, novelMetadata: meta };
|
|
1787
|
-
}
|
|
1788
1827
|
async #enqueueRef(ref, ctxWithMeta) {
|
|
1789
|
-
if (this.#
|
|
1790
|
-
if (this.#
|
|
1791
|
-
if (this.#enqueuedSet.has(ref.index)) return;
|
|
1792
|
-
this.#enqueuedSet.add(ref.index);
|
|
1828
|
+
if (this.#signal.aborted) return;
|
|
1829
|
+
if (!this.#ledger.markEnqueued(ref.index)) return;
|
|
1793
1830
|
const backpressureLimit = this.#concurrency * QUEUE_BACKPRESSURE_RATIO;
|
|
1794
1831
|
while (this.#queue.size >= backpressureLimit) {
|
|
1795
1832
|
await this.#queue.onSizeLessThan(backpressureLimit);
|
|
1796
|
-
if (this.#
|
|
1833
|
+
if (this.#signal.aborted) throw new CancelledError({ cause: this.#signal.reason });
|
|
1797
1834
|
}
|
|
1798
|
-
if (this.#aborted) throw new CancelledError({ cause: this.#signal.reason });
|
|
1799
1835
|
void this.#queue.add(() => this.#fetchChapter(ref, ctxWithMeta));
|
|
1800
1836
|
}
|
|
1801
1837
|
async #fetchChapter(ref, ctxWithMeta) {
|
|
1802
|
-
if (this.#
|
|
1838
|
+
if (this.#signal.aborted) return;
|
|
1803
1839
|
try {
|
|
1804
1840
|
const chapter = await fetchChapterTask(this.#adapter, ref, ctxWithMeta, this.#bus);
|
|
1805
|
-
if (this.#aborted) return;
|
|
1841
|
+
if (this.#signal.aborted) return;
|
|
1806
1842
|
if (this.#resumeStorage) await this.#resumeStorage.chapters.save(chapter);
|
|
1807
|
-
this.#
|
|
1808
|
-
this.#
|
|
1809
|
-
this.#completedSet.add(ref.index);
|
|
1810
|
-
this.#failedMap.delete(ref.index);
|
|
1811
|
-
if (this.#state) {
|
|
1812
|
-
this.#state = {
|
|
1813
|
-
...this.#state,
|
|
1814
|
-
completed: [...this.#completedList],
|
|
1815
|
-
failed: [...this.#failedMap.values()]
|
|
1816
|
-
};
|
|
1817
|
-
this.#stateDebouncer.schedule();
|
|
1818
|
-
}
|
|
1843
|
+
this.#ledger.recordSuccess(chapter);
|
|
1844
|
+
this.#schedulePersist();
|
|
1819
1845
|
this.#bus.emit({ type: "chapter:success", chapter });
|
|
1820
1846
|
this.#bus.emit({
|
|
1821
1847
|
type: "progress",
|
|
1822
|
-
completed: this.#
|
|
1823
|
-
total: ctxWithMeta.novelMetadata?.totalChapters ?? this.#
|
|
1848
|
+
completed: this.#ledger.completedCount,
|
|
1849
|
+
total: ctxWithMeta.novelMetadata?.totalChapters ?? this.#ledger.enqueuedCount
|
|
1824
1850
|
});
|
|
1825
1851
|
} catch (cause) {
|
|
1826
1852
|
if (cause instanceof CancelledError) return;
|
|
1827
1853
|
const wrapped = cause instanceof ChapterFetchError ? cause : new ChapterFetchError(ref, { cause });
|
|
1828
|
-
this.#
|
|
1829
|
-
this.#
|
|
1830
|
-
index: ref.index,
|
|
1831
|
-
lastError: { code: wrapped.code, message: wrapped.message }
|
|
1832
|
-
});
|
|
1833
|
-
if (this.#state) {
|
|
1834
|
-
this.#state = { ...this.#state, failed: [...this.#failedMap.values()] };
|
|
1835
|
-
this.#stateDebouncer.schedule();
|
|
1836
|
-
}
|
|
1854
|
+
this.#ledger.recordFailure(ref, wrapped);
|
|
1855
|
+
this.#schedulePersist();
|
|
1837
1856
|
this.#bus.emit({ type: "chapter:failed", ref, error: wrapped });
|
|
1838
1857
|
}
|
|
1839
1858
|
}
|
|
@@ -1885,7 +1904,7 @@ var DownloadRun = class {
|
|
|
1885
1904
|
const tocByIndex = new Map(
|
|
1886
1905
|
(this.#state.toc ?? []).map((r) => [r.index, r])
|
|
1887
1906
|
);
|
|
1888
|
-
for (const f of this.#
|
|
1907
|
+
for (const f of this.#ledger.failedRecords()) {
|
|
1889
1908
|
const ref = tocByIndex.get(f.index);
|
|
1890
1909
|
if (ref) {
|
|
1891
1910
|
await this.#enqueueRef(ref, ctxWithMeta);
|
|
@@ -1893,11 +1912,8 @@ var DownloadRun = class {
|
|
|
1893
1912
|
this.#logger.warn(
|
|
1894
1913
|
`Resume: dropping orphan failed record for index ${String(f.index)} (no matching TOC ref)`
|
|
1895
1914
|
);
|
|
1896
|
-
this.#
|
|
1897
|
-
|
|
1898
|
-
this.#state = { ...this.#state, failed: [...this.#failedMap.values()] };
|
|
1899
|
-
this.#stateDebouncer.schedule();
|
|
1900
|
-
}
|
|
1915
|
+
this.#ledger.dropOrphanFailure(f.index);
|
|
1916
|
+
this.#schedulePersist();
|
|
1901
1917
|
}
|
|
1902
1918
|
}
|
|
1903
1919
|
}
|
|
@@ -1929,19 +1945,20 @@ var DownloadRun = class {
|
|
|
1929
1945
|
if (stage2Error) return { status: "error", error: stage2Error };
|
|
1930
1946
|
let data;
|
|
1931
1947
|
try {
|
|
1932
|
-
data = assembleNovelData(metadata, this.#successes);
|
|
1948
|
+
data = assembleNovelData(metadata, this.#ledger.successes());
|
|
1933
1949
|
} catch (err) {
|
|
1934
1950
|
return {
|
|
1935
1951
|
status: "error",
|
|
1936
1952
|
error: normalizeToEnvelopeError(err)
|
|
1937
1953
|
};
|
|
1938
1954
|
}
|
|
1939
|
-
|
|
1955
|
+
const failures = this.#ledger.failures();
|
|
1956
|
+
if (failures.length > 0) {
|
|
1940
1957
|
if (this.#resumeStorage) {
|
|
1941
1958
|
const token = encodeResumeToken(this.#resumeStorage.stateFile, this.#validated);
|
|
1942
|
-
return { status: "partial", resumable: true, data, failures
|
|
1959
|
+
return { status: "partial", resumable: true, data, failures, resumeToken: token };
|
|
1943
1960
|
}
|
|
1944
|
-
return { status: "partial", resumable: false, data, failures
|
|
1961
|
+
return { status: "partial", resumable: false, data, failures };
|
|
1945
1962
|
}
|
|
1946
1963
|
return { status: "success", data };
|
|
1947
1964
|
}
|
|
@@ -1956,32 +1973,12 @@ function normalizeChapterRange(range) {
|
|
|
1956
1973
|
|
|
1957
1974
|
// src/storage/resume-storage.ts
|
|
1958
1975
|
var import_node_path6 = require("path");
|
|
1976
|
+
init_errors();
|
|
1959
1977
|
|
|
1960
1978
|
// src/storage/resume-store.ts
|
|
1961
1979
|
var import_promises3 = require("fs/promises");
|
|
1962
1980
|
var import_node_path4 = require("path");
|
|
1963
1981
|
init_errors();
|
|
1964
|
-
|
|
1965
|
-
// src/storage/resume-state.ts
|
|
1966
|
-
var import_zod5 = require("zod");
|
|
1967
|
-
init_primitives();
|
|
1968
|
-
var ResumeFailureRecordSchema = import_zod5.z.object({
|
|
1969
|
-
index: ChapterIndexSchema,
|
|
1970
|
-
lastError: import_zod5.z.object({ code: import_zod5.z.string(), message: import_zod5.z.string() }).readonly()
|
|
1971
|
-
}).readonly();
|
|
1972
|
-
var ResumeStateSchema = import_zod5.z.object({
|
|
1973
|
-
schemaVersion: import_zod5.z.literal(1),
|
|
1974
|
-
url: UrlSchema,
|
|
1975
|
-
adapterId: AdapterIdSchema,
|
|
1976
|
-
metadata: NovelMetadataSchema.nullable(),
|
|
1977
|
-
toc: import_zod5.z.array(ChapterRefSchema).readonly().nullable(),
|
|
1978
|
-
completed: import_zod5.z.array(ChapterIndexSchema).readonly(),
|
|
1979
|
-
failed: import_zod5.z.array(ResumeFailureRecordSchema).readonly(),
|
|
1980
|
-
startedAt: IsoDateSchema,
|
|
1981
|
-
updatedAt: IsoDateSchema
|
|
1982
|
-
}).readonly();
|
|
1983
|
-
|
|
1984
|
-
// src/storage/resume-store.ts
|
|
1985
1982
|
var MemoryResumeStore = class {
|
|
1986
1983
|
#map = /* @__PURE__ */ new Map();
|
|
1987
1984
|
async load(path) {
|
|
@@ -2124,6 +2121,32 @@ var ResumeStorage = class _ResumeStorage {
|
|
|
2124
2121
|
stateFile
|
|
2125
2122
|
);
|
|
2126
2123
|
}
|
|
2124
|
+
/**
|
|
2125
|
+
* Loads state for a run against (url, adapterId) and applies the reconcile
|
|
2126
|
+
* invariant: a chapter file on disk proves the fetch completed, so its index
|
|
2127
|
+
* wins over a stale `state.completed` (they diverge when run 1's in-flight
|
|
2128
|
+
* tasks write chapters after abort but before the debounced state save).
|
|
2129
|
+
*/
|
|
2130
|
+
async loadFor(url, adapterId) {
|
|
2131
|
+
let state = await this.resume.load(this.stateFile);
|
|
2132
|
+
if (state) {
|
|
2133
|
+
if (state.url !== url) {
|
|
2134
|
+
throw new ParseError("Stale resume state: URL mismatch", { url, path: this.stateFile });
|
|
2135
|
+
}
|
|
2136
|
+
if (state.adapterId !== adapterId) {
|
|
2137
|
+
throw new ParseError("Stale resume state: adapter mismatch", { url, path: this.stateFile });
|
|
2138
|
+
}
|
|
2139
|
+
} else {
|
|
2140
|
+
state = freshState(url, adapterId);
|
|
2141
|
+
}
|
|
2142
|
+
const priorChapters = await this.chapters.loadAll();
|
|
2143
|
+
if (priorChapters.length > 0) {
|
|
2144
|
+
const merged = new Set(state.completed);
|
|
2145
|
+
for (const c of priorChapters) merged.add(c.index);
|
|
2146
|
+
state = { ...state, completed: [...merged].sort((a, b) => Number(a) - Number(b)) };
|
|
2147
|
+
}
|
|
2148
|
+
return { state, priorChapters };
|
|
2149
|
+
}
|
|
2127
2150
|
};
|
|
2128
2151
|
function resolveResumeStorage(opt, url, logger) {
|
|
2129
2152
|
if (opt === void 0) return null;
|
|
@@ -2456,7 +2479,6 @@ var BaseAdapter = class {
|
|
|
2456
2479
|
|
|
2457
2480
|
// src/adapters/truyenfull/index.ts
|
|
2458
2481
|
init_primitives();
|
|
2459
|
-
init_errors();
|
|
2460
2482
|
|
|
2461
2483
|
// src/adapters/truyenfull/parser.ts
|
|
2462
2484
|
init_primitives();
|
|
@@ -2525,6 +2547,20 @@ function parseNovelHeader($, selectors, sourceUrl) {
|
|
|
2525
2547
|
...coverUrl !== void 0 ? { coverUrl } : {}
|
|
2526
2548
|
};
|
|
2527
2549
|
}
|
|
2550
|
+
/**
 * Assembles a novel-metadata object from parsed page fields plus
 * adapter-supplied envelope values.
 *
 * @param fields Parsed fields: `title`, `author`, `description`, `genres`, and optional `coverUrl`.
 * @param envelope Source info: `sourceUrl`, `sourceSite`, `status`, and optional `totalChapters`.
 * @returns The metadata object. `fetchedAt` is set to the time of the call.
 *   `coverUrl` and `totalChapters` keys are present only when the
 *   corresponding input is not `undefined`.
 */
function buildNovelMetadata(fields, envelope) {
  return {
    title: fields.title,
    author: fields.author,
    description: fields.description,
    genres: fields.genres,
    sourceUrl: envelope.sourceUrl,
    sourceSite: envelope.sourceSite,
    status: envelope.status,
    fetchedAt: /* @__PURE__ */ new Date(),
    // Conditional spreads omit the key entirely rather than storing `undefined`.
    ...fields.coverUrl !== void 0 ? { coverUrl: fields.coverUrl } : {},
    ...envelope.totalChapters !== void 0 ? { totalChapters: envelope.totalChapters } : {}
  };
}
|
|
2528
2564
|
|
|
2529
2565
|
// src/adapters/shared/content.ts
|
|
2530
2566
|
init_errors();
|
|
@@ -2570,13 +2606,11 @@ function parseNovelPage($, sourceUrl) {
|
|
|
2570
2606
|
let status = "unknown";
|
|
2571
2607
|
if ($(TRUYENFULL_SELECTORS.metadata.statusFull).length > 0) status = "completed";
|
|
2572
2608
|
else if ($(TRUYENFULL_SELECTORS.metadata.statusOngoing).length > 0) status = "ongoing";
|
|
2573
|
-
return {
|
|
2574
|
-
...header,
|
|
2609
|
+
return buildNovelMetadata(header, {
|
|
2575
2610
|
sourceUrl,
|
|
2576
2611
|
sourceSite: unsafeBrandAdapterId("truyenfull"),
|
|
2577
|
-
status
|
|
2578
|
-
|
|
2579
|
-
};
|
|
2612
|
+
status
|
|
2613
|
+
});
|
|
2580
2614
|
}
|
|
2581
2615
|
function parseTocPage($, startIndex, baseUrl) {
|
|
2582
2616
|
const refs = refsFromAnchors($, { selector: TRUYENFULL_SELECTORS.toc.listItem, startIndex, baseUrl });
|
|
@@ -2622,6 +2656,21 @@ function parseChapterPage($, ref) {
|
|
|
2622
2656
|
};
|
|
2623
2657
|
}
|
|
2624
2658
|
|
|
2659
|
+
// src/adapters/shared/paginate.ts
|
|
2660
|
+
init_primitives();
|
|
2661
|
+
init_errors();
|
|
2662
|
+
/**
 * Async generator that walks a paginated table of contents and yields
 * chapter refs page by page.
 *
 * @param opts.ctx Context whose `signal` is checked for abort before each page fetch.
 * @param opts.fetchPage `(page, startIndex) => Promise<{ refs, isLast } | null>`;
 *   `page` starts at 1 and `startIndex` is the running count of refs yielded so far.
 * @yields Each ref from each page's `refs`, in order.
 * @throws CancelledError when `opts.ctx.signal` is aborted before a page fetch.
 */
async function* paginateToc(opts) {
  let index = unsafeBrandChapterIndex(0);
  for (let page = 1; ; page++) {
    if (opts.ctx.signal.aborted) throw new CancelledError({ cause: opts.ctx.signal.reason });
    const result = await opts.fetchPage(page, index);
    // A null result or an empty page ends the walk.
    if (result === null || result.refs.length === 0) return;
    for (const ref of result.refs) yield ref;
    // Advance the start index handed to the next page by this page's ref count.
    index = unsafeBrandChapterIndex(index + result.refs.length);
    if (result.isLast) return;
  }
}
|
|
2673
|
+
|
|
2625
2674
|
// src/adapters/truyenfull/index.ts
|
|
2626
2675
|
var TruyenFullAdapter = class extends BaseAdapter {
|
|
2627
2676
|
id = unsafeBrandAdapterId("truyenfull");
|
|
@@ -2660,22 +2709,20 @@ var TruyenFullAdapter = class extends BaseAdapter {
|
|
|
2660
2709
|
}
|
|
2661
2710
|
async *fetchChapterList(url, ctx) {
|
|
2662
2711
|
const base = stripTrailingSlash(this.#normalize(url));
|
|
2663
|
-
let page = 1;
|
|
2664
|
-
let index = unsafeBrandChapterIndex(0);
|
|
2665
2712
|
let knownTotalPages;
|
|
2666
|
-
|
|
2667
|
-
|
|
2668
|
-
|
|
2669
|
-
|
|
2670
|
-
|
|
2671
|
-
|
|
2672
|
-
|
|
2673
|
-
|
|
2674
|
-
|
|
2675
|
-
|
|
2676
|
-
|
|
2677
|
-
|
|
2678
|
-
}
|
|
2713
|
+
yield* paginateToc({
|
|
2714
|
+
ctx,
|
|
2715
|
+
fetchPage: async (page, startIndex) => {
|
|
2716
|
+
const pageUrl = page === 1 ? unsafeBrandUrl(`${base}/`) : unsafeBrandUrl(`${base}/trang-${page}/`);
|
|
2717
|
+
const $ = await this.loadHtml(pageUrl, ctx);
|
|
2718
|
+
const { refs, isLastPage, totalPages } = parseTocPage($, startIndex, pageUrl);
|
|
2719
|
+
if (page === 1 && totalPages !== void 0) knownTotalPages = totalPages;
|
|
2720
|
+
return {
|
|
2721
|
+
refs,
|
|
2722
|
+
isLast: isLastPage || knownTotalPages !== void 0 && page >= knownTotalPages
|
|
2723
|
+
};
|
|
2724
|
+
}
|
|
2725
|
+
});
|
|
2679
2726
|
}
|
|
2680
2727
|
async fetchChapter(ref, ctx) {
|
|
2681
2728
|
const target = this.#normalize(ref.url);
|
|
@@ -2745,15 +2792,12 @@ function parseNovelPage2($, sourceUrl) {
|
|
|
2745
2792
|
let status = "unknown";
|
|
2746
2793
|
if ($(METRUYENCHU_SELECTORS.metadata.statusFull).length > 0) status = "completed";
|
|
2747
2794
|
else if (METRUYENCHU_SELECTORS.metadata.statusOngoing.some((sel) => $(sel).length > 0)) status = "ongoing";
|
|
2748
|
-
|
|
2749
|
-
return {
|
|
2750
|
-
...header,
|
|
2795
|
+
return buildNovelMetadata(header, {
|
|
2751
2796
|
sourceUrl,
|
|
2752
2797
|
sourceSite: SOURCE_SITE,
|
|
2753
2798
|
status,
|
|
2754
|
-
|
|
2755
|
-
|
|
2756
|
-
};
|
|
2799
|
+
totalChapters: extractTotalChapters($)
|
|
2800
|
+
});
|
|
2757
2801
|
}
|
|
2758
2802
|
var TOTAL_CHAPTERS_LABEL_REGEX = /^\s*Số\s*chương\s*:?\s*$/i;
|
|
2759
2803
|
var TOTAL_CHAPTERS_VALUE_REGEX = /(\d[\d.,]*)/;
|
|
@@ -2850,34 +2894,34 @@ var MeTruyenChuComVnAdapter = class extends BaseAdapter {
|
|
|
2850
2894
|
);
|
|
2851
2895
|
}
|
|
2852
2896
|
const apiOrigin = new URL(url).origin;
|
|
2853
|
-
|
|
2854
|
-
|
|
2855
|
-
|
|
2856
|
-
|
|
2857
|
-
|
|
2858
|
-
|
|
2859
|
-
|
|
2860
|
-
|
|
2861
|
-
|
|
2862
|
-
|
|
2863
|
-
|
|
2864
|
-
|
|
2865
|
-
|
|
2866
|
-
|
|
2867
|
-
|
|
2868
|
-
|
|
2869
|
-
|
|
2870
|
-
|
|
2871
|
-
|
|
2872
|
-
|
|
2873
|
-
|
|
2874
|
-
|
|
2875
|
-
|
|
2876
|
-
|
|
2897
|
+
yield* paginateToc({
|
|
2898
|
+
ctx,
|
|
2899
|
+
fetchPage: async (page, startIndex) => {
|
|
2900
|
+
if (page === 1) {
|
|
2901
|
+
return { refs: parseTocPage1($, startIndex, url), isLast: lastPage <= 1 };
|
|
2902
|
+
}
|
|
2903
|
+
const apiUrl = unsafeBrandUrl(`${apiOrigin}/get/listchap/${bid}?page=${page}`);
|
|
2904
|
+
const raw = await this.fetchJson(apiUrl, ctx);
|
|
2905
|
+
const parsed = tocJsonEnvelopeSchema.safeParse(raw);
|
|
2906
|
+
if (!parsed.success) {
|
|
2907
|
+
throw new ParseError(`Invalid TOC JSON envelope at ${apiUrl}`, {
|
|
2908
|
+
cause: parsed.error,
|
|
2909
|
+
url: apiUrl
|
|
2910
|
+
});
|
|
2911
|
+
}
|
|
2912
|
+
const fragment = parsed.data.data;
|
|
2913
|
+
if (!fragment) {
|
|
2914
|
+
ctx.logger.warn({ apiUrl, page }, "Empty TOC JSON data; ending walk early");
|
|
2915
|
+
return null;
|
|
2916
|
+
}
|
|
2917
|
+
const refs = parseTocJsonPage(fragment, startIndex, url);
|
|
2918
|
+
if (refs.length === 0) {
|
|
2919
|
+
ctx.logger.warn({ apiUrl, page }, "Zero anchors in TOC JSON page; ending walk early");
|
|
2920
|
+
return null;
|
|
2921
|
+
}
|
|
2922
|
+
return { refs, isLast: page >= lastPage };
|
|
2877
2923
|
}
|
|
2878
|
-
|
|
2879
|
-
index = unsafeBrandChapterIndex(index + refs.length);
|
|
2880
|
-
}
|
|
2924
|
+
});
|
|
2881
2925
|
}
|
|
2882
2926
|
async fetchChapter(ref, ctx) {
|
|
2883
2927
|
const $ = await this.loadHtml(ref.url, ctx);
|
|
@@ -2934,19 +2978,21 @@ function parseNovelPage3($, sourceUrl) {
|
|
|
2934
2978
|
const genres = parseGenres(descBlock);
|
|
2935
2979
|
const statusText = $(WIKICV_SELECTORS.metadata.statusAnchor).first().text().trim();
|
|
2936
2980
|
const status = /^Hoàn\s*thành/i.test(statusText) ? "completed" : /^Đang\s*ra/i.test(statusText) ? "ongoing" : "unknown";
|
|
2937
|
-
|
|
2938
|
-
|
|
2939
|
-
|
|
2940
|
-
|
|
2941
|
-
|
|
2942
|
-
|
|
2943
|
-
|
|
2944
|
-
|
|
2945
|
-
|
|
2946
|
-
|
|
2947
|
-
|
|
2948
|
-
|
|
2949
|
-
|
|
2981
|
+
return buildNovelMetadata(
|
|
2982
|
+
{
|
|
2983
|
+
title,
|
|
2984
|
+
author,
|
|
2985
|
+
description,
|
|
2986
|
+
genres,
|
|
2987
|
+
...coverUrl !== void 0 ? { coverUrl } : {}
|
|
2988
|
+
},
|
|
2989
|
+
{
|
|
2990
|
+
sourceUrl,
|
|
2991
|
+
sourceSite: SOURCE_SITE2,
|
|
2992
|
+
status,
|
|
2993
|
+
totalChapters: extractTotalChapters2($)
|
|
2994
|
+
}
|
|
2995
|
+
);
|
|
2950
2996
|
}
|
|
2951
2997
|
var CHUONG_SLUG_REGEX = /\/chuong-(\d+)-/i;
|
|
2952
2998
|
var CHUONG_TEXT_REGEX = /Chương\s*(\d+)/i;
|