novel-downloader 1.5.0__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/__init__.py +1 -3
- novel_downloader/cli/clean.py +21 -88
- novel_downloader/cli/config.py +26 -21
- novel_downloader/cli/download.py +77 -64
- novel_downloader/cli/export.py +16 -20
- novel_downloader/cli/main.py +1 -1
- novel_downloader/cli/search.py +62 -65
- novel_downloader/cli/ui.py +156 -0
- novel_downloader/config/__init__.py +8 -5
- novel_downloader/config/adapter.py +65 -105
- novel_downloader/config/{loader.py → file_io.py} +53 -26
- novel_downloader/core/__init__.py +1 -0
- novel_downloader/core/archived/deqixs/fetcher.py +115 -0
- novel_downloader/core/archived/deqixs/parser.py +132 -0
- novel_downloader/core/archived/deqixs/searcher.py +89 -0
- novel_downloader/core/{searchers/qidian.py → archived/qidian/searcher.py} +12 -20
- novel_downloader/core/archived/wanbengo/searcher.py +98 -0
- novel_downloader/core/archived/xshbook/searcher.py +93 -0
- novel_downloader/core/downloaders/__init__.py +3 -24
- novel_downloader/core/downloaders/base.py +49 -23
- novel_downloader/core/downloaders/common.py +191 -137
- novel_downloader/core/downloaders/qianbi.py +187 -146
- novel_downloader/core/downloaders/qidian.py +187 -141
- novel_downloader/core/downloaders/registry.py +4 -2
- novel_downloader/core/downloaders/signals.py +46 -0
- novel_downloader/core/exporters/__init__.py +3 -20
- novel_downloader/core/exporters/base.py +33 -37
- novel_downloader/core/exporters/common/__init__.py +1 -2
- novel_downloader/core/exporters/common/epub.py +15 -10
- novel_downloader/core/exporters/common/main_exporter.py +19 -12
- novel_downloader/core/exporters/common/txt.py +14 -9
- novel_downloader/core/exporters/epub_util.py +59 -29
- novel_downloader/core/exporters/linovelib/__init__.py +1 -0
- novel_downloader/core/exporters/linovelib/epub.py +23 -25
- novel_downloader/core/exporters/linovelib/main_exporter.py +8 -12
- novel_downloader/core/exporters/linovelib/txt.py +17 -11
- novel_downloader/core/exporters/qidian.py +2 -8
- novel_downloader/core/exporters/registry.py +4 -2
- novel_downloader/core/exporters/txt_util.py +7 -7
- novel_downloader/core/fetchers/__init__.py +54 -48
- novel_downloader/core/fetchers/aaatxt.py +83 -0
- novel_downloader/core/fetchers/{biquge/session.py → b520.py} +6 -11
- novel_downloader/core/fetchers/{base/session.py → base.py} +37 -46
- novel_downloader/core/fetchers/{biquge/browser.py → biquyuedu.py} +12 -17
- novel_downloader/core/fetchers/dxmwx.py +110 -0
- novel_downloader/core/fetchers/eightnovel.py +139 -0
- novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +19 -12
- novel_downloader/core/fetchers/guidaye.py +85 -0
- novel_downloader/core/fetchers/hetushu.py +92 -0
- novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +19 -28
- novel_downloader/core/fetchers/ixdzs8.py +113 -0
- novel_downloader/core/fetchers/jpxs123.py +101 -0
- novel_downloader/core/fetchers/lewenn.py +83 -0
- novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +12 -13
- novel_downloader/core/fetchers/piaotia.py +105 -0
- novel_downloader/core/fetchers/qbtr.py +101 -0
- novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +5 -10
- novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +46 -39
- novel_downloader/core/fetchers/quanben5.py +92 -0
- novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
- novel_downloader/core/fetchers/registry.py +5 -16
- novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +7 -10
- novel_downloader/core/fetchers/shencou.py +106 -0
- novel_downloader/core/fetchers/shuhaige.py +84 -0
- novel_downloader/core/fetchers/tongrenquan.py +84 -0
- novel_downloader/core/fetchers/ttkan.py +95 -0
- novel_downloader/core/fetchers/wanbengo.py +83 -0
- novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
- novel_downloader/core/fetchers/xiguashuwu.py +177 -0
- novel_downloader/core/fetchers/xs63b.py +171 -0
- novel_downloader/core/fetchers/xshbook.py +85 -0
- novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +19 -12
- novel_downloader/core/fetchers/yibige.py +114 -0
- novel_downloader/core/interfaces/__init__.py +1 -9
- novel_downloader/core/interfaces/downloader.py +6 -2
- novel_downloader/core/interfaces/exporter.py +7 -7
- novel_downloader/core/interfaces/fetcher.py +4 -17
- novel_downloader/core/interfaces/parser.py +5 -6
- novel_downloader/core/interfaces/searcher.py +9 -1
- novel_downloader/core/parsers/__init__.py +49 -12
- novel_downloader/core/parsers/aaatxt.py +132 -0
- novel_downloader/core/parsers/b520.py +116 -0
- novel_downloader/core/parsers/base.py +63 -12
- novel_downloader/core/parsers/biquyuedu.py +133 -0
- novel_downloader/core/parsers/dxmwx.py +162 -0
- novel_downloader/core/parsers/eightnovel.py +224 -0
- novel_downloader/core/parsers/esjzone.py +61 -66
- novel_downloader/core/parsers/guidaye.py +128 -0
- novel_downloader/core/parsers/hetushu.py +139 -0
- novel_downloader/core/parsers/i25zw.py +137 -0
- novel_downloader/core/parsers/ixdzs8.py +186 -0
- novel_downloader/core/parsers/jpxs123.py +137 -0
- novel_downloader/core/parsers/lewenn.py +142 -0
- novel_downloader/core/parsers/linovelib.py +48 -64
- novel_downloader/core/parsers/piaotia.py +189 -0
- novel_downloader/core/parsers/qbtr.py +136 -0
- novel_downloader/core/parsers/qianbi.py +48 -50
- novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
- novel_downloader/core/parsers/qidian/chapter_encrypted.py +272 -330
- novel_downloader/core/parsers/qidian/chapter_normal.py +24 -55
- novel_downloader/core/parsers/qidian/main_parser.py +11 -38
- novel_downloader/core/parsers/qidian/utils/__init__.py +1 -0
- novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +1 -1
- novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
- novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
- novel_downloader/core/parsers/quanben5.py +103 -0
- novel_downloader/core/parsers/registry.py +5 -16
- novel_downloader/core/parsers/sfacg.py +38 -45
- novel_downloader/core/parsers/shencou.py +215 -0
- novel_downloader/core/parsers/shuhaige.py +111 -0
- novel_downloader/core/parsers/tongrenquan.py +116 -0
- novel_downloader/core/parsers/ttkan.py +132 -0
- novel_downloader/core/parsers/wanbengo.py +191 -0
- novel_downloader/core/parsers/xiaoshuowu.py +173 -0
- novel_downloader/core/parsers/xiguashuwu.py +435 -0
- novel_downloader/core/parsers/xs63b.py +161 -0
- novel_downloader/core/parsers/xshbook.py +134 -0
- novel_downloader/core/parsers/yamibo.py +87 -131
- novel_downloader/core/parsers/yibige.py +166 -0
- novel_downloader/core/searchers/__init__.py +34 -3
- novel_downloader/core/searchers/aaatxt.py +107 -0
- novel_downloader/core/searchers/{biquge.py → b520.py} +29 -28
- novel_downloader/core/searchers/base.py +112 -36
- novel_downloader/core/searchers/dxmwx.py +105 -0
- novel_downloader/core/searchers/eightnovel.py +84 -0
- novel_downloader/core/searchers/esjzone.py +43 -25
- novel_downloader/core/searchers/hetushu.py +92 -0
- novel_downloader/core/searchers/i25zw.py +93 -0
- novel_downloader/core/searchers/ixdzs8.py +107 -0
- novel_downloader/core/searchers/jpxs123.py +107 -0
- novel_downloader/core/searchers/piaotia.py +100 -0
- novel_downloader/core/searchers/qbtr.py +106 -0
- novel_downloader/core/searchers/qianbi.py +74 -40
- novel_downloader/core/searchers/quanben5.py +144 -0
- novel_downloader/core/searchers/registry.py +24 -8
- novel_downloader/core/searchers/shuhaige.py +124 -0
- novel_downloader/core/searchers/tongrenquan.py +110 -0
- novel_downloader/core/searchers/ttkan.py +92 -0
- novel_downloader/core/searchers/xiaoshuowu.py +122 -0
- novel_downloader/core/searchers/xiguashuwu.py +95 -0
- novel_downloader/core/searchers/xs63b.py +104 -0
- novel_downloader/locales/en.json +31 -82
- novel_downloader/locales/zh.json +32 -83
- novel_downloader/models/__init__.py +21 -22
- novel_downloader/models/book.py +44 -0
- novel_downloader/models/config.py +4 -37
- novel_downloader/models/login.py +1 -1
- novel_downloader/models/search.py +5 -0
- novel_downloader/resources/config/settings.toml +8 -70
- novel_downloader/resources/json/xiguashuwu.json +718 -0
- novel_downloader/utils/__init__.py +13 -22
- novel_downloader/utils/chapter_storage.py +3 -2
- novel_downloader/utils/constants.py +4 -29
- novel_downloader/utils/cookies.py +6 -18
- novel_downloader/utils/crypto_utils/__init__.py +13 -0
- novel_downloader/utils/crypto_utils/aes_util.py +90 -0
- novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
- novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
- novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
- novel_downloader/utils/epub/__init__.py +1 -1
- novel_downloader/utils/epub/constants.py +57 -16
- novel_downloader/utils/epub/documents.py +88 -194
- novel_downloader/utils/epub/models.py +0 -14
- novel_downloader/utils/epub/utils.py +63 -96
- novel_downloader/utils/file_utils/__init__.py +2 -23
- novel_downloader/utils/file_utils/io.py +3 -113
- novel_downloader/utils/file_utils/sanitize.py +0 -4
- novel_downloader/utils/fontocr.py +207 -0
- novel_downloader/utils/logger.py +8 -16
- novel_downloader/utils/network.py +2 -2
- novel_downloader/utils/state.py +4 -90
- novel_downloader/utils/text_utils/__init__.py +1 -7
- novel_downloader/utils/text_utils/diff_display.py +5 -7
- novel_downloader/utils/time_utils/__init__.py +5 -11
- novel_downloader/utils/time_utils/datetime_utils.py +20 -29
- novel_downloader/utils/time_utils/sleep_utils.py +4 -8
- novel_downloader/web/__init__.py +13 -0
- novel_downloader/web/components/__init__.py +11 -0
- novel_downloader/web/components/navigation.py +35 -0
- novel_downloader/web/main.py +66 -0
- novel_downloader/web/pages/__init__.py +17 -0
- novel_downloader/web/pages/download.py +78 -0
- novel_downloader/web/pages/progress.py +147 -0
- novel_downloader/web/pages/search.py +329 -0
- novel_downloader/web/services/__init__.py +17 -0
- novel_downloader/web/services/client_dialog.py +164 -0
- novel_downloader/web/services/cred_broker.py +113 -0
- novel_downloader/web/services/cred_models.py +35 -0
- novel_downloader/web/services/task_manager.py +264 -0
- novel_downloader-2.0.0.dist-info/METADATA +171 -0
- novel_downloader-2.0.0.dist-info/RECORD +210 -0
- {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
- novel_downloader/core/downloaders/biquge.py +0 -29
- novel_downloader/core/downloaders/esjzone.py +0 -29
- novel_downloader/core/downloaders/linovelib.py +0 -29
- novel_downloader/core/downloaders/sfacg.py +0 -29
- novel_downloader/core/downloaders/yamibo.py +0 -29
- novel_downloader/core/exporters/biquge.py +0 -22
- novel_downloader/core/exporters/esjzone.py +0 -22
- novel_downloader/core/exporters/qianbi.py +0 -22
- novel_downloader/core/exporters/sfacg.py +0 -22
- novel_downloader/core/exporters/yamibo.py +0 -22
- novel_downloader/core/fetchers/base/__init__.py +0 -14
- novel_downloader/core/fetchers/base/browser.py +0 -422
- novel_downloader/core/fetchers/biquge/__init__.py +0 -14
- novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
- novel_downloader/core/fetchers/esjzone/browser.py +0 -209
- novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
- novel_downloader/core/fetchers/linovelib/browser.py +0 -198
- novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
- novel_downloader/core/fetchers/qidian/__init__.py +0 -14
- novel_downloader/core/fetchers/qidian/browser.py +0 -326
- novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
- novel_downloader/core/fetchers/sfacg/browser.py +0 -194
- novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
- novel_downloader/core/fetchers/yamibo/browser.py +0 -234
- novel_downloader/core/parsers/biquge.py +0 -139
- novel_downloader/models/chapter.py +0 -25
- novel_downloader/models/types.py +0 -13
- novel_downloader/tui/__init__.py +0 -7
- novel_downloader/tui/app.py +0 -32
- novel_downloader/tui/main.py +0 -17
- novel_downloader/tui/screens/__init__.py +0 -14
- novel_downloader/tui/screens/home.py +0 -198
- novel_downloader/tui/screens/login.py +0 -74
- novel_downloader/tui/styles/home_layout.tcss +0 -79
- novel_downloader/tui/widgets/richlog_handler.py +0 -24
- novel_downloader/utils/cache.py +0 -24
- novel_downloader/utils/fontocr/__init__.py +0 -22
- novel_downloader/utils/fontocr/hash_store.py +0 -280
- novel_downloader/utils/fontocr/hash_utils.py +0 -103
- novel_downloader/utils/fontocr/model_loader.py +0 -69
- novel_downloader/utils/fontocr/ocr_v1.py +0 -315
- novel_downloader/utils/fontocr/ocr_v2.py +0 -764
- novel_downloader/utils/fontocr/ocr_v3.py +0 -744
- novel_downloader-1.5.0.dist-info/METADATA +0 -196
- novel_downloader-1.5.0.dist-info/RECORD +0 -164
- {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
- {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
@@ -3,16 +3,22 @@
|
|
3
3
|
novel_downloader.core.downloaders.qidian
|
4
4
|
----------------------------------------
|
5
5
|
|
6
|
+
Downloader implementation for Qidian novels,
|
7
|
+
with handling for restricted and encrypted chapters
|
6
8
|
"""
|
7
9
|
|
8
10
|
import asyncio
|
9
|
-
from collections.abc import
|
10
|
-
from contextlib import asynccontextmanager
|
11
|
+
from collections.abc import Awaitable, Callable
|
11
12
|
from pathlib import Path
|
12
13
|
from typing import Any
|
13
14
|
|
14
15
|
from novel_downloader.core.downloaders.base import BaseDownloader
|
15
16
|
from novel_downloader.core.downloaders.registry import register_downloader
|
17
|
+
from novel_downloader.core.downloaders.signals import (
|
18
|
+
STOP,
|
19
|
+
Progress,
|
20
|
+
StopToken,
|
21
|
+
)
|
16
22
|
from novel_downloader.core.interfaces import (
|
17
23
|
FetcherProtocol,
|
18
24
|
ParserProtocol,
|
@@ -24,7 +30,7 @@ from novel_downloader.models import (
|
|
24
30
|
)
|
25
31
|
from novel_downloader.utils import (
|
26
32
|
ChapterStorage,
|
27
|
-
|
33
|
+
async_jitter_sleep,
|
28
34
|
)
|
29
35
|
|
30
36
|
|
@@ -51,13 +57,14 @@ class QidianDownloader(BaseDownloader):
|
|
51
57
|
config: DownloaderConfig,
|
52
58
|
):
|
53
59
|
config.request_interval = max(1.0, config.request_interval)
|
54
|
-
super().__init__(fetcher, parser, config, "qidian"
|
60
|
+
super().__init__(fetcher, parser, config, "qidian")
|
55
61
|
|
56
62
|
async def _download_one(
|
57
63
|
self,
|
58
64
|
book: BookConfig,
|
59
65
|
*,
|
60
66
|
progress_hook: Callable[[int, int], Awaitable[None]] | None = None,
|
67
|
+
cancel_event: asyncio.Event | None = None,
|
61
68
|
**kwargs: Any,
|
62
69
|
) -> None:
|
63
70
|
"""
|
@@ -66,6 +73,8 @@ class QidianDownloader(BaseDownloader):
|
|
66
73
|
:param book: BookConfig with at least 'book_id'.
|
67
74
|
"""
|
68
75
|
TAG = "[Downloader]"
|
76
|
+
NUM_WORKERS = 1
|
77
|
+
|
69
78
|
book_id = book["book_id"]
|
70
79
|
start_id = book.get("start_id")
|
71
80
|
end_id = book.get("end_id")
|
@@ -74,117 +83,187 @@ class QidianDownloader(BaseDownloader):
|
|
74
83
|
raw_base = self._raw_data_dir / book_id
|
75
84
|
raw_base.mkdir(parents=True, exist_ok=True)
|
76
85
|
html_dir = self._debug_dir / book_id / "html"
|
86
|
+
|
77
87
|
chapter_storage = ChapterStorage(
|
78
88
|
raw_base=raw_base,
|
79
|
-
priorities=self.
|
89
|
+
priorities=self.PRIORITIES_MAP,
|
80
90
|
)
|
81
91
|
chapter_storage.connect()
|
82
92
|
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
self.
|
89
|
-
|
90
|
-
|
91
|
-
# concurrency primitives
|
92
|
-
sem = asyncio.Semaphore(self.workers)
|
93
|
-
cid_q: asyncio.Queue[str | None] = asyncio.Queue()
|
94
|
-
save_q: asyncio.Queue[ChapterDict | None] = asyncio.Queue()
|
95
|
-
default_batch: list[ChapterDict] = []
|
96
|
-
encrypted_batch: list[ChapterDict] = []
|
97
|
-
completed = 0
|
98
|
-
|
99
|
-
def _select(batch_item: ChapterDict) -> tuple[list[ChapterDict], int]:
|
100
|
-
if batch_item.get("extra", {}).get("encrypted", False):
|
101
|
-
return encrypted_batch, self.ENCRYPTED_SOURCE_ID
|
102
|
-
return default_batch, self.DEFAULT_SOURCE_ID
|
103
|
-
|
104
|
-
async def _flush(batch: list[ChapterDict], src: int) -> None:
|
105
|
-
nonlocal completed
|
106
|
-
if not batch:
|
93
|
+
def cancelled() -> bool:
|
94
|
+
return bool(cancel_event and cancel_event.is_set())
|
95
|
+
|
96
|
+
try:
|
97
|
+
# ---- metadata ---
|
98
|
+
book_info = await self.load_book_info(book_id=book_id, html_dir=html_dir)
|
99
|
+
if not book_info:
|
107
100
|
return
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
if self.skip_existing and chapter_storage.exists(
|
143
|
-
cid, self.DEFAULT_SOURCE_ID
|
144
|
-
):
|
145
|
-
completed += 1
|
146
|
-
if progress_hook:
|
147
|
-
await progress_hook(completed, total_chapters)
|
101
|
+
|
102
|
+
vols = book_info["volumes"]
|
103
|
+
total_chapters = sum(len(v["chapters"]) for v in vols)
|
104
|
+
if total_chapters == 0:
|
105
|
+
self.logger.warning("%s 书籍没有章节可下载: %s", TAG, book_id)
|
106
|
+
return
|
107
|
+
|
108
|
+
progress = Progress(total_chapters, progress_hook)
|
109
|
+
|
110
|
+
# ---- queues & batching ---
|
111
|
+
cid_q: asyncio.Queue[str | StopToken] = asyncio.Queue()
|
112
|
+
save_q: asyncio.Queue[ChapterDict | StopToken] = asyncio.Queue()
|
113
|
+
default_batch: list[ChapterDict] = []
|
114
|
+
encrypted_batch: list[ChapterDict] = []
|
115
|
+
|
116
|
+
def select_batch(chap: ChapterDict) -> tuple[list[ChapterDict], int]:
|
117
|
+
# set extra.encrypted (by parser); default to plain if absent.
|
118
|
+
if chap.get("extra", {}).get("encrypted", False):
|
119
|
+
return encrypted_batch, self.ENCRYPTED_SOURCE_ID
|
120
|
+
return default_batch, self.DEFAULT_SOURCE_ID
|
121
|
+
|
122
|
+
async def flush_batch(batch: list[ChapterDict], src: int) -> None:
|
123
|
+
if not batch:
|
124
|
+
return
|
125
|
+
try:
|
126
|
+
chapter_storage.upsert_chapters(batch, src)
|
127
|
+
except Exception as e:
|
128
|
+
self.logger.error(
|
129
|
+
"[Storage] batch upsert failed (size=%d, src=%d): %s",
|
130
|
+
len(batch),
|
131
|
+
src,
|
132
|
+
e,
|
133
|
+
exc_info=True,
|
134
|
+
)
|
148
135
|
else:
|
149
|
-
await
|
136
|
+
await progress.bump(len(batch))
|
137
|
+
finally:
|
138
|
+
batch.clear()
|
139
|
+
|
140
|
+
async def flush_all() -> None:
|
141
|
+
await flush_batch(default_batch, self.DEFAULT_SOURCE_ID)
|
142
|
+
await flush_batch(encrypted_batch, self.ENCRYPTED_SOURCE_ID)
|
143
|
+
|
144
|
+
# ---- workers ---
|
145
|
+
sem = asyncio.Semaphore(self.workers)
|
146
|
+
|
147
|
+
async def storage_worker() -> None:
|
148
|
+
"""
|
149
|
+
Consumes parsed chapters, batches by source, flushes on threshold.
|
150
|
+
|
151
|
+
Terminates after receiving STOP from each chapter worker.
|
152
|
+
|
153
|
+
On cancel: drains queue, flushes once, then waits for remaining STOPs.
|
154
|
+
"""
|
155
|
+
stop_count = 0
|
156
|
+
while True:
|
157
|
+
chap = await save_q.get()
|
158
|
+
if isinstance(chap, StopToken):
|
159
|
+
stop_count += 1
|
160
|
+
if stop_count == NUM_WORKERS:
|
161
|
+
await flush_all()
|
162
|
+
return
|
163
|
+
continue
|
164
|
+
|
165
|
+
batch, src = select_batch(chap)
|
166
|
+
batch.append(chap)
|
167
|
+
if len(batch) >= self.storage_batch_size:
|
168
|
+
await flush_batch(batch, src)
|
169
|
+
|
170
|
+
if cancelled():
|
171
|
+
# Drain whatever is already parsed
|
172
|
+
try:
|
173
|
+
while True:
|
174
|
+
nxt = save_q.get_nowait()
|
175
|
+
if isinstance(nxt, StopToken):
|
176
|
+
stop_count += 1
|
177
|
+
else:
|
178
|
+
nbatch, nsrc = select_batch(nxt)
|
179
|
+
nbatch.append(nxt)
|
180
|
+
except asyncio.QueueEmpty:
|
181
|
+
pass
|
182
|
+
await flush_all()
|
183
|
+
# Wait for remaining STOPs to arrive
|
184
|
+
while stop_count < NUM_WORKERS:
|
185
|
+
nxt = await save_q.get()
|
186
|
+
if nxt is STOP:
|
187
|
+
stop_count += 1
|
188
|
+
return
|
189
|
+
|
190
|
+
async def chapter_worker() -> None:
|
191
|
+
"""
|
192
|
+
Single worker: fetch + parse with retry, then enqueue ChapterDict.
|
193
|
+
|
194
|
+
Exits on STOP. If cancelled, does not start a new fetch; signals STOP.
|
195
|
+
"""
|
196
|
+
while True:
|
197
|
+
cid = await cid_q.get()
|
198
|
+
if isinstance(cid, StopToken):
|
199
|
+
await save_q.put(STOP)
|
200
|
+
return
|
201
|
+
|
202
|
+
if not cid or cid in ignore_set:
|
203
|
+
continue
|
204
|
+
|
205
|
+
if cancelled():
|
206
|
+
await save_q.put(STOP)
|
207
|
+
return
|
208
|
+
|
209
|
+
async with sem:
|
210
|
+
chap = await self._process_chapter(book_id, cid, html_dir)
|
211
|
+
if chap and not cancelled():
|
212
|
+
await save_q.put(chap)
|
213
|
+
|
214
|
+
await async_jitter_sleep(
|
215
|
+
self.request_interval,
|
216
|
+
mul_spread=1.1,
|
217
|
+
max_sleep=self.request_interval + 2,
|
218
|
+
)
|
150
219
|
|
151
|
-
|
152
|
-
|
220
|
+
async def producer() -> None:
|
221
|
+
"""
|
222
|
+
Enqueue chapter IDs respecting start/end/skip_existing.
|
223
|
+
|
224
|
+
Always emits STOP x NUM_WORKERS at the end (even if cancelled early).
|
225
|
+
"""
|
226
|
+
try:
|
227
|
+
async for cid in self._chapter_ids(vols, start_id, end_id):
|
228
|
+
if cancelled():
|
229
|
+
break
|
230
|
+
if self.skip_existing and (
|
231
|
+
chapter_storage.exists(cid, self.DEFAULT_SOURCE_ID)
|
232
|
+
or chapter_storage.exists(cid, self.ENCRYPTED_SOURCE_ID)
|
233
|
+
):
|
234
|
+
# Already have either variant; count as done.
|
235
|
+
await progress.bump(1)
|
236
|
+
else:
|
237
|
+
await cid_q.put(cid)
|
238
|
+
finally:
|
239
|
+
for _ in range(NUM_WORKERS):
|
240
|
+
await cid_q.put(STOP)
|
241
|
+
|
242
|
+
# ---- run tasks ---
|
153
243
|
async with asyncio.TaskGroup() as tg:
|
154
|
-
tg.create_task(
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
244
|
+
tg.create_task(storage_worker())
|
245
|
+
for _ in range(NUM_WORKERS):
|
246
|
+
tg.create_task(chapter_worker())
|
247
|
+
tg.create_task(producer())
|
248
|
+
|
249
|
+
# ---- done ---
|
250
|
+
if cancelled():
|
251
|
+
self.logger.info(
|
252
|
+
"%s Novel '%s' cancelled: flushed %d/%d chapters.",
|
253
|
+
TAG,
|
254
|
+
book_info.get("book_name", "unknown"),
|
255
|
+
progress.done,
|
256
|
+
progress.total,
|
257
|
+
)
|
258
|
+
else:
|
259
|
+
self.logger.info(
|
260
|
+
"%s Novel '%s' download completed.",
|
261
|
+
TAG,
|
262
|
+
book_info.get("book_name", "unknown"),
|
162
263
|
)
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
# run producer + workers, send None sentinels to shut down loops
|
167
|
-
async with task_group_ctx():
|
168
|
-
await producer()
|
169
|
-
|
170
|
-
# signal fetcher to exit
|
171
|
-
await cid_q.put(None)
|
172
|
-
await cid_q.join()
|
173
|
-
|
174
|
-
# signal storage to exit
|
175
|
-
await save_q.put(None)
|
176
|
-
await save_q.join()
|
177
|
-
|
178
|
-
# final flush for both batches
|
179
|
-
await _flush(default_batch, self.DEFAULT_SOURCE_ID)
|
180
|
-
await _flush(encrypted_batch, self.ENCRYPTED_SOURCE_ID)
|
181
|
-
|
182
|
-
chapter_storage.close()
|
183
|
-
self.logger.info(
|
184
|
-
"%s Novel '%s' download completed.",
|
185
|
-
TAG,
|
186
|
-
book_info.get("book_name", "unknown"),
|
187
|
-
)
|
264
|
+
|
265
|
+
finally:
|
266
|
+
chapter_storage.close()
|
188
267
|
|
189
268
|
@staticmethod
|
190
269
|
def _check_restricted(html_list: list[str]) -> bool:
|
@@ -205,40 +284,6 @@ class QidianDownloader(BaseDownloader):
|
|
205
284
|
return True
|
206
285
|
return '"cES":2' in html_list[0]
|
207
286
|
|
208
|
-
async def _chapter_worker(
|
209
|
-
self,
|
210
|
-
book_id: str,
|
211
|
-
ignore_set: set[str],
|
212
|
-
cid_q: asyncio.Queue[str | None],
|
213
|
-
save_q: asyncio.Queue[ChapterDict | None],
|
214
|
-
sem: asyncio.Semaphore,
|
215
|
-
) -> None:
|
216
|
-
"""
|
217
|
-
Worker that processes one chapter at a time:
|
218
|
-
fetch + parse with retry, then enqueue to save_q.
|
219
|
-
"""
|
220
|
-
html_dir = self._debug_dir / book_id / "html"
|
221
|
-
while True:
|
222
|
-
cid = await cid_q.get()
|
223
|
-
if cid is None:
|
224
|
-
cid_q.task_done()
|
225
|
-
break
|
226
|
-
if not cid or cid in ignore_set:
|
227
|
-
cid_q.task_done()
|
228
|
-
continue
|
229
|
-
|
230
|
-
async with sem:
|
231
|
-
chap = await self._process_chapter(book_id, cid, html_dir)
|
232
|
-
if chap:
|
233
|
-
await save_q.put(chap)
|
234
|
-
|
235
|
-
cid_q.task_done()
|
236
|
-
await async_sleep_with_random_delay(
|
237
|
-
self.request_interval,
|
238
|
-
mul_spread=1.1,
|
239
|
-
max_sleep=self.request_interval + 2,
|
240
|
-
)
|
241
|
-
|
242
287
|
async def _process_chapter(
|
243
288
|
self,
|
244
289
|
book_id: str,
|
@@ -247,7 +292,8 @@ class QidianDownloader(BaseDownloader):
|
|
247
292
|
) -> ChapterDict | None:
|
248
293
|
"""
|
249
294
|
Fetch, debug-save, parse a single chapter with retries.
|
250
|
-
|
295
|
+
|
296
|
+
:return: ChapterDict on success, or None on failure.
|
251
297
|
"""
|
252
298
|
for attempt in range(self.retry_times + 1):
|
253
299
|
try:
|
@@ -280,7 +326,7 @@ class QidianDownloader(BaseDownloader):
|
|
280
326
|
"[ChapterWorker] Retry %s (%s): %s", cid, attempt + 1, e
|
281
327
|
)
|
282
328
|
backoff = self.backoff_factor * (2**attempt)
|
283
|
-
await
|
329
|
+
await async_jitter_sleep(
|
284
330
|
base=backoff,
|
285
331
|
mul_spread=1.2,
|
286
332
|
max_sleep=backoff + 3,
|
@@ -3,6 +3,7 @@
|
|
3
3
|
novel_downloader.core.downloaders.registry
|
4
4
|
------------------------------------------
|
5
5
|
|
6
|
+
Registry and factory helpers for creating site-specific or common downloaders
|
6
7
|
"""
|
7
8
|
|
8
9
|
__all__ = ["register_downloader", "get_downloader"]
|
@@ -10,6 +11,7 @@ __all__ = ["register_downloader", "get_downloader"]
|
|
10
11
|
from collections.abc import Callable, Sequence
|
11
12
|
from typing import TypeVar
|
12
13
|
|
14
|
+
from novel_downloader.core.downloaders.common import CommonDownloader
|
13
15
|
from novel_downloader.core.interfaces import (
|
14
16
|
DownloaderProtocol,
|
15
17
|
FetcherProtocol,
|
@@ -62,6 +64,6 @@ def get_downloader(
|
|
62
64
|
site_key = site.lower()
|
63
65
|
try:
|
64
66
|
downloader_cls = _DOWNLOADER_MAP[site_key]
|
65
|
-
except KeyError
|
66
|
-
|
67
|
+
except KeyError:
|
68
|
+
return CommonDownloader(fetcher, parser, config, site_key)
|
67
69
|
return downloader_cls(fetcher, parser, config)
|
@@ -0,0 +1,46 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.downloaders.signals
|
4
|
+
-----------------------------------------
|
5
|
+
|
6
|
+
Utilities for signaling task termination and reporting async progress.
|
7
|
+
"""
|
8
|
+
|
9
|
+
from __future__ import annotations
|
10
|
+
|
11
|
+
from collections.abc import Awaitable, Callable
|
12
|
+
from typing import Final, final
|
13
|
+
|
14
|
+
|
15
|
+
@final
|
16
|
+
class StopToken:
|
17
|
+
"""Typed sentinel used to end queues."""
|
18
|
+
|
19
|
+
__slots__ = ()
|
20
|
+
|
21
|
+
def __repr__(self) -> str:
|
22
|
+
return "STOP"
|
23
|
+
|
24
|
+
|
25
|
+
STOP: Final[StopToken] = StopToken()
|
26
|
+
|
27
|
+
# from typing_extensions import TypeIs
|
28
|
+
# def is_stop(x: object) -> TypeIs[StopToken]:
|
29
|
+
# """Type guard so `if is_stop(item)` narrows type to StopToken."""
|
30
|
+
# return isinstance(x, StopToken)
|
31
|
+
|
32
|
+
|
33
|
+
class Progress:
|
34
|
+
"""Lightweight progress reporter."""
|
35
|
+
|
36
|
+
__slots__ = ("done", "total", "hook")
|
37
|
+
|
38
|
+
def __init__(self, total: int, hook: Callable[[int, int], Awaitable[None]] | None):
|
39
|
+
self.done = 0
|
40
|
+
self.total = total
|
41
|
+
self.hook = hook
|
42
|
+
|
43
|
+
async def bump(self, n: int = 1) -> None:
|
44
|
+
self.done += n
|
45
|
+
if self.hook:
|
46
|
+
await self.hook(self.done, self.total)
|
@@ -3,34 +3,17 @@
|
|
3
3
|
novel_downloader.core.exporters
|
4
4
|
-------------------------------
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
Currently supported platforms:
|
9
|
-
- biquge (笔趣阁)
|
10
|
-
- esjzone (ESJ Zone)
|
11
|
-
- linovelib (哔哩轻小说)
|
12
|
-
- qianbi (铅笔小说)
|
13
|
-
- qidian (起点中文网)
|
14
|
-
- sfacg (SF轻小说)
|
15
|
-
- yamibo (百合会)
|
6
|
+
Exporter implementations for saving books in various formats across different sources
|
16
7
|
"""
|
17
8
|
|
18
9
|
__all__ = [
|
19
10
|
"get_exporter",
|
20
|
-
"
|
21
|
-
"EsjzoneExporter",
|
11
|
+
"CommonExporter",
|
22
12
|
"LinovelibExporter",
|
23
|
-
"QianbiExporter",
|
24
13
|
"QidianExporter",
|
25
|
-
"SfacgExporter",
|
26
|
-
"YamiboExporter",
|
27
14
|
]
|
28
15
|
|
29
|
-
from .
|
30
|
-
from .esjzone import EsjzoneExporter
|
16
|
+
from .common import CommonExporter
|
31
17
|
from .linovelib import LinovelibExporter
|
32
|
-
from .qianbi import QianbiExporter
|
33
18
|
from .qidian import QidianExporter
|
34
19
|
from .registry import get_exporter
|
35
|
-
from .sfacg import SfacgExporter
|
36
|
-
from .yamibo import YamiboExporter
|