novel-downloader 1.4.5__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/__init__.py +2 -4
- novel_downloader/cli/clean.py +21 -88
- novel_downloader/cli/config.py +27 -104
- novel_downloader/cli/download.py +78 -66
- novel_downloader/cli/export.py +20 -21
- novel_downloader/cli/main.py +3 -1
- novel_downloader/cli/search.py +120 -0
- novel_downloader/cli/ui.py +156 -0
- novel_downloader/config/__init__.py +10 -14
- novel_downloader/config/adapter.py +195 -99
- novel_downloader/config/{loader.py → file_io.py} +53 -27
- novel_downloader/core/__init__.py +14 -13
- novel_downloader/core/archived/deqixs/fetcher.py +115 -0
- novel_downloader/core/archived/deqixs/parser.py +132 -0
- novel_downloader/core/archived/deqixs/searcher.py +89 -0
- novel_downloader/core/archived/qidian/searcher.py +79 -0
- novel_downloader/core/archived/wanbengo/searcher.py +98 -0
- novel_downloader/core/archived/xshbook/searcher.py +93 -0
- novel_downloader/core/downloaders/__init__.py +8 -30
- novel_downloader/core/downloaders/base.py +182 -30
- novel_downloader/core/downloaders/common.py +217 -384
- novel_downloader/core/downloaders/qianbi.py +332 -4
- novel_downloader/core/downloaders/qidian.py +250 -290
- novel_downloader/core/downloaders/registry.py +69 -0
- novel_downloader/core/downloaders/signals.py +46 -0
- novel_downloader/core/exporters/__init__.py +8 -26
- novel_downloader/core/exporters/base.py +107 -31
- novel_downloader/core/exporters/common/__init__.py +3 -4
- novel_downloader/core/exporters/common/epub.py +92 -171
- novel_downloader/core/exporters/common/main_exporter.py +14 -67
- novel_downloader/core/exporters/common/txt.py +90 -86
- novel_downloader/core/exporters/epub_util.py +184 -1327
- novel_downloader/core/exporters/linovelib/__init__.py +3 -2
- novel_downloader/core/exporters/linovelib/epub.py +165 -222
- novel_downloader/core/exporters/linovelib/main_exporter.py +10 -71
- novel_downloader/core/exporters/linovelib/txt.py +76 -66
- novel_downloader/core/exporters/qidian.py +15 -11
- novel_downloader/core/exporters/registry.py +55 -0
- novel_downloader/core/exporters/txt_util.py +67 -0
- novel_downloader/core/fetchers/__init__.py +57 -56
- novel_downloader/core/fetchers/aaatxt.py +83 -0
- novel_downloader/core/fetchers/{biquge/session.py → b520.py} +10 -10
- novel_downloader/core/fetchers/{base/session.py → base.py} +63 -47
- novel_downloader/core/fetchers/biquyuedu.py +83 -0
- novel_downloader/core/fetchers/dxmwx.py +110 -0
- novel_downloader/core/fetchers/eightnovel.py +139 -0
- novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +23 -11
- novel_downloader/core/fetchers/guidaye.py +85 -0
- novel_downloader/core/fetchers/hetushu.py +92 -0
- novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +22 -26
- novel_downloader/core/fetchers/ixdzs8.py +113 -0
- novel_downloader/core/fetchers/jpxs123.py +101 -0
- novel_downloader/core/fetchers/{biquge/browser.py → lewenn.py} +15 -15
- novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +16 -12
- novel_downloader/core/fetchers/piaotia.py +105 -0
- novel_downloader/core/fetchers/qbtr.py +101 -0
- novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +9 -9
- novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +55 -40
- novel_downloader/core/fetchers/quanben5.py +92 -0
- novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
- novel_downloader/core/fetchers/registry.py +60 -0
- novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +11 -9
- novel_downloader/core/fetchers/shencou.py +106 -0
- novel_downloader/core/fetchers/{common/browser.py → shuhaige.py} +24 -19
- novel_downloader/core/fetchers/tongrenquan.py +84 -0
- novel_downloader/core/fetchers/ttkan.py +95 -0
- novel_downloader/core/fetchers/{common/session.py → wanbengo.py} +21 -17
- novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
- novel_downloader/core/fetchers/xiguashuwu.py +177 -0
- novel_downloader/core/fetchers/xs63b.py +171 -0
- novel_downloader/core/fetchers/xshbook.py +85 -0
- novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +23 -11
- novel_downloader/core/fetchers/yibige.py +114 -0
- novel_downloader/core/interfaces/__init__.py +8 -14
- novel_downloader/core/interfaces/downloader.py +6 -2
- novel_downloader/core/interfaces/exporter.py +7 -7
- novel_downloader/core/interfaces/fetcher.py +4 -17
- novel_downloader/core/interfaces/parser.py +5 -6
- novel_downloader/core/interfaces/searcher.py +26 -0
- novel_downloader/core/parsers/__init__.py +58 -22
- novel_downloader/core/parsers/aaatxt.py +132 -0
- novel_downloader/core/parsers/b520.py +116 -0
- novel_downloader/core/parsers/base.py +63 -12
- novel_downloader/core/parsers/biquyuedu.py +133 -0
- novel_downloader/core/parsers/dxmwx.py +162 -0
- novel_downloader/core/parsers/eightnovel.py +224 -0
- novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +67 -67
- novel_downloader/core/parsers/guidaye.py +128 -0
- novel_downloader/core/parsers/hetushu.py +139 -0
- novel_downloader/core/parsers/i25zw.py +137 -0
- novel_downloader/core/parsers/ixdzs8.py +186 -0
- novel_downloader/core/parsers/jpxs123.py +137 -0
- novel_downloader/core/parsers/lewenn.py +142 -0
- novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +54 -65
- novel_downloader/core/parsers/piaotia.py +189 -0
- novel_downloader/core/parsers/qbtr.py +136 -0
- novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +54 -51
- novel_downloader/core/parsers/qidian/__init__.py +2 -2
- novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
- novel_downloader/core/parsers/qidian/chapter_encrypted.py +290 -346
- novel_downloader/core/parsers/qidian/chapter_normal.py +25 -56
- novel_downloader/core/parsers/qidian/main_parser.py +19 -57
- novel_downloader/core/parsers/qidian/utils/__init__.py +12 -11
- novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +6 -7
- novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
- novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
- novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
- novel_downloader/core/parsers/quanben5.py +103 -0
- novel_downloader/core/parsers/registry.py +57 -0
- novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +46 -48
- novel_downloader/core/parsers/shencou.py +215 -0
- novel_downloader/core/parsers/shuhaige.py +111 -0
- novel_downloader/core/parsers/tongrenquan.py +116 -0
- novel_downloader/core/parsers/ttkan.py +132 -0
- novel_downloader/core/parsers/wanbengo.py +191 -0
- novel_downloader/core/parsers/xiaoshuowu.py +173 -0
- novel_downloader/core/parsers/xiguashuwu.py +435 -0
- novel_downloader/core/parsers/xs63b.py +161 -0
- novel_downloader/core/parsers/xshbook.py +134 -0
- novel_downloader/core/parsers/yamibo.py +155 -0
- novel_downloader/core/parsers/yibige.py +166 -0
- novel_downloader/core/searchers/__init__.py +51 -0
- novel_downloader/core/searchers/aaatxt.py +107 -0
- novel_downloader/core/searchers/b520.py +84 -0
- novel_downloader/core/searchers/base.py +168 -0
- novel_downloader/core/searchers/dxmwx.py +105 -0
- novel_downloader/core/searchers/eightnovel.py +84 -0
- novel_downloader/core/searchers/esjzone.py +102 -0
- novel_downloader/core/searchers/hetushu.py +92 -0
- novel_downloader/core/searchers/i25zw.py +93 -0
- novel_downloader/core/searchers/ixdzs8.py +107 -0
- novel_downloader/core/searchers/jpxs123.py +107 -0
- novel_downloader/core/searchers/piaotia.py +100 -0
- novel_downloader/core/searchers/qbtr.py +106 -0
- novel_downloader/core/searchers/qianbi.py +165 -0
- novel_downloader/core/searchers/quanben5.py +144 -0
- novel_downloader/core/searchers/registry.py +79 -0
- novel_downloader/core/searchers/shuhaige.py +124 -0
- novel_downloader/core/searchers/tongrenquan.py +110 -0
- novel_downloader/core/searchers/ttkan.py +92 -0
- novel_downloader/core/searchers/xiaoshuowu.py +122 -0
- novel_downloader/core/searchers/xiguashuwu.py +95 -0
- novel_downloader/core/searchers/xs63b.py +104 -0
- novel_downloader/locales/en.json +36 -79
- novel_downloader/locales/zh.json +37 -80
- novel_downloader/models/__init__.py +23 -50
- novel_downloader/models/book.py +44 -0
- novel_downloader/models/config.py +16 -43
- novel_downloader/models/login.py +1 -1
- novel_downloader/models/search.py +21 -0
- novel_downloader/resources/config/settings.toml +39 -74
- novel_downloader/resources/css_styles/intro.css +83 -0
- novel_downloader/resources/css_styles/main.css +30 -89
- novel_downloader/resources/json/xiguashuwu.json +718 -0
- novel_downloader/utils/__init__.py +43 -0
- novel_downloader/utils/chapter_storage.py +247 -226
- novel_downloader/utils/constants.py +5 -50
- novel_downloader/utils/cookies.py +6 -18
- novel_downloader/utils/crypto_utils/__init__.py +13 -0
- novel_downloader/utils/crypto_utils/aes_util.py +90 -0
- novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
- novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
- novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
- novel_downloader/utils/epub/__init__.py +34 -0
- novel_downloader/utils/epub/builder.py +377 -0
- novel_downloader/utils/epub/constants.py +118 -0
- novel_downloader/utils/epub/documents.py +297 -0
- novel_downloader/utils/epub/models.py +120 -0
- novel_downloader/utils/epub/utils.py +179 -0
- novel_downloader/utils/file_utils/__init__.py +5 -30
- novel_downloader/utils/file_utils/io.py +9 -150
- novel_downloader/utils/file_utils/normalize.py +2 -2
- novel_downloader/utils/file_utils/sanitize.py +2 -7
- novel_downloader/utils/fontocr.py +207 -0
- novel_downloader/utils/i18n.py +2 -0
- novel_downloader/utils/logger.py +10 -16
- novel_downloader/utils/network.py +111 -252
- novel_downloader/utils/state.py +5 -90
- novel_downloader/utils/text_utils/__init__.py +16 -21
- novel_downloader/utils/text_utils/diff_display.py +6 -9
- novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
- novel_downloader/utils/text_utils/text_cleaner.py +179 -0
- novel_downloader/utils/text_utils/truncate_utils.py +62 -0
- novel_downloader/utils/time_utils/__init__.py +6 -12
- novel_downloader/utils/time_utils/datetime_utils.py +23 -33
- novel_downloader/utils/time_utils/sleep_utils.py +5 -10
- novel_downloader/web/__init__.py +13 -0
- novel_downloader/web/components/__init__.py +11 -0
- novel_downloader/web/components/navigation.py +35 -0
- novel_downloader/web/main.py +66 -0
- novel_downloader/web/pages/__init__.py +17 -0
- novel_downloader/web/pages/download.py +78 -0
- novel_downloader/web/pages/progress.py +147 -0
- novel_downloader/web/pages/search.py +329 -0
- novel_downloader/web/services/__init__.py +17 -0
- novel_downloader/web/services/client_dialog.py +164 -0
- novel_downloader/web/services/cred_broker.py +113 -0
- novel_downloader/web/services/cred_models.py +35 -0
- novel_downloader/web/services/task_manager.py +264 -0
- novel_downloader-2.0.0.dist-info/METADATA +171 -0
- novel_downloader-2.0.0.dist-info/RECORD +210 -0
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
- novel_downloader/config/site_rules.py +0 -94
- novel_downloader/core/downloaders/biquge.py +0 -25
- novel_downloader/core/downloaders/esjzone.py +0 -25
- novel_downloader/core/downloaders/linovelib.py +0 -25
- novel_downloader/core/downloaders/sfacg.py +0 -25
- novel_downloader/core/downloaders/yamibo.py +0 -25
- novel_downloader/core/exporters/biquge.py +0 -25
- novel_downloader/core/exporters/esjzone.py +0 -25
- novel_downloader/core/exporters/qianbi.py +0 -25
- novel_downloader/core/exporters/sfacg.py +0 -25
- novel_downloader/core/exporters/yamibo.py +0 -25
- novel_downloader/core/factory/__init__.py +0 -20
- novel_downloader/core/factory/downloader.py +0 -73
- novel_downloader/core/factory/exporter.py +0 -58
- novel_downloader/core/factory/fetcher.py +0 -96
- novel_downloader/core/factory/parser.py +0 -86
- novel_downloader/core/fetchers/base/__init__.py +0 -14
- novel_downloader/core/fetchers/base/browser.py +0 -403
- novel_downloader/core/fetchers/biquge/__init__.py +0 -14
- novel_downloader/core/fetchers/common/__init__.py +0 -14
- novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
- novel_downloader/core/fetchers/esjzone/browser.py +0 -204
- novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
- novel_downloader/core/fetchers/linovelib/browser.py +0 -193
- novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
- novel_downloader/core/fetchers/qidian/__init__.py +0 -14
- novel_downloader/core/fetchers/qidian/browser.py +0 -318
- novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
- novel_downloader/core/fetchers/sfacg/browser.py +0 -189
- novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
- novel_downloader/core/fetchers/yamibo/browser.py +0 -229
- novel_downloader/core/parsers/biquge/__init__.py +0 -10
- novel_downloader/core/parsers/biquge/main_parser.py +0 -134
- novel_downloader/core/parsers/common/__init__.py +0 -13
- novel_downloader/core/parsers/common/helper.py +0 -323
- novel_downloader/core/parsers/common/main_parser.py +0 -106
- novel_downloader/core/parsers/esjzone/__init__.py +0 -10
- novel_downloader/core/parsers/linovelib/__init__.py +0 -10
- novel_downloader/core/parsers/qianbi/__init__.py +0 -10
- novel_downloader/core/parsers/sfacg/__init__.py +0 -10
- novel_downloader/core/parsers/yamibo/__init__.py +0 -10
- novel_downloader/core/parsers/yamibo/main_parser.py +0 -194
- novel_downloader/models/browser.py +0 -21
- novel_downloader/models/chapter.py +0 -25
- novel_downloader/models/site_rules.py +0 -99
- novel_downloader/models/tasks.py +0 -33
- novel_downloader/models/types.py +0 -15
- novel_downloader/resources/css_styles/volume-intro.css +0 -56
- novel_downloader/resources/json/replace_word_map.json +0 -4
- novel_downloader/resources/text/blacklist.txt +0 -22
- novel_downloader/tui/__init__.py +0 -7
- novel_downloader/tui/app.py +0 -32
- novel_downloader/tui/main.py +0 -17
- novel_downloader/tui/screens/__init__.py +0 -14
- novel_downloader/tui/screens/home.py +0 -198
- novel_downloader/tui/screens/login.py +0 -74
- novel_downloader/tui/styles/home_layout.tcss +0 -79
- novel_downloader/tui/widgets/richlog_handler.py +0 -24
- novel_downloader/utils/cache.py +0 -24
- novel_downloader/utils/fontocr/__init__.py +0 -22
- novel_downloader/utils/fontocr/model_loader.py +0 -69
- novel_downloader/utils/fontocr/ocr_v1.py +0 -303
- novel_downloader/utils/fontocr/ocr_v2.py +0 -752
- novel_downloader/utils/hash_store.py +0 -279
- novel_downloader/utils/hash_utils.py +0 -103
- novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
- novel_downloader/utils/text_utils/font_mapping.py +0 -28
- novel_downloader/utils/text_utils/text_cleaning.py +0 -107
- novel_downloader-1.4.5.dist-info/METADATA +0 -196
- novel_downloader-1.4.5.dist-info/RECORD +0 -165
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
@@ -3,15 +3,22 @@
|
|
3
3
|
novel_downloader.core.downloaders.qidian
|
4
4
|
----------------------------------------
|
5
5
|
|
6
|
+
Downloader implementation for Qidian novels,
|
7
|
+
with handling for restricted and encrypted chapters
|
6
8
|
"""
|
7
9
|
|
8
10
|
import asyncio
|
9
|
-
import json
|
10
11
|
from collections.abc import Awaitable, Callable
|
11
|
-
from
|
12
|
-
from typing import Any
|
12
|
+
from pathlib import Path
|
13
|
+
from typing import Any
|
13
14
|
|
14
15
|
from novel_downloader.core.downloaders.base import BaseDownloader
|
16
|
+
from novel_downloader.core.downloaders.registry import register_downloader
|
17
|
+
from novel_downloader.core.downloaders.signals import (
|
18
|
+
STOP,
|
19
|
+
Progress,
|
20
|
+
StopToken,
|
21
|
+
)
|
15
22
|
from novel_downloader.core.interfaces import (
|
16
23
|
FetcherProtocol,
|
17
24
|
ParserProtocol,
|
@@ -19,23 +26,30 @@ from novel_downloader.core.interfaces import (
|
|
19
26
|
from novel_downloader.models import (
|
20
27
|
BookConfig,
|
21
28
|
ChapterDict,
|
22
|
-
CidTask,
|
23
29
|
DownloaderConfig,
|
24
|
-
HtmlTask,
|
25
30
|
)
|
26
|
-
from novel_downloader.utils
|
27
|
-
|
28
|
-
|
29
|
-
async_sleep_with_random_delay,
|
30
|
-
calculate_time_difference,
|
31
|
+
from novel_downloader.utils import (
|
32
|
+
ChapterStorage,
|
33
|
+
async_jitter_sleep,
|
31
34
|
)
|
32
35
|
|
33
36
|
|
37
|
+
@register_downloader(site_keys=["qidian", "qd"])
|
34
38
|
class QidianDownloader(BaseDownloader):
|
35
39
|
"""
|
36
|
-
Specialized downloader for Qidian novels.
|
40
|
+
Specialized downloader for Qidian (起点) novels.
|
41
|
+
|
42
|
+
Processes each chapter in a single worker that
|
43
|
+
handles fetch -> parse -> enqueue storage.
|
37
44
|
"""
|
38
45
|
|
46
|
+
DEFAULT_SOURCE_ID = 0
|
47
|
+
ENCRYPTED_SOURCE_ID = 1
|
48
|
+
PRIORITIES_MAP = {
|
49
|
+
DEFAULT_SOURCE_ID: 0,
|
50
|
+
ENCRYPTED_SOURCE_ID: 1,
|
51
|
+
}
|
52
|
+
|
39
53
|
def __init__(
|
40
54
|
self,
|
41
55
|
fetcher: FetcherProtocol,
|
@@ -50,6 +64,7 @@ class QidianDownloader(BaseDownloader):
|
|
50
64
|
book: BookConfig,
|
51
65
|
*,
|
52
66
|
progress_hook: Callable[[int, int], Awaitable[None]] | None = None,
|
67
|
+
cancel_event: asyncio.Event | None = None,
|
53
68
|
**kwargs: Any,
|
54
69
|
) -> None:
|
55
70
|
"""
|
@@ -58,306 +73,200 @@ class QidianDownloader(BaseDownloader):
|
|
58
73
|
:param book: BookConfig with at least 'book_id'.
|
59
74
|
"""
|
60
75
|
TAG = "[Downloader]"
|
76
|
+
NUM_WORKERS = 1
|
77
|
+
|
61
78
|
book_id = book["book_id"]
|
62
79
|
start_id = book.get("start_id")
|
63
80
|
end_id = book.get("end_id")
|
64
81
|
ignore_set = set(book.get("ignore_ids", []))
|
65
82
|
|
66
|
-
raw_base = self.
|
67
|
-
cache_base = self.cache_dir / book_id
|
68
|
-
info_path = raw_base / "book_info.json"
|
69
|
-
chapters_html_dir = cache_base / "html"
|
70
|
-
|
83
|
+
raw_base = self._raw_data_dir / book_id
|
71
84
|
raw_base.mkdir(parents=True, exist_ok=True)
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
raw_base=raw_base,
|
76
|
-
namespace="chapters",
|
77
|
-
backend_type=self._config.storage_backend,
|
78
|
-
batch_size=self._config.storage_batch_size,
|
79
|
-
)
|
80
|
-
encrypted_cs = ChapterStorage(
|
85
|
+
html_dir = self._debug_dir / book_id / "html"
|
86
|
+
|
87
|
+
chapter_storage = ChapterStorage(
|
81
88
|
raw_base=raw_base,
|
82
|
-
|
83
|
-
backend_type=self._config.storage_backend,
|
84
|
-
batch_size=self._config.storage_batch_size,
|
89
|
+
priorities=self.PRIORITIES_MAP,
|
85
90
|
)
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
total_chapters += len(vol.get("chapters", []))
|
121
|
-
if total_chapters == 0:
|
122
|
-
self.logger.warning("%s 书籍没有章节可下载: book_id=%s", TAG, book_id)
|
123
|
-
return
|
124
|
-
|
125
|
-
completed_count = 0
|
126
|
-
|
127
|
-
# setup queue
|
128
|
-
cid_queue: asyncio.Queue[CidTask] = asyncio.Queue()
|
129
|
-
html_queue: asyncio.Queue[HtmlTask] = asyncio.Queue()
|
130
|
-
save_queue: asyncio.Queue[ChapterDict] = asyncio.Queue()
|
131
|
-
|
132
|
-
async def fetcher_worker(
|
133
|
-
book_id: str,
|
134
|
-
cid_queue: asyncio.Queue[CidTask],
|
135
|
-
html_queue: asyncio.Queue[HtmlTask],
|
136
|
-
retry_times: int,
|
137
|
-
) -> None:
|
138
|
-
while True:
|
139
|
-
task = await cid_queue.get()
|
140
|
-
cid = task.cid
|
141
|
-
if not cid:
|
142
|
-
self.logger.warning("[Fetcher] Skipped empty cid task: %s", task)
|
143
|
-
cid_queue.task_done()
|
144
|
-
continue
|
145
|
-
|
146
|
-
if cid in ignore_set:
|
147
|
-
cid_queue.task_done()
|
148
|
-
continue
|
149
|
-
|
91
|
+
chapter_storage.connect()
|
92
|
+
|
93
|
+
def cancelled() -> bool:
|
94
|
+
return bool(cancel_event and cancel_event.is_set())
|
95
|
+
|
96
|
+
try:
|
97
|
+
# ---- metadata ---
|
98
|
+
book_info = await self.load_book_info(book_id=book_id, html_dir=html_dir)
|
99
|
+
if not book_info:
|
100
|
+
return
|
101
|
+
|
102
|
+
vols = book_info["volumes"]
|
103
|
+
total_chapters = sum(len(v["chapters"]) for v in vols)
|
104
|
+
if total_chapters == 0:
|
105
|
+
self.logger.warning("%s 书籍没有章节可下载: %s", TAG, book_id)
|
106
|
+
return
|
107
|
+
|
108
|
+
progress = Progress(total_chapters, progress_hook)
|
109
|
+
|
110
|
+
# ---- queues & batching ---
|
111
|
+
cid_q: asyncio.Queue[str | StopToken] = asyncio.Queue()
|
112
|
+
save_q: asyncio.Queue[ChapterDict | StopToken] = asyncio.Queue()
|
113
|
+
default_batch: list[ChapterDict] = []
|
114
|
+
encrypted_batch: list[ChapterDict] = []
|
115
|
+
|
116
|
+
def select_batch(chap: ChapterDict) -> tuple[list[ChapterDict], int]:
|
117
|
+
# extra.encrypted is set by the parser; default to plain if absent.
|
118
|
+
if chap.get("extra", {}).get("encrypted", False):
|
119
|
+
return encrypted_batch, self.ENCRYPTED_SOURCE_ID
|
120
|
+
return default_batch, self.DEFAULT_SOURCE_ID
|
121
|
+
|
122
|
+
async def flush_batch(batch: list[ChapterDict], src: int) -> None:
|
123
|
+
if not batch:
|
124
|
+
return
|
150
125
|
try:
|
151
|
-
|
152
|
-
await html_queue.put(
|
153
|
-
HtmlTask(cid=cid, retry=task.retry, html_list=html_list)
|
154
|
-
)
|
155
|
-
self.logger.info("[Fetcher] Downloaded chapter %s", cid)
|
156
|
-
await async_sleep_with_random_delay(
|
157
|
-
self.request_interval,
|
158
|
-
mul_spread=1.1,
|
159
|
-
max_sleep=self.request_interval + 2,
|
160
|
-
)
|
161
|
-
|
126
|
+
chapter_storage.upsert_chapters(batch, src)
|
162
127
|
except Exception as e:
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
)
|
170
|
-
)
|
171
|
-
self.logger.info(
|
172
|
-
"[Fetcher] Re-queued chapter %s for retry #%d: %s",
|
173
|
-
cid,
|
174
|
-
task.retry + 1,
|
175
|
-
e,
|
176
|
-
)
|
177
|
-
backoff = self.backoff_factor * (2**task.retry)
|
178
|
-
await async_sleep_with_random_delay(
|
179
|
-
base=backoff,
|
180
|
-
mul_spread=1.2,
|
181
|
-
max_sleep=backoff + 3,
|
182
|
-
)
|
183
|
-
else:
|
184
|
-
self.logger.warning(
|
185
|
-
"[Fetcher] Max retries reached for chapter %s: %s",
|
186
|
-
cid,
|
187
|
-
e,
|
188
|
-
)
|
189
|
-
|
190
|
-
finally:
|
191
|
-
cid_queue.task_done()
|
192
|
-
|
193
|
-
async def parser_worker(
|
194
|
-
cid_queue: asyncio.Queue[CidTask],
|
195
|
-
html_queue: asyncio.Queue[HtmlTask],
|
196
|
-
save_queue: asyncio.Queue[ChapterDict],
|
197
|
-
retry_times: int,
|
198
|
-
) -> None:
|
199
|
-
while True:
|
200
|
-
task = await html_queue.get()
|
201
|
-
skip_retry = False
|
202
|
-
try:
|
203
|
-
chap_json: ChapterDict | None = None
|
204
|
-
if self.check_restricted(task.html_list):
|
205
|
-
self.logger.info(
|
206
|
-
"[Parser] Skipped restricted page for cid %s", task.cid
|
207
|
-
)
|
208
|
-
skip_retry = True
|
209
|
-
raise ValueError("Restricted content detected")
|
210
|
-
|
211
|
-
is_encrypted = self.check_encrypted(task.html_list)
|
212
|
-
chap_json = await asyncio.to_thread(
|
213
|
-
self.parser.parse_chapter,
|
214
|
-
task.html_list,
|
215
|
-
task.cid,
|
128
|
+
self.logger.error(
|
129
|
+
"[Storage] batch upsert failed (size=%d, src=%d): %s",
|
130
|
+
len(batch),
|
131
|
+
src,
|
132
|
+
e,
|
133
|
+
exc_info=True,
|
216
134
|
)
|
217
|
-
|
218
|
-
|
219
|
-
if self.save_html:
|
220
|
-
folder = chapters_html_dir / (
|
221
|
-
"html_encrypted" if is_encrypted else "html_plain"
|
222
|
-
)
|
223
|
-
html_path = folder / f"{task.cid}.html"
|
224
|
-
save_as_txt(task.html_list[0], html_path, on_exist="skip")
|
225
|
-
self.logger.debug(
|
226
|
-
"%s Saved raw HTML for chapter %s to %s",
|
227
|
-
TAG,
|
228
|
-
task.cid,
|
229
|
-
html_path,
|
230
|
-
)
|
231
|
-
if chap_json:
|
232
|
-
await save_queue.put(chap_json)
|
233
|
-
self.logger.info(
|
234
|
-
"[Parser] saved chapter %s",
|
235
|
-
task.cid,
|
236
|
-
)
|
237
|
-
else:
|
238
|
-
raise ValueError("Empty parse result")
|
239
|
-
except Exception as e:
|
240
|
-
if not skip_retry and task.retry < retry_times:
|
241
|
-
await cid_queue.put(
|
242
|
-
CidTask(prev_cid=None, cid=task.cid, retry=task.retry + 1)
|
243
|
-
)
|
244
|
-
self.logger.info(
|
245
|
-
"[Parser] Re-queued cid %s for retry #%d: %s",
|
246
|
-
task.cid,
|
247
|
-
task.retry + 1,
|
248
|
-
e,
|
249
|
-
)
|
250
|
-
elif not skip_retry:
|
251
|
-
self.logger.warning(
|
252
|
-
"[Parser] Max retries reached for cid %s: %s",
|
253
|
-
task.cid,
|
254
|
-
e,
|
255
|
-
)
|
135
|
+
else:
|
136
|
+
await progress.bump(len(batch))
|
256
137
|
finally:
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
cid_queue,
|
283
|
-
html_queue,
|
284
|
-
self.retry_times,
|
285
|
-
)
|
286
|
-
)
|
287
|
-
|
288
|
-
parser_task = asyncio.create_task(
|
289
|
-
parser_worker(
|
290
|
-
cid_queue,
|
291
|
-
html_queue,
|
292
|
-
save_queue,
|
293
|
-
self.retry_times,
|
294
|
-
)
|
295
|
-
)
|
296
|
-
|
297
|
-
storage_task = asyncio.create_task(
|
298
|
-
storage_worker(
|
299
|
-
normal_cs=normal_cs,
|
300
|
-
encrypted_cs=encrypted_cs,
|
301
|
-
save_queue=save_queue,
|
302
|
-
)
|
303
|
-
)
|
304
|
-
|
305
|
-
found_start = start_id is None
|
306
|
-
stop_early = False
|
307
|
-
|
308
|
-
for vol in book_info.get("volumes", []):
|
309
|
-
chapters = vol.get("chapters", [])
|
310
|
-
for chap in chapters:
|
311
|
-
if stop_early:
|
312
|
-
break
|
313
|
-
|
314
|
-
cid = chap.get("chapterId")
|
315
|
-
if not cid:
|
316
|
-
continue
|
317
|
-
|
318
|
-
if not found_start:
|
319
|
-
if cid == start_id:
|
320
|
-
found_start = True
|
321
|
-
else:
|
322
|
-
completed_count += 1
|
138
|
+
batch.clear()
|
139
|
+
|
140
|
+
async def flush_all() -> None:
|
141
|
+
await flush_batch(default_batch, self.DEFAULT_SOURCE_ID)
|
142
|
+
await flush_batch(encrypted_batch, self.ENCRYPTED_SOURCE_ID)
|
143
|
+
|
144
|
+
# ---- workers ---
|
145
|
+
sem = asyncio.Semaphore(self.workers)
|
146
|
+
|
147
|
+
async def storage_worker() -> None:
|
148
|
+
"""
|
149
|
+
Consumes parsed chapters, batches by source, flushes on threshold.
|
150
|
+
|
151
|
+
Terminates after receiving STOP from each chapter worker.
|
152
|
+
|
153
|
+
On cancel: drains queue, flushes once, then waits for remaining STOPs.
|
154
|
+
"""
|
155
|
+
stop_count = 0
|
156
|
+
while True:
|
157
|
+
chap = await save_q.get()
|
158
|
+
if isinstance(chap, StopToken):
|
159
|
+
stop_count += 1
|
160
|
+
if stop_count == NUM_WORKERS:
|
161
|
+
await flush_all()
|
162
|
+
return
|
323
163
|
continue
|
324
164
|
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
165
|
+
batch, src = select_batch(chap)
|
166
|
+
batch.append(chap)
|
167
|
+
if len(batch) >= self.storage_batch_size:
|
168
|
+
await flush_batch(batch, src)
|
169
|
+
|
170
|
+
if cancelled():
|
171
|
+
# Drain whatever is already parsed
|
172
|
+
try:
|
173
|
+
while True:
|
174
|
+
nxt = save_q.get_nowait()
|
175
|
+
if isinstance(nxt, StopToken):
|
176
|
+
stop_count += 1
|
177
|
+
else:
|
178
|
+
nbatch, nsrc = select_batch(nxt)
|
179
|
+
nbatch.append(nxt)
|
180
|
+
except asyncio.QueueEmpty:
|
181
|
+
pass
|
182
|
+
await flush_all()
|
183
|
+
# Wait for remaining STOPs to arrive
|
184
|
+
while stop_count < NUM_WORKERS:
|
185
|
+
nxt = await save_q.get()
|
186
|
+
if nxt is STOP:
|
187
|
+
stop_count += 1
|
188
|
+
return
|
189
|
+
|
190
|
+
async def chapter_worker() -> None:
|
191
|
+
"""
|
192
|
+
Single worker: fetch + parse with retry, then enqueue ChapterDict.
|
193
|
+
|
194
|
+
Exits on STOP. If cancelled, does not start a new fetch; signals STOP.
|
195
|
+
"""
|
196
|
+
while True:
|
197
|
+
cid = await cid_q.get()
|
198
|
+
if isinstance(cid, StopToken):
|
199
|
+
await save_q.put(STOP)
|
200
|
+
return
|
201
|
+
|
202
|
+
if not cid or cid in ignore_set:
|
203
|
+
continue
|
334
204
|
|
335
|
-
|
205
|
+
if cancelled():
|
206
|
+
await save_q.put(STOP)
|
207
|
+
return
|
336
208
|
|
337
|
-
|
338
|
-
|
209
|
+
async with sem:
|
210
|
+
chap = await self._process_chapter(book_id, cid, html_dir)
|
211
|
+
if chap and not cancelled():
|
212
|
+
await save_q.put(chap)
|
339
213
|
|
340
|
-
|
341
|
-
|
342
|
-
|
214
|
+
await async_jitter_sleep(
|
215
|
+
self.request_interval,
|
216
|
+
mul_spread=1.1,
|
217
|
+
max_sleep=self.request_interval + 2,
|
218
|
+
)
|
343
219
|
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
await task
|
220
|
+
async def producer() -> None:
|
221
|
+
"""
|
222
|
+
Enqueue chapter IDs respecting start/end/skip_existing.
|
348
223
|
|
349
|
-
|
350
|
-
|
224
|
+
Always emits STOP x NUM_WORKERS at the end (even if cancelled early).
|
225
|
+
"""
|
226
|
+
try:
|
227
|
+
async for cid in self._chapter_ids(vols, start_id, end_id):
|
228
|
+
if cancelled():
|
229
|
+
break
|
230
|
+
if self.skip_existing and (
|
231
|
+
chapter_storage.exists(cid, self.DEFAULT_SOURCE_ID)
|
232
|
+
or chapter_storage.exists(cid, self.ENCRYPTED_SOURCE_ID)
|
233
|
+
):
|
234
|
+
# Already have either variant; count as done.
|
235
|
+
await progress.bump(1)
|
236
|
+
else:
|
237
|
+
await cid_q.put(cid)
|
238
|
+
finally:
|
239
|
+
for _ in range(NUM_WORKERS):
|
240
|
+
await cid_q.put(STOP)
|
241
|
+
|
242
|
+
# ---- run tasks ---
|
243
|
+
async with asyncio.TaskGroup() as tg:
|
244
|
+
tg.create_task(storage_worker())
|
245
|
+
for _ in range(NUM_WORKERS):
|
246
|
+
tg.create_task(chapter_worker())
|
247
|
+
tg.create_task(producer())
|
248
|
+
|
249
|
+
# ---- done ---
|
250
|
+
if cancelled():
|
251
|
+
self.logger.info(
|
252
|
+
"%s Novel '%s' cancelled: flushed %d/%d chapters.",
|
253
|
+
TAG,
|
254
|
+
book_info.get("book_name", "unknown"),
|
255
|
+
progress.done,
|
256
|
+
progress.total,
|
257
|
+
)
|
258
|
+
else:
|
259
|
+
self.logger.info(
|
260
|
+
"%s Novel '%s' download completed.",
|
261
|
+
TAG,
|
262
|
+
book_info.get("book_name", "unknown"),
|
263
|
+
)
|
351
264
|
|
352
|
-
|
353
|
-
|
354
|
-
TAG,
|
355
|
-
book_info.get("book_name", "unknown"),
|
356
|
-
)
|
357
|
-
return
|
265
|
+
finally:
|
266
|
+
chapter_storage.close()
|
358
267
|
|
359
268
|
@staticmethod
|
360
|
-
def
|
269
|
+
def _check_restricted(html_list: list[str]) -> bool:
|
361
270
|
"""
|
362
271
|
Return True if page content indicates access restriction
|
363
272
|
(e.g. not subscribed/purchased).
|
@@ -370,7 +279,58 @@ class QidianDownloader(BaseDownloader):
|
|
370
279
|
return any(m in html_list[0] for m in markers)
|
371
280
|
|
372
281
|
@staticmethod
|
373
|
-
def
|
282
|
+
def _check_encrypted(html_list: list[str]) -> bool:
|
374
283
|
if not html_list:
|
375
284
|
return True
|
376
285
|
return '"cES":2' in html_list[0]
|
286
|
+
|
287
|
+
async def _process_chapter(
|
288
|
+
self,
|
289
|
+
book_id: str,
|
290
|
+
cid: str,
|
291
|
+
html_dir: Path,
|
292
|
+
) -> ChapterDict | None:
|
293
|
+
"""
|
294
|
+
Fetch, debug-save, parse a single chapter with retries.
|
295
|
+
|
296
|
+
:return: ChapterDict on success, or None on failure.
|
297
|
+
"""
|
298
|
+
for attempt in range(self.retry_times + 1):
|
299
|
+
try:
|
300
|
+
html_list = await self.fetcher.get_book_chapter(book_id, cid)
|
301
|
+
if self._check_restricted(html_list):
|
302
|
+
self.logger.info(
|
303
|
+
"[ChapterWorker] Restricted content detected: %s", cid
|
304
|
+
)
|
305
|
+
return None
|
306
|
+
encrypted = self._check_encrypted(html_list)
|
307
|
+
|
308
|
+
folder = "html_encrypted" if encrypted else "html_plain"
|
309
|
+
self._save_html_pages(html_dir / folder, cid, html_list)
|
310
|
+
|
311
|
+
chap = await asyncio.to_thread(
|
312
|
+
self.parser.parse_chapter, html_list, cid
|
313
|
+
)
|
314
|
+
if encrypted and not chap:
|
315
|
+
self.logger.info(
|
316
|
+
"[ChapterWorker] Fail for encrypted chapter: %s", cid
|
317
|
+
)
|
318
|
+
return None
|
319
|
+
if not chap:
|
320
|
+
raise ValueError("Empty parse result")
|
321
|
+
return chap
|
322
|
+
|
323
|
+
except Exception as e:
|
324
|
+
if attempt < self.retry_times:
|
325
|
+
self.logger.info(
|
326
|
+
"[ChapterWorker] Retry %s (%s): %s", cid, attempt + 1, e
|
327
|
+
)
|
328
|
+
backoff = self.backoff_factor * (2**attempt)
|
329
|
+
await async_jitter_sleep(
|
330
|
+
base=backoff,
|
331
|
+
mul_spread=1.2,
|
332
|
+
max_sleep=backoff + 3,
|
333
|
+
)
|
334
|
+
else:
|
335
|
+
self.logger.warning("[ChapterWorker] Failed %s: %s", cid, e)
|
336
|
+
return None
|
@@ -0,0 +1,69 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.downloaders.registry
|
4
|
+
------------------------------------------
|
5
|
+
|
6
|
+
Registry and factory helpers for creating site-specific or common downloaders
|
7
|
+
"""
|
8
|
+
|
9
|
+
__all__ = ["register_downloader", "get_downloader"]
|
10
|
+
|
11
|
+
from collections.abc import Callable, Sequence
|
12
|
+
from typing import TypeVar
|
13
|
+
|
14
|
+
from novel_downloader.core.downloaders.common import CommonDownloader
|
15
|
+
from novel_downloader.core.interfaces import (
|
16
|
+
DownloaderProtocol,
|
17
|
+
FetcherProtocol,
|
18
|
+
ParserProtocol,
|
19
|
+
)
|
20
|
+
from novel_downloader.models import DownloaderConfig
|
21
|
+
|
22
|
+
DownloaderBuilder = Callable[
|
23
|
+
[FetcherProtocol, ParserProtocol, DownloaderConfig],
|
24
|
+
DownloaderProtocol,
|
25
|
+
]
|
26
|
+
D = TypeVar("D", bound=DownloaderProtocol)
|
27
|
+
_DOWNLOADER_MAP: dict[str, DownloaderBuilder] = {}
|
28
|
+
|
29
|
+
|
30
|
+
def register_downloader(
|
31
|
+
site_keys: Sequence[str],
|
32
|
+
) -> Callable[[type[D]], type[D]]:
|
33
|
+
"""
|
34
|
+
Decorator to register a downloader class under given keys.
|
35
|
+
|
36
|
+
:param site_keys: Sequence of site identifiers
|
37
|
+
:return: A class decorator that populates _DOWNLOADER_MAP.
|
38
|
+
"""
|
39
|
+
|
40
|
+
def decorator(cls: type[D]) -> type[D]:
|
41
|
+
for key in site_keys:
|
42
|
+
_DOWNLOADER_MAP[key.lower()] = cls
|
43
|
+
return cls
|
44
|
+
|
45
|
+
return decorator
|
46
|
+
|
47
|
+
|
48
|
+
def get_downloader(
|
49
|
+
fetcher: FetcherProtocol,
|
50
|
+
parser: ParserProtocol,
|
51
|
+
site: str,
|
52
|
+
config: DownloaderConfig,
|
53
|
+
) -> DownloaderProtocol:
|
54
|
+
"""
|
55
|
+
Returns a DownloaderProtocol for the given site.
|
56
|
+
|
57
|
+
:param fetcher: Fetcher implementation
|
58
|
+
:param parser: Parser implementation
|
59
|
+
:param site: Site name (e.g., 'qidian')
|
60
|
+
:param config: Downloader configuration
|
61
|
+
|
62
|
+
:return: An instance of a downloader class
|
63
|
+
"""
|
64
|
+
site_key = site.lower()
|
65
|
+
try:
|
66
|
+
downloader_cls = _DOWNLOADER_MAP[site_key]
|
67
|
+
except KeyError:
|
68
|
+
return CommonDownloader(fetcher, parser, config, site_key)
|
69
|
+
return downloader_cls(fetcher, parser, config)
|