novel-downloader 2.0.0__py3-none-any.whl → 2.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/download.py +14 -11
- novel_downloader/cli/export.py +19 -19
- novel_downloader/cli/ui.py +35 -8
- novel_downloader/config/adapter.py +216 -153
- novel_downloader/core/__init__.py +5 -6
- novel_downloader/core/archived/deqixs/fetcher.py +1 -28
- novel_downloader/core/downloaders/__init__.py +2 -0
- novel_downloader/core/downloaders/base.py +34 -85
- novel_downloader/core/downloaders/common.py +147 -171
- novel_downloader/core/downloaders/qianbi.py +30 -64
- novel_downloader/core/downloaders/qidian.py +157 -184
- novel_downloader/core/downloaders/qqbook.py +292 -0
- novel_downloader/core/downloaders/registry.py +2 -2
- novel_downloader/core/exporters/__init__.py +2 -0
- novel_downloader/core/exporters/base.py +37 -59
- novel_downloader/core/exporters/common.py +620 -0
- novel_downloader/core/exporters/linovelib.py +47 -0
- novel_downloader/core/exporters/qidian.py +41 -12
- novel_downloader/core/exporters/qqbook.py +28 -0
- novel_downloader/core/exporters/registry.py +2 -2
- novel_downloader/core/fetchers/__init__.py +4 -2
- novel_downloader/core/fetchers/aaatxt.py +2 -22
- novel_downloader/core/fetchers/b520.py +3 -23
- novel_downloader/core/fetchers/base.py +80 -105
- novel_downloader/core/fetchers/biquyuedu.py +2 -22
- novel_downloader/core/fetchers/dxmwx.py +10 -22
- novel_downloader/core/fetchers/esjzone.py +6 -29
- novel_downloader/core/fetchers/guidaye.py +2 -22
- novel_downloader/core/fetchers/hetushu.py +9 -29
- novel_downloader/core/fetchers/i25zw.py +2 -16
- novel_downloader/core/fetchers/ixdzs8.py +2 -16
- novel_downloader/core/fetchers/jpxs123.py +2 -16
- novel_downloader/core/fetchers/lewenn.py +2 -22
- novel_downloader/core/fetchers/linovelib.py +4 -20
- novel_downloader/core/fetchers/{eightnovel.py → n8novel.py} +12 -40
- novel_downloader/core/fetchers/piaotia.py +2 -16
- novel_downloader/core/fetchers/qbtr.py +2 -16
- novel_downloader/core/fetchers/qianbi.py +1 -20
- novel_downloader/core/fetchers/qidian.py +27 -68
- novel_downloader/core/fetchers/qqbook.py +177 -0
- novel_downloader/core/fetchers/quanben5.py +9 -29
- novel_downloader/core/fetchers/rate_limiter.py +22 -53
- novel_downloader/core/fetchers/sfacg.py +3 -16
- novel_downloader/core/fetchers/shencou.py +2 -16
- novel_downloader/core/fetchers/shuhaige.py +2 -22
- novel_downloader/core/fetchers/tongrenquan.py +2 -22
- novel_downloader/core/fetchers/ttkan.py +3 -14
- novel_downloader/core/fetchers/wanbengo.py +2 -22
- novel_downloader/core/fetchers/xiaoshuowu.py +2 -16
- novel_downloader/core/fetchers/xiguashuwu.py +4 -20
- novel_downloader/core/fetchers/xs63b.py +3 -15
- novel_downloader/core/fetchers/xshbook.py +2 -22
- novel_downloader/core/fetchers/yamibo.py +4 -28
- novel_downloader/core/fetchers/yibige.py +13 -26
- novel_downloader/core/interfaces/exporter.py +19 -7
- novel_downloader/core/interfaces/fetcher.py +23 -49
- novel_downloader/core/interfaces/parser.py +2 -2
- novel_downloader/core/parsers/__init__.py +4 -2
- novel_downloader/core/parsers/b520.py +2 -2
- novel_downloader/core/parsers/base.py +5 -39
- novel_downloader/core/parsers/esjzone.py +3 -3
- novel_downloader/core/parsers/{eightnovel.py → n8novel.py} +7 -7
- novel_downloader/core/parsers/qidian.py +717 -0
- novel_downloader/core/parsers/qqbook.py +709 -0
- novel_downloader/core/parsers/xiguashuwu.py +8 -15
- novel_downloader/core/searchers/__init__.py +2 -2
- novel_downloader/core/searchers/b520.py +1 -1
- novel_downloader/core/searchers/base.py +2 -2
- novel_downloader/core/searchers/{eightnovel.py → n8novel.py} +5 -5
- novel_downloader/locales/en.json +3 -3
- novel_downloader/locales/zh.json +3 -3
- novel_downloader/models/__init__.py +2 -0
- novel_downloader/models/book.py +1 -0
- novel_downloader/models/config.py +12 -0
- novel_downloader/resources/config/settings.toml +23 -5
- novel_downloader/resources/js_scripts/expr_to_json.js +14 -0
- novel_downloader/resources/js_scripts/qidian_decrypt_node.js +21 -16
- novel_downloader/resources/js_scripts/qq_decrypt_node.js +92 -0
- novel_downloader/utils/__init__.py +0 -2
- novel_downloader/utils/chapter_storage.py +2 -3
- novel_downloader/utils/constants.py +7 -3
- novel_downloader/utils/cookies.py +32 -17
- novel_downloader/utils/crypto_utils/__init__.py +0 -6
- novel_downloader/utils/crypto_utils/aes_util.py +1 -1
- novel_downloader/utils/crypto_utils/rc4.py +40 -50
- novel_downloader/utils/epub/__init__.py +2 -3
- novel_downloader/utils/epub/builder.py +6 -6
- novel_downloader/utils/epub/constants.py +1 -6
- novel_downloader/utils/epub/documents.py +7 -7
- novel_downloader/utils/epub/models.py +8 -8
- novel_downloader/utils/epub/utils.py +10 -10
- novel_downloader/utils/file_utils/io.py +48 -73
- novel_downloader/utils/file_utils/normalize.py +1 -7
- novel_downloader/utils/file_utils/sanitize.py +4 -11
- novel_downloader/utils/fontocr/__init__.py +13 -0
- novel_downloader/utils/{fontocr.py → fontocr/core.py} +72 -61
- novel_downloader/utils/fontocr/loader.py +52 -0
- novel_downloader/utils/logger.py +80 -56
- novel_downloader/utils/network.py +16 -40
- novel_downloader/utils/node_decryptor/__init__.py +13 -0
- novel_downloader/utils/node_decryptor/decryptor.py +342 -0
- novel_downloader/{core/parsers/qidian/utils → utils/node_decryptor}/decryptor_fetcher.py +5 -6
- novel_downloader/utils/text_utils/text_cleaner.py +39 -30
- novel_downloader/utils/text_utils/truncate_utils.py +3 -14
- novel_downloader/utils/time_utils/sleep_utils.py +53 -43
- novel_downloader/web/main.py +1 -1
- novel_downloader/web/pages/download.py +1 -1
- novel_downloader/web/pages/search.py +4 -4
- novel_downloader/web/services/task_manager.py +2 -0
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/METADATA +5 -1
- novel_downloader-2.0.2.dist-info/RECORD +203 -0
- novel_downloader/core/exporters/common/__init__.py +0 -11
- novel_downloader/core/exporters/common/epub.py +0 -198
- novel_downloader/core/exporters/common/main_exporter.py +0 -64
- novel_downloader/core/exporters/common/txt.py +0 -146
- novel_downloader/core/exporters/epub_util.py +0 -215
- novel_downloader/core/exporters/linovelib/__init__.py +0 -11
- novel_downloader/core/exporters/linovelib/epub.py +0 -349
- novel_downloader/core/exporters/linovelib/main_exporter.py +0 -66
- novel_downloader/core/exporters/linovelib/txt.py +0 -139
- novel_downloader/core/exporters/txt_util.py +0 -67
- novel_downloader/core/parsers/qidian/__init__.py +0 -10
- novel_downloader/core/parsers/qidian/book_info_parser.py +0 -89
- novel_downloader/core/parsers/qidian/chapter_encrypted.py +0 -470
- novel_downloader/core/parsers/qidian/chapter_normal.py +0 -126
- novel_downloader/core/parsers/qidian/chapter_router.py +0 -68
- novel_downloader/core/parsers/qidian/main_parser.py +0 -101
- novel_downloader/core/parsers/qidian/utils/__init__.py +0 -30
- novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +0 -143
- novel_downloader/core/parsers/qidian/utils/helpers.py +0 -110
- novel_downloader/core/parsers/qidian/utils/node_decryptor.py +0 -175
- novel_downloader-2.0.0.dist-info/RECORD +0 -210
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/WHEEL +0 -0
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/entry_points.txt +0 -0
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/top_level.txt +0 -0
@@ -4,17 +4,20 @@ novel_downloader.config.adapter
|
|
4
4
|
-------------------------------
|
5
5
|
|
6
6
|
Defines ConfigAdapter, which maps a raw configuration dictionary and
|
7
|
-
site
|
7
|
+
site into structured dataclass-based config models.
|
8
8
|
"""
|
9
9
|
|
10
|
+
import contextlib
|
10
11
|
import json
|
11
|
-
from
|
12
|
+
from collections.abc import Mapping
|
13
|
+
from typing import Any, TypeVar
|
12
14
|
|
13
15
|
from novel_downloader.models import (
|
14
16
|
BookConfig,
|
15
17
|
DownloaderConfig,
|
16
18
|
ExporterConfig,
|
17
19
|
FetcherConfig,
|
20
|
+
FontOCRConfig,
|
18
21
|
ParserConfig,
|
19
22
|
TextCleanerConfig,
|
20
23
|
)
|
@@ -26,96 +29,109 @@ class ConfigAdapter:
|
|
26
29
|
"""
|
27
30
|
Adapter to map a raw configuration dictionary and site name
|
28
31
|
into structured dataclass configuration models.
|
32
|
+
|
33
|
+
Resolution order for each field:
|
34
|
+
1. ``config["sites"][<site>]`` (if present)
|
35
|
+
2. ``config["general"]`` (if present)
|
36
|
+
3. Hard-coded default passed by the caller
|
29
37
|
"""
|
30
38
|
|
31
|
-
def __init__(self, config:
|
39
|
+
def __init__(self, config: Mapping[str, Any], site: str):
|
32
40
|
"""
|
33
|
-
Initialize the adapter.
|
41
|
+
Initialize the adapter with a configuration mapping and a site key.
|
34
42
|
|
35
|
-
:param config:
|
36
|
-
:param site:
|
43
|
+
:param config: Fully loaded configuration mapping.
|
44
|
+
:param site: Current site key (e.g., ``"qidian"``).
|
37
45
|
"""
|
38
|
-
self._config = config
|
39
|
-
self._site = site
|
40
|
-
self._site_cfg: dict[str, Any] = self._get_site_cfg()
|
41
|
-
self._gen_cfg: dict[str, Any] = config.get("general") or {}
|
46
|
+
self._config: dict[str, Any] = dict(config)
|
47
|
+
self._site: str = site
|
42
48
|
|
43
49
|
def get_fetcher_config(self) -> FetcherConfig:
|
44
50
|
"""
|
45
|
-
Build a FetcherConfig
|
51
|
+
Build a :class:`novel_downloader.models.FetcherConfig` by resolving fields
|
52
|
+
from site-specific and general settings.
|
46
53
|
|
47
|
-
:return:
|
54
|
+
:return: Fully populated configuration for the network fetcher.
|
48
55
|
"""
|
56
|
+
s, g = self._site_cfg, self._gen_cfg
|
49
57
|
return FetcherConfig(
|
50
|
-
request_interval=self.
|
51
|
-
retry_times=self.
|
52
|
-
backoff_factor=self.
|
53
|
-
timeout=self.
|
54
|
-
max_connections=self.
|
55
|
-
max_rps=self.
|
56
|
-
user_agent=self.
|
57
|
-
headers=self.
|
58
|
-
verify_ssl=self.
|
59
|
-
locale_style=self.
|
58
|
+
request_interval=self._pick("request_interval", 2.0, s, g),
|
59
|
+
retry_times=self._pick("retry_times", 3, s, g),
|
60
|
+
backoff_factor=self._pick("backoff_factor", 2.0, s, g),
|
61
|
+
timeout=self._pick("timeout", 30.0, s, g),
|
62
|
+
max_connections=self._pick("max_connections", 10, s, g),
|
63
|
+
max_rps=self._pick("max_rps", 1000.0, s, g),
|
64
|
+
user_agent=self._pick("user_agent", None, s, g),
|
65
|
+
headers=self._pick("headers", None, s, g),
|
66
|
+
verify_ssl=self._pick("verify_ssl", True, s, g),
|
67
|
+
locale_style=self._pick("locale_style", "simplified", s, g),
|
60
68
|
)
|
61
69
|
|
62
70
|
def get_downloader_config(self) -> DownloaderConfig:
|
63
71
|
"""
|
64
|
-
Build a DownloaderConfig using both
|
72
|
+
Build a :class:`novel_downloader.models.DownloaderConfig` using both
|
73
|
+
general and site-specific settings.
|
65
74
|
|
66
|
-
:return:
|
75
|
+
:return: Fully populated configuration for the chapter/page downloader.
|
67
76
|
"""
|
68
|
-
|
69
|
-
debug =
|
77
|
+
s, g = self._site_cfg, self._gen_cfg
|
78
|
+
debug = g.get("debug") or {}
|
70
79
|
return DownloaderConfig(
|
71
|
-
request_interval=self.
|
72
|
-
retry_times=self.
|
73
|
-
backoff_factor=self.
|
74
|
-
workers=self.
|
75
|
-
skip_existing=self.
|
76
|
-
login_required=
|
77
|
-
save_html=debug.get("save_html", False),
|
78
|
-
raw_data_dir=
|
79
|
-
cache_dir=
|
80
|
-
storage_batch_size=
|
80
|
+
request_interval=self._pick("request_interval", 2.0, s, g),
|
81
|
+
retry_times=self._pick("retry_times", 3, s, g),
|
82
|
+
backoff_factor=self._pick("backoff_factor", 2.0, s, g),
|
83
|
+
workers=self._pick("workers", 2, s, g),
|
84
|
+
skip_existing=self._pick("skip_existing", True, s, g),
|
85
|
+
login_required=bool(s.get("login_required", False)),
|
86
|
+
save_html=bool(debug.get("save_html", False)),
|
87
|
+
raw_data_dir=g.get("raw_data_dir", "./raw_data"),
|
88
|
+
cache_dir=g.get("cache_dir", "./novel_cache"),
|
89
|
+
storage_batch_size=g.get("storage_batch_size", 1),
|
81
90
|
)
|
82
91
|
|
83
92
|
def get_parser_config(self) -> ParserConfig:
|
84
93
|
"""
|
85
|
-
Build a ParserConfig from general,
|
94
|
+
Build a :class:`novel_downloader.models.ParserConfig` from general,
|
95
|
+
OCR-related, and site-specific settings.
|
86
96
|
|
87
|
-
:return:
|
97
|
+
:return: Fully populated configuration for the parser stage.
|
88
98
|
"""
|
89
|
-
|
90
|
-
|
99
|
+
g = self._gen_cfg
|
100
|
+
s = self._site_cfg
|
101
|
+
g_font = g.get("font_ocr") or {}
|
102
|
+
s_font = s.get("font_ocr") or {}
|
103
|
+
font_ocr: dict[str, Any] = {**g_font, **s_font}
|
91
104
|
return ParserConfig(
|
92
|
-
cache_dir=
|
93
|
-
use_truncation=
|
94
|
-
decode_font=font_ocr.get("decode_font", False),
|
95
|
-
save_font_debug=font_ocr.get("save_font_debug", False),
|
96
|
-
batch_size=font_ocr.get("batch_size", 32),
|
105
|
+
cache_dir=g.get("cache_dir", "./novel_cache"),
|
106
|
+
use_truncation=bool(s.get("use_truncation", True)),
|
107
|
+
decode_font=bool(font_ocr.get("decode_font", False)),
|
108
|
+
save_font_debug=bool(font_ocr.get("save_font_debug", False)),
|
109
|
+
batch_size=int(font_ocr.get("batch_size", 32)),
|
110
|
+
fontocr_cfg=self._dict_to_fontocr_cfg(font_ocr),
|
97
111
|
)
|
98
112
|
|
99
113
|
def get_exporter_config(self) -> ExporterConfig:
|
100
114
|
"""
|
101
|
-
Build an ExporterConfig from
|
115
|
+
Build an :class:`novel_downloader.models.ExporterConfig` from the
|
116
|
+
``output`` and ``cleaner`` sections plus general settings.
|
102
117
|
|
103
|
-
:return:
|
118
|
+
:return: Fully populated configuration for text/ebook export.
|
104
119
|
"""
|
105
|
-
|
106
|
-
out = self._config.get("output"
|
107
|
-
cln = self._config.get("cleaner"
|
108
|
-
fmt = out.get("formats"
|
109
|
-
naming = out.get("naming"
|
110
|
-
epub_opts = out.get("epub"
|
120
|
+
g = self._gen_cfg
|
121
|
+
out = self._config.get("output") or {}
|
122
|
+
cln = self._config.get("cleaner") or {}
|
123
|
+
fmt = out.get("formats") or {}
|
124
|
+
naming = out.get("naming") or {}
|
125
|
+
epub_opts = out.get("epub") or {}
|
126
|
+
|
111
127
|
cleaner_cfg = self._dict_to_cleaner_cfg(cln)
|
112
128
|
return ExporterConfig(
|
113
|
-
cache_dir=
|
114
|
-
raw_data_dir=
|
115
|
-
output_dir=
|
116
|
-
clean_text=cln.get("clean_text",
|
129
|
+
cache_dir=g.get("cache_dir", "./novel_cache"),
|
130
|
+
raw_data_dir=g.get("raw_data_dir", "./raw_data"),
|
131
|
+
output_dir=g.get("output_dir", "./downloads"),
|
132
|
+
clean_text=cln.get("clean_text", False),
|
117
133
|
make_txt=fmt.get("make_txt", True),
|
118
|
-
make_epub=fmt.get("make_epub",
|
134
|
+
make_epub=fmt.get("make_epub", True),
|
119
135
|
make_md=fmt.get("make_md", False),
|
120
136
|
make_pdf=fmt.get("make_pdf", False),
|
121
137
|
append_timestamp=naming.get("append_timestamp", True),
|
@@ -128,35 +144,36 @@ class ConfigAdapter:
|
|
128
144
|
|
129
145
|
def get_login_config(self) -> dict[str, str]:
|
130
146
|
"""
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
147
|
+
Extract login-related fields from the current site configuration.
|
148
|
+
Only non-empty string values are returned; values are stripped.
|
149
|
+
|
150
|
+
:return: A subset of ``{"username","password","cookies"}`` that are non-empty
|
135
151
|
"""
|
136
152
|
out: dict[str, str] = {}
|
137
153
|
for key in ("username", "password", "cookies"):
|
138
154
|
val = self._site_cfg.get(key, "")
|
139
|
-
val
|
140
|
-
|
141
|
-
|
155
|
+
if isinstance(val, str):
|
156
|
+
s = val.strip()
|
157
|
+
if s:
|
158
|
+
out[key] = s
|
142
159
|
return out
|
143
160
|
|
144
161
|
def get_book_ids(self) -> list[BookConfig]:
|
145
162
|
"""
|
146
|
-
Extract the list of target books
|
163
|
+
Extract and normalize the list of target books for the current site.
|
147
164
|
|
148
|
-
|
149
|
-
* a single
|
150
|
-
* a dict
|
151
|
-
* a list of the above
|
165
|
+
Accepted shapes for ``site.book_ids``:
|
166
|
+
* a single ``str`` or ``int`` (book id)
|
167
|
+
* a dict with fields: book_id and optional start_id, end_id, ignore_ids
|
168
|
+
* a ``list`` containing any mix of the above
|
152
169
|
|
153
|
-
:return:
|
154
|
-
:raises ValueError:
|
170
|
+
:return: Normalized list of :class:`BookConfig`-compatible dictionaries.
|
171
|
+
:raises ValueError: If ``book_ids`` is neither a scalar ``str|int``, ``dict``,
|
172
|
+
nor ``list``.
|
155
173
|
"""
|
156
|
-
|
157
|
-
raw = site_cfg.get("book_ids", [])
|
174
|
+
raw = self._site_cfg.get("book_ids", [])
|
158
175
|
|
159
|
-
if isinstance(raw, str | int):
|
176
|
+
if isinstance(raw, (str | int)):
|
160
177
|
return [{"book_id": str(raw)}]
|
161
178
|
|
162
179
|
if isinstance(raw, dict):
|
@@ -170,151 +187,197 @@ class ConfigAdapter:
|
|
170
187
|
result: list[BookConfig] = []
|
171
188
|
for item in raw:
|
172
189
|
try:
|
173
|
-
if isinstance(item, str | int):
|
190
|
+
if isinstance(item, (str | int)):
|
174
191
|
result.append({"book_id": str(item)})
|
175
192
|
elif isinstance(item, dict):
|
176
193
|
result.append(self._dict_to_book_cfg(item))
|
177
194
|
except ValueError:
|
178
195
|
continue
|
179
|
-
|
180
196
|
return result
|
181
197
|
|
182
198
|
def get_log_level(self) -> str:
|
183
199
|
"""
|
184
|
-
Retrieve the logging level from
|
200
|
+
Retrieve the logging level from ``general.debug``.
|
185
201
|
|
186
|
-
:return:
|
202
|
+
:return: One of ``"DEBUG"``, ``"INFO"``, ``"WARNING"``, ``"ERROR"``
|
187
203
|
"""
|
188
|
-
debug_cfg = self.
|
204
|
+
debug_cfg = self._gen_cfg.get("debug", {})
|
189
205
|
return debug_cfg.get("log_level") or "INFO"
|
190
206
|
|
191
207
|
@property
|
192
208
|
def site(self) -> str:
|
193
|
-
"""
|
194
|
-
Get the current site name.
|
195
|
-
"""
|
196
209
|
return self._site
|
197
210
|
|
198
211
|
@site.setter
|
199
212
|
def site(self, value: str) -> None:
|
200
|
-
|
201
|
-
Set a new site name for configuration lookups.
|
213
|
+
self._site = value
|
202
214
|
|
203
|
-
|
215
|
+
@property
|
216
|
+
def _gen_cfg(self) -> dict[str, Any]:
|
204
217
|
"""
|
205
|
-
|
206
|
-
self._site_cfg = self._get_site_cfg()
|
218
|
+
A read-only view of the global ``general`` settings.
|
207
219
|
|
208
|
-
|
209
|
-
|
220
|
+
:return: ``config["general"]`` if present, else ``{}``.
|
221
|
+
"""
|
222
|
+
return self._config.get("general") or {}
|
210
223
|
|
211
|
-
|
224
|
+
@property
|
225
|
+
def _site_cfg(self) -> dict[str, Any]:
|
212
226
|
"""
|
213
|
-
Retrieve the configuration for
|
227
|
+
Retrieve the configuration block for the current site.
|
214
228
|
|
215
229
|
Lookup order:
|
216
|
-
1. If
|
217
|
-
2. Otherwise, if
|
218
|
-
3.
|
230
|
+
1. If a site-specific entry exists under ``config["sites"]``, return it.
|
231
|
+
2. Otherwise, if ``config["sites"]["common"]`` exists, return it.
|
232
|
+
3. Else return an empty dict.
|
219
233
|
|
220
|
-
:
|
221
|
-
:return: The site-specific or common configuration dict.
|
234
|
+
:return: Site-specific mapping, common mapping, or ``{}``.
|
222
235
|
"""
|
223
236
|
sites_cfg = self._config.get("sites") or {}
|
224
|
-
|
225
|
-
if self._site in sites_cfg:
|
237
|
+
if self._site in sites_cfg and isinstance(sites_cfg[self._site], dict):
|
226
238
|
return sites_cfg[self._site] or {}
|
227
|
-
|
228
239
|
return sites_cfg.get("common") or {}
|
229
240
|
|
241
|
+
@staticmethod
|
242
|
+
def _has_key(d: Mapping[str, Any] | None, key: str) -> bool:
|
243
|
+
"""
|
244
|
+
Check whether a mapping contains a key.
|
245
|
+
|
246
|
+
:param d: Mapping to inspect.
|
247
|
+
:param key: Key to look up.
|
248
|
+
:return: ``True`` if ``d`` is a Mapping and contains key; otherwise ``False``.
|
249
|
+
"""
|
250
|
+
return isinstance(d, Mapping) and (key in d)
|
251
|
+
|
252
|
+
def _pick(self, key: str, default: T, *sources: Mapping[str, Any]) -> T:
|
253
|
+
"""
|
254
|
+
Resolve ``key`` from the provided ``sources`` in order of precedence.
|
255
|
+
|
256
|
+
:param key: Configuration key to resolve.
|
257
|
+
:param default: Fallback value if ``key`` is absent in all sources.
|
258
|
+
:param sources: One or more mappings to check, in order of precedence.
|
259
|
+
:return: The first present value for ``key``, otherwise ``default``.
|
260
|
+
"""
|
261
|
+
for src in sources:
|
262
|
+
if self._has_key(src, key):
|
263
|
+
return src[key] # type: ignore[no-any-return]
|
264
|
+
return default
|
265
|
+
|
230
266
|
@staticmethod
|
231
267
|
def _dict_to_book_cfg(data: dict[str, Any]) -> BookConfig:
|
232
268
|
"""
|
233
|
-
Convert a
|
269
|
+
Convert a raw dict into a :class:`novel_downloader.models.BookConfig`
|
270
|
+
with normalized types (all IDs coerced to strings).
|
234
271
|
|
235
272
|
:param data: A dict that must contain at least "book_id".
|
236
|
-
:return:
|
237
|
-
:raises ValueError:
|
273
|
+
:return: Normalized :class:`BookConfig` mapping.
|
274
|
+
:raises ValueError: If ``"book_id"`` is missing.
|
238
275
|
"""
|
239
276
|
if "book_id" not in data:
|
240
277
|
raise ValueError("Missing required field 'book_id'")
|
241
278
|
|
242
|
-
|
279
|
+
out: BookConfig = {"book_id": str(data["book_id"])}
|
243
280
|
|
244
281
|
if "start_id" in data:
|
245
|
-
|
246
|
-
|
282
|
+
out["start_id"] = str(data["start_id"])
|
247
283
|
if "end_id" in data:
|
248
|
-
|
249
|
-
|
284
|
+
out["end_id"] = str(data["end_id"])
|
250
285
|
if "ignore_ids" in data:
|
251
|
-
|
286
|
+
with contextlib.suppress(Exception):
|
287
|
+
out["ignore_ids"] = [str(x) for x in data["ignore_ids"]]
|
288
|
+
return out
|
252
289
|
|
253
|
-
|
290
|
+
@staticmethod
|
291
|
+
def _dict_to_fontocr_cfg(data: dict[str, Any]) -> FontOCRConfig:
|
292
|
+
"""
|
293
|
+
Convert a raw ``font_ocr`` dict into a :class:`FontOCRConfig`.
|
294
|
+
"""
|
295
|
+
if not isinstance(data, dict):
|
296
|
+
return FontOCRConfig()
|
297
|
+
|
298
|
+
ishape = data.get("input_shape")
|
299
|
+
if isinstance(ishape, list):
|
300
|
+
ishape = tuple(ishape) # [C, H, W] -> (C, H, W)
|
301
|
+
|
302
|
+
return FontOCRConfig(
|
303
|
+
model_name=data.get("model_name"),
|
304
|
+
model_dir=data.get("model_dir"),
|
305
|
+
input_shape=ishape,
|
306
|
+
device=data.get("device"),
|
307
|
+
precision=data.get("precision", "fp32"),
|
308
|
+
cpu_threads=data.get("cpu_threads", 10),
|
309
|
+
enable_hpi=data.get("enable_hpi", False),
|
310
|
+
)
|
254
311
|
|
255
312
|
@classmethod
|
256
313
|
def _dict_to_cleaner_cfg(cls, cfg: dict[str, Any]) -> TextCleanerConfig:
|
257
314
|
"""
|
258
|
-
Convert a nested
|
315
|
+
Convert a nested ``cleaner`` block into a
|
316
|
+
:class:`novel_downloader.models.TextCleanerConfig`.
|
259
317
|
|
260
318
|
:param cfg: configuration dictionary
|
261
|
-
:return:
|
319
|
+
:return: Aggregated title/content rules with external file contents merged
|
262
320
|
"""
|
263
|
-
|
264
|
-
|
265
|
-
title_remove = title_section.get("remove_patterns", [])
|
266
|
-
title_repl = title_section.get("replace", {})
|
267
|
-
|
268
|
-
title_ext = title_section.get("external", {})
|
269
|
-
if title_ext.get("enabled", False):
|
270
|
-
title_ext_rm_p = title_ext.get("remove_patterns", "")
|
271
|
-
title_ext_rp_p = title_ext.get("replace", "")
|
272
|
-
|
273
|
-
title_remove_ext = cls._load_str_list(title_ext_rm_p)
|
274
|
-
title_remove += title_remove_ext
|
275
|
-
|
276
|
-
title_repl_ext = cls._load_str_dict(title_ext_rp_p)
|
277
|
-
title_repl = {**title_repl, **title_repl_ext}
|
278
|
-
|
279
|
-
# Content rules
|
280
|
-
content_section = cfg.get("content", {})
|
281
|
-
content_remove = content_section.get("remove_patterns", [])
|
282
|
-
content_repl = content_section.get("replace", {})
|
283
|
-
|
284
|
-
content_ext = content_section.get("external", {})
|
285
|
-
|
286
|
-
if content_ext.get("enabled", False):
|
287
|
-
content_ext_rm_p = content_ext.get("remove_patterns", "")
|
288
|
-
content_ext_rp_p = content_ext.get("replace", "")
|
289
|
-
|
290
|
-
content_remove_ext = cls._load_str_list(content_ext_rm_p)
|
291
|
-
content_remove += content_remove_ext
|
292
|
-
|
293
|
-
content_repl_ext = cls._load_str_dict(content_ext_rp_p)
|
294
|
-
content_repl = {**content_repl, **content_repl_ext}
|
295
|
-
|
321
|
+
t_remove, t_replace = cls._merge_rules(cfg.get("title", {}) or {})
|
322
|
+
c_remove, c_replace = cls._merge_rules(cfg.get("content", {}) or {})
|
296
323
|
return TextCleanerConfig(
|
297
324
|
remove_invisible=cfg.get("remove_invisible", True),
|
298
|
-
title_remove_patterns=
|
299
|
-
title_replacements=
|
300
|
-
content_remove_patterns=
|
301
|
-
content_replacements=
|
325
|
+
title_remove_patterns=t_remove,
|
326
|
+
title_replacements=t_replace,
|
327
|
+
content_remove_patterns=c_remove,
|
328
|
+
content_replacements=c_replace,
|
302
329
|
)
|
303
330
|
|
331
|
+
@classmethod
|
332
|
+
def _merge_rules(cls, section: dict[str, Any]) -> tuple[list[str], dict[str, str]]:
|
333
|
+
"""
|
334
|
+
Merge inline patterns/replacements with any enabled external files.
|
335
|
+
|
336
|
+
:param section: Mapping describing either the ``title`` or ``content`` rules.
|
337
|
+
:return: Tuple ``(remove_patterns, replace)`` after merging.
|
338
|
+
"""
|
339
|
+
remove = list(section.get("remove_patterns") or [])
|
340
|
+
replace = dict(section.get("replace") or {})
|
341
|
+
ext = section.get("external") or {}
|
342
|
+
if ext.get("enabled", False):
|
343
|
+
rm_path = ext.get("remove_patterns") or ""
|
344
|
+
rp_path = ext.get("replace") or ""
|
345
|
+
remove += cls._load_str_list(rm_path)
|
346
|
+
replace.update(cls._load_str_dict(rp_path))
|
347
|
+
return remove, replace
|
348
|
+
|
304
349
|
@staticmethod
|
305
350
|
def _load_str_list(path: str) -> list[str]:
|
351
|
+
"""
|
352
|
+
Load a JSON file containing a list of strings.
|
353
|
+
|
354
|
+
:param path: File path to a JSON array (e.g., ``["a", "b"]``).
|
355
|
+
:return: Parsed list on success; empty list if ``path`` is empty, file is
|
356
|
+
missing, or content is invalid.
|
357
|
+
"""
|
358
|
+
if not path:
|
359
|
+
return []
|
306
360
|
try:
|
307
361
|
with open(path, encoding="utf-8") as f:
|
308
|
-
|
309
|
-
|
362
|
+
data = json.load(f)
|
363
|
+
return list(data) if isinstance(data, list) else []
|
310
364
|
except Exception:
|
311
365
|
return []
|
312
366
|
|
313
367
|
@staticmethod
|
314
368
|
def _load_str_dict(path: str) -> dict[str, str]:
|
369
|
+
"""
|
370
|
+
Load a JSON file containing a dict of string-to-string mappings.
|
371
|
+
|
372
|
+
:param path: File path to a JSON object (e.g., ``{"old":"new"}``).
|
373
|
+
:return: Parsed dict on success; empty dict if ``path`` is empty, file is
|
374
|
+
missing, or content is invalid.
|
375
|
+
"""
|
376
|
+
if not path:
|
377
|
+
return {}
|
315
378
|
try:
|
316
379
|
with open(path, encoding="utf-8") as f:
|
317
|
-
|
318
|
-
|
380
|
+
data = json.load(f)
|
381
|
+
return dict(data) if isinstance(data, dict) else {}
|
319
382
|
except Exception:
|
320
383
|
return {}
|
@@ -7,12 +7,11 @@ This package serves as the core layer of the novel_downloader system.
|
|
7
7
|
|
8
8
|
It provides factory methods for constructing key components required for
|
9
9
|
downloading and processing online novel content, including:
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
- search: Provides unified search functionality across supported novel sites.
|
10
|
+
* Downloader: Handles the full download lifecycle of a book or a batch of books.
|
11
|
+
* Parser: Extracts structured data from HTML or SSR content.
|
12
|
+
* Fetcher: Sends HTTP requests and manages sessions, including login if required.
|
13
|
+
* Exporter: Responsible for exporting downloaded data into various output formats.
|
14
|
+
* search: Provides unified search functionality across supported novel sites.
|
16
15
|
"""
|
17
16
|
|
18
17
|
__all__ = [
|
@@ -8,8 +8,6 @@ novel_downloader.core.archived.deqixs.fetcher
|
|
8
8
|
from typing import Any
|
9
9
|
|
10
10
|
from novel_downloader.core.fetchers.base import BaseSession
|
11
|
-
from novel_downloader.models import FetcherConfig
|
12
|
-
from novel_downloader.utils import async_jitter_sleep
|
13
11
|
|
14
12
|
# from novel_downloader.core.fetchers.registry import register_fetcher
|
15
13
|
|
@@ -26,25 +24,11 @@ class DeqixsSession(BaseSession):
|
|
26
24
|
BOOK_INFO_URL = "https://www.deqixs.com/xiaoshuo/{book_id}/"
|
27
25
|
CHAPTER_URL = "https://www.deqixs.com/xiaoshuo/{book_id}/{chapter_id}.html"
|
28
26
|
|
29
|
-
def __init__(
|
30
|
-
self,
|
31
|
-
config: FetcherConfig,
|
32
|
-
cookies: dict[str, str] | None = None,
|
33
|
-
**kwargs: Any,
|
34
|
-
) -> None:
|
35
|
-
super().__init__("deqixs", config, cookies, **kwargs)
|
36
|
-
|
37
27
|
async def get_book_info(
|
38
28
|
self,
|
39
29
|
book_id: str,
|
40
30
|
**kwargs: Any,
|
41
31
|
) -> list[str]:
|
42
|
-
"""
|
43
|
-
Fetch the raw HTML of the book info page asynchronously.
|
44
|
-
|
45
|
-
:param book_id: The book identifier.
|
46
|
-
:return: The page content as a string.
|
47
|
-
"""
|
48
32
|
url = self.book_info_url(book_id=book_id)
|
49
33
|
return [await self.fetch(url, **kwargs)]
|
50
34
|
|
@@ -54,13 +38,6 @@ class DeqixsSession(BaseSession):
|
|
54
38
|
chapter_id: str,
|
55
39
|
**kwargs: Any,
|
56
40
|
) -> list[str]:
|
57
|
-
"""
|
58
|
-
Fetch the raw HTML of a single chapter asynchronously.
|
59
|
-
|
60
|
-
:param book_id: The book identifier.
|
61
|
-
:param chapter_id: The chapter identifier.
|
62
|
-
:return: The chapter content as a string.
|
63
|
-
"""
|
64
41
|
html_pages: list[str] = []
|
65
42
|
idx = 1
|
66
43
|
|
@@ -85,11 +62,7 @@ class DeqixsSession(BaseSession):
|
|
85
62
|
|
86
63
|
html_pages.append(html)
|
87
64
|
idx += 1
|
88
|
-
await
|
89
|
-
self.request_interval,
|
90
|
-
mul_spread=1.1,
|
91
|
-
max_sleep=self.request_interval + 2,
|
92
|
-
)
|
65
|
+
await self._sleep()
|
93
66
|
|
94
67
|
return html_pages
|
95
68
|
|
@@ -11,9 +11,11 @@ __all__ = [
|
|
11
11
|
"CommonDownloader",
|
12
12
|
"QianbiDownloader",
|
13
13
|
"QidianDownloader",
|
14
|
+
"QqbookDownloader",
|
14
15
|
]
|
15
16
|
|
16
17
|
from .common import CommonDownloader
|
17
18
|
from .qianbi import QianbiDownloader
|
18
19
|
from .qidian import QidianDownloader
|
20
|
+
from .qqbook import QqbookDownloader
|
19
21
|
from .registry import get_downloader
|