novel-downloader 2.0.0__py3-none-any.whl → 2.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/download.py +3 -3
- novel_downloader/cli/export.py +1 -1
- novel_downloader/cli/ui.py +7 -7
- novel_downloader/config/adapter.py +191 -154
- novel_downloader/core/__init__.py +5 -6
- novel_downloader/core/exporters/common/txt.py +9 -9
- novel_downloader/core/exporters/linovelib/txt.py +9 -9
- novel_downloader/core/fetchers/qidian.py +20 -35
- novel_downloader/core/interfaces/fetcher.py +2 -2
- novel_downloader/core/interfaces/parser.py +2 -2
- novel_downloader/core/parsers/base.py +1 -0
- novel_downloader/core/parsers/eightnovel.py +2 -2
- novel_downloader/core/parsers/esjzone.py +3 -3
- novel_downloader/core/parsers/qidian/main_parser.py +747 -12
- novel_downloader/core/parsers/qidian/utils/__init__.py +2 -21
- novel_downloader/core/parsers/qidian/utils/node_decryptor.py +4 -4
- novel_downloader/core/parsers/xiguashuwu.py +6 -12
- novel_downloader/locales/en.json +3 -3
- novel_downloader/locales/zh.json +3 -3
- novel_downloader/utils/__init__.py +0 -2
- novel_downloader/utils/chapter_storage.py +2 -3
- novel_downloader/utils/constants.py +1 -3
- novel_downloader/utils/cookies.py +32 -17
- novel_downloader/utils/crypto_utils/__init__.py +0 -6
- novel_downloader/utils/crypto_utils/rc4.py +40 -50
- novel_downloader/utils/epub/__init__.py +2 -3
- novel_downloader/utils/epub/builder.py +6 -6
- novel_downloader/utils/epub/constants.py +5 -5
- novel_downloader/utils/epub/documents.py +7 -7
- novel_downloader/utils/epub/models.py +8 -8
- novel_downloader/utils/epub/utils.py +10 -10
- novel_downloader/utils/file_utils/io.py +48 -73
- novel_downloader/utils/file_utils/normalize.py +1 -7
- novel_downloader/utils/file_utils/sanitize.py +4 -11
- novel_downloader/utils/fontocr/__init__.py +13 -0
- novel_downloader/utils/{fontocr.py → fontocr/core.py} +70 -61
- novel_downloader/utils/fontocr/loader.py +50 -0
- novel_downloader/utils/logger.py +80 -56
- novel_downloader/utils/network.py +16 -40
- novel_downloader/utils/text_utils/text_cleaner.py +39 -30
- novel_downloader/utils/text_utils/truncate_utils.py +3 -14
- novel_downloader/utils/time_utils/sleep_utils.py +53 -43
- novel_downloader/web/main.py +1 -1
- novel_downloader/web/pages/search.py +3 -3
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.1.dist-info}/METADATA +2 -1
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.1.dist-info}/RECORD +51 -55
- novel_downloader/core/parsers/qidian/book_info_parser.py +0 -89
- novel_downloader/core/parsers/qidian/chapter_encrypted.py +0 -470
- novel_downloader/core/parsers/qidian/chapter_normal.py +0 -126
- novel_downloader/core/parsers/qidian/chapter_router.py +0 -68
- novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +0 -143
- novel_downloader/core/parsers/qidian/utils/helpers.py +0 -110
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.1.dist-info}/WHEEL +0 -0
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.1.dist-info}/entry_points.txt +0 -0
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.1.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.1.dist-info}/top_level.txt +0 -0
novel_downloader/__init__.py
CHANGED
novel_downloader/cli/download.py
CHANGED
@@ -155,7 +155,7 @@ async def _download(
|
|
155
155
|
exporter_cfg = adapter.get_exporter_config()
|
156
156
|
login_cfg = adapter.get_login_config()
|
157
157
|
log_level = adapter.get_log_level()
|
158
|
-
setup_logging(
|
158
|
+
setup_logging(console_level=log_level)
|
159
159
|
|
160
160
|
parser = get_parser(site, parser_cfg)
|
161
161
|
exporter = None
|
@@ -214,8 +214,8 @@ async def _prompt_login_fields(
|
|
214
214
|
ui.info(t("login_use_config"))
|
215
215
|
continue
|
216
216
|
|
217
|
-
value: str | dict[str, str]
|
218
|
-
|
217
|
+
value: str | dict[str, str] = ""
|
218
|
+
for _ in range(5):
|
219
219
|
if field.type == "password":
|
220
220
|
value = ui.prompt_password(t("login_enter_password"))
|
221
221
|
elif field.type == "cookie":
|
novel_downloader/cli/export.py
CHANGED
@@ -56,7 +56,7 @@ def handle_export(args: Namespace) -> None:
|
|
56
56
|
exporter_cfg = adapter.get_exporter_config()
|
57
57
|
log_level = adapter.get_log_level()
|
58
58
|
exporter = get_exporter(site, exporter_cfg)
|
59
|
-
setup_logging(
|
59
|
+
setup_logging(console_level=log_level)
|
60
60
|
|
61
61
|
for book_id in book_ids:
|
62
62
|
ui.info(t("export_processing", book_id=book_id, format=export_format))
|
novel_downloader/cli/ui.py
CHANGED
@@ -7,12 +7,12 @@ A small set of Rich-based helpers to keep CLI presentation and prompts
|
|
7
7
|
consistent across subcommands.
|
8
8
|
|
9
9
|
Public API:
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
10
|
+
* info, success, warn, error
|
11
|
+
* confirm
|
12
|
+
* prompt, prompt_password
|
13
|
+
* render_table
|
14
|
+
* select_index
|
15
|
+
* print_progress
|
16
16
|
"""
|
17
17
|
|
18
18
|
from __future__ import annotations
|
@@ -71,7 +71,7 @@ def prompt(message: str, *, default: str | None = None) -> str:
|
|
71
71
|
:return: The user's input.
|
72
72
|
"""
|
73
73
|
try:
|
74
|
-
result: str = Prompt.ask(message, default=default or "")
|
74
|
+
result: str = Prompt.ask(message, default=default or "", show_default=False)
|
75
75
|
return result
|
76
76
|
except (KeyboardInterrupt, EOFError):
|
77
77
|
warn("Cancelled.")
|
@@ -4,11 +4,13 @@ novel_downloader.config.adapter
|
|
4
4
|
-------------------------------
|
5
5
|
|
6
6
|
Defines ConfigAdapter, which maps a raw configuration dictionary and
|
7
|
-
site
|
7
|
+
site into structured dataclass-based config models.
|
8
8
|
"""
|
9
9
|
|
10
|
+
import contextlib
|
10
11
|
import json
|
11
|
-
from
|
12
|
+
from collections.abc import Mapping
|
13
|
+
from typing import Any, TypeVar
|
12
14
|
|
13
15
|
from novel_downloader.models import (
|
14
16
|
BookConfig,
|
@@ -26,96 +28,106 @@ class ConfigAdapter:
|
|
26
28
|
"""
|
27
29
|
Adapter to map a raw configuration dictionary and site name
|
28
30
|
into structured dataclass configuration models.
|
31
|
+
|
32
|
+
Resolution order for each field:
|
33
|
+
1. ``config["sites"][<site>]`` (if present)
|
34
|
+
2. ``config["general"]`` (if present)
|
35
|
+
3. Hard-coded default passed by the caller
|
29
36
|
"""
|
30
37
|
|
31
|
-
def __init__(self, config:
|
38
|
+
def __init__(self, config: Mapping[str, Any], site: str):
|
32
39
|
"""
|
33
|
-
Initialize the adapter.
|
40
|
+
Initialize the adapter with a configuration mapping and a site key.
|
34
41
|
|
35
|
-
:param config:
|
36
|
-
:param site:
|
42
|
+
:param config: Fully loaded configuration mapping.
|
43
|
+
:param site: Current site key (e.g., ``"qidian"``).
|
37
44
|
"""
|
38
|
-
self._config = config
|
39
|
-
self._site = site
|
40
|
-
self._site_cfg: dict[str, Any] = self._get_site_cfg()
|
41
|
-
self._gen_cfg: dict[str, Any] = config.get("general") or {}
|
45
|
+
self._config: dict[str, Any] = dict(config)
|
46
|
+
self._site: str = site
|
42
47
|
|
43
48
|
def get_fetcher_config(self) -> FetcherConfig:
|
44
49
|
"""
|
45
|
-
Build a FetcherConfig
|
50
|
+
Build a :class:`novel_downloader.models.FetcherConfig` by resolving fields
|
51
|
+
from site-specific and general settings.
|
46
52
|
|
47
|
-
:return:
|
53
|
+
:return: Fully populated configuration for the network fetcher.
|
48
54
|
"""
|
55
|
+
s, g = self._site_cfg, self._gen_cfg
|
49
56
|
return FetcherConfig(
|
50
|
-
request_interval=self.
|
51
|
-
retry_times=self.
|
52
|
-
backoff_factor=self.
|
53
|
-
timeout=self.
|
54
|
-
max_connections=self.
|
55
|
-
max_rps=self.
|
56
|
-
user_agent=self.
|
57
|
-
headers=self.
|
58
|
-
verify_ssl=self.
|
59
|
-
locale_style=self.
|
57
|
+
request_interval=self._pick("request_interval", 2.0, s, g),
|
58
|
+
retry_times=self._pick("retry_times", 3, s, g),
|
59
|
+
backoff_factor=self._pick("backoff_factor", 2.0, s, g),
|
60
|
+
timeout=self._pick("timeout", 30.0, s, g),
|
61
|
+
max_connections=self._pick("max_connections", 10, s, g),
|
62
|
+
max_rps=self._pick("max_rps", 1000.0, s, g),
|
63
|
+
user_agent=self._pick("user_agent", None, s, g),
|
64
|
+
headers=self._pick("headers", None, s, g),
|
65
|
+
verify_ssl=self._pick("verify_ssl", True, s, g),
|
66
|
+
locale_style=self._pick("locale_style", "simplified", s, g),
|
60
67
|
)
|
61
68
|
|
62
69
|
def get_downloader_config(self) -> DownloaderConfig:
|
63
70
|
"""
|
64
|
-
Build a DownloaderConfig using both
|
71
|
+
Build a :class:`novel_downloader.models.DownloaderConfig` using both
|
72
|
+
general and site-specific settings.
|
65
73
|
|
66
|
-
:return:
|
74
|
+
:return: Fully populated configuration for the chapter/page downloader.
|
67
75
|
"""
|
68
|
-
|
69
|
-
debug =
|
76
|
+
s, g = self._site_cfg, self._gen_cfg
|
77
|
+
debug = g.get("debug") or {}
|
70
78
|
return DownloaderConfig(
|
71
|
-
request_interval=self.
|
72
|
-
retry_times=self.
|
73
|
-
backoff_factor=self.
|
74
|
-
workers=self.
|
75
|
-
skip_existing=self.
|
76
|
-
login_required=
|
77
|
-
save_html=debug.get("save_html", False),
|
78
|
-
raw_data_dir=
|
79
|
-
cache_dir=
|
80
|
-
storage_batch_size=
|
79
|
+
request_interval=self._pick("request_interval", 2.0, s, g),
|
80
|
+
retry_times=self._pick("retry_times", 3, s, g),
|
81
|
+
backoff_factor=self._pick("backoff_factor", 2.0, s, g),
|
82
|
+
workers=self._pick("workers", 2, s, g),
|
83
|
+
skip_existing=self._pick("skip_existing", True, s, g),
|
84
|
+
login_required=bool(s.get("login_required", False)),
|
85
|
+
save_html=bool(debug.get("save_html", False)),
|
86
|
+
raw_data_dir=g.get("raw_data_dir", "./raw_data"),
|
87
|
+
cache_dir=g.get("cache_dir", "./novel_cache"),
|
88
|
+
storage_batch_size=g.get("storage_batch_size", 1),
|
81
89
|
)
|
82
90
|
|
83
91
|
def get_parser_config(self) -> ParserConfig:
|
84
92
|
"""
|
85
|
-
Build a ParserConfig from general,
|
93
|
+
Build a :class:`novel_downloader.models.ParserConfig` from general,
|
94
|
+
OCR-related, and site-specific settings.
|
86
95
|
|
87
|
-
:return:
|
96
|
+
:return: Fully populated configuration for the parser stage.
|
88
97
|
"""
|
89
|
-
|
90
|
-
|
98
|
+
g = self._gen_cfg
|
99
|
+
s = self._site_cfg
|
100
|
+
font_ocr = g.get("font_ocr") or {}
|
91
101
|
return ParserConfig(
|
92
|
-
cache_dir=
|
93
|
-
use_truncation=
|
94
|
-
decode_font=font_ocr.get("decode_font", False),
|
95
|
-
save_font_debug=font_ocr.get("save_font_debug", False),
|
96
|
-
batch_size=font_ocr.get("batch_size", 32),
|
102
|
+
cache_dir=g.get("cache_dir", "./novel_cache"),
|
103
|
+
use_truncation=bool(s.get("use_truncation", True)),
|
104
|
+
decode_font=bool(font_ocr.get("decode_font", False)),
|
105
|
+
save_font_debug=bool(font_ocr.get("save_font_debug", False)),
|
106
|
+
batch_size=int(font_ocr.get("batch_size", 32)),
|
97
107
|
)
|
98
108
|
|
99
109
|
def get_exporter_config(self) -> ExporterConfig:
|
100
110
|
"""
|
101
|
-
Build an ExporterConfig from
|
111
|
+
Build an :class:`novel_downloader.models.ExporterConfig` from the
|
112
|
+
``output`` and ``cleaner`` sections plus general settings.
|
102
113
|
|
103
|
-
:return:
|
114
|
+
:return: Fully populated configuration for text/ebook export.
|
104
115
|
"""
|
105
|
-
|
106
|
-
out = self._config.get("output"
|
107
|
-
cln = self._config.get("cleaner"
|
108
|
-
fmt = out.get("formats"
|
109
|
-
naming = out.get("naming"
|
110
|
-
epub_opts = out.get("epub"
|
116
|
+
g = self._gen_cfg
|
117
|
+
out = self._config.get("output") or {}
|
118
|
+
cln = self._config.get("cleaner") or {}
|
119
|
+
fmt = out.get("formats") or {}
|
120
|
+
naming = out.get("naming") or {}
|
121
|
+
epub_opts = out.get("epub") or {}
|
122
|
+
|
111
123
|
cleaner_cfg = self._dict_to_cleaner_cfg(cln)
|
112
124
|
return ExporterConfig(
|
113
|
-
cache_dir=
|
114
|
-
raw_data_dir=
|
115
|
-
output_dir=
|
116
|
-
clean_text=cln.get("clean_text",
|
125
|
+
cache_dir=g.get("cache_dir", "./novel_cache"),
|
126
|
+
raw_data_dir=g.get("raw_data_dir", "./raw_data"),
|
127
|
+
output_dir=g.get("output_dir", "./downloads"),
|
128
|
+
clean_text=cln.get("clean_text", False),
|
117
129
|
make_txt=fmt.get("make_txt", True),
|
118
|
-
make_epub=fmt.get("make_epub",
|
130
|
+
make_epub=fmt.get("make_epub", True),
|
119
131
|
make_md=fmt.get("make_md", False),
|
120
132
|
make_pdf=fmt.get("make_pdf", False),
|
121
133
|
append_timestamp=naming.get("append_timestamp", True),
|
@@ -128,35 +140,36 @@ class ConfigAdapter:
|
|
128
140
|
|
129
141
|
def get_login_config(self) -> dict[str, str]:
|
130
142
|
"""
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
143
|
+
Extract login-related fields from the current site configuration.
|
144
|
+
Only non-empty string values are returned; values are stripped.
|
145
|
+
|
146
|
+
:return: A subset of ``{"username","password","cookies"}`` that are non-empty
|
135
147
|
"""
|
136
148
|
out: dict[str, str] = {}
|
137
149
|
for key in ("username", "password", "cookies"):
|
138
150
|
val = self._site_cfg.get(key, "")
|
139
|
-
val
|
140
|
-
|
141
|
-
|
151
|
+
if isinstance(val, str):
|
152
|
+
s = val.strip()
|
153
|
+
if s:
|
154
|
+
out[key] = s
|
142
155
|
return out
|
143
156
|
|
144
157
|
def get_book_ids(self) -> list[BookConfig]:
|
145
158
|
"""
|
146
|
-
Extract the list of target books
|
159
|
+
Extract and normalize the list of target books for the current site.
|
147
160
|
|
148
|
-
|
149
|
-
* a single
|
150
|
-
* a dict
|
151
|
-
* a list of the above
|
161
|
+
Accepted shapes for ``site.book_ids``:
|
162
|
+
* a single ``str`` or ``int`` (book id)
|
163
|
+
* a dict with fields: book_id and optional start_id, end_id, ignore_ids
|
164
|
+
* a ``list`` containing any mix of the above
|
152
165
|
|
153
|
-
:return:
|
154
|
-
:raises ValueError:
|
166
|
+
:return: Normalized list of :class:`BookConfig`-compatible dictionaries.
|
167
|
+
:raises ValueError: If ``book_ids`` is neither a scalar ``str|int``, ``dict``,
|
168
|
+
nor ``list``.
|
155
169
|
"""
|
156
|
-
|
157
|
-
raw = site_cfg.get("book_ids", [])
|
170
|
+
raw = self._site_cfg.get("book_ids", [])
|
158
171
|
|
159
|
-
if isinstance(raw, str | int):
|
172
|
+
if isinstance(raw, (str | int)):
|
160
173
|
return [{"book_id": str(raw)}]
|
161
174
|
|
162
175
|
if isinstance(raw, dict):
|
@@ -170,151 +183,175 @@ class ConfigAdapter:
|
|
170
183
|
result: list[BookConfig] = []
|
171
184
|
for item in raw:
|
172
185
|
try:
|
173
|
-
if isinstance(item, str | int):
|
186
|
+
if isinstance(item, (str | int)):
|
174
187
|
result.append({"book_id": str(item)})
|
175
188
|
elif isinstance(item, dict):
|
176
189
|
result.append(self._dict_to_book_cfg(item))
|
177
190
|
except ValueError:
|
178
191
|
continue
|
179
|
-
|
180
192
|
return result
|
181
193
|
|
182
194
|
def get_log_level(self) -> str:
|
183
195
|
"""
|
184
|
-
Retrieve the logging level from
|
196
|
+
Retrieve the logging level from ``general.debug``.
|
185
197
|
|
186
|
-
:return:
|
198
|
+
:return: One of ``"DEBUG"``, ``"INFO"``, ``"WARNING"``, ``"ERROR"``
|
187
199
|
"""
|
188
|
-
debug_cfg = self.
|
200
|
+
debug_cfg = self._gen_cfg.get("debug", {})
|
189
201
|
return debug_cfg.get("log_level") or "INFO"
|
190
202
|
|
191
203
|
@property
|
192
204
|
def site(self) -> str:
|
193
|
-
"""
|
194
|
-
Get the current site name.
|
195
|
-
"""
|
196
205
|
return self._site
|
197
206
|
|
198
207
|
@site.setter
|
199
208
|
def site(self, value: str) -> None:
|
200
|
-
|
201
|
-
Set a new site name for configuration lookups.
|
209
|
+
self._site = value
|
202
210
|
|
203
|
-
|
211
|
+
@property
|
212
|
+
def _gen_cfg(self) -> dict[str, Any]:
|
204
213
|
"""
|
205
|
-
|
206
|
-
self._site_cfg = self._get_site_cfg()
|
214
|
+
A read-only view of the global ``general`` settings.
|
207
215
|
|
208
|
-
|
209
|
-
|
216
|
+
:return: ``config["general"]`` if present, else ``{}``.
|
217
|
+
"""
|
218
|
+
return self._config.get("general") or {}
|
210
219
|
|
211
|
-
|
220
|
+
@property
|
221
|
+
def _site_cfg(self) -> dict[str, Any]:
|
212
222
|
"""
|
213
|
-
Retrieve the configuration for
|
223
|
+
Retrieve the configuration block for the current site.
|
214
224
|
|
215
225
|
Lookup order:
|
216
|
-
1. If
|
217
|
-
2. Otherwise, if
|
218
|
-
3.
|
226
|
+
1. If a site-specific entry exists under ``config["sites"]``, return it.
|
227
|
+
2. Otherwise, if ``config["sites"]["common"]`` exists, return it.
|
228
|
+
3. Else return an empty dict.
|
219
229
|
|
220
|
-
:
|
221
|
-
:return: The site-specific or common configuration dict.
|
230
|
+
:return: Site-specific mapping, common mapping, or ``{}``.
|
222
231
|
"""
|
223
232
|
sites_cfg = self._config.get("sites") or {}
|
224
|
-
|
225
|
-
if self._site in sites_cfg:
|
233
|
+
if self._site in sites_cfg and isinstance(sites_cfg[self._site], dict):
|
226
234
|
return sites_cfg[self._site] or {}
|
227
|
-
|
228
235
|
return sites_cfg.get("common") or {}
|
229
236
|
|
237
|
+
@staticmethod
|
238
|
+
def _has_key(d: Mapping[str, Any] | None, key: str) -> bool:
|
239
|
+
"""
|
240
|
+
Check whether a mapping contains a key.
|
241
|
+
|
242
|
+
:param d: Mapping to inspect.
|
243
|
+
:param key: Key to look up.
|
244
|
+
:return: ``True`` if ``d`` is a Mapping and contains key; otherwise ``False``.
|
245
|
+
"""
|
246
|
+
return isinstance(d, Mapping) and (key in d)
|
247
|
+
|
248
|
+
def _pick(self, key: str, default: T, *sources: Mapping[str, Any]) -> T:
|
249
|
+
"""
|
250
|
+
Resolve ``key`` from the provided ``sources`` in order of precedence.
|
251
|
+
|
252
|
+
:param key: Configuration key to resolve.
|
253
|
+
:param default: Fallback value if ``key`` is absent in all sources.
|
254
|
+
:param sources: One or more mappings to check, in order of precedence.
|
255
|
+
:return: The first present value for ``key``, otherwise ``default``.
|
256
|
+
"""
|
257
|
+
for src in sources:
|
258
|
+
if self._has_key(src, key):
|
259
|
+
return src[key] # type: ignore[no-any-return]
|
260
|
+
return default
|
261
|
+
|
230
262
|
@staticmethod
|
231
263
|
def _dict_to_book_cfg(data: dict[str, Any]) -> BookConfig:
|
232
264
|
"""
|
233
|
-
Convert a
|
265
|
+
Convert a raw dict into a :class:`novel_downloader.models.BookConfig`
|
266
|
+
with normalized types (all IDs coerced to strings).
|
234
267
|
|
235
268
|
:param data: A dict that must contain at least "book_id".
|
236
|
-
:return:
|
237
|
-
:raises ValueError:
|
269
|
+
:return: Normalized :class:`BookConfig` mapping.
|
270
|
+
:raises ValueError: If ``"book_id"`` is missing.
|
238
271
|
"""
|
239
272
|
if "book_id" not in data:
|
240
273
|
raise ValueError("Missing required field 'book_id'")
|
241
274
|
|
242
|
-
|
275
|
+
out: BookConfig = {"book_id": str(data["book_id"])}
|
243
276
|
|
244
277
|
if "start_id" in data:
|
245
|
-
|
246
|
-
|
278
|
+
out["start_id"] = str(data["start_id"])
|
247
279
|
if "end_id" in data:
|
248
|
-
|
249
|
-
|
280
|
+
out["end_id"] = str(data["end_id"])
|
250
281
|
if "ignore_ids" in data:
|
251
|
-
|
252
|
-
|
253
|
-
return
|
282
|
+
with contextlib.suppress(Exception):
|
283
|
+
out["ignore_ids"] = [str(x) for x in data["ignore_ids"]]
|
284
|
+
return out
|
254
285
|
|
255
286
|
@classmethod
|
256
287
|
def _dict_to_cleaner_cfg(cls, cfg: dict[str, Any]) -> TextCleanerConfig:
|
257
288
|
"""
|
258
|
-
Convert a nested
|
289
|
+
Convert a nested ``cleaner`` block into a
|
290
|
+
:class:`novel_downloader.models.TextCleanerConfig`.
|
259
291
|
|
260
292
|
:param cfg: configuration dictionary
|
261
|
-
:return:
|
293
|
+
:return: Aggregated title/content rules with external file contents merged
|
262
294
|
"""
|
263
|
-
|
264
|
-
|
265
|
-
title_remove = title_section.get("remove_patterns", [])
|
266
|
-
title_repl = title_section.get("replace", {})
|
267
|
-
|
268
|
-
title_ext = title_section.get("external", {})
|
269
|
-
if title_ext.get("enabled", False):
|
270
|
-
title_ext_rm_p = title_ext.get("remove_patterns", "")
|
271
|
-
title_ext_rp_p = title_ext.get("replace", "")
|
272
|
-
|
273
|
-
title_remove_ext = cls._load_str_list(title_ext_rm_p)
|
274
|
-
title_remove += title_remove_ext
|
275
|
-
|
276
|
-
title_repl_ext = cls._load_str_dict(title_ext_rp_p)
|
277
|
-
title_repl = {**title_repl, **title_repl_ext}
|
278
|
-
|
279
|
-
# Content rules
|
280
|
-
content_section = cfg.get("content", {})
|
281
|
-
content_remove = content_section.get("remove_patterns", [])
|
282
|
-
content_repl = content_section.get("replace", {})
|
283
|
-
|
284
|
-
content_ext = content_section.get("external", {})
|
285
|
-
|
286
|
-
if content_ext.get("enabled", False):
|
287
|
-
content_ext_rm_p = content_ext.get("remove_patterns", "")
|
288
|
-
content_ext_rp_p = content_ext.get("replace", "")
|
289
|
-
|
290
|
-
content_remove_ext = cls._load_str_list(content_ext_rm_p)
|
291
|
-
content_remove += content_remove_ext
|
292
|
-
|
293
|
-
content_repl_ext = cls._load_str_dict(content_ext_rp_p)
|
294
|
-
content_repl = {**content_repl, **content_repl_ext}
|
295
|
-
|
295
|
+
t_remove, t_replace = cls._merge_rules(cfg.get("title", {}) or {})
|
296
|
+
c_remove, c_replace = cls._merge_rules(cfg.get("content", {}) or {})
|
296
297
|
return TextCleanerConfig(
|
297
298
|
remove_invisible=cfg.get("remove_invisible", True),
|
298
|
-
title_remove_patterns=
|
299
|
-
title_replacements=
|
300
|
-
content_remove_patterns=
|
301
|
-
content_replacements=
|
299
|
+
title_remove_patterns=t_remove,
|
300
|
+
title_replacements=t_replace,
|
301
|
+
content_remove_patterns=c_remove,
|
302
|
+
content_replacements=c_replace,
|
302
303
|
)
|
303
304
|
|
305
|
+
@classmethod
|
306
|
+
def _merge_rules(cls, section: dict[str, Any]) -> tuple[list[str], dict[str, str]]:
|
307
|
+
"""
|
308
|
+
Merge inline patterns/replacements with any enabled external files.
|
309
|
+
|
310
|
+
:param section: Mapping describing either the ``title`` or ``content`` rules.
|
311
|
+
:return: Tuple ``(remove_patterns, replace)`` after merging.
|
312
|
+
"""
|
313
|
+
remove = list(section.get("remove_patterns") or [])
|
314
|
+
replace = dict(section.get("replace") or {})
|
315
|
+
ext = section.get("external") or {}
|
316
|
+
if ext.get("enabled", False):
|
317
|
+
rm_path = ext.get("remove_patterns") or ""
|
318
|
+
rp_path = ext.get("replace") or ""
|
319
|
+
remove += cls._load_str_list(rm_path)
|
320
|
+
replace.update(cls._load_str_dict(rp_path))
|
321
|
+
return remove, replace
|
322
|
+
|
304
323
|
@staticmethod
|
305
324
|
def _load_str_list(path: str) -> list[str]:
|
325
|
+
"""
|
326
|
+
Load a JSON file containing a list of strings.
|
327
|
+
|
328
|
+
:param path: File path to a JSON array (e.g., ``["a", "b"]``).
|
329
|
+
:return: Parsed list on success; empty list if ``path`` is empty, file is
|
330
|
+
missing, or content is invalid.
|
331
|
+
"""
|
332
|
+
if not path:
|
333
|
+
return []
|
306
334
|
try:
|
307
335
|
with open(path, encoding="utf-8") as f:
|
308
|
-
|
309
|
-
|
336
|
+
data = json.load(f)
|
337
|
+
return list(data) if isinstance(data, list) else []
|
310
338
|
except Exception:
|
311
339
|
return []
|
312
340
|
|
313
341
|
@staticmethod
|
314
342
|
def _load_str_dict(path: str) -> dict[str, str]:
|
343
|
+
"""
|
344
|
+
Load a JSON file containing a dict of string-to-string mappings.
|
345
|
+
|
346
|
+
:param path: File path to a JSON object (e.g., ``{"old":"new"}``).
|
347
|
+
:return: Parsed dict on success; empty dict if ``path`` is empty, file is
|
348
|
+
missing, or content is invalid.
|
349
|
+
"""
|
350
|
+
if not path:
|
351
|
+
return {}
|
315
352
|
try:
|
316
353
|
with open(path, encoding="utf-8") as f:
|
317
|
-
|
318
|
-
|
354
|
+
data = json.load(f)
|
355
|
+
return dict(data) if isinstance(data, dict) else {}
|
319
356
|
except Exception:
|
320
357
|
return {}
|
@@ -7,12 +7,11 @@ This package serves as the core layer of the novel_downloader system.
|
|
7
7
|
|
8
8
|
It provides factory methods for constructing key components required for
|
9
9
|
downloading and processing online novel content, including:
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
- search: Provides unified search functionality across supported novel sites.
|
10
|
+
* Downloader: Handles the full download lifecycle of a book or a batch of books.
|
11
|
+
* Parser: Extracts structured data from HTML or SSR content.
|
12
|
+
* Fetcher: Sends HTTP requests and manages sessions, including login if required.
|
13
|
+
* Exporter: Responsible for exporting downloaded data into various output formats.
|
14
|
+
* search: Provides unified search functionality across supported novel sites.
|
16
15
|
"""
|
17
16
|
|
18
17
|
__all__ = [
|
@@ -133,14 +133,14 @@ def common_export_as_txt(
|
|
133
133
|
out_path = out_dir / out_name
|
134
134
|
|
135
135
|
# --- Save final text ---
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
if result:
|
136
|
+
try:
|
137
|
+
result = write_file(
|
138
|
+
content=final_text,
|
139
|
+
filepath=out_path,
|
140
|
+
on_exist="overwrite",
|
141
|
+
)
|
143
142
|
exporter.logger.info("%s Novel saved to: %s", TAG, out_path)
|
144
|
-
|
145
|
-
exporter.logger.error("%s Failed to write novel to %s", TAG, out_path)
|
143
|
+
except Exception as e:
|
144
|
+
exporter.logger.error("%s Failed to write novel to %s: %s", TAG, out_path, e)
|
145
|
+
return None
|
146
146
|
return result
|
@@ -126,14 +126,14 @@ def linovelib_export_as_txt(
|
|
126
126
|
out_path = out_dir / out_name
|
127
127
|
|
128
128
|
# --- Save final text ---
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
if result:
|
129
|
+
try:
|
130
|
+
result = write_file(
|
131
|
+
content=final_text,
|
132
|
+
filepath=out_path,
|
133
|
+
on_exist="overwrite",
|
134
|
+
)
|
136
135
|
exporter.logger.info("%s Novel saved to: %s", TAG, out_path)
|
137
|
-
|
138
|
-
exporter.logger.error("%s Failed to write novel to %s", TAG, out_path)
|
136
|
+
except Exception as e:
|
137
|
+
exporter.logger.error("%s Failed to write novel to %s: %s", TAG, out_path, e)
|
138
|
+
return None
|
139
139
|
return result
|