novel-downloader: novel_downloader-2.0.0-py3-none-any.whl → novel_downloader-2.0.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/download.py +3 -3
  3. novel_downloader/cli/export.py +1 -1
  4. novel_downloader/cli/ui.py +7 -7
  5. novel_downloader/config/adapter.py +191 -154
  6. novel_downloader/core/__init__.py +5 -6
  7. novel_downloader/core/exporters/common/txt.py +9 -9
  8. novel_downloader/core/exporters/linovelib/txt.py +9 -9
  9. novel_downloader/core/fetchers/qidian.py +20 -35
  10. novel_downloader/core/interfaces/fetcher.py +2 -2
  11. novel_downloader/core/interfaces/parser.py +2 -2
  12. novel_downloader/core/parsers/base.py +1 -0
  13. novel_downloader/core/parsers/eightnovel.py +2 -2
  14. novel_downloader/core/parsers/esjzone.py +3 -3
  15. novel_downloader/core/parsers/qidian/main_parser.py +747 -12
  16. novel_downloader/core/parsers/qidian/utils/__init__.py +2 -21
  17. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +4 -4
  18. novel_downloader/core/parsers/xiguashuwu.py +6 -12
  19. novel_downloader/locales/en.json +3 -3
  20. novel_downloader/locales/zh.json +3 -3
  21. novel_downloader/utils/__init__.py +0 -2
  22. novel_downloader/utils/chapter_storage.py +2 -3
  23. novel_downloader/utils/constants.py +1 -3
  24. novel_downloader/utils/cookies.py +32 -17
  25. novel_downloader/utils/crypto_utils/__init__.py +0 -6
  26. novel_downloader/utils/crypto_utils/rc4.py +40 -50
  27. novel_downloader/utils/epub/__init__.py +2 -3
  28. novel_downloader/utils/epub/builder.py +6 -6
  29. novel_downloader/utils/epub/constants.py +5 -5
  30. novel_downloader/utils/epub/documents.py +7 -7
  31. novel_downloader/utils/epub/models.py +8 -8
  32. novel_downloader/utils/epub/utils.py +10 -10
  33. novel_downloader/utils/file_utils/io.py +48 -73
  34. novel_downloader/utils/file_utils/normalize.py +1 -7
  35. novel_downloader/utils/file_utils/sanitize.py +4 -11
  36. novel_downloader/utils/fontocr/__init__.py +13 -0
  37. novel_downloader/utils/{fontocr.py → fontocr/core.py} +70 -61
  38. novel_downloader/utils/fontocr/loader.py +50 -0
  39. novel_downloader/utils/logger.py +80 -56
  40. novel_downloader/utils/network.py +16 -40
  41. novel_downloader/utils/text_utils/text_cleaner.py +39 -30
  42. novel_downloader/utils/text_utils/truncate_utils.py +3 -14
  43. novel_downloader/utils/time_utils/sleep_utils.py +53 -43
  44. novel_downloader/web/main.py +1 -1
  45. novel_downloader/web/pages/search.py +3 -3
  46. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.1.dist-info}/METADATA +2 -1
  47. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.1.dist-info}/RECORD +51 -55
  48. novel_downloader/core/parsers/qidian/book_info_parser.py +0 -89
  49. novel_downloader/core/parsers/qidian/chapter_encrypted.py +0 -470
  50. novel_downloader/core/parsers/qidian/chapter_normal.py +0 -126
  51. novel_downloader/core/parsers/qidian/chapter_router.py +0 -68
  52. novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +0 -143
  53. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -110
  54. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.1.dist-info}/WHEEL +0 -0
  55. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.1.dist-info}/entry_points.txt +0 -0
  56. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.1.dist-info}/licenses/LICENSE +0 -0
  57. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.1.dist-info}/top_level.txt +0 -0
@@ -6,7 +6,7 @@ novel_downloader
6
6
  Core package for the Novel Downloader project.
7
7
  """
8
8
 
9
- __version__ = "2.0.0"
9
+ __version__ = "2.0.1"
10
10
 
11
11
  __author__ = "Saudade Z"
12
12
  __email__ = "saudadez217@gmail.com"
@@ -155,7 +155,7 @@ async def _download(
155
155
  exporter_cfg = adapter.get_exporter_config()
156
156
  login_cfg = adapter.get_login_config()
157
157
  log_level = adapter.get_log_level()
158
- setup_logging(log_level=log_level)
158
+ setup_logging(console_level=log_level)
159
159
 
160
160
  parser = get_parser(site, parser_cfg)
161
161
  exporter = None
@@ -214,8 +214,8 @@ async def _prompt_login_fields(
214
214
  ui.info(t("login_use_config"))
215
215
  continue
216
216
 
217
- value: str | dict[str, str]
218
- while True:
217
+ value: str | dict[str, str] = ""
218
+ for _ in range(5):
219
219
  if field.type == "password":
220
220
  value = ui.prompt_password(t("login_enter_password"))
221
221
  elif field.type == "cookie":
@@ -56,7 +56,7 @@ def handle_export(args: Namespace) -> None:
56
56
  exporter_cfg = adapter.get_exporter_config()
57
57
  log_level = adapter.get_log_level()
58
58
  exporter = get_exporter(site, exporter_cfg)
59
- setup_logging(log_level=log_level)
59
+ setup_logging(console_level=log_level)
60
60
 
61
61
  for book_id in book_ids:
62
62
  ui.info(t("export_processing", book_id=book_id, format=export_format))
@@ -7,12 +7,12 @@ A small set of Rich-based helpers to keep CLI presentation and prompts
7
7
  consistent across subcommands.
8
8
 
9
9
  Public API:
10
- - info, success, warn, error
11
- - confirm
12
- - prompt, prompt_password
13
- - render_table
14
- - select_index
15
- - print_progress
10
+ * info, success, warn, error
11
+ * confirm
12
+ * prompt, prompt_password
13
+ * render_table
14
+ * select_index
15
+ * print_progress
16
16
  """
17
17
 
18
18
  from __future__ import annotations
@@ -71,7 +71,7 @@ def prompt(message: str, *, default: str | None = None) -> str:
71
71
  :return: The user's input.
72
72
  """
73
73
  try:
74
- result: str = Prompt.ask(message, default=default or "")
74
+ result: str = Prompt.ask(message, default=default or "", show_default=False)
75
75
  return result
76
76
  except (KeyboardInterrupt, EOFError):
77
77
  warn("Cancelled.")
@@ -4,11 +4,13 @@ novel_downloader.config.adapter
4
4
  -------------------------------
5
5
 
6
6
  Defines ConfigAdapter, which maps a raw configuration dictionary and
7
- site name into structured dataclass-based config models.
7
+ site into structured dataclass-based config models.
8
8
  """
9
9
 
10
+ import contextlib
10
11
  import json
11
- from typing import Any, TypeVar, cast
12
+ from collections.abc import Mapping
13
+ from typing import Any, TypeVar
12
14
 
13
15
  from novel_downloader.models import (
14
16
  BookConfig,
@@ -26,96 +28,106 @@ class ConfigAdapter:
26
28
  """
27
29
  Adapter to map a raw configuration dictionary and site name
28
30
  into structured dataclass configuration models.
31
+
32
+ Resolution order for each field:
33
+ 1. ``config["sites"][<site>]`` (if present)
34
+ 2. ``config["general"]`` (if present)
35
+ 3. Hard-coded default passed by the caller
29
36
  """
30
37
 
31
- def __init__(self, config: dict[str, Any], site: str):
38
+ def __init__(self, config: Mapping[str, Any], site: str):
32
39
  """
33
- Initialize the adapter.
40
+ Initialize the adapter with a configuration mapping and a site key.
34
41
 
35
- :param config: The fully loaded configuration dictionary.
36
- :param site: The current site name (e.g. "qidian").
42
+ :param config: Fully loaded configuration mapping.
43
+ :param site: Current site key (e.g., ``"qidian"``).
37
44
  """
38
- self._config = config
39
- self._site = site
40
- self._site_cfg: dict[str, Any] = self._get_site_cfg()
41
- self._gen_cfg: dict[str, Any] = config.get("general") or {}
45
+ self._config: dict[str, Any] = dict(config)
46
+ self._site: str = site
42
47
 
43
48
  def get_fetcher_config(self) -> FetcherConfig:
44
49
  """
45
- Build a FetcherConfig from the raw configuration.
50
+ Build a :class:`novel_downloader.models.FetcherConfig` by resolving fields
51
+ from site-specific and general settings.
46
52
 
47
- :return: A FetcherConfig instance with all fields populated.
53
+ :return: Fully populated configuration for the network fetcher.
48
54
  """
55
+ s, g = self._site_cfg, self._gen_cfg
49
56
  return FetcherConfig(
50
- request_interval=self._get_gen_cfg("request_interval", 2.0),
51
- retry_times=self._get_gen_cfg("retry_times", 3),
52
- backoff_factor=self._get_gen_cfg("backoff_factor", 2.0),
53
- timeout=self._get_gen_cfg("timeout", 30.0),
54
- max_connections=self._get_gen_cfg("max_connections", 10),
55
- max_rps=self._get_gen_cfg("max_rps", 1000.0),
56
- user_agent=self._get_gen_cfg("user_agent", None),
57
- headers=self._get_gen_cfg("headers", None),
58
- verify_ssl=self._get_gen_cfg("verify_ssl", True),
59
- locale_style=self._get_gen_cfg("locale_style", "simplified"),
57
+ request_interval=self._pick("request_interval", 2.0, s, g),
58
+ retry_times=self._pick("retry_times", 3, s, g),
59
+ backoff_factor=self._pick("backoff_factor", 2.0, s, g),
60
+ timeout=self._pick("timeout", 30.0, s, g),
61
+ max_connections=self._pick("max_connections", 10, s, g),
62
+ max_rps=self._pick("max_rps", 1000.0, s, g),
63
+ user_agent=self._pick("user_agent", None, s, g),
64
+ headers=self._pick("headers", None, s, g),
65
+ verify_ssl=self._pick("verify_ssl", True, s, g),
66
+ locale_style=self._pick("locale_style", "simplified", s, g),
60
67
  )
61
68
 
62
69
  def get_downloader_config(self) -> DownloaderConfig:
63
70
  """
64
- Build a DownloaderConfig using both general and site-specific settings.
71
+ Build a :class:`novel_downloader.models.DownloaderConfig` using both
72
+ general and site-specific settings.
65
73
 
66
- :return: A DownloaderConfig instance with all fields populated.
74
+ :return: Fully populated configuration for the chapter/page downloader.
67
75
  """
68
- gen = self._config.get("general", {})
69
- debug = gen.get("debug", {})
76
+ s, g = self._site_cfg, self._gen_cfg
77
+ debug = g.get("debug") or {}
70
78
  return DownloaderConfig(
71
- request_interval=self._get_gen_cfg("request_interval", 2.0),
72
- retry_times=self._get_gen_cfg("retry_times", 3),
73
- backoff_factor=self._get_gen_cfg("backoff_factor", 2.0),
74
- workers=self._get_gen_cfg("workers", 2),
75
- skip_existing=self._get_gen_cfg("skip_existing", True),
76
- login_required=self._site_cfg.get("login_required", False),
77
- save_html=debug.get("save_html", False),
78
- raw_data_dir=gen.get("raw_data_dir", "./raw_data"),
79
- cache_dir=gen.get("cache_dir", "./novel_cache"),
80
- storage_batch_size=gen.get("storage_batch_size", 1),
79
+ request_interval=self._pick("request_interval", 2.0, s, g),
80
+ retry_times=self._pick("retry_times", 3, s, g),
81
+ backoff_factor=self._pick("backoff_factor", 2.0, s, g),
82
+ workers=self._pick("workers", 2, s, g),
83
+ skip_existing=self._pick("skip_existing", True, s, g),
84
+ login_required=bool(s.get("login_required", False)),
85
+ save_html=bool(debug.get("save_html", False)),
86
+ raw_data_dir=g.get("raw_data_dir", "./raw_data"),
87
+ cache_dir=g.get("cache_dir", "./novel_cache"),
88
+ storage_batch_size=g.get("storage_batch_size", 1),
81
89
  )
82
90
 
83
91
  def get_parser_config(self) -> ParserConfig:
84
92
  """
85
- Build a ParserConfig from general, OCR, and site-specific settings.
93
+ Build a :class:`novel_downloader.models.ParserConfig` from general,
94
+ OCR-related, and site-specific settings.
86
95
 
87
- :return: A ParserConfig instance with all fields populated.
96
+ :return: Fully populated configuration for the parser stage.
88
97
  """
89
- gen = self._config.get("general", {})
90
- font_ocr = gen.get("font_ocr", {})
98
+ g = self._gen_cfg
99
+ s = self._site_cfg
100
+ font_ocr = g.get("font_ocr") or {}
91
101
  return ParserConfig(
92
- cache_dir=gen.get("cache_dir", "./novel_cache"),
93
- use_truncation=self._site_cfg.get("use_truncation", True),
94
- decode_font=font_ocr.get("decode_font", False),
95
- save_font_debug=font_ocr.get("save_font_debug", False),
96
- batch_size=font_ocr.get("batch_size", 32),
102
+ cache_dir=g.get("cache_dir", "./novel_cache"),
103
+ use_truncation=bool(s.get("use_truncation", True)),
104
+ decode_font=bool(font_ocr.get("decode_font", False)),
105
+ save_font_debug=bool(font_ocr.get("save_font_debug", False)),
106
+ batch_size=int(font_ocr.get("batch_size", 32)),
97
107
  )
98
108
 
99
109
  def get_exporter_config(self) -> ExporterConfig:
100
110
  """
101
- Build an ExporterConfig from output and general settings.
111
+ Build an :class:`novel_downloader.models.ExporterConfig` from the
112
+ ``output`` and ``cleaner`` sections plus general settings.
102
113
 
103
- :return: An ExporterConfig instance with all fields populated.
114
+ :return: Fully populated configuration for text/ebook export.
104
115
  """
105
- gen = self._config.get("general", {})
106
- out = self._config.get("output", {})
107
- cln = self._config.get("cleaner", {})
108
- fmt = out.get("formats", {})
109
- naming = out.get("naming", {})
110
- epub_opts = out.get("epub", {})
116
+ g = self._gen_cfg
117
+ out = self._config.get("output") or {}
118
+ cln = self._config.get("cleaner") or {}
119
+ fmt = out.get("formats") or {}
120
+ naming = out.get("naming") or {}
121
+ epub_opts = out.get("epub") or {}
122
+
111
123
  cleaner_cfg = self._dict_to_cleaner_cfg(cln)
112
124
  return ExporterConfig(
113
- cache_dir=gen.get("cache_dir", "./novel_cache"),
114
- raw_data_dir=gen.get("raw_data_dir", "./raw_data"),
115
- output_dir=gen.get("output_dir", "./downloads"),
116
- clean_text=cln.get("clean_text", True),
125
+ cache_dir=g.get("cache_dir", "./novel_cache"),
126
+ raw_data_dir=g.get("raw_data_dir", "./raw_data"),
127
+ output_dir=g.get("output_dir", "./downloads"),
128
+ clean_text=cln.get("clean_text", False),
117
129
  make_txt=fmt.get("make_txt", True),
118
- make_epub=fmt.get("make_epub", False),
130
+ make_epub=fmt.get("make_epub", True),
119
131
  make_md=fmt.get("make_md", False),
120
132
  make_pdf=fmt.get("make_pdf", False),
121
133
  append_timestamp=naming.get("append_timestamp", True),
@@ -128,35 +140,36 @@ class ConfigAdapter:
128
140
 
129
141
  def get_login_config(self) -> dict[str, str]:
130
142
  """
131
- Return the subset of login fields present in current site config:
132
- * `username`
133
- * `password`
134
- * `cookies`
143
+ Extract login-related fields from the current site configuration.
144
+ Only non-empty string values are returned; values are stripped.
145
+
146
+ :return: A subset of ``{"username","password","cookies"}`` that are non-empty
135
147
  """
136
148
  out: dict[str, str] = {}
137
149
  for key in ("username", "password", "cookies"):
138
150
  val = self._site_cfg.get(key, "")
139
- val = val.strip()
140
- if val:
141
- out[key] = val
151
+ if isinstance(val, str):
152
+ s = val.strip()
153
+ if s:
154
+ out[key] = s
142
155
  return out
143
156
 
144
157
  def get_book_ids(self) -> list[BookConfig]:
145
158
  """
146
- Extract the list of target books from the site configuration.
159
+ Extract and normalize the list of target books for the current site.
147
160
 
148
- The site config may specify book_ids as:
149
- * a single string or integer
150
- * a dict with book_id and optional start_id, end_id, ignore_ids
151
- * a list of the above types
161
+ Accepted shapes for ``site.book_ids``:
162
+ * a single ``str`` or ``int`` (book id)
163
+ * a dict with fields: book_id and optional start_id, end_id, ignore_ids
164
+ * a ``list`` containing any mix of the above
152
165
 
153
- :return: A list of BookConfig dicts.
154
- :raises ValueError: if the raw book_ids is neither a str/int, dict, nor list.
166
+ :return: Normalized list of :class:`BookConfig`-compatible dictionaries.
167
+ :raises ValueError: If ``book_ids`` is neither a scalar ``str|int``, ``dict``,
168
+ nor ``list``.
155
169
  """
156
- site_cfg = self._get_site_cfg()
157
- raw = site_cfg.get("book_ids", [])
170
+ raw = self._site_cfg.get("book_ids", [])
158
171
 
159
- if isinstance(raw, str | int):
172
+ if isinstance(raw, (str | int)):
160
173
  return [{"book_id": str(raw)}]
161
174
 
162
175
  if isinstance(raw, dict):
@@ -170,151 +183,175 @@ class ConfigAdapter:
170
183
  result: list[BookConfig] = []
171
184
  for item in raw:
172
185
  try:
173
- if isinstance(item, str | int):
186
+ if isinstance(item, (str | int)):
174
187
  result.append({"book_id": str(item)})
175
188
  elif isinstance(item, dict):
176
189
  result.append(self._dict_to_book_cfg(item))
177
190
  except ValueError:
178
191
  continue
179
-
180
192
  return result
181
193
 
182
194
  def get_log_level(self) -> str:
183
195
  """
184
- Retrieve the logging level from [general.debug].
196
+ Retrieve the logging level from ``general.debug``.
185
197
 
186
- :return: The configured log level ("DEBUG", "INFO", "WARNING", "ERROR").
198
+ :return: One of ``"DEBUG"``, ``"INFO"``, ``"WARNING"``, ``"ERROR"``
187
199
  """
188
- debug_cfg = self._config.get("general", {}).get("debug", {})
200
+ debug_cfg = self._gen_cfg.get("debug", {})
189
201
  return debug_cfg.get("log_level") or "INFO"
190
202
 
191
203
  @property
192
204
  def site(self) -> str:
193
- """
194
- Get the current site name.
195
- """
196
205
  return self._site
197
206
 
198
207
  @site.setter
199
208
  def site(self, value: str) -> None:
200
- """
201
- Set a new site name for configuration lookups.
209
+ self._site = value
202
210
 
203
- :param value: The new site key in config["sites"] to use.
211
+ @property
212
+ def _gen_cfg(self) -> dict[str, Any]:
204
213
  """
205
- self._site = value
206
- self._site_cfg = self._get_site_cfg()
214
+ A read-only view of the global ``general`` settings.
207
215
 
208
- def _get_gen_cfg(self, key: str, default: T) -> T:
209
- return self._site_cfg.get(key) or self._gen_cfg.get(key) or default
216
+ :return: ``config["general"]`` if present, else ``{}``.
217
+ """
218
+ return self._config.get("general") or {}
210
219
 
211
- def _get_site_cfg(self) -> dict[str, Any]:
220
+ @property
221
+ def _site_cfg(self) -> dict[str, Any]:
212
222
  """
213
- Retrieve the configuration for a specific site.
223
+ Retrieve the configuration block for the current site.
214
224
 
215
225
  Lookup order:
216
- 1. If there is a site-specific entry under config["sites"], return that.
217
- 2. Otherwise, if a "common" entry exists under config["sites"], return that.
218
- 3. If neither is present, return an empty dict.
226
+ 1. If a site-specific entry exists under ``config["sites"]``, return it.
227
+ 2. Otherwise, if ``config["sites"]["common"]`` exists, return it.
228
+ 3. Else return an empty dict.
219
229
 
220
- :param site: Optional override of the site name; defaults to self._site.
221
- :return: The site-specific or common configuration dict.
230
+ :return: Site-specific mapping, common mapping, or ``{}``.
222
231
  """
223
232
  sites_cfg = self._config.get("sites") or {}
224
-
225
- if self._site in sites_cfg:
233
+ if self._site in sites_cfg and isinstance(sites_cfg[self._site], dict):
226
234
  return sites_cfg[self._site] or {}
227
-
228
235
  return sites_cfg.get("common") or {}
229
236
 
237
+ @staticmethod
238
+ def _has_key(d: Mapping[str, Any] | None, key: str) -> bool:
239
+ """
240
+ Check whether a mapping contains a key.
241
+
242
+ :param d: Mapping to inspect.
243
+ :param key: Key to look up.
244
+ :return: ``True`` if ``d`` is a Mapping and contains key; otherwise ``False``.
245
+ """
246
+ return isinstance(d, Mapping) and (key in d)
247
+
248
+ def _pick(self, key: str, default: T, *sources: Mapping[str, Any]) -> T:
249
+ """
250
+ Resolve ``key`` from the provided ``sources`` in order of precedence.
251
+
252
+ :param key: Configuration key to resolve.
253
+ :param default: Fallback value if ``key`` is absent in all sources.
254
+ :param sources: One or more mappings to check, in order of precedence.
255
+ :return: The first present value for ``key``, otherwise ``default``.
256
+ """
257
+ for src in sources:
258
+ if self._has_key(src, key):
259
+ return src[key] # type: ignore[no-any-return]
260
+ return default
261
+
230
262
  @staticmethod
231
263
  def _dict_to_book_cfg(data: dict[str, Any]) -> BookConfig:
232
264
  """
233
- Convert a dictionary to a BookConfig with normalized types.
265
+ Convert a raw dict into a :class:`novel_downloader.models.BookConfig`
266
+ with normalized types (all IDs coerced to strings).
234
267
 
235
268
  :param data: A dict that must contain at least "book_id".
236
- :return: A BookConfig dict with all values cast to strings or lists of strings.
237
- :raises ValueError: if the "book_id" field is missing.
269
+ :return: Normalized :class:`BookConfig` mapping.
270
+ :raises ValueError: If ``"book_id"`` is missing.
238
271
  """
239
272
  if "book_id" not in data:
240
273
  raise ValueError("Missing required field 'book_id'")
241
274
 
242
- result: BookConfig = {"book_id": str(data["book_id"])}
275
+ out: BookConfig = {"book_id": str(data["book_id"])}
243
276
 
244
277
  if "start_id" in data:
245
- result["start_id"] = str(data["start_id"])
246
-
278
+ out["start_id"] = str(data["start_id"])
247
279
  if "end_id" in data:
248
- result["end_id"] = str(data["end_id"])
249
-
280
+ out["end_id"] = str(data["end_id"])
250
281
  if "ignore_ids" in data:
251
- result["ignore_ids"] = [str(x) for x in data["ignore_ids"]]
252
-
253
- return result
282
+ with contextlib.suppress(Exception):
283
+ out["ignore_ids"] = [str(x) for x in data["ignore_ids"]]
284
+ return out
254
285
 
255
286
  @classmethod
256
287
  def _dict_to_cleaner_cfg(cls, cfg: dict[str, Any]) -> TextCleanerConfig:
257
288
  """
258
- Convert a nested dict of title/content rules into a TextCleanerConfig.
289
+ Convert a nested ``cleaner`` block into a
290
+ :class:`novel_downloader.models.TextCleanerConfig`.
259
291
 
260
292
  :param cfg: configuration dictionary
261
- :return: fully constructed TextCleanerConfig
293
+ :return: Aggregated title/content rules with external file contents merged
262
294
  """
263
- # Title rules
264
- title_section = cfg.get("title", {})
265
- title_remove = title_section.get("remove_patterns", [])
266
- title_repl = title_section.get("replace", {})
267
-
268
- title_ext = title_section.get("external", {})
269
- if title_ext.get("enabled", False):
270
- title_ext_rm_p = title_ext.get("remove_patterns", "")
271
- title_ext_rp_p = title_ext.get("replace", "")
272
-
273
- title_remove_ext = cls._load_str_list(title_ext_rm_p)
274
- title_remove += title_remove_ext
275
-
276
- title_repl_ext = cls._load_str_dict(title_ext_rp_p)
277
- title_repl = {**title_repl, **title_repl_ext}
278
-
279
- # Content rules
280
- content_section = cfg.get("content", {})
281
- content_remove = content_section.get("remove_patterns", [])
282
- content_repl = content_section.get("replace", {})
283
-
284
- content_ext = content_section.get("external", {})
285
-
286
- if content_ext.get("enabled", False):
287
- content_ext_rm_p = content_ext.get("remove_patterns", "")
288
- content_ext_rp_p = content_ext.get("replace", "")
289
-
290
- content_remove_ext = cls._load_str_list(content_ext_rm_p)
291
- content_remove += content_remove_ext
292
-
293
- content_repl_ext = cls._load_str_dict(content_ext_rp_p)
294
- content_repl = {**content_repl, **content_repl_ext}
295
-
295
+ t_remove, t_replace = cls._merge_rules(cfg.get("title", {}) or {})
296
+ c_remove, c_replace = cls._merge_rules(cfg.get("content", {}) or {})
296
297
  return TextCleanerConfig(
297
298
  remove_invisible=cfg.get("remove_invisible", True),
298
- title_remove_patterns=title_remove,
299
- title_replacements=title_repl,
300
- content_remove_patterns=content_remove,
301
- content_replacements=content_repl,
299
+ title_remove_patterns=t_remove,
300
+ title_replacements=t_replace,
301
+ content_remove_patterns=c_remove,
302
+ content_replacements=c_replace,
302
303
  )
303
304
 
305
+ @classmethod
306
+ def _merge_rules(cls, section: dict[str, Any]) -> tuple[list[str], dict[str, str]]:
307
+ """
308
+ Merge inline patterns/replacements with any enabled external files.
309
+
310
+ :param section: Mapping describing either the ``title`` or ``content`` rules.
311
+ :return: Tuple ``(remove_patterns, replace)`` after merging.
312
+ """
313
+ remove = list(section.get("remove_patterns") or [])
314
+ replace = dict(section.get("replace") or {})
315
+ ext = section.get("external") or {}
316
+ if ext.get("enabled", False):
317
+ rm_path = ext.get("remove_patterns") or ""
318
+ rp_path = ext.get("replace") or ""
319
+ remove += cls._load_str_list(rm_path)
320
+ replace.update(cls._load_str_dict(rp_path))
321
+ return remove, replace
322
+
304
323
  @staticmethod
305
324
  def _load_str_list(path: str) -> list[str]:
325
+ """
326
+ Load a JSON file containing a list of strings.
327
+
328
+ :param path: File path to a JSON array (e.g., ``["a", "b"]``).
329
+ :return: Parsed list on success; empty list if ``path`` is empty, file is
330
+ missing, or content is invalid.
331
+ """
332
+ if not path:
333
+ return []
306
334
  try:
307
335
  with open(path, encoding="utf-8") as f:
308
- parsed = json.load(f)
309
- return cast(list[str], parsed)
336
+ data = json.load(f)
337
+ return list(data) if isinstance(data, list) else []
310
338
  except Exception:
311
339
  return []
312
340
 
313
341
  @staticmethod
314
342
  def _load_str_dict(path: str) -> dict[str, str]:
343
+ """
344
+ Load a JSON file containing a dict of string-to-string mappings.
345
+
346
+ :param path: File path to a JSON object (e.g., ``{"old":"new"}``).
347
+ :return: Parsed dict on success; empty dict if ``path`` is empty, file is
348
+ missing, or content is invalid.
349
+ """
350
+ if not path:
351
+ return {}
315
352
  try:
316
353
  with open(path, encoding="utf-8") as f:
317
- parsed = json.load(f)
318
- return cast(dict[str, str], parsed)
354
+ data = json.load(f)
355
+ return dict(data) if isinstance(data, dict) else {}
319
356
  except Exception:
320
357
  return {}
@@ -7,12 +7,11 @@ This package serves as the core layer of the novel_downloader system.
7
7
 
8
8
  It provides factory methods for constructing key components required for
9
9
  downloading and processing online novel content, including:
10
-
11
- - Downloader: Handles the full download lifecycle of a book or a batch of books.
12
- - Parser: Extracts structured data from HTML or SSR content.
13
- - Fetcher: Sends HTTP requests and manages sessions, including login if required.
14
- - Exporter: Responsible for exporting downloaded data into various output formats.
15
- - search: Provides unified search functionality across supported novel sites.
10
+ * Downloader: Handles the full download lifecycle of a book or a batch of books.
11
+ * Parser: Extracts structured data from HTML or SSR content.
12
+ * Fetcher: Sends HTTP requests and manages sessions, including login if required.
13
+ * Exporter: Responsible for exporting downloaded data into various output formats.
14
+ * search: Provides unified search functionality across supported novel sites.
16
15
  """
17
16
 
18
17
  __all__ = [
@@ -133,14 +133,14 @@ def common_export_as_txt(
133
133
  out_path = out_dir / out_name
134
134
 
135
135
  # --- Save final text ---
136
- result = write_file(
137
- content=final_text,
138
- filepath=out_path,
139
- write_mode="w",
140
- on_exist="overwrite",
141
- )
142
- if result:
136
+ try:
137
+ result = write_file(
138
+ content=final_text,
139
+ filepath=out_path,
140
+ on_exist="overwrite",
141
+ )
143
142
  exporter.logger.info("%s Novel saved to: %s", TAG, out_path)
144
- else:
145
- exporter.logger.error("%s Failed to write novel to %s", TAG, out_path)
143
+ except Exception as e:
144
+ exporter.logger.error("%s Failed to write novel to %s: %s", TAG, out_path, e)
145
+ return None
146
146
  return result
@@ -126,14 +126,14 @@ def linovelib_export_as_txt(
126
126
  out_path = out_dir / out_name
127
127
 
128
128
  # --- Save final text ---
129
- result = write_file(
130
- content=final_text,
131
- filepath=out_path,
132
- write_mode="w",
133
- on_exist="overwrite",
134
- )
135
- if result:
129
+ try:
130
+ result = write_file(
131
+ content=final_text,
132
+ filepath=out_path,
133
+ on_exist="overwrite",
134
+ )
136
135
  exporter.logger.info("%s Novel saved to: %s", TAG, out_path)
137
- else:
138
- exporter.logger.error("%s Failed to write novel to %s", TAG, out_path)
136
+ except Exception as e:
137
+ exporter.logger.error("%s Failed to write novel to %s: %s", TAG, out_path, e)
138
+ return None
139
139
  return result